summaryrefslogtreecommitdiff
path: root/tools/perf/scripts/python/stackcollapse.py
diff options
context:
space:
mode:
author Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org> 2022-09-12 08:17:30 +0200
committer Linus Walleij <linus.walleij@linaro.org> 2022-09-14 12:18:38 +0200
commit c35edcef53f8ca7a07bc4bbe95f756e55a74feb0 (patch)
tree 5d73e4dea22350e5ae4434fa6596425d538d2cbb /tools/perf/scripts/python/stackcollapse.py
parent b76881c1288eca49c1579ed5f2bf8e6bedf25a2b (diff)
dt-bindings: pinctrl: qcom,sc7280-pinctrl: do not require function on non-GPIOs
Certain pins, like SDcard related, do not have functions and such should not be required:

  sdc1-clk-pins: 'function' is a required property

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20220912061746.6311-25-krzysztof.kozlowski@linaro.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Diffstat (limited to 'tools/perf/scripts/python/stackcollapse.py')
0 files changed, 0 insertions, 0 deletions
-rw-r--r--arch/powerpc/boot/4xx.h9
-rw-r--r--arch/powerpc/boot/Makefile339
-rw-r--r--arch/powerpc/boot/addnote.c12
-rw-r--r--arch/powerpc/boot/bamboo.c5
-rw-r--r--arch/powerpc/boot/cpm-serial.c1
-rw-r--r--arch/powerpc/boot/crt0.S132
-rw-r--r--arch/powerpc/boot/crtsavres.S8
-rw-r--r--arch/powerpc/boot/cuboot-52xx.c5
-rw-r--r--arch/powerpc/boot/cuboot-824x.c5
-rw-r--r--arch/powerpc/boot/cuboot-83xx.c5
-rw-r--r--arch/powerpc/boot/cuboot-85xx-cpm2.c5
-rw-r--r--arch/powerpc/boot/cuboot-85xx.c5
-rw-r--r--arch/powerpc/boot/cuboot-8xx.c5
-rw-r--r--arch/powerpc/boot/cuboot-acadia.c174
-rw-r--r--arch/powerpc/boot/cuboot-amigaone.c5
-rw-r--r--arch/powerpc/boot/cuboot-bamboo.c5
-rw-r--r--arch/powerpc/boot/cuboot-c2k.c190
-rw-r--r--arch/powerpc/boot/cuboot-ebony.c5
-rw-r--r--arch/powerpc/boot/cuboot-hotfoot.c142
-rw-r--r--arch/powerpc/boot/cuboot-katmai.c5
-rw-r--r--arch/powerpc/boot/cuboot-kilauea.c49
-rw-r--r--arch/powerpc/boot/cuboot-mpc7448hpc2.c48
-rw-r--r--arch/powerpc/boot/cuboot-pq2.c5
-rw-r--r--arch/powerpc/boot/cuboot-rainier.c5
-rw-r--r--arch/powerpc/boot/cuboot-sam440ep.c5
-rw-r--r--arch/powerpc/boot/cuboot-sequoia.c5
-rw-r--r--arch/powerpc/boot/cuboot-taishan.c5
-rw-r--r--arch/powerpc/boot/cuboot-warp.c5
-rw-r--r--arch/powerpc/boot/cuboot-yosemite.c5
-rw-r--r--arch/powerpc/boot/cuboot.c5
-rw-r--r--arch/powerpc/boot/cuboot.h1
-rw-r--r--arch/powerpc/boot/dcr.h12
-rw-r--r--arch/powerpc/boot/decompress.c143
-rw-r--r--arch/powerpc/boot/devtree.c65
-rw-r--r--arch/powerpc/boot/div64.S6
-rw-r--r--arch/powerpc/boot/dts/Makefile5
-rw-r--r--arch/powerpc/boot/dts/a3m071.dts16
-rw-r--r--arch/powerpc/boot/dts/a4m072.dts12
-rw-r--r--arch/powerpc/boot/dts/ac14xx.dts30
-rw-r--r--arch/powerpc/boot/dts/acadia.dts224
-rw-r--r--arch/powerpc/boot/dts/adder875-redboot.dts8
-rw-r--r--arch/powerpc/boot/dts/adder875-uboot.dts8
-rw-r--r--arch/powerpc/boot/dts/akebono.dts24
-rw-r--r--arch/powerpc/boot/dts/amigaone.dts8
-rw-r--r--arch/powerpc/boot/dts/arches.dts16
-rw-r--r--arch/powerpc/boot/dts/asp834x-redboot.dts8
-rw-r--r--arch/powerpc/boot/dts/b4860emu.dts223
-rw-r--r--arch/powerpc/boot/dts/bamboo.dts6
-rw-r--r--arch/powerpc/boot/dts/bluestone.dts48
-rw-r--r--arch/powerpc/boot/dts/bsc9132qds.dts35
-rw-r--r--arch/powerpc/boot/dts/c2k.dts366
-rw-r--r--arch/powerpc/boot/dts/canyonlands.dts39
-rw-r--r--arch/powerpc/boot/dts/charon.dts14
-rw-r--r--arch/powerpc/boot/dts/cm5200.dts6
-rw-r--r--arch/powerpc/boot/dts/currituck.dts14
-rw-r--r--arch/powerpc/boot/dts/digsy_mtc.dts26
-rw-r--r--arch/powerpc/boot/dts/ebony.dts2
-rw-r--r--arch/powerpc/boot/dts/eiger.dts2
-rw-r--r--arch/powerpc/boot/dts/ep405.dts230
-rw-r--r--arch/powerpc/boot/dts/ep8248e.dts6
-rw-r--r--arch/powerpc/boot/dts/ep88xc.dts6
-rw-r--r--arch/powerpc/boot/dts/fsl/Makefile3
-rw-r--r--arch/powerpc/boot/dts/fsl/b4420qds.dts (renamed from arch/powerpc/boot/dts/b4420qds.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/b4420si-post.dtsi45
-rw-r--r--arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi18
-rw-r--r--arch/powerpc/boot/dts/fsl/b4860qds.dts117
-rw-r--r--arch/powerpc/boot/dts/fsl/b4860si-post.dtsi186
-rw-r--r--arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi29
-rw-r--r--arch/powerpc/boot/dts/fsl/b4qds.dtsi (renamed from arch/powerpc/boot/dts/b4qds.dtsi)114
-rw-r--r--arch/powerpc/boot/dts/fsl/b4si-post.dtsi238
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9131rdb.dts (renamed from arch/powerpc/boot/dts/bsc9131rdb.dts)12
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9131rdb.dtsi (renamed from arch/powerpc/boot/dts/bsc9131rdb.dtsi)64
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi6
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9132qds.dts46
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9132qds.dtsi (renamed from arch/powerpc/boot/dts/bsc9132qds.dtsi)14
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi34
-rw-r--r--arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/c293pcie.dts (renamed from arch/powerpc/boot/dts/c293pcie.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/c293si-post.dtsi18
-rw-r--r--arch/powerpc/boot/dts/fsl/cyrus_p5020.dts151
-rw-r--r--arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi (renamed from arch/powerpc/boot/dts/b4860qds.dts)42
-rw-r--r--arch/powerpc/boot/dts/fsl/ge_imp3a.dts (renamed from arch/powerpc/boot/dts/ge_imp3a.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/gef_ppc9a.dts216
-rw-r--r--arch/powerpc/boot/dts/fsl/gef_sbc310.dts234
-rw-r--r--arch/powerpc/boot/dts/fsl/gef_sbc610.dts214
-rw-r--r--arch/powerpc/boot/dts/fsl/kmcent2.dts339
-rw-r--r--arch/powerpc/boot/dts/fsl/kmcoge4.dts (renamed from arch/powerpc/boot/dts/kmcoge4.dts)82
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8536ds.dts (renamed from arch/powerpc/boot/dts/mpc8536ds.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8536ds.dtsi (renamed from arch/powerpc/boot/dts/mpc8536ds.dtsi)8
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8536ds_36b.dts (renamed from arch/powerpc/boot/dts/mpc8536ds_36b.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi16
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8544ds.dts (renamed from arch/powerpc/boot/dts/mpc8544ds.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8544ds.dtsi (renamed from arch/powerpc/boot/dts/mpc8544ds.dtsi)0
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi2
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi2
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8568mds.dts (renamed from arch/powerpc/boot/dts/mpc8568mds.dts)22
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8569mds.dts (renamed from arch/powerpc/boot/dts/mpc8569mds.dts)34
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8572ds.dts (renamed from arch/powerpc/boot/dts/mpc8572ds.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8572ds.dtsi (renamed from arch/powerpc/boot/dts/mpc8572ds.dtsi)0
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8572ds_36b.dts (renamed from arch/powerpc/boot/dts/mpc8572ds_36b.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core0.dts (renamed from arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts)6
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core1.dts (renamed from arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts)6
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi4
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi144
-rw-r--r--arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi54
-rw-r--r--arch/powerpc/boot/dts/fsl/mvme2500.dts276
-rw-r--r--arch/powerpc/boot/dts/fsl/mvme7100.dts148
-rw-r--r--arch/powerpc/boot/dts/fsl/oca4080.dts (renamed from arch/powerpc/boot/dts/oca4080.dts)31
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts19
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb-pa.dtsi (renamed from arch/powerpc/boot/dts/p1010rdb-pa.dtsi)0
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb-pa_36b.dts (renamed from arch/powerpc/boot/dts/p1010rdb-pa_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts47
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts (renamed from arch/powerpc/boot/dts/p1010rdb-pb_36b.dts)20
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb.dtsi (renamed from arch/powerpc/boot/dts/p1010rdb.dtsi)16
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi (renamed from arch/powerpc/boot/dts/p1010rdb_32b.dtsi)2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi (renamed from arch/powerpc/boot/dts/p1010rdb_36b.dtsi)2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1010si-post.dtsi35
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020mbg-pc.dtsi (renamed from arch/powerpc/boot/dts/p1020mbg-pc.dtsi)0
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020mbg-pc_32b.dts (renamed from arch/powerpc/boot/dts/p1020mbg-pc_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020mbg-pc_36b.dts (renamed from arch/powerpc/boot/dts/p1020mbg-pc_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb-pc.dtsi (renamed from arch/powerpc/boot/dts/p1020rdb-pc.dtsi)6
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb-pc_32b.dts (renamed from arch/powerpc/boot/dts/p1020rdb-pc_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb-pc_36b.dts (renamed from arch/powerpc/boot/dts/p1020rdb-pc_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core0.dts (renamed from arch/powerpc/boot/dts/p1020rdb-pc_camp_core0.dts)6
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core1.dts (renamed from arch/powerpc/boot/dts/p1020rdb-pc_camp_core1.dts)6
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb-pd.dts (renamed from arch/powerpc/boot/dts/p1020rdb-pd.dts)18
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb.dts (renamed from arch/powerpc/boot/dts/p1020rdb.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb.dtsi (renamed from arch/powerpc/boot/dts/p1020rdb.dtsi)2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020rdb_36b.dts (renamed from arch/powerpc/boot/dts/p1020rdb_36b.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020si-post.dtsi5
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020utm-pc.dtsi (renamed from arch/powerpc/boot/dts/p1020utm-pc.dtsi)0
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020utm-pc_32b.dts (renamed from arch/powerpc/boot/dts/p1020utm-pc_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1020utm-pc_36b.dts (renamed from arch/powerpc/boot/dts/p1020utm-pc_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021mds.dts (renamed from arch/powerpc/boot/dts/p1021mds.dts)18
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021rdb-pc.dtsi (renamed from arch/powerpc/boot/dts/p1021rdb-pc.dtsi)14
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021rdb-pc_32b.dts (renamed from arch/powerpc/boot/dts/p1021rdb-pc_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021rdb-pc_36b.dts (renamed from arch/powerpc/boot/dts/p1021rdb-pc_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1021si-post.dtsi5
-rw-r--r--arch/powerpc/boot/dts/fsl/p1022ds.dtsi (renamed from arch/powerpc/boot/dts/p1022ds.dtsi)14
-rw-r--r--arch/powerpc/boot/dts/fsl/p1022ds_32b.dts (renamed from arch/powerpc/boot/dts/p1022ds_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1022ds_36b.dts (renamed from arch/powerpc/boot/dts/p1022ds_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1022rdk.dts (renamed from arch/powerpc/boot/dts/p1022rdk.dts)18
-rw-r--r--arch/powerpc/boot/dts/fsl/p1022si-post.dtsi13
-rw-r--r--arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1023rdb.dts (renamed from arch/powerpc/boot/dts/p1023rdb.dts)36
-rw-r--r--arch/powerpc/boot/dts/fsl/p1023si-post.dtsi80
-rw-r--r--arch/powerpc/boot/dts/fsl/p1024rdb.dtsi (renamed from arch/powerpc/boot/dts/p1024rdb.dtsi)2
-rw-r--r--arch/powerpc/boot/dts/fsl/p1024rdb_32b.dts (renamed from arch/powerpc/boot/dts/p1024rdb_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1024rdb_36b.dts (renamed from arch/powerpc/boot/dts/p1024rdb_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb.dtsi (renamed from arch/powerpc/boot/dts/p1025rdb.dtsi)10
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts (renamed from arch/powerpc/boot/dts/p1025rdb_32b.dts)6
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025rdb_36b.dts (renamed from arch/powerpc/boot/dts/p1025rdb_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025twr.dts (renamed from arch/powerpc/boot/dts/p1025twr.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p1025twr.dtsi (renamed from arch/powerpc/boot/dts/p1025twr.dtsi)20
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020ds.dts (renamed from arch/powerpc/boot/dts/p2020ds.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020ds.dtsi (renamed from arch/powerpc/boot/dts/p2020ds.dtsi)0
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020rdb-pc.dtsi (renamed from arch/powerpc/boot/dts/p2020rdb-pc.dtsi)14
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020rdb-pc_32b.dts (renamed from arch/powerpc/boot/dts/p2020rdb-pc_32b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020rdb-pc_36b.dts (renamed from arch/powerpc/boot/dts/p2020rdb-pc_36b.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020rdb.dts (renamed from arch/powerpc/boot/dts/p2020rdb.dts)12
-rw-r--r--arch/powerpc/boot/dts/fsl/p2020si-post.dtsi17
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041rdb.dts (renamed from arch/powerpc/boot/dts/p2041rdb.dts)129
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041si-post.dtsi134
-rw-r--r--arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi18
-rw-r--r--arch/powerpc/boot/dts/fsl/p3041ds.dts (renamed from arch/powerpc/boot/dts/p3041ds.dts)169
-rw-r--r--arch/powerpc/boot/dts/fsl/p3041si-post.dtsi118
-rw-r--r--arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi18
-rw-r--r--arch/powerpc/boot/dts/fsl/p4080ds.dts (renamed from arch/powerpc/boot/dts/p4080ds.dts)260
-rw-r--r--arch/powerpc/boot/dts/fsl/p4080si-post.dtsi189
-rw-r--r--arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi31
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020ds.dts (renamed from arch/powerpc/boot/dts/p5020ds.dts)169
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020si-post.dtsi100
-rw-r--r--arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi14
-rw-r--r--arch/powerpc/boot/dts/fsl/p5040ds.dts486
-rw-r--r--arch/powerpc/boot/dts/fsl/p5040si-post.dtsi146
-rw-r--r--arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi31
-rw-r--r--arch/powerpc/boot/dts/fsl/ppa8548.dts (renamed from arch/powerpc/boot/dts/ppa8548.dts)10
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi1
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi6
-rw-r--r--arch/powerpc/boot/dts/fsl/pq3-power.dtsi19
-rw-r--r--arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi2
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi90
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi (renamed from arch/powerpc/boot/dts/t2080qds.dts)34
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi (renamed from arch/powerpc/boot/dts/t2080rdb.dts)30
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi39
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi62
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi69
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi104
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi61
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi68
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi104
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi73
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi77
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi80
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi77
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi45
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi45
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi69
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi69
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi109
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi77
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi77
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi69
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi69
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi76
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi109
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi99
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi101
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi41
-rw-r--r--arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi41
-rw-r--r--arch/powerpc/boot/dts/fsl/t1023rdb.dts (renamed from arch/powerpc/boot/dts/t4240rdb.dts)170
-rw-r--r--arch/powerpc/boot/dts/fsl/t1023si-post.dtsi523
-rw-r--r--arch/powerpc/boot/dts/fsl/t1024qds.dts (renamed from arch/powerpc/boot/dts/t4240qds.dts)243
-rw-r--r--arch/powerpc/boot/dts/fsl/t1024rdb.dts268
-rw-r--r--arch/powerpc/boot/dts/fsl/t1024si-post.dtsi100
-rw-r--r--arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi95
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040d4rdb.dts (renamed from arch/powerpc/boot/dts/t2081qds.dts)14
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040qds.dts (renamed from arch/powerpc/boot/dts/t1040qds.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts29
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040rdb.dts188
-rw-r--r--arch/powerpc/boot/dts/fsl/t1040si-post.dtsi481
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042d4rdb.dts105
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042qds.dts (renamed from arch/powerpc/boot/dts/t1042qds.dts)4
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042rdb.dts76
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts73
-rw-r--r--arch/powerpc/boot/dts/fsl/t1042si-post.dtsi2
-rw-r--r--arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi253
-rw-r--r--arch/powerpc/boot/dts/fsl/t104xqds.dtsi407
-rw-r--r--arch/powerpc/boot/dts/fsl/t104xrdb.dtsi (renamed from arch/powerpc/boot/dts/t104xqds.dtsi)133
-rw-r--r--arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi21
-rw-r--r--arch/powerpc/boot/dts/fsl/t2080qds.dts213
-rw-r--r--arch/powerpc/boot/dts/fsl/t2080rdb.dts122
-rw-r--r--arch/powerpc/boot/dts/fsl/t2081qds.dts265
-rw-r--r--arch/powerpc/boot/dts/fsl/t2081si-post.dtsi348
-rw-r--r--arch/powerpc/boot/dts/fsl/t208xqds.dtsi (renamed from arch/powerpc/boot/dts/t208xqds.dtsi)58
-rw-r--r--arch/powerpc/boot/dts/fsl/t208xrdb.dtsi (renamed from arch/powerpc/boot/dts/t208xrdb.dtsi)31
-rw-r--r--arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi19
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240qds.dts708
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240rdb.dts363
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240si-post.dtsi765
-rw-r--r--arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi48
-rw-r--r--arch/powerpc/boot/dts/fsp2.dts613
-rw-r--r--arch/powerpc/boot/dts/gamecube.dts21
-rw-r--r--arch/powerpc/boot/dts/gef_ppc9a.dts425
-rw-r--r--arch/powerpc/boot/dts/gef_sbc310.dts459
-rw-r--r--arch/powerpc/boot/dts/gef_sbc610.dts423
-rw-r--r--arch/powerpc/boot/dts/glacier.dts6
-rw-r--r--arch/powerpc/boot/dts/haleakala.dts281
-rw-r--r--arch/powerpc/boot/dts/holly.dts2
-rw-r--r--arch/powerpc/boot/dts/hotfoot.dts296
-rw-r--r--arch/powerpc/boot/dts/icon.dts15
l---------arch/powerpc/boot/dts/include/dt-bindings1
-rw-r--r--arch/powerpc/boot/dts/iss4xx-mpic.dts4
-rw-r--r--arch/powerpc/boot/dts/iss4xx.dts2
-rw-r--r--arch/powerpc/boot/dts/katmai.dts26
-rw-r--r--arch/powerpc/boot/dts/kilauea.dts435
-rw-r--r--arch/powerpc/boot/dts/klondike.dts227
-rw-r--r--arch/powerpc/boot/dts/kmeter1.dts16
-rw-r--r--arch/powerpc/boot/dts/ksi8560.dts4
-rw-r--r--arch/powerpc/boot/dts/lite5200.dts14
-rw-r--r--arch/powerpc/boot/dts/lite5200b.dts14
-rw-r--r--arch/powerpc/boot/dts/makalu.dts353
-rw-r--r--arch/powerpc/boot/dts/media5200.dts16
-rw-r--r--arch/powerpc/boot/dts/mgcoge.dts15
-rw-r--r--arch/powerpc/boot/dts/microwatt.dts255
-rw-r--r--arch/powerpc/boot/dts/motionpro.dts6
-rw-r--r--arch/powerpc/boot/dts/mpc5121.dtsi19
-rw-r--r--arch/powerpc/boot/dts/mpc5121ads.dts12
-rw-r--r--arch/powerpc/boot/dts/mpc5125twr.dts19
-rw-r--r--arch/powerpc/boot/dts/mpc5200b.dtsi12
-rw-r--r--arch/powerpc/boot/dts/mpc7448hpc2.dts196
-rw-r--r--arch/powerpc/boot/dts/mpc8272ads.dts267
-rw-r--r--arch/powerpc/boot/dts/mpc8308_p1m.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc8308rdb.dts6
-rw-r--r--arch/powerpc/boot/dts/mpc8313erdb.dts6
-rw-r--r--arch/powerpc/boot/dts/mpc8315erdb.dts18
-rw-r--r--arch/powerpc/boot/dts/mpc832x_mds.dts439
-rw-r--r--arch/powerpc/boot/dts/mpc832x_rdb.dts20
-rw-r--r--arch/powerpc/boot/dts/mpc8349emitx.dts10
-rw-r--r--arch/powerpc/boot/dts/mpc8349emitxgp.dts6
-rw-r--r--arch/powerpc/boot/dts/mpc834x_mds.dts407
-rw-r--r--arch/powerpc/boot/dts/mpc836x_mds.dts485
-rw-r--r--arch/powerpc/boot/dts/mpc836x_rdk.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc8377_mds.dts509
-rw-r--r--arch/powerpc/boot/dts/mpc8377_rdb.dts10
-rw-r--r--arch/powerpc/boot/dts/mpc8377_wlan.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc8378_mds.dts493
-rw-r--r--arch/powerpc/boot/dts/mpc8378_rdb.dts10
-rw-r--r--arch/powerpc/boot/dts/mpc8379_mds.dts459
-rw-r--r--arch/powerpc/boot/dts/mpc8379_rdb.dts10
-rw-r--r--arch/powerpc/boot/dts/mpc8540ads.dts359
-rw-r--r--arch/powerpc/boot/dts/mpc8541cds.dts379
-rw-r--r--arch/powerpc/boot/dts/mpc8548cds.dtsi302
-rw-r--r--arch/powerpc/boot/dts/mpc8548cds_32b.dts86
-rw-r--r--arch/powerpc/boot/dts/mpc8548cds_36b.dts86
-rw-r--r--arch/powerpc/boot/dts/mpc8555cds.dts379
-rw-r--r--arch/powerpc/boot/dts/mpc8560ads.dts392
-rw-r--r--arch/powerpc/boot/dts/mpc8610_hpcd.dts506
-rw-r--r--arch/powerpc/boot/dts/mpc8641_hpcn.dts663
-rw-r--r--arch/powerpc/boot/dts/mpc8641_hpcn_36b.dts605
-rw-r--r--arch/powerpc/boot/dts/mpc866ads.dts8
-rw-r--r--arch/powerpc/boot/dts/mpc885ads.dts21
-rw-r--r--arch/powerpc/boot/dts/mucmc52.dts12
-rw-r--r--arch/powerpc/boot/dts/mvme5100.dts2
-rw-r--r--arch/powerpc/boot/dts/o2d.dts8
-rw-r--r--arch/powerpc/boot/dts/o2d.dtsi14
-rw-r--r--arch/powerpc/boot/dts/o2d300.dts6
-rw-r--r--arch/powerpc/boot/dts/o2dnt2.dts8
-rw-r--r--arch/powerpc/boot/dts/o2i.dts6
-rw-r--r--arch/powerpc/boot/dts/o2mnt.dts6
-rw-r--r--arch/powerpc/boot/dts/o3dnt.dts8
-rw-r--r--arch/powerpc/boot/dts/obs600.dts314
-rw-r--r--arch/powerpc/boot/dts/p1010rdb-pa.dts23
-rw-r--r--arch/powerpc/boot/dts/p1010rdb-pb.dts35
-rw-r--r--arch/powerpc/boot/dts/p5040ds.dts207
-rw-r--r--arch/powerpc/boot/dts/pcm030.dts14
-rw-r--r--arch/powerpc/boot/dts/pcm032.dts20
-rw-r--r--arch/powerpc/boot/dts/pdm360ng.dts12
-rw-r--r--arch/powerpc/boot/dts/pq2fads.dts247
-rw-r--r--arch/powerpc/boot/dts/prpmc2800.dts297
-rw-r--r--arch/powerpc/boot/dts/ps3.dts14
-rw-r--r--arch/powerpc/boot/dts/rainier.dts2
-rw-r--r--arch/powerpc/boot/dts/redwood.dts27
-rw-r--r--arch/powerpc/boot/dts/sam440ep.dts4
-rw-r--r--arch/powerpc/boot/dts/sbc8349.dts331
-rw-r--r--arch/powerpc/boot/dts/sbc8548-altflash.dts115
-rw-r--r--arch/powerpc/boot/dts/sbc8548-post.dtsi293
-rw-r--r--arch/powerpc/boot/dts/sbc8548-pre.dtsi52
-rw-r--r--arch/powerpc/boot/dts/sbc8548.dts110
-rw-r--r--arch/powerpc/boot/dts/sbc8641d.dts455
-rw-r--r--arch/powerpc/boot/dts/sequoia.dts4
-rw-r--r--arch/powerpc/boot/dts/socrates.dts6
-rw-r--r--arch/powerpc/boot/dts/storcenter.dts2
-rw-r--r--arch/powerpc/boot/dts/stx_gp3_8560.dts8
-rw-r--r--arch/powerpc/boot/dts/stxssa8555.dts8
-rw-r--r--arch/powerpc/boot/dts/t4240emu.dts281
-rw-r--r--arch/powerpc/boot/dts/taishan.dts2
-rw-r--r--arch/powerpc/boot/dts/tqm5200.dts14
-rw-r--r--arch/powerpc/boot/dts/tqm8540.dts8
-rw-r--r--arch/powerpc/boot/dts/tqm8541.dts8
-rw-r--r--arch/powerpc/boot/dts/tqm8548-bigflash.dts6
-rw-r--r--arch/powerpc/boot/dts/tqm8548.dts6
-rw-r--r--arch/powerpc/boot/dts/tqm8555.dts8
-rw-r--r--arch/powerpc/boot/dts/tqm8560.dts8
-rw-r--r--arch/powerpc/boot/dts/tqm8xx.dts6
-rw-r--r--arch/powerpc/boot/dts/turris1x.dts520
-rw-r--r--arch/powerpc/boot/dts/uc101.dts6
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml507.dts406
-rw-r--r--arch/powerpc/boot/dts/virtex440-ml510.dts465
-rw-r--r--arch/powerpc/boot/dts/walnut.dts246
-rw-r--r--arch/powerpc/boot/dts/warp.dts8
-rw-r--r--arch/powerpc/boot/dts/wii.dts100
-rw-r--r--arch/powerpc/boot/dts/xcalibur1501.dts7
-rw-r--r--arch/powerpc/boot/dts/xpedite5200.dts9
-rw-r--r--arch/powerpc/boot/dts/xpedite5200_xmon.dts11
-rw-r--r--arch/powerpc/boot/dts/xpedite5301.dts7
-rw-r--r--arch/powerpc/boot/dts/xpedite5330.dts7
-rw-r--r--arch/powerpc/boot/dts/xpedite5370.dts7
-rw-r--r--arch/powerpc/boot/dts/yosemite.dts2
-rw-r--r--arch/powerpc/boot/dummy.c4
-rw-r--r--arch/powerpc/boot/ebony.c6
-rw-r--r--arch/powerpc/boot/elf.h1
-rw-r--r--arch/powerpc/boot/elf_util.c6
-rw-r--r--arch/powerpc/boot/ep405.c74
-rw-r--r--arch/powerpc/boot/ep8248e.c5
-rw-r--r--arch/powerpc/boot/ep88xc.c5
-rw-r--r--arch/powerpc/boot/epapr-wrapper.c1
-rw-r--r--arch/powerpc/boot/epapr.c5
-rw-r--r--arch/powerpc/boot/fixed-head.S1
-rw-r--r--arch/powerpc/boot/fixup-headers.sed12
-rw-r--r--arch/powerpc/boot/fsl-soc.c5
-rw-r--r--arch/powerpc/boot/fsl-soc.h1
-rw-r--r--arch/powerpc/boot/gamecube-head.S7
-rw-r--r--arch/powerpc/boot/gamecube.c7
-rw-r--r--arch/powerpc/boot/gunzip_util.c204
-rw-r--r--arch/powerpc/boot/gunzip_util.h45
-rw-r--r--arch/powerpc/boot/hack-coff.c6
-rw-r--r--arch/powerpc/boot/holly.c5
-rwxr-xr-x[-rw-r--r--]arch/powerpc/boot/install.sh38
-rw-r--r--arch/powerpc/boot/io.h1
-rw-r--r--arch/powerpc/boot/libfdt-wrapper.c22
-rw-r--r--arch/powerpc/boot/libfdt_env.h25
-rw-r--r--arch/powerpc/boot/main.c125
-rw-r--r--arch/powerpc/boot/microwatt.c24
-rw-r--r--arch/powerpc/boot/mktree.c1
-rw-r--r--arch/powerpc/boot/motload-head.S12
-rw-r--r--arch/powerpc/boot/mpc52xx-psc.c1
-rw-r--r--arch/powerpc/boot/mpc8xx.c8
-rw-r--r--arch/powerpc/boot/mpc8xx.h1
-rw-r--r--arch/powerpc/boot/mpsc.c169
-rw-r--r--arch/powerpc/boot/mv64x60.c581
-rw-r--r--arch/powerpc/boot/mv64x60.h70
-rw-r--r--arch/powerpc/boot/mv64x60_i2c.c204
-rw-r--r--arch/powerpc/boot/mvme5100.c6
-rw-r--r--arch/powerpc/boot/mvme7100.c54
-rw-r--r--arch/powerpc/boot/ns16550.c10
-rw-r--r--arch/powerpc/boot/of.c6
-rw-r--r--arch/powerpc/boot/of.h11
-rw-r--r--arch/powerpc/boot/ofconsole.c6
-rw-r--r--arch/powerpc/boot/oflib.c6
-rw-r--r--arch/powerpc/boot/opal-calls.S67
-rw-r--r--arch/powerpc/boot/opal.c97
-rw-r--r--arch/powerpc/boot/ops.h23
-rw-r--r--arch/powerpc/boot/page.h12
-rw-r--r--arch/powerpc/boot/planetcore.c38
-rw-r--r--arch/powerpc/boot/planetcore.h4
-rw-r--r--arch/powerpc/boot/ppc_asm.h40
-rw-r--r--arch/powerpc/boot/ppcboot-hotfoot.h133
-rw-r--r--arch/powerpc/boot/ppcboot.h20
-rw-r--r--arch/powerpc/boot/pq2.c5
-rw-r--r--arch/powerpc/boot/pq2.h1
-rw-r--r--arch/powerpc/boot/prpmc2800.c571
-rw-r--r--arch/powerpc/boot/ps3-head.S19
-rw-r--r--arch/powerpc/boot/ps3-hvcall.S14
-rw-r--r--arch/powerpc/boot/ps3.c33
-rw-r--r--arch/powerpc/boot/pseries-head.S1
-rw-r--r--arch/powerpc/boot/redboot-83xx.c5
-rw-r--r--arch/powerpc/boot/redboot-8xx.c5
-rw-r--r--arch/powerpc/boot/redboot.h1
-rw-r--r--arch/powerpc/boot/reg.h6
-rw-r--r--arch/powerpc/boot/rs6000.h9
-rw-r--r--arch/powerpc/boot/serial.c28
-rw-r--r--arch/powerpc/boot/simple_alloc.c13
-rw-r--r--arch/powerpc/boot/simpleboot.c7
-rw-r--r--arch/powerpc/boot/stdbool.h9
-rw-r--r--arch/powerpc/boot/stdint.h9
-rw-r--r--arch/powerpc/boot/stdio.c16
-rw-r--r--arch/powerpc/boot/stdio.h1
-rw-r--r--arch/powerpc/boot/stdlib.c5
-rw-r--r--arch/powerpc/boot/stdlib.h1
-rw-r--r--arch/powerpc/boot/string.S6
-rw-r--r--arch/powerpc/boot/string.h2
-rw-r--r--arch/powerpc/boot/swab.h1
-rw-r--r--arch/powerpc/boot/treeboot-akebono.c8
-rw-r--r--arch/powerpc/boot/treeboot-bamboo.c5
-rw-r--r--arch/powerpc/boot/treeboot-currituck.c8
-rw-r--r--arch/powerpc/boot/treeboot-ebony.c5
-rw-r--r--arch/powerpc/boot/treeboot-iss4xx.c8
-rw-r--r--arch/powerpc/boot/treeboot-walnut.c84
-rw-r--r--arch/powerpc/boot/types.h25
-rw-r--r--arch/powerpc/boot/uartlite.c79
-rw-r--r--arch/powerpc/boot/ugecon.c7
-rw-r--r--arch/powerpc/boot/ugecon.h7
-rw-r--r--arch/powerpc/boot/util.S55
-rw-r--r--arch/powerpc/boot/virtex.c100
-rw-r--r--arch/powerpc/boot/virtex405-head.S30
-rw-r--r--arch/powerpc/boot/wii-head.S7
-rw-r--r--arch/powerpc/boot/wii.c7
-rwxr-xr-xarch/powerpc/boot/wrapper238
-rw-r--r--arch/powerpc/boot/xz_config.h57
-rw-r--r--arch/powerpc/boot/zImage.coff.lds.S1
-rw-r--r--arch/powerpc/boot/zImage.lds.S20
-rw-r--r--arch/powerpc/boot/zImage.ps3.lds.S3
-rw-r--r--arch/powerpc/configs/32-bit.config1
-rw-r--r--arch/powerpc/configs/40x/acadia_defconfig87
-rw-r--r--arch/powerpc/configs/40x/ep405_defconfig84
-rw-r--r--arch/powerpc/configs/40x/kilauea_defconfig97
-rw-r--r--arch/powerpc/configs/40x/klondike_defconfig55
-rw-r--r--arch/powerpc/configs/40x/makalu_defconfig84
-rw-r--r--arch/powerpc/configs/40x/obs600_defconfig83
-rw-r--r--arch/powerpc/configs/40x/virtex_defconfig91
-rw-r--r--arch/powerpc/configs/40x/walnut_defconfig77
-rw-r--r--arch/powerpc/configs/44x.config2
-rw-r--r--arch/powerpc/configs/44x/akebono_defconfig34
-rw-r--r--arch/powerpc/configs/44x/arches_defconfig30
-rw-r--r--arch/powerpc/configs/44x/bamboo_defconfig23
-rw-r--r--arch/powerpc/configs/44x/bluestone_defconfig21
-rw-r--r--arch/powerpc/configs/44x/canyonlands_defconfig35
-rw-r--r--arch/powerpc/configs/44x/currituck_defconfig28
-rw-r--r--arch/powerpc/configs/44x/ebony_defconfig23
-rw-r--r--arch/powerpc/configs/44x/eiger_defconfig40
-rw-r--r--arch/powerpc/configs/44x/fsp2_defconfig121
-rw-r--r--arch/powerpc/configs/44x/icon_defconfig34
-rw-r--r--arch/powerpc/configs/44x/iss476-smp_defconfig30
-rw-r--r--arch/powerpc/configs/44x/katmai_defconfig23
-rw-r--r--arch/powerpc/configs/44x/rainier_defconfig23
-rw-r--r--arch/powerpc/configs/44x/redwood_defconfig38
-rw-r--r--arch/powerpc/configs/44x/sam440ep_defconfig37
-rw-r--r--arch/powerpc/configs/44x/sequoia_defconfig31
-rw-r--r--arch/powerpc/configs/44x/taishan_defconfig24
-rw-r--r--arch/powerpc/configs/44x/virtex5_defconfig90
-rw-r--r--arch/powerpc/configs/44x/warp_defconfig36
-rw-r--r--arch/powerpc/configs/52xx/cm5200_defconfig25
-rw-r--r--arch/powerpc/configs/52xx/lite5200b_defconfig28
-rw-r--r--arch/powerpc/configs/52xx/motionpro_defconfig42
-rw-r--r--arch/powerpc/configs/52xx/pcm030_defconfig32
-rw-r--r--arch/powerpc/configs/52xx/tqm5200_defconfig29
-rw-r--r--arch/powerpc/configs/64-bit.config1
-rw-r--r--arch/powerpc/configs/83xx/asp8347_defconfig29
-rw-r--r--arch/powerpc/configs/83xx/kmeter1_defconfig12
-rw-r--r--arch/powerpc/configs/83xx/mpc8313_rdb_defconfig35
-rw-r--r--arch/powerpc/configs/83xx/mpc8315_rdb_defconfig31
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_mds_defconfig75
-rw-r--r--arch/powerpc/configs/83xx/mpc832x_rdb_defconfig35
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itx_defconfig29
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig27
-rw-r--r--arch/powerpc/configs/83xx/mpc834x_mds_defconfig74
-rw-r--r--arch/powerpc/configs/83xx/mpc836x_mds_defconfig81
-rw-r--r--arch/powerpc/configs/83xx/mpc836x_rdk_defconfig26
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_mds_defconfig74
-rw-r--r--arch/powerpc/configs/83xx/mpc837x_rdb_defconfig33
-rw-r--r--arch/powerpc/configs/83xx/sbc834x_defconfig93
-rw-r--r--arch/powerpc/configs/85xx-32bit.config6
-rw-r--r--arch/powerpc/configs/85xx-64bit.config4
-rw-r--r--arch/powerpc/configs/85xx-hw.config141
-rw-r--r--arch/powerpc/configs/85xx-smp.config2
-rw-r--r--arch/powerpc/configs/85xx/ge_imp3a_defconfig36
-rw-r--r--arch/powerpc/configs/85xx/kmp204x_defconfig224
-rw-r--r--arch/powerpc/configs/85xx/ksi8560_defconfig27
-rw-r--r--arch/powerpc/configs/85xx/mpc8540_ads_defconfig63
-rw-r--r--arch/powerpc/configs/85xx/mpc8560_ads_defconfig64
-rw-r--r--arch/powerpc/configs/85xx/mpc85xx_cds_defconfig68
-rw-r--r--arch/powerpc/configs/85xx/ppa8548_defconfig60
-rw-r--r--arch/powerpc/configs/85xx/sbc8548_defconfig76
-rw-r--r--arch/powerpc/configs/85xx/socrates_defconfig33
-rw-r--r--arch/powerpc/configs/85xx/stx_gp3_defconfig29
-rw-r--r--arch/powerpc/configs/85xx/tqm8540_defconfig31
-rw-r--r--arch/powerpc/configs/85xx/tqm8541_defconfig31
-rw-r--r--arch/powerpc/configs/85xx/tqm8548_defconfig30
-rw-r--r--arch/powerpc/configs/85xx/tqm8555_defconfig31
-rw-r--r--arch/powerpc/configs/85xx/tqm8560_defconfig31
-rw-r--r--arch/powerpc/configs/85xx/xes_mpc85xx_defconfig51
-rw-r--r--arch/powerpc/configs/86xx-hw.config101
-rw-r--r--arch/powerpc/configs/86xx-smp.config2
-rw-r--r--arch/powerpc/configs/86xx/gef_ppc9a_defconfig237
-rw-r--r--arch/powerpc/configs/86xx/gef_sbc310_defconfig234
-rw-r--r--arch/powerpc/configs/86xx/gef_sbc610_defconfig307
-rw-r--r--arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig124
-rw-r--r--arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig175
-rw-r--r--arch/powerpc/configs/86xx/sbc8641d_defconfig279
-rw-r--r--arch/powerpc/configs/8xx.config2
-rw-r--r--arch/powerpc/configs/adder875_defconfig37
-rw-r--r--arch/powerpc/configs/altivec.config1
-rw-r--r--arch/powerpc/configs/amigaone_defconfig56
-rw-r--r--arch/powerpc/configs/be.config1
-rw-r--r--arch/powerpc/configs/book3s_32.config2
-rw-r--r--arch/powerpc/configs/c2k_defconfig450
-rw-r--r--arch/powerpc/configs/cell_defconfig62
-rw-r--r--arch/powerpc/configs/celleb_defconfig161
-rw-r--r--arch/powerpc/configs/chrp32_defconfig58
-rw-r--r--arch/powerpc/configs/corenet32_smp_defconfig183
-rw-r--r--arch/powerpc/configs/corenet64_smp_defconfig182
-rw-r--r--arch/powerpc/configs/corenet_base.config2
-rw-r--r--arch/powerpc/configs/debug.config5
-rw-r--r--arch/powerpc/configs/disable-werror.config2
-rw-r--r--arch/powerpc/configs/dpaa.config5
-rw-r--r--arch/powerpc/configs/ep8248e_defconfig28
-rw-r--r--arch/powerpc/configs/ep88xc_defconfig39
-rw-r--r--arch/powerpc/configs/fsl-emb-nonhw.config126
-rw-r--r--arch/powerpc/configs/g5_defconfig114
-rw-r--r--arch/powerpc/configs/gamecube_defconfig34
-rw-r--r--arch/powerpc/configs/guest.config14
-rw-r--r--arch/powerpc/configs/hardening.config10
-rw-r--r--arch/powerpc/configs/holly_defconfig23
l---------arch/powerpc/configs/kvm_guest.config1
-rw-r--r--arch/powerpc/configs/le.config1
-rw-r--r--arch/powerpc/configs/linkstation_defconfig40
-rw-r--r--arch/powerpc/configs/maple_defconfig145
-rw-r--r--arch/powerpc/configs/mgcoge_defconfig22
-rw-r--r--arch/powerpc/configs/microwatt_defconfig108
-rw-r--r--arch/powerpc/configs/mpc512x_defconfig45
-rw-r--r--arch/powerpc/configs/mpc5200_defconfig28
-rw-r--r--arch/powerpc/configs/mpc7448_hpc2_defconfig69
-rw-r--r--arch/powerpc/configs/mpc8272_ads_defconfig95
-rw-r--r--arch/powerpc/configs/mpc83xx_defconfig31
-rw-r--r--arch/powerpc/configs/mpc85xx_base.config20
-rw-r--r--arch/powerpc/configs/mpc85xx_defconfig274
-rw-r--r--arch/powerpc/configs/mpc85xx_smp_defconfig276
-rw-r--r--arch/powerpc/configs/mpc866_ads_defconfig32
-rw-r--r--arch/powerpc/configs/mpc86xx_base.config8
-rw-r--r--arch/powerpc/configs/mpc86xx_defconfig178
-rw-r--r--arch/powerpc/configs/mpc885_ads_defconfig67
-rw-r--r--arch/powerpc/configs/mvme5100_defconfig24
-rw-r--r--arch/powerpc/configs/pasemi_defconfig30
-rw-r--r--arch/powerpc/configs/pmac32_defconfig140
-rw-r--r--arch/powerpc/configs/powernv_defconfig (renamed from arch/powerpc/configs/pseries_defconfig)257
-rw-r--r--arch/powerpc/configs/ppc40x_defconfig105
-rw-r--r--arch/powerpc/configs/ppc44x_defconfig35
-rw-r--r--arch/powerpc/configs/ppc64_defconfig336
-rw-r--r--arch/powerpc/configs/ppc64e_defconfig50
-rw-r--r--arch/powerpc/configs/ppc64le.config2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig494
-rw-r--r--arch/powerpc/configs/pq2fads_defconfig99
-rw-r--r--arch/powerpc/configs/ps3_defconfig79
-rw-r--r--arch/powerpc/configs/pseries_le_defconfig304
-rw-r--r--arch/powerpc/configs/security.config17
-rw-r--r--arch/powerpc/configs/skiroot_defconfig302
-rw-r--r--arch/powerpc/configs/storcenter_defconfig29
-rw-r--r--arch/powerpc/configs/tqm8xx_defconfig43
-rw-r--r--arch/powerpc/configs/wii_defconfig57
-rw-r--r--arch/powerpc/crypto/.gitignore5
-rw-r--r--arch/powerpc/crypto/Kconfig65
-rw-r--r--arch/powerpc/crypto/Makefile35
-rw-r--r--arch/powerpc/crypto/aes-gcm-p10-glue.c433
-rw-r--r--arch/powerpc/crypto/aes-gcm-p10.S1236
-rw-r--r--arch/powerpc/crypto/aes-spe-core.S346
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c522
-rw-r--r--arch/powerpc/crypto/aes-spe-keys.S278
-rw-r--r--arch/powerpc/crypto/aes-spe-modes.S625
-rw-r--r--arch/powerpc/crypto/aes-spe-regs.h37
-rw-r--r--arch/powerpc/crypto/aes-tab-4k.S326
-rw-r--r--arch/powerpc/crypto/aes.c134
-rw-r--r--arch/powerpc/crypto/aes_cbc.c137
-rw-r--r--arch/powerpc/crypto/aes_ctr.c153
-rw-r--r--arch/powerpc/crypto/aes_xts.c166
-rw-r--r--arch/powerpc/crypto/aesp10-ppc.pl585
-rw-r--r--arch/powerpc/crypto/aesp8-ppc.h30
-rw-r--r--arch/powerpc/crypto/aesp8-ppc.pl3889
-rw-r--r--arch/powerpc/crypto/ghash.c160
-rw-r--r--arch/powerpc/crypto/ghashp10-ppc.pl370
-rw-r--r--arch/powerpc/crypto/ghashp8-ppc.pl243
-rw-r--r--arch/powerpc/crypto/ppc-xlate.pl229
-rw-r--r--arch/powerpc/crypto/sha1-powerpc-asm.S179
-rw-r--r--arch/powerpc/crypto/sha1.c157
-rw-r--r--arch/powerpc/crypto/vmx.c77
-rw-r--r--arch/powerpc/include/asm/8xx_immap.h2
-rw-r--r--arch/powerpc/include/asm/Kbuild15
-rw-r--r--arch/powerpc/include/asm/accounting.h32
-rw-r--r--arch/powerpc/include/asm/agp.h18
-rw-r--r--arch/powerpc/include/asm/archrandom.h46
-rw-r--r--arch/powerpc/include/asm/asm-compat.h44
-rw-r--r--arch/powerpc/include/asm/asm-const.h15
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h75
-rw-r--r--arch/powerpc/include/asm/asm.h7
-rw-r--r--arch/powerpc/include/asm/async_tx.h18
-rw-r--r--arch/powerpc/include/asm/atomic.h524
-rw-r--r--arch/powerpc/include/asm/backlight.h6
-rw-r--r--arch/powerpc/include/asm/barrier.h89
-rw-r--r--arch/powerpc/include/asm/bitops.h256
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h175
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h236
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h78
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h607
-rw-r--r--arch/powerpc/include/asm/book3s/32/tlbflush.h91
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h173
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h286
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-pkey.h45
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h300
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h103
-rw-r--r--arch/powerpc/include/asm/book3s/64/kexec.h33
-rw-r--r--arch/powerpc/include/asm/book3s/64/kup.h418
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h885
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h292
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h183
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-64k.h18
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h1385
-rw-r--r--arch/powerpc/include/asm/book3s/64/pkeys.h25
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-4k.h22
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-64k.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h366
-rw-r--r--arch/powerpc/include/asm/book3s/64/slice.h42
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-hash.h79
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush-radix.h99
-rw-r--r--arch/powerpc/include/asm/book3s/64/tlbflush.h225
-rw-r--r--arch/powerpc/include/asm/book3s/pgalloc.h15
-rw-r--r--arch/powerpc/include/asm/book3s/pgtable.h11
-rw-r--r--arch/powerpc/include/asm/book3s/tlbflush.h11
-rw-r--r--arch/powerpc/include/asm/bootx.h1
-rw-r--r--arch/powerpc/include/asm/bpf_perf_event.h9
-rw-r--r--arch/powerpc/include/asm/btext.h15
-rw-r--r--arch/powerpc/include/asm/bug.h107
-rw-r--r--arch/powerpc/include/asm/bugs.h18
-rw-r--r--arch/powerpc/include/asm/cache.h130
-rw-r--r--arch/powerpc/include/asm/cacheflush.h168
-rw-r--r--arch/powerpc/include/asm/cell-pmu.h71
-rw-r--r--arch/powerpc/include/asm/cell-regs.h297
-rw-r--r--arch/powerpc/include/asm/checksum.h237
-rw-r--r--arch/powerpc/include/asm/clocksource.h7
-rw-r--r--arch/powerpc/include/asm/cmpxchg.h592
-rw-r--r--arch/powerpc/include/asm/code-patching-asm.h18
-rw-r--r--arch/powerpc/include/asm/code-patching.h102
-rw-r--r--arch/powerpc/include/asm/compat.h224
-rw-r--r--arch/powerpc/include/asm/context_tracking.h3
-rw-r--r--arch/powerpc/include/asm/copro.h21
-rw-r--r--arch/powerpc/include/asm/cpm.h213
-rw-r--r--arch/powerpc/include/asm/cpm1.h11
-rw-r--r--arch/powerpc/include/asm/cpm2.h16
-rw-r--r--arch/powerpc/include/asm/cpu_has_feature.h55
-rw-r--r--arch/powerpc/include/asm/cpu_setup.h49
-rw-r--r--arch/powerpc/include/asm/cpufeature.h37
-rw-r--r--arch/powerpc/include/asm/cpuidle.h105
-rw-r--r--arch/powerpc/include/asm/cputable.h498
-rw-r--r--arch/powerpc/include/asm/cputhreads.h82
-rw-r--r--arch/powerpc/include/asm/cputime.h226
-rw-r--r--arch/powerpc/include/asm/crash_reserve.h8
-rw-r--r--arch/powerpc/include/asm/crashdump-ppc64.h19
-rw-r--r--arch/powerpc/include/asm/current.h8
-rw-r--r--arch/powerpc/include/asm/dbdma.h13
-rw-r--r--arch/powerpc/include/asm/dbell.h112
-rw-r--r--arch/powerpc/include/asm/dcr-generic.h49
-rw-r--r--arch/powerpc/include/asm/dcr-mmio.h57
-rw-r--r--arch/powerpc/include/asm/dcr-native.h31
-rw-r--r--arch/powerpc/include/asm/dcr-regs.h1
-rw-r--r--arch/powerpc/include/asm/dcr.h51
-rw-r--r--arch/powerpc/include/asm/debug.h17
-rw-r--r--arch/powerpc/include/asm/delay.h24
-rw-r--r--arch/powerpc/include/asm/device.h40
-rw-r--r--arch/powerpc/include/asm/disassemble.h23
-rw-r--r--arch/powerpc/include/asm/div64.h1
-rw-r--r--arch/powerpc/include/asm/dma-direct.h14
-rw-r--r--arch/powerpc/include/asm/dma-mapping.h225
-rw-r--r--arch/powerpc/include/asm/dma.h10
-rw-r--r--arch/powerpc/include/asm/drmem.h125
-rw-r--r--arch/powerpc/include/asm/dt_cpu_ftrs.h25
-rw-r--r--arch/powerpc/include/asm/dtl.h43
-rw-r--r--arch/powerpc/include/asm/edac.h4
-rw-r--r--arch/powerpc/include/asm/eeh.h247
-rw-r--r--arch/powerpc/include/asm/eeh_event.h18
-rw-r--r--arch/powerpc/include/asm/elf.h91
-rw-r--r--arch/powerpc/include/asm/elfnote.h24
-rw-r--r--arch/powerpc/include/asm/emulated_ops.h18
-rw-r--r--arch/powerpc/include/asm/epapr_hcalls.h44
-rw-r--r--arch/powerpc/include/asm/exception-64e.h76
-rw-r--r--arch/powerpc/include/asm/exception-64s.h644
-rw-r--r--arch/powerpc/include/asm/exec.h1
-rw-r--r--arch/powerpc/include/asm/extable.h44
-rw-r--r--arch/powerpc/include/asm/fadump-internal.h196
-rw-r--r--arch/powerpc/include/asm/fadump.h221
-rw-r--r--arch/powerpc/include/asm/fb.h21
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h138
-rw-r--r--arch/powerpc/include/asm/firmware.h65
-rw-r--r--arch/powerpc/include/asm/fixmap.h68
-rw-r--r--arch/powerpc/include/asm/floppy.h39
-rw-r--r--arch/powerpc/include/asm/fprobe.h12
-rw-r--r--arch/powerpc/include/asm/fpu.h28
-rw-r--r--arch/powerpc/include/asm/fs_pd.h49
-rw-r--r--arch/powerpc/include/asm/fsl_85xx_cache_sram.h48
-rw-r--r--arch/powerpc/include/asm/fsl_gtm.h6
-rw-r--r--arch/powerpc/include/asm/fsl_guts.h189
-rw-r--r--arch/powerpc/include/asm/fsl_hcalls.h2
-rw-r--r--arch/powerpc/include/asm/fsl_lbc.h18
-rw-r--r--arch/powerpc/include/asm/fsl_pamu_stash.h26
-rw-r--r--arch/powerpc/include/asm/fsl_pm.h47
-rw-r--r--arch/powerpc/include/asm/ftrace.h217
-rw-r--r--arch/powerpc/include/asm/futex.h50
-rw-r--r--arch/powerpc/include/asm/gpio.h4
-rw-r--r--arch/powerpc/include/asm/grackle.h1
-rw-r--r--arch/powerpc/include/asm/guest-state-buffer.h1019
-rw-r--r--arch/powerpc/include/asm/hardirq.h10
-rw-r--r--arch/powerpc/include/asm/head-64.h172
-rw-r--r--arch/powerpc/include/asm/heathrow.h1
-rw-r--r--arch/powerpc/include/asm/highmem.h53
-rw-r--r--arch/powerpc/include/asm/hmi.h37
-rw-r--r--arch/powerpc/include/asm/hugetlb.h209
-rw-r--r--arch/powerpc/include/asm/hvcall.h404
-rw-r--r--arch/powerpc/include/asm/hvconsole.h22
-rw-r--r--arch/powerpc/include/asm/hvcserver.h15
-rw-r--r--arch/powerpc/include/asm/hvsi.h19
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h86
-rw-r--r--arch/powerpc/include/asm/hw_irq.h455
-rw-r--r--arch/powerpc/include/asm/hydra.h7
-rw-r--r--arch/powerpc/include/asm/i8259.h3
-rw-r--r--arch/powerpc/include/asm/ibmebus.h4
-rw-r--r--arch/powerpc/include/asm/icswx.h204
-rw-r--r--arch/powerpc/include/asm/ide.h17
-rw-r--r--arch/powerpc/include/asm/idle.h93
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h172
-rw-r--r--arch/powerpc/include/asm/immap_cpm2.h1
-rw-r--r--arch/powerpc/include/asm/immap_qe.h491
-rw-r--r--arch/powerpc/include/asm/inst.h166
-rw-r--r--arch/powerpc/include/asm/interrupt.h680
-rw-r--r--arch/powerpc/include/asm/io-defs.h71
-rw-r--r--arch/powerpc/include/asm/io-workarounds.h48
-rw-r--r--arch/powerpc/include/asm/io.h691
-rw-r--r--arch/powerpc/include/asm/io_event_irq.h6
-rw-r--r--arch/powerpc/include/asm/iommu.h262
-rw-r--r--arch/powerpc/include/asm/ipic.h12
-rw-r--r--arch/powerpc/include/asm/irq.h48
-rw-r--r--arch/powerpc/include/asm/irq_regs.h2
-rw-r--r--arch/powerpc/include/asm/irq_work.h10
-rw-r--r--arch/powerpc/include/asm/irqflags.h61
-rw-r--r--arch/powerpc/include/asm/isa-bridge.h30
-rw-r--r--arch/powerpc/include/asm/jump_label.h47
-rw-r--r--arch/powerpc/include/asm/kasan.h72
-rw-r--r--arch/powerpc/include/asm/kdebug.h1
-rw-r--r--arch/powerpc/include/asm/kdump.h5
-rw-r--r--arch/powerpc/include/asm/kexec.h146
-rw-r--r--arch/powerpc/include/asm/kexec_ranges.h15
-rw-r--r--arch/powerpc/include/asm/keylargo.h1
-rw-r--r--arch/powerpc/include/asm/kfence.h61
-rw-r--r--arch/powerpc/include/asm/kgdb.h11
-rw-r--r--arch/powerpc/include/asm/kmap_types.h16
-rw-r--r--arch/powerpc/include/asm/kprobes.h87
-rw-r--r--arch/powerpc/include/asm/kup.h186
-rw-r--r--arch/powerpc/include/asm/kvm_asm.h57
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h479
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_32.h13
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h562
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_asm.h50
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_uvmem.h100
-rw-r--r--arch/powerpc/include/asm/kvm_booke.h60
-rw-r--r--arch/powerpc/include/asm/kvm_booke_hv_asm.h11
-rw-r--r--arch/powerpc/include/asm/kvm_fpu.h13
-rw-r--r--arch/powerpc/include/asm/kvm_guest.h25
-rw-r--r--arch/powerpc/include/asm/kvm_host.h552
-rw-r--r--arch/powerpc/include/asm/kvm_para.h44
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h719
-rw-r--r--arch/powerpc/include/asm/kvm_types.h15
-rw-r--r--arch/powerpc/include/asm/libata-portmap.h5
-rw-r--r--arch/powerpc/include/asm/linkage.h7
-rw-r--r--arch/powerpc/include/asm/livepatch.h23
-rw-r--r--arch/powerpc/include/asm/local.h220
-rw-r--r--arch/powerpc/include/asm/local64.h1
-rw-r--r--arch/powerpc/include/asm/lppaca.h104
-rw-r--r--arch/powerpc/include/asm/lv1call.h18
-rw-r--r--arch/powerpc/include/asm/machdep.h237
-rw-r--r--arch/powerpc/include/asm/macio.h8
-rw-r--r--arch/powerpc/include/asm/mc146818rtc.h6
-rw-r--r--arch/powerpc/include/asm/mce.h252
-rw-r--r--arch/powerpc/include/asm/mediabay.h1
-rw-r--r--arch/powerpc/include/asm/mem_encrypt.h21
-rw-r--r--arch/powerpc/include/asm/membarrier.h28
-rw-r--r--arch/powerpc/include/asm/mman.h48
-rw-r--r--arch/powerpc/include/asm/mmiowb.h18
-rw-r--r--arch/powerpc/include/asm/mmu-40x.h67
-rw-r--r--arch/powerpc/include/asm/mmu-hash32.h93
-rw-r--r--arch/powerpc/include/asm/mmu-hash64.h591
-rw-r--r--arch/powerpc/include/asm/mmu.h331
-rw-r--r--arch/powerpc/include/asm/mmu_context.h283
-rw-r--r--arch/powerpc/include/asm/mmzone.h12
-rw-r--r--arch/powerpc/include/asm/module.h66
-rw-r--r--arch/powerpc/include/asm/module.lds.h8
-rw-r--r--arch/powerpc/include/asm/mpc5121.h63
-rw-r--r--arch/powerpc/include/asm/mpc52xx.h65
-rw-r--r--arch/powerpc/include/asm/mpc52xx_psc.h7
-rw-r--r--arch/powerpc/include/asm/mpc5xxx.h16
-rw-r--r--arch/powerpc/include/asm/mpc6xx.h1
-rw-r--r--arch/powerpc/include/asm/mpc8260.h25
-rw-r--r--arch/powerpc/include/asm/mpc85xx.h7
-rw-r--r--arch/powerpc/include/asm/mpic.h15
-rw-r--r--arch/powerpc/include/asm/mpic_msgr.h13
-rw-r--r--arch/powerpc/include/asm/mpic_timer.h14
-rw-r--r--arch/powerpc/include/asm/msi_bitmap.h8
-rw-r--r--arch/powerpc/include/asm/mutex.h132
-rw-r--r--arch/powerpc/include/asm/nmi.h14
-rw-r--r--arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h60
-rw-r--r--arch/powerpc/include/asm/nohash/32/kup-8xx.h89
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-44x.h (renamed from arch/powerpc/include/asm/mmu-44x.h)20
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-8xx.h (renamed from arch/powerpc/include/asm/mmu-8xx.h)135
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h35
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h204
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-44x.h (renamed from arch/powerpc/include/asm/pte-44x.h)53
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-85xx.h59
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-8xx.h241
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgalloc.h67
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable-4k.h (renamed from arch/powerpc/include/asm/pgtable-ppc64-4k.h)59
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h214
-rw-r--r--arch/powerpc/include/asm/nohash/hugetlb-e500.h24
-rw-r--r--arch/powerpc/include/asm/nohash/kup-booke.h112
-rw-r--r--arch/powerpc/include/asm/nohash/mmu-e500.h (renamed from arch/powerpc/include/asm/mmu-book3e.h)43
-rw-r--r--arch/powerpc/include/asm/nohash/mmu.h16
-rw-r--r--arch/powerpc/include/asm/nohash/pgalloc.h76
-rw-r--r--arch/powerpc/include/asm/nohash/pgtable.h377
-rw-r--r--arch/powerpc/include/asm/nohash/pte-e500.h140
-rw-r--r--arch/powerpc/include/asm/nohash/tlbflush.h84
-rw-r--r--arch/powerpc/include/asm/nvram.h61
-rw-r--r--arch/powerpc/include/asm/ohare.h1
-rw-r--r--arch/powerpc/include/asm/opal-api.h1188
-rw-r--r--arch/powerpc/include/asm/opal.h975
-rw-r--r--arch/powerpc/include/asm/oprofile_impl.h139
-rw-r--r--arch/powerpc/include/asm/paca.h187
-rw-r--r--arch/powerpc/include/asm/page.h265
-rw-r--r--arch/powerpc/include/asm/page_32.h39
-rw-r--r--arch/powerpc/include/asm/page_64.h138
-rw-r--r--arch/powerpc/include/asm/papr-sysparm.h44
-rw-r--r--arch/powerpc/include/asm/paravirt.h223
-rw-r--r--arch/powerpc/include/asm/paravirt_api_clock.h2
-rw-r--r--arch/powerpc/include/asm/parport.h5
-rw-r--r--arch/powerpc/include/asm/pasemi_dma.h14
-rw-r--r--arch/powerpc/include/asm/pci-bridge.h167
-rw-r--r--arch/powerpc/include/asm/pci.h95
-rw-r--r--arch/powerpc/include/asm/percpu.h15
-rw-r--r--arch/powerpc/include/asm/perf_event.h20
-rw-r--r--arch/powerpc/include/asm/perf_event_fsl_emb.h6
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h64
-rw-r--r--arch/powerpc/include/asm/pgalloc-32.h91
-rw-r--r--arch/powerpc/include/asm/pgalloc-64.h245
-rw-r--r--arch/powerpc/include/asm/pgalloc.h77
-rw-r--r--arch/powerpc/include/asm/pgtable-be-types.h104
-rw-r--r--arch/powerpc/include/asm/pgtable-masks.h32
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc32.h341
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64-64k.h41
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h580
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h96
-rw-r--r--arch/powerpc/include/asm/pgtable.h415
-rw-r--r--arch/powerpc/include/asm/pkeys.h172
-rw-r--r--arch/powerpc/include/asm/plpar_wrappers.h494
-rw-r--r--arch/powerpc/include/asm/plpks.h194
-rw-r--r--arch/powerpc/include/asm/pmac_feature.h18
-rw-r--r--arch/powerpc/include/asm/pmac_low_i2c.h7
-rw-r--r--arch/powerpc/include/asm/pmac_pfunc.h2
-rw-r--r--arch/powerpc/include/asm/pmc.h34
-rw-r--r--arch/powerpc/include/asm/pmi.h66
-rw-r--r--arch/powerpc/include/asm/pnv-ocxl.h85
-rw-r--r--arch/powerpc/include/asm/pnv-pci.h61
-rw-r--r--arch/powerpc/include/asm/powernv.h21
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h771
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h60
-rw-r--r--arch/powerpc/include/asm/ppc4xx.h9
-rw-r--r--arch/powerpc/include/asm/ppc4xx_ocm.h45
-rw-r--r--arch/powerpc/include/asm/ppc_asm.h724
-rw-r--r--arch/powerpc/include/asm/preempt.h16
-rw-r--r--arch/powerpc/include/asm/probes.h59
-rw-r--r--arch/powerpc/include/asm/processor.h325
-rw-r--r--arch/powerpc/include/asm/prom.h87
-rw-r--r--arch/powerpc/include/asm/ps3.h42
-rw-r--r--arch/powerpc/include/asm/ps3av.h18
-rw-r--r--arch/powerpc/include/asm/ps3gpu.h14
-rw-r--r--arch/powerpc/include/asm/ps3stor.h16
-rw-r--r--arch/powerpc/include/asm/pte-40x.h65
-rw-r--r--arch/powerpc/include/asm/pte-8xx.h67
-rw-r--r--arch/powerpc/include/asm/pte-book3e.h87
-rw-r--r--arch/powerpc/include/asm/pte-common.h187
-rw-r--r--arch/powerpc/include/asm/pte-fsl-booke.h43
-rw-r--r--arch/powerpc/include/asm/pte-hash32.h47
-rw-r--r--arch/powerpc/include/asm/pte-hash64-4k.h17
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h102
-rw-r--r--arch/powerpc/include/asm/pte-hash64.h60
-rw-r--r--arch/powerpc/include/asm/pte-walk.h63
-rw-r--r--arch/powerpc/include/asm/ptrace.h332
-rw-r--r--arch/powerpc/include/asm/qe.h740
-rw-r--r--arch/powerpc/include/asm/qe_ic.h144
-rw-r--r--arch/powerpc/include/asm/qspinlock.h174
-rw-r--r--arch/powerpc/include/asm/qspinlock_types.h72
-rw-r--r--arch/powerpc/include/asm/reg.h528
-rw-r--r--arch/powerpc/include/asm/reg_8xx.h41
-rw-r--r--arch/powerpc/include/asm/reg_a2.h156
-rw-r--r--arch/powerpc/include/asm/reg_booke.h211
-rw-r--r--arch/powerpc/include/asm/reg_fsl_emb.h32
-rw-r--r--arch/powerpc/include/asm/rheap.h3
-rw-r--r--arch/powerpc/include/asm/rio.h7
-rw-r--r--arch/powerpc/include/asm/rtas-types.h114
-rw-r--r--arch/powerpc/include/asm/rtas-work-area.h96
-rw-r--r--arch/powerpc/include/asm/rtas.h477
-rw-r--r--arch/powerpc/include/asm/rtc.h78
-rw-r--r--arch/powerpc/include/asm/runlatch.h7
-rw-r--r--arch/powerpc/include/asm/scom.h167
-rw-r--r--arch/powerpc/include/asm/seccomp.h34
-rw-r--r--arch/powerpc/include/asm/sections.h72
-rw-r--r--arch/powerpc/include/asm/secure_boot.h29
-rw-r--r--arch/powerpc/include/asm/security_features.h113
-rw-r--r--arch/powerpc/include/asm/secvar.h40
-rw-r--r--arch/powerpc/include/asm/serial.h5
-rw-r--r--arch/powerpc/include/asm/set_memory.h51
-rw-r--r--arch/powerpc/include/asm/setjmp.h13
-rw-r--r--arch/powerpc/include/asm/setup.h79
-rw-r--r--arch/powerpc/include/asm/sfp-machine.h92
-rw-r--r--arch/powerpc/include/asm/shmparam.h1
-rw-r--r--arch/powerpc/include/asm/signal.h9
-rw-r--r--arch/powerpc/include/asm/simple_spinlock.h268
-rw-r--r--arch/powerpc/include/asm/simple_spinlock_types.h21
-rw-r--r--arch/powerpc/include/asm/smp.h121
-rw-r--r--arch/powerpc/include/asm/smu.h18
-rw-r--r--arch/powerpc/include/asm/sparsemem.h10
-rw-r--r--arch/powerpc/include/asm/spinlock.h311
-rw-r--r--arch/powerpc/include/asm/spinlock_types.h22
-rw-r--r--arch/powerpc/include/asm/spu.h57
-rw-r--r--arch/powerpc/include/asm/spu_csa.h25
-rw-r--r--arch/powerpc/include/asm/spu_info.h15
-rw-r--r--arch/powerpc/include/asm/spu_priv1.h17
-rw-r--r--arch/powerpc/include/asm/sstep.h173
-rw-r--r--arch/powerpc/include/asm/stackprotector.h30
-rw-r--r--arch/powerpc/include/asm/stacktrace.h13
-rw-r--r--arch/powerpc/include/asm/static_call.h31
-rw-r--r--arch/powerpc/include/asm/string.h72
-rw-r--r--arch/powerpc/include/asm/svm.h33
-rw-r--r--arch/powerpc/include/asm/swab.h31
-rw-r--r--arch/powerpc/include/asm/swiotlb.h15
-rw-r--r--arch/powerpc/include/asm/switch_to.h92
-rw-r--r--arch/powerpc/include/asm/synch.h38
-rw-r--r--arch/powerpc/include/asm/syscall.h117
-rw-r--r--arch/powerpc/include/asm/syscall_wrapper.h49
-rw-r--r--arch/powerpc/include/asm/syscalls.h156
-rw-r--r--arch/powerpc/include/asm/syscalls_32.h (renamed from arch/powerpc/kernel/ppc32.h)12
-rw-r--r--arch/powerpc/include/asm/systbl.h364
-rw-r--r--arch/powerpc/include/asm/systemcfg.h52
-rw-r--r--arch/powerpc/include/asm/task_size_32.h21
-rw-r--r--arch/powerpc/include/asm/task_size_64.h83
-rw-r--r--arch/powerpc/include/asm/tce.h26
-rw-r--r--arch/powerpc/include/asm/termios.h22
-rw-r--r--arch/powerpc/include/asm/text-patching.h275
-rw-r--r--arch/powerpc/include/asm/thread_info.h178
-rw-r--r--arch/powerpc/include/asm/time.h173
-rw-r--r--arch/powerpc/include/asm/timex.h39
-rw-r--r--arch/powerpc/include/asm/tlb.h73
-rw-r--r--arch/powerpc/include/asm/tlbflush.h174
-rw-r--r--arch/powerpc/include/asm/tm.h19
-rw-r--r--arch/powerpc/include/asm/topology.h107
-rw-r--r--arch/powerpc/include/asm/trace.h208
-rw-r--r--arch/powerpc/include/asm/trace_clock.h17
-rw-r--r--arch/powerpc/include/asm/tsi108.h12
-rw-r--r--arch/powerpc/include/asm/tsi108_irq.h16
-rw-r--r--arch/powerpc/include/asm/tsi108_pci.h19
-rw-r--r--arch/powerpc/include/asm/types.h16
-rw-r--r--arch/powerpc/include/asm/uaccess.h701
-rw-r--r--arch/powerpc/include/asm/ucc.h64
-rw-r--r--arch/powerpc/include/asm/ucc_fast.h244
-rw-r--r--arch/powerpc/include/asm/ucc_slow.h290
-rw-r--r--arch/powerpc/include/asm/udbg.h69
-rw-r--r--arch/powerpc/include/asm/uic.h6
-rw-r--r--arch/powerpc/include/asm/ultravisor-api.h39
-rw-r--r--arch/powerpc/include/asm/ultravisor.h85
-rw-r--r--arch/powerpc/include/asm/unaligned.h21
-rw-r--r--arch/powerpc/include/asm/uninorth.h5
-rw-r--r--arch/powerpc/include/asm/unistd.h25
-rw-r--r--arch/powerpc/include/asm/uprobes.h23
-rw-r--r--arch/powerpc/include/asm/user.h8
-rw-r--r--arch/powerpc/include/asm/vas.h294
-rw-r--r--arch/powerpc/include/asm/vdso.h61
-rw-r--r--arch/powerpc/include/asm/vdso/arch_data.h37
-rw-r--r--arch/powerpc/include/asm/vdso/clocksource.h7
-rw-r--r--arch/powerpc/include/asm/vdso/getrandom.h67
-rw-r--r--arch/powerpc/include/asm/vdso/gettimeofday.h146
-rw-r--r--arch/powerpc/include/asm/vdso/processor.h38
-rw-r--r--arch/powerpc/include/asm/vdso/timebase.h73
-rw-r--r--arch/powerpc/include/asm/vdso/vsyscall.h14
-rw-r--r--arch/powerpc/include/asm/vdso_datapage.h119
-rw-r--r--arch/powerpc/include/asm/vermagic.h22
-rw-r--r--arch/powerpc/include/asm/vga.h16
-rw-r--r--arch/powerpc/include/asm/video.h17
-rw-r--r--arch/powerpc/include/asm/vio.h24
-rw-r--r--arch/powerpc/include/asm/vmalloc.h24
-rw-r--r--arch/powerpc/include/asm/vphn.h24
-rw-r--r--arch/powerpc/include/asm/word-at-a-time.h121
-rw-r--r--arch/powerpc/include/asm/xics.h35
-rw-r--r--arch/powerpc/include/asm/xilinx_intc.h20
-rw-r--r--arch/powerpc/include/asm/xilinx_pci.h21
-rw-r--r--arch/powerpc/include/asm/xive-regs.h134
-rw-r--r--arch/powerpc/include/asm/xive.h168
-rw-r--r--arch/powerpc/include/asm/xmon.h12
-rw-r--r--arch/powerpc/include/asm/xor.h27
-rw-r--r--arch/powerpc/include/asm/xor_altivec.h22
-rw-r--r--arch/powerpc/include/uapi/asm/Kbuild49
-rw-r--r--arch/powerpc/include/uapi/asm/auxvec.h36
-rw-r--r--arch/powerpc/include/uapi/asm/bitsperlong.h1
-rw-r--r--arch/powerpc/include/uapi/asm/bootx.h3
-rw-r--r--arch/powerpc/include/uapi/asm/byteorder.h1
-rw-r--r--arch/powerpc/include/uapi/asm/cputable.h17
-rw-r--r--arch/powerpc/include/uapi/asm/eeh.h44
-rw-r--r--arch/powerpc/include/uapi/asm/elf.h46
-rw-r--r--arch/powerpc/include/uapi/asm/epapr_hcalls.h5
-rw-r--r--arch/powerpc/include/uapi/asm/errno.h4
-rw-r--r--arch/powerpc/include/uapi/asm/fcntl.h1
-rw-r--r--arch/powerpc/include/uapi/asm/ioctl.h1
-rw-r--r--arch/powerpc/include/uapi/asm/ioctls.h12
-rw-r--r--arch/powerpc/include/uapi/asm/ipcbuf.h1
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h200
-rw-r--r--arch/powerpc/include/uapi/asm/kvm_para.h16
-rw-r--r--arch/powerpc/include/uapi/asm/mman.h16
-rw-r--r--arch/powerpc/include/uapi/asm/msgbuf.h25
-rw-r--r--arch/powerpc/include/uapi/asm/nvram.h1
-rw-r--r--arch/powerpc/include/uapi/asm/opal-prd.h59
-rw-r--r--arch/powerpc/include/uapi/asm/papr-hvpipe.h33
-rw-r--r--arch/powerpc/include/uapi/asm/papr-indices.h41
-rw-r--r--arch/powerpc/include/uapi/asm/papr-miscdev.h9
-rw-r--r--arch/powerpc/include/uapi/asm/papr-physical-attestation.h31
-rw-r--r--arch/powerpc/include/uapi/asm/papr-platform-dump.h16
-rw-r--r--arch/powerpc/include/uapi/asm/papr-sysparm.h58
-rw-r--r--arch/powerpc/include/uapi/asm/papr-vpd.h22
-rw-r--r--arch/powerpc/include/uapi/asm/param.h1
-rw-r--r--arch/powerpc/include/uapi/asm/perf_event.h1
-rw-r--r--arch/powerpc/include/uapi/asm/perf_regs.h95
-rw-r--r--arch/powerpc/include/uapi/asm/poll.h1
-rw-r--r--arch/powerpc/include/uapi/asm/posix_types.h6
-rw-r--r--arch/powerpc/include/uapi/asm/ps3fb.h14
-rw-r--r--arch/powerpc/include/uapi/asm/ptrace.h27
-rw-r--r--arch/powerpc/include/uapi/asm/resource.h1
-rw-r--r--arch/powerpc/include/uapi/asm/seccomp.h16
-rw-r--r--arch/powerpc/include/uapi/asm/sembuf.h19
-rw-r--r--arch/powerpc/include/uapi/asm/setup.h1
-rw-r--r--arch/powerpc/include/uapi/asm/shmbuf.h31
-rw-r--r--arch/powerpc/include/uapi/asm/sigcontext.h11
-rw-r--r--arch/powerpc/include/uapi/asm/siginfo.h20
-rw-r--r--arch/powerpc/include/uapi/asm/signal.h32
-rw-r--r--arch/powerpc/include/uapi/asm/socket.h77
-rw-r--r--arch/powerpc/include/uapi/asm/sockios.h20
-rw-r--r--arch/powerpc/include/uapi/asm/spu_info.h15
-rw-r--r--arch/powerpc/include/uapi/asm/stat.h13
-rw-r--r--arch/powerpc/include/uapi/asm/statfs.h6
-rw-r--r--arch/powerpc/include/uapi/asm/swab.h1
-rw-r--r--arch/powerpc/include/uapi/asm/termbits.h183
-rw-r--r--arch/powerpc/include/uapi/asm/termios.h1
-rw-r--r--arch/powerpc/include/uapi/asm/tm.h3
-rw-r--r--arch/powerpc/include/uapi/asm/types.h5
-rw-r--r--arch/powerpc/include/uapi/asm/ucontext.h1
-rw-r--r--arch/powerpc/include/uapi/asm/unistd.h371
-rw-r--r--arch/powerpc/include/uapi/asm/vas-api.h28
-rw-r--r--arch/powerpc/kernel/.gitignore2
-rw-r--r--arch/powerpc/kernel/85xx_entry_mapping.S (renamed from arch/powerpc/kernel/fsl_booke_entry_mapping.S)31
-rw-r--r--arch/powerpc/kernel/Makefile239
-rw-r--r--arch/powerpc/kernel/align.c860
-rw-r--r--arch/powerpc/kernel/asm-offsets.c945
-rw-r--r--arch/powerpc/kernel/audit.c16
-rw-r--r--arch/powerpc/kernel/audit_32.h7
-rw-r--r--arch/powerpc/kernel/btext.c435
-rw-r--r--arch/powerpc/kernel/cacheinfo.c254
-rw-r--r--arch/powerpc/kernel/cacheinfo.h5
-rw-r--r--arch/powerpc/kernel/compat_audit.c16
-rw-r--r--arch/powerpc/kernel/cpu_setup_44x.S7
-rw-r--r--arch/powerpc/kernel/cpu_setup_6xx.S67
-rw-r--r--arch/powerpc/kernel/cpu_setup_e500.S (renamed from arch/powerpc/kernel/cpu_setup_fsl_booke.S)142
-rw-r--r--arch/powerpc/kernel/cpu_setup_pa6t.S15
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S182
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.c288
-rw-r--r--arch/powerpc/kernel/cpu_setup_ppc970.S7
-rw-r--r--arch/powerpc/kernel/cpu_specs.h25
-rw-r--r--arch/powerpc/kernel/cpu_specs_44x.h304
-rw-r--r--arch/powerpc/kernel/cpu_specs_47x.h74
-rw-r--r--arch/powerpc/kernel/cpu_specs_85xx.h57
-rw-r--r--arch/powerpc/kernel/cpu_specs_8xx.h23
-rw-r--r--arch/powerpc/kernel/cpu_specs_book3s_32.h605
-rw-r--r--arch/powerpc/kernel/cpu_specs_book3s_64.h530
-rw-r--r--arch/powerpc/kernel/cpu_specs_e500mc.h76
-rw-r--r--arch/powerpc/kernel/cputable.c2173
-rw-r--r--arch/powerpc/kernel/crash_dump.c66
-rw-r--r--arch/powerpc/kernel/dawr.c110
-rw-r--r--arch/powerpc/kernel/dbell.c42
-rw-r--r--arch/powerpc/kernel/dexcr.c124
-rw-r--r--arch/powerpc/kernel/dma-iommu.c148
-rw-r--r--arch/powerpc/kernel/dma-mask.c13
-rw-r--r--arch/powerpc/kernel/dma-swiotlb.c113
-rw-r--r--arch/powerpc/kernel/dma.c243
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c1128
-rw-r--r--arch/powerpc/kernel/early_32.c38
-rw-r--r--arch/powerpc/kernel/eeh.c1437
-rw-r--r--arch/powerpc/kernel/eeh_cache.c131
-rw-r--r--arch/powerpc/kernel/eeh_dev.c113
-rw-r--r--arch/powerpc/kernel/eeh_driver.c1161
-rw-r--r--arch/powerpc/kernel/eeh_event.c79
-rw-r--r--arch/powerpc/kernel/eeh_pe.c599
-rw-r--r--arch/powerpc/kernel/eeh_sysfs.c142
-rw-r--r--arch/powerpc/kernel/entry_32.S1504
-rw-r--r--arch/powerpc/kernel/entry_64.S1289
-rw-r--r--arch/powerpc/kernel/epapr_hcalls.S19
-rw-r--r--arch/powerpc/kernel/epapr_paravirt.c19
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S573
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S4257
-rw-r--r--arch/powerpc/kernel/fadump.c2207
-rw-r--r--arch/powerpc/kernel/firmware.c31
-rw-r--r--arch/powerpc/kernel/fpu.S173
-rw-r--r--arch/powerpc/kernel/ftrace.c594
-rw-r--r--arch/powerpc/kernel/head_32.h212
-rw-r--r--arch/powerpc/kernel/head_40x.S982
-rw-r--r--arch/powerpc/kernel/head_44x.S164
-rw-r--r--arch/powerpc/kernel/head_64.S567
-rw-r--r--arch/powerpc/kernel/head_85xx.S (renamed from arch/powerpc/kernel/head_fsl_booke.S)537
-rw-r--r--arch/powerpc/kernel/head_8xx.S1113
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S (renamed from arch/powerpc/kernel/head_32.S)1053
-rw-r--r--arch/powerpc/kernel/head_booke.h335
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c552
-rw-r--r--arch/powerpc/kernel/hw_breakpoint_constraints.c158
-rw-r--r--arch/powerpc/kernel/idle.c59
-rw-r--r--arch/powerpc/kernel/idle_64e.S (renamed from arch/powerpc/kernel/idle_book3e.S)30
-rw-r--r--arch/powerpc/kernel/idle_6xx.S28
-rw-r--r--arch/powerpc/kernel/idle_85xx.S (renamed from arch/powerpc/kernel/idle_e500.S)37
-rw-r--r--arch/powerpc/kernel/idle_book3s.S218
-rw-r--r--arch/powerpc/kernel/idle_power4.S73
-rw-r--r--arch/powerpc/kernel/idle_power7.S244
-rw-r--r--arch/powerpc/kernel/ima_arch.c78
-rw-r--r--arch/powerpc/kernel/interrupt.c509
-rw-r--r--arch/powerpc/kernel/interrupt_64.S772
-rw-r--r--arch/powerpc/kernel/io-workarounds.c212
-rw-r--r--arch/powerpc/kernel/io.c76
-rw-r--r--arch/powerpc/kernel/iomap.c104
-rw-r--r--arch/powerpc/kernel/iommu.c755
-rw-r--r--arch/powerpc/kernel/irq.c617
-rw-r--r--arch/powerpc/kernel/irq_64.c522
-rw-r--r--arch/powerpc/kernel/isa-bridge.c168
-rw-r--r--arch/powerpc/kernel/jump_label.c19
-rw-r--r--arch/powerpc/kernel/kdebugfs.c14
-rw-r--r--arch/powerpc/kernel/kgdb.c112
-rw-r--r--arch/powerpc/kernel/kprobes-ftrace.c75
-rw-r--r--arch/powerpc/kernel/kprobes.c575
-rw-r--r--arch/powerpc/kernel/kvm.c89
-rw-r--r--arch/powerpc/kernel/kvm_emul.S30
-rw-r--r--arch/powerpc/kernel/l2cr_6xx.S29
-rw-r--r--arch/powerpc/kernel/legacy_serial.c170
-rw-r--r--arch/powerpc/kernel/machine_kexec.c281
-rw-r--r--arch/powerpc/kernel/mce.c591
-rw-r--r--arch/powerpc/kernel/mce_power.c914
-rw-r--r--arch/powerpc/kernel/misc.S52
-rw-r--r--arch/powerpc/kernel/misc_32.S936
-rw-r--r--arch/powerpc/kernel/misc_64.S373
-rw-r--r--arch/powerpc/kernel/module.c44
-rw-r--r--arch/powerpc/kernel/module_32.c191
-rw-r--r--arch/powerpc/kernel/module_64.c961
-rw-r--r--arch/powerpc/kernel/msi.c34
-rw-r--r--arch/powerpc/kernel/note.S40
-rw-r--r--arch/powerpc/kernel/nvram_64.c802
-rw-r--r--arch/powerpc/kernel/of_platform.c125
-rw-r--r--arch/powerpc/kernel/optprobes.c304
-rw-r--r--arch/powerpc/kernel/optprobes_head.S136
-rw-r--r--arch/powerpc/kernel/paca.c345
-rw-r--r--arch/powerpc/kernel/pci-common.c759
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c130
-rw-r--r--arch/powerpc/kernel/pci_32.c73
-rw-r--r--arch/powerpc/kernel/pci_64.c114
-rw-r--r--arch/powerpc/kernel/pci_dn.c423
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c147
-rw-r--r--arch/powerpc/kernel/pmc.c10
-rw-r--r--arch/powerpc/kernel/ppc_ksyms.c209
-rw-r--r--arch/powerpc/kernel/ppc_save_regs.S70
-rw-r--r--arch/powerpc/kernel/proc_powerpc.c69
-rw-r--r--arch/powerpc/kernel/process.c2123
-rw-r--r--arch/powerpc/kernel/prom.c624
-rw-r--r--arch/powerpc/kernel/prom_entry_64.S87
-rw-r--r--arch/powerpc/kernel/prom_init.c1687
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh73
-rw-r--r--arch/powerpc/kernel/prom_parse.c1
-rw-r--r--arch/powerpc/kernel/ptrace.c1821
-rw-r--r--arch/powerpc/kernel/ptrace/Makefile21
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-adv.c494
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-altivec.c115
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-decl.h183
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-fpu.c58
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-noadv.c298
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-novsx.c64
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-spe.c60
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-tm.c788
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-view.c948
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace-vsx.c148
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace.c447
-rw-r--r--arch/powerpc/kernel/ptrace/ptrace32.c (renamed from arch/powerpc/kernel/ptrace32.c)30
-rw-r--r--arch/powerpc/kernel/reloc_32.S8
-rw-r--r--arch/powerpc/kernel/reloc_64.S79
-rw-r--r--arch/powerpc/kernel/rethook.c73
-rw-r--r--arch/powerpc/kernel/rtas-proc.c207
-rw-r--r--arch/powerpc/kernel/rtas-rtc.c14
-rw-r--r--arch/powerpc/kernel/rtas.c2082
-rw-r--r--arch/powerpc/kernel/rtas_entry.S176
-rw-r--r--arch/powerpc/kernel/rtas_flash.c136
-rw-r--r--arch/powerpc/kernel/rtas_pci.c147
-rw-r--r--arch/powerpc/kernel/rtasd.c140
-rw-r--r--arch/powerpc/kernel/secure_boot.c69
-rw-r--r--arch/powerpc/kernel/security.c866
-rw-r--r--arch/powerpc/kernel/secvar-ops.c23
-rw-r--r--arch/powerpc/kernel/secvar-sysfs.c293
-rw-r--r--arch/powerpc/kernel/setup-common.c626
-rw-r--r--arch/powerpc/kernel/setup.h67
-rw-r--r--arch/powerpc/kernel/setup_32.c268
-rw-r--r--arch/powerpc/kernel/setup_64.c956
-rw-r--r--arch/powerpc/kernel/signal.c269
-rw-r--r--arch/powerpc/kernel/signal.h197
-rw-r--r--arch/powerpc/kernel/signal_32.c1313
-rw-r--r--arch/powerpc/kernel/signal_64.c685
-rw-r--r--arch/powerpc/kernel/smp-tbsync.c1
-rw-r--r--arch/powerpc/kernel/smp.c1467
-rw-r--r--arch/powerpc/kernel/stacktrace.c206
-rw-r--r--arch/powerpc/kernel/static_call.c65
-rw-r--r--arch/powerpc/kernel/suspend.c8
-rw-r--r--arch/powerpc/kernel/switch.S257
-rw-r--r--arch/powerpc/kernel/swsusp.c13
-rw-r--r--arch/powerpc/kernel/swsusp_32.S84
-rw-r--r--arch/powerpc/kernel/swsusp_64.c11
-rw-r--r--arch/powerpc/kernel/swsusp_85xx.S (renamed from arch/powerpc/kernel/swsusp_booke.S)1
-rw-r--r--arch/powerpc/kernel/swsusp_asm64.S27
-rw-r--r--arch/powerpc/kernel/sys_ppc32.c123
-rw-r--r--arch/powerpc/kernel/syscall.c189
-rw-r--r--arch/powerpc/kernel/syscalls.c122
-rw-r--r--arch/powerpc/kernel/syscalls/Makefile48
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl562
-rw-r--r--arch/powerpc/kernel/sysfs.c609
-rw-r--r--arch/powerpc/kernel/systbl.S48
-rw-r--r--arch/powerpc/kernel/systbl.c46
-rw-r--r--arch/powerpc/kernel/systbl_chk.c58
-rw-r--r--arch/powerpc/kernel/systbl_chk.sh33
-rw-r--r--arch/powerpc/kernel/tau_6xx.c169
-rw-r--r--arch/powerpc/kernel/time.c1078
-rw-r--r--arch/powerpc/kernel/tm.S353
-rw-r--r--arch/powerpc/kernel/trace/Makefile34
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c678
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.c832
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg_entry.S132
-rw-r--r--arch/powerpc/kernel/trace/ftrace_entry.S479
-rw-r--r--arch/powerpc/kernel/trace/trace_clock.c13
-rw-r--r--arch/powerpc/kernel/traps.c1551
-rw-r--r--arch/powerpc/kernel/ucall.S14
-rw-r--r--arch/powerpc/kernel/udbg.c37
-rw-r--r--arch/powerpc/kernel/udbg_16550.c84
-rw-r--r--arch/powerpc/kernel/uprobes.c44
-rw-r--r--arch/powerpc/kernel/vdso.c811
-rw-r--r--arch/powerpc/kernel/vdso/.gitignore5
-rw-r--r--arch/powerpc/kernel/vdso/Makefile123
-rw-r--r--arch/powerpc/kernel/vdso/cacheflush.S (renamed from arch/powerpc/kernel/vdso32/cacheflush.S)60
-rw-r--r--arch/powerpc/kernel/vdso/datapage.S (renamed from arch/powerpc/kernel/vdso32/datapage.S)49
-rwxr-xr-xarch/powerpc/kernel/vdso/gen_vdso32_offsets.sh16
-rwxr-xr-xarch/powerpc/kernel/vdso/gen_vdso64_offsets.sh16
-rw-r--r--arch/powerpc/kernel/vdso/getcpu.S50
-rw-r--r--arch/powerpc/kernel/vdso/getrandom.S56
-rw-r--r--arch/powerpc/kernel/vdso/gettimeofday.S115
-rw-r--r--arch/powerpc/kernel/vdso/note.S (renamed from arch/powerpc/kernel/vdso32/note.S)3
-rw-r--r--arch/powerpc/kernel/vdso/sigtramp32.S (renamed from arch/powerpc/kernel/vdso32/sigtramp.S)6
-rw-r--r--arch/powerpc/kernel/vdso/sigtramp64.S (renamed from arch/powerpc/kernel/vdso64/sigtramp.S)28
-rw-r--r--arch/powerpc/kernel/vdso/vdso32.lds.S (renamed from arch/powerpc/kernel/vdso32/vdso32.lds.S)80
-rw-r--r--arch/powerpc/kernel/vdso/vdso64.lds.S (renamed from arch/powerpc/kernel/vdso64/vdso64.lds.S)77
-rw-r--r--arch/powerpc/kernel/vdso/vgetrandom-chacha.S365
-rw-r--r--arch/powerpc/kernel/vdso/vgetrandom.c14
-rw-r--r--arch/powerpc/kernel/vdso/vgettimeofday.c49
-rw-r--r--arch/powerpc/kernel/vdso32/.gitignore2
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile58
-rw-r--r--arch/powerpc/kernel/vdso32/getcpu.S45
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S298
-rw-r--r--arch/powerpc/kernel/vdso32_wrapper.S (renamed from arch/powerpc/kernel/vdso32/vdso32_wrapper.S)5
-rw-r--r--arch/powerpc/kernel/vdso64/.gitignore2
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile51
-rw-r--r--arch/powerpc/kernel/vdso64/cacheflush.S84
-rw-r--r--arch/powerpc/kernel/vdso64/datapage.S85
-rw-r--r--arch/powerpc/kernel/vdso64/getcpu.S45
-rw-r--r--arch/powerpc/kernel/vdso64/gettimeofday.S244
-rw-r--r--arch/powerpc/kernel/vdso64/note.S1
-rw-r--r--arch/powerpc/kernel/vdso64_wrapper.S (renamed from arch/powerpc/kernel/vdso64/vdso64_wrapper.S)5
-rw-r--r--arch/powerpc/kernel/vecemu.c24
-rw-r--r--arch/powerpc/kernel/vector.S256
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S358
-rw-r--r--arch/powerpc/kernel/watchdog.c590
-rw-r--r--arch/powerpc/kexec/Makefile19
-rw-r--r--arch/powerpc/kexec/core.c215
-rw-r--r--arch/powerpc/kexec/core_32.c (renamed from arch/powerpc/kernel/machine_kexec_32.c)11
-rw-r--r--arch/powerpc/kexec/core_64.c (renamed from arch/powerpc/kernel/machine_kexec_64.c)285
-rw-r--r--arch/powerpc/kexec/crash.c (renamed from arch/powerpc/kernel/crash.c)325
-rw-r--r--arch/powerpc/kexec/elf_64.c164
-rw-r--r--arch/powerpc/kexec/file_load.c109
-rw-r--r--arch/powerpc/kexec/file_load_64.c871
-rw-r--r--arch/powerpc/kexec/ranges.c708
-rw-r--r--arch/powerpc/kexec/relocate_32.S499
-rw-r--r--arch/powerpc/kexec/vmcore_info.c32
-rw-r--r--arch/powerpc/kvm/Kconfig137
-rw-r--r--arch/powerpc/kvm/Makefile69
-rw-r--r--arch/powerpc/kvm/book3s.c722
-rw-r--r--arch/powerpc/kvm/book3s.h31
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c34
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu_host.c33
-rw-r--r--arch/powerpc/kvm/book3s_32_sr.S39
-rw-r--r--arch/powerpc/kvm/book3s_64_entry.S429
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c51
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c76
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c1617
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c1476
-rw-r--r--arch/powerpc/kvm/book3s_64_slb.S18
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c768
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c105
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c466
-rw-r--r--arch/powerpc/kvm/book3s_exports.c13
-rw-r--r--arch/powerpc/kvm/book3s_hv.c5751
-rw-r--r--arch/powerpc/kvm/book3s_hv.h131
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c651
-rw-r--r--arch/powerpc/kvm/book3s_hv_hmi.c50
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S173
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c1714
-rw-r--r--arch/powerpc/kvm/book3s_hv_nestedv2.c1072
-rw-r--r--arch/powerpc/kvm/book3s_hv_p9_entry.c930
-rw-r--r--arch/powerpc/kvm/book3s_hv_p9_perf.c219
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c296
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c912
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c647
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2881
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm.c248
-rw-r--r--arch/powerpc/kvm/book3s_hv_tm_builtin.c119
-rw-r--r--arch/powerpc/kvm/book3s_hv_uvmem.c1222
-rw-r--r--arch/powerpc/kvm/book3s_interrupts.S76
-rw-r--r--arch/powerpc/kvm/book3s_mmu_hpte.c23
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c100
-rw-r--r--arch/powerpc/kvm/book3s_pr.c1017
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c155
-rw-r--r--arch/powerpc/kvm/book3s_rmhandlers.S24
-rw-r--r--arch/powerpc/kvm/book3s_rtas.c69
-rw-r--r--arch/powerpc/kvm/book3s_segment.S65
-rw-r--r--arch/powerpc/kvm/book3s_xics.c555
-rw-r--r--arch/powerpc/kvm/book3s_xics.h37
-rw-r--r--arch/powerpc/kvm/book3s_xive.c2980
-rw-r--r--arch/powerpc/kvm/book3s_xive.h313
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c1284
-rw-r--r--arch/powerpc/kvm/booke.c706
-rw-r--r--arch/powerpc/kvm/booke.h66
-rw-r--r--arch/powerpc/kvm/booke_emulate.c182
-rw-r--r--arch/powerpc/kvm/booke_interrupts.S26
-rw-r--r--arch/powerpc/kvm/bookehv_interrupts.S59
-rw-r--r--arch/powerpc/kvm/e500.c72
-rw-r--r--arch/powerpc/kvm/e500.h31
-rw-r--r--arch/powerpc/kvm/e500_emulate.c72
-rw-r--r--arch/powerpc/kvm/e500_mmu.c88
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c320
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.h5
-rw-r--r--arch/powerpc/kvm/e500mc.c120
-rw-r--r--arch/powerpc/kvm/emulate.c67
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c507
-rw-r--r--arch/powerpc/kvm/fpu.S26
-rw-r--r--arch/powerpc/kvm/guest-state-buffer.c660
-rw-r--r--arch/powerpc/kvm/irq.h20
-rw-r--r--arch/powerpc/kvm/mpic.c23
-rw-r--r--arch/powerpc/kvm/powerpc.c1764
-rw-r--r--arch/powerpc/kvm/test-guest-state-buffer.c543
-rw-r--r--arch/powerpc/kvm/timing.c46
-rw-r--r--arch/powerpc/kvm/timing.h30
-rw-r--r--arch/powerpc/kvm/tm.S398
-rw-r--r--arch/powerpc/kvm/trace.h9
-rw-r--r--arch/powerpc/kvm/trace_book3s.h33
-rw-r--r--arch/powerpc/kvm/trace_booke.h72
-rw-r--r--arch/powerpc/kvm/trace_hv.h554
-rw-r--r--arch/powerpc/kvm/trace_pr.h50
-rw-r--r--arch/powerpc/lib/Makefile87
-rw-r--r--arch/powerpc/lib/alloc.c21
-rw-r--r--arch/powerpc/lib/checksum_32.S450
-rw-r--r--arch/powerpc/lib/checksum_64.S163
-rw-r--r--arch/powerpc/lib/checksum_wrappers.c39
-rw-r--r--arch/powerpc/lib/checksum_wrappers_64.c102
-rw-r--r--arch/powerpc/lib/code-patching.c971
-rw-r--r--arch/powerpc/lib/copy_32.S185
-rw-r--r--arch/powerpc/lib/copy_mc_64.S242
-rw-r--r--arch/powerpc/lib/copypage_64.S32
-rw-r--r--arch/powerpc/lib/copypage_power7.S65
-rw-r--r--arch/powerpc/lib/copyuser_64.S635
-rw-r--r--arch/powerpc/lib/copyuser_power7.S308
-rw-r--r--arch/powerpc/lib/crtsavres.S102
-rw-r--r--arch/powerpc/lib/devres.c43
-rw-r--r--arch/powerpc/lib/div64.S6
-rw-r--r--arch/powerpc/lib/error-inject.c16
-rw-r--r--arch/powerpc/lib/feature-fixups-test.S119
-rw-r--r--arch/powerpc/lib/feature-fixups.c916
-rw-r--r--arch/powerpc/lib/hweight_64.S28
-rw-r--r--arch/powerpc/lib/ldstfp.S322
-rw-r--r--arch/powerpc/lib/locks.c40
-rw-r--r--arch/powerpc/lib/mem_64.S43
-rw-r--r--arch/powerpc/lib/memcmp_32.S45
-rw-r--r--arch/powerpc/lib/memcmp_64.S638
-rw-r--r--arch/powerpc/lib/memcpy_64.S23
-rw-r--r--arch/powerpc/lib/memcpy_power7.S283
-rw-r--r--arch/powerpc/lib/pmem.c87
-rw-r--r--arch/powerpc/lib/qspinlock.c998
-rw-r--r--arch/powerpc/lib/quad.S58
-rw-r--r--arch/powerpc/lib/restart_table.c56
-rw-r--r--arch/powerpc/lib/rheap.c6
-rw-r--r--arch/powerpc/lib/sstep.c3642
-rw-r--r--arch/powerpc/lib/string.S117
-rw-r--r--arch/powerpc/lib/string_32.S90
-rw-r--r--arch/powerpc/lib/string_64.S51
-rw-r--r--arch/powerpc/lib/strlen_32.S78
-rw-r--r--arch/powerpc/lib/test-code-patching.c495
-rw-r--r--arch/powerpc/lib/test_emulate_step.c1741
-rw-r--r--arch/powerpc/lib/test_emulate_step_exec_instr.S150
-rw-r--r--arch/powerpc/lib/usercopy_64.c41
-rw-r--r--arch/powerpc/lib/vmx-helper.c41
-rw-r--r--arch/powerpc/lib/xor_vmx.c81
-rw-r--r--arch/powerpc/lib/xor_vmx.h22
-rw-r--r--arch/powerpc/lib/xor_vmx_glue.c63
-rw-r--r--arch/powerpc/math-emu/Makefile8
-rw-r--r--arch/powerpc/math-emu/fabs.c3
-rw-r--r--arch/powerpc/math-emu/fadd.c3
-rw-r--r--arch/powerpc/math-emu/fadds.c3
-rw-r--r--arch/powerpc/math-emu/fcmpo.c3
-rw-r--r--arch/powerpc/math-emu/fcmpu.c3
-rw-r--r--arch/powerpc/math-emu/fctiw.c3
-rw-r--r--arch/powerpc/math-emu/fctiwz.c3
-rw-r--r--arch/powerpc/math-emu/fdiv.c3
-rw-r--r--arch/powerpc/math-emu/fdivs.c3
-rw-r--r--arch/powerpc/math-emu/fmadd.c3
-rw-r--r--arch/powerpc/math-emu/fmadds.c3
-rw-r--r--arch/powerpc/math-emu/fmr.c3
-rw-r--r--arch/powerpc/math-emu/fmsub.c3
-rw-r--r--arch/powerpc/math-emu/fmsubs.c3
-rw-r--r--arch/powerpc/math-emu/fmul.c3
-rw-r--r--arch/powerpc/math-emu/fmuls.c3
-rw-r--r--arch/powerpc/math-emu/fnabs.c3
-rw-r--r--arch/powerpc/math-emu/fneg.c3
-rw-r--r--arch/powerpc/math-emu/fnmadd.c3
-rw-r--r--arch/powerpc/math-emu/fnmadds.c3
-rw-r--r--arch/powerpc/math-emu/fnmsub.c3
-rw-r--r--arch/powerpc/math-emu/fnmsubs.c3
-rw-r--r--arch/powerpc/math-emu/fre.c3
-rw-r--r--arch/powerpc/math-emu/fres.c3
-rw-r--r--arch/powerpc/math-emu/frsp.c3
-rw-r--r--arch/powerpc/math-emu/frsqrte.c3
-rw-r--r--arch/powerpc/math-emu/frsqrtes.c3
-rw-r--r--arch/powerpc/math-emu/fsel.c3
-rw-r--r--arch/powerpc/math-emu/fsqrt.c3
-rw-r--r--arch/powerpc/math-emu/fsqrts.c3
-rw-r--r--arch/powerpc/math-emu/fsub.c3
-rw-r--r--arch/powerpc/math-emu/fsubs.c3
-rw-r--r--arch/powerpc/math-emu/lfd.c3
-rw-r--r--arch/powerpc/math-emu/lfs.c3
-rw-r--r--arch/powerpc/math-emu/math.c27
-rw-r--r--arch/powerpc/math-emu/math_efp.c70
-rw-r--r--arch/powerpc/math-emu/mcrfs.c3
-rw-r--r--arch/powerpc/math-emu/mffs.c3
-rw-r--r--arch/powerpc/math-emu/mtfsb0.c3
-rw-r--r--arch/powerpc/math-emu/mtfsb1.c3
-rw-r--r--arch/powerpc/math-emu/mtfsf.c3
-rw-r--r--arch/powerpc/math-emu/mtfsfi.c3
-rw-r--r--arch/powerpc/math-emu/stfd.c3
-rw-r--r--arch/powerpc/math-emu/stfiwx.c3
-rw-r--r--arch/powerpc/math-emu/stfs.c3
-rw-r--r--arch/powerpc/math-emu/udivmodti4.c1
-rw-r--r--arch/powerpc/mm/40x_mmu.c159
-rw-r--r--arch/powerpc/mm/Makefile45
-rw-r--r--arch/powerpc/mm/book3s32/Makefile12
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S (renamed from arch/powerpc/mm/hash_low_32.S)358
-rw-r--r--arch/powerpc/mm/book3s32/kuap.c22
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c446
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_hash32.c)55
-rw-r--r--arch/powerpc/mm/book3s32/nohash_low.S80
-rw-r--r--arch/powerpc/mm/book3s32/tlb.c107
-rw-r--r--arch/powerpc/mm/book3s64/Makefile33
-rw-r--r--arch/powerpc/mm/book3s64/hash_4k.c129
-rw-r--r--arch/powerpc/mm/book3s64/hash_64k.c343
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugepage.c188
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c (renamed from arch/powerpc/mm/hash_native_64.c)445
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c563
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c (renamed from arch/powerpc/mm/tlb_hash64.c)84
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c2465
-rw-r--r--arch/powerpc/mm/book3s64/hugetlbpage.c177
-rw-r--r--arch/powerpc/mm/book3s64/internal.h31
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c402
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c349
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c664
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c471
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c63
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c1694
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c1587
-rw-r--r--arch/powerpc/mm/book3s64/slb.c870
-rw-r--r--arch/powerpc/mm/book3s64/slice.c819
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c (renamed from arch/powerpc/mm/subpage-prot.c)122
-rw-r--r--arch/powerpc/mm/book3s64/trace.c7
-rw-r--r--arch/powerpc/mm/cacheflush.c221
-rw-r--r--arch/powerpc/mm/copro_fault.c136
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c348
-rw-r--r--arch/powerpc/mm/drmem.c514
-rw-r--r--arch/powerpc/mm/fault.c944
-rw-r--r--arch/powerpc/mm/gup.c235
-rw-r--r--arch/powerpc/mm/hash_low_64.S1002
-rw-r--r--arch/powerpc/mm/hash_utils_64.c1465
-rw-r--r--arch/powerpc/mm/highmem.c86
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c245
-rw-r--r--arch/powerpc/mm/hugetlbpage-hash64.c129
-rw-r--r--arch/powerpc/mm/hugetlbpage.c1048
-rw-r--r--arch/powerpc/mm/icswx.c292
-rw-r--r--arch/powerpc/mm/icswx.h68
-rw-r--r--arch/powerpc/mm/icswx_pid.c87
-rw-r--r--arch/powerpc/mm/init-common.c167
-rw-r--r--arch/powerpc/mm/init_32.c118
-rw-r--r--arch/powerpc/mm/init_64.c719
-rw-r--r--arch/powerpc/mm/ioremap.c63
-rw-r--r--arch/powerpc/mm/ioremap_32.c92
-rw-r--r--arch/powerpc/mm/ioremap_64.c57
-rw-r--r--arch/powerpc/mm/kasan/8xx.c78
-rw-r--r--arch/powerpc/mm/kasan/Makefile10
-rw-r--r--arch/powerpc/mm/kasan/book3s_32.c60
-rw-r--r--arch/powerpc/mm/kasan/init_32.c192
-rw-r--r--arch/powerpc/mm/kasan/init_book3e_64.c133
-rw-r--r--arch/powerpc/mm/kasan/init_book3s_64.c100
-rw-r--r--arch/powerpc/mm/maccess.c13
-rw-r--r--arch/powerpc/mm/mem.c685
-rw-r--r--arch/powerpc/mm/mmap.c99
-rw-r--r--arch/powerpc/mm/mmu_context.c117
-rw-r--r--arch/powerpc/mm/mmu_context_hash64.c146
-rw-r--r--arch/powerpc/mm/mmu_decl.h141
-rw-r--r--arch/powerpc/mm/nohash/44x.c (renamed from arch/powerpc/mm/44x_mmu.c)32
-rw-r--r--arch/powerpc/mm/nohash/8xx.c224
-rw-r--r--arch/powerpc/mm/nohash/Makefile16
-rw-r--r--arch/powerpc/mm/nohash/book3e_pgtable.c132
-rw-r--r--arch/powerpc/mm/nohash/e500.c (renamed from arch/powerpc/mm/fsl_booke_mmu.c)151
-rw-r--r--arch/powerpc/mm/nohash/e500_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-book3e.c)130
-rw-r--r--arch/powerpc/mm/nohash/kaslr_booke.c395
-rw-r--r--arch/powerpc/mm/nohash/kup.c27
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_nohash.c)335
-rw-r--r--arch/powerpc/mm/nohash/tlb.c341
-rw-r--r--arch/powerpc/mm/nohash/tlb_64e.c314
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S (renamed from arch/powerpc/mm/tlb_nohash_low.S)171
-rw-r--r--arch/powerpc/mm/nohash/tlb_low_64e.S (renamed from arch/powerpc/mm/tlb_low_64e.S)671
-rw-r--r--arch/powerpc/mm/numa.c1797
-rw-r--r--arch/powerpc/mm/pageattr.c127
-rw-r--r--arch/powerpc/mm/pgtable-frag.c141
-rw-r--r--arch/powerpc/mm/pgtable.c449
-rw-r--r--arch/powerpc/mm/pgtable_32.c436
-rw-r--r--arch/powerpc/mm/pgtable_64.c916
-rw-r--r--arch/powerpc/mm/ppc_mmu_32.c288
-rw-r--r--arch/powerpc/mm/ptdump/8xx.c89
-rw-r--r--arch/powerpc/mm/ptdump/Makefile14
-rw-r--r--arch/powerpc/mm/ptdump/bats.c99
-rw-r--r--arch/powerpc/mm/ptdump/book3s64.c122
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c543
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c424
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.h22
-rw-r--r--arch/powerpc/mm/ptdump/segment_regs.c52
-rw-r--r--arch/powerpc/mm/ptdump/shared.c87
-rw-r--r--arch/powerpc/mm/slb.c335
-rw-r--r--arch/powerpc/mm/slb_low.S321
-rw-r--r--arch/powerpc/mm/slice.c731
-rw-r--r--arch/powerpc/mm/tlb_hash32.c184
-rw-r--r--arch/powerpc/mm/tlb_nohash.c753
-rw-r--r--arch/powerpc/net/Makefile3
-rw-r--r--arch/powerpc/net/bpf_jit.h336
-rw-r--r--arch/powerpc/net/bpf_jit_64.S229
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c1776
-rw-r--r--arch/powerpc/net/bpf_jit_comp32.c1388
-rw-r--r--arch/powerpc/net/bpf_jit_comp64.c1630
-rw-r--r--arch/powerpc/oprofile/Makefile19
-rw-r--r--arch/powerpc/oprofile/backtrace.c127
-rw-r--r--arch/powerpc/oprofile/cell/pr_util.h114
-rw-r--r--arch/powerpc/oprofile/cell/spu_profiler.c252
-rw-r--r--arch/powerpc/oprofile/cell/spu_task_sync.c660
-rw-r--r--arch/powerpc/oprofile/cell/vma_map.c283
-rw-r--r--arch/powerpc/oprofile/common.c247
-rw-r--r--arch/powerpc/oprofile/op_model_7450.c211
-rw-r--r--arch/powerpc/oprofile/op_model_cell.c1717
-rw-r--r--arch/powerpc/oprofile/op_model_fsl_emb.c384
-rw-r--r--arch/powerpc/oprofile/op_model_pa6t.c239
-rw-r--r--arch/powerpc/oprofile/op_model_power4.c442
-rw-r--r--arch/powerpc/perf/8xx-pmu.c197
-rw-r--r--arch/powerpc/perf/Makefile21
-rw-r--r--arch/powerpc/perf/bhrb.S8
-rw-r--r--arch/powerpc/perf/callchain.c412
-rw-r--r--arch/powerpc/perf/callchain.h35
-rw-r--r--arch/powerpc/perf/callchain_32.c178
-rw-r--r--arch/powerpc/perf/callchain_64.c120
-rw-r--r--arch/powerpc/perf/core-book3s.c838
-rw-r--r--arch/powerpc/perf/core-fsl-emb.c63
-rw-r--r--arch/powerpc/perf/e500-pmu.c15
-rw-r--r--arch/powerpc/perf/e6500-pmu.c11
-rw-r--r--arch/powerpc/perf/generic-compat-pmu.c342
-rw-r--r--arch/powerpc/perf/hv-24x7-catalog.h26
-rw-r--r--arch/powerpc/perf/hv-24x7-domains.h29
-rw-r--r--arch/powerpc/perf/hv-24x7.c1590
-rw-r--r--arch/powerpc/perf/hv-24x7.h81
-rw-r--r--arch/powerpc/perf/hv-common.c11
-rw-r--r--arch/powerpc/perf/hv-common.h11
-rw-r--r--arch/powerpc/perf/hv-gpci-requests.h266
-rw-r--r--arch/powerpc/perf/hv-gpci.c845
-rw-r--r--arch/powerpc/perf/hv-gpci.h66
-rw-r--r--arch/powerpc/perf/imc-pmu.c1878
-rw-r--r--arch/powerpc/perf/internal.h14
-rw-r--r--arch/powerpc/perf/isa207-common.c852
-rw-r--r--arch/powerpc/perf/isa207-common.h293
-rw-r--r--arch/powerpc/perf/kvm-hv-pmu.c435
-rw-r--r--arch/powerpc/perf/mpc7450-pmu.c39
-rw-r--r--arch/powerpc/perf/perf_regs.c149
-rw-r--r--arch/powerpc/perf/power10-events-list.h79
-rw-r--r--arch/powerpc/perf/power10-pmu.c664
-rw-r--r--arch/powerpc/perf/power4-pmu.c622
-rw-r--r--arch/powerpc/perf/power5+-pmu.c40
-rw-r--r--arch/powerpc/perf/power5-pmu.c39
-rw-r--r--arch/powerpc/perf/power6-pmu.c84
-rw-r--r--arch/powerpc/perf/power7-events-list.h6
-rw-r--r--arch/powerpc/perf/power7-pmu.c64
-rw-r--r--arch/powerpc/perf/power8-events-list.h93
-rw-r--r--arch/powerpc/perf/power8-pmu.c683
-rw-r--r--arch/powerpc/perf/power9-events-list.h117
-rw-r--r--arch/powerpc/perf/power9-pmu.c495
-rw-r--r--arch/powerpc/perf/ppc970-pmu.c48
-rw-r--r--arch/powerpc/perf/req-gen/_begin.h16
-rw-r--r--arch/powerpc/perf/req-gen/_clear.h6
-rw-r--r--arch/powerpc/perf/req-gen/_end.h4
-rw-r--r--arch/powerpc/perf/req-gen/_request-begin.h16
-rw-r--r--arch/powerpc/perf/req-gen/_request-end.h9
-rw-r--r--arch/powerpc/perf/req-gen/perf.h177
-rw-r--r--arch/powerpc/perf/vpa-dtl.c596
-rw-r--r--arch/powerpc/perf/vpa-pmu.c204
-rw-r--r--arch/powerpc/platforms/40x/Kconfig161
-rw-r--r--arch/powerpc/platforms/40x/Makefile4
-rw-r--r--arch/powerpc/platforms/40x/ep405.c125
-rw-r--r--arch/powerpc/platforms/40x/ppc40x_simple.c82
-rw-r--r--arch/powerpc/platforms/40x/virtex.c56
-rw-r--r--arch/powerpc/platforms/40x/walnut.c67
-rw-r--r--arch/powerpc/platforms/44x/44x.h1
-rw-r--r--arch/powerpc/platforms/44x/Kconfig197
-rw-r--r--arch/powerpc/platforms/44x/Makefile12
-rw-r--r--arch/powerpc/platforms/44x/canyonlands.c31
-rw-r--r--arch/powerpc/platforms/44x/cpm.c (renamed from arch/powerpc/sysdev/ppc4xx_cpm.c)44
-rw-r--r--arch/powerpc/platforms/44x/ebony.c15
-rw-r--r--arch/powerpc/platforms/44x/fsp2.c316
-rw-r--r--arch/powerpc/platforms/44x/fsp2.h272
-rw-r--r--arch/powerpc/platforms/44x/gpio.c (renamed from arch/powerpc/sysdev/ppc4xx_gpio.c)145
-rw-r--r--arch/powerpc/platforms/44x/hsta_msi.c (renamed from arch/powerpc/sysdev/ppc4xx_hsta_msi.c)59
-rw-r--r--arch/powerpc/platforms/44x/idle.c17
-rw-r--r--arch/powerpc/platforms/44x/iss4xx.c26
-rw-r--r--arch/powerpc/platforms/44x/machine_check.c102
-rw-r--r--arch/powerpc/platforms/44x/misc_44x.S7
-rw-r--r--arch/powerpc/platforms/44x/pci.c (renamed from arch/powerpc/sysdev/ppc4xx_pci.c)282
-rw-r--r--arch/powerpc/platforms/44x/pci.h (renamed from arch/powerpc/sysdev/ppc4xx_pci.h)0
-rw-r--r--arch/powerpc/platforms/44x/ppc44x_simple.c12
-rw-r--r--arch/powerpc/platforms/44x/ppc476.c69
-rw-r--r--arch/powerpc/platforms/44x/sam440ep.c18
-rw-r--r--arch/powerpc/platforms/44x/soc.c (renamed from arch/powerpc/sysdev/ppc4xx_soc.c)20
-rw-r--r--arch/powerpc/platforms/44x/uic.c (renamed from arch/powerpc/sysdev/uic.c)49
-rw-r--r--arch/powerpc/platforms/44x/virtex.c62
-rw-r--r--arch/powerpc/platforms/44x/virtex_ml510.c29
-rw-r--r--arch/powerpc/platforms/44x/warp.c182
-rw-r--r--arch/powerpc/platforms/512x/Kconfig16
-rw-r--r--arch/powerpc/platforms/512x/Makefile2
-rw-r--r--arch/powerpc/platforms/512x/clock-commonclk.c99
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads.c30
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads.h6
-rw-r--r--arch/powerpc/platforms/512x/mpc5121_ads_cpld.c45
-rw-r--r--arch/powerpc/platforms/512x/mpc512x.h11
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_generic.c17
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_lpbfifo.c516
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c69
-rw-r--r--arch/powerpc/platforms/512x/pdm360ng.c19
-rw-r--r--arch/powerpc/platforms/52xx/Kconfig13
-rw-r--r--arch/powerpc/platforms/52xx/Makefile5
-rw-r--r--arch/powerpc/platforms/52xx/efika.c33
-rw-r--r--arch/powerpc/platforms/52xx/lite5200.c24
-rw-r--r--arch/powerpc/platforms/52xx/lite5200_pm.c14
-rw-r--r--arch/powerpc/platforms/52xx/lite5200_sleep.S28
-rw-r--r--arch/powerpc/platforms/52xx/media5200.c50
-rw-r--r--arch/powerpc/platforms/52xx/mpc5200_simple.c22
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_common.c62
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c152
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c581
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pci.c39
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pic.c19
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_pm.c10
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_sleep.S1
-rw-r--r--arch/powerpc/platforms/82xx/Kconfig62
-rw-r--r--arch/powerpc/platforms/82xx/Makefile4
-rw-r--r--arch/powerpc/platforms/82xx/ep8248e.c48
-rw-r--r--arch/powerpc/platforms/82xx/km82xx.c28
-rw-r--r--arch/powerpc/platforms/82xx/m82xx_pci.h17
-rw-r--r--arch/powerpc/platforms/82xx/mpc8272_ads.c218
-rw-r--r--arch/powerpc/platforms/82xx/pq2.c57
-rw-r--r--arch/powerpc/platforms/82xx/pq2.h3
-rw-r--r--arch/powerpc/platforms/82xx/pq2ads-pci-pic.c180
-rw-r--r--arch/powerpc/platforms/82xx/pq2ads.h44
-rw-r--r--arch/powerpc/platforms/82xx/pq2fads.c196
-rw-r--r--arch/powerpc/platforms/83xx/Kconfig47
-rw-r--r--arch/powerpc/platforms/83xx/Makefile11
-rw-r--r--arch/powerpc/platforms/83xx/asp834x.c23
-rw-r--r--arch/powerpc/platforms/83xx/km83xx.c41
-rw-r--r--arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c66
-rw-r--r--arch/powerpc/platforms/83xx/misc.c67
-rw-r--r--arch/powerpc/platforms/83xx/mpc830x_rdb.c23
-rw-r--r--arch/powerpc/platforms/83xx/mpc831x_rdb.c23
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_mds.c122
-rw-r--r--arch/powerpc/platforms/83xx/mpc832x_rdb.c53
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_itx.c28
-rw-r--r--arch/powerpc/platforms/83xx/mpc834x_mds.c109
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_mds.c229
-rw-r--r--arch/powerpc/platforms/83xx/mpc836x_rdk.c34
-rw-r--r--arch/powerpc/platforms/83xx/mpc837x_mds.c111
-rw-r--r--arch/powerpc/platforms/83xx/mpc837x_rdb.c25
-rw-r--r--arch/powerpc/platforms/83xx/mpc83xx.h21
-rw-r--r--arch/powerpc/platforms/83xx/sbc834x.c78
-rw-r--r--arch/powerpc/platforms/83xx/suspend-asm.S46
-rw-r--r--arch/powerpc/platforms/83xx/suspend.c91
-rw-r--r--arch/powerpc/platforms/83xx/usb.c254
-rw-r--r--arch/powerpc/platforms/83xx/usb_831x.c128
-rw-r--r--arch/powerpc/platforms/83xx/usb_834x.c90
-rw-r--r--arch/powerpc/platforms/83xx/usb_837x.c58
-rw-r--r--arch/powerpc/platforms/85xx/Kconfig106
-rw-r--r--arch/powerpc/platforms/85xx/Makefile14
-rw-r--r--arch/powerpc/platforms/85xx/bsc913x_qds.c33
-rw-r--r--arch/powerpc/platforms/85xx/bsc913x_rdb.c25
-rw-r--r--arch/powerpc/platforms/85xx/c293pcie.c31
-rw-r--r--arch/powerpc/platforms/85xx/common.c42
-rw-r--r--arch/powerpc/platforms/85xx/corenet_generic.c78
-rw-r--r--arch/powerpc/platforms/85xx/ge_imp3a.c39
-rw-r--r--arch/powerpc/platforms/85xx/ksi8560.c21
-rw-r--r--arch/powerpc/platforms/85xx/mpc8536_ds.c27
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx.h9
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_8259.c64
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ads.c193
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_cds.c394
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_ds.c182
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_mds.c101
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c107
-rw-r--r--arch/powerpc/platforms/85xx/mpc85xx_rdb.c213
-rw-r--r--arch/powerpc/platforms/85xx/mvme2500.c57
-rw-r--r--arch/powerpc/platforms/85xx/p1010rdb.c20
-rw-r--r--arch/powerpc/platforms/85xx/p1022_ds.c35
-rw-r--r--arch/powerpc/platforms/85xx/p1022_rdk.c31
-rw-r--r--arch/powerpc/platforms/85xx/p1023_rdb.c31
-rw-r--r--arch/powerpc/platforms/85xx/p2020.c81
-rw-r--r--arch/powerpc/platforms/85xx/ppa8548.c22
-rw-r--r--arch/powerpc/platforms/85xx/qemu_e500.c26
-rw-r--r--arch/powerpc/platforms/85xx/sbc8548.c142
-rw-r--r--arch/powerpc/platforms/85xx/sgy_cts1000.c136
-rw-r--r--arch/powerpc/platforms/85xx/smp.c440
-rw-r--r--arch/powerpc/platforms/85xx/smp.h2
-rw-r--r--arch/powerpc/platforms/85xx/socrates.c26
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.c32
-rw-r--r--arch/powerpc/platforms/85xx/socrates_fpga_pic.h9
-rw-r--r--arch/powerpc/platforms/85xx/stx_gp3.c23
-rw-r--r--arch/powerpc/platforms/85xx/t1042rdb_diu.c153
-rw-r--r--arch/powerpc/platforms/85xx/tqm85xx.c21
-rw-r--r--arch/powerpc/platforms/85xx/twr_p102x.c49
-rw-r--r--arch/powerpc/platforms/85xx/xes_mpc85xx.c55
-rw-r--r--arch/powerpc/platforms/86xx/Kconfig54
-rw-r--r--arch/powerpc/platforms/86xx/Makefile7
-rw-r--r--arch/powerpc/platforms/86xx/common.c43
-rw-r--r--arch/powerpc/platforms/86xx/gef_ppc9a.c64
-rw-r--r--arch/powerpc/platforms/86xx/gef_sbc310.c64
-rw-r--r--arch/powerpc/platforms/86xx/gef_sbc610.c64
-rw-r--r--arch/powerpc/platforms/86xx/mpc8610_hpcd.c359
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx.h8
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_hpcn.c161
-rw-r--r--arch/powerpc/platforms/86xx/mpc86xx_smp.c15
-rw-r--r--arch/powerpc/platforms/86xx/mvme7100.c114
-rw-r--r--arch/powerpc/platforms/86xx/pic.c16
-rw-r--r--arch/powerpc/platforms/86xx/sbc8641d.c124
-rw-r--r--arch/powerpc/platforms/8xx/Kconfig80
-rw-r--r--arch/powerpc/platforms/8xx/Makefile5
-rw-r--r--arch/powerpc/platforms/8xx/adder875.c25
-rw-r--r--arch/powerpc/platforms/8xx/cpm1-ic.c188
-rw-r--r--arch/powerpc/platforms/8xx/cpm1.c (renamed from arch/powerpc/sysdev/cpm1.c)381
-rw-r--r--arch/powerpc/platforms/8xx/ep88xc.c15
-rw-r--r--arch/powerpc/platforms/8xx/m8xx_setup.c144
-rw-r--r--arch/powerpc/platforms/8xx/machine_check.c34
-rw-r--r--arch/powerpc/platforms/8xx/micropatch.c388
-rw-r--r--arch/powerpc/platforms/8xx/mpc86xads_setup.c14
-rw-r--r--arch/powerpc/platforms/8xx/mpc885ads_setup.c18
-rw-r--r--arch/powerpc/platforms/8xx/mpc8xx.h3
-rw-r--r--arch/powerpc/platforms/8xx/pic.c (renamed from arch/powerpc/sysdev/mpc8xx_pic.c)35
-rw-r--r--arch/powerpc/platforms/8xx/pic.h (renamed from arch/powerpc/sysdev/mpc8xx_pic.h)2
-rw-r--r--arch/powerpc/platforms/8xx/tqm8xx_setup.c20
-rw-r--r--arch/powerpc/platforms/Kconfig169
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype526
-rw-r--r--arch/powerpc/platforms/Makefile7
-rw-r--r--arch/powerpc/platforms/amigaone/Kconfig5
-rw-r--r--arch/powerpc/platforms/amigaone/Makefile1
-rw-r--r--arch/powerpc/platforms/amigaone/setup.c58
-rw-r--r--arch/powerpc/platforms/book3s/Kconfig15
-rw-r--r--arch/powerpc/platforms/book3s/Makefile2
-rw-r--r--arch/powerpc/platforms/book3s/vas-api.c673
-rw-r--r--arch/powerpc/platforms/cell/Kconfig123
-rw-r--r--arch/powerpc/platforms/cell/Makefile46
-rw-r--r--arch/powerpc/platforms/cell/axon_msi.c502
-rw-r--r--arch/powerpc/platforms/cell/beat.c264
-rw-r--r--arch/powerpc/platforms/cell/beat.h39
-rw-r--r--arch/powerpc/platforms/cell/beat_htab.c445
-rw-r--r--arch/powerpc/platforms/cell/beat_hvCall.S285
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.c253
-rw-r--r--arch/powerpc/platforms/cell/beat_interrupt.h30
-rw-r--r--arch/powerpc/platforms/cell/beat_iommu.c115
-rw-r--r--arch/powerpc/platforms/cell/beat_spu_priv1.c205
-rw-r--r--arch/powerpc/platforms/cell/beat_syscall.h164
-rw-r--r--arch/powerpc/platforms/cell/beat_udbg.c98
-rw-r--r--arch/powerpc/platforms/cell/beat_wrapper.h290
-rw-r--r--arch/powerpc/platforms/cell/cbe_powerbutton.c118
-rw-r--r--arch/powerpc/platforms/cell/cbe_regs.c281
-rw-r--r--arch/powerpc/platforms/cell/cbe_thermal.c399
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.c500
-rw-r--r--arch/powerpc/platforms/cell/celleb_pci.h46
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc.h232
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_epci.c429
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_pciex.c539
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_sio.c99
-rw-r--r--arch/powerpc/platforms/cell/celleb_scc_uhc.c95
-rw-r--r--arch/powerpc/platforms/cell/celleb_setup.c243
-rw-r--r--arch/powerpc/platforms/cell/cpufreq_spudemand.c171
-rw-r--r--arch/powerpc/platforms/cell/interrupt.c411
-rw-r--r--arch/powerpc/platforms/cell/interrupt.h89
-rw-r--r--arch/powerpc/platforms/cell/iommu.c1237
-rw-r--r--arch/powerpc/platforms/cell/pervasive.c133
-rw-r--r--arch/powerpc/platforms/cell/pervasive.h42
-rw-r--r--arch/powerpc/platforms/cell/pmu.c424
-rw-r--r--arch/powerpc/platforms/cell/qpace_setup.c148
-rw-r--r--arch/powerpc/platforms/cell/ras.c356
-rw-r--r--arch/powerpc/platforms/cell/ras.h9
-rw-r--r--arch/powerpc/platforms/cell/setup.c281
-rw-r--r--arch/powerpc/platforms/cell/smp.c175
-rw-r--r--arch/powerpc/platforms/cell/spider-pci.c184
-rw-r--r--arch/powerpc/platforms/cell/spider-pic.c359
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c133
-rw-r--r--arch/powerpc/platforms/cell/spu_callbacks.c21
-rw-r--r--arch/powerpc/platforms/cell/spu_fault.c94
-rw-r--r--arch/powerpc/platforms/cell/spu_manage.c555
-rw-r--r--arch/powerpc/platforms/cell/spu_notify.c68
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.c180
-rw-r--r--arch/powerpc/platforms/cell/spu_priv1_mmio.h26
-rw-r--r--arch/powerpc/platforms/cell/spu_syscalls.c85
-rw-r--r--arch/powerpc/platforms/cell/spufs/.gitignore1
-rw-r--r--arch/powerpc/platforms/cell/spufs/Makefile1
-rw-r--r--arch/powerpc/platforms/cell/spufs/backing_ops.c29
-rw-r--r--arch/powerpc/platforms/cell/spufs/context.c17
-rw-r--r--arch/powerpc/platforms/cell/spufs/coredump.c122
-rw-r--r--arch/powerpc/platforms/cell/spufs/fault.c54
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c647
-rw-r--r--arch/powerpc/platforms/cell/spufs/gang.c16
-rw-r--r--arch/powerpc/platforms/cell/spufs/hw_ops.c28
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c427
-rw-r--r--arch/powerpc/platforms/cell/spufs/lscsa_alloc.c141
-rw-r--r--arch/powerpc/platforms/cell/spufs/run.c13
-rw-r--r--arch/powerpc/platforms/cell/spufs/sched.c75
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore.c16
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_restore_crt0.S16
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save.c16
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_save_crt0.S16
-rw-r--r--arch/powerpc/platforms/cell/spufs/spu_utils.h15
-rw-r--r--arch/powerpc/platforms/cell/spufs/spufs.h36
-rw-r--r--arch/powerpc/platforms/cell/spufs/sputrace.h2
-rw-r--r--arch/powerpc/platforms/cell/spufs/switch.c24
-rw-r--r--arch/powerpc/platforms/cell/spufs/syscalls.c7
-rw-r--r--arch/powerpc/platforms/chrp/Kconfig7
-rw-r--r--arch/powerpc/platforms/chrp/Makefile3
-rw-r--r--arch/powerpc/platforms/chrp/chrp.h2
-rw-r--r--arch/powerpc/platforms/chrp/nvram.c33
-rw-r--r--arch/powerpc/platforms/chrp/pci.c54
-rw-r--r--arch/powerpc/platforms/chrp/pegasos_eth.c12
-rw-r--r--arch/powerpc/platforms/chrp/setup.c87
-rw-r--r--arch/powerpc/platforms/chrp/smp.c5
-rw-r--r--arch/powerpc/platforms/chrp/time.c9
-rw-r--r--arch/powerpc/platforms/embedded6xx/Kconfig38
-rw-r--r--arch/powerpc/platforms/embedded6xx/Makefile3
-rw-r--r--arch/powerpc/platforms/embedded6xx/c2k.c148
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.c30
-rw-r--r--arch/powerpc/platforms/embedded6xx/flipper-pic.h7
-rw-r--r--arch/powerpc/platforms/embedded6xx/gamecube.c36
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.c46
-rw-r--r--arch/powerpc/platforms/embedded6xx/hlwd-pic.h9
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c75
-rw-r--r--arch/powerpc/platforms/embedded6xx/linkstation.c38
-rw-r--r--arch/powerpc/platforms/embedded6xx/ls_uart.c19
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc10x.h13
-rw-r--r--arch/powerpc/platforms/embedded6xx/mpc7448_hpc2.c207
-rw-r--r--arch/powerpc/platforms/embedded6xx/mvme5100.c48
-rw-r--r--arch/powerpc/platforms/embedded6xx/storcenter.c28
-rw-r--r--arch/powerpc/platforms/embedded6xx/usbgecko_udbg.c40
-rw-r--r--arch/powerpc/platforms/embedded6xx/usbgecko_udbg.h7
-rw-r--r--arch/powerpc/platforms/embedded6xx/wii.c134
-rw-r--r--arch/powerpc/platforms/fsl_uli1575.c36
-rw-r--r--arch/powerpc/platforms/maple/Kconfig18
-rw-r--r--arch/powerpc/platforms/maple/Makefile1
-rw-r--r--arch/powerpc/platforms/maple/maple.h12
-rw-r--r--arch/powerpc/platforms/maple/pci.c663
-rw-r--r--arch/powerpc/platforms/maple/setup.c393
-rw-r--r--arch/powerpc/platforms/maple/time.c176
-rw-r--r--arch/powerpc/platforms/microwatt/Kconfig12
-rw-r--r--arch/powerpc/platforms/microwatt/Makefile2
-rw-r--r--arch/powerpc/platforms/microwatt/microwatt.h8
-rw-r--r--arch/powerpc/platforms/microwatt/rng.c44
-rw-r--r--arch/powerpc/platforms/microwatt/setup.c61
-rw-r--r--arch/powerpc/platforms/microwatt/smp.c80
-rw-r--r--arch/powerpc/platforms/pasemi/Kconfig19
-rw-r--r--arch/powerpc/platforms/pasemi/Makefile2
-rw-r--r--arch/powerpc/platforms/pasemi/dma_lib.c34
-rw-r--r--arch/powerpc/platforms/pasemi/gpio_mdio.c32
-rw-r--r--arch/powerpc/platforms/pasemi/idle.c35
-rw-r--r--arch/powerpc/platforms/pasemi/iommu.c68
-rw-r--r--arch/powerpc/platforms/pasemi/misc.c19
-rw-r--r--arch/powerpc/platforms/pasemi/msi.c (renamed from arch/powerpc/sysdev/mpic_pasemi_msi.c)65
-rw-r--r--arch/powerpc/platforms/pasemi/pasemi.h9
-rw-r--r--arch/powerpc/platforms/pasemi/pci.c115
-rw-r--r--arch/powerpc/platforms/pasemi/powersave.S15
-rw-r--r--arch/powerpc/platforms/pasemi/setup.c242
-rw-r--r--arch/powerpc/platforms/pasemi/time.c20
-rw-r--r--arch/powerpc/platforms/powermac/Kconfig11
-rw-r--r--arch/powerpc/platforms/powermac/Makefile12
-rw-r--r--arch/powerpc/platforms/powermac/backlight.c48
-rw-r--r--arch/powerpc/platforms/powermac/bootx_init.c32
-rw-r--r--arch/powerpc/platforms/powermac/cache.S20
-rw-r--r--arch/powerpc/platforms/powermac/feature.c160
-rw-r--r--arch/powerpc/platforms/powermac/low_i2c.c112
-rw-r--r--arch/powerpc/platforms/powermac/nvram.c31
-rw-r--r--arch/powerpc/platforms/powermac/pci.c346
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_base.c50
-rw-r--r--arch/powerpc/platforms/powermac/pfunc_core.c17
-rw-r--r--arch/powerpc/platforms/powermac/pic.c97
-rw-r--r--arch/powerpc/platforms/powermac/pmac.h13
-rw-r--r--arch/powerpc/platforms/powermac/setup.c184
-rw-r--r--arch/powerpc/platforms/powermac/sleep.S194
-rw-r--r--arch/powerpc/platforms/powermac/smp.c146
-rw-r--r--arch/powerpc/platforms/powermac/time.c158
-rw-r--r--arch/powerpc/platforms/powermac/udbg_adb.c5
-rw-r--r--arch/powerpc/platforms/powermac/udbg_scc.c24
-rw-r--r--arch/powerpc/platforms/powernv/Kconfig40
-rw-r--r--arch/powerpc/platforms/powernv/Makefile35
-rw-r--r--arch/powerpc/platforms/powernv/copy-paste.h42
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c975
-rw-r--r--arch/powerpc/platforms/powernv/eeh-powernv.c1663
-rw-r--r--arch/powerpc/platforms/powernv/idle.c1507
-rw-r--r--arch/powerpc/platforms/powernv/memtrace.c324
-rw-r--r--arch/powerpc/platforms/powernv/ocxl.c592
-rw-r--r--arch/powerpc/platforms/powernv/opal-async.c199
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c295
-rw-r--r--arch/powerpc/platforms/powernv/opal-core.c663
-rw-r--r--arch/powerpc/platforms/powernv/opal-dump.c173
-rw-r--r--arch/powerpc/platforms/powernv/opal-elog.c121
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.c719
-rw-r--r--arch/powerpc/platforms/powernv/opal-fadump.h146
-rw-r--r--arch/powerpc/platforms/powernv/opal-flash.c56
-rw-r--r--arch/powerpc/platforms/powernv/opal-hmi.c261
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c324
-rw-r--r--arch/powerpc/platforms/powernv/opal-irqchip.c314
-rw-r--r--arch/powerpc/platforms/powernv/opal-kmsg.c47
-rw-r--r--arch/powerpc/platforms/powernv/opal-lpc.c103
-rw-r--r--arch/powerpc/platforms/powernv/opal-memory-errors.c27
-rw-r--r--arch/powerpc/platforms/powernv/opal-msglog.c77
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c39
-rw-r--r--arch/powerpc/platforms/powernv/opal-power.c174
-rw-r--r--arch/powerpc/platforms/powernv/opal-powercap.c251
-rw-r--r--arch/powerpc/platforms/powernv/opal-prd.c453
-rw-r--r--arch/powerpc/platforms/powernv/opal-psr.c175
-rw-r--r--arch/powerpc/platforms/powernv/opal-rtc.c91
-rw-r--r--arch/powerpc/platforms/powernv/opal-secvar.c182
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor-groups.c240
-rw-r--r--arch/powerpc/platforms/powernv/opal-sensor.c138
-rw-r--r--arch/powerpc/platforms/powernv/opal-sysparam.c74
-rw-r--r--arch/powerpc/platforms/powernv/opal-tracepoints.c11
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S272
-rw-r--r--arch/powerpc/platforms/powernv/opal-xscom.c251
-rw-r--r--arch/powerpc/platforms/powernv/opal.c996
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda-tce.c430
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2795
-rw-r--r--arch/powerpc/platforms/powernv/pci-p5ioc2.c239
-rw-r--r--arch/powerpc/platforms/powernv/pci-sriov.c760
-rw-r--r--arch/powerpc/platforms/powernv/pci.c744
-rw-r--r--arch/powerpc/platforms/powernv/pci.h372
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h29
-rw-r--r--arch/powerpc/platforms/powernv/rng.c148
-rw-r--r--arch/powerpc/platforms/powernv/setup.c431
-rw-r--r--arch/powerpc/platforms/powernv/smp.c413
-rw-r--r--arch/powerpc/platforms/powernv/subcore-asm.S6
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c86
-rw-r--r--arch/powerpc/platforms/powernv/subcore.h17
-rw-r--r--arch/powerpc/platforms/powernv/ultravisor.c70
-rw-r--r--arch/powerpc/platforms/powernv/vas-debug.c168
-rw-r--r--arch/powerpc/platforms/powernv/vas-fault.c245
-rw-r--r--arch/powerpc/platforms/powernv/vas-trace.h113
-rw-r--r--arch/powerpc/platforms/powernv/vas-window.c1471
-rw-r--r--arch/powerpc/platforms/powernv/vas.c253
-rw-r--r--arch/powerpc/platforms/powernv/vas.h501
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig32
-rw-r--r--arch/powerpc/platforms/ps3/Makefile3
-rw-r--r--arch/powerpc/platforms/ps3/device-init.c115
-rw-r--r--arch/powerpc/platforms/ps3/exports.c14
-rw-r--r--arch/powerpc/platforms/ps3/gelic_udbg.c82
-rw-r--r--arch/powerpc/platforms/ps3/htab.c34
-rw-r--r--arch/powerpc/platforms/ps3/hvcall.S312
-rw-r--r--arch/powerpc/platforms/ps3/interrupt.c42
-rw-r--r--arch/powerpc/platforms/ps3/mm.c176
-rw-r--r--arch/powerpc/platforms/ps3/os-area.c37
-rw-r--r--arch/powerpc/platforms/ps3/platform.h43
-rw-r--r--arch/powerpc/platforms/ps3/repository.c68
-rw-r--r--arch/powerpc/platforms/ps3/setup.c106
-rw-r--r--arch/powerpc/platforms/ps3/smp.c28
-rw-r--r--arch/powerpc/platforms/ps3/spu.c39
-rw-r--r--arch/powerpc/platforms/ps3/system-bus.c107
-rw-r--r--arch/powerpc/platforms/ps3/time.c45
-rw-r--r--arch/powerpc/platforms/pseries/Kconfig152
-rw-r--r--arch/powerpc/platforms/pseries/Makefile39
-rw-r--r--arch/powerpc/platforms/pseries/cc_platform.c26
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c465
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c715
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c179
-rw-r--r--arch/powerpc/platforms/pseries/eeh_pseries.c1023
-rw-r--r--arch/powerpc/platforms/pseries/event_sources.c84
-rw-r--r--arch/powerpc/platforms/pseries/firmware.c78
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-cpu.c936
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c953
-rw-r--r--arch/powerpc/platforms/pseries/htmdump.c490
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S106
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c66
-rw-r--r--arch/powerpc/platforms/pseries/hvconsole.c23
-rw-r--r--arch/powerpc/platforms/pseries/hvcserver.c21
-rw-r--r--arch/powerpc/platforms/pseries/ibmebus.c (renamed from arch/powerpc/kernel/ibmebus.c)402
-rw-r--r--arch/powerpc/platforms/pseries/io_event_irq.c10
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c2220
-rw-r--r--arch/powerpc/platforms/pseries/kexec.c45
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c1559
-rw-r--r--arch/powerpc/platforms/pseries/lparcfg.c280
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c654
-rw-r--r--arch/powerpc/platforms/pseries/msi.c379
-rw-r--r--arch/powerpc/platforms/pseries/nvram.c688
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.c97
-rw-r--r--arch/powerpc/platforms/pseries/of_helpers.h9
-rw-r--r--arch/powerpc/platforms/pseries/offline_states.h37
-rw-r--r--arch/powerpc/platforms/pseries/papr-hvpipe.c818
-rw-r--r--arch/powerpc/platforms/pseries/papr-hvpipe.h42
-rw-r--r--arch/powerpc/platforms/pseries/papr-indices.c488
-rw-r--r--arch/powerpc/platforms/pseries/papr-phy-attest.c288
-rw-r--r--arch/powerpc/platforms/pseries/papr-platform-dump.c411
-rw-r--r--arch/powerpc/platforms/pseries/papr-rtas-common.c311
-rw-r--r--arch/powerpc/platforms/pseries/papr-rtas-common.h61
-rw-r--r--arch/powerpc/platforms/pseries/papr-sysparm.c352
-rw-r--r--arch/powerpc/platforms/pseries/papr-vpd.c275
-rw-r--r--arch/powerpc/platforms/pseries/papr_platform_attributes.c364
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c1542
-rw-r--r--arch/powerpc/platforms/pseries/pci.c278
-rw-r--r--arch/powerpc/platforms/pseries/pci_dlpar.c96
-rw-r--r--arch/powerpc/platforms/pseries/plpks-secvar.c253
-rw-r--r--arch/powerpc/platforms/pseries/plpks.c711
-rw-r--r--arch/powerpc/platforms/pseries/plpks_sed_ops.c131
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c167
-rw-r--r--arch/powerpc/platforms/pseries/power.c18
-rw-r--r--arch/powerpc/platforms/pseries/pseries.h118
-rw-r--r--arch/powerpc/platforms/pseries/pseries_energy.c180
-rw-r--r--arch/powerpc/platforms/pseries/ras.c680
-rw-r--r--arch/powerpc/platforms/pseries/reconfig.c77
-rw-r--r--arch/powerpc/platforms/pseries/rng.c20
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.c649
-rw-r--r--arch/powerpc/platforms/pseries/rtas-fadump.h121
-rw-r--r--arch/powerpc/platforms/pseries/rtas-work-area.c210
-rw-r--r--arch/powerpc/platforms/pseries/scanlog.c200
-rw-r--r--arch/powerpc/platforms/pseries/setup.c928
-rw-r--r--arch/powerpc/platforms/pseries/smp.c192
-rw-r--r--arch/powerpc/platforms/pseries/suspend.c131
-rw-r--r--arch/powerpc/platforms/pseries/svm.c95
-rw-r--r--arch/powerpc/platforms/pseries/vas-sysfs.c281
-rw-r--r--arch/powerpc/platforms/pseries/vas.c1141
-rw-r--r--arch/powerpc/platforms/pseries/vas.h157
-rw-r--r--arch/powerpc/platforms/pseries/vio.c (renamed from arch/powerpc/kernel/vio.c)375
-rw-r--r--arch/powerpc/platforms/pseries/vphn.c90
-rw-r--r--arch/powerpc/purgatory/.gitignore2
-rw-r--r--arch/powerpc/purgatory/Makefile17
-rw-r--r--arch/powerpc/purgatory/kexec-purgatory.S14
-rw-r--r--arch/powerpc/purgatory/trampoline_64.S162
-rwxr-xr-xarch/powerpc/relocs_check.pl66
-rw-r--r--arch/powerpc/sysdev/6xx-suspend.S10
-rw-r--r--arch/powerpc/sysdev/Kconfig29
-rw-r--r--arch/powerpc/sysdev/Makefile41
-rw-r--r--arch/powerpc/sysdev/axonram.c366
-rw-r--r--arch/powerpc/sysdev/cpm2.c57
-rw-r--r--arch/powerpc/sysdev/cpm2_pic.c15
-rw-r--r--arch/powerpc/sysdev/cpm2_pic.h1
-rw-r--r--arch/powerpc/sysdev/cpm_common.c277
-rw-r--r--arch/powerpc/sysdev/cpm_gpio.c80
-rw-r--r--arch/powerpc/sysdev/dart.h15
-rw-r--r--arch/powerpc/sysdev/dart_iommu.c304
-rw-r--r--arch/powerpc/sysdev/dcr-low.S11
-rw-r--r--arch/powerpc/sysdev/dcr.c200
-rw-r--r--arch/powerpc/sysdev/ehv_pic.c38
-rw-r--r--arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h101
-rw-r--r--arch/powerpc/sysdev/fsl_85xx_cache_sram.c160
-rw-r--r--arch/powerpc/sysdev/fsl_85xx_l2ctlr.c234
-rw-r--r--arch/powerpc/sysdev/fsl_gtm.c32
-rw-r--r--arch/powerpc/sysdev/fsl_lbc.c66
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_err.c27
-rw-r--r--arch/powerpc/sysdev/fsl_mpic_timer_wakeup.c55
-rw-r--r--arch/powerpc/sysdev/fsl_msi.c245
-rw-r--r--arch/powerpc/sysdev/fsl_msi.h13
-rw-r--r--arch/powerpc/sysdev/fsl_pci.c378
-rw-r--r--arch/powerpc/sysdev/fsl_pci.h20
-rw-r--r--arch/powerpc/sysdev/fsl_pmc.c17
-rw-r--r--arch/powerpc/sysdev/fsl_rcpm.c382
-rw-r--r--arch/powerpc/sysdev/fsl_rio.c284
-rw-r--r--arch/powerpc/sysdev/fsl_rio.h22
-rw-r--r--arch/powerpc/sysdev/fsl_rmu.c64
-rw-r--r--arch/powerpc/sysdev/fsl_soc.c123
-rw-r--r--arch/powerpc/sysdev/fsl_soc.h9
-rw-r--r--arch/powerpc/sysdev/ge/Makefile1
-rw-r--r--arch/powerpc/sysdev/ge/ge_pic.c23
-rw-r--r--arch/powerpc/sysdev/ge/ge_pic.h3
-rw-r--r--arch/powerpc/sysdev/grackle.c27
-rw-r--r--arch/powerpc/sysdev/i8259.c33
-rw-r--r--arch/powerpc/sysdev/indirect_pci.c32
-rw-r--r--arch/powerpc/sysdev/ipic.c108
-rw-r--r--arch/powerpc/sysdev/ipic.h6
-rw-r--r--arch/powerpc/sysdev/micropatch.c748
-rw-r--r--arch/powerpc/sysdev/mmio_nvram.c19
-rw-r--r--arch/powerpc/sysdev/mpc5xxx_clocks.c48
-rw-r--r--arch/powerpc/sysdev/mpic.c174
-rw-r--r--arch/powerpc/sysdev/mpic.h25
-rw-r--r--arch/powerpc/sysdev/mpic_msgr.c39
-rw-r--r--arch/powerpc/sysdev/mpic_msi.c22
-rw-r--r--arch/powerpc/sysdev/mpic_timer.c89
-rw-r--r--arch/powerpc/sysdev/mpic_u3msi.c74
-rw-r--r--arch/powerpc/sysdev/msi_bitmap.c132
-rw-r--r--arch/powerpc/sysdev/mv64x60.h12
-rw-r--r--arch/powerpc/sysdev/mv64x60_dev.c535
-rw-r--r--arch/powerpc/sysdev/mv64x60_pci.c171
-rw-r--r--arch/powerpc/sysdev/mv64x60_pic.c297
-rw-r--r--arch/powerpc/sysdev/mv64x60_udbg.c152
-rw-r--r--arch/powerpc/sysdev/of_rtc.c24
-rw-r--r--arch/powerpc/sysdev/pmi.c282
-rw-r--r--arch/powerpc/sysdev/ppc4xx_msi.c291
-rw-r--r--arch/powerpc/sysdev/ppc4xx_ocm.c416
-rw-r--r--arch/powerpc/sysdev/qe_lib/Kconfig27
-rw-r--r--arch/powerpc/sysdev/qe_lib/Makefile10
-rw-r--r--arch/powerpc/sysdev/qe_lib/gpio.c317
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe.c708
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_ic.c501
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_ic.h103
-rw-r--r--arch/powerpc/sysdev/qe_lib/qe_io.c217
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc.c212
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc_fast.c363
-rw-r--r--arch/powerpc/sysdev/qe_lib/ucc_slow.c379
-rw-r--r--arch/powerpc/sysdev/qe_lib/usb.c56
-rw-r--r--arch/powerpc/sysdev/rtc_cmos_setup.c3
-rw-r--r--arch/powerpc/sysdev/scom.c236
-rw-r--r--arch/powerpc/sysdev/simple_gpio.c152
-rw-r--r--arch/powerpc/sysdev/simple_gpio.h12
-rw-r--r--arch/powerpc/sysdev/tsi108_dev.c28
-rw-r--r--arch/powerpc/sysdev/tsi108_pci.c46
-rw-r--r--arch/powerpc/sysdev/udbg_memcons.c14
-rw-r--r--arch/powerpc/sysdev/xics/Kconfig14
-rw-r--r--arch/powerpc/sysdev/xics/Makefile5
-rw-r--r--arch/powerpc/sysdev/xics/icp-hv.c19
-rw-r--r--arch/powerpc/sysdev/xics/icp-native.c84
-rw-r--r--arch/powerpc/sysdev/xics/icp-opal.c202
-rw-r--r--arch/powerpc/sysdev/xics/ics-native.c254
-rw-r--r--arch/powerpc/sysdev/xics/ics-opal.c60
-rw-r--r--arch/powerpc/sysdev/xics/ics-rtas.c63
-rw-r--r--arch/powerpc/sysdev/xics/xics-common.c265
-rw-r--r--arch/powerpc/sysdev/xilinx_intc.c295
-rw-r--r--arch/powerpc/sysdev/xilinx_pci.c132
-rw-r--r--arch/powerpc/sysdev/xive/Kconfig14
-rw-r--r--arch/powerpc/sysdev/xive/Makefile5
-rw-r--r--arch/powerpc/sysdev/xive/common.c1863
-rw-r--r--arch/powerpc/sysdev/xive/native.c875
-rw-r--r--arch/powerpc/sysdev/xive/spapr.c892
-rw-r--r--arch/powerpc/sysdev/xive/xive-internal.h78
-rw-r--r--arch/powerpc/tools/.gitignore2
-rw-r--r--arch/powerpc/tools/Makefile10
-rwxr-xr-xarch/powerpc/tools/checkpatch.sh22
-rwxr-xr-xarch/powerpc/tools/ftrace-gen-ool-stubs.sh52
-rwxr-xr-xarch/powerpc/tools/ftrace_check.sh50
-rwxr-xr-xarch/powerpc/tools/gcc-check-fpatchable-function-entry.sh26
-rwxr-xr-xarch/powerpc/tools/gcc-check-mprofile-kernel.sh27
-rw-r--r--arch/powerpc/tools/head_check.sh80
-rwxr-xr-xarch/powerpc/tools/relocs_check.sh43
-rwxr-xr-xarch/powerpc/tools/unrel_branch_check.sh79
-rw-r--r--arch/powerpc/xmon/Makefile21
-rw-r--r--arch/powerpc/xmon/ansidecl.h15
-rw-r--r--arch/powerpc/xmon/dis-asm.h10
-rw-r--r--arch/powerpc/xmon/nonstdio.c72
-rw-r--r--arch/powerpc/xmon/nonstdio.h12
-rw-r--r--arch/powerpc/xmon/ppc-dis.c265
-rw-r--r--arch/powerpc/xmon/ppc-opc.c9011
-rw-r--r--arch/powerpc/xmon/ppc.h266
-rw-r--r--arch/powerpc/xmon/spr_access.S47
-rw-r--r--arch/powerpc/xmon/spu-dis.c248
-rw-r--r--arch/powerpc/xmon/spu-insns.h410
-rw-r--r--arch/powerpc/xmon/spu-opc.c45
-rw-r--r--arch/powerpc/xmon/spu.h126
-rw-r--r--arch/powerpc/xmon/xmon.c2269
-rw-r--r--arch/powerpc/xmon/xmon_bpts.S11
-rw-r--r--arch/powerpc/xmon/xmon_bpts.h14
2251 files changed, 244652 insertions, 159055 deletions
diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild
new file mode 100644
index 000000000000..b010ccb071b6
--- /dev/null
+++ b/arch/powerpc/Kbuild
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -Wa,--fatal-warnings
+subdir-asflags-$(CONFIG_PPC_WERROR) := -Wa,--fatal-warnings
+
+obj-y += kernel/
+obj-y += mm/
+obj-y += lib/
+obj-y += sysdev/
+obj-y += platforms/
+obj-y += math-emu/
+obj-y += crypto/
+obj-y += net/
+
+obj-$(CONFIG_XMON) += xmon/
+obj-$(CONFIG_KVM) += kvm/
+
+obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_CORE) += kexec/
+obj-$(CONFIG_KEXEC_FILE) += purgatory/
+
+# for cleaning
+subdir- += boot tools
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a577609f8ed6..e24f4d88885a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1,8 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
source "arch/powerpc/platforms/Kconfig.cputype"
-config PPC32
- bool
- default y if !PPC64
+config CC_HAS_ELFV2
+ def_bool PPC64 && $(cc-option, -mabi=elfv2)
+
+config CC_HAS_PREFIXED
+ def_bool PPC64 && $(cc-option, -mcpu=power10 -mprefixed)
+
+config CC_HAS_PCREL
+ # Clang has a bug (https://github.com/llvm/llvm-project/issues/62372)
+ # where pcrel code is not generated if -msoft-float, -mno-altivec, or
+ # -mno-vsx options are also given. Without these options, fp/vec
+ # instructions are generated from regular kernel code. So Clang can't
+ # do pcrel yet.
+ def_bool PPC64 && CC_IS_GCC && $(cc-option, -mcpu=power10 -mpcrel)
config 32BIT
bool
@@ -12,30 +23,59 @@ config 64BIT
bool
default y if PPC64
-config WORD_SIZE
- int
- default 64 if PPC64
- default 32 if !PPC64
-
-config ARCH_PHYS_ADDR_T_64BIT
- def_bool PPC64 || PHYS_64BIT
-
-config ARCH_DMA_ADDR_T_64BIT
- def_bool ARCH_PHYS_ADDR_T_64BIT
+config LIVEPATCH_64
+ def_bool PPC64
+ depends on LIVEPATCH
config MMU
bool
default y
-config HAVE_SETUP_PER_CPU_AREA
- def_bool PPC64
-
-config NEED_PER_CPU_EMBED_FIRST_CHUNK
- def_bool PPC64
+config ARCH_MMAP_RND_BITS_MAX
+ # On Book3S 64, the default virtual address space for 64-bit processes
+ # is 2^47 (128TB). As a maximum, allow randomisation to consume up to
+ # 32T of address space (2^45), which should ensure a reasonable gap
+ # between bottom-up and top-down allocations for applications that
+ # consume "normal" amounts of address space. Book3S 64 only supports 64K
+ # and 4K page sizes.
+ default 29 if PPC_BOOK3S_64 && PPC_64K_PAGES # 29 = 45 (32T) - 16 (64K)
+ default 33 if PPC_BOOK3S_64 # 33 = 45 (32T) - 12 (4K)
+ #
+ # On all other 64-bit platforms (currently only Book3E), the virtual
+ # address space is 2^46 (64TB). Allow randomisation to consume up to 16T
+ # of address space (2^44). Only 4K page sizes are supported.
+ default 32 if 64BIT # 32 = 44 (16T) - 12 (4K)
+ #
+ # For 32-bit, use the compat values, as they're the same.
+ default ARCH_MMAP_RND_COMPAT_BITS_MAX
+
+config ARCH_MMAP_RND_BITS_MIN
+ # Allow randomisation to consume up to 1GB of address space (2^30).
+ default 14 if 64BIT && PPC_64K_PAGES # 14 = 30 (1GB) - 16 (64K)
+ default 18 if 64BIT # 18 = 30 (1GB) - 12 (4K)
+ #
+ # For 32-bit, use the compat values, as they're the same.
+ default ARCH_MMAP_RND_COMPAT_BITS_MIN
+
+config ARCH_MMAP_RND_COMPAT_BITS_MAX
+ # Total virtual address space for 32-bit processes is 2^31 (2GB).
+ # Allow randomisation to consume up to 512MB of address space (2^29).
+ default 11 if PPC_256K_PAGES # 11 = 29 (512MB) - 18 (256K)
+ default 13 if PPC_64K_PAGES # 13 = 29 (512MB) - 16 (64K)
+ default 15 if PPC_16K_PAGES # 15 = 29 (512MB) - 14 (16K)
+ default 17 # 17 = 29 (512MB) - 12 (4K)
+
+config ARCH_MMAP_RND_COMPAT_BITS_MIN
+ # Total virtual address space for 32-bit processes is 2^31 (2GB).
+ # Allow randomisation to consume up to 8MB of address space (2^23).
+ default 5 if PPC_256K_PAGES # 5 = 23 (8MB) - 18 (256K)
+ default 7 if PPC_64K_PAGES # 7 = 23 (8MB) - 16 (64K)
+ default 9 if PPC_16K_PAGES # 9 = 23 (8MB) - 14 (16K)
+ default 11 # 11 = 23 (8MB) - 12 (4K)
config NR_IRQS
int "Number of virtual interrupt numbers"
- range 32 32768
+ range 32 1048576
default "512"
help
This defines the number of virtual interrupt numbers the kernel
@@ -43,40 +83,33 @@ config NR_IRQS
/proc/interrupts. If you configure your system to have too few,
drivers will fail to load or worse - handle with care.
-config STACKTRACE_SUPPORT
+config NMI_IPI
bool
+ depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR)
default y
-config HAVE_LATENCYTOP_SUPPORT
- def_bool y
-
-config TRACE_IRQFLAGS_SUPPORT
+config PPC_WATCHDOG
bool
+ depends on HARDLOCKUP_DETECTOR_ARCH
default y
+ help
+ This is a placeholder when the powerpc hardlockup detector
+ watchdog is selected (arch/powerpc/kernel/watchdog.c). It is
+ selected via the generic lockup detector menu which is why we
+ have no standalone config option for it here.
-config LOCKDEP_SUPPORT
+config STACKTRACE_SUPPORT
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
+config LOCKDEP_SUPPORT
bool
default y
config GENERIC_LOCKBREAK
bool
default y
- depends on SMP && PREEMPT
-
-config ARCH_HAS_ILOG2_U32
- bool
- default y
-
-config ARCH_HAS_ILOG2_U64
- bool
- default y if 64BIT
+ depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS
config GENERIC_HWEIGHT
bool
@@ -85,71 +118,224 @@ config GENERIC_HWEIGHT
config PPC
bool
default y
+ #
+ # Please keep this list sorted alphabetically.
+ #
+ select ARCH_32BIT_OFF_T if PPC32
+ select ARCH_NEEDS_DEFER_KASAN if PPC_RADIX_MMU
+ select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU
+ select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE
+ select ARCH_ENABLE_MEMORY_HOTPLUG
+ select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_HAS_COPY_MC if PPC64
+ select ARCH_HAS_CURRENT_STACK_POINTER
+ select ARCH_HAS_DEBUG_VIRTUAL
+ select ARCH_HAS_DEBUG_VM_PGTABLE
+ select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX
+ select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES
+ select ARCH_HAS_DMA_OPS if PPC64
+ select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_GCOV_PROFILE_ALL
+ select ARCH_HAS_KCOV
+ select ARCH_HAS_KERNEL_FPU_SUPPORT if PPC64 && PPC_FPU
+ select ARCH_HAS_MEMBARRIER_CALLBACKS
+ select ARCH_HAS_MEMBARRIER_SYNC_CORE
+ select ARCH_HAS_MEMREMAP_COMPAT_ALIGN if PPC_64S_HASH_MMU
+ select ARCH_HAS_MMIOWB if PPC64
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
+ select ARCH_HAS_PHYS_TO_DMA
+ select ARCH_HAS_PMEM_API
+ select ARCH_HAS_PREEMPT_LAZY
+ select ARCH_HAS_PTDUMP
+ select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+ select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx) && !HIBERNATION
+ select ARCH_HAS_STRICT_KERNEL_RWX if PPC_85xx && !HIBERNATION && !RANDOMIZE_BASE
+ select ARCH_HAS_STRICT_MODULE_RWX if ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE && !COMPAT
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+ select ARCH_HAS_UACCESS_FLUSHCACHE
+ select ARCH_HAS_UBSAN
+ select ARCH_HAS_VDSO_ARCH_DATA
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
+ select ARCH_HAVE_EXTRA_ELF_NOTES if SPU_BASE
+ select ARCH_KEEP_MEMBLOCK
+ select ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE if PPC_RADIX_MMU
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+ select ARCH_SPLIT_ARG64 if PPC32
+ select ARCH_STACKWALK
+ select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx
+ select ARCH_SUPPORTS_SCHED_MC if SMP
+ select ARCH_SUPPORTS_SCHED_SMT if PPC64 && SMP
+ select SCHED_MC if ARCH_SUPPORTS_SCHED_MC
+ select ARCH_USE_BUILTIN_BSWAP
+ select ARCH_USE_CMPXCHG_LOCKREF if PPC64
+ select ARCH_USE_MEMTEST
+ select ARCH_USE_QUEUED_RWLOCKS if PPC_QUEUED_SPINLOCKS
+ select ARCH_WANT_DEFAULT_BPF_JIT
+ select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+ select ARCH_WANT_IPC_PARSE_VERSION
+ select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
+ select ARCH_WANT_LD_ORPHAN_WARN
+ select ARCH_WANT_OPTIMIZE_DAX_VMEMMAP if PPC_RADIX_MMU
+ select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if PPC_BOOK3S_32 || PPC_8xx
+ select ARCH_WEAK_RELEASE_ACQUIRE
select BINFMT_ELF
- select OF
- select OF_EARLY_FLATTREE
- select OF_RESERVED_MEM
- select HAVE_FTRACE_MCOUNT_RECORD
+ select BUILDTIME_TABLE_SORT
+ select CLONE_BACKWARDS
+ select CPUMASK_OFFSTACK if NR_CPUS >= 8192
+ select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
+ select DMA_OPS_BYPASS if PPC64
+ select DYNAMIC_FTRACE if FUNCTION_TRACER
+ select EDAC_ATOMIC_SCRUB
+ select EDAC_SUPPORT
+ select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+ select FUNCTION_ALIGNMENT_4B
+ select GENERIC_ATOMIC64 if PPC32
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select GENERIC_CMOS_UPDATE
+ select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC
+ select GENERIC_EARLY_IOREMAP
+ select GENERIC_GETTIMEOFDAY
+ select GENERIC_IDLE_POLL_SETUP
+ select GENERIC_IOREMAP
+ select GENERIC_IRQ_SHOW
+ select GENERIC_IRQ_SHOW_LEVEL
+ select GENERIC_PCI_IOMAP if PCI
+ select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_TIME_VSYSCALL
+ select HAS_IOPORT if PCI
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
+ select HAVE_ARCH_HUGE_VMAP if PPC_RADIX_MMU || PPC_8xx
+ select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE
+ select HAVE_ARCH_KASAN if PPC32 && PAGE_SHIFT <= 14
+ select HAVE_ARCH_KASAN if PPC_RADIX_MMU
+ select HAVE_ARCH_KASAN if PPC_BOOK3E_64
+ select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN
+ select HAVE_ARCH_KCSAN
+ select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
+ select HAVE_ARCH_WITHIN_STACK_FRAMES
+ select HAVE_ARCH_KGDB
+ select HAVE_ARCH_MMAP_RND_BITS
+ select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
+ select HAVE_ARCH_NVRAM_OPS
+ select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_TRACEHOOK
+ select HAVE_ASM_MODVERSIONS
+ select HAVE_CONTEXT_TRACKING_USER
+ select HAVE_C_RECORDMCOUNT
+ select HAVE_DEBUG_KMEMLEAK
+ select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
- select HAVE_FUNCTION_TRACER
+ select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+ select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+ select HAVE_EBPF_JIT
+ select HAVE_EFFICIENT_UNALIGNED_ACCESS
+ select HAVE_GUP_FAST
+ select HAVE_FTRACE_GRAPH_FUNC
+ select HAVE_FTRACE_REGS_HAVING_PT_REGS
+ select HAVE_FUNCTION_ARG_ACCESS_API
+ select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1
+ select HAVE_FUNCTION_ERROR_INJECTION
+ select HAVE_FUNCTION_GRAPH_FREGS
select HAVE_FUNCTION_GRAPH_TRACER
- select SYSCTL_EXCEPTION_TRACE
- select ARCH_WANT_OPTIONAL_GPIOLIB
- select VIRT_TO_BUS if !PPC64
- select HAVE_IDE
+ select HAVE_FUNCTION_TRACER if !COMPILE_TEST && (PPC64 || (PPC32 && CC_IS_GCC))
+ select HAVE_GCC_PLUGINS
+ select HAVE_GENERIC_VDSO
+ select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
+ select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IOREMAP_PROT
- select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
+ select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
+ select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
+ select HAVE_KERNEL_XZ if PPC_BOOK3S || 44x
select HAVE_KPROBES
- select HAVE_ARCH_KGDB
+ select HAVE_KPROBES_ON_FTRACE
select HAVE_KRETPROBES
- select HAVE_ARCH_TRACEHOOK
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
- select HAVE_DMA_ATTRS
- select HAVE_DMA_API_DEBUG
- select HAVE_OPROFILE
- select HAVE_DEBUG_KMEMLEAK
- select ARCH_HAS_SG_CHAIN
- select GENERIC_ATOMIC64 if PPC32
- select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
+ select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if HAVE_OBJTOOL_MCOUNT && (!ARCH_USING_PATCHABLE_FUNCTION_ENTRY || (!CC_IS_GCC || GCC_VERSION >= 110100))
+ select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_MOD_ARCH_SPECIFIC
+ select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
+ select HAVE_OPTPROBES
+ select HAVE_OBJTOOL if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32
+ select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL
select HAVE_PERF_EVENTS
+ select HAVE_PERF_EVENTS_NMI if PPC64
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_PREEMPT_DYNAMIC_KEY
+ select HAVE_RETHOOK if KPROBES
select HAVE_REGS_AND_STACK_ACCESS_API
- select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
- select ARCH_WANT_IPC_PARSE_VERSION
- select SPARSE_IRQ
+ select HAVE_RELIABLE_STACKTRACE
+ select HAVE_RSEQ
+ select HAVE_SAMPLE_FTRACE_DIRECT if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ select HAVE_SETUP_PER_CPU_AREA if PPC64
+ select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,$(m32-flag) -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 -mstack-protector-guard-offset=0)
+ select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,$(m64-flag) -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 -mstack-protector-guard-offset=0)
+ select HAVE_STATIC_CALL if PPC32
+ select HAVE_STATIC_CALL_INLINE if PPC32
+ select HAVE_SYSCALL_TRACEPOINTS
+ select HAVE_VIRT_CPU_ACCOUNTING
+ select HAVE_VIRT_CPU_ACCOUNTING_GEN
+ select HOTPLUG_SMT if HOTPLUG_CPU
+ select SMT_NUM_THREADS_DYNAMIC
+ select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE
+ select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
- select GENERIC_IRQ_SHOW
- select GENERIC_IRQ_SHOW_LEVEL
select IRQ_FORCED_THREADING
- select HAVE_RCU_TABLE_FREE if SMP
- select HAVE_SYSCALL_TRACEPOINTS
- select HAVE_BPF_JIT if PPC64
- select HAVE_ARCH_JUMP_LABEL
- select ARCH_HAVE_NMI_SAFE_CMPXCHG
- select GENERIC_SMP_IDLE_THREAD
- select GENERIC_CMOS_UPDATE
- select GENERIC_TIME_VSYSCALL_OLD
- select GENERIC_CLOCKEVENTS
- select GENERIC_CLOCKEVENTS_BROADCAST if SMP
- select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
- select HAVE_MOD_ARCH_SPECIFIC
+ select KASAN_VMALLOC if KASAN && EXECMEM
+ select LOCK_MM_AND_FIND_VMA
+ select MMU_GATHER_PAGE_SIZE
+ select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
+ select MMU_LAZY_TLB_SHOOTDOWN if PPC_BOOK3S_64
select MODULES_USE_ELF_RELA
- select CLONE_BACKWARDS
- select ARCH_USE_BUILTIN_BSWAP
+ select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
+ select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
+ select NEED_PER_CPU_PAGE_FIRST_CHUNK if PPC64
+ select NEED_SG_DMA_LENGTH
+ select OF
+ select OF_EARLY_FLATTREE
+ select OLD_SIGACTION if PPC32
select OLD_SIGSUSPEND
- select OLD_SIGACTION if PPC32
- select HAVE_DEBUG_STACKOVERFLOW
- select HAVE_IRQ_EXIT_ON_IRQ_STACK
- select ARCH_USE_CMPXCHG_LOCKREF if PPC64
- select HAVE_ARCH_AUDITSYSCALL
- select ARCH_SUPPORTS_ATOMIC_RMW
+ select PCI_DOMAINS if PCI
+ select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
+ select PCI_SYSCALL if PCI
+ select PPC_DAWR if PPC64
+ select RTC_LIB
+ select SPARSE_IRQ
+ select STRICT_KERNEL_RWX if STRICT_MODULE_RWX
+ select SYSCTL_EXCEPTION_TRACE
+ select THREAD_INFO_IN_TASK
+ select TRACE_IRQFLAGS_SUPPORT
+ select VDSO_GETRANDOM
+ #
+ # Please keep this list sorted alphabetically.
+ #
+
+config PPC_BARRIER_NOSPEC
+ bool
+ default y
+ depends on PPC_BOOK3S_64 || PPC_E500
-config GENERIC_CSUM
- def_bool CPU_LITTLE_ENDIAN
+config PPC_HAS_LBARX_LHARX
+ bool
config EARLY_PRINTK
bool
@@ -160,41 +346,26 @@ config PANIC_TIMEOUT
default 180
config COMPAT
- bool
- default y if PPC64
- select COMPAT_BINFMT_ELF
+ bool "Enable support for 32bit binaries"
+ depends on PPC64
+ default y if !CPU_LITTLE_ENDIAN
select ARCH_WANT_OLD_COMPAT_IPC
select COMPAT_OLD_SIGACTION
-config SYSVIPC_COMPAT
- bool
- depends on COMPAT && SYSVIPC
- default y
-
-# All PPC32s use generic nvram driver through ppc_md
-config GENERIC_NVRAM
- bool
- default y if PPC32
-
config SCHED_OMIT_FRAME_POINTER
bool
default y
config ARCH_MAY_HAVE_PC_FDC
bool
- default !PPC_PSERIES || PCI
-
-config PPC_OF
- def_bool y
+ default PCI
config PPC_UDBG_16550
bool
- default n
config GENERIC_TBSYNC
bool
default y if PPC32 && SMP
- default n
config AUDIT_ARCH
bool
@@ -205,6 +376,10 @@ config GENERIC_BUG
default y
depends on BUG
+config GENERIC_BUG_RELATIVE_POINTERS
+ def_bool y
+ depends on GENERIC_BUG
+
config SYS_SUPPORTS_APM_EMULATION
default y if PMAC_APM_EMU
bool
@@ -213,13 +388,11 @@ config EPAPR_BOOT
bool
help
Used to allow a board to specify it wants an ePAPR compliant wrapper.
- default n
config DEFAULT_UIMAGE
bool
help
Used to allow a board to specify it wants a uImage built by default
- default n
config ARCH_HIBERNATION_POSSIBLE
bool
@@ -229,36 +402,53 @@ config ARCH_SUSPEND_POSSIBLE
def_bool y
depends on ADB_PMU || PPC_EFIKA || PPC_LITE5200 || PPC_83xx || \
(PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
- || 44x || 40x
+ || 44x
-config PPC_DCR_NATIVE
- bool
- default n
+config ARCH_SUSPEND_NONZERO_CPU
+ def_bool y
+ depends on PPC_POWERNV || PPC_PSERIES
-config PPC_DCR_MMIO
+config ARCH_HAS_ADD_PAGES
+ def_bool y
+ depends on ARCH_ENABLE_MEMORY_HOTPLUG
+
+config PPC_DCR_NATIVE
bool
- default n
config PPC_DCR
bool
- depends on PPC_DCR_NATIVE || PPC_DCR_MMIO
+ depends on PPC_DCR_NATIVE
default y
-config PPC_OF_PLATFORM_PCI
- bool
- depends on PCI
- depends on PPC64 # not supported on 32 bits yet
- default n
+config PPC_PCI_OF_BUS_MAP
+ bool "Use pci_to_OF_bus_map (deprecated)"
+ depends on PPC32
+ depends on PPC_PMAC || PPC_CHRP
+ help
+ This option uses pci_to_OF_bus_map to map OF nodes to PCI devices, which
+ restricts the system to only having 256 PCI buses. On CHRP it also causes
+ the "pci-OF-bus-map" property to be created in the device tree.
-config ARCH_SUPPORTS_DEBUG_PAGEALLOC
- def_bool y
+ If unsure, say "N".
+
+config PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+ depends on PPC32
+ depends on !PPC_PCI_OF_BUS_MAP
+ bool "Assign PCI bus numbers from zero individually for each PCI domain"
+ default y
+ help
+ By default on PPC32, PCI bus numbers were unique across all PCI domains,
+ so a system could have only 256 PCI buses regardless of the number of
+ PCI domains. When this option is enabled, PCI bus numbers are
+ PCI domain dependent and each PCI controller in its own domain can have
+ 256 PCI buses, as on other Linux architectures.
config ARCH_SUPPORTS_UPROBES
def_bool y
config PPC_ADV_DEBUG_REGS
bool
- depends on 40x || BOOKE
+ depends on BOOKE
default y
config PPC_ADV_DEBUG_IACS
@@ -283,13 +473,13 @@ config PPC_ADV_DEBUG_DAC_RANGE
depends on PPC_ADV_DEBUG_REGS && 44x
default y
-config PPC_EMULATE_SSTEP
+config PPC_DAWR
bool
- default y if KPROBES || UPROBES || XMON || HAVE_HW_BREAKPOINT
-
-source "init/Kconfig"
-source "kernel/Kconfig.freezer"
+config PGTABLE_LEVELS
+ int
+ default 2 if !PPC64
+ default 4
source "arch/powerpc/sysdev/Kconfig"
source "arch/powerpc/platforms/Kconfig"
@@ -299,20 +489,15 @@ menu "Kernel options"
config HIGHMEM
bool "High memory support"
depends on PPC32
+ select KMAP_LOCAL
-source kernel/Kconfig.hz
-source kernel/Kconfig.preempt
-source "fs/Kconfig.binfmt"
-
-config HUGETLB_PAGE_SIZE_VARIABLE
- bool
- depends on HUGETLB_PAGE
- default y
+source "kernel/Kconfig.hz"
config MATH_EMULATION
bool "Math emulation"
- depends on 4xx || 8xx || PPC_MPC832x || BOOKE
- ---help---
+ depends on 44x || PPC_8xx || PPC_MPC832x || BOOKE || PPC_MICROWATT
+ select PPC_FPU_REGS
+ help
Some PowerPC chips designed for embedded applications do not have
a floating-point unit and therefore do not implement the
floating-point instructions in the PowerPC instruction set. If you
@@ -329,66 +514,112 @@ choice
default MATH_EMULATION_FULL
depends on MATH_EMULATION
-config MATH_EMULATION_FULL
+config MATH_EMULATION_FULL
bool "Emulate all the floating point instructions"
- ---help---
+ help
Selecting this option enables the kernel to emulate
all the floating point instructions. If your SoC doesn't have
an FPU, you should select this.
config MATH_EMULATION_HW_UNIMPLEMENTED
bool "Just emulate the FPU unimplemented instructions"
- ---help---
+ help
Select this if you know your SoC does have a hardware FPU,
but some floating point instructions are not implemented by it.
endchoice
config PPC_TRANSACTIONAL_MEM
- bool "Transactional Memory support for POWERPC"
- depends on PPC_BOOK3S_64
- depends on SMP
- select ALTIVEC
- select VSX
- default n
- ---help---
- Support user-mode Transactional Memory on POWERPC.
-
-config IOMMU_HELPER
- def_bool PPC64
+ bool "Transactional Memory support for POWERPC"
+ depends on PPC_BOOK3S_64
+ depends on SMP
+ select ALTIVEC
+ select VSX
+ help
+ Support user-mode Transactional Memory on POWERPC.
-config SWIOTLB
- bool "SWIOTLB support"
+config PPC_UV
+ bool "Ultravisor support"
+ depends on KVM_BOOK3S_HV_POSSIBLE
+ depends on DEVICE_PRIVATE
default n
- select IOMMU_HELPER
- ---help---
- Support for IO bounce buffering for systems without an IOMMU.
- This allows us to DMA to the full physical address space on
- platforms where the size of a physical address is larger
- than the bus address. Not all platforms support this.
+ help
+ This option paravirtualizes the kernel to run on POWER platforms that
+ support the Protected Execution Facility (PEF). On such platforms,
+ the ultravisor firmware runs at a privilege level above the
+ hypervisor.
+
+ If unsure, say "N".
+
+config LD_HEAD_STUB_CATCH
+ bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
+ depends on PPC64
+ help
+ Very large kernels can cause linker branch stubs to be generated by
+ code in head_64.S, which moves the head text sections out of their
+ specified location. This option can work around the problem.
+
+ If unsure, say "N".
+
+config MPROFILE_KERNEL
+ depends on PPC64_ELF_ABI_V2 && FUNCTION_TRACER
+ def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mlittle-endian) if CPU_LITTLE_ENDIAN
+ def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mbig-endian) if CPU_BIG_ENDIAN
+
+config ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+ depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2)
+ depends on $(cc-option,-fpatchable-function-entry=2)
+ def_bool y if PPC32
+ def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN
+ def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN
+
+config PPC_FTRACE_OUT_OF_LINE
+ def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+ select ARCH_WANTS_PRE_LINK_VMLINUX
+
+config PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE
+ int "Number of ftrace out-of-line stubs to reserve within .text"
+ depends on PPC_FTRACE_OUT_OF_LINE
+ default 32768
+ help
+ Number of stubs to reserve for use by ftrace. This space is
+ reserved within .text, and is distinct from any additional space
+ added at the end of .text before the final vmlinux link. Set to
+ zero to have stubs only be generated at the end of vmlinux (only
+ if the size of vmlinux is less than 32MB). Set to a higher value
+ if building vmlinux larger than 48MB.
config HOTPLUG_CPU
bool "Support for enabling/disabling CPUs"
depends on SMP && (PPC_PSERIES || \
- PPC_PMAC || PPC_POWERNV || (PPC_85xx && !PPC_E500MC))
- ---help---
+ PPC_PMAC || PPC_POWERNV || FSL_SOC_BOOKE)
+ help
Say Y here to be able to disable and re-enable individual
CPUs at runtime on SMP machines.
Say N if you are unsure.
-config ARCH_CPU_PROBE_RELEASE
- def_bool y
- depends on HOTPLUG_CPU
-
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
+config INTERRUPT_SANITIZE_REGISTERS
+ bool "Clear gprs on interrupt arrival"
+ depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+ default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV
+ help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
-config ARCH_HAS_WALK_MEMORY
- def_bool y
+config PPC_QUEUED_SPINLOCKS
+ bool "Queued spinlocks" if EXPERT
+ depends on SMP
+ default PPC_BOOK3S_64
+ help
+ Say Y here to use queued spinlocks which give better scalability and
+ fairness on large SMP and NUMA systems without harming single threaded
+ performance.
-config ARCH_ENABLE_MEMORY_HOTREMOVE
+config ARCH_CPU_PROBE_RELEASE
def_bool y
+ depends on HOTPLUG_CPU
config PPC64_SUPPORTS_MEMORY_FAILURE
bool "Add support for memory hwpoison"
@@ -396,46 +627,130 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
default "y" if PPC_POWERNV
select ARCH_SUPPORTS_MEMORY_FAILURE
-config KEXEC
- bool "kexec system call"
- depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
- select CRYPTO
- select CRYPTO_SHA256
+config ARCH_SUPPORTS_KEXEC
+ def_bool PPC_BOOK3S || PPC_E500 || (44x && !SMP)
+
+config ARCH_SUPPORTS_KEXEC_FILE
+ def_bool PPC64
+
+config ARCH_SUPPORTS_KEXEC_PURGATORY
+ def_bool y
+
+config ARCH_SELECTS_KEXEC_FILE
+ def_bool y
+ depends on KEXEC_FILE
+ select KEXEC_ELF
+ select HAVE_IMA_KEXEC if IMA
+
+config PPC64_BIG_ENDIAN_ELF_ABI_V2
+ # Option is available to BFD, but LLD does not support ELFv1 so this is
+ # always true there.
+ prompt "Build big-endian kernel using ELF ABI V2" if LD_IS_BFD && EXPERT
+ def_bool y
+ depends on PPC64 && CPU_BIG_ENDIAN
+ depends on CC_HAS_ELFV2
+ help
+ This builds the kernel image using the "Power Architecture 64-Bit ELF
+ V2 ABI Specification", which has a reduced stack overhead and faster
+ function calls. This internal kernel ABI option does not affect
+ userspace compatibility.
+
+ The V2 ABI is standard for 64-bit little-endian, but for big-endian
+ it is less well tested by kernel and toolchain. However some distros
+ build userspace this way, and it can produce a functioning kernel.
+
+config RELOCATABLE
+ bool "Build a relocatable kernel"
+ depends on PPC64 || (FLATMEM && (44x || PPC_85xx))
+ select NONSTATIC_KERNEL
+ help
+ This builds a kernel image that is capable of running at the
+ location the kernel is loaded at. For ppc32, there are no
+ alignment restrictions, and this feature is a superset of
+ DYNAMIC_MEMSTART and hence overrides it. For ppc64, we should use
+ 16k-aligned base address. The kernel is linked as a
+ position-independent executable (PIE) and contains dynamic relocations
+ which are processed early in the bootup process.
+
+ One use is for the kexec on panic case where the recovery kernel
+ must live at a different physical address than the primary
+ kernel.
+
+ Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
+ it has been loaded at and the compile time physical addresses
+ CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START
+ setting can still be useful to bootwrappers that need to know the
+ load address of the kernel (eg. u-boot/mkimage).
+
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image"
+ depends on PPC_85xx && FLATMEM
+ depends on RELOCATABLE
help
- kexec is a system call that implements the ability to shutdown your
- current kernel, and to start another kernel. It is like a reboot
- but it is independent of the system firmware. And like a reboot
- you can start any kernel with it, not just Linux.
-
- The name comes from the similarity to the exec system call.
-
- It is an ongoing process to be certain the hardware in a machine
- is properly shutdown, so do not be surprised if this code does not
- initially work for you. As of this writing the exact hardware
- interface is strongly in flux, so no good recommendation can be
- made.
-
-config CRASH_DUMP
- bool "Build a kdump crash kernel"
- depends on PPC64 || 6xx || FSL_BOOKE || (44x && !SMP)
- select RELOCATABLE if (PPC64 && !COMPILE_TEST) || 44x || FSL_BOOKE
+ Randomizes the virtual address at which the kernel image is
+ loaded, as a security feature that deters exploit attempts
+ relying on knowledge of the location of kernel internals.
+
+ If unsure, say Y.
+
+config RELOCATABLE_TEST
+ bool "Test relocatable kernel"
+ depends on (PPC64 && RELOCATABLE)
help
- Build a kernel suitable for use as a kdump capture kernel.
- The same kernel binary can be used as production kernel and dump
- capture kernel.
+ This runs the relocatable kernel at the address it was initially
+ loaded at, which tends to be non-zero and therefore tests the
+ relocation code.
+
+config ARCH_SUPPORTS_CRASH_DUMP
+ def_bool PPC64 || PPC_BOOK3S_32 || PPC_85xx || (44x && !SMP)
+
+config ARCH_DEFAULT_CRASH_DUMP
+ bool
+ default y if !PPC_BOOK3S_32
+
+config ARCH_SELECTS_CRASH_DUMP
+ def_bool y
+ depends on CRASH_DUMP
+ select RELOCATABLE if PPC64 || 44x || PPC_85xx
+
+config ARCH_SUPPORTS_CRASH_HOTPLUG
+ def_bool y
+ depends on PPC64
+
+config ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION
+ def_bool CRASH_RESERVE
config FA_DUMP
bool "Firmware-assisted dump"
- depends on PPC64 && PPC_RTAS && CRASH_DUMP && KEXEC
+ depends on CRASH_DUMP && PPC64 && (PPC_RTAS || PPC_POWERNV)
help
A robust mechanism to get reliable kernel crash dump with
assistance from firmware. This approach does not use kexec,
- instead firmware assists in booting the kdump kernel
+ instead firmware assists in booting the capture kernel
while preserving memory contents. Firmware-assisted dump
is meant to be a kdump replacement offering robustness and
speed not possible without system firmware assistance.
- If unsure, say "N"
+ If unsure, say "y". Only special kernels like petitboot may
+ need to say "N" here.
+
+config PRESERVE_FA_DUMP
+ bool "Preserve Firmware-assisted dump"
+ depends on PPC64 && PPC_POWERNV && !FA_DUMP
+ help
+ On a kernel with FA_DUMP disabled, this option helps to preserve
+ crash data from a previously crashed kernel. Useful when the next
+ memory preserving kernel boot would process this crash data.
+ Petitboot kernel is the typical usecase for this option.
+
+config OPAL_CORE
+ bool "Export OPAL memory as /sys/firmware/opal/core"
+ depends on PPC64 && PPC_POWERNV
+ help
+ This option uses the MPIPL support in firmware to provide an
+ ELF core of OPAL memory after a crash. The ELF core is exported
+ as /sys/firmware/opal/core file which is helpful in debugging
+ OPAL crashes using GDB.
config IRQ_ALL_CPUS
bool "Distribute interrupts on all CPUs by default"
@@ -447,18 +762,21 @@ config IRQ_ALL_CPUS
reported with SMP Power Macintoshes with this option enabled.
config NUMA
- bool "NUMA support"
- depends on PPC64
- default y if SMP && PPC_PSERIES
+ bool "NUMA Memory Allocation and Scheduler Support"
+ depends on PPC64 && SMP
+ default y if PPC_PSERIES || PPC_POWERNV
+ select USE_PERCPU_NUMA_NODE_ID
+ help
+ Enable NUMA (Non-Uniform Memory Access) support.
+
+ The kernel will try to allocate memory used by a CPU on the
+ local memory controller of the CPU and add some more
+ NUMA awareness to the kernel.
config NODES_SHIFT
int
default "8" if PPC64
default "4"
- depends on NEED_MULTIPLE_NODES
-
-config USE_PERCPU_NUMA_NODE_ID
- def_bool y
depends on NUMA
config HAVE_MEMORYLESS_NODES
@@ -480,45 +798,22 @@ config ARCH_SPARSEMEM_ENABLE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
- depends on (SMP && PPC_PSERIES) || PPC_PS3
-
-config SYS_SUPPORTS_HUGETLBFS
- bool
+ depends on PPC_BOOK3S_64
-source "mm/Kconfig"
+config ILLEGAL_POINTER_VALUE
+ hex
+ # This is roughly half way between the top of user space and the bottom
+ # of kernel space, which seems about as good as we can get.
+ default 0x5deadbeef0000000 if PPC64
+ default 0
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
-# Some NUMA nodes have memory ranges that span
-# other nodes. Even though a pfn is valid and
-# between a node's start and end pfns, it may not
-# reside on that node. See memmap_init_zone()
-# for details.
-config NODES_SPAN_OTHER_NODES
- def_bool y
- depends on NEED_MULTIPLE_NODES
-
-config PPC_HAS_HASH_64K
- bool
- depends on PPC64
- default n
-
-config STDBINUTILS
- bool "Using standard binutils settings"
- depends on 44x
- default y
- help
- Turning this option off allows you to select 256KB PAGE_SIZE on 44x.
- Note, that kernel will be able to run only those applications,
- which had been compiled using binutils later than 2.17.50.0.3 with
- '-zmax-page-size' set to 256K (the default is 64K). Or, if using
- the older binutils, you can patch them with a trivial patch, which
- changes the ELF_MAXPAGESIZE definition from 0x10000 to 0x40000.
-
choice
prompt "Page size"
+ default PPC_64K_PAGES if PPC_BOOK3S_64
default PPC_4K_PAGES
help
Select the kernel logical page size. Increasing the page size
@@ -544,105 +839,179 @@ choice
config PPC_4K_PAGES
bool "4k page size"
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
+ select HAVE_PAGE_SIZE_4KB
config PPC_16K_PAGES
- bool "16k page size" if 44x
+ bool "16k page size"
+ depends on 44x || PPC_8xx
+ select HAVE_PAGE_SIZE_16KB
config PPC_64K_PAGES
- bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64
- depends on !PPC_FSL_BOOK3E
- select PPC_HAS_HASH_64K if PPC_STD_MMU_64
+ bool "64k page size"
+ depends on 44x || PPC_BOOK3S_64
+ select HAVE_ARCH_SOFT_DIRTY if PPC_BOOK3S_64
+ select HAVE_PAGE_SIZE_64KB
config PPC_256K_PAGES
- bool "256k page size" if 44x
- depends on !STDBINUTILS
+ bool "256k page size (Requires non-standard binutils settings)"
+ depends on 44x && !PPC_47x
+ select HAVE_PAGE_SIZE_256KB
help
Make the page size 256k.
- As the ELF standard only requires alignment to support page
- sizes up to 64k, you will need to compile all of your user
- space applications with a non-standard binutils settings
- (see the STDBINUTILS description for details).
-
- Say N unless you know what you are doing.
+ The kernel will only be able to run applications that have been
+ compiled with '-zmax-page-size' set to 256K (the default is 64K) using
+ binutils later than 2.17.50.0.3, or by patching the ELF_MAXPAGESIZE
+ definition from 0x10000 to 0x40000 in older versions.
endchoice
-config FORCE_MAX_ZONEORDER
- int "Maximum zone order"
- range 9 64 if PPC64 && PPC_64K_PAGES
- default "9" if PPC64 && PPC_64K_PAGES
- range 13 64 if PPC64 && !PPC_64K_PAGES
- default "13" if PPC64 && !PPC_64K_PAGES
- range 9 64 if PPC32 && PPC_16K_PAGES
- default "9" if PPC32 && PPC_16K_PAGES
- range 7 64 if PPC32 && PPC_64K_PAGES
- default "7" if PPC32 && PPC_64K_PAGES
- range 5 64 if PPC32 && PPC_256K_PAGES
- default "5" if PPC32 && PPC_256K_PAGES
- range 11 64
- default "11"
+config THREAD_SHIFT
+ int "Thread shift" if EXPERT
+ range 13 15
+ default "15" if PPC_256K_PAGES
+ default "15" if PPC_PSERIES || PPC_POWERNV
+ default "14" if PPC64
+ default "13"
help
- The kernel memory allocator divides physically contiguous memory
- blocks into "zones", where each zone is a power of two number of
- pages. This option selects the largest power of two that the kernel
- keeps in the memory allocator. If you need to allocate very large
- blocks of physically contiguous memory, then you may need to
- increase this value.
+ Used to define the stack size. The default is almost always what you
+ want. Only change this if you know what you are doing.
- This config option is actually maximum order plus one. For example,
- a value of 11 means that the largest free memory block is 2^10 pages.
+config DATA_SHIFT_BOOL
+ bool "Set custom data alignment"
+ depends on ADVANCED_OPTIONS
+ depends on STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE
+ depends on (PPC_8xx && !PIN_TLB_DATA && (!STRICT_KERNEL_RWX || !PIN_TLB_TEXT)) || \
+ PPC_BOOK3S_32 || PPC_85xx
+ help
+ This option allows you to set the kernel data alignment. When
+ RAM is mapped by blocks, the alignment needs to fit the size and
+ number of possible blocks. The default should be OK for most configs.
+
+ Say N here unless you know what you are doing.
+
+config DATA_SHIFT
+ int "Data shift" if DATA_SHIFT_BOOL
+ default 24 if STRICT_KERNEL_RWX && PPC64
+ range 17 28 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+ range 14 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+ range 20 24 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_85xx
+ default 22 if STRICT_KERNEL_RWX && PPC_BOOK3S_32
+ default 18 if (DEBUG_PAGEALLOC || KFENCE) && PPC_BOOK3S_32
+ default 23 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx && \
+ (PIN_TLB_DATA || PIN_TLB_TEXT)
+ default 19 if (STRICT_KERNEL_RWX || DEBUG_PAGEALLOC || KFENCE) && PPC_8xx
+ default 24 if STRICT_KERNEL_RWX && PPC_85xx
+ default PAGE_SHIFT
+ help
+ On Book3S 32 (603+), DBATs are used to map kernel text and rodata RO.
+ The smaller the alignment, the greater the number of necessary DBATs.
+
+ On 8xx, large pages (16kb or 512kb or 8M) are used to map kernel
+ linear memory. Aligning to 8M reduces TLB misses as only 8M pages
+ are used in that case. If PIN_TLB is selected, it must be aligned
+ to 8M as 8M pages will be pinned.
+
+config ARCH_FORCE_MAX_ORDER
+ int "Order of maximal physically contiguous allocations"
+ range 7 8 if PPC64 && PPC_64K_PAGES
+ default "8" if PPC64 && PPC_64K_PAGES
+ range 12 12 if PPC64 && !PPC_64K_PAGES
+ default "12" if PPC64 && !PPC_64K_PAGES
+ range 8 10 if PPC32 && PPC_16K_PAGES
+ default "8" if PPC32 && PPC_16K_PAGES
+ range 6 10 if PPC32 && PPC_64K_PAGES
+ default "6" if PPC32 && PPC_64K_PAGES
+ range 4 10 if PPC32 && PPC_256K_PAGES
+ default "4" if PPC32 && PPC_256K_PAGES
+ range 10 12
+ default "10"
+ help
+ The kernel page allocator limits the size of maximal physically
+ contiguous allocations. The limit is called MAX_PAGE_ORDER and it
+ defines the maximal power of two of number of pages that can be
+ allocated as a single contiguous block. This option allows
+ overriding the default setting when ability to allocate very
+ large blocks of physically contiguous memory is required.
The page size is not necessarily 4KB. For example, on 64-bit
systems, 64KB pages can be enabled via CONFIG_PPC_64K_PAGES. Keep
this in mind when choosing a value for this option.
+ Don't change if unsure.
+
config PPC_SUBPAGE_PROT
- bool "Support setting protections for 4k subpages"
- depends on PPC_STD_MMU_64 && PPC_64K_PAGES
+ bool "Support setting protections for 4k subpages (subpage_prot syscall)"
+ default n
+ depends on PPC_64S_HASH_MMU && PPC_64K_PAGES
help
- This option adds support for a system call to allow user programs
+ This option adds support for system call to allow user programs
to set access permissions (read/write, readonly, or no access)
on the 4k subpages of each 64k page.
-config SCHED_SMT
- bool "SMT (Hyperthreading) scheduler support"
- depends on PPC64 && SMP
+ If unsure, say N here.
+
+config PPC_PROT_SAO_LPAR
+ bool "Support PROT_SAO mappings in LPARs"
+ depends on PPC_BOOK3S_64
help
- SMT scheduler support improves the CPU scheduler's decision making
- when dealing with POWER5 cpus at a cost of slightly increased
- overhead in some places. If unsure say N here.
+ This option adds support for PROT_SAO mappings from userspace
+ inside LPARs on supported CPUs.
+
+ This may cause issues when performing guest migration from
+ a CPU that supports SAO to one that does not.
+
+ If unsure, say N here.
+
+config PPC_COPRO_BASE
+ bool
config PPC_DENORMALISATION
bool "PowerPC denormalisation exception handling"
depends on PPC_BOOK3S_64
default "y" if PPC_POWERNV
- ---help---
+ help
Add support for handling denormalisation of single precision
values. Useful for bare metal only. If unsure say Y here.
-config CMDLINE_BOOL
- bool "Default bootloader kernel arguments"
-
config CMDLINE
string "Initial kernel command string"
- depends on CMDLINE_BOOL
- default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+ default ""
help
On some platforms, there is currently no way for the boot loader to
pass arguments to the kernel. For these platforms, you can supply
some command-line options at build time by entering them here. In
most cases you will need to specify the root device here.
+choice
+ prompt "Kernel command line type"
+ depends on CMDLINE != ""
+ default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+ bool "Use bootloader kernel arguments if available"
+ help
+ Uses the command-line options passed by the boot loader. If
+ the boot loader doesn't provide any, the default kernel command
+ string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+ bool "Extend bootloader kernel arguments"
+ help
+ The command-line arguments provided by the boot loader will be
+ appended to the default kernel command string.
+
config CMDLINE_FORCE
bool "Always use the default kernel command string"
- depends on CMDLINE_BOOL
help
Always use the default kernel command string, even if the boot
loader passes other arguments to the kernel.
This is useful if you cannot or don't want to change the
command-line options your boot loader passes to the kernel.
+endchoice
+
config EXTRA_TARGETS
string "Additional default image types"
help
@@ -660,24 +1029,51 @@ config ARCH_WANTS_FREEZER_CONTROL
def_bool y
depends on ADB_PMU
-source kernel/power/Kconfig
+source "kernel/power/Kconfig"
+
+config PPC_MEM_KEYS
+ prompt "PowerPC Memory Protection Keys"
+ def_bool y
+ depends on PPC_BOOK3S_64
+ depends on PPC_64S_HASH_MMU
+ select ARCH_USES_HIGH_VMA_FLAGS
+ select ARCH_HAS_PKEYS
+ help
+ Memory Protection Keys provides a mechanism for enforcing
+ page-based protections, but without requiring modification of the
+ page tables when an application changes protection domains.
+
+ For details, see Documentation/core-api/protection-keys.rst
+
+ If unsure, say y.
-config SECCOMP
- bool "Enable seccomp to safely compute untrusted bytecode"
- depends on PROC_FS
+config ARCH_PKEY_BITS
+ int
+ default 5
+
+config PPC_SECURE_BOOT
+ prompt "Enable secure boot support"
+ bool
+ depends on PPC_POWERNV || PPC_PSERIES
+ depends on IMA_ARCH_POLICY
+ imply IMA_SECURE_AND_OR_TRUSTED_BOOT
+ select PSERIES_PLPKS if PPC_PSERIES
+ help
+ Systems with firmware secure boot enabled need to define security
+ policies to extend secure boot to the OS. This config allows a user
+ to enable OS secure boot on systems that have firmware support for
+ it. If in doubt say N.
+
+config PPC_SECVAR_SYSFS
+ bool "Enable sysfs interface for POWER secure variables"
default y
+ depends on PPC_SECURE_BOOT
+ depends on SYSFS
help
- This kernel feature is useful for number crunching applications
- that may need to compute untrusted bytecode during their
- execution. By using pipes or other transports made available to
- the process as file descriptors supporting the read/write
- syscalls, it's possible to isolate those applications in
- their own address space using seccomp. Once seccomp is
- enabled via /proc/<pid>/seccomp, it cannot be disabled
- and the task is only allowed to execute a few safe syscalls
- defined by each seccomp mode.
-
- If unsure, say Y. Only embedded should say N here.
+ POWER secure variables are managed and controlled by firmware.
+ These variables are exposed to userspace via sysfs to enable
+ read/write operations on these variables. Say Y if you have
+ secure boot enabled and want to expose variables to userspace.
endmenu
@@ -698,16 +1094,6 @@ config ISA
have an IBM RS/6000 or pSeries machine, say Y. If you have an
embedded board, consult your board documentation.
-config ZONE_DMA
- bool
- default y
-
-config NEED_DMA_MAP_STATE
- def_bool (PPC64 || NOT_COHERENT_CACHE)
-
-config NEED_SG_DMA_LENGTH
- def_bool y
-
config GENERIC_ISA_DMA
bool
depends on ISA_DMA_API
@@ -716,11 +1102,7 @@ config GENERIC_ISA_DMA
config PPC_INDIRECT_PCI
bool
depends on PCI
- default y if 40x || 44x
- default n
-
-config EISA
- bool
+ default y if 44x
config SBUS
bool
@@ -729,7 +1111,8 @@ config FSL_SOC
bool
config FSL_PCI
- bool
+ bool
+ select ARCH_HAS_DMA_SET_MASK
select PPC_INDIRECT_PCI
select PCI_QUIRKS
@@ -744,18 +1127,14 @@ config FSL_PMC
config PPC4xx_CPM
bool
default y
- depends on SUSPEND && (44x || 40x)
+ depends on SUSPEND && 44x
help
PPC4xx Clock Power Management (CPM) support (suspend/resume).
It also enables support for two different idle states (idle-wait
and idle-doze).
-config 4xx_SOC
- bool
-
config FSL_LBC
bool "Freescale Local Bus support"
- depends on FSL_SOC
help
Enables reporting of errors from the Freescale local bus
controller. Also contains some common code used by
@@ -767,82 +1146,18 @@ config FSL_GTM
help
Freescale General-purpose Timers support
-# Yes MCA RS/6000s exist but Linux-PPC does not currently support any
-config MCA
- bool
-
-# Platforms that what PCI turned unconditionally just do select PCI
-# in their config node. Platforms that want to choose at config
-# time should select PPC_PCI_CHOICE
-config PPC_PCI_CHOICE
- bool
-
-config PCI
- bool "PCI support" if PPC_PCI_CHOICE
- default y if !40x && !CPM2 && !8xx && !PPC_83xx \
- && !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON
- default PCI_QSPAN if !4xx && !CPM2 && 8xx
- select GENERIC_PCI_IOMAP
- help
- Find out whether your system includes a PCI bus. PCI is the name of
- a bus system, i.e. the way the CPU talks to the other stuff inside
- your box. If you say Y here, the kernel will include drivers and
- infrastructure code to support PCI bus devices.
-
-config PCI_DOMAINS
- def_bool PCI
-
-config PCI_SYSCALL
- def_bool PCI
-
-config PCI_QSPAN
- bool "QSpan PCI"
- depends on !4xx && !CPM2 && 8xx
- select PPC_I8259
- help
- Say Y here if you have a system based on a Motorola 8xx-series
- embedded processor with a QSPAN PCI interface, otherwise say N.
-
-config PCI_8260
- bool
- depends on PCI && 8260
- select PPC_INDIRECT_PCI
- default y
-
-source "drivers/pci/pcie/Kconfig"
-
-source "drivers/pci/Kconfig"
-
-source "drivers/pcmcia/Kconfig"
-
-source "drivers/pci/hotplug/Kconfig"
-
-config HAS_RAPIDIO
- bool
- default n
-
-config RAPIDIO
- tristate "RapidIO support"
- depends on HAS_RAPIDIO || PCI
- help
- If you say Y here, the kernel will include drivers and
- infrastructure code to support RapidIO interconnect devices.
-
config FSL_RIO
bool "Freescale Embedded SRIO Controller support"
- depends on RAPIDIO = y && HAS_RAPIDIO
+ depends on RAPIDIO = y && HAVE_RAPIDIO
default "n"
- ---help---
+ help
Include support for RapidIO controller on Freescale embedded
processors (MPC8548, MPC8641, etc).
-source "drivers/rapidio/Kconfig"
-
endmenu
config NONSTATIC_KERNEL
bool
- default n
menu "Advanced setup"
depends on PPC32
@@ -878,7 +1193,7 @@ config LOWMEM_SIZE
config LOWMEM_CAM_NUM_BOOL
bool "Set number of CAMs to use to map low memory"
- depends on ADVANCED_OPTIONS && FSL_BOOKE
+ depends on ADVANCED_OPTIONS && PPC_85xx
help
This option allows you to set the maximum number of CAM slots that
will be used to map low memory. There are a limited number of slots
@@ -889,51 +1204,31 @@ config LOWMEM_CAM_NUM_BOOL
Say N here unless you know what you are doing.
config LOWMEM_CAM_NUM
- depends on FSL_BOOKE
+ depends on PPC_85xx
int "Number of CAMs to use to map low memory" if LOWMEM_CAM_NUM_BOOL
- default 3
+ default 3 if !STRICT_KERNEL_RWX
+ default 9 if DATA_SHIFT >= 24
+ default 12 if DATA_SHIFT >= 22
+ default 15
config DYNAMIC_MEMSTART
bool "Enable page aligned dynamic load address for kernel"
- depends on ADVANCED_OPTIONS && FLATMEM && (FSL_BOOKE || 44x)
+ depends on ADVANCED_OPTIONS && FLATMEM && (PPC_85xx || 44x)
select NONSTATIC_KERNEL
help
This option enables the kernel to be loaded at any page aligned
- physical address. The kernel creates a mapping from KERNELBASE to
+ physical address. The kernel creates a mapping from KERNELBASE to
the address where the kernel is loaded. The page size here implies
the TLB page size of the mapping for kernel on the particular platform.
Please refer to the init code for finding the TLB page size.
DYNAMIC_MEMSTART is an easy way of implementing pseudo-RELOCATABLE
kernel image, where the only restriction is the page aligned kernel
- load address. When this option is enabled, the compile time physical
+ load address. When this option is enabled, the compile time physical
address CONFIG_PHYSICAL_START is ignored.
This option is overridden by CONFIG_RELOCATABLE
-config RELOCATABLE
- bool "Build a relocatable kernel"
- depends on ADVANCED_OPTIONS && FLATMEM && (44x || FSL_BOOKE)
- select NONSTATIC_KERNEL
- help
- This builds a kernel image that is capable of running at the
- location the kernel is loaded at, without any alignment restrictions.
- This feature is a superset of DYNAMIC_MEMSTART and hence overrides it.
-
- One use is for the kexec on panic case where the recovery kernel
- must live at a different physical address than the primary
- kernel.
-
- Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
- it has been loaded at and the compile time physical addresses
- CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START
- setting can still be useful to bootwrappers that need to know the
- load address of the kernel (eg. u-boot/mkimage).
-
-config RELOCATABLE_PPC32
- def_bool y
- depends on PPC32 && RELOCATABLE
-
config PAGE_OFFSET_BOOL
bool "Set custom page offset address"
depends on ADVANCED_OPTIONS
@@ -967,7 +1262,7 @@ config KERNEL_START
config PHYSICAL_START_BOOL
bool "Set physical address where the kernel is loaded"
- depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE
+ depends on ADVANCED_OPTIONS && FLATMEM && PPC_85xx
help
This gives the physical address where the kernel is loaded.
@@ -975,12 +1270,12 @@ config PHYSICAL_START_BOOL
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if PHYSICAL_START_BOOL
- default "0x02000000" if PPC_STD_MMU && CRASH_DUMP && !NONSTATIC_KERNEL
+ default "0x02000000" if PPC_BOOK3S && CRASH_DUMP && !NONSTATIC_KERNEL
default "0x00000000"
config PHYSICAL_ALIGN
hex
- default "0x04000000" if FSL_BOOKE
+ default "0x04000000" if PPC_85xx
help
This value puts the alignment restrictions on physical address
where kernel is loaded and run from. Kernel is compiled for an
@@ -999,41 +1294,38 @@ config TASK_SIZE_BOOL
config TASK_SIZE
hex "Size of user task space" if TASK_SIZE_BOOL
default "0x80000000" if PPC_8xx
+ default "0xb0000000" if PPC_BOOK3S_32 && EXECMEM
default "0xc0000000"
-config CONSISTENT_SIZE_BOOL
- bool "Set custom consistent memory pool size"
- depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE
+config MODULES_SIZE_BOOL
+ bool "Set custom size for modules/execmem area"
+ depends on EXECMEM && ADVANCED_OPTIONS
help
- This option allows you to set the size of the
- consistent memory pool. This pool of virtual memory
- is used to make consistent memory allocations.
+ This option allows you to set the size of kernel virtual address
+ space dedicated for modules/execmem.
+ For the time being it is only for 8xx and book3s/32. Other
+ platforms share it with vmalloc space.
+
+ Say N here unless you know what you are doing.
-config CONSISTENT_SIZE
- hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL
- default "0x00200000" if NOT_COHERENT_CACHE
+config MODULES_SIZE
+ int "Size of modules/execmem area (In Mbytes)" if MODULES_SIZE_BOOL
+ range 1 256 if EXECMEM
+ default 64 if EXECMEM && PPC_BOOK3S_32
+ default 32 if EXECMEM && PPC_8xx
+ default 0
-config PIN_TLB
- bool "Pinned Kernel TLBs (860 ONLY)"
- depends on ADVANCED_OPTIONS && 8xx
endmenu
-if PPC64
-config RELOCATABLE
- bool "Build a relocatable kernel"
- depends on !COMPILE_TEST
- select NONSTATIC_KERNEL
+config PPC64_PROC_SYSTEMCFG
+ def_bool y
+ depends on PPC64 && PROC_FS
help
- This builds a kernel image that is capable of running anywhere
- in the RMA (real memory area) at any 16k-aligned base address.
- The kernel is linked as a position-independent executable (PIE)
- and contains dynamic relocations which are processed early
- in the bootup process.
-
- One use is for the kexec on panic case where the recovery kernel
- must live at a different physical address than the primary
- kernel.
+ This option enables the presence of /proc/ppc64/systemcfg through
+ which the systemcfg page can be accessed.
+ This interface only exists for backwards-compatibility.
+if PPC64
# This value must have zeroes in the bottom 60 bits otherwise lots will break
config PAGE_OFFSET
hex
@@ -1046,31 +1338,9 @@ config PHYSICAL_START
default "0x00000000"
endif
-config ARCH_RANDOM
- def_bool n
-
-source "net/Kconfig"
-
-source "drivers/Kconfig"
-
-source "fs/Kconfig"
-
-source "arch/powerpc/sysdev/qe_lib/Kconfig"
-
-source "lib/Kconfig"
-
-source "arch/powerpc/Kconfig.debug"
-
-source "security/Kconfig"
-
-config KEYS_COMPAT
- bool
- depends on COMPAT && KEYS
- default y
-
-source "crypto/Kconfig"
-
config PPC_LIB_RHEAP
bool
source "arch/powerpc/kvm/Kconfig"
+
+source "kernel/livepatch/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index ec2e40f2cc11..f15e5920080b 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -1,10 +1,7 @@
-menu "Kernel hacking"
-
-source "lib/Kconfig.debug"
+# SPDX-License-Identifier: GPL-2.0
config PPC_DISABLE_WERROR
bool "Don't build arch/powerpc code with -Werror"
- default n
help
This option tells the compiler NOT to build the code under
arch/powerpc with the -Werror flag (which means warnings
@@ -56,23 +53,59 @@ config PPC_EMULATED_STATS
emulated.
config CODE_PATCHING_SELFTEST
- bool "Run self-tests of the code-patching code."
+ bool "Run self-tests of the code-patching code"
depends on DEBUG_KERNEL
- default n
+
+config JUMP_LABEL_FEATURE_CHECKS
+ bool "Enable use of jump label for cpu/mmu_has_feature()"
+ depends on JUMP_LABEL
+ default y
+ help
+ Selecting this option enables use of jump labels for some internal
+ feature checks. This should generate more optimal code for those
+ checks.
+
+config JUMP_LABEL_FEATURE_CHECK_DEBUG
+ bool "Do extra check on feature fixup calls"
+ depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
+ help
+ This tries to catch incorrect usage of cpu_has_feature() and
+ mmu_has_feature() in the code.
+
+ If you don't know what this means, say N.
config FTR_FIXUP_SELFTEST
- bool "Run self-tests of the feature-fixup code."
+ bool "Run self-tests of the feature-fixup code"
depends on DEBUG_KERNEL
- default n
config MSI_BITMAP_SELFTEST
- bool "Run self-tests of the MSI bitmap code."
+ bool "Run self-tests of the MSI bitmap code"
depends on DEBUG_KERNEL
- default n
+
+config GUEST_STATE_BUFFER_TEST
+ def_tristate n
+ prompt "Enable Guest State Buffer unit tests"
+ depends on KUNIT
+ depends on KVM_BOOK3S_HV_POSSIBLE
+ default KUNIT_ALL_TESTS
+ help
+ The Guest State Buffer is a data format specified in the PAPR.
+ It is used by hcalls to communicate the state of L2 guests between
+ the L1 and L0 hypervisors. Enable unit tests for the library
+ used to create and use guest state buffers.
+
+config PPC_IRQ_SOFT_MASK_DEBUG
+ bool "Include extra checks for powerpc irq soft masking"
+ depends on PPC64
+
+config PPC_RFI_SRR_DEBUG
+ bool "Include extra checks for RFI SRR register validity"
+ depends on PPC_BOOK3S_64
config XMON
bool "Include xmon kernel debugger"
depends on DEBUG_KERNEL
+ select CONSOLE_POLL if SERIAL_CPM_CONSOLE
help
Include in-kernel hooks for the xmon kernel monitor/debugger.
Unless you are intending to debug the kernel, say N here.
@@ -102,6 +135,14 @@ config XMON_DISASSEMBLY
to say Y here, unless you're building for a memory-constrained
system.
+config XMON_DEFAULT_RO_MODE
+ bool "Restrict xmon to read-only operations by default"
+ depends on XMON
+ default y
+ help
+ Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+ 'xmon=ro' override this default.
+
config DEBUGGER
bool
depends on KGDB || XMON
@@ -117,7 +158,9 @@ config BDI_SWITCH
config BOOTX_TEXT
bool "Support for early boot text console (BootX or OpenFirmware only)"
- depends on PPC_OF && PPC_BOOK3S
+ depends on PPC_BOOK3S
+ select FONT_SUN8x16
+ select FONT_SUPPORT
help
Say Y here to see progress messages from the boot firmware in text
mode. Requires either BootX or Open Firmware.
@@ -149,14 +192,14 @@ config PPC_EARLY_DEBUG_BOOTX
config PPC_EARLY_DEBUG_LPAR
bool "LPAR HV Console"
- depends on PPC_PSERIES
+ depends on PPC_PSERIES && HVC_CONSOLE
help
Select this to enable early debugging for a machine with a HVC
console on vterm 0.
config PPC_EARLY_DEBUG_LPAR_HVSI
bool "LPAR HVSI Console"
- depends on PPC_PSERIES
+ depends on PPC_PSERIES && HVC_CONSOLE
help
Select this to enable early debugging for a machine with a HVSI
console on a specified vterm.
@@ -173,19 +216,6 @@ config PPC_EARLY_DEBUG_RTAS_PANEL
help
Select this to enable early debugging via the RTAS panel.
-config PPC_EARLY_DEBUG_RTAS_CONSOLE
- bool "RTAS Console"
- depends on PPC_RTAS
- select UDBG_RTAS_CONSOLE
- help
- Select this to enable early debugging via the RTAS console.
-
-config PPC_EARLY_DEBUG_MAPLE
- bool "Maple real mode"
- depends on PPC_MAPLE
- help
- Select this to enable early debugging for Maple.
-
config PPC_EARLY_DEBUG_PAS_REALMODE
bool "PA Semi real mode"
depends on PPC_PASEMI
@@ -193,33 +223,17 @@ config PPC_EARLY_DEBUG_PAS_REALMODE
Select this to enable early debugging for PA Semi.
Output will be on UART0.
-config PPC_EARLY_DEBUG_BEAT
- bool "Beat HV Console"
- depends on PPC_CELLEB
- select PPC_UDBG_BEAT
- help
- Select this to enable early debugging for Celleb with Beat.
-
config PPC_EARLY_DEBUG_44x
bool "Early serial debugging for IBM/AMCC 44x CPUs"
depends on 44x
help
Select this to enable early debugging for IBM 44x chips via the
inbuilt serial port. If you enable this, ensure you set
- PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
-
-config PPC_EARLY_DEBUG_40x
- bool "Early serial debugging for IBM/AMCC 40x CPUs"
- depends on 40x
- help
- Select this to enable early debugging for IBM 40x chips via the
- inbuilt serial port. This works on chips with a 16550 compatible
- UART. Xilinx chips with uartlite cannot use this option.
+ PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
config PPC_EARLY_DEBUG_CPM
bool "Early serial debugging for Freescale CPM-based serial ports"
- depends on SERIAL_CPM
- select PIN_TLB if PPC_8xx
+ depends on SERIAL_CPM=y
help
Select this to enable early debugging for Freescale chips
using a CPM-based serial port. This assumes that the bootwrapper
@@ -236,7 +250,6 @@ config PPC_EARLY_DEBUG_USBGECKO
config PPC_EARLY_DEBUG_PS3GELIC
bool "Early debugging through the PS3 Ethernet port"
depends on PPC_PS3
- select PS3GELIC_UDBG
help
Select this to enable early debugging for the PlayStation3 via
UDP broadcasts sent out through the Ethernet port.
@@ -262,6 +275,12 @@ config PPC_EARLY_DEBUG_MEMCONS
This console provides input and output buffers stored within the
kernel BSS and should be safe to select on any system. A debugger
can then be used to read kernel output or send input to the console.
+
+config PPC_EARLY_DEBUG_16550
+ bool "Serial 16550"
+ depends on PPC_UDBG_16550
+ help
+ Select this to enable early debugging via Serial 16550 console.
endchoice
config PPC_MEMCONS_OUTPUT_SIZE
@@ -284,7 +303,6 @@ config PPC_EARLY_DEBUG_OPAL
def_bool y
depends on PPC_EARLY_DEBUG_OPAL_RAW || PPC_EARLY_DEBUG_OPAL_HVSI
-
config PPC_EARLY_DEBUG_HVSI_VTERMNO
hex "vterm number to use with early debug HVSI"
depends on PPC_EARLY_DEBUG_LPAR_HVSI
@@ -310,18 +328,13 @@ config PPC_EARLY_DEBUG_44x_PHYSLOW
default "0x40000200"
help
You probably want 0x40000200 for ebony boards and
- 0x40000300 for taishan
+ 0x40000300 for taishan
config PPC_EARLY_DEBUG_44x_PHYSHIGH
hex "EPRN of early debug UART physical address"
depends on PPC_EARLY_DEBUG_44x
default "0x1"
-config PPC_EARLY_DEBUG_40x_PHYSADDR
- hex "Early debug UART physical address"
- depends on PPC_EARLY_DEBUG_40x
- default "0xef600300"
-
config PPC_EARLY_DEBUG_CPM_ADDR
hex "CPM UART early debug transmit descriptor address"
depends on PPC_EARLY_DEBUG_CPM
@@ -334,25 +347,28 @@ config PPC_EARLY_DEBUG_CPM_ADDR
platform probing is done, all platforms selected must
share the same address.
-config STRICT_DEVMEM
- def_bool y
- prompt "Filter access to /dev/mem"
- help
- This option restricts access to /dev/mem. If this option is
- disabled, you allow userspace access to all memory, including
- kernel and userspace memory. Accidental memory access is likely
- to be disastrous.
- Memory access is required for experts who want to debug the kernel.
+config PPC_EARLY_DEBUG_16550_PHYSADDR
+ hex "Early debug Serial 16550 physical address"
+ depends on PPC_EARLY_DEBUG_16550
- If you are unsure, say Y.
+config PPC_EARLY_DEBUG_16550_STRIDE
+ int "Early debug Serial 16550 stride"
+ depends on PPC_EARLY_DEBUG_16550
+ default 1
config FAIL_IOMMU
bool "Fault-injection capability for IOMMU"
depends on FAULT_INJECTION
+ depends on PCI || IBMVIO
help
Provide fault-injection capability for IOMMU. Each device can
be selectively enabled via the fail_iommu property.
If you are unsure, say N.
-endmenu
+config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN
+ default 0xe0000000 if PPC32
+ default 0xa80e000000000000 if PPC_BOOK3S_64
+ default 0xa8001c0000000000 if PPC_BOOK3E_64
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 5687e299d0a5..a58b1029592c 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -1,7 +1,5 @@
# This file is included by the global makefile so that you can add your own
-# architecture-specific flags and dependencies. Remember to do have actions
-# for "archclean" and "archdep" for cleaning up and making dependencies for
-# this architecture.
+# architecture-specific flags and dependencies.
#
# This file is subject to the terms and conditions of the GNU General Public
# License. See the file "COPYING" in the main directory of this archive
@@ -12,93 +10,103 @@
# Rewritten by Cort Dougan and Paul Mackerras
#
+ifdef cross_compiling
+ ifeq ($(CROSS_COMPILE),)
+ # Auto detect cross compiler prefix.
+ # Look for: (powerpc(64(le)?)?)(-unknown)?-linux(-gnu)?-
+ CC_ARCHES := powerpc powerpc64 powerpc64le
+ CC_SUFFIXES := linux linux-gnu unknown-linux-gnu
+ CROSS_COMPILE := $(call cc-cross-prefix, $(foreach a,$(CC_ARCHES), \
+ $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-)))
+ endif
+endif
+
HAS_BIARCH := $(call cc-option-yn, -m32)
# Set default 32 bits cross compilers for vdso and boot wrapper
CROSS32_COMPILE ?=
-CROSS32CC := $(CROSS32_COMPILE)gcc
-CROSS32AR := $(CROSS32_COMPILE)ar
+# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+# ppc64le_defconfig because we have nothing better to go on.
+uname := $(shell uname -m)
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64le)_defconfig
-ifeq ($(HAS_BIARCH),y)
-ifeq ($(CROSS32_COMPILE),)
-CROSS32CC := $(CC) -m32
-CROSS32AR := GNUTARGET=elf32-powerpc $(AR)
-endif
-endif
-
-export CROSS32CC CROSS32AR
-
-ifeq ($(CROSS_COMPILE),)
-KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
-else
-KBUILD_DEFCONFIG := ppc64_defconfig
-endif
-
-ifeq ($(CONFIG_PPC64),y)
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
ifeq ($(new_nm),y)
NM := $(NM) --synthetic
endif
-endif
-ifeq ($(CONFIG_PPC64),y)
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-OLDARCH := ppc64le
-else
-OLDARCH := ppc64
-endif
-else
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-OLDARCH := ppcle
+# BITS is used as extension for files which are available in a 32 bit
+# and a 64 bit version to simplify shared Makefiles.
+# e.g.: obj-y += foo_$(BITS).o
+export BITS
+
+ifdef CONFIG_PPC64
+ BITS := 64
else
-OLDARCH := ppc
-endif
+ BITS := 32
endif
-# It seems there are times we use this Makefile without
-# including the config file, but this replicates the old behaviour
-ifeq ($(CONFIG_WORD_SIZE),)
-CONFIG_WORD_SIZE := 32
-endif
+machine-y = ppc
+machine-$(CONFIG_PPC64) += 64
+machine-$(CONFIG_CPU_LITTLE_ENDIAN) += le
+UTS_MACHINE := $(subst $(space),,$(machine-y))
-UTS_MACHINE := $(OLDARCH)
+ifeq ($(CONFIG_PPC64)$(CONFIG_LD_IS_BFD),yy)
+# Have the linker provide sfpr if possible.
+# There is a corresponding test in arch/powerpc/lib/Makefile
+KBUILD_LDFLAGS_MODULE += --save-restore-funcs
+else
+KBUILD_LDFLAGS_MODULE += $(objtree)/arch/powerpc/lib/crtsavres.o
+endif
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-override CC += -mlittle-endian -mno-strict-align
-override AS += -mlittle-endian
-override LD += -EL
-override CROSS32CC += -mlittle-endian
-override CROSS32AS += -mlittle-endian
+ifdef CONFIG_CPU_LITTLE_ENDIAN
+KBUILD_CPPFLAGS += -mlittle-endian
+KBUILD_LDFLAGS += -EL
LDEMULATION := lppc
GNUTARGET := powerpcle
MULTIPLEWORD := -mno-multiple
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect)
else
-ifeq ($(call cc-option-yn,-mbig-endian),y)
-override CC += -mbig-endian
-override AS += -mbig-endian
-endif
-override LD += -EB
+KBUILD_CPPFLAGS += $(call cc-option,-mbig-endian)
+KBUILD_LDFLAGS += -EB
LDEMULATION := ppc
GNUTARGET := powerpc
MULTIPLEWORD := -mmultiple
endif
+ifdef CONFIG_PPC64
+ifndef CONFIG_CC_IS_CLANG
+cflags-$(CONFIG_PPC64_ELF_ABI_V1) += $(call cc-option,-mabi=elfv1)
+cflags-$(CONFIG_PPC64_ELF_ABI_V1) += $(call cc-option,-mcall-aixdesc)
+aflags-$(CONFIG_PPC64_ELF_ABI_V1) += $(call cc-option,-mabi=elfv1)
+aflags-$(CONFIG_PPC64_ELF_ABI_V2) += -mabi=elfv2
+endif
+endif
+
+ifndef CONFIG_CC_IS_CLANG
+ cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align
+endif
+
+cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian)
+aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian
+
ifeq ($(HAS_BIARCH),y)
-override AS += -a$(CONFIG_WORD_SIZE)
-override LD += -m elf$(CONFIG_WORD_SIZE)$(LDEMULATION)
-override CC += -m$(CONFIG_WORD_SIZE)
-override AR := GNUTARGET=elf$(CONFIG_WORD_SIZE)-$(GNUTARGET) $(AR)
+KBUILD_CPPFLAGS += -m$(BITS)
+KBUILD_AFLAGS += -m$(BITS)
+KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION)
endif
LDFLAGS_vmlinux-y := -Bstatic
-LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
+LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie --no-dynamic-linker
+LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) += -z notext
LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
-ifeq ($(CONFIG_PPC64),y)
-ifeq ($(call cc-option-yn,-mcmodel=medium),y)
+ifdef CONFIG_PPC64
+ifndef CONFIG_PPC_KERNEL_PCREL
# -mcmodel=medium breaks modules because it uses 32bit offsets from
# the TOC pointer to create pointers where possible. Pointers into the
# percpu data area are created by this method.
@@ -108,131 +116,110 @@ ifeq ($(call cc-option-yn,-mcmodel=medium),y)
# kernel percpu data space (starting with 0xc...). We need a full
# 64bit relocation for this to work, hence -mcmodel=large.
KBUILD_CFLAGS_MODULE += -mcmodel=large
-else
- export NO_MINIMAL_TOC := -mno-minimal-toc
endif
endif
-CFLAGS-$(CONFIG_PPC64) := -mtraceback=no
-ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,-mcall-aixdesc)
-AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
+CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
+ifdef CONFIG_PPC64_ELF_ABI_V2
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
else
-CFLAGS-$(CONFIG_PPC64) += -mcall-aixdesc
+ifndef CONFIG_CC_IS_CLANG
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
endif
-CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,-mminimal-toc)
+endif
+CFLAGS-$(CONFIG_PPC64) += -mcmodel=medium
CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
-CFLAGS-$(CONFIG_PPC32) := -ffixed-r2 $(MULTIPLEWORD)
+CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mlong-double-128)
-ifeq ($(CONFIG_PPC_BOOK3S_64),y)
-CFLAGS-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=power7,-mtune=power4)
-else
-CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
-endif
+# Clang unconditionally reserves r2 on ppc32 and does not support the flag
+# https://llvm.org/pr39555
+CFLAGS-$(CONFIG_PPC32) := $(call cc-option, -ffixed-r2)
+
+# Clang doesn't support -mmultiple / -mno-multiple
+# https://llvm.org/pr39556
+CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD))
-CFLAGS-$(CONFIG_CELL_CPU) += $(call cc-option,-mcpu=cell)
-CFLAGS-$(CONFIG_POWER4_CPU) += $(call cc-option,-mcpu=power4)
-CFLAGS-$(CONFIG_POWER5_CPU) += $(call cc-option,-mcpu=power5)
-CFLAGS-$(CONFIG_POWER6_CPU) += $(call cc-option,-mcpu=power6)
-CFLAGS-$(CONFIG_POWER7_CPU) += $(call cc-option,-mcpu=power7)
+CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata)
-# Altivec option not allowed with e500mc64 in GCC.
-ifeq ($(CONFIG_ALTIVEC),y)
-E5500_CPU := -mcpu=powerpc64
+CC_FLAGS_FPU := $(call cc-option,-mhard-float)
+CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float)
+
+ifdef CONFIG_FUNCTION_TRACER
+ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+CC_FLAGS_FTRACE := -fpatchable-function-entry=1
+else
+ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS # PPC32 only
+CC_FLAGS_FTRACE := -fpatchable-function-entry=3,1
else
-E5500_CPU := $(call cc-option,-mcpu=e500mc64,-mcpu=powerpc64)
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
endif
-CFLAGS-$(CONFIG_E5500_CPU) += $(E5500_CPU)
-CFLAGS-$(CONFIG_E6500_CPU) += $(call cc-option,-mcpu=e6500,$(E5500_CPU))
-
-ifeq ($(CONFIG_PPC32),y)
-ifeq ($(CONFIG_PPC_E500MC),y)
-CFLAGS-y += $(call cc-option,-mcpu=e500mc,-mcpu=powerpc)
else
-CFLAGS-$(CONFIG_E500) += $(call cc-option,-mcpu=8540 -msoft-float,-mcpu=powerpc)
+CC_FLAGS_FTRACE := -pg
+ifdef CONFIG_MPROFILE_KERNEL
+CC_FLAGS_FTRACE += -mprofile-kernel
+endif
endif
endif
-CFLAGS-$(CONFIG_TUNE_CELL) += $(call cc-option,-mtune=cell)
+CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
+AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU)
-asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
+CFLAGS-y += $(CONFIG_TUNE_CPU)
-KBUILD_CPPFLAGS += -Iarch/$(ARCH) $(asinstr)
-KBUILD_AFLAGS += -Iarch/$(ARCH) $(AFLAGS-y)
-KBUILD_CFLAGS += -msoft-float -pipe -Iarch/$(ARCH) $(CFLAGS-y)
-CPP = $(CC) -E $(KBUILD_CFLAGS)
+asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1)
-CHECKFLAGS += -m$(CONFIG_WORD_SIZE) -D__powerpc__ -D__powerpc$(CONFIG_WORD_SIZE)__
+KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr)
+KBUILD_AFLAGS += $(AFLAGS-y)
+KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU)
+KBUILD_CFLAGS += $(CFLAGS-y)
-KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
+CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__
+ifdef CONFIG_CPU_BIG_ENDIAN
+CHECKFLAGS += -D__BIG_ENDIAN__
+else
+CHECKFLAGS += -D__LITTLE_ENDIAN__
+endif
-ifeq ($(CONFIG_476FPE_ERR46),y)
+ifdef CONFIG_476FPE_ERR46
KBUILD_LDFLAGS_MODULE += --ppc476-workaround \
-T $(srctree)/arch/powerpc/platforms/44x/ppc476_modules.lds
endif
-# No AltiVec or VSX instructions when building kernel
+# No prefix or pcrel
+ifdef CONFIG_PPC_KERNEL_PREFIXED
+KBUILD_CFLAGS += $(call cc-option,-mprefixed)
+else
+KBUILD_CFLAGS += $(call cc-option,-mno-prefixed)
+endif
+ifdef CONFIG_PPC_KERNEL_PCREL
+KBUILD_CFLAGS += $(call cc-option,-mpcrel)
+else
+KBUILD_CFLAGS += $(call cc-option,-mno-pcrel)
+endif
+
+# No AltiVec or VSX or MMA instructions when building kernel
KBUILD_CFLAGS += $(call cc-option,-mno-altivec)
KBUILD_CFLAGS += $(call cc-option,-mno-vsx)
+KBUILD_CFLAGS += $(call cc-option,-mno-mma)
# No SPE instruction when building kernel
# (We use all available options to help semi-broken compilers)
KBUILD_CFLAGS += $(call cc-option,-mno-spe)
KBUILD_CFLAGS += $(call cc-option,-mspe=no)
-# Enable unit-at-a-time mode when possible. It shrinks the
-# kernel considerably.
-KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
-
-# FIXME: the module load should be taught about the additional relocs
-# generated by this.
-# revert to pre-gcc-4.4 behaviour of .eh_frame
-KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm)
+# Don't emit .eh_frame since we have no use for it
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
# Never use string load/store instructions as they are
# often slow when they are implemented at all
-KBUILD_CFLAGS += -mno-string
-
-ifeq ($(CONFIG_6xx),y)
-KBUILD_CFLAGS += -mcpu=powerpc
-endif
-
-# Work around a gcc code-gen bug with -fno-omit-frame-pointer.
-ifeq ($(CONFIG_FUNCTION_TRACER),y)
-KBUILD_CFLAGS += -mno-sched-epilog
-endif
+KBUILD_CFLAGS += $(call cc-option,-mno-string)
-cpu-as-$(CONFIG_4xx) += -Wa,-m405
-cpu-as-$(CONFIG_ALTIVEC) += -Wa,-maltivec
-cpu-as-$(CONFIG_E200) += -Wa,-me200
-
-KBUILD_AFLAGS += $(cpu-as-y)
-KBUILD_CFLAGS += $(cpu-as-y)
-
-head-y := arch/powerpc/kernel/head_$(CONFIG_WORD_SIZE).o
-head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o
-head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o
-head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o
-head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o
-
-head-$(CONFIG_PPC64) += arch/powerpc/kernel/entry_64.o
-head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o
-head-$(CONFIG_ALTIVEC) += arch/powerpc/kernel/vector.o
-head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += arch/powerpc/kernel/prom_init.o
-
-core-y += arch/powerpc/kernel/ \
- arch/powerpc/mm/ \
- arch/powerpc/lib/ \
- arch/powerpc/sysdev/ \
- arch/powerpc/platforms/ \
- arch/powerpc/math-emu/ \
- arch/powerpc/crypto/ \
- arch/powerpc/net/
-core-$(CONFIG_XMON) += arch/powerpc/xmon/
-core-$(CONFIG_KVM) += arch/powerpc/kvm/
-core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/
-
-drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
+KBUILD_AFLAGS += $(aflags-y)
+KBUILD_CFLAGS += $(cflags-y)
# Default to zImage, override when needed
all: zImage
@@ -243,98 +230,195 @@ BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.% uImage.%
PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2)
-boot := arch/$(ARCH)/boot
-
-ifeq ($(CONFIG_RELOCATABLE),y)
-quiet_cmd_relocs_check = CALL $<
- cmd_relocs_check = perl $< "$(OBJDUMP)" "$(obj)/vmlinux"
-
-PHONY += relocs_check
-relocs_check: arch/powerpc/relocs_check.pl vmlinux
- $(call cmd,relocs_check)
-
-zImage: relocs_check
-endif
+boot := arch/powerpc/boot
$(BOOT_TARGETS1): vmlinux
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
$(BOOT_TARGETS2): vmlinux
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+PHONY += bootwrapper_install
bootwrapper_install:
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-
-%.dtb: scripts
- $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+ $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+include $(srctree)/scripts/Makefile.defconf
+
+generated_configs += ppc64le_defconfig
+ppc64le_defconfig:
+ $(call merge_into_defconfig,ppc64_defconfig,le)
+
+generated_configs += ppc64le_guest_defconfig
+ppc64le_guest_defconfig:
+ $(call merge_into_defconfig,ppc64_defconfig,le guest kvm_guest)
+
+generated_configs += ppc64_guest_defconfig
+ppc64_guest_defconfig:
+ $(call merge_into_defconfig,ppc64_defconfig,be guest kvm_guest)
+
+generated_configs += pseries_le_defconfig
+pseries_le_defconfig: ppc64le_guest_defconfig
+
+generated_configs += pseries_defconfig
+pseries_defconfig: ppc64le_guest_defconfig
+
+generated_configs += powernv_be_defconfig
+powernv_be_defconfig:
+ $(call merge_into_defconfig,powernv_defconfig,be)
+
+generated_configs += mpc85xx_defconfig
+mpc85xx_defconfig:
+ $(call merge_into_defconfig,mpc85xx_base.config,\
+ 85xx-32bit 85xx-hw fsl-emb-nonhw)
+
+generated_configs += mpc85xx_smp_defconfig
+mpc85xx_smp_defconfig:
+ $(call merge_into_defconfig,mpc85xx_base.config,\
+ 85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
+
+generated_configs += corenet32_smp_defconfig
+corenet32_smp_defconfig:
+ $(call merge_into_defconfig,corenet_base.config,\
+ 85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
+
+generated_configs += corenet64_smp_defconfig
+corenet64_smp_defconfig:
+ $(call merge_into_defconfig,corenet_base.config,\
+ 85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
+
+generated_configs += mpc86xx_defconfig
+mpc86xx_defconfig:
+ $(call merge_into_defconfig,mpc86xx_base.config,\
+ 86xx-hw fsl-emb-nonhw)
+
+generated_configs += mpc86xx_smp_defconfig
+mpc86xx_smp_defconfig:
+ $(call merge_into_defconfig,mpc86xx_base.config,\
+ 86xx-smp 86xx-hw fsl-emb-nonhw)
+
+generated_configs += ppc32_allmodconfig
+ppc32_allmodconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
+ -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc44x_allmodconfig
+ppc44x_allmodconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/44x.config \
+ -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc8xx_allmodconfig
+ppc8xx_allmodconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/8xx.config \
+ -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc85xx_allmodconfig
+ppc85xx_allmodconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-32bit.config \
+ -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc_defconfig
+ppc_defconfig:
+ $(call merge_into_defconfig,book3s_32.config,)
+
+generated_configs += ppc64le_allmodconfig
+ppc64le_allmodconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \
+ -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc64le_allnoconfig
+ppc64le_allnoconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/ppc64le.config \
+ -f $(srctree)/Makefile allnoconfig
+
+generated_configs += ppc64_book3e_allmodconfig
+ppc64_book3e_allmodconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/85xx-64bit.config \
+ -f $(srctree)/Makefile allmodconfig
+
+generated_configs += ppc32_randconfig
+ppc32_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/32-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+generated_configs += ppc64_randconfig
+ppc64_randconfig:
+ $(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/64-bit.config \
+ -f $(srctree)/Makefile randconfig
+
+PHONY += $(generated_configs)
define archhelp
- @echo '* zImage - Build default images selected by kernel config'
- @echo ' zImage.* - Compressed kernel image (arch/$(ARCH)/boot/zImage.*)'
- @echo ' uImage - U-Boot native image format'
- @echo ' cuImage.<dt> - Backwards compatible U-Boot image for older'
- @echo ' versions which do not support device trees'
- @echo ' dtbImage.<dt> - zImage with an embedded device tree blob'
- @echo ' simpleImage.<dt> - Firmware independent image.'
- @echo ' treeImage.<dt> - Support for older IBM 4xx firmware (not U-Boot)'
- @echo ' install - Install kernel using'
- @echo ' (your) ~/bin/$(INSTALLKERNEL) or'
- @echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
- @echo ' install to $$(INSTALL_PATH) and run lilo'
- @echo ' *_defconfig - Select default config from arch/$(ARCH)/configs'
- @echo ''
- @echo ' Targets with <dt> embed a device tree blob inside the image'
- @echo ' These targets support board with firmware that does not'
- @echo ' support passing a device tree directly. Replace <dt> with the'
- @echo ' name of a dts file from the arch/$(ARCH)/boot/dts/ directory'
- @echo ' (minus the .dts extension).'
+ echo '* zImage - Build default images selected by kernel config'
+ echo ' zImage.* - Compressed kernel image (arch/powerpc/boot/zImage.*)'
+ echo ' uImage - U-Boot native image format'
+ echo ' cuImage.<dt> - Backwards compatible U-Boot image for older'
+ echo ' versions which do not support device trees'
+ echo ' dtbImage.<dt> - zImage with an embedded device tree blob'
+ echo ' simpleImage.<dt> - Firmware independent image.'
+ echo ' treeImage.<dt> - Support for older IBM 4xx firmware (not U-Boot)'
+ echo ' install - Install kernel using'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
+ echo ' install to $$(INSTALL_PATH)'
+ echo ' *_defconfig - Select default config from arch/powerpc/configs'
+ echo ''
+ echo ' Targets with <dt> embed a device tree blob inside the image'
+ echo ' These targets support board with firmware that does not'
+ echo ' support passing a device tree directly. Replace <dt> with the'
+ echo ' name of a dts file from the arch/powerpc/boot/dts/ directory'
+ echo ' (minus the .dts extension).'
+ echo
+ $(foreach cfg,$(generated_configs),
+ printf " %-27s - Build for %s\\n" $(cfg) $(subst _defconfig,,$(cfg));)
endef
+PHONY += install
install:
- $(Q)$(MAKE) $(build)=$(boot) install
-
-vdso_install:
-ifeq ($(CONFIG_PPC64),y)
- $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
+ $(call cmd,install)
+
+ifeq ($(KBUILD_EXTMOD),)
+# We need to generate vdso-offsets.h before compiling certain files in kernel/.
+# In order to do that, we should use the archprepare target, but we can't since
+# asm-offsets.h is included in some files used to generate vdso-offsets.h, and
+# asm-offsets.h is built in prepare0, for which archprepare is a dependency.
+# Therefore we need to generate the header after prepare0 has been made, hence
+# this hack.
+prepare: vdso_prepare
+vdso_prepare: prepare0
+ $(if $(CONFIG_VDSO32),$(Q)$(MAKE) \
+ $(build)=arch/powerpc/kernel/vdso include/generated/vdso32-offsets.h)
+ $(if $(CONFIG_PPC64),$(Q)$(MAKE) \
+ $(build)=arch/powerpc/kernel/vdso include/generated/vdso64-offsets.h)
endif
- $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
-
-archclean:
- $(Q)$(MAKE) $(clean)=$(boot)
archprepare: checkbin
-# Use the file '.tmp_gas_check' for binutils tests, as gas won't output
-# to stdout and these checks are run even on install targets.
-TOUT := .tmp_gas_check
+archheaders:
+ $(Q)$(MAKE) $(build)=arch/powerpc/kernel/syscalls all
-# Check gcc and binutils versions:
-# - gcc-3.4 and binutils-2.14 are a fatal combination
-# - Require gcc 4.0 or above on 64-bit
-# - gcc-4.2.0 has issues compiling modules on 64-bit
+ifdef CONFIG_STACKPROTECTOR
+prepare: stack_protector_prepare
+
+PHONY += stack_protector_prepare
+stack_protector_prepare: prepare0
+ifdef CONFIG_PPC64
+ $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r13 \
+ -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' \
+ $(objtree)/include/generated/asm-offsets.h))
+else
+ $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 \
+ -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' \
+ $(objtree)/include/generated/asm-offsets.h))
+endif
+endif
+
+PHONY += checkbin
checkbin:
- @if test "$(call cc-version)" = "0304" ; then \
- if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
- echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
- echo 'correctly with gcc-3.4 and your version of binutils.'; \
- echo '*** Please upgrade your binutils or downgrade your gcc'; \
- false; \
- fi ; \
- fi
- @if test "$(call cc-version)" -lt "0400" \
- && test "x${CONFIG_PPC64}" = "xy" ; then \
- echo -n "Sorry, GCC v4.0 or above is required to build " ; \
- echo "the 64-bit powerpc kernel." ; \
- false ; \
- fi
- @if test "$(call cc-fullversion)" = "040200" \
- && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
- echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
- echo 'kernel with modules enabled.' ; \
- echo -n '*** Please use a different GCC version or ' ; \
- echo 'disable kernel modules' ; \
+ @if test "x${CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT}" = "xy" -a \
+ "x${CONFIG_LD_IS_BFD}" = "xy" -a \
+ "${CONFIG_LD_VERSION}" = "23700" ; then \
+ echo -n '*** binutils 2.37 drops unused section symbols, which recordmcount ' ; \
+ echo 'is unable to handle.' ; \
+ echo '*** Please use a different binutils version.' ; \
false ; \
fi
-
-CLEAN_FILES += $(TOUT)
-
diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink
new file mode 100644
index 000000000000..bb601be36173
--- /dev/null
+++ b/arch/powerpc/Makefile.postlink
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link powerpc pass
+# ===========================================================================
+#
+# 1. Check that vmlinux relocations look sane
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+quiet_cmd_head_check = CHKHEAD $@
+ cmd_head_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/head_check.sh "$(NM)" "$@"
+
+quiet_cmd_relocs_check = CHKREL $@
+ifdef CONFIG_PPC_BOOK3S_64
+ cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" ; \
+ $(BASH) $(srctree)/arch/powerpc/tools/unrel_branch_check.sh "$(OBJDUMP)" "$(NM)" "$@"
+else
+ cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
+endif
+
+quiet_cmd_ftrace_check = CHKFTRC $@
+ cmd_ftrace_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/ftrace_check.sh "$(NM)" "$@"
+
+# `@true` prevents complaint when there is nothing to be done
+
+vmlinux: FORCE
+ @true
+ifdef CONFIG_PPC64
+ $(call cmd,head_check)
+endif
+ifdef CONFIG_RELOCATABLE
+ $(call if_changed,relocs_check)
+endif
+ifdef CONFIG_FUNCTION_TRACER
+ifndef CONFIG_PPC64_ELF_ABI_V1
+ $(call cmd,ftrace_check)
+endif
+endif
+
+clean:
+ rm -f .tmp_symbols.txt
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index d61c03525777..5a867f23fe7f 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -1,4 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
addnote
+decompress_inflate.c
empty.c
hack-coff
inffast.c
@@ -13,11 +15,13 @@ infutil.h
kernel-vmlinux.strip.c
kernel-vmlinux.strip.gz
mktree
+otheros.bld
+otheros-too-big.bld
uImage
cuImage.*
dtbImage.*
-*.dtb
treeImage.*
+vmlinux.strip
zImage
zImage.initrd
zImage.bin.*
@@ -41,4 +45,3 @@ fdt_sw.c
fdt_wip.c
libfdt.h
libfdt_internal.h
-
diff --git a/arch/powerpc/boot/44x.h b/arch/powerpc/boot/44x.h
index 02563443788a..9b15e59522d6 100644
--- a/arch/powerpc/boot/44x.h
+++ b/arch/powerpc/boot/44x.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* PowerPC 44x related functions
*
* Copyright 2007 David Gibson, IBM Corporation.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
*/
#ifndef _PPC_BOOT_44X_H_
#define _PPC_BOOT_44X_H_
diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c
index 9d3bd4c45a24..682ca3827892 100644
--- a/arch/powerpc/boot/4xx.c
+++ b/arch/powerpc/boot/4xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2007 David Gibson, IBM Corporation.
*
@@ -11,11 +12,6 @@
* Copyright (C) 2009 Wind River Systems, Inc.
* Updated for supporting PPC405EX on Kilauea.
* Tiejun Chen <tiejun.chen@windriver.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stddef.h>
#include "types.h"
@@ -232,7 +228,7 @@ void ibm4xx_denali_fixup_memsize(void)
dpath = 8; /* 64 bits */
/* get address pins (rows) */
- val = SDRAM0_READ(DDR0_42);
+ val = SDRAM0_READ(DDR0_42);
row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT);
if (row > max_row)
@@ -257,7 +253,6 @@ void ibm4xx_denali_fixup_memsize(void)
dt_fixup_memory(0, memsize);
}
-#define SPRN_DBCR0_40X 0x3F2
#define SPRN_DBCR0_44X 0x134
#define DBCR0_RST_SYSTEM 0x30000000
@@ -274,18 +269,6 @@ void ibm44x_dbcr_reset(void)
}
-void ibm40x_dbcr_reset(void)
-{
- unsigned long tmp;
-
- asm volatile (
- "mfspr %0,%1\n"
- "oris %0,%0,%2@h\n"
- "mtspr %1,%0"
- : "=&r"(tmp) : "i"(SPRN_DBCR0_40X), "i"(DBCR0_RST_SYSTEM)
- );
-}
-
#define EMAC_RESET 0x20000000
void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1)
{
@@ -548,256 +531,3 @@ void ibm440spe_fixup_clocks(unsigned int sys_clk,
eplike_fixup_uart_clk(1, "/plb/opb/serial@f0000300", ser_clk, plb_clk);
eplike_fixup_uart_clk(2, "/plb/opb/serial@f0000600", ser_clk, plb_clk);
}
-
-void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk)
-{
- u32 pllmr = mfdcr(DCRN_CPC0_PLLMR);
- u32 cpc0_cr0 = mfdcr(DCRN_405_CPC0_CR0);
- u32 cpc0_cr1 = mfdcr(DCRN_405_CPC0_CR1);
- u32 psr = mfdcr(DCRN_405_CPC0_PSR);
- u32 cpu, plb, opb, ebc, tb, uart0, uart1, m;
- u32 fwdv, fwdvb, fbdv, cbdv, opdv, epdv, ppdv, udiv;
-
- fwdv = (8 - ((pllmr & 0xe0000000) >> 29));
- fbdv = (pllmr & 0x1e000000) >> 25;
- if (fbdv == 0)
- fbdv = 16;
- cbdv = ((pllmr & 0x00060000) >> 17) + 1; /* CPU:PLB */
- opdv = ((pllmr & 0x00018000) >> 15) + 1; /* PLB:OPB */
- ppdv = ((pllmr & 0x00001800) >> 13) + 1; /* PLB:PCI */
- epdv = ((pllmr & 0x00001800) >> 11) + 2; /* PLB:EBC */
- udiv = ((cpc0_cr0 & 0x3e) >> 1) + 1;
-
- /* check for 405GPr */
- if ((mfpvr() & 0xfffffff0) == (0x50910951 & 0xfffffff0)) {
- fwdvb = 8 - (pllmr & 0x00000007);
- if (!(psr & 0x00001000)) /* PCI async mode enable == 0 */
- if (psr & 0x00000020) /* New mode enable */
- m = fwdvb * 2 * ppdv;
- else
- m = fwdvb * cbdv * ppdv;
- else if (psr & 0x00000020) /* New mode enable */
- if (psr & 0x00000800) /* PerClk synch mode */
- m = fwdvb * 2 * epdv;
- else
- m = fbdv * fwdv;
- else if (epdv == fbdv)
- m = fbdv * cbdv * epdv;
- else
- m = fbdv * fwdvb * cbdv;
-
- cpu = sys_clk * m / fwdv;
- plb = sys_clk * m / (fwdvb * cbdv);
- } else {
- m = fwdv * fbdv * cbdv;
- cpu = sys_clk * m / fwdv;
- plb = cpu / cbdv;
- }
- opb = plb / opdv;
- ebc = plb / epdv;
-
- if (cpc0_cr0 & 0x80)
- /* uart0 uses the external clock */
- uart0 = ser_clk;
- else
- uart0 = cpu / udiv;
-
- if (cpc0_cr0 & 0x40)
- /* uart1 uses the external clock */
- uart1 = ser_clk;
- else
- uart1 = cpu / udiv;
-
- /* setup the timebase clock to tick at the cpu frequency */
- cpc0_cr1 = cpc0_cr1 & ~0x00800000;
- mtdcr(DCRN_405_CPC0_CR1, cpc0_cr1);
- tb = cpu;
-
- dt_fixup_cpu_clocks(cpu, tb, 0);
- dt_fixup_clock("/plb", plb);
- dt_fixup_clock("/plb/opb", opb);
- dt_fixup_clock("/plb/ebc", ebc);
- dt_fixup_clock("/plb/opb/serial@ef600300", uart0);
- dt_fixup_clock("/plb/opb/serial@ef600400", uart1);
-}
-
-
-void ibm405ep_fixup_clocks(unsigned int sys_clk)
-{
- u32 pllmr0 = mfdcr(DCRN_CPC0_PLLMR0);
- u32 pllmr1 = mfdcr(DCRN_CPC0_PLLMR1);
- u32 cpc0_ucr = mfdcr(DCRN_CPC0_UCR);
- u32 cpu, plb, opb, ebc, uart0, uart1;
- u32 fwdva, fwdvb, fbdv, cbdv, opdv, epdv;
- u32 pllmr0_ccdv, tb, m;
-
- fwdva = 8 - ((pllmr1 & 0x00070000) >> 16);
- fwdvb = 8 - ((pllmr1 & 0x00007000) >> 12);
- fbdv = (pllmr1 & 0x00f00000) >> 20;
- if (fbdv == 0)
- fbdv = 16;
-
- cbdv = ((pllmr0 & 0x00030000) >> 16) + 1; /* CPU:PLB */
- epdv = ((pllmr0 & 0x00000300) >> 8) + 2; /* PLB:EBC */
- opdv = ((pllmr0 & 0x00003000) >> 12) + 1; /* PLB:OPB */
-
- m = fbdv * fwdvb;
-
- pllmr0_ccdv = ((pllmr0 & 0x00300000) >> 20) + 1;
- if (pllmr1 & 0x80000000)
- cpu = sys_clk * m / (fwdva * pllmr0_ccdv);
- else
- cpu = sys_clk / pllmr0_ccdv;
-
- plb = cpu / cbdv;
- opb = plb / opdv;
- ebc = plb / epdv;
- tb = cpu;
- uart0 = cpu / (cpc0_ucr & 0x0000007f);
- uart1 = cpu / ((cpc0_ucr & 0x00007f00) >> 8);
-
- dt_fixup_cpu_clocks(cpu, tb, 0);
- dt_fixup_clock("/plb", plb);
- dt_fixup_clock("/plb/opb", opb);
- dt_fixup_clock("/plb/ebc", ebc);
- dt_fixup_clock("/plb/opb/serial@ef600300", uart0);
- dt_fixup_clock("/plb/opb/serial@ef600400", uart1);
-}
-
-static u8 ibm405ex_fwdv_multi_bits[] = {
- /* values for: 1 - 16 */
- 0x01, 0x02, 0x0e, 0x09, 0x04, 0x0b, 0x10, 0x0d, 0x0c, 0x05,
- 0x06, 0x0f, 0x0a, 0x07, 0x08, 0x03
-};
-
-u32 ibm405ex_get_fwdva(unsigned long cpr_fwdv)
-{
- u32 index;
-
- for (index = 0; index < ARRAY_SIZE(ibm405ex_fwdv_multi_bits); index++)
- if (cpr_fwdv == (u32)ibm405ex_fwdv_multi_bits[index])
- return index + 1;
-
- return 0;
-}
-
-static u8 ibm405ex_fbdv_multi_bits[] = {
- /* values for: 1 - 100 */
- 0x00, 0xff, 0x7e, 0xfd, 0x7a, 0xf5, 0x6a, 0xd5, 0x2a, 0xd4,
- 0x29, 0xd3, 0x26, 0xcc, 0x19, 0xb3, 0x67, 0xce, 0x1d, 0xbb,
- 0x77, 0xee, 0x5d, 0xba, 0x74, 0xe9, 0x52, 0xa5, 0x4b, 0x96,
- 0x2c, 0xd8, 0x31, 0xe3, 0x46, 0x8d, 0x1b, 0xb7, 0x6f, 0xde,
- 0x3d, 0xfb, 0x76, 0xed, 0x5a, 0xb5, 0x6b, 0xd6, 0x2d, 0xdb,
- 0x36, 0xec, 0x59, 0xb2, 0x64, 0xc9, 0x12, 0xa4, 0x48, 0x91,
- 0x23, 0xc7, 0x0e, 0x9c, 0x38, 0xf0, 0x61, 0xc2, 0x05, 0x8b,
- 0x17, 0xaf, 0x5f, 0xbe, 0x7c, 0xf9, 0x72, 0xe5, 0x4a, 0x95,
- 0x2b, 0xd7, 0x2e, 0xdc, 0x39, 0xf3, 0x66, 0xcd, 0x1a, 0xb4,
- 0x68, 0xd1, 0x22, 0xc4, 0x09, 0x93, 0x27, 0xcf, 0x1e, 0xbc,
- /* values for: 101 - 200 */
- 0x78, 0xf1, 0x62, 0xc5, 0x0a, 0x94, 0x28, 0xd0, 0x21, 0xc3,
- 0x06, 0x8c, 0x18, 0xb0, 0x60, 0xc1, 0x02, 0x84, 0x08, 0x90,
- 0x20, 0xc0, 0x01, 0x83, 0x07, 0x8f, 0x1f, 0xbf, 0x7f, 0xfe,
- 0x7d, 0xfa, 0x75, 0xea, 0x55, 0xaa, 0x54, 0xa9, 0x53, 0xa6,
- 0x4c, 0x99, 0x33, 0xe7, 0x4e, 0x9d, 0x3b, 0xf7, 0x6e, 0xdd,
- 0x3a, 0xf4, 0x69, 0xd2, 0x25, 0xcb, 0x16, 0xac, 0x58, 0xb1,
- 0x63, 0xc6, 0x0d, 0x9b, 0x37, 0xef, 0x5e, 0xbd, 0x7b, 0xf6,
- 0x6d, 0xda, 0x35, 0xeb, 0x56, 0xad, 0x5b, 0xb6, 0x6c, 0xd9,
- 0x32, 0xe4, 0x49, 0x92, 0x24, 0xc8, 0x11, 0xa3, 0x47, 0x8e,
- 0x1c, 0xb8, 0x70, 0xe1, 0x42, 0x85, 0x0b, 0x97, 0x2f, 0xdf,
- /* values for: 201 - 255 */
- 0x3e, 0xfc, 0x79, 0xf2, 0x65, 0xca, 0x15, 0xab, 0x57, 0xae,
- 0x5c, 0xb9, 0x73, 0xe6, 0x4d, 0x9a, 0x34, 0xe8, 0x51, 0xa2,
- 0x44, 0x89, 0x13, 0xa7, 0x4f, 0x9e, 0x3c, 0xf8, 0x71, 0xe2,
- 0x45, 0x8a, 0x14, 0xa8, 0x50, 0xa1, 0x43, 0x86, 0x0c, 0x98,
- 0x30, 0xe0, 0x41, 0x82, 0x04, 0x88, 0x10, 0xa0, 0x40, 0x81,
- 0x03, 0x87, 0x0f, 0x9f, 0x3f /* END */
-};
-
-u32 ibm405ex_get_fbdv(unsigned long cpr_fbdv)
-{
- u32 index;
-
- for (index = 0; index < ARRAY_SIZE(ibm405ex_fbdv_multi_bits); index++)
- if (cpr_fbdv == (u32)ibm405ex_fbdv_multi_bits[index])
- return index + 1;
-
- return 0;
-}
-
-void ibm405ex_fixup_clocks(unsigned int sys_clk, unsigned int uart_clk)
-{
- /* PLL config */
- u32 pllc = CPR0_READ(DCRN_CPR0_PLLC);
- u32 plld = CPR0_READ(DCRN_CPR0_PLLD);
- u32 cpud = CPR0_READ(DCRN_CPR0_PRIMAD);
- u32 plbd = CPR0_READ(DCRN_CPR0_PRIMBD);
- u32 opbd = CPR0_READ(DCRN_CPR0_OPBD);
- u32 perd = CPR0_READ(DCRN_CPR0_PERD);
-
- /* Dividers */
- u32 fbdv = ibm405ex_get_fbdv(__fix_zero((plld >> 24) & 0xff, 1));
-
- u32 fwdva = ibm405ex_get_fwdva(__fix_zero((plld >> 16) & 0x0f, 1));
-
- u32 cpudv0 = __fix_zero((cpud >> 24) & 7, 8);
-
- /* PLBDV0 is hardwared to 010. */
- u32 plbdv0 = 2;
- u32 plb2xdv0 = __fix_zero((plbd >> 16) & 7, 8);
-
- u32 opbdv0 = __fix_zero((opbd >> 24) & 3, 4);
-
- u32 perdv0 = __fix_zero((perd >> 24) & 3, 4);
-
- /* Resulting clocks */
- u32 cpu, plb, opb, ebc, vco, tb, uart0, uart1;
-
- /* PLL's VCO is the source for primary forward ? */
- if (pllc & 0x40000000) {
- u32 m;
-
- /* Feedback path */
- switch ((pllc >> 24) & 7) {
- case 0:
- /* PLLOUTx */
- m = fbdv;
- break;
- case 1:
- /* CPU */
- m = fbdv * fwdva * cpudv0;
- break;
- case 5:
- /* PERClk */
- m = fbdv * fwdva * plb2xdv0 * plbdv0 * opbdv0 * perdv0;
- break;
- default:
- printf("WARNING ! Invalid PLL feedback source !\n");
- goto bypass;
- }
-
- vco = (unsigned int)(sys_clk * m);
- } else {
-bypass:
- /* Bypass system PLL */
- vco = 0;
- }
-
- /* CPU = VCO / ( FWDVA x CPUDV0) */
- cpu = vco / (fwdva * cpudv0);
- /* PLB = VCO / ( FWDVA x PLB2XDV0 x PLBDV0) */
- plb = vco / (fwdva * plb2xdv0 * plbdv0);
- /* OPB = PLB / OPBDV0 */
- opb = plb / opbdv0;
- /* EBC = OPB / PERDV0 */
- ebc = opb / perdv0;
-
- tb = cpu;
- uart0 = uart1 = uart_clk;
-
- dt_fixup_cpu_clocks(cpu, tb, 0);
- dt_fixup_clock("/plb", plb);
- dt_fixup_clock("/plb/opb", opb);
- dt_fixup_clock("/plb/opb/ebc", ebc);
- dt_fixup_clock("/plb/opb/serial@ef600200", uart0);
- dt_fixup_clock("/plb/opb/serial@ef600300", uart1);
-}
diff --git a/arch/powerpc/boot/4xx.h b/arch/powerpc/boot/4xx.h
index 7dc5d45361bc..62df496b7ba6 100644
--- a/arch/powerpc/boot/4xx.h
+++ b/arch/powerpc/boot/4xx.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* PowerPC 4xx related functions
*
* Copyright 2007 IBM Corporation.
* Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
*/
#ifndef _POWERPC_BOOT_4XX_H_
#define _POWERPC_BOOT_4XX_H_
@@ -15,13 +12,9 @@ void ibm4xx_sdram_fixup_memsize(void);
void ibm440spe_fixup_memsize(void);
void ibm4xx_denali_fixup_memsize(void);
void ibm44x_dbcr_reset(void);
-void ibm40x_dbcr_reset(void);
void ibm4xx_quiesce_eth(u32 *emac0, u32 *emac1);
void ibm4xx_fixup_ebc_ranges(const char *ebc);
-void ibm405gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk);
-void ibm405ep_fixup_clocks(unsigned int sys_clk);
-void ibm405ex_fixup_clocks(unsigned int sys_clk, unsigned int uart_clk);
void ibm440gp_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk);
void ibm440ep_fixup_clocks(unsigned int sys_clk, unsigned int ser_clk,
unsigned int tmr_clk);
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index ccc25eddbcb8..c47b78c1d3e7 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
# Makefile for making ELF bootable images for booting on CHRP
# using Open Firmware.
#
@@ -6,7 +7,7 @@
# Based on coffboot by Paul Mackerras
# Simplified for ppc64 by Todd Inglett
#
-# NOTE: this code is built for 32 bit in ELF32 format even though
+# NOTE: this code may be built for 32 bit in ELF32 format even though
# it packages a 64 bit kernel. We do this to simplify the
# bootloader and increase compatibility with OpenFirmware.
#
@@ -19,18 +20,74 @@
all: $(obj)/zImage
-BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
- -fno-strict-aliasing -Os -msoft-float -pipe \
- -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
- -isystem $(shell $(CROSS32CC) -print-file-name=include)
+ifdef CROSS32_COMPILE
+ifdef CONFIG_CC_IS_CLANG
+ BOOTCC := $(CROSS32_COMPILE)clang
+else
+ BOOTCC := $(CROSS32_COMPILE)gcc
+endif
+ BOOTAR := $(CROSS32_COMPILE)ar
+else
+ BOOTCC := $(CC)
+ BOOTAR := $(AR)
+endif
+
ifdef CONFIG_PPC64_BOOT_WRAPPER
-BOOTCFLAGS += -m64
+BOOTTARGETFLAGS += -m64
+BOOTTARGETFLAGS += -mabi=elfv2
+ifdef CONFIG_PPC64_ELF_ABI_V2
+BOOTTARGETFLAGS += $(call cc-option,-mabi=elfv2)
+endif
+else
+BOOTTARGETFLAGS := -m32
+endif
+
+ifdef CONFIG_TARGET_CPU_BOOL
+BOOTTARGETFLAGS += -mcpu=$(CONFIG_TARGET_CPU)
+else ifdef CONFIG_PPC64_BOOT_WRAPPER
+ifdef CONFIG_CPU_LITTLE_ENDIAN
+BOOTTARGETFLAGS += -mcpu=powerpc64le
+else
+BOOTTARGETFLAGS += -mcpu=powerpc64
endif
+endif
+
+$(obj)/4xx.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/ebony.o: BOOTTARGETFLAGS += -mcpu=440
+$(obj)/cuboot-taishan.o: BOOTTARGETFLAGS += -mcpu=440
+$(obj)/cuboot-katmai.o: BOOTTARGETFLAGS += -mcpu=440
+$(obj)/treeboot-iss4xx.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/treeboot-currituck.o: BOOTTARGETFLAGS += -mcpu=405
+$(obj)/treeboot-akebono.o: BOOTTARGETFLAGS += -mcpu=405
+
ifdef CONFIG_CPU_BIG_ENDIAN
-BOOTCFLAGS += -mbig-endian
+BOOTTARGETFLAGS += -mbig-endian
+else
+BOOTTARGETFLAGS += -mlittle-endian
endif
-BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc
+BOOTCPPFLAGS := -nostdinc $(LINUXINCLUDE)
+BOOTCPPFLAGS += -isystem $(shell $(BOOTCC) -print-file-name=include)
+
+BOOTCFLAGS := $(BOOTTARGETFLAGS) \
+ -std=gnu11 \
+ -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
+ -fno-strict-aliasing -O2 \
+ -msoft-float -mno-altivec -mno-vsx \
+ $(call cc-option,-mno-prefixed) \
+ $(call cc-option,-mno-pcrel) \
+ $(call cc-option,-mno-mma) \
+ $(call cc-option,-mno-spe) $(call cc-option,-mspe=no) \
+ -fomit-frame-pointer -fno-builtin -fPIC
+
+BOOTAFLAGS := $(BOOTTARGETFLAGS) -D__ASSEMBLY__
+
+BOOTARFLAGS := -crD
+
+ifdef CONFIG_CC_IS_CLANG
+BOOTCFLAGS += $(CLANG_FLAGS)
+BOOTAFLAGS += $(CLANG_FLAGS)
+endif
ifdef CONFIG_DEBUG_INFO
BOOTCFLAGS += -g
@@ -40,53 +97,61 @@ ifeq ($(call cc-option-yn, -fstack-protector),y)
BOOTCFLAGS += -fno-stack-protector
endif
-BOOTCFLAGS += -I$(obj) -I$(srctree)/$(obj)
+BOOTCFLAGS += -include $(srctree)/include/linux/compiler_attributes.h
+BOOTCFLAGS += -I$(objtree)/$(obj) -I$(srctree)/$(obj)
DTC_FLAGS ?= -p 1024
-$(obj)/4xx.o: BOOTCFLAGS += -mcpu=405
-$(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
-$(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405
-$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
-$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
-$(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405
-$(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405
-$(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405
-$(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405
-$(obj)/treeboot-akebono.o: BOOTCFLAGS += -mcpu=405
-$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405
+# The pre-boot decompressors pull in a lot of kernel headers and other source
+# files. This creates a bit of a dependency headache since we need to copy
+# these files into the build dir, fix up any includes and ensure that dependent
+# files are copied in the right order.
+
+# these need to be separate variables because they are copied out of different
+# directories in the kernel tree. Sure you COULD merge them, but it's a
+# cure-is-worse-than-disease situation.
+zlib-decomp-$(CONFIG_KERNEL_GZIP) := decompress_inflate.c
+zlib-$(CONFIG_KERNEL_GZIP) := inffast.c inflate.c inftrees.c
+zlibheader-$(CONFIG_KERNEL_GZIP) := inffast.h inffixed.h inflate.h inftrees.h infutil.h
+zliblinuxheader-$(CONFIG_KERNEL_GZIP) := zlib.h zconf.h zutil.h
+$(addprefix $(obj)/, decompress.o): \
+ $(addprefix $(obj)/,$(zlib-decomp-y))
-zlib := inffast.c inflate.c inftrees.c
-zlibheader := inffast.h inffixed.h inflate.h inftrees.h infutil.h
-zliblinuxheader := zlib.h zconf.h zutil.h
+$(addprefix $(obj)/, $(zlib-decomp-y)): \
+ $(addprefix $(obj)/,$(zliblinuxheader-y)) \
+ $(addprefix $(obj)/,$(zlibheader-y)) \
+ $(addprefix $(obj)/,$(zlib-y))
-$(addprefix $(obj)/,$(zlib) cuboot-c2k.o gunzip_util.o main.o): \
- $(addprefix $(obj)/,$(zliblinuxheader)) $(addprefix $(obj)/,$(zlibheader))
+$(addprefix $(obj)/,$(zlib-y)): \
+ $(addprefix $(obj)/,$(zliblinuxheader-y)) \
+ $(addprefix $(obj)/,$(zlibheader-y))
libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
libfdtheader := fdt.h libfdt.h libfdt_internal.h
-$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o): \
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+ treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
$(addprefix $(obj)/,$(libfdtheader))
-src-wlib-y := string.S crt0.S crtsavres.S stdio.c main.c \
+src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \
$(libfdt) libfdt-wrapper.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
- gunzip_util.c elf_util.c $(zlib) devtree.c stdlib.c \
- oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
- uartlite.c mpc52xx-psc.c
-src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
+ elf_util.c $(zlib-y) devtree.c stdlib.c \
+ oflib.c ofconsole.c cuboot.c
+
+src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c
+src-wlib-$(CONFIG_PPC_POWERNV) += opal-calls.S opal.c
+ifndef CONFIG_PPC64_BOOT_WRAPPER
+src-wlib-y += crtsavres.S
+endif
src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
-src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
+src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c
src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
-src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_EMBEDDED6xx) += ugecon.c fsl-soc.c
+src-wlib-$(CONFIG_CPM) += cpm-serial.c
src-plat-y := of.c epapr.c
-src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
- treeboot-walnut.c cuboot-acadia.c \
- cuboot-kilauea.c simpleboot.c \
- virtex405-head.S virtex.c
src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \
cuboot-bamboo.c cuboot-sam440ep.c \
cuboot-sequoia.c cuboot-rainier.c \
@@ -94,24 +159,24 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \
cuboot-warp.c cuboot-yosemite.c \
treeboot-iss4xx.c treeboot-currituck.c \
treeboot-akebono.c \
- simpleboot.c fixed-head.S virtex.c
-src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
+ simpleboot.c fixed-head.S
+src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c
src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c
src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c
src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c
src-plat-$(CONFIG_FSL_SOC_BOOKE) += cuboot-85xx.c cuboot-85xx-cpm2.c
-src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
- cuboot-c2k.c gamecube-head.S \
- gamecube.c wii-head.S wii.c holly.c \
+src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c \
+ gamecube-head.S gamecube.c \
+ wii-head.S wii.c holly.c \
fixed-head.S mvme5100.c
src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
-src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
-src-plat-$(CONFIG_PPC_CELLEB) += pseries-head.S
-src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S
+src-plat-$(CONFIG_MVME7100) += motload-head.S mvme7100.c
+
+src-plat-$(CONFIG_PPC_MICROWATT) += fixed-head.S microwatt.c
src-wlib := $(sort $(src-wlib-y))
src-plat := $(sort $(src-plat-y))
@@ -123,23 +188,20 @@ obj-wlib := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-wlib))))
obj-plat := $(addsuffix .o, $(basename $(addprefix $(obj)/, $(src-plat))))
obj-plat: $(libfdt)
-quiet_cmd_copy_zlib = COPY $@
- cmd_copy_zlib = sed "s@__used@@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
+quiet_cmd_copy_kern_src = COPY $@
+ cmd_copy_kern_src = sed -f $(srctree)/arch/powerpc/boot/fixup-headers.sed $< > $@
-quiet_cmd_copy_zlibheader = COPY $@
- cmd_copy_zlibheader = sed "s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
-# stddef.h for NULL
-quiet_cmd_copy_zliblinuxheader = COPY $@
- cmd_copy_zliblinuxheader = sed "s@<linux/string.h>@\"string.h\"@;s@<linux/kernel.h>@<stddef.h>@;s@<linux/\([^>]*\).*@\"\1\"@" $< > $@
+$(addprefix $(obj)/,$(zlib-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
+ $(call cmd,copy_kern_src)
-$(addprefix $(obj)/,$(zlib)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
- $(call cmd,copy_zlib)
+$(addprefix $(obj)/,$(zlibheader-y)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
+ $(call cmd,copy_kern_src)
-$(addprefix $(obj)/,$(zlibheader)): $(obj)/%: $(srctree)/lib/zlib_inflate/%
- $(call cmd,copy_zlibheader)
+$(addprefix $(obj)/,$(zliblinuxheader-y)): $(obj)/%: $(srctree)/include/linux/%
+ $(call cmd,copy_kern_src)
-$(addprefix $(obj)/,$(zliblinuxheader)): $(obj)/%: $(srctree)/include/linux/%
- $(call cmd,copy_zliblinuxheader)
+$(addprefix $(obj)/,$(zlib-decomp-y)): $(obj)/%: $(srctree)/lib/%
+ $(call cmd,copy_kern_src)
quiet_cmd_copy_libfdt = COPY $@
cmd_copy_libfdt = cp $< $@
@@ -148,27 +210,23 @@ $(addprefix $(obj)/,$(libfdt) $(libfdtheader)): $(obj)/%: $(srctree)/scripts/dtc
$(call cmd,copy_libfdt)
$(obj)/empty.c:
- @touch $@
+ $(Q)touch $@
-$(obj)/zImage.lds: $(obj)/%: $(srctree)/$(src)/%.S
- $(CROSS32CC) $(cpp_flags) -E -Wp,-MD,$(depfile) -P -Upowerpc \
- -D__ASSEMBLY__ -DLINKER_SCRIPT -o $@ $<
+$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(src)/%.S
+ $(Q)cp $< $@
-$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
- @cp $< $@
-
-clean-files := $(zlib) $(zlibheader) $(zliblinuxheader) \
- $(libfdt) $(libfdtheader) \
+clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
+ $(zlib-decomp-) $(libfdt) $(libfdtheader) \
empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
quiet_cmd_bootcc = BOOTCC $@
- cmd_bootcc = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
+ cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCPPFLAGS) $(BOOTCFLAGS) -c -o $@ $<
quiet_cmd_bootas = BOOTAS $@
- cmd_bootas = $(CROSS32CC) -Wp,-MD,$(depfile) $(BOOTAFLAGS) -c -o $@ $<
+ cmd_bootas = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCPPFLAGS) $(BOOTAFLAGS) -c -o $@ $<
quiet_cmd_bootar = BOOTAR $@
- cmd_bootar = $(CROSS32AR) -cr$(KBUILD_ARFLAGS) $@.$$$$ $(filter-out FORCE,$^); mv $@.$$$$ $@
+ cmd_bootar = $(BOOTAR) $(BOOTARFLAGS) $@.$$$$ $(real-prereqs); mv $@.$$$$ $@
$(obj-libfdt): $(obj)/%.o: $(srctree)/scripts/dtc/libfdt/%.c FORCE
$(call if_changed_dep,bootcc)
@@ -182,16 +240,16 @@ $(patsubst %.S,%.o, $(filter %.S, $(src-boot))): %.o: %.S FORCE
$(obj)/wrapper.a: $(obj-wlib) FORCE
$(call if_changed,bootar)
-hostprogs-y := addnote hack-coff mktree
+hostprogs := addnote hack-coff mktree
-targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a)
-extra-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
+targets += $(patsubst $(obj)/%,%,$(obj-boot) wrapper.a) zImage.lds
+always-y := $(obj)/wrapper.a $(obj-plat) $(obj)/empty.o \
$(obj)/zImage.lds $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds
-dtstree := $(srctree)/$(src)/dts
+dtstree := $(src)/dts
-wrapper :=$(srctree)/$(src)/wrapper
-wrapperbits := $(extra-y) $(addprefix $(obj)/,addnote hack-coff mktree) \
+wrapper := $(src)/wrapper
+wrapperbits := $(always-y) $(addprefix $(obj)/,addnote hack-coff mktree) \
$(wrapper) FORCE
#############
@@ -205,18 +263,20 @@ CROSSWRAP := -C "$(CROSS_COMPILE)"
endif
endif
+compressor-$(CONFIG_KERNEL_GZIP) := gz
+compressor-$(CONFIG_KERNEL_XZ) := xz
+compressor-$(CONFIG_KERNEL_LZMA) := lzma
+compressor-$(CONFIG_KERNEL_LZO) := lzo
+
# args (to if_changed): 1 = (this rule), 2 = platform, 3 = dts 4=dtb 5=initrd
quiet_cmd_wrap = WRAP $@
- cmd_wrap =$(CONFIG_SHELL) $(wrapper) -c -o $@ -p $2 $(CROSSWRAP) \
- $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) vmlinux
+ cmd_wrap =$(CONFIG_SHELL) $(wrapper) -Z $(compressor-y) -c -o $@ -p $2 \
+ $(CROSSWRAP) $(if $3, -s $3)$(if $4, -d $4)$(if $5, -i $5) \
+ vmlinux
image-$(CONFIG_PPC_PSERIES) += zImage.pseries
image-$(CONFIG_PPC_POWERNV) += zImage.pseries
-image-$(CONFIG_PPC_MAPLE) += zImage.maple
-image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries
image-$(CONFIG_PPC_PS3) += dtbImage.ps3
-image-$(CONFIG_PPC_CELLEB) += zImage.pseries
-image-$(CONFIG_PPC_CELL_QPACE) += zImage.pseries
image-$(CONFIG_PPC_CHRP) += zImage.chrp
image-$(CONFIG_PPC_EFIKA) += zImage.chrp
image-$(CONFIG_PPC_PMAC) += zImage.pmac
@@ -232,13 +292,6 @@ image-$(CONFIG_EPAPR_BOOT) += zImage.epapr
# Boards with newish u-boot firmware can use the uImage target above
#
-# Board ports in arch/powerpc/platform/40x/Kconfig
-image-$(CONFIG_EP405) += dtbImage.ep405
-image-$(CONFIG_HOTFOOT) += cuImage.hotfoot
-image-$(CONFIG_WALNUT) += treeImage.walnut
-image-$(CONFIG_ACADIA) += cuImage.acadia
-image-$(CONFIG_OBS600) += uImage.obs600
-
# Board ports in arch/powerpc/platform/44x/Kconfig
image-$(CONFIG_EBONY) += treeImage.ebony cuImage.ebony
image-$(CONFIG_BAMBOO) += treeImage.bamboo cuImage.bamboo
@@ -262,30 +315,20 @@ image-$(CONFIG_PPC_ADDER875) += cuImage.adder875-uboot \
dtbImage.adder875-redboot
# Board ports in arch/powerpc/platform/52xx/Kconfig
-image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200 lite5200.dtb
-image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b lite5200b.dtb
-image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200 media5200.dtb
+image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200
+image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b
+image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200
# Board ports in arch/powerpc/platform/82xx/Kconfig
-image-$(CONFIG_MPC8272_ADS) += cuImage.mpc8272ads
-image-$(CONFIG_PQ2FADS) += cuImage.pq2fads
image-$(CONFIG_EP8248E) += dtbImage.ep8248e
# Board ports in arch/powerpc/platform/83xx/Kconfig
-image-$(CONFIG_MPC832x_MDS) += cuImage.mpc832x_mds
image-$(CONFIG_MPC832x_RDB) += cuImage.mpc832x_rdb
image-$(CONFIG_MPC834x_ITX) += cuImage.mpc8349emitx \
cuImage.mpc8349emitxgp
-image-$(CONFIG_MPC834x_MDS) += cuImage.mpc834x_mds
-image-$(CONFIG_MPC836x_MDS) += cuImage.mpc836x_mds
image-$(CONFIG_ASP834x) += dtbImage.asp834x-redboot
# Board ports in arch/powerpc/platform/85xx/Kconfig
-image-$(CONFIG_MPC8540_ADS) += cuImage.mpc8540ads
-image-$(CONFIG_MPC8560_ADS) += cuImage.mpc8560ads
-image-$(CONFIG_MPC85xx_CDS) += cuImage.mpc8541cds \
- cuImage.mpc8548cds_32b \
- cuImage.mpc8555cds
image-$(CONFIG_MPC85xx_MDS) += cuImage.mpc8568mds
image-$(CONFIG_MPC85xx_DS) += cuImage.mpc8544ds \
cuImage.mpc8572ds
@@ -294,13 +337,13 @@ image-$(CONFIG_TQM8541) += cuImage.tqm8541
image-$(CONFIG_TQM8548) += cuImage.tqm8548
image-$(CONFIG_TQM8555) += cuImage.tqm8555
image-$(CONFIG_TQM8560) += cuImage.tqm8560
-image-$(CONFIG_SBC8548) += cuImage.sbc8548
image-$(CONFIG_KSI8560) += cuImage.ksi8560
+# Board ports in arch/powerpc/platform/86xx/Kconfig
+image-$(CONFIG_MVME7100) += dtbImage.mvme7100
+
# Board ports in arch/powerpc/platform/embedded6xx/Kconfig
image-$(CONFIG_STORCENTER) += cuImage.storcenter
-image-$(CONFIG_MPC7448HPC2) += cuImage.mpc7448hpc2
-image-$(CONFIG_PPC_C2K) += cuImage.c2k
image-$(CONFIG_GAMECUBE) += dtbImage.gamecube
image-$(CONFIG_WII) += dtbImage.wii
image-$(CONFIG_MVME5100) += dtbImage.mvme5100
@@ -308,76 +351,82 @@ image-$(CONFIG_MVME5100) += dtbImage.mvme5100
# Board port in arch/powerpc/platform/amigaone/Kconfig
image-$(CONFIG_AMIGAONE) += cuImage.amigaone
+image-$(CONFIG_PPC_MICROWATT) += dtbImage.microwatt
+
# For 32-bit powermacs, build the COFF and miboot images
# as well as the ELF images.
-ifeq ($(CONFIG_PPC32),y)
+ifdef CONFIG_PPC32
image-$(CONFIG_PPC_PMAC) += zImage.coff zImage.miboot
endif
# Allow extra targets to be added to the defconfig
-image-y += $(subst ",,$(CONFIG_EXTRA_TARGETS))
+image-y += $(CONFIG_EXTRA_TARGETS)
-initrd- := $(patsubst zImage%, zImage.initrd%, $(image-n) $(image-))
+initrd- := $(patsubst zImage%, zImage.initrd%, $(image-))
initrd-y := $(patsubst zImage%, zImage.initrd%, \
$(patsubst dtbImage%, dtbImage.initrd%, \
$(patsubst simpleImage%, simpleImage.initrd%, \
$(patsubst treeImage%, treeImage.initrd%, $(image-y)))))
initrd-y := $(filter-out $(image-y), $(initrd-y))
targets += $(image-y) $(initrd-y)
+targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \
+ $(patsubst $(x).%, dts/%.dtb, $(filter $(x).%, $(image-y))))
+targets += $(foreach x, dtbImage uImage cuImage simpleImage treeImage, \
+ $(patsubst $(x).%, dts/fsl/%.dtb, $(filter $(x).%, $(image-y))))
$(addprefix $(obj)/, $(initrd-y)): $(obj)/ramdisk.image.gz
# Don't put the ramdisk on the pattern rule; when its missing make will try
# the pattern rule with less dependencies that also matches (even with the
# hard dependency listed).
-$(obj)/zImage.initrd.%: vmlinux $(wrapperbits)
+$(obj)/zImage.initrd.%: vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,$*,,,$(obj)/ramdisk.image.gz)
-$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits)
+$(addprefix $(obj)/, $(sort $(filter zImage.%, $(image-y)))): vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
# dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb
- $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+ $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb
- $(call if_changed,wrap,$*,,$(obj)/$*.dtb)
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+ $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)
# This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
# prefix
$(obj)/vmlinux.strip: vmlinux
$(STRIP) -s -R .comment $< -o $@
-$(obj)/uImage: vmlinux $(wrapperbits)
+$(obj)/uImage: vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,uboot)
-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits)
- $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)
-# Rule to build device tree blobs
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
- $(call if_changed_dep,dtc)
+# Needed for the above targets to work with dts/fsl/ files
+$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb
+ @cp $< $@
# If there isn't a platform selected then just strip the vmlinux.
ifeq (,$(image-y))
@@ -385,32 +434,30 @@ image-y := vmlinux.strip
endif
$(obj)/zImage: $(addprefix $(obj)/, $(image-y))
- @rm -f $@; ln $< $@
+ $(Q)rm -f $@; ln $< $@
$(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y))
- @rm -f $@; ln $< $@
-
-install: $(CONFIGURE) $(addprefix $(obj)/, $(image-y))
- sh -x $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" vmlinux System.map "$(INSTALL_PATH)" $^
+ $(Q)rm -f $@; ln $< $@
# anything not in $(targets)
clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \
zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \
zImage.miboot zImage.pmac zImage.pseries \
- zImage.maple simpleImage.* otheros.bld *.dtb
+ simpleImage.* otheros.bld
# clean up files cached by wrapper
-clean-kernel := vmlinux.strip vmlinux.bin
-clean-kernel += $(addsuffix .gz,$(clean-kernel))
-# If not absolute clean-files are relative to $(obj).
-clean-files += $(addprefix $(objtree)/, $(clean-kernel))
+clean-kernel-base := vmlinux.strip vmlinux.bin
+clean-kernel := $(addsuffix .gz,$(clean-kernel-base))
+clean-kernel += $(addsuffix .xz,$(clean-kernel-base))
+# clean-files are relative to $(obj).
+clean-files += $(addprefix ../../../, $(clean-kernel))
WRAPPER_OBJDIR := /usr/lib/kernel-wrapper
WRAPPER_DTSDIR := /usr/lib/kernel-wrapper/dts
WRAPPER_BINDIR := /usr/sbin
INSTALL := install
-extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(extra-y))
-hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs-y))
+extra-installed := $(patsubst $(obj)/%, $(DESTDIR)$(WRAPPER_OBJDIR)/%, $(always-y))
+hostprogs-installed := $(patsubst %, $(DESTDIR)$(WRAPPER_BINDIR)/%, $(hostprogs))
wrapper-installed := $(DESTDIR)$(WRAPPER_BINDIR)/wrapper
dts-installed := $(patsubst $(dtstree)/%, $(DESTDIR)$(WRAPPER_DTSDIR)/%, $(wildcard $(dtstree)/*.dts))
diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 9d9f6f334d3c..53b3b2621457 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Program to hack in a PT_NOTE program header entry in an ELF file.
* This is needed for OF on RS/6000s to load an image correctly.
@@ -8,11 +9,6 @@
*
* Adapted for 64 bit little endian images by Andrew Tauferner.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Usage: addnote zImage
*/
#include <stdio.h>
@@ -223,7 +219,11 @@ main(int ac, char **av)
PUT_16(E_PHNUM, np + 2);
/* write back */
- lseek(fd, (long) 0, SEEK_SET);
+ i = lseek(fd, (long) 0, SEEK_SET);
+ if (i < 0) {
+ perror("lseek");
+ exit(1);
+ }
i = write(fd, buf, n);
if (i < 0) {
perror("write");
diff --git a/arch/powerpc/boot/bamboo.c b/arch/powerpc/boot/bamboo.c
index b82cacbc60db..dcdfa586add9 100644
--- a/arch/powerpc/boot/bamboo.c
+++ b/arch/powerpc/boot/bamboo.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright IBM Corporation, 2007
* Josh Boyer <jwboyer@linux.vnet.ibm.com>
@@ -7,10 +8,6 @@
*
* Clocking code based on code by:
* Stefan Roese <sr@denx.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the License
*/
#include <stdarg.h>
#include <stddef.h>
diff --git a/arch/powerpc/boot/cpm-serial.c b/arch/powerpc/boot/cpm-serial.c
index 19dc15abe43d..dfb56829cace 100644
--- a/arch/powerpc/boot/cpm-serial.c
+++ b/arch/powerpc/boot/cpm-serial.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* CPM serial console support.
*
diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S
index 14de4f8778a7..121cab9d579b 100644
--- a/arch/powerpc/boot/crt0.S
+++ b/arch/powerpc/boot/crt0.S
@@ -1,21 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) Paul Mackerras 1997.
*
* Adapted for 64 bit LE PowerPC by Andrew Tauferner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include "ppc_asm.h"
RELA = 7
-RELACOUNT = 0x6ffffff9
+RELASZ = 8
+RELAENT = 9
- .text
+ .data
/* A procedure descriptor used when booting this as a COFF file.
* When making COFF, this comes first in the link and we're
* linked at 0x500000.
@@ -23,20 +19,22 @@ RELACOUNT = 0x6ffffff9
.globl _zimage_start_opd
_zimage_start_opd:
.long 0x500000, 0, 0, 0
+ .text
+ b _zimage_start
#ifdef __powerpc64__
.balign 8
-p_start: .llong _start
-p_etext: .llong _etext
-p_bss_start: .llong __bss_start
-p_end: .llong _end
-
-p_toc: .llong __toc_start + 0x8000 - p_base
-p_dyn: .llong __dynamic_start - p_base
-p_rela: .llong __rela_dyn_start - p_base
-p_prom: .llong 0
+p_start: .8byte _start
+p_etext: .8byte _etext
+p_bss_start: .8byte __bss_start
+p_end: .8byte _end
+
+p_toc: .8byte .TOC. - p_base
+p_dyn: .8byte __dynamic_start - p_base
+p_rela: .8byte __rela_dyn_start - p_base
+p_prom: .8byte 0
.weak _platform_stack_top
-p_pstack: .llong _platform_stack_top
+p_pstack: .8byte _platform_stack_top
#else
p_start: .long _start
p_etext: .long _etext
@@ -48,13 +46,12 @@ p_pstack: .long _platform_stack_top
#endif
.weak _zimage_start
- .globl _zimage_start
_zimage_start:
.globl _zimage_start_lib
_zimage_start_lib:
/* Work out the offset between the address we were linked at
and the address where we're running. */
- bl .+4
+ bcl 20,31,.+4
p_base: mflr r10 /* r10 now points to runtime addr of p_base */
#ifndef __powerpc64__
/* grab the link address of the dynamic section in r11 */
@@ -79,34 +76,39 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
bne 11f
lwz r9,4(r12) /* get RELA pointer in r9 */
b 12f
-11: addis r8,r8,(-RELACOUNT)@ha
- cmpwi r8,RELACOUNT@l
+11: cmpwi r8,RELASZ
+ bne .Lcheck_for_relaent
+ lwz r0,4(r12) /* get RELASZ value in r0 */
+ b 12f
+.Lcheck_for_relaent:
+ cmpwi r8,RELAENT
bne 12f
- lwz r0,4(r12) /* get RELACOUNT value in r0 */
+ lwz r14,4(r12) /* get RELAENT value in r14 */
12: addi r12,r12,8
b 9b
/* The relocation section contains a list of relocations.
* We now do the R_PPC_RELATIVE ones, which point to words
- * which need to be initialized with addend + offset.
- * The R_PPC_RELATIVE ones come first and there are RELACOUNT
- * of them. */
+ * which need to be initialized with addend + offset */
10: /* skip relocation if we don't have both */
cmpwi r0,0
beq 3f
cmpwi r9,0
beq 3f
+ cmpwi r14,0
+ beq 3f
add r9,r9,r11 /* Relocate RELA pointer */
+ divwu r0,r0,r14 /* RELASZ / RELAENT */
mtctr r0
2: lbz r0,4+3(r9) /* ELF32_R_INFO(reloc->r_info) */
cmpwi r0,22 /* R_PPC_RELATIVE */
- bne 3f
+ bne .Lnext
lwz r12,0(r9) /* reloc->r_offset */
lwz r0,8(r9) /* reloc->r_addend */
add r0,r0,r11
stwx r0,r11,r12
- addi r9,r9,12
+.Lnext: add r9,r9,r14
bdnz 2b
/* Do a cache flush for our text, in case the loader didn't */
@@ -155,41 +157,48 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
ld r9,(p_rela-p_base)(r10)
add r9,r9,r10
- li r7,0
+ li r13,0
li r8,0
-9: ld r6,0(r11) /* get tag */
- cmpdi r6,0
+9: ld r12,0(r11) /* get tag */
+ cmpdi r12,0
beq 12f /* end of list */
- cmpdi r6,RELA
+ cmpdi r12,RELA
bne 10f
- ld r7,8(r11) /* get RELA pointer in r7 */
+ ld r13,8(r11) /* get RELA pointer in r13 */
b 11f
-10: addis r6,r6,(-RELACOUNT)@ha
- cmpdi r6,RELACOUNT@l
- bne 11f
- ld r8,8(r11) /* get RELACOUNT value in r8 */
+10: cmpwi r12,RELASZ
+ bne .Lcheck_for_relaent
+ lwz r8,8(r11) /* get RELASZ pointer in r8 */
+ b 11f
+.Lcheck_for_relaent:
+ cmpwi r12,RELAENT
+ bne 11f
+ lwz r14,8(r11) /* get RELAENT pointer in r14 */
11: addi r11,r11,16
b 9b
12:
- cmpdi r7,0 /* check we have both RELA and RELACOUNT */
+ cmpdi r13,0 /* check we have both RELA, RELASZ, RELAENT*/
cmpdi cr1,r8,0
beq 3f
beq cr1,3f
+ cmpdi r14,0
+ beq 3f
/* Calcuate the runtime offset. */
- subf r7,r7,r9
+ subf r13,r13,r9
/* Run through the list of relocations and process the
* R_PPC64_RELATIVE ones. */
+ divdu r8,r8,r14 /* RELASZ / RELAENT */
mtctr r8
13: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */
cmpdi r0,22 /* R_PPC64_RELATIVE */
- bne 3f
- ld r6,0(r9) /* reloc->r_offset */
+ bne .Lnext
+ ld r12,0(r9) /* reloc->r_offset */
ld r0,16(r9) /* reloc->r_addend */
- add r0,r0,r7
- stdx r0,r7,r6
- addi r9,r9,24
+ add r0,r0,r13
+ stdx r0,r13,r12
+.Lnext: add r9,r9,r14
bdnz 13b
/* Do a cache flush for our text, in case the loader didn't */
@@ -218,7 +227,7 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
beq 6f
ld r1,0(r8)
li r0,0
- stdu r0,-16(r1) /* establish a stack frame */
+ stdu r0,-112(r1) /* establish a stack frame */
6:
#endif /* __powerpc64__ */
/* Call platform_init() */
@@ -230,16 +239,19 @@ p_base: mflr r10 /* r10 now points to runtime addr of p_base */
#ifdef __powerpc64__
#define PROM_FRAME_SIZE 512
-#define SAVE_GPR(n, base) std n,8*(n)(base)
-#define REST_GPR(n, base) ld n,8*(n)(base)
-#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
-#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
-#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
-#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
-#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
-#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
-#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
-#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+
+.macro OP_REGS op, width, start, end, base, offset
+ .Lreg=\start
+ .rept (\end - \start + 1)
+ \op .Lreg,\offset+\width*.Lreg(\base)
+ .Lreg=.Lreg+1
+ .endr
+.endm
+
+#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, 0
+#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, 0
+#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
+#define REST_GPR(n, base) REST_GPRS(n, n, base)
/* prom handles the jump into and return from firmware. The prom args pointer
is loaded in r3. */
@@ -250,9 +262,7 @@ prom:
stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
SAVE_GPR(2, r1)
- SAVE_GPR(13, r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
+ SAVE_GPRS(13, 31, r1)
mfcr r10
std r10,8*32(r1)
mfmsr r10
@@ -264,7 +274,7 @@ prom:
mtsrr1 r10
/* Load FW address, set LR to label 1, and jump to FW */
- bl 0f
+ bcl 20,31,0f
0: mflr r10
addi r11,r10,(1f-0b)
mtlr r11
@@ -287,9 +297,7 @@ prom:
/* Restore other registers */
REST_GPR(2, r1)
- REST_GPR(13, r1)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
+ REST_GPRS(13, 31, r1)
ld r10,8*32(r1)
mtcr r10
diff --git a/arch/powerpc/boot/crtsavres.S b/arch/powerpc/boot/crtsavres.S
index f3d9b35c07d4..085fb2b9a8b8 100644
--- a/arch/powerpc/boot/crtsavres.S
+++ b/arch/powerpc/boot/crtsavres.S
@@ -37,12 +37,13 @@
* the executable file might be covered by the GNU General Public License.
*/
+#ifdef __powerpc64__
+#error "On PPC64, FPR save/restore functions are provided by the linker."
+#endif
+
.file "crtsavres.S"
.section ".text"
-/* On PowerPC64 Linux, these functions are provided by the linker. */
-#ifndef __powerpc64__
-
#define _GLOBAL(name) \
.type name,@function; \
.globl name; \
@@ -230,4 +231,3 @@ _GLOBAL(_rest32gpr_31_x)
mtlr 0
mr 1,11
blr
-#endif
diff --git a/arch/powerpc/boot/cuboot-52xx.c b/arch/powerpc/boot/cuboot-52xx.c
index 4c42ec8687be..b332056f2420 100644
--- a/arch/powerpc/boot/cuboot-52xx.c
+++ b/arch/powerpc/boot/cuboot-52xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for MPC5200
*
@@ -5,10 +6,6 @@
*
* Copyright (c) 2007 Secret Lab Technologies Ltd.
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-824x.c b/arch/powerpc/boot/cuboot-824x.c
index ced90c53de48..15818cb97c44 100644
--- a/arch/powerpc/boot/cuboot-824x.c
+++ b/arch/powerpc/boot/cuboot-824x.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for 824x
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-83xx.c b/arch/powerpc/boot/cuboot-83xx.c
index 61af1c1e8255..4063c6263c31 100644
--- a/arch/powerpc/boot/cuboot-83xx.c
+++ b/arch/powerpc/boot/cuboot-83xx.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for 83xx
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-85xx-cpm2.c b/arch/powerpc/boot/cuboot-85xx-cpm2.c
index 723872ddd447..ac5115beb348 100644
--- a/arch/powerpc/boot/cuboot-85xx-cpm2.c
+++ b/arch/powerpc/boot/cuboot-85xx-cpm2.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for 85xx
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-85xx.c b/arch/powerpc/boot/cuboot-85xx.c
index 277ba4a79b5a..1466cc63d623 100644
--- a/arch/powerpc/boot/cuboot-85xx.c
+++ b/arch/powerpc/boot/cuboot-85xx.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for 85xx
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-8xx.c b/arch/powerpc/boot/cuboot-8xx.c
index c202c8868bd6..e4499fba5d2b 100644
--- a/arch/powerpc/boot/cuboot-8xx.c
+++ b/arch/powerpc/boot/cuboot-8xx.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for 8xx
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-acadia.c b/arch/powerpc/boot/cuboot-acadia.c
deleted file mode 100644
index 0634aba6348a..000000000000
--- a/arch/powerpc/boot/cuboot-acadia.c
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Old U-boot compatibility for Acadia
- *
- * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * Copyright 2008 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "ops.h"
-#include "io.h"
-#include "dcr.h"
-#include "stdio.h"
-#include "4xx.h"
-#include "44x.h"
-#include "cuboot.h"
-
-#define TARGET_4xx
-#include "ppcboot.h"
-
-static bd_t bd;
-
-#define CPR_PERD0_SPIDV_MASK 0x000F0000 /* SPI Clock Divider */
-
-#define PLLC_SRC_MASK 0x20000000 /* PLL feedback source */
-
-#define PLLD_FBDV_MASK 0x1F000000 /* PLL feedback divider value */
-#define PLLD_FWDVA_MASK 0x000F0000 /* PLL forward divider A value */
-#define PLLD_FWDVB_MASK 0x00000700 /* PLL forward divider B value */
-
-#define PRIMAD_CPUDV_MASK 0x0F000000 /* CPU Clock Divisor Mask */
-#define PRIMAD_PLBDV_MASK 0x000F0000 /* PLB Clock Divisor Mask */
-#define PRIMAD_OPBDV_MASK 0x00000F00 /* OPB Clock Divisor Mask */
-#define PRIMAD_EBCDV_MASK 0x0000000F /* EBC Clock Divisor Mask */
-
-#define PERD0_PWMDV_MASK 0xFF000000 /* PWM Divider Mask */
-#define PERD0_SPIDV_MASK 0x000F0000 /* SPI Divider Mask */
-#define PERD0_U0DV_MASK 0x0000FF00 /* UART 0 Divider Mask */
-#define PERD0_U1DV_MASK 0x000000FF /* UART 1 Divider Mask */
-
-static void get_clocks(void)
-{
- unsigned long sysclk, cpr_plld, cpr_pllc, cpr_primad, plloutb, i;
- unsigned long pllFwdDiv, pllFwdDivB, pllFbkDiv, pllPlbDiv, pllExtBusDiv;
- unsigned long pllOpbDiv, freqEBC, freqUART, freqOPB;
- unsigned long div; /* total divisor udiv * bdiv */
- unsigned long umin; /* minimum udiv */
- unsigned short diff; /* smallest diff */
- unsigned long udiv; /* best udiv */
- unsigned short idiff; /* current diff */
- unsigned short ibdiv; /* current bdiv */
- unsigned long est; /* current estimate */
- unsigned long baud;
- void *np;
-
- /* read the sysclk value from the CPLD */
- sysclk = (in_8((unsigned char *)0x80000000) == 0xc) ? 66666666 : 33333000;
-
- /*
- * Read PLL Mode registers
- */
- cpr_plld = CPR0_READ(DCRN_CPR0_PLLD);
- cpr_pllc = CPR0_READ(DCRN_CPR0_PLLC);
-
- /*
- * Determine forward divider A
- */
- pllFwdDiv = ((cpr_plld & PLLD_FWDVA_MASK) >> 16);
-
- /*
- * Determine forward divider B
- */
- pllFwdDivB = ((cpr_plld & PLLD_FWDVB_MASK) >> 8);
- if (pllFwdDivB == 0)
- pllFwdDivB = 8;
-
- /*
- * Determine FBK_DIV.
- */
- pllFbkDiv = ((cpr_plld & PLLD_FBDV_MASK) >> 24);
- if (pllFbkDiv == 0)
- pllFbkDiv = 256;
-
- /*
- * Read CPR_PRIMAD register
- */
- cpr_primad = CPR0_READ(DCRN_CPR0_PRIMAD);
-
- /*
- * Determine PLB_DIV.
- */
- pllPlbDiv = ((cpr_primad & PRIMAD_PLBDV_MASK) >> 16);
- if (pllPlbDiv == 0)
- pllPlbDiv = 16;
-
- /*
- * Determine EXTBUS_DIV.
- */
- pllExtBusDiv = (cpr_primad & PRIMAD_EBCDV_MASK);
- if (pllExtBusDiv == 0)
- pllExtBusDiv = 16;
-
- /*
- * Determine OPB_DIV.
- */
- pllOpbDiv = ((cpr_primad & PRIMAD_OPBDV_MASK) >> 8);
- if (pllOpbDiv == 0)
- pllOpbDiv = 16;
-
- /* There is a bug in U-Boot that prevents us from using
- * bd.bi_opbfreq because U-Boot doesn't populate it for
- * 405EZ. We get to calculate it, yay!
- */
- freqOPB = (sysclk *pllFbkDiv) /pllOpbDiv;
-
- freqEBC = (sysclk * pllFbkDiv) / pllExtBusDiv;
-
- plloutb = ((sysclk * ((cpr_pllc & PLLC_SRC_MASK) ?
- pllFwdDivB : pllFwdDiv) *
- pllFbkDiv) / pllFwdDivB);
-
- np = find_node_by_alias("serial0");
- if (getprop(np, "current-speed", &baud, sizeof(baud)) != sizeof(baud))
- fatal("no current-speed property\n\r");
-
- udiv = 256; /* Assume lowest possible serial clk */
- div = plloutb / (16 * baud); /* total divisor */
- umin = (plloutb / freqOPB) << 1; /* 2 x OPB divisor */
- diff = 256; /* highest possible */
-
- /* i is the test udiv value -- start with the largest
- * possible (256) to minimize serial clock and constrain
- * search to umin.
- */
- for (i = 256; i > umin; i--) {
- ibdiv = div / i;
- est = i * ibdiv;
- idiff = (est > div) ? (est-div) : (div-est);
- if (idiff == 0) {
- udiv = i;
- break; /* can't do better */
- } else if (idiff < diff) {
- udiv = i; /* best so far */
- diff = idiff; /* update lowest diff*/
- }
- }
- freqUART = plloutb / udiv;
-
- dt_fixup_cpu_clocks(bd.bi_procfreq, bd.bi_intfreq, bd.bi_plb_busfreq);
- dt_fixup_clock("/plb/ebc", freqEBC);
- dt_fixup_clock("/plb/opb", freqOPB);
- dt_fixup_clock("/plb/opb/serial@ef600300", freqUART);
- dt_fixup_clock("/plb/opb/serial@ef600400", freqUART);
-}
-
-static void acadia_fixups(void)
-{
- dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
- get_clocks();
- dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
-}
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- CUBOOT_INIT();
- platform_ops.fixups = acadia_fixups;
- platform_ops.exit = ibm40x_dbcr_reset;
- fdt_init(_dtb_start);
- serial_console_init();
-}
diff --git a/arch/powerpc/boot/cuboot-amigaone.c b/arch/powerpc/boot/cuboot-amigaone.c
index d5029674030b..f3b6d6236ca7 100644
--- a/arch/powerpc/boot/cuboot-amigaone.c
+++ b/arch/powerpc/boot/cuboot-amigaone.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for AmigaOne
*
@@ -5,10 +6,6 @@
*
* Based on cuboot-83xx.c
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-bamboo.c b/arch/powerpc/boot/cuboot-bamboo.c
index b5c30f766c40..a5dcf3091d45 100644
--- a/arch/powerpc/boot/cuboot-bamboo.c
+++ b/arch/powerpc/boot/cuboot-bamboo.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Bamboo
*
@@ -6,10 +7,6 @@
* Copyright 2007 IBM Corporation
*
* Based on cuboot-ebony.c
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-c2k.c b/arch/powerpc/boot/cuboot-c2k.c
deleted file mode 100644
index e43594950ba3..000000000000
--- a/arch/powerpc/boot/cuboot-c2k.c
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * GEFanuc C2K platform code.
- *
- * Author: Remi Machet <rmachet@slac.stanford.edu>
- *
- * Originated from prpmc2800.c
- *
- * 2008 (c) Stanford University
- * 2007 (c) MontaVista, Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "types.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "elf.h"
-#include "gunzip_util.h"
-#include "mv64x60.h"
-#include "cuboot.h"
-#include "ppcboot.h"
-
-static u8 *bridge_base;
-
-static void c2k_bridge_setup(u32 mem_size)
-{
- u32 i, v[30], enables, acc_bits;
- u32 pci_base_hi, pci_base_lo, size, buf[2];
- unsigned long cpu_base;
- int rc;
- void *devp, *mv64x60_devp;
- u8 *bridge_pbase, is_coherent;
- struct mv64x60_cpu2pci_win *tbl;
- int bus;
-
- bridge_pbase = mv64x60_get_bridge_pbase();
- is_coherent = mv64x60_is_coherent();
-
- if (is_coherent)
- acc_bits = MV64x60_PCI_ACC_CNTL_SNOOP_WB
- | MV64x60_PCI_ACC_CNTL_SWAP_NONE
- | MV64x60_PCI_ACC_CNTL_MBURST_32_BYTES
- | MV64x60_PCI_ACC_CNTL_RDSIZE_32_BYTES;
- else
- acc_bits = MV64x60_PCI_ACC_CNTL_SNOOP_NONE
- | MV64x60_PCI_ACC_CNTL_SWAP_NONE
- | MV64x60_PCI_ACC_CNTL_MBURST_128_BYTES
- | MV64x60_PCI_ACC_CNTL_RDSIZE_256_BYTES;
-
- mv64x60_config_ctlr_windows(bridge_base, bridge_pbase, is_coherent);
- mv64x60_devp = find_node_by_compatible(NULL, "marvell,mv64360");
- if (mv64x60_devp == NULL)
- fatal("Error: Missing marvell,mv64360 device tree node\n\r");
-
- enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE));
- enables |= 0x007ffe00; /* Disable all cpu->pci windows */
- out_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE), enables);
-
- /* Get the cpu -> pci i/o & mem mappings from the device tree */
- devp = NULL;
- for (bus = 0; ; bus++) {
- char name[] = "pci ";
-
- name[strlen(name)-1] = bus+'0';
-
- devp = find_node_by_alias(name);
- if (devp == NULL)
- break;
-
- if (bus >= 2)
- fatal("Error: Only 2 PCI controllers are supported at" \
- " this time.\n");
-
- mv64x60_config_pci_windows(bridge_base, bridge_pbase, bus, 0,
- mem_size, acc_bits);
-
- rc = getprop(devp, "ranges", v, sizeof(v));
- if (rc == 0)
- fatal("Error: Can't find marvell,mv64360-pci ranges"
- " property\n\r");
-
- /* Get the cpu -> pci i/o & mem mappings from the device tree */
-
- for (i = 0; i < rc; i += 6) {
- switch (v[i] & 0xff000000) {
- case 0x01000000: /* PCI I/O Space */
- tbl = mv64x60_cpu2pci_io;
- break;
- case 0x02000000: /* PCI MEM Space */
- tbl = mv64x60_cpu2pci_mem;
- break;
- default:
- continue;
- }
-
- pci_base_hi = v[i+1];
- pci_base_lo = v[i+2];
- cpu_base = v[i+3];
- size = v[i+5];
-
- buf[0] = cpu_base;
- buf[1] = size;
-
- if (!dt_xlate_addr(devp, buf, sizeof(buf), &cpu_base))
- fatal("Error: Can't translate PCI address " \
- "0x%x\n\r", (u32)cpu_base);
-
- mv64x60_config_cpu2pci_window(bridge_base, bus,
- pci_base_hi, pci_base_lo, cpu_base, size, tbl);
- }
-
- enables &= ~(3<<(9+bus*5)); /* Enable cpu->pci<bus> i/o,
- cpu->pci<bus> mem0 */
- out_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE),
- enables);
- };
-}
-
-static void c2k_fixups(void)
-{
- u32 mem_size;
-
- mem_size = mv64x60_get_mem_size(bridge_base);
- c2k_bridge_setup(mem_size); /* Do necessary bridge setup */
-}
-
-#define MV64x60_MPP_CNTL_0 0xf000
-#define MV64x60_MPP_CNTL_2 0xf008
-#define MV64x60_GPP_IO_CNTL 0xf100
-#define MV64x60_GPP_LEVEL_CNTL 0xf110
-#define MV64x60_GPP_VALUE_SET 0xf118
-
-static void c2k_reset(void)
-{
- u32 temp;
-
- udelay(5000000);
-
- if (bridge_base != 0) {
- temp = in_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_0));
- temp &= 0xFFFF0FFF;
- out_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_0), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL));
- temp |= 0x00000004;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL));
- temp |= 0x00000004;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_2));
- temp &= 0xFFFF0FFF;
- out_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_2), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL));
- temp |= 0x00080000;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL));
- temp |= 0x00080000;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL), temp);
-
- out_le32((u32 *)(bridge_base + MV64x60_GPP_VALUE_SET),
- 0x00080004);
- }
-
- for (;;);
-}
-
-static bd_t bd;
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- CUBOOT_INIT();
-
- fdt_init(_dtb_start);
-
- bridge_base = mv64x60_get_bridge_base();
-
- platform_ops.fixups = c2k_fixups;
- platform_ops.exit = c2k_reset;
-
- if (serial_console_init() < 0)
- exit();
-}
diff --git a/arch/powerpc/boot/cuboot-ebony.c b/arch/powerpc/boot/cuboot-ebony.c
index 56564ba37f62..3e602ee0e183 100644
--- a/arch/powerpc/boot/cuboot-ebony.c
+++ b/arch/powerpc/boot/cuboot-ebony.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Ebony
*
@@ -6,10 +7,6 @@
* Copyright 2007 David Gibson, IBM Corporatio.
* Based on cuboot-83xx.c, which is:
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-hotfoot.c b/arch/powerpc/boot/cuboot-hotfoot.c
deleted file mode 100644
index 8f697b958e45..000000000000
--- a/arch/powerpc/boot/cuboot-hotfoot.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Old U-boot compatibility for Esteem 195E Hotfoot CPU Board
- *
- * Author: Solomon Peachy <solomon@linux-wlan.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "ops.h"
-#include "stdio.h"
-#include "reg.h"
-#include "dcr.h"
-#include "4xx.h"
-#include "cuboot.h"
-
-#define TARGET_4xx
-#define TARGET_HOTFOOT
-
-#include "ppcboot-hotfoot.h"
-
-static bd_t bd;
-
-#define NUM_REGS 3
-
-static void hotfoot_fixups(void)
-{
- u32 uart = mfdcr(DCRN_CPC0_UCR) & 0x7f;
-
- dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
-
- dt_fixup_cpu_clocks(bd.bi_procfreq, bd.bi_procfreq, 0);
- dt_fixup_clock("/plb", bd.bi_plb_busfreq);
- dt_fixup_clock("/plb/opb", bd.bi_opbfreq);
- dt_fixup_clock("/plb/ebc", bd.bi_pci_busfreq);
- dt_fixup_clock("/plb/opb/serial@ef600300", bd.bi_procfreq / uart);
- dt_fixup_clock("/plb/opb/serial@ef600400", bd.bi_procfreq / uart);
-
- dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
- dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
-
- /* Is this a single eth/serial board? */
- if ((bd.bi_enet1addr[0] == 0) &&
- (bd.bi_enet1addr[1] == 0) &&
- (bd.bi_enet1addr[2] == 0) &&
- (bd.bi_enet1addr[3] == 0) &&
- (bd.bi_enet1addr[4] == 0) &&
- (bd.bi_enet1addr[5] == 0)) {
- void *devp;
-
- printf("Trimming devtree for single serial/eth board\n");
-
- devp = finddevice("/plb/opb/serial@ef600300");
- if (!devp)
- fatal("Can't find node for /plb/opb/serial@ef600300");
- del_node(devp);
-
- devp = finddevice("/plb/opb/ethernet@ef600900");
- if (!devp)
- fatal("Can't find node for /plb/opb/ethernet@ef600900");
- del_node(devp);
- }
-
- ibm4xx_quiesce_eth((u32 *)0xef600800, (u32 *)0xef600900);
-
- /* Fix up flash size in fdt for 4M boards. */
- if (bd.bi_flashsize < 0x800000) {
- u32 regs[NUM_REGS];
- void *devp = finddevice("/plb/ebc/nor_flash@0");
- if (!devp)
- fatal("Can't find FDT node for nor_flash!??");
-
- printf("Fixing devtree for 4M Flash\n");
-
- /* First fix up the base addresse */
- getprop(devp, "reg", regs, sizeof(regs));
- regs[0] = 0;
- regs[1] = 0xffc00000;
- regs[2] = 0x00400000;
- setprop(devp, "reg", regs, sizeof(regs));
-
- /* Then the offsets */
- devp = finddevice("/plb/ebc/nor_flash@0/partition@0");
- if (!devp)
- fatal("Can't find FDT node for partition@0");
- getprop(devp, "reg", regs, 2*sizeof(u32));
- regs[0] -= 0x400000;
- setprop(devp, "reg", regs, 2*sizeof(u32));
-
- devp = finddevice("/plb/ebc/nor_flash@0/partition@1");
- if (!devp)
- fatal("Can't find FDT node for partition@1");
- getprop(devp, "reg", regs, 2*sizeof(u32));
- regs[0] -= 0x400000;
- setprop(devp, "reg", regs, 2*sizeof(u32));
-
- devp = finddevice("/plb/ebc/nor_flash@0/partition@2");
- if (!devp)
- fatal("Can't find FDT node for partition@2");
- getprop(devp, "reg", regs, 2*sizeof(u32));
- regs[0] -= 0x400000;
- setprop(devp, "reg", regs, 2*sizeof(u32));
-
- devp = finddevice("/plb/ebc/nor_flash@0/partition@3");
- if (!devp)
- fatal("Can't find FDT node for partition@3");
- getprop(devp, "reg", regs, 2*sizeof(u32));
- regs[0] -= 0x400000;
- setprop(devp, "reg", regs, 2*sizeof(u32));
-
- devp = finddevice("/plb/ebc/nor_flash@0/partition@4");
- if (!devp)
- fatal("Can't find FDT node for partition@4");
- getprop(devp, "reg", regs, 2*sizeof(u32));
- regs[0] -= 0x400000;
- setprop(devp, "reg", regs, 2*sizeof(u32));
-
- devp = finddevice("/plb/ebc/nor_flash@0/partition@6");
- if (!devp)
- fatal("Can't find FDT node for partition@6");
- getprop(devp, "reg", regs, 2*sizeof(u32));
- regs[0] -= 0x400000;
- setprop(devp, "reg", regs, 2*sizeof(u32));
-
- /* Delete the FeatFS node */
- devp = finddevice("/plb/ebc/nor_flash@0/partition@5");
- if (!devp)
- fatal("Can't find FDT node for partition@5");
- del_node(devp);
- }
-}
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- CUBOOT_INIT();
- platform_ops.fixups = hotfoot_fixups;
- platform_ops.exit = ibm40x_dbcr_reset;
- fdt_init(_dtb_start);
- serial_console_init();
-}
diff --git a/arch/powerpc/boot/cuboot-katmai.c b/arch/powerpc/boot/cuboot-katmai.c
index 5434d70b5660..034a748fde24 100644
--- a/arch/powerpc/boot/cuboot-katmai.c
+++ b/arch/powerpc/boot/cuboot-katmai.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Katmai
*
@@ -8,10 +9,6 @@
* Copyright 2007 David Gibson, IBM Corporation.
* Based on cuboot-83xx.c, which is:
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-kilauea.c b/arch/powerpc/boot/cuboot-kilauea.c
deleted file mode 100644
index 80cdad6bbc3f..000000000000
--- a/arch/powerpc/boot/cuboot-kilauea.c
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Old U-boot compatibility for PPC405EX. This image is already included
- * a dtb.
- *
- * Author: Tiejun Chen <tiejun.chen@windriver.com>
- *
- * Copyright (C) 2009 Wind River Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "ops.h"
-#include "io.h"
-#include "dcr.h"
-#include "stdio.h"
-#include "4xx.h"
-#include "44x.h"
-#include "cuboot.h"
-
-#define TARGET_4xx
-#define TARGET_44x
-#include "ppcboot.h"
-
-#define KILAUEA_SYS_EXT_SERIAL_CLOCK 11059200 /* ext. 11.059MHz clk */
-
-static bd_t bd;
-
-static void kilauea_fixups(void)
-{
- unsigned long sysclk = 33333333;
-
- ibm405ex_fixup_clocks(sysclk, KILAUEA_SYS_EXT_SERIAL_CLOCK);
- dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
- ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
- dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
- dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
-}
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- CUBOOT_INIT();
- platform_ops.fixups = kilauea_fixups;
- platform_ops.exit = ibm40x_dbcr_reset;
- fdt_init(_dtb_start);
- serial_console_init();
-}
diff --git a/arch/powerpc/boot/cuboot-mpc7448hpc2.c b/arch/powerpc/boot/cuboot-mpc7448hpc2.c
deleted file mode 100644
index 1b8953259d75..000000000000
--- a/arch/powerpc/boot/cuboot-mpc7448hpc2.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Roy Zang <tie-fei.zang@freescale.com>
- *
- * Description:
- * Old U-boot compatibility for mpc7448hpc2 board
- * Based on the code of Scott Wood <scottwood@freescale.com>
- * for 83xx and 85xx.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- */
-
-#include "ops.h"
-#include "stdio.h"
-#include "cuboot.h"
-
-#define TARGET_HAS_ETH1
-#include "ppcboot.h"
-
-static bd_t bd;
-extern char _dtb_start[], _dtb_end[];
-
-static void platform_fixups(void)
-{
- void *tsi;
-
- dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
- dt_fixup_mac_addresses(bd.bi_enetaddr, bd.bi_enet1addr);
- dt_fixup_cpu_clocks(bd.bi_intfreq, bd.bi_busfreq / 4, bd.bi_busfreq);
- tsi = find_node_by_devtype(NULL, "tsi-bridge");
- if (tsi)
- setprop(tsi, "bus-frequency", &bd.bi_busfreq,
- sizeof(bd.bi_busfreq));
-}
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- CUBOOT_INIT();
- fdt_init(_dtb_start);
- serial_console_init();
- platform_ops.fixups = platform_fixups;
-}
diff --git a/arch/powerpc/boot/cuboot-pq2.c b/arch/powerpc/boot/cuboot-pq2.c
index 9c7d13428293..d32765c03edd 100644
--- a/arch/powerpc/boot/cuboot-pq2.c
+++ b/arch/powerpc/boot/cuboot-pq2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for PowerQUICC II
* (a.k.a. 82xx with CPM, not the 8240 family of chips)
@@ -5,10 +6,6 @@
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-rainier.c b/arch/powerpc/boot/cuboot-rainier.c
index 0a3fddee54df..046478544a5e 100644
--- a/arch/powerpc/boot/cuboot-rainier.c
+++ b/arch/powerpc/boot/cuboot-rainier.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Rainier
*
@@ -9,10 +10,6 @@
*
* Based on Bamboo code by Josh Boyer <jwboyer@linux.vnet.ibm.com>
* Copyright IBM Corporation, 2007
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the License
*/
#include <stdarg.h>
diff --git a/arch/powerpc/boot/cuboot-sam440ep.c b/arch/powerpc/boot/cuboot-sam440ep.c
index ec10a47460dd..d875119e3c4a 100644
--- a/arch/powerpc/boot/cuboot-sam440ep.c
+++ b/arch/powerpc/boot/cuboot-sam440ep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Sam440ep based off bamboo.c code
* original copyrights below
@@ -10,10 +11,6 @@
*
* Modified from cuboot-bamboo.c for sam440ep:
* Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-sequoia.c b/arch/powerpc/boot/cuboot-sequoia.c
index caf8f2e842ea..e0285c20e3bb 100644
--- a/arch/powerpc/boot/cuboot-sequoia.c
+++ b/arch/powerpc/boot/cuboot-sequoia.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Sequoia
*
@@ -9,10 +10,6 @@
*
* Based on Bamboo code by Josh Boyer <jwboyer@linux.vnet.ibm.com>
* Copyright IBM Corporation, 2007
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the License
*/
#include <stdarg.h>
diff --git a/arch/powerpc/boot/cuboot-taishan.c b/arch/powerpc/boot/cuboot-taishan.c
index 9bc906a754dd..3d40670b248b 100644
--- a/arch/powerpc/boot/cuboot-taishan.c
+++ b/arch/powerpc/boot/cuboot-taishan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Taishan
*
@@ -8,10 +9,6 @@
* Copyright 2007 David Gibson, IBM Corporation.
* Based on cuboot-83xx.c, which is:
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-warp.c b/arch/powerpc/boot/cuboot-warp.c
index 806df693fea6..1ec0fa28480b 100644
--- a/arch/powerpc/boot/cuboot-warp.c
+++ b/arch/powerpc/boot/cuboot-warp.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008 PIKA Technologies
* Sean MacLennan <smaclennan@pikatech.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot-yosemite.c b/arch/powerpc/boot/cuboot-yosemite.c
index cc6e338c5d0d..ce3fdb73798e 100644
--- a/arch/powerpc/boot/cuboot-yosemite.c
+++ b/arch/powerpc/boot/cuboot-yosemite.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Yosemite
*
* Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
*
* Copyright 2008 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot.c b/arch/powerpc/boot/cuboot.c
index 7768b2306b7a..7f186658ff06 100644
--- a/arch/powerpc/boot/cuboot.c
+++ b/arch/powerpc/boot/cuboot.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Compatibility for old (not device tree aware) U-Boot versions
*
@@ -6,10 +7,6 @@
*
* Copyright 2007 David Gibson, IBM Corporation.
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/cuboot.h b/arch/powerpc/boot/cuboot.h
index cd2aa7f348f3..c2b2c58eaa0b 100644
--- a/arch/powerpc/boot/cuboot.h
+++ b/arch/powerpc/boot/cuboot.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_CUBOOT_H_
#define _PPC_BOOT_CUBOOT_H_
diff --git a/arch/powerpc/boot/dcr.h b/arch/powerpc/boot/dcr.h
index bf8f4ede1928..91dc3a302cc8 100644
--- a/arch/powerpc/boot/dcr.h
+++ b/arch/powerpc/boot/dcr.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_DCR_H_
#define _PPC_BOOT_DCR_H_
@@ -152,17 +153,6 @@ static const unsigned long sdram_bxcr[] = { SDRAM0_B0CR, SDRAM0_B1CR,
#define CPR0_SCPID 0x120
#define CPR0_PLLC0 0x40
-/* 405GP Clocking/Power Management/Chip Control regs */
-#define DCRN_CPC0_PLLMR 0xb0
-#define DCRN_405_CPC0_CR0 0xb1
-#define DCRN_405_CPC0_CR1 0xb2
-#define DCRN_405_CPC0_PSR 0xb4
-
-/* 405EP Clocking/Power Management/Chip Control regs */
-#define DCRN_CPC0_PLLMR0 0xf0
-#define DCRN_CPC0_PLLMR1 0xf4
-#define DCRN_CPC0_UCR 0xf5
-
/* 440GX/405EX Clock Control reg */
#define DCRN_CPR0_CLKUPD 0x020
#define DCRN_CPR0_PLLC 0x040
diff --git a/arch/powerpc/boot/decompress.c b/arch/powerpc/boot/decompress.c
new file mode 100644
index 000000000000..6835cb53f034
--- /dev/null
+++ b/arch/powerpc/boot/decompress.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Wrapper around the kernel's pre-boot decompression library.
+ *
+ * Copyright (C) IBM Corporation 2016.
+ */
+
+#include "elf.h"
+#include "page.h"
+#include "string.h"
+#include "stdio.h"
+#include "ops.h"
+#include "reg.h"
+#include "types.h"
+
+/*
+ * The decompressor_*.c files play #ifdef games so they can be used in both
+ * pre-boot and regular kernel code. We need these definitions to make the
+ * includes work.
+ */
+
+#define STATIC static
+#define INIT
+
+/*
+ * The build process will copy the required zlib source files and headers
+ * out of lib/ and "fix" the includes so they do not pull in other kernel
+ * headers.
+ */
+
+#ifdef CONFIG_KERNEL_GZIP
+# include "decompress_inflate.c"
+#endif
+
+#ifdef CONFIG_KERNEL_XZ
+# include "xz_config.h"
+# include "../../../lib/decompress_unxz.c"
+#endif
+
+/* globals for tracking the state of the decompression */
+static unsigned long decompressed_bytes;
+static unsigned long limit;
+static unsigned long skip;
+static char *output_buffer;
+
+/*
+ * flush() is called by __decompress() when the decompressor's scratch buffer is
+ * full.
+ */
+static long flush(void *v, unsigned long buffer_size)
+{
+ unsigned long end = decompressed_bytes + buffer_size;
+ unsigned long size = buffer_size;
+ unsigned long offset = 0;
+ char *in = v;
+ char *out;
+
+ /*
+ * if we hit our decompression limit, we need to fake an error to abort
+ * the in-progress decompression.
+ */
+ if (decompressed_bytes >= limit)
+ return -1;
+
+ /* skip this entire block */
+ if (end <= skip) {
+ decompressed_bytes += buffer_size;
+ return buffer_size;
+ }
+
+ /* skip some data at the start, but keep the rest of the block */
+ if (decompressed_bytes < skip && end > skip) {
+ offset = skip - decompressed_bytes;
+
+ in += offset;
+ size -= offset;
+ decompressed_bytes += offset;
+ }
+
+ out = &output_buffer[decompressed_bytes - skip];
+ size = min(decompressed_bytes + size, limit) - decompressed_bytes;
+
+ memcpy(out, in, size);
+ decompressed_bytes += size;
+
+ return buffer_size;
+}
+
+static void print_err(char *s)
+{
+ /* suppress the "error" when we terminate the decompressor */
+ if (decompressed_bytes >= limit)
+ return;
+
+ printf("Decompression error: '%s'\n\r", s);
+}
+
+/**
+ * partial_decompress - decompresses part or all of a compressed buffer
+ * @inbuf: input buffer
+ * @input_size: length of the input buffer
+ * @outbuf: output buffer
+ * @output_size: length of the output buffer
+ * @_skip: number of output bytes to ignore
+ *
+ * This function takes compressed data from inbuf, decompresses and write it to
+ * outbuf. Once output_size bytes are written to the output buffer, or the
+ * stream is exhausted the function will return the number of bytes that were
+ * decompressed. Otherwise it will return whatever error code the decompressor
+ * reported (NB: This is specific to each decompressor type).
+ *
+ * The skip functionality is mainly there so the program and discover
+ * the size of the compressed image so that it can ask firmware (if present)
+ * for an appropriately sized buffer.
+ */
+long partial_decompress(void *inbuf, unsigned long input_size,
+ void *outbuf, unsigned long output_size, unsigned long _skip)
+{
+ int ret;
+
+ /*
+ * The skipped bytes needs to be included in the size of data we want
+ * to decompress.
+ */
+ output_size += _skip;
+
+ decompressed_bytes = 0;
+ output_buffer = outbuf;
+ limit = output_size;
+ skip = _skip;
+
+ ret = __decompress(inbuf, input_size, NULL, flush, outbuf,
+ output_size, NULL, print_err);
+
+ /*
+ * If decompression was aborted due to an actual error rather than
+ * a fake error that we used to abort, then we should report it.
+ */
+ if (decompressed_bytes < limit)
+ return ret;
+
+ return decompressed_bytes - skip;
+}
diff --git a/arch/powerpc/boot/devtree.c b/arch/powerpc/boot/devtree.c
index a7e21a35c03a..58fbcfcc98c9 100644
--- a/arch/powerpc/boot/devtree.c
+++ b/arch/powerpc/boot/devtree.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* devtree.c - convenience functions for device tree manipulation
* Copyright 2007 David Gibson, IBM Corporation.
@@ -5,11 +6,6 @@
*
* Authors: David Gibson <david@gibson.dropbear.id.au>
* Scott Wood <scottwood@freescale.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -17,6 +13,7 @@
#include "string.h"
#include "stdio.h"
#include "ops.h"
+#include "of.h"
void dt_fixup_memory(u64 start, u64 size)
{
@@ -27,21 +24,25 @@ void dt_fixup_memory(u64 start, u64 size)
root = finddevice("/");
if (getprop(root, "#address-cells", &naddr, sizeof(naddr)) < 0)
naddr = 2;
+ else
+ naddr = be32_to_cpu(naddr);
if (naddr < 1 || naddr > 2)
fatal("Can't cope with #address-cells == %d in /\n\r", naddr);
if (getprop(root, "#size-cells", &nsize, sizeof(nsize)) < 0)
nsize = 1;
+ else
+ nsize = be32_to_cpu(nsize);
if (nsize < 1 || nsize > 2)
fatal("Can't cope with #size-cells == %d in /\n\r", nsize);
i = 0;
if (naddr == 2)
- memreg[i++] = start >> 32;
- memreg[i++] = start & 0xffffffff;
+ memreg[i++] = cpu_to_be32(start >> 32);
+ memreg[i++] = cpu_to_be32(start & 0xffffffff);
if (nsize == 2)
- memreg[i++] = size >> 32;
- memreg[i++] = size & 0xffffffff;
+ memreg[i++] = cpu_to_be32(size >> 32);
+ memreg[i++] = cpu_to_be32(size & 0xffffffff);
memory = finddevice("/memory");
if (! memory) {
@@ -49,9 +50,9 @@ void dt_fixup_memory(u64 start, u64 size)
setprop_str(memory, "device_type", "memory");
}
- printf("Memory <- <0x%x", memreg[0]);
+ printf("Memory <- <0x%x", be32_to_cpu(memreg[0]));
for (i = 1; i < (naddr + nsize); i++)
- printf(" 0x%x", memreg[i]);
+ printf(" 0x%x", be32_to_cpu(memreg[i]));
printf("> (%ldMB)\n\r", (unsigned long)(size >> 20));
setprop(memory, "reg", memreg, (naddr + nsize)*sizeof(u32));
@@ -69,10 +70,10 @@ void dt_fixup_cpu_clocks(u32 cpu, u32 tb, u32 bus)
printf("CPU bus-frequency <- 0x%x (%dMHz)\n\r", bus, MHZ(bus));
while ((devp = find_node_by_devtype(devp, "cpu"))) {
- setprop_val(devp, "clock-frequency", cpu);
- setprop_val(devp, "timebase-frequency", tb);
+ setprop_val(devp, "clock-frequency", cpu_to_be32(cpu));
+ setprop_val(devp, "timebase-frequency", cpu_to_be32(tb));
if (bus > 0)
- setprop_val(devp, "bus-frequency", bus);
+ setprop_val(devp, "bus-frequency", cpu_to_be32(bus));
}
timebase_period_ns = 1000000000 / tb;
@@ -84,7 +85,7 @@ void dt_fixup_clock(const char *path, u32 freq)
if (devp) {
printf("%s: clock-frequency <- %x (%dMHz)\n\r", path, freq, MHZ(freq));
- setprop_val(devp, "clock-frequency", freq);
+ setprop_val(devp, "clock-frequency", cpu_to_be32(freq));
}
}
@@ -137,8 +138,12 @@ void dt_get_reg_format(void *node, u32 *naddr, u32 *nsize)
{
if (getprop(node, "#address-cells", naddr, 4) != 4)
*naddr = 2;
+ else
+ *naddr = be32_to_cpu(*naddr);
if (getprop(node, "#size-cells", nsize, 4) != 4)
*nsize = 1;
+ else
+ *nsize = be32_to_cpu(*nsize);
}
static void copy_val(u32 *dest, u32 *src, int naddr)
@@ -167,9 +172,9 @@ static int add_reg(u32 *reg, u32 *add, int naddr)
int i, carry = 0;
for (i = MAX_ADDR_CELLS - 1; i >= MAX_ADDR_CELLS - naddr; i--) {
- u64 tmp = (u64)reg[i] + add[i] + carry;
+ u64 tmp = (u64)be32_to_cpu(reg[i]) + be32_to_cpu(add[i]) + carry;
carry = tmp >> 32;
- reg[i] = (u32)tmp;
+ reg[i] = cpu_to_be32((u32)tmp);
}
return !carry;
@@ -184,18 +189,18 @@ static int compare_reg(u32 *reg, u32 *range, u32 *rangesize)
u32 end;
for (i = 0; i < MAX_ADDR_CELLS; i++) {
- if (reg[i] < range[i])
+ if (be32_to_cpu(reg[i]) < be32_to_cpu(range[i]))
return 0;
- if (reg[i] > range[i])
+ if (be32_to_cpu(reg[i]) > be32_to_cpu(range[i]))
break;
}
for (i = 0; i < MAX_ADDR_CELLS; i++) {
- end = range[i] + rangesize[i];
+ end = be32_to_cpu(range[i]) + be32_to_cpu(rangesize[i]);
- if (reg[i] < end)
+ if (be32_to_cpu(reg[i]) < end)
break;
- if (reg[i] > end)
+ if (be32_to_cpu(reg[i]) > end)
return 0;
}
@@ -244,7 +249,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
return 0;
dt_get_reg_format(parent, &naddr, &nsize);
-
if (nsize > 2)
return 0;
@@ -256,10 +260,10 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
copy_val(last_addr, prop_buf + offset, naddr);
- ret_size = prop_buf[offset + naddr];
+ ret_size = be32_to_cpu(prop_buf[offset + naddr]);
if (nsize == 2) {
ret_size <<= 32;
- ret_size |= prop_buf[offset + naddr + 1];
+ ret_size |= be32_to_cpu(prop_buf[offset + naddr + 1]);
}
for (;;) {
@@ -282,7 +286,6 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
offset = find_range(last_addr, prop_buf, prev_naddr,
naddr, prev_nsize, buflen / 4);
-
if (offset < 0)
return 0;
@@ -300,8 +303,7 @@ static int dt_xlate(void *node, int res, int reglen, unsigned long *addr,
if (naddr > 2)
return 0;
- ret_addr = ((u64)last_addr[2] << 32) | last_addr[3];
-
+ ret_addr = ((u64)be32_to_cpu(last_addr[2]) << 32) | be32_to_cpu(last_addr[3]);
if (sizeof(void *) == 4 &&
(ret_addr >= 0x100000000ULL || ret_size > 0x100000000ULL ||
ret_addr + ret_size > 0x100000000ULL))
@@ -354,11 +356,14 @@ int dt_is_compatible(void *node, const char *compat)
int dt_get_virtual_reg(void *node, void **addr, int nres)
{
unsigned long xaddr;
- int n;
+ int n, i;
n = getprop(node, "virtual-reg", addr, nres * 4);
- if (n > 0)
+ if (n > 0) {
+ for (i = 0; i < n/4; i ++)
+ ((u32 *)addr)[i] = be32_to_cpu(((u32 *)addr)[i]);
return n / 4;
+ }
for (n = 0; n < nres; n++) {
if (!dt_xlate_reg(node, n, &xaddr, NULL))
diff --git a/arch/powerpc/boot/div64.S b/arch/powerpc/boot/div64.S
index bbcb8a4cc121..4354928ed62e 100644
--- a/arch/powerpc/boot/div64.S
+++ b/arch/powerpc/boot/div64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Divide a 64-bit unsigned number by a 32-bit unsigned number.
* This routine assumes that the top 32 bits of the dividend are
@@ -7,11 +8,6 @@
* On exit, r3 contains the remainder.
*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include "ppc_asm.h"
diff --git a/arch/powerpc/boot/dts/Makefile b/arch/powerpc/boot/dts/Makefile
new file mode 100644
index 000000000000..0cd0d8558b47
--- /dev/null
+++ b/arch/powerpc/boot/dts/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts))
diff --git a/arch/powerpc/boot/dts/a3m071.dts b/arch/powerpc/boot/dts/a3m071.dts
index bf81b8f9704c..034cfd8aa95b 100644
--- a/arch/powerpc/boot/dts/a3m071.dts
+++ b/arch/powerpc/boot/dts/a3m071.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* a3m071 board Device Tree Source
*
@@ -8,11 +9,6 @@
*
* Copyright (C) 2007 Semihalf
* Marian Balakowicz <m8@semihalf.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -105,24 +101,24 @@
reg = <0 0x0 0x02000000>;
compatible = "cfi-flash";
bank-width = <2>;
- partition@0x0 {
+ partition@0 {
label = "u-boot";
reg = <0x00000000 0x00040000>;
read-only;
};
- partition@0x00040000 {
+ partition@40000 {
label = "env";
reg = <0x00040000 0x00020000>;
};
- partition@0x00060000 {
+ partition@60000 {
label = "dtb";
reg = <0x00060000 0x00020000>;
};
- partition@0x00080000 {
+ partition@80000 {
label = "kernel";
reg = <0x00080000 0x00500000>;
};
- partition@0x00580000 {
+ partition@580000 {
label = "root";
reg = <0x00580000 0x00A80000>;
};
diff --git a/arch/powerpc/boot/dts/a4m072.dts b/arch/powerpc/boot/dts/a4m072.dts
index 1f02034c7e99..d4270a2ec6c7 100644
--- a/arch/powerpc/boot/dts/a4m072.dts
+++ b/arch/powerpc/boot/dts/a4m072.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* a4m072 board Device Tree Source
*
@@ -6,11 +7,6 @@
*
* Copyright (C) 2007 Semihalf
* Marian Balakowicz <m8@semihalf.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -144,8 +140,8 @@
clock-frequency = <0>; /* From boot loader */
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000
- 0x02000000 0 0x90000000 0x90000000 0 0x10000000
- 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+ <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
};
};
diff --git a/arch/powerpc/boot/dts/ac14xx.dts b/arch/powerpc/boot/dts/ac14xx.dts
index a1b883730b31..5d8877e1f4ad 100644
--- a/arch/powerpc/boot/dts/ac14xx.dts
+++ b/arch/powerpc/boot/dts/ac14xx.dts
@@ -1,16 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree Source for the MPC5121e based ac14xx board
*
* Copyright 2012 Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "ac14xx";
@@ -176,12 +172,12 @@
clock-frequency = <400000>;
at24@30 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x30>;
};
at24@31 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x31>;
};
@@ -191,47 +187,47 @@
};
at24@50 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x50>;
};
at24@51 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x51>;
};
at24@52 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x52>;
};
at24@53 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x53>;
};
at24@54 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x54>;
};
at24@55 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x55>;
};
at24@56 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x56>;
};
at24@57 {
- compatible = "at24,24c01";
+ compatible = "atmel,24c01";
reg = <0x57>;
};
rtc@68 {
- compatible = "stm,m41t00";
+ compatible = "st,m41t00";
reg = <0x68>;
};
};
diff --git a/arch/powerpc/boot/dts/acadia.dts b/arch/powerpc/boot/dts/acadia.dts
deleted file mode 100644
index 57291f61ffe7..000000000000
--- a/arch/powerpc/boot/dts/acadia.dts
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Device Tree Source for AMCC Acadia (405EZ)
- *
- * Copyright IBM Corp. 2008
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "amcc,acadia";
- compatible = "amcc,acadia";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405EZ";
- reg = <0x0>;
- clock-frequency = <0>; /* Filled in by wrapper */
- timebase-frequency = <0>; /* Filled in by wrapper */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>;
- d-cache-size = <16384>;
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x0>; /* Filled in by wrapper */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic-405ez", "ibm,uic";
- interrupt-controller;
- dcr-reg = <0x0c0 0x009>;
- cell-index = <0>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- plb {
- compatible = "ibm,plb-405ez", "ibm,plb3";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by wrapper */
-
- MAL0: mcmal {
- compatible = "ibm,mcmal-405ez", "ibm,mcmal";
- dcr-reg = <0x380 0x62>;
- num-tx-chans = <1>;
- num-rx-chans = <1>;
- interrupt-parent = <&UIC0>;
- /* 405EZ has only 3 interrupts to the UIC, as
- * SERR, TXDE, and RXDE are or'd together into
- * one UIC bit
- */
- interrupts = <
- 0x13 0x4 /* TXEOB */
- 0x15 0x4 /* RXEOB */
- 0x12 0x4 /* SERR, TXDE, RXDE */>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405ez", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- dcr-reg = <0x0a 0x05>;
- clock-frequency = <0>; /* Filled in by wrapper */
-
- UART0: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x8>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by wrapper */
- current-speed = <115200>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x5 0x4>;
- };
-
- UART1: serial@ef600400 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600400 0x8>;
- clock-frequency = <0>; /* Filled in by wrapper */
- current-speed = <115200>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x6 0x4>;
- };
-
- IIC: i2c@ef600500 {
- compatible = "ibm,iic-405ez", "ibm,iic";
- reg = <0xef600500 0x11>;
- interrupt-parent = <&UIC0>;
- interrupts = <0xa 0x4>;
- };
-
- GPIO0: gpio@ef600700 {
- compatible = "ibm,gpio-405ez";
- reg = <0xef600700 0x20>;
- };
-
- GPIO1: gpio@ef600800 {
- compatible = "ibm,gpio-405ez";
- reg = <0xef600800 0x20>;
- };
-
- EMAC0: ethernet@ef600900 {
- device_type = "network";
- compatible = "ibm,emac-405ez", "ibm,emac";
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0x10 0x4 /* Ethernet */
- 0x11 0x4 /* Ethernet Wake up */>;
- local-mac-address = [000000000000]; /* Filled in by wrapper */
- reg = <0xef600900 0x70>;
- mal-device = <&MAL0>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <1500>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- phy-mode = "mii";
- phy-map = <0x0>;
- };
-
- CAN0: can@ef601000 {
- compatible = "amcc,can-405ez";
- reg = <0xef601000 0x620>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x7 0x4>;
- };
-
- CAN1: can@ef601800 {
- compatible = "amcc,can-405ez";
- reg = <0xef601800 0x620>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x8 0x4>;
- };
-
- cameleon@ef602000 {
- compatible = "amcc,cameleon-405ez";
- reg = <0xef602000 0x800>;
- interrupt-parent = <&UIC0>;
- interrupts = <0xb 0x4 0xc 0x4>;
- };
-
- ieee1588@ef602800 {
- compatible = "amcc,ieee1588-405ez";
- reg = <0xef602800 0x60>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x4 0x4>;
- /* This thing is a bit weird. It has it's own UIC
- * that it uses to generate snapshot triggers. We
- * don't really support this device yet, and it needs
- * work to figure this out.
- */
- dcr-reg = <0xe0 0x9>;
- };
-
- usb@ef603000 {
- compatible = "ohci-be";
- reg = <0xef603000 0x80>;
- interrupts-parent = <&UIC0>;
- interrupts = <0xd 0x4 0xe 0x4>;
- };
-
- dac@ef603300 {
- compatible = "amcc,dac-405ez";
- reg = <0xef603300 0x40>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x18 0x4>;
- };
-
- adc@ef603400 {
- compatible = "amcc,adc-405ez";
- reg = <0xef603400 0x40>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x17 0x4>;
- };
-
- spi@ef603500 {
- compatible = "amcc,spi-405ez";
- reg = <0xef603500 0x100>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x9 0x4>;
- };
- };
-
- EBC0: ebc {
- compatible = "ibm,ebc-405ez", "ibm,ebc";
- dcr-reg = <0x12 0x2>;
- #address-cells = <2>;
- #size-cells = <1>;
- clock-frequency = <0>; /* Filled in by wrapper */
- };
- };
-
- chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
- };
-};
diff --git a/arch/powerpc/boot/dts/adder875-redboot.dts b/arch/powerpc/boot/dts/adder875-redboot.dts
index 083984720b2f..b51c97abface 100644
--- a/arch/powerpc/boot/dts/adder875-redboot.dts
+++ b/arch/powerpc/boot/dts/adder875-redboot.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree Source for MPC885 ADS running RedBoot
*
* Copyright 2006 MontaVista Software, Inc.
* Copyright 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -178,6 +174,6 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
};
diff --git a/arch/powerpc/boot/dts/adder875-uboot.dts b/arch/powerpc/boot/dts/adder875-uboot.dts
index e4554caf8f8d..ec776103f540 100644
--- a/arch/powerpc/boot/dts/adder875-uboot.dts
+++ b/arch/powerpc/boot/dts/adder875-uboot.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree Source for MPC885 ADS running U-Boot
*
* Copyright 2006 MontaVista Software, Inc.
* Copyright 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -177,6 +173,6 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
};
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index f92ecfed3d2f..343326c30380 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -40,7 +40,7 @@
d-cache-size = <32768>;
dcr-controller;
dcr-access-method = "native";
- status = "ok";
+ status = "okay";
};
cpu@1 {
device_type = "cpu";
@@ -126,7 +126,7 @@
interrupts = <93 2>;
};
- EHCI0: ehci@30010000000 {
+ EHCI0: usb@30010000000 {
compatible = "ibm,476gtr-ehci", "generic-ehci";
reg = <0x300 0x10000000 0x0 0x10000>;
interrupt-parent = <&MPIC>;
@@ -140,14 +140,14 @@
interrupt-parent = <&MPIC>;
};
- OHCI0: ohci@30010010000 {
+ OHCI0: usb@30010010000 {
compatible = "ibm,476gtr-ohci", "generic-ohci";
reg = <0x300 0x10010000 0x0 0x10000>;
interrupt-parent = <&MPIC>;
interrupts = <89 1>;
};
- OHCI1: ohci@30010020000 {
+ OHCI1: usb@30010020000 {
compatible = "ibm,476gtr-ohci", "generic-ohci";
reg = <0x300 0x10020000 0x0 0x10000>;
interrupt-parent = <&MPIC>;
@@ -216,7 +216,7 @@
interrupts = <39 2>;
};
- IIC0: i2c@00000000 {
+ IIC0: i2c@0 {
compatible = "ibm,iic-476gtr", "ibm,iic";
reg = <0x0 0x00000020>;
interrupt-parent = <&MPIC>;
@@ -224,12 +224,12 @@
#address-cells = <1>;
#size-cells = <0>;
rtc@68 {
- compatible = "stm,m41t80", "m41st85";
+ compatible = "st,m41t80", "m41st85";
reg = <0x68>;
};
};
- IIC1: i2c@00000100 {
+ IIC1: i2c@100 {
compatible = "ibm,iic-476gtr", "ibm,iic";
reg = <0x100 0x00000020>;
interrupt-parent = <&MPIC>;
@@ -248,7 +248,7 @@
};
};
- PCIE0: pciex@10100000000 {
+ PCIE0: pcie@10100000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -288,7 +288,7 @@
0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>;
};
- PCIE1: pciex@20100000000 {
+ PCIE1: pcie@20100000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -328,7 +328,7 @@
0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>;
};
- PCIE2: pciex@18100000000 {
+ PCIE2: pcie@18100000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -368,7 +368,7 @@
0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>;
};
- PCIE3: pciex@28100000000 {
+ PCIE3: pcie@28100000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -410,6 +410,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/amigaone.dts b/arch/powerpc/boot/dts/amigaone.dts
index 49ac36b16dd7..5c68db36d83b 100644
--- a/arch/powerpc/boot/dts/amigaone.dts
+++ b/arch/powerpc/boot/dts/amigaone.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* AmigaOne Device Tree Source
*
* Copyright 2008 Gerhard Pircher (gerhard_pircher@gmx.net)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -168,6 +164,6 @@
};
chosen {
- linux,stdout-path = "/pci@80000000/isa@7/serial@3f8";
+ stdout-path = "/pci@80000000/isa@7/serial@3f8";
};
};
diff --git a/arch/powerpc/boot/dts/arches.dts b/arch/powerpc/boot/dts/arches.dts
index 30f41204acfa..75a376a99892 100644
--- a/arch/powerpc/boot/dts/arches.dts
+++ b/arch/powerpc/boot/dts/arches.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree Source for AMCC Arches (dual 460GT board)
*
@@ -11,21 +12,6 @@
*
* See file CREDITS for list of people who contributed to this
* project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/asp834x-redboot.dts b/arch/powerpc/boot/dts/asp834x-redboot.dts
index 9198745f45fb..52a84561c4f0 100644
--- a/arch/powerpc/boot/dts/asp834x-redboot.dts
+++ b/arch/powerpc/boot/dts/asp834x-redboot.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Analogue & Micro ASP8347 Device Tree Source
*
* Copyright 2008 Codehermit
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -304,7 +300,7 @@
chosen {
bootargs = "console=ttyS0,38400 root=/dev/mtdblock3 rootfstype=jffs2";
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/b4860emu.dts b/arch/powerpc/boot/dts/b4860emu.dts
deleted file mode 100644
index 85646b4f96e1..000000000000
--- a/arch/powerpc/boot/dts/b4860emu.dts
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * B4860 emulator Device Tree Source
- *
- * Copyright 2013 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * This software is provided by Freescale Semiconductor "as is" and any
- * express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are
- * disclaimed. In no event shall Freescale Semiconductor be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused and
- * on any theory of liability, whether in contract, strict liability, or tort
- * (including negligence or otherwise) arising in any way out of the use of
- * this software, even if advised of the possibility of such damage.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e6500_power_isa.dtsi"
-
-/ {
- compatible = "fsl,B4860";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- aliases {
- ccsr = &soc;
-
- serial0 = &serial0;
- serial1 = &serial1;
- serial2 = &serial2;
- serial3 = &serial3;
- dma0 = &dma0;
- dma1 = &dma1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu0: PowerPC,e6500@0 {
- device_type = "cpu";
- reg = <0 1>;
- next-level-cache = <&L2>;
- fsl,portid-mapping = <0x80000000>;
- };
- cpu1: PowerPC,e6500@2 {
- device_type = "cpu";
- reg = <2 3>;
- next-level-cache = <&L2>;
- fsl,portid-mapping = <0x80000000>;
- };
- cpu2: PowerPC,e6500@4 {
- device_type = "cpu";
- reg = <4 5>;
- next-level-cache = <&L2>;
- fsl,portid-mapping = <0x80000000>;
- };
- cpu3: PowerPC,e6500@6 {
- device_type = "cpu";
- reg = <6 7>;
- next-level-cache = <&L2>;
- fsl,portid-mapping = <0x80000000>;
- };
- };
-};
-
-/ {
- model = "fsl,B4860QDS";
- compatible = "fsl,B4860EMU", "fsl,B4860QDS";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- ifc: localbus@ffe124000 {
- reg = <0xf 0xfe124000 0 0x2000>;
- ranges = <0 0 0xf 0xe8000000 0x08000000
- 2 0 0xf 0xff800000 0x00010000
- 3 0 0xf 0xffdf0000 0x00008000>;
-
- nor@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0x0 0x0 0x8000000>;
- bank-width = <2>;
- device-width = <1>;
- };
- };
-
- memory {
- device_type = "memory";
- };
-
- soc: soc@ffe000000 {
- ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
- reg = <0xf 0xfe000000 0 0x00001000>;
- };
-};
-
-&ifc {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
- interrupts = <25 2 0 0>;
-};
-
-&soc {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
-
- soc-sram-error {
- compatible = "fsl,soc-sram-error";
- interrupts = <16 2 1 2>;
- };
-
- corenet-law@0 {
- compatible = "fsl,corenet-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <32>;
- };
-
- ddr1: memory-controller@8000 {
- compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
- reg = <0x8000 0x1000>;
- interrupts = <16 2 1 8>;
- };
-
- ddr2: memory-controller@9000 {
- compatible = "fsl,qoriq-memory-controller-v4.5","fsl,qoriq-memory-controller";
- reg = <0x9000 0x1000>;
- interrupts = <16 2 1 9>;
- };
-
- cpc: l3-cache-controller@10000 {
- compatible = "fsl,b4-l3-cache-controller", "cache";
- reg = <0x10000 0x1000
- 0x11000 0x1000>;
- interrupts = <16 2 1 4>;
- };
-
- corenet-cf@18000 {
- compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
- reg = <0x18000 0x1000>;
- interrupts = <16 2 1 0>;
- fsl,ccf-num-csdids = <32>;
- fsl,ccf-num-snoopids = <32>;
- };
-
- iommu@20000 {
- compatible = "fsl,pamu-v1.0", "fsl,pamu";
- reg = <0x20000 0x4000>;
- fsl,portid-mapping = <0x8000>;
- #address-cells = <1>;
- #size-cells = <1>;
- interrupts = <
- 24 2 0 0
- 16 2 1 1>;
- pamu0: pamu@0 {
- reg = <0 0x1000>;
- fsl,primary-cache-geometry = <8 1>;
- fsl,secondary-cache-geometry = <32 2>;
- };
- };
-
-/include/ "fsl/qoriq-mpic.dtsi"
-
- guts: global-utilities@e0000 {
- compatible = "fsl,b4-device-config";
- reg = <0xe0000 0xe00>;
- fsl,has-rstcr;
- fsl,liodn-bits = <12>;
- };
-
- clockgen: global-utilities@e1000 {
- compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0";
- reg = <0xe1000 0x1000>;
- };
-
-/include/ "fsl/qoriq-dma-0.dtsi"
- dma@100300 {
- fsl,iommu-parent = <&pamu0>;
- fsl,liodn-reg = <&guts 0x580>; /* DMA1LIODNR */
- };
-
-/include/ "fsl/qoriq-dma-1.dtsi"
- dma@101300 {
- fsl,iommu-parent = <&pamu0>;
- fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
- };
-
-/include/ "fsl/qoriq-i2c-0.dtsi"
-/include/ "fsl/qoriq-i2c-1.dtsi"
-/include/ "fsl/qoriq-duart-0.dtsi"
-/include/ "fsl/qoriq-duart-1.dtsi"
-
- L2: l2-cache-controller@c20000 {
- compatible = "fsl,b4-l2-cache-controller";
- reg = <0xc20000 0x1000>;
- next-level-cache = <&cpc>;
- };
-};
diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts
index aa68911f6560..b5861fa3836c 100644
--- a/arch/powerpc/boot/dts/bamboo.dts
+++ b/arch/powerpc/boot/dts/bamboo.dts
@@ -268,8 +268,10 @@
/* Outbound ranges, one memory and one IO,
* later cannot be changed. Chip supports a second
* IO range but we don't use it for now
+ * The chip also supports a larger memory range but
+ * it's not naturally aligned, so our code will break
*/
- ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x40000000
+ ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x20000000
0x02000000 0x00000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00100000
0x01000000 0x00000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
@@ -295,6 +297,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
index 7daaca324c01..6971595319c1 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -1,24 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree for Bluestone (APM821xx) board.
*
* Copyright (c) 2010, Applied Micro Circuits Corporation
* Author: Tirumala R Marri <tmarri@apm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- *
*/
/dts-v1/;
@@ -109,7 +94,7 @@
OCM: ocm@400040000 {
compatible = "ibm,ocm";
- status = "ok";
+ status = "okay";
cell-index = <1>;
/* configured in U-Boot */
reg = <4 0x00040000 0x8000>; /* 32K */
@@ -279,7 +264,7 @@
#address-cells = <1>;
#size-cells = <0>;
rtc@68 {
- compatible = "stm,m41t80";
+ compatible = "st,m41t80";
reg = <0x68>;
interrupt-parent = <&UIC0>;
interrupts = <0x9 0x8>;
@@ -340,7 +325,7 @@
};
};
- PCIE0: pciex@d00000000 {
+ PCIE0: pcie@d00000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -381,30 +366,5 @@
0x0 0x0 0x0 0x3 &UIC3 0xe 0x4 /* swizzled int C */
0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
};
-
- MSI: ppc4xx-msi@C10000000 {
- compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
- reg = < 0xC 0x10000000 0x100
- 0xC 0x10000000 0x100>;
- sdr-base = <0x36C>;
- msi-data = <0x00004440>;
- msi-mask = <0x0000ffe0>;
- interrupts =<0 1 2 3 4 5 6 7>;
- interrupt-parent = <&MSI>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- msi-available-ranges = <0x0 0x100>;
- interrupt-map = <
- 0 &UIC3 0x18 1
- 1 &UIC3 0x19 1
- 2 &UIC3 0x1A 1
- 3 &UIC3 0x1B 1
- 4 &UIC3 0x1C 1
- 5 &UIC3 0x1D 1
- 6 &UIC3 0x1E 1
- 7 &UIC3 0x1F 1
- >;
- };
};
};
diff --git a/arch/powerpc/boot/dts/bsc9132qds.dts b/arch/powerpc/boot/dts/bsc9132qds.dts
deleted file mode 100644
index 6cab1062bc74..000000000000
--- a/arch/powerpc/boot/dts/bsc9132qds.dts
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * BSC9132 QDS Device Tree Source
- *
- * Copyright 2014 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/include/ "fsl/bsc9132si-pre.dtsi"
-
-/ {
- model = "fsl,bsc9132qds";
- compatible = "fsl,bsc9132qds";
-
- memory {
- device_type = "memory";
- };
-
- ifc: ifc@ff71e000 {
- /* NOR, NAND Flash on board */
- ranges = <0x0 0x0 0x0 0x88000000 0x08000000
- 0x1 0x0 0x0 0xff800000 0x00010000>;
- reg = <0x0 0xff71e000 0x0 0x2000>;
- };
-
- soc: soc@ff700000 {
- ranges = <0x0 0x0 0xff700000 0x100000>;
- };
-};
-
-/include/ "bsc9132qds.dtsi"
-/include/ "fsl/bsc9132si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/c2k.dts b/arch/powerpc/boot/dts/c2k.dts
deleted file mode 100644
index 1e32903cb0a8..000000000000
--- a/arch/powerpc/boot/dts/c2k.dts
+++ /dev/null
@@ -1,366 +0,0 @@
-/* Device Tree Source for GEFanuc C2K
- *
- * Author: Remi Machet <rmachet@slac.stanford.edu>
- *
- * Originated from prpmc2800.dts
- *
- * 2008 (c) Stanford University
- * 2007 (c) MontaVista, Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "C2K";
- compatible = "GEFanuc,C2K";
- coherency-off;
-
- aliases {
- pci0 = &PCI0;
- pci1 = &PCI1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- compatible = "PowerPC,7447";
- reg = <0>;
- clock-frequency = <996000000>; /* 996 MHz */
- bus-frequency = <166666667>; /* 166.6666 MHz */
- timebase-frequency = <41666667>; /* 166.6666/4 MHz */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <32768>;
- d-cache-size = <32768>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x40000000>; /* 1GB */
- };
-
- system-controller@d8000000 { /* Marvell Discovery */
- #address-cells = <1>;
- #size-cells = <1>;
- model = "mv64460";
- compatible = "marvell,mv64360";
- clock-frequency = <166666667>; /* 166.66... MHz */
- reg = <0xd8000000 0x00010000>;
- virtual-reg = <0xd8000000>;
- ranges = <0xd4000000 0xd4000000 0x01000000 /* PCI 0 I/O Space */
- 0x80000000 0x80000000 0x08000000 /* PCI 0 MEM Space */
- 0xd0000000 0xd0000000 0x01000000 /* PCI 1 I/O Space */
- 0xa0000000 0xa0000000 0x08000000 /* PCI 1 MEM Space */
- 0xd8100000 0xd8100000 0x00010000 /* FPGA */
- 0xd8110000 0xd8110000 0x00010000 /* FPGA USARTs */
- 0xf8000000 0xf8000000 0x08000000 /* User FLASH */
- 0x00000000 0xd8000000 0x00010000 /* Bridge's regs */
- 0xd8140000 0xd8140000 0x00040000>; /* Integrated SRAM */
-
- mdio@2000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "marvell,mv64360-mdio";
- reg = <0x2000 4>;
- PHY0: ethernet-phy@0 {
- interrupts = <76>; /* GPP 12 */
- interrupt-parent = <&PIC>;
- reg = <0>;
- };
- PHY1: ethernet-phy@1 {
- interrupts = <76>; /* GPP 12 */
- interrupt-parent = <&PIC>;
- reg = <1>;
- };
- PHY2: ethernet-phy@2 {
- interrupts = <76>; /* GPP 12 */
- interrupt-parent = <&PIC>;
- reg = <2>;
- };
- };
-
- ethernet-group@2000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "marvell,mv64360-eth-group";
- reg = <0x2000 0x2000>;
- ethernet@0 {
- device_type = "network";
- compatible = "marvell,mv64360-eth";
- reg = <0>;
- interrupts = <32>;
- interrupt-parent = <&PIC>;
- phy = <&PHY0>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- };
- ethernet@1 {
- device_type = "network";
- compatible = "marvell,mv64360-eth";
- reg = <1>;
- interrupts = <33>;
- interrupt-parent = <&PIC>;
- phy = <&PHY1>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- };
- ethernet@2 {
- device_type = "network";
- compatible = "marvell,mv64360-eth";
- reg = <2>;
- interrupts = <34>;
- interrupt-parent = <&PIC>;
- phy = <&PHY2>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- };
- };
-
- SDMA0: sdma@4000 {
- compatible = "marvell,mv64360-sdma";
- reg = <0x4000 0xc18>;
- virtual-reg = <0xd8004000>;
- interrupt-base = <0>;
- interrupts = <36>;
- interrupt-parent = <&PIC>;
- };
-
- SDMA1: sdma@6000 {
- compatible = "marvell,mv64360-sdma";
- reg = <0x6000 0xc18>;
- virtual-reg = <0xd8006000>;
- interrupt-base = <0>;
- interrupts = <38>;
- interrupt-parent = <&PIC>;
- };
-
- BRG0: brg@b200 {
- compatible = "marvell,mv64360-brg";
- reg = <0xb200 0x8>;
- clock-src = <8>;
- clock-frequency = <133333333>;
- current-speed = <115200>;
- };
-
- BRG1: brg@b208 {
- compatible = "marvell,mv64360-brg";
- reg = <0xb208 0x8>;
- clock-src = <8>;
- clock-frequency = <133333333>;
- current-speed = <115200>;
- };
-
- CUNIT: cunit@f200 {
- reg = <0xf200 0x200>;
- };
-
- MPSCROUTING: mpscrouting@b400 {
- reg = <0xb400 0xc>;
- };
-
- MPSCINTR: mpscintr@b800 {
- reg = <0xb800 0x100>;
- virtual-reg = <0xd800b800>;
- };
-
- MPSC0: mpsc@8000 {
- compatible = "marvell,mv64360-mpsc";
- reg = <0x8000 0x38>;
- virtual-reg = <0xd8008000>;
- sdma = <&SDMA0>;
- brg = <&BRG0>;
- cunit = <&CUNIT>;
- mpscrouting = <&MPSCROUTING>;
- mpscintr = <&MPSCINTR>;
- cell-index = <0>;
- interrupts = <40>;
- interrupt-parent = <&PIC>;
- };
-
- MPSC1: mpsc@9000 {
- compatible = "marvell,mv64360-mpsc";
- reg = <0x9000 0x38>;
- virtual-reg = <0xd8009000>;
- sdma = <&SDMA1>;
- brg = <&BRG1>;
- cunit = <&CUNIT>;
- mpscrouting = <&MPSCROUTING>;
- mpscintr = <&MPSCINTR>;
- cell-index = <1>;
- interrupts = <42>;
- interrupt-parent = <&PIC>;
- };
-
- wdt@b410 { /* watchdog timer */
- compatible = "marvell,mv64360-wdt";
- reg = <0xb410 0x8>;
- };
-
- i2c@c000 {
- compatible = "marvell,mv64360-i2c";
- reg = <0xc000 0x20>;
- virtual-reg = <0xd800c000>;
- interrupts = <37>;
- interrupt-parent = <&PIC>;
- };
-
- PIC: pic {
- #interrupt-cells = <1>;
- #address-cells = <0>;
- compatible = "marvell,mv64360-pic";
- reg = <0x0000 0x88>;
- interrupt-controller;
- };
-
- mpp@f000 {
- compatible = "marvell,mv64360-mpp";
- reg = <0xf000 0x10>;
- };
-
- gpp@f100 {
- compatible = "marvell,mv64360-gpp";
- reg = <0xf100 0x20>;
- };
-
- PCI0: pci@80000000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "marvell,mv64360-pci";
- reg = <0x0cf8 0x8>;
- ranges = <0x01000000 0x0 0x00000000 0xd4000000 0x0 0x01000000
- 0x02000000 0x0 0x80000000 0x80000000 0x0 0x08000000>;
- bus-range = <0 255>;
- clock-frequency = <66000000>;
- interrupt-pci-iack = <0x0c34>;
- interrupt-parent = <&PIC>;
- interrupt-map-mask = <0x0000 0x0 0x0 0x7>;
- interrupt-map = <
- /* Only one interrupt line for PMC0 slot (INTA) */
- 0x0000 0 0 1 &PIC 88
- >;
- };
-
-
- PCI1: pci@a0000000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "marvell,mv64360-pci";
- reg = <0x0c78 0x8>;
- ranges = <0x01000000 0x0 0x00000000 0xd0000000 0x0 0x01000000
- 0x02000000 0x0 0x80000000 0xa0000000 0x0 0x08000000>;
- bus-range = <0 255>;
- clock-frequency = <66000000>;
- interrupt-pci-iack = <0x0cb4>;
- interrupt-parent = <&PIC>;
- interrupt-map-mask = <0xf800 0x00 0x00 0x7>;
- interrupt-map = <
- /* IDSEL 0x01: PMC1 ? */
- 0x0800 0 0 1 &PIC 88
- /* IDSEL 0x02: cPCI bridge */
- 0x1000 0 0 1 &PIC 88
- /* IDSEL 0x03: USB controller */
- 0x1800 0 0 1 &PIC 91
- /* IDSEL 0x04: SATA controller */
- 0x2000 0 0 1 &PIC 95
- >;
- };
-
- cpu-error@0070 {
- compatible = "marvell,mv64360-cpu-error";
- reg = <0x0070 0x10 0x0128 0x28>;
- interrupts = <3>;
- interrupt-parent = <&PIC>;
- };
-
- sram-ctrl@0380 {
- compatible = "marvell,mv64360-sram-ctrl";
- reg = <0x0380 0x80>;
- interrupts = <13>;
- interrupt-parent = <&PIC>;
- };
-
- pci-error@1d40 {
- compatible = "marvell,mv64360-pci-error";
- reg = <0x1d40 0x40 0x0c28 0x4>;
- interrupts = <12>;
- interrupt-parent = <&PIC>;
- };
-
- pci-error@1dc0 {
- compatible = "marvell,mv64360-pci-error";
- reg = <0x1dc0 0x40 0x0ca8 0x4>;
- interrupts = <16>;
- interrupt-parent = <&PIC>;
- };
-
- mem-ctrl@1400 {
- compatible = "marvell,mv64360-mem-ctrl";
- reg = <0x1400 0x60>;
- interrupts = <17>;
- interrupt-parent = <&PIC>;
- };
- /* Devices attached to the device controller */
- devicebus@045c {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "marvell,mv64306-devctrl";
- reg = <0x45C 0x88>;
- interrupts = <1>;
- interrupt-parent = <&PIC>;
- ranges = <0 0 0xd8100000 0x10000
- 2 0 0xd8110000 0x10000
- 4 0 0xf8000000 0x8000000>;
- fpga@0,0 {
- compatible = "sbs,fpga-c2k";
- reg = <0 0 0x10000>;
- };
- fpga_usart@2,0 {
- compatible = "sbs,fpga_usart-c2k";
- reg = <2 0 0x10000>;
- };
- nor_flash@4,0 {
- compatible = "cfi-flash";
- reg = <4 0 0x8000000>; /* 128MB */
- bank-width = <4>;
- device-width = <1>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "boot";
- reg = <0x00000000 0x00080000>;
- };
- partition@40000 {
- label = "kernel";
- reg = <0x00080000 0x00400000>;
- };
- partition@440000 {
- label = "initrd";
- reg = <0x00480000 0x00B80000>;
- };
- partition@1000000 {
- label = "rootfs";
- reg = <0x01000000 0x06800000>;
- };
- partition@7800000 {
- label = "recovery";
- reg = <0x07800000 0x00800000>;
- read-only;
- };
- };
- };
- };
- chosen {
- linux,stdout-path = &MPSC0;
- };
-};
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
index 3dc75deafbb3..5db1bff6b23d 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -190,12 +190,21 @@
/* DMA */ 0x2 &UIC0 0xc 0x4>;
};
+ AHBDMA: dma@bffd0800 {
+ compatible = "snps,dma-spear1340";
+ reg = <4 0xbffd0800 0x400>;
+ interrupt-parent = <&UIC3>;
+ interrupts = <0x5 0x4>;
+ #dma-cells = <3>;
+ };
+
SATA0: sata@bffd1000 {
compatible = "amcc,sata-460ex";
- reg = <4 0xbffd1000 0x800 4 0xbffd0800 0x400>;
+ reg = <4 0xbffd1000 0x800>;
interrupt-parent = <&UIC3>;
- interrupts = <0x0 0x4 /* SATA */
- 0x5 0x4>; /* AHBDMA */
+ interrupts = <0x0 0x4>;
+ dmas = <&AHBDMA 0 1 0>;
+ dma-names = "sata-dma";
};
POB0: opb {
@@ -310,7 +319,7 @@
#address-cells = <1>;
#size-cells = <0>;
rtc@68 {
- compatible = "stm,m41t80";
+ compatible = "st,m41t80";
reg = <0x68>;
interrupt-parent = <&UIC2>;
interrupts = <0x19 0x8>;
@@ -452,7 +461,7 @@
interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >;
};
- PCIE0: pciex@d00000000 {
+ PCIE0: pcie@d00000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -494,7 +503,7 @@
0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@d20000000 {
+ PCIE1: pcie@d20000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -535,23 +544,5 @@
0x0 0x0 0x0 0x3 &UIC3 0x12 0x4 /* swizzled int C */
0x0 0x0 0x0 0x4 &UIC3 0x13 0x4 /* swizzled int D */>;
};
-
- MSI: ppc4xx-msi@C10000000 {
- compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
- reg = < 0xC 0x10000000 0x100>;
- sdr-base = <0x36C>;
- msi-data = <0x00000000>;
- msi-mask = <0x44440000>;
- interrupt-count = <3>;
- interrupts = <0 1 2 3>;
- interrupt-parent = <&UIC3>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = <0 &UIC3 0x18 1
- 1 &UIC3 0x19 1
- 2 &UIC3 0x1A 1
- 3 &UIC3 0x1B 1>;
- };
};
};
diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts
index 0e00e508eaa6..ea6e76ae2545 100644
--- a/arch/powerpc/boot/dts/charon.dts
+++ b/arch/powerpc/boot/dts/charon.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* charon board Device Tree Source
*
@@ -6,11 +7,6 @@
*
* Copyright (C) 2010 DENX Software Engineering GmbH
* Heiko Schocher <hs@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -39,7 +35,7 @@
};
};
- memory {
+ memory@0 {
device_type = "memory";
reg = <0x00000000 0x08000000>; // 128MB
};
@@ -229,8 +225,8 @@
clock-frequency = <0>; // From boot loader
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000
- 0x02000000 0 0x90000000 0x90000000 0 0x10000000
- 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+ <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
};
};
diff --git a/arch/powerpc/boot/dts/cm5200.dts b/arch/powerpc/boot/dts/cm5200.dts
index fb580dd84ddf..66cae7be60c4 100644
--- a/arch/powerpc/boot/dts/cm5200.dts
+++ b/arch/powerpc/boot/dts/cm5200.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* CM5200 board Device Tree Source
*
* Copyright (C) 2007 Semihalf
* Marian Balakowicz <m8@semihalf.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index d2c8a872308e..aea8af810106 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -39,7 +39,7 @@
d-cache-size = <32768>;
dcr-controller;
dcr-access-method = "native";
- status = "ok";
+ status = "okay";
};
cpu@1 {
device_type = "cpu";
@@ -108,7 +108,7 @@
reg = <0x50000000 0x4>;
};
- IIC0: i2c@00000000 {
+ IIC0: i2c@0 {
compatible = "ibm,iic-currituck", "ibm,iic";
reg = <0x0 0x00000014>;
interrupt-parent = <&MPIC>;
@@ -116,13 +116,13 @@
#address-cells = <1>;
#size-cells = <0>;
rtc@68 {
- compatible = "stm,m41t80", "m41st85";
+ compatible = "st,m41t80", "m41st85";
reg = <0x68>;
};
};
};
- PCIE0: pciex@10100000000 { // 4xGBIF1
+ PCIE0: pcie@10100000000 { // 4xGBIF1
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -160,7 +160,7 @@
0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>;
};
- PCIE1: pciex@30100000000 { // 4xGBIF0
+ PCIE1: pcie@30100000000 { // 4xGBIF0
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -197,7 +197,7 @@
0x0 0x0 0x0 0x4 &MPIC 41 0x2 /* int D */>;
};
- PCIE2: pciex@38100000000 { // 2xGBIF0
+ PCIE2: pcie@38100000000 { // 2xGBIF0
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -237,6 +237,6 @@
};
chosen {
- linux,stdout-path = &UART0;
+ stdout-path = &UART0;
};
};
diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts
index 955bff629df3..dfaf974c0ce6 100644
--- a/arch/powerpc/boot/dts/digsy_mtc.dts
+++ b/arch/powerpc/boot/dts/digsy_mtc.dts
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Digsy MTC board Device Tree Source
*
* Copyright (C) 2009 Semihalf
*
* Based on the CM5200 by M. Balakowicz
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -20,7 +16,7 @@
model = "intercontrol,digsy-mtc";
compatible = "intercontrol,digsy-mtc";
- memory {
+ memory@0 {
reg = <0x00000000 0x02000000>; // 32MB
};
@@ -29,14 +25,6 @@
status = "disabled";
};
- spi@f00 {
- msp430@0 {
- compatible = "spidev";
- spi-max-frequency = <32000>;
- reg = <0>;
- };
- };
-
psc@2000 { // PSC1
status = "disabled";
};
@@ -73,12 +61,12 @@
i2c@3d00 {
eeprom@50 {
- compatible = "at,24c08";
+ compatible = "atmel,24c08";
reg = <0x50>;
};
rtc@56 {
- compatible = "mc,rv3029c2";
+ compatible = "microcrystal,rv3029";
reg = <0x56>;
};
@@ -102,9 +90,9 @@
clock-frequency = <0>; // From boot loader
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000
- 0x02000000 0 0x90000000 0x90000000 0 0x10000000
- 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+ <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
};
localbus {
diff --git a/arch/powerpc/boot/dts/ebony.dts b/arch/powerpc/boot/dts/ebony.dts
index ec2d142291b4..5d11e6ea7405 100644
--- a/arch/powerpc/boot/dts/ebony.dts
+++ b/arch/powerpc/boot/dts/ebony.dts
@@ -332,6 +332,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/eiger.dts b/arch/powerpc/boot/dts/eiger.dts
index 48bcf7187924..7a1231d9d6f0 100644
--- a/arch/powerpc/boot/dts/eiger.dts
+++ b/arch/powerpc/boot/dts/eiger.dts
@@ -421,7 +421,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/ep405.dts b/arch/powerpc/boot/dts/ep405.dts
deleted file mode 100644
index 53ef06cc2134..000000000000
--- a/arch/powerpc/boot/dts/ep405.dts
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * Device Tree Source for EP405
- *
- * Copyright 2007 IBM Corp.
- * Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "ep405";
- compatible = "ep405";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405GP";
- reg = <0x00000000>;
- clock-frequency = <200000000>; /* Filled in by zImage */
- timebase-frequency = <0>; /* Filled in by zImage */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>;
- d-cache-size = <16384>;
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by zImage */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- plb {
- compatible = "ibm,plb3";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by zImage */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405gp";
- dcr-reg = <0x010 0x002>;
- };
-
- MAL: mcmal {
- compatible = "ibm,mcmal-405gp", "ibm,mcmal";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <1>;
- num-rx-chans = <1>;
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0xb 0x4 /* TXEOB */
- 0xc 0x4 /* RXEOB */
- 0xa 0x4 /* SERR */
- 0xd 0x4 /* TXDE */
- 0xe 0x4 /* RXDE */>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405gp", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0xef600000 0xef600000 0x00a00000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by zImage */
-
- UART0: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by zImage */
- current-speed = <9600>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x0 0x4>;
- };
-
- UART1: serial@ef600400 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600400 0x00000008>;
- virtual-reg = <0xef600400>;
- clock-frequency = <0>; /* Filled in by zImage */
- current-speed = <9600>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- IIC: i2c@ef600500 {
- compatible = "ibm,iic-405gp", "ibm,iic";
- reg = <0xef600500 0x00000011>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
- };
-
- GPIO: gpio@ef600700 {
- compatible = "ibm,gpio-405gp";
- reg = <0xef600700 0x00000020>;
- };
-
- EMAC: ethernet@ef600800 {
- linux,network-index = <0x0>;
- device_type = "network";
- compatible = "ibm,emac-405gp", "ibm,emac";
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0xf 0x4 /* Ethernet */
- 0x9 0x4 /* Ethernet Wake Up */>;
- local-mac-address = [000000000000]; /* Filled in by zImage */
- reg = <0xef600800 0x00000070>;
- mal-device = <&MAL>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <1500>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- phy-mode = "rmii";
- phy-map = <0x00000000>;
- };
-
- };
-
- EBC0: ebc {
- compatible = "ibm,ebc-405gp", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
-
-
- /* The ranges property is supplied by the bootwrapper
- * and is based on the firmware's configuration of the
- * EBC bridge
- */
- clock-frequency = <0>; /* Filled in by zImage */
-
- /* NVRAM and RTC */
- nvrtc@4,200000 {
- compatible = "ds1742";
- reg = <0x00000004 0x00200000 0x00000000>; /* size fixed up by zImage */
- };
-
- /* "BCSR" CPLD contains a PCI irq controller */
- bcsr@4,0 {
- compatible = "ep405-bcsr";
- reg = <0x00000004 0x00000000 0x00000010>;
- interrupt-controller;
- /* Routing table */
- irq-routing = [ 00 /* SYSERR */
- 01 /* STTM */
- 01 /* RTC */
- 01 /* FENET */
- 02 /* NB PCIIRQ mux ? */
- 03 /* SB Winbond 8259 ? */
- 04 /* Serial Ring */
- 05 /* USB (ep405pc) */
- 06 /* XIRQ 0 */
- 06 /* XIRQ 1 */
- 06 /* XIRQ 2 */
- 06 /* XIRQ 3 */
- 06 /* XIRQ 4 */
- 06 /* XIRQ 5 */
- 06 /* XIRQ 6 */
- 07]; /* Reserved */
- };
- };
-
- PCI0: pci@ec000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb405gp-pci", "ibm,plb-pci";
- primary;
- reg = <0xeec00000 0x00000008 /* Config space access */
- 0xeed80000 0x00000004 /* IACK */
- 0xeed80000 0x00000004 /* Special cycle */
- 0xef480000 0x00000040>; /* Internal registers */
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed. Chip supports a second
- * IO range but we don't use it for now
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000
- 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* That's all I know about IRQs on that thing ... */
- interrupt-map-mask = <0xf800 0x0 0x0 0x0>;
- interrupt-map = <
- /* USB */
- 0x7000 0x0 0x0 0x0 &UIC0 0x1e 0x8 /* IRQ5 */
- >;
- };
- };
-
- chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
- };
-};
diff --git a/arch/powerpc/boot/dts/ep8248e.dts b/arch/powerpc/boot/dts/ep8248e.dts
index 8b3a49f34f5a..9ae2d92f54f0 100644
--- a/arch/powerpc/boot/dts/ep8248e.dts
+++ b/arch/powerpc/boot/dts/ep8248e.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree for the Embedded Planet EP8248E board running PlanetCore.
*
* Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/ep88xc.dts b/arch/powerpc/boot/dts/ep88xc.dts
index 2aa5bf559645..b6b7e97876ad 100644
--- a/arch/powerpc/boot/dts/ep88xc.dts
+++ b/arch/powerpc/boot/dts/ep88xc.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* EP88xC Device Tree Source
*
* Copyright 2006 MontaVista Software, Inc.
* Copyright 2007,2008 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/fsl/Makefile b/arch/powerpc/boot/dts/fsl/Makefile
new file mode 100644
index 000000000000..d3ecdf14bc42
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(src)/%.dts,%.dtb, $(wildcard $(src)/*.dts))
diff --git a/arch/powerpc/boot/dts/b4420qds.dts b/arch/powerpc/boot/dts/fsl/b4420qds.dts
index 508dbdf33c81..cd9203ceedc0 100644
--- a/arch/powerpc/boot/dts/b4420qds.dts
+++ b/arch/powerpc/boot/dts/fsl/b4420qds.dts
@@ -32,7 +32,7 @@
* this software, even if advised of the possibility of such damage.
*/
-/include/ "fsl/b4420si-pre.dtsi"
+/include/ "b4420si-pre.dtsi"
/include/ "b4qds.dtsi"
/ {
@@ -47,4 +47,4 @@
};
-/include/ "fsl/b4420si-post.dtsi"
+/include/ "b4420si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
index d67894459ac8..f996cced45e0 100644
--- a/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4420si-post.dtsi
@@ -80,51 +80,18 @@
compatible = "fsl,b4420-device-config", "fsl,qoriq-device-config-2.0";
};
- clockgen: global-utilities@e1000 {
- compatible = "fsl,b4420-clockgen", "fsl,qoriq-clockgen-2.0";
- ranges = <0x0 0xe1000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-2.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2", "pll0-div4";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2", "pll1-div4";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll0-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux0";
- };
+ global-utilities@e1000 {
+ compatible = "fsl,b4420-clockgen", "fsl,b4-clockgen",
+ "fsl,qoriq-clockgen-2.0";
};
rcpm: global-utilities@e2000 {
compatible = "fsl,b4420-rcpm", "fsl,qoriq-rcpm-2.0";
};
- L2: l2-cache-controller@c20000 {
+ L2_1: l2-cache-controller@c20000 {
compatible = "fsl,b4420-l2-cache-controller";
+ reg = <0xc20000 0x40000>;
+ next-level-cache = <&cpc>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
index 338af7e39dd9..bb7b9b9f3f5f 100644
--- a/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4420si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* B4420 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -51,11 +51,17 @@
serial2 = &serial2;
serial3 = &serial3;
pci0 = &pci0;
+ usb0 = &usb0;
dma0 = &dma0;
dma1 = &dma1;
sdhc = &sdhc;
- };
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ };
cpus {
#address-cells = <1>;
@@ -64,15 +70,15 @@
cpu0: PowerPC,e6500@0 {
device_type = "cpu";
reg = <0 1>;
- clocks = <&mux0>;
- next-level-cache = <&L2>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu1: PowerPC,e6500@2 {
device_type = "cpu";
reg = <2 3>;
- clocks = <&mux0>;
- next-level-cache = <&L2>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/b4860qds.dts b/arch/powerpc/boot/dts/fsl/b4860qds.dts
new file mode 100644
index 000000000000..a8bc419959ca
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/b4860qds.dts
@@ -0,0 +1,117 @@
+/*
+ * B4860DS Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "b4860si-pre.dtsi"
+/include/ "b4qds.dtsi"
+
+/ {
+ model = "fsl,B4860QDS";
+ compatible = "fsl,B4860QDS";
+
+ aliases {
+ phy_sgmii_1e = &phy_sgmii_1e;
+ phy_sgmii_1f = &phy_sgmii_1f;
+ phy_xaui_slot1 = &phy_xaui_slot1;
+ phy_xaui_slot2 = &phy_xaui_slot2;
+ };
+
+ ifc: localbus@ffe124000 {
+ board-control@3,0 {
+ compatible = "fsl,b4860qds-fpga", "fsl,fpga-qixis";
+ };
+ };
+
+ soc@ffe000000 {
+ fman@400000 {
+ ethernet@e8000 {
+ phy-handle = <&phy_sgmii_1e>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@ea000 {
+ phy-handle = <&phy_sgmii_1f>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xaui_slot1>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&phy_xaui_slot2>;
+ phy-connection-type = "xgmii";
+ };
+
+ mdio@fc000 {
+ phy_sgmii_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ status = "disabled";
+ };
+
+ phy_sgmii_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ status = "disabled";
+ };
+ };
+
+ mdio@fd000 {
+ phy_xaui_slot1: xaui-phy@slot1 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x7>;
+ status = "disabled";
+ };
+
+ phy_xaui_slot2: xaui-phy@slot2 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x6>;
+ status = "disabled";
+ };
+ };
+ };
+ };
+
+ rio: rapidio@ffe0c0000 {
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+
+ port1 {
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+ port2 {
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
+};
+
+/include/ "b4860si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
index 582381dba1d7..868719821106 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-post.dtsi
@@ -1,7 +1,7 @@
/*
* B4860 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -51,14 +51,12 @@
#address-cells = <2>;
#size-cells = <2>;
cell-index = <1>;
- fsl,liodn-reg = <&guts 0x510>; /* RIO1LIODNR */
};
port2 {
#address-cells = <2>;
#size-cells = <2>;
cell-index = <2>;
- fsl,liodn-reg = <&guts 0x514>; /* RIO2LIODNR */
};
};
@@ -109,6 +107,133 @@
};
};
+&bportals {
+ bman-portal@38000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+ interrupts = <133 2 0 0>;
+ };
+ bman-portal@3c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+ interrupts = <135 2 0 0>;
+ };
+ bman-portal@40000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+ interrupts = <137 2 0 0>;
+ };
+ bman-portal@44000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+ interrupts = <139 2 0 0>;
+ };
+ bman-portal@48000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+ interrupts = <141 2 0 0>;
+ };
+ bman-portal@4c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+ interrupts = <143 2 0 0>;
+ };
+ bman-portal@50000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+ interrupts = <145 2 0 0>;
+ };
+ bman-portal@54000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+ interrupts = <147 2 0 0>;
+ };
+ bman-portal@58000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+ interrupts = <149 2 0 0>;
+ };
+ bman-portal@5c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+ interrupts = <151 2 0 0>;
+ };
+ bman-portal@60000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+ interrupts = <153 2 0 0>;
+ };
+};
+
+&qportals {
+ qportal14: qman-portal@38000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+ interrupts = <132 0x2 0 0>;
+ cell-index = <0xe>;
+ };
+ qportal15: qman-portal@3c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+ interrupts = <134 0x2 0 0>;
+ cell-index = <0xf>;
+ };
+ qportal16: qman-portal@40000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+ interrupts = <136 0x2 0 0>;
+ cell-index = <0x10>;
+ };
+ qportal17: qman-portal@44000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+ interrupts = <138 0x2 0 0>;
+ cell-index = <0x11>;
+ };
+ qportal18: qman-portal@48000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+ interrupts = <140 0x2 0 0>;
+ cell-index = <0x12>;
+ };
+ qportal19: qman-portal@4c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+ interrupts = <142 0x2 0 0>;
+ cell-index = <0x13>;
+ };
+ qportal20: qman-portal@50000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+ interrupts = <144 0x2 0 0>;
+ cell-index = <0x14>;
+ };
+ qportal21: qman-portal@54000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+ interrupts = <146 0x2 0 0>;
+ cell-index = <0x15>;
+ };
+ qportal22: qman-portal@58000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+ interrupts = <148 0x2 0 0>;
+ cell-index = <0x16>;
+ };
+ qportal23: qman-portal@5c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+ interrupts = <150 0x2 0 0>;
+ cell-index = <0x17>;
+ };
+ qportal24: qman-portal@60000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+ interrupts = <152 0x2 0 0>;
+ cell-index = <0x18>;
+ };
+};
+
&soc {
ddr2: memory-controller@9000 {
compatible = "fsl,qoriq-memory-controller-v4.5", "fsl,qoriq-memory-controller";
@@ -124,51 +249,36 @@
compatible = "fsl,b4860-device-config", "fsl,qoriq-device-config-2.0";
};
- clockgen: global-utilities@e1000 {
- compatible = "fsl,b4860-clockgen", "fsl,qoriq-clockgen-2.0";
- ranges = <0x0 0xe1000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
+ global-utilities@e1000 {
+ compatible = "fsl,b4860-clockgen", "fsl,b4-clockgen",
+ "fsl,qoriq-clockgen-2.0";
+ };
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-2.0";
- clock-output-names = "sysclk";
- };
+ rcpm: global-utilities@e2000 {
+ compatible = "fsl,b4860-rcpm", "fsl,qoriq-rcpm-2.0";
+ };
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2", "pll0-div4";
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+/include/ "qoriq-fman3-0-1g-5.dtsi"
+/include/ "qoriq-fman3-0-10g-0.dtsi"
+/include/ "qoriq-fman3-0-10g-1.dtsi"
+ fman@400000 {
+ enet4: ethernet@e8000 {
};
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2", "pll1-div4";
+ enet5: ethernet@ea000 {
};
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll0-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux0";
+ enet6: ethernet@f0000 {
};
- };
- rcpm: global-utilities@e2000 {
- compatible = "fsl,b4860-rcpm", "fsl,qoriq-rcpm-2.0";
+ enet7: ethernet@f2000 {
+ };
};
- L2: l2-cache-controller@c20000 {
+ L2_1: l2-cache-controller@c20000 {
compatible = "fsl,b4860-l2-cache-controller";
+ reg = <0xc20000 0x40000>;
+ next-level-cache = <&cpc>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
index 1948f73fd26b..388ba1b15f8c 100644
--- a/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4860si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* B4860 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -51,9 +51,20 @@
serial2 = &serial2;
serial3 = &serial3;
pci0 = &pci0;
+ usb0 = &usb0;
dma0 = &dma0;
dma1 = &dma1;
sdhc = &sdhc;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
+ ethernet6 = &enet6;
+ ethernet7 = &enet7;
};
@@ -64,29 +75,29 @@
cpu0: PowerPC,e6500@0 {
device_type = "cpu";
reg = <0 1>;
- clocks = <&mux0>;
- next-level-cache = <&L2>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu1: PowerPC,e6500@2 {
device_type = "cpu";
reg = <2 3>;
- clocks = <&mux0>;
- next-level-cache = <&L2>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu2: PowerPC,e6500@4 {
device_type = "cpu";
reg = <4 5>;
- clocks = <&mux0>;
- next-level-cache = <&L2>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu3: PowerPC,e6500@6 {
device_type = "cpu";
reg = <6 7>;
- clocks = <&mux0>;
- next-level-cache = <&L2>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
};
diff --git a/arch/powerpc/boot/dts/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
index 8b47edcfabf0..05be919f3545 100644
--- a/arch/powerpc/boot/dts/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -1,7 +1,7 @@
/*
* B4420DS Device Tree Source
*
- * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -39,6 +39,14 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ aliases {
+ crypto = &crypto;
+ phy_sgmii_10 = &phy_sgmii_10;
+ phy_sgmii_11 = &phy_sgmii_11;
+ phy_sgmii_1c = &phy_sgmii_1c;
+ phy_sgmii_1d = &phy_sgmii_1d;
+ };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -97,10 +105,37 @@
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01052000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -108,7 +143,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "sst,sst25wf040";
+ compatible = "sst,sst25wf040", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
};
@@ -132,19 +167,19 @@
reg = <0>;
eeprom@50 {
- compatible = "at24,24c64";
+ compatible = "atmel,24c64";
reg = <0x50>;
};
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@53 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x53>;
};
eeprom@57 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x57>;
};
rtc@68 {
@@ -152,6 +187,29 @@
reg = <0x68>;
};
};
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x2>;
+
+ ina220@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+ };
+
+ i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x3>;
+
+ adt7461@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+ };
};
};
@@ -160,6 +218,47 @@
phy_type = "ulpi";
};
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_10>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_11>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_1c>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy_sgmii_1d>;
+ phy-connection-type = "sgmii";
+ };
+
+ mdio@fc000 {
+ phy_sgmii_10: ethernet-phy@10 {
+ reg = <0x10>;
+ };
+
+ phy_sgmii_11: ethernet-phy@11 {
+ reg = <0x11>;
+ };
+
+ phy_sgmii_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ status = "disabled";
+ };
+
+ phy_sgmii_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ status = "disabled";
+ };
+ };
+ };
};
pci0: pcie@ffe200000 {
@@ -176,7 +275,6 @@
0 0x00010000>;
};
};
-
};
-/include/ "fsl/b4si-post.dtsi"
+/include/ "b4si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 1a54ba71f685..fb3200b006ad 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -1,7 +1,7 @@
/*
* B4420 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,10 +32,25 @@
* this software, even if advised of the possibility of such damage.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
@@ -128,6 +143,174 @@
};
};
+&bportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ bman-portal@8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <109 2 0 0>;
+ };
+ bman-portal@c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <111 2 0 0>;
+ };
+ bman-portal@10000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <113 2 0 0>;
+ };
+ bman-portal@14000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <115 2 0 0>;
+ };
+ bman-portal@18000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <117 2 0 0>;
+ };
+ bman-portal@1c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <119 2 0 0>;
+ };
+ bman-portal@20000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <121 2 0 0>;
+ };
+ bman-portal@24000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <123 2 0 0>;
+ };
+ bman-portal@28000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+ interrupts = <125 2 0 0>;
+ };
+ bman-portal@2c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+ interrupts = <127 2 0 0>;
+ };
+ bman-portal@30000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+ interrupts = <129 2 0 0>;
+ };
+ bman-portal@34000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+ interrupts = <131 2 0 0>;
+ };
+};
+
+&qportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <104 0x2 0 0>;
+ cell-index = <0x0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <106 0x2 0 0>;
+ cell-index = <0x1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <108 0x2 0 0>;
+ cell-index = <0x2>;
+ };
+ qportal3: qman-portal@c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <110 0x2 0 0>;
+ cell-index = <0x3>;
+ };
+ qportal4: qman-portal@10000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <112 0x2 0 0>;
+ cell-index = <0x4>;
+ };
+ qportal5: qman-portal@14000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <114 0x2 0 0>;
+ cell-index = <0x5>;
+ };
+ qportal6: qman-portal@18000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <116 0x2 0 0>;
+ cell-index = <0x6>;
+ };
+ qportal7: qman-portal@1c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <118 0x2 0 0>;
+ cell-index = <0x7>;
+ };
+ qportal8: qman-portal@20000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <120 0x2 0 0>;
+ cell-index = <0x8>;
+ };
+ qportal9: qman-portal@24000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <122 0x2 0 0>;
+ cell-index = <0x9>;
+ };
+ qportal10: qman-portal@28000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+ interrupts = <124 0x2 0 0>;
+ cell-index = <0xa>;
+ };
+ qportal11: qman-portal@2c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+ interrupts = <126 0x2 0 0>;
+ cell-index = <0xb>;
+ };
+ qportal12: qman-portal@30000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+ interrupts = <128 0x2 0 0>;
+ cell-index = <0xc>;
+ };
+ qportal13: qman-portal@34000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+ interrupts = <130 0x2 0 0>;
+ cell-index = <0xd>;
+ };
+};
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -214,10 +397,7 @@
fsl,liodn-bits = <12>;
};
- clockgen: global-utilities@e1000 {
- compatible = "fsl,b4-clockgen", "fsl,qoriq-clockgen-2.0";
- reg = <0xe1000 0x1000>;
- };
+/include/ "qoriq-clockgen2.dtsi"
rcpm: global-utilities@e2000 {
compatible = "fsl,b4-rcpm", "fsl,qoriq-rcpm-2.0";
@@ -261,9 +441,47 @@
/include/ "qoriq-duart-1.dtsi"
/include/ "qoriq-sec5.3-0.dtsi"
- L2: l2-cache-controller@c20000 {
- compatible = "fsl,b4-l2-cache-controller";
- reg = <0xc20000 0x1000>;
- next-level-cache = <&cpc>;
+/include/ "qoriq-qman3.dtsi"
+ qman: qman@318000 {
+ interrupts = <16 2 1 28>;
+ };
+
+/include/ "qoriq-bman1.dtsi"
+ bman: bman@31a000 {
+ interrupts = <16 2 1 29>;
+ };
+
+/include/ "qoriq-fman3-0.dtsi"
+/include/ "qoriq-fman3-0-1g-0.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+ fman@400000 {
+ interrupts = <96 2 0 0>, <16 2 1 30>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x80000>;
+ };
+
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ mdio@fc000 {
+ interrupts = <100 1 0 0>;
+ };
+
+ mdio@fd000 {
+ interrupts = <101 1 0 0>;
+ };
};
};
diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dts b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
index e13d2d4877b0..0ba86a6dce1b 100644
--- a/arch/powerpc/boot/dts/bsc9131rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* BSC9131 RDB Device Tree Source
*
* Copyright 2011-2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/bsc9131si-pre.dtsi"
+/include/ "bsc9131si-pre.dtsi"
/ {
model = "fsl,bsc9131rdb";
@@ -19,7 +15,7 @@
device_type = "memory";
};
- board_ifc: ifc: ifc@ff71e000 {
+ board_ifc: ifc: memory-controller@ff71e000 {
/* NAND Flash on board */
ranges = <0x0 0x0 0x0 0xff800000 0x00004000>;
reg = <0x0 0xff71e000 0x0 0x2000>;
@@ -31,4 +27,4 @@
};
/include/ "bsc9131rdb.dtsi"
-/include/ "fsl/bsc9131si-post.dtsi"
+/include/ "bsc9131si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/bsc9131rdb.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dtsi
index 9e6c01339ccc..53f8b956340f 100644
--- a/arch/powerpc/boot/dts/bsc9131rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9131rdb.dtsi
@@ -40,31 +40,6 @@
compatible = "fsl,ifc-nand";
reg = <0x0 0x0 0x4000>;
- partition@0 {
- /* This location must not be altered */
- /* 3MB for u-boot Bootloader Image */
- reg = <0x0 0x00300000>;
- label = "NAND U-Boot Image";
- read-only;
- };
-
- partition@300000 {
- /* 1MB for DTB Image */
- reg = <0x00300000 0x00100000>;
- label = "NAND DTB Image";
- };
-
- partition@400000 {
- /* 8MB for Linux Kernel Image */
- reg = <0x00400000 0x00800000>;
- label = "NAND Linux Kernel Image";
- };
-
- partition@c00000 {
- /* Rest space for Root file System Image */
- reg = <0x00c00000 0x07400000>;
- label = "NAND RFS Image";
- };
};
};
@@ -78,35 +53,10 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
- /* 512KB for u-boot Bootloader Image */
- partition@0 {
- reg = <0x0 0x00080000>;
- label = "SPI Flash U-Boot Image";
- read-only;
- };
-
- /* 512KB for DTB Image */
- partition@80000 {
- reg = <0x00080000 0x00080000>;
- label = "SPI Flash DTB Image";
- };
-
- /* 4MB for Linux Kernel Image */
- partition@100000 {
- reg = <0x00100000 0x00400000>;
- label = "SPI Flash Kernel Image";
- };
-
- /*11MB for RFS Image */
- partition@500000 {
- reg = <0x00500000 0x00B00000>;
- label = "SPI Flash RFS Image";
- };
-
};
};
@@ -130,6 +80,18 @@
status = "disabled";
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <5>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0xcccccccd>;
+ fsl,tmr-fiper1 = <999999995>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <249999999>;
+ };
+
enet0: ethernet@b0000 {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
index 0c0efa94cfb4..5c53cee8755f 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9131si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <16 2 0 0 20 2 0 0>;
};
@@ -170,8 +170,6 @@ timer@41100 {
/include/ "pq3-etsec2-0.dtsi"
enet0: ethernet@b0000 {
queue-group@b0000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
interrupts = <26 2 0 0 27 2 0 0 28 2 0 0>;
};
};
@@ -179,8 +177,6 @@ enet0: ethernet@b0000 {
/include/ "pq3-etsec2-1.dtsi"
enet1: ethernet@b1000 {
queue-group@b1000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
interrupts = <33 2 0 0 34 2 0 0 35 2 0 0>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
new file mode 100644
index 000000000000..ce642e879a1b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * BSC9132 QDS Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+/include/ "bsc9132si-pre.dtsi"
+
+/ {
+ model = "fsl,bsc9132qds";
+ compatible = "fsl,bsc9132qds";
+
+ memory {
+ device_type = "memory";
+ };
+
+ ifc: memory-controller@ff71e000 {
+ /* NOR, NAND Flash on board */
+ ranges = <0x0 0x0 0x0 0x88000000 0x08000000
+ 0x1 0x0 0x0 0xff800000 0x00010000>;
+ reg = <0x0 0xff71e000 0x0 0x2000>;
+ };
+
+ soc: soc@ff700000 {
+ ranges = <0x0 0x0 0xff700000 0x100000>;
+ };
+
+ pci0: pcie@ff70a000 {
+ reg = <0 0xff70a000 0 0x1000>;
+ ranges = <0x2000000 0x0 0x90000000 0 0x90000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0 0xc0010000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0x90000000
+ 0x2000000 0x0 0x90000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x100000>;
+ };
+ };
+};
+
+/include/ "bsc9132qds.dtsi"
+/include/ "bsc9132si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/bsc9132qds.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132qds.dtsi
index af8e88830221..fead484a8180 100644
--- a/arch/powerpc/boot/dts/bsc9132qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dtsi
@@ -55,7 +55,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <30000000>;
};
@@ -87,6 +87,18 @@
};
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <5>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0xcccccccd>;
+ fsl,tmr-fiper1 = <999999995>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <249999999>;
+ };
+
enet0: ethernet@b0000 {
phy-handle = <&phy0>;
tbi-handle = <&tbi0>;
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
index c72307198140..4da451e000d9 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
@@ -35,11 +35,39 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
/* FIXME: Test whether interrupts are split */
interrupts = <16 2 0 0 20 2 0 0>;
};
+/* controller at 0xa000 */
+&pci0 {
+ compatible = "fsl,bsc9132-pcie", "fsl,qoriq-pcie-v2.2";
+ device_type = "pci";
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0 255>;
+ interrupts = <16 2 0 0>;
+
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <16 2 0 0>;
+ interrupt-map-mask = <0xf800 0 0 7>;
+
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0x0 0x0 0x1 &mpic 0x0 0x2 0x0 0x0
+ 0000 0x0 0x0 0x2 &mpic 0x1 0x2 0x0 0x0
+ 0000 0x0 0x0 0x3 &mpic 0x2 0x2 0x0 0x0
+ 0000 0x0 0x0 0x4 &mpic 0x3 0x2 0x0 0x0
+ >;
+ };
+};
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -162,8 +190,6 @@ crypto@30000 {
/include/ "pq3-etsec2-0.dtsi"
enet0: ethernet@b0000 {
queue-group@b0000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
interrupts = <26 2 0 0 27 2 0 0 28 2 0 0>;
};
};
@@ -171,8 +197,6 @@ enet0: ethernet@b0000 {
/include/ "pq3-etsec2-1.dtsi"
enet1: ethernet@b1000 {
queue-group@b1000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
interrupts = <33 2 0 0 34 2 0 0 35 2 0 0>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
index 301a9dba5790..90f7949fe312 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
@@ -45,6 +45,7 @@
serial0 = &serial0;
ethernet0 = &enet0;
ethernet1 = &enet1;
+ pci0 = &pci0;
};
cpus {
diff --git a/arch/powerpc/boot/dts/c293pcie.dts b/arch/powerpc/boot/dts/fsl/c293pcie.dts
index 6681cc21030b..e2fdac2ed420 100644
--- a/arch/powerpc/boot/dts/c293pcie.dts
+++ b/arch/powerpc/boot/dts/fsl/c293pcie.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/c293si-pre.dtsi"
+/include/ "c293si-pre.dtsi"
/ {
model = "fsl,C293PCIE";
@@ -42,7 +42,7 @@
device_type = "memory";
};
- ifc: ifc@fffe1e000 {
+ ifc: memory-controller@fffe1e000 {
reg = <0xf 0xffe1e000 0 0x2000>;
ranges = <0x0 0x0 0xf 0xec000000 0x04000000
0x1 0x0 0xf 0xff800000 0x00010000
@@ -153,7 +153,7 @@
&soc {
i2c@3000 {
eeprom@50 {
- compatible = "st,24c1024";
+ compatible = "st,24c1024", "atmel,24c1024";
reg = <0x50>;
};
@@ -167,7 +167,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <50000000>;
@@ -221,4 +221,4 @@
phy-connection-type = "rgmii-id";
};
};
-/include/ "fsl/c293si-post.dtsi"
+/include/ "c293si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
index bd208320bff5..2d443d519274 100644
--- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <19 2 0 0>;
};
@@ -124,10 +124,10 @@
reg = <0x80000 0x20000>;
ranges = <0x0 0x80000 0x20000>;
- jr@1000{
+ jr@1000 {
interrupts = <45 2 0 0>;
};
- jr@2000{
+ jr@2000 {
interrupts = <57 2 0 0>;
};
};
@@ -140,10 +140,10 @@
reg = <0xa0000 0x20000>;
ranges = <0x0 0xa0000 0x20000>;
- jr@1000{
+ jr@1000 {
interrupts = <49 2 0 0>;
};
- jr@2000{
+ jr@2000 {
interrupts = <50 2 0 0>;
};
};
@@ -156,10 +156,10 @@
reg = <0xc0000 0x20000>;
ranges = <0x0 0xc0000 0x20000>;
- jr@1000{
+ jr@1000 {
interrupts = <55 2 0 0>;
};
- jr@2000{
+ jr@2000 {
interrupts = <56 2 0 0>;
};
};
@@ -171,8 +171,6 @@
enet0: ethernet@b0000 {
queue-group@b0000 {
reg = <0x10000 0x1000>;
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
};
};
@@ -180,8 +178,6 @@
enet1: ethernet@b1000 {
queue-group@b1000 {
reg = <0x11000 0x1000>;
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
};
};
diff --git a/arch/powerpc/boot/dts/fsl/cyrus_p5020.dts b/arch/powerpc/boot/dts/fsl/cyrus_p5020.dts
new file mode 100644
index 000000000000..40ba0606ec55
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/cyrus_p5020.dts
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cyrus 5020 Device Tree Source, based on p5020ds.dts
+ *
+ * Copyright 2015 Andy Fleming
+ *
+ * p5020ds.dts copyright:
+ * Copyright 2010 - 2014 Freescale Semiconductor Inc.
+ */
+
+/include/ "p5020si-pre.dtsi"
+
+/ {
+ model = "varisys,CYRUS";
+ compatible = "varisys,CYRUS";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ memory {
+ device_type = "memory";
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+ spi@110000 {
+ };
+
+ i2c@118100 {
+ };
+
+ i2c@119100 {
+ rtc@6f {
+ compatible = "microchip,mcp7941x";
+ reg = <0x6f>;
+ };
+ };
+ };
+
+ rio: rapidio@ffe0c0000 {
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+
+ port1 {
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+ port2 {
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
+
+ lbc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x1000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xffa00000 0x00040000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+ };
+
+ pci0: pcie@ffe200000 {
+ reg = <0xf 0xfe200000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe201000 {
+ reg = <0xf 0xfe201000 0 0x1000>;
+ ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+ 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe202000 {
+ reg = <0xf 0xfe202000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci3: pcie@ffe203000 {
+ reg = <0xf 0xfe203000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+};
+
+/include/ "p5020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/b4860qds.dts b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
index 6bb3707ffe3d..7e2a90cde72e 100644
--- a/arch/powerpc/boot/dts/b4860qds.dts
+++ b/arch/powerpc/boot/dts/fsl/e500v1_power_isa.dtsi
@@ -1,5 +1,5 @@
/*
- * B4860DS Device Tree Source
+ * e500v1 Power ISA Device Tree Source (include)
*
* Copyright 2012 Freescale Semiconductor Inc.
*
@@ -20,7 +20,7 @@
* Foundation, either version 2 of that License or (at your option) any
* later version.
*
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
@@ -32,30 +32,20 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/b4860si-pre.dtsi"
-/include/ "b4qds.dtsi"
-
/ {
- model = "fsl,B4860QDS";
- compatible = "fsl,B4860QDS";
-
- ifc: localbus@ffe124000 {
- board-control@3,0 {
- compatible = "fsl,b4860qds-fpga", "fsl,fpga-qixis";
- };
+ cpus {
+ power-isa-version = "2.03";
+ power-isa-b; // Base
+ power-isa-e; // Embedded
+ power-isa-atb; // Alternate Time Base
+ power-isa-cs; // Cache Specification
+ power-isa-e.le; // Embedded.Little-Endian
+ power-isa-e.pm; // Embedded.Performance Monitor
+ power-isa-ecl; // Embedded Cache Locking
+ power-isa-mmc; // Memory Coherence
+ power-isa-sp; // Signal Processing Engine
+ power-isa-sp.fs; // SPE.Embedded Float Scalar Single
+ power-isa-sp.fv; // SPE.Embedded Float Vector
+ mmu-type = "power-embedded";
};
-
- rio: rapidio@ffe0c0000 {
- reg = <0xf 0xfe0c0000 0 0x11000>;
-
- port1 {
- ranges = <0 0 0xc 0x20000000 0 0x10000000>;
- };
- port2 {
- ranges = <0 0 0xc 0x30000000 0 0x10000000>;
- };
- };
-
};
-
-/include/ "fsl/b4860si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/ge_imp3a.dts b/arch/powerpc/boot/dts/fsl/ge_imp3a.dts
index fefae416a097..da3de8e2b7d2 100644
--- a/arch/powerpc/boot/dts/ge_imp3a.dts
+++ b/arch/powerpc/boot/dts/fsl/ge_imp3a.dts
@@ -1,18 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* GE IMP3A Device Tree Source
*
* Copyright 2010-2011 GE Intelligent Platforms Embedded Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Based on: P2020 DS Device Tree Source
* Copyright 2009 Freescale Semiconductor Inc.
*/
-/include/ "fsl/p2020si-pre.dtsi"
+/include/ "p2020si-pre.dtsi"
/ {
model = "GE_IMP3A";
@@ -252,4 +248,4 @@
};
};
-/include/ "fsl/p2020si-post.dtsi"
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
new file mode 100644
index 000000000000..fc92bb032c51
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/gef_ppc9a.dts
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE PPC9A Device Tree Source
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: SBS CM6 Device Tree Source
+ * Copyright 2007 SBS Technologies GmbH & Co. KG
+ * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/*
+ * Compiled with dtc -I dts -O dtb -o gef_ppc9a.dtb gef_ppc9a.dts
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+ model = "GEF_PPC9A";
+ compatible = "gef,ppc9a";
+
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x40000000>; // set by uboot
+ };
+
+ lbc: localbus@fef05000 {
+ reg = <0xfef05000 0x1000>;
+
+ ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
+ 1 0 0xe8000000 0x08000000 // Paged Flash 0
+ 2 0 0xe0000000 0x08000000 // Paged Flash 1
+ 3 0 0xfc100000 0x00020000 // NVRAM
+ 4 0 0xfc000000 0x00008000 // FPGA
+ 5 0 0xfc008000 0x00008000 // AFIX FPGA
+ 6 0 0xfd000000 0x00800000 // IO FPGA (8-bit)
+ 7 0 0xfd800000 0x00800000>; // IO FPGA (32-bit)
+
+ /* flash@0,0 is a mirror of part of the memory in flash@1,0
+ flash@0,0 {
+ compatible = "gef,ppc9a-firmware-mirror", "cfi-flash";
+ reg = <0x0 0x0 0x1000000>;
+ bank-width = <4>;
+ device-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ partition@0 {
+ label = "firmware";
+ reg = <0x0 0x1000000>;
+ read-only;
+ };
+ };
+ */
+
+ flash@1,0 {
+ compatible = "gef,ppc9a-paged-flash", "cfi-flash";
+ reg = <0x1 0x0 0x8000000>;
+ bank-width = <4>;
+ device-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ partition@0 {
+ label = "user";
+ reg = <0x0 0x7800000>;
+ };
+ partition@7800000 {
+ label = "firmware";
+ reg = <0x7800000 0x800000>;
+ read-only;
+ };
+ };
+
+ nvram@3,0 {
+ device_type = "nvram";
+ compatible = "simtek,stk14ca8";
+ reg = <0x3 0x0 0x20000>;
+ };
+
+ fpga@4,0 {
+ compatible = "gef,ppc9a-fpga-regs";
+ reg = <0x4 0x0 0x40>;
+ };
+
+ wdt@4,2000 {
+ compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
+ "gef,fpga-wdt";
+ reg = <0x4 0x2000 0x8>;
+ interrupts = <0x1a 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+ /* Second watchdog available, driver currently supports one.
+ wdt@4,2010 {
+ compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
+ "gef,fpga-wdt";
+ reg = <0x4 0x2010 0x8>;
+ interrupts = <0x1b 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+ */
+ gef_pic: pic@4,4000 {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "gef,ppc9a-fpga-pic", "gef,fpga-pic-1.00";
+ reg = <0x4 0x4000 0x20>;
+ interrupts = <0x8 0x9 0 0>;
+
+ };
+ gef_gpio: gpio@7,14000 {
+ #gpio-cells = <2>;
+ compatible = "gef,ppc9a-gpio", "gef,sbc610-gpio";
+ reg = <0x7 0x14000 0x24>;
+ gpio-controller;
+ };
+ };
+
+ soc: soc@fef00000 {
+ ranges = <0x0 0xfef00000 0x00100000>;
+
+ i2c@3000 {
+ hwmon@48 {
+ compatible = "national,lm92";
+ reg = <0x48>;
+ };
+
+ hwmon@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+
+ rtc@51 {
+ compatible = "epson,rx8581";
+ reg = <0x00000051>;
+ };
+
+ eti@6b {
+ compatible = "dallas,ds1682";
+ reg = <0x6b>;
+ };
+ };
+
+ enet0: ethernet@24000 {
+ tbi-handle = <&tbi0>;
+ phy-handle = <&phy0>;
+ phy-connection-type = "gmii";
+ };
+
+ mdio@24520 {
+ phy0: ethernet-phy@0 {
+ interrupt-parent = <&gef_pic>;
+ interrupts = <0x9 0x4>;
+ reg = <1>;
+ };
+ phy2: ethernet-phy@2 {
+ interrupt-parent = <&gef_pic>;
+ interrupts = <0x8 0x4>;
+ reg = <3>;
+ };
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet1: ethernet@26000 {
+ tbi-handle = <&tbi2>;
+ phy-handle = <&phy2>;
+ phy-connection-type = "gmii";
+ };
+
+ mdio@26520 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet2: ethernet@25000 {
+ status = "disabled";
+ };
+
+ mdio@25520 {
+ status = "disabled";
+ };
+
+ enet3: ethernet@27000 {
+ status = "disabled";
+ };
+
+ mdio@27520 {
+ status = "disabled";
+ };
+ };
+
+ pci0: pcie@fef08000 {
+ reg = <0xfef08000 0x1000>;
+ ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
+ 0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
+
+ pcie@0 {
+ ranges = <0x02000000 0x0 0x80000000
+ 0x02000000 0x0 0x80000000
+ 0x0 0x40000000
+
+ 0x01000000 0x0 0x00000000
+ 0x01000000 0x0 0x00000000
+ 0x0 0x00400000>;
+ };
+ };
+
+ pci1: pcie@fef09000 {
+ status = "disabled";
+ };
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/gef_sbc310.dts b/arch/powerpc/boot/dts/fsl/gef_sbc310.dts
new file mode 100644
index 000000000000..47ae85c34635
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/gef_sbc310.dts
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC310 Device Tree Source
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: SBS CM6 Device Tree Source
+ * Copyright 2007 SBS Technologies GmbH & Co. KG
+ * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/*
+ * Compiled with dtc -I dts -O dtb -o gef_sbc310.dtb gef_sbc310.dts
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+ model = "GEF_SBC310";
+ compatible = "gef,sbc310";
+
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x40000000>; // set by uboot
+ };
+
+ lbc: localbus@fef05000 {
+ reg = <0xfef05000 0x1000>;
+
+ ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
+ 1 0 0xe0000000 0x08000000 // Paged Flash 0
+ 2 0 0xe8000000 0x08000000 // Paged Flash 1
+ 3 0 0xfc100000 0x00020000 // NVRAM
+ 4 0 0xfc000000 0x00010000>; // FPGA
+
+ /* flash@0,0 is a mirror of part of the memory in flash@1,0
+ flash@0,0 {
+ compatible = "gef,sbc310-firmware-mirror", "cfi-flash";
+ reg = <0x0 0x0 0x01000000>;
+ bank-width = <2>;
+ device-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ partition@0 {
+ label = "firmware";
+ reg = <0x0 0x01000000>;
+ read-only;
+ };
+ };
+ */
+
+ flash@1,0 {
+ compatible = "gef,sbc310-paged-flash", "cfi-flash";
+ reg = <0x1 0x0 0x8000000>;
+ bank-width = <2>;
+ device-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ partition@0 {
+ label = "user";
+ reg = <0x0 0x7800000>;
+ };
+ partition@7800000 {
+ label = "firmware";
+ reg = <0x7800000 0x800000>;
+ read-only;
+ };
+ };
+
+ nvram@3,0 {
+ device_type = "nvram";
+ compatible = "simtek,stk14ca8";
+ reg = <0x3 0x0 0x20000>;
+ };
+
+ fpga@4,0 {
+ compatible = "gef,fpga-regs";
+ reg = <0x4 0x0 0x40>;
+ };
+
+ wdt@4,2000 {
+ compatible = "gef,sbc310-fpga-wdt", "gef,fpga-wdt-1.00",
+ "gef,fpga-wdt";
+ reg = <0x4 0x2000 0x8>;
+ interrupts = <0x1a 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+/*
+ wdt@4,2010 {
+ compatible = "gef,sbc310-fpga-wdt", "gef,fpga-wdt-1.00",
+ "gef,fpga-wdt";
+ reg = <0x4 0x2010 0x8>;
+ interrupts = <0x1b 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+*/
+ gef_pic: pic@4,4000 {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "gef,sbc310-fpga-pic", "gef,fpga-pic";
+ reg = <0x4 0x4000 0x20>;
+ interrupts = <0x8 0x9 0 0>;
+
+ };
+ gef_gpio: gpio@4,8000 {
+ #gpio-cells = <2>;
+ compatible = "gef,sbc310-gpio";
+ reg = <0x4 0x8000 0x24>;
+ gpio-controller;
+ };
+ };
+
+ soc: soc@fef00000 {
+ ranges = <0x0 0xfef00000 0x00100000>;
+
+ i2c@3000 {
+ rtc@51 {
+ compatible = "epson,rx8581";
+ reg = <0x00000051>;
+ };
+ };
+
+ i2c@3100 {
+ hwmon@48 {
+ compatible = "national,lm92";
+ reg = <0x48>;
+ };
+
+ hwmon@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+
+ eti@6b {
+ compatible = "dallas,ds1682";
+ reg = <0x6b>;
+ };
+ };
+
+ enet0: ethernet@24000 {
+ tbi-handle = <&tbi0>;
+ phy-handle = <&phy0>;
+ phy-connection-type = "gmii";
+ };
+
+ mdio@24520 {
+ phy0: ethernet-phy@0 {
+ interrupt-parent = <&gef_pic>;
+ interrupts = <0x9 0x4>;
+ reg = <1>;
+ };
+ phy2: ethernet-phy@2 {
+ interrupt-parent = <&gef_pic>;
+ interrupts = <0x8 0x4>;
+ reg = <3>;
+ };
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet1: ethernet@26000 {
+ tbi-handle = <&tbi2>;
+ phy-handle = <&phy2>;
+ phy-connection-type = "gmii";
+ };
+
+ mdio@26520 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet2: ethernet@25000 {
+ status = "disabled";
+ };
+
+ mdio@25520 {
+ status = "disabled";
+ };
+
+ enet3: ethernet@27000 {
+ status = "disabled";
+ };
+
+ mdio@27520 {
+ status = "disabled";
+ };
+ };
+
+ pci0: pcie@fef08000 {
+ reg = <0xfef08000 0x1000>;
+ ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
+ 0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
+ interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
+ interrupt-map = <
+ 0x0000 0x0 0x0 0x1 &mpic 0x0 0x2
+ 0x0000 0x0 0x0 0x2 &mpic 0x1 0x2
+ 0x0000 0x0 0x0 0x3 &mpic 0x2 0x2
+ 0x0000 0x0 0x0 0x4 &mpic 0x3 0x2
+ >;
+
+ pcie@0 {
+ ranges = <0x02000000 0x0 0x80000000
+ 0x02000000 0x0 0x80000000
+ 0x0 0x40000000
+
+ 0x01000000 0x0 0x00000000
+ 0x01000000 0x0 0x00000000
+ 0x0 0x00400000>;
+ };
+ };
+
+ pci1: pcie@fef09000 {
+ reg = <0xfef09000 0x1000>;
+ ranges = <0x02000000 0x0 0xc0000000 0xc0000000 0x0 0x20000000
+ 0x01000000 0x0 0x00000000 0xfe400000 0x0 0x00400000>;
+
+ pcie@0 {
+ ranges = <0x02000000 0x0 0xc0000000
+ 0x02000000 0x0 0xc0000000
+ 0x0 0x20000000
+
+ 0x01000000 0x0 0x00000000
+ 0x01000000 0x0 0x00000000
+ 0x0 0x00400000>;
+ };
+ };
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/gef_sbc610.dts b/arch/powerpc/boot/dts/fsl/gef_sbc610.dts
new file mode 100644
index 000000000000..5322be44b62e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/gef_sbc610.dts
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GE SBC610 Device Tree Source
+ *
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ *
+ * Based on: SBS CM6 Device Tree Source
+ * Copyright 2007 SBS Technologies GmbH & Co. KG
+ * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
+ * Copyright 2006 Freescale Semiconductor Inc.
+ */
+
+/*
+ * Compiled with dtc -I dts -O dtb -o gef_sbc610.dtb gef_sbc610.dts
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+ model = "GEF_SBC610";
+ compatible = "gef,sbc610";
+
+ memory {
+ device_type = "memory";
+ reg = <0x0 0x40000000>; // set by uboot
+ };
+
+ lbc: localbus@fef05000 {
+ reg = <0xfef05000 0x1000>;
+
+ ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
+ 1 0 0xe8000000 0x08000000 // Paged Flash 0
+ 2 0 0xe0000000 0x08000000 // Paged Flash 1
+ 3 0 0xfc100000 0x00020000 // NVRAM
+ 4 0 0xfc000000 0x00008000 // FPGA
+ 5 0 0xfc008000 0x00008000 // AFIX FPGA
+ 6 0 0xfd000000 0x00800000 // IO FPGA (8-bit)
+ 7 0 0xfd800000 0x00800000>; // IO FPGA (32-bit)
+
+ /* flash@0,0 is a mirror of part of the memory in flash@1,0
+ flash@0,0 {
+ compatible = "gef,sbc610-firmware-mirror", "cfi-flash";
+ reg = <0x0 0x0 0x1000000>;
+ bank-width = <4>;
+ device-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ partition@0 {
+ label = "firmware";
+ reg = <0x0 0x1000000>;
+ read-only;
+ };
+ };
+ */
+
+ flash@1,0 {
+ compatible = "gef,sbc610-paged-flash", "cfi-flash";
+ reg = <0x1 0x0 0x8000000>;
+ bank-width = <4>;
+ device-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ partition@0 {
+ label = "user";
+ reg = <0x0 0x7800000>;
+ };
+ partition@7800000 {
+ label = "firmware";
+ reg = <0x7800000 0x800000>;
+ read-only;
+ };
+ };
+
+ nvram@3,0 {
+ device_type = "nvram";
+ compatible = "simtek,stk14ca8";
+ reg = <0x3 0x0 0x20000>;
+ };
+
+ fpga@4,0 {
+ compatible = "gef,fpga-regs";
+ reg = <0x4 0x0 0x40>;
+ };
+
+ wdt@4,2000 {
+ compatible = "gef,fpga-wdt";
+ reg = <0x4 0x2000 0x8>;
+ interrupts = <0x1a 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+ /* Second watchdog available, driver currently supports one.
+ wdt@4,2010 {
+ compatible = "gef,fpga-wdt";
+ reg = <0x4 0x2010 0x8>;
+ interrupts = <0x1b 0x4>;
+ interrupt-parent = <&gef_pic>;
+ };
+ */
+ gef_pic: pic@4,4000 {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "gef,fpga-pic";
+ reg = <0x4 0x4000 0x20>;
+ interrupts = <0x8 0x9 0 0>;
+
+ };
+ gef_gpio: gpio@7,14000 {
+ #gpio-cells = <2>;
+ compatible = "gef,sbc610-gpio";
+ reg = <0x7 0x14000 0x24>;
+ gpio-controller;
+ };
+ };
+
+ soc: soc@fef00000 {
+ ranges = <0x0 0xfef00000 0x00100000>;
+
+ i2c@3000 {
+ hwmon@48 {
+ compatible = "national,lm92";
+ reg = <0x48>;
+ };
+
+ hwmon@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+
+ rtc@51 {
+ compatible = "epson,rx8581";
+ reg = <0x00000051>;
+ };
+
+ eti@6b {
+ compatible = "dallas,ds1682";
+ reg = <0x6b>;
+ };
+ };
+
+ enet0: ethernet@24000 {
+ tbi-handle = <&tbi0>;
+ phy-handle = <&phy0>;
+ phy-connection-type = "gmii";
+ };
+
+ mdio@24520 {
+ phy0: ethernet-phy@0 {
+ interrupt-parent = <&gef_pic>;
+ interrupts = <0x9 0x4>;
+ reg = <1>;
+ };
+ phy2: ethernet-phy@2 {
+ interrupt-parent = <&gef_pic>;
+ interrupts = <0x8 0x4>;
+ reg = <3>;
+ };
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet1: ethernet@26000 {
+ tbi-handle = <&tbi2>;
+ phy-handle = <&phy2>;
+ phy-connection-type = "gmii";
+ };
+
+ mdio@26520 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet2: ethernet@25000 {
+ status = "disabled";
+ };
+
+ mdio@25520 {
+ status = "disabled";
+ };
+
+ enet3: ethernet@27000 {
+ status = "disabled";
+ };
+
+ mdio@27520 {
+ status = "disabled";
+ };
+ };
+
+ pci0: pcie@fef08000 {
+ reg = <0xfef08000 0x1000>;
+ ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
+ 0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
+
+ pcie@0 {
+ ranges = <0x02000000 0x0 0x80000000
+ 0x02000000 0x0 0x80000000
+ 0x0 0x40000000
+
+ 0x01000000 0x0 0x00000000
+ 0x01000000 0x0 0x00000000
+ 0x0 0x00400000>;
+ };
+ };
+
+ pci1: pcie@fef09000 {
+ status = "disabled";
+ };
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/kmcent2.dts b/arch/powerpc/boot/dts/fsl/kmcent2.dts
new file mode 100644
index 000000000000..8e7f0828af29
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/kmcent2.dts
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Keymile kmcent2 Device Tree Source, based on T1040RDB DTS
+ *
+ * (C) Copyright 2016
+ * Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+
+/ {
+ model = "keymile,kmcent2";
+ compatible = "keymile,kmcent2";
+
+ aliases {
+ front_phy = &front_phy;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ ifc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x2000>;
+ ranges = <0 0 0xf 0xe8000000 0x04000000
+ 1 0 0xf 0xfa000000 0x00010000
+ 2 0 0xf 0xfb000000 0x00010000
+ 4 0 0xf 0xc0000000 0x08000000
+ 6 0 0xf 0xd0000000 0x08000000
+ 7 0 0xf 0xd8000000 0x08000000>;
+
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x04000000>;
+ bank-width = <2>;
+ device-width = <2>;
+ };
+
+ nand@1,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x1 0x0 0x10000>;
+ };
+
+ board-control@2,0 {
+ compatible = "keymile,qriox";
+ reg = <0x2 0x0 0x80>;
+ };
+
+ chassis-mgmt@6,0 {
+ compatible = "keymile,bfticu";
+ reg = <6 0 0x100>;
+ interrupt-controller;
+ interrupt-parent = <&mpic>;
+ interrupts = <11 1 0 0>;
+ #interrupt-cells = <1>;
+ };
+
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+
+ spi@110000 {
+ network-clock@1 {
+ compatible = "zarlink,zl30364";
+ reg = <1>;
+ spi-max-frequency = <1000000>;
+ };
+ };
+
+ sdhc@114000 {
+ status = "disabled";
+ };
+
+ i2c@118000 {
+ clock-frequency = <100000>;
+
+ mux@70 {
+ compatible = "nxp,pca9547";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-mux-idle-disconnect;
+
+ i2c@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ eeprom@54 {
+ compatible = "atmel,24c02";
+ reg = <0x54>;
+ pagesize = <2>;
+ read-only;
+ label = "ddr3-spd";
+ };
+ };
+
+ i2c@7 {
+ reg = <7>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ temp-sensor@48 {
+ compatible = "national,lm75";
+ reg = <0x48>;
+ label = "SENSOR_0";
+ };
+ temp-sensor@4a {
+ compatible = "national,lm75";
+ reg = <0x4a>;
+ label = "SENSOR_2";
+ };
+ temp-sensor@4b {
+ compatible = "national,lm75";
+ reg = <0x4b>;
+ label = "SENSOR_3";
+ };
+ };
+ };
+ };
+
+ i2c@118100 {
+ clock-frequency = <100000>;
+
+ eeprom@50 {
+ compatible = "atmel,24c08";
+ reg = <0x50>;
+ pagesize = <16>;
+ };
+
+ eeprom@54 {
+ compatible = "atmel,24c08";
+ reg = <0x54>;
+ pagesize = <16>;
+ };
+ };
+
+ i2c@119000 {
+ status = "disabled";
+ };
+
+ i2c@119100 {
+ status = "disabled";
+ };
+
+ serial2: serial@11d500 {
+ status = "disabled";
+ };
+
+ serial3: serial@11d600 {
+ status = "disabled";
+ };
+
+ usb0: usb@210000 {
+ status = "disabled";
+ };
+ usb1: usb@211000 {
+ status = "disabled";
+ };
+
+ display@180000 {
+ status = "disabled";
+ };
+
+ sata@220000 {
+ status = "disabled";
+ };
+ sata@221000 {
+ status = "disabled";
+ };
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-mode = "sgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ ethernet@e2000 {
+ phy-mode = "sgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ ethernet@e4000 {
+ status = "disabled";
+ };
+
+ ethernet@e6000 {
+ status = "disabled";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&front_phy>;
+ phy-mode = "rgmii-id";
+ };
+
+ mdio0: mdio@fc000 {
+ front_phy: ethernet-phy@11 {
+ reg = <0x11>;
+ };
+ };
+ };
+ };
+
+
+ pci0: pcie@ffe240000 {
+ reg = <0xf 0xfe240000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe250000 {
+ status = "disabled";
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe260000 {
+ status = "disabled";
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci3: pcie@ffe270000 {
+ status = "disabled";
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ qe: qe@ffe140000 {
+ ranges = <0x0 0xf 0xfe140000 0x40000>;
+ reg = <0xf 0xfe140000 0 0x480>;
+ brg-frequency = <0>;
+ bus-frequency = <0>;
+
+ si1: si@700 {
+ compatible = "fsl,t1040-qe-si";
+ reg = <0x700 0x80>;
+ };
+
+ siram1: siram@1000 {
+ compatible = "fsl,t1040-qe-siram";
+ reg = <0x1000 0x800>;
+ };
+
+ ucc_hdlc: ucc@2000 {
+ device_type = "hdlc";
+ compatible = "fsl,ucc-hdlc";
+ rx-clock-name = "clk9";
+ tx-clock-name = "clk9";
+ fsl,hdlc-bus;
+ };
+ };
+};
+
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/kmcoge4.dts b/arch/powerpc/boot/dts/fsl/kmcoge4.dts
index 89b4119f3b19..1c5f942311ee 100644
--- a/arch/powerpc/boot/dts/kmcoge4.dts
+++ b/arch/powerpc/boot/dts/fsl/kmcoge4.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Keymile kmcoge4 Device Tree Source, based on the P2041RDB DTS
*
@@ -5,14 +6,9 @@
* Valentin Longchamp, Keymile AG, valentin.longchamp@keymile.com
*
* Copyright 2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/p2041si-pre.dtsi"
+/include/ "p2041si-pre.dtsi"
/ {
model = "keymile,kmcoge4";
@@ -25,10 +21,37 @@
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01008000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -36,7 +59,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25fl256s1";
+ compatible = "spansion,s25fl256s1", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <20000000>; /* input clock */
};
@@ -50,12 +73,16 @@
flash@2 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,m25p32";
+ compatible = "micron,m25p32", "jedec,spi-nor";
reg = <2>;
spi-max-frequency = <15000000>;
};
};
+ sdhc@114000 {
+ status = "disabled";
+ };
+
i2c@119000 {
status = "disabled";
};
@@ -79,6 +106,43 @@
sata@221000 {
status = "disabled";
};
+
+ fman0: fman@400000 {
+ enet0: ethernet@e0000 {
+ phy-connection-type = "sgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ mdio0: mdio@e1120 {
+ front_phy: ethernet-phy@11 {
+ reg = <0x11>;
+ };
+ };
+
+ enet1: ethernet@e2000 {
+ phy-connection-type = "sgmii";
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ enet2: ethernet@e4000 {
+ status = "disabled";
+ };
+
+ enet3: ethernet@e6000 {
+ status = "disabled";
+ };
+ enet4: ethernet@e8000 {
+ phy-handle = <&front_phy>;
+ phy-connection-type = "rgmii";
+ };
+ enet5: ethernet@f0000 {
+ status = "disabled";
+ };
+ };
};
rio: rapidio@ffe0c0000 {
@@ -149,4 +213,4 @@
};
};
-/include/ "fsl/p2041si-post.dtsi"
+/include/ "p2041si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/fsl/mpc8536ds.dts
index 19736222a0b9..ab6997a0fd1b 100644
--- a/arch/powerpc/boot/dts/mpc8536ds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8536ds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8536 DS Device Tree Source
*
* Copyright 2008, 2011 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8536si-pre.dtsi"
+/include/ "mpc8536si-pre.dtsi"
/ {
model = "fsl,mpc8536ds";
@@ -105,5 +101,5 @@
};
};
-/include/ "fsl/mpc8536si-post.dtsi"
+/include/ "mpc8536si-post.dtsi"
/include/ "mpc8536ds.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8536ds.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536ds.dtsi
index 937ad7e46119..a925fe49a73e 100644
--- a/arch/powerpc/boot/dts/mpc8536ds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8536ds.dtsi
@@ -142,7 +142,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
partition@u-boot {
@@ -166,17 +166,17 @@
};
};
flash@1 {
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <1>;
spi-max-frequency = <40000000>;
};
flash@2 {
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <2>;
spi-max-frequency = <40000000>;
};
flash@3 {
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <3>;
spi-max-frequency = <40000000>;
};
diff --git a/arch/powerpc/boot/dts/mpc8536ds_36b.dts b/arch/powerpc/boot/dts/fsl/mpc8536ds_36b.dts
index 6c723ee108cd..1b799741cd46 100644
--- a/arch/powerpc/boot/dts/mpc8536ds_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8536ds_36b.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8536DS Device Tree Source (36-bit address map)
*
* Copyright 2008-2009, 2011 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8536si-pre.dtsi"
+/include/ "mpc8536si-pre.dtsi"
/ {
model = "fsl,mpc8536ds";
@@ -105,5 +101,5 @@
};
};
-/include/ "fsl/mpc8536si-post.dtsi"
+/include/ "mpc8536si-post.dtsi"
/include/ "mpc8536ds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
index c8b2daa40ac8..fba40a1bccc0 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8536si-post.dtsi
@@ -172,7 +172,7 @@
/* mark compat w/8572 to get some erratum treatment */
gpio-controller@f000 {
- compatible = "fsl,mpc8572-gpio", "fsl,pq3-gpio";
+ compatible = "fsl,mpc8572-gpio";
};
sata@18000 {
@@ -199,6 +199,10 @@
/include/ "pq3-dma-0.dtsi"
/include/ "pq3-etsec1-0.dtsi"
+ enet0: ethernet@24000 {
+ fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec1_clk>;
+ };
/include/ "pq3-etsec1-timer-0.dtsi"
usb@22000 {
@@ -222,9 +226,10 @@
};
/include/ "pq3-etsec1-2.dtsi"
-
- ethernet@26000 {
+ enet2: ethernet@26000 {
cell-index = <1>;
+ fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec3_clk>;
};
usb@2b000 {
@@ -249,4 +254,9 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
+ power@e0070 {
+ compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
+ };
};
diff --git a/arch/powerpc/boot/dts/mpc8544ds.dts b/arch/powerpc/boot/dts/fsl/mpc8544ds.dts
index ed38874c3a36..f4a8b71396a5 100644
--- a/arch/powerpc/boot/dts/mpc8544ds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8544ds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8544 DS Device Tree Source
*
* Copyright 2007, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8544si-pre.dtsi"
+/include/ "mpc8544si-pre.dtsi"
/ {
model = "MPC8544DS";
@@ -103,5 +99,5 @@
* for interrupt-map & interrupt-map-mask
*/
-/include/ "fsl/mpc8544si-post.dtsi"
+/include/ "mpc8544si-post.dtsi"
/include/ "mpc8544ds.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8544ds.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544ds.dtsi
index 47d986b041f6..47d986b041f6 100644
--- a/arch/powerpc/boot/dts/mpc8544ds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8544ds.dtsi
diff --git a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
index b68eb119faef..ea7416af7ee3 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8544si-post.dtsi
@@ -188,4 +188,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
index 579d76cb8e32..dddb7374508d 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8548si-post.dtsi
@@ -156,4 +156,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/mpc8568mds.dts b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
index bead2b655b9f..3603b5ae1230 100644
--- a/arch/powerpc/boot/dts/mpc8568mds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8568mds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8568E MDS Device Tree Source
*
* Copyright 2007, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8568si-pre.dtsi"
+/include/ "mpc8568si-pre.dtsi"
/ {
model = "MPC8568EMDS";
@@ -126,7 +122,7 @@
par_io@e0100 {
num-ports = <7>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x4 0xa 0x1 0x0 0x2 0x0 /* TxD0 */
@@ -154,7 +150,7 @@
0x1 0x1f 0x2 0x0 0x3 0x0>; /* GTX125 */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x5 0xa 0x1 0x0 0x2 0x0 /* TxD0 */
@@ -228,22 +224,22 @@
/* These are the same PHYs as on
* gianfar's MDIO bus */
- qe_phy0: ethernet-phy@07 {
+ qe_phy0: ethernet-phy@7 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x7>;
};
- qe_phy1: ethernet-phy@01 {
+ qe_phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x1>;
};
- qe_phy2: ethernet-phy@02 {
+ qe_phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x2>;
};
- qe_phy3: ethernet-phy@03 {
+ qe_phy3: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x3>;
@@ -311,4 +307,4 @@
};
};
-/include/ "fsl/mpc8568si-post.dtsi"
+/include/ "mpc8568si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8569mds.dts b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
index d0dcdafa5eb2..206614ea2269 100644
--- a/arch/powerpc/boot/dts/mpc8569mds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8569mds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8569E MDS Device Tree Source
*
* Copyright (C) 2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8569si-pre.dtsi"
+/include/ "mpc8569si-pre.dtsi"
/ {
model = "MPC8569EMDS";
@@ -55,7 +51,7 @@
label = "kernel";
reg = <0x01c00000 0x002e0000>;
};
- partiton@1ee0000 {
+ partition@1ee0000 {
label = "dtb";
reg = <0x01ee0000 0x00020000>;
};
@@ -141,7 +137,7 @@
gpio-controller;
};
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -161,7 +157,7 @@
0x2 0x14 0x1 0x0 0x2 0x0>; /* ENET1_GTXCLK */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -181,7 +177,7 @@
0x2 0x2 0x1 0x0 0x2 0x0>; /* ENET2_GTXCLK */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -201,7 +197,7 @@
0x2 0x19 0x1 0x0 0x2 0x0>; /* ENET3_GTXCLK */
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x2 0x1f 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -232,7 +228,7 @@
mode = "cpu-qe";
serial-flash@0 {
- compatible = "stm,m25p40";
+ compatible = "st,m25p40";
reg = <0>;
spi-max-frequency = <25000000>;
};
@@ -272,30 +268,30 @@
reg = <0x2120 0x18>;
compatible = "fsl,ucc-mdio";
- qe_phy0: ethernet-phy@07 {
+ qe_phy0: ethernet-phy@7 {
interrupt-parent = <&mpic>;
interrupts = <1 1 0 0>;
reg = <0x7>;
};
- qe_phy1: ethernet-phy@01 {
+ qe_phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
interrupts = <2 1 0 0>;
reg = <0x1>;
};
- qe_phy2: ethernet-phy@02 {
+ qe_phy2: ethernet-phy@2 {
interrupt-parent = <&mpic>;
interrupts = <3 1 0 0>;
reg = <0x2>;
};
- qe_phy3: ethernet-phy@03 {
+ qe_phy3: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <4 1 0 0>;
reg = <0x3>;
};
- qe_phy5: ethernet-phy@04 {
+ qe_phy5: ethernet-phy@4 {
reg = <0x04>;
};
- qe_phy7: ethernet-phy@06 {
+ qe_phy7: ethernet-phy@6 {
reg = <0x6>;
};
tbi1: tbi-phy@11 {
@@ -444,4 +440,4 @@
};
};
-/include/ "fsl/mpc8569si-post.dtsi"
+/include/ "mpc8569si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds.dts
index 0c9f2955deb4..679d53c4a946 100644
--- a/arch/powerpc/boot/dts/mpc8572ds.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8572 DS Device Tree Source
*
* Copyright 2007-2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8572si-pre.dtsi"
+/include/ "mpc8572si-pre.dtsi"
/ {
model = "fsl,MPC8572DS";
@@ -86,5 +82,5 @@
* for interrupt-map & interrupt-map-mask
*/
-/include/ "fsl/mpc8572si-post.dtsi"
+/include/ "mpc8572si-post.dtsi"
/include/ "mpc8572ds.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8572ds.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572ds.dtsi
index 357490bb84da..357490bb84da 100644
--- a/arch/powerpc/boot/dts/mpc8572ds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds.dtsi
diff --git a/arch/powerpc/boot/dts/mpc8572ds_36b.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds_36b.dts
index 6c3d0b305e1b..f2abce2bb201 100644
--- a/arch/powerpc/boot/dts/mpc8572ds_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds_36b.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8572DS Device Tree Source (36-bit address map)
*
* Copyright 2007-2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8572si-pre.dtsi"
+/include/ "mpc8572si-pre.dtsi"
/ {
model = "fsl,MPC8572DS";
@@ -86,5 +82,5 @@
* for interrupt-map & interrupt-map-mask
*/
-/include/ "fsl/mpc8572si-post.dtsi"
+/include/ "mpc8572si-post.dtsi"
/include/ "mpc8572ds.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core0.dts
index ef9ef56b3eeb..d1a4993caf55 100644
--- a/arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core0.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8572 DS Core0 Device Tree Source in CAMP mode.
*
@@ -7,11 +8,6 @@
* eth1, crypto, pci0, pci1.
*
* Copyright 2007-2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc8572ds.dts"
diff --git a/arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core1.dts
index 24564ee108e5..63e8243ff349 100644
--- a/arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts
+++ b/arch/powerpc/boot/dts/fsl/mpc8572ds_camp_core1.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8572 DS Core1 Device Tree Source in CAMP mode.
*
@@ -8,11 +9,6 @@
* Please note to add "-b 1" for core1's dts compiling.
*
* Copyright 2007-2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc8572ds.dts"
diff --git a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
index d44e25a48734..40a6cff77032 100644
--- a/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/mpc8572si-post.dtsi
@@ -162,7 +162,7 @@
/include/ "pq3-dma-1.dtsi"
/include/ "pq3-gpio-0.dtsi"
gpio-controller@f000 {
- compatible = "fsl,mpc8572-gpio", "fsl,pq3-gpio";
+ compatible = "fsl,mpc8572-gpio";
};
L2: l2-cache-controller@20000 {
@@ -193,4 +193,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi b/arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi
new file mode 100644
index 000000000000..77900b924151
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8641si-post.dtsi
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8641 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ */
+
+&lbc {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc8641-localbus", "simple-bus";
+ interrupts = <19 2 0 0>;
+};
+
+&soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "soc";
+ compatible = "fsl,mpc8641-soc", "simple-bus";
+ bus-frequency = <0>;
+
+ mcm-law@0 {
+ compatible = "fsl,mcm-law";
+ reg = <0x0 0x1000>;
+ fsl,num-laws = <10>;
+ };
+
+ mcm@1000 {
+ compatible = "fsl,mpc8641-mcm", "fsl,mcm";
+ reg = <0x1000 0x1000>;
+ interrupts = <17 2 0 0>;
+ };
+
+/include/ "pq3-i2c-0.dtsi"
+/include/ "pq3-i2c-1.dtsi"
+/include/ "pq3-duart-0.dtsi"
+ serial@4600 {
+ interrupts = <28 2 0 0>;
+ };
+/include/ "pq3-dma-0.dtsi"
+ dma@21300 {
+ compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
+ };
+ dma-channel@0 {
+ compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+ };
+ dma-channel@80 {
+ compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+ };
+ dma-channel@100 {
+ compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+ };
+ dma-channel@180 {
+ compatible = "fsl,mpc8641-dma-channel", "fsl,eloplus-dma-channel";
+ };
+
+/include/ "pq3-etsec1-0.dtsi"
+ ethernet@24000 {
+ model = "TSEC";
+ };
+/include/ "pq3-etsec1-1.dtsi"
+ ethernet@25000 {
+ model = "TSEC";
+ };
+/include/ "pq3-etsec1-2.dtsi"
+ ethernet@26000 {
+ model = "TSEC";
+ };
+/include/ "pq3-etsec1-3.dtsi"
+ ethernet@27000 {
+ model = "TSEC";
+ };
+
+/include/ "qoriq-mpic.dtsi"
+ msi@41600 {
+ compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
+ };
+ msi@41800 {
+ compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
+ };
+ msi@41a00 {
+ compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
+ };
+
+ global-utilities@e0000 {
+ compatible = "fsl,mpc8641-guts";
+ reg = <0xe0000 0x1000>;
+ fsl,has-rstcr;
+ };
+};
+
+&pci0 {
+ compatible = "fsl,mpc8641-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0x0 0xff>;
+ clock-frequency = <100000000>;
+ interrupts = <24 2 0 0>;
+
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <24 2 0 0>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x4 &mpic 0x3 0x1 0x0 0x0
+ >;
+ };
+};
+
+&pci1 {
+ compatible = "fsl,mpc8641-pcie";
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0x0 0xff>;
+ clock-frequency = <100000000>;
+ interrupts = <25 2 0 0>;
+
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <25 2 0 0>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ 0x0000 0x0 0x0 0x1 &mpic 0x4 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x2 &mpic 0x5 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+ 0x0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+ >;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi b/arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi
new file mode 100644
index 000000000000..a9f7e79d3364
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mpc8641si-pre.dtsi
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC8641 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ */
+
+/dts-v1/;
+
+/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ serial0 = &serial0;
+ serial1 = &serial1;
+ pci0 = &pci0;
+ pci1 = &pci1;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ PowerPC,8641@0 {
+ device_type = "cpu";
+ reg = <0>;
+ d-cache-line-size = <32>;
+ i-cache-line-size = <32>;
+ d-cache-size = <32768>;
+ i-cache-size = <32768>;
+ timebase-frequency = <0>;
+ bus-frequency = <0>;
+ clock-frequency = <0>;
+ };
+
+ PowerPC,8641@1 {
+ device_type = "cpu";
+ reg = <1>;
+ d-cache-line-size = <32>;
+ i-cache-line-size = <32>;
+ d-cache-size = <32768>;
+ i-cache-size = <32768>;
+ timebase-frequency = <0>;
+ bus-frequency = <0>;
+ clock-frequency = <0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/mvme2500.dts b/arch/powerpc/boot/dts/fsl/mvme2500.dts
new file mode 100644
index 000000000000..e0f048a03956
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mvme2500.dts
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device tree source for the Emerson/Artesyn MVME2500
+ *
+ * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Based on: P2020 DS Device Tree Source
+ * Copyright 2009 Freescale Semiconductor Inc.
+ */
+
+/include/ "p2020si-pre.dtsi"
+
+/ {
+ model = "MVME2500";
+ compatible = "artesyn,MVME2500";
+
+ aliases {
+ serial2 = &serial2;
+ serial3 = &serial3;
+ serial4 = &serial4;
+ serial5 = &serial5;
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ soc: soc@ffe00000 {
+ ranges = <0x0 0 0xffe00000 0x100000>;
+
+ i2c@3000 {
+ hwmon@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+
+ rtc@68 {
+ compatible = "dallas,ds1337";
+ reg = <0x68>;
+ interrupts = <8 1 0 0>;
+ };
+
+ eeprom@54 {
+ compatible = "atmel,24c64";
+ reg = <0x54>;
+ };
+
+ eeprom@52 {
+ compatible = "atmel,24c512";
+ reg = <0x52>;
+ };
+
+ eeprom@53 {
+ compatible = "atmel,24c512";
+ reg = <0x53>;
+ };
+
+ eeprom@50 {
+ compatible = "atmel,24c02";
+ reg = <0x50>;
+ };
+
+ };
+
+ spi0: spi@7000 {
+ fsl,espi-num-chipselects = <2>;
+
+ flash@0 {
+ compatible = "atmel,at25df641", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <10000000>;
+ };
+ flash@1 {
+ compatible = "atmel,at25df641", "jedec,spi-nor";
+ reg = <1>;
+ spi-max-frequency = <10000000>;
+ };
+ };
+
+ usb@22000 {
+ dr_mode = "host";
+ phy_type = "ulpi";
+ };
+
+ enet0: ethernet@24000 {
+ tbi-handle = <&tbi0>;
+ phy-handle = <&phy1>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@24520 {
+ phy1: ethernet-phy@1 {
+ compatible = "brcm,bcm54616S";
+ interrupts = <6 1 0 0>;
+ reg = <0x1>;
+ };
+
+ phy2: ethernet-phy@2 {
+ compatible = "brcm,bcm54616S";
+ interrupts = <6 1 0 0>;
+ reg = <0x2>;
+ };
+
+ phy3: ethernet-phy@3 {
+ compatible = "brcm,bcm54616S";
+ interrupts = <5 1 0 0>;
+ reg = <0x3>;
+ };
+
+ phy7: ethernet-phy@7 {
+ compatible = "brcm,bcm54616S";
+ interrupts = <7 1 0 0>;
+ reg = <0x7>;
+ };
+
+ tbi0: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet1: ethernet@25000 {
+ tbi-handle = <&tbi1>;
+ phy-handle = <&phy7>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@25520 {
+ tbi1: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+
+ enet2: ethernet@26000 {
+ tbi-handle = <&tbi2>;
+ phy-handle = <&phy3>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@26520 {
+ tbi2: tbi-phy@11 {
+ reg = <0x11>;
+ device_type = "tbi-phy";
+ };
+ };
+ };
+
+ lbc: localbus@ffe05000 {
+ reg = <0 0xffe05000 0 0x1000>;
+
+ ranges = <0x0 0x0 0x0 0xfff00000 0x00080000
+ 0x1 0x0 0x0 0xffc40000 0x00010000
+ 0x2 0x0 0x0 0xffc50000 0x00010000
+ 0x3 0x0 0x0 0xffc60000 0x00010000
+ 0x4 0x0 0x0 0xffc70000 0x00010000
+ 0x6 0x0 0x0 0xffc80000 0x00010000
+ 0x5 0x0 0x0 0xffdf0000 0x00008000>;
+
+ serial2: serial@1,0 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x1 0x0 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <11 2 0 0>;
+ };
+
+ serial3: serial@2,0 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x2 0x0 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <1 2 0 0>;
+ };
+
+ serial4: serial@3,0 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x3 0x0 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <2 2 0 0>;
+ };
+
+ serial5: serial@4,0 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4 0x0 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <3 2 0 0>;
+ };
+
+ mram@0,0 {
+ compatible = "everspin,mram", "mtd-ram";
+ reg = <0x0 0x0 0x80000>;
+ bank-width = <2>;
+ };
+
+ board-control@5,0 {
+ compatible = "artesyn,mvme2500-fpga";
+ reg = <0x5 0x0 0x01000>;
+ };
+
+ cpld@6,0 {
+ compatible = "artesyn,mvme2500-cpld";
+ reg = <0x6 0x0 0x10000>;
+ interrupts = <9 1 0 0>;
+ };
+ };
+
+ pci0: pcie@ffe08000 {
+ reg = <0 0xffe08000 0 0x1000>;
+ ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0 0xffc00000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0x80000000
+ 0x2000000 0x0 0x80000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x10000>;
+ };
+ };
+
+ pci1: pcie@ffe09000 {
+ reg = <0 0xffe09000 0 0x1000>;
+ ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xa0000000
+ 0x2000000 0x0 0xa0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x10000>;
+ };
+
+ };
+
+ pci2: pcie@ffe0a000 {
+ reg = <0 0xffe0a000 0 0x1000>;
+ ranges = <0x2000000 0x0 0xc0000000 0 0xc0000000 0x0 0x20000000
+ 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x10000>;
+ pcie@0 {
+ ranges = <0x2000000 0x0 0xc0000000
+ 0x2000000 0x0 0xc0000000
+ 0x0 0x20000000
+
+ 0x1000000 0x0 0x0
+ 0x1000000 0x0 0x0
+ 0x0 0x10000>;
+ };
+ };
+};
+
+/include/ "p2020si-post.dtsi"
+
+/ {
+ soc@ffe00000 {
+ serial@4600 {
+ status = "disabled";
+ };
+
+ i2c@3100 {
+ status = "disabled";
+ };
+
+ sdhc@2e000 {
+ compatible = "fsl,p2020-esdhc", "fsl,esdhc";
+ non-removable;
+ };
+
+ };
+
+};
diff --git a/arch/powerpc/boot/dts/fsl/mvme7100.dts b/arch/powerpc/boot/dts/fsl/mvme7100.dts
new file mode 100644
index 000000000000..bcc9dedd630f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/mvme7100.dts
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Device tree source for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+/include/ "mpc8641si-pre.dtsi"
+
+/ {
+ model = "MVME7100";
+ compatible = "artesyn,MVME7100";
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x80000000>;
+ };
+
+ soc: soc@f1000000 {
+ ranges = <0x00000000 0xf1000000 0x00100000>;
+
+ i2c@3000 {
+ hwmon@4c {
+ compatible = "dallas,max6649";
+ reg = <0x4c>;
+ };
+
+ rtc@68 {
+ status = "disabled";
+ };
+ };
+
+
+ enet0: ethernet@24000 {
+ phy-handle = <&phy0>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@24520 {
+ phy0: ethernet-phy@1 {
+ reg = <1>;
+ };
+ phy1: ethernet-phy@2 {
+ reg = <2>;
+ };
+ phy2: ethernet-phy@3 {
+ reg = <3>;
+ };
+ phy3: ethernet-phy@4 {
+ reg = <4>;
+ };
+ };
+
+ enet1: ethernet@25000 {
+ phy-handle = <&phy1>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@25520 {
+ status = "disabled";
+ };
+
+ enet2: ethernet@26000 {
+ phy-handle = <&phy2>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@26520 {
+ status = "disabled";
+ };
+
+ enet3: ethernet@27000 {
+ phy-handle = <&phy3>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@27520 {
+ status = "disabled";
+ };
+
+ serial1: serial@4600 {
+ status = "disabled";
+ };
+ };
+
+ lbc: localbus@f1005000 {
+ reg = <0xf1005000 0x1000>;
+
+ ranges = <0 0 0xf8000000 0x08000000 // NOR Flash (128MB)
+ 2 0 0xf2030000 0x00010000 // NAND Flash (8GB)
+ 3 0 0xf2400000 0x00080000 // MRAM (512KB)
+ 4 0 0xf2000000 0x00010000 // BCSR
+ 5 0 0xf2010000 0x00010000>; // QUART
+
+ bcsr@4,0 {
+ compatible = "artesyn,mvme7100-bcsr";
+ reg = <4 0 0x10000>;
+ };
+
+ serial@5,1000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <5 0x1000 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <11 1 0 0>;
+ };
+
+ serial@5,2000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <5 0x2000 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <11 1 0 0>;
+ };
+
+ serial@5,3000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <5 0x3000 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <11 1 0 0>;
+ };
+
+ serial@5,4000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <5 0x4000 0x100>;
+ clock-frequency = <1843200>;
+ interrupts = <11 1 0 0>;
+ };
+ };
+
+ pci0: pcie@f1008000 {
+ status = "disabled";
+ };
+
+ pci1: pcie@f1009000 {
+ status = "disabled";
+ };
+
+ chosen {
+ stdout-path = &serial0;
+ };
+};
+
+/include/ "mpc8641si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/oca4080.dts b/arch/powerpc/boot/dts/fsl/oca4080.dts
index 3d4c751d1608..17bc6f391248 100644
--- a/arch/powerpc/boot/dts/oca4080.dts
+++ b/arch/powerpc/boot/dts/fsl/oca4080.dts
@@ -36,7 +36,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p4080si-pre.dtsi"
+/include/ "p4080si-pre.dtsi"
/ {
model = "fsl,OCA4080";
@@ -49,10 +49,37 @@
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01008000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -115,4 +142,4 @@
};
};
-/include/ "fsl/p4080si-post.dtsi"
+/include/ "p4080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts
new file mode 100644
index 000000000000..1e33d78d8c0b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dts
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1010 RDB Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1010si-pre.dtsi"
+
+/ {
+ model = "fsl,P1010RDB";
+ compatible = "fsl,P1010RDB";
+
+ /include/ "p1010rdb_32b.dtsi"
+};
+
+/include/ "p1010rdb.dtsi"
+/include/ "p1010rdb-pa.dtsi"
+/include/ "p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1010rdb-pa.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dtsi
index 434fb2d58575..434fb2d58575 100644
--- a/arch/powerpc/boot/dts/p1010rdb-pa.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pa.dtsi
diff --git a/arch/powerpc/boot/dts/p1010rdb-pa_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pa_36b.dts
index 3033371bc007..03bd76ca8406 100644
--- a/arch/powerpc/boot/dts/p1010rdb-pa_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pa_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1010si-pre.dtsi"
+/include/ "p1010si-pre.dtsi"
/ {
model = "fsl,P1010RDB";
@@ -43,4 +43,4 @@
/include/ "p1010rdb.dtsi"
/include/ "p1010rdb-pa.dtsi"
-/include/ "fsl/p1010si-post.dtsi"
+/include/ "p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
new file mode 100644
index 000000000000..ce3346d77858
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb.dts
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * P1010 RDB Device Tree Source
+ *
+ * Copyright 2011 Freescale Semiconductor Inc.
+ */
+
+/include/ "p1010si-pre.dtsi"
+
+/ {
+ model = "fsl,P1010RDB-PB";
+ compatible = "fsl,P1010RDB-PB";
+
+ /include/ "p1010rdb_32b.dtsi"
+};
+
+/include/ "p1010rdb.dtsi"
+
+&phy0 {
+ interrupts = <0 1 0 0>;
+};
+
+&phy1 {
+ interrupts = <2 1 0 0>;
+};
+
+&phy2 {
+ interrupts = <1 1 0 0>;
+};
+
+/include/ "p1010si-post.dtsi"
+
+&pci0 {
+ pcie@0 {
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ /*
+ *irq[4:5] are active-high
+ *irq[6:7] are active-low
+ */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+ >;
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1010rdb-pb_36b.dts b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
index 7ab3c907b326..83590354f9a0 100644
--- a/arch/powerpc/boot/dts/p1010rdb-pb_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb-pb_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1010si-pre.dtsi"
+/include/ "p1010si-pre.dtsi"
/ {
model = "fsl,P1010RDB-PB";
@@ -55,4 +55,20 @@
interrupts = <1 1 0 0>;
};
-/include/ "fsl/p1010si-post.dtsi"
+/include/ "p1010si-post.dtsi"
+
+&pci0 {
+ pcie@0 {
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ /*
+ *irq[4:5] are active-high
+ *irq[6:7] are active-low
+ */
+ 0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
+ 0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
+ 0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+ 0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+ >;
+ };
+};
diff --git a/arch/powerpc/boot/dts/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
index ea534efa790d..ef49a7d6c69d 100644
--- a/arch/powerpc/boot/dts/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
@@ -89,7 +89,7 @@
&board_soc {
i2c@3000 {
eeprom@50 {
- compatible = "st,24c256";
+ compatible = "st,24c256", "atmel,24c256";
reg = <0x50>;
};
@@ -110,7 +110,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
@@ -186,6 +186,18 @@
};
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <10>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0x80000016>;
+ fsl,tmr-fiper1 = <999999990>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <199999999>;
+ };
+
enet0: ethernet@b0000 {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1010rdb_32b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
index fdc19aab2f70..583a6cd05079 100644
--- a/arch/powerpc/boot/dts/p1010rdb_32b.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb_32b.dtsi
@@ -36,7 +36,7 @@ memory {
device_type = "memory";
};
-board_ifc: ifc: ifc@ffe1e000 {
+board_ifc: ifc: memory-controller@ffe1e000 {
/* NOR, NAND Flashes and CPLD on board */
ranges = <0x0 0x0 0x0 0xee000000 0x02000000
0x1 0x0 0x0 0xff800000 0x00010000
diff --git a/arch/powerpc/boot/dts/p1010rdb_36b.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
index de2fceed4f79..4d41efe0038f 100644
--- a/arch/powerpc/boot/dts/p1010rdb_36b.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb_36b.dtsi
@@ -36,7 +36,7 @@ memory {
device_type = "memory";
};
-board_ifc: ifc: ifc@fffe1e000 {
+board_ifc: ifc: memory-controller@fffe1e000 {
/* NOR, NAND Flashes and CPLD on board */
ranges = <0x0 0x0 0xf 0xee000000 0x02000000
0x1 0x0 0xf 0xff800000 0x00010000
diff --git a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
index af12ead88c5f..2d2550729dcc 100644
--- a/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010si-post.dtsi
@@ -35,7 +35,7 @@
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <16 2 0 0 19 2 0 0>;
};
@@ -122,7 +122,15 @@
};
/include/ "pq3-i2c-0.dtsi"
+ i2c@3000 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "pq3-i2c-1.dtsi"
+ i2c@3100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "pq3-duart-0.dtsi"
/include/ "pq3-espi-0.dtsi"
spi0: spi@7000 {
@@ -137,12 +145,14 @@
compatible = "fsl,p1010-flexcan";
reg = <0x1c000 0x1000>;
interrupts = <48 0x2 0 0>;
+ big-endian;
};
can1: can@1d000 {
compatible = "fsl,p1010-flexcan";
reg = <0x1d000 0x1000>;
interrupts = <61 0x2 0 0>;
+ big-endian;
};
L2: l2-cache-controller@20000 {
@@ -170,28 +180,19 @@
/include/ "pq3-mpic-timer-B.dtsi"
/include/ "pq3-etsec2-0.dtsi"
+/include/ "pq3-etsec2-1.dtsi"
+/include/ "pq3-etsec2-2.dtsi"
+
enet0: ethernet@b0000 {
- queue-group@b0000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
- };
+ fsl,pmc-handle = <&etsec1_clk>;
};
-/include/ "pq3-etsec2-1.dtsi"
enet1: ethernet@b1000 {
- queue-group@b1000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
- };
+ fsl,pmc-handle = <&etsec2_clk>;
};
-/include/ "pq3-etsec2-2.dtsi"
enet2: ethernet@b2000 {
- queue-group@b2000 {
- fsl,rx-bit-map = <0xff>;
- fsl,tx-bit-map = <0xff>;
- };
-
+ fsl,pmc-handle = <&etsec3_clk>;
};
global-utilities@e0000 {
@@ -199,4 +200,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/p1020mbg-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1020mbg-pc.dtsi
index a24699cfea9c..a24699cfea9c 100644
--- a/arch/powerpc/boot/dts/p1020mbg-pc.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020mbg-pc.dtsi
diff --git a/arch/powerpc/boot/dts/p1020mbg-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_32b.dts
index ab8f076eae90..b29d1fcb5e6b 100644
--- a/arch/powerpc/boot/dts/p1020mbg-pc_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020MBG-PC";
compatible = "fsl,P1020MBG-PC";
@@ -86,4 +86,4 @@
};
/include/ "p1020mbg-pc.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020mbg-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_36b.dts
index 9e9f401419b1..678d0eec24e2 100644
--- a/arch/powerpc/boot/dts/p1020mbg-pc_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020mbg-pc_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020MBG-PC";
compatible = "fsl,P1020MBG-PC";
@@ -86,4 +86,4 @@
};
/include/ "p1020mbg-pc.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020rdb-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1020rdb-pc.dtsi
index c952cd37cf6d..a13876c05c1e 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pc.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc.dtsi
@@ -151,7 +151,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
@@ -205,13 +205,13 @@
mdio@24000 {
phy0: ethernet-phy@0 {
interrupt-parent = <&mpic>;
- interrupts = <3 1>;
+ interrupts = <3 1 0 0>;
reg = <0x0>;
};
phy1: ethernet-phy@1 {
interrupt-parent = <&mpic>;
- interrupts = <2 1>;
+ interrupts = <2 1 0 0>;
reg = <0x1>;
};
diff --git a/arch/powerpc/boot/dts/p1020rdb-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_32b.dts
index 4de69b726dc5..8175bf6f3e9c 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pc_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020RDB-PC";
compatible = "fsl,P1020RDB-PC";
@@ -87,4 +87,4 @@
};
/include/ "p1020rdb-pc.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020rdb-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_36b.dts
index 5237da7441bc..01c305795163 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pc_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020RDB-PC";
compatible = "fsl,P1020RDB-PC";
@@ -87,4 +87,4 @@
};
/include/ "p1020rdb-pc.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020rdb-pc_camp_core0.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core0.dts
index f411515937ec..42e1e2fc0892 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pc_camp_core0.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core0.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P1020 RDB-PC Core0 Device Tree Source in CAMP mode.
*
@@ -9,11 +10,6 @@
* Please note to add "-b 0" for core0's dts compiling.
*
* Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "p1020rdb-pc_32b.dts"
diff --git a/arch/powerpc/boot/dts/p1020rdb-pc_camp_core1.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core1.dts
index a91335ad82c2..da9a8e73b3e2 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pc_camp_core1.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pc_camp_core1.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P1020 RDB-PC Core1 Device Tree Source in CAMP mode.
*
@@ -8,11 +9,6 @@
* Please note to add "-b 1" for core1's dts compiling.
*
* Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "p1020rdb-pc_32b.dts"
diff --git a/arch/powerpc/boot/dts/p1020rdb-pd.dts b/arch/powerpc/boot/dts/fsl/p1020rdb-pd.dts
index 987017ea36b6..f2dc6c09be52 100644
--- a/arch/powerpc/boot/dts/p1020rdb-pd.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb-pd.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020RDB-PD";
compatible = "fsl,P1020RDB-PD";
@@ -155,7 +155,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
/* input clock */
spi-max-frequency = <40000000>;
@@ -225,6 +225,18 @@
};
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <10>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0x80000016>;
+ fsl,tmr-fiper1 = <999999990>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <199999999>;
+ };
+
enet0: ethernet@b0000 {
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
@@ -277,4 +289,4 @@
};
};
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020rdb.dts b/arch/powerpc/boot/dts/fsl/p1020rdb.dts
index 518bf99b1f50..1a8d81ee4168 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P1020 RDB Device Tree Source
*
* Copyright 2009-2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020RDB";
compatible = "fsl,P1020RDB";
@@ -63,4 +59,4 @@
};
/include/ "p1020rdb.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1020rdb.dtsi
index 1fb7e0e0940f..703142ee6627 100644
--- a/arch/powerpc/boot/dts/p1020rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb.dtsi
@@ -148,7 +148,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
diff --git a/arch/powerpc/boot/dts/p1020rdb_36b.dts b/arch/powerpc/boot/dts/fsl/p1020rdb_36b.dts
index bdbdb6097e57..fd09a19789e5 100644
--- a/arch/powerpc/boot/dts/p1020rdb_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020rdb_36b.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P1020 RDB Device Tree Source (36-bit address map)
*
* Copyright 2009-2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020RDB";
compatible = "fsl,P1020RDB";
@@ -63,4 +59,4 @@
};
/include/ "p1020rdb.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
index 642dc3a83d0e..cc4c7461003b 100644
--- a/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020si-post.dtsi
@@ -163,14 +163,17 @@
/include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b0000 {
+ fsl,pmc-handle = <&etsec1_clk>;
};
/include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 {
+ fsl,pmc-handle = <&etsec2_clk>;
};
/include/ "pq3-etsec2-2.dtsi"
enet2: enet2_grp2: ethernet@b2000 {
+ fsl,pmc-handle = <&etsec3_clk>;
};
global-utilities@e0000 {
@@ -178,6 +181,8 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
/include/ "pq3-etsec2-grp2-0.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020utm-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1020utm-pc.dtsi
index 7ea85eabcc5c..7ea85eabcc5c 100644
--- a/arch/powerpc/boot/dts/p1020utm-pc.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1020utm-pc.dtsi
diff --git a/arch/powerpc/boot/dts/p1020utm-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1020utm-pc_32b.dts
index 4bfdd8971cdb..bc03ef611f98 100644
--- a/arch/powerpc/boot/dts/p1020utm-pc_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020utm-pc_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020UTM-PC";
compatible = "fsl,P1020UTM-PC";
@@ -86,4 +86,4 @@
};
/include/ "p1020utm-pc.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1020utm-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1020utm-pc_36b.dts
index abec53557501..32766f6a475e 100644
--- a/arch/powerpc/boot/dts/p1020utm-pc_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1020utm-pc_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1020UTM-PC";
compatible = "fsl,P1020UTM-PC";
@@ -86,4 +86,4 @@
};
/include/ "p1020utm-pc.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1021mds.dts b/arch/powerpc/boot/dts/fsl/p1021mds.dts
index 76559044df41..54af8de53371 100644
--- a/arch/powerpc/boot/dts/p1021mds.dts
+++ b/arch/powerpc/boot/dts/fsl/p1021mds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P1021 MDS Device Tree Source
*
* Copyright 2010,2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/p1021si-pre.dtsi"
+/include/ "p1021si-pre.dtsi"
/ {
model = "fsl,P1021";
compatible = "fsl,P1021MDS";
@@ -123,7 +119,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
@@ -202,7 +198,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -225,7 +221,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -296,7 +292,7 @@
interrupts = <4 1 0 0>;
reg = <0x0>;
};
- qe_phy1: ethernet-phy@03 {
+ qe_phy1: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <5 1 0 0>;
reg = <0x3>;
@@ -320,4 +316,4 @@
};
};
-/include/ "fsl/p1021si-post.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1021rdb-pc.dtsi b/arch/powerpc/boot/dts/fsl/p1021rdb-pc.dtsi
index d6274c58f496..18f9b31602d0 100644
--- a/arch/powerpc/boot/dts/p1021rdb-pc.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1021rdb-pc.dtsi
@@ -150,7 +150,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
@@ -224,6 +224,18 @@
};
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <10>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0x80000016>;
+ fsl,tmr-fiper1 = <999999990>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <199999999>;
+ };
+
enet0: ethernet@b0000 {
fixed-link = <1 1 1000 0 0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1021rdb-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_32b.dts
index 7cefa12b629a..d2b4710357ac 100644
--- a/arch/powerpc/boot/dts/p1021rdb-pc_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1021si-pre.dtsi"
+/include/ "p1021si-pre.dtsi"
/ {
model = "fsl,P1021RDB";
compatible = "fsl,P1021RDB-PC";
@@ -93,4 +93,4 @@
};
/include/ "p1021rdb-pc.dtsi"
-/include/ "fsl/p1021si-post.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1021rdb-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_36b.dts
index 53d0c889039c..e298c29e5606 100644
--- a/arch/powerpc/boot/dts/p1021rdb-pc_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1021rdb-pc_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1021si-pre.dtsi"
+/include/ "p1021si-pre.dtsi"
/ {
model = "fsl,P1021RDB";
compatible = "fsl,P1021RDB-PC";
@@ -93,4 +93,4 @@
};
/include/ "p1021rdb-pc.dtsi"
-/include/ "fsl/p1021si-post.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
index 407cb5fd0f5b..378195db9fca 100644
--- a/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1021si-post.dtsi
@@ -159,14 +159,17 @@
/include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b0000 {
+ fsl,pmc-handle = <&etsec1_clk>;
};
/include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 {
+ fsl,pmc-handle = <&etsec2_clk>;
};
/include/ "pq3-etsec2-2.dtsi"
enet2: enet2_grp2: ethernet@b2000 {
+ fsl,pmc-handle = <&etsec3_clk>;
};
global-utilities@e0000 {
@@ -174,6 +177,8 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
&qe {
diff --git a/arch/powerpc/boot/dts/p1022ds.dtsi b/arch/powerpc/boot/dts/fsl/p1022ds.dtsi
index 957e0dc1dc0f..ddefbf64f7f8 100644
--- a/arch/powerpc/boot/dts/p1022ds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022ds.dtsi
@@ -160,7 +160,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
@@ -215,6 +215,18 @@
};
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <5>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0xc01ebd3d>;
+ fsl,tmr-fiper1 = <999999995>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <266499999>;
+ };
+
ethernet@b0000 {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
diff --git a/arch/powerpc/boot/dts/p1022ds_32b.dts b/arch/powerpc/boot/dts/fsl/p1022ds_32b.dts
index d96cae00a9e3..5a7eaceb9e8e 100644
--- a/arch/powerpc/boot/dts/p1022ds_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1022ds_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1022si-pre.dtsi"
+/include/ "p1022si-pre.dtsi"
/ {
model = "fsl,P1022DS";
compatible = "fsl,P1022DS";
@@ -99,5 +99,5 @@
};
};
-/include/ "fsl/p1022si-post.dtsi"
+/include/ "p1022si-post.dtsi"
/include/ "p1022ds.dtsi"
diff --git a/arch/powerpc/boot/dts/p1022ds_36b.dts b/arch/powerpc/boot/dts/fsl/p1022ds_36b.dts
index f7aacce40bf6..88063cd9e20a 100644
--- a/arch/powerpc/boot/dts/p1022ds_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1022ds_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1022si-pre.dtsi"
+/include/ "p1022si-pre.dtsi"
/ {
model = "fsl,P1022DS";
compatible = "fsl,P1022DS";
@@ -99,5 +99,5 @@
};
};
-/include/ "fsl/p1022si-post.dtsi"
+/include/ "p1022si-post.dtsi"
/include/ "p1022ds.dtsi"
diff --git a/arch/powerpc/boot/dts/p1022rdk.dts b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
index 51d82de223f3..4261c2f7e4b3 100644
--- a/arch/powerpc/boot/dts/p1022rdk.dts
+++ b/arch/powerpc/boot/dts/fsl/p1022rdk.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1022si-pre.dtsi"
+/include/ "p1022si-pre.dtsi"
/ {
model = "fsl,P1022RDK";
compatible = "fsl,P1022RDK";
@@ -57,26 +57,26 @@
clock-frequency = <12288000>;
};
rtc@68 {
- compatible = "stm,m41t62";
+ compatible = "st,m41t62";
reg = <0x68>;
};
- adt7461@4c{
+ adt7461@4c {
compatible = "adi,adt7461";
reg = <0x4c>;
};
- zl6100@21{
+ zl6100@21 {
compatible = "isil,zl6100";
reg = <0x21>;
};
- zl6100@24{
+ zl6100@24 {
compatible = "isil,zl6100";
reg = <0x24>;
};
- zl6100@26{
+ zl6100@26 {
compatible = "isil,zl6100";
reg = <0x26>;
};
- zl6100@29{
+ zl6100@29 {
compatible = "isil,zl6100";
reg = <0x29>;
};
@@ -86,7 +86,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,m25p80";
+ compatible = "spansion,m25p80", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <1000000>;
partition@0 {
@@ -185,4 +185,4 @@
};
};
-/include/ "fsl/p1022si-post.dtsi"
+/include/ "p1022si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
index ebf202234549..6ac21e81344a 100644
--- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi
@@ -175,7 +175,7 @@
/include/ "pq3-gpio-0.dtsi"
- display@10000 {
+ display: display@10000 {
compatible = "fsl,diu", "fsl,p1022-diu";
reg = <0x10000 1000>;
interrupts = <64 2 0 0>;
@@ -224,10 +224,14 @@
/include/ "pq3-etsec2-0.dtsi"
enet0: enet0_grp2: ethernet@b0000 {
+ fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec1_clk>;
};
/include/ "pq3-etsec2-1.dtsi"
enet1: enet1_grp2: ethernet@b1000 {
+ fsl,wake-on-filer;
+ fsl,pmc-handle = <&etsec2_clk>;
};
global-utilities@e0000 {
@@ -236,9 +240,10 @@
fsl,has-rstcr;
};
- power@e0070{
- compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc";
- reg = <0xe0070 0x20>;
+/include/ "pq3-power.dtsi"
+ power@e0070 {
+ compatible = "fsl,p1022-pmc", "fsl,mpc8536-pmc",
+ "fsl,mpc8548-pmc";
};
};
diff --git a/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi
index 1956dea040cc..de76ae8992c6 100644
--- a/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1022si-pre.dtsi
@@ -50,6 +50,8 @@
pci0 = &pci0;
pci1 = &pci1;
pci2 = &pci2;
+ vga = &display;
+ display = &display;
};
cpus {
diff --git a/arch/powerpc/boot/dts/p1023rdb.dts b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
index 0a06a88ddbd5..ead928364beb 100644
--- a/arch/powerpc/boot/dts/p1023rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p1023rdb.dts
@@ -1,7 +1,7 @@
/*
* P1023 RDB Device Tree Source
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
*
* Author: Chunhe Lan <Chunhe.Lan@freescale.com>
*
@@ -34,7 +34,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1023si-pre.dtsi"
+/include/ "p1023si-pre.dtsi"
/ {
model = "fsl,P1023";
@@ -47,12 +47,39 @@
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ qportals: qman-portals@ff000000 {
+ ranges = <0x0 0xf 0xff000000 0x200000>;
+ };
+
+ bportals: bman-portals@ff200000 {
+ ranges = <0x0 0xf 0xff200000 0x200000>;
+ };
+
soc: soc@ff600000 {
ranges = <0x0 0x0 0xff600000 0x200000>;
i2c@3000 {
eeprom@53 {
- compatible = "at24,24c04";
+ compatible = "atmel,24c04";
reg = <0x53>;
};
@@ -228,7 +255,6 @@
0x0 0x100000>;
};
};
-
};
-/include/ "fsl/p1023si-post.dtsi"
+/include/ "p1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
index 81437fdf1db4..da6d3fc6ba41 100644
--- a/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1023si-post.dtsi
@@ -1,7 +1,7 @@
/*
* P1023/P1017 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,21 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
&lbc {
#address-cells = <2>;
#size-cells = <1>;
@@ -97,6 +112,53 @@
};
};
+&qportals {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x100000 0x1000>;
+ interrupts = <29 2 0 0>;
+ cell-index = <0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x101000 0x1000>;
+ interrupts = <31 2 0 0>;
+ cell-index = <1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x102000 0x1000>;
+ interrupts = <33 2 0 0>;
+ cell-index = <2>;
+ };
+};
+
+&bportals {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x100000 0x1000>;
+ interrupts = <30 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x101000 0x1000>;
+ interrupts = <32 2 0 0>;
+ };
+ bman-portal@8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x102000 0x1000>;
+ interrupts = <34 2 0 0>;
+ };
+};
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -221,6 +283,22 @@
/include/ "pq3-mpic.dtsi"
/include/ "pq3-mpic-timer-B.dtsi"
+ qman: qman@88000 {
+ compatible = "fsl,qman";
+ reg = <0x88000 0x1000>;
+ interrupts = <16 2 0 0>;
+ fsl,qman-portals = <&qportals>;
+ memory-region = <&qman_fqd &qman_pfdr>;
+ };
+
+ bman: bman@8a000 {
+ compatible = "fsl,bman";
+ reg = <0x8a000 0x1000>;
+ interrupts = <16 2 0 0>;
+ fsl,bman-portals = <&bportals>;
+ memory-region = <&bman_fbpr>;
+ };
+
global-utilities@e0000 {
compatible = "fsl,p1023-guts";
reg = <0xe0000 0x1000>;
diff --git a/arch/powerpc/boot/dts/p1024rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1024rdb.dtsi
index b05dcb40f800..b4d05867f707 100644
--- a/arch/powerpc/boot/dts/p1024rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1024rdb.dtsi
@@ -129,7 +129,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,m25p80";
+ compatible = "spansion,m25p80", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
diff --git a/arch/powerpc/boot/dts/p1024rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1024rdb_32b.dts
index 90e803e9ba5f..8b09b9d56ad1 100644
--- a/arch/powerpc/boot/dts/p1024rdb_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1024rdb_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1024RDB";
compatible = "fsl,P1024RDB";
@@ -84,4 +84,4 @@
};
/include/ "p1024rdb.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1024rdb_36b.dts b/arch/powerpc/boot/dts/fsl/p1024rdb_36b.dts
index 3656825b65a1..e7093aef28f1 100644
--- a/arch/powerpc/boot/dts/p1024rdb_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1024rdb_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1020si-pre.dtsi"
+/include/ "p1020si-pre.dtsi"
/ {
model = "fsl,P1024RDB";
compatible = "fsl,P1024RDB";
@@ -84,4 +84,4 @@
};
/include/ "p1024rdb.dtsi"
-/include/ "fsl/p1020si-post.dtsi"
+/include/ "p1020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1025rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
index f50256482297..0a5434a631c3 100644
--- a/arch/powerpc/boot/dts/p1025rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb.dtsi
@@ -137,7 +137,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
@@ -245,7 +245,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -268,7 +268,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -283,7 +283,7 @@
0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/
@@ -293,7 +293,7 @@
0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/
diff --git a/arch/powerpc/boot/dts/p1025rdb_32b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
index a2ed6280ba7a..ea33b57f8774 100644
--- a/arch/powerpc/boot/dts/p1025rdb_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1021si-pre.dtsi"
+/include/ "p1021si-pre.dtsi"
/ {
model = "fsl,P1025RDB";
compatible = "fsl,P1025RDB";
@@ -106,7 +106,7 @@
interrupts = <4 1 0 0>;
reg = <0x6>;
};
- qe_phy1: ethernet-phy@03 {
+ qe_phy1: ethernet-phy@3 {
interrupt-parent = <&mpic>;
interrupts = <5 1 0 0>;
reg = <0x3>;
@@ -130,4 +130,4 @@
};
/include/ "p1025rdb.dtsi"
-/include/ "fsl/p1021si-post.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1025rdb_36b.dts b/arch/powerpc/boot/dts/fsl/p1025rdb_36b.dts
index 06deb6f341ba..b0ded5e8bd0b 100644
--- a/arch/powerpc/boot/dts/p1025rdb_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p1025rdb_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1021si-pre.dtsi"
+/include/ "p1021si-pre.dtsi"
/ {
model = "fsl,P1025RDB";
compatible = "fsl,P1025RDB";
@@ -90,4 +90,4 @@
};
/include/ "p1025rdb.dtsi"
-/include/ "fsl/p1021si-post.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1025twr.dts b/arch/powerpc/boot/dts/fsl/p1025twr.dts
index 9036a4987905..9b8863b74b60 100644
--- a/arch/powerpc/boot/dts/p1025twr.dts
+++ b/arch/powerpc/boot/dts/fsl/p1025twr.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p1021si-pre.dtsi"
+/include/ "p1021si-pre.dtsi"
/ {
model = "fsl,P1025";
compatible = "fsl,TWR-P1025";
@@ -92,4 +92,4 @@
};
/include/ "p1025twr.dtsi"
-/include/ "fsl/p1021si-post.dtsi"
+/include/ "p1021si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1025twr.dtsi b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
index 8453501c256e..ab75b8f29ae2 100644
--- a/arch/powerpc/boot/dts/p1025twr.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1025twr.dtsi
@@ -138,6 +138,18 @@
};
};
+ ptp_clock@b0e00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0xb0e00 0xb0>;
+ interrupts = <68 2 0 0 69 2 0 0>;
+ fsl,tclk-period = <10>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0xc0000021>;
+ fsl,tmr-fiper1 = <999999990>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <133333332>;
+ };
+
enet0: ethernet@b0000 {
phy-handle = <&phy0>;
phy-connection-type = "rgmii-id";
@@ -160,7 +172,7 @@
ranges = <0x0 0xe0100 0x60>;
device_type = "par_io";
num-ports = <3>;
- pio1: ucc_pin@01 {
+ pio1: ucc_pin@1 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -183,7 +195,7 @@
0x0 0x10 0x2 0x0 0x2 0x0>; /* ENET1_COL */
};
- pio2: ucc_pin@02 {
+ pio2: ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x13 0x1 0x0 0x1 0x0 /* QE_MUX_MDC */
@@ -198,7 +210,7 @@
0x1 0x8 0x2 0x0 0x2 0x0>; /* ENET5_RX_ER_SER5_CD_B */
};
- pio3: ucc_pin@03 {
+ pio3: ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x0 0x16 0x2 0x0 0x2 0x0 /* SER7_CD_B*/
@@ -208,7 +220,7 @@
0x0 0x15 0x1 0x0 0x2 0x0>; /* SER7_TXD0*/
};
- pio4: ucc_pin@04 {
+ pio4: ucc_pin@4 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0x1 0x0 0x2 0x0 0x2 0x0 /* SER3_CD_B*/
diff --git a/arch/powerpc/boot/dts/p2020ds.dts b/arch/powerpc/boot/dts/fsl/p2020ds.dts
index 237310cc7e6c..ae380ebe55cf 100644
--- a/arch/powerpc/boot/dts/p2020ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p2020ds.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P2020 DS Device Tree Source
*
* Copyright 2009-2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/p2020si-pre.dtsi"
+/include/ "p2020si-pre.dtsi"
/ {
model = "fsl,P2020DS";
@@ -85,5 +81,5 @@
* for interrupt-map & interrupt-map-mask
*/
-/include/ "fsl/p2020si-post.dtsi"
+/include/ "p2020si-post.dtsi"
/include/ "p2020ds.dtsi"
diff --git a/arch/powerpc/boot/dts/p2020ds.dtsi b/arch/powerpc/boot/dts/fsl/p2020ds.dtsi
index e699cf95b063..e699cf95b063 100644
--- a/arch/powerpc/boot/dts/p2020ds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2020ds.dtsi
diff --git a/arch/powerpc/boot/dts/p2020rdb-pc.dtsi b/arch/powerpc/boot/dts/fsl/p2020rdb-pc.dtsi
index c21d1c7d16cd..03c9afc82436 100644
--- a/arch/powerpc/boot/dts/p2020rdb-pc.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb-pc.dtsi
@@ -151,7 +151,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,m25p80";
+ compatible = "spansion,m25p80", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
@@ -215,12 +215,12 @@
};
ptp_clock@24e00 {
- fsl,tclk-period = <5>;
- fsl,tmr-prsc = <200>;
- fsl,tmr-add = <0xCCCCCCCD>;
- fsl,tmr-fiper1 = <0x3B9AC9FB>;
- fsl,tmr-fiper2 = <0x0001869B>;
- fsl,max-adj = <249999999>;
+ fsl,tclk-period = <5>;
+ fsl,tmr-prsc = <2>;
+ fsl,tmr-add = <0xaaaaaaab>;
+ fsl,tmr-fiper1 = <999999995>;
+ fsl,tmr-fiper2 = <99990>;
+ fsl,max-adj = <299999999>;
};
enet0: ethernet@24000 {
diff --git a/arch/powerpc/boot/dts/p2020rdb-pc_32b.dts b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_32b.dts
index 57573bd52caa..d3295c204bbf 100644
--- a/arch/powerpc/boot/dts/p2020rdb-pc_32b.dts
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_32b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p2020si-pre.dtsi"
+/include/ "p2020si-pre.dtsi"
/ {
model = "fsl,P2020RDB";
@@ -93,4 +93,4 @@
};
/include/ "p2020rdb-pc.dtsi"
-/include/ "fsl/p2020si-post.dtsi"
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p2020rdb-pc_36b.dts b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_36b.dts
index 470247ea68b4..9307a8f41ddb 100644
--- a/arch/powerpc/boot/dts/p2020rdb-pc_36b.dts
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb-pc_36b.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p2020si-pre.dtsi"
+/include/ "p2020si-pre.dtsi"
/ {
model = "fsl,P2020RDB";
@@ -93,4 +93,4 @@
};
/include/ "p2020rdb-pc.dtsi"
-/include/ "fsl/p2020si-post.dtsi"
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p2020rdb.dts b/arch/powerpc/boot/dts/fsl/p2020rdb.dts
index 4d52bce1d5b0..3acd3890b397 100644
--- a/arch/powerpc/boot/dts/p2020rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p2020rdb.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P2020 RDB Device Tree Source
*
* Copyright 2009-2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/p2020si-pre.dtsi"
+/include/ "p2020si-pre.dtsi"
/ {
model = "fsl,P2020RDB";
@@ -155,7 +151,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
@@ -288,4 +284,4 @@
};
};
-/include/ "fsl/p2020si-post.dtsi"
+/include/ "p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
index 884e01bcb243..d410082d21c0 100644
--- a/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2020si-post.dtsi
@@ -48,6 +48,7 @@
bus-range = <0 255>;
clock-frequency = <33333333>;
interrupts = <26 2 0 0>;
+ law_trgt_if = <2>;
pcie@0 {
reg = <0 0 0 0 0>;
@@ -76,6 +77,7 @@
bus-range = <0 255>;
clock-frequency = <33333333>;
interrupts = <25 2 0 0>;
+ law_trgt_if = <1>;
pcie@0 {
reg = <0 0 0 0 0>;
@@ -105,6 +107,7 @@
bus-range = <0 255>;
clock-frequency = <33333333>;
interrupts = <24 2 0 0>;
+ law_trgt_if = <0>;
pcie@0 {
reg = <0 0 0 0 0>;
@@ -175,6 +178,10 @@
compatible = "fsl-usb2-dr-v1.6", "fsl-usb2-dr";
};
/include/ "pq3-etsec1-0.dtsi"
+ enet0: ethernet@24000 {
+ fsl,pmc-handle = <&etsec1_clk>;
+
+ };
/include/ "pq3-etsec1-timer-0.dtsi"
ptp_clock@24e00 {
@@ -183,7 +190,15 @@
/include/ "pq3-etsec1-1.dtsi"
+ enet1: ethernet@25000 {
+ fsl,pmc-handle = <&etsec2_clk>;
+ };
+
/include/ "pq3-etsec1-2.dtsi"
+ enet2: ethernet@26000 {
+ fsl,pmc-handle = <&etsec3_clk>;
+ };
+
/include/ "pq3-esdhc-0.dtsi"
sdhc@2e000 {
compatible = "fsl,p2020-esdhc", "fsl,esdhc";
@@ -198,4 +213,6 @@
reg = <0xe0000 0x1000>;
fsl,has-rstcr;
};
+
+/include/ "pq3-power.dtsi"
};
diff --git a/arch/powerpc/boot/dts/p2041rdb.dts b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
index d97ad74c7279..950816b9d6e1 100644
--- a/arch/powerpc/boot/dts/p2041rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/p2041rdb.dts
@@ -1,7 +1,7 @@
/*
* P2041RDB Device Tree Source
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p2041si-pre.dtsi"
+/include/ "p2041si-pre.dtsi"
/ {
model = "fsl,P2041RDB";
@@ -41,14 +41,54 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ aliases {
+ phy_rgmii_0 = &phy_rgmii_0;
+ phy_rgmii_1 = &phy_rgmii_1;
+ phy_sgmii_2 = &phy_sgmii_2;
+ phy_sgmii_3 = &phy_sgmii_3;
+ phy_sgmii_4 = &phy_sgmii_4;
+ phy_sgmii_1c = &phy_sgmii_1c;
+ phy_sgmii_1d = &phy_sgmii_1d;
+ phy_sgmii_1e = &phy_sgmii_1e;
+ phy_sgmii_1f = &phy_sgmii_1f;
+ phy_xgmii_2 = &phy_xgmii_2;
+ };
+
memory {
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01008000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -56,7 +96,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
partition@u-boot {
@@ -87,7 +127,7 @@
reg = <0x48>;
};
eeprom@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
rtc@68 {
@@ -102,7 +142,7 @@
i2c@118100 {
eeprom@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
};
@@ -110,6 +150,83 @@
usb1: usb@211000 {
dr_mode = "host";
};
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_2>;
+ phy-connection-type = "sgmii";
+ };
+
+ mdio@e1120 {
+ phy_rgmii_0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+
+ phy_rgmii_1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+
+ phy_sgmii_2: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+
+ phy_sgmii_3: ethernet-phy@3 {
+ reg = <0x3>;
+ };
+
+ phy_sgmii_4: ethernet-phy@4 {
+ reg = <0x4>;
+ };
+
+ phy_sgmii_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_3>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_4>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy_rgmii_1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_0>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xgmii_2>;
+ phy-connection-type = "xgmii";
+ };
+
+ mdio@f1000 {
+ phy_xgmii_2: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+ };
};
rio: rapidio@ffe0c0000 {
@@ -220,4 +337,4 @@
};
};
-/include/ "fsl/p2041si-post.dtsi"
+/include/ "p2041si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
index 69ce1026c948..ddc018d42252 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-post.dtsi
@@ -1,7 +1,7 @@
/*
* P2041/P2040 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,21 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
&lbc {
compatible = "fsl,p2041-elbc", "fsl,elbc", "simple-bus";
interrupts = <25 2 0 0>;
@@ -216,6 +231,10 @@
};
};
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -305,71 +324,9 @@
#sleep-cells = <2>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen1.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,p2041-clockgen", "fsl,qoriq-clockgen-1.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- clock-frequency = <0>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-1.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux1";
- };
-
- mux2: mux2@40 {
- #clock-cells = <0>;
- reg = <0x40 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux2";
- };
-
- mux3: mux3@60 {
- #clock-cells = <0>;
- reg = <0x60 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux3";
- };
};
rcpm: global-utilities@e2000 {
@@ -407,13 +364,30 @@
/include/ "qoriq-esdhc-0.dtsi"
sdhc@114000 {
+ compatible = "fsl,p2041-esdhc", "fsl,esdhc";
fsl,iommu-parent = <&pamu1>;
fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
sdhci,auto-cmd12;
};
/include/ "qoriq-i2c-0.dtsi"
+ i2c@118000 {
+ fsl,i2c-erratum-a004447;
+ };
+
+ i2c@118100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "qoriq-i2c-1.dtsi"
+ i2c@119000 {
+ fsl,i2c-erratum-a004447;
+ };
+
+ i2c@119100 {
+ fsl,i2c-erratum-a004447;
+ };
+
/include/ "qoriq-duart-0.dtsi"
/include/ "qoriq-duart-1.dtsi"
/include/ "qoriq-gpio-0.dtsi"
@@ -451,4 +425,34 @@
crypto: crypto@300000 {
fsl,iommu-parent = <&pamu1>;
};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ enet5: ethernet@f0000 {
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
index b1ea147f2995..6318962e8d14 100644
--- a/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p2041si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* P2041 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -72,6 +72,14 @@
rtic_c = &rtic_c;
rtic_d = &rtic_d;
sec_mon = &sec_mon;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
};
cpus {
@@ -81,7 +89,7 @@
cpu0: PowerPC,e500mc@0 {
device_type = "cpu";
reg = <0>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_0>;
fsl,portid-mapping = <0x80000000>;
L2_0: l2-cache {
@@ -91,7 +99,7 @@
cpu1: PowerPC,e500mc@1 {
device_type = "cpu";
reg = <1>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x40000000>;
L2_1: l2-cache {
@@ -101,7 +109,7 @@
cpu2: PowerPC,e500mc@2 {
device_type = "cpu";
reg = <2>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x20000000>;
L2_2: l2-cache {
@@ -111,7 +119,7 @@
cpu3: PowerPC,e500mc@3 {
device_type = "cpu";
reg = <3>;
- clocks = <&mux3>;
+ clocks = <&clockgen 1 3>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x10000000>;
L2_3: l2-cache {
diff --git a/arch/powerpc/boot/dts/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts
index 2fed3bc0b990..ca0e0272ac62 100644
--- a/arch/powerpc/boot/dts/p3041ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts
@@ -1,7 +1,7 @@
/*
* P3041DS Device Tree Source
*
- * Copyright 2010-2011 Freescale Semiconductor Inc.
+ * Copyright 2010 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p3041si-pre.dtsi"
+/include/ "p3041si-pre.dtsi"
/ {
model = "fsl,P3041DS";
@@ -41,14 +41,55 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ aliases {
+ phy_rgmii_0 = &phy_rgmii_0;
+ phy_rgmii_1 = &phy_rgmii_1;
+ phy_sgmii_1c = &phy_sgmii_1c;
+ phy_sgmii_1d = &phy_sgmii_1d;
+ phy_sgmii_1e = &phy_sgmii_1e;
+ phy_sgmii_1f = &phy_sgmii_1f;
+ phy_xgmii_1 = &phy_xgmii_1;
+ phy_xgmii_2 = &phy_xgmii_2;
+ emi1_rgmii = &hydra_mdio_rgmii;
+ emi1_sgmii = &hydra_mdio_sgmii;
+ emi2_xgmii = &hydra_mdio_xgmii;
+ };
+
memory {
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01008000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -56,7 +97,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <35000000>; /* input clock */
partition@u-boot {
@@ -83,11 +124,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
@@ -98,11 +139,77 @@
reg = <0x68>;
interrupts = <0x1 0x1 0 0>;
};
+ ina220@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+ ina220@41 {
+ compatible = "ti,ina220";
+ reg = <0x41>;
+ shunt-resistor = <1000>;
+ };
+ ina220@44 {
+ compatible = "ti,ina220";
+ reg = <0x44>;
+ shunt-resistor = <1000>;
+ };
+ ina220@45 {
+ compatible = "ti,ina220";
+ reg = <0x45>;
+ shunt-resistor = <1000>;
+ };
adt7461@4c {
compatible = "adi,adt7461";
reg = <0x4c>;
};
};
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_1c>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_1d>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_1e>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy_sgmii_1f>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xgmii_1>;
+ phy-connection-type = "xgmii";
+ };
+
+ hydra_mdio_xgmii: mdio@f1000 {
+ status = "disabled";
+
+ phy_xgmii_1: ethernet-phy@4 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x4>;
+ };
+
+ phy_xgmii_2: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+ };
};
rio: rapidio@ffe0c0000 {
@@ -168,8 +275,58 @@
};
board-control@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
compatible = "fsl,p3041ds-fpga", "fsl,fpga-ngpixis";
reg = <3 0 0x30>;
+ ranges = <0 3 0 0x30>;
+
+ mdio-mux-emi1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ reg = <9 1>;
+ mux-mask = <0x78>;
+
+ hydra_mdio_rgmii: rgmii-mdio@8 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <8>;
+ status = "disabled";
+
+ phy_rgmii_0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+
+ phy_rgmii_1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ };
+
+ hydra_mdio_sgmii: sgmii-mdio@28 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x28>;
+ status = "disabled";
+
+ phy_sgmii_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
};
};
@@ -234,4 +391,4 @@
};
};
-/include/ "fsl/p3041si-post.dtsi"
+/include/ "p3041si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
index cd63cb1b1042..81bc75aca2e0 100644
--- a/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p3041si-post.dtsi
@@ -1,7 +1,7 @@
/*
* P3041 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,21 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
&lbc {
compatible = "fsl,p3041-elbc", "fsl,elbc", "simple-bus";
interrupts = <25 2 0 0>;
@@ -243,6 +258,10 @@
};
};
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -332,71 +351,9 @@
#sleep-cells = <2>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen1.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,p3041-clockgen", "fsl,qoriq-clockgen-1.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- clock-frequency = <0>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-1.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux1";
- };
-
- mux2: mux2@40 {
- #clock-cells = <0>;
- reg = <0x40 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux2";
- };
-
- mux3: mux3@60 {
- #clock-cells = <0>;
- reg = <0x60 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux3";
- };
};
rcpm: global-utilities@e2000 {
@@ -434,6 +391,7 @@
/include/ "qoriq-esdhc-0.dtsi"
sdhc@114000 {
+ compatible = "fsl,p3041-esdhc", "fsl,esdhc";
fsl,iommu-parent = <&pamu1>;
fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
sdhci,auto-cmd12;
@@ -478,4 +436,34 @@
crypto: crypto@300000 {
fsl,iommu-parent = <&pamu1>;
};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ enet5: ethernet@f0000 {
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
index dc5f4b362c24..db92f1151a48 100644
--- a/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p3041si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* P3041 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -73,6 +73,14 @@
rtic_c = &rtic_c;
rtic_d = &rtic_d;
sec_mon = &sec_mon;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
};
cpus {
@@ -82,7 +90,7 @@
cpu0: PowerPC,e500mc@0 {
device_type = "cpu";
reg = <0>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_0>;
fsl,portid-mapping = <0x80000000>;
L2_0: l2-cache {
@@ -92,7 +100,7 @@
cpu1: PowerPC,e500mc@1 {
device_type = "cpu";
reg = <1>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x40000000>;
L2_1: l2-cache {
@@ -102,7 +110,7 @@
cpu2: PowerPC,e500mc@2 {
device_type = "cpu";
reg = <2>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x20000000>;
L2_2: l2-cache {
@@ -112,7 +120,7 @@
cpu3: PowerPC,e500mc@3 {
device_type = "cpu";
reg = <3>;
- clocks = <&mux3>;
+ clocks = <&clockgen 1 3>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x10000000>;
L2_3: l2-cache {
diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts
index 1cf6148b8b05..969b32c4f2d5 100644
--- a/arch/powerpc/boot/dts/p4080ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts
@@ -1,7 +1,7 @@
/*
* P4080DS Device Tree Source
*
- * Copyright 2009-2011 Freescale Semiconductor Inc.
+ * Copyright 2009 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p4080si-pre.dtsi"
+/include/ "p4080si-pre.dtsi"
/ {
model = "fsl,P4080DS";
@@ -41,14 +41,55 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ aliases {
+ phy_rgmii = &phyrgmii;
+ phy5_slot3 = &phy5slot3;
+ phy6_slot3 = &phy6slot3;
+ phy7_slot3 = &phy7slot3;
+ phy8_slot3 = &phy8slot3;
+ emi1_slot3 = &p4080mdio2;
+ emi1_slot4 = &p4080mdio1;
+ emi1_slot5 = &p4080mdio3;
+ emi1_rgmii = &p4080mdio0;
+ emi2_slot4 = &p4080xmdio1;
+ emi2_slot5 = &p4080xmdio3;
+ };
+
memory {
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01008000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -57,7 +98,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
partition@u-boot {
@@ -84,11 +125,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,spd";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,spd";
reg = <0x52>;
};
rtc@68 {
@@ -102,6 +143,45 @@
};
};
+ i2c@118000 {
+ zl2006@21 {
+ compatible = "zl2006";
+ reg = <0x21>;
+ };
+ zl2006@22 {
+ compatible = "zl2006";
+ reg = <0x22>;
+ };
+ zl2006@23 {
+ compatible = "zl2006";
+ reg = <0x23>;
+ };
+ zl2006@24 {
+ compatible = "zl2006";
+ reg = <0x24>;
+ };
+ eeprom@50 {
+ compatible = "atmel,24c64";
+ reg = <0x50>;
+ };
+ eeprom@55 {
+ compatible = "atmel,24c64";
+ reg = <0x55>;
+ };
+ eeprom@56 {
+ compatible = "atmel,24c64";
+ reg = <0x56>;
+ };
+ eeprom@57 {
+ compatible = "atmel,24c02";
+ reg = <0x57>;
+ };
+ };
+
+ i2c@119100 {
+ /* 0x6E: ICS9FG108 */
+ };
+
usb0: usb@210000 {
phy_type = "ulpi";
};
@@ -110,6 +190,60 @@
dr_mode = "host";
phy_type = "ulpi";
};
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy0>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy1>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy2>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy3>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy10>;
+ phy-connection-type = "xgmii";
+ };
+ };
+
+ fman@500000 {
+ ethernet@e0000 {
+ phy-handle = <&phy5>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy6>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy7>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy8>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy11>;
+ phy-connection-type = "xgmii";
+ };
+ };
};
rio: rapidio@ffe0c0000 {
@@ -186,6 +320,120 @@
};
};
+ mdio-mux-emi1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-gpio", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ gpios = <&gpio0 1 0>, <&gpio0 0 0>;
+
+ p4080mdio0: mdio@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ phyrgmii: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+
+ p4080mdio1: mdio@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ phy5: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy6: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy7: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy8: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ p4080mdio2: mdio@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+ status = "disabled";
+
+ phy5slot3: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy6slot3: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy7slot3: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy8slot3: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ p4080mdio3: mdio@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ phy0: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy1: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy2: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy3: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
+
+ mdio-mux-emi2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-gpio", "mdio-mux";
+ mdio-parent-bus = <&xmdio0>;
+ gpios = <&gpio0 3 0>, <&gpio0 2 0>;
+
+ p4080xmdio1: mdio@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+
+ phy11: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+
+ p4080xmdio3: mdio@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ phy10: ethernet-phy@4 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x4>;
+ };
+ };
+ };
};
-/include/ "fsl/p4080si-post.dtsi"
+/include/ "p4080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
index 12947ccddf25..4da49b6dd3f5 100644
--- a/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p4080si-post.dtsi
@@ -1,7 +1,7 @@
/*
* P4080/P4040 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,21 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10 0>;
+};
+
&lbc {
compatible = "fsl,p4080-elbc", "fsl,elbc", "simple-bus";
interrupts = <25 2 0 0>;
@@ -243,6 +258,10 @@
};
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -352,123 +371,9 @@
#sleep-cells = <2>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen1.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,p4080-clockgen", "fsl,qoriq-clockgen-1.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- clock-frequency = <0>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-1.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2";
- };
-
- pll2: pll2@840 {
- #clock-cells = <1>;
- reg = <0x840 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll2", "pll2-div2";
- };
-
- pll3: pll3@860 {
- #clock-cells = <1>;
- reg = <0x860 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll3", "pll3-div2";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux1";
- };
-
- mux2: mux2@40 {
- #clock-cells = <0>;
- reg = <0x40 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux2";
- };
-
- mux3: mux3@60 {
- #clock-cells = <0>;
- reg = <0x60 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux3";
- };
-
- mux4: mux4@80 {
- #clock-cells = <0>;
- reg = <0x80 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll2 0>, <&pll2 1>, <&pll3 0>, <&pll3 1>;
- clock-names = "pll2", "pll2-div2", "pll3", "pll3-div2";
- clock-output-names = "cmux4";
- };
-
- mux5: mux5@a0 {
- #clock-cells = <0>;
- reg = <0xa0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll2 0>, <&pll2 1>, <&pll3 0>, <&pll3 1>;
- clock-names = "pll2", "pll2-div2", "pll3", "pll3-div2";
- clock-output-names = "cmux5";
- };
-
- mux6: mux6@c0 {
- #clock-cells = <0>;
- reg = <0xc0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll2 0>, <&pll2 1>, <&pll3 0>, <&pll3 1>;
- clock-names = "pll2", "pll2-div2", "pll3", "pll3-div2";
- clock-output-names = "cmux6";
- };
-
- mux7: mux7@e0 {
- #clock-cells = <0>;
- reg = <0xe0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll2 0>, <&pll2 1>, <&pll3 0>, <&pll3 1>;
- clock-names = "pll2", "pll2-div2", "pll3", "pll3-div2";
- clock-output-names = "cmux7";
- };
};
rcpm: global-utilities@e2000 {
@@ -506,6 +411,7 @@
/include/ "qoriq-esdhc-0.dtsi"
sdhc@114000 {
+ compatible = "fsl,p4080-esdhc", "fsl,esdhc";
fsl,iommu-parent = <&pamu1>;
fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
voltage-ranges = <3300 3300>;
@@ -534,4 +440,53 @@
crypto: crypto@300000 {
fsl,iommu-parent = <&pamu1>;
};
+
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@f0000 {
+ };
+ };
+
+/include/ "qoriq-fman-1.dtsi"
+/include/ "qoriq-fman-1-1g-0.dtsi"
+/include/ "qoriq-fman-1-1g-1.dtsi"
+/include/ "qoriq-fman-1-1g-2.dtsi"
+/include/ "qoriq-fman-1-1g-3.dtsi"
+/include/ "qoriq-fman-1-10g-0.dtsi"
+ fman@500000 {
+ enet5: ethernet@e0000 {
+ };
+
+ enet6: ethernet@e2000 {
+ };
+
+ enet7: ethernet@e4000 {
+ };
+
+ enet8: ethernet@e6000 {
+ };
+
+ enet9: ethernet@f0000 {
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
index 38bde0958672..0a7c65a00e5e 100644
--- a/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p4080si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* P4080/P4040 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -72,6 +72,19 @@
rtic_c = &rtic_c;
rtic_d = &rtic_d;
sec_mon = &sec_mon;
+
+ fman0 = &fman0;
+ fman1 = &fman1;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
+ ethernet6 = &enet6;
+ ethernet7 = &enet7;
+ ethernet8 = &enet8;
+ ethernet9 = &enet9;
};
cpus {
@@ -81,7 +94,7 @@
cpu0: PowerPC,e500mc@0 {
device_type = "cpu";
reg = <0>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_0>;
fsl,portid-mapping = <0x80000000>;
L2_0: l2-cache {
@@ -91,7 +104,7 @@
cpu1: PowerPC,e500mc@1 {
device_type = "cpu";
reg = <1>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x40000000>;
L2_1: l2-cache {
@@ -101,7 +114,7 @@
cpu2: PowerPC,e500mc@2 {
device_type = "cpu";
reg = <2>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x20000000>;
L2_2: l2-cache {
@@ -111,7 +124,7 @@
cpu3: PowerPC,e500mc@3 {
device_type = "cpu";
reg = <3>;
- clocks = <&mux3>;
+ clocks = <&clockgen 1 3>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x10000000>;
L2_3: l2-cache {
@@ -121,7 +134,7 @@
cpu4: PowerPC,e500mc@4 {
device_type = "cpu";
reg = <4>;
- clocks = <&mux4>;
+ clocks = <&clockgen 1 4>;
next-level-cache = <&L2_4>;
fsl,portid-mapping = <0x08000000>;
L2_4: l2-cache {
@@ -131,7 +144,7 @@
cpu5: PowerPC,e500mc@5 {
device_type = "cpu";
reg = <5>;
- clocks = <&mux5>;
+ clocks = <&clockgen 1 5>;
next-level-cache = <&L2_5>;
fsl,portid-mapping = <0x04000000>;
L2_5: l2-cache {
@@ -141,7 +154,7 @@
cpu6: PowerPC,e500mc@6 {
device_type = "cpu";
reg = <6>;
- clocks = <&mux6>;
+ clocks = <&clockgen 1 6>;
next-level-cache = <&L2_6>;
fsl,portid-mapping = <0x02000000>;
L2_6: l2-cache {
@@ -151,7 +164,7 @@
cpu7: PowerPC,e500mc@7 {
device_type = "cpu";
reg = <7>;
- clocks = <&mux7>;
+ clocks = <&clockgen 1 7>;
next-level-cache = <&L2_7>;
fsl,portid-mapping = <0x01000000>;
L2_7: l2-cache {
diff --git a/arch/powerpc/boot/dts/p5020ds.dts b/arch/powerpc/boot/dts/fsl/p5020ds.dts
index 2869fea717dd..b24adf902d8d 100644
--- a/arch/powerpc/boot/dts/p5020ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p5020ds.dts
@@ -1,7 +1,7 @@
/*
* P5020DS Device Tree Source
*
- * Copyright 2010-2011 Freescale Semiconductor Inc.
+ * Copyright 2010 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/p5020si-pre.dtsi"
+/include/ "p5020si-pre.dtsi"
/ {
model = "fsl,P5020DS";
@@ -41,14 +41,55 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ aliases {
+ phy_rgmii_0 = &phy_rgmii_0;
+ phy_rgmii_1 = &phy_rgmii_1;
+ phy_sgmii_1c = &phy_sgmii_1c;
+ phy_sgmii_1d = &phy_sgmii_1d;
+ phy_sgmii_1e = &phy_sgmii_1e;
+ phy_sgmii_1f = &phy_sgmii_1f;
+ phy_xgmii_1 = &phy_xgmii_1;
+ phy_xgmii_2 = &phy_xgmii_2;
+ emi1_rgmii = &hydra_mdio_rgmii;
+ emi1_sgmii = &hydra_mdio_sgmii;
+ emi2_xgmii = &hydra_mdio_xgmii;
+ };
+
memory {
device_type = "memory";
};
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
dcsr: dcsr@f00000000 {
ranges = <0x00000000 0xf 0x00000000 0x01008000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -56,7 +97,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "spansion,s25sl12801";
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
partition@u-boot {
@@ -83,11 +124,11 @@
i2c@118100 {
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x51>;
};
eeprom@52 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x52>;
};
};
@@ -98,11 +139,77 @@
reg = <0x68>;
interrupts = <0x1 0x1 0 0>;
};
+ ina220@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+ ina220@41 {
+ compatible = "ti,ina220";
+ reg = <0x41>;
+ shunt-resistor = <1000>;
+ };
+ ina220@44 {
+ compatible = "ti,ina220";
+ reg = <0x44>;
+ shunt-resistor = <1000>;
+ };
+ ina220@45 {
+ compatible = "ti,ina220";
+ reg = <0x45>;
+ shunt-resistor = <1000>;
+ };
adt7461@4c {
compatible = "adi,adt7461";
reg = <0x4c>;
};
};
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_1c>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_1d>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_1e>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy_sgmii_1f>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xgmii_1>;
+ phy-connection-type = "xgmii";
+ };
+
+ hydra_mdio_xgmii: mdio@f1000 {
+ status = "disabled";
+
+ phy_xgmii_1: ethernet-phy@4 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x4>;
+ };
+
+ phy_xgmii_2: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+ };
};
rio: rapidio@ffe0c0000 {
@@ -168,8 +275,58 @@
};
board-control@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
compatible = "fsl,p5020ds-fpga", "fsl,fpga-ngpixis";
reg = <3 0 0x30>;
+ ranges = <0 3 0 0x30>;
+
+ mdio-mux-emi1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ reg = <9 1>;
+ mux-mask = <0x78>;
+
+ hydra_mdio_rgmii: rgmii-mdio@8 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <8>;
+ status = "disabled";
+
+ phy_rgmii_0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+
+ phy_rgmii_1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ };
+
+ hydra_mdio_sgmii: sgmii-mdio@28 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x28>;
+ status = "disabled";
+
+ phy_sgmii_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
};
};
@@ -234,4 +391,4 @@
};
};
-/include/ "fsl/p5020si-post.dtsi"
+/include/ "p5020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
index 4c4a2b0436b2..cd008cdd2889 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-post.dtsi
@@ -1,7 +1,7 @@
/*
* P5020/5010 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,21 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
&lbc {
compatible = "fsl,p5020-elbc", "fsl,elbc", "simple-bus";
interrupts = <25 2 0 0>;
@@ -240,6 +255,10 @@
};
};
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -337,53 +356,9 @@
#sleep-cells = <2>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen1.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- clock-frequency = <0>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-1.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux1";
- };
};
rcpm: global-utilities@e2000 {
@@ -421,6 +396,7 @@
/include/ "qoriq-esdhc-0.dtsi"
sdhc@114000 {
+ compatible = "fsl,p5020-esdhc", "fsl,esdhc";
fsl,iommu-parent = <&pamu1>;
fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
sdhci,auto-cmd12;
@@ -465,8 +441,38 @@
fsl,iommu-parent = <&pamu1>;
};
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
/include/ "qoriq-raid1.0-0.dtsi"
raideng@320000 {
fsl,iommu-parent = <&pamu1>;
};
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ enet5: ethernet@f0000 {
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
index 1cc61e126e4c..2d74ea85e5df 100644
--- a/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5020si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* P5020/P5010 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2011 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -79,6 +79,14 @@
raideng_jr1 = &raideng_jr1;
raideng_jr2 = &raideng_jr2;
raideng_jr3 = &raideng_jr3;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
};
cpus {
@@ -88,7 +96,7 @@
cpu0: PowerPC,e5500@0 {
device_type = "cpu";
reg = <0>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_0>;
fsl,portid-mapping = <0x80000000>;
L2_0: l2-cache {
@@ -98,7 +106,7 @@
cpu1: PowerPC,e5500@1 {
device_type = "cpu";
reg = <1>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x40000000>;
L2_1: l2-cache {
diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts
new file mode 100644
index 000000000000..5cfc689ee474
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts
@@ -0,0 +1,486 @@
+/*
+ * P5040DS Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * This software is provided by Freescale Semiconductor "as is" and any
+ * express or implied warranties, including, but not limited to, the implied
+ * warranties of merchantability and fitness for a particular purpose are
+ * disclaimed. In no event shall Freescale Semiconductor be liable for any
+ * direct, indirect, incidental, special, exemplary, or consequential damages
+ * (including, but not limited to, procurement of substitute goods or services;
+ * loss of use, data, or profits; or business interruption) however caused and
+ * on any theory of liability, whether in contract, strict liability, or tort
+ * (including negligence or otherwise) arising in any way out of the use of this
+ * software, even if advised of the possibility of such damage.
+ */
+
+/include/ "p5040si-pre.dtsi"
+
+/ {
+ model = "fsl,P5040DS";
+ compatible = "fsl,P5040DS";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ phy_sgmii_slot2_1c = &phy_sgmii_slot2_1c;
+ phy_sgmii_slot2_1d = &phy_sgmii_slot2_1d;
+ phy_sgmii_slot2_1e = &phy_sgmii_slot2_1e;
+ phy_sgmii_slot2_1f = &phy_sgmii_slot2_1f;
+ phy_sgmii_slot3_1c = &phy_sgmii_slot3_1c;
+ phy_sgmii_slot3_1d = &phy_sgmii_slot3_1d;
+ phy_sgmii_slot3_1e = &phy_sgmii_slot3_1e;
+ phy_sgmii_slot3_1f = &phy_sgmii_slot3_1f;
+ phy_sgmii_slot5_1c = &phy_sgmii_slot5_1c;
+ phy_sgmii_slot5_1d = &phy_sgmii_slot5_1d;
+ phy_sgmii_slot5_1e = &phy_sgmii_slot5_1e;
+ phy_sgmii_slot5_1f = &phy_sgmii_slot5_1f;
+ phy_sgmii_slot6_1c = &phy_sgmii_slot6_1c;
+ phy_sgmii_slot6_1d = &phy_sgmii_slot6_1d;
+ phy_sgmii_slot6_1e = &phy_sgmii_slot6_1e;
+ phy_sgmii_slot6_1f = &phy_sgmii_slot6_1f;
+ hydra_rg = &hydra_rg;
+ hydra_sg_slot2 = &hydra_sg_slot2;
+ hydra_sg_slot3 = &hydra_sg_slot3;
+ hydra_sg_slot5 = &hydra_sg_slot5;
+ hydra_sg_slot6 = &hydra_sg_slot6;
+ hydra_xg_slot1 = &hydra_xg_slot1;
+ hydra_xg_slot2 = &hydra_xg_slot2;
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x200000>;
+ };
+
+ qportals: qman-portals@ff4200000 {
+ ranges = <0x0 0xf 0xf4200000 0x200000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+ spi@110000 {
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "spansion,s25sl12801", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>; /* input clock */
+ partition@u-boot {
+ label = "u-boot";
+ reg = <0x00000000 0x00100000>;
+ };
+ partition@kernel {
+ label = "kernel";
+ reg = <0x00100000 0x00500000>;
+ };
+ partition@dtb {
+ label = "dtb";
+ reg = <0x00600000 0x00100000>;
+ };
+ partition@fs {
+ label = "file system";
+ reg = <0x00700000 0x00900000>;
+ };
+ };
+ };
+
+ i2c@118100 {
+ eeprom@51 {
+ compatible = "atmel,24c256";
+ reg = <0x51>;
+ };
+ eeprom@52 {
+ compatible = "atmel,24c256";
+ reg = <0x52>;
+ };
+ };
+
+ i2c@119100 {
+ rtc@68 {
+ compatible = "dallas,ds3232";
+ reg = <0x68>;
+ interrupts = <0x1 0x1 0 0>;
+ };
+ ina220@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+ ina220@41 {
+ compatible = "ti,ina220";
+ reg = <0x41>;
+ shunt-resistor = <1000>;
+ };
+ ina220@44 {
+ compatible = "ti,ina220";
+ reg = <0x44>;
+ shunt-resistor = <1000>;
+ };
+ ina220@45 {
+ compatible = "ti,ina220";
+ reg = <0x45>;
+ shunt-resistor = <1000>;
+ };
+ adt7461@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+ };
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_0>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xgmii_slot_2>;
+ phy-connection-type = "xgmii";
+ };
+ };
+
+ fman@500000 {
+ ethernet@e0000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xgmii_slot_1>;
+ phy-connection-type = "xgmii";
+ };
+ };
+ };
+
+ lbc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x1000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xffa00000 0x00040000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+
+ flash@0,0 {
+ compatible = "cfi-flash";
+ reg = <0 0 0x08000000>;
+ bank-width = <2>;
+ device-width = <2>;
+ };
+
+ nand@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,elbc-fcm-nand";
+ reg = <0x2 0x0 0x40000>;
+
+ partition@0 {
+ label = "NAND U-Boot Image";
+ reg = <0x0 0x02000000>;
+ };
+
+ partition@2000000 {
+ label = "NAND Root File System";
+ reg = <0x02000000 0x10000000>;
+ };
+
+ partition@12000000 {
+ label = "NAND Compressed RFS Image";
+ reg = <0x12000000 0x08000000>;
+ };
+
+ partition@1a000000 {
+ label = "NAND Linux Kernel Image";
+ reg = <0x1a000000 0x04000000>;
+ };
+
+ partition@1e000000 {
+ label = "NAND DTB Image";
+ reg = <0x1e000000 0x01000000>;
+ };
+
+ partition@1f000000 {
+ label = "NAND Writable User area";
+ reg = <0x1f000000 0x01000000>;
+ };
+ };
+
+ board-control@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,p5040ds-fpga", "fsl,fpga-ngpixis";
+ reg = <3 0 0x40>;
+ ranges = <0 3 0 0x40>;
+
+ mdio-mux-emi1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ reg = <9 1>;
+ mux-mask = <0x78>;
+
+ hydra_rg:rgmii-mdio@8 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <8>;
+ status = "disabled";
+
+ phy_rgmii_0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+
+ phy_rgmii_1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ };
+
+ hydra_sg_slot2: sgmii-mdio@28 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x28>;
+ status = "disabled";
+
+ phy_sgmii_slot2_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_slot2_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_slot2_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_slot2_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ hydra_sg_slot3: sgmii-mdio@68 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x68>;
+ status = "disabled";
+
+ phy_sgmii_slot3_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_slot3_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_slot3_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_slot3_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ hydra_sg_slot5: sgmii-mdio@38 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x38>;
+ status = "disabled";
+
+ phy_sgmii_slot5_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_slot5_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_slot5_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_slot5_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ hydra_sg_slot6: sgmii-mdio@48 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x48>;
+ status = "disabled";
+
+ phy_sgmii_slot6_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_slot6_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_slot6_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_slot6_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
+
+ mdio-mux-emi2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&xmdio0>;
+ reg = <9 1>;
+ mux-mask = <0x06>;
+
+ hydra_xg_slot1: hydra-xg-slot1@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+ status = "disabled";
+
+ phy_xgmii_slot_1: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <4>;
+ };
+ };
+
+ hydra_xg_slot2: hydra-xg-slot2@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+
+ phy_xgmii_slot_2: ethernet-phy@4 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0>;
+ };
+ };
+ };
+ };
+ };
+
+ pci0: pcie@ffe200000 {
+ reg = <0xf 0xfe200000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe201000 {
+ reg = <0xf 0xfe201000 0 0x1000>;
+ ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+ 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe202000 {
+ reg = <0xf 0xfe202000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+};
+
+/include/ "p5040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
index 67296fdd9698..16b454b504e2 100644
--- a/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5040si-post.dtsi
@@ -1,7 +1,7 @@
/*
* P5040 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,6 +32,21 @@
* software, even if advised of the possibility of such damage.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
&lbc {
compatible = "fsl,p5040-elbc", "fsl,elbc", "simple-bus";
interrupts = <25 2 0 0>;
@@ -195,6 +210,10 @@
};
};
+/include/ "qoriq-bman1-portals.dtsi"
+
+/include/ "qoriq-qman1-portals.dtsi"
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -297,71 +316,9 @@
#sleep-cells = <2>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen1.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,p5040-clockgen", "fsl,qoriq-clockgen-1.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- clock-frequency = <0>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-1.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-1.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux1";
- };
-
- mux2: mux2@40 {
- #clock-cells = <0>;
- reg = <0x40 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux2";
- };
-
- mux3: mux3@60 {
- #clock-cells = <0>;
- reg = <0x60 0x4>;
- compatible = "fsl,qoriq-core-mux-1.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
- clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
- clock-output-names = "cmux3";
- };
};
rcpm: global-utilities@e2000 {
@@ -399,6 +356,7 @@
/include/ "qoriq-esdhc-0.dtsi"
sdhc@114000 {
+ compatible = "fsl,p5040-esdhc", "fsl,esdhc";
fsl,iommu-parent = <&pamu2>;
fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
sdhci,auto-cmd12;
@@ -443,4 +401,62 @@
crypto@300000 {
fsl,iommu-parent = <&pamu4>;
};
+
+/include/ "qoriq-raid1.0-0.dtsi"
+/include/ "qoriq-qman1.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman-0.dtsi"
+/include/ "qoriq-fman-0-1g-0.dtsi"
+/include/ "qoriq-fman-0-1g-1.dtsi"
+/include/ "qoriq-fman-0-1g-2.dtsi"
+/include/ "qoriq-fman-0-1g-3.dtsi"
+/include/ "qoriq-fman-0-1g-4.dtsi"
+/include/ "qoriq-fman-0-10g-0.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ enet5: ethernet@f0000 {
+ };
+ };
+
+/include/ "qoriq-fman-1.dtsi"
+/include/ "qoriq-fman-1-1g-0.dtsi"
+/include/ "qoriq-fman-1-1g-1.dtsi"
+/include/ "qoriq-fman-1-1g-2.dtsi"
+/include/ "qoriq-fman-1-1g-3.dtsi"
+/include/ "qoriq-fman-1-1g-4.dtsi"
+/include/ "qoriq-fman-1-10g-0.dtsi"
+ fman@500000 {
+ enet6: ethernet@e0000 {
+ };
+
+ enet7: ethernet@e2000 {
+ };
+
+ enet8: ethernet@e4000 {
+ };
+
+ enet9: ethernet@e6000 {
+ };
+
+ enet10: ethernet@e8000 {
+ };
+
+ enet11: ethernet@f0000 {
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
index b048a2be05a8..ed89dbbdacf0 100644
--- a/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p5040si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* P5040 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -72,6 +72,27 @@
rtic_c = &rtic_c;
rtic_d = &rtic_d;
sec_mon = &sec_mon;
+
+ raideng = &raideng;
+ raideng_jr0 = &raideng_jr0;
+ raideng_jr1 = &raideng_jr1;
+ raideng_jr2 = &raideng_jr2;
+ raideng_jr3 = &raideng_jr3;
+
+ fman0 = &fman0;
+ fman1 = &fman1;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
+ ethernet6 = &enet6;
+ ethernet7 = &enet7;
+ ethernet8 = &enet8;
+ ethernet9 = &enet9;
+ ethernet10 = &enet10;
+ ethernet11 = &enet11;
};
cpus {
@@ -81,7 +102,7 @@
cpu0: PowerPC,e5500@0 {
device_type = "cpu";
reg = <0>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_0>;
fsl,portid-mapping = <0x80000000>;
L2_0: l2-cache {
@@ -91,7 +112,7 @@
cpu1: PowerPC,e5500@1 {
device_type = "cpu";
reg = <1>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x40000000>;
L2_1: l2-cache {
@@ -101,7 +122,7 @@
cpu2: PowerPC,e5500@2 {
device_type = "cpu";
reg = <2>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x20000000>;
L2_2: l2-cache {
@@ -111,7 +132,7 @@
cpu3: PowerPC,e5500@3 {
device_type = "cpu";
reg = <3>;
- clocks = <&mux3>;
+ clocks = <&clockgen 1 3>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x10000000>;
L2_3: l2-cache {
diff --git a/arch/powerpc/boot/dts/ppa8548.dts b/arch/powerpc/boot/dts/fsl/ppa8548.dts
index 27b0699ee923..f39838d93994 100644
--- a/arch/powerpc/boot/dts/ppa8548.dts
+++ b/arch/powerpc/boot/dts/fsl/ppa8548.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PPA8548 Device Tree Source (36-bit address map)
* Copyright 2013 Prodrive B.V.
@@ -5,14 +6,9 @@
* Based on:
* MPC8548 CDS Device Tree Source (36-bit address map)
* Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-/include/ "fsl/mpc8548si-pre.dtsi"
+/include/ "mpc8548si-pre.dtsi"
/ {
model = "ppa8548";
@@ -161,4 +157,4 @@
};
};
-/include/ "fsl/mpc8548si-post.dtsi"
+/include/ "mpc8548si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi
index 1382fec9e8c5..7fcb1ac0f232 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi
@@ -50,6 +50,7 @@ ethernet@b0000 {
fsl,num_tx_queues = <0x8>;
fsl,magic-packet;
local-mac-address = [ 00 00 00 00 00 00 ];
+ ranges;
queue-group@b0000 {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi
index 221cd2ea5b31..9f25427c1527 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi
@@ -50,6 +50,7 @@ ethernet@b1000 {
fsl,num_tx_queues = <0x8>;
fsl,magic-packet;
local-mac-address = [ 00 00 00 00 00 00 ];
+ ranges;
queue-group@b1000 {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi
index 61456c317609..cd7c318ab131 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi
@@ -49,6 +49,7 @@ ethernet@b2000 {
fsl,num_tx_queues = <0x8>;
fsl,magic-packet;
local-mac-address = [ 00 00 00 00 00 00 ];
+ ranges;
queue-group@b2000 {
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi
index 72a3ef5945c1..a1b48546b02d 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-gpio-0.dtsi
@@ -1,5 +1,5 @@
/*
- * PQ3 GPIO device tree stub [ controller @ offset 0xf000 ]
+ * PQ3 GPIO device tree stub [ controller @ offset 0xfc00 ]
*
* Copyright 2011 Freescale Semiconductor Inc.
*
@@ -32,10 +32,10 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-gpio-controller@f000 {
+gpio-controller@fc00 {
#gpio-cells = <2>;
compatible = "fsl,pq3-gpio";
- reg = <0xf000 0x100>;
+ reg = <0xfc00 0x100>;
interrupts = <47 0x2 0 0>;
gpio-controller;
};
diff --git a/arch/powerpc/boot/dts/fsl/pq3-power.dtsi b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi
new file mode 100644
index 000000000000..6af12401004d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/pq3-power.dtsi
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: (GPL-2.0+)
+/*
+ * Copyright 2024 NXP
+ */
+
+power@e0070 {
+ compatible = "fsl,mpc8548-pmc";
+ reg = <0xe0070 0x20>;
+
+ etsec1_clk: soc-clk@24 {
+ fsl,pmcdr-mask = <0x00000080>;
+ };
+ etsec2_clk: soc-clk@25 {
+ fsl,pmcdr-mask = <0x00000040>;
+ };
+ etsec3_clk: soc-clk@26 {
+ fsl,pmcdr-mask = <0x00000020>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi b/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi
index 29dad723091e..fcc7e5b7fd47 100644
--- a/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/qonverge-usb2-dr-0.dtsi
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-usb@210000 {
+usb0: usb@210000 {
compatible = "fsl-usb2-dr";
reg = <0x210000 0x1000>;
#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
new file mode 100644
index 000000000000..5022432ebaa9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1-portals.dtsi
@@ -0,0 +1,90 @@
+/*
+ * QorIQ BMan Portal device tree stub for 10 portals
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&bportals {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x100000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x101000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ bman-portal@8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x102000 0x1000>;
+ interrupts = <109 2 0 0>;
+ };
+ bman-portal@c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc000 0x4000>, <0x103000 0x1000>;
+ interrupts = <111 2 0 0>;
+ };
+ bman-portal@10000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x10000 0x4000>, <0x104000 0x1000>;
+ interrupts = <113 2 0 0>;
+ };
+ bman-portal@14000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x14000 0x4000>, <0x105000 0x1000>;
+ interrupts = <115 2 0 0>;
+ };
+ bman-portal@18000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x18000 0x4000>, <0x106000 0x1000>;
+ interrupts = <117 2 0 0>;
+ };
+ bman-portal@1c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x1c000 0x4000>, <0x107000 0x1000>;
+ interrupts = <119 2 0 0>;
+ };
+ bman-portal@20000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x20000 0x4000>, <0x108000 0x1000>;
+ interrupts = <121 2 0 0>;
+ };
+ bman-portal@24000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x24000 0x4000>, <0x109000 0x1000>;
+ interrupts = <123 2 0 0>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/t2080qds.dts b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
index aa1d6d8c169b..3b5e3504acb7 100644
--- a/arch/powerpc/boot/dts/t2080qds.dts
+++ b/arch/powerpc/boot/dts/fsl/qoriq-bman1.dtsi
@@ -1,7 +1,7 @@
/*
- * T2080QDS Device Tree Source
+ * QorIQ BMan device tree stub [ controller @ offset 0x31a000 ]
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -20,7 +20,7 @@
* Foundation, either version 2 of that License or (at your option) any
* later version.
*
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
@@ -32,26 +32,10 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t208xsi-pre.dtsi"
-/include/ "t208xqds.dtsi"
-
-/ {
- model = "fsl,T2080QDS";
- compatible = "fsl,T2080QDS";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- rio: rapidio@ffe0c0000 {
- reg = <0xf 0xfe0c0000 0 0x11000>;
-
- port1 {
- ranges = <0 0 0xc 0x20000000 0 0x10000000>;
- };
- port2 {
- ranges = <0 0 0xc 0x30000000 0 0x10000000>;
- };
- };
+bman: bman@31a000 {
+ compatible = "fsl,bman";
+ reg = <0x31a000 0x1000>;
+ interrupts = <16 2 1 2>;
+ fsl,bman-portals = <&bportals>;
+ memory-region = <&bman_fbpr>;
};
-
-/include/ "fsl/t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi
index e8891047600c..463c1ed9ffdd 100644
--- a/arch/powerpc/boot/dts/t2080rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen1.dtsi
@@ -1,5 +1,5 @@
/*
- * T2080PCIe-RDB Board Device Tree Source
+ * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ]
*
* Copyright 2014 Freescale Semiconductor Inc.
*
@@ -20,7 +20,7 @@
* Foundation, either version 2 of that License or (at your option) any
* later version.
*
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
@@ -32,26 +32,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t208xsi-pre.dtsi"
-/include/ "t208xrdb.dtsi"
-
-/ {
- model = "fsl,T2080RDB";
- compatible = "fsl,T2080RDB";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- rio: rapidio@ffe0c0000 {
- reg = <0xf 0xfe0c0000 0 0x11000>;
-
- port1 {
- ranges = <0 0 0xc 0x20000000 0 0x10000000>;
- };
- port2 {
- ranges = <0 0 0xc 0x30000000 0 0x10000000>;
- };
- };
+clockgen: global-utilities@e1000 {
+ compatible = "fsl,qoriq-clockgen-1.0";
+ reg = <0xe1000 0x1000>;
+ #clock-cells = <2>;
};
-
-/include/ "fsl/t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi
new file mode 100644
index 000000000000..0361050bb56a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-clockgen2.dtsi
@@ -0,0 +1,39 @@
+/*
+ * QorIQ clock control device tree stub [ controller @ offset 0xe1000 ]
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+clockgen: global-utilities@e1000 {
+ compatible = "fsl,qoriq-clockgen-2.0";
+ reg = <0xe1000 0x1000>;
+ #clock-cells = <2>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi
new file mode 100644
index 000000000000..eb77675c255a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-10g-0.dtsi
@@ -0,0 +1,62 @@
+/*
+ * QorIQ FMan 10g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x10: port@90000 {
+ cell-index = <0x10>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x90000 0x1000>;
+ };
+
+ fman0_tx_0x30: port@b0000 {
+ cell-index = <0x30>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xb0000 0x1000>;
+ };
+
+ ethernet@f0000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-xgec";
+ reg = <0xf0000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x10 &fman0_tx_0x30>;
+ };
+
+ xmdio0: mdio@f1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ interrupts = <101 2 0 0>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi
new file mode 100644
index 000000000000..b965bc219bae
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-0.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan 1g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x08: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x88000 0x1000>;
+ };
+
+ fman0_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa8000 0x1000>;
+ };
+
+ ethernet@e0000 {
+ cell-index = <0>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>;
+ tbi-handle = <&tbi0>;
+ ptp-timer = <&ptp_timer0>;
+ };
+
+ mdio0: mdio@e1120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe1120 0xee0>;
+ interrupts = <100 2 0 0>;
+
+ tbi0: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi
new file mode 100644
index 000000000000..9eb6e6dd7cf9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-1.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x09: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x89000 0x1000>;
+ };
+
+ fman0_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa9000 0x1000>;
+ };
+
+ ethernet@e2000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>;
+ tbi-handle = <&tbi1>;
+ ptp-timer = <&ptp_timer0>;
+ };
+
+ mdio@e3120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe3120 0xee0>;
+
+ tbi1: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi
new file mode 100644
index 000000000000..092b89936743
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-2.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #2 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0a: port@8a000 {
+ cell-index = <0xa>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8a000 0x1000>;
+ };
+
+ fman0_tx_0x2a: port@aa000 {
+ cell-index = <0x2a>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xaa000 0x1000>;
+ };
+
+ ethernet@e4000 {
+ cell-index = <2>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe4000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0a &fman0_tx_0x2a>;
+ tbi-handle = <&tbi2>;
+ ptp-timer = <&ptp_timer0>;
+ };
+
+ mdio@e5120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe5120 0xee0>;
+
+ tbi2: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi
new file mode 100644
index 000000000000..2df0dc876045
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-3.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0b: port@8b000 {
+ cell-index = <0xb>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8b000 0x1000>;
+ };
+
+ fman0_tx_0x2b: port@ab000 {
+ cell-index = <0x2b>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xab000 0x1000>;
+ };
+
+ ethernet@e6000 {
+ cell-index = <3>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe6000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0b &fman0_tx_0x2b>;
+ tbi-handle = <&tbi3>;
+ ptp-timer = <&ptp_timer0>;
+ };
+
+ mdio@e7120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe7120 0xee0>;
+
+ tbi3: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi
new file mode 100644
index 000000000000..5fceb2438fdc
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0-1g-4.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #4 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0c: port@8c000 {
+ cell-index = <0xc>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8c000 0x1000>;
+ };
+
+ fman0_tx_0x2c: port@ac000 {
+ cell-index = <0x2c>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xac000 0x1000>;
+ };
+
+ ethernet@e8000 {
+ cell-index = <4>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe8000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0c &fman0_tx_0x2c>;
+ tbi-handle = <&tbi4>;
+ ptp-timer = <&ptp_timer0>;
+ };
+
+ mdio@e9120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe9120 0xee0>;
+
+ tbi4: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi
new file mode 100644
index 000000000000..9b6cf9149937
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-0.dtsi
@@ -0,0 +1,104 @@
+/*
+ * QorIQ FMan device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman0: fman@400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <0>;
+ compatible = "fsl,fman";
+ ranges = <0 0x400000 0xfe000>;
+ reg = <0x400000 0xfe000>;
+ interrupts = <96 2 0 0>, <16 2 1 1>;
+ clocks = <&clockgen 3 0>;
+ clock-names = "fmanclk";
+ fsl,qman-channel-range = <0x40 0xc>;
+ ptimer-handle = <&ptp_timer0>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x28000>;
+ };
+
+ fman0_oh_0x1: port@81000 {
+ cell-index = <0x1>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x81000 0x1000>;
+ };
+
+ fman0_oh_0x2: port@82000 {
+ cell-index = <0x2>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x82000 0x1000>;
+ };
+
+ fman0_oh_0x3: port@83000 {
+ cell-index = <0x3>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x83000 0x1000>;
+ };
+
+ fman0_oh_0x4: port@84000 {
+ cell-index = <0x4>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x84000 0x1000>;
+ };
+
+ fman0_oh_0x5: port@85000 {
+ cell-index = <0x5>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x85000 0x1000>;
+ status = "disabled";
+ };
+
+ fman0_oh_0x6: port@86000 {
+ cell-index = <0x6>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x86000 0x1000>;
+ status = "disabled";
+ };
+
+ fman0_oh_0x7: port@87000 {
+ cell-index = <0x7>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x87000 0x1000>;
+ status = "disabled";
+ };
+};
+
+ptp_timer0: ptp-timer@4fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0x4fe000 0x1000>;
+ interrupts = <96 2 0 0>;
+ clocks = <&clockgen 3 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi
new file mode 100644
index 000000000000..83ae87b69d92
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-10g-0.dtsi
@@ -0,0 +1,61 @@
+/*
+ * QorIQ FMan 10g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x10: port@90000 {
+ cell-index = <0x10>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x90000 0x1000>;
+ };
+
+ fman1_tx_0x30: port@b0000 {
+ cell-index = <0x30>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xb0000 0x1000>;
+ };
+
+ ethernet@f0000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-xgec";
+ reg = <0xf0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>;
+ };
+
+ mdio@f1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi
new file mode 100644
index 000000000000..b0f0e36a4eac
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-0.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x08: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x88000 0x1000>;
+ };
+
+ fman1_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa8000 0x1000>;
+ };
+
+ ethernet@e0000 {
+ cell-index = <0>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x08 &fman1_tx_0x28>;
+ tbi-handle = <&tbi5>;
+ ptp-timer = <&ptp_timer1>;
+ };
+
+ mdio@e1120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe1120 0xee0>;
+
+ tbi5: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi
new file mode 100644
index 000000000000..a3a79f8552a3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-1.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #1 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x09: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x89000 0x1000>;
+ };
+
+ fman1_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa9000 0x1000>;
+ };
+
+ ethernet@e2000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x09 &fman1_tx_0x29>;
+ tbi-handle = <&tbi6>;
+ ptp-timer = <&ptp_timer1>;
+ };
+
+ mdio@e3120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe3120 0xee0>;
+
+ tbi6: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi
new file mode 100644
index 000000000000..96a69a84b8a8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-2.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #2 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0a: port@8a000 {
+ cell-index = <0xa>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8a000 0x1000>;
+ };
+
+ fman1_tx_0x2a: port@aa000 {
+ cell-index = <0x2a>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xaa000 0x1000>;
+ };
+
+ ethernet@e4000 {
+ cell-index = <2>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe4000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0a &fman1_tx_0x2a>;
+ tbi-handle = <&tbi7>;
+ ptp-timer = <&ptp_timer1>;
+ };
+
+ mdio@e5120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe5120 0xee0>;
+
+ tbi7: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi
new file mode 100644
index 000000000000..7405d1940133
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-3.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #3 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0b: port@8b000 {
+ cell-index = <0xb>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8b000 0x1000>;
+ };
+
+ fman1_tx_0x2b: port@ab000 {
+ cell-index = <0x2b>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xab000 0x1000>;
+ };
+
+ ethernet@e6000 {
+ cell-index = <3>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe6000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0b &fman1_tx_0x2b>;
+ tbi-handle = <&tbi8>;
+ ptp-timer = <&ptp_timer1>;
+ };
+
+ mdio@e7120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe7120 0xee0>;
+
+ tbi8: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi
new file mode 100644
index 000000000000..f49ad69e5212
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1-1g-4.dtsi
@@ -0,0 +1,68 @@
+/*
+ * QorIQ FMan 1g port #4 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0c: port@8c000 {
+ cell-index = <0xc>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8c000 0x1000>;
+ };
+
+ fman1_tx_0x2c: port@ac000 {
+ cell-index = <0x2c>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xac000 0x1000>;
+ };
+
+ ethernet@e8000 {
+ cell-index = <4>;
+ compatible = "fsl,fman-dtsec";
+ reg = <0xe8000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0c &fman1_tx_0x2c>;
+ tbi-handle = <&tbi9>;
+ ptp-timer = <&ptp_timer1>;
+ };
+
+ mdio@e9120 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-mdio";
+ reg = <0xe9120 0xee0>;
+
+ tbi9: tbi-phy@8 {
+ reg = <0x8>;
+ device_type = "tbi-phy";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi
new file mode 100644
index 000000000000..e95c11ff0417
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman-1.dtsi
@@ -0,0 +1,104 @@
+/*
+ * QorIQ FMan device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2011 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman1: fman@500000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <1>;
+ compatible = "fsl,fman";
+ ranges = <0 0x500000 0xfe000>;
+ reg = <0x500000 0xfe000>;
+ interrupts = <97 2 0 0>, <16 2 1 0>;
+ clocks = <&clockgen 3 1>;
+ clock-names = "fmanclk";
+ fsl,qman-channel-range = <0x60 0xc>;
+ ptimer-handle = <&ptp_timer1>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x28000>;
+ };
+
+ fman1_oh_0x1: port@81000 {
+ cell-index = <0x1>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x81000 0x1000>;
+ };
+
+ fman1_oh_0x2: port@82000 {
+ cell-index = <0x2>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x82000 0x1000>;
+ };
+
+ fman1_oh_0x3: port@83000 {
+ cell-index = <0x3>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x83000 0x1000>;
+ };
+
+ fman1_oh_0x4: port@84000 {
+ cell-index = <0x4>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x84000 0x1000>;
+ };
+
+ fman1_oh_0x5: port@85000 {
+ cell-index = <0x5>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x85000 0x1000>;
+ status = "disabled";
+ };
+
+ fman1_oh_0x6: port@86000 {
+ cell-index = <0x6>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x86000 0x1000>;
+ status = "disabled";
+ };
+
+ fman1_oh_0x7: port@87000 {
+ cell-index = <0x7>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x87000 0x1000>;
+ status = "disabled";
+ };
+};
+
+ptp_timer1: ptp-timer@5fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0x5fe000 0x1000>;
+ interrupts = <97 2 0 0>;
+ clocks = <&clockgen 3 1>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
new file mode 100644
index 000000000000..7e70977f282a
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi
@@ -0,0 +1,73 @@
+/*
+ * QorIQ FMan v3 1g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x08: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x88000 0x1000>;
+ fsl,fman-10g-port;
+ fsl,fman-best-effort-port;
+ };
+
+ fman0_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa8000 0x1000>;
+ fsl,fman-10g-port;
+ fsl,fman-best-effort-port;
+ };
+
+ ethernet@e0000 {
+ cell-index = <0>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy0>, <&pcsphy0>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
new file mode 100644
index 000000000000..5f89f7c1761f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x10: port@90000 {
+ cell-index = <0x10>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x90000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ fman0_tx_0x30: port@b0000 {
+ cell-index = <0x30>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xb0000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ ethernet@f0000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-memac";
+ reg = <0xf0000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x10 &fman0_tx_0x30>;
+ pcsphy-handle = <&pcsphy6>, <&qsgmiib_pcs2>, <&pcsphy6>;
+ pcs-handle-names = "sgmii", "qsgmii", "xfi";
+ };
+
+ mdio@e9000 {
+ qsgmiib_pcs2: ethernet-pcs@2 {
+ compatible = "fsl,lynx-pcs";
+ reg = <2>;
+ };
+ };
+
+ mdio@f1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy6: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
new file mode 100644
index 000000000000..71eb75e82c2e
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi
@@ -0,0 +1,80 @@
+/*
+ * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x09: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x89000 0x1000>;
+ fsl,fman-10g-port;
+ fsl,fman-best-effort-port;
+ };
+
+ fman0_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa9000 0x1000>;
+ fsl,fman-10g-port;
+ fsl,fman-best-effort-port;
+ };
+
+ ethernet@e2000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy1>, <&qsgmiia_pcs1>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiia_pcs1: ethernet-pcs@1 {
+ compatible = "fsl,lynx-pcs";
+ reg = <1>;
+ };
+ };
+
+ mdio@e3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy1: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
new file mode 100644
index 000000000000..fb7032ddb7fc
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x11: port@91000 {
+ cell-index = <0x11>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x91000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ fman0_tx_0x31: port@b1000 {
+ cell-index = <0x31>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xb1000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ ethernet@f2000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-memac";
+ reg = <0xf2000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x11 &fman0_tx_0x31>;
+ pcsphy-handle = <&pcsphy7>, <&qsgmiib_pcs3>, <&pcsphy7>;
+ pcs-handle-names = "sgmii", "qsgmii", "xfi";
+ };
+
+ mdio@e9000 {
+ qsgmiib_pcs3: ethernet-pcs@3 {
+ compatible = "fsl,lynx-pcs";
+ reg = <3>;
+ };
+ };
+
+ mdio@f3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xf3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy7: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi
new file mode 100644
index 000000000000..6b3609574b0f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
+/*
+ * QorIQ FMan v3 10g port #2 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2022 Sean Anderson <sean.anderson@seco.com>
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ */
+
+fman@400000 {
+ fman0_rx_0x08: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x88000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ fman0_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa8000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ ethernet@e0000 {
+ cell-index = <0>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy0>, <&pcsphy0>;
+ pcs-handle-names = "sgmii", "xfi";
+ };
+
+ mdio@e1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi
new file mode 100644
index 000000000000..28ed1a85a436
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
+/*
+ * QorIQ FMan v3 10g port #3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2022 Sean Anderson <sean.anderson@seco.com>
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ */
+
+fman@400000 {
+ fman0_rx_0x09: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x89000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ fman0_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa9000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ ethernet@e2000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy1>, <&pcsphy1>;
+ pcs-handle-names = "sgmii", "xfi";
+ };
+
+ mdio@e3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy1: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
new file mode 100644
index 000000000000..1089d6861bfb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #0 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x08: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x88000 0x1000>;
+ };
+
+ fman0_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa8000 0x1000>;
+ };
+
+ ethernet@e0000 {
+ cell-index = <0>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x08 &fman0_tx_0x28>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy0>, <&pcsphy0>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy0: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
new file mode 100644
index 000000000000..a95bbb4fc827
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x09: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x89000 0x1000>;
+ };
+
+ fman0_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa9000 0x1000>;
+ };
+
+ ethernet@e2000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x09 &fman0_tx_0x29>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy1>, <&qsgmiia_pcs1>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiia_pcs1: ethernet-pcs@1 {
+ compatible = "fsl,lynx-pcs";
+ reg = <1>;
+ };
+ };
+
+ mdio@e3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy1: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
new file mode 100644
index 000000000000..7d5af0147a25
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #2 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0a: port@8a000 {
+ cell-index = <0xa>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8a000 0x1000>;
+ };
+
+ fman0_tx_0x2a: port@aa000 {
+ cell-index = <0x2a>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xaa000 0x1000>;
+ };
+
+ ethernet@e4000 {
+ cell-index = <2>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe4000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0a &fman0_tx_0x2a>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy2>, <&qsgmiia_pcs2>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiia_pcs2: ethernet-pcs@2 {
+ compatible = "fsl,lynx-pcs";
+ reg = <2>;
+ };
+ };
+
+ mdio@e5000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe5000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy2: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
new file mode 100644
index 000000000000..61e5466ec854
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0b: port@8b000 {
+ cell-index = <0xb>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8b000 0x1000>;
+ };
+
+ fman0_tx_0x2b: port@ab000 {
+ cell-index = <0x2b>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xab000 0x1000>;
+ };
+
+ ethernet@e6000 {
+ cell-index = <3>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe6000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0b &fman0_tx_0x2b>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy3>, <&qsgmiia_pcs3>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiia_pcs3: ethernet-pcs@3 {
+ compatible = "fsl,lynx-pcs";
+ reg = <3>;
+ };
+ };
+
+ mdio@e7000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe7000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy3: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
new file mode 100644
index 000000000000..3ba0cdafc069
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #4 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0c: port@8c000 {
+ cell-index = <0xc>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8c000 0x1000>;
+ };
+
+ fman0_tx_0x2c: port@ac000 {
+ cell-index = <0x2c>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xac000 0x1000>;
+ };
+
+ ethernet@e8000 {
+ cell-index = <4>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe8000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0c &fman0_tx_0x2c>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy4>, <&pcsphy4>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e9000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe9000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy4: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
new file mode 100644
index 000000000000..51748de0a289
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #5 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@400000 {
+ fman0_rx_0x0d: port@8d000 {
+ cell-index = <0xd>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8d000 0x1000>;
+ };
+
+ fman0_tx_0x2d: port@ad000 {
+ cell-index = <0x2d>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xad000 0x1000>;
+ };
+
+ ethernet@ea000 {
+ cell-index = <5>;
+ compatible = "fsl,fman-memac";
+ reg = <0xea000 0x1000>;
+ fsl,fman-ports = <&fman0_rx_0x0d &fman0_tx_0x2d>;
+ ptp-timer = <&ptp_timer0>;
+ pcsphy-handle = <&pcsphy5>, <&qsgmiib_pcs1>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e9000 {
+ qsgmiib_pcs1: ethernet-pcs@1 {
+ compatible = "fsl,lynx-pcs";
+ reg = <1>;
+ };
+ };
+
+ mdio@eb000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xeb000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy5: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi
new file mode 100644
index 000000000000..d62b36c5a329
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0.dtsi
@@ -0,0 +1,109 @@
+/*
+ * QorIQ FMan v3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman0: fman@400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <0>;
+ compatible = "fsl,fman";
+ ranges = <0 0x400000 0xfe000>;
+ reg = <0x400000 0xfe000>;
+ interrupts = <96 2 0 0>, <16 2 1 1>;
+ clocks = <&clockgen 3 0>;
+ clock-names = "fmanclk";
+ fsl,qman-channel-range = <0x800 0x10>;
+ ptimer-handle = <&ptp_timer0>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x60000>;
+ };
+
+ fman0_oh_0x2: port@82000 {
+ cell-index = <0x2>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x82000 0x1000>;
+ };
+
+ fman0_oh_0x3: port@83000 {
+ cell-index = <0x3>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x83000 0x1000>;
+ };
+
+ fman0_oh_0x4: port@84000 {
+ cell-index = <0x4>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x84000 0x1000>;
+ };
+
+ fman0_oh_0x5: port@85000 {
+ cell-index = <0x5>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x85000 0x1000>;
+ };
+
+ fman0_oh_0x6: port@86000 {
+ cell-index = <0x6>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x86000 0x1000>;
+ };
+
+ fman0_oh_0x7: port@87000 {
+ cell-index = <0x7>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x87000 0x1000>;
+ };
+
+ mdio0: mdio@fc000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfc000 0x1000>;
+ };
+
+ xmdio0: mdio@fd000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfd000 0x1000>;
+ };
+};
+
+ptp_timer0: ptp-timer@4fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0x4fe000 0x1000>;
+ interrupts = <96 2 0 0>;
+ clocks = <&clockgen 3 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
new file mode 100644
index 000000000000..ee4f5170f632
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x10: port@90000 {
+ cell-index = <0x10>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x90000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ fman1_tx_0x30: port@b0000 {
+ cell-index = <0x30>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xb0000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ ethernet@f0000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-memac";
+ reg = <0xf0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>;
+ pcsphy-handle = <&pcsphy14>, <&qsgmiid_pcs2>, <&pcsphy14>;
+ pcs-handle-names = "sgmii", "qsgmii", "xfi";
+ };
+
+ mdio@e9000 {
+ qsgmiid_pcs2: ethernet-pcs@2 {
+ compatible = "fsl,lynx-pcs";
+ reg = <2>;
+ };
+ };
+
+ mdio@f1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy14: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
new file mode 100644
index 000000000000..83d2e0ce8f7b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi
@@ -0,0 +1,77 @@
+/*
+ * QorIQ FMan v3 10g port #1 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x11: port@91000 {
+ cell-index = <0x11>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x91000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ fman1_tx_0x31: port@b1000 {
+ cell-index = <0x31>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xb1000 0x1000>;
+ fsl,fman-10g-port;
+ };
+
+ ethernet@f2000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-memac";
+ reg = <0xf2000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x11 &fman1_tx_0x31>;
+ pcsphy-handle = <&pcsphy15>, <&qsgmiid_pcs3>, <&pcsphy15>;
+ pcs-handle-names = "sgmii", "qsgmii", "xfi";
+ };
+
+ mdio@e9000 {
+ qsgmiid_pcs3: ethernet-pcs@3 {
+ compatible = "fsl,lynx-pcs";
+ reg = <3>;
+ };
+ };
+
+ mdio@f3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xf3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy15: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
new file mode 100644
index 000000000000..3132fc73f133
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #0 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x08: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x88000 0x1000>;
+ };
+
+ fman1_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa8000 0x1000>;
+ };
+
+ ethernet@e0000 {
+ cell-index = <0>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x08 &fman1_tx_0x28>;
+ ptp-timer = <&ptp_timer1>;
+ pcsphy-handle = <&pcsphy8>, <&pcsphy8>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe1000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy8: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
new file mode 100644
index 000000000000..75e904d96602
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #1 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x09: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x89000 0x1000>;
+ };
+
+ fman1_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xa9000 0x1000>;
+ };
+
+ ethernet@e2000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x09 &fman1_tx_0x29>;
+ ptp-timer = <&ptp_timer1>;
+ pcsphy-handle = <&pcsphy9>, <&qsgmiic_pcs1>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiic_pcs1: ethernet-pcs@1 {
+ compatible = "fsl,lynx-pcs";
+ reg = <1>;
+ };
+ };
+
+ mdio@e3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe3000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy9: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
new file mode 100644
index 000000000000..69f2cc7b8f19
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #2 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0a: port@8a000 {
+ cell-index = <0xa>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8a000 0x1000>;
+ };
+
+ fman1_tx_0x2a: port@aa000 {
+ cell-index = <0x2a>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xaa000 0x1000>;
+ };
+
+ ethernet@e4000 {
+ cell-index = <2>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe4000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0a &fman1_tx_0x2a>;
+ ptp-timer = <&ptp_timer1>;
+ pcsphy-handle = <&pcsphy10>, <&qsgmiic_pcs2>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiic_pcs2: ethernet-pcs@2 {
+ compatible = "fsl,lynx-pcs";
+ reg = <2>;
+ };
+ };
+
+ mdio@e5000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe5000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy10: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
new file mode 100644
index 000000000000..b3aaf01d7da0
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #3 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0b: port@8b000 {
+ cell-index = <0xb>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8b000 0x1000>;
+ };
+
+ fman1_tx_0x2b: port@ab000 {
+ cell-index = <0x2b>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xab000 0x1000>;
+ };
+
+ ethernet@e6000 {
+ cell-index = <3>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe6000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0b &fman1_tx_0x2b>;
+ ptp-timer = <&ptp_timer1>;
+ pcsphy-handle = <&pcsphy11>, <&qsgmiic_pcs3>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e1000 {
+ qsgmiic_pcs3: ethernet-pcs@3 {
+ compatible = "fsl,lynx-pcs";
+ reg = <3>;
+ };
+ };
+
+ mdio@e7000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe7000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy11: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
new file mode 100644
index 000000000000..18e020432807
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi
@@ -0,0 +1,69 @@
+/*
+ * QorIQ FMan v3 1g port #4 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0c: port@8c000 {
+ cell-index = <0xc>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8c000 0x1000>;
+ };
+
+ fman1_tx_0x2c: port@ac000 {
+ cell-index = <0x2c>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xac000 0x1000>;
+ };
+
+ ethernet@e8000 {
+ cell-index = <4>;
+ compatible = "fsl,fman-memac";
+ reg = <0xe8000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0c &fman1_tx_0x2c>;
+ ptp-timer = <&ptp_timer1>;
+ pcsphy-handle = <&pcsphy12>, <&pcsphy12>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e9000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xe9000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy12: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
new file mode 100644
index 000000000000..55f329d13f19
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi
@@ -0,0 +1,76 @@
+/*
+ * QorIQ FMan v3 1g port #5 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman@500000 {
+ fman1_rx_0x0d: port@8d000 {
+ cell-index = <0xd>;
+ compatible = "fsl,fman-v3-port-rx";
+ reg = <0x8d000 0x1000>;
+ };
+
+ fman1_tx_0x2d: port@ad000 {
+ cell-index = <0x2d>;
+ compatible = "fsl,fman-v3-port-tx";
+ reg = <0xad000 0x1000>;
+ };
+
+ ethernet@ea000 {
+ cell-index = <5>;
+ compatible = "fsl,fman-memac";
+ reg = <0xea000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x0d &fman1_tx_0x2d>;
+ ptp-timer = <&ptp_timer1>;
+ pcsphy-handle = <&pcsphy13>, <&qsgmiid_pcs1>;
+ pcs-handle-names = "sgmii", "qsgmii";
+ };
+
+ mdio@e9000 {
+ qsgmiid_pcs1: ethernet-pcs@1 {
+ compatible = "fsl,lynx-pcs";
+ reg = <1>;
+ };
+ };
+
+ mdio@eb000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xeb000 0x1000>;
+ fsl,erratum-a011043; /* must ignore read errors */
+
+ pcsphy13: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi
new file mode 100644
index 000000000000..310232460500
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1.dtsi
@@ -0,0 +1,109 @@
+/*
+ * QorIQ FMan v3 device tree stub [ controller @ offset 0x500000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman1: fman@500000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <1>;
+ compatible = "fsl,fman";
+ ranges = <0 0x500000 0xfe000>;
+ reg = <0x500000 0xfe000>;
+ interrupts = <97 2 0 0>, <16 2 1 0>;
+ clocks = <&clockgen 3 1>;
+ clock-names = "fmanclk";
+ fsl,qman-channel-range = <0x820 0x10>;
+ ptimer-handle = <&ptp_timer1>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x60000>;
+ };
+
+ fman1_oh_0x2: port@82000 {
+ cell-index = <0x2>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x82000 0x1000>;
+ };
+
+ fman1_oh_0x3: port@83000 {
+ cell-index = <0x3>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x83000 0x1000>;
+ };
+
+ fman1_oh_0x4: port@84000 {
+ cell-index = <0x4>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x84000 0x1000>;
+ };
+
+ fman1_oh_0x5: port@85000 {
+ cell-index = <0x5>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x85000 0x1000>;
+ };
+
+ fman1_oh_0x6: port@86000 {
+ cell-index = <0x6>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x86000 0x1000>;
+ };
+
+ fman1_oh_0x7: port@87000 {
+ cell-index = <0x7>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x87000 0x1000>;
+ };
+
+ mdio1: mdio@fc000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfc000 0x1000>;
+ };
+
+ mdio@fd000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfd000 0x1000>;
+ };
+};
+
+ptp_timer1: ptp-timer@5fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0x5fe000 0x1000>;
+ interrupts = <97 2 0 0>;
+ clocks = <&clockgen 3 1>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
new file mode 100644
index 000000000000..48e5cd61599c
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi
@@ -0,0 +1,99 @@
+/*
+ * QorIQ FMan v3 device tree stub [ controller @ offset 0x400000 ]
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+fman0: fman@400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <0>;
+ compatible = "fsl,fman";
+ ranges = <0 0x400000 0xfe000>;
+ reg = <0x400000 0xfe000>;
+ interrupts = <96 2 0 0>, <16 2 1 1>;
+ clocks = <&clockgen 3 0>;
+ clock-names = "fmanclk";
+ fsl,qman-channel-range = <0x800 0x10>;
+ ptimer-handle = <&ptp_timer0>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x30000>;
+ };
+
+ fman0_oh_0x2: port@82000 {
+ cell-index = <0x2>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x82000 0x1000>;
+ };
+
+ fman0_oh_0x3: port@83000 {
+ cell-index = <0x3>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x83000 0x1000>;
+ };
+
+ fman0_oh_0x4: port@84000 {
+ cell-index = <0x4>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x84000 0x1000>;
+ };
+
+ fman0_oh_0x5: port@85000 {
+ cell-index = <0x5>;
+ compatible = "fsl,fman-v3-port-oh";
+ reg = <0x85000 0x1000>;
+ };
+
+ mdio0: mdio@fc000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfc000 0x1000>;
+ fsl,erratum-a009885;
+ };
+
+ xmdio0: mdio@fd000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio";
+ reg = <0xfd000 0x1000>;
+ fsl,erratum-a009885;
+ };
+};
+
+ptp_timer0: ptp-timer@4fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0x4fe000 0x1000>;
+ interrupts = <96 2 0 0>;
+ clocks = <&clockgen 3 0>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
new file mode 100644
index 000000000000..e77e4b4ed53b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1-portals.dtsi
@@ -0,0 +1,101 @@
+/*
+ * QorIQ QMan Portal device tree stub for 10 portals & 15 pool channels
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+&qportals {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x100000 0x1000>;
+ interrupts = <104 2 0 0>;
+ cell-index = <0x0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x101000 0x1000>;
+ interrupts = <106 2 0 0>;
+ cell-index = <1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x102000 0x1000>;
+ interrupts = <108 2 0 0>;
+ cell-index = <2>;
+ };
+ qportal3: qman-portal@c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc000 0x4000>, <0x103000 0x1000>;
+ interrupts = <110 2 0 0>;
+ cell-index = <3>;
+ };
+ qportal4: qman-portal@10000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x10000 0x4000>, <0x104000 0x1000>;
+ interrupts = <112 2 0 0>;
+ cell-index = <4>;
+ };
+ qportal5: qman-portal@14000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x14000 0x4000>, <0x105000 0x1000>;
+ interrupts = <114 2 0 0>;
+ cell-index = <5>;
+ };
+ qportal6: qman-portal@18000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x18000 0x4000>, <0x106000 0x1000>;
+ interrupts = <116 2 0 0>;
+ cell-index = <6>;
+ };
+
+ qportal7: qman-portal@1c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x1c000 0x4000>, <0x107000 0x1000>;
+ interrupts = <118 2 0 0>;
+ cell-index = <7>;
+ };
+ qportal8: qman-portal@20000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x20000 0x4000>, <0x108000 0x1000>;
+ interrupts = <120 2 0 0>;
+ cell-index = <8>;
+ };
+ qportal9: qman-portal@24000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x24000 0x4000>, <0x109000 0x1000>;
+ interrupts = <122 2 0 0>;
+ cell-index = <9>;
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi
new file mode 100644
index 000000000000..0695778c4386
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman1.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ QMan device tree stub [ controller @ offset 0x318000 ]
+ *
+ * Copyright 2011 - 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+qman: qman@318000 {
+ compatible = "fsl,qman";
+ reg = <0x318000 0x1000>;
+ interrupts = <16 2 1 3>;
+ fsl,qman-portals = <&qportals>;
+ memory-region = <&qman_fqd &qman_pfdr>;
+};
diff --git a/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi
new file mode 100644
index 000000000000..b379abd1439d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/qoriq-qman3.dtsi
@@ -0,0 +1,41 @@
+/*
+ * QorIQ QMan rev3 device tree stub [ controller @ offset 0x318000 ]
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+qman: qman@318000 {
+ compatible = "fsl,qman";
+ reg = <0x318000 0x2000>;
+ interrupts = <16 2 1 3>;
+ fsl,qman-portals = <&qportals>;
+ memory-region = <&qman_fqd &qman_pfdr>;
+};
diff --git a/arch/powerpc/boot/dts/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
index 53761d4e8c51..f82f85c65964 100644
--- a/arch/powerpc/boot/dts/t4240rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
@@ -1,5 +1,5 @@
/*
- * T4240RDB Device Tree Source
+ * T1023 RDB Device Tree Source
*
* Copyright 2014 Freescale Semiconductor Inc.
*
@@ -32,36 +32,56 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t4240si-pre.dtsi"
+/include/ "t102xsi-pre.dtsi"
/ {
- model = "fsl,T4240RDB";
- compatible = "fsl,T4240RDB";
+ model = "fsl,T1023RDB";
+ compatible = "fsl,T1023RDB";
#address-cells = <2>;
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe8000000 0x08000000
- 2 0 0xf 0xff800000 0x00010000
- 3 0 0xf 0xffdf0000 0x00008000>;
+ 1 0 0xf 0xff800000 0x00010000>;
nor@0,0 {
#address-cells = <1>;
#size-cells = <1>;
+ status = "disabled";
compatible = "cfi-flash";
reg = <0x0 0x0 0x8000000>;
-
bank-width = <2>;
device-width = <1>;
};
- nand@2,0 {
+ nand@1,0 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "fsl,ifc-nand";
- reg = <0x2 0x0 0x10000>;
+ reg = <0x1 0x0 0x10000>;
};
};
@@ -73,6 +93,14 @@
ranges = <0x00000000 0xf 0x00000000 0x01072000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -80,45 +108,89 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "sst,sst25wf040";
+ compatible = "spansion,s25fl512s", "jedec,spi-nor";
reg = <0>;
- spi-max-frequency = <40000000>; /* input clock */
+ spi-max-frequency = <10000000>; /* input clk */
};
};
i2c@118000 {
- eeprom@52 {
- compatible = "at24,24c256";
- reg = <0x52>;
- };
- eeprom@54 {
- compatible = "at24,24c256";
- reg = <0x54>;
- };
- eeprom@56 {
- compatible = "at24,24c256";
- reg = <0x56>;
+ eeprom@50 {
+ compatible = "st,m24256";
+ reg = <0x50>;
};
+
rtc@68 {
- compatible = "dallas,ds1374";
+ compatible = "dallas,ds1339";
reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
+ interrupts = <0x5 0x1 0 0>;
+ };
+ };
+
+ i2c@118100 {
+ current-sensor@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+
+ current-sensor@41 {
+ compatible = "ti,ina220";
+ reg = <0x41>;
+ shunt-resistor = <1000>;
};
};
- sdhc@114000 {
- voltage-ranges = <1800 1800 3300 3300>;
+ fman@400000 {
+ fm1mac1: ethernet@e0000 {
+ phy-handle = <&sgmii_rtk_phy2>;
+ phy-connection-type = "sgmii";
+ sleep = <&rcpm 0x80000000>;
+ };
+
+ fm1mac2: ethernet@e2000 {
+ sleep = <&rcpm 0x40000000>;
+ };
+
+ fm1mac3: ethernet@e4000 {
+ phy-handle = <&sgmii_aqr_phy3>;
+ phy-connection-type = "2500base-x";
+ sleep = <&rcpm 0x20000000>;
+ };
+
+ fm1mac4: ethernet@e6000 {
+ phy-handle = <&rgmii_rtk_phy1>;
+ phy-connection-type = "rgmii";
+ sleep = <&rcpm 0x10000000>;
+ };
+
+
+ mdio0: mdio@fc000 {
+ rgmii_rtk_phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ sgmii_rtk_phy2: ethernet-phy@3 {
+ reg = <0x3>;
+ };
+ };
+
+ xmdio0: mdio@fd000 {
+ sgmii_aqr_phy3: ethernet-phy@2 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x2>;
+ };
+ };
};
};
pci0: pcie@ffe240000 {
reg = <0xf 0xfe240000 0 0x10000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
- 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
- 0 0x20000000
+ 0 0x10000000
0x01000000 0 0x00000000
0x01000000 0 0x00000000
@@ -128,12 +200,12 @@
pci1: pcie@ffe250000 {
reg = <0xf 0xfe250000 0 0x10000>;
- ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
- 0 0x20000000
+ 0 0x10000000
0x01000000 0 0x00000000
0x01000000 0 0x00000000
@@ -142,45 +214,19 @@
};
pci2: pcie@ffe260000 {
- reg = <0xf 0xfe260000 0 0x1000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
- 0 0x20000000
-
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00010000>;
- };
- };
-
- pci3: pcie@ffe270000 {
- reg = <0xf 0xfe270000 0 0x10000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
- 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
- pcie@0 {
- ranges = <0x02000000 0 0xe0000000
- 0x02000000 0 0xe0000000
- 0 0x20000000
+ 0 0x10000000
0x01000000 0 0x00000000
0x01000000 0 0x00000000
0 0x00010000>;
};
};
-
- rio: rapidio@ffe0c0000 {
- reg = <0xf 0xfe0c0000 0 0x11000>;
-
- port1 {
- ranges = <0 0 0xc 0x20000000 0 0x10000000>;
- };
- port2 {
- ranges = <0 0 0xc 0x30000000 0 0x10000000>;
- };
- };
};
-/include/ "fsl/t4240si-post.dtsi"
+#include "t1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
new file mode 100644
index 000000000000..8ef0c020206b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -0,0 +1,523 @@
+/*
+ * T1023 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&ifc {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc";
+ interrupts = <25 2 0 0>;
+};
+
+&pci0 {
+ compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+ device_type = "pci";
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0x0 0xff>;
+ interrupts = <20 2 0 0>;
+ fsl,iommu-parent = <&pamu0>;
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <20 2 0 0>;
+ interrupt-map-mask = <0xf800 0 0 7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0 0 1 &mpic 40 1 0 0
+ 0000 0 0 2 &mpic 1 1 0 0
+ 0000 0 0 3 &mpic 2 1 0 0
+ 0000 0 0 4 &mpic 3 1 0 0
+ >;
+ };
+};
+
+&pci1 {
+ compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+ device_type = "pci";
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0 0xff>;
+ interrupts = <21 2 0 0>;
+ fsl,iommu-parent = <&pamu0>;
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <21 2 0 0>;
+ interrupt-map-mask = <0xf800 0 0 7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0 0 1 &mpic 41 1 0 0
+ 0000 0 0 2 &mpic 5 1 0 0
+ 0000 0 0 3 &mpic 6 1 0 0
+ 0000 0 0 4 &mpic 7 1 0 0
+ >;
+ };
+};
+
+&pci2 {
+ compatible = "fsl,t1023-pcie", "fsl,qoriq-pcie-v2.4", "fsl,qoriq-pcie";
+ device_type = "pci";
+ #size-cells = <2>;
+ #address-cells = <3>;
+ bus-range = <0x0 0xff>;
+ interrupts = <22 2 0 0>;
+ fsl,iommu-parent = <&pamu0>;
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+ interrupts = <22 2 0 0>;
+ interrupt-map-mask = <0xf800 0 0 7>;
+ interrupt-map = <
+ /* IDSEL 0x0 */
+ 0000 0 0 1 &mpic 42 1 0 0
+ 0000 0 0 2 &mpic 9 1 0 0
+ 0000 0 0 3 &mpic 10 1 0 0
+ 0000 0 0 4 &mpic 11 1 0 0
+ >;
+ };
+};
+
+&dcsr {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,dcsr", "simple-bus";
+
+ dcsr-epu@0 {
+ compatible = "fsl,t1023-dcsr-epu", "fsl,dcsr-epu";
+ interrupts = <52 2 0 0
+ 84 2 0 0
+ 85 2 0 0>;
+ reg = <0x0 0x1000>;
+ };
+ dcsr-npc {
+ compatible = "fsl,t1023-dcsr-cnpc", "fsl,dcsr-cnpc";
+ reg = <0x1000 0x1000 0x1002000 0x10000>;
+ };
+ dcsr-nxc@2000 {
+ compatible = "fsl,dcsr-nxc";
+ reg = <0x2000 0x1000>;
+ };
+ dcsr-corenet {
+ compatible = "fsl,dcsr-corenet";
+ reg = <0x8000 0x1000 0x1A000 0x1000>;
+ };
+ dcsr-ocn@11000 {
+ compatible = "fsl,t1023-dcsr-ocn", "fsl,dcsr-ocn";
+ reg = <0x11000 0x1000>;
+ };
+ dcsr-ddr@12000 {
+ compatible = "fsl,dcsr-ddr";
+ dev-handle = <&ddr1>;
+ reg = <0x12000 0x1000>;
+ };
+ dcsr-nal@18000 {
+ compatible = "fsl,t1023-dcsr-nal", "fsl,dcsr-nal";
+ reg = <0x18000 0x1000>;
+ };
+ dcsr-rcpm@22000 {
+ compatible = "fsl,t1023-dcsr-rcpm", "fsl,dcsr-rcpm";
+ reg = <0x22000 0x1000>;
+ };
+ dcsr-snpc@30000 {
+ compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc";
+ reg = <0x30000 0x1000 0x1022000 0x10000>;
+ };
+ dcsr-snpc@31000 {
+ compatible = "fsl,t1023-dcsr-snpc", "fsl,dcsr-snpc";
+ reg = <0x31000 0x1000 0x1042000 0x10000>;
+ };
+ dcsr-cpu-sb-proxy@100000 {
+ compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+ cpu-handle = <&cpu0>;
+ reg = <0x100000 0x1000 0x101000 0x1000>;
+ };
+ dcsr-cpu-sb-proxy@108000 {
+ compatible = "fsl,dcsr-e5500-sb-proxy", "fsl,dcsr-cpu-sb-proxy";
+ cpu-handle = <&cpu1>;
+ reg = <0x108000 0x1000 0x109000 0x1000>;
+ };
+};
+
+&bportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ cell-index = <0x0>;
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ cell-index = <0x1>;
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ bman-portal@8000 {
+ cell-index = <2>;
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <109 2 0 0>;
+ };
+ bman-portal@c000 {
+ cell-index = <0x3>;
+ compatible = "fsl,bman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <111 2 0 0>;
+ };
+ bman-portal@10000 {
+ cell-index = <0x4>;
+ compatible = "fsl,bman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <113 2 0 0>;
+ };
+ bman-portal@14000 {
+ cell-index = <0x5>;
+ compatible = "fsl,bman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <115 2 0 0>;
+ };
+};
+
+&qportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <104 0x2 0 0>;
+ cell-index = <0x0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <106 0x2 0 0>;
+ cell-index = <0x1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <108 0x2 0 0>;
+ cell-index = <0x2>;
+ };
+ qportal3: qman-portal@c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <110 0x2 0 0>;
+ cell-index = <0x3>;
+ };
+ qportal4: qman-portal@10000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <112 0x2 0 0>;
+ cell-index = <0x4>;
+ };
+ qportal5: qman-portal@14000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <114 0x2 0 0>;
+ cell-index = <0x5>;
+ };
+};
+
+&soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "soc";
+ compatible = "simple-bus";
+
+ soc-sram-error {
+ compatible = "fsl,soc-sram-error";
+ interrupts = <16 2 1 29>;
+ };
+
+ corenet-law@0 {
+ compatible = "fsl,corenet-law";
+ reg = <0x0 0x1000>;
+ fsl,num-laws = <16>;
+ };
+
+ ddr1: memory-controller@8000 {
+ compatible = "fsl,qoriq-memory-controller-v5.0",
+ "fsl,qoriq-memory-controller";
+ reg = <0x8000 0x1000>;
+ interrupts = <16 2 1 23>;
+ };
+
+ cpc: l3-cache-controller@10000 {
+ compatible = "fsl,t1023-l3-cache-controller", "cache";
+ reg = <0x10000 0x1000>;
+ interrupts = <16 2 1 27>;
+ };
+
+ corenet-cf@18000 {
+ compatible = "fsl,corenet2-cf";
+ reg = <0x18000 0x1000>;
+ interrupts = <16 2 1 31>;
+ };
+
+ iommu@20000 {
+ compatible = "fsl,pamu-v1.0", "fsl,pamu";
+ reg = <0x20000 0x1000>;
+ ranges = <0 0x20000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupts = <
+ 24 2 0 0
+ 16 2 1 30>;
+ pamu0: pamu@0 {
+ reg = <0 0x1000>;
+ fsl,primary-cache-geometry = <128 1>;
+ fsl,secondary-cache-geometry = <32 2>;
+ };
+ };
+
+/include/ "qoriq-mpic.dtsi"
+
+ guts: global-utilities@e0000 {
+ compatible = "fsl,t1023-device-config", "fsl,qoriq-device-config-2.0";
+ reg = <0xe0000 0xe00>;
+ fsl,has-rstcr;
+ fsl,liodn-bits = <12>;
+ };
+
+/include/ "qoriq-clockgen2.dtsi"
+ global-utilities@e1000 {
+ compatible = "fsl,t1023-clockgen", "fsl,qoriq-clockgen-2.0";
+ };
+
+ rcpm: global-utilities@e2000 {
+ compatible = "fsl,t1023-rcpm", "fsl,qoriq-rcpm-2.1";
+ reg = <0xe2000 0x1000>;
+ };
+
+ sfp: sfp@e8000 {
+ compatible = "fsl,t1023-sfp";
+ reg = <0xe8000 0x1000>;
+ };
+
+ serdes: serdes@ea000 {
+ compatible = "fsl,t1023-serdes";
+ reg = <0xea000 0x4000>;
+ };
+
+ tmu: tmu@f0000 {
+ compatible = "fsl,qoriq-tmu";
+ reg = <0xf0000 0x1000>;
+ interrupts = <18 2 0 0>;
+ fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>;
+ fsl,tmu-calibration =
+ <0x00000000 0x0000000f>,
+ <0x00000001 0x00000017>,
+ <0x00000002 0x0000001e>,
+ <0x00000003 0x00000026>,
+ <0x00000004 0x0000002e>,
+ <0x00000005 0x00000035>,
+ <0x00000006 0x0000003d>,
+ <0x00000007 0x00000044>,
+ <0x00000008 0x0000004c>,
+ <0x00000009 0x00000053>,
+ <0x0000000a 0x0000005b>,
+ <0x0000000b 0x00000064>,
+
+ <0x00010000 0x00000011>,
+ <0x00010001 0x0000001c>,
+ <0x00010002 0x00000024>,
+ <0x00010003 0x0000002b>,
+ <0x00010004 0x00000034>,
+ <0x00010005 0x00000039>,
+ <0x00010006 0x00000042>,
+ <0x00010007 0x0000004c>,
+ <0x00010008 0x00000051>,
+ <0x00010009 0x0000005a>,
+ <0x0001000a 0x00000063>,
+
+ <0x00020000 0x00000013>,
+ <0x00020001 0x00000019>,
+ <0x00020002 0x00000024>,
+ <0x00020003 0x0000002c>,
+ <0x00020004 0x00000035>,
+ <0x00020005 0x0000003d>,
+ <0x00020006 0x00000046>,
+ <0x00020007 0x00000050>,
+ <0x00020008 0x00000059>,
+
+ <0x00030000 0x00000002>,
+ <0x00030001 0x0000000d>,
+ <0x00030002 0x00000019>,
+ <0x00030003 0x00000024>;
+ #thermal-sensor-cells = <1>;
+ };
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <1000>;
+ polling-delay = <5000>;
+
+ thermal-sensors = <&tmu 0>;
+
+ trips {
+ cpu_alert: cpu-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+ cpu_crit: cpu-crit {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ map1 {
+ trip = <&cpu_alert>;
+ cooling-device =
+ <&cpu1 THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+ };
+
+ scfg: global-utilities@fc000 {
+ compatible = "fsl,t1023-scfg";
+ reg = <0xfc000 0x1000>;
+ };
+
+/include/ "elo3-dma-0.dtsi"
+/include/ "elo3-dma-1.dtsi"
+
+/include/ "qoriq-espi-0.dtsi"
+ spi@110000 {
+ fsl,espi-num-chipselects = <4>;
+ };
+
+/include/ "qoriq-esdhc-0.dtsi"
+ sdhc@114000 {
+ compatible = "fsl,t1023-esdhc", "fsl,esdhc";
+ fsl,iommu-parent = <&pamu0>;
+ fsl,liodn-reg = <&guts 0x530>; /* eSDHCLIODNR */
+ sdhci,auto-cmd12;
+ no-1-8-v;
+ };
+/include/ "qoriq-i2c-0.dtsi"
+/include/ "qoriq-i2c-1.dtsi"
+/include/ "qoriq-duart-0.dtsi"
+/include/ "qoriq-duart-1.dtsi"
+/include/ "qoriq-gpio-0.dtsi"
+/include/ "qoriq-gpio-1.dtsi"
+/include/ "qoriq-gpio-2.dtsi"
+/include/ "qoriq-gpio-3.dtsi"
+/include/ "qoriq-usb2-mph-0.dtsi"
+ usb0: usb@210000 {
+ compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
+ fsl,iommu-parent = <&pamu0>;
+ fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
+ phy_type = "utmi";
+ port0;
+ };
+/include/ "qoriq-usb2-dr-0.dtsi"
+ usb1: usb@211000 {
+ compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
+ fsl,iommu-parent = <&pamu0>;
+ fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
+ dr_mode = "host";
+ phy_type = "utmi";
+ };
+/include/ "qoriq-sata2-0.dtsi"
+ sata@220000 {
+ fsl,iommu-parent = <&pamu0>;
+ fsl,liodn-reg = <&guts 0x550>; /* SATA1LIODNR */
+ };
+
+/include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3l-0.dtsi"
+/include/ "qoriq-fman3-0-10g-0-best-effort.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts
index 97683f6a2936..9ea7942f914e 100644
--- a/arch/powerpc/boot/dts/t4240qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts
@@ -1,18 +1,18 @@
/*
- * T4240QDS Device Tree Source
+ * T1024 QDS Device Tree Source
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
+ * notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
* * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
*
*
* ALTERNATIVELY, this software may be distributed under the terms of the
@@ -20,7 +20,7 @@
* Foundation, either version 2 of that License or (at your option) any
* later version.
*
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
@@ -32,15 +32,36 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t4240si-pre.dtsi"
+/include/ "t102xsi-pre.dtsi"
/ {
- model = "fsl,T4240QDS";
- compatible = "fsl,T4240QDS";
+ model = "fsl,T1024QDS";
+ compatible = "fsl,T1024QDS";
#address-cells = <2>;
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -52,7 +73,6 @@
#size-cells = <1>;
compatible = "cfi-flash";
reg = <0x0 0x0 0x8000000>;
-
bank-width = <2>;
device-width = <1>;
};
@@ -62,37 +82,14 @@
#size-cells = <1>;
compatible = "fsl,ifc-nand";
reg = <0x2 0x0 0x10000>;
-
- partition@0 {
- /* This location must not be altered */
- /* 1MB for u-boot Bootloader Image */
- reg = <0x0 0x00100000>;
- label = "NAND U-Boot Image";
- read-only;
- };
-
- partition@100000 {
- /* 1MB for DTB Image */
- reg = <0x00100000 0x00100000>;
- label = "NAND DTB Image";
- };
-
- partition@200000 {
- /* 10MB for Linux Kernel Image */
- reg = <0x00200000 0x00A00000>;
- label = "NAND Linux Kernel Image";
- };
-
- partition@C00000 {
- /* 500MB for Root file System Image */
- reg = <0x00c00000 0x1F400000>;
- label = "NAND RFS Image";
- };
};
board-control@3,0 {
- compatible = "fsl,t4240qds-fpga", "fsl,fpga-qixis";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,tetra-fpga", "fsl,fpga-qixis";
reg = <3 0 0x300>;
+ ranges = <0 3 0 0x300>;
};
};
@@ -104,6 +101,14 @@
ranges = <0x00000000 0xf 0x00000000 0x01072000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -111,14 +116,42 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "sst,sst25wf040";
+ compatible = "micron,n25q128a11", "jedec,spi-nor"; /* 16MB */
reg = <0>;
- spi-max-frequency = <40000000>; /* input clock */
+ spi-max-frequency = <10000000>;
+ };
+
+ flash@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "sst,sst25wf040", "jedec,spi-nor"; /* 512KB */
+ reg = <1>;
+ spi-max-frequency = <10000000>;
+ };
+
+ flash@2 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "eon,en25s64", "jedec,spi-nor"; /* 8MB */
+ reg = <2>;
+ spi-max-frequency = <10000000>;
+ };
+
+ slic@2 {
+ compatible = "maxim,ds26522";
+ reg = <2>;
+ spi-max-frequency = <2000000>;
+ };
+
+ slic@3 {
+ compatible = "maxim,ds26522";
+ reg = <3>;
+ spi-max-frequency = <2000000>;
};
};
i2c@118000 {
- mux@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
@@ -127,36 +160,21 @@
i2c@0 {
#address-cells = <1>;
#size-cells = <0>;
- reg = <0>;
+ reg = <0x0>;
+
+ eeprom@50 {
+ compatible = "atmel,24c512";
+ reg = <0x50>;
+ };
eeprom@51 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c02";
reg = <0x51>;
};
- eeprom@52 {
- compatible = "at24,24c256";
- reg = <0x52>;
- };
- eeprom@53 {
- compatible = "at24,24c256";
- reg = <0x53>;
- };
- eeprom@54 {
- compatible = "at24,24c256";
- reg = <0x54>;
- };
- eeprom@55 {
- compatible = "at24,24c256";
- reg = <0x55>;
- };
- eeprom@56 {
- compatible = "at24,24c256";
- reg = <0x56>;
- };
- rtc@68 {
- compatible = "dallas,ds3232";
- reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
+
+ eeprom@57 {
+ compatible = "atmel,24c02";
+ reg = <0x57>;
};
};
@@ -176,47 +194,51 @@
reg = <0x41>;
shunt-resistor = <1000>;
};
+ };
- ina220@44 {
- compatible = "ti,ina220";
- reg = <0x44>;
- shunt-resistor = <1000>;
+ i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x3>;
+
+ adt7461@4c {
+ /* Thermal Monitor */
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
};
- ina220@45 {
- compatible = "ti,ina220";
- reg = <0x45>;
- shunt-resistor = <1000>;
+ eeprom@55 {
+ compatible = "atmel,24c02";
+ reg = <0x55>;
};
- ina220@46 {
- compatible = "ti,ina220";
- reg = <0x46>;
- shunt-resistor = <1000>;
+ eeprom@56 {
+ compatible = "atmel,24c512";
+ reg = <0x56>;
};
- ina220@47 {
- compatible = "ti,ina220";
- reg = <0x47>;
- shunt-resistor = <1000>;
+ eeprom@57 {
+ compatible = "atmel,24c512";
+ reg = <0x57>;
};
};
};
- };
-
- sdhc@114000 {
- voltage-ranges = <1800 1800 3300 3300>;
+ rtc@68 {
+ compatible = "dallas,ds3232";
+ reg = <0x68>;
+ interrupts = <0x5 0x1 0 0>;
+ };
};
};
pci0: pcie@ffe240000 {
reg = <0xf 0xfe240000 0 0x10000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
- 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
- 0 0x20000000
+ 0 0x10000000
0x01000000 0 0x00000000
0x01000000 0 0x00000000
@@ -226,12 +248,12 @@
pci1: pcie@ffe250000 {
reg = <0xf 0xfe250000 0 0x10000>;
- ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
- 0 0x20000000
+ 0 0x10000000
0x01000000 0 0x00000000
0x01000000 0 0x00000000
@@ -240,44 +262,19 @@
};
pci2: pcie@ffe260000 {
- reg = <0xf 0xfe260000 0 0x1000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
pcie@0 {
ranges = <0x02000000 0 0xe0000000
0x02000000 0 0xe0000000
- 0 0x20000000
-
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00010000>;
- };
- };
-
- pci3: pcie@ffe270000 {
- reg = <0xf 0xfe270000 0 0x10000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
- 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
- pcie@0 {
- ranges = <0x02000000 0 0xe0000000
- 0x02000000 0 0xe0000000
- 0 0x20000000
+ 0 0x10000000
0x01000000 0 0x00000000
0x01000000 0 0x00000000
0 0x00010000>;
};
};
- rio: rapidio@ffe0c0000 {
- reg = <0xf 0xfe0c0000 0 0x11000>;
-
- port1 {
- ranges = <0 0 0xc 0x20000000 0 0x10000000>;
- };
- port2 {
- ranges = <0 0 0xc 0x30000000 0 0x10000000>;
- };
- };
};
-/include/ "fsl/t4240si-post.dtsi"
+#include "t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
new file mode 100644
index 000000000000..7d003e07a9fb
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -0,0 +1,268 @@
+/*
+ * T1024 RDB Device Tree Source
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t102xsi-pre.dtsi"
+
+/ {
+ model = "fsl,T1024RDB";
+ compatible = "fsl,T1024RDB";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ sg_2500_aqr105_phy4 = &sg_2500_aqr105_phy4;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ ifc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x2000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xff800000 0x00010000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x8000000>;
+ bank-width = <2>;
+ device-width = <1>;
+ };
+
+ nand@1,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x2 0x0 0x10000>;
+ };
+
+ board-control@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,t1024-cpld", "fsl,deepsleep-cpld";
+ reg = <3 0 0x300>;
+ ranges = <0 3 0 0x300>;
+ bank-width = <1>;
+ device-width = <1>;
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+ spi@110000 {
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q512ax3", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <10000000>; /* input clk */
+ };
+
+ slic@1 {
+ compatible = "maxim,ds26522";
+ reg = <1>;
+ spi-max-frequency = <2000000>;
+ };
+
+ slic@2 {
+ compatible = "maxim,ds26522";
+ reg = <2>;
+ spi-max-frequency = <2000000>;
+ };
+ };
+
+ i2c@118000 {
+ adt7461@4c {
+ /* Thermal Monitor */
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+
+ current-sensor@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+
+ eeprom@50 {
+ compatible = "atmel,24c256";
+ reg = <0x50>;
+ };
+
+ rtc@68 {
+ compatible = "dallas,ds1339";
+ reg = <0x68>;
+ };
+ };
+
+ i2c@118100 {
+ i2c-mux@77 {
+ compatible = "nxp,pca9546";
+ reg = <0x77>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ fman@400000 {
+ fm1mac1: ethernet@e0000 {
+ phy-handle = <&xg_aqr105_phy3>;
+ phy-connection-type = "xgmii";
+ sleep = <&rcpm 0x80000000>;
+ };
+
+ fm1mac2: ethernet@e2000 {
+ sleep = <&rcpm 0x40000000>;
+ };
+
+ fm1mac3: ethernet@e4000 {
+ phy-handle = <&rgmii_phy2>;
+ phy-connection-type = "rgmii";
+ sleep = <&rcpm 0x20000000>;
+ };
+
+ fm1mac4: ethernet@e6000 {
+ phy-handle = <&rgmii_phy1>;
+ phy-connection-type = "rgmii";
+ sleep = <&rcpm 0x10000000>;
+ };
+
+
+ mdio0: mdio@fc000 {
+ rgmii_phy1: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ rgmii_phy2: ethernet-phy@6 {
+ reg = <0x6>;
+ };
+ };
+
+ xmdio0: mdio@fd000 {
+ xg_aqr105_phy3: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x1>;
+ };
+ sg_2500_aqr105_phy4: ethernet-phy@2 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x2>;
+ };
+ };
+ };
+ };
+
+ pci0: pcie@ffe240000 {
+ reg = <0xf 0xfe240000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe250000 {
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8010000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe260000 {
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+};
+
+#include "t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
new file mode 100644
index 000000000000..bb480346a58d
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
@@ -0,0 +1,100 @@
+/*
+ * T1024 Silicon/SoC Device Tree Source (post include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "t1023si-post.dtsi"
+
+/ {
+ aliases {
+ vga = &display;
+ display = &display;
+ };
+
+ qe:qe@ffe140000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "qe";
+ compatible = "fsl,qe";
+ ranges = <0x0 0xf 0xfe140000 0x40000>;
+ reg = <0xf 0xfe140000 0 0x480>;
+ fsl,qe-num-riscs = <1>;
+ fsl,qe-num-snums = <28>;
+ brg-frequency = <0>;
+ bus-frequency = <0>;
+ };
+};
+
+&soc {
+ display:display@180000 {
+ compatible = "fsl,t1024-diu", "fsl,diu";
+ reg = <0x180000 1000>;
+ interrupts = <74 2 0 0>;
+ };
+};
+
+&qe {
+ qeic: interrupt-controller@80 {
+ interrupt-controller;
+ compatible = "fsl,qe-ic";
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ reg = <0x80 0x80>;
+ interrupts = <95 2 0 0 94 2 0 0>; //high:79 low:78
+ };
+
+ ucc@2000 {
+ cell-index = <1>;
+ reg = <0x2000 0x200>;
+ interrupts = <32>;
+ interrupt-parent = <&qeic>;
+ };
+
+ ucc@2200 {
+ cell-index = <3>;
+ reg = <0x2200 0x200>;
+ interrupts = <34>;
+ interrupt-parent = <&qeic>;
+ };
+
+ muram@10000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,qe-muram", "fsl,cpm-muram";
+ ranges = <0x0 0x10000 0x6000>;
+
+ data-only@0 {
+ compatible = "fsl,qe-muram-data", "fsl,cpm-muram-data";
+ reg = <0x0 0x6000>;
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
new file mode 100644
index 000000000000..d87ea13164f2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
@@ -0,0 +1,95 @@
+/*
+ * T1024/T1023 Silicon/SoC Device Tree Source (pre include)
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/dts-v1/;
+
+/include/ "e5500_power_isa.dtsi"
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ ccsr = &soc;
+ dcsr = &dcsr;
+
+ dma0 = &dma0;
+ dma1 = &dma1;
+ serial0 = &serial0;
+ serial1 = &serial1;
+ serial2 = &serial2;
+ serial3 = &serial3;
+ pci0 = &pci0;
+ pci1 = &pci1;
+ pci2 = &pci2;
+ usb0 = &usb0;
+ usb1 = &usb1;
+ sdhc = &sdhc;
+
+ crypto = &crypto;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: PowerPC,e5500@0 {
+ device_type = "cpu";
+ reg = <0>;
+ clocks = <&clockgen 1 0>;
+ next-level-cache = <&L2_1>;
+ #cooling-cells = <2>;
+ L2_1: l2-cache {
+ next-level-cache = <&cpc>;
+ };
+ };
+ cpu1: PowerPC,e5500@1 {
+ device_type = "cpu";
+ reg = <1>;
+ clocks = <&clockgen 1 1>;
+ next-level-cache = <&L2_2>;
+ #cooling-cells = <2>;
+ L2_2: l2-cache {
+ next-level-cache = <&cpc>;
+ };
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/t2081qds.dts b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
index 8ec80a71e102..fb6bc02ebb60 100644
--- a/arch/powerpc/boot/dts/t2081qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
@@ -1,7 +1,7 @@
/*
- * T2081QDS Device Tree Source
+ * T1040D4RDB Device Tree Source
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,15 +32,15 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t208xsi-pre.dtsi"
-/include/ "t208xqds.dtsi"
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xd4rdb.dtsi"
/ {
- model = "fsl,T2081QDS";
- compatible = "fsl,T2081QDS";
+ model = "fsl,T1040D4RDB";
+ compatible = "fsl,T1040D4RDB";
#address-cells = <2>;
#size-cells = <2>;
interrupt-parent = <&mpic>;
};
-/include/ "fsl/t2081si-post.dtsi"
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t1040qds.dts b/arch/powerpc/boot/dts/fsl/t1040qds.dts
index 973c29c2f56e..5f76edc7838c 100644
--- a/arch/powerpc/boot/dts/t1040qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040qds.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xsi-pre.dtsi"
/include/ "t104xqds.dtsi"
/ {
@@ -43,4 +43,4 @@
interrupt-parent = <&mpic>;
};
-/include/ "fsl/t1040si-post.dtsi"
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
new file mode 100644
index 000000000000..d4f5f159d6f2
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * T1040RDB-REV-A Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ */
+
+#include "t1040rdb.dts"
+
+/ {
+ model = "fsl,T1040RDB-REV-A";
+};
+
+&seville_port0 {
+ label = "ETH5";
+};
+
+&seville_port2 {
+ label = "ETH7";
+};
+
+&seville_port4 {
+ label = "ETH9";
+};
+
+&seville_port6 {
+ label = "ETH11";
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
new file mode 100644
index 000000000000..4347924e9aa7
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -0,0 +1,188 @@
+/*
+ * T1040RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {
+ model = "fsl,T1040RDB";
+ compatible = "fsl,T1040RDB";
+
+ aliases {
+ phy_sgmii_2 = &phy_sgmii_2;
+ };
+
+ soc@ffe000000 {
+ fman@400000 {
+ ethernet@e0000 {
+ fixed-link = <0 1 1000 0 0>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ fixed-link = <1 1 1000 0 0>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_2>;
+ phy-connection-type = "sgmii";
+ };
+
+ mdio@fc000 {
+ phy_sgmii_2: ethernet-phy@3 {
+ reg = <0x03>;
+ };
+
+ /* VSC8514 QSGMII PHY */
+ phy_qsgmii_0: ethernet-phy@4 {
+ reg = <0x4>;
+ };
+
+ phy_qsgmii_1: ethernet-phy@5 {
+ reg = <0x5>;
+ };
+
+ phy_qsgmii_2: ethernet-phy@6 {
+ reg = <0x6>;
+ };
+
+ phy_qsgmii_3: ethernet-phy@7 {
+ reg = <0x7>;
+ };
+
+ /* VSC8514 QSGMII PHY */
+ phy_qsgmii_4: ethernet-phy@8 {
+ reg = <0x8>;
+ };
+
+ phy_qsgmii_5: ethernet-phy@9 {
+ reg = <0x9>;
+ };
+
+ phy_qsgmii_6: ethernet-phy@a {
+ reg = <0xa>;
+ };
+
+ phy_qsgmii_7: ethernet-phy@b {
+ reg = <0xb>;
+ };
+ };
+ };
+ };
+
+ ifc: localbus@ffe124000 {
+ cpld@3,0 {
+ compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld";
+ };
+ };
+};
+
+#include "t1040si-post.dtsi"
+
+&seville_switch {
+ status = "okay";
+};
+
+&seville_port0 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_0>;
+ phy-mode = "qsgmii";
+ label = "ETH3";
+ status = "okay";
+};
+
+&seville_port1 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_1>;
+ phy-mode = "qsgmii";
+ label = "ETH4";
+ status = "okay";
+};
+
+&seville_port2 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_2>;
+ phy-mode = "qsgmii";
+ label = "ETH5";
+ status = "okay";
+};
+
+&seville_port3 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_3>;
+ phy-mode = "qsgmii";
+ label = "ETH6";
+ status = "okay";
+};
+
+&seville_port4 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_4>;
+ phy-mode = "qsgmii";
+ label = "ETH7";
+ status = "okay";
+};
+
+&seville_port5 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_5>;
+ phy-mode = "qsgmii";
+ label = "ETH8";
+ status = "okay";
+};
+
+&seville_port6 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_6>;
+ phy-mode = "qsgmii";
+ label = "ETH9";
+ status = "okay";
+};
+
+&seville_port7 {
+ managed = "in-band-status";
+ phy-handle = <&phy_qsgmii_7>;
+ phy-mode = "qsgmii";
+ label = "ETH10";
+ status = "okay";
+};
+
+&seville_port8 {
+ status = "okay";
+};
+
+&seville_port9 {
+ status = "okay";
+};
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index 12e597eea3c8..c9542b73bd7f 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -1,7 +1,7 @@
/*
* T1040 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,10 +32,27 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <dt-bindings/thermal/thermal.h>
+
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
@@ -218,6 +235,130 @@
};
};
+&bportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ bman-portal@8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <109 2 0 0>;
+ };
+ bman-portal@c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <111 2 0 0>;
+ };
+ bman-portal@10000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <113 2 0 0>;
+ };
+ bman-portal@14000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <115 2 0 0>;
+ };
+ bman-portal@18000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <117 2 0 0>;
+ };
+ bman-portal@1c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <119 2 0 0>;
+ };
+ bman-portal@20000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <121 2 0 0>;
+ };
+ bman-portal@24000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <123 2 0 0>;
+ };
+};
+
+&qportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <104 0x2 0 0>;
+ cell-index = <0x0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <106 0x2 0 0>;
+ cell-index = <0x1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <108 0x2 0 0>;
+ cell-index = <0x2>;
+ };
+ qportal3: qman-portal@c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <110 0x2 0 0>;
+ cell-index = <0x3>;
+ };
+ qportal4: qman-portal@10000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <112 0x2 0 0>;
+ cell-index = <0x4>;
+ };
+ qportal5: qman-portal@14000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <114 0x2 0 0>;
+ cell-index = <0x5>;
+ };
+ qportal6: qman-portal@18000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <116 0x2 0 0>;
+ cell-index = <0x6>;
+ };
+ qportal7: qman-portal@1c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <118 0x2 0 0>;
+ cell-index = <0x7>;
+ };
+ qportal8: qman-portal@20000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <120 0x2 0 0>;
+ cell-index = <0x8>;
+ };
+ qportal9: qman-portal@24000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <122 0x2 0 0>;
+ cell-index = <0x9>;
+ };
+};
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -281,83 +422,13 @@
fsl,liodn-bits = <12>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen2.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,t1040-clockgen", "fsl,qoriq-clockgen-2.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-2.0";
- clock-output-names = "sysclk", "fixed-clock";
- };
-
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2", "pll0-div4";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2", "pll1-div4";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll1-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll1-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux1";
- };
-
- mux2: mux2@40 {
- #clock-cells = <0>;
- reg = <0x40 4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll1-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux2";
- };
-
- mux3: mux3@60 {
- #clock-cells = <0>;
- reg = <0x60 4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0_0", "pll0_1", "pll0_2",
- "pll1_0", "pll1_1", "pll1_2";
- clock-output-names = "cmux3";
- };
};
rcpm: global-utilities@e2000 {
- compatible = "fsl,t1040-rcpm", "fsl,qoriq-rcpm-2.0";
+ compatible = "fsl,t1040-rcpm", "fsl,qoriq-rcpm-2.1";
reg = <0xe2000 0x1000>;
};
@@ -371,6 +442,104 @@
reg = <0xea000 0x4000>;
};
+ tmu: tmu@f0000 {
+ compatible = "fsl,qoriq-tmu";
+ reg = <0xf0000 0x1000>;
+ interrupts = <18 2 0 0>;
+ fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>;
+ fsl,tmu-calibration =
+ <0x00000000 0x00000025>,
+ <0x00000001 0x00000028>,
+ <0x00000002 0x0000002d>,
+ <0x00000003 0x00000031>,
+ <0x00000004 0x00000036>,
+ <0x00000005 0x0000003a>,
+ <0x00000006 0x00000040>,
+ <0x00000007 0x00000044>,
+ <0x00000008 0x0000004a>,
+ <0x00000009 0x0000004f>,
+ <0x0000000a 0x00000054>,
+
+ <0x00010000 0x0000000d>,
+ <0x00010001 0x00000013>,
+ <0x00010002 0x00000019>,
+ <0x00010003 0x0000001f>,
+ <0x00010004 0x00000025>,
+ <0x00010005 0x0000002d>,
+ <0x00010006 0x00000033>,
+ <0x00010007 0x00000043>,
+ <0x00010008 0x0000004b>,
+ <0x00010009 0x00000053>,
+
+ <0x00020000 0x00000010>,
+ <0x00020001 0x00000017>,
+ <0x00020002 0x0000001f>,
+ <0x00020003 0x00000029>,
+ <0x00020004 0x00000031>,
+ <0x00020005 0x0000003c>,
+ <0x00020006 0x00000042>,
+ <0x00020007 0x0000004d>,
+ <0x00020008 0x00000056>,
+
+ <0x00030000 0x00000012>,
+ <0x00030001 0x0000001d>;
+ #thermal-sensor-cells = <1>;
+ };
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <1000>;
+ polling-delay = <5000>;
+
+ thermal-sensors = <&tmu 2>;
+
+ trips {
+ cpu_alert: cpu-alert {
+ temperature = <85000>;
+ hysteresis = <2000>;
+ type = "passive";
+ };
+ cpu_crit: cpu-crit {
+ temperature = <95000>;
+ hysteresis = <2000>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ map1 {
+ trip = <&cpu_alert>;
+ cooling-device =
+ <&cpu1 THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ map2 {
+ trip = <&cpu_alert>;
+ cooling-device =
+ <&cpu2 THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ map3 {
+ trip = <&cpu_alert>;
+ cooling-device =
+ <&cpu3 THERMAL_NO_LIMIT
+ THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+ };
+
+ scfg: global-utilities@fc000 {
+ compatible = "fsl,t1040-scfg";
+ reg = <0xfc000 0x1000>;
+ };
+
/include/ "elo3-dma-0.dtsi"
/include/ "elo3-dma-1.dtsi"
/include/ "qoriq-espi-0.dtsi"
@@ -395,7 +564,7 @@
/include/ "qoriq-gpio-3.dtsi"
/include/ "qoriq-usb2-mph-0.dtsi"
usb0: usb@210000 {
- compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+ compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
fsl,iommu-parent = <&pamu0>;
fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
phy_type = "utmi";
@@ -403,7 +572,7 @@
};
/include/ "qoriq-usb2-dr-0.dtsi"
usb1: usb@211000 {
- compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+ compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
fsl,iommu-parent = <&pamu0>;
fsl,liodn-reg = <&guts 0x524>; /* USB2LIODNR */
dr_mode = "host";
@@ -427,4 +596,162 @@
fsl,liodn-reg = <&guts 0x554>; /* SATA2LIODNR */
};
/include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3l-0.dtsi"
+/include/ "qoriq-fman3-0-1g-0.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ mdio@fc000 {
+ interrupts = <100 1 0 0>;
+ };
+
+ mdio@fd000 {
+ status = "disabled";
+ };
+ };
+
+ seville_switch: ethernet-switch@800000 {
+ compatible = "mscc,vsc9953-switch";
+ reg = <0x800000 0x290000>;
+ interrupts = <26 2 0 0>;
+ interrupt-names = "xtr";
+ little-endian;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ seville_port0: port@0 {
+ reg = <0>;
+ status = "disabled";
+ };
+
+ seville_port1: port@1 {
+ reg = <1>;
+ status = "disabled";
+ };
+
+ seville_port2: port@2 {
+ reg = <2>;
+ status = "disabled";
+ };
+
+ seville_port3: port@3 {
+ reg = <3>;
+ status = "disabled";
+ };
+
+ seville_port4: port@4 {
+ reg = <4>;
+ status = "disabled";
+ };
+
+ seville_port5: port@5 {
+ reg = <5>;
+ status = "disabled";
+ };
+
+ seville_port6: port@6 {
+ reg = <6>;
+ status = "disabled";
+ };
+
+ seville_port7: port@7 {
+ reg = <7>;
+ status = "disabled";
+ };
+
+ seville_port8: port@8 {
+ reg = <8>;
+ phy-mode = "internal";
+ ethernet = <&enet0>;
+ status = "disabled";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ };
+ };
+
+ seville_port9: port@9 {
+ reg = <9>;
+ phy-mode = "internal";
+ ethernet = <&enet1>;
+ status = "disabled";
+
+ fixed-link {
+ speed = <2500>;
+ full-duplex;
+ };
+ };
+ };
+ };
+};
+
+&qe {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "qe";
+ compatible = "fsl,qe";
+ fsl,qe-num-riscs = <1>;
+ fsl,qe-num-snums = <28>;
+
+ qeic: interrupt-controller@80 {
+ interrupt-controller;
+ compatible = "fsl,qe-ic";
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ reg = <0x80 0x80>;
+ interrupts = <95 2 0 0 94 2 0 0>; //high:79 low:78
+ };
+
+ ucc@2000 {
+ cell-index = <1>;
+ reg = <0x2000 0x200>;
+ interrupts = <32>;
+ interrupt-parent = <&qeic>;
+ };
+
+ ucc@2200 {
+ cell-index = <3>;
+ reg = <0x2200 0x200>;
+ interrupts = <34>;
+ interrupt-parent = <&qeic>;
+ };
+
+ muram@10000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,qe-muram", "fsl,cpm-muram";
+ ranges = <0x0 0x10000 0x6000>;
+
+ data-only@0 {
+ compatible = "fsl,qe-muram-data",
+ "fsl,cpm-muram-data";
+ reg = <0x0 0x6000>;
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
new file mode 100644
index 000000000000..4fa15f48a4c3
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
@@ -0,0 +1,105 @@
+/*
+ * T1042D4RDB Device Tree Source
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xd4rdb.dtsi"
+
+/ {
+ model = "fsl,T1042D4RDB";
+ compatible = "fsl,T1042D4RDB";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ ifc: localbus@ffe124000 {
+ cpld@3,0 {
+ compatible = "fsl,t1040d4rdb-cpld",
+ "fsl,deepsleep-cpld";
+ };
+ };
+
+ soc: soc@ffe000000 {
+ fman0: fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_0>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_1>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_2>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy_rgmii_0>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_1>;
+ phy-connection-type = "rgmii";
+ };
+
+ mdio0: mdio@fc000 {
+ phy_sgmii_0: ethernet-phy@2 {
+ reg = <0x02>;
+ };
+
+ phy_sgmii_1: ethernet-phy@3 {
+ reg = <0x03>;
+ };
+
+ phy_sgmii_2: ethernet-phy@1 {
+ reg = <0x01>;
+ };
+
+ phy_rgmii_0: ethernet-phy@4 {
+ reg = <0x04>;
+ };
+
+ phy_rgmii_1: ethernet-phy@5 {
+ reg = <0x05>;
+ };
+ };
+ };
+ };
+
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/t1042qds.dts b/arch/powerpc/boot/dts/fsl/t1042qds.dts
index 45bd03752154..90a4a73bb905 100644
--- a/arch/powerpc/boot/dts/t1042qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042qds.dts
@@ -32,7 +32,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "fsl/t104xsi-pre.dtsi"
+/include/ "t104xsi-pre.dtsi"
/include/ "t104xqds.dtsi"
/ {
@@ -43,4 +43,4 @@
interrupt-parent = <&mpic>;
};
-/include/ "fsl/t1042si-post.dtsi"
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
new file mode 100644
index 000000000000..099764322b33
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
@@ -0,0 +1,76 @@
+/*
+ * T1042RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {
+ model = "fsl,T1042RDB";
+ compatible = "fsl,T1042RDB";
+
+ aliases {
+ phy_sgmii_2 = &phy_sgmii_2;
+ };
+
+ soc@ffe000000 {
+ fman@400000 {
+ ethernet@e0000 {
+ status = "disabled";
+ };
+
+ ethernet@e2000 {
+ status = "disabled";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_sgmii_2>;
+ phy-connection-type = "sgmii";
+ };
+
+ mdio@fc000 {
+ phy_sgmii_2: ethernet-phy@3 {
+ reg = <0x03>;
+ };
+ };
+ };
+ };
+
+ ifc: localbus@ffe124000 {
+ cpld@3,0 {
+ compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld";
+ };
+ };
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
new file mode 100644
index 000000000000..b10cab1a347b
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
@@ -0,0 +1,73 @@
+/*
+ * T1042RDB_PI Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t104xsi-pre.dtsi"
+/include/ "t104xrdb.dtsi"
+
+/ {
+ model = "fsl,T1042RDB_PI";
+ compatible = "fsl,T1042RDB_PI";
+
+ ifc: localbus@ffe124000 {
+ cpld@3,0 {
+ compatible = "fsl,t104xrdb-cpld", "fsl,deepsleep-cpld";
+ };
+ };
+
+ soc: soc@ffe000000 {
+ i2c@118000 {
+ rtc@68 {
+ compatible = "dallas,ds1337";
+ reg = <0x68>;
+ interrupts = <0x2 0x1 0 0>;
+ };
+ };
+
+ fman@400000 {
+ ethernet@e0000 {
+ status = "disabled";
+ };
+
+ ethernet@e2000 {
+ status = "disabled";
+ };
+
+ ethernet@e4000 {
+ status = "disabled";
+ };
+ };
+ };
+};
+
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
index 319b74f29724..a5544f93689c 100644
--- a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
@@ -32,6 +32,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/include/ "t1040si-post.dtsi"
+#include "t1040si-post.dtsi"
/* Place holder for ethernet related device tree nodes */
diff --git a/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi
new file mode 100644
index 000000000000..863f9431285f
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xd4rdb.dtsi
@@ -0,0 +1,253 @@
+/*
+ * T1040D4RDB/T1042D4RDB Device Tree Source
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ ifc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x2000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xff800000 0x00010000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x8000000>;
+ bank-width = <2>;
+ device-width = <1>;
+ };
+
+ nand@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x2 0x0 0x10000>;
+ };
+
+ cpld@3,0 {
+ compatible = "fsl,t1040d4rdb-cpld";
+ reg = <3 0 0x300>;
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+
+ spi@110000 {
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q512ax3", "jedec,spi-nor";
+ reg = <0>;
+ /* input clock */
+ spi-max-frequency = <10000000>;
+ };
+ slic@1 {
+ compatible = "maxim,ds26522";
+ reg = <1>;
+ spi-max-frequency = <2000000>; /* input clock */
+ };
+ slic@2 {
+ compatible = "maxim,ds26522";
+ reg = <2>;
+ spi-max-frequency = <2000000>; /* input clock */
+ };
+ };
+ i2c@118000 {
+ hwmon@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+
+ rtc@68 {
+ compatible = "dallas,ds1337";
+ reg = <0x68>;
+ interrupts = <0x2 0x1 0 0>;
+ };
+ };
+
+ i2c@118100 {
+ mux@77 {
+ /*
+ * Child nodes of mux depend on which i2c
+ * devices are connected via the mini PCI
+ * connector slot1, the mini PCI connector
+ * slot2, the HDMI connector, and the PEX
+ * slot. Systems with such devices attached
+ * should provide a wrapper .dts file that
+ * includes this one and adds those nodes.
+ */
+ compatible = "nxp,pca9546";
+ reg = <0x77>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ };
+
+ pci0: pcie@ffe240000 {
+ reg = <0xf 0xfe240000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x0 0x0 0x10000000
+ 0x01000000 0 0x0 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe250000 {
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+ 0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe260000 {
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci3: pcie@ffe270000 {
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ qe: qe@ffe140000 {
+ ranges = <0x0 0xf 0xfe140000 0x40000>;
+ reg = <0xf 0xfe140000 0 0x480>;
+ brg-frequency = <0>;
+ bus-frequency = <0>;
+
+ si1: si@700 {
+ compatible = "fsl,t1040-qe-si";
+ reg = <0x700 0x80>;
+ };
+
+ siram1: siram@1000 {
+ compatible = "fsl,t1040-qe-siram";
+ reg = <0x1000 0x800>;
+ };
+
+ ucc_hdlc: ucc@2000 {
+ compatible = "fsl,ucc-hdlc";
+ rx-clock-name = "clk8";
+ tx-clock-name = "clk9";
+ fsl,rx-sync-clock = "rsync_pin";
+ fsl,tx-sync-clock = "tsync_pin";
+ fsl,tx-timeslot-mask = <0xfffffffe>;
+ fsl,rx-timeslot-mask = <0xfffffffe>;
+ fsl,tdm-framer-type = "e1";
+ fsl,tdm-id = <0>;
+ fsl,siram-entry-id = <0>;
+ fsl,tdm-interface;
+ };
+
+ ucc_serial: ucc@2200 {
+ compatible = "fsl,t1040-ucc-uart";
+ port-number = <0>;
+ rx-clock-name = "brg2";
+ tx-clock-name = "brg2";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
new file mode 100644
index 000000000000..1c329f076f64
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
@@ -0,0 +1,407 @@
+/*
+ * T104xQDS Device Tree Source
+ *
+ * Copyright 2013 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/ {
+ model = "fsl,T1040QDS";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ emi1_rgmii0 = &t1040mdio0;
+ emi1_rgmii1 = &t1040mdio1;
+ emi1_slot3 = &t1040mdio3;
+ emi1_slot5 = &t1040mdio5;
+ emi1_slot6 = &t1040mdio6;
+ emi1_slot7 = &t1040mdio7;
+ rgmii_phy1 = &rgmii_phy1;
+ rgmii_phy2 = &rgmii_phy2;
+ phy_s3_01 = &phy_s3_01;
+ phy_s3_02 = &phy_s3_02;
+ phy_s3_03 = &phy_s3_03;
+ phy_s3_04 = &phy_s3_04;
+ phy_s5_01 = &phy_s5_01;
+ phy_s5_02 = &phy_s5_02;
+ phy_s5_03 = &phy_s5_03;
+ phy_s5_04 = &phy_s5_04;
+ phy_s6_01 = &phy_s6_01;
+ phy_s6_02 = &phy_s6_02;
+ phy_s6_03 = &phy_s6_03;
+ phy_s6_04 = &phy_s6_04;
+ phy_s7_01 = &phy_s7_01;
+ phy_s7_02 = &phy_s7_02;
+ phy_s7_03 = &phy_s7_03;
+ phy_s7_04 = &phy_s7_04;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ ifc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x2000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xff800000 0x00010000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x8000000>;
+
+ bank-width = <2>;
+ device-width = <1>;
+ };
+
+ nand@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x2 0x0 0x10000>;
+ };
+
+ board-control@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,fpga-qixis";
+ reg = <3 0 0x300>;
+ ranges = <0 3 0 0x300>;
+
+ mdio-mux-emi1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ reg = <0x54 1>;
+ mux-mask = <0xe0>;
+
+ t1040mdio0: mdio@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x00>;
+ status = "disabled";
+
+ rgmii_phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ };
+
+ t1040mdio1: mdio@20 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x20>;
+ status = "disabled";
+
+ rgmii_phy2: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+
+ t1040mdio3: mdio@60 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x60>;
+ status = "disabled";
+
+ phy_s3_01: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_s3_02: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_s3_03: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_s3_04: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t1040mdio5: mdio@a0 { /* PHY unit addresses mirror their reg (MDIO address) values */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xa0>;
+
+ phy_s5_01: ethernet-phy@14 {
+ reg = <0x14>;
+ };
+
+ phy_s5_02: ethernet-phy@15 {
+ reg = <0x15>;
+ };
+
+ phy_s5_03: ethernet-phy@16 {
+ reg = <0x16>;
+ };
+
+ phy_s5_04: ethernet-phy@17 {
+ reg = <0x17>;
+ };
+ };
+
+ t1040mdio6: mdio@c0 { /* PHY unit addresses mirror their reg (MDIO address) values */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xc0>;
+
+ phy_s6_01: ethernet-phy@18 {
+ reg = <0x18>;
+ };
+
+ phy_s6_02: ethernet-phy@19 {
+ reg = <0x19>;
+ };
+
+ phy_s6_03: ethernet-phy@1a {
+ reg = <0x1a>;
+ };
+
+ phy_s6_04: ethernet-phy@1b {
+ reg = <0x1b>;
+ };
+ };
+
+ t1040mdio7: mdio@e0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe0>;
+ status = "disabled";
+
+ phy_s7_01: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_s7_02: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_s7_03: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_s7_04: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+
+ spi@110000 {
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "micron,n25q128a11", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <10000000>; /* input clock */
+ };
+ };
+
+ i2c@118000 {
+ i2c-mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ };
+ rtc@68 {
+ compatible = "dallas,ds3232";
+ reg = <0x68>;
+ interrupts = <0x1 0x1 0 0>;
+ };
+ };
+
+ fman@400000 {
+ ethernet@e0000 {
+ fixed-link = <0 1 1000 0 0>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ fixed-link = <1 1 1000 0 0>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy_s7_03>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&rgmii_phy1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&rgmii_phy2>;
+ phy-connection-type = "rgmii";
+ };
+ };
+ };
+
+ pci0: pcie@ffe240000 {
+ reg = <0xf 0xfe240000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe250000 {
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000
+ 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe260000 {
+ reg = <0xf 0xfe260000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci3: pcie@ffe270000 {
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x10000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ qe: qe@ffe140000 {
+ ranges = <0x0 0xf 0xfe140000 0x40000>;
+ reg = <0xf 0xfe140000 0 0x480>;
+ brg-frequency = <0>;
+ bus-frequency = <0>;
+
+ si1: si@700 {
+ compatible = "fsl,t1040-qe-si";
+ reg = <0x700 0x80>;
+ };
+
+ siram1: siram@1000 {
+ compatible = "fsl,t1040-qe-siram";
+ reg = <0x1000 0x800>;
+ };
+
+ ucc_hdlc: ucc@2000 {
+ compatible = "fsl,ucc-hdlc";
+ rx-clock-name = "clk8";
+ tx-clock-name = "clk9";
+ fsl,rx-sync-clock = "rsync_pin";
+ fsl,tx-sync-clock = "tsync_pin";
+ fsl,tx-timeslot-mask = <0xfffffffe>;
+ fsl,rx-timeslot-mask = <0xfffffffe>;
+ fsl,tdm-framer-type = "e1";
+ fsl,tdm-id = <0>;
+ fsl,siram-entry-id = <0>;
+ fsl,tdm-interface;
+ };
+
+ ucc_serial: ucc@2200 {
+ compatible = "fsl,t1040-ucc-uart";
+ port-number = <0>;
+ rx-clock-name = "brg2";
+ tx-clock-name = "brg2";
+ };
+ };
+};
diff --git a/arch/powerpc/boot/dts/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
index 234f4b596c5b..fc7bec5dcb90 100644
--- a/arch/powerpc/boot/dts/t104xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -1,7 +1,7 @@
/*
- * T104xQDS Device Tree Source
+ * T1040RDB/T1042RDB Device Tree Source
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -33,10 +33,30 @@
*/
/ {
- model = "fsl,T1040QDS";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
+ aliases {
+ phy_rgmii_0 = &phy_rgmii_0;
+ phy_rgmii_1 = &phy_rgmii_1;
+ phy_sgmii_2 = &phy_sgmii_2;
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
@@ -49,7 +69,6 @@
#size-cells = <1>;
compatible = "cfi-flash";
reg = <0x0 0x0 0x8000000>;
-
bank-width = <2>;
device-width = <1>;
};
@@ -61,10 +80,7 @@
reg = <0x2 0x0 0x10000>;
};
- board-control@3,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,fpga-qixis";
+ cpld@3,0 {
reg = <3 0 0x300>;
};
};
@@ -77,6 +93,14 @@
ranges = <0x00000000 0xf 0x00000000 0x01072000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -85,21 +109,56 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,n25q128a11";
+ compatible = "micron,n25q512ax3", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <10000000>; /* input clock */
};
+ slic@3 {
+ compatible = "maxim,ds26522";
+ reg = <3>;
+ spi-max-frequency = <2000000>; /* input clock */
+ };
};
i2c@118000 {
- pca9547@77 {
- compatible = "philips,pca9547";
+ adt7461@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+ };
+
+ i2c@118100 {
+ i2c-mux@77 {
+ compatible = "nxp,pca9546";
reg = <0x77>;
+ #address-cells = <1>;
+ #size-cells = <0>;
};
- rtc@68 {
- compatible = "dallas,ds3232";
- reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
+ };
+
+ fman@400000 {
+ ethernet@e6000 {
+ phy-handle = <&phy_rgmii_0>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_rgmii_1>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio0: mdio@fc000 {
+ phy_sgmii_2: ethernet-phy@3 {
+ reg = <0x03>;
+ };
+
+ phy_rgmii_0: ethernet-phy@1 {
+ reg = <0x01>;
+ };
+
+ phy_rgmii_1: ethernet-phy@2 {
+ reg = <0x02>;
+ };
};
};
};
@@ -163,4 +222,42 @@
0 0x00010000>;
};
};
+
+ qe: qe@ffe140000 {
+ ranges = <0x0 0xf 0xfe140000 0x40000>;
+ reg = <0xf 0xfe140000 0 0x480>;
+ brg-frequency = <0>;
+ bus-frequency = <0>;
+
+ si1: si@700 {
+ compatible = "fsl,t1040-qe-si";
+ reg = <0x700 0x80>;
+ };
+
+ siram1: siram@1000 {
+ compatible = "fsl,t1040-qe-siram";
+ reg = <0x1000 0x800>;
+ };
+
+ ucc_hdlc: ucc@2000 {
+ compatible = "fsl,ucc-hdlc";
+ rx-clock-name = "clk8";
+ tx-clock-name = "clk9";
+ fsl,rx-sync-clock = "rsync_pin";
+ fsl,tx-sync-clock = "tsync_pin";
+ fsl,tx-timeslot-mask = <0xfffffffe>;
+ fsl,rx-timeslot-mask = <0xfffffffe>;
+ fsl,tdm-framer-type = "e1";
+ fsl,tdm-id = <0>;
+ fsl,siram-entry-id = <0>;
+ fsl,tdm-interface;
+ };
+
+ ucc_serial: ucc@2200 {
+ compatible = "fsl,t1040-ucc-uart";
+ port-number = <0>;
+ rx-clock-name = "brg2";
+ tx-clock-name = "brg2";
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
index bbb7025ca9c2..dd59e4b69480 100644
--- a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
@@ -1,7 +1,7 @@
/*
* T1040/T1042 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013-2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -58,6 +58,13 @@
sdhc = &sdhc;
crypto = &crypto;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
};
cpus {
@@ -67,8 +74,9 @@
cpu0: PowerPC,e5500@0 {
device_type = "cpu";
reg = <0>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
+ #cooling-cells = <2>;
L2_1: l2-cache {
next-level-cache = <&cpc>;
};
@@ -76,8 +84,9 @@
cpu1: PowerPC,e5500@1 {
device_type = "cpu";
reg = <1>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_2>;
+ #cooling-cells = <2>;
L2_2: l2-cache {
next-level-cache = <&cpc>;
};
@@ -85,8 +94,9 @@
cpu2: PowerPC,e5500@2 {
device_type = "cpu";
reg = <2>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_3>;
+ #cooling-cells = <2>;
L2_3: l2-cache {
next-level-cache = <&cpc>;
};
@@ -94,8 +104,9 @@
cpu3: PowerPC,e5500@3 {
device_type = "cpu";
reg = <3>;
- clocks = <&mux3>;
+ clocks = <&clockgen 1 3>;
next-level-cache = <&L2_4>;
+ #cooling-cells = <2>;
L2_4: l2-cache {
next-level-cache = <&cpc>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t2080qds.dts b/arch/powerpc/boot/dts/fsl/t2080qds.dts
new file mode 100644
index 000000000000..8d190e8c62ce
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080qds.dts
@@ -0,0 +1,213 @@
+/*
+ * T2080QDS Device Tree Source
+ *
+ * Copyright 2013 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t208xsi-pre.dtsi"
+/include/ "t208xqds.dtsi"
+
+/ {
+ model = "fsl,T2080QDS";
+ compatible = "fsl,T2080QDS";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ emi1_slot1 = &t2080mdio2;
+ emi1_slot2 = &t2080mdio3;
+ emi1_slot3 = &t2080mdio4;
+ };
+
+ rio: rapidio@ffe0c0000 {
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+
+ port1 {
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+ port2 {
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
+};
+
+&soc {
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_s3_1e>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_s3_1f>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&rgmii_phy1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&rgmii_phy2>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_sgmii_s2_1e>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@ea000 {
+ phy-handle = <&phy_sgmii_s2_1d>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_xaui_slot3>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&phy_sgmii_s3_1f>;
+ phy-connection-type = "xgmii";
+ };
+
+ mdio@fd000 {
+ phy_xaui_slot3: ethernet-phy@3 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x3>;
+ };
+ };
+ };
+};
+
+&boardctrl {
+ mdio-mux-emi1 {
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x54 1>;
+ mux-mask = <0xe0>;
+
+ t2080mdio0: mdio@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ rgmii_phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ };
+
+ t2080mdio1: mdio@20 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x20>;
+
+ rgmii_phy2: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+
+ t2080mdio2: mdio@40 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x40>;
+ status = "disabled";
+
+ phy_sgmii_s1_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s1_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s1_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s1_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2080mdio3: mdio@c0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xc0>;
+
+ phy_sgmii_s2_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s2_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s2_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s2_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2080mdio4: mdio@60 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x60>;
+ status = "disabled";
+
+ phy_sgmii_s3_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s3_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s3_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s3_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
+};
+
+/include/ "t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t2080rdb.dts b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
new file mode 100644
index 000000000000..092a400740f8
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2080rdb.dts
@@ -0,0 +1,122 @@
+/*
+ * T2080PCIe-RDB Board Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t208xsi-pre.dtsi"
+/include/ "t208xrdb.dtsi"
+
+/ {
+ model = "fsl,T2080RDB";
+ compatible = "fsl,T2080RDB";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ rio: rapidio@ffe0c0000 {
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+
+ port1 {
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+ port2 {
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
+};
+
+&soc {
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&xg_aq1202_phy3>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&xg_aq1202_phy4>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&rgmii_phy1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&rgmii_phy2>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&xg_cs4315_phy2>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&xg_cs4315_phy1>;
+ phy-connection-type = "xgmii";
+ };
+
+ mdio@fc000 {
+ rgmii_phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ rgmii_phy2: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+
+ mdio@fd000 {
+ xg_cs4315_phy1: ethernet-phy@c {
+ compatible = "ethernet-phy-id13e5.1002";
+ reg = <0xc>;
+ };
+
+ xg_cs4315_phy2: ethernet-phy@d {
+ compatible = "ethernet-phy-id13e5.1002";
+ reg = <0xd>;
+ };
+
+ xg_aq1202_phy3: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+
+ xg_aq1202_phy4: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x1>;
+ };
+ };
+ };
+};
+
+/include/ "t2080si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t2081qds.dts b/arch/powerpc/boot/dts/fsl/t2081qds.dts
new file mode 100644
index 000000000000..fc5c4a30f7ad
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t2081qds.dts
@@ -0,0 +1,265 @@
+/*
+ * T2081QDS Device Tree Source
+ *
+ * Copyright 2013 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t208xsi-pre.dtsi"
+/include/ "t208xqds.dtsi"
+
+/ {
+ model = "fsl,T2081QDS";
+ compatible = "fsl,T2081QDS";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ emi1_slot1 = &t2081mdio2;
+ emi1_slot2 = &t2081mdio3;
+ emi1_slot3 = &t2081mdio4;
+ emi1_slot5 = &t2081mdio5;
+ emi1_slot6 = &t2081mdio6;
+ emi1_slot7 = &t2081mdio7;
+ };
+};
+
+&soc {
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&phy_sgmii_s7_1c>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy_sgmii_s7_1d>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&rgmii_phy1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&rgmii_phy2>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phy_sgmii_s3_1c>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@ea000 {
+ phy-handle = <&phy_sgmii_s7_1f>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&phy_sgmii_s2_1c>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&phy_sgmii_s7_1e>;
+ phy-connection-type = "xgmii";
+ };
+ };
+};
+
+&boardctrl {
+ mdio-mux-emi1 {
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x54 1>;
+ mux-mask = <0xe0>;
+
+ t2081mdio0: mdio@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ rgmii_phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+ };
+
+ t2081mdio1: mdio@20 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x20>;
+
+ rgmii_phy2: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+
+ t2081mdio2: mdio@40 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x40>;
+
+ phy_sgmii_s1_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s1_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s1_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s1_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2081mdio3: mdio@60 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x60>;
+
+ phy_sgmii_s2_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s2_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s2_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s2_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2081mdio4: mdio@80 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x80>;
+ status = "disabled";
+
+ phy_sgmii_s3_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s3_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s3_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s3_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2081mdio5: mdio@a0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xa0>;
+ status = "disabled";
+
+ phy_sgmii_s5_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s5_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s5_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s5_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2081mdio6: mdio@c0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xc0>;
+ status = "disabled";
+
+ phy_sgmii_s6_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s6_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s6_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s6_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t2081mdio7: mdio@e0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe0>;
+
+ phy_sgmii_s7_1c: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ phy_sgmii_s7_1d: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ phy_sgmii_s7_1e: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ phy_sgmii_s7_1f: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
+};
+
+/include/ "t2081si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
index 97479f0ce630..6bb95878d39d 100644
--- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi
@@ -1,7 +1,7 @@
/*
* T2081 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,10 +32,25 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
@@ -224,6 +239,218 @@
};
};
+&bportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ bman-portal@8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <109 2 0 0>;
+ };
+ bman-portal@c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <111 2 0 0>;
+ };
+ bman-portal@10000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <113 2 0 0>;
+ };
+ bman-portal@14000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <115 2 0 0>;
+ };
+ bman-portal@18000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <117 2 0 0>;
+ };
+ bman-portal@1c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <119 2 0 0>;
+ };
+ bman-portal@20000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <121 2 0 0>;
+ };
+ bman-portal@24000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <123 2 0 0>;
+ };
+ bman-portal@28000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+ interrupts = <125 2 0 0>;
+ };
+ bman-portal@2c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+ interrupts = <127 2 0 0>;
+ };
+ bman-portal@30000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+ interrupts = <129 2 0 0>;
+ };
+ bman-portal@34000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+ interrupts = <131 2 0 0>;
+ };
+ bman-portal@38000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+ interrupts = <133 2 0 0>;
+ };
+ bman-portal@3c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+ interrupts = <135 2 0 0>;
+ };
+ bman-portal@40000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+ interrupts = <137 2 0 0>;
+ };
+ bman-portal@44000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+ interrupts = <139 2 0 0>;
+ };
+};
+
+&qportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <104 0x2 0 0>;
+ cell-index = <0x0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <106 0x2 0 0>;
+ cell-index = <0x1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <108 0x2 0 0>;
+ cell-index = <0x2>;
+ };
+ qportal3: qman-portal@c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <110 0x2 0 0>;
+ cell-index = <0x3>;
+ };
+ qportal4: qman-portal@10000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <112 0x2 0 0>;
+ cell-index = <0x4>;
+ };
+ qportal5: qman-portal@14000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <114 0x2 0 0>;
+ cell-index = <0x5>;
+ };
+ qportal6: qman-portal@18000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <116 0x2 0 0>;
+ cell-index = <0x6>;
+ };
+ qportal7: qman-portal@1c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <118 0x2 0 0>;
+ cell-index = <0x7>;
+ };
+ qportal8: qman-portal@20000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <120 0x2 0 0>;
+ cell-index = <0x8>;
+ };
+ qportal9: qman-portal@24000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <122 0x2 0 0>;
+ cell-index = <0x9>;
+ };
+ qportal10: qman-portal@28000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+ interrupts = <124 0x2 0 0>;
+ cell-index = <0xa>;
+ };
+ qportal11: qman-portal@2c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+ interrupts = <126 0x2 0 0>;
+ cell-index = <0xb>;
+ };
+ qportal12: qman-portal@30000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+ interrupts = <128 0x2 0 0>;
+ cell-index = <0xc>;
+ };
+ qportal13: qman-portal@34000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+ interrupts = <130 0x2 0 0>;
+ cell-index = <0xd>;
+ };
+ qportal14: qman-portal@38000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+ interrupts = <132 0x2 0 0>;
+ cell-index = <0xe>;
+ };
+ qportal15: qman-portal@3c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+ interrupts = <134 0x2 0 0>;
+ cell-index = <0xf>;
+ };
+ qportal16: qman-portal@40000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+ interrupts = <136 0x2 0 0>;
+ cell-index = <0x10>;
+ };
+ qportal17: qman-portal@44000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+ interrupts = <138 0x2 0 0>;
+ cell-index = <0x11>;
+ };
+};
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -305,56 +532,9 @@
fsl,liodn-bits = <12>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen2.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,t2080-clockgen", "fsl,qoriq-clockgen-2.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-2.0";
- clock-output-names = "sysclk", "fixed-clock";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2", "pll0-div4";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2", "pll1-div4";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll1-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>;
- clock-names = "pll0", "pll0-div2", "pll1-div4",
- "pll1", "pll1-div2", "pll1-div4";
- clock-output-names = "cmux1";
- };
};
rcpm: global-utilities@e2000 {
@@ -410,7 +590,7 @@
/include/ "qoriq-gpio-3.dtsi"
/include/ "qoriq-usb2-mph-0.dtsi"
usb0: usb@210000 {
- compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+ compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
fsl,iommu-parent = <&pamu1>;
fsl,liodn-reg = <&guts 0x520>; /* USB1LIODNR */
phy_type = "utmi";
@@ -418,18 +598,80 @@
};
/include/ "qoriq-usb2-dr-0.dtsi"
usb1: usb@211000 {
- compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+ compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
fsl,iommu-parent = <&pamu1>;
fsl,liodn-reg = <&guts 0x524>; /* USB1LIODNR */
dr_mode = "host";
phy_type = "utmi";
};
/include/ "qoriq-sec5.2-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3-0.dtsi"
+/include/ "qoriq-fman3-0-10g-2.dtsi"
+/include/ "qoriq-fman3-0-10g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+/include/ "qoriq-fman3-0-1g-5.dtsi"
+/include/ "qoriq-fman3-0-10g-0.dtsi"
+/include/ "qoriq-fman3-0-10g-1.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ enet5: ethernet@ea000 {
+ };
+
+ enet6: ethernet@f0000 {
+ };
+
+ enet7: ethernet@f2000 {
+ };
+
+ mdio@fc000 {
+ interrupts = <100 1 0 0>;
+ };
+
+ mdio@fd000 {
+ interrupts = <101 1 0 0>;
+ };
+ };
L2_1: l2-cache-controller@c20000 {
/* Cluster 0 L2 cache */
compatible = "fsl,t2080-l2-cache-controller";
reg = <0xc20000 0x40000>;
next-level-cache = <&cpc>;
+ interrupts = <16 2 1 9>;
};
};
+
+&fman0_rx_0x08 {
+ /delete-property/ fsl,fman-10g-port;
+};
+
+&fman0_tx_0x28 {
+ /delete-property/ fsl,fman-10g-port;
+};
+
+&fman0_rx_0x09 {
+ /delete-property/ fsl,fman-10g-port;
+};
+
+&fman0_tx_0x29 {
+ /delete-property/ fsl,fman-10g-port;
+};
diff --git a/arch/powerpc/boot/dts/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
index 555dc6e03d89..962c99941645 100644
--- a/arch/powerpc/boot/dts/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
@@ -1,7 +1,7 @@
/*
* T2080/T2081 QDS Device Tree Source
*
- * Copyright 2013 Freescale Semiconductor Inc.
+ * Copyright 2013 - 2014 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -39,6 +39,25 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -78,6 +97,14 @@
ranges = <0x00000000 0xf 0x00000000 0x01072000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -85,7 +112,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,n25q128a11"; /* 16MB */
+ compatible = "micron,n25q128a11", "jedec,spi-nor"; /* 16MB */
reg = <0>;
spi-max-frequency = <40000000>; /* input clock */
};
@@ -93,7 +120,7 @@
flash@1 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "sst,sst25wf040";
+ compatible = "sst,sst25wf040", "jedec,spi-nor";
reg = <1>;
spi-max-frequency = <35000000>;
};
@@ -101,14 +128,14 @@
flash@2 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "eon,en25s64";
+ compatible = "eon,en25s64", "jedec,spi-nor";
reg = <2>;
spi-max-frequency = <35000000>;
};
};
i2c@118000 {
- pca9547@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9547";
reg = <0x77>;
#address-cells = <1>;
@@ -120,24 +147,24 @@
reg = <0x0>;
eeprom@50 {
- compatible = "at24,24c512";
+ compatible = "atmel,24c512";
reg = <0x50>;
};
eeprom@51 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x51>;
};
eeprom@57 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x57>;
};
rtc@68 {
compatible = "dallas,ds3232";
reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
+ interrupts = <0xb 0x1 0 0>;
};
};
@@ -147,7 +174,7 @@
reg = <0x1>;
eeprom@55 {
- compatible = "at24,24c02";
+ compatible = "atmel,24c02";
reg = <0x55>;
};
};
@@ -169,6 +196,17 @@
shunt-resistor = <1000>;
};
};
+
+ i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x3>;
+
+ adt7461@4c {
+ compatible = "adi,adt7461";
+ reg = <0x4c>;
+ };
+ };
};
};
diff --git a/arch/powerpc/boot/dts/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
index 1481e192e783..ecc3e8c7394c 100644
--- a/arch/powerpc/boot/dts/t208xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
@@ -39,6 +39,25 @@
#size-cells = <2>;
interrupt-parent = <&mpic>;
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
ifc: localbus@ffe124000 {
reg = <0xf 0xfe124000 0 0x2000>;
ranges = <0 0 0xf 0xe8000000 0x08000000
@@ -79,6 +98,14 @@
ranges = <0x00000000 0xf 0x00000000 0x01072000>;
};
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
soc: soc@ffe000000 {
ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
reg = <0xf 0xfe000000 0 0x00001000>;
@@ -86,7 +113,7 @@
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,n25q512a";
+ compatible = "micron,n25q512ax3", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <10000000>; /* input clock */
};
@@ -111,7 +138,7 @@
};
i2c@118100 {
- pca9546@77 {
+ i2c-mux@77 {
compatible = "nxp,pca9546";
reg = <0x77>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
index e71ceb0e1100..3f745de44284 100644
--- a/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xsi-pre.dtsi
@@ -51,6 +51,17 @@
serial3 = &serial3;
crypto = &crypto;
+
+ fman0 = &fman0;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
+ ethernet6 = &enet6;
+ ethernet7 = &enet7;
+
pci0 = &pci0;
pci1 = &pci1;
pci2 = &pci2;
@@ -70,28 +81,28 @@
cpu0: PowerPC,e6500@0 {
device_type = "cpu";
reg = <0 1>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu1: PowerPC,e6500@2 {
device_type = "cpu";
reg = <2 3>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu2: PowerPC,e6500@4 {
device_type = "cpu";
reg = <4 5>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu3: PowerPC,e6500@6 {
device_type = "cpu";
reg = <6 7>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts
new file mode 100644
index 000000000000..128b5798bb97
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts
@@ -0,0 +1,708 @@
+/*
+ * T4240QDS Device Tree Source
+ *
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t4240si-pre.dtsi"
+
+/ {
+ model = "fsl,T4240QDS";
+ compatible = "fsl,T4240QDS";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ phy_rgmii1 = &phyrgmii1;
+ phy_rgmii2 = &phyrgmii2;
+ phy_sgmii3 = &phy3;
+ phy_sgmii4 = &phy4;
+ phy_sgmii11 = &phy11;
+ phy_sgmii12 = &phy12;
+ sgmii_phy11 = &sgmiiphy11;
+ sgmii_phy12 = &sgmiiphy12;
+ sgmii_phy13 = &sgmiiphy13;
+ sgmii_phy14 = &sgmiiphy14;
+ sgmii_phy21 = &sgmiiphy21;
+ sgmii_phy22 = &sgmiiphy22;
+ sgmii_phy23 = &sgmiiphy23;
+ sgmii_phy24 = &sgmiiphy24;
+ sgmii_phy31 = &sgmiiphy31;
+ sgmii_phy32 = &sgmiiphy32;
+ sgmii_phy33 = &sgmiiphy33;
+ sgmii_phy34 = &sgmiiphy34;
+ sgmii_phy41 = &sgmiiphy41;
+ sgmii_phy42 = &sgmiiphy42;
+ sgmii_phy43 = &sgmiiphy43;
+ sgmii_phy44 = &sgmiiphy44;
+ phy_xfi1 = &xfiphy1;
+ phy_xfi2 = &xfiphy2;
+ phy_xfi3 = &xfiphy3;
+ phy_xfi4 = &xfiphy4;
+ xfi_pcs_mdio1 = &xfimdio0;
+ xfi_pcs_mdio2 = &xfimdio1;
+ xfi_pcs_mdio3 = &xfimdio2;
+ xfi_pcs_mdio4 = &xfimdio3;
+ emi1_rgmii = &t4240mdio0;
+ emi1_slot1 = &t4240mdio1;
+ emi1_slot2 = &t4240mdio2;
+ emi1_slot3 = &t4240mdio3;
+ emi1_slot4 = &t4240mdio4;
+ };
+
+ ifc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x2000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xff800000 0x00010000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x8000000>;
+
+ bank-width = <2>;
+ device-width = <1>;
+ };
+
+ nand@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x2 0x0 0x10000>;
+
+ partition@0 {
+ /* This location must not be altered */
+ /* 1MB for u-boot Bootloader Image */
+ reg = <0x0 0x00100000>;
+ label = "NAND U-Boot Image";
+ read-only;
+ };
+
+ partition@100000 {
+ /* 1MB for DTB Image */
+ reg = <0x00100000 0x00100000>;
+ label = "NAND DTB Image";
+ };
+
+ partition@200000 {
+ /* 10MB for Linux Kernel Image */
+ reg = <0x00200000 0x00A00000>;
+ label = "NAND Linux Kernel Image";
+ };
+
+ partition@C00000 {
+ /* 500MB for Root file System Image */
+ reg = <0x00c00000 0x1F400000>;
+ label = "NAND RFS Image";
+ };
+ };
+
+ board-control@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,t4240qds-fpga", "fsl,fpga-qixis";
+ reg = <3 0 0x300>;
+ ranges = <0 3 0 0x300>;
+
+ mdio-mux-emi1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&mdio1>;
+ reg = <0x54 1>;
+ mux-mask = <0xe0>;
+
+ t4240mdio0: mdio@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ phyrgmii1: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+
+ phyrgmii2: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+ };
+
+ t4240mdio1: mdio@20 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x20>;
+ status = "disabled";
+
+ phy1: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+
+ phy2: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+
+ phy3: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+
+ phy4: ethernet-phy@3 {
+ reg = <0x3>;
+ };
+
+ sgmiiphy11: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ sgmiiphy12: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ sgmiiphy13: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ sgmiiphy14: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t4240mdio2: mdio@40 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x40>;
+ status = "disabled";
+
+ phy5: ethernet-phy@4 {
+ reg = <0x4>;
+ };
+
+ phy6: ethernet-phy@5 {
+ reg = <0x5>;
+ };
+
+ phy7: ethernet-phy@6 {
+ reg = <0x6>;
+ };
+
+ phy8: ethernet-phy@7 {
+ reg = <0x7>;
+ };
+
+ sgmiiphy21: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ sgmiiphy22: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ sgmiiphy23: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ sgmiiphy24: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t4240mdio3: mdio@60 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x60>;
+ status = "disabled";
+
+ phy9: ethernet-phy@8 {
+ reg = <0x8>;
+ };
+
+ phy10: ethernet-phy@9 {
+ reg = <0x9>;
+ };
+
+ phy11: ethernet-phy@a {
+ reg = <0xa>;
+ };
+
+ phy12: ethernet-phy@b {
+ reg = <0xb>;
+ };
+
+ sgmiiphy31: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ sgmiiphy32: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ sgmiiphy33: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ sgmiiphy34: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+
+ t4240mdio4: mdio@80 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x80>;
+ status = "disabled";
+
+ phy13: ethernet-phy@c {
+ reg = <0xc>;
+ };
+
+ phy14: ethernet-phy@d {
+ reg = <0xd>;
+ };
+
+ phy15: ethernet-phy@e {
+ reg = <0xe>;
+ };
+
+ phy16: ethernet-phy@f {
+ reg = <0xf>;
+ };
+
+ sgmiiphy41: ethernet-phy@1c {
+ reg = <0x1c>;
+ };
+
+ sgmiiphy42: ethernet-phy@1d {
+ reg = <0x1d>;
+ };
+
+ sgmiiphy43: ethernet-phy@1e {
+ reg = <0x1e>;
+ };
+
+ sgmiiphy44: ethernet-phy@1f {
+ reg = <0x1f>;
+ };
+ };
+ };
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+ spi@110000 {
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "sst,sst25wf040", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>; /* input clock */
+ };
+ };
+
+ i2c@118000 {
+ mux@77 {
+ compatible = "nxp,pca9547";
+ reg = <0x77>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ eeprom@51 {
+ compatible = "atmel,24c256";
+ reg = <0x51>;
+ };
+ eeprom@52 {
+ compatible = "atmel,24c256";
+ reg = <0x52>;
+ };
+ eeprom@53 {
+ compatible = "atmel,24c256";
+ reg = <0x53>;
+ };
+ eeprom@54 {
+ compatible = "atmel,24c256";
+ reg = <0x54>;
+ };
+ eeprom@55 {
+ compatible = "atmel,24c256";
+ reg = <0x55>;
+ };
+ eeprom@56 {
+ compatible = "atmel,24c256";
+ reg = <0x56>;
+ };
+ rtc@68 {
+ compatible = "dallas,ds3232";
+ reg = <0x68>;
+ interrupts = <0x1 0x1 0 0>;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x2>;
+
+ ina220@40 {
+ compatible = "ti,ina220";
+ reg = <0x40>;
+ shunt-resistor = <1000>;
+ };
+
+ ina220@41 {
+ compatible = "ti,ina220";
+ reg = <0x41>;
+ shunt-resistor = <1000>;
+ };
+
+ ina220@44 {
+ compatible = "ti,ina220";
+ reg = <0x44>;
+ shunt-resistor = <1000>;
+ };
+
+ ina220@45 {
+ compatible = "ti,ina220";
+ reg = <0x45>;
+ shunt-resistor = <1000>;
+ };
+
+ ina220@46 {
+ compatible = "ti,ina220";
+ reg = <0x46>;
+ shunt-resistor = <1000>;
+ };
+
+ ina220@47 {
+ compatible = "ti,ina220";
+ reg = <0x47>;
+ shunt-resistor = <1000>;
+ };
+ };
+ };
+ };
+
+ sdhc@114000 {
+ voltage-ranges = <1800 1800 3300 3300>;
+ };
+
+ fman@400000 {
+ port@83000 {
+ status = "disabled";
+ };
+
+ port@84000 {
+ status = "disabled";
+ };
+
+ port@85000 {
+ status = "disabled";
+ };
+
+ port@86000 {
+ status = "disabled";
+ };
+
+ port@87000 {
+ status = "disabled";
+ };
+
+ ethernet@e0000 {
+ phy-handle = <&phy5>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy6>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy7>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy8>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phyrgmii2>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@ea000 {
+ phy-handle = <&phy2>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&xauiphy1>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&xauiphy2>;
+ phy-connection-type = "xgmii";
+ };
+
+ xfimdio0: mdio@f1000 {
+ status = "disabled";
+
+ xfiphy1: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+
+ xfimdio1: mdio@f3000 {
+ status = "disabled";
+
+ xfiphy2: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+ };
+
+ fman@500000 {
+ port@84000 {
+ status = "disabled";
+ };
+
+ port@85000 {
+ status = "disabled";
+ };
+
+ port@86000 {
+ status = "disabled";
+ };
+
+ port@87000 {
+ status = "disabled";
+ };
+
+ ethernet@e0000 {
+ phy-handle = <&phy13>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&phy14>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&phy15>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&phy16>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ phy-handle = <&phyrgmii1>;
+ phy-connection-type = "rgmii";
+ };
+
+ ethernet@ea000 {
+ phy-handle = <&phy10>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&xauiphy3>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&xauiphy4>;
+ phy-connection-type = "xgmii";
+ };
+
+ xfimdio2: mdio@f1000 {
+ status = "disabled";
+
+ xfiphy3: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+
+ xfimdio3: mdio@f3000 {
+ status = "disabled";
+
+ xfiphy4: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+ };
+
+ mdio@fd000 {
+ xauiphy1: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x0>;
+ };
+
+ xauiphy2: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x1>;
+ };
+
+ xauiphy3: ethernet-phy@2 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x2>;
+ };
+
+ xauiphy4: ethernet-phy@3 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0x3>;
+ };
+ };
+ };
+ };
+
+ pci0: pcie@ffe240000 {
+ reg = <0xf 0xfe240000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe250000 {
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+ 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe260000 {
+ reg = <0xf 0xfe260000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci3: pcie@ffe270000 {
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+ rio: rapidio@ffe0c0000 {
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+
+ port1 {
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+ port2 {
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
+};
+
+/include/ "t4240si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t4240rdb.dts b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
new file mode 100644
index 000000000000..145896f2eef6
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsl/t4240rdb.dts
@@ -0,0 +1,363 @@
+/*
+ * T4240RDB Device Tree Source
+ *
+ * Copyright 2014 - 2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Freescale Semiconductor nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/include/ "t4240si-pre.dtsi"
+
+/ {
+ model = "fsl,T4240RDB";
+ compatible = "fsl,T4240RDB";
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-parent = <&mpic>;
+
+ aliases {
+ sgmii_phy21 = &sgmiiphy21;
+ sgmii_phy22 = &sgmiiphy22;
+ sgmii_phy23 = &sgmiiphy23;
+ sgmii_phy24 = &sgmiiphy24;
+ sgmii_phy41 = &sgmiiphy41;
+ sgmii_phy42 = &sgmiiphy42;
+ sgmii_phy43 = &sgmiiphy43;
+ sgmii_phy44 = &sgmiiphy44;
+ };
+
+ ifc: localbus@ffe124000 {
+ reg = <0xf 0xfe124000 0 0x2000>;
+ ranges = <0 0 0xf 0xe8000000 0x08000000
+ 2 0 0xf 0xff800000 0x00010000
+ 3 0 0xf 0xffdf0000 0x00008000>;
+
+ nor@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x8000000>;
+
+ bank-width = <2>;
+ device-width = <1>;
+ };
+
+ nand@2,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x2 0x0 0x10000>;
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ qman_fqd: qman-fqd {
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+ dcsr: dcsr@f00000000 {
+ ranges = <0x00000000 0xf 0x00000000 0x01072000>;
+ };
+
+ bportals: bman-portals@ff4000000 {
+ ranges = <0x0 0xf 0xf4000000 0x2000000>;
+ };
+
+ qportals: qman-portals@ff6000000 {
+ ranges = <0x0 0xf 0xf6000000 0x2000000>;
+ };
+
+ soc: soc@ffe000000 {
+ ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
+ reg = <0xf 0xfe000000 0 0x00001000>;
+ spi@110000 {
+ flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "sst,sst25wf040", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>; /* input clock */
+ };
+ };
+
+ i2c@118000 {
+ hwmon@2f {
+ compatible = "winbond,w83793";
+ reg = <0x2f>;
+ };
+ eeprom@52 {
+ compatible = "atmel,24c256";
+ reg = <0x52>;
+ };
+ eeprom@54 {
+ compatible = "atmel,24c256";
+ reg = <0x54>;
+ };
+ eeprom@56 {
+ compatible = "atmel,24c256";
+ reg = <0x56>;
+ };
+ rtc@68 {
+ compatible = "dallas,ds1374";
+ reg = <0x68>;
+ };
+ };
+
+ sdhc@114000 {
+ voltage-ranges = <1800 1800 3300 3300>;
+ };
+
+ fman@400000 {
+ ethernet@e0000 {
+ phy-handle = <&sgmiiphy21>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&sgmiiphy22>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&sgmiiphy23>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&sgmiiphy24>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ status = "disabled";
+ };
+
+ ethernet@ea000 {
+ status = "disabled";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&xfiphy1>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&xfiphy2>;
+ phy-connection-type = "xgmii";
+ };
+ };
+
+ fman@500000 {
+ ethernet@e0000 {
+ phy-handle = <&sgmiiphy41>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e2000 {
+ phy-handle = <&sgmiiphy42>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e4000 {
+ phy-handle = <&sgmiiphy43>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e6000 {
+ phy-handle = <&sgmiiphy44>;
+ phy-connection-type = "sgmii";
+ };
+
+ ethernet@e8000 {
+ status = "disabled";
+ };
+
+ ethernet@ea000 {
+ status = "disabled";
+ };
+
+ ethernet@f0000 {
+ phy-handle = <&xfiphy3>;
+ phy-connection-type = "xgmii";
+ };
+
+ ethernet@f2000 {
+ phy-handle = <&xfiphy4>;
+ phy-connection-type = "xgmii";
+ };
+
+ mdio@fc000 {
+ sgmiiphy21: ethernet-phy@0 {
+ reg = <0x0>;
+ };
+
+ sgmiiphy22: ethernet-phy@1 {
+ reg = <0x1>;
+ };
+
+ sgmiiphy23: ethernet-phy@2 {
+ reg = <0x2>;
+ };
+
+ sgmiiphy24: ethernet-phy@3 {
+ reg = <0x3>;
+ };
+
+ sgmiiphy41: ethernet-phy@4 {
+ reg = <0x4>;
+ };
+
+ sgmiiphy42: ethernet-phy@5 {
+ reg = <0x5>;
+ };
+
+ sgmiiphy43: ethernet-phy@6 {
+ reg = <0x6>;
+ };
+
+ sgmiiphy44: ethernet-phy@7 {
+ reg = <0x7>;
+ };
+ };
+
+ mdio@fd000 {
+ xfiphy1: ethernet-phy@10 {
+ compatible = "ethernet-phy-id13e5.1002";
+ reg = <0x10>;
+ };
+
+ xfiphy2: ethernet-phy@11 {
+ compatible = "ethernet-phy-id13e5.1002";
+ reg = <0x11>;
+ };
+
+ xfiphy3: ethernet-phy@13 {
+ compatible = "ethernet-phy-id13e5.1002";
+ reg = <0x13>;
+ };
+
+ xfiphy4: ethernet-phy@12 {
+ compatible = "ethernet-phy-id13e5.1002";
+ reg = <0x12>;
+ };
+ };
+ };
+ };
+
+ pci0: pcie@ffe240000 {
+ reg = <0xf 0xfe240000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci1: pcie@ffe250000 {
+ reg = <0xf 0xfe250000 0 0x10000>;
+ ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
+ 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci2: pcie@ffe260000 {
+ reg = <0xf 0xfe260000 0 0x1000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ pci3: pcie@ffe270000 {
+ reg = <0xf 0xfe270000 0 0x10000>;
+ ranges = <0x02000000 0 0xe0000000 0xc 0x60000000 0 0x20000000
+ 0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+ pcie@0 {
+ ranges = <0x02000000 0 0xe0000000
+ 0x02000000 0 0xe0000000
+ 0 0x20000000
+
+ 0x01000000 0 0x00000000
+ 0x01000000 0 0x00000000
+ 0 0x00010000>;
+ };
+ };
+
+ rio: rapidio@ffe0c0000 {
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+
+ port1 {
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+ port2 {
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
+};
+
+/include/ "t4240si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
index a3d582e0361a..65f3e17c0d41 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-post.dtsi
@@ -1,7 +1,7 @@
/*
* T4240 Silicon/SoC Device Tree Source (post include)
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -32,10 +32,25 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+&bman_fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
+&qman_pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10000 0>;
+};
+
&ifc {
#address-cells = <2>;
#size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
+ compatible = "fsl,ifc";
interrupts = <25 2 0 0>;
};
@@ -294,6 +309,570 @@
};
};
+&bportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ bman-portal@8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <109 2 0 0>;
+ };
+ bman-portal@c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <111 2 0 0>;
+ };
+ bman-portal@10000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <113 2 0 0>;
+ };
+ bman-portal@14000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <115 2 0 0>;
+ };
+ bman-portal@18000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <117 2 0 0>;
+ };
+ bman-portal@1c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <119 2 0 0>;
+ };
+ bman-portal@20000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <121 2 0 0>;
+ };
+ bman-portal@24000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <123 2 0 0>;
+ };
+ bman-portal@28000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+ interrupts = <125 2 0 0>;
+ };
+ bman-portal@2c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+ interrupts = <127 2 0 0>;
+ };
+ bman-portal@30000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+ interrupts = <129 2 0 0>;
+ };
+ bman-portal@34000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+ interrupts = <131 2 0 0>;
+ };
+ bman-portal@38000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+ interrupts = <133 2 0 0>;
+ };
+ bman-portal@3c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+ interrupts = <135 2 0 0>;
+ };
+ bman-portal@40000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+ interrupts = <137 2 0 0>;
+ };
+ bman-portal@44000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+ interrupts = <139 2 0 0>;
+ };
+ bman-portal@48000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+ interrupts = <141 2 0 0>;
+ };
+ bman-portal@4c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+ interrupts = <143 2 0 0>;
+ };
+ bman-portal@50000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+ interrupts = <145 2 0 0>;
+ };
+ bman-portal@54000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+ interrupts = <147 2 0 0>;
+ };
+ bman-portal@58000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+ interrupts = <149 2 0 0>;
+ };
+ bman-portal@5c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+ interrupts = <151 2 0 0>;
+ };
+ bman-portal@60000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+ interrupts = <153 2 0 0>;
+ };
+ bman-portal@64000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+ interrupts = <155 2 0 0>;
+ };
+ bman-portal@68000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+ interrupts = <157 2 0 0>;
+ };
+ bman-portal@6c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+ interrupts = <159 2 0 0>;
+ };
+ bman-portal@70000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+ interrupts = <161 2 0 0>;
+ };
+ bman-portal@74000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+ interrupts = <163 2 0 0>;
+ };
+ bman-portal@78000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+ interrupts = <165 2 0 0>;
+ };
+ bman-portal@7c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+ interrupts = <167 2 0 0>;
+ };
+ bman-portal@80000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+ interrupts = <169 2 0 0>;
+ };
+ bman-portal@84000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+ interrupts = <171 2 0 0>;
+ };
+ bman-portal@88000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+ interrupts = <173 2 0 0>;
+ };
+ bman-portal@8c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+ interrupts = <175 2 0 0>;
+ };
+ bman-portal@90000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+ interrupts = <385 2 0 0>;
+ };
+ bman-portal@94000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+ interrupts = <387 2 0 0>;
+ };
+ bman-portal@98000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+ interrupts = <389 2 0 0>;
+ };
+ bman-portal@9c000 {
+ compatible = "fsl,bman-portal";
+ reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+ interrupts = <391 2 0 0>;
+ };
+ bman-portal@a0000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+ interrupts = <393 2 0 0>;
+ };
+ bman-portal@a4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+ interrupts = <395 2 0 0>;
+ };
+ bman-portal@a8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+ interrupts = <397 2 0 0>;
+ };
+ bman-portal@ac000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+ interrupts = <399 2 0 0>;
+ };
+ bman-portal@b0000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+ interrupts = <401 2 0 0>;
+ };
+ bman-portal@b4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+ interrupts = <403 2 0 0>;
+ };
+ bman-portal@b8000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+ interrupts = <405 2 0 0>;
+ };
+ bman-portal@bc000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+ interrupts = <407 2 0 0>;
+ };
+ bman-portal@c0000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+ interrupts = <409 2 0 0>;
+ };
+ bman-portal@c4000 {
+ compatible = "fsl,bman-portal";
+ reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+ interrupts = <411 2 0 0>;
+ };
+};
+
+&qportals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "simple-bus";
+
+ qportal0: qman-portal@0 {
+ compatible = "fsl,qman-portal";
+ reg = <0x0 0x4000>, <0x1000000 0x1000>;
+ interrupts = <104 0x2 0 0>;
+ cell-index = <0x0>;
+ };
+ qportal1: qman-portal@4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x1001000 0x1000>;
+ interrupts = <106 0x2 0 0>;
+ cell-index = <0x1>;
+ };
+ qportal2: qman-portal@8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8000 0x4000>, <0x1002000 0x1000>;
+ interrupts = <108 0x2 0 0>;
+ cell-index = <0x2>;
+ };
+ qportal3: qman-portal@c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc000 0x4000>, <0x1003000 0x1000>;
+ interrupts = <110 0x2 0 0>;
+ cell-index = <0x3>;
+ };
+ qportal4: qman-portal@10000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x10000 0x4000>, <0x1004000 0x1000>;
+ interrupts = <112 0x2 0 0>;
+ cell-index = <0x4>;
+ };
+ qportal5: qman-portal@14000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x14000 0x4000>, <0x1005000 0x1000>;
+ interrupts = <114 0x2 0 0>;
+ cell-index = <0x5>;
+ };
+ qportal6: qman-portal@18000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x18000 0x4000>, <0x1006000 0x1000>;
+ interrupts = <116 0x2 0 0>;
+ cell-index = <0x6>;
+ };
+ qportal7: qman-portal@1c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x1c000 0x4000>, <0x1007000 0x1000>;
+ interrupts = <118 0x2 0 0>;
+ cell-index = <0x7>;
+ };
+ qportal8: qman-portal@20000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x20000 0x4000>, <0x1008000 0x1000>;
+ interrupts = <120 0x2 0 0>;
+ cell-index = <0x8>;
+ };
+ qportal9: qman-portal@24000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x24000 0x4000>, <0x1009000 0x1000>;
+ interrupts = <122 0x2 0 0>;
+ cell-index = <0x9>;
+ };
+ qportal10: qman-portal@28000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x28000 0x4000>, <0x100a000 0x1000>;
+ interrupts = <124 0x2 0 0>;
+ cell-index = <0xa>;
+ };
+ qportal11: qman-portal@2c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x2c000 0x4000>, <0x100b000 0x1000>;
+ interrupts = <126 0x2 0 0>;
+ cell-index = <0xb>;
+ };
+ qportal12: qman-portal@30000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x30000 0x4000>, <0x100c000 0x1000>;
+ interrupts = <128 0x2 0 0>;
+ cell-index = <0xc>;
+ };
+ qportal13: qman-portal@34000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x34000 0x4000>, <0x100d000 0x1000>;
+ interrupts = <130 0x2 0 0>;
+ cell-index = <0xd>;
+ };
+ qportal14: qman-portal@38000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x38000 0x4000>, <0x100e000 0x1000>;
+ interrupts = <132 0x2 0 0>;
+ cell-index = <0xe>;
+ };
+ qportal15: qman-portal@3c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x3c000 0x4000>, <0x100f000 0x1000>;
+ interrupts = <134 0x2 0 0>;
+ cell-index = <0xf>;
+ };
+ qportal16: qman-portal@40000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x40000 0x4000>, <0x1010000 0x1000>;
+ interrupts = <136 0x2 0 0>;
+ cell-index = <0x10>;
+ };
+ qportal17: qman-portal@44000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x44000 0x4000>, <0x1011000 0x1000>;
+ interrupts = <138 0x2 0 0>;
+ cell-index = <0x11>;
+ };
+ qportal18: qman-portal@48000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x48000 0x4000>, <0x1012000 0x1000>;
+ interrupts = <140 0x2 0 0>;
+ cell-index = <0x12>;
+ };
+ qportal19: qman-portal@4c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x4c000 0x4000>, <0x1013000 0x1000>;
+ interrupts = <142 0x2 0 0>;
+ cell-index = <0x13>;
+ };
+ qportal20: qman-portal@50000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x50000 0x4000>, <0x1014000 0x1000>;
+ interrupts = <144 0x2 0 0>;
+ cell-index = <0x14>;
+ };
+ qportal21: qman-portal@54000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x54000 0x4000>, <0x1015000 0x1000>;
+ interrupts = <146 0x2 0 0>;
+ cell-index = <0x15>;
+ };
+ qportal22: qman-portal@58000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x58000 0x4000>, <0x1016000 0x1000>;
+ interrupts = <148 0x2 0 0>;
+ cell-index = <0x16>;
+ };
+ qportal23: qman-portal@5c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x5c000 0x4000>, <0x1017000 0x1000>;
+ interrupts = <150 0x2 0 0>;
+ cell-index = <0x17>;
+ };
+ qportal24: qman-portal@60000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x60000 0x4000>, <0x1018000 0x1000>;
+ interrupts = <152 0x2 0 0>;
+ cell-index = <0x18>;
+ };
+ qportal25: qman-portal@64000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x64000 0x4000>, <0x1019000 0x1000>;
+ interrupts = <154 0x2 0 0>;
+ cell-index = <0x19>;
+ };
+ qportal26: qman-portal@68000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x68000 0x4000>, <0x101a000 0x1000>;
+ interrupts = <156 0x2 0 0>;
+ cell-index = <0x1a>;
+ };
+ qportal27: qman-portal@6c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x6c000 0x4000>, <0x101b000 0x1000>;
+ interrupts = <158 0x2 0 0>;
+ cell-index = <0x1b>;
+ };
+ qportal28: qman-portal@70000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x70000 0x4000>, <0x101c000 0x1000>;
+ interrupts = <160 0x2 0 0>;
+ cell-index = <0x1c>;
+ };
+ qportal29: qman-portal@74000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x74000 0x4000>, <0x101d000 0x1000>;
+ interrupts = <162 0x2 0 0>;
+ cell-index = <0x1d>;
+ };
+ qportal30: qman-portal@78000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x78000 0x4000>, <0x101e000 0x1000>;
+ interrupts = <164 0x2 0 0>;
+ cell-index = <0x1e>;
+ };
+ qportal31: qman-portal@7c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x7c000 0x4000>, <0x101f000 0x1000>;
+ interrupts = <166 0x2 0 0>;
+ cell-index = <0x1f>;
+ };
+ qportal32: qman-portal@80000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x80000 0x4000>, <0x1020000 0x1000>;
+ interrupts = <168 0x2 0 0>;
+ cell-index = <0x20>;
+ };
+ qportal33: qman-portal@84000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x84000 0x4000>, <0x1021000 0x1000>;
+ interrupts = <170 0x2 0 0>;
+ cell-index = <0x21>;
+ };
+ qportal34: qman-portal@88000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x88000 0x4000>, <0x1022000 0x1000>;
+ interrupts = <172 0x2 0 0>;
+ cell-index = <0x22>;
+ };
+ qportal35: qman-portal@8c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x8c000 0x4000>, <0x1023000 0x1000>;
+ interrupts = <174 0x2 0 0>;
+ cell-index = <0x23>;
+ };
+ qportal36: qman-portal@90000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x90000 0x4000>, <0x1024000 0x1000>;
+ interrupts = <384 0x2 0 0>;
+ cell-index = <0x24>;
+ };
+ qportal37: qman-portal@94000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x94000 0x4000>, <0x1025000 0x1000>;
+ interrupts = <386 0x2 0 0>;
+ cell-index = <0x25>;
+ };
+ qportal38: qman-portal@98000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x98000 0x4000>, <0x1026000 0x1000>;
+ interrupts = <388 0x2 0 0>;
+ cell-index = <0x26>;
+ };
+ qportal39: qman-portal@9c000 {
+ compatible = "fsl,qman-portal";
+ reg = <0x9c000 0x4000>, <0x1027000 0x1000>;
+ interrupts = <390 0x2 0 0>;
+ cell-index = <0x27>;
+ };
+ qportal40: qman-portal@a0000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xa0000 0x4000>, <0x1028000 0x1000>;
+ interrupts = <392 0x2 0 0>;
+ cell-index = <0x28>;
+ };
+ qportal41: qman-portal@a4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xa4000 0x4000>, <0x1029000 0x1000>;
+ interrupts = <394 0x2 0 0>;
+ cell-index = <0x29>;
+ };
+ qportal42: qman-portal@a8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xa8000 0x4000>, <0x102a000 0x1000>;
+ interrupts = <396 0x2 0 0>;
+ cell-index = <0x2a>;
+ };
+ qportal43: qman-portal@ac000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xac000 0x4000>, <0x102b000 0x1000>;
+ interrupts = <398 0x2 0 0>;
+ cell-index = <0x2b>;
+ };
+ qportal44: qman-portal@b0000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xb0000 0x4000>, <0x102c000 0x1000>;
+ interrupts = <400 0x2 0 0>;
+ cell-index = <0x2c>;
+ };
+ qportal45: qman-portal@b4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xb4000 0x4000>, <0x102d000 0x1000>;
+ interrupts = <402 0x2 0 0>;
+ cell-index = <0x2d>;
+ };
+ qportal46: qman-portal@b8000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xb8000 0x4000>, <0x102e000 0x1000>;
+ interrupts = <404 0x2 0 0>;
+ cell-index = <0x2e>;
+ };
+ qportal47: qman-portal@bc000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xbc000 0x4000>, <0x102f000 0x1000>;
+ interrupts = <406 0x2 0 0>;
+ cell-index = <0x2f>;
+ };
+ qportal48: qman-portal@c0000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc0000 0x4000>, <0x1030000 0x1000>;
+ interrupts = <408 0x2 0 0>;
+ cell-index = <0x30>;
+ };
+ qportal49: qman-portal@c4000 {
+ compatible = "fsl,qman-portal";
+ reg = <0xc4000 0x4000>, <0x1031000 0x1000>;
+ interrupts = <410 0x2 0 0>;
+ cell-index = <0x31>;
+ };
+};
+
&soc {
#address-cells = <1>;
#size-cells = <1>;
@@ -368,95 +947,9 @@
fsl,liodn-bits = <12>;
};
- clockgen: global-utilities@e1000 {
+/include/ "qoriq-clockgen2.dtsi"
+ global-utilities@e1000 {
compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0";
- ranges = <0x0 0xe1000 0x1000>;
- reg = <0xe1000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- sysclk: sysclk {
- #clock-cells = <0>;
- compatible = "fsl,qoriq-sysclk-2.0";
- clock-output-names = "sysclk";
- };
-
- pll0: pll0@800 {
- #clock-cells = <1>;
- reg = <0x800 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll0", "pll0-div2", "pll0-div4";
- };
-
- pll1: pll1@820 {
- #clock-cells = <1>;
- reg = <0x820 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll1", "pll1-div2", "pll1-div4";
- };
-
- pll2: pll2@840 {
- #clock-cells = <1>;
- reg = <0x840 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll2", "pll2-div2", "pll2-div4";
- };
-
- pll3: pll3@860 {
- #clock-cells = <1>;
- reg = <0x860 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll3", "pll3-div2", "pll3-div4";
- };
-
- pll4: pll4@880 {
- #clock-cells = <1>;
- reg = <0x880 0x4>;
- compatible = "fsl,qoriq-core-pll-2.0";
- clocks = <&sysclk>;
- clock-output-names = "pll4", "pll4-div2", "pll4-div4";
- };
-
- mux0: mux0@0 {
- #clock-cells = <0>;
- reg = <0x0 0x4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>,
- <&pll2 0>, <&pll2 1>, <&pll2 2>;
- clock-names = "pll0", "pll0-div2", "pll0-div4",
- "pll1", "pll1-div2", "pll1-div4",
- "pll2", "pll2-div2", "pll2-div4";
- clock-output-names = "cmux0";
- };
-
- mux1: mux1@20 {
- #clock-cells = <0>;
- reg = <0x20 0x4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll0 0>, <&pll0 1>, <&pll0 2>,
- <&pll1 0>, <&pll1 1>, <&pll1 2>,
- <&pll2 0>, <&pll2 1>, <&pll2 2>;
- clock-names = "pll0", "pll0-div2", "pll0-div4",
- "pll1", "pll1-div2", "pll1-div4",
- "pll2", "pll2-div2", "pll2-div4";
- clock-output-names = "cmux1";
- };
-
- mux2: mux2@40 {
- #clock-cells = <0>;
- reg = <0x40 0x4>;
- compatible = "fsl,qoriq-core-mux-2.0";
- clocks = <&pll3 0>, <&pll3 1>, <&pll3 2>,
- <&pll4 0>, <&pll4 1>, <&pll4 2>;
- clock-names = "pll3", "pll3-div2", "pll3-div4",
- "pll4", "pll4-div2", "pll4-div4";
- clock-output-names = "cmux2";
- };
};
rcpm: global-utilities@e2000 {
@@ -498,19 +991,107 @@
/include/ "qoriq-gpio-3.dtsi"
/include/ "qoriq-usb2-mph-0.dtsi"
usb0: usb@210000 {
- compatible = "fsl-usb2-mph-v2.4", "fsl-usb2-mph";
+ compatible = "fsl-usb2-mph-v2.5", "fsl-usb2-mph";
phy_type = "utmi";
port0;
};
/include/ "qoriq-usb2-dr-0.dtsi"
usb1: usb@211000 {
- compatible = "fsl-usb2-dr-v2.4", "fsl-usb2-dr";
+ compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
dr_mode = "host";
phy_type = "utmi";
};
/include/ "qoriq-sata2-0.dtsi"
/include/ "qoriq-sata2-1.dtsi"
/include/ "qoriq-sec5.0-0.dtsi"
+/include/ "qoriq-qman3.dtsi"
+/include/ "qoriq-bman1.dtsi"
+
+/include/ "qoriq-fman3-0.dtsi"
+/include/ "qoriq-fman3-0-1g-0.dtsi"
+/include/ "qoriq-fman3-0-1g-1.dtsi"
+/include/ "qoriq-fman3-0-1g-2.dtsi"
+/include/ "qoriq-fman3-0-1g-3.dtsi"
+/include/ "qoriq-fman3-0-1g-4.dtsi"
+/include/ "qoriq-fman3-0-1g-5.dtsi"
+/include/ "qoriq-fman3-0-10g-0.dtsi"
+/include/ "qoriq-fman3-0-10g-1.dtsi"
+ fman@400000 {
+ enet0: ethernet@e0000 {
+ };
+
+ enet1: ethernet@e2000 {
+ };
+
+ enet2: ethernet@e4000 {
+ };
+
+ enet3: ethernet@e6000 {
+ };
+
+ enet4: ethernet@e8000 {
+ };
+
+ enet5: ethernet@ea000 {
+ };
+
+ enet6: ethernet@f0000 {
+ };
+
+ enet7: ethernet@f2000 {
+ };
+
+ mdio@fc000 {
+ status = "disabled";
+ };
+
+ mdio@fd000 {
+ status = "disabled";
+ };
+ };
+
+/include/ "qoriq-fman3-1.dtsi"
+/include/ "qoriq-fman3-1-1g-0.dtsi"
+/include/ "qoriq-fman3-1-1g-1.dtsi"
+/include/ "qoriq-fman3-1-1g-2.dtsi"
+/include/ "qoriq-fman3-1-1g-3.dtsi"
+/include/ "qoriq-fman3-1-1g-4.dtsi"
+/include/ "qoriq-fman3-1-1g-5.dtsi"
+/include/ "qoriq-fman3-1-10g-0.dtsi"
+/include/ "qoriq-fman3-1-10g-1.dtsi"
+ fman@500000 {
+ enet8: ethernet@e0000 {
+ };
+
+ enet9: ethernet@e2000 {
+ };
+
+ enet10: ethernet@e4000 {
+ };
+
+ enet11: ethernet@e6000 {
+ };
+
+ enet12: ethernet@e8000 {
+ };
+
+ enet13: ethernet@ea000 {
+ };
+
+ enet14: ethernet@f0000 {
+ };
+
+ enet15: ethernet@f2000 {
+ };
+
+ mdio@fc000 {
+ interrupts = <100 1 0 0>;
+ };
+
+ mdio@fd000 {
+ interrupts = <101 1 0 0>;
+ };
+ };
L2_1: l2-cache-controller@c20000 {
compatible = "fsl,t4240-l2-cache-controller";
diff --git a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
index 261a3abb1a55..632314c6faa9 100644
--- a/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t4240si-pre.dtsi
@@ -1,7 +1,7 @@
/*
* T4240 Silicon/SoC Device Tree Source (pre include)
*
- * Copyright 2012 Freescale Semiconductor Inc.
+ * Copyright 2012 - 2015 Freescale Semiconductor Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -51,14 +51,36 @@
serial2 = &serial2;
serial3 = &serial3;
crypto = &crypto;
+
pci0 = &pci0;
pci1 = &pci1;
pci2 = &pci2;
pci3 = &pci3;
+ usb0 = &usb0;
+ usb1 = &usb1;
dma0 = &dma0;
dma1 = &dma1;
dma2 = &dma2;
sdhc = &sdhc;
+
+ fman0 = &fman0;
+ fman1 = &fman1;
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ ethernet3 = &enet3;
+ ethernet4 = &enet4;
+ ethernet5 = &enet5;
+ ethernet6 = &enet6;
+ ethernet7 = &enet7;
+ ethernet8 = &enet8;
+ ethernet9 = &enet9;
+ ethernet10 = &enet10;
+ ethernet11 = &enet11;
+ ethernet12 = &enet12;
+ ethernet13 = &enet13;
+ ethernet14 = &enet14;
+ ethernet15 = &enet15;
};
cpus {
@@ -68,84 +90,84 @@
cpu0: PowerPC,e6500@0 {
device_type = "cpu";
reg = <0 1>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu1: PowerPC,e6500@2 {
device_type = "cpu";
reg = <2 3>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu2: PowerPC,e6500@4 {
device_type = "cpu";
reg = <4 5>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu3: PowerPC,e6500@6 {
device_type = "cpu";
reg = <6 7>;
- clocks = <&mux0>;
+ clocks = <&clockgen 1 0>;
next-level-cache = <&L2_1>;
fsl,portid-mapping = <0x80000000>;
};
cpu4: PowerPC,e6500@8 {
device_type = "cpu";
reg = <8 9>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x40000000>;
};
cpu5: PowerPC,e6500@10 {
device_type = "cpu";
reg = <10 11>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x40000000>;
};
cpu6: PowerPC,e6500@12 {
device_type = "cpu";
reg = <12 13>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x40000000>;
};
cpu7: PowerPC,e6500@14 {
device_type = "cpu";
reg = <14 15>;
- clocks = <&mux1>;
+ clocks = <&clockgen 1 1>;
next-level-cache = <&L2_2>;
fsl,portid-mapping = <0x40000000>;
};
cpu8: PowerPC,e6500@16 {
device_type = "cpu";
reg = <16 17>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x20000000>;
};
cpu9: PowerPC,e6500@18 {
device_type = "cpu";
reg = <18 19>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x20000000>;
};
cpu10: PowerPC,e6500@20 {
device_type = "cpu";
reg = <20 21>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x20000000>;
};
cpu11: PowerPC,e6500@22 {
device_type = "cpu";
reg = <22 23>;
- clocks = <&mux2>;
+ clocks = <&clockgen 1 2>;
next-level-cache = <&L2_3>;
fsl,portid-mapping = <0x20000000>;
};
diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts
new file mode 100644
index 000000000000..9311b86b1bd9
--- /dev/null
+++ b/arch/powerpc/boot/dts/fsp2.dts
@@ -0,0 +1,613 @@
+/*
+ * Device Tree Source for FSP2
+ *
+ * Copyright 2010,2012 IBM Corp.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without
+ * any warranty of any kind, whether express or implied.
+ */
+
+
+/dts-v1/;
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ model = "ibm,fsp2";
+ compatible = "ibm,fsp2";
+ dcr-parent = <&{/cpus/cpu@0}>;
+
+ aliases {
+ ethernet0 = &EMAC0;
+ ethernet1 = &EMAC1;
+ serial0 = &UART0;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ model = "PowerPC, 476FSP2";
+ reg = <0x0>;
+ clock-frequency = <0>; /* Filled in by cuboot */
+ timebase-frequency = <0>; /* Filled in by cuboot */
+ i-cache-line-size = <32>;
+ d-cache-line-size = <32>;
+ d-cache-size = <32768>;
+ i-cache-size = <32768>;
+ dcr-controller;
+ dcr-access-method = "native";
+ };
+ };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by
+ cuboot */
+ };
+
+ clocks {
+ mmc_clk: mmc_clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <50000000>;
+ clock-output-names = "mmc_clk";
+ };
+ };
+
+ UIC0: uic0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <0>;
+ dcr-reg = <0x2c0 0x8>;
+ };
+
+ /* "interrupts" field is <bit level bit level>
+ first pair is non-critical, second is critical */
+ UIC1_0: uic1_0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <1>;
+ dcr-reg = <0x2c8 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <21 0x4 4 0x84>;
+ };
+
+ /* PSI and DMA */
+ UIC1_1: uic1_1 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <2>;
+ dcr-reg = <0x350 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <22 0x4 5 0x84>;
+ };
+
+ /* Ethernet and USB */
+ UIC1_2: uic1_2 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <3>;
+ dcr-reg = <0x358 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <23 0x4 6 0x84>;
+ };
+
+ /* PLB Errors */
+ UIC1_3: uic1_3 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <4>;
+ dcr-reg = <0x360 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <24 0x4 7 0x84>;
+ };
+
+ UIC1_4: uic1_4 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <5>;
+ dcr-reg = <0x368 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <25 0x4 8 0x84>;
+ };
+
+ UIC1_5: uic1_5 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <6>;
+ dcr-reg = <0x370 0x8>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <26 0x4 9 0x84>;
+ };
+
+ /* 2nd level UICs for FSI */
+ UIC2_0: uic2_0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <7>;
+ dcr-reg = <0x2d0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <16 0x4 0 0x84>;
+ };
+
+ UIC2_1: uic2_1 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <8>;
+ dcr-reg = <0x2d8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <17 0x4 1 0x84>;
+ };
+
+ UIC2_2: uic2_2 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <9>;
+ dcr-reg = <0x2e0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <18 0x4 2 0x84>;
+ };
+
+ UIC2_3: uic2_3 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <10>;
+ dcr-reg = <0x2e8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <19 0x4 3 0x84>;
+ };
+
+ UIC2_4: uic2_4 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <11>;
+ dcr-reg = <0x2f0 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <20 0x4 4 0x84>;
+ };
+
+ UIC2_5: uic2_5 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <12>;
+ dcr-reg = <0x2f8 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <21 0x4 5 0x84>;
+ };
+
+ UIC2_6: uic2_6 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <13>;
+ dcr-reg = <0x300 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <22 0x4 6 0x84>;
+ };
+
+ UIC2_7: uic2_7 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <14>;
+ dcr-reg = <0x308 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <23 0x4 7 0x84>;
+ };
+
+ UIC2_8: uic2_8 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <15>;
+ dcr-reg = <0x310 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <24 0x4 8 0x84>;
+ };
+
+ UIC2_9: uic2_9 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <16>;
+ dcr-reg = <0x318 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <25 0x4 9 0x84>;
+ };
+
+ UIC2_10: uic2_10 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <17>;
+ dcr-reg = <0x320 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <26 0x4 10 0x84>;
+ };
+
+ UIC2_11: uic2_11 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <18>;
+ dcr-reg = <0x328 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <27 0x4 11 0x84>;
+ };
+
+ UIC2_12: uic2_12 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <19>;
+ dcr-reg = <0x330 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <28 0x4 12 0x84>;
+ };
+
+ UIC2_13: uic2_13 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <20>;
+ dcr-reg = <0x338 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <29 0x4 13 0x84>;
+ };
+
+ UIC2_14: uic2_14 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <21>;
+ dcr-reg = <0x340 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <30 0x4 14 0x84>;
+ };
+
+ UIC2_15: uic2_15 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+
+ compatible = "ibm,uic";
+ interrupt-controller;
+ cell-index = <22>;
+ dcr-reg = <0x348 0x8>;
+ interrupt-parent = <&UIC1_0>;
+ interrupts = <31 0x4 15 0x84>;
+ };
+
+ plb6 {
+ compatible = "ibm,plb6";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges;
+
+ MCW0: memory-controller-wrapper {
+ compatible = "ibm,cw-476fsp2";
+ dcr-reg = <0x11111800 0x40>;
+ };
+
+ MCIF0: memory-controller {
+ compatible = "ibm,sdram-476fsp2", "ibm,sdram-4xx-ddr3";
+ dcr-reg = <0x11120000 0x10000>;
+ mcer-device = <&MCW0>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <10 0x84 /* ECC UE */
+ 11 0x84>; /* ECC CE */
+ };
+ };
+
+ plb4 {
+ compatible = "ibm,plb4";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x00000000 0x00000010 0x00000000 0x80000000
+ 0x80000000 0x00000010 0x80000000 0x80000000>;
+ clock-frequency = <333333334>;
+
+ plb6-system-hung-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <0 0x84>;
+ };
+
+ l2-error-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <20 0x84>;
+ };
+
+ plb6-plb4-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <1 0x84>;
+ };
+
+ plb4-ahb-irq {
+ compatible = "ibm,bus-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <20 0x84>;
+ };
+
+ opbd-error-irq {
+ compatible = "ibm,opbd-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_4>;
+ interrupts = <5 0x84>;
+ };
+
+ cmu-error-irq {
+ compatible = "ibm,cmu-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <28 0x84>;
+ };
+
+ conf-error-irq {
+ compatible = "ibm,conf-error-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC1_4>;
+ interrupts = <11 0x84>;
+ };
+
+ mc-ue-irq {
+ compatible = "ibm,mc-ue-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <10 0x84>;
+ };
+
+ reset-warning-irq {
+ compatible = "ibm,reset-warning-irq";
+ #interrupt-cells = <2>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <17 0x84>;
+ };
+
+ MAL0: mcmal0 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "ibm,mcmal";
+ dcr-reg = <0x80 0x80>;
+ num-tx-chans = <1>;
+ num-rx-chans = <1>;
+ interrupt-parent = <&MAL0>;
+ interrupts = <0 1 2 3 4>;
+ /* index interrupt-parent interrupt# type */
+ interrupt-map = </*TXEOB*/ 0 &UIC1_2 4 0x4
+ /*RXEOB*/ 1 &UIC1_2 3 0x4
+ /*SERR*/ 2 &UIC1_2 7 0x4
+ /*TXDE*/ 3 &UIC1_2 6 0x4
+ /*RXDE*/ 4 &UIC1_2 5 0x4>;
+ };
+
+ MAL1: mcmal1 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "ibm,mcmal";
+ dcr-reg = <0x100 0x80>;
+ num-tx-chans = <1>;
+ num-rx-chans = <1>;
+ interrupt-parent = <&MAL1>;
+ interrupts = <0 1 2 3 4>;
+ /* index interrupt-parent interrupt# type */
+ interrupt-map = </*TXEOB*/ 0 &UIC1_2 12 0x4
+ /*RXEOB*/ 1 &UIC1_2 11 0x4
+ /*SERR*/ 2 &UIC1_2 15 0x4
+ /*TXDE*/ 3 &UIC1_2 14 0x4
+ /*RXDE*/ 4 &UIC1_2 13 0x4>;
+ };
+
+ mmc0: mmc@20c0000 {
+ compatible = "st,sdhci-stih407", "st,sdhci";
+ reg = <0x020c0000 0x20000>;
+ reg-names = "mmc";
+ interrupts = <21 0x4>;
+ interrupt-parent = <&UIC1_3>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <>;
+ clock-names = "mmc";
+ clocks = <&mmc_clk>;
+ bus-width = <4>;
+ non-removable;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ sd-uhs-ddr50;
+ };
+
+ opb {
+ compatible = "ibm,opb";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges; // pass-thru to parent bus
+ clock-frequency = <83333334>;
+
+ EMAC0: ethernet@b0000000 {
+ linux,network-index = <0>;
+ device_type = "network";
+ compatible = "ibm,emac4sync";
+ has-inverted-stacr-oc;
+ interrupt-parent = <&UIC1_2>;
+ interrupts = <1 0x4 0 0x4>;
+ reg = <0xb0000000 0x100>;
+ local-mac-address = [000000000000]; /* Filled in by
+ cuboot */
+ mal-device = <&MAL0>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <0>;
+ max-frame-size = <1500>;
+ rx-fifo-size = <4096>;
+ tx-fifo-size = <4096>;
+ rx-fifo-size-gige = <16384>;
+ tx-fifo-size-gige = <8192>;
+ phy-address = <1>;
+ phy-mode = "rgmii";
+ phy-map = <00000003>;
+ rgmii-device = <&RGMII>;
+ rgmii-channel = <0>;
+ };
+
+ EMAC1: ethernet@b0000100 {
+ linux,network-index = <1>;
+ device_type = "network";
+ compatible = "ibm,emac4sync";
+ has-inverted-stacr-oc;
+ interrupt-parent = <&UIC1_2>;
+ interrupts = <9 0x4 8 0x4>;
+ reg = <0xb0000100 0x100>;
+ local-mac-address = [000000000000]; /* Filled in by
+ cuboot */
+ mal-device = <&MAL1>;
+ mal-tx-channel = <0>;
+ mal-rx-channel = <0>;
+ cell-index = <1>;
+ max-frame-size = <1500>;
+ rx-fifo-size = <4096>;
+ tx-fifo-size = <4096>;
+ rx-fifo-size-gige = <16384>;
+ tx-fifo-size-gige = <8192>;
+ phy-address = <2>;
+ phy-mode = "rgmii";
+ phy-map = <00000003>;
+ rgmii-device = <&RGMII>;
+ rgmii-channel = <1>;
+ };
+
+ RGMII: rgmii@b0000600 {
+ compatible = "ibm,rgmii";
+ has-mdio;
+ reg = <0xb0000600 0x8>;
+ };
+
+ UART0: serial@b0020000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0xb0020000 0x8>;
+ virtual-reg = <0xb0020000>;
+ clock-frequency = <20833333>;
+ current-speed = <115200>;
+ interrupt-parent = <&UIC0>;
+ interrupts = <31 0x4>;
+ };
+ };
+
+ OHCI1: ohci@2040000 {
+ compatible = "ohci-le";
+ reg = <0x02040000 0xa0>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <28 0x8 29 0x8>;
+ };
+
+ OHCI2: ohci@2080000 {
+ compatible = "ohci-le";
+ reg = <0x02080000 0xa0>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <30 0x8 31 0x8>;
+ };
+
+ EHCI: ehci@2000000 {
+ compatible = "usb-ehci";
+ reg = <0x02000000 0xa4>;
+ interrupt-parent = <&UIC1_3>;
+ interrupts = <23 0x4>;
+ };
+
+ };
+
+ chosen {
+ stdout-path = "/plb/opb/serial@b0020000";
+ bootargs = "console=ttyS0,115200 rw log_buf_len=32768 debug";
+ };
+};
diff --git a/arch/powerpc/boot/dts/gamecube.dts b/arch/powerpc/boot/dts/gamecube.dts
index ef3be0e58b02..a564cb7cb1e3 100644
--- a/arch/powerpc/boot/dts/gamecube.dts
+++ b/arch/powerpc/boot/dts/gamecube.dts
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/boot/dts/gamecube.dts
*
* Nintendo GameCube platform device tree source
* Copyright (C) 2007-2009 The GameCube Linux Team
* Copyright (C) 2007,2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
/dts-v1/;
@@ -54,13 +49,13 @@
ranges = <0x0c000000 0x0c000000 0x00010000>;
interrupt-parent = <&PIC>;
- video@0c002000 {
+ video@c002000 {
compatible = "nintendo,flipper-vi";
reg = <0x0c002000 0x100>;
interrupts = <8>;
};
- processor-interface@0c003000 {
+ processor-interface@c003000 {
compatible = "nintendo,flipper-pi";
reg = <0x0c003000 0x100>;
@@ -71,7 +66,7 @@
};
};
- dsp@0c005000 {
+ dsp@c005000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "nintendo,flipper-dsp";
@@ -84,26 +79,26 @@
};
};
- disk@0c006000 {
+ disk@c006000 {
compatible = "nintendo,flipper-di";
reg = <0x0c006000 0x40>;
interrupts = <2>;
};
- audio@0c006c00 {
+ audio@c006c00 {
compatible = "nintendo,flipper-ai";
reg = <0x0c006c00 0x20>;
interrupts = <6>;
};
- gamepad-controller@0c006400 {
+ gamepad-controller@c006400 {
compatible = "nintendo,flipper-si";
reg = <0x0c006400 0x100>;
interrupts = <3>;
};
/* External Interface bus */
- exi@0c006800 {
+ exi@c006800 {
compatible = "nintendo,flipper-exi";
reg = <0x0c006800 0x40>;
virtual-reg = <0x0c006800>;
diff --git a/arch/powerpc/boot/dts/gef_ppc9a.dts b/arch/powerpc/boot/dts/gef_ppc9a.dts
deleted file mode 100644
index 83eb0fda2666..000000000000
--- a/arch/powerpc/boot/dts/gef_ppc9a.dts
+++ /dev/null
@@ -1,425 +0,0 @@
-/*
- * GE PPC9A Device Tree Source
- *
- * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * Based on: SBS CM6 Device Tree Source
- * Copyright 2007 SBS Technologies GmbH & Co. KG
- * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
- * Copyright 2006 Freescale Semiconductor Inc.
- */
-
-/*
- * Compiled with dtc -I dts -O dtb -o gef_ppc9a.dtb gef_ppc9a.dts
- */
-
-/dts-v1/;
-
-/ {
- model = "GEF_PPC9A";
- compatible = "gef,ppc9a";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8641@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- PowerPC,8641@1 {
- device_type = "cpu";
- reg = <1>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x40000000>; // set by uboot
- };
-
- localbus@fef05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-localbus", "simple-bus";
- reg = <0xfef05000 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
- ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
- 1 0 0xe8000000 0x08000000 // Paged Flash 0
- 2 0 0xe0000000 0x08000000 // Paged Flash 1
- 3 0 0xfc100000 0x00020000 // NVRAM
- 4 0 0xfc000000 0x00008000 // FPGA
- 5 0 0xfc008000 0x00008000 // AFIX FPGA
- 6 0 0xfd000000 0x00800000 // IO FPGA (8-bit)
- 7 0 0xfd800000 0x00800000>; // IO FPGA (32-bit)
-
- /* flash@0,0 is a mirror of part of the memory in flash@1,0
- flash@0,0 {
- compatible = "gef,ppc9a-firmware-mirror", "cfi-flash";
- reg = <0x0 0x0 0x1000000>;
- bank-width = <4>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "firmware";
- reg = <0x0 0x1000000>;
- read-only;
- };
- };
- */
-
- flash@1,0 {
- compatible = "gef,ppc9a-paged-flash", "cfi-flash";
- reg = <0x1 0x0 0x8000000>;
- bank-width = <4>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "user";
- reg = <0x0 0x7800000>;
- };
- partition@7800000 {
- label = "firmware";
- reg = <0x7800000 0x800000>;
- read-only;
- };
- };
-
- nvram@3,0 {
- device_type = "nvram";
- compatible = "simtek,stk14ca8";
- reg = <0x3 0x0 0x20000>;
- };
-
- fpga@4,0 {
- compatible = "gef,ppc9a-fpga-regs";
- reg = <0x4 0x0 0x40>;
- };
-
- wdt@4,2000 {
- compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
- "gef,fpga-wdt";
- reg = <0x4 0x2000 0x8>;
- interrupts = <0x1a 0x4>;
- interrupt-parent = <&gef_pic>;
- };
- /* Second watchdog available, driver currently supports one.
- wdt@4,2010 {
- compatible = "gef,ppc9a-fpga-wdt", "gef,fpga-wdt-1.00",
- "gef,fpga-wdt";
- reg = <0x4 0x2010 0x8>;
- interrupts = <0x1b 0x4>;
- interrupt-parent = <&gef_pic>;
- };
- */
- gef_pic: pic@4,4000 {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "gef,ppc9a-fpga-pic", "gef,fpga-pic-1.00";
- reg = <0x4 0x4000 0x20>;
- interrupts = <0x8
- 0x9>;
- interrupt-parent = <&mpic>;
-
- };
- gef_gpio: gpio@7,14000 {
- #gpio-cells = <2>;
- compatible = "gef,ppc9a-gpio", "gef,sbc610-gpio";
- reg = <0x7 0x14000 0x24>;
- gpio-controller;
- };
- };
-
- soc@fef00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- #interrupt-cells = <2>;
- device_type = "soc";
- compatible = "fsl,mpc8641-soc", "simple-bus";
- ranges = <0x0 0xfef00000 0x00100000>;
- bus-frequency = <33333333>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8641-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c1: i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
-
- hwmon@48 {
- compatible = "national,lm92";
- reg = <0x48>;
- };
-
- hwmon@4c {
- compatible = "adi,adt7461";
- reg = <0x4c>;
- };
-
- rtc@51 {
- compatible = "epson,rx8581";
- reg = <0x00000051>;
- };
-
- eti@6b {
- compatible = "dallas,ds1682";
- reg = <0x6b>;
- };
- };
-
- i2c2: i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- phy-connection-type = "gmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&gef_pic>;
- interrupts = <0x9 0x4>;
- reg = <1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&gef_pic>;
- interrupts = <0x8 0x4>;
- reg = <3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- phy-connection-type = "gmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <0x2a 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <0x1c 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@40000 {
- clock-frequency = <0>;
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- msi@41600 {
- compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 {
- compatible = "fsl,mpc8641-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
- };
-
- pci0: pcie@fef08000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xfef08000 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
- 0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <0x18 0x2>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x0000 0x0 0x0 0x4 &mpic 0x3 0x1
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0x80000000
- 0x02000000 0x0 0x80000000
- 0x0 0x40000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00400000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/gef_sbc310.dts b/arch/powerpc/boot/dts/gef_sbc310.dts
deleted file mode 100644
index d426dd3de9ef..000000000000
--- a/arch/powerpc/boot/dts/gef_sbc310.dts
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * GE SBC310 Device Tree Source
- *
- * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * Based on: SBS CM6 Device Tree Source
- * Copyright 2007 SBS Technologies GmbH & Co. KG
- * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
- * Copyright 2006 Freescale Semiconductor Inc.
- */
-
-/*
- * Compiled with dtc -I dts -O dtb -o gef_sbc310.dtb gef_sbc310.dts
- */
-
-/dts-v1/;
-
-/ {
- model = "GEF_SBC310";
- compatible = "gef,sbc310";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8641@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- PowerPC,8641@1 {
- device_type = "cpu";
- reg = <1>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x40000000>; // set by uboot
- };
-
- localbus@fef05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-localbus", "simple-bus";
- reg = <0xfef05000 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
- ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
- 1 0 0xe0000000 0x08000000 // Paged Flash 0
- 2 0 0xe8000000 0x08000000 // Paged Flash 1
- 3 0 0xfc100000 0x00020000 // NVRAM
- 4 0 0xfc000000 0x00010000>; // FPGA
-
- /* flash@0,0 is a mirror of part of the memory in flash@1,0
- flash@0,0 {
- compatible = "gef,sbc310-firmware-mirror", "cfi-flash";
- reg = <0x0 0x0 0x01000000>;
- bank-width = <2>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "firmware";
- reg = <0x0 0x01000000>;
- read-only;
- };
- };
- */
-
- flash@1,0 {
- compatible = "gef,sbc310-paged-flash", "cfi-flash";
- reg = <0x1 0x0 0x8000000>;
- bank-width = <2>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "user";
- reg = <0x0 0x7800000>;
- };
- partition@7800000 {
- label = "firmware";
- reg = <0x7800000 0x800000>;
- read-only;
- };
- };
-
- nvram@3,0 {
- device_type = "nvram";
- compatible = "simtek,stk14ca8";
- reg = <0x3 0x0 0x20000>;
- };
-
- fpga@4,0 {
- compatible = "gef,fpga-regs";
- reg = <0x4 0x0 0x40>;
- };
-
- wdt@4,2000 {
- compatible = "gef,sbc310-fpga-wdt", "gef,fpga-wdt-1.00",
- "gef,fpga-wdt";
- reg = <0x4 0x2000 0x8>;
- interrupts = <0x1a 0x4>;
- interrupt-parent = <&gef_pic>;
- };
-/*
- wdt@4,2010 {
- compatible = "gef,sbc310-fpga-wdt", "gef,fpga-wdt-1.00",
- "gef,fpga-wdt";
- reg = <0x4 0x2010 0x8>;
- interrupts = <0x1b 0x4>;
- interrupt-parent = <&gef_pic>;
- };
-*/
- gef_pic: pic@4,4000 {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "gef,sbc310-fpga-pic", "gef,fpga-pic";
- reg = <0x4 0x4000 0x20>;
- interrupts = <0x8
- 0x9>;
- interrupt-parent = <&mpic>;
-
- };
- gef_gpio: gpio@4,8000 {
- #gpio-cells = <2>;
- compatible = "gef,sbc310-gpio";
- reg = <0x4 0x8000 0x24>;
- gpio-controller;
- };
- };
-
- soc@fef00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- #interrupt-cells = <2>;
- device_type = "soc";
- compatible = "fsl,mpc8641-soc", "simple-bus";
- ranges = <0x0 0xfef00000 0x00100000>;
- bus-frequency = <33333333>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8641-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c1: i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
-
- rtc@51 {
- compatible = "epson,rx8581";
- reg = <0x00000051>;
- };
- };
-
- i2c2: i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
-
- hwmon@48 {
- compatible = "national,lm92";
- reg = <0x48>;
- };
-
- hwmon@4c {
- compatible = "adi,adt7461";
- reg = <0x4c>;
- };
-
- eti@6b {
- compatible = "dallas,ds1682";
- reg = <0x6b>;
- };
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- phy-connection-type = "gmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&gef_pic>;
- interrupts = <0x9 0x4>;
- reg = <1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&gef_pic>;
- interrupts = <0x8 0x4>;
- reg = <3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- phy-connection-type = "gmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <0x2a 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <0x1c 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@40000 {
- clock-frequency = <0>;
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- msi@41600 {
- compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 {
- compatible = "fsl,mpc8641-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
- };
-
- pci0: pcie@fef08000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xfef08000 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
- 0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <0x18 0x2>;
- interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0000 0x0 0x0 0x1 &mpic 0x0 0x2
- 0x0000 0x0 0x0 0x2 &mpic 0x1 0x2
- 0x0000 0x0 0x0 0x3 &mpic 0x2 0x2
- 0x0000 0x0 0x0 0x4 &mpic 0x3 0x2
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0x80000000
- 0x02000000 0x0 0x80000000
- 0x0 0x40000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00400000>;
- };
- };
-
- pci1: pcie@fef09000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xfef09000 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0xc0000000 0xc0000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xfe400000 0x0 0x00400000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <0x19 0x2>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0000 0x0 0x0 0x1 &mpic 0x4 0x2
- 0x0000 0x0 0x0 0x2 &mpic 0x5 0x2
- 0x0000 0x0 0x0 0x3 &mpic 0x6 0x2
- 0x0000 0x0 0x0 0x4 &mpic 0x7 0x2
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xc0000000
- 0x02000000 0x0 0xc0000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00400000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/gef_sbc610.dts b/arch/powerpc/boot/dts/gef_sbc610.dts
deleted file mode 100644
index 5db3399b76b7..000000000000
--- a/arch/powerpc/boot/dts/gef_sbc610.dts
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * GE SBC610 Device Tree Source
- *
- * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * Based on: SBS CM6 Device Tree Source
- * Copyright 2007 SBS Technologies GmbH & Co. KG
- * And: mpc8641_hpcn.dts (MPC8641 HPCN Device Tree Source)
- * Copyright 2006 Freescale Semiconductor Inc.
- */
-
-/*
- * Compiled with dtc -I dts -O dtb -o gef_sbc610.dtb gef_sbc610.dts
- */
-
-/dts-v1/;
-
-/ {
- model = "GEF_SBC610";
- compatible = "gef,sbc610";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8641@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- PowerPC,8641@1 {
- device_type = "cpu";
- reg = <1>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x40000000>; // set by uboot
- };
-
- localbus@fef05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-localbus", "simple-bus";
- reg = <0xfef05000 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
- ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
- 1 0 0xe8000000 0x08000000 // Paged Flash 0
- 2 0 0xe0000000 0x08000000 // Paged Flash 1
- 3 0 0xfc100000 0x00020000 // NVRAM
- 4 0 0xfc000000 0x00008000 // FPGA
- 5 0 0xfc008000 0x00008000 // AFIX FPGA
- 6 0 0xfd000000 0x00800000 // IO FPGA (8-bit)
- 7 0 0xfd800000 0x00800000>; // IO FPGA (32-bit)
-
- /* flash@0,0 is a mirror of part of the memory in flash@1,0
- flash@0,0 {
- compatible = "gef,sbc610-firmware-mirror", "cfi-flash";
- reg = <0x0 0x0 0x1000000>;
- bank-width = <4>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "firmware";
- reg = <0x0 0x1000000>;
- read-only;
- };
- };
- */
-
- flash@1,0 {
- compatible = "gef,sbc610-paged-flash", "cfi-flash";
- reg = <0x1 0x0 0x8000000>;
- bank-width = <4>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "user";
- reg = <0x0 0x7800000>;
- };
- partition@7800000 {
- label = "firmware";
- reg = <0x7800000 0x800000>;
- read-only;
- };
- };
-
- nvram@3,0 {
- device_type = "nvram";
- compatible = "simtek,stk14ca8";
- reg = <0x3 0x0 0x20000>;
- };
-
- fpga@4,0 {
- compatible = "gef,fpga-regs";
- reg = <0x4 0x0 0x40>;
- };
-
- wdt@4,2000 {
- compatible = "gef,fpga-wdt";
- reg = <0x4 0x2000 0x8>;
- interrupts = <0x1a 0x4>;
- interrupt-parent = <&gef_pic>;
- };
- /* Second watchdog available, driver currently supports one.
- wdt@4,2010 {
- compatible = "gef,fpga-wdt";
- reg = <0x4 0x2010 0x8>;
- interrupts = <0x1b 0x4>;
- interrupt-parent = <&gef_pic>;
- };
- */
- gef_pic: pic@4,4000 {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "gef,fpga-pic";
- reg = <0x4 0x4000 0x20>;
- interrupts = <0x8
- 0x9>;
- interrupt-parent = <&mpic>;
-
- };
- gef_gpio: gpio@7,14000 {
- #gpio-cells = <2>;
- compatible = "gef,sbc610-gpio";
- reg = <0x7 0x14000 0x24>;
- gpio-controller;
- };
- };
-
- soc@fef00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- #interrupt-cells = <2>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xfef00000 0x00100000>;
- bus-frequency = <33333333>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8641-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c1: i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
-
- hwmon@48 {
- compatible = "national,lm92";
- reg = <0x48>;
- };
-
- hwmon@4c {
- compatible = "adi,adt7461";
- reg = <0x4c>;
- };
-
- rtc@51 {
- compatible = "epson,rx8581";
- reg = <0x00000051>;
- };
-
- eti@6b {
- compatible = "dallas,ds1682";
- reg = <0x6b>;
- };
- };
-
- i2c2: i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- phy-connection-type = "gmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&gef_pic>;
- interrupts = <0x9 0x4>;
- reg = <1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&gef_pic>;
- interrupts = <0x8 0x4>;
- reg = <3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- phy-connection-type = "gmii";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <0x2a 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <0x1c 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@40000 {
- clock-frequency = <0>;
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- msi@41600 {
- compatible = "fsl,mpc8641-msi", "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 {
- compatible = "fsl,mpc8641-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
- };
-
- pci0: pcie@fef08000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xfef08000 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x40000000
- 0x01000000 0x0 0x00000000 0xfe000000 0x0 0x00400000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <0x18 0x2>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x0000 0x0 0x0 0x4 &mpic 0x3 0x1
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0x80000000
- 0x02000000 0x0 0x80000000
- 0x0 0x40000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00400000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts
index 2000060386d7..e84ff1afb58c 100644
--- a/arch/powerpc/boot/dts/glacier.dts
+++ b/arch/powerpc/boot/dts/glacier.dts
@@ -287,7 +287,7 @@
#address-cells = <1>;
#size-cells = <0>;
rtc@68 {
- compatible = "stm,m41t80";
+ compatible = "st,m41t80";
reg = <0x68>;
interrupt-parent = <&UIC2>;
interrupts = <0x19 0x8>;
@@ -489,7 +489,7 @@
interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >;
};
- PCIE0: pciex@d00000000 {
+ PCIE0: pcie@d00000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -531,7 +531,7 @@
0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@d20000000 {
+ PCIE1: pcie@d20000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts
deleted file mode 100644
index 2b256694eca6..000000000000
--- a/arch/powerpc/boot/dts/haleakala.dts
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Device Tree Source for AMCC Haleakala (405EXr)
- *
- * Copyright 2008 DENX Software Engineering, Stefan Roese <sr@denx.de>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "amcc,haleakala";
- compatible = "amcc,haleakala", "amcc,kilauea";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405EXr";
- reg = <0x00000000>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- timebase-frequency = <0>; /* Filled in by U-Boot */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>; /* 16 kB */
- d-cache-size = <16384>; /* 16 kB */
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic-405exr", "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- UIC1: interrupt-controller1 {
- compatible = "ibm,uic-405exr","ibm,uic";
- interrupt-controller;
- cell-index = <1>;
- dcr-reg = <0x0d0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- UIC2: interrupt-controller2 {
- compatible = "ibm,uic-405exr","ibm,uic";
- interrupt-controller;
- cell-index = <2>;
- dcr-reg = <0x0e0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- plb {
- compatible = "ibm,plb-405exr", "ibm,plb4";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405exr", "ibm,sdram-4xx-ddr2";
- dcr-reg = <0x010 0x002>;
- interrupt-parent = <&UIC2>;
- interrupts = <0x5 0x4 /* ECC DED Error */
- 0x6 0x4>; /* ECC SEC Error */
- };
-
- MAL0: mcmal {
- compatible = "ibm,mcmal-405exr", "ibm,mcmal2";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <2>;
- num-rx-chans = <2>;
- interrupt-parent = <&MAL0>;
- interrupts = <0x0 0x1 0x2 0x3 0x4>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
- /*RXEOB*/ 0x1 &UIC0 0xb 0x4
- /*SERR*/ 0x2 &UIC1 0x0 0x4
- /*TXDE*/ 0x3 &UIC1 0x1 0x4
- /*RXDE*/ 0x4 &UIC1 0x2 0x4>;
- interrupt-map-mask = <0xffffffff>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405exr", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x80000000 0x80000000 0x10000000
- 0xef600000 0xef600000 0x00a00000
- 0xf0000000 0xf0000000 0x10000000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- EBC0: ebc {
- compatible = "ibm,ebc-405exr", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- /* ranges property is supplied by U-Boot */
- interrupts = <0x5 0x1>;
- interrupt-parent = <&UIC1>;
-
- nor_flash@0,0 {
- compatible = "amd,s29gl512n", "cfi-flash";
- bank-width = <2>;
- reg = <0x00000000 0x00000000 0x04000000>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "kernel";
- reg = <0x00000000 0x00200000>;
- };
- partition@200000 {
- label = "root";
- reg = <0x00200000 0x00200000>;
- };
- partition@400000 {
- label = "user";
- reg = <0x00400000 0x03b60000>;
- };
- partition@3f60000 {
- label = "env";
- reg = <0x03f60000 0x00040000>;
- };
- partition@3fa0000 {
- label = "u-boot";
- reg = <0x03fa0000 0x00060000>;
- };
- };
- };
-
- UART0: serial@ef600200 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600200 0x00000008>;
- virtual-reg = <0xef600200>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1a 0x4>;
- };
-
- UART1: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- IIC0: i2c@ef600400 {
- compatible = "ibm,iic-405exr", "ibm,iic";
- reg = <0xef600400 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
- };
-
- IIC1: i2c@ef600500 {
- compatible = "ibm,iic-405exr", "ibm,iic";
- reg = <0xef600500 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x7 0x4>;
- };
-
-
- RGMII0: emac-rgmii@ef600b00 {
- compatible = "ibm,rgmii-405exr", "ibm,rgmii";
- reg = <0xef600b00 0x00000104>;
- has-mdio;
- };
-
- EMAC0: ethernet@ef600900 {
- linux,network-index = <0x0>;
- device_type = "network";
- compatible = "ibm,emac-405exr", "ibm,emac4sync";
- interrupt-parent = <&EMAC0>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
- /*Wake*/ 0x1 &UIC1 0x1d 0x4>;
- reg = <0xef600900 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <0>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
- };
-
- PCIE0: pciex@0a0000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
- primary;
- port = <0x0>; /* port number */
- reg = <0xa0000000 0x20000000 /* Config space access */
- 0xef000000 0x00001000>; /* Registers */
- dcr-reg = <0x040 0x020>;
- sdr-base = <0x400>;
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x90000000 0x00000000 0x08000000
- 0x01000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* This drives busses 0x00 to 0x3f */
- bus-range = <0x0 0x3f>;
-
- /* Legacy interrupts (note the weird polarity, the bridge seems
- * to invert PCIe legacy interrupts).
- * We are de-swizzling here because the numbers are actually for
- * port of the root complex virtual P2P bridge. But I want
- * to avoid putting a node for it in the tree, so the numbers
- * below are basically de-swizzled numbers.
- * The real slot is on idsel 0, so the swizzling is 1:1
- */
- interrupt-map-mask = <0x0 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0 0x0 0x0 0x1 &UIC2 0x0 0x4 /* swizzled int A */
- 0x0 0x0 0x0 0x2 &UIC2 0x1 0x4 /* swizzled int B */
- 0x0 0x0 0x0 0x3 &UIC2 0x2 0x4 /* swizzled int C */
- 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/holly.dts b/arch/powerpc/boot/dts/holly.dts
index 43e6f0c8e449..02bd304c7d38 100644
--- a/arch/powerpc/boot/dts/holly.dts
+++ b/arch/powerpc/boot/dts/holly.dts
@@ -191,6 +191,6 @@
};
chosen {
- linux,stdout-path = "/tsi109@c0000000/serial@7808";
+ stdout-path = "/tsi109@c0000000/serial@7808";
};
};
diff --git a/arch/powerpc/boot/dts/hotfoot.dts b/arch/powerpc/boot/dts/hotfoot.dts
deleted file mode 100644
index 71d3bb4931dc..000000000000
--- a/arch/powerpc/boot/dts/hotfoot.dts
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Device Tree Source for ESTeem 195E Hotfoot
- *
- * Copyright 2009 AbsoluteValue Systems <solomon@linux-wlan.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "est,hotfoot";
- compatible = "est,hotfoot";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- ethernet1 = &EMAC1;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405EP";
- reg = <0x00000000>;
- clock-frequency = <0>; /* Filled in by zImage */
- timebase-frequency = <0>; /* Filled in by zImage */
- i-cache-line-size = <0x20>;
- d-cache-line-size = <0x20>;
- i-cache-size = <0x4000>;
- d-cache-size = <0x4000>;
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by zImage */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- plb {
- compatible = "ibm,plb3";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by zImage */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405ep";
- dcr-reg = <0x010 0x002>;
- };
-
- MAL: mcmal {
- compatible = "ibm,mcmal-405ep", "ibm,mcmal";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <4>;
- num-rx-chans = <2>;
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0xb 0x4 /* TXEOB */
- 0xc 0x4 /* RXEOB */
- 0xa 0x4 /* SERR */
- 0xd 0x4 /* TXDE */
- 0xe 0x4 /* RXDE */>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405ep", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0xef600000 0xef600000 0x00a00000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by zImage */
-
- /* Hotfoot has UART0/UART1 swapped */
-
- UART0: serial@ef600400 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600400 0x00000008>;
- virtual-reg = <0xef600400>;
- clock-frequency = <0>; /* Filled in by zImage */
- current-speed = <0x9600>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- UART1: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by zImage */
- current-speed = <0x9600>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x0 0x4>;
- };
-
- IIC: i2c@ef600500 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "ibm,iic-405ep", "ibm,iic";
- reg = <0xef600500 0x00000011>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
-
- rtc@68 {
- /* Actually a DS1339 */
- compatible = "dallas,ds1307";
- reg = <0x68>;
- };
-
- temp@4a {
- /* Not present on all boards */
- compatible = "national,lm75";
- reg = <0x4a>;
- };
- };
-
- GPIO: gpio@ef600700 {
- #gpio-cells = <2>;
- compatible = "ibm,ppc4xx-gpio";
- reg = <0xef600700 0x00000020>;
- gpio-controller;
- };
-
- gpio-leds {
- compatible = "gpio-leds";
- status {
- label = "Status";
- gpios = <&GPIO 1 0>;
- };
- radiorx {
- label = "Rx";
- gpios = <&GPIO 0xe 0>;
- };
- };
-
- EMAC0: ethernet@ef600800 {
- linux,network-index = <0x0>;
- device_type = "network";
- compatible = "ibm,emac-405ep", "ibm,emac";
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0xf 0x4 /* Ethernet */
- 0x9 0x4 /* Ethernet Wake Up */>;
- local-mac-address = [000000000000]; /* Filled in by zImage */
- reg = <0xef600800 0x00000070>;
- mal-device = <&MAL>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <0x5dc>;
- rx-fifo-size = <0x1000>;
- tx-fifo-size = <0x800>;
- phy-mode = "mii";
- phy-map = <0x00000000>;
- };
-
- EMAC1: ethernet@ef600900 {
- linux,network-index = <0x1>;
- device_type = "network";
- compatible = "ibm,emac-405ep", "ibm,emac";
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0x11 0x4 /* Ethernet */
- 0x9 0x4 /* Ethernet Wake Up */>;
- local-mac-address = [000000000000]; /* Filled in by zImage */
- reg = <0xef600900 0x00000070>;
- mal-device = <&MAL>;
- mal-tx-channel = <2>;
- mal-rx-channel = <1>;
- cell-index = <1>;
- max-frame-size = <0x5dc>;
- rx-fifo-size = <0x1000>;
- tx-fifo-size = <0x800>;
- mdio-device = <&EMAC0>;
- phy-mode = "mii";
- phy-map = <0x0000001>;
- };
- };
-
- EBC0: ebc {
- compatible = "ibm,ebc-405ep", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
-
- /* The ranges property is supplied by the bootwrapper
- * and is based on the firmware's configuration of the
- * EBC bridge
- */
- clock-frequency = <0>; /* Filled in by zImage */
-
- nor_flash@0 {
- compatible = "cfi-flash";
- bank-width = <2>;
- reg = <0x0 0xff800000 0x00800000>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- /* This mapping is for the 8M flash
- 4M flash has all ofssets -= 4M,
- and FeatFS partition is not present */
- partition@0 {
- label = "Bootloader";
- reg = <0x7c0000 0x40000>;
- /* read-only; */
- };
- partition@1 {
- label = "Env_and_Config_Primary";
- reg = <0x400000 0x10000>;
- };
- partition@2 {
- label = "Kernel";
- reg = <0x420000 0x100000>;
- };
- partition@3 {
- label = "Filesystem";
- reg = <0x520000 0x2a0000>;
- };
- partition@4 {
- label = "Env_and_Config_Secondary";
- reg = <0x410000 0x10000>;
- };
- partition@5 {
- label = "FeatFS";
- reg = <0x000000 0x400000>;
- };
- partition@6 {
- label = "Bootloader_Env";
- reg = <0x7d0000 0x10000>;
- };
- };
- };
-
- PCI0: pci@ec000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb405ep-pci", "ibm,plb-pci";
- primary;
- reg = <0xeec00000 0x00000008 /* Config space access */
- 0xeed80000 0x00000004 /* IACK */
- 0xeed80000 0x00000004 /* Special cycle */
- 0xef480000 0x00000040>; /* Internal registers */
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed. Chip supports a second
- * IO range but we don't use it for now
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000
- 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- interrupt-parent = <&UIC0>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 3 -- slot1 (optional) 27/29 A/B IRQ2/4 */
- 0x1800 0x0 0x0 0x1 &UIC0 0x1b 0x8
- 0x1800 0x0 0x0 0x2 &UIC0 0x1d 0x8
-
- /* IDSEL 4 -- slot0, 26/28 A/B IRQ1/3 */
- 0x2000 0x0 0x0 0x1 &UIC0 0x1a 0x8
- 0x2000 0x0 0x0 0x2 &UIC0 0x1c 0x8
- >;
- };
- };
-
- chosen {
- linux,stdout-path = &UART0;
- };
-};
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
index abcd0caeccae..4fd7a4fbb4fb 100644
--- a/arch/powerpc/boot/dts/icon.dts
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -197,13 +197,6 @@
reg = <0x00fa0000 0x00060000>;
};
};
-
- SysACE_CompactFlash: sysace@1,0 {
- compatible = "xlnx,sysace";
- interrupt-parent = <&UIC2>;
- interrupts = <24 0x4>;
- reg = <0x00000001 0x00000000 0x10000>;
- };
};
UART0: serial@f0000200 {
@@ -256,7 +249,7 @@
#size-cells = <0>;
rtc@68 {
- compatible = "stm,m41t00";
+ compatible = "st,m41t00";
reg = <0x68>;
};
};
@@ -315,7 +308,7 @@
interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 19 0x8>;
};
- PCIE0: pciex@d00000000 {
+ PCIE0: pcie@d00000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -356,7 +349,7 @@
0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@d20000000 {
+ PCIE1: pcie@d20000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -442,6 +435,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@f0000200";
+ stdout-path = "/plb/opb/serial@f0000200";
};
};
diff --git a/arch/powerpc/boot/dts/include/dt-bindings b/arch/powerpc/boot/dts/include/dt-bindings
deleted file mode 120000
index 08c00e4972fa..000000000000
--- a/arch/powerpc/boot/dts/include/dt-bindings
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../include/dt-bindings \ No newline at end of file
diff --git a/arch/powerpc/boot/dts/iss4xx-mpic.dts b/arch/powerpc/boot/dts/iss4xx-mpic.dts
index 23e9d9b7e400..c9f90f1a9c8e 100644
--- a/arch/powerpc/boot/dts/iss4xx-mpic.dts
+++ b/arch/powerpc/boot/dts/iss4xx-mpic.dts
@@ -43,7 +43,7 @@
d-cache-size = <32768>;
dcr-controller;
dcr-access-method = "native";
- status = "ok";
+ status = "okay";
};
cpu@1 {
device_type = "cpu";
@@ -150,6 +150,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/iss4xx.dts b/arch/powerpc/boot/dts/iss4xx.dts
index 4ff6555c866d..5533aff25e41 100644
--- a/arch/powerpc/boot/dts/iss4xx.dts
+++ b/arch/powerpc/boot/dts/iss4xx.dts
@@ -111,6 +111,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000200";
+ stdout-path = "/plb/opb/serial@40000200";
};
};
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index f913dbe25d35..4262b2bbd6de 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -319,7 +319,7 @@
>;
};
- PCIE0: pciex@d00000000 {
+ PCIE0: pcie@d00000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -360,7 +360,7 @@
0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@d20000000 {
+ PCIE1: pcie@d20000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -401,7 +401,7 @@
0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
};
- PCIE2: pciex@d40000000 {
+ PCIE2: pcie@d40000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -442,24 +442,6 @@
0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>;
};
- MSI: ppc4xx-msi@400300000 {
- compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
- reg = < 0x4 0x00300000 0x100>;
- sdr-base = <0x3B0>;
- msi-data = <0x00000000>;
- msi-mask = <0x44440000>;
- interrupt-count = <3>;
- interrupts =<0 1 2 3>;
- interrupt-parent = <&UIC0>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = <0 &UIC0 0xC 1
- 1 &UIC0 0x0D 1
- 2 &UIC0 0x0E 1
- 3 &UIC0 0x0F 1>;
- };
-
I2O: i2o@400100000 {
compatible = "ibm,i2o-440spe";
reg = <0x00000004 0x00100000 0x100>;
@@ -505,6 +487,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@f0000200";
+ stdout-path = "/plb/opb/serial@f0000200";
};
};
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
deleted file mode 100644
index 5ba7f01e2a29..000000000000
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ /dev/null
@@ -1,435 +0,0 @@
-/*
- * Device Tree Source for AMCC Kilauea (405EX)
- *
- * Copyright 2007-2009 DENX Software Engineering, Stefan Roese <sr@denx.de>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "amcc,kilauea";
- compatible = "amcc,kilauea";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- ethernet1 = &EMAC1;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405EX";
- reg = <0x00000000>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- timebase-frequency = <0>; /* Filled in by U-Boot */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>; /* 16 kB */
- d-cache-size = <16384>; /* 16 kB */
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic-405ex", "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- UIC1: interrupt-controller1 {
- compatible = "ibm,uic-405ex","ibm,uic";
- interrupt-controller;
- cell-index = <1>;
- dcr-reg = <0x0d0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- UIC2: interrupt-controller2 {
- compatible = "ibm,uic-405ex","ibm,uic";
- interrupt-controller;
- cell-index = <2>;
- dcr-reg = <0x0e0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- CPM0: cpm {
- compatible = "ibm,cpm";
- dcr-access-method = "native";
- dcr-reg = <0x0b0 0x003>;
- unused-units = <0x00000000>;
- idle-doze = <0x02000000>;
- standby = <0xe3e74800>;
- };
-
- plb {
- compatible = "ibm,plb-405ex", "ibm,plb4";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405ex", "ibm,sdram-4xx-ddr2";
- dcr-reg = <0x010 0x002>;
- interrupt-parent = <&UIC2>;
- interrupts = <0x5 0x4 /* ECC DED Error */
- 0x6 0x4>; /* ECC SEC Error */
- };
-
- CRYPTO: crypto@ef700000 {
- compatible = "amcc,ppc405ex-crypto", "amcc,ppc4xx-crypto";
- reg = <0xef700000 0x80400>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x17 0x2>;
- };
-
- MAL0: mcmal {
- compatible = "ibm,mcmal-405ex", "ibm,mcmal2";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <2>;
- num-rx-chans = <2>;
- interrupt-parent = <&MAL0>;
- interrupts = <0x0 0x1 0x2 0x3 0x4>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
- /*RXEOB*/ 0x1 &UIC0 0xb 0x4
- /*SERR*/ 0x2 &UIC1 0x0 0x4
- /*TXDE*/ 0x3 &UIC1 0x1 0x4
- /*RXDE*/ 0x4 &UIC1 0x2 0x4>;
- interrupt-map-mask = <0xffffffff>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405ex", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x80000000 0x80000000 0x10000000
- 0xef600000 0xef600000 0x00a00000
- 0xf0000000 0xf0000000 0x10000000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- EBC0: ebc {
- compatible = "ibm,ebc-405ex", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- /* ranges property is supplied by U-Boot */
- interrupts = <0x5 0x1>;
- interrupt-parent = <&UIC1>;
-
- nor_flash@0,0 {
- compatible = "amd,s29gl512n", "cfi-flash";
- bank-width = <2>;
- reg = <0x00000000 0x00000000 0x04000000>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "kernel";
- reg = <0x00000000 0x001e0000>;
- };
- partition@1e0000 {
- label = "dtb";
- reg = <0x001e0000 0x00020000>;
- };
- partition@200000 {
- label = "root";
- reg = <0x00200000 0x00200000>;
- };
- partition@400000 {
- label = "user";
- reg = <0x00400000 0x03b60000>;
- };
- partition@3f60000 {
- label = "env";
- reg = <0x03f60000 0x00040000>;
- };
- partition@3fa0000 {
- label = "u-boot";
- reg = <0x03fa0000 0x00060000>;
- };
- };
-
- ndfc@1,0 {
- compatible = "ibm,ndfc";
- reg = <0x00000001 0x00000000 0x00002000>;
- ccr = <0x00001000>;
- bank-settings = <0x80002222>;
- #address-cells = <1>;
- #size-cells = <1>;
-
- nand {
- #address-cells = <1>;
- #size-cells = <1>;
-
- partition@0 {
- label = "u-boot";
- reg = <0x00000000 0x00100000>;
- };
- partition@100000 {
- label = "user";
- reg = <0x00000000 0x03f00000>;
- };
- };
- };
- };
-
- UART0: serial@ef600200 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600200 0x00000008>;
- virtual-reg = <0xef600200>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1a 0x4>;
- };
-
- UART1: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- IIC0: i2c@ef600400 {
- compatible = "ibm,iic-405ex", "ibm,iic";
- reg = <0xef600400 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- rtc@68 {
- compatible = "dallas,ds1338";
- reg = <0x68>;
- };
-
- dtt@48 {
- compatible = "dallas,ds1775";
- reg = <0x48>;
- };
- };
-
- IIC1: i2c@ef600500 {
- compatible = "ibm,iic-405ex", "ibm,iic";
- reg = <0xef600500 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x7 0x4>;
- };
-
- RGMII0: emac-rgmii@ef600b00 {
- compatible = "ibm,rgmii-405ex", "ibm,rgmii";
- reg = <0xef600b00 0x00000104>;
- has-mdio;
- };
-
- EMAC0: ethernet@ef600900 {
- linux,network-index = <0x0>;
- device_type = "network";
- compatible = "ibm,emac-405ex", "ibm,emac4sync";
- interrupt-parent = <&EMAC0>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
- /*Wake*/ 0x1 &UIC1 0x1d 0x4>;
- reg = <0xef600900 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <0>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
-
- EMAC1: ethernet@ef600a00 {
- linux,network-index = <0x1>;
- device_type = "network";
- compatible = "ibm,emac-405ex", "ibm,emac4sync";
- interrupt-parent = <&EMAC1>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
- /*Wake*/ 0x1 &UIC1 0x1f 0x4>;
- reg = <0xef600a00 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <1>;
- mal-rx-channel = <1>;
- cell-index = <1>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <1>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
- };
-
- PCIE0: pciex@0a0000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
- primary;
- port = <0x0>; /* port number */
- reg = <0xa0000000 0x20000000 /* Config space access */
- 0xef000000 0x00001000>; /* Registers */
- dcr-reg = <0x040 0x020>;
- sdr-base = <0x400>;
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x90000000 0x00000000 0x08000000
- 0x01000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* This drives busses 0x00 to 0x3f */
- bus-range = <0x0 0x3f>;
-
- /* Legacy interrupts (note the weird polarity, the bridge seems
- * to invert PCIe legacy interrupts).
- * We are de-swizzling here because the numbers are actually for
- * port of the root complex virtual P2P bridge. But I want
- * to avoid putting a node for it in the tree, so the numbers
- * below are basically de-swizzled numbers.
- * The real slot is on idsel 0, so the swizzling is 1:1
- */
- interrupt-map-mask = <0x0 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0 0x0 0x0 0x1 &UIC2 0x0 0x4 /* swizzled int A */
- 0x0 0x0 0x0 0x2 &UIC2 0x1 0x4 /* swizzled int B */
- 0x0 0x0 0x0 0x3 &UIC2 0x2 0x4 /* swizzled int C */
- 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
- };
-
- PCIE1: pciex@0c0000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
- primary;
- port = <0x1>; /* port number */
- reg = <0xc0000000 0x20000000 /* Config space access */
- 0xef001000 0x00001000>; /* Registers */
- dcr-reg = <0x060 0x020>;
- sdr-base = <0x440>;
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x98000000 0x00000000 0x08000000
- 0x01000000 0x00000000 0x00000000 0xe0010000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* This drives busses 0x40 to 0x7f */
- bus-range = <0x40 0x7f>;
-
- /* Legacy interrupts (note the weird polarity, the bridge seems
- * to invert PCIe legacy interrupts).
- * We are de-swizzling here because the numbers are actually for
- * port of the root complex virtual P2P bridge. But I want
- * to avoid putting a node for it in the tree, so the numbers
- * below are basically de-swizzled numbers.
- * The real slot is on idsel 0, so the swizzling is 1:1
- */
- interrupt-map-mask = <0x0 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0 0x0 0x0 0x1 &UIC2 0xb 0x4 /* swizzled int A */
- 0x0 0x0 0x0 0x2 &UIC2 0xc 0x4 /* swizzled int B */
- 0x0 0x0 0x0 0x3 &UIC2 0xd 0x4 /* swizzled int C */
- 0x0 0x0 0x0 0x4 &UIC2 0xe 0x4 /* swizzled int D */>;
- };
-
- MSI: ppc4xx-msi@C10000000 {
- compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
- reg = <0xEF620000 0x100>;
- sdr-base = <0x4B0>;
- msi-data = <0x00000000>;
- msi-mask = <0x44440000>;
- interrupt-count = <12>;
- interrupts = <0 1 2 3 4 5 6 7 8 9 0xA 0xB 0xC 0xD>;
- interrupt-parent = <&UIC2>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = <0 &UIC2 0x10 1
- 1 &UIC2 0x11 1
- 2 &UIC2 0x12 1
- 2 &UIC2 0x13 1
- 2 &UIC2 0x14 1
- 2 &UIC2 0x15 1
- 2 &UIC2 0x16 1
- 2 &UIC2 0x17 1
- 2 &UIC2 0x18 1
- 2 &UIC2 0x19 1
- 2 &UIC2 0x1A 1
- 2 &UIC2 0x1B 1
- 2 &UIC2 0x1C 1
- 3 &UIC2 0x1D 1>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/klondike.dts b/arch/powerpc/boot/dts/klondike.dts
deleted file mode 100644
index 8c9429033618..000000000000
--- a/arch/powerpc/boot/dts/klondike.dts
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Device Tree for Klondike (APM8018X) board.
- *
- * Copyright (c) 2010, Applied Micro Circuits Corporation
- * Author: Tanmay Inamdar <tinamdar@apm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- *
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "apm,klondike";
- compatible = "apm,klondike";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- ethernet1 = &EMAC1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,apm8018x";
- reg = <0x00000000>;
- clock-frequency = <300000000>; /* Filled in by U-Boot */
- timebase-frequency = <300000000>; /* Filled in by U-Boot */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>; /* 16 kB */
- d-cache-size = <16384>; /* 16 kB */
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x20000000>; /* Filled in by U-Boot */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x010>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- UIC1: interrupt-controller1 {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <1>;
- dcr-reg = <0x0d0 0x010>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- UIC2: interrupt-controller2 {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <2>;
- dcr-reg = <0x0e0 0x010>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x0a 0x4 0x0b 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- UIC3: interrupt-controller3 {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <3>;
- dcr-reg = <0x0f0 0x010>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x10 0x4 0x11 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- plb {
- compatible = "ibm,plb4";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-apm8018x";
- dcr-reg = <0x010 0x002>;
- };
-
- MAL0: mcmal {
- compatible = "ibm,mcmal2";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <2>;
- num-rx-chans = <16>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-parent = <&UIC1>;
- interrupts = </*TXEOB*/ 0x6 0x4
- /*RXEOB*/ 0x7 0x4
- /*SERR*/ 0x1 0x4
- /*TXDE*/ 0x2 0x4
- /*RXDE*/ 0x3 0x4>;
- };
-
- POB0: opb {
- compatible = "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x20000000 0x20000000 0x30000000
- 0x50000000 0x50000000 0x10000000
- 0x60000000 0x60000000 0x10000000
- 0xFE000000 0xFE000000 0x00010000>;
- dcr-reg = <0x100 0x020>;
- clock-frequency = <300000000>; /* Filled in by U-Boot */
-
- RGMII0: emac-rgmii@400a2000 {
- compatible = "ibm,rgmii";
- reg = <0x400a2000 0x00000010>;
- has-mdio;
- };
-
- TAH0: emac-tah@400a3000 {
- compatible = "ibm,tah";
- reg = <0x400a3000 0x100>;
- };
-
- TAH1: emac-tah@400a4000 {
- compatible = "ibm,tah";
- reg = <0x400a4000 0x100>;
- };
-
- EMAC0: ethernet@400a0000 {
- compatible = "ibm,emac4", "ibm-emac4sync";
- interrupt-parent = <&EMAC0>;
- interrupts = <0x0>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x13 0x4>;
- reg = <0x400a0000 0x00000100>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <0x0>;
- mal-rx-channel = <0x0>;
- cell-index = <0>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- phy-mode = "rgmii";
- phy-address = <0x2>;
- turbo = "no";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <0>;
- tah-device = <&TAH0>;
- tah-channel = <0>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
-
- EMAC1: ethernet@400a1000 {
- compatible = "ibm,emac4", "ibm-emac4sync";
- status = "disabled";
- interrupt-parent = <&EMAC1>;
- interrupts = <0x0>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x14 0x4>;
- reg = <0x400a1000 0x00000100>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <1>;
- mal-rx-channel = <8>;
- cell-index = <1>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- phy-mode = "rgmii";
- phy-address = <0x3>;
- turbo = "no";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <1>;
- tah-device = <&TAH1>;
- tah-channel = <0>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- mdio-device = <&EMAC0>;
- };
- };
- };
-
- chosen {
- linux,stdout-path = "/plb/opb/serial@50001000";
- };
-};
diff --git a/arch/powerpc/boot/dts/kmeter1.dts b/arch/powerpc/boot/dts/kmeter1.dts
index 983aee185793..154f5d293fd3 100644
--- a/arch/powerpc/boot/dts/kmeter1.dts
+++ b/arch/powerpc/boot/dts/kmeter1.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Keymile KMETER1 Device Tree Source
*
* 2008-2011 DENX Software Engineering GmbH
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -434,27 +430,27 @@
compatible = "fsl,ucc-mdio";
/* Piggy2 (UCC4, MDIO 0x00, RMII) */
- phy_piggy2: ethernet-phy@00 {
+ phy_piggy2: ethernet-phy@0 {
reg = <0x0>;
};
/* Eth-1 (UCC5, MDIO 0x08, RMII) */
- phy_eth1: ethernet-phy@08 {
+ phy_eth1: ethernet-phy@8 {
reg = <0x08>;
};
/* Eth-2 (UCC6, MDIO 0x09, RMII) */
- phy_eth2: ethernet-phy@09 {
+ phy_eth2: ethernet-phy@9 {
reg = <0x09>;
};
/* Eth-3 (UCC7, MDIO 0x0a, RMII) */
- phy_eth3: ethernet-phy@0a {
+ phy_eth3: ethernet-phy@a {
reg = <0x0a>;
};
/* Eth-4 (UCC8, MDIO 0x0b, RMII) */
- phy_eth4: ethernet-phy@0b {
+ phy_eth4: ethernet-phy@b {
reg = <0x0b>;
};
diff --git a/arch/powerpc/boot/dts/ksi8560.dts b/arch/powerpc/boot/dts/ksi8560.dts
index 5d68236e7c3c..37a7eb576d02 100644
--- a/arch/powerpc/boot/dts/ksi8560.dts
+++ b/arch/powerpc/boot/dts/ksi8560.dts
@@ -14,6 +14,8 @@
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "KSI8560";
compatible = "emerson,KSI8560";
@@ -339,6 +341,6 @@
chosen {
- linux,stdout-path = "/soc/cpm/serial@91a00";
+ stdout-path = "/soc/cpm/serial@91a00";
};
};
diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts
index 179a1785d645..b9d8487813b4 100644
--- a/arch/powerpc/boot/dts/lite5200.dts
+++ b/arch/powerpc/boot/dts/lite5200.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Lite5200 board Device Tree Source
*
* Copyright 2006-2007 Secret Lab Technologies Ltd.
* Grant Likely <grant.likely@secretlab.ca>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -36,7 +32,7 @@
};
};
- memory {
+ memory@0 {
device_type = "memory";
reg = <0x00000000 0x04000000>; // 64MB
};
@@ -287,9 +283,9 @@
clock-frequency = <0>; // From boot loader
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000
- 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+ <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
};
localbus {
diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts
index 5abb46c5cc95..7e2d91c7cb66 100644
--- a/arch/powerpc/boot/dts/lite5200b.dts
+++ b/arch/powerpc/boot/dts/lite5200b.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Lite5200B board Device Tree Source
*
* Copyright 2006-2007 Secret Lab Technologies Ltd.
* Grant Likely <grant.likely@secretlab.ca>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -35,7 +31,7 @@
led4 { gpios = <&gpio_simple 2 1>; };
};
- memory {
+ memory@0 {
reg = <0x00000000 0x10000000>; // 256MB
};
@@ -120,9 +116,9 @@
clock-frequency = <0>; // From boot loader
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000
- 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+ <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
};
localbus {
diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts
deleted file mode 100644
index 63d48b632c84..000000000000
--- a/arch/powerpc/boot/dts/makalu.dts
+++ /dev/null
@@ -1,353 +0,0 @@
-/*
- * Device Tree Source for AMCC Makalu (405EX)
- *
- * Copyright 2007 DENX Software Engineering, Stefan Roese <sr@denx.de>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "amcc,makalu";
- compatible = "amcc,makalu";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- ethernet1 = &EMAC1;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405EX";
- reg = <0x00000000>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- timebase-frequency = <0>; /* Filled in by U-Boot */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>; /* 16 kB */
- d-cache-size = <16384>; /* 16 kB */
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic-405ex", "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- UIC1: interrupt-controller1 {
- compatible = "ibm,uic-405ex","ibm,uic";
- interrupt-controller;
- cell-index = <1>;
- dcr-reg = <0x0d0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- UIC2: interrupt-controller2 {
- compatible = "ibm,uic-405ex","ibm,uic";
- interrupt-controller;
- cell-index = <2>;
- dcr-reg = <0x0e0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- plb {
- compatible = "ibm,plb-405ex", "ibm,plb4";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405ex", "ibm,sdram-4xx-ddr2";
- dcr-reg = <0x010 0x002>;
- interrupt-parent = <&UIC2>;
- interrupts = <0x5 0x4 /* ECC DED Error */
- 0x6 0x4 /* ECC SEC Error */ >;
- };
-
- MAL0: mcmal {
- compatible = "ibm,mcmal-405ex", "ibm,mcmal2";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <2>;
- num-rx-chans = <2>;
- interrupt-parent = <&MAL0>;
- interrupts = <0x0 0x1 0x2 0x3 0x4>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
- /*RXEOB*/ 0x1 &UIC0 0xb 0x4
- /*SERR*/ 0x2 &UIC1 0x0 0x4
- /*TXDE*/ 0x3 &UIC1 0x1 0x4
- /*RXDE*/ 0x4 &UIC1 0x2 0x4>;
- interrupt-map-mask = <0xffffffff>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405ex", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x80000000 0x80000000 0x10000000
- 0xef600000 0xef600000 0x00a00000
- 0xf0000000 0xf0000000 0x10000000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- EBC0: ebc {
- compatible = "ibm,ebc-405ex", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- /* ranges property is supplied by U-Boot */
- interrupts = <0x5 0x1>;
- interrupt-parent = <&UIC1>;
-
- nor_flash@0,0 {
- compatible = "amd,s29gl512n", "cfi-flash";
- bank-width = <2>;
- reg = <0x00000000 0x00000000 0x04000000>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "kernel";
- reg = <0x00000000 0x00200000>;
- };
- partition@200000 {
- label = "root";
- reg = <0x00200000 0x00200000>;
- };
- partition@400000 {
- label = "user";
- reg = <0x00400000 0x03b60000>;
- };
- partition@3f60000 {
- label = "env";
- reg = <0x03f60000 0x00040000>;
- };
- partition@3fa0000 {
- label = "u-boot";
- reg = <0x03fa0000 0x00060000>;
- };
- };
- };
-
- UART0: serial@ef600200 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600200 0x00000008>;
- virtual-reg = <0xef600200>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1a 0x4>;
- };
-
- UART1: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- IIC0: i2c@ef600400 {
- compatible = "ibm,iic-405ex", "ibm,iic";
- reg = <0xef600400 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
- };
-
- IIC1: i2c@ef600500 {
- compatible = "ibm,iic-405ex", "ibm,iic";
- reg = <0xef600500 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x7 0x4>;
- };
-
-
- RGMII0: emac-rgmii@ef600b00 {
- compatible = "ibm,rgmii-405ex", "ibm,rgmii";
- reg = <0xef600b00 0x00000104>;
- has-mdio;
- };
-
- EMAC0: ethernet@ef600900 {
- linux,network-index = <0x0>;
- device_type = "network";
- compatible = "ibm,emac-405ex", "ibm,emac4sync";
- interrupt-parent = <&EMAC0>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
- /*Wake*/ 0x1 &UIC1 0x1d 0x4>;
- reg = <0xef600900 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x0000003f>; /* Start at 6 */
- rgmii-device = <&RGMII0>;
- rgmii-channel = <0>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
-
- EMAC1: ethernet@ef600a00 {
- linux,network-index = <0x1>;
- device_type = "network";
- compatible = "ibm,emac-405ex", "ibm,emac4sync";
- interrupt-parent = <&EMAC1>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
- /*Wake*/ 0x1 &UIC1 0x1f 0x4>;
- reg = <0xef600a00 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <1>;
- mal-rx-channel = <1>;
- cell-index = <1>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <1>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
- };
-
- PCIE0: pciex@0a0000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
- primary;
- port = <0x0>; /* port number */
- reg = <0xa0000000 0x20000000 /* Config space access */
- 0xef000000 0x00001000>; /* Registers */
- dcr-reg = <0x040 0x020>;
- sdr-base = <0x400>;
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x90000000 0x00000000 0x08000000
- 0x01000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* This drives busses 0x00 to 0x3f */
- bus-range = <0x0 0x3f>;
-
- /* Legacy interrupts (note the weird polarity, the bridge seems
- * to invert PCIe legacy interrupts).
- * We are de-swizzling here because the numbers are actually for
- * port of the root complex virtual P2P bridge. But I want
- * to avoid putting a node for it in the tree, so the numbers
- * below are basically de-swizzled numbers.
- * The real slot is on idsel 0, so the swizzling is 1:1
- */
- interrupt-map-mask = <0x0 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0 0x0 0x0 0x1 &UIC2 0x0 0x4 /* swizzled int A */
- 0x0 0x0 0x0 0x2 &UIC2 0x1 0x4 /* swizzled int B */
- 0x0 0x0 0x0 0x3 &UIC2 0x2 0x4 /* swizzled int C */
- 0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
- };
-
- PCIE1: pciex@0c0000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb-pciex-405ex", "ibm,plb-pciex";
- primary;
- port = <0x1>; /* port number */
- reg = <0xc0000000 0x20000000 /* Config space access */
- 0xef001000 0x00001000>; /* Registers */
- dcr-reg = <0x060 0x020>;
- sdr-base = <0x440>;
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x98000000 0x00000000 0x08000000
- 0x01000000 0x00000000 0x00000000 0xe0010000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* This drives busses 0x40 to 0x7f */
- bus-range = <0x40 0x7f>;
-
- /* Legacy interrupts (note the weird polarity, the bridge seems
- * to invert PCIe legacy interrupts).
- * We are de-swizzling here because the numbers are actually for
- * port of the root complex virtual P2P bridge. But I want
- * to avoid putting a node for it in the tree, so the numbers
- * below are basically de-swizzled numbers.
- * The real slot is on idsel 0, so the swizzling is 1:1
- */
- interrupt-map-mask = <0x0 0x0 0x0 0x7>;
- interrupt-map = <
- 0x0 0x0 0x0 0x1 &UIC2 0xb 0x4 /* swizzled int A */
- 0x0 0x0 0x0 0x2 &UIC2 0xc 0x4 /* swizzled int B */
- 0x0 0x0 0x0 0x3 &UIC2 0xd 0x4 /* swizzled int C */
- 0x0 0x0 0x0 0x4 &UIC2 0xe 0x4 /* swizzled int D */>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts
index b5413cb85f13..96524ede16cd 100644
--- a/arch/powerpc/boot/dts/media5200.dts
+++ b/arch/powerpc/boot/dts/media5200.dts
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Freescale Media5200 board Device Tree Source
*
* Copyright 2009 Secret Lab Technologies Ltd.
* Grant Likely <grant.likely@secretlab.ca>
* Steven Cavanagh <scavanagh@secretlab.ca>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -25,7 +21,7 @@
};
chosen {
- linux,stdout-path = &console;
+ stdout-path = &console;
};
cpus {
@@ -36,7 +32,7 @@
};
};
- memory {
+ memory@0 {
reg = <0x00000000 0x08000000>; // 128MB RAM
};
@@ -100,9 +96,9 @@
0xe000 0 0 1 &media5200_fpga 0 5 // CoralIP
>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000
- 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+ <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
interrupt-parent = <&mpc5200_pic>;
};
diff --git a/arch/powerpc/boot/dts/mgcoge.dts b/arch/powerpc/boot/dts/mgcoge.dts
index d72fb5e219d0..9cefed207234 100644
--- a/arch/powerpc/boot/dts/mgcoge.dts
+++ b/arch/powerpc/boot/dts/mgcoge.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree for the MGCOGE plattform from keymile
*
* Copyright 2008 DENX Software Engineering GmbH
* Heiko Schocher <hs@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -228,14 +224,7 @@
reg = <0x11a80 0x40 0x89fc 0x2>;
interrupts = <2 8>;
interrupt-parent = <&PIC>;
- gpios = < &cpm2_pio_d 19 0>;
- #address-cells = <1>;
- #size-cells = <0>;
- ds3106@1 {
- compatible = "gen,spidev";
- reg = <0>;
- spi-max-frequency = <8000000>;
- };
+ cs-gpios = < &cpm2_pio_d 19 0>;
};
};
diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
new file mode 100644
index 000000000000..292b909ca9ce
--- /dev/null
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -0,0 +1,255 @@
+/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
+
+/ {
+ #size-cells = <0x02>;
+ #address-cells = <0x02>;
+ model = "microwatt";
+ compatible = "microwatt-soc";
+
+ aliases {
+ serial0 = &UART0;
+ ethernet = &enet0;
+ };
+
+ reserved-memory {
+ #size-cells = <0x02>;
+ #address-cells = <0x02>;
+ ranges;
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x00000000 0x00000000 0x00000000 0x10000000>;
+ };
+
+ clocks {
+ sys_clk: litex_sys_clk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <100000000>;
+ };
+ };
+
+ cpus {
+ #size-cells = <0x00>;
+ #address-cells = <0x01>;
+
+ ibm,powerpc-cpu-features {
+ display-name = "Microwatt";
+ isa = <3100>;
+ device_type = "cpu-features";
+ compatible = "ibm,powerpc-cpu-features";
+
+ mmu-radix {
+ isa = <3000>;
+ usable-privilege = <6>;
+ os-support = <0>;
+ };
+
+ little-endian {
+ isa = <0>;
+ usable-privilege = <7>;
+ os-support = <0>;
+ hwcap-bit-nr = <1>;
+ };
+
+ cache-inhibited-large-page {
+ isa = <0>;
+ usable-privilege = <6>;
+ os-support = <0>;
+ };
+
+ fixed-point-v3 {
+ isa = <3000>;
+ usable-privilege = <7>;
+ };
+
+ no-execute {
+ isa = <0x00>;
+ usable-privilege = <2>;
+ os-support = <0>;
+ };
+
+ floating-point {
+ hfscr-bit-nr = <0>;
+ hwcap-bit-nr = <27>;
+ isa = <0>;
+ usable-privilege = <7>;
+ hv-support = <1>;
+ os-support = <0>;
+ };
+
+ prefixed-instructions {
+ hfscr-bit-nr = <13>;
+ fscr-bit-nr = <13>;
+ isa = <3010>;
+ usable-privilege = <7>;
+ os-support = <1>;
+ hv-support = <1>;
+ };
+
+ tar {
+ hfscr-bit-nr = <8>;
+ fscr-bit-nr = <8>;
+ isa = <2070>;
+ usable-privilege = <7>;
+ os-support = <1>;
+ hv-support = <1>;
+ hwcap-bit-nr = <58>;
+ };
+
+ control-register {
+ isa = <0>;
+ usable-privilege = <7>;
+ };
+
+ system-call-vectored {
+ isa = <3000>;
+ usable-privilege = <7>;
+ os-support = <1>;
+ fscr-bit-nr = <12>;
+ hwcap-bit-nr = <52>;
+ };
+ };
+
+ PowerPC,Microwatt@0 {
+ i-cache-sets = <2>;
+ ibm,dec-bits = <64>;
+ reservation-granule-size = <64>;
+ clock-frequency = <100000000>;
+ timebase-frequency = <100000000>;
+ i-tlb-sets = <1>;
+ ibm,ppc-interrupt-server#s = <0>;
+ i-cache-block-size = <64>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <2>;
+ i-tlb-size = <64>;
+ cpu-version = <0x990000>;
+ status = "okay";
+ i-cache-size = <0x1000>;
+ ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>;
+ tlb-size = <0>;
+ tlb-sets = <0>;
+ device_type = "cpu";
+ d-tlb-size = <128>;
+ d-tlb-sets = <2>;
+ reg = <0>;
+ general-purpose;
+ 64-bit;
+ d-cache-size = <0x1000>;
+ ibm,chip-id = <0>;
+ ibm,mmu-lpid-bits = <12>;
+ ibm,mmu-pid-bits = <20>;
+ };
+
+ PowerPC,Microwatt@1 {
+ i-cache-sets = <2>;
+ ibm,dec-bits = <64>;
+ reservation-granule-size = <64>;
+ clock-frequency = <100000000>;
+ timebase-frequency = <100000000>;
+ i-tlb-sets = <1>;
+ ibm,ppc-interrupt-server#s = <1>;
+ i-cache-block-size = <64>;
+ d-cache-block-size = <64>;
+ d-cache-sets = <2>;
+ i-tlb-size = <64>;
+ cpu-version = <0x990000>;
+ status = "okay";
+ i-cache-size = <0x1000>;
+ ibm,processor-radix-AP-encodings = <0x0c 0xa0000010 0x20000015 0x4000001e>;
+ tlb-size = <0>;
+ tlb-sets = <0>;
+ device_type = "cpu";
+ d-tlb-size = <128>;
+ d-tlb-sets = <2>;
+ reg = <1>;
+ general-purpose;
+ 64-bit;
+ d-cache-size = <0x1000>;
+ ibm,chip-id = <0>;
+ ibm,mmu-lpid-bits = <12>;
+ ibm,mmu-pid-bits = <20>;
+ };
+ };
+
+ soc@c0000000 {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&ICS>;
+
+ ranges = <0 0 0xc0000000 0x40000000>;
+
+ interrupt-controller@4000 {
+ compatible = "openpower,xics-presentation", "ibm,ppc-xicp";
+ ibm,interrupt-server-ranges = <0x0 0x2>;
+ reg = <0x4000 0x10 0x4010 0x10>;
+ };
+
+ ICS: interrupt-controller@5000 {
+ compatible = "openpower,xics-sources";
+ interrupt-controller;
+ interrupt-ranges = <0x10 0x10>;
+ reg = <0x5000 0x100>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ #interrupt-cells = <2>;
+ };
+
+ UART0: serial@2000 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x2000 0x8>;
+ clock-frequency = <100000000>;
+ current-speed = <115200>;
+ reg-shift = <2>;
+ fifo-size = <16>;
+ interrupts = <0x10 0x1>;
+ };
+
+ gpio: gpio@7000 {
+ device_type = "gpio";
+ compatible = "faraday,ftgpio010";
+ gpio-controller;
+ #gpio-cells = <2>;
+ reg = <0x7000 0x80>;
+ interrupts = <0x14 1>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ enet0: ethernet@8020000 {
+ compatible = "litex,liteeth";
+ reg = <0x8021000 0x100
+ 0x8020800 0x100
+ 0x8030000 0x2000>;
+ reg-names = "mac", "mido", "buffer";
+ litex,rx-slots = <2>;
+ litex,tx-slots = <2>;
+ litex,slot-size = <0x800>;
+ interrupts = <0x11 0x1>;
+ };
+
+ mmc@8040000 {
+ compatible = "litex,mmc";
+ reg = <0x8042800 0x800
+ 0x8041000 0x800
+ 0x8040800 0x800
+ 0x8042000 0x800
+ 0x8041800 0x800>;
+ reg-names = "phy", "core", "reader", "writer", "irq";
+ bus-width = <4>;
+ interrupts = <0x13 1>;
+ clocks = <&sys_clk>;
+ };
+ };
+
+ chosen {
+ bootargs = "";
+ ibm,architecture-vec-5 = [19 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00
+ 00 00 00 00 00 00 00 00 40 00 40];
+ stdout-path = &UART0;
+ };
+};
diff --git a/arch/powerpc/boot/dts/motionpro.dts b/arch/powerpc/boot/dts/motionpro.dts
index bbabd97492ad..c23676093da8 100644
--- a/arch/powerpc/boot/dts/motionpro.dts
+++ b/arch/powerpc/boot/dts/motionpro.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Motion-PRO board Device Tree Source
*
* Copyright (C) 2007 Semihalf
* Marian Balakowicz <m8@semihalf.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi
index 7f9d14f5c4da..d3fc8062fbcd 100644
--- a/arch/powerpc/boot/dts/mpc5121.dtsi
+++ b/arch/powerpc/boot/dts/mpc5121.dtsi
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* base MPC5121 Device Tree Source
*
* Copyright 2007-2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <dt-bindings/clock/mpc512x-clock.h>
@@ -77,7 +73,6 @@
#address-cells = <2>;
#size-cells = <1>;
reg = <0x80000020 0x40>;
- interrupts = <7 0x8>;
ranges = <0x0 0x0 0xfc000000 0x04000000>;
};
@@ -145,7 +140,7 @@
};
/* Power Management Controller */
- pmc@1000{
+ pmc@1000 {
compatible = "fsl,mpc5121-pmc";
reg = <0x1000 0x100>;
interrupts = <83 0x8>;
@@ -329,7 +324,15 @@
/* LocalPlus controller */
lpc@10000 {
compatible = "fsl,mpc5121-lpc";
- reg = <0x10000 0x200>;
+ reg = <0x10000 0x100>;
+ };
+
+ sclpc@10100 {
+ compatible = "fsl,mpc512x-lpbfifo";
+ reg = <0x10100 0x50>;
+ interrupts = <7 0x8>;
+ dmas = <&dma0 26>;
+ dma-names = "rx-tx";
};
pata@10200 {
diff --git a/arch/powerpc/boot/dts/mpc5121ads.dts b/arch/powerpc/boot/dts/mpc5121ads.dts
index c228a0a232a6..b407a50ee622 100644
--- a/arch/powerpc/boot/dts/mpc5121ads.dts
+++ b/arch/powerpc/boot/dts/mpc5121ads.dts
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC5121E ADS Device Tree Source
*
* Copyright 2007-2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "mpc5121ads";
@@ -94,12 +90,12 @@
};
eeprom@50 {
- compatible = "at,24c32";
+ compatible = "atmel,24c32";
reg = <0x50>;
};
rtc@68 {
- compatible = "stm,m41t62";
+ compatible = "st,m41t62";
reg = <0x68>;
};
};
diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts
index e4f297471748..ee090709aa3a 100644
--- a/arch/powerpc/boot/dts/mpc5125twr.dts
+++ b/arch/powerpc/boot/dts/mpc5125twr.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* STx/Freescale ADS5125 MPC5125 silicon
*
@@ -5,11 +6,6 @@
*
* Reworked by Matteo Facchinetti (engineering@sirius-es.it)
* Copyright (C) 2013 Sirius Electronic Systems
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <dt-bindings/clock/mpc512x-clock.h>
@@ -108,7 +104,7 @@
clock-names = "osc";
};
- pmc@1000{ // Power Management Controller
+ pmc@1000 { // Power Management Controller
compatible = "fsl,mpc5121-pmc";
reg = <0x1000 0x100>;
interrupts = <83 0x2>;
@@ -246,6 +242,14 @@
status = "disabled";
};
+ sclpc@10100 {
+ compatible = "fsl,mpc512x-lpbfifo";
+ reg = <0x10100 0x50>;
+ interrupts = <7 0x8>;
+ dmas = <&dma0 26>;
+ dma-names = "rx-tx";
+ };
+
// 5125 PSCs are not 52xx or 5121 PSC compatible
// PSC1 uart0 aka ttyPSC0
serial@11100 {
@@ -279,10 +283,11 @@
clock-names = "ipg";
};
- dma@14000 {
+ dma0: dma@14000 {
compatible = "fsl,mpc5121-dma"; // BSP name: "mpc512x-dma2"
reg = <0x14000 0x1800>;
interrupts = <65 0x8>;
+ #dma-cells = <1>;
};
};
};
diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi
index 969b2200b2f9..ffa82c7e1055 100644
--- a/arch/powerpc/boot/dts/mpc5200b.dtsi
+++ b/arch/powerpc/boot/dts/mpc5200b.dtsi
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* base MPC5200b Device Tree Source
*
* Copyright (C) 2010 SecretLab
* Grant Likely <grant@secretlab.ca>
* John Bonesio <bones@secretlab.ca>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -37,7 +33,7 @@
};
};
- memory: memory {
+ memory: memory@0 {
device_type = "memory";
reg = <0x00000000 0x04000000>; // 64MB
};
@@ -280,7 +276,9 @@
clock-frequency = <0>; // From boot loader
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- // ranges = need to add
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+ <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
};
localbus: localbus {
diff --git a/arch/powerpc/boot/dts/mpc7448hpc2.dts b/arch/powerpc/boot/dts/mpc7448hpc2.dts
deleted file mode 100644
index 20a0d22df473..000000000000
--- a/arch/powerpc/boot/dts/mpc7448hpc2.dts
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * MPC7448HPC2 (Taiga) board Device Tree Source
- *
- * Copyright 2006, 2008 Freescale Semiconductor Inc.
- * 2006 Roy Zang <Roy Zang at freescale.com>.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "mpc7448hpc2";
- compatible = "mpc74xx";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
-
- serial0 = &serial0;
- serial1 = &serial1;
-
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells =<0>;
-
- PowerPC,7448@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <0x8000>; // L1, 32K bytes
- i-cache-size = <0x8000>; // L1, 32K bytes
- timebase-frequency = <0>; // 33 MHz, from uboot
- clock-frequency = <0>; // From U-Boot
- bus-frequency = <0>; // From U-Boot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x20000000 // DDR2 512M at 0
- >;
- };
-
- tsi108@c0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "tsi-bridge";
- ranges = <0x0 0xc0000000 0x10000>;
- reg = <0xc0000000 0x10000>;
- bus-frequency = <0>;
-
- i2c@7000 {
- interrupt-parent = <&mpic>;
- interrupts = <14 0>;
- reg = <0x7000 0x400>;
- device_type = "i2c";
- compatible = "tsi108-i2c";
- };
-
- MDIO: mdio@6000 {
- compatible = "tsi108-mdio";
- reg = <0x6000 0x50>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- phy8: ethernet-phy@8 {
- interrupt-parent = <&mpic>;
- interrupts = <2 1>;
- reg = <0x8>;
- };
-
- phy9: ethernet-phy@9 {
- interrupt-parent = <&mpic>;
- interrupts = <2 1>;
- reg = <0x9>;
- };
-
- };
-
- enet0: ethernet@6200 {
- linux,network-index = <0>;
- #size-cells = <0>;
- device_type = "network";
- compatible = "tsi108-ethernet";
- reg = <0x6000 0x200>;
- address = [ 00 06 D2 00 00 01 ];
- interrupts = <16 2>;
- interrupt-parent = <&mpic>;
- mdio-handle = <&MDIO>;
- phy-handle = <&phy8>;
- };
-
- enet1: ethernet@6600 {
- linux,network-index = <1>;
- #address-cells = <1>;
- #size-cells = <0>;
- device_type = "network";
- compatible = "tsi108-ethernet";
- reg = <0x6400 0x200>;
- address = [ 00 06 D2 00 00 02 ];
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- mdio-handle = <&MDIO>;
- phy-handle = <&phy9>;
- };
-
- serial0: serial@7808 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x7808 0x200>;
- clock-frequency = <1064000000>;
- interrupts = <12 0>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@7c08 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0x7c08 0x200>;
- clock-frequency = <1064000000>;
- interrupts = <13 0>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@7400 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x7400 0x400>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
- pci0: pci@1000 {
- compatible = "tsi108-pci";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0x1000 0x1000>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0xe0000000 0xe0000000 0x0 0x1a000000
- 0x1000000 0x0 0x0 0xfa000000 0x0 0x10000>;
- clock-frequency = <133333332>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x800 0x0 0x0 0x1 &RT0 0x24 0x0
- 0x800 0x0 0x0 0x2 &RT0 0x25 0x0
- 0x800 0x0 0x0 0x3 &RT0 0x26 0x0
- 0x800 0x0 0x0 0x4 &RT0 0x27 0x0
-
- /* IDSEL 0x12 */
- 0x1000 0x0 0x0 0x1 &RT0 0x25 0x0
- 0x1000 0x0 0x0 0x2 &RT0 0x26 0x0
- 0x1000 0x0 0x0 0x3 &RT0 0x27 0x0
- 0x1000 0x0 0x0 0x4 &RT0 0x24 0x0
-
- /* IDSEL 0x13 */
- 0x1800 0x0 0x0 0x1 &RT0 0x26 0x0
- 0x1800 0x0 0x0 0x2 &RT0 0x27 0x0
- 0x1800 0x0 0x0 0x3 &RT0 0x24 0x0
- 0x1800 0x0 0x0 0x4 &RT0 0x25 0x0
-
- /* IDSEL 0x14 */
- 0x2000 0x0 0x0 0x1 &RT0 0x27 0x0
- 0x2000 0x0 0x0 0x2 &RT0 0x24 0x0
- 0x2000 0x0 0x0 0x3 &RT0 0x25 0x0
- 0x2000 0x0 0x0 0x4 &RT0 0x26 0x0
- >;
-
- RT0: router@1180 {
- clock-frequency = <0>;
- interrupt-controller;
- device_type = "pic-router";
- #address-cells = <0>;
- #interrupt-cells = <2>;
- big-endian;
- interrupts = <23 2>;
- interrupt-parent = <&mpic>;
- };
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8272ads.dts b/arch/powerpc/boot/dts/mpc8272ads.dts
deleted file mode 100644
index 6d2cddf64cfd..000000000000
--- a/arch/powerpc/boot/dts/mpc8272ads.dts
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- * MPC8272 ADS Device Tree Source
- *
- * Copyright 2005,2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "MPC8272ADS";
- compatible = "fsl,mpc8272ads";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &eth0;
- ethernet1 = &eth1;
- serial0 = &scc1;
- serial1 = &scc4;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8272@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <16384>;
- i-cache-size = <16384>;
- timebase-frequency = <0>;
- bus-frequency = <0>;
- clock-frequency = <0>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x0>;
- };
-
- localbus@f0010100 {
- compatible = "fsl,mpc8272-localbus",
- "fsl,pq2-localbus";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <0xf0010100 0x40>;
-
- ranges = <0x0 0x0 0xff800000 0x00800000
- 0x1 0x0 0xf4500000 0x8000
- 0x3 0x0 0xf8200000 0x8000>;
-
- flash@0,0 {
- compatible = "jedec-flash";
- reg = <0x0 0x0 0x00800000>;
- bank-width = <4>;
- device-width = <1>;
- };
-
- board-control@1,0 {
- reg = <0x1 0x0 0x20>;
- compatible = "fsl,mpc8272ads-bcsr";
- };
-
- PCI_PIC: interrupt-controller@3,0 {
- compatible = "fsl,mpc8272ads-pci-pic",
- "fsl,pq2ads-pci-pic";
- #interrupt-cells = <1>;
- interrupt-controller;
- reg = <0x3 0x0 0x8>;
- interrupt-parent = <&PIC>;
- interrupts = <20 8>;
- };
- };
-
-
- pci@f0010800 {
- device_type = "pci";
- reg = <0xf0010800 0x10c 0xf00101ac 0x8 0xf00101c4 0x8>;
- compatible = "fsl,mpc8272-pci", "fsl,pq2-pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- clock-frequency = <66666666>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &PCI_PIC 0
- 0xb000 0x0 0x0 0x2 &PCI_PIC 1
- 0xb000 0x0 0x0 0x3 &PCI_PIC 2
- 0xb000 0x0 0x0 0x4 &PCI_PIC 3
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &PCI_PIC 4
- 0xb800 0x0 0x0 0x2 &PCI_PIC 5
- 0xb800 0x0 0x0 0x3 &PCI_PIC 6
- 0xb800 0x0 0x0 0x4 &PCI_PIC 7
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &PCI_PIC 8
- 0xc000 0x0 0x0 0x2 &PCI_PIC 9
- 0xc000 0x0 0x0 0x3 &PCI_PIC 10
- 0xc000 0x0 0x0 0x4 &PCI_PIC 11>;
-
- interrupt-parent = <&PIC>;
- interrupts = <18 8>;
- ranges = <0x42000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xf6000000 0x0 0x2000000>;
- };
-
- soc@f0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,mpc8272", "fsl,pq2-soc";
- ranges = <0x0 0xf0000000 0x53000>;
-
- // Temporary -- will go away once kernel uses ranges for get_immrbase().
- reg = <0xf0000000 0x53000>;
-
- cpm@119c0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8272-cpm", "fsl,cpm2";
- reg = <0x119c0 0x30>;
- ranges;
-
- muram@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x0 0x0 0x10000>;
-
- data@0 {
- compatible = "fsl,cpm-muram-data";
- reg = <0x0 0x2000 0x9800 0x800>;
- };
- };
-
- brg@119f0 {
- compatible = "fsl,mpc8272-brg",
- "fsl,cpm2-brg",
- "fsl,cpm-brg";
- reg = <0x119f0 0x10 0x115f0 0x10>;
- };
-
- scc1: serial@11a00 {
- device_type = "serial";
- compatible = "fsl,mpc8272-scc-uart",
- "fsl,cpm2-scc-uart";
- reg = <0x11a00 0x20 0x8000 0x100>;
- interrupts = <40 8>;
- interrupt-parent = <&PIC>;
- fsl,cpm-brg = <1>;
- fsl,cpm-command = <0x800000>;
- };
-
- scc4: serial@11a60 {
- device_type = "serial";
- compatible = "fsl,mpc8272-scc-uart",
- "fsl,cpm2-scc-uart";
- reg = <0x11a60 0x20 0x8300 0x100>;
- interrupts = <43 8>;
- interrupt-parent = <&PIC>;
- fsl,cpm-brg = <4>;
- fsl,cpm-command = <0xce00000>;
- };
-
- usb@11b60 {
- compatible = "fsl,mpc8272-cpm-usb";
- reg = <0x11b60 0x40 0x8b00 0x100>;
- interrupts = <11 8>;
- interrupt-parent = <&PIC>;
- mode = "peripheral";
- };
-
- mdio@10d40 {
- compatible = "fsl,mpc8272ads-mdio-bitbang",
- "fsl,mpc8272-mdio-bitbang",
- "fsl,cpm2-mdio-bitbang";
- reg = <0x10d40 0x14>;
- #address-cells = <1>;
- #size-cells = <0>;
- fsl,mdio-pin = <18>;
- fsl,mdc-pin = <19>;
-
- PHY0: ethernet-phy@0 {
- interrupt-parent = <&PIC>;
- interrupts = <23 8>;
- reg = <0x0>;
- };
-
- PHY1: ethernet-phy@1 {
- interrupt-parent = <&PIC>;
- interrupts = <23 8>;
- reg = <0x3>;
- };
- };
-
- eth0: ethernet@11300 {
- device_type = "network";
- compatible = "fsl,mpc8272-fcc-enet",
- "fsl,cpm2-fcc-enet";
- reg = <0x11300 0x20 0x8400 0x100 0x11390 0x1>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <32 8>;
- interrupt-parent = <&PIC>;
- phy-handle = <&PHY0>;
- linux,network-index = <0>;
- fsl,cpm-command = <0x12000300>;
- };
-
- eth1: ethernet@11320 {
- device_type = "network";
- compatible = "fsl,mpc8272-fcc-enet",
- "fsl,cpm2-fcc-enet";
- reg = <0x11320 0x20 0x8500 0x100 0x113b0 0x1>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <33 8>;
- interrupt-parent = <&PIC>;
- phy-handle = <&PHY1>;
- linux,network-index = <1>;
- fsl,cpm-command = <0x16200300>;
- };
-
- i2c@11860 {
- compatible = "fsl,mpc8272-i2c",
- "fsl,cpm2-i2c";
- reg = <0x11860 0x20 0x8afc 0x2>;
- interrupts = <1 8>;
- interrupt-parent = <&PIC>;
- fsl,cpm-command = <0x29600000>;
- #address-cells = <1>;
- #size-cells = <0>;
- };
- };
-
- PIC: interrupt-controller@10c00 {
- #interrupt-cells = <2>;
- interrupt-controller;
- reg = <0x10c00 0x80>;
- compatible = "fsl,mpc8272-pic", "fsl,cpm2-pic";
- };
-
- crypto@30000 {
- compatible = "fsl,sec1.0";
- reg = <0x40000 0x13000>;
- interrupts = <47 0x8>;
- interrupt-parent = <&PIC>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x7e>;
- fsl,descriptor-types-mask = <0x1010415>;
- };
- };
-
- chosen {
- linux,stdout-path = "/soc/cpm/serial@11a00";
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts
index 57f86cdf9f36..2638555afcc4 100644
--- a/arch/powerpc/boot/dts/mpc8308_p1m.dts
+++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* mpc8308_p1m Device Tree Source
*
* Copyright 2010 Ilya Yanok, Emcraft Systems, yanok@emcraft.com
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -123,7 +119,7 @@
interrupt-parent = <&ipic>;
dfsrr;
fram@50 {
- compatible = "ramtron,24c64";
+ compatible = "ramtron,24c64", "atmel,24c64";
reg = <0x50>;
};
};
diff --git a/arch/powerpc/boot/dts/mpc8308rdb.dts b/arch/powerpc/boot/dts/mpc8308rdb.dts
index d0211f0413c6..af2ed8380a86 100644
--- a/arch/powerpc/boot/dts/mpc8308rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8308rdb.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8308RDB Device Tree Source
*
* Copyright 2009 Freescale Semiconductor Inc.
* Copyright 2010 Ilya Yanok, Emcraft Systems, yanok@emcraft.com
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/mpc8313erdb.dts b/arch/powerpc/boot/dts/mpc8313erdb.dts
index 4b635dc4ecde..a8315795b2c9 100644
--- a/arch/powerpc/boot/dts/mpc8313erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8313erdb.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8313E RDB Device Tree Source
*
* Copyright 2005, 2006, 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts
index 43546844ea5a..a89cb3139ca8 100644
--- a/arch/powerpc/boot/dts/mpc8315erdb.dts
+++ b/arch/powerpc/boot/dts/mpc8315erdb.dts
@@ -1,15 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8315E RDB Device Tree Source
*
* Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+#include <dt-bindings/interrupt-controller/irq.h>
/ {
compatible = "fsl,mpc8315erdb";
@@ -362,6 +359,15 @@
interrupt-parent = <&ipic>;
fsl,mpc8313-wakeup-timer = <&gtm1>;
};
+
+ gpio: gpio-controller@c00 {
+ compatible = "fsl,mpc8314-gpio";
+ reg = <0xc00 0x100>;
+ interrupts = <74 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-parent = <&ipic>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
};
pci0: pci@e0008500 {
@@ -472,7 +478,7 @@
hdd {
gpios = <&mcu_pio 1 0>;
- linux,default-trigger = "ide-disk";
+ linux,default-trigger = "disk-activity";
};
};
};
diff --git a/arch/powerpc/boot/dts/mpc832x_mds.dts b/arch/powerpc/boot/dts/mpc832x_mds.dts
deleted file mode 100644
index 0793cdf0d46e..000000000000
--- a/arch/powerpc/boot/dts/mpc832x_mds.dts
+++ /dev/null
@@ -1,439 +0,0 @@
-/*
- * MPC8323E EMDS Device Tree Source
- *
- * Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
-
- * To enable external serial I/O on a Freescale MPC 8323 SYS/MDS board, do
- * this:
- *
- * 1) On chip U61, lift (disconnect) pins 21 (TXD) and 22 (RXD) from the board.
- * 2) Solder a wire from U61-21 to P19A-23. P19 is a grid of pins on the board
- * next to the serial ports.
- * 3) Solder a wire from U61-22 to P19K-22.
- *
- * Note that there's a typo in the schematic. The board labels the last column
- * of pins "P19K", but in the schematic, that column is called "P19J". So if
- * you're going by the schematic, the pin is called "P19J-K22".
- */
-
-/dts-v1/;
-
-/ {
- model = "MPC8323EMDS";
- compatible = "MPC8323EMDS", "MPC832xMDS", "MPC83xxMDS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8323@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <16384>; // L1, 16K
- i-cache-size = <16384>; // L1, 16K
- timebase-frequency = <0>;
- bus-frequency = <0>;
- clock-frequency = <0>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x08000000>;
- };
-
- bcsr@f8000000 {
- compatible = "fsl,mpc8323mds-bcsr";
- reg = <0xf8000000 0x8000>;
- };
-
- soc8323@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <132000000>;
-
- wdt@200 {
- device_type = "watchdog";
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- pmc: power@b00 {
- compatible = "fsl,mpc8323-pmc", "fsl,mpc8349-pmc";
- reg = <0xb00 0x100 0xa00 0x100>;
- interrupts = <80 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
-
- rtc@68 {
- compatible = "dallas,ds1374";
- reg = <0x68>;
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8323-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8323-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.2", "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <1>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x4c>;
- fsl,descriptor-types-mask = <0x0122003f>;
- sleep = <&pmc 0x03000000>;
- };
-
- ipic: pic@700 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- device_type = "ipic";
- };
-
- par_io@1400 {
- reg = <0x1400 0x100>;
- device_type = "par_io";
- num-ports = <7>;
-
- pio3: ucc_pin@03 {
- pio-map = <
- /* port pin dir open_drain assignment has_irq */
- 3 4 3 0 2 0 /* MDIO */
- 3 5 1 0 2 0 /* MDC */
- 0 13 2 0 1 0 /* RX_CLK (CLK9) */
- 3 24 2 0 1 0 /* TX_CLK (CLK10) */
- 1 0 1 0 1 0 /* TxD0 */
- 1 1 1 0 1 0 /* TxD1 */
- 1 2 1 0 1 0 /* TxD2 */
- 1 3 1 0 1 0 /* TxD3 */
- 1 4 2 0 1 0 /* RxD0 */
- 1 5 2 0 1 0 /* RxD1 */
- 1 6 2 0 1 0 /* RxD2 */
- 1 7 2 0 1 0 /* RxD3 */
- 1 8 2 0 1 0 /* RX_ER */
- 1 9 1 0 1 0 /* TX_ER */
- 1 10 2 0 1 0 /* RX_DV */
- 1 11 2 0 1 0 /* COL */
- 1 12 1 0 1 0 /* TX_EN */
- 1 13 2 0 1 0>; /* CRS */
- };
- pio4: ucc_pin@04 {
- pio-map = <
- /* port pin dir open_drain assignment has_irq */
- 3 31 2 0 1 0 /* RX_CLK (CLK7) */
- 3 6 2 0 1 0 /* TX_CLK (CLK8) */
- 1 18 1 0 1 0 /* TxD0 */
- 1 19 1 0 1 0 /* TxD1 */
- 1 20 1 0 1 0 /* TxD2 */
- 1 21 1 0 1 0 /* TxD3 */
- 1 22 2 0 1 0 /* RxD0 */
- 1 23 2 0 1 0 /* RxD1 */
- 1 24 2 0 1 0 /* RxD2 */
- 1 25 2 0 1 0 /* RxD3 */
- 1 26 2 0 1 0 /* RX_ER */
- 1 27 1 0 1 0 /* TX_ER */
- 1 28 2 0 1 0 /* RX_DV */
- 1 29 2 0 1 0 /* COL */
- 1 30 1 0 1 0 /* TX_EN */
- 1 31 2 0 1 0>; /* CRS */
- };
- pio5: ucc_pin@05 {
- pio-map = <
- /*
- * open has
- * port pin dir drain sel irq
- */
- 2 0 1 0 2 0 /* TxD5 */
- 2 8 2 0 2 0 /* RxD5 */
-
- 2 29 2 0 0 0 /* CTS5 */
- 2 31 1 0 2 0 /* RTS5 */
-
- 2 24 2 0 0 0 /* CD */
-
- >;
- };
-
- };
- };
-
- qe@e0100000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "qe";
- compatible = "fsl,qe";
- ranges = <0x0 0xe0100000 0x00100000>;
- reg = <0xe0100000 0x480>;
- brg-frequency = <0>;
- bus-frequency = <198000000>;
- fsl,qe-num-riscs = <1>;
- fsl,qe-num-snums = <28>;
-
- muram@10000 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,qe-muram", "fsl,cpm-muram";
- ranges = <0x0 0x00010000 0x00004000>;
-
- data-only@0 {
- compatible = "fsl,qe-muram-data",
- "fsl,cpm-muram-data";
- reg = <0x0 0x4000>;
- };
- };
-
- spi@4c0 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x4c0 0x40>;
- interrupts = <2>;
- interrupt-parent = <&qeic>;
- mode = "cpu";
- };
-
- spi@500 {
- cell-index = <1>;
- compatible = "fsl,spi";
- reg = <0x500 0x40>;
- interrupts = <1>;
- interrupt-parent = <&qeic>;
- mode = "cpu";
- };
-
- usb@6c0 {
- compatible = "qe_udc";
- reg = <0x6c0 0x40 0x8b00 0x100>;
- interrupts = <11>;
- interrupt-parent = <&qeic>;
- mode = "slave";
- };
-
- enet0: ucc@2200 {
- device_type = "network";
- compatible = "ucc_geth";
- cell-index = <3>;
- reg = <0x2200 0x200>;
- interrupts = <34>;
- interrupt-parent = <&qeic>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- rx-clock-name = "clk9";
- tx-clock-name = "clk10";
- phy-handle = <&phy3>;
- pio-handle = <&pio3>;
- };
-
- enet1: ucc@3200 {
- device_type = "network";
- compatible = "ucc_geth";
- cell-index = <4>;
- reg = <0x3200 0x200>;
- interrupts = <35>;
- interrupt-parent = <&qeic>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- rx-clock-name = "clk7";
- tx-clock-name = "clk8";
- phy-handle = <&phy4>;
- pio-handle = <&pio4>;
- };
-
- ucc@2400 {
- device_type = "serial";
- compatible = "ucc_uart";
- cell-index = <5>; /* The UCC number, 1-7*/
- port-number = <0>; /* Which ttyQEx device */
- soft-uart; /* We need Soft-UART */
- reg = <0x2400 0x200>;
- interrupts = <40>; /* From Table 18-12 */
- interrupt-parent = < &qeic >;
- /*
- * For Soft-UART, we need to set TX to 1X, which
- * means specifying separate clock sources.
- */
- rx-clock-name = "brg5";
- tx-clock-name = "brg6";
- pio-handle = < &pio5 >;
- };
-
-
- mdio@2320 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x2320 0x18>;
- compatible = "fsl,ucc-mdio";
-
- phy3: ethernet-phy@03 {
- interrupt-parent = <&ipic>;
- interrupts = <17 0x8>;
- reg = <0x3>;
- };
- phy4: ethernet-phy@04 {
- interrupt-parent = <&ipic>;
- interrupts = <18 0x8>;
- reg = <0x4>;
- };
- };
-
- qeic: interrupt-controller@80 {
- interrupt-controller;
- compatible = "fsl,qe-ic";
- #address-cells = <0>;
- #interrupt-cells = <1>;
- reg = <0x80 0x80>;
- big-endian;
- interrupts = <32 0x8 33 0x8>; //high:32 low:33
- interrupt-parent = <&ipic>;
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 0x11 AD17 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 AD18 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 AD19 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 AD21*/
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 AD22*/
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 AD23*/
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 AD24*/
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <66 0x8>;
- bus-range = <0x0 0x0>;
- ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xd0000000 0x0 0x00100000>;
- clock-frequency = <0>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- sleep = <&pmc 0x00010000>;
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc832x_rdb.dts b/arch/powerpc/boot/dts/mpc832x_rdb.dts
index 91df1eb16667..ecebc27a2898 100644
--- a/arch/powerpc/boot/dts/mpc832x_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc832x_rdb.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC832x RDB Device Tree Source
*
* Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -175,7 +171,7 @@
gpio-controller;
};
- ucc2pio:ucc_pin@02 {
+ ucc2pio:ucc_pin@2 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
3 4 3 0 2 0 /* MDIO */
@@ -197,7 +193,7 @@
0 30 1 0 1 0 /* TX_EN */
0 31 2 0 1 0>; /* CRS */
};
- ucc3pio:ucc_pin@03 {
+ ucc3pio:ucc_pin@3 {
pio-map = <
/* port pin dir open_drain assignment has_irq */
0 13 2 0 1 0 /* RX_CLK (CLK9) */
@@ -253,7 +249,7 @@
reg = <0x4c0 0x40>;
interrupts = <2>;
interrupt-parent = <&qeic>;
- gpios = <&qe_pio_d 13 0>;
+ cs-gpios = <&qe_pio_d 13 0>;
mode = "cpu-qe";
mmc-slot@0 {
@@ -310,14 +306,10 @@
reg = <0x3120 0x18>;
compatible = "fsl,ucc-mdio";
- phy00:ethernet-phy@00 {
- interrupt-parent = <&ipic>;
- interrupts = <0>;
+ phy00:ethernet-phy@0 {
reg = <0x0>;
};
- phy04:ethernet-phy@04 {
- interrupt-parent = <&ipic>;
- interrupts = <0>;
+ phy04:ethernet-phy@4 {
reg = <0x4>;
};
};
diff --git a/arch/powerpc/boot/dts/mpc8349emitx.dts b/arch/powerpc/boot/dts/mpc8349emitx.dts
index cf8542401a3c..d4ebbb93de0b 100644
--- a/arch/powerpc/boot/dts/mpc8349emitx.dts
+++ b/arch/powerpc/boot/dts/mpc8349emitx.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8349E-mITX Device Tree Source
*
* Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -92,7 +88,7 @@
dfsrr;
eeprom: at24@50 {
- compatible = "st-micro,24c256";
+ compatible = "st,24c256", "atmel,24c256";
reg = <0x50>;
};
@@ -130,7 +126,7 @@
};
spd: at24@51 {
- compatible = "at24,spd";
+ compatible = "atmel,spd";
reg = <0x51>;
};
diff --git a/arch/powerpc/boot/dts/mpc8349emitxgp.dts b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
index f00066dcc8de..bcf68a0a7b55 100644
--- a/arch/powerpc/boot/dts/mpc8349emitxgp.dts
+++ b/arch/powerpc/boot/dts/mpc8349emitxgp.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8349E-mITX-GP Device Tree Source
*
* Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/mpc834x_mds.dts b/arch/powerpc/boot/dts/mpc834x_mds.dts
deleted file mode 100644
index 4843c3ff7166..000000000000
--- a/arch/powerpc/boot/dts/mpc834x_mds.dts
+++ /dev/null
@@ -1,407 +0,0 @@
-/*
- * MPC8349E MDS Device Tree Source
- *
- * Copyright 2005, 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "MPC8349EMDS";
- compatible = "MPC8349EMDS", "MPC834xMDS", "MPC83xxMDS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8349@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>; // from bootloader
- bus-frequency = <0>; // from bootloader
- clock-frequency = <0>; // from bootloader
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x10000000>; // 256MB at 0
- };
-
- bcsr@e2400000 {
- compatible = "fsl,mpc8349mds-bcsr";
- reg = <0xe2400000 0x8000>;
- };
-
- soc8349@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <0>;
-
- wdt@200 {
- device_type = "watchdog";
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
-
- rtc@68 {
- compatible = "dallas,ds1374";
- reg = <0x68>;
- };
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <15 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- spi@7000 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x7000 0x1000>;
- interrupts = <16 0x8>;
- interrupt-parent = <&ipic>;
- mode = "cpu";
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- };
-
- /* phy type (ULPI or SERIAL) are only types supported for MPH */
- /* port = 0 or 1 */
- usb@22000 {
- compatible = "fsl-usb2-mph";
- reg = <0x22000 0x1000>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <39 0x8>;
- phy_type = "ulpi";
- port0;
- };
- /* phy type (ULPI, UTMI, UTMI_WIDE, SERIAL) */
- usb@23000 {
- compatible = "fsl-usb2-dr";
- reg = <0x23000 0x1000>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <38 0x8>;
- dr_mode = "otg";
- phy_type = "ulpi";
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <32 0x8 33 0x8 34 0x8>;
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- linux,network-index = <0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&ipic>;
- interrupts = <17 0x8>;
- reg = <0x0>;
- };
-
- phy1: ethernet-phy@1 {
- interrupt-parent = <&ipic>;
- interrupts = <18 0x8>;
- reg = <0x1>;
- };
-
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 0x8 36 0x8 37 0x8>;
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
- linux,network-index = <1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x7e>;
- fsl,descriptor-types-mask = <0x01010ebf>;
- };
-
- /* IPIC
- * interrupts cell = <intr #, sense>
- * sense values match linux IORESOURCE_IRQ_* defines:
- * sense == 8: Level, low assertion
- * sense == 2: Edge, high-to-low change
- */
- ipic: pic@700 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- device_type = "ipic";
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <66 0x8>;
- bus-range = <0 0>;
- ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- };
-
- pci1: pci@e0008600 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <67 0x8>;
- bus-range = <0 0>;
- ranges = <0x02000000 0x0 0xb0000000 0xb0000000 0x0 0x10000000
- 0x42000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe2100000 0x0 0x00100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008600 0x100 /* internal registers */
- 0xe0008380 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc836x_mds.dts b/arch/powerpc/boot/dts/mpc836x_mds.dts
deleted file mode 100644
index ecb6ccd3a6aa..000000000000
--- a/arch/powerpc/boot/dts/mpc836x_mds.dts
+++ /dev/null
@@ -1,485 +0,0 @@
-/*
- * MPC8360E EMDS Device Tree Source
- *
- * Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-
-/*
-/memreserve/ 00000000 1000000;
-*/
-
-/dts-v1/;
-
-/ {
- model = "MPC8360MDS";
- compatible = "MPC8360EMDS", "MPC836xMDS", "MPC83xxMDS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8360@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <66000000>;
- bus-frequency = <264000000>;
- clock-frequency = <528000000>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x10000000>;
- };
-
- localbus@e0005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8360-localbus", "fsl,pq2pro-localbus",
- "simple-bus";
- reg = <0xe0005000 0xd8>;
- ranges = <0 0 0xfe000000 0x02000000
- 1 0 0xf8000000 0x00008000>;
-
- flash@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x2000000>;
- bank-width = <2>;
- device-width = <1>;
- };
-
- bcsr@1,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8360mds-bcsr";
- reg = <1 0 0x8000>;
- ranges = <0 1 0 0x8000>;
-
- bcsr13: gpio-controller@d {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8360mds-bcsr-gpio";
- reg = <0xd 1>;
- gpio-controller;
- };
- };
- };
-
- soc8360@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <264000000>;
-
- wdt@200 {
- device_type = "watchdog";
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- pmc: power@b00 {
- compatible = "fsl,mpc8360-pmc", "fsl,mpc8349-pmc";
- reg = <0xb00 0x100 0xa00 0x100>;
- interrupts = <80 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
-
- rtc@68 {
- compatible = "dallas,ds1374";
- reg = <0x68>;
- };
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <15 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <264000000>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <264000000>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8360-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8360-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x7e>;
- fsl,descriptor-types-mask = <0x01010ebf>;
- sleep = <&pmc 0x03000000>;
- };
-
- ipic: pic@700 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- device_type = "ipic";
- };
-
- par_io@1400 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x1400 0x100>;
- ranges = <0 0x1400 0x100>;
- device_type = "par_io";
- num-ports = <7>;
-
- qe_pio_b: gpio-controller@18 {
- #gpio-cells = <2>;
- compatible = "fsl,mpc8360-qe-pario-bank",
- "fsl,mpc8323-qe-pario-bank";
- reg = <0x18 0x18>;
- gpio-controller;
- };
-
- pio1: ucc_pin@01 {
- pio-map = <
- /* port pin dir open_drain assignment has_irq */
- 0 3 1 0 1 0 /* TxD0 */
- 0 4 1 0 1 0 /* TxD1 */
- 0 5 1 0 1 0 /* TxD2 */
- 0 6 1 0 1 0 /* TxD3 */
- 1 6 1 0 3 0 /* TxD4 */
- 1 7 1 0 1 0 /* TxD5 */
- 1 9 1 0 2 0 /* TxD6 */
- 1 10 1 0 2 0 /* TxD7 */
- 0 9 2 0 1 0 /* RxD0 */
- 0 10 2 0 1 0 /* RxD1 */
- 0 11 2 0 1 0 /* RxD2 */
- 0 12 2 0 1 0 /* RxD3 */
- 0 13 2 0 1 0 /* RxD4 */
- 1 1 2 0 2 0 /* RxD5 */
- 1 0 2 0 2 0 /* RxD6 */
- 1 4 2 0 2 0 /* RxD7 */
- 0 7 1 0 1 0 /* TX_EN */
- 0 8 1 0 1 0 /* TX_ER */
- 0 15 2 0 1 0 /* RX_DV */
- 0 16 2 0 1 0 /* RX_ER */
- 0 0 2 0 1 0 /* RX_CLK */
- 2 9 1 0 3 0 /* GTX_CLK - CLK10 */
- 2 8 2 0 1 0>; /* GTX125 - CLK9 */
- };
- pio2: ucc_pin@02 {
- pio-map = <
- /* port pin dir open_drain assignment has_irq */
- 0 17 1 0 1 0 /* TxD0 */
- 0 18 1 0 1 0 /* TxD1 */
- 0 19 1 0 1 0 /* TxD2 */
- 0 20 1 0 1 0 /* TxD3 */
- 1 2 1 0 1 0 /* TxD4 */
- 1 3 1 0 2 0 /* TxD5 */
- 1 5 1 0 3 0 /* TxD6 */
- 1 8 1 0 3 0 /* TxD7 */
- 0 23 2 0 1 0 /* RxD0 */
- 0 24 2 0 1 0 /* RxD1 */
- 0 25 2 0 1 0 /* RxD2 */
- 0 26 2 0 1 0 /* RxD3 */
- 0 27 2 0 1 0 /* RxD4 */
- 1 12 2 0 2 0 /* RxD5 */
- 1 13 2 0 3 0 /* RxD6 */
- 1 11 2 0 2 0 /* RxD7 */
- 0 21 1 0 1 0 /* TX_EN */
- 0 22 1 0 1 0 /* TX_ER */
- 0 29 2 0 1 0 /* RX_DV */
- 0 30 2 0 1 0 /* RX_ER */
- 0 31 2 0 1 0 /* RX_CLK */
- 2 2 1 0 2 0 /* GTX_CLK - CLK10 */
- 2 3 2 0 1 0 /* GTX125 - CLK4 */
- 0 1 3 0 2 0 /* MDIO */
- 0 2 1 0 1 0>; /* MDC */
- };
-
- };
- };
-
- qe@e0100000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "qe";
- compatible = "fsl,qe";
- ranges = <0x0 0xe0100000 0x00100000>;
- reg = <0xe0100000 0x480>;
- brg-frequency = <0>;
- bus-frequency = <396000000>;
- fsl,qe-num-riscs = <2>;
- fsl,qe-num-snums = <28>;
-
- muram@10000 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,qe-muram", "fsl,cpm-muram";
- ranges = <0x0 0x00010000 0x0000c000>;
-
- data-only@0 {
- compatible = "fsl,qe-muram-data",
- "fsl,cpm-muram-data";
- reg = <0x0 0xc000>;
- };
- };
-
- timer@440 {
- compatible = "fsl,mpc8360-qe-gtm",
- "fsl,qe-gtm", "fsl,gtm";
- reg = <0x440 0x40>;
- clock-frequency = <132000000>;
- interrupts = <12 13 14 15>;
- interrupt-parent = <&qeic>;
- };
-
- spi@4c0 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x4c0 0x40>;
- interrupts = <2>;
- interrupt-parent = <&qeic>;
- mode = "cpu";
- };
-
- spi@500 {
- cell-index = <1>;
- compatible = "fsl,spi";
- reg = <0x500 0x40>;
- interrupts = <1>;
- interrupt-parent = <&qeic>;
- mode = "cpu";
- };
-
- usb@6c0 {
- compatible = "fsl,mpc8360-qe-usb",
- "fsl,mpc8323-qe-usb";
- reg = <0x6c0 0x40 0x8b00 0x100>;
- interrupts = <11>;
- interrupt-parent = <&qeic>;
- fsl,fullspeed-clock = "clk21";
- fsl,lowspeed-clock = "brg9";
- gpios = <&qe_pio_b 2 0 /* USBOE */
- &qe_pio_b 3 0 /* USBTP */
- &qe_pio_b 8 0 /* USBTN */
- &qe_pio_b 9 0 /* USBRP */
- &qe_pio_b 11 0 /* USBRN */
- &bcsr13 5 0 /* SPEED */
- &bcsr13 4 1>; /* POWER */
- };
-
- enet0: ucc@2000 {
- device_type = "network";
- compatible = "ucc_geth";
- cell-index = <1>;
- reg = <0x2000 0x200>;
- interrupts = <32>;
- interrupt-parent = <&qeic>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- rx-clock-name = "none";
- tx-clock-name = "clk9";
- phy-handle = <&phy0>;
- phy-connection-type = "rgmii-id";
- pio-handle = <&pio1>;
- };
-
- enet1: ucc@3000 {
- device_type = "network";
- compatible = "ucc_geth";
- cell-index = <2>;
- reg = <0x3000 0x200>;
- interrupts = <33>;
- interrupt-parent = <&qeic>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- rx-clock-name = "none";
- tx-clock-name = "clk4";
- phy-handle = <&phy1>;
- phy-connection-type = "rgmii-id";
- pio-handle = <&pio2>;
- };
-
- mdio@2120 {
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x2120 0x18>;
- compatible = "fsl,ucc-mdio";
-
- phy0: ethernet-phy@00 {
- interrupt-parent = <&ipic>;
- interrupts = <17 0x8>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@01 {
- interrupt-parent = <&ipic>;
- interrupts = <18 0x8>;
- reg = <0x1>;
- };
- tbi-phy@2 {
- device_type = "tbi-phy";
- reg = <0x2>;
- };
- };
-
- qeic: interrupt-controller@80 {
- interrupt-controller;
- compatible = "fsl,qe-ic";
- #address-cells = <0>;
- #interrupt-cells = <1>;
- reg = <0x80 0x80>;
- big-endian;
- interrupts = <32 0x8 33 0x8>; // high:32 low:33
- interrupt-parent = <&ipic>;
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 AD17 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 AD18 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 AD19 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 AD21*/
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 AD22*/
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 AD23*/
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 AD24*/
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <66 0x8>;
- bus-range = <0 0>;
- ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- sleep = <&pmc 0x00010000>;
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc836x_rdk.dts b/arch/powerpc/boot/dts/mpc836x_rdk.dts
index daeacbdcf8b4..a0cc1953484d 100644
--- a/arch/powerpc/boot/dts/mpc836x_rdk.dts
+++ b/arch/powerpc/boot/dts/mpc836x_rdk.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8360E RDK Device Tree Source
*
@@ -5,11 +6,6 @@
* Copyright 2007-2008 MontaVista Software, Inc.
*
* Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -416,7 +412,7 @@
gpios = <&qe_pio_e 18 0>;
flash {
- compatible = "stm,nand512-a";
+ compatible = "st,nand512-a";
};
};
diff --git a/arch/powerpc/boot/dts/mpc8377_mds.dts b/arch/powerpc/boot/dts/mpc8377_mds.dts
deleted file mode 100644
index c2c062e8175d..000000000000
--- a/arch/powerpc/boot/dts/mpc8377_mds.dts
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * MPC8377E MDS Device Tree Source
- *
- * Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "fsl,mpc8377emds";
- compatible = "fsl,mpc8377emds","fsl,mpc837xmds";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- pci2 = &pci2;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8377@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>;
- bus-frequency = <0>;
- clock-frequency = <0>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x20000000>; // 512MB at 0
- };
-
- localbus@e0005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8377-elbc", "fsl,elbc", "simple-bus";
- reg = <0xe0005000 0x1000>;
- interrupts = <77 0x8>;
- interrupt-parent = <&ipic>;
-
- // booting from NOR flash
- ranges = <0 0x0 0xfe000000 0x02000000
- 1 0x0 0xf8000000 0x00008000
- 3 0x0 0xe0600000 0x00008000>;
-
- flash@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0 0x0 0x2000000>;
- bank-width = <2>;
- device-width = <1>;
-
- u-boot@0 {
- reg = <0x0 0x100000>;
- read-only;
- };
-
- fs@100000 {
- reg = <0x100000 0x800000>;
- };
-
- kernel@1d00000 {
- reg = <0x1d00000 0x200000>;
- };
-
- dtb@1f00000 {
- reg = <0x1f00000 0x100000>;
- };
- };
-
- bcsr@1,0 {
- reg = <1 0x0 0x8000>;
- compatible = "fsl,mpc837xmds-bcsr";
- };
-
- nand@3,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8377-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <3 0x0 0x8000>;
-
- u-boot@0 {
- reg = <0x0 0x100000>;
- read-only;
- };
-
- kernel@100000 {
- reg = <0x100000 0x300000>;
- };
-
- fs@400000 {
- reg = <0x400000 0x1c00000>;
- };
- };
- };
-
- soc@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <0>;
-
- wdt@200 {
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- sleep-nexus {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "simple-bus";
- sleep = <&pmc 0x0c000000>;
- ranges;
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
-
- rtc@68 {
- compatible = "dallas,ds1374";
- reg = <0x68>;
- interrupts = <19 0x8>;
- interrupt-parent = <&ipic>;
- };
- };
-
- sdhci@2e000 {
- compatible = "fsl,mpc8377-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <42 0x8>;
- interrupt-parent = <&ipic>;
- sdhci,wp-inverted;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <15 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- spi@7000 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x7000 0x1000>;
- interrupts = <16 0x8>;
- interrupt-parent = <&ipic>;
- mode = "cpu";
- };
-
- usb@23000 {
- compatible = "fsl-usb2-dr";
- reg = <0x23000 0x1000>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <38 0x8>;
- dr_mode = "host";
- phy_type = "ulpi";
- sleep = <&pmc 0x00c00000>;
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <32 0x8 33 0x8 34 0x8>;
- phy-connection-type = "mii";
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy2>;
- sleep = <&pmc 0xc0000000>;
- fsl,magic-packet;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy2: ethernet-phy@2 {
- interrupt-parent = <&ipic>;
- interrupts = <17 0x8>;
- reg = <0x2>;
- };
-
- phy3: ethernet-phy@3 {
- interrupt-parent = <&ipic>;
- interrupts = <18 0x8>;
- reg = <0x3>;
- };
-
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 0x8 36 0x8 37 0x8>;
- phy-connection-type = "mii";
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy3>;
- sleep = <&pmc 0x30000000>;
- fsl,magic-packet;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8377-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <0x47 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <0x47 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <0x47 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <0x47 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8377-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <0x47 8>;
- };
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
- "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x9fe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
- sleep = <&pmc 0x03000000>;
- };
-
- sata@18000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x18000 0x1000>;
- interrupts = <44 0x8>;
- interrupt-parent = <&ipic>;
- sleep = <&pmc 0x000000c0>;
- };
-
- sata@19000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x19000 0x1000>;
- interrupts = <45 0x8>;
- interrupt-parent = <&ipic>;
- sleep = <&pmc 0x00000030>;
- };
-
- /* IPIC
- * interrupts cell = <intr #, sense>
- * sense values match linux IORESOURCE_IRQ_* defines:
- * sense == 8: Level, low assertion
- * sense == 2: Edge, high-to-low change
- */
- ipic: pic@700 {
- compatible = "fsl,ipic";
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- };
-
- pmc: power@b00 {
- compatible = "fsl,mpc8377-pmc", "fsl,mpc8349-pmc";
- reg = <0xb00 0x100 0xa00 0x100>;
- interrupts = <80 0x8>;
- interrupt-parent = <&ipic>;
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <66 0x8>;
- bus-range = <0x0 0x0>;
- ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;
- sleep = <&pmc 0x00010000>;
- clock-frequency = <0>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- };
-
- pci1: pcie@e0009000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
- reg = <0xe0009000 0x00001000>;
- ranges = <0x02000000 0 0xa8000000 0xa8000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb8000000 0 0x00800000>;
- bus-range = <0 255>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <0 0 0 1 &ipic 1 8
- 0 0 0 2 &ipic 1 8
- 0 0 0 3 &ipic 1 8
- 0 0 0 4 &ipic 1 8>;
- sleep = <&pmc 0x00300000>;
- clock-frequency = <0>;
-
- pcie@0 {
- #address-cells = <3>;
- #size-cells = <2>;
- device_type = "pci";
- reg = <0 0 0 0 0>;
- ranges = <0x02000000 0 0xa8000000
- 0x02000000 0 0xa8000000
- 0 0x10000000
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00800000>;
- };
- };
-
- pci2: pcie@e000a000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "fsl,mpc8377-pcie", "fsl,mpc8314-pcie";
- reg = <0xe000a000 0x00001000>;
- ranges = <0x02000000 0 0xc8000000 0xc8000000 0 0x10000000
- 0x01000000 0 0x00000000 0xd8000000 0 0x00800000>;
- bus-range = <0 255>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <0 0 0 1 &ipic 2 8
- 0 0 0 2 &ipic 2 8
- 0 0 0 3 &ipic 2 8
- 0 0 0 4 &ipic 2 8>;
- sleep = <&pmc 0x000c0000>;
- clock-frequency = <0>;
-
- pcie@0 {
- #address-cells = <3>;
- #size-cells = <2>;
- device_type = "pci";
- reg = <0 0 0 0 0>;
- ranges = <0x02000000 0 0xc8000000
- 0x02000000 0 0xc8000000
- 0 0x10000000
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00800000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8377_rdb.dts b/arch/powerpc/boot/dts/mpc8377_rdb.dts
index 2b4b6532d69c..7df452efa957 100644
--- a/arch/powerpc/boot/dts/mpc8377_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8377_rdb.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8377E RDB Device Tree Source
*
* Copyright 2007, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -150,7 +146,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
@@ -496,7 +492,7 @@
hdd {
gpios = <&mcu_pio 1 0>;
- linux,default-trigger = "ide-disk";
+ linux,default-trigger = "disk-activity";
};
};
};
diff --git a/arch/powerpc/boot/dts/mpc8377_wlan.dts b/arch/powerpc/boot/dts/mpc8377_wlan.dts
index c0c790168b96..d8e7d40aeae4 100644
--- a/arch/powerpc/boot/dts/mpc8377_wlan.dts
+++ b/arch/powerpc/boot/dts/mpc8377_wlan.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8377E WLAN Device Tree Source
*
* Copyright 2007-2009 Freescale Semiconductor Inc.
* Copyright 2009 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -135,7 +131,7 @@
dfsrr;
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
diff --git a/arch/powerpc/boot/dts/mpc8378_mds.dts b/arch/powerpc/boot/dts/mpc8378_mds.dts
deleted file mode 100644
index 1b82b77f9415..000000000000
--- a/arch/powerpc/boot/dts/mpc8378_mds.dts
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * MPC8378E MDS Device Tree Source
- *
- * Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "fsl,mpc8378emds";
- compatible = "fsl,mpc8378emds","fsl,mpc837xmds";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- pci2 = &pci2;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8378@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>;
- bus-frequency = <0>;
- clock-frequency = <0>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x20000000>; // 512MB at 0
- };
-
- localbus@e0005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8378-elbc", "fsl,elbc", "simple-bus";
- reg = <0xe0005000 0x1000>;
- interrupts = <77 0x8>;
- interrupt-parent = <&ipic>;
-
- // booting from NOR flash
- ranges = <0 0x0 0xfe000000 0x02000000
- 1 0x0 0xf8000000 0x00008000
- 3 0x0 0xe0600000 0x00008000>;
-
- flash@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0 0x0 0x2000000>;
- bank-width = <2>;
- device-width = <1>;
-
- u-boot@0 {
- reg = <0x0 0x100000>;
- read-only;
- };
-
- fs@100000 {
- reg = <0x100000 0x800000>;
- };
-
- kernel@1d00000 {
- reg = <0x1d00000 0x200000>;
- };
-
- dtb@1f00000 {
- reg = <0x1f00000 0x100000>;
- };
- };
-
- bcsr@1,0 {
- reg = <1 0x0 0x8000>;
- compatible = "fsl,mpc837xmds-bcsr";
- };
-
- nand@3,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8378-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <3 0x0 0x8000>;
-
- u-boot@0 {
- reg = <0x0 0x100000>;
- read-only;
- };
-
- kernel@100000 {
- reg = <0x100000 0x300000>;
- };
-
- fs@400000 {
- reg = <0x400000 0x1c00000>;
- };
- };
- };
-
- soc@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <0>;
-
- wdt@200 {
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- sleep-nexus {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "simple-bus";
- sleep = <&pmc 0x0c000000>;
- ranges;
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
-
- rtc@68 {
- compatible = "dallas,ds1374";
- reg = <0x68>;
- interrupts = <19 0x8>;
- interrupt-parent = <&ipic>;
- };
- };
-
- sdhci@2e000 {
- compatible = "fsl,mpc8378-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <42 0x8>;
- interrupt-parent = <&ipic>;
- sdhci,wp-inverted;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <15 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- spi@7000 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x7000 0x1000>;
- interrupts = <16 0x8>;
- interrupt-parent = <&ipic>;
- mode = "cpu";
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8378-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8378-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- };
-
- usb@23000 {
- compatible = "fsl-usb2-dr";
- reg = <0x23000 0x1000>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <38 0x8>;
- dr_mode = "host";
- phy_type = "ulpi";
- sleep = <&pmc 0x00c00000>;
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <32 0x8 33 0x8 34 0x8>;
- phy-connection-type = "mii";
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy2>;
- sleep = <&pmc 0xc0000000>;
- fsl,magic-packet;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy2: ethernet-phy@2 {
- interrupt-parent = <&ipic>;
- interrupts = <17 0x8>;
- reg = <0x2>;
- };
-
- phy3: ethernet-phy@3 {
- interrupt-parent = <&ipic>;
- interrupts = <18 0x8>;
- reg = <0x3>;
- };
-
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 0x8 36 0x8 37 0x8>;
- phy-connection-type = "mii";
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy3>;
- sleep = <&pmc 0x30000000>;
- fsl,magic-packet;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
- "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x9fe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
- sleep = <&pmc 0x03000000>;
- };
-
- /* IPIC
- * interrupts cell = <intr #, sense>
- * sense values match linux IORESOURCE_IRQ_* defines:
- * sense == 8: Level, low assertion
- * sense == 2: Edge, high-to-low change
- */
- ipic: pic@700 {
- compatible = "fsl,ipic";
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- };
-
- pmc: power@b00 {
- compatible = "fsl,mpc8378-pmc", "fsl,mpc8349-pmc";
- reg = <0xb00 0x100 0xa00 0x100>;
- interrupts = <80 0x8>;
- interrupt-parent = <&ipic>;
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <66 0x8>;
- bus-range = <0x0 0x0>;
- ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;
- clock-frequency = <0>;
- sleep = <&pmc 0x00010000>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- };
-
- pci1: pcie@e0009000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "fsl,mpc8378-pcie", "fsl,mpc8314-pcie";
- reg = <0xe0009000 0x00001000>;
- ranges = <0x02000000 0 0xa8000000 0xa8000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb8000000 0 0x00800000>;
- bus-range = <0 255>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <0 0 0 1 &ipic 1 8
- 0 0 0 2 &ipic 1 8
- 0 0 0 3 &ipic 1 8
- 0 0 0 4 &ipic 1 8>;
- sleep = <&pmc 0x00300000>;
- clock-frequency = <0>;
-
- pcie@0 {
- #address-cells = <3>;
- #size-cells = <2>;
- device_type = "pci";
- reg = <0 0 0 0 0>;
- ranges = <0x02000000 0 0xa8000000
- 0x02000000 0 0xa8000000
- 0 0x10000000
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00800000>;
- };
- };
-
- pci2: pcie@e000a000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "fsl,mpc8378-pcie", "fsl,mpc8314-pcie";
- reg = <0xe000a000 0x00001000>;
- ranges = <0x02000000 0 0xc8000000 0xc8000000 0 0x10000000
- 0x01000000 0 0x00000000 0xd8000000 0 0x00800000>;
- bus-range = <0 255>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <0 0 0 1 &ipic 2 8
- 0 0 0 2 &ipic 2 8
- 0 0 0 3 &ipic 2 8
- 0 0 0 4 &ipic 2 8>;
- sleep = <&pmc 0x000c0000>;
- clock-frequency = <0>;
-
- pcie@0 {
- #address-cells = <3>;
- #size-cells = <2>;
- device_type = "pci";
- reg = <0 0 0 0 0>;
- ranges = <0x02000000 0 0xc8000000
- 0x02000000 0 0xc8000000
- 0 0x10000000
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00800000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8378_rdb.dts b/arch/powerpc/boot/dts/mpc8378_rdb.dts
index 74b6a535a413..bdcfe83a561e 100644
--- a/arch/powerpc/boot/dts/mpc8378_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8378_rdb.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8378E RDB Device Tree Source
*
* Copyright 2007, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -150,7 +146,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
@@ -480,7 +476,7 @@
hdd {
gpios = <&mcu_pio 1 0>;
- linux,default-trigger = "ide-disk";
+ linux,default-trigger = "disk-activity";
};
};
};
diff --git a/arch/powerpc/boot/dts/mpc8379_mds.dts b/arch/powerpc/boot/dts/mpc8379_mds.dts
deleted file mode 100644
index 38e5048d65d2..000000000000
--- a/arch/powerpc/boot/dts/mpc8379_mds.dts
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * MPC8379E MDS Device Tree Source
- *
- * Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "fsl,mpc8379emds";
- compatible = "fsl,mpc8379emds","fsl,mpc837xmds";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8379@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>;
- bus-frequency = <0>;
- clock-frequency = <0>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x20000000>; // 512MB at 0
- };
-
- localbus@e0005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8379-elbc", "fsl,elbc", "simple-bus";
- reg = <0xe0005000 0x1000>;
- interrupts = <77 0x8>;
- interrupt-parent = <&ipic>;
-
- // booting from NOR flash
- ranges = <0 0x0 0xfe000000 0x02000000
- 1 0x0 0xf8000000 0x00008000
- 3 0x0 0xe0600000 0x00008000>;
-
- flash@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0 0x0 0x2000000>;
- bank-width = <2>;
- device-width = <1>;
-
- u-boot@0 {
- reg = <0x0 0x100000>;
- read-only;
- };
-
- fs@100000 {
- reg = <0x100000 0x800000>;
- };
-
- kernel@1d00000 {
- reg = <0x1d00000 0x200000>;
- };
-
- dtb@1f00000 {
- reg = <0x1f00000 0x100000>;
- };
- };
-
- bcsr@1,0 {
- reg = <1 0x0 0x8000>;
- compatible = "fsl,mpc837xmds-bcsr";
- };
-
- nand@3,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8379-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <3 0x0 0x8000>;
-
- u-boot@0 {
- reg = <0x0 0x100000>;
- read-only;
- };
-
- kernel@100000 {
- reg = <0x100000 0x300000>;
- };
-
- fs@400000 {
- reg = <0x400000 0x1c00000>;
- };
- };
- };
-
- soc@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <0>;
-
- wdt@200 {
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- sleep-nexus {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "simple-bus";
- sleep = <&pmc 0x0c000000>;
- ranges;
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
-
- rtc@68 {
- compatible = "dallas,ds1374";
- reg = <0x68>;
- interrupts = <19 0x8>;
- interrupt-parent = <&ipic>;
- };
- };
-
- sdhci@2e000 {
- compatible = "fsl,mpc8379-esdhc", "fsl,esdhc";
- reg = <0x2e000 0x1000>;
- interrupts = <42 0x8>;
- interrupt-parent = <&ipic>;
- sdhci,wp-inverted;
- /* Filled in by U-Boot */
- clock-frequency = <0>;
- };
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <15 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- spi@7000 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x7000 0x1000>;
- interrupts = <16 0x8>;
- interrupt-parent = <&ipic>;
- mode = "cpu";
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8379-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8379-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- };
-
- usb@23000 {
- compatible = "fsl-usb2-dr";
- reg = <0x23000 0x1000>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <38 0x8>;
- dr_mode = "host";
- phy_type = "ulpi";
- sleep = <&pmc 0x00c00000>;
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <32 0x8 33 0x8 34 0x8>;
- phy-connection-type = "mii";
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy2>;
- sleep = <&pmc 0xc0000000>;
- fsl,magic-packet;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy2: ethernet-phy@2 {
- interrupt-parent = <&ipic>;
- interrupts = <17 0x8>;
- reg = <0x2>;
- };
-
- phy3: ethernet-phy@3 {
- interrupt-parent = <&ipic>;
- interrupts = <18 0x8>;
- reg = <0x3>;
- };
-
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 0x8 36 0x8 37 0x8>;
- phy-connection-type = "mii";
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy3>;
- sleep = <&pmc 0x30000000>;
- fsl,magic-packet;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec3.0", "fsl,sec2.4", "fsl,sec2.2",
- "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x9fe>;
- fsl,descriptor-types-mask = <0x3ab0ebf>;
- sleep = <&pmc 0x03000000>;
- };
-
- sata@18000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x18000 0x1000>;
- interrupts = <44 0x8>;
- interrupt-parent = <&ipic>;
- sleep = <&pmc 0x000000c0>;
- };
-
- sata@19000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x19000 0x1000>;
- interrupts = <45 0x8>;
- interrupt-parent = <&ipic>;
- sleep = <&pmc 0x00000030>;
- };
-
- sata@1a000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x1a000 0x1000>;
- interrupts = <46 0x8>;
- interrupt-parent = <&ipic>;
- sleep = <&pmc 0x0000000c>;
- };
-
- sata@1b000 {
- compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
- reg = <0x1b000 0x1000>;
- interrupts = <47 0x8>;
- interrupt-parent = <&ipic>;
- sleep = <&pmc 0x00000003>;
- };
-
- /* IPIC
- * interrupts cell = <intr #, sense>
- * sense values match linux IORESOURCE_IRQ_* defines:
- * sense == 8: Level, low assertion
- * sense == 2: Edge, high-to-low change
- */
- ipic: pic@700 {
- compatible = "fsl,ipic";
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- };
-
- pmc: power@b00 {
- compatible = "fsl,mpc8379-pmc", "fsl,mpc8349-pmc";
- reg = <0xb00 0x100 0xa00 0x100>;
- interrupts = <80 0x8>;
- interrupt-parent = <&ipic>;
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &ipic 20 0x8
- 0x8800 0x0 0x0 0x2 &ipic 21 0x8
- 0x8800 0x0 0x0 0x3 &ipic 22 0x8
- 0x8800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x12 */
- 0x9000 0x0 0x0 0x1 &ipic 22 0x8
- 0x9000 0x0 0x0 0x2 &ipic 23 0x8
- 0x9000 0x0 0x0 0x3 &ipic 20 0x8
- 0x9000 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x13 */
- 0x9800 0x0 0x0 0x1 &ipic 23 0x8
- 0x9800 0x0 0x0 0x2 &ipic 20 0x8
- 0x9800 0x0 0x0 0x3 &ipic 21 0x8
- 0x9800 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &ipic 20 0x8
- 0xa800 0x0 0x0 0x2 &ipic 21 0x8
- 0xa800 0x0 0x0 0x3 &ipic 22 0x8
- 0xa800 0x0 0x0 0x4 &ipic 23 0x8
-
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &ipic 23 0x8
- 0xb000 0x0 0x0 0x2 &ipic 20 0x8
- 0xb000 0x0 0x0 0x3 &ipic 21 0x8
- 0xb000 0x0 0x0 0x4 &ipic 22 0x8
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &ipic 22 0x8
- 0xb800 0x0 0x0 0x2 &ipic 23 0x8
- 0xb800 0x0 0x0 0x3 &ipic 20 0x8
- 0xb800 0x0 0x0 0x4 &ipic 21 0x8
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &ipic 21 0x8
- 0xc000 0x0 0x0 0x2 &ipic 22 0x8
- 0xc000 0x0 0x0 0x3 &ipic 23 0x8
- 0xc000 0x0 0x0 0x4 &ipic 20 0x8>;
- interrupt-parent = <&ipic>;
- interrupts = <66 0x8>;
- bus-range = <0x0 0x0>;
- ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe0300000 0x0 0x00100000>;
- sleep = <&pmc 0x00010000>;
- clock-frequency = <0>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8379_rdb.dts b/arch/powerpc/boot/dts/mpc8379_rdb.dts
index 3b5cbac85368..a5f702304a35 100644
--- a/arch/powerpc/boot/dts/mpc8379_rdb.dts
+++ b/arch/powerpc/boot/dts/mpc8379_rdb.dts
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8379E RDB Device Tree Source
*
* Copyright 2007, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -148,7 +144,7 @@
};
at24@50 {
- compatible = "at24,24c256";
+ compatible = "atmel,24c256";
reg = <0x50>;
};
@@ -446,7 +442,7 @@
hdd {
gpios = <&mcu_pio 1 0>;
- linux,default-trigger = "ide-disk";
+ linux,default-trigger = "disk-activity";
};
};
};
diff --git a/arch/powerpc/boot/dts/mpc8540ads.dts b/arch/powerpc/boot/dts/mpc8540ads.dts
deleted file mode 100644
index 7ce274c9a2d5..000000000000
--- a/arch/powerpc/boot/dts/mpc8540ads.dts
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * MPC8540 ADS Device Tree Source
- *
- * Copyright 2006, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e500v2_power_isa.dtsi"
-
-/ {
- model = "MPC8540ADS";
- compatible = "MPC8540ADS", "MPC85xxADS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- ethernet2 = &enet2;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8540@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <0x8000>; // L1, 32K
- i-cache-size = <0x8000>; // L1, 32K
- timebase-frequency = <0>; // 33 MHz, from uboot
- bus-frequency = <0>; // 166 MHz
- clock-frequency = <0>; // 825 MHz, from uboot
- next-level-cache = <&L2>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x8000000>; // 128M at 0x0
- };
-
- soc8540@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x100000>;
- bus-frequency = <0>;
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <8>;
- };
-
- ecm@1000 {
- compatible = "fsl,mpc8540-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,mpc8540-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,mpc8540-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x40000>; // L2, 256K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8540-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8540-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8540-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8540-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x1>;
- };
- phy3: ethernet-phy@3 {
- interrupt-parent = <&mpic>;
- interrupts = <7 1>;
- reg = <0x3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "FEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <41 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy3>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
- };
-
- pci0: pci@e0008000 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x02 */
- 0x1000 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x1000 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x1000 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x1000 0x0 0x0 0x4 &mpic 0x4 0x1
-
- /* IDSEL 0x03 */
- 0x1800 0x0 0x0 0x1 &mpic 0x4 0x1
- 0x1800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x1800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x1800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x04 */
- 0x2000 0x0 0x0 0x1 &mpic 0x3 0x1
- 0x2000 0x0 0x0 0x2 &mpic 0x4 0x1
- 0x2000 0x0 0x0 0x3 &mpic 0x1 0x1
- 0x2000 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* IDSEL 0x05 */
- 0x2800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0x2800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0x2800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0x2800 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 0x0c */
- 0x6000 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x6000 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x6000 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x6000 0x0 0x0 0x4 &mpic 0x4 0x1
-
- /* IDSEL 0x0d */
- 0x6800 0x0 0x0 0x1 &mpic 0x4 0x1
- 0x6800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x6800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x6800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x0e */
- 0x7000 0x0 0x0 0x1 &mpic 0x3 0x1
- 0x7000 0x0 0x0 0x2 &mpic 0x4 0x1
- 0x7000 0x0 0x0 0x3 &mpic 0x1 0x1
- 0x7000 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* IDSEL 0x0f */
- 0x7800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0x7800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0x7800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0x7800 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 0x12 */
- 0x9000 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x9000 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x9000 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x9000 0x0 0x0 0x4 &mpic 0x4 0x1
-
- /* IDSEL 0x13 */
- 0x9800 0x0 0x0 0x1 &mpic 0x4 0x1
- 0x9800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x9800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x9800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x14 */
- 0xa000 0x0 0x0 0x1 &mpic 0x3 0x1
- 0xa000 0x0 0x0 0x2 &mpic 0x4 0x1
- 0xa000 0x0 0x0 0x3 &mpic 0x1 0x1
- 0xa000 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0xa800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0xa800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0xa800 0x0 0x0 0x4 &mpic 0x1 0x1>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xe2000000 0x0 0x100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008000 0x1000>;
- compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
- device_type = "pci";
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8541cds.dts b/arch/powerpc/boot/dts/mpc8541cds.dts
deleted file mode 100644
index 4d35a3e0fb02..000000000000
--- a/arch/powerpc/boot/dts/mpc8541cds.dts
+++ /dev/null
@@ -1,379 +0,0 @@
-/*
- * MPC8541 CDS Device Tree Source
- *
- * Copyright 2006, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e500v2_power_isa.dtsi"
-
-/ {
- model = "MPC8541CDS";
- compatible = "MPC8541CDS", "MPC85xxCDS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8541@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <0x8000>; // L1, 32K
- i-cache-size = <0x8000>; // L1, 32K
- timebase-frequency = <0>; // 33 MHz, from uboot
- bus-frequency = <0>; // 166 MHz
- clock-frequency = <0>; // 825 MHz, from uboot
- next-level-cache = <&L2>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x8000000>; // 128M at 0x0
- };
-
- soc8541@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x100000>;
- bus-frequency = <0>;
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <8>;
- };
-
- ecm@1000 {
- compatible = "fsl,mpc8541-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,mpc8541-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,mpc8541-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x40000>; // L2, 256K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8541-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8541-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8541-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8541-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8541-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x1>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x7e>;
- fsl,descriptor-types-mask = <0x01010ebf>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- cpm@919c0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8541-cpm", "fsl,cpm2";
- reg = <0x919c0 0x30>;
- ranges;
-
- muram@80000 {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x0 0x80000 0x10000>;
-
- data@0 {
- compatible = "fsl,cpm-muram-data";
- reg = <0x0 0x2000 0x9000 0x1000>;
- };
- };
-
- brg@919f0 {
- compatible = "fsl,mpc8541-brg",
- "fsl,cpm2-brg",
- "fsl,cpm-brg";
- reg = <0x919f0 0x10 0x915f0 0x10>;
- };
-
- cpmpic: pic@90c00 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <46 2>;
- interrupt-parent = <&mpic>;
- reg = <0x90c00 0x80>;
- compatible = "fsl,mpc8541-cpm-pic", "fsl,cpm2-pic";
- };
- };
- };
-
- pci0: pci@e0008000 {
- interrupt-map-mask = <0x1f800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x10 */
- 0x8000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x8000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x8000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x8000 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x8800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x8800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x8800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x12 (Slot 1) */
- 0x9000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x9000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x9000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x9000 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x13 (Slot 2) */
- 0x9800 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x9800 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x9800 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x9800 0x0 0x0 0x4 &mpic 0x0 0x1
-
- /* IDSEL 0x14 (Slot 3) */
- 0xa000 0x0 0x0 0x1 &mpic 0x2 0x1
- 0xa000 0x0 0x0 0x2 &mpic 0x3 0x1
- 0xa000 0x0 0x0 0x3 &mpic 0x0 0x1
- 0xa000 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 0x15 (Slot 4) */
- 0xa800 0x0 0x0 0x1 &mpic 0x3 0x1
- 0xa800 0x0 0x0 0x2 &mpic 0x0 0x1
- 0xa800 0x0 0x0 0x3 &mpic 0x1 0x1
- 0xa800 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* Bus 1 (Tundra Bridge) */
- /* IDSEL 0x12 (ISA bridge) */
- 0x19000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x19000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x19000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x19000 0x0 0x0 0x4 &mpic 0x3 0x1>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xe2000000 0x0 0x100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008000 0x1000>;
- compatible = "fsl,mpc8540-pci";
- device_type = "pci";
-
- i8259@19000 {
- interrupt-controller;
- device_type = "interrupt-controller";
- reg = <0x19000 0x0 0x0 0x0 0x1>;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- compatible = "chrp,iic";
- interrupts = <1>;
- interrupt-parent = <&pci0>;
- };
- };
-
- pci1: pci@e0009000 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &mpic 0xb 0x1
- 0xa800 0x0 0x0 0x2 &mpic 0xb 0x1
- 0xa800 0x0 0x0 0x3 &mpic 0xb 0x1
- 0xa800 0x0 0x0 0x4 &mpic 0xb 0x1>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xe3000000 0x0 0x100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0009000 0x1000>;
- compatible = "fsl,mpc8540-pci";
- device_type = "pci";
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8548cds.dtsi b/arch/powerpc/boot/dts/mpc8548cds.dtsi
deleted file mode 100644
index 3bc7d4711220..000000000000
--- a/arch/powerpc/boot/dts/mpc8548cds.dtsi
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * MPC8548CDS Device Tree Source stub (no addresses or top-level ranges)
- *
- * Copyright 2012 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-&board_lbc {
- nor@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0x0 0x0 0x01000000>;
- bank-width = <2>;
- device-width = <2>;
-
- partition@0 {
- reg = <0x0 0x0b00000>;
- label = "ramdisk-nor";
- };
-
- partition@300000 {
- reg = <0x0b00000 0x0400000>;
- label = "kernel-nor";
- };
-
- partition@700000 {
- reg = <0x0f00000 0x060000>;
- label = "dtb-nor";
- };
-
- partition@760000 {
- reg = <0x0f60000 0x020000>;
- label = "env-nor";
- read-only;
- };
-
- partition@780000 {
- reg = <0x0f80000 0x080000>;
- label = "u-boot-nor";
- read-only;
- };
- };
-
- board-control@1,0 {
- compatible = "fsl,mpc8548cds-fpga";
- reg = <0x1 0x0 0x1000>;
- };
-};
-
-&board_soc {
- i2c@3000 {
- eeprom@50 {
- compatible = "atmel,24c64";
- reg = <0x50>;
- };
-
- eeprom@56 {
- compatible = "atmel,24c64";
- reg = <0x56>;
- };
-
- eeprom@57 {
- compatible = "atmel,24c64";
- reg = <0x57>;
- };
- };
-
- i2c@3100 {
- eeprom@50 {
- compatible = "atmel,24c64";
- reg = <0x50>;
- };
- };
-
- enet0: ethernet@24000 {
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- };
-
- mdio@24520 {
- phy0: ethernet-phy@0 {
- interrupts = <5 1 0 0>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupts = <5 1 0 0>;
- reg = <0x1>;
- };
- phy2: ethernet-phy@2 {
- interrupts = <5 1 0 0>;
- reg = <0x2>;
- };
- phy3: ethernet-phy@3 {
- interrupts = <5 1 0 0>;
- reg = <0x3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
-
- enet1: ethernet@25000 {
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
- };
-
- mdio@25520 {
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
-
- enet2: ethernet@26000 {
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- };
-
- mdio@26520 {
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
-
- enet3: ethernet@27000 {
- tbi-handle = <&tbi3>;
- phy-handle = <&phy3>;
- };
-
- mdio@27520 {
- tbi3: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
-};
-
-&board_pci0 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 0x4 (PCIX Slot 2) */
- 0x2000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0x2000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0x2000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0x2000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0x5 (PCIX Slot 3) */
- 0x2800 0x0 0x0 0x1 &mpic 0x1 0x1 0 0
- 0x2800 0x0 0x0 0x2 &mpic 0x2 0x1 0 0
- 0x2800 0x0 0x0 0x3 &mpic 0x3 0x1 0 0
- 0x2800 0x0 0x0 0x4 &mpic 0x0 0x1 0 0
-
- /* IDSEL 0x6 (PCIX Slot 4) */
- 0x3000 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
- 0x3000 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
- 0x3000 0x0 0x0 0x3 &mpic 0x0 0x1 0 0
- 0x3000 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
-
- /* IDSEL 0x8 (PCIX Slot 5) */
- 0x4000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0x4000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0x4000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0x4000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0xC (Tsi310 bridge) */
- 0x6000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0x6000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0x6000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0x6000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0x14 (Slot 2) */
- 0xa000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0xa000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0xa000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0xa000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0x15 (Slot 3) */
- 0xa800 0x0 0x0 0x1 &mpic 0x1 0x1 0 0
- 0xa800 0x0 0x0 0x2 &mpic 0x2 0x1 0 0
- 0xa800 0x0 0x0 0x3 &mpic 0x3 0x1 0 0
- 0xa800 0x0 0x0 0x4 &mpic 0x0 0x1 0 0
-
- /* IDSEL 0x16 (Slot 4) */
- 0xb000 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
- 0xb000 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
- 0xb000 0x0 0x0 0x3 &mpic 0x0 0x1 0 0
- 0xb000 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
-
- /* IDSEL 0x18 (Slot 5) */
- 0xc000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0xc000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0xc000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0xc000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0x1C (Tsi310 bridge PCI primary) */
- 0xe000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0xe000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0xe000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0xe000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0>;
-
- pci_bridge@1c {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x00 (PrPMC Site) */
- 0000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0x04 (VIA chip) */
- 0x2000 0x0 0x0 0x1 &mpic 0x0 0x1 0 0
- 0x2000 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0x2000 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0x2000 0x0 0x0 0x4 &mpic 0x3 0x1 0 0
-
- /* IDSEL 0x05 (8139) */
- 0x2800 0x0 0x0 0x1 &mpic 0x1 0x1 0 0
-
- /* IDSEL 0x06 (Slot 6) */
- 0x3000 0x0 0x0 0x1 &mpic 0x2 0x1 0 0
- 0x3000 0x0 0x0 0x2 &mpic 0x3 0x1 0 0
- 0x3000 0x0 0x0 0x3 &mpic 0x0 0x1 0 0
- 0x3000 0x0 0x0 0x4 &mpic 0x1 0x1 0 0
-
- /* IDESL 0x07 (Slot 7) */
- 0x3800 0x0 0x0 0x1 &mpic 0x3 0x1 0 0
- 0x3800 0x0 0x0 0x2 &mpic 0x0 0x1 0 0
- 0x3800 0x0 0x0 0x3 &mpic 0x1 0x1 0 0
- 0x3800 0x0 0x0 0x4 &mpic 0x2 0x1 0 0>;
-
- reg = <0xe000 0x0 0x0 0x0 0x0>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- ranges = <0x2000000 0x0 0x80000000
- 0x2000000 0x0 0x80000000
- 0x0 0x20000000
- 0x1000000 0x0 0x0
- 0x1000000 0x0 0x0
- 0x0 0x80000>;
- clock-frequency = <33333333>;
-
- isa@4 {
- device_type = "isa";
- #interrupt-cells = <2>;
- #size-cells = <1>;
- #address-cells = <2>;
- reg = <0x2000 0x0 0x0 0x0 0x0>;
- ranges = <0x1 0x0 0x1000000 0x0 0x0 0x1000>;
- interrupt-parent = <&i8259>;
-
- i8259: interrupt-controller@20 {
- interrupt-controller;
- device_type = "interrupt-controller";
- reg = <0x1 0x20 0x2
- 0x1 0xa0 0x2
- 0x1 0x4d0 0x2>;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- compatible = "chrp,iic";
- interrupts = <0 1 0 0>;
- interrupt-parent = <&mpic>;
- };
-
- rtc@70 {
- compatible = "pnpPNP,b00";
- reg = <0x1 0x70 0x2>;
- };
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8548cds_32b.dts b/arch/powerpc/boot/dts/mpc8548cds_32b.dts
deleted file mode 100644
index 6fd63163fc6b..000000000000
--- a/arch/powerpc/boot/dts/mpc8548cds_32b.dts
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * MPC8548 CDS Device Tree Source (32-bit address map)
- *
- * Copyright 2006, 2008, 2011-2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/include/ "fsl/mpc8548si-pre.dtsi"
-
-/ {
- model = "MPC8548CDS";
- compatible = "MPC8548CDS", "MPC85xxCDS";
-
- memory {
- device_type = "memory";
- reg = <0 0 0x0 0x8000000>; // 128M at 0x0
- };
-
- board_lbc: lbc: localbus@e0005000 {
- reg = <0 0xe0005000 0 0x1000>;
-
- ranges = <0x0 0x0 0x0 0xff000000 0x01000000
- 0x1 0x0 0x0 0xf8004000 0x00001000>;
-
- };
-
- board_soc: soc: soc8548@e0000000 {
- ranges = <0 0x0 0xe0000000 0x100000>;
- };
-
- board_pci0: pci0: pci@e0008000 {
- reg = <0 0xe0008000 0 0x1000>;
- ranges = <0x2000000 0x0 0x80000000 0 0x80000000 0x0 0x10000000
- 0x1000000 0x0 0x00000000 0 0xe2000000 0x0 0x800000>;
- clock-frequency = <66666666>;
- };
-
- pci1: pci@e0009000 {
- reg = <0 0xe0009000 0 0x1000>;
- ranges = <0x2000000 0x0 0x90000000 0 0x90000000 0x0 0x10000000
- 0x1000000 0x0 0x00000000 0 0xe2800000 0x0 0x800000>;
- clock-frequency = <66666666>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &mpic 0xb 0x1 0 0
- 0xa800 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0xa800 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0xa800 0x0 0x0 0x4 &mpic 0x3 0x1 0 0>;
- };
-
- pci2: pcie@e000a000 {
- reg = <0 0xe000a000 0 0x1000>;
- ranges = <0x2000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000
- 0x1000000 0x0 0x00000000 0 0xe3000000 0x0 0x100000>;
- pcie@0 {
- ranges = <0x2000000 0x0 0xa0000000
- 0x2000000 0x0 0xa0000000
- 0x0 0x20000000
-
- 0x1000000 0x0 0x0
- 0x1000000 0x0 0x0
- 0x0 0x100000>;
- };
- };
-
- rio: rapidio@e00c0000 {
- reg = <0x0 0xe00c0000 0x0 0x20000>;
- port1 {
- ranges = <0x0 0x0 0x0 0xc0000000 0x0 0x20000000>;
- };
- };
-};
-
-/*
- * mpc8548cds.dtsi must be last to ensure board_pci0 overrides pci0 settings
- * for interrupt-map & interrupt-map-mask.
- */
-
-/include/ "fsl/mpc8548si-post.dtsi"
-/include/ "mpc8548cds.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8548cds_36b.dts b/arch/powerpc/boot/dts/mpc8548cds_36b.dts
deleted file mode 100644
index 10e551b11bd6..000000000000
--- a/arch/powerpc/boot/dts/mpc8548cds_36b.dts
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * MPC8548 CDS Device Tree Source (36-bit address map)
- *
- * Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/include/ "fsl/mpc8548si-pre.dtsi"
-
-/ {
- model = "MPC8548CDS";
- compatible = "MPC8548CDS", "MPC85xxCDS";
-
- memory {
- device_type = "memory";
- reg = <0 0 0x0 0x8000000>; // 128M at 0x0
- };
-
- board_lbc: lbc: localbus@fe0005000 {
- reg = <0xf 0xe0005000 0 0x1000>;
-
- ranges = <0x0 0x0 0xf 0xff000000 0x01000000
- 0x1 0x0 0xf 0xf8004000 0x00001000>;
-
- };
-
- board_soc: soc: soc8548@fe0000000 {
- ranges = <0 0xf 0xe0000000 0x100000>;
- };
-
- board_pci0: pci0: pci@fe0008000 {
- reg = <0xf 0xe0008000 0 0x1000>;
- ranges = <0x2000000 0x0 0xe0000000 0xc 0x00000000 0x0 0x10000000
- 0x1000000 0x0 0x00000000 0xf 0xe2000000 0x0 0x800000>;
- clock-frequency = <66666666>;
- };
-
- pci1: pci@fe0009000 {
- reg = <0xf 0xe0009000 0 0x1000>;
- ranges = <0x2000000 0x0 0xe0000000 0xc 0x10000000 0x0 0x10000000
- 0x1000000 0x0 0x00000000 0xf 0xe2800000 0x0 0x800000>;
- clock-frequency = <66666666>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &mpic 0xb 0x1 0 0
- 0xa800 0x0 0x0 0x2 &mpic 0x1 0x1 0 0
- 0xa800 0x0 0x0 0x3 &mpic 0x2 0x1 0 0
- 0xa800 0x0 0x0 0x4 &mpic 0x3 0x1 0 0>;
- };
-
- pci2: pcie@fe000a000 {
- reg = <0xf 0xe000a000 0 0x1000>;
- ranges = <0x2000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
- 0x1000000 0x0 0x00000000 0xf 0xe3000000 0x0 0x100000>;
- pcie@0 {
- ranges = <0x2000000 0x0 0xa0000000
- 0x2000000 0x0 0xa0000000
- 0x0 0x20000000
-
- 0x1000000 0x0 0x0
- 0x1000000 0x0 0x0
- 0x0 0x100000>;
- };
- };
-
- rio: rapidio@fe00c0000 {
- reg = <0xf 0xe00c0000 0x0 0x20000>;
- port1 {
- ranges = <0x0 0x0 0xc 0x40000000 0x0 0x20000000>;
- };
- };
-};
-
-/*
- * mpc8548cds.dtsi must be last to ensure board_pci0 overrides pci0 settings
- * for interrupt-map & interrupt-map-mask.
- */
-
-/include/ "fsl/mpc8548si-post.dtsi"
-/include/ "mpc8548cds.dtsi"
diff --git a/arch/powerpc/boot/dts/mpc8555cds.dts b/arch/powerpc/boot/dts/mpc8555cds.dts
deleted file mode 100644
index f115f21cb0ae..000000000000
--- a/arch/powerpc/boot/dts/mpc8555cds.dts
+++ /dev/null
@@ -1,379 +0,0 @@
-/*
- * MPC8555 CDS Device Tree Source
- *
- * Copyright 2006, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e500v2_power_isa.dtsi"
-
-/ {
- model = "MPC8555CDS";
- compatible = "MPC8555CDS", "MPC85xxCDS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8555@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <0x8000>; // L1, 32K
- i-cache-size = <0x8000>; // L1, 32K
- timebase-frequency = <0>; // 33 MHz, from uboot
- bus-frequency = <0>; // 166 MHz
- clock-frequency = <0>; // 825 MHz, from uboot
- next-level-cache = <&L2>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x8000000>; // 128M at 0x0
- };
-
- soc8555@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x100000>;
- bus-frequency = <0>;
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <8>;
- };
-
- ecm@1000 {
- compatible = "fsl,mpc8555-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,mpc8555-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,mpc8555-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x40000>; // L2, 256K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8555-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8555-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8555-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8555-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8555-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x1>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x7e>;
- fsl,descriptor-types-mask = <0x01010ebf>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- cpm@919c0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8555-cpm", "fsl,cpm2";
- reg = <0x919c0 0x30>;
- ranges;
-
- muram@80000 {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x0 0x80000 0x10000>;
-
- data@0 {
- compatible = "fsl,cpm-muram-data";
- reg = <0x0 0x2000 0x9000 0x1000>;
- };
- };
-
- brg@919f0 {
- compatible = "fsl,mpc8555-brg",
- "fsl,cpm2-brg",
- "fsl,cpm-brg";
- reg = <0x919f0 0x10 0x915f0 0x10>;
- };
-
- cpmpic: pic@90c00 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <46 2>;
- interrupt-parent = <&mpic>;
- reg = <0x90c00 0x80>;
- compatible = "fsl,mpc8555-cpm-pic", "fsl,cpm2-pic";
- };
- };
- };
-
- pci0: pci@e0008000 {
- interrupt-map-mask = <0x1f800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x10 */
- 0x8000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x8000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x8000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x8000 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x8800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x8800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x8800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x12 (Slot 1) */
- 0x9000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x9000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x9000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x9000 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x13 (Slot 2) */
- 0x9800 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x9800 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x9800 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x9800 0x0 0x0 0x4 &mpic 0x0 0x1
-
- /* IDSEL 0x14 (Slot 3) */
- 0xa000 0x0 0x0 0x1 &mpic 0x2 0x1
- 0xa000 0x0 0x0 0x2 &mpic 0x3 0x1
- 0xa000 0x0 0x0 0x3 &mpic 0x0 0x1
- 0xa000 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 0x15 (Slot 4) */
- 0xa800 0x0 0x0 0x1 &mpic 0x3 0x1
- 0xa800 0x0 0x0 0x2 &mpic 0x0 0x1
- 0xa800 0x0 0x0 0x3 &mpic 0x1 0x1
- 0xa800 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* Bus 1 (Tundra Bridge) */
- /* IDSEL 0x12 (ISA bridge) */
- 0x19000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x19000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x19000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x19000 0x0 0x0 0x4 &mpic 0x3 0x1>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xe2000000 0x0 0x100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008000 0x1000>;
- compatible = "fsl,mpc8540-pci";
- device_type = "pci";
-
- i8259@19000 {
- interrupt-controller;
- device_type = "interrupt-controller";
- reg = <0x19000 0x0 0x0 0x0 0x1>;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- compatible = "chrp,iic";
- interrupts = <1>;
- interrupt-parent = <&pci0>;
- };
- };
-
- pci1: pci@e0009000 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x15 */
- 0xa800 0x0 0x0 0x1 &mpic 0xb 0x1
- 0xa800 0x0 0x0 0x2 &mpic 0xb 0x1
- 0xa800 0x0 0x0 0x3 &mpic 0xb 0x1
- 0xa800 0x0 0x0 0x4 &mpic 0xb 0x1>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xe3000000 0x0 0x100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0009000 0x1000>;
- compatible = "fsl,mpc8540-pci";
- device_type = "pci";
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8560ads.dts b/arch/powerpc/boot/dts/mpc8560ads.dts
deleted file mode 100644
index 0d70921d6125..000000000000
--- a/arch/powerpc/boot/dts/mpc8560ads.dts
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * MPC8560 ADS Device Tree Source
- *
- * Copyright 2006, 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e500v2_power_isa.dtsi"
-
-/ {
- model = "MPC8560ADS";
- compatible = "MPC8560ADS", "MPC85xxADS";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- ethernet2 = &enet2;
- ethernet3 = &enet3;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8560@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <0x8000>; // L1, 32K
- i-cache-size = <0x8000>; // L1, 32K
- timebase-frequency = <82500000>;
- bus-frequency = <330000000>;
- clock-frequency = <825000000>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x10000000>;
- };
-
- soc8560@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x0 0xe0000000 0x100000>;
- bus-frequency = <330000000>;
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <8>;
- };
-
- ecm@1000 {
- compatible = "fsl,mpc8560-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,mpc8540-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <18 2>;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,mpc8540-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <32>; // 32 bytes
- cache-size = <0x40000>; // L2, 256K
- interrupt-parent = <&mpic>;
- interrupts = <16 2>;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8560-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8560-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8560-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8560-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8560-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <5 1>;
- reg = <0x1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&mpic>;
- interrupts = <7 1>;
- reg = <0x2>;
- };
- phy3: ethernet-phy@3 {
- interrupt-parent = <&mpic>;
- interrupts = <7 1>;
- reg = <0x3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- cpm@919c0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8560-cpm", "fsl,cpm2";
- reg = <0x919c0 0x30>;
- ranges;
-
- muram@80000 {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x0 0x80000 0x10000>;
-
- data@0 {
- compatible = "fsl,cpm-muram-data";
- reg = <0x0 0x4000 0x9000 0x2000>;
- };
- };
-
- brg@919f0 {
- compatible = "fsl,mpc8560-brg",
- "fsl,cpm2-brg",
- "fsl,cpm-brg";
- reg = <0x919f0 0x10 0x915f0 0x10>;
- clock-frequency = <165000000>;
- };
-
- cpmpic: pic@90c00 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <46 2>;
- interrupt-parent = <&mpic>;
- reg = <0x90c00 0x80>;
- compatible = "fsl,mpc8560-cpm-pic", "fsl,cpm2-pic";
- };
-
- serial0: serial@91a00 {
- device_type = "serial";
- compatible = "fsl,mpc8560-scc-uart",
- "fsl,cpm2-scc-uart";
- reg = <0x91a00 0x20 0x88000 0x100>;
- fsl,cpm-brg = <1>;
- fsl,cpm-command = <0x800000>;
- current-speed = <115200>;
- interrupts = <40 8>;
- interrupt-parent = <&cpmpic>;
- };
-
- serial1: serial@91a20 {
- device_type = "serial";
- compatible = "fsl,mpc8560-scc-uart",
- "fsl,cpm2-scc-uart";
- reg = <0x91a20 0x20 0x88100 0x100>;
- fsl,cpm-brg = <2>;
- fsl,cpm-command = <0x4a00000>;
- current-speed = <115200>;
- interrupts = <41 8>;
- interrupt-parent = <&cpmpic>;
- };
-
- enet2: ethernet@91320 {
- device_type = "network";
- compatible = "fsl,mpc8560-fcc-enet",
- "fsl,cpm2-fcc-enet";
- reg = <0x91320 0x20 0x88500 0x100 0x913b0 0x1>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- fsl,cpm-command = <0x16200300>;
- interrupts = <33 8>;
- interrupt-parent = <&cpmpic>;
- phy-handle = <&phy2>;
- };
-
- enet3: ethernet@91340 {
- device_type = "network";
- compatible = "fsl,mpc8560-fcc-enet",
- "fsl,cpm2-fcc-enet";
- reg = <0x91340 0x20 0x88600 0x100 0x913d0 0x1>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- fsl,cpm-command = <0x1a400300>;
- interrupts = <34 8>;
- interrupt-parent = <&cpmpic>;
- phy-handle = <&phy3>;
- };
- };
- };
-
- pci0: pci@e0008000 {
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
- device_type = "pci";
- reg = <0xe0008000 0x1000>;
- clock-frequency = <66666666>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x2 */
- 0x1000 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x1000 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x1000 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x1000 0x0 0x0 0x4 &mpic 0x4 0x1
-
- /* IDSEL 0x3 */
- 0x1800 0x0 0x0 0x1 &mpic 0x4 0x1
- 0x1800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x1800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x1800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 0x4 */
- 0x2000 0x0 0x0 0x1 &mpic 0x3 0x1
- 0x2000 0x0 0x0 0x2 &mpic 0x4 0x1
- 0x2000 0x0 0x0 0x3 &mpic 0x1 0x1
- 0x2000 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* IDSEL 0x5 */
- 0x2800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0x2800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0x2800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0x2800 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 12 */
- 0x6000 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x6000 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x6000 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x6000 0x0 0x0 0x4 &mpic 0x4 0x1
-
- /* IDSEL 13 */
- 0x6800 0x0 0x0 0x1 &mpic 0x4 0x1
- 0x6800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x6800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x6800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 14*/
- 0x7000 0x0 0x0 0x1 &mpic 0x3 0x1
- 0x7000 0x0 0x0 0x2 &mpic 0x4 0x1
- 0x7000 0x0 0x0 0x3 &mpic 0x1 0x1
- 0x7000 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* IDSEL 15 */
- 0x7800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0x7800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0x7800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0x7800 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 18 */
- 0x9000 0x0 0x0 0x1 &mpic 0x1 0x1
- 0x9000 0x0 0x0 0x2 &mpic 0x2 0x1
- 0x9000 0x0 0x0 0x3 &mpic 0x3 0x1
- 0x9000 0x0 0x0 0x4 &mpic 0x4 0x1
-
- /* IDSEL 19 */
- 0x9800 0x0 0x0 0x1 &mpic 0x4 0x1
- 0x9800 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x9800 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x9800 0x0 0x0 0x4 &mpic 0x3 0x1
-
- /* IDSEL 20 */
- 0xa000 0x0 0x0 0x1 &mpic 0x3 0x1
- 0xa000 0x0 0x0 0x2 &mpic 0x4 0x1
- 0xa000 0x0 0x0 0x3 &mpic 0x1 0x1
- 0xa000 0x0 0x0 0x4 &mpic 0x2 0x1
-
- /* IDSEL 21 */
- 0xa800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0xa800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0xa800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0xa800 0x0 0x0 0x4 &mpic 0x1 0x1>;
-
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- bus-range = <0 0>;
- ranges = <0x2000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xe2000000 0x0 0x1000000>;
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
deleted file mode 100644
index 6a109a0ceac9..000000000000
--- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts
+++ /dev/null
@@ -1,506 +0,0 @@
-/*
- * MPC8610 HPCD Device Tree Source
- *
- * Copyright 2007-2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License Version 2 as published
- * by the Free Software Foundation.
- */
-
-/dts-v1/;
-
-/ {
- model = "MPC8610HPCD";
- compatible = "fsl,MPC8610HPCD";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- pci2 = &pci2;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8610@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>; // L1
- i-cache-size = <32768>; // L1
- sleep = <&pmc 0x00008000 0 // core
- &pmc 0x00004000 0>; // timebase
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x20000000>; // 512M at 0x0
- };
-
- localbus@e0005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8610-elbc", "fsl,elbc", "simple-bus";
- reg = <0xe0005000 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
- ranges = <0 0 0xf8000000 0x08000000
- 1 0 0xf0000000 0x08000000
- 2 0 0xe8400000 0x00008000
- 4 0 0xe8440000 0x00008000
- 5 0 0xe8480000 0x00008000
- 6 0 0xe84c0000 0x00008000
- 3 0 0xe8000000 0x00000020>;
- sleep = <&pmc 0x08000000 0>;
-
- flash@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x8000000>;
- bank-width = <2>;
- device-width = <1>;
- };
-
- flash@1,0 {
- compatible = "cfi-flash";
- reg = <1 0 0x8000000>;
- bank-width = <2>;
- device-width = <1>;
- };
-
- flash@2,0 {
- compatible = "fsl,mpc8610-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <2 0 0x8000>;
- };
-
- flash@4,0 {
- compatible = "fsl,mpc8610-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <4 0 0x8000>;
- };
-
- flash@5,0 {
- compatible = "fsl,mpc8610-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <5 0 0x8000>;
- };
-
- flash@6,0 {
- compatible = "fsl,mpc8610-fcm-nand",
- "fsl,elbc-fcm-nand";
- reg = <6 0 0x8000>;
- };
-
- board-control@3,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,fpga-pixis";
- reg = <3 0 0x20>;
- ranges = <0 3 0 0x20>;
- interrupt-parent = <&mpic>;
- interrupts = <8 8>;
-
- sdcsr_pio: gpio-controller@a {
- #gpio-cells = <2>;
- compatible = "fsl,fpga-pixis-gpio-bank";
- reg = <0xa 1>;
- gpio-controller;
- };
- };
- };
-
- soc@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- #interrupt-cells = <2>;
- device_type = "soc";
- compatible = "fsl,mpc8610-immr", "simple-bus";
- ranges = <0x0 0xe0000000 0x00100000>;
- bus-frequency = <0>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8610-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
-
- cs4270:codec@4f {
- compatible = "cirrus,cs4270";
- reg = <0x4f>;
- /* MCLK source is a stand-alone oscillator */
- clock-frequency = <12288000>;
- };
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- sleep = <&pmc 0x00000004 0>;
- dfsrr;
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- sleep = <&pmc 0x00000002 0>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- sleep = <&pmc 0x00000008 0>;
- };
-
- spi@7000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,mpc8610-spi", "fsl,spi";
- reg = <0x7000 0x40>;
- cell-index = <0>;
- interrupts = <59 2>;
- interrupt-parent = <&mpic>;
- mode = "cpu";
- gpios = <&sdcsr_pio 7 0>;
- sleep = <&pmc 0x00000800 0>;
-
- mmc-slot@0 {
- compatible = "fsl,mpc8610hpcd-mmc-slot",
- "mmc-spi-slot";
- reg = <0>;
- gpios = <&sdcsr_pio 0 1 /* nCD */
- &sdcsr_pio 1 0>; /* WP */
- voltage-ranges = <3300 3300>;
- spi-max-frequency = <50000000>;
- };
- };
-
- display@2c000 {
- compatible = "fsl,diu";
- reg = <0x2c000 100>;
- interrupts = <72 2>;
- interrupt-parent = <&mpic>;
- sleep = <&pmc 0x04000000 0>;
- };
-
- mpic: interrupt-controller@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- msi@41600 {
- compatible = "fsl,mpc8610-msi", "fsl,mpic-msi";
- reg = <0x41600 0x80>;
- msi-available-ranges = <0 0x100>;
- interrupts = <
- 0xe0 0
- 0xe1 0
- 0xe2 0
- 0xe3 0
- 0xe4 0
- 0xe5 0
- 0xe6 0
- 0xe7 0>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8610-guts";
- reg = <0xe0000 0x1000>;
- ranges = <0 0xe0000 0x1000>;
- fsl,has-rstcr;
-
- pmc: power@70 {
- compatible = "fsl,mpc8610-pmc",
- "fsl,mpc8641d-pmc";
- reg = <0x70 0x20>;
- };
- };
-
- wdt@e4000 {
- compatible = "fsl,mpc8610-wdt";
- reg = <0xe4000 0x100>;
- };
-
- ssi@16000 {
- compatible = "fsl,mpc8610-ssi";
- cell-index = <0>;
- reg = <0x16000 0x100>;
- interrupt-parent = <&mpic>;
- interrupts = <62 2>;
- fsl,mode = "i2s-slave";
- codec-handle = <&cs4270>;
- fsl,playback-dma = <&dma00>;
- fsl,capture-dma = <&dma01>;
- fsl,fifo-depth = <8>;
- sleep = <&pmc 0 0x08000000>;
- };
-
- ssi@16100 {
- compatible = "fsl,mpc8610-ssi";
- status = "disabled";
- cell-index = <1>;
- reg = <0x16100 0x100>;
- interrupt-parent = <&mpic>;
- interrupts = <63 2>;
- fsl,fifo-depth = <8>;
- sleep = <&pmc 0 0x04000000>;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8610-dma", "fsl,eloplus-dma";
- cell-index = <0>;
- reg = <0x21300 0x4>; /* DMA general status register */
- ranges = <0x0 0x21100 0x200>;
- sleep = <&pmc 0x00000400 0>;
-
- dma00: dma-channel@0 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,ssi-dma-channel";
- cell-index = <0>;
- reg = <0x0 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma01: dma-channel@1 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,ssi-dma-channel";
- cell-index = <1>;
- reg = <0x80 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@2 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,eloplus-dma-channel";
- cell-index = <2>;
- reg = <0x100 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@3 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,eloplus-dma-channel";
- cell-index = <3>;
- reg = <0x180 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- dma@c300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8610-dma", "fsl,eloplus-dma";
- cell-index = <1>;
- reg = <0xc300 0x4>; /* DMA general status register */
- ranges = <0x0 0xc100 0x200>;
- sleep = <&pmc 0x00000200 0>;
-
- dma-channel@0 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,eloplus-dma-channel";
- cell-index = <0>;
- reg = <0x0 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <76 2>;
- };
- dma-channel@1 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,eloplus-dma-channel";
- cell-index = <1>;
- reg = <0x80 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <77 2>;
- };
- dma-channel@2 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,eloplus-dma-channel";
- cell-index = <2>;
- reg = <0x100 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <78 2>;
- };
- dma-channel@3 {
- compatible = "fsl,mpc8610-dma-channel",
- "fsl,eloplus-dma-channel";
- cell-index = <3>;
- reg = <0x180 0x80>;
- interrupt-parent = <&mpic>;
- interrupts = <79 2>;
- };
- };
-
- };
-
- pci0: pci@e0008000 {
- compatible = "fsl,mpc8610-pci";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008000 0x1000>;
- bus-range = <0 0>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe1000000 0x0 0x00100000>;
- sleep = <&pmc 0x80000000 0>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x11 */
- 0x8800 0 0 1 &mpic 4 1
- 0x8800 0 0 2 &mpic 5 1
- 0x8800 0 0 3 &mpic 6 1
- 0x8800 0 0 4 &mpic 7 1
-
- /* IDSEL 0x12 */
- 0x9000 0 0 1 &mpic 5 1
- 0x9000 0 0 2 &mpic 6 1
- 0x9000 0 0 3 &mpic 7 1
- 0x9000 0 0 4 &mpic 4 1
- >;
- };
-
- pci1: pcie@e000a000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe000a000 0x1000>;
- bus-range = <1 3>;
- ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
- sleep = <&pmc 0x40000000 0>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <26 2>;
- interrupt-map-mask = <0xf800 0 0 7>;
-
- interrupt-map = <
- /* IDSEL 0x1b */
- 0xd800 0 0 1 &mpic 2 1
-
- /* IDSEL 0x1c*/
- 0xe000 0 0 1 &mpic 1 1
- 0xe000 0 0 2 &mpic 1 1
- 0xe000 0 0 3 &mpic 1 1
- 0xe000 0 0 4 &mpic 1 1
-
- /* IDSEL 0x1f */
- 0xf800 0 0 1 &mpic 3 2
- 0xf800 0 0 2 &mpic 0 1
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xa0000000
- 0x02000000 0x0 0xa0000000
- 0x0 0x10000000
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00100000>;
- uli1575@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- ranges = <0x02000000 0x0 0xa0000000
- 0x02000000 0x0 0xa0000000
- 0x0 0x10000000
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00100000>;
-
- isa@1e {
- device_type = "isa";
- #size-cells = <1>;
- #address-cells = <2>;
- reg = <0xf000 0 0 0 0>;
- ranges = <1 0 0x01000000 0 0
- 0x00001000>;
-
- rtc@70 {
- compatible = "pnpPNP,b00";
- reg = <1 0x70 2>;
- };
- };
- };
- };
- };
-
- pci2: pcie@e0009000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "fsl,mpc8641-pcie";
- reg = <0xe0009000 0x00001000>;
- ranges = <0x02000000 0 0x90000000 0x90000000 0 0x10000000
- 0x01000000 0 0x00000000 0xe2000000 0 0x00100000>;
- bus-range = <0 255>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- sleep = <&pmc 0x20000000 0>;
- clock-frequency = <33333333>;
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn.dts b/arch/powerpc/boot/dts/mpc8641_hpcn.dts
deleted file mode 100644
index 1c03060dd0b8..000000000000
--- a/arch/powerpc/boot/dts/mpc8641_hpcn.dts
+++ /dev/null
@@ -1,663 +0,0 @@
-/*
- * MPC8641 HPCN Device Tree Source
- *
- * Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "MPC8641HPCN";
- compatible = "fsl,mpc8641hpcn";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- ethernet2 = &enet2;
- ethernet3 = &enet3;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8641@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>; // L1
- i-cache-size = <32768>; // L1
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- PowerPC,8641@1 {
- device_type = "cpu";
- reg = <1>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x40000000>; // 1G at 0x0
- };
-
- localbus@ffe05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-localbus", "simple-bus";
- reg = <0xffe05000 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
- ranges = <0 0 0xef800000 0x00800000
- 2 0 0xffdf8000 0x00008000
- 3 0 0xffdf0000 0x00008000>;
-
- flash@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x00800000>;
- bank-width = <2>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "kernel";
- reg = <0x00000000 0x00300000>;
- };
- partition@300000 {
- label = "firmware b";
- reg = <0x00300000 0x00100000>;
- read-only;
- };
- partition@400000 {
- label = "fs";
- reg = <0x00400000 0x00300000>;
- };
- partition@700000 {
- label = "firmware a";
- reg = <0x00700000 0x00100000>;
- read-only;
- };
- };
- };
-
- soc8641@ffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x00000000 0xffe00000 0x00100000>;
- bus-frequency = <0>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8641-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <2>;
- };
- phy3: ethernet-phy@3 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet3: ethernet@27000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <3>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x27000 0x1000>;
- ranges = <0x0 0x27000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <37 2 38 2 39 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi3>;
- phy-handle = <&phy3>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi3: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <28 2>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- rmu: rmu@d3000 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,srio-rmu";
- reg = <0xd3000 0x500>;
- ranges = <0x0 0xd3000 0x500>;
-
- message-unit@0 {
- compatible = "fsl,srio-msg-unit";
- reg = <0x0 0x100>;
- interrupts = <
- 53 2 /* msg1_tx_irq */
- 54 2>;/* msg1_rx_irq */
- };
- message-unit@100 {
- compatible = "fsl,srio-msg-unit";
- reg = <0x100 0x100>;
- interrupts = <
- 55 2 /* msg2_tx_irq */
- 56 2>;/* msg2_rx_irq */
- };
- doorbell-unit@400 {
- compatible = "fsl,srio-dbell-unit";
- reg = <0x400 0x80>;
- interrupts = <
- 49 2 /* bell_outb_irq */
- 50 2>;/* bell_inb_irq */
- };
- port-write-unit@4e0 {
- compatible = "fsl,srio-port-write-unit";
- reg = <0x4e0 0x20>;
- interrupts = <48 2>;
- };
- };
-
- global-utilities@e0000 {
- compatible = "fsl,mpc8641-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
- };
-
- pci0: pcie@ffe08000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xffe08000 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xffc00000 0x0 0x00010000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- interrupt-map-mask = <0xff00 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x11 func 0 - PCI slot 1 */
- 0x8800 0 0 1 &mpic 2 1
- 0x8800 0 0 2 &mpic 3 1
- 0x8800 0 0 3 &mpic 4 1
- 0x8800 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 1 - PCI slot 1 */
- 0x8900 0 0 1 &mpic 2 1
- 0x8900 0 0 2 &mpic 3 1
- 0x8900 0 0 3 &mpic 4 1
- 0x8900 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 2 - PCI slot 1 */
- 0x8a00 0 0 1 &mpic 2 1
- 0x8a00 0 0 2 &mpic 3 1
- 0x8a00 0 0 3 &mpic 4 1
- 0x8a00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 3 - PCI slot 1 */
- 0x8b00 0 0 1 &mpic 2 1
- 0x8b00 0 0 2 &mpic 3 1
- 0x8b00 0 0 3 &mpic 4 1
- 0x8b00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 4 - PCI slot 1 */
- 0x8c00 0 0 1 &mpic 2 1
- 0x8c00 0 0 2 &mpic 3 1
- 0x8c00 0 0 3 &mpic 4 1
- 0x8c00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 5 - PCI slot 1 */
- 0x8d00 0 0 1 &mpic 2 1
- 0x8d00 0 0 2 &mpic 3 1
- 0x8d00 0 0 3 &mpic 4 1
- 0x8d00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 6 - PCI slot 1 */
- 0x8e00 0 0 1 &mpic 2 1
- 0x8e00 0 0 2 &mpic 3 1
- 0x8e00 0 0 3 &mpic 4 1
- 0x8e00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 7 - PCI slot 1 */
- 0x8f00 0 0 1 &mpic 2 1
- 0x8f00 0 0 2 &mpic 3 1
- 0x8f00 0 0 3 &mpic 4 1
- 0x8f00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x12 func 0 - PCI slot 2 */
- 0x9000 0 0 1 &mpic 3 1
- 0x9000 0 0 2 &mpic 4 1
- 0x9000 0 0 3 &mpic 1 1
- 0x9000 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 1 - PCI slot 2 */
- 0x9100 0 0 1 &mpic 3 1
- 0x9100 0 0 2 &mpic 4 1
- 0x9100 0 0 3 &mpic 1 1
- 0x9100 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 2 - PCI slot 2 */
- 0x9200 0 0 1 &mpic 3 1
- 0x9200 0 0 2 &mpic 4 1
- 0x9200 0 0 3 &mpic 1 1
- 0x9200 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 3 - PCI slot 2 */
- 0x9300 0 0 1 &mpic 3 1
- 0x9300 0 0 2 &mpic 4 1
- 0x9300 0 0 3 &mpic 1 1
- 0x9300 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 4 - PCI slot 2 */
- 0x9400 0 0 1 &mpic 3 1
- 0x9400 0 0 2 &mpic 4 1
- 0x9400 0 0 3 &mpic 1 1
- 0x9400 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 5 - PCI slot 2 */
- 0x9500 0 0 1 &mpic 3 1
- 0x9500 0 0 2 &mpic 4 1
- 0x9500 0 0 3 &mpic 1 1
- 0x9500 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 6 - PCI slot 2 */
- 0x9600 0 0 1 &mpic 3 1
- 0x9600 0 0 2 &mpic 4 1
- 0x9600 0 0 3 &mpic 1 1
- 0x9600 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 7 - PCI slot 2 */
- 0x9700 0 0 1 &mpic 3 1
- 0x9700 0 0 2 &mpic 4 1
- 0x9700 0 0 3 &mpic 1 1
- 0x9700 0 0 4 &mpic 2 1
-
- // IDSEL 0x1c USB
- 0xe000 0 0 1 &i8259 12 2
- 0xe100 0 0 2 &i8259 9 2
- 0xe200 0 0 3 &i8259 10 2
- 0xe300 0 0 4 &i8259 11 2
-
- // IDSEL 0x1d Audio
- 0xe800 0 0 1 &i8259 6 2
-
- // IDSEL 0x1e Legacy
- 0xf000 0 0 1 &i8259 7 2
- 0xf100 0 0 1 &i8259 7 2
-
- // IDSEL 0x1f IDE/SATA
- 0xf800 0 0 1 &i8259 14 2
- 0xf900 0 0 1 &i8259 5 2
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0x80000000
- 0x02000000 0x0 0x80000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00010000>;
- uli1575@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- ranges = <0x02000000 0x0 0x80000000
- 0x02000000 0x0 0x80000000
- 0x0 0x20000000
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00010000>;
- isa@1e {
- device_type = "isa";
- #interrupt-cells = <2>;
- #size-cells = <1>;
- #address-cells = <2>;
- reg = <0xf000 0 0 0 0>;
- ranges = <1 0 0x01000000 0 0
- 0x00001000>;
- interrupt-parent = <&i8259>;
-
- i8259: interrupt-controller@20 {
- reg = <1 0x20 2
- 1 0xa0 2
- 1 0x4d0 2>;
- interrupt-controller;
- device_type = "interrupt-controller";
- #address-cells = <0>;
- #interrupt-cells = <2>;
- compatible = "chrp,iic";
- interrupts = <9 2>;
- interrupt-parent = <&mpic>;
- };
-
- i8042@60 {
- #size-cells = <0>;
- #address-cells = <1>;
- reg = <1 0x60 1 1 0x64 1>;
- interrupts = <1 3 12 3>;
- interrupt-parent =
- <&i8259>;
-
- keyboard@0 {
- reg = <0>;
- compatible = "pnpPNP,303";
- };
-
- mouse@1 {
- reg = <1>;
- compatible = "pnpPNP,f03";
- };
- };
-
- rtc@70 {
- compatible =
- "pnpPNP,b00";
- reg = <1 0x70 2>;
- };
-
- gpio@400 {
- reg = <1 0x400 0x80>;
- };
- };
- };
- };
-
- };
-
- pci1: pcie@ffe09000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xffe09000 0x1000>;
- bus-range = <0 0xff>;
- ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xffc10000 0x0 0x00010000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1
- >;
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xa0000000
- 0x02000000 0x0 0xa0000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00010000>;
- };
- };
-/*
- * Only one of Rapid IO or PCI can be present due to HW limitations and
- * due to the fact that the 2 now share address space in the new memory
- * map. The most likely case is that we have PCI, so comment out the
- * rapidio node. Leave it here for reference.
-
- rapidio@ffec0000 {
- reg = <0xffec0000 0x11000>;
- compatible = "fsl,srio";
- interrupt-parent = <&mpic>;
- interrupts = <48 2>;
- #address-cells = <2>;
- #size-cells = <2>;
- fsl,srio-rmu-handle = <&rmu>;
- ranges;
-
- port1 {
- #address-cells = <2>;
- #size-cells = <2>;
- cell-index = <1>;
- ranges = <0 0 0x80000000 0 0x20000000>;
- };
- };
-*/
-
-};
diff --git a/arch/powerpc/boot/dts/mpc8641_hpcn_36b.dts b/arch/powerpc/boot/dts/mpc8641_hpcn_36b.dts
deleted file mode 100644
index bb575e28042a..000000000000
--- a/arch/powerpc/boot/dts/mpc8641_hpcn_36b.dts
+++ /dev/null
@@ -1,605 +0,0 @@
-/*
- * MPC8641 HPCN Device Tree Source
- *
- * Copyright 2008-2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "MPC8641HPCN";
- compatible = "fsl,mpc8641hpcn";
- #address-cells = <2>;
- #size-cells = <2>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- ethernet2 = &enet2;
- ethernet3 = &enet3;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8641@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // 33 MHz, from uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- PowerPC,8641@1 {
- device_type = "cpu";
- reg = <1>;
- d-cache-line-size = <32>; // 32 bytes
- i-cache-line-size = <32>; // 32 bytes
- d-cache-size = <32768>; // L1, 32K
- i-cache-size = <32768>; // L1, 32K
- timebase-frequency = <0>; // 33 MHz, from uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x00000000 0x0 0x40000000>; // 1G at 0x0
- };
-
- localbus@fffe05000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-localbus", "simple-bus";
- reg = <0x0f 0xffe05000 0x0 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
- ranges = <0 0 0xf 0xef800000 0x00800000
- 2 0 0xf 0xffdf8000 0x00008000
- 3 0 0xf 0xffdf0000 0x00008000>;
-
- flash@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x00800000>;
- bank-width = <2>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "kernel";
- reg = <0x00000000 0x00300000>;
- };
- partition@300000 {
- label = "firmware b";
- reg = <0x00300000 0x00100000>;
- read-only;
- };
- partition@400000 {
- label = "fs";
- reg = <0x00400000 0x00300000>;
- };
- partition@700000 {
- label = "firmware a";
- reg = <0x00700000 0x00100000>;
- read-only;
- };
- };
- };
-
- soc8641@fffe00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x00000000 0x0f 0xffe00000 0x00100000>;
- bus-frequency = <0>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8641-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <0>;
- };
- phy1: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <1>;
- };
- phy2: ethernet-phy@2 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <2>;
- };
- phy3: ethernet-phy@3 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <3>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet3: ethernet@27000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <3>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x27000 0x1000>;
- ranges = <0x0 0x27000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <37 2 38 2 39 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi3>;
- phy-handle = <&phy3>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi3: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <28 2>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
-
- global-utilities@e0000 {
- compatible = "fsl,mpc8641-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
- };
-
- pci0: pcie@fffe08000 {
- cell-index = <0>;
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0x0f 0xffe08000 0x0 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0xe0000000 0x0c 0x00000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0x0f 0xffc00000 0x0 0x00010000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- interrupt-map-mask = <0xff00 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x11 func 0 - PCI slot 1 */
- 0x8800 0 0 1 &mpic 2 1
- 0x8800 0 0 2 &mpic 3 1
- 0x8800 0 0 3 &mpic 4 1
- 0x8800 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 1 - PCI slot 1 */
- 0x8900 0 0 1 &mpic 2 1
- 0x8900 0 0 2 &mpic 3 1
- 0x8900 0 0 3 &mpic 4 1
- 0x8900 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 2 - PCI slot 1 */
- 0x8a00 0 0 1 &mpic 2 1
- 0x8a00 0 0 2 &mpic 3 1
- 0x8a00 0 0 3 &mpic 4 1
- 0x8a00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 3 - PCI slot 1 */
- 0x8b00 0 0 1 &mpic 2 1
- 0x8b00 0 0 2 &mpic 3 1
- 0x8b00 0 0 3 &mpic 4 1
- 0x8b00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 4 - PCI slot 1 */
- 0x8c00 0 0 1 &mpic 2 1
- 0x8c00 0 0 2 &mpic 3 1
- 0x8c00 0 0 3 &mpic 4 1
- 0x8c00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 5 - PCI slot 1 */
- 0x8d00 0 0 1 &mpic 2 1
- 0x8d00 0 0 2 &mpic 3 1
- 0x8d00 0 0 3 &mpic 4 1
- 0x8d00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 6 - PCI slot 1 */
- 0x8e00 0 0 1 &mpic 2 1
- 0x8e00 0 0 2 &mpic 3 1
- 0x8e00 0 0 3 &mpic 4 1
- 0x8e00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x11 func 7 - PCI slot 1 */
- 0x8f00 0 0 1 &mpic 2 1
- 0x8f00 0 0 2 &mpic 3 1
- 0x8f00 0 0 3 &mpic 4 1
- 0x8f00 0 0 4 &mpic 1 1
-
- /* IDSEL 0x12 func 0 - PCI slot 2 */
- 0x9000 0 0 1 &mpic 3 1
- 0x9000 0 0 2 &mpic 4 1
- 0x9000 0 0 3 &mpic 1 1
- 0x9000 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 1 - PCI slot 2 */
- 0x9100 0 0 1 &mpic 3 1
- 0x9100 0 0 2 &mpic 4 1
- 0x9100 0 0 3 &mpic 1 1
- 0x9100 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 2 - PCI slot 2 */
- 0x9200 0 0 1 &mpic 3 1
- 0x9200 0 0 2 &mpic 4 1
- 0x9200 0 0 3 &mpic 1 1
- 0x9200 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 3 - PCI slot 2 */
- 0x9300 0 0 1 &mpic 3 1
- 0x9300 0 0 2 &mpic 4 1
- 0x9300 0 0 3 &mpic 1 1
- 0x9300 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 4 - PCI slot 2 */
- 0x9400 0 0 1 &mpic 3 1
- 0x9400 0 0 2 &mpic 4 1
- 0x9400 0 0 3 &mpic 1 1
- 0x9400 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 5 - PCI slot 2 */
- 0x9500 0 0 1 &mpic 3 1
- 0x9500 0 0 2 &mpic 4 1
- 0x9500 0 0 3 &mpic 1 1
- 0x9500 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 6 - PCI slot 2 */
- 0x9600 0 0 1 &mpic 3 1
- 0x9600 0 0 2 &mpic 4 1
- 0x9600 0 0 3 &mpic 1 1
- 0x9600 0 0 4 &mpic 2 1
-
- /* IDSEL 0x12 func 7 - PCI slot 2 */
- 0x9700 0 0 1 &mpic 3 1
- 0x9700 0 0 2 &mpic 4 1
- 0x9700 0 0 3 &mpic 1 1
- 0x9700 0 0 4 &mpic 2 1
-
- // IDSEL 0x1c USB
- 0xe000 0 0 1 &i8259 12 2
- 0xe100 0 0 2 &i8259 9 2
- 0xe200 0 0 3 &i8259 10 2
- 0xe300 0 0 4 &i8259 11 2
-
- // IDSEL 0x1d Audio
- 0xe800 0 0 1 &i8259 6 2
-
- // IDSEL 0x1e Legacy
- 0xf000 0 0 1 &i8259 7 2
- 0xf100 0 0 1 &i8259 7 2
-
- // IDSEL 0x1f IDE/SATA
- 0xf800 0 0 1 &i8259 14 2
- 0xf900 0 0 1 &i8259 5 2
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xe0000000
- 0x02000000 0x0 0xe0000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00010000>;
- uli1575@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- ranges = <0x02000000 0x0 0xe0000000
- 0x02000000 0x0 0xe0000000
- 0x0 0x20000000
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00010000>;
- isa@1e {
- device_type = "isa";
- #interrupt-cells = <2>;
- #size-cells = <1>;
- #address-cells = <2>;
- reg = <0xf000 0 0 0 0>;
- ranges = <1 0 0x01000000 0 0
- 0x00001000>;
- interrupt-parent = <&i8259>;
-
- i8259: interrupt-controller@20 {
- reg = <1 0x20 2
- 1 0xa0 2
- 1 0x4d0 2>;
- interrupt-controller;
- device_type = "interrupt-controller";
- #address-cells = <0>;
- #interrupt-cells = <2>;
- compatible = "chrp,iic";
- interrupts = <9 2>;
- interrupt-parent = <&mpic>;
- };
-
- i8042@60 {
- #size-cells = <0>;
- #address-cells = <1>;
- reg = <1 0x60 1 1 0x64 1>;
- interrupts = <1 3 12 3>;
- interrupt-parent =
- <&i8259>;
-
- keyboard@0 {
- reg = <0>;
- compatible = "pnpPNP,303";
- };
-
- mouse@1 {
- reg = <1>;
- compatible = "pnpPNP,f03";
- };
- };
-
- rtc@70 {
- compatible =
- "pnpPNP,b00";
- reg = <1 0x70 2>;
- };
-
- gpio@400 {
- reg = <1 0x400 0x80>;
- };
- };
- };
- };
-
- };
-
- pci1: pcie@fffe09000 {
- cell-index = <1>;
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0x0f 0xffe09000 0x0 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0xe0000000 0x0c 0x20000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0x0f 0xffc10000 0x0 0x00010000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1
- >;
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xe0000000
- 0x02000000 0x0 0xe0000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00010000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/mpc866ads.dts b/arch/powerpc/boot/dts/mpc866ads.dts
index 34c1f48b1a09..ff60d678c6a2 100644
--- a/arch/powerpc/boot/dts/mpc866ads.dts
+++ b/arch/powerpc/boot/dts/mpc866ads.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC866 ADS Device Tree Source
*
* Copyright 2006 MontaVista Software, Inc.
* Copyright 2008 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -185,6 +181,6 @@
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@a80";
+ stdout-path = "/soc/cpm/serial@a80";
};
};
diff --git a/arch/powerpc/boot/dts/mpc885ads.dts b/arch/powerpc/boot/dts/mpc885ads.dts
index 4e93bd961e0f..be58e7f29c9b 100644
--- a/arch/powerpc/boot/dts/mpc885ads.dts
+++ b/arch/powerpc/boot/dts/mpc885ads.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC885 ADS Device Tree Source
*
* Copyright 2006 MontaVista Software, Inc.
* Copyright 2007,2008 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -72,7 +68,7 @@
#address-cells = <1>;
#size-cells = <1>;
device_type = "soc";
- ranges = <0x0 0xff000000 0x4000>;
+ ranges = <0x0 0xff000000 0x28000>;
bus-frequency = <0>;
// Temporary -- will go away once kernel uses ranges for get_immrbase().
@@ -224,9 +220,20 @@
#size-cells = <0>;
};
};
+
+ crypto@20000 {
+ compatible = "fsl,sec1.2", "fsl,sec1.0";
+ reg = <0x20000 0x8000>;
+ interrupts = <1 1>;
+ interrupt-parent = <&PIC>;
+ fsl,num-channels = <1>;
+ fsl,channel-fifo-len = <24>;
+ fsl,exec-units-mask = <0x4c>;
+ fsl,descriptor-types-mask = <0x05000154>;
+ };
};
chosen {
- linux,stdout-path = "/soc/cpm/serial@a80";
+ stdout-path = "/soc/cpm/serial@a80";
};
};
diff --git a/arch/powerpc/boot/dts/mucmc52.dts b/arch/powerpc/boot/dts/mucmc52.dts
index d3a792bb5c1a..e88a7bd4034d 100644
--- a/arch/powerpc/boot/dts/mucmc52.dts
+++ b/arch/powerpc/boot/dts/mucmc52.dts
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Manroland mucmc52 board Device Tree Source
*
* Copyright (C) 2009 DENX Software Engineering GmbH
* Heiko Schocher <hs@denx.de>
* Copyright 2006-2007 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -110,9 +106,9 @@
0x8000 0 0 3 &mpc5200_pic 0 2 3
0x8000 0 0 4 &mpc5200_pic 0 1 3
>;
- ranges = <0x42000000 0 0x60000000 0x60000000 0 0x10000000
- 0x02000000 0 0x90000000 0x90000000 0 0x10000000
- 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x60000000 0x60000000 0 0x10000000>,
+ <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
};
localbus {
diff --git a/arch/powerpc/boot/dts/mvme5100.dts b/arch/powerpc/boot/dts/mvme5100.dts
index 1ecb341a232a..a7eb6d25903d 100644
--- a/arch/powerpc/boot/dts/mvme5100.dts
+++ b/arch/powerpc/boot/dts/mvme5100.dts
@@ -179,7 +179,7 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts
index 9f6dd4d889b3..e0a8d3034417 100644
--- a/arch/powerpc/boot/dts/o2d.dts
+++ b/arch/powerpc/boot/dts/o2d.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O2D Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "o2d.dtsi"
@@ -16,7 +12,7 @@
model = "ifm,o2d";
compatible = "ifm,o2d";
- memory {
+ memory@0 {
reg = <0x00000000 0x08000000>; // 128MB
};
diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi
index cf073e693f24..7e52509fa506 100644
--- a/arch/powerpc/boot/dts/o2d.dtsi
+++ b/arch/powerpc/boot/dts/o2d.dtsi
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O2D base Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -23,7 +19,7 @@
model = "ifm,o2d";
compatible = "ifm,o2d";
- memory {
+ memory@0 {
reg = <0x00000000 0x04000000>; // 64MB
};
@@ -38,12 +34,6 @@
#address-cells = <1>;
#size-cells = <0>;
cell-index = <0>;
-
- spidev@0 {
- compatible = "spidev";
- spi-max-frequency = <250000>;
- reg = <0>;
- };
};
psc@2200 { // PSC2
diff --git a/arch/powerpc/boot/dts/o2d300.dts b/arch/powerpc/boot/dts/o2d300.dts
index 29affe0f0da3..55a25b700bed 100644
--- a/arch/powerpc/boot/dts/o2d300.dts
+++ b/arch/powerpc/boot/dts/o2d300.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O2D300 Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "o2d.dtsi"
diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts
index a0f5b97a4f06..c2eedbd1f5fc 100644
--- a/arch/powerpc/boot/dts/o2dnt2.dts
+++ b/arch/powerpc/boot/dts/o2dnt2.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O2DNT2 Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "o2d.dtsi"
@@ -16,7 +12,7 @@
model = "ifm,o2dnt2";
compatible = "ifm,o2d";
- memory {
+ memory@0 {
reg = <0x00000000 0x08000000>; // 128MB
};
diff --git a/arch/powerpc/boot/dts/o2i.dts b/arch/powerpc/boot/dts/o2i.dts
index e3cc99d1360b..3fb2e0ad7387 100644
--- a/arch/powerpc/boot/dts/o2i.dts
+++ b/arch/powerpc/boot/dts/o2i.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O2I Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "o2d.dtsi"
diff --git a/arch/powerpc/boot/dts/o2mnt.dts b/arch/powerpc/boot/dts/o2mnt.dts
index d91859a9e940..c5e0ba6e8f2b 100644
--- a/arch/powerpc/boot/dts/o2mnt.dts
+++ b/arch/powerpc/boot/dts/o2mnt.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O2MNT Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "o2d.dtsi"
diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts
index acce49326491..e4c1bdd41271 100644
--- a/arch/powerpc/boot/dts/o3dnt.dts
+++ b/arch/powerpc/boot/dts/o3dnt.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* O3DNT Device Tree Source
*
* Copyright (C) 2012 DENX Software Engineering
* Anatolij Gustschin <agust@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "o2d.dtsi"
@@ -16,7 +12,7 @@
model = "ifm,o3dnt";
compatible = "ifm,o2d";
- memory {
+ memory@0 {
reg = <0x00000000 0x04000000>; // 64MB
};
diff --git a/arch/powerpc/boot/dts/obs600.dts b/arch/powerpc/boot/dts/obs600.dts
deleted file mode 100644
index 18e7d79ee4c3..000000000000
--- a/arch/powerpc/boot/dts/obs600.dts
+++ /dev/null
@@ -1,314 +0,0 @@
-/*
- * Device Tree Source for PlatHome OpenBlockS 600 (405EX)
- *
- * Copyright 2011 Ben Herrenschmidt, IBM Corp.
- *
- * Based on Kilauea by:
- *
- * Copyright 2007-2009 DENX Software Engineering, Stefan Roese <sr@denx.de>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "PlatHome,OpenBlockS 600";
- compatible = "plathome,obs600";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC0;
- ethernet1 = &EMAC1;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405EX";
- reg = <0x00000000>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- timebase-frequency = <0>; /* Filled in by U-Boot */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>; /* 16 kB */
- d-cache-size = <16384>; /* 16 kB */
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by U-Boot */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic-405ex", "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- UIC1: interrupt-controller1 {
- compatible = "ibm,uic-405ex","ibm,uic";
- interrupt-controller;
- cell-index = <1>;
- dcr-reg = <0x0d0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- UIC2: interrupt-controller2 {
- compatible = "ibm,uic-405ex","ibm,uic";
- interrupt-controller;
- cell-index = <2>;
- dcr-reg = <0x0e0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- interrupts = <0x1c 0x4 0x1d 0x4>; /* cascade */
- interrupt-parent = <&UIC0>;
- };
-
- CPM0: cpm {
- compatible = "ibm,cpm";
- dcr-access-method = "native";
- dcr-reg = <0x0b0 0x003>;
- unused-units = <0x00000000>;
- idle-doze = <0x02000000>;
- standby = <0xe3e74800>;
- };
-
- plb {
- compatible = "ibm,plb-405ex", "ibm,plb4";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405ex", "ibm,sdram-4xx-ddr2";
- dcr-reg = <0x010 0x002>;
- interrupt-parent = <&UIC2>;
- interrupts = <0x5 0x4 /* ECC DED Error */
- 0x6 0x4>; /* ECC SEC Error */
- };
-
- CRYPTO: crypto@ef700000 {
- compatible = "amcc,ppc405ex-crypto", "amcc,ppc4xx-crypto";
- reg = <0xef700000 0x80400>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x17 0x2>;
- };
-
- MAL0: mcmal {
- compatible = "ibm,mcmal-405ex", "ibm,mcmal2";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <2>;
- num-rx-chans = <2>;
- interrupt-parent = <&MAL0>;
- interrupts = <0x0 0x1 0x2 0x3 0x4>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*TXEOB*/ 0x0 &UIC0 0xa 0x4
- /*RXEOB*/ 0x1 &UIC0 0xb 0x4
- /*SERR*/ 0x2 &UIC1 0x0 0x4
- /*TXDE*/ 0x3 &UIC1 0x1 0x4
- /*RXDE*/ 0x4 &UIC1 0x2 0x4>;
- interrupt-map-mask = <0xffffffff>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405ex", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x80000000 0x80000000 0x10000000
- 0xef600000 0xef600000 0x00a00000
- 0xf0000000 0xf0000000 0x10000000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by U-Boot */
-
- EBC0: ebc {
- compatible = "ibm,ebc-405ex", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- /* ranges property is supplied by U-Boot */
- interrupts = <0x5 0x1>;
- interrupt-parent = <&UIC1>;
-
- nor_flash@0,0 {
- compatible = "amd,s29gl512n", "cfi-flash";
- bank-width = <2>;
- reg = <0x00000000 0x00000000 0x08000000>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "kernel + initrd";
- reg = <0x00000000 0x03de0000>;
- };
- partition@3de0000 {
- label = "user config area";
- reg = <0x03de0000 0x00080000>;
- };
- partition@3e60000 {
- label = "user program area";
- reg = <0x03e60000 0x04000000>;
- };
- partition@7e60000 {
- label = "flat device tree";
- reg = <0x07e60000 0x00080000>;
- };
- partition@7ee0000 {
- label = "test program";
- reg = <0x07ee0000 0x00080000>;
- };
- partition@7f60000 {
- label = "u-boot env";
- reg = <0x07f60000 0x00040000>;
- };
- partition@7fa0000 {
- label = "u-boot";
- reg = <0x07fa0000 0x00060000>;
- };
- };
- };
-
- UART0: serial@ef600200 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600200 0x00000008>;
- virtual-reg = <0xef600200>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1a 0x4>;
- };
-
- UART1: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by U-Boot */
- current-speed = <0>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- IIC0: i2c@ef600400 {
- compatible = "ibm,iic-405ex", "ibm,iic";
- reg = <0xef600400 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- rtc@68 {
- compatible = "dallas,ds1340";
- reg = <0x68>;
- };
- };
-
- IIC1: i2c@ef600500 {
- compatible = "ibm,iic-405ex", "ibm,iic";
- reg = <0xef600500 0x00000014>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x7 0x4>;
- };
-
- RGMII0: emac-rgmii@ef600b00 {
- compatible = "ibm,rgmii-405ex", "ibm,rgmii";
- reg = <0xef600b00 0x00000104>;
- has-mdio;
- };
-
- EMAC0: ethernet@ef600900 {
- linux,network-index = <0x0>;
- device_type = "network";
- compatible = "ibm,emac-405ex", "ibm,emac4sync";
- interrupt-parent = <&EMAC0>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x18 0x4
- /*Wake*/ 0x1 &UIC1 0x1d 0x4>;
- reg = <0xef600900 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <0>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
-
- EMAC1: ethernet@ef600a00 {
- linux,network-index = <0x1>;
- device_type = "network";
- compatible = "ibm,emac-405ex", "ibm,emac4sync";
- interrupt-parent = <&EMAC1>;
- interrupts = <0x0 0x1>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = </*Status*/ 0x0 &UIC0 0x19 0x4
- /*Wake*/ 0x1 &UIC1 0x1f 0x4>;
- reg = <0xef600a00 0x000000c4>;
- local-mac-address = [000000000000]; /* Filled in by U-Boot */
- mal-device = <&MAL0>;
- mal-tx-channel = <1>;
- mal-rx-channel = <1>;
- cell-index = <1>;
- max-frame-size = <9000>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- rx-fifo-size-gige = <16384>;
- tx-fifo-size-gige = <16384>;
- phy-mode = "rgmii";
- phy-map = <0x00000000>;
- rgmii-device = <&RGMII0>;
- rgmii-channel = <1>;
- has-inverted-stacr-oc;
- has-new-stacr-staopc;
- };
-
- GPIO: gpio@ef600800 {
- device_type = "gpio";
- compatible = "ibm,gpio-405ex", "ibm,ppc4xx-gpio";
- reg = <0xef600800 0x50>;
- };
- };
- };
- chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
- };
-};
diff --git a/arch/powerpc/boot/dts/p1010rdb-pa.dts b/arch/powerpc/boot/dts/p1010rdb-pa.dts
deleted file mode 100644
index 767d4c032857..000000000000
--- a/arch/powerpc/boot/dts/p1010rdb-pa.dts
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * P1010 RDB Device Tree Source
- *
- * Copyright 2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/include/ "fsl/p1010si-pre.dtsi"
-
-/ {
- model = "fsl,P1010RDB";
- compatible = "fsl,P1010RDB";
-
- /include/ "p1010rdb_32b.dtsi"
-};
-
-/include/ "p1010rdb.dtsi"
-/include/ "p1010rdb-pa.dtsi"
-/include/ "fsl/p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p1010rdb-pb.dts b/arch/powerpc/boot/dts/p1010rdb-pb.dts
deleted file mode 100644
index 6eeb7d3185be..000000000000
--- a/arch/powerpc/boot/dts/p1010rdb-pb.dts
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * P1010 RDB Device Tree Source
- *
- * Copyright 2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/include/ "fsl/p1010si-pre.dtsi"
-
-/ {
- model = "fsl,P1010RDB-PB";
- compatible = "fsl,P1010RDB-PB";
-
- /include/ "p1010rdb_32b.dtsi"
-};
-
-/include/ "p1010rdb.dtsi"
-
-&phy0 {
- interrupts = <0 1 0 0>;
-};
-
-&phy1 {
- interrupts = <2 1 0 0>;
-};
-
-&phy2 {
- interrupts = <1 1 0 0>;
-};
-
-/include/ "fsl/p1010si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/p5040ds.dts b/arch/powerpc/boot/dts/p5040ds.dts
deleted file mode 100644
index 860b5ccf76c0..000000000000
--- a/arch/powerpc/boot/dts/p5040ds.dts
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * P5040DS Device Tree Source
- *
- * Copyright 2012 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * This software is provided by Freescale Semiconductor "as is" and any
- * express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are
- * disclaimed. In no event shall Freescale Semiconductor be liable for any
- * direct, indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused and
- * on any theory of liability, whether in contract, strict liability, or tort
- * (including negligence or otherwise) arising in any way out of the use of this
- * software, even if advised of the possibility of such damage.
- */
-
-/include/ "fsl/p5040si-pre.dtsi"
-
-/ {
- model = "fsl,P5040DS";
- compatible = "fsl,P5040DS";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- memory {
- device_type = "memory";
- };
-
- dcsr: dcsr@f00000000 {
- ranges = <0x00000000 0xf 0x00000000 0x01008000>;
- };
-
- soc: soc@ffe000000 {
- ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
- reg = <0xf 0xfe000000 0 0x00001000>;
- spi@110000 {
- flash@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "spansion,s25sl12801";
- reg = <0>;
- spi-max-frequency = <40000000>; /* input clock */
- partition@u-boot {
- label = "u-boot";
- reg = <0x00000000 0x00100000>;
- };
- partition@kernel {
- label = "kernel";
- reg = <0x00100000 0x00500000>;
- };
- partition@dtb {
- label = "dtb";
- reg = <0x00600000 0x00100000>;
- };
- partition@fs {
- label = "file system";
- reg = <0x00700000 0x00900000>;
- };
- };
- };
-
- i2c@118100 {
- eeprom@51 {
- compatible = "at24,24c256";
- reg = <0x51>;
- };
- eeprom@52 {
- compatible = "at24,24c256";
- reg = <0x52>;
- };
- };
-
- i2c@119100 {
- rtc@68 {
- compatible = "dallas,ds3232";
- reg = <0x68>;
- interrupts = <0x1 0x1 0 0>;
- };
- adt7461@4c {
- compatible = "adi,adt7461";
- reg = <0x4c>;
- };
- };
- };
-
- lbc: localbus@ffe124000 {
- reg = <0xf 0xfe124000 0 0x1000>;
- ranges = <0 0 0xf 0xe8000000 0x08000000
- 2 0 0xf 0xffa00000 0x00040000
- 3 0 0xf 0xffdf0000 0x00008000>;
-
- flash@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x08000000>;
- bank-width = <2>;
- device-width = <2>;
- };
-
- nand@2,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,elbc-fcm-nand";
- reg = <0x2 0x0 0x40000>;
-
- partition@0 {
- label = "NAND U-Boot Image";
- reg = <0x0 0x02000000>;
- };
-
- partition@2000000 {
- label = "NAND Root File System";
- reg = <0x02000000 0x10000000>;
- };
-
- partition@12000000 {
- label = "NAND Compressed RFS Image";
- reg = <0x12000000 0x08000000>;
- };
-
- partition@1a000000 {
- label = "NAND Linux Kernel Image";
- reg = <0x1a000000 0x04000000>;
- };
-
- partition@1e000000 {
- label = "NAND DTB Image";
- reg = <0x1e000000 0x01000000>;
- };
-
- partition@1f000000 {
- label = "NAND Writable User area";
- reg = <0x1f000000 0x01000000>;
- };
- };
-
- board-control@3,0 {
- compatible = "fsl,p5040ds-fpga", "fsl,fpga-ngpixis";
- reg = <3 0 0x40>;
- };
- };
-
- pci0: pcie@ffe200000 {
- reg = <0xf 0xfe200000 0 0x1000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x00000000 0x0 0x20000000
- 0x01000000 0 0x00000000 0xf 0xf8000000 0x0 0x00010000>;
- pcie@0 {
- ranges = <0x02000000 0 0xe0000000
- 0x02000000 0 0xe0000000
- 0 0x20000000
-
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00010000>;
- };
- };
-
- pci1: pcie@ffe201000 {
- reg = <0xf 0xfe201000 0 0x1000>;
- ranges = <0x02000000 0x0 0xe0000000 0xc 0x20000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xf 0xf8010000 0x0 0x00010000>;
- pcie@0 {
- ranges = <0x02000000 0 0xe0000000
- 0x02000000 0 0xe0000000
- 0 0x20000000
-
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00010000>;
- };
- };
-
- pci2: pcie@ffe202000 {
- reg = <0xf 0xfe202000 0 0x1000>;
- ranges = <0x02000000 0 0xe0000000 0xc 0x40000000 0 0x20000000
- 0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
- pcie@0 {
- ranges = <0x02000000 0 0xe0000000
- 0x02000000 0 0xe0000000
- 0 0x20000000
-
- 0x01000000 0 0x00000000
- 0x01000000 0 0x00000000
- 0 0x00010000>;
- };
- };
-};
-
-/include/ "fsl/p5040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/pcm030.dts b/arch/powerpc/boot/dts/pcm030.dts
index 192e66af0001..5cee474dcc4c 100644
--- a/arch/powerpc/boot/dts/pcm030.dts
+++ b/arch/powerpc/boot/dts/pcm030.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* phyCORE-MPC5200B-tiny (pcm030) board Device Tree Source
*
@@ -5,11 +6,6 @@
* Sascha Hauer <s.hauer@pengutronix.de>
* Copyright 2007 Pengutronix
* Juergen Beisert <j.beisert@pengutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
@@ -71,7 +67,7 @@
reg = <0x51>;
};
eeprom@52 {
- compatible = "catalyst,24c32";
+ compatible = "catalyst,24c32", "atmel,24c32";
reg = <0x52>;
pagesize = <32>;
};
@@ -94,9 +90,9 @@
0xc800 0 0 2 &mpc5200_pic 1 2 3
0xc800 0 0 3 &mpc5200_pic 1 3 3
0xc800 0 0 4 &mpc5200_pic 0 0 3>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000
- 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+ <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
};
localbus {
diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts
index 96b139bf50e9..d00f13b62510 100644
--- a/arch/powerpc/boot/dts/pcm032.dts
+++ b/arch/powerpc/boot/dts/pcm032.dts
@@ -1,15 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* phyCORE-MPC5200B-IO (pcm032) board Device Tree Source
*
* Copyright (C) 2006-2009 Pengutronix
- * Sascha Hauer <s.hauer@pengutronix.de>
- * Juergen Beisert <j.beisert@pengutronix.de>
- * Wolfram Sang <w.sang@pengutronix.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
+ * Sascha Hauer, Juergen Beisert, Wolfram Sang <kernel@pengutronix.de>
*/
/include/ "mpc5200b.dtsi"
@@ -26,7 +20,7 @@
model = "phytec,pcm032";
compatible = "phytec,pcm032";
- memory {
+ memory@0 {
reg = <0x00000000 0x08000000>; // 128MB
};
@@ -75,7 +69,7 @@
reg = <0x51>;
};
eeprom@52 {
- compatible = "catalyst,24c32";
+ compatible = "catalyst,24c32", "atmel,24c32";
reg = <0x52>;
pagesize = <32>;
};
@@ -93,9 +87,9 @@
0xc800 0 0 2 &mpc5200_pic 1 2 3
0xc800 0 0 3 &mpc5200_pic 1 3 3
0xc800 0 0 4 &mpc5200_pic 0 0 3>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000
- 0x02000000 0 0xa0000000 0xa0000000 0 0x10000000
- 0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x20000000>,
+ <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xb0000000 0 0x01000000>;
};
localbus {
diff --git a/arch/powerpc/boot/dts/pdm360ng.dts b/arch/powerpc/boot/dts/pdm360ng.dts
index 871c16d1ad5e..67c3b9db75d7 100644
--- a/arch/powerpc/boot/dts/pdm360ng.dts
+++ b/arch/powerpc/boot/dts/pdm360ng.dts
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree Source for IFM PDM360NG.
*
@@ -6,14 +7,9 @@
*
* Based on MPC5121E ADS dts.
* Copyright 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-#include <mpc5121.dtsi>
+#include "mpc5121.dtsi"
/ {
model = "pdm360ng";
@@ -98,12 +94,12 @@
fsl,preserve-clocking;
eeprom@50 {
- compatible = "at,24c01";
+ compatible = "atmel,24c01";
reg = <0x50>;
};
rtc@68 {
- compatible = "stm,m41t00";
+ compatible = "st,m41t00";
reg = <0x68>;
};
};
diff --git a/arch/powerpc/boot/dts/pq2fads.dts b/arch/powerpc/boot/dts/pq2fads.dts
deleted file mode 100644
index 0c525ff0c257..000000000000
--- a/arch/powerpc/boot/dts/pq2fads.dts
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Device Tree for the PQ2FADS-ZU board with an MPC8280 chip.
- *
- * Copyright 2007,2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "pq2fads";
- compatible = "fsl,pq2fads";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <16384>;
- i-cache-size = <16384>;
- timebase-frequency = <0>;
- clock-frequency = <0>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x0>;
- };
-
- localbus@f0010100 {
- compatible = "fsl,mpc8280-localbus",
- "fsl,pq2-localbus";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <0xf0010100 0x60>;
-
- ranges = <0x0 0x0 0xff800000 0x800000
- 0x1 0x0 0xf4500000 0x8000
- 0x8 0x0 0xf8200000 0x8000>;
-
- flash@0,0 {
- compatible = "jedec-flash";
- reg = <0x0 0x0 0x800000>;
- bank-width = <4>;
- device-width = <1>;
- };
-
- bcsr@1,0 {
- reg = <0x1 0x0 0x20>;
- compatible = "fsl,pq2fads-bcsr";
- };
-
- PCI_PIC: pic@8,0 {
- #interrupt-cells = <1>;
- interrupt-controller;
- reg = <0x8 0x0 0x8>;
- compatible = "fsl,pq2ads-pci-pic";
- interrupt-parent = <&PIC>;
- interrupts = <24 8>;
- };
- };
-
- pci0: pci@f0010800 {
- device_type = "pci";
- reg = <0xf0010800 0x10c 0xf00101ac 0x8 0xf00101c4 0x8>;
- compatible = "fsl,mpc8280-pci", "fsl,pq2-pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- clock-frequency = <66000000>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 0x16 */
- 0xb000 0x0 0x0 0x1 &PCI_PIC 0
- 0xb000 0x0 0x0 0x2 &PCI_PIC 1
- 0xb000 0x0 0x0 0x3 &PCI_PIC 2
- 0xb000 0x0 0x0 0x4 &PCI_PIC 3
-
- /* IDSEL 0x17 */
- 0xb800 0x0 0x0 0x1 &PCI_PIC 4
- 0xb800 0x0 0x0 0x2 &PCI_PIC 5
- 0xb800 0x0 0x0 0x3 &PCI_PIC 6
- 0xb800 0x0 0x0 0x4 &PCI_PIC 7
-
- /* IDSEL 0x18 */
- 0xc000 0x0 0x0 0x1 &PCI_PIC 8
- 0xc000 0x0 0x0 0x2 &PCI_PIC 9
- 0xc000 0x0 0x0 0x3 &PCI_PIC 10
- 0xc000 0x0 0x0 0x4 &PCI_PIC 11>;
-
- interrupt-parent = <&PIC>;
- interrupts = <18 8>;
- ranges = <0x42000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x2000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
- 0x1000000 0x0 0x0 0xf6000000 0x0 0x2000000>;
- };
-
- soc@f0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "fsl,mpc8280", "fsl,pq2-soc";
- ranges = <0x0 0xf0000000 0x53000>;
-
- // Temporary -- will go away once kernel uses ranges for get_immrbase().
- reg = <0xf0000000 0x53000>;
-
- cpm@119c0 {
- #address-cells = <1>;
- #size-cells = <1>;
- #interrupt-cells = <2>;
- compatible = "fsl,mpc8280-cpm", "fsl,cpm2";
- reg = <0x119c0 0x30>;
- ranges;
-
- muram@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0x0 0x0 0x10000>;
-
- data@0 {
- compatible = "fsl,cpm-muram-data";
- reg = <0x0 0x2000 0x9800 0x800>;
- };
- };
-
- brg@119f0 {
- compatible = "fsl,mpc8280-brg",
- "fsl,cpm2-brg",
- "fsl,cpm-brg";
- reg = <0x119f0 0x10 0x115f0 0x10>;
- };
-
- serial0: serial@11a00 {
- device_type = "serial";
- compatible = "fsl,mpc8280-scc-uart",
- "fsl,cpm2-scc-uart";
- reg = <0x11a00 0x20 0x8000 0x100>;
- interrupts = <40 8>;
- interrupt-parent = <&PIC>;
- fsl,cpm-brg = <1>;
- fsl,cpm-command = <0x800000>;
- };
-
- serial1: serial@11a20 {
- device_type = "serial";
- compatible = "fsl,mpc8280-scc-uart",
- "fsl,cpm2-scc-uart";
- reg = <0x11a20 0x20 0x8100 0x100>;
- interrupts = <41 8>;
- interrupt-parent = <&PIC>;
- fsl,cpm-brg = <2>;
- fsl,cpm-command = <0x4a00000>;
- };
-
- enet0: ethernet@11320 {
- device_type = "network";
- compatible = "fsl,mpc8280-fcc-enet",
- "fsl,cpm2-fcc-enet";
- reg = <0x11320 0x20 0x8500 0x100 0x113b0 0x1>;
- interrupts = <33 8>;
- interrupt-parent = <&PIC>;
- phy-handle = <&PHY0>;
- linux,network-index = <0>;
- fsl,cpm-command = <0x16200300>;
- };
-
- enet1: ethernet@11340 {
- device_type = "network";
- compatible = "fsl,mpc8280-fcc-enet",
- "fsl,cpm2-fcc-enet";
- reg = <0x11340 0x20 0x8600 0x100 0x113d0 0x1>;
- interrupts = <34 8>;
- interrupt-parent = <&PIC>;
- phy-handle = <&PHY1>;
- linux,network-index = <1>;
- fsl,cpm-command = <0x1a400300>;
- local-mac-address = [00 e0 0c 00 79 01];
- };
-
- mdio@10d40 {
- compatible = "fsl,pq2fads-mdio-bitbang",
- "fsl,mpc8280-mdio-bitbang",
- "fsl,cpm2-mdio-bitbang";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x10d40 0x14>;
- fsl,mdio-pin = <9>;
- fsl,mdc-pin = <10>;
-
- PHY0: ethernet-phy@0 {
- interrupt-parent = <&PIC>;
- interrupts = <25 2>;
- reg = <0x0>;
- };
-
- PHY1: ethernet-phy@1 {
- interrupt-parent = <&PIC>;
- interrupts = <25 2>;
- reg = <0x3>;
- };
- };
-
- usb@11b60 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,mpc8280-usb",
- "fsl,cpm2-usb";
- reg = <0x11b60 0x18 0x8b00 0x100>;
- interrupt-parent = <&PIC>;
- interrupts = <11 8>;
- fsl,cpm-command = <0x2e600000>;
- };
- };
-
- PIC: interrupt-controller@10c00 {
- #interrupt-cells = <2>;
- interrupt-controller;
- reg = <0x10c00 0x80>;
- compatible = "fsl,mpc8280-pic", "fsl,cpm2-pic";
- };
-
- };
-
- chosen {
- linux,stdout-path = "/soc/cpm/serial@11a00";
- };
-};
diff --git a/arch/powerpc/boot/dts/prpmc2800.dts b/arch/powerpc/boot/dts/prpmc2800.dts
deleted file mode 100644
index 00afaacf8c8c..000000000000
--- a/arch/powerpc/boot/dts/prpmc2800.dts
+++ /dev/null
@@ -1,297 +0,0 @@
-/* Device Tree Source for Motorola PrPMC2800
- *
- * Author: Mark A. Greer <mgreer@mvista.com>
- *
- * 2007 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- *
- * Property values that are labeled as "Default" will be updated by bootwrapper
- * if it can determine the exact PrPMC type.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "PrPMC280/PrPMC2800"; /* Default */
- compatible = "motorola,PrPMC2800";
- coherency-off;
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,7447 {
- device_type = "cpu";
- reg = <0>;
- clock-frequency = <733333333>; /* Default */
- bus-frequency = <133333333>;
- timebase-frequency = <33333333>;
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <32768>;
- d-cache-size = <32768>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x0 0x20000000>; /* Default (512MB) */
- };
-
- system-controller@f1000000 { /* Marvell Discovery mv64360 */
- #address-cells = <1>;
- #size-cells = <1>;
- model = "mv64360"; /* Default */
- compatible = "marvell,mv64360";
- clock-frequency = <133333333>;
- reg = <0xf1000000 0x10000>;
- virtual-reg = <0xf1000000>;
- ranges = <0x88000000 0x88000000 0x1000000 /* PCI 0 I/O Space */
- 0x80000000 0x80000000 0x8000000 /* PCI 0 MEM Space */
- 0xa0000000 0xa0000000 0x4000000 /* User FLASH */
- 0x00000000 0xf1000000 0x0010000 /* Bridge's regs */
- 0xf2000000 0xf2000000 0x0040000>;/* Integrated SRAM */
-
- flash@a0000000 {
- device_type = "rom";
- compatible = "direct-mapped";
- reg = <0xa0000000 0x4000000>; /* Default (64MB) */
- probe-type = "CFI";
- bank-width = <4>;
- partitions = <0x00000000 0x00100000 /* RO */
- 0x00100000 0x00040001 /* RW */
- 0x00140000 0x00400000 /* RO */
- 0x00540000 0x039c0000 /* RO */
- 0x03f00000 0x00100000>; /* RO */
- partition-names = "FW Image A", "FW Config Data", "Kernel Image", "Filesystem", "FW Image B";
- };
-
- mdio {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "marvell,mv64360-mdio";
- PHY0: ethernet-phy@1 {
- compatible = "broadcom,bcm5421";
- interrupts = <76>; /* GPP 12 */
- interrupt-parent = <&PIC>;
- reg = <1>;
- };
- PHY1: ethernet-phy@3 {
- compatible = "broadcom,bcm5421";
- interrupts = <76>; /* GPP 12 */
- interrupt-parent = <&PIC>;
- reg = <3>;
- };
- };
-
- ethernet-group@2000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "marvell,mv64360-eth-group";
- reg = <0x2000 0x2000>;
- ethernet@0 {
- device_type = "network";
- compatible = "marvell,mv64360-eth";
- reg = <0>;
- interrupts = <32>;
- interrupt-parent = <&PIC>;
- phy = <&PHY0>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- };
- ethernet@1 {
- device_type = "network";
- compatible = "marvell,mv64360-eth";
- reg = <1>;
- interrupts = <33>;
- interrupt-parent = <&PIC>;
- phy = <&PHY1>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- };
- };
-
- SDMA0: sdma@4000 {
- compatible = "marvell,mv64360-sdma";
- reg = <0x4000 0xc18>;
- virtual-reg = <0xf1004000>;
- interrupts = <36>;
- interrupt-parent = <&PIC>;
- };
-
- SDMA1: sdma@6000 {
- compatible = "marvell,mv64360-sdma";
- reg = <0x6000 0xc18>;
- virtual-reg = <0xf1006000>;
- interrupts = <38>;
- interrupt-parent = <&PIC>;
- };
-
- BRG0: brg@b200 {
- compatible = "marvell,mv64360-brg";
- reg = <0xb200 0x8>;
- clock-src = <8>;
- clock-frequency = <133333333>;
- current-speed = <9600>;
- };
-
- BRG1: brg@b208 {
- compatible = "marvell,mv64360-brg";
- reg = <0xb208 0x8>;
- clock-src = <8>;
- clock-frequency = <133333333>;
- current-speed = <9600>;
- };
-
- CUNIT: cunit@f200 {
- reg = <0xf200 0x200>;
- };
-
- MPSCROUTING: mpscrouting@b400 {
- reg = <0xb400 0xc>;
- };
-
- MPSCINTR: mpscintr@b800 {
- reg = <0xb800 0x100>;
- virtual-reg = <0xf100b800>;
- };
-
- MPSC0: mpsc@8000 {
- compatible = "marvell,mv64360-mpsc";
- reg = <0x8000 0x38>;
- virtual-reg = <0xf1008000>;
- sdma = <&SDMA0>;
- brg = <&BRG0>;
- cunit = <&CUNIT>;
- mpscrouting = <&MPSCROUTING>;
- mpscintr = <&MPSCINTR>;
- cell-index = <0>;
- interrupts = <40>;
- interrupt-parent = <&PIC>;
- };
-
- MPSC1: mpsc@9000 {
- compatible = "marvell,mv64360-mpsc";
- reg = <0x9000 0x38>;
- virtual-reg = <0xf1009000>;
- sdma = <&SDMA1>;
- brg = <&BRG1>;
- cunit = <&CUNIT>;
- mpscrouting = <&MPSCROUTING>;
- mpscintr = <&MPSCINTR>;
- cell-index = <1>;
- interrupts = <42>;
- interrupt-parent = <&PIC>;
- };
-
- wdt@b410 { /* watchdog timer */
- compatible = "marvell,mv64360-wdt";
- reg = <0xb410 0x8>;
- };
-
- i2c@c000 {
- device_type = "i2c";
- compatible = "marvell,mv64360-i2c";
- reg = <0xc000 0x20>;
- virtual-reg = <0xf100c000>;
- interrupts = <37>;
- interrupt-parent = <&PIC>;
- };
-
- PIC: pic {
- #interrupt-cells = <1>;
- #address-cells = <0>;
- compatible = "marvell,mv64360-pic";
- reg = <0x0 0x88>;
- interrupt-controller;
- };
-
- mpp@f000 {
- compatible = "marvell,mv64360-mpp";
- reg = <0xf000 0x10>;
- };
-
- gpp@f100 {
- compatible = "marvell,mv64360-gpp";
- reg = <0xf100 0x20>;
- };
-
- pci@80000000 {
- #address-cells = <3>;
- #size-cells = <2>;
- #interrupt-cells = <1>;
- device_type = "pci";
- compatible = "marvell,mv64360-pci";
- reg = <0xcf8 0x8>;
- ranges = <0x01000000 0x0 0x0
- 0x88000000 0x0 0x01000000
- 0x02000000 0x0 0x80000000
- 0x80000000 0x0 0x08000000>;
- bus-range = <0 255>;
- clock-frequency = <66000000>;
- interrupt-pci-iack = <0xc34>;
- interrupt-parent = <&PIC>;
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 0x0a */
- 0x5000 0 0 1 &PIC 80
- 0x5000 0 0 2 &PIC 81
- 0x5000 0 0 3 &PIC 91
- 0x5000 0 0 4 &PIC 93
-
- /* IDSEL 0x0b */
- 0x5800 0 0 1 &PIC 91
- 0x5800 0 0 2 &PIC 93
- 0x5800 0 0 3 &PIC 80
- 0x5800 0 0 4 &PIC 81
-
- /* IDSEL 0x0c */
- 0x6000 0 0 1 &PIC 91
- 0x6000 0 0 2 &PIC 93
- 0x6000 0 0 3 &PIC 80
- 0x6000 0 0 4 &PIC 81
-
- /* IDSEL 0x0d */
- 0x6800 0 0 1 &PIC 93
- 0x6800 0 0 2 &PIC 80
- 0x6800 0 0 3 &PIC 81
- 0x6800 0 0 4 &PIC 91
- >;
- };
-
- cpu-error@0070 {
- compatible = "marvell,mv64360-cpu-error";
- reg = <0x70 0x10 0x128 0x28>;
- interrupts = <3>;
- interrupt-parent = <&PIC>;
- };
-
- sram-ctrl@0380 {
- compatible = "marvell,mv64360-sram-ctrl";
- reg = <0x380 0x80>;
- interrupts = <13>;
- interrupt-parent = <&PIC>;
- };
-
- pci-error@1d40 {
- compatible = "marvell,mv64360-pci-error";
- reg = <0x1d40 0x40 0xc28 0x4>;
- interrupts = <12>;
- interrupt-parent = <&PIC>;
- };
-
- mem-ctrl@1400 {
- compatible = "marvell,mv64360-mem-ctrl";
- reg = <0x1400 0x60>;
- interrupts = <17>;
- interrupt-parent = <&PIC>;
- };
- };
-
- chosen {
- bootargs = "ip=on";
- linux,stdout-path = &MPSC0;
- };
-};
diff --git a/arch/powerpc/boot/dts/ps3.dts b/arch/powerpc/boot/dts/ps3.dts
index 96ba5b512afe..6bdfba6cbb30 100644
--- a/arch/powerpc/boot/dts/ps3.dts
+++ b/arch/powerpc/boot/dts/ps3.dts
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PS3 Game Console device tree.
*
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* Copyright 2007 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/rainier.dts b/arch/powerpc/boot/dts/rainier.dts
index 9684c80e4093..e59829cff556 100644
--- a/arch/powerpc/boot/dts/rainier.dts
+++ b/arch/powerpc/boot/dts/rainier.dts
@@ -344,7 +344,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
bootargs = "console=ttyS0,115200";
};
};
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index d86a3a498118..3c849e23e5f3 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -235,7 +235,7 @@
has-new-stacr-staopc;
};
};
- PCIE0: pciex@d00000000 {
+ PCIE0: pcie@d00000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -276,7 +276,7 @@
0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
};
- PCIE1: pciex@d20000000 {
+ PCIE1: pcie@d20000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -317,7 +317,7 @@
0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
};
- PCIE2: pciex@d40000000 {
+ PCIE2: pcie@d40000000 {
device_type = "pci";
#interrupt-cells = <1>;
#size-cells = <2>;
@@ -358,30 +358,11 @@
0x0 0x0 0x0 0x4 &UIC3 0xb 0x4 /* swizzled int D */>;
};
- MSI: ppc4xx-msi@400300000 {
- compatible = "amcc,ppc4xx-msi", "ppc4xx-msi";
- reg = < 0x4 0x00300000 0x100
- 0x4 0x00300000 0x100>;
- sdr-base = <0x3B0>;
- msi-data = <0x00000000>;
- msi-mask = <0x44440000>;
- interrupt-count = <3>;
- interrupts =<0 1 2 3>;
- interrupt-parent = <&UIC0>;
- #interrupt-cells = <1>;
- #address-cells = <0>;
- #size-cells = <0>;
- interrupt-map = <0 &UIC0 0xC 1
- 1 &UIC0 0x0D 1
- 2 &UIC0 0x0E 1
- 3 &UIC0 0x0F 1>;
- };
-
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600200";
+ stdout-path = "/plb/opb/serial@ef600200";
};
};
diff --git a/arch/powerpc/boot/dts/sam440ep.dts b/arch/powerpc/boot/dts/sam440ep.dts
index f0663be10421..7d15f18e1180 100644
--- a/arch/powerpc/boot/dts/sam440ep.dts
+++ b/arch/powerpc/boot/dts/sam440ep.dts
@@ -196,7 +196,7 @@
interrupt-parent = <&UIC0>;
interrupts = <2 4>;
rtc@68 {
- compatible = "stm,m41t80";
+ compatible = "st,m41t80";
reg = <0x68>;
};
};
@@ -288,6 +288,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/sbc8349.dts b/arch/powerpc/boot/dts/sbc8349.dts
deleted file mode 100644
index fc89e00b765c..000000000000
--- a/arch/powerpc/boot/dts/sbc8349.dts
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * SBC8349E Device Tree Source
- *
- * Copyright 2007 Wind River Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * -based largely on the Freescale MPC834x_MDS dts.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "SBC8349E";
- compatible = "SBC834xE";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8349@0 {
- device_type = "cpu";
- reg = <0x0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>; // from bootloader
- bus-frequency = <0>; // from bootloader
- clock-frequency = <0>; // from bootloader
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x10000000>; // 256MB at 0
- };
-
- soc8349@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- ranges = <0x0 0xe0000000 0x00100000>;
- reg = <0xe0000000 0x00000200>;
- bus-frequency = <0>;
-
- wdt@200 {
- compatible = "mpc83xx_wdt";
- reg = <0x200 0x100>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <14 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <15 0x8>;
- interrupt-parent = <&ipic>;
- dfsrr;
- };
-
- spi@7000 {
- cell-index = <0>;
- compatible = "fsl,spi";
- reg = <0x7000 0x1000>;
- interrupts = <16 0x8>;
- interrupt-parent = <&ipic>;
- mode = "cpu";
- };
-
- dma@82a8 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
- reg = <0x82a8 4>;
- ranges = <0 0x8100 0x1a8>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
- reg = <0x180 0x28>;
- cell-index = <3>;
- interrupt-parent = <&ipic>;
- interrupts = <71 8>;
- };
- };
-
- /* phy type (ULPI or SERIAL) are only types supported for MPH */
- /* port = 0 or 1 */
- usb@22000 {
- compatible = "fsl-usb2-mph";
- reg = <0x22000 0x1000>;
- #address-cells = <1>;
- #size-cells = <0>;
- interrupt-parent = <&ipic>;
- interrupts = <39 0x8>;
- phy_type = "ulpi";
- port0;
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <32 0x8 33 0x8 34 0x8>;
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- linux,network-index = <0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@19 {
- interrupt-parent = <&ipic>;
- interrupts = <20 0x8>;
- reg = <0x19>;
- };
-
- phy1: ethernet-phy@1a {
- interrupt-parent = <&ipic>;
- interrupts = <21 0x8>;
- reg = <0x1a>;
- };
-
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 0x8 36 0x8 37 0x8>;
- interrupt-parent = <&ipic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
- linux,network-index = <1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <9 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <10 0x8>;
- interrupt-parent = <&ipic>;
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <11 0x8>;
- interrupt-parent = <&ipic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0x7e>;
- fsl,descriptor-types-mask = <0x01010ebf>;
- };
-
- /* IPIC
- * interrupts cell = <intr #, sense>
- * sense values match linux IORESOURCE_IRQ_* defines:
- * sense == 8: Level, low assertion
- * sense == 2: Edge, high-to-low change
- */
- ipic: pic@700 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x700 0x100>;
- device_type = "ipic";
- };
- };
-
- localbus@e0005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8349-localbus", "simple-bus";
- reg = <0xe0005000 0x1000>;
- interrupts = <77 0x8>;
- interrupt-parent = <&ipic>;
- ranges = <0x0 0x0 0xff800000 0x00800000 /* 8MB Flash */
- 0x1 0x0 0xf8000000 0x00002000 /* 8KB EEPROM */
- 0x2 0x0 0x10000000 0x04000000 /* 64MB SDRAM */
- 0x3 0x0 0x10000000 0x04000000>; /* 64MB SDRAM */
-
- flash@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "intel,28F640J3A", "cfi-flash";
- reg = <0x0 0x0 0x800000>;
- bank-width = <2>;
- device-width = <1>;
-
- partition@0 {
- label = "u-boot";
- reg = <0x00000000 0x00040000>;
- read-only;
- };
-
- partition@40000 {
- label = "user";
- reg = <0x00040000 0x006c0000>;
- };
-
- partition@700000 {
- label = "legacy u-boot";
- reg = <0x00700000 0x00100000>;
- read-only;
- };
-
- };
- };
-
- pci0: pci@e0008500 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x11 */
- 0x8800 0x0 0x0 0x1 &ipic 48 0x8
- 0x8800 0x0 0x0 0x2 &ipic 17 0x8
- 0x8800 0x0 0x0 0x3 &ipic 18 0x8
- 0x8800 0x0 0x0 0x4 &ipic 19 0x8>;
-
- interrupt-parent = <&ipic>;
- interrupts = <0x42 0x8>;
- bus-range = <0 0>;
- ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
- 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
- clock-frequency = <66666666>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008500 0x100 /* internal registers */
- 0xe0008300 0x8>; /* config space access registers */
- compatible = "fsl,mpc8349-pci";
- device_type = "pci";
- };
-};
diff --git a/arch/powerpc/boot/dts/sbc8548-altflash.dts b/arch/powerpc/boot/dts/sbc8548-altflash.dts
deleted file mode 100644
index 0b38a0defd2c..000000000000
--- a/arch/powerpc/boot/dts/sbc8548-altflash.dts
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * SBC8548 Device Tree Source
- *
- * Configured for booting off the alternate (64MB SODIMM) flash.
- * Requires switching JP12 jumpers and changing SW2.8 setting.
- *
- * Copyright 2013 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-
-/dts-v1/;
-
-/include/ "sbc8548-pre.dtsi"
-
-/{
- localbus@e0000000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "simple-bus";
- reg = <0xe0000000 0x5000>;
- interrupt-parent = <&mpic>;
-
- ranges = <0x0 0x0 0xfc000000 0x04000000 /*64MB Flash*/
- 0x3 0x0 0xf0000000 0x04000000 /*64MB SDRAM*/
- 0x4 0x0 0xf4000000 0x04000000 /*64MB SDRAM*/
- 0x5 0x0 0xf8000000 0x00b10000 /* EPLD */
- 0x6 0x0 0xef800000 0x00800000>; /*8MB Flash*/
-
- flash@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x0 0x0 0x04000000>;
- compatible = "intel,JS28F128", "cfi-flash";
- bank-width = <4>;
- device-width = <1>;
- partition@0x0 {
- label = "space";
- /* FC000000 -> FFEFFFFF */
- reg = <0x00000000 0x03f00000>;
- };
- partition@0x03f00000 {
- label = "bootloader";
- /* FFF00000 -> FFFFFFFF */
- reg = <0x03f00000 0x00100000>;
- read-only;
- };
- };
-
-
- epld@5,0 {
- compatible = "wrs,epld-localbus";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <0x5 0x0 0x00b10000>;
- ranges = <
- 0x0 0x0 0x5 0x000000 0x1fff /* LED */
- 0x1 0x0 0x5 0x100000 0x1fff /* Switches */
- 0x3 0x0 0x5 0x300000 0x1fff /* HW Rev. */
- 0xb 0x0 0x5 0xb00000 0x1fff /* EEPROM */
- >;
-
- led@0,0 {
- compatible = "led";
- reg = <0x0 0x0 0x1fff>;
- };
-
- switches@1,0 {
- compatible = "switches";
- reg = <0x1 0x0 0x1fff>;
- };
-
- hw-rev@3,0 {
- compatible = "hw-rev";
- reg = <0x3 0x0 0x1fff>;
- };
-
- eeprom@b,0 {
- compatible = "eeprom";
- reg = <0xb 0 0x1fff>;
- };
-
- };
-
- alt-flash@6,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "intel,JS28F640", "cfi-flash";
- reg = <0x6 0x0 0x800000>;
- bank-width = <1>;
- device-width = <1>;
- partition@0x0 {
- label = "space";
- /* EF800000 -> EFF9FFFF */
- reg = <0x00000000 0x007a0000>;
- };
- partition@0x7a0000 {
- label = "bootloader";
- /* EFFA0000 -> EFFFFFFF */
- reg = <0x007a0000 0x00060000>;
- read-only;
- };
- };
-
-
- };
-};
-
-/include/ "sbc8548-post.dtsi"
diff --git a/arch/powerpc/boot/dts/sbc8548-post.dtsi b/arch/powerpc/boot/dts/sbc8548-post.dtsi
deleted file mode 100644
index 9b505c8e5350..000000000000
--- a/arch/powerpc/boot/dts/sbc8548-post.dtsi
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * SBC8548 Device Tree Source
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/{
- soc8548@e0000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- ranges = <0x00000000 0xe0000000 0x00100000>;
- bus-frequency = <0>;
- compatible = "simple-bus";
-
- ecm-law@0 {
- compatible = "fsl,ecm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- ecm@1000 {
- compatible = "fsl,mpc8548-ecm", "fsl,ecm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- memory-controller@2000 {
- compatible = "fsl,mpc8548-memory-controller";
- reg = <0x2000 0x1000>;
- interrupt-parent = <&mpic>;
- interrupts = <0x12 0x2>;
- };
-
- L2: l2-cache-controller@20000 {
- compatible = "fsl,mpc8548-l2-cache-controller";
- reg = <0x20000 0x1000>;
- cache-line-size = <0x20>; // 32 bytes
- cache-size = <0x80000>; // L2, 512K
- interrupt-parent = <&mpic>;
- interrupts = <0x10 0x2>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <0x2b 0x2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8548-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8548-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8548-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8548-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8548-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <0x1d 0x2 0x1e 0x2 0x22 0x2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@19 {
- interrupt-parent = <&mpic>;
- interrupts = <0x6 0x1>;
- reg = <0x19>;
- };
- phy1: ethernet-phy@1a {
- interrupt-parent = <&mpic>;
- interrupts = <0x7 0x1>;
- reg = <0x1a>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "eTSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <0x23 0x2 0x24 0x2 0x28 0x2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <0x2a 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>; // reg base, size
- clock-frequency = <0>; // should we fill in in uboot?
- interrupts = <0x2a 0x2>;
- interrupt-parent = <&mpic>;
- };
-
- global-utilities@e0000 { //global utilities reg
- compatible = "fsl,mpc8548-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
-
- crypto@30000 {
- compatible = "fsl,sec2.1", "fsl,sec2.0";
- reg = <0x30000 0x10000>;
- interrupts = <45 2>;
- interrupt-parent = <&mpic>;
- fsl,num-channels = <4>;
- fsl,channel-fifo-len = <24>;
- fsl,exec-units-mask = <0xfe>;
- fsl,descriptor-types-mask = <0x12b0ebf>;
- };
-
- mpic: pic@40000 {
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- };
- };
-
- pci0: pci@e0008000 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
- /* IDSEL 0x01 (PCI-X slot) @66MHz */
- 0x0800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0x0800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0x0800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0x0800 0x0 0x0 0x4 &mpic 0x1 0x1
-
- /* IDSEL 0x11 (PCI, 3.3V 32bit) @33MHz */
- 0x8800 0x0 0x0 0x1 &mpic 0x2 0x1
- 0x8800 0x0 0x0 0x2 &mpic 0x3 0x1
- 0x8800 0x0 0x0 0x3 &mpic 0x4 0x1
- 0x8800 0x0 0x0 0x4 &mpic 0x1 0x1>;
-
- interrupt-parent = <&mpic>;
- interrupts = <0x18 0x2>;
- bus-range = <0 0>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00800000>;
- clock-frequency = <66000000>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe0008000 0x1000>;
- compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
- device_type = "pci";
- };
-
- pci1: pcie@e000a000 {
- interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
- interrupt-map = <
-
- /* IDSEL 0x0 (PEX) */
- 0x0000 0x0 0x0 0x1 &mpic 0x0 0x1
- 0x0000 0x0 0x0 0x2 &mpic 0x1 0x1
- 0x0000 0x0 0x0 0x3 &mpic 0x2 0x1
- 0x0000 0x0 0x0 0x4 &mpic 0x3 0x1>;
-
- interrupt-parent = <&mpic>;
- interrupts = <0x1a 0x2>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x10000000
- 0x01000000 0x0 0x00000000 0xe2800000 0x0 0x08000000>;
- clock-frequency = <33000000>;
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xe000a000 0x1000>;
- compatible = "fsl,mpc8548-pcie";
- device_type = "pci";
- pcie@0 {
- reg = <0x0 0x0 0x0 0x0 0x0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xa0000000
- 0x02000000 0x0 0xa0000000
- 0x0 0x10000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00800000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/sbc8548-pre.dtsi b/arch/powerpc/boot/dts/sbc8548-pre.dtsi
deleted file mode 100644
index d8c66290c5b4..000000000000
--- a/arch/powerpc/boot/dts/sbc8548-pre.dtsi
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * SBC8548 Device Tree Source
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/{
- model = "SBC8548";
- compatible = "SBC8548";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8548@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <0x20>; // 32 bytes
- i-cache-line-size = <0x20>; // 32 bytes
- d-cache-size = <0x8000>; // L1, 32K
- i-cache-size = <0x8000>; // L1, 32K
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>;
- clock-frequency = <0>;
- next-level-cache = <&L2>;
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x10000000>;
- };
-
-};
diff --git a/arch/powerpc/boot/dts/sbc8548.dts b/arch/powerpc/boot/dts/sbc8548.dts
deleted file mode 100644
index 1df2a0955668..000000000000
--- a/arch/powerpc/boot/dts/sbc8548.dts
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * SBC8548 Device Tree Source
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-
-/dts-v1/;
-
-/include/ "sbc8548-pre.dtsi"
-
-/{
- localbus@e0000000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "simple-bus";
- reg = <0xe0000000 0x5000>;
- interrupt-parent = <&mpic>;
-
- ranges = <0x0 0x0 0xff800000 0x00800000 /*8MB Flash*/
- 0x3 0x0 0xf0000000 0x04000000 /*64MB SDRAM*/
- 0x4 0x0 0xf4000000 0x04000000 /*64MB SDRAM*/
- 0x5 0x0 0xf8000000 0x00b10000 /* EPLD */
- 0x6 0x0 0xec000000 0x04000000>; /*64MB Flash*/
-
-
- flash@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "intel,JS28F640", "cfi-flash";
- reg = <0x0 0x0 0x800000>;
- bank-width = <1>;
- device-width = <1>;
- partition@0x0 {
- label = "space";
- /* FF800000 -> FFF9FFFF */
- reg = <0x00000000 0x007a0000>;
- };
- partition@0x7a0000 {
- label = "bootloader";
- /* FFFA0000 -> FFFFFFFF */
- reg = <0x007a0000 0x00060000>;
- read-only;
- };
- };
-
- epld@5,0 {
- compatible = "wrs,epld-localbus";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <0x5 0x0 0x00b10000>;
- ranges = <
- 0x0 0x0 0x5 0x000000 0x1fff /* LED */
- 0x1 0x0 0x5 0x100000 0x1fff /* Switches */
- 0x3 0x0 0x5 0x300000 0x1fff /* HW Rev. */
- 0xb 0x0 0x5 0xb00000 0x1fff /* EEPROM */
- >;
-
- led@0,0 {
- compatible = "led";
- reg = <0x0 0x0 0x1fff>;
- };
-
- switches@1,0 {
- compatible = "switches";
- reg = <0x1 0x0 0x1fff>;
- };
-
- hw-rev@3,0 {
- compatible = "hw-rev";
- reg = <0x3 0x0 0x1fff>;
- };
-
- eeprom@b,0 {
- compatible = "eeprom";
- reg = <0xb 0 0x1fff>;
- };
-
- };
-
- alt-flash@6,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- reg = <0x6 0x0 0x04000000>;
- compatible = "intel,JS28F128", "cfi-flash";
- bank-width = <4>;
- device-width = <1>;
- partition@0x0 {
- label = "space";
- /* EC000000 -> EFEFFFFF */
- reg = <0x00000000 0x03f00000>;
- };
- partition@0x03f00000 {
- label = "bootloader";
- /* EFF00000 -> EFFFFFFF */
- reg = <0x03f00000 0x00100000>;
- read-only;
- };
- };
- };
-};
-
-/include/ "sbc8548-post.dtsi"
diff --git a/arch/powerpc/boot/dts/sbc8641d.dts b/arch/powerpc/boot/dts/sbc8641d.dts
deleted file mode 100644
index 631ede72e226..000000000000
--- a/arch/powerpc/boot/dts/sbc8641d.dts
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
- * SBC8641D Device Tree Source
- *
- * Copyright 2008 Wind River Systems Inc.
- *
- * Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the mpc8641_hpcn.dts by Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-/dts-v1/;
-
-/ {
- model = "SBC8641D";
- compatible = "wind,sbc8641";
- #address-cells = <1>;
- #size-cells = <1>;
-
- aliases {
- ethernet0 = &enet0;
- ethernet1 = &enet1;
- ethernet2 = &enet2;
- ethernet3 = &enet3;
- serial0 = &serial0;
- serial1 = &serial1;
- pci0 = &pci0;
- pci1 = &pci1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- PowerPC,8641@0 {
- device_type = "cpu";
- reg = <0>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>; // L1
- i-cache-size = <32768>; // L1
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- PowerPC,8641@1 {
- device_type = "cpu";
- reg = <1>;
- d-cache-line-size = <32>;
- i-cache-line-size = <32>;
- d-cache-size = <32768>;
- i-cache-size = <32768>;
- timebase-frequency = <0>; // From uboot
- bus-frequency = <0>; // From uboot
- clock-frequency = <0>; // From uboot
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x20000000>; // 512M at 0x0
- };
-
- localbus@f8005000 {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-localbus", "simple-bus";
- reg = <0xf8005000 0x1000>;
- interrupts = <19 2>;
- interrupt-parent = <&mpic>;
-
- ranges = <0 0 0xff000000 0x01000000 // 16MB Boot flash
- 1 0 0xf0000000 0x00010000 // 64KB EEPROM
- 2 0 0xf1000000 0x00100000 // EPLD (1MB)
- 3 0 0xe0000000 0x04000000 // 64MB LB SDRAM (CS3)
- 4 0 0xe4000000 0x04000000 // 64MB LB SDRAM (CS4)
- 6 0 0xf4000000 0x00100000 // LCD display (1MB)
- 7 0 0xe8000000 0x04000000>; // 64MB OneNAND
-
- flash@0,0 {
- compatible = "cfi-flash";
- reg = <0 0 0x01000000>;
- bank-width = <2>;
- device-width = <2>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "dtb";
- reg = <0x00000000 0x00100000>;
- read-only;
- };
- partition@300000 {
- label = "kernel";
- reg = <0x00100000 0x00400000>;
- read-only;
- };
- partition@400000 {
- label = "fs";
- reg = <0x00500000 0x00a00000>;
- };
- partition@700000 {
- label = "firmware";
- reg = <0x00f00000 0x00100000>;
- read-only;
- };
- };
-
- epld@2,0 {
- compatible = "wrs,epld-localbus";
- #address-cells = <2>;
- #size-cells = <1>;
- reg = <2 0 0x100000>;
- ranges = <0 0 5 0 1 // User switches
- 1 0 5 1 1 // Board ID/Rev
- 3 0 5 3 1>; // LEDs
- };
- };
-
- soc@f8000000 {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
- ranges = <0x00000000 0xf8000000 0x00100000>;
- bus-frequency = <0>;
-
- mcm-law@0 {
- compatible = "fsl,mcm-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <10>;
- };
-
- mcm@1000 {
- compatible = "fsl,mpc8641-mcm", "fsl,mcm";
- reg = <0x1000 0x1000>;
- interrupts = <17 2>;
- interrupt-parent = <&mpic>;
- };
-
- i2c@3000 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <0>;
- compatible = "fsl-i2c";
- reg = <0x3000 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- i2c@3100 {
- #address-cells = <1>;
- #size-cells = <0>;
- cell-index = <1>;
- compatible = "fsl-i2c";
- reg = <0x3100 0x100>;
- interrupts = <43 2>;
- interrupt-parent = <&mpic>;
- dfsrr;
- };
-
- dma@21300 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "fsl,mpc8641-dma", "fsl,eloplus-dma";
- reg = <0x21300 0x4>;
- ranges = <0x0 0x21100 0x200>;
- cell-index = <0>;
- dma-channel@0 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x0 0x80>;
- cell-index = <0>;
- interrupt-parent = <&mpic>;
- interrupts = <20 2>;
- };
- dma-channel@80 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x80 0x80>;
- cell-index = <1>;
- interrupt-parent = <&mpic>;
- interrupts = <21 2>;
- };
- dma-channel@100 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x100 0x80>;
- cell-index = <2>;
- interrupt-parent = <&mpic>;
- interrupts = <22 2>;
- };
- dma-channel@180 {
- compatible = "fsl,mpc8641-dma-channel",
- "fsl,eloplus-dma-channel";
- reg = <0x180 0x80>;
- cell-index = <3>;
- interrupt-parent = <&mpic>;
- interrupts = <23 2>;
- };
- };
-
- enet0: ethernet@24000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <0>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x24000 0x1000>;
- ranges = <0x0 0x24000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <29 2 30 2 34 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi0>;
- phy-handle = <&phy0>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-mdio";
- reg = <0x520 0x20>;
-
- phy0: ethernet-phy@1f {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <0x1f>;
- };
- phy1: ethernet-phy@0 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <0>;
- };
- phy2: ethernet-phy@1 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <1>;
- };
- phy3: ethernet-phy@2 {
- interrupt-parent = <&mpic>;
- interrupts = <10 1>;
- reg = <2>;
- };
- tbi0: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet1: ethernet@25000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <1>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x25000 0x1000>;
- ranges = <0x0 0x25000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <35 2 36 2 40 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi1>;
- phy-handle = <&phy1>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi1: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet2: ethernet@26000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <2>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x26000 0x1000>;
- ranges = <0x0 0x26000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <31 2 32 2 33 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi2>;
- phy-handle = <&phy2>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi2: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- enet3: ethernet@27000 {
- #address-cells = <1>;
- #size-cells = <1>;
- cell-index = <3>;
- device_type = "network";
- model = "TSEC";
- compatible = "gianfar";
- reg = <0x27000 0x1000>;
- ranges = <0x0 0x27000 0x1000>;
- local-mac-address = [ 00 00 00 00 00 00 ];
- interrupts = <37 2 38 2 39 2>;
- interrupt-parent = <&mpic>;
- tbi-handle = <&tbi3>;
- phy-handle = <&phy3>;
- phy-connection-type = "rgmii-id";
-
- mdio@520 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "fsl,gianfar-tbi";
- reg = <0x520 0x20>;
-
- tbi3: tbi-phy@11 {
- reg = <0x11>;
- device_type = "tbi-phy";
- };
- };
- };
-
- serial0: serial@4500 {
- cell-index = <0>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4500 0x100>;
- clock-frequency = <0>;
- interrupts = <42 2>;
- interrupt-parent = <&mpic>;
- };
-
- serial1: serial@4600 {
- cell-index = <1>;
- device_type = "serial";
- compatible = "fsl,ns16550", "ns16550";
- reg = <0x4600 0x100>;
- clock-frequency = <0>;
- interrupts = <28 2>;
- interrupt-parent = <&mpic>;
- };
-
- mpic: pic@40000 {
- clock-frequency = <0>;
- interrupt-controller;
- #address-cells = <0>;
- #interrupt-cells = <2>;
- reg = <0x40000 0x40000>;
- compatible = "chrp,open-pic";
- device_type = "open-pic";
- big-endian;
- };
-
- global-utilities@e0000 {
- compatible = "fsl,mpc8641-guts";
- reg = <0xe0000 0x1000>;
- fsl,has-rstcr;
- };
- };
-
- pci0: pcie@f8008000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xf8008000 0x1000>;
- bus-range = <0x0 0xff>;
- ranges = <0x02000000 0x0 0x80000000 0x80000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <24 2>;
- interrupt-map-mask = <0xff00 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 0 1
- 0x0000 0 0 2 &mpic 1 1
- 0x0000 0 0 3 &mpic 2 1
- 0x0000 0 0 4 &mpic 3 1
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0x80000000
- 0x02000000 0x0 0x80000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00100000>;
- };
-
- };
-
- pci1: pcie@f8009000 {
- compatible = "fsl,mpc8641-pcie";
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- reg = <0xf8009000 0x1000>;
- bus-range = <0 0xff>;
- ranges = <0x02000000 0x0 0xa0000000 0xa0000000 0x0 0x20000000
- 0x01000000 0x0 0x00000000 0xe3000000 0x0 0x00100000>;
- clock-frequency = <33333333>;
- interrupt-parent = <&mpic>;
- interrupts = <25 2>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <
- /* IDSEL 0x0 */
- 0x0000 0 0 1 &mpic 4 1
- 0x0000 0 0 2 &mpic 5 1
- 0x0000 0 0 3 &mpic 6 1
- 0x0000 0 0 4 &mpic 7 1
- >;
-
- pcie@0 {
- reg = <0 0 0 0 0>;
- #size-cells = <2>;
- #address-cells = <3>;
- device_type = "pci";
- ranges = <0x02000000 0x0 0xa0000000
- 0x02000000 0x0 0xa0000000
- 0x0 0x20000000
-
- 0x01000000 0x0 0x00000000
- 0x01000000 0x0 0x00000000
- 0x0 0x00100000>;
- };
- };
-};
diff --git a/arch/powerpc/boot/dts/sequoia.dts b/arch/powerpc/boot/dts/sequoia.dts
index b1d329246b08..60d211da9593 100644
--- a/arch/powerpc/boot/dts/sequoia.dts
+++ b/arch/powerpc/boot/dts/sequoia.dts
@@ -229,7 +229,7 @@
};
partition@84000 {
label = "user";
- reg = <0x00000000 0x01f7c000>;
+ reg = <0x00084000 0x01f7c000>;
};
};
};
@@ -406,7 +406,7 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
bootargs = "console=ttyS0,115200";
};
};
diff --git a/arch/powerpc/boot/dts/socrates.dts b/arch/powerpc/boot/dts/socrates.dts
index 134a5ff917e1..00a56e8e367c 100644
--- a/arch/powerpc/boot/dts/socrates.dts
+++ b/arch/powerpc/boot/dts/socrates.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Device Tree Source for the Socrates board (MPC8544).
*
* Copyright (c) 2008 Emcraft Systems.
* Sergei Poselenov, <sposelenov@emcraft.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/storcenter.dts b/arch/powerpc/boot/dts/storcenter.dts
index 2a555738517e..99f6f544dc5f 100644
--- a/arch/powerpc/boot/dts/storcenter.dts
+++ b/arch/powerpc/boot/dts/storcenter.dts
@@ -137,6 +137,6 @@
};
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/stx_gp3_8560.dts b/arch/powerpc/boot/dts/stx_gp3_8560.dts
index 78a72ee48205..e73f7e75b0b4 100644
--- a/arch/powerpc/boot/dts/stx_gp3_8560.dts
+++ b/arch/powerpc/boot/dts/stx_gp3_8560.dts
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* STX GP3 - 8560 ADS Device Tree Source
*
* Copyright 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "stx,gp3";
compatible = "stx,gp3-8560", "stx,gp3";
diff --git a/arch/powerpc/boot/dts/stxssa8555.dts b/arch/powerpc/boot/dts/stxssa8555.dts
index 859f854ba538..96add25c904b 100644
--- a/arch/powerpc/boot/dts/stxssa8555.dts
+++ b/arch/powerpc/boot/dts/stxssa8555.dts
@@ -1,18 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8555-based STx GP3 Device Tree Source
*
* Copyright 2006, 2008 Freescale Semiconductor Inc.
*
* Copyright 2010 Silicon Turnkey Express LLC.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "stx,gp3";
compatible = "stx,gp3-8560", "stx,gp3";
diff --git a/arch/powerpc/boot/dts/t4240emu.dts b/arch/powerpc/boot/dts/t4240emu.dts
deleted file mode 100644
index bc12127a03fb..000000000000
--- a/arch/powerpc/boot/dts/t4240emu.dts
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * T4240 emulator Device Tree Source
- *
- * Copyright 2013 Freescale Semiconductor Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Freescale Semiconductor nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- *
- * ALTERNATIVELY, this software may be distributed under the terms of the
- * GNU General Public License ("GPL") as published by the Free Software
- * Foundation, either version 2 of that License or (at your option) any
- * later version.
- *
- * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor "AS IS" AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/dts-v1/;
-
-/include/ "fsl/e6500_power_isa.dtsi"
-/ {
- compatible = "fsl,T4240";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- aliases {
- ccsr = &soc;
-
- serial0 = &serial0;
- serial1 = &serial1;
- serial2 = &serial2;
- serial3 = &serial3;
- dma0 = &dma0;
- dma1 = &dma1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu0: PowerPC,e6500@0 {
- device_type = "cpu";
- reg = <0 1>;
- next-level-cache = <&L2_1>;
- fsl,portid-mapping = <0x80000000>;
- };
- cpu1: PowerPC,e6500@2 {
- device_type = "cpu";
- reg = <2 3>;
- next-level-cache = <&L2_1>;
- fsl,portid-mapping = <0x80000000>;
- };
- cpu2: PowerPC,e6500@4 {
- device_type = "cpu";
- reg = <4 5>;
- next-level-cache = <&L2_1>;
- fsl,portid-mapping = <0x80000000>;
- };
- cpu3: PowerPC,e6500@6 {
- device_type = "cpu";
- reg = <6 7>;
- next-level-cache = <&L2_1>;
- fsl,portid-mapping = <0x80000000>;
- };
-
- cpu4: PowerPC,e6500@8 {
- device_type = "cpu";
- reg = <8 9>;
- next-level-cache = <&L2_2>;
- fsl,portid-mapping = <0x40000000>;
- };
- cpu5: PowerPC,e6500@10 {
- device_type = "cpu";
- reg = <10 11>;
- next-level-cache = <&L2_2>;
- fsl,portid-mapping = <0x40000000>;
- };
- cpu6: PowerPC,e6500@12 {
- device_type = "cpu";
- reg = <12 13>;
- next-level-cache = <&L2_2>;
- fsl,portid-mapping = <0x40000000>;
- };
- cpu7: PowerPC,e6500@14 {
- device_type = "cpu";
- reg = <14 15>;
- next-level-cache = <&L2_2>;
- fsl,portid-mapping = <0x40000000>;
- };
-
- cpu8: PowerPC,e6500@16 {
- device_type = "cpu";
- reg = <16 17>;
- next-level-cache = <&L2_3>;
- fsl,portid-mapping = <0x20000000>;
- };
- cpu9: PowerPC,e6500@18 {
- device_type = "cpu";
- reg = <18 19>;
- next-level-cache = <&L2_3>;
- fsl,portid-mapping = <0x20000000>;
- };
- cpu10: PowerPC,e6500@20 {
- device_type = "cpu";
- reg = <20 21>;
- next-level-cache = <&L2_3>;
- fsl,portid-mapping = <0x20000000>;
- };
- cpu11: PowerPC,e6500@22 {
- device_type = "cpu";
- reg = <22 23>;
- next-level-cache = <&L2_3>;
- fsl,portid-mapping = <0x20000000>;
- };
- };
-};
-
-/ {
- model = "fsl,T4240QDS";
- compatible = "fsl,T4240EMU", "fsl,T4240QDS";
- #address-cells = <2>;
- #size-cells = <2>;
- interrupt-parent = <&mpic>;
-
- ifc: localbus@ffe124000 {
- reg = <0xf 0xfe124000 0 0x2000>;
- ranges = <0 0 0xf 0xe8000000 0x08000000
- 2 0 0xf 0xff800000 0x00010000
- 3 0 0xf 0xffdf0000 0x00008000>;
-
- nor@0,0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "cfi-flash";
- reg = <0x0 0x0 0x8000000>;
-
- bank-width = <2>;
- device-width = <1>;
- };
-
- };
-
- memory {
- device_type = "memory";
- };
-
- soc: soc@ffe000000 {
- ranges = <0x00000000 0xf 0xfe000000 0x1000000>;
- reg = <0xf 0xfe000000 0 0x00001000>;
-
- };
-};
-
-&ifc {
- #address-cells = <2>;
- #size-cells = <1>;
- compatible = "fsl,ifc", "simple-bus";
- interrupts = <25 2 0 0>;
-};
-
-&soc {
- #address-cells = <1>;
- #size-cells = <1>;
- device_type = "soc";
- compatible = "simple-bus";
-
- soc-sram-error {
- compatible = "fsl,soc-sram-error";
- interrupts = <16 2 1 29>;
- };
-
- corenet-law@0 {
- compatible = "fsl,corenet-law";
- reg = <0x0 0x1000>;
- fsl,num-laws = <32>;
- };
-
- ddr1: memory-controller@8000 {
- compatible = "fsl,qoriq-memory-controller-v4.7",
- "fsl,qoriq-memory-controller";
- reg = <0x8000 0x1000>;
- interrupts = <16 2 1 23>;
- };
-
- ddr2: memory-controller@9000 {
- compatible = "fsl,qoriq-memory-controller-v4.7",
- "fsl,qoriq-memory-controller";
- reg = <0x9000 0x1000>;
- interrupts = <16 2 1 22>;
- };
-
- ddr3: memory-controller@a000 {
- compatible = "fsl,qoriq-memory-controller-v4.7",
- "fsl,qoriq-memory-controller";
- reg = <0xa000 0x1000>;
- interrupts = <16 2 1 21>;
- };
-
- cpc: l3-cache-controller@10000 {
- compatible = "fsl,t4240-l3-cache-controller", "cache";
- reg = <0x10000 0x1000
- 0x11000 0x1000
- 0x12000 0x1000>;
- interrupts = <16 2 1 27
- 16 2 1 26
- 16 2 1 25>;
- };
-
- corenet-cf@18000 {
- compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
- reg = <0x18000 0x1000>;
- interrupts = <16 2 1 31>;
- fsl,ccf-num-csdids = <32>;
- fsl,ccf-num-snoopids = <32>;
- };
-
- iommu@20000 {
- compatible = "fsl,pamu-v1.0", "fsl,pamu";
- reg = <0x20000 0x6000>;
- fsl,portid-mapping = <0x8000>;
- interrupts = <
- 24 2 0 0
- 16 2 1 30>;
- };
-
-/include/ "fsl/qoriq-mpic.dtsi"
-
- guts: global-utilities@e0000 {
- compatible = "fsl,t4240-device-config", "fsl,qoriq-device-config-2.0";
- reg = <0xe0000 0xe00>;
- fsl,has-rstcr;
- fsl,liodn-bits = <12>;
- };
-
- clockgen: global-utilities@e1000 {
- compatible = "fsl,t4240-clockgen", "fsl,qoriq-clockgen-2.0";
- reg = <0xe1000 0x1000>;
- };
-
-/include/ "fsl/qoriq-dma-0.dtsi"
-/include/ "fsl/qoriq-dma-1.dtsi"
-
-/include/ "fsl/qoriq-i2c-0.dtsi"
-/include/ "fsl/qoriq-i2c-1.dtsi"
-/include/ "fsl/qoriq-duart-0.dtsi"
-/include/ "fsl/qoriq-duart-1.dtsi"
-
- L2_1: l2-cache-controller@c20000 {
- compatible = "fsl,t4240-l2-cache-controller";
- reg = <0xc20000 0x40000>;
- next-level-cache = <&cpc>;
- };
- L2_2: l2-cache-controller@c60000 {
- compatible = "fsl,t4240-l2-cache-controller";
- reg = <0xc60000 0x40000>;
- next-level-cache = <&cpc>;
- };
- L2_3: l2-cache-controller@ca0000 {
- compatible = "fsl,t4240-l2-cache-controller";
- reg = <0xca0000 0x40000>;
- next-level-cache = <&cpc>;
- };
-};
diff --git a/arch/powerpc/boot/dts/taishan.dts b/arch/powerpc/boot/dts/taishan.dts
index 1657ad0bf8a6..803f1bff7fa8 100644
--- a/arch/powerpc/boot/dts/taishan.dts
+++ b/arch/powerpc/boot/dts/taishan.dts
@@ -422,6 +422,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@40000300";
+ stdout-path = "/plb/opb/serial@40000300";
};
};
diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts
index 1db07f6cf133..372177b19e60 100644
--- a/arch/powerpc/boot/dts/tqm5200.dts
+++ b/arch/powerpc/boot/dts/tqm5200.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM5200 board Device Tree Source
*
* Copyright (C) 2007 Semihalf
* Marian Balakowicz <m8@semihalf.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
@@ -36,7 +32,7 @@
};
};
- memory {
+ memory@0 {
device_type = "memory";
reg = <0x00000000 0x04000000>; // 64MB
};
@@ -204,8 +200,8 @@
clock-frequency = <0>; // From boot loader
interrupts = <2 8 0 2 9 0 2 10 0>;
bus-range = <0 0>;
- ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000
- 0x02000000 0 0x90000000 0x90000000 0 0x10000000
- 0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
+ ranges = <0x42000000 0 0x80000000 0x80000000 0 0x10000000>,
+ <0x02000000 0 0x90000000 0x90000000 0 0x10000000>,
+ <0x01000000 0 0x00000000 0xa0000000 0 0x01000000>;
};
};
diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts
index 91cbd7acd276..eb4d8fd3f7aa 100644
--- a/arch/powerpc/boot/dts/tqm8540.dts
+++ b/arch/powerpc/boot/dts/tqm8540.dts
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM 8540 Device Tree Source
*
* Copyright 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "tqc,tqm8540";
compatible = "tqc,tqm8540";
diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts
index 84dce2d5fc48..fe5d3d873ec9 100644
--- a/arch/powerpc/boot/dts/tqm8541.dts
+++ b/arch/powerpc/boot/dts/tqm8541.dts
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM 8541 Device Tree Source
*
* Copyright 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "tqc,tqm8541";
compatible = "tqc,tqm8541";
diff --git a/arch/powerpc/boot/dts/tqm8548-bigflash.dts b/arch/powerpc/boot/dts/tqm8548-bigflash.dts
index 7a333dd02d9c..caa36c5ef115 100644
--- a/arch/powerpc/boot/dts/tqm8548-bigflash.dts
+++ b/arch/powerpc/boot/dts/tqm8548-bigflash.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM8548 Device Tree Source
*
* Copyright 2006 Freescale Semiconductor Inc.
* Copyright 2008 Wolfgang Grandegger <wg@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/tqm8548.dts b/arch/powerpc/boot/dts/tqm8548.dts
index c737caff10c7..12a64410f349 100644
--- a/arch/powerpc/boot/dts/tqm8548.dts
+++ b/arch/powerpc/boot/dts/tqm8548.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM8548 Device Tree Source
*
* Copyright 2006 Freescale Semiconductor Inc.
* Copyright 2008 Wolfgang Grandegger <wg@denx.de>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts
index d0416a5cdddf..4be05b7d225d 100644
--- a/arch/powerpc/boot/dts/tqm8555.dts
+++ b/arch/powerpc/boot/dts/tqm8555.dts
@@ -1,16 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM 8555 Device Tree Source
*
* Copyright 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "tqc,tqm8555";
compatible = "tqc,tqm8555";
diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts
index f9a11ebf736c..8ea48502420b 100644
--- a/arch/powerpc/boot/dts/tqm8560.dts
+++ b/arch/powerpc/boot/dts/tqm8560.dts
@@ -1,17 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM 8560 Device Tree Source
*
* Copyright 2008 Freescale Semiconductor Inc.
* Copyright 2008 Wolfgang Grandegger <wg@grandegger.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
+/include/ "fsl/e500v1_power_isa.dtsi"
+
/ {
model = "tqc,tqm8560";
compatible = "tqc,tqm8560";
diff --git a/arch/powerpc/boot/dts/tqm8xx.dts b/arch/powerpc/boot/dts/tqm8xx.dts
index 3d1446b99c7e..d16cdfd81205 100644
--- a/arch/powerpc/boot/dts/tqm8xx.dts
+++ b/arch/powerpc/boot/dts/tqm8xx.dts
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* TQM8XX Device Tree Source
*
* Heiko Schocher <hs@denx.de>
* 2010 DENX Software Engineering GmbH
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/dts-v1/;
diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts
new file mode 100644
index 000000000000..dff1ea074d9d
--- /dev/null
+++ b/arch/powerpc/boot/dts/turris1x.dts
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Turris 1.x Device Tree Source
+ *
+ * Copyright 2013 - 2022 CZ.NIC z.s.p.o. (http://www.nic.cz/)
+ *
+ * Pinout, Schematics and Altium hardware design files are open source
+ * and available at: https://docs.turris.cz/hw/turris-1x/turris-1x/
+ */
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/leds/common.h>
+/include/ "fsl/p2020si-pre.dtsi"
+
+/ {
+ model = "Turris 1.x";
+ compatible = "cznic,turris1x";
+
+ aliases {
+ ethernet0 = &enet0;
+ ethernet1 = &enet1;
+ ethernet2 = &enet2;
+ serial0 = &serial0;
+ serial1 = &serial1;
+ pci0 = &pci0;
+ pci1 = &pci1;
+ pci2 = &pci2;
+ spi0 = &spi0;
+ };
+
+ memory {
+ device_type = "memory";
+ };
+
+ soc: soc@ffe00000 {
+ ranges = <0x0 0x0 0xffe00000 0x00100000>;
+
+ i2c@3000 {
+ /* PCA9557PW GPIO controller for boot config */
+ gpio-controller@18 {
+ compatible = "nxp,pca9557";
+ label = "bootcfg";
+ reg = <0x18>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ polarity = <0x00>;
+ };
+
+ /* STM32F030R8T6 MCU for power control */
+ power-control@2a {
+ /*
+ * Turris Power Control firmware runs on STM32F0 MCU.
+ * This firmware is open source and available at:
+ * https://gitlab.nic.cz/turris/hw/turris_power_control
+ */
+ reg = <0x2a>;
+ };
+
+ /* DDR3 SPD/EEPROM PSWP instruction */
+ eeprom@32 {
+ reg = <0x32>;
+ };
+
+ /* SA56004ED temperature control */
+ temperature-sensor@4c {
+ compatible = "nxp,sa56004";
+ reg = <0x4c>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */
+ <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* Local temperature sensor (SA56004ED internal) */
+ channel@0 {
+ reg = <0>;
+ label = "board";
+ };
+
+ /* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */
+ channel@1 {
+ reg = <1>;
+ label = "cpu";
+ };
+ };
+
+ /* DDR3 SPD/EEPROM */
+ eeprom@52 {
+ compatible = "atmel,spd";
+ reg = <0x52>;
+ };
+
+ /* MCP79402-I/ST Protected EEPROM */
+ eeprom@57 {
+ reg = <0x57>;
+ };
+
+ /* ATSHA204-TH-DA-T crypto module */
+ crypto@64 {
+ compatible = "atmel,atsha204";
+ reg = <0x64>;
+ };
+
+ /* IDT6V49205BNLGI clock generator */
+ clock-generator@69 {
+ compatible = "idt,6v49205b";
+ reg = <0x69>;
+ };
+
+ /* MCP79402-I/ST RTC */
+ rtc@6f {
+ compatible = "microchip,mcp7940x";
+ reg = <0x6f>;
+ interrupt-parent = <&gpio>;
+ interrupts = <14 0>; /* GPIO14 - MFP pin */
+ };
+ };
+
+ /* SPI on connector P1 */
+ spi0: spi@7000 {
+ };
+
+ gpio: gpio-controller@fc00 {
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
+
+ /* Connected to SMSC USB2412-DZK 2-Port USB 2.0 Hub Controller */
+ usb@22000 {
+ phy_type = "ulpi";
+ dr_mode = "host";
+ };
+
+ enet0: ethernet@24000 {
+ /* Connected to port 6 of QCA8337N-AL3C switch */
+ phy-connection-type = "rgmii-id";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ mdio@24520 {
+ /* KSZ9031RNXCA ethernet phy for WAN port */
+ phy: ethernet-phy@7 {
+ interrupts = <3 1 0 0>;
+ reg = <0x7>;
+ };
+
+ /* QCA8337N-AL3C switch with integrated ethernet PHYs for LAN ports */
+ switch@10 {
+ compatible = "qca,qca8337";
+ interrupts = <2 1 0 0>;
+ reg = <0x10>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ label = "cpu";
+ ethernet = <&enet1>;
+ phy-mode = "rgmii-id";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan5";
+ };
+
+ port@2 {
+ reg = <2>;
+ label = "lan4";
+ };
+
+ port@3 {
+ reg = <3>;
+ label = "lan3";
+ };
+
+ port@4 {
+ reg = <4>;
+ label = "lan2";
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "lan1";
+ };
+
+ port@6 {
+ reg = <6>;
+ label = "cpu";
+ ethernet = <&enet0>;
+ phy-mode = "rgmii-id";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+ };
+ };
+ };
+
+ ptp_clock@24e00 {
+ fsl,tclk-period = <5>;
+ fsl,tmr-prsc = <200>;
+ fsl,tmr-add = <0xcccccccd>;
+ fsl,tmr-fiper1 = <0x3b9ac9fb>;
+ fsl,tmr-fiper2 = <0x0001869b>;
+ fsl,max-adj = <249999999>;
+ };
+
+ enet1: ethernet@25000 {
+ /* Connected to port 0 of QCA8337N-AL3C switch */
+ phy-connection-type = "rgmii-id";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ mdio@25520 {
+ status = "disabled";
+ };
+
+ enet2: ethernet@26000 {
+ /* Connected to KSZ9031RNXCA ethernet phy (WAN port) */
+ label = "wan";
+ phy-handle = <&phy>;
+ phy-connection-type = "rgmii-id";
+ };
+
+ mdio@26520 {
+ status = "disabled";
+ };
+
+ sdhc@2e000 {
+ bus-width = <4>;
+ cd-gpios = <&gpio 8 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ lbc: localbus@ffe05000 {
+ reg = <0 0xffe05000 0 0x1000>;
+
+ ranges = <0x0 0x0 0x0 0xef000000 0x01000000>, /* NOR */
+ <0x1 0x0 0x0 0xff800000 0x00040000>, /* NAND */
+ <0x3 0x0 0x0 0xffa00000 0x00020000>; /* CPLD */
+
+ /* S29GL128P90TFIR10 NOR */
+ nor@0,0 {
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x01000000>;
+ bank-width = <2>;
+ device-width = <1>;
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ /* 128 kB for Device Tree Blob */
+ reg = <0x00000000 0x00020000>;
+ label = "dtb";
+ };
+
+ partition@20000 {
+ /* 1.7 MB for Linux Kernel Image */
+ reg = <0x00020000 0x001a0000>;
+ label = "kernel";
+ };
+
+ partition@1c0000 {
+ /* 1.5 MB for Rescue JFFS2 Root File System */
+ reg = <0x001c0000 0x00180000>;
+ label = "rescue";
+ };
+
+ partition@340000 {
+ /* 11 MB for TAR.XZ Archive with Factory content of NAND Root File System */
+ reg = <0x00340000 0x00b00000>;
+ label = "factory";
+ };
+
+ partition@e40000 {
+ /* 768 kB for Certificates JFFS2 File System */
+ reg = <0x00e40000 0x000c0000>;
+ label = "certificates";
+ };
+
+ /* free unused space 0x00f00000-0x00f20000 */
+
+ partition@f20000 {
+ /* 128 kB for U-Boot Environment Variables */
+ reg = <0x00f20000 0x00020000>;
+ label = "u-boot-env";
+ };
+
+ partition@f40000 {
+ /* 768 kB for U-Boot Bootloader Image */
+ reg = <0x00f40000 0x000c0000>;
+ label = "u-boot";
+ };
+ };
+ };
+
+ /* MT29F2G08ABAEAWP:E NAND */
+ nand@1,0 {
+ compatible = "fsl,p2020-fcm-nand", "fsl,elbc-fcm-nand";
+ reg = <0x1 0x0 0x00040000>;
+ nand-ecc-mode = "soft";
+ nand-ecc-algo = "bch";
+
+ partitions {
+ compatible = "fixed-partitions";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ /* 256 MB for UBI with one volume: UBIFS Root File System */
+ reg = <0x00000000 0x10000000>;
+ label = "rootfs";
+ };
+ };
+ };
+
+ /* LCMXO1200C-3FTN256C FPGA */
+ cpld@3,0 {
+ /*
+ * Turris CPLD firmware, which runs on this Lattice FPGA,
+ * is an extended version of P1021RDB-PC CPLD v4.1 firmware.
+ * It is backward compatible with its original version
+ * and the only extension is support for Turris LEDs.
+ * Turris CPLD firmware is open source and available at:
+ * https://gitlab.nic.cz/turris/hw/turris_cpld/-/blob/master/CZ_NIC_Router_CPLD.v
+ */
+ compatible = "cznic,turris1x-cpld", "fsl,p1021rdb-pc-cpld", "simple-bus", "syscon";
+ reg = <0x3 0x0 0x30>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0x0 0x3 0x0 0x00020000>;
+
+ /* MAX6370KA+T watchdog */
+ watchdog@2 {
+ /*
+ * CPLD firmware maps SET0, SET1 and SET2
+ * input logic of MAX6370KA+T chip to CPLD
+ * memory space at byte offset 0x2. WDI
+ * input logic is outside of the CPLD and
+ * connected via external GPIO.
+ */
+ compatible = "maxim,max6370";
+ reg = <0x02 0x01>;
+ gpios = <&gpio 11 GPIO_ACTIVE_LOW>;
+ };
+
+ reboot@d {
+ /*
+ * CPLD firmware which manages system reset and
+ * watchdog registers has bugs. It does not
+ * autoclear system reset register after change
+ * and watchdog ignores reset line on immediate
+ * succeeding reset cycle triggered by watchdog.
+ * These bugs have to be worked around in the U-Boot
+ * bootloader. So use system reset via syscon as
+ * a last resort because older U-Boot versions
+ * do not have a workaround for the watchdog.
+ *
+ * Reset method via rstcr's global-utilities
+ * (the preferred one) has priority level 128,
+ * watchdog has priority level 0 and default
+ * syscon-reboot priority level is 192.
+ *
+ * So define syscon-reboot with custom priority
+ * level 64 (between rstcr and watchdog) because
+ * rstcr should stay as default preferred reset
+ * method and reset via watchdog is more broken
+ * than system reset via syscon.
+ */
+ compatible = "syscon-reboot";
+ reg = <0x0d 0x01>;
+ offset = <0x0d>;
+ mask = <0x01>;
+ value = <0x01>;
+ priority = <64>;
+ };
+
+ led-controller@13 {
+ /*
+ * LEDs are controlled by CPLD firmware.
+ * All five LAN LEDs share common RGB settings
+ * and so it is not possible to set different
+ * colors on different LAN ports.
+ */
+ compatible = "cznic,turris1x-leds";
+ reg = <0x13 0x1d>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ multi-led@0 {
+ reg = <0x0>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_WAN;
+ };
+
+ multi-led@1 {
+ reg = <0x1>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_LAN;
+ function-enumerator = <5>;
+ };
+
+ multi-led@2 {
+ reg = <0x2>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_LAN;
+ function-enumerator = <4>;
+ };
+
+ multi-led@3 {
+ reg = <0x3>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_LAN;
+ function-enumerator = <3>;
+ };
+
+ multi-led@4 {
+ reg = <0x4>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_LAN;
+ function-enumerator = <2>;
+ };
+
+ multi-led@5 {
+ reg = <0x5>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_LAN;
+ function-enumerator = <1>;
+ };
+
+ multi-led@6 {
+ reg = <0x6>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_WLAN;
+ };
+
+ multi-led@7 {
+ reg = <0x7>;
+ color = <LED_COLOR_ID_RGB>;
+ function = LED_FUNCTION_POWER;
+ };
+ };
+ };
+ };
+
+ pci2: pcie@ffe08000 {
+ /*
+ * PCIe bus for on-board TUSB7340RKM USB 3.0 xHCI controller.
+ * This xHCI controller is available only on Turris 1.1 boards.
+ * Turris 1.0 boards have nothing connected to this PCIe bus,
+ * so system would see only PCIe Root Port of this PCIe Root
+ * Complex. TUSB7340RKM xHCI controller has four SuperSpeed
+ * channels. Channel 0 is connected to the front USB 3.0 port,
+ * channel 1 (but only USB 2.0 subset) to USB 2.0 pins on mPCIe
+ * slot 1 (CN5), channels 2 and 3 to connector P600.
+ *
+ * P2020 PCIe Root Port does not use PCIe MEM and xHCI controller
+ * uses 64kB + 8kB of PCIe MEM. No PCIe IO is used or required.
+ * So allocate 128kB of PCIe MEM for this PCIe bus.
+ */
+ reg = <0 0xffe08000 0 0x1000>;
+ ranges = <0x02000000 0x0 0xc0000000 0 0xc0000000 0x0 0x00020000>, /* MEM */
+ <0x01000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>; /* IO */
+
+ pcie@0 {
+ ranges;
+ };
+ };
+
+ pci1: pcie@ffe09000 {
+ /* PCIe bus on mPCIe slot 2 (CN6) for expansion mPCIe card */
+ reg = <0 0xffe09000 0 0x1000>;
+ ranges = <0x02000000 0x0 0xa0000000 0 0xa0000000 0x0 0x20000000>, /* MEM */
+ <0x01000000 0x0 0x00000000 0 0xffc10000 0x0 0x00010000>; /* IO */
+
+ pcie@0 {
+ ranges;
+ };
+ };
+
+ pci0: pcie@ffe0a000 {
+ /*
+ * PCIe bus on mPCIe slot 1 (CN5) for expansion mPCIe card.
+ * Turris 1.1 boards have in this mPCIe slot additional USB 2.0
+ * pins via channel 1 of TUSB7340RKM xHCI controller and also
+ * additional SIM card slot, both for USB-based WWAN cards.
+ */
+ reg = <0 0xffe0a000 0 0x1000>;
+ ranges = <0x02000000 0x0 0x80000000 0 0x80000000 0x0 0x20000000>, /* MEM */
+ <0x01000000 0x0 0x00000000 0 0xffc00000 0x0 0x00010000>; /* IO */
+
+ pcie@0 {
+ ranges;
+ };
+ };
+};
+
+/include/ "fsl/p2020si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/uc101.dts b/arch/powerpc/boot/dts/uc101.dts
index 5c462194ef06..2e34d019178b 100644
--- a/arch/powerpc/boot/dts/uc101.dts
+++ b/arch/powerpc/boot/dts/uc101.dts
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Manroland uc101 board Device Tree Source
*
* Copyright (C) 2009 DENX Software Engineering GmbH
* Heiko Schocher <hs@denx.de>
* Copyright 2006-2007 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
/include/ "mpc5200b.dtsi"
diff --git a/arch/powerpc/boot/dts/virtex440-ml507.dts b/arch/powerpc/boot/dts/virtex440-ml507.dts
deleted file mode 100644
index 391a4e299783..000000000000
--- a/arch/powerpc/boot/dts/virtex440-ml507.dts
+++ /dev/null
@@ -1,406 +0,0 @@
-/*
- * This file supports the Xilinx ML507 board with the 440 processor.
- * A reference design for the FPGA is provided at http://git.xilinx.com.
- *
- * (C) Copyright 2008 Xilinx, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- *
- * ---
- *
- * Device Tree Generator version: 1.1
- *
- * CAUTION: This file is automatically generated by libgen.
- * Version: Xilinx EDK 10.1.03 EDK_K_SP3.6
- *
- * XPS project directory: ml507_ppc440_emb_ref
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "xlnx,virtex440";
- dcr-parent = <&ppc440_0>;
- model = "testing";
- DDR2_SDRAM: memory@0 {
- device_type = "memory";
- reg = < 0 0x10000000 >;
- } ;
- chosen {
- bootargs = "console=ttyS0 root=/dev/ram";
- linux,stdout-path = &RS232_Uart_1;
- } ;
- cpus {
- #address-cells = <1>;
- #cpus = <1>;
- #size-cells = <0>;
- ppc440_0: cpu@0 {
- clock-frequency = <400000000>;
- compatible = "PowerPC,440", "ibm,ppc440";
- d-cache-line-size = <0x20>;
- d-cache-size = <0x8000>;
- dcr-access-method = "native";
- dcr-controller ;
- device_type = "cpu";
- i-cache-line-size = <0x20>;
- i-cache-size = <0x8000>;
- model = "PowerPC,440";
- reg = <0>;
- timebase-frequency = <400000000>;
- xlnx,apu-control = <1>;
- xlnx,apu-udi-0 = <0>;
- xlnx,apu-udi-1 = <0>;
- xlnx,apu-udi-10 = <0>;
- xlnx,apu-udi-11 = <0>;
- xlnx,apu-udi-12 = <0>;
- xlnx,apu-udi-13 = <0>;
- xlnx,apu-udi-14 = <0>;
- xlnx,apu-udi-15 = <0>;
- xlnx,apu-udi-2 = <0>;
- xlnx,apu-udi-3 = <0>;
- xlnx,apu-udi-4 = <0>;
- xlnx,apu-udi-5 = <0>;
- xlnx,apu-udi-6 = <0>;
- xlnx,apu-udi-7 = <0>;
- xlnx,apu-udi-8 = <0>;
- xlnx,apu-udi-9 = <0>;
- xlnx,dcr-autolock-enable = <1>;
- xlnx,dcu-rd-ld-cache-plb-prio = <0>;
- xlnx,dcu-rd-noncache-plb-prio = <0>;
- xlnx,dcu-rd-touch-plb-prio = <0>;
- xlnx,dcu-rd-urgent-plb-prio = <0>;
- xlnx,dcu-wr-flush-plb-prio = <0>;
- xlnx,dcu-wr-store-plb-prio = <0>;
- xlnx,dcu-wr-urgent-plb-prio = <0>;
- xlnx,dma0-control = <0>;
- xlnx,dma0-plb-prio = <0>;
- xlnx,dma0-rxchannelctrl = <0x1010000>;
- xlnx,dma0-rxirqtimer = <0x3ff>;
- xlnx,dma0-txchannelctrl = <0x1010000>;
- xlnx,dma0-txirqtimer = <0x3ff>;
- xlnx,dma1-control = <0>;
- xlnx,dma1-plb-prio = <0>;
- xlnx,dma1-rxchannelctrl = <0x1010000>;
- xlnx,dma1-rxirqtimer = <0x3ff>;
- xlnx,dma1-txchannelctrl = <0x1010000>;
- xlnx,dma1-txirqtimer = <0x3ff>;
- xlnx,dma2-control = <0>;
- xlnx,dma2-plb-prio = <0>;
- xlnx,dma2-rxchannelctrl = <0x1010000>;
- xlnx,dma2-rxirqtimer = <0x3ff>;
- xlnx,dma2-txchannelctrl = <0x1010000>;
- xlnx,dma2-txirqtimer = <0x3ff>;
- xlnx,dma3-control = <0>;
- xlnx,dma3-plb-prio = <0>;
- xlnx,dma3-rxchannelctrl = <0x1010000>;
- xlnx,dma3-rxirqtimer = <0x3ff>;
- xlnx,dma3-txchannelctrl = <0x1010000>;
- xlnx,dma3-txirqtimer = <0x3ff>;
- xlnx,endian-reset = <0>;
- xlnx,generate-plb-timespecs = <1>;
- xlnx,icu-rd-fetch-plb-prio = <0>;
- xlnx,icu-rd-spec-plb-prio = <0>;
- xlnx,icu-rd-touch-plb-prio = <0>;
- xlnx,interconnect-imask = <0xffffffff>;
- xlnx,mplb-allow-lock-xfer = <1>;
- xlnx,mplb-arb-mode = <0>;
- xlnx,mplb-awidth = <0x20>;
- xlnx,mplb-counter = <0x500>;
- xlnx,mplb-dwidth = <0x80>;
- xlnx,mplb-max-burst = <8>;
- xlnx,mplb-native-dwidth = <0x80>;
- xlnx,mplb-p2p = <0>;
- xlnx,mplb-prio-dcur = <2>;
- xlnx,mplb-prio-dcuw = <3>;
- xlnx,mplb-prio-icu = <4>;
- xlnx,mplb-prio-splb0 = <1>;
- xlnx,mplb-prio-splb1 = <0>;
- xlnx,mplb-read-pipe-enable = <1>;
- xlnx,mplb-sync-tattribute = <0>;
- xlnx,mplb-wdog-enable = <1>;
- xlnx,mplb-write-pipe-enable = <1>;
- xlnx,mplb-write-post-enable = <1>;
- xlnx,num-dma = <1>;
- xlnx,pir = <0xf>;
- xlnx,ppc440mc-addr-base = <0>;
- xlnx,ppc440mc-addr-high = <0xfffffff>;
- xlnx,ppc440mc-arb-mode = <0>;
- xlnx,ppc440mc-bank-conflict-mask = <0xc00000>;
- xlnx,ppc440mc-control = <0xf810008f>;
- xlnx,ppc440mc-max-burst = <8>;
- xlnx,ppc440mc-prio-dcur = <2>;
- xlnx,ppc440mc-prio-dcuw = <3>;
- xlnx,ppc440mc-prio-icu = <4>;
- xlnx,ppc440mc-prio-splb0 = <1>;
- xlnx,ppc440mc-prio-splb1 = <0>;
- xlnx,ppc440mc-row-conflict-mask = <0x3ffe00>;
- xlnx,ppcdm-asyncmode = <0>;
- xlnx,ppcds-asyncmode = <0>;
- xlnx,user-reset = <0>;
- DMA0: sdma@80 {
- compatible = "xlnx,ll-dma-1.00.a";
- dcr-reg = < 0x80 0x11 >;
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 10 2 11 2 >;
- } ;
- } ;
- } ;
- plb_v46_0: plb@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "xlnx,plb-v46-1.03.a", "simple-bus";
- ranges ;
- DIP_Switches_8Bit: gpio@81460000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 7 2 >;
- reg = < 0x81460000 0x10000 >;
- xlnx,all-inputs = <1>;
- xlnx,all-inputs-2 = <0>;
- xlnx,dout-default = <0>;
- xlnx,dout-default-2 = <0>;
- xlnx,family = "virtex5";
- xlnx,gpio-width = <8>;
- xlnx,interrupt-present = <1>;
- xlnx,is-bidir = <1>;
- xlnx,is-bidir-2 = <1>;
- xlnx,is-dual = <0>;
- xlnx,tri-default = <0xffffffff>;
- xlnx,tri-default-2 = <0xffffffff>;
- } ;
- FLASH: flash@fc000000 {
- bank-width = <2>;
- compatible = "xlnx,xps-mch-emc-2.00.a", "cfi-flash";
- reg = < 0xfc000000 0x2000000 >;
- xlnx,family = "virtex5";
- xlnx,include-datawidth-matching-0 = <0x1>;
- xlnx,include-datawidth-matching-1 = <0x0>;
- xlnx,include-datawidth-matching-2 = <0x0>;
- xlnx,include-datawidth-matching-3 = <0x0>;
- xlnx,include-negedge-ioregs = <0x0>;
- xlnx,include-plb-ipif = <0x1>;
- xlnx,include-wrbuf = <0x1>;
- xlnx,max-mem-width = <0x10>;
- xlnx,mch-native-dwidth = <0x20>;
- xlnx,mch-plb-clk-period-ps = <0x2710>;
- xlnx,mch-splb-awidth = <0x20>;
- xlnx,mch0-accessbuf-depth = <0x10>;
- xlnx,mch0-protocol = <0x0>;
- xlnx,mch0-rddatabuf-depth = <0x10>;
- xlnx,mch1-accessbuf-depth = <0x10>;
- xlnx,mch1-protocol = <0x0>;
- xlnx,mch1-rddatabuf-depth = <0x10>;
- xlnx,mch2-accessbuf-depth = <0x10>;
- xlnx,mch2-protocol = <0x0>;
- xlnx,mch2-rddatabuf-depth = <0x10>;
- xlnx,mch3-accessbuf-depth = <0x10>;
- xlnx,mch3-protocol = <0x0>;
- xlnx,mch3-rddatabuf-depth = <0x10>;
- xlnx,mem0-width = <0x10>;
- xlnx,mem1-width = <0x20>;
- xlnx,mem2-width = <0x20>;
- xlnx,mem3-width = <0x20>;
- xlnx,num-banks-mem = <0x1>;
- xlnx,num-channels = <0x2>;
- xlnx,priority-mode = <0x0>;
- xlnx,synch-mem-0 = <0x0>;
- xlnx,synch-mem-1 = <0x0>;
- xlnx,synch-mem-2 = <0x0>;
- xlnx,synch-mem-3 = <0x0>;
- xlnx,synch-pipedelay-0 = <0x2>;
- xlnx,synch-pipedelay-1 = <0x2>;
- xlnx,synch-pipedelay-2 = <0x2>;
- xlnx,synch-pipedelay-3 = <0x2>;
- xlnx,tavdv-ps-mem-0 = <0x1adb0>;
- xlnx,tavdv-ps-mem-1 = <0x3a98>;
- xlnx,tavdv-ps-mem-2 = <0x3a98>;
- xlnx,tavdv-ps-mem-3 = <0x3a98>;
- xlnx,tcedv-ps-mem-0 = <0x1adb0>;
- xlnx,tcedv-ps-mem-1 = <0x3a98>;
- xlnx,tcedv-ps-mem-2 = <0x3a98>;
- xlnx,tcedv-ps-mem-3 = <0x3a98>;
- xlnx,thzce-ps-mem-0 = <0x88b8>;
- xlnx,thzce-ps-mem-1 = <0x1b58>;
- xlnx,thzce-ps-mem-2 = <0x1b58>;
- xlnx,thzce-ps-mem-3 = <0x1b58>;
- xlnx,thzoe-ps-mem-0 = <0x1b58>;
- xlnx,thzoe-ps-mem-1 = <0x1b58>;
- xlnx,thzoe-ps-mem-2 = <0x1b58>;
- xlnx,thzoe-ps-mem-3 = <0x1b58>;
- xlnx,tlzwe-ps-mem-0 = <0x88b8>;
- xlnx,tlzwe-ps-mem-1 = <0x0>;
- xlnx,tlzwe-ps-mem-2 = <0x0>;
- xlnx,tlzwe-ps-mem-3 = <0x0>;
- xlnx,twc-ps-mem-0 = <0x2af8>;
- xlnx,twc-ps-mem-1 = <0x3a98>;
- xlnx,twc-ps-mem-2 = <0x3a98>;
- xlnx,twc-ps-mem-3 = <0x3a98>;
- xlnx,twp-ps-mem-0 = <0x11170>;
- xlnx,twp-ps-mem-1 = <0x2ee0>;
- xlnx,twp-ps-mem-2 = <0x2ee0>;
- xlnx,twp-ps-mem-3 = <0x2ee0>;
- xlnx,xcl0-linesize = <0x4>;
- xlnx,xcl0-writexfer = <0x1>;
- xlnx,xcl1-linesize = <0x4>;
- xlnx,xcl1-writexfer = <0x1>;
- xlnx,xcl2-linesize = <0x4>;
- xlnx,xcl2-writexfer = <0x1>;
- xlnx,xcl3-linesize = <0x4>;
- xlnx,xcl3-writexfer = <0x1>;
- } ;
- Hard_Ethernet_MAC: xps-ll-temac@81c00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "xlnx,compound";
- ethernet@81c00000 {
- #address-cells = <1>;
- #size-cells = <0>;
- compatible = "xlnx,xps-ll-temac-1.01.b";
- device_type = "network";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 5 2 >;
- llink-connected = <&DMA0>;
- local-mac-address = [ 02 00 00 00 00 00 ];
- reg = < 0x81c00000 0x40 >;
- xlnx,bus2core-clk-ratio = <1>;
- xlnx,phy-type = <1>;
- xlnx,phyaddr = <1>;
- xlnx,rxcsum = <1>;
- xlnx,rxfifo = <0x1000>;
- xlnx,temac-type = <0>;
- xlnx,txcsum = <1>;
- xlnx,txfifo = <0x1000>;
- phy-handle = <&phy7>;
- clock-frequency = <100000000>;
- phy7: phy@7 {
- compatible = "marvell,88e1111";
- reg = <7>;
- } ;
- } ;
- } ;
- IIC_EEPROM: i2c@81600000 {
- compatible = "xlnx,xps-iic-2.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 6 2 >;
- reg = < 0x81600000 0x10000 >;
- xlnx,clk-freq = <0x5f5e100>;
- xlnx,family = "virtex5";
- xlnx,gpo-width = <0x1>;
- xlnx,iic-freq = <0x186a0>;
- xlnx,scl-inertial-delay = <0x0>;
- xlnx,sda-inertial-delay = <0x0>;
- xlnx,ten-bit-adr = <0x0>;
- } ;
- LEDs_8Bit: gpio@81400000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- reg = < 0x81400000 0x10000 >;
- xlnx,all-inputs = <0>;
- xlnx,all-inputs-2 = <0>;
- xlnx,dout-default = <0>;
- xlnx,dout-default-2 = <0>;
- xlnx,family = "virtex5";
- xlnx,gpio-width = <8>;
- xlnx,interrupt-present = <0>;
- xlnx,is-bidir = <1>;
- xlnx,is-bidir-2 = <1>;
- xlnx,is-dual = <0>;
- xlnx,tri-default = <0xffffffff>;
- xlnx,tri-default-2 = <0xffffffff>;
- } ;
- LEDs_Positions: gpio@81420000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- reg = < 0x81420000 0x10000 >;
- xlnx,all-inputs = <0>;
- xlnx,all-inputs-2 = <0>;
- xlnx,dout-default = <0>;
- xlnx,dout-default-2 = <0>;
- xlnx,family = "virtex5";
- xlnx,gpio-width = <5>;
- xlnx,interrupt-present = <0>;
- xlnx,is-bidir = <1>;
- xlnx,is-bidir-2 = <1>;
- xlnx,is-dual = <0>;
- xlnx,tri-default = <0xffffffff>;
- xlnx,tri-default-2 = <0xffffffff>;
- } ;
- Push_Buttons_5Bit: gpio@81440000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 8 2 >;
- reg = < 0x81440000 0x10000 >;
- xlnx,all-inputs = <1>;
- xlnx,all-inputs-2 = <0>;
- xlnx,dout-default = <0>;
- xlnx,dout-default-2 = <0>;
- xlnx,family = "virtex5";
- xlnx,gpio-width = <5>;
- xlnx,interrupt-present = <1>;
- xlnx,is-bidir = <1>;
- xlnx,is-bidir-2 = <1>;
- xlnx,is-dual = <0>;
- xlnx,tri-default = <0xffffffff>;
- xlnx,tri-default-2 = <0xffffffff>;
- } ;
- RS232_Uart_1: serial@83e00000 {
- clock-frequency = <100000000>;
- compatible = "xlnx,xps-uart16550-2.00.b", "ns16550";
- current-speed = <9600>;
- device_type = "serial";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 9 2 >;
- reg = < 0x83e00000 0x10000 >;
- reg-offset = <0x1003>;
- reg-shift = <2>;
- xlnx,family = "virtex5";
- xlnx,has-external-rclk = <0>;
- xlnx,has-external-xin = <0>;
- xlnx,is-a-16550 = <1>;
- } ;
- SysACE_CompactFlash: sysace@83600000 {
- compatible = "xlnx,xps-sysace-1.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 4 2 >;
- reg = < 0x83600000 0x10000 >;
- xlnx,family = "virtex5";
- xlnx,mem-width = <0x10>;
- } ;
- xps_bram_if_cntlr_1: xps-bram-if-cntlr@ffff0000 {
- compatible = "xlnx,xps-bram-if-cntlr-1.00.a";
- reg = < 0xffff0000 0x10000 >;
- xlnx,family = "virtex5";
- } ;
- xps_intc_0: interrupt-controller@81800000 {
- #interrupt-cells = <2>;
- compatible = "xlnx,xps-intc-1.00.a";
- interrupt-controller ;
- reg = < 0x81800000 0x10000 >;
- xlnx,num-intr-inputs = <0xc>;
- } ;
- xps_timebase_wdt_1: xps-timebase-wdt@83a00000 {
- compatible = "xlnx,xps-timebase-wdt-1.00.b";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 2 0 1 2 >;
- reg = < 0x83a00000 0x10000 >;
- xlnx,family = "virtex5";
- xlnx,wdt-enable-once = <0>;
- xlnx,wdt-interval = <0x1e>;
- } ;
- xps_timer_1: timer@83c00000 {
- compatible = "xlnx,xps-timer-1.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 3 2 >;
- reg = < 0x83c00000 0x10000 >;
- xlnx,count-width = <0x20>;
- xlnx,family = "virtex5";
- xlnx,gen0-assert = <1>;
- xlnx,gen1-assert = <1>;
- xlnx,one-timer-only = <1>;
- xlnx,trig0-assert = <1>;
- xlnx,trig1-assert = <1>;
- } ;
- } ;
-} ;
diff --git a/arch/powerpc/boot/dts/virtex440-ml510.dts b/arch/powerpc/boot/dts/virtex440-ml510.dts
deleted file mode 100644
index 81a8dc2c6365..000000000000
--- a/arch/powerpc/boot/dts/virtex440-ml510.dts
+++ /dev/null
@@ -1,465 +0,0 @@
-/*
- * Xilinx ML510 Reference Design support
- *
- * This DTS file was created for the ml510_bsb1_pcores_ppc440 reference design.
- * The reference design contains a bug which prevent PCI DMA from working
- * properly. A description of the bug is given in the plbv46_pci section. It
- * needs to be fixed by the user until Xilinx updates their reference design.
- *
- * Copyright 2009, Roderick Colenbrander
- */
-
-/dts-v1/;
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "xlnx,ml510-ref-design", "xlnx,virtex440";
- dcr-parent = <&ppc440_0>;
- DDR2_SDRAM_DIMM0: memory@0 {
- device_type = "memory";
- reg = < 0x0 0x20000000 >;
- } ;
- alias {
- ethernet0 = &Hard_Ethernet_MAC;
- serial0 = &RS232_Uart_1;
- } ;
- chosen {
- bootargs = "console=ttyS0 root=/dev/ram";
- linux,stdout-path = "/plb@0/serial@83e00000";
- } ;
- cpus {
- #address-cells = <1>;
- #cpus = <0x1>;
- #size-cells = <0>;
- ppc440_0: cpu@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- clock-frequency = <300000000>;
- compatible = "PowerPC,440", "ibm,ppc440";
- d-cache-line-size = <0x20>;
- d-cache-size = <0x8000>;
- dcr-access-method = "native";
- dcr-controller ;
- device_type = "cpu";
- i-cache-line-size = <0x20>;
- i-cache-size = <0x8000>;
- model = "PowerPC,440";
- reg = <0>;
- timebase-frequency = <300000000>;
- xlnx,apu-control = <0x2000>;
- xlnx,apu-udi-0 = <0x0>;
- xlnx,apu-udi-1 = <0x0>;
- xlnx,apu-udi-10 = <0x0>;
- xlnx,apu-udi-11 = <0x0>;
- xlnx,apu-udi-12 = <0x0>;
- xlnx,apu-udi-13 = <0x0>;
- xlnx,apu-udi-14 = <0x0>;
- xlnx,apu-udi-15 = <0x0>;
- xlnx,apu-udi-2 = <0x0>;
- xlnx,apu-udi-3 = <0x0>;
- xlnx,apu-udi-4 = <0x0>;
- xlnx,apu-udi-5 = <0x0>;
- xlnx,apu-udi-6 = <0x0>;
- xlnx,apu-udi-7 = <0x0>;
- xlnx,apu-udi-8 = <0x0>;
- xlnx,apu-udi-9 = <0x0>;
- xlnx,dcr-autolock-enable = <0x1>;
- xlnx,dcu-rd-ld-cache-plb-prio = <0x0>;
- xlnx,dcu-rd-noncache-plb-prio = <0x0>;
- xlnx,dcu-rd-touch-plb-prio = <0x0>;
- xlnx,dcu-rd-urgent-plb-prio = <0x0>;
- xlnx,dcu-wr-flush-plb-prio = <0x0>;
- xlnx,dcu-wr-store-plb-prio = <0x0>;
- xlnx,dcu-wr-urgent-plb-prio = <0x0>;
- xlnx,dma0-control = <0x0>;
- xlnx,dma0-plb-prio = <0x0>;
- xlnx,dma0-rxchannelctrl = <0x1010000>;
- xlnx,dma0-rxirqtimer = <0x3ff>;
- xlnx,dma0-txchannelctrl = <0x1010000>;
- xlnx,dma0-txirqtimer = <0x3ff>;
- xlnx,dma1-control = <0x0>;
- xlnx,dma1-plb-prio = <0x0>;
- xlnx,dma1-rxchannelctrl = <0x1010000>;
- xlnx,dma1-rxirqtimer = <0x3ff>;
- xlnx,dma1-txchannelctrl = <0x1010000>;
- xlnx,dma1-txirqtimer = <0x3ff>;
- xlnx,dma2-control = <0x0>;
- xlnx,dma2-plb-prio = <0x0>;
- xlnx,dma2-rxchannelctrl = <0x1010000>;
- xlnx,dma2-rxirqtimer = <0x3ff>;
- xlnx,dma2-txchannelctrl = <0x1010000>;
- xlnx,dma2-txirqtimer = <0x3ff>;
- xlnx,dma3-control = <0x0>;
- xlnx,dma3-plb-prio = <0x0>;
- xlnx,dma3-rxchannelctrl = <0x1010000>;
- xlnx,dma3-rxirqtimer = <0x3ff>;
- xlnx,dma3-txchannelctrl = <0x1010000>;
- xlnx,dma3-txirqtimer = <0x3ff>;
- xlnx,endian-reset = <0x0>;
- xlnx,generate-plb-timespecs = <0x1>;
- xlnx,icu-rd-fetch-plb-prio = <0x0>;
- xlnx,icu-rd-spec-plb-prio = <0x0>;
- xlnx,icu-rd-touch-plb-prio = <0x0>;
- xlnx,interconnect-imask = <0xffffffff>;
- xlnx,mplb-allow-lock-xfer = <0x1>;
- xlnx,mplb-arb-mode = <0x0>;
- xlnx,mplb-awidth = <0x20>;
- xlnx,mplb-counter = <0x500>;
- xlnx,mplb-dwidth = <0x80>;
- xlnx,mplb-max-burst = <0x8>;
- xlnx,mplb-native-dwidth = <0x80>;
- xlnx,mplb-p2p = <0x0>;
- xlnx,mplb-prio-dcur = <0x2>;
- xlnx,mplb-prio-dcuw = <0x3>;
- xlnx,mplb-prio-icu = <0x4>;
- xlnx,mplb-prio-splb0 = <0x1>;
- xlnx,mplb-prio-splb1 = <0x0>;
- xlnx,mplb-read-pipe-enable = <0x1>;
- xlnx,mplb-sync-tattribute = <0x0>;
- xlnx,mplb-wdog-enable = <0x1>;
- xlnx,mplb-write-pipe-enable = <0x1>;
- xlnx,mplb-write-post-enable = <0x1>;
- xlnx,num-dma = <0x0>;
- xlnx,pir = <0xf>;
- xlnx,ppc440mc-addr-base = <0x0>;
- xlnx,ppc440mc-addr-high = <0x1fffffff>;
- xlnx,ppc440mc-arb-mode = <0x0>;
- xlnx,ppc440mc-bank-conflict-mask = <0x1800000>;
- xlnx,ppc440mc-control = <0xf810008f>;
- xlnx,ppc440mc-max-burst = <0x8>;
- xlnx,ppc440mc-prio-dcur = <0x2>;
- xlnx,ppc440mc-prio-dcuw = <0x3>;
- xlnx,ppc440mc-prio-icu = <0x4>;
- xlnx,ppc440mc-prio-splb0 = <0x1>;
- xlnx,ppc440mc-prio-splb1 = <0x0>;
- xlnx,ppc440mc-row-conflict-mask = <0x7ffe00>;
- xlnx,ppcdm-asyncmode = <0x0>;
- xlnx,ppcds-asyncmode = <0x0>;
- xlnx,user-reset = <0x0>;
- } ;
- } ;
- plb_v46_0: plb@0 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "xlnx,plb-v46-1.03.a", "simple-bus";
- ranges ;
- FLASH: flash@fc000000 {
- bank-width = <2>;
- compatible = "xlnx,xps-mch-emc-2.00.a", "cfi-flash";
- reg = < 0xfc000000 0x2000000 >;
- xlnx,family = "virtex5";
- xlnx,include-datawidth-matching-0 = <0x1>;
- xlnx,include-datawidth-matching-1 = <0x0>;
- xlnx,include-datawidth-matching-2 = <0x0>;
- xlnx,include-datawidth-matching-3 = <0x0>;
- xlnx,include-negedge-ioregs = <0x0>;
- xlnx,include-plb-ipif = <0x1>;
- xlnx,include-wrbuf = <0x1>;
- xlnx,max-mem-width = <0x10>;
- xlnx,mch-native-dwidth = <0x20>;
- xlnx,mch-plb-clk-period-ps = <0x2710>;
- xlnx,mch-splb-awidth = <0x20>;
- xlnx,mch0-accessbuf-depth = <0x10>;
- xlnx,mch0-protocol = <0x0>;
- xlnx,mch0-rddatabuf-depth = <0x10>;
- xlnx,mch1-accessbuf-depth = <0x10>;
- xlnx,mch1-protocol = <0x0>;
- xlnx,mch1-rddatabuf-depth = <0x10>;
- xlnx,mch2-accessbuf-depth = <0x10>;
- xlnx,mch2-protocol = <0x0>;
- xlnx,mch2-rddatabuf-depth = <0x10>;
- xlnx,mch3-accessbuf-depth = <0x10>;
- xlnx,mch3-protocol = <0x0>;
- xlnx,mch3-rddatabuf-depth = <0x10>;
- xlnx,mem0-width = <0x10>;
- xlnx,mem1-width = <0x20>;
- xlnx,mem2-width = <0x20>;
- xlnx,mem3-width = <0x20>;
- xlnx,num-banks-mem = <0x1>;
- xlnx,num-channels = <0x2>;
- xlnx,priority-mode = <0x0>;
- xlnx,synch-mem-0 = <0x0>;
- xlnx,synch-mem-1 = <0x0>;
- xlnx,synch-mem-2 = <0x0>;
- xlnx,synch-mem-3 = <0x0>;
- xlnx,synch-pipedelay-0 = <0x2>;
- xlnx,synch-pipedelay-1 = <0x2>;
- xlnx,synch-pipedelay-2 = <0x2>;
- xlnx,synch-pipedelay-3 = <0x2>;
- xlnx,tavdv-ps-mem-0 = <0x1adb0>;
- xlnx,tavdv-ps-mem-1 = <0x3a98>;
- xlnx,tavdv-ps-mem-2 = <0x3a98>;
- xlnx,tavdv-ps-mem-3 = <0x3a98>;
- xlnx,tcedv-ps-mem-0 = <0x1adb0>;
- xlnx,tcedv-ps-mem-1 = <0x3a98>;
- xlnx,tcedv-ps-mem-2 = <0x3a98>;
- xlnx,tcedv-ps-mem-3 = <0x3a98>;
- xlnx,thzce-ps-mem-0 = <0x88b8>;
- xlnx,thzce-ps-mem-1 = <0x1b58>;
- xlnx,thzce-ps-mem-2 = <0x1b58>;
- xlnx,thzce-ps-mem-3 = <0x1b58>;
- xlnx,thzoe-ps-mem-0 = <0x1b58>;
- xlnx,thzoe-ps-mem-1 = <0x1b58>;
- xlnx,thzoe-ps-mem-2 = <0x1b58>;
- xlnx,thzoe-ps-mem-3 = <0x1b58>;
- xlnx,tlzwe-ps-mem-0 = <0x88b8>;
- xlnx,tlzwe-ps-mem-1 = <0x0>;
- xlnx,tlzwe-ps-mem-2 = <0x0>;
- xlnx,tlzwe-ps-mem-3 = <0x0>;
- xlnx,twc-ps-mem-0 = <0x1adb0>;
- xlnx,twc-ps-mem-1 = <0x3a98>;
- xlnx,twc-ps-mem-2 = <0x3a98>;
- xlnx,twc-ps-mem-3 = <0x3a98>;
- xlnx,twp-ps-mem-0 = <0x11170>;
- xlnx,twp-ps-mem-1 = <0x2ee0>;
- xlnx,twp-ps-mem-2 = <0x2ee0>;
- xlnx,twp-ps-mem-3 = <0x2ee0>;
- xlnx,xcl0-linesize = <0x4>;
- xlnx,xcl0-writexfer = <0x1>;
- xlnx,xcl1-linesize = <0x4>;
- xlnx,xcl1-writexfer = <0x1>;
- xlnx,xcl2-linesize = <0x4>;
- xlnx,xcl2-writexfer = <0x1>;
- xlnx,xcl3-linesize = <0x4>;
- xlnx,xcl3-writexfer = <0x1>;
- } ;
- Hard_Ethernet_MAC: xps-ll-temac@81c00000 {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "xlnx,compound";
- ethernet@81c00000 {
- compatible = "xlnx,xps-ll-temac-1.01.b";
- device_type = "network";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 8 2 >;
- llink-connected = <&Hard_Ethernet_MAC_fifo>;
- local-mac-address = [ 02 00 00 00 00 00 ];
- reg = < 0x81c00000 0x40 >;
- xlnx,bus2core-clk-ratio = <0x1>;
- xlnx,phy-type = <0x3>;
- xlnx,phyaddr = <0x1>;
- xlnx,rxcsum = <0x0>;
- xlnx,rxfifo = <0x8000>;
- xlnx,temac-type = <0x0>;
- xlnx,txcsum = <0x0>;
- xlnx,txfifo = <0x8000>;
- } ;
- } ;
- Hard_Ethernet_MAC_fifo: xps-ll-fifo@81a00000 {
- compatible = "xlnx,xps-ll-fifo-1.01.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 6 2 >;
- reg = < 0x81a00000 0x10000 >;
- xlnx,family = "virtex5";
- } ;
- IIC_EEPROM: i2c@81600000 {
- compatible = "xlnx,xps-iic-2.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 9 2 >;
- reg = < 0x81600000 0x10000 >;
- xlnx,clk-freq = <0x5f5e100>;
- xlnx,family = "virtex5";
- xlnx,gpo-width = <0x1>;
- xlnx,iic-freq = <0x186a0>;
- xlnx,scl-inertial-delay = <0x5>;
- xlnx,sda-inertial-delay = <0x5>;
- xlnx,ten-bit-adr = <0x0>;
- } ;
- LCD_OPTIONAL: gpio@81420000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- reg = < 0x81420000 0x10000 >;
- xlnx,all-inputs = <0x0>;
- xlnx,all-inputs-2 = <0x0>;
- xlnx,dout-default = <0x0>;
- xlnx,dout-default-2 = <0x0>;
- xlnx,family = "virtex5";
- xlnx,gpio-width = <0xb>;
- xlnx,interrupt-present = <0x0>;
- xlnx,is-bidir = <0x1>;
- xlnx,is-bidir-2 = <0x1>;
- xlnx,is-dual = <0x0>;
- xlnx,tri-default = <0xffffffff>;
- xlnx,tri-default-2 = <0xffffffff>;
- } ;
- LEDs_4Bit: gpio@81400000 {
- compatible = "xlnx,xps-gpio-1.00.a";
- reg = < 0x81400000 0x10000 >;
- xlnx,all-inputs = <0x0>;
- xlnx,all-inputs-2 = <0x0>;
- xlnx,dout-default = <0x0>;
- xlnx,dout-default-2 = <0x0>;
- xlnx,family = "virtex5";
- xlnx,gpio-width = <0x4>;
- xlnx,interrupt-present = <0x0>;
- xlnx,is-bidir = <0x1>;
- xlnx,is-bidir-2 = <0x1>;
- xlnx,is-dual = <0x0>;
- xlnx,tri-default = <0xffffffff>;
- xlnx,tri-default-2 = <0xffffffff>;
- } ;
- RS232_Uart_1: serial@83e00000 {
- clock-frequency = <100000000>;
- compatible = "xlnx,xps-uart16550-2.00.b", "ns16550";
- current-speed = <9600>;
- device_type = "serial";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 11 2 >;
- reg = < 0x83e00000 0x10000 >;
- reg-offset = <0x1003>;
- reg-shift = <2>;
- xlnx,family = "virtex5";
- xlnx,has-external-rclk = <0x0>;
- xlnx,has-external-xin = <0x0>;
- xlnx,is-a-16550 = <0x1>;
- } ;
- SPI_EEPROM: xps-spi@feff8000 {
- compatible = "xlnx,xps-spi-2.00.b";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 10 2 >;
- reg = < 0xfeff8000 0x80 >;
- xlnx,family = "virtex5";
- xlnx,fifo-exist = <0x1>;
- xlnx,num-ss-bits = <0x1>;
- xlnx,num-transfer-bits = <0x8>;
- xlnx,sck-ratio = <0x80>;
- } ;
- SysACE_CompactFlash: sysace@83600000 {
- compatible = "xlnx,xps-sysace-1.00.a";
- interrupt-parent = <&xps_intc_0>;
- interrupts = < 7 2 >;
- reg = < 0x83600000 0x10000 >;
- xlnx,family = "virtex5";
- xlnx,mem-width = <0x10>;
- } ;
- plbv46_pci_0: plbv46-pci@85e00000 {
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "xlnx,plbv46-pci-1.03.a";
- device_type = "pci";
- reg = < 0x85e00000 0x10000 >;
-
- /*
- * The default ML510 BSB has C_IPIFBAR2PCIBAR_0 set to
- * 0 which means that a read/write to the memory mapped
- * i/o region (which starts at 0xa0000000) for pci
- * bar 0 on the plb side translates to 0.
- * It is important to set this value to 0xa0000000, so
- * that inbound and outbound pci transactions work
- * properly including DMA.
- */
- ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x20000000
- 0x01000000 0 0x00000000 0xf0000000 0 0x00010000>;
-
- #interrupt-cells = <1>;
- interrupt-parent = <&xps_intc_0>;
- interrupt-map-mask = <0xff00 0x0 0x0 0x7>;
- interrupt-map = <
- /* IRQ mapping for pci slots and ALI M1533
- * periperhals. In total there are 5 interrupt
- * lines connected to a xps_intc controller.
- * Four of them are PCI IRQ A, B, C, D and
- * which correspond to respectively xpx_intc
- * 5, 4, 3 and 2. The fifth interrupt line is
- * connected to the south bridge and this one
- * uses irq 1 and is active high instead of
- * active low.
- *
- * The M1533 contains various peripherals
- * including AC97 audio, a modem, USB, IDE and
- * some power management stuff. The modem
- * isn't connected on the ML510 and the power
- * management core also isn't used.
- */
-
- /* IDSEL 0x16 / dev=6, bus=0 / PCI slot 3 */
- 0x3000 0 0 1 &xps_intc_0 3 2
- 0x3000 0 0 2 &xps_intc_0 2 2
- 0x3000 0 0 3 &xps_intc_0 5 2
- 0x3000 0 0 4 &xps_intc_0 4 2
-
- /* IDSEL 0x13 / dev=3, bus=1 / PCI slot 4 */
- /*
- 0x11800 0 0 1 &xps_intc_0 5 0 2
- 0x11800 0 0 2 &xps_intc_0 4 0 2
- 0x11800 0 0 3 &xps_intc_0 3 0 2
- 0x11800 0 0 4 &xps_intc_0 2 0 2
- */
-
- /* According to the datasheet + schematic
- * ABCD [FPGA] of slot 5 is mapped to DABC.
- * Testing showed that at least A maps to B,
- * the mapping of the other pins is a guess
- * and for that reason the lines have been
- * commented out.
- */
- /* IDSEL 0x15 / dev=5, bus=0 / PCI slot 5 */
- 0x2800 0 0 1 &xps_intc_0 4 2
- /*
- 0x2800 0 0 2 &xps_intc_0 3 2
- 0x2800 0 0 3 &xps_intc_0 2 2
- 0x2800 0 0 4 &xps_intc_0 5 2
- */
-
- /* IDSEL 0x12 / dev=2, bus=1 / PCI slot 6 */
- /*
- 0x11000 0 0 1 &xps_intc_0 4 0 2
- 0x11000 0 0 2 &xps_intc_0 3 0 2
- 0x11000 0 0 3 &xps_intc_0 2 0 2
- 0x11000 0 0 4 &xps_intc_0 5 0 2
- */
-
- /* IDSEL 0x11 / dev=1, bus=0 / AC97 audio */
- 0x0800 0 0 1 &i8259 7 2
-
- /* IDSEL 0x1b / dev=11, bus=0 / IDE */
- 0x5800 0 0 1 &i8259 14 2
-
- /* IDSEL 0x1f / dev 15, bus=0 / 2x USB 1.1 */
- 0x7800 0 0 1 &i8259 7 2
- >;
- ali_m1533 {
- #size-cells = <1>;
- #address-cells = <2>;
- i8259: interrupt-controller@20 {
- reg = <1 0x20 2
- 1 0xa0 2
- 1 0x4d0 2>;
- interrupt-controller;
- device_type = "interrupt-controller";
- #address-cells = <0>;
- #interrupt-cells = <2>;
- compatible = "chrp,iic";
-
- /* south bridge irq is active high */
- interrupts = <1 3>;
- interrupt-parent = <&xps_intc_0>;
- };
- };
- } ;
- xps_bram_if_cntlr_1: xps-bram-if-cntlr@ffff0000 {
- compatible = "xlnx,xps-bram-if-cntlr-1.00.a";
- reg = < 0xffff0000 0x10000 >;
- xlnx,family = "virtex5";
- } ;
- xps_intc_0: interrupt-controller@81800000 {
- #interrupt-cells = <0x2>;
- compatible = "xlnx,xps-intc-1.00.a";
- interrupt-controller ;
- reg = < 0x81800000 0x10000 >;
- xlnx,num-intr-inputs = <0xc>;
- } ;
- xps_tft_0: tft@86e00000 {
- compatible = "xlnx,xps-tft-1.00.a";
- reg = < 0x86e00000 0x10000 >;
- xlnx,dcr-splb-slave-if = <0x1>;
- xlnx,default-tft-base-addr = <0x0>;
- xlnx,family = "virtex5";
- xlnx,i2c-slave-addr = <0x76>;
- xlnx,mplb-awidth = <0x20>;
- xlnx,mplb-dwidth = <0x80>;
- xlnx,mplb-native-dwidth = <0x40>;
- xlnx,mplb-smallest-slave = <0x20>;
- xlnx,tft-interface = <0x1>;
- } ;
- } ;
-} ;
diff --git a/arch/powerpc/boot/dts/walnut.dts b/arch/powerpc/boot/dts/walnut.dts
deleted file mode 100644
index 4a9f726ada13..000000000000
--- a/arch/powerpc/boot/dts/walnut.dts
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Device Tree Source for IBM Walnut
- *
- * Copyright 2007 IBM Corp.
- * Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without
- * any warranty of any kind, whether express or implied.
- */
-
-/dts-v1/;
-
-/ {
- #address-cells = <1>;
- #size-cells = <1>;
- model = "ibm,walnut";
- compatible = "ibm,walnut";
- dcr-parent = <&{/cpus/cpu@0}>;
-
- aliases {
- ethernet0 = &EMAC;
- serial0 = &UART0;
- serial1 = &UART1;
- };
-
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
-
- cpu@0 {
- device_type = "cpu";
- model = "PowerPC,405GP";
- reg = <0x00000000>;
- clock-frequency = <200000000>; /* Filled in by zImage */
- timebase-frequency = <0>; /* Filled in by zImage */
- i-cache-line-size = <32>;
- d-cache-line-size = <32>;
- i-cache-size = <16384>;
- d-cache-size = <16384>;
- dcr-controller;
- dcr-access-method = "native";
- };
- };
-
- memory {
- device_type = "memory";
- reg = <0x00000000 0x00000000>; /* Filled in by zImage */
- };
-
- UIC0: interrupt-controller {
- compatible = "ibm,uic";
- interrupt-controller;
- cell-index = <0>;
- dcr-reg = <0x0c0 0x009>;
- #address-cells = <0>;
- #size-cells = <0>;
- #interrupt-cells = <2>;
- };
-
- plb {
- compatible = "ibm,plb3";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
- clock-frequency = <0>; /* Filled in by zImage */
-
- SDRAM0: memory-controller {
- compatible = "ibm,sdram-405gp";
- dcr-reg = <0x010 0x002>;
- };
-
- MAL: mcmal {
- compatible = "ibm,mcmal-405gp", "ibm,mcmal";
- dcr-reg = <0x180 0x062>;
- num-tx-chans = <1>;
- num-rx-chans = <1>;
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0xb 0x4 /* TXEOB */
- 0xc 0x4 /* RXEOB */
- 0xa 0x4 /* SERR */
- 0xd 0x4 /* TXDE */
- 0xe 0x4 /* RXDE */>;
- };
-
- POB0: opb {
- compatible = "ibm,opb-405gp", "ibm,opb";
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0xef600000 0xef600000 0x00a00000>;
- dcr-reg = <0x0a0 0x005>;
- clock-frequency = <0>; /* Filled in by zImage */
-
- UART0: serial@ef600300 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600300 0x00000008>;
- virtual-reg = <0xef600300>;
- clock-frequency = <0>; /* Filled in by zImage */
- current-speed = <9600>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x0 0x4>;
- };
-
- UART1: serial@ef600400 {
- device_type = "serial";
- compatible = "ns16550";
- reg = <0xef600400 0x00000008>;
- virtual-reg = <0xef600400>;
- clock-frequency = <0>; /* Filled in by zImage */
- current-speed = <9600>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x1 0x4>;
- };
-
- IIC: i2c@ef600500 {
- compatible = "ibm,iic-405gp", "ibm,iic";
- reg = <0xef600500 0x00000011>;
- interrupt-parent = <&UIC0>;
- interrupts = <0x2 0x4>;
- };
-
- GPIO: gpio@ef600700 {
- compatible = "ibm,gpio-405gp";
- reg = <0xef600700 0x00000020>;
- };
-
- EMAC: ethernet@ef600800 {
- device_type = "network";
- compatible = "ibm,emac-405gp", "ibm,emac";
- interrupt-parent = <&UIC0>;
- interrupts = <
- 0xf 0x4 /* Ethernet */
- 0x9 0x4 /* Ethernet Wake Up */>;
- local-mac-address = [000000000000]; /* Filled in by zImage */
- reg = <0xef600800 0x00000070>;
- mal-device = <&MAL>;
- mal-tx-channel = <0>;
- mal-rx-channel = <0>;
- cell-index = <0>;
- max-frame-size = <1500>;
- rx-fifo-size = <4096>;
- tx-fifo-size = <2048>;
- phy-mode = "rmii";
- phy-map = <0x00000001>;
- };
-
- };
-
- EBC0: ebc {
- compatible = "ibm,ebc-405gp", "ibm,ebc";
- dcr-reg = <0x012 0x002>;
- #address-cells = <2>;
- #size-cells = <1>;
- /* The ranges property is supplied by the bootwrapper
- * and is based on the firmware's configuration of the
- * EBC bridge
- */
- clock-frequency = <0>; /* Filled in by zImage */
-
- sram@0,0 {
- reg = <0x00000000 0x00000000 0x00080000>;
- };
-
- flash@0,80000 {
- compatible = "jedec-flash";
- bank-width = <1>;
- reg = <0x00000000 0x00080000 0x00080000>;
- #address-cells = <1>;
- #size-cells = <1>;
- partition@0 {
- label = "OpenBIOS";
- reg = <0x00000000 0x00080000>;
- read-only;
- };
- };
-
- nvram@1,0 {
- /* NVRAM and RTC */
- compatible = "ds1743-nvram";
- #bytes = <0x2000>;
- reg = <0x00000001 0x00000000 0x00002000>;
- };
-
- keyboard@2,0 {
- compatible = "intel,82C42PC";
- reg = <0x00000002 0x00000000 0x00000002>;
- };
-
- ir@3,0 {
- compatible = "ti,TIR2000PAG";
- reg = <0x00000003 0x00000000 0x00000010>;
- };
-
- fpga@7,0 {
- compatible = "Walnut-FPGA";
- reg = <0x00000007 0x00000000 0x00000010>;
- virtual-reg = <0xf0300005>;
- };
- };
-
- PCI0: pci@ec000000 {
- device_type = "pci";
- #interrupt-cells = <1>;
- #size-cells = <2>;
- #address-cells = <3>;
- compatible = "ibm,plb405gp-pci", "ibm,plb-pci";
- primary;
- reg = <0xeec00000 0x00000008 /* Config space access */
- 0xeed80000 0x00000004 /* IACK */
- 0xeed80000 0x00000004 /* Special cycle */
- 0xef480000 0x00000040>; /* Internal registers */
-
- /* Outbound ranges, one memory and one IO,
- * later cannot be changed. Chip supports a second
- * IO range but we don't use it for now
- */
- ranges = <0x02000000 0x00000000 0x80000000 0x80000000 0x00000000 0x20000000
- 0x01000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>;
-
- /* Inbound 2GB range starting at 0 */
- dma-ranges = <0x42000000 0x0 0x0 0x0 0x0 0x80000000>;
-
- /* Walnut has all 4 IRQ pins tied together per slot */
- interrupt-map-mask = <0xf800 0x0 0x0 0x0>;
- interrupt-map = <
- /* IDSEL 1 */
- 0x800 0x0 0x0 0x0 &UIC0 0x1c 0x8
-
- /* IDSEL 2 */
- 0x1000 0x0 0x0 0x0 &UIC0 0x1d 0x8
-
- /* IDSEL 3 */
- 0x1800 0x0 0x0 0x0 &UIC0 0x1e 0x8
-
- /* IDSEL 4 */
- 0x2000 0x0 0x0 0x0 &UIC0 0x1f 0x8
- >;
- };
- };
-
- chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
- };
-};
diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index e576ee85c42f..aa62d08e97c2 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -238,7 +238,7 @@
/* This will create 52 and 53 */
at24@52 {
- compatible = "at,24c04";
+ compatible = "atmel,24c04";
reg = <0x52>;
};
};
@@ -258,14 +258,12 @@
};
power-leds {
- compatible = "gpio-leds";
+ compatible = "warp-power-leds";
green {
gpios = <&GPIO1 0 0>;
- default-state = "keep";
};
red {
gpios = <&GPIO1 1 0>;
- default-state = "keep";
};
};
@@ -304,6 +302,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dts/wii.dts b/arch/powerpc/boot/dts/wii.dts
index 77528c9a8dbd..e46143c32308 100644
--- a/arch/powerpc/boot/dts/wii.dts
+++ b/arch/powerpc/boot/dts/wii.dts
@@ -1,18 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/boot/dts/wii.dts
*
* Nintendo Wii platform device tree source
* Copyright (C) 2008-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
/dts-v1/;
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
/*
* This is commented-out for now.
@@ -65,14 +62,14 @@
0x0d800000 0x0d800000 0x00800000>;
interrupt-parent = <&PIC0>;
- video@0c002000 {
+ video@c002000 {
compatible = "nintendo,hollywood-vi",
"nintendo,flipper-vi";
reg = <0x0c002000 0x100>;
interrupts = <8>;
};
- processor-interface@0c003000 {
+ processor-interface@c003000 {
compatible = "nintendo,hollywood-pi",
"nintendo,flipper-pi";
reg = <0x0c003000 0x100>;
@@ -84,7 +81,7 @@
};
};
- dsp@0c005000 {
+ dsp@c005000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "nintendo,hollywood-dsp",
@@ -93,14 +90,14 @@
interrupts = <6>;
};
- gamepad-controller@0d006400 {
+ gamepad-controller@d006400 {
compatible = "nintendo,hollywood-si",
"nintendo,flipper-si";
reg = <0x0d006400 0x100>;
interrupts = <3>;
};
- audio@0c006c00 {
+ audio@c006c00 {
compatible = "nintendo,hollywood-ai",
"nintendo,flipper-ai";
reg = <0x0d006c00 0x20>;
@@ -108,7 +105,7 @@
};
/* External Interface bus */
- exi@0d006800 {
+ exi@d006800 {
compatible = "nintendo,hollywood-exi",
"nintendo,flipper-exi";
reg = <0x0d006800 0x40>;
@@ -116,7 +113,7 @@
interrupts = <4>;
};
- usb@0d040000 {
+ usb@d040000 {
compatible = "nintendo,hollywood-usb-ehci",
"usb-ehci";
reg = <0x0d040000 0x100>;
@@ -124,7 +121,7 @@
interrupt-parent = <&PIC1>;
};
- usb@0d050000 {
+ usb@d050000 {
compatible = "nintendo,hollywood-usb-ohci",
"usb-ohci";
reg = <0x0d050000 0x100>;
@@ -132,7 +129,7 @@
interrupt-parent = <&PIC1>;
};
- usb@0d060000 {
+ usb@d060000 {
compatible = "nintendo,hollywood-usb-ohci",
"usb-ohci";
reg = <0x0d060000 0x100>;
@@ -140,7 +137,7 @@
interrupt-parent = <&PIC1>;
};
- sd@0d070000 {
+ sd@d070000 {
compatible = "nintendo,hollywood-sdhci",
"sdhci";
reg = <0x0d070000 0x200>;
@@ -148,7 +145,7 @@
interrupt-parent = <&PIC1>;
};
- sdio@0d080000 {
+ sdio@d080000 {
compatible = "nintendo,hollywood-sdhci",
"sdhci";
reg = <0x0d080000 0x200>;
@@ -156,14 +153,14 @@
interrupt-parent = <&PIC1>;
};
- ipc@0d000000 {
+ ipc@d000000 {
compatible = "nintendo,hollywood-ipc";
reg = <0x0d000000 0x10>;
interrupts = <30>;
interrupt-parent = <&PIC1>;
};
- PIC1: pic1@0d800030 {
+ PIC1: pic1@d800030 {
#interrupt-cells = <1>;
compatible = "nintendo,hollywood-pic";
reg = <0x0d800030 0x10>;
@@ -171,11 +168,30 @@
interrupts = <14>;
};
- GPIO: gpio@0d8000c0 {
+ srnprot@d800060 {
+ compatible = "nintendo,hollywood-srnprot";
+ reg = <0x0d800060 0x4>;
+ };
+
+ GPIO: gpio@d8000c0 {
#gpio-cells = <2>;
compatible = "nintendo,hollywood-gpio";
reg = <0x0d8000c0 0x40>;
gpio-controller;
+ ngpios = <24>;
+
+ gpio-line-names =
+ "POWER", "SHUTDOWN", "FAN", "DC_DC",
+ "DI_SPIN", "SLOT_LED", "EJECT_BTN", "SLOT_IN",
+ "SENSOR_BAR", "DO_EJECT", "EEP_CS", "EEP_CLK",
+ "EEP_MOSI", "EEP_MISO", "AVE_SCL", "AVE_SDA",
+ "DEBUG0", "DEBUG1", "DEBUG2", "DEBUG3",
+ "DEBUG4", "DEBUG5", "DEBUG6", "DEBUG7";
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <10>;
+ interrupt-parent = <&PIC1>;
/*
* This is commented out while a standard binding
@@ -203,16 +219,54 @@
*/
};
- control@0d800100 {
+ control@d800100 {
compatible = "nintendo,hollywood-control";
- reg = <0x0d800100 0x300>;
+ /*
+ * Both the address and length are wrong, according to
+ * Wiibrew this should be <0x0d800000 0x400>, but it
+ * requires refactoring the PIC1, GPIO and OTP nodes
+ * before changing that.
+ */
+ reg = <0x0d800100 0xa0>;
+ };
+
+ otp@d8001ec {
+ compatible = "nintendo,hollywood-otp";
+ reg = <0x0d8001ec 0x8>;
};
- disk@0d806000 {
+ disk@d806000 {
compatible = "nintendo,hollywood-di";
reg = <0x0d806000 0x40>;
interrupts = <2>;
};
};
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ /* This is the blue LED in the disk drive slot */
+ drive-slot {
+ label = "wii:blue:drive_slot";
+ gpios = <&GPIO 5 GPIO_ACTIVE_HIGH>;
+ panic-indicator;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ power {
+ label = "Power Button";
+ gpios = <&GPIO 0 GPIO_ACTIVE_HIGH>;
+ linux,code = <KEY_POWER>;
+ };
+
+ eject {
+ label = "Eject Button";
+ gpios = <&GPIO 6 GPIO_ACTIVE_HIGH>;
+ linux,code = <KEY_EJECTCD>;
+ };
+ };
};
diff --git a/arch/powerpc/boot/dts/xcalibur1501.dts b/arch/powerpc/boot/dts/xcalibur1501.dts
index c409cbafb126..46c25bda9515 100644
--- a/arch/powerpc/boot/dts/xcalibur1501.dts
+++ b/arch/powerpc/boot/dts/xcalibur1501.dts
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 Extreme Engineering Solutions, Inc.
* Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
*
* XCalibur1501 6U CompactPCI single-board computer based on MPC8572E
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/dts-v1/;
@@ -238,7 +235,7 @@
};
rtc@68 {
- compatible = "stm,m41t00",
+ compatible = "st,m41t00",
"dallas,ds1338";
reg = <0x68>;
};
diff --git a/arch/powerpc/boot/dts/xpedite5200.dts b/arch/powerpc/boot/dts/xpedite5200.dts
index 8fd7b7031357..74b346f2d43c 100644
--- a/arch/powerpc/boot/dts/xpedite5200.dts
+++ b/arch/powerpc/boot/dts/xpedite5200.dts
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2009 Extreme Engineering Solutions, Inc.
* Based on TQM8548 device tree
*
* XPedite5200 PrPMC/XMC module based on MPC8548E
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/dts-v1/;
@@ -130,12 +127,12 @@
};
rtc@68 {
- compatible = "stm,m41t00",
+ compatible = "st,m41t00",
"dallas,ds1338";
reg = <0x68>;
};
- dtt@48 {
+ dtt@34 {
compatible = "maxim,max1237";
reg = <0x34>;
};
diff --git a/arch/powerpc/boot/dts/xpedite5200_xmon.dts b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
index 0baa8283d08c..d491c7a8f979 100644
--- a/arch/powerpc/boot/dts/xpedite5200_xmon.dts
+++ b/arch/powerpc/boot/dts/xpedite5200_xmon.dts
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2009 Extreme Engineering Solutions, Inc.
* Based on TQM8548 device tree
*
* XPedite5200 PrPMC/XMC module based on MPC8548E. This dts is for the
* xMon boot loader memory map which differs from U-Boot's.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/dts-v1/;
@@ -134,12 +131,12 @@
};
rtc@68 {
- compatible = "stm,m41t00",
+ compatible = "st,m41t00",
"dallas,ds1338";
reg = <0x68>;
};
- dtt@48 {
+ dtt@34 {
compatible = "maxim,max1237";
reg = <0x34>;
};
@@ -503,6 +500,6 @@
/* Needed for dtbImage boot wrapper compatibility */
chosen {
- linux,stdout-path = &serial0;
+ stdout-path = &serial0;
};
};
diff --git a/arch/powerpc/boot/dts/xpedite5301.dts b/arch/powerpc/boot/dts/xpedite5301.dts
index 04cb410da48b..12184e179638 100644
--- a/arch/powerpc/boot/dts/xpedite5301.dts
+++ b/arch/powerpc/boot/dts/xpedite5301.dts
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 Extreme Engineering Solutions, Inc.
* Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
*
* XPedite5301 PMC/XMC module based on MPC8572E
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/dts-v1/;
@@ -231,7 +228,7 @@
};
rtc@68 {
- compatible = "stm,m41t00",
+ compatible = "st,m41t00",
"dallas,ds1338";
reg = <0x68>;
};
diff --git a/arch/powerpc/boot/dts/xpedite5330.dts b/arch/powerpc/boot/dts/xpedite5330.dts
index 73f8620f1ce7..e8fc90c52ad6 100644
--- a/arch/powerpc/boot/dts/xpedite5330.dts
+++ b/arch/powerpc/boot/dts/xpedite5330.dts
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 Extreme Engineering Solutions, Inc.
* Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
*
* XPedite5330 3U CompactPCI module based on MPC8572E
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/dts-v1/;
@@ -267,7 +264,7 @@
};
rtc@68 {
- compatible = "stm,m41t00",
+ compatible = "st,m41t00",
"dallas,ds1338";
reg = <0x68>;
};
diff --git a/arch/powerpc/boot/dts/xpedite5370.dts b/arch/powerpc/boot/dts/xpedite5370.dts
index cd0ea2b99362..2b5aa2f3a709 100644
--- a/arch/powerpc/boot/dts/xpedite5370.dts
+++ b/arch/powerpc/boot/dts/xpedite5370.dts
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 Extreme Engineering Solutions, Inc.
* Based on MPC8572DS device tree from Freescale Semiconductor, Inc.
*
* XPedite5370 3U VPX single-board computer based on MPC8572E
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
/dts-v1/;
@@ -229,7 +226,7 @@
};
rtc@68 {
- compatible = "stm,m41t00",
+ compatible = "st,m41t00",
"dallas,ds1338";
reg = <0x68>;
};
diff --git a/arch/powerpc/boot/dts/yosemite.dts b/arch/powerpc/boot/dts/yosemite.dts
index 30bb4753577a..56508785ce13 100644
--- a/arch/powerpc/boot/dts/yosemite.dts
+++ b/arch/powerpc/boot/dts/yosemite.dts
@@ -327,6 +327,6 @@
};
chosen {
- linux,stdout-path = "/plb/opb/serial@ef600300";
+ stdout-path = "/plb/opb/serial@ef600300";
};
};
diff --git a/arch/powerpc/boot/dummy.c b/arch/powerpc/boot/dummy.c
deleted file mode 100644
index 31dbf45bf99c..000000000000
--- a/arch/powerpc/boot/dummy.c
+++ /dev/null
@@ -1,4 +0,0 @@
-int main(void)
-{
- return 0;
-}
diff --git a/arch/powerpc/boot/ebony.c b/arch/powerpc/boot/ebony.c
index 5532ab3221dd..add2316d34d5 100644
--- a/arch/powerpc/boot/ebony.c
+++ b/arch/powerpc/boot/ebony.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2007 David Gibson, IBM Corporation.
*
@@ -9,11 +10,6 @@
*
* Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
* Copyright (c) 2003, 2004 Zultys Technologies
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
diff --git a/arch/powerpc/boot/elf.h b/arch/powerpc/boot/elf.h
index 1941bc50d4c5..f6aa7c20fcaf 100644
--- a/arch/powerpc/boot/elf.h
+++ b/arch/powerpc/boot/elf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_ELF_H_
#define _PPC_BOOT_ELF_H_
diff --git a/arch/powerpc/boot/elf_util.c b/arch/powerpc/boot/elf_util.c
index 316552dea4d8..9e6cbdfdc172 100644
--- a/arch/powerpc/boot/elf_util.c
+++ b/arch/powerpc/boot/elf_util.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) Paul Mackerras 1997.
*
* Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
diff --git a/arch/powerpc/boot/ep405.c b/arch/powerpc/boot/ep405.c
deleted file mode 100644
index 2d08a862cbea..000000000000
--- a/arch/powerpc/boot/ep405.c
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Embedded Planet EP405 with PlanetCore firmware
- *
- * (c) Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp,\
- *
- * Based on ep88xc.c by
- *
- * Scott Wood <scottwood@freescale.com>
- *
- * Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "ops.h"
-#include "stdio.h"
-#include "planetcore.h"
-#include "dcr.h"
-#include "4xx.h"
-#include "io.h"
-
-static char *table;
-static u64 mem_size;
-
-static void platform_fixups(void)
-{
- u64 val;
- void *nvrtc;
-
- dt_fixup_memory(0, mem_size);
- planetcore_set_mac_addrs(table);
-
- if (!planetcore_get_decimal(table, PLANETCORE_KEY_CRYSTAL_HZ, &val)) {
- printf("No PlanetCore crystal frequency key.\r\n");
- return;
- }
- ibm405gp_fixup_clocks(val, 0xa8c000);
- ibm4xx_quiesce_eth((u32 *)0xef600800, NULL);
- ibm4xx_fixup_ebc_ranges("/plb/ebc");
-
- if (!planetcore_get_decimal(table, PLANETCORE_KEY_KB_NVRAM, &val)) {
- printf("No PlanetCore NVRAM size key.\r\n");
- return;
- }
- nvrtc = finddevice("/plb/ebc/nvrtc@4,200000");
- if (nvrtc != NULL) {
- u32 reg[3] = { 4, 0x200000, 0};
- getprop(nvrtc, "reg", reg, 3);
- reg[2] = (val << 10) & 0xffffffff;
- setprop(nvrtc, "reg", reg, 3);
- }
-}
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- table = (char *)r3;
- planetcore_prepare_table(table);
-
- if (!planetcore_get_decimal(table, PLANETCORE_KEY_MB_RAM, &mem_size))
- return;
-
- mem_size *= 1024 * 1024;
- simple_alloc_init(_end, mem_size - (unsigned long)_end, 32, 64);
-
- fdt_init(_dtb_start);
-
- planetcore_set_stdout_path(table);
-
- serial_console_init();
- platform_ops.fixups = platform_fixups;
-}
diff --git a/arch/powerpc/boot/ep8248e.c b/arch/powerpc/boot/ep8248e.c
index f57d14d0272b..2ab9e0d8ca80 100644
--- a/arch/powerpc/boot/ep8248e.c
+++ b/arch/powerpc/boot/ep8248e.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Embedded Planet EP8248E with PlanetCore firmware
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/ep88xc.c b/arch/powerpc/boot/ep88xc.c
index a400f5407155..1c277a13b368 100644
--- a/arch/powerpc/boot/ep88xc.c
+++ b/arch/powerpc/boot/ep88xc.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Embedded Planet EP88xC with PlanetCore firmware
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c
index c10191006673..01262f50b769 100644
--- a/arch/powerpc/boot/epapr-wrapper.c
+++ b/arch/powerpc/boot/epapr-wrapper.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
extern void epapr_platform_init(unsigned long r3, unsigned long r4,
unsigned long r5, unsigned long r6,
unsigned long r7);
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
index 02e91aa2194a..7c5b26ade6c4 100644
--- a/arch/powerpc/boot/epapr.c
+++ b/arch/powerpc/boot/epapr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Bootwrapper for ePAPR compliant firmwares
*
@@ -8,10 +9,6 @@
* and
* Scott Wood <scottwood@freescale.com>
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/fixed-head.S b/arch/powerpc/boot/fixed-head.S
index 8e14cd9e1a54..4346c750cac1 100644
--- a/arch/powerpc/boot/fixed-head.S
+++ b/arch/powerpc/boot/fixed-head.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
.text
.global _zimage_start
_zimage_start:
diff --git a/arch/powerpc/boot/fixup-headers.sed b/arch/powerpc/boot/fixup-headers.sed
new file mode 100644
index 000000000000..96362428eb37
--- /dev/null
+++ b/arch/powerpc/boot/fixup-headers.sed
@@ -0,0 +1,12 @@
+# Copyright 2016 IBM Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 or later as
+# published by the Free Software Foundation.
+
+s@#include <linux/decompress/mm\.h>@@;
+s@\"zlib_inflate/\([^\"]*\).*@"\1"@;
+s@<linux/kernel.h>@<stddef.h>@;
+
+s@__used@@;
+s@<linux/\([^>]*\).*@"\1"@;
diff --git a/arch/powerpc/boot/fsl-soc.c b/arch/powerpc/boot/fsl-soc.c
index b835ed69e1a1..01bad8ea62ee 100644
--- a/arch/powerpc/boot/fsl-soc.c
+++ b/arch/powerpc/boot/fsl-soc.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Freescale SOC support functions
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/fsl-soc.h b/arch/powerpc/boot/fsl-soc.h
index 5da26fc6e3cf..00b2cb89ff2f 100644
--- a/arch/powerpc/boot/fsl-soc.h
+++ b/arch/powerpc/boot/fsl-soc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_FSL_SOC_H_
#define _PPC_BOOT_FSL_SOC_H_
diff --git a/arch/powerpc/boot/gamecube-head.S b/arch/powerpc/boot/gamecube-head.S
index 65a9b2a3bf33..ccf5f1045e4a 100644
--- a/arch/powerpc/boot/gamecube-head.S
+++ b/arch/powerpc/boot/gamecube-head.S
@@ -1,15 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* arch/powerpc/boot/gamecube-head.S
*
* Nintendo GameCube bootwrapper entry.
* Copyright (C) 2004-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
#include "ppc_asm.h"
diff --git a/arch/powerpc/boot/gamecube.c b/arch/powerpc/boot/gamecube.c
index 28ae7057be5e..d030612fdd74 100644
--- a/arch/powerpc/boot/gamecube.c
+++ b/arch/powerpc/boot/gamecube.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/boot/gamecube.c
*
* Nintendo GameCube bootwrapper support
* Copyright (C) 2004-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
#include <stddef.h>
diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c
deleted file mode 100644
index 9dc52501de83..000000000000
--- a/arch/powerpc/boot/gunzip_util.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Copyright 2007 David Gibson, IBM Corporation.
- * Based on earlier work, Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <stddef.h>
-#include "string.h"
-#include "stdio.h"
-#include "ops.h"
-#include "gunzip_util.h"
-
-#define HEAD_CRC 2
-#define EXTRA_FIELD 4
-#define ORIG_NAME 8
-#define COMMENT 0x10
-#define RESERVED 0xe0
-
-/**
- * gunzip_start - prepare to decompress gzip data
- * @state: decompressor state structure to be initialized
- * @src: buffer containing gzip compressed or uncompressed data
- * @srclen: size in bytes of the buffer at src
- *
- * If the buffer at @src contains a gzip header, this function
- * initializes zlib to decompress the data, storing the decompression
- * state in @state. The other functions in this file can then be used
- * to decompress data from the gzipped stream.
- *
- * If the buffer at @src does not contain a gzip header, it is assumed
- * to contain uncompressed data. The buffer information is recorded
- * in @state and the other functions in this file will simply copy
- * data from the uncompressed data stream at @src.
- *
- * Any errors, such as bad compressed data, cause an error to be
- * printed an the platform's exit() function to be called.
- */
-void gunzip_start(struct gunzip_state *state, void *src, int srclen)
-{
- char *hdr = src;
- int hdrlen = 0;
-
- memset(state, 0, sizeof(*state));
-
- /* Check for gzip magic number */
- if ((hdr[0] == 0x1f) && (hdr[1] == 0x8b)) {
- /* gzip data, initialize zlib parameters */
- int r, flags;
-
- state->s.workspace = state->scratch;
- if (zlib_inflate_workspacesize() > sizeof(state->scratch))
- fatal("insufficient scratch space for gunzip\n\r");
-
- /* skip header */
- hdrlen = 10;
- flags = hdr[3];
- if (hdr[2] != Z_DEFLATED || (flags & RESERVED) != 0)
- fatal("bad gzipped data\n\r");
- if ((flags & EXTRA_FIELD) != 0)
- hdrlen = 12 + hdr[10] + (hdr[11] << 8);
- if ((flags & ORIG_NAME) != 0)
- while (hdr[hdrlen++] != 0)
- ;
- if ((flags & COMMENT) != 0)
- while (hdr[hdrlen++] != 0)
- ;
- if ((flags & HEAD_CRC) != 0)
- hdrlen += 2;
- if (hdrlen >= srclen)
- fatal("gunzip_start: ran out of data in header\n\r");
-
- r = zlib_inflateInit2(&state->s, -MAX_WBITS);
- if (r != Z_OK)
- fatal("inflateInit2 returned %d\n\r", r);
- }
-
- state->s.total_in = hdrlen;
- state->s.next_in = src + hdrlen;
- state->s.avail_in = srclen - hdrlen;
-}
-
-/**
- * gunzip_partial - extract bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @dst: buffer to store extracted data
- * @dstlen: maximum number of bytes to extract
- *
- * This function extracts at most @dstlen bytes from the data stream
- * previously associated with @state by gunzip_start(), decompressing
- * if necessary. Exactly @dstlen bytes are extracted unless the data
- * stream doesn't contain enough bytes, in which case the entire
- * remainder of the stream is decompressed.
- *
- * Returns the actual number of bytes extracted. If any errors occur,
- * such as a corrupted compressed stream, an error is printed an the
- * platform's exit() function is called.
- */
-int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen)
-{
- int len;
-
- if (state->s.workspace) {
- /* gunzipping */
- int r;
-
- state->s.next_out = dst;
- state->s.avail_out = dstlen;
- r = zlib_inflate(&state->s, Z_FULL_FLUSH);
- if (r != Z_OK && r != Z_STREAM_END)
- fatal("inflate returned %d msg: %s\n\r", r, state->s.msg);
- len = state->s.next_out - (Byte *)dst;
- } else {
- /* uncompressed image */
- len = min(state->s.avail_in, (uLong)dstlen);
- memcpy(dst, state->s.next_in, len);
- state->s.next_in += len;
- state->s.avail_in -= len;
- }
- return len;
-}
-
-/**
- * gunzip_exactly - extract a fixed number of bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @dst: buffer to store extracted data
- * @dstlen: number of bytes to extract
- *
- * This function extracts exactly @dstlen bytes from the data stream
- * previously associated with @state by gunzip_start(), decompressing
- * if necessary.
- *
- * If there are less @dstlen bytes available in the data stream, or if
- * any other errors occur, such as a corrupted compressed stream, an
- * error is printed an the platform's exit() function is called.
- */
-void gunzip_exactly(struct gunzip_state *state, void *dst, int dstlen)
-{
- int len;
-
- len = gunzip_partial(state, dst, dstlen);
- if (len < dstlen)
- fatal("\n\rgunzip_exactly: ran out of data!"
- " Wanted %d, got %d.\n\r", dstlen, len);
-}
-
-/**
- * gunzip_discard - discard bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @len: number of bytes to discard
- *
- * This function extracts, then discards exactly @len bytes from the
- * data stream previously associated with @state by gunzip_start().
- * Subsequent gunzip_partial(), gunzip_exactly() or gunzip_finish()
- * calls will extract the data following the discarded bytes in the
- * data stream.
- *
- * If there are less @len bytes available in the data stream, or if
- * any other errors occur, such as a corrupted compressed stream, an
- * error is printed an the platform's exit() function is called.
- */
-void gunzip_discard(struct gunzip_state *state, int len)
-{
- static char discard_buf[128];
-
- while (len > sizeof(discard_buf)) {
- gunzip_exactly(state, discard_buf, sizeof(discard_buf));
- len -= sizeof(discard_buf);
- }
-
- if (len > 0)
- gunzip_exactly(state, discard_buf, len);
-}
-
-/**
- * gunzip_finish - extract all remaining bytes from a gzip data stream
- * @state: gzip state structure previously initialized by gunzip_start()
- * @dst: buffer to store extracted data
- * @dstlen: maximum number of bytes to extract
- *
- * This function extracts all remaining data, or at most @dstlen
- * bytes, from the stream previously associated with @state by
- * gunzip_start(). zlib is then shut down, so it is an error to use
- * any of the functions in this file on @state until it is
- * re-initialized with another call to gunzip_start().
- *
- * If any errors occur, such as a corrupted compressed stream, an
- * error is printed an the platform's exit() function is called.
- */
-int gunzip_finish(struct gunzip_state *state, void *dst, int dstlen)
-{
- int len;
-
- len = gunzip_partial(state, dst, dstlen);
-
- if (state->s.workspace) {
- zlib_inflateEnd(&state->s);
- }
-
- return len;
-}
diff --git a/arch/powerpc/boot/gunzip_util.h b/arch/powerpc/boot/gunzip_util.h
deleted file mode 100644
index b3dfa6e87b3a..000000000000
--- a/arch/powerpc/boot/gunzip_util.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Decompression convenience functions
- *
- * Copyright 2007 David Gibson, IBM Corporation.
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2. This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
- */
-#ifndef _PPC_BOOT_GUNZIP_UTIL_H_
-#define _PPC_BOOT_GUNZIP_UTIL_H_
-
-#include "zlib.h"
-
-/*
- * These functions are designed to make life easy for decompressing
- * kernel images, initrd images or any other gzip compressed image,
- * particularly if its useful to decompress part of the image (e.g. to
- * examine headers) before decompressing the remainder.
- *
- * To use:
- * - declare a gunzip_state structure
- * - use gunzip_start() to initialize the state, associating it
- * with a stream of compressed data
- * - use gunzip_partial(), gunzip_exactly() and gunzip_discard()
- * in any combination to extract pieces of data from the stream
- * - Finally use gunzip_finish() to extract the tail of the
- * compressed stream and wind up zlib
- */
-
-/* scratch space for gunzip; 46912 is from zlib_inflate_workspacesize() */
-#define GUNZIP_SCRATCH_SIZE 46912
-
-struct gunzip_state {
- z_stream s;
- char scratch[46912];
-};
-
-void gunzip_start(struct gunzip_state *state, void *src, int srclen);
-int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen);
-void gunzip_exactly(struct gunzip_state *state, void *dst, int len);
-void gunzip_discard(struct gunzip_state *state, int len);
-int gunzip_finish(struct gunzip_state *state, void *dst, int len);
-
-#endif /* _PPC_BOOT_GUNZIP_UTIL_H_ */
diff --git a/arch/powerpc/boot/hack-coff.c b/arch/powerpc/boot/hack-coff.c
index 5e5a6573a1ef..a010e124ac4b 100644
--- a/arch/powerpc/boot/hack-coff.c
+++ b/arch/powerpc/boot/hack-coff.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* hack-coff.c - hack the header of an xcoff file to fill in
* a few fields needed by the Open Firmware xcoff loader on
* Power Macs but not initialized by objcopy.
*
* Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/arch/powerpc/boot/holly.c b/arch/powerpc/boot/holly.c
index 58013b923178..557c7a0ece08 100644
--- a/arch/powerpc/boot/holly.c
+++ b/arch/powerpc/boot/holly.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2007 IBM Corporation
*
@@ -6,10 +7,6 @@
*
* Based on earlier code:
* Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
*/
#include <stdarg.h>
#include <stddef.h>
diff --git a/arch/powerpc/boot/install.sh b/arch/powerpc/boot/install.sh
index b6a256bc96ee..c3df6c27ce75 100644..100755
--- a/arch/powerpc/boot/install.sh
+++ b/arch/powerpc/boot/install.sh
@@ -15,41 +15,23 @@
# $2 - kernel image file
# $3 - kernel map file
# $4 - default install path (blank if root directory)
-# $5 and more - kernel boot files; zImage*, uImage, cuImage.*, etc.
-#
-# Bail with error code if anything goes wrong
set -e
-# User may have a custom install script
+# this should work for both the pSeries zImage and the iSeries vmlinux.sm
+image_name=$(basename "$2")
-if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
-if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
-# Default install
+echo "Warning: '${INSTALLKERNEL}' command not available... Copying" \
+ "directly to $4/$image_name-$1" >&2
-# this should work for both the pSeries zImage and the iSeries vmlinux.sm
-image_name=`basename $2`
-
-if [ -f $4/$image_name ]; then
- mv $4/$image_name $4/$image_name.old
+if [ -f "$4"/"$image_name"-"$1" ]; then
+ mv "$4"/"$image_name"-"$1" "$4"/"$image_name"-"$1".old
fi
-if [ -f $4/System.map ]; then
- mv $4/System.map $4/System.old
+if [ -f "$4"/System.map-"$1" ]; then
+ mv "$4"/System.map-"$1" "$4"/System-"$1".old
fi
-cat $2 > $4/$image_name
-cp $3 $4/System.map
-
-# Copy all the bootable image files
-path=$4
-shift 4
-while [ $# -ne 0 ]; do
- image_name=`basename $1`
- if [ -f $path/$image_name ]; then
- mv $path/$image_name $path/$image_name.old
- fi
- cat $1 > $path/$image_name
- shift
-done;
+cat "$2" > "$4"/"$image_name"-"$1"
+cp "$3" "$4"/System.map-"$1"
diff --git a/arch/powerpc/boot/io.h b/arch/powerpc/boot/io.h
index 394da5500466..5c6f90c34923 100644
--- a/arch/powerpc/boot/io.h
+++ b/arch/powerpc/boot/io.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _IO_H
#define _IO_H
diff --git a/arch/powerpc/boot/libfdt-wrapper.c b/arch/powerpc/boot/libfdt-wrapper.c
index bb8b9b3505ee..217d0d7a6a60 100644
--- a/arch/powerpc/boot/libfdt-wrapper.c
+++ b/arch/powerpc/boot/libfdt-wrapper.c
@@ -1,24 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file does the necessary interface mapping between the bootwrapper
* device tree operations and the interface provided by shared source
* files flatdevicetree.[ch].
*
* Copyright 2007 David Gibson, IBM Corporation.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
*/
#include <stddef.h>
@@ -44,12 +30,12 @@
#define offset_devp(off) \
({ \
- int _offset = (off); \
+ unsigned long _offset = (off); \
check_err(_offset) ? NULL : (void *)(_offset+1); \
})
-#define devp_offset_find(devp) (((int)(devp))-1)
-#define devp_offset(devp) (devp ? ((int)(devp))-1 : 0)
+#define devp_offset_find(devp) (((unsigned long)(devp))-1)
+#define devp_offset(devp) (devp ? ((unsigned long)(devp))-1 : 0)
static void *fdt;
static void *buf; /* = NULL */
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index c89fdb1b80e1..9757d4f6331e 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -1,18 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ARCH_POWERPC_BOOT_LIBFDT_ENV_H
#define _ARCH_POWERPC_BOOT_LIBFDT_ENV_H
#include <types.h>
#include <string.h>
-typedef u32 uint32_t;
-typedef u64 uint64_t;
+#define INT_MAX ((int)(~0U>>1))
+#define UINT32_MAX ((u32)~0U)
+#define INT32_MAX ((s32)(UINT32_MAX >> 1))
+
+#include "of.h"
+
typedef unsigned long uintptr_t;
-#define fdt16_to_cpu(x) (x)
-#define cpu_to_fdt16(x) (x)
-#define fdt32_to_cpu(x) (x)
-#define cpu_to_fdt32(x) (x)
-#define fdt64_to_cpu(x) (x)
-#define cpu_to_fdt64(x) (x)
+typedef __be16 fdt16_t;
+typedef __be32 fdt32_t;
+typedef __be64 fdt64_t;
+
+#define fdt16_to_cpu(x) be16_to_cpu(x)
+#define cpu_to_fdt16(x) cpu_to_be16(x)
+#define fdt32_to_cpu(x) be32_to_cpu(x)
+#define cpu_to_fdt32(x) cpu_to_be32(x)
+#define fdt64_to_cpu(x) be64_to_cpu(x)
+#define cpu_to_fdt64(x) cpu_to_be64(x)
#endif /* _ARCH_POWERPC_BOOT_LIBFDT_ENV_H */
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index d367a0aece2a..2c0e2a1cab01 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) Paul Mackerras 1997.
*
* Updates for PPC64 by Todd Inglett, Dave Engebretsen & Peter Bergner.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -15,11 +11,8 @@
#include "string.h"
#include "stdio.h"
#include "ops.h"
-#include "gunzip_util.h"
#include "reg.h"
-static struct gunzip_state gzstate;
-
struct addr_range {
void *addr;
unsigned long size;
@@ -30,15 +23,21 @@ struct addr_range {
static struct addr_range prep_kernel(void)
{
char elfheader[256];
- void *vmlinuz_addr = _vmlinux_start;
+ unsigned char *vmlinuz_addr = (unsigned char *)_vmlinux_start;
unsigned long vmlinuz_size = _vmlinux_end - _vmlinux_start;
void *addr = 0;
struct elf_info ei;
- int len;
-
- /* gunzip the ELF header of the kernel */
- gunzip_start(&gzstate, vmlinuz_addr, vmlinuz_size);
- gunzip_exactly(&gzstate, elfheader, sizeof(elfheader));
+ long len;
+ int uncompressed_image = 0;
+
+ len = partial_decompress(vmlinuz_addr, vmlinuz_size,
+ elfheader, sizeof(elfheader), 0);
+ /* assume uncompressed data if -1 is returned */
+ if (len == -1) {
+ uncompressed_image = 1;
+ memcpy(elfheader, vmlinuz_addr, sizeof(elfheader));
+ printf("No valid compressed data found, assume uncompressed data\n\r");
+ }
if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
@@ -51,7 +50,7 @@ static struct addr_range prep_kernel(void)
* the kernel bss must be claimed (it will be zero'd by the
* kernel itself)
*/
- printf("Allocating 0x%lx bytes for kernel ...\n\r", ei.memsize);
+ printf("Allocating 0x%lx bytes for kernel...\n\r", ei.memsize);
if (platform_ops.vmlinux_alloc) {
addr = platform_ops.vmlinux_alloc(ei.memsize);
@@ -71,17 +70,29 @@ static struct addr_range prep_kernel(void)
"device tree\n\r");
}
- /* Finally, gunzip the kernel */
- printf("gunzipping (0x%p <- 0x%p:0x%p)...", addr,
+ if (uncompressed_image) {
+ memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize);
+ printf("0x%lx bytes of uncompressed data copied\n\r",
+ ei.loadsize);
+ goto out;
+ }
+
+ /* Finally, decompress the kernel */
+ printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
- /* discard up to the actual load data */
- gunzip_discard(&gzstate, ei.elfoffset - sizeof(elfheader));
- len = gunzip_finish(&gzstate, addr, ei.loadsize);
+
+ len = partial_decompress(vmlinuz_addr, vmlinuz_size,
+ addr, ei.loadsize, ei.elfoffset);
+
+ if (len < 0)
+ fatal("Decompression failed with error code %ld\n\r", len);
+
if (len != ei.loadsize)
- fatal("ran out of data! only got 0x%x of 0x%lx bytes.\n\r",
- len, ei.loadsize);
- printf("done 0x%x bytes\n\r", len);
+ fatal("Decompression error: got 0x%lx bytes, expected 0x%lx.\n\r",
+ len, ei.loadsize);
+ printf("Done! Decompressed 0x%lx bytes\n\r", len);
+out:
flush_cache(addr, ei.loadsize);
return (struct addr_range){addr, ei.memsize};
@@ -93,7 +104,7 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
{
/* If we have an image attached to us, it overrides anything
* supplied by the loader. */
- if (_initrd_end > _initrd_start) {
+ if (&_initrd_end > &_initrd_start) {
printf("Attached initrd image at 0x%p-0x%p\n\r",
_initrd_start, _initrd_end);
initrd_addr = (unsigned long)_initrd_start;
@@ -135,22 +146,73 @@ static struct addr_range prep_initrd(struct addr_range vmlinux, void *chosen,
return (struct addr_range){(void *)initrd_addr, initrd_size};
}
+#ifdef __powerpc64__
+static void prep_esm_blob(struct addr_range vmlinux, void *chosen)
+{
+ unsigned long esm_blob_addr, esm_blob_size;
+
+ /* Do we have an ESM (Enter Secure Mode) blob? */
+ if (&_esm_blob_end <= &_esm_blob_start)
+ return;
+
+ printf("Attached ESM blob at 0x%p-0x%p\n\r",
+ _esm_blob_start, _esm_blob_end);
+ esm_blob_addr = (unsigned long)_esm_blob_start;
+ esm_blob_size = _esm_blob_end - _esm_blob_start;
+
+ /*
+ * If the ESM blob is too low it will be clobbered when the
+ * kernel relocates to its final location. In this case,
+ * allocate a safer place and move it.
+ */
+ if (esm_blob_addr < vmlinux.size) {
+ void *old_addr = (void *)esm_blob_addr;
+
+ printf("Allocating 0x%lx bytes for esm_blob ...\n\r",
+ esm_blob_size);
+ esm_blob_addr = (unsigned long)malloc(esm_blob_size);
+ if (!esm_blob_addr)
+ fatal("Can't allocate memory for ESM blob !\n\r");
+ printf("Relocating ESM blob 0x%lx <- 0x%p (0x%lx bytes)\n\r",
+ esm_blob_addr, old_addr, esm_blob_size);
+ memmove((void *)esm_blob_addr, old_addr, esm_blob_size);
+ }
+
+ /* Tell the kernel ESM blob address via device tree. */
+ setprop_val(chosen, "linux,esm-blob-start", (u32)(esm_blob_addr));
+ setprop_val(chosen, "linux,esm-blob-end", (u32)(esm_blob_addr + esm_blob_size));
+}
+#else
+static inline void prep_esm_blob(struct addr_range vmlinux, void *chosen) { }
+#endif
+
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
- * The buffer is put in it's own section so that tools may locate it easier.
+ * The buffer is put in its own section so that tools may locate it easier.
*/
static char cmdline[BOOT_COMMAND_LINE_SIZE]
__attribute__((__section__("__builtin_cmdline")));
static void prep_cmdline(void *chosen)
{
+ unsigned int getline_timeout = 5000;
+ int v;
+ int n;
+
+ /* Wait-for-input time */
+ n = getprop(chosen, "linux,cmdline-timeout", &v, sizeof(v));
+ if (n == sizeof(v))
+ getline_timeout = v;
+
if (cmdline[0] == '\0')
getprop(chosen, "bootargs", cmdline, BOOT_COMMAND_LINE_SIZE-1);
printf("\n\rLinux/PowerPC load: %s", cmdline);
+
/* If possible, edit the command line */
- if (console_ops.edit_cmdline)
- console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE);
+ if (console_ops.edit_cmdline && getline_timeout)
+ console_ops.edit_cmdline(cmdline, BOOT_COMMAND_LINE_SIZE, getline_timeout);
+
printf("\n\r");
/* Put the command line back into the devtree for the kernel */
@@ -192,6 +254,7 @@ void start(void)
vmlinux = prep_kernel();
initrd = prep_initrd(vmlinux, chosen,
loader_info.initrd_addr, loader_info.initrd_size);
+ prep_esm_blob(vmlinux, chosen);
prep_cmdline(chosen);
printf("Finalizing device tree...");
@@ -206,8 +269,12 @@ void start(void)
console_ops.close();
kentry = (kernel_entry_t) vmlinux.addr;
- if (ft_addr)
- kentry(ft_addr, 0, NULL);
+ if (ft_addr) {
+ if(platform_ops.kentry)
+ platform_ops.kentry(ft_addr, vmlinux.addr);
+ else
+ kentry(ft_addr, 0, NULL);
+ }
else
kentry((unsigned long)initrd.addr, initrd.size,
loader_info.promptr);
diff --git a/arch/powerpc/boot/microwatt.c b/arch/powerpc/boot/microwatt.c
new file mode 100644
index 000000000000..ca9d83617fc1
--- /dev/null
+++ b/arch/powerpc/boot/microwatt.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <stddef.h>
+#include "stdio.h"
+#include "types.h"
+#include "io.h"
+#include "ops.h"
+
+BSS_STACK(8192);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5)
+{
+ unsigned long heapsize = 16*1024*1024 - (unsigned long)_end;
+
+ /*
+ * Disable interrupts and turn off MSR_RI, since we'll
+ * shortly be overwriting the interrupt vectors.
+ */
+ __asm__ volatile("mtmsrd %0,1" : : "r" (0));
+
+ simple_alloc_init(_end, heapsize, 32, 64);
+ fdt_init(_dtb_start);
+ serial_console_init();
+}
diff --git a/arch/powerpc/boot/mktree.c b/arch/powerpc/boot/mktree.c
index e2ae24340fc8..dc603f3c15be 100644
--- a/arch/powerpc/boot/mktree.c
+++ b/arch/powerpc/boot/mktree.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Makes a tree bootable image for IBM Evaluation boards.
* Basically, just take a zImage, skip the ELF header, and stuff
diff --git a/arch/powerpc/boot/motload-head.S b/arch/powerpc/boot/motload-head.S
new file mode 100644
index 000000000000..826dad0c19d9
--- /dev/null
+++ b/arch/powerpc/boot/motload-head.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "ppc_asm.h"
+
+ .text
+ .globl _zimage_start
+_zimage_start:
+ mfmsr r10
+ rlwinm r10,r10,0,~(1<<15) /* Clear MSR_EE */
+ sync
+ mtmsr r10
+ isync
+ b _zimage_start_lib
diff --git a/arch/powerpc/boot/mpc52xx-psc.c b/arch/powerpc/boot/mpc52xx-psc.c
index d4cb4e4e0938..c2c08633ee35 100644
--- a/arch/powerpc/boot/mpc52xx-psc.c
+++ b/arch/powerpc/boot/mpc52xx-psc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* MPC5200 PSC serial console support.
*
diff --git a/arch/powerpc/boot/mpc8xx.c b/arch/powerpc/boot/mpc8xx.c
index add55a7f184f..e19ef64df4f1 100644
--- a/arch/powerpc/boot/mpc8xx.c
+++ b/arch/powerpc/boot/mpc8xx.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* MPC8xx support functions
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
@@ -24,7 +21,7 @@ u32 mpc885_get_clock(u32 crystal)
{
u32 *immr;
u32 plprcr;
- int mfi, mfn, mfd, pdf, div;
+ int mfi, mfn, mfd, pdf;
u32 ret;
immr = fsl_get_immr();
@@ -43,7 +40,6 @@ u32 mpc885_get_clock(u32 crystal)
}
pdf = (plprcr >> 1) & 0xf;
- div = (plprcr >> 20) & 3;
mfd = (plprcr >> 22) & 0x1f;
mfn = (plprcr >> 27) & 0x1f;
diff --git a/arch/powerpc/boot/mpc8xx.h b/arch/powerpc/boot/mpc8xx.h
index 3f59901ab1c0..3852ed90047f 100644
--- a/arch/powerpc/boot/mpc8xx.h
+++ b/arch/powerpc/boot/mpc8xx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_MPC8xx_H_
#define _PPC_BOOT_MPC8xx_H_
diff --git a/arch/powerpc/boot/mpsc.c b/arch/powerpc/boot/mpsc.c
deleted file mode 100644
index 425ad88cce8d..000000000000
--- a/arch/powerpc/boot/mpsc.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * MPSC/UART driver for the Marvell mv64360, mv64460, ...
- *
- * Author: Mark A. Greer <mgreer@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-
-
-#define MPSC_CHR_1 0x000c
-
-#define MPSC_CHR_2 0x0010
-#define MPSC_CHR_2_TA (1<<7)
-#define MPSC_CHR_2_TCS (1<<9)
-#define MPSC_CHR_2_RA (1<<23)
-#define MPSC_CHR_2_CRD (1<<25)
-#define MPSC_CHR_2_EH (1<<31)
-
-#define MPSC_CHR_4 0x0018
-#define MPSC_CHR_4_Z (1<<29)
-
-#define MPSC_CHR_5 0x001c
-#define MPSC_CHR_5_CTL1_INTR (1<<12)
-#define MPSC_CHR_5_CTL1_VALID (1<<15)
-
-#define MPSC_CHR_10 0x0030
-
-#define MPSC_INTR_CAUSE 0x0000
-#define MPSC_INTR_CAUSE_RCC (1<<6)
-#define MPSC_INTR_MASK 0x0080
-
-#define SDMA_SDCM 0x0008
-#define SDMA_SDCM_AR (1<<15)
-#define SDMA_SDCM_AT (1<<31)
-
-static volatile char *mpsc_base;
-static volatile char *mpscintr_base;
-static u32 chr1, chr2;
-
-static int mpsc_open(void)
-{
- chr1 = in_le32((u32 *)(mpsc_base + MPSC_CHR_1)) & 0x00ff0000;
- chr2 = in_le32((u32 *)(mpsc_base + MPSC_CHR_2)) & ~(MPSC_CHR_2_TA
- | MPSC_CHR_2_TCS | MPSC_CHR_2_RA | MPSC_CHR_2_CRD
- | MPSC_CHR_2_EH);
- out_le32((u32 *)(mpsc_base + MPSC_CHR_4), MPSC_CHR_4_Z);
- out_le32((u32 *)(mpsc_base + MPSC_CHR_5),
- MPSC_CHR_5_CTL1_INTR | MPSC_CHR_5_CTL1_VALID);
- out_le32((u32 *)(mpsc_base + MPSC_CHR_2), chr2 | MPSC_CHR_2_EH);
- return 0;
-}
-
-static void mpsc_putc(unsigned char c)
-{
- while (in_le32((u32 *)(mpsc_base + MPSC_CHR_2)) & MPSC_CHR_2_TCS);
-
- out_le32((u32 *)(mpsc_base + MPSC_CHR_1), chr1 | c);
- out_le32((u32 *)(mpsc_base + MPSC_CHR_2), chr2 | MPSC_CHR_2_TCS);
-}
-
-static unsigned char mpsc_getc(void)
-{
- u32 cause = 0;
- unsigned char c;
-
- while (!(cause & MPSC_INTR_CAUSE_RCC))
- cause = in_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE));
-
- c = in_8((u8 *)(mpsc_base + MPSC_CHR_10 + 2));
- out_8((u8 *)(mpsc_base + MPSC_CHR_10 + 2), c);
- out_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE),
- cause & ~MPSC_INTR_CAUSE_RCC);
-
- return c;
-}
-
-static u8 mpsc_tstc(void)
-{
- return (u8)((in_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE))
- & MPSC_INTR_CAUSE_RCC) != 0);
-}
-
-static void mpsc_stop_dma(volatile char *sdma_base)
-{
- out_le32((u32 *)(mpsc_base + MPSC_CHR_2),MPSC_CHR_2_TA | MPSC_CHR_2_RA);
- out_le32((u32 *)(sdma_base + SDMA_SDCM), SDMA_SDCM_AR | SDMA_SDCM_AT);
-
- while ((in_le32((u32 *)(sdma_base + SDMA_SDCM))
- & (SDMA_SDCM_AR | SDMA_SDCM_AT)) != 0)
- udelay(100);
-}
-
-static volatile char *mpsc_get_virtreg_of_phandle(void *devp, char *prop)
-{
- void *v;
- int n;
-
- n = getprop(devp, prop, &v, sizeof(v));
- if (n != sizeof(v))
- goto err_out;
-
- devp = find_node_by_linuxphandle((u32)v);
- if (devp == NULL)
- goto err_out;
-
- n = getprop(devp, "virtual-reg", &v, sizeof(v));
- if (n == sizeof(v))
- return v;
-
-err_out:
- return NULL;
-}
-
-int mpsc_console_init(void *devp, struct serial_console_data *scdp)
-{
- void *v;
- int n, reg_set;
- volatile char *sdma_base;
-
- n = getprop(devp, "virtual-reg", &v, sizeof(v));
- if (n != sizeof(v))
- goto err_out;
- mpsc_base = v;
-
- sdma_base = mpsc_get_virtreg_of_phandle(devp, "sdma");
- if (sdma_base == NULL)
- goto err_out;
-
- mpscintr_base = mpsc_get_virtreg_of_phandle(devp, "mpscintr");
- if (mpscintr_base == NULL)
- goto err_out;
-
- n = getprop(devp, "cell-index", &v, sizeof(v));
- if (n != sizeof(v))
- goto err_out;
- reg_set = (int)v;
-
- mpscintr_base += (reg_set == 0) ? 0x4 : 0xc;
-
- /* Make sure the mpsc ctlrs are shutdown */
- out_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE), 0);
- out_le32((u32 *)(mpscintr_base + MPSC_INTR_CAUSE), 0);
- out_le32((u32 *)(mpscintr_base + MPSC_INTR_MASK), 0);
- out_le32((u32 *)(mpscintr_base + MPSC_INTR_MASK), 0);
-
- mpsc_stop_dma(sdma_base);
-
- scdp->open = mpsc_open;
- scdp->putc = mpsc_putc;
- scdp->getc = mpsc_getc;
- scdp->tstc = mpsc_tstc;
- scdp->close = NULL;
-
- return 0;
-
-err_out:
- return -1;
-}
diff --git a/arch/powerpc/boot/mv64x60.c b/arch/powerpc/boot/mv64x60.c
deleted file mode 100644
index d9bb302b91d2..000000000000
--- a/arch/powerpc/boot/mv64x60.c
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * Marvell hostbridge routines
- *
- * Author: Mark A. Greer <source@mvista.com>
- *
- * 2004, 2005, 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "elf.h"
-#include "page.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "mv64x60.h"
-
-#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
-
-#define MV64x60_CPU2MEM_WINDOWS 4
-#define MV64x60_CPU2MEM_0_BASE 0x0008
-#define MV64x60_CPU2MEM_0_SIZE 0x0010
-#define MV64x60_CPU2MEM_1_BASE 0x0208
-#define MV64x60_CPU2MEM_1_SIZE 0x0210
-#define MV64x60_CPU2MEM_2_BASE 0x0018
-#define MV64x60_CPU2MEM_2_SIZE 0x0020
-#define MV64x60_CPU2MEM_3_BASE 0x0218
-#define MV64x60_CPU2MEM_3_SIZE 0x0220
-
-#define MV64x60_ENET2MEM_BAR_ENABLE 0x2290
-#define MV64x60_ENET2MEM_0_BASE 0x2200
-#define MV64x60_ENET2MEM_0_SIZE 0x2204
-#define MV64x60_ENET2MEM_1_BASE 0x2208
-#define MV64x60_ENET2MEM_1_SIZE 0x220c
-#define MV64x60_ENET2MEM_2_BASE 0x2210
-#define MV64x60_ENET2MEM_2_SIZE 0x2214
-#define MV64x60_ENET2MEM_3_BASE 0x2218
-#define MV64x60_ENET2MEM_3_SIZE 0x221c
-#define MV64x60_ENET2MEM_4_BASE 0x2220
-#define MV64x60_ENET2MEM_4_SIZE 0x2224
-#define MV64x60_ENET2MEM_5_BASE 0x2228
-#define MV64x60_ENET2MEM_5_SIZE 0x222c
-#define MV64x60_ENET2MEM_ACC_PROT_0 0x2294
-#define MV64x60_ENET2MEM_ACC_PROT_1 0x2298
-#define MV64x60_ENET2MEM_ACC_PROT_2 0x229c
-
-#define MV64x60_MPSC2MEM_BAR_ENABLE 0xf250
-#define MV64x60_MPSC2MEM_0_BASE 0xf200
-#define MV64x60_MPSC2MEM_0_SIZE 0xf204
-#define MV64x60_MPSC2MEM_1_BASE 0xf208
-#define MV64x60_MPSC2MEM_1_SIZE 0xf20c
-#define MV64x60_MPSC2MEM_2_BASE 0xf210
-#define MV64x60_MPSC2MEM_2_SIZE 0xf214
-#define MV64x60_MPSC2MEM_3_BASE 0xf218
-#define MV64x60_MPSC2MEM_3_SIZE 0xf21c
-#define MV64x60_MPSC_0_REMAP 0xf240
-#define MV64x60_MPSC_1_REMAP 0xf244
-#define MV64x60_MPSC2MEM_ACC_PROT_0 0xf254
-#define MV64x60_MPSC2MEM_ACC_PROT_1 0xf258
-#define MV64x60_MPSC2REGS_BASE 0xf25c
-
-#define MV64x60_IDMA2MEM_BAR_ENABLE 0x0a80
-#define MV64x60_IDMA2MEM_0_BASE 0x0a00
-#define MV64x60_IDMA2MEM_0_SIZE 0x0a04
-#define MV64x60_IDMA2MEM_1_BASE 0x0a08
-#define MV64x60_IDMA2MEM_1_SIZE 0x0a0c
-#define MV64x60_IDMA2MEM_2_BASE 0x0a10
-#define MV64x60_IDMA2MEM_2_SIZE 0x0a14
-#define MV64x60_IDMA2MEM_3_BASE 0x0a18
-#define MV64x60_IDMA2MEM_3_SIZE 0x0a1c
-#define MV64x60_IDMA2MEM_4_BASE 0x0a20
-#define MV64x60_IDMA2MEM_4_SIZE 0x0a24
-#define MV64x60_IDMA2MEM_5_BASE 0x0a28
-#define MV64x60_IDMA2MEM_5_SIZE 0x0a2c
-#define MV64x60_IDMA2MEM_6_BASE 0x0a30
-#define MV64x60_IDMA2MEM_6_SIZE 0x0a34
-#define MV64x60_IDMA2MEM_7_BASE 0x0a38
-#define MV64x60_IDMA2MEM_7_SIZE 0x0a3c
-#define MV64x60_IDMA2MEM_ACC_PROT_0 0x0a70
-#define MV64x60_IDMA2MEM_ACC_PROT_1 0x0a74
-#define MV64x60_IDMA2MEM_ACC_PROT_2 0x0a78
-#define MV64x60_IDMA2MEM_ACC_PROT_3 0x0a7c
-
-#define MV64x60_PCI_ACC_CNTL_WINDOWS 6
-#define MV64x60_PCI0_PCI_DECODE_CNTL 0x0d3c
-#define MV64x60_PCI1_PCI_DECODE_CNTL 0x0dbc
-
-#define MV64x60_PCI0_BAR_ENABLE 0x0c3c
-#define MV64x60_PCI02MEM_0_SIZE 0x0c08
-#define MV64x60_PCI0_ACC_CNTL_0_BASE_LO 0x1e00
-#define MV64x60_PCI0_ACC_CNTL_0_BASE_HI 0x1e04
-#define MV64x60_PCI0_ACC_CNTL_0_SIZE 0x1e08
-#define MV64x60_PCI0_ACC_CNTL_1_BASE_LO 0x1e10
-#define MV64x60_PCI0_ACC_CNTL_1_BASE_HI 0x1e14
-#define MV64x60_PCI0_ACC_CNTL_1_SIZE 0x1e18
-#define MV64x60_PCI0_ACC_CNTL_2_BASE_LO 0x1e20
-#define MV64x60_PCI0_ACC_CNTL_2_BASE_HI 0x1e24
-#define MV64x60_PCI0_ACC_CNTL_2_SIZE 0x1e28
-#define MV64x60_PCI0_ACC_CNTL_3_BASE_LO 0x1e30
-#define MV64x60_PCI0_ACC_CNTL_3_BASE_HI 0x1e34
-#define MV64x60_PCI0_ACC_CNTL_3_SIZE 0x1e38
-#define MV64x60_PCI0_ACC_CNTL_4_BASE_LO 0x1e40
-#define MV64x60_PCI0_ACC_CNTL_4_BASE_HI 0x1e44
-#define MV64x60_PCI0_ACC_CNTL_4_SIZE 0x1e48
-#define MV64x60_PCI0_ACC_CNTL_5_BASE_LO 0x1e50
-#define MV64x60_PCI0_ACC_CNTL_5_BASE_HI 0x1e54
-#define MV64x60_PCI0_ACC_CNTL_5_SIZE 0x1e58
-
-#define MV64x60_PCI1_BAR_ENABLE 0x0cbc
-#define MV64x60_PCI12MEM_0_SIZE 0x0c88
-#define MV64x60_PCI1_ACC_CNTL_0_BASE_LO 0x1e80
-#define MV64x60_PCI1_ACC_CNTL_0_BASE_HI 0x1e84
-#define MV64x60_PCI1_ACC_CNTL_0_SIZE 0x1e88
-#define MV64x60_PCI1_ACC_CNTL_1_BASE_LO 0x1e90
-#define MV64x60_PCI1_ACC_CNTL_1_BASE_HI 0x1e94
-#define MV64x60_PCI1_ACC_CNTL_1_SIZE 0x1e98
-#define MV64x60_PCI1_ACC_CNTL_2_BASE_LO 0x1ea0
-#define MV64x60_PCI1_ACC_CNTL_2_BASE_HI 0x1ea4
-#define MV64x60_PCI1_ACC_CNTL_2_SIZE 0x1ea8
-#define MV64x60_PCI1_ACC_CNTL_3_BASE_LO 0x1eb0
-#define MV64x60_PCI1_ACC_CNTL_3_BASE_HI 0x1eb4
-#define MV64x60_PCI1_ACC_CNTL_3_SIZE 0x1eb8
-#define MV64x60_PCI1_ACC_CNTL_4_BASE_LO 0x1ec0
-#define MV64x60_PCI1_ACC_CNTL_4_BASE_HI 0x1ec4
-#define MV64x60_PCI1_ACC_CNTL_4_SIZE 0x1ec8
-#define MV64x60_PCI1_ACC_CNTL_5_BASE_LO 0x1ed0
-#define MV64x60_PCI1_ACC_CNTL_5_BASE_HI 0x1ed4
-#define MV64x60_PCI1_ACC_CNTL_5_SIZE 0x1ed8
-
-#define MV64x60_CPU2PCI_SWAP_NONE 0x01000000
-
-#define MV64x60_CPU2PCI0_IO_BASE 0x0048
-#define MV64x60_CPU2PCI0_IO_SIZE 0x0050
-#define MV64x60_CPU2PCI0_IO_REMAP 0x00f0
-#define MV64x60_CPU2PCI0_MEM_0_BASE 0x0058
-#define MV64x60_CPU2PCI0_MEM_0_SIZE 0x0060
-#define MV64x60_CPU2PCI0_MEM_0_REMAP_LO 0x00f8
-#define MV64x60_CPU2PCI0_MEM_0_REMAP_HI 0x0320
-
-#define MV64x60_CPU2PCI1_IO_BASE 0x0090
-#define MV64x60_CPU2PCI1_IO_SIZE 0x0098
-#define MV64x60_CPU2PCI1_IO_REMAP 0x0108
-#define MV64x60_CPU2PCI1_MEM_0_BASE 0x00a0
-#define MV64x60_CPU2PCI1_MEM_0_SIZE 0x00a8
-#define MV64x60_CPU2PCI1_MEM_0_REMAP_LO 0x0110
-#define MV64x60_CPU2PCI1_MEM_0_REMAP_HI 0x0340
-
-struct mv64x60_mem_win {
- u32 hi;
- u32 lo;
- u32 size;
-};
-
-struct mv64x60_pci_win {
- u32 fcn;
- u32 hi;
- u32 lo;
- u32 size;
-};
-
-/* PCI config access routines */
-struct {
- u32 addr;
- u32 data;
-} static mv64x60_pci_cfgio[2] = {
- { /* hose 0 */
- .addr = 0xcf8,
- .data = 0xcfc,
- },
- { /* hose 1 */
- .addr = 0xc78,
- .data = 0xc7c,
- }
-};
-
-u32 mv64x60_cfg_read(u8 *bridge_base, u8 hose, u8 bus, u8 devfn, u8 offset)
-{
- out_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].addr),
- (1 << 31) | (bus << 16) | (devfn << 8) | offset);
- return in_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].data));
-}
-
-void mv64x60_cfg_write(u8 *bridge_base, u8 hose, u8 bus, u8 devfn, u8 offset,
- u32 val)
-{
- out_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].addr),
- (1 << 31) | (bus << 16) | (devfn << 8) | offset);
- out_le32((u32 *)(bridge_base + mv64x60_pci_cfgio[hose].data), val);
-}
-
-/* I/O ctlr -> system memory setup */
-static struct mv64x60_mem_win mv64x60_cpu2mem[MV64x60_CPU2MEM_WINDOWS] = {
- {
- .lo = MV64x60_CPU2MEM_0_BASE,
- .size = MV64x60_CPU2MEM_0_SIZE,
- },
- {
- .lo = MV64x60_CPU2MEM_1_BASE,
- .size = MV64x60_CPU2MEM_1_SIZE,
- },
- {
- .lo = MV64x60_CPU2MEM_2_BASE,
- .size = MV64x60_CPU2MEM_2_SIZE,
- },
- {
- .lo = MV64x60_CPU2MEM_3_BASE,
- .size = MV64x60_CPU2MEM_3_SIZE,
- },
-};
-
-static struct mv64x60_mem_win mv64x60_enet2mem[MV64x60_CPU2MEM_WINDOWS] = {
- {
- .lo = MV64x60_ENET2MEM_0_BASE,
- .size = MV64x60_ENET2MEM_0_SIZE,
- },
- {
- .lo = MV64x60_ENET2MEM_1_BASE,
- .size = MV64x60_ENET2MEM_1_SIZE,
- },
- {
- .lo = MV64x60_ENET2MEM_2_BASE,
- .size = MV64x60_ENET2MEM_2_SIZE,
- },
- {
- .lo = MV64x60_ENET2MEM_3_BASE,
- .size = MV64x60_ENET2MEM_3_SIZE,
- },
-};
-
-static struct mv64x60_mem_win mv64x60_mpsc2mem[MV64x60_CPU2MEM_WINDOWS] = {
- {
- .lo = MV64x60_MPSC2MEM_0_BASE,
- .size = MV64x60_MPSC2MEM_0_SIZE,
- },
- {
- .lo = MV64x60_MPSC2MEM_1_BASE,
- .size = MV64x60_MPSC2MEM_1_SIZE,
- },
- {
- .lo = MV64x60_MPSC2MEM_2_BASE,
- .size = MV64x60_MPSC2MEM_2_SIZE,
- },
- {
- .lo = MV64x60_MPSC2MEM_3_BASE,
- .size = MV64x60_MPSC2MEM_3_SIZE,
- },
-};
-
-static struct mv64x60_mem_win mv64x60_idma2mem[MV64x60_CPU2MEM_WINDOWS] = {
- {
- .lo = MV64x60_IDMA2MEM_0_BASE,
- .size = MV64x60_IDMA2MEM_0_SIZE,
- },
- {
- .lo = MV64x60_IDMA2MEM_1_BASE,
- .size = MV64x60_IDMA2MEM_1_SIZE,
- },
- {
- .lo = MV64x60_IDMA2MEM_2_BASE,
- .size = MV64x60_IDMA2MEM_2_SIZE,
- },
- {
- .lo = MV64x60_IDMA2MEM_3_BASE,
- .size = MV64x60_IDMA2MEM_3_SIZE,
- },
-};
-
-static u32 mv64x60_dram_selects[MV64x60_CPU2MEM_WINDOWS] = {0xe,0xd,0xb,0x7};
-
-/*
- * ENET, MPSC, and IDMA ctlrs on the MV64x60 have separate windows that
- * must be set up so that the respective ctlr can access system memory.
- * Configure them to be same as cpu->memory windows.
- */
-void mv64x60_config_ctlr_windows(u8 *bridge_base, u8 *bridge_pbase,
- u8 is_coherent)
-{
- u32 i, base, size, enables, prot = 0, snoop_bits = 0;
-
- /* Disable ctlr->mem windows */
- out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_BAR_ENABLE), 0x3f);
- out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_BAR_ENABLE), 0xf);
- out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_BAR_ENABLE), 0xff);
-
- if (is_coherent)
- snoop_bits = 0x2 << 12; /* Writeback */
-
- enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE)) & 0xf;
-
- for (i=0; i<MV64x60_CPU2MEM_WINDOWS; i++) {
- if (enables & (1 << i)) /* Set means disabled */
- continue;
-
- base = in_le32((u32 *)(bridge_base + mv64x60_cpu2mem[i].lo))
- << 16;
- base |= snoop_bits | (mv64x60_dram_selects[i] << 8);
- size = in_le32((u32 *)(bridge_base + mv64x60_cpu2mem[i].size))
- << 16;
- prot |= (0x3 << (i << 1)); /* RW access */
-
- out_le32((u32 *)(bridge_base + mv64x60_enet2mem[i].lo), base);
- out_le32((u32 *)(bridge_base + mv64x60_enet2mem[i].size), size);
- out_le32((u32 *)(bridge_base + mv64x60_mpsc2mem[i].lo), base);
- out_le32((u32 *)(bridge_base + mv64x60_mpsc2mem[i].size), size);
- out_le32((u32 *)(bridge_base + mv64x60_idma2mem[i].lo), base);
- out_le32((u32 *)(bridge_base + mv64x60_idma2mem[i].size), size);
- }
-
- out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_ACC_PROT_0), prot);
- out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_ACC_PROT_1), prot);
- out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_ACC_PROT_2), prot);
- out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_ACC_PROT_0), prot);
- out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_ACC_PROT_1), prot);
- out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_0), prot);
- out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_1), prot);
- out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_2), prot);
- out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_ACC_PROT_3), prot);
-
- /* Set mpsc->bridge's reg window to the bridge's internal registers. */
- out_le32((u32 *)(bridge_base + MV64x60_MPSC2REGS_BASE),
- (u32)bridge_pbase);
-
- out_le32((u32 *)(bridge_base + MV64x60_ENET2MEM_BAR_ENABLE), enables);
- out_le32((u32 *)(bridge_base + MV64x60_MPSC2MEM_BAR_ENABLE), enables);
- out_le32((u32 *)(bridge_base + MV64x60_IDMA2MEM_BAR_ENABLE), enables);
-}
-
-/* PCI MEM -> system memory, et. al. setup */
-static struct mv64x60_pci_win mv64x60_pci2mem[2] = {
- { /* hose 0 */
- .fcn = 0,
- .hi = 0x14,
- .lo = 0x10,
- .size = MV64x60_PCI02MEM_0_SIZE,
- },
- { /* hose 1 */
- .fcn = 0,
- .hi = 0x94,
- .lo = 0x90,
- .size = MV64x60_PCI12MEM_0_SIZE,
- },
-};
-
-static struct
-mv64x60_mem_win mv64x60_pci_acc[2][MV64x60_PCI_ACC_CNTL_WINDOWS] = {
- { /* hose 0 */
- {
- .hi = MV64x60_PCI0_ACC_CNTL_0_BASE_HI,
- .lo = MV64x60_PCI0_ACC_CNTL_0_BASE_LO,
- .size = MV64x60_PCI0_ACC_CNTL_0_SIZE,
- },
- {
- .hi = MV64x60_PCI0_ACC_CNTL_1_BASE_HI,
- .lo = MV64x60_PCI0_ACC_CNTL_1_BASE_LO,
- .size = MV64x60_PCI0_ACC_CNTL_1_SIZE,
- },
- {
- .hi = MV64x60_PCI0_ACC_CNTL_2_BASE_HI,
- .lo = MV64x60_PCI0_ACC_CNTL_2_BASE_LO,
- .size = MV64x60_PCI0_ACC_CNTL_2_SIZE,
- },
- {
- .hi = MV64x60_PCI0_ACC_CNTL_3_BASE_HI,
- .lo = MV64x60_PCI0_ACC_CNTL_3_BASE_LO,
- .size = MV64x60_PCI0_ACC_CNTL_3_SIZE,
- },
- },
- { /* hose 1 */
- {
- .hi = MV64x60_PCI1_ACC_CNTL_0_BASE_HI,
- .lo = MV64x60_PCI1_ACC_CNTL_0_BASE_LO,
- .size = MV64x60_PCI1_ACC_CNTL_0_SIZE,
- },
- {
- .hi = MV64x60_PCI1_ACC_CNTL_1_BASE_HI,
- .lo = MV64x60_PCI1_ACC_CNTL_1_BASE_LO,
- .size = MV64x60_PCI1_ACC_CNTL_1_SIZE,
- },
- {
- .hi = MV64x60_PCI1_ACC_CNTL_2_BASE_HI,
- .lo = MV64x60_PCI1_ACC_CNTL_2_BASE_LO,
- .size = MV64x60_PCI1_ACC_CNTL_2_SIZE,
- },
- {
- .hi = MV64x60_PCI1_ACC_CNTL_3_BASE_HI,
- .lo = MV64x60_PCI1_ACC_CNTL_3_BASE_LO,
- .size = MV64x60_PCI1_ACC_CNTL_3_SIZE,
- },
- },
-};
-
-static struct mv64x60_mem_win mv64x60_pci2reg[2] = {
- {
- .hi = 0x24,
- .lo = 0x20,
- .size = 0,
- },
- {
- .hi = 0xa4,
- .lo = 0xa0,
- .size = 0,
- },
-};
-
-/* Only need to use 1 window (per hose) to get access to all of system memory */
-void mv64x60_config_pci_windows(u8 *bridge_base, u8 *bridge_pbase, u8 hose,
- u8 bus, u32 mem_size, u32 acc_bits)
-{
- u32 i, offset, bar_enable, enables;
-
- /* Disable all windows but PCI MEM -> Bridge's regs window */
- enables = ~(1 << 9);
- bar_enable = hose ? MV64x60_PCI1_BAR_ENABLE : MV64x60_PCI0_BAR_ENABLE;
- out_le32((u32 *)(bridge_base + bar_enable), enables);
-
- for (i=0; i<MV64x60_PCI_ACC_CNTL_WINDOWS; i++)
- out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][i].lo), 0);
-
- /* If mem_size is 0, leave windows disabled */
- if (mem_size == 0)
- return;
-
- /* Cause automatic updates of PCI remap regs */
- offset = hose ?
- MV64x60_PCI1_PCI_DECODE_CNTL : MV64x60_PCI0_PCI_DECODE_CNTL;
- i = in_le32((u32 *)(bridge_base + offset));
- out_le32((u32 *)(bridge_base + offset), i & ~0x1);
-
- mem_size = (mem_size - 1) & 0xfffff000;
-
- /* Map PCI MEM addr 0 -> System Mem addr 0 */
- mv64x60_cfg_write(bridge_base, hose, bus,
- PCI_DEVFN(0, mv64x60_pci2mem[hose].fcn),
- mv64x60_pci2mem[hose].hi, 0);
- mv64x60_cfg_write(bridge_base, hose, bus,
- PCI_DEVFN(0, mv64x60_pci2mem[hose].fcn),
- mv64x60_pci2mem[hose].lo, 0);
- out_le32((u32 *)(bridge_base + mv64x60_pci2mem[hose].size),mem_size);
-
- acc_bits |= MV64x60_PCI_ACC_CNTL_ENABLE;
- out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][0].hi), 0);
- out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][0].lo), acc_bits);
- out_le32((u32 *)(bridge_base + mv64x60_pci_acc[hose][0].size),mem_size);
-
- /* Set PCI MEM->bridge's reg window to where they are in CPU mem map */
- i = (u32)bridge_base;
- i &= 0xffff0000;
- i |= (0x2 << 1);
- mv64x60_cfg_write(bridge_base, hose, bus, PCI_DEVFN(0,0),
- mv64x60_pci2reg[hose].hi, 0);
- mv64x60_cfg_write(bridge_base, hose, bus, PCI_DEVFN(0,0),
- mv64x60_pci2reg[hose].lo, i);
-
- enables &= ~0x1; /* Enable PCI MEM -> System Mem window 0 */
- out_le32((u32 *)(bridge_base + bar_enable), enables);
-}
-
-/* CPU -> PCI I/O & MEM setup */
-struct mv64x60_cpu2pci_win mv64x60_cpu2pci_io[2] = {
- { /* hose 0 */
- .lo = MV64x60_CPU2PCI0_IO_BASE,
- .size = MV64x60_CPU2PCI0_IO_SIZE,
- .remap_hi = 0,
- .remap_lo = MV64x60_CPU2PCI0_IO_REMAP,
- },
- { /* hose 1 */
- .lo = MV64x60_CPU2PCI1_IO_BASE,
- .size = MV64x60_CPU2PCI1_IO_SIZE,
- .remap_hi = 0,
- .remap_lo = MV64x60_CPU2PCI1_IO_REMAP,
- },
-};
-
-struct mv64x60_cpu2pci_win mv64x60_cpu2pci_mem[2] = {
- { /* hose 0 */
- .lo = MV64x60_CPU2PCI0_MEM_0_BASE,
- .size = MV64x60_CPU2PCI0_MEM_0_SIZE,
- .remap_hi = MV64x60_CPU2PCI0_MEM_0_REMAP_HI,
- .remap_lo = MV64x60_CPU2PCI0_MEM_0_REMAP_LO,
- },
- { /* hose 1 */
- .lo = MV64x60_CPU2PCI1_MEM_0_BASE,
- .size = MV64x60_CPU2PCI1_MEM_0_SIZE,
- .remap_hi = MV64x60_CPU2PCI1_MEM_0_REMAP_HI,
- .remap_lo = MV64x60_CPU2PCI1_MEM_0_REMAP_LO,
- },
-};
-
-/* Only need to set up 1 window to pci mem space */
-void mv64x60_config_cpu2pci_window(u8 *bridge_base, u8 hose, u32 pci_base_hi,
- u32 pci_base_lo, u32 cpu_base, u32 size,
- struct mv64x60_cpu2pci_win *offset_tbl)
-{
- cpu_base >>= 16;
- cpu_base |= MV64x60_CPU2PCI_SWAP_NONE;
- out_le32((u32 *)(bridge_base + offset_tbl[hose].lo), cpu_base);
-
- if (offset_tbl[hose].remap_hi != 0)
- out_le32((u32 *)(bridge_base + offset_tbl[hose].remap_hi),
- pci_base_hi);
- out_le32((u32 *)(bridge_base + offset_tbl[hose].remap_lo),
- pci_base_lo >> 16);
-
- size = (size - 1) >> 16;
- out_le32((u32 *)(bridge_base + offset_tbl[hose].size), size);
-}
-
-/* Read mem ctlr to get the amount of mem in system */
-u32 mv64x60_get_mem_size(u8 *bridge_base)
-{
- u32 enables, i, v;
- u32 mem = 0;
-
- enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE)) & 0xf;
-
- for (i=0; i<MV64x60_CPU2MEM_WINDOWS; i++)
- if (!(enables & (1<<i))) {
- v = in_le32((u32*)(bridge_base
- + mv64x60_cpu2mem[i].size));
- v = ((v & 0xffff) + 1) << 16;
- mem += v;
- }
-
- return mem;
-}
-
-/* Get physical address of bridge's registers */
-u8 *mv64x60_get_bridge_pbase(void)
-{
- u32 v[2];
- void *devp;
-
- devp = find_node_by_compatible(NULL, "marvell,mv64360");
- if (devp == NULL)
- goto err_out;
- if (getprop(devp, "reg", v, sizeof(v)) != sizeof(v))
- goto err_out;
-
- return (u8 *)v[0];
-
-err_out:
- return 0;
-}
-
-/* Get virtual address of bridge's registers */
-u8 *mv64x60_get_bridge_base(void)
-{
- u32 v;
- void *devp;
-
- devp = find_node_by_compatible(NULL, "marvell,mv64360");
- if (devp == NULL)
- goto err_out;
- if (getprop(devp, "virtual-reg", &v, sizeof(v)) != sizeof(v))
- goto err_out;
-
- return (u8 *)v;
-
-err_out:
- return 0;
-}
-
-u8 mv64x60_is_coherent(void)
-{
- u32 v;
- void *devp;
-
- devp = finddevice("/");
- if (devp == NULL)
- return 1; /* Assume coherency on */
-
- if (getprop(devp, "coherency-off", &v, sizeof(v)) < 0)
- return 1; /* Coherency on */
- else
- return 0;
-}
diff --git a/arch/powerpc/boot/mv64x60.h b/arch/powerpc/boot/mv64x60.h
deleted file mode 100644
index b827105e6e54..000000000000
--- a/arch/powerpc/boot/mv64x60.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Author: Mark A. Greer <source@mvista.com>
- *
- * 2007 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#ifndef _PPC_BOOT_MV64x60_H_
-#define _PPC_BOOT_MV64x60_H_
-
-#define MV64x60_CPU_BAR_ENABLE 0x0278
-
-#define MV64x60_PCI_ACC_CNTL_ENABLE (1<<0)
-#define MV64x60_PCI_ACC_CNTL_REQ64 (1<<1)
-#define MV64x60_PCI_ACC_CNTL_SNOOP_NONE 0x00000000
-#define MV64x60_PCI_ACC_CNTL_SNOOP_WT 0x00000004
-#define MV64x60_PCI_ACC_CNTL_SNOOP_WB 0x00000008
-#define MV64x60_PCI_ACC_CNTL_SNOOP_MASK 0x0000000c
-#define MV64x60_PCI_ACC_CNTL_ACCPROT (1<<4)
-#define MV64x60_PCI_ACC_CNTL_WRPROT (1<<5)
-#define MV64x60_PCI_ACC_CNTL_SWAP_BYTE 0x00000000
-#define MV64x60_PCI_ACC_CNTL_SWAP_NONE 0x00000040
-#define MV64x60_PCI_ACC_CNTL_SWAP_BYTE_WORD 0x00000080
-#define MV64x60_PCI_ACC_CNTL_SWAP_WORD 0x000000c0
-#define MV64x60_PCI_ACC_CNTL_SWAP_MASK 0x000000c0
-#define MV64x60_PCI_ACC_CNTL_MBURST_32_BYTES 0x00000000
-#define MV64x60_PCI_ACC_CNTL_MBURST_64_BYTES 0x00000100
-#define MV64x60_PCI_ACC_CNTL_MBURST_128_BYTES 0x00000200
-#define MV64x60_PCI_ACC_CNTL_MBURST_MASK 0x00000300
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_32_BYTES 0x00000000
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_64_BYTES 0x00000400
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_128_BYTES 0x00000800
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_256_BYTES 0x00000c00
-#define MV64x60_PCI_ACC_CNTL_RDSIZE_MASK 0x00000c00
-
-struct mv64x60_cpu2pci_win {
- u32 lo;
- u32 size;
- u32 remap_hi;
- u32 remap_lo;
-};
-
-extern struct mv64x60_cpu2pci_win mv64x60_cpu2pci_io[2];
-extern struct mv64x60_cpu2pci_win mv64x60_cpu2pci_mem[2];
-
-u32 mv64x60_cfg_read(u8 *bridge_base, u8 hose, u8 bus, u8 devfn,
- u8 offset);
-void mv64x60_cfg_write(u8 *bridge_base, u8 hose, u8 bus, u8 devfn,
- u8 offset, u32 val);
-
-void mv64x60_config_ctlr_windows(u8 *bridge_base, u8 *bridge_pbase,
- u8 is_coherent);
-void mv64x60_config_pci_windows(u8 *bridge_base, u8 *bridge_pbase, u8 hose,
- u8 bus, u32 mem_size, u32 acc_bits);
-void mv64x60_config_cpu2pci_window(u8 *bridge_base, u8 hose, u32 pci_base_hi,
- u32 pci_base_lo, u32 cpu_base, u32 size,
- struct mv64x60_cpu2pci_win *offset_tbl);
-u32 mv64x60_get_mem_size(u8 *bridge_base);
-u8 *mv64x60_get_bridge_pbase(void);
-u8 *mv64x60_get_bridge_base(void);
-u8 mv64x60_is_coherent(void);
-
-int mv64x60_i2c_open(void);
-int mv64x60_i2c_read(u32 devaddr, u8 *buf, u32 offset, u32 offset_size,
- u32 count);
-void mv64x60_i2c_close(void);
-
-#endif /* _PPC_BOOT_MV64x60_H_ */
diff --git a/arch/powerpc/boot/mv64x60_i2c.c b/arch/powerpc/boot/mv64x60_i2c.c
deleted file mode 100644
index 52a3212b6638..000000000000
--- a/arch/powerpc/boot/mv64x60_i2c.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * Bootloader version of the i2c driver for the MV64x60.
- *
- * Author: Dale Farnsworth <dfarnsworth@mvista.com>
- * Maintained by: Mark A. Greer <mgreer@mvista.com>
- *
- * 2003, 2007 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program is
- * licensed "as is" without any warranty of any kind, whether express or
- * implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "elf.h"
-#include "page.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "mv64x60.h"
-
-/* Register defines */
-#define MV64x60_I2C_REG_SLAVE_ADDR 0x00
-#define MV64x60_I2C_REG_DATA 0x04
-#define MV64x60_I2C_REG_CONTROL 0x08
-#define MV64x60_I2C_REG_STATUS 0x0c
-#define MV64x60_I2C_REG_BAUD 0x0c
-#define MV64x60_I2C_REG_EXT_SLAVE_ADDR 0x10
-#define MV64x60_I2C_REG_SOFT_RESET 0x1c
-
-#define MV64x60_I2C_CONTROL_ACK 0x04
-#define MV64x60_I2C_CONTROL_IFLG 0x08
-#define MV64x60_I2C_CONTROL_STOP 0x10
-#define MV64x60_I2C_CONTROL_START 0x20
-#define MV64x60_I2C_CONTROL_TWSIEN 0x40
-#define MV64x60_I2C_CONTROL_INTEN 0x80
-
-#define MV64x60_I2C_STATUS_BUS_ERR 0x00
-#define MV64x60_I2C_STATUS_MAST_START 0x08
-#define MV64x60_I2C_STATUS_MAST_REPEAT_START 0x10
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_ACK 0x18
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_NO_ACK 0x20
-#define MV64x60_I2C_STATUS_MAST_WR_ACK 0x28
-#define MV64x60_I2C_STATUS_MAST_WR_NO_ACK 0x30
-#define MV64x60_I2C_STATUS_MAST_LOST_ARB 0x38
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_ACK 0x40
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_NO_ACK 0x48
-#define MV64x60_I2C_STATUS_MAST_RD_DATA_ACK 0x50
-#define MV64x60_I2C_STATUS_MAST_RD_DATA_NO_ACK 0x58
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_2_ACK 0xd0
-#define MV64x60_I2C_STATUS_MAST_WR_ADDR_2_NO_ACK 0xd8
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_2_ACK 0xe0
-#define MV64x60_I2C_STATUS_MAST_RD_ADDR_2_NO_ACK 0xe8
-#define MV64x60_I2C_STATUS_NO_STATUS 0xf8
-
-static u8 *ctlr_base;
-
-static int mv64x60_i2c_wait_for_status(int wanted)
-{
- int i;
- int status;
-
- for (i=0; i<1000; i++) {
- udelay(10);
- status = in_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_STATUS))
- & 0xff;
- if (status == wanted)
- return status;
- }
- return -status;
-}
-
-static int mv64x60_i2c_control(int control, int status)
-{
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_CONTROL), control & 0xff);
- return mv64x60_i2c_wait_for_status(status);
-}
-
-static int mv64x60_i2c_read_byte(int control, int status)
-{
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_CONTROL), control & 0xff);
- if (mv64x60_i2c_wait_for_status(status) < 0)
- return -1;
- return in_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_DATA)) & 0xff;
-}
-
-static int mv64x60_i2c_write_byte(int data, int control, int status)
-{
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_DATA), data & 0xff);
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_CONTROL), control & 0xff);
- return mv64x60_i2c_wait_for_status(status);
-}
-
-int mv64x60_i2c_read(u32 devaddr, u8 *buf, u32 offset, u32 offset_size,
- u32 count)
-{
- int i;
- int data;
- int control;
- int status;
-
- if (ctlr_base == NULL)
- return -1;
-
- /* send reset */
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_SOFT_RESET), 0);
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_SLAVE_ADDR), 0);
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_EXT_SLAVE_ADDR), 0);
- out_le32((u32 *)(ctlr_base + MV64x60_I2C_REG_BAUD), (4 << 3) | 0x4);
-
- if (mv64x60_i2c_control(MV64x60_I2C_CONTROL_TWSIEN,
- MV64x60_I2C_STATUS_NO_STATUS) < 0)
- return -1;
-
- /* send start */
- control = MV64x60_I2C_CONTROL_START | MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_START;
- if (mv64x60_i2c_control(control, status) < 0)
- return -1;
-
- /* select device for writing */
- data = devaddr & ~0x1;
- control = MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_WR_ADDR_ACK;
- if (mv64x60_i2c_write_byte(data, control, status) < 0)
- return -1;
-
- /* send offset of data */
- control = MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_WR_ACK;
- if (offset_size > 1) {
- if (mv64x60_i2c_write_byte(offset >> 8, control, status) < 0)
- return -1;
- }
- if (mv64x60_i2c_write_byte(offset, control, status) < 0)
- return -1;
-
- /* resend start */
- control = MV64x60_I2C_CONTROL_START | MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_REPEAT_START;
- if (mv64x60_i2c_control(control, status) < 0)
- return -1;
-
- /* select device for reading */
- data = devaddr | 0x1;
- control = MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_RD_ADDR_ACK;
- if (mv64x60_i2c_write_byte(data, control, status) < 0)
- return -1;
-
- /* read all but last byte of data */
- control = MV64x60_I2C_CONTROL_ACK | MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_RD_DATA_ACK;
-
- for (i=1; i<count; i++) {
- data = mv64x60_i2c_read_byte(control, status);
- if (data < 0) {
- printf("errors on iteration %d\n", i);
- return -1;
- }
- *buf++ = data;
- }
-
- /* read last byte of data */
- control = MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_MAST_RD_DATA_NO_ACK;
- data = mv64x60_i2c_read_byte(control, status);
- if (data < 0)
- return -1;
- *buf++ = data;
-
- /* send stop */
- control = MV64x60_I2C_CONTROL_STOP | MV64x60_I2C_CONTROL_TWSIEN;
- status = MV64x60_I2C_STATUS_NO_STATUS;
- if (mv64x60_i2c_control(control, status) < 0)
- return -1;
-
- return count;
-}
-
-int mv64x60_i2c_open(void)
-{
- u32 v;
- void *devp;
-
- devp = find_node_by_compatible(NULL, "marvell,mv64360-i2c");
- if (devp == NULL)
- goto err_out;
- if (getprop(devp, "virtual-reg", &v, sizeof(v)) != sizeof(v))
- goto err_out;
-
- ctlr_base = (u8 *)v;
- return 0;
-
-err_out:
- return -1;
-}
-
-void mv64x60_i2c_close(void)
-{
- ctlr_base = NULL;
-}
diff --git a/arch/powerpc/boot/mvme5100.c b/arch/powerpc/boot/mvme5100.c
index cb865f83c60b..51453d0ec995 100644
--- a/arch/powerpc/boot/mvme5100.c
+++ b/arch/powerpc/boot/mvme5100.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Motorola/Emerson MVME5100 with PPCBug firmware.
*
* Author: Stephen Chivers <schivers@csc.com>
*
* Copyright 2013 CSC Australia Pty. Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
*/
#include "types.h"
#include "ops.h"
diff --git a/arch/powerpc/boot/mvme7100.c b/arch/powerpc/boot/mvme7100.c
new file mode 100644
index 000000000000..1e218454ab7f
--- /dev/null
+++ b/arch/powerpc/boot/mvme7100.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Motload compatibility for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "cuboot.h"
+
+#define TARGET_86xx
+#define TARGET_HAS_ETH1
+#define TARGET_HAS_ETH2
+#define TARGET_HAS_ETH3
+#include "ppcboot.h"
+
+static bd_t bd;
+
+BSS_STACK(16384);
+
+static void mvme7100_fixups(void)
+{
+ void *devp;
+ unsigned long busfreq = bd.bi_busfreq * 1000000;
+
+ dt_fixup_cpu_clocks(bd.bi_intfreq * 1000000, busfreq / 4, busfreq);
+
+ devp = finddevice("/soc@f1000000");
+ if (devp)
+ setprop(devp, "bus-frequency", &busfreq, sizeof(busfreq));
+
+ devp = finddevice("/soc/serial@4500");
+ if (devp)
+ setprop(devp, "clock-frequency", &busfreq, sizeof(busfreq));
+
+ dt_fixup_memory(bd.bi_memstart, bd.bi_memsize);
+
+ dt_fixup_mac_address_by_alias("ethernet0", bd.bi_enetaddr);
+ dt_fixup_mac_address_by_alias("ethernet1", bd.bi_enet1addr);
+ dt_fixup_mac_address_by_alias("ethernet2", bd.bi_enet2addr);
+ dt_fixup_mac_address_by_alias("ethernet3", bd.bi_enet3addr);
+}
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+ unsigned long r6, unsigned long r7)
+{
+ CUBOOT_INIT();
+ fdt_init(_dtb_start);
+ serial_console_init();
+ platform_ops.fixups = mvme7100_fixups;
+}
diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c
index 8c9ead94be06..f16d2be1d0f3 100644
--- a/arch/powerpc/boot/ns16550.c
+++ b/arch/powerpc/boot/ns16550.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* 16550 serial console support.
*
@@ -14,6 +15,7 @@
#include "stdio.h"
#include "io.h"
#include "ops.h"
+#include "of.h"
#define UART_DLL 0 /* Out: Divisor Latch Low */
#define UART_DLM 1 /* Out: Divisor Latch High */
@@ -57,16 +59,20 @@ int ns16550_console_init(void *devp, struct serial_console_data *scdp)
int n;
u32 reg_offset;
- if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1)
+ if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1) {
+ printf("virt reg parse fail...\r\n");
return -1;
+ }
n = getprop(devp, "reg-offset", &reg_offset, sizeof(reg_offset));
if (n == sizeof(reg_offset))
- reg_base += reg_offset;
+ reg_base += be32_to_cpu(reg_offset);
n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift));
if (n != sizeof(reg_shift))
reg_shift = 0;
+ else
+ reg_shift = be32_to_cpu(reg_shift);
scdp->open = ns16550_open;
scdp->putc = ns16550_putc;
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 7ca910cb2fc6..2fbd4ae60ec9 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
diff --git a/arch/powerpc/boot/of.h b/arch/powerpc/boot/of.h
index c8c1750aba0c..31b2f5dfd589 100644
--- a/arch/powerpc/boot/of.h
+++ b/arch/powerpc/boot/of.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_OF_H_
#define _PPC_BOOT_OF_H_
@@ -21,14 +22,24 @@ int of_setprop(const void *phandle, const char *name, const void *buf,
/* Console functions */
void of_console_init(void);
+typedef u16 __be16;
typedef u32 __be32;
+typedef u64 __be64;
#ifdef __LITTLE_ENDIAN__
+#define cpu_to_be16(x) swab16(x)
+#define be16_to_cpu(x) swab16(x)
#define cpu_to_be32(x) swab32(x)
#define be32_to_cpu(x) swab32(x)
+#define cpu_to_be64(x) swab64(x)
+#define be64_to_cpu(x) swab64(x)
#else
+#define cpu_to_be16(x) (x)
+#define be16_to_cpu(x) (x)
#define cpu_to_be32(x) (x)
#define be32_to_cpu(x) (x)
+#define cpu_to_be64(x) (x)
+#define be64_to_cpu(x) (x)
#endif
#define PROM_ERROR (-1u)
diff --git a/arch/powerpc/boot/ofconsole.c b/arch/powerpc/boot/ofconsole.c
index 8b754702460a..8eb0f1c452c5 100644
--- a/arch/powerpc/boot/ofconsole.c
+++ b/arch/powerpc/boot/ofconsole.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* OF console routines
*
* Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stddef.h>
#include "types.h"
diff --git a/arch/powerpc/boot/oflib.c b/arch/powerpc/boot/oflib.c
index 46c98a47d949..8759c985ef9a 100644
--- a/arch/powerpc/boot/oflib.c
+++ b/arch/powerpc/boot/oflib.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stddef.h>
#include "types.h"
diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S
new file mode 100644
index 000000000000..1f2f330a459e
--- /dev/null
+++ b/arch/powerpc/boot/opal-calls.S
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2016 IBM Corporation.
+ */
+
+#include "ppc_asm.h"
+#include "../include/asm/opal-api.h"
+
+ .text
+
+ .globl opal_kentry
+opal_kentry:
+ /* r3 is the fdt ptr */
+ mtctr r4
+ li r4, 0
+ li r5, 0
+ li r6, 0
+ li r7, 0
+ LOAD_REG_ADDR(r11, opal)
+ ld r8,0(r11)
+ ld r9,8(r11)
+ bctr
+
+#define OPAL_CALL(name, token) \
+ .globl name; \
+name: \
+ li r0, token; \
+ b opal_call;
+
+opal_call:
+ mflr r11
+ std r11,16(r1)
+ mfcr r12
+ stw r12,8(r1)
+ mr r13,r2
+
+ /* Set opal return address */
+ LOAD_REG_ADDR(r11, opal_return)
+ mtlr r11
+ mfmsr r12
+
+ /* switch to BE when we enter OPAL */
+ li r11,MSR_LE
+ andc r12,r12,r11
+ mtspr SPRN_HSRR1,r12
+
+ /* load the opal call entry point and base */
+ LOAD_REG_ADDR(r11, opal)
+ ld r12,8(r11)
+ ld r2,0(r11)
+ mtspr SPRN_HSRR0,r12
+ hrfid
+
+opal_return:
+ FIXUP_ENDIAN
+ mr r2,r13;
+ lwz r11,8(r1);
+ ld r12,16(r1)
+ mtcr r11;
+ mtlr r12
+ blr
+
+OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
+OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
+OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE);
+OPAL_CALL(opal_poll_events, OPAL_POLL_EVENTS);
+OPAL_CALL(opal_console_flush, OPAL_CONSOLE_FLUSH);
diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c
new file mode 100644
index 000000000000..b69818ce592b
--- /dev/null
+++ b/arch/powerpc/boot/opal.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 IBM Corporation.
+ */
+
+#include "ops.h"
+#include "stdio.h"
+#include "io.h"
+#include <libfdt.h>
+#include "../include/asm/opal-api.h"
+
+/* Global OPAL struct used by opal-call.S */
+struct opal {
+ u64 base;
+ u64 entry;
+} opal;
+
+static u32 opal_con_id;
+
+/* see opal-wrappers.S */
+int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer);
+int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer);
+int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length);
+int64_t opal_console_flush(uint64_t term_number);
+int64_t opal_poll_events(uint64_t *outstanding_event_mask);
+
+void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr);
+
+static int opal_con_open(void)
+{
+ /*
+ * When OPAL loads the boot kernel it stashes the OPAL base and entry
+ * address in r8 and r9 so the kernel can use the OPAL console
+ * before unflattening the devicetree. While executing the wrapper will
+ * probably trash r8 and r9 so this kentry hook restores them before
+ * entering the decompressed kernel.
+ */
+ platform_ops.kentry = opal_kentry;
+ return 0;
+}
+
+static void opal_con_putc(unsigned char c)
+{
+ int64_t rc;
+ uint64_t olen, len;
+
+ do {
+ rc = opal_console_write_buffer_space(opal_con_id, &olen);
+ len = be64_to_cpu(olen);
+ if (rc)
+ return;
+ opal_poll_events(NULL);
+ } while (len < 1);
+
+
+ olen = cpu_to_be64(1);
+ opal_console_write(opal_con_id, &olen, &c);
+}
+
+static void opal_con_close(void)
+{
+ opal_console_flush(opal_con_id);
+}
+
+static void opal_init(void)
+{
+ void *opal_node;
+
+ opal_node = finddevice("/ibm,opal");
+ if (!opal_node)
+ return;
+ if (getprop(opal_node, "opal-base-address", &opal.base, sizeof(u64)) < 0)
+ return;
+ opal.base = be64_to_cpu(opal.base);
+ if (getprop(opal_node, "opal-entry-address", &opal.entry, sizeof(u64)) < 0)
+ return;
+ opal.entry = be64_to_cpu(opal.entry);
+}
+
+int opal_console_init(void *devp, struct serial_console_data *scdp)
+{
+ opal_init();
+
+ if (devp) {
+ int n = getprop(devp, "reg", &opal_con_id, sizeof(u32));
+ if (n != sizeof(u32))
+ return -1;
+ opal_con_id = be32_to_cpu(opal_con_id);
+ } else
+ opal_con_id = 0;
+
+ scdp->open = opal_con_open;
+ scdp->putc = opal_con_putc;
+ scdp->close = opal_con_close;
+
+ return 0;
+}
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index 8aad3c55aeda..a40c2162a4e9 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -1,12 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Global definition of all the bootwrapper operations.
*
* Author: Mark A. Greer <mgreer@mvista.com>
*
- * 2006 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2006 (c) MontaVista Software, Inc.
*/
#ifndef _PPC_BOOT_OPS_H_
#define _PPC_BOOT_OPS_H_
@@ -30,6 +28,7 @@ struct platform_ops {
void * (*realloc)(void *ptr, unsigned long size);
void (*exit)(void);
void * (*vmlinux_alloc)(unsigned long size);
+ void (*kentry)(unsigned long fdt_addr, void *vmlinux_addr);
};
extern struct platform_ops platform_ops;
@@ -58,7 +57,7 @@ extern struct dt_ops dt_ops;
struct console_ops {
int (*open)(void);
void (*write)(const char *buf, int len);
- void (*edit_cmdline)(char *buf, int len);
+ void (*edit_cmdline)(char *buf, int len, unsigned int getline_timeout);
void (*close)(void);
void *data;
};
@@ -85,10 +84,9 @@ void start(void);
void fdt_init(void *blob);
int serial_console_init(void);
int ns16550_console_init(void *devp, struct serial_console_data *scdp);
-int mpsc_console_init(void *devp, struct serial_console_data *scdp);
int cpm_console_init(void *devp, struct serial_console_data *scdp);
int mpc5200_psc_console_init(void *devp, struct serial_console_data *scdp);
-int uartlite_console_init(void *devp, struct serial_console_data *scdp);
+int opal_console_init(void *devp, struct serial_console_data *scdp);
void *simple_alloc_init(char *base, unsigned long heap_size,
unsigned long granularity, unsigned long max_allocs);
extern void flush_cache(void *, unsigned long);
@@ -200,12 +198,6 @@ void __dt_fixup_mac_addresses(u32 startindex, ...);
__dt_fixup_mac_addresses(0, __VA_ARGS__, NULL)
-static inline void *find_node_by_linuxphandle(const u32 linuxphandle)
-{
- return find_node_by_prop_value(NULL, "linux,phandle",
- (char *)&linuxphandle, sizeof(u32));
-}
-
static inline char *get_path(const void *phandle, char *buf, int len)
{
if (dt_ops.get_path)
@@ -250,6 +242,8 @@ extern char _initrd_start[];
extern char _initrd_end[];
extern char _dtb_start[];
extern char _dtb_end[];
+extern char _esm_blob_start[];
+extern char _esm_blob_end[];
static inline __attribute__((const))
int __ilog2_u32(u32 n)
@@ -259,4 +253,7 @@ int __ilog2_u32(u32 n)
return 31 - bit;
}
+long partial_decompress(void *inbuf, unsigned long input_size, void *outbuf,
+ unsigned long output_size, unsigned long skip);
+
#endif /* _PPC_BOOT_OPS_H_ */
diff --git a/arch/powerpc/boot/page.h b/arch/powerpc/boot/page.h
index 14eca30fef64..e44a3119720d 100644
--- a/arch/powerpc/boot/page.h
+++ b/arch/powerpc/boot/page.h
@@ -1,15 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PPC_BOOT_PAGE_H
#define _PPC_BOOT_PAGE_H
/*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define ASM_CONST(x) x
#else
#define __ASM_CONST(x) x##UL
@@ -22,8 +18,8 @@
#define PAGE_MASK (~(PAGE_SIZE-1))
/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
-#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1)))
+#define _ALIGN_UP(addr, size) (((addr)+((size)-1))&(~((typeof(addr))(size)-1)))
+#define _ALIGN_DOWN(addr, size) ((addr)&(~((typeof(addr))(size)-1)))
/* align addr on a size boundary - adjust address up if needed */
#define _ALIGN(addr,size) _ALIGN_UP(addr,size)
diff --git a/arch/powerpc/boot/planetcore.c b/arch/powerpc/boot/planetcore.c
index 0d8558a475bb..d5f391e342be 100644
--- a/arch/powerpc/boot/planetcore.c
+++ b/arch/powerpc/boot/planetcore.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PlanetCore configuration data support functions
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "stdio.h"
@@ -131,36 +128,3 @@ void planetcore_set_stdout_path(const char *table)
setprop_str(chosen, "linux,stdout-path", path);
}
-
-void planetcore_set_serial_speed(const char *table)
-{
- void *chosen, *stdout;
- u64 baud;
- u32 baud32;
- int len;
-
- chosen = finddevice("/chosen");
- if (!chosen)
- return;
-
- len = getprop(chosen, "linux,stdout-path", prop_buf, MAX_PROP_LEN);
- if (len <= 0)
- return;
-
- stdout = finddevice(prop_buf);
- if (!stdout) {
- printf("planetcore_set_serial_speed: "
- "Bad /chosen/linux,stdout-path.\r\n");
-
- return;
- }
-
- if (!planetcore_get_decimal(table, PLANETCORE_KEY_SERIAL_BAUD,
- &baud)) {
- printf("planetcore_set_serial_speed: No SB tag.\r\n");
- return;
- }
-
- baud32 = baud;
- setprop(stdout, "current-speed", &baud32, 4);
-}
diff --git a/arch/powerpc/boot/planetcore.h b/arch/powerpc/boot/planetcore.h
index 0d4094f1771c..5311db06c62b 100644
--- a/arch/powerpc/boot/planetcore.h
+++ b/arch/powerpc/boot/planetcore.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_PLANETCORE_H_
#define _PPC_BOOT_PLANETCORE_H_
@@ -43,7 +44,4 @@ void planetcore_set_mac_addrs(const char *table);
*/
void planetcore_set_stdout_path(const char *table);
-/* Sets the current-speed property in the serial node. */
-void planetcore_set_serial_speed(const char *table);
-
#endif
diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h
index 35ea60c1f070..a66cfd76fa4d 100644
--- a/arch/powerpc/boot/ppc_asm.h
+++ b/arch/powerpc/boot/ppc_asm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PPC64_PPC_ASM_H
#define _PPC64_PPC_ASM_H
/*
@@ -5,11 +6,6 @@
* Definitions used by various bits of low-level assembly code on PowerPC.
*
* Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/* Condition Register Bit Fields */
@@ -61,17 +57,41 @@
#define SPRN_TBRL 268
#define SPRN_TBRU 269
+#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */
+#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
+
+#define MSR_LE 0x0000000000000001
#define FIXUP_ENDIAN \
- tdi 0, 0, 0x48; /* Reverse endian of b . + 8 */ \
- b $+36; /* Skip trampoline if endian is good */ \
- .long 0x05009f42; /* bcl 20,31,$+4 */ \
- .long 0xa602487d; /* mflr r10 */ \
- .long 0x1c004a39; /* addi r10,r10,28 */ \
+ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
+ b $+44; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x00004039; /* li r10,0 */ \
+ .long 0x6401417d; /* mtmsrd r10,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
.long 0x2400004c /* rfid */
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest) mftb dest
+#define MFTBU(dest) mftbu dest
+#else
+#define MFTBL(dest) mfspr dest, SPRN_TBRL
+#define MFTBU(dest) mfspr dest, SPRN_TBRU
+#endif
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+#define LOAD_REG_ADDR(reg,name) \
+ addis reg,r2,name@toc@ha; \
+ addi reg,reg,name@toc@l
+#else
+#define LOAD_REG_ADDR(reg,name) \
+ lis reg,name@ha; \
+ addi reg,reg,name@l
+#endif
+
#endif /* _PPC64_PPC_ASM_H */
diff --git a/arch/powerpc/boot/ppcboot-hotfoot.h b/arch/powerpc/boot/ppcboot-hotfoot.h
deleted file mode 100644
index 1a3e80b533da..000000000000
--- a/arch/powerpc/boot/ppcboot-hotfoot.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * This interface is used for compatibility with old U-boots *ONLY*.
- * Please do not imitate or extend this.
- */
-
-/*
- * Unfortunately, the ESTeem Hotfoot board uses a mangled version of
- * ppcboot.h for historical reasons, and in the interest of having a
- * mainline kernel boot on the production board+bootloader, this was the
- * least-offensive solution. Please direct all flames to:
- *
- * Solomon Peachy <solomon@linux-wlan.com>
- *
- * (This header is identical to ppcboot.h except for the
- * TARGET_HOTFOOT bits)
- */
-
-/*
- * (C) Copyright 2000, 2001
- * Wolfgang Denk, DENX Software Engineering, wd@denx.de.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- */
-
-#ifndef __PPCBOOT_H__
-#define __PPCBOOT_H__
-
-/*
- * Board information passed to kernel from PPCBoot
- *
- * include/asm-ppc/ppcboot.h
- */
-
-#include "types.h"
-
-typedef struct bd_info {
- unsigned long bi_memstart; /* start of DRAM memory */
- unsigned long bi_memsize; /* size of DRAM memory in bytes */
- unsigned long bi_flashstart; /* start of FLASH memory */
- unsigned long bi_flashsize; /* size of FLASH memory */
- unsigned long bi_flashoffset; /* reserved area for startup monitor */
- unsigned long bi_sramstart; /* start of SRAM memory */
- unsigned long bi_sramsize; /* size of SRAM memory */
-#if defined(TARGET_8xx) || defined(TARGET_CPM2) || defined(TARGET_85xx) ||\
- defined(TARGET_83xx)
- unsigned long bi_immr_base; /* base of IMMR register */
-#endif
-#if defined(TARGET_PPC_MPC52xx)
- unsigned long bi_mbar_base; /* base of internal registers */
-#endif
- unsigned long bi_bootflags; /* boot / reboot flag (for LynxOS) */
- unsigned long bi_ip_addr; /* IP Address */
- unsigned char bi_enetaddr[6]; /* Ethernet address */
-#if defined(TARGET_HOTFOOT)
- /* second onboard ethernet port */
- unsigned char bi_enet1addr[6];
-#define HAVE_ENET1ADDR
-#endif /* TARGET_HOOTFOOT */
- unsigned short bi_ethspeed; /* Ethernet speed in Mbps */
- unsigned long bi_intfreq; /* Internal Freq, in MHz */
- unsigned long bi_busfreq; /* Bus Freq, in MHz */
-#if defined(TARGET_CPM2)
- unsigned long bi_cpmfreq; /* CPM_CLK Freq, in MHz */
- unsigned long bi_brgfreq; /* BRG_CLK Freq, in MHz */
- unsigned long bi_sccfreq; /* SCC_CLK Freq, in MHz */
- unsigned long bi_vco; /* VCO Out from PLL, in MHz */
-#endif
-#if defined(TARGET_PPC_MPC52xx)
- unsigned long bi_ipbfreq; /* IPB Bus Freq, in MHz */
- unsigned long bi_pcifreq; /* PCI Bus Freq, in MHz */
-#endif
- unsigned long bi_baudrate; /* Console Baudrate */
-#if defined(TARGET_4xx)
- unsigned char bi_s_version[4]; /* Version of this structure */
- unsigned char bi_r_version[32]; /* Version of the ROM (IBM) */
- unsigned int bi_procfreq; /* CPU (Internal) Freq, in Hz */
- unsigned int bi_plb_busfreq; /* PLB Bus speed, in Hz */
- unsigned int bi_pci_busfreq; /* PCI Bus speed, in Hz */
- unsigned char bi_pci_enetaddr[6]; /* PCI Ethernet MAC address */
-#endif
-#if defined(TARGET_HOTFOOT)
- unsigned int bi_pllouta_freq; /* PLL OUTA speed, in Hz */
-#endif
-#if defined(TARGET_HYMOD)
- hymod_conf_t bi_hymod_conf; /* hymod configuration information */
-#endif
-#if defined(TARGET_EVB64260) || defined(TARGET_405EP) || defined(TARGET_44x) || \
- defined(TARGET_85xx) || defined(TARGET_83xx) || defined(TARGET_HAS_ETH1)
- /* second onboard ethernet port */
- unsigned char bi_enet1addr[6];
-#define HAVE_ENET1ADDR
-#endif
-#if defined(TARGET_EVB64260) || defined(TARGET_440GX) || \
- defined(TARGET_85xx) || defined(TARGET_HAS_ETH2)
- /* third onboard ethernet ports */
- unsigned char bi_enet2addr[6];
-#define HAVE_ENET2ADDR
-#endif
-#if defined(TARGET_440GX) || defined(TARGET_HAS_ETH3)
- /* fourth onboard ethernet ports */
- unsigned char bi_enet3addr[6];
-#define HAVE_ENET3ADDR
-#endif
-#if defined(TARGET_HOTFOOT)
- int bi_phynum[2]; /* Determines phy mapping */
- int bi_phymode[2]; /* Determines phy mode */
-#endif
-#if defined(TARGET_4xx)
- unsigned int bi_opbfreq; /* OB clock in Hz */
- int bi_iic_fast[2]; /* Use fast i2c mode */
-#endif
-#if defined(TARGET_440GX)
- int bi_phynum[4]; /* phy mapping */
- int bi_phymode[4]; /* phy mode */
-#endif
-} bd_t;
-
-#define bi_tbfreq bi_intfreq
-
-#endif /* __PPCBOOT_H__ */
diff --git a/arch/powerpc/boot/ppcboot.h b/arch/powerpc/boot/ppcboot.h
index 6ae6f9063952..90c8f452fe6e 100644
--- a/arch/powerpc/boot/ppcboot.h
+++ b/arch/powerpc/boot/ppcboot.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This interface is used for compatibility with old U-boots *ONLY*.
* Please do not imitate or extend this.
@@ -6,21 +7,6 @@
/*
* (C) Copyright 2000, 2001
* Wolfgang Denk, DENX Software Engineering, wd@denx.de.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
*/
#ifndef __PPCBOOT_H__
@@ -43,7 +29,7 @@ typedef struct bd_info {
unsigned long bi_sramstart; /* start of SRAM memory */
unsigned long bi_sramsize; /* size of SRAM memory */
#if defined(TARGET_8xx) || defined(TARGET_CPM2) || defined(TARGET_85xx) ||\
- defined(TARGET_83xx)
+ defined(TARGET_83xx) || defined(TARGET_86xx)
unsigned long bi_immr_base; /* base of IMMR register */
#endif
#if defined(TARGET_PPC_MPC52xx)
@@ -77,7 +63,7 @@ typedef struct bd_info {
#if defined(TARGET_HYMOD)
hymod_conf_t bi_hymod_conf; /* hymod configuration information */
#endif
-#if defined(TARGET_EVB64260) || defined(TARGET_405EP) || defined(TARGET_44x) || \
+#if defined(TARGET_EVB64260) || defined(TARGET_44x) || \
defined(TARGET_85xx) || defined(TARGET_83xx) || defined(TARGET_HAS_ETH1)
/* second onboard ethernet port */
unsigned char bi_enet1addr[6];
diff --git a/arch/powerpc/boot/pq2.c b/arch/powerpc/boot/pq2.c
index f6d118558f1d..de27f1c0721f 100644
--- a/arch/powerpc/boot/pq2.c
+++ b/arch/powerpc/boot/pq2.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PowerQUICC II support functions
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/pq2.h b/arch/powerpc/boot/pq2.h
index 481698c7a51a..f577b3bec60b 100644
--- a/arch/powerpc/boot/pq2.h
+++ b/arch/powerpc/boot/pq2.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_PQ2_H_
#define _PPC_BOOT_PQ2_H_
diff --git a/arch/powerpc/boot/prpmc2800.c b/arch/powerpc/boot/prpmc2800.c
deleted file mode 100644
index da31d6030482..000000000000
--- a/arch/powerpc/boot/prpmc2800.c
+++ /dev/null
@@ -1,571 +0,0 @@
-/*
- * Motorola ECC prpmc280/f101 & prpmc2800/f101e platform code.
- *
- * Author: Mark A. Greer <mgreer@mvista.com>
- *
- * 2007 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "elf.h"
-#include "page.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-#include "gunzip_util.h"
-#include "mv64x60.h"
-
-#define KB 1024U
-#define MB (KB*KB)
-#define GB (KB*MB)
-#define MHz (1000U*1000U)
-#define GHz (1000U*MHz)
-
-#define BOARD_MODEL "PrPMC2800"
-#define BOARD_MODEL_MAX 32 /* max strlen(BOARD_MODEL) + 1 */
-
-#define EEPROM2_ADDR 0xa4
-#define EEPROM3_ADDR 0xa8
-
-BSS_STACK(16*KB);
-
-static u8 *bridge_base;
-
-typedef enum {
- BOARD_MODEL_PRPMC280,
- BOARD_MODEL_PRPMC2800,
-} prpmc2800_board_model;
-
-typedef enum {
- BRIDGE_TYPE_MV64360,
- BRIDGE_TYPE_MV64362,
-} prpmc2800_bridge_type;
-
-struct prpmc2800_board_info {
- prpmc2800_board_model model;
- char variant;
- prpmc2800_bridge_type bridge_type;
- u8 subsys0;
- u8 subsys1;
- u8 vpd4;
- u8 vpd4_mask;
- u32 core_speed;
- u32 mem_size;
- u32 boot_flash;
- u32 user_flash;
-};
-
-static struct prpmc2800_board_info prpmc2800_board_info[] = {
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'a',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x00,
- .vpd4_mask = 0x0f,
- .core_speed = 1*GHz,
- .mem_size = 512*MB,
- .boot_flash = 1*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'b',
- .bridge_type = BRIDGE_TYPE_MV64362,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x01,
- .vpd4_mask = 0x0f,
- .core_speed = 1*GHz,
- .mem_size = 512*MB,
- .boot_flash = 0,
- .user_flash = 0,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'c',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x02,
- .vpd4_mask = 0x0f,
- .core_speed = 733*MHz,
- .mem_size = 512*MB,
- .boot_flash = 1*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'd',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x03,
- .vpd4_mask = 0x0f,
- .core_speed = 1*GHz,
- .mem_size = 1*GB,
- .boot_flash = 1*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'e',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x04,
- .vpd4_mask = 0x0f,
- .core_speed = 1*GHz,
- .mem_size = 512*MB,
- .boot_flash = 1*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'f',
- .bridge_type = BRIDGE_TYPE_MV64362,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x05,
- .vpd4_mask = 0x0f,
- .core_speed = 733*MHz,
- .mem_size = 128*MB,
- .boot_flash = 1*MB,
- .user_flash = 0,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'g',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x06,
- .vpd4_mask = 0x0f,
- .core_speed = 1*GHz,
- .mem_size = 256*MB,
- .boot_flash = 1*MB,
- .user_flash = 0,
- },
- {
- .model = BOARD_MODEL_PRPMC280,
- .variant = 'h',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xff,
- .subsys1 = 0xff,
- .vpd4 = 0x07,
- .vpd4_mask = 0x0f,
- .core_speed = 1*GHz,
- .mem_size = 1*GB,
- .boot_flash = 1*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'a',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xb2,
- .subsys1 = 0x8c,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 1*GHz,
- .mem_size = 512*MB,
- .boot_flash = 2*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'b',
- .bridge_type = BRIDGE_TYPE_MV64362,
- .subsys0 = 0xb2,
- .subsys1 = 0x8d,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 1*GHz,
- .mem_size = 512*MB,
- .boot_flash = 0,
- .user_flash = 0,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'c',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xb2,
- .subsys1 = 0x8e,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 733*MHz,
- .mem_size = 512*MB,
- .boot_flash = 2*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'd',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xb2,
- .subsys1 = 0x8f,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 1*GHz,
- .mem_size = 1*GB,
- .boot_flash = 2*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'e',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xa2,
- .subsys1 = 0x8a,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 1*GHz,
- .mem_size = 512*MB,
- .boot_flash = 2*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'f',
- .bridge_type = BRIDGE_TYPE_MV64362,
- .subsys0 = 0xa2,
- .subsys1 = 0x8b,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 733*MHz,
- .mem_size = 128*MB,
- .boot_flash = 2*MB,
- .user_flash = 0,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'g',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xa2,
- .subsys1 = 0x8c,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 1*GHz,
- .mem_size = 2*GB,
- .boot_flash = 2*MB,
- .user_flash = 64*MB,
- },
- {
- .model = BOARD_MODEL_PRPMC2800,
- .variant = 'h',
- .bridge_type = BRIDGE_TYPE_MV64360,
- .subsys0 = 0xa2,
- .subsys1 = 0x8d,
- .vpd4 = 0x00,
- .vpd4_mask = 0x00,
- .core_speed = 733*MHz,
- .mem_size = 1*GB,
- .boot_flash = 2*MB,
- .user_flash = 64*MB,
- },
-};
-
-static struct prpmc2800_board_info *prpmc2800_get_board_info(u8 *vpd)
-{
- struct prpmc2800_board_info *bip;
- int i;
-
- for (i=0,bip=prpmc2800_board_info; i<ARRAY_SIZE(prpmc2800_board_info);
- i++,bip++)
- if ((vpd[0] == bip->subsys0) && (vpd[1] == bip->subsys1)
- && ((vpd[4] & bip->vpd4_mask) == bip->vpd4))
- return bip;
-
- return NULL;
-}
-
-/* Get VPD from i2c eeprom 2, then match it to a board info entry */
-static struct prpmc2800_board_info *prpmc2800_get_bip(void)
-{
- struct prpmc2800_board_info *bip;
- u8 vpd[5];
- int rc;
-
- if (mv64x60_i2c_open())
- fatal("Error: Can't open i2c device\n\r");
-
- /* Get VPD from i2c eeprom-2 */
- memset(vpd, 0, sizeof(vpd));
- rc = mv64x60_i2c_read(EEPROM2_ADDR, vpd, 0x1fde, 2, sizeof(vpd));
- if (rc < 0)
- fatal("Error: Couldn't read eeprom2\n\r");
- mv64x60_i2c_close();
-
- /* Get board type & related info */
- bip = prpmc2800_get_board_info(vpd);
- if (bip == NULL) {
- printf("Error: Unsupported board or corrupted VPD:\n\r");
- printf(" 0x%x 0x%x 0x%x 0x%x 0x%x\n\r",
- vpd[0], vpd[1], vpd[2], vpd[3], vpd[4]);
- printf("Using device tree defaults...\n\r");
- }
-
- return bip;
-}
-
-static void prpmc2800_bridge_setup(u32 mem_size)
-{
- u32 i, v[12], enables, acc_bits;
- u32 pci_base_hi, pci_base_lo, size, buf[2];
- unsigned long cpu_base;
- int rc;
- void *devp;
- u8 *bridge_pbase, is_coherent;
- struct mv64x60_cpu2pci_win *tbl;
-
- bridge_pbase = mv64x60_get_bridge_pbase();
- is_coherent = mv64x60_is_coherent();
-
- if (is_coherent)
- acc_bits = MV64x60_PCI_ACC_CNTL_SNOOP_WB
- | MV64x60_PCI_ACC_CNTL_SWAP_NONE
- | MV64x60_PCI_ACC_CNTL_MBURST_32_BYTES
- | MV64x60_PCI_ACC_CNTL_RDSIZE_32_BYTES;
- else
- acc_bits = MV64x60_PCI_ACC_CNTL_SNOOP_NONE
- | MV64x60_PCI_ACC_CNTL_SWAP_NONE
- | MV64x60_PCI_ACC_CNTL_MBURST_128_BYTES
- | MV64x60_PCI_ACC_CNTL_RDSIZE_256_BYTES;
-
- mv64x60_config_ctlr_windows(bridge_base, bridge_pbase, is_coherent);
- mv64x60_config_pci_windows(bridge_base, bridge_pbase, 0, 0, mem_size,
- acc_bits);
-
- /* Get the cpu -> pci i/o & mem mappings from the device tree */
- devp = find_node_by_compatible(NULL, "marvell,mv64360-pci");
- if (devp == NULL)
- fatal("Error: Missing marvell,mv64360-pci"
- " device tree node\n\r");
-
- rc = getprop(devp, "ranges", v, sizeof(v));
- if (rc != sizeof(v))
- fatal("Error: Can't find marvell,mv64360-pci ranges"
- " property\n\r");
-
- /* Get the cpu -> pci i/o & mem mappings from the device tree */
- devp = find_node_by_compatible(NULL, "marvell,mv64360");
- if (devp == NULL)
- fatal("Error: Missing marvell,mv64360 device tree node\n\r");
-
- enables = in_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE));
- enables |= 0x0007fe00; /* Disable all cpu->pci windows */
- out_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE), enables);
-
- for (i=0; i<12; i+=6) {
- switch (v[i] & 0xff000000) {
- case 0x01000000: /* PCI I/O Space */
- tbl = mv64x60_cpu2pci_io;
- break;
- case 0x02000000: /* PCI MEM Space */
- tbl = mv64x60_cpu2pci_mem;
- break;
- default:
- continue;
- }
-
- pci_base_hi = v[i+1];
- pci_base_lo = v[i+2];
- cpu_base = v[i+3];
- size = v[i+5];
-
- buf[0] = cpu_base;
- buf[1] = size;
-
- if (!dt_xlate_addr(devp, buf, sizeof(buf), &cpu_base))
- fatal("Error: Can't translate PCI address 0x%x\n\r",
- (u32)cpu_base);
-
- mv64x60_config_cpu2pci_window(bridge_base, 0, pci_base_hi,
- pci_base_lo, cpu_base, size, tbl);
- }
-
- enables &= ~0x00000600; /* Enable cpu->pci0 i/o, cpu->pci0 mem0 */
- out_le32((u32 *)(bridge_base + MV64x60_CPU_BAR_ENABLE), enables);
-}
-
-static void prpmc2800_fixups(void)
-{
- u32 v[2], l, mem_size;
- int rc;
- void *devp;
- char model[BOARD_MODEL_MAX];
- struct prpmc2800_board_info *bip;
-
- bip = prpmc2800_get_bip(); /* Get board info based on VPD */
-
- mem_size = (bip) ? bip->mem_size : mv64x60_get_mem_size(bridge_base);
- prpmc2800_bridge_setup(mem_size); /* Do necessary bridge setup */
-
- /* If the VPD doesn't match what we know about, just use the
- * defaults already in the device tree.
- */
- if (!bip)
- return;
-
- /* Know the board type so override device tree defaults */
- /* Set /model appropriately */
- devp = finddevice("/");
- if (devp == NULL)
- fatal("Error: Missing '/' device tree node\n\r");
- memset(model, 0, BOARD_MODEL_MAX);
- strncpy(model, BOARD_MODEL, BOARD_MODEL_MAX - 2);
- l = strlen(model);
- if (bip->model == BOARD_MODEL_PRPMC280)
- l--;
- model[l++] = bip->variant;
- model[l++] = '\0';
- setprop(devp, "model", model, l);
-
- /* Set /cpus/PowerPC,7447/clock-frequency */
- devp = find_node_by_prop_value_str(NULL, "device_type", "cpu");
- if (devp == NULL)
- fatal("Error: Missing proper cpu device tree node\n\r");
- v[0] = bip->core_speed;
- setprop(devp, "clock-frequency", &v[0], sizeof(v[0]));
-
- /* Set /memory/reg size */
- devp = finddevice("/memory");
- if (devp == NULL)
- fatal("Error: Missing /memory device tree node\n\r");
- v[0] = 0;
- v[1] = bip->mem_size;
- setprop(devp, "reg", v, sizeof(v));
-
- /* Update model, if this is a mv64362 */
- if (bip->bridge_type == BRIDGE_TYPE_MV64362) {
- devp = find_node_by_compatible(NULL, "marvell,mv64360");
- if (devp == NULL)
- fatal("Error: Missing marvell,mv64360"
- " device tree node\n\r");
- setprop(devp, "model", "mv64362", strlen("mv64362") + 1);
- }
-
- /* Set User FLASH size */
- devp = find_node_by_compatible(NULL, "direct-mapped");
- if (devp == NULL)
- fatal("Error: Missing User FLASH device tree node\n\r");
- rc = getprop(devp, "reg", v, sizeof(v));
- if (rc != sizeof(v))
- fatal("Error: Can't find User FLASH reg property\n\r");
- v[1] = bip->user_flash;
- setprop(devp, "reg", v, sizeof(v));
-}
-
-#define MV64x60_MPP_CNTL_0 0xf000
-#define MV64x60_MPP_CNTL_2 0xf008
-#define MV64x60_GPP_IO_CNTL 0xf100
-#define MV64x60_GPP_LEVEL_CNTL 0xf110
-#define MV64x60_GPP_VALUE_SET 0xf118
-
-static void prpmc2800_reset(void)
-{
- u32 temp;
-
- udelay(5000000);
-
- if (bridge_base != 0) {
- temp = in_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_0));
- temp &= 0xFFFF0FFF;
- out_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_0), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL));
- temp |= 0x00000004;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL));
- temp |= 0x00000004;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_2));
- temp &= 0xFFFF0FFF;
- out_le32((u32 *)(bridge_base + MV64x60_MPP_CNTL_2), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL));
- temp |= 0x00080000;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_LEVEL_CNTL), temp);
-
- temp = in_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL));
- temp |= 0x00080000;
- out_le32((u32 *)(bridge_base + MV64x60_GPP_IO_CNTL), temp);
-
- out_le32((u32 *)(bridge_base + MV64x60_GPP_VALUE_SET),
- 0x00080004);
- }
-
- for (;;);
-}
-
-#define HEAP_SIZE (16*MB)
-static struct gunzip_state gzstate;
-
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7)
-{
- struct elf_info ei;
- char *heap_start, *dtb;
- int dt_size = _dtb_end - _dtb_start;
- void *vmlinuz_addr = _vmlinux_start;
- unsigned long vmlinuz_size = _vmlinux_end - _vmlinux_start;
- char elfheader[256];
-
- if (dt_size <= 0) /* No fdt */
- exit();
-
- /*
- * Start heap after end of the kernel (after decompressed to
- * address 0) or the end of the zImage, whichever is higher.
- * That's so things allocated by simple_alloc won't overwrite
- * any part of the zImage and the kernel won't overwrite the dtb
- * when decompressed & relocated.
- */
- gunzip_start(&gzstate, vmlinuz_addr, vmlinuz_size);
- gunzip_exactly(&gzstate, elfheader, sizeof(elfheader));
-
- if (!parse_elf32(elfheader, &ei))
- exit();
-
- heap_start = (char *)(ei.memsize + ei.elfoffset); /* end of kernel*/
- heap_start = max(heap_start, (char *)_end); /* end of zImage */
-
- if ((unsigned)simple_alloc_init(heap_start, HEAP_SIZE, 2*KB, 16)
- > (128*MB))
- exit();
-
- /* Relocate dtb to safe area past end of zImage & kernel */
- dtb = malloc(dt_size);
- if (!dtb)
- exit();
- memmove(dtb, _dtb_start, dt_size);
- fdt_init(dtb);
-
- bridge_base = mv64x60_get_bridge_base();
-
- platform_ops.fixups = prpmc2800_fixups;
- platform_ops.exit = prpmc2800_reset;
-
- if (serial_console_init() < 0)
- exit();
-}
-
-/* _zimage_start called very early--need to turn off external interrupts */
-asm (" .globl _zimage_start\n\
- _zimage_start:\n\
- mfmsr 10\n\
- rlwinm 10,10,0,~(1<<15) /* Clear MSR_EE */\n\
- sync\n\
- mtmsr 10\n\
- isync\n\
- b _zimage_start_lib\n\
-");
diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S
index b6fcbaf5027b..0a4ebfcc3949 100644
--- a/arch/powerpc/boot/ps3-head.S
+++ b/arch/powerpc/boot/ps3-head.S
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 bootwrapper entry.
*
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* Copyright 2007 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "ppc_asm.h"
@@ -57,11 +45,6 @@ __system_reset_overlay:
bctr
1:
- /* Save the value at addr zero for a null pointer write check later. */
-
- li r4, 0
- lwz r3, 0(r4)
-
/* Primary delays then goes to _zimage_start in wrapper. */
or 31, 31, 31 /* db16cyc */
diff --git a/arch/powerpc/boot/ps3-hvcall.S b/arch/powerpc/boot/ps3-hvcall.S
index d6068f1829ca..ff74102e8a71 100644
--- a/arch/powerpc/boot/ps3-hvcall.S
+++ b/arch/powerpc/boot/ps3-hvcall.S
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 bootwrapper hvcalls.
*
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* Copyright 2007 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "ppc_asm.h"
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index 4ec2d86d3c50..89ff46b8b225 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PS3 bootwrapper support.
*
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* Copyright 2007 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdarg.h>
@@ -33,18 +21,11 @@ extern int lv1_get_logical_ppe_id(u64 *out_1);
extern int lv1_get_repository_node_value(u64 in_1, u64 in_2, u64 in_3,
u64 in_4, u64 in_5, u64 *out_1, u64 *out_2);
-#ifdef DEBUG
-#define DBG(fmt...) printf(fmt)
-#else
-static inline int __attribute__ ((format (printf, 1, 2))) DBG(
- const char *fmt, ...) {return 0;}
-#endif
-
BSS_STACK(4096);
/* A buffer that may be edited by tools operating on a zImage binary so as to
* edit the command line passed to vmlinux (by setting /chosen/bootargs).
- * The buffer is put in it's own section so that tools may locate it easier.
+ * The buffer is put in its own section so that tools may locate it easier.
*/
static char cmdline[BOOT_COMMAND_LINE_SIZE]
@@ -119,13 +100,12 @@ void ps3_copy_vectors(void)
flush_cache((void *)0x100, 512);
}
-void platform_init(unsigned long null_check)
+void platform_init(void)
{
const u32 heapsize = 0x1000000 - (u32)_end; /* 16MiB */
void *chosen;
unsigned long ft_addr;
u64 rm_size;
- unsigned long val;
console_ops.write = ps3_console_write;
platform_ops.exit = ps3_exit;
@@ -140,7 +120,7 @@ void platform_init(unsigned long null_check)
ps3_repository_read_rm_size(&rm_size);
dt_fixup_memory(0, rm_size);
- if (_initrd_end > _initrd_start) {
+ if (&_initrd_end > &_initrd_start) {
setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start));
setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end));
}
@@ -153,11 +133,6 @@ void platform_init(unsigned long null_check)
printf(" flat tree at 0x%lx\n\r", ft_addr);
- val = *(unsigned long *)0;
-
- if (val != null_check)
- printf("null check failed: %lx != %lx\n\r", val, null_check);
-
((kernel_entry_t)0)(ft_addr, 0, NULL);
ps3_exit();
diff --git a/arch/powerpc/boot/pseries-head.S b/arch/powerpc/boot/pseries-head.S
index 6ef6e02e80f9..1b1a638ce6e8 100644
--- a/arch/powerpc/boot/pseries-head.S
+++ b/arch/powerpc/boot/pseries-head.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include "ppc_asm.h"
.text
diff --git a/arch/powerpc/boot/redboot-83xx.c b/arch/powerpc/boot/redboot-83xx.c
index 79aa9e151fa7..b610e78b43b6 100644
--- a/arch/powerpc/boot/redboot-83xx.c
+++ b/arch/powerpc/boot/redboot-83xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* RedBoot firmware support
*
@@ -5,10 +6,6 @@
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
* Copyright (c) 2008 Codehermit
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/redboot-8xx.c b/arch/powerpc/boot/redboot-8xx.c
index f7945adc8004..d7006eeaf5ea 100644
--- a/arch/powerpc/boot/redboot-8xx.c
+++ b/arch/powerpc/boot/redboot-8xx.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* RedBoot firmware support
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/redboot.h b/arch/powerpc/boot/redboot.h
index ace0b7fed8eb..8f319b1add32 100644
--- a/arch/powerpc/boot/redboot.h
+++ b/arch/powerpc/boot/redboot.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_REDBOOT_H
#define _PPC_REDBOOT_H
diff --git a/arch/powerpc/boot/reg.h b/arch/powerpc/boot/reg.h
index 9c2c9978e0eb..fd8f4fcbfc4a 100644
--- a/arch/powerpc/boot/reg.h
+++ b/arch/powerpc/boot/reg.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _PPC_BOOT_REG_H
#define _PPC_BOOT_REG_H
/*
* Copyright 2007 Davud Gibson, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
static inline u32 mfpvr(void)
diff --git a/arch/powerpc/boot/rs6000.h b/arch/powerpc/boot/rs6000.h
index 433f45084e41..16df8f3c43f1 100644
--- a/arch/powerpc/boot/rs6000.h
+++ b/arch/powerpc/boot/rs6000.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* IBM RS/6000 "XCOFF" file definitions for BFD.
Copyright (C) 1990, 1991 Free Software Foundation, Inc.
- FIXME: Can someone provide a transliteration of this name into ASCII?
- Using the following chars caused a compiler warning on HIUX (so I replaced
- them with octal escapes), and isn't useful without an understanding of what
- character set it is.
- Written by Mimi Ph\373\364ng-Th\345o V\365 of IBM
+ Written by Mimi Phuong-Thao Vo of IBM
and John Gilmore of Cygnus Support. */
/********************** FILE HEADER **********************/
@@ -239,5 +236,5 @@ struct external_reloc {
#define DEFAULT_DATA_SECTION_ALIGNMENT 4
#define DEFAULT_BSS_SECTION_ALIGNMENT 4
#define DEFAULT_TEXT_SECTION_ALIGNMENT 4
-/* For new sections we havn't heard of before */
+/* For new sections we haven't heard of before */
#define DEFAULT_SECTION_ALIGNMENT 4
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index f2156f07571f..c6d32a8c3612 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Generic serial console support
*
@@ -6,10 +7,7 @@
* Code in serial_edit_cmdline() copied from <file:arch/ppc/boot/simple/misc.c>
* and was written by Matt Porter <mporter@kernel.crashing.org>.
*
- * 2001,2006 (c) MontaVista Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2001,2006 (c) MontaVista Software, Inc.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -33,7 +31,7 @@ static void serial_write(const char *buf, int len)
scdp->putc(*buf++);
}
-static void serial_edit_cmdline(char *buf, int len)
+static void serial_edit_cmdline(char *buf, int len, unsigned int timeout)
{
int timer = 0, count;
char ch, *cp;
@@ -44,7 +42,7 @@ static void serial_edit_cmdline(char *buf, int len)
cp = &buf[count];
count++;
- while (timer++ < 5*1000) {
+ do {
if (scdp->tstc()) {
while (((ch = scdp->getc()) != '\n') && (ch != '\r')) {
/* Test for backspace/delete */
@@ -70,7 +68,7 @@ static void serial_edit_cmdline(char *buf, int len)
break; /* Exit 'timer' loop */
}
udelay(1000); /* 1 msec */
- }
+ } while (timer++ < timeout);
*cp = 0;
}
@@ -92,7 +90,8 @@ static void *serial_get_stdout_devp(void)
if (devp == NULL)
goto err_out;
- if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0) {
+ if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0 ||
+ getprop(devp, "stdout-path", path, MAX_PATH_LEN) > 0) {
devp = finddevice(path);
if (devp == NULL)
goto err_out;
@@ -120,18 +119,21 @@ int serial_console_init(void)
if (dt_is_compatible(devp, "ns16550") ||
dt_is_compatible(devp, "pnpPNP,501"))
rc = ns16550_console_init(devp, &serial_cd);
- else if (dt_is_compatible(devp, "marvell,mv64360-mpsc"))
- rc = mpsc_console_init(devp, &serial_cd);
+#ifdef CONFIG_CPM
else if (dt_is_compatible(devp, "fsl,cpm1-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm1-smc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-scc-uart") ||
dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
rc = cpm_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC_MPC52xx
else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
rc = mpc5200_psc_console_init(devp, &serial_cd);
- else if (dt_is_compatible(devp, "xlnx,opb-uartlite-1.00.b") ||
- dt_is_compatible(devp, "xlnx,xps-uartlite-1.00.a"))
- rc = uartlite_console_init(devp, &serial_cd);
+#endif
+#ifdef CONFIG_PPC_POWERNV
+ else if (dt_is_compatible(devp, "ibm,opal-console-raw"))
+ rc = opal_console_init(devp, &serial_cd);
+#endif
/* Add other serial console driver calls here */
diff --git a/arch/powerpc/boot/simple_alloc.c b/arch/powerpc/boot/simple_alloc.c
index 65ec135d0157..d07796fdf91a 100644
--- a/arch/powerpc/boot/simple_alloc.c
+++ b/arch/powerpc/boot/simple_alloc.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Implement primitive realloc(3) functionality.
*
* Author: Mark A. Greer <mgreer@mvista.com>
*
- * 2006 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
+ * 2006 (c) MontaVista, Software, Inc.
*/
#include <stddef.h>
@@ -114,8 +112,11 @@ static void *simple_realloc(void *ptr, unsigned long size)
return ptr;
new = simple_malloc(size);
- memcpy(new, ptr, p->size);
- simple_free(ptr);
+ if (new) {
+ memcpy(new, ptr, p->size);
+ simple_free(ptr);
+ }
+
return new;
}
diff --git a/arch/powerpc/boot/simpleboot.c b/arch/powerpc/boot/simpleboot.c
index 21cd48074ec8..c80691d83880 100644
--- a/arch/powerpc/boot/simpleboot.c
+++ b/arch/powerpc/boot/simpleboot.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* The simple platform -- for booting when firmware doesn't supply a device
* tree or any platform configuration information.
@@ -9,10 +10,6 @@
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
* Copyright (c) 2008 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
@@ -61,7 +58,7 @@ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
if (*reg++ != 0)
fatal("Memory range is not based at address 0\n");
- /* get the memsize and trucate it to under 4G on 32 bit machines */
+ /* get the memsize and truncate it to under 4G on 32 bit machines */
memsize64 = 0;
for (i = 0; i < *ns; i++)
memsize64 = (memsize64 << 32) | *reg++;
diff --git a/arch/powerpc/boot/stdbool.h b/arch/powerpc/boot/stdbool.h
new file mode 100644
index 000000000000..2dfe247ede80
--- /dev/null
+++ b/arch/powerpc/boot/stdbool.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This file is only necessary because some of the pre-boot decompressors
+ * expect stdbool.h to be available.
+ */
+
+#include "types.h"
diff --git a/arch/powerpc/boot/stdint.h b/arch/powerpc/boot/stdint.h
new file mode 100644
index 000000000000..5cc5e87b00ec
--- /dev/null
+++ b/arch/powerpc/boot/stdint.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) IBM Corporation 2016.
+ *
+ * This file is only necessary because some of the pre-boot decompressors
+ * expect stdint.h to be available.
+ */
+
+#include "types.h"
diff --git a/arch/powerpc/boot/stdio.c b/arch/powerpc/boot/stdio.c
index a701261b1781..31eece29f56d 100644
--- a/arch/powerpc/boot/stdio.c
+++ b/arch/powerpc/boot/stdio.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) Paul Mackerras 1997.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -21,6 +17,16 @@ size_t strnlen(const char * s, size_t count)
return sc - s;
}
+char *strrchr(const char *s, int c)
+{
+ const char *last = NULL;
+ do {
+ if (*s == (char)c)
+ last = s;
+ } while (*s++);
+ return (char *)last;
+}
+
#ifdef __powerpc64__
# define do_div(n, base) ({ \
diff --git a/arch/powerpc/boot/stdio.h b/arch/powerpc/boot/stdio.h
index adffc58412d4..884d5959a9ae 100644
--- a/arch/powerpc/boot/stdio.h
+++ b/arch/powerpc/boot/stdio.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_STDIO_H_
#define _PPC_BOOT_STDIO_H_
diff --git a/arch/powerpc/boot/stdlib.c b/arch/powerpc/boot/stdlib.c
index e00d58c29eea..868b019d6384 100644
--- a/arch/powerpc/boot/stdlib.c
+++ b/arch/powerpc/boot/stdlib.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* stdlib functions
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "stdlib.h"
diff --git a/arch/powerpc/boot/stdlib.h b/arch/powerpc/boot/stdlib.h
index 1bf01ac73aba..0a61fcd10fdb 100644
--- a/arch/powerpc/boot/stdlib.h
+++ b/arch/powerpc/boot/stdlib.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_STDLIB_H_
#define _PPC_BOOT_STDLIB_H_
diff --git a/arch/powerpc/boot/string.S b/arch/powerpc/boot/string.S
index acc9428f2789..d2a2dbf1eefc 100644
--- a/arch/powerpc/boot/string.S
+++ b/arch/powerpc/boot/string.S
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) Paul Mackerras 1997.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* NOTE: this code runs in 32 bit mode and is packaged as ELF32.
*/
diff --git a/arch/powerpc/boot/string.h b/arch/powerpc/boot/string.h
index 50091cc0eed9..8c2ec0c05e4e 100644
--- a/arch/powerpc/boot/string.h
+++ b/arch/powerpc/boot/string.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_STRING_H_
#define _PPC_BOOT_STRING_H_
#include <stddef.h>
@@ -6,6 +7,7 @@ extern char *strcpy(char *dest, const char *src);
extern char *strncpy(char *dest, const char *src, size_t n);
extern char *strcat(char *dest, const char *src);
extern char *strchr(const char *s, int c);
+extern char *strrchr(const char *s, int c);
extern int strcmp(const char *s1, const char *s2);
extern int strncmp(const char *s1, const char *s2, size_t n);
extern size_t strlen(const char *s);
diff --git a/arch/powerpc/boot/swab.h b/arch/powerpc/boot/swab.h
index d0e1431084ca..11d2069fbb66 100644
--- a/arch/powerpc/boot/swab.h
+++ b/arch/powerpc/boot/swab.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_BOOT_SWAB_H_
#define _PPC_BOOT_SWAB_H_
diff --git a/arch/powerpc/boot/treeboot-akebono.c b/arch/powerpc/boot/treeboot-akebono.c
index b73174c34fe4..e3cc2599869c 100644
--- a/arch/powerpc/boot/treeboot-akebono.c
+++ b/arch/powerpc/boot/treeboot-akebono.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright © 2013 Tony Breeds IBM Corporation
* Copyright © 2013 Alistair Popple IBM Corporation
@@ -14,11 +15,6 @@
* Copyright 2007 David Gibson, IBM Corporation.
* Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
* Copyright © 2011 David Kleikamp IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -38,7 +34,7 @@
BSS_STACK(4096);
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
#define USERDATA_LEN 256 /* Length of userdata passed in by PIBS */
#define MAX_RANKS 0x4
#define DDR3_MR0CF 0x80010011U
diff --git a/arch/powerpc/boot/treeboot-bamboo.c b/arch/powerpc/boot/treeboot-bamboo.c
index 9eee48fc7114..97b5b161dbbb 100644
--- a/arch/powerpc/boot/treeboot-bamboo.c
+++ b/arch/powerpc/boot/treeboot-bamboo.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright IBM Corporation, 2007
* Josh Boyer <jwboyer@linux.vnet.ibm.com>
*
* Based on ebony wrapper:
* Copyright 2007 David Gibson, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the License
*/
#include "ops.h"
#include "stdio.h"
diff --git a/arch/powerpc/boot/treeboot-currituck.c b/arch/powerpc/boot/treeboot-currituck.c
index 925ae43b7467..d53e8a592f81 100644
--- a/arch/powerpc/boot/treeboot-currituck.c
+++ b/arch/powerpc/boot/treeboot-currituck.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright © 2011 Tony Breeds IBM Corporation
*
@@ -13,11 +14,6 @@
* Copyright 2007 David Gibson, IBM Corporation.
* Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
* Copyright © 2011 David Kleikamp IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -80,7 +76,7 @@ static void ibm_currituck_fixups(void)
}
}
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
void platform_init(void)
{
unsigned long end_of_ram, avail_ram;
diff --git a/arch/powerpc/boot/treeboot-ebony.c b/arch/powerpc/boot/treeboot-ebony.c
index 21cc4834a384..332e28659134 100644
--- a/arch/powerpc/boot/treeboot-ebony.c
+++ b/arch/powerpc/boot/treeboot-ebony.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Old U-boot compatibility for Ebony
*
@@ -6,10 +7,6 @@
* Copyright 2007 David Gibson, IBM Corporatio.
* Based on cuboot-83xx.c, which is:
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include "ops.h"
diff --git a/arch/powerpc/boot/treeboot-iss4xx.c b/arch/powerpc/boot/treeboot-iss4xx.c
index 329e710feda2..9ab556093cb8 100644
--- a/arch/powerpc/boot/treeboot-iss4xx.c
+++ b/arch/powerpc/boot/treeboot-iss4xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2010 Ben. Herrenschmidt, IBM Corporation.
*
@@ -11,11 +12,6 @@
* Copyright (c) 2003, 2004 Zultys Technologies
*
* Copyright 2007 David Gibson, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <stdarg.h>
#include <stddef.h>
@@ -59,7 +55,7 @@ static void *iss_4xx_vmlinux_alloc(unsigned long size)
return (void *)ibm4xx_memstart;
}
-#define SPRN_PIR 0x11E /* Processor Indentification Register */
+#define SPRN_PIR 0x11E /* Processor Identification Register */
void platform_init(void)
{
unsigned long end_of_ram = 0x08000000;
diff --git a/arch/powerpc/boot/treeboot-walnut.c b/arch/powerpc/boot/treeboot-walnut.c
deleted file mode 100644
index 097974e59fac..000000000000
--- a/arch/powerpc/boot/treeboot-walnut.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Old U-boot compatibility for Walnut
- *
- * Author: Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * Copyright 2007 IBM Corporation
- * Based on cuboot-83xx.c, which is:
- * Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "ops.h"
-#include "stdio.h"
-#include "dcr.h"
-#include "4xx.h"
-#include "io.h"
-
-BSS_STACK(4096);
-
-static void walnut_flashsel_fixup(void)
-{
- void *devp, *sram;
- u32 reg_flash[3] = {0x0, 0x0, 0x80000};
- u32 reg_sram[3] = {0x0, 0x0, 0x80000};
- u8 *fpga;
- u8 fpga_brds1 = 0x0;
-
- devp = finddevice("/plb/ebc/fpga");
- if (!devp)
- fatal("Couldn't locate FPGA node\n\r");
-
- if (getprop(devp, "virtual-reg", &fpga, sizeof(fpga)) != sizeof(fpga))
- fatal("no virtual-reg property\n\r");
-
- fpga_brds1 = in_8(fpga);
-
- devp = finddevice("/plb/ebc/flash");
- if (!devp)
- fatal("Couldn't locate flash node\n\r");
-
- if (getprop(devp, "reg", reg_flash, sizeof(reg_flash)) != sizeof(reg_flash))
- fatal("flash reg property has unexpected size\n\r");
-
- sram = finddevice("/plb/ebc/sram");
- if (!sram)
- fatal("Couldn't locate sram node\n\r");
-
- if (getprop(sram, "reg", reg_sram, sizeof(reg_sram)) != sizeof(reg_sram))
- fatal("sram reg property has unexpected size\n\r");
-
- if (fpga_brds1 & 0x1) {
- reg_flash[1] ^= 0x80000;
- reg_sram[1] ^= 0x80000;
- }
-
- setprop(devp, "reg", reg_flash, sizeof(reg_flash));
- setprop(sram, "reg", reg_sram, sizeof(reg_sram));
-}
-
-#define WALNUT_OPENBIOS_MAC_OFF 0xfffffe0b
-static void walnut_fixups(void)
-{
- ibm4xx_sdram_fixup_memsize();
- ibm405gp_fixup_clocks(33330000, 0xa8c000);
- ibm4xx_quiesce_eth((u32 *)0xef600800, NULL);
- ibm4xx_fixup_ebc_ranges("/plb/ebc");
- walnut_flashsel_fixup();
- dt_fixup_mac_address_by_alias("ethernet0", (u8 *) WALNUT_OPENBIOS_MAC_OFF);
-}
-
-void platform_init(void)
-{
- unsigned long end_of_ram = 0x2000000;
- unsigned long avail_ram = end_of_ram - (unsigned long) _end;
-
- simple_alloc_init(_end, avail_ram, 32, 32);
- platform_ops.fixups = walnut_fixups;
- platform_ops.exit = ibm40x_dbcr_reset;
- fdt_init(_dtb_start);
- serial_console_init();
-}
diff --git a/arch/powerpc/boot/types.h b/arch/powerpc/boot/types.h
index 31393d17a9c1..8a4c418b7260 100644
--- a/arch/powerpc/boot/types.h
+++ b/arch/powerpc/boot/types.h
@@ -1,6 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _TYPES_H_
#define _TYPES_H_
+#include <stdbool.h>
+
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
typedef unsigned char u8;
@@ -12,6 +15,16 @@ typedef short s16;
typedef int s32;
typedef long long s64;
+/* required for opal-api.h */
+typedef u8 uint8_t;
+typedef u16 uint16_t;
+typedef u32 uint32_t;
+typedef u64 uint64_t;
+typedef s8 int8_t;
+typedef s16 int16_t;
+typedef s32 int32_t;
+typedef s64 int64_t;
+
#define min(x,y) ({ \
typeof(x) _x = (x); \
typeof(y) _y = (y); \
@@ -24,4 +37,16 @@ typedef long long s64;
(void) (&_x == &_y); \
_x > _y ? _x : _y; })
+#define min_t(type, a, b) min(((type) a), ((type) b))
+#define max_t(type, a, b) max(((type) a), ((type) b))
+
+typedef int bool;
+
+#ifndef true
+#define true 1
+#endif
+
+#ifndef false
+#define false 0
+#endif
#endif /* _TYPES_H_ */
diff --git a/arch/powerpc/boot/uartlite.c b/arch/powerpc/boot/uartlite.c
deleted file mode 100644
index 46bed69b4169..000000000000
--- a/arch/powerpc/boot/uartlite.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Xilinx UARTLITE bootloader driver
- *
- * Copyright (C) 2007 Secret Lab Technologies Ltd.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <stdarg.h>
-#include <stddef.h>
-#include "types.h"
-#include "string.h"
-#include "stdio.h"
-#include "io.h"
-#include "ops.h"
-
-#define ULITE_RX 0x00
-#define ULITE_TX 0x04
-#define ULITE_STATUS 0x08
-#define ULITE_CONTROL 0x0c
-
-#define ULITE_STATUS_RXVALID 0x01
-#define ULITE_STATUS_TXFULL 0x08
-
-#define ULITE_CONTROL_RST_RX 0x02
-
-static void * reg_base;
-
-static int uartlite_open(void)
-{
- /* Clear the RX FIFO */
- out_be32(reg_base + ULITE_CONTROL, ULITE_CONTROL_RST_RX);
- return 0;
-}
-
-static void uartlite_putc(unsigned char c)
-{
- u32 reg = ULITE_STATUS_TXFULL;
- while (reg & ULITE_STATUS_TXFULL) /* spin on TXFULL bit */
- reg = in_be32(reg_base + ULITE_STATUS);
- out_be32(reg_base + ULITE_TX, c);
-}
-
-static unsigned char uartlite_getc(void)
-{
- u32 reg = 0;
- while (!(reg & ULITE_STATUS_RXVALID)) /* spin waiting for RXVALID bit */
- reg = in_be32(reg_base + ULITE_STATUS);
- return in_be32(reg_base + ULITE_RX);
-}
-
-static u8 uartlite_tstc(void)
-{
- u32 reg = in_be32(reg_base + ULITE_STATUS);
- return reg & ULITE_STATUS_RXVALID;
-}
-
-int uartlite_console_init(void *devp, struct serial_console_data *scdp)
-{
- int n;
- unsigned long reg_phys;
-
- n = getprop(devp, "virtual-reg", &reg_base, sizeof(reg_base));
- if (n != sizeof(reg_base)) {
- if (!dt_xlate_reg(devp, 0, &reg_phys, NULL))
- return -1;
-
- reg_base = (void *)reg_phys;
- }
-
- scdp->open = uartlite_open;
- scdp->putc = uartlite_putc;
- scdp->getc = uartlite_getc;
- scdp->tstc = uartlite_tstc;
- scdp->close = NULL;
- return 0;
-}
diff --git a/arch/powerpc/boot/ugecon.c b/arch/powerpc/boot/ugecon.c
index 8f2a6b311534..938a38bd40ba 100644
--- a/arch/powerpc/boot/ugecon.c
+++ b/arch/powerpc/boot/ugecon.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/boot/ugecon.c
*
* USB Gecko bootwrapper console.
* Copyright (C) 2008-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
#include <stddef.h>
diff --git a/arch/powerpc/boot/ugecon.h b/arch/powerpc/boot/ugecon.h
index 43737539169b..291f33f77675 100644
--- a/arch/powerpc/boot/ugecon.h
+++ b/arch/powerpc/boot/ugecon.h
@@ -1,15 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* arch/powerpc/boot/ugecon.h
*
* USB Gecko early bootwrapper console.
* Copyright (C) 2008-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
#ifndef __UGECON_H
diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S
index 243b8497d58b..6a92376daf3f 100644
--- a/arch/powerpc/boot/util.S
+++ b/arch/powerpc/boot/util.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copied from <file:arch/powerpc/kernel/misc_32.S>
*
@@ -10,12 +11,6 @@
* kexec bits:
* Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
* GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include "ppc_asm.h"
@@ -23,7 +18,7 @@
.text
-/* udelay (on non-601 processors) needs to know the period of the
+/* udelay needs to know the period of the
* timebase in nanoseconds. This used to be hardcoded to be 60ns
* (period of 66MHz/4). Now a variable is used that is initialized to
* 60 for backward compatibility, but it can be overridden as necessary
@@ -42,61 +37,31 @@ timebase_period_ns:
*/
.globl udelay
udelay:
- mfspr r4,SPRN_PVR
- srwi r4,r4,16
- cmpwi 0,r4,1 /* 601 ? */
- bne .Ludelay_not_601
-00: li r0,86 /* Instructions / microsecond? */
- mtctr r0
-10: addi r0,r0,0 /* NOP */
- bdnz 10b
- subic. r3,r3,1
- bne 00b
- blr
-
-.Ludelay_not_601:
mulli r4,r3,1000 /* nanoseconds */
/* Change r4 to be the number of ticks using:
* (nanoseconds + (timebase_period_ns - 1 )) / timebase_period_ns
* timebase_period_ns defaults to 60 (16.6MHz) */
mflr r5
- bl 0f
+ bcl 20,31,0f
0: mflr r6
mtlr r5
- lis r5,0b@ha
- addi r5,r5,0b@l
- subf r5,r5,r6 /* In case we're relocated */
- addis r5,r5,timebase_period_ns@ha
- lwz r5,timebase_period_ns@l(r5)
+ addis r5,r6,(timebase_period_ns-0b)@ha
+ lwz r5,(timebase_period_ns-0b)@l(r5)
add r4,r4,r5
addi r4,r4,-1
divw r4,r4,r5 /* BUS ticks */
-#ifdef CONFIG_8xx
-1: mftbu r5
- mftb r6
- mftbu r7
-#else
-1: mfspr r5, SPRN_TBRU
- mfspr r6, SPRN_TBRL
- mfspr r7, SPRN_TBRU
-#endif
+1: MFTBU(r5)
+ MFTBL(r6)
+ MFTBU(r7)
cmpw 0,r5,r7
bne 1b /* Get [synced] base time */
addc r9,r6,r4 /* Compute end time */
addze r8,r5
-#ifdef CONFIG_8xx
-2: mftbu r5
-#else
-2: mfspr r5, SPRN_TBRU
-#endif
+2: MFTBU(r5)
cmpw 0,r5,r8
blt 2b
bgt 3f
-#ifdef CONFIG_8xx
- mftb r6
-#else
- mfspr r6, SPRN_TBRL
-#endif
+ MFTBL(r6)
cmpw 0,r6,r9
blt 2b
3: blr
diff --git a/arch/powerpc/boot/virtex.c b/arch/powerpc/boot/virtex.c
deleted file mode 100644
index f622805f8000..000000000000
--- a/arch/powerpc/boot/virtex.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * The platform specific code for virtex devices since a boot loader is not
- * always used.
- *
- * (C) Copyright 2008 Xilinx, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include "ops.h"
-#include "io.h"
-#include "stdio.h"
-
-#define UART_DLL 0 /* Out: Divisor Latch Low */
-#define UART_DLM 1 /* Out: Divisor Latch High */
-#define UART_FCR 2 /* Out: FIFO Control Register */
-#define UART_FCR_CLEAR_RCVR 0x02 /* Clear the RCVR FIFO */
-#define UART_FCR_CLEAR_XMIT 0x04 /* Clear the XMIT FIFO */
-#define UART_LCR 3 /* Out: Line Control Register */
-#define UART_MCR 4 /* Out: Modem Control Register */
-#define UART_MCR_RTS 0x02 /* RTS complement */
-#define UART_MCR_DTR 0x01 /* DTR complement */
-#define UART_LCR_DLAB 0x80 /* Divisor latch access bit */
-#define UART_LCR_WLEN8 0x03 /* Wordlength: 8 bits */
-
-static int virtex_ns16550_console_init(void *devp)
-{
- unsigned char *reg_base;
- u32 reg_shift, reg_offset, clk, spd;
- u16 divisor;
- int n;
-
- if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1)
- return -1;
-
- n = getprop(devp, "reg-offset", &reg_offset, sizeof(reg_offset));
- if (n == sizeof(reg_offset))
- reg_base += reg_offset;
-
- n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift));
- if (n != sizeof(reg_shift))
- reg_shift = 0;
-
- n = getprop(devp, "current-speed", (void *)&spd, sizeof(spd));
- if (n != sizeof(spd))
- spd = 9600;
-
- /* should there be a default clock rate?*/
- n = getprop(devp, "clock-frequency", (void *)&clk, sizeof(clk));
- if (n != sizeof(clk))
- return -1;
-
- divisor = clk / (16 * spd);
-
- /* Access baud rate */
- out_8(reg_base + (UART_LCR << reg_shift), UART_LCR_DLAB);
-
- /* Baud rate based on input clock */
- out_8(reg_base + (UART_DLL << reg_shift), divisor & 0xFF);
- out_8(reg_base + (UART_DLM << reg_shift), divisor >> 8);
-
- /* 8 data, 1 stop, no parity */
- out_8(reg_base + (UART_LCR << reg_shift), UART_LCR_WLEN8);
-
- /* RTS/DTR */
- out_8(reg_base + (UART_MCR << reg_shift), UART_MCR_RTS | UART_MCR_DTR);
-
- /* Clear transmitter and receiver */
- out_8(reg_base + (UART_FCR << reg_shift),
- UART_FCR_CLEAR_XMIT | UART_FCR_CLEAR_RCVR);
- return 0;
-}
-
-/* For virtex, the kernel may be loaded without using a bootloader and if so
- some UARTs need more setup than is provided in the normal console init
-*/
-int platform_specific_init(void)
-{
- void *devp;
- char devtype[MAX_PROP_LEN];
- char path[MAX_PATH_LEN];
-
- devp = finddevice("/chosen");
- if (devp == NULL)
- return -1;
-
- if (getprop(devp, "linux,stdout-path", path, MAX_PATH_LEN) > 0) {
- devp = finddevice(path);
- if (devp == NULL)
- return -1;
-
- if ((getprop(devp, "device_type", devtype, sizeof(devtype)) > 0)
- && !strcmp(devtype, "serial")
- && (dt_is_compatible(devp, "ns16550")))
- virtex_ns16550_console_init(devp);
- }
- return 0;
-}
diff --git a/arch/powerpc/boot/virtex405-head.S b/arch/powerpc/boot/virtex405-head.S
deleted file mode 100644
index 3edb13f94669..000000000000
--- a/arch/powerpc/boot/virtex405-head.S
+++ /dev/null
@@ -1,30 +0,0 @@
-#include "ppc_asm.h"
-
- .text
- .global _zimage_start
-_zimage_start:
-
- /* PPC errata 213: needed by Virtex-4 FX */
- mfccr0 0
- oris 0,0,0x50000000@h
- mtccr0 0
-
- /*
- * Invalidate the data cache if the data cache is turned off.
- * - The 405 core does not invalidate the data cache on power-up
- * or reset but does turn off the data cache. We cannot assume
- * that the cache contents are valid.
- * - If the data cache is turned on this must have been done by
- * a bootloader and we assume that the cache contents are
- * valid.
- */
- mfdccr r9
- cmplwi r9,0
- bne 2f
- lis r9,0
- li r8,256
- mtctr r8
-1: dccci r0,r9
- addi r9,r9,0x20
- bdnz 1b
-2: b _zimage_start_lib
diff --git a/arch/powerpc/boot/wii-head.S b/arch/powerpc/boot/wii-head.S
index edd79b836fcf..7b1e5a019f90 100644
--- a/arch/powerpc/boot/wii-head.S
+++ b/arch/powerpc/boot/wii-head.S
@@ -1,15 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* arch/powerpc/boot/wii-head.S
*
* Nintendo Wii bootwrapper entry.
* Copyright (C) 2008-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
#include "ppc_asm.h"
diff --git a/arch/powerpc/boot/wii.c b/arch/powerpc/boot/wii.c
index 2ebaec0344dd..59406ad04665 100644
--- a/arch/powerpc/boot/wii.c
+++ b/arch/powerpc/boot/wii.c
@@ -1,15 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/boot/wii.c
*
* Nintendo Wii bootwrapper support
* Copyright (C) 2008-2009 The GameCube Linux Team
* Copyright (C) 2008,2009 Albert Herranz
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
*/
#include <stddef.h>
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ae0f88ec4a32..a75baefd1cff 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -1,8 +1,7 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
# Copyright (C) 2006 Paul Mackerras, IBM Corporation <paulus@samba.org>
-# This program may be used under the terms of version 2 of the GNU
-# General Public License.
# This script takes a kernel binary and optionally an initrd image
# and/or a device-tree blob, and creates a bootable zImage for a
@@ -14,19 +13,25 @@
# -i initrd specify initrd file
# -d devtree specify device-tree blob
# -s tree.dts specify device-tree source file (needs dtc installed)
+# -e esm_blob specify ESM blob for secure images
# -c cache $kernel.strip.gz (use if present & newer, else make)
# -C prefix specify command prefix for cross-building tools
# (strip, objcopy, ld)
# -D dir specify directory containing data files used by script
# (default ./arch/powerpc/boot)
# -W dir specify working directory for temporary files (default .)
+# -z use gzip (legacy)
+# -Z zsuffix compression to use (gz, xz or none)
# Stop execution if any command fails
set -e
+export LC_ALL=C
+
# Allow for verbose output
if [ "$V" = 1 ]; then
set -x
+ map="-Map wrapper.map"
fi
# defaults
@@ -36,11 +41,15 @@ platform=of
initrd=
dtb=
dts=
+esm_blob=
cacheit=
binary=
-gzip=.gz
+compression=.gz
+uboot_comp=gzip
pie=
format=
+notext=
+rodynamic=
# cross-compilation prefix
CROSS=
@@ -58,11 +67,29 @@ tmpdir=.
usage() {
echo 'Usage: wrapper [-o output] [-p platform] [-i initrd]' >&2
- echo ' [-d devtree] [-s tree.dts] [-c] [-C cross-prefix]' >&2
- echo ' [-D datadir] [-W workingdir] [--no-gzip] [vmlinux]' >&2
+ echo ' [-d devtree] [-s tree.dts] [-e esm_blob]' >&2
+ echo ' [-c] [-C cross-prefix] [-D datadir] [-W workingdir]' >&2
+ echo ' [-Z (gz|xz|none)] [--no-compression] [vmlinux]' >&2
exit 1
}
+run_cmd() {
+ if [ "$V" = 1 ]; then
+ $* 2>&1
+ else
+ local msg
+
+ set +e
+ msg=$($* 2>&1)
+
+ if [ $? -ne "0" ]; then
+ echo $msg
+ exit 1
+ fi
+ set -e
+ fi
+}
+
while [ "$#" -gt 0 ]; do
case "$1" in
-o)
@@ -85,6 +112,11 @@ while [ "$#" -gt 0 ]; do
[ "$#" -gt 0 ] || usage
dtb="$1"
;;
+ -e)
+ shift
+ [ "$#" -gt 0 ] || usage
+ esm_blob="$1"
+ ;;
-s)
shift
[ "$#" -gt 0 ] || usage
@@ -109,8 +141,31 @@ while [ "$#" -gt 0 ]; do
[ "$#" -gt 0 ] || usage
tmpdir="$1"
;;
+ -z)
+ compression=.gz
+ uboot_comp=gzip
+ ;;
+	-Z)
+		shift
+		[ "$#" -gt 0 ] || usage
+		# Accept only the suffixes handled by the compression step below.
+		# (A chain of `!=` tests joined with -o is true for every value
+		# and therefore can never reject anything.)
+		case "$1" in
+		gz|xz|lzma|lzo|none) ;;
+		*) usage ;;
+		esac
+
+		compression=".$1"
+		uboot_comp=$1
+
+		# "none" means no suffix on the image and no compression for mkimage.
+		if [ "$compression" = ".none" ]; then
+			compression=
+			uboot_comp=none
+		fi
+		# The uboot case passes $uboot_comp to mkimage via -C, where
+		# this script uses the name "gzip" rather than "gz".
+		if [ "$uboot_comp" = "gz" ]; then
+			uboot_comp=gzip
+		fi
+		;;
--no-gzip)
- gzip=
+ # a "feature" of the wrapper script is that it can be used outside
+ # the kernel tree. So keeping this around for backwards compatibility.
+ compression=
+ uboot_comp=none
;;
-?)
usage
@@ -123,6 +178,7 @@ while [ "$#" -gt 0 ]; do
shift
done
+
if [ -n "$dts" ]; then
if [ ! -r "$dts" -a -r "$object/dts/$dts" ]; then
dts="$object/dts/$dts"
@@ -137,13 +193,46 @@ if [ -z "$kernel" ]; then
kernel=vmlinux
fi
-elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
case "$elfformat" in
elf64-powerpcle) format=elf64lppc ;;
elf64-powerpc) format=elf32ppc ;;
elf32-powerpc) format=elf32ppc ;;
esac
+ld_version()
+{
+ # Poached from scripts/ld-version.sh, but we don't want to call that because
+ # this script (wrapper) is distributed separately from the kernel source.
+ # Extract linker version number from stdin and turn into single number.
+ #
+ # Only the first input line is used (the awk program exits after one
+ # record). The value printed is a[1]*100000000 + a[2]*1000000 + a[3]*10000,
+ # where a[] holds the dot-separated parts of the version string.
+ awk '{
+ # Drop everything up to and including the last ")".
+ gsub(".*\\)", "");
+ # Drop everything up to and including "version ".
+ gsub(".*version ", "");
+ # Drop everything from the first "-" onwards.
+ gsub("-.*", "");
+ split($1,a, ".");
+ if( length(a[3]) == "8" )
+ # a[3] is probably a date of format yyyymmdd used for release snapshots. We
+ # can assume it to be zero as it does not signify a new version as such.
+ a[3] = 0;
+ print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
+ exit
+ }'
+}
+
+# Succeed (exit status 0) when the output of "${CROSS}ld -V", including its
+# stderr, contains the string "LLD"; fail otherwise. Prints nothing itself.
+ld_is_lld()
+{
+ ${CROSS}ld -V 2>&1 | grep -q LLD
+}
+
+# Do not include PT_INTERP segment when linking pie. Non-pie linking
+# just ignores this option.
+nodl="--no-dynamic-linker"
+
+# suppress some warnings in recent ld versions
+nowarn="-z noexecstack"
+if "${CROSS}ld" -v --no-warn-rwx-segments >/dev/null 2>&1; then
+ nowarn="$nowarn --no-warn-rwx-segments"
+fi
platformo=$object/"$platform".o
lds=$object/zImage.lds
@@ -152,9 +241,16 @@ objflags=-S
tmp=$tmpdir/zImage.$$.o
ksection=.kernel:vmlinux.strip
isection=.kernel:initrd
+esection=.kernel:esm_blob
link_address='0x400000'
make_space=y
+
+if [ -n "$esm_blob" -a "$platform" != "pseries" ]; then
+ echo "ESM blob not support on non-pseries platforms" >&2
+ exit 1
+fi
+
case "$platform" in
of)
platformo="$object/of.o $object/epapr.o"
@@ -169,11 +265,6 @@ pseries)
fi
make_space=n
;;
-maple)
- platformo="$object/of.o $object/epapr.o"
- link_address='0x400000'
- make_space=n
- ;;
pmac|chrp)
platformo="$object/of.o $object/epapr.o"
make_space=n
@@ -195,7 +286,7 @@ miboot|uboot*)
;;
cuboot*)
binary=y
- gzip=
+ compression=
case "$platform" in
*-mpc866ads|*-mpc885ads|*-adder875*|*-ep88xc)
platformo=$object/cuboot-8xx.o
@@ -215,7 +306,7 @@ cuboot*)
*-tqm8541|*-mpc8560*|*-tqm8560|*-tqm8555|*-ksi8560*)
platformo=$object/cuboot-85xx-cpm2.o
;;
- *-mpc85*|*-tqm85*|*-sbc85*)
+ *-mpc85*|*-tqm85*)
platformo=$object/cuboot-85xx.o
;;
*-amigaone)
@@ -226,7 +317,7 @@ cuboot*)
ps3)
platformo="$object/ps3-head.o $object/ps3-hvcall.o $object/ps3.o"
lds=$object/zImage.ps3.lds
- gzip=
+ compression=
ext=bin
objflags="-O binary --set-section-flags=.bss=contents,alloc,load,data"
ksection=.kernel:vmlinux.bin
@@ -235,7 +326,7 @@ ps3)
make_space=n
pie=
;;
-ep88xc|ep405|ep8248e)
+ep88xc|ep8248e)
platformo="$object/fixed-head.o $object/$platform.o"
binary=y
;;
@@ -243,14 +334,6 @@ adder875-redboot)
platformo="$object/fixed-head.o $object/redboot-8xx.o"
binary=y
;;
-simpleboot-virtex405-*)
- platformo="$object/virtex405-head.o $object/simpleboot.o $object/virtex.o"
- binary=y
- ;;
-simpleboot-virtex440-*)
- platformo="$object/fixed-head.o $object/simpleboot.o $object/virtex.o"
- binary=y
- ;;
simpleboot-*)
platformo="$object/fixed-head.o $object/simpleboot.o"
binary=y
@@ -267,6 +350,11 @@ gamecube|wii)
link_address='0x600000'
platformo="$object/$platform-head.o $object/$platform.o"
;;
+microwatt)
+ link_address='0x500000'
+ platformo="$object/fixed-head.o $object/$platform.o"
+ binary=y
+ ;;
treeboot-currituck)
link_address='0x1000000'
;;
@@ -277,38 +365,62 @@ treeboot-iss4xx-mpic)
platformo="$object/treeboot-iss4xx.o"
;;
epapr)
- platformo="$object/epapr.o $object/epapr-wrapper.o"
+ platformo="$object/pseries-head.o $object/epapr.o $object/epapr-wrapper.o"
link_address='0x20000000'
pie=-pie
+ notext='-z notext'
+ rodynamic=$(if ${CROSS}ld -V 2>&1 | grep -q LLD ; then echo "-z rodynamic"; fi)
;;
mvme5100)
platformo="$object/fixed-head.o $object/mvme5100.o"
binary=y
;;
+mvme7100)
+ platformo="$object/motload-head.o $object/mvme7100.o"
+ link_address='0x4000000'
+ binary=y
+ ;;
esac
vmz="$tmpdir/`basename \"$kernel\"`.$ext"
-if [ -z "$cacheit" -o ! -f "$vmz$gzip" -o "$vmz$gzip" -ot "$kernel" ]; then
- ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
- strip_size=$(stat -c %s $vmz.$$)
+# Calculate the vmlinux.strip size
+${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
+strip_size=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" "$vmz.$$")
- if [ -n "$gzip" ]; then
+if [ -z "$cacheit" -o ! -f "$vmz$compression" -o "$vmz$compression" -ot "$kernel" ]; then
+ # recompress the image if we need to
+ case $compression in
+ .xz)
+ xz --check=crc32 -f -6 "$vmz.$$"
+ ;;
+ .gz)
gzip -n -f -9 "$vmz.$$"
- fi
+ ;;
+ .lzma)
+ xz --format=lzma -f -6 "$vmz.$$"
+ ;;
+ .lzo)
+ lzop -f -9 "$vmz.$$"
+ ;;
+ *)
+ # drop the compression suffix so the stripped vmlinux is used
+ compression=
+ uboot_comp=none
+ ;;
+ esac
if [ -n "$cacheit" ]; then
- mv -f "$vmz.$$$gzip" "$vmz$gzip"
+ mv -f "$vmz.$$$compression" "$vmz$compression"
else
vmz="$vmz.$$"
fi
else
- # Calculate the vmlinux.strip size
- ${CROSS}objcopy $objflags "$kernel" "$vmz.$$"
- strip_size=$(stat -c %s $vmz.$$)
rm -f $vmz.$$
fi
+vmz="$vmz$compression"
+
if [ "$make_space" = "y" ]; then
# Round the size to next higher MB limit
round_size=$(((strip_size + 0xfffff) & 0xfff00000))
@@ -324,12 +436,10 @@ if [ "$make_space" = "y" ]; then
fi
fi
-vmz="$vmz$gzip"
-
# Extract kernel version information, some platforms want to include
# it in the image header
version=`${CROSS}strings "$kernel" | grep '^Linux version [-0-9.]' | \
- cut -d' ' -f3`
+ head -n1 | cut -d' ' -f3`
if [ -n "$version" ]; then
uboot_version="-n Linux-$version"
fi
@@ -340,33 +450,13 @@ membase=`${CROSS}objdump -p "$kernel" | grep -m 1 LOAD | awk '{print $7}'`
case "$platform" in
uboot)
rm -f "$ofile"
- ${MKIMAGE} -A ppc -O linux -T kernel -C gzip -a $membase -e $membase \
+ ${MKIMAGE} -A ppc -O linux -T kernel -C $uboot_comp -a $membase -e $membase \
$uboot_version -d "$vmz" "$ofile"
if [ -z "$cacheit" ]; then
rm -f "$vmz"
fi
exit 0
;;
-uboot-obs600)
- rm -f "$ofile"
- # obs600 wants a multi image with an initrd, so we need to put a fake
- # one in even when building a "normal" image.
- if [ -n "$initrd" ]; then
- real_rd="$initrd"
- else
- real_rd=`mktemp`
- echo "\0" >>"$real_rd"
- fi
- ${MKIMAGE} -A ppc -O linux -T multi -C gzip -a $membase -e $membase \
- $uboot_version -d "$vmz":"$real_rd":"$dtb" "$ofile"
- if [ -z "$initrd" ]; then
- rm -f "$real_rd"
- fi
- if [ -z "$cacheit" ]; then
- rm -f "$vmz"
- fi
- exit 0
- ;;
esac
addsec() {
@@ -391,11 +481,16 @@ if [ -n "$dtb" ]; then
fi
fi
+if [ -n "$esm_blob" ]; then
+ addsec $tmp "$esm_blob" $esection
+fi
+
if [ "$platform" != "miboot" ]; then
if [ -n "$link_address" ] ; then
text_start="-Ttext $link_address"
fi
- ${CROSS}ld -m $format -T $lds $text_start $pie -o "$ofile" \
+#link everything
+ ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \
$platformo $tmp $object/wrapper.a
rm $tmp
fi
@@ -411,7 +506,7 @@ fi
# post-processing needed for some platforms
case "$platform" in
-pseries|chrp|maple)
+pseries|chrp)
$objbin/addnote "$ofile"
;;
coff)
@@ -456,16 +551,27 @@ ps3)
${CROSS}objcopy -O binary "$ofile" "$ofile.bin"
- dd if="$ofile.bin" of="$ofile.bin" conv=notrunc \
- skip=$overlay_dest seek=$system_reset_kernel \
+ run_cmd dd if="$ofile.bin" of="$ofile.bin" conv=notrunc \
+ skip=$overlay_dest seek=$system_reset_kernel \
count=$overlay_size bs=1
- dd if="$ofile.bin" of="$ofile.bin" conv=notrunc \
- skip=$system_reset_overlay seek=$overlay_dest \
+ run_cmd dd if="$ofile.bin" of="$ofile.bin" conv=notrunc \
+ skip=$system_reset_overlay seek=$overlay_dest \
count=$overlay_size bs=1
odir="$(dirname "$ofile.bin")"
- rm -f "$odir/otheros.bld"
- gzip -n --force -9 --stdout "$ofile.bin" > "$odir/otheros.bld"
+
+ # The ps3's flash loader has a size limit of 16 MiB for the uncompressed
+ # image. If a compressed image that exceeded this limit is written to
+ # flash the loader will decompress that image until the 16 MiB limit is
+ # reached, then enter the system reset vector of the partially decompressed
+ # image. No warning is issued.
+ rm -f "$odir"/{otheros,otheros-too-big}.bld
+ size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1)
+ bld="otheros.bld"
+ if [ $size -gt $((0x1000000)) ]; then
+ bld="otheros-too-big.bld"
+ fi
+ gzip -n --force -9 --stdout "$ofile.bin" > "$odir/$bld"
;;
esac
diff --git a/arch/powerpc/boot/xz_config.h b/arch/powerpc/boot/xz_config.h
new file mode 100644
index 000000000000..9506a96ebbcc
--- /dev/null
+++ b/arch/powerpc/boot/xz_config.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __XZ_CONFIG_H__
+#define __XZ_CONFIG_H__
+
+/*
+ * most of this is copied from lib/xz/xz_private.h, we can't use their defines
+ * since the boot wrapper is not built in the same environment as the rest of
+ * the kernel.
+ */
+
+#include "types.h"
+#include "swab.h"
+
+/* Load the 32-bit word that p points to and return it passed through swab32(). */
+static inline uint32_t swab32p(void *p)
+{
+ uint32_t *q = p;
+
+ return swab32(*q);
+}
+
+/*
+ * Endianness helpers. On a little-endian build get_le32() is a plain load and
+ * the big-endian conversions go through swab32(); on a big-endian build it is
+ * the other way round.
+ */
+#ifdef __LITTLE_ENDIAN__
+#define get_le32(p) (*((uint32_t *) (p)))
+#define cpu_to_be32(x) swab32(x)
+static inline u32 be32_to_cpup(const u32 *p)
+{
+ return swab32p((u32 *)p);
+}
+#else
+#define get_le32(p) swab32p(p)
+#define cpu_to_be32(x) (x)
+static inline u32 be32_to_cpup(const u32 *p)
+{
+ return *p;
+}
+#endif
+
+/*
+ * NOTE(review): despite the "unaligned" names, the two helpers below use an
+ * ordinary 32-bit pointer access (via be32_to_cpup() / a u32 store);
+ * presumably the boot wrapper's targets tolerate misaligned accesses -- confirm.
+ */
+static inline uint32_t get_unaligned_be32(const void *p)
+{
+ return be32_to_cpup(p);
+}
+
+static inline void put_unaligned_be32(u32 val, void *p)
+{
+ *((u32 *)p) = cpu_to_be32(val);
+}
+
+/* memeq() is true when memcmp() reports the buffers equal; memzero() fills buf with 0. */
+#define memeq(a, b, size) (memcmp(a, b, size) == 0)
+#define memzero(buf, size) memset(buf, 0, size)
+
+/* prevent the inclusion of the xz-preboot MM headers */
+#define DECOMPR_MM_H
+/*
+ * NOTE(review): presumably makes "#ifndef memmove"-style checks in the xz code
+ * see memmove as already provided -- confirm against the xz sources.
+ */
+#define memmove memmove
+
+/* xz.h needs to be included directly since we need enum xz_mode */
+#include "../../../include/linux/xz.h"
+
+#endif
diff --git a/arch/powerpc/boot/zImage.coff.lds.S b/arch/powerpc/boot/zImage.coff.lds.S
index de4c9e3c9344..117951295117 100644
--- a/arch/powerpc/boot/zImage.coff.lds.S
+++ b/arch/powerpc/boot/zImage.coff.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
OUTPUT_ARCH(powerpc:common)
ENTRY(_zimage_start_opd)
EXTERN(_zimage_start_opd)
diff --git a/arch/powerpc/boot/zImage.lds.S b/arch/powerpc/boot/zImage.lds.S
index 861e72109df2..d65cd55a6f38 100644
--- a/arch/powerpc/boot/zImage.lds.S
+++ b/arch/powerpc/boot/zImage.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <asm-generic/vmlinux.lds.h>
#ifdef CONFIG_PPC64_BOOT_WRAPPER
@@ -33,6 +34,14 @@ SECTIONS
__dynamic_start = .;
*(.dynamic)
}
+
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
+ .got : ALIGN(256)
+ {
+ *(.got .toc)
+ }
+#endif
+
.hash : { *(.hash) }
.interp : { *(.interp) }
.rela.dyn :
@@ -67,14 +76,13 @@ SECTIONS
_initrd_end = .;
}
-#ifdef CONFIG_PPC64_BOOT_WRAPPER
- .got :
+ . = ALIGN(4096);
+ .kernel:esm_blob :
{
- __toc_start = .;
- *(.got)
- *(.toc)
+ _esm_blob_start = .;
+ *(.kernel:esm_blob)
+ _esm_blob_end = .;
}
-#endif
. = ALIGN(4096);
.bss :
diff --git a/arch/powerpc/boot/zImage.ps3.lds.S b/arch/powerpc/boot/zImage.ps3.lds.S
index aaa469c1e60d..d0ffb493614d 100644
--- a/arch/powerpc/boot/zImage.ps3.lds.S
+++ b/arch/powerpc/boot/zImage.ps3.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
OUTPUT_ARCH(powerpc:common)
ENTRY(_zimage_start)
EXTERN(_zimage_start)
@@ -7,7 +8,7 @@ SECTIONS
.kernel:vmlinux.bin : { *(.kernel:vmlinux.bin) }
_vmlinux_end = .;
- . = ALIGN(4096);
+ . = ALIGN(8);
_dtb_start = .;
.kernel:dtb : { *(.kernel:dtb) }
_dtb_end = .;
diff --git a/arch/powerpc/configs/32-bit.config b/arch/powerpc/configs/32-bit.config
new file mode 100644
index 000000000000..ad6546850c68
--- /dev/null
+++ b/arch/powerpc/configs/32-bit.config
@@ -0,0 +1 @@
+# CONFIG_PPC64 is not set
diff --git a/arch/powerpc/configs/40x/acadia_defconfig b/arch/powerpc/configs/40x/acadia_defconfig
deleted file mode 100644
index 69e06eeae6a6..000000000000
--- a/arch/powerpc/configs/40x/acadia_defconfig
+++ /dev/null
@@ -1,87 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_ACADIA=y
-# CONFIG_WALNUT is not set
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
-CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_MII=y
-CONFIG_IBM_EMAC=y
-CONFIG_IBM_EMAC_RXB=256
-CONFIG_IBM_EMAC_TXB=256
-CONFIG_IBM_EMAC_DEBUG=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/ep405_defconfig b/arch/powerpc/configs/40x/ep405_defconfig
deleted file mode 100644
index e9d84b5d0ab6..000000000000
--- a/arch/powerpc/configs/40x/ep405_defconfig
+++ /dev/null
@@ -1,84 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_EP405=y
-# CONFIG_WALNUT is not set
-CONFIG_SPARSE_IRQ=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_IBM_EMAC=y
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/kilauea_defconfig b/arch/powerpc/configs/40x/kilauea_defconfig
deleted file mode 100644
index 5ff338f6443f..000000000000
--- a/arch/powerpc/configs/40x/kilauea_defconfig
+++ /dev/null
@@ -1,97 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_KILAUEA=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-# CONFIG_WALNUT is not set
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_NDFC=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_PM=y
-CONFIG_SUSPEND=y
-CONFIG_PPC4xx_CPM=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
-CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_IBM_EMAC=y
-CONFIG_IBM_EMAC_RXB=256
-CONFIG_IBM_EMAC_TXB=256
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_IBM_IIC=y
-CONFIG_SENSORS_LM75=y
-CONFIG_THERMAL=y
-# CONFIG_USB_SUPPORT is not set
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/klondike_defconfig b/arch/powerpc/configs/40x/klondike_defconfig
deleted file mode 100644
index c0d228dc73dc..000000000000
--- a/arch/powerpc/configs/40x/klondike_defconfig
+++ /dev/null
@@ -1,55 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_SYSCTL_SYSCALL=y
-CONFIG_EMBEDDED=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_WALNUT is not set
-CONFIG_APM8018X=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_MATH_EMULATION=y
-# CONFIG_MIGRATION is not set
-# CONFIG_SUSPEND is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_SAS_ATTRS=y
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-# CONFIG_UNIX98_PTYS is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT4_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=y
-CONFIG_AVERAGE=y
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_SCHED_DEBUG is not set
-# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_FTRACE is not set
diff --git a/arch/powerpc/configs/40x/makalu_defconfig b/arch/powerpc/configs/40x/makalu_defconfig
deleted file mode 100644
index 84505e3aa0fb..000000000000
--- a/arch/powerpc/configs/40x/makalu_defconfig
+++ /dev/null
@@ -1,84 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_MAKALU=y
-# CONFIG_WALNUT is not set
-CONFIG_SPARSE_IRQ=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
-CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_IBM_EMAC=y
-CONFIG_IBM_EMAC_RXB=256
-CONFIG_IBM_EMAC_TXB=256
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/obs600_defconfig b/arch/powerpc/configs/40x/obs600_defconfig
deleted file mode 100644
index 91c110dad2d6..000000000000
--- a/arch/powerpc/configs/40x/obs600_defconfig
+++ /dev/null
@@ -1,83 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_WALNUT is not set
-CONFIG_OBS600=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_MATH_EMULATION=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_NDFC=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_NETDEVICES=y
-CONFIG_IBM_EMAC=y
-CONFIG_IBM_EMAC_RXB=256
-CONFIG_IBM_EMAC_TXB=256
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_IBM_IIC=y
-CONFIG_SENSORS_LM75=y
-CONFIG_THERMAL=y
-# CONFIG_USB_SUPPORT is not set
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_EXT2_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/virtex_defconfig b/arch/powerpc/configs/40x/virtex_defconfig
deleted file mode 100644
index 0a81e1f7dd59..000000000000
--- a/arch/powerpc/configs/40x/virtex_defconfig
+++ /dev/null
@@ -1,91 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_WALNUT is not set
-CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y
-CONFIG_PREEMPT=y
-CONFIG_MATH_EMULATION=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
-CONFIG_NETFILTER=y
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_XILINX_SYSACE=y
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_XILINX_XPS_PS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_UARTLITE=y
-CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_XILINX_HWICAP=y
-CONFIG_GPIOLIB=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_XILINX=y
-# CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FB_XILINX=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS_FS=y
-CONFIG_AUTOFS4_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_ROMFS_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=y
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/40x/walnut_defconfig b/arch/powerpc/configs/40x/walnut_defconfig
deleted file mode 100644
index 0a19f4386ee9..000000000000
--- a/arch/powerpc/configs/40x/walnut_defconfig
+++ /dev/null
@@ -1,77 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_SPARSE_IRQ=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_IBM_EMAC=y
-# CONFIG_INPUT is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x.config b/arch/powerpc/configs/44x.config
new file mode 100644
index 000000000000..79b7b1962995
--- /dev/null
+++ b/arch/powerpc/configs/44x.config
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_44x=y
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 7e2530cd9d30..1882eb2da354 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -4,18 +4,13 @@ CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-CONFIG_RD_BZIP2=y
-CONFIG_RD_LZMA=y
-CONFIG_RD_XZ=y
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
# CONFIG_SLUB_CPU_PARTIAL is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_POWERNV_MSI is not set
CONFIG_PPC_47x=y
# CONFIG_EBONY is not set
CONFIG_AKEBONO=y
@@ -23,23 +18,16 @@ CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
CONFIG_IRQ_ALL_CPUS=y
# CONFIG_COMPACTION is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
# CONFIG_SUSPEND is not set
-CONFIG_PCI_MSI=y
CONFIG_NET=y
+CONFIG_NETDEVICES=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_CONNECTOR=y
@@ -48,13 +36,14 @@ CONFIG_MTD_BLOCK=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
# CONFIG_SATA_PMP is not set
+CONFIG_SATA_AHCI_PLATFORM=y
# CONFIG_ATA_SFF is not set
# CONFIG_NET_VENDOR_3COM is not set
# CONFIG_NET_VENDOR_ADAPTEC is not set
@@ -62,7 +51,6 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_AMD is not set
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_CADENCE is not set
# CONFIG_NET_VENDOR_BROADCOM is not set
# CONFIG_NET_VENDOR_BROCADE is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
@@ -70,8 +58,6 @@ CONFIG_BLK_DEV_SD=y
# CONFIG_NET_VENDOR_DEC is not set
# CONFIG_NET_VENDOR_DLINK is not set
# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-# CONFIG_NET_VENDOR_HP is not set
CONFIG_IBM_EMAC=y
# CONFIG_NET_VENDOR_MARVELL is not set
# CONFIG_NET_VENDOR_MELLANOX is not set
@@ -114,13 +100,14 @@ CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_HCD_PCI is not set
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_M41T80=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
CONFIG_VFAT_FS=y
@@ -131,8 +118,7 @@ CONFIG_CRAMFS=y
CONFIG_NLS_DEFAULT="n"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_XMON=y
@@ -142,7 +128,5 @@ CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0x00010000
CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH=0x33f
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA1_PPC=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 44355c53cd30..41d04e70d4fb 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -1,21 +1,16 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_ARCHES=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -23,33 +18,20 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
CONFIG_IBM_EMAC_RXB=256
CONFIG_IBM_EMAC_TXB=256
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -64,21 +46,15 @@ CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_IBM_IIC=y
CONFIG_SENSORS_AD7414=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index cef7d62560c4..acbce718eaa8 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -1,19 +1,14 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_BAMBOO=y
# CONFIG_EBONY is not set
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -21,19 +16,11 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -47,26 +34,18 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig
index ca7f1f32f2b2..37088f250c9e 100644
--- a/arch/powerpc/configs/44x/bluestone_defconfig
+++ b/arch/powerpc/configs/44x/bluestone_defconfig
@@ -1,21 +1,16 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_PCI_QUIRKS is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_BLUESTONE=y
# CONFIG_EBONY is not set
-# CONFIG_KVM_GUEST is not set
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -23,22 +18,16 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
CONFIG_IBM_EMAC_RXB=256
CONFIG_IBM_EMAC_TXB=256
@@ -53,16 +42,14 @@ CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_IBM_IIC=y
CONFIG_SENSORS_AD7414=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_M41T80=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NLS=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index 9919a91add12..61776ade572b 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -1,21 +1,16 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_CANYONLANDS=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -23,38 +18,22 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_NDFC=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_PM=y
-CONFIG_SUSPEND=y
-CONFIG_PPC4xx_CPM=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
CONFIG_IBM_EMAC_RXB=256
CONFIG_IBM_EMAC_TXB=256
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -76,21 +55,15 @@ CONFIG_USB_EHCI_HCD=m
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_USB_LIBUSUAL=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_M41T80=y
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index 47de68261443..7283b7d4a1a5 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -1,14 +1,11 @@
CONFIG_44x=y
CONFIG_SMP=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_SPARSE_IRQ=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
@@ -19,8 +16,6 @@ CONFIG_HIGHMEM=y
CONFIG_HZ_100=y
CONFIG_MATH_EMULATION=y
CONFIG_IRQ_ALL_CPUS=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
# CONFIG_SUSPEND is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -29,22 +24,15 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_CONNECTOR=y
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
# CONFIG_SCSI_PROC_FS is not set
@@ -56,7 +44,6 @@ CONFIG_SATA_SIL24=y
# CONFIG_ATA_SFF is not set
CONFIG_NETDEVICES=y
CONFIG_E1000E=y
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -76,34 +63,27 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_M41T80=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NLS_DEFAULT="n"
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_PPC_EARLY_DEBUG=y
CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0x10000000
CONFIG_PPC_EARLY_DEBUG_44x_PHYSHIGH=0x200
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/ebony_defconfig b/arch/powerpc/configs/44x/ebony_defconfig
index 31b58b0d52e2..93d2a4e64af9 100644
--- a/arch/powerpc/configs/44x/ebony_defconfig
+++ b/arch/powerpc/configs/44x/ebony_defconfig
@@ -1,18 +1,14 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -20,27 +16,17 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -55,25 +41,18 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index faccaf65f394..509300f400e2 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -1,21 +1,15 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_EIGER=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
CONFIG_NET=y
@@ -25,42 +19,28 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_NDFC=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_FUSION=y
CONFIG_FUSION_SAS=y
-CONFIG_I2O=y
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
CONFIG_IBM_EMAC_RXB=256
CONFIG_IBM_EMAC_TXB=256
CONFIG_E1000E=y
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -80,44 +60,32 @@ CONFIG_I2C_DEBUG_CORE=y
CONFIG_I2C_DEBUG_ALGO=y
CONFIG_I2C_DEBUG_BUS=y
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_USB_SUPPORT is not set
CONFIG_DMADEVICES=y
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CRYPTD=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTS=y
-CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_LRW=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_XTS=y
-CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
-CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_BLOWFISH=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
new file mode 100644
index 000000000000..5492537f4c6c
--- /dev/null
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -0,0 +1,121 @@
+CONFIG_44x=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+# CONFIG_FHANDLE is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_KALLSYMS_ALL=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_EXPERT=y
+CONFIG_PROFILING=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PPC_47x=y
+# CONFIG_EBONY is not set
+CONFIG_FSP2=y
+CONFIG_476FPE_ERR46=y
+CONFIG_SWIOTLB=y
+CONFIG_KEXEC=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CMDLINE="ip=on rw"
+# CONFIG_SUSPEND is not set
+# CONFIG_PCI is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_BOOTP=y
+# CONFIG_IPV6 is not set
+CONFIG_VLAN_8021Q=m
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_CONNECTOR=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=35000
+# CONFIG_SCSI_PROC_FS is not set
+CONFIG_BLK_DEV_SD=y
+# CONFIG_SCSI_LOWLEVEL is not set
+CONFIG_ATA=y
+# CONFIG_SATA_PMP is not set
+# CONFIG_ATA_SFF is not set
+CONFIG_NETDEVICES=y
+CONFIG_BONDING=m
+CONFIG_IBM_EMAC=m
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RUNTIME_UARTS=32
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+CONFIG_I2C=y
+CONFIG_I2C_IBM_IIC=y
+CONFIG_PTP_1588_CLOCK=y
+# CONFIG_HWMON is not set
+CONFIG_THERMAL=y
+CONFIG_WATCHDOG=y
+CONFIG_BOOKE_WDT=y
+CONFIG_USB=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_MMC=y
+CONFIG_MMC_DEBUG=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_ARASAN=y
+CONFIG_MMC_SDHCI_ST=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_M41T80=y
+CONFIG_RESET_CONTROLLER=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_PROC_KCORE=y
+CONFIG_TMPFS=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_FS_WBUF_VERIFY=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+CONFIG_CRAMFS=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
+CONFIG_NFS_V4=y
+CONFIG_ROOT_NFS=y
+CONFIG_NLS_DEFAULT="n"
+CONFIG_XZ_DEC=y
+CONFIG_PRINTK_TIME=y
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
+CONFIG_DYNAMIC_DEBUG=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_PCBC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_DES=y
+# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 05782c145141..fb9a15573546 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -1,21 +1,14 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_ICON=y
CONFIG_HIGHMEM=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
CONFIG_NET=y
@@ -25,26 +18,16 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_XILINX_SYSACE=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_SCSI_CONSTANTS=y
@@ -55,14 +38,8 @@ CONFIG_FUSION_SAS=y
CONFIG_FUSION_CTL=y
CONFIG_FUSION_LOGGING=y
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
# CONFIG_MOUSE_PS2_ALPS is not set
# CONFIG_MOUSE_PS2_LOGIPS2PP is not set
# CONFIG_MOUSE_PS2_SYNAPTICS is not set
@@ -79,7 +56,6 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_IBM_IIC=y
# CONFIG_HWMON is not set
CONFIG_MFD_SM501=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FB_SM501=y
CONFIG_FRAMEBUFFER_CONSOLE=y
@@ -90,30 +66,22 @@ CONFIG_LOGO=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index 49a1518a4e69..0f6380e1e612 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -1,17 +1,12 @@
CONFIG_44x=y
CONFIG_SMP=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_SPARSE_IRQ=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
@@ -21,11 +16,9 @@ CONFIG_ISS4xx=y
CONFIG_HZ_100=y
CONFIG_MATH_EMULATION=y
CONFIG_IRQ_ALL_CPUS=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="root=/dev/issblk0"
# CONFIG_PCI is not set
CONFIG_ADVANCED_OPTIONS=y
-CONFIG_NONSTATIC_KERNEL=y
CONFIG_DYNAMIC_MEMSTART=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -34,21 +27,13 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
# CONFIG_INPUT is not set
@@ -64,27 +49,20 @@ CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_THERMAL=y
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index f1137972ed41..1a0f1c3e0ee9 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -1,19 +1,14 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_KATMAI=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -21,28 +16,18 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -55,25 +40,17 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index 4b91a44c4c32..6dd67de06a0b 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -1,10 +1,8 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
@@ -12,9 +10,6 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_EBONY is not set
CONFIG_RAINIER=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -22,23 +17,15 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_MACINTOSH_DRIVERS=y
@@ -55,29 +42,21 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_PPC_EARLY_DEBUG=y
CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW=0xef600300
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index b7113e114a14..e28d76416537 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -1,21 +1,15 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_REDWOOD=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
CONFIG_NET=y
@@ -25,41 +19,27 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_FUSION=y
CONFIG_FUSION_SAS=y
-CONFIG_I2O=y
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
CONFIG_IBM_EMAC_RXB=256
CONFIG_IBM_EMAC_TXB=256
CONFIG_IBM_EMAC_DEBUG=y
CONFIG_E1000E=y
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -79,44 +59,32 @@ CONFIG_I2C_DEBUG_CORE=y
CONFIG_I2C_DEBUG_ALGO=y
CONFIG_I2C_DEBUG_BUS=y
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_USB_SUPPORT is not set
CONFIG_DMADEVICES=y
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CRYPTD=y
CONFIG_CRYPTO_AUTHENC=y
CONFIG_CRYPTO_CCM=y
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CTS=y
-CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_LRW=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_XTS=y
-CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=y
CONFIG_CRYPTO_MD4=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
-CONFIG_CRYPTO_AES=y
CONFIG_CRYPTO_ARC4=y
CONFIG_CRYPTO_BLOWFISH=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 9622eb2a3e37..98221bda380d 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -1,22 +1,17 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_IKCONFIG=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_AMIGA_PARTITION=y
# CONFIG_EBONY is not set
CONFIG_SAM440EP=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -24,18 +19,11 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-# CONFIG_MISC_DEVICES is not set
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_CHR_DEV_SG=y
@@ -44,11 +32,7 @@ CONFIG_ATA=y
# CONFIG_SATA_PMP is not set
CONFIG_SATA_SIL=y
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
CONFIG_INPUT_FF_MEMLESS=m
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
@@ -59,7 +43,6 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
CONFIG_I2C_IBM_IIC=y
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_FB=y
CONFIG_FB_RADEON=y
CONFIG_LCD_CLASS_DEVICE=y
@@ -80,10 +63,8 @@ CONFIG_HID_MONTEREY=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_EHCI_HCD=m
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
@@ -95,12 +76,9 @@ CONFIG_RTC_DRV_M41T80_WDT=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS=y
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
@@ -111,11 +89,6 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_AFFS_FS=m
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_AMIGA_PARTITION=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index 9642d99b47f1..b4984eab43eb 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -1,21 +1,16 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_SEQUOIA=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -23,30 +18,20 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_NDFC=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -60,27 +45,19 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index 09e3075030bf..3ea5932ab852 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -1,19 +1,14 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_EBONY is not set
CONFIG_TAISHAN=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -21,26 +16,17 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
CONFIG_MACINTOSH_DRIVERS=y
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -54,26 +40,18 @@ CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/virtex5_defconfig b/arch/powerpc/configs/44x/virtex5_defconfig
deleted file mode 100644
index 1eb3caf828a5..000000000000
--- a/arch/powerpc/configs/44x/virtex5_defconfig
+++ /dev/null
@@ -1,90 +0,0 @@
-CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_EBONY is not set
-CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y
-CONFIG_PREEMPT=y
-CONFIG_MATH_EMULATION=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
-CONFIG_NETFILTER=y
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_XILINX_SYSACE=y
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_XILINX_XPS_PS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_UARTLITE=y
-CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_XILINX_HWICAP=y
-CONFIG_GPIOLIB=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_XILINX=y
-# CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_FB_XILINX=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-CONFIG_LOGO=y
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS_FS=y
-CONFIG_AUTOFS4_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_ROMFS_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=y
-CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index 551e50a0be5e..5757625469c4 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -1,5 +1,4 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_LOCALVERSION="-pika"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
@@ -7,7 +6,6 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
@@ -16,8 +14,6 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_WARP=y
CONFIG_PPC4xx_GPIO=y
CONFIG_HZ_1000=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="ip=on"
# CONFIG_PCI is not set
CONFIG_NET=y
@@ -26,39 +22,27 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_VLAN_8021Q=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_NDFC=y
CONFIG_MTD_UBI=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
-CONFIG_MISC_DEVICES=y
CONFIG_EEPROM_AT24=y
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_SCSI_SPI_ATTRS=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_MII=y
CONFIG_IBM_EMAC=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -72,7 +56,6 @@ CONFIG_I2C_IBM_IIC=y
CONFIG_GPIO_SYSFS=y
CONFIG_SENSORS_AD7414=y
CONFIG_THERMAL=y
-CONFIG_THERMAL_HWMON=y
CONFIG_WATCHDOG=y
CONFIG_USB=y
CONFIG_USB_MON=y
@@ -83,14 +66,10 @@ CONFIG_MMC=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
CONFIG_LEDS_GPIO=y
-# CONFIG_LEDS_GPIO_PLATFORM is not set
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
@@ -99,7 +78,6 @@ CONFIG_JFFS2_FS=y
CONFIG_UBIFS_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_850=y
@@ -107,16 +85,10 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
CONFIG_PRINTK_TIME=y
-CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/cm5200_defconfig b/arch/powerpc/configs/52xx/cm5200_defconfig
index 0dc99e141035..2412a6bf7ee6 100644
--- a/arch/powerpc/configs/52xx/cm5200_defconfig
+++ b/arch/powerpc/configs/52xx/cm5200_defconfig
@@ -1,18 +1,15 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_EPOLL is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
# CONFIG_PPC_PMAC is not set
-CONFIG_SPARSE_IRQ=y
CONFIG_PM=y
# CONFIG_PCI is not set
CONFIG_NET=y
@@ -25,32 +22,24 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-# CONFIG_MISC_DEVICES is not set
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
-CONFIG_LXT_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FEC_MPC52xx=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_LXT_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -64,16 +53,13 @@ CONFIG_I2C_MPC=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_STORAGE=y
CONFIG_DMADEVICES=y
CONFIG_PPC_BESTCOMM=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
@@ -81,17 +67,12 @@ CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig
index 104a332e79ab..7db479dcbc0c 100644
--- a/arch/powerpc/configs/52xx/lite5200b_defconfig
+++ b/arch/powerpc/configs/52xx/lite5200b_defconfig
@@ -1,10 +1,9 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_EPOLL is not set
CONFIG_MODULES=y
@@ -15,10 +14,7 @@ CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
CONFIG_PPC_LITE5200=y
# CONFIG_PPC_PMAC is not set
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PM=y
+CONFIG_GEN_RTC=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -29,11 +25,8 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -42,9 +35,8 @@ CONFIG_BLK_DEV_SD=y
CONFIG_ATA=y
CONFIG_PATA_MPC52xx=y
CONFIG_NETDEVICES=y
-CONFIG_LXT_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FEC_MPC52xx=y
+CONFIG_LXT_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -52,28 +44,20 @@ CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_DMADEVICES=y
CONFIG_PPC_BESTCOMM=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig
index c936fab9ec4a..6186ead1e105 100644
--- a/arch/powerpc/configs/52xx/motionpro_defconfig
+++ b/arch/powerpc/configs/52xx/motionpro_defconfig
@@ -1,18 +1,15 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_EPOLL is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
# CONFIG_PPC_PMAC is not set
-CONFIG_SPARSE_IRQ=y
CONFIG_PM=y
# CONFIG_PCI is not set
CONFIG_NET=y
@@ -25,44 +22,34 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_ROM=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_MISC_DEVICES=y
CONFIG_EEPROM_LEGACY=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_ATA=y
CONFIG_PATA_MPC52xx=y
CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
+CONFIG_FEC_MPC52xx=y
+CONFIG_MDIO_BITBANG=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
+CONFIG_ICPLUS_PHY=y
CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_QSEMI_PHY=y
CONFIG_SMSC_PHY=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_ICPLUS_PHY=y
-CONFIG_MDIO_BITBANG=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_FEC_MPC52xx=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_VITESSE_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -84,9 +71,7 @@ CONFIG_RTC_DRV_DS1307=y
CONFIG_DMADEVICES=y
CONFIG_PPC_BESTCOMM=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
@@ -94,18 +79,13 @@ CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/52xx/pcm030_defconfig b/arch/powerpc/configs/52xx/pcm030_defconfig
index 1d03c35540c7..88fbe0d42e11 100644
--- a/arch/powerpc/configs/52xx/pcm030_defconfig
+++ b/arch/powerpc/configs/52xx/pcm030_defconfig
@@ -1,31 +1,24 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_LOCALVERSION="trunk"
# CONFIG_LOCALVERSION_AUTO is not set
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_VM_EVENT_COUNTERS is not set
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
# CONFIG_PPC_PMAC is not set
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_100=y
CONFIG_PREEMPT=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -35,50 +28,36 @@ CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_PROC_DEVICETREE=y
# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
# CONFIG_SCSI_PROC_FS is not set
CONFIG_BLK_DEV_SD=m
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_ATA=m
CONFIG_PATA_MPC52xx=m
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
CONFIG_FEC_MPC52xx=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
CONFIG_HW_RANDOM=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
# CONFIG_HWMON is not set
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_OHCI_HCD=m
-# CONFIG_USB_OHCI_HCD_PPC_SOC is not set
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
# CONFIG_USB_OHCI_HCD_PCI is not set
CONFIG_USB_STORAGE=m
@@ -87,16 +66,13 @@ CONFIG_RTC_DRV_PCF8563=m
CONFIG_DMADEVICES=y
CONFIG_PPC_BESTCOMM=y
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=m
# CONFIG_DNOTIFY is not set
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=850
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_850=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
index ca83ec88b114..688f703d8e22 100644
--- a/arch/powerpc/configs/52xx/tqm5200_defconfig
+++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
@@ -1,17 +1,14 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
-CONFIG_SPARSE_IRQ=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EMBEDDED=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_KALLSYMS is not set
# CONFIG_EPOLL is not set
+CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC52xx=y
CONFIG_PPC_MPC5200_SIMPLE=y
@@ -29,22 +26,16 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_ROM=y
CONFIG_MTD_PHYSMAP_OF=y
CONFIG_MTD_PLATRAM=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -54,12 +45,8 @@ CONFIG_ATA=y
CONFIG_PATA_MPC52xx=y
CONFIG_PATA_PLATFORM=y
CONFIG_NETDEVICES=y
-CONFIG_LXT_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FEC_MPC52xx=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_LXT_PHY=y
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
@@ -75,7 +62,6 @@ CONFIG_FB_FOREIGN_ENDIAN=y
CONFIG_FB_SM501=y
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
@@ -86,8 +72,7 @@ CONFIG_RTC_DRV_DS1374=y
CONFIG_DMADEVICES=y
CONFIG_PPC_BESTCOMM=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
@@ -95,17 +80,13 @@ CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/64-bit.config b/arch/powerpc/configs/64-bit.config
new file mode 100644
index 000000000000..0fe6406929e2
--- /dev/null
+++ b/arch/powerpc/configs/64-bit.config
@@ -0,0 +1 @@
+CONFIG_PPC64=y
diff --git a/arch/powerpc/configs/83xx/asp8347_defconfig b/arch/powerpc/configs/83xx/asp8347_defconfig
index 985f95c7280a..10192410b33c 100644
--- a/arch/powerpc/configs/83xx/asp8347_defconfig
+++ b/arch/powerpc/configs/83xx/asp8347_defconfig
@@ -1,21 +1,20 @@
CONFIG_FSL_EMB_PERFMON=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_ASP834x=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -27,30 +26,21 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -63,8 +53,6 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-# CONFIG_HID_SUPPORT is not set
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
@@ -72,19 +60,12 @@ CONFIG_USB_EHCI_FSL=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1374=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/kmeter1_defconfig b/arch/powerpc/configs/83xx/kmeter1_defconfig
index e12e60c3b9a2..487e5e1bbf4c 100644
--- a/arch/powerpc/configs/83xx/kmeter1_defconfig
+++ b/arch/powerpc/configs/83xx/kmeter1_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
@@ -6,14 +5,11 @@ CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
@@ -26,17 +22,12 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_TIPC=y
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=y
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
@@ -45,10 +36,8 @@ CONFIG_MTD_PHYSMAP_OF=y
CONFIG_MTD_PHRAM=y
CONFIG_MTD_UBI=y
CONFIG_MTD_UBI_GLUEBI=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
-CONFIG_MII=y
CONFIG_TUN=y
CONFIG_UCC_GETH=y
CONFIG_MARVELL_PHY=y
@@ -60,7 +49,6 @@ CONFIG_HDLC=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HW_RANDOM=y
diff --git a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
index 4b4a2a9133a5..16a42e2267fb 100644
--- a/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8313_rdb_defconfig
@@ -1,20 +1,18 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC831x_RDB=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -25,20 +23,15 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -51,13 +44,9 @@ CONFIG_MD_LINEAR=y
CONFIG_MD_RAID0=y
CONFIG_MD_RAID1=y
CONFIG_NETDEVICES=y
-CONFIG_CICADA_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E100=y
+CONFIG_CICADA_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -71,7 +60,6 @@ CONFIG_I2C_MPC=y
CONFIG_SPI=y
CONFIG_SPI_BITBANG=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_USB_HID is not set
CONFIG_USB=y
CONFIG_USB_MON=y
@@ -82,26 +70,17 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_GADGET=y
-CONFIG_USB_GADGET_NET2280=y
CONFIG_USB_ETH=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
index 5871395573c5..80d40ae668eb 100644
--- a/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc8315_rdb_defconfig
@@ -1,20 +1,18 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC831x_RDB=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -25,18 +23,14 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
-CONFIG_PROC_DEVICETREE=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -50,11 +44,8 @@ CONFIG_MD_LINEAR=y
CONFIG_MD_RAID0=y
CONFIG_MD_RAID1=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E100=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -68,7 +59,6 @@ CONFIG_I2C_MPC=y
CONFIG_SPI=y
CONFIG_SPI_BITBANG=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_USB_HID is not set
CONFIG_USB=y
CONFIG_USB_MON=y
@@ -79,26 +69,17 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_USB_GADGET=y
-CONFIG_USB_GADGET_NET2280=y
CONFIG_USB_ETH=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig b/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
deleted file mode 100644
index a5699a1f7d0a..000000000000
--- a/arch/powerpc/configs/83xx/mpc832x_mds_defconfig
+++ /dev/null
@@ -1,75 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_KALLSYMS is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_83xx=y
-CONFIG_MPC832x_MDS=y
-CONFIG_QUICC_ENGINE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_SCSI=y
-CONFIG_NETDEVICES=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_UCC_GETH=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_HW_RANDOM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
index 5adc4cea42d3..b99caba8724a 100644
--- a/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc832x_rdb_defconfig
@@ -1,22 +1,21 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_LDM_PARTITION=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC832x_RDB=y
-CONFIG_QUICC_ENGINE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -27,23 +26,17 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_NETDEVICES=y
-CONFIG_ICPLUS_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_E1000=y
CONFIG_UCC_GETH=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E1000=y
+CONFIG_ICPLUS_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -51,14 +44,12 @@ CONFIG_UCC_GETH=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_SPI=y
CONFIG_SPI_BITBANG=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_USB_HID is not set
CONFIG_USB=y
CONFIG_USB_MON=y
@@ -68,27 +59,19 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_STORAGE=y
CONFIG_MMC=y
CONFIG_MMC_SPI=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_LDM_PARTITION=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_932=y
CONFIG_NLS_ISO8859_8=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
index 82b6b6c88d6a..11163052fdba 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itx_defconfig
@@ -1,20 +1,19 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC834x_ITX=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -25,20 +24,15 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_SPI_ATTRS=y
@@ -52,9 +46,8 @@ CONFIG_MD_LINEAR=y
CONFIG_MD_RAID0=y
CONFIG_MD_RAID1=y
CONFIG_NETDEVICES=y
-CONFIG_CICADA_PHY=y
-CONFIG_FIXED_PHY=y
CONFIG_GIANFAR=y
+CONFIG_CICADA_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -69,7 +62,6 @@ CONFIG_SPI=y
CONFIG_SPI_BITBANG=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
@@ -80,21 +72,12 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
index f8b228aaa03a..312d39e4242c 100644
--- a/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
+++ b/arch/powerpc/configs/83xx/mpc834x_itxgp_defconfig
@@ -1,20 +1,19 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC834x_ITX=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -25,16 +24,12 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -43,8 +38,8 @@ CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_NETDEVICES=y
-CONFIG_CICADA_PHY=y
CONFIG_GIANFAR=y
+CONFIG_CICADA_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -59,7 +54,6 @@ CONFIG_SPI=y
CONFIG_SPI_BITBANG=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
@@ -70,21 +64,12 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_INTF_DEV_UIE_EMUL=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig b/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
deleted file mode 100644
index 99660c062191..000000000000
--- a/arch/powerpc/configs/83xx/mpc834x_mds_defconfig
+++ /dev/null
@@ -1,74 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_KALLSYMS is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_83xx=y
-CONFIG_MPC834x_MDS=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
-CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig b/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
deleted file mode 100644
index 05710bbfd2ef..000000000000
--- a/arch/powerpc/configs/83xx/mpc836x_mds_defconfig
+++ /dev/null
@@ -1,81 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_KALLSYMS is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_83xx=y
-CONFIG_MPC836x_MDS=y
-CONFIG_QUICC_ENGINE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_SCSI=y
-CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_UCC_GETH=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_HW_RANDOM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
index 0540d673a052..093df33f9455 100644
--- a/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
+++ b/arch/powerpc/configs/83xx/mpc836x_rdk_defconfig
@@ -1,20 +1,18 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC836x_RDK=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -25,30 +23,23 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
-CONFIG_BROADCOM_PHY=y
CONFIG_UCC_GETH=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_BROADCOM_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_QE=y
@@ -68,20 +59,13 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig b/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
deleted file mode 100644
index f367985be6f7..000000000000
--- a/arch/powerpc/configs/83xx/mpc837x_mds_defconfig
+++ /dev/null
@@ -1,74 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_83xx=y
-CONFIG_MPC837x_MDS=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_ATA=y
-CONFIG_SATA_FSL=y
-CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
index 414eda381591..ac27f99faab8 100644
--- a/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
+++ b/arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
@@ -1,18 +1,16 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC837x_RDB=y
-CONFIG_SPARSE_IRQ=y
+CONFIG_GEN_RTC=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -23,14 +21,8 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -43,14 +35,9 @@ CONFIG_BLK_DEV_MD=y
CONFIG_MD_RAID1=y
CONFIG_MD_RAID456=y
CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
+CONFIG_MARVELL_PHY=y
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -58,12 +45,10 @@ CONFIG_INPUT_FF_MEMLESS=m
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
@@ -78,27 +63,17 @@ CONFIG_HID_MONTEREY=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_FSL=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/83xx/sbc834x_defconfig b/arch/powerpc/configs/83xx/sbc834x_defconfig
deleted file mode 100644
index 4ae385894c64..000000000000
--- a/arch/powerpc/configs/83xx/sbc834x_defconfig
+++ /dev/null
@@ -1,93 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_KALLSYMS is not set
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_83xx=y
-CONFIG_SBC834x=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_SCSI=y
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=y
-# CONFIG_SCSI_LOWLEVEL is not set
-CONFIG_NETDEVICES=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-# CONFIG_USB_HID is not set
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_FSL=y
-CONFIG_USB_STORAGE=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/85xx-32bit.config b/arch/powerpc/configs/85xx-32bit.config
new file mode 100644
index 000000000000..a85310bcb1fd
--- /dev/null
+++ b/arch/powerpc/configs/85xx-32bit.config
@@ -0,0 +1,6 @@
+CONFIG_PPC64=n
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PPC_85xx=y
+CONFIG_PROC_KCORE=y
+CONFIG_PHYS_64BIT=y
diff --git a/arch/powerpc/configs/85xx-64bit.config b/arch/powerpc/configs/85xx-64bit.config
new file mode 100644
index 000000000000..4aba81222885
--- /dev/null
+++ b/arch/powerpc/configs/85xx-64bit.config
@@ -0,0 +1,4 @@
+CONFIG_MATH_EMULATION=y
+CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED=y
+CONFIG_PPC64=y
+CONFIG_PPC_BOOK3E_64=y
diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config
new file mode 100644
index 000000000000..8aff83217397
--- /dev/null
+++ b/arch/powerpc/configs/85xx-hw.config
@@ -0,0 +1,141 @@
+CONFIG_AQUANTIA_PHY=y
+CONFIG_AT803X_PHY=y
+CONFIG_ATA=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BROADCOM_PHY=y
+CONFIG_C293_PCIE=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_CICADA_PHY=y
+CONFIG_CLK_QORIQ=y
+CONFIG_CRYPTO_DEV_FSL_CAAM=y
+CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_DAVICOM_PHY=y
+CONFIG_DMADEVICES=y
+CONFIG_E1000E=y
+CONFIG_E1000=y
+CONFIG_EDAC=y
+CONFIG_EDAC_MPC85XX=y
+CONFIG_EEPROM_AT24=y
+CONFIG_EEPROM_LEGACY=y
+CONFIG_FB_FSL_DIU=y
+CONFIG_FS_ENET=y
+CONFIG_FSL_CORENET_CF=y
+CONFIG_FSL_DMA=y
+CONFIG_FSL_HV_MANAGER=y
+CONFIG_FSL_IFC=y
+CONFIG_FSL_PQ_MDIO=y
+CONFIG_FSL_RIO=y
+CONFIG_FSL_XGMAC_MDIO=y
+CONFIG_GIANFAR=y
+CONFIG_GPIO_MPC8XXX=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_I2C_CHARDEV=y
+CONFIG_I2C_CPM=m
+CONFIG_I2C_MPC=y
+CONFIG_I2C_MUX_PCA954x=y
+CONFIG_I2C_MUX=y
+CONFIG_I2C=y
+CONFIG_IGB=y
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_MARVELL_PHY=y
+CONFIG_MDIO_BUS_MUX_GPIO=y
+CONFIG_MDIO_BUS_MUX_MMIOREG=y
+CONFIG_MEMORY=y
+CONFIG_MMC_SDHCI_OF_ESDHC=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_NAND_FSL_IFC=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PLATRAM=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_NETDEVICES=y
+CONFIG_NVRAM=y
+CONFIG_PATA_ALI=y
+CONFIG_PATA_SIL680=y
+CONFIG_PATA_VIA=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI=y
+CONFIG_PPC_EPAPR_HV_BYTECHAN=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_QE_GPIO=y
+CONFIG_QUICC_ENGINE=y
+CONFIG_RAPIDIO=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_DS1307=y
+CONFIG_RTC_DRV_DS1374=y
+CONFIG_RTC_DRV_DS3232=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_FSL=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_SIL=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SENSORS_INA2XX=y
+CONFIG_SENSORS_LM90=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=6
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_RUNTIME_UARTS=6
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_QE=m
+CONFIG_SERIO_LIBPS2=y
+# CONFIG_SND_DRIVERS is not set
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_POWERPC_SOC=y
+# CONFIG_SND_PPC is not set
+CONFIG_SND_SOC=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+# CONFIG_SND_USB is not set
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_SPI_FSL_ESPI=y
+CONFIG_SPI_FSL_SPI=y
+CONFIG_SPI_GPIO=y
+CONFIG_SPI=y
+CONFIG_TERANETICS_PHY=y
+CONFIG_UCC_GETH=y
+CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_HID=m
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VITESSE_PHY=y
diff --git a/arch/powerpc/configs/85xx-smp.config b/arch/powerpc/configs/85xx-smp.config
new file mode 100644
index 000000000000..3b4d1e54636d
--- /dev/null
+++ b/arch/powerpc/configs/85xx-smp.config
@@ -0,0 +1,2 @@
+CONFIG_NR_CPUS=24
+CONFIG_SMP=y
diff --git a/arch/powerpc/configs/85xx/ge_imp3a_defconfig b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
index dc939de9b5b0..7beb36a41d45 100644
--- a/arch/powerpc/configs/85xx/ge_imp3a_defconfig
+++ b/arch/powerpc/configs/85xx/ge_imp3a_defconfig
@@ -1,17 +1,15 @@
CONFIG_PPC_85xx=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_SPARSE_IRQ=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
# CONFIG_UTS_NS is not set
# CONFIG_IPC_NS is not set
-# CONFIG_USER_NS is not set
# CONFIG_PID_NS is not set
# CONFIG_NET_NS is not set
CONFIG_SYSFS_DEPRECATED=y
@@ -19,22 +17,19 @@ CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_PERF_EVENTS=y
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_GE_IMP3A=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
CONFIG_CPM2=y
CONFIG_HIGHMEM=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_1000=y
CONFIG_PREEMPT=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_MATH_EMULATION=y
-CONFIG_FORCE_MAX_ZONEORDER=17
+CONFIG_ARCH_FORCE_MAX_ORDER=16
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_MSI=y
@@ -64,30 +59,23 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET6_AH=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_TUNNEL=m
CONFIG_NET_PKTGEN=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_MISC_DEVICES=y
CONFIG_DS1682=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
@@ -100,7 +88,6 @@ CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
CONFIG_TUN=m
# CONFIG_NET_VENDOR_3COM is not set
CONFIG_FS_ENET=y
@@ -126,7 +113,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=2
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
@@ -142,7 +128,6 @@ CONFIG_SENSORS_LM90=y
CONFIG_SENSORS_LM92=y
CONFIG_WATCHDOG=y
CONFIG_GEF_WDT=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_HID_DRAGONRISE=y
CONFIG_HID_GYRATION=y
CONFIG_HID_TWINHAN=y
@@ -150,7 +135,6 @@ CONFIG_HID_ORTEK=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_GREENASIA=y
CONFIG_HID_SMARTJOYPLUS=y
@@ -166,21 +150,18 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
CONFIG_USB_STORAGE=y
CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_MPC85XX=y
CONFIG_RTC_CLASS=y
# CONFIG_RTC_INTF_PROC is not set
CONFIG_RTC_DRV_RX8581=y
CONFIG_DMADEVICES=y
CONFIG_FSL_DMA=y
-# CONFIG_NET_DMA is not set
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_FUSE_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
@@ -195,7 +176,6 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
@@ -241,15 +221,9 @@ CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/85xx/kmp204x_defconfig b/arch/powerpc/configs/85xx/kmp204x_defconfig
deleted file mode 100644
index e362d588dfbf..000000000000
--- a/arch/powerpc/configs/85xx/kmp204x_defconfig
+++ /dev/null
@@ -1,224 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=8
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CORENET_GENERIC=y
-CONFIG_MPIC_MSGR=y
-CONFIG_HIGHMEM=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_KEXEC=y
-CONFIG_FORCE_MAX_ZONEORDER=13
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCI_MSI=y
-CONFIG_ADVANCED_OPTIONS=y
-CONFIG_LOWMEM_SIZE_BOOL=y
-CONFIG_LOWMEM_SIZE=0x20000000
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_XFRM_SUB_POLICY=y
-CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_KEY=y
-CONFIG_NET_KEY_MIGRATE=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_INET_AH=y
-CONFIG_INET_ESP=y
-CONFIG_INET_IPCOMP=y
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_TIPC=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=y
-CONFIG_NET_SCH_HTB=y
-CONFIG_NET_SCH_HFSC=y
-CONFIG_NET_SCH_PRIO=y
-CONFIG_NET_SCH_MULTIQ=y
-CONFIG_NET_SCH_RED=y
-CONFIG_NET_SCH_SFQ=y
-CONFIG_NET_SCH_TEQL=y
-CONFIG_NET_SCH_TBF=y
-CONFIG_NET_SCH_GRED=y
-CONFIG_NET_CLS_BASIC=y
-CONFIG_NET_CLS_TCINDEX=y
-CONFIG_NET_CLS_U32=y
-CONFIG_CLS_U32_PERF=y
-CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_FLOW=y
-CONFIG_NET_CLS_CGROUP=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/mdev"
-CONFIG_DEVTMPFS=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
-CONFIG_MTD_PHRAM=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_ECC_BCH=y
-CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_GLUEBI=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=2
-CONFIG_BLK_DEV_RAM_SIZE=2048
-CONFIG_EEPROM_AT24=y
-CONFIG_SCSI=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_NETDEVICES=y
-# CONFIG_NET_VENDOR_3COM is not set
-# CONFIG_NET_VENDOR_ADAPTEC is not set
-# CONFIG_NET_VENDOR_ALTEON is not set
-# CONFIG_NET_VENDOR_AMD is not set
-# CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_CADENCE is not set
-# CONFIG_NET_VENDOR_BROADCOM is not set
-# CONFIG_NET_VENDOR_BROCADE is not set
-# CONFIG_NET_VENDOR_CHELSIO is not set
-# CONFIG_NET_VENDOR_CISCO is not set
-# CONFIG_NET_VENDOR_DEC is not set
-# CONFIG_NET_VENDOR_DLINK is not set
-# CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
-CONFIG_FSL_PQ_MDIO=y
-CONFIG_FSL_XGMAC_MDIO=y
-# CONFIG_NET_VENDOR_HP is not set
-# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_MARVELL is not set
-# CONFIG_NET_VENDOR_MELLANOX is not set
-# CONFIG_NET_VENDOR_MICREL is not set
-# CONFIG_NET_VENDOR_MICROCHIP is not set
-# CONFIG_NET_VENDOR_MYRI is not set
-# CONFIG_NET_VENDOR_NATSEMI is not set
-# CONFIG_NET_VENDOR_NVIDIA is not set
-# CONFIG_NET_VENDOR_OKI is not set
-# CONFIG_NET_PACKET_ENGINE is not set
-# CONFIG_NET_VENDOR_QLOGIC is not set
-# CONFIG_NET_VENDOR_REALTEK is not set
-# CONFIG_NET_VENDOR_RDC is not set
-# CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SILAN is not set
-# CONFIG_NET_VENDOR_SIS is not set
-# CONFIG_NET_VENDOR_SMSC is not set
-# CONFIG_NET_VENDOR_STMICRO is not set
-# CONFIG_NET_VENDOR_SUN is not set
-# CONFIG_NET_VENDOR_TEHUTI is not set
-# CONFIG_NET_VENDOR_TI is not set
-# CONFIG_NET_VENDOR_VIA is not set
-# CONFIG_NET_VENDOR_WIZNET is not set
-# CONFIG_NET_VENDOR_XILINX is not set
-CONFIG_MARVELL_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_FIXED_PHY=y
-# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_PPC_EPAPR_HV_BYTECHAN=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MUX=y
-CONFIG_I2C_MUX_PCA954x=y
-CONFIG_I2C_MPC=y
-CONFIG_SPI=y
-CONFIG_SPI_FSL_SPI=y
-CONFIG_SPI_FSL_ESPI=y
-CONFIG_SPI_SPIDEV=m
-CONFIG_PTP_1588_CLOCK=y
-# CONFIG_HWMON is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_EDAC_MPC85XX=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS3232=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_UIO=y
-CONFIG_STAGING=y
-CONFIG_CLK_PPC_CORENET=y
-CONFIG_EXT2_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_UBIFS_FS=y
-CONFIG_CRAMFS=y
-CONFIG_SQUASHFS=y
-CONFIG_SQUASHFS_XZ=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=m
-CONFIG_CRC_ITU_T=m
-CONFIG_DEBUG_INFO=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_SHIRQ=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_SCHEDSTATS=y
-CONFIG_RCU_TRACE=y
-CONFIG_UPROBE_EVENT=y
-CONFIG_CRYPTO_NULL=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_FSL_CAAM=y
diff --git a/arch/powerpc/configs/85xx/ksi8560_defconfig b/arch/powerpc/configs/85xx/ksi8560_defconfig
index aee0d17a9551..9cb211fb6d1e 100644
--- a/arch/powerpc/configs/85xx/ksi8560_defconfig
+++ b/arch/powerpc/configs/85xx/ksi8560_defconfig
@@ -1,17 +1,17 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
CONFIG_KSI8560=y
CONFIG_CPM2=y
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -22,13 +22,9 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
@@ -37,38 +33,25 @@ CONFIG_MTD_PHYSMAP_OF=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
# CONFIG_FS_ENET_HAS_SCC is not set
CONFIG_FS_ENET_MDIO_FCC=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_MARVELL_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig b/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
deleted file mode 100644
index 11662c217ac0..000000000000
--- a/arch/powerpc/configs/85xx/mpc8540_ads_defconfig
+++ /dev/null
@@ -1,63 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_MPC8540_ADS=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BINFMT_MISC=y
-CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
-# CONFIG_SECCOMP is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_MUTEXES=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig b/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
deleted file mode 100644
index ebe9b30b0721..000000000000
--- a/arch/powerpc/configs/85xx/mpc8560_ads_defconfig
+++ /dev/null
@@ -1,64 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_MPC8560_ADS=y
-CONFIG_BINFMT_MISC=y
-CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
-# CONFIG_SECCOMP is not set
-CONFIG_PCI=y
-CONFIG_PCI_DEBUG=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_FS_ENET=y
-# CONFIG_FS_ENET_HAS_SCC is not set
-CONFIG_E1000=y
-CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_CPM=y
-CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_MUTEXES=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig b/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
deleted file mode 100644
index eb25229b387a..000000000000
--- a/arch/powerpc/configs/85xx/mpc85xx_cds_defconfig
+++ /dev/null
@@ -1,68 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_MPC85xx_CDS=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BINFMT_MISC=y
-CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
-# CONFIG_SECCOMP is not set
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_E1000=y
-CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_MUTEXES=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/ppa8548_defconfig b/arch/powerpc/configs/85xx/ppa8548_defconfig
index e80bb9b21eac..4bd5f993d26a 100644
--- a/arch/powerpc/configs/85xx/ppa8548_defconfig
+++ b/arch/powerpc/configs/85xx/ppa8548_defconfig
@@ -1,11 +1,14 @@
CONFIG_PPC_85xx=y
-CONFIG_PPA8548=y
-CONFIG_DTC=y
-CONFIG_DEFAULT_UIMAGE=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
-# CONFIG_PCI is not set
-# CONFIG_USB_SUPPORT is not set
+CONFIG_PPA8548=y
+CONFIG_FSL_LBC=y
+CONFIG_RAPIDIO=y
+CONFIG_FSL_RIO=y
+CONFIG_RAPIDIO_DMA_ENGINE=y
+CONFIG_RAPIDIO_ENUM_BASIC=y
+CONFIG_RAPIDIO_CPS_XX=y
+CONFIG_RAPIDIO_CPS_GEN2=y
CONFIG_ADVANCED_OPTIONS=y
CONFIG_LOWMEM_SIZE_BOOL=y
CONFIG_LOWMEM_SIZE=0x40000000
@@ -14,51 +17,28 @@ CONFIG_LOWMEM_CAM_NUM=4
CONFIG_PAGE_OFFSET_BOOL=y
CONFIG_PAGE_OFFSET=0xb0000000
CONFIG_KERNEL_START_BOOL=y
-CONFIG_KERNEL_START=0xb0000000
-# CONFIG_PHYSICAL_START_BOOL is not set
-CONFIG_PHYSICAL_START=0x00000000
-CONFIG_PHYSICAL_ALIGN=0x04000000
CONFIG_TASK_SIZE_BOOL=y
CONFIG_TASK_SIZE=0xb0000000
-
-CONFIG_FSL_LBC=y
-CONFIG_FSL_DMA=y
-CONFIG_FSL_RIO=y
-
-CONFIG_RAPIDIO=y
-CONFIG_RAPIDIO_DMA_ENGINE=y
-CONFIG_RAPIDIO_TSI57X=y
-CONFIG_RAPIDIO_TSI568=y
-CONFIG_RAPIDIO_CPS_XX=y
-CONFIG_RAPIDIO_CPS_GEN2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_PROC_DEVICETREE=y
-
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_IP_PNP=y
CONFIG_MTD=y
-CONFIG_MTD_BLKDEVS=y
+CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CONCAT=y
+CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-
+CONFIG_NETDEVICES=y
+CONFIG_GIANFAR=y
+CONFIG_MARVELL_PHY=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_I2C=y
CONFIG_I2C_MPC=y
-CONFIG_I2C_CHARDEV
+# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
-CONFIG_RTC_HCTOSYS=y
CONFIG_RTC_DRV_ISL1208=y
-
-CONFIG_NET=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_NETDEVICES=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-CONFIG_MARVELL_PHY=y
+CONFIG_FSL_DMA=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
diff --git a/arch/powerpc/configs/85xx/sbc8548_defconfig b/arch/powerpc/configs/85xx/sbc8548_defconfig
deleted file mode 100644
index 008a7a47b89b..000000000000
--- a/arch/powerpc/configs/85xx/sbc8548_defconfig
+++ /dev/null
@@ -1,76 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_SBC8548=y
-CONFIG_BINFMT_MISC=y
-CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
-# CONFIG_SECCOMP is not set
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_NETDEVICES=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_GEN_PROBE=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_NOSWAP=y
-CONFIG_MTD_CFI_GEOMETRY=y
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-CONFIG_MTD_CFI_I4=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_UTIL=y
-CONFIG_MTD_PHYSMAP_OF=y
diff --git a/arch/powerpc/configs/85xx/socrates_defconfig b/arch/powerpc/configs/85xx/socrates_defconfig
index 435fd408eef1..7037a6d8018c 100644
--- a/arch/powerpc/configs/85xx/socrates_defconfig
+++ b/arch/powerpc/configs/85xx/socrates_defconfig
@@ -1,20 +1,17 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=16
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
# CONFIG_EPOLL is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_SOCRATES=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -25,22 +22,16 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_CAN=y
-CONFIG_CAN_RAW=y
-CONFIG_CAN_BCM=y
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_SOCRATES=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
@@ -49,13 +40,8 @@ CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=800
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
+CONFIG_MARVELL_PHY=y
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -65,7 +51,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=2
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
@@ -82,8 +67,6 @@ CONFIG_FB_MB862XX=y
CONFIG_FB_MB862XX_LIME=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x16=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
@@ -93,17 +76,11 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_STORAGE=y
CONFIG_RTC_CLASS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_FONTS=y
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index 5d4db154bf59..0a42072fa23c 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -1,9 +1,7 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODVERSIONS=y
@@ -12,7 +10,6 @@ CONFIG_STX_GP3=y
CONFIG_HIGHMEM=y
CONFIG_BINFMT_MISC=m
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -20,36 +17,27 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_NET_PKTGEN=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_PARPORT=m
CONFIG_PARPORT_PC=m
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=m
CONFIG_SCSI=m
CONFIG_BLK_DEV_SD=m
CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_NETDEVICES=y
-CONFIG_MARVELL_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_GIANFAR=y
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1280
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1024
+CONFIG_MARVELL_PHY=y
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=m
# CONFIG_VT is not set
@@ -61,11 +49,8 @@ CONFIG_AGP=m
CONFIG_DRM=m
CONFIG_SOUND=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
@@ -73,16 +58,8 @@ CONFIG_VFAT_FS=m
CONFIG_TMPFS=y
CONFIG_CRAMFS=m
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_SMB_FS=m
CONFIG_NLS=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=m
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BDI_SWITCH=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8540_defconfig b/arch/powerpc/configs/85xx/tqm8540_defconfig
index 5a800e6e38e3..bbf040aa1f9a 100644
--- a/arch/powerpc/configs/85xx/tqm8540_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8540_defconfig
@@ -1,17 +1,16 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
# CONFIG_EPOLL is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8540=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -22,51 +21,37 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E100=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_HWMON_DEBUG_CHIP=y
CONFIG_SENSORS_LM75=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8541_defconfig b/arch/powerpc/configs/85xx/tqm8541_defconfig
index 2d936697d69e..523ad8dcfd9d 100644
--- a/arch/powerpc/configs/85xx/tqm8541_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8541_defconfig
@@ -1,17 +1,16 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
# CONFIG_EPOLL is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8541=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -22,27 +21,21 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E100=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -51,24 +44,16 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_HWMON_DEBUG_CHIP=y
CONFIG_SENSORS_LM75=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig
index ce8a67e89473..afa1b9b633f8 100644
--- a/arch/powerpc/configs/85xx/tqm8548_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8548_defconfig
@@ -1,20 +1,19 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8548=y
CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_BINFMT_MISC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
@@ -29,29 +28,20 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND_ECC_SMC=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_NAND_ECC_SW_HAMMING_SMC=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_FSL_UPM=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -63,22 +53,14 @@ CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_SENSORS_LM75=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8555_defconfig b/arch/powerpc/configs/85xx/tqm8555_defconfig
index a4e12971ccac..0032ce1e8c9c 100644
--- a/arch/powerpc/configs/85xx/tqm8555_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8555_defconfig
@@ -1,17 +1,16 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
# CONFIG_EPOLL is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8555=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -22,27 +21,21 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E100=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -51,24 +44,16 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_HWMON_DEBUG_CHIP=y
CONFIG_SENSORS_LM75=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/tqm8560_defconfig b/arch/powerpc/configs/85xx/tqm8560_defconfig
index 341abe18a74d..a80b971f7d6e 100644
--- a/arch/powerpc/configs/85xx/tqm8560_defconfig
+++ b/arch/powerpc/configs/85xx/tqm8560_defconfig
@@ -1,17 +1,16 @@
CONFIG_PPC_85xx=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
# CONFIG_EPOLL is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
CONFIG_TQM8560=y
+CONFIG_GEN_RTC=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -22,27 +21,21 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_VIA82CXXX=y
+CONFIG_ATA=y
+CONFIG_ATA_GENERIC=y
+CONFIG_PATA_VIA=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
CONFIG_GIANFAR=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E100=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -51,24 +44,16 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_HWMON_DEBUG_CHIP=y
CONFIG_SENSORS_LM75=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index 72df8ab8449e..488d03ae6d6c 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -1,29 +1,25 @@
CONFIG_PPC_85xx=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
CONFIG_AUDIT=y
+CONFIG_BSD_PROCESS_ACCT=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_XES_MPC85xx=y
-CONFIG_GPIO_MPC8XXX=y
CONFIG_HIGHMEM=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
# CONFIG_PCIEASPM is not set
@@ -52,23 +48,12 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=y
-CONFIG_NET_IPGRE=y
-CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_REDBOOT_PARTS=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
@@ -76,10 +61,9 @@ CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_CFI_STAA=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_FSL_ELBC=y
CONFIG_MTD_NAND_FSL_UPM=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=y
CONFIG_BLK_DEV_RAM=y
@@ -88,20 +72,15 @@ CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_LOGGING=y
CONFIG_ATA=y
CONFIG_SATA_AHCI=y
CONFIG_PATA_ALI=y
CONFIG_NETDEVICES=y
CONFIG_DUMMY=y
-CONFIG_BROADCOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_E1000=y
CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_E1000=y
+CONFIG_BROADCOM_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
CONFIG_SERIO_LIBPS2=y
@@ -109,7 +88,6 @@ CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=2
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
@@ -118,36 +96,27 @@ CONFIG_NVRAM=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_PCA953X=y
CONFIG_SENSORS_DS1621=y
CONFIG_SENSORS_LM90=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
-CONFIG_USB_ISP1760_HCD=y
CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=y
-CONFIG_LEDS_GPIO=y
CONFIG_LEDS_PCA955X=y
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_TIMER=y
CONFIG_LEDS_TRIGGER_HEARTBEAT=y
-CONFIG_LEDS_TRIGGER_GPIO=y
CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_DMADEVICES=y
CONFIG_FSL_DMA=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
@@ -159,17 +128,11 @@ CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_JFFS2_SUMMARY=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=y
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_T10DIF=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config
new file mode 100644
index 000000000000..e7bd265fae5a
--- /dev/null
+++ b/arch/powerpc/configs/86xx-hw.config
@@ -0,0 +1,101 @@
+CONFIG_ATA=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BLK_DEV_SR=y
+CONFIG_BROADCOM_PHY=y
+# CONFIG_CARDBUS is not set
+CONFIG_CHR_DEV_SG=y
+CONFIG_CHR_DEV_ST=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_DS1682=y
+CONFIG_EEPROM_LEGACY=y
+CONFIG_GEF_WDT=y
+CONFIG_GIANFAR=y
+CONFIG_GPIO_GE_FPGA=y
+CONFIG_GPIO_SYSFS=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SAMSUNG=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_1000=y
+CONFIG_I2C_MPC=y
+CONFIG_I2C=y
+CONFIG_INPUT_FF_MEMLESS=m
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_CFI_INTELEXT=y
+CONFIG_MTD_CFI_LE_BYTE_SWAP=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_JEDECPROBE=y
+CONFIG_MTD_NAND_FSL_ELBC=y
+CONFIG_MTD_RAW_NAND=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_TULIP=y
+CONFIG_NVRAM=y
+CONFIG_PATA_ALI=y
+CONFIG_PCCARD=y
+CONFIG_PCI_DEBUG=y
+# CONFIG_PCIEASPM is not set
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI=y
+# CONFIG_PCMCIA_LOAD_CIS is not set
+# CONFIG_PPC_CHRP is not set
+# CONFIG_PPC_PMAC is not set
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_CMOS=y
+CONFIG_RTC_DRV_RX8581=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_SIL24=y
+CONFIG_SATA_SIL=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SENSORS_LM90=y
+CONFIG_SENSORS_LM92=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=5
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_RUNTIME_UARTS=5
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SND_INTEL8X0=y
+CONFIG_SND_MIXER_OSS=y
+CONFIG_SND_PCM_OSS=y
+# CONFIG_SND_SUPPORT_OLD_API is not set
+CONFIG_SND=y
+CONFIG_SOUND=y
+CONFIG_ULI526X=y
+CONFIG_USB_EHCI_HCD=y
+CONFIG_USB_MON=y
+CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
+CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
+CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB=y
+CONFIG_VITESSE_PHY=y
+CONFIG_VME_BUS=y
+CONFIG_VME_TSI148=y
+CONFIG_WATCHDOG=y
+# CONFIG_YENTA_O2 is not set
+# CONFIG_YENTA_RICOH is not set
+# CONFIG_YENTA_TOSHIBA is not set
+CONFIG_YENTA=y
diff --git a/arch/powerpc/configs/86xx-smp.config b/arch/powerpc/configs/86xx-smp.config
new file mode 100644
index 000000000000..40ac38d3038c
--- /dev/null
+++ b/arch/powerpc/configs/86xx-smp.config
@@ -0,0 +1,2 @@
+CONFIG_NR_CPUS=2
+CONFIG_SMP=y
diff --git a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig b/arch/powerpc/configs/86xx/gef_ppc9a_defconfig
deleted file mode 100644
index e5a648115ada..000000000000
--- a/arch/powerpc/configs/86xx/gef_ppc9a_defconfig
+++ /dev/null
@@ -1,237 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_GEF_PPC9A=y
-CONFIG_HIGHMEM=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_1000=y
-CONFIG_PREEMPT=y
-CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCCARD=y
-# CONFIG_PCMCIA_LOAD_CIS is not set
-# CONFIG_CARDBUS is not set
-CONFIG_YENTA=y
-# CONFIG_YENTA_O2 is not set
-# CONFIG_YENTA_RICOH is not set
-# CONFIG_YENTA_TOSHIBA is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NET_PKTGEN=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_MISC_DEVICES=y
-CONFIG_DS1682=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECS=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_ATA=y
-CONFIG_SATA_SIL=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_BONDING=m
-CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPPOE=m
-CONFIG_SLIP=m
-CONFIG_SLIP_COMPRESSED=y
-CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_HW_RANDOM=y
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_GE_FPGA=y
-CONFIG_SENSORS_LM90=y
-CONFIG_SENSORS_LM92=y
-CONFIG_WATCHDOG=y
-CONFIG_GEF_WDT=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=y
-CONFIG_RTC_CLASS=y
-# CONFIG_RTC_INTF_PROC is not set
-CONFIG_RTC_DRV_RX8581=y
-CONFIG_STAGING=y
-# CONFIG_STAGING_EXCLUDE_BUILD is not set
-CONFIG_VME_BUS=y
-CONFIG_VME_TSI148=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_INOTIFY=y
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=850
-CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=y
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/86xx/gef_sbc310_defconfig b/arch/powerpc/configs/86xx/gef_sbc310_defconfig
deleted file mode 100644
index 8317b6010ba6..000000000000
--- a/arch/powerpc/configs/86xx/gef_sbc310_defconfig
+++ /dev/null
@@ -1,234 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_GEF_SBC310=y
-CONFIG_HIGHMEM=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_1000=y
-CONFIG_PREEMPT=y
-CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCCARD=y
-# CONFIG_PCMCIA_LOAD_CIS is not set
-# CONFIG_CARDBUS is not set
-CONFIG_YENTA=y
-# CONFIG_YENTA_O2 is not set
-# CONFIG_YENTA_RICOH is not set
-# CONFIG_YENTA_TOSHIBA is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NET_PKTGEN=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_MISC_DEVICES=y
-CONFIG_DS1682=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECS=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_ATA=y
-CONFIG_SATA_SIL24=y
-# CONFIG_ATA_SFF is not set
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_BONDING=m
-CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPPOE=m
-CONFIG_SLIP=m
-CONFIG_SLIP_COMPRESSED=y
-CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_HW_RANDOM=y
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_GE_FPGA=y
-CONFIG_SENSORS_LM90=y
-CONFIG_SENSORS_LM92=y
-CONFIG_WATCHDOG=y
-CONFIG_GEF_WDT=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=y
-CONFIG_RTC_CLASS=y
-# CONFIG_RTC_INTF_PROC is not set
-CONFIG_RTC_DRV_RX8581=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_INOTIFY=y
-CONFIG_ISO9660_FS=y
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_FAT_DEFAULT_CODEPAGE=850
-CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=y
-CONFIG_MAGIC_SYSRQ=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/86xx/gef_sbc610_defconfig b/arch/powerpc/configs/86xx/gef_sbc610_defconfig
deleted file mode 100644
index 124d66f0282c..000000000000
--- a/arch/powerpc/configs/86xx/gef_sbc610_defconfig
+++ /dev/null
@@ -1,307 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_GEF_SBC610=y
-CONFIG_HIGHMEM=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_1000=y
-CONFIG_PREEMPT=y
-CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCI_DEBUG=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP_SCTP=m
-CONFIG_TIPC=m
-CONFIG_ATM=m
-CONFIG_ATM_CLIP=m
-CONFIG_ATM_LANE=m
-CONFIG_ATM_MPOA=m
-CONFIG_ATM_BR2684=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_WAN_ROUTER=m
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_ATM=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_PKTGEN=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_MISC_DEVICES=y
-CONFIG_DS1682=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_ATA=y
-CONFIG_SATA_SIL=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_BONDING=m
-CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPPOE=m
-CONFIG_PPPOATM=m
-CONFIG_SLIP=m
-CONFIG_SLIP_COMPRESSED=y
-CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_HW_RANDOM=y
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_GPIO_SYSFS=y
-CONFIG_GPIO_GE_FPGA=y
-CONFIG_SENSORS_LM90=y
-CONFIG_SENSORS_LM92=y
-CONFIG_WATCHDOG=y
-CONFIG_GEF_WDT=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=y
-CONFIG_RTC_CLASS=y
-# CONFIG_RTC_INTF_PROC is not set
-CONFIG_RTC_DRV_RX8581=y
-CONFIG_STAGING=y
-# CONFIG_STAGING_EXCLUDE_BUILD is not set
-CONFIG_VME_BUS=y
-CONFIG_VME_TSI148=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_INOTIFY=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig b/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig
deleted file mode 100644
index 9b192bb6bd3d..000000000000
--- a/arch/powerpc/configs/86xx/mpc8610_hpcd_defconfig
+++ /dev/null
@@ -1,124 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-# CONFIG_ELF_CORE is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_MPC8610_HPCD=y
-CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_1000=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_FORCE_MAX_ZONEORDER=12
-# CONFIG_SECCOMP is not set
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCI_DEBUG=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_IDE=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_PATA_ALI=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_TULIP=y
-CONFIG_ULI526X=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_HW_RANDOM is not set
-CONFIG_I2C=y
-CONFIG_I2C_MPC=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_FB=y
-CONFIG_FB_FSL_DIU=y
-CONFIG_VGACON_SOFT_SCROLLBACK=y
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-CONFIG_SND_MIXER_OSS=y
-CONFIG_SND_PCM_OSS=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-CONFIG_SND_SOC=y
-CONFIG_SND_POWERPC_SOC=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_DNOTIFY is not set
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_LDM_PARTITION=y
-CONFIG_NLS=y
-CONFIG_CRC_T10DIF=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_SHIRQ=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig b/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
deleted file mode 100644
index 76f43df3dec7..000000000000
--- a/arch/powerpc/configs/86xx/mpc8641_hpcn_defconfig
+++ /dev/null
@@ -1,175 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_AUDIT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_MPC8641_HPCN=y
-CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_1000=y
-CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_NET_IPGRE=y
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_MISC_DEVICES=y
-CONFIG_EEPROM_LEGACY=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_PATA_ALI=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_MPC=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-CONFIG_SND_MIXER_OSS=y
-CONFIG_SND_PCM_OSS=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-CONFIG_SND_INTEL8X0=y
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_USB_STORAGE=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_ADFS_FS=m
-CONFIG_AFFS_FS=m
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_BEFS_FS=m
-CONFIG_BFS_FS=m
-CONFIG_EFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_VXFS_FS=m
-CONFIG_HPFS_FS=m
-CONFIG_QNX4FS_FS=m
-CONFIG_SYSV_FS=m
-CONFIG_UFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CRC_T10DIF=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/86xx/sbc8641d_defconfig b/arch/powerpc/configs/86xx/sbc8641d_defconfig
deleted file mode 100644
index 1e151594c691..000000000000
--- a/arch/powerpc/configs/86xx/sbc8641d_defconfig
+++ /dev/null
@@ -1,279 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_BSD_PROCESS_ACCT_V3=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_RELAY=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_SBC8641D=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_PREEMPT=y
-CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_INET_LRO is not set
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_QUEUE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_IP_SCTP=m
-CONFIG_TIPC=m
-CONFIG_ATM=m
-CONFIG_ATM_CLIP=m
-CONFIG_ATM_LANE=m
-CONFIG_ATM_MPOA=m
-CONFIG_ATM_BR2684=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_WAN_ROUTER=m
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_ATM=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_PKTGEN=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_LE_BYTE_SWAP=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_MD_RAID0=y
-CONFIG_MD_RAID1=y
-CONFIG_MD_RAID10=y
-CONFIG_MD_MULTIPATH=y
-CONFIG_MD_FAULTY=y
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=y
-CONFIG_DM_SNAPSHOT=y
-CONFIG_DM_MIRROR=y
-CONFIG_DM_ZERO=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_BONDING=m
-CONFIG_TUN=m
-CONFIG_BROADCOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-# CONFIG_NETDEV_10000 is not set
-CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPPOE=m
-CONFIG_PPPOATM=m
-CONFIG_SLIP=m
-CONFIG_SLIP_COMPRESSED=y
-CONFIG_SLIP_SMART=y
-CONFIG_SLIP_MODE_SLIP6=y
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_SERIAL_8250_PCI is not set
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_WATCHDOG=y
-CONFIG_SOFT_WATCHDOG=m
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS=m
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_OCFS2_FS=m
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_MINIX_FS=m
-CONFIG_ROMFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_SMB_FS=m
-CONFIG_SMB_NLS_DEFAULT=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_NLS_UTF8=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/8xx.config b/arch/powerpc/configs/8xx.config
new file mode 100644
index 000000000000..7eb3ffbbd667
--- /dev/null
+++ b/arch/powerpc/configs/8xx.config
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_PPC_8xx=y
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index 15b1ff5d96e7..3c6445c98a85 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -1,21 +1,17 @@
CONFIG_PPC_8xx=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_ADDER875=y
-CONFIG_8xx_COPYBACK=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -24,50 +20,31 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
# CONFIG_FS_ENET_HAS_SCC is not set
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_DAVICOM_PHY=y
# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-# CONFIG_HID_SUPPORT is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_CRC32 is not set
-CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_CRC32_SLICEBY4=y
diff --git a/arch/powerpc/configs/altivec.config b/arch/powerpc/configs/altivec.config
new file mode 100644
index 000000000000..58a697cb5a62
--- /dev/null
+++ b/arch/powerpc/configs/altivec.config
@@ -0,0 +1 @@
+CONFIG_ALTIVEC=y
diff --git a/arch/powerpc/configs/amigaone_defconfig b/arch/powerpc/configs/amigaone_defconfig
index 8c66b13e59fc..69ef3dc31c4b 100644
--- a/arch/powerpc/configs/amigaone_defconfig
+++ b/arch/powerpc/configs/amigaone_defconfig
@@ -1,8 +1,9 @@
CONFIG_ALTIVEC=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=15
@@ -12,24 +13,19 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_AMIGA_PARTITION=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_AMIGAONE=y
CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_BINFMT_MISC=y
-# CONFIG_MIGRATION is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
@@ -37,52 +33,40 @@ CONFIG_NETFILTER=y
# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
# CONFIG_NETFILTER_XT_MATCH_STATE is not set
-# CONFIG_IP_NF_TARGET_ULOG is not set
# CONFIG_IP_NF_MANGLE is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
CONFIG_PARPORT_PC_FIFO=y
CONFIG_BLK_DEV_FD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-# CONFIG_IDEPCI_PCIBUS_ORDER is not set
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_SIIMAGE=y
-CONFIG_BLK_DEV_VIA82CXXX=y
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
# CONFIG_SCSI_SYM53C8XX_MMIO is not set
+CONFIG_ATA=y
+CONFIG_PATA_SIL680=y
+CONFIG_PATA_VIA=y
+CONFIG_ATA_GENERIC=y
CONFIG_NETDEVICES=y
-CONFIG_PHYLIB=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_VENDOR_3COM=y
CONFIG_VORTEX=y
-CONFIG_NET_PCI=y
CONFIG_8139CP=y
CONFIG_8139TOO=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_PHYLIB=y
CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=y
@@ -96,7 +80,6 @@ CONFIG_FIRMWARE_EDID=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_RADEON=y
CONFIG_FB_3DFX=y
-CONFIG_DISPLAY_SUPPORT=m
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID_GYRATION=y
@@ -104,7 +87,6 @@ CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_TOPSEED=y
CONFIG_USB=y
@@ -115,30 +97,20 @@ CONFIG_USB_STORAGE=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_CMOS=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
CONFIG_EXT4_FS=y
-CONFIG_INOTIFY=y
CONFIG_ISO9660_FS=y
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_AFFS_FS=m
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_AMIGA_PARTITION=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/be.config b/arch/powerpc/configs/be.config
new file mode 100644
index 000000000000..c5cdc99a6530
--- /dev/null
+++ b/arch/powerpc/configs/be.config
@@ -0,0 +1 @@
+CONFIG_CPU_BIG_ENDIAN=y
diff --git a/arch/powerpc/configs/book3s_32.config b/arch/powerpc/configs/book3s_32.config
new file mode 100644
index 000000000000..8721eb7b1294
--- /dev/null
+++ b/arch/powerpc/configs/book3s_32.config
@@ -0,0 +1,2 @@
+CONFIG_PPC64=n
+CONFIG_PPC_BOOK3S_32=y
diff --git a/arch/powerpc/configs/c2k_defconfig b/arch/powerpc/configs/c2k_defconfig
deleted file mode 100644
index 5e2aa43562b5..000000000000
--- a/arch/powerpc/configs/c2k_defconfig
+++ /dev/null
@@ -1,450 +0,0 @@
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_AUDIT=y
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_EMBEDDED6xx=y
-CONFIG_PPC_C2K=y
-CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
-CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
-CONFIG_CPU_FREQ_GOV_POWERSAVE=m
-CONFIG_CPU_FREQ_GOV_ONDEMAND=m
-CONFIG_HIGHMEM=y
-CONFIG_PREEMPT_VOLUNTARY=y
-CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PM=y
-CONFIG_PCI_MSI=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_SHPC=m
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-CONFIG_IPV6_PRIVACY=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_XT_MATCH_SCTP is not set
-CONFIG_IP_NF_QUEUE=m
-CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
-CONFIG_IP_NF_MATCH_ECN=m
-CONFIG_IP_NF_MATCH_TTL=m
-CONFIG_IP_NF_FILTER=m
-CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_ECN=m
-CONFIG_IP_NF_RAW=m
-CONFIG_IP_NF_ARPTABLES=m
-CONFIG_IP_NF_ARPFILTER=m
-CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP6_NF_MATCH_EUI64=m
-CONFIG_IP6_NF_MATCH_FRAG=m
-CONFIG_IP6_NF_MATCH_OPTS=m
-CONFIG_IP6_NF_MATCH_HL=m
-CONFIG_IP6_NF_MATCH_IPV6HEADER=m
-CONFIG_IP6_NF_MATCH_RT=m
-CONFIG_IP6_NF_TARGET_LOG=m
-CONFIG_IP6_NF_FILTER=m
-CONFIG_IP6_NF_MANGLE=m
-CONFIG_IP6_NF_RAW=m
-CONFIG_BRIDGE_NF_EBTABLES=m
-CONFIG_BRIDGE_EBT_BROUTE=m
-CONFIG_BRIDGE_EBT_T_FILTER=m
-CONFIG_BRIDGE_EBT_T_NAT=m
-CONFIG_BRIDGE_EBT_802_3=m
-CONFIG_BRIDGE_EBT_AMONG=m
-CONFIG_BRIDGE_EBT_ARP=m
-CONFIG_BRIDGE_EBT_IP=m
-CONFIG_BRIDGE_EBT_LIMIT=m
-CONFIG_BRIDGE_EBT_MARK=m
-CONFIG_BRIDGE_EBT_PKTTYPE=m
-CONFIG_BRIDGE_EBT_STP=m
-CONFIG_BRIDGE_EBT_VLAN=m
-CONFIG_BRIDGE_EBT_ARPREPLY=m
-CONFIG_BRIDGE_EBT_DNAT=m
-CONFIG_BRIDGE_EBT_MARK_T=m
-CONFIG_BRIDGE_EBT_REDIRECT=m
-CONFIG_BRIDGE_EBT_SNAT=m
-CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_IP_SCTP=m
-CONFIG_ATM=m
-CONFIG_ATM_CLIP=m
-CONFIG_ATM_LANE=m
-CONFIG_ATM_BR2684=m
-CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_HFSC=m
-CONFIG_NET_SCH_ATM=m
-CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_RED=m
-CONFIG_NET_SCH_SFQ=m
-CONFIG_NET_SCH_TEQL=m
-CONFIG_NET_SCH_TBF=m
-CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_CLS_TCINDEX=m
-CONFIG_NET_CLS_ROUTE4=m
-CONFIG_NET_CLS_FW=m
-CONFIG_NET_CLS_U32=m
-CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
-CONFIG_NET_CLS_IND=y
-CONFIG_BT=m
-CONFIG_BT_L2CAP=y
-CONFIG_BT_SCO=y
-CONFIG_BT_RFCOMM=m
-CONFIG_BT_RFCOMM_TTY=y
-CONFIG_BT_BNEP=m
-CONFIG_BT_BNEP_MC_FILTER=y
-CONFIG_BT_BNEP_PROTO_FILTER=y
-CONFIG_BT_HIDP=m
-CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
-CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIBCM203X=m
-CONFIG_BT_HCIBFUSB=m
-CONFIG_BT_HCIVHCI=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_MTD=y
-CONFIG_MTD_CONCAT=m
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=m
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_COMPLEX_MAPPINGS=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
-# CONFIG_MISC_DEVICES is not set
-CONFIG_SCSI=m
-CONFIG_BLK_DEV_SD=m
-CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_ISCSI_ATTRS=m
-CONFIG_BLK_DEV_3W_XXXX_RAID=m
-CONFIG_SCSI_3W_9XXX=m
-CONFIG_SCSI_ACARD=m
-CONFIG_SCSI_AACRAID=m
-CONFIG_SCSI_AIC7XXX=m
-CONFIG_AIC7XXX_CMDS_PER_DEVICE=4
-CONFIG_AIC7XXX_RESET_DELAY_MS=15000
-# CONFIG_AIC7XXX_DEBUG_ENABLE is not set
-# CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
-CONFIG_SCSI_AIC7XXX_OLD=m
-CONFIG_SCSI_AIC79XX=m
-CONFIG_AIC79XX_CMDS_PER_DEVICE=4
-CONFIG_AIC79XX_RESET_DELAY_MS=15000
-# CONFIG_AIC79XX_DEBUG_ENABLE is not set
-# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
-CONFIG_SCSI_ARCMSR=m
-CONFIG_MEGARAID_NEWGEN=y
-CONFIG_MEGARAID_MM=m
-CONFIG_MEGARAID_MAILBOX=m
-CONFIG_MEGARAID_SAS=m
-CONFIG_SCSI_FUTURE_DOMAIN=m
-CONFIG_SCSI_GDTH=m
-CONFIG_SCSI_IPS=m
-CONFIG_SCSI_INITIO=m
-CONFIG_SCSI_SYM53C8XX_2=m
-CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_LPFC=m
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
-CONFIG_BONDING=m
-CONFIG_TUN=m
-CONFIG_VITESSE_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_MV643XX_ETH=y
-# CONFIG_NETDEV_10000 is not set
-# CONFIG_ATM_DRIVERS is not set
-CONFIG_NETCONSOLE=m
-CONFIG_NETPOLL_TRAP=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_EVDEV=y
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_UINPUT=m
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_SERIAL_MPSC=y
-CONFIG_SERIAL_MPSC_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_NVRAM=m
-CONFIG_GEN_RTC=m
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=8192
-CONFIG_I2C=m
-CONFIG_I2C_CHARDEV=m
-CONFIG_I2C_MV64XXX=m
-CONFIG_HWMON=m
-CONFIG_SENSORS_ADM1021=m
-CONFIG_SENSORS_ADM1025=m
-CONFIG_SENSORS_ADM1026=m
-CONFIG_SENSORS_ADM1031=m
-CONFIG_SENSORS_DS1621=m
-CONFIG_SENSORS_GL518SM=m
-CONFIG_SENSORS_IT87=m
-CONFIG_SENSORS_LM75=m
-CONFIG_SENSORS_LM77=m
-CONFIG_SENSORS_LM78=m
-CONFIG_SENSORS_LM80=m
-CONFIG_SENSORS_LM83=m
-CONFIG_SENSORS_LM85=m
-CONFIG_SENSORS_LM87=m
-CONFIG_SENSORS_LM90=m
-CONFIG_SENSORS_MAX1619=m
-CONFIG_SENSORS_PCF8591=m
-CONFIG_SENSORS_SMSC47M1=m
-CONFIG_SENSORS_SMSC47B397=m
-CONFIG_SENSORS_VIA686A=m
-CONFIG_SENSORS_W83781D=m
-CONFIG_SENSORS_W83L785TS=m
-CONFIG_SENSORS_W83627HF=m
-CONFIG_WATCHDOG=y
-CONFIG_SOFT_WATCHDOG=m
-CONFIG_PCIPCWATCHDOG=m
-CONFIG_WDTPCI=m
-CONFIG_USBPCWATCHDOG=m
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID_SUPPORT is not set
-CONFIG_USB=m
-# CONFIG_USB_DEVICE_CLASS is not set
-CONFIG_USB_MON=m
-CONFIG_USB_EHCI_HCD=m
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_UHCI_HCD=m
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
-CONFIG_USB_STORAGE=m
-CONFIG_USB_STORAGE_DATAFAB=m
-CONFIG_USB_STORAGE_FREECOM=m
-CONFIG_USB_STORAGE_ISD200=m
-CONFIG_USB_STORAGE_SDDR09=m
-CONFIG_USB_STORAGE_SDDR55=m
-CONFIG_USB_STORAGE_JUMPSHOT=m
-CONFIG_USB_MDC800=m
-CONFIG_USB_MICROTEK=m
-CONFIG_USB_SERIAL=m
-CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_SERIAL_BELKIN=m
-CONFIG_USB_SERIAL_WHITEHEAT=m
-CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
-CONFIG_USB_SERIAL_EMPEG=m
-CONFIG_USB_SERIAL_FTDI_SIO=m
-CONFIG_USB_SERIAL_VISOR=m
-CONFIG_USB_SERIAL_IPAQ=m
-CONFIG_USB_SERIAL_IR=m
-CONFIG_USB_SERIAL_EDGEPORT=m
-CONFIG_USB_SERIAL_EDGEPORT_TI=m
-CONFIG_USB_SERIAL_KEYSPAN_PDA=m
-CONFIG_USB_SERIAL_KEYSPAN=m
-CONFIG_USB_SERIAL_KEYSPAN_MPR=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19=y
-CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
-CONFIG_USB_SERIAL_KLSI=m
-CONFIG_USB_SERIAL_KOBIL_SCT=m
-CONFIG_USB_SERIAL_MCT_U232=m
-CONFIG_USB_SERIAL_PL2303=m
-CONFIG_USB_SERIAL_SAFE=m
-CONFIG_USB_SERIAL_SAFE_PADDED=y
-CONFIG_USB_SERIAL_CYBERJACK=m
-CONFIG_USB_SERIAL_XIRCOM=m
-CONFIG_USB_SERIAL_OMNINET=m
-CONFIG_USB_EMI62=m
-CONFIG_USB_RIO500=m
-CONFIG_USB_LEGOTOWER=m
-CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
-CONFIG_USB_TEST=m
-CONFIG_USB_ATM=m
-CONFIG_USB_SPEEDTOUCH=m
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_AMSO1100=m
-CONFIG_INFINIBAND_IPOIB=m
-CONFIG_INFINIBAND_IPOIB_CM=y
-CONFIG_INFINIBAND_SRP=m
-CONFIG_DMADEVICES=y
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_QUOTA=y
-CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_JFFS2_FS=y
-CONFIG_CRAMFS=m
-CONFIG_VXFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_RPCSEC_GSS_SPKM3=m
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_EFI_PARTITION=y
-CONFIG_NLS=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_CODEPAGE_737=m
-CONFIG_NLS_CODEPAGE_775=m
-CONFIG_NLS_CODEPAGE_850=m
-CONFIG_NLS_CODEPAGE_852=m
-CONFIG_NLS_CODEPAGE_855=m
-CONFIG_NLS_CODEPAGE_857=m
-CONFIG_NLS_CODEPAGE_860=m
-CONFIG_NLS_CODEPAGE_861=m
-CONFIG_NLS_CODEPAGE_862=m
-CONFIG_NLS_CODEPAGE_863=m
-CONFIG_NLS_CODEPAGE_864=m
-CONFIG_NLS_CODEPAGE_865=m
-CONFIG_NLS_CODEPAGE_866=m
-CONFIG_NLS_CODEPAGE_869=m
-CONFIG_NLS_CODEPAGE_936=m
-CONFIG_NLS_CODEPAGE_950=m
-CONFIG_NLS_CODEPAGE_932=m
-CONFIG_NLS_CODEPAGE_949=m
-CONFIG_NLS_CODEPAGE_874=m
-CONFIG_NLS_ISO8859_8=m
-CONFIG_NLS_CODEPAGE_1250=m
-CONFIG_NLS_CODEPAGE_1251=m
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_NLS_KOI8_R=m
-CONFIG_NLS_KOI8_U=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-CONFIG_DEBUG_HIGHMEM=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_BOOTX=y
-CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
-CONFIG_SECURITY=y
-CONFIG_SECURITY_NETWORK=y
-CONFIG_SECURITY_SELINUX=y
-CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 4bee1a6d41d0..7a31b52e92e1 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -1,10 +1,11 @@
CONFIG_PPC64=y
-CONFIG_TUNE_CELL=y
+CONFIG_CELL_CPU=y
CONFIG_ALTIVEC=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=15
@@ -13,10 +14,10 @@ CONFIG_CPUSETS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_POWERNV is not set
# CONFIG_PPC_PSERIES is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_PS3=y
@@ -24,21 +25,16 @@ CONFIG_PS3_DISK=y
CONFIG_PS3_ROM=m
CONFIG_PS3_FLASH=m
CONFIG_PS3_LPM=m
-CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELLEB=y
CONFIG_RTAS_FLASH=y
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=m
CONFIG_IRQ_ALL_CPUS=y
CONFIG_NUMA=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_PPC_64K_PAGES=y
CONFIG_SCHED_SMT=y
CONFIG_PCIEPORTBUS=y
CONFIG_NET=y
@@ -52,16 +48,12 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
-CONFIG_IPV6=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
-# CONFIG_INET6_XFRM_MODE_BEET is not set
# CONFIG_IPV6_SIT is not set
CONFIG_IPV6_TUNNEL=m
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
@@ -70,7 +62,6 @@ CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
-CONFIG_NETFILTER_XT_MATCH_DCCP=m
CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
@@ -92,16 +83,12 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
@@ -109,22 +96,18 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_IDE=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AEC62XX=y
-CONFIG_BLK_DEV_SIIMAGE=y
-CONFIG_BLK_DEV_CELLEB=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=y
CONFIG_ATA=y
CONFIG_SATA_PROMISE=y
+CONFIG_PATA_ARTOP=y
CONFIG_PATA_PDC2027X=m
+CONFIG_PATA_SIL680=y
+CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=m
CONFIG_MD_LINEAR=m
@@ -142,39 +125,29 @@ CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_MACVLAN=m
CONFIG_TUN=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
+CONFIG_TIGON3=y
CONFIG_E1000=m
CONFIG_SKGE=m
CONFIG_SKY2=m
-CONFIG_TIGON3=y
-CONFIG_SPIDER_NET=y
CONFIG_GELIC_NET=m
CONFIG_GELIC_WIRELESS=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO_I8042 is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_TXX9_NR_UARTS=2
-CONFIG_SERIAL_TXX9_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_LEGACY_PTYS is not set
CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
CONFIG_IPMI_HANDLER=m
CONFIG_IPMI_DEVICE_INTERFACE=m
CONFIG_IPMI_SI=m
CONFIG_IPMI_WATCHDOG=m
CONFIG_IPMI_POWEROFF=m
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_I2C=y
-# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_VGA_CONSOLE is not set
CONFIG_HID=m
# CONFIG_USB_HID is not set
@@ -192,13 +165,10 @@ CONFIG_INFINIBAND_MTHCA=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_EDAC_CELL=y
CONFIG_UIO=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
@@ -208,11 +178,8 @@ CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_HUGETLBFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_EFI_PARTITION=y
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_ISO8859_2=m
CONFIG_NLS_ISO8859_3=m
@@ -224,12 +191,9 @@ CONFIG_NLS_ISO8859_9=m
CONFIG_NLS_ISO8859_13=m
CONFIG_NLS_ISO8859_14=m
CONFIG_NLS_ISO8859_15=m
-# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_CRYPTO_ECB=m
diff --git a/arch/powerpc/configs/celleb_defconfig b/arch/powerpc/configs/celleb_defconfig
deleted file mode 100644
index 6d7b22f41b50..000000000000
--- a/arch/powerpc/configs/celleb_defconfig
+++ /dev/null
@@ -1,161 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_TUNE_CELL=y
-CONFIG_ALTIVEC=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=15
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_CELLEB=y
-CONFIG_SPU_FS=y
-# CONFIG_CBE_THERM is not set
-CONFIG_UDBG_RTAS_CONSOLE=y
-# CONFIG_RTAS_PROC is not set
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BINFMT_MISC=m
-CONFIG_KEXEC=y
-CONFIG_NUMA=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_SYN_COOKIES=y
-CONFIG_IPV6=y
-CONFIG_INET6_AH=m
-CONFIG_INET6_ESP=m
-CONFIG_INET6_IPCOMP=m
-CONFIG_IPV6_TUNNEL=m
-CONFIG_NETFILTER=y
-CONFIG_IP_NF_QUEUE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=m
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_CELLEB=y
-CONFIG_SCSI=m
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=m
-CONFIG_BLK_DEV_SR=m
-CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=m
-CONFIG_MD_LINEAR=m
-CONFIG_MD_RAID0=m
-CONFIG_MD_RAID1=m
-CONFIG_BLK_DEV_DM=m
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_SPIDER_NET=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO_I8042 is not set
-CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_SERIAL_TXX9_NR_UARTS=3
-CONFIG_SERIAL_TXX9_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_I2C=y
-# CONFIG_HWMON is not set
-CONFIG_WATCHDOG=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=m
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=m
-CONFIG_USB_STORAGE=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_INOTIFY=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
-CONFIG_NFSD_V3_ACL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_EFI_PARTITION=y
-CONFIG_NLS_ISO8859_1=m
-CONFIG_NLS_ISO8859_2=m
-CONFIG_NLS_ISO8859_3=m
-CONFIG_NLS_ISO8859_4=m
-CONFIG_NLS_ISO8859_5=m
-CONFIG_NLS_ISO8859_6=m
-CONFIG_NLS_ISO8859_7=m
-CONFIG_NLS_ISO8859_9=m
-CONFIG_NLS_ISO8859_13=m
-CONFIG_NLS_ISO8859_14=m
-CONFIG_NLS_ISO8859_15=m
-CONFIG_LIBCRC32C=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST5=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/chrp32_defconfig b/arch/powerpc/configs/chrp32_defconfig
index db5b30857e1c..b799c95480ae 100644
--- a/arch/powerpc/configs/chrp32_defconfig
+++ b/arch/powerpc/configs/chrp32_defconfig
@@ -1,25 +1,25 @@
CONFIG_SMP=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=15
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_MAC_PARTITION=y
# CONFIG_PPC_PMAC is not set
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_BINFMT_MISC=y
CONFIG_IRQ_ALL_CPUS=y
-# CONFIG_MIGRATION is not set
CONFIG_ISA=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -27,9 +27,6 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
@@ -37,57 +34,46 @@ CONFIG_NETFILTER=y
# CONFIG_NETFILTER_XT_TARGET_TCPMSS is not set
# CONFIG_NETFILTER_XT_MATCH_CONNTRACK is not set
# CONFIG_NETFILTER_XT_MATCH_STATE is not set
-# CONFIG_IP_NF_TARGET_ULOG is not set
# CONFIG_IP_NF_MANGLE is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_FD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_SL82C105=y
-CONFIG_BLK_DEV_VIA82CXXX=y
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
+CONFIG_ATA=y
+CONFIG_PATA_VIA=y
+CONFIG_PATA_WINBOND=y
+CONFIG_ATA_GENERIC=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_TULIP=y
-CONFIG_DE4X5=y
-CONFIG_NET_PCI=y
CONFIG_PCNET32=y
+CONFIG_NET_TULIP=y
+CONFIG_MV643XX_ETH=y
CONFIG_8139CP=y
CONFIG_8139TOO=y
# CONFIG_8139TOO_PIO is not set
CONFIG_VIA_RHINE=y
-CONFIG_MV643XX_ETH=y
CONFIG_PPP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_MISC=y
CONFIG_INPUT_UINPUT=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_BRIQ_PANEL=m
# CONFIG_HW_RANDOM is not set
CONFIG_NVRAM=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
@@ -101,14 +87,12 @@ CONFIG_FB_ATY=y
CONFIG_FB_ATY_CT=y
CONFIG_FB_ATY_GX=y
CONFIG_FB_3DFX=y
-CONFIG_DISPLAY_SUPPORT=m
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_HID_GYRATION=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_USB=y
CONFIG_USB_MON=y
@@ -118,29 +102,19 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_STORAGE=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
CONFIG_EXT4_FS=y
-CONFIG_INOTIFY=y
CONFIG_ISO9660_FS=y
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO=y
CONFIG_CRYPTO_CBC=m
CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/corenet32_smp_defconfig b/arch/powerpc/configs/corenet32_smp_defconfig
deleted file mode 100644
index 6a3c58adf253..000000000000
--- a/arch/powerpc/configs/corenet32_smp_defconfig
+++ /dev/null
@@ -1,183 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=8
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_AUDIT=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-CONFIG_PERF_EVENTS=y
-CONFIG_SLAB=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CORENET_GENERIC=y
-CONFIG_HIGHMEM=y
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_KEXEC=y
-CONFIG_FORCE_MAX_ZONEORDER=13
-CONFIG_PCI=y
-CONFIG_PCIEPORTBUS=y
-# CONFIG_PCIEASPM is not set
-CONFIG_PCI_MSI=y
-CONFIG_RAPIDIO=y
-CONFIG_FSL_RIO=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_XFRM_SUB_POLICY=y
-CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_KEY=y
-CONFIG_NET_KEY_MIGRATE=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-CONFIG_INET_AH=y
-CONFIG_INET_ESP=y
-CONFIG_INET_IPCOMP=y
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_MTD_NAND_FSL_IFC=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_SATA_FSL=y
-CONFIG_SATA_SIL24=y
-CONFIG_SATA_SIL=y
-CONFIG_PATA_SIL680=y
-CONFIG_NETDEVICES=y
-CONFIG_FSL_PQ_MDIO=y
-CONFIG_E1000=y
-CONFIG_E1000E=y
-CONFIG_VITESSE_PHY=y
-CONFIG_AT803X_PHY=y
-CONFIG_FIXED_PHY=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_PPC_EPAPR_HV_BYTECHAN=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_SPI=y
-CONFIG_SPI_GPIO=y
-CONFIG_SPI_FSL_SPI=y
-CONFIG_SPI_FSL_ESPI=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_USB_HID=m
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_FSL=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_USB_STORAGE=y
-CONFIG_MMC=y
-CONFIG_MMC_SDHCI=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_EDAC_MPC85XX=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_RTC_DRV_DS3232=y
-CONFIG_UIO=y
-CONFIG_STAGING=y
-CONFIG_VIRT_DRIVERS=y
-CONFIG_FSL_HV_MANAGER=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=m
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_SHIRQ=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-CONFIG_RCU_TRACE=y
-CONFIG_CRYPTO_NULL=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=y
-CONFIG_CRYPTO_AES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_FSL_CAAM=y
-CONFIG_FSL_CORENET_CF=y
diff --git a/arch/powerpc/configs/corenet64_smp_defconfig b/arch/powerpc/configs/corenet64_smp_defconfig
deleted file mode 100644
index 4b07bade1ba9..000000000000
--- a/arch/powerpc/configs/corenet64_smp_defconfig
+++ /dev/null
@@ -1,182 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_PPC_BOOK3E_64=y
-CONFIG_ALTIVEC=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=24
-CONFIG_SYSVIPC=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CORENET_GENERIC=y
-# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
-CONFIG_BINFMT_MISC=m
-CONFIG_MATH_EMULATION=y
-CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED=y
-CONFIG_PCIEPORTBUS=y
-CONFIG_PCI_MSI=y
-CONFIG_RAPIDIO=y
-CONFIG_FSL_RIO=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_FTL=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_GEN_PROBE=y
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
-CONFIG_MTD_CFI_UTIL=y
-CONFIG_MTD_NAND_ECC=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_IDS=y
-CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_MTD_NAND_FSL_IFC=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_WL_THRESHOLD=4096
-CONFIG_MTD_UBI_BEB_RESERVE=1
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_EEPROM_LEGACY=y
-CONFIG_ATA=y
-CONFIG_SATA_FSL=y
-CONFIG_SATA_SIL24=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_E1000E=y
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_MPC=y
-CONFIG_SPI=y
-CONFIG_SPI_GPIO=y
-CONFIG_SPI_FSL_SPI=y
-CONFIG_SPI_FSL_ESPI=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_USB_HID=m
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_FSL=y
-CONFIG_USB_STORAGE=y
-CONFIG_MMC=y
-CONFIG_MMC_SDHCI=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_RTC_DRV_DS3232=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_DMADEVICES=y
-CONFIG_FSL_DMA=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_MISC_FILESYSTEMS=y
-CONFIG_JFFS2_FS=y
-CONFIG_JFFS2_FS_DEBUG=1
-CONFIG_JFFS2_FS_WRITEBUFFER=y
-CONFIG_JFFS2_ZLIB=y
-CONFIG_JFFS2_RTIME=y
-CONFIG_UBIFS_FS=y
-CONFIG_UBIFS_FS_XATTR=y
-CONFIG_UBIFS_FS_LZO=y
-CONFIG_UBIFS_FS_ZLIB=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=m
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=m
-CONFIG_CRC_T10DIF=y
-CONFIG_CRC16=y
-CONFIG_ZLIB_DEFLATE=y
-CONFIG_LZO_COMPRESS=y
-CONFIG_LZO_DECOMPRESS=y
-CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRYPTO_LZO=y
-CONFIG_FRAME_WARN=1024
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_SHIRQ=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-CONFIG_CRYPTO_NULL=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_FSL_CAAM=y
-CONFIG_FSL_CORENET_CF=y
diff --git a/arch/powerpc/configs/corenet_base.config b/arch/powerpc/configs/corenet_base.config
new file mode 100644
index 000000000000..1c40de1e764b
--- /dev/null
+++ b/arch/powerpc/configs/corenet_base.config
@@ -0,0 +1,2 @@
+CONFIG_CORENET_GENERIC=y
+CONFIG_PPC_QEMU_E500=y
diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config
new file mode 100644
index 000000000000..bcc1fcf25e10
--- /dev/null
+++ b/arch/powerpc/configs/debug.config
@@ -0,0 +1,5 @@
+CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG=y
+CONFIG_PPC_IRQ_SOFT_MASK_DEBUG=y
+CONFIG_PPC_KUAP_DEBUG=y
+CONFIG_PPC_RFI_SRR_DEBUG=y
+CONFIG_SCOM_DEBUGFS=y
diff --git a/arch/powerpc/configs/disable-werror.config b/arch/powerpc/configs/disable-werror.config
new file mode 100644
index 000000000000..7776b91da37f
--- /dev/null
+++ b/arch/powerpc/configs/disable-werror.config
@@ -0,0 +1,2 @@
+# Help: Disable -Werror
+CONFIG_PPC_DISABLE_WERROR=y
diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config
new file mode 100644
index 000000000000..4ffacafe4036
--- /dev/null
+++ b/arch/powerpc/configs/dpaa.config
@@ -0,0 +1,5 @@
+CONFIG_FSL_DPAA=y
+CONFIG_FSL_PAMU=y
+CONFIG_FSL_FMAN=y
+CONFIG_FSL_DPAA_ETH=y
+CONFIG_CORTINA_PHY=y
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index fceffb3cffbe..0d8d3f41f194 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -4,14 +4,12 @@ CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_SLAB=y
-# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_82xx=y
CONFIG_EP8248E=y
CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_PCI=y
CONFIG_NET=y
@@ -23,12 +21,9 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -38,14 +33,11 @@ CONFIG_MTD_CFI_GEOMETRY=y
# CONFIG_MTD_CFI_I1 is not set
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
# CONFIG_FS_ENET_HAS_SCC is not set
+CONFIG_DAVICOM_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -54,35 +46,25 @@ CONFIG_SERIAL_CPM_CONSOLE=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-# CONFIG_CRC32 is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BDI_SWITCH=y
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index b8a79d7ee89f..354180ab94bc 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -1,24 +1,19 @@
CONFIG_PPC_8xx=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_PPC_EP88XC=y
-CONFIG_8xx_COPYBACK=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
-CONFIG_8XX_MINIMAL_FPEMU=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -27,49 +22,31 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_LXT_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
# CONFIG_FS_ENET_HAS_SCC is not set
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_LXT_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_CRC32 is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_CRC32_SLICEBY4=y
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
new file mode 100644
index 000000000000..2f81bc2d819e
--- /dev/null
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -0,0 +1,126 @@
+CONFIG_ADFS_FS=m
+CONFIG_AFFS_FS=m
+CONFIG_AUDIT=y
+CONFIG_BEFS_FS=m
+CONFIG_BFS_FS=m
+CONFIG_BINFMT_MISC=m
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_NBD=y
+CONFIG_BLK_DEV_RAM_SIZE=131072
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_CPUSETS=y
+CONFIG_CRAMFS=y
+CONFIG_CRYPTO_MD4=y
+CONFIG_CRYPTO_NULL=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DUMMY=y
+CONFIG_EFS_FS=m
+CONFIG_EXPERT=y
+CONFIG_EXT2_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_FB=y
+CONFIG_FHANDLE=y
+CONFIG_FIXED_PHY=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_8x8=y
+CONFIG_FONTS=y
+CONFIG_ARCH_FORCE_MAX_ORDER=12
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAME_WARN=1024
+CONFIG_FTL=y
+CONFIG_GPIO_GENERIC_PLATFORM=y
+CONFIG_HFS_FS=m
+CONFIG_HFSPLUS_FS=m
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPFS_FS=m
+CONFIG_HUGETLBFS=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IKCONFIG=y
+CONFIG_INET_AH=y
+CONFIG_INET_ESP=y
+CONFIG_INET_IPCOMP=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_PNP_BOOTP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_IP_PNP_RARP=y
+CONFIG_IP_PNP=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
+CONFIG_ISO9660_FS=m
+CONFIG_JFFS2_FS_DEBUG=1
+CONFIG_JFFS2_FS=y
+CONFIG_JOLIET=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MSDOS_FS=m
+CONFIG_MTD_UBI=y
+CONFIG_MTD=y
+CONFIG_NET_IPIP=y
+CONFIG_NET_KEY_MIGRATE=y
+CONFIG_NET_KEY=y
+CONFIG_NET=y
+CONFIG_NFSD=y
+CONFIG_NFS_FS=y
+CONFIG_NFS_V4=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_CODEPAGE_850=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=m
+CONFIG_NO_HZ=y
+CONFIG_NTFS_FS=y
+CONFIG_PACKET=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PERF_EVENTS=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO=y
+CONFIG_POWER_RESET_GPIO_RESTART=y
+CONFIG_QNX4FS_FS=m
+CONFIG_RCU_TRACE=y
+CONFIG_RESET_CONTROLLER=y
+CONFIG_ROOT_NFS=y
+CONFIG_SYSVIPC=y
+CONFIG_TMPFS=y
+CONFIG_UBIFS_FS=y
+CONFIG_UDF_FS=m
+CONFIG_UFS_FS=m
+CONFIG_UIO=y
+CONFIG_UNIX=y
+CONFIG_VFAT_FS=y
+CONFIG_VXFS_FS=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZISOFS=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index 3c72fa615bd9..428f17b45513 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -2,29 +2,31 @@ CONFIG_PPC64=y
CONFIG_ALTIVEC=y
CONFIG_SMP=y
CONFIG_NR_CPUS=4
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_CGROUPS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_PPC_POWERNV is not set
# CONFIG_PPC_PSERIES is not set
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC64=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_KEXEC=y
CONFIG_IRQ_ALL_CPUS=y
-# CONFIG_MIGRATION is not set
+CONFIG_PPC_4K_PAGES=y
CONFIG_PCI_MSI=y
CONFIG_NET=y
CONFIG_PACKET=y
@@ -49,29 +51,22 @@ CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_IRC=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CONNTRACK_IPV4=m
-CONFIG_IP_NF_QUEUE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_CDROM_PKTCDVD=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_IDE_PMAC=y
-CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_SPI_ATTRS=y
CONFIG_ATA=y
CONFIG_SATA_SVW=y
+CONFIG_PATA_MACIO=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=y
@@ -85,33 +80,29 @@ CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
CONFIG_DM_MIRROR=m
CONFIG_DM_ZERO=m
-CONFIG_IEEE1394=y
-CONFIG_IEEE1394_OHCI1394=y
-CONFIG_IEEE1394_SBP2=m
-CONFIG_IEEE1394_ETH1394=m
-CONFIG_IEEE1394_RAWIO=y
-CONFIG_IEEE1394_VIDEO1394=m
-CONFIG_IEEE1394_DV1394=m
CONFIG_ADB_PMU=y
CONFIG_PMAC_SMU=y
CONFIG_MAC_EMUMOUSEBTN=y
-CONFIG_THERM_PM72=y
CONFIG_WINDFARM=y
CONFIG_WINDFARM_PM81=y
CONFIG_WINDFARM_PM91=y
CONFIG_WINDFARM_PM112=y
CONFIG_WINDFARM_PM121=y
CONFIG_NETDEVICES=y
-CONFIG_DUMMY=m
CONFIG_BONDING=m
+CONFIG_DUMMY=m
CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_SUNGEM=y
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_E1000=y
CONFIG_TIGON3=y
+CONFIG_E1000=y
+CONFIG_SUNGEM=y
+CONFIG_PPP=m
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
CONFIG_USB_CATC=m
CONFIG_USB_KAWETH=m
CONFIG_USB_PEGASUS=m
@@ -121,13 +112,6 @@ CONFIG_USB_USBNET=m
# CONFIG_USB_NET_NET1080 is not set
# CONFIG_USB_NET_CDC_SUBSET is not set
# CONFIG_USB_NET_ZAURUS is not set
-CONFIG_PPP=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPPOE=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -135,13 +119,9 @@ CONFIG_INPUT_EVDEV=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_RAW_DRIVER=y
CONFIG_I2C_CHARDEV=y
-# CONFIG_HWMON is not set
CONFIG_AGP=m
CONFIG_AGP_UNINORTH=m
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_TILEBLITTING=y
@@ -154,10 +134,11 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_POWERMAC=m
CONFIG_SND_AOA=m
CONFIG_SND_AOA_FABRIC_LAYOUT=m
@@ -165,15 +146,14 @@ CONFIG_SND_AOA_ONYX=m
CONFIG_SND_AOA_TAS=m
CONFIG_SND_AOA_TOONIE=m
CONFIG_SND_USB_AUDIO=m
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
@@ -205,18 +185,6 @@ CONFIG_USB_SERIAL_GARMIN=m
CONFIG_USB_SERIAL_IPW=m
CONFIG_USB_SERIAL_KEYSPAN_PDA=m
CONFIG_USB_SERIAL_KEYSPAN=m
-CONFIG_USB_SERIAL_KEYSPAN_MPR=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19=y
-CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_SERIAL_KLSI=m
CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
@@ -225,26 +193,18 @@ CONFIG_USB_SERIAL_SAFE=m
CONFIG_USB_SERIAL_SAFE_PADDED=y
CONFIG_USB_SERIAL_TI=m
CONFIG_USB_SERIAL_CYBERJACK=m
-CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_APPLEDISPLAY=m
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_POSIX_ACL=y
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS_FS=m
+CONFIG_FS_DAX=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
@@ -258,14 +218,12 @@ CONFIG_HFS_FS=m
CONFIG_HFSPLUS_FS=m
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFSD=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_CIFS=m
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_1250=y
CONFIG_NLS_CODEPAGE_1251=y
@@ -273,31 +231,17 @@ CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_ISO8859_15=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=m
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_MUTEXES=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_BOOTX=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_ECB=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_AES=m
CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
@@ -305,5 +249,5 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig
index 9ef2cc13e1b4..cdd99657b71b 100644
--- a/arch/powerpc/configs/gamecube_defconfig
+++ b/arch/powerpc/configs/gamecube_defconfig
@@ -1,16 +1,13 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_LOCALVERSION="-gcn"
CONFIG_SYSVIPC=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
@@ -22,7 +19,6 @@ CONFIG_GAMECUBE=y
CONFIG_PREEMPT=y
CONFIG_BINFMT_MISC=m
CONFIG_KEXEC=y
-# CONFIG_MIGRATION is not set
# CONFIG_SECCOMP is not set
CONFIG_ADVANCED_OPTIONS=y
CONFIG_NET=y
@@ -32,28 +28,18 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
# CONFIG_WLAN is not set
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
@@ -61,7 +47,6 @@ CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_JOYSTICK=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_LEGACY_PTY_COUNT=64
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
@@ -74,19 +59,17 @@ CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_CLUT224 is not set
CONFIG_SOUND=y
CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_USB_SUPPORT is not set
CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_MSDOS_FS=y
@@ -95,20 +78,13 @@ CONFIG_PROC_KCORE=y
# CONFIG_PROC_PAGE_MONITOR is not set
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_CIFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_SCHED_TRACER=y
CONFIG_DMA_API_DEBUG=y
CONFIG_PPC_EARLY_DEBUG=y
diff --git a/arch/powerpc/configs/guest.config b/arch/powerpc/configs/guest.config
new file mode 100644
index 000000000000..fece83487215
--- /dev/null
+++ b/arch/powerpc/configs/guest.config
@@ -0,0 +1,14 @@
+CONFIG_VIRTIO_BLK=y
+CONFIG_SCSI_VIRTIO=y
+CONFIG_VIRTIO_NET=y
+CONFIG_NET_FAILOVER=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_KVM_GUEST=y
+CONFIG_EPAPR_PARAVIRT=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VHOST_NET=y
+CONFIG_VHOST=y
+CONFIG_IBMVETH=y
+CONFIG_IBMVNIC=y
diff --git a/arch/powerpc/configs/hardening.config b/arch/powerpc/configs/hardening.config
new file mode 100644
index 000000000000..4e9bba327e8f
--- /dev/null
+++ b/arch/powerpc/configs/hardening.config
@@ -0,0 +1,10 @@
+# PowerPC specific hardening options
+
+# Block kernel from unexpectedly reading userspace memory.
+CONFIG_PPC_KUAP=y
+
+# Attack surface reduction.
+# CONFIG_SCOM_DEBUGFS is not set
+
+# Disable internal kernel debugger.
+# CONFIG_XMON is not set
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
index 94ebfee188db..271daff47d1d 100644
--- a/arch/powerpc/configs/holly_defconfig
+++ b/arch/powerpc/configs/holly_defconfig
@@ -1,19 +1,18 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_MODULES=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_EMBEDDED6xx=y
CONFIG_PPC_HOLLY=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_BINFMT_MISC=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0,115200"
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -26,23 +25,17 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=131072
CONFIG_BLK_DEV_SD=y
CONFIG_ATA=y
CONFIG_NETDEVICES=y
-CONFIG_PHYLIB=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_VENDOR_3COM=y
CONFIG_VORTEX=y
CONFIG_TSI108_ETH=y
-# CONFIG_INPUT_MOUSEDEV is not set
+CONFIG_PHYLIB=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -54,17 +47,13 @@ CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
# CONFIG_DEBUG_BUGVERBOSE is not set
CONFIG_XMON=y
diff --git a/arch/powerpc/configs/kvm_guest.config b/arch/powerpc/configs/kvm_guest.config
new file mode 120000
index 000000000000..a5f7a2fa74ef
--- /dev/null
+++ b/arch/powerpc/configs/kvm_guest.config
@@ -0,0 +1 @@
+../../../kernel/configs/kvm_guest.config \ No newline at end of file
diff --git a/arch/powerpc/configs/le.config b/arch/powerpc/configs/le.config
new file mode 100644
index 000000000000..ee43fdb3b8f4
--- /dev/null
+++ b/arch/powerpc/configs/le.config
@@ -0,0 +1 @@
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index b5e684640fdf..b564f9e33a0d 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -1,6 +1,7 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
@@ -13,10 +14,7 @@ CONFIG_MODULE_UNLOAD=y
# CONFIG_PPC_PMAC is not set
CONFIG_EMBEDDED6xx=y
CONFIG_LINKSTATION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_100=y
-CONFIG_SPARSE_IRQ=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -25,11 +23,9 @@ CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_PROTO_SCTP=m
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
@@ -41,14 +37,9 @@ CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
@@ -56,12 +47,8 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
@@ -72,28 +59,23 @@ CONFIG_MTD_CFI_GEOMETRY=y
# CONFIG_MTD_CFI_I2 is not set
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_MISC_DEVICES=y
CONFIG_EEPROM_LEGACY=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_ATA=y
CONFIG_PATA_IT821X=y
CONFIG_PATA_SIL680=y
CONFIG_NETDEVICES=y
+CONFIG_NETCONSOLE=y
CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
CONFIG_NET_TULIP=y
CONFIG_TULIP=y
CONFIG_TULIP_MMIO=y
CONFIG_R8169=y
-CONFIG_NETCONSOLE=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
@@ -106,7 +88,6 @@ CONFIG_HW_RANDOM=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_VGA_CONSOLE is not set
CONFIG_HID=m
# CONFIG_USB_HID is not set
@@ -123,10 +104,8 @@ CONFIG_USB_SERIAL_FTDI_SIO=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_RS5C372=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=m
-CONFIG_INOTIFY=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
@@ -137,33 +116,22 @@ CONFIG_NTFS_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_932=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA1=m
-CONFIG_CRYPTO_AES=m
-CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig
deleted file mode 100644
index 95e545d9f25c..000000000000
--- a/arch/powerpc/configs/maple_defconfig
+++ /dev/null
@@ -1,145 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=4
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_PSERIES is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_MAPLE=y
-CONFIG_UDBG_RTAS_CONSOLE=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_KEXEC=y
-CONFIG_IRQ_ALL_CPUS=y
-# CONFIG_MIGRATION is not set
-CONFIG_PCI_MSI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_IDE_TASK_IOCTL=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
-# CONFIG_SCSI_PROC_FS is not set
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_IPR=y
-CONFIG_ATA=y
-CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_AMD8111_ETH=y
-CONFIG_E1000=y
-CONFIG_TIGON3=y
-CONFIG_USB_PEGASUS=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=1600
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=1200
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_HVC_RTAS=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_AMD8111=y
-# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_HID_GYRATION=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_ROOT_HUB_TT=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_UHCI_HCD=y
-CONFIG_USB_SERIAL=y
-CONFIG_USB_SERIAL_GENERIC=y
-CONFIG_USB_SERIAL_CYPRESS_M8=m
-CONFIG_USB_SERIAL_GARMIN=m
-CONFIG_USB_SERIAL_IPW=m
-CONFIG_USB_SERIAL_KEYSPAN=y
-CONFIG_USB_SERIAL_KEYSPAN_MPR=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19=y
-CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
-CONFIG_USB_SERIAL_TI=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_EXT4_FS=y
-CONFIG_INOTIFY=y
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_NLS_DEFAULT="utf-8"
-CONFIG_NLS_UTF8=y
-CONFIG_CRC_CCITT=y
-CONFIG_CRC_T10DIF=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_LATENCYTOP=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_BOOTX=y
-CONFIG_CRYPTO_ECB=m
-CONFIG_CRYPTO_PCBC=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index 8fa84f156ef3..f65001e7877f 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -1,7 +1,7 @@
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
@@ -9,14 +9,11 @@ CONFIG_BLK_DEV_INITRD=y
# CONFIG_RD_GZIP is not set
CONFIG_KALLSYMS_ALL=y
# CONFIG_PCSPKR_PLATFORM is not set
-CONFIG_EMBEDDED=y
-CONFIG_SLAB=y
+CONFIG_EXPERT=y
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_82xx=y
CONFIG_MGCOGE=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_BINFMT_MISC=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -28,16 +25,12 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_TIPC=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
CONFIG_MTD_CFI_GEOMETRY=y
@@ -45,13 +38,11 @@ CONFIG_MTD_CFI_GEOMETRY=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_NETDEVICES=y
CONFIG_FS_ENET=y
CONFIG_FS_ENET_MDIO_FCC=y
-CONFIG_FIXED_PHY=y
# CONFIG_WLAN is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
@@ -68,27 +59,24 @@ CONFIG_USB_GADGET=y
CONFIG_USB_FSL_USB2=y
CONFIG_USB_G_SERIAL=y
CONFIG_UIO=y
-CONFIG_UIO_PDRV=y
CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_CRAMFS=y
CONFIG_SQUASHFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
CONFIG_BDI_SWITCH=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/microwatt_defconfig b/arch/powerpc/configs/microwatt_defconfig
new file mode 100644
index 000000000000..a64fb1ef8c75
--- /dev/null
+++ b/arch/powerpc/configs/microwatt_defconfig
@@ -0,0 +1,108 @@
+# CONFIG_SWAP is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_TICK_CPU_ACCOUNTING=y
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_CGROUPS=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_EXPERT=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_PPC64=y
+CONFIG_POWER9_CPU=y
+# CONFIG_PPC_64S_HASH_MMU is not set
+# CONFIG_PPC_KUEP is not set
+# CONFIG_PPC_KUAP is not set
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_NR_IRQS=64
+CONFIG_PANIC_TIMEOUT=10
+# CONFIG_PPC_POWERNV is not set
+# CONFIG_PPC_PSERIES is not set
+CONFIG_PPC_MICROWATT=y
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ=y
+CONFIG_HZ_100=y
+CONFIG_PPC_4K_PAGES=y
+# CONFIG_SECCOMP is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_PACKET_DIAG=y
+CONFIG_UNIX=y
+CONFIG_UNIX_DIAG=y
+CONFIG_INET=y
+CONFIG_INET_UDP_DIAG=y
+CONFIG_INET_RAW_DIAG=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_STANDALONE is not set
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_PARTITIONED_MASTER=y
+CONFIG_MTD_SPI_NOR=y
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_NETDEVICES=y
+CONFIG_LITEX_LITEETH=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SERIAL_NONSTANDARD=y
+# CONFIG_NVRAM is not set
+CONFIG_SPI=y
+CONFIG_SPI_DEBUG=y
+CONFIG_SPI_BITBANG=y
+CONFIG_SPI_SPIDEV=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_LITEX=y
+# CONFIG_VIRTIO_MENU is not set
+CONFIG_COMMON_CLK=y
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_NVMEM is not set
+CONFIG_EXT4_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_AUTOFS_FS=y
+CONFIG_TMPFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_CRYPTO_SHA256=y
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_DEBUG_MISC is not set
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+# CONFIG_STRICT_DEVMEM is not set
+CONFIG_PPC_DISABLE_WERROR=y
+CONFIG_XMON=y
+CONFIG_XMON_DEFAULT=y
+# CONFIG_XMON_DEFAULT_RO_MODE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index ee853a1b1b2c..d24457bc5791 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -4,14 +4,13 @@ CONFIG_NO_HZ=y
CONFIG_LOG_BUF_SHIFT=16
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
-# CONFIG_IOSCHED_CFQ is not set
# CONFIG_PPC_CHRP is not set
CONFIG_PPC_MPC512x=y
+CONFIG_MPC512x_LPBFIFO=y
CONFIG_MPC5121_ADS=y
CONFIG_MPC512x_GENERIC=y
CONFIG_PDM360NG=y
@@ -24,10 +23,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_CAN=y
@@ -35,11 +30,9 @@ CONFIG_CAN_VCAN=y
CONFIG_CAN_MSCAN=y
CONFIG_CAN_DEBUG_DEVICES=y
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
CONFIG_MTD_BLOCK=y
@@ -47,14 +40,12 @@ CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_ROM=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_MPC5121_NFC=y
CONFIG_MTD_UBI=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=1
CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_BLK_DEV_XIP=y
CONFIG_EEPROM_AT24=y
CONFIG_EEPROM_AT25=y
CONFIG_SCSI=y
@@ -63,26 +54,22 @@ CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_SG=y
CONFIG_NETDEVICES=y
CONFIG_FS_ENET=y
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_QSEMI_PHY=y
-CONFIG_LXT_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_SMSC_PHY=y
+CONFIG_MDIO_BITBANG=y
CONFIG_BROADCOM_PHY=y
+CONFIG_CICADA_PHY=y
+CONFIG_DAVICOM_PHY=y
CONFIG_ICPLUS_PHY=y
-CONFIG_REALTEK_PHY=y
+CONFIG_LSI_ET1011C_PHY=y
+CONFIG_LXT_PHY=y
+CONFIG_MARVELL_PHY=y
CONFIG_NATIONAL_PHY=y
+CONFIG_QSEMI_PHY=y
+CONFIG_REALTEK_PHY=y
+CONFIG_SMSC_PHY=y
CONFIG_STE10XP=y
-CONFIG_LSI_ET1011C_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_MDIO_BITBANG=y
+CONFIG_VITESSE_PHY=y
# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
-CONFIG_VT_HW_CONSOLE_BINDING=y
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
@@ -115,9 +102,8 @@ CONFIG_RTC_DRV_MPC5121=y
CONFIG_DMADEVICES=y
CONFIG_MPC512X_DMA=y
CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
+CONFIG_FS_DAX=y
# CONFIG_DNOTIFY is not set
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
@@ -127,7 +113,4 @@ CONFIG_NFS_FS=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
index 69fd8adf9f5e..c0fe5e76604a 100644
--- a/arch/powerpc/configs/mpc5200_defconfig
+++ b/arch/powerpc/configs/mpc5200_defconfig
@@ -1,6 +1,6 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
-CONFIG_SPARSE_IRQ=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
CONFIG_MODULES=y
@@ -15,9 +15,6 @@ CONFIG_PPC_MEDIA5200=y
CONFIG_PPC_MPC5200_BUGFIX=y
CONFIG_PPC_MPC5200_LPBFIFO=m
# CONFIG_PPC_PMAC is not set
-CONFIG_SIMPLE_GPIO=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -28,13 +25,9 @@ CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
@@ -42,7 +35,6 @@ CONFIG_MTD_ROM=y
CONFIG_MTD_PHYSMAP_OF=y
CONFIG_MTD_PLATRAM=y
CONFIG_MTD_UBI=m
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -56,7 +48,6 @@ CONFIG_NETDEVICES=y
CONFIG_FEC_MPC52xx=y
CONFIG_AMD_PHY=y
CONFIG_LXT_PHY=y
-CONFIG_FIXED_PHY=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -77,13 +68,10 @@ CONFIG_SENSORS_LM87=m
CONFIG_WATCHDOG=y
CONFIG_MFD_SM501=m
CONFIG_DRM=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_FB=y
CONFIG_FB_FOREIGN_ENDIAN=y
CONFIG_FB_RADEON=y
CONFIG_FB_SM501=m
# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=y
CONFIG_SND=y
@@ -94,9 +82,6 @@ CONFIG_SND=y
# CONFIG_SND_SPI is not set
# CONFIG_SND_USB is not set
CONFIG_SND_SOC=y
-CONFIG_SND_SOC_MPC5200_I2S=y
-CONFIG_SND_MPC52xx_SOC_PCM030=y
-CONFIG_SND_MPC52xx_SOC_EFIKA=y
CONFIG_HID_DRAGONRISE=y
CONFIG_HID_GYRATION=y
CONFIG_HID_TWINHAN=y
@@ -104,7 +89,6 @@ CONFIG_HID_ORTEK=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_GREENASIA=y
CONFIG_HID_SMARTJOYPLUS=y
@@ -112,7 +96,6 @@ CONFIG_HID_TOPSEED=y
CONFIG_HID_THRUSTMASTER=y
CONFIG_HID_ZEROPLUS=y
CONFIG_USB=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
@@ -125,8 +108,7 @@ CONFIG_RTC_DRV_PCF8563=m
CONFIG_DMADEVICES=y
CONFIG_PPC_BESTCOMM=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
@@ -135,13 +117,11 @@ CONFIG_JFFS2_FS=y
CONFIG_UBIFS_FS=m
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc7448_hpc2_defconfig b/arch/powerpc/configs/mpc7448_hpc2_defconfig
deleted file mode 100644
index 75f0bbf0f6e8..000000000000
--- a/arch/powerpc/configs/mpc7448_hpc2_defconfig
+++ /dev/null
@@ -1,69 +0,0 @@
-CONFIG_ALTIVEC=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_EMBEDDED6xx=y
-CONFIG_MPC7448HPC2=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
-# CONFIG_SECCOMP is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_BLK_DEV_SD=y
-CONFIG_ATA=y
-CONFIG_SATA_MV=y
-CONFIG_NETDEVICES=y
-CONFIG_PHYLIB=y
-CONFIG_NET_ETHERNET=y
-CONFIG_NET_PCI=y
-CONFIG_E100=y
-CONFIG_8139TOO=y
-# CONFIG_8139TOO_PIO is not set
-CONFIG_TSI108_ETH=y
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_NFS_FS=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig
deleted file mode 100644
index 6a22400f73c1..000000000000
--- a/arch/powerpc/configs/mpc8272_ads_defconfig
+++ /dev/null
@@ -1,95 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_82xx=y
-CONFIG_MPC8272_ADS=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_GEOMETRY=y
-# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_2 is not set
-# CONFIG_MTD_CFI_I1 is not set
-# CONFIG_MTD_CFI_I2 is not set
-CONFIG_MTD_CFI_I4=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-# CONFIG_MISC_DEVICES is not set
-CONFIG_NETDEVICES=y
-CONFIG_TUN=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_FS_ENET=y
-# CONFIG_FS_ENET_HAS_SCC is not set
-CONFIG_FS_ENET_MDIO_FCC=y
-CONFIG_PPP=y
-CONFIG_PPP_ASYNC=y
-CONFIG_PPP_SYNC_TTY=y
-CONFIG_PPP_DEFLATE=y
-CONFIG_INPUT_EVDEV=y
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_CPM=y
-CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_HWMON is not set
-# CONFIG_HID_SUPPORT is not set
-# CONFIG_USB_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS4_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_NLS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_BDI_SWITCH=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index 23fec79964cf..a815d9e5e3e8 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -1,9 +1,7 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
@@ -12,17 +10,11 @@ CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_83xx=y
CONFIG_MPC831x_RDB=y
-CONFIG_MPC832x_MDS=y
CONFIG_MPC832x_RDB=y
-CONFIG_MPC834x_MDS=y
CONFIG_MPC834x_ITX=y
-CONFIG_MPC836x_MDS=y
CONFIG_MPC836x_RDK=y
-CONFIG_MPC837x_MDS=y
CONFIG_MPC837x_RDB=y
-CONFIG_SBC834x=y
CONFIG_ASP834x=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
CONFIG_MATH_EMULATION=y
CONFIG_PCI=y
@@ -38,21 +30,17 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_SYN_COOKIES=y
CONFIG_INET_ESP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
@@ -62,16 +50,13 @@ CONFIG_ATA=y
CONFIG_SATA_FSL=y
CONFIG_SATA_SIL=y
CONFIG_NETDEVICES=y
-CONFIG_MII=y
CONFIG_UCC_GETH=y
CONFIG_GIANFAR=y
-CONFIG_MARVELL_PHY=y
CONFIG_DAVICOM_PHY=y
-CONFIG_VITESSE_PHY=y
CONFIG_ICPLUS_PHY=y
-CONFIG_FIXED_PHY=y
+CONFIG_MARVELL_PHY=y
+CONFIG_VITESSE_PHY=y
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -82,7 +67,6 @@ CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
CONFIG_WATCHDOG=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_HID_A4TECH=y
CONFIG_HID_APPLE=y
CONFIG_HID_BELKIN=y
@@ -97,7 +81,6 @@ CONFIG_HID_MONTEREY=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_USB=y
CONFIG_USB_MON=y
@@ -106,19 +89,15 @@ CONFIG_USB_EHCI_FSL=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_RTC_DRV_DS1374=y
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
-CONFIG_CRYPTO_AES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc85xx_base.config b/arch/powerpc/configs/mpc85xx_base.config
new file mode 100644
index 000000000000..a1e4d72ed39d
--- /dev/null
+++ b/arch/powerpc/configs/mpc85xx_base.config
@@ -0,0 +1,20 @@
+CONFIG_MATH_EMULATION=y
+CONFIG_MPC8536_DS=y
+CONFIG_MPC85xx_DS=y
+CONFIG_MPC85xx_MDS=y
+CONFIG_MPC85xx_RDB=y
+CONFIG_KSI8560=y
+CONFIG_MVME2500=y
+CONFIG_P1010_RDB=y
+CONFIG_P1022_DS=y
+CONFIG_P1022_RDK=y
+CONFIG_P1023_RDB=y
+CONFIG_TWR_P102x=y
+CONFIG_SOCRATES=y
+CONFIG_STX_GP3=y
+CONFIG_TQM8540=y
+CONFIG_TQM8541=y
+CONFIG_TQM8548=y
+CONFIG_TQM8555=y
+CONFIG_TQM8560=y
+CONFIG_XES_MPC85xx=y
diff --git a/arch/powerpc/configs/mpc85xx_defconfig b/arch/powerpc/configs/mpc85xx_defconfig
deleted file mode 100644
index fa1bfd37f1ec..000000000000
--- a/arch/powerpc/configs/mpc85xx_defconfig
+++ /dev/null
@@ -1,274 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_PHYS_64BIT=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_MPC8540_ADS=y
-CONFIG_MPC8560_ADS=y
-CONFIG_MPC85xx_CDS=y
-CONFIG_MPC85xx_MDS=y
-CONFIG_MPC8536_DS=y
-CONFIG_MPC85xx_DS=y
-CONFIG_MPC85xx_RDB=y
-CONFIG_C293_PCIE=y
-CONFIG_P1010_RDB=y
-CONFIG_P1022_DS=y
-CONFIG_P1022_RDK=y
-CONFIG_P1023_RDB=y
-CONFIG_SOCRATES=y
-CONFIG_KSI8560=y
-CONFIG_XES_MPC85xx=y
-CONFIG_STX_GP3=y
-CONFIG_TQM8540=y
-CONFIG_TQM8541=y
-CONFIG_TQM8548=y
-CONFIG_TQM8555=y
-CONFIG_TQM8560=y
-CONFIG_SBC8548=y
-CONFIG_QUICC_ENGINE=y
-CONFIG_QE_GPIO=y
-CONFIG_HIGHMEM=y
-CONFIG_BINFMT_MISC=m
-CONFIG_MATH_EMULATION=y
-CONFIG_FORCE_MAX_ZONEORDER=12
-CONFIG_PCI=y
-CONFIG_PCI_MSI=y
-CONFIG_RAPIDIO=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_FTL=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_GEN_PROBE=y
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
-CONFIG_MTD_CFI_UTIL=y
-CONFIG_MTD_NAND_ECC=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_IDS=y
-CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_MTD_NAND_FSL_IFC=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_WL_THRESHOLD=4096
-CONFIG_MTD_UBI_BEB_RESERVE=1
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_EEPROM_AT24=y
-CONFIG_EEPROM_LEGACY=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_SATA_FSL=y
-CONFIG_SATA_SIL24=y
-CONFIG_PATA_ALI=y
-CONFIG_PATA_VIA=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_FS_ENET=y
-CONFIG_UCC_GETH=y
-CONFIG_GIANFAR=y
-CONFIG_E1000=y
-CONFIG_E1000E=y
-CONFIG_IGB=y
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_AT803X_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-CONFIG_SERIAL_QE=m
-CONFIG_NVRAM=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_CPM=m
-CONFIG_I2C_MPC=y
-CONFIG_SPI=y
-CONFIG_SPI_FSL_SPI=y
-CONFIG_SPI_FSL_ESPI=y
-CONFIG_GPIO_MPC8XXX=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_FB=y
-CONFIG_FB_FSL_DIU=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-# CONFIG_SND_DRIVERS is not set
-CONFIG_SND_INTEL8X0=y
-# CONFIG_SND_PPC is not set
-# CONFIG_SND_USB is not set
-CONFIG_SND_SOC=y
-CONFIG_SND_POWERPC_SOC=y
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_FSL=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_USB_STORAGE=y
-CONFIG_MMC=y
-CONFIG_MMC_SDHCI=y
-CONFIG_MMC_SDHCI_PLTFM=y
-CONFIG_MMC_SDHCI_OF_ESDHC=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_RTC_DRV_DS3232=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_DMADEVICES=y
-CONFIG_FSL_DMA=y
-# CONFIG_NET_DMA is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_JFFS2_FS_DEBUG=1
-CONFIG_JFFS2_FS_WRITEBUFFER=y
-CONFIG_JFFS2_ZLIB=y
-CONFIG_JFFS2_RTIME=y
-CONFIG_UBIFS_FS=y
-CONFIG_UBIFS_FS_XATTR=y
-CONFIG_UBIFS_FS_LZO=y
-CONFIG_UBIFS_FS_ZLIB=y
-CONFIG_ADFS_FS=m
-CONFIG_AFFS_FS=m
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_BEFS_FS=m
-CONFIG_BFS_FS=m
-CONFIG_EFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_VXFS_FS=m
-CONFIG_HPFS_FS=m
-CONFIG_QNX4FS_FS=m
-CONFIG_SYSV_FS=m
-CONFIG_UFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=y
-CONFIG_CRC_T10DIF=y
-CONFIG_CRC16=y
-CONFIG_ZLIB_DEFLATE=y
-CONFIG_LZO_COMPRESS=y
-CONFIG_LZO_DECOMPRESS=y
-CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRYPTO_LZO=y
-CONFIG_DEBUG_FS=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_FSL_CAAM=y
-CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc85xx_smp_defconfig b/arch/powerpc/configs/mpc85xx_smp_defconfig
deleted file mode 100644
index 0b452ebd8b3d..000000000000
--- a/arch/powerpc/configs/mpc85xx_smp_defconfig
+++ /dev/null
@@ -1,276 +0,0 @@
-CONFIG_PPC_85xx=y
-CONFIG_PHYS_64BIT=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=8
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_AUDIT=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_MPC8540_ADS=y
-CONFIG_MPC8560_ADS=y
-CONFIG_MPC85xx_CDS=y
-CONFIG_MPC85xx_MDS=y
-CONFIG_MPC8536_DS=y
-CONFIG_MPC85xx_DS=y
-CONFIG_MPC85xx_RDB=y
-CONFIG_C293_PCIE=y
-CONFIG_P1010_RDB=y
-CONFIG_P1022_DS=y
-CONFIG_P1022_RDK=y
-CONFIG_P1023_RDB=y
-CONFIG_SOCRATES=y
-CONFIG_KSI8560=y
-CONFIG_XES_MPC85xx=y
-CONFIG_STX_GP3=y
-CONFIG_TQM8540=y
-CONFIG_TQM8541=y
-CONFIG_TQM8548=y
-CONFIG_TQM8555=y
-CONFIG_TQM8560=y
-CONFIG_SBC8548=y
-CONFIG_QUICC_ENGINE=y
-CONFIG_QE_GPIO=y
-CONFIG_HIGHMEM=y
-CONFIG_BINFMT_MISC=m
-CONFIG_MATH_EMULATION=y
-CONFIG_FORCE_MAX_ZONEORDER=12
-CONFIG_PCI=y
-CONFIG_PCI_MSI=y
-CONFIG_RAPIDIO=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLKDEVS=y
-CONFIG_MTD_BLOCK=y
-CONFIG_FTL=y
-CONFIG_MTD_CFI=y
-CONFIG_MTD_GEN_PROBE=y
-CONFIG_MTD_MAP_BANK_WIDTH_1=y
-CONFIG_MTD_MAP_BANK_WIDTH_2=y
-CONFIG_MTD_MAP_BANK_WIDTH_4=y
-CONFIG_MTD_CFI_I1=y
-CONFIG_MTD_CFI_I2=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_M25P80=y
-CONFIG_MTD_CFI_UTIL=y
-CONFIG_MTD_NAND_ECC=y
-CONFIG_MTD_NAND=y
-CONFIG_MTD_NAND_IDS=y
-CONFIG_MTD_NAND_FSL_ELBC=y
-CONFIG_MTD_NAND_FSL_IFC=y
-CONFIG_MTD_UBI=y
-CONFIG_MTD_UBI_WL_THRESHOLD=4096
-CONFIG_MTD_UBI_BEB_RESERVE=1
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_EEPROM_AT24=y
-CONFIG_EEPROM_LEGACY=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_SATA_FSL=y
-CONFIG_SATA_SIL24=y
-CONFIG_PATA_ALI=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_FS_ENET=y
-CONFIG_UCC_GETH=y
-CONFIG_GIANFAR=y
-CONFIG_E1000E=y
-CONFIG_MARVELL_PHY=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_CICADA_PHY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_AT803X_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-CONFIG_SERIAL_QE=m
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_CHARDEV=y
-CONFIG_I2C_CPM=m
-CONFIG_I2C_MPC=y
-CONFIG_SPI=y
-CONFIG_SPI_FSL_SPI=y
-CONFIG_SPI_FSL_ESPI=y
-CONFIG_GPIO_MPC8XXX=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_FB=y
-CONFIG_FB_FSL_DIU=y
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FONTS=y
-CONFIG_FONT_8x8=y
-CONFIG_FONT_8x16=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-# CONFIG_SND_DRIVERS is not set
-CONFIG_SND_INTEL8X0=y
-# CONFIG_SND_PPC is not set
-# CONFIG_SND_USB is not set
-CONFIG_SND_SOC=y
-CONFIG_SND_POWERPC_SOC=y
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_EHCI_FSL=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_USB_STORAGE=y
-CONFIG_MMC=y
-CONFIG_MMC_SDHCI=y
-CONFIG_MMC_SDHCI_PLTFM=y
-CONFIG_MMC_SDHCI_OF_ESDHC=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_RTC_DRV_DS1374=y
-CONFIG_RTC_DRV_DS3232=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_RTC_DRV_DS1307=y
-CONFIG_DMADEVICES=y
-CONFIG_FSL_DMA=y
-# CONFIG_NET_DMA is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_HUGETLBFS=y
-CONFIG_JFFS2_FS=y
-CONFIG_JFFS2_FS_DEBUG=1
-CONFIG_JFFS2_FS_WRITEBUFFER=y
-CONFIG_JFFS2_ZLIB=y
-CONFIG_JFFS2_RTIME=y
-CONFIG_UBIFS_FS=y
-CONFIG_UBIFS_FS_XATTR=y
-CONFIG_UBIFS_FS_LZO=y
-CONFIG_UBIFS_FS_ZLIB=y
-CONFIG_ADFS_FS=m
-CONFIG_AFFS_FS=m
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_BEFS_FS=m
-CONFIG_BFS_FS=m
-CONFIG_EFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_VXFS_FS=m
-CONFIG_HPFS_FS=m
-CONFIG_QNX4FS_FS=m
-CONFIG_SYSV_FS=m
-CONFIG_UFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=y
-CONFIG_CRC_T10DIF=y
-CONFIG_CRC16=y
-CONFIG_ZLIB_DEFLATE=y
-CONFIG_LZO_COMPRESS=y
-CONFIG_LZO_DECOMPRESS=y
-CONFIG_CRYPTO_DEFLATE=y
-CONFIG_CRYPTO_LZO=y
-CONFIG_DEBUG_FS=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_SHA256=y
-CONFIG_CRYPTO_SHA512=y
-CONFIG_CRYPTO_AES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_FSL_CAAM=y
-CONFIG_CRYPTO_DEV_TALITOS=y
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index d954e80c286a..dfbdd5e8e108 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -1,25 +1,20 @@
CONFIG_PPC_8xx=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
-# CONFIG_HOTPLUG is not set
# CONFIG_BUG is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_EPOLL is not set
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_MPC86XADS=y
-CONFIG_8xx_COPYBACK=y
-CONFIG_8xx_CPU6=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_1000=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -28,31 +23,18 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_BLK_DEV_LOOP=y
CONFIG_NETDEVICES=y
-CONFIG_FIXED_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_GEN_RTC=y
-CONFIG_VIDEO_OUTPUT_CONTROL=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_CRC_CCITT=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC32_SLICEBY4=y
diff --git a/arch/powerpc/configs/mpc86xx_base.config b/arch/powerpc/configs/mpc86xx_base.config
new file mode 100644
index 000000000000..632c014b122d
--- /dev/null
+++ b/arch/powerpc/configs/mpc86xx_base.config
@@ -0,0 +1,8 @@
+CONFIG_PPC_86xx=y
+CONFIG_GEF_PPC9A=y
+CONFIG_GEF_SBC310=y
+CONFIG_GEF_SBC610=y
+CONFIG_MVME7100=y
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/mpc86xx_defconfig b/arch/powerpc/configs/mpc86xx_defconfig
deleted file mode 100644
index 35595ea74ff4..000000000000
--- a/arch/powerpc/configs/mpc86xx_defconfig
+++ /dev/null
@@ -1,178 +0,0 @@
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_BSD_PROCESS_ACCT=y
-CONFIG_AUDIT=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_86xx=y
-CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
-CONFIG_MPC8610_HPCD=y
-CONFIG_GEF_SBC610=y
-CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_HZ_1000=y
-CONFIG_BINFMT_MISC=m
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=y
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_ADVANCED_ROUTER=y
-CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_ROUTE_MULTIPATH=y
-CONFIG_IP_ROUTE_VERBOSE=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_IP_PNP_RARP=y
-CONFIG_NET_IPIP=y
-CONFIG_NET_IPGRE=y
-CONFIG_NET_IPGRE_BROADCAST=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
-CONFIG_ARPD=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_IPV6=y
-CONFIG_IP_SCTP=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=131072
-CONFIG_MISC_DEVICES=y
-CONFIG_EEPROM_LEGACY=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_LOGGING=y
-CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
-CONFIG_PATA_ALI=y
-CONFIG_NETDEVICES=y
-CONFIG_DUMMY=y
-CONFIG_VITESSE_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-CONFIG_GIANFAR=y
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-CONFIG_SERIO_LIBPS2=y
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_NR_UARTS=2
-CONFIG_SERIAL_8250_RUNTIME_UARTS=2
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_MANY_PORTS=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_8250_DETECT_IRQ=y
-CONFIG_SERIAL_8250_RSA=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_NVRAM=y
-CONFIG_I2C=y
-CONFIG_I2C_MPC=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-CONFIG_SOUND=y
-CONFIG_SND=y
-CONFIG_SND_MIXER_OSS=y
-CONFIG_SND_PCM_OSS=y
-# CONFIG_SND_SUPPORT_OLD_API is not set
-CONFIG_SND_INTEL8X0=y
-CONFIG_HID_A4TECH=y
-CONFIG_HID_APPLE=y
-CONFIG_HID_BELKIN=y
-CONFIG_HID_CHERRY=y
-CONFIG_HID_CHICONY=y
-CONFIG_HID_CYPRESS=y
-CONFIG_HID_EZKEY=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_LOGITECH=y
-CONFIG_HID_MICROSOFT=y
-CONFIG_HID_MONTEREY=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_EHCI_HCD=y
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
-CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
-CONFIG_USB_STORAGE=y
-CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_CMOS=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_ISO9660_FS=m
-CONFIG_JOLIET=y
-CONFIG_ZISOFS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=m
-CONFIG_VFAT_FS=y
-CONFIG_NTFS_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_ADFS_FS=m
-CONFIG_AFFS_FS=m
-CONFIG_HFS_FS=m
-CONFIG_HFSPLUS_FS=m
-CONFIG_BEFS_FS=m
-CONFIG_BFS_FS=m
-CONFIG_EFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_VXFS_FS=m
-CONFIG_HPFS_FS=m
-CONFIG_QNX4FS_FS=m
-CONFIG_SYSV_FS=m
-CONFIG_UFS_FS=m
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-CONFIG_ROOT_NFS=y
-CONFIG_NFSD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_MAC_PARTITION=y
-CONFIG_CRC_T10DIF=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 3f47d00a10c0..9bc2758a6a9a 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -1,24 +1,30 @@
-CONFIG_PPC_8xx=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_JIT=y
+CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y
CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
+CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_8xx_COPYBACK=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_PPC_8xx=y
+CONFIG_8xx_GPIO=y
+CONFIG_SMC_UCODE_PATCH=y
+CONFIG_PIN_TLB=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
-CONFIG_8XX_MINIMAL_FPEMU=y
-CONFIG_SPARSE_IRQ=y
+CONFIG_MATH_EMULATION=y
+CONFIG_PPC_16K_PAGES=y
+CONFIG_ADVANCED_OPTIONS=y
# CONFIG_SECCOMP is not set
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -26,15 +32,8 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_ADV_OPTIONS=y
@@ -45,37 +44,37 @@ CONFIG_MTD_CFI_GEOMETRY=y
# CONFIG_MTD_CFI_I2 is not set
CONFIG_MTD_CFI_I4=y
CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_PHYSMAP=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
# CONFIG_FS_ENET_HAS_SCC is not set
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_DAVICOM_PHY=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_GEN_RTC=y
+CONFIG_SPI=y
+CONFIG_SPI_FSL_SPI=y
# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+CONFIG_8xxx_WDT=y
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_CRC32 is not set
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_DEV_TALITOS=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_VM_PGTABLE=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_CRC32_SLICEBY4=y
+CONFIG_BDI_SWITCH=y
+CONFIG_PPC_EARLY_DEBUG=y
+CONFIG_PTDUMP_DEBUGFS=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 93c7752e2dbb..fa2b3b9c5945 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -20,10 +20,9 @@ CONFIG_EMBEDDED6xx=y
CONFIG_MVME5100=y
CONFIG_KVM_GUEST=y
CONFIG_HZ_100=y
+CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_COMPACTION is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -32,11 +31,9 @@ CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
-CONFIG_NF_CT_PROTO_SCTP=m
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
@@ -48,7 +45,6 @@ CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
@@ -60,8 +56,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_LAPB=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
@@ -72,7 +66,6 @@ CONFIG_TUN=m
# CONFIG_NET_VENDOR_3COM is not set
CONFIG_E100=y
# CONFIG_WLAN is not set
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
@@ -88,16 +81,12 @@ CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MPC=y
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
# CONFIG_VGA_CONSOLE is not set
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_VME_BUS=m
-CONFIG_VME_CA91CX42=m
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=m
CONFIG_XFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
@@ -112,21 +101,13 @@ CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_ROOT_NFS=y
CONFIG_NFSD=m
-CONFIG_NFSD_V3=y
CONFIG_CIFS=m
CONFIG_NLS=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_932=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_UTF8=m
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
CONFIG_XZ_DEC=y
-CONFIG_XZ_DEC_X86=y
-CONFIG_XZ_DEC_IA64=y
-CONFIG_XZ_DEC_ARM=y
-CONFIG_XZ_DEC_ARMTHUMB=y
-CONFIG_XZ_DEC_SPARC=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
@@ -141,4 +122,3 @@ CONFIG_CRYPTO_DES=y
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index cec044a3ff69..8bbf51b38480 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -7,12 +7,12 @@ CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_MAC_PARTITION=y
+# CONFIG_PPC_POWERNV is not set
# CONFIG_PPC_PSERIES is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_PASEMI=y
@@ -22,7 +22,6 @@ CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_HZ_1000=y
-CONFIG_PPC_64K_PAGES=y
# CONFIG_SECCOMP is not set
CONFIG_PCI_MSI=y
CONFIG_PCCARD=y
@@ -43,31 +42,23 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_MTD=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_SLRAM=y
CONFIG_MTD_PHRAM=y
-CONFIG_MTD_NAND=y
+CONFIG_MTD_RAW_NAND=y
CONFIG_MTD_NAND_PASEMI=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_EEPROM_LEGACY=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_IDE_TASK_IOCTL=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
-CONFIG_CHR_DEV_OSST=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_ATA=y
@@ -101,7 +92,6 @@ CONFIG_LEGACY_PTY_COUNT=4
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_HW_RANDOM=y
-CONFIG_RAW_DRIVER=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_PASEMI=y
CONFIG_SENSORS_LM85=y
@@ -115,13 +105,13 @@ CONFIG_FB_NVIDIA=y
CONFIG_FB_NVIDIA_I2C=y
CONFIG_FB_RADEON=y
# CONFIG_LCD_CLASS_DEVICE is not set
-CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_LOGO=y
CONFIG_SOUND=y
CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
+CONFIG_SND_SEQUENCER=y
CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_USB_AUDIO=y
CONFIG_SND_USB_USX2Y=y
@@ -132,7 +122,6 @@ CONFIG_HID_NTRIG=y
CONFIG_HID_PANTHERLORD=y
CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
CONFIG_HID_GREENASIA=y
CONFIG_HID_SMARTJOYPLUS=y
@@ -146,16 +135,15 @@ CONFIG_USB_UHCI_HCD=y
CONFIG_USB_SL811_HCD=y
CONFIG_USB_STORAGE=y
CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_PASEMI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
+CONFIG_RAS=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_EXT4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=y
CONFIG_MSDOS_FS=y
@@ -171,9 +159,7 @@ CONFIG_NFSD=y
CONFIG_NFSD_V4=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
-CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
@@ -181,7 +167,5 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=y
CONFIG_CRYPTO_BLOWFISH=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 553e66278010..ae45f70b29f0 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -1,36 +1,33 @@
CONFIG_ALTIVEC=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_PMAC=y
-CONFIG_PPC601_SYNC_FIX=y
+CONFIG_GEN_RTC=y
CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_BINFMT_MISC=m
-# CONFIG_MIGRATION is not set
-CONFIG_PM=y
-CONFIG_PM_DEBUG=y
CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
CONFIG_APM_EMULATION=y
CONFIG_PCCARD=m
CONFIG_YENTA=m
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=y
@@ -40,12 +37,8 @@ CONFIG_IP_MULTICAST=y
CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=y
CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_IRC=m
@@ -81,20 +74,12 @@ CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
@@ -102,17 +87,7 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP_DCCP=m
-CONFIG_IRDA=m
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-CONFIG_IRTTY_SIR=m
CONFIG_BT=m
-CONFIG_BT_L2CAP=y
-CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
@@ -124,38 +99,30 @@ CONFIG_BT_HCIBFUSB=m
CONFIG_CFG80211=m
CONFIG_MAC80211=m
CONFIG_MAC80211_LEDS=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
CONFIG_CONNECTOR=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_MAC_FLOPPY=m
CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_UB=m
CONFIG_BLK_DEV_RAM=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECS=m
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_PDC202XX_NEW=y
-CONFIG_BLK_DEV_SL82C105=y
-CONFIG_BLK_DEV_IDE_PMAC=y
-CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_AIC7XXX=m
CONFIG_AIC7XXX_CMDS_PER_DEVICE=253
CONFIG_AIC7XXX_RESET_DELAY_MS=15000
-CONFIG_SCSI_AIC7XXX_OLD=m
CONFIG_SCSI_SYM53C8XX_2=y
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_MESH=y
CONFIG_SCSI_MAC53C94=y
+CONFIG_ATA=y
+CONFIG_PATA_MACIO=y
+CONFIG_PATA_PDC2027X=y
+CONFIG_PATA_WINBOND=y
+CONFIG_PATA_PCMCIA=m
+CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=m
CONFIG_MD_LINEAR=m
@@ -169,17 +136,10 @@ CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
CONFIG_DM_MIRROR=m
CONFIG_DM_ZERO=m
-CONFIG_IEEE1394=m
-CONFIG_IEEE1394_OHCI1394=m
-CONFIG_IEEE1394_SBP2=m
-CONFIG_IEEE1394_RAWIO=m
-CONFIG_IEEE1394_VIDEO1394=m
-CONFIG_IEEE1394_DV1394=m
CONFIG_ADB=y
-CONFIG_ADB_CUDA=y
CONFIG_ADB_PMU=y
CONFIG_ADB_PMU_LED=y
-CONFIG_ADB_PMU_LED_IDE=y
+CONFIG_ADB_PMU_LED_DISK=y
CONFIG_PMAC_APM_EMU=m
CONFIG_PMAC_MEDIABAY=y
CONFIG_PMAC_BACKLIGHT=y
@@ -192,27 +152,21 @@ CONFIG_PMAC_RACKMETER=m
CONFIG_NETDEVICES=y
CONFIG_DUMMY=m
CONFIG_TUN=m
-CONFIG_NET_ETHERNET=y
+CONFIG_PCNET32=y
CONFIG_MACE=y
CONFIG_BMAC=y
CONFIG_SUNGEM=y
-CONFIG_NET_PCI=y
-CONFIG_PCNET32=y
-CONFIG_PRISM54=m
-CONFIG_B43=m
-CONFIG_B43LEGACY=m
-CONFIG_HERMES=m
-CONFIG_APPLE_AIRPORT=m
-CONFIG_PCMCIA_HERMES=m
-CONFIG_P54_COMMON=m
-CONFIG_USB_USBNET=m
-# CONFIG_USB_NET_CDC_SUBSET is not set
CONFIG_PPP=y
+CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_DEFLATE=y
CONFIG_PPP_MULTILINK=y
CONFIG_PPP_ASYNC=y
CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=y
-CONFIG_PPP_BSDCOMP=m
+CONFIG_USB_USBNET=m
+# CONFIG_USB_NET_CDC_SUBSET is not set
+CONFIG_B43=m
+CONFIG_B43LEGACY=m
+CONFIG_P54_COMMON=m
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
# CONFIG_MOUSE_PS2 is not set
@@ -220,20 +174,21 @@ CONFIG_MOUSE_APPLETOUCH=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIAL_8250=m
-CONFIG_SERIAL_PMACZILOG=m
+CONFIG_SERIAL_PMACZILOG=y
CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
CONFIG_NVRAM=y
-CONFIG_GEN_RTC=y
CONFIG_I2C_CHARDEV=m
-CONFIG_POWER_SUPPLY=y
+CONFIG_POWER_RESET=y
CONFIG_APM_POWER=y
CONFIG_BATTERY_PMU=y
CONFIG_HWMON=m
CONFIG_AGP=m
CONFIG_AGP_UNINORTH=m
CONFIG_DRM=m
-CONFIG_DRM_R128=m
CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
+CONFIG_DRM_R128=m
CONFIG_FB=y
CONFIG_FB_OF=y
CONFIG_FB_CONTROL=y
@@ -252,17 +207,18 @@ CONFIG_FB_ATY=y
CONFIG_FB_ATY_CT=y
CONFIG_FB_ATY_GX=y
CONFIG_FB_3DFX=y
-CONFIG_DISPLAY_SUPPORT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_DUMMY=m
CONFIG_SND_POWERMAC=m
CONFIG_SND_AOA=m
@@ -294,27 +250,12 @@ CONFIG_USB_SERIAL_VISOR=m
CONFIG_USB_SERIAL_IPAQ=m
CONFIG_USB_SERIAL_KEYSPAN_PDA=m
CONFIG_USB_SERIAL_KEYSPAN=m
-CONFIG_USB_SERIAL_KEYSPAN_MPR=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XA=y
-CONFIG_USB_SERIAL_KEYSPAN_USA28XB=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19=y
-CONFIG_USB_SERIAL_KEYSPAN_USA18X=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QW=y
-CONFIG_USB_SERIAL_KEYSPAN_USA19QI=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49W=y
-CONFIG_USB_SERIAL_KEYSPAN_USA49WLC=y
CONFIG_USB_APPLEDISPLAY=m
CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT4_FS=y
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
@@ -327,36 +268,22 @@ CONFIG_TMPFS=y
CONFIG_HFS_FS=m
CONFIG_HFSPLUS_FS=m
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
-CONFIG_SMB_FS=m
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=y
-CONFIG_LIBCRC32C=m
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_BOOTX=y
-CONFIG_CRYPTO_NULL=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_MD4=m
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
@@ -367,4 +294,3 @@ CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/powernv_defconfig
index a905063281cc..bd4685612de6 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -1,13 +1,9 @@
CONFIG_PPC64=y
-CONFIG_ALTIVEC=y
-CONFIG_VSX=y
-CONFIG_SMP=y
CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_TASKSTATS=y
@@ -16,15 +12,23 @@ CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
+CONFIG_NUMA_BALANCING=y
CONFIG_CGROUPS=y
+CONFIG_MEMCG=y
+CONFIG_CGROUP_SCHED=y
CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_USER_NS=y
CONFIG_BLK_DEV_INITRD=y
+CONFIG_BPF_SYSCALL=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
@@ -32,26 +36,38 @@ CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
-# CONFIG_PPC_PMAC is not set
-CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_HZ_100=y
+# CONFIG_SCOM_DEBUGFS is not set
+CONFIG_OPAL_PRD=y
+CONFIG_PPC_MEMTRACE=y
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_1000=y
CONFIG_BINFMT_MISC=m
CONFIG_PPC_TRANSACTIONAL_MEM=y
+CONFIG_PPC_UV=y
+CONFIG_HOTPLUG_CPU=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
+CONFIG_KSM=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_HWPOISON_INJECT=m
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_SCHED_SMT=y
+CONFIG_PM=y
CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_ZONE_DEVICE=y
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=m
@@ -63,52 +79,56 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
+CONFIG_INET6_AH=m
+CONFIG_INET6_ESP=m
+CONFIG_INET6_IPCOMP=m
+CONFIG_IPV6_SIT=m
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_BRIDGE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
+CONFIG_DNS_RESOLVER=y
+CONFIG_BPF_JIT=y
+# CONFIG_WIRELESS is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_POWERNV_FLASH=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM=m
CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
+CONFIG_BLK_DEV_NVME=y
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
+CONFIG_CHR_DEV_ST=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=m
+CONFIG_SCSI_SRP_ATTRS=y
CONFIG_SCSI_CXGB3_ISCSI=m
CONFIG_SCSI_CXGB4_ISCSI=m
CONFIG_SCSI_BNX2_ISCSI=m
CONFIG_BE2ISCSI=m
+CONFIG_SCSI_AACRAID=y
CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_IBMVSCSI=y
-CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_IPR=y
CONFIG_SCSI_QLA_FC=m
CONFIG_SCSI_QLA_ISCSI=m
CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_ALUA=m
CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
# CONFIG_ATA_SFF is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
@@ -122,6 +142,7 @@ CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=y
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_MIRROR=m
CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
@@ -130,107 +151,116 @@ CONFIG_DM_MULTIPATH_ST=m
CONFIG_DM_UEVENT=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
+CONFIG_MACVLAN=m
+CONFIG_MACVTAP=m
+CONFIG_VXLAN=m
+CONFIG_NETCONSOLE=m
CONFIG_TUN=m
-CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
+CONFIG_VETH=m
+CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
+CONFIG_PCNET32=m
CONFIG_TIGON3=y
+CONFIG_BNX2X=m
+# CONFIG_CAVIUM_PTP is not set
CONFIG_CHELSIO_T1=m
CONFIG_BE2NET=m
CONFIG_S2IO=m
-CONFIG_IBMVETH=y
-CONFIG_EHEA=y
CONFIG_E100=y
CONFIG_E1000=y
CONFIG_E1000E=y
-CONFIG_IXGB=m
+CONFIG_IGB=y
CONFIG_IXGBE=m
+CONFIG_I40E=m
CONFIG_MLX4_EN=m
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_FPGA=y
+CONFIG_MLX5_CORE_EN=y
+CONFIG_MLX5_CORE_IPOIB=y
CONFIG_MYRI10GE=m
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
+CONFIG_USB_NET_DRIVERS=m
+# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_PCSPKR=m
-# CONFIG_SERIO_SERPORT is not set
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_ICOM=m
+CONFIG_SERIAL_8250_PCI=m
CONFIG_SERIAL_JSM=m
-CONFIG_HVC_CONSOLE=y
-CONFIG_HVC_RTAS=y
-CONFIG_HVCS=m
-CONFIG_VIRTIO_CONSOLE=m
-CONFIG_IBM_BSR=m
-CONFIG_GEN_RTC=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
-CONFIG_FB=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+# CONFIG_DEVPORT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_PTP_1588_CLOCK is not set
+CONFIG_DRM=y
+CONFIG_DRM_AST=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
+CONFIG_FB_MATROX=m
CONFIG_FB_MATROX_MILLENIUM=y
CONFIG_FB_MATROX_MYSTIQUE=y
CONFIG_FB_MATROX_G=y
-CONFIG_FB_RADEON=y
-CONFIG_FB_IBM_GXT4500=y
+CONFIG_FB_RADEON=m
+CONFIG_FB_IBM_GXT4500=m
CONFIG_LCD_PLATFORM=m
# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SUNPLUS=y
+CONFIG_HID_A4TECH=m
+CONFIG_HID_APPLE=m
+CONFIG_HID_BELKIN=m
+CONFIG_HID_CHERRY=m
+CONFIG_HID_CHICONY=m
+CONFIG_HID_CYPRESS=m
+CONFIG_HID_EZKEY=m
+CONFIG_HID_GYRATION=m
+CONFIG_HID_ITE=m
+CONFIG_HID_KENSINGTON=m
+CONFIG_HID_LOGITECH=m
+CONFIG_HID_MICROSOFT=m
+CONFIG_HID_MONTEREY=m
+CONFIG_HID_PANTHERLORD=m
+CONFIG_HID_PETALYNX=m
+CONFIG_HID_SAMSUNG=m
+CONFIG_HID_SUNPLUS=m
+CONFIG_USB_HID=m
CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_MON=m
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_HCD_PPC_OF is not set
CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=m
+CONFIG_USB_OHCI_HCD_PCI=m
+CONFIG_USB_STORAGE=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_EHCA=m
-CONFIG_INFINIBAND_CXGB3=m
CONFIG_INFINIBAND_CXGB4=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_IPOIB_CM=y
CONFIG_INFINIBAND_SRP=m
CONFIG_INFINIBAND_ISER=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_GENERIC=y
+# CONFIG_VIRTIO_MENU is not set
+CONFIG_LIBNVDIMM=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
@@ -239,12 +269,14 @@ CONFIG_XFS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_FANOTIFY=y
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
+CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -255,9 +287,9 @@ CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
CONFIG_PSTORE=y
-CONFIG_NFS_FS=y
+CONFIG_NFS_FS=m
CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
@@ -269,41 +301,42 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
-CONFIG_LATENCYTOP=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_FUNCTION_TRACER=y
CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_PPC_EMULATED_STATS=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
-CONFIG_XMON_DEFAULT=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_TGR192=m
+CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_NX=y
-CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_CRYPTO_DEV_VMX=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM_BOOK3S_64=m
-CONFIG_KVM_BOOK3S_64_HV=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
-CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_KVM_BOOK3S_64_HV=m
+CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
+CONFIG_PRINTK_CALLER=y
+CONFIG_KALLSYMS_ALL=y
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
deleted file mode 100644
index 52908c7897d9..000000000000
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ /dev/null
@@ -1,105 +0,0 @@
-CONFIG_40x=y
-CONFIG_EXPERIMENTAL=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PPC4xx_GPIO=y
-CONFIG_ACADIA=y
-CONFIG_EP405=y
-CONFIG_HOTFOOT=y
-CONFIG_KILAUEA=y
-CONFIG_MAKALU=y
-CONFIG_XILINX_VIRTEX_GENERIC_BOARD=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_CONNECTOR=y
-CONFIG_MTD=y
-CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=m
-CONFIG_MTD_CFI=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_AMDSTD=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_UBI=m
-CONFIG_MTD_UBI_GLUEBI=m
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_XILINX_SYSACE=m
-CONFIG_NETDEVICES=y
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
-CONFIG_IBM_EMAC=y
-# CONFIG_INPUT is not set
-CONFIG_SERIO=m
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_XILINX_XPS_PS2=m
-# CONFIG_VT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_8250_EXTENDED=y
-CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_UARTLITE=y
-CONFIG_SERIAL_UARTLITE_CONSOLE=y
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_HW_RANDOM is not set
-CONFIG_XILINX_HWICAP=m
-CONFIG_I2C=m
-CONFIG_I2C_CHARDEV=m
-CONFIG_I2C_GPIO=m
-CONFIG_I2C_IBM_IIC=m
-CONFIG_GPIO_XILINX=y
-# CONFIG_HWMON is not set
-CONFIG_THERMAL=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
-CONFIG_FB=m
-CONFIG_FB_XILINX=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_VFAT_FS=m
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_JFFS2_FS=m
-CONFIG_UBIFS_FS=m
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_ROOT_NFS=y
-CONFIG_NLS_CODEPAGE_437=m
-CONFIG_NLS_ISO8859_1=m
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig
index 924e10df1844..41c930f74ed4 100644
--- a/arch/powerpc/configs/ppc44x_defconfig
+++ b/arch/powerpc/configs/ppc44x_defconfig
@@ -1,13 +1,10 @@
CONFIG_44x=y
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
@@ -25,10 +22,8 @@ CONFIG_GLACIER=y
CONFIG_REDWOOD=y
CONFIG_EIGER=y
CONFIG_YOSEMITE=y
-CONFIG_XILINX_VIRTEX440_GENERIC_BOARD=y
CONFIG_PPC4xx_GPIO=y
CONFIG_MATH_EMULATION=y
-CONFIG_SPARSE_IRQ=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -36,61 +31,44 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
CONFIG_BRIDGE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_CONNECTOR=y
CONFIG_MTD=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_JEDECPROBE=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_MTD_NAND=m
+CONFIG_MTD_RAW_NAND=m
CONFIG_MTD_NAND_NDFC=m
CONFIG_MTD_UBI=m
CONFIG_MTD_UBI_GLUEBI=m
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=35000
-CONFIG_XILINX_SYSACE=m
CONFIG_SCSI=m
CONFIG_BLK_DEV_SD=m
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
CONFIG_TUN=m
-CONFIG_ETHERNET=y
-CONFIG_NET_VENDOR_IBM=y
CONFIG_IBM_EMAC=y
# CONFIG_INPUT is not set
CONFIG_SERIO=m
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIO_XILINX_XPS_PS2=m
# CONFIG_VT is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_SERIAL_8250_PCI is not set
CONFIG_SERIAL_8250_EXTENDED=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
-CONFIG_SERIAL_UARTLITE=y
-CONFIG_SERIAL_UARTLITE_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
# CONFIG_HW_RANDOM is not set
-CONFIG_XILINX_HWICAP=m
CONFIG_I2C=m
CONFIG_I2C_CHARDEV=m
CONFIG_I2C_GPIO=m
CONFIG_I2C_IBM_IIC=m
-CONFIG_GPIO_XILINX=y
# CONFIG_HWMON is not set
CONFIG_FB=m
-CONFIG_FB_XILINX=m
CONFIG_USB=m
CONFIG_USB_EHCI_HCD=m
CONFIG_USB_OHCI_HCD=m
@@ -98,32 +76,23 @@ CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
# CONFIG_USB_OHCI_HCD_PCI is not set
CONFIG_USB_STORAGE=m
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=m
CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
CONFIG_UBIFS_FS=m
-CONFIG_UBIFS_FS_XATTR=y
-CONFIG_LOGFS=m
CONFIG_CRAMFS=y
CONFIG_SQUASHFS=m
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZO=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_ISO8859_1=m
-CONFIG_CRC_T10DIF=m
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_CRYPTO_ECB=y
CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
CONFIG_VIRTUALIZATION=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index f26b267eb71f..2d92c11eea7e 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -1,34 +1,49 @@
-CONFIG_PPC64=y
-CONFIG_ALTIVEC=y
-CONFIG_VSX=y
-CONFIG_SMP=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ_FULL=y
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_LSM=y
+CONFIG_PREEMPT_VOLUNTARY=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_PSI=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
-CONFIG_CGROUPS=y
+CONFIG_LOG_BUF_SHIFT=18
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=13
+CONFIG_NUMA_BALANCING=y
+CONFIG_MEMCG=y
+CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
CONFIG_CPUSETS=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_CGROUP_MISC=y
+CONFIG_USER_NS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
+CONFIG_PPC64=y
+CONFIG_NR_CPUS=2048
CONFIG_DTL=y
-CONFIG_PPC_MAPLE=y
+CONFIG_PPC_SMLPAR=y
+CONFIG_IBMEBUS=y
+CONFIG_PAPR_SCM=m
+CONFIG_PPC_SVM=y
CONFIG_PPC_PASEMI=y
CONFIG_PPC_PASEMI_IOMMU=y
CONFIG_PPC_PS3=y
@@ -36,27 +51,48 @@ CONFIG_PS3_DISK=m
CONFIG_PS3_ROM=m
CONFIG_PS3_FLASH=m
CONFIG_PS3_LPM=m
-CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELLEB=y
-CONFIG_PPC_CELL_QPACE=y
CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_PMAC64=y
-CONFIG_HZ_100=y
-CONFIG_BINFMT_MISC=m
+CONFIG_HZ_1000=y
CONFIG_PPC_TRANSACTIONAL_MEM=y
CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_CRASH_DUMP=y
+CONFIG_FA_DUMP=y
CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTREMOVE=y
CONFIG_SCHED_SMT=y
-CONFIG_PCCARD=y
-CONFIG_ELECTRA_CF=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PPC_SECURE_BOOT=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_KVM_BOOK3S_64=m
+CONFIG_KVM_BOOK3S_64_HV=m
+CONFIG_KPROBES=y
+CONFIG_JUMP_LABEL=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODVERSIONS=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_BINFMT_MISC=m
+CONFIG_ZSWAP=y
+CONFIG_ZSMALLOC=y
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLAB_FREELIST_RANDOM=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_SHUFFLE_PAGE_ALLOCATOR=y
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
+CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
+CONFIG_ZONE_DEVICE=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=m
@@ -71,34 +107,33 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_BRIDGE=m
-CONFIG_BPF_JIT=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+CONFIG_VLAN_8021Q=m
+CONFIG_NET_SCHED=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_BPF=m
+CONFIG_HOTPLUG_PCI=y
+CONFIG_HOTPLUG_PCI_RPA=m
+CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
+CONFIG_PCCARD=y
+CONFIG_ELECTRA_CF=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_FD=y
+CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_VIRTIO_BLK=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_CELLEB=y
-CONFIG_BLK_DEV_IDE_PMAC=y
-CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
+CONFIG_BLK_DEV_NVME=m
CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
+CONFIG_CHR_DEV_ST=m
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_CXGB3_ISCSI=m
@@ -108,41 +143,56 @@ CONFIG_BE2ISCSI=m
CONFIG_SCSI_MPT2SAS=m
CONFIG_SCSI_IBMVSCSI=y
CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
+CONFIG_SCSI_SYM53C8XX_2=m
CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
CONFIG_SCSI_IPR=y
CONFIG_SCSI_QLA_FC=m
CONFIG_SCSI_QLA_ISCSI=m
CONFIG_SCSI_LPFC=m
CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
+CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_ALUA=m
CONFIG_ATA=y
+CONFIG_SATA_AHCI=y
CONFIG_SATA_SIL24=y
CONFIG_SATA_MV=y
CONFIG_SATA_SVW=y
+CONFIG_PATA_AMD=y
+CONFIG_PATA_MACIO=y
+CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=y
CONFIG_MD_RAID0=y
CONFIG_MD_RAID1=y
-CONFIG_MD_RAID10=m
-CONFIG_MD_RAID456=m
CONFIG_MD_MULTIPATH=m
CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=y
+CONFIG_DM_UNSTRIPED=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
+CONFIG_DM_CACHE=m
+CONFIG_DM_WRITECACHE=m
+CONFIG_DM_EBS=m
+CONFIG_DM_ERA=m
+CONFIG_DM_CLONE=m
CONFIG_DM_MIRROR=m
+CONFIG_DM_LOG_USERSPACE=m
+CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
+CONFIG_DM_MULTIPATH_HST=m
+CONFIG_DM_MULTIPATH_IOA=m
+CONFIG_DM_DELAY=m
+CONFIG_DM_DUST=m
+CONFIG_DM_INIT=y
CONFIG_DM_UEVENT=y
CONFIG_ADB_PMU=y
CONFIG_PMAC_SMU=y
-CONFIG_THERM_PM72=y
CONFIG_WINDFARM=y
CONFIG_WINDFARM_PM81=y
CONFIG_WINDFARM_PM91=y
@@ -151,43 +201,40 @@ CONFIG_WINDFARM_PM121=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
CONFIG_TUN=m
CONFIG_VIRTIO_NET=m
-CONFIG_VHOST_NET=m
-CONFIG_VORTEX=y
+CONFIG_VORTEX=m
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
+CONFIG_PCNET32=m
CONFIG_TIGON3=y
+CONFIG_BNX2X=m
CONFIG_CHELSIO_T1=m
CONFIG_BE2NET=m
-CONFIG_S2IO=m
CONFIG_IBMVETH=m
CONFIG_EHEA=m
+CONFIG_IBMVNIC=m
CONFIG_E100=y
CONFIG_E1000=y
CONFIG_E1000E=y
-CONFIG_IXGB=m
CONFIG_IXGBE=m
+CONFIG_I40E=m
CONFIG_MLX4_EN=m
CONFIG_MYRI10GE=m
+CONFIG_S2IO=m
CONFIG_PASEMI_MAC=y
-CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
CONFIG_SUNGEM=y
CONFIG_GELIC_NET=m
CONFIG_GELIC_WIRELESS=y
-CONFIG_SPIDER_NET=m
-CONFIG_MARVELL_PHY=y
CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPPOE=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=m
@@ -195,19 +242,15 @@ CONFIG_INPUT_PCSPKR=m
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_ICOM=m
-CONFIG_SERIAL_TXX9_CONSOLE=y
CONFIG_SERIAL_JSM=m
CONFIG_HVC_CONSOLE=y
CONFIG_HVC_RTAS=y
-CONFIG_HVC_BEAT=y
CONFIG_HVCS=m
CONFIG_VIRTIO_CONSOLE=m
CONFIG_IBM_BSR=m
-CONFIG_RAW_DRIVER=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_AMD8111=y
CONFIG_I2C_PASEMI=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_OF=y
@@ -226,11 +269,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_POWERMAC=m
CONFIG_SND_AOA=m
CONFIG_SND_AOA_FABRIC_LAYOUT=m
@@ -250,54 +294,44 @@ CONFIG_USB_EHCI_HCD=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_STORAGE=m
CONFIG_USB_APPLEDISPLAY=m
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=m
+CONFIG_LEDS_POWERNV=m
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_MAD=m
CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_EHCA=m
-CONFIG_INFINIBAND_CXGB3=m
CONFIG_INFINIBAND_CXGB4=m
CONFIG_MLX4_INFINIBAND=m
+CONFIG_INFINIBAND_MTHCA=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_IPOIB_CM=y
CONFIG_INFINIBAND_SRP=m
CONFIG_INFINIBAND_ISER=m
CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
CONFIG_EDAC_PASEMI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_VHOST_NET=m
+CONFIG_RAS=y
+CONFIG_LIBNVDIMM=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_XFS_FS=m
+CONFIG_XFS_FS=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_FS_DAX=y
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
+CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
+CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -309,6 +343,7 @@ CONFIG_SQUASHFS=m
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
+CONFIG_PSTORE=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
@@ -324,40 +359,109 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
-CONFIG_MAGIC_SYSRQ=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SECURITY_SELINUX_BOOTPARAM=y
+CONFIG_SECURITY_YAMA=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_INTEGRITY_SIGNATURE=y
+CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
+CONFIG_INTEGRITY_PLATFORM_KEYRING=y
+CONFIG_IMA=y
+CONFIG_IMA_KEXEC=y
+CONFIG_IMA_DEFAULT_HASH_SHA256=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_IMA_APPRAISE=y
+CONFIG_IMA_ARCH_POLICY=y
+CONFIG_IMA_APPRAISE_MODSIG=y
+CONFIG_CRYPTO_BENCHMARK=m
+CONFIG_CRYPTO_BLOWFISH=m
+CONFIG_CRYPTO_CAST6=m
+CONFIG_CRYPTO_SERPENT=m
+CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_WP512=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_CRYPTO_AES_GCM_P10=m
+CONFIG_CRYPTO_DEV_NX=y
+CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
+CONFIG_CRYPTO_DEV_VMX=y
+CONFIG_SYSTEM_TRUSTED_KEYRING=y
+CONFIG_SYSTEM_BLACKLIST_KEYRING=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PRINTK_CALLER=y
CONFIG_DEBUG_KERNEL=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_PAGE_OWNER=y
+CONFIG_PAGE_POISONING=y
+CONFIG_DEBUG_RODATA_TEST=y
+CONFIG_DEBUG_WX=y
CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+# CONFIG_DEBUG_VM_PGTABLE is not set
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_SCHEDSTATS=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_LOCK_TORTURE_TEST=m
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
+CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_IO_STRICT_DEVMEM=y
+CONFIG_PPC_EMULATED_STATS=y
CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_NX=y
-CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
-CONFIG_VIRTUALIZATION=y
-CONFIG_KVM_BOOK3S_64=m
-CONFIG_KVM_BOOK3S_64_HV=y
+CONFIG_KUNIT=m
+CONFIG_KUNIT_ALL_TESTS=m
+CONFIG_LKDTM=m
+CONFIG_TEST_MIN_HEAP=m
+CONFIG_TEST_DIV64=m
+CONFIG_BACKTRACE_SELF_TEST=m
+CONFIG_TEST_REF_TRACKER=m
+CONFIG_RBTREE_TEST=m
+CONFIG_REED_SOLOMON_TEST=m
+CONFIG_INTERVAL_TREE_TEST=m
+CONFIG_PERCPU_TEST=m
+CONFIG_ATOMIC64_SELFTEST=m
+CONFIG_ASYNC_RAID6_TEST=m
+CONFIG_TEST_HEXDUMP=m
+CONFIG_STRING_SELFTEST=m
+CONFIG_TEST_STRING_HELPERS=m
+CONFIG_TEST_KSTRTOX=m
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
+CONFIG_TEST_BITMAP=m
+CONFIG_TEST_UUID=m
+CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
+CONFIG_TEST_RHASHTABLE=m
+CONFIG_TEST_IDA=m
+CONFIG_TEST_BITOPS=m
+CONFIG_TEST_VMALLOC=m
+CONFIG_TEST_USER_COPY=m
+CONFIG_TEST_BPF=m
+CONFIG_TEST_BLACKHOLE_DEV=m
+CONFIG_FIND_BIT_BENCHMARK=m
+CONFIG_TEST_FIRMWARE=m
+CONFIG_TEST_SYSCTL=m
+CONFIG_LINEAR_RANGES_TEST=m
+CONFIG_TEST_UDELAY=m
+CONFIG_TEST_STATIC_KEYS=m
+CONFIG_TEST_KMOD=m
+CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_MEMINIT=m
+CONFIG_TEST_FREE_PAGES=m
+CONFIG_MEMTEST=y
+CONFIG_KALLSYMS_ALL=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 438e813dc9cb..90247b2a0ab0 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -14,14 +14,12 @@ CONFIG_CPUSETS=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_MAC_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_CORENET_GENERIC=y
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
@@ -30,8 +28,9 @@ CONFIG_BINFMT_MISC=m
CONFIG_IRQ_ALL_CPUS=y
CONFIG_SPARSEMEM_MANUAL=y
CONFIG_PCI_MSI=y
-CONFIG_PCCARD=y
CONFIG_HOTPLUG_PCI=y
+CONFIG_PCCARD=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_XFRM_USER=m
@@ -50,25 +49,17 @@ CONFIG_INET_IPCOMP=m
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_ADVANCED is not set
CONFIG_BRIDGE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_FD=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_SRP_ATTRS=y
@@ -80,6 +71,8 @@ CONFIG_SCSI_DEBUG=m
CONFIG_ATA=y
CONFIG_SATA_SIL24=y
CONFIG_SATA_SVW=y
+CONFIG_PATA_AMD=y
+CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=y
@@ -101,7 +94,6 @@ CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
CONFIG_TUN=m
CONFIG_VORTEX=y
CONFIG_ACENIC=y
@@ -110,27 +102,23 @@ CONFIG_PCNET32=y
CONFIG_TIGON3=y
CONFIG_E100=y
CONFIG_E1000=y
-CONFIG_IXGB=m
CONFIG_SUNGEM=y
-CONFIG_MARVELL_PHY=y
CONFIG_BROADCOM_PHY=m
+CONFIG_MARVELL_PHY=y
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPPOE=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=m
CONFIG_INPUT_MISC=y
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
# CONFIG_HW_RANDOM is not set
-CONFIG_RAW_DRIVER=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_AMD8111=y
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
CONFIG_FB_OF=y
@@ -148,11 +136,12 @@ CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_HID_DRAGONRISE=y
CONFIG_HID_GYRATION=y
CONFIG_HID_TWINHAN=y
@@ -177,32 +166,22 @@ CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_MTHCA=m
CONFIG_INFINIBAND_IPOIB=m
CONFIG_INFINIBAND_ISER=m
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
CONFIG_JFS_FS=y
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_FS_DAX=y
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
@@ -228,14 +207,12 @@ CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_LATENCYTOP=y
CONFIG_IRQSOFF_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
@@ -243,23 +220,20 @@ CONFIG_CODE_PATCHING_SELFTEST=y
CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
-CONFIG_CRYPTO_TEST=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/configs/ppc64le.config b/arch/powerpc/configs/ppc64le.config
new file mode 100644
index 000000000000..14dca1062c1b
--- /dev/null
+++ b/arch/powerpc/configs/ppc64le.config
@@ -0,0 +1,2 @@
+CONFIG_PPC64=y
+CONFIG_CPU_LITTLE_ENDIAN=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index fec5870f1818..b082c1fae13c 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -1,64 +1,55 @@
CONFIG_FSL_EMB_PERFMON=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
+CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_AUDIT=y
CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
-CONFIG_RESOURCE_COUNTERS=y
-CONFIG_CGROUP_SCHED=y
-CONFIG_RT_GROUP_SCHED=y
-CONFIG_UTS_NS=y
-CONFIG_IPC_NS=y
CONFIG_USER_NS=y
-CONFIG_PID_NS=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_EXTRA_PASS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_BLK_DEV_INTEGRITY=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_OSF_PARTITION=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_KARMA_PARTITION=y
CONFIG_PPC_MPC52xx=y
CONFIG_PPC_EFIKA=y
CONFIG_PPC_MPC5200_BUGFIX=y
-CONFIG_PPC_MPC5200_GPIO=y
CONFIG_PPC_82xx=y
-CONFIG_MPC8272_ADS=y
-CONFIG_PQ2FADS=y
CONFIG_EP8248E=y
CONFIG_MGCOGE=y
CONFIG_PPC_83xx=y
CONFIG_MPC831x_RDB=y
-CONFIG_MPC832x_MDS=y
CONFIG_MPC832x_RDB=y
-CONFIG_MPC834x_MDS=y
CONFIG_MPC834x_ITX=y
-CONFIG_MPC836x_MDS=y
CONFIG_MPC836x_RDK=y
-CONFIG_MPC837x_MDS=y
CONFIG_MPC837x_RDB=y
-CONFIG_SBC834x=y
CONFIG_ASP834x=y
CONFIG_PPC_86xx=y
-CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
-CONFIG_MPC8610_HPCD=y
CONFIG_GEF_SBC610=y
CONFIG_CPU_FREQ=y
-CONFIG_CPU_FREQ_DEBUG=y
-CONFIG_CPU_FREQ_STAT=m
-CONFIG_CPU_FREQ_STAT_DETAILS=y
+CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
@@ -67,21 +58,14 @@ CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
CONFIG_CPU_FREQ_PMAC=y
CONFIG_TAU=y
CONFIG_TAU_AVERAGE=y
-CONFIG_QUICC_ENGINE=y
CONFIG_QE_GPIO=y
-CONFIG_PPC_BESTCOMM=y
-CONFIG_GPIO_MPC8XXX=y
CONFIG_MCU_MPC8349EMITX=y
CONFIG_HIGHMEM=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
CONFIG_HZ_1000=y
CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_BINFMT_MISC=y
-# CONFIG_MIGRATION is not set
-CONFIG_PM=y
-CONFIG_PM_DEBUG=y
CONFIG_HIBERNATION=y
+CONFIG_PM_DEBUG=y
CONFIG_ISA=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_MSI=y
@@ -106,8 +90,6 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_NET_IPIP=m
-CONFIG_NET_IPGRE=m
-CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
@@ -115,9 +97,6 @@ CONFIG_SYN_COOKIES=y
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_HSTCP=m
@@ -128,7 +107,6 @@ CONFIG_TCP_CONG_VENO=m
CONFIG_TCP_CONG_YEAH=m
CONFIG_TCP_CONG_ILLINOIS=m
CONFIG_TCP_MD5SIG=y
-CONFIG_IPV6_PRIVACY=y
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_IPV6_ROUTE_INFO=y
CONFIG_IPV6_OPTIMISTIC_DAD=y
@@ -136,7 +114,6 @@ CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
CONFIG_IPV6_TUNNEL=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
@@ -144,11 +121,9 @@ CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_PIMSM_V2=y
CONFIG_NETLABEL=y
CONFIG_NETFILTER=y
-CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
-CONFIG_NF_CT_PROTO_UDPLITE=m
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
@@ -159,7 +134,6 @@ CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
-CONFIG_NETFILTER_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
@@ -195,32 +169,19 @@ CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
-CONFIG_NETFILTER_XT_MATCH_SOCKET=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
-# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
-CONFIG_IP_NF_QUEUE=m
CONFIG_IP_NF_IPTABLES=m
-CONFIG_IP_NF_MATCH_ADDRTYPE=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
-CONFIG_IP_NF_TARGET_LOG=m
-CONFIG_IP_NF_TARGET_ULOG=m
-CONFIG_NF_NAT=m
-CONFIG_IP_NF_TARGET_MASQUERADE=m
-CONFIG_IP_NF_TARGET_NETMAP=m
-CONFIG_IP_NF_TARGET_REDIRECT=m
-CONFIG_NF_NAT_SNMP_BASIC=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
@@ -228,8 +189,6 @@ CONFIG_IP_NF_SECURITY=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
-CONFIG_IP6_NF_QUEUE=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
@@ -240,7 +199,6 @@ CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_TARGET_HL=m
-CONFIG_IP6_NF_TARGET_LOG=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
@@ -266,10 +224,7 @@ CONFIG_BRIDGE_EBT_MARK_T=m
CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_BRIDGE_EBT_ULOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-CONFIG_NET_DCCPPROBE=m
CONFIG_TIPC=m
CONFIG_ATM=m
CONFIG_ATM_CLIP=m
@@ -277,15 +232,10 @@ CONFIG_ATM_LANE=m
CONFIG_ATM_BR2684=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
-CONFIG_DECNET=m
-CONFIG_DECNET_ROUTER=y
-CONFIG_IPX=m
CONFIG_ATALK=m
CONFIG_DEV_APPLETALK=m
CONFIG_IPDDP=m
CONFIG_IPDDP_ENCAP=y
-CONFIG_IPDDP_DECAP=y
-CONFIG_WAN_ROUTER=m
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
@@ -302,7 +252,6 @@ CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
@@ -322,35 +271,11 @@ CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_NET_CLS_IND=y
-CONFIG_IRDA=m
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-CONFIG_IRTTY_SIR=m
-CONFIG_KINGSUN_DONGLE=m
-CONFIG_KSDAZZLE_DONGLE=m
-CONFIG_KS959_DONGLE=m
-CONFIG_USB_IRDA=m
-CONFIG_SIGMATEL_FIR=m
-CONFIG_NSC_FIR=m
-CONFIG_WINBOND_FIR=m
-CONFIG_TOSHIBA_FIR=m
-CONFIG_SMC_IRCC_FIR=m
-CONFIG_ALI_FIR=m
-CONFIG_VLSI_FIR=m
-CONFIG_VIA_FIR=m
-CONFIG_MCS_FIR=m
CONFIG_BT=m
-CONFIG_BT_L2CAP=y
-CONFIG_BT_SCO=y
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
CONFIG_BT_BNEP=m
@@ -358,16 +283,13 @@ CONFIG_BT_BNEP_MC_FILTER=y
CONFIG_BT_BNEP_PROTO_FILTER=y
CONFIG_BT_HIDP=m
CONFIG_BT_HCIUART=m
-CONFIG_BT_HCIUART_H4=y
CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_LL=y
CONFIG_BT_HCIBCM203X=m
CONFIG_BT_HCIBPA10X=m
CONFIG_BT_HCIBFUSB=m
CONFIG_BT_HCIDTL1=m
CONFIG_BT_HCIBT3C=m
CONFIG_BT_HCIBLUECARD=m
-CONFIG_BT_HCIBTUART=m
CONFIG_BT_HCIVHCI=m
CONFIG_CFG80211=m
CONFIG_MAC80211=m
@@ -376,11 +298,8 @@ CONFIG_MAC80211_LEDS=y
CONFIG_MAC80211_DEBUGFS=y
CONFIG_NET_9P=m
CONFIG_NET_9P_VIRTIO=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FIRMWARE_IN_KERNEL is not set
CONFIG_DEBUG_DEVRES=y
CONFIG_CONNECTOR=y
-CONFIG_PROC_DEVICETREE=y
CONFIG_PARPORT=m
CONFIG_PARPORT_PC=m
CONFIG_PARPORT_SERIAL=m
@@ -389,37 +308,24 @@ CONFIG_PNP=y
CONFIG_ISAPNP=y
CONFIG_MAC_FLOPPY=m
CONFIG_BLK_DEV_LOOP=m
-CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_CDROM_PKTCDVD=m
CONFIG_VIRTIO_BLK=m
-CONFIG_BLK_DEV_HD=y
-CONFIG_MISC_DEVICES=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_SENSORS_TSL2550=m
CONFIG_EEPROM_AT24=m
CONFIG_EEPROM_LEGACY=m
CONFIG_EEPROM_MAX6875=m
CONFIG_EEPROM_93CX6=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=m
-CONFIG_IDE_TASK_IOCTL=y
-# CONFIG_IDEPCI_PCIBUS_ORDER is not set
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_IDE_PMAC=y
-CONFIG_BLK_DEV_IDE_PMAC_ATA100FIRST=y
CONFIG_RAID_ATTRS=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
-CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=m
CONFIG_SCSI_ENCLOSURE=m
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
@@ -436,14 +342,15 @@ CONFIG_ATA=y
# CONFIG_SATA_PMP is not set
CONFIG_SATA_FSL=m
CONFIG_PDC_ADMA=m
-CONFIG_PATA_MPC52xx=m
CONFIG_ATA_PIIX=m
+CONFIG_PATA_MACIO=y
+CONFIG_PATA_MPC52xx=m
CONFIG_PATA_OPTIDMA=m
CONFIG_PATA_SCH=m
CONFIG_PATA_VIA=m
CONFIG_PATA_PLATFORM=m
CONFIG_PATA_OF_PLATFORM=m
-CONFIG_ATA_GENERIC=m
+CONFIG_ATA_GENERIC=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=m
@@ -468,7 +375,7 @@ CONFIG_ADB=y
CONFIG_ADB_CUDA=y
CONFIG_ADB_PMU=y
CONFIG_ADB_PMU_LED=y
-CONFIG_ADB_PMU_LED_IDE=y
+CONFIG_ADB_PMU_LED_DISK=y
CONFIG_PMAC_APM_EMU=y
CONFIG_PMAC_MEDIABAY=y
CONFIG_PMAC_BACKLIGHT=y
@@ -479,109 +386,126 @@ CONFIG_THERM_WINDTUNNEL=m
CONFIG_THERM_ADT746X=m
CONFIG_WINDFARM=y
CONFIG_PMAC_RACKMETER=m
+CONFIG_SENSORS_AMS=m
CONFIG_NETDEVICES=y
-CONFIG_IFB=m
-CONFIG_DUMMY=m
CONFIG_BONDING=m
-CONFIG_MACVLAN=m
+CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
+CONFIG_NET_FC=y
+CONFIG_IFB=m
+CONFIG_MACVLAN=m
+CONFIG_NETCONSOLE=m
CONFIG_TUN=m
CONFIG_VETH=m
-CONFIG_NET_SB1000=m
-CONFIG_MARVELL_PHY=m
-CONFIG_DAVICOM_PHY=m
-CONFIG_QSEMI_PHY=m
-CONFIG_LXT_PHY=m
-CONFIG_CICADA_PHY=m
-CONFIG_VITESSE_PHY=m
-CONFIG_SMSC_PHY=m
-CONFIG_BROADCOM_PHY=m
-CONFIG_ICPLUS_PHY=m
-CONFIG_REALTEK_PHY=m
-CONFIG_NET_ETHERNET=y
-CONFIG_MACE=m
-CONFIG_BMAC=m
-CONFIG_HAPPYMEAL=m
-CONFIG_SUNGEM=m
-CONFIG_CASSINI=m
-CONFIG_NET_VENDOR_3COM=y
+CONFIG_VIRTIO_NET=m
+CONFIG_ATM_TCP=m
+CONFIG_ATM_LANAI=m
+CONFIG_ATM_ENI=m
+CONFIG_ATM_NICSTAR=m
+CONFIG_ATM_IDT77252=m
+CONFIG_ATM_HE=m
CONFIG_EL3=m
+CONFIG_PCMCIA_3C574=m
+CONFIG_PCMCIA_3C589=m
CONFIG_VORTEX=m
CONFIG_TYPHOON=m
-CONFIG_NET_VENDOR_SMC=y
-CONFIG_ULTRA=m
+CONFIG_ADAPTEC_STARFIRE=m
+CONFIG_ACENIC=m
+CONFIG_AMD8111_ETH=m
+CONFIG_PCNET32=m
+CONFIG_PCMCIA_NMCLAN=m
+CONFIG_MACE=m
+CONFIG_BMAC=m
+CONFIG_ATL1=m
+CONFIG_B44=m
+CONFIG_BNX2=m
+CONFIG_TIGON3=m
+CONFIG_BNX2X=m
+CONFIG_CHELSIO_T1=m
+CONFIG_CHELSIO_T1_1G=y
+CONFIG_CHELSIO_T3=m
CONFIG_NET_TULIP=y
CONFIG_DE2104X=m
CONFIG_TULIP=m
CONFIG_TULIP_MMIO=y
-CONFIG_DE4X5=m
CONFIG_WINBOND_840=m
CONFIG_DM9102=m
CONFIG_ULI526X=m
CONFIG_PCMCIA_XIRCOM=m
-CONFIG_NET_ISA=y
-CONFIG_EWRK3=m
-CONFIG_NE2000=m
-CONFIG_NET_PCI=y
-CONFIG_PCNET32=m
-CONFIG_AMD8111_ETH=m
-CONFIG_ADAPTEC_STARFIRE=m
-CONFIG_B44=m
-CONFIG_FORCEDETH=m
+CONFIG_DL2K=m
+CONFIG_SUNDANCE=m
+CONFIG_S2IO=m
+CONFIG_FEC_MPC52xx=m
+CONFIG_GIANFAR=m
+CONFIG_PCMCIA_FMVJ18X=m
CONFIG_E100=m
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IGB=m
+CONFIG_IXGBE=m
+CONFIG_MV643XX_ETH=m
+CONFIG_SKGE=m
+CONFIG_SKY2=m
+CONFIG_MYRI10GE=m
CONFIG_FEALNX=m
CONFIG_NATSEMI=m
+CONFIG_NS83820=m
+CONFIG_PCMCIA_AXNET=m
+CONFIG_NE2000=m
CONFIG_NE2K_PCI=m
+CONFIG_PCMCIA_PCNET=m
+CONFIG_ULTRA=m
+CONFIG_FORCEDETH=m
+CONFIG_HAMACHI=m
+CONFIG_YELLOWFIN=m
+CONFIG_QLA3XXX=m
+CONFIG_NETXEN_NIC=m
CONFIG_8139CP=m
CONFIG_8139TOO=m
# CONFIG_8139TOO_PIO is not set
CONFIG_8139TOO_8129=y
+CONFIG_R8169=m
CONFIG_R6040=m
+CONFIG_SC92031=m
CONFIG_SIS900=m
+CONFIG_SIS190=m
+CONFIG_SFC=m
+CONFIG_PCMCIA_SMC91C92=m
CONFIG_EPIC100=m
-CONFIG_SUNDANCE=m
+CONFIG_HAPPYMEAL=m
+CONFIG_SUNGEM=m
+CONFIG_CASSINI=m
+CONFIG_NIU=m
+CONFIG_TEHUTI=m
CONFIG_TLAN=m
CONFIG_VIA_RHINE=m
CONFIG_VIA_RHINE_MMIO=y
-CONFIG_SC92031=m
-CONFIG_NET_POCKET=y
-CONFIG_DE600=m
-CONFIG_DE620=m
-CONFIG_FEC_MPC52xx=m
-CONFIG_ACENIC=m
-CONFIG_DL2K=m
-CONFIG_E1000=m
-CONFIG_E1000E=m
-CONFIG_IP1000=m
-CONFIG_IGB=m
-CONFIG_NS83820=m
-CONFIG_HAMACHI=m
-CONFIG_YELLOWFIN=m
-CONFIG_R8169=m
-CONFIG_R8169_VLAN=y
-CONFIG_SIS190=m
-CONFIG_SKGE=m
-CONFIG_SKY2=m
CONFIG_VIA_VELOCITY=m
-CONFIG_TIGON3=m
-CONFIG_BNX2=m
-CONFIG_GIANFAR=m
-CONFIG_MV643XX_ETH=m
-CONFIG_QLA3XXX=m
-CONFIG_ATL1=m
-CONFIG_CHELSIO_T1=m
-CONFIG_CHELSIO_T1_1G=y
-CONFIG_CHELSIO_T3=m
-CONFIG_IXGBE=m
-CONFIG_IXGB=m
-CONFIG_S2IO=m
-CONFIG_MYRI10GE=m
-CONFIG_NETXEN_NIC=m
-CONFIG_NIU=m
-CONFIG_TEHUTI=m
-CONFIG_BNX2X=m
-CONFIG_QLGE=m
-CONFIG_SFC=m
+CONFIG_PCMCIA_XIRC2PS=m
+CONFIG_FDDI=y
+CONFIG_SKFP=m
+CONFIG_BROADCOM_PHY=m
+CONFIG_CICADA_PHY=m
+CONFIG_DAVICOM_PHY=m
+CONFIG_ICPLUS_PHY=m
+CONFIG_LXT_PHY=m
+CONFIG_MARVELL_PHY=m
+CONFIG_QSEMI_PHY=m
+CONFIG_REALTEK_PHY=m
+CONFIG_SMSC_PHY=m
+CONFIG_VITESSE_PHY=m
+CONFIG_PLIP=m
+CONFIG_PPP_DEFLATE=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MPPE=m
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOATM=m
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_PPP_SYNC_TTY=m
+CONFIG_SLIP=m
+CONFIG_SLIP_COMPRESSED=y
+CONFIG_SLIP_SMART=y
CONFIG_USB_CATC=m
CONFIG_USB_KAWETH=m
CONFIG_USB_PEGASUS=m
@@ -597,41 +521,6 @@ CONFIG_USB_ALI_M5632=y
CONFIG_USB_AN2720=y
CONFIG_USB_EPSON2888=y
CONFIG_USB_KC2190=y
-CONFIG_NET_PCMCIA=y
-CONFIG_PCMCIA_3C589=m
-CONFIG_PCMCIA_3C574=m
-CONFIG_PCMCIA_FMVJ18X=m
-CONFIG_PCMCIA_PCNET=m
-CONFIG_PCMCIA_NMCLAN=m
-CONFIG_PCMCIA_SMC91C92=m
-CONFIG_PCMCIA_XIRC2PS=m
-CONFIG_PCMCIA_AXNET=m
-CONFIG_ATM_TCP=m
-CONFIG_ATM_LANAI=m
-CONFIG_ATM_ENI=m
-CONFIG_ATM_NICSTAR=m
-CONFIG_ATM_IDT77252=m
-CONFIG_ATM_HE=m
-CONFIG_FDDI=y
-CONFIG_SKFP=m
-CONFIG_PLIP=m
-CONFIG_PPP_MULTILINK=y
-CONFIG_PPP_FILTER=y
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPP_MPPE=m
-CONFIG_PPPOE=m
-CONFIG_PPPOATM=m
-CONFIG_SLIP=m
-CONFIG_SLIP_COMPRESSED=y
-CONFIG_SLIP_SMART=y
-CONFIG_NET_FC=y
-CONFIG_NETCONSOLE=m
-CONFIG_NETCONSOLE_DYNAMIC=y
-CONFIG_NETPOLL_TRAP=y
-CONFIG_VIRTIO_NET=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=y
CONFIG_MOUSE_SERIAL=m
@@ -669,12 +558,9 @@ CONFIG_JOYSTICK_XPAD_LEDS=y
CONFIG_INPUT_TABLET=y
CONFIG_TABLET_USB_ACECAD=m
CONFIG_TABLET_USB_AIPTEK=m
-CONFIG_TABLET_USB_GTCO=m
CONFIG_TABLET_USB_KBTAB=m
-CONFIG_TABLET_USB_WACOM=m
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=m
-CONFIG_INPUT_ATI_REMOTE=m
CONFIG_INPUT_ATI_REMOTE2=m
CONFIG_INPUT_KEYSPAN_REMOTE=m
CONFIG_INPUT_POWERMATE=m
@@ -686,16 +572,11 @@ CONFIG_GAMEPORT_NS558=m
CONFIG_GAMEPORT_L4=m
CONFIG_GAMEPORT_EMU10K1=m
CONFIG_GAMEPORT_FM801=m
-CONFIG_VT_HW_CONSOLE_BINDING=y
-# CONFIG_DEVKMEM is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
-CONFIG_ROCKETPORT=m
-CONFIG_CYCLADES=m
-CONFIG_SYNCLINK=m
-CONFIG_SYNCLINKMP=m
CONFIG_SYNCLINK_GT=m
-CONFIG_N_HDLC=m
CONFIG_NOZOMI=m
+CONFIG_N_HDLC=m
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_CS=m
@@ -705,15 +586,13 @@ CONFIG_SERIAL_8250_MANY_PORTS=y
CONFIG_SERIAL_8250_SHARE_IRQ=y
CONFIG_SERIAL_8250_DETECT_IRQ=y
CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
CONFIG_SERIAL_UARTLITE=m
CONFIG_SERIAL_PMACZILOG=m
CONFIG_SERIAL_MPC52xx=y
CONFIG_SERIAL_MPC52xx_CONSOLE=y
CONFIG_SERIAL_MPC52xx_CONSOLE_BAUD=115200
CONFIG_SERIAL_JSM=m
-CONFIG_SERIAL_OF_PLATFORM=y
-# CONFIG_LEGACY_PTYS is not set
-CONFIG_BRIQ_PANEL=m
CONFIG_PRINTER=m
CONFIG_LP_CONSOLE=y
CONFIG_PPDEV=m
@@ -721,9 +600,6 @@ CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_NVRAM=y
CONFIG_DTLK=m
-CONFIG_R3964=m
-CONFIG_CARDMAN_4000=m
-CONFIG_CARDMAN_4040=m
CONFIG_IPWIRELESS=m
CONFIG_I2C_CHARDEV=m
CONFIG_I2C_HYDRA=m
@@ -731,11 +607,11 @@ CONFIG_I2C_MPC=m
CONFIG_I2C_PCA_PLATFORM=m
CONFIG_I2C_SIMTEC=m
CONFIG_I2C_PARPORT=m
-CONFIG_I2C_PARPORT_LIGHT=m
CONFIG_I2C_TINY_USB=m
CONFIG_I2C_PCA_ISA=m
CONFIG_I2C_STUB=m
CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_MPC8XXX=y
CONFIG_W1=m
CONFIG_W1_MASTER_DS2490=m
CONFIG_W1_MASTER_DS2482=m
@@ -743,7 +619,6 @@ CONFIG_W1_SLAVE_THERM=m
CONFIG_W1_SLAVE_SMEM=m
CONFIG_W1_SLAVE_DS2433=m
CONFIG_W1_SLAVE_DS2433_CRC=y
-CONFIG_W1_SLAVE_DS2760=m
CONFIG_APM_POWER=m
CONFIG_BATTERY_PMU=m
CONFIG_HWMON=m
@@ -755,15 +630,13 @@ CONFIG_SENSORS_ADM1029=m
CONFIG_SENSORS_ADM1031=m
CONFIG_SENSORS_ADM9240=m
CONFIG_SENSORS_ADT7470=m
-CONFIG_SENSORS_AMS=m
CONFIG_SENSORS_ATXP1=m
CONFIG_SENSORS_DS1621=m
-CONFIG_SENSORS_F71805F=m
-CONFIG_SENSORS_F71882FG=m
CONFIG_SENSORS_F75375S=m
CONFIG_SENSORS_GL518SM=m
CONFIG_SENSORS_GL520SM=m
-CONFIG_SENSORS_IT87=m
+CONFIG_SENSORS_MAX1619=m
+CONFIG_SENSORS_MAX6650=m
CONFIG_SENSORS_LM63=m
CONFIG_SENSORS_LM75=m
CONFIG_SENSORS_LM77=m
@@ -775,20 +648,12 @@ CONFIG_SENSORS_LM87=m
CONFIG_SENSORS_LM90=m
CONFIG_SENSORS_LM92=m
CONFIG_SENSORS_LM93=m
-CONFIG_SENSORS_MAX1619=m
-CONFIG_SENSORS_MAX6650=m
-CONFIG_SENSORS_PC87360=m
-CONFIG_SENSORS_PC87427=m
CONFIG_SENSORS_PCF8591=m
CONFIG_SENSORS_SIS5595=m
-CONFIG_SENSORS_DME1737=m
-CONFIG_SENSORS_SMSC47M1=m
CONFIG_SENSORS_SMSC47M192=m
-CONFIG_SENSORS_SMSC47B397=m
CONFIG_SENSORS_ADS7828=m
CONFIG_SENSORS_THMC50=m
CONFIG_SENSORS_VIA686A=m
-CONFIG_SENSORS_VT1211=m
CONFIG_SENSORS_VT8231=m
CONFIG_SENSORS_W83781D=m
CONFIG_SENSORS_W83791D=m
@@ -796,8 +661,6 @@ CONFIG_SENSORS_W83792D=m
CONFIG_SENSORS_W83793=m
CONFIG_SENSORS_W83L785TS=m
CONFIG_SENSORS_W83L786NG=m
-CONFIG_SENSORS_W83627HF=m
-CONFIG_SENSORS_W83627EHF=m
CONFIG_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_SOFT_WATCHDOG=m
@@ -809,14 +672,14 @@ CONFIG_MFD_SM501_GPIO=y
CONFIG_AGP=y
CONFIG_AGP_UNINORTH=y
CONFIG_DRM=m
+CONFIG_DRM_RADEON=m
+CONFIG_DRM_LEGACY=y
CONFIG_DRM_TDFX=m
CONFIG_DRM_R128=m
-CONFIG_DRM_RADEON=m
CONFIG_DRM_MGA=m
CONFIG_DRM_SIS=m
CONFIG_DRM_VIA=m
CONFIG_DRM_SAVAGE=m
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FB_CIRRUS=m
CONFIG_FB_OF=y
@@ -851,27 +714,26 @@ CONFIG_FB_TRIDENT=m
CONFIG_FB_SM501=m
CONFIG_FB_IBM_GXT4500=y
CONFIG_LCD_PLATFORM=m
-CONFIG_DISPLAY_SUPPORT=m
-CONFIG_VGACON_SOFT_SCROLLBACK=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
CONFIG_SOUND=m
CONFIG_SND=m
-CONFIG_SND_SEQUENCER=m
-CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=m
CONFIG_SND_PCM_OSS=m
-CONFIG_SND_SEQUENCER_OSS=y
CONFIG_SND_DYNAMIC_MINORS=y
# CONFIG_SND_SUPPORT_OLD_API is not set
CONFIG_SND_VERBOSE_PRINTK=y
CONFIG_SND_DEBUG=y
CONFIG_SND_DEBUG_VERBOSE=y
CONFIG_SND_PCM_XRUN_DEBUG=y
+CONFIG_SND_SEQUENCER=m
+CONFIG_SND_SEQ_DUMMY=m
+CONFIG_SND_SEQUENCER_OSS=m
CONFIG_SND_DUMMY=m
CONFIG_SND_VIRMIDI=m
CONFIG_SND_MTPAV=m
@@ -898,7 +760,6 @@ CONFIG_SND_CMIPCI=m
CONFIG_SND_OXYGEN=m
CONFIG_SND_CS4281=m
CONFIG_SND_CS46XX=m
-CONFIG_SND_CS5530=m
CONFIG_SND_DARLA20=m
CONFIG_SND_GINA20=m
CONFIG_SND_LAYLA20=m
@@ -920,7 +781,6 @@ CONFIG_SND_ES1968=m
CONFIG_SND_FM801=m
CONFIG_SND_HDSP=m
CONFIG_SND_HDSPM=m
-CONFIG_SND_HIFIER=m
CONFIG_SND_ICE1712=m
CONFIG_SND_ICE1724=m
CONFIG_SND_KORG1212=m
@@ -950,8 +810,6 @@ CONFIG_SND_USB_CAIAQ=m
CONFIG_SND_USB_CAIAQ_INPUT=y
# CONFIG_SND_PCMCIA is not set
CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
CONFIG_HID_GYRATION=y
CONFIG_LOGITECH_FF=y
CONFIG_LOGIRUMBLEPAD2_FF=y
@@ -961,18 +819,17 @@ CONFIG_HID_PETALYNX=y
CONFIG_HID_SAMSUNG=y
CONFIG_HID_SONY=y
CONFIG_HID_SUNPLUS=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-# CONFIG_USB_DEVICE_CLASS is not set
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=m
-CONFIG_USB_EHCI_TT_NEWSCHED=y
-CONFIG_USB_EHCI_FSL=y
+CONFIG_USB_EHCI_FSL=m
CONFIG_USB_OHCI_HCD=m
CONFIG_USB_OHCI_HCD_PPC_OF_BE=y
CONFIG_USB_OHCI_HCD_PPC_OF_LE=y
CONFIG_USB_UHCI_HCD=m
-CONFIG_USB_U132_HCD=m
CONFIG_USB_SL811_HCD=m
CONFIG_USB_ACM=m
CONFIG_USB_PRINTER=m
@@ -1002,7 +859,6 @@ CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
CONFIG_USB_SERIAL_CYPRESS_M8=m
CONFIG_USB_SERIAL_EMPEG=m
CONFIG_USB_SERIAL_FTDI_SIO=m
-CONFIG_USB_SERIAL_FUNSOFT=m
CONFIG_USB_SERIAL_VISOR=m
CONFIG_USB_SERIAL_IPAQ=m
CONFIG_USB_SERIAL_IR=m
@@ -1017,18 +873,15 @@ CONFIG_USB_SERIAL_KOBIL_SCT=m
CONFIG_USB_SERIAL_MCT_U232=m
CONFIG_USB_SERIAL_MOS7720=m
CONFIG_USB_SERIAL_MOS7840=m
-CONFIG_USB_SERIAL_MOTOROLA=m
CONFIG_USB_SERIAL_NAVMAN=m
CONFIG_USB_SERIAL_PL2303=m
CONFIG_USB_SERIAL_OTI6858=m
CONFIG_USB_SERIAL_SPCP8X5=m
-CONFIG_USB_SERIAL_HP4X=m
CONFIG_USB_SERIAL_SAFE=m
CONFIG_USB_SERIAL_SAFE_PADDED=y
CONFIG_USB_SERIAL_SIERRAWIRELESS=m
CONFIG_USB_SERIAL_TI=m
CONFIG_USB_SERIAL_CYBERJACK=m
-CONFIG_USB_SERIAL_XIRCOM=m
CONFIG_USB_SERIAL_OPTION=m
CONFIG_USB_SERIAL_OMNINET=m
CONFIG_USB_SERIAL_DEBUG=m
@@ -1038,12 +891,9 @@ CONFIG_USB_ADUTUX=m
CONFIG_USB_SEVSEG=m
CONFIG_USB_LEGOTOWER=m
CONFIG_USB_LCD=m
-CONFIG_USB_LED=m
CONFIG_USB_IDMOUSE=m
-CONFIG_USB_FTDI_ELAN=m
CONFIG_USB_APPLEDISPLAY=m
CONFIG_USB_SISUSBVGA=m
-CONFIG_USB_SISUSBVGA_CON=y
CONFIG_USB_LD=m
CONFIG_USB_TRANCEVIBRATOR=m
CONFIG_USB_IOWARRIOR=m
@@ -1059,8 +909,7 @@ CONFIG_LEDS_TRIGGER_BACKLIGHT=m
CONFIG_LEDS_TRIGGER_DEFAULT_ON=m
CONFIG_ACCESSIBILITY=y
CONFIG_A11Y_BRAILLE_CONSOLE=y
-CONFIG_EDAC=y
-CONFIG_EDAC_MM_EDAC=m
+CONFIG_EDAC=m
CONFIG_RTC_CLASS=y
# CONFIG_RTC_HCTOSYS is not set
CONFIG_RTC_DRV_DS1307=m
@@ -1084,27 +933,23 @@ CONFIG_RTC_DRV_M48T35=m
CONFIG_RTC_DRV_M48T59=m
CONFIG_RTC_DRV_V3020=m
CONFIG_DMADEVICES=y
+CONFIG_PPC_BESTCOMM=y
CONFIG_AUXDISPLAY=y
CONFIG_KS0108=m
CONFIG_UIO=m
CONFIG_UIO_CIF=m
-CONFIG_UIO_PDRV=m
CONFIG_UIO_PDRV_GENIRQ=m
+CONFIG_VIRTIO_PCI=m
+CONFIG_VIRTIO_BALLOON=m
+CONFIG_QUICC_ENGINE=y
CONFIG_EXT2_FS=m
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=m
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
CONFIG_JBD2_DEBUG=y
-CONFIG_REISERFS_FS=m
-CONFIG_REISERFS_PROC_INFO=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
@@ -1112,13 +957,9 @@ CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_GFS2_FS=m
-CONFIG_OCFS2_FS=m
-# CONFIG_OCFS2_DEBUG_MASKLOG is not set
+CONFIG_FS_DAX=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-# CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_QFMT_V2=y
CONFIG_AUTOFS_FS=m
-CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
@@ -1143,45 +984,20 @@ CONFIG_MINIX_FS=m
CONFIG_OMFS_FS=m
CONFIG_QNX4FS_FS=m
CONFIG_ROMFS_FS=m
-CONFIG_SYSV_FS=m
CONFIG_UFS_FS=m
CONFIG_NFS_FS=m
-CONFIG_NFS_V3=y
CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
+CONFIG_NFS_V4=m
CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
-CONFIG_RPCSEC_GSS_SPKM3=m
CONFIG_CIFS=m
-CONFIG_CIFS_WEAK_PW_HASH=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
CONFIG_CIFS_DFS_UPCALL=y
-CONFIG_CIFS_EXPERIMENTAL=y
-CONFIG_NCP_FS=m
-CONFIG_NCPFS_PACKET_SIGNING=y
-CONFIG_NCPFS_IOCTL_LOCKING=y
-CONFIG_NCPFS_STRONG=y
-CONFIG_NCPFS_NFS_NS=y
-CONFIG_NCPFS_OS2_NS=y
-CONFIG_NCPFS_SMALLDOS=y
-CONFIG_NCPFS_NLS=y
-CONFIG_NCPFS_EXTRAS=y
CONFIG_CODA_FS=m
CONFIG_9P_FS=m
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_CODEPAGE_737=m
@@ -1220,29 +1036,24 @@ CONFIG_NLS_ISO8859_14=m
CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_KOI8_R=m
CONFIG_NLS_KOI8_U=m
-CONFIG_DLM=m
-CONFIG_DLM_DEBUG=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_HEADERS_INSTALL=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_UNUSED_SYMBOLS=y
-CONFIG_HEADERS_CHECK=y
CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_SHIRQ=y
-CONFIG_TIMER_STATS=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_FREE=y
CONFIG_DEBUG_OBJECTS_TIMERS=y
CONFIG_SLUB_DEBUG_ON=y
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_HIGHMEM=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_DEBUG_SHIRQ=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-CONFIG_DEBUG_HIGHMEM=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_VM=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
@@ -1250,45 +1061,28 @@ CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
-CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_SCHED_TRACER=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_DEBUG_STACK_USAGE=y
CONFIG_XMON=y
CONFIG_BOOTX_TEXT=y
-CONFIG_PPC_EARLY_DEBUG=y
-CONFIG_PPC_EARLY_DEBUG_BOOTX=y
-CONFIG_KEYS=y
-CONFIG_KEYS_DEBUG_PROC_KEYS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_NETWORK_XFRM=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_DISABLE=y
-CONFIG_CRYPTO_NULL=m
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
+CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_XCBC=m
-CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA1=y
-CONFIG_CRYPTO_SHA256=m
CONFIG_CRYPTO_SHA512=m
-CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
@@ -1296,16 +1090,12 @@ CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
CONFIG_CRYPTO_DEV_HIFN_795X=m
CONFIG_CRYPTO_DEV_HIFN_795X_RNG=y
CONFIG_CRYPTO_DEV_TALITOS=m
CONFIG_VIRTUALIZATION=y
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig
deleted file mode 100644
index baad8db21b61..000000000000
--- a/arch/powerpc/configs/pq2fads_defconfig
+++ /dev/null
@@ -1,99 +0,0 @@
-CONFIG_SYSVIPC=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_LOG_BUF_SHIFT=14
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_EXPERT=y
-CONFIG_KALLSYMS_ALL=y
-# CONFIG_PPC_CHRP is not set
-# CONFIG_PPC_PMAC is not set
-CONFIG_PPC_82xx=y
-CONFIG_PQ2FADS=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_PCI=y
-# CONFIG_8260_PCI9 is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_LRO is not set
-CONFIG_NETFILTER=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
-CONFIG_MTD=y
-CONFIG_MTD_CHAR=y
-CONFIG_MTD_BLOCK=y
-CONFIG_MTD_JEDECPROBE=y
-CONFIG_MTD_CFI_ADV_OPTIONS=y
-CONFIG_MTD_CFI_GEOMETRY=y
-# CONFIG_MTD_MAP_BANK_WIDTH_1 is not set
-# CONFIG_MTD_MAP_BANK_WIDTH_2 is not set
-# CONFIG_MTD_CFI_I1 is not set
-# CONFIG_MTD_CFI_I2 is not set
-CONFIG_MTD_CFI_I4=y
-CONFIG_MTD_CFI_INTELEXT=y
-CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_IDE=y
-CONFIG_NETDEVICES=y
-CONFIG_TUN=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_NET_ETHERNET=y
-CONFIG_FS_ENET=y
-# CONFIG_FS_ENET_HAS_SCC is not set
-CONFIG_FS_ENET_MDIO_FCC=y
-CONFIG_PPP=y
-CONFIG_PPP_ASYNC=y
-CONFIG_PPP_SYNC_TTY=y
-CONFIG_PPP_DEFLATE=y
-CONFIG_INPUT_EVDEV=y
-# CONFIG_SERIO_I8042 is not set
-# CONFIG_VT is not set
-CONFIG_SERIAL_CPM=y
-CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=y
-# CONFIG_HID_SUPPORT is not set
-CONFIG_USB_GADGET=y
-CONFIG_USB_GADGET_M66592=y
-CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_INOTIFY=y
-CONFIG_AUTOFS4_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_CRAMFS=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_NLS=y
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DETECT_HUNG_TASK=y
-# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
-CONFIG_BDI_SWITCH=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_PCBC=y
-CONFIG_CRYPTO_MD5=y
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index fdee37fab81c..0b48d2b776c4 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -1,44 +1,41 @@
-CONFIG_PPC64=y
-CONFIG_TUNE_CELL=y
-CONFIG_ALTIVEC=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
-CONFIG_RD_LZMA=y
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
-CONFIG_EMBEDDED=y
+CONFIG_EXPERT=y
# CONFIG_PERF_EVENTS is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_SLAB=y
CONFIG_PROFILING=y
-CONFIG_OPROFILE=m
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
+CONFIG_PPC64=y
+CONFIG_CELL_CPU=y
+CONFIG_ALTIVEC=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
# CONFIG_PPC_POWERNV is not set
# CONFIG_PPC_PSERIES is not set
# CONFIG_PPC_PMAC is not set
CONFIG_PPC_PS3=y
+CONFIG_PS3_ADVANCED=y
+CONFIG_PS3_REPOSITORY_WRITE=y
CONFIG_PS3_DISK=y
CONFIG_PS3_ROM=y
CONFIG_PS3_FLASH=y
CONFIG_PS3_VRAM=m
CONFIG_PS3_LPM=m
# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
-# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_BINFMT_MISC=y
CONFIG_KEXEC=y
-# CONFIG_SPARSEMEM_VMEMMAP is not set
-# CONFIG_COMPACTION is not set
+CONFIG_PPC_4K_PAGES=y
CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
CONFIG_PM_DEBUG=y
# CONFIG_SECCOMP is not set
-# CONFIG_PCI is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
+CONFIG_BINFMT_MISC=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SPARSEMEM_VMEMMAP is not set
+# CONFIG_COMPACTION is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
@@ -46,12 +43,7 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
-CONFIG_IPV6=y
CONFIG_BT=m
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
@@ -63,11 +55,9 @@ CONFIG_BT_HCIBTUSB=m
CONFIG_CFG80211=m
CONFIG_CFG80211_WEXT=y
CONFIG_MAC80211=m
-CONFIG_MAC80211_RC_PID=y
# CONFIG_MAC80211_RC_MINSTREL is not set
+CONFIG_UEVENT_HELPER=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=65535
@@ -75,7 +65,6 @@ CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_CHR_DEV_SG=m
-CONFIG_SCSI_MULTI_LUN=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_MD=y
CONFIG_BLK_DEV_DM=m
@@ -96,8 +85,6 @@ CONFIG_USB_USBNET=m
# CONFIG_USB_NET_NET1080 is not set
# CONFIG_USB_NET_CDC_SUBSET is not set
# CONFIG_USB_NET_ZAURUS is not set
-CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_JOYDEV=m
CONFIG_INPUT_EVDEV=m
# CONFIG_INPUT_KEYBOARD is not set
@@ -106,7 +93,6 @@ CONFIG_INPUT_EVDEV=m
# CONFIG_LEGACY_PTYS is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_HWMON is not set
-CONFIG_VIDEO_OUTPUT_CONTROL=m
CONFIG_FB=y
CONFIG_FB_PS3=y
# CONFIG_VGA_CONSOLE is not set
@@ -121,16 +107,11 @@ CONFIG_SND=m
# CONFIG_SND_DRIVERS is not set
CONFIG_SND_USB_AUDIO=m
CONFIG_HIDRAW=y
-CONFIG_HID_APPLE=m
CONFIG_HID_BELKIN=m
CONFIG_HID_CHERRY=m
CONFIG_HID_EZKEY=m
CONFIG_HID_TWINHAN=m
-CONFIG_HID_LOGITECH=m
-CONFIG_HID_LOGITECH_DJ=m
CONFIG_HID_MICROSOFT=m
-CONFIG_HID_PS3REMOTE=m
-CONFIG_HID_SONY=m
CONFIG_HID_SUNPLUS=m
CONFIG_HID_SMARTJOYPLUS=m
CONFIG_USB_HIDDEV=y
@@ -145,12 +126,10 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_PS3=y
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT2_FS=m
-CONFIG_EXT3_FS=m
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
@@ -166,23 +145,17 @@ CONFIG_CIFS=m
CONFIG_NLS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=m
-CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_MICHAEL_MIC=m
+CONFIG_CRYPTO_LZO=m
+CONFIG_PRINTK_TIME=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_FS=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_LOCKDEP=y
-CONFIG_DEBUG_INFO=y
-CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEBUG_LIST=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
# CONFIG_FTRACE is not set
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_CRYPTO_CCM=m
-CONFIG_CRYPTO_GCM=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
diff --git a/arch/powerpc/configs/pseries_le_defconfig b/arch/powerpc/configs/pseries_le_defconfig
deleted file mode 100644
index 58e3dbf43ca4..000000000000
--- a/arch/powerpc/configs/pseries_le_defconfig
+++ /dev/null
@@ -1,304 +0,0 @@
-CONFIG_PPC64=y
-CONFIG_ALTIVEC=y
-CONFIG_VSX=y
-CONFIG_SMP=y
-CONFIG_NR_CPUS=2048
-CONFIG_CPU_LITTLE_ENDIAN=y
-CONFIG_SYSVIPC=y
-CONFIG_POSIX_MQUEUE=y
-CONFIG_AUDIT=y
-CONFIG_AUDITSYSCALL=y
-CONFIG_IRQ_DOMAIN_DEBUG=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_CGROUPS=y
-CONFIG_CGROUP_FREEZER=y
-CONFIG_CGROUP_DEVICE=y
-CONFIG_CPUSETS=y
-CONFIG_CGROUP_CPUACCT=y
-CONFIG_BLK_DEV_INITRD=y
-# CONFIG_COMPAT_BRK is not set
-CONFIG_PROFILING=y
-CONFIG_OPROFILE=y
-CONFIG_KPROBES=y
-CONFIG_JUMP_LABEL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODVERSIONS=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_PPC_SPLPAR=y
-CONFIG_SCANLOG=m
-CONFIG_PPC_SMLPAR=y
-CONFIG_DTL=y
-# CONFIG_PPC_PMAC is not set
-CONFIG_RTAS_FLASH=m
-CONFIG_IBMEBUS=y
-CONFIG_HZ_100=y
-CONFIG_BINFMT_MISC=m
-CONFIG_PPC_TRANSACTIONAL_MEM=y
-CONFIG_KEXEC=y
-CONFIG_IRQ_ALL_CPUS=y
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_CMA=y
-CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
-CONFIG_SCHED_SMT=y
-CONFIG_HOTPLUG_PCI=y
-CONFIG_HOTPLUG_PCI_RPA=m
-CONFIG_HOTPLUG_PCI_RPA_DLPAR=m
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_XFRM_USER=m
-CONFIG_NET_KEY=m
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-CONFIG_NET_IPIP=y
-CONFIG_SYN_COOKIES=y
-CONFIG_INET_AH=m
-CONFIG_INET_ESP=m
-CONFIG_INET_IPCOMP=m
-# CONFIG_IPV6 is not set
-CONFIG_NETFILTER=y
-# CONFIG_NETFILTER_ADVANCED is not set
-CONFIG_BRIDGE=m
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-CONFIG_DEVTMPFS=y
-CONFIG_DEVTMPFS_MOUNT=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_PARPORT=m
-CONFIG_PARPORT_PC=m
-CONFIG_BLK_DEV_FD=m
-CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_NBD=m
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=65536
-CONFIG_VIRTIO_BLK=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_GENERIC=y
-CONFIG_BLK_DEV_AMD74XX=y
-CONFIG_BLK_DEV_SD=y
-CONFIG_CHR_DEV_ST=y
-CONFIG_BLK_DEV_SR=y
-CONFIG_BLK_DEV_SR_VENDOR=y
-CONFIG_CHR_DEV_SG=y
-CONFIG_SCSI_MULTI_LUN=y
-CONFIG_SCSI_CONSTANTS=y
-CONFIG_SCSI_FC_ATTRS=y
-CONFIG_SCSI_CXGB3_ISCSI=m
-CONFIG_SCSI_CXGB4_ISCSI=m
-CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
-CONFIG_SCSI_MPT2SAS=m
-CONFIG_SCSI_IBMVSCSI=y
-CONFIG_SCSI_IBMVFC=m
-CONFIG_SCSI_SYM53C8XX_2=y
-CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=0
-CONFIG_SCSI_IPR=y
-CONFIG_SCSI_QLA_FC=m
-CONFIG_SCSI_QLA_ISCSI=m
-CONFIG_SCSI_LPFC=m
-CONFIG_SCSI_VIRTIO=m
-CONFIG_SCSI_DH=m
-CONFIG_SCSI_DH_RDAC=m
-CONFIG_SCSI_DH_ALUA=m
-CONFIG_ATA=y
-# CONFIG_ATA_SFF is not set
-CONFIG_MD=y
-CONFIG_BLK_DEV_MD=y
-CONFIG_MD_LINEAR=y
-CONFIG_MD_RAID0=y
-CONFIG_MD_RAID1=y
-CONFIG_MD_RAID10=m
-CONFIG_MD_RAID456=m
-CONFIG_MD_MULTIPATH=m
-CONFIG_MD_FAULTY=m
-CONFIG_BLK_DEV_DM=y
-CONFIG_DM_CRYPT=m
-CONFIG_DM_SNAPSHOT=m
-CONFIG_DM_MIRROR=m
-CONFIG_DM_ZERO=m
-CONFIG_DM_MULTIPATH=m
-CONFIG_DM_MULTIPATH_QL=m
-CONFIG_DM_MULTIPATH_ST=m
-CONFIG_DM_UEVENT=y
-CONFIG_BONDING=m
-CONFIG_DUMMY=m
-CONFIG_NETCONSOLE=y
-CONFIG_NETPOLL_TRAP=y
-CONFIG_TUN=m
-CONFIG_VIRTIO_NET=m
-CONFIG_VORTEX=y
-CONFIG_ACENIC=m
-CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_PCNET32=y
-CONFIG_TIGON3=y
-CONFIG_CHELSIO_T1=m
-CONFIG_BE2NET=m
-CONFIG_S2IO=m
-CONFIG_IBMVETH=y
-CONFIG_EHEA=y
-CONFIG_E100=y
-CONFIG_E1000=y
-CONFIG_E1000E=y
-CONFIG_IXGB=m
-CONFIG_IXGBE=m
-CONFIG_MLX4_EN=m
-CONFIG_MYRI10GE=m
-CONFIG_QLGE=m
-CONFIG_NETXEN_NIC=m
-CONFIG_PPP=m
-CONFIG_PPP_BSDCOMP=m
-CONFIG_PPP_DEFLATE=m
-CONFIG_PPPOE=m
-CONFIG_PPP_ASYNC=m
-CONFIG_PPP_SYNC_TTY=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_EVDEV=m
-CONFIG_INPUT_MISC=y
-CONFIG_INPUT_PCSPKR=m
-# CONFIG_SERIO_SERPORT is not set
-CONFIG_SERIAL_8250=y
-CONFIG_SERIAL_8250_CONSOLE=y
-CONFIG_SERIAL_ICOM=m
-CONFIG_SERIAL_JSM=m
-CONFIG_HVC_CONSOLE=y
-CONFIG_HVC_RTAS=y
-CONFIG_HVCS=m
-CONFIG_VIRTIO_CONSOLE=m
-CONFIG_IBM_BSR=m
-CONFIG_GEN_RTC=y
-CONFIG_RAW_DRIVER=y
-CONFIG_MAX_RAW_DEVS=1024
-CONFIG_FB=y
-CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-CONFIG_FB_RADEON=y
-CONFIG_FB_IBM_GXT4500=y
-CONFIG_LCD_PLATFORM=m
-# CONFIG_VGA_CONSOLE is not set
-CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_LOGO=y
-CONFIG_HID_GYRATION=y
-CONFIG_HID_PANTHERLORD=y
-CONFIG_HID_PETALYNX=y
-CONFIG_HID_SAMSUNG=y
-CONFIG_HID_SUNPLUS=y
-CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=m
-CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=m
-CONFIG_INFINIBAND=m
-CONFIG_INFINIBAND_USER_MAD=m
-CONFIG_INFINIBAND_USER_ACCESS=m
-CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_EHCA=m
-CONFIG_INFINIBAND_CXGB3=m
-CONFIG_INFINIBAND_CXGB4=m
-CONFIG_MLX4_INFINIBAND=m
-CONFIG_INFINIBAND_IPOIB=m
-CONFIG_INFINIBAND_IPOIB_CM=y
-CONFIG_INFINIBAND_SRP=m
-CONFIG_INFINIBAND_ISER=m
-CONFIG_VIRTIO_PCI=m
-CONFIG_VIRTIO_BALLOON=m
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_EXT2_FS_POSIX_ACL=y
-CONFIG_EXT2_FS_SECURITY=y
-CONFIG_EXT2_FS_XIP=y
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
-CONFIG_EXT4_FS=y
-CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_EXT4_FS_SECURITY=y
-CONFIG_REISERFS_FS=y
-CONFIG_REISERFS_FS_XATTR=y
-CONFIG_REISERFS_FS_POSIX_ACL=y
-CONFIG_REISERFS_FS_SECURITY=y
-CONFIG_JFS_FS=m
-CONFIG_JFS_POSIX_ACL=y
-CONFIG_JFS_SECURITY=y
-CONFIG_XFS_FS=m
-CONFIG_XFS_POSIX_ACL=y
-CONFIG_BTRFS_FS=m
-CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_NILFS2_FS=m
-CONFIG_AUTOFS4_FS=m
-CONFIG_FUSE_FS=m
-CONFIG_ISO9660_FS=y
-CONFIG_UDF_FS=m
-CONFIG_MSDOS_FS=y
-CONFIG_VFAT_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
-CONFIG_HUGETLBFS=y
-CONFIG_CRAMFS=m
-CONFIG_SQUASHFS=m
-CONFIG_SQUASHFS_XATTR=y
-CONFIG_SQUASHFS_LZO=y
-CONFIG_SQUASHFS_XZ=y
-CONFIG_PSTORE=y
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-CONFIG_NFSD=m
-CONFIG_NFSD_V3_ACL=y
-CONFIG_NFSD_V4=y
-CONFIG_CIFS=m
-CONFIG_CIFS_XATTR=y
-CONFIG_CIFS_POSIX=y
-CONFIG_NLS_DEFAULT="utf8"
-CONFIG_NLS_CODEPAGE_437=y
-CONFIG_NLS_ASCII=y
-CONFIG_NLS_ISO8859_1=y
-CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
-CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
-CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_LOCKUP_DETECTOR=y
-CONFIG_LATENCYTOP=y
-CONFIG_SCHED_TRACER=y
-CONFIG_BLK_DEV_IO_TRACE=y
-CONFIG_CODE_PATCHING_SELFTEST=y
-CONFIG_FTR_FIXUP_SELFTEST=y
-CONFIG_MSI_BITMAP_SELFTEST=y
-CONFIG_XMON=y
-CONFIG_CRYPTO_TEST=m
-CONFIG_CRYPTO_PCBC=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_TGR192=m
-CONFIG_CRYPTO_WP512=m
-CONFIG_CRYPTO_ANUBIS=m
-CONFIG_CRYPTO_BLOWFISH=m
-CONFIG_CRYPTO_CAST6=m
-CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
-CONFIG_CRYPTO_SERPENT=m
-CONFIG_CRYPTO_TEA=m
-CONFIG_CRYPTO_TWOFISH=m
-CONFIG_CRYPTO_LZO=m
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRYPTO_DEV_NX=y
-CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
-CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
diff --git a/arch/powerpc/configs/security.config b/arch/powerpc/configs/security.config
new file mode 100644
index 000000000000..0d54e29e2cdf
--- /dev/null
+++ b/arch/powerpc/configs/security.config
@@ -0,0 +1,17 @@
+# Help: Common security options for PowerPC builds
+
+# This is the equivalent of booting with lockdown=integrity
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
+
+# These are some general, reasonably inexpensive hardening options
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+
+# UBSAN bounds checking is very cheap and good for hardening
+CONFIG_UBSAN=y
+# CONFIG_UBSAN_MISC is not set
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
new file mode 100644
index 000000000000..2b71a6dc399e
--- /dev/null
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -0,0 +1,302 @@
+CONFIG_KERNEL_XZ=y
+# CONFIG_SWAP is not set
+CONFIG_SYSVIPC=y
+CONFIG_POSIX_MQUEUE=y
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_LOG_BUF_SHIFT=20
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SGETMASK_SYSCALL is not set
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_SHMEM is not set
+# CONFIG_AIO is not set
+CONFIG_PERF_EVENTS=y
+# CONFIG_COMPAT_BRK is not set
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+CONFIG_SLAB_FREELIST_RANDOM=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_PPC64=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_NR_CPUS=2048
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_PANIC_TIMEOUT=30
+# CONFIG_PPC_VAS is not set
+# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
+CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_IDLE=y
+CONFIG_HZ_100=y
+CONFIG_KEXEC=y
+CONFIG_KEXEC_FILE=y
+CONFIG_PRESERVE_FA_DUMP=y
+CONFIG_IRQ_ALL_CPUS=y
+CONFIG_NUMA=y
+CONFIG_SCHED_SMT=y
+CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
+# CONFIG_SECCOMP is not set
+# CONFIG_PPC_MEM_KEYS is not set
+CONFIG_JUMP_LABEL=y
+CONFIG_STRICT_KERNEL_RWX=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULE_SIG_FORCE=y
+CONFIG_MODULE_SIG_SHA512=y
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+# CONFIG_COMPACTION is not set
+# CONFIG_MIGRATION is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_NET_IPIP=y
+CONFIG_SYN_COOKIES=y
+CONFIG_DNS_RESOLVER=y
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=m
+CONFIG_MTD_POWERNV_FLASH=m
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=65536
+CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=m
+CONFIG_NVME_MULTIPATH=y
+CONFIG_EEPROM_AT24=m
+# CONFIG_OCXL is not set
+CONFIG_BLK_DEV_SD=m
+CONFIG_BLK_DEV_SR=m
+CONFIG_CHR_DEV_SG=m
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI_FC_ATTRS=y
+CONFIG_SCSI_CXGB3_ISCSI=m
+CONFIG_SCSI_CXGB4_ISCSI=m
+CONFIG_SCSI_BNX2_ISCSI=m
+CONFIG_SCSI_AACRAID=m
+CONFIG_MEGARAID_NEWGEN=y
+CONFIG_MEGARAID_MM=m
+CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
+CONFIG_SCSI_MPT2SAS=m
+CONFIG_SCSI_IPR=m
+# CONFIG_SCSI_IPR_TRACE is not set
+# CONFIG_SCSI_IPR_DUMP is not set
+CONFIG_SCSI_QLA_FC=m
+CONFIG_SCSI_QLA_ISCSI=m
+CONFIG_SCSI_LPFC=m
+CONFIG_SCSI_VIRTIO=m
+CONFIG_SCSI_DH=y
+CONFIG_SCSI_DH_ALUA=m
+CONFIG_ATA=y
+CONFIG_SATA_AHCI=m
+# CONFIG_ATA_SFF is not set
+CONFIG_MD=y
+CONFIG_BLK_DEV_MD=m
+CONFIG_MD_LINEAR=m
+CONFIG_MD_RAID0=m
+CONFIG_MD_RAID1=m
+CONFIG_MD_RAID10=m
+CONFIG_MD_RAID456=m
+CONFIG_MD_MULTIPATH=m
+CONFIG_MD_FAULTY=m
+CONFIG_BLK_DEV_DM=m
+CONFIG_DM_CRYPT=m
+CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_MIRROR=m
+CONFIG_DM_ZERO=m
+CONFIG_DM_MULTIPATH=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
+CONFIG_ACENIC=m
+CONFIG_ACENIC_OMIT_TIGON_I=y
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+CONFIG_TIGON3=m
+CONFIG_BNX2X=m
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_VENDOR_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+CONFIG_CHELSIO_T1=m
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
+CONFIG_BE2NET=m
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
+CONFIG_E1000=m
+CONFIG_E1000E=m
+CONFIG_IGB=m
+CONFIG_IXGBE=m
+CONFIG_I40E=m
+# CONFIG_NET_VENDOR_MARVELL is not set
+CONFIG_MLX4_EN=m
+# CONFIG_MLX4_CORE_GEN2 is not set
+CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_MLX5_EN_RXNFC is not set
+# CONFIG_NET_VENDOR_MICREL is not set
+# CONFIG_NET_VENDOR_MICROSEMI is not set
+CONFIG_MYRI10GE=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+CONFIG_S2IO=m
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_VENDOR_PACKET_ENGINES is not set
+CONFIG_NETXEN_NIC=m
+CONFIG_QED=m
+CONFIG_QEDE=m
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
+CONFIG_SFC=m
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PHYLIB=y
+# CONFIG_USB_NET_DRIVERS is not set
+# CONFIG_WLAN is not set
+CONFIG_INPUT_EVDEV=y
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO_SERPORT is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_IPMI_HANDLER=y
+CONFIG_IPMI_DEVICE_INTERFACE=y
+CONFIG_IPMI_POWERNV=y
+CONFIG_IPMI_WATCHDOG=y
+CONFIG_HW_RANDOM=y
+CONFIG_TCG_TPM=y
+CONFIG_TCG_TIS_I2C_NUVOTON=y
+# CONFIG_DEVPORT is not set
+CONFIG_I2C=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_OPAL=m
+CONFIG_PPS=y
+CONFIG_SENSORS_IBMPOWERNV=m
+CONFIG_DRM=m
+CONFIG_DRM_AST=m
+CONFIG_FB=y
+CONFIG_FIRMWARE_EDID=y
+# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_LOGO=y
+# CONFIG_LOGO_LINUX_MONO is not set
+# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_HID_GENERIC=m
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_ITE=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_USB_HIDDEV=y
+CONFIG_USB=m
+CONFIG_USB_XHCI_HCD=m
+CONFIG_USB_EHCI_HCD=m
+# CONFIG_USB_EHCI_HCD_PPC_OF is not set
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_STORAGE=m
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_OPAL=m
+CONFIG_RTC_DRV_GENERIC=m
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_PCI=m
+# CONFIG_IOMMU_SUPPORT is not set
+CONFIG_EXT4_FS=m
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_XFS_FS=m
+CONFIG_XFS_POSIX_ACL=y
+CONFIG_BTRFS_FS=m
+CONFIG_BTRFS_FS_POSIX_ACL=y
+CONFIG_ISO9660_FS=m
+CONFIG_UDF_FS=m
+CONFIG_MSDOS_FS=m
+CONFIG_VFAT_FS=m
+CONFIG_PROC_KCORE=y
+CONFIG_HUGETLBFS=y
+# CONFIG_MISC_FILESYSTEMS is not set
+# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_ISO8859_1=y
+CONFIG_NLS_UTF8=y
+CONFIG_ENCRYPTED_KEYS=y
+CONFIG_SECURITY=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_SECURITY_LOCKDOWN_LSM=y
+CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
+CONFIG_LOCK_DOWN_KERNEL_FORCE_INTEGRITY=y
+# CONFIG_INTEGRITY is not set
+CONFIG_LSM="yama,loadpin,safesetid,integrity"
+# CONFIG_CRYPTO_HW is not set
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
+CONFIG_PRINTK_TIME=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_WQ_WATCHDOG=y
+# CONFIG_SCHED_DEBUG is not set
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+# CONFIG_FTRACE is not set
+CONFIG_XMON=y
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index 60ad2c08caa6..e415222bd839 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_EXPERT=y
@@ -6,14 +5,13 @@ CONFIG_EXPERT=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
# CONFIG_PPC_CHRP is not set
# CONFIG_PPC_PMAC is not set
CONFIG_EMBEDDED6xx=y
CONFIG_STORCENTER=y
CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=y
-CONFIG_SPARSE_IRQ=y
-CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0,115200"
# CONFIG_SECCOMP is not set
CONFIG_NET=y
@@ -23,17 +21,9 @@ CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
-# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_FTL=y
CONFIG_NFTL=y
@@ -41,13 +31,11 @@ CONFIG_NFTL_RW=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP=y
-CONFIG_PROC_DEVICETREE=y
-CONFIG_IDE=y
-CONFIG_BLK_DEV_VIA82CXXX=y
-CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_SCSI_SPI_ATTRS=y
+CONFIG_ATA=y
+CONFIG_PATA_VIA=y
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=y
@@ -57,7 +45,6 @@ CONFIG_MD_RAID456=y
CONFIG_NETDEVICES=y
CONFIG_DUMMY=m
CONFIG_R8169=y
-# CONFIG_NETDEV_10000 is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
@@ -78,21 +65,13 @@ CONFIG_USB_STORAGE=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_DS1307=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
+CONFIG_EXT4_FS=y
CONFIG_XFS_FS=m
-CONFIG_INOTIFY=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
-CONFIG_PARTITION_ADVANCED=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
CONFIG_NLS_UTF8=y
-CONFIG_CRC_T10DIF=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_SYSCTL_SYSCALL_CHECK=y
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 7fe277a7b422..425f10837a18 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -1,29 +1,23 @@
CONFIG_PPC_8xx=y
-CONFIG_EXPERIMENTAL=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED_V2=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
-# CONFIG_SYSCTL_SYSCALL is not set
# CONFIG_ELF_CORE is not set
-# CONFIG_BASE_FULL is not set
+CONFIG_BASE_SMALL=y
# CONFIG_FUTEX is not set
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_CFQ is not set
+CONFIG_PARTITION_ADVANCED=y
CONFIG_TQM8XX=y
-CONFIG_8xx_COPYBACK=y
# CONFIG_8xx_CPU15 is not set
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
+CONFIG_GEN_RTC=y
CONFIG_HZ_100=y
-CONFIG_8XX_MINIMAL_FPEMU=y
-CONFIG_SPARSE_IRQ=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
CONFIG_PACKET=y
@@ -31,56 +25,35 @@ CONFIG_UNIX=y
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_IPV6 is not set
# CONFIG_WIRELESS is not set
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_FW_LOADER is not set
CONFIG_MTD=y
-CONFIG_MTD_CONCAT=y
CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_OF_PARTS=y
-CONFIG_MTD_CHAR=y
CONFIG_MTD_BLOCK=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
CONFIG_MTD_CFI_AMDSTD=y
CONFIG_MTD_PHYSMAP_OF=y
-CONFIG_PROC_DEVICETREE=y
# CONFIG_BLK_DEV is not set
-# CONFIG_MISC_DEVICES is not set
CONFIG_NETDEVICES=y
-CONFIG_DAVICOM_PHY=y
-CONFIG_FIXED_PHY=y
-CONFIG_NET_ETHERNET=y
CONFIG_FS_ENET=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+CONFIG_DAVICOM_PHY=y
# CONFIG_WLAN is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_CPM=y
CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_LEGACY_PTYS is not set
CONFIG_HW_RANDOM=y
-CONFIG_GEN_RTC=y
# CONFIG_HWMON is not set
# CONFIG_USB_SUPPORT is not set
# CONFIG_DNOTIFY is not set
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_CRC32 is not set
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DETECT_HUNG_TASK=y
-CONFIG_DEBUG_INFO=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_CRC32_SLICEBY4=y
diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig
index 1e2b7d062aa4..7c714a19221e 100644
--- a/arch/powerpc/configs/wii_defconfig
+++ b/arch/powerpc/configs/wii_defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_LOCALVERSION="-wii"
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SYSVIPC=y
@@ -6,12 +5,10 @@ CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EXPERT=y
# CONFIG_ELF_CORE is not set
CONFIG_PERF_EVENTS=y
# CONFIG_VM_EVENT_COUNTERS is not set
-CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
# CONFIG_PPC_CHRP is not set
@@ -22,7 +19,6 @@ CONFIG_WII=y
CONFIG_PREEMPT=y
CONFIG_BINFMT_MISC=m
CONFIG_KEXEC=y
-# CONFIG_MIGRATION is not set
# CONFIG_SECCOMP is not set
CONFIG_ADVANCED_OPTIONS=y
CONFIG_NET=y
@@ -32,53 +28,39 @@ CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_INET_LRO is not set
# CONFIG_INET_DIAG is not set
# CONFIG_IPV6 is not set
CONFIG_BT=y
-CONFIG_BT_L2CAP=y
CONFIG_BT_RFCOMM=y
CONFIG_BT_BNEP=y
CONFIG_BT_BNEP_MC_FILTER=y
CONFIG_BT_HIDP=y
CONFIG_CFG80211=y
CONFIG_MAC80211=y
-CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
# CONFIG_STANDALONE is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-CONFIG_PROC_DEVICETREE=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_COUNT=2
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
-CONFIG_SCSI_MULTI_LUN=y
CONFIG_NETDEVICES=y
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_NETDEV_1000 is not set
-# CONFIG_NETDEV_10000 is not set
+# CONFIG_ETHERNET is not set
CONFIG_B43=y
+CONFIG_B43_BUSES_SSB=y
CONFIG_B43_SDIO=y
# CONFIG_B43_PHY_LP is not set
CONFIG_B43_DEBUG=y
CONFIG_INPUT_FF_MEMLESS=m
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-CONFIG_INPUT_MOUSEDEV_SCREEN_X=640
-CONFIG_INPUT_MOUSEDEV_SCREEN_Y=480
CONFIG_INPUT_JOYDEV=y
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
+CONFIG_KEYBOARD_GPIO=y
# CONFIG_MOUSE_PS2 is not set
CONFIG_INPUT_JOYSTICK=y
CONFIG_INPUT_MISC=y
CONFIG_INPUT_UINPUT=y
# CONFIG_SERIO_I8042 is not set
# CONFIG_SERIO_SERPORT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_LEGACY_PTY_COUNT=64
# CONFIG_HW_RANDOM is not set
CONFIG_NVRAM=y
@@ -87,29 +69,38 @@ CONFIG_I2C_CHARDEV=y
CONFIG_I2C_GPIO=y
CONFIG_GPIOLIB=y
CONFIG_GPIO_SYSFS=y
+CONFIG_GPIO_HLWD=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_GPIO=y
# CONFIG_HWMON is not set
-CONFIG_SSB_DEBUG=y
CONFIG_FB=y
# CONFIG_VGA_CONSOLE is not set
CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_SOUND=y
CONFIG_SND=y
-CONFIG_SND_SEQUENCER=y
+CONFIG_SND_OSSEMUL=y
CONFIG_SND_MIXER_OSS=y
CONFIG_SND_PCM_OSS=y
-CONFIG_SND_SEQUENCER_OSS=y
# CONFIG_SND_VERBOSE_PROCFS is not set
+CONFIG_SND_SEQUENCER=y
+CONFIG_SND_SEQUENCER_OSS=y
CONFIG_HID_APPLE=m
CONFIG_HID_WACOM=m
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_OF_HLWD=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_HEARTBEAT=y
+CONFIG_LEDS_TRIGGER_PANIC=y
CONFIG_RTC_CLASS=y
-CONFIG_RTC_DRV_GENERIC=y
+CONFIG_RTC_DRV_GAMECUBE=y
+CONFIG_NVMEM_NINTENDO_OTP=y
CONFIG_EXT2_FS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_INOTIFY=y
+CONFIG_EXT4_FS=y
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
@@ -119,24 +110,16 @@ CONFIG_PROC_KCORE=y
# CONFIG_PROC_PAGE_MONITOR is not set
CONFIG_TMPFS=y
CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
CONFIG_ROOT_NFS=y
CONFIG_CIFS=m
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
-CONFIG_CRC_CCITT=y
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_SPINLOCK=y
CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_SPINLOCK_SLEEP=y
-# CONFIG_RCU_CPU_STALL_DETECTOR is not set
-CONFIG_LATENCYTOP=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
CONFIG_SCHED_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_DMA_API_DEBUG=y
CONFIG_PPC_EARLY_DEBUG=y
-# CONFIG_CRYPTO_ANSI_CPRNG is not set
# CONFIG_CRYPTO_HW is not set
diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
new file mode 100644
index 000000000000..e9fe73aac8b6
--- /dev/null
+++ b/arch/powerpc/crypto/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+aesp10-ppc.S
+aesp8-ppc.S
+ghashp10-ppc.S
+ghashp8-ppc.S
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
new file mode 100644
index 000000000000..662aed46f9c7
--- /dev/null
+++ b/arch/powerpc/crypto/Kconfig
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+
+menu "Accelerated Cryptographic Algorithms for CPU (powerpc)"
+
+config CRYPTO_AES_PPC_SPE
+ tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
+ depends on SPE
+ select CRYPTO_SKCIPHER
+ help
+ Block ciphers: AES cipher algorithms (FIPS-197)
+ Length-preserving ciphers: AES with ECB, CBC, CTR, and XTS modes
+
+ Architecture: powerpc using:
+ - SPE (Signal Processing Engine) extensions
+
+ SPE is available for:
+ - Processor Type: Freescale 8500
+ - CPU selection: e500 (8540)
+
+ This module should only be used for low power (router) devices
+ without hardware AES acceleration (e.g. caam crypto). It reduces the
+ size of the AES tables from 16KB to 8KB + 256 bytes and mitigates
+ timing attacks. Nevertheless it might not be as secure as other
+ architecture specific assembler implementations that work on 1KB
+ tables or 256 bytes S-boxes.
+
+config CRYPTO_AES_GCM_P10
+ tristate "Stitched AES/GCM acceleration support on P10 or later CPU (PPC)"
+ depends on PPC64 && CPU_LITTLE_ENDIAN && VSX
+ select CRYPTO_LIB_AES
+ select CRYPTO_ALGAPI
+ select CRYPTO_AEAD
+ select CRYPTO_SKCIPHER
+ select CRYPTO_SIMD
+ help
+ AEAD cipher: AES cipher algorithms (FIPS-197)
+ GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
+ Architecture: powerpc64 using:
+ - little-endian
+ - Power10 or later features
+
+ Support for cryptographic acceleration instructions on Power10 or
+ later CPU. This module supports stitched acceleration for AES/GCM.
+
+config CRYPTO_DEV_VMX
+ bool "Support for VMX cryptographic acceleration instructions"
+ depends on PPC64 && VSX
+ help
+ Support for VMX cryptographic acceleration instructions.
+
+config CRYPTO_DEV_VMX_ENCRYPT
+ tristate "Encryption acceleration support on P8 CPU"
+ depends on CRYPTO_DEV_VMX
+ select CRYPTO_AES
+ select CRYPTO_CBC
+ select CRYPTO_CTR
+ select CRYPTO_GHASH
+ select CRYPTO_XTS
+ default m
+ help
+ Support for VMX cryptographic acceleration instructions on Power8 CPU.
+ This module supports acceleration for AES and GHASH in hardware. If you
+ choose 'M' here, this module will be called vmx-crypto.
+
+endmenu
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index 2926fb9c570a..5960e5300db7 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -1,9 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
#
# powerpc/crypto/Makefile
#
# Arch-specific CryptoAPI modules.
#
-obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o
+obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
+obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
+obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
-sha1-powerpc-y := sha1-powerpc-asm.o sha1.o
+aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
+aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
+vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
+
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+override flavour := linux-ppc64le
+else
+ifdef CONFIG_PPC64_ELF_ABI_V2
+override flavour := linux-ppc64-elfv2
+else
+override flavour := linux-ppc64
+endif
+endif
+
+quiet_cmd_perl = PERL $@
+ cmd_perl = $(PERL) $< $(flavour) > $@
+
+targets += aesp10-ppc.S ghashp10-ppc.S aesp8-ppc.S ghashp8-ppc.S
+
+$(obj)/aesp10-ppc.S $(obj)/ghashp10-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
+ $(call if_changed,perl)
+
+$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
+ $(call if_changed,perl)
+
+OBJECT_FILES_NON_STANDARD_aesp10-ppc.o := y
+OBJECT_FILES_NON_STANDARD_ghashp10-ppc.o := y
+OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
+OBJECT_FILES_NON_STANDARD_ghashp8-ppc.o := y
diff --git a/arch/powerpc/crypto/aes-gcm-p10-glue.c b/arch/powerpc/crypto/aes-gcm-p10-glue.c
new file mode 100644
index 000000000000..85f4fd4b1bdc
--- /dev/null
+++ b/arch/powerpc/crypto/aes-gcm-p10-glue.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for accelerated AES-GCM stitched implementation for ppc64le.
+ *
+ * Copyright 2022- IBM Inc. All rights reserved
+ */
+
+#include <linux/unaligned.h>
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <crypto/gcm.h>
+#include <crypto/aes.h>
+#include <crypto/algapi.h>
+#include <crypto/b128ops.h>
+#include <crypto/gf128mul.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define PPC_ALIGN 16
+#define GCM_IV_SIZE 12
+#define RFC4106_NONCE_SIZE 4
+
+MODULE_DESCRIPTION("PPC64le AES-GCM with Stitched implementation");
+MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");
+
+asmlinkage int aes_p10_set_encrypt_key(const u8 *userKey, const int bits,
+ void *key);
+asmlinkage void aes_p10_encrypt(const u8 *in, u8 *out, const void *key);
+asmlinkage void aes_p10_gcm_encrypt(const u8 *in, u8 *out, size_t len,
+ void *rkey, u8 *iv, void *Xi);
+asmlinkage void aes_p10_gcm_decrypt(const u8 *in, u8 *out, size_t len,
+ void *rkey, u8 *iv, void *Xi);
+asmlinkage void gcm_init_htable(unsigned char htable[], unsigned char Xi[]);
+asmlinkage void gcm_ghash_p10(unsigned char *Xi, unsigned char *Htable,
+ unsigned char *aad, unsigned int alen);
+asmlinkage void gcm_update(u8 *iv, void *Xi);
+
+struct aes_key {
+ u8 key[AES_MAX_KEYLENGTH];
+ u64 rounds;
+};
+
+struct gcm_ctx {
+ u8 iv[16];
+ u8 ivtag[16];
+ u8 aad_hash[16];
+ u64 aadLen;
+ u64 Plen; /* offset 56 - used in aes_p10_gcm_{en/de}crypt */
+ u8 pblock[16];
+};
+struct Hash_ctx {
+ u8 H[16]; /* subkey */
+ u8 Htable[256]; /* Xi, Hash table(offset 32) */
+};
+
+struct p10_aes_gcm_ctx {
+ struct aes_key enc_key;
+ u8 nonce[RFC4106_NONCE_SIZE];
+};
+
+static void vsx_begin(void)
+{
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+}
+
+static void vsx_end(void)
+{
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+}
+
+static void set_subkey(unsigned char *hash)
+{
+ *(u64 *)&hash[0] = be64_to_cpup((__be64 *)&hash[0]);
+ *(u64 *)&hash[8] = be64_to_cpup((__be64 *)&hash[8]);
+}
+
+/*
+ * Compute aad if any.
+ * - Hash aad and copy to Xi.
+ */
+static void set_aad(struct gcm_ctx *gctx, struct Hash_ctx *hash,
+ unsigned char *aad, int alen)
+{
+ int i;
+ u8 nXi[16] = {0, };
+
+ gctx->aadLen = alen;
+ i = alen & ~0xf;
+ if (i) {
+ gcm_ghash_p10(nXi, hash->Htable+32, aad, i);
+ aad += i;
+ alen -= i;
+ }
+ if (alen) {
+ for (i = 0; i < alen; i++)
+ nXi[i] ^= aad[i];
+
+ memset(gctx->aad_hash, 0, 16);
+ gcm_ghash_p10(gctx->aad_hash, hash->Htable+32, nXi, 16);
+ } else {
+ memcpy(gctx->aad_hash, nXi, 16);
+ }
+
+ memcpy(hash->Htable, gctx->aad_hash, 16);
+}
+
+static void gcmp10_init(struct gcm_ctx *gctx, u8 *iv, unsigned char *rdkey,
+ struct Hash_ctx *hash, u8 *assoc, unsigned int assoclen)
+{
+ __be32 counter = cpu_to_be32(1);
+
+ aes_p10_encrypt(hash->H, hash->H, rdkey);
+ set_subkey(hash->H);
+ gcm_init_htable(hash->Htable+32, hash->H);
+
+ *((__be32 *)(iv+12)) = counter;
+
+ gctx->Plen = 0;
+
+ /*
+ * Encrypt counter vector as iv tag and increment counter.
+ */
+ aes_p10_encrypt(iv, gctx->ivtag, rdkey);
+
+ counter = cpu_to_be32(2);
+ *((__be32 *)(iv+12)) = counter;
+ memcpy(gctx->iv, iv, 16);
+
+ gctx->aadLen = assoclen;
+ memset(gctx->aad_hash, 0, 16);
+ if (assoclen)
+ set_aad(gctx, hash, assoc, assoclen);
+}
+
+/*
+ * Fold the length block into the running hash and combine it with the
+ * encrypted counter block to form the tag.
+ *
+ * @gctx: per-request GCM state; supplies aadLen and ivtag.
+ * @hash: hash state; the first 16 bytes of Htable hold the running hash on
+ *        entry and the tag on return, the hash keys start at Htable + 32.
+ * @len:  number of message bytes processed.
+ */
+static void finish_tag(struct gcm_ctx *gctx, struct Hash_ctx *hash, int len)
+{
+ int i;
+ unsigned char len_ac[16 + PPC_ALIGN];
+ unsigned char *aclen = PTR_ALIGN((void *)len_ac, PPC_ALIGN);
+ /* Widen before shifting: "len << 3" evaluated as int overflows at 2^28. */
+ __be64 clen = cpu_to_be64((u64)len << 3);
+ __be64 alen = cpu_to_be64(gctx->aadLen << 3);
+
+ /* No AAD and no data: the tag is just the encrypted counter block. */
+ if (len == 0 && gctx->aadLen == 0) {
+ memcpy(hash->Htable, gctx->ivtag, 16);
+ return;
+ }
+
+ /*
+ * Len is in bits.
+ */
+ *((__be64 *)(aclen)) = alen;
+ *((__be64 *)(aclen+8)) = clen;
+
+ /*
+ * hash (AAD len and len)
+ */
+ gcm_ghash_p10(hash->Htable, hash->Htable+32, aclen, 16);
+
+ /* XOR the hash with the encrypted counter block to get the tag. */
+ for (i = 0; i < 16; i++)
+ hash->Htable[i] ^= gctx->ivtag[i];
+}
+
+static int set_authsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ switch (authsize) {
+ case 4:
+ case 8:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ case 16:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int p10_aes_gcm_setkey(struct crypto_aead *aead, const u8 *key,
+ unsigned int keylen)
+{
+ struct crypto_tfm *tfm = crypto_aead_tfm(aead);
+ struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
+ int ret;
+
+ vsx_begin();
+ ret = aes_p10_set_encrypt_key(key, keylen * 8, &ctx->enc_key);
+ vsx_end();
+
+ return ret ? -EINVAL : 0;
+}
+
+static int p10_aes_gcm_crypt(struct aead_request *req, u8 *riv,
+ int assoclen, int enc)
+{
+ struct crypto_tfm *tfm = req->base.tfm;
+ struct p10_aes_gcm_ctx *ctx = crypto_tfm_ctx(tfm);
+ u8 databuf[sizeof(struct gcm_ctx) + PPC_ALIGN];
+ struct gcm_ctx *gctx = PTR_ALIGN((void *)databuf, PPC_ALIGN);
+ u8 hashbuf[sizeof(struct Hash_ctx) + PPC_ALIGN];
+ struct Hash_ctx *hash = PTR_ALIGN((void *)hashbuf, PPC_ALIGN);
+ struct skcipher_walk walk;
+ u8 *assocmem = NULL;
+ u8 *assoc;
+ unsigned int cryptlen = req->cryptlen;
+ unsigned char ivbuf[AES_BLOCK_SIZE+PPC_ALIGN];
+ unsigned char *iv = PTR_ALIGN((void *)ivbuf, PPC_ALIGN);
+ int ret;
+ unsigned long auth_tag_len = crypto_aead_authsize(__crypto_aead_cast(tfm));
+ u8 otag[16];
+ int total_processed = 0;
+ int nbytes;
+
+ memset(databuf, 0, sizeof(databuf));
+ memset(hashbuf, 0, sizeof(hashbuf));
+ memset(ivbuf, 0, sizeof(ivbuf));
+ memcpy(iv, riv, GCM_IV_SIZE);
+
+ /* Linearize assoc, if not already linear */
+ if (req->src->length >= assoclen && req->src->length) {
+ assoc = sg_virt(req->src); /* ppc64 is !HIGHMEM */
+ } else {
+ gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
+ GFP_KERNEL : GFP_ATOMIC;
+
+ /* assoc can be any length, so must be on heap */
+ assocmem = kmalloc(assoclen, flags);
+ if (unlikely(!assocmem))
+ return -ENOMEM;
+ assoc = assocmem;
+
+ scatterwalk_map_and_copy(assoc, req->src, 0, assoclen, 0);
+ }
+
+ vsx_begin();
+ gcmp10_init(gctx, iv, (unsigned char *) &ctx->enc_key, hash, assoc, assoclen);
+ vsx_end();
+
+ kfree(assocmem);
+
+ if (enc)
+ ret = skcipher_walk_aead_encrypt(&walk, req, false);
+ else
+ ret = skcipher_walk_aead_decrypt(&walk, req, false);
+ if (ret)
+ return ret;
+
+ while ((nbytes = walk.nbytes) > 0 && ret == 0) {
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ u8 buf[AES_BLOCK_SIZE];
+
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+ src = dst = memcpy(buf, src, nbytes);
+
+ vsx_begin();
+ if (enc)
+ aes_p10_gcm_encrypt(src, dst, nbytes,
+ &ctx->enc_key, gctx->iv, hash->Htable);
+ else
+ aes_p10_gcm_decrypt(src, dst, nbytes,
+ &ctx->enc_key, gctx->iv, hash->Htable);
+
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+
+ vsx_end();
+
+ total_processed += walk.nbytes;
+ ret = skcipher_walk_done(&walk, 0);
+ }
+
+ if (ret)
+ return ret;
+
+ /* Finalize hash */
+ vsx_begin();
+ gcm_update(gctx->iv, hash->Htable);
+ finish_tag(gctx, hash, total_processed);
+ vsx_end();
+
+ /* copy Xi to end of dst */
+ if (enc)
+ scatterwalk_map_and_copy(hash->Htable, req->dst, req->assoclen + cryptlen,
+ auth_tag_len, 1);
+ else {
+ scatterwalk_map_and_copy(otag, req->src,
+ req->assoclen + cryptlen - auth_tag_len,
+ auth_tag_len, 0);
+
+ if (crypto_memneq(otag, hash->Htable, auth_tag_len)) {
+ memzero_explicit(hash->Htable, 16);
+ return -EBADMSG;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Set the key for the rfc4106 variant.
+ *
+ * @inkey holds the AES key followed by RFC4106_NONCE_SIZE bytes of nonce.
+ * The AES part is passed to p10_aes_gcm_setkey() and the nonce is stored in
+ * the transform context.
+ *
+ * Returns 0 on success, -EINVAL if the key is too short to contain the
+ * nonce, or the error returned by p10_aes_gcm_setkey().
+ */
+static int rfc4106_setkey(struct crypto_aead *tfm, const u8 *inkey,
+ unsigned int keylen)
+{
+ struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+ int err;
+
+ /* keylen is unsigned: reject short keys before the subtraction wraps. */
+ if (keylen < RFC4106_NONCE_SIZE)
+ return -EINVAL;
+
+ keylen -= RFC4106_NONCE_SIZE;
+ err = p10_aes_gcm_setkey(tfm, inkey, keylen);
+ if (err)
+ return err;
+
+ memcpy(ctx->nonce, inkey + keylen, RFC4106_NONCE_SIZE);
+ return 0;
+}
+
+static int rfc4106_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+ return crypto_rfc4106_check_authsize(authsize);
+}
+
+static int rfc4106_encrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
+ u8 iv[AES_BLOCK_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 1);
+}
+
+static int rfc4106_decrypt(struct aead_request *req)
+{
+ struct crypto_aead *aead = crypto_aead_reqtfm(req);
+ struct p10_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
+ u8 iv[AES_BLOCK_SIZE];
+
+ memcpy(iv, ctx->nonce, RFC4106_NONCE_SIZE);
+ memcpy(iv + RFC4106_NONCE_SIZE, req->iv, GCM_RFC4106_IV_SIZE);
+
+ return crypto_ipsec_check_assoclen(req->assoclen) ?:
+ p10_aes_gcm_crypt(req, iv, req->assoclen - GCM_RFC4106_IV_SIZE, 0);
+}
+
+static int p10_aes_gcm_encrypt(struct aead_request *req)
+{
+ return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 1);
+}
+
+static int p10_aes_gcm_decrypt(struct aead_request *req)
+{
+ return p10_aes_gcm_crypt(req, req->iv, req->assoclen, 0);
+}
+
+static struct aead_alg gcm_aes_algs[] = {{
+ .ivsize = GCM_IV_SIZE,
+ .maxauthsize = 16,
+
+ .setauthsize = set_authsize,
+ .setkey = p10_aes_gcm_setkey,
+ .encrypt = p10_aes_gcm_encrypt,
+ .decrypt = p10_aes_gcm_decrypt,
+
+ .base.cra_name = "__gcm(aes)",
+ .base.cra_driver_name = "__aes_gcm_p10",
+ .base.cra_priority = 2100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx)+
+ 4 * sizeof(u64[2]),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+}, {
+ .ivsize = GCM_RFC4106_IV_SIZE,
+ .maxauthsize = 16,
+ .setkey = rfc4106_setkey,
+ .setauthsize = rfc4106_setauthsize,
+ .encrypt = rfc4106_encrypt,
+ .decrypt = rfc4106_decrypt,
+
+ .base.cra_name = "__rfc4106(gcm(aes))",
+ .base.cra_driver_name = "__rfc4106_aes_gcm_p10",
+ .base.cra_priority = 2100,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct p10_aes_gcm_ctx) +
+ 4 * sizeof(u64[2]),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_flags = CRYPTO_ALG_INTERNAL,
+}};
+
+static struct simd_aead_alg *p10_simd_aeads[ARRAY_SIZE(gcm_aes_algs)];
+
+/*
+ * Module init: register the AES-GCM and rfc4106 AEADs together with their
+ * SIMD wrappers when the CPU has CPU_FTR_ARCH_31. On other CPUs nothing is
+ * registered and 0 is returned.
+ */
+static int __init p10_init(void)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return 0;
+
+ /*
+ * No cleanup is needed on failure: simd_register_aeads_compat()
+ * undoes its own partial registration before returning an error.
+ * Calling simd_unregister_aeads() again here would try to
+ * unregister algorithms that are no longer registered.
+ */
+ return simd_register_aeads_compat(gcm_aes_algs,
+ ARRAY_SIZE(gcm_aes_algs),
+ p10_simd_aeads);
+}
+
+/*
+ * Module exit: unregister the AEADs and their SIMD wrappers.
+ */
+static void __exit p10_exit(void)
+{
+ /* p10_init() registers nothing on CPUs without CPU_FTR_ARCH_31. */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return;
+
+ simd_unregister_aeads(gcm_aes_algs, ARRAY_SIZE(gcm_aes_algs),
+ p10_simd_aeads);
+}
+
+module_init(p10_init);
+module_exit(p10_exit);
diff --git a/arch/powerpc/crypto/aes-gcm-p10.S b/arch/powerpc/crypto/aes-gcm-p10.S
new file mode 100644
index 000000000000..89f50eef3512
--- /dev/null
+++ b/arch/powerpc/crypto/aes-gcm-p10.S
@@ -0,0 +1,1236 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Accelerated AES-GCM stitched implementation for ppc64le.
+#
+# Copyright 2024- IBM Inc.
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# GHASH is based on the Karatsuba multiplication method.
+#
+# Xi xor X1
+#
+# X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H =
+# (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
+# (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
+# (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
+# (X4.h * H.h + X4.l * H.l + X4 * H)
+#
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+# ( H.l, H, H.h)
+# ( H^2.l, H^2, H^2.h)
+# ( H^3.l, H^3, H^3.h)
+# ( H^4.l, H^4, H^4.h)
+#
+# v30 is IV
+# v31 - counter 1
+#
+# AES used,
+# vs0 - round key 0
+# v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
+#
+# This implementation uses stitched AES-GCM approach to improve overall performance.
+# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
+#
+# ===================================================================================
+#
+
+#include <asm/ppc_asm.h>
+#include <linux/linkage.h>
+
+.machine "any"
+.text
+
+.macro SAVE_GPR GPR OFFSET FRAME
+ std \GPR,\OFFSET(\FRAME)
+.endm
+
+.macro SAVE_VRS VRS OFFSET FRAME
+ stxv \VRS+32, \OFFSET(\FRAME)
+.endm
+
+.macro RESTORE_GPR GPR OFFSET FRAME
+ ld \GPR,\OFFSET(\FRAME)
+.endm
+
+.macro RESTORE_VRS VRS OFFSET FRAME
+ lxv \VRS+32, \OFFSET(\FRAME)
+.endm
+
+.macro SAVE_REGS
+ mflr 0
+ std 0, 16(1)
+ stdu 1,-512(1)
+
+ SAVE_GPR 14, 112, 1
+ SAVE_GPR 15, 120, 1
+ SAVE_GPR 16, 128, 1
+ SAVE_GPR 17, 136, 1
+ SAVE_GPR 18, 144, 1
+ SAVE_GPR 19, 152, 1
+ SAVE_GPR 20, 160, 1
+ SAVE_GPR 21, 168, 1
+ SAVE_GPR 22, 176, 1
+ SAVE_GPR 23, 184, 1
+ SAVE_GPR 24, 192, 1
+
+ addi 9, 1, 256
+ SAVE_VRS 20, 0, 9
+ SAVE_VRS 21, 16, 9
+ SAVE_VRS 22, 32, 9
+ SAVE_VRS 23, 48, 9
+ SAVE_VRS 24, 64, 9
+ SAVE_VRS 25, 80, 9
+ SAVE_VRS 26, 96, 9
+ SAVE_VRS 27, 112, 9
+ SAVE_VRS 28, 128, 9
+ SAVE_VRS 29, 144, 9
+ SAVE_VRS 30, 160, 9
+ SAVE_VRS 31, 176, 9
+.endm # SAVE_REGS
+
+.macro RESTORE_REGS
+ addi 9, 1, 256
+ RESTORE_VRS 20, 0, 9
+ RESTORE_VRS 21, 16, 9
+ RESTORE_VRS 22, 32, 9
+ RESTORE_VRS 23, 48, 9
+ RESTORE_VRS 24, 64, 9
+ RESTORE_VRS 25, 80, 9
+ RESTORE_VRS 26, 96, 9
+ RESTORE_VRS 27, 112, 9
+ RESTORE_VRS 28, 128, 9
+ RESTORE_VRS 29, 144, 9
+ RESTORE_VRS 30, 160, 9
+ RESTORE_VRS 31, 176, 9
+
+ RESTORE_GPR 14, 112, 1
+ RESTORE_GPR 15, 120, 1
+ RESTORE_GPR 16, 128, 1
+ RESTORE_GPR 17, 136, 1
+ RESTORE_GPR 18, 144, 1
+ RESTORE_GPR 19, 152, 1
+ RESTORE_GPR 20, 160, 1
+ RESTORE_GPR 21, 168, 1
+ RESTORE_GPR 22, 176, 1
+ RESTORE_GPR 23, 184, 1
+ RESTORE_GPR 24, 192, 1
+
+ addi 1, 1, 512
+ ld 0, 16(1)
+ mtlr 0
+.endm # RESTORE_REGS
+
+# 4x loops
+.macro AES_CIPHER_4x _VCIPHER ST r
+ \_VCIPHER \ST, \ST, \r
+ \_VCIPHER \ST+1, \ST+1, \r
+ \_VCIPHER \ST+2, \ST+2, \r
+ \_VCIPHER \ST+3, \ST+3, \r
+.endm
+
+# 8x loops
+.macro AES_CIPHER_8x _VCIPHER ST r
+ \_VCIPHER \ST, \ST, \r
+ \_VCIPHER \ST+1, \ST+1, \r
+ \_VCIPHER \ST+2, \ST+2, \r
+ \_VCIPHER \ST+3, \ST+3, \r
+ \_VCIPHER \ST+4, \ST+4, \r
+ \_VCIPHER \ST+5, \ST+5, \r
+ \_VCIPHER \ST+6, \ST+6, \r
+ \_VCIPHER \ST+7, \ST+7, \r
+.endm
+
+.macro LOOP_8AES_STATE
+ xxlor 32+23, 1, 1
+ xxlor 32+24, 2, 2
+ xxlor 32+25, 3, 3
+ xxlor 32+26, 4, 4
+ AES_CIPHER_8x vcipher, 15, 23
+ AES_CIPHER_8x vcipher, 15, 24
+ AES_CIPHER_8x vcipher, 15, 25
+ AES_CIPHER_8x vcipher, 15, 26
+ xxlor 32+23, 5, 5
+ xxlor 32+24, 6, 6
+ xxlor 32+25, 7, 7
+ xxlor 32+26, 8, 8
+ AES_CIPHER_8x vcipher, 15, 23
+ AES_CIPHER_8x vcipher, 15, 24
+ AES_CIPHER_8x vcipher, 15, 25
+ AES_CIPHER_8x vcipher, 15, 26
+.endm
+
+#
+# PPC_GHASH4x(H, S1, S2, S3, S4): Compute 4x hash values based on Karatsuba method.
+# H: returning digest
+# S#: states
+#
+# S1 should xor with the previous digest
+#
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+# Scratch: v23 - v29
+#
+.macro PPC_GHASH4x H S1 S2 S3 S4
+
+ vpmsumd 23, 12, \S1 # H4.L * X.L
+ vpmsumd 24, 9, \S2
+ vpmsumd 25, 6, \S3
+ vpmsumd 26, 3, \S4
+
+ vpmsumd 27, 13, \S1 # H4.L * X.H + H4.H * X.L
+ vpmsumd 28, 10, \S2 # H3.L * X1.H + H3.H * X1.L
+
+ vxor 23, 23, 24
+ vxor 23, 23, 25
+ vxor 23, 23, 26 # L
+
+ vxor 24, 27, 28
+ vpmsumd 25, 7, \S3
+ vpmsumd 26, 4, \S4
+
+ vxor 24, 24, 25
+ vxor 24, 24, 26 # M
+
+ # sum hash and reduction with H Poly
+ vpmsumd 28, 23, 2 # reduction
+
+ vxor 1, 1, 1
+ vsldoi 25, 24, 1, 8 # mL
+ vsldoi 1, 1, 24, 8 # mH
+ vxor 23, 23, 25 # mL + L
+
+ # This performs swap and xor like,
+ # vsldoi 23, 23, 23, 8 # swap
+ # vxor 23, 23, 28
+ xxlor 32+25, 10, 10
+ vpermxor 23, 23, 28, 25
+
+ vpmsumd 26, 14, \S1 # H4.H * X.H
+ vpmsumd 27, 11, \S2
+ vpmsumd 28, 8, \S3
+ vpmsumd 29, 5, \S4
+
+ vxor 24, 26, 27
+ vxor 24, 24, 28
+ vxor 24, 24, 29
+
+ vxor 24, 24, 1
+
+ # sum hash and reduction with H Poly
+ vsldoi 25, 23, 23, 8 # swap
+ vpmsumd 23, 23, 2
+ vxor 27, 25, 24
+ vxor \H, 23, 27
+.endm
+
+#
+# Compute update single ghash
+# scratch: v1, v22..v27
+#
+.macro PPC_GHASH1x H S1
+
+ vxor 1, 1, 1
+
+ vpmsumd 22, 3, \S1 # L
+ vpmsumd 23, 4, \S1 # M
+ vpmsumd 24, 5, \S1 # H
+
+ vpmsumd 27, 22, 2 # reduction
+
+ vsldoi 25, 23, 1, 8 # mL
+ vsldoi 26, 1, 23, 8 # mH
+ vxor 22, 22, 25 # LL + LL
+ vxor 24, 24, 26 # HH + HH
+
+ xxlor 32+25, 10, 10
+ vpermxor 22, 22, 27, 25
+
+ vsldoi 23, 22, 22, 8 # swap
+ vpmsumd 22, 22, 2 # reduction
+ vxor 23, 23, 24
+ vxor \H, 22, 23
+.endm
+
+#
+# LOAD_HASH_TABLE
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+#
+.macro LOAD_HASH_TABLE
+ # Load Xi
+ lxvb16x 32, 0, 8 # load Xi
+
+ # load Hash - h^4, h^3, h^2, h
+ li 10, 32
+ lxvd2x 2+32, 10, 8 # H Poli
+ li 10, 48
+ lxvd2x 3+32, 10, 8 # Hl
+ li 10, 64
+ lxvd2x 4+32, 10, 8 # H
+ li 10, 80
+ lxvd2x 5+32, 10, 8 # Hh
+
+ li 10, 96
+ lxvd2x 6+32, 10, 8 # H^2l
+ li 10, 112
+ lxvd2x 7+32, 10, 8 # H^2
+ li 10, 128
+ lxvd2x 8+32, 10, 8 # H^2h
+
+ li 10, 144
+ lxvd2x 9+32, 10, 8 # H^3l
+ li 10, 160
+ lxvd2x 10+32, 10, 8 # H^3
+ li 10, 176
+ lxvd2x 11+32, 10, 8 # H^3h
+
+ li 10, 192
+ lxvd2x 12+32, 10, 8 # H^4l
+ li 10, 208
+ lxvd2x 13+32, 10, 8 # H^4
+ li 10, 224
+ lxvd2x 14+32, 10, 8 # H^4h
+.endm
+
+################################################################################
+# Compute AES and ghash one block at a time.
+# r23: AES rounds
+# v30: current IV
+# vs0: roundkey 0
+#
+################################################################################
+SYM_FUNC_START_LOCAL(aes_gcm_crypt_1x)
+
+ cmpdi 5, 16
+ bge __More_1x
+ blr
+__More_1x:
+ li 10, 16
+ divdu 12, 5, 10
+
+ xxlxor 32+15, 32+30, 0
+
+ # Pre-load 8 AES rounds to scratch vectors.
+ xxlor 32+16, 1, 1
+ xxlor 32+17, 2, 2
+ xxlor 32+18, 3, 3
+ xxlor 32+19, 4, 4
+ xxlor 32+20, 5, 5
+ xxlor 32+21, 6, 6
+ xxlor 32+28, 7, 7
+ xxlor 32+29, 8, 8
+ lwz 23, 240(6) # n rounds
+ addi 22, 23, -9 # remaining AES rounds
+
+ cmpdi 12, 0
+ bgt __Loop_1x
+ blr
+
+__Loop_1x:
+ mtctr 22
+ addi 10, 6, 144
+ vcipher 15, 15, 16
+ vcipher 15, 15, 17
+ vcipher 15, 15, 18
+ vcipher 15, 15, 19
+ vcipher 15, 15, 20
+ vcipher 15, 15, 21
+ vcipher 15, 15, 28
+ vcipher 15, 15, 29
+
+__Loop_aes_1state:
+ lxv 32+1, 0(10)
+ vcipher 15, 15, 1
+ addi 10, 10, 16
+ bdnz __Loop_aes_1state
+ lxv 32+1, 0(10) # last round key
+ lxvb16x 11, 0, 14 # load input block
+ vcipherlast 15, 15, 1
+
+ xxlxor 32+15, 32+15, 11
+ stxvb16x 32+15, 0, 9 # store output
+ addi 14, 14, 16
+ addi 9, 9, 16
+
+ cmpdi 24, 0 # decrypt?
+ bne __Encrypt_1x
+ xxlor 15+32, 11, 11
+__Encrypt_1x:
+ vxor 15, 15, 0
+ PPC_GHASH1x 0, 15
+
+ addi 5, 5, -16
+ addi 11, 11, 16
+
+ vadduwm 30, 30, 31 # IV + counter
+ xxlxor 32+15, 32+30, 0
+ addi 12, 12, -1
+ cmpdi 12, 0
+ bgt __Loop_1x
+
+ stxvb16x 32+30, 0, 7 # update IV
+ stxvb16x 32+0, 0, 8 # update Xi
+ blr
+SYM_FUNC_END(aes_gcm_crypt_1x)
+
+################################################################################
+# Process a normal partial block when we come here.
+# Compute partial mask, Load and store partial block to stack.
+# Update partial_len and pblock.
+# pblock is (encrypted ^ AES state) for encrypt
+# and (input ^ AES state) for decrypt.
+#
+################################################################################
+SYM_FUNC_START_LOCAL(__Process_partial)
+
+ # create partial mask
+ vspltisb 16, -1
+ li 12, 16
+ sub 12, 12, 5
+ sldi 12, 12, 3
+ mtvsrdd 32+17, 0, 12
+ vslo 16, 16, 17 # partial block mask
+
+ lxvb16x 11, 0, 14 # load partial block
+ xxland 11, 11, 32+16
+
+ # AES crypt partial
+ xxlxor 32+15, 32+30, 0
+ lwz 23, 240(6) # n rounds
+ addi 22, 23, -1 # loop - 1
+ mtctr 22
+ addi 10, 6, 16
+
+__Loop_aes_pstate:
+ lxv 32+1, 0(10)
+ vcipher 15, 15, 1
+ addi 10, 10, 16
+ bdnz __Loop_aes_pstate
+ lxv 32+1, 0(10) # last round key
+ vcipherlast 15, 15, 1
+
+ xxlxor 32+15, 32+15, 11
+ vand 15, 15, 16
+
+ # AES crypt output v15
+ # Write partial
+ li 10, 224
+ stxvb16x 15+32, 10, 1 # write v15 to stack
+ addi 10, 1, 223
+ addi 12, 9, -1
+ mtctr 5 # partial block len
+__Write_partial:
+ lbzu 22, 1(10)
+ stbu 22, 1(12)
+ bdnz __Write_partial
+
+ cmpdi 24, 0 # decrypt?
+ bne __Encrypt_partial
+ xxlor 32+15, 11, 11 # decrypt using the input block
+__Encrypt_partial:
+ #vxor 15, 15, 0 # ^ previous hash
+ #PPC_GHASH1x 0, 15
+
+ add 14, 14, 5
+ add 9, 9, 5
+ std 5, 56(7) # update partial
+ sub 11, 11, 5
+ li 5, 0 # done last byte
+
+ #
+ # Don't increase IV since this is the last partial.
+ # It should get updated in gcm_update if no more data blocks.
+ #vadduwm 30, 30, 31 # increase IV
+ stxvb16x 32+30, 0, 7 # update IV
+ li 10, 64
+ stxvb16x 32+0, 0, 8 # Update Xi
+ stxvb16x 32+15, 10, 7 # Update pblock
+ blr
+SYM_FUNC_END(__Process_partial)
+
+################################################################################
+# Combine partial blocks and ghash when we come here.
+#
+# The partial block has to be shifted to the right location to encrypt/decrypt
+# and compute ghash if combining the previous partial block is needed.
+# - Compute ghash for a full block. Clear Partial_len and pblock. Update IV.
+# Write Xi.
+# - Don't compute ghash if not full block. gcm_update will take care of it
+# if it is the last block. Update Partial_len and pblock.
+#
+################################################################################
+SYM_FUNC_START_LOCAL(__Combine_partial)
+
+ ld 12, 56(7)
+ mr 21, 5 # these bytes to be processed
+
+ li 17, 0
+ li 16, 16
+ sub 22, 16, 12 # bytes to complete a block
+ sub 17, 22, 5 # remaining bytes in a block
+ cmpdi 5, 16
+ ble __Inp_msg_less16
+ li 17, 0
+ mr 21, 22
+ b __Combine_continue
+__Inp_msg_less16:
+ cmpd 22, 5
+ bgt __Combine_continue
+ li 17, 0
+ mr 21, 22 # these bytes to be processed
+
+__Combine_continue:
+ # load msg and shift to the proper location and mask
+ vspltisb 16, -1
+ sldi 15, 12, 3
+ mtvsrdd 32+17, 0, 15
+ vslo 16, 16, 17
+ vsro 16, 16, 17
+ sldi 15, 17, 3
+ mtvsrdd 32+17, 0, 15
+ vsro 16, 16, 17
+ vslo 16, 16, 17 # mask
+
+ lxvb16x 32+19, 0, 14 # load partial block
+ sldi 15, 12, 3
+ mtvsrdd 32+17, 0, 15
+ vsro 19, 19, 17 # 0x00..xxxx??..??
+ sldi 15, 17, 3
+ mtvsrdd 32+17, 0, 15
+ vsro 19, 19, 17 # 0x00..xxxx
+ vslo 19, 19, 17 # shift back to form 0x00..xxxx00..00
+
+ # AES crypt partial
+ xxlxor 32+15, 32+30, 0
+ lwz 23, 240(6) # n rounds
+ addi 22, 23, -1 # loop - 1
+ mtctr 22
+ addi 10, 6, 16
+
+__Loop_aes_cpstate:
+ lxv 32+1, 0(10)
+ vcipher 15, 15, 1
+ addi 10, 10, 16
+ bdnz __Loop_aes_cpstate
+ lxv 32+1, 0(10) # last round key
+ vcipherlast 15, 15, 1
+
+ vxor 15, 15, 19
+ vand 15, 15, 16
+
+ # AES crypt output v15
+ # Write partial
+ li 10, 224
+ stxvb16x 15+32, 10, 1 # write v15 to stack
+ addi 10, 1, 223
+ add 10, 10, 12 # add offset
+ addi 15, 9, -1
+ mtctr 21 # partial block len
+__Write_combine_partial:
+ lbzu 22, 1(10)
+ stbu 22, 1(15)
+ bdnz __Write_combine_partial
+
+ add 14, 14, 21
+ add 11, 11, 21
+ add 9, 9, 21
+ sub 5, 5, 21
+
+ # Encrypt/Decrypt?
+ cmpdi 24, 0 # decrypt?
+ bne __Encrypt_combine_partial
+ vmr 15, 19 # decrypt using the input block
+
+__Encrypt_combine_partial:
+ #
+ # Update partial flag and combine ghash.
+__Update_partial_ghash:
+ li 10, 64
+ lxvb16x 32+17, 10, 7 # load previous pblock
+ add 12, 12, 21 # combined processed
+ vxor 15, 15, 17 # combined pblock
+
+ cmpdi 12, 16
+ beq __Clear_partial_flag
+ std 12, 56(7) # update partial len
+ stxvb16x 32+15, 10, 7 # Update current pblock
+ blr
+
+__Clear_partial_flag:
+ li 12, 0
+ std 12, 56(7)
+ # Update IV and ghash here
+ vadduwm 30, 30, 31 # increase IV
+ stxvb16x 32+30, 0, 7 # update IV
+
+ # v15 is either (input block or encrypted)^(AES state)
+ vxor 15, 15, 0
+ PPC_GHASH1x 0, 15
+ stxvb16x 32+0, 10, 7 # update pblock for debug?
+ stxvb16x 32+0, 0, 8 # update Xi
+ blr
+SYM_FUNC_END(__Combine_partial)
+
+################################################################################
+# gcm_update(iv, Xi) - compute last hash
+#
+# r3 - iv and other data: pending partial length at 56(r3), partial block
+#      at 64(r3)
+# r4 - Xi at offset 0, hash key material at offsets 32..80
+#
+# If a partial block is pending (56(r3) != 0), fold it into Xi with a single
+# GHASH multiplication, store the result to 0(r4) and clear the pending
+# length. Otherwise return without touching memory.
+#
+################################################################################
+SYM_FUNC_START(gcm_update)
+
+ ld 10, 56(3) # pending partial length
+ cmpdi 10, 0
+ beq __no_update # nothing pending
+
+ lxvb16x 32, 0, 4 # load Xi
+ # load hash key material: reduction constant (v2) and the H parts (v3-v5)
+ li 10, 32
+ lxvd2x 2+32, 10, 4 # H Poli
+ li 10, 48
+ lxvd2x 3+32, 10, 4 # Hl
+ li 10, 64
+ lxvd2x 4+32, 10, 4 # H
+ li 10, 80
+ lxvd2x 5+32, 10, 4 # Hh
+
+ addis 11, 2, permx@toc@ha
+ addi 11, 11, permx@toc@l
+ lxv 10, 0(11) # vs10: vpermxor vector
+
+ li 9, 64
+ lxvb16x 32+6, 9, 3 # load pblock
+ vxor 6, 6, 0 # pblock ^ Xi
+
+ vxor 1, 1, 1 # v1 = 0
+ vpmsumd 12, 3, 6 # L
+ vpmsumd 13, 4, 6 # M
+ vpmsumd 14, 5, 6 # H
+ vpmsumd 17, 12, 2 # reduction
+ vsldoi 15, 13, 1, 8 # mL
+ vsldoi 16, 1, 13, 8 # mH
+ vxor 12, 12, 15 # LL + LL
+ vxor 14, 14, 16 # HH + HH
+ xxlor 32+15, 10, 10
+ vpermxor 12, 12, 17, 15
+ vsldoi 13, 12, 12, 8 # swap
+ vpmsumd 12, 12, 2 # reduction
+ vxor 13, 13, 14
+ vxor 7, 12, 13 # v7 = new Xi
+
+ #vxor 0, 0, 0
+ #stxvb16x 32+0, 9, 3
+ li 10, 0
+ std 10, 56(3) # clear pending partial length
+ stxvb16x 32+7, 0, 4 # store new Xi
+
+__no_update:
+ blr
+SYM_FUNC_END(gcm_update)
+
+################################################################################
+# aes_p10_gcm_encrypt (const void *inp, void *out, size_t len,
+# const char *rk, unsigned char iv[16], void *Xip);
+#
+# r3 - inp
+# r4 - out
+# r5 - len
+# r6 - AES round keys
+# r7 - iv and other data
+# r8 - Xi, HPoli, hash keys
+#
+# rounds is at offset 240 in rk
+# Xi is at 0 in gcm_table (Xip).
+#
+# The pending partial-block length is read from 56(r7). On exit r3 holds the
+# byte count accumulated in r11, or 0 when len <= 0.
+#
+################################################################################
+SYM_FUNC_START(aes_p10_gcm_encrypt)
+
+ cmpdi 5, 0
+ ble __Invalid_msg_len # len <= 0: return 0, no registers saved yet
+
+ SAVE_REGS
+ LOAD_HASH_TABLE
+
+ # initialize ICB: GHASH( IV ), IV - r7
+ lxvb16x 30+32, 0, 7 # load IV - v30
+
+ mr 14, 3 # r14 = input pointer
+ mr 9, 4 # r9 = output pointer
+
+ # counter 1
+ vxor 31, 31, 31
+ vspltisb 22, 1
+ vsldoi 31, 31, 22,1 # counter 1
+
+ addis 11, 2, permx@toc@ha
+ addi 11, 11, permx@toc@l
+ lxv 10, 0(11) # vs10: vpermxor vector
+ li 11, 0 # r11 = byte count returned through aes_gcm_out
+
+ # load 9 round keys to VSR
+ lxv 0, 0(6) # round key 0
+ lxv 1, 16(6) # round key 1
+ lxv 2, 32(6) # round key 2
+ lxv 3, 48(6) # round key 3
+ lxv 4, 64(6) # round key 4
+ lxv 5, 80(6) # round key 5
+ lxv 6, 96(6) # round key 6
+ lxv 7, 112(6) # round key 7
+ lxv 8, 128(6) # round key 8
+
+ # load rounds - 10 (128), 12 (192), 14 (256)
+ lwz 23, 240(6) # n rounds
+ li 24, 1 # encrypt
+
+__Process_encrypt:
+ #
+ # Process different blocks
+ #
+ ld 12, 56(7) # pending partial-block length
+ cmpdi 12, 0
+ bgt __Do_combine_enc # complete the pending partial block first
+ cmpdi 5, 128
+ blt __Process_more_enc # fewer than 128 bytes left: skip the 8x path
+
+#
+# Process 8x AES/GCM blocks
+#
+__Process_8x_enc:
+ # 8x blocks
+ li 10, 128
+ divdu 12, 5, 10 # n 128 bytes-blocks
+
+ addi 12, 12, -1 # loop - 1
+
+ vmr 15, 30 # first state: IV
+ vadduwm 16, 15, 31 # state + counter
+ vadduwm 17, 16, 31
+ vadduwm 18, 17, 31
+ vadduwm 19, 18, 31
+ vadduwm 20, 19, 31
+ vadduwm 21, 20, 31
+ vadduwm 22, 21, 31
+ xxlor 9, 32+22, 32+22 # save last state
+
+ # vxor state, state, w # addroundkey
+ xxlor 32+29, 0, 0
+ vxor 15, 15, 29 # IV + round key - add round key 0
+ vxor 16, 16, 29
+ vxor 17, 17, 29
+ vxor 18, 18, 29
+ vxor 19, 19, 29
+ vxor 20, 20, 29
+ vxor 21, 21, 29
+ vxor 22, 22, 29
+
+ li 15, 16 # GPRs r15-r21: byte offsets of blocks 1-7 in a 128-byte chunk
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ #
+ # Pre-compute first 8 AES state and leave 1/3/5 more rounds
+ # for the loop.
+ #
+ addi 22, 23, -9 # process 8 keys
+ mtctr 22 # AES key loop
+ addi 10, 6, 144 # r10 -> round key 9 (keys 0-8 are in VSR0-VSR8)
+
+ LOOP_8AES_STATE # process 8 AES keys
+
+__PreLoop_aes_state:
+ lxv 32+1, 0(10) # round key
+ AES_CIPHER_8x vcipher 15 1
+ addi 10, 10, 16
+ bdnz __PreLoop_aes_state
+ lxv 32+1, 0(10) # last round key (v1)
+
+ cmpdi 12, 0 # Only one loop (8 block)
+ beq __Finish_ghash
+
+#
+# Loop 8x blocks and compute ghash
+#
+__Loop_8x_block_enc:
+ vcipherlast 15, 15, 1
+ vcipherlast 16, 16, 1
+ vcipherlast 17, 17, 1
+ vcipherlast 18, 18, 1
+ vcipherlast 19, 19, 1
+ vcipherlast 20, 20, 1
+ vcipherlast 21, 21, 1
+ vcipherlast 22, 22, 1
+
+ lxvb16x 32+23, 0, 14 # load block
+ lxvb16x 32+24, 15, 14 # load block
+ lxvb16x 32+25, 16, 14 # load block
+ lxvb16x 32+26, 17, 14 # load block
+ lxvb16x 32+27, 18, 14 # load block
+ lxvb16x 32+28, 19, 14 # load block
+ lxvb16x 32+29, 20, 14 # load block
+ lxvb16x 32+30, 21, 14 # load block
+ addi 14, 14, 128
+
+ vxor 15, 15, 23
+ vxor 16, 16, 24
+ vxor 17, 17, 25
+ vxor 18, 18, 26
+ vxor 19, 19, 27
+ vxor 20, 20, 28
+ vxor 21, 21, 29
+ vxor 22, 22, 30
+
+ stxvb16x 47, 0, 9 # store output (VSR 47-54 are v15-v22)
+ stxvb16x 48, 15, 9 # store output
+ stxvb16x 49, 16, 9 # store output
+ stxvb16x 50, 17, 9 # store output
+ stxvb16x 51, 18, 9 # store output
+ stxvb16x 52, 19, 9 # store output
+ stxvb16x 53, 20, 9 # store output
+ stxvb16x 54, 21, 9 # store output
+ addi 9, 9, 128
+
+ # ghash here: fold the running hash (v0) into the first block of each group
+ vxor 15, 15, 0
+ PPC_GHASH4x 0, 15, 16, 17, 18
+
+ vxor 19, 19, 0
+ PPC_GHASH4x 0, 19, 20, 21, 22
+
+ xxlor 32+15, 9, 9 # last state
+ vadduwm 15, 15, 31 # state + counter
+ vadduwm 16, 15, 31
+ vadduwm 17, 16, 31
+ vadduwm 18, 17, 31
+ vadduwm 19, 18, 31
+ vadduwm 20, 19, 31
+ vadduwm 21, 20, 31
+ vadduwm 22, 21, 31
+ xxlor 9, 32+22, 32+22 # save last state
+
+ xxlor 32+27, 0, 0 # restore roundkey 0
+ vxor 15, 15, 27 # IV + round key - add round key 0
+ vxor 16, 16, 27
+ vxor 17, 17, 27
+ vxor 18, 18, 27
+ vxor 19, 19, 27
+ vxor 20, 20, 27
+ vxor 21, 21, 27
+ vxor 22, 22, 27
+
+ addi 5, 5, -128 # 128 bytes consumed
+ addi 11, 11, 128 # ... and added to the returned count
+
+ LOOP_8AES_STATE # process 8 AES keys
+ mtctr 22 # AES key loop
+ addi 10, 6, 144
+__LastLoop_aes_state:
+ lxv 32+1, 0(10) # round key
+ AES_CIPHER_8x vcipher 15 1
+ addi 10, 10, 16
+ bdnz __LastLoop_aes_state
+ lxv 32+1, 0(10) # last round key (v1)
+
+ addi 12, 12, -1
+ cmpdi 12, 0
+ bne __Loop_8x_block_enc
+
+__Finish_ghash:
+ vcipherlast 15, 15, 1
+ vcipherlast 16, 16, 1
+ vcipherlast 17, 17, 1
+ vcipherlast 18, 18, 1
+ vcipherlast 19, 19, 1
+ vcipherlast 20, 20, 1
+ vcipherlast 21, 21, 1
+ vcipherlast 22, 22, 1
+
+ lxvb16x 32+23, 0, 14 # load block
+ lxvb16x 32+24, 15, 14 # load block
+ lxvb16x 32+25, 16, 14 # load block
+ lxvb16x 32+26, 17, 14 # load block
+ lxvb16x 32+27, 18, 14 # load block
+ lxvb16x 32+28, 19, 14 # load block
+ lxvb16x 32+29, 20, 14 # load block
+ lxvb16x 32+30, 21, 14 # load block
+ addi 14, 14, 128
+
+ vxor 15, 15, 23
+ vxor 16, 16, 24
+ vxor 17, 17, 25
+ vxor 18, 18, 26
+ vxor 19, 19, 27
+ vxor 20, 20, 28
+ vxor 21, 21, 29
+ vxor 22, 22, 30
+
+ stxvb16x 47, 0, 9 # store output
+ stxvb16x 48, 15, 9 # store output
+ stxvb16x 49, 16, 9 # store output
+ stxvb16x 50, 17, 9 # store output
+ stxvb16x 51, 18, 9 # store output
+ stxvb16x 52, 19, 9 # store output
+ stxvb16x 53, 20, 9 # store output
+ stxvb16x 54, 21, 9 # store output
+ addi 9, 9, 128
+
+ vxor 15, 15, 0
+ PPC_GHASH4x 0, 15, 16, 17, 18
+
+ vxor 19, 19, 0
+ PPC_GHASH4x 0, 19, 20, 21, 22
+
+ xxlor 30+32, 9, 9 # last ctr
+ vadduwm 30, 30, 31 # increase ctr
+ stxvb16x 32+30, 0, 7 # update IV
+ stxvb16x 32+0, 0, 8 # update Xi
+
+ addi 5, 5, -128
+ addi 11, 11, 128
+
+ #
+ # Done 8x blocks
+ #
+
+ cmpdi 5, 0
+ beq aes_gcm_out
+
+__Process_more_enc:
+ li 24, 1 # encrypt
+ bl aes_gcm_crypt_1x
+ cmpdi 5, 0
+ beq aes_gcm_out
+
+ bl __Process_partial
+ cmpdi 5, 0
+ beq aes_gcm_out
+__Do_combine_enc:
+ bl __Combine_partial
+ cmpdi 5, 0
+ bgt __Process_encrypt # input remains: start over from the dispatch
+ b aes_gcm_out
+
+SYM_FUNC_END(aes_p10_gcm_encrypt)
+
+################################################################################
+# aes_p10_gcm_decrypt (const void *inp, void *out, size_t len,
+# const char *rk, unsigned char iv[16], void *Xip);
+# 8x Decrypt
+#
+# Arguments arrive in r3-r8 in the order of the prototype above. The flow
+# mirrors aes_p10_gcm_encrypt, except that r24 is 0 and GHASH is computed over
+# the input blocks (v23-v30) instead of the freshly produced output.
+#
+################################################################################
+SYM_FUNC_START(aes_p10_gcm_decrypt)
+
+ cmpdi 5, 0
+ ble __Invalid_msg_len # len <= 0: return 0, no registers saved yet
+
+ SAVE_REGS
+ LOAD_HASH_TABLE
+
+ # initialize ICB: GHASH( IV ), IV - r7
+ lxvb16x 30+32, 0, 7 # load IV - v30
+
+ mr 14, 3 # r14 = input pointer
+ mr 9, 4 # r9 = output pointer
+
+ # counter 1
+ vxor 31, 31, 31
+ vspltisb 22, 1
+ vsldoi 31, 31, 22,1 # counter 1
+
+ addis 11, 2, permx@toc@ha
+ addi 11, 11, permx@toc@l
+ lxv 10, 0(11) # vs10: vpermxor vector
+ li 11, 0 # r11 = byte count returned through aes_gcm_out
+
+ # load 9 round keys to VSR
+ lxv 0, 0(6) # round key 0
+ lxv 1, 16(6) # round key 1
+ lxv 2, 32(6) # round key 2
+ lxv 3, 48(6) # round key 3
+ lxv 4, 64(6) # round key 4
+ lxv 5, 80(6) # round key 5
+ lxv 6, 96(6) # round key 6
+ lxv 7, 112(6) # round key 7
+ lxv 8, 128(6) # round key 8
+
+ # load rounds - 10 (128), 12 (192), 14 (256)
+ lwz 23, 240(6) # n rounds
+ li 24, 0 # decrypt
+
+__Process_decrypt:
+ #
+ # Process different blocks
+ #
+ ld 12, 56(7) # pending partial-block length
+ cmpdi 12, 0
+ bgt __Do_combine_dec # complete the pending partial block first
+ cmpdi 5, 128
+ blt __Process_more_dec # fewer than 128 bytes left: skip the 8x path
+
+#
+# Process 8x AES/GCM blocks
+#
+__Process_8x_dec:
+ # 8x blocks
+ li 10, 128
+ divdu 12, 5, 10 # n 128 bytes-blocks
+
+ addi 12, 12, -1 # loop - 1
+
+ vmr 15, 30 # first state: IV
+ vadduwm 16, 15, 31 # state + counter
+ vadduwm 17, 16, 31
+ vadduwm 18, 17, 31
+ vadduwm 19, 18, 31
+ vadduwm 20, 19, 31
+ vadduwm 21, 20, 31
+ vadduwm 22, 21, 31
+ xxlor 9, 32+22, 32+22 # save last state
+
+ # vxor state, state, w # addroundkey
+ xxlor 32+29, 0, 0
+ vxor 15, 15, 29 # IV + round key - add round key 0
+ vxor 16, 16, 29
+ vxor 17, 17, 29
+ vxor 18, 18, 29
+ vxor 19, 19, 29
+ vxor 20, 20, 29
+ vxor 21, 21, 29
+ vxor 22, 22, 29
+
+ li 15, 16 # GPRs r15-r21: byte offsets of blocks 1-7 in a 128-byte chunk
+ li 16, 32
+ li 17, 48
+ li 18, 64
+ li 19, 80
+ li 20, 96
+ li 21, 112
+
+ #
+ # Pre-compute first 8 AES state and leave 1/3/5 more rounds
+ # for the loop.
+ #
+ addi 22, 23, -9 # process 8 keys
+ mtctr 22 # AES key loop
+ addi 10, 6, 144 # r10 -> round key 9 (keys 0-8 are in VSR0-VSR8)
+
+ LOOP_8AES_STATE # process 8 AES keys
+
+__PreLoop_aes_state_dec:
+ lxv 32+1, 0(10) # round key
+ AES_CIPHER_8x vcipher 15 1
+ addi 10, 10, 16
+ bdnz __PreLoop_aes_state_dec
+ lxv 32+1, 0(10) # last round key (v1)
+
+ cmpdi 12, 0 # Only one loop (8 block)
+ beq __Finish_ghash_dec
+
+#
+# Loop 8x blocks and compute ghash
+#
+__Loop_8x_block_dec:
+ vcipherlast 15, 15, 1
+ vcipherlast 16, 16, 1
+ vcipherlast 17, 17, 1
+ vcipherlast 18, 18, 1
+ vcipherlast 19, 19, 1
+ vcipherlast 20, 20, 1
+ vcipherlast 21, 21, 1
+ vcipherlast 22, 22, 1
+
+ lxvb16x 32+23, 0, 14 # load block
+ lxvb16x 32+24, 15, 14 # load block
+ lxvb16x 32+25, 16, 14 # load block
+ lxvb16x 32+26, 17, 14 # load block
+ lxvb16x 32+27, 18, 14 # load block
+ lxvb16x 32+28, 19, 14 # load block
+ lxvb16x 32+29, 20, 14 # load block
+ lxvb16x 32+30, 21, 14 # load block
+ addi 14, 14, 128
+
+ vxor 15, 15, 23
+ vxor 16, 16, 24
+ vxor 17, 17, 25
+ vxor 18, 18, 26
+ vxor 19, 19, 27
+ vxor 20, 20, 28
+ vxor 21, 21, 29
+ vxor 22, 22, 30
+
+ stxvb16x 47, 0, 9 # store output (VSR 47-54 are v15-v22)
+ stxvb16x 48, 15, 9 # store output
+ stxvb16x 49, 16, 9 # store output
+ stxvb16x 50, 17, 9 # store output
+ stxvb16x 51, 18, 9 # store output
+ stxvb16x 52, 19, 9 # store output
+ stxvb16x 53, 20, 9 # store output
+ stxvb16x 54, 21, 9 # store output
+
+ addi 9, 9, 128
+
+ vmr 15, 23 # hash the input blocks, not the output just stored
+ vmr 16, 24
+ vmr 17, 25
+ vmr 18, 26
+ vmr 19, 27
+ vmr 20, 28
+ vmr 21, 29
+ vmr 22, 30
+
+ # ghash here: fold the running hash (v0) into the first block of each group
+ vxor 15, 15, 0
+ PPC_GHASH4x 0, 15, 16, 17, 18
+
+ vxor 19, 19, 0
+ PPC_GHASH4x 0, 19, 20, 21, 22
+
+ xxlor 32+15, 9, 9 # last state
+ vadduwm 15, 15, 31 # state + counter
+ vadduwm 16, 15, 31
+ vadduwm 17, 16, 31
+ vadduwm 18, 17, 31
+ vadduwm 19, 18, 31
+ vadduwm 20, 19, 31
+ vadduwm 21, 20, 31
+ vadduwm 22, 21, 31
+ xxlor 9, 32+22, 32+22 # save last state
+
+ xxlor 32+27, 0, 0 # restore roundkey 0
+ vxor 15, 15, 27 # IV + round key - add round key 0
+ vxor 16, 16, 27
+ vxor 17, 17, 27
+ vxor 18, 18, 27
+ vxor 19, 19, 27
+ vxor 20, 20, 27
+ vxor 21, 21, 27
+ vxor 22, 22, 27
+
+ addi 5, 5, -128 # 128 bytes consumed
+ addi 11, 11, 128 # ... and added to the returned count
+
+ LOOP_8AES_STATE # process 8 AES keys
+ mtctr 22 # AES key loop
+ addi 10, 6, 144
+__LastLoop_aes_state_dec:
+ lxv 32+1, 0(10) # round key
+ AES_CIPHER_8x vcipher 15 1
+ addi 10, 10, 16
+ bdnz __LastLoop_aes_state_dec
+ lxv 32+1, 0(10) # last round key (v1)
+
+ addi 12, 12, -1
+ cmpdi 12, 0
+ bne __Loop_8x_block_dec
+
+__Finish_ghash_dec:
+ vcipherlast 15, 15, 1
+ vcipherlast 16, 16, 1
+ vcipherlast 17, 17, 1
+ vcipherlast 18, 18, 1
+ vcipherlast 19, 19, 1
+ vcipherlast 20, 20, 1
+ vcipherlast 21, 21, 1
+ vcipherlast 22, 22, 1
+
+ lxvb16x 32+23, 0, 14 # load block
+ lxvb16x 32+24, 15, 14 # load block
+ lxvb16x 32+25, 16, 14 # load block
+ lxvb16x 32+26, 17, 14 # load block
+ lxvb16x 32+27, 18, 14 # load block
+ lxvb16x 32+28, 19, 14 # load block
+ lxvb16x 32+29, 20, 14 # load block
+ lxvb16x 32+30, 21, 14 # load block
+ addi 14, 14, 128
+
+ vxor 15, 15, 23
+ vxor 16, 16, 24
+ vxor 17, 17, 25
+ vxor 18, 18, 26
+ vxor 19, 19, 27
+ vxor 20, 20, 28
+ vxor 21, 21, 29
+ vxor 22, 22, 30
+
+ stxvb16x 47, 0, 9 # store output
+ stxvb16x 48, 15, 9 # store output
+ stxvb16x 49, 16, 9 # store output
+ stxvb16x 50, 17, 9 # store output
+ stxvb16x 51, 18, 9 # store output
+ stxvb16x 52, 19, 9 # store output
+ stxvb16x 53, 20, 9 # store output
+ stxvb16x 54, 21, 9 # store output
+ addi 9, 9, 128
+
+ #vmr 15, 23
+ vxor 15, 23, 0 # copy input block 0 and fold in the running hash in one step
+ vmr 16, 24
+ vmr 17, 25
+ vmr 18, 26
+ vmr 19, 27
+ vmr 20, 28
+ vmr 21, 29
+ vmr 22, 30
+
+ #vxor 15, 15, 0
+ PPC_GHASH4x 0, 15, 16, 17, 18
+
+ vxor 19, 19, 0
+ PPC_GHASH4x 0, 19, 20, 21, 22
+
+ xxlor 30+32, 9, 9 # last ctr
+ vadduwm 30, 30, 31 # increase ctr
+ stxvb16x 32+30, 0, 7 # update IV
+ stxvb16x 32+0, 0, 8 # update Xi
+
+ addi 5, 5, -128
+ addi 11, 11, 128
+
+ #
+ # Done 8x blocks
+ #
+
+ cmpdi 5, 0
+ beq aes_gcm_out
+
+__Process_more_dec:
+ li 24, 0 # decrypt
+ bl aes_gcm_crypt_1x
+ cmpdi 5, 0
+ beq aes_gcm_out
+
+ bl __Process_partial
+ cmpdi 5, 0
+ beq aes_gcm_out
+__Do_combine_dec:
+ bl __Combine_partial
+ cmpdi 5, 0
+ bgt __Process_decrypt # input remains: start over from the dispatch
+ b aes_gcm_out
+SYM_FUNC_END(aes_p10_gcm_decrypt)
+
+# Common exit for aes_p10_gcm_encrypt and aes_p10_gcm_decrypt.
+# Normal path: return the byte count held in r11 after RESTORE_REGS.
+# __Invalid_msg_len is branched to before SAVE_REGS has run, so it returns 0
+# without restoring anything.
+SYM_FUNC_START_LOCAL(aes_gcm_out)
+
+ mr 3, 11 # return count
+
+ RESTORE_REGS
+ blr
+
+__Invalid_msg_len:
+ li 3, 0 # return 0
+ blr
+SYM_FUNC_END(aes_gcm_out)
+
+# NOTE(review): the start annotation names PERMX while the label and the end
+# annotation use permx; the code references permx@toc. Presumably one symbol
+# name was intended -- confirm against the SYM_DATA_* macro definitions.
+SYM_DATA_START_LOCAL(PERMX)
+.align 4
+# for vector permute and xor
+# (loaded into VSR10 and used as the vpermxor control vector)
+permx:
+.long 0x4c5d6e7f, 0x08192a3b, 0xc4d5e6f7, 0x8091a2b3
+SYM_DATA_END(permx)
diff --git a/arch/powerpc/crypto/aes-spe-core.S b/arch/powerpc/crypto/aes-spe-core.S
new file mode 100644
index 000000000000..8e00eccc352b
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-core.S
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Fast AES implementation for SPE instruction set (PPC)
+ *
+ * This code makes use of the SPE SIMD instruction set as defined in
+ * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
+ * Implementation is based on optimization guide notes from
+ * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+/*
+ * Table lookup helpers. EAD/DAD rotate one byte of "in" (selected by bpos)
+ * into the low bits of a table pointer; the L* macros then load through that
+ * pointer.
+ */
+
+/* insert the selected byte into bits 20-27 of rT0, i.e. scaled by 16 */
+#define EAD(in, bpos) \
+ rlwimi rT0,in,28-((bpos+3)%4)*8,20,27;
+
+/* insert the selected byte into bits 24-31 of rT1, i.e. unscaled */
+#define DAD(in, bpos) \
+ rlwimi rT1,in,24-((bpos+3)%4)*8,24,31;
+
+#define LWH(out, off) \
+ evlwwsplat out,off(rT0); /* load word high */
+
+#define LWL(out, off) \
+ lwz out,off(rT0); /* load word low */
+
+#define LBZ(out, tab, off) \
+ lbz out,off(tab); /* load byte */
+
+#define LAH(out, in, bpos, off) \
+ EAD(in, bpos) /* calc addr + load word high */ \
+ LWH(out, off)
+
+#define LAL(out, in, bpos, off) \
+ EAD(in, bpos) /* calc addr + load word low */ \
+ LWL(out, off)
+
+#define LAE(out, in, bpos) \
+ EAD(in, bpos) /* calc addr + load enc byte */ \
+ LBZ(out, rT0, 8)
+
+#define LBE(out) \
+ LBZ(out, rT0, 8) /* load enc byte */
+
+#define LAD(out, in, bpos) \
+ DAD(in, bpos) /* calc addr + load dec byte */ \
+ LBZ(out, rT1, 0)
+
+#define LBD(out) \
+ LBZ(out, rT1, 0) /* load dec byte */
+
+/*
+ * ppc_encrypt_block: The central encryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ * Each pass of ppc_encrypt_block_loop contains two copies of the round body
+ * and advances rKP by 32 bytes; the loop runs until CTR reaches zero. The
+ * code after the loop runs one more round body followed by byte-wise table
+ * loads (LAE/LBE) that are merged with rlwimi.
+ *
+ */
+_GLOBAL(ppc_encrypt_block)
+ LAH(rW4, rD1, 2, 4)
+ LAH(rW6, rD0, 3, 0)
+ LAH(rW3, rD0, 1, 8)
+ppc_encrypt_block_loop:
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,16(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 2)
+ evxor rW3,rW3,rW5
+ LWH(rW4, 4)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,32(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,40(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 2)
+ evxor rW3,rW3,rW5
+ LWH(rW4, 4)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ addi rKP,rKP,32 /* 32 bytes of key material used per pass */
+ bdnz ppc_encrypt_block_loop
+ LAH(rW0, rD3, 0, 12)
+ LAL(rW0, rD0, 0, 12)
+ LAH(rW1, rD1, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAL(rW3, rD1, 1, 8)
+ LAL(rW4, rD2, 2, 4)
+ LAH(rW5, rD3, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAL(rW5, rD0, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ evldw rD1,16(rKP)
+ EAD(rD3, 3)
+ evxor rW2,rW2,rW4
+ LWL(rW7, 0)
+ evxor rW2,rW2,rW6
+ EAD(rD2, 0)
+ evxor rD1,rD1,rW2
+ LWL(rW1, 12)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW5
+ LBE(rW2)
+ evxor rW3,rW3,rW7
+ EAD(rD0, 1)
+ evxor rD3,rD3,rW3
+ LBE(rW6)
+ evxor rD3,rD3,rW1
+ EAD(rD0, 0)
+ evmergehi rD2,rD2,rD3
+ LBE(rW1)
+ LAE(rW0, rD3, 0)
+ LAE(rW1, rD0, 0)
+ LAE(rW4, rD2, 1)
+ LAE(rW5, rD3, 1)
+ LAE(rW3, rD2, 0)
+ LAE(rW7, rD1, 1)
+ rlwimi rW0,rW4,8,16,23
+ rlwimi rW1,rW5,8,16,23
+ LAE(rW4, rD1, 2)
+ LAE(rW5, rD2, 2)
+ rlwimi rW2,rW6,8,16,23
+ rlwimi rW3,rW7,8,16,23
+ LAE(rW6, rD3, 2)
+ LAE(rW7, rD0, 2)
+ rlwimi rW0,rW4,16,8,15
+ rlwimi rW1,rW5,16,8,15
+ LAE(rW4, rD0, 3)
+ LAE(rW5, rD1, 3)
+ rlwimi rW2,rW6,16,8,15
+ lwz rD0,32(rKP) /* rD0-rD3 = the four key words at 32..44(rKP) */
+ rlwimi rW3,rW7,16,8,15
+ lwz rD1,36(rKP)
+ LAE(rW6, rD2, 3)
+ LAE(rW7, rD3, 3)
+ rlwimi rW0,rW4,24,0,7
+ lwz rD2,40(rKP)
+ rlwimi rW1,rW5,24,0,7
+ lwz rD3,44(rKP)
+ rlwimi rW2,rW6,24,0,7
+ rlwimi rW3,rW7,24,0,7
+ blr
+
+/*
+ * ppc_decrypt_block: The central decryption function for a single 16 bytes
+ * block. It does no stack handling or register saving to support fast calls
+ * via bl/blr. It expects that caller has pre-xored input data with first
+ * 4 words of encryption key into rD0-rD3. Pointer/counter registers must
+ * have also been set up before (rT0, rKP, CTR). Output is stored in rD0-rD3
+ * and rW0-rW3 and caller must execute a final xor on the output registers.
+ * All working registers rD0-rD3 & rW0-rW7 are overwritten during processing.
+ *
+ * The code after the loop additionally loads bytes through rT1 (DAD/LAD/LBD
+ * insert the index into the low 8 bits of rT1), so rT1 must hold a valid
+ * table base on entry as well.
+ * NOTE(review): "encryption key" above presumably means the decryption key
+ * schedule passed in by the caller -- confirm.
+ *
+ */
+_GLOBAL(ppc_decrypt_block)
+ LAH(rW0, rD1, 0, 12)
+ LAH(rW6, rD0, 3, 0)
+ LAH(rW3, rD0, 1, 8)
+ppc_decrypt_block_loop:
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,16(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LWH(rW0, 12)
+ evxor rW3,rW3,rW1
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW5
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,32(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,40(rKP)
+ evmergehi rD0,rD0,rD1
+ EAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LWH(rW0, 12)
+ evxor rW3,rW3,rW1
+ EAD(rD0, 3)
+ evxor rD3,rD3,rW3
+ LWH(rW6, 0)
+ evxor rD3,rD3,rW5
+ EAD(rD0, 1)
+ evmergehi rD2,rD2,rD3
+ LWH(rW3, 8)
+ addi rKP,rKP,32 /* 32 bytes of key material used per pass */
+ bdnz ppc_decrypt_block_loop
+ LAH(rW1, rD3, 0, 12)
+ LAL(rW0, rD2, 0, 12)
+ LAH(rW2, rD2, 1, 8)
+ LAL(rW2, rD3, 1, 8)
+ LAH(rW4, rD3, 2, 4)
+ LAL(rW4, rD0, 2, 4)
+ LAL(rW6, rD1, 3, 0)
+ LAH(rW5, rD1, 2, 4)
+ LAH(rW7, rD2, 3, 0)
+ LAL(rW7, rD3, 3, 0)
+ LAL(rW3, rD1, 1, 8)
+ evldw rD1,16(rKP)
+ EAD(rD0, 0)
+ evxor rW4,rW4,rW6
+ LWL(rW1, 12)
+ evxor rW0,rW0,rW4
+ EAD(rD2, 2)
+ evxor rW0,rW0,rW2
+ LWL(rW5, 4)
+ evxor rD1,rD1,rW0
+ evldw rD3,24(rKP)
+ evmergehi rD0,rD0,rD1
+ DAD(rD1, 0)
+ evxor rW3,rW3,rW7
+ LBD(rW0)
+ evxor rW3,rW3,rW1
+ DAD(rD0, 1)
+ evxor rD3,rD3,rW3
+ LBD(rW6)
+ evxor rD3,rD3,rW5
+ DAD(rD0, 0)
+ evmergehi rD2,rD2,rD3
+ LBD(rW3)
+ LAD(rW2, rD3, 0)
+ LAD(rW1, rD2, 0)
+ LAD(rW4, rD2, 1)
+ LAD(rW5, rD3, 1)
+ LAD(rW7, rD1, 1)
+ rlwimi rW0,rW4,8,16,23
+ rlwimi rW1,rW5,8,16,23
+ LAD(rW4, rD3, 2)
+ LAD(rW5, rD0, 2)
+ rlwimi rW2,rW6,8,16,23
+ rlwimi rW3,rW7,8,16,23
+ LAD(rW6, rD1, 2)
+ LAD(rW7, rD2, 2)
+ rlwimi rW0,rW4,16,8,15
+ rlwimi rW1,rW5,16,8,15
+ LAD(rW4, rD0, 3)
+ LAD(rW5, rD1, 3)
+ rlwimi rW2,rW6,16,8,15
+ lwz rD0,32(rKP) /* rD0-rD3 = the four key words at 32..44(rKP) */
+ rlwimi rW3,rW7,16,8,15
+ lwz rD1,36(rKP)
+ LAD(rW6, rD2, 3)
+ LAD(rW7, rD3, 3)
+ rlwimi rW0,rW4,24,0,7
+ lwz rD2,40(rKP)
+ rlwimi rW1,rW5,24,0,7
+ lwz rD3,44(rKP)
+ rlwimi rW2,rW6,24,0,7
+ rlwimi rW3,rW7,24,0,7
+ blr
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
new file mode 100644
index 000000000000..efab78a3a8f6
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Glue code for AES implementation for SPE instructions (PPC)
+ *
+ * Based on generic implementation. The assembler module takes care
+ * about the SPE registers so it can run from interrupt context.
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <crypto/aes.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/crypto.h>
+#include <asm/byteorder.h>
+#include <asm/switch_to.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
+
+/*
+ * MAX_BYTES defines the number of bytes that are allowed to be processed
+ * between preempt_disable() and preempt_enable(). e500 cores can issue two
+ * instructions per clock cycle using one 32/64 bit unit (SU1) and one 32
+ * bit unit (SU2). One of these can be a memory access that is executed via
+ * a single load and store unit (LSU). XTS-AES-256 takes ~780 operations per
+ * 16 byte block or 25 cycles per byte. Thus 768 bytes of input data
+ * will need an estimated maximum of 20,000 cycles. Headroom for cache misses
+ * included. Even with the low end model clocked at 667 MHz this equals a
+ * critical time window of less than 30us. The value has been chosen to
+ * process a 512 byte disk block in one run or a large 1400 byte IPsec network
+ * packet in two runs.
+ *
+ */
+#define MAX_BYTES 768
+
+/* Per-transform key material for the plain, ECB, CBC and CTR algorithms. */
+struct ppc_aes_ctx {
+ u32 key_enc[AES_MAX_KEYLENGTH_U32]; /* filled by ppc_expand_key_*() */
+ u32 key_dec[AES_MAX_KEYLENGTH_U32]; /* filled by ppc_generate_decrypt_key() */
+ u32 rounds; /* 4, 5 or 6 depending on the key size */
+};
+
+/* Per-transform key material for XTS: data key schedules plus the tweak key. */
+struct ppc_xts_ctx {
+ u32 key_enc[AES_MAX_KEYLENGTH_U32]; /* expanded from the first key half */
+ u32 key_dec[AES_MAX_KEYLENGTH_U32]; /* derived from key_enc */
+ u32 key_twk[AES_MAX_KEYLENGTH_U32]; /* expanded from the second key half */
+ u32 rounds; /* 4, 5 or 6 depending on the key size */
+};
+
+extern void ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc, u32 rounds);
+extern void ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec, u32 rounds);
+extern void ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes);
+extern void ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes);
+extern void ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes, u8 *iv);
+extern void ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes, u8 *iv);
+extern void ppc_crypt_ctr (u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes, u8 *iv);
+extern void ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc, u32 rounds,
+ u32 bytes, u8 *iv, u32 *key_twk);
+extern void ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec, u32 rounds,
+ u32 bytes, u8 *iv, u32 *key_twk);
+
+extern void ppc_expand_key_128(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_192(u32 *key_enc, const u8 *key);
+extern void ppc_expand_key_256(u32 *key_enc, const u8 *key);
+
+extern void ppc_generate_decrypt_key(u32 *key_dec,u32 *key_enc,
+ unsigned int key_len);
+
+/*
+ * Open a section in which the SPE assembler routines may be called:
+ * preemption is disabled first, then enable_kernel_spe() is invoked.
+ * Closed again by spe_end().
+ */
+static void spe_begin(void)
+{
+ /* disable preemption and save users SPE registers if required */
+ preempt_disable();
+ enable_kernel_spe();
+}
+
+/*
+ * Close a section opened by spe_begin(): undo the two steps in reverse
+ * order.
+ */
+static void spe_end(void)
+{
+ disable_kernel_spe();
+ /* reenable preemption */
+ preempt_enable();
+}
+
+/*
+ * Expand @in_key into ctx->key_enc and derive ctx->key_dec from it.
+ *
+ * ctx->rounds is set to 4, 5 or 6 for 128, 192 and 256 bit keys. This is the
+ * value handed to the assembler routines, not the AES round count (10/12/14).
+ * NOTE(review): presumably it is the iteration count of the assembler loop,
+ * which handles two rounds per pass -- confirm against the assembler callers.
+ *
+ * Returns 0 on success or -EINVAL for any other key length.
+ */
+static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ switch (key_len) {
+ case AES_KEYSIZE_128:
+ ctx->rounds = 4;
+ ppc_expand_key_128(ctx->key_enc, in_key);
+ break;
+ case AES_KEYSIZE_192:
+ ctx->rounds = 5;
+ ppc_expand_key_192(ctx->key_enc, in_key);
+ break;
+ case AES_KEYSIZE_256:
+ ctx->rounds = 6;
+ ppc_expand_key_256(ctx->key_enc, in_key);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+ return 0;
+}
+
+/* skcipher flavour of ppc_aes_setkey(): unwrap the base tfm and delegate. */
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
+{
+ return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+/*
+ * Set the XTS key. @in_key carries two keys of equal size back to back: the
+ * first half is expanded into the data key schedule (key_enc, and key_dec
+ * derived from it), the second half into the tweak key schedule (key_twk).
+ *
+ * Returns 0 on success, the error from xts_verify_key(), or -EINVAL when the
+ * half-key length is not 128, 192 or 256 bits.
+ */
+static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int err;
+
+ err = xts_verify_key(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ /* from here on key_len is the length of one of the two keys */
+ key_len >>= 1;
+
+ switch (key_len) {
+ case AES_KEYSIZE_128:
+ ctx->rounds = 4;
+ ppc_expand_key_128(ctx->key_enc, in_key);
+ ppc_expand_key_128(ctx->key_twk, in_key + AES_KEYSIZE_128);
+ break;
+ case AES_KEYSIZE_192:
+ ctx->rounds = 5;
+ ppc_expand_key_192(ctx->key_enc, in_key);
+ ppc_expand_key_192(ctx->key_twk, in_key + AES_KEYSIZE_192);
+ break;
+ case AES_KEYSIZE_256:
+ ctx->rounds = 6;
+ ppc_expand_key_256(ctx->key_enc, in_key);
+ ppc_expand_key_256(ctx->key_twk, in_key + AES_KEYSIZE_256);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ppc_generate_decrypt_key(ctx->key_dec, ctx->key_enc, key_len);
+
+ return 0;
+}
+
+/*
+ * Cipher entry point (cia_encrypt): run ppc_encrypt_aes() from @in to @out
+ * with the encryption key schedule, inside an SPE section.
+ */
+static void ppc_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ spe_begin();
+ ppc_encrypt_aes(out, in, ctx->key_enc, ctx->rounds);
+ spe_end();
+}
+
+/*
+ * Cipher entry point (cia_decrypt): run ppc_decrypt_aes() from @in to @out
+ * with the decryption key schedule, inside an SPE section.
+ */
+static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct ppc_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ spe_begin();
+ ppc_decrypt_aes(out, in, ctx->key_dec, ctx->rounds);
+ spe_end();
+}
+
+/*
+ * ECB worker shared by encryption (@enc true) and decryption (@enc false).
+ *
+ * Walks the request and hands at most MAX_BYTES, rounded down to whole AES
+ * blocks, to the assembler per spe_begin()/spe_end() section. Bytes not
+ * consumed in a step are reported back to the walk.
+ *
+ * Returns the result of the last skcipher_walk_done() call, or that of
+ * skcipher_walk_virt() if the loop body never runs.
+ */
+static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) != 0) {
+ /* bound the time spent with preemption disabled */
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+ spe_begin();
+ if (enc)
+ ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes);
+ else
+ ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes);
+ spe_end();
+
+ /* second argument: bytes of this step left unprocessed */
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+/* skcipher .encrypt hook for ecb(aes). */
+static int ppc_ecb_encrypt(struct skcipher_request *req)
+{
+ return ppc_ecb_crypt(req, true);
+}
+
+/* skcipher .decrypt hook for ecb(aes). */
+static int ppc_ecb_decrypt(struct skcipher_request *req)
+{
+ return ppc_ecb_crypt(req, false);
+}
+
+/*
+ * CBC worker shared by encryption (@enc true) and decryption (@enc false).
+ *
+ * Same chunking as the ECB worker: at most MAX_BYTES, rounded down to whole
+ * AES blocks, per spe_begin()/spe_end() section. walk.iv is passed to the
+ * assembler on every step.
+ * NOTE(review): presumably the assembler updates walk.iv in place so that
+ * chaining continues across steps -- confirm against the assembler routines.
+ *
+ * Returns the result of the last skcipher_walk_done() call, or that of
+ * skcipher_walk_virt() if the loop body never runs.
+ */
+static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) != 0) {
+ /* bound the time spent with preemption disabled */
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+ spe_begin();
+ if (enc)
+ ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv);
+ else
+ ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv);
+ spe_end();
+
+ /* second argument: bytes of this step left unprocessed */
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+/* skcipher .encrypt hook for cbc(aes). */
+static int ppc_cbc_encrypt(struct skcipher_request *req)
+{
+ return ppc_cbc_crypt(req, true);
+}
+
+/* skcipher .decrypt hook for cbc(aes). */
+static int ppc_cbc_decrypt(struct skcipher_request *req)
+{
+ return ppc_cbc_crypt(req, false);
+}
+
+/*
+ * CTR worker; the same routine serves both directions and always uses the
+ * encryption key schedule.
+ *
+ * Steps are capped at MAX_BYTES. A step is rounded down to whole AES blocks
+ * only while it does not cover everything that is left (nbytes <
+ * walk.total), so a trailing partial block reaches the assembler with its
+ * real length.
+ *
+ * Returns the result of the last skcipher_walk_done() call, or that of
+ * skcipher_walk_virt() if the loop body never runs.
+ */
+static int ppc_ctr_crypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ /* keep whole blocks only while more data follows this step */
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+ spe_begin();
+ ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+ spe_end();
+
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+/*
+ * XTS worker for the whole-block part of a request (@enc selects the
+ * direction).
+ *
+ * The tweak key schedule is passed to the assembler on the first step only;
+ * every later step passes NULL.
+ * NOTE(review): presumably a non-NULL key_twk makes the assembler turn
+ * walk.iv into the initial tweak, and later steps continue from the tweak
+ * left in walk.iv -- confirm against the assembler routines.
+ *
+ * Returns the result of the last skcipher_walk_done() call, or that of
+ * skcipher_walk_virt() if the loop body never runs.
+ */
+static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int err;
+ u32 *twk;
+
+ err = skcipher_walk_virt(&walk, req, false);
+ twk = ctx->key_twk;
+
+ while ((nbytes = walk.nbytes) != 0) {
+ /* bound the time spent with preemption disabled */
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
+
+ spe_begin();
+ if (enc)
+ ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv, twk);
+ else
+ ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv, twk);
+ spe_end();
+
+ /* tweak key is only handed over on the first step */
+ twk = NULL;
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+/*
+ * skcipher .encrypt hook for XTS, including requests whose length is not a
+ * multiple of the AES block size.
+ *
+ * All whole blocks are encrypted through ppc_xts_crypt(). If @tail bytes
+ * remain, the last full ciphertext block is read back from the destination,
+ * its first @tail bytes become the final short block, and a block made of
+ * the plaintext tail plus the rest of that ciphertext block is encrypted in
+ * its place.
+ *
+ * Returns 0 on success, -EINVAL if the request is shorter than one block, or
+ * the error from ppc_xts_crypt().
+ */
+static int ppc_xts_encrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ /* position of the last full block */
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[2][AES_BLOCK_SIZE];
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ /* restrict the bulk pass to the whole blocks */
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ req->cryptlen - tail, req->iv);
+ req = &subreq;
+ }
+
+ err = ppc_xts_crypt(req, true);
+ if (err || !tail)
+ return err;
+
+ /* b[0] = last full ciphertext block, b[1] = its first tail bytes */
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
+ memcpy(b[1], b[0], tail);
+ /* overwrite the start of b[0] with the remaining plaintext */
+ scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);
+
+ spe_begin();
+ ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ /* b[0] and b[1] are contiguous: one copy writes block plus tail */
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
+}
+
+/*
+ * skcipher .decrypt hook for XTS, including requests whose length is not a
+ * multiple of the AES block size.
+ *
+ * Without a tail the whole request goes through ppc_xts_crypt(). With a tail
+ * only the first @offset bytes do; the last full block and the tail are
+ * handled here. The last full ciphertext block is decrypted with the tweak
+ * that follows the current one (req->iv multiplied by x), and the block
+ * rebuilt from the ciphertext tail is decrypted with the current tweak.
+ *
+ * Returns 0 on success, -EINVAL if the request is shorter than one block, or
+ * the error from ppc_xts_crypt().
+ */
+static int ppc_xts_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ /* position of the last full block */
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[3][AES_BLOCK_SIZE];
+ le128 twk;
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ /* bulk pass stops before the last full block */
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ offset, req->iv);
+ req = &subreq;
+ }
+
+ err = ppc_xts_crypt(req, false);
+ if (err || !tail)
+ return err;
+
+ /* b[1] = last full ciphertext block, b[2] = ciphertext tail */
+ scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
+
+ spe_begin();
+ /* the bulk pass had nothing to do, so encrypt the IV with the tweak key here */
+ if (!offset)
+ ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
+ AES_BLOCK_SIZE);
+
+ /* twk = tweak following the one in req->iv */
+ gf128mul_x_ble(&twk, (le128 *)req->iv);
+
+ ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ (u8 *)&twk, NULL);
+ /* b[0] = ciphertext tail followed by the rest of the decrypted b[1] */
+ memcpy(b[0], b[2], tail);
+ memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
+ ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ /* b[0] and b[1] are contiguous: one copy writes block plus tail */
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
+}
+
+/*
+ * Algorithm definitions. Disabling alignment (cra_alignmask=0) was chosen
+ * because the e500 platform can handle unaligned reads/writes very efficiently.
+ * This improves IPsec throughput by another few percent. Additionally we assume
+ * that AES context is always aligned to at least 8 bytes because it is created
+ * with kmalloc() in the crypto infrastructure
+ */
+
+static struct crypto_alg aes_cipher_alg = { /* single-block "aes" cipher backed by ppc_aes_encrypt/ppc_aes_decrypt */
+ .cra_name = "aes",
+ .cra_driver_name = "aes-ppc-spe",
+ .cra_priority = 300,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .cra_alignmask = 0, /* no alignment requested, see the comment above this definition */
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = ppc_aes_setkey,
+ .cia_encrypt = ppc_aes_encrypt,
+ .cia_decrypt = ppc_aes_decrypt
+ }
+ }
+};
+
+static struct skcipher_alg aes_skcipher_algs[] = { /* ECB, CBC, CTR and XTS modes, registered together in ppc_aes_mod_init() */
+ { /* ECB: no IV, so no .ivsize */
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ecb_encrypt,
+ .decrypt = ppc_ecb_decrypt,
+ }, { /* CBC */
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_cbc_encrypt,
+ .decrypt = ppc_cbc_decrypt,
+ }, { /* CTR: the same routine serves both directions */
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1, /* advertised block size is one byte ... */
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ctr_crypt,
+ .decrypt = ppc_ctr_crypt,
+ .chunksize = AES_BLOCK_SIZE, /* ... while the chunk size is a full AES block */
+ }, { /* XTS: own context type and setkey */
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE * 2, /* twice the AES key size; presumably data key plus tweak key -- confirm in ppc_xts_setkey */
+ .max_keysize = AES_MAX_KEY_SIZE * 2,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_xts_setkey,
+ .encrypt = ppc_xts_encrypt,
+ .decrypt = ppc_xts_decrypt,
+ }
+};
+
+static int __init ppc_aes_mod_init(void) /* register the single-block cipher, then the skcipher modes; returns 0 or the registration error */
+{
+ int err;
+
+ err = crypto_register_alg(&aes_cipher_alg);
+ if (err) /* nothing registered yet, nothing to undo */
+ return err;
+
+ err = crypto_register_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
+ if (err) /* roll back the cipher registered above so a failed init leaves nothing behind */
+ crypto_unregister_alg(&aes_cipher_alg);
+ return err;
+}
+
+static void __exit ppc_aes_mod_fini(void) /* unregister everything ppc_aes_mod_init() registered */
+{
+ crypto_unregister_alg(&aes_cipher_alg);
+ crypto_unregister_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
+}
+
+module_init(ppc_aes_mod_init);
+module_exit(ppc_aes_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS, SPE optimized");
+
+MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("ecb(aes)");
+MODULE_ALIAS_CRYPTO("cbc(aes)");
+MODULE_ALIAS_CRYPTO("ctr(aes)");
+MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("aes-ppc-spe");
diff --git a/arch/powerpc/crypto/aes-spe-keys.S b/arch/powerpc/crypto/aes-spe-keys.S
new file mode 100644
index 000000000000..2e1bc0d099bf
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-keys.S
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Key handling functions for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+
+#ifdef __BIG_ENDIAN__ /* LOAD_KEY(d, s, off): load the key word at s + off into d */
+#define LOAD_KEY(d, s, off) \
+ lwz d,off(s);
+#else /* otherwise a byte-reversed load is used; note that it clobbers r0 with the offset */
+#define LOAD_KEY(d, s, off) \
+ li r0,off; \
+ lwbrx d,s,r0;
+#endif
+
+#define INITIALIZE_KEY /* prologue of the key expansion routines */ \
+ stwu r1,-32(r1); /* create stack frame */ \
+ stw r14,8(r1); /* save registers */ \
+ stw r15,12(r1); /* r14-r16 are used as scratch/counter by the expansion loops */ \
+ stw r16,16(r1);
+
+#define FINALIZE_KEY /* epilogue matching INITIALIZE_KEY */ \
+ lwz r14,8(r1); /* restore registers */ \
+ lwz r15,12(r1); \
+ lwz r16,16(r1); \
+ xor r5,r5,r5; /* clear sensitive data: r5-r12 are the registers key words are loaded into */ \
+ xor r6,r6,r6; \
+ xor r7,r7,r7; \
+ xor r8,r8,r8; \
+ xor r9,r9,r9; \
+ xor r10,r10,r10; \
+ xor r11,r11,r11; \
+ xor r12,r12,r12; \
+ addi r1,r1,32; /* cleanup stack */
+
+#define LS_BOX(r, t1, t2) /* replace each of the 4 bytes of r by a byte looked up in PPC_AES_4K_ENCTAB; t1, t2 are scratch */ \
+ lis t2,PPC_AES_4K_ENCTAB@h; /* t2 = table address (table is 4K aligned, so the low 12 bits are free) */ \
+ ori t2,t2,PPC_AES_4K_ENCTAB@l; \
+ rlwimi t2,r,4,20,27; /* insert lowest byte of r, times 16, as table offset */ \
+ lbz t1,8(t2); /* byte at offset 8 of that 16 byte entry -- presumably the plain S-box value in the big endian table layout */ \
+ rlwimi r,t1,0,24,31; /* write result back into the lowest byte of r */ \
+ rlwimi t2,r,28,20,27; /* same for the second byte */ \
+ lbz t1,8(t2); \
+ rlwimi r,t1,8,16,23; \
+ rlwimi t2,r,20,20,27; /* same for the third byte */ \
+ lbz t1,8(t2); \
+ rlwimi r,t1,16,8,15; \
+ rlwimi t2,r,12,20,27; /* same for the highest byte */ \
+ lbz t1,8(t2); \
+ rlwimi r,t1,24,0,7;
+
+#define GF8_MUL(out, in, t1, t2) /* out = each of the 4 bytes of in doubled, with 0x1b folded in on overflow; t1, t2 are scratch (out may alias t2) */ \
+ lis t1,0x8080; /* multiplication in GF8 */ \
+ ori t1,t1,0x8080; \
+ and t1,t1,in; /* t1 = top bit of every byte */ \
+ srwi t1,t1,7; /* move it to bit 0 of its byte */ \
+ mulli t1,t1,0x1b; /* 0x1b in every byte whose top bit was set */ \
+ lis t2,0x7f7f; \
+ ori t2,t2,0x7f7f; \
+ and t2,t2,in; /* t2 = low 7 bits of every byte */ \
+ slwi t2,t2,1; /* doubled, without spilling into the neighbour byte */ \
+ xor out,t1,t2;
+
+/*
+ * ppc_expand_key_128(u32 *key_enc, const u8 *key)
+ *
+ * Expand 128 bit key into 176 bytes encryption key. It consists of
+ * key itself plus 10 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_128) /* r3 = key_enc (output), r4 = key (input) */
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0) /* r5-r8 = the four input key words */
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ stw r5,0(r3) /* key[0..3] = input data */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ li r16,10 /* 10 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_128_loop:
+ addi r3,r3,16 /* advance output pointer by one 16 byte round key */
+ mr r14,r8 /* apply LS_BOX to 4th temp */
+ rotlwi r14,r14,8 /* rotate the word left by one byte first */
+ LS_BOX(r14, r15, r4) /* r4 is reused as scratch: the input key has been loaded already */
+ xor r14,r14,r0 /* add the round constant */
+ xor r5,r5,r14 /* xor next 4 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ stw r5,0(r3) /* store next 4 keys */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ GF8_MUL(r0, r0, r4, r14) /* multiply RCO by 2 in GF */
+ subi r16,r16,1 /* count down the remaining rounds */
+ cmpwi r16,0
+ bt eq,ppc_expand_128_end
+ b ppc_expand_128_loop
+ppc_expand_128_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_expand_key_192(u32 *key_enc, const u8 *key)
+ *
+ * Expand 192 bit key into 208 bytes encryption key. It consists of key
+ * itself plus 12 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_192) /* r3 = key_enc (output), r4 = key (input) */
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0) /* r5-r10 = the six input key words */
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ LOAD_KEY(r9,r4,16)
+ LOAD_KEY(r10,r4,20)
+ stw r5,0(r3) /* first 24 bytes of the schedule = input key */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ stw r9,16(r3)
+ stw r10,20(r3)
+ li r16,8 /* 8 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_192_loop:
+ addi r3,r3,24 /* each round produces up to six words (24 bytes) */
+ mr r14,r10 /* apply LS_BOX to 6th temp */
+ rotlwi r14,r14,8 /* rotate the word left by one byte first */
+ LS_BOX(r14, r15, r4) /* r4 is reused as scratch: the input key has been loaded already */
+ xor r14,r14,r0 /* add the round constant */
+ xor r5,r5,r14 /* xor next 6 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ xor r9,r9,r8
+ xor r10,r10,r9
+ stw r5,0(r3) /* the first four words are always stored */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ subi r16,r16,1
+ cmpwi r16,0 /* last round early kick out */
+ bt eq,ppc_expand_192_end
+ stw r9,16(r3) /* words five and six are stored in all but the last round */
+ stw r10,20(r3)
+ GF8_MUL(r0, r0, r4, r14) /* multiply RCO GF8 */
+ b ppc_expand_192_loop
+ppc_expand_192_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_expand_key_256(u32 *key_enc, const u8 *key)
+ *
+ * Expand 256 bit key into 240 bytes encryption key. It consists of key
+ * itself plus 14 rounds with 16 bytes each
+ *
+ */
+_GLOBAL(ppc_expand_key_256) /* r3 = key_enc (output), r4 = key (input) */
+ INITIALIZE_KEY
+ LOAD_KEY(r5,r4,0) /* r5-r12 = the eight input key words */
+ LOAD_KEY(r6,r4,4)
+ LOAD_KEY(r7,r4,8)
+ LOAD_KEY(r8,r4,12)
+ LOAD_KEY(r9,r4,16)
+ LOAD_KEY(r10,r4,20)
+ LOAD_KEY(r11,r4,24)
+ LOAD_KEY(r12,r4,28)
+ stw r5,0(r3) /* first 32 bytes of the schedule = input key */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ stw r9,16(r3)
+ stw r10,20(r3)
+ stw r11,24(r3)
+ stw r12,28(r3)
+ li r16,7 /* 7 expansion rounds */
+ lis r0,0x0100 /* RCO(1) */
+ppc_expand_256_loop:
+ addi r3,r3,32 /* each round produces up to eight words (32 bytes) */
+ mr r14,r12 /* apply LS_BOX to 8th temp */
+ rotlwi r14,r14,8 /* rotate the word left by one byte first */
+ LS_BOX(r14, r15, r4) /* r4 is reused as scratch: the input key has been loaded already */
+ xor r14,r14,r0 /* add the round constant */
+ xor r5,r5,r14 /* xor 4 keys */
+ xor r6,r6,r5
+ xor r7,r7,r6
+ xor r8,r8,r7
+ mr r14,r8 /* second substitution: no rotate and no round constant here */
+ LS_BOX(r14, r15, r4) /* apply LS_BOX to 4th temp */
+ xor r9,r9,r14 /* xor 4 keys */
+ xor r10,r10,r9
+ xor r11,r11,r10
+ xor r12,r12,r11
+ stw r5,0(r3) /* the first four words are always stored */
+ stw r6,4(r3)
+ stw r7,8(r3)
+ stw r8,12(r3)
+ subi r16,r16,1
+ cmpwi r16,0 /* last round early kick out */
+ bt eq,ppc_expand_256_end
+ stw r9,16(r3) /* words five to eight are stored in all but the last round */
+ stw r10,20(r3)
+ stw r11,24(r3)
+ stw r12,28(r3)
+ GF8_MUL(r0, r0, r4, r14) /* next round constant: previous one doubled */
+ b ppc_expand_256_loop
+ppc_expand_256_end:
+ FINALIZE_KEY
+ blr
+
+/*
+ * ppc_generate_decrypt_key: derive decryption key from encryption key
+ * number of bytes to handle are calculated from length of key (16/24/32)
+ *
+ */
+_GLOBAL(ppc_generate_decrypt_key) /* r3 = decryption key (output), r4 = encryption key (input), r5 = key length in bytes */
+ addi r6,r5,24
+ slwi r6,r6,2 /* r6 = (len + 24) * 4 = byte offset of the last round key (160/192/224) */
+ lwzx r7,r4,r6 /* first/last 4 words are same */
+ stw r7,0(r3) /* i.e. first and last round key are copied unchanged, swapping places */
+ lwz r7,0(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4 /* second word of the pair */
+ lwzx r7,r4,r6
+ stw r7,4(r3)
+ lwz r7,4(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4 /* third word */
+ lwzx r7,r4,r6
+ stw r7,8(r3)
+ lwz r7,8(r4)
+ stwx r7,r3,r6
+ addi r6,r6,4 /* fourth word */
+ lwzx r7,r4,r6
+ stw r7,12(r3)
+ lwz r7,12(r4)
+ stwx r7,r3,r6
+ addi r3,r3,16 /* output continues with the second round key */
+ add r4,r4,r6
+ subi r4,r4,28 /* r6 has grown by 12 meanwhile: r4 now points at the second to last source round key */
+ addi r5,r5,20
+ srwi r5,r5,2 /* r5 = (len + 20) / 4 = number of round keys left to convert (9/11/13) */
+ppc_generate_decrypt_block:
+ li r6,4 /* four words per round key */
+ mtctr r6
+ppc_generate_decrypt_word:
+ lwz r6,0(r4) /* r6 = source word a */
+ GF8_MUL(r7, r6, r0, r7) /* r7 = 2a */
+ GF8_MUL(r8, r7, r0, r8) /* r8 = 4a */
+ GF8_MUL(r9, r8, r0, r9) /* r9 = 8a */
+ xor r10,r9,r6 /* r10 = 9a */
+ xor r11,r7,r8
+ xor r11,r11,r9 /* r11 = 14a */
+ xor r12,r7,r10 /* 11a, rotated below */
+ rotrwi r12,r12,24
+ xor r11,r11,r12
+ xor r12,r8,r10 /* 13a, rotated below */
+ rotrwi r12,r12,16
+ xor r11,r11,r12
+ rotrwi r12,r10,8 /* 9a rotated */
+ xor r11,r11,r12 /* r11 = column transform with factors 14/11/13/9 (the inverse MixColumns coefficients) */
+ stw r11,0(r3)
+ addi r3,r3,4
+ addi r4,r4,4
+ bdnz ppc_generate_decrypt_word
+ subi r4,r4,32 /* source walks backwards: undo the 16 bytes just read and step one round key back */
+ subi r5,r5,1
+ cmpwi r5,0
+ bt gt,ppc_generate_decrypt_block
+ blr
diff --git a/arch/powerpc/crypto/aes-spe-modes.S b/arch/powerpc/crypto/aes-spe-modes.S
new file mode 100644
index 000000000000..3f92a6a85785
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-modes.S
@@ -0,0 +1,625 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#include <asm/ppc_asm.h>
+#include "aes-spe-regs.h"
+
+#ifdef __BIG_ENDIAN__ /* Macros for big endian builds */
+
+#define LOAD_DATA(reg, off) \
+ lwz reg,off(rSP); /* load with offset */
+#define SAVE_DATA(reg, off) \
+ stw reg,off(rDP); /* save with offset */
+#define NEXT_BLOCK \
+ addi rSP,rSP,16; /* increment pointers per block */ \
+ addi rDP,rDP,16;
+#define LOAD_IV(reg, off) \
+ lwz reg,off(rIP); /* IV loading with offset */
+#define SAVE_IV(reg, off) \
+ stw reg,off(rIP); /* IV saving with offset */
+#define START_IV /* nothing to reset */
+#define CBC_DEC 16 /* CBC decrement per block */
+#define CTR_DEC 1 /* CTR decrement one byte */
+
+#else /* Macros for little endian */
+
+#define LOAD_DATA(reg, off) /* 'off' is ignored here: the pointer itself advances */ \
+ lwbrx reg,0,rSP; /* load reversed */ \
+ addi rSP,rSP,4; /* and increment pointer */
+#define SAVE_DATA(reg, off) /* 'off' is ignored here: the pointer itself advances */ \
+ stwbrx reg,0,rDP; /* save reversed */ \
+ addi rDP,rDP,4; /* and increment pointer */
+#define NEXT_BLOCK /* nothing to do, LOAD_DATA/SAVE_DATA already advanced the pointers */
+#define LOAD_IV(reg, off) /* 'off' is ignored here: the pointer itself advances */ \
+ lwbrx reg,0,rIP; /* load reversed */ \
+ addi rIP,rIP,4; /* and increment pointer */
+#define SAVE_IV(reg, off) /* 'off' is ignored here: the pointer itself advances */ \
+ stwbrx reg,0,rIP; /* save reversed */ \
+ addi rIP,rIP,4; /* and increment pointer */
+#define START_IV \
+ subi rIP,rIP,16; /* must reset pointer */
+#define CBC_DEC 32 /* 2 blocks because of incs */
+#define CTR_DEC 17 /* 1 block because of incs */
+
+#endif
+
+#define SAVE_0_REGS /* selected via SAVE_##n##_REGS in INITIALIZE_CRYPT: nothing extra to save */
+#define LOAD_0_REGS /* selected via LOAD_##n##_REGS in FINALIZE_CRYPT: nothing extra to restore */
+
+#define SAVE_4_REGS /* IV registers rI0-rI3 */ \
+ stw rI0,96(r1); /* save 32 bit registers */ \
+ stw rI1,100(r1); \
+ stw rI2,104(r1); \
+ stw rI3,108(r1);
+
+#define LOAD_4_REGS /* counterpart of SAVE_4_REGS */ \
+ lwz rI0,96(r1); /* restore 32 bit registers */ \
+ lwz rI1,100(r1); \
+ lwz rI2,104(r1); \
+ lwz rI3,108(r1);
+
+#define SAVE_8_REGS /* IV registers plus tweak registers rG0-rG3 */ \
+ SAVE_4_REGS \
+ stw rG0,112(r1); /* save 32 bit registers */ \
+ stw rG1,116(r1); \
+ stw rG2,120(r1); \
+ stw rG3,124(r1);
+
+#define LOAD_8_REGS /* counterpart of SAVE_8_REGS */ \
+ LOAD_4_REGS \
+ lwz rG0,112(r1); /* restore 32 bit registers */ \
+ lwz rG1,116(r1); \
+ lwz rG2,120(r1); \
+ lwz rG3,124(r1);
+
+#define INITIALIZE_CRYPT(tab,nr32bitregs) /* common prologue: tab = lookup table symbol, nr32bitregs = 0, 4 or 8 extra registers to save */ \
+ mflr r0; /* the mode routines use bl, so LR must be preserved */ \
+ stwu r1,-160(r1); /* create stack frame */ \
+ lis rT0,tab@h; /* en-/decryption table pointer */ \
+ stw r0,8(r1); /* save link register */ \
+ ori rT0,rT0,tab@l; \
+ evstdw r14,16(r1); \
+ mr rKS,rKP; /* keep a copy of the key pointer (rKS is r0, free again now that LR is stored) */ \
+ evstdw r15,24(r1); /* We must save non volatile */ \
+ evstdw r16,32(r1); /* registers. Take the chance */ \
+ evstdw r17,40(r1); /* and save the SPE part too */ \
+ evstdw r18,48(r1); \
+ evstdw r19,56(r1); \
+ evstdw r20,64(r1); \
+ evstdw r21,72(r1); \
+ evstdw r22,80(r1); \
+ evstdw r23,88(r1); \
+ SAVE_##nr32bitregs##_REGS
+
+#define FINALIZE_CRYPT(nr32bitregs) /* common epilogue; nr32bitregs must match the INITIALIZE_CRYPT of the routine */ \
+ lwz r0,8(r1); /* fetch the saved link register */ \
+ evldw r14,16(r1); /* restore SPE registers */ \
+ evldw r15,24(r1); \
+ evldw r16,32(r1); \
+ evldw r17,40(r1); \
+ evldw r18,48(r1); \
+ evldw r19,56(r1); \
+ evldw r20,64(r1); \
+ evldw r21,72(r1); \
+ evldw r22,80(r1); \
+ evldw r23,88(r1); \
+ LOAD_##nr32bitregs##_REGS \
+ mtlr r0; /* restore link register */ \
+ xor r0,r0,r0; /* r0 = 0 for the wipe below */ \
+ stw r0,16(r1); /* delete sensitive data */ \
+ stw r0,24(r1); /* that we might have pushed */ \
+ stw r0,32(r1); /* from other context that runs */ \
+ stw r0,40(r1); /* the same code */ \
+ stw r0,48(r1); /* note: one 32 bit word per 8 byte save slot is overwritten */ \
+ stw r0,56(r1); \
+ stw r0,64(r1); \
+ stw r0,72(r1); \
+ stw r0,80(r1); \
+ stw r0,88(r1); \
+ addi r1,r1,160; /* cleanup stack frame */
+
+#define ENDIAN_SWAP(t0, t1, s0, s1) /* t0 = byte-reversed s0, t1 = byte-reversed s1 (two independent words, interleaved) */ \
+ rotrwi t0,s0,8; /* swap endianness for 2 GPRs */ \
+ rotrwi t1,s1,8; \
+ rlwimi t0,s0,8,8,15; /* patch in the two bytes the rotate left in the wrong place */ \
+ rlwimi t1,s1,8,8,15; \
+ rlwimi t0,s0,8,24,31; \
+ rlwimi t1,s1,8,24,31;
+
+#define GF128_MUL(d0, d1, d2, d3, t0) /* shift the 128 bit value d3:d2:d1:d0 left by one bit in place and fold 0x87 into d0 on overflow; t0 is scratch */ \
+ li t0,0x87; /* multiplication in GF128 */ \
+ cmpwi d3,-1; \
+ iselgt t0,0,t0; /* t0 = 0 when d3 > -1 (top bit clear), else keep 0x87 */ \
+ rlwimi d3,d2,0,0,0; /* propagate "carry" bits */ \
+ rotlwi d3,d3,1; /* top bit of the lower word ends up as bit 0 */ \
+ rlwimi d2,d1,0,0,0; \
+ rotlwi d2,d2,1; \
+ rlwimi d1,d0,0,0,0; \
+ slwi d0,d0,1; /* shift left 128 bit */ \
+ rotlwi d1,d1,1; \
+ xor d0,d0,t0;
+
+#define START_KEY(d0, d1, d2, d3) /* rD0-rD3 = d0-d3 xor the four key words at rKP; also loads CTR with the round value */ \
+ lwz rW0,0(rKP); \
+ mtctr rRR; /* round value for the block routine called next */ \
+ lwz rW1,4(rKP); \
+ lwz rW2,8(rKP); \
+ lwz rW3,12(rKP); \
+ xor rD0,d0,rW0; /* rD0 is r9, the register rKT arrives in: rKT must have been consumed by now */ \
+ xor rD1,d1,rW1; \
+ xor rD2,d2,rW2; \
+ xor rD3,d3,rW3;
+
+/*
+ * ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds)
+ *
+ * called from glue layer to encrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_aes) /* rDP = out, rSP = in, rKP = key_enc, rRR = rounds */
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ LOAD_DATA(rD0, 0) /* read the 16 byte input block */
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3) /* first key addition, CTR = rounds */
+ bl ppc_encrypt_block /* defined outside this file section */
+ xor rD0,rD0,rW0 /* combine with rW0-rW3 as left by ppc_encrypt_block (presumably the last round key) and store */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds)
+ *
+ * called from glue layer to decrypt a single 16 byte block
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_aes) /* rDP = out, rSP = in, rKP = key_dec, rRR = rounds */
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
+ LOAD_DATA(rD0, 0) /* read the 16 byte input block */
+ addi rT1,rT0,4096 /* second table pointer, 4K behind the decryption table */
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3) /* first key addition, CTR = rounds */
+ bl ppc_decrypt_block /* defined outside this file section */
+ xor rD0,rD0,rW0 /* combine with rW0-rW3 as left by ppc_decrypt_block (presumably the last round key) and store */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes);
+ *
+ * called from glue layer to encrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_ecb) /* rDP = out, rSP = in, rKP = key_enc, rRR = rounds, rLN = bytes */
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
+ppc_encrypt_ecb_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS /* reload the key pointer from the copy made in INITIALIZE_CRYPT */
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16 /* one block consumed */
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15 /* more than 15 bytes left? result is used by the bt at the loop end -- relies on cr0 surviving ppc_encrypt_block */
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0 /* final combination with rW0-rW3, then store the output block */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_ecb_loop /* loop while at least one whole block remains */
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes);
+ *
+ * called from glue layer to decrypt multiple blocks via ECB
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_ecb) /* rDP = out, rSP = in, rKP = key_dec, rRR = rounds, rLN = bytes */
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
+ addi rT1,rT0,4096 /* second table pointer, 4K behind the decryption table */
+ppc_decrypt_ecb_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS /* reload the key pointer from the copy made in INITIALIZE_CRYPT */
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16 /* one block consumed */
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15 /* more than 15 bytes left? result is used by the bt at the loop end -- relies on cr0 surviving ppc_decrypt_block */
+ LOAD_DATA(rD3, 12)
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0 /* final combination with rW0-rW3, then store the output block */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_decrypt_ecb_loop /* loop while at least one whole block remains */
+ FINALIZE_CRYPT(0)
+ blr
+
+/*
+ * ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt multiple blocks via CBC
+ * Bytes must be larger or equal 16 and only whole blocks are
+ * processed. round values are AES128 = 4, AES192 = 5 and
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_cbc) /* rDP = out, rSP = in, rKP = key_enc, rRR = rounds, rLN = bytes, rIP = iv */
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+ LOAD_IV(rI0, 0) /* rI0-rI3 = chaining value, initially the IV */
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ LOAD_IV(rI3, 12)
+ppc_encrypt_cbc_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS /* reload the key pointer from the copy made in INITIALIZE_CRYPT */
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16 /* one block consumed */
+ LOAD_DATA(rD2, 8)
+ cmpwi rLN,15 /* result is used by the bt at the loop end -- relies on cr0 surviving ppc_encrypt_block */
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0 /* xor the input block with the chaining value */
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0 /* output block is stored and becomes the next chaining value */
+ SAVE_DATA(rI0, 0)
+ xor rI1,rD1,rW1
+ SAVE_DATA(rI1, 4)
+ xor rI2,rD2,rW2
+ SAVE_DATA(rI2, 8)
+ xor rI3,rD3,rW3
+ SAVE_DATA(rI3, 12)
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_cbc_loop /* loop while at least one whole block remains */
+ START_IV
+ SAVE_IV(rI0, 0) /* write the last output block back to the IV buffer */
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to decrypt multiple blocks via CBC
+ * round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_cbc) /* rDP = out, rSP = in, rKP = key_dec, rRR = rounds, rLN = bytes, rIP = iv */
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
+ li rT1,15
+ LOAD_IV(rI0, 0) /* rI0-rI3 = initial IV, needed for the first block at the very end */
+ andc rLN,rLN,rT1 /* round the length down to whole blocks */
+ LOAD_IV(rI1, 4)
+ subi rLN,rLN,16 /* rLN = offset of the last block */
+ LOAD_IV(rI2, 8)
+ add rSP,rSP,rLN /* reverse processing */
+ LOAD_IV(rI3, 12)
+ add rDP,rDP,rLN
+ LOAD_DATA(rD0, 0) /* rD0-rD3 = last input block */
+ addi rT1,rT0,4096 /* rT1 now becomes the second table pointer */
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ START_IV
+ SAVE_IV(rD0, 0) /* the last input block is written to the IV buffer up front */
+ SAVE_IV(rD1, 4)
+ SAVE_IV(rD2, 8)
+ cmpwi rLN,16
+ SAVE_IV(rD3, 12)
+ bt lt,ppc_decrypt_cbc_end /* single block: only the initial IV is needed */
+ppc_decrypt_cbc_loop:
+ mr rKP,rKS /* reload the key pointer from the copy made in INITIALIZE_CRYPT */
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ subi rLN,rLN,16
+ subi rSP,rSP,CBC_DEC /* step the source back to the preceding block */
+ xor rW0,rD0,rW0 /* rW0-rW3 = decrypted block, still chained */
+ LOAD_DATA(rD0, 0) /* rD0-rD3 = preceding input block */
+ xor rW1,rD1,rW1
+ LOAD_DATA(rD1, 4)
+ xor rW2,rD2,rW2
+ LOAD_DATA(rD2, 8)
+ xor rW3,rD3,rW3
+ LOAD_DATA(rD3, 12)
+ xor rW0,rW0,rD0 /* unchain with the preceding input block and store */
+ SAVE_DATA(rW0, 0)
+ xor rW1,rW1,rD1
+ SAVE_DATA(rW1, 4)
+ xor rW2,rW2,rD2
+ SAVE_DATA(rW2, 8)
+ xor rW3,rW3,rD3
+ SAVE_DATA(rW3, 12)
+ cmpwi rLN,15
+ subi rDP,rDP,CBC_DEC /* step the destination back as well */
+ bt gt,ppc_decrypt_cbc_loop
+ppc_decrypt_cbc_end:
+ mr rKP,rKS /* first block of the buffer is handled last */
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rW0,rW0,rD0
+ xor rW1,rW1,rD1
+ xor rW2,rW2,rD2
+ xor rW3,rW3,rD3
+ xor rW0,rW0,rI0 /* decrypt with initial IV */
+ SAVE_DATA(rW0, 0)
+ xor rW1,rW1,rI1
+ SAVE_DATA(rW1, 4)
+ xor rW2,rW2,rI2
+ SAVE_DATA(rW2, 8)
+ xor rW3,rW3,rI3
+ SAVE_DATA(rW3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv);
+ *
+ * called from glue layer to encrypt/decrypt multiple blocks
+ * via CTR. Number of bytes does not need to be a multiple of
+ * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6
+ *
+ */
+_GLOBAL(ppc_crypt_ctr) /* rDP = out, rSP = in, rKP = key_enc, rRR = rounds, rLN = bytes, rIP = iv (counter) */
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
+ LOAD_IV(rI0, 0) /* rI0-rI3 = 128 bit counter, rI3 being the least significant word */
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rLN,16
+ LOAD_IV(rI3, 12)
+ START_IV
+ bt lt,ppc_crypt_ctr_partial /* less than one whole block: go straight to the bytewise path */
+ppc_crypt_ctr_loop:
+ mr rKP,rKS /* reload the key pointer from the copy made in INITIALIZE_CRYPT */
+ START_KEY(rI0, rI1, rI2, rI3) /* the counter, not the data, is encrypted */
+ bl ppc_encrypt_block
+ xor rW0,rD0,rW0 /* rW0-rW3 = keystream block */
+ xor rW1,rD1,rW1
+ xor rW2,rD2,rW2
+ xor rW3,rD3,rW3
+ LOAD_DATA(rD0, 0)
+ subi rLN,rLN,16
+ LOAD_DATA(rD1, 4)
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rW0 /* data xor keystream, then store */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rW1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rW2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rW3
+ SAVE_DATA(rD3, 12)
+ addic rI3,rI3,1 /* increase counter */
+ addze rI2,rI2 /* ripple the carry through the upper words */
+ addze rI1,rI1
+ addze rI0,rI0
+ NEXT_BLOCK
+ cmpwi rLN,15
+ bt gt,ppc_crypt_ctr_loop
+ppc_crypt_ctr_partial:
+ cmpwi rLN,0
+ bt eq,ppc_crypt_ctr_end /* no trailing bytes */
+ mr rKP,rKS
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rW0,rD0,rW0 /* keystream block is parked in the IV buffer so it can be read bytewise */
+ SAVE_IV(rW0, 0)
+ xor rW1,rD1,rW1
+ SAVE_IV(rW1, 4)
+ xor rW2,rD2,rW2
+ SAVE_IV(rW2, 8)
+ xor rW3,rD3,rW3
+ SAVE_IV(rW3, 12)
+ mtctr rLN /* one loop pass per remaining byte */
+ subi rIP,rIP,CTR_DEC /* all three pointers are set one byte early for the update-form accesses below */
+ subi rSP,rSP,1
+ subi rDP,rDP,1
+ppc_crypt_ctr_xorbyte:
+ lbzu rW4,1(rIP) /* bytewise xor for partial block */
+ lbzu rW5,1(rSP)
+ xor rW4,rW4,rW5
+ stbu rW4,1(rDP)
+ bdnz ppc_crypt_ctr_xorbyte
+ subf rIP,rLN,rIP /* bring rIP back to the start of the IV buffer */
+ addi rIP,rIP,1
+ addic rI3,rI3,1 /* the partial block consumed one counter value too */
+ addze rI2,rI2
+ addze rI1,rI1
+ addze rI0,rI0
+ppc_crypt_ctr_end:
+ SAVE_IV(rI0, 0) /* write the updated counter back, overwriting any parked keystream */
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(4)
+ blr
+
+/*
+ * ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
+ * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to encrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_encrypt_xts) /* rDP = out, rSP = in, rKP = key_enc, rRR = rounds, rLN = bytes, rIP = iv, rKT = key_twk or 0 */
+ INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
+ LOAD_IV(rI0, 0) /* rI0-rI3 = tweak */
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rKT,0 /* tested before rD0 (same register, r9) gets written */
+ LOAD_IV(rI3, 12)
+ bt eq,ppc_encrypt_xts_notweak /* no tweak key: use the IV as it is */
+ mr rKP,rKT /* encrypt the IV with the tweak key first */
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ xor rI1,rD1,rW1
+ xor rI2,rD2,rW2
+ xor rI3,rD3,rW3
+ppc_encrypt_xts_notweak:
+ ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG0-rG3 = byte-reversed copy of the tweak, used for GF128_MUL */
+ ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_encrypt_xts_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS /* reload the data key pointer from the copy made in INITIALIZE_CRYPT */
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16 /* one block consumed */
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0 /* xor the tweak in before encryption */
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_encrypt_block
+ xor rD0,rD0,rW0
+ xor rD1,rD1,rW1
+ xor rD2,rD2,rW2
+ xor rD3,rD3,rW3
+ xor rD0,rD0,rI0 /* xor the tweak in again after encryption, then store */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rI1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rI2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rI3
+ SAVE_DATA(rD3, 12)
+ GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* advance the tweak for the next block */
+ ENDIAN_SWAP(rI0, rI1, rG0, rG1) /* and convert it back into rI0-rI3 */
+ ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+ cmpwi rLN,0
+ NEXT_BLOCK
+ bt gt,ppc_encrypt_xts_loop
+ START_IV
+ SAVE_IV(rI0, 0) /* hand the advanced tweak back through the IV buffer */
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(8)
+ blr
+
+/*
+ * ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
+ * u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
+ *
+ * called from glue layer to decrypt multiple blocks via XTS
+ * If key_twk is given, the initial IV encryption will be
+ * processed too. Round values are AES128 = 4, AES192 = 5,
+ * AES256 = 6
+ *
+ */
+_GLOBAL(ppc_decrypt_xts) /* rDP = out, rSP = in, rKP = key_dec, rRR = rounds, rLN = bytes, rIP = iv, rKT = key_twk or 0 */
+ INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
+ LOAD_IV(rI0, 0) /* rI0-rI3 = tweak */
+ addi rT1,rT0,4096 /* second table pointer, 4K behind the decryption table */
+ LOAD_IV(rI1, 4)
+ LOAD_IV(rI2, 8)
+ cmpwi rKT,0 /* tested before rD0 (same register, r9) gets written */
+ LOAD_IV(rI3, 12)
+ bt eq,ppc_decrypt_xts_notweak /* no tweak key: use the IV as it is */
+ subi rT0,rT0,4096 /* the tweak is always encrypted: point rT0 4K lower, presumably at the encryption table -- confirm table layout */
+ mr rKP,rKT
+ START_KEY(rI0, rI1, rI2, rI3)
+ bl ppc_encrypt_block
+ xor rI0,rD0,rW0
+ xor rI1,rD1,rW1
+ xor rI2,rD2,rW2
+ xor rI3,rD3,rW3
+ addi rT0,rT0,4096 /* back to the decryption table */
+ppc_decrypt_xts_notweak:
+ ENDIAN_SWAP(rG0, rG1, rI0, rI1) /* rG0-rG3 = byte-reversed copy of the tweak, used for GF128_MUL */
+ ENDIAN_SWAP(rG2, rG3, rI2, rI3)
+ppc_decrypt_xts_loop:
+ LOAD_DATA(rD0, 0)
+ mr rKP,rKS /* reload the data key pointer from the copy made in INITIALIZE_CRYPT */
+ LOAD_DATA(rD1, 4)
+ subi rLN,rLN,16 /* one block consumed */
+ LOAD_DATA(rD2, 8)
+ LOAD_DATA(rD3, 12)
+ xor rD0,rD0,rI0 /* xor the tweak in before decryption */
+ xor rD1,rD1,rI1
+ xor rD2,rD2,rI2
+ xor rD3,rD3,rI3
+ START_KEY(rD0, rD1, rD2, rD3)
+ bl ppc_decrypt_block
+ xor rD0,rD0,rW0
+ xor rD1,rD1,rW1
+ xor rD2,rD2,rW2
+ xor rD3,rD3,rW3
+ xor rD0,rD0,rI0 /* xor the tweak in again after decryption, then store */
+ SAVE_DATA(rD0, 0)
+ xor rD1,rD1,rI1
+ SAVE_DATA(rD1, 4)
+ xor rD2,rD2,rI2
+ SAVE_DATA(rD2, 8)
+ xor rD3,rD3,rI3
+ SAVE_DATA(rD3, 12)
+ GF128_MUL(rG0, rG1, rG2, rG3, rW0) /* advance the tweak for the next block */
+ ENDIAN_SWAP(rI0, rI1, rG0, rG1) /* and convert it back into rI0-rI3 */
+ ENDIAN_SWAP(rI2, rI3, rG2, rG3)
+ cmpwi rLN,0
+ NEXT_BLOCK
+ bt gt,ppc_decrypt_xts_loop
+ START_IV
+ SAVE_IV(rI0, 0) /* hand the advanced tweak back through the IV buffer */
+ SAVE_IV(rI1, 4)
+ SAVE_IV(rI2, 8)
+ SAVE_IV(rI3, 12)
+ FINALIZE_CRYPT(8)
+ blr
diff --git a/arch/powerpc/crypto/aes-spe-regs.h b/arch/powerpc/crypto/aes-spe-regs.h
new file mode 100644
index 000000000000..2eb4c9b94152
--- /dev/null
+++ b/arch/powerpc/crypto/aes-spe-regs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common registers for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+#define rKS r0 /* copy of en-/decryption key pointer */
+#define rDP r3 /* destination pointer */
+#define rSP r4 /* source pointer */
+#define rKP r5 /* pointer to en-/decryption key */
+#define rRR r6 /* en-/decryption rounds */
+#define rLN r7 /* length of data to be processed */
+#define rIP r8 /* pointer to IV (CBC/CTR/XTS modes) */
+#define rKT r9 /* pointer to tweak key (XTS mode); same register as rD0 */
+#define rT0 r11 /* pointers to en-/decryption tables */
+#define rT1 r10
+#define rD0 r9 /* data; same register as rKT */
+#define rD1 r14
+#define rD2 r12
+#define rD3 r15
+#define rW0 r16 /* working registers */
+#define rW1 r17
+#define rW2 r18
+#define rW3 r19
+#define rW4 r20
+#define rW5 r21
+#define rW6 r22
+#define rW7 r23
+#define rI0 r24 /* IV */
+#define rI1 r25
+#define rI2 r26
+#define rI3 r27
+#define rG0 r28 /* endian reversed tweak (XTS mode) */
+#define rG1 r29
+#define rG2 r30
+#define rG3 r31
diff --git a/arch/powerpc/crypto/aes-tab-4k.S b/arch/powerpc/crypto/aes-tab-4k.S
new file mode 100644
index 000000000000..ceb604bc6f72
--- /dev/null
+++ b/arch/powerpc/crypto/aes-tab-4k.S
@@ -0,0 +1,326 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * 4K AES tables for PPC AES implementation
+ *
+ * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
+ */
+
+/*
+ * These big endian AES encryption/decryption tables have been taken from
+ * crypto/aes_generic.c and are designed to be simply accessed by a combination
+ * of rlwimi/lwz instructions with a minimum of table registers (usually only
+ * one required). Thus they are aligned to 4K. The locality of rotated values
+ * is derived from the reduced offsets that are available in the SPE load
+ * instructions. E.g. evldw, evlwwsplat, ...
+ *
+ * For the safety-conscious it has to be noted that they might be vulnerable
+ * to cache timing attacks because of their size. Nevertheless in contrast to
+ * the generic tables they have been reduced from 16KB to 8KB + 256 bytes.
+ * This is a quite good tradeoff for low power devices (e.g. routers) without
+ * dedicated encryption hardware where we usually have no multiuser
+ * environment.
+ *
+ */
+
+#define R(a, b, c, d) \
+ 0x##a##b##c##d, 0x##d##a##b##c, 0x##c##d##a##b, 0x##b##c##d##a /* word abcd followed by its three successive one-byte right rotations */
+
+.data
+.align 12
+.globl PPC_AES_4K_ENCTAB
+PPC_AES_4K_ENCTAB:
+/* encryption table, same as crypto_ft_tab in crypto/aes_generic.c */
+ .long R(c6, 63, 63, a5), R(f8, 7c, 7c, 84)
+ .long R(ee, 77, 77, 99), R(f6, 7b, 7b, 8d)
+ .long R(ff, f2, f2, 0d), R(d6, 6b, 6b, bd)
+ .long R(de, 6f, 6f, b1), R(91, c5, c5, 54)
+ .long R(60, 30, 30, 50), R(02, 01, 01, 03)
+ .long R(ce, 67, 67, a9), R(56, 2b, 2b, 7d)
+ .long R(e7, fe, fe, 19), R(b5, d7, d7, 62)
+ .long R(4d, ab, ab, e6), R(ec, 76, 76, 9a)
+ .long R(8f, ca, ca, 45), R(1f, 82, 82, 9d)
+ .long R(89, c9, c9, 40), R(fa, 7d, 7d, 87)
+ .long R(ef, fa, fa, 15), R(b2, 59, 59, eb)
+ .long R(8e, 47, 47, c9), R(fb, f0, f0, 0b)
+ .long R(41, ad, ad, ec), R(b3, d4, d4, 67)
+ .long R(5f, a2, a2, fd), R(45, af, af, ea)
+ .long R(23, 9c, 9c, bf), R(53, a4, a4, f7)
+ .long R(e4, 72, 72, 96), R(9b, c0, c0, 5b)
+ .long R(75, b7, b7, c2), R(e1, fd, fd, 1c)
+ .long R(3d, 93, 93, ae), R(4c, 26, 26, 6a)
+ .long R(6c, 36, 36, 5a), R(7e, 3f, 3f, 41)
+ .long R(f5, f7, f7, 02), R(83, cc, cc, 4f)
+ .long R(68, 34, 34, 5c), R(51, a5, a5, f4)
+ .long R(d1, e5, e5, 34), R(f9, f1, f1, 08)
+ .long R(e2, 71, 71, 93), R(ab, d8, d8, 73)
+ .long R(62, 31, 31, 53), R(2a, 15, 15, 3f)
+ .long R(08, 04, 04, 0c), R(95, c7, c7, 52)
+ .long R(46, 23, 23, 65), R(9d, c3, c3, 5e)
+ .long R(30, 18, 18, 28), R(37, 96, 96, a1)
+ .long R(0a, 05, 05, 0f), R(2f, 9a, 9a, b5)
+ .long R(0e, 07, 07, 09), R(24, 12, 12, 36)
+ .long R(1b, 80, 80, 9b), R(df, e2, e2, 3d)
+ .long R(cd, eb, eb, 26), R(4e, 27, 27, 69)
+ .long R(7f, b2, b2, cd), R(ea, 75, 75, 9f)
+ .long R(12, 09, 09, 1b), R(1d, 83, 83, 9e)
+ .long R(58, 2c, 2c, 74), R(34, 1a, 1a, 2e)
+ .long R(36, 1b, 1b, 2d), R(dc, 6e, 6e, b2)
+ .long R(b4, 5a, 5a, ee), R(5b, a0, a0, fb)
+ .long R(a4, 52, 52, f6), R(76, 3b, 3b, 4d)
+ .long R(b7, d6, d6, 61), R(7d, b3, b3, ce)
+ .long R(52, 29, 29, 7b), R(dd, e3, e3, 3e)
+ .long R(5e, 2f, 2f, 71), R(13, 84, 84, 97)
+ .long R(a6, 53, 53, f5), R(b9, d1, d1, 68)
+ .long R(00, 00, 00, 00), R(c1, ed, ed, 2c)
+ .long R(40, 20, 20, 60), R(e3, fc, fc, 1f)
+ .long R(79, b1, b1, c8), R(b6, 5b, 5b, ed)
+ .long R(d4, 6a, 6a, be), R(8d, cb, cb, 46)
+ .long R(67, be, be, d9), R(72, 39, 39, 4b)
+ .long R(94, 4a, 4a, de), R(98, 4c, 4c, d4)
+ .long R(b0, 58, 58, e8), R(85, cf, cf, 4a)
+ .long R(bb, d0, d0, 6b), R(c5, ef, ef, 2a)
+ .long R(4f, aa, aa, e5), R(ed, fb, fb, 16)
+ .long R(86, 43, 43, c5), R(9a, 4d, 4d, d7)
+ .long R(66, 33, 33, 55), R(11, 85, 85, 94)
+ .long R(8a, 45, 45, cf), R(e9, f9, f9, 10)
+ .long R(04, 02, 02, 06), R(fe, 7f, 7f, 81)
+ .long R(a0, 50, 50, f0), R(78, 3c, 3c, 44)
+ .long R(25, 9f, 9f, ba), R(4b, a8, a8, e3)
+ .long R(a2, 51, 51, f3), R(5d, a3, a3, fe)
+ .long R(80, 40, 40, c0), R(05, 8f, 8f, 8a)
+ .long R(3f, 92, 92, ad), R(21, 9d, 9d, bc)
+ .long R(70, 38, 38, 48), R(f1, f5, f5, 04)
+ .long R(63, bc, bc, df), R(77, b6, b6, c1)
+ .long R(af, da, da, 75), R(42, 21, 21, 63)
+ .long R(20, 10, 10, 30), R(e5, ff, ff, 1a)
+ .long R(fd, f3, f3, 0e), R(bf, d2, d2, 6d)
+ .long R(81, cd, cd, 4c), R(18, 0c, 0c, 14)
+ .long R(26, 13, 13, 35), R(c3, ec, ec, 2f)
+ .long R(be, 5f, 5f, e1), R(35, 97, 97, a2)
+ .long R(88, 44, 44, cc), R(2e, 17, 17, 39)
+ .long R(93, c4, c4, 57), R(55, a7, a7, f2)
+ .long R(fc, 7e, 7e, 82), R(7a, 3d, 3d, 47)
+ .long R(c8, 64, 64, ac), R(ba, 5d, 5d, e7)
+ .long R(32, 19, 19, 2b), R(e6, 73, 73, 95)
+ .long R(c0, 60, 60, a0), R(19, 81, 81, 98)
+ .long R(9e, 4f, 4f, d1), R(a3, dc, dc, 7f)
+ .long R(44, 22, 22, 66), R(54, 2a, 2a, 7e)
+ .long R(3b, 90, 90, ab), R(0b, 88, 88, 83)
+ .long R(8c, 46, 46, ca), R(c7, ee, ee, 29)
+ .long R(6b, b8, b8, d3), R(28, 14, 14, 3c)
+ .long R(a7, de, de, 79), R(bc, 5e, 5e, e2)
+ .long R(16, 0b, 0b, 1d), R(ad, db, db, 76)
+ .long R(db, e0, e0, 3b), R(64, 32, 32, 56)
+ .long R(74, 3a, 3a, 4e), R(14, 0a, 0a, 1e)
+ .long R(92, 49, 49, db), R(0c, 06, 06, 0a)
+ .long R(48, 24, 24, 6c), R(b8, 5c, 5c, e4)
+ .long R(9f, c2, c2, 5d), R(bd, d3, d3, 6e)
+ .long R(43, ac, ac, ef), R(c4, 62, 62, a6)
+ .long R(39, 91, 91, a8), R(31, 95, 95, a4)
+ .long R(d3, e4, e4, 37), R(f2, 79, 79, 8b)
+ .long R(d5, e7, e7, 32), R(8b, c8, c8, 43)
+ .long R(6e, 37, 37, 59), R(da, 6d, 6d, b7)
+ .long R(01, 8d, 8d, 8c), R(b1, d5, d5, 64)
+ .long R(9c, 4e, 4e, d2), R(49, a9, a9, e0)
+ .long R(d8, 6c, 6c, b4), R(ac, 56, 56, fa)
+ .long R(f3, f4, f4, 07), R(cf, ea, ea, 25)
+ .long R(ca, 65, 65, af), R(f4, 7a, 7a, 8e)
+ .long R(47, ae, ae, e9), R(10, 08, 08, 18)
+ .long R(6f, ba, ba, d5), R(f0, 78, 78, 88)
+ .long R(4a, 25, 25, 6f), R(5c, 2e, 2e, 72)
+ .long R(38, 1c, 1c, 24), R(57, a6, a6, f1)
+ .long R(73, b4, b4, c7), R(97, c6, c6, 51)
+ .long R(cb, e8, e8, 23), R(a1, dd, dd, 7c)
+ .long R(e8, 74, 74, 9c), R(3e, 1f, 1f, 21)
+ .long R(96, 4b, 4b, dd), R(61, bd, bd, dc)
+ .long R(0d, 8b, 8b, 86), R(0f, 8a, 8a, 85)
+ .long R(e0, 70, 70, 90), R(7c, 3e, 3e, 42)
+ .long R(71, b5, b5, c4), R(cc, 66, 66, aa)
+ .long R(90, 48, 48, d8), R(06, 03, 03, 05)
+ .long R(f7, f6, f6, 01), R(1c, 0e, 0e, 12)
+ .long R(c2, 61, 61, a3), R(6a, 35, 35, 5f)
+ .long R(ae, 57, 57, f9), R(69, b9, b9, d0)
+ .long R(17, 86, 86, 91), R(99, c1, c1, 58)
+ .long R(3a, 1d, 1d, 27), R(27, 9e, 9e, b9)
+ .long R(d9, e1, e1, 38), R(eb, f8, f8, 13)
+ .long R(2b, 98, 98, b3), R(22, 11, 11, 33)
+ .long R(d2, 69, 69, bb), R(a9, d9, d9, 70)
+ .long R(07, 8e, 8e, 89), R(33, 94, 94, a7)
+ .long R(2d, 9b, 9b, b6), R(3c, 1e, 1e, 22)
+ .long R(15, 87, 87, 92), R(c9, e9, e9, 20)
+ .long R(87, ce, ce, 49), R(aa, 55, 55, ff)
+ .long R(50, 28, 28, 78), R(a5, df, df, 7a)
+ .long R(03, 8c, 8c, 8f), R(59, a1, a1, f8)
+ .long R(09, 89, 89, 80), R(1a, 0d, 0d, 17)
+ .long R(65, bf, bf, da), R(d7, e6, e6, 31)
+ .long R(84, 42, 42, c6), R(d0, 68, 68, b8)
+ .long R(82, 41, 41, c3), R(29, 99, 99, b0)
+ .long R(5a, 2d, 2d, 77), R(1e, 0f, 0f, 11)
+ .long R(7b, b0, b0, cb), R(a8, 54, 54, fc)
+ .long R(6d, bb, bb, d6), R(2c, 16, 16, 3a)
+.globl PPC_AES_4K_DECTAB
+PPC_AES_4K_DECTAB:
+/* decryption table, same as crypto_it_tab in crypto/aes_generic.c */
+ .long R(51, f4, a7, 50), R(7e, 41, 65, 53)
+ .long R(1a, 17, a4, c3), R(3a, 27, 5e, 96)
+ .long R(3b, ab, 6b, cb), R(1f, 9d, 45, f1)
+ .long R(ac, fa, 58, ab), R(4b, e3, 03, 93)
+ .long R(20, 30, fa, 55), R(ad, 76, 6d, f6)
+ .long R(88, cc, 76, 91), R(f5, 02, 4c, 25)
+ .long R(4f, e5, d7, fc), R(c5, 2a, cb, d7)
+ .long R(26, 35, 44, 80), R(b5, 62, a3, 8f)
+ .long R(de, b1, 5a, 49), R(25, ba, 1b, 67)
+ .long R(45, ea, 0e, 98), R(5d, fe, c0, e1)
+ .long R(c3, 2f, 75, 02), R(81, 4c, f0, 12)
+ .long R(8d, 46, 97, a3), R(6b, d3, f9, c6)
+ .long R(03, 8f, 5f, e7), R(15, 92, 9c, 95)
+ .long R(bf, 6d, 7a, eb), R(95, 52, 59, da)
+ .long R(d4, be, 83, 2d), R(58, 74, 21, d3)
+ .long R(49, e0, 69, 29), R(8e, c9, c8, 44)
+ .long R(75, c2, 89, 6a), R(f4, 8e, 79, 78)
+ .long R(99, 58, 3e, 6b), R(27, b9, 71, dd)
+ .long R(be, e1, 4f, b6), R(f0, 88, ad, 17)
+ .long R(c9, 20, ac, 66), R(7d, ce, 3a, b4)
+ .long R(63, df, 4a, 18), R(e5, 1a, 31, 82)
+ .long R(97, 51, 33, 60), R(62, 53, 7f, 45)
+ .long R(b1, 64, 77, e0), R(bb, 6b, ae, 84)
+ .long R(fe, 81, a0, 1c), R(f9, 08, 2b, 94)
+ .long R(70, 48, 68, 58), R(8f, 45, fd, 19)
+ .long R(94, de, 6c, 87), R(52, 7b, f8, b7)
+ .long R(ab, 73, d3, 23), R(72, 4b, 02, e2)
+ .long R(e3, 1f, 8f, 57), R(66, 55, ab, 2a)
+ .long R(b2, eb, 28, 07), R(2f, b5, c2, 03)
+ .long R(86, c5, 7b, 9a), R(d3, 37, 08, a5)
+ .long R(30, 28, 87, f2), R(23, bf, a5, b2)
+ .long R(02, 03, 6a, ba), R(ed, 16, 82, 5c)
+ .long R(8a, cf, 1c, 2b), R(a7, 79, b4, 92)
+ .long R(f3, 07, f2, f0), R(4e, 69, e2, a1)
+ .long R(65, da, f4, cd), R(06, 05, be, d5)
+ .long R(d1, 34, 62, 1f), R(c4, a6, fe, 8a)
+ .long R(34, 2e, 53, 9d), R(a2, f3, 55, a0)
+ .long R(05, 8a, e1, 32), R(a4, f6, eb, 75)
+ .long R(0b, 83, ec, 39), R(40, 60, ef, aa)
+ .long R(5e, 71, 9f, 06), R(bd, 6e, 10, 51)
+ .long R(3e, 21, 8a, f9), R(96, dd, 06, 3d)
+ .long R(dd, 3e, 05, ae), R(4d, e6, bd, 46)
+ .long R(91, 54, 8d, b5), R(71, c4, 5d, 05)
+ .long R(04, 06, d4, 6f), R(60, 50, 15, ff)
+ .long R(19, 98, fb, 24), R(d6, bd, e9, 97)
+ .long R(89, 40, 43, cc), R(67, d9, 9e, 77)
+ .long R(b0, e8, 42, bd), R(07, 89, 8b, 88)
+ .long R(e7, 19, 5b, 38), R(79, c8, ee, db)
+ .long R(a1, 7c, 0a, 47), R(7c, 42, 0f, e9)
+ .long R(f8, 84, 1e, c9), R(00, 00, 00, 00)
+ .long R(09, 80, 86, 83), R(32, 2b, ed, 48)
+ .long R(1e, 11, 70, ac), R(6c, 5a, 72, 4e)
+ .long R(fd, 0e, ff, fb), R(0f, 85, 38, 56)
+ .long R(3d, ae, d5, 1e), R(36, 2d, 39, 27)
+ .long R(0a, 0f, d9, 64), R(68, 5c, a6, 21)
+ .long R(9b, 5b, 54, d1), R(24, 36, 2e, 3a)
+ .long R(0c, 0a, 67, b1), R(93, 57, e7, 0f)
+ .long R(b4, ee, 96, d2), R(1b, 9b, 91, 9e)
+ .long R(80, c0, c5, 4f), R(61, dc, 20, a2)
+ .long R(5a, 77, 4b, 69), R(1c, 12, 1a, 16)
+ .long R(e2, 93, ba, 0a), R(c0, a0, 2a, e5)
+ .long R(3c, 22, e0, 43), R(12, 1b, 17, 1d)
+ .long R(0e, 09, 0d, 0b), R(f2, 8b, c7, ad)
+ .long R(2d, b6, a8, b9), R(14, 1e, a9, c8)
+ .long R(57, f1, 19, 85), R(af, 75, 07, 4c)
+ .long R(ee, 99, dd, bb), R(a3, 7f, 60, fd)
+ .long R(f7, 01, 26, 9f), R(5c, 72, f5, bc)
+ .long R(44, 66, 3b, c5), R(5b, fb, 7e, 34)
+ .long R(8b, 43, 29, 76), R(cb, 23, c6, dc)
+ .long R(b6, ed, fc, 68), R(b8, e4, f1, 63)
+ .long R(d7, 31, dc, ca), R(42, 63, 85, 10)
+ .long R(13, 97, 22, 40), R(84, c6, 11, 20)
+ .long R(85, 4a, 24, 7d), R(d2, bb, 3d, f8)
+ .long R(ae, f9, 32, 11), R(c7, 29, a1, 6d)
+ .long R(1d, 9e, 2f, 4b), R(dc, b2, 30, f3)
+ .long R(0d, 86, 52, ec), R(77, c1, e3, d0)
+ .long R(2b, b3, 16, 6c), R(a9, 70, b9, 99)
+ .long R(11, 94, 48, fa), R(47, e9, 64, 22)
+ .long R(a8, fc, 8c, c4), R(a0, f0, 3f, 1a)
+ .long R(56, 7d, 2c, d8), R(22, 33, 90, ef)
+ .long R(87, 49, 4e, c7), R(d9, 38, d1, c1)
+ .long R(8c, ca, a2, fe), R(98, d4, 0b, 36)
+ .long R(a6, f5, 81, cf), R(a5, 7a, de, 28)
+ .long R(da, b7, 8e, 26), R(3f, ad, bf, a4)
+ .long R(2c, 3a, 9d, e4), R(50, 78, 92, 0d)
+ .long R(6a, 5f, cc, 9b), R(54, 7e, 46, 62)
+ .long R(f6, 8d, 13, c2), R(90, d8, b8, e8)
+ .long R(2e, 39, f7, 5e), R(82, c3, af, f5)
+ .long R(9f, 5d, 80, be), R(69, d0, 93, 7c)
+ .long R(6f, d5, 2d, a9), R(cf, 25, 12, b3)
+ .long R(c8, ac, 99, 3b), R(10, 18, 7d, a7)
+ .long R(e8, 9c, 63, 6e), R(db, 3b, bb, 7b)
+ .long R(cd, 26, 78, 09), R(6e, 59, 18, f4)
+ .long R(ec, 9a, b7, 01), R(83, 4f, 9a, a8)
+ .long R(e6, 95, 6e, 65), R(aa, ff, e6, 7e)
+ .long R(21, bc, cf, 08), R(ef, 15, e8, e6)
+ .long R(ba, e7, 9b, d9), R(4a, 6f, 36, ce)
+ .long R(ea, 9f, 09, d4), R(29, b0, 7c, d6)
+ .long R(31, a4, b2, af), R(2a, 3f, 23, 31)
+ .long R(c6, a5, 94, 30), R(35, a2, 66, c0)
+ .long R(74, 4e, bc, 37), R(fc, 82, ca, a6)
+ .long R(e0, 90, d0, b0), R(33, a7, d8, 15)
+ .long R(f1, 04, 98, 4a), R(41, ec, da, f7)
+ .long R(7f, cd, 50, 0e), R(17, 91, f6, 2f)
+ .long R(76, 4d, d6, 8d), R(43, ef, b0, 4d)
+ .long R(cc, aa, 4d, 54), R(e4, 96, 04, df)
+ .long R(9e, d1, b5, e3), R(4c, 6a, 88, 1b)
+ .long R(c1, 2c, 1f, b8), R(46, 65, 51, 7f)
+ .long R(9d, 5e, ea, 04), R(01, 8c, 35, 5d)
+ .long R(fa, 87, 74, 73), R(fb, 0b, 41, 2e)
+ .long R(b3, 67, 1d, 5a), R(92, db, d2, 52)
+ .long R(e9, 10, 56, 33), R(6d, d6, 47, 13)
+ .long R(9a, d7, 61, 8c), R(37, a1, 0c, 7a)
+ .long R(59, f8, 14, 8e), R(eb, 13, 3c, 89)
+ .long R(ce, a9, 27, ee), R(b7, 61, c9, 35)
+ .long R(e1, 1c, e5, ed), R(7a, 47, b1, 3c)
+ .long R(9c, d2, df, 59), R(55, f2, 73, 3f)
+ .long R(18, 14, ce, 79), R(73, c7, 37, bf)
+ .long R(53, f7, cd, ea), R(5f, fd, aa, 5b)
+ .long R(df, 3d, 6f, 14), R(78, 44, db, 86)
+ .long R(ca, af, f3, 81), R(b9, 68, c4, 3e)
+ .long R(38, 24, 34, 2c), R(c2, a3, 40, 5f)
+ .long R(16, 1d, c3, 72), R(bc, e2, 25, 0c)
+ .long R(28, 3c, 49, 8b), R(ff, 0d, 95, 41)
+ .long R(39, a8, 01, 71), R(08, 0c, b3, de)
+ .long R(d8, b4, e4, 9c), R(64, 56, c1, 90)
+ .long R(7b, cb, 84, 61), R(d5, 32, b6, 70)
+ .long R(48, 6c, 5c, 74), R(d0, b8, 57, 42)
+.globl PPC_AES_4K_DECTAB2
+PPC_AES_4K_DECTAB2:
+/* decryption table, same as crypto_il_tab in crypto/aes_generic.c */
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
diff --git a/arch/powerpc/crypto/aes.c b/arch/powerpc/crypto/aes.c
new file mode 100644
index 000000000000..3f1e5e894902
--- /dev/null
+++ b/arch/powerpc/crypto/aes.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AES routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/internal/cipher.h>
+#include <crypto/internal/simd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#include "aesp8-ppc.h"
+
+struct p8_aes_ctx { /* per-transform context of the "p8_aes" single-block cipher */
+ struct crypto_cipher *fallback; /* allocated in p8_aes_init(); used when crypto_simd_usable() is false */
+ struct aes_key enc_key; /* filled by aes_p8_set_encrypt_key() in p8_aes_setkey() */
+ struct aes_key dec_key; /* filled by aes_p8_set_decrypt_key() in p8_aes_setkey() */
+};
+
+static int p8_aes_init(struct crypto_tfm *tfm)
+{ /* Allocate the fallback cipher for @tfm and store it in the context; returns 0, or the PTR_ERR() value if the allocation fails. */
+ const char *alg = crypto_tfm_alg_name(tfm);
+ struct crypto_cipher *fallback;
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); /* requested under the same algorithm name as @tfm */
+ if (IS_ERR(fallback)) {
+ printk(KERN_ERR
+ "Failed to allocate transformation for '%s': %ld\n",
+ alg, PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+
+ crypto_cipher_set_flags(fallback,
+ crypto_cipher_get_flags((struct
+ crypto_cipher *)
+ tfm)); /* copy @tfm's flags onto the fallback */
+ ctx->fallback = fallback;
+
+ return 0;
+}
+
+static void p8_aes_exit(struct crypto_tfm *tfm)
+{ /* Free the fallback cipher, if one is set, and clear the pointer. */
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (ctx->fallback) {
+ crypto_free_cipher(ctx->fallback);
+ ctx->fallback = NULL; /* leave no stale pointer behind in the context */
+ }
+}
+
+static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen) /* Expand @key into enc_key/dec_key and set it on the fallback; returns 0 or -EINVAL. */
+{
+ int ret;
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ preempt_disable(); /* the aes_p8_* key expansion runs between enable_kernel_vsx() and disable_kernel_vsx(), with preemption and page faults disabled */
+ pagefault_disable();
+ enable_kernel_vsx();
+ ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); /* keylen * 8: presumably bytes converted to bits - confirm against aes_p8_set_encrypt_key() */
+ ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret |= crypto_cipher_setkey(ctx->fallback, key, keylen); /* the fallback receives the same key */
+
+ return ret ? -EINVAL : 0; /* results were OR-ed together, so any failure is reported as -EINVAL */
+}
+
+static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* Encrypt one block from @src into @dst. */
+{
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!crypto_simd_usable()) {
+ crypto_cipher_encrypt_one(ctx->fallback, dst, src); /* SIMD not usable in this context: use the fallback cipher */
+ } else {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_encrypt(src, dst, &ctx->enc_key); /* note the argument order: source first, then destination */
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+ }
+}
+
+static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) /* Decrypt one block from @src into @dst. */
+{
+ struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ if (!crypto_simd_usable()) {
+ crypto_cipher_decrypt_one(ctx->fallback, dst, src); /* SIMD not usable in this context: use the fallback cipher */
+ } else {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_decrypt(src, dst, &ctx->dec_key); /* note the argument order: source first, then destination */
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+ }
+}
+
+struct crypto_alg p8_aes_alg = { /* algorithm descriptor for the single-block "aes" cipher, driver name "p8_aes" */
+ .cra_name = "aes",
+ .cra_driver_name = "p8_aes",
+ .cra_module = THIS_MODULE,
+ .cra_priority = 1000,
+ .cra_type = NULL,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_alignmask = 0,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct p8_aes_ctx),
+ .cra_init = p8_aes_init,
+ .cra_exit = p8_aes_exit,
+ .cra_cipher = {
+ .cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cia_setkey = p8_aes_setkey,
+ .cia_encrypt = p8_aes_encrypt,
+ .cia_decrypt = p8_aes_decrypt,
+ },
+};
diff --git a/arch/powerpc/crypto/aes_cbc.c b/arch/powerpc/crypto/aes_cbc.c
new file mode 100644
index 000000000000..5f2a4f375eef
--- /dev/null
+++ b/arch/powerpc/crypto/aes_cbc.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AES CBC routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#include "aesp8-ppc.h"
+
+struct p8_aes_cbc_ctx { /* per-transform context of the "p8_aes_cbc" skcipher */
+ struct crypto_skcipher *fallback; /* "cbc(aes)" skcipher allocated in p8_aes_cbc_init() */
+ struct aes_key enc_key; /* selected by p8_aes_cbc_crypt() when enc is non-zero */
+ struct aes_key dec_key; /* selected by p8_aes_cbc_crypt() when enc is zero */
+};
+
+static int p8_aes_cbc_init(struct crypto_skcipher *tfm)
+{ /* Allocate the "cbc(aes)" fallback and size the request context; returns 0 or the PTR_ERR() of the failed allocation. */
+ struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *fallback;
+
+ fallback = crypto_alloc_skcipher("cbc(aes)", 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(fallback)) {
+ pr_err("Failed to allocate cbc(aes) fallback: %ld\n",
+ PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(fallback)); /* room for the sub-request p8_aes_cbc_crypt() hands to the fallback */
+ ctx->fallback = fallback;
+ return 0;
+}
+
+static void p8_aes_cbc_exit(struct crypto_skcipher *tfm)
+{ /* Release the fallback skcipher stored in the context. */
+ struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->fallback);
+}
+
+static int p8_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen) /* Expand @key into enc_key/dec_key and set it on the fallback; returns 0 or -EINVAL. */
+{
+ struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int ret;
+
+ preempt_disable(); /* the aes_p8_* key expansion runs between enable_kernel_vsx() and disable_kernel_vsx(), with preemption and page faults disabled */
+ pagefault_disable();
+ enable_kernel_vsx();
+ ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); /* keylen * 8: presumably bytes converted to bits - confirm against aes_p8_set_encrypt_key() */
+ ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); /* the fallback receives the same key */
+
+ return ret ? -EINVAL : 0; /* results were OR-ed together, so any failure is reported as -EINVAL */
+}
+
+static int p8_aes_cbc_crypt(struct skcipher_request *req, int enc) /* Shared CBC path: @enc non-zero encrypts, zero decrypts; returns the fallback's or the walk's status. */
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int ret;
+
+ if (!crypto_simd_usable()) { /* SIMD not usable: hand the whole request to the fallback */
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req; /* copy the request into its own context area ... */
+ skcipher_request_set_tfm(subreq, ctx->fallback); /* ... and retarget the copy at the fallback */
+ return enc ? crypto_skcipher_encrypt(subreq) :
+ crypto_skcipher_decrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_cbc_encrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE), /* only whole blocks are processed per step */
+ enc ? &ctx->enc_key : &ctx->dec_key,
+ walk.iv, enc); /* the same routine serves both directions, selected by enc */
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); /* report the bytes left over after the whole blocks */
+ }
+ return ret;
+}
+
+static int p8_aes_cbc_encrypt(struct skcipher_request *req)
+{ /* .encrypt entry point: run the shared CBC path with enc = 1. */
+ return p8_aes_cbc_crypt(req, 1);
+}
+
+static int p8_aes_cbc_decrypt(struct skcipher_request *req)
+{ /* .decrypt entry point: run the shared CBC path with enc = 0. */
+ return p8_aes_cbc_crypt(req, 0);
+}
+
+struct skcipher_alg p8_aes_cbc_alg = { /* skcipher descriptor for "cbc(aes)", driver name "p8_aes_cbc" */
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "p8_aes_cbc",
+ .base.cra_module = THIS_MODULE,
+ .base.cra_priority = 2000,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct p8_aes_cbc_ctx),
+ .setkey = p8_aes_cbc_setkey,
+ .encrypt = p8_aes_cbc_encrypt,
+ .decrypt = p8_aes_cbc_decrypt,
+ .init = p8_aes_cbc_init,
+ .exit = p8_aes_cbc_exit,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+};
diff --git a/arch/powerpc/crypto/aes_ctr.c b/arch/powerpc/crypto/aes_ctr.c
new file mode 100644
index 000000000000..e27c4036e711
--- /dev/null
+++ b/arch/powerpc/crypto/aes_ctr.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AES CTR routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#include "aesp8-ppc.h"
+
+struct p8_aes_ctr_ctx { /* per-transform context of the "p8_aes_ctr" skcipher */
+ struct crypto_skcipher *fallback; /* "ctr(aes)" skcipher allocated in p8_aes_ctr_init() */
+ struct aes_key enc_key; /* the only key kept: the CTR code in this file only ever calls encryption routines */
+};
+
+static int p8_aes_ctr_init(struct crypto_skcipher *tfm)
+{ /* Allocate the "ctr(aes)" fallback and size the request context; returns 0 or the PTR_ERR() of the failed allocation. */
+ struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *fallback;
+
+ fallback = crypto_alloc_skcipher("ctr(aes)", 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(fallback)) {
+ pr_err("Failed to allocate ctr(aes) fallback: %ld\n",
+ PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(fallback)); /* room for the sub-request p8_aes_ctr_crypt() hands to the fallback */
+ ctx->fallback = fallback;
+ return 0;
+}
+
+static void p8_aes_ctr_exit(struct crypto_skcipher *tfm)
+{ /* Release the fallback skcipher stored in the context. */
+ struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->fallback);
+}
+
+static int p8_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen) /* Expand @key into enc_key and set it on the fallback; returns 0 or -EINVAL. */
+{
+ struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int ret;
+
+ preempt_disable(); /* the aes_p8_* key expansion runs between enable_kernel_vsx() and disable_kernel_vsx(), with preemption and page faults disabled */
+ pagefault_disable();
+ enable_kernel_vsx();
+ ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); /* keylen * 8: presumably bytes converted to bits - confirm against aes_p8_set_encrypt_key() */
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); /* the fallback receives the same key */
+
+ return ret ? -EINVAL : 0; /* results were OR-ed together, so any failure is reported as -EINVAL */
+}
+
+static void p8_aes_ctr_final(const struct p8_aes_ctr_ctx *ctx,
+ struct skcipher_walk *walk) /* Process the walk's current chunk (walk->nbytes bytes) with one block of keystream. */
+{
+ const u8 *src = walk->src.virt.addr;
+ u8 *ctrblk = walk->iv;
+ u8 keystream[AES_BLOCK_SIZE];
+ u8 *dst = walk->dst.virt.addr;
+ unsigned int nbytes = walk->nbytes;
+
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); /* keystream = encryption of the current counter block */
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ crypto_xor_cpy(dst, keystream, src, nbytes); /* dst = keystream XOR src, for nbytes bytes only */
+ crypto_inc(ctrblk, AES_BLOCK_SIZE); /* advance the counter held in walk->iv */
+}
+
+static int p8_aes_ctr_crypt(struct skcipher_request *req) /* CTR handler registered as both .encrypt and .decrypt; returns the fallback's or the walk's status. */
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ int ret;
+
+ if (!crypto_simd_usable()) { /* SIMD not usable: hand the whole request to the fallback */
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req; /* copy the request into its own context area ... */
+ skcipher_request_set_tfm(subreq, ctx->fallback); /* ... and retarget the copy at the fallback */
+ return crypto_skcipher_encrypt(subreq); /* the fallback's encrypt is used for both directions */
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { /* bulk path: at least one whole block available */
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ nbytes / AES_BLOCK_SIZE, /* count of whole blocks */
+ &ctx->enc_key, walk.iv);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ do { /* bump walk.iv once per whole block just processed; presumably the assembly routine does not write the counter back - confirm */
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ } while ((nbytes -= AES_BLOCK_SIZE) >= AES_BLOCK_SIZE); /* leaves nbytes as the remainder below one block */
+
+ ret = skcipher_walk_done(&walk, nbytes);
+ }
+ if (nbytes) { /* 0 < nbytes < AES_BLOCK_SIZE here: trailing partial block */
+ p8_aes_ctr_final(ctx, &walk);
+ ret = skcipher_walk_done(&walk, 0);
+ }
+ return ret;
+}
+
+struct skcipher_alg p8_aes_ctr_alg = { /* skcipher descriptor for "ctr(aes)", driver name "p8_aes_ctr" */
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "p8_aes_ctr",
+ .base.cra_module = THIS_MODULE,
+ .base.cra_priority = 2000,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = 1, /* block size 1 here, while chunksize below is AES_BLOCK_SIZE */
+ .base.cra_ctxsize = sizeof(struct p8_aes_ctr_ctx),
+ .setkey = p8_aes_ctr_setkey,
+ .encrypt = p8_aes_ctr_crypt, /* same handler for both directions */
+ .decrypt = p8_aes_ctr_crypt,
+ .init = p8_aes_ctr_init,
+ .exit = p8_aes_ctr_exit,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+};
diff --git a/arch/powerpc/crypto/aes_xts.c b/arch/powerpc/crypto/aes_xts.c
new file mode 100644
index 000000000000..9440e771cede
--- /dev/null
+++ b/arch/powerpc/crypto/aes_xts.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AES XTS routines supporting VMX In-core instructions on Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/xts.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#include "aesp8-ppc.h"
+
+struct p8_aes_xts_ctx { /* per-transform context of the "p8_aes_xts" skcipher */
+ struct crypto_skcipher *fallback; /* "xts(aes)" skcipher allocated in p8_aes_xts_init() */
+ struct aes_key enc_key; /* expanded from the first half of the key in p8_aes_xts_setkey() */
+ struct aes_key dec_key; /* expanded from the first half of the key in p8_aes_xts_setkey() */
+ struct aes_key tweak_key; /* expanded from the second half of the key in p8_aes_xts_setkey() */
+};
+
+static int p8_aes_xts_init(struct crypto_skcipher *tfm)
+{ /* Allocate the "xts(aes)" fallback and size the request context; returns 0 or the PTR_ERR() of the failed allocation. */
+ struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct crypto_skcipher *fallback;
+
+ fallback = crypto_alloc_skcipher("xts(aes)", 0,
+ CRYPTO_ALG_NEED_FALLBACK |
+ CRYPTO_ALG_ASYNC);
+ if (IS_ERR(fallback)) {
+ pr_err("Failed to allocate xts(aes) fallback: %ld\n",
+ PTR_ERR(fallback));
+ return PTR_ERR(fallback);
+ }
+
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(fallback)); /* room for the sub-request p8_aes_xts_crypt() hands to the fallback */
+ ctx->fallback = fallback;
+ return 0;
+}
+
+static void p8_aes_xts_exit(struct crypto_skcipher *tfm)
+{ /* Release the fallback skcipher stored in the context. */
+ struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ crypto_free_skcipher(ctx->fallback);
+}
+
+static int p8_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen) /* Validate @key, split it into data and tweak halves, expand both, and set it on the fallback. */
+{
+ struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int ret;
+
+ ret = xts_verify_key(tfm, key, keylen);
+ if (ret)
+ return ret; /* the verification error is returned unchanged, unlike the -EINVAL collapse below */
+
+ preempt_disable(); /* the aes_p8_* key expansion runs between enable_kernel_vsx() and disable_kernel_vsx(), with preemption and page faults disabled */
+ pagefault_disable();
+ enable_kernel_vsx();
+ ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); /* second half of @key becomes the tweak key */
+ ret |= aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); /* first half of @key becomes the data key */
+ ret |= aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); /* the fallback receives the full, unsplit key */
+
+ return ret ? -EINVAL : 0; /* results were OR-ed together, so any failure is reported as -EINVAL */
+}
+
+static int p8_aes_xts_crypt(struct skcipher_request *req, int enc) /* Shared XTS path: @enc non-zero encrypts, zero decrypts; returns -EINVAL, the fallback's status, or the walk's status. */
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
+ u8 tweak[AES_BLOCK_SIZE];
+ int ret;
+
+ if (req->cryptlen < AES_BLOCK_SIZE) /* requests shorter than one block are rejected */
+ return -EINVAL;
+
+ if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) { /* the fallback also takes every length that is not a multiple of XTS_BLOCK_SIZE */
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req; /* copy the request into its own context area ... */
+ skcipher_request_set_tfm(subreq, ctx->fallback); /* ... and retarget the copy at the fallback */
+ return enc ? crypto_skcipher_encrypt(subreq) :
+ crypto_skcipher_decrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+
+ aes_p8_encrypt(walk.iv, tweak, &ctx->tweak_key); /* initial tweak = IV encrypted with the tweak key */
+
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ while ((nbytes = walk.nbytes) != 0) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ if (enc)
+ aes_p8_xts_encrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE), /* only whole blocks are processed per step */
+ &ctx->enc_key, NULL, tweak); /* NULL second key: presumably because the tweak was already encrypted above - confirm against aes_p8_xts_encrypt() */
+ else
+ aes_p8_xts_decrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ &ctx->dec_key, NULL, tweak);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); /* report the bytes left over after the whole blocks */
+ }
+ return ret;
+}
+
+static int p8_aes_xts_encrypt(struct skcipher_request *req)
+{ /* .encrypt entry point: run the shared XTS path with enc = 1. */
+ return p8_aes_xts_crypt(req, 1);
+}
+
+static int p8_aes_xts_decrypt(struct skcipher_request *req)
+{ /* .decrypt entry point: run the shared XTS path with enc = 0. */
+ return p8_aes_xts_crypt(req, 0);
+}
+
+struct skcipher_alg p8_aes_xts_alg = { /* skcipher descriptor for "xts(aes)", driver name "p8_aes_xts" */
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "p8_aes_xts",
+ .base.cra_module = THIS_MODULE,
+ .base.cra_priority = 2000,
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct p8_aes_xts_ctx),
+ .setkey = p8_aes_xts_setkey,
+ .encrypt = p8_aes_xts_encrypt,
+ .decrypt = p8_aes_xts_decrypt,
+ .init = p8_aes_xts_init,
+ .exit = p8_aes_xts_exit,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE, /* doubled: p8_aes_xts_setkey() splits the key into two halves */
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+};
diff --git a/arch/powerpc/crypto/aesp10-ppc.pl b/arch/powerpc/crypto/aesp10-ppc.pl
new file mode 100644
index 000000000000..2c06ce2a2c7c
--- /dev/null
+++ b/arch/powerpc/crypto/aesp10-ppc.pl
@@ -0,0 +1,585 @@
+#! /usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from CRYPTOGAMs[1] and is included here using the option
+# in the license to distribute the code under the GPL. Therefore this program
+# is free software; you can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2 as published by the Free Software
+# Foundation.
+#
+# [1] https://www.openssl.org/~appro/cryptogams/
+
+# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the CRYPTOGAMS nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in a pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned out to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are on average lower
+# than the additional overhead of the pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+# CBC en-/decrypt CTR XTS
+# POWER8[le] 3.96/0.72 0.74 1.1
+# POWER8[be] 3.75/0.65 0.66 1.0
+
+$flavour = shift; # first argument: target flavour, matched below for 64/32 and a trailing "le"
+
+if ($flavour =~ /64/) { # 64-bit: 8-byte size_t and the doubleword load/store/compare/shift mnemonics
+ $SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+ $UCMP ="cmpld";
+ $SHL ="sldi";
+} elsif ($flavour =~ /32/) { # 32-bit: 4-byte size_t and the word-sized mnemonics
+ $SIZE_T =4;
+ $LRSAVE =$SIZE_T;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+ $UCMP ="cmplw";
+ $SHL ="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; # $SIZE_T for flavours ending in "le", otherwise 0
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # low-precedence 'or': '||' would bind to the (always true) command string and die could never run
+
+$FRAME=8*$SIZE_T; # stack frame size used by the set_decrypt_key template
+$prefix="aes_p10"; # prefix of every exported symbol
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{ # Key setup procedures #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); # GPRs r3..r8 in this order; note the template below also uses r8 (= $rounds) by its raw name as an early scratch register
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); # vector registers v0..v6 in this order
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); # vector registers v7..v11; the template uses them to merge and store round keys at $out
+
+$code.=<<___;
+.machine "any"
+
+.text
+
+.align 7
+rcon:
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
+.long 0,0,0,0 ?asis
+Lconsts:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $ptr #vvvvv "distance between . and rcon
+ addi $ptr,$ptr,-0x48
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+Lset_encrypt_key:
+ mflr r11
+ $PUSH r11,$LRSAVE($sp)
+
+ li $ptr,-1
+ ${UCMP}i $inp,0
+ beq- Lenc_key_abort # if ($inp==0) return -1;
+ ${UCMP}i $out,0
+ beq- Lenc_key_abort # if ($out==0) return -1;
+ li $ptr,-2
+ cmpwi $bits,128
+ blt- Lenc_key_abort
+ cmpwi $bits,256
+ bgt- Lenc_key_abort
+ andi. r0,$bits,0x3f
+ bne- Lenc_key_abort
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ bl Lconsts
+ mtlr r11
+
+ neg r9,$inp
+ lvx $in0,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ lvsr $key,0,r9 # borrow $key
+ li r8,0x20
+ cmpwi $bits,192
+ lvx $in1,0,$inp
+ le?vspltisb $mask,0x0f # borrow $mask
+ lvx $rcon,0,$ptr
+ le?vxor $key,$key,$mask # adjust for byte swap
+ lvx $mask,r8,$ptr
+ addi $ptr,$ptr,0x10
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
+ li $cnt,8
+ vxor $zero,$zero,$zero
+ mtctr $cnt
+
+ ?lvsr $outperm,0,$out
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$zero,$outmask,$outperm
+
+ blt Loop128
+ addi $inp,$inp,8
+ beq L192
+ addi $inp,$inp,8
+ b L256
+
+.align 4
+Loop128:
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ bdnz Loop128
+
+ lvx $rcon,0,$ptr # last two round keys
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,0x50
+
+ li $rounds,10
+ b Ldone
+
+.align 4
+L192:
+ lvx $tmp,0,$inp
+ li $cnt,4
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ vspltisb $key,8 # borrow $key
+ mtctr $cnt
+ vsububm $mask,$mask,$key # adjust the mask
+
+Loop192:
+ vperm $key,$in1,$in1,$mask # roate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vcipherlast $key,$key,$rcon
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+
+ vsldoi $stage,$zero,$in1,8
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vsldoi $stage,$stage,$in0,8
+
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vsldoi $stage,$in0,$in1,8
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdnz Loop192
+
+ li $rounds,12
+ addi $out,$out,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx $tmp,0,$inp
+ li $cnt,7
+ li $rounds,14
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ mtctr $cnt
+
+Loop256:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in1,$in1,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdz Ldone
+
+ vspltw $key,$in0,3 # just splat
+ vsldoi $tmp,$zero,$in1,12 # >>32
+ vsbox $key,$key
+
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+
+ vxor $in1,$in1,$key
+ b Loop256
+
+.align 4
+Ldone:
+ lvx $in1,0,$inp # redundant in aligned case
+ vsel $in1,$outhead,$in1,$outmask
+ stvx $in1,0,$inp
+ li $ptr,0
+ mtspr 256,$vrsave
+ stw $rounds,0($out)
+
+Lenc_key_abort:
+ mr r3,$ptr
+ blr
+ .long 0
+ .byte 0,12,0x14,1,0,0,3,0
+ .long 0
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+ $STU $sp,-$FRAME($sp)
+ mflr r10
+ $PUSH r10,$FRAME+$LRSAVE($sp)
+ bl Lset_encrypt_key
+ mtlr r10
+
+ cmpwi r3,0
+ bne- Ldec_key_abort
+
+ slwi $cnt,$rounds,4
+ subi $inp,$out,240 # first round key
+ srwi $rounds,$rounds,1
+ add $out,$inp,$cnt # last round key
+ mtctr $rounds
+
+Ldeckey:
+ lwz r0, 0($inp)
+ lwz r6, 4($inp)
+ lwz r7, 8($inp)
+ lwz r8, 12($inp)
+ addi $inp,$inp,16
+ lwz r9, 0($out)
+ lwz r10,4($out)
+ lwz r11,8($out)
+ lwz r12,12($out)
+ stw r0, 0($out)
+ stw r6, 4($out)
+ stw r7, 8($out)
+ stw r8, 12($out)
+ subi $out,$out,16
+ stw r9, -16($inp)
+ stw r10,-12($inp)
+ stw r11,-8($inp)
+ stw r12,-4($inp)
+ bdnz Ldeckey
+
+ xor r3,r3,r3 # return value
+Ldec_key_abort:
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,0,3,0
+ .long 0
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{ # Single block en- and decrypt procedures #
+sub gen_block () { # NOTE: the empty prototype is bypassed because the callers below use the &gen_block(...) form
+my $dir = shift; # "en" or "de"; becomes part of the symbol and loop-label names
+my $n = $dir eq "de" ? "n" : ""; # "n" turns vcipher/vcipherlast into vncipher/vncipherlast for decryption
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); # GPRs r3..r7 in this order
+
+$code.=<<___; # append the assembler template; Perl variables are interpolated into it
+.globl .${prefix}_${dir}crypt
+ lwz $rounds,240($key)
+ lis r0,0xfc00
+ mfspr $vrsave,256
+ li $idx,15 # 15 is not typo
+ mtspr 256,r0
+
+ lvx v0,0,$inp
+ neg r11,$out
+ lvx v1,$idx,$inp
+ lvsl v2,0,$inp # inpperm
+ le?vspltisb v4,0x0f
+ ?lvsl v3,0,r11 # outperm
+ le?vxor v2,v2,v4
+ li $idx,16
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
+ lvx v1,0,$key
+ ?lvsl v5,0,$key # keyperm
+ srwi $rounds,$rounds,1
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ subi $rounds,$rounds,1
+ ?vperm v1,v1,v2,v5 # align round key
+
+ vxor v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Loop_${dir}c:
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ ?vperm v1,v1,v2,v5
+ v${n}cipher v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_${dir}c
+
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ ?vperm v1,v1,v2,v5
+ v${n}cipherlast v0,v0,v1
+
+ vspltisb v2,-1
+ vxor v1,v1,v1
+ li $idx,15 # 15 is not typo
+ ?vperm v2,v1,v2,v3 # outmask
+ le?vxor v3,v3,v4
+ lvx v1,0,$out # outhead
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
+ vsel v1,v1,v0,v2
+ lvx v4,$idx,$out
+ stvx v1,0,$out
+ vsel v0,v0,v4,v2
+ stvx v0,$idx,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en"); # appends the .${prefix}_encrypt routine to $code
+&gen_block("de"); # appends the .${prefix}_decrypt routine to $code
+}}}
+
+my $consts=1; # true while still inside the constants table; cleared once "Lconsts:" is seen
+foreach(split("\n",$code)) { # post-process the accumulated template one line at a time
+ s/\`([^\`]*)\`/eval($1)/geo; # replace each `expr` with the value Perl computes for it
+
+ # constants table endian-specific conversion
+ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+ my $conv=$3; # trailing ?tag; only ?inv and ?rev alter the bytes below
+ my @bytes=();
+
+ # convert to endian-agnostic format
+ if ($1 eq "long") {
+ foreach (split(/,\s*/,$2)) {
+ my $l = /^0/?oct:int; # oct() also handles the 0x... spellings
+ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+ }
+ } else {
+ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+ }
+
+ # little-endian conversion
+ if ($flavour =~ /le$/o) {
+ SWITCH: for($conv) {
+ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; # XOR every byte with 0xf
+ /\?rev/ && do { @bytes=reverse(@bytes); last; }; # reverse the byte order of the whole line
+ }
+ }
+
+ # emit the line as a plain .byte directive
+ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+ next;
+ }
+ $consts=0 if (m/Lconsts:/o); # end of table
+
+ # instructions prefixed with '?' are endian-specific and need
+ # to be adjusted accordingly...
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o or
+ s/\?lvsr/lvsl/o or
+ s/\?lvsl/lvsr/o or
+ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+ } else { # big-endian
+ s/le\?/#le#/o or
+ s/be\?//o or
+ s/\?([a-z]+)/$1/o;
+ }
+
+ print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!"; # STDOUT is a pipe to ppc-xlate.pl: close reports a failed child, so do not ignore it
diff --git a/arch/powerpc/crypto/aesp8-ppc.h b/arch/powerpc/crypto/aesp8-ppc.h
new file mode 100644
index 000000000000..5764d4438388
--- /dev/null
+++ b/arch/powerpc/crypto/aesp8-ppc.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+#include <crypto/aes.h>
+
+struct aes_key { /* key schedule written by aes_p8_set_encrypt_key()/aes_p8_set_decrypt_key() */
+ u8 key[AES_MAX_KEYLENGTH]; /* round keys */
+ int rounds; /* NOTE(review): the asm reads this with "lwz ...,240($key)", so key[] is presumably 240 bytes - confirm AES_MAX_KEYLENGTH */
+};
+
+extern struct shash_alg p8_ghash_alg;
+extern struct crypto_alg p8_aes_alg;
+extern struct skcipher_alg p8_aes_cbc_alg;
+extern struct skcipher_alg p8_aes_ctr_alg;
+extern struct skcipher_alg p8_aes_xts_alg;
+
+int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, /* asm returns 0, -1 for a NULL userKey/key, -2 unless bits is 128, 192 or 256 */
+ struct aes_key *key);
+int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, /* runs the encrypt key setup, then swaps the round keys end for end */
+ struct aes_key *key);
+void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key); /* single 16-byte block */
+void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key); /* single 16-byte block */
+void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, /* asm returns at once when len < 16 */
+ const struct aes_key *key, u8 *iv, const int enc); /* enc == 0 takes the decrypt path */
+void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out,
+ size_t len, const struct aes_key *key,
+ const u8 *iv);
+void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, /* p8_aes_xts_crypt() passes key2 == NULL and an already encrypted tweak as iv */
+ const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
+void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, /* p8_aes_xts_crypt() passes key2 == NULL and an already encrypted tweak as iv */
+ const struct aes_key *key1, const struct aes_key *key2, u8 *iv);
diff --git a/arch/powerpc/crypto/aesp8-ppc.pl b/arch/powerpc/crypto/aesp8-ppc.pl
new file mode 100644
index 000000000000..f729589d792e
--- /dev/null
+++ b/arch/powerpc/crypto/aesp8-ppc.pl
@@ -0,0 +1,3889 @@
+#! /usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from CRYPTOGAMs[1] and is included here using the option
+# in the license to distribute the code under the GPL. Therefore this program
+# is free software; you can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2 as published by the Free Software
+# Foundation.
+#
+# [1] https://www.openssl.org/~appro/cryptogams/
+
+# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# * Neither the name of the CRYPTOGAMS nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for AES instructions as per PowerISA
+# specification version 2.07, first implemented by POWER8 processor.
+# The module is endian-agnostic in sense that it supports both big-
+# and little-endian cases. Data alignment in parallelizable modes is
+# handled with VSX loads and stores, which implies MSR.VSX flag being
+# set. It should also be noted that ISA specification doesn't prohibit
+# alignment exceptions for these instructions on page boundaries.
+# Initially alignment was handled in a pure AltiVec/VMX way [when data
+# is aligned programmatically, which in turn guarantees exception-
+# free execution], but it turned out to hamper performance when vcipher
+# instructions are interleaved. It's reckoned that eventual
+# misalignment penalties at page boundaries are on average lower
+# than the additional overhead of the pure AltiVec approach.
+#
+# May 2016
+#
+# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
+# systems were measured.
+#
+######################################################################
+# Current large-block performance in cycles per byte processed with
+# 128-bit key (less is better).
+#
+# CBC en-/decrypt CTR XTS
+# POWER8[le] 3.96/0.72 0.74 1.1
+# POWER8[be] 3.75/0.65 0.66 1.0
+
+$flavour = shift; # first argument: target flavour, matched below for 64/32 and a trailing "le"
+
+if ($flavour =~ /64/) { # 64-bit: 8-byte size_t and the doubleword load/store/compare/shift mnemonics
+ $SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+ $UCMP ="cmpld";
+ $SHL ="sldi";
+} elsif ($flavour =~ /32/) { # 32-bit: 4-byte size_t and the word-sized mnemonics
+ $SIZE_T =4;
+ $LRSAVE =$SIZE_T;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+ $UCMP ="cmplw";
+ $SHL ="slwi";
+} else { die "nonsense $flavour"; }
+
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; # $SIZE_T for flavours ending in "le", otherwise 0
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's own path
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!"; # low-precedence 'or': '||' would bind to the (always true) command string and die could never run
+
+$FRAME=8*$SIZE_T; # stack frame size used by the set_decrypt_key template
+$prefix="aes_p8"; # prefix of every exported symbol
+
+$sp="r1";
+$vrsave="r12";
+
+#########################################################################
+{{{ # Key setup procedures #
+my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); # GPRs r3..r8 in this order; note the template below also uses r8 (= $rounds) by its raw name as an early scratch register
+my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); # vector registers v0..v6 in this order
+my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); # vector registers v7..v11; the template uses them to merge and store round keys at $out
+
+$code.=<<___;
+.machine "any"
+
+.text
+
+.align 7
+rcon:
+.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
+.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
+.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
+.long 0,0,0,0 ?asis
+.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
+Lconsts:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $ptr #vvvvv "distance between . and rcon
+ addi $ptr,$ptr,-0x58
+ mtlr r0
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+
+.globl .${prefix}_set_encrypt_key
+Lset_encrypt_key:
+ mflr r11
+ $PUSH r11,$LRSAVE($sp)
+
+ li $ptr,-1
+ ${UCMP}i $inp,0
+ beq- Lenc_key_abort # if ($inp==0) return -1;
+ ${UCMP}i $out,0
+ beq- Lenc_key_abort # if ($out==0) return -1;
+ li $ptr,-2
+ cmpwi $bits,128
+ blt- Lenc_key_abort
+ cmpwi $bits,256
+ bgt- Lenc_key_abort
+ andi. r0,$bits,0x3f
+ bne- Lenc_key_abort
+
+ lis r0,0xfff0
+ mfspr $vrsave,256
+ mtspr 256,r0
+
+ bl Lconsts
+ mtlr r11
+
+ neg r9,$inp
+ lvx $in0,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ lvsr $key,0,r9 # borrow $key
+ li r8,0x20
+ cmpwi $bits,192
+ lvx $in1,0,$inp
+ le?vspltisb $mask,0x0f # borrow $mask
+ lvx $rcon,0,$ptr
+ le?vxor $key,$key,$mask # adjust for byte swap
+ lvx $mask,r8,$ptr
+ addi $ptr,$ptr,0x10
+ vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
+ li $cnt,8
+ vxor $zero,$zero,$zero
+ mtctr $cnt
+
+ ?lvsr $outperm,0,$out
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$zero,$outmask,$outperm
+
+ blt Loop128
+ addi $inp,$inp,8
+ beq L192
+ addi $inp,$inp,8
+ b L256
+
+.align 4
+Loop128:
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ bdnz Loop128
+
+ lvx $rcon,0,$ptr # last two round keys
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+
+ vperm $key,$in0,$in0,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,0x50
+
+ li $rounds,10
+ b Ldone
+
+.align 4
+L192:
+ lvx $tmp,0,$inp
+ li $cnt,4
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ vspltisb $key,8 # borrow $key
+ mtctr $cnt
+ vsububm $mask,$mask,$key # adjust the mask
+
+Loop192:
+ vperm $key,$in1,$in1,$mask # roate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vcipherlast $key,$key,$rcon
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+
+ vsldoi $stage,$zero,$in1,8
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vsldoi $stage,$stage,$in0,8
+
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vsldoi $stage,$in0,$in1,8
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vperm $outtail,$stage,$stage,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vspltw $tmp,$in0,3
+ vxor $tmp,$tmp,$in1
+ vsldoi $in1,$zero,$in1,12 # >>32
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in1,$in1,$tmp
+ vxor $in0,$in0,$key
+ vxor $in1,$in1,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdnz Loop192
+
+ li $rounds,12
+ addi $out,$out,0x20
+ b Ldone
+
+.align 4
+L256:
+ lvx $tmp,0,$inp
+ li $cnt,7
+ li $rounds,14
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $out,$out,16
+ vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
+ mtctr $cnt
+
+Loop256:
+ vperm $key,$in1,$in1,$mask # rotate-n-splat
+ vsldoi $tmp,$zero,$in0,12 # >>32
+ vperm $outtail,$in1,$in1,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ vcipherlast $key,$key,$rcon
+ stvx $stage,0,$out
+ addi $out,$out,16
+
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in0,$in0,$tmp
+ vadduwm $rcon,$rcon,$rcon
+ vxor $in0,$in0,$key
+ vperm $outtail,$in0,$in0,$outperm # rotate
+ vsel $stage,$outhead,$outtail,$outmask
+ vmr $outhead,$outtail
+ stvx $stage,0,$out
+ addi $inp,$out,15 # 15 is not typo
+ addi $out,$out,16
+ bdz Ldone
+
+ vspltw $key,$in0,3 # just splat
+ vsldoi $tmp,$zero,$in1,12 # >>32
+ vsbox $key,$key
+
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+ vsldoi $tmp,$zero,$tmp,12 # >>32
+ vxor $in1,$in1,$tmp
+
+ vxor $in1,$in1,$key
+ b Loop256
+
+.align 4
+Ldone:
+ lvx $in1,0,$inp # redundant in aligned case
+ vsel $in1,$outhead,$in1,$outmask
+ stvx $in1,0,$inp
+ li $ptr,0
+ mtspr 256,$vrsave
+ stw $rounds,0($out)
+
+Lenc_key_abort:
+ mr r3,$ptr
+ blr
+ .long 0
+ .byte 0,12,0x14,1,0,0,3,0
+ .long 0
+.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
+
+.globl .${prefix}_set_decrypt_key
+ $STU $sp,-$FRAME($sp)
+ mflr r10
+ $PUSH r10,$FRAME+$LRSAVE($sp)
+ bl Lset_encrypt_key
+ mtlr r10
+
+ cmpwi r3,0
+ bne- Ldec_key_abort
+
+ slwi $cnt,$rounds,4
+ subi $inp,$out,240 # first round key
+ srwi $rounds,$rounds,1
+ add $out,$inp,$cnt # last round key
+ mtctr $rounds
+
+Ldeckey:
+ lwz r0, 0($inp)
+ lwz r6, 4($inp)
+ lwz r7, 8($inp)
+ lwz r8, 12($inp)
+ addi $inp,$inp,16
+ lwz r9, 0($out)
+ lwz r10,4($out)
+ lwz r11,8($out)
+ lwz r12,12($out)
+ stw r0, 0($out)
+ stw r6, 4($out)
+ stw r7, 8($out)
+ stw r8, 12($out)
+ subi $out,$out,16
+ stw r9, -16($inp)
+ stw r10,-12($inp)
+ stw r11,-8($inp)
+ stw r12,-4($inp)
+ bdnz Ldeckey
+
+ xor r3,r3,r3 # return value
+Ldec_key_abort:
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,0,3,0
+ .long 0
+.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
+___
+}}}
+#########################################################################
+{{{ # Single block en- and decrypt procedures #
+sub gen_block () { # NOTE: the empty prototype is bypassed because the callers below use the &gen_block(...) form
+my $dir = shift; # "en" or "de"; becomes part of the symbol and loop-label names
+my $n = $dir eq "de" ? "n" : ""; # "n" turns vcipher/vcipherlast into vncipher/vncipherlast for decryption
+my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); # GPRs r3..r7 in this order
+
+$code.=<<___; # append the assembler template; Perl variables are interpolated into it
+.globl .${prefix}_${dir}crypt
+ lwz $rounds,240($key)
+ lis r0,0xfc00
+ mfspr $vrsave,256
+ li $idx,15 # 15 is not typo
+ mtspr 256,r0
+
+ lvx v0,0,$inp
+ neg r11,$out
+ lvx v1,$idx,$inp
+ lvsl v2,0,$inp # inpperm
+ le?vspltisb v4,0x0f
+ ?lvsl v3,0,r11 # outperm
+ le?vxor v2,v2,v4
+ li $idx,16
+ vperm v0,v0,v1,v2 # align [and byte swap in LE]
+ lvx v1,0,$key
+ ?lvsl v5,0,$key # keyperm
+ srwi $rounds,$rounds,1
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ subi $rounds,$rounds,1
+ ?vperm v1,v1,v2,v5 # align round key
+
+ vxor v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Loop_${dir}c:
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ addi $idx,$idx,16
+ ?vperm v1,v1,v2,v5
+ v${n}cipher v0,v0,v1
+ lvx v1,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_${dir}c
+
+ ?vperm v2,v2,v1,v5
+ v${n}cipher v0,v0,v2
+ lvx v2,$idx,$key
+ ?vperm v1,v1,v2,v5
+ v${n}cipherlast v0,v0,v1
+
+ vspltisb v2,-1
+ vxor v1,v1,v1
+ li $idx,15 # 15 is not typo
+ ?vperm v2,v1,v2,v3 # outmask
+ le?vxor v3,v3,v4
+ lvx v1,0,$out # outhead
+ vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
+ vsel v1,v1,v0,v2
+ lvx v4,$idx,$out
+ stvx v1,0,$out
+ vsel v0,v0,v4,v2
+ stvx v0,$idx,$out
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,3,0
+ .long 0
+.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
+___
+}
+&gen_block("en"); # appends the .${prefix}_encrypt routine to $code
+&gen_block("de"); # appends the .${prefix}_decrypt routine to $code
+}}}
+#########################################################################
+{{{ # CBC en- and decrypt procedures #
+my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); # r3-r8 are read as inputs; r9 and r10 are scratch
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); # v0-v3: two round keys, working block, temporary
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
+ map("v$_",(4..10)); # v4-v10: chaining value plus permute/mask vectors for unaligned access
+$code.=<<___;
+.globl .${prefix}_cbc_encrypt
+ ${UCMP}i $len,16 # fewer than 16 bytes: return without doing anything
+ bltlr-
+
+ cmpwi $enc,0 # test direction
+ lis r0,0xffe0 # 11 top bits set, one per vector register v0-v10
+ mfspr $vrsave,256 # keep the incoming SPR 256 value for the exit path
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key) # round count is read from byte offset 240 of the key schedule
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ srwi $rounds,$rounds,1 # loop count is rounds/2-1: every loop pass applies two rounds
+ li $idx,16
+ subi $rounds,$rounds,1
+ beq Lcbc_dec # cr0 still holds the cmpwi result: a zero direction flag selects decryption
+
+Lcbc_enc: # CBC encryption, one 16-byte block per pass
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $inout,$inout,$inptail,$inpperm # assemble the unaligned input block
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ vxor $inout,$inout,$ivec # CBC chaining: xor in the previous ciphertext block (the IV on the first pass)
+
+Loop_cbc_enc: # two rounds per pass, round keys aligned on the fly
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16 # reset the key index for the next block
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $ivec,$inout,$rndkey0 # the ciphertext is also the next chaining value
+ ${UCMP}i $len,16 # sets cr0 for the bge below: loop while a full block remains
+
+ vperm $tmp,$ivec,$ivec,$outperm # rotate for the unaligned store
+ vsel $inout,$outhead,$tmp,$outmask # merge with the bytes carried over from the previous store
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_enc
+
+ b Lcbc_done
+
+.align 4
+Lcbc_dec: # CBC decryption, one 16-byte block per pass
+ ${UCMP}i $len,128 # 128 bytes or more are handed to the 8-block routine
+ bge _aesp8_cbc_decrypt8x
+ vmr $tmp,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ mtctr $rounds
+ subi $len,$len,16 # len-=16
+
+ lvx $rndkey0,0,$key
+ vperm $tmp,$tmp,$inptail,$inpperm # aligned ciphertext block, kept for chaining
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$tmp,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+
+Loop_cbc_dec: # two rounds per pass, round keys aligned on the fly
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_cbc_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ li $idx,16 # reset the key index for the next block
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipherlast $inout,$inout,$rndkey0
+ ${UCMP}i $len,16 # sets cr0 for the bge below: loop while a full block remains
+
+ vxor $inout,$inout,$ivec # CBC chaining: xor with the previous ciphertext block (the IV on the first pass)
+ vmr $ivec,$tmp # this ciphertext block chains into the next one
+ vperm $tmp,$inout,$inout,$outperm # rotate for the unaligned store
+ vsel $inout,$outhead,$tmp,$outmask # merge with the bytes carried over from the previous store
+ vmr $outhead,$tmp
+ stvx $inout,0,$out
+ addi $out,$out,16
+ bge Lcbc_dec
+
+Lcbc_done: # flush the pending output bytes, write the chaining value back, return
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ neg $enc,$ivp # write [unaligned] iv
+ li $idx,15 # 15 is not typo
+ vxor $rndkey0,$rndkey0,$rndkey0
+ vspltisb $outmask,-1
+ le?vspltisb $tmp,0x0f
+ ?lvsl $outperm,0,$enc # the direction-flag register is reused as scratch here
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+ lvx $outhead,0,$ivp
+ vperm $ivec,$ivec,$ivec,$outperm
+ vsel $inout,$outhead,$ivec,$outmask
+ lvx $inptail,$idx,$ivp
+ stvx $inout,0,$ivp
+ vsel $inout,$ivec,$inptail,$outmask
+ stvx $inout,$idx,$ivp
+
+ mtspr 256,$vrsave # put back the SPR 256 value saved on entry
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CBC decrypt procedure #
+my $key_="r11"; # pointer into the on-stack copy of the round keys
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); # $x00 is r0: in the RA slot of an indexed load/store it means offset 0, whatever r0 holds
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+
+$code.=<<___;
+.align 5
+_aesp8_cbc_decrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1 # all-ones value written to SPR 256 below
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+ subi $len,$len,128 # bias
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15 # stack area that receives the aligned round keys
+ mtctr $rounds
+
+Load_cbc_dec_key: # align two round keys per pass and store them on the stack
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_cbc_dec_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ #lvx $inptail,0,$inp # "caller" already did this
+ #addi $inp,$inp,15 # 15 is not typo
+ subi $inp,$inp,15 # undo "caller"
+
+ le?li $idx,8
+ lvx_u $in0,$x00,$inp # load first 8 "words"
+ le?lvsl $inpperm,0,$idx
+ le?vspltisb $tmp,0x0f
+ lvx_u $in1,$x10,$inp
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ lvx_u $in2,$x20,$inp
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in3,$x30,$inp
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in4,$x40,$inp
+ le?vperm $in2,$in2,$in2,$inpperm
+ vxor $out0,$in0,$rndkey0 # initial key addition; the in0-in7 copies stay intact for chaining
+ lvx_u $in5,$x50,$inp
+ le?vperm $in3,$in3,$in3,$inpperm
+ vxor $out1,$in1,$rndkey0
+ lvx_u $in6,$x60,$inp
+ le?vperm $in4,$in4,$in4,$inpperm
+ vxor $out2,$in2,$rndkey0
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+ le?vperm $in5,$in5,$in5,$inpperm
+ vxor $out3,$in3,$rndkey0
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out4,$in4,$rndkey0
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out5,$in5,$rndkey0
+ vxor $out6,$in6,$rndkey0
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ b Loop_cbc_dec8x
+.align 5
+Loop_cbc_dec8x: # each pass applies two rounds to all eight blocks
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x
+
+ subic $len,$len,128 # $len-=128
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ and r0,r0,$len # r0 = the (negative) length when the subtraction borrowed, else 0
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vncipher $out0,$out0,v30
+ vxor $ivec,$ivec,v31 # xor with last round key
+ vncipher $out1,$out1,v30
+ vxor $in0,$in0,v31
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ vncipherlast $out0,$out0,$ivec # last round key and CBC xor applied in one step
+ vncipherlast $out1,$out1,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vncipherlast $out2,$out2,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out3,$out3,$in2
+ le?vperm $in0,$in0,$in0,$inpperm
+ lvx_u $in2,$x20,$inp
+ vncipherlast $out4,$out4,$in3
+ le?vperm $in1,$in1,$in1,$inpperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out5,$out5,$in4
+ le?vperm $in2,$in2,$in2,$inpperm
+ lvx_u $in4,$x40,$inp
+ vncipherlast $out6,$out6,$in5
+ le?vperm $in3,$in3,$in3,$inpperm
+ lvx_u $in5,$x50,$inp
+ vncipherlast $out7,$out7,$in6
+ le?vperm $in4,$in4,$in4,$inpperm
+ lvx_u $in6,$x60,$inp
+ vmr $ivec,$in7 # the last ciphertext block of this batch chains into the next one
+ le?vperm $in5,$in5,$in5,$inpperm
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vxor $out0,$in0,$rndkey0 # start the next batch: initial key addition
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vxor $out1,$in1,$rndkey0
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$rndkey0
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$rndkey0
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$rndkey0
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$rndkey0
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ vxor $out6,$in6,$rndkey0
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ vxor $out7,$in7,$rndkey0
+
+ mtctr $rounds
+ beq Loop_cbc_dec8x # did $len-=128 borrow?
+
+ addic. $len,$len,128 # add 128 back; a zero result means no tail blocks remain
+ beq Lcbc_dec8x_done
+ nop
+ nop
+
+Loop_cbc_dec8x_tail: # up to 7 "words" tail...
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_cbc_dec8x_tail
+
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ vncipher $out6,$out6,v24
+ vncipher $out7,$out7,v24
+
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ vncipher $out6,$out6,v25
+ vncipher $out7,$out7,v25
+
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+ vncipher $out6,$out6,v26
+ vncipher $out7,$out7,v26
+
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+ vncipher $out6,$out6,v27
+ vncipher $out7,$out7,v27
+
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ vncipher $out6,$out6,v28
+ vncipher $out7,$out7,v28
+
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ vncipher $out6,$out6,v29
+ vncipher $out7,$out7,v29
+
+ vncipher $out1,$out1,v30
+ vxor $ivec,$ivec,v31 # last round key
+ vncipher $out2,$out2,v30
+ vxor $in1,$in1,v31
+ vncipher $out3,$out3,v30
+ vxor $in2,$in2,v31
+ vncipher $out4,$out4,v30
+ vxor $in3,$in3,v31
+ vncipher $out5,$out5,v30
+ vxor $in4,$in4,v31
+ vncipher $out6,$out6,v30
+ vxor $in5,$in5,v31
+ vncipher $out7,$out7,v30
+ vxor $in6,$in6,v31
+
+ cmplwi $len,32 # switch($len)
+ blt Lcbc_dec8x_one # fewer than 32 bytes left
+ nop
+ beq Lcbc_dec8x_two # exactly 32 bytes left
+ cmplwi $len,64
+ blt Lcbc_dec8x_three # fewer than 64 bytes left
+ nop
+ beq Lcbc_dec8x_four # exactly 64 bytes left
+ cmplwi $len,96
+ blt Lcbc_dec8x_five # fewer than 96 bytes left
+ nop
+ beq Lcbc_dec8x_six # exactly 96 bytes left; anything larger falls through
+Lcbc_dec8x_seven: # more than 96 bytes left: finish and store out1-out7
+ vncipherlast $out1,$out1,$ivec
+ vncipherlast $out2,$out2,$in1
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7 # last ciphertext block becomes the chaining value written back
+
+ le?vperm $out1,$out1,$out1,$inpperm
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x00,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x10,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x20,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x30,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x40,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x50,$out
+ stvx_u $out7,$x60,$out
+ addi $out,$out,0x70
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_six: # finish and store out2-out7
+ vncipherlast $out2,$out2,$ivec
+ vncipherlast $out3,$out3,$in2
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out2,$out2,$out2,$inpperm
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x00,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x10,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x20,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x30,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x40,$out
+ stvx_u $out7,$x50,$out
+ addi $out,$out,0x60
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_five: # finish and store out3-out7
+ vncipherlast $out3,$out3,$ivec
+ vncipherlast $out4,$out4,$in3
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out3,$out3,$out3,$inpperm
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x00,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x10,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x20,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x30,$out
+ stvx_u $out7,$x40,$out
+ addi $out,$out,0x50
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_four: # finish and store out4-out7
+ vncipherlast $out4,$out4,$ivec
+ vncipherlast $out5,$out5,$in4
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out4,$out4,$out4,$inpperm
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x00,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x10,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x20,$out
+ stvx_u $out7,$x30,$out
+ addi $out,$out,0x40
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_three: # finish and store out5-out7
+ vncipherlast $out5,$out5,$ivec
+ vncipherlast $out6,$out6,$in5
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out5,$out5,$out5,$inpperm
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x00,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x10,$out
+ stvx_u $out7,$x20,$out
+ addi $out,$out,0x30
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_two: # finish and store out6-out7
+ vncipherlast $out6,$out6,$ivec
+ vncipherlast $out7,$out7,$in6
+ vmr $ivec,$in7
+
+ le?vperm $out6,$out6,$out6,$inpperm
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x00,$out
+ stvx_u $out7,$x10,$out
+ addi $out,$out,0x20
+ b Lcbc_dec8x_done
+
+.align 5
+Lcbc_dec8x_one: # finish and store out7 only
+ vncipherlast $out7,$out7,$ivec
+ vmr $ivec,$in7
+
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out7,0,$out
+ addi $out,$out,0x10
+Lcbc_dec8x_done: # write the chaining value, wipe key copies, restore registers, return
+ le?vperm $ivec,$ivec,$ivec,$inpperm
+ stvx_u $ivec,0,$ivp # write [unaligned] iv
+
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32 # after eight 16-byte slots r10/r11 index the v20/v21 save slots
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
+___
+}} }}}
+
+#########################################################################
+{{{ # CTR procedure[s] #
+
+####################### WARNING: Here be dragons! #######################
+#
+# This code is written as 'ctr32', based on a 32-bit counter used
+# upstream. The kernel does *not* use a 32-bit counter. The kernel uses
+# a 128-bit counter.
+#
+# This leads to subtle changes from the upstream code: the counter
+# is incremented with vadduqm rather than vadduwm. This occurs in
+# both the bulk (8 blocks at a time) path, and in the individual block
+# path. Be aware of this when doing updates.
+#
+# See:
+# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
+# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
+# https://github.com/openssl/openssl/pull/8942
+#
+#########################################################################
+my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); # r3-r7 are read as inputs; $len counts 16-byte blocks here
+my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
+my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
+ map("v$_",(4..11));
+my $dat=$tmp; # alias: same register as $tmp
+
+$code.=<<___;
+.globl .${prefix}_ctr32_encrypt_blocks
+ ${UCMP}i $len,1 # zero blocks: return without doing anything
+ bltlr-
+
+ lis r0,0xfff0 # 12 top bits set, one per vector register v0-v11
+ mfspr $vrsave,256 # keep the incoming SPR 256 value for the exit path
+ mtspr 256,r0
+
+ li $idx,15
+ vxor $rndkey0,$rndkey0,$rndkey0
+ le?vspltisb $tmp,0x0f
+
+ lvx $ivec,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ vspltisb $one,1
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $ivec,$ivec,$inptail,$inpperm
+ vsldoi $one,$rndkey0,$one,1 # 15 zero bytes followed by 0x01: the counter increment
+
+ neg r11,$inp
+ ?lvsl $keyperm,0,$key # prepare for unaligned key
+ lwz $rounds,240($key) # round count is read from byte offset 240 of the key schedule
+
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inptail,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ srwi $rounds,$rounds,1 # loop count is rounds/2-1: every loop pass applies two rounds
+ li $idx,16
+ subi $rounds,$rounds,1
+
+ ${UCMP}i $len,8 # 8 blocks or more are handed to the 8-block routine
+ bge _aesp8_ctr32_encrypt8x
+
+ ?lvsr $outperm,0,$out # prepare for unaligned store
+ vspltisb $outmask,-1
+ lvx $outhead,0,$out
+ ?vperm $outmask,$rndkey0,$outmask,$outperm
+ le?vxor $outperm,$outperm,$tmp
+
+ lvx $rndkey0,0,$key
+ mtctr $rounds
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$ivec,$rndkey0 # the counter block is what gets encrypted
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ b Loop_ctr32_enc
+
+.align 5
+Loop_ctr32_enc: # two rounds per pass, round keys aligned on the fly
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ bdnz Loop_ctr32_enc
+
+ vadduqm $ivec,$ivec,$one # Kernel change for 128-bit
+ vmr $dat,$inptail
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ subic. $len,$len,1 # blocks--
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key
+ vperm $dat,$dat,$inptail,$inpperm # assemble the unaligned input block
+ li $idx,16
+ ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
+ lvx $rndkey0,0,$key
+ vxor $dat,$dat,$rndkey1 # last round key
+ vcipherlast $inout,$inout,$dat # last round and xor with the input block in one step
+
+ lvx $rndkey1,$idx,$key
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inout,$outperm # rotate for the unaligned store
+ vsel $dat,$outhead,$inout,$outmask # merge with the bytes carried over from the previous store
+ mtctr $rounds
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vmr $outhead,$inout
+ vxor $inout,$ivec,$rndkey0 # start on the next counter block
+ lvx $rndkey0,$idx,$key
+ addi $idx,$idx,16
+ stvx $dat,0,$out
+ addi $out,$out,16
+ bne Loop_ctr32_enc # cr0 comes from the subic. above: loop while blocks remain
+
+ addi $out,$out,-1
+ lvx $inout,0,$out # redundant in aligned case
+ vsel $inout,$outhead,$inout,$outmask
+ stvx $inout,0,$out
+
+ mtspr 256,$vrsave # put back the SPR 256 value saved on entry
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,6,0
+ .long 0
+___
+#########################################################################
+{{ # Optimized CTR procedure #
+my $key_="r11";
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
+my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
+my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
+my ($two,$three,$four)=($outhead,$outperm,$outmask);
+
+$code.=<<___;
+.align 5
+_aesp8_ctr32_encrypt8x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ li r10,`$FRAME+8*16+15`
+ li r11,`$FRAME+8*16+31`
+ stvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ stvx v21,r11,$sp
+ addi r11,r11,32
+ stvx v22,r10,$sp
+ addi r10,r10,32
+ stvx v23,r11,$sp
+ addi r11,r11,32
+ stvx v24,r10,$sp
+ addi r10,r10,32
+ stvx v25,r11,$sp
+ addi r11,r11,32
+ stvx v26,r10,$sp
+ addi r10,r10,32
+ stvx v27,r11,$sp
+ addi r11,r11,32
+ stvx v28,r10,$sp
+ addi r10,r10,32
+ stvx v29,r11,$sp
+ addi r11,r11,32
+ stvx v30,r10,$sp
+ stvx v31,r11,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key # load key schedule
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ lvx v31,$x00,$key
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_ctr32_enc_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key
+ addi $key,$key,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_ctr32_enc_key
+
+ lvx v26,$x10,$key
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key
+ ?vperm v29,v29,v30,$keyperm
+ lvx $out0,$x70,$key # borrow $out0
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$out0,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vadduqm $two,$one,$one
+ subi $inp,$inp,15 # undo "caller"
+ $SHL $len,$len,4
+
+ vadduqm $out1,$ivec,$one # counter values ...
+ vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit)
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ le?li $idx,8
+ vadduqm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ le?lvsl $inpperm,0,$idx
+ vadduqm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ le?vspltisb $tmp,0x0f
+ vadduqm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
+ vadduqm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vadduqm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ vadduqm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ vxor $out7,$out7,$rndkey0
+
+ mtctr $rounds
+ b Loop_ctr32_enc8x
+.align 5
+Loop_ctr32_enc8x:
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+Loop_ctr32_enc8x_middle:
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_ctr32_enc8x
+
+ subic r11,$len,256 # $len-256, borrow $key_
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ vcipher $out6,$out6,v24
+ vcipher $out7,$out7,v24
+
+ subfe r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ vcipher $out6,$out6,v25
+ vcipher $out7,$out7,v25
+
+ and r0,r0,r11
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+ vcipher $out6,$out6,v26
+ vcipher $out7,$out7,v26
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ subic $len,$len,129 # $len-=129
+ vcipher $out0,$out0,v27
+ addi $len,$len,1 # $len-=128 really
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+ vcipher $out6,$out6,v27
+ vcipher $out7,$out7,v27
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+
+ vcipher $out0,$out0,v28
+ lvx_u $in0,$x00,$inp # load input
+ vcipher $out1,$out1,v28
+ lvx_u $in1,$x10,$inp
+ vcipher $out2,$out2,v28
+ lvx_u $in2,$x20,$inp
+ vcipher $out3,$out3,v28
+ lvx_u $in3,$x30,$inp
+ vcipher $out4,$out4,v28
+ lvx_u $in4,$x40,$inp
+ vcipher $out5,$out5,v28
+ lvx_u $in5,$x50,$inp
+ vcipher $out6,$out6,v28
+ lvx_u $in6,$x60,$inp
+ vcipher $out7,$out7,v28
+ lvx_u $in7,$x70,$inp
+ addi $inp,$inp,0x80
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v29
+ le?vperm $in1,$in1,$in1,$inpperm
+ vcipher $out2,$out2,v29
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out3,$out3,v29
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out4,$out4,v29
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out5,$out5,v29
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out6,$out6,v29
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out7,$out7,v29
+ le?vperm $in7,$in7,$in7,$inpperm
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in7 are loaded
+ # with last "words"
+ subfe. r0,r0,r0 # borrow?-1:0
+ vcipher $out0,$out0,v30
+ vxor $in0,$in0,v31 # xor with last round key
+ vcipher $out1,$out1,v30
+ vxor $in1,$in1,v31
+ vcipher $out2,$out2,v30
+ vxor $in2,$in2,v31
+ vcipher $out3,$out3,v30
+ vxor $in3,$in3,v31
+ vcipher $out4,$out4,v30
+ vxor $in4,$in4,v31
+ vcipher $out5,$out5,v30
+ vxor $in5,$in5,v31
+ vcipher $out6,$out6,v30
+ vxor $in6,$in6,v31
+ vcipher $out7,$out7,v30
+ vxor $in7,$in7,v31
+
+ bne Lctr32_enc8x_break # did $len-129 borrow?
+
+ vcipherlast $in0,$out0,$in0
+ vcipherlast $in1,$out1,$in1
+ vadduqm $out1,$ivec,$one # counter values ...
+ vcipherlast $in2,$out2,$in2
+ vadduqm $out2,$ivec,$two
+ vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
+ vcipherlast $in3,$out3,$in3
+ vadduqm $out3,$out1,$two
+ vxor $out1,$out1,$rndkey0
+ vcipherlast $in4,$out4,$in4
+ vadduqm $out4,$out2,$two
+ vxor $out2,$out2,$rndkey0
+ vcipherlast $in5,$out5,$in5
+ vadduqm $out5,$out3,$two
+ vxor $out3,$out3,$rndkey0
+ vcipherlast $in6,$out6,$in6
+ vadduqm $out6,$out4,$two
+ vxor $out4,$out4,$rndkey0
+ vcipherlast $in7,$out7,$in7
+ vadduqm $out7,$out5,$two
+ vxor $out5,$out5,$rndkey0
+ le?vperm $in0,$in0,$in0,$inpperm
+ vadduqm $ivec,$out6,$two # next counter value
+ vxor $out6,$out6,$rndkey0
+ le?vperm $in1,$in1,$in1,$inpperm
+ vxor $out7,$out7,$rndkey0
+ mtctr $rounds
+
+ vcipher $out0,$out0,v24
+ stvx_u $in0,$x00,$out
+ le?vperm $in2,$in2,$in2,$inpperm
+ vcipher $out1,$out1,v24
+ stvx_u $in1,$x10,$out
+ le?vperm $in3,$in3,$in3,$inpperm
+ vcipher $out2,$out2,v24
+ stvx_u $in2,$x20,$out
+ le?vperm $in4,$in4,$in4,$inpperm
+ vcipher $out3,$out3,v24
+ stvx_u $in3,$x30,$out
+ le?vperm $in5,$in5,$in5,$inpperm
+ vcipher $out4,$out4,v24
+ stvx_u $in4,$x40,$out
+ le?vperm $in6,$in6,$in6,$inpperm
+ vcipher $out5,$out5,v24
+ stvx_u $in5,$x50,$out
+ le?vperm $in7,$in7,$in7,$inpperm
+ vcipher $out6,$out6,v24
+ stvx_u $in6,$x60,$out
+ vcipher $out7,$out7,v24
+ stvx_u $in7,$x70,$out
+ addi $out,$out,0x80
+
+ b Loop_ctr32_enc8x_middle
+
+.align 5
+Lctr32_enc8x_break:
+ cmpwi $len,-0x60
+ blt Lctr32_enc8x_one
+ nop
+ beq Lctr32_enc8x_two
+ cmpwi $len,-0x40
+ blt Lctr32_enc8x_three
+ nop
+ beq Lctr32_enc8x_four
+ cmpwi $len,-0x20
+ blt Lctr32_enc8x_five
+ nop
+ beq Lctr32_enc8x_six
+ cmpwi $len,0x00
+ blt Lctr32_enc8x_seven
+
+Lctr32_enc8x_eight:
+ vcipherlast $out0,$out0,$in0
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ vcipherlast $out5,$out5,$in5
+ vcipherlast $out6,$out6,$in6
+ vcipherlast $out7,$out7,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ le?vperm $out7,$out7,$out7,$inpperm
+ stvx_u $out6,$x60,$out
+ stvx_u $out7,$x70,$out
+ addi $out,$out,0x80
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_seven:
+ vcipherlast $out0,$out0,$in1
+ vcipherlast $out1,$out1,$in2
+ vcipherlast $out2,$out2,$in3
+ vcipherlast $out3,$out3,$in4
+ vcipherlast $out4,$out4,$in5
+ vcipherlast $out5,$out5,$in6
+ vcipherlast $out6,$out6,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ le?vperm $out6,$out6,$out6,$inpperm
+ stvx_u $out5,$x50,$out
+ stvx_u $out6,$x60,$out
+ addi $out,$out,0x70
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_six:
+ vcipherlast $out0,$out0,$in2
+ vcipherlast $out1,$out1,$in3
+ vcipherlast $out2,$out2,$in4
+ vcipherlast $out3,$out3,$in5
+ vcipherlast $out4,$out4,$in6
+ vcipherlast $out5,$out5,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ le?vperm $out5,$out5,$out5,$inpperm
+ stvx_u $out4,$x40,$out
+ stvx_u $out5,$x50,$out
+ addi $out,$out,0x60
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_five:
+ vcipherlast $out0,$out0,$in3
+ vcipherlast $out1,$out1,$in4
+ vcipherlast $out2,$out2,$in5
+ vcipherlast $out3,$out3,$in6
+ vcipherlast $out4,$out4,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$inpperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_four:
+ vcipherlast $out0,$out0,$in4
+ vcipherlast $out1,$out1,$in5
+ vcipherlast $out2,$out2,$in6
+ vcipherlast $out3,$out3,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$inpperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_three:
+ vcipherlast $out0,$out0,$in5
+ vcipherlast $out1,$out1,$in6
+ vcipherlast $out2,$out2,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ le?vperm $out2,$out2,$out2,$inpperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_two:
+ vcipherlast $out0,$out0,$in6
+ vcipherlast $out1,$out1,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ le?vperm $out1,$out1,$out1,$inpperm
+ stvx_u $out0,$x00,$out
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ b Lctr32_enc8x_done
+
+.align 5
+Lctr32_enc8x_one:
+ vcipherlast $out0,$out0,$in7
+
+ le?vperm $out0,$out0,$out0,$inpperm
+ stvx_u $out0,0,$out
+ addi $out,$out,0x10
+
+Lctr32_enc8x_done:
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $inpperm,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+ stvx $inpperm,r10,$sp
+ addi r10,r10,32
+ stvx $inpperm,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
+___
+}} }}}
+
+#########################################################################
+{{{ # XTS procedures #
+# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
+# const AES_KEY *key1, const AES_KEY *key2, #
+# [const] unsigned char iv[16]); #
+# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
+# input tweak value is assumed to be encrypted already, and last tweak #
+# value, one suitable for consecutive call on same chunk of data, is #
+# written back to original buffer. In addition, in "tweak chaining" #
+# mode only complete input blocks are processed. #
+
+my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); # r3..r10, in this order
+my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); # v0..v2
+my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); # v3..v7
+my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); # v8..v12
+my $taillen = $key2; # same register as $key2 (r7)
+
+ ($inp,$idx) = ($idx,$inp); # reassign: $inp becomes r10, $idx becomes r3
+
+$code.=<<___;
+.globl .${prefix}_xts_encrypt # XTS encryption entry point, returns 0 on success or -1 when len < 16
+ mr $inp,r3 # reassign
+ li r3,-1 # preset the return value to -1
+ ${UCMP}i $len,16 # fewer than 16 bytes?
+ bltlr- # yes: return with r3 = -1
+
+ lis r0,0xfff0 # upper halfword of the mask written to vrsave below
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0 # install the new vrsave mask
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11 # NOTE(review): le?-prefixed lines are presumably little-endian only - confirm in the xlate script
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_enc_no_key2 # yes: tweak chaining mode, the iv is used as the tweak as is
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2) # round count is read from offset 240 of the key2 structure
+ srwi $rounds,$rounds,1 # the loop below performs two rounds per iteration
+ subi $rounds,$rounds,1 # the remaining rounds are peeled after the loop
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0 # xor the iv with the first round key
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_enc: # encrypt the iv with key2, two rounds per pass
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0 # tweak = iv encrypted under key2
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_enc
+
+Lxts_enc_no_key2:
+ li $idx,-16 # mask that clears the low four bits
+ and $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_enc:
+ lvx $inptail,0,$inp # aligned quadword at inp, which was advanced by 15 above
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1) # round count is read from offset 240 of the key1 structure
+ srwi $rounds,$rounds,1 # two rounds per loop iteration
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_encrypt6x # 96 bytes or more: take the 6-way unrolled path
+
+ andi. $taillen,$len,15 # taillen = len mod 16
+ subic r0,$len,32 # carry records whether len >= 32
+ subi $taillen,$taillen,16 # taillen = (len mod 16) - 16
+ subfe r0,r0,r0 # r0 = (len < 32) ? -1 : 0
+ and r0,r0,$taillen # keep the adjustment only when len < 32
+ add $inp,$inp,r0 # when len < 32, back inp up by 16 - (len mod 16)
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm # assemble the unaligned input block
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak # xor the input block with the tweak
+ vxor $inout,$inout,$rndkey0 # xor with the first round key
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+ b Loop_xts_enc
+
+.align 5
+Loop_xts_enc: # two cipher rounds per iteration
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_enc
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak # fold the tweak into the last round key
+ vcipherlast $output,$inout,$rndkey0 # last round, result already xored with the tweak
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16 # one block done
+ beq Lxts_enc_done # nothing left
+
+ vmr $inout,$inptail # start assembling the next input block
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ subic r0,$len,32 # carry records whether len >= 32
+ subfe r0,r0,r0 # r0 = (len < 32) ? -1 : 0
+ and r0,r0,$taillen # taillen still holds (original len mod 16) - 16
+ add $inp,$inp,r0 # same input pointer back-up as before the first block
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $output,$output,$rndkey0 # just in case $len<16
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_enc # at least one full block left
+
+ vxor $output,$output,$tweak # fewer than 16 bytes left: previous output block gets the new tweak too
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm # byte select mask derived from the tail length
+ vsel $inout,$inout,$output,$inptail # merge tail input bytes with bytes of the previous output block
+
+ subi r11,$out,17 # r11 = one byte before the last stored block
+ subi $out,$out,16 # the final block will overwrite the last stored block
+ mtctr $len # copy len (tail) bytes
+ li $len,16 # makes the re-entered loop finish after one more block
+Loop_xts_enc_steal:
+ lbzu r0,1(r11) # next byte of the last stored block
+ stb r0,16(r11) # copy it 16 bytes further on
+ bdnz Loop_xts_enc_steal
+
+ mtctr $rounds
+ b Loop_xts_enc # one more time...
+
+Lxts_enc_done:
+ ${UCMP}i $ivp,0 # ivp was zeroed above when key2 was supplied
+ beq Lxts_enc_ret # no tweak write-back in that case
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp # write the next tweak back through ivp
+
+Lxts_enc_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0 # return 0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
+
+.globl .${prefix}_xts_decrypt # XTS decryption entry point, returns 0 on success or -1 when len < 16
+ mr $inp,r3 # reassign
+ li r3,-1 # preset the return value to -1
+ ${UCMP}i $len,16 # fewer than 16 bytes?
+ bltlr- # yes: return with r3 = -1
+
+ lis r0,0xfff8 # upper halfword of the mask written to vrsave below
+ mfspr r12,256 # save vrsave
+ li r11,0
+ mtspr 256,r0 # install the new vrsave mask
+
+ andi. r0,$len,15 # r0 = len mod 16
+ neg r0,r0
+ andi. r0,r0,16 # r0 = 16 when there is a partial tail, else 0
+ sub $len,$len,r0 # hold back one full block when there is a partial tail
+
+ vspltisb $seven,0x07 # 0x070707..07
+ le?lvsl $leperm,r11,r11 # NOTE(review): le?-prefixed lines are presumably little-endian only - confirm in the xlate script
+ le?vspltisb $tmp,0x0f
+ le?vxor $leperm,$leperm,$seven
+
+ li $idx,15
+ lvx $tweak,0,$ivp # load [unaligned] iv
+ lvsl $inpperm,0,$ivp
+ lvx $inptail,$idx,$ivp
+ le?vxor $inpperm,$inpperm,$tmp
+ vperm $tweak,$tweak,$inptail,$inpperm
+
+ neg r11,$inp
+ lvsr $inpperm,0,r11 # prepare for unaligned load
+ lvx $inout,0,$inp
+ addi $inp,$inp,15 # 15 is not typo
+ le?vxor $inpperm,$inpperm,$tmp
+
+ ${UCMP}i $key2,0 # key2==NULL?
+ beq Lxts_dec_no_key2 # yes: tweak chaining mode, the iv is used as the tweak as is
+
+ ?lvsl $keyperm,0,$key2 # prepare for unaligned key
+ lwz $rounds,240($key2) # round count is read from offset 240 of the key2 structure
+ srwi $rounds,$rounds,1 # the loop below performs two rounds per iteration
+ subi $rounds,$rounds,1 # the remaining rounds are peeled after the loop
+ li $idx,16
+
+ lvx $rndkey0,0,$key2
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $tweak,$tweak,$rndkey0 # xor the iv with the first round key
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ mtctr $rounds
+
+Ltweak_xts_dec: # the tweak is produced with vcipher (encryption) here as well
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipher $tweak,$tweak,$rndkey0
+ lvx $rndkey0,$idx,$key2
+ addi $idx,$idx,16
+ bdnz Ltweak_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vcipher $tweak,$tweak,$rndkey1
+ lvx $rndkey1,$idx,$key2
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vcipherlast $tweak,$tweak,$rndkey0 # tweak = iv encrypted under key2
+
+ li $ivp,0 # don't chain the tweak
+ b Lxts_dec
+
+Lxts_dec_no_key2:
+ neg $idx,$len
+ andi. $idx,$idx,15 # distance from len up to the next multiple of 16
+ add $len,$len,$idx # in "tweak chaining"
+ # mode only complete
+ # blocks are processed
+Lxts_dec:
+ lvx $inptail,0,$inp # aligned quadword at inp, which was advanced by 15 above
+ addi $inp,$inp,16
+
+ ?lvsl $keyperm,0,$key1 # prepare for unaligned key
+ lwz $rounds,240($key1) # round count is read from offset 240 of the key1 structure
+ srwi $rounds,$rounds,1 # two rounds per loop iteration
+ subi $rounds,$rounds,1
+ li $idx,16
+
+ vslb $eighty7,$seven,$seven # 0x808080..80
+ vor $eighty7,$eighty7,$seven # 0x878787..87
+ vspltisb $tmp,1 # 0x010101..01
+ vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
+
+ ${UCMP}i $len,96
+ bge _aesp8_xts_decrypt6x # 96 bytes or more: take the 6-way unrolled path
+
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm # assemble the unaligned input block
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak # xor the input block with the tweak
+ vxor $inout,$inout,$rndkey0 # xor with the first round key
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ mtctr $rounds
+
+ ${UCMP}i $len,16
+ blt Ltail_xts_dec # only the held-back block plus a partial tail remain
+ be?b Loop_xts_dec
+
+.align 5
+Loop_xts_dec: # two inverse cipher rounds per iteration
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak # fold the tweak into the last round key
+ vncipherlast $output,$inout,$rndkey0 # last round, result already xored with the tweak
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out
+ be?stvx_u $output,0,$out
+ addi $out,$out,16
+
+ subic. $len,$len,16 # one block done
+ beq Lxts_dec_done # nothing left
+
+ vmr $inout,$inptail # start assembling the next input block
+ lvx $inptail,0,$inp
+ addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $inout,$inout,$tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ mtctr $rounds
+ ${UCMP}i $len,16
+ bge Loop_xts_dec # at least one full block left
+
+Ltail_xts_dec: # partial tail: the held-back full block is decrypted with the following tweak
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak1,$tweak,$tweak # result goes to tweak1, tweak itself is kept
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak1,$tweak1,$tmp
+
+ subi $inp,$inp,16
+ add $inp,$inp,$len # net effect: inp += len - 16
+
+ vxor $inout,$inout,$tweak # :-(
+ vxor $inout,$inout,$tweak1 # :-)
+
+Loop_xts_dec_short: # same round loop as Loop_xts_dec, used once for the held-back block
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vncipher $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+ bdnz Loop_xts_dec_short
+
+ ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
+ vncipher $inout,$inout,$rndkey1
+ lvx $rndkey1,$idx,$key1
+ li $idx,16
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+ vxor $rndkey0,$rndkey0,$tweak1 # fold tweak1 into the last round key
+ vncipherlast $output,$inout,$rndkey0
+
+ le?vperm $tmp,$output,$output,$leperm
+ be?nop
+ le?stvx_u $tmp,0,$out # stored without advancing out
+ be?stvx_u $output,0,$out
+
+ vmr $inout,$inptail
+ lvx $inptail,0,$inp
+ #addi $inp,$inp,16
+ lvx $rndkey0,0,$key1
+ lvx $rndkey1,$idx,$key1
+ addi $idx,$idx,16
+ vperm $inout,$inout,$inptail,$inpperm
+ ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
+
+ lvsr $inpperm,0,$len # $inpperm is no longer needed
+ vxor $inptail,$inptail,$inptail # $inptail is no longer needed
+ vspltisb $tmp,-1
+ vperm $inptail,$inptail,$tmp,$inpperm # byte select mask derived from the tail length
+ vsel $inout,$inout,$output,$inptail # merge tail input bytes with bytes of the block just produced
+
+ vxor $rndkey0,$rndkey0,$tweak # the final block uses the earlier tweak
+ vxor $inout,$inout,$rndkey0
+ lvx $rndkey0,$idx,$key1
+ addi $idx,$idx,16
+
+ subi r11,$out,1 # r11 = one byte before the block just stored
+ mtctr $len # copy len (tail) bytes
+ li $len,16 # makes the re-entered loop finish after one more block
+Loop_xts_dec_steal:
+ lbzu r0,1(r11) # next byte of the block just stored
+ stb r0,16(r11) # copy it 16 bytes further on
+ bdnz Loop_xts_dec_steal
+
+ mtctr $rounds
+ b Loop_xts_dec # one more time...
+
+Lxts_dec_done:
+ ${UCMP}i $ivp,0 # ivp was zeroed above when key2 was supplied
+ beq Lxts_dec_ret # no tweak write-back in that case
+
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vsldoi $tmp,$tmp,$tmp,15
+ vand $tmp,$tmp,$eighty7
+ vxor $tweak,$tweak,$tmp
+
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp # write the next tweak back through ivp
+
+Lxts_dec_ret:
+ mtspr 256,r12 # restore vrsave
+ li r3,0 # return 0
+ blr
+ .long 0
+ .byte 0,12,0x04,0,0x80,6,6,0
+ .long 0
+.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
+___
+#########################################################################
+{{ # Optimized XTS procedures #
+my $key_=$key2; # same register as $key2; used below as pointer to the round keys copied to the stack
+my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); # r0, r3, r26..r31; loaded with 0x10..0x70 below
+ $x00=0 if ($flavour =~ /osx/); # literal 0 instead of r0 for osx flavours
+my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); # v0..v5
+my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); # v7, v12..v16
+my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); # v17..v22
+my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
+ # v26-v31 last 6 round keys
+my ($keyperm)=($out0); # aliases with "caller", redundant assignment
+my $taillen=$x70; # shares r31 with $x70
+
+$code.=<<___;
+.align 5
+_aesp8_xts_encrypt6x: # 6-way unrolled XTS encryption, reached by branch from the xts_encrypt entry point when len >= 96
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11 # keep the return address, bl is used further down
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1 # all-ones mask for vrsave
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10 # x10..x70 become constant byte offsets 0x10..0x70
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ xxlor 2, 32+$eighty7, 32+$eighty7 # VSR 2 keeps the caller form 0x870101..01
+ vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
+ xxlor 1, 32+$eighty7, 32+$eighty7 # VSR 1 keeps the 0x010101..87 form
+
+ # Load XOR Lconsts.
+ mr $x70, r6 # park r6 (key1) while r6 is used for the constants address
+ bl Lconsts # NOTE(review): Lconsts is defined outside this chunk, presumably it returns the table address in r6 - confirm
+ lxvw4x 0, $x40, r6 # load XOR contents
+ mr r6, $x70 # restore r6 (key1)
+ li $x70,0x70 # and the 0x70 offset
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15 # round key copies are kept on the stack from here
+ mtctr $rounds
+
+Load_xts_enc_key: # align round keys and copy them to the stack, two per iteration
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_enc_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ # The next tweak is generated with the 0x010101..87 constant as follows:
+ # eighty7 = 0x010101..87
+ # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
+ # vand tmp, tmp, eighty7 # last byte with carry
+ # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
+ # xxlor vsx, 0, 0
+ # vpermxor tweak, tweak, tmp, vsx
+
+ vperm $in0,$inout,$inptail,$inpperm # first input block, assembled from the loads done by the caller
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0 # twk0 = tweak with the first round key folded in
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ xxlor 32+$in1, 0, 0 # in1 is scratch here: copy of the constant in VSR 0
+ vpermxor $tweak, $tweak, $tmp, $in1
+
+ lvx_u $in1,$x10,$inp # second input block
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15 # taillen = len mod 16
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen # len now counts whole blocks only
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60 # account for the six blocks loaded here
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
+
+ vxor v31,v31,$rndkey0 # twkN already contain rndkey0, so twkN xor v31 gives tweak xor last round key
+ mtctr $rounds
+ b Loop_xts_enc6x
+
+.align 5
+Loop_xts_enc6x: # first rounds for six blocks, keys streamed from the stack copy
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc6x
+
+ xxlor 32+$eighty7, 1, 1 # 0x010101..87
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ vcipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vcipher $out4,$out4,v25
+ vcipher $out5,$out5,v25
+
+ and r0,r0,$len # r0 = borrow ? len (negative) : 0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out0,$out0,v26
+ vcipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
+ vcipher $out4,$out4,v26
+ vcipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out0,$out0,v27
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vcipher $out4,$out4,v27
+ vcipher $out5,$out5,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vcipher $out4,$out4,v28
+ vcipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vcipher $out0,$out0,v29
+ vcipher $out1,$out1,v29
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vcipher $out4,$out4,v29
+ vcipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+
+ vcipher $out0,$out0,v30
+ vcipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
+ vcipher $out4,$out4,v30
+ vcipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vcipherlast $out0,$out0,$in0 # inN hold the old tweak xor last round key at this point
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vcipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vcipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vcipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vcipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ xxlor 10, 32+$in0, 32+$in0 # park in0 in VSR 10 while it is used as scratch
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
+ xxlor 32+$in0, 10, 10 # get in0 back
+ vcipherlast $tmp,$out5,$in5 # last block might be needed
+ # in stealing mode
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0 # and start the next block right away
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$tmp,$tmp,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ le?stvx_u $out5,$x50,$out
+ be?stvx_u $tmp, $x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_enc6x # did $len-=96 borrow?
+
+ xxlor 32+$eighty7, 2, 2 # restore the 0x870101..01 form saved in VSR 2
+
+ addic. $len,$len,0x60 # len = bytes in the remaining whole blocks (0..0x50)
+ beq Lxts_enc6x_zero
+ cmpwi $len,0x20
+ blt Lxts_enc6x_one
+ nop
+ beq Lxts_enc6x_two
+ cmpwi $len,0x40
+ blt Lxts_enc6x_three
+ nop
+ beq Lxts_enc6x_four
+
+Lxts_enc6x_five: # five blocks left, they are in in1..in5
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $tmp,$out4,$twk5 # last block prep for stealing
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_enc6x_steal # CR0 comes from the taillen compare inside _aesp8_xts_enc5x
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_four: # four blocks left, they are in in2..in5
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4 # unused lane is zeroed
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $tmp,$out3,$twk4 # last block prep for stealing
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_enc6x_steal # CR0 comes from the taillen compare inside _aesp8_xts_enc5x
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_three: # three blocks left, they are in in3..in5
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3 # unused lanes are zeroed
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $tmp,$out2,$twk3 # last block prep for stealing
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_enc6x_steal # CR0 comes from the taillen compare inside _aesp8_xts_enc5x
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_two: # two blocks left, they are in in4 and in5
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2 # unused lanes are zeroed
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_enc5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vxor $tmp,$out1,$twk2 # last block prep for stealing
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_enc6x_steal # CR0 comes from the taillen compare inside _aesp8_xts_enc5x
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_one: # one block left, it is in in5
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_enc1x: # single block rounds, also re-entered once after stealing
+ vcipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_enc1x
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0 # CR0 is consumed by the bne after the store below
+ vcipher $out0,$out0,v24
+
+ subi $inp,$inp,16 # net effect: inp += taillen - 16
+ vcipher $out0,$out0,v25
+
+ lvsr $inpperm,0,$taillen
+ vcipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vcipher $out0,$out0,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vcipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31 # tweak xor last round key
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vcipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vxor $tmp,$out0,$twk1 # last block prep for stealing
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ bne Lxts_enc6x_steal # taillen was non-zero: a partial tail remains
+ b Lxts_enc6x_done
+
+.align 4
+Lxts_enc6x_zero: # no whole block left, only a possible partial tail
+ cmpwi $taillen,0
+ beq Lxts_enc6x_done
+
+ add $inp,$inp,$taillen
+ subi $inp,$inp,16 # net effect: inp += taillen - 16
+ lvx_u $in0,0,$inp
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ le?vperm $in0,$in0,$in0,$leperm
+ vperm $in0,$in0,$in0,$inpperm
+ vxor $tmp,$tmp,$twk0 # tmp still holds the last block produced by the main loop
+Lxts_enc6x_steal:
+ vxor $in0,$in0,$twk0
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm # byte select mask derived from taillen
+ vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
+
+ subi r30,$out,17 # r30 = one byte before the last stored block
+ subi $out,$out,16 # the final block will overwrite the last stored block
+ mtctr $taillen # copy taillen bytes
+Loop_xts_enc6x_steal:
+ lbzu r0,1(r30) # next byte of the last stored block
+ stb r0,16(r30) # copy it 16 bytes further on
+ bdnz Loop_xts_enc6x_steal
+
+ li $taillen,0 # makes the re-entered 1x path end at the done label
+ mtctr $rounds
+ b Loop_xts_enc1x # one more time...
+
+.align 4
+Lxts_enc6x_done:
+ ${UCMP}i $ivp,0 # ivp is zero when the tweak must not be chained
+ beq Lxts_enc6x_ret
+
+ vxor $tweak,$twk0,$rndkey0 # strip the folded-in first round key again
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp # write the next tweak back through ivp
+
+Lxts_enc6x_ret:
+ mtlr r11 # return address saved in the prologue
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` # release the stack frame
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_enc5x: # helper: runs all rounds for out0..out4, called with bl from the tail cases of _aesp8_xts_encrypt6x
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_enc5x # the label doubles as the loop head, ctr is set by the caller
+
+ add $inp,$inp,$taillen
+ cmpwi $taillen,0 # CR0 is left for the caller, which branches on it after the return
+ vcipher $out0,$out0,v24
+ vcipher $out1,$out1,v24
+ vcipher $out2,$out2,v24
+ vcipher $out3,$out3,v24
+ vcipher $out4,$out4,v24
+
+ subi $inp,$inp,16 # net effect: inp += taillen - 16
+ vcipher $out0,$out0,v25
+ vcipher $out1,$out1,v25
+ vcipher $out2,$out2,v25
+ vcipher $out3,$out3,v25
+ vcipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31 # tweak xor last round key, consumed by vcipherlast below
+
+ vcipher $out0,$out0,v26
+ lvsr $inpperm,r0,$taillen # $in5 is no more
+ vcipher $out1,$out1,v26
+ vcipher $out2,$out2,v26
+ vcipher $out3,$out3,v26
+ vcipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vcipher $out0,$out0,v27
+ lvx_u $in0,0,$inp # 16 bytes at the adjusted inp, used by the caller for stealing
+ vcipher $out1,$out1,v27
+ vcipher $out2,$out2,v27
+ vcipher $out3,$out3,v27
+ vcipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vcipher $out0,$out0,v28
+ vcipher $out1,$out1,v28
+ vcipher $out2,$out2,v28
+ vcipher $out3,$out3,v28
+ vcipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vcipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vcipher $out1,$out1,v29
+ vcipher $out2,$out2,v29
+ vcipher $out3,$out3,v29
+ vcipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vcipher $out0,$out0,v30
+ vperm $in0,$in0,$in0,$inpperm
+ vcipher $out1,$out1,v30
+ vcipher $out2,$out2,v30
+ vcipher $out3,$out3,v30
+ vcipher $out4,$out4,v30
+
+ vcipherlast $out0,$out0,$twk0 # last round for all five lanes
+ vcipherlast $out1,$out1,$in1
+ vcipherlast $out2,$out2,$in2
+ vcipherlast $out3,$out3,$in3
+ vcipherlast $out4,$out4,$in4
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+
+.align 5
+_aesp8_xts_decrypt6x:
+ $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
+ mflr r11
+ li r7,`$FRAME+8*16+15`
+ li r3,`$FRAME+8*16+31`
+ $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
+ stvx v20,r7,$sp # ABI says so
+ addi r7,r7,32
+ stvx v21,r3,$sp
+ addi r3,r3,32
+ stvx v22,r7,$sp
+ addi r7,r7,32
+ stvx v23,r3,$sp
+ addi r3,r3,32
+ stvx v24,r7,$sp
+ addi r7,r7,32
+ stvx v25,r3,$sp
+ addi r3,r3,32
+ stvx v26,r7,$sp
+ addi r7,r7,32
+ stvx v27,r3,$sp
+ addi r3,r3,32
+ stvx v28,r7,$sp
+ addi r7,r7,32
+ stvx v29,r3,$sp
+ addi r3,r3,32
+ stvx v30,r7,$sp
+ stvx v31,r3,$sp
+ li r0,-1
+ stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
+ li $x10,0x10
+ $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ li $x20,0x20
+ $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ li $x30,0x30
+ $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ li $x40,0x40
+ $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ li $x50,0x50
+ $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ li $x60,0x60
+ $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ li $x70,0x70
+ mtspr 256,r0
+
+ xxlor 2, 32+$eighty7, 32+$eighty7
+ vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
+ xxlor 1, 32+$eighty7, 32+$eighty7
+
+ # Load XOR Lconsts.
+ mr $x70, r6
+ bl Lconsts
+ lxvw4x 0, $x40, r6 # load XOR contents
+ mr r6, $x70
+ li $x70,0x70
+
+ subi $rounds,$rounds,3 # -4 in total
+
+ lvx $rndkey0,$x00,$key1 # load key schedule
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ lvx v31,$x00,$key1
+ ?vperm $rndkey0,$rndkey0,v30,$keyperm
+ addi $key_,$sp,$FRAME+15
+ mtctr $rounds
+
+Load_xts_dec_key:
+ ?vperm v24,v30,v31,$keyperm
+ lvx v30,$x10,$key1
+ addi $key1,$key1,0x20
+ stvx v24,$x00,$key_ # off-load round[1]
+ ?vperm v25,v31,v30,$keyperm
+ lvx v31,$x00,$key1
+ stvx v25,$x10,$key_ # off-load round[2]
+ addi $key_,$key_,0x20
+ bdnz Load_xts_dec_key
+
+ lvx v26,$x10,$key1
+ ?vperm v24,v30,v31,$keyperm
+ lvx v27,$x20,$key1
+ stvx v24,$x00,$key_ # off-load round[3]
+ ?vperm v25,v31,v26,$keyperm
+ lvx v28,$x30,$key1
+ stvx v25,$x10,$key_ # off-load round[4]
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ ?vperm v26,v26,v27,$keyperm
+ lvx v29,$x40,$key1
+ ?vperm v27,v27,v28,$keyperm
+ lvx v30,$x50,$key1
+ ?vperm v28,v28,v29,$keyperm
+ lvx v31,$x60,$key1
+ ?vperm v29,v29,v30,$keyperm
+ lvx $twk5,$x70,$key1 # borrow $twk5
+ ?vperm v30,v30,v31,$keyperm
+ lvx v24,$x00,$key_ # pre-load round[1]
+ ?vperm v31,v31,$twk5,$keyperm
+ lvx v25,$x10,$key_ # pre-load round[2]
+
+ vperm $in0,$inout,$inptail,$inpperm
+ subi $inp,$inp,31 # undo "caller"
+ vxor $twk0,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ vand $tmp,$tmp,$eighty7
+ vxor $out0,$in0,$twk0
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
+
+ lvx_u $in1,$x10,$inp
+ vxor $twk1,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in1,$in1,$in1,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out1,$in1,$twk1
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
+
+ lvx_u $in2,$x20,$inp
+ andi. $taillen,$len,15
+ vxor $twk2,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in2,$in2,$in2,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out2,$in2,$twk2
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
+
+ lvx_u $in3,$x30,$inp
+ sub $len,$len,$taillen
+ vxor $twk3,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in3,$in3,$in3,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out3,$in3,$twk3
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
+
+ lvx_u $in4,$x40,$inp
+ subi $len,$len,0x60
+ vxor $twk4,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in4,$in4,$in4,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out4,$in4,$twk4
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
+
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ vxor $twk5,$tweak,$rndkey0
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vaddubm $tweak,$tweak,$tweak
+ le?vperm $in5,$in5,$in5,$leperm
+ vand $tmp,$tmp,$eighty7
+ vxor $out5,$in5,$twk5
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
+
+ vxor v31,v31,$rndkey0
+ mtctr $rounds
+ b Loop_xts_dec6x
+
+.align 5
+Loop_xts_dec6x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec6x
+
+ xxlor 32+$eighty7, 1, 1 # 0x010101..87
+
+ subic $len,$len,96 # $len-=96
+ vxor $in0,$twk0,v31 # xor with last round key
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk0,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ vncipher $out5,$out5,v24
+
+ subfe. r0,r0,r0 # borrow?-1:0
+ vand $tmp,$tmp,$eighty7
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vxor $in1,$twk1,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk1,$tweak,$rndkey0
+ vncipher $out4,$out4,v25
+ vncipher $out5,$out5,v25
+
+ and r0,r0,$len
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
+ vncipher $out4,$out4,v26
+ vncipher $out5,$out5,v26
+
+ add $inp,$inp,r0 # $inp is adjusted in such
+ # way that at exit from the
+ # loop inX-in5 are loaded
+ # with last "words"
+ vxor $in2,$twk2,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk2,$tweak,$rndkey0
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out0,$out0,v27
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vand $tmp,$tmp,$eighty7
+ vncipher $out4,$out4,v27
+ vncipher $out5,$out5,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vxor $in3,$twk3,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk3,$tweak,$rndkey0
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vaddubm $tweak,$tweak,$tweak
+ vncipher $out4,$out4,v28
+ vncipher $out5,$out5,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vand $tmp,$tmp,$eighty7
+
+ vncipher $out0,$out0,v29
+ vncipher $out1,$out1,v29
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vxor $in4,$twk4,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk4,$tweak,$rndkey0
+ vncipher $out4,$out4,v29
+ vncipher $out5,$out5,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vaddubm $tweak,$tweak,$tweak
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vand $tmp,$tmp,$eighty7
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
+ vncipher $out4,$out4,v30
+ vncipher $out5,$out5,v30
+ vxor $in5,$twk5,v31
+ vsrab $tmp,$tweak,$seven # next tweak value
+ vxor $twk5,$tweak,$rndkey0
+
+ vncipherlast $out0,$out0,$in0
+ lvx_u $in0,$x00,$inp # load next input block
+ vaddubm $tweak,$tweak,$tweak
+ vncipherlast $out1,$out1,$in1
+ lvx_u $in1,$x10,$inp
+ vncipherlast $out2,$out2,$in2
+ le?vperm $in0,$in0,$in0,$leperm
+ lvx_u $in2,$x20,$inp
+ vand $tmp,$tmp,$eighty7
+ vncipherlast $out3,$out3,$in3
+ le?vperm $in1,$in1,$in1,$leperm
+ lvx_u $in3,$x30,$inp
+ vncipherlast $out4,$out4,$in4
+ le?vperm $in2,$in2,$in2,$leperm
+ lvx_u $in4,$x40,$inp
+ xxlor 10, 32+$in0, 32+$in0
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
+ xxlor 32+$in0, 10, 10
+ vncipherlast $out5,$out5,$in5
+ le?vperm $in3,$in3,$in3,$leperm
+ lvx_u $in5,$x50,$inp
+ addi $inp,$inp,0x60
+ le?vperm $in4,$in4,$in4,$leperm
+ le?vperm $in5,$in5,$in5,$leperm
+
+ le?vperm $out0,$out0,$out0,$leperm
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk0
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ vxor $out1,$in1,$twk1
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ vxor $out2,$in2,$twk2
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ vxor $out3,$in3,$twk3
+ le?vperm $out5,$out5,$out5,$leperm
+ stvx_u $out4,$x40,$out
+ vxor $out4,$in4,$twk4
+ stvx_u $out5,$x50,$out
+ vxor $out5,$in5,$twk5
+ addi $out,$out,0x60
+
+ mtctr $rounds
+ beq Loop_xts_dec6x # did $len-=96 borrow?
+
+ xxlor 32+$eighty7, 2, 2 # 0x010101..87
+
+ addic. $len,$len,0x60
+ beq Lxts_dec6x_zero
+ cmpwi $len,0x20
+ blt Lxts_dec6x_one
+ nop
+ beq Lxts_dec6x_two
+ cmpwi $len,0x40
+ blt Lxts_dec6x_three
+ nop
+ beq Lxts_dec6x_four
+
+Lxts_dec6x_five:
+ vxor $out0,$in1,$twk0
+ vxor $out1,$in2,$twk1
+ vxor $out2,$in3,$twk2
+ vxor $out3,$in4,$twk3
+ vxor $out4,$in5,$twk4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk5 # unused tweak
+ vxor $twk1,$tweak,$rndkey0
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk1
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ le?vperm $out4,$out4,$out4,$leperm
+ stvx_u $out3,$x30,$out
+ stvx_u $out4,$x40,$out
+ addi $out,$out,0x50
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_four:
+ vxor $out0,$in2,$twk0
+ vxor $out1,$in3,$twk1
+ vxor $out2,$in4,$twk2
+ vxor $out3,$in5,$twk3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk4 # unused tweak
+ vmr $twk1,$twk5
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk5
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ le?vperm $out3,$out3,$out3,$leperm
+ stvx_u $out2,$x20,$out
+ stvx_u $out3,$x30,$out
+ addi $out,$out,0x40
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_three:
+ vxor $out0,$in3,$twk0
+ vxor $out1,$in4,$twk1
+ vxor $out2,$in5,$twk2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk3 # unused tweak
+ vmr $twk1,$twk4
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk4
+ le?vperm $out2,$out2,$out2,$leperm
+ stvx_u $out1,$x10,$out
+ stvx_u $out2,$x20,$out
+ addi $out,$out,0x30
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_two:
+ vxor $out0,$in4,$twk0
+ vxor $out1,$in5,$twk1
+ vxor $out2,$out2,$out2
+ vxor $out3,$out3,$out3
+ vxor $out4,$out4,$out4
+
+ bl _aesp8_xts_dec5x
+
+ le?vperm $out0,$out0,$out0,$leperm
+ vmr $twk0,$twk2 # unused tweak
+ vmr $twk1,$twk3
+ le?vperm $out1,$out1,$out1,$leperm
+ stvx_u $out0,$x00,$out # store output
+ vxor $out0,$in0,$twk3
+ stvx_u $out1,$x10,$out
+ addi $out,$out,0x20
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_one:
+ vxor $out0,$in5,$twk0
+ nop
+Loop_xts_dec1x:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Loop_xts_dec1x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk0,$twk0,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ mtctr $rounds
+ vncipherlast $out0,$out0,$twk0
+
+ vmr $twk0,$twk1 # unused tweak
+ vmr $twk1,$twk2
+ le?vperm $out0,$out0,$out0,$leperm
+ stvx_u $out0,$x00,$out # store output
+ addi $out,$out,0x10
+ vxor $out0,$in0,$twk2
+ bne Lxts_dec6x_steal
+ b Lxts_dec6x_done
+
+.align 4
+Lxts_dec6x_zero:
+ cmpwi $taillen,0
+ beq Lxts_dec6x_done
+
+ lvx_u $in0,0,$inp
+ le?vperm $in0,$in0,$in0,$leperm
+ vxor $out0,$in0,$twk1
+Lxts_dec6x_steal:
+ vncipher $out0,$out0,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz Lxts_dec6x_steal
+
+ add $inp,$inp,$taillen
+ vncipher $out0,$out0,v24
+
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+
+ lvx_u $in0,0,$inp
+ vncipher $out0,$out0,v26
+
+ lvsr $inpperm,0,$taillen # $in5 is no more
+ vncipher $out0,$out0,v27
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+
+ vncipher $out0,$out0,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $twk1,$twk1,v31
+
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out0,$out0,v30
+
+ vperm $in0,$in0,$in0,$inpperm
+ vncipherlast $tmp,$out0,$twk1
+
+ le?vperm $out0,$tmp,$tmp,$leperm
+ le?stvx_u $out0,0,$out
+ be?stvx_u $tmp,0,$out
+
+ vxor $out0,$out0,$out0
+ vspltisb $out1,-1
+ vperm $out0,$out0,$out1,$inpperm
+ vsel $out0,$in0,$tmp,$out0
+ vxor $out0,$out0,$twk0
+
+ subi r30,$out,1
+ mtctr $taillen
+Loop_xts_dec6x_steal:
+ lbzu r0,1(r30)
+ stb r0,16(r30)
+ bdnz Loop_xts_dec6x_steal
+
+ li $taillen,0
+ mtctr $rounds
+ b Loop_xts_dec1x # one more time...
+
+.align 4
+Lxts_dec6x_done:
+ ${UCMP}i $ivp,0
+ beq Lxts_dec6x_ret
+
+ vxor $tweak,$twk0,$rndkey0
+ le?vperm $tweak,$tweak,$tweak,$leperm
+ stvx_u $tweak,0,$ivp
+
+Lxts_dec6x_ret:
+ mtlr r11
+ li r10,`$FRAME+15`
+ li r11,`$FRAME+31`
+ stvx $seven,r10,$sp # wipe copies of round keys
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+ stvx $seven,r10,$sp
+ addi r10,r10,32
+ stvx $seven,r11,$sp
+ addi r11,r11,32
+
+ mtspr 256,$vrsave
+ lvx v20,r10,$sp # ABI says so
+ addi r10,r10,32
+ lvx v21,r11,$sp
+ addi r11,r11,32
+ lvx v22,r10,$sp
+ addi r10,r10,32
+ lvx v23,r11,$sp
+ addi r11,r11,32
+ lvx v24,r10,$sp
+ addi r10,r10,32
+ lvx v25,r11,$sp
+ addi r11,r11,32
+ lvx v26,r10,$sp
+ addi r10,r10,32
+ lvx v27,r11,$sp
+ addi r11,r11,32
+ lvx v28,r10,$sp
+ addi r10,r10,32
+ lvx v29,r11,$sp
+ addi r11,r11,32
+ lvx v30,r10,$sp
+ lvx v31,r11,$sp
+ $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
+ $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
+ $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
+ $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
+ $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
+ $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
+ addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
+ blr
+ .long 0
+ .byte 0,12,0x04,1,0x80,6,6,0
+ .long 0
+
+.align 5
+_aesp8_xts_dec5x:
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+ lvx v24,$x20,$key_ # round[3]
+ addi $key_,$key_,0x20
+
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ lvx v25,$x10,$key_ # round[4]
+ bdnz _aesp8_xts_dec5x
+
+ subi r0,$taillen,1
+ vncipher $out0,$out0,v24
+ vncipher $out1,$out1,v24
+ vncipher $out2,$out2,v24
+ vncipher $out3,$out3,v24
+ vncipher $out4,$out4,v24
+
+ andi. r0,r0,16
+ cmpwi $taillen,0
+ vncipher $out0,$out0,v25
+ vncipher $out1,$out1,v25
+ vncipher $out2,$out2,v25
+ vncipher $out3,$out3,v25
+ vncipher $out4,$out4,v25
+ vxor $twk0,$twk0,v31
+
+ sub $inp,$inp,r0
+ vncipher $out0,$out0,v26
+ vncipher $out1,$out1,v26
+ vncipher $out2,$out2,v26
+ vncipher $out3,$out3,v26
+ vncipher $out4,$out4,v26
+ vxor $in1,$twk1,v31
+
+ vncipher $out0,$out0,v27
+ lvx_u $in0,0,$inp
+ vncipher $out1,$out1,v27
+ vncipher $out2,$out2,v27
+ vncipher $out3,$out3,v27
+ vncipher $out4,$out4,v27
+ vxor $in2,$twk2,v31
+
+ addi $key_,$sp,$FRAME+15 # rewind $key_
+ vncipher $out0,$out0,v28
+ vncipher $out1,$out1,v28
+ vncipher $out2,$out2,v28
+ vncipher $out3,$out3,v28
+ vncipher $out4,$out4,v28
+ lvx v24,$x00,$key_ # re-pre-load round[1]
+ vxor $in3,$twk3,v31
+
+ vncipher $out0,$out0,v29
+ le?vperm $in0,$in0,$in0,$leperm
+ vncipher $out1,$out1,v29
+ vncipher $out2,$out2,v29
+ vncipher $out3,$out3,v29
+ vncipher $out4,$out4,v29
+ lvx v25,$x10,$key_ # re-pre-load round[2]
+ vxor $in4,$twk4,v31
+
+ vncipher $out0,$out0,v30
+ vncipher $out1,$out1,v30
+ vncipher $out2,$out2,v30
+ vncipher $out3,$out3,v30
+ vncipher $out4,$out4,v30
+
+ vncipherlast $out0,$out0,$twk0
+ vncipherlast $out1,$out1,$in1
+ vncipherlast $out2,$out2,$in2
+ vncipherlast $out3,$out3,$in3
+ vncipherlast $out4,$out4,$in4
+ mtctr $rounds
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+___
+}} }}}
+
+# Post-process the generated assembly one line at a time: evaluate the
+# arithmetic embedded in backticks, turn the tagged constants table into
+# plain .byte lines, resolve the endian-specific markers, and print the
+# result.  $consts stays set until the "Lconsts:" line has been seen.
+my $consts=1;
+foreach(split("\n",$code)) {
+ # replace every `expr` with the value Perl computes for expr
+ s/\`([^\`]*)\`/eval($1)/geo;
+
+ # constants table endian-specific conversion
+ # (only .long/.byte lines that end in a "?tag", and only while $consts
+ # is still set)
+ if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
+ my $conv=$3;
+ my @bytes=();
+
+ # convert to endian-agnostic format
+ if ($1 eq "long") {
+ foreach (split(/,\s*/,$2)) {
+ # a leading 0 marks octal/hex notation, which oct parses
+ my $l = /^0/?oct:int;
+ # split the 32-bit value into four bytes, most significant first
+ push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
+ }
+ } else {
+ @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
+ }
+
+ # little-endian conversion
+ # ?inv XORs every byte with 0xf, ?rev reverses the byte order; any
+ # other tag leaves the bytes as they are
+ if ($flavour =~ /le$/o) {
+ SWITCH: for($conv) {
+ /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
+ /\?rev/ && do { @bytes=reverse(@bytes); last; };
+ }
+ }
+
+ #emit
+ print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
+ next;
+ }
+ $consts=0 if (m/Lconsts:/o); # end of table
+
+ # instructions prefixed with '?' are endian-specific and need
+ # to be adjusted accordingly...
+ # The substitutions are chained with "or", so only the first one that
+ # matches is applied to a line.  For little-endian: "le?" is stripped,
+ # "be?" becomes "#be#", ?lvsr and ?lvsl trade places, ?vperm and ?vsldoi
+ # get their two source operands swapped (?vsldoi also has its shift n
+ # rewritten as "16-n"), and the ?vspltw index n is rewritten as "3-n".
+ # For big-endian: "le?" becomes "#le#", "be?" is stripped, and a
+ # leading '?' is simply dropped.
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o or
+ s/\?lvsr/lvsl/o or
+ s/\?lvsl/lvsr/o or
+ s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
+ s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
+ s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
+ } else { # big-endian
+ s/le\?/#le#/o or
+ s/be\?//o or
+ s/\?([a-z]+)/$1/o;
+ }
+
+ print $_,"\n";
+}
+
+close STDOUT;
diff --git a/arch/powerpc/crypto/ghash.c b/arch/powerpc/crypto/ghash.c
new file mode 100644
index 000000000000..7308735bdb33
--- /dev/null
+++ b/arch/powerpc/crypto/ghash.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GHASH routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015, 2019 International Business Machines Inc.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ *
+ * Extended by Daniel Axtens <dja@axtens.net> to replace the fallback
+ * mechanism. The new approach is based on arm64 code, which is:
+ * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include "aesp8-ppc.h"
+#include <asm/switch_to.h>
+#include <crypto/aes.h>
+#include <crypto/gf128mul.h>
+#include <crypto/ghash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/uaccess.h>
+
+void gcm_init_p8(u128 htable[16], const u64 Xi[2]);
+void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]);
+void gcm_ghash_p8(u64 Xi[2], const u128 htable[16],
+ const u8 *in, size_t len);
+
+/*
+ * Per-transform key material.  Both members are filled in by
+ * p8_ghash_setkey() from the same GHASH_BLOCK_SIZE-byte key.
+ */
+struct p8_ghash_ctx {
+ /* key used by vector asm */
+ /* (written by gcm_init_p8(), read by gcm_ghash_p8()) */
+ u128 htable[16];
+ /* key used by software fallback */
+ /* (raw copy of the key, multiplied in with gf128mul_lle()) */
+ be128 key;
+};
+
+/*
+ * Per-request state: the running GHASH value.  It is zeroed by
+ * p8_ghash_init() and copied out as the digest by p8_ghash_finup().
+ */
+struct p8_ghash_desc_ctx {
+ u64 shash[2];
+};
+
+/*
+ * Start a new hash: clear the running GHASH value kept in the
+ * descriptor context.  Always returns 0.
+ */
+static int p8_ghash_init(struct shash_desc *desc)
+{
+	struct p8_ghash_desc_ctx *state;
+
+	state = shash_desc_ctx(desc);
+	memset(state->shash, 0, GHASH_DIGEST_SIZE);
+
+	return 0;
+}
+
+/*
+ * Install a new hash key.
+ *
+ * The key must be exactly GHASH_BLOCK_SIZE bytes long, otherwise -EINVAL
+ * is returned.  The vector asm table is derived from the key first, then
+ * the raw key is kept for the software fallback.  Returns 0 on success.
+ */
+static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key,
+			   unsigned int keylen)
+{
+	struct p8_ghash_ctx *kctx;
+
+	if (keylen != GHASH_BLOCK_SIZE)
+		return -EINVAL;
+
+	kctx = crypto_tfm_ctx(crypto_shash_tfm(tfm));
+
+	/* The table setup runs in vector asm, so bracket it with VSX enable. */
+	preempt_disable();
+	pagefault_disable();
+	enable_kernel_vsx();
+	gcm_init_p8(kctx->htable, (const u64 *) key);
+	disable_kernel_vsx();
+	pagefault_enable();
+	preempt_enable();
+
+	/* Raw key for the gf128mul fallback path. */
+	memcpy(&kctx->key, key, GHASH_BLOCK_SIZE);
+
+	return 0;
+}
+
+/*
+ * Fold exactly one GHASH_BLOCK_SIZE-byte block at @src into the running
+ * hash in @dctx, using the vector asm when SIMD is usable and the
+ * generic gf128mul code otherwise.
+ */
+static inline void __ghash_block(struct p8_ghash_ctx *ctx,
+				 struct p8_ghash_desc_ctx *dctx,
+				 const u8 *src)
+{
+	if (!crypto_simd_usable()) {
+		/* Software fallback: xor the block in, then multiply by the key. */
+		crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE);
+		gf128mul_lle((be128 *)dctx->shash, &ctx->key);
+		return;
+	}
+
+	preempt_disable();
+	pagefault_disable();
+	enable_kernel_vsx();
+	gcm_ghash_p8(dctx->shash, ctx->htable, src, GHASH_BLOCK_SIZE);
+	disable_kernel_vsx();
+	pagefault_enable();
+	preempt_enable();
+}
+
+/*
+ * Fold every whole block of @src into the running hash in @dctx.
+ *
+ * @srclen is rounded down to a multiple of GHASH_BLOCK_SIZE before
+ * processing; the number of trailing bytes that were left unprocessed is
+ * returned.  The vector asm is used when SIMD is usable, otherwise the
+ * blocks go through the generic gf128mul code one at a time.
+ */
+static inline int __ghash_blocks(struct p8_ghash_ctx *ctx,
+				 struct p8_ghash_desc_ctx *dctx,
+				 const u8 *src, unsigned int srclen)
+{
+	int remain = srclen - round_down(srclen, GHASH_BLOCK_SIZE);
+
+	srclen -= remain;
+
+	/*
+	 * Nothing to hash if the input holds no whole block.  Without this
+	 * check the do/while fallback below would still consume one block
+	 * and then wrap the unsigned srclen around.
+	 * NOTE(review): callers presumably never pass less than one block;
+	 * confirm against the shash core before relying on that.
+	 */
+	if (!srclen)
+		return remain;
+
+	if (crypto_simd_usable()) {
+		preempt_disable();
+		pagefault_disable();
+		enable_kernel_vsx();
+		gcm_ghash_p8(dctx->shash, ctx->htable,
+				src, srclen);
+		disable_kernel_vsx();
+		pagefault_enable();
+		preempt_enable();
+	} else {
+		/* srclen is a non-zero multiple of the block size here */
+		do {
+			crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE);
+			gf128mul_lle((be128 *)dctx->shash, &ctx->key);
+			srclen -= GHASH_BLOCK_SIZE;
+			src += GHASH_BLOCK_SIZE;
+		} while (srclen);
+	}
+
+	return remain;
+}
+
+/*
+ * shash .update hook: hash the whole blocks found in @src and return the
+ * count of trailing bytes that __ghash_blocks() left unprocessed.
+ */
+static int p8_ghash_update(struct shash_desc *desc,
+			   const u8 *src, unsigned int srclen)
+{
+	struct p8_ghash_desc_ctx *state = shash_desc_ctx(desc);
+	struct p8_ghash_ctx *kctx;
+
+	kctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
+
+	return __ghash_blocks(kctx, state, src, srclen);
+}
+
+/*
+ * shash .finup hook: hash the final @len bytes of @src (zero-padded to a
+ * full block) if there are any, then copy the digest to @out.
+ * Always returns 0.
+ *
+ * NOTE(review): the padding buffer is one block long, so this presumably
+ * expects @len to be at most GHASH_BLOCK_SIZE -- confirm against the caller.
+ */
+static int p8_ghash_finup(struct shash_desc *desc, const u8 *src,
+			  unsigned int len, u8 *out)
+{
+	struct p8_ghash_desc_ctx *state = shash_desc_ctx(desc);
+	struct p8_ghash_ctx *kctx;
+
+	kctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm));
+
+	if (len) {
+		u8 padded[GHASH_BLOCK_SIZE] = {};
+
+		memcpy(padded, src, len);
+		__ghash_block(kctx, state, padded);
+		/* do not leave message bytes behind on the stack */
+		memzero_explicit(padded, sizeof(padded));
+	}
+
+	memcpy(out, state->shash, GHASH_DIGEST_SIZE);
+
+	return 0;
+}
+
+/*
+ * shash algorithm descriptor tying the hooks in this file to the "ghash"
+ * algorithm name, under the driver name "p8_ghash".
+ */
+struct shash_alg p8_ghash_alg = {
+ .digestsize = GHASH_DIGEST_SIZE,
+ .init = p8_ghash_init,
+ .update = p8_ghash_update,
+ .finup = p8_ghash_finup,
+ .setkey = p8_ghash_setkey,
+ /* per-request state is just the running hash value */
+ .descsize = sizeof(struct p8_ghash_desc_ctx),
+ .base = {
+ .cra_name = "ghash",
+ .cra_driver_name = "p8_ghash",
+ .cra_priority = 1000,
+ /* NOTE(review): presumably makes the core hand only whole blocks to .update -- confirm */
+ .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
+ .cra_blocksize = GHASH_BLOCK_SIZE,
+ /* per-transform state holds both forms of the key */
+ .cra_ctxsize = sizeof(struct p8_ghash_ctx),
+ .cra_module = THIS_MODULE,
+ },
+};
diff --git a/arch/powerpc/crypto/ghashp10-ppc.pl b/arch/powerpc/crypto/ghashp10-ppc.pl
new file mode 100644
index 000000000000..27a6b0bec645
--- /dev/null
+++ b/arch/powerpc/crypto/ghashp10-ppc.pl
@@ -0,0 +1,370 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for further improvement.
+
+# Command line: <flavour> [<output>].  The flavour must contain "64" or
+# "32"; it selects the word size and the matching load/store mnemonics.
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+	$SIZE_T=8;
+	$LRSAVE=2*$SIZE_T;
+	$STU="stdu";
+	$POP="ld";
+	$PUSH="std";
+} elsif ($flavour =~ /32/) {
+	$SIZE_T=4;
+	$LRSAVE=$SIZE_T;
+	$STU="stwu";
+	$POP="lwz";
+	$PUSH="stw";
+} else { die "nonsense $flavour"; }
+
+# Look for ppc-xlate.pl next to this script first, then in ../../perlasm/.
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+# Everything printed from here on is piped through the translator.
+# The low-precedence "or" is required: with "||" the die attaches to the
+# command string, which is always true, so a failed open went unnoticed.
+open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!";
+
+# Symbolic register names used by the assembly template that follows.
+# General-purpose registers r3..r6:
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+# Vector registers v0..v3, v4..v12 and v13..v19:
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my ($Xl1,$Xm1,$Xh1,$IN1,$H2,$H2h,$H2l)=map("v$_",(13..19));
+my $vrsave="r12";
+# $t4..$t6 are aliases of $Hl, $H and $Hh, not registers of their own.
+my ($t4,$t5,$t6) = ($Hl,$H,$Hh);
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p10
+ lis r0,0xfff0
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+ le?xor r7,r7,r7
+ le?addi r7,r7,0x8 # need a vperm start with 08
+ le?lvsr 5,0,r7
+ le?vspltisb 6,0x0f
+ le?vxor 5,5,6 # set a b-endian mask
+ le?vperm $H,$H,$H,5
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $H,$H,$t1 # twisted H
+
+ vsldoi $H,$H,$H,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ stvx_u $H, r9,r3
+ stvx_u $Hh,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p10,.-.gcm_init_p10
+
+.globl .gcm_init_htable
+ lis r0,0xfff0
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $IN,$H,$t1 # twisted H
+
+ vsldoi $H,$IN,$IN,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ li r8,0x40
+ stvx_u $H, r9,r3
+ li r9,0x50
+ stvx_u $Hh,r10,r3
+ li r10,0x60
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·H.lo
+ vpmsumd $Xm,$IN,$H # H.hi·H.lo+H.lo·H.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·H.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $IN1,$Xl,$t1
+
+ vsldoi $H2,$IN1,$IN1,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $H2l,r8,r3 # save H^2
+ li r8,0x70
+ stvx_u $H2,r9,r3
+ li r9,0x80
+ stvx_u $H2h,r10,r3
+ li r10,0x90
+
+ vpmsumd $Xl,$IN,$H2l # H.lo·H^2.lo
+ vpmsumd $Xl1,$IN1,$H2l # H^2.lo·H^2.lo
+ vpmsumd $Xm,$IN,$H2 # H.hi·H^2.lo+H.lo·H^2.hi
+ vpmsumd $Xm1,$IN1,$H2 # H^2.hi·H^2.lo+H^2.lo·H^2.hi
+ vpmsumd $Xh,$IN,$H2h # H.hi·H^2.hi
+ vpmsumd $Xh1,$IN1,$H2h # H^2.hi·H^2.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st reduction phase
+ vpmsumd $t6,$Xl1,$xC2 # 1st reduction phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vsldoi $t4,$Xm1,$zero,8
+ vsldoi $t5,$zero,$Xm1,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+ vxor $Xl1,$Xl1,$t4
+ vxor $Xh1,$Xh1,$t5
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vsldoi $Xl1,$Xl1,$Xl1,8
+ vxor $Xl,$Xl,$t2
+ vxor $Xl1,$Xl1,$t6
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd reduction phase
+ vsldoi $t5,$Xl1,$Xl1,8 # 2nd reduction phase
+ vpmsumd $Xl,$Xl,$xC2
+ vpmsumd $Xl1,$Xl1,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $t5,$t5,$Xh1
+ vxor $Xl,$Xl,$t1
+ vxor $Xl1,$Xl1,$t5
+
+ vsldoi $H,$Xl,$Xl,8
+ vsldoi $H2,$Xl1,$Xl1,8
+ vsldoi $Hl,$zero,$H,8
+ vsldoi $Hh,$H,$zero,8
+ vsldoi $H2l,$zero,$H2,8
+ vsldoi $H2h,$H2,$zero,8
+
+ stvx_u $Hl,r8,r3 # save H^3
+ li r8,0xa0
+ stvx_u $H,r9,r3
+ li r9,0xb0
+ stvx_u $Hh,r10,r3
+ li r10,0xc0
+ stvx_u $H2l,r8,r3 # save H^4
+ stvx_u $H2,r9,r3
+ stvx_u $H2h,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_htable,.-.gcm_init_htable
+
+.globl .gcm_gmult_p10
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p10,.-.gcm_gmult_p10
+
+.globl .gcm_ghash_p10
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subi $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ b Loop
+
+.align 5
+Loop:
+ subic $len,$len,16
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ subfe. r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ add $inp,$inp,r0
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ beq Loop # did $len-=16 borrow?
+
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+.size .gcm_ghash_p10,.-.gcm_ghash_p10
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+# Resolve the "le?"/"be?" line tags for the target endianness and print
+# every line of the template; at most one tag is rewritten per line.
+my $le_target = ($flavour =~ /le$/) ? 1 : 0;
+for my $insn (split("\n",$code)) {
+	if ($le_target) {
+		# little-endian: keep "le?" lines, comment out "be?" lines
+		$insn =~ s/be\?/#be#/ unless $insn =~ s/le\?//;
+	} else {
+		# big-endian: comment out "le?" lines, keep "be?" lines
+		$insn =~ s/be\?// unless $insn =~ s/le\?/#le#/;
+	}
+	print $insn,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/arch/powerpc/crypto/ghashp8-ppc.pl b/arch/powerpc/crypto/ghashp8-ppc.pl
new file mode 100644
index 000000000000..041e633c214f
--- /dev/null
+++ b/arch/powerpc/crypto/ghashp8-ppc.pl
@@ -0,0 +1,243 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for further improvement.
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
+ $STU="stdu";
+ $POP="ld";
+ $PUSH="std";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $LRSAVE=$SIZE_T;
+ $STU="stwu";
+ $POP="lwz";
+ $PUSH="stw";
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; # "or", not "||": "||" binds to the (always true) command string, so die could never run
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my $vrsave="r12";
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p8
+ lis r0,0xfff0
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+ le?xor r7,r7,r7
+ le?addi r7,r7,0x8 # need a vperm start with 08
+ le?lvsr 5,0,r7
+ le?vspltisb 6,0x0f
+ le?vxor 5,5,6 # set a b-endian mask
+ le?vperm $H,$H,$H,5
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $H,$H,$t1 # twisted H
+
+ vsldoi $H,$H,$H,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ stvx_u $H, r9,r3
+ stvx_u $Hh,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p8,.-.gcm_init_p8
+
+.globl .gcm_gmult_p8
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subi $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ b Loop
+
+.align 5
+Loop:
+ subic $len,$len,16
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ subfe. r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ add $inp,$inp,r0
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ beq Loop # did $len-=16 borrow?
+
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+.size .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o;
+ } else {
+ s/le\?/#le#/o or
+ s/be\?//o;
+ }
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush
diff --git a/arch/powerpc/crypto/ppc-xlate.pl b/arch/powerpc/crypto/ppc-xlate.pl
new file mode 100644
index 000000000000..23cca703ce29
--- /dev/null
+++ b/arch/powerpc/crypto/ppc-xlate.pl
@@ -0,0 +1,229 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# PowerPC assembler distiller by <appro>.
+
+my $flavour = shift;
+my $output = shift;
+if (defined($output) && $output ne "") { open STDOUT,">$output" or die "can't open $output: $!"; } # redirect only when a file was named; "or" (not "||") so a failed open is actually reported
+
+my %GLOBALS;
+my $dotinlocallabels=($flavour=~/linux/)?1:0;
+
+################################################################
+# directives which need special treatment on different platforms
+################################################################
+my $globl = sub { # handler for ".globl NAME": returns the per-flavour symbol definition text
+ my $junk = shift; # suffix flag passed by the dispatch loop; not used here
+ my $name = shift;
+ my $global = \$GLOBALS{$name}; # slot is keyed by the name exactly as written in the input
+ my $ret;
+
+ $name =~ s|^[\.\_]||; # strip one leading "." or "_" before re-decorating
+
+ SWITCH: for ($flavour) {
+ /aix/ && do { $name = ".$name";
+ last;
+ };
+ /osx/ && do { $name = "_$name";
+ last;
+ };
+ /linux/
+ && do { $ret = "_GLOBAL($name)";
+ last;
+ };
+ }
+
+ $ret = ".globl $name\n.align 5\n$name:" if (!$ret); # non-linux fallback; was "align 5" (no dot), which is not a directive
+ $$global = $name; # record the decorated name in %GLOBALS
+ $ret;
+};
+my $text = sub {
+ my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
+ $ret = ".abiversion 2\n".$ret if ($flavour =~ /linux.*64le/);
+ $ret;
+};
+my $machine = sub {
+ my $junk = shift;
+ my $arch = shift;
+ if ($flavour =~ /osx/)
+ { $arch =~ s/\"//g;
+ $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any");
+ }
+ ".machine $arch";
+};
+my $size = sub {
+ if ($flavour =~ /linux/)
+ { shift;
+ my $name = shift; $name =~ s|^[\.\_]||;
+ my $ret = ".size $name,.-".($flavour=~/64$/?".":"").$name;
+ $ret .= "\n.size .$name,.-.$name" if ($flavour=~/64$/);
+ $ret;
+ }
+ else
+ { ""; }
+};
+my $asciz = sub {
+ shift;
+ my $line = join(",",@_);
+ if ($line =~ /^"(.*)"$/)
+ { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; }
+ else
+ { ""; }
+};
+my $quad = sub {
+ shift;
+ my @ret;
+ my ($hi,$lo);
+ for (@_) {
+ if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io)
+ { $hi=$1?"0x$1":"0"; $lo="0x$2"; }
+ elsif (/^([0-9]+)$/o)
+ { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl
+ else
+ { $hi=undef; $lo=$_; }
+
+ if (defined($hi))
+ { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); }
+ else
+ { push(@ret,".quad $lo"); }
+ }
+ join("\n",@ret);
+};
+
+################################################################
+# simplified mnemonics not handled by at least one assembler
+################################################################
+my $cmplw = sub {
+ my $f = shift;
+ my $cr = 0; $cr = shift if ($#_>1);
+ # Some out-of-date 32-bit GNU assembler just can't handle cmplw...
+ ($flavour =~ /linux.*32/) ?
+ " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 :
+ " cmplw ".join(',',$cr,@_);
+};
+my $bdnz = sub {
+ my $f = shift;
+ my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint
+ " bc $bo,0,".shift;
+} if ($flavour!~/linux/);
+my $bltlr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 :
+ " bclr $bo,0";
+};
+my $bnelr = sub {
+ my $f = shift;
+ my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+my $beqlr = sub {
+ my $f = shift;
+ my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint
+ ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints
+ " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 :
+ " bclr $bo,2";
+};
+# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two
+# arguments is 64, with "operand out of range" error.
+my $extrdi = sub {
+ my ($f,$ra,$rs,$n,$b) = @_;
+ $b = ($b+$n)&63; $n = 64-$n;
+ " rldicl $ra,$rs,$b,$n";
+};
+my $vmr = sub {
+ my ($f,$vx,$vy) = @_;
+ " vor $vx,$vy,$vy";
+};
+
+# Some ABIs specify vrsave, special-purpose register #256, as reserved
+# for system use.
+my $no_vrsave = ($flavour =~ /linux-ppc64le/);
+my $mtspr = sub {
+ my ($f,$idx,$ra) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " or $ra,$ra,$ra";
+ } else {
+ " mtspr $idx,$ra";
+ }
+};
+my $mfspr = sub {
+ my ($f,$rd,$idx) = @_;
+ if ($idx == 256 && $no_vrsave) {
+ " li $rd,-1";
+ } else {
+ " mfspr $rd,$idx";
+ }
+};
+
+# PowerISA 2.06 stuff
+sub vsxmem_op {
+ my ($f, $vrt, $ra, $rb, $op) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1);
+}
+# made-up unaligned memory reference AltiVec/VMX instructions
+my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x
+my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x
+my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx
+my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx
+my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x
+my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x
+
+# PowerISA 2.07 stuff
+sub vcrypto_op {
+ my ($f, $vrt, $vra, $vrb, $op) = @_;
+ " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op;
+}
+my $vcipher = sub { vcrypto_op(@_, 1288); };
+my $vcipherlast = sub { vcrypto_op(@_, 1289); };
+my $vncipher = sub { vcrypto_op(@_, 1352); };
+my $vncipherlast= sub { vcrypto_op(@_, 1353); };
+my $vsbox = sub { vcrypto_op(@_, 0, 1480); };
+my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); };
+my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); };
+my $vpmsumb = sub { vcrypto_op(@_, 1032); };
+my $vpmsumd = sub { vcrypto_op(@_, 1224); };
+my $vpmsumh = sub { vcrypto_op(@_, 1096); }; my $vpmsubh = $vpmsumh; # opcode 1096 belongs to the vpmsum{b,h,w,d} series; "vpmsubh" was a misspelling, kept as an alias
+my $vpmsumw = sub { vcrypto_op(@_, 1160); };
+my $vaddudm = sub { vcrypto_op(@_, 192); };
+my $vadduqm = sub { vcrypto_op(@_, 256); };
+
+my $mtsle = sub {
+ my ($f, $arg) = @_;
+ " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2);
+};
+
+print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/;
+
+while($line=<>) {
+
+ $line =~ s|[#!;].*$||; # get rid of asm-style comments...
+ $line =~ s|/\*.*\*/||; # ... and C-style comments...
+ $line =~ s|^\s+||; # ... and skip white spaces in beginning...
+ $line =~ s|\s+$||; # ... and at the end
+
+ {
+ $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel
+ $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels);
+ }
+
+ {
+ $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||;
+ my $c = $1; $c = "\t" if ($c eq "");
+ my $mnemonic = $2;
+ my $f = $3;
+ my $opcode = eval("\$$mnemonic");
+ $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/);
+ if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); }
+ elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; }
+ }
+
+ print $line if ($line);
+ print "\n";
+}
+
+close STDOUT;
diff --git a/arch/powerpc/crypto/sha1-powerpc-asm.S b/arch/powerpc/crypto/sha1-powerpc-asm.S
deleted file mode 100644
index 125e16520061..000000000000
--- a/arch/powerpc/crypto/sha1-powerpc-asm.S
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * SHA-1 implementation for PowerPC.
- *
- * Copyright (C) 2005 Paul Mackerras <paulus@samba.org>
- */
-
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-
-/*
- * We roll the registers for T, A, B, C, D, E around on each
- * iteration; T on iteration t is A on iteration t+1, and so on.
- * We use registers 7 - 12 for this.
- */
-#define RT(t) ((((t)+5)%6)+7)
-#define RA(t) ((((t)+4)%6)+7)
-#define RB(t) ((((t)+3)%6)+7)
-#define RC(t) ((((t)+2)%6)+7)
-#define RD(t) ((((t)+1)%6)+7)
-#define RE(t) ((((t)+0)%6)+7)
-
-/* We use registers 16 - 31 for the W values */
-#define W(t) (((t)%16)+16)
-
-#define LOADW(t) \
- lwz W(t),(t)*4(r4)
-
-#define STEPD0_LOAD(t) \
- andc r0,RD(t),RB(t); \
- and r6,RB(t),RC(t); \
- rotlwi RT(t),RA(t),5; \
- or r6,r6,r0; \
- add r0,RE(t),r15; \
- add RT(t),RT(t),r6; \
- add r14,r0,W(t); \
- lwz W((t)+4),((t)+4)*4(r4); \
- rotlwi RB(t),RB(t),30; \
- add RT(t),RT(t),r14
-
-#define STEPD0_UPDATE(t) \
- and r6,RB(t),RC(t); \
- andc r0,RD(t),RB(t); \
- rotlwi RT(t),RA(t),5; \
- rotlwi RB(t),RB(t),30; \
- or r6,r6,r0; \
- add r0,RE(t),r15; \
- xor r5,W((t)+4-3),W((t)+4-8); \
- add RT(t),RT(t),r6; \
- xor W((t)+4),W((t)+4-16),W((t)+4-14); \
- add r0,r0,W(t); \
- xor W((t)+4),W((t)+4),r5; \
- add RT(t),RT(t),r0; \
- rotlwi W((t)+4),W((t)+4),1
-
-#define STEPD1(t) \
- xor r6,RB(t),RC(t); \
- rotlwi RT(t),RA(t),5; \
- rotlwi RB(t),RB(t),30; \
- xor r6,r6,RD(t); \
- add r0,RE(t),r15; \
- add RT(t),RT(t),r6; \
- add r0,r0,W(t); \
- add RT(t),RT(t),r0
-
-#define STEPD1_UPDATE(t) \
- xor r6,RB(t),RC(t); \
- rotlwi RT(t),RA(t),5; \
- rotlwi RB(t),RB(t),30; \
- xor r6,r6,RD(t); \
- add r0,RE(t),r15; \
- xor r5,W((t)+4-3),W((t)+4-8); \
- add RT(t),RT(t),r6; \
- xor W((t)+4),W((t)+4-16),W((t)+4-14); \
- add r0,r0,W(t); \
- xor W((t)+4),W((t)+4),r5; \
- add RT(t),RT(t),r0; \
- rotlwi W((t)+4),W((t)+4),1
-
-#define STEPD2_UPDATE(t) \
- and r6,RB(t),RC(t); \
- and r0,RB(t),RD(t); \
- rotlwi RT(t),RA(t),5; \
- or r6,r6,r0; \
- rotlwi RB(t),RB(t),30; \
- and r0,RC(t),RD(t); \
- xor r5,W((t)+4-3),W((t)+4-8); \
- or r6,r6,r0; \
- xor W((t)+4),W((t)+4-16),W((t)+4-14); \
- add r0,RE(t),r15; \
- add RT(t),RT(t),r6; \
- add r0,r0,W(t); \
- xor W((t)+4),W((t)+4),r5; \
- add RT(t),RT(t),r0; \
- rotlwi W((t)+4),W((t)+4),1
-
-#define STEP0LD4(t) \
- STEPD0_LOAD(t); \
- STEPD0_LOAD((t)+1); \
- STEPD0_LOAD((t)+2); \
- STEPD0_LOAD((t)+3)
-
-#define STEPUP4(t, fn) \
- STEP##fn##_UPDATE(t); \
- STEP##fn##_UPDATE((t)+1); \
- STEP##fn##_UPDATE((t)+2); \
- STEP##fn##_UPDATE((t)+3)
-
-#define STEPUP20(t, fn) \
- STEPUP4(t, fn); \
- STEPUP4((t)+4, fn); \
- STEPUP4((t)+8, fn); \
- STEPUP4((t)+12, fn); \
- STEPUP4((t)+16, fn)
-
-_GLOBAL(powerpc_sha_transform)
- PPC_STLU r1,-INT_FRAME_SIZE(r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
-
- /* Load up A - E */
- lwz RA(0),0(r3) /* A */
- lwz RB(0),4(r3) /* B */
- lwz RC(0),8(r3) /* C */
- lwz RD(0),12(r3) /* D */
- lwz RE(0),16(r3) /* E */
-
- LOADW(0)
- LOADW(1)
- LOADW(2)
- LOADW(3)
-
- lis r15,0x5a82 /* K0-19 */
- ori r15,r15,0x7999
- STEP0LD4(0)
- STEP0LD4(4)
- STEP0LD4(8)
- STEPUP4(12, D0)
- STEPUP4(16, D0)
-
- lis r15,0x6ed9 /* K20-39 */
- ori r15,r15,0xeba1
- STEPUP20(20, D1)
-
- lis r15,0x8f1b /* K40-59 */
- ori r15,r15,0xbcdc
- STEPUP20(40, D2)
-
- lis r15,0xca62 /* K60-79 */
- ori r15,r15,0xc1d6
- STEPUP4(60, D1)
- STEPUP4(64, D1)
- STEPUP4(68, D1)
- STEPUP4(72, D1)
- lwz r20,16(r3)
- STEPD1(76)
- lwz r19,12(r3)
- STEPD1(77)
- lwz r18,8(r3)
- STEPD1(78)
- lwz r17,4(r3)
- STEPD1(79)
-
- lwz r16,0(r3)
- add r20,RE(80),r20
- add RD(0),RD(80),r19
- add RC(0),RC(80),r18
- add RB(0),RB(80),r17
- add RA(0),RA(80),r16
- mr RE(0),r20
- stw RA(0),0(r3)
- stw RB(0),4(r3)
- stw RC(0),8(r3)
- stw RD(0),12(r3)
- stw RE(0),16(r3)
-
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
- addi r1,r1,INT_FRAME_SIZE
- blr
diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c
deleted file mode 100644
index f9e8b9491efc..000000000000
--- a/arch/powerpc/crypto/sha1.c
+++ /dev/null
@@ -1,157 +0,0 @@
-/*
- * Cryptographic API.
- *
- * powerpc implementation of the SHA1 Secure Hash Algorithm.
- *
- * Derived from cryptoapi implementation, adapted for in-place
- * scatterlist interface.
- *
- * Derived from "crypto/sha1.c"
- * Copyright (c) Alan Smithee.
- * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
- * Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- */
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mm.h>
-#include <linux/cryptohash.h>
-#include <linux/types.h>
-#include <crypto/sha.h>
-#include <asm/byteorder.h>
-
-extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp);
-
-static int sha1_init(struct shash_desc *desc)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- *sctx = (struct sha1_state){
- .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
- };
-
- return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- unsigned int partial, done;
- const u8 *src;
-
- partial = sctx->count & 0x3f;
- sctx->count += len;
- done = 0;
- src = data;
-
- if ((partial + len) > 63) {
- u32 temp[SHA_WORKSPACE_WORDS];
-
- if (partial) {
- done = -partial;
- memcpy(sctx->buffer + partial, data, done + 64);
- src = sctx->buffer;
- }
-
- do {
- powerpc_sha_transform(sctx->state, src, temp);
- done += 64;
- src = data + done;
- } while (done + 63 < len);
-
- memset(temp, 0, sizeof(temp));
- partial = 0;
- }
- memcpy(sctx->buffer + partial, src, len - done);
-
- return 0;
-}
-
-
-/* Add padding and return the message digest. */
-static int sha1_final(struct shash_desc *desc, u8 *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
- __be32 *dst = (__be32 *)out;
- u32 i, index, padlen;
- __be64 bits;
- static const u8 padding[64] = { 0x80, };
-
- bits = cpu_to_be64(sctx->count << 3);
-
- /* Pad out to 56 mod 64 */
- index = sctx->count & 0x3f;
- padlen = (index < 56) ? (56 - index) : ((64+56) - index);
- sha1_update(desc, padding, padlen);
-
- /* Append length */
- sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
- /* Store state in digest */
- for (i = 0; i < 5; i++)
- dst[i] = cpu_to_be32(sctx->state[i]);
-
- /* Wipe context */
- memset(sctx, 0, sizeof *sctx);
-
- return 0;
-}
-
-static int sha1_export(struct shash_desc *desc, void *out)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(out, sctx, sizeof(*sctx));
- return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
- struct sha1_state *sctx = shash_desc_ctx(desc);
-
- memcpy(sctx, in, sizeof(*sctx));
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = SHA1_DIGEST_SIZE,
- .init = sha1_init,
- .update = sha1_update,
- .final = sha1_final,
- .export = sha1_export,
- .import = sha1_import,
- .descsize = sizeof(struct sha1_state),
- .statesize = sizeof(struct sha1_state),
- .base = {
- .cra_name = "sha1",
- .cra_driver_name= "sha1-powerpc",
- .cra_flags = CRYPTO_ALG_TYPE_SHASH,
- .cra_blocksize = SHA1_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init sha1_powerpc_mod_init(void)
-{
- return crypto_register_shash(&alg);
-}
-
-static void __exit sha1_powerpc_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(sha1_powerpc_mod_init);
-module_exit(sha1_powerpc_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm");
-
-MODULE_ALIAS("sha1-powerpc");
diff --git a/arch/powerpc/crypto/vmx.c b/arch/powerpc/crypto/vmx.c
new file mode 100644
index 000000000000..0b725e826388
--- /dev/null
+++ b/arch/powerpc/crypto/vmx.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015 International Business Machines Inc.
+ *
+ * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <asm/cputable.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/skcipher.h>
+
+#include "aesp8-ppc.h"
+
+static int __init p8_init(void)
+{
+ int ret;
+
+ ret = crypto_register_shash(&p8_ghash_alg);
+ if (ret)
+ goto err;
+
+ ret = crypto_register_alg(&p8_aes_alg);
+ if (ret)
+ goto err_unregister_ghash;
+
+ ret = crypto_register_skcipher(&p8_aes_cbc_alg);
+ if (ret)
+ goto err_unregister_aes;
+
+ ret = crypto_register_skcipher(&p8_aes_ctr_alg);
+ if (ret)
+ goto err_unregister_aes_cbc;
+
+ ret = crypto_register_skcipher(&p8_aes_xts_alg);
+ if (ret)
+ goto err_unregister_aes_ctr;
+
+ return 0;
+
+err_unregister_aes_ctr:
+ crypto_unregister_skcipher(&p8_aes_ctr_alg);
+err_unregister_aes_cbc:
+ crypto_unregister_skcipher(&p8_aes_cbc_alg);
+err_unregister_aes:
+ crypto_unregister_alg(&p8_aes_alg);
+err_unregister_ghash:
+ crypto_unregister_shash(&p8_ghash_alg);
+err:
+ return ret;
+}
+
+static void __exit p8_exit(void)
+{
+ crypto_unregister_skcipher(&p8_aes_xts_alg);
+ crypto_unregister_skcipher(&p8_aes_ctr_alg);
+ crypto_unregister_skcipher(&p8_aes_cbc_alg);
+ crypto_unregister_alg(&p8_aes_alg);
+ crypto_unregister_shash(&p8_ghash_alg);
+}
+
+module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init);
+module_exit(p8_exit);
+
+MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>");
+MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions "
+ "support on Power 8");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_IMPORT_NS("CRYPTO_INTERNAL");
diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h
index bdf0563ba423..f9cac46a95cb 100644
--- a/arch/powerpc/include/asm/8xx_immap.h
+++ b/arch/powerpc/include/asm/8xx_immap.h
@@ -560,5 +560,7 @@ typedef struct immap {
cpm8xx_t im_cpm; /* Communication processor */
} immap_t;
+extern immap_t __iomem *mpc8xx_immr;
+
#endif /* __IMMAP_8XX__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7f23f162ce9c..2e23533b67e3 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -1,9 +1,8 @@
-
-generic-y += clkdev.h
-generic-y += hash.h
+# SPDX-License-Identifier: GPL-2.0
+generated-y += syscall_table_32.h
+generated-y += syscall_table_64.h
+generated-y += syscall_table_spu.h
+generic-y += agp.h
generic-y += mcs_spinlock.h
-generic-y += preempt.h
-generic-y += rwsem.h
-generic-y += scatterlist.h
-generic-y += trace_clock.h
-generic-y += vtime.h
+generic-y += qrwlock.h
+generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h
new file mode 100644
index 000000000000..6d79c31700e2
--- /dev/null
+++ b/arch/powerpc/include/asm/accounting.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common time accounting prototypes and such for all ppc machines.
+ */
+
+#ifndef __POWERPC_ACCOUNTING_H
+#define __POWERPC_ACCOUNTING_H
+
+/* Stuff for accurate time accounting */
+struct cpu_accounting_data {
+ /* Accumulated cputime values to flush on ticks*/
+ unsigned long utime;
+ unsigned long stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ unsigned long utime_scaled;
+ unsigned long stime_scaled;
+#endif
+ unsigned long gtime;
+ unsigned long hardirq_time;
+ unsigned long softirq_time;
+ unsigned long steal_time;
+ unsigned long idle_time;
+ /* Internal counters */
+ unsigned long starttime; /* TB value snapshot */
+ unsigned long starttime_user; /* TB value on exit to usermode */
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ unsigned long startspurr; /* SPURR value snapshot */
+ unsigned long utime_sspurr; /* ->user_time when ->startspurr set */
+#endif
+};
+
+#endif
diff --git a/arch/powerpc/include/asm/agp.h b/arch/powerpc/include/asm/agp.h
deleted file mode 100644
index 416e12c2d505..000000000000
--- a/arch/powerpc/include/asm/agp.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _ASM_POWERPC_AGP_H
-#define _ASM_POWERPC_AGP_H
-#ifdef __KERNEL__
-
-#include <asm/io.h>
-
-#define map_page_into_agp(page)
-#define unmap_page_from_agp(page)
-#define flush_agp_cache() mb()
-
-/* GATT allocation. Returns/accepts GATT kernel virtual address. */
-#define alloc_gatt_pages(order) \
- ((char *)__get_free_pages(GFP_KERNEL, (order)))
-#define free_gatt_pages(table, order) \
- free_pages((unsigned long)(table), (order))
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_AGP_H */
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index bde531103638..51b093f67528 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -1,50 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_ARCHRANDOM_H
#define _ASM_POWERPC_ARCHRANDOM_H
-#ifdef CONFIG_ARCH_RANDOM
-
-#include <asm/machdep.h>
-
-static inline int arch_get_random_long(unsigned long *v)
+static inline size_t __must_check arch_get_random_longs(unsigned long *v, size_t max_longs)
{
- if (ppc_md.get_random_long)
- return ppc_md.get_random_long(v);
-
return 0;
}
-static inline int arch_get_random_int(unsigned int *v)
-{
- unsigned long val;
- int rc;
-
- rc = arch_get_random_long(&val);
- if (rc)
- *v = val;
-
- return rc;
-}
-
-static inline int arch_has_random(void)
-{
- return !!ppc_md.get_random_long;
-}
-
-int powernv_get_random_long(unsigned long *v);
-
-static inline int arch_get_random_seed_long(unsigned long *v)
-{
- return 0;
-}
-static inline int arch_get_random_seed_int(unsigned int *v)
-{
- return 0;
-}
-static inline int arch_has_random_seed(void)
-{
- return 0;
-}
+size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs);
-#endif /* CONFIG_ARCH_RANDOM */
+#ifdef CONFIG_PPC_POWERNV
+int pnv_get_random_long(unsigned long *v);
+#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h
index 21be8ae8f809..f48e644900a2 100644
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -1,21 +1,10 @@
#ifndef _ASM_POWERPC_ASM_COMPAT_H
#define _ASM_POWERPC_ASM_COMPAT_H
+#include <asm/asm-const.h>
#include <asm/types.h>
#include <asm/ppc-opcode.h>
-#ifdef __ASSEMBLY__
-# define stringify_in_c(...) __VA_ARGS__
-# define ASM_CONST(x) x
-#else
-/* This version of stringify will deal with commas... */
-# define __stringify_in_c(...) #__VA_ARGS__
-# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
-# define __ASM_CONST(x) x##UL
-# define ASM_CONST(x) __ASM_CONST(x)
-#endif
-
-
#ifdef __powerpc64__
/* operations for longs and pointers */
@@ -23,13 +12,16 @@
#define PPC_STL stringify_in_c(std)
#define PPC_STLU stringify_in_c(stdu)
#define PPC_LCMPI stringify_in_c(cmpdi)
-#define PPC_LONG stringify_in_c(.llong)
+#define PPC_LCMPLI stringify_in_c(cmpldi)
+#define PPC_LCMP stringify_in_c(cmpd)
+#define PPC_LONG stringify_in_c(.8byte)
#define PPC_LONG_ALIGN stringify_in_c(.balign 8)
#define PPC_TLNEI stringify_in_c(tdnei)
-#define PPC_LLARX(t, a, b, eh) PPC_LDARX(t, a, b, eh)
+#define PPC_LLARX stringify_in_c(ldarx)
#define PPC_STLCX stringify_in_c(stdcx.)
#define PPC_CNTLZL stringify_in_c(cntlzd)
#define PPC_MTOCRF(FXM, RS) MTOCRF((FXM), RS)
+#define PPC_SRL stringify_in_c(srd)
#define PPC_LR_STKOFF 16
#define PPC_MIN_STKFRM 112
@@ -45,6 +37,12 @@
#define STDX_BE stringify_in_c(stdbrx)
#endif
+#ifdef CONFIG_CC_IS_CLANG
+#define DS_FORM_CONSTRAINT "Z<>"
+#else
+#define DS_FORM_CONSTRAINT "YZ<>"
+#endif
+
#else /* 32-bit */
/* operations for longs and pointers */
@@ -52,29 +50,19 @@
#define PPC_STL stringify_in_c(stw)
#define PPC_STLU stringify_in_c(stwu)
#define PPC_LCMPI stringify_in_c(cmpwi)
+#define PPC_LCMPLI stringify_in_c(cmplwi)
+#define PPC_LCMP stringify_in_c(cmpw)
#define PPC_LONG stringify_in_c(.long)
#define PPC_LONG_ALIGN stringify_in_c(.balign 4)
#define PPC_TLNEI stringify_in_c(twnei)
-#define PPC_LLARX(t, a, b, eh) PPC_LWARX(t, a, b, eh)
+#define PPC_LLARX stringify_in_c(lwarx)
#define PPC_STLCX stringify_in_c(stwcx.)
#define PPC_CNTLZL stringify_in_c(cntlzw)
#define PPC_MTOCRF stringify_in_c(mtcrf)
+#define PPC_SRL stringify_in_c(srw)
#define PPC_LR_STKOFF 4
#define PPC_MIN_STKFRM 16
#endif
-#ifdef __KERNEL__
-#ifdef CONFIG_IBM405_ERR77
-/* Erratum #77 on the 405 means we need a sync or dcbt before every
- * stwcx. The old ATOMIC_SYNC_FIX covered some but not all of this.
- */
-#define PPC405_ERR77(ra,rb) stringify_in_c(dcbt ra, rb;)
-#define PPC405_ERR77_SYNC stringify_in_c(sync;)
-#else
-#define PPC405_ERR77(ra,rb)
-#define PPC405_ERR77_SYNC
-#endif
-#endif
-
#endif /* _ASM_POWERPC_ASM_COMPAT_H */
diff --git a/arch/powerpc/include/asm/asm-const.h b/arch/powerpc/include/asm/asm-const.h
new file mode 100644
index 000000000000..392bdb1f104f
--- /dev/null
+++ b/arch/powerpc/include/asm/asm-const.h
@@ -0,0 +1,15 @@
+#ifndef _ASM_POWERPC_ASM_CONST_H
+#define _ASM_POWERPC_ASM_CONST_H
+
+#ifdef __ASSEMBLER__
+# define stringify_in_c(...) __VA_ARGS__
+# define ASM_CONST(x) x
+#else
+/* This version of stringify will deal with commas... */
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+# define __ASM_CONST(x) x##UL
+# define ASM_CONST(x) __ASM_CONST(x)
+#endif
+
+#endif /* _ASM_POWERPC_ASM_CONST_H */
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
new file mode 100644
index 000000000000..274bce76f5da
--- /dev/null
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_ASM_PROTOTYPES_H
+#define _ASM_POWERPC_ASM_PROTOTYPES_H
+/*
+ * This file is for C prototypes of asm symbols that are EXPORTed.
+ * It allows the modversions logic to see their prototype and
+ * generate proper CRCs for them.
+ *
+ * Copyright 2016, Daniel Axtens, IBM Corporation.
+ */
+
+#include <linux/threads.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <linux/uaccess.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/dcr.h>
+#include <asm/mmu_context.h>
+#include <asm/ultravisor-api.h>
+
+#include <uapi/asm/ucontext.h>
+
+/* Ultravisor */
+#if defined(CONFIG_PPC_POWERNV) || defined(CONFIG_PPC_SVM)
+long ucall_norets(unsigned long opcode, ...);
+#else
+static inline long ucall_norets(unsigned long opcode, ...)
+{
+ return U_NOT_AVAILABLE;
+}
+#endif
+
+/* OPAL */
+int64_t __opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7,
+ int64_t opcode, uint64_t msr);
+
+/* misc runtime */
+void enable_machine_check(void);
+extern u64 __bswapdi2(u64);
+extern s64 __lshrdi3(s64, int);
+extern s64 __ashldi3(s64, int);
+extern s64 __ashrdi3(s64, int);
+extern int __cmpdi2(s64, s64);
+extern int __ucmpdi2(u64, u64);
+
+/* tracing */
+void _mcount(void);
+
+/* Transaction memory related */
+void tm_enable(void);
+void tm_disable(void);
+void tm_abort(uint8_t cause);
+
+struct kvm_vcpu;
+void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
+#else
+static inline void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+ bool preserve_nv) { }
+static inline void kvmppc_restore_tm_hv(struct kvm_vcpu *vcpu, u64 msr,
+ bool preserve_nv) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
+
+long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
+long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
+ unsigned long dabrx);
+
+#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/asm.h b/arch/powerpc/include/asm/asm.h
new file mode 100644
index 000000000000..86f46b604e9a
--- /dev/null
+++ b/arch/powerpc/include/asm/asm.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_ASM_H
+#define _ASM_POWERPC_ASM_H
+
+#define _ASM_PTR " .long "
+
+#endif /* _ASM_POWERPC_ASM_H */
diff --git a/arch/powerpc/include/asm/async_tx.h b/arch/powerpc/include/asm/async_tx.h
index 8b2dc55d01ab..a14758426dd5 100644
--- a/arch/powerpc/include/asm/async_tx.h
+++ b/arch/powerpc/include/asm/async_tx.h
@@ -1,24 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2008-2009 DENX Software Engineering.
*
* Author: Yuri Tikhonov <yur@emcraft.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59
- * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * The full GNU General Public License is included in this distribution in the
- * file called COPYING.
*/
#ifndef _ASM_POWERPC_ASYNC_TX_H_
#define _ASM_POWERPC_ASYNC_TX_H_
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 28992d012926..d1ea554c33ed 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_ATOMIC_H_
#define _ASM_POWERPC_ATOMIC_H_
@@ -9,176 +10,125 @@
#include <linux/types.h>
#include <asm/cmpxchg.h>
#include <asm/barrier.h>
+#include <asm/asm-const.h>
+#include <asm/asm-compat.h>
-#define ATOMIC_INIT(i) { (i) }
-
-static __inline__ int atomic_read(const atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
-
- return t;
-}
+/*
+ * Since *_return_relaxed and {cmp}xchg_relaxed are implemented with
+ * a "bne-" instruction at the end, an isync is enough as an acquire barrier
+ * on platforms without lwsync.
+ */
+#define __atomic_acquire_fence() \
+ __asm__ __volatile__(PPC_ACQUIRE_BARRIER "" : : : "memory")
-static __inline__ void atomic_set(atomic_t *v, int i)
-{
- __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
-}
+#define __atomic_release_fence() \
+ __asm__ __volatile__(PPC_RELEASE_BARRIER "" : : : "memory")
-static __inline__ void atomic_add(int a, atomic_t *v)
+static __inline__ int arch_atomic_read(const atomic_t *v)
{
int t;
- __asm__ __volatile__(
-"1: lwarx %0,0,%3 # atomic_add\n\
- add %0,%2,%0\n"
- PPC405_ERR77(0,%3)
-" stwcx. %0,0,%3 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (a), "r" (&v->counter)
- : "cc");
-}
-
-static __inline__ int atomic_add_return(int a, atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%2 # atomic_add_return\n\
- add %0,%1,%0\n"
- PPC405_ERR77(0,%2)
-" stwcx. %0,0,%2 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
- : "r" (a), "r" (&v->counter)
- : "cc", "memory");
+ /* -mprefixed can generate offsets beyond range, fall back hack */
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+ __asm__ __volatile__("lwz %0,0(%1)" : "=r"(t) : "b"(&v->counter));
+ else
+ __asm__ __volatile__("lwz%U1%X1 %0,%1" : "=r"(t) : "m<>"(v->counter));
return t;
}
-#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0)
-
-static __inline__ void atomic_sub(int a, atomic_t *v)
+static __inline__ void arch_atomic_set(atomic_t *v, int i)
{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%3 # atomic_sub\n\
- subf %0,%2,%0\n"
- PPC405_ERR77(0,%3)
-" stwcx. %0,0,%3 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (a), "r" (&v->counter)
- : "cc");
+ /* -mprefixed can generate offsets beyond range, fall back hack */
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+ __asm__ __volatile__("stw %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter));
+ else
+ __asm__ __volatile__("stw%U0%X0 %1,%0" : "=m<>"(v->counter) : "r"(i));
}
-static __inline__ int atomic_sub_return(int a, atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%2 # atomic_sub_return\n\
- subf %0,%1,%0\n"
- PPC405_ERR77(0,%2)
-" stwcx. %0,0,%2 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
- : "r" (a), "r" (&v->counter)
- : "cc", "memory");
-
- return t;
+#define ATOMIC_OP(op, asm_op, suffix, sign, ...) \
+static __inline__ void arch_atomic_##op(int a, atomic_t *v) \
+{ \
+ int t; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%3 # atomic_" #op "\n" \
+ #asm_op "%I2" suffix " %0,%0,%2\n" \
+" stwcx. %0,0,%3 \n" \
+" bne- 1b\n" \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r"#sign (a), "r" (&v->counter) \
+ : "cc", ##__VA_ARGS__); \
+} \
+
+#define ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ...) \
+static inline int arch_atomic_##op##_return_relaxed(int a, atomic_t *v) \
+{ \
+ int t; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%3 # atomic_" #op "_return_relaxed\n" \
+ #asm_op "%I2" suffix " %0,%0,%2\n" \
+" stwcx. %0,0,%3\n" \
+" bne- 1b\n" \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r"#sign (a), "r" (&v->counter) \
+ : "cc", ##__VA_ARGS__); \
+ \
+ return t; \
}
-static __inline__ void atomic_inc(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_inc\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%2)
-" stwcx. %0,0,%2 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
+#define ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ...) \
+static inline int arch_atomic_fetch_##op##_relaxed(int a, atomic_t *v) \
+{ \
+ int res, t; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%4 # atomic_fetch_" #op "_relaxed\n" \
+ #asm_op "%I3" suffix " %1,%0,%3\n" \
+" stwcx. %1,0,%4\n" \
+" bne- 1b\n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r"#sign (a), "r" (&v->counter) \
+ : "cc", ##__VA_ARGS__); \
+ \
+ return res; \
}
-static __inline__ int atomic_inc_return(atomic_t *v)
-{
- int t;
+#define ATOMIC_OPS(op, asm_op, suffix, sign, ...) \
+ ATOMIC_OP(op, asm_op, suffix, sign, ##__VA_ARGS__) \
+ ATOMIC_OP_RETURN_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)\
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign, ##__VA_ARGS__)
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "xer", "memory");
+ATOMIC_OPS(add, add, "c", I, "xer")
+ATOMIC_OPS(sub, sub, "c", I, "xer")
- return t;
-}
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
-/*
- * atomic_inc_and_test - increment and test
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0)
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
-static __inline__ void atomic_dec(atomic_t *v)
-{
- int t;
+#undef ATOMIC_OPS
+#define ATOMIC_OPS(op, asm_op, suffix, sign) \
+ ATOMIC_OP(op, asm_op, suffix, sign) \
+ ATOMIC_FETCH_OP_RELAXED(op, asm_op, suffix, sign)
- __asm__ __volatile__(
-"1: lwarx %0,0,%2 # atomic_dec\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%2)\
-" stwcx. %0,0,%2\n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (&v->counter)
- : "cc", "xer");
-}
+ATOMIC_OPS(and, and, ".", K)
+ATOMIC_OPS(or, or, "", K)
+ATOMIC_OPS(xor, xor, "", K)
-static __inline__ int atomic_dec_return(atomic_t *v)
-{
- int t;
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # atomic_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "xer", "memory");
-
- return t;
-}
-
-#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP_RELAXED
+#undef ATOMIC_OP_RETURN_RELAXED
+#undef ATOMIC_OP
/**
- * __atomic_add_unless - add unless the number is a given value
+ * atomic_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -186,69 +136,35 @@ static __inline__ int atomic_dec_return(atomic_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
+static __inline__ int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
int t;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%1 # __atomic_add_unless\n\
+"1: lwarx %0,0,%1 # atomic_fetch_add_unless\n\
cmpw 0,%0,%3 \n\
- beq- 2f \n\
- add %0,%2,%0 \n"
- PPC405_ERR77(0,%2)
+ beq 2f \n\
+ add%I2c %0,%0,%2 \n"
" stwcx. %0,0,%1 \n\
bne- 1b \n"
PPC_ATOMIC_EXIT_BARRIER
-" subf %0,%2,%0 \n\
+" sub%I2c %0,%0,%2 \n\
2:"
: "=&r" (t)
- : "r" (&v->counter), "r" (a), "r" (u)
- : "cc", "memory");
+ : "r" (&v->counter), "rI" (a), "r" (u)
+ : "cc", "memory", "xer");
return t;
}
-
-/**
- * atomic_inc_not_zero - increment unless the number is zero
- * @v: pointer of type atomic_t
- *
- * Atomically increments @v by 1, so long as @v is non-zero.
- * Returns non-zero if @v was non-zero, and zero otherwise.
- */
-static __inline__ int atomic_inc_not_zero(atomic_t *v)
-{
- int t1, t2;
-
- __asm__ __volatile__ (
- PPC_ATOMIC_ENTRY_BARRIER
-"1: lwarx %0,0,%2 # atomic_inc_not_zero\n\
- cmpwi 0,%0,0\n\
- beq- 2f\n\
- addic %1,%0,1\n"
- PPC405_ERR77(0,%2)
-" stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ATOMIC_EXIT_BARRIER
- "\n\
-2:"
- : "=&r" (t1), "=&r" (t2)
- : "r" (&v->counter)
- : "cc", "xer", "memory");
-
- return t1;
-}
-#define atomic_inc_not_zero(v) atomic_inc_not_zero((v))
-
-#define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0)
-#define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0)
+#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1, even if
* the atomic variable, v, was not decremented.
*/
-static __inline__ int atomic_dec_if_positive(atomic_t *v)
+static __inline__ int arch_atomic_dec_if_positive(atomic_t *v)
{
int t;
@@ -258,7 +174,6 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
cmpwi %0,1\n\
addi %0,%0,-1\n\
blt- 2f\n"
- PPC405_ERR77(0,%1)
" stwcx. %0,0,%1\n\
bne- 1b"
PPC_ATOMIC_EXIT_BARRIER
@@ -269,95 +184,120 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
return t;
}
-#define atomic_dec_if_positive atomic_dec_if_positive
+#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive
#ifdef __powerpc64__
#define ATOMIC64_INIT(i) { (i) }
-static __inline__ long atomic64_read(const atomic64_t *v)
+static __inline__ s64 arch_atomic64_read(const atomic64_t *v)
{
- long t;
+ s64 t;
- __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : "m"(v->counter));
+ /* -mprefixed can generate offsets beyond range, fall back hack */
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+ __asm__ __volatile__("ld %0,0(%1)" : "=r"(t) : "b"(&v->counter));
+ else
+ __asm__ __volatile__("ld%U1%X1 %0,%1" : "=r"(t) : DS_FORM_CONSTRAINT (v->counter));
return t;
}
-static __inline__ void atomic64_set(atomic64_t *v, long i)
+static __inline__ void arch_atomic64_set(atomic64_t *v, s64 i)
{
- __asm__ __volatile__("std%U0%X0 %1,%0" : "=m"(v->counter) : "r"(i));
+ /* -mprefixed can generate offsets beyond range, fall back hack */
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PREFIXED))
+ __asm__ __volatile__("std %1,0(%2)" : "=m"(v->counter) : "r"(i), "b"(&v->counter));
+ else
+ __asm__ __volatile__("std%U0%X0 %1,%0" : "=" DS_FORM_CONSTRAINT (v->counter) : "r"(i));
}
-static __inline__ void atomic64_add(long a, atomic64_t *v)
-{
- long t;
+#define ATOMIC64_OP(op, asm_op) \
+static __inline__ void arch_atomic64_##op(s64 a, atomic64_t *v) \
+{ \
+ s64 t; \
+ \
+ __asm__ __volatile__( \
+"1: ldarx %0,0,%3 # atomic64_" #op "\n" \
+ #asm_op " %0,%2,%0\n" \
+" stdcx. %0,0,%3 \n" \
+" bne- 1b\n" \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+}
- __asm__ __volatile__(
-"1: ldarx %0,0,%3 # atomic64_add\n\
- add %0,%2,%0\n\
- stdcx. %0,0,%3 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (a), "r" (&v->counter)
- : "cc");
+#define ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+static inline s64 \
+arch_atomic64_##op##_return_relaxed(s64 a, atomic64_t *v) \
+{ \
+ s64 t; \
+ \
+ __asm__ __volatile__( \
+"1: ldarx %0,0,%3 # atomic64_" #op "_return_relaxed\n" \
+ #asm_op " %0,%2,%0\n" \
+" stdcx. %0,0,%3\n" \
+" bne- 1b\n" \
+ : "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ \
+ return t; \
}
-static __inline__ long atomic64_add_return(long a, atomic64_t *v)
-{
- long t;
+#define ATOMIC64_FETCH_OP_RELAXED(op, asm_op) \
+static inline s64 \
+arch_atomic64_fetch_##op##_relaxed(s64 a, atomic64_t *v) \
+{ \
+ s64 res, t; \
+ \
+ __asm__ __volatile__( \
+"1: ldarx %0,0,%4 # atomic64_fetch_" #op "_relaxed\n" \
+ #asm_op " %1,%3,%0\n" \
+" stdcx. %1,0,%4\n" \
+" bne- 1b\n" \
+ : "=&r" (res), "=&r" (t), "+m" (v->counter) \
+ : "r" (a), "r" (&v->counter) \
+ : "cc"); \
+ \
+ return res; \
+}
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%2 # atomic64_add_return\n\
- add %0,%1,%0\n\
- stdcx. %0,0,%2 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
- : "r" (a), "r" (&v->counter)
- : "cc", "memory");
+#define ATOMIC64_OPS(op, asm_op) \
+ ATOMIC64_OP(op, asm_op) \
+ ATOMIC64_OP_RETURN_RELAXED(op, asm_op) \
+ ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
- return t;
-}
+ATOMIC64_OPS(add, add)
+ATOMIC64_OPS(sub, subf)
-#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0)
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
-static __inline__ void atomic64_sub(long a, atomic64_t *v)
-{
- long t;
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
- __asm__ __volatile__(
-"1: ldarx %0,0,%3 # atomic64_sub\n\
- subf %0,%2,%0\n\
- stdcx. %0,0,%3 \n\
- bne- 1b"
- : "=&r" (t), "+m" (v->counter)
- : "r" (a), "r" (&v->counter)
- : "cc");
-}
+#undef ATOMIC64_OPS
+#define ATOMIC64_OPS(op, asm_op) \
+ ATOMIC64_OP(op, asm_op) \
+ ATOMIC64_FETCH_OP_RELAXED(op, asm_op)
-static __inline__ long atomic64_sub_return(long a, atomic64_t *v)
-{
- long t;
+ATOMIC64_OPS(and, and)
+ATOMIC64_OPS(or, or)
+ATOMIC64_OPS(xor, xor)
- __asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%2 # atomic64_sub_return\n\
- subf %0,%1,%0\n\
- stdcx. %0,0,%2 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
- : "r" (a), "r" (&v->counter)
- : "cc", "memory");
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
- return t;
-}
+#undef ATOPIC64_OPS
+#undef ATOMIC64_FETCH_OP_RELAXED
+#undef ATOMIC64_OP_RETURN_RELAXED
+#undef ATOMIC64_OP
-static __inline__ void atomic64_inc(atomic64_t *v)
+static __inline__ void arch_atomic64_inc(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_inc\n\
@@ -368,38 +308,27 @@ static __inline__ void atomic64_inc(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
+#define arch_atomic64_inc arch_atomic64_inc
-static __inline__ long atomic64_inc_return(atomic64_t *v)
+static __inline__ s64 arch_atomic64_inc_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_inc_return\n\
- addic %0,%0,1\n\
- stdcx. %0,0,%1 \n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: ldarx %0,0,%2 # atomic64_inc_return_relaxed\n"
+" addic %0,%0,1\n"
+" stdcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
-/*
- * atomic64_inc_and_test - increment and test
- * @v: pointer of type atomic64_t
- *
- * Atomically increments @v by 1
- * and returns true if the result is zero, or false for all
- * other cases.
- */
-#define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0)
-
-static __inline__ void atomic64_dec(atomic64_t *v)
+static __inline__ void arch_atomic64_dec(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
"1: ldarx %0,0,%2 # atomic64_dec\n\
@@ -410,35 +339,34 @@ static __inline__ void atomic64_dec(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer");
}
+#define arch_atomic64_dec arch_atomic64_dec
-static __inline__ long atomic64_dec_return(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_return_relaxed(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
- PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # atomic64_dec_return\n\
- addic %0,%0,-1\n\
- stdcx. %0,0,%1\n\
- bne- 1b"
- PPC_ATOMIC_EXIT_BARRIER
- : "=&r" (t)
+"1: ldarx %0,0,%2 # atomic64_dec_return_relaxed\n"
+" addic %0,%0,-1\n"
+" stdcx. %0,0,%2\n"
+" bne- 1b"
+ : "=&r" (t), "+m" (v->counter)
: "r" (&v->counter)
- : "cc", "xer", "memory");
+ : "cc", "xer");
return t;
}
-#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0)
-#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0)
+#define arch_atomic64_inc_return_relaxed arch_atomic64_inc_return_relaxed
+#define arch_atomic64_dec_return_relaxed arch_atomic64_dec_return_relaxed
/*
* Atomically test *v and decrement if it is greater than 0.
* The function returns the old value of *v minus 1.
*/
-static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
+static __inline__ s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
- long t;
+ s64 t;
__asm__ __volatile__(
PPC_ATOMIC_ENTRY_BARRIER
@@ -455,12 +383,10 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
return t;
}
-
-#define atomic64_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
/**
- * atomic64_add_unless - add unless the number is a given value
+ * atomic64_fetch_add_unless - add unless the number is a given value
* @v: pointer of type atomic64_t
* @a: the amount to add to v...
* @u: ...unless v is equal to u.
@@ -468,15 +394,15 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
* Atomically adds @a to @v, so long as it was not @u.
* Returns the old value of @v.
*/
-static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
+static __inline__ s64 arch_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
{
- long t;
+ s64 t;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
-"1: ldarx %0,0,%1 # __atomic_add_unless\n\
+"1: ldarx %0,0,%1 # atomic64_fetch_add_unless\n\
cmpd 0,%0,%3 \n\
- beq- 2f \n\
+ beq 2f \n\
add %0,%2,%0 \n"
" stdcx. %0,0,%1 \n\
bne- 1b \n"
@@ -487,8 +413,9 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
: "r" (&v->counter), "r" (a), "r" (u)
: "cc", "memory");
- return t != u;
+ return t;
}
+#define arch_atomic64_fetch_add_unless arch_atomic64_fetch_add_unless
/**
* atomic_inc64_not_zero - increment unless the number is zero
@@ -497,9 +424,9 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u)
* Atomically increments @v by 1, so long as @v is non-zero.
* Returns non-zero if @v was non-zero, and zero otherwise.
*/
-static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
+static __inline__ int arch_atomic64_inc_not_zero(atomic64_t *v)
{
- long t1, t2;
+ s64 t1, t2;
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
@@ -516,8 +443,9 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v)
: "r" (&v->counter)
: "cc", "xer", "memory");
- return t1;
+ return t1 != 0;
}
+#define arch_atomic64_inc_not_zero(v) arch_atomic64_inc_not_zero((v))
#endif /* __powerpc64__ */
diff --git a/arch/powerpc/include/asm/backlight.h b/arch/powerpc/include/asm/backlight.h
index 8cf5c37c3817..061a910d7492 100644
--- a/arch/powerpc/include/asm/backlight.h
+++ b/arch/powerpc/include/asm/backlight.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Routines for handling backlight control on PowerBooks
*
@@ -9,15 +10,14 @@
#define __ASM_POWERPC_BACKLIGHT_H
#ifdef __KERNEL__
-#include <linux/fb.h>
#include <linux/mutex.h>
+struct backlight_device;
+
/* For locking instructions, see the implementation file */
extern struct backlight_device *pmac_backlight;
extern struct mutex pmac_backlight_mutex;
-extern int pmac_backlight_curve_lookup(struct fb_info *info, int value);
-
extern int pmac_has_backlight_type(const char *type);
extern void pmac_backlight_key(int direction);
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index bab79a110c7b..9e9833faa4af 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -1,9 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
#ifndef _ASM_POWERPC_BARRIER_H
#define _ASM_POWERPC_BARRIER_H
+#include <asm/asm-const.h>
+
+#ifndef __ASSEMBLER__
+#include <asm/ppc-opcode.h>
+#endif
+
/*
* Memory barrier.
* The sync instruction guarantees that all memory accesses initiated
@@ -15,8 +22,6 @@
* mb() prevents loads and stores being reordered across this point.
* rmb() prevents loads being reordered across this point.
* wmb() prevents stores being reordered across this point.
- * read_barrier_depends() prevents data-dependent loads being reordered
- * across this point (nop on PPC).
*
* *mb() variants without smp_ prefix must order all types of memory
* operations with one another. sync is the only instruction sufficient
@@ -30,35 +35,30 @@
* However, on CPUs that don't support lwsync, lwsync actually maps to a
* heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
*/
-#define mb() __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#define read_barrier_depends() do { } while(0)
-
-#define set_mb(var, value) do { var = value; mb(); } while (0)
+#define __mb() __asm__ __volatile__ ("sync" : : : "memory")
+#define __rmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define __wmb() __asm__ __volatile__ ("sync" : : : "memory")
-#ifdef CONFIG_SMP
-
-#ifdef __SUBARCH_HAS_LWSYNC
+/* The sub-arch has lwsync */
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
# define SMPWMB LWSYNC
+#elif defined(CONFIG_BOOKE)
+# define SMPWMB mbar
#else
# define SMPWMB eieio
#endif
+/* clang defines this macro for a builtin, which will not work with runtime patching */
+#undef __lwsync
#define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
+#define __dma_rmb() __lwsync()
+#define __dma_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
-#define smp_mb() mb()
-#define smp_rmb() __lwsync()
-#define smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
-#define smp_read_barrier_depends() read_barrier_depends()
-#else
-#define __lwsync() barrier()
+#define __smp_lwsync() __lwsync()
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#define smp_read_barrier_depends() do { } while(0)
-#endif /* CONFIG_SMP */
+#define __smp_mb() __mb()
+#define __smp_rmb() __lwsync()
+#define __smp_wmb() __asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
/*
* This is a barrier which prevents following instructions from being
@@ -69,22 +69,51 @@
#define data_barrier(x) \
asm volatile("twi 0,%0,0; isync" : : "r" (x) : "memory");
-#define smp_store_release(p, v) \
+#define __smp_store_release(p, v) \
do { \
compiletime_assert_atomic_type(*p); \
- __lwsync(); \
- ACCESS_ONCE(*p) = (v); \
+ __smp_lwsync(); \
+ WRITE_ONCE(*p, v); \
} while (0)
-#define smp_load_acquire(p) \
+#define __smp_load_acquire(p) \
({ \
- typeof(*p) ___p1 = ACCESS_ONCE(*p); \
+ typeof(*p) ___p1 = READ_ONCE(*p); \
compiletime_assert_atomic_type(*p); \
- __lwsync(); \
+ __smp_lwsync(); \
___p1; \
})
-#define smp_mb__before_atomic() smp_mb()
-#define smp_mb__after_atomic() smp_mb()
+#ifdef CONFIG_PPC_BOOK3S_64
+#define NOSPEC_BARRIER_SLOT nop
+#elif defined(CONFIG_PPC_E500)
+#define NOSPEC_BARRIER_SLOT nop; nop
+#endif
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+/*
+ * Prevent execution of subsequent instructions until preceding branches have
+ * been fully resolved and are no longer executing speculatively.
+ */
+#define barrier_nospec_asm NOSPEC_BARRIER_FIXUP_SECTION; NOSPEC_BARRIER_SLOT
+
+// This also acts as a compiler barrier due to the memory clobber.
+#define barrier_nospec() asm (stringify_in_c(barrier_nospec_asm) ::: "memory")
+
+#else /* !CONFIG_PPC_BARRIER_NOSPEC */
+#define barrier_nospec_asm
+#define barrier_nospec()
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+/*
+ * pmem_wmb() ensures that all stores for which the modifications
+ * are written to persistent storage by preceding dcbfps/dcbstps
+ * instructions have updated persistent storage before any data
+ * access or data transfer caused by subsequent instructions is
+ * initiated.
+ */
+#define pmem_wmb() __asm__ __volatile__(PPC_PHWSYNC ::: "memory")
+
+#include <asm-generic/barrier.h>
#endif /* _ASM_POWERPC_BARRIER_H */
diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h
index bd3bd573d0ae..0d0470cd5ac3 100644
--- a/arch/powerpc/include/asm/bitops.h
+++ b/arch/powerpc/include/asm/bitops.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC atomic bit operations.
*
@@ -14,9 +15,9 @@
*
* The bitop functions are defined to work on unsigned longs, so for a
* ppc64 system the bits end up numbered:
- * |63..............0|127............64|191...........128|255...........196|
+ * |63..............0|127............64|191...........128|255...........192|
* and on ppc32:
- * |31.....0|63....31|95....64|127...96|159..128|191..160|223..192|255..224|
+ * |31.....0|63....32|95....64|127...96|159..128|191..160|223..192|255..224|
*
* There are a few little-endian macros used mostly for filesystem
* bitmaps, these work on similar bit arrays layouts, but
@@ -26,11 +27,6 @@
* The main difference is that bit 3-5 (64b) or 3-4 (32b) in the bit
* number field needs to be reversed compared to the big-endian bit
* fields. This can be achieved by XOR with 0x38 (64b) or 0x18 (32b).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_BITOPS_H
@@ -51,48 +47,101 @@
#define PPC_BIT(bit) (1UL << PPC_BITLSHIFT(bit))
#define PPC_BITMASK(bs, be) ((PPC_BIT(bs) - PPC_BIT(be)) | PPC_BIT(bs))
+/* Put a PPC bit into a "normal" bit position */
+#define PPC_BITEXTRACT(bits, ppc_bit, dst_bit) \
+ ((((bits) >> PPC_BITLSHIFT(ppc_bit)) & 1) << (dst_bit))
+
+#define PPC_BITLSHIFT32(be) (32 - 1 - (be))
+#define PPC_BIT32(bit) (1UL << PPC_BITLSHIFT32(bit))
+#define PPC_BITMASK32(bs, be) ((PPC_BIT32(bs) - PPC_BIT32(be))|PPC_BIT32(bs))
+
+#define PPC_BITLSHIFT8(be) (8 - 1 - (be))
+#define PPC_BIT8(bit) (1UL << PPC_BITLSHIFT8(bit))
+#define PPC_BITMASK8(bs, be) ((PPC_BIT8(bs) - PPC_BIT8(be))|PPC_BIT8(bs))
+
#include <asm/barrier.h>
/* Macro for generating the ***_bits() functions */
#define DEFINE_BITOP(fn, op, prefix) \
-static __inline__ void fn(unsigned long mask, \
+static inline void fn(unsigned long mask, \
volatile unsigned long *_p) \
{ \
unsigned long old; \
unsigned long *p = (unsigned long *)_p; \
__asm__ __volatile__ ( \
prefix \
-"1:" PPC_LLARX(%0,0,%3,0) "\n" \
- stringify_in_c(op) "%0,%0,%2\n" \
- PPC405_ERR77(0,%3) \
+"1:" PPC_LLARX "%0,0,%3,0\n" \
+ #op "%I2 %0,%0,%2\n" \
PPC_STLCX "%0,0,%3\n" \
"bne- 1b\n" \
: "=&r" (old), "+m" (*p) \
- : "r" (mask), "r" (p) \
+ : "rK" (mask), "r" (p) \
: "cc", "memory"); \
}
DEFINE_BITOP(set_bits, or, "")
-DEFINE_BITOP(clear_bits, andc, "")
-DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER)
DEFINE_BITOP(change_bits, xor, "")
-static __inline__ void set_bit(int nr, volatile unsigned long *addr)
+static __always_inline bool is_rlwinm_mask_valid(unsigned long x)
+{
+ if (!x)
+ return false;
+ if (x & 1)
+ x = ~x; // make the mask non-wrapping
+ x += x & -x; // adding the low set bit results in at most one bit set
+
+ return !(x & (x - 1));
+}
+
+#define DEFINE_CLROP(fn, prefix) \
+static inline void fn(unsigned long mask, volatile unsigned long *_p) \
+{ \
+ unsigned long old; \
+ unsigned long *p = (unsigned long *)_p; \
+ \
+ if (IS_ENABLED(CONFIG_PPC32) && \
+ __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {\
+ asm volatile ( \
+ prefix \
+ "1:" "lwarx %0,0,%3\n" \
+ "rlwinm %0,%0,0,%2\n" \
+ "stwcx. %0,0,%3\n" \
+ "bne- 1b\n" \
+ : "=&r" (old), "+m" (*p) \
+ : "n" (~mask), "r" (p) \
+ : "cc", "memory"); \
+ } else { \
+ asm volatile ( \
+ prefix \
+ "1:" PPC_LLARX "%0,0,%3,0\n" \
+ "andc %0,%0,%2\n" \
+ PPC_STLCX "%0,0,%3\n" \
+ "bne- 1b\n" \
+ : "=&r" (old), "+m" (*p) \
+ : "r" (mask), "r" (p) \
+ : "cc", "memory"); \
+ } \
+}
+
+DEFINE_CLROP(clear_bits, "")
+DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER)
+
+static inline void arch_set_bit(int nr, volatile unsigned long *addr)
{
set_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
-static __inline__ void clear_bit(int nr, volatile unsigned long *addr)
+static inline void arch_clear_bit(int nr, volatile unsigned long *addr)
{
clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
-static __inline__ void clear_bit_unlock(int nr, volatile unsigned long *addr)
+static inline void arch_clear_bit_unlock(int nr, volatile unsigned long *addr)
{
clear_bits_unlock(BIT_MASK(nr), addr + BIT_WORD(nr));
}
-static __inline__ void change_bit(int nr, volatile unsigned long *addr)
+static inline void arch_change_bit(int nr, volatile unsigned long *addr)
{
change_bits(BIT_MASK(nr), addr + BIT_WORD(nr));
}
@@ -100,7 +149,7 @@ static __inline__ void change_bit(int nr, volatile unsigned long *addr)
/* Like DEFINE_BITOP(), with changes to the arguments to 'op' and the output
* operands. */
#define DEFINE_TESTOP(fn, op, prefix, postfix, eh) \
-static __inline__ unsigned long fn( \
+static inline unsigned long fn( \
unsigned long mask, \
volatile unsigned long *_p) \
{ \
@@ -108,14 +157,13 @@ static __inline__ unsigned long fn( \
unsigned long *p = (unsigned long *)_p; \
__asm__ __volatile__ ( \
prefix \
-"1:" PPC_LLARX(%0,0,%3,eh) "\n" \
- stringify_in_c(op) "%1,%0,%2\n" \
- PPC405_ERR77(0,%3) \
+"1:" PPC_LLARX "%0,0,%3,%4\n" \
+ #op "%I2 %1,%0,%2\n" \
PPC_STLCX "%1,0,%3\n" \
"bne- 1b\n" \
postfix \
: "=&r" (old), "=&r" (t) \
- : "r" (mask), "r" (p) \
+ : "rK" (mask), "r" (p), "n" (eh) \
: "cc", "memory"); \
return (old & mask); \
}
@@ -123,40 +171,90 @@ static __inline__ unsigned long fn( \
DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER,
PPC_ATOMIC_EXIT_BARRIER, 0)
DEFINE_TESTOP(test_and_set_bits_lock, or, "",
- PPC_ACQUIRE_BARRIER, 1)
-DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER,
- PPC_ATOMIC_EXIT_BARRIER, 0)
+ PPC_ACQUIRE_BARRIER, IS_ENABLED(CONFIG_PPC64))
DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER,
PPC_ATOMIC_EXIT_BARRIER, 0)
-static __inline__ int test_and_set_bit(unsigned long nr,
- volatile unsigned long *addr)
+static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p)
+{
+ unsigned long old, t;
+ unsigned long *p = (unsigned long *)_p;
+
+ if (IS_ENABLED(CONFIG_PPC32) &&
+ __builtin_constant_p(mask) && is_rlwinm_mask_valid(~mask)) {
+ asm volatile (
+ PPC_ATOMIC_ENTRY_BARRIER
+ "1:" "lwarx %0,0,%3\n"
+ "rlwinm %1,%0,0,%2\n"
+ "stwcx. %1,0,%3\n"
+ "bne- 1b\n"
+ PPC_ATOMIC_EXIT_BARRIER
+ : "=&r" (old), "=&r" (t)
+ : "n" (~mask), "r" (p)
+ : "cc", "memory");
+ } else {
+ asm volatile (
+ PPC_ATOMIC_ENTRY_BARRIER
+ "1:" PPC_LLARX "%0,0,%3,0\n"
+ "andc %1,%0,%2\n"
+ PPC_STLCX "%1,0,%3\n"
+ "bne- 1b\n"
+ PPC_ATOMIC_EXIT_BARRIER
+ : "=&r" (old), "=&r" (t)
+ : "r" (mask), "r" (p)
+ : "cc", "memory");
+ }
+
+ return (old & mask);
+}
+
+static inline int arch_test_and_set_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
-static __inline__ int test_and_set_bit_lock(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_set_bit_lock(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_set_bits_lock(BIT_MASK(nr),
addr + BIT_WORD(nr)) != 0;
}
-static __inline__ int test_and_clear_bit(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_clear_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_clear_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
-static __inline__ int test_and_change_bit(unsigned long nr,
- volatile unsigned long *addr)
+static inline int arch_test_and_change_bit(unsigned long nr,
+ volatile unsigned long *addr)
{
return test_and_change_bits(BIT_MASK(nr), addr + BIT_WORD(nr)) != 0;
}
+static inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
+ volatile unsigned long *p)
+{
+ unsigned long old, t;
+
+ __asm__ __volatile__ (
+ PPC_RELEASE_BARRIER
+"1:" PPC_LLARX "%0,0,%3,0\n"
+ "xor %1,%0,%2\n"
+ PPC_STLCX "%1,0,%3\n"
+ "bne- 1b\n"
+ : "=&r" (old), "=&r" (t)
+ : "r" (mask), "r" (p)
+ : "cc", "memory");
+
+ return (old & BIT_MASK(7)) != 0;
+}
+#define arch_xor_unlock_is_negative_byte arch_xor_unlock_is_negative_byte
+
#include <asm-generic/bitops/non-atomic.h>
-static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
+static inline void arch___clear_bit_unlock(int nr, volatile unsigned long *addr)
{
__asm__ __volatile__(PPC_RELEASE_BARRIER "" ::: "memory");
__clear_bit(nr, addr);
@@ -166,102 +264,48 @@ static __inline__ void __clear_bit_unlock(int nr, volatile unsigned long *addr)
* Return the zero-based bit position (LE, not IBM bit numbering) of
* the most significant 1-bit in a double word.
*/
-static __inline__ __attribute__((const))
-int __ilog2(unsigned long x)
-{
- int lz;
+#define __ilog2(x) ilog2(x)
- asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
- return BITS_PER_LONG - 1 - lz;
-}
+#include <asm-generic/bitops/ffz.h>
-static inline __attribute__((const))
-int __ilog2_u32(u32 n)
-{
- int bit;
- asm ("cntlzw %0,%1" : "=r" (bit) : "r" (n));
- return 31 - bit;
-}
-
-#ifdef __powerpc64__
-static inline __attribute__((const))
-int __ilog2_u64(u64 n)
-{
- int bit;
- asm ("cntlzd %0,%1" : "=r" (bit) : "r" (n));
- return 63 - bit;
-}
-#endif
-
-/*
- * Determines the bit position of the least significant 0 bit in the
- * specified double word. The returned bit position will be
- * zero-based, starting from the right side (63/31 - 0).
- */
-static __inline__ unsigned long ffz(unsigned long x)
-{
- /* no zero exists anywhere in the 8 byte area. */
- if ((x = ~x) == 0)
- return BITS_PER_LONG;
-
- /*
- * Calculate the bit position of the least significant '1' bit in x
- * (since x has been changed this will actually be the least significant
- * '0' bit in * the original x). Note: (x & -x) gives us a mask that
- * is the least significant * (RIGHT-most) 1-bit of the value in x.
- */
- return __ilog2(x & -x);
-}
+#include <asm-generic/bitops/builtin-__ffs.h>
-static __inline__ int __ffs(unsigned long x)
-{
- return __ilog2(x & -x);
-}
-
-/*
- * ffs: find first bit set. This is defined the same way as
- * the libc and compiler builtin ffs routines, therefore
- * differs in spirit from the above ffz (man ffs).
- */
-static __inline__ int ffs(int x)
-{
- unsigned long i = (unsigned long)x;
- return __ilog2(i & -i) + 1;
-}
+#include <asm-generic/bitops/builtin-ffs.h>
/*
* fls: find last (most-significant) bit set.
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __inline__ int fls(unsigned int x)
+static __always_inline __attribute_const__ int fls(unsigned int x)
{
int lz;
- asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
+ if (__builtin_constant_p(x))
+ return x ? 32 - __builtin_clz(x) : 0;
+ asm("cntlzw %0,%1" : "=r" (lz) : "r" (x));
return 32 - lz;
}
-static __inline__ unsigned long __fls(unsigned long x)
-{
- return __ilog2(x);
-}
+#include <asm-generic/bitops/builtin-__fls.h>
/*
* 64-bit can do this using one cntlzd (count leading zeroes doubleword)
* instruction; for 32-bit we use the generic version, which does two
* 32-bit fls calls.
*/
-#ifdef __powerpc64__
-static __inline__ int fls64(__u64 x)
+#ifdef CONFIG_PPC64
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
int lz;
- asm ("cntlzd %0,%1" : "=r" (lz) : "r" (x));
+ if (__builtin_constant_p(x))
+ return x ? 64 - __builtin_clzll(x) : 0;
+ asm("cntlzd %0,%1" : "=r" (lz) : "r" (x));
return 64 - lz;
}
#else
#include <asm-generic/bitops/fls64.h>
-#endif /* __powerpc64__ */
+#endif
#ifdef CONFIG_PPC64
unsigned int __arch_hweight8(unsigned int w);
@@ -273,7 +317,9 @@ unsigned long __arch_hweight64(__u64 w);
#include <asm-generic/bitops/hweight.h>
#endif
-#include <asm-generic/bitops/find.h>
+/* wrappers that deal with KASAN instrumentation */
+#include <asm-generic/bitops/instrumented-atomic.h>
+#include <asm-generic/bitops/instrumented-lock.h>
/* Little-endian versions */
#include <asm-generic/bitops/le.h>
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
new file mode 100644
index 000000000000..873c5146e326
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H
+#define _ASM_POWERPC_BOOK3S_32_KUP_H
+
+#include <asm/bug.h>
+#include <asm/book3s/32/mmu-hash.h>
+#include <asm/mmu.h>
+#include <asm/synch.h>
+
+#ifndef __ASSEMBLER__
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <linux/sched.h>
+
+#define KUAP_NONE (~0UL)
+
+static __always_inline void kuap_lock_one(unsigned long addr)
+{
+ mtsr(mfsr(addr) | SR_KS, addr);
+ isync(); /* Context sync required after mtsr() */
+}
+
+static __always_inline void kuap_unlock_one(unsigned long addr)
+{
+ mtsr(mfsr(addr) & ~SR_KS, addr);
+ isync(); /* Context sync required after mtsr() */
+}
+
+static __always_inline void uaccess_begin_32s(unsigned long addr)
+{
+ unsigned long tmp;
+
+ asm volatile(ASM_MMU_FTR_IFSET(
+ "mfsrin %0, %1;"
+ "rlwinm %0, %0, 0, %2;"
+ "mtsrin %0, %1;"
+ "isync", "", %3)
+ : "=&r"(tmp)
+ : "r"(addr), "i"(~SR_KS), "i"(MMU_FTR_KUAP)
+ : "memory");
+}
+
+static __always_inline void uaccess_end_32s(unsigned long addr)
+{
+ unsigned long tmp;
+
+ asm volatile(ASM_MMU_FTR_IFSET(
+ "mfsrin %0, %1;"
+ "oris %0, %0, %2;"
+ "mtsrin %0, %1;"
+ "isync", "", %3)
+ : "=&r"(tmp)
+ : "r"(addr), "i"(SR_KS >> 16), "i"(MMU_FTR_KUAP)
+ : "memory");
+}
+
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+ unsigned long kuap = current->thread.kuap;
+
+ regs->kuap = kuap;
+ if (unlikely(kuap == KUAP_NONE))
+ return;
+
+ current->thread.kuap = KUAP_NONE;
+ kuap_lock_one(kuap);
+}
+#define __kuap_save_and_lock __kuap_save_and_lock
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+ if (unlikely(kuap != KUAP_NONE)) {
+ current->thread.kuap = KUAP_NONE;
+ kuap_lock_one(kuap);
+ }
+
+ if (likely(regs->kuap == KUAP_NONE))
+ return;
+
+ current->thread.kuap = regs->kuap;
+
+ kuap_unlock_one(regs->kuap);
+}
+
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+ unsigned long kuap = current->thread.kuap;
+
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_PPC_KUAP_DEBUG) && kuap != KUAP_NONE);
+
+ return kuap;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ u32 size, unsigned long dir)
+{
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+ if (!(dir & KUAP_WRITE))
+ return;
+
+ current->thread.kuap = (__force u32)to;
+ uaccess_begin_32s((__force u32)to);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ u32 kuap = current->thread.kuap;
+
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+ if (!(dir & KUAP_WRITE))
+ return;
+
+ current->thread.kuap = KUAP_NONE;
+ uaccess_end_32s(kuap);
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = current->thread.kuap;
+
+ if (flags != KUAP_NONE) {
+ current->thread.kuap = KUAP_NONE;
+ uaccess_end_32s(flags);
+ }
+
+ return flags;
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+ if (flags != KUAP_NONE) {
+ current->thread.kuap = flags;
+ uaccess_begin_32s(flags);
+ }
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ unsigned long kuap = regs->kuap;
+
+ if (!is_write)
+ return false;
+ if (kuap == KUAP_NONE)
+ return true;
+
+ /*
+ * If faulting address doesn't match unlocked segment, change segment.
+ * In case of unaligned store crossing two segments, emulate store.
+ */
+ if ((kuap ^ address) & 0xf0000000) {
+ if (!(kuap & 0x0fffffff) && address > kuap - 4 && fix_alignment(regs)) {
+ regs_add_return_ip(regs, 4);
+ emulate_single_step(regs);
+ } else {
+ regs->kuap = address;
+ }
+ }
+
+ return false;
+}
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
new file mode 100644
index 000000000000..8435bf3cdabf
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -0,0 +1,236 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_
+
+/*
+ * 32-bit hash table MMU support
+ */
+
+/*
+ * BATs
+ */
+
+/* Block size masks */
+#define BL_128K 0x000
+#define BL_256K 0x001
+#define BL_512K 0x003
+#define BL_1M 0x007
+#define BL_2M 0x00F
+#define BL_4M 0x01F
+#define BL_8M 0x03F
+#define BL_16M 0x07F
+#define BL_32M 0x0FF
+#define BL_64M 0x1FF
+#define BL_128M 0x3FF
+#define BL_256M 0x7FF
+
+/* BAT Access Protection */
+#define BPP_XX 0x00 /* No access */
+#define BPP_RX 0x01 /* Read only */
+#define BPP_RW 0x02 /* Read/write */
+
+#ifndef __ASSEMBLER__
+/* Contort a phys_addr_t into the right format/bits for a BAT */
+#ifdef CONFIG_PHYS_64BIT
+#define BAT_PHYS_ADDR(x) ((u32)((x & 0x00000000fffe0000ULL) | \
+ ((x & 0x0000000e00000000ULL) >> 24) | \
+ ((x & 0x0000000100000000ULL) >> 30)))
+#define PHYS_BAT_ADDR(x) (((u64)(x) & 0x00000000fffe0000ULL) | \
+ (((u64)(x) << 24) & 0x0000000e00000000ULL) | \
+ (((u64)(x) << 30) & 0x0000000100000000ULL))
+#else
+#define BAT_PHYS_ADDR(x) (x)
+#define PHYS_BAT_ADDR(x) ((x) & 0xfffe0000)
+#endif
+
+struct ppc_bat {
+ u32 batu;
+ u32 batl;
+};
+#endif /* !__ASSEMBLER__ */
+
+/*
+ * Hash table
+ */
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+#define PP_RWXX 0 /* Supervisor read/write, User none */
+#define PP_RWRX 1 /* Supervisor read/write, User read */
+#define PP_RWRW 2 /* Supervisor read/write, User read/write */
+#define PP_RXRX 3 /* Supervisor read, User read */
+
+/* Values for Segment Registers */
+#define SR_NX 0x10000000 /* No Execute */
+#define SR_KP 0x20000000 /* User key */
+#define SR_KS 0x40000000 /* Supervisor key */
+
+#ifdef __ASSEMBLER__
+
+#include <asm/asm-offsets.h>
+
+.macro uus_addi sr reg1 reg2 imm
+ .if NUM_USER_SEGMENTS > \sr
+ addi \reg1,\reg2,\imm
+ .endif
+.endm
+
+.macro uus_mtsr sr reg1
+ .if NUM_USER_SEGMENTS > \sr
+ mtsr \sr, \reg1
+ .endif
+.endm
+
+/*
+ * This isync() shouldn't be necessary as the kernel is not expected to run
+ * any instruction in userspace soon after the update of segments and 'rfi'
+ * instruction is used to return to userspace, but hash based cores
+ * (at least G3) seem to exhibit a random behaviour when the 'isync' is not
+ * there. 603 cores don't have this behaviour so don't do the 'isync' as it
+ * saves several CPU cycles.
+ */
+.macro uus_isync
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+ isync
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
+.endm
+
+.macro update_user_segments_by_4 tmp1 tmp2 tmp3 tmp4
+ uus_addi 1, \tmp2, \tmp1, 0x111
+ uus_addi 2, \tmp3, \tmp1, 0x222
+ uus_addi 3, \tmp4, \tmp1, 0x333
+
+ uus_mtsr 0, \tmp1
+ uus_mtsr 1, \tmp2
+ uus_mtsr 2, \tmp3
+ uus_mtsr 3, \tmp4
+
+ uus_addi 4, \tmp1, \tmp1, 0x444
+ uus_addi 5, \tmp2, \tmp2, 0x444
+ uus_addi 6, \tmp3, \tmp3, 0x444
+ uus_addi 7, \tmp4, \tmp4, 0x444
+
+ uus_mtsr 4, \tmp1
+ uus_mtsr 5, \tmp2
+ uus_mtsr 6, \tmp3
+ uus_mtsr 7, \tmp4
+
+ uus_addi 8, \tmp1, \tmp1, 0x444
+ uus_addi 9, \tmp2, \tmp2, 0x444
+ uus_addi 10, \tmp3, \tmp3, 0x444
+ uus_addi 11, \tmp4, \tmp4, 0x444
+
+ uus_mtsr 8, \tmp1
+ uus_mtsr 9, \tmp2
+ uus_mtsr 10, \tmp3
+ uus_mtsr 11, \tmp4
+
+ uus_addi 12, \tmp1, \tmp1, 0x444
+ uus_addi 13, \tmp2, \tmp2, 0x444
+ uus_addi 14, \tmp3, \tmp3, 0x444
+ uus_addi 15, \tmp4, \tmp4, 0x444
+
+ uus_mtsr 12, \tmp1
+ uus_mtsr 13, \tmp2
+ uus_mtsr 14, \tmp3
+ uus_mtsr 15, \tmp4
+
+ uus_isync
+.endm
+
+#else
+
+/*
+ * This macro defines the mapping from contexts to VSIDs (virtual
+ * segment IDs). We use a skew on both the context and the high 4 bits
+ * of the 32-bit virtual address (the "effective segment ID") in order
+ * to spread out the entries in the MMU hash table. Note, if this
+ * function is changed then hash functions will have to be
+ * changed to correspond.
+ */
+#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
+
+/*
+ * Hardware Page Table Entry
+ * Note that the xpn and x bitfields are used only by processors that
+ * support extended addressing; otherwise, those bits are reserved.
+ */
+struct hash_pte {
+ unsigned long v:1; /* Entry is valid */
+ unsigned long vsid:24; /* Virtual segment identifier */
+ unsigned long h:1; /* Hash algorithm indicator */
+ unsigned long api:6; /* Abbreviated page index */
+ unsigned long rpn:20; /* Real (physical) page number */
+ unsigned long xpn:3; /* Real page number bits 0-2, optional */
+ unsigned long r:1; /* Referenced */
+ unsigned long c:1; /* Changed */
+ unsigned long w:1; /* Write-thru cache mode */
+ unsigned long i:1; /* Cache inhibited */
+ unsigned long m:1; /* Memory coherence */
+ unsigned long g:1; /* Guarded */
+ unsigned long x:1; /* Real page number bit 3, optional */
+ unsigned long pp:2; /* Page protection */
+};
+
+typedef struct {
+ unsigned long id;
+ unsigned long sr0;
+ void __user *vdso;
+} mm_context_t;
+
+#ifdef CONFIG_PPC_KUEP
+#define INIT_MM_CONTEXT(mm) .context.sr0 = SR_NX
+#endif
+
+void update_bats(void);
+static inline void cleanup_cpu_mmu_context(void) { }
+
+/* patch sites */
+extern s32 patch__hash_page_A0, patch__hash_page_A1, patch__hash_page_A2;
+extern s32 patch__hash_page_B, patch__hash_page_C;
+extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2;
+extern s32 patch__flush_hash_B;
+
+#include <asm/reg.h>
+#include <asm/task_size_32.h>
+
+static __always_inline void update_user_segment(u32 n, u32 val)
+{
+ if (n << 28 < TASK_SIZE)
+ mtsr(val + n * 0x111, n << 28);
+}
+
+static __always_inline void update_user_segments(u32 val)
+{
+ val &= 0xf0ffffff;
+
+ update_user_segment(0, val);
+ update_user_segment(1, val);
+ update_user_segment(2, val);
+ update_user_segment(3, val);
+ update_user_segment(4, val);
+ update_user_segment(5, val);
+ update_user_segment(6, val);
+ update_user_segment(7, val);
+ update_user_segment(8, val);
+ update_user_segment(9, val);
+ update_user_segment(10, val);
+ update_user_segment(11, val);
+ update_user_segment(12, val);
+ update_user_segment(13, val);
+ update_user_segment(14, val);
+ update_user_segment(15, val);
+}
+
+int __init find_free_bat(void);
+unsigned int bat_block_size(unsigned long base, unsigned long top);
+#endif /* !__ASSEMBLER__ */
+
+/* We happily ignore the smaller BATs on 601, we don't actually use
+ * those definitions on hash32 at the moment anyway
+ */
+#define mmu_virtual_psize MMU_PAGE_4K
+#define mmu_linear_psize MMU_PAGE_256M
+
+#endif /* _ASM_POWERPC_BOOK3S_32_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
new file mode 100644
index 000000000000..f4390704d5ba
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_32_PGALLOC_H
+
+#include <linux/threads.h>
+#include <linux/slab.h>
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+				      pgtable_gfp_flags(mm, GFP_KERNEL));
+#ifdef CONFIG_PPC_BOOK3S_603
+	if (pgd)	/* allocation can fail: never memcpy() into a NULL pgd, just return it */
+		memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD,
+		       (MAX_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+#endif
+	return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present..
+ */
+/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) do { } while (0)
+#define __pmd_free_tlb(tlb,x,a) do { } while (0)
+/* #define pgd_populate(mm, pmd, pte) BUG() */
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+ pte_t *pte)
+{
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pte_page)
+{
+ *pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT);
+}
+
+static inline void pgtable_free(void *table, unsigned index_size)
+{
+ if (!index_size) {
+ pte_fragment_free((unsigned long *)table, 0);
+ } else {
+ BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(index_size), table);
+ }
+}
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb,
+ void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ pgtable_free_tlb(tlb, table, 0);
+}
+#endif /* _ASM_POWERPC_BOOK3S_32_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
new file mode 100644
index 000000000000..87dcca962be7
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -0,0 +1,607 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
+#define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+/*
+ * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
+ * table containing PTEs, together with a set of 16 segment registers,
+ * to define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings. We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code. Low-level assembler code in hash_low_32.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
+#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
+#define _PAGE_READ 0x004 /* software: read access allowed */
+#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
+#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */
+#define _PAGE_DIRTY 0x080 /* C: page changed */
+#define _PAGE_ACCESSED 0x100 /* R: page referenced */
+#define _PAGE_EXEC 0x200 /* software: exec allowed */
+#define _PAGE_WRITE 0x400 /* software: user write access allowed */
+#define _PAGE_SPECIAL 0x800 /* software: Special page */
+
+#ifdef CONFIG_PTE_64BIT
+/* We never clear the high word of the pte */
+#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE)
+#else
+#define _PTE_NONE_MASK _PAGE_HASHPTE
+#endif
+
+#define _PMD_PRESENT 0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD (~PAGE_MASK)
+
+/* We borrow the _PAGE_READ bit to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE _PAGE_READ
+
+/* And here we include common definitions */
+
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as _PAGE_SHIFT here (ie, naturally aligned).
+ * Platforms that don't just pre-define the value so we don't override it here.
+ */
+#define PTE_RPN_SHIFT (PAGE_SHIFT)
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
+#else
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+
+/*
+ * _PAGE_CHG_MASK is the mask of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+
+#include <asm/pgtable-masks.h>
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+#define PTE_INDEX_SIZE PTE_SHIFT
+#define PMD_INDEX_SIZE 0
+#define PUD_INDEX_SIZE 0
+#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
+
+#ifndef __ASSEMBLER__
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE 0
+#define PUD_TABLE_SIZE 0
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS (PTE_TABLE_SIZE - 1)
+#endif /* __ASSEMBLER__ */
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus
+ *
+ * For any >32-bit physical address platform, we can use the following
+ * two level page table layout where the pgdir is 8KB and the MS 13 bits
+ * are an index to the second level table. The combined pgdir/pmd first
+ * level has 2048 entries and the second level has 512 64-bit PTE entries.
+ * -Matt
+ */
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
+
+#ifndef __ASSEMBLER__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
+
+#endif /* !__ASSEMBLER__ */
+
+/*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start laying out kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+
+#define FIXADDR_SIZE 0
+#ifdef CONFIG_KASAN
+#include <asm/kasan.h>
+#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE)
+#else
+#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE))
+#endif
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP PKMAP_BASE
+#else
+#define IOREMAP_TOP FIXADDR_START
+#endif
+
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 16MB value just means that there will be a 64MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * We no longer map larger than phys RAM with the BATs so we don't have
+ * to worry about the VMALLOC_OFFSET causing problems. We do have to worry
+ * about clashes between our early calls to ioremap() that start growing down
+ * from ioremap_base being run into the VM area allocations (growing upwards
+ * from VMALLOC_START). For this reason we have ioremap_bot to check when
+ * we actually run into our mappings setup in the early boot with the VM
+ * system. This really does become a problem for machines with good amounts
+ * of RAM. -- Cort
+ */
+#define VMALLOC_OFFSET (0x1000000) /* 16M */
+
+#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+
+#ifdef CONFIG_KASAN_VMALLOC
+#define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define VMALLOC_END ioremap_bot
+#endif
+
+#define MODULES_END ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
+#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M)
+#define MODULES_VADDR (MODULES_END - MODULES_SIZE)
+
+#ifndef __ASSEMBLER__
+#include <linux/sched.h>
+#include <linux/threads.h>
+
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0
+
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+/*
+ * Bits in a linux-style PTE. These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible.
+ */
+
+#define pte_clear(mm, addr, ptep) \
+ do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0)
+
+#define pmd_none(pmd) (!pmd_val(pmd))
+#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
+#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ *pmdp = __pmd(0);
+}
+
+
+/*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry. flush_hash_pages is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_pages(unsigned context, unsigned long va,
+ unsigned long pmdval, int count);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va,
+ unsigned long pmdval);
+
+/* Flush an entry from the TLB/hash table */
+static inline void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
+{
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ unsigned long ptephys = __pa(ptep) & PAGE_MASK;
+
+ flush_hash_pages(mm->context.id, addr, ptephys, 1);
+ }
+}
+
+/*
+ * PTE updates. This function is called whenever an existing
+ * valid PTE is updated. This does -not- include set_pte_at()
+ * which nowadays only sets a new PTE.
+ *
+ * Depending on the type of MMU, we may need to use atomic updates
+ * and the PTE may be either 32 or 64 bit wide. In the latter case,
+ * when using atomic updates, only the low part of the PTE is
+ * accessed atomically.
+ */
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+ unsigned long clr, unsigned long set, int huge)
+{
+ pte_basic_t old;
+
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ unsigned long tmp;
+
+ asm volatile(
+#ifndef CONFIG_PTE_64BIT
+ "1: lwarx %0, 0, %3\n"
+ " andc %1, %0, %4\n"
+#else
+ "1: lwarx %L0, 0, %3\n"
+ " lwz %0, -4(%3)\n"
+ " andc %1, %L0, %4\n"
+#endif
+ " or %1, %1, %5\n"
+ " stwcx. %1, 0, %3\n"
+ " bne- 1b"
+ : "=&r" (old), "=&r" (tmp), "=m" (*p)
+#ifndef CONFIG_PTE_64BIT
+ : "r" (p),
+#else
+ : "b" ((unsigned long)(p) + 4),
+#endif
+ "r" (clr), "r" (set), "m" (*p)
+ : "cc" );
+ } else {
+ old = pte_val(*p);
+
+ *p = __pte((old & ~(pte_basic_t)clr) | set);
+ }
+
+ return old;
+}
+
+/*
+ * 2.6 calls this without flushing the TLB entry; this is wrong
+ * for our hash-based implementation, we fix that up here.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old;
+ old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+ if (old & _PAGE_HASHPTE)
+ flush_hash_entry(mm, ptep, addr);
+
+ return (old & _PAGE_ACCESSED) != 0;
+}
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+ __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep)
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+}
+
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+ pte_t *ptep, pte_t entry,
+ unsigned long address,
+ int psize)
+{
+ unsigned long set = pte_val(entry) &
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+ pte_update(vma->vm_mm, address, ptep, 0, set, 0);
+
+ flush_tlb_page(vma, address);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
+
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit PTEs):
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <----------------- offset --------------------> < type -> E H P
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ * _PAGE_PRESENT (P) and _PAGE_HASHPTE (H) must be 0.
+ *
+ * For 64bit PTEs, the offset is extended by 32bit.
+ */
+#define __swp_type(entry) ((entry).val & 0x1f)
+#define __swp_offset(entry) ((entry).val >> 5)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) & 0x1f) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
+
+static inline bool pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
+/* Generic accessors to PTE bits */
+static inline bool pte_read(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_READ);
+}
+
+static inline bool pte_write(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_WRITE);
+}
+
+static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); }
+static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); }
+static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); }
+static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; }
+
+static inline int pte_present(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hw_valid(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hashpte(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_HASHPTE);
+}
+
+static inline bool pte_ci(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_NO_CACHE);
+}
+
+/*
+ * We only find page table entry in the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+	/*
+	 * Every access needs a present PTE with _PAGE_READ set; a writable
+	 * PTE has _PAGE_READ set too, so this check also covers writes.
+	 */
+	if (!(pte_present(pte) && pte_read(pte)))
+		return false;
+
+	/* A write additionally requires _PAGE_WRITE. */
+	if (!write)
+		return true;
+	return pte_write(pte);
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+ return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+ pgprot_val(pgprot));
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_WRITE);
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_EXEC);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+ return pte;
+}
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+ /*
+ * write implies read, hence set both
+ */
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors.
+ *
+ * First case is 32-bit in UP mode with 32-bit PTEs, we need to preserve
+ * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+ * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+ * and so we need to keep track that this PTE needs invalidating.
+ *
+ * Second case is 32-bit with 64-bit PTE. In this case, we
+ * can just store as long as we do the two halves in the right order
+ * with a barrier in between. This is possible because we take care,
+ * in the hash code, to pre-invalidate if the PTE was already hashed,
+ * which synchronizes us with any concurrent invalidation.
+ * In the percpu case, we fallback to the simple update preserving
+ * the hash bits (ie, same as the non-SMP case).
+ *
+ * Third case is 32-bit in SMP mode with 32-bit PTEs. We use the
+ * helper pte_update() which does an atomic update. We need to do that
+ * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+ * per-CPU PTE such as a kmap_atomic, we also do a simple update preserving
+ * the hash bits instead.
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
+{
+ if ((!IS_ENABLED(CONFIG_SMP) && !IS_ENABLED(CONFIG_PTE_64BIT)) || percpu) {
+ *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) |
+ (pte_val(pte) & ~_PAGE_HASHPTE));
+ } else if (IS_ENABLED(CONFIG_PTE_64BIT)) {
+ if (pte_val(*ptep) & _PAGE_HASHPTE)
+ flush_hash_entry(mm, ptep, addr);
+ /* Two word stores split by eieio: *ptep first, then the word at byte offset 4. */
+ asm volatile("stw%X0 %2,%0; eieio; stw%X1 %L2,%1" :
+ "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) :
+ "r" (pte) : "memory");
+ } else {
+ pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0);
+ }
+}
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+ _PAGE_WRITETHRU)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+ _PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+ _PAGE_NO_CACHE);
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+ _PAGE_COHERENT);
+}
+
+#define pgprot_cached_wthru pgprot_cached_wthru
+static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
+{
+ return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+ _PAGE_COHERENT | _PAGE_WRITETHRU);
+}
+
+#define pgprot_cached_noncoherent pgprot_cached_noncoherent
+static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
+{
+ return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+ return pgprot_noncached_wc(prot);
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
new file mode 100644
index 000000000000..e43534da5207
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
+
+#include <linux/build_bug.h>
+
+#define MMU_NO_CONTEXT (0)
+/*
+ * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
+ */
+void hash__flush_tlb_mm(struct mm_struct *mm);
+void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end);
+
+#ifdef CONFIG_SMP
+void _tlbie(unsigned long address);
+#else
+static inline void _tlbie(unsigned long address)
+{
+ asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+}
+#endif
+void _tlbia(void);
+
+/*
+ * Called at the end of a mmu_gather operation to make sure the
+ * TLB flush is completely done.
+ */
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ /* 603 needs to flush the whole TLB here since it doesn't use a hash table. */
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ _tlbia();
+}
+
+static inline void flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ start &= PAGE_MASK;
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ hash__flush_range(mm, start, end);
+ else if (end - start <= PAGE_SIZE)
+ _tlbie(start);
+ else
+ _tlbia();
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ hash__flush_tlb_mm(mm);
+ else
+ _tlbia();
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ hash__flush_tlb_page(vma, vmaddr);
+ else
+ _tlbie(vmaddr);
+}
+
+static inline void
+flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+{
+ flush_range(vma->vm_mm, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ flush_range(&init_mm, start, end);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ flush_tlb_page(vma, vmaddr);
+}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ flush_range(mm, vmaddr, vmaddr);
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ flush_tlb_mm(mm);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
new file mode 100644
index 000000000000..8e5bd9902bed
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
+
+#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB
+#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB
+#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB
+
+/*
+ * Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
+ * Hence also limit max EA bits to 64TB.
+ */
+#define MAX_EA_BITS_PER_CONTEXT 46
+
+
+/*
+ * Our page table limits us to 64TB. For 64TB physical memory, we only need 64GB
+ * of vmemmap space. To better support sparse memory layout, we use 61TB
+ * linear map range, 1TB of vmalloc, 1TB of I/O and 1TB of vmemmap.
+ */
+#define REGION_SHIFT (40)
+#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Limits the linear mapping range
+ */
+#define H_MAX_PHYSMEM_BITS 46
+
+/*
+ * Define the address range of the kernel non-linear virtual area (61TB)
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc0003d0000000000)
+
+#ifndef __ASSEMBLER__
+#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
+#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
+#define H_PUD_TABLE_SIZE (sizeof(pud_t) << H_PUD_INDEX_SIZE)
+#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << H_PGD_INDEX_SIZE)
+
+#define H_PAGE_F_GIX_SHIFT _PAGE_PA_MAX
+#define H_PAGE_F_SECOND _RPAGE_PKEY_BIT0 /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX (_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
+#define H_PAGE_BUSY _RPAGE_RSV1
+#define H_PAGE_HASHPTE _RPAGE_PKEY_BIT4
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
+ H_PAGE_F_SECOND | H_PAGE_F_GIX)
+/*
+ * Not supported by 4k linux page size
+ */
+#define H_PAGE_4K_PFN 0x0
+#define H_PAGE_THP_HUGE 0x0
+#define H_PAGE_COMBO 0x0
+
+/* 8 bytes per each pte entry */
+#define H_PTE_FRAG_SIZE_SHIFT (H_PTE_INDEX_SIZE + 3)
+#define H_PTE_FRAG_NR (PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
+#define H_PMD_FRAG_SIZE_SHIFT (H_PMD_INDEX_SIZE + 3)
+#define H_PMD_FRAG_NR (PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
+
+/* memory key bits, only 8 keys supported */
+#define H_PTE_PKEY_BIT4 0
+#define H_PTE_PKEY_BIT3 0
+#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT3
+#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT2
+#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT1
+
+
+/*
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
+ */
+#define remap_4k_pfn(vma, addr, pfn, prot) \
+ remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
+
+/*
+ * With 4K page size the real_pte machinery is all nops.
+ */
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
+{
+ return (real_pte_t){pte};
+}
+
+#define __rpte_to_pte(r) ((r).pte)
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+ return pte_val(__rpte_to_pte(rpte)) >> H_PAGE_F_GIX_SHIFT;
+}
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
+ do { \
+ index = 0; \
+ shift = mmu_psize_defs[psize].shift; \
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * We expect this to be called only for user addresses or kernel virtual
+ * addresses other than the linear mapping.
+ */
+#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K
+
+/*
+ * 4K PTE format is different from 64K PTE format. Saving the hash_slot is just
+ * a matter of returning the PTE bits that need to be modified. On 64K PTE,
+ * things are a little more involved and hence needs many more parameters to
+ * accomplish the same. However we want to abstract this out from the caller by
+ * keeping the prototype consistent across the two formats.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+ unsigned int subpg_index, unsigned long hidx,
+ int offset)
+{
+ return (hidx << H_PAGE_F_GIX_SHIFT) &
+ (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+ BUG();
+ return NULL;
+}
+
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+ BUG();
+ return 0;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+ int index)
+{
+ BUG();
+ return 0;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+ unsigned int index, unsigned int hidx)
+{
+ BUG();
+}
+
+static inline int hash__pmd_trans_huge(pmd_t pmd)
+{
+ return 0;
+}
+
+static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
+{
+ BUG();
+ return pmd;
+}
+
+extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ unsigned long clr, unsigned long set);
+extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp);
+extern int hash__has_transparent_hugepage(void);
+#endif
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
new file mode 100644
index 000000000000..7deb3a66890b
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
+
+#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB
+#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB
+
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define H_MAX_PHYSMEM_BITS 51
+#else
+#define H_MAX_PHYSMEM_BITS 46
+#endif
+
+/*
+ * Each context is 512TB size. SLB miss for first context/default context
+ * is handled in the hotpath.
+ */
+#define MAX_EA_BITS_PER_CONTEXT 49
+#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000)
+
+/*
+ * A 64k-aligned address frees up a few of the lower bits of the RPN for us.
+ * We steal them here. For more details look at pte_pfn/pfn_pte().
+ */
+#define H_PAGE_COMBO _RPAGE_RPN0 /* this is a combo 4k page */
+#define H_PAGE_4K_PFN _RPAGE_RPN1 /* PFN is for a single 4k page */
+#define H_PAGE_BUSY _RPAGE_RSV1 /* software: PTE & hash are busy */
+#define H_PAGE_HASHPTE _RPAGE_RPN43 /* PTE has associated HPTE */
+
+/* memory key bits. */
+#define H_PTE_PKEY_BIT4 _RPAGE_PKEY_BIT4
+#define H_PTE_PKEY_BIT3 _RPAGE_PKEY_BIT3
+#define H_PTE_PKEY_BIT2 _RPAGE_PKEY_BIT2
+#define H_PTE_PKEY_BIT1 _RPAGE_PKEY_BIT1
+#define H_PTE_PKEY_BIT0 _RPAGE_PKEY_BIT0
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define H_PAGE_THP_HUGE H_PAGE_4K_PFN
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | H_PAGE_COMBO)
+/*
+ * We use a 2K PTE page fragment and another 2K for storing
+ * real_pte_t hash index
+ * 8 bytes per each pte entry and another 8 bytes for storing
+ * slot details.
+ */
+#define H_PTE_FRAG_SIZE_SHIFT (H_PTE_INDEX_SIZE + 3 + 1)
+#define H_PTE_FRAG_NR (PAGE_SIZE >> H_PTE_FRAG_SIZE_SHIFT)
+
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+#define H_PMD_FRAG_SIZE_SHIFT (H_PMD_INDEX_SIZE + 3 + 1)
+#else
+#define H_PMD_FRAG_SIZE_SHIFT (H_PMD_INDEX_SIZE + 3)
+#endif
+#define H_PMD_FRAG_NR (PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
+
+#ifndef __ASSEMBLER__
+#include <asm/errno.h>
+
+/*
+ * With 64K pages on hash table, we have a special PTE format that
+ * uses a second "half" of the page table to encode sub-page information
+ * in order to deal with 64K made of 4K HW pages. Thus we override the
+ * generic accessors and iterators here
+ */
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
+{
+ real_pte_t rpte;
+ unsigned long *hidxp;
+
+ rpte.pte = pte;
+
+ /*
+ * Ensure that we do not read the hidx before we read the PTE: the
+ * writer side is expected to finish writing the hidx first, followed
+ * by the PTE, using smp_wmb(). pte_set_hash_slot() ensures that.
+ */
+ smp_rmb();
+ /* The hidx word is read from @offset pte_t slots past @ptep. */
+ hidxp = (unsigned long *)(ptep + offset);
+ rpte.hidx = *hidxp;
+ return rpte;
+}
+
+/*
+ * Shift the hidx representation by one, modulo 0x10; i.e. hidx 0 is represented
+ * as 1, 1 as 2, ..., and 0xf as 0. This convention lets us represent an
+ * invalid hidx 0xf with a 0x0 bit value. PTEs are zeroed anyway when
+ * allocated, so we don't have to zero them again, saving on initialization.
+ */
+#define HIDX_UNSHIFT_BY_ONE(x) (((x) + 0xfUL) & 0xfUL) /* shift backward by one */
+#define HIDX_SHIFT_BY_ONE(x) (((x) + 0x1UL) & 0xfUL) /* shift forward by one */
+#define HIDX_BITS(x, index) ((x) << ((index) << 2)) /* move nibble x to 4-bit slot index */
+#define BITS_TO_HIDX(x, index) (((x) >> ((index) << 2)) & 0xfUL) /* extract the nibble in slot index */
+#define INVALID_RPTE_HIDX 0x0UL
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+ return HIDX_UNSHIFT_BY_ONE(BITS_TO_HIDX(rpte.hidx, index));
+}
+
+/*
+ * Commit the hidx and return PTE bits that needs to be modified. The caller is
+ * expected to modify the PTE bits accordingly and commit the PTE to memory.
+ */
+static inline unsigned long pte_set_hidx(pte_t *ptep, real_pte_t rpte,
+ unsigned int subpg_index,
+ unsigned long hidx, int offset)
+{
+ unsigned long *hidxp = (unsigned long *)(ptep + offset);
+
+ rpte.hidx &= ~HIDX_BITS(0xfUL, subpg_index);
+ *hidxp = rpte.hidx | HIDX_BITS(HIDX_SHIFT_BY_ONE(hidx), subpg_index);
+
+ /*
+ * Anyone reading PTE must ensure hidx bits are read after reading the
+ * PTE by using the read-side barrier smp_rmb(). __real_pte() can be
+ * used for that.
+ */
+ smp_wmb();
+
+ /* No PTE bits to be modified, return 0x0UL */
+ return 0x0UL;
+}
+
+#define __rpte_to_pte(r) ((r).pte)
+extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
+/*
+ * Trick: we set __end to vpn + 64k (in VPN units), which happens to work for
+ * a 16M page as well, since we want only one iteration there.
+ */
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \
+ do { \
+ unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \
+ unsigned __split = (psize == MMU_PAGE_4K || \
+ psize == MMU_PAGE_64K_AP); \
+ shift = mmu_psize_defs[psize].shift; \
+ for (index = 0; vpn < __end; index++, \
+ vpn += (1L << (shift - VPN_SHIFT))) { \
+ if (!__split || __rpte_sub_valid(rpte, index))
+
+#define pte_iterate_hashed_end() } } while(0)
+
+#define pte_pagesize_index(mm, addr, pte) \
+ (((pte) & H_PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
+
+extern int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t);
+static inline int hash__remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+				     unsigned long pfn, pgprot_t prot)
+{
+	pgprot_t prot_4k = __pgprot(pgprot_val(prot) | H_PAGE_4K_PFN);
+
+	/* WARN() evaluates to its condition, so warn and bail out in one step. */
+	if (WARN(pfn > (PTE_RPN_MASK >> PAGE_SHIFT), "remap_4k_pfn called with wrong pfn value\n"))
+		return -EINVAL;
+	return remap_pfn_range(vma, addr, pfn, PAGE_SIZE, prot_4k);
+}
+
+#define H_PTE_TABLE_SIZE PTE_FRAG_SIZE
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined (CONFIG_HUGETLB_PAGE)
+#define H_PMD_TABLE_SIZE ((sizeof(pmd_t) << PMD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PMD_INDEX_SIZE))
+#else
+#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+#define H_PUD_TABLE_SIZE ((sizeof(pud_t) << PUD_INDEX_SIZE) + \
+ (sizeof(unsigned long) << PUD_INDEX_SIZE))
+#else
+#define H_PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
+#endif
+#define H_PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+ /*
+ * The hpte hindex is stored in the pgtable whose address is in the
+ * second half of the PMD
+ *
+ * Order this load with the test for pmd_trans_huge in the caller
+ */
+ smp_rmb();
+ return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+/*
+ * The linux hugepage PMD now includes the pmd entries followed by the address
+ * of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 000 | 1 bit secondary | 3 bit hidx | 1 bit valid]. We use one byte per
+ * HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The top three bits are intentionally left as zero. These memory locations
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT bit of that is zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+ return hpte_slot_array[index] & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+ int index)
+{
+ return hpte_slot_array[index] >> 1;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+ unsigned int index, unsigned int hidx)
+{
+ hpte_slot_array[index] = (hidx << 1) | 0x1;
+}
+
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated from the core VM paths and they're differentiated by
+ * VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differentiate explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int hash__pmd_trans_huge(pmd_t pmd)
+{
+ return !!((pmd_val(pmd) & (_PAGE_PTE | H_PAGE_THP_HUGE)) ==
+ (_PAGE_PTE | H_PAGE_THP_HUGE));
+}
+
+static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
+}
+
+extern unsigned long hash__pmd_hugepage_update(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ unsigned long clr, unsigned long set);
+extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp);
+extern int hash__has_transparent_hugepage(void);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
new file mode 100644
index 000000000000..6c5564c4fae4
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H
+
+/* We use key 3 for KERNEL */
+#define HASH_DEFAULT_KERNEL_KEY (HPTE_R_KEY_BIT0 | HPTE_R_KEY_BIT1)
+
+static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+ return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT1 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT3 : 0x0UL) |
+ ((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL));
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
+{
+ unsigned long pte_pkey;
+
+ pte_pkey = (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL));
+
+ if (mmu_has_feature(MMU_FTR_KUAP) ||
+ mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+ if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY))
+ return HASH_DEFAULT_KERNEL_KEY;
+ }
+
+ return pte_pkey;
+}
+
+static inline u16 hash__pte_to_pkey_bits(u64 pteflags)
+{
+ return (((pteflags & H_PTE_PKEY_BIT4) ? 0x10 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT3) ? 0x8 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT1) ? 0x2 : 0x0UL) |
+ ((pteflags & H_PTE_PKEY_BIT0) ? 0x1 : 0x0UL));
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
new file mode 100644
index 000000000000..5a8cbd496731
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_H
+#ifdef __KERNEL__
+
+#include <asm/asm-const.h>
+#include <asm/book3s/64/slice.h>
+
+/*
+ * Common bits between 4K and 64K pages in a linux-style PTE.
+ * Additional bits may be defined in pgtable-hash64-*.h
+ *
+ */
+#define H_PTE_NONE_MASK _PAGE_HPTEFLAGS
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/hash-64k.h>
+#else
+#include <asm/book3s/64/hash-4k.h>
+#endif
+
+#define H_PTRS_PER_PTE (1 << H_PTE_INDEX_SIZE)
+#define H_PTRS_PER_PMD (1 << H_PMD_INDEX_SIZE)
+#define H_PTRS_PER_PUD (1 << H_PUD_INDEX_SIZE)
+
+/* Valid bit to set in a PMD/PUD/PGD entry */
+#define HASH_PMD_VAL_BITS (0x8000000000000000UL)
+#define HASH_PUD_VAL_BITS (0x8000000000000000UL)
+#define HASH_PGD_VAL_BITS (0x8000000000000000UL)
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
+ H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK (~(0xcUL << 60))
+
+/*
+ * We store the slot details in the second half of page table.
+ * Increase the pud level table so that hugetlb ptes can be stored
+ * at pud level.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES)
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE + 1)
+#else
+#define H_PUD_CACHE_INDEX (H_PUD_INDEX_SIZE)
+#endif
+
+/*
+ * +------------------------------+
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel virtual map end (0xc00e000000000000)
+ * | |
+ * | |
+ * | 512TB/16TB of vmemmap |
+ * | |
+ * | |
+ * +------------------------------+ Kernel vmemmap start
+ * | |
+ * | 512TB/16TB of IO map |
+ * | |
+ * +------------------------------+ Kernel IO map start
+ * | |
+ * | 512TB/16TB of vmap |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
+ */
+
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE H_KERN_MAP_SIZE
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+
+#define H_KERN_IO_START H_VMALLOC_END
+#define H_KERN_IO_SIZE H_KERN_MAP_SIZE
+#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE)
+
+#define H_VMEMMAP_START H_KERN_IO_END
+#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE
+#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE)
+
+#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)(ea) - H_KERN_VIRT_START) >> REGION_SHIFT) + 2) /* H_KERN_VIRT_START maps to id 2 */
+
+/*
+ * Region IDs
+ */
+#define USER_REGION_ID 0
+#define LINEAR_MAP_REGION_ID 1
+#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START)
+#define INVALID_REGION_ID (VMEMMAP_REGION_ID + 1)
+
+/*
+ * Defines the address of the vmemmap area, in its own region on
+ * hash table CPUs.
+ */
+
+/* PTEIDX nibble */
+#define _PTEIDX_SECONDARY 0x8
+#define _PTEIDX_GROUP_IX 0x7
+
+#define H_PMD_BAD_BITS (PTE_TABLE_SIZE-1)
+#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
+
+#ifndef __ASSEMBLER__
+static inline int get_region_id(unsigned long ea)
+{
+	const int top_nibble = ea >> 60UL;
+
+	/* The vmalloc area must be the first non-linear region (id 2). */
+	BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+	/* A zero top nibble is reported as the user region. */
+	if (top_nibble == 0)
+		return USER_REGION_ID;
+	/* Any top nibble other than PAGE_OFFSET's is invalid. */
+	if (top_nibble != (PAGE_OFFSET >> 60))
+		return INVALID_REGION_ID;
+	/* Addresses below H_KERN_VIRT_START belong to the linear map. */
+	if (ea < H_KERN_VIRT_START)
+		return LINEAR_MAP_REGION_ID;
+
+	return NON_LINEAR_REGION_ID(ea);
+}
+
+static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
+
+#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
+
+/*
+ * pud comparison that will work with both pte and page table pointer.
+ */
+static inline int hash__pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return (((pud_raw(pud_a) ^ pud_raw(pud_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
+#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
+
+static inline int hash__p4d_bad(p4d_t p4d)
+{
+ return (p4d_val(p4d) == 0);
+}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void hash__mark_rodata_ro(void);
+extern void hash__mark_initmem_nx(void);
+#endif
+
+extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long pte, int huge);
+unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags);
+/* Atomic PTE updates */
+/*
+ * Atomically clear the @clr bits and set the @set bits in *@ptep.
+ *
+ * The ldarx/stdcx. sequence reloads the PTE for as long as H_PAGE_BUSY is
+ * set in it, and retries when the conditional store fails.  All masks are
+ * converted with cpu_to_be64() so the asm works on the in-memory
+ * (big-endian) representation.
+ *
+ * Returns the PTE value that was loaded before the update, converted to
+ * CPU byte order.
+ */
+static inline unsigned long hash__pte_update_one(pte_t *ptep, unsigned long clr,
+ unsigned long set)
+{
+ __be64 old_be, tmp_be;
+
+ /*
+ * Operands: %0 = old_be, %1 = tmp_be, %3 = ptep, %4 = clr mask,
+ * %6 = H_PAGE_BUSY mask, %7 = set mask.
+ */
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3 # pte_update\n\
+ and. %1,%0,%6\n\
+ bne- 1b \n\
+ andc %1,%0,%4 \n\
+ or %1,%1,%7\n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+ : "r" (ptep), "r" (cpu_to_be64(clr)), "m" (*ptep),
+ "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
+ : "cc" );
+
+ return be64_to_cpu(old_be);
+}
+
+/*
+ * Apply a clear/set update to the PTE at @ptep and request a hash flush if
+ * the old entry had H_PAGE_HASHPTE set.
+ *
+ * With CONFIG_PPC_4K_PAGES and @huge set, the same update is repeated on the
+ * nb - 1 entries following @ptep, where nb is derived from the slice page
+ * size of @addr (16M or 16G).
+ *
+ * Returns the old value of the first entry.
+ */
+static inline unsigned long hash__pte_update(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, unsigned long clr,
+ unsigned long set,
+ int huge)
+{
+ unsigned long old;
+
+ old = hash__pte_update_one(ptep, clr, set);
+
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && huge) {
+ unsigned int psize = get_slice_psize(mm, addr);
+ int nb, i;
+
+ /* Number of consecutive entries covered by one huge page. */
+ if (psize == MMU_PAGE_16M)
+ nb = SZ_16M / PMD_SIZE;
+ else if (psize == MMU_PAGE_16G)
+ nb = SZ_16G / PUD_SIZE;
+ else
+ nb = 1;
+
+ WARN_ON_ONCE(nb == 1); /* Should never happen */
+
+ /* Entry 0 was already updated above. */
+ for (i = 1; i < nb; i++)
+ hash__pte_update_one(ptep + i, clr, set);
+ }
+ /* huge pages use the old page table lock */
+ if (!huge)
+ assert_pte_locked(mm, addr);
+
+ if (old & H_PAGE_HASHPTE)
+ hpte_need_flush(mm, addr, ptep, old, huge);
+
+ return old;
+}
+
+/*
+ * Set the dirty and/or accessed bits atomically in a linux PTE; this
+ * function doesn't need to flush the hash entry.
+ *
+ * Only the bits of @entry selected by the local mask (dirty, accessed,
+ * read, write, exec, soft-dirty) are OR-ed into *@ptep; no bit is cleared.
+ * The update loop reloads the PTE while H_PAGE_BUSY is set and retries when
+ * the conditional store fails.
+ */
+static inline void hash__ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+ __be64 old, tmp, val, mask;
+
+ mask = cpu_to_be64(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_READ | _PAGE_WRITE |
+ _PAGE_EXEC | _PAGE_SOFT_DIRTY);
+
+ val = pte_raw(entry) & mask;
+
+ /* Operands: %0 = old, %1 = tmp, %3 = val, %4 = ptep, %6 = H_PAGE_BUSY mask. */
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%4\n\
+ and. %1,%0,%6\n\
+ bne- 1b \n\
+ or %0,%3,%0\n\
+ stdcx. %0,0,%4\n\
+ bne- 1b"
+ :"=&r" (old), "=&r" (tmp), "=m" (*ptep)
+ :"r" (val), "r" (ptep), "m" (*ptep), "r" (cpu_to_be64(H_PAGE_BUSY))
+ :"cc");
+}
+
+/*
+ * Return non-zero when @pte_a and @pte_b carry the same raw value apart
+ * from the _PAGE_HPTEFLAGS bits.
+ */
+static inline int hash__pte_same(pte_t pte_a, pte_t pte_b)
+{
+	const __be64 ignored = cpu_to_be64(_PAGE_HPTEFLAGS);
+
+	return !((pte_raw(pte_a) ^ pte_raw(pte_b)) & ~ignored);
+}
+
+/* Non-zero when @pte has no bits set outside H_PTE_NONE_MASK. */
+static inline int hash__pte_none(pte_t pte)
+{
+	return !(pte_val(pte) & ~H_PTE_NONE_MASK);
+}
+
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+ int ssize, real_pte_t rpte, unsigned int subpg_index);
+
+/*
+ * This low level function performs the actual PTE insertion.
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * a horrible mess that I'm not going to try to clean up now, but
+ * I'm keeping it in one place rather than spread around.
+ *
+ * In this implementation the body is a plain store of @pte into *@ptep;
+ * @mm, @addr and @percpu are not used.
+ */
+static inline void hash__set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
+{
+ /*
+ * Anything else just stores the PTE normally. That covers all 64-bit
+ * cases, and 32-bit non-hash with 32-bit PTEs.
+ */
+ *ptep = pte;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long old_pmd);
+#else
+/*
+ * Stub used when CONFIG_TRANSPARENT_HUGEPAGE is not set: it only emits a
+ * warning naming the function and ignores all of its arguments.
+ */
+static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp,
+ unsigned long old_pmd)
+{
+ WARN(1, "%s called with THP disabled\n", __func__);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
+extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+extern void hash__vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size);
+
+int hash__create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot);
+int hash__remove_section_mapping(unsigned long start, unsigned long end);
+
+#endif /* !__ASSEMBLER__ */
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
new file mode 100644
index 000000000000..bb786694dd26
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+#define _ASM_POWERPC_BOOK3S_64_HUGETLB_H
+
+#include <asm/firmware.h>
+
+/*
+ * For radix we want generic code to handle hugetlb. But then if we want
+ * both hash and radix to be enabled together we need to workaround the
+ * limitations.
+ */
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte);
+
+/*
+ * Map the huge page shift of @hstate to an MMU_PAGE_* index.
+ *
+ * 2M, 1G, 16M and 16G are recognised, in that order.  Any other shift
+ * triggers a warning and yields mmu_virtual_psize.
+ */
+static inline int hstate_get_psize(struct hstate *hstate)
+{
+	const unsigned long hshift = huge_page_shift(hstate);
+
+	if (hshift == mmu_psize_defs[MMU_PAGE_2M].shift)
+		return MMU_PAGE_2M;
+	if (hshift == mmu_psize_defs[MMU_PAGE_1G].shift)
+		return MMU_PAGE_1G;
+	if (hshift == mmu_psize_defs[MMU_PAGE_16M].shift)
+		return MMU_PAGE_16M;
+	if (hshift == mmu_psize_defs[MMU_PAGE_16G].shift)
+		return MMU_PAGE_16G;
+
+	WARN(1, "Wrong huge page shift\n");
+	return mmu_virtual_psize;
+}
+
+#define __HAVE_ARCH_GIGANTIC_PAGE_RUNTIME_SUPPORTED
+static inline bool gigantic_page_runtime_supported(void)
+{
+	/*
+	 * In some cases gigantic pages are reserved with hypervisor
+	 * assistance.  Runtime allocation of gigantic pages cannot be used
+	 * on those platforms, which are the LPARs running in hash
+	 * translation mode.
+	 */
+	const bool hash_lpar = firmware_has_feature(FW_FEATURE_LPAR) &&
+			       !radix_enabled();
+
+	return !hash_lpar;
+}
+
+#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
+extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
+extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t new_pte);
+
+/*
+ * Forward a hugetlb page flush for @vmaddr in @vma to the radix
+ * implementation.  When radix is not enabled this function does nothing.
+ */
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+				      unsigned long vmaddr)
+{
+	/*
+	 * Plain call rather than "return <void expression>;": ISO C does not
+	 * permit a return statement with an expression in a function whose
+	 * return type is void.
+	 */
+	if (radix_enabled())
+		radix__flush_hugetlb_page(vma, vmaddr);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+/*
+ * Validate a huge page @shift and translate it to an MMU_PAGE_* index.
+ *
+ * Returns the index, or -EINVAL when @shift exceeds SLICE_HIGH_SHIFT or
+ * the resulting page size is not one accepted for the active MMU mode.
+ */
+static inline int check_and_get_huge_psize(int shift)
+{
+	bool supported;
+	int psize;
+
+	if (shift > SLICE_HIGH_SHIFT)
+		return -EINVAL;
+
+	psize = shift_to_mmu_psize(shift);
+
+	/*
+	 * Firmware may report many page sizes; hugetlb support is only added
+	 * for those the linux page table layout can handle.  For now:
+	 *   Radix: 2M and 1G
+	 *   Hash:  16M and 16G
+	 */
+	if (radix_enabled())
+		supported = (psize == MMU_PAGE_2M || psize == MMU_PAGE_1G);
+	else
+		supported = (psize == MMU_PAGE_16M || psize == MMU_PAGE_16G);
+
+	return supported ? psize : -EINVAL;
+}
+
+#define arch_has_huge_bootmem_alloc arch_has_huge_bootmem_alloc
+
+/* True only on an LPAR (FW_FEATURE_LPAR) that is not using radix. */
+static inline bool arch_has_huge_bootmem_alloc(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return false;
+
+	return !radix_enabled();
+}
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h
new file mode 100644
index 000000000000..df37a76c1e9f
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kexec.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_
+#define _ASM_POWERPC_BOOK3S_64_KEXEC_H_
+
+#include <asm/plpar_wrappers.h>
+
+#define reset_sprs reset_sprs
+/*
+ * Zero a set of special purpose registers, gated on CPU features, then
+ * issue an isync():
+ *  - CPU_FTR_ARCH_206:  AMR and UAMOR
+ *  - CPU_FTR_ARCH_207S: IAMR, and CIABR (written directly with
+ *    CPU_FTR_HVMODE, otherwise through plpar_set_ciabr())
+ *  - CPU_FTR_ARCH_31:   DEXCR and HASHKEYR
+ */
+static inline void reset_sprs(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ mtspr(SPRN_AMR, 0);
+ mtspr(SPRN_UAMOR, 0);
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_IAMR, 0);
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ mtspr(SPRN_CIABR, 0);
+ else
+ plpar_set_ciabr(0);
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_DEXCR, 0);
+ mtspr(SPRN_HASHKEYR, 0);
+ }
+
+ /* Do we need isync()? We are going via a kexec reset */
+ isync();
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h
new file mode 100644
index 000000000000..03aec3c6c851
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -0,0 +1,418 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_KUP_H
+#define _ASM_POWERPC_BOOK3S_64_KUP_H
+
+#include <linux/const.h>
+#include <asm/reg.h>
+
+#define AMR_KUAP_BLOCK_READ UL(0x5455555555555555)
+#define AMR_KUAP_BLOCK_WRITE UL(0xa8aaaaaaaaaaaaaa)
+#define AMR_KUEP_BLOCKED UL(0x5455555555555555)
+#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+
+#ifdef __ASSEMBLER__
+
+/*
+ * Restore AMR and IAMR from the values saved in the stack frame at
+ * STACK_REGS_AMR(r1) / STACK_REGS_IAMR(r1).
+ *
+ * \gpr1 and \gpr2 are scratch registers.  The macro expands to nothing
+ * unless CONFIG_PPC_PKEY is defined, and branches straight to the end when
+ * MMU_FTR_PKEY is clear.
+ */
+.macro kuap_user_restore gpr1, gpr2
+#if defined(CONFIG_PPC_PKEY)
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ b 100f // skip_restore_amr
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67)
+ /*
+ * AMR and IAMR are going to be different when
+ * returning to userspace.
+ */
+ ld \gpr1, STACK_REGS_AMR(r1)
+
+ /*
+ * If kuap feature is not enabled, do the mtspr
+ * only if AMR value is different.
+ */
+ BEGIN_MMU_FTR_SECTION_NESTED(68)
+ mfspr \gpr2, SPRN_AMR
+ cmpd \gpr1, \gpr2
+ beq 99f
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_KUAP, 68)
+
+ isync
+ mtspr SPRN_AMR, \gpr1
+99:
+ /*
+ * Restore IAMR only when returning to userspace
+ */
+ ld \gpr1, STACK_REGS_IAMR(r1)
+
+ /*
+ * If kuep feature is not enabled, do the mtspr
+ * only if IAMR value is different.
+ */
+ BEGIN_MMU_FTR_SECTION_NESTED(69)
+ mfspr \gpr2, SPRN_IAMR
+ cmpd \gpr1, \gpr2
+ beq 100f
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUEP, 69)
+
+ isync
+ mtspr SPRN_IAMR, \gpr1
+
+100: //skip_restore_amr
+ /* No isync required, see the C version of kuap_user_restore() */
+#endif
+.endm
+
+/*
+ * When MMU_FTR_KUAP is set, restore AMR from STACK_REGS_AMR(r1), writing
+ * the register only if the saved value differs from the current one.
+ *
+ * \gpr1 and \gpr2 are scratch registers.  Expands to nothing unless
+ * CONFIG_PPC_PKEY is defined.
+ */
+.macro kuap_kernel_restore gpr1, gpr2
+#if defined(CONFIG_PPC_PKEY)
+
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ /*
+ * AMR is going to be mostly the same since we are
+ * returning to the kernel. Compare and do a mtspr.
+ */
+ ld \gpr2, STACK_REGS_AMR(r1)
+ mfspr \gpr1, SPRN_AMR
+ cmpd \gpr1, \gpr2
+ beq 100f
+ isync
+ mtspr SPRN_AMR, \gpr2
+ /*
+ * No isync required, see kuap_restore_amr()
+ * NOTE(review): no kuap_restore_amr() is visible in this header; the
+ * C counterpart here appears to be __kuap_kernel_restore() - confirm.
+ * No need to restore IAMR when returning to kernel space.
+ */
+100:
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
+#endif
+.endm
+
+#ifdef CONFIG_PPC_KUAP
+/*
+ * Debug check (CONFIG_PPC_KUAP_DEBUG, active when MMU_FTR_KUAP is set):
+ * read AMR and trap with a once-only warning entry if it differs from
+ * AMR_KUAP_BLOCKED.  \gpr1 and \gpr2 are scratch registers.
+ */
+.macro kuap_check_amr gpr1, gpr2
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ mfspr \gpr1, SPRN_AMR
+ /* Prevent access to userspace using any key values */
+ LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
+999: tdne \gpr1, \gpr2
+ EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67)
+#endif
+.endm
+#endif
+
+/*
+ * if (pkey) {
+ *
+ * save AMR -> stack;
+ * if (kuap) {
+ * if (AMR != BLOCKED)
+ * KUAP_BLOCKED -> AMR;
+ * }
+ * if (from_user) {
+ * save IAMR -> stack;
+ * if (kuep) {
+ * KUEP_BLOCKED ->IAMR
+ * }
+ * }
+ * return;
+ * }
+ *
+ * if (kuap) {
+ * if (from_kernel) {
+ * save AMR -> stack;
+ * if (AMR != BLOCKED)
+ * KUAP_BLOCKED -> AMR;
+ * }
+ *
+ * }
+ */
+/*
+ * Arguments:
+ *   \gpr1, \gpr2  scratch registers
+ *   \use_cr       CR field used for the AMR == AMR_KUAP_BLOCKED compare
+ *   \msr_pr_cr    optional CR field; per the branches below, "ne" means
+ *                 the entry came from userspace and "eq" from the kernel.
+ *                 When blank, the from-user/from-kernel branches and the
+ *                 IAMR handling are not assembled.
+ */
+.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
+#if defined(CONFIG_PPC_PKEY)
+
+ /*
+ * if both pkey and kuap is disabled, nothing to do
+ */
+ BEGIN_MMU_FTR_SECTION_NESTED(68)
+ b 100f // skip_save_amr
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_KUAP, 68)
+
+ /*
+ * if pkey is disabled and we are entering from userspace
+ * don't do anything.
+ */
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ .ifnb \msr_pr_cr
+ /*
+ * Without pkey we are not changing AMR outside the kernel
+ * hence skip this completely.
+ */
+ bne \msr_pr_cr, 100f // from userspace
+ .endif
+ END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY, 67)
+
+ /*
+ * pkey is enabled or pkey is disabled but entering from kernel
+ */
+ mfspr \gpr1, SPRN_AMR
+ std \gpr1, STACK_REGS_AMR(r1)
+
+ /*
+ * update kernel AMR with AMR_KUAP_BLOCKED only
+ * if KUAP feature is enabled
+ */
+ BEGIN_MMU_FTR_SECTION_NESTED(69)
+ LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED)
+ cmpd \use_cr, \gpr1, \gpr2
+ beq \use_cr, 102f
+ /*
+ * We don't isync here because we very recently entered via an interrupt
+ */
+ mtspr SPRN_AMR, \gpr2
+ isync
+102:
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 69)
+
+ /*
+ * if entering from kernel we don't need save IAMR
+ */
+ .ifnb \msr_pr_cr
+ beq \msr_pr_cr, 100f // from kernel space
+ mfspr \gpr1, SPRN_IAMR
+ std \gpr1, STACK_REGS_IAMR(r1)
+
+ /*
+ * update kernel IAMR with AMR_KUEP_BLOCKED only
+ * if KUEP feature is enabled
+ */
+ BEGIN_MMU_FTR_SECTION_NESTED(70)
+ LOAD_REG_IMMEDIATE(\gpr2, AMR_KUEP_BLOCKED)
+ mtspr SPRN_IAMR, \gpr2
+ isync
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUEP, 70)
+ .endif
+
+100: // skip_save_amr
+#endif
+.endm
+
+#else /* !__ASSEMBLER__ */
+
+#include <linux/jump_label.h>
+#include <linux/sched.h>
+
+DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
+
+#ifdef CONFIG_PPC_PKEY
+
+extern u64 __ro_after_init default_uamor;
+extern u64 __ro_after_init default_amr;
+extern u64 __ro_after_init default_iamr;
+
+#include <asm/mmu.h>
+#include <asm/ptrace.h>
+
+/* usage of kthread_use_mm() should inherit the
+ * AMR value of the operating address space. But, the AMR value is
+ * thread-specific and we inherit the address space and not thread
+ * access restrictions. Because of this ignore AMR value when accessing
+ * userspace via kernel thread.
+ */
+/*
+ * AMR value for the current task: the one saved in its thread regs, or
+ * default_amr when the task has no thread regs.
+ */
+static __always_inline u64 current_thread_amr(void)
+{
+	if (!current->thread.regs)
+		return default_amr;
+
+	return current->thread.regs->amr;
+}
+
+/*
+ * IAMR value for the current task: the one saved in its thread regs, or
+ * default_iamr when the task has no thread regs.
+ */
+static __always_inline u64 current_thread_iamr(void)
+{
+	if (!current->thread.regs)
+		return default_iamr;
+
+	return current->thread.regs->iamr;
+}
+#endif /* CONFIG_PPC_PKEY */
+
+#ifdef CONFIG_PPC_KUAP
+
+/*
+ * Restore AMR and IAMR from @regs.  Does nothing without MMU_FTR_PKEY.
+ *
+ * Each register is always rewritten when its protection feature (KUAP for
+ * AMR, KUEP for IAMR) is enabled.  Otherwise it is rewritten only if the
+ * live value differs from the saved one.
+ */
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+	bool want_amr, want_iamr;
+
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return;
+
+	/* The SPR is only read when the corresponding feature is off. */
+	want_amr = mmu_has_feature(MMU_FTR_KUAP) ||
+		   mfspr(SPRN_AMR) != regs->amr;
+	want_iamr = mmu_has_feature(MMU_FTR_BOOK3S_KUEP) ||
+		    mfspr(SPRN_IAMR) != regs->iamr;
+
+	if (!want_amr && !want_iamr)
+		return;
+
+	isync();
+	if (want_amr)
+		mtspr(SPRN_AMR, regs->amr);
+	if (want_iamr)
+		mtspr(SPRN_IAMR, regs->iamr);
+	/*
+	 * No trailing isync: we are about to rfi back to the previous
+	 * context before any user access would be made, and rfi is a CSI.
+	 */
+}
+
+/*
+ * Write the AMR saved in @regs back to the register, unless it already
+ * equals @amr (the expected common case).
+ */
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+{
+	if (unlikely(regs->amr != amr)) {
+		isync();
+		mtspr(SPRN_AMR, regs->amr);
+		/*
+		 * No isync after the write: we are about to rfi back to the
+		 * previous context before any user access would be made,
+		 * and rfi is a CSI.
+		 *
+		 * IAMR does not need restoring when returning to kernel
+		 * space.
+		 */
+	}
+}
+
+/*
+ * Read and return the current AMR.  With CONFIG_PPC_KUAP_DEBUG, also warn
+ * once if it is not AMR_KUAP_BLOCKED (the C twin of kuap_check_amr).
+ */
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+	const unsigned long cur_amr = mfspr(SPRN_AMR);
+
+	if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+		WARN_ON_ONCE(cur_amr != AMR_KUAP_BLOCKED);
+
+	return cur_amr;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+
+/* __kuap_lock() not required, book3s/64 does that in ASM */
+
+/*
+ * We support individually allowing read or write, but we don't support nesting
+ * because that would require an expensive read/modify write of the AMR.
+ */
+
+static __always_inline unsigned long get_kuap(void)
+{
+	/*
+	 * Without KUAP support we report AMR_KUAP_BLOCKED:
+	 * prevent_user_access_return needs that value so that
+	 * restore_user_access performs a flush.
+	 *
+	 * On hash this blocks nothing in practice, so it doesn't break
+	 * anything.
+	 */
+	return mmu_has_feature(MMU_FTR_KUAP) ? mfspr(SPRN_AMR)
+					     : AMR_KUAP_BLOCKED;
+}
+
+/* Write @value to AMR when MMU_FTR_KUAP is set; otherwise do nothing. */
+static __always_inline void set_kuap(unsigned long value)
+{
+	if (mmu_has_feature(MMU_FTR_KUAP)) {
+		/*
+		 * ISA v3.0B requires a CSI (Context Synchronising
+		 * Instruction) both before and after the move to AMR.
+		 * See table 6 on page 1134.
+		 */
+		isync();
+		mtspr(SPRN_AMR, value);
+		isync();
+	}
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+	/*
+	 * On radix this shows up as a storage protection fault
+	 * (DSISR_PROTFAULT), on hash as a key fault (DSISR_KEYFAULT).
+	 *
+	 * An exception table entry exists, yet the user access faulted.
+	 * Either the AMR was never unlocked, or userspace denied access
+	 * through a key value.  Only the first case is of interest here, so
+	 * test the saved AMR against the BLOCK_WRITE/BLOCK_READ pattern
+	 * matching the access type.
+	 */
+	const unsigned long blocked = is_write ? AMR_KUAP_BLOCK_WRITE
+					       : AMR_KUAP_BLOCK_READ;
+
+	return (regs->amr & blocked) == blocked;
+}
+
+/*
+ * Open a user access window in the direction given by @dir, which must be
+ * a compile-time constant (KUAP_READ, KUAP_WRITE or KUAP_READ_WRITE).
+ *
+ * The AMR is set to the current thread's AMR (zero when MMU_FTR_PKEY is
+ * not set) OR-ed with the block mask for the direction that stays closed.
+ * @to, @from and @size are not used by this implementation.
+ */
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
+{
+ unsigned long thread_amr = 0;
+
+ // This is written so we can resolve to a single case at build time
+ BUILD_BUG_ON(!__builtin_constant_p(dir));
+
+ if (mmu_has_feature(MMU_FTR_PKEY))
+ thread_amr = current_thread_amr();
+
+ if (dir == KUAP_READ)
+ set_kuap(thread_amr | AMR_KUAP_BLOCK_WRITE);
+ else if (dir == KUAP_WRITE)
+ set_kuap(thread_amr | AMR_KUAP_BLOCK_READ);
+ else if (dir == KUAP_READ_WRITE)
+ set_kuap(thread_amr);
+ else
+ BUILD_BUG();
+}
+
+#else /* CONFIG_PPC_KUAP */
+
+/* Variant for builds without CONFIG_PPC_KUAP: always reports AMR_KUAP_BLOCKED. */
+static __always_inline unsigned long get_kuap(void)
+{
+ return AMR_KUAP_BLOCKED;
+}
+
+/* Variant for builds without CONFIG_PPC_KUAP: empty, @value is ignored. */
+static __always_inline void set_kuap(unsigned long value) { }
+
+/* Variant for builds without CONFIG_PPC_KUAP: empty, all arguments are ignored. */
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
+{ }
+
+#endif /* !CONFIG_PPC_KUAP */
+
+/*
+ * Close the user access window: set the AMR to AMR_KUAP_BLOCKED via
+ * set_kuap(), then call do_uaccess_flush() when the uaccess_flush_key
+ * static key is enabled.  @dir is not used by this implementation.
+ */
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ set_kuap(AMR_KUAP_BLOCKED);
+ if (static_branch_unlikely(&uaccess_flush_key))
+ do_uaccess_flush();
+}
+
+/*
+ * Like prevent_user_access(), but first captures the value reported by
+ * get_kuap() and returns it so the caller can hand it to
+ * restore_user_access() later.
+ */
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+	const unsigned long saved = get_kuap();
+
+	set_kuap(AMR_KUAP_BLOCKED);
+
+	if (static_branch_unlikely(&uaccess_flush_key))
+		do_uaccess_flush();
+
+	return saved;
+}
+
+/*
+ * Reinstate the AMR value @flags.  When the uaccess_flush_key static key
+ * is enabled and @flags is AMR_KUAP_BLOCKED, also call do_uaccess_flush().
+ */
+static __always_inline void restore_user_access(unsigned long flags)
+{
+	set_kuap(flags);
+
+	if (static_branch_unlikely(&uaccess_flush_key)) {
+		if (flags == AMR_KUAP_BLOCKED)
+			do_uaccess_flush();
+	}
+}
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_KUP_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
new file mode 100644
index 000000000000..346351423207
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -0,0 +1,885 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_
+/*
+ * PowerPC64 memory management structures
+ *
+ * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com>
+ * PPC64 rework.
+ */
+
+#include <asm/page.h>
+#include <asm/bug.h>
+#include <asm/asm-const.h>
+
+/*
+ * This is necessary to get the definition of PGTABLE_RANGE which we
+ * need for various slices related matters. Note that this isn't the
+ * complete pgtable.h but only a portion of it.
+ */
+#include <asm/book3s/64/pgtable.h>
+#include <asm/book3s/64/slice.h>
+#include <asm/task_size_64.h>
+#include <asm/cpu_has_feature.h>
+
+/*
+ * SLB
+ */
+
+#define SLB_NUM_BOLTED 2
+#define SLB_CACHE_ENTRIES 8
+#define SLB_MIN_SIZE 32
+
+/* Bits in the SLB ESID word */
+#define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */
+
+/* Bits in the SLB VSID word */
+#define SLB_VSID_SHIFT 12
+#define SLB_VSID_SHIFT_256M SLB_VSID_SHIFT
+#define SLB_VSID_SHIFT_1T 24
+#define SLB_VSID_SSIZE_SHIFT 62
+#define SLB_VSID_B ASM_CONST(0xc000000000000000)
+#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
+#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
+#define SLB_VSID_KS ASM_CONST(0x0000000000000800)
+#define SLB_VSID_KP ASM_CONST(0x0000000000000400)
+#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
+#define SLB_VSID_L ASM_CONST(0x0000000000000100)
+#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
+#define SLB_VSID_LP ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
+#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
+#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
+#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
+#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
+
+#define SLB_VSID_KERNEL (SLB_VSID_KP)
+#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
+
+#define SLBIE_C (0x08000000)
+#define SLBIE_SSIZE_SHIFT 25
+
+/*
+ * Hash table
+ */
+
+#define HPTES_PER_GROUP 8
+
+#define HPTE_V_SSIZE_SHIFT 62
+#define HPTE_V_AVPN_SHIFT 7
+#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff)
+#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
+#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80)
+#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
+#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
+#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
+#define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
+#define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
+#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002)
+#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
+
+/*
+ * ISA 3.0 has a different HPTE format.
+ */
+#define HPTE_R_3_0_SSIZE_SHIFT 58
+#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT)
+#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
+#define HPTE_R_TS ASM_CONST(0x4000000000000000)
+#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
+#define HPTE_R_KEY_BIT4 ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT3 ASM_CONST(0x1000000000000000)
+#define HPTE_R_RPN_SHIFT 12
+#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
+#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
+#define HPTE_R_PP ASM_CONST(0x0000000000000003)
+#define HPTE_R_PPP ASM_CONST(0x8000000000000003)
+#define HPTE_R_N ASM_CONST(0x0000000000000004)
+#define HPTE_R_G ASM_CONST(0x0000000000000008)
+#define HPTE_R_M ASM_CONST(0x0000000000000010)
+#define HPTE_R_I ASM_CONST(0x0000000000000020)
+#define HPTE_R_W ASM_CONST(0x0000000000000040)
+#define HPTE_R_WIMG ASM_CONST(0x0000000000000078)
+#define HPTE_R_C ASM_CONST(0x0000000000000080)
+#define HPTE_R_R ASM_CONST(0x0000000000000100)
+#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
+#define HPTE_R_KEY_BIT2 ASM_CONST(0x0000000000000800)
+#define HPTE_R_KEY_BIT1 ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT0 ASM_CONST(0x0000000000000200)
+#define HPTE_R_KEY (HPTE_R_KEY_LO | HPTE_R_KEY_HI)
+
+#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
+#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
+
+/* Values for PP (assumes Ks=0, Kp=1) */
+#define PP_RWXX 0 /* Supervisor read/write, User none */
+#define PP_RWRX 1 /* Supervisor read/write, User read */
+#define PP_RWRW 2 /* Supervisor read/write, User read/write */
+#define PP_RXRX 3 /* Supervisor read, User read */
+#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
+
+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
+#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
+#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
+#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT 12
+
+#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
+#define POWER8_TLB_SETS 512 /* # sets in POWER8 TLB */
+#define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */
+#define POWER9_TLB_SETS_RADIX 128 /* # sets in POWER9 TLB Radix mode */
+
+#ifndef __ASSEMBLER__
+
+/*
+ * Table of function pointers for operations on hash page table entries
+ * (HPTEs).  A single instance, mmu_hash_ops, is declared extern right
+ * after this definition.
+ *
+ * NOTE(review): the members are presumably filled in by the platform's
+ * hash MMU backend at boot - confirm against the code that assigns them.
+ */
+struct mmu_hash_ops {
+ void (*hpte_invalidate)(unsigned long slot,
+ unsigned long vpn,
+ int bpsize, int apsize,
+ int ssize, int local);
+ long (*hpte_updatepp)(unsigned long slot,
+ unsigned long newpp,
+ unsigned long vpn,
+ int bpsize, int apsize,
+ int ssize, unsigned long flags);
+ void (*hpte_updateboltedpp)(unsigned long newpp,
+ unsigned long ea,
+ int psize, int ssize);
+ long (*hpte_insert)(unsigned long hpte_group,
+ unsigned long vpn,
+ unsigned long prpn,
+ unsigned long rflags,
+ unsigned long vflags,
+ int psize, int apsize,
+ int ssize);
+ long (*hpte_remove)(unsigned long hpte_group);
+ int (*hpte_removebolted)(unsigned long ea,
+ int psize, int ssize);
+ void (*flush_hash_range)(unsigned long number, int local);
+ void (*hugepage_invalidate)(unsigned long vsid,
+ unsigned long addr,
+ unsigned char *hpte_slot_array,
+ int psize, int ssize, int local);
+ int (*resize_hpt)(unsigned long shift);
+ /*
+ * Special for kexec.
+ * To be called in real mode with interrupts disabled. No locks are
+ * taken as such, concurrent access on pre POWER5 hardware could result
+ * in a deadlock.
+ * The linear mapping is destroyed as well.
+ */
+ void (*hpte_clear_all)(void);
+};
+extern struct mmu_hash_ops mmu_hash_ops;
+
+/*
+ * One hash page table entry: two 64-bit big-endian doublewords.
+ * NOTE(review): v and r presumably correspond to the HPTE_V_* and HPTE_R_*
+ * bit definitions earlier in this header - confirm.
+ */
+struct hash_pte {
+ __be64 v;
+ __be64 r;
+};
+
+extern struct hash_pte *htab_address;
+extern unsigned long htab_size_bytes;
+extern unsigned long htab_hash_mask;
+
+
+/*
+ * Find the first mmu_psize_defs[] entry whose shift equals @shift.
+ * Returns its index, or -1 if no entry matches.
+ */
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+	int idx = 0;
+
+	while (idx < MMU_PAGE_COUNT) {
+		if (mmu_psize_defs[idx].shift == shift)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Return the page shift recorded in mmu_psize_defs[] for @mmu_psize.
+ *
+ * Calls BUG() when @mmu_psize is not a valid index (>= MMU_PAGE_COUNT) or
+ * when the entry's shift is zero.
+ */
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+	/* Check the index first so a bad value never reads past the table. */
+	if (mmu_psize < MMU_PAGE_COUNT && mmu_psize_defs[mmu_psize].shift)
+		return mmu_psize_defs[mmu_psize].shift;
+	BUG();
+}
+
+/*
+ * Return the shift of the first mmu_psize_defs[] entry whose ap field
+ * equals @ap.
+ *
+ * When nothing matches, -1 is returned; since the return type is unsigned
+ * int, callers see it as the maximum unsigned int value.
+ */
+static inline unsigned int ap_to_shift(unsigned long ap)
+{
+	int idx;
+
+	for (idx = 0; idx < MMU_PAGE_COUNT; idx++) {
+		if (mmu_psize_defs[idx].ap != ap)
+			continue;
+		return mmu_psize_defs[idx].shift;
+	}
+
+	return -1;
+}
+
+/*
+ * Pack the L and LP bits of mmu_psize_defs[@psize].sllp into a small
+ * value: the L bit is shifted right by 6, the LP bits right by 4.
+ */
+static inline unsigned long get_sllp_encoding(int psize)
+{
+	const unsigned long l_part = (mmu_psize_defs[psize].sllp & SLB_VSID_L) >> 6;
+	const unsigned long lp_part = (mmu_psize_defs[psize].sllp & SLB_VSID_LP) >> 4;
+
+	return l_part | lp_part;
+}
+
+#endif /* __ASSEMBLER__ */
+
+/*
+ * Segment sizes.
+ * These are the values used by hardware in the B field of
+ * SLB entries and the first dword of MMU hashtable entries.
+ * The B field is 2 bits; the values 2 and 3 are unused and reserved.
+ */
+#define MMU_SEGSIZE_256M 0
+#define MMU_SEGSIZE_1T 1
+
+/*
+ * encode page number shift.
+ * in order to fit the 78 bit va in a 64 bit variable we shift the va by
+ * 12 bits. This enables us to address up to a 76 bit va.
+ * For hpt hash from a va we can ignore the page size bits of va and for
+ * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
+ * we work in all cases including 4k page size.
+ */
+#define VPN_SHIFT 12
+
+/*
+ * HPTE Large Page (LP) details
+ */
+#define LP_SHIFT 12
+#define LP_BITS 8
+#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
+
+#ifndef __ASSEMBLER__
+
+/*
+ * SLB VSID shift for segment size @ssize: SLB_VSID_SHIFT for 256M
+ * segments, SLB_VSID_SHIFT_1T for anything else.
+ */
+static inline int slb_vsid_shift(int ssize)
+{
+	return (ssize == MMU_SEGSIZE_256M) ? SLB_VSID_SHIFT : SLB_VSID_SHIFT_1T;
+}
+
+/*
+ * Segment shift for segment size @ssize: SID_SHIFT for 256M segments,
+ * SID_SHIFT_1T for anything else.
+ */
+static inline int segment_shift(int ssize)
+{
+	return (ssize == MMU_SEGSIZE_256M) ? SID_SHIFT : SID_SHIFT_1T;
+}
+
+/*
+ * This array is indexed by the LP field of the HPTE second dword.
+ * Since this field may contain some RPN bits, some entries are
+ * replicated so that we get the same value irrespective of RPN.
+ * The top 4 bits are the page size index (MMU_PAGE_*) for the
+ * actual page size, the bottom 4 bits are the base page size.
+ */
+extern u8 hpte_page_sizes[1 << LP_BITS];
+
+/*
+ * Decode a page size in bytes from the two HPTE doublewords @h and @l.
+ *
+ * Without HPTE_V_LARGE in @h the size is 4K.  Otherwise the 8-bit LP field
+ * of @l indexes hpte_page_sizes[]: the low nibble of that entry selects the
+ * base page size and the high nibble the actual page size.  A zero table
+ * entry makes the function return 0.
+ *
+ * @is_base_size chooses between the base (true) and actual (false) size.
+ */
+static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
+					     bool is_base_size)
+{
+	unsigned int sizes, lp_field;
+
+	if (!(h & HPTE_V_LARGE))
+		return 1ul << 12;
+
+	/* Extract the 8 bit LP value. */
+	lp_field = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+	sizes = hpte_page_sizes[lp_field];
+	if (sizes == 0)
+		return 0;
+
+	if (!is_base_size)
+		sizes >>= 4;
+
+	return 1ul << mmu_psize_defs[sizes & 0xf].shift;
+}
+
+/* Actual page size, in bytes, encoded by HPTE doublewords @h and @l. */
+static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
+{
+	return __hpte_page_size(h, l, false);
+}
+
+/* Base page size, in bytes, encoded by HPTE doublewords @h and @l. */
+static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
+{
+	return __hpte_page_size(h, l, true);
+}
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_kernel_ssize;
+extern int mmu_highuser_ssize;
+extern u16 mmu_slb_size;
+extern unsigned long tce_alloc_start, tce_alloc_end;
+
+/*
+ * If the processor supports 64k normal pages but not 64k cache
+ * inhibited pages, we have to be prepared to switch processes
+ * to use 4k pages when they create cache-inhibited mappings.
+ * If this is the case, mmu_ci_restrictions will be set to 1.
+ */
+extern int mmu_ci_restrictions;
+
+/*
+ * This computes the AVPN and B fields of the first dword of a HPTE,
+ * for use when we want to match an existing PTE. The bottom 7 bits
+ * of the returned value are zero.
+ */
+static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
+					     int ssize)
+{
+	unsigned long avpn;
+
+	/*
+	 * The AVA field leaves out the low-order 23 bits of the 78 bit VA.
+	 * They are not needed in the PTE: the low-order b bits are part of
+	 * the byte offset into the virtual page, and when b < 23 the
+	 * remaining 23-b bits are always used to select the PTEGs to be
+	 * searched.
+	 */
+	avpn = vpn >> (23 - VPN_SHIFT);
+	avpn &= ~(mmu_psize_defs[psize].avpnm);
+
+	/* Place the AVPN, then the segment size (B field) on top. */
+	return (avpn << HPTE_V_AVPN_SHIFT) |
+	       (((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT);
+}
+
+/*
+ * ISA v3.0 defines a new HPTE format, which differs from the old
+ * format in having smaller AVPN and ARPN fields, and the B field
+ * in the second dword instead of the first.
+ */
+/*
+ * Convert the first HPTE doubleword from the old format to the ISA v3.0
+ * format by keeping only HPTE_V_COMMON_BITS (this trims the AVPN and drops
+ * the B field).
+ */
+static inline unsigned long hpte_old_to_new_v(unsigned long v)
+{
+	const unsigned long common = v & HPTE_V_COMMON_BITS;
+
+	return common;
+}
+
+/*
+ * Build the ISA v3.0 second doubleword: clear the B (segment size) field
+ * in @r and insert the B field taken from the top of old-format @v.
+ */
+static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
+{
+	const unsigned long b_field = (v) >> HPTE_V_SSIZE_SHIFT;
+
+	return (r & ~HPTE_R_3_0_SSIZE_MASK) |
+	       (b_field << HPTE_R_3_0_SSIZE_SHIFT);
+}
+
+/*
+ * Rebuild the old-format first doubleword: keep HPTE_V_COMMON_BITS of @v
+ * and insert the B field found in the ISA v3.0 second doubleword @r.
+ */
+static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
+{
+	const unsigned long b_in_r = r & HPTE_R_3_0_SSIZE_MASK;
+
+	return (v & HPTE_V_COMMON_BITS) |
+	       (b_in_r << (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
+}
+
+/*
+ * Rebuild the old-format second doubleword by clearing the B field bits
+ * (HPTE_R_3_0_SSIZE_MASK) from @r.
+ */
+static inline unsigned long hpte_new_to_old_r(unsigned long r)
+{
+	const unsigned long without_b = r & ~HPTE_R_3_0_SSIZE_MASK;
+
+	return without_b;
+}
+
+/*
+ * Read the first doubleword of @hptep in CPU byte order and return it in
+ * the old format.  On CPU_FTR_ARCH_300 CPUs the B field is merged in from
+ * the second doubleword.
+ */
+static inline unsigned long hpte_get_old_v(struct hash_pte *hptep)
+{
+	const unsigned long raw_v = be64_to_cpu(hptep->v);
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return raw_v;
+
+	return hpte_new_to_old_v(raw_v, be64_to_cpu(hptep->r));
+}
+
+/*
+ * This function sets the AVPN and L fields of the HPTE appropriately
+ * using the base page size and actual page size.
+ */
+static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
+					  int actual_psize, int ssize)
+{
+	const unsigned long avpn = hpte_encode_avpn(vpn, base_psize, ssize);
+
+	/* Every actual page size other than 4K is flagged as large. */
+	return (actual_psize == MMU_PAGE_4K) ? avpn : (avpn | HPTE_V_LARGE);
+}
+
+/*
+ * This function sets the ARPN, and LP fields of the HPTE appropriately
+ * for the page size. We assume the pa is already "clean" that is properly
+ * aligned for the requested page size
+ */
+static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
+					  int actual_psize)
+{
+	unsigned int lp_enc, page_shift;
+
+	/* 4K pages carry no LP encoding: just mask down to the RPN. */
+	if (actual_psize == MMU_PAGE_4K)
+		return pa & HPTE_R_RPN;
+
+	lp_enc = mmu_psize_defs[base_psize].penc[actual_psize];
+	page_shift = mmu_psize_defs[actual_psize].shift;
+
+	/* Drop the in-page offset bits and insert the page size encoding. */
+	return (pa & ~((1ul << page_shift) - 1)) | (lp_enc << LP_SHIFT);
+}
+
+/*
+ * Build a VPN (the virtual address shifted right by VPN_SHIFT) from a VSID,
+ * an effective address and the segment size.
+ */
+static inline unsigned long hpt_vpn(unsigned long ea,
+				    unsigned long vsid, int ssize)
+{
+	/* Number of VPN bits that come from the offset within the segment */
+	int seg_vpn_bits = segment_shift(ssize) - VPN_SHIFT;
+	unsigned long offset_mask = (1ul << seg_vpn_bits) - 1;
+	unsigned long seg_offset = (ea >> VPN_SHIFT) & offset_mask;
+
+	return (vsid << seg_vpn_bits) | seg_offset;
+}
+
+/*
+ * This hashes a virtual address
+ *
+ * @vpn:   virtual page number, i.e. the VA already shifted by VPN_SHIFT
+ * @shift: page shift of the page size being hashed
+ * @ssize: segment size; MMU_SEGSIZE_256M selects the first branch, any other
+ *         value is treated as a 1T segment
+ *
+ * The VSID part of @vpn is XORed with the page index within the segment
+ * (1T segments additionally mix in vsid << 25), and the result is truncated
+ * to its low 39 bits.
+ */
+static inline unsigned long hpt_hash(unsigned long vpn,
+ unsigned int shift, int ssize)
+{
+ unsigned long mask;
+ unsigned long hash, vsid;
+
+ /* VPN_SHIFT can be at most 12 */
+ if (ssize == MMU_SEGSIZE_256M) {
+ mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
+ hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
+ ((vpn & mask) >> (shift - VPN_SHIFT));
+ } else {
+ mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
+ vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
+ hash = vsid ^ (vsid << 25) ^
+ ((vpn & mask) >> (shift - VPN_SHIFT)) ;
+ }
+ return hash & 0x7fffffffffUL;
+}
+
+#define HPTE_LOCAL_UPDATE 0x1
+#define HPTE_NOHPTE_UPDATE 0x2
+#define HPTE_USE_KERNEL_KEY 0x4
+
+/*
+ * Prototype only; the definition lives elsewhere.
+ * NOTE(review): rflags/vflags presumably carry the flag bits for the second
+ * and first HPTE dwords respectively - confirm against the definition.
+ */
+long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa,
+			   unsigned long rflags, unsigned long vflags, int psize, int ssize);
+extern int __hash_page_4K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned long flags, int ssize, int subpage_prot);
+extern int __hash_page_64K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned long flags, int ssize);
+struct mm_struct;
+unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
+extern int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap,
+ unsigned long flags);
+extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+ unsigned long dsisr);
+void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc);
+int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, unsigned long msr);
+int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+ pte_t *ptep, unsigned long trap, unsigned long flags,
+ int ssize, unsigned int shift, unsigned int mmu_psize);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern int __hash_page_thp(unsigned long ea, unsigned long access,
+ unsigned long vsid, pmd_t *pmdp, unsigned long trap,
+ unsigned long flags, int ssize, unsigned int psize);
+#else
+/*
+ * Stub used when CONFIG_TRANSPARENT_HUGEPAGE is not set: reaching it is a
+ * bug, so it calls BUG(). The trailing return only gives the function a
+ * value on every path.
+ */
+static inline int __hash_page_thp(unsigned long ea, unsigned long access,
+ unsigned long vsid, pmd_t *pmdp,
+ unsigned long trap, unsigned long flags,
+ int ssize, unsigned int psize)
+{
+ BUG();
+ return -1;
+}
+#endif
+extern void hash_failure_debug(unsigned long ea, unsigned long access,
+ unsigned long vsid, unsigned long trap,
+ int ssize, int psize, int lpsize,
+ unsigned long pte);
+extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long prot,
+ int psize, int ssize);
+int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+ int psize, int ssize);
+extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
+extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
+
+extern void hash__setup_new_exec(void);
+
+#ifdef CONFIG_PPC_PSERIES
+void hpte_init_pseries(void);
+#else
+static inline void hpte_init_pseries(void) { }
+#endif
+
+extern void hpte_init_native(void);
+
+struct slb_entry {
+ u64 esid;
+ u64 vsid;
+};
+
+extern void slb_initialize(void);
+void slb_flush_and_restore_bolted(void);
+void slb_flush_all_realmode(void);
+void __slb_restore_bolted_realmode(void);
+void slb_restore_bolted_realmode(void);
+void slb_save_contents(struct slb_entry *slb_ptr);
+void slb_dump_contents(struct slb_entry *slb_ptr);
+
+extern void slb_vmalloc_update(void);
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void slb_set_size(u16 size);
+#else
+static inline void slb_set_size(u16 size) { }
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+/*
+ * VSID allocation (256MB segment)
+ *
+ * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
+ * from mmu context id and effective segment id of the address.
+ *
+ * For user processes max context id is limited to MAX_USER_CONTEXT.
+ * more details in get_user_context
+ *
+ * For kernel space get_kernel_context
+ *
+ * The proto-VSIDs are then scrambled into real VSIDs with the
+ * multiplicative hash:
+ *
+ * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
+ *
+ * VSID_MULTIPLIER is prime, so in particular it is
+ * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
+ * Because the modulus is 2^n-1 we can compute it efficiently without
+ * a divide or extra multiply (see below). The scramble function gives
+ * robust scattering in the hash table (at least based on some initial
+ * results).
+ *
+ * We use VSID 0 to indicate an invalid VSID. This means we can't use context id
+ * 0, because a context id of 0 and an EA of 0 gives a proto-VSID of 0, which
+ * will produce a VSID of 0.
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
+ */
+
+/*
+ * Max Va bits we support as of now is 68 bits. We want 19 bit
+ * context ID.
+ * Restrictions:
+ * GPU has restrictions of not able to access beyond 128TB
+ * (47 bit effective address). We also cannot do more than 20bit PID.
+ * For p4 and p5 which can only do 65 bit VA, we restrict our CONTEXT_BITS
+ * to 16 bits (ie, we can only have 2^16 pids at the same time).
+ */
+#define VA_BITS 68
+#define CONTEXT_BITS 19
+#define ESID_BITS (VA_BITS - (SID_SHIFT + CONTEXT_BITS))
+#define ESID_BITS_1T (VA_BITS - (SID_SHIFT_1T + CONTEXT_BITS))
+
+#define ESID_BITS_MASK ((1 << ESID_BITS) - 1)
+#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1)
+
+/*
+ * Now certain config support MAX_PHYSMEM more than 512TB. Hence we will need
+ * to use more than one context for linear mapping the kernel.
+ * For vmalloc and memmap, we use just one context with 512TB. With 64 byte
+ * struct page size, we need only 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
+ */
+#if (H_MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT (1UL << (H_MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#else
+#define MAX_KERNEL_CTX_CNT 1
+#endif
+
+#define MAX_VMALLOC_CTX_CNT 1
+#define MAX_IO_CTX_CNT 1
+#define MAX_VMEMMAP_CTX_CNT 1
+
+/*
+ * 256MB segment
+ * The proto-VSID space has 2^(CONTEXT_BITS + ESID_BITS) - 1 segments
+ * available for user + kernel mapping. VSID 0 is reserved as invalid, contexts
+ * 1-4 are used for kernel mapping. Each segment contains 2^28 bytes. Each
+ * context maps 2^49 bytes (512TB).
+ *
+ * We also need to avoid the last segment of the last context, because that
+ * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
+ * because of the modulo operation in vsid scramble.
+ *
+ */
+#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
+
+// The + 2 accounts for INVALID_REGION and 1 more to avoid overlap with kernel
+#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
+ MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT + 2)
+
+/*
+ * For platforms that support only 65-bit VA we limit the context bits
+ */
+#define MAX_USER_CONTEXT_65BIT_VA ((ASM_CONST(1) << (65 - (SID_SHIFT + ESID_BITS))) - 2)
+
+/*
+ * This should be computed such that protovsid * vsid_multiplier
+ * doesn't overflow 64 bits. The vsid_multiplier should also be
+ * co-prime to vsid_modulus. We also need to make sure that the number
+ * of bits in the multiplied result (dividend) is less than twice the number of
+ * protovsid bits for our modulus optimization to work.
+ *
+ * The below table shows the current values used.
+ * |-------+------------+----------------------+------------+-------------------|
+ * | | Prime Bits | proto VSID_BITS_65VA | Total Bits | 2* prot VSID_BITS |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 1T | 24 | 25 | 49 | 50 |
+ * |-------+------------+----------------------+------------+-------------------|
+ * | 256MB | 24 | 37 | 61 | 74 |
+ * |-------+------------+----------------------+------------+-------------------|
+ *
+ * |-------+------------+----------------------+------------+--------------------|
+ * | | Prime Bits | proto VSID_BITS_68VA | Total Bits | 2* proto VSID_BITS |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 1T | 24 | 28 | 52 | 56 |
+ * |-------+------------+----------------------+------------+--------------------|
+ * | 256MB | 24 | 40 | 64 | 80 |
+ * |-------+------------+----------------------+------------+--------------------|
+ *
+ */
+#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
+#define VSID_BITS_256M (VA_BITS - SID_SHIFT)
+#define VSID_BITS_65_256M (65 - SID_SHIFT)
+/*
+ * Modular multiplicative inverse of VSID_MULTIPLIER under modulo VSID_MODULUS
+ */
+#define VSID_MULINV_256M ASM_CONST(665548017062)
+
+#define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
+#define VSID_BITS_1T (VA_BITS - SID_SHIFT_1T)
+#define VSID_BITS_65_1T (65 - SID_SHIFT_1T)
+#define VSID_MULINV_1T ASM_CONST(209034062)
+
+/* 1TB VSID reserved for VRMA */
+#define VRMA_VSID 0x1ffffffUL
+#define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT))
+
+/* 4 bits per slice and we have one slice per 1TB */
+#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
+#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41)
+#ifndef __ASSEMBLER__
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * For the sub-page protection option, we extend the PGD with one of
+ * these. Basically we have a 3-level tree, with the top level being
+ * the protptrs array. To optimize speed and memory consumption when
+ * only addresses < 4GB are being protected, pointers to the first
+ * four pages of sub-page protection words are stored in the low_prot
+ * array.
+ * Each page of sub-page protection words protects 1GB (4 bytes
+ * protects 64k). For the 3-level tree, each page of pointers then
+ * protects 8TB.
+ */
+struct subpage_prot_table {
+ unsigned long maxaddr; /* only addresses < this are protected */
+ unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)];
+ unsigned int *low_prot[4];
+};
+
+#define SBP_L1_BITS (PAGE_SHIFT - 2)
+#define SBP_L2_BITS (PAGE_SHIFT - 3)
+#define SBP_L1_COUNT (1 << SBP_L1_BITS)
+#define SBP_L2_COUNT (1 << SBP_L2_BITS)
+#define SBP_L2_SHIFT (PAGE_SHIFT + SBP_L1_BITS)
+#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
+
+extern void subpage_prot_free(struct mm_struct *mm);
+#else
+static inline void subpage_prot_free(struct mm_struct *mm) {}
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+ u16 user_psize; /* page size index */
+
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+#endif
+ struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ struct subpage_prot_table *spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
+#if 0
+/*
+ * The code below is equivalent to this function for arguments
+ * < 2^VSID_BITS, which is all this should ever be called
+ * with. However gcc is not clever enough to compute the
+ * modulus (2^n-1) without a second multiply.
+ */
+#define vsid_scramble(protovsid, size) \
+ ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
+
+/* simplified form avoiding mod operation */
+#define vsid_scramble(protovsid, size) \
+ ({ \
+ unsigned long x; \
+ x = (protovsid) * VSID_MULTIPLIER_##size; \
+ x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
+ (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
+ })
+
+#else /* 1 */
+/*
+ * Scramble a proto-VSID into a VSID:
+ *
+ *	(protovsid * vsid_multiplier) % (2^vsid_bits - 1)
+ *
+ * The modulus has the form 2^n - 1, so the remainder is obtained by folding
+ * the high part of the product onto the low part instead of dividing. The
+ * final expression folds once more so that a sum equal to the modulus maps
+ * to 0.
+ */
+static inline unsigned long vsid_scramble(unsigned long protovsid,
+ unsigned long vsid_multiplier, int vsid_bits)
+{
+ unsigned long vsid;
+ unsigned long vsid_modulus = ((1UL << vsid_bits) - 1);
+ /*
+ * We have the same multiplier for both 256M and 1T segments now
+ */
+ vsid = protovsid * vsid_multiplier;
+ vsid = (vsid >> vsid_bits) + (vsid & vsid_modulus);
+ return (vsid + ((vsid + 1) >> vsid_bits)) & vsid_modulus;
+}
+
+#endif /* 1 */
+
+/*
+ * Pick the segment size indicator for a user address: addresses below 1T
+ * always use 256M segments, everything at or above 1T uses
+ * mmu_highuser_ssize.
+ */
+static inline int user_segment_size(unsigned long addr)
+{
+	return (addr < (1UL << SID_SHIFT_1T)) ? MMU_SEGSIZE_256M
+					      : mmu_highuser_ssize;
+}
+
+/*
+ * Compute the VSID for effective address @ea within @context.
+ *
+ * Returns 0 (the invalid VSID) when the EA lies beyond H_PGTABLE_RANGE.
+ * Otherwise the proto-VSID is formed from the context id and the ESID bits
+ * of @ea for the given segment size, then scrambled. The VA width is
+ * VA_BITS, or 65 when the MMU lacks MMU_FTR_68_BIT_VA.
+ */
+static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
+				     int ssize)
+{
+	unsigned long total_va_bits, scramble_bits, proto;
+
+	/* Bad address. We return VSID 0 for that */
+	if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
+		return 0;
+
+	total_va_bits = mmu_has_feature(MMU_FTR_68_BIT_VA) ? VA_BITS : 65;
+
+	if (ssize != MMU_SEGSIZE_256M) {
+		/* 1T segment */
+		scramble_bits = total_va_bits - SID_SHIFT_1T;
+		proto = (context << ESID_BITS_1T) |
+			((ea >> SID_SHIFT_1T) & ESID_BITS_1T_MASK);
+		return vsid_scramble(proto, VSID_MULTIPLIER_1T, scramble_bits);
+	}
+
+	/* 256M segment */
+	scramble_bits = total_va_bits - SID_SHIFT;
+	proto = (context << ESID_BITS) | ((ea >> SID_SHIFT) & ESID_BITS_MASK);
+	return vsid_scramble(proto, VSID_MULTIPLIER_256M, scramble_bits);
+}
+
+/*
+ * Context ids used for kernel space. Each context covers 512TB.
+ *
+ * Linear map:
+ *
+ * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff]
+ * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
+ * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
+ * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
+ *
+ * vmap, IO, vmemmap:
+ *
+ * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
+ * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
+ * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
+ *
+ */
+static inline unsigned long get_kernel_context(unsigned long ea)
+{
+	unsigned long region = get_region_id(ea);
+
+	/* Non-linear regions get one context each, placed after the linear ones */
+	if (region != LINEAR_MAP_REGION_ID)
+		return region + MAX_KERNEL_CTX_CNT - 1;
+
+	/*
+	 * Depending on the kernel config the linear map can span one context
+	 * or more. We already verified ea to be not beyond the addr limit.
+	 */
+	return 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+}
+
+/*
+ * VSID for a kernel effective address. Only meaningful for addresses
+ * >= PAGE_OFFSET; anything that is not a kernel address yields VSID 0.
+ */
+static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
+{
+	if (!is_kernel_addr(ea))
+		return 0;
+
+	return get_vsid(get_kernel_context(ea), ea, ssize);
+}
+
+unsigned htab_shift_for_mem_size(unsigned long mem_size);
+
+enum slb_index {
+ LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */
+ KSTACK_INDEX = 1, /* Kernel stack map */
+};
+
+#define slb_esid_mask(ssize) \
+ (((ssize) == MMU_SEGSIZE_256M) ? ESID_MASK : ESID_MASK_1T)
+
+/*
+ * Compose an SLB ESID word: the ESID bits of @ea for the given segment size,
+ * the valid bit SLB_ESID_V and the SLB slot @index.
+ */
+static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
+					 enum slb_index index)
+{
+	unsigned long esid = ea & slb_esid_mask(ssize);
+
+	return esid | SLB_ESID_V | index;
+}
+
+/*
+ * Compose an SLB VSID word from an already computed @vsid, the caller's
+ * @flags and the segment size encoded at SLB_VSID_SSIZE_SHIFT.
+ */
+static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
+					   unsigned long flags)
+{
+	unsigned long vsid_field = vsid << slb_vsid_shift(ssize);
+	unsigned long ssize_field = (unsigned long)ssize << SLB_VSID_SSIZE_SHIFT;
+
+	return vsid_field | flags | ssize_field;
+}
+
+/*
+ * Compose an SLB VSID word for kernel address @ea: look up its kernel VSID
+ * and hand it to __mk_vsid_data().
+ */
+static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+					 unsigned long flags)
+{
+	unsigned long kernel_vsid = get_kernel_vsid(ea, ssize);
+
+	return __mk_vsid_data(kernel_vsid, ssize, flags);
+}
+
+#endif /* __ASSEMBLER__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_HASH_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
new file mode 100644
index 000000000000..48631365b48c
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -0,0 +1,292 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_MMU_H_
+#define _ASM_POWERPC_BOOK3S_64_MMU_H_
+
+#include <asm/page.h>
+
+#ifndef __ASSEMBLER__
+/*
+ * Page size definition
+ *
+ * shift : is the "PAGE_SHIFT" value for that page size
+ * sllp : is a bit mask with the value of SLB L || LP to be or'ed
+ * directly to a slbmte "vsid" value
+ * penc : is the HPTE encoding mask for the "LP" field:
+ *
+ */
+struct mmu_psize_def {
+ unsigned int shift; /* number of bits */
+ int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
+ unsigned int tlbiel; /* tlbiel supported for that page size */
+ unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
+ unsigned long h_rpt_pgsize; /* H_RPT_INVALIDATE page size encoding */
+ union {
+ unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
+ unsigned long ap; /* Ap encoding used by PowerISA 3.0 */
+ };
+};
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLER__ */
+
+/* 64-bit classic hash table MMU */
+#include <asm/book3s/64/mmu-hash.h>
+
+#ifndef __ASSEMBLER__
+/*
+ * ISA 3.0 partition and process table entry format
+ */
+struct prtb_entry {
+ __be64 prtb0;
+ __be64 prtb1;
+};
+extern struct prtb_entry *process_tb;
+
+struct patb_entry {
+ __be64 patb0;
+ __be64 patb1;
+};
+extern struct patb_entry *partition_tb;
+
+/* Bits in patb0 field */
+#define PATB_HR (1UL << 63)
+#define RPDB_MASK 0x0fffffffffffff00UL
+#define RPDB_SHIFT (1UL << 8)
+#define RTS1_SHIFT 61 /* top 2 bits of radix tree size */
+#define RTS1_MASK (3UL << RTS1_SHIFT)
+#define RTS2_SHIFT 5 /* bottom 3 bits of radix tree size */
+#define RTS2_MASK (7UL << RTS2_SHIFT)
+#define RPDS_MASK 0x1f /* root page dir. size field */
+
+/* Bits in patb1 field */
+#define PATB_GR (1UL << 63) /* guest uses radix; must match HR */
+#define PRTS_MASK 0x1f /* process table size field */
+#define PRTB_MASK 0x0ffffffffffff000UL
+
+/* Number of supported LPID bits */
+extern unsigned int mmu_lpid_bits;
+
+/* Number of supported PID bits */
+extern unsigned int mmu_pid_bits;
+
+/* Base PID to allocate from */
+extern unsigned int mmu_base_pid;
+
+extern unsigned long __ro_after_init memory_block_size;
+
+#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4)
+#define PRTB_ENTRIES (1ul << mmu_pid_bits)
+
+#define PATB_SIZE_SHIFT (mmu_lpid_bits + 4)
+#define PATB_ENTRIES (1ul << mmu_lpid_bits)
+
+typedef unsigned long mm_context_id_t;
+struct spinlock;
+
+/* Maximum possible number of NPUs in a system. */
+#define NV_MAX_NPUS 8
+
+typedef struct {
+ union {
+ /*
+ * We use id as the PIDR content for radix. On hash we can use
+ * more than one id. The extended ids are used when we start
+ * having address above 512TB. We allocate one extended id
+ * for each 512TB. The new id is then used with the 49 bit
+ * EA to build a new VA. We always use ESID_BITS_1T_MASK bits
+ * from EA and new context ids to build the new VAs.
+ */
+ mm_context_id_t id;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
+#endif
+ };
+
+ /* Number of bits in the mm_cpumask */
+ atomic_t active_cpus;
+
+ /* Number of users of the external (Nest) MMU */
+ atomic_t copros;
+
+ /* Number of user space windows opened in process mm_context */
+ atomic_t vas_windows;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ struct hash_mm_context *hash_context;
+#endif
+
+ void __user *vdso;
+ /*
+ * pagetable fragment support
+ */
+ void *pte_frag;
+ void *pmd_frag;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ struct list_head iommu_group_mem_list;
+#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * Each bit represents one protection key.
+ * bit set -> key allocated
+ * bit unset -> key available for allocation
+ */
+ u32 pkey_allocation_map;
+ s16 execute_only_pkey; /* key holding execute-only protection */
+#endif
+} mm_context_t;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->hash_context->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->hash_context->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->hash_context->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->hash_context->slb_addr_limit = limit;
+}
+
+/*
+ * Return the slice mask in the hash context that tracks page size @psize.
+ * Only the sizes enabled by the kernel config are recognised; any other
+ * size must be MMU_PAGE_4K, otherwise BUG_ON() fires.
+ */
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+	struct hash_mm_context *hctx = ctx->hash_context;
+
+#ifdef CONFIG_PPC_64K_PAGES
+	if (psize == MMU_PAGE_64K)
+		return &hctx->mask_64k;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+	if (psize == MMU_PAGE_16M)
+		return &hctx->mask_16m;
+	if (psize == MMU_PAGE_16G)
+		return &hctx->mask_16g;
+#endif
+	BUG_ON(psize != MMU_PAGE_4K);
+
+	return &hctx->mask_4k;
+}
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+ return ctx->hash_context->spt;
+}
+#endif
+
+/*
+ * The current system page and segment sizes
+ */
+extern int mmu_virtual_psize;
+extern int mmu_vmalloc_psize;
+extern int mmu_io_psize;
+#else /* CONFIG_PPC_64S_HASH_MMU */
+#ifdef CONFIG_PPC_64K_PAGES
+#define mmu_virtual_psize MMU_PAGE_64K
+#else
+#define mmu_virtual_psize MMU_PAGE_4K
+#endif
+#endif
+extern int mmu_linear_psize;
+extern int mmu_vmemmap_psize;
+
+/* MMU initialization */
+void mmu_early_init_devtree(void);
+void hash__early_init_devtree(void);
+void radix__early_init_devtree(void);
+#ifdef CONFIG_PPC_PKEY
+void pkey_early_init_devtree(void);
+#else
+static inline void pkey_early_init_devtree(void) {}
+#endif
+
+extern void hash__early_init_mmu(void);
+extern void radix__early_init_mmu(void);
+/*
+ * Run the early MMU setup for the translation mode in use: radix when
+ * radix_enabled() is true, hash otherwise.
+ */
+static inline void __init early_init_mmu(void)
+{
+	/*
+	 * Plain calls rather than "return f();": ISO C does not allow a
+	 * return statement with an expression in a void function.
+	 */
+	if (radix_enabled())
+		radix__early_init_mmu();
+	else
+		hash__early_init_mmu();
+}
+extern void hash__early_init_mmu_secondary(void);
+extern void radix__early_init_mmu_secondary(void);
+/*
+ * Secondary-CPU counterpart of early_init_mmu(): dispatch to the radix or
+ * hash implementation depending on radix_enabled().
+ */
+static inline void early_init_mmu_secondary(void)
+{
+	/*
+	 * Plain calls rather than "return f();": ISO C does not allow a
+	 * return statement with an expression in a void function.
+	 */
+	if (radix_enabled())
+		radix__early_init_mmu_secondary();
+	else
+		hash__early_init_mmu_secondary();
+}
+
+extern void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size);
+/*
+ * Apply the initial memory limit derived from the first memblock. Nothing is
+ * done when early_radix_enabled() reports radix; otherwise the hash
+ * implementation is called.
+ */
+static inline void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+					      phys_addr_t first_memblock_size)
+{
+	if (early_radix_enabled())
+		return;
+
+	/*
+	 * Hash has more strict restrictions. At this point we don't
+	 * know which translations we will pick. Hence go with hash
+	 * restrictions.
+	 */
+	hash__setup_initial_memory_limit(first_memblock_base,
+					 first_memblock_size);
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void __init radix_init_pseries(void);
+#else
+static inline void radix_init_pseries(void) { }
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define arch_clear_mm_cpumask_cpu(cpu, mm) \
+ do { \
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \
+ dec_mm_active_cpus(mm); \
+ cpumask_clear_cpu(cpu, mm_cpumask(mm)); \
+ } \
+ } while (0)
+
+void cleanup_cpu_mmu_context(void);
+#endif
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/*
+ * Look up the context id covering user address @ea. The address bits above
+ * MAX_EA_BITS_PER_CONTEXT index ctx->extended_id[]. An out-of-range index
+ * triggers a warning and yields context 0.
+ */
+static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
+{
+	int slot = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+	if (unlikely(slot >= ARRAY_SIZE(ctx->extended_id))) {
+		/* should never happen */
+		WARN_ON(1);
+		return 0;
+	}
+
+	return ctx->extended_id[slot];
+}
+
+/*
+ * VSID for user address @ea: resolve the context id for the address, then
+ * let get_vsid() build and scramble the proto-VSID.
+ */
+static inline unsigned long get_user_vsid(mm_context_t *ctx,
+					  unsigned long ea, int ssize)
+{
+	return get_vsid(get_user_context(ctx, ea), ea, ssize);
+}
+#endif
+
+#endif /* __ASSEMBLER__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
new file mode 100644
index 000000000000..dd2cff53a111
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_64_PGALLOC_H
+/*
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/kmemleak.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+ struct vmemmap_backing *list;
+ unsigned long phys;
+ unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
+extern void pmd_fragment_free(unsigned long *);
+extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
+extern void __tlb_remove_table(void *_table);
+void pte_frag_destroy(void *pte_frag);
+
+/*
+ * Allocate a radix PGD. With 64K pages a single page is requested; otherwise
+ * an order-4 allocation is made with __GFP_RETRY_MAYFAIL. Returns NULL when
+ * the allocation fails.
+ */
+static inline pgd_t *radix__pgd_alloc(struct mm_struct *mm)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+	return (pgd_t *)__get_free_page(pgtable_gfp_flags(mm, PGALLOC_GFP));
+#else
+	struct page *pg = alloc_pages(pgtable_gfp_flags(mm, PGALLOC_GFP |
+							__GFP_RETRY_MAYFAIL), 4);
+
+	return pg ? (pgd_t *)page_address(pg) : NULL;
+#endif
+}
+
+static inline void radix__pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ free_page((unsigned long)pgd);
+#else
+ free_pages((unsigned long)pgd, 4);
+#endif
+}
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd;
+
+ if (radix_enabled())
+ return radix__pgd_alloc(mm);
+
+ pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+ if (unlikely(!pgd))
+ return pgd;
+
+ /*
+ * Don't scan the PGD for pointers, it contains references to PUDs but
+ * those references are not full pointers and so can't be recognised by
+ * kmemleak.
+ */
+ kmemleak_no_scan(pgd);
+
+ /*
+ * With hugetlb, we don't clear the second half of the page table.
+ * If we share the same slab cache with the pmd or pud level table,
+ * we need to make sure we zero out the full table on alloc.
+ * With 4K we don't store slot in the second half. Hence we don't
+ * need to do this for 4k.
+ */
+#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_64K_PAGES) && \
+ (H_PGD_INDEX_SIZE == H_PUD_CACHE_INDEX)
+ memset(pgd, 0, PGD_TABLE_SIZE);
+#endif
+ return pgd;
+}
+
+/*
+ * Free a PGD: radix PGDs go through radix__pgd_free(), hash PGDs are returned
+ * to the PGD_INDEX_SIZE page-table cache.
+ */
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	if (radix_enabled()) {
+		/* call, then return: no "return <void expression>" in a void function */
+		radix__pgd_free(mm, pgd);
+		return;
+	}
+	kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *pgd, pud_t *pud)
+{
+ *pgd = __p4d(__pgtable_ptr_val(pud) | PGD_VAL_BITS);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ pud_t *pud;
+
+ pud = kmem_cache_alloc(PGT_CACHE(PUD_CACHE_INDEX),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+ /*
+ * Tell kmemleak to ignore the PUD, that means don't scan it for
+ * pointers and don't consider it a leak. PUDs are typically only
+ * referred to by their PGD, but kmemleak is not able to recognise those
+ * as pointers, leading to false leak reports.
+ */
+ kmemleak_ignore(pud);
+
+ return pud;
+}
+
+/*
+ * Free a PUD table, choosing the release path from the flags of its backing
+ * page.
+ */
+static inline void __pud_free(pud_t *pud)
+{
+	struct page *pg = virt_to_page(pud);
+
+	/*
+	 * Early pud pages allocated via memblock allocator
+	 * can't be directly freed to slab. KFENCE pages have
+	 * both reserved and slab flags set so need to be freed
+	 * kmem_cache_free.
+	 */
+	if (!PageReserved(pg) || PageSlab(pg))
+		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
+	else
+		free_reserved_page(pg);
+}
+
+/*
+ * Free a PUD table. @mm is unused here; the work is done by __pud_free().
+ */
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+	/* plain call: a void function must not return an expression */
+	__pud_free(pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ *pud = __pud(__pgtable_ptr_val(pmd) | PUD_VAL_BITS);
+}
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long address)
+{
+ pgtable_free_tlb(tlb, pud, PUD_INDEX);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return pmd_fragment_alloc(mm, addr);
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ pmd_fragment_free((unsigned long *)pmd);
+}
+
+/*
+ * Queue a PMD table for freeing through the mmu_gather @tlb. @address is
+ * unused here.
+ */
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+				  unsigned long address)
+{
+	/* plain call, matching __pud_free_tlb() and __pte_free_tlb() */
+	pgtable_free_tlb(tlb, pmd, PMD_INDEX);
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ *pmd = __pmd(__pgtable_ptr_val(pte) | PMD_VAL_BITS);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ pgtable_t pte_page)
+{
+ *pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ pgtable_free_tlb(tlb, table, PTE_INDEX);
+}
+
+extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
+/*
+ * Add @count to the direct-mapped page counter for page size index @psize.
+ * Does nothing when CONFIG_PROC_FS is disabled.
+ */
+static inline void update_page_count(int psize, long count)
+{
+	if (!IS_ENABLED(CONFIG_PROC_FS))
+		return;
+
+	atomic_long_add(count, &direct_pages_count[psize]);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_64_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
new file mode 100644
index 000000000000..004a03e97e58
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H
+
+#ifndef __ASSEMBLER__
+#ifdef CONFIG_HUGETLB_PAGE
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+static inline int remap_4k_pfn(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, pgprot_t prot)
+{
+ if (radix_enabled())
+ BUG();
+ return hash__remap_4k_pfn(vma, addr, pfn, prot);
+}
+#endif /* __ASSEMBLER__ */
+#endif /*_ASM_POWERPC_BOOK3S_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
new file mode 100644
index 000000000000..aac8ce30cd3b
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -0,0 +1,1385 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+
+#include <asm-generic/pgtable-nop4d.h>
+
+#ifndef __ASSEMBLER__
+#include <linux/mmdebug.h>
+#include <linux/bug.h>
+#include <linux/sizes.h>
+#endif
+
+/*
+ * Common bits between hash and Radix page table
+ */
+
+#define _PAGE_EXEC 0x00001 /* execute permission */
+#define _PAGE_WRITE 0x00002 /* write access allowed */
+#define _PAGE_READ 0x00004 /* read access allowed */
+#define _PAGE_PRIVILEGED 0x00008 /* kernel access only */
+#define _PAGE_SAO 0x00010 /* Strong access order */
+#define _PAGE_NON_IDEMPOTENT 0x00020 /* non idempotent memory */
+#define _PAGE_TOLERANT 0x00030 /* tolerant memory, cache inhibited */
+#define _PAGE_DIRTY 0x00080 /* C: page changed */
+#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
+/*
+ * Software bits
+ */
+#define _RPAGE_SW0 0x2000000000000000UL
+#define _RPAGE_SW1 0x00800
+#define _RPAGE_SW2 0x00400
+#define _RPAGE_SW3 0x00200
+#define _RPAGE_RSV1 0x00040UL
+
+#define _RPAGE_PKEY_BIT4 0x1000000000000000UL
+#define _RPAGE_PKEY_BIT3 0x0800000000000000UL
+#define _RPAGE_PKEY_BIT2 0x0400000000000000UL
+#define _RPAGE_PKEY_BIT1 0x0200000000000000UL
+#define _RPAGE_PKEY_BIT0 0x0100000000000000UL
+
+#define _PAGE_PTE 0x4000000000000000UL /* distinguishes PTEs from pointers */
+#define _PAGE_PRESENT 0x8000000000000000UL /* pte contains a translation */
+/*
+ * We need to mark a pmd pte invalid while splitting. We can do that by clearing
+ * the _PAGE_PRESENT bit, but the entry would then be taken for a swap pte. To
+ * differentiate between the two, a software (SW) bit is set when invalidating.
+ *
+ * The same temporary invalidation is done for a regular pte entry in
+ * ptep_set_access_flags.
+ *
+ * This is used only when _PAGE_PRESENT is cleared.
+ */
+#define _PAGE_INVALID _RPAGE_SW0
+
+/*
+ * Top and bottom bits of RPN which can be used by hash
+ * translation mode, because we expect them to be zero
+ * otherwise.
+ */
+#define _RPAGE_RPN0 0x01000
+#define _RPAGE_RPN1 0x02000
+#define _RPAGE_RPN43 0x0080000000000000UL
+#define _RPAGE_RPN42 0x0040000000000000UL
+#define _RPAGE_RPN41 0x0020000000000000UL
+
+/* Max physical address bit as per radix table */
+#define _RPAGE_PA_MAX 56
+
+/*
+ * Max physical address bit we will use for now.
+ *
+ * This is mostly a hardware limitation and for now Power9 has
+ * a 51 bit limit.
+ *
+ * This is different from the number of physical bits required to address
+ * the last byte of memory. That is defined by MAX_PHYSMEM_BITS.
+ * MAX_PHYSMEM_BITS is a linux limitation imposed by the maximum
+ * number of sections we can support (SECTIONS_SHIFT).
+ *
+ * This is different from Radix page table limitation above and
+ * should always be less than that. The limit is done such that
+ * we can overload the bits between _RPAGE_PA_MAX and _PAGE_PA_MAX
+ * for hash linux page table specific bits.
+ *
+ * In order to be compatible with future hardware generations we keep
+ * some offsets and limit this for now to 53
+ */
+#define _PAGE_PA_MAX 53
+
+#define _PAGE_SOFT_DIRTY _RPAGE_SW3 /* software: software dirty tracking */
+#define _PAGE_SPECIAL _RPAGE_SW2 /* software: special page */
+
+/*
+ * Drivers request a cache-inhibited (CI) pte mapping using _PAGE_NO_CACHE.
+ * Instead of fixing all of them, provide an alternate define which
+ * maps to the CI pte mapping bits.
+ */
+#define _PAGE_NO_CACHE _PAGE_TOLERANT
+/*
+ * We support _RPAGE_PA_MAX bit real address in pte. On the linux side
+ * we are limited by _PAGE_PA_MAX. Clear everything above _PAGE_PA_MAX
+ * and every thing below PAGE_SHIFT;
+ */
+#define PTE_RPN_MASK (((1UL << _PAGE_PA_MAX) - 1) & (PAGE_MASK))
+#define PTE_RPN_SHIFT PAGE_SHIFT
+/*
+ * set of bits not changed in pmd_modify. Even though we have hash specific bits
+ * in here, on radix we expect them to be zero.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | H_PAGE_THP_HUGE | _PAGE_PTE | \
+ _PAGE_SOFT_DIRTY)
+/*
+ * user access blocked by key
+ */
+#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_RW | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ)
+#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_READ | _PAGE_EXEC)
+#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
+ _PAGE_SOFT_DIRTY)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE (_PAGE_BASE_NC)
+
+#include <asm/pgtable-masks.h>
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_TOLERANT)
+#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NON_IDEMPOTENT)
+#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+#ifndef __ASSEMBLER__
+/*
+ * page table defines
+ */
+extern unsigned long __pte_index_size;
+extern unsigned long __pmd_index_size;
+extern unsigned long __pud_index_size;
+extern unsigned long __pgd_index_size;
+extern unsigned long __pud_cache_index;
+#define PTE_INDEX_SIZE __pte_index_size
+#define PMD_INDEX_SIZE __pmd_index_size
+#define PUD_INDEX_SIZE __pud_index_size
+#define PGD_INDEX_SIZE __pgd_index_size
+/* pmd table use page table fragments */
+#define PMD_CACHE_INDEX 0
+#define PUD_CACHE_INDEX __pud_cache_index
+/*
+ * Because of use of pte fragments and THP, size of page table
+ * are not always derived out of index size above.
+ */
+extern unsigned long __pte_table_size;
+extern unsigned long __pmd_table_size;
+extern unsigned long __pud_table_size;
+extern unsigned long __pgd_table_size;
+#define PTE_TABLE_SIZE __pte_table_size
+#define PMD_TABLE_SIZE __pmd_table_size
+#define PUD_TABLE_SIZE __pud_table_size
+#define PGD_TABLE_SIZE __pgd_table_size
+
+extern unsigned long __pmd_val_bits;
+extern unsigned long __pud_val_bits;
+extern unsigned long __pgd_val_bits;
+#define PMD_VAL_BITS __pmd_val_bits
+#define PUD_VAL_BITS __pud_val_bits
+#define PGD_VAL_BITS __pgd_val_bits
+
+extern unsigned long __pte_frag_nr;
+#define PTE_FRAG_NR __pte_frag_nr
+extern unsigned long __pte_frag_size_shift;
+#define PTE_FRAG_SIZE_SHIFT __pte_frag_size_shift
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+
+extern unsigned long __pmd_frag_nr;
+#define PMD_FRAG_NR __pmd_frag_nr
+extern unsigned long __pmd_frag_size_shift;
+#define PMD_FRAG_SIZE_SHIFT __pmd_frag_size_shift
+#define PMD_FRAG_SIZE (1UL << PMD_FRAG_SIZE_SHIFT)
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD (1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+#define MAX_PTRS_PER_PTE ((H_PTRS_PER_PTE > R_PTRS_PER_PTE) ? H_PTRS_PER_PTE : R_PTRS_PER_PTE)
+#define MAX_PTRS_PER_PMD ((H_PTRS_PER_PMD > R_PTRS_PER_PMD) ? H_PTRS_PER_PMD : R_PTRS_PER_PMD)
+#define MAX_PTRS_PER_PUD ((H_PTRS_PER_PUD > R_PTRS_PER_PUD) ? H_PTRS_PER_PUD : R_PTRS_PER_PUD)
+#define MAX_PTRS_PER_PGD (1 << (H_PGD_INDEX_SIZE > RADIX_PGD_INDEX_SIZE ? \
+ H_PGD_INDEX_SIZE : RADIX_PGD_INDEX_SIZE))
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE (1UL << PMD_SHIFT)
+#define PMD_MASK (~(PMD_SIZE-1))
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE (1UL << PUD_SHIFT)
+#define PUD_MASK (~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS 0xc0000000000000ffUL
+/* Bits to mask out from a PGD to get to the PUD page */
+#define P4D_MASKED_BITS 0xc0000000000000ffUL
+
+/*
+ * Used as an indicator for rcu callback functions
+ */
+enum pgtable_index {
+ PTE_INDEX = 0, /* passed to pgtable_free_tlb() by __pte_free_tlb() */
+ PMD_INDEX, /* passed to pgtable_free_tlb() by __pmd_free_tlb() */
+ PUD_INDEX,
+ PGD_INDEX,
+ /*
+ * Below are used with 4k page size and hugetlb
+ */
+ HTLB_16M_INDEX,
+ HTLB_16G_INDEX,
+};
+
+extern unsigned long __vmalloc_start;
+extern unsigned long __vmalloc_end;
+#define VMALLOC_START __vmalloc_start
+#define VMALLOC_END __vmalloc_end
+
+/*
+ * Return PUD_SHIFT when radix is enabled, otherwise 7 + PAGE_SHIFT
+ * (the default from linux/vmalloc.h).
+ */
+static inline unsigned int ioremap_max_order(void)
+{
+	unsigned int order = 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+
+	if (radix_enabled())
+		order = PUD_SHIFT;
+
+	return order;
+}
+#define IOREMAP_MAX_ORDER ioremap_max_order()
+
+extern unsigned long __kernel_virt_start;
+extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
+#define KERN_VIRT_START __kernel_virt_start
+#define KERN_IO_START __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
+extern struct page *vmemmap;
+extern unsigned long pci_io_base;
+
+#define pmd_leaf pmd_leaf
+/* True when the _PAGE_PTE bit is set in the raw pmd value. */
+static inline bool pmd_leaf(pmd_t pmd)
+{
+	const __be64 leaf_bit = cpu_to_be64(_PAGE_PTE);
+
+	return (pmd_raw(pmd) & leaf_bit) != 0;
+}
+
+#define pud_leaf pud_leaf
+/* True when the _PAGE_PTE bit is set in the raw pud value. */
+static inline bool pud_leaf(pud_t pud)
+{
+	const __be64 leaf_bit = cpu_to_be64(_PAGE_PTE);
+
+	return (pud_raw(pud) & leaf_bit) != 0;
+}
+
+#define pmd_leaf_size pmd_leaf_size
+/*
+ * Return SZ_16M for 4K-page kernels running without radix, PMD_SIZE
+ * otherwise. @pmd itself is not inspected.
+ */
+static inline unsigned long pmd_leaf_size(pmd_t pmd)
+{
+	bool hash_4k = IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled();
+
+	return hash_4k ? SZ_16M : PMD_SIZE;
+}
+
+#define pud_leaf_size pud_leaf_size
+/*
+ * Return SZ_16G for 4K-page kernels running without radix, PUD_SIZE
+ * otherwise. @pud itself is not inspected.
+ */
+static inline unsigned long pud_leaf_size(pud_t pud)
+{
+	bool hash_4k = IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled();
+
+	return hash_4k ? SZ_16G : PUD_SIZE;
+}
+#endif /* __ASSEMBLER__ */
+
+#include <asm/book3s/64/hash.h>
+#include <asm/book3s/64/radix.h>
+
+#if H_MAX_PHYSMEM_BITS > R_MAX_PHYSMEM_BITS
+#define MAX_PHYSMEM_BITS H_MAX_PHYSMEM_BITS
+#else
+#define MAX_PHYSMEM_BITS R_MAX_PHYSMEM_BITS
+#endif
+
+/* hash 4k can't share hugetlb and also doesn't support THP */
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/pgtable-64k.h>
+#endif
+
+#include <asm/barrier.h>
+/*
+ * IO space itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
+ *
+ * ISA_IO_BASE = KERN_IO_START, 64K reserved area
+ * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
+ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
+ */
+#define FULL_IO_SIZE 0x80000000ul
+#define ISA_IO_BASE (KERN_IO_START)
+#define ISA_IO_END (KERN_IO_START + 0x10000ul)
+#define PHB_IO_BASE (ISA_IO_END)
+#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
+#define IOREMAP_BASE (PHB_IO_END)
+#define IOREMAP_START (ioremap_bot)
+#define IOREMAP_END (KERN_IO_END - FIXADDR_SIZE)
+#define FIXADDR_SIZE SZ_32M
+#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE)
+
+#ifndef __ASSEMBLER__
+
+/*
+ * Dispatch a pte update (clear @clr, set @set) to the radix or hash
+ * implementation and return whatever that implementation returns.
+ */
+static inline unsigned long pte_update(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep, unsigned long clr,
+				       unsigned long set, int huge)
+{
+	return radix_enabled() ?
+		radix__pte_update(mm, addr, ptep, clr, set, huge) :
+		hash__pte_update(mm, addr, ptep, clr, set, huge);
+}
+/*
+ * For hash even if we have _PAGE_ACCESSED = 0, we do a pte_update.
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ * For radix: H_PAGE_HASHPTE should be zero. Hence we can use the same
+ * function for both hash and radix.
+ */
+/*
+ * Clear _PAGE_ACCESSED in *ptep and report whether it was set before.
+ * Returns 0 without touching the pte when neither _PAGE_ACCESSED nor
+ * H_PAGE_HASHPTE is set.
+ */
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	const __be64 tracked = cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE);
+	unsigned long prev;
+
+	if (!(pte_raw(*ptep) & tracked))
+		return 0;
+
+	prev = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+	return !!(prev & _PAGE_ACCESSED);
+}
+
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+({ \
+ __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+})
+
+/*
+ * On Book3S CPUs, clearing the accessed bit without a TLB flush
+ * doesn't cause data corruption. [ It could cause incorrect
+ * page aging and the (mistaken) reclaim of hot pages, but the
+ * chance of that should be relatively low. ]
+ *
+ * So as a performance optimization don't flush the TLB when
+ * clearing the accessed bit, it will eventually be flushed by
+ * a context switch or a VM operation anyway. [ In the rare
+ * event of it not getting flushed for a long time the delay
+ * shouldn't really matter because there's no real memory
+ * pressure for swapout to react to. ]
+ *
+ * Note: this optimisation also exists in pte_needs_flush() and
+ * huge_pmd_needs_flush().
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young ptep_test_and_clear_young
+
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+#define pmdp_clear_flush_young pmdp_test_and_clear_young
+
+/* Return 1 when _PAGE_WRITE is set in the raw pte, 0 otherwise. */
+static inline int pte_write(pte_t pte)
+{
+	const __be64 write_bit = cpu_to_be64(_PAGE_WRITE);
+
+	return (pte_raw(pte) & write_bit) != 0;
+}
+
+/* Return 1 when _PAGE_READ is set in the raw pte, 0 otherwise. */
+static inline int pte_read(pte_t pte)
+{
+	const __be64 read_bit = cpu_to_be64(_PAGE_READ);
+
+	return (pte_raw(pte) & read_bit) != 0;
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+/* Clear _PAGE_WRITE in *ptep via pte_update(); no-op if already clear. */
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	if (!pte_write(*ptep))
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+/*
+ * Same as ptep_set_wrprotect() but calls pte_update() with its "huge"
+ * argument set to 1.
+ */
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	if (!pte_write(*ptep))
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+/*
+ * Clear every bit of *ptep through pte_update() and return the value
+ * pte_update() reports, wrapped as a pte_t.
+ */
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+}
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+/*
+ * Like ptep_get_and_clear(), but uses the radix "full" variant when @full
+ * is set and radix is enabled.
+ */
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+					    unsigned long addr,
+					    pte_t *ptep, int full)
+{
+	if (!full || !radix_enabled())
+		return ptep_get_and_clear(mm, addr, ptep);
+
+	/*
+	 * We know that this is a full mm pte clear and
+	 * hence can be sure there is no parallel set_pte.
+	 */
+	return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+}
+
+
+/* Clear every bit of *ptep through pte_update(); the old value is discarded. */
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t * ptep)
+{
+ pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+/* Return 1 when _PAGE_DIRTY is set in the raw pte, 0 otherwise. */
+static inline int pte_dirty(pte_t pte)
+{
+	const __be64 dirty_bit = cpu_to_be64(_PAGE_DIRTY);
+
+	return (pte_raw(pte) & dirty_bit) != 0;
+}
+
+/* Return 1 when _PAGE_ACCESSED is set in the raw pte, 0 otherwise. */
+static inline int pte_young(pte_t pte)
+{
+	const __be64 accessed_bit = cpu_to_be64(_PAGE_ACCESSED);
+
+	return (pte_raw(pte) & accessed_bit) != 0;
+}
+
+/* Return 1 when _PAGE_SPECIAL is set in the raw pte, 0 otherwise. */
+static inline int pte_special(pte_t pte)
+{
+	const __be64 special_bit = cpu_to_be64(_PAGE_SPECIAL);
+
+	return (pte_raw(pte) & special_bit) != 0;
+}
+
+/* True when _PAGE_EXEC is set in the raw pte. */
+static inline bool pte_exec(pte_t pte)
+{
+	const __be64 exec_bit = cpu_to_be64(_PAGE_EXEC);
+
+	return (pte_raw(pte) & exec_bit) != 0;
+}
+
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+/* True when _PAGE_SOFT_DIRTY is set in the raw pte. */
+static inline bool pte_soft_dirty(pte_t pte)
+{
+	const __be64 soft_dirty = cpu_to_be64(_PAGE_SOFT_DIRTY);
+
+	return (pte_raw(pte) & soft_dirty) != 0;
+}
+
+/* Return a copy of @pte with _PAGE_SOFT_DIRTY set. */
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	const __be64 soft_dirty = cpu_to_be64(_PAGE_SOFT_DIRTY);
+
+	return __pte_raw(pte_raw(pte) | soft_dirty);
+}
+
+/* Return a copy of @pte with _PAGE_SOFT_DIRTY cleared. */
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_SOFT_DIRTY);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * Return 1 when the pte has _PAGE_PRESENT and _PAGE_PTE set but none of
+ * the _PAGE_RWX bits, 0 otherwise.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	const __be64 probe = cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX);
+	const __be64 want = cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
+
+	return (pte_raw(pte) & probe) == want;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+/* True when both _PAGE_PRESENT and _PAGE_PTE are set in the raw pte. */
+static inline bool pte_hw_valid(pte_t pte)
+{
+	const __be64 valid = cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
+
+	return (pte_raw(pte) & valid) == valid;
+}
+
+static inline int pte_present(pte_t pte)
+{
+	/*
+	 * A pte is considered present if _PAGE_PRESENT is set.
+	 * A pte that was marked invalid during ptep_set_access_flags must
+	 * also be treated as present, so when the hardware-valid test fails
+	 * we additionally accept _PAGE_INVALID together with _PAGE_PTE.
+	 */
+	const __be64 invalidated = cpu_to_be64(_PAGE_INVALID | _PAGE_PTE);
+
+	return pte_hw_valid(pte) || (pte_raw(pte) & invalidated) == invalidated;
+}
+
+/*
+ * With CONFIG_PPC_MEM_KEYS the check is implemented out of line; without it
+ * every access is permitted and the stub below simply returns true.
+ */
+#ifdef CONFIG_PPC_MEM_KEYS
+extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
+#else
+static inline bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
+{
+ return true;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+/* True when _PAGE_PRIVILEGED is NOT set in the raw pte. */
+static inline bool pte_user(pte_t pte)
+{
+	const __be64 priv_bit = cpu_to_be64(_PAGE_PRIVILEGED);
+
+	return (pte_raw(pte) & priv_bit) == 0;
+}
+
+#define pte_access_permitted pte_access_permitted
+/*
+ * Decide whether the access described by @write is allowed through @pte.
+ * The pte must be present, non-privileged and readable, and writable when
+ * @write is set; the final verdict comes from arch_pte_access_permitted()
+ * called with execute = false.
+ */
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+	if (!pte_present(pte))
+		return false;
+
+	/*
+	 * _PAGE_READ is needed for any access and will be cleared for
+	 * PROT_NONE. Execute-only mapping via PROT_EXEC also returns false.
+	 */
+	if (!pte_user(pte) || !pte_read(pte))
+		return false;
+
+	if (write && !pte_write(pte))
+		return false;
+
+	return arch_pte_access_permitted(pte_val(pte), write, false);
+}
+
+/*
+ * Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+ /* pfn must not lose high bits when shifted left by PAGE_SHIFT */
+ VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
+ /* the shifted address must fit entirely within PTE_RPN_MASK */
+ VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
+
+ /* combine the address, the protection bits and the _PAGE_PTE marker */
+ return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
+}
+
+/* Generic modifiers for PTE bits */
+/* Return a copy of @pte with _PAGE_WRITE cleared. */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_WRITE);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+
+/* Return a copy of @pte with _PAGE_EXEC cleared. */
+static inline pte_t pte_exprotect(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_EXEC);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+
+/* Return a copy of @pte with _PAGE_DIRTY cleared. */
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_DIRTY);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+
+/* Return a copy of @pte with _PAGE_ACCESSED cleared. */
+static inline pte_t pte_mkold(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_ACCESSED);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+
+/* Return a copy of @pte with _PAGE_EXEC set. */
+static inline pte_t pte_mkexec(pte_t pte)
+{
+	const __be64 exec_bit = cpu_to_be64(_PAGE_EXEC);
+
+	return __pte_raw(pte_raw(pte) | exec_bit);
+}
+
+/* Return a copy of @pte with the _PAGE_RW bits set. */
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+	/* write implies read, hence set both */
+	const __be64 rw_bits = cpu_to_be64(_PAGE_RW);
+
+	return __pte_raw(pte_raw(pte) | rw_bits);
+}
+
+/* Return a copy of @pte with both _PAGE_DIRTY and _PAGE_SOFT_DIRTY set. */
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	const __be64 dirty_bits = cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+
+	return __pte_raw(pte_raw(pte) | dirty_bits);
+}
+
+/* Return a copy of @pte with _PAGE_ACCESSED set. */
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	const __be64 accessed_bit = cpu_to_be64(_PAGE_ACCESSED);
+
+	return __pte_raw(pte_raw(pte) | accessed_bit);
+}
+
+/* Return a copy of @pte with _PAGE_SPECIAL set. */
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	const __be64 special_bit = cpu_to_be64(_PAGE_SPECIAL);
+
+	return __pte_raw(pte_raw(pte) | special_bit);
+}
+
+/* Identity: returns @pte unchanged. */
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return pte;
+}
+
+/*
+ * Build a new pte that keeps only the _PAGE_CHG_MASK bits of @pte and ORs
+ * in the bits of @newprot.
+ */
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	/* FIXME!! check whether this need to be a conditional */
+	const __be64 preserved = pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK);
+	const __be64 prot_bits = cpu_to_be64(pgprot_val(newprot));
+
+	return __pte_raw(preserved | prot_bits);
+}
+
+/* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() do { \
+ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+ /* \
+ * Don't have overlapping bits with _PAGE_HPTEFLAGS \
+ * We filter HPTEFLAGS on set_pte. \
+ */ \
+ BUILD_BUG_ON(_PAGE_HPTEFLAGS & SWP_TYPE_MASK); \
+ BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
+ BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_EXCLUSIVE); \
+ } while (0)
+
+#define SWP_TYPE_BITS 5
+#define SWP_TYPE_MASK ((1UL << SWP_TYPE_BITS) - 1)
+#define __swp_type(x) ((x).val & SWP_TYPE_MASK)
+#define __swp_offset(x) (((x).val & PTE_RPN_MASK) >> PAGE_SHIFT)
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ (type) | (((offset) << PAGE_SHIFT) & PTE_RPN_MASK)})
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x) (pte_pmd(__swp_entry_to_pte(x)))
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY _PAGE_SOFT_DIRTY
+#else
+#define _PAGE_SWP_SOFT_DIRTY 0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
+#define _PAGE_SWP_EXCLUSIVE _PAGE_NON_IDEMPOTENT
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+/* Return a copy of the swap pte with _PAGE_SWP_SOFT_DIRTY set. */
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	const __be64 soft_dirty = cpu_to_be64(_PAGE_SWP_SOFT_DIRTY);
+
+	return __pte_raw(pte_raw(pte) | soft_dirty);
+}
+
+/* True when _PAGE_SWP_SOFT_DIRTY is set in the raw swap pte. */
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+	const __be64 soft_dirty = cpu_to_be64(_PAGE_SWP_SOFT_DIRTY);
+
+	return (pte_raw(pte) & soft_dirty) != 0;
+}
+
+/* Return a copy of the swap pte with _PAGE_SWP_SOFT_DIRTY cleared. */
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+/* Return a copy of the swap pte with _PAGE_SWP_EXCLUSIVE set. */
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+	const __be64 excl_bit = cpu_to_be64(_PAGE_SWP_EXCLUSIVE);
+
+	return __pte_raw(pte_raw(pte) | excl_bit);
+}
+
+/* True when _PAGE_SWP_EXCLUSIVE is set in the raw swap pte. */
+static inline bool pte_swp_exclusive(pte_t pte)
+{
+	const __be64 excl_bit = cpu_to_be64(_PAGE_SWP_EXCLUSIVE);
+
+	return (pte_raw(pte) & excl_bit) != 0;
+}
+
+/* Return a copy of the swap pte with _PAGE_SWP_EXCLUSIVE cleared. */
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+	const __be64 keep = cpu_to_be64(~_PAGE_SWP_EXCLUSIVE);
+
+	return __pte_raw(pte_raw(pte) & keep);
+}
+
+/*
+ * True when every bit requested in @access is set in @ptev and the
+ * _PAGE_PRIVILEGED bit of @access matches that of @ptev.
+ */
+static inline bool check_pte_access(unsigned long access, unsigned long ptev)
+{
+	/* This check for _PAGE_RWX and _PAGE_PRESENT bits */
+	bool bits_ok = (access & ~ptev) == 0;
+	/* This check for access to privilege space */
+	bool priv_ok = (access & _PAGE_PRIVILEGED) == (ptev & _PAGE_PRIVILEGED);
+
+	return bits_ok && priv_ok;
+}
+/*
+ * Generic functions with hash/radix callbacks
+ */
+
+/*
+ * Dispatch to the radix or hash ptep_set_access_flags implementation.
+ * Only the radix variant receives @vma, @address and @psize.
+ */
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+					   pte_t *ptep, pte_t entry,
+					   unsigned long address,
+					   int psize)
+{
+	if (!radix_enabled()) {
+		hash__ptep_set_access_flags(ptep, entry);
+		return;
+	}
+
+	radix__ptep_set_access_flags(vma, ptep, entry, address, psize);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+/* Compare two ptes using the radix or hash implementation. */
+static inline int pte_same(pte_t pte_a, pte_t pte_b)
+{
+	return radix_enabled() ? radix__pte_same(pte_a, pte_b) :
+				 hash__pte_same(pte_a, pte_b);
+}
+
+/* Ask the radix or hash implementation whether @pte is a "none" entry. */
+static inline int pte_none(pte_t pte)
+{
+	return radix_enabled() ? radix__pte_none(pte) : hash__pte_none(pte);
+}
+
+/*
+ * Install @pte at @ptep through the radix or hash implementation. Warns
+ * (VM_WARN_ON) if the caller did not set _PAGE_PTE, then forces it on.
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+	const __be64 pte_marker = cpu_to_be64(_PAGE_PTE);
+
+	VM_WARN_ON(!(pte_raw(pte) & pte_marker));
+	/*
+	 * Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
+	 * in all the callers.
+	 */
+	pte = __pte_raw(pte_raw(pte) | pte_marker);
+
+	if (radix_enabled())
+		radix__set_pte_at(mm, addr, ptep, pte, percpu);
+	else
+		hash__set_pte_at(mm, addr, ptep, pte, percpu);
+}
+
+#define _PAGE_CACHE_CTL (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
+
+#define pgprot_noncached pgprot_noncached
+/*
+ * Replace the _PAGE_CACHE_CTL bits of @prot with _PAGE_NON_IDEMPOTENT.
+ */
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	unsigned long val = pgprot_val(prot) & ~_PAGE_CACHE_CTL;
+
+	return __pgprot(val | _PAGE_NON_IDEMPOTENT);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+/*
+ * Replace the _PAGE_CACHE_CTL bits of @prot with _PAGE_TOLERANT.
+ */
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	unsigned long val = pgprot_val(prot) & ~_PAGE_CACHE_CTL;
+
+	return __pgprot(val | _PAGE_TOLERANT);
+}
+
+#define pgprot_cached pgprot_cached
+/* Return @prot with all _PAGE_CACHE_CTL bits cleared. */
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	unsigned long val = pgprot_val(prot) & ~_PAGE_CACHE_CTL;
+
+	return __pgprot(val);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+/* Alias for pgprot_noncached_wc(). */
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	pgprot_t wc = pgprot_noncached_wc(prot);
+
+	return wc;
+}
+/*
+ * check a pte mapping have cache inhibited property
+ */
+/*
+ * True when the _PAGE_CACHE_CTL field of the pte equals exactly
+ * _PAGE_TOLERANT or exactly _PAGE_NON_IDEMPOTENT.
+ */
+static inline bool pte_ci(pte_t pte)
+{
+	const __be64 cache_ctl = pte_raw(pte) & cpu_to_be64(_PAGE_CACHE_CTL);
+
+	return cache_ctl == cpu_to_be64(_PAGE_TOLERANT) ||
+	       cache_ctl == cpu_to_be64(_PAGE_NON_IDEMPOTENT);
+}
+
+/*
+ * Zero *pmdp. With CONFIG_DEBUG_VM and radix disabled, first warn if the
+ * entry has both H_PAGE_HASHPTE and _PAGE_PTE set.
+ */
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
+ /*
+ * Don't use this if we can possibly have a hash page table
+ * entry mapping this.
+ */
+ WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
+ }
+ *pmdp = __pmd(0);
+}
+
+/* Return 1 when the raw pmd value is zero, 0 otherwise. */
+static inline int pmd_none(pmd_t pmd)
+{
+	return pmd_raw(pmd) == 0;
+}
+
+static inline int pmd_present(pmd_t pmd)
+{
+	/*
+	 * A pmd is considered present if _PAGE_PRESENT is set.
+	 * A pmd that was marked invalid during a split must also be
+	 * treated as present, so _PAGE_INVALID is accepted as well.
+	 */
+	const __be64 present_bits = cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID);
+
+	return (pmd_raw(pmd) & present_bits) != 0;
+}
+
+static inline int pmd_is_serializing(pmd_t pmd)
+{
+	/*
+	 * If the pmd is undergoing a split, the _PAGE_PRESENT bit is clear
+	 * and _PAGE_INVALID is set (see pmd_present, pmdp_invalidate).
+	 *
+	 * The same bit pattern may also be seen while an entry is
+	 * temporarily invalidated during a flush (see
+	 * ptep_modify_prot_start), so callers must ensure this case is
+	 * fine as well.
+	 */
+	const __be64 state = pmd_raw(pmd) &
+			     cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID);
+
+	return state == cpu_to_be64(_PAGE_INVALID);
+}
+
+/* Ask the radix or hash implementation whether @pmd is a bad entry. */
+static inline int pmd_bad(pmd_t pmd)
+{
+	return radix_enabled() ? radix__pmd_bad(pmd) : hash__pmd_bad(pmd);
+}
+
+/*
+ * Zero *pudp. With CONFIG_DEBUG_VM and radix disabled, first warn if the
+ * entry has both H_PAGE_HASHPTE and _PAGE_PTE set.
+ */
+static inline void pud_clear(pud_t *pudp)
+{
+ if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
+ /*
+ * Don't use this if we can possibly have a hash page table
+ * entry mapping this.
+ */
+ WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
+ }
+ *pudp = __pud(0);
+}
+
+/* Return 1 when the raw pud value is zero, 0 otherwise. */
+static inline int pud_none(pud_t pud)
+{
+	return pud_raw(pud) == 0;
+}
+
+/* Return 1 when _PAGE_PRESENT is set in the raw pud, 0 otherwise. */
+static inline int pud_present(pud_t pud)
+{
+	const __be64 present_bit = cpu_to_be64(_PAGE_PRESENT);
+
+	return (pud_raw(pud) & present_bit) != 0;
+}
+
+extern struct page *pud_page(pud_t pud);
+extern struct page *pmd_page(pmd_t pmd);
+/* Reinterpret the raw value of @pud as a pte_t. */
+static inline pte_t pud_pte(pud_t pud)
+{
+	__be64 raw = pud_raw(pud);
+
+	return __pte_raw(raw);
+}
+
+/* Reinterpret the raw value of @pte as a pud_t. */
+static inline pud_t pte_pud(pte_t pte)
+{
+	__be64 raw = pte_raw(pte);
+
+	return __pud_raw(raw);
+}
+
+/* View a pud pointer as a pte pointer (plain cast). */
+static inline pte_t *pudp_ptep(pud_t *pud)
+{
+	pte_t *as_ptep = (pte_t *)pud;
+
+	return as_ptep;
+}
+
+#define pud_pfn(pud) pte_pfn(pud_pte(pud))
+#define pud_dirty(pud) pte_dirty(pud_pte(pud))
+#define pud_young(pud) pte_young(pud_pte(pud))
+#define pud_mkold(pud) pte_pud(pte_mkold(pud_pte(pud)))
+#define pud_wrprotect(pud) pte_pud(pte_wrprotect(pud_pte(pud)))
+#define pud_mkdirty(pud) pte_pud(pte_mkdirty(pud_pte(pud)))
+#define pud_mkclean(pud) pte_pud(pte_mkclean(pud_pte(pud)))
+#define pud_mkyoung(pud) pte_pud(pte_mkyoung(pud_pte(pud)))
+#define pud_mkwrite(pud) pte_pud(pte_mkwrite_novma(pud_pte(pud)))
+#define pud_write(pud) pte_write(pud_pte(pud))
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+/*
+ * Soft-dirty helpers for pud entries, implemented by converting to a pte,
+ * applying the pte helper and (for the modifiers) converting back.
+ *
+ * The macro parameter is named "pud" so that the body operates on the
+ * argument that was passed in, rather than on whatever identifier named
+ * "pud" happens to be in scope at the call site.
+ */
+#define pud_soft_dirty(pud) pte_soft_dirty(pud_pte(pud))
+#define pud_mksoft_dirty(pud) pte_pud(pte_mksoft_dirty(pud_pte(pud)))
+#define pud_clear_soft_dirty(pud) pte_pud(pte_clear_soft_dirty(pud_pte(pud)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+/* Ask the radix or hash implementation whether @pud is a bad entry. */
+static inline int pud_bad(pud_t pud)
+{
+	return radix_enabled() ? radix__pud_bad(pud) : hash__pud_bad(pud);
+}
+
+#define pud_access_permitted pud_access_permitted
+/* Apply pte_access_permitted() to @pud viewed as a pte. */
+static inline bool pud_access_permitted(pud_t pud, bool write)
+{
+	pte_t as_pte = pud_pte(pud);
+
+	return pte_access_permitted(as_pte, write);
+}
+
+#define __p4d_raw(x) ((p4d_t) { __pgd_raw(x) })
+/* Return the raw value of the pgd embedded in @x. */
+static inline __be64 p4d_raw(p4d_t x)
+{
+	pgd_t inner = x.pgd;
+
+	return pgd_raw(inner);
+}
+
+#define p4d_write(p4d) pte_write(p4d_pte(p4d))
+
+/* Zero the entry *p4dp. */
+static inline void p4d_clear(p4d_t *p4dp)
+{
+ *p4dp = __p4d(0);
+}
+
+/* Return 1 when the raw p4d value is zero, 0 otherwise. */
+static inline int p4d_none(p4d_t p4d)
+{
+	return p4d_raw(p4d) == 0;
+}
+
+/* Return 1 when _PAGE_PRESENT is set in the raw p4d, 0 otherwise. */
+static inline int p4d_present(p4d_t p4d)
+{
+	const __be64 present_bit = cpu_to_be64(_PAGE_PRESENT);
+
+	return (p4d_raw(p4d) & present_bit) != 0;
+}
+
+/* Reinterpret the raw value of @p4d as a pte_t. */
+static inline pte_t p4d_pte(p4d_t p4d)
+{
+	__be64 raw = p4d_raw(p4d);
+
+	return __pte_raw(raw);
+}
+
+/* Reinterpret the raw value of @pte as a p4d_t. */
+static inline p4d_t pte_p4d(pte_t pte)
+{
+	__be64 raw = pte_raw(pte);
+
+	return __p4d_raw(raw);
+}
+
+/* Ask the radix or hash implementation whether @p4d is a bad entry. */
+static inline int p4d_bad(p4d_t p4d)
+{
+	return radix_enabled() ? radix__p4d_bad(p4d) : hash__p4d_bad(p4d);
+}
+
+#define p4d_access_permitted p4d_access_permitted
+/* Apply pte_access_permitted() to @p4d viewed as a pte. */
+static inline bool p4d_access_permitted(p4d_t p4d, bool write)
+{
+	pte_t as_pte = p4d_pte(p4d);
+
+	return pte_access_permitted(as_pte, write);
+}
+
+extern struct page *p4d_page(p4d_t p4d);
+
+/* Pointers in the page table tree are physical addresses */
+#define __pgtable_ptr_val(ptr) __pa(ptr)
+
+/*
+ * Strip P4D_MASKED_BITS from the p4d value and convert the result to a
+ * virtual pud_t pointer with __va().
+ */
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+	unsigned long table_pa = p4d_val(p4d) & ~P4D_MASKED_BITS;
+
+	return (pud_t *)__va(table_pa);
+}
+
+/*
+ * Strip PUD_MASKED_BITS from the pud value and convert the result to a
+ * virtual pmd_t pointer with __va().
+ */
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+	unsigned long table_pa = pud_val(pud) & ~PUD_MASKED_BITS;
+
+	return (pmd_t *)__va(table_pa);
+}
+
+#define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pud_ERROR(e) \
+ pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Map a kernel page at @ea to @pa with protection @prot, dispatching to the
+ * radix implementation (with PAGE_SIZE) or the hash implementation and
+ * returning its result.
+ */
+static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+ if (radix_enabled()) {
+ /*
+ * NOTE(review): this guard tests DEBUG_VM, whereas pmd_clear() and
+ * pud_clear() test CONFIG_DEBUG_VM. Unless something defines
+ * DEBUG_VM, the sanity check below is compiled out -- confirm which
+ * symbol was intended.
+ */
+#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
+ unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
+ WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
+#endif
+ return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
+ }
+ return hash__map_kernel_page(ea, pa, prot);
+}
+
+void unmap_kernel_page(unsigned long va);
+
+/*
+ * Dispatch vmemmap mapping creation to the radix or hash implementation
+ * and return its result.
+ */
+static inline int __meminit vmemmap_create_mapping(unsigned long start,
+						   unsigned long page_size,
+						   unsigned long phys)
+{
+	return radix_enabled() ?
+		radix__vmemmap_create_mapping(start, page_size, phys) :
+		hash__vmemmap_create_mapping(start, page_size, phys);
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/* Dispatch vmemmap mapping removal to the radix or hash implementation. */
+static inline void vmemmap_remove_mapping(unsigned long start,
+					  unsigned long page_size)
+{
+	if (radix_enabled())
+		radix__vmemmap_remove_mapping(start, page_size);
+	else
+		hash__vmemmap_remove_mapping(start, page_size);
+}
+#endif
+
+/* Reinterpret the raw value of @pmd as a pte_t. */
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	__be64 raw = pmd_raw(pmd);
+
+	return __pte_raw(raw);
+}
+
+/* Reinterpret the raw value of @pte as a pmd_t. */
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	__be64 raw = pte_raw(pte);
+
+	return __pmd_raw(raw);
+}
+
+/* View a pmd pointer as a pte pointer (plain cast). */
+static inline pte_t *pmdp_ptep(pmd_t *pmd)
+{
+	pte_t *as_ptep = (pte_t *)pmd;
+
+	return as_ptep;
+}
+#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
+#define pmd_young(pmd) pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkclean(pmd) pte_pmd(pte_mkclean(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite_novma(pmd) pte_pmd(pte_mkwrite_novma(pmd_pte(pmd)))
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
+#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
+#ifdef CONFIG_NUMA_BALANCING
+/* Apply pte_protnone() to @pmd viewed as a pte. */
+static inline int pmd_protnone(pmd_t pmd)
+{
+	pte_t as_pte = pmd_pte(pmd);
+
+	return pte_protnone(as_pte);
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+#define pmd_write(pmd) pte_write(pmd_pte(pmd))
+
+#define pmd_access_permitted pmd_access_permitted
+static inline bool pmd_access_permitted(pmd_t pmd, bool write)
+{
+	/*
+	 * pmdp_invalidate sets the "serializing" combination (which is not
+	 * caught by the !pte_present() check in pte_access_permitted) to
+	 * prevent lock-free lookups, as part of the
+	 * serialize_against_pte_lookup() synchronisation.
+	 *
+	 * This also catches the case where the PTE's hardware PRESENT bit is
+	 * cleared while TLB is flushed, which is suboptimal but should not
+	 * be frequent.
+	 */
+	return !pmd_is_serializing(pmd) &&
+	       pte_access_permitted(pmd_pte(pmd), write);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern pud_t pud_modify(pud_t pud, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd);
+extern void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud);
+
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd)
+{
+}
+
+static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
+{
+}
+
+extern int hash__has_transparent_hugepage(void);
+static inline int has_transparent_hugepage(void)
+{
+ if (radix_enabled())
+ return radix__has_transparent_hugepage();
+ return hash__has_transparent_hugepage();
+}
+#define has_transparent_hugepage has_transparent_hugepage
+
+static inline int has_transparent_pud_hugepage(void)
+{
+ if (radix_enabled())
+ return radix__has_transparent_pud_hugepage();
+ return 0;
+}
+#define has_transparent_pud_hugepage has_transparent_pud_hugepage
+
+static inline unsigned long
+pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
+ unsigned long clr, unsigned long set)
+{
+ if (radix_enabled())
+ return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
+ return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
+}
+
+static inline unsigned long
+pud_hugepage_update(struct mm_struct *mm, unsigned long addr, pud_t *pudp,
+ unsigned long clr, unsigned long set)
+{
+ if (radix_enabled())
+ return radix__pud_hugepage_update(mm, addr, pudp, clr, set);
+ BUG();
+ return pud_val(*pudp);
+}
+
+/*
+ * For radix we should always find H_PAGE_HASHPTE zero. Hence
+ * the below will work for radix too
+ */
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ unsigned long old;
+
+ if ((pmd_raw(*pmdp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+ return ((old & _PAGE_ACCESSED) != 0);
+}
+
+static inline int __pudp_test_and_clear_young(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ unsigned long old;
+
+ if ((pud_raw(*pudp) & cpu_to_be64(_PAGE_ACCESSED | H_PAGE_HASHPTE)) == 0)
+ return 0;
+ old = pud_hugepage_update(mm, addr, pudp, _PAGE_ACCESSED, 0);
+ return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp)
+{
+ if (pmd_write(*pmdp))
+ pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
+}
+
+#define __HAVE_ARCH_PUDP_SET_WRPROTECT
+static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp)
+{
+ if (pud_write(*pudp))
+ pud_hugepage_update(mm, addr, pudp, _PAGE_WRITE, 0);
+}
+
+/*
+ * Only returns true for a THP. False for pmd migration entry.
+ * We also need to return true when we come across a pte that
+ * is in between a thp split. While splitting THP, we mark the pmd
+ * invalid (pmdp_invalidate()) before we set it with pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that time
+ * should return true.
+ * We should not call this on a hugetlb entry. We should check for HugeTLB
+ * entry using vma->vm_flags
+ * The page table walk rule is explained in Documentation/mm/transhuge.rst
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+ if (!pmd_present(pmd))
+ return false;
+
+ if (radix_enabled())
+ return radix__pmd_trans_huge(pmd);
+ return hash__pmd_trans_huge(pmd);
+}
+
+static inline int pud_trans_huge(pud_t pud)
+{
+ if (!pud_present(pud))
+ return false;
+
+ if (radix_enabled())
+ return radix__pud_trans_huge(pud);
+ return 0;
+}
+
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ if (radix_enabled())
+ return radix__pmd_same(pmd_a, pmd_b);
+ return hash__pmd_same(pmd_a, pmd_b);
+}
+
+#define pud_same pud_same
+static inline int pud_same(pud_t pud_a, pud_t pud_b)
+{
+ if (radix_enabled())
+ return radix__pud_same(pud_a, pud_b);
+ return hash__pud_same(pud_a, pud_b);
+}
+
+
+static inline pmd_t __pmd_mkhuge(pmd_t pmd)
+{
+ if (radix_enabled())
+ return radix__pmd_mkhuge(pmd);
+ return hash__pmd_mkhuge(pmd);
+}
+
+static inline pud_t __pud_mkhuge(pud_t pud)
+{
+ if (radix_enabled())
+ return radix__pud_mkhuge(pud);
+ BUG();
+ return pud;
+}
+
+/*
+ * pfn_pmd returns a pmd_t that can be used as a pmd pte entry.
+ */
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+ if (radix_enabled())
+ WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE)) == 0);
+ else
+ WARN_ON((pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE)) !=
+ cpu_to_be64(_PAGE_PTE | H_PAGE_THP_HUGE));
+#endif
+ return pmd;
+}
+
+static inline pud_t pud_mkhuge(pud_t pud)
+{
+#ifdef CONFIG_DEBUG_VM
+ if (radix_enabled())
+ WARN_ON((pud_raw(pud) & cpu_to_be64(_PAGE_PTE)) == 0);
+ else
+ WARN_ON(1);
+#endif
+ return pud;
+}
+
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp,
+ pmd_t entry, int dirty);
+#define __HAVE_ARCH_PUDP_SET_ACCESS_FLAGS
+extern int pudp_set_access_flags(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp,
+ pud_t entry, int dirty);
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PUDP_TEST_AND_CLEAR_YOUNG
+extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp);
+
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
+ return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
+}
+
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
+static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ if (radix_enabled())
+ return radix__pudp_huge_get_and_clear(mm, addr, pudp);
+ BUG();
+ return *pudp;
+}
+
+static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pmdp_collapse_flush(vma, address, pmdp);
+ return hash__pmdp_collapse_flush(vma, address, pmdp);
+}
+#define pmdp_collapse_flush pmdp_collapse_flush
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
+pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr,
+ pmd_t *pmdp, int full);
+
+#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr,
+ pud_t *pudp, int full);
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
+ pmd_t *pmdp, pgtable_t pgtable)
+{
+ if (radix_enabled())
+ return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+ return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
+}
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
+ pmd_t *pmdp)
+{
+ if (radix_enabled())
+ return radix__pgtable_trans_huge_withdraw(mm, pmdp);
+ return hash__pgtable_trans_huge_withdraw(mm, pmdp);
+}
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp);
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
+#define pmd_move_must_withdraw pmd_move_must_withdraw
+struct spinlock;
+extern int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+ struct spinlock *old_pmd_ptl,
+ struct vm_area_struct *vma);
+/*
+ * Hash translation mode uses the deposited table to store hash pte
+ * slot information.
+ */
+#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
+static inline bool arch_needs_pgtable_deposit(void)
+{
+ if (radix_enabled())
+ return false;
+ return true;
+}
+extern void serialize_against_pte_lookup(struct mm_struct *mm);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
+pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
+void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
+ pte_t *, pte_t, pte_t);
+
+/*
+ * Returns true for a R -> RW upgrade of pte
+ */
+static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_val)
+{
+ if (!(old_val & _PAGE_READ))
+ return false;
+
+ if ((!(old_val & _PAGE_WRITE)) && (new_val & _PAGE_WRITE))
+ return true;
+
+ return false;
+}
+
+#endif /* __ASSEMBLER__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h
new file mode 100644
index 000000000000..ff911b4251d9
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pkeys.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASM_POWERPC_BOOK3S_64_PKEYS_H
+#define _ASM_POWERPC_BOOK3S_64_PKEYS_H
+
+#include <asm/book3s/64/hash-pkey.h>
+
+static inline u64 vmflag_to_pte_pkey_bits(vm_flags_t vm_flags)
+{
+ if (!mmu_has_feature(MMU_FTR_PKEY))
+ return 0x0UL;
+
+ if (radix_enabled())
+ BUG();
+ return hash__vmflag_to_pte_pkey_bits(vm_flags);
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+ if (radix_enabled())
+ BUG();
+ return hash__pte_to_pkey_bits(pteflags);
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_64_PKEYS_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
new file mode 100644
index 000000000000..035ceecd6d67
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_4K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_4K_H
+
+/*
+ * For 4K page size supported index is 13/9/9/9
+ */
+#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
+
+/*
+ * One fragment per page
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
+
+#define RADIX_PMD_FRAG_SIZE_SHIFT (RADIX_PMD_INDEX_SIZE + 3)
+#define RADIX_PMD_FRAG_NR (PAGE_SIZE >> RADIX_PMD_FRAG_SIZE_SHIFT)
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
new file mode 100644
index 000000000000..54e33828b0fb
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_64K_H
+#define _ASM_POWERPC_PGTABLE_RADIX_64K_H
+
+/*
+ * For 64K page size supported index is 13/9/9/5
+ */
+#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
+
+/*
+ * We use a 256 byte PTE page fragment in radix
+ * 8 bytes per each PTE entry.
+ */
+#define RADIX_PTE_FRAG_SIZE_SHIFT (RADIX_PTE_INDEX_SIZE + 3)
+#define RADIX_PTE_FRAG_NR (PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
+
+#define RADIX_PMD_FRAG_SIZE_SHIFT (RADIX_PMD_INDEX_SIZE + 3)
+#define RADIX_PMD_FRAG_NR (PAGE_SIZE >> RADIX_PMD_FRAG_SIZE_SHIFT)
+
+#endif /* _ASM_POWERPC_PGTABLE_RADIX_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
new file mode 100644
index 000000000000..da954e779744
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -0,0 +1,366 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_RADIX_H
+#define _ASM_POWERPC_PGTABLE_RADIX_H
+
+#include <asm/asm-const.h>
+
+#ifndef __ASSEMBLER__
+#include <asm/cmpxchg.h>
+#endif
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/radix-64k.h>
+#else
+#include <asm/book3s/64/radix-4k.h>
+#endif
+
+#ifndef __ASSEMBLER__
+#include <asm/book3s/64/tlbflush-radix.h>
+#include <asm/cpu_has_feature.h>
+#endif
+
+/* An empty PTE can still have a R or C writeback */
+#define RADIX_PTE_NONE_MASK (_PAGE_DIRTY | _PAGE_ACCESSED)
+
+/* Bits to set in a RPMD/RPUD/RPGD */
+#define RADIX_PMD_VAL_BITS (0x8000000000000000UL | RADIX_PTE_INDEX_SIZE)
+#define RADIX_PUD_VAL_BITS (0x8000000000000000UL | RADIX_PMD_INDEX_SIZE)
+#define RADIX_PGD_VAL_BITS (0x8000000000000000UL | RADIX_PUD_INDEX_SIZE)
+
+/* Don't have anything in the reserved bits and leaf bits */
+#define RADIX_PMD_BAD_BITS 0x60000000000000e0UL
+#define RADIX_PUD_BAD_BITS 0x60000000000000e0UL
+#define RADIX_P4D_BAD_BITS 0x60000000000000e0UL
+
+#define RADIX_PMD_SHIFT (PAGE_SHIFT + RADIX_PTE_INDEX_SIZE)
+#define RADIX_PUD_SHIFT (RADIX_PMD_SHIFT + RADIX_PMD_INDEX_SIZE)
+#define RADIX_PGD_SHIFT (RADIX_PUD_SHIFT + RADIX_PUD_INDEX_SIZE)
+
+#define R_PTRS_PER_PTE (1 << RADIX_PTE_INDEX_SIZE)
+#define R_PTRS_PER_PMD (1 << RADIX_PMD_INDEX_SIZE)
+#define R_PTRS_PER_PUD (1 << RADIX_PUD_INDEX_SIZE)
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define RADIX_PGTABLE_EADDR_SIZE (RADIX_PTE_INDEX_SIZE + RADIX_PMD_INDEX_SIZE + \
+ RADIX_PUD_INDEX_SIZE + RADIX_PGD_INDEX_SIZE + PAGE_SHIFT)
+#define RADIX_PGTABLE_RANGE (ASM_CONST(1) << RADIX_PGTABLE_EADDR_SIZE)
+
+/*
+ * We support a 52 bit address space. Use the top bit for kernel
+ * virtual mapping. Also make sure the kernel fits in the top
+ * quadrant.
+ *
+ * +------------------+
+ * +------------------+ Kernel virtual map (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * 0b11......+------------------+ Kernel linear map (0xc....)
+ * | |
+ * | 2 quadrant |
+ * | |
+ * 0b10......+------------------+
+ * | |
+ * | 1 quadrant |
+ * | |
+ * 0b01......+------------------+
+ * | |
+ * | 0 quadrant |
+ * | |
+ * 0b00......+------------------+
+ *
+ *
+ * 3rd quadrant expanded:
+ * +------------------------------+ Highest address (0xc010000000000000)
+ * +------------------------------+ KASAN shadow end (0xc00fc00000000000)
+ * | |
+ * | |
+ * +------------------------------+ Kernel vmemmap end/shadow start (0xc00e000000000000)
+ * | |
+ * | 512TB |
+ * | |
+ * +------------------------------+ Kernel IO map end/vmemmap start
+ * | |
+ * | 512TB |
+ * | |
+ * +------------------------------+ Kernel vmap end/ IO map start
+ * | |
+ * | 512TB |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
+ */
+
+/* For the sizes of the shadow area, see kasan.h */
+
+/*
+ * If we store section details in page->flags we can't increase MAX_PHYSMEM_BITS:
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup.
+ * Hence only increase for VMEMMAP. Further, depending on SPARSEMEM_EXTREME reduces
+ * memory requirements with a large number of sections.
+ * 51 bits is the max physical real address on POWER9.
+ */
+
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME)
+#define R_MAX_PHYSMEM_BITS 51
+#else
+#define R_MAX_PHYSMEM_BITS 46
+#endif
+
+#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
+/*
+ * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific). To make sure we pick
+ * the same value as hash.
+ */
+#define RADIX_KERN_MAP_SIZE (1UL << 49)
+
+#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
+#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
+
+#define RADIX_KERN_IO_START RADIX_VMALLOC_END
+#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
+
+#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
+#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
+
+#ifndef __ASSEMBLER__
+#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
+#define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE)
+#define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
+#define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+extern void radix__mark_rodata_ro(void);
+extern void radix__mark_initmem_nx(void);
+#endif
+
+extern void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+ pte_t entry, unsigned long address,
+ int psize);
+
+extern void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte);
+
+static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
+ unsigned long set)
+{
+ __be64 old_be, tmp_be;
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3 # pte_update\n"
+ " andc %1,%0,%5 \n"
+ " or %1,%1,%4 \n"
+ " stdcx. %1,0,%3 \n"
+ " bne- 1b"
+ : "=&r" (old_be), "=&r" (tmp_be), "=m" (*ptep)
+ : "r" (ptep), "r" (cpu_to_be64(set)), "r" (cpu_to_be64(clr))
+ : "cc" );
+
+ return be64_to_cpu(old_be);
+}
+
+static inline unsigned long radix__pte_update(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, unsigned long clr,
+ unsigned long set,
+ int huge)
+{
+ unsigned long old_pte;
+
+ old_pte = __radix_pte_update(ptep, clr, set);
+ if (!huge)
+ assert_pte_locked(mm, addr);
+
+ return old_pte;
+}
+
+static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep, int full)
+{
+ unsigned long old_pte;
+
+ if (full) {
+ old_pte = pte_val(*ptep);
+ *ptep = __pte(0);
+ } else
+ old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
+
+ return __pte(old_pte);
+}
+
+static inline int radix__pte_same(pte_t pte_a, pte_t pte_b)
+{
+ return ((pte_raw(pte_a) ^ pte_raw(pte_b)) == 0);
+}
+
+static inline int radix__pte_none(pte_t pte)
+{
+ return (pte_val(pte) & ~RADIX_PTE_NONE_MASK) == 0;
+}
+
+static inline void radix__set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
+{
+ *ptep = pte;
+
+ /*
+ * The architecture suggests a ptesync after setting the pte, which
+ * orders the store that updates the pte with subsequent page table
+ * walk accesses which may load the pte. Without this it may be
+ * possible for a subsequent access to result in spurious fault.
+ *
+ * This is not necessary for correctness, because a spurious fault
+ * is tolerated by the page fault handler, and this store will
+ * eventually be seen. In testing, there was no noticeable increase
+ * in user faults on POWER9. Avoiding ptesync here is a significant
+ * win for things like fork. If a future microarchitecture benefits
+ * from ptesync, it should probably go into update_mmu_cache, rather
+ * than set_pte_at (which is used to set ptes unrelated to faults).
+ *
+ * Spurious faults from the kernel memory are not tolerated, so there
+ * is a ptesync in flush_cache_vmap, and __map_kernel_page() follows
+ * the pte update sequence from ISA Book III 6.10 Translation Table
+ * Update Synchronization Requirements.
+ */
+}
+
+static inline int radix__pmd_bad(pmd_t pmd)
+{
+ return !!(pmd_val(pmd) & RADIX_PMD_BAD_BITS);
+}
+
+static inline int radix__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+ return ((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) == 0);
+}
+
+static inline int radix__pud_bad(pud_t pud)
+{
+ return !!(pud_val(pud) & RADIX_PUD_BAD_BITS);
+}
+
+static inline int radix__pud_same(pud_t pud_a, pud_t pud_b)
+{
+ return ((pud_raw(pud_a) ^ pud_raw(pud_b)) == 0);
+}
+
+static inline int radix__p4d_bad(p4d_t p4d)
+{
+ return !!(p4d_val(p4d) & RADIX_P4D_BAD_BITS);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+static inline int radix__pmd_trans_huge(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _PAGE_PTE) == _PAGE_PTE;
+}
+
+static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
+{
+ return __pmd(pmd_val(pmd) | _PAGE_PTE);
+}
+
+static inline int radix__pud_trans_huge(pud_t pud)
+{
+ return (pud_val(pud) & _PAGE_PTE) == _PAGE_PTE;
+}
+
+static inline pud_t radix__pud_mkhuge(pud_t pud)
+{
+ return __pud(pud_val(pud) | _PAGE_PTE);
+}
+
+extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set);
+extern unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, unsigned long clr,
+ unsigned long set);
+extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp);
+extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable);
+extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp);
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp);
+
+static inline int radix__has_transparent_hugepage(void)
+{
+ /* For radix 2M at PMD level means thp */
+ if (mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT)
+ return 1;
+ return 0;
+}
+
+static inline int radix__has_transparent_pud_hugepage(void)
+{
+ /* For radix 1G at PUD level means pud hugepage support */
+ if (mmu_psize_defs[MMU_PAGE_1G].shift == PUD_SHIFT)
+ return 1;
+ return 0;
+}
+#endif
+
+struct vmem_altmap;
+struct dev_pagemap;
+extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end,
+ int node, struct vmem_altmap *altmap);
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap);
+extern void radix__vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size);
+
+extern int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags, unsigned int psz);
+
+static inline unsigned long radix__get_tree_size(void)
+{
+ unsigned long rts_field;
+ /*
+ * We support 52 bits, hence:
+ * RTS = 52 - 31 = 21 = 0b10101
+ * RTS encoding details (bit 0 is the least significant bit):
+ * bits 0 - 2 of rts (0b101) -> bits 5 - 7 of unsigned long
+ * bits 3 - 4 of rts (0b10) -> bits 61 - 62 of unsigned long
+ */
+ rts_field = (0x5UL << 5); /* low 3 bits of rts: bits 5 - 7 */
+ rts_field |= (0x2UL << 61); /* high 2 bits of rts: bits 61 - 62 */
+
+ return rts_field;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int radix__create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot);
+int radix__remove_section_mapping(unsigned long start, unsigned long end);
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+#define vmemmap_can_optimize vmemmap_can_optimize
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap);
+#endif
+
+#define vmemmap_populate_compound_pages vmemmap_populate_compound_pages
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+ unsigned long start,
+ unsigned long end, int node,
+ struct dev_pagemap *pgmap);
+#endif /* __ASSEMBLER__ */
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
new file mode 100644
index 000000000000..6e2f7a74cd75
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
+#define _ASM_POWERPC_BOOK3S_64_SLICE_H
+
+#ifndef __ASSEMBLER__
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#endif
+
+#define SLICE_LOW_SHIFT 28
+#define SLICE_LOW_TOP (0x100000000ul)
+#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
+#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
+
+#define SLICE_HIGH_SHIFT 40
+#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
+#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
+
+#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
+
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
new file mode 100644
index 000000000000..146287d9580f
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H
+
+/*
+ * TLB flushing for 64-bit hash-MMU CPUs
+ */
+
+#include <linux/percpu.h>
+#include <asm/page.h>
+
+#define PPC64_TLB_BATCH_NR 192
+
+struct ppc64_tlb_batch {
+ int active;
+ unsigned long index;
+ struct mm_struct *mm;
+ real_pte_t pte[PPC64_TLB_BATCH_NR];
+ unsigned long vpn[PPC64_TLB_BATCH_NR];
+ unsigned int psize;
+ int ssize;
+};
+DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
+
+extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+ struct ppc64_tlb_batch *batch;
+
+ if (radix_enabled())
+ return;
+ /*
+ * apply_to_page_range can call us with preemption enabled when
+ * operating on kernel page tables.
+ */
+ preempt_disable();
+ batch = this_cpu_ptr(&ppc64_tlb_batch);
+ batch->active = 1;
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+ struct ppc64_tlb_batch *batch;
+
+ if (radix_enabled())
+ return;
+ batch = this_cpu_ptr(&ppc64_tlb_batch);
+
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+ preempt_enable();
+}
+
+#define arch_flush_lazy_mmu_mode() do {} while (0)
+
+extern void hash__tlbiel_all(unsigned int action);
+
+extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
+ int ssize, unsigned long flags);
+extern void flush_hash_range(unsigned long number, int local);
+extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize,
+ unsigned long flags);
+
+struct mmu_gather;
+extern void hash__tlb_flush(struct mmu_gather *tlb);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+/* Private function for use by PCI IO mapping code */
+extern void __flush_hash_table_range(unsigned long start, unsigned long end);
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+#else
+static inline void __flush_hash_table_range(unsigned long start, unsigned long end) { }
+#endif
+#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
new file mode 100644
index 000000000000..a38542259fab
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
+#define _ASM_POWERPC_TLBFLUSH_RADIX_H
+
+#include <asm/hvcall.h>
+
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
+struct vm_area_struct;
+struct mm_struct;
+struct mmu_gather;
+
+static inline u64 psize_to_rpti_pgsize(unsigned long psize)
+{
+ if (psize == MMU_PAGE_4K)
+ return H_RPTI_PAGE_4K;
+ if (psize == MMU_PAGE_64K)
+ return H_RPTI_PAGE_64K;
+ if (psize == MMU_PAGE_2M)
+ return H_RPTI_PAGE_2M;
+ if (psize == MMU_PAGE_1G)
+ return H_RPTI_PAGE_1G;
+ return H_RPTI_PAGE_ALL;
+}
+
+static inline int mmu_get_ap(int psize)
+{
+ return mmu_psize_defs[psize].ap;
+}
+
+#ifdef CONFIG_PPC_RADIX_MMU
+extern void radix__tlbiel_all(unsigned int action);
+extern void radix__flush_tlb_lpid_page(unsigned int lpid,
+ unsigned long addr,
+ unsigned long page_size);
+extern void radix__flush_pwc_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid(unsigned int lpid);
+extern void radix__flush_all_lpid_guest(unsigned int lpid);
+#else
+static inline void radix__tlbiel_all(unsigned int action) { WARN_ON(1); }
+static inline void radix__flush_tlb_lpid_page(unsigned int lpid,
+ unsigned long addr,
+ unsigned long page_size)
+{
+ WARN_ON(1);
+}
+static inline void radix__flush_pwc_lpid(unsigned int lpid)
+{
+ WARN_ON(1);
+}
+static inline void radix__flush_all_lpid(unsigned int lpid)
+{
+ WARN_ON(1);
+}
+static inline void radix__flush_all_lpid_guest(unsigned int lpid)
+{
+ WARN_ON(1);
+}
+#endif
+
+extern void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+extern void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize);
+extern void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+extern void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end);
+extern void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+extern void radix__local_flush_tlb_mm(struct mm_struct *mm);
+extern void radix__local_flush_all_mm(struct mm_struct *mm);
+extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize);
+extern void radix__tlb_flush(struct mmu_gather *tlb);
+#ifdef CONFIG_SMP
+extern void radix__flush_tlb_mm(struct mm_struct *mm);
+extern void radix__flush_all_mm(struct mm_struct *mm);
+extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize);
+#else
+#define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm)
+#define radix__flush_all_mm(mm) radix__local_flush_all_mm(mm)
+#define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr)
+#define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p)
+#endif
+extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr);
+extern void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr);
+extern void radix__flush_tlb_all(void);
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
new file mode 100644
index 000000000000..fd642b729775
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H
+
+#define MMU_NO_CONTEXT ~0UL
+
+#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
+#include <asm/book3s/64/tlbflush-hash.h>
+#include <asm/book3s/64/tlbflush-radix.h>
+
+/* TLB flush actions. Used as argument to tlbiel_all() */
+enum {
+ TLB_INVAL_SCOPE_GLOBAL = 0, /* invalidate all TLBs */
+ TLB_INVAL_SCOPE_LPID = 1, /* invalidate TLBs for current LPID */
+};
+
+static inline void tlbiel_all(void)
+{
+ /*
+ * This is used for host machine check and bootup.
+ *
+ * This uses early_radix_enabled and implementations use
+ * early_cpu_has_feature etc because that works early in boot
+ * and this is the machine check path which is not performance
+ * critical.
+ */
+ if (early_radix_enabled())
+ radix__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+ else
+ hash__tlbiel_all(TLB_INVAL_SCOPE_GLOBAL);
+}
+
+static inline void tlbiel_all_lpid(bool radix)
+{
+ /*
+ * This is used for guest machine check.
+ */
+ if (radix)
+ radix__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+ else
+ hash__tlbiel_all(TLB_INVAL_SCOPE_LPID);
+}
+
+
+#define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_pmd_tlb_range(vma, start, end);
+}
+
+#define __HAVE_ARCH_FLUSH_PUD_TLB_RANGE
+static inline void flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_pud_tlb_range(vma, start, end);
+}
+
+#define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_hugetlb_tlb_range(vma, start, end);
+}
+
+static inline void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_tlb_range(vma, start, end);
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+ unsigned long end)
+{
+ if (radix_enabled())
+ radix__flush_tlb_kernel_range(start, end);
+}
+
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ radix__local_flush_tlb_mm(mm);
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ radix__local_flush_tlb_page(vma, vmaddr);
+}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ if (radix_enabled())
+ radix__local_flush_tlb_page_psize(mm, vmaddr, psize);
+}
+
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+ if (radix_enabled())
+ radix__tlb_flush(tlb);
+ else
+ hash__tlb_flush(tlb);
+}
+
+#ifdef CONFIG_SMP
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+ if (radix_enabled())
+ radix__flush_tlb_mm(mm);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ radix__flush_tlb_page(vma, vmaddr);
+}
+#else
+#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma, addr) local_flush_tlb_page(vma, addr)
+#endif /* CONFIG_SMP */
+
+#define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
+static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
+ unsigned long address,
+ pte_t *ptep)
+{
+ /*
+ * Book3S 64 does not require spurious fault flushes because the PTE
+ * must be re-fetched in case of an access permission problem. So the
+ * only reason for a spurious fault should be concurrent modification
+ * to the PTE, in which case the PTE will eventually be re-fetched by
+ * the MMU when it attempts the access again.
+ *
+ * See: Power ISA Version 3.1B, 6.10.1.2 Modifying a Translation Table
+ * Entry, Setting a Reference or Change Bit or Upgrading Access
+ * Authority (PTE Subject to Atomic Hardware Updates):
+ *
+ * "If the only change being made to a valid PTE that is subject to
+ * atomic hardware updates is to set the Reference or Change bit to
+ * 1 or to upgrade access authority, a simpler sequence suffices
+ * because the translation hardware will refetch the PTE if an
+ * access is attempted for which the only problems were reference
+ * and/or change bits needing to be set or insufficient access
+ * authority."
+ *
+ * The nest MMU in POWER9 does not perform this PTE re-fetch, but
+ * it avoids the spurious fault problem by flushing the TLB before
+ * upgrading PTE permissions, see radix__ptep_set_access_flags.
+ */
+}
+
+static inline bool __pte_flags_need_flush(unsigned long oldval,
+ unsigned long newval)
+{
+ unsigned long delta = oldval ^ newval;
+
+ /*
+ * The return value of this function doesn't matter for hash,
+ * ptep_modify_prot_start() does a pte_update() which does or schedules
+ * any necessary hash table update and flush.
+ */
+ if (!radix_enabled())
+ return true;
+
+ /*
+ * We do not expect kernel mappings or non-PTEs or not-present PTEs.
+ */
+ VM_WARN_ON_ONCE(oldval & _PAGE_PRIVILEGED);
+ VM_WARN_ON_ONCE(newval & _PAGE_PRIVILEGED);
+ VM_WARN_ON_ONCE(!(oldval & _PAGE_PTE));
+ VM_WARN_ON_ONCE(!(newval & _PAGE_PTE));
+ VM_WARN_ON_ONCE(!(oldval & _PAGE_PRESENT));
+ VM_WARN_ON_ONCE(!(newval & _PAGE_PRESENT));
+
+ /*
+ * Must flush on any change except READ, WRITE, EXEC, DIRTY, ACCESSED.
+ *
+ * In theory, some changed software bits could be tolerated, in
+ * practice those should rarely if ever matter.
+ */
+
+ if (delta & ~(_PAGE_RWX | _PAGE_DIRTY | _PAGE_ACCESSED))
+ return true;
+
+ /*
+ * If any of the above was present in old but cleared in new, flush.
+ * With the exception of _PAGE_ACCESSED, don't worry about flushing
+ * if that was cleared (see the comment in ptep_clear_flush_young()).
+ */
+ if ((delta & ~_PAGE_ACCESSED) & oldval)
+ return true;
+
+ return false;
+}
+
+static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
+{
+ return __pte_flags_need_flush(pte_val(oldpte), pte_val(newpte));
+}
+#define pte_needs_flush pte_needs_flush
+
+static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
+{
+ return __pte_flags_need_flush(pmd_val(oldpmd), pmd_val(newpmd));
+}
+#define huge_pmd_needs_flush huge_pmd_needs_flush
+
+extern bool tlbie_capable;
+extern bool tlbie_enabled;
+
+static inline bool cputlb_use_tlbie(void)
+{
+ return tlbie_enabled;
+}
+
+#endif /* _ASM_POWERPC_BOOK3S_64_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/book3s/pgalloc.h b/arch/powerpc/include/asm/book3s/pgalloc.h
new file mode 100644
index 000000000000..6b178ca143e7
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgalloc.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_PGALLOC_H
+#define _ASM_POWERPC_BOOK3S_PGALLOC_H
+
+#include <linux/mm.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgalloc.h>
+#else
+#include <asm/book3s/32/pgalloc.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
new file mode 100644
index 000000000000..f42d68c6b314
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_PGTABLE_H
+#define _ASM_POWERPC_BOOK3S_PGTABLE_H
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgtable.h>
+#else
+#include <asm/book3s/32/pgtable.h>
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/tlbflush.h b/arch/powerpc/include/asm/book3s/tlbflush.h
new file mode 100644
index 000000000000..dec11de41055
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/tlbflush.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_TLBFLUSH_H
+#define _ASM_POWERPC_BOOK3S_TLBFLUSH_H
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/tlbflush.h>
+#else
+#include <asm/book3s/32/tlbflush.h>
+#endif
+
+#endif /* _ASM_POWERPC_BOOK3S_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/bootx.h b/arch/powerpc/include/asm/bootx.h
index dd9461003dfa..1c121f3c524f 100644
--- a/arch/powerpc/include/asm/bootx.h
+++ b/arch/powerpc/include/asm/bootx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This file describes the structure passed from the BootX application
* (for MacOS) when it is used to boot Linux.
diff --git a/arch/powerpc/include/asm/bpf_perf_event.h b/arch/powerpc/include/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..e8a7b4ffb58c
--- /dev/null
+++ b/arch/powerpc/include/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H
+#define _ASM_POWERPC_BPF_PERF_EVENT_H
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */
diff --git a/arch/powerpc/include/asm/btext.h b/arch/powerpc/include/asm/btext.h
index 89fc382648bc..860f8868f11e 100644
--- a/arch/powerpc/include/asm/btext.h
+++ b/arch/powerpc/include/asm/btext.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Definitions for using the procedures in btext.c.
*
@@ -12,18 +13,22 @@ extern void btext_update_display(unsigned long phys, int width, int height,
int depth, int pitch);
extern void btext_setup_display(int width, int height, int depth, int pitch,
unsigned long address);
+#ifdef CONFIG_PPC32
extern void btext_prepare_BAT(void);
+#else
+static inline void btext_prepare_BAT(void) { }
+#endif
extern void btext_map(void);
extern void btext_unmap(void);
extern void btext_drawchar(char c);
extern void btext_drawstring(const char *str);
-extern void btext_drawhex(unsigned long v);
-extern void btext_drawtext(const char *c, unsigned int len);
+void __init btext_drawhex(unsigned long v);
+void __init btext_drawtext(const char *c, unsigned int len);
-extern void btext_clearscreen(void);
-extern void btext_flushscreen(void);
-extern void btext_flushline(void);
+void __init btext_clearscreen(void);
+void __init btext_flushscreen(void);
+void __init btext_flushline(void);
#endif /* __KERNEL__ */
#endif /* __PPC_BTEXT_H */
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 3eb53d741070..bbaa7e81f821 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -1,25 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_BUG_H
#define _ASM_POWERPC_BUG_H
#ifdef __KERNEL__
#include <asm/asm-compat.h>
-/*
- * Define an illegal instr to trap on the bug.
- * We don't use 0 because that marks the end of a function
- * in the ELF ABI. That's "Boo Boo" in case you wonder...
- */
-#define BUG_OPCODE .long 0x00b00b00 /* For asm */
-#define BUG_ILLEGAL_INSTR "0x00b00b00" /* For BUG macro */
-
#ifdef CONFIG_BUG
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#include <asm/asm-offsets.h>
#ifdef CONFIG_DEBUG_BUGVERBOSE
.macro EMIT_BUG_ENTRY addr,file,line,flags
- .section __bug_table,"a"
-5001: PPC_LONG \addr, 5002f
+ .section __bug_table,"aw"
+5001: .4byte \addr - .
+ .4byte 5002f - .
.short \line, \flags
.org 5001b+BUG_ENTRY_SIZE
.previous
@@ -29,33 +23,43 @@
.endm
#else
.macro EMIT_BUG_ENTRY addr,file,line,flags
- .section __bug_table,"a"
-5001: PPC_LONG \addr
+ .section __bug_table,"aw"
+5001: .4byte \addr - .
.short \flags
.org 5001b+BUG_ENTRY_SIZE
.previous
.endm
#endif /* verbose */
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
/* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
sizeof(struct bug_entry), respectively */
#ifdef CONFIG_DEBUG_BUGVERBOSE
#define _EMIT_BUG_ENTRY \
- ".section __bug_table,\"a\"\n" \
- "2:\t" PPC_LONG "1b, %0\n" \
- "\t.short %1, %2\n" \
+ ".section __bug_table,\"aw\"\n" \
+ "2: .4byte 1b - .\n" \
+ " .4byte %0 - .\n" \
+ " .short %1, %2\n" \
".org 2b+%3\n" \
".previous\n"
#else
#define _EMIT_BUG_ENTRY \
- ".section __bug_table,\"a\"\n" \
- "2:\t" PPC_LONG "1b\n" \
- "\t.short %2\n" \
+ ".section __bug_table,\"aw\"\n" \
+ "2: .4byte 1b - .\n" \
+ " .short %2\n" \
".org 2b+%3\n" \
".previous\n"
#endif
+#define BUG_ENTRY(insn, flags, ...) \
+ __asm__ __volatile__( \
+ "1: " insn "\n" \
+ _EMIT_BUG_ENTRY \
+ : : "i" (__FILE__), "i" (__LINE__), \
+ "i" (flags), \
+ "i" (sizeof(struct bug_entry)), \
+ ##__VA_ARGS__)
+
/*
* BUG_ON() and WARN_ON() do their best to cooperate with compile-time
* optimisations. However depending on the complexity of the condition
@@ -63,79 +67,68 @@
*/
#define BUG() do { \
- __asm__ __volatile__( \
- "1: twi 31,0,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (0), "i" (sizeof(struct bug_entry))); \
+ BUG_ENTRY("twi 31, 0, 0", 0); \
unreachable(); \
} while (0)
+#define HAVE_ARCH_BUG
+
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
+#ifdef CONFIG_PPC64
#define BUG_ON(x) do { \
if (__builtin_constant_p(x)) { \
if (x) \
BUG(); \
} else { \
- __asm__ __volatile__( \
- "1: "PPC_TLNEI" %4,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), "i" (0), \
- "i" (sizeof(struct bug_entry)), \
- "r" ((__force long)(x))); \
+ BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x))); \
} \
} while (0)
-#define __WARN_TAINT(taint) do { \
- __asm__ __volatile__( \
- "1: twi 31,0,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (BUGFLAG_TAINT(taint)), \
- "i" (sizeof(struct bug_entry))); \
-} while (0)
-
#define WARN_ON(x) ({ \
int __ret_warn_on = !!(x); \
if (__builtin_constant_p(__ret_warn_on)) { \
if (__ret_warn_on) \
__WARN(); \
} else { \
- __asm__ __volatile__( \
- "1: "PPC_TLNEI" %4,0\n" \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (BUGFLAG_TAINT(TAINT_WARN)), \
- "i" (sizeof(struct bug_entry)), \
- "r" (__ret_warn_on)); \
+ BUG_ENTRY(PPC_TLNEI " %4, 0", \
+ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
+ "r" (__ret_warn_on)); \
} \
unlikely(__ret_warn_on); \
})
-#define HAVE_ARCH_BUG
#define HAVE_ARCH_BUG_ON
#define HAVE_ARCH_WARN_ON
-#endif /* __ASSEMBLY __ */
+#endif
+
+#endif /* __ASSEMBLER__ */
#else
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro EMIT_BUG_ENTRY addr,file,line,flags
.endm
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#define _EMIT_BUG_ENTRY
#endif
#endif /* CONFIG_BUG */
+#define EMIT_WARN_ENTRY EMIT_BUG_ENTRY
+
#include <asm-generic/bug.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct pt_regs;
-extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long);
-extern void bad_page_fault(struct pt_regs *, unsigned long, int);
+void hash__do_page_fault(struct pt_regs *);
+void bad_page_fault(struct pt_regs *, int);
+void emulate_single_step(struct pt_regs *regs);
extern void _exception(int, struct pt_regs *, int, unsigned long);
+extern void _exception_pkey(struct pt_regs *, unsigned long, int);
extern void die(const char *, struct pt_regs *, long);
-extern void print_backtrace(unsigned long *);
-
-#endif /* !__ASSEMBLY__ */
+void die_mce(const char *str, struct pt_regs *regs, long err);
+extern bool die_will_crash(void);
+extern void panic_flush_kmsg_start(void);
+extern void panic_flush_kmsg_end(void);
+#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_BUG_H */
diff --git a/arch/powerpc/include/asm/bugs.h b/arch/powerpc/include/asm/bugs.h
deleted file mode 100644
index 42fdb73e3068..000000000000
--- a/arch/powerpc/include/asm/bugs.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef _ASM_POWERPC_BUGS_H
-#define _ASM_POWERPC_BUGS_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/*
- * This file is included by 'init/main.c' to check for
- * architecture-dependent bugs.
- */
-
-static inline void check_bugs(void) { }
-
-#endif /* _ASM_POWERPC_BUGS_H */
diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h
index 34a05a1a990b..6796babc4d31 100644
--- a/arch/powerpc/include/asm/cache.h
+++ b/arch/powerpc/include/asm/cache.h
@@ -1,19 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_CACHE_H
#define _ASM_POWERPC_CACHE_H
#ifdef __KERNEL__
-#include <asm/reg.h>
/* bytes per L1 cache line */
-#if defined(CONFIG_8xx) || defined(CONFIG_403GCX)
+#if defined(CONFIG_PPC_8xx)
#define L1_CACHE_SHIFT 4
#define MAX_COPY_PREFETCH 1
+#define IFETCH_ALIGN_SHIFT 2
#elif defined(CONFIG_PPC_E500MC)
#define L1_CACHE_SHIFT 6
#define MAX_COPY_PREFETCH 4
+#define IFETCH_ALIGN_SHIFT 3
#elif defined(CONFIG_PPC32)
#define MAX_COPY_PREFETCH 4
+#define IFETCH_ALIGN_SHIFT 3 /* 603 fetches 2 insn at a time */
#if defined(CONFIG_PPC_47x)
#define L1_CACHE_SHIFT 7
#else
@@ -21,50 +24,86 @@
#endif
#else /* CONFIG_PPC64 */
#define L1_CACHE_SHIFT 7
+#define IFETCH_ALIGN_SHIFT 4 /* POWER8,9 */
#endif
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
#define SMP_CACHE_BYTES L1_CACHE_BYTES
-#if defined(__powerpc64__) && !defined(__ASSEMBLY__)
+#define IFETCH_ALIGN_BYTES (1 << IFETCH_ALIGN_SHIFT)
+
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
+#endif
+
+#if !defined(__ASSEMBLER__)
+#ifdef CONFIG_PPC64
+
+struct ppc_cache_info {
+ u32 size;
+ u32 line_size;
+ u32 block_size; /* L1 only */
+ u32 log_block_size;
+ u32 blocks_per_page;
+ u32 sets;
+ u32 assoc;
+};
+
struct ppc64_caches {
- u32 dsize; /* L1 d-cache size */
- u32 dline_size; /* L1 d-cache line size */
- u32 log_dline_size;
- u32 dlines_per_page;
- u32 isize; /* L1 i-cache size */
- u32 iline_size; /* L1 i-cache line size */
- u32 log_iline_size;
- u32 ilines_per_page;
+ struct ppc_cache_info l1d;
+ struct ppc_cache_info l1i;
+ struct ppc_cache_info l2;
+ struct ppc_cache_info l3;
};
extern struct ppc64_caches ppc64_caches;
-static inline void logmpp(u64 x)
+static inline u32 l1_dcache_shift(void)
{
- asm volatile(PPC_LOGMPP(R1) : : "r" (x));
+ return ppc64_caches.l1d.log_block_size;
}
-#endif /* __powerpc64__ && ! __ASSEMBLY__ */
-
-#if defined(__ASSEMBLY__)
-/*
- * For a snooping icache, we still need a dummy icbi to purge all the
- * prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
- * have modified instructions with the icbi.
- */
-#define PURGE_PREFETCHED_INS \
- sync; \
- icbi 0,r3; \
- sync; \
- isync
+static inline u32 l1_dcache_bytes(void)
+{
+ return ppc64_caches.l1d.block_size;
+}
+
+static inline u32 l1_icache_shift(void)
+{
+ return ppc64_caches.l1i.log_block_size;
+}
+static inline u32 l1_icache_bytes(void)
+{
+ return ppc64_caches.l1i.block_size;
+}
#else
-#define __read_mostly __attribute__((__section__(".data..read_mostly")))
+static inline u32 l1_dcache_shift(void)
+{
+ return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_dcache_bytes(void)
+{
+ return L1_CACHE_BYTES;
+}
+
+static inline u32 l1_icache_shift(void)
+{
+ return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+ return L1_CACHE_BYTES;
+}
+
+#endif
+
+#define __read_mostly __section(".data..read_mostly")
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
extern long _get_L2CR(void);
extern long _get_L3CR(void);
extern void _set_L2CR(unsigned long);
@@ -76,9 +115,36 @@ extern void _set_L3CR(unsigned long);
#define _set_L3CR(val) do { } while(0)
#endif
-extern void cacheable_memzero(void *p, unsigned int nb);
-extern void *cacheable_memcpy(void *, const void *, unsigned int);
+static inline void dcbz(void *addr)
+{
+ __asm__ __volatile__ ("dcbz 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void dcbi(void *addr)
+{
+ __asm__ __volatile__ ("dcbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void dcbf(void *addr)
+{
+ __asm__ __volatile__ ("dcbf 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void dcbst(void *addr)
+{
+ __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void icbi(void *addr)
+{
+ asm volatile ("icbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void iccci(void *addr)
+{
+ asm volatile ("iccci 0, %0" : : "r"(addr) : "memory");
+}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_CACHE_H */
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index 5b9312220e84..1fea42928f64 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -1,72 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_CACHEFLUSH_H
#define _ASM_POWERPC_CACHEFLUSH_H
-#ifdef __KERNEL__
-
#include <linux/mm.h>
#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
+
+/*
+ * This flag is used to indicate that the page pointed to by a pte is clean
+ * and does not require cleaning before returning it to the user.
+ */
+#define PG_dcache_clean PG_arch_1
+#ifdef CONFIG_PPC_BOOK3S_64
/*
- * No cache flushing is required when address mappings are changed,
- * because the caches on PowerPCs are physically addressed.
+ * Book3s has no ptesync after setting a pte, so without this ptesync it's
+ * possible for a kernel virtual mapping access to return a spurious fault
+ * if it's accessed right after the pte is set. The page fault handler does
+ * not expect this type of fault. flush_cache_vmap is not exactly the right
+ * place to put this, but it seems to work well enough.
*/
-#define flush_cache_all() do { } while (0)
-#define flush_cache_mm(mm) do { } while (0)
-#define flush_cache_dup_mm(mm) do { } while (0)
-#define flush_cache_range(vma, start, end) do { } while (0)
-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0)
-#define flush_icache_page(vma, page) do { } while (0)
-#define flush_cache_vmap(start, end) do { } while (0)
-#define flush_cache_vunmap(start, end) do { } while (0)
+static inline void flush_cache_vmap(unsigned long start, unsigned long end)
+{
+ asm volatile("ptesync" ::: "memory");
+}
+#define flush_cache_vmap flush_cache_vmap
+#endif /* CONFIG_PPC_BOOK3S_64 */
#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
-extern void flush_dcache_page(struct page *page);
-#define flush_dcache_mmap_lock(mapping) do { } while (0)
-#define flush_dcache_mmap_unlock(mapping) do { } while (0)
-
-extern void __flush_disable_L1(void);
-
-extern void flush_icache_range(unsigned long, unsigned long);
-extern void flush_icache_user_range(struct vm_area_struct *vma,
- struct page *page, unsigned long addr,
- int len);
-extern void __flush_dcache_icache(void *page_va);
-extern void flush_dcache_icache_page(struct page *page);
-#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
-extern void __flush_dcache_icache_phys(unsigned long physaddr);
-#endif /* CONFIG_PPC32 && !CONFIG_BOOKE */
-
-extern void flush_dcache_range(unsigned long start, unsigned long stop);
-#ifdef CONFIG_PPC32
-extern void clean_dcache_range(unsigned long start, unsigned long stop);
-extern void invalidate_dcache_range(unsigned long start, unsigned long stop);
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void flush_inval_dcache_range(unsigned long start, unsigned long stop);
-extern void flush_dcache_phys_range(unsigned long start, unsigned long stop);
-#endif
+/*
+ * This is called when a page has been modified by the kernel.
+ * It just marks the page as not i-cache clean. We do the i-cache
+ * flush later when the page is given to a user process, if necessary.
+ */
+static inline void flush_dcache_folio(struct folio *folio)
+{
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ return;
+ /* avoid an atomic op if possible */
+ if (test_bit(PG_dcache_clean, &folio->flags.f))
+ clear_bit(PG_dcache_clean, &folio->flags.f);
+}
+#define flush_dcache_folio flush_dcache_folio
+
+static inline void flush_dcache_page(struct page *page)
+{
+ flush_dcache_folio(page_folio(page));
+}
+
+void flush_icache_range(unsigned long start, unsigned long stop);
+#define flush_icache_range flush_icache_range
-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \
- do { \
- memcpy(dst, src, len); \
- flush_icache_user_range(vma, page, vaddr, len); \
- } while (0)
-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
- memcpy(dst, src, len)
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long addr, int len);
+#define flush_icache_user_page flush_icache_user_page
+void flush_dcache_icache_folio(struct folio *folio);
+
+/**
+ * flush_dcache_range(): Write any modified data cache blocks out to memory and
+ * invalidate them. Does not invalidate the corresponding instruction cache
+ * blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static inline void flush_dcache_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ if (IS_ENABLED(CONFIG_PPC64))
+ mb(); /* sync */
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ dcbf(addr);
+ mb(); /* sync */
+
+}
+
+/*
+ * Write any modified data cache blocks out to memory.
+ * Does not invalidate the corresponding cache lines (especially for
+ * any corresponding instruction cache).
+ */
+static inline void clean_dcache_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ dcbst(addr);
+ mb(); /* sync */
+}
+
+/*
+ * Like above, but invalidate the D-cache. This is used by the 8xx
+ * to invalidate the cache so the PPC core doesn't get stale data
+ * from the CPM (no cache snooping here :-).
+ */
+static inline void invalidate_dcache_range(unsigned long start,
+ unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ dcbi(addr);
+ mb(); /* sync */
+}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-/* internal debugging function */
-void kernel_map_pages(struct page *page, int numpages, int enable);
+#ifdef CONFIG_44x
+static inline void flush_instruction_cache(void)
+{
+ iccci((void *)KERNELBASE);
+ isync();
+}
+#else
+void flush_instruction_cache(void);
#endif
-#endif /* __KERNEL__ */
+#include <asm-generic/cacheflush.h>
#endif /* _ASM_POWERPC_CACHEFLUSH_H */
diff --git a/arch/powerpc/include/asm/cell-pmu.h b/arch/powerpc/include/asm/cell-pmu.h
index b4b7338ad79e..7fbefd64b4fb 100644
--- a/arch/powerpc/include/asm/cell-pmu.h
+++ b/arch/powerpc/include/asm/cell-pmu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Cell Broadband Engine Performance Monitor
*
@@ -6,20 +7,6 @@
* Author:
* David Erb (djerb@us.ibm.com)
* Kevin Corry (kevcorry@us.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __ASM_CELL_PMU_H__
@@ -33,36 +20,9 @@
/* Macros for the pm_control register. */
#define CBE_PM_16BIT_CTR(ctr) (1 << (24 - ((ctr) & (NR_PHYS_CTRS - 1))))
-#define CBE_PM_ENABLE_PERF_MON 0x80000000
-#define CBE_PM_STOP_AT_MAX 0x40000000
-#define CBE_PM_TRACE_MODE_GET(pm_control) (((pm_control) >> 28) & 0x3)
-#define CBE_PM_TRACE_MODE_SET(mode) (((mode) & 0x3) << 28)
-#define CBE_PM_TRACE_BUF_OVFLW(bit) (((bit) & 0x1) << 17)
-#define CBE_PM_COUNT_MODE_SET(count) (((count) & 0x3) << 18)
-#define CBE_PM_FREEZE_ALL_CTRS 0x00100000
-#define CBE_PM_ENABLE_EXT_TRACE 0x00008000
-#define CBE_PM_SPU_ADDR_TRACE_SET(msk) (((msk) & 0x3) << 9)
/* Macros for the trace_address register. */
-#define CBE_PM_TRACE_BUF_FULL 0x00000800
#define CBE_PM_TRACE_BUF_EMPTY 0x00000400
-#define CBE_PM_TRACE_BUF_DATA_COUNT(ta) ((ta) & 0x3ff)
-#define CBE_PM_TRACE_BUF_MAX_COUNT 0x400
-
-/* Macros for the pm07_control registers. */
-#define CBE_PM_CTR_INPUT_MUX(pm07_control) (((pm07_control) >> 26) & 0x3f)
-#define CBE_PM_CTR_INPUT_CONTROL 0x02000000
-#define CBE_PM_CTR_POLARITY 0x01000000
-#define CBE_PM_CTR_COUNT_CYCLES 0x00800000
-#define CBE_PM_CTR_ENABLE 0x00400000
-#define PM07_CTR_INPUT_MUX(x) (((x) & 0x3F) << 26)
-#define PM07_CTR_INPUT_CONTROL(x) (((x) & 1) << 25)
-#define PM07_CTR_POLARITY(x) (((x) & 1) << 24)
-#define PM07_CTR_COUNT_CYCLES(x) (((x) & 1) << 23)
-#define PM07_CTR_ENABLE(x) (((x) & 1) << 22)
-
-/* Macros for the pm_status register. */
-#define CBE_PM_CTR_OVERFLOW_INTR(ctr) (1 << (31 - ((ctr) & 7)))
enum pm_reg_name {
group_control,
@@ -75,33 +35,4 @@ enum pm_reg_name {
pm_start_stop,
};
-/* Routines for reading/writing the PMU registers. */
-extern u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr);
-extern void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val);
-extern u32 cbe_read_ctr(u32 cpu, u32 ctr);
-extern void cbe_write_ctr(u32 cpu, u32 ctr, u32 val);
-
-extern u32 cbe_read_pm07_control(u32 cpu, u32 ctr);
-extern void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val);
-extern u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg);
-extern void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val);
-
-extern u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr);
-extern void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size);
-
-extern void cbe_enable_pm(u32 cpu);
-extern void cbe_disable_pm(u32 cpu);
-
-extern void cbe_read_trace_buffer(u32 cpu, u64 *buf);
-
-extern void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask);
-extern void cbe_disable_pm_interrupts(u32 cpu);
-extern u32 cbe_get_and_clear_pm_interrupts(u32 cpu);
-extern void cbe_sync_irq(int node);
-
-#define CBE_COUNT_SUPERVISOR_MODE 0
-#define CBE_COUNT_HYPERVISOR_MODE 1
-#define CBE_COUNT_PROBLEM_MODE 2
-#define CBE_COUNT_ALL_MODES 3
-
#endif /* __ASM_CELL_PMU_H__ */
diff --git a/arch/powerpc/include/asm/cell-regs.h b/arch/powerpc/include/asm/cell-regs.h
index fdf64fd25950..20f7339a3d4a 100644
--- a/arch/powerpc/include/asm/cell-regs.h
+++ b/arch/powerpc/include/asm/cell-regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* cbe_regs.h
*
@@ -17,293 +18,6 @@
#include <asm/cell-pmu.h>
-/*
- *
- * Some HID register definitions
- *
- */
-
-/* CBE specific HID0 bits */
-#define HID0_CBE_THERM_WAKEUP 0x0000020000000000ul
-#define HID0_CBE_SYSERR_WAKEUP 0x0000008000000000ul
-#define HID0_CBE_THERM_INT_EN 0x0000000400000000ul
-#define HID0_CBE_SYSERR_INT_EN 0x0000000200000000ul
-
-#define MAX_CBE 2
-
-/*
- *
- * Pervasive unit register definitions
- *
- */
-
-union spe_reg {
- u64 val;
- u8 spe[8];
-};
-
-union ppe_spe_reg {
- u64 val;
- struct {
- u32 ppe;
- u32 spe;
- };
-};
-
-
-struct cbe_pmd_regs {
- /* Debug Bus Control */
- u64 pad_0x0000; /* 0x0000 */
-
- u64 group_control; /* 0x0008 */
-
- u8 pad_0x0010_0x00a8 [0x00a8 - 0x0010]; /* 0x0010 */
-
- u64 debug_bus_control; /* 0x00a8 */
-
- u8 pad_0x00b0_0x0100 [0x0100 - 0x00b0]; /* 0x00b0 */
-
- u64 trace_aux_data; /* 0x0100 */
- u64 trace_buffer_0_63; /* 0x0108 */
- u64 trace_buffer_64_127; /* 0x0110 */
- u64 trace_address; /* 0x0118 */
- u64 ext_tr_timer; /* 0x0120 */
-
- u8 pad_0x0128_0x0400 [0x0400 - 0x0128]; /* 0x0128 */
-
- /* Performance Monitor */
- u64 pm_status; /* 0x0400 */
- u64 pm_control; /* 0x0408 */
- u64 pm_interval; /* 0x0410 */
- u64 pm_ctr[4]; /* 0x0418 */
- u64 pm_start_stop; /* 0x0438 */
- u64 pm07_control[8]; /* 0x0440 */
-
- u8 pad_0x0480_0x0800 [0x0800 - 0x0480]; /* 0x0480 */
-
- /* Thermal Sensor Registers */
- union spe_reg ts_ctsr1; /* 0x0800 */
- u64 ts_ctsr2; /* 0x0808 */
- union spe_reg ts_mtsr1; /* 0x0810 */
- u64 ts_mtsr2; /* 0x0818 */
- union spe_reg ts_itr1; /* 0x0820 */
- u64 ts_itr2; /* 0x0828 */
- u64 ts_gitr; /* 0x0830 */
- u64 ts_isr; /* 0x0838 */
- u64 ts_imr; /* 0x0840 */
- union spe_reg tm_cr1; /* 0x0848 */
- u64 tm_cr2; /* 0x0850 */
- u64 tm_simr; /* 0x0858 */
- union ppe_spe_reg tm_tpr; /* 0x0860 */
- union spe_reg tm_str1; /* 0x0868 */
- u64 tm_str2; /* 0x0870 */
- union ppe_spe_reg tm_tsr; /* 0x0878 */
-
- /* Power Management */
- u64 pmcr; /* 0x0880 */
-#define CBE_PMD_PAUSE_ZERO_CONTROL 0x10000
- u64 pmsr; /* 0x0888 */
-
- /* Time Base Register */
- u64 tbr; /* 0x0890 */
-
- u8 pad_0x0898_0x0c00 [0x0c00 - 0x0898]; /* 0x0898 */
-
- /* Fault Isolation Registers */
- u64 checkstop_fir; /* 0x0c00 */
- u64 recoverable_fir; /* 0x0c08 */
- u64 spec_att_mchk_fir; /* 0x0c10 */
- u32 fir_mode_reg; /* 0x0c18 */
- u8 pad_0x0c1c_0x0c20 [4]; /* 0x0c1c */
-#define CBE_PMD_FIR_MODE_M8 0x00800
- u64 fir_enable_mask; /* 0x0c20 */
-
- u8 pad_0x0c28_0x0ca8 [0x0ca8 - 0x0c28]; /* 0x0c28 */
- u64 ras_esc_0; /* 0x0ca8 */
- u8 pad_0x0cb0_0x1000 [0x1000 - 0x0cb0]; /* 0x0cb0 */
-};
-
-extern struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np);
-extern struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu);
-
-/*
- * PMU shadow registers
- *
- * Many of the registers in the performance monitoring unit are write-only,
- * so we need to save a copy of what we write to those registers.
- *
- * The actual data counters are read/write. However, writing to the counters
- * only takes effect if the PMU is enabled. Otherwise the value is stored in
- * a hardware latch until the next time the PMU is enabled. So we save a copy
- * of the counter values if we need to read them back while the PMU is
- * disabled. The counter_value_in_latch field is a bitmap indicating which
- * counters currently have a value waiting to be written.
- */
-
-struct cbe_pmd_shadow_regs {
- u32 group_control;
- u32 debug_bus_control;
- u32 trace_address;
- u32 ext_tr_timer;
- u32 pm_status;
- u32 pm_control;
- u32 pm_interval;
- u32 pm_start_stop;
- u32 pm07_control[NR_CTRS];
-
- u32 pm_ctr[NR_PHYS_CTRS];
- u32 counter_value_in_latch;
-};
-
-extern struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np);
-extern struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu);
-
-/*
- *
- * IIC unit register definitions
- *
- */
-
-struct cbe_iic_pending_bits {
- u32 data;
- u8 flags;
- u8 class;
- u8 source;
- u8 prio;
-};
-
-#define CBE_IIC_IRQ_VALID 0x80
-#define CBE_IIC_IRQ_IPI 0x40
-
-struct cbe_iic_thread_regs {
- struct cbe_iic_pending_bits pending;
- struct cbe_iic_pending_bits pending_destr;
- u64 generate;
- u64 prio;
-};
-
-struct cbe_iic_regs {
- u8 pad_0x0000_0x0400[0x0400 - 0x0000]; /* 0x0000 */
-
- /* IIC interrupt registers */
- struct cbe_iic_thread_regs thread[2]; /* 0x0400 */
-
- u64 iic_ir; /* 0x0440 */
-#define CBE_IIC_IR_PRIO(x) (((x) & 0xf) << 12)
-#define CBE_IIC_IR_DEST_NODE(x) (((x) & 0xf) << 4)
-#define CBE_IIC_IR_DEST_UNIT(x) ((x) & 0xf)
-#define CBE_IIC_IR_IOC_0 0x0
-#define CBE_IIC_IR_IOC_1S 0xb
-#define CBE_IIC_IR_PT_0 0xe
-#define CBE_IIC_IR_PT_1 0xf
-
- u64 iic_is; /* 0x0448 */
-#define CBE_IIC_IS_PMI 0x2
-
- u8 pad_0x0450_0x0500[0x0500 - 0x0450]; /* 0x0450 */
-
- /* IOC FIR */
- u64 ioc_fir_reset; /* 0x0500 */
- u64 ioc_fir_set; /* 0x0508 */
- u64 ioc_checkstop_enable; /* 0x0510 */
- u64 ioc_fir_error_mask; /* 0x0518 */
- u64 ioc_syserr_enable; /* 0x0520 */
- u64 ioc_fir; /* 0x0528 */
-
- u8 pad_0x0530_0x1000[0x1000 - 0x0530]; /* 0x0530 */
-};
-
-extern struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np);
-extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu);
-
-
-struct cbe_mic_tm_regs {
- u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */
-
- u64 mic_ctl_cnfg2; /* 0x0040 */
-#define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL
-#define CBE_MIC_DISABLE_PWR_SAV_2 0x0200000000000000LL
-#define CBE_MIC_DISABLE_AUX_TRC_WRAP 0x0100000000000000LL
-#define CBE_MIC_ENABLE_AUX_TRC_INT 0x0080000000000000LL
-
- u64 pad_0x0048; /* 0x0048 */
-
- u64 mic_aux_trc_base; /* 0x0050 */
- u64 mic_aux_trc_max_addr; /* 0x0058 */
- u64 mic_aux_trc_cur_addr; /* 0x0060 */
- u64 mic_aux_trc_grf_addr; /* 0x0068 */
- u64 mic_aux_trc_grf_data; /* 0x0070 */
-
- u64 pad_0x0078; /* 0x0078 */
-
- u64 mic_ctl_cnfg_0; /* 0x0080 */
-#define CBE_MIC_DISABLE_PWR_SAV_0 0x8000000000000000LL
-
- u64 pad_0x0088; /* 0x0088 */
-
- u64 slow_fast_timer_0; /* 0x0090 */
- u64 slow_next_timer_0; /* 0x0098 */
-
- u8 pad_0x00a0_0x00f8[0x00f8 - 0x00a0]; /* 0x00a0 */
- u64 mic_df_ecc_address_0; /* 0x00f8 */
-
- u8 pad_0x0100_0x01b8[0x01b8 - 0x0100]; /* 0x0100 */
- u64 mic_df_ecc_address_1; /* 0x01b8 */
-
- u64 mic_ctl_cnfg_1; /* 0x01c0 */
-#define CBE_MIC_DISABLE_PWR_SAV_1 0x8000000000000000LL
-
- u64 pad_0x01c8; /* 0x01c8 */
-
- u64 slow_fast_timer_1; /* 0x01d0 */
- u64 slow_next_timer_1; /* 0x01d8 */
-
- u8 pad_0x01e0_0x0208[0x0208 - 0x01e0]; /* 0x01e0 */
- u64 mic_exc; /* 0x0208 */
-#define CBE_MIC_EXC_BLOCK_SCRUB 0x0800000000000000ULL
-#define CBE_MIC_EXC_FAST_SCRUB 0x0100000000000000ULL
-
- u64 mic_mnt_cfg; /* 0x0210 */
-#define CBE_MIC_MNT_CFG_CHAN_0_POP 0x0002000000000000ULL
-#define CBE_MIC_MNT_CFG_CHAN_1_POP 0x0004000000000000ULL
-
- u64 mic_df_config; /* 0x0218 */
-#define CBE_MIC_ECC_DISABLE_0 0x4000000000000000ULL
-#define CBE_MIC_ECC_REP_SINGLE_0 0x2000000000000000ULL
-#define CBE_MIC_ECC_DISABLE_1 0x0080000000000000ULL
-#define CBE_MIC_ECC_REP_SINGLE_1 0x0040000000000000ULL
-
- u8 pad_0x0220_0x0230[0x0230 - 0x0220]; /* 0x0220 */
- u64 mic_fir; /* 0x0230 */
-#define CBE_MIC_FIR_ECC_SINGLE_0_ERR 0x0200000000000000ULL
-#define CBE_MIC_FIR_ECC_MULTI_0_ERR 0x0100000000000000ULL
-#define CBE_MIC_FIR_ECC_SINGLE_1_ERR 0x0080000000000000ULL
-#define CBE_MIC_FIR_ECC_MULTI_1_ERR 0x0040000000000000ULL
-#define CBE_MIC_FIR_ECC_ERR_MASK 0xffff000000000000ULL
-#define CBE_MIC_FIR_ECC_SINGLE_0_CTE 0x0000020000000000ULL
-#define CBE_MIC_FIR_ECC_MULTI_0_CTE 0x0000010000000000ULL
-#define CBE_MIC_FIR_ECC_SINGLE_1_CTE 0x0000008000000000ULL
-#define CBE_MIC_FIR_ECC_MULTI_1_CTE 0x0000004000000000ULL
-#define CBE_MIC_FIR_ECC_CTE_MASK 0x0000ffff00000000ULL
-#define CBE_MIC_FIR_ECC_SINGLE_0_RESET 0x0000000002000000ULL
-#define CBE_MIC_FIR_ECC_MULTI_0_RESET 0x0000000001000000ULL
-#define CBE_MIC_FIR_ECC_SINGLE_1_RESET 0x0000000000800000ULL
-#define CBE_MIC_FIR_ECC_MULTI_1_RESET 0x0000000000400000ULL
-#define CBE_MIC_FIR_ECC_RESET_MASK 0x00000000ffff0000ULL
-#define CBE_MIC_FIR_ECC_SINGLE_0_SET 0x0000000000000200ULL
-#define CBE_MIC_FIR_ECC_MULTI_0_SET 0x0000000000000100ULL
-#define CBE_MIC_FIR_ECC_SINGLE_1_SET 0x0000000000000080ULL
-#define CBE_MIC_FIR_ECC_MULTI_1_SET 0x0000000000000040ULL
-#define CBE_MIC_FIR_ECC_SET_MASK 0x000000000000ffffULL
- u64 mic_fir_debug; /* 0x0238 */
-
- u8 pad_0x0240_0x1000[0x1000 - 0x0240]; /* 0x0240 */
-};
-
-extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np);
-extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
-
-
/* Cell page table entries */
#define CBE_IOPTE_PP_W 0x8000000000000000ul /* protection: write */
#define CBE_IOPTE_PP_R 0x4000000000000000ul /* protection: read */
@@ -314,13 +28,4 @@ extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu);
#define CBE_IOPTE_H 0x0000000000000800ul /* cache hint */
#define CBE_IOPTE_IOID_Mask 0x00000000000007fful /* ioid */
-/* some utility functions to deal with SMT */
-extern u32 cbe_get_hw_thread_id(int cpu);
-extern u32 cbe_cpu_to_node(int cpu);
-extern u32 cbe_node_to_cpu(int node);
-
-/* Init this module early */
-extern void cbe_regs_init(void);
-
-
#endif /* CBE_REGS_H */
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 8251a3ba870f..4b573a3b7e17 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -1,47 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_CHECKSUM_H
#define _ASM_POWERPC_CHECKSUM_H
#ifdef __KERNEL__
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-/*
- * This is a version of ip_compute_csum() optimized for IP headers,
- * which always checksum on 4 octet boundaries. ihl is the number
- * of 32-bit words and is always >= 5.
- */
-#ifdef CONFIG_GENERIC_CSUM
-#include <asm-generic/checksum.h>
-#else
-extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum);
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
+#include <linux/bitops.h>
+#include <linux/in6.h>
/*
* Computes the checksum of a memory block at src, length len,
* and adds in "sum" (32-bit), while copying the block to dst.
@@ -52,28 +18,18 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
* Like csum_partial, this must be called with even lengths,
* except for the last fragment.
*/
-extern __wsum csum_partial_copy_generic(const void *src, void *dst,
- int len, __wsum sum,
- int *src_err, int *dst_err);
+extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
-#ifdef __powerpc64__
#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr);
+ int len);
#define HAVE_CSUM_COPY_USER
extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
- int len, __wsum sum, int *err_ptr);
-#else
-/*
- * the same as csum_partial, but copies from src to dst while it
- * checksums.
- */
-#define csum_partial_copy_from_user(src, dst, len, sum, errp) \
- csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL)
-#endif
+ int len);
-#define csum_partial_copy_nocheck(src, dst, len, sum) \
- csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
+#define _HAVE_ARCH_CSUM_AND_COPY
+#define csum_partial_copy_nocheck(src, dst, len) \
+ csum_partial_copy_generic((src), (dst), (len))
/*
@@ -82,38 +38,36 @@ extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
*/
static inline __sum16 csum_fold(__wsum sum)
{
- unsigned int tmp;
-
- /* swap the two 16-bit halves of sum */
- __asm__("rlwinm %0,%1,16,0,31" : "=r" (tmp) : "r" (sum));
- /* if there is a carry from adding the two 16-bit halves,
- it will carry from the lower half into the upper half,
- giving us the correct sum in the upper half. */
- return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
+ u32 tmp = (__force u32)sum;
+
+ /*
+ * swap the two 16-bit halves of sum
+ * if there is a carry from adding the two 16-bit halves,
+ * it will carry from the lower half into the upper half,
+ * giving us the correct sum in the upper half.
+ */
+ return (__force __sum16)(~(tmp + rol32(tmp, 16)) >> 16);
}
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-static inline __sum16 ip_compute_csum(const void *buff, int len)
+static inline u32 from64to32(u64 x)
{
- return csum_fold(csum_partial(buff, len, 0));
+ return (x + ror64(x, 32)) >> 32;
}
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
#ifdef __powerpc64__
- unsigned long s = (__force u32)sum;
+ u64 s = (__force u32)sum;
s += (__force u32)saddr;
s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
s += proto + len;
- s += (s >> 32);
- return (__force __wsum) s;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum) from64to32(s);
#else
__asm__("\n\
addc %0,%0,%1 \n\
@@ -127,6 +81,141 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
#endif
}
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+#define HAVE_ARCH_CSUM_ADD
+static __always_inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+#ifdef __powerpc64__
+ u64 res = (__force u64)csum;
+
+ res += (__force u64)addend;
+ return (__force __wsum)((u32)res + (res >> 32));
+#else
+ if (__builtin_constant_p(csum) && csum == 0)
+ return addend;
+ if (__builtin_constant_p(addend) && addend == 0)
+ return csum;
+
+ asm("addc %0,%0,%1;"
+ "addze %0,%0;"
+ : "+r" (csum) : "r" (addend) : "xer");
+ return csum;
#endif
+}
+
+#define HAVE_ARCH_CSUM_SHIFT
+static __always_inline __wsum csum_shift(__wsum sum, int offset)
+{
+ /* rotate sum to align it with a 16b boundary */
+ return (__force __wsum)rol32((__force u32)sum, (offset & 1) << 3);
+}
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries. ihl is the number
+ * of 32-bit words and is always >= 5.
+ */
+static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
+{
+ const u32 *ptr = (const u32 *)iph + 1;
+#ifdef __powerpc64__
+ unsigned int i;
+ u64 s = *(const u32 *)iph;
+
+ for (i = 0; i < ihl - 1; i++, ptr++)
+ s += *ptr;
+ return (__force __wsum)from64to32(s);
+#else
+ __wsum sum, tmp;
+
+ asm("mtctr %3;"
+ "addc %0,%4,%5;"
+ "1: lwzu %1, 4(%2);"
+ "adde %0,%0,%1;"
+ "bdnz 1b;"
+ "addze %0,%0;"
+ : "=r" (sum), "=r" (tmp), "+b" (ptr)
+ : "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
+ : "ctr", "xer", "memory");
+
+ return sum;
+#endif
+}
+
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ return csum_fold(ip_fast_csum_nofold(iph, ihl));
+}
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+__wsum __csum_partial(const void *buff, int len, __wsum sum);
+
+static __always_inline __wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+ if (__builtin_constant_p(len) && len <= 16 && (len & 1) == 0) {
+ if (len == 2)
+ sum = csum_add(sum, (__force __wsum)*(const u16 *)buff);
+ if (len >= 4)
+ sum = csum_add(sum, (__force __wsum)*(const u32 *)buff);
+ if (len == 6)
+ sum = csum_add(sum, (__force __wsum)
+ *(const u16 *)(buff + 4));
+ if (len >= 8)
+ sum = csum_add(sum, (__force __wsum)
+ *(const u32 *)(buff + 4));
+ if (len == 10)
+ sum = csum_add(sum, (__force __wsum)
+ *(const u16 *)(buff + 8));
+ if (len >= 12)
+ sum = csum_add(sum, (__force __wsum)
+ *(const u32 *)(buff + 8));
+ if (len == 14)
+ sum = csum_add(sum, (__force __wsum)
+ *(const u16 *)(buff + 12));
+ if (len >= 16)
+ sum = csum_add(sum, (__force __wsum)
+ *(const u32 *)(buff + 12));
+ } else if (__builtin_constant_p(len) && (len & 3) == 0) {
+ sum = csum_add(sum, ip_fast_csum_nofold(buff, len >> 2));
+ } else {
+ sum = __csum_partial(buff, len, sum);
+ }
+ return sum;
+}
+
+/*
+ * this routine is used for miscellaneous IP-like checksums, mainly
+ * in icmp.c
+ */
+static inline __sum16 ip_compute_csum(const void *buff, int len)
+{
+ return csum_fold(csum_partial(buff, len, 0));
+}
+
+#define _HAVE_ARCH_IPV6_CSUM
+__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
#endif /* __KERNEL__ */
#endif
diff --git a/arch/powerpc/include/asm/clocksource.h b/arch/powerpc/include/asm/clocksource.h
new file mode 100644
index 000000000000..0a26ef13a34a
--- /dev/null
+++ b/arch/powerpc/include/asm/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CLOCKSOURCE_H
+#define _ASM_POWERPC_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif /* _ASM_POWERPC_CLOCKSOURCE_H */
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index d463c68fe7f0..dbb50c06f0bf 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -1,42 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_CMPXCHG_H_
#define _ASM_POWERPC_CMPXCHG_H_
#ifdef __KERNEL__
#include <linux/compiler.h>
#include <asm/synch.h>
-#include <asm/asm-compat.h>
+#include <linux/bug.h>
+
+#ifdef __BIG_ENDIAN
+#define BITOFF_CAL(size, off) ((sizeof(u32) - size - off) * BITS_PER_BYTE)
+#else
+#define BITOFF_CAL(size, off) (off * BITS_PER_BYTE)
+#endif
+
+#define XCHG_GEN(type, sfx, cl) \
+static inline u32 __xchg_##type##sfx(volatile void *p, u32 val) \
+{ \
+ unsigned int prev, prev_mask, tmp, bitoff, off; \
+ \
+ off = (unsigned long)p % sizeof(u32); \
+ bitoff = BITOFF_CAL(sizeof(type), off); \
+ p -= off; \
+ val <<= bitoff; \
+ prev_mask = (u32)(type)-1 << bitoff; \
+ \
+ __asm__ __volatile__( \
+"1: lwarx %0,0,%3\n" \
+" andc %1,%0,%5\n" \
+" or %1,%1,%4\n" \
+" stwcx. %1,0,%3\n" \
+" bne- 1b\n" \
+ : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \
+ : "r" (p), "r" (val), "r" (prev_mask) \
+ : "cc", cl); \
+ \
+ return prev >> bitoff; \
+}
+
+#define CMPXCHG_GEN(type, sfx, br, br2, cl) \
+static inline \
+u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new) \
+{ \
+ unsigned int prev, prev_mask, tmp, bitoff, off; \
+ \
+ off = (unsigned long)p % sizeof(u32); \
+ bitoff = BITOFF_CAL(sizeof(type), off); \
+ p -= off; \
+ old <<= bitoff; \
+ new <<= bitoff; \
+ prev_mask = (u32)(type)-1 << bitoff; \
+ \
+ __asm__ __volatile__( \
+ br \
+"1: lwarx %0,0,%3\n" \
+" and %1,%0,%6\n" \
+" cmpw 0,%1,%4\n" \
+" bne- 2f\n" \
+" andc %1,%0,%6\n" \
+" or %1,%1,%5\n" \
+" stwcx. %1,0,%3\n" \
+" bne- 1b\n" \
+ br2 \
+ "\n" \
+"2:" \
+ : "=&r" (prev), "=&r" (tmp), "+m" (*(u32*)p) \
+ : "r" (p), "r" (old), "r" (new), "r" (prev_mask) \
+ : "cc", cl); \
+ \
+ return prev >> bitoff; \
+}
/*
* Atomic exchange
*
- * Changes the memory location '*ptr' to be val and returns
+ * Changes the memory location '*p' to be val and returns
* the previous value stored there.
*/
+
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
+XCHG_GEN(u8, _local, "memory");
+XCHG_GEN(u8, _relaxed, "cc");
+XCHG_GEN(u16, _local, "memory");
+XCHG_GEN(u16, _relaxed, "cc");
+#else
static __always_inline unsigned long
-__xchg_u32(volatile void *p, unsigned long val)
+__xchg_u8_local(volatile void *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stwcx. %3,0,%2 \n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (prev), "+m" (*(volatile unsigned int *)p)
+"1: lbarx %0,0,%2 # __xchg_u8_local\n"
+" stbcx. %3,0,%2 \n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+ : "r" (p), "r" (val)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lbarx %0,0,%2 # __xchg_u8_relaxed\n"
+" stbcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lharx %0,0,%2 # __xchg_u16_local\n"
+" sthcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*(volatile unsigned short *)p)
: "r" (p), "r" (val)
: "cc", "memory");
return prev;
}
-/*
- * Atomic exchange
- *
- * Changes the memory location '*ptr' to be val and returns
- * the previous value stored there.
- */
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: lharx %0,0,%2 # __xchg_u16_relaxed\n"
+" sthcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+#endif
+
static __always_inline unsigned long
__xchg_u32_local(volatile void *p, unsigned long val)
{
@@ -44,7 +155,6 @@ __xchg_u32_local(volatile void *p, unsigned long val)
__asm__ __volatile__(
"1: lwarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
" stwcx. %3,0,%2 \n\
bne- 1b"
: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
@@ -54,26 +164,23 @@ __xchg_u32_local(volatile void *p, unsigned long val)
return prev;
}
-#ifdef CONFIG_PPC64
static __always_inline unsigned long
-__xchg_u64(volatile void *p, unsigned long val)
+__xchg_u32_relaxed(u32 *p, unsigned long val)
{
unsigned long prev;
__asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: ldarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
-" stdcx. %3,0,%2 \n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (prev), "+m" (*(volatile unsigned long *)p)
+"1: lwarx %0,0,%2\n"
+" stwcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
: "r" (p), "r" (val)
- : "cc", "memory");
+ : "cc");
return prev;
}
+#ifdef CONFIG_PPC64
static __always_inline unsigned long
__xchg_u64_local(volatile void *p, unsigned long val)
{
@@ -81,7 +188,6 @@ __xchg_u64_local(volatile void *p, unsigned long val)
__asm__ __volatile__(
"1: ldarx %0,0,%2 \n"
- PPC405_ERR77(0,%2)
" stdcx. %3,0,%2 \n\
bne- 1b"
: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
@@ -90,61 +196,250 @@ __xchg_u64_local(volatile void *p, unsigned long val)
return prev;
}
-#endif
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid xchg().
- */
-extern void __xchg_called_with_bad_pointer(void);
+static __always_inline unsigned long
+__xchg_u64_relaxed(u64 *p, unsigned long val)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__(
+"1: ldarx %0,0,%2\n"
+" stdcx. %3,0,%2\n"
+" bne- 1b"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (val)
+ : "cc");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
-__xchg(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_local(void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
+ case 1:
+ return __xchg_u8_local(ptr, x);
+ case 2:
+ return __xchg_u16_local(ptr, x);
case 4:
- return __xchg_u32(ptr, x);
+ return __xchg_u32_local(ptr, x);
#ifdef CONFIG_PPC64
case 8:
- return __xchg_u64(ptr, x);
+ return __xchg_u64_local(ptr, x);
#endif
}
- __xchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_local");
return x;
}
static __always_inline unsigned long
-__xchg_local(volatile void *ptr, unsigned long x, unsigned int size)
+__xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
{
switch (size) {
+ case 1:
+ return __xchg_u8_relaxed(ptr, x);
+ case 2:
+ return __xchg_u16_relaxed(ptr, x);
case 4:
- return __xchg_u32_local(ptr, x);
+ return __xchg_u32_relaxed(ptr, x);
#ifdef CONFIG_PPC64
case 8:
- return __xchg_u64_local(ptr, x);
+ return __xchg_u64_relaxed(ptr, x);
#endif
}
- __xchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __xchg_relaxed");
return x;
}
-#define xchg(ptr,x) \
- ({ \
- __typeof__(*(ptr)) _x_ = (x); \
- (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \
- })
-
-#define xchg_local(ptr,x) \
+#define arch_xchg_local(ptr,x) \
({ \
__typeof__(*(ptr)) _x_ = (x); \
(__typeof__(*(ptr))) __xchg_local((ptr), \
(unsigned long)_x_, sizeof(*(ptr))); \
})
+#define arch_xchg_relaxed(ptr, x) \
+({ \
+ __typeof__(*(ptr)) _x_ = (x); \
+ (__typeof__(*(ptr))) __xchg_relaxed((ptr), \
+ (unsigned long)_x_, sizeof(*(ptr))); \
+})
+
/*
* Compare and exchange - if *p == old, set it to new,
* and return the old value of *p.
*/
-#define __HAVE_ARCH_CMPXCHG 1
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
+CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
+CMPXCHG_GEN(u8, _local, , , "memory");
+CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
+CMPXCHG_GEN(u8, _relaxed, , , "cc");
+CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
+CMPXCHG_GEN(u16, _local, , , "memory");
+CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
+CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+"1: lbarx %0,0,%2 # __cmpxchg_u8\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b"
+ PPC_ATOMIC_EXIT_BARRIER
+ "\n\
+2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_local\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lbarx %0,0,%2 # __cmpxchg_u8_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stbcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+ PPC_ATOMIC_ENTRY_BARRIER
+"1: lharx %0,0,%2 # __cmpxchg_u16\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ATOMIC_EXIT_BARRIER
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+ unsigned long new)
+{
+ unsigned int prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_local\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lharx %0,0,%2 # __cmpxchg_u16_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" sthcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
@@ -152,14 +447,13 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
unsigned int prev;
__asm__ __volatile__ (
- PPC_RELEASE_BARRIER
+ PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
cmpw 0,%0,%3\n\
bne- 2f\n"
- PPC405_ERR77(0,%2)
" stwcx. %4,0,%2\n\
bne- 1b"
- PPC_ACQUIRE_BARRIER
+ PPC_ATOMIC_EXIT_BARRIER
"\n\
2:"
: "=&r" (prev), "+m" (*p)
@@ -179,7 +473,6 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
"1: lwarx %0,0,%2 # __cmpxchg_u32\n\
cmpw 0,%0,%3\n\
bne- 2f\n"
- PPC405_ERR77(0,%2)
" stwcx. %4,0,%2\n\
bne- 1b"
"\n\
@@ -191,6 +484,54 @@ __cmpxchg_u32_local(volatile unsigned int *p, unsigned long old,
return prev;
}
+static __always_inline unsigned long
+__cmpxchg_u32_relaxed(u32 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32_relaxed\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stwcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+/*
+ * cmpxchg family don't have order guarantee if cmp part fails, therefore we
+ * can avoid superfluous barriers if we use assembly code to implement
+ * cmpxchg() and cmpxchg_acquire(), however we don't do the similar for
+ * cmpxchg_release() because that will result in putting a barrier in the
+ * middle of a ll/sc loop, which is probably a bad idea. For example, this
+ * might cause the conditional store more likely to fail.
+ */
+static __always_inline unsigned long
+__cmpxchg_u32_acquire(u32 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: lwarx %0,0,%2 # __cmpxchg_u32_acquire\n"
+" cmpw 0,%0,%3\n"
+" bne- 2f\n"
+" stwcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+
#ifdef CONFIG_PPC64
static __always_inline unsigned long
__cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
@@ -198,13 +539,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
unsigned long prev;
__asm__ __volatile__ (
- PPC_RELEASE_BARRIER
+ PPC_ATOMIC_ENTRY_BARRIER
"1: ldarx %0,0,%2 # __cmpxchg_u64\n\
cmpd 0,%0,%3\n\
bne- 2f\n\
stdcx. %4,0,%2\n\
bne- 1b"
- PPC_ACQUIRE_BARRIER
+ PPC_ATOMIC_EXIT_BARRIER
"\n\
2:"
: "=&r" (prev), "+m" (*p)
@@ -234,17 +575,57 @@ __cmpxchg_u64_local(volatile unsigned long *p, unsigned long old,
return prev;
}
-#endif
-/* This function doesn't exist, so you'll get a linker error
- if something tries to do an invalid cmpxchg(). */
-extern void __cmpxchg_called_with_bad_pointer(void);
+static __always_inline unsigned long
+__cmpxchg_u64_relaxed(u64 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64_relaxed\n"
+" cmpd 0,%0,%3\n"
+" bne- 2f\n"
+" stdcx. %4,0,%2\n"
+" bne- 1b\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc");
+
+ return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u64_acquire(u64 *p, unsigned long old, unsigned long new)
+{
+ unsigned long prev;
+
+ __asm__ __volatile__ (
+"1: ldarx %0,0,%2 # __cmpxchg_u64_acquire\n"
+" cmpd 0,%0,%3\n"
+" bne- 2f\n"
+" stdcx. %4,0,%2\n"
+" bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+ "\n"
+"2:"
+ : "=&r" (prev), "+m" (*p)
+ : "r" (p), "r" (old), "r" (new)
+ : "cc", "memory");
+
+ return prev;
+}
+#endif
static __always_inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
unsigned int size)
{
switch (size) {
+ case 1:
+ return __cmpxchg_u8(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16(ptr, old, new);
case 4:
return __cmpxchg_u32(ptr, old, new);
#ifdef CONFIG_PPC64
@@ -252,15 +633,19 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new,
return __cmpxchg_u64(ptr, old, new);
#endif
}
- __cmpxchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg");
return old;
}
static __always_inline unsigned long
-__cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
+__cmpxchg_local(void *ptr, unsigned long old, unsigned long new,
unsigned int size)
{
switch (size) {
+ case 1:
+ return __cmpxchg_u8_local(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_local(ptr, old, new);
case 4:
return __cmpxchg_u32_local(ptr, old, new);
#ifdef CONFIG_PPC64
@@ -268,11 +653,50 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
return __cmpxchg_u64_local(ptr, old, new);
#endif
}
- __cmpxchg_called_with_bad_pointer();
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_local");
+ return old;
+}
+
+static __always_inline unsigned long
+__cmpxchg_relaxed(void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 1:
+ return __cmpxchg_u8_relaxed(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_relaxed(ptr, old, new);
+ case 4:
+ return __cmpxchg_u32_relaxed(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_relaxed(ptr, old, new);
+#endif
+ }
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_relaxed");
return old;
}
-#define cmpxchg(ptr, o, n) \
+static __always_inline unsigned long
+__cmpxchg_acquire(void *ptr, unsigned long old, unsigned long new,
+ unsigned int size)
+{
+ switch (size) {
+ case 1:
+ return __cmpxchg_u8_acquire(ptr, old, new);
+ case 2:
+ return __cmpxchg_u16_acquire(ptr, old, new);
+ case 4:
+ return __cmpxchg_u32_acquire(ptr, old, new);
+#ifdef CONFIG_PPC64
+ case 8:
+ return __cmpxchg_u64_acquire(ptr, old, new);
+#endif
+ }
+ BUILD_BUG_ON_MSG(1, "Unsupported size for __cmpxchg_acquire");
+ return old;
+}
+#define arch_cmpxchg(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -281,7 +705,7 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
})
-#define cmpxchg_local(ptr, o, n) \
+#define arch_cmpxchg_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) _o_ = (o); \
__typeof__(*(ptr)) _n_ = (n); \
@@ -289,21 +713,47 @@ __cmpxchg_local(volatile void *ptr, unsigned long old, unsigned long new,
(unsigned long)_n_, sizeof(*(ptr))); \
})
+#define arch_cmpxchg_relaxed(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_relaxed((ptr), \
+ (unsigned long)_o_, (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+})
+
+#define arch_cmpxchg_acquire(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) _o_ = (o); \
+ __typeof__(*(ptr)) _n_ = (n); \
+ (__typeof__(*(ptr))) __cmpxchg_acquire((ptr), \
+ (unsigned long)_o_, (unsigned long)_n_, \
+ sizeof(*(ptr))); \
+})
#ifdef CONFIG_PPC64
-#define cmpxchg64(ptr, o, n) \
+#define arch_cmpxchg64(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg((ptr), (o), (n)); \
+ arch_cmpxchg((ptr), (o), (n)); \
})
-#define cmpxchg64_local(ptr, o, n) \
+#define arch_cmpxchg64_local(ptr, o, n) \
({ \
BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
- cmpxchg_local((ptr), (o), (n)); \
+ arch_cmpxchg_local((ptr), (o), (n)); \
})
-#define cmpxchg64_relaxed cmpxchg64_local
+#define arch_cmpxchg64_relaxed(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_cmpxchg_relaxed((ptr), (o), (n)); \
+})
+#define arch_cmpxchg64_acquire(ptr, o, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_cmpxchg_acquire((ptr), (o), (n)); \
+})
#else
#include <asm-generic/cmpxchg-local.h>
-#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
+#define arch_cmpxchg64_local(ptr, o, n) __generic_cmpxchg64_local((ptr), (o), (n))
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/code-patching-asm.h b/arch/powerpc/include/asm/code-patching-asm.h
new file mode 100644
index 000000000000..ed7b1448493a
--- /dev/null
+++ b/arch/powerpc/include/asm/code-patching-asm.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_CODE_PATCHING_ASM_H
+#define _ASM_POWERPC_CODE_PATCHING_ASM_H
+
+/* Define a "site" that can be patched */
+.macro patch_site label name
+ .pushsection ".rodata"
+ .balign 4
+ .global \name
+\name:
+ .4byte \label - .
+ .popsection
+.endm
+
+#endif /* _ASM_POWERPC_CODE_PATCHING_ASM_H */
diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
deleted file mode 100644
index 840a5509b3f1..000000000000
--- a/arch/powerpc/include/asm/code-patching.h
+++ /dev/null
@@ -1,102 +0,0 @@
-#ifndef _ASM_POWERPC_CODE_PATCHING_H
-#define _ASM_POWERPC_CODE_PATCHING_H
-
-/*
- * Copyright 2008, Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/types.h>
-#include <asm/ppc-opcode.h>
-
-/* Flags for create_branch:
- * "b" == create_branch(addr, target, 0);
- * "ba" == create_branch(addr, target, BRANCH_ABSOLUTE);
- * "bl" == create_branch(addr, target, BRANCH_SET_LINK);
- * "bla" == create_branch(addr, target, BRANCH_ABSOLUTE | BRANCH_SET_LINK);
- */
-#define BRANCH_SET_LINK 0x1
-#define BRANCH_ABSOLUTE 0x2
-
-unsigned int create_branch(const unsigned int *addr,
- unsigned long target, int flags);
-unsigned int create_cond_branch(const unsigned int *addr,
- unsigned long target, int flags);
-int patch_branch(unsigned int *addr, unsigned long target, int flags);
-int patch_instruction(unsigned int *addr, unsigned int instr);
-
-int instr_is_relative_branch(unsigned int instr);
-int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr);
-unsigned long branch_target(const unsigned int *instr);
-unsigned int translate_branch(const unsigned int *dest,
- const unsigned int *src);
-#ifdef CONFIG_PPC_BOOK3E_64
-void __patch_exception(int exc, unsigned long addr);
-#define patch_exception(exc, name) do { \
- extern unsigned int name; \
- __patch_exception((exc), (unsigned long)&name); \
-} while (0)
-#endif
-
-#define OP_RT_RA_MASK 0xffff0000UL
-#define LIS_R2 0x3c020000UL
-#define ADDIS_R2_R12 0x3c4c0000UL
-#define ADDI_R2_R2 0x38420000UL
-
-static inline unsigned long ppc_function_entry(void *func)
-{
-#if defined(CONFIG_PPC64)
-#if defined(_CALL_ELF) && _CALL_ELF == 2
- u32 *insn = func;
-
- /*
- * A PPC64 ABIv2 function may have a local and a global entry
- * point. We need to use the local entry point when patching
- * functions, so identify and step over the global entry point
- * sequence.
- *
- * The global entry point sequence is always of the form:
- *
- * addis r2,r12,XXXX
- * addi r2,r2,XXXX
- *
- * A linker optimisation may convert the addis to lis:
- *
- * lis r2,XXXX
- * addi r2,r2,XXXX
- */
- if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
- ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
- ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2))
- return (unsigned long)(insn + 2);
- else
- return (unsigned long)func;
-#else
- /*
- * On PPC64 ABIv1 the function pointer actually points to the
- * function's descriptor. The first entry in the descriptor is the
- * address of the function text.
- */
- return ((func_descr_t *)func)->entry;
-#endif
-#else
- return (unsigned long)func;
-#endif
-}
-
-static inline unsigned long ppc_global_function_entry(void *func)
-{
-#if defined(CONFIG_PPC64) && defined(_CALL_ELF) && _CALL_ELF == 2
- /* PPC64 ABIv2 the global entry point is at the address */
- return (unsigned long)func;
-#else
- /* All other cases there is no change vs ppc_function_entry() */
- return ppc_function_entry(func);
-#endif
-}
-
-#endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index b142b8e0ed9e..dda4091fd012 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_COMPAT_H
#define _ASM_POWERPC_COMPAT_H
#ifdef __KERNEL__
@@ -7,52 +8,20 @@
#include <linux/types.h>
#include <linux/sched.h>
-#define COMPAT_USER_HZ 100
+#define compat_ipc_pid_t compat_ipc_pid_t
+typedef u16 compat_ipc_pid_t;
+
+#define compat_ipc64_perm compat_ipc64_perm
+
+#include <asm-generic/compat.h>
+
#ifdef __BIG_ENDIAN__
#define COMPAT_UTS_MACHINE "ppc\0\0"
#else
#define COMPAT_UTS_MACHINE "ppcle\0\0"
#endif
-typedef u32 compat_size_t;
-typedef s32 compat_ssize_t;
-typedef s32 compat_time_t;
-typedef s32 compat_clock_t;
-typedef s32 compat_pid_t;
-typedef u32 __compat_uid_t;
-typedef u32 __compat_gid_t;
-typedef u32 __compat_uid32_t;
-typedef u32 __compat_gid32_t;
-typedef u32 compat_mode_t;
-typedef u32 compat_ino_t;
-typedef u32 compat_dev_t;
-typedef s32 compat_off_t;
-typedef s64 compat_loff_t;
typedef s16 compat_nlink_t;
-typedef u16 compat_ipc_pid_t;
-typedef s32 compat_daddr_t;
-typedef u32 compat_caddr_t;
-typedef __kernel_fsid_t compat_fsid_t;
-typedef s32 compat_key_t;
-typedef s32 compat_timer_t;
-
-typedef s32 compat_int_t;
-typedef s32 compat_long_t;
-typedef s64 compat_s64;
-typedef u32 compat_uint_t;
-typedef u32 compat_ulong_t;
-typedef u64 compat_u64;
-typedef u32 compat_uptr_t;
-
-struct compat_timespec {
- compat_time_t tv_sec;
- s32 tv_nsec;
-};
-
-struct compat_timeval {
- compat_time_t tv_sec;
- s32 tv_usec;
-};
struct compat_stat {
compat_dev_t st_dev;
@@ -65,154 +34,15 @@ struct compat_stat {
compat_off_t st_size;
compat_off_t st_blksize;
compat_off_t st_blocks;
- compat_time_t st_atime;
+ old_time32_t st_atime;
u32 st_atime_nsec;
- compat_time_t st_mtime;
+ old_time32_t st_mtime;
u32 st_mtime_nsec;
- compat_time_t st_ctime;
+ old_time32_t st_ctime;
u32 st_ctime_nsec;
u32 __unused4[2];
};
-struct compat_flock {
- short l_type;
- short l_whence;
- compat_off_t l_start;
- compat_off_t l_len;
- compat_pid_t l_pid;
-};
-
-#define F_GETLK64 12 /* using 'struct flock64' */
-#define F_SETLK64 13
-#define F_SETLKW64 14
-
-struct compat_flock64 {
- short l_type;
- short l_whence;
- compat_loff_t l_start;
- compat_loff_t l_len;
- compat_pid_t l_pid;
-};
-
-struct compat_statfs {
- int f_type;
- int f_bsize;
- int f_blocks;
- int f_bfree;
- int f_bavail;
- int f_files;
- int f_ffree;
- compat_fsid_t f_fsid;
- int f_namelen; /* SunOS ignores this field. */
- int f_frsize;
- int f_flags;
- int f_spare[4];
-};
-
-#define COMPAT_RLIM_OLD_INFINITY 0x7fffffff
-#define COMPAT_RLIM_INFINITY 0xffffffff
-
-typedef u32 compat_old_sigset_t;
-
-#define _COMPAT_NSIG 64
-#define _COMPAT_NSIG_BPW 32
-
-typedef u32 compat_sigset_word;
-
-typedef union compat_sigval {
- compat_int_t sival_int;
- compat_uptr_t sival_ptr;
-} compat_sigval_t;
-
-#define SI_PAD_SIZE32 (128/sizeof(int) - 3)
-
-typedef struct compat_siginfo {
- int si_signo;
- int si_errno;
- int si_code;
-
- union {
- int _pad[SI_PAD_SIZE32];
-
- /* kill() */
- struct {
- compat_pid_t _pid; /* sender's pid */
- __compat_uid_t _uid; /* sender's uid */
- } _kill;
-
- /* POSIX.1b timers */
- struct {
- compat_timer_t _tid; /* timer id */
- int _overrun; /* overrun count */
- compat_sigval_t _sigval; /* same as below */
- int _sys_private; /* not to be passed to user */
- } _timer;
-
- /* POSIX.1b signals */
- struct {
- compat_pid_t _pid; /* sender's pid */
- __compat_uid_t _uid; /* sender's uid */
- compat_sigval_t _sigval;
- } _rt;
-
- /* SIGCHLD */
- struct {
- compat_pid_t _pid; /* which child */
- __compat_uid_t _uid; /* sender's uid */
- int _status; /* exit code */
- compat_clock_t _utime;
- compat_clock_t _stime;
- } _sigchld;
-
- /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
- struct {
- unsigned int _addr; /* faulting insn/memory ref. */
- } _sigfault;
-
- /* SIGPOLL */
- struct {
- int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
- int _fd;
- } _sigpoll;
- } _sifields;
-} compat_siginfo_t;
-
-#define COMPAT_OFF_T_MAX 0x7fffffff
-#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL
-
-/*
- * A pointer passed in from user mode. This should not
- * be used for syscall parameters, just declare them
- * as pointers because the syscall entry code will have
- * appropriately converted them already.
- */
-
-static inline void __user *compat_ptr(compat_uptr_t uptr)
-{
- return (void __user *)(unsigned long)uptr;
-}
-
-static inline compat_uptr_t ptr_to_compat(void __user *uptr)
-{
- return (u32)(unsigned long)uptr;
-}
-
-static inline void __user *arch_compat_alloc_user_space(long len)
-{
- struct pt_regs *regs = current->thread.regs;
- unsigned long usp = regs->gpr[1];
-
- /*
- * We can't access below the stack pointer in the 32bit ABI and
- * can access 288 bytes in the 64bit big-endian ABI,
- * or 512 bytes with the new ELFv2 little-endian ABI.
- */
- if (!is_32bit_task())
- usp -= USER_REDZONE_SIZE;
-
- return (void __user *) (usp - len);
-}
-
/*
* ipc64_perm is actually 32/64bit clean but since the compat layer refers to
* it we may as well define it.
@@ -232,10 +62,10 @@ struct compat_ipc64_perm {
struct compat_semid64_ds {
struct compat_ipc64_perm sem_perm;
- unsigned int __unused1;
- compat_time_t sem_otime;
- unsigned int __unused2;
- compat_time_t sem_ctime;
+ unsigned int sem_otime_high;
+ unsigned int sem_otime;
+ unsigned int sem_ctime_high;
+ unsigned int sem_ctime;
compat_ulong_t sem_nsems;
compat_ulong_t __unused3;
compat_ulong_t __unused4;
@@ -243,12 +73,12 @@ struct compat_semid64_ds {
struct compat_msqid64_ds {
struct compat_ipc64_perm msg_perm;
- unsigned int __unused1;
- compat_time_t msg_stime;
- unsigned int __unused2;
- compat_time_t msg_rtime;
- unsigned int __unused3;
- compat_time_t msg_ctime;
+ unsigned int msg_stime_high;
+ unsigned int msg_stime;
+ unsigned int msg_rtime_high;
+ unsigned int msg_rtime;
+ unsigned int msg_ctime_high;
+ unsigned int msg_ctime;
compat_ulong_t msg_cbytes;
compat_ulong_t msg_qnum;
compat_ulong_t msg_qbytes;
@@ -260,12 +90,12 @@ struct compat_msqid64_ds {
struct compat_shmid64_ds {
struct compat_ipc64_perm shm_perm;
- unsigned int __unused1;
- compat_time_t shm_atime;
- unsigned int __unused2;
- compat_time_t shm_dtime;
- unsigned int __unused3;
- compat_time_t shm_ctime;
+ unsigned int shm_atime_high;
+ unsigned int shm_atime;
+ unsigned int shm_dtime_high;
+ unsigned int shm_dtime;
+ unsigned int shm_ctime_high;
+ unsigned int shm_ctime;
unsigned int __unused4;
compat_size_t shm_segsz;
compat_pid_t shm_cpid;
diff --git a/arch/powerpc/include/asm/context_tracking.h b/arch/powerpc/include/asm/context_tracking.h
index 40014921ffff..4b63931c49e0 100644
--- a/arch/powerpc/include/asm/context_tracking.h
+++ b/arch/powerpc/include/asm/context_tracking.h
@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_CONTEXT_TRACKING_H
#define _ASM_POWERPC_CONTEXT_TRACKING_H
-#ifdef CONFIG_CONTEXT_TRACKING
+#ifdef CONFIG_CONTEXT_TRACKING_USER
#define SCHEDULE_USER bl schedule_user
#else
#define SCHEDULE_USER bl schedule
diff --git a/arch/powerpc/include/asm/copro.h b/arch/powerpc/include/asm/copro.h
new file mode 100644
index 000000000000..81bd176203ab
--- /dev/null
+++ b/arch/powerpc/include/asm/copro.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2014 IBM Corp.
+ */
+
+#ifndef _ASM_POWERPC_COPRO_H
+#define _ASM_POWERPC_COPRO_H
+
+#include <linux/mm_types.h>
+
+struct copro_slb
+{
+ u64 esid, vsid;
+};
+
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+ unsigned long dsisr, vm_fault_t *flt);
+
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb);
+
+#endif /* _ASM_POWERPC_COPRO_H */
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index 4398a6cdcf53..ce483b0f8a4d 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -1,212 +1 @@
-#ifndef __CPM_H
-#define __CPM_H
-
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/of.h>
-
-/*
- * SPI Parameter RAM common to QE and CPM.
- */
-struct spi_pram {
- __be16 rbase; /* Rx Buffer descriptor base address */
- __be16 tbase; /* Tx Buffer descriptor base address */
- u8 rfcr; /* Rx function code */
- u8 tfcr; /* Tx function code */
- __be16 mrblr; /* Max receive buffer length */
- __be32 rstate; /* Internal */
- __be32 rdp; /* Internal */
- __be16 rbptr; /* Internal */
- __be16 rbc; /* Internal */
- __be32 rxtmp; /* Internal */
- __be32 tstate; /* Internal */
- __be32 tdp; /* Internal */
- __be16 tbptr; /* Internal */
- __be16 tbc; /* Internal */
- __be32 txtmp; /* Internal */
- __be32 res; /* Tx temp. */
- __be16 rpbase; /* Relocation pointer (CPM1 only) */
- __be16 res1; /* Reserved */
-};
-
-/*
- * USB Controller pram common to QE and CPM.
- */
-struct usb_ctlr {
- u8 usb_usmod;
- u8 usb_usadr;
- u8 usb_uscom;
- u8 res1[1];
- __be16 usb_usep[4];
- u8 res2[4];
- __be16 usb_usber;
- u8 res3[2];
- __be16 usb_usbmr;
- u8 res4[1];
- u8 usb_usbs;
- /* Fields down below are QE-only */
- __be16 usb_ussft;
- u8 res5[2];
- __be16 usb_usfrn;
- u8 res6[0x22];
-} __attribute__ ((packed));
-
-/*
- * Function code bits, usually generic to devices.
- */
-#ifdef CONFIG_CPM1
-#define CPMFCR_GBL ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#define CPMFCR_TC2 ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#define CPMFCR_DTB ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#define CPMFCR_BDB ((u_char)0x00) /* Flag doesn't exist in CPM1 */
-#else
-#define CPMFCR_GBL ((u_char)0x20) /* Set memory snooping */
-#define CPMFCR_TC2 ((u_char)0x04) /* Transfer code 2 value */
-#define CPMFCR_DTB ((u_char)0x02) /* Use local bus for data when set */
-#define CPMFCR_BDB ((u_char)0x01) /* Use local bus for BD when set */
-#endif
-#define CPMFCR_EB ((u_char)0x10) /* Set big endian byte order */
-
-/* Opcodes common to CPM1 and CPM2
-*/
-#define CPM_CR_INIT_TRX ((ushort)0x0000)
-#define CPM_CR_INIT_RX ((ushort)0x0001)
-#define CPM_CR_INIT_TX ((ushort)0x0002)
-#define CPM_CR_HUNT_MODE ((ushort)0x0003)
-#define CPM_CR_STOP_TX ((ushort)0x0004)
-#define CPM_CR_GRA_STOP_TX ((ushort)0x0005)
-#define CPM_CR_RESTART_TX ((ushort)0x0006)
-#define CPM_CR_CLOSE_RX_BD ((ushort)0x0007)
-#define CPM_CR_SET_GADDR ((ushort)0x0008)
-#define CPM_CR_SET_TIMER ((ushort)0x0008)
-#define CPM_CR_STOP_IDMA ((ushort)0x000b)
-
-/* Buffer descriptors used by many of the CPM protocols. */
-typedef struct cpm_buf_desc {
- ushort cbd_sc; /* Status and Control */
- ushort cbd_datlen; /* Data length in buffer */
- uint cbd_bufaddr; /* Buffer address in host memory */
-} cbd_t;
-
-/* Buffer descriptor control/status used by serial
- */
-
-#define BD_SC_EMPTY (0x8000) /* Receive is empty */
-#define BD_SC_READY (0x8000) /* Transmit is ready */
-#define BD_SC_WRAP (0x2000) /* Last buffer descriptor */
-#define BD_SC_INTRPT (0x1000) /* Interrupt on change */
-#define BD_SC_LAST (0x0800) /* Last buffer in frame */
-#define BD_SC_TC (0x0400) /* Transmit CRC */
-#define BD_SC_CM (0x0200) /* Continuous mode */
-#define BD_SC_ID (0x0100) /* Rec'd too many idles */
-#define BD_SC_P (0x0100) /* xmt preamble */
-#define BD_SC_BR (0x0020) /* Break received */
-#define BD_SC_FR (0x0010) /* Framing error */
-#define BD_SC_PR (0x0008) /* Parity error */
-#define BD_SC_NAK (0x0004) /* NAK - did not respond */
-#define BD_SC_OV (0x0002) /* Overrun */
-#define BD_SC_UN (0x0002) /* Underrun */
-#define BD_SC_CD (0x0001) /* */
-#define BD_SC_CL (0x0001) /* Collision */
-
-/* Buffer descriptor control/status used by Ethernet receive.
- * Common to SCC and FCC.
- */
-#define BD_ENET_RX_EMPTY (0x8000)
-#define BD_ENET_RX_WRAP (0x2000)
-#define BD_ENET_RX_INTR (0x1000)
-#define BD_ENET_RX_LAST (0x0800)
-#define BD_ENET_RX_FIRST (0x0400)
-#define BD_ENET_RX_MISS (0x0100)
-#define BD_ENET_RX_BC (0x0080) /* FCC Only */
-#define BD_ENET_RX_MC (0x0040) /* FCC Only */
-#define BD_ENET_RX_LG (0x0020)
-#define BD_ENET_RX_NO (0x0010)
-#define BD_ENET_RX_SH (0x0008)
-#define BD_ENET_RX_CR (0x0004)
-#define BD_ENET_RX_OV (0x0002)
-#define BD_ENET_RX_CL (0x0001)
-#define BD_ENET_RX_STATS (0x01ff) /* All status bits */
-
-/* Buffer descriptor control/status used by Ethernet transmit.
- * Common to SCC and FCC.
- */
-#define BD_ENET_TX_READY (0x8000)
-#define BD_ENET_TX_PAD (0x4000)
-#define BD_ENET_TX_WRAP (0x2000)
-#define BD_ENET_TX_INTR (0x1000)
-#define BD_ENET_TX_LAST (0x0800)
-#define BD_ENET_TX_TC (0x0400)
-#define BD_ENET_TX_DEF (0x0200)
-#define BD_ENET_TX_HB (0x0100)
-#define BD_ENET_TX_LC (0x0080)
-#define BD_ENET_TX_RL (0x0040)
-#define BD_ENET_TX_RCMASK (0x003c)
-#define BD_ENET_TX_UN (0x0002)
-#define BD_ENET_TX_CSL (0x0001)
-#define BD_ENET_TX_STATS (0x03ff) /* All status bits */
-
-/* Buffer descriptor control/status used by Transparent mode SCC.
- */
-#define BD_SCC_TX_LAST (0x0800)
-
-/* Buffer descriptor control/status used by I2C.
- */
-#define BD_I2C_START (0x0400)
-
-int cpm_muram_init(void);
-
-#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
-unsigned long cpm_muram_alloc(unsigned long size, unsigned long align);
-int cpm_muram_free(unsigned long offset);
-unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
-void __iomem *cpm_muram_addr(unsigned long offset);
-unsigned long cpm_muram_offset(void __iomem *addr);
-dma_addr_t cpm_muram_dma(void __iomem *addr);
-#else
-static inline unsigned long cpm_muram_alloc(unsigned long size,
- unsigned long align)
-{
- return -ENOSYS;
-}
-
-static inline int cpm_muram_free(unsigned long offset)
-{
- return -ENOSYS;
-}
-
-static inline unsigned long cpm_muram_alloc_fixed(unsigned long offset,
- unsigned long size)
-{
- return -ENOSYS;
-}
-
-static inline void __iomem *cpm_muram_addr(unsigned long offset)
-{
- return NULL;
-}
-
-static inline unsigned long cpm_muram_offset(void __iomem *addr)
-{
- return -ENOSYS;
-}
-
-static inline dma_addr_t cpm_muram_dma(void __iomem *addr)
-{
- return 0;
-}
-#endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */
-
-#ifdef CONFIG_CPM
-int cpm_command(u32 command, u8 opcode);
-#else
-static inline int cpm_command(u32 command, u8 opcode)
-{
- return -ENOSYS;
-}
-#endif /* CONFIG_CPM */
-
-int cpm2_gpiochip_add32(struct device_node *np);
-
-#endif
+#include <soc/fsl/cpm.h>
diff --git a/arch/powerpc/include/asm/cpm1.h b/arch/powerpc/include/asm/cpm1.h
index 8ee4211ca0c6..e3c6969853ef 100644
--- a/arch/powerpc/include/asm/cpm1.h
+++ b/arch/powerpc/include/asm/cpm1.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* MPC8xx Communication Processor Module.
* Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
@@ -48,11 +49,6 @@
*/
extern cpm8xx_t __iomem *cpmp; /* Pointer to comm processor */
-#define cpm_dpalloc cpm_muram_alloc
-#define cpm_dpfree cpm_muram_free
-#define cpm_dpram_addr cpm_muram_addr
-#define cpm_dpram_phys cpm_muram_dma
-
extern void cpm_setbrg(uint brg, uint rate);
extern void __init cpm_load_patch(cpm8xx_t *cp);
@@ -67,6 +63,7 @@ extern void cpm_reset(void);
#define PROFF_SPI ((uint)0x0180)
#define PROFF_SCC3 ((uint)0x0200)
#define PROFF_SMC1 ((uint)0x0280)
+#define PROFF_DSP1 ((uint)0x02c0)
#define PROFF_SCC4 ((uint)0x0300)
#define PROFF_SMC2 ((uint)0x0380)
@@ -560,6 +557,8 @@ typedef struct risc_timer_pram {
#define CPM_PIN_SECONDARY 2
#define CPM_PIN_GPIO 4
#define CPM_PIN_OPENDRAIN 8
+#define CPM_PIN_FALLEDGE 16
+#define CPM_PIN_ANYEDGE 0
enum cpm_port {
CPM_PORTA,
@@ -602,5 +601,7 @@ enum cpm_clk {
};
int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int cpm1_gpiochip_add16(struct device *dev);
+int cpm1_gpiochip_add32(struct device *dev);
#endif /* __CPM1__ */
diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h
index 7c8608b09694..a22acc36eb9b 100644
--- a/arch/powerpc/include/asm/cpm2.h
+++ b/arch/powerpc/include/asm/cpm2.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Communication Processor Module v2.
*
@@ -86,10 +87,6 @@
*/
extern cpm_cpm2_t __iomem *cpmp; /* Pointer to comm processor */
-#define cpm_dpalloc cpm_muram_alloc
-#define cpm_dpfree cpm_muram_free
-#define cpm_dpram_addr cpm_muram_addr
-
extern void cpm2_reset(void);
/* Baud rate generators.
@@ -593,7 +590,7 @@ typedef struct fcc_enet {
uint fen_p256c; /* Total packets 256 < bytes <= 511 */
uint fen_p512c; /* Total packets 512 < bytes <= 1023 */
uint fen_p1024c; /* Total packets 1024 < bytes <= 1518 */
- uint fen_cambuf; /* Internal CAM buffer poiner */
+ uint fen_cambuf; /* Internal CAM buffer pointer */
ushort fen_rfthr; /* Received frames threshold */
ushort fen_rfcnt; /* Received frames count */
} fcc_enet_t;
@@ -1079,6 +1076,9 @@ typedef struct im_idma {
#define FCC2_MEM_OFFSET FCC_MEM_OFFSET(1)
#define FCC3_MEM_OFFSET FCC_MEM_OFFSET(2)
+/* Pipeline Maximum Depth */
+#define MPC82XX_BCR_PLDP 0x00800000
+
/* Clocks and GRG's */
enum cpm_clk_dir {
@@ -1132,8 +1132,8 @@ enum cpm_clk {
CPM_CLK_DUMMY
};
-extern int cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode);
-extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
+int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode);
+int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
#define CPM_PIN_INPUT 0
#define CPM_PIN_OUTPUT 1
@@ -1142,7 +1142,7 @@ extern int cpm2_smc_clk_setup(enum cpm_clk_target target, int clock);
#define CPM_PIN_GPIO 4
#define CPM_PIN_OPENDRAIN 8
-void cpm2_set_pin(int port, int pin, int flags);
+void __init cpm2_set_pin(int port, int pin, int flags);
#endif /* __CPM2__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
new file mode 100644
index 000000000000..604fa3b6c33d
--- /dev/null
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_CPU_HAS_FEATURE_H
+#define __ASM_POWERPC_CPU_HAS_FEATURE_H
+
+#ifndef __ASSEMBLER__
+
+#include <linux/bug.h>
+#include <asm/cputable.h>
+
+static __always_inline bool early_cpu_has_feature(unsigned long feature)
+{
+ return !!((CPU_FTRS_ALWAYS & feature) ||
+ (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature));
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_CPU_FTR_KEYS BITS_PER_LONG
+
+extern struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS];
+
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+ int i;
+
+ BUILD_BUG_ON(!__builtin_constant_p(feature));
+ BUILD_BUG_ON(__builtin_popcountl(feature) > 1);
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+ if (!static_key_feature_checks_initialized) {
+ printk("Warning! cpu_has_feature() used prior to jump label init!\n");
+ dump_stack();
+ return early_cpu_has_feature(feature);
+ }
+#endif
+
+ if (CPU_FTRS_ALWAYS & feature)
+ return true;
+
+ if (!(CPU_FTRS_POSSIBLE & feature))
+ return false;
+
+ i = __builtin_ctzl(feature);
+ return static_branch_likely(&cpu_feature_keys[i]);
+}
+#else
+static __always_inline bool cpu_has_feature(unsigned long feature)
+{
+ return early_cpu_has_feature(feature);
+}
+#endif
+
+#endif /* __ASSEMBLER__ */
+#endif /* __ASM_POWERPC_CPU_HAS_FEATURE_H */
diff --git a/arch/powerpc/include/asm/cpu_setup.h b/arch/powerpc/include/asm/cpu_setup.h
new file mode 100644
index 000000000000..30e2fe389502
--- /dev/null
+++ b/arch/powerpc/include/asm/cpu_setup.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_CPU_SETUP_H
+#define _ASM_POWERPC_CPU_SETUP_H
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_power7(void);
+void __restore_cpu_power8(void);
+void __restore_cpu_power9(void);
+void __restore_cpu_power10(void);
+
+void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440ep(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440epx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440gx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440grx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440spe(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_440x5(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460ex(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460gt(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_603(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_604(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750cx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_750fx(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_7400(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_7410(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_745x(unsigned long offset, struct cpu_spec *spec);
+
+void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_pa6t(void);
+void __restore_cpu_ppc970(void);
+
+void __setup_cpu_e5500(unsigned long offset, struct cpu_spec *spec);
+void __setup_cpu_e6500(unsigned long offset, struct cpu_spec *spec);
+void __restore_cpu_e5500(void);
+void __restore_cpu_e6500(void);
+#endif /* _ASM_POWERPC_CPU_SETUP_H */
diff --git a/arch/powerpc/include/asm/cpufeature.h b/arch/powerpc/include/asm/cpufeature.h
new file mode 100644
index 000000000000..2dcc66225e7f
--- /dev/null
+++ b/arch/powerpc/include/asm/cpufeature.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CPU feature definitions for module loading, used by
+ * module_cpu_feature_match(), see asm/cputable.h for powerpc CPU features.
+ *
+ * Copyright 2016 Alastair D'Silva, IBM Corporation.
+ */
+
+#ifndef __ASM_POWERPC_CPUFEATURE_H
+#define __ASM_POWERPC_CPUFEATURE_H
+
+#include <asm/cputable.h>
+
+/* Keep these in step with powerpc/include/asm/cputable.h */
+#define MAX_CPU_FEATURES (2 * 32)
+
+/*
+ * Currently we don't have a need for any of the feature bits defined in
+ * cpu_user_features. When we do, they should be defined such as:
+ *
+ * #define PPC_MODULE_FEATURE_32 (ilog2(PPC_FEATURE_32))
+ */
+
+#define PPC_MODULE_FEATURE_VEC_CRYPTO (32 + ilog2(PPC_FEATURE2_VEC_CRYPTO))
+#define PPC_MODULE_FEATURE_P10 (32 + ilog2(PPC_FEATURE2_ARCH_3_1))
+
+#define cpu_feature(x) (x)
+
+static inline bool cpu_have_feature(unsigned int num)
+{
+	/* Numbers 0-31 test cpu_user_features; 32 and up test cpu_user_features2. */
+	if (num >= 32)
+		return (cur_cpu_spec->cpu_user_features2 & (1UL << (num - 32))) != 0;
+	return (cur_cpu_spec->cpu_user_features & (1UL << num)) != 0;
+}
+
+#endif /* __ASM_POWERPC_CPUFEATURE_H */
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 000000000000..054cd2fcfd55
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Thread state used in powernv idle state management */
+#define PNV_THREAD_RUNNING 0
+#define PNV_THREAD_NAP 1
+#define PNV_THREAD_SLEEP 2
+#define PNV_THREAD_WINKLE 3
+
+/*
+ * Core state used in powernv idle for POWER8.
+ *
+ * The lock bit synchronizes updates to the state, as well as parts of the
+ * sleep/wake code (see kernel/idle_book3s.S).
+ *
+ * Bottom 8 bits track the idle state of each thread. Bit is cleared before
+ * the thread executes an idle instruction (nap/sleep/winkle).
+ *
+ * Then there is winkle tracking. A core does not lose complete state
+ * until every thread is in winkle. So the winkle count field counts the
+ * number of threads in winkle (small window of false positives is okay
+ * around the sleep/wake, so long as there are no false negatives).
+ *
+ * When the winkle count reaches 8 (the COUNT_ALL_BIT becomes set), then
+ * the THREAD_WINKLE_BITS are set, which indicate which threads have not
+ * yet woken from the winkle state.
+ */
+#define NR_PNV_CORE_IDLE_LOCK_BIT 28
+#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
+
+#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16
+#define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000
+#define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8
+#define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00
+
+#define PNV_CORE_IDLE_THREAD_BITS 0x000000FF
+
+/*
+ * ============================ NOTE =================================
+ * The older firmware populates only the RL field in the psscr_val and
+ * sets the psscr_mask to 0xf. On such a firmware, the kernel sets the
+ * remaining PSSCR fields to default values as follows:
+ *
+ * - ESL and EC bits are set to 1. So wakeup from any stop state will be
+ * at vector 0x100.
+ *
+ * - MTL and PSLL are set to the maximum allowed value as per the ISA,
+ * i.e. 15.
+ *
+ * - The Transition Rate, TR is set to the Maximum value 3.
+ */
+#define PSSCR_HV_DEFAULT_VAL (PSSCR_ESL | PSSCR_EC | \
+ PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
+ PSSCR_MTL_MASK)
+
+#define PSSCR_HV_DEFAULT_MASK (PSSCR_ESL | PSSCR_EC | \
+ PSSCR_PSLL_MASK | PSSCR_TR_MASK | \
+ PSSCR_MTL_MASK | PSSCR_RL_MASK)
+#define PSSCR_EC_SHIFT 20
+#define PSSCR_ESL_SHIFT 21
+#define GET_PSSCR_EC(x) (((x) & PSSCR_EC) >> PSSCR_EC_SHIFT)
+#define GET_PSSCR_ESL(x) (((x) & PSSCR_ESL) >> PSSCR_ESL_SHIFT)
+#define GET_PSSCR_RL(x) ((x) & PSSCR_RL_MASK)
+
+#define ERR_EC_ESL_MISMATCH -1
+#define ERR_DEEP_STATE_ESL_MISMATCH -2
+
+#ifndef __ASSEMBLER__
+
+#define PNV_IDLE_NAME_LEN 16
+struct pnv_idle_states_t {	/* describes one powernv idle state entry */
+ char name[PNV_IDLE_NAME_LEN];	/* fixed-size name buffer (PNV_IDLE_NAME_LEN bytes) */
+ u32 latency_ns;	/* presumably wakeup latency in ns -- TODO confirm */
+ u32 residency_ns;	/* presumably minimum residency in ns -- TODO confirm */
+ u64 psscr_val;	/* presumably the PSSCR value for this state; see the firmware NOTE above */
+ u64 psscr_mask;	/* presumably selects which PSSCR fields psscr_val supplies -- confirm */
+ u32 flags;	/* meaning not visible in this header -- check the users */
+ bool valid;	/* presumably set once the state has been validated -- confirm */
+};
+
+extern struct pnv_idle_states_t *pnv_idle_states;
+extern int nr_pnv_idle_states;
+
+unsigned long pnv_cpu_offline(unsigned int cpu);
+int __init validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
+static inline void report_invalid_psscr_val(u64 psscr_val, int err)
+{
+	/* Warn about an invalid PSSCR value; any other @err is silently ignored. */
+	switch (err) {
+	case ERR_EC_ESL_MISMATCH:
+		pr_warn("Invalid psscr 0x%016llx : ESL,EC bits unequal\n", psscr_val);
+		break;
+	case ERR_DEEP_STATE_ESL_MISMATCH:
+		pr_warn("Invalid psscr 0x%016llx : ESL cleared for deep stop-state\n", psscr_val);
+		break;
+	}
+}
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index daa5af91163c..ec16c12296da 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -1,12 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_POWERPC_CPUTABLE_H
#define __ASM_POWERPC_CPUTABLE_H
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
+#include <linux/types.h>
#include <uapi/asm/cputable.h>
+#include <asm/asm-const.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* This structure can grow, it's real size is used by head.S code
* via the mkdefs mechanism.
@@ -16,16 +17,6 @@ struct cpu_spec;
typedef void (*cpu_setup_t)(unsigned long offset, struct cpu_spec* spec);
typedef void (*cpu_restore_t)(void);
-enum powerpc_oprofile_type {
- PPC_OPROFILE_INVALID = 0,
- PPC_OPROFILE_RS64 = 1,
- PPC_OPROFILE_POWER4 = 2,
- PPC_OPROFILE_G4 = 3,
- PPC_OPROFILE_FSL_EMB = 4,
- PPC_OPROFILE_CELL = 5,
- PPC_OPROFILE_PA6T = 6,
-};
-
enum powerpc_pmc_type {
PPC_PMC_DEFAULT = 0,
PPC_PMC_IBM = 1,
@@ -40,8 +31,14 @@ extern int machine_check_4xx(struct pt_regs *regs);
extern int machine_check_440A(struct pt_regs *regs);
extern int machine_check_e500mc(struct pt_regs *regs);
extern int machine_check_e500(struct pt_regs *regs);
-extern int machine_check_e200(struct pt_regs *regs);
extern int machine_check_47x(struct pt_regs *regs);
+int machine_check_8xx(struct pt_regs *regs);
+int machine_check_83xx(struct pt_regs *regs);
+
+extern void cpu_down_flush_e500v2(void);
+extern void cpu_down_flush_e500mc(void);
+extern void cpu_down_flush_e5500(void);
+extern void cpu_down_flush_e6500(void);
/* NOTE WELL: Update identify_cpu() if fields are added or removed! */
struct cpu_spec {
@@ -59,6 +56,9 @@ struct cpu_spec {
unsigned int icache_bsize;
unsigned int dcache_bsize;
+ /* flush caches inside the current cpu */
+ void (*cpu_down_flush)(void);
+
/* number of performance monitor counters */
unsigned int num_pmcs;
enum powerpc_pmc_type pmc_type;
@@ -70,19 +70,6 @@ struct cpu_spec {
/* Used to restore cpu setup on secondary processors and at resume */
cpu_restore_t cpu_restore;
- /* Used by oprofile userspace to select the right counters */
- char *oprofile_cpu_type;
-
- /* Processor specific oprofile operations */
- enum powerpc_oprofile_type oprofile_type;
-
- /* Bit locations inside the mmcra change */
- unsigned long oprofile_mmcra_sihv;
- unsigned long oprofile_mmcra_sipr;
-
- /* Bits to clear during an oprofile exception */
- unsigned long oprofile_mmcra_clear;
-
/* Name of processor class, for the ELF AT_PLATFORM entry */
char *platform;
@@ -96,63 +83,69 @@ struct cpu_spec {
* called in real mode to handle SLB and TLB errors.
*/
long (*machine_check_early)(struct pt_regs *regs);
-
- /*
- * Processor specific routine to flush tlbs.
- */
- void (*flush_tlb)(unsigned long inval_selector);
-
};
extern struct cpu_spec *cur_cpu_spec;
extern unsigned int __start___ftr_fixup, __stop___ftr_fixup;
+extern void set_cur_cpu_spec(struct cpu_spec *s);
extern struct cpu_spec *identify_cpu(unsigned long offset, unsigned int pvr);
+extern void identify_cpu_name(unsigned int pvr);
extern void do_feature_fixups(unsigned long value, void *fixup_start,
void *fixup_end);
extern const char *powerpc_base_platform;
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+extern void cpu_feature_keys_init(void);
+#else
+static inline void cpu_feature_keys_init(void) { }
+#endif
+
+#endif /* __ASSEMBLER__ */
/* CPU kernel features */
-/* Retain the 32b definitions all use bottom half of word */
+/* Definitions for features that we have on both 32-bit and 64-bit chips */
#define CPU_FTR_COHERENT_ICACHE ASM_CONST(0x00000001)
-#define CPU_FTR_L2CR ASM_CONST(0x00000002)
-#define CPU_FTR_SPEC7450 ASM_CONST(0x00000004)
-#define CPU_FTR_ALTIVEC ASM_CONST(0x00000008)
-#define CPU_FTR_TAU ASM_CONST(0x00000010)
-#define CPU_FTR_CAN_DOZE ASM_CONST(0x00000020)
-#define CPU_FTR_USE_TB ASM_CONST(0x00000040)
-#define CPU_FTR_L2CSR ASM_CONST(0x00000080)
-#define CPU_FTR_601 ASM_CONST(0x00000100)
-#define CPU_FTR_DBELL ASM_CONST(0x00000200)
-#define CPU_FTR_CAN_NAP ASM_CONST(0x00000400)
-#define CPU_FTR_L3CR ASM_CONST(0x00000800)
-#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00001000)
-#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00002000)
-#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00004000)
-#define CPU_FTR_NO_DPM ASM_CONST(0x00008000)
-#define CPU_FTR_476_DD2 ASM_CONST(0x00010000)
-#define CPU_FTR_NEED_COHERENT ASM_CONST(0x00020000)
-#define CPU_FTR_NO_BTIC ASM_CONST(0x00040000)
-#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00080000)
-#define CPU_FTR_NODSISRALIGN ASM_CONST(0x00100000)
-#define CPU_FTR_PPC_LE ASM_CONST(0x00200000)
-#define CPU_FTR_REAL_LE ASM_CONST(0x00400000)
-#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00800000)
-#define CPU_FTR_UNIFIED_ID_CACHE ASM_CONST(0x01000000)
-#define CPU_FTR_SPE ASM_CONST(0x02000000)
-#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x04000000)
-#define CPU_FTR_LWSYNC ASM_CONST(0x08000000)
-#define CPU_FTR_NOEXECUTE ASM_CONST(0x10000000)
-#define CPU_FTR_INDEXED_DCR ASM_CONST(0x20000000)
-#define CPU_FTR_EMB_HV ASM_CONST(0x40000000)
+#define CPU_FTR_ALTIVEC ASM_CONST(0x00000002)
+#define CPU_FTR_DBELL ASM_CONST(0x00000004)
+#define CPU_FTR_CAN_NAP ASM_CONST(0x00000008)
+#define CPU_FTR_DEBUG_LVL_EXC ASM_CONST(0x00000010)
+// ASM_CONST(0x00000020) Free
+#define CPU_FTR_FPU_UNAVAILABLE ASM_CONST(0x00000040)
+#define CPU_FTR_LWSYNC ASM_CONST(0x00000080)
+#define CPU_FTR_NOEXECUTE ASM_CONST(0x00000100)
+#define CPU_FTR_EMB_HV ASM_CONST(0x00000200)
+
+/* Definitions for features that only exist on 32-bit chips */
+#ifdef CONFIG_PPC32
+#define CPU_FTR_L2CR ASM_CONST(0x00002000)
+#define CPU_FTR_SPEC7450 ASM_CONST(0x00004000)
+#define CPU_FTR_TAU ASM_CONST(0x00008000)
+#define CPU_FTR_CAN_DOZE ASM_CONST(0x00010000)
+#define CPU_FTR_L3CR ASM_CONST(0x00040000)
+#define CPU_FTR_L3_DISABLE_NAP ASM_CONST(0x00080000)
+#define CPU_FTR_NAP_DISABLE_L2_PR ASM_CONST(0x00100000)
+#define CPU_FTR_DUAL_PLL_750FX ASM_CONST(0x00200000)
+#define CPU_FTR_NO_DPM ASM_CONST(0x00400000)
+#define CPU_FTR_476_DD2 ASM_CONST(0x00800000)
+#define CPU_FTR_NEED_COHERENT ASM_CONST(0x01000000)
+#define CPU_FTR_NO_BTIC ASM_CONST(0x02000000)
+#define CPU_FTR_PPC_LE ASM_CONST(0x04000000)
+#define CPU_FTR_SPE ASM_CONST(0x10000000)
+#define CPU_FTR_NEED_PAIRED_STWCX ASM_CONST(0x20000000)
+#define CPU_FTR_INDEXED_DCR ASM_CONST(0x40000000)
+
+#else /* CONFIG_PPC32 */
+/* Define these to 0 for the sake of tests in common code */
+#define CPU_FTR_PPC_LE (0)
+#define CPU_FTR_SPE (0)
+#endif
/*
- * Add the 64-bit processor unique features in the top half of the word;
+ * Definitions for the 64-bit processor unique features;
* on 32-bit, make the names available but defined to be 0.
*/
#ifdef __powerpc64__
@@ -161,41 +154,50 @@ extern const char *powerpc_base_platform;
#define LONG_ASM_CONST(x) 0
#endif
-#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000100000000)
-#define CPU_FTR_ARCH_201 LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000400000000)
-#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000800000000)
-#define CPU_FTR_IABR LONG_ASM_CONST(0x0000001000000000)
-#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000002000000000)
-#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000004000000000)
-#define CPU_FTR_SMT LONG_ASM_CONST(0x0000008000000000)
-#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000010000000000)
-#define CPU_FTR_PURR LONG_ASM_CONST(0x0000020000000000)
-#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000040000000000)
-#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000080000000000)
-#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000100000000000)
-#define CPU_FTR_VSX LONG_ASM_CONST(0x0000200000000000)
-#define CPU_FTR_SAO LONG_ASM_CONST(0x0000400000000000)
-#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000800000000000)
-#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0001000000000000)
-#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0002000000000000)
-#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0004000000000000)
-#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0008000000000000)
-#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0010000000000000)
-#define CPU_FTR_ICSWX LONG_ASM_CONST(0x0020000000000000)
-#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0040000000000000)
-#define CPU_FTR_TM LONG_ASM_CONST(0x0080000000000000)
-#define CPU_FTR_CFAR LONG_ASM_CONST(0x0100000000000000)
-#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000)
-#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
-#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
-#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
-
-#ifndef __ASSEMBLY__
-
-#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN)
-
-#define MMU_FTR_PPCAS_ARCH_V2 (MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE)
+#define CPU_FTR_REAL_LE LONG_ASM_CONST(0x0000000000001000)
+#define CPU_FTR_HVMODE LONG_ASM_CONST(0x0000000000002000)
+#define CPU_FTR_ARCH_206 LONG_ASM_CONST(0x0000000000008000)
+#define CPU_FTR_ARCH_207S LONG_ASM_CONST(0x0000000000010000)
+#define CPU_FTR_ARCH_300 LONG_ASM_CONST(0x0000000000020000)
+#define CPU_FTR_MMCRA LONG_ASM_CONST(0x0000000000040000)
+#define CPU_FTR_CTRL LONG_ASM_CONST(0x0000000000080000)
+#define CPU_FTR_SMT LONG_ASM_CONST(0x0000000000100000)
+#define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000000000200000)
+#define CPU_FTR_PURR LONG_ASM_CONST(0x0000000000400000)
+#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000000000800000)
+#define CPU_FTR_SPURR LONG_ASM_CONST(0x0000000001000000)
+#define CPU_FTR_DSCR LONG_ASM_CONST(0x0000000002000000)
+#define CPU_FTR_VSX LONG_ASM_CONST(0x0000000004000000)
+#define CPU_FTR_SAO LONG_ASM_CONST(0x0000000008000000)
+#define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0000000010000000)
+#define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0000000020000000)
+#define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0000000040000000)
+#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0000000080000000)
+#define CPU_FTR_POPCNTB LONG_ASM_CONST(0x0000000100000000)
+#define CPU_FTR_POPCNTD LONG_ASM_CONST(0x0000000200000000)
+/* LONG_ASM_CONST(0x0000000400000000) Free */
+#define CPU_FTR_VMX_COPY LONG_ASM_CONST(0x0000000800000000)
+#define CPU_FTR_TM LONG_ASM_CONST(0x0000001000000000)
+#define CPU_FTR_CFAR LONG_ASM_CONST(0x0000002000000000)
+#define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0000004000000000)
+#define CPU_FTR_DAWR LONG_ASM_CONST(0x0000008000000000)
+#define CPU_FTR_DABRX LONG_ASM_CONST(0x0000010000000000)
+#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x0000020000000000)
+#define CPU_FTR_POWER9_DD2_1 LONG_ASM_CONST(0x0000080000000000)
+#define CPU_FTR_P9_TM_HV_ASSIST LONG_ASM_CONST(0x0000100000000000)
+#define CPU_FTR_P9_TM_XER_SO_BUG LONG_ASM_CONST(0x0000200000000000)
+#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000)
+#define CPU_FTR_P9_TIDR LONG_ASM_CONST(0x0000800000000000)
+#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000)
+#define CPU_FTR_P9_RADIX_PREFETCH_BUG LONG_ASM_CONST(0x0002000000000000)
+#define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000)
+#define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000)
+#define CPU_FTR_DEXCR_NPHIE LONG_ASM_CONST(0x0010000000000000)
+#define CPU_FTR_P11_PVR LONG_ASM_CONST(0x0020000000000000)
+
+#ifndef __ASSEMBLER__
+
+#define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_NOEXECUTE)
/* We only set the altivec features if the kernel was compiled with altivec
* support
@@ -236,11 +238,13 @@ extern const char *powerpc_base_platform;
/* We only set the TM feature if the kernel was compiled with TM supprt */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-#define CPU_FTR_TM_COMP CPU_FTR_TM
-#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM
+#define CPU_FTR_TM_COMP CPU_FTR_TM
+#define PPC_FEATURE2_HTM_COMP PPC_FEATURE2_HTM
+#define PPC_FEATURE2_HTM_NOSC_COMP PPC_FEATURE2_HTM_NOSC
#else
-#define CPU_FTR_TM_COMP 0
-#define PPC_FEATURE2_HTM_COMP 0
+#define CPU_FTR_TM_COMP 0
+#define PPC_FEATURE2_HTM_COMP 0
+#define PPC_FEATURE2_HTM_NOSC_COMP 0
#endif
/* We need to mark all pages as being coherent if we're SMP or we have a
@@ -249,7 +253,7 @@ extern const char *powerpc_base_platform;
* This is also required by 52xx family.
*/
#if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \
- || defined(CONFIG_PPC_83xx) || defined(CONFIG_8260) \
+ || defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_82xx) \
|| defined(CONFIG_PPC_MPC52xx)
#define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT
#else
@@ -267,22 +271,18 @@ extern const char *powerpc_base_platform;
#define CPU_FTR_MAYBE_CAN_NAP 0
#endif
-#define CPU_FTRS_PPC601 (CPU_FTR_COMMON | CPU_FTR_601 | \
- CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
-#define CPU_FTRS_603 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
- CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
-#define CPU_FTRS_604 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_PPC_LE)
+#define CPU_FTRS_603 (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_604 (CPU_FTR_COMMON | CPU_FTR_PPC_LE)
#define CPU_FTRS_740_NOTAU (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_740 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_PPC_LE)
#define CPU_FTRS_750CL (CPU_FTRS_750)
@@ -291,125 +291,108 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_750FX (CPU_FTRS_750 | CPU_FTR_DUAL_PLL_750FX)
#define CPU_FTRS_750GX (CPU_FTRS_750FX)
#define CPU_FTRS_7400_NOTAU (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7400 (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_L2CR | \
CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
#define CPU_FTRS_7450_20 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_21 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7450_23 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_1 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
CPU_FTR_SPEC7450 | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455_20 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | CPU_FTR_NEED_PAIRED_STWCX | \
+ CPU_FTR_NEED_PAIRED_STWCX | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | \
CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
#define CPU_FTRS_7455 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447_10 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE | \
CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_L3CR | CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7447A (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
#define CPU_FTRS_7448 (CPU_FTR_COMMON | \
- CPU_FTR_USE_TB | \
CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_SPEC7450 | CPU_FTR_NAP_DISABLE_L2_PR | \
CPU_FTR_PPC_LE | CPU_FTR_NEED_PAIRED_STWCX)
-#define CPU_FTRS_82XX (CPU_FTR_COMMON | \
- CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+#define CPU_FTRS_82XX (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_G2_LE (CPU_FTR_COMMON | CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP)
+ CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E300 (CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
- CPU_FTR_COMMON)
+ CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_COMMON | CPU_FTR_NOEXECUTE)
#define CPU_FTRS_E300C2 (CPU_FTR_MAYBE_CAN_DOZE | \
- CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
- CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
-#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX (CPU_FTR_USE_TB)
-#define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \
+ CPU_FTR_MAYBE_CAN_NAP | \
+ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE | CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON)
+#define CPU_FTRS_8XX (CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_44X (CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_440x6 (CPU_FTR_NOEXECUTE | \
CPU_FTR_INDEXED_DCR)
#define CPU_FTRS_47X (CPU_FTRS_440x6)
-#define CPU_FTRS_E200 (CPU_FTR_USE_TB | CPU_FTR_SPE_COMP | \
- CPU_FTR_NODSISRALIGN | CPU_FTR_COHERENT_ICACHE | \
- CPU_FTR_UNIFIED_ID_CACHE | CPU_FTR_NOEXECUTE | \
- CPU_FTR_DEBUG_LVL_EXC)
-#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
- CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN | \
+#define CPU_FTRS_E500 (CPU_FTR_MAYBE_CAN_DOZE | \
+ CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+#define CPU_FTRS_E500_2 (CPU_FTR_MAYBE_CAN_DOZE | \
CPU_FTR_SPE_COMP | CPU_FTR_MAYBE_CAN_NAP | \
- CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE)
-#define CPU_FTRS_E500MC (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+ CPU_FTR_NOEXECUTE)
+#define CPU_FTRS_E500MC ( \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV)
/*
* e5500/e6500 erratum A-006958 is a timebase bug that can use the
* same workaround as CPU_FTR_CELL_TB_BUG.
*/
-#define CPU_FTRS_E5500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E5500 ( \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_CELL_TB_BUG)
-#define CPU_FTRS_E6500 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_L2CSR | CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
+#define CPU_FTRS_E6500 ( \
+ CPU_FTR_LWSYNC | CPU_FTR_NOEXECUTE | \
CPU_FTR_DBELL | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_DEBUG_LVL_EXC | CPU_FTR_EMB_HV | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_CELL_TB_BUG | CPU_FTR_SMT)
-#define CPU_FTRS_GENERIC_32 (CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
/* 64-bit CPUs */
-#define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PPC970 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
- CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \
- CPU_FTR_STCX_CHECKS_ADDRESS)
-#define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
- CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_201 | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \
CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS | \
CPU_FTR_HVMODE | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER5 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | CPU_FTR_PURR | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER6 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
@@ -417,83 +400,119 @@ extern const char *powerpc_base_platform;
CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_CFAR | \
CPU_FTR_DABRX)
-#define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER7 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | \
- CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
-#define CPU_FTRS_POWER8 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | \
+ CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX )
+#define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_COHERENT_ICACHE | \
CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
CPU_FTR_DSCR | CPU_FTR_SAO | \
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
- CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
- CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
+ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP )
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
-#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
-#define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+ CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_P9_TLBIE_STQ_BUG | \
+ CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
+#define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG)
+#define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \
+ CPU_FTR_P9_RADIX_PREFETCH_BUG | \
+ CPU_FTR_POWER9_DD2_1)
+#define CPU_FTRS_POWER9_DD2_2 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+ CPU_FTR_P9_TM_HV_ASSIST | \
+ CPU_FTR_P9_TM_XER_SO_BUG)
+#define CPU_FTRS_POWER9_DD2_3 (CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD2_1 | \
+ CPU_FTR_P9_TM_HV_ASSIST | \
+ CPU_FTR_P9_TM_XER_SO_BUG | \
+ CPU_FTR_DAWR)
+#define CPU_FTRS_POWER10 (CPU_FTR_LWSYNC | \
+ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
+ CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ CPU_FTR_DSCR | CPU_FTR_SAO | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
+ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
+ CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
+ CPU_FTR_DAWR | CPU_FTR_DAWR1 | \
+ CPU_FTR_DEXCR_NPHIE)
+
+#define CPU_FTRS_POWER11 (CPU_FTRS_POWER10 | CPU_FTR_P11_PVR)
+
+#define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
CPU_FTR_PAUSE_ZERO | CPU_FTR_CELL_TB_BUG | CPU_FTR_CP_USE_DCBTZ | \
CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_DABRX)
-#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
+#define CPU_FTRS_PA6T (CPU_FTR_LWSYNC | \
CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_ALTIVEC_COMP | \
CPU_FTR_PURR | CPU_FTR_REAL_LE | CPU_FTR_DABRX)
-#define CPU_FTRS_COMPATIBLE (CPU_FTR_USE_TB | CPU_FTR_PPCAS_ARCH_V2)
+#define CPU_FTRS_COMPATIBLE (CPU_FTR_PPCAS_ARCH_V2)
-#define CPU_FTRS_A2 (CPU_FTR_USE_TB | CPU_FTR_SMT | CPU_FTR_DBELL | \
- CPU_FTR_NOEXECUTE | CPU_FTR_NODSISRALIGN | \
- CPU_FTR_ICSWX | CPU_FTR_DABRX )
-
-#ifdef __powerpc64__
-#ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500 | CPU_FTRS_A2)
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
+#define CPU_FTRS_POSSIBLE (CPU_FTRS_E6500 | CPU_FTRS_E5500)
#else
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define CPU_FTRS_POSSIBLE \
- (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
+ (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \
+ CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \
+ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \
+ CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10 | CPU_FTRS_POWER11)
+#else
+#define CPU_FTRS_POSSIBLE \
+ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
- CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
- CPU_FTRS_PA6T | CPU_FTR_VSX)
+ CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \
+ CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \
+ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \
+ CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10 | CPU_FTRS_POWER11)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
enum {
CPU_FTRS_POSSIBLE =
-#ifdef CONFIG_PPC_BOOK3S_32
- CPU_FTRS_PPC601 | CPU_FTRS_603 | CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
+#ifdef CONFIG_PPC_BOOK3S_604
+ CPU_FTRS_604 | CPU_FTRS_740_NOTAU |
CPU_FTRS_740 | CPU_FTRS_750 | CPU_FTRS_750FX1 |
CPU_FTRS_750FX2 | CPU_FTRS_750FX | CPU_FTRS_750GX |
CPU_FTRS_7400_NOTAU | CPU_FTRS_7400 | CPU_FTRS_7450_20 |
CPU_FTRS_7450_21 | CPU_FTRS_7450_23 | CPU_FTRS_7455_1 |
CPU_FTRS_7455_20 | CPU_FTRS_7455 | CPU_FTRS_7447_10 |
- CPU_FTRS_7447 | CPU_FTRS_7447A | CPU_FTRS_82XX |
- CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 |
+ CPU_FTRS_7447 | CPU_FTRS_7447A |
CPU_FTRS_CLASSIC32 |
-#else
- CPU_FTRS_GENERIC_32 |
#endif
-#ifdef CONFIG_8xx
- CPU_FTRS_8XX |
-#endif
-#ifdef CONFIG_40x
- CPU_FTRS_40X |
+#ifdef CONFIG_PPC_BOOK3S_603
+ CPU_FTRS_603 | CPU_FTRS_82XX |
+ CPU_FTRS_G2_LE | CPU_FTRS_E300 | CPU_FTRS_E300C2 |
#endif
-#ifdef CONFIG_44x
- CPU_FTRS_44X | CPU_FTRS_440x6 |
+#ifdef CONFIG_PPC_8xx
+ CPU_FTRS_8XX |
#endif
#ifdef CONFIG_PPC_47x
CPU_FTRS_47X | CPU_FTR_476_DD2 |
+#elif defined(CONFIG_44x)
+ CPU_FTRS_44X | CPU_FTRS_440x6 |
#endif
-#ifdef CONFIG_E200
- CPU_FTRS_E200 |
-#endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
CPU_FTRS_E500 | CPU_FTRS_E500_2 |
#endif
#ifdef CONFIG_PPC_E500MC
@@ -503,45 +522,69 @@ enum {
};
#endif /* __powerpc64__ */
-#ifdef __powerpc64__
-#ifdef CONFIG_PPC_BOOK3E
-#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500 & CPU_FTRS_A2)
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3E_64
+#define CPU_FTRS_ALWAYS (CPU_FTRS_E6500 & CPU_FTRS_E5500)
+#else
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+#define CPU_FTRS_DT_CPU_BASE \
+ (CPU_FTR_LWSYNC | \
+ CPU_FTR_FPU_UNAVAILABLE | \
+ CPU_FTR_NOEXECUTE | \
+ CPU_FTR_COHERENT_ICACHE | \
+ CPU_FTR_STCX_CHECKS_ADDRESS | \
+ CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
+ CPU_FTR_DAWR | \
+ CPU_FTR_ARCH_206 | \
+ CPU_FTR_ARCH_207S)
+#else
+#define CPU_FTRS_DT_CPU_BASE (~0ul)
+#endif
+
+/* pseries may disable DBELL with ibm,pi-features */
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define CPU_FTRS_ALWAYS \
+ (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & \
+ CPU_FTRS_POWER7 & CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & \
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
+ CPU_FTRS_POWER10 & CPU_FTRS_POWER11 & CPU_FTRS_DT_CPU_BASE)
#else
#define CPU_FTRS_ALWAYS \
- (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
+ (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
- CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE)
+ ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & CPU_FTRS_POSSIBLE & \
+ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \
+ CPU_FTRS_POWER10 & CPU_FTRS_POWER11 & CPU_FTRS_DT_CPU_BASE)
+#endif /* CONFIG_CPU_LITTLE_ENDIAN */
#endif
#else
enum {
CPU_FTRS_ALWAYS =
-#ifdef CONFIG_PPC_BOOK3S_32
- CPU_FTRS_PPC601 & CPU_FTRS_603 & CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
+#ifdef CONFIG_PPC_BOOK3S_604
+ CPU_FTRS_604 & CPU_FTRS_740_NOTAU &
CPU_FTRS_740 & CPU_FTRS_750 & CPU_FTRS_750FX1 &
CPU_FTRS_750FX2 & CPU_FTRS_750FX & CPU_FTRS_750GX &
CPU_FTRS_7400_NOTAU & CPU_FTRS_7400 & CPU_FTRS_7450_20 &
CPU_FTRS_7450_21 & CPU_FTRS_7450_23 & CPU_FTRS_7455_1 &
CPU_FTRS_7455_20 & CPU_FTRS_7455 & CPU_FTRS_7447_10 &
- CPU_FTRS_7447 & CPU_FTRS_7447A & CPU_FTRS_82XX &
- CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 &
+ CPU_FTRS_7447 & CPU_FTRS_7447A &
CPU_FTRS_CLASSIC32 &
-#else
- CPU_FTRS_GENERIC_32 &
#endif
-#ifdef CONFIG_8xx
- CPU_FTRS_8XX &
+#ifdef CONFIG_PPC_BOOK3S_603
+ CPU_FTRS_603 & CPU_FTRS_82XX &
+ CPU_FTRS_G2_LE & CPU_FTRS_E300 & CPU_FTRS_E300C2 &
#endif
-#ifdef CONFIG_40x
- CPU_FTRS_40X &
+#ifdef CONFIG_PPC_8xx
+ CPU_FTRS_8XX &
#endif
-#ifdef CONFIG_44x
+#ifdef CONFIG_PPC_47x
+ CPU_FTRS_47X &
+#elif defined(CONFIG_44x)
CPU_FTRS_44X & CPU_FTRS_440x6 &
#endif
-#ifdef CONFIG_E200
- CPU_FTRS_E200 &
-#endif
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
CPU_FTRS_E500 & CPU_FTRS_E500_2 &
#endif
#ifdef CONFIG_PPC_E500MC
@@ -552,16 +595,13 @@ enum {
};
#endif /* __powerpc64__ */
-static inline int cpu_has_feature(unsigned long feature)
-{
- return (CPU_FTRS_ALWAYS & feature) ||
- (CPU_FTRS_POSSIBLE
- & cur_cpu_spec->cpu_features
- & feature);
-}
-
-#define HBP_NUM 1
+/*
+ * Maximum number of hw breakpoints supported on powerpc. Number of
+ * breakpoints supported by actual hw might be less than this, which
+ * is decided at run time in nr_wp_slots().
+ */
+#define HBP_NUM_MAX 2
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 2bf8e9307be9..d06f2b20b810 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -1,7 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_CPUTHREADS_H
#define _ASM_POWERPC_CPUTHREADS_H
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
+#include <asm/cpu_has_feature.h>
/*
* Mapping of threads to cores
@@ -25,42 +28,13 @@ extern cpumask_t threads_core_mask;
#define threads_per_core 1
#define threads_per_subcore 1
#define threads_shift 0
-#define threads_core_mask (CPU_MASK_CPU0)
+#define has_big_cores 0
+#define threads_core_mask (*get_cpu_mask(0))
#endif
-/* cpu_thread_mask_to_cores - Return a cpumask of one per cores
- * hit by the argument
- *
- * @threads: a cpumask of threads
- *
- * This function returns a cpumask which will have one "cpu" (or thread)
- * bit set for each core that has at least one thread set in the argument.
- *
- * This can typically be used for things like IPI for tlb invalidations
- * since those need to be done only once per core/TLB
- */
-static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
-{
- cpumask_t tmp, res;
- int i;
-
- cpumask_clear(&res);
- for (i = 0; i < NR_CPUS; i += threads_per_core) {
- cpumask_shift_left(&tmp, &threads_core_mask, i);
- if (cpumask_intersects(threads, &tmp))
- cpumask_set_cpu(i, &res);
- }
- return res;
-}
-
static inline int cpu_nr_cores(void)
{
- return NR_CPUS >> threads_shift;
-}
-
-static inline cpumask_t cpu_online_cores_map(void)
-{
- return cpu_thread_mask_to_cores(cpu_online_mask);
+ return nr_cpu_ids >> threads_shift;
}
#ifdef CONFIG_SMP
@@ -91,7 +65,51 @@ static inline int cpu_last_thread_sibling(int cpu)
return cpu | (threads_per_core - 1);
}
+/*
+ * tlb_thread_siblings are siblings which share a TLB. This is not
+ * architected, is not something a hypervisor could emulate and a future
+ * CPU may change behaviour even in compat mode, so this should only be
+ * used on PowerNV, and only with care.
+ */
+static inline int cpu_first_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu & ~0x6; /* Big Core */
+ else
+ return cpu_first_thread_sibling(cpu);
+}
+
+static inline int cpu_last_tlb_thread_sibling(int cpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return cpu | 0x6; /* Big Core */
+ else
+ return cpu_last_thread_sibling(cpu);
+}
+
+static inline int cpu_tlb_thread_sibling_step(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (threads_per_core == 8))
+ return 2; /* Big Core */
+ else
+ return 1;
+}
+
+static inline u32 get_tensr(void)
+{
+#ifdef CONFIG_BOOKE
+ if (cpu_has_feature(CPU_FTR_SMT))
+ return mfspr(SPRN_TENSR);
+#endif
+ return 1;
+}
+
+void book3e_start_thread(int thread, unsigned long addr);
+void book3e_stop_thread(int thread);
+
+#endif /* __ASSEMBLER__ */
+#define INVALID_THREAD_HWID 0x0fff
#endif /* _ASM_POWERPC_CPUTHREADS_H */
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 607559ab271f..aff858ca99c0 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Definitions for measuring cputime on powerpc machines.
*
* Copyright (C) 2006 Paul Mackerras, IBM Corp.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in
* the same units as the timebase. Otherwise we measure cpu time
* in jiffies using the generic definitions.
@@ -16,220 +12,76 @@
#ifndef __POWERPC_CPUTIME_H
#define __POWERPC_CPUTIME_H
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#include <asm-generic/cputime.h>
-#ifdef __KERNEL__
-static inline void setup_cputime_one_jiffy(void) { }
-#endif
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#include <linux/types.h>
#include <linux/time.h>
#include <asm/div64.h>
#include <asm/time.h>
#include <asm/param.h>
-
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
+#include <asm/firmware.h>
#ifdef __KERNEL__
+#define cputime_to_nsecs(cputime) tb_to_ns(cputime)
/*
- * One jiffy in timebase units computed during initialization
+ * PPC64 uses PACA which is task independent for storing accounting data while
+ * PPC32 uses struct thread_info, therefore at task switch the accounting data
+ * has to be populated in the new task
*/
-extern cputime_t cputime_one_jiffy;
-
-/*
- * Convert cputime <-> jiffies
- */
-extern u64 __cputime_jiffies_factor;
-DECLARE_PER_CPU(unsigned long, cputime_last_delta);
-DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
-
-static inline unsigned long cputime_to_jiffies(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_jiffies_factor);
-}
-
-/* Estimate the scaled cputime by scaling the real cputime based on
- * the last scaled to real ratio */
-static inline cputime_t cputime_to_scaled(const cputime_t ct)
-{
- if (cpu_has_feature(CPU_FTR_SPURR) &&
- __get_cpu_var(cputime_last_delta))
- return (__force u64) ct *
- __get_cpu_var(cputime_scaled_last_delta) /
- __get_cpu_var(cputime_last_delta);
- return ct;
-}
+#ifdef CONFIG_PPC64
+#define get_accounting(tsk) (&get_paca()->accounting)
+#define raw_get_accounting(tsk) (&local_paca->accounting)
-static inline cputime_t jiffies_to_cputime(const unsigned long jif)
-{
- u64 ct;
- unsigned long sec;
-
- /* have to be a little careful about overflow */
- ct = jif % HZ;
- sec = jif / HZ;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, HZ);
- }
- if (sec)
- ct += (cputime_t) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
-}
-
-static inline void setup_cputime_one_jiffy(void)
-{
- cputime_one_jiffy = jiffies_to_cputime(1);
-}
-
-static inline cputime64_t jiffies64_to_cputime64(const u64 jif)
-{
- u64 ct;
- u64 sec;
-
- /* have to be a little careful about overflow */
- ct = jif % HZ;
- sec = jif / HZ;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, HZ);
- }
- if (sec)
- ct += (u64) sec * tb_ticks_per_sec;
- return (__force cputime64_t) ct;
-}
-
-static inline u64 cputime64_to_jiffies64(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_jiffies_factor);
-}
-
-/*
- * Convert cputime <-> microseconds
- */
-extern u64 __cputime_usec_factor;
-
-static inline unsigned long cputime_to_usecs(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_usec_factor);
-}
-
-static inline cputime_t usecs_to_cputime(const unsigned long us)
-{
- u64 ct;
- unsigned long sec;
-
- /* have to be a little careful about overflow */
- ct = us % 1000000;
- sec = us / 1000000;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, 1000000);
- }
- if (sec)
- ct += (cputime_t) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
-}
-
-#define usecs_to_cputime64(us) usecs_to_cputime(us)
-
-/*
- * Convert cputime <-> seconds
- */
-extern u64 __cputime_sec_factor;
-
-static inline unsigned long cputime_to_secs(const cputime_t ct)
-{
- return mulhdu((__force u64) ct, __cputime_sec_factor);
-}
-
-static inline cputime_t secs_to_cputime(const unsigned long sec)
-{
- return (__force cputime_t)((u64) sec * tb_ticks_per_sec);
-}
+#else
+#define get_accounting(tsk) (&task_thread_info(tsk)->accounting)
+#define raw_get_accounting(tsk) get_accounting(tsk)
+#endif
/*
- * Convert cputime <-> timespec
+ * account_cpu_user_entry/exit runs "unreconciled", so can't trace,
+ * can't use get_paca()
*/
-static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p)
+static notrace inline void account_cpu_user_entry(void)
{
- u64 x = (__force u64) ct;
- unsigned int frac;
+ unsigned long tb = mftb();
+ struct cpu_accounting_data *acct = raw_get_accounting(current);
- frac = do_div(x, tb_ticks_per_sec);
- p->tv_sec = x;
- x = (u64) frac * 1000000000;
- do_div(x, tb_ticks_per_sec);
- p->tv_nsec = x;
+ acct->utime += (tb - acct->starttime_user);
+ acct->starttime = tb;
}
-static inline cputime_t timespec_to_cputime(const struct timespec *p)
+static notrace inline void account_cpu_user_exit(void)
{
- u64 ct;
+ unsigned long tb = mftb();
+ struct cpu_accounting_data *acct = raw_get_accounting(current);
- ct = (u64) p->tv_nsec * tb_ticks_per_sec;
- do_div(ct, 1000000000);
- return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
+ acct->stime += (tb - acct->starttime);
+ acct->starttime_user = tb;
}
-/*
- * Convert cputime <-> timeval
- */
-static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p)
+static notrace inline void account_stolen_time(void)
{
- u64 x = (__force u64) ct;
- unsigned int frac;
+#ifdef CONFIG_PPC_SPLPAR
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ struct lppaca *lp = local_paca->lppaca_ptr;
- frac = do_div(x, tb_ticks_per_sec);
- p->tv_sec = x;
- x = (u64) frac * 1000000;
- do_div(x, tb_ticks_per_sec);
- p->tv_usec = x;
+ if (unlikely(local_paca->dtl_ridx != be64_to_cpu(lp->dtl_idx)))
+ pseries_accumulate_stolen_time();
+ }
+#endif
}
-static inline cputime_t timeval_to_cputime(const struct timeval *p)
+#endif /* __KERNEL__ */
+#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+static inline void account_cpu_user_entry(void)
{
- u64 ct;
-
- ct = (u64) p->tv_usec * tb_ticks_per_sec;
- do_div(ct, 1000000);
- return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec);
}
-
-/*
- * Convert cputime <-> clock_t (units of 1/USER_HZ seconds)
- */
-extern u64 __cputime_clockt_factor;
-
-static inline unsigned long cputime_to_clock_t(const cputime_t ct)
+static inline void account_cpu_user_exit(void)
{
- return mulhdu((__force u64) ct, __cputime_clockt_factor);
}
-
-static inline cputime_t clock_t_to_cputime(const unsigned long clk)
+static notrace inline void account_stolen_time(void)
{
- u64 ct;
- unsigned long sec;
-
- /* have to be a little careful about overflow */
- ct = clk % USER_HZ;
- sec = clk / USER_HZ;
- if (ct) {
- ct *= tb_ticks_per_sec;
- do_div(ct, USER_HZ);
- }
- if (sec)
- ct += (u64) sec * tb_ticks_per_sec;
- return (__force cputime_t) ct;
}
-
-#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct))
-
-static inline void arch_vtime_task_switch(struct task_struct *tsk) { }
-
-#endif /* __KERNEL__ */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#endif /* __POWERPC_CPUTIME_H */
diff --git a/arch/powerpc/include/asm/crash_reserve.h b/arch/powerpc/include/asm/crash_reserve.h
new file mode 100644
index 000000000000..6467ce29b1fa
--- /dev/null
+++ b/arch/powerpc/include/asm/crash_reserve.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_CRASH_RESERVE_H
+#define _ASM_POWERPC_CRASH_RESERVE_H
+
+/* crash kernel regions are page-size aligned */
+#define CRASH_ALIGN PAGE_SIZE
+
+#endif /* _ASM_POWERPC_CRASH_RESERVE_H */
diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
new file mode 100644
index 000000000000..68d9717cc5ee
--- /dev/null
+++ b/arch/powerpc/include/asm/crashdump-ppc64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
+#define _ASM_POWERPC_CRASHDUMP_PPC64_H
+
+/*
+ * Backup region - first 64KB of System RAM
+ *
+ * If ever the below macros are to be changed, please be judicious.
+ * The implicit assumptions are:
+ * - start, end & size are less than UINT32_MAX.
+ * - start & size are at least 8 byte aligned.
+ *
+ * For implementation details: arch/powerpc/purgatory/trampoline_64.S
+ */
+#define BACKUP_SRC_START 0
+#define BACKUP_SRC_END 0xffff
+#define BACKUP_SRC_SIZE (BACKUP_SRC_END - BACKUP_SRC_START + 1)
+
+#endif /* _ASM_POWERPC_CRASHDUMP_PPC64_H */
diff --git a/arch/powerpc/include/asm/current.h b/arch/powerpc/include/asm/current.h
index e2c7f06931e7..bbfb94800415 100644
--- a/arch/powerpc/include/asm/current.h
+++ b/arch/powerpc/include/asm/current.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_CURRENT_H
#define _ASM_POWERPC_CURRENT_H
#ifdef __KERNEL__
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
struct task_struct;
@@ -19,7 +16,8 @@ static inline struct task_struct *get_current(void)
{
struct task_struct *task;
- __asm__ __volatile__("ld %0,%1(13)"
+ /* get_current can be cached by the compiler, so no volatile */
+ asm ("ld %0,%1(13)"
: "=r" (task)
: "i" (offsetof(struct paca_struct, __current)));
diff --git a/arch/powerpc/include/asm/dbdma.h b/arch/powerpc/include/asm/dbdma.h
index e23f07e73cb3..4785c1716b3e 100644
--- a/arch/powerpc/include/asm/dbdma.h
+++ b/arch/powerpc/include/asm/dbdma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Definitions for using the Apple Descriptor-Based DMA controller
* in Power Macintosh computers.
@@ -42,12 +43,12 @@ struct dbdma_regs {
* DBDMA command structure. These fields are all little-endian!
*/
struct dbdma_cmd {
- unsigned short req_count; /* requested byte transfer count */
- unsigned short command; /* command word (has bit-fields) */
- unsigned int phy_addr; /* physical data address */
- unsigned int cmd_dep; /* command-dependent field */
- unsigned short res_count; /* residual count after completion */
- unsigned short xfer_status; /* transfer status */
+ __le16 req_count; /* requested byte transfer count */
+ __le16 command; /* command word (has bit-fields) */
+ __le32 phy_addr; /* physical data address */
+ __le32 cmd_dep; /* command-dependent field */
+ __le16 res_count; /* residual count after completion */
+ __le16 xfer_status; /* transfer status */
};
/* DBDMA command values in command field */
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 5fa6b20eba10..0b9ef726f92c 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2009 Freescale Semiconductor, Inc.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* provides masks and opcode images for use by code generation, emulation
* and for instructions that older assemblers might not know about
*/
@@ -15,7 +11,10 @@
#include <linux/smp.h>
#include <linux/threads.h>
+#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
+#include <asm/kvm_ppc.h>
#define PPC_DBELL_MSG_BRDCAST (0x04000000)
#define PPC_DBELL_TYPE(x) (((x) & 0xf) << (63-36))
@@ -34,33 +33,57 @@ enum ppc_dbell {
#ifdef CONFIG_PPC_BOOK3S
#define PPC_DBELL_MSGTYPE PPC_DBELL_SERVER
-#define SPRN_DOORBELL_CPUTAG SPRN_TIR
-#define PPC_DBELL_TAG_MASK 0x7f
static inline void _ppc_msgsnd(u32 msg)
{
- if (cpu_has_feature(CPU_FTR_HVMODE))
- __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
- else
- __asm__ __volatile__ (PPC_MSGSNDP(%0) : : "r" (msg));
+ __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGSND(%1), PPC_MSGSNDP(%1), %0)
+ : : "i" (CPU_FTR_HVMODE), "r" (msg));
+}
+
+/* sync after taking message interrupt */
+static inline void ppc_msgsync(void)
+{
+ /* sync is not required when taking messages from the same core */
+ __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGSYNC " ; lwsync", "", %0)
+ : : "i" (CPU_FTR_HVMODE|CPU_FTR_ARCH_300));
+}
+
+static inline void _ppc_msgclr(u32 msg)
+{
+ __asm__ __volatile__ (ASM_FTR_IFSET(PPC_MSGCLR(%1), PPC_MSGCLRP(%1), %0)
+ : : "i" (CPU_FTR_HVMODE), "r" (msg));
+}
+
+static inline void ppc_msgclr(enum ppc_dbell type)
+{
+ u32 msg = PPC_DBELL_TYPE(type);
+
+ _ppc_msgclr(msg);
}
#else /* CONFIG_PPC_BOOK3S */
#define PPC_DBELL_MSGTYPE PPC_DBELL
-#define SPRN_DOORBELL_CPUTAG SPRN_PIR
-#define PPC_DBELL_TAG_MASK 0x3fff
static inline void _ppc_msgsnd(u32 msg)
{
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
}
+/* sync after taking message interrupt */
+static inline void ppc_msgsync(void)
+{
+}
+
#endif /* CONFIG_PPC_BOOK3S */
-extern void doorbell_cause_ipi(int cpu, unsigned long data);
extern void doorbell_exception(struct pt_regs *regs);
-extern void doorbell_setup_this_cpu(void);
+
+/* sync before sending message */
+static inline void ppc_msgsnd_sync(void)
+{
+ __asm__ __volatile__ ("sync" : : : "memory");
+}
static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
{
@@ -70,4 +93,63 @@ static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
_ppc_msgsnd(msg);
}
+#ifdef CONFIG_SMP
+
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+static inline void doorbell_global_ipi(int cpu)
+{
+ u32 tag = get_hard_smp_processor_id(cpu);
+
+ kvmppc_set_host_ipi(cpu);
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+static inline void doorbell_core_ipi(int cpu)
+{
+ u32 tag = cpu_thread_in_core(cpu);
+
+ kvmppc_set_host_ipi(cpu);
+ /* Order previous accesses vs. msgsnd, which is treated as a store */
+ ppc_msgsnd_sync();
+ ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * Attempt to cause a core doorbell if destination is on the same core.
+ * Returns 1 on success, 0 on failure.
+ */
+static inline int doorbell_try_core_ipi(int cpu)
+{
+ int this_cpu = get_cpu();
+ int ret = 0;
+
+ if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
+ doorbell_core_ipi(cpu);
+ ret = 1;
+ }
+
+ put_cpu();
+
+ return ret;
+}
+
+#endif /* CONFIG_SMP */
+
#endif /* _ASM_POWERPC_DBELL_H */
diff --git a/arch/powerpc/include/asm/dcr-generic.h b/arch/powerpc/include/asm/dcr-generic.h
deleted file mode 100644
index 35b71599ec46..000000000000
--- a/arch/powerpc/include/asm/dcr-generic.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
- * <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_POWERPC_DCR_GENERIC_H
-#define _ASM_POWERPC_DCR_GENERIC_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-enum host_type_t {DCR_HOST_MMIO, DCR_HOST_NATIVE, DCR_HOST_INVALID};
-
-typedef struct {
- enum host_type_t type;
- union {
- dcr_host_mmio_t mmio;
- dcr_host_native_t native;
- } host;
-} dcr_host_t;
-
-extern bool dcr_map_ok_generic(dcr_host_t host);
-
-extern dcr_host_t dcr_map_generic(struct device_node *dev, unsigned int dcr_n,
- unsigned int dcr_c);
-extern void dcr_unmap_generic(dcr_host_t host, unsigned int dcr_c);
-
-extern u32 dcr_read_generic(dcr_host_t host, unsigned int dcr_n);
-
-extern void dcr_write_generic(dcr_host_t host, unsigned int dcr_n, u32 value);
-
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_DCR_GENERIC_H */
-
-
diff --git a/arch/powerpc/include/asm/dcr-mmio.h b/arch/powerpc/include/asm/dcr-mmio.h
deleted file mode 100644
index 93a68b28e695..000000000000
--- a/arch/powerpc/include/asm/dcr-mmio.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
- * <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_POWERPC_DCR_MMIO_H
-#define _ASM_POWERPC_DCR_MMIO_H
-#ifdef __KERNEL__
-
-#include <asm/io.h>
-
-typedef struct {
- void __iomem *token;
- unsigned int stride;
- unsigned int base;
-} dcr_host_mmio_t;
-
-static inline bool dcr_map_ok_mmio(dcr_host_mmio_t host)
-{
- return host.token != NULL;
-}
-
-extern dcr_host_mmio_t dcr_map_mmio(struct device_node *dev,
- unsigned int dcr_n,
- unsigned int dcr_c);
-extern void dcr_unmap_mmio(dcr_host_mmio_t host, unsigned int dcr_c);
-
-static inline u32 dcr_read_mmio(dcr_host_mmio_t host, unsigned int dcr_n)
-{
- return in_be32(host.token + ((host.base + dcr_n) * host.stride));
-}
-
-static inline void dcr_write_mmio(dcr_host_mmio_t host,
- unsigned int dcr_n,
- u32 value)
-{
- out_be32(host.token + ((host.base + dcr_n) * host.stride), value);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_DCR_MMIO_H */
-
-
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 7d2e6235726d..65b3fc2dc404 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -1,29 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_POWERPC_DCR_NATIVE_H
#define _ASM_POWERPC_DCR_NATIVE_H
#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/spinlock.h>
#include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
+#include <linux/stringify.h>
typedef struct {
unsigned int base;
@@ -31,7 +20,7 @@ typedef struct {
static inline bool dcr_map_ok_native(dcr_host_native_t host)
{
- return 1;
+ return true;
}
#define dcr_map_native(dev, dcr_n, dcr_c) \
@@ -64,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val)
#define mfdcr(rn) \
({unsigned int rval; \
if (__builtin_constant_p(rn) && rn < 1024) \
- asm volatile("mfdcr %0," __stringify(rn) \
- : "=r" (rval)); \
+ asm volatile("mfdcr %0, %1" : "=r" (rval) \
+ : "n" (rn)); \
else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \
rval = mfdcrx(rn); \
else \
@@ -75,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val)
#define mtdcr(rn, v) \
do { \
if (__builtin_constant_p(rn) && rn < 1024) \
- asm volatile("mtdcr " __stringify(rn) ",%0" \
- : : "r" (v)); \
+ asm volatile("mtdcr %0, %1" \
+ : : "n" (rn), "r" (v)); \
else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \
mtdcrx(rn, v); \
else \
@@ -150,6 +139,6 @@ static inline void __dcri_clrset(int base_addr, int base_data, int reg,
DCRN_ ## base ## _CONFIG_DATA, \
reg, clr, set)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_DCR_NATIVE_H */
diff --git a/arch/powerpc/include/asm/dcr-regs.h b/arch/powerpc/include/asm/dcr-regs.h
index 380274de429f..5c1a4973f46a 100644
--- a/arch/powerpc/include/asm/dcr-regs.h
+++ b/arch/powerpc/include/asm/dcr-regs.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Common DCR / SDR / CPR register definitions used on various IBM/AMCC
* 4xx processors
diff --git a/arch/powerpc/include/asm/dcr.h b/arch/powerpc/include/asm/dcr.h
index 9d6851cfb841..3c0fac2cc2b2 100644
--- a/arch/powerpc/include/asm/dcr.h
+++ b/arch/powerpc/include/asm/dcr.h
@@ -1,68 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* (c) Copyright 2006 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_POWERPC_DCR_H
#define _ASM_POWERPC_DCR_H
#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_PPC_DCR
-#ifdef CONFIG_PPC_DCR_NATIVE
#include <asm/dcr-native.h>
-#endif
-#ifdef CONFIG_PPC_DCR_MMIO
-#include <asm/dcr-mmio.h>
-#endif
-
-
-/* Indirection layer for providing both NATIVE and MMIO support. */
-
-#if defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO)
-
-#include <asm/dcr-generic.h>
-
-#define DCR_MAP_OK(host) dcr_map_ok_generic(host)
-#define dcr_map(dev, dcr_n, dcr_c) dcr_map_generic(dev, dcr_n, dcr_c)
-#define dcr_unmap(host, dcr_c) dcr_unmap_generic(host, dcr_c)
-#define dcr_read(host, dcr_n) dcr_read_generic(host, dcr_n)
-#define dcr_write(host, dcr_n, value) dcr_write_generic(host, dcr_n, value)
-
-#else
-
-#ifdef CONFIG_PPC_DCR_NATIVE
typedef dcr_host_native_t dcr_host_t;
#define DCR_MAP_OK(host) dcr_map_ok_native(host)
#define dcr_map(dev, dcr_n, dcr_c) dcr_map_native(dev, dcr_n, dcr_c)
#define dcr_unmap(host, dcr_c) dcr_unmap_native(host, dcr_c)
#define dcr_read(host, dcr_n) dcr_read_native(host, dcr_n)
#define dcr_write(host, dcr_n, value) dcr_write_native(host, dcr_n, value)
-#else
-typedef dcr_host_mmio_t dcr_host_t;
-#define DCR_MAP_OK(host) dcr_map_ok_mmio(host)
-#define dcr_map(dev, dcr_n, dcr_c) dcr_map_mmio(dev, dcr_n, dcr_c)
-#define dcr_unmap(host, dcr_c) dcr_unmap_mmio(host, dcr_c)
-#define dcr_read(host, dcr_n) dcr_read_mmio(host, dcr_n)
-#define dcr_write(host, dcr_n, value) dcr_write_mmio(host, dcr_n, value)
-#endif
-
-#endif /* defined(CONFIG_PPC_DCR_NATIVE) && defined(CONFIG_PPC_DCR_MMIO) */
/*
* additional helpers to read the DCR * base from the device-tree
@@ -73,6 +28,6 @@ extern unsigned int dcr_resource_start(const struct device_node *np,
extern unsigned int dcr_resource_len(const struct device_node *np,
unsigned int index);
#endif /* CONFIG_PPC_DCR */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_DCR_H */
diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index a954e4975049..51c744608f37 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
@@ -8,9 +9,7 @@
struct pt_regs;
-extern struct dentry *powerpc_debugfs_root;
-
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
extern int (*__debugger)(struct pt_regs *regs);
extern int (*__debugger_ipi)(struct pt_regs *regs);
@@ -46,15 +45,13 @@ static inline int debugger_break_match(struct pt_regs *regs) { return 0; }
static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
#endif
-void set_breakpoint(struct arch_hw_breakpoint *brk);
-void __set_breakpoint(struct arch_hw_breakpoint *brk);
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void suspend_breakpoints(void);
+void restore_breakpoints(void);
+bool ppc_breakpoint_available(void);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
extern void do_send_trap(struct pt_regs *regs, unsigned long address,
- unsigned long error_code, int signal_code, int brkpt);
-#else
-
-extern void do_break(struct pt_regs *regs, unsigned long address,
- unsigned long error_code);
+ unsigned long error_code, int brkpt);
#endif
#endif /* _ASM_POWERPC_DEBUG_H */
diff --git a/arch/powerpc/include/asm/delay.h b/arch/powerpc/include/asm/delay.h
index 52e4d54da2a9..51bb8c1476c7 100644
--- a/arch/powerpc/include/asm/delay.h
+++ b/arch/powerpc/include/asm/delay.h
@@ -1,18 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_DELAY_H
#define _ASM_POWERPC_DELAY_H
#ifdef __KERNEL__
+#include <linux/processor.h>
#include <asm/time.h>
/*
* Copyright 1996, Paul Mackerras.
* Copyright (C) 2009 Freescale Semiconductor, Inc. All rights reserved.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* PPC64 Support added by Dave Engebretsen, Todd Inglett, Mike Corrigan,
* Anton Blanchard.
*/
@@ -57,12 +54,19 @@ extern void udelay(unsigned long usecs);
({ \
typeof(condition) __ret; \
unsigned long __loops = tb_ticks_per_usec * timeout; \
- unsigned long __start = get_tbl(); \
- while (!(__ret = (condition)) && (tb_ticks_since(__start) <= __loops)) \
- if (delay) \
+ unsigned long __start = mftb(); \
+ \
+ if (delay) { \
+ while (!(__ret = (condition)) && \
+ (tb_ticks_since(__start) <= __loops)) \
udelay(delay); \
- else \
- cpu_relax(); \
+ } else { \
+ spin_begin(); \
+ while (!(__ret = (condition)) && \
+ (tb_ticks_since(__start) <= __loops)) \
+ spin_cpu_relax(); \
+ spin_end(); \
+ } \
if (!__ret) \
__ret = (condition); \
__ret; \
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 38faeded7d59..a4dc27655b3e 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -1,13 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
*/
#ifndef _ASM_POWERPC_DEVICE_H
#define _ASM_POWERPC_DEVICE_H
-struct dma_map_ops;
struct device_node;
+#ifdef CONFIG_PPC64
+struct pci_dn;
+struct iommu_table;
+#endif
/*
* Arch extensions to struct device.
@@ -16,23 +18,19 @@ struct device_node;
* drivers/macintosh/macio_asic.c
*/
struct dev_archdata {
- /* DMA operations on that device */
- struct dma_map_ops *dma_ops;
-
/*
- * When an iommu is in use, dma_data is used as a ptr to the base of the
- * iommu_table. Otherwise, it is a simple numerical offset.
+ * These two used to be a union. However, with the hybrid ops we need
+ * both so here we store both a DMA offset for direct mappings and
+ * an iommu_table for remapped DMA.
*/
- union {
- dma_addr_t dma_offset;
- void *iommu_table_base;
- } dma_data;
+ dma_addr_t dma_offset;
-#ifdef CONFIG_IOMMU_API
- void *iommu_domain;
+#ifdef CONFIG_PPC64
+ struct iommu_table *iommu_table_base;
#endif
-#ifdef CONFIG_SWIOTLB
- dma_addr_t max_direct_dma_addr;
+
+#ifdef CONFIG_PPC64
+ struct pci_dn *pci_data;
#endif
#ifdef CONFIG_EEH
struct eeh_dev *edev;
@@ -40,12 +38,18 @@ struct dev_archdata {
#ifdef CONFIG_FAIL_IOMMU
int fail_iommu;
#endif
+#ifdef CONFIG_PCI_IOV
+ void *iov_data;
+#endif
};
struct pdev_archdata {
u64 dma_mask;
+ /*
+ * Pointer to nvdimm_pmu structure, to handle the unregistering
+ * of pmu device
+ */
+ void *priv;
};
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
#endif /* _ASM_POWERPC_DEVICE_H */
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 6330a61b875a..8d2ebc36d5e3 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -42,6 +31,11 @@ static inline unsigned int get_dcrn(u32 inst)
return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
}
+static inline unsigned int get_tmrn(u32 inst)
+{
+ return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
static inline unsigned int get_rt(u32 inst)
{
return (inst >> 21) & 0x1f;
@@ -82,6 +76,11 @@ static inline unsigned int get_oc(u32 inst)
return (inst >> 11) & 0x7fff;
}
+static inline unsigned int get_tx_or_sx(u32 inst)
+{
+ return (inst) & 0x1;
+}
+
#define IS_XFORM(inst) (get_op(inst) == 31)
#define IS_DSFORM(inst) (get_op(inst) >= 56)
diff --git a/arch/powerpc/include/asm/div64.h b/arch/powerpc/include/asm/div64.h
deleted file mode 100644
index 6cd978cefb28..000000000000
--- a/arch/powerpc/include/asm/div64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/div64.h>
diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h
new file mode 100644
index 000000000000..128304cbee1d
--- /dev/null
+++ b/arch/powerpc/include/asm/dma-direct.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_POWERPC_DMA_DIRECT_H
+#define ASM_POWERPC_DMA_DIRECT_H 1
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr + dev->archdata.dma_offset;
+}
+
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return daddr - dev->archdata.dma_offset;
+}
+#endif /* ASM_POWERPC_DMA_DIRECT_H */
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
deleted file mode 100644
index 150866b2a3fe..000000000000
--- a/arch/powerpc/include/asm/dma-mapping.h
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Copyright (C) 2004 IBM
- *
- * Implements the generic device dma API for powerpc.
- * the pci and vio busses
- */
-#ifndef _ASM_DMA_MAPPING_H
-#define _ASM_DMA_MAPPING_H
-#ifdef __KERNEL__
-
-#include <linux/types.h>
-#include <linux/cache.h>
-/* need struct page definitions */
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-attrs.h>
-#include <linux/dma-debug.h>
-#include <asm/io.h>
-#include <asm/swiotlb.h>
-
-#define DMA_ERROR_CODE (~(dma_addr_t)0x0)
-
-/* Some dma direct funcs must be visible for use in other dma_ops */
-extern void *dma_direct_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs);
-extern void dma_direct_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs);
-extern int dma_direct_mmap_coherent(struct device *dev,
- struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle,
- size_t size, struct dma_attrs *attrs);
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-/*
- * DMA-consistent mapping functions for PowerPCs that don't support
- * cache snooping. These allocate/free a region of uncached mapped
- * memory space for use with DMA devices. Alternatively, you could
- * allocate the space "normally" and use the cache management functions
- * to ensure it is consistent.
- */
-struct device;
-extern void *__dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *handle, gfp_t gfp);
-extern void __dma_free_coherent(size_t size, void *vaddr);
-extern void __dma_sync(void *vaddr, size_t size, int direction);
-extern void __dma_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction);
-extern unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr);
-
-#else /* ! CONFIG_NOT_COHERENT_CACHE */
-/*
- * Cache coherent cores.
- */
-
-#define __dma_alloc_coherent(dev, gfp, size, handle) NULL
-#define __dma_free_coherent(size, addr) ((void)0)
-#define __dma_sync(addr, size, rw) ((void)0)
-#define __dma_sync_page(pg, off, sz, rw) ((void)0)
-
-#endif /* ! CONFIG_NOT_COHERENT_CACHE */
-
-static inline unsigned long device_to_mask(struct device *dev)
-{
- if (dev->dma_mask && *dev->dma_mask)
- return *dev->dma_mask;
- /* Assume devices without mask can take 32 bit addresses */
- return 0xfffffffful;
-}
-
-/*
- * Available generic sets of operations
- */
-#ifdef CONFIG_PPC64
-extern struct dma_map_ops dma_iommu_ops;
-#endif
-extern struct dma_map_ops dma_direct_ops;
-
-static inline struct dma_map_ops *get_dma_ops(struct device *dev)
-{
- /* We don't handle the NULL dev case for ISA for now. We could
- * do it via an out of line call but it is not needed for now. The
- * only ISA DMA device we support is the floppy and we have a hack
- * in the floppy driver directly to get a device for us.
- */
- if (unlikely(dev == NULL))
- return NULL;
-
- return dev->archdata.dma_ops;
-}
-
-static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
-{
- dev->archdata.dma_ops = ops;
-}
-
-/*
- * get_dma_offset()
- *
- * Get the dma offset on configurations where the dma address can be determined
- * from the physical address by looking at a simple offset. Direct dma and
- * swiotlb use this function, but it is typically not used by implementations
- * with an iommu.
- */
-static inline dma_addr_t get_dma_offset(struct device *dev)
-{
- if (dev)
- return dev->archdata.dma_data.dma_offset;
-
- return PCI_DRAM_OFFSET;
-}
-
-static inline void set_dma_offset(struct device *dev, dma_addr_t off)
-{
- if (dev)
- dev->archdata.dma_data.dma_offset = off;
-}
-
-/* this will be removed soon */
-#define flush_write_buffers()
-
-#include <asm-generic/dma-mapping-common.h>
-
-static inline int dma_supported(struct device *dev, u64 mask)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
- if (dma_ops->dma_supported == NULL)
- return 1;
- return dma_ops->dma_supported(dev, mask);
-}
-
-extern int dma_set_mask(struct device *dev, u64 dma_mask);
-extern int __dma_set_mask(struct device *dev, u64 dma_mask);
-
-#define dma_alloc_coherent(d,s,h,f) dma_alloc_attrs(d,s,h,f,NULL)
-
-static inline void *dma_alloc_attrs(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
- void *cpu_addr;
-
- BUG_ON(!dma_ops);
-
- cpu_addr = dma_ops->alloc(dev, size, dma_handle, flag, attrs);
-
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
-
- return cpu_addr;
-}
-
-#define dma_free_coherent(d,s,c,h) dma_free_attrs(d,s,c,h,NULL)
-
-static inline void dma_free_attrs(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- BUG_ON(!dma_ops);
-
- debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
-
- dma_ops->free(dev, size, cpu_addr, dma_handle, attrs);
-}
-
-static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- debug_dma_mapping_error(dev, dma_addr);
- if (dma_ops->mapping_error)
- return dma_ops->mapping_error(dev, dma_addr);
-
-#ifdef CONFIG_PPC64
- return (dma_addr == DMA_ERROR_CODE);
-#else
- return 0;
-#endif
-}
-
-static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
-{
-#ifdef CONFIG_SWIOTLB
- struct dev_archdata *sd = &dev->archdata;
-
- if (sd->max_direct_dma_addr && addr + size > sd->max_direct_dma_addr)
- return 0;
-#endif
-
- if (!dev->dma_mask)
- return 0;
-
- return addr + size - 1 <= *dev->dma_mask;
-}
-
-static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- return paddr + get_dma_offset(dev);
-}
-
-static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
-{
- return daddr - get_dma_offset(dev);
-}
-
-#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-
-#define ARCH_HAS_DMA_MMAP_COHERENT
-
-static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction direction)
-{
- BUG_ON(direction == DMA_NONE);
- __dma_sync(vaddr, size, (int)direction);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_DMA_MAPPING_H */
diff --git a/arch/powerpc/include/asm/dma.h b/arch/powerpc/include/asm/dma.h
index a5c6d83b5f60..d97c66d9ae34 100644
--- a/arch/powerpc/include/asm/dma.h
+++ b/arch/powerpc/include/asm/dma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_DMA_H
#define _ASM_POWERPC_DMA_H
#ifdef __KERNEL__
@@ -150,10 +151,9 @@
#define DMA2_EXT_REG 0x4D6
#ifndef __powerpc64__
- /* in arch/ppc/kernel/setup.c -- Cort */
+ /* in arch/powerpc/kernel/setup_32.c -- Cort */
extern unsigned int DMA_MODE_WRITE;
extern unsigned int DMA_MODE_READ;
- extern unsigned long ISA_DMA_THRESHOLD;
#else
#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */
#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */
@@ -340,11 +340,5 @@ extern int request_dma(unsigned int dmanr, const char *device_id);
/* release it again */
extern void free_dma(unsigned int dmanr);
-#ifdef CONFIG_PCI
-extern int isa_dma_bridge_buggy;
-#else
-#define isa_dma_bridge_buggy (0)
-#endif
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_DMA_H */
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
new file mode 100644
index 000000000000..13bf6dee8e2d
--- /dev/null
+++ b/arch/powerpc/include/asm/drmem.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * drmem.h: Power specific logical memory block representation
+ *
+ * Copyright 2017 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_LMB_H
+#define _ASM_POWERPC_LMB_H
+
+#include <linux/sched.h>
+
+struct drmem_lmb {
+ u64 base_addr;
+ u32 drc_index;
+ u32 aa_index;
+ u32 flags;
+};
+
+struct drmem_lmb_info {
+ struct drmem_lmb *lmbs;
+ int n_lmbs;
+ u64 lmb_size;
+};
+
+struct device_node;
+struct property;
+
+extern struct drmem_lmb_info *drmem_info;
+
+static inline struct drmem_lmb *drmem_lmb_next(struct drmem_lmb *lmb,
+ const struct drmem_lmb *start)
+{
+ /*
+ * DLPAR code paths can take several milliseconds per element
+ * when interacting with firmware. Ensure that we don't
+ * unfairly monopolize the CPU.
+ */
+ if (((++lmb - start) % 16) == 0)
+ cond_resched();
+
+ return lmb;
+}
+
+#define for_each_drmem_lmb_in_range(lmb, start, end) \
+ for ((lmb) = (start); (lmb) < (end); lmb = drmem_lmb_next(lmb, start))
+
+#define for_each_drmem_lmb(lmb) \
+ for_each_drmem_lmb_in_range((lmb), \
+ &drmem_info->lmbs[0], \
+ &drmem_info->lmbs[drmem_info->n_lmbs])
+
+/*
+ * The of_drconf_cell_v1 struct defines the layout of the LMB data
+ * specified in the ibm,dynamic-memory device tree property.
+ * The property itself is a 32-bit value specifying the number of
+ * LMBs followed by an array of of_drconf_cell_v1 entries, one
+ * per LMB.
+ */
+struct of_drconf_cell_v1 {
+ __be64 base_addr;
+ __be32 drc_index;
+ __be32 reserved;
+ __be32 aa_index;
+ __be32 flags;
+};
+
+/*
+ * Version 2 of the ibm,dynamic-memory property is defined as a
+ * 32-bit value specifying the number of LMB sets followed by an
+ * array of of_drconf_cell_v2 entries, one per LMB set.
+ */
+struct of_drconf_cell_v2 {
+ u32 seq_lmbs;
+ u64 base_addr;
+ u32 drc_index;
+ u32 aa_index;
+ u32 flags;
+} __packed;
+
+#define DRCONF_MEM_ASSIGNED 0x00000008
+#define DRCONF_MEM_AI_INVALID 0x00000040
+#define DRCONF_MEM_RESERVED 0x00000080
+#define DRCONF_MEM_HOTREMOVABLE 0x00000100
+
+static inline u64 drmem_lmb_size(void)
+{
+ return drmem_info->lmb_size;
+}
+
+#define DRMEM_LMB_RESERVED 0x80000000
+
+static inline void drmem_mark_lmb_reserved(struct drmem_lmb *lmb)
+{
+ lmb->flags |= DRMEM_LMB_RESERVED;
+}
+
+static inline void drmem_remove_lmb_reservation(struct drmem_lmb *lmb)
+{
+ lmb->flags &= ~DRMEM_LMB_RESERVED;
+}
+
+static inline bool drmem_lmb_reserved(struct drmem_lmb *lmb)
+{
+ return lmb->flags & DRMEM_LMB_RESERVED;
+}
+
+u64 drmem_lmb_memory_max(void);
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *));
+int drmem_update_dt(void);
+
+#ifdef CONFIG_PPC_PSERIES
+int __init
+walk_drmem_lmbs_early(unsigned long node, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *));
+void drmem_update_lmbs(struct property *prop);
+#endif
+
+static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+ lmb->aa_index = 0xffffffff;
+}
+
+#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/dt_cpu_ftrs.h b/arch/powerpc/include/asm/dt_cpu_ftrs.h
new file mode 100644
index 000000000000..0c729e2d0e8a
--- /dev/null
+++ b/arch/powerpc/include/asm/dt_cpu_ftrs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_DT_CPU_FTRS_H
+#define __ASM_POWERPC_DT_CPU_FTRS_H
+
+/*
+ * Copyright 2017, IBM Corporation
+ * cpufeatures is the new way to discover CPU features with /cpus/features
+ * devicetree. This supersedes PVR based discovery ("cputable"), and older
+ * device tree feature advertisement.
+ */
+
+#include <linux/types.h>
+#include <uapi/asm/cputable.h>
+
+#ifdef CONFIG_PPC_DT_CPU_FTRS
+bool dt_cpu_ftrs_init(void *fdt);
+void dt_cpu_ftrs_scan(void);
+bool dt_cpu_ftrs_in_use(void);
+#else
+static inline bool dt_cpu_ftrs_init(void *fdt) { return false; }
+static inline void dt_cpu_ftrs_scan(void) { }
+static inline bool dt_cpu_ftrs_in_use(void) { return false; }
+#endif
+
+#endif /* __ASM_POWERPC_DT_CPU_FTRS_H */
diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h
new file mode 100644
index 000000000000..a5c21bc623cb
--- /dev/null
+++ b/arch/powerpc/include/asm/dtl.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_POWERPC_DTL_H
+#define _ASM_POWERPC_DTL_H
+
+#include <linux/rwsem.h>
+#include <asm/lppaca.h>
+
+/*
+ * Layout of entries in the hypervisor's dispatch trace log buffer.
+ */
+struct dtl_entry {
+ u8 dispatch_reason;
+ u8 preempt_reason;
+ __be16 processor_id;
+ __be32 enqueue_to_dispatch_time;
+ __be32 ready_to_enqueue_time;
+ __be32 waiting_to_ready_time;
+ __be64 timebase;
+ __be64 fault_addr;
+ __be64 srr0;
+ __be64 srr1;
+};
+
+#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
+#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
+
+/*
+ * Dispatch trace log event enable mask:
+ * 0x1: voluntary virtual processor waits
+ * 0x2: time-slice preempts
+ * 0x4: virtual partition memory page faults
+ */
+#define DTL_LOG_CEDE 0x1
+#define DTL_LOG_PREEMPT 0x2
+#define DTL_LOG_FAULT 0x4
+#define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT)
+
+extern struct kmem_cache *dtl_cache;
+extern struct rw_semaphore dtl_access_lock;
+
+extern void register_dtl_buffer(int cpu);
+extern void alloc_dtl_buffers(unsigned long *time_limit);
+
+#endif /* _ASM_POWERPC_DTL_H */
diff --git a/arch/powerpc/include/asm/edac.h b/arch/powerpc/include/asm/edac.h
index 6ead88bbfbb8..5571e23d253e 100644
--- a/arch/powerpc/include/asm/edac.h
+++ b/arch/powerpc/include/asm/edac.h
@@ -12,11 +12,11 @@
#define ASM_EDAC_H
/*
* ECC atomic, DMA, SMP and interrupt safe scrub function.
- * Implements the per arch atomic_scrub() that EDAC use for software
+ * Implements the per arch edac_atomic_scrub() that EDAC use for software
* ECC scrubbing. It reads memory and then writes back the original
* value, allowing the hardware to detect and correct memory errors.
*/
-static __inline__ void atomic_scrub(void *va, u32 size)
+static __inline__ void edac_atomic_scrub(void *va, u32 size)
{
unsigned int *virt_addr = va;
unsigned int temp;
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 9983c3d26bca..5e34611de9ef 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation.
* Copyright 2001-2012 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _POWERPC_EEH_H
@@ -27,18 +14,21 @@
#include <linux/time.h>
#include <linux/atomic.h>
+#include <uapi/asm/eeh.h>
+
struct pci_dev;
struct pci_bus;
-struct device_node;
+struct pci_dn;
#ifdef CONFIG_EEH
/* EEH subsystem flags */
-#define EEH_ENABLED 0x01 /* EEH enabled */
-#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */
-#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */
-#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */
-#define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */
+#define EEH_ENABLED 0x01 /* EEH enabled */
+#define EEH_FORCE_DISABLED 0x02 /* EEH disabled */
+#define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */
+#define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */
+#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */
+#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */
/*
* Delay for PE reset, all in ms
@@ -53,7 +43,7 @@ struct device_node;
/*
* The struct is used to trace PE related EEH functionality.
* In theory, there will have one instance of the struct to
- * be created against particular PE. In nature, PEs corelate
+ * be created against particular PE. In nature, PEs correlate
* to each other. the struct has to reflect that hierarchy in
* order to easily pick up those affected PEs when one particular
* PE has EEH errors.
@@ -68,34 +58,54 @@ struct device_node;
#define EEH_PE_PHB (1 << 1) /* PHB PE */
#define EEH_PE_DEVICE (1 << 2) /* Device PE */
#define EEH_PE_BUS (1 << 3) /* Bus PE */
+#define EEH_PE_VF (1 << 4) /* VF PE */
#define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */
#define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */
-#define EEH_PE_RESET (1 << 2) /* PE reset in progress */
+#define EEH_PE_CFG_BLOCKED (1 << 2) /* Block config access */
+#define EEH_PE_RESET (1 << 3) /* PE reset in progress */
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
+#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
+#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
+#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
struct eeh_pe {
int type; /* PE type: PHB/Bus/Device */
int state; /* PE EEH dependent mode */
- int config_addr; /* Traditional PCI address */
int addr; /* PE configuration address */
struct pci_controller *phb; /* Associated PHB */
struct pci_bus *bus; /* Top PCI bus for bus PE */
int check_count; /* Times of ignored error */
int freeze_count; /* Times of froze up */
- struct timeval tstamp; /* Time on first-time freeze */
+ time64_t tstamp; /* Time on first-time freeze */
int false_positives; /* Times of reported #ff's */
atomic_t pass_dev_cnt; /* Count of passed through devs */
struct eeh_pe *parent; /* Parent PE */
- void *data; /* PE auxillary data */
- struct list_head child_list; /* Link PE to the child list */
- struct list_head edevs; /* Link list of EEH devices */
- struct list_head child; /* Child PEs */
+ void *data; /* PE auxiliary data */
+ struct list_head child_list; /* List of PEs below this PE */
+ struct list_head child; /* Memb. child_list/eeh_phb_pe */
+ struct list_head edevs; /* List of eeh_dev in this PE */
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Saved stack trace. When we find a PE freeze in eeh_dev_check_failure
+ * the stack trace is saved here so we can print it in the recovery
+ * thread if it turns out to due to a real problem rather than
+ * a hot-remove.
+ *
+ * A max of 64 entries might be overkill, but it also might not be.
+ */
+ unsigned long stack_trace[64];
+ int trace_entries;
+#endif /* CONFIG_STACKTRACE */
};
#define eeh_pe_for_each_dev(pe, edev, tmp) \
- list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
+ list_for_each_entry_safe(edev, tmp, &pe->edevs, entry)
+
+#define eeh_for_each_pe(root, pe) \
+ for (pe = root; pe; pe = eeh_pe_next(pe, root))
static inline bool eeh_pe_passed(struct eeh_pe *pe)
{
@@ -121,24 +131,40 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
struct eeh_dev {
int mode; /* EEH mode */
- int class_code; /* Class code of the device */
- int config_addr; /* Config address */
+ int bdfn; /* bdfn of device (for cfg ops) */
+ struct pci_controller *controller;
int pe_config_addr; /* PE config address */
u32 config_space[16]; /* Saved PCI config space */
int pcix_cap; /* Saved PCIx capability */
int pcie_cap; /* Saved PCIe capability */
int aer_cap; /* Saved AER capability */
+ int af_cap; /* Saved AF capability */
struct eeh_pe *pe; /* Associated PE */
- struct list_head list; /* Form link list in the PE */
- struct pci_controller *phb; /* Associated PHB */
- struct device_node *dn; /* Associated device node */
+ struct list_head entry; /* Membership in eeh_pe.edevs */
+ struct list_head rmv_entry; /* Membership in rmv_list */
+ struct pci_dn *pdn; /* Associated PCI device node */
struct pci_dev *pdev; /* Associated PCI device */
- struct pci_bus *bus; /* PCI bus for partial hotplug */
+ bool in_error; /* Error flag for edev */
+
+ /* VF specific properties */
+ struct pci_dev *physfn; /* Associated SRIOV PF */
+ int vf_index; /* Index of this VF */
};
-static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev)
+/* "fmt" must be a simple literal string */
+#define EEH_EDEV_PRINT(level, edev, fmt, ...) \
+ pr_##level("PCI %04x:%02x:%02x.%x#%04x: EEH: " fmt, \
+ (edev)->controller->global_number, PCI_BUSNO((edev)->bdfn), \
+ PCI_SLOT((edev)->bdfn), PCI_FUNC((edev)->bdfn), \
+ ((edev)->pe ? (edev)->pe_config_addr : 0xffff), ##__VA_ARGS__)
+#define eeh_edev_dbg(edev, fmt, ...) EEH_EDEV_PRINT(debug, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_info(edev, fmt, ...) EEH_EDEV_PRINT(info, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_warn(edev, fmt, ...) EEH_EDEV_PRINT(warn, (edev), fmt, ##__VA_ARGS__)
+#define eeh_edev_err(edev, fmt, ...) EEH_EDEV_PRINT(err, (edev), fmt, ##__VA_ARGS__)
+
+static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
{
- return edev ? edev->dn : NULL;
+ return edev ? edev->pdn : NULL;
}
static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
@@ -146,6 +172,11 @@ static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev)
return edev ? edev->pdev : NULL;
}
+static inline struct eeh_pe *eeh_dev_to_pe(struct eeh_dev* edev)
+{
+ return edev ? edev->pe : NULL;
+}
+
/* Return values from eeh_ops::next_error */
enum {
EEH_NEXT_ERR_NONE = 0,
@@ -167,6 +198,7 @@ enum {
#define EEH_OPT_ENABLE 1 /* EEH enable */
#define EEH_OPT_THAW_MMIO 2 /* MMIO enable */
#define EEH_OPT_THAW_DMA 3 /* DMA enable */
+#define EEH_OPT_FREEZE_PE 4 /* Freeze PE */
#define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */
#define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */
#define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */
@@ -174,11 +206,6 @@ enum {
#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
-#define EEH_PE_STATE_NORMAL 0 /* Normal state */
-#define EEH_PE_STATE_RESET 1 /* PE reset asserted */
-#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */
-#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA, Enabled IO */
-#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */
#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */
#define EEH_RESET_HOT 1 /* Hot reset */
#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */
@@ -187,24 +214,24 @@ enum {
struct eeh_ops {
char *name;
- int (*init)(void);
- int (*post_init)(void);
- void* (*of_probe)(struct device_node *dn, void *flag);
- int (*dev_probe)(struct pci_dev *dev, void *flag);
+ struct eeh_dev *(*probe)(struct pci_dev *pdev);
int (*set_option)(struct eeh_pe *pe, int option);
- int (*get_pe_addr)(struct eeh_pe *pe);
- int (*get_state)(struct eeh_pe *pe, int *state);
+ int (*get_state)(struct eeh_pe *pe, int *delay);
int (*reset)(struct eeh_pe *pe, int option);
- int (*wait_state)(struct eeh_pe *pe, int max_wait);
int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
int (*configure_bridge)(struct eeh_pe *pe);
- int (*read_config)(struct device_node *dn, int where, int size, u32 *val);
- int (*write_config)(struct device_node *dn, int where, int size, u32 val);
+ int (*err_inject)(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
+ int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
+ int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
int (*next_error)(struct eeh_pe **pe);
- int (*restore_config)(struct device_node *dn);
+ int (*restore_config)(struct eeh_dev *edev);
+ int (*notify_resume)(struct eeh_dev *edev);
};
extern int eeh_subsystem_flags;
+extern u32 eeh_max_freezes;
+extern bool eeh_debugfs_no_recover;
extern struct eeh_ops *eeh_ops;
extern raw_spinlock_t confirm_error_lock;
@@ -225,11 +252,7 @@ static inline bool eeh_has_flag(int flag)
static inline bool eeh_enabled(void)
{
- if (eeh_has_flag(EEH_FORCE_DISABLED) ||
- !eeh_has_flag(EEH_ENABLED))
- return false;
-
- return true;
+ return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
}
static inline void eeh_serialize_lock(unsigned long *flags)
@@ -242,50 +265,50 @@ static inline void eeh_serialize_unlock(unsigned long flags)
raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
}
-/*
- * Max number of EEH freezes allowed before we consider the device
- * to be permanently disabled.
- */
-#define EEH_MAX_ALLOWED_FREEZES 5
+static inline bool eeh_state_active(int state)
+{
+ return (state & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
+ == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+}
-typedef void *(*eeh_traverse_func)(void *data, void *flag);
+typedef void (*eeh_edev_traverse_func)(struct eeh_dev *edev, void *flag);
+typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
+int eeh_wait_state(struct eeh_pe *pe, int max_wait);
struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev);
-int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no);
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
+int eeh_pe_tree_remove(struct eeh_dev *edev);
void eeh_pe_update_time_stamp(struct eeh_pe *pe);
void *eeh_pe_traverse(struct eeh_pe *root,
- eeh_traverse_func fn, void *flag);
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
- eeh_traverse_func fn, void *flag);
+ eeh_pe_traverse_func fn, void *flag);
+void eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_edev_traverse_func fn, void *flag);
void eeh_pe_restore_bars(struct eeh_pe *pe);
const char *eeh_pe_loc_get(struct eeh_pe *pe);
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
-void *eeh_dev_init(struct device_node *dn, void *data);
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
-int eeh_init(void);
-int __init eeh_ops_register(struct eeh_ops *ops);
-int __exit eeh_ops_unregister(const char *name);
-unsigned long eeh_check_failure(const volatile void __iomem *token,
- unsigned long val);
+void eeh_show_enabled(void);
+int __init eeh_init(struct eeh_ops *ops);
+int eeh_check_failure(const volatile void __iomem *token);
int eeh_dev_check_failure(struct eeh_dev *edev);
-void eeh_addr_cache_build(void);
-void eeh_add_device_early(struct device_node *);
-void eeh_add_device_tree_early(struct device_node *);
-void eeh_add_device_late(struct pci_dev *);
-void eeh_add_device_tree_late(struct pci_bus *);
-void eeh_add_sysfs_files(struct pci_bus *);
+void eeh_addr_cache_init(void);
+void eeh_probe_device(struct pci_dev *pdev);
void eeh_remove_device(struct pci_dev *);
+int eeh_unfreeze_pe(struct eeh_pe *pe);
+int eeh_pe_reset_and_recover(struct eeh_pe *pe);
int eeh_dev_open(struct pci_dev *pdev);
void eeh_dev_release(struct pci_dev *pdev);
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
int eeh_pe_set_option(struct eeh_pe *pe, int option);
int eeh_pe_get_state(struct eeh_pe *pe);
-int eeh_pe_reset(struct eeh_pe *pe, int option);
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
int eeh_pe_configure(struct eeh_pe *pe);
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+ unsigned long addr, unsigned long mask);
+int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
/**
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -309,43 +332,30 @@ static inline bool eeh_enabled(void)
return false;
}
-static inline int eeh_init(void)
-{
- return 0;
-}
-
-static inline void *eeh_dev_init(struct device_node *dn, void *data)
-{
- return NULL;
-}
+static inline void eeh_show_enabled(void) { }
-static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
-
-static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+static inline int eeh_check_failure(const volatile void __iomem *token)
{
- return val;
+ return 0;
}
#define eeh_dev_check_failure(x) (0)
-static inline void eeh_addr_cache_build(void) { }
-
-static inline void eeh_add_device_early(struct device_node *dn) { }
-
-static inline void eeh_add_device_tree_early(struct device_node *dn) { }
-
-static inline void eeh_add_device_late(struct pci_dev *dev) { }
+static inline void eeh_addr_cache_init(void) { }
-static inline void eeh_add_device_tree_late(struct pci_bus *bus) { }
-
-static inline void eeh_add_sysfs_files(struct pci_bus *bus) { }
+static inline void eeh_probe_device(struct pci_dev *dev) { }
static inline void eeh_remove_device(struct pci_dev *dev) { }
#define EEH_POSSIBLE_ERROR(val, type) (0)
#define EEH_IO_ERROR_VALUE(size) (-1UL)
+static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; }
#endif /* CONFIG_EEH */
+#if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH)
+void pseries_eeh_init_edev_recursive(struct pci_dn *pdn);
+#endif
+
#ifdef CONFIG_PPC64
/*
* MMIO read/write operations with EEH support.
@@ -354,7 +364,7 @@ static inline u8 eeh_readb(const volatile void __iomem *addr)
{
u8 val = in_8(addr);
if (EEH_POSSIBLE_ERROR(val, u8))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -362,7 +372,7 @@ static inline u16 eeh_readw(const volatile void __iomem *addr)
{
u16 val = in_le16(addr);
if (EEH_POSSIBLE_ERROR(val, u16))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -370,7 +380,7 @@ static inline u32 eeh_readl(const volatile void __iomem *addr)
{
u32 val = in_le32(addr);
if (EEH_POSSIBLE_ERROR(val, u32))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -378,7 +388,7 @@ static inline u64 eeh_readq(const volatile void __iomem *addr)
{
u64 val = in_le64(addr);
if (EEH_POSSIBLE_ERROR(val, u64))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -386,7 +396,7 @@ static inline u16 eeh_readw_be(const volatile void __iomem *addr)
{
u16 val = in_be16(addr);
if (EEH_POSSIBLE_ERROR(val, u16))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -394,7 +404,7 @@ static inline u32 eeh_readl_be(const volatile void __iomem *addr)
{
u32 val = in_be32(addr);
if (EEH_POSSIBLE_ERROR(val, u32))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -402,7 +412,7 @@ static inline u64 eeh_readq_be(const volatile void __iomem *addr)
{
u64 val = in_be64(addr);
if (EEH_POSSIBLE_ERROR(val, u64))
- return eeh_check_failure(addr, val);
+ eeh_check_failure(addr);
return val;
}
@@ -416,7 +426,7 @@ static inline void eeh_memcpy_fromio(void *dest, const
* were copied. Check all four bytes.
*/
if (n >= 4 && EEH_POSSIBLE_ERROR(*((u32 *)(dest + n - 4)), u32))
- eeh_check_failure(src, *((u32 *)(dest + n - 4)));
+ eeh_check_failure(src);
}
/* in-string eeh macros */
@@ -425,7 +435,7 @@ static inline void eeh_readsb(const volatile void __iomem *addr, void * buf,
{
_insb(addr, buf, ns);
if (EEH_POSSIBLE_ERROR((*(((u8*)buf)+ns-1)), u8))
- eeh_check_failure(addr, *(u8*)buf);
+ eeh_check_failure(addr);
}
static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
@@ -433,7 +443,7 @@ static inline void eeh_readsw(const volatile void __iomem *addr, void * buf,
{
_insw(addr, buf, ns);
if (EEH_POSSIBLE_ERROR((*(((u16*)buf)+ns-1)), u16))
- eeh_check_failure(addr, *(u16*)buf);
+ eeh_check_failure(addr);
}
static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
@@ -441,9 +451,12 @@ static inline void eeh_readsl(const volatile void __iomem *addr, void * buf,
{
_insl(addr, buf, nl);
if (EEH_POSSIBLE_ERROR((*(((u32*)buf)+nl-1)), u32))
- eeh_check_failure(addr, *(u32*)buf);
+ eeh_check_failure(addr);
}
+
+void __init eeh_cache_debugfs_init(void);
+
#endif /* CONFIG_PPC64 */
#endif /* __KERNEL__ */
#endif /* _POWERPC_EEH_H */
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 1e551a2d6f82..dadde7d52f46 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (c) 2005 Linas Vepstas <linas@linas.org>
*/
@@ -33,8 +21,10 @@ struct eeh_event {
int eeh_event_init(void);
int eeh_send_failure_event(struct eeh_pe *pe);
+int __eeh_send_failure_event(struct eeh_pe *pe);
void eeh_remove_event(struct eeh_pe *pe, bool force);
-void eeh_handle_event(struct eeh_pe *pe);
+void eeh_handle_normal_event(struct eeh_pe *pe);
+void eeh_handle_special_event(void);
#endif /* __KERNEL__ */
#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index 888d8f3f2524..bb4b94444d3e 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* ELF register definitions..
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_ELF_H
#define _ASM_POWERPC_ELF_H
@@ -23,13 +19,13 @@
#define CORE_DUMP_USE_REGSET
#define ELF_EXEC_PAGESIZE PAGE_SIZE
-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
- use of this is to invoke "./ld.so someprog" to test out a new version of
- the loader. We need to make sure that it is out of the way of the program
- that it will "exec", and that there is sufficient room for the brk. */
-
-extern unsigned long randomize_et_dyn(unsigned long base);
-#define ELF_ET_DYN_BASE (randomize_et_dyn(0x20000000))
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is raised to 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE (is_32bit_task() ? 0x000400000UL : \
+ 0x100000000UL)
#define ELF_CORE_EFLAGS (is_elf2_task() ? 2 : 0)
@@ -57,8 +53,6 @@ static inline void ppc_elf_core_copy_regs(elf_gregset_t elf_regs,
}
#define ELF_CORE_COPY_REGS(gregs, regs) ppc_elf_core_copy_regs(gregs, regs);
-typedef elf_vrregset_t elf_fpxregset_t;
-
/* ELF_HWCAP yields a mask that user programs can use to figure out what
instruction set this cpu supports. This could be done in userspace,
but it's not easy, and we've already done it here. */
@@ -129,16 +123,73 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
(0x7ff >> (PAGE_SHIFT - 12)) : \
(0x3ffff >> (PAGE_SHIFT - 12)))
-extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-#define arch_randomize_brk arch_randomize_brk
-
-
#ifdef CONFIG_SPU_BASE
/* Notes used in ET_CORE. Note name is "SPU/<fd>/<filename>". */
#define NT_SPU 1
-#define ARCH_HAVE_EXTRA_ELF_NOTES
-
#endif /* CONFIG_SPU_BASE */
+#ifdef CONFIG_PPC64
+
+#define get_cache_geometry(level) \
+ (ppc64_caches.level.assoc << 16 | ppc64_caches.level.line_size)
+
+#define ARCH_DLINFO_CACHE_GEOMETRY \
+ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \
+ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \
+ NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \
+ NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \
+ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \
+ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \
+ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \
+ NEW_AUX_ENT(AT_L3_CACHEGEOMETRY, get_cache_geometry(l3))
+
+#else
+#define ARCH_DLINFO_CACHE_GEOMETRY
+#endif
+
+/*
+ * The requirements here are:
+ * - keep the final alignment of sp (sp & 0xf)
+ * - make sure the 32-bit value at the first 16 byte aligned position of
+ * AUXV is greater than 16 for glibc compatibility.
+ * AT_IGNOREPPC is used for that.
+ * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
+ * even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
+ * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
+ */
+#define COMMON_ARCH_DLINFO \
+do { \
+ /* Handle glibc compatibility. */ \
+ NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
+ NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
+ /* Cache size items */ \
+ NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \
+ NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \
+ NEW_AUX_ENT(AT_UCACHEBSIZE, 0); \
+ VDSO_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long)current->mm->context.vdso);\
+ ARCH_DLINFO_CACHE_GEOMETRY; \
+} while (0)
+
+#define ARCH_DLINFO \
+do { \
+ COMMON_ARCH_DLINFO; \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_min_sigframe_size()); \
+} while (0)
+
+#define COMPAT_ARCH_DLINFO \
+do { \
+ COMMON_ARCH_DLINFO; \
+ NEW_AUX_ENT(AT_MINSIGSTKSZ, get_min_sigframe_size_compat()); \
+} while (0)
+
+/* Relocate the kernel image to @final_address */
+void relocate(unsigned long final_address);
+
+struct func_desc {
+ unsigned long addr;
+ unsigned long toc;
+ unsigned long env;
+};
+
#endif /* _ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/asm/elfnote.h b/arch/powerpc/include/asm/elfnote.h
new file mode 100644
index 000000000000..a201b6e9ae44
--- /dev/null
+++ b/arch/powerpc/include/asm/elfnote.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#ifndef __ASM_POWERPC_ELFNOTE_H__
+#define __ASM_POWERPC_ELFNOTE_H__
+
+/*
+ * These note types should live in a SHT_NOTE segment and have
+ * "PowerPC" in the name field.
+ */
+
+/*
+ * The capabilities supported/required by this kernel (bitmap).
+ *
+ * This type uses a bitmap as "desc" field. Each bit is described
+ * in arch/powerpc/kernel/note.S
+ */
+#define PPC_ELFNOTE_CAPABILITIES 1
+
+#endif /* __ASM_POWERPC_ELFNOTE_H__ */
diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h
index f00e10e2a335..800cb21000cf 100644
--- a/arch/powerpc/include/asm/emulated_ops.h
+++ b/arch/powerpc/include/asm/emulated_ops.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2007 Sony Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.
- * If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ASM_POWERPC_EMULATED_OPS_H
@@ -55,6 +43,10 @@ extern struct ppc_emulated {
struct ppc_emulated_entry mfdscr;
struct ppc_emulated_entry mtdscr;
struct ppc_emulated_entry lq_stq;
+ struct ppc_emulated_entry lxvw4x;
+ struct ppc_emulated_entry lxvh8x;
+ struct ppc_emulated_entry lxvd2x;
+ struct ppc_emulated_entry lxvb16x;
#endif
} ppc_emulated;
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 334459ad145b..8fc5aaa4bbba 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -37,7 +37,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-/* A "hypercall" is an "sc 1" instruction. This header file file provides C
+/* A "hypercall" is an "sc 1" instruction. This header file provides C
* wrapper functions for the ePAPR hypervisor interface. It is inteded
* for use by Linux device drivers and other operating systems.
*
@@ -52,7 +52,7 @@
#include <uapi/asm/epapr_hcalls.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/errno.h>
#include <asm/byteorder.h>
@@ -65,7 +65,7 @@
* but the gcc inline assembly syntax does not allow us to specify registers
* on the clobber list that are also on the input/output list. Therefore,
* the lists of clobbered registers depends on the number of register
- * parmeters ("+r" and "=r") passed to the hypercall.
+ * parameters ("+r" and "=r") passed to the hypercall.
*
* Each assembly block should use one of the HCALL_CLOBBERSx macros. As a
* general rule, 'x' is the number of parameters passed to the assembly
@@ -246,7 +246,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt,
* ev_int_eoi - signal the end of interrupt processing
* @interrupt: the interrupt number
*
- * This function signals the end of processing for the the specified
+ * This function signals the end of processing for the specified
* interrupt, which must be the interrupt currently in service. By
* definition, this is also the highest-priority interrupt.
*
@@ -466,17 +466,17 @@ static inline unsigned long epapr_hypercall(unsigned long *in,
unsigned long *out,
unsigned long nr)
{
- unsigned long register r0 asm("r0");
- unsigned long register r3 asm("r3") = in[0];
- unsigned long register r4 asm("r4") = in[1];
- unsigned long register r5 asm("r5") = in[2];
- unsigned long register r6 asm("r6") = in[3];
- unsigned long register r7 asm("r7") = in[4];
- unsigned long register r8 asm("r8") = in[5];
- unsigned long register r9 asm("r9") = in[6];
- unsigned long register r10 asm("r10") = in[7];
- unsigned long register r11 asm("r11") = nr;
- unsigned long register r12 asm("r12");
+ register unsigned long r0 asm("r0");
+ register unsigned long r3 asm("r3") = in[0];
+ register unsigned long r4 asm("r4") = in[1];
+ register unsigned long r5 asm("r5") = in[2];
+ register unsigned long r6 asm("r6") = in[3];
+ register unsigned long r7 asm("r7") = in[4];
+ register unsigned long r8 asm("r8") = in[5];
+ register unsigned long r9 asm("r9") = in[6];
+ register unsigned long r10 asm("r10") = in[7];
+ register unsigned long r11 asm("r11") = nr;
+ register unsigned long r12 asm("r12");
asm volatile("bl epapr_hypercall_start"
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
@@ -508,7 +508,7 @@ static unsigned long epapr_hypercall(unsigned long *in,
static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
unsigned long r;
@@ -520,7 +520,7 @@ static inline long epapr_hypercall0_1(unsigned int nr, unsigned long *r2)
static inline long epapr_hypercall0(unsigned int nr)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
return epapr_hypercall(in, out, nr);
@@ -528,7 +528,7 @@ static inline long epapr_hypercall0(unsigned int nr)
static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -538,7 +538,7 @@ static inline long epapr_hypercall1(unsigned int nr, unsigned long p1)
static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
unsigned long p2)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -549,7 +549,7 @@ static inline long epapr_hypercall2(unsigned int nr, unsigned long p1,
static inline long epapr_hypercall3(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -562,7 +562,7 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
unsigned long p2, unsigned long p3,
unsigned long p4)
{
- unsigned long in[8];
+ unsigned long in[8] = {0};
unsigned long out[8];
in[0] = p1;
@@ -571,5 +571,5 @@ static inline long epapr_hypercall4(unsigned int nr, unsigned long p1,
in[3] = p4;
return epapr_hypercall(in, out, nr);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _EPAPR_HCALLS_H */
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index a8b52b61043f..1a83b1ff3578 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Definitions for use by exception code on Book3-E
*
* Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_EXCEPTION_64E_H
#define _ASM_POWERPC_EXCEPTION_64E_H
@@ -69,14 +65,8 @@
#define EX_TLB_ESR ( 9 * 8) /* Level 0 and 2 only */
#define EX_TLB_SRR0 (10 * 8)
#define EX_TLB_SRR1 (11 * 8)
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define EX_TLB_R8 (12 * 8)
-#define EX_TLB_R9 (13 * 8)
-#define EX_TLB_LR (14 * 8)
-#define EX_TLB_SIZE (15 * 8)
-#else
-#define EX_TLB_SIZE (12 * 8)
-#endif
+#define EX_TLB_R7 (12 * 8)
+#define EX_TLB_SIZE (13 * 8)
#define START_EXCEPTION(label) \
.globl exc_##label##_book3e; \
@@ -113,8 +103,7 @@ exc_##label##_book3e:
std r11,EX_TLB_R12(r12); \
mtspr SPRN_SPRG_TLB_EXFRAME,r14; \
std r15,EX_TLB_SRR1(r12); \
- std r16,EX_TLB_SRR0(r12); \
- TLB_MISS_PROLOG_STATS
+ std r16,EX_TLB_SRR0(r12);
/* And these are the matching epilogs that restores things
*
@@ -146,7 +135,6 @@ exc_##label##_book3e:
mtspr SPRN_SRR0,r15; \
ld r15,EX_TLB_R15(r12); \
mtspr SPRN_SRR1,r16; \
- TLB_MISS_RESTORE_STATS \
ld r16,EX_TLB_R16(r12); \
ld r12,EX_TLB_R12(r12); \
@@ -161,52 +149,24 @@ exc_##label##_book3e:
addi r11,r13,PACA_EXTLB; \
TLB_MISS_RESTORE(r11)
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define TLB_MISS_PROLOG_STATS \
- mflr r10; \
- std r8,EX_TLB_R8(r12); \
- std r9,EX_TLB_R9(r12); \
- std r10,EX_TLB_LR(r12);
-#define TLB_MISS_RESTORE_STATS \
- ld r16,EX_TLB_LR(r12); \
- ld r9,EX_TLB_R9(r12); \
- ld r8,EX_TLB_R8(r12); \
- mtlr r16;
-#define TLB_MISS_STATS_D(name) \
- addi r9,r13,MMSTAT_DSTATS+name; \
- bl tlb_stat_inc;
-#define TLB_MISS_STATS_I(name) \
- addi r9,r13,MMSTAT_ISTATS+name; \
- bl tlb_stat_inc;
-#define TLB_MISS_STATS_X(name) \
- ld r8,PACA_EXTLB+EX_TLB_ESR(r13); \
- cmpdi cr2,r8,-1; \
- beq cr2,61f; \
- addi r9,r13,MMSTAT_DSTATS+name; \
- b 62f; \
-61: addi r9,r13,MMSTAT_ISTATS+name; \
-62: bl tlb_stat_inc;
-#define TLB_MISS_STATS_SAVE_INFO \
- std r14,EX_TLB_ESR(r12); /* save ESR */
-#define TLB_MISS_STATS_SAVE_INFO_BOLTED \
- std r14,PACA_EXTLB+EX_TLB_ESR(r13); /* save ESR */
-#else
-#define TLB_MISS_PROLOG_STATS
-#define TLB_MISS_RESTORE_STATS
-#define TLB_MISS_PROLOG_STATS_BOLTED
-#define TLB_MISS_RESTORE_STATS_BOLTED
-#define TLB_MISS_STATS_D(name)
-#define TLB_MISS_STATS_I(name)
-#define TLB_MISS_STATS_X(name)
-#define TLB_MISS_STATS_Y(name)
-#define TLB_MISS_STATS_SAVE_INFO
-#define TLB_MISS_STATS_SAVE_INFO_BOLTED
+#ifndef __ASSEMBLER__
+extern unsigned int interrupt_base_book3e;
#endif
#define SET_IVOR(vector_number, vector_offset) \
- li r3,vector_offset@l; \
- ori r3,r3,interrupt_base_book3e@l; \
+ LOAD_REG_ADDR(r3,interrupt_base_book3e);\
+ ori r3,r3,vector_offset@l; \
mtspr SPRN_IVOR##vector_number,r3;
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+#define RFI_TO_KERNEL \
+ rfi
+
+#define RFI_TO_USER \
+ rfi
#endif /* _ASM_POWERPC_EXCEPTION_64E_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 77f52b26dad6..a9437e89f69f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_EXCEPTION_H
#define _ASM_POWERPC_EXCEPTION_H
/*
@@ -18,11 +19,6 @@
*
* This file contains the low-level support and setup for the
* PowerPC-64 platform, including trap and interrupt dispatch.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/*
* The following macros define the code that appears as
@@ -34,545 +30,149 @@
* exception handlers (including pSeries LPAR) and iSeries LPAR
* implementations as possible.
*/
+#include <asm/feature-fixups.h>
+
+/* PACA save area size in u64 units (exgen, exmc, etc) */
+#define EX_SIZE 10
+/* PACA save area offsets */
#define EX_R9 0
#define EX_R10 8
#define EX_R11 16
#define EX_R12 24
#define EX_R13 32
-#define EX_SRR0 40
-#define EX_DAR 48
-#define EX_DSISR 56
-#define EX_CCR 60
-#define EX_R3 64
-#define EX_LR 72
-#define EX_CFAR 80
-#define EX_PPR 88 /* SMT thread status register (priority) */
-#define EX_CTR 96
-
-#ifdef CONFIG_RELOCATABLE
-#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
- ld r12,PACAKBASE(r13); /* get high part of &label */ \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label); \
- mtctr r12; \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- li r10,MSR_RI; \
- mtmsrd r10,1; /* Set RI (EE=0) */ \
- bctr;
-#else
-/* If not relocatable, we can jump directly -- and save messing with LR */
-#define __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- li r10,MSR_RI; \
- mtmsrd r10,1; /* Set RI (EE=0) */ \
- b label;
-#endif
-#define EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
- __EXCEPTION_RELON_PROLOG_PSERIES_1(label, h) \
+#define EX_DAR 40
+#define EX_DSISR 48
+#define EX_CCR 52
+#define EX_CFAR 56
+#define EX_PPR 64
+#define EX_CTR 72
/*
- * As EXCEPTION_PROLOG_PSERIES(), except we've already got relocation on
- * so no need to rfid. Save lr in case we're CONFIG_RELOCATABLE, in which
- * case EXCEPTION_RELON_PROLOG_PSERIES_1 will be using lr.
+ * maximum recursive depth of MCE exceptions
*/
-#define EXCEPTION_RELON_PROLOG_PSERIES(area, label, h, extra, vec) \
- EXCEPTION_PROLOG_0(area); \
- EXCEPTION_PROLOG_1(area, extra, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label, h)
+#define MAX_MCE_DEPTH 4
-/*
- * We're short on space and time in the exception prolog, so we can't
- * use the normal SET_REG_IMMEDIATE macro. Normally we just need the
- * low halfword of the address, but for Kdump we need the whole low
- * word.
- */
-#define LOAD_HANDLER(reg, label) \
- /* Handlers must be within 64K of kbase, which must be 64k aligned */ \
- ori reg,reg,(label)-_stext; /* virt addr of handler ... */
+#ifdef __ASSEMBLER__
-/* Exception register prefixes */
-#define EXC_HV H
-#define EXC_STD
+#define STF_ENTRY_BARRIER_SLOT \
+ STF_ENTRY_BARRIER_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler. So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area) mfctr reg ; std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr reg
-#define RESTORE_CTR(reg, area)
-#endif
+#define STF_EXIT_BARRIER_SLOT \
+ STF_EXIT_BARRIER_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop
-/*
- * PPR save/restore macros used in exceptions_64s.S
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra, rb) \
-BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,PACACURRENT(r13); \
- ld rb,area+EX_PPR(r13); /* Read PPR from paca */ \
- std rb,TASKTHREADPPR(ra); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
+#define ENTRY_FLUSH_SLOT \
+ ENTRY_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop;
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941) \
- ld ra,area+EX_PPR(r13); \
- mtspr SPRN_PPR,ra; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
+#define SCV_ENTRY_FLUSH_SLOT \
+ SCV_ENTRY_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop;
/*
- * Increase the priority on systems where PPR save/restore is not
- * implemented/ supported.
+ * r10 must be free to use, r13 must be paca
*/
-#define HMT_MEDIUM_PPR_DISCARD \
-BEGIN_FTR_SECTION_NESTED(942) \
- HMT_MEDIUM; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942) /*non P7*/
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mfspr ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- mtspr spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \
-BEGIN_FTR_SECTION_NESTED(943) \
- std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-#define EXCEPTION_PROLOG_0(area) \
- GET_PACA(r13); \
- std r9,area+EX_R9(r13); /* save r9 */ \
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
- HMT_MEDIUM; \
- std r10,area+EX_R10(r13); /* save r10 - r12 */ \
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+#define INTERRUPT_TO_KERNEL \
+ STF_ENTRY_BARRIER_SLOT; \
+ ENTRY_FLUSH_SLOT
-#define __EXCEPTION_PROLOG_1(area, extra, vec) \
- OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
- OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
- SAVE_CTR(r10, area); \
- mfcr r9; \
- extra(vec); \
- std r11,area+EX_R11(r13); \
- std r12,area+EX_R12(r13); \
- GET_SCRATCH0(r10); \
- std r10,area+EX_R13(r13)
-#define EXCEPTION_PROLOG_1(area, extra, vec) \
- __EXCEPTION_PROLOG_1(area, extra, vec)
-
-#define __EXCEPTION_PROLOG_PSERIES_1(label, h) \
- ld r12,PACAKBASE(r13); /* get high part of &label */ \
- ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \
- mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \
- LOAD_HANDLER(r12,label) \
- mtspr SPRN_##h##SRR0,r12; \
- mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \
- mtspr SPRN_##h##SRR1,r10; \
- h##rfid; \
- b . /* prevent speculative execution */
-#define EXCEPTION_PROLOG_PSERIES_1(label, h) \
- __EXCEPTION_PROLOG_PSERIES_1(label, h)
-
-#define EXCEPTION_PROLOG_PSERIES(area, label, h, extra, vec) \
- EXCEPTION_PROLOG_0(area); \
- EXCEPTION_PROLOG_1(area, extra, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label, h);
-
-#define __KVMTEST(n) \
- lbz r10,HSTATE_IN_GUEST(r13); \
- cmpwi r10,0; \
- bne do_kvm_##n
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
- * If hv is possible, interrupts come into to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
+ * r10, ctr must be free to use, r13 must be paca
*/
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-
-#define __KVM_HANDLER(area, h, n) \
-do_kvm_##n: \
- BEGIN_FTR_SECTION_NESTED(947) \
- ld r10,area+EX_CFAR(r13); \
- std r10,HSTATE_CFAR(r13); \
- END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947); \
- BEGIN_FTR_SECTION_NESTED(948) \
- ld r10,area+EX_PPR(r13); \
- std r10,HSTATE_PPR(r13); \
- END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
- ld r10,area+EX_R10(r13); \
- stw r9,HSTATE_SCRATCH1(r13); \
- ld r9,area+EX_R9(r13); \
- std r12,HSTATE_SCRATCH0(r13); \
- li r12,n; \
- b kvmppc_interrupt
-
-#define __KVM_HANDLER_SKIP(area, h, n) \
-do_kvm_##n: \
- cmpwi r10,KVM_GUEST_MODE_SKIP; \
- ld r10,area+EX_R10(r13); \
- beq 89f; \
- stw r9,HSTATE_SCRATCH1(r13); \
- BEGIN_FTR_SECTION_NESTED(948) \
- ld r9,area+EX_PPR(r13); \
- std r9,HSTATE_PPR(r13); \
- END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948); \
- ld r9,area+EX_R9(r13); \
- std r12,HSTATE_SCRATCH0(r13); \
- li r12,n; \
- b kvmppc_interrupt; \
-89: mtocrf 0x80,r9; \
- ld r9,area+EX_R9(r13); \
- b kvmppc_skip_##h##interrupt
-
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-#define KVMTEST(n) __KVMTEST(n)
-#define KVM_HANDLER(area, h, n) __KVM_HANDLER(area, h, n)
-#define KVM_HANDLER_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
-
-#else
-#define KVMTEST(n)
-#define KVM_HANDLER(area, h, n)
-#define KVM_HANDLER_SKIP(area, h, n)
-#endif
-
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-#define KVMTEST_PR(n) __KVMTEST(n)
-#define KVM_HANDLER_PR(area, h, n) __KVM_HANDLER(area, h, n)
-#define KVM_HANDLER_PR_SKIP(area, h, n) __KVM_HANDLER_SKIP(area, h, n)
-
-#else
-#define KVMTEST_PR(n)
-#define KVM_HANDLER_PR(area, h, n)
-#define KVM_HANDLER_PR_SKIP(area, h, n)
-#endif
-
-#define NOTEST(n)
+#define SCV_INTERRUPT_TO_KERNEL \
+ STF_ENTRY_BARRIER_SLOT; \
+ SCV_ENTRY_FLUSH_SLOT
/*
- * The common exception prolog is used for all except a few exceptions
- * such as a segment miss on a kernel address. We have to be prepared
- * to take another exception from the point where we first touch the
- * kernel stack onwards.
+ * Macros for annotating the expected destination of (h)rfid
*
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
- */
-#define EXCEPTION_PROLOG_COMMON(n, area) \
- andi. r10,r12,MSR_PR; /* See if coming from user */ \
- mr r10,r1; /* Save r1 */ \
- subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
- beq- 1f; \
- ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
-1: cmpdi cr1,r1,-INT_FRAME_SIZE; /* check if r1 is in userspace */ \
- blt+ cr1,3f; /* abort if it is */ \
- li r1,(n); /* will be reloaded later */ \
- sth r1,PACA_TRAP_SAVE(r13); \
- std r3,area+EX_R3(r13); \
- addi r3,r13,area; /* r3 -> where regs are saved*/ \
- RESTORE_CTR(r1, area); \
- b bad_stack; \
-3: std r9,_CCR(r1); /* save CR in stackframe */ \
- std r11,_NIP(r1); /* save SRR0 in stackframe */ \
- std r12,_MSR(r1); /* save SRR1 in stackframe */ \
- std r10,0(r1); /* make stack chain pointer */ \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r10,GPR1(r1); /* save r1 in stackframe */ \
- beq 4f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r9, r10); \
- SAVE_PPR(area, r9, r10); \
-4: EXCEPTION_PROLOG_COMMON_2(area) \
- EXCEPTION_PROLOG_COMMON_3(n) \
- ACCOUNT_STOLEN_TIME
-
-/* Save original regs values from save area to stack frame. */
-#define EXCEPTION_PROLOG_COMMON_2(area) \
- ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
- ld r10,area+EX_R10(r13); \
- std r9,GPR9(r1); \
- std r10,GPR10(r1); \
- ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
- ld r10,area+EX_R12(r13); \
- ld r11,area+EX_R13(r13); \
- std r9,GPR11(r1); \
- std r10,GPR12(r1); \
- std r11,GPR13(r1); \
- BEGIN_FTR_SECTION_NESTED(66); \
- ld r10,area+EX_CFAR(r13); \
- std r10,ORIG_GPR3(r1); \
- END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
- GET_CTR(r10, area); \
- std r10,_CTR(r1);
-
-#define EXCEPTION_PROLOG_COMMON_3(n) \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- mflr r9; /* Get LR, later save to stack */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- std r9,_LINK(r1); \
- lbz r10,PACASOFTIRQEN(r13); \
- mfspr r11,SPRN_XER; /* save XER in stackframe */ \
- std r10,SOFTE(r1); \
- std r11,_XER(r1); \
- li r9,(n)+1; \
- std r9,_TRAP(r1); /* set trap number */ \
- li r10,0; \
- ld r11,exception_marker@toc(r2); \
- std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
-
-/*
- * Exception vectors.
- */
-#define STD_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_pSeries; \
-label##_pSeries: \
- HMT_MEDIUM_PPR_DISCARD; \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_STD, KVMTEST_PR, vec)
-
-/* Version of above for when we have to branch out-of-line */
-#define STD_EXCEPTION_PSERIES_OOL(vec, label) \
- .globl label##_pSeries; \
-label##_pSeries: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD)
-
-#define STD_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_hv; \
-label##_hv: \
- HMT_MEDIUM_PPR_DISCARD; \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_HV, KVMTEST, vec)
-
-/* Version of above for when we have to branch out-of-line */
-#define STD_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_hv; \
-label##_hv: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV)
-
-#define STD_RELON_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_relon_pSeries; \
-label##_relon_pSeries: \
- HMT_MEDIUM_PPR_DISCARD; \
- /* No guest interrupts come through here */ \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_STD, NOTEST, vec)
-
-#define STD_RELON_EXCEPTION_PSERIES_OOL(vec, label) \
- .globl label##_relon_pSeries; \
-label##_relon_pSeries: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_STD)
-
-#define STD_RELON_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_relon_hv; \
-label##_relon_hv: \
- HMT_MEDIUM_PPR_DISCARD; \
- /* No guest interrupts come through here */ \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
- EXC_HV, NOTEST, vec)
-
-#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_relon_hv; \
-label##_relon_hv: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, EXC_HV)
-
-/* This associate vector numbers with bits in paca->irq_happened */
-#define SOFTEN_VALUE_0x500 PACA_IRQ_EE
-#define SOFTEN_VALUE_0x502 PACA_IRQ_EE
-#define SOFTEN_VALUE_0x900 PACA_IRQ_DEC
-#define SOFTEN_VALUE_0x982 PACA_IRQ_DEC
-#define SOFTEN_VALUE_0xa00 PACA_IRQ_DBELL
-#define SOFTEN_VALUE_0xe80 PACA_IRQ_DBELL
-#define SOFTEN_VALUE_0xe82 PACA_IRQ_DBELL
-#define SOFTEN_VALUE_0xe60 PACA_IRQ_HMI
-#define SOFTEN_VALUE_0xe62 PACA_IRQ_HMI
-
-#define __SOFTEN_TEST(h, vec) \
- lbz r10,PACASOFTIRQEN(r13); \
- cmpwi r10,0; \
- li r10,SOFTEN_VALUE_##vec; \
- beq masked_##h##interrupt
-#define _SOFTEN_TEST(h, vec) __SOFTEN_TEST(h, vec)
-
-#define SOFTEN_TEST_PR(vec) \
- KVMTEST_PR(vec); \
- _SOFTEN_TEST(EXC_STD, vec)
-
-#define SOFTEN_TEST_HV(vec) \
- KVMTEST(vec); \
- _SOFTEN_TEST(EXC_HV, vec)
-
-#define SOFTEN_TEST_HV_201(vec) \
- KVMTEST(vec); \
- _SOFTEN_TEST(EXC_STD, vec)
-
-#define SOFTEN_NOTEST_PR(vec) _SOFTEN_TEST(EXC_STD, vec)
-#define SOFTEN_NOTEST_HV(vec) _SOFTEN_TEST(EXC_HV, vec)
-
-#define __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, h);
-
-#define _MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_EXCEPTION_PSERIES(vec, label, h, extra)
-
-#define MASKABLE_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_pSeries; \
-label##_pSeries: \
- HMT_MEDIUM_PPR_DISCARD; \
- _MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_TEST_PR)
-
-#define MASKABLE_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_hv; \
-label##_hv: \
- _MASKABLE_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_TEST_HV)
-
-#define MASKABLE_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_hv; \
-label##_hv: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV);
-
-#define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
- HMT_MEDIUM_PPR_DISCARD; \
- SET_SCRATCH0(r13); /* save r13 */ \
- EXCEPTION_PROLOG_0(PACA_EXGEN); \
- __EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec); \
- EXCEPTION_RELON_PROLOG_PSERIES_1(label##_common, h);
-#define _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra) \
- __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)
-
-#define MASKABLE_RELON_EXCEPTION_PSERIES(loc, vec, label) \
- . = loc; \
- .globl label##_relon_pSeries; \
-label##_relon_pSeries: \
- _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_STD, SOFTEN_NOTEST_PR)
-
-#define MASKABLE_RELON_EXCEPTION_HV(loc, vec, label) \
- . = loc; \
- .globl label##_relon_hv; \
-label##_relon_hv: \
- _MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, \
- EXC_HV, SOFTEN_NOTEST_HV)
-
-#define MASKABLE_RELON_EXCEPTION_HV_OOL(vec, label) \
- .globl label##_relon_hv; \
-label##_relon_hv: \
- EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_HV, vec); \
- EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV);
-
-/*
- * Our exception common code can be passed various "additions"
- * to specify the behaviour of interrupts, whether to kick the
- * runlatch, etc...
- */
-
-/*
- * This addition reconciles our actual IRQ state with the various software
- * flags that track it. This may call C code.
- */
-#define ADD_RECONCILE RECONCILE_IRQ_STATE(r10,r11)
-
-#define ADD_NVGPRS \
- bl save_nvgprs
-
-#define RUNLATCH_ON \
-BEGIN_FTR_SECTION \
- CURRENT_THREAD_INFO(r3, r1); \
- ld r4,TI_LOCAL_FLAGS(r3); \
- andi. r0,r4,_TLF_RUNLATCH; \
- beql ppc64_runlatch_on_trampoline; \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
-#define EXCEPTION_COMMON(trap, label, hdlr, ret, additions) \
- .align 7; \
- .globl label##_common; \
-label##_common: \
- EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \
- /* Volatile regs are potentially clobbered here */ \
- additions; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- bl hdlr; \
- b ret
-
-#define STD_EXCEPTION_COMMON(trap, label, hdlr) \
- EXCEPTION_COMMON(trap, label, hdlr, ret_from_except, \
- ADD_NVGPRS;ADD_RECONCILE)
-
-/*
- * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
- * in the idle task and therefore need the special idle handling
- * (finish nap and runlatch)
- */
-#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \
- EXCEPTION_COMMON(trap, label, hdlr, ret_from_except_lite, \
- FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON)
-
-/*
- * When the idle code in power4_idle puts the CPU into NAP mode,
- * it has to do so in a loop, and relies on the external interrupt
- * and decrementer interrupt entry code to get it out of the loop.
- * It sets the _TLF_NAPPING bit in current_thread_info()->local_flags
- * to signal that it is in the loop and needs help to get out.
- */
-#ifdef CONFIG_PPC_970_NAP
-#define FINISH_NAP \
-BEGIN_FTR_SECTION \
- CURRENT_THREAD_INFO(r11, r1); \
- ld r9,TI_LOCAL_FLAGS(r11); \
- andi. r10,r9,_TLF_NAPPING; \
- bnel power4_fixup_nap; \
-END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-#else
-#define FINISH_NAP
-#endif
+ * The nop instructions allow us to insert one or more instructions to flush the
+ * L1-D cache when returning to userspace or a guest.
+ *
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE
+ * without additional synchronisation instructions.
+ *
+ * soft-masked interrupt replay does not include a context-synchronising rfid,
+ * but those always return to kernel, the sync is only required when returning
+ * to user.
+ */
+#define RFI_FLUSH_SLOT \
+ RFI_FLUSH_FIXUP_SECTION; \
+ nop; \
+ nop; \
+ nop
+
+#define RFI_TO_KERNEL \
+ rfid
+
+#define RFI_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_USER_OR_KERNEL \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define RFI_TO_GUEST \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ rfid; \
+ b rfi_flush_fallback
+
+#define HRFI_TO_KERNEL \
+ hrfid
+
+#define HRFI_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_USER_OR_KERNEL \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_GUEST \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define HRFI_TO_UNKNOWN \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ hrfid; \
+ b hrfi_flush_fallback
+
+#define RFSCV_TO_USER \
+ STF_EXIT_BARRIER_SLOT; \
+ RFI_FLUSH_SLOT; \
+ RFSCV; \
+ b rfscv_flush_fallback
+
+#else /* __ASSEMBLER__ */
+/* Prototype for function defined in exceptions-64s.S */
+void do_uaccess_flush(void);
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/exec.h b/arch/powerpc/include/asm/exec.h
index 8196e9c7d7e8..92cac4851275 100644
--- a/arch/powerpc/include/asm/exec.h
+++ b/arch/powerpc/include/asm/exec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
diff --git a/arch/powerpc/include/asm/extable.h b/arch/powerpc/include/asm/extable.h
new file mode 100644
index 000000000000..d483a9c24ba9
--- /dev/null
+++ b/arch/powerpc/include/asm/extable.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ARCH_POWERPC_EXTABLE_H
+#define _ARCH_POWERPC_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative addresses: the first is
+ * the address of an instruction that is allowed to fault, and the second is
+ * the address at which the program should continue. No registers are
+ * modified, so it is entirely up to the continuation code to figure out what
+ * to do.
+ *
+ * All the routines below use bits of fixup code that are out of line with the
+ * main instruction path. This means when everything is well, we don't even
+ * have to jump over them. Further, they do not intrude on our cache or tlb
+ * entries.
+ */
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+#ifndef __ASSEMBLER__
+
+struct exception_table_entry {
+ int insn;
+ int fixup;
+};
+
+static inline unsigned long extable_fixup(const struct exception_table_entry *x)
+{
+ return (unsigned long)&x->fixup + x->fixup;
+}
+
+#endif
+
+/*
+ * Helper macro for exception table entries
+ */
+#define EX_TABLE(_fault, _target) \
+ stringify_in_c(.section __ex_table,"a";)\
+ stringify_in_c(.balign 4;) \
+ stringify_in_c(.long (_fault) - . ;) \
+ stringify_in_c(.long (_target) - . ;) \
+ stringify_in_c(.previous)
+
+#endif
diff --git a/arch/powerpc/include/asm/fadump-internal.h b/arch/powerpc/include/asm/fadump-internal.h
new file mode 100644
index 000000000000..e83869a4eb6a
--- /dev/null
+++ b/arch/powerpc/include/asm/fadump-internal.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Firmware-Assisted Dump internal code.
+ *
+ * Copyright 2011, Mahesh Salgaonkar, IBM Corporation.
+ * Copyright 2019, Hari Bathini, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_FADUMP_INTERNAL_H
+#define _ASM_POWERPC_FADUMP_INTERNAL_H
+
+/* Maximum number of memory regions kernel supports */
+#define FADUMP_MAX_MEM_REGS 128
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
+
+/* The upper limit percentage for user specified boot memory size (25%) */
+#define MAX_BOOT_MEM_RATIO 4
+
+#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
+
+/* FAD commands */
+#define FADUMP_REGISTER 1
+#define FADUMP_UNREGISTER 2
+#define FADUMP_INVALIDATE 3
+
+/*
+ * Copy the ascii values for first 8 characters from a string into u64
+ * variable at their respective indexes.
+ * e.g.
+ * The string "FADMPINF" will be converted into 0x4641444d50494e46
+ */
+static inline u64 fadump_str_to_u64(const char *str)
+{
+ u64 val = 0;
+ int i;
+
+ for (i = 0; i < sizeof(val); i++)
+ val = (*str) ? (val << 8) | *str++ : val << 8;
+ return val;
+}
+
+#define FADUMP_CPU_UNKNOWN (~((u32)0))
+
+/*
+ * The introduction of new fields in the fadump crash info header has
+ * led to a change in the magic key from `FADMPINF` to `FADMPSIG` for
+ * identifying a kernel crash from an old kernel.
+ *
+ * To prevent the need for further changes to the magic number in the
+ * event of future modifications to the fadump crash info header, a
+ * version field has been introduced to track the fadump crash info
+ * header version.
+ *
+ * Consider a few points before adding new members to the fadump crash info
+ * header structure:
+ *
+ * - Append new members; avoid adding them in between.
+ * - Non-primitive members should have a size member as well.
+ * - For every change in the fadump header, increment the
+ * fadump header version. This helps the updated kernel decide how to
+ * handle kernel dumps from older kernels.
+ */
+#define FADUMP_CRASH_INFO_MAGIC_OLD fadump_str_to_u64("FADMPINF")
+#define FADUMP_CRASH_INFO_MAGIC fadump_str_to_u64("FADMPSIG")
+#define FADUMP_HEADER_VERSION 1
+
+/* fadump crash info structure */
+struct fadump_crash_info_header {
+ u64 magic_number;
+ u32 version;
+ u32 crashing_cpu;
+ u64 vmcoreinfo_raddr;
+ u64 vmcoreinfo_size;
+ u32 pt_regs_sz;
+ u32 cpu_mask_sz;
+ struct pt_regs regs;
+ struct cpumask cpu_mask;
+};
+
+struct fadump_memory_range {
+ u64 base;
+ u64 size;
+};
+
+/* fadump memory ranges info */
+#define RNG_NAME_SZ 16
+struct fadump_mrange_info {
+ char name[RNG_NAME_SZ];
+ struct fadump_memory_range *mem_ranges;
+ u32 mem_ranges_sz;
+ u32 mem_range_cnt;
+ u32 max_mem_ranges;
+ bool is_static;
+};
+
+/* Platform specific callback functions */
+struct fadump_ops;
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ unsigned long reserve_dump_area_start;
+ unsigned long reserve_dump_area_size;
+ /* cmd line option during boot */
+ unsigned long reserve_bootvar;
+
+ unsigned long cpu_state_data_size;
+ u64 cpu_state_dest_vaddr;
+ u32 cpu_state_data_version;
+ u32 cpu_state_entry_size;
+
+ unsigned long hpte_region_size;
+
+ unsigned long boot_memory_size;
+ u64 boot_mem_dest_addr;
+ u64 boot_mem_addr[FADUMP_MAX_MEM_REGS];
+ u64 boot_mem_sz[FADUMP_MAX_MEM_REGS];
+ u64 boot_mem_top;
+ u64 boot_mem_regs_cnt;
+
+ unsigned long fadumphdr_addr;
+ u64 elfcorehdr_addr;
+ u64 elfcorehdr_size;
+ unsigned long cpu_notes_buf_vaddr;
+ unsigned long cpu_notes_buf_size;
+
+ unsigned long param_area;
+
+ /*
+ * Maximum size supported by firmware to copy from source to
+ * destination address per entry.
+ */
+ u64 max_copy_size;
+ u64 kernel_metadata;
+
+ int ibm_configure_kernel_dump;
+
+ unsigned long fadump_enabled:1;
+ unsigned long fadump_supported:1;
+ unsigned long dump_active:1;
+ unsigned long dump_registered:1;
+ unsigned long nocma:1;
+ unsigned long param_area_supported:1;
+
+ struct fadump_ops *ops;
+};
+
+struct fadump_ops {
+ u64 (*fadump_init_mem_struct)(struct fw_dump *fadump_conf);
+ u64 (*fadump_get_metadata_size)(void);
+ int (*fadump_setup_metadata)(struct fw_dump *fadump_conf);
+ u64 (*fadump_get_bootmem_min)(void);
+ int (*fadump_register)(struct fw_dump *fadump_conf);
+ int (*fadump_unregister)(struct fw_dump *fadump_conf);
+ int (*fadump_invalidate)(struct fw_dump *fadump_conf);
+ void (*fadump_cleanup)(struct fw_dump *fadump_conf);
+ int (*fadump_process)(struct fw_dump *fadump_conf);
+ void (*fadump_region_show)(struct fw_dump *fadump_conf,
+ struct seq_file *m);
+ void (*fadump_trigger)(struct fadump_crash_info_header *fdh,
+ const char *msg);
+ int (*fadump_max_boot_mem_rgns)(void);
+};
+
+/* Helper functions */
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus);
+void fadump_free_cpu_notes_buf(void);
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs);
+void __init fadump_update_elfcore_header(char *bufp);
+bool is_fadump_reserved_mem_contiguous(void);
+
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Firmware-assisted dump configuration details. */
+struct fw_dump {
+ u64 boot_mem_top;
+ u64 dump_active;
+};
+
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+#ifdef CONFIG_PPC_PSERIES
+extern void rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+rtas_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#ifdef CONFIG_PPC_POWERNV
+extern void opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node);
+#else
+static inline void
+opal_fadump_dt_scan(struct fw_dump *fadump_conf, u64 node) { }
+#endif
+
+#endif /* _ASM_POWERPC_FADUMP_INTERNAL_H */
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index a6774560afe3..a48f54dde4f6 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -1,217 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Firmware Assisted dump header file.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2011 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
-#ifndef __PPC64_FA_DUMP_H__
-#define __PPC64_FA_DUMP_H__
+#ifndef _ASM_POWERPC_FADUMP_H
+#define _ASM_POWERPC_FADUMP_H
#ifdef CONFIG_FA_DUMP
-/*
- * The RMA region will be saved for later dumping when kernel crashes.
- * RMA is Real Mode Area, the first block of logical memory address owned
- * by logical partition, containing the storage that may be accessed with
- * translate off.
- */
-#define RMA_START 0x0
-#define RMA_END (ppc64_rma_size)
-
-/*
- * On some Power systems where RMO is 128MB, it still requires minimum of
- * 256MB for kernel to boot successfully. When kdump infrastructure is
- * configured to save vmcore over network, we run into OOM issue while
- * loading modules related to network setup. Hence we need aditional 64M
- * of memory to avoid OOM issue.
- */
-#define MIN_BOOT_MEM (((RMA_END < (0x1UL << 28)) ? (0x1UL << 28) : RMA_END) \
- + (0x1UL << 26))
-
-#define memblock_num_regions(memblock_type) (memblock.memblock_type.cnt)
-
-#ifndef ELF_CORE_EFLAGS
-#define ELF_CORE_EFLAGS 0
-#endif
-
-/* Firmware provided dump sections */
-#define FADUMP_CPU_STATE_DATA 0x0001
-#define FADUMP_HPTE_REGION 0x0002
-#define FADUMP_REAL_MODE_REGION 0x0011
-
-/* Dump request flag */
-#define FADUMP_REQUEST_FLAG 0x00000001
-
-/* FAD commands */
-#define FADUMP_REGISTER 1
-#define FADUMP_UNREGISTER 2
-#define FADUMP_INVALIDATE 3
-
-/* Dump status flag */
-#define FADUMP_ERROR_FLAG 0x2000
-
-#define FADUMP_CPU_ID_MASK ((1UL << 32) - 1)
-
-#define CPU_UNKNOWN (~((u32)0))
-
-/* Utility macros */
-#define SKIP_TO_NEXT_CPU(reg_entry) \
-({ \
- while (reg_entry->reg_id != REG_ID("CPUEND")) \
- reg_entry++; \
- reg_entry++; \
-})
-
-/* Kernel Dump section info */
-struct fadump_section {
- u32 request_flag;
- u16 source_data_type;
- u16 error_flags;
- u64 source_address;
- u64 source_len;
- u64 bytes_dumped;
- u64 destination_address;
-};
-
-/* ibm,configure-kernel-dump header. */
-struct fadump_section_header {
- u32 dump_format_version;
- u16 dump_num_sections;
- u16 dump_status_flag;
- u32 offset_first_dump_section;
-
- /* Fields for disk dump option. */
- u32 dd_block_size;
- u64 dd_block_offset;
- u64 dd_num_blocks;
- u32 dd_offset_disk_path;
-
- /* Maximum time allowed to prevent an automatic dump-reboot. */
- u32 max_time_auto;
-};
-
-/*
- * Firmware Assisted dump memory structure. This structure is required for
- * registering future kernel dump with power firmware through rtas call.
- *
- * No disk dump option. Hence disk dump path string section is not included.
- */
-struct fadump_mem_struct {
- struct fadump_section_header header;
-
- /* Kernel dump sections */
- struct fadump_section cpu_state_data;
- struct fadump_section hpte_region;
- struct fadump_section rmr_region;
-};
-
-/* Firmware-assisted dump configuration details. */
-struct fw_dump {
- unsigned long cpu_state_data_size;
- unsigned long hpte_region_size;
- unsigned long boot_memory_size;
- unsigned long reserve_dump_area_start;
- unsigned long reserve_dump_area_size;
- /* cmd line option during boot */
- unsigned long reserve_bootvar;
-
- unsigned long fadumphdr_addr;
- unsigned long cpu_notes_buf;
- unsigned long cpu_notes_buf_size;
-
- int ibm_configure_kernel_dump;
-
- unsigned long fadump_enabled:1;
- unsigned long fadump_supported:1;
- unsigned long dump_active:1;
- unsigned long dump_registered:1;
-};
-
-/*
- * Copy the ascii values for first 8 characters from a string into u64
- * variable at their respective indexes.
- * e.g.
- * The string "FADMPINF" will be converted into 0x4641444d50494e46
- */
-static inline u64 str_to_u64(const char *str)
-{
- u64 val = 0;
- int i;
-
- for (i = 0; i < sizeof(val); i++)
- val = (*str) ? (val << 8) | *str++ : val << 8;
- return val;
-}
-#define STR_TO_HEX(x) str_to_u64(x)
-#define REG_ID(x) str_to_u64(x)
-
-#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
-#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
+extern int crashing_cpu;
-/* The firmware-assisted dump format.
- *
- * The register save area is an area in the partition's memory used to preserve
- * the register contents (CPU state data) for the active CPUs during a firmware
- * assisted dump. The dump format contains register save area header followed
- * by register entries. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND".
- */
-
-/* Register save area header. */
-struct fadump_reg_save_area_header {
- u64 magic_number;
- u32 version;
- u32 num_cpu_offset;
-};
-
-/* Register entry. */
-struct fadump_reg_entry {
- u64 reg_id;
- u64 reg_value;
-};
-
-/* fadump crash info structure */
-struct fadump_crash_info_header {
- u64 magic_number;
- u64 elfcorehdr_addr;
- u32 crashing_cpu;
- struct pt_regs regs;
- struct cpumask cpu_online_mask;
-};
-
-/* Crash memory ranges */
-#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2)
-
-struct fad_crash_memory_ranges {
- unsigned long long base;
- unsigned long long size;
-};
-
-extern int early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data);
-extern int fadump_reserve_mem(void);
+extern int is_fadump_memory_area(u64 addr, ulong size);
extern int setup_fadump(void);
extern int is_fadump_active(void);
+extern int should_fadump_crash(void);
extern void crash_fadump(struct pt_regs *, const char *);
extern void fadump_cleanup(void);
+void fadump_setup_param_area(void);
+extern void fadump_append_bootargs(void);
#else /* CONFIG_FA_DUMP */
static inline int is_fadump_active(void) { return 0; }
+static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
+static inline void fadump_cleanup(void) { }
+static inline void fadump_setup_param_area(void) { }
+static inline void fadump_append_bootargs(void) { }
+#endif /* !CONFIG_FA_DUMP */
+
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
+extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data);
+extern int fadump_reserve_mem(void);
#endif
+
+#if defined(CONFIG_FA_DUMP) && defined(CONFIG_CMA)
+void fadump_cma_init(void);
+#else
+static inline void fadump_cma_init(void) { }
#endif
+
+#endif /* _ASM_POWERPC_FADUMP_H */
diff --git a/arch/powerpc/include/asm/fb.h b/arch/powerpc/include/asm/fb.h
deleted file mode 100644
index 411af8d17a69..000000000000
--- a/arch/powerpc/include/asm/fb.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _ASM_FB_H_
-#define _ASM_FB_H_
-
-#include <linux/fb.h>
-#include <linux/fs.h>
-#include <asm/page.h>
-
-static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma,
- unsigned long off)
-{
- vma->vm_page_prot = phys_mem_access_prot(file, off >> PAGE_SHIFT,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-static inline int fb_is_primary_device(struct fb_info *info)
-{
- return 0;
-}
-
-#endif /* _ASM_FB_H_ */
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 9a67a38bf7b9..756a6c694018 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -1,11 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __ASM_POWERPC_FEATURE_FIXUPS_H
#define __ASM_POWERPC_FEATURE_FIXUPS_H
+#include <asm/asm-const.h>
+
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/*
@@ -19,11 +18,11 @@
*/
#if defined(CONFIG_PPC64) && !defined(__powerpc64__)
/* 64 bits kernel, 32 bits code (ie. vdso32) */
-#define FTR_ENTRY_LONG .llong
+#define FTR_ENTRY_LONG .8byte
#define FTR_ENTRY_OFFSET .long 0xffffffff; .long
#elif defined(CONFIG_PPC64)
-#define FTR_ENTRY_LONG .llong
-#define FTR_ENTRY_OFFSET .llong
+#define FTR_ENTRY_LONG .8byte
+#define FTR_ENTRY_OFFSET .8byte
#else
#define FTR_ENTRY_LONG .long
#define FTR_ENTRY_OFFSET .long
@@ -37,6 +36,24 @@ label##2: \
.align 2; \
label##3:
+
+#ifndef CONFIG_CC_IS_CLANG
+#define CHECK_ALT_SIZE(else_size, body_size) \
+ .ifgt (else_size) - (body_size); \
+ .error "Feature section else case larger than body"; \
+ .endif;
+#else
+/*
+ * If we use the ifgt syntax above, clang's assembler complains about the
+ * expression being non-absolute when the code appears in an inline assembly
+ * statement.
+ * As a workaround use an .org directive that has no effect if the else case
+ * instructions are smaller than the body, but fails otherwise.
+ */
+#define CHECK_ALT_SIZE(else_size, body_size) \
+ .org . + ((else_size) > (body_size));
+#endif
+
#define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \
label##4: \
.popsection; \
@@ -49,9 +66,7 @@ label##5: \
FTR_ENTRY_OFFSET label##2b-label##5b; \
FTR_ENTRY_OFFSET label##3b-label##5b; \
FTR_ENTRY_OFFSET label##4b-label##5b; \
- .ifgt (label##4b- label##3b)-(label##2b- label##1b); \
- .error "Feature section else case larger than body"; \
- .endif; \
+ CHECK_ALT_SIZE((label##4b-label##3b), (label##2b-label##1b)); \
.popsection;
@@ -66,6 +81,9 @@ label##5: \
#define END_FTR_SECTION(msk, val) \
END_FTR_SECTION_NESTED(msk, val, 97)
+#define END_FTR_SECTION_NESTED_IFSET(msk, label) \
+ END_FTR_SECTION_NESTED((msk), (msk), label)
+
#define END_FTR_SECTION_IFSET(msk) END_FTR_SECTION((msk), (msk))
#define END_FTR_SECTION_IFCLR(msk) END_FTR_SECTION((msk), 0)
@@ -95,6 +113,12 @@ label##5: \
#define END_MMU_FTR_SECTION(msk, val) \
END_MMU_FTR_SECTION_NESTED(msk, val, 97)
+#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \
+ END_MMU_FTR_SECTION_NESTED((msk), (msk), label)
+
+#define END_MMU_FTR_SECTION_NESTED_IFCLR(msk, label) \
+ END_MMU_FTR_SECTION_NESTED((msk), 0, label)
+
#define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk))
#define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0)
@@ -144,7 +168,7 @@ label##5: \
#define ALT_FW_FTR_SECTION_END_IFCLR(msk) \
ALT_FW_FTR_SECTION_END_NESTED_IFCLR(msk, 97)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define ASM_FTR_IF(section_if, section_else, msk, val) \
stringify_in_c(BEGIN_FTR_SECTION) \
@@ -172,7 +196,7 @@ label##5: \
#define ASM_MMU_FTR_IFCLR(section_if, section_else, msk) \
ASM_MMU_FTR_IF(section_if, section_else, (msk), 0)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* LWSYNC feature sections */
#define START_LWSYNC_SECTION(label) label##1:
@@ -184,4 +208,94 @@ label##3: \
FTR_ENTRY_OFFSET label##1b-label##3b; \
.popsection;
+#define STF_ENTRY_BARRIER_FIXUP_SECTION \
+953: \
+ .pushsection __stf_entry_barrier_fixup,"a"; \
+ .align 2; \
+954: \
+ FTR_ENTRY_OFFSET 953b-954b; \
+ .popsection;
+
+#define STF_EXIT_BARRIER_FIXUP_SECTION \
+955: \
+ .pushsection __stf_exit_barrier_fixup,"a"; \
+ .align 2; \
+956: \
+ FTR_ENTRY_OFFSET 955b-956b; \
+ .popsection;
+
+#define UACCESS_FLUSH_FIXUP_SECTION \
+959: \
+ .pushsection __uaccess_flush_fixup,"a"; \
+ .align 2; \
+960: \
+ FTR_ENTRY_OFFSET 959b-960b; \
+ .popsection;
+
+#define ENTRY_FLUSH_FIXUP_SECTION \
+957: \
+ .pushsection __entry_flush_fixup,"a"; \
+ .align 2; \
+958: \
+ FTR_ENTRY_OFFSET 957b-958b; \
+ .popsection;
+
+#define SCV_ENTRY_FLUSH_FIXUP_SECTION \
+957: \
+ .pushsection __scv_entry_flush_fixup,"a"; \
+ .align 2; \
+958: \
+ FTR_ENTRY_OFFSET 957b-958b; \
+ .popsection;
+
+#define RFI_FLUSH_FIXUP_SECTION \
+951: \
+ .pushsection __rfi_flush_fixup,"a"; \
+ .align 2; \
+952: \
+ FTR_ENTRY_OFFSET 951b-952b; \
+ .popsection;
+
+#define NOSPEC_BARRIER_FIXUP_SECTION \
+953: \
+ .pushsection __barrier_nospec_fixup,"a"; \
+ .align 2; \
+954: \
+ FTR_ENTRY_OFFSET 953b-954b; \
+ .popsection;
+
+#define START_BTB_FLUSH_SECTION \
+955: \
+
+#define END_BTB_FLUSH_SECTION \
+956: \
+ .pushsection __btb_flush_fixup,"a"; \
+ .align 2; \
+957: \
+ FTR_ENTRY_OFFSET 955b-957b; \
+ FTR_ENTRY_OFFSET 956b-957b; \
+ .popsection;
+
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+
+extern long stf_barrier_fallback;
+extern long entry_flush_fallback;
+extern long scv_entry_flush_fallback;
+extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
+extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
+extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup;
+extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
+extern long __start___scv_entry_flush_fixup, __stop___scv_entry_flush_fixup;
+extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
+extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
+extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
+
+extern bool static_key_feature_checks_initialized;
+
+void apply_feature_fixups(void);
+void update_mmu_feature_fixups(unsigned long mask);
+void setup_feature_keys(void);
+#endif
+
#endif /* __ASM_POWERPC_FEATURE_FIXUPS_H */
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 681bc0314b6b..abd7c56f4d55 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -1,21 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
*
* Modifications for ppc64:
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef __ASM_POWERPC_FIRMWARE_H
#define __ASM_POWERPC_FIRMWARE_H
#ifdef __KERNEL__
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
+#include <asm/asm-const.h>
/* firmware feature bitmask values */
@@ -38,23 +33,32 @@
#define FW_FEATURE_LLAN ASM_CONST(0x0000000000010000)
#define FW_FEATURE_BULK_REMOVE ASM_CONST(0x0000000000020000)
#define FW_FEATURE_XDABR ASM_CONST(0x0000000000040000)
-#define FW_FEATURE_MULTITCE ASM_CONST(0x0000000000080000)
+#define FW_FEATURE_PUT_TCE_IND ASM_CONST(0x0000000000080000)
#define FW_FEATURE_SPLPAR ASM_CONST(0x0000000000100000)
#define FW_FEATURE_LPAR ASM_CONST(0x0000000000400000)
#define FW_FEATURE_PS3_LV1 ASM_CONST(0x0000000000800000)
-#define FW_FEATURE_BEAT ASM_CONST(0x0000000001000000)
+#define FW_FEATURE_HPT_RESIZE ASM_CONST(0x0000000001000000)
#define FW_FEATURE_CMO ASM_CONST(0x0000000002000000)
#define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000)
#define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000)
#define FW_FEATURE_OPAL ASM_CONST(0x0000000010000000)
-#define FW_FEATURE_OPALv2 ASM_CONST(0x0000000020000000)
#define FW_FEATURE_SET_MODE ASM_CONST(0x0000000040000000)
#define FW_FEATURE_BEST_ENERGY ASM_CONST(0x0000000080000000)
-#define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
+#define FW_FEATURE_FORM1_AFFINITY ASM_CONST(0x0000000100000000)
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
-#define FW_FEATURE_OPALv3 ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
+#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
+#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
+#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
+#define FW_FEATURE_ULTRAVISOR ASM_CONST(0x0000004000000000)
+#define FW_FEATURE_STUFF_TCE ASM_CONST(0x0000008000000000)
+#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000)
+#define FW_FEATURE_FORM2_AFFINITY ASM_CONST(0x0000020000000000)
+#define FW_FEATURE_ENERGY_SCALE_INFO ASM_CONST(0x0000040000000000)
+#define FW_FEATURE_WATCHDOG ASM_CONST(0x0000080000000000)
+#define FW_FEATURE_PLPKS ASM_CONST(0x0000100000000000)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
enum {
#ifdef CONFIG_PPC64
@@ -65,18 +69,22 @@ enum {
FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ |
FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN |
FW_FEATURE_BULK_REMOVE | FW_FEATURE_XDABR |
- FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
+ FW_FEATURE_PUT_TCE_IND | FW_FEATURE_STUFF_TCE |
+ FW_FEATURE_SPLPAR | FW_FEATURE_LPAR |
FW_FEATURE_CMO | FW_FEATURE_VPHN | FW_FEATURE_XCMO |
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
- FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
+ FW_FEATURE_FORM1_AFFINITY | FW_FEATURE_PRRN |
+ FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
+ FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
+ FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
+ FW_FEATURE_RPT_INVALIDATE | FW_FEATURE_FORM2_AFFINITY |
+ FW_FEATURE_ENERGY_SCALE_INFO | FW_FEATURE_WATCHDOG |
+ FW_FEATURE_PLPKS,
FW_FEATURE_PSERIES_ALWAYS = 0,
- FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 |
- FW_FEATURE_OPALv3,
+ FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
- FW_FEATURE_CELLEB_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_BEAT,
- FW_FEATURE_CELLEB_ALWAYS = 0,
FW_FEATURE_NATIVE_POSSIBLE = 0,
FW_FEATURE_NATIVE_ALWAYS = 0,
FW_FEATURE_POSSIBLE =
@@ -89,10 +97,7 @@ enum {
#ifdef CONFIG_PPC_PS3
FW_FEATURE_PS3_POSSIBLE |
#endif
-#ifdef CONFIG_PPC_CELLEB
- FW_FEATURE_CELLEB_POSSIBLE |
-#endif
-#ifdef CONFIG_PPC_NATIVE
+#ifdef CONFIG_PPC_HASH_MMU_NATIVE
FW_FEATURE_NATIVE_ALWAYS |
#endif
0,
@@ -106,10 +111,7 @@ enum {
#ifdef CONFIG_PPC_PS3
FW_FEATURE_PS3_ALWAYS &
#endif
-#ifdef CONFIG_PPC_CELLEB
- FW_FEATURE_CELLEB_ALWAYS &
-#endif
-#ifdef CONFIG_PPC_NATIVE
+#ifdef CONFIG_PPC_HASH_MMU_NATIVE
FW_FEATURE_NATIVE_ALWAYS &
#endif
FW_FEATURE_POSSIBLE,
@@ -134,9 +136,16 @@ extern void machine_check_fwnmi(void);
/* This is true if we are using the firmware NMI handler (typically LPAR) */
extern int fwnmi_active;
+extern int ibm_nmi_interlock_token;
extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup;
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_PPC_PSERIES
+void pseries_probe_fw_features(void);
+#else
+static inline void pseries_probe_fw_features(void) { }
+#endif
+
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_FIRMWARE_H */
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 90f604bbcd19..bc5109eab5b7 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -14,16 +14,15 @@
#ifndef _ASM_FIXMAP_H
#define _ASM_FIXMAP_H
-#ifndef __ASSEMBLY__
-#include <linux/kernel.h>
+#ifndef __ASSEMBLER__
+#include <linux/sizes.h>
+#include <linux/pgtable.h>
#include <asm/page.h>
#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
-#include <asm/kmap_types.h>
+#include <asm/kmap_size.h>
#endif
-#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE))
-
/*
* Here we define all the compile-time 'special' virtual
* addresses. The point is to have a constant address at
@@ -44,26 +43,73 @@
*/
enum fixed_addresses {
FIX_HOLE,
+#ifdef CONFIG_PPC32
/* reserve the top 128K for early debugging purposes */
FIX_EARLY_DEBUG_TOP = FIX_HOLE,
- FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+((128*1024)/PAGE_SIZE)-1,
+ FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+ FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_MAX_IDX * NR_CPUS) - 1,
+#endif
+#ifdef CONFIG_PPC_8xx
+ /* For IMMR we need an aligned 512K area */
+#define FIX_IMMR_SIZE (512 * 1024 / PAGE_SIZE)
+ FIX_IMMR_START,
+ FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+ FIX_IMMR_SIZE,
+#endif
+#ifdef CONFIG_PPC_83xx
+ /* For IMMR we need an aligned 2M area */
+#define FIX_IMMR_SIZE (SZ_2M / PAGE_SIZE)
+ FIX_IMMR_START,
+ FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+ FIX_IMMR_SIZE,
#endif
/* FIX_PCIE_MCFG, */
+#endif /* CONFIG_PPC32 */
+ __end_of_permanent_fixed_addresses,
+
+#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS 16
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
__end_of_fixed_addresses
};
-extern void __set_fixmap (enum fixed_addresses idx,
- phys_addr_t phys, pgprot_t flags);
-
#define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
+#define FIXMAP_ALIGNED_SIZE (ALIGN(FIXADDR_TOP, PGDIR_SIZE) - \
+ ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE))
+#define FIXMAP_PTE_SIZE (FIXMAP_ALIGNED_SIZE / PGDIR_SIZE * PTE_TABLE_SIZE)
+
#define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG
+#define FIXMAP_PAGE_IO PAGE_KERNEL_NCG
#include <asm-generic/fixmap.h>
-#endif /* !__ASSEMBLY__ */
+static inline void __set_fixmap(enum fixed_addresses idx,
+ phys_addr_t phys, pgprot_t flags)
+{
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && __FIXADDR_SIZE > FIXADDR_SIZE);
+
+ if (__builtin_constant_p(idx))
+ BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+ else if (WARN_ON(idx >= __end_of_fixed_addresses))
+ return;
+ if (pgprot_val(flags))
+ map_kernel_page(__fix_to_virt(idx), phys, flags);
+ else
+ unmap_kernel_page(__fix_to_virt(idx));
+}
+
+#define __early_set_fixmap __set_fixmap
+
+#ifdef CONFIG_PPC_8xx
+#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
+#endif
+
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h
index 936a904ae78c..f4dc657638b3 100644
--- a/arch/powerpc/include/asm/floppy.h
+++ b/arch/powerpc/include/asm/floppy.h
@@ -13,8 +13,8 @@
#include <asm/machdep.h>
-#define fd_inb(port) inb_p(port)
-#define fd_outb(value,port) outb_p(value,port)
+#define fd_inb(base, reg) inb_p((base) + (reg))
+#define fd_outb(value, base, reg) outb_p(value, (base) + (reg))
#define fd_enable_dma() enable_dma(FLOPPY_DMA)
#define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA)
@@ -25,7 +25,6 @@
#define fd_get_dma_residue() fd_ops->_get_dma_residue(FLOPPY_DMA)
#define fd_enable_irq() enable_irq(FLOPPY_IRQ)
#define fd_disable_irq() disable_irq(FLOPPY_IRQ)
-#define fd_cacheflush(addr,size) /* nothing */
#define fd_free_irq() free_irq(FLOPPY_IRQ, NULL);
#include <linux/pci.h>
@@ -62,21 +61,22 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id)
st = 1;
for (lcount=virtual_dma_count, lptr=virtual_dma_addr;
lcount; lcount--, lptr++) {
- st=inb(virtual_dma_port+4) & 0xa0 ;
- if (st != 0xa0)
+ st = inb(virtual_dma_port + FD_STATUS);
+ st &= STATUS_DMA | STATUS_READY;
+ if (st != (STATUS_DMA | STATUS_READY))
break;
if (virtual_dma_mode)
- outb_p(*lptr, virtual_dma_port+5);
+ outb_p(*lptr, virtual_dma_port + FD_DATA);
else
- *lptr = inb_p(virtual_dma_port+5);
+ *lptr = inb_p(virtual_dma_port + FD_DATA);
}
virtual_dma_count = lcount;
virtual_dma_addr = lptr;
- st = inb(virtual_dma_port+4);
+ st = inb(virtual_dma_port + FD_STATUS);
- if (st == 0x20)
+ if (st == STATUS_DMA)
return IRQ_HANDLED;
- if (!(st & 0x20)) {
+ if (!(st & STATUS_DMA)) {
virtual_dma_residue += virtual_dma_count;
virtual_dma_count=0;
doing_vdma = 0;
@@ -134,17 +134,22 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
int dir;
doing_vdma = 0;
- dir = (mode == DMA_MODE_READ) ? PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE;
+ dir = (mode == DMA_MODE_READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
if (bus_addr
&& (addr != prev_addr || size != prev_size || dir != prev_dir)) {
/* different from last time -- unmap prev */
- pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir);
+ dma_unmap_single(&isa_bridge_pcidev->dev, bus_addr, prev_size,
+ prev_dir);
bus_addr = 0;
}
- if (!bus_addr) /* need to map it */
- bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir);
+ if (!bus_addr) { /* need to map it */
+ bus_addr = dma_map_single(&isa_bridge_pcidev->dev, addr, size,
+ dir);
+ if (dma_mapping_error(&isa_bridge_pcidev->dev, bus_addr))
+ return -ENOMEM;
+ }
/* remember this one as prev */
prev_addr = addr;
@@ -152,7 +157,6 @@ static int hard_dma_setup(char *addr, unsigned long size, int mode, int io)
prev_dir = dir;
fd_clear_dma_ff();
- fd_cacheflush(addr, size);
fd_set_dma_mode(mode);
set_dma_addr(FLOPPY_DMA, bus_addr);
fd_set_dma_count(size);
@@ -202,11 +206,6 @@ static int FDC2 = -1;
#define N_FDC 2 /* Don't change this! */
#define N_DRIVE 8
-/*
- * The PowerPC has no problems with floppy DMA crossing 64k borders.
- */
-#define CROSS_64KB(a,s) (0)
-
#define EXTRA_FLOPPY_PARAMS
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/fprobe.h b/arch/powerpc/include/asm/fprobe.h
new file mode 100644
index 000000000000..d64bc28fb3d3
--- /dev/null
+++ b/arch/powerpc/include/asm/fprobe.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PPC_FPROBE_H
+#define _ASM_PPC_FPROBE_H
+
+#include <asm-generic/fprobe.h>
+
+#ifdef CONFIG_64BIT
+#undef FPROBE_HEADER_MSB_PATTERN
+#define FPROBE_HEADER_MSB_PATTERN (PAGE_OFFSET & ~FPROBE_HEADER_MSB_MASK)
+#endif
+
+#endif /* _ASM_PPC_FPROBE_H */
diff --git a/arch/powerpc/include/asm/fpu.h b/arch/powerpc/include/asm/fpu.h
new file mode 100644
index 000000000000..ca584e4bc40f
--- /dev/null
+++ b/arch/powerpc/include/asm/fpu.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_POWERPC_FPU_H
+#define _ASM_POWERPC_FPU_H
+
+#include <linux/preempt.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/switch_to.h>
+
+#define kernel_fpu_available() (!cpu_has_feature(CPU_FTR_FPU_UNAVAILABLE))
+
+static inline void kernel_fpu_begin(void)
+{
+ preempt_disable();
+ enable_kernel_fp();
+}
+
+static inline void kernel_fpu_end(void)
+{
+ disable_kernel_fp();
+ preempt_enable();
+}
+
+#endif /* ! _ASM_POWERPC_FPU_H */
diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h
deleted file mode 100644
index f79d6c74eb2a..000000000000
--- a/arch/powerpc/include/asm/fs_pd.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Platform information definitions.
- *
- * 2006 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef FS_PD_H
-#define FS_PD_H
-#include <sysdev/fsl_soc.h>
-#include <asm/time.h>
-
-#ifdef CONFIG_CPM2
-#include <asm/cpm2.h>
-
-#if defined(CONFIG_8260)
-#include <asm/mpc8260.h>
-#endif
-
-#define cpm2_map(member) (&cpm2_immr->member)
-#define cpm2_map_size(member, size) (&cpm2_immr->member)
-#define cpm2_unmap(addr) do {} while(0)
-#endif
-
-#ifdef CONFIG_8xx
-#include <asm/8xx_immap.h>
-
-extern immap_t __iomem *mpc8xx_immr;
-
-#define immr_map(member) (&mpc8xx_immr->member)
-#define immr_map_size(member, size) (&mpc8xx_immr->member)
-#define immr_unmap(addr) do {} while (0)
-#endif
-
-static inline int uart_baudrate(void)
-{
- return get_baudrate();
-}
-
-static inline int uart_clock(void)
-{
- return ppc_proc_freq;
-}
-
-#endif
diff --git a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h
deleted file mode 100644
index 2af2bdc37b2e..000000000000
--- a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright 2009 Freescale Semiconductor, Inc.
- *
- * Cache SRAM handling for QorIQ platform
- *
- * Author: Vivek Mahajan <vivek.mahajan@freescale.com>
-
- * This file is derived from the original work done
- * by Sylvain Munaut for the Bestcomm SRAM allocator.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__
-#define __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__
-
-#include <asm/rheap.h>
-#include <linux/spinlock.h>
-
-/*
- * Cache-SRAM
- */
-
-struct mpc85xx_cache_sram {
- phys_addr_t base_phys;
- void *base_virt;
- unsigned int size;
- rh_info_t *rh;
- spinlock_t lock;
-};
-
-extern void mpc85xx_cache_sram_free(void *ptr);
-extern void *mpc85xx_cache_sram_alloc(unsigned int size,
- phys_addr_t *phys, unsigned int align);
-
-#endif /* __AMS_POWERPC_FSL_85XX_CACHE_SRAM_H__ */
diff --git a/arch/powerpc/include/asm/fsl_gtm.h b/arch/powerpc/include/asm/fsl_gtm.h
index 3b05808f9caa..6ff68765094d 100644
--- a/arch/powerpc/include/asm/fsl_gtm.h
+++ b/arch/powerpc/include/asm/fsl_gtm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Freescale General-purpose Timers Module
*
@@ -6,11 +7,6 @@
* Jerry Huang <Chang-Ming.Huang@freescale.com>
* Copyright (c) MontaVista Software, Inc. 2008.
* Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#ifndef __ASM_FSL_GTM_H
diff --git a/arch/powerpc/include/asm/fsl_guts.h b/arch/powerpc/include/asm/fsl_guts.h
deleted file mode 100644
index 77ced0b3d81d..000000000000
--- a/arch/powerpc/include/asm/fsl_guts.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/**
- * Freecale 85xx and 86xx Global Utilties register set
- *
- * Authors: Jeff Brown
- * Timur Tabi <timur@freescale.com>
- *
- * Copyright 2004,2007,2012 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef __ASM_POWERPC_FSL_GUTS_H__
-#define __ASM_POWERPC_FSL_GUTS_H__
-#ifdef __KERNEL__
-
-/**
- * Global Utility Registers.
- *
- * Not all registers defined in this structure are available on all chips, so
- * you are expected to know whether a given register actually exists on your
- * chip before you access it.
- *
- * Also, some registers are similar on different chips but have slightly
- * different names. In these cases, one name is chosen to avoid extraneous
- * #ifdefs.
- */
-struct ccsr_guts {
- __be32 porpllsr; /* 0x.0000 - POR PLL Ratio Status Register */
- __be32 porbmsr; /* 0x.0004 - POR Boot Mode Status Register */
- __be32 porimpscr; /* 0x.0008 - POR I/O Impedance Status and Control Register */
- __be32 pordevsr; /* 0x.000c - POR I/O Device Status Register */
- __be32 pordbgmsr; /* 0x.0010 - POR Debug Mode Status Register */
- __be32 pordevsr2; /* 0x.0014 - POR device status register 2 */
- u8 res018[0x20 - 0x18];
- __be32 porcir; /* 0x.0020 - POR Configuration Information Register */
- u8 res024[0x30 - 0x24];
- __be32 gpiocr; /* 0x.0030 - GPIO Control Register */
- u8 res034[0x40 - 0x34];
- __be32 gpoutdr; /* 0x.0040 - General-Purpose Output Data Register */
- u8 res044[0x50 - 0x44];
- __be32 gpindr; /* 0x.0050 - General-Purpose Input Data Register */
- u8 res054[0x60 - 0x54];
- __be32 pmuxcr; /* 0x.0060 - Alternate Function Signal Multiplex Control */
- __be32 pmuxcr2; /* 0x.0064 - Alternate function signal multiplex control 2 */
- __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register */
- u8 res06c[0x70 - 0x6c];
- __be32 devdisr; /* 0x.0070 - Device Disable Control */
-#define CCSR_GUTS_DEVDISR_TB1 0x00001000
-#define CCSR_GUTS_DEVDISR_TB0 0x00004000
- __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */
- u8 res078[0x7c - 0x78];
- __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control Register */
- __be32 powmgtcsr; /* 0x.0080 - Power Management Status and Control Register */
- __be32 pmrccr; /* 0x.0084 - Power Management Reset Counter Configuration Register */
- __be32 pmpdccr; /* 0x.0088 - Power Management Power Down Counter Configuration Register */
- __be32 pmcdr; /* 0x.008c - 4Power management clock disable register */
- __be32 mcpsumr; /* 0x.0090 - Machine Check Summary Register */
- __be32 rstrscr; /* 0x.0094 - Reset Request Status and Control Register */
- __be32 ectrstcr; /* 0x.0098 - Exception reset control register */
- __be32 autorstsr; /* 0x.009c - Automatic reset status register */
- __be32 pvr; /* 0x.00a0 - Processor Version Register */
- __be32 svr; /* 0x.00a4 - System Version Register */
- u8 res0a8[0xb0 - 0xa8];
- __be32 rstcr; /* 0x.00b0 - Reset Control Register */
- u8 res0b4[0xc0 - 0xb4];
- __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register
- Called 'elbcvselcr' on 86xx SOCs */
- u8 res0c4[0x224 - 0xc4];
- __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */
- __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */
- u8 res22c[0x604 - 0x22c];
- __be32 pamubypenr; /* 0x.604 - PAMU bypass enable register */
- u8 res608[0x800 - 0x608];
- __be32 clkdvdr; /* 0x.0800 - Clock Divide Register */
- u8 res804[0x900 - 0x804];
- __be32 ircr; /* 0x.0900 - Infrared Control Register */
- u8 res904[0x908 - 0x904];
- __be32 dmacr; /* 0x.0908 - DMA Control Register */
- u8 res90c[0x914 - 0x90c];
- __be32 elbccr; /* 0x.0914 - eLBC Control Register */
- u8 res918[0xb20 - 0x918];
- __be32 ddr1clkdr; /* 0x.0b20 - DDR1 Clock Disable Register */
- __be32 ddr2clkdr; /* 0x.0b24 - DDR2 Clock Disable Register */
- __be32 ddrclkdr; /* 0x.0b28 - DDR Clock Disable Register */
- u8 resb2c[0xe00 - 0xb2c];
- __be32 clkocr; /* 0x.0e00 - Clock Out Select Register */
- u8 rese04[0xe10 - 0xe04];
- __be32 ddrdllcr; /* 0x.0e10 - DDR DLL Control Register */
- u8 rese14[0xe20 - 0xe14];
- __be32 lbcdllcr; /* 0x.0e20 - LBC DLL Control Register */
- __be32 cpfor; /* 0x.0e24 - L2 charge pump fuse override register */
- u8 rese28[0xf04 - 0xe28];
- __be32 srds1cr0; /* 0x.0f04 - SerDes1 Control Register 0 */
- __be32 srds1cr1; /* 0x.0f08 - SerDes1 Control Register 0 */
- u8 resf0c[0xf2c - 0xf0c];
- __be32 itcr; /* 0x.0f2c - Internal transaction control register */
- u8 resf30[0xf40 - 0xf30];
- __be32 srds2cr0; /* 0x.0f40 - SerDes2 Control Register 0 */
- __be32 srds2cr1; /* 0x.0f44 - SerDes2 Control Register 0 */
-} __attribute__ ((packed));
-
-
-/* Alternate function signal multiplex control */
-#define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x))
-
-#ifdef CONFIG_PPC_86xx
-
-#define CCSR_GUTS_DMACR_DEV_SSI 0 /* DMA controller/channel set to SSI */
-#define CCSR_GUTS_DMACR_DEV_IR 1 /* DMA controller/channel set to IR */
-
-/*
- * Set the DMACR register in the GUTS
- *
- * The DMACR register determines the source of initiated transfers for each
- * channel on each DMA controller. Rather than have a bunch of repetitive
- * macros for the bit patterns, we just have a function that calculates
- * them.
- *
- * guts: Pointer to GUTS structure
- * co: The DMA controller (0 or 1)
- * ch: The channel on the DMA controller (0, 1, 2, or 3)
- * device: The device to set as the source (CCSR_GUTS_DMACR_DEV_xx)
- */
-static inline void guts_set_dmacr(struct ccsr_guts __iomem *guts,
- unsigned int co, unsigned int ch, unsigned int device)
-{
- unsigned int shift = 16 + (8 * (1 - co) + 2 * (3 - ch));
-
- clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift);
-}
-
-#define CCSR_GUTS_PMUXCR_LDPSEL 0x00010000
-#define CCSR_GUTS_PMUXCR_SSI1_MASK 0x0000C000 /* Bitmask for SSI1 */
-#define CCSR_GUTS_PMUXCR_SSI1_LA 0x00000000 /* Latched address */
-#define CCSR_GUTS_PMUXCR_SSI1_HI 0x00004000 /* High impedance */
-#define CCSR_GUTS_PMUXCR_SSI1_SSI 0x00008000 /* Used for SSI1 */
-#define CCSR_GUTS_PMUXCR_SSI2_MASK 0x00003000 /* Bitmask for SSI2 */
-#define CCSR_GUTS_PMUXCR_SSI2_LA 0x00000000 /* Latched address */
-#define CCSR_GUTS_PMUXCR_SSI2_HI 0x00001000 /* High impedance */
-#define CCSR_GUTS_PMUXCR_SSI2_SSI 0x00002000 /* Used for SSI2 */
-#define CCSR_GUTS_PMUXCR_LA_22_25_LA 0x00000000 /* Latched Address */
-#define CCSR_GUTS_PMUXCR_LA_22_25_HI 0x00000400 /* High impedance */
-#define CCSR_GUTS_PMUXCR_DBGDRV 0x00000200 /* Signals not driven */
-#define CCSR_GUTS_PMUXCR_DMA2_0 0x00000008
-#define CCSR_GUTS_PMUXCR_DMA2_3 0x00000004
-#define CCSR_GUTS_PMUXCR_DMA1_0 0x00000002
-#define CCSR_GUTS_PMUXCR_DMA1_3 0x00000001
-
-/*
- * Set the DMA external control bits in the GUTS
- *
- * The DMA external control bits in the PMUXCR are only meaningful for
- * channels 0 and 3. Any other channels are ignored.
- *
- * guts: Pointer to GUTS structure
- * co: The DMA controller (0 or 1)
- * ch: The channel on the DMA controller (0, 1, 2, or 3)
- * value: the new value for the bit (0 or 1)
- */
-static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts,
- unsigned int co, unsigned int ch, unsigned int value)
-{
- if ((ch == 0) || (ch == 3)) {
- unsigned int shift = 2 * (co + 1) - (ch & 1) - 1;
-
- clrsetbits_be32(&guts->pmuxcr, 1 << shift, value << shift);
- }
-}
-
-#define CCSR_GUTS_CLKDVDR_PXCKEN 0x80000000
-#define CCSR_GUTS_CLKDVDR_SSICKEN 0x20000000
-#define CCSR_GUTS_CLKDVDR_PXCKINV 0x10000000
-#define CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT 25
-#define CCSR_GUTS_CLKDVDR_PXCKDLY_MASK 0x06000000
-#define CCSR_GUTS_CLKDVDR_PXCKDLY(x) \
- (((x) & 3) << CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT)
-#define CCSR_GUTS_CLKDVDR_PXCLK_SHIFT 16
-#define CCSR_GUTS_CLKDVDR_PXCLK_MASK 0x001F0000
-#define CCSR_GUTS_CLKDVDR_PXCLK(x) (((x) & 31) << CCSR_GUTS_CLKDVDR_PXCLK_SHIFT)
-#define CCSR_GUTS_CLKDVDR_SSICLK_MASK 0x000000FF
-#define CCSR_GUTS_CLKDVDR_SSICLK(x) ((x) & CCSR_GUTS_CLKDVDR_SSICLK_MASK)
-
-#endif
-
-#endif
-#endif
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h
index 3abb58394da4..b889d13547fd 100644
--- a/arch/powerpc/include/asm/fsl_hcalls.h
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -109,7 +109,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
#define FH_DTPROP_MAX_PROPLEN 32768
/**
- * fh_partiton_get_dtprop - get a property from a guest device tree.
+ * fh_partition_get_dtprop - get a property from a guest device tree.
* @handle: handle of partition whose device tree is to be accessed
* @dtpath_addr: physical address of device tree path to access
* @propname_addr: physical address of name of property
diff --git a/arch/powerpc/include/asm/fsl_lbc.h b/arch/powerpc/include/asm/fsl_lbc.h
index 067fb0dca549..c4af5ee716aa 100644
--- a/arch/powerpc/include/asm/fsl_lbc.h
+++ b/arch/powerpc/include/asm/fsl_lbc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Freescale Local Bus Controller
*
* Copyright © 2006-2007, 2010 Freescale Semiconductor
@@ -5,20 +6,6 @@
* Authors: Nick Spence <nick.spence@freescale.com>,
* Scott Wood <scottwood@freescale.com>
* Jack Lan <jack.lan@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef __ASM_FSL_LBC_H
@@ -95,6 +82,9 @@ struct fsl_lbc_bank {
#define OR_FCM_TRLX_SHIFT 2
#define OR_FCM_EHTR 0x00000002
#define OR_FCM_EHTR_SHIFT 1
+
+#define OR_GPCM_AM 0xFFFF8000
+#define OR_GPCM_AM_SHIFT 15
};
struct fsl_lbc_regs {
diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h
index caa1b21c25cd..c0fbadb70b5d 100644
--- a/arch/powerpc/include/asm/fsl_pamu_stash.h
+++ b/arch/powerpc/include/asm/fsl_pamu_stash.h
@@ -1,24 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2013 Freescale Semiconductor, Inc.
- *
*/
#ifndef __FSL_PAMU_STASH_H
#define __FSL_PAMU_STASH_H
+struct iommu_domain;
+
/* cache stash targets */
enum pamu_stash_target {
PAMU_ATTR_CACHE_L1 = 1,
@@ -26,14 +16,6 @@ enum pamu_stash_target {
PAMU_ATTR_CACHE_L3,
};
-/*
- * This attribute allows configuring stashig specific parameters
- * in the PAMU hardware.
- */
-
-struct pamu_stash_attribute {
- u32 cpu; /* cpu number */
- u32 cache; /* cache to stash to: L1,L2,L3 */
-};
+int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu);
#endif /* __FSL_PAMU_STASH_H */
diff --git a/arch/powerpc/include/asm/fsl_pm.h b/arch/powerpc/include/asm/fsl_pm.h
new file mode 100644
index 000000000000..61a4c977320f
--- /dev/null
+++ b/arch/powerpc/include/asm/fsl_pm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Support Power Management
+ *
+ * Copyright 2014-2015 Freescale Semiconductor Inc.
+ */
+#ifndef __PPC_FSL_PM_H
+#define __PPC_FSL_PM_H
+
+#define E500_PM_PH10 1
+#define E500_PM_PH15 2
+#define E500_PM_PH20 3
+#define E500_PM_PH30 4
+#define E500_PM_DOZE E500_PM_PH10
+#define E500_PM_NAP E500_PM_PH15
+
+#define PLAT_PM_SLEEP 20
+#define PLAT_PM_LPM20 30
+
+#define FSL_PM_SLEEP (1 << 0)
+#define FSL_PM_DEEP_SLEEP (1 << 1)
+
+struct fsl_pm_ops {
+ /* mask pending interrupts to the RCPM from MPIC */
+ void (*irq_mask)(int cpu);
+
+ /* unmask pending interrupts to the RCPM from MPIC */
+ void (*irq_unmask)(int cpu);
+ void (*cpu_enter_state)(int cpu, int state);
+ void (*cpu_exit_state)(int cpu, int state);
+ void (*cpu_up_prepare)(int cpu);
+ void (*cpu_die)(int cpu);
+ int (*plat_enter_sleep)(void);
+ void (*freeze_time_base)(bool freeze);
+
+ /* keep the power of IP blocks during sleep/deep sleep */
+ void (*set_ip_power)(bool enable, u32 mask);
+
+ /* get platform supported power management modes */
+ unsigned int (*get_pm_modes)(void);
+};
+
+extern const struct fsl_pm_ops *qoriq_pm_ops;
+
+int __init fsl_rcpm_init(void);
+
+#endif /* __PPC_FSL_PM_H */
diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index e3661872fbea..5984eaa75ce8 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -1,79 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_FTRACE
#define _ASM_POWERPC_FTRACE
+#include <asm/types.h>
+
#ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR ((long)(_mcount))
+#define MCOUNT_ADDR ((unsigned long)(_mcount))
#define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */
-#ifdef __ASSEMBLY__
-
-/* Based off of objdump optput from glibc */
-
-#define MCOUNT_SAVE_FRAME \
- stwu r1,-48(r1); \
- stw r3, 12(r1); \
- stw r4, 16(r1); \
- stw r5, 20(r1); \
- stw r6, 24(r1); \
- mflr r3; \
- lwz r4, 52(r1); \
- mfcr r5; \
- stw r7, 28(r1); \
- stw r8, 32(r1); \
- stw r9, 36(r1); \
- stw r10,40(r1); \
- stw r3, 44(r1); \
- stw r5, 8(r1)
-
-#define MCOUNT_RESTORE_FRAME \
- lwz r6, 8(r1); \
- lwz r0, 44(r1); \
- lwz r3, 12(r1); \
- mtctr r0; \
- lwz r4, 16(r1); \
- mtcr r6; \
- lwz r5, 20(r1); \
- lwz r6, 24(r1); \
- lwz r0, 52(r1); \
- lwz r7, 28(r1); \
- lwz r8, 32(r1); \
- mtlr r0; \
- lwz r9, 36(r1); \
- lwz r10,40(r1); \
- addi r1, r1, 48
-
-#else /* !__ASSEMBLY__ */
+/* Ignore unused weak functions which will have larger offsets */
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+#define FTRACE_MCOUNT_MAX_OFFSET 16
+#elif defined(CONFIG_PPC32)
+#define FTRACE_MCOUNT_MAX_OFFSET 8
+#endif
+
+#ifndef __ASSEMBLER__
extern void _mcount(void);
-#ifdef CONFIG_DYNAMIC_FTRACE
-static inline unsigned long ftrace_call_adjust(unsigned long addr)
-{
- /* reloction of mcount call site is the same as the address */
- return addr;
-}
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp);
+struct module;
+struct dyn_ftrace;
struct dyn_arch_ftrace {
- struct module *mod;
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ /* pointer to the associated out-of-line stub */
+ unsigned long ool_stub;
+#endif
};
-#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* __ASSEMBLY__ */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+#define ftrace_need_init_nop() (true)
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+#define ftrace_init_nop ftrace_init_nop
+
+#include <linux/ftrace_regs.h>
+
+static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
+{
+ /* We clear regs.msr in ftrace_call */
+ return arch_ftrace_regs(fregs)->regs.msr ? &arch_ftrace_regs(fregs)->regs : NULL;
+}
+
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->result = 0; \
+ (_regs)->nip = arch_ftrace_regs(fregs)->regs.nip; \
+ (_regs)->gpr[1] = arch_ftrace_regs(fregs)->regs.gpr[1]; \
+ asm volatile("mfmsr %0" : "=r" ((_regs)->msr)); \
+ } while (0)
+
+#undef ftrace_regs_get_return_value
+static __always_inline unsigned long
+ftrace_regs_get_return_value(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->regs.gpr[3];
+}
+#define ftrace_regs_get_return_value ftrace_regs_get_return_value
+
+#undef ftrace_regs_get_frame_pointer
+static __always_inline unsigned long
+ftrace_regs_get_frame_pointer(const struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->regs.gpr[1];
+}
+
+static __always_inline void
+ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
+ unsigned long ip)
+{
+ regs_set_return_ip(&arch_ftrace_regs(fregs)->regs, ip);
+}
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+ return arch_ftrace_regs(fregs)->regs.link;
+}
+
+struct ftrace_ops;
+
+#define ftrace_graph_func ftrace_graph_func
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *op, struct ftrace_regs *fregs);
+#endif
+#endif /* __ASSEMBLER__ */
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
+#endif /* CONFIG_FUNCTION_TRACER */
-#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64) && !defined(__ASSEMBLY__)
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
+#ifndef __ASSEMBLER__
+#ifdef CONFIG_FTRACE_SYSCALLS
+/*
+ * Some syscall entry functions on powerpc start with "ppc_" (fork and clone,
+ * for instance) or ppc32_/ppc64_. We should also match the sys_ variant with
+ * those.
+ */
#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
- /*
- * Compare the symbol name with the system call name. Skip the .sys or .SyS
- * prefix from the symbol name and the sys prefix from the system call name and
- * just match the rest. This is only needed on ppc64 since symbol names on
- * 32bit do not start with a period so the generic function will work.
- */
- return !strcmp(sym + 4, name + 3);
+ return !strcmp(sym, name) ||
+ (!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) ||
+ (!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4)) ||
+ (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
+ (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
+}
+#endif /* CONFIG_FTRACE_SYSCALLS */
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER)
+#include <asm/paca.h>
+
+static inline void this_cpu_disable_ftrace(void)
+{
+ get_paca()->ftrace_enabled = 0;
+}
+
+static inline void this_cpu_enable_ftrace(void)
+{
+ get_paca()->ftrace_enabled = 1;
+}
+
+/* Disable ftrace on this CPU if possible (may not be implemented) */
+static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled)
+{
+ get_paca()->ftrace_enabled = ftrace_enabled;
+}
+
+static inline u8 this_cpu_get_ftrace_enabled(void)
+{
+ return get_paca()->ftrace_enabled;
+}
+#else /* CONFIG_PPC64 */
+static inline void this_cpu_disable_ftrace(void) { }
+static inline void this_cpu_enable_ftrace(void) { }
+static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { }
+static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; }
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_FUNCTION_TRACER
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+struct ftrace_ool_stub {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ struct ftrace_ops *ftrace_op;
+#endif
+ u32 insn[4];
+} __aligned(sizeof(unsigned long));
+extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[],
+ ftrace_ool_stub_inittext[];
+extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count,
+ ftrace_ool_stub_inittext_count;
+#endif
+void ftrace_free_init_tramp(void);
+unsigned long ftrace_call_adjust(unsigned long addr);
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * When an ftrace registered caller is tracing a function that is also set by a
+ * register_ftrace_direct() call, it needs to be differentiated in the
+ * ftrace_caller trampoline so that the direct call can be invoked after the
+ * other ftrace ops. To do this, place the direct caller in the orig_gpr3 field
+ * of pt_regs. This tells ftrace_caller that there's a direct caller.
+ */
+static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr)
+{
+ struct pt_regs *regs = &arch_ftrace_regs(fregs)->regs;
+
+ regs->orig_gpr3 = addr;
}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#else
+static inline void ftrace_free_init_tramp(void) { }
+static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; }
#endif
-#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 && !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_POWERPC_FTRACE */
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 2a9cf845473b..b3001f8b2c1e 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_FUTEX_H
#define _ASM_POWERPC_FUTEX_H
@@ -7,14 +8,12 @@
#include <linux/uaccess.h>
#include <asm/errno.h>
#include <asm/synch.h>
-#include <asm/asm-compat.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
__asm__ __volatile ( \
PPC_ATOMIC_ENTRY_BARRIER \
"1: lwarx %0,0,%2\n" \
insn \
- PPC405_ERR77(0, %2) \
"2: stwcx. %1,0,%2\n" \
"bne- 1b\n" \
PPC_ATOMIC_EXIT_BARRIER \
@@ -23,29 +22,20 @@
"4: li %1,%3\n" \
"b 3b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- ".align 3\n" \
- PPC_LONG "1b,4b,2b,4b\n" \
- ".previous" \
+ EX_TABLE(1b, 4b) \
+ EX_TABLE(2b, 4b) \
: "=&r" (oldval), "=&r" (ret) \
: "b" (uaddr), "i" (-EFAULT), "r" (oparg) \
: "cr0", "memory")
-static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
+static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
+ u32 __user *uaddr)
{
- int op = (encoded_op >> 28) & 7;
- int cmp = (encoded_op >> 24) & 15;
- int oparg = (encoded_op << 8) >> 20;
- int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
- if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
- oparg = 1 << oparg;
- if (! access_ok (VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
- pagefault_disable();
-
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("mr %1,%4\n", ret, oldval, uaddr, oparg);
@@ -65,20 +55,10 @@ static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr)
default:
ret = -ENOSYS;
}
+ user_access_end();
- pagefault_enable();
+ *oval = oldval;
- if (!ret) {
- switch (cmp) {
- case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break;
- case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break;
- case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break;
- case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break;
- case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break;
- case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break;
- default: ret = -ENOSYS;
- }
- }
return ret;
}
@@ -89,7 +69,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
int ret = 0;
u32 prev;
- if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
+ if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
__asm__ __volatile__ (
@@ -97,23 +77,23 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
cmpw 0,%1,%4\n\
bne- 3f\n"
- PPC405_ERR77(0,%3)
"2: stwcx. %5,0,%3\n\
bne- 1b\n"
PPC_ATOMIC_EXIT_BARRIER
"3: .section .fixup,\"ax\"\n\
4: li %0,%6\n\
b 3b\n\
- .previous\n\
- .section __ex_table,\"a\"\n\
- .align 3\n\
- " PPC_LONG "1b,4b,2b,4b\n\
- .previous" \
+ .previous\n"
+ EX_TABLE(1b, 4b)
+ EX_TABLE(2b, 4b)
: "+r" (ret), "=&r" (prev), "+m" (*uaddr)
: "r" (uaddr), "r" (oldval), "r" (newval), "i" (-EFAULT)
: "cc", "memory");
+ user_access_end();
+
*uval = prev;
+
return ret;
}
diff --git a/arch/powerpc/include/asm/gpio.h b/arch/powerpc/include/asm/gpio.h
deleted file mode 100644
index b3799d88ffcf..000000000000
--- a/arch/powerpc/include/asm/gpio.h
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef __LINUX_GPIO_H
-#warning Include linux/gpio.h instead of asm/gpio.h
-#include <linux/gpio.h>
-#endif
diff --git a/arch/powerpc/include/asm/grackle.h b/arch/powerpc/include/asm/grackle.h
index bd7812a519d4..7376e3fa1570 100644
--- a/arch/powerpc/include/asm/grackle.h
+++ b/arch/powerpc/include/asm/grackle.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_GRACKLE_H
#define _ASM_POWERPC_GRACKLE_H
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h
new file mode 100644
index 000000000000..acd61eb36d59
--- /dev/null
+++ b/arch/powerpc/include/asm/guest-state-buffer.h
@@ -0,0 +1,1019 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Interface based on include/net/netlink.h
+ */
+#ifndef _ASM_POWERPC_GUEST_STATE_BUFFER_H
+#define _ASM_POWERPC_GUEST_STATE_BUFFER_H
+
+#include "asm/hvcall.h"
+#include <linux/gfp.h>
+#include <linux/bitmap.h>
+#include <asm/plpar_wrappers.h>
+
+/**************************************************************************
+ * Guest State Buffer Constants
+ **************************************************************************/
+/* Element without a value and any length */
+#define KVMPPC_GSID_BLANK 0x0000
+/* Size required for the L0's internal VCPU representation */
+#define KVMPPC_GSID_HOST_STATE_SIZE 0x0001
+ /* Minimum size for the H_GUEST_RUN_VCPU output buffer */
+#define KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE 0x0002
+ /* "Logical" PVR value as defined in the PAPR */
+#define KVMPPC_GSID_LOGICAL_PVR 0x0003
+ /* L0 relative timebase offset */
+#define KVMPPC_GSID_TB_OFFSET 0x0004
+ /* Partition Scoped Page Table Info */
+#define KVMPPC_GSID_PARTITION_TABLE 0x0005
+ /* Process Table Info */
+#define KVMPPC_GSID_PROCESS_TABLE 0x0006
+
+/* Guest Management Heap Size */
+#define KVMPPC_GSID_L0_GUEST_HEAP 0x0800
+
+/* Guest Management Heap Max Size */
+#define KVMPPC_GSID_L0_GUEST_HEAP_MAX 0x0801
+
+/* Guest Pagetable Size */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE 0x0802
+
+/* Guest Pagetable Max Size */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX 0x0803
+
+/* Guest Pagetable Reclaim in bytes */
+#define KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM 0x0804
+
+/* H_GUEST_RUN_VCPU input buffer Info */
+#define KVMPPC_GSID_RUN_INPUT 0x0C00
+/* H_GUEST_RUN_VCPU output buffer Info */
+#define KVMPPC_GSID_RUN_OUTPUT 0x0C01
+#define KVMPPC_GSID_VPA 0x0C02
+
+#define KVMPPC_GSID_GPR(x) (0x1000 + (x))
+#define KVMPPC_GSID_HDEC_EXPIRY_TB 0x1020
+#define KVMPPC_GSID_NIA 0x1021
+#define KVMPPC_GSID_MSR 0x1022
+#define KVMPPC_GSID_LR 0x1023
+#define KVMPPC_GSID_XER 0x1024
+#define KVMPPC_GSID_CTR 0x1025
+#define KVMPPC_GSID_CFAR 0x1026
+#define KVMPPC_GSID_SRR0 0x1027
+#define KVMPPC_GSID_SRR1 0x1028
+#define KVMPPC_GSID_DAR 0x1029
+#define KVMPPC_GSID_DEC_EXPIRY_TB 0x102A
+#define KVMPPC_GSID_VTB 0x102B
+#define KVMPPC_GSID_LPCR 0x102C
+#define KVMPPC_GSID_HFSCR 0x102D
+#define KVMPPC_GSID_FSCR 0x102E
+#define KVMPPC_GSID_FPSCR 0x102F
+#define KVMPPC_GSID_DAWR0 0x1030
+#define KVMPPC_GSID_DAWR1 0x1031
+#define KVMPPC_GSID_CIABR 0x1032
+#define KVMPPC_GSID_PURR 0x1033
+#define KVMPPC_GSID_SPURR 0x1034
+#define KVMPPC_GSID_IC 0x1035
+#define KVMPPC_GSID_SPRG0 0x1036
+#define KVMPPC_GSID_SPRG1 0x1037
+#define KVMPPC_GSID_SPRG2 0x1038
+#define KVMPPC_GSID_SPRG3 0x1039
+#define KVMPPC_GSID_PPR 0x103A
+#define KVMPPC_GSID_MMCR(x) (0x103B + (x))
+#define KVMPPC_GSID_MMCRA 0x103F
+#define KVMPPC_GSID_SIER(x) (0x1040 + (x))
+#define KVMPPC_GSID_BESCR 0x1043
+#define KVMPPC_GSID_EBBHR 0x1044
+#define KVMPPC_GSID_EBBRR 0x1045
+#define KVMPPC_GSID_AMR 0x1046
+#define KVMPPC_GSID_IAMR 0x1047
+#define KVMPPC_GSID_AMOR 0x1048
+#define KVMPPC_GSID_UAMOR 0x1049
+#define KVMPPC_GSID_SDAR 0x104A
+#define KVMPPC_GSID_SIAR 0x104B
+#define KVMPPC_GSID_DSCR 0x104C
+#define KVMPPC_GSID_TAR 0x104D
+#define KVMPPC_GSID_DEXCR 0x104E
+#define KVMPPC_GSID_HDEXCR 0x104F
+#define KVMPPC_GSID_HASHKEYR 0x1050
+#define KVMPPC_GSID_HASHPKEYR 0x1051
+#define KVMPPC_GSID_CTRL 0x1052
+#define KVMPPC_GSID_DPDES 0x1053
+
+#define KVMPPC_GSID_CR 0x2000
+#define KVMPPC_GSID_PIDR 0x2001
+#define KVMPPC_GSID_DSISR 0x2002
+#define KVMPPC_GSID_VSCR 0x2003
+#define KVMPPC_GSID_VRSAVE 0x2004
+#define KVMPPC_GSID_DAWRX0 0x2005
+#define KVMPPC_GSID_DAWRX1 0x2006
+#define KVMPPC_GSID_PMC(x) (0x2007 + (x))
+#define KVMPPC_GSID_WORT 0x200D
+#define KVMPPC_GSID_PSPB 0x200E
+
+#define KVMPPC_GSID_VSRS(x) (0x3000 + (x))
+
+#define KVMPPC_GSID_HDAR 0xF000
+#define KVMPPC_GSID_HDSISR 0xF001
+#define KVMPPC_GSID_HEIR 0xF002
+#define KVMPPC_GSID_ASDR 0xF003
+
+#define KVMPPC_GSE_GUESTWIDE_START KVMPPC_GSID_BLANK
+#define KVMPPC_GSE_GUESTWIDE_END KVMPPC_GSID_PROCESS_TABLE
+#define KVMPPC_GSE_GUESTWIDE_COUNT \
+ (KVMPPC_GSE_GUESTWIDE_END - KVMPPC_GSE_GUESTWIDE_START + 1)
+
+#define KVMPPC_GSE_HOSTWIDE_START KVMPPC_GSID_L0_GUEST_HEAP
+#define KVMPPC_GSE_HOSTWIDE_END KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+#define KVMPPC_GSE_HOSTWIDE_COUNT \
+ (KVMPPC_GSE_HOSTWIDE_END - KVMPPC_GSE_HOSTWIDE_START + 1)
+
+#define KVMPPC_GSE_META_START KVMPPC_GSID_RUN_INPUT
+#define KVMPPC_GSE_META_END KVMPPC_GSID_VPA
+#define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1)
+
+#define KVMPPC_GSE_DW_REGS_START KVMPPC_GSID_GPR(0)
+#define KVMPPC_GSE_DW_REGS_END KVMPPC_GSID_DPDES
+#define KVMPPC_GSE_DW_REGS_COUNT \
+ (KVMPPC_GSE_DW_REGS_END - KVMPPC_GSE_DW_REGS_START + 1)
+
+#define KVMPPC_GSE_W_REGS_START KVMPPC_GSID_CR
+#define KVMPPC_GSE_W_REGS_END KVMPPC_GSID_PSPB
+#define KVMPPC_GSE_W_REGS_COUNT \
+ (KVMPPC_GSE_W_REGS_END - KVMPPC_GSE_W_REGS_START + 1)
+
+#define KVMPPC_GSE_VSRS_START KVMPPC_GSID_VSRS(0)
+#define KVMPPC_GSE_VSRS_END KVMPPC_GSID_VSRS(63)
+#define KVMPPC_GSE_VSRS_COUNT (KVMPPC_GSE_VSRS_END - KVMPPC_GSE_VSRS_START + 1)
+
+#define KVMPPC_GSE_INTR_REGS_START KVMPPC_GSID_HDAR
+#define KVMPPC_GSE_INTR_REGS_END KVMPPC_GSID_ASDR
+#define KVMPPC_GSE_INTR_REGS_COUNT \
+ (KVMPPC_GSE_INTR_REGS_END - KVMPPC_GSE_INTR_REGS_START + 1)
+
+#define KVMPPC_GSE_IDEN_COUNT \
+ (KVMPPC_GSE_HOSTWIDE_COUNT + \
+ KVMPPC_GSE_GUESTWIDE_COUNT + KVMPPC_GSE_META_COUNT + \
+ KVMPPC_GSE_DW_REGS_COUNT + KVMPPC_GSE_W_REGS_COUNT + \
+ KVMPPC_GSE_VSRS_COUNT + KVMPPC_GSE_INTR_REGS_COUNT)
+
+/**
+ * Ranges of guest state buffer elements
+ */
+enum {
+ KVMPPC_GS_CLASS_GUESTWIDE = 0x01,
+ KVMPPC_GS_CLASS_HOSTWIDE = 0x02,
+ KVMPPC_GS_CLASS_META = 0x04,
+ KVMPPC_GS_CLASS_DWORD_REG = 0x08,
+ KVMPPC_GS_CLASS_WORD_REG = 0x10,
+ KVMPPC_GS_CLASS_VECTOR = 0x18,
+ KVMPPC_GS_CLASS_INTR = 0x20,
+};
+
+/**
+ * Types of guest state buffer elements
+ */
+enum {
+ KVMPPC_GSE_BE32,
+ KVMPPC_GSE_BE64,
+ KVMPPC_GSE_VEC128,
+ KVMPPC_GSE_PARTITION_TABLE,
+ KVMPPC_GSE_PROCESS_TABLE,
+ KVMPPC_GSE_BUFFER,
+ __KVMPPC_GSE_TYPE_MAX,
+};
+
+/**
+ * Flags for guest state elements
+ */
+enum {
+ KVMPPC_GS_FLAGS_WIDE = 0x01,
+ KVMPPC_GS_FLAGS_HOST_WIDE = 0x02,
+};
+
+/**
+ * struct kvmppc_gs_part_table - deserialized partition table information
+ * element
+ * @address: start of the partition table
+ * @ea_bits: number of bits in the effective address
+ * @gpd_size: root page directory size
+ */
+struct kvmppc_gs_part_table {
+ u64 address;
+ u64 ea_bits;
+ u64 gpd_size;
+};
+
+/**
+ * struct kvmppc_gs_proc_table - deserialized process table information element
+ * @address: start of the process table
+ * @gpd_size: process table size
+ */
+struct kvmppc_gs_proc_table {
+ u64 address;
+ u64 gpd_size;
+};
+
+/**
+ * struct kvmppc_gs_buff_info - deserialized meta guest state buffer information
+ * @address: start of the guest state buffer
+ * @size: size of the guest state buffer
+ */
+struct kvmppc_gs_buff_info {
+ u64 address;
+ u64 size;
+};
+
+/**
+ * struct kvmppc_gs_header - serialized guest state buffer header
+ * @nelems: count of guest state elements in the buffer
+ * @data: start of the stream of elements in the buffer
+ */
+struct kvmppc_gs_header {
+ __be32 nelems;
+ char data[];
+} __packed;
+
+/**
+ * struct kvmppc_gs_elem - serialized guest state buffer element
+ * @iden: Guest State ID
+ * @len: length of data
+ * @data: the guest state buffer element's value
+ */
+struct kvmppc_gs_elem {
+ __be16 iden;
+ __be16 len;
+ char data[];
+} __packed;
+
+/**
+ * struct kvmppc_gs_buff - a guest state buffer with metadata.
+ * @capacity: total length of the buffer
+ * @len: current length of the elements and header
+ * @guest_id: guest id associated with the buffer
+ * @vcpu_id: vcpu_id associated with the buffer
+ * @hdr: the serialised guest state buffer
+ */
+struct kvmppc_gs_buff {
+ size_t capacity;
+ size_t len;
+ unsigned long guest_id;
+ unsigned long vcpu_id;
+ struct kvmppc_gs_header *hdr;
+};
+
+/**
+ * struct kvmppc_gs_bitmap - a bitmap for element ids
+ * @bitmap: a bitmap large enough for all Guest State IDs
+ */
+struct kvmppc_gs_bitmap {
+ /* private: */
+ DECLARE_BITMAP(bitmap, KVMPPC_GSE_IDEN_COUNT);
+};
+
+/**
+ * struct kvmppc_gs_parser - a map of element ids to locations in a buffer
+ * @iterator: bitmap used for iterating
+ * @gses: contains the pointers to elements
+ *
+ * A guest state parser is used for deserialising a guest state buffer.
+ * Given a buffer, it then allows looking up guest state elements using
+ * a guest state id.
+ */
+struct kvmppc_gs_parser {
+ /* private: */
+ struct kvmppc_gs_bitmap iterator;
+ struct kvmppc_gs_elem *gses[KVMPPC_GSE_IDEN_COUNT];
+};
+
+enum {
+ GSM_GUEST_WIDE = 0x1,
+ GSM_SEND = 0x2,
+ GSM_RECEIVE = 0x4,
+ GSM_GSB_OWNER = 0x8,
+};
+
+struct kvmppc_gs_msg;
+
+/**
+ * struct kvmppc_gs_msg_ops - guest state message behavior
+ * @get_size: maximum size required for the message data
+ * @fill_info: serializes to the guest state buffer format
+ * @refresh_info: deserializes from the guest state buffer format
+ */
+struct kvmppc_gs_msg_ops {
+ size_t (*get_size)(struct kvmppc_gs_msg *gsm);
+ int (*fill_info)(struct kvmppc_gs_buff *gsb, struct kvmppc_gs_msg *gsm);
+ int (*refresh_info)(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb);
+};
+
+/**
+ * struct kvmppc_gs_msg - a guest state message
+ * @bitmap: the guest state ids that should be included
+ * @ops: modify message behavior for reading and writing to buffers
+ * @flags: host wide, guest wide or thread wide
+ * @data: location where buffer data will be written to or from.
+ *
+ * A guest state message allows flexibility in sending and receiving data
+ * in a guest state buffer format.
+ */
+struct kvmppc_gs_msg {
+ struct kvmppc_gs_bitmap bitmap;
+ struct kvmppc_gs_msg_ops *ops;
+ unsigned long flags;
+ void *data;
+};
+
+/**************************************************************************
+ * Guest State IDs
+ **************************************************************************/
+
+u16 kvmppc_gsid_size(u16 iden);
+unsigned long kvmppc_gsid_flags(u16 iden);
+u64 kvmppc_gsid_mask(u16 iden);
+
+/**************************************************************************
+ * Guest State Buffers
+ **************************************************************************/
+struct kvmppc_gs_buff *kvmppc_gsb_new(size_t size, unsigned long guest_id,
+ unsigned long vcpu_id, gfp_t flags);
+void kvmppc_gsb_free(struct kvmppc_gs_buff *gsb);
+void *kvmppc_gsb_put(struct kvmppc_gs_buff *gsb, size_t size);
+int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags);
+int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags);
+
+/**
+ * kvmppc_gsb_header() - the header of a guest state buffer
+ * @gsb: guest state buffer
+ *
+ * Returns a pointer to the buffer header.
+ */
+static inline struct kvmppc_gs_header *
+kvmppc_gsb_header(struct kvmppc_gs_buff *gsb)
+{
+ return gsb->hdr;
+}
+
+/**
+ * kvmppc_gsb_data() - the elements of a guest state buffer
+ * @gsb: guest state buffer
+ *
+ * Returns a pointer to the first element of the buffer data.
+ */
+static inline struct kvmppc_gs_elem *kvmppc_gsb_data(struct kvmppc_gs_buff *gsb)
+{
+ return (struct kvmppc_gs_elem *)kvmppc_gsb_header(gsb)->data;
+}
+
+/**
+ * kvmppc_gsb_len() - the current length of a guest state buffer
+ * @gsb: guest state buffer
+ *
+ * Returns the length including the header of a buffer.
+ */
+static inline size_t kvmppc_gsb_len(struct kvmppc_gs_buff *gsb)
+{
+ return gsb->len;
+}
+
+/**
+ * kvmppc_gsb_capacity() - the capacity of a guest state buffer
+ * @gsb: guest state buffer
+ *
+ * Returns the capacity of a buffer.
+ */
+static inline size_t kvmppc_gsb_capacity(struct kvmppc_gs_buff *gsb)
+{
+ return gsb->capacity;
+}
+
+/**
+ * kvmppc_gsb_paddress() - the physical address of buffer
+ * @gsb: guest state buffer
+ *
+ * Returns the physical address of the buffer.
+ */
+static inline u64 kvmppc_gsb_paddress(struct kvmppc_gs_buff *gsb)
+{
+ return __pa(kvmppc_gsb_header(gsb));
+}
+
+/**
+ * kvmppc_gsb_nelems() - the number of elements in a buffer
+ * @gsb: guest state buffer
+ *
+ * Returns the number of elements in a buffer
+ */
+static inline u32 kvmppc_gsb_nelems(struct kvmppc_gs_buff *gsb)
+{
+ return be32_to_cpu(kvmppc_gsb_header(gsb)->nelems);
+}
+
+/**
+ * kvmppc_gsb_reset() - empty a guest state buffer
+ * @gsb: guest state buffer
+ *
+ * Reset the number of elements and length of buffer to empty.
+ */
+static inline void kvmppc_gsb_reset(struct kvmppc_gs_buff *gsb)
+{
+ kvmppc_gsb_header(gsb)->nelems = cpu_to_be32(0);
+ gsb->len = sizeof(struct kvmppc_gs_header);
+}
+
+/**
+ * kvmppc_gsb_data_len() - the length of a buffer excluding the header
+ * @gsb: guest state buffer
+ *
+ * Returns the length of a buffer excluding the header
+ */
+static inline size_t kvmppc_gsb_data_len(struct kvmppc_gs_buff *gsb)
+{
+ return gsb->len - sizeof(struct kvmppc_gs_header);
+}
+
+/**
+ * kvmppc_gsb_data_cap() - the capacity of a buffer excluding the header
+ * @gsb: guest state buffer
+ *
+ * Returns the capacity of a buffer excluding the header
+ */
+static inline size_t kvmppc_gsb_data_cap(struct kvmppc_gs_buff *gsb)
+{
+ return gsb->capacity - sizeof(struct kvmppc_gs_header);
+}
+
+/**
+ * kvmppc_gsb_for_each_elem - iterate over the elements in a buffer
+ * @i: loop counter
+ * @pos: set to current element
+ * @gsb: guest state buffer
+ * @rem: initialized to buffer capacity, holds bytes currently remaining in
+ * stream
+ */
+#define kvmppc_gsb_for_each_elem(i, pos, gsb, rem) \
+ kvmppc_gse_for_each_elem(i, kvmppc_gsb_nelems(gsb), pos, \
+ kvmppc_gsb_data(gsb), \
+ kvmppc_gsb_data_cap(gsb), rem)
+
+/**************************************************************************
+ * Guest State Elements
+ **************************************************************************/
+
+/**
+ * kvmppc_gse_iden() - guest state ID of element
+ * @gse: guest state element
+ *
+ * Return the guest state ID in host endianness.
+ */
+static inline u16 kvmppc_gse_iden(const struct kvmppc_gs_elem *gse)
+{
+ return be16_to_cpu(gse->iden);
+}
+
+/**
+ * kvmppc_gse_len() - length of guest state element data
+ * @gse: guest state element
+ *
+ * Returns the length of guest state element data
+ */
+static inline u16 kvmppc_gse_len(const struct kvmppc_gs_elem *gse)
+{
+ return be16_to_cpu(gse->len);
+}
+
+/**
+ * kvmppc_gse_total_len() - total length of guest state element
+ * @gse: guest state element
+ *
+ * Returns the length of the data plus the ID and size header.
+ */
+static inline u16 kvmppc_gse_total_len(const struct kvmppc_gs_elem *gse)
+{
+ return be16_to_cpu(gse->len) + sizeof(*gse);
+}
+
+/**
+ * kvmppc_gse_total_size() - space needed for a given data length
+ * @size: data length
+ *
+ * Returns size plus the space needed for the ID and size header.
+ */
+static inline u16 kvmppc_gse_total_size(u16 size)
+{
+ return sizeof(struct kvmppc_gs_elem) + size;
+}
+
+/**
+ * kvmppc_gse_data() - pointer to data of a guest state element
+ * @gse: guest state element
+ *
+ * Returns a pointer to the beginning of guest state element data.
+ */
+static inline void *kvmppc_gse_data(const struct kvmppc_gs_elem *gse)
+{
+ return (void *)gse->data;
+}
+
+/**
+ * kvmppc_gse_ok() - checks space exists for guest state element
+ * @gse: guest state element
+ * @remaining: bytes of space remaining
+ *
+ * Returns true if the guest state element, including its ID and length
+ * header, can fit in remaining space.
+ */
+static inline bool kvmppc_gse_ok(const struct kvmppc_gs_elem *gse,
+ int remaining)
+{
+ /* The ID and length header must fit before gse->len can be read */
+ if (remaining < (int)sizeof(*gse))
+ return false;
+
+ /* Sum in int: a u16 total would wrap for lengths close to U16_MAX */
+ return remaining >= (int)sizeof(*gse) + kvmppc_gse_len(gse);
+}
+
+/**
+ * kvmppc_gse_next() - iterate to the next guest state element in a stream
+ * @gse: stream of guest state elements
+ * @remaining: length of the guest element stream
+ *
+ * Returns the next guest state element in a stream of elements. The length of
+ * the stream is updated in remaining.
+ */
+static inline struct kvmppc_gs_elem *
+kvmppc_gse_next(const struct kvmppc_gs_elem *gse, int *remaining)
+{
+ int len = sizeof(*gse) + kvmppc_gse_len(gse);
+
+ *remaining -= len;
+ return (struct kvmppc_gs_elem *)(gse->data + kvmppc_gse_len(gse));
+}
+
+/**
+ * kvmppc_gse_for_each_elem - iterate over a stream of guest state elements
+ * @i: loop counter
+ * @max: number of elements
+ * @pos: set to current element
+ * @head: head of elements
+ * @len: length of the stream
+ * @rem: initialized to len, holds bytes currently remaining in stream
+ *
+ * The element count is checked before kvmppc_gse_ok() so that the header of a
+ * non-existent element following the last one is never read.
+ */
+#define kvmppc_gse_for_each_elem(i, max, pos, head, len, rem) \
+ for (i = 0, pos = head, rem = len; i < max && kvmppc_gse_ok(pos, rem); \
+ pos = kvmppc_gse_next(pos, &(rem)), i++)
+
+int __kvmppc_gse_put(struct kvmppc_gs_buff *gsb, u16 iden, u16 size,
+ const void *data);
+int kvmppc_gse_parse(struct kvmppc_gs_parser *gsp, struct kvmppc_gs_buff *gsb);
+
+/**
+ * kvmppc_gse_put_be32() - add a be32 guest state element to a buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: big endian value
+ */
+static inline int kvmppc_gse_put_be32(struct kvmppc_gs_buff *gsb, u16 iden,
+ __be32 val)
+{
+ __be32 tmp;
+
+ tmp = val;
+ return __kvmppc_gse_put(gsb, iden, sizeof(__be32), &tmp);
+}
+
+/**
+ * kvmppc_gse_put_u32() - add a host endian 32bit int guest state element to a
+ * buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: host endian value
+ */
+static inline int kvmppc_gse_put_u32(struct kvmppc_gs_buff *gsb, u16 iden,
+ u32 val)
+{
+ __be32 tmp;
+
+ val &= kvmppc_gsid_mask(iden);
+ tmp = cpu_to_be32(val);
+ return kvmppc_gse_put_be32(gsb, iden, tmp);
+}
+
+/**
+ * kvmppc_gse_put_be64() - add a be64 guest state element to a buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: big endian value
+ */
+static inline int kvmppc_gse_put_be64(struct kvmppc_gs_buff *gsb, u16 iden,
+ __be64 val)
+{
+ __be64 tmp;
+
+ tmp = val;
+ return __kvmppc_gse_put(gsb, iden, sizeof(__be64), &tmp);
+}
+
+/**
+ * kvmppc_gse_put_u64() - add a host endian 64bit guest state element to a
+ * buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: host endian value
+ */
+static inline int kvmppc_gse_put_u64(struct kvmppc_gs_buff *gsb, u16 iden,
+ u64 val)
+{
+ __be64 tmp;
+
+ val &= kvmppc_gsid_mask(iden);
+ tmp = cpu_to_be64(val);
+ return kvmppc_gse_put_be64(gsb, iden, tmp);
+}
+
+/**
+ * __kvmppc_gse_put_reg() - add a register type guest state element to a buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: host endian value
+ *
+ * Adds a register type guest state element. Uses the guest state ID for
+ * determining the length of the guest element. If the guest state ID has
+ * bits that can not be set they will be cleared.
+ */
+static inline int __kvmppc_gse_put_reg(struct kvmppc_gs_buff *gsb, u16 iden,
+ u64 val)
+{
+ val &= kvmppc_gsid_mask(iden);
+ if (kvmppc_gsid_size(iden) == sizeof(u64))
+ return kvmppc_gse_put_u64(gsb, iden, val);
+
+ if (kvmppc_gsid_size(iden) == sizeof(u32)) {
+ u32 tmp;
+
+ tmp = (u32)val;
+ if (tmp != val)
+ return -EINVAL;
+
+ return kvmppc_gse_put_u32(gsb, iden, tmp);
+ }
+ return -EINVAL;
+}
+
+/**
+ * kvmppc_gse_put_vector128() - add a vector guest state element to a buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: 16 byte vector value
+ */
+static inline int kvmppc_gse_put_vector128(struct kvmppc_gs_buff *gsb, u16 iden,
+ vector128 *val)
+{
+ __be64 tmp[2] = { 0 };
+ union {
+ __vector128 v;
+ u64 dw[2];
+ } u;
+
+ u.v = *val;
+ tmp[0] = cpu_to_be64(u.dw[TS_FPROFFSET]);
+#ifdef CONFIG_VSX
+ tmp[1] = cpu_to_be64(u.dw[TS_VSRLOWOFFSET]);
+#endif
+ return __kvmppc_gse_put(gsb, iden, sizeof(tmp), &tmp);
+}
+
+/**
+ * kvmppc_gse_put_part_table() - add a partition table guest state element to a
+ * buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID (ignored, KVMPPC_GSID_PARTITION_TABLE is always used)
+ * @val: partition table value
+ */
+static inline int kvmppc_gse_put_part_table(struct kvmppc_gs_buff *gsb,
+ u16 iden,
+ struct kvmppc_gs_part_table val)
+{
+ __be64 tmp[3];
+
+ tmp[0] = cpu_to_be64(val.address);
+ tmp[1] = cpu_to_be64(val.ea_bits);
+ tmp[2] = cpu_to_be64(val.gpd_size);
+ return __kvmppc_gse_put(gsb, KVMPPC_GSID_PARTITION_TABLE, sizeof(tmp),
+ &tmp);
+}
+
+/**
+ * kvmppc_gse_put_proc_table() - add a process table guest state element to a
+ * buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID (ignored, KVMPPC_GSID_PROCESS_TABLE is always used)
+ * @val: process table value
+ */
+static inline int kvmppc_gse_put_proc_table(struct kvmppc_gs_buff *gsb,
+ u16 iden,
+ struct kvmppc_gs_proc_table val)
+{
+ __be64 tmp[2];
+
+ tmp[0] = cpu_to_be64(val.address);
+ tmp[1] = cpu_to_be64(val.gpd_size);
+ return __kvmppc_gse_put(gsb, KVMPPC_GSID_PROCESS_TABLE, sizeof(tmp),
+ &tmp);
+}
+
+/**
+ * kvmppc_gse_put_buff_info() - adds a GSB description guest state element to a
+ * buffer
+ * @gsb: guest state buffer to add element to
+ * @iden: guest state ID
+ * @val: guest state buffer description value
+ */
+static inline int kvmppc_gse_put_buff_info(struct kvmppc_gs_buff *gsb, u16 iden,
+ struct kvmppc_gs_buff_info val)
+{
+ __be64 tmp[2];
+
+ tmp[0] = cpu_to_be64(val.address);
+ tmp[1] = cpu_to_be64(val.size);
+ return __kvmppc_gse_put(gsb, iden, sizeof(tmp), &tmp);
+}
+
+int __kvmppc_gse_put(struct kvmppc_gs_buff *gsb, u16 iden, u16 size,
+ const void *data);
+
+/**
+ * kvmppc_gse_get_be32() - return the data of a be32 element
+ * @gse: guest state element
+ */
+static inline __be32 kvmppc_gse_get_be32(const struct kvmppc_gs_elem *gse)
+{
+ if (WARN_ON(kvmppc_gse_len(gse) != sizeof(__be32)))
+ return 0;
+ return *(__be32 *)kvmppc_gse_data(gse);
+}
+
+/**
+ * kvmppc_gse_get_u32() - return the data of a be32 element in host endianness
+ * @gse: guest state element
+ */
+static inline u32 kvmppc_gse_get_u32(const struct kvmppc_gs_elem *gse)
+{
+ return be32_to_cpu(kvmppc_gse_get_be32(gse));
+}
+
+/**
+ * kvmppc_gse_get_be64() - return the data of a be64 element
+ * @gse: guest state element
+ */
+static inline __be64 kvmppc_gse_get_be64(const struct kvmppc_gs_elem *gse)
+{
+ if (WARN_ON(kvmppc_gse_len(gse) != sizeof(__be64)))
+ return 0;
+ return *(__be64 *)kvmppc_gse_data(gse);
+}
+
+/**
+ * kvmppc_gse_get_u64() - return the data of a be64 element in host endianness
+ * @gse: guest state element
+ *
+ * Fetches the value with kvmppc_gse_get_be64() and converts it with
+ * be64_to_cpu().
+ */
+static inline u64 kvmppc_gse_get_u64(const struct kvmppc_gs_elem *gse)
+{
+ const __be64 raw = kvmppc_gse_get_be64(gse);
+
+ return be64_to_cpu(raw);
+}
+
+/**
+ * kvmppc_gse_get_vector128() - return the data of a vector element
+ * @gse: guest state element
+ * @v: output; receives the vector value
+ *
+ * The element data is read as two big endian 64 bit values. The first is
+ * stored at doubleword index TS_FPROFFSET and, when CONFIG_VSX is enabled, the
+ * second at TS_VSRLOWOFFSET; without CONFIG_VSX that doubleword stays zero.
+ *
+ * If the element length is not sizeof(__vector128) a warning is raised and
+ * *@v is set to all zeroes without touching the element data.
+ */
+static inline void kvmppc_gse_get_vector128(const struct kvmppc_gs_elem *gse,
+ vector128 *v)
+{
+ union {
+ __vector128 v;
+ u64 dw[2];
+ } u = { 0 };
+ __be64 *src;
+
+ if (WARN_ON(kvmppc_gse_len(gse) != sizeof(__vector128))) {
+ /* Hand back zeroes and stop: the data is not 16 bytes long. */
+ *v = u.v;
+ return;
+ }
+
+ src = (__be64 *)kvmppc_gse_data(gse);
+ u.dw[TS_FPROFFSET] = be64_to_cpu(src[0]);
+#ifdef CONFIG_VSX
+ u.dw[TS_VSRLOWOFFSET] = be64_to_cpu(src[1]);
+#endif
+ *v = u.v;
+}
+
+/**************************************************************************
+ * Guest State Bitmap
+ **************************************************************************/
+
+bool kvmppc_gsbm_test(struct kvmppc_gs_bitmap *gsbm, u16 iden);
+void kvmppc_gsbm_set(struct kvmppc_gs_bitmap *gsbm, u16 iden);
+void kvmppc_gsbm_clear(struct kvmppc_gs_bitmap *gsbm, u16 iden);
+u16 kvmppc_gsbm_next(struct kvmppc_gs_bitmap *gsbm, u16 prev);
+
+/**
+ * kvmppc_gsbm_zero - zero the entire bitmap
+ * @gsbm: guest state buffer bitmap
+ *
+ * Clears all KVMPPC_GSE_IDEN_COUNT bits of @gsbm->bitmap.
+ */
+static inline void kvmppc_gsbm_zero(struct kvmppc_gs_bitmap *gsbm)
+{
+ bitmap_zero(gsbm->bitmap, KVMPPC_GSE_IDEN_COUNT);
+}
+
+/**
+ * kvmppc_gsbm_fill - fill the entire bitmap
+ * @gsbm: guest state buffer bitmap
+ *
+ * Sets all KVMPPC_GSE_IDEN_COUNT bits of @gsbm->bitmap and then clears bit 0
+ * again.
+ */
+static inline void kvmppc_gsbm_fill(struct kvmppc_gs_bitmap *gsbm)
+{
+ bitmap_fill(gsbm->bitmap, KVMPPC_GSE_IDEN_COUNT);
+ /* The iteration macros in this header use an ID of 0 to end the loop. */
+ clear_bit(0, gsbm->bitmap);
+}
+
+/**
+ * kvmppc_gsbm_for_each - iterate the present guest state IDs
+ * @gsbm: guest state buffer bitmap
+ * @iden: current guest state ID
+ *
+ * @iden must be an lvalue: it is assigned the result of kvmppc_gsbm_next()
+ * before each iteration, starting from a previous ID of 0. The loop ends as
+ * soon as kvmppc_gsbm_next() returns 0.
+ */
+#define kvmppc_gsbm_for_each(gsbm, iden) \
+ for (iden = kvmppc_gsbm_next(gsbm, 0); iden != 0; \
+ iden = kvmppc_gsbm_next(gsbm, iden))
+
+/**************************************************************************
+ * Guest State Parser
+ **************************************************************************/
+
+void kvmppc_gsp_insert(struct kvmppc_gs_parser *gsp, u16 iden,
+ struct kvmppc_gs_elem *gse);
+struct kvmppc_gs_elem *kvmppc_gsp_lookup(struct kvmppc_gs_parser *gsp,
+ u16 iden);
+
+/**
+ * kvmppc_gsp_for_each - iterate the <guest state IDs, guest state element>
+ * pairs
+ * @gsp: guest state parser
+ * @iden: current guest state ID
+ * @gse: guest state element
+ *
+ * Walks the IDs present in @gsp->iterator with kvmppc_gsbm_next() and sets
+ * @gse to kvmppc_gsp_lookup(@gsp, @iden) for each of them. The loop ends when
+ * kvmppc_gsbm_next() returns 0; note that the lookup is still evaluated once
+ * for that final ID of 0 before the loop condition is checked.
+ */
+#define kvmppc_gsp_for_each(gsp, iden, gse) \
+ for (iden = kvmppc_gsbm_next(&(gsp)->iterator, 0), \
+ gse = kvmppc_gsp_lookup((gsp), iden); \
+ iden != 0; iden = kvmppc_gsbm_next(&(gsp)->iterator, iden), \
+ gse = kvmppc_gsp_lookup((gsp), iden))
+
+/**************************************************************************
+ * Guest State Message
+ **************************************************************************/
+
+/**
+ * kvmppc_gsm_for_each - iterate the guest state IDs included in a guest state
+ * message
+ * @gsm: guest state message
+ * @iden: current guest state ID
+ *
+ * @iden must be an lvalue: it is assigned each ID found in @gsm->bitmap by
+ * kvmppc_gsbm_next(). The loop ends when kvmppc_gsbm_next() returns 0.
+ */
+#define kvmppc_gsm_for_each(gsm, iden) \
+ for (iden = kvmppc_gsbm_next(&(gsm)->bitmap, 0); iden != 0; \
+ iden = kvmppc_gsbm_next(&(gsm)->bitmap, iden))
+
+/* Declaration only; the definition is not part of this header. */
+int kvmppc_gsm_init(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_msg_ops *ops,
+ void *data, unsigned long flags);
+
+struct kvmppc_gs_msg *kvmppc_gsm_new(struct kvmppc_gs_msg_ops *ops, void *data,
+ unsigned long flags, gfp_t gfp_flags);
+void kvmppc_gsm_free(struct kvmppc_gs_msg *gsm);
+size_t kvmppc_gsm_size(struct kvmppc_gs_msg *gsm);
+int kvmppc_gsm_fill_info(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_buff *gsb);
+int kvmppc_gsm_refresh_info(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb);
+
+/**
+ * kvmppc_gsm_include - indicate a guest state ID should be included when
+ * serializing
+ * @gsm: guest state message
+ * @iden: guest state ID
+ *
+ * Marks @iden in @gsm->bitmap with kvmppc_gsbm_set().
+ */
+static inline void kvmppc_gsm_include(struct kvmppc_gs_msg *gsm, u16 iden)
+{
+ kvmppc_gsbm_set(&gsm->bitmap, iden);
+}
+
+/**
+ * kvmppc_gsm_includes - check if a guest state ID will be included when
+ * serializing
+ * @gsm: guest state message
+ * @iden: guest state ID
+ *
+ * Returns the result of kvmppc_gsbm_test() for @iden on @gsm->bitmap.
+ */
+static inline bool kvmppc_gsm_includes(struct kvmppc_gs_msg *gsm, u16 iden)
+{
+ return kvmppc_gsbm_test(&gsm->bitmap, iden);
+}
+
+/**
+ * kvmppc_gsm_include_all - indicate all guest state IDs should be included
+ * when serializing
+ * @gsm: guest state message
+ *
+ * Fills @gsm->bitmap with kvmppc_gsbm_fill().
+ */
+static inline void kvmppc_gsm_include_all(struct kvmppc_gs_msg *gsm)
+{
+ kvmppc_gsbm_fill(&gsm->bitmap);
+}
+
+/**
+ * kvmppc_gsm_reset - clear the guest state IDs that should be included when
+ * serializing
+ * @gsm: guest state message
+ *
+ * Zeroes @gsm->bitmap with kvmppc_gsbm_zero().
+ */
+static inline void kvmppc_gsm_reset(struct kvmppc_gs_msg *gsm)
+{
+ kvmppc_gsbm_zero(&gsm->bitmap);
+}
+
+/**
+ * kvmppc_gsb_receive_data - flexibly update values from a guest state buffer
+ * @gsb: guest state buffer
+ * @gsm: guest state message
+ *
+ * Resets @gsb, lets @gsm fill it via kvmppc_gsm_fill_info(), receives into it
+ * with kvmppc_gsb_recv() using @gsm->flags, and finally hands the buffer back
+ * to @gsm through kvmppc_gsm_refresh_info().
+ *
+ * Returns the first negative result of those steps, or 0 if none of them
+ * returned a negative value.
+ */
+static inline int kvmppc_gsb_receive_data(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{
+ int ret;
+
+ kvmppc_gsb_reset(gsb);
+
+ /* Each step only runs if the one before it did not fail. */
+ ret = kvmppc_gsm_fill_info(gsm, gsb);
+ if (ret >= 0)
+  ret = kvmppc_gsb_recv(gsb, gsm->flags);
+ if (ret >= 0)
+  ret = kvmppc_gsm_refresh_info(gsm, gsb);
+
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * kvmppc_gsb_receive_datum - receive a single guest state ID
+ * @gsb: guest state buffer
+ * @gsm: guest state message
+ * @iden: guest state identity
+ *
+ * Includes @iden in @gsm, runs kvmppc_gsb_receive_data() and, on success,
+ * resets @gsm. Returns 0 on success or the negative result of
+ * kvmppc_gsb_receive_data().
+ *
+ * NOTE(review): on failure @gsm is not reset, so @iden stays included.
+ */
+static inline int kvmppc_gsb_receive_datum(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm, u16 iden)
+{
+ int err;
+
+ kvmppc_gsm_include(gsm, iden);
+ err = kvmppc_gsb_receive_data(gsb, gsm);
+ if (err >= 0) {
+  /* Success: drop every ID from the message again. */
+  kvmppc_gsm_reset(gsm);
+  err = 0;
+ }
+ return err;
+}
+
+/**
+ * kvmppc_gsb_send_data - flexibly send values from a guest state buffer
+ * @gsb: guest state buffer
+ * @gsm: guest state message
+ *
+ * Resets @gsb, lets @gsm fill it via kvmppc_gsm_fill_info() and then sends it
+ * with kvmppc_gsb_send() using @gsm->flags.
+ *
+ * Returns the negative result of kvmppc_gsm_fill_info() if that step fails,
+ * otherwise whatever kvmppc_gsb_send() returns.
+ */
+static inline int kvmppc_gsb_send_data(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{
+ int ret;
+
+ kvmppc_gsb_reset(gsb);
+ ret = kvmppc_gsm_fill_info(gsm, gsb);
+ if (ret >= 0)
+  ret = kvmppc_gsb_send(gsb, gsm->flags);
+
+ return ret;
+}
+
+/**
+ * kvmppc_gsb_send_datum - send a single guest state ID
+ * @gsb: guest state buffer
+ * @gsm: guest state message
+ * @iden: guest state identity
+ *
+ * Includes @iden in @gsm, runs kvmppc_gsb_send_data() and, on success, resets
+ * @gsm. Returns 0 on success or the negative result of
+ * kvmppc_gsb_send_data().
+ *
+ * NOTE(review): on failure @gsm is not reset, so @iden stays included.
+ */
+static inline int kvmppc_gsb_send_datum(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm, u16 iden)
+{
+ int err;
+
+ kvmppc_gsm_include(gsm, iden);
+ err = kvmppc_gsb_send_data(gsb, gsm);
+ if (err >= 0) {
+  /* Success: drop every ID from the message again. */
+  kvmppc_gsm_reset(gsm);
+  err = 0;
+ }
+ return err;
+}
+
+#endif /* _ASM_POWERPC_GUEST_STATE_BUFFER_H */
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index 1bbb3013d6aa..f133b5930ae1 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_HARDIRQ_H
#define _ASM_POWERPC_HARDIRQ_H
@@ -7,11 +8,15 @@
typedef struct {
unsigned int __softirq_pending;
unsigned int timer_irqs_event;
+ unsigned int broadcast_irqs_event;
unsigned int timer_irqs_others;
unsigned int pmu_irqs;
unsigned int mce_exceptions;
unsigned int spurious_irqs;
- unsigned int hmi_exceptions;
+ unsigned int sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+ unsigned int soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
unsigned int doorbell_irqs;
#endif
@@ -20,8 +25,7 @@ typedef struct {
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
-
-#define local_softirq_pending() __get_cpu_var(irq_stat).__softirq_pending
+#define __ARCH_IRQ_EXIT_IRQS_DISABLED
static inline void ack_bad_irq(unsigned int irq)
{
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
new file mode 100644
index 000000000000..3966bd5810cb
--- /dev/null
+++ b/arch/powerpc/include/asm/head-64.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_HEAD_64_H
+#define _ASM_POWERPC_HEAD_64_H
+
+#include <asm/cache.h>
+
+#ifdef __ASSEMBLER__
+/*
+ * We can't do CPP stringification and concatenation directly into the section
+ * name for some reason, so these macros can do it for us.
+ *
+ * define_ftsec and use_ftsec both switch to section ".head.text.<name>" with
+ * flags "ax"; define_data_ftsec switches to ".head.data.<name>" with flags
+ * "a". All three sections are @progbits.
+ */
+.macro define_ftsec name
+ .section ".head.text.\name\()","ax",@progbits
+.endm
+.macro define_data_ftsec name
+ .section ".head.data.\name\()","a",@progbits
+.endm
+.macro use_ftsec name
+ .section ".head.text.\name\()","ax",@progbits
+.endm
+
+/*
+ * Fixed (location) sections are used by opening fixed sections and emitting
+ * fixed section entries into them before closing them. Multiple fixed sections
+ * can be open at any time.
+ *
+ * Each fixed section created in a .S file must have corresponding linkage
+ * directives including location, added to arch/powerpc/kernel/vmlinux.lds.S
+ *
+ * For each fixed section, code is generated into it in the order which it
+ * appears in the source. Fixed section entries can be placed at a fixed
+ * location within the section using _LOCATION postfix variants. These must
+ * be ordered according to their relative placements within the section.
+ *
+ * OPEN_FIXED_SECTION(section_name, start_address, end_address)
+ * FIXED_SECTION_ENTRY_BEGIN(section_name, label1)
+ *
+ * USE_FIXED_SECTION(section_name)
+ * label3:
+ * li r10,128
+ * mr r11,r10
+ *
+ * FIXED_SECTION_ENTRY_BEGIN_LOCATION(section_name, label2, start_address, size)
+ * FIXED_SECTION_ENTRY_END_LOCATION(section_name, label2, start_address, size)
+ * CLOSE_FIXED_SECTION(section_name)
+ *
+ * ZERO_FIXED_SECTION can be used to emit zeroed data.
+ *
+ * Troubleshooting:
+ * - If the build dies with "Error: attempt to move .org backwards" at
+ * CLOSE_FIXED_SECTION() or elsewhere, there may be something
+ * unexpected being added there. Remove the '. = x_len' line, rebuild, and
+ * check what is pushing the section down.
+ * - If the build dies in linking, check arch/powerpc/tools/head_check.sh
+ * comments.
+ * - If the kernel crashes or hangs in very early boot, it could be linker
+ * stubs at the start of the main text.
+ */
+
+/*
+ * Open fixed section @sname covering [@start, @end): defines the assembler
+ * symbols <sname>_start, <sname>_end and <sname>_len, switches to the
+ * section with define_ftsec, sets the location counter to 0 and places the
+ * start_<sname> label there.
+ */
+#define OPEN_FIXED_SECTION(sname, start, end) \
+ sname##_start = (start); \
+ sname##_end = (end); \
+ sname##_len = (end) - (start); \
+ define_ftsec sname; \
+ . = 0x0; \
+start_##sname:
+
+/*
+ * .linker_stub_catch section is used to catch linker stubs from being
+ * inserted in our .text section, above the start_text label (which breaks
+ * the ABS_ADDR calculation). See kernel/vmlinux.lds.S and tools/head_check.sh
+ * for more details. We would prefer to just keep a cacheline (0x80), but
+ * 0x100 seems to be how the linker aligns branch stub groups.
+ *
+ * With CONFIG_LD_HEAD_STUB_CATCH, OPEN_TEXT_SECTION emits the
+ * linker_stub_catch label in its own section, moves that section's location
+ * counter to 0x4, sets text_start to @start + 0x100 and opens .text aligned
+ * to 0x100. Without it, text_start is @start and .text is opened at
+ * location 0. Both variants end by placing the start_text label.
+ */
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+#define OPEN_TEXT_SECTION(start) \
+ .section ".linker_stub_catch","ax",@progbits; \
+linker_stub_catch: \
+ . = 0x4; \
+ text_start = (start) + 0x100; \
+ .section ".text","ax",@progbits; \
+ .balign 0x100; \
+start_text:
+#else
+#define OPEN_TEXT_SECTION(start) \
+ text_start = (start); \
+ .section ".text","ax",@progbits; \
+ . = 0x0; \
+start_text:
+#endif
+
+/*
+ * Defines <sname>_start, <sname>_end and <sname>_len like OPEN_FIXED_SECTION,
+ * but switches to the data section via define_data_ftsec and then moves the
+ * location counter from 0 straight to <sname>_len, so the section spans its
+ * whole length without any explicit contents.
+ */
+#define ZERO_FIXED_SECTION(sname, start, end) \
+ sname##_start = (start); \
+ sname##_end = (end); \
+ sname##_len = (end) - (start); \
+ define_data_ftsec sname; \
+ . = 0x0; \
+ . = sname##_len;
+
+/* Switch to fixed section @sname (via use_ftsec). */
+#define USE_FIXED_SECTION(sname) \
+ use_ftsec sname;
+
+/* Switch to the .text section. */
+#define USE_TEXT_SECTION() \
+ .text
+
+/*
+ * Close fixed section @sname: switch to it, move the location counter to
+ * <sname>_len and place the end_<sname> label there.
+ */
+#define CLOSE_FIXED_SECTION(sname) \
+ USE_FIXED_SECTION(sname); \
+ . = sname##_len; \
+end_##sname:
+
+
+/*
+ * Start an entry in fixed section @sname at the current location: switch to
+ * the section, align to @__align bytes and define @name as a global label.
+ */
+#define __FIXED_SECTION_ENTRY_BEGIN(sname, name, __align) \
+ USE_FIXED_SECTION(sname); \
+ .balign __align; \
+ .global name; \
+name:
+
+/* As __FIXED_SECTION_ENTRY_BEGIN, aligned to IFETCH_ALIGN_BYTES. */
+#define FIXED_SECTION_ENTRY_BEGIN(sname, name) \
+ __FIXED_SECTION_ENTRY_BEGIN(sname, name, IFETCH_ALIGN_BYTES)
+
+/*
+ * Start an entry at fixed address @start inside fixed section @sname.
+ *
+ * Records <name>_start = @start and raises an assembler error if @start is
+ * not a multiple of @size, if @size is not one of 0x20, 0x80, 0x100 or
+ * 0x1000, or if @start lies below <sname>_start. Otherwise the location
+ * counter is set to @start - <sname>_start and @name is defined there as a
+ * global label.
+ */
+#define FIXED_SECTION_ENTRY_BEGIN_LOCATION(sname, name, start, size) \
+ USE_FIXED_SECTION(sname); \
+ name##_start = (start); \
+ .if ((start) % (size) != 0); \
+ .error "Fixed section exception vector misalignment"; \
+ .endif; \
+ .if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && ((size) != 0x1000); \
+ .error "Fixed section exception vector bad size"; \
+ .endif; \
+ .if (start) < sname##_start; \
+ .error "Fixed section underflow"; \
+ .abort; \
+ .endif; \
+ . = (start) - sname##_start; \
+ .global name; \
+name:
+
+/*
+ * End the entry begun with FIXED_SECTION_ENTRY_BEGIN_LOCATION().
+ *
+ * Raises an assembler error if @start + @size exceeds <sname>_end, or if the
+ * distance from @name to the current location is larger than
+ * @start + @size - <name>_start. Otherwise the location counter is moved to
+ * @start + @size - <sname>_start.
+ */
+#define FIXED_SECTION_ENTRY_END_LOCATION(sname, name, start, size) \
+ .if (start) + (size) > sname##_end; \
+ .error "Fixed section overflow"; \
+ .abort; \
+ .endif; \
+ .if (. - name > (start) + (size) - name##_start); \
+ .error "Fixed entry overflow"; \
+ .abort; \
+ .endif; \
+ . = ((start) + (size) - sname##_start); \
+
+
+/*
+ * These macros are used to change symbols in other fixed sections to be
+ * absolute or related to our current fixed section.
+ *
+ * - DEFINE_FIXED_SYMBOL / FIXED_SYMBOL_ABS_ADDR is used to find the
+ * absolute address of a symbol within a fixed section, from any section.
+ *
+ * - ABS_ADDR is used to find the absolute address of any symbol, from within
+ * a fixed section.
+ */
+// define label as being _in_ sname
+// (creates <label>_absolute = label - start_<sname> + <sname>_start)
+#define DEFINE_FIXED_SYMBOL(label, sname) \
+ label##_absolute = (label - start_ ## sname + sname ## _start)
+
+// refer to the <label>_absolute symbol created by DEFINE_FIXED_SYMBOL
+#define FIXED_SYMBOL_ABS_ADDR(label) \
+ (label##_absolute)
+
+// find label from _within_ sname
+// (same expression as DEFINE_FIXED_SYMBOL, without defining a symbol)
+#define ABS_ADDR(label, sname) (label - start_ ## sname + sname ## _start)
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/include/asm/heathrow.h b/arch/powerpc/include/asm/heathrow.h
index 93f54958a9d1..8bc5b168762e 100644
--- a/arch/powerpc/include/asm/heathrow.h
+++ b/arch/powerpc/include/asm/heathrow.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_HEATHROW_H
#define _ASM_POWERPC_HEATHROW_H
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/highmem.h b/arch/powerpc/include/asm/highmem.h
index caaf6e00630d..c0fcd1bbdba9 100644
--- a/arch/powerpc/include/asm/highmem.h
+++ b/arch/powerpc/include/asm/highmem.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* highmem.h: virtual kernel memory mappings for high memory
*
@@ -23,13 +24,10 @@
#ifdef __KERNEL__
#include <linux/interrupt.h>
-#include <asm/kmap_types.h>
-#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
#include <asm/page.h>
#include <asm/fixmap.h>
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
extern pte_t *pkmap_page_table;
/*
@@ -58,48 +56,15 @@ extern pte_t *pkmap_page_table;
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))
-extern void *kmap_high(struct page *page);
-extern void kunmap_high(struct page *page);
-extern void *kmap_atomic_prot(struct page *page, pgprot_t prot);
-extern void __kunmap_atomic(void *kvaddr);
-
-static inline void *kmap(struct page *page)
-{
- might_sleep();
- if (!PageHighMem(page))
- return page_address(page);
- return kmap_high(page);
-}
-
-static inline void kunmap(struct page *page)
-{
- BUG_ON(in_interrupt());
- if (!PageHighMem(page))
- return;
- kunmap_high(page);
-}
-
-static inline void *kmap_atomic(struct page *page)
-{
- return kmap_atomic_prot(page, kmap_prot);
-}
-
-static inline struct page *kmap_atomic_to_page(void *ptr)
-{
- unsigned long idx, vaddr = (unsigned long) ptr;
- pte_t *pte;
-
- if (vaddr < FIXADDR_START)
- return virt_to_page(ptr);
-
- idx = virt_to_fix(vaddr);
- pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
- return pte_page(*pte);
-}
-
-
#define flush_cache_kmaps() flush_cache_all()
+#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev) \
+ __set_pte_at(mm, vaddr, ptep, ptev, 1)
+#define arch_kmap_local_post_map(vaddr, pteval) \
+ local_flush_tlb_page(NULL, vaddr)
+#define arch_kmap_local_post_unmap(vaddr) \
+ local_flush_tlb_page(NULL, vaddr)
+
#endif /* __KERNEL__ */
#endif /* _ASM_HIGHMEM_H */
diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h
new file mode 100644
index 000000000000..155748460c5d
--- /dev/null
+++ b/arch/powerpc/include/asm/hmi.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Hypervisor Maintenance Interrupt header file.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#ifndef __ASM_PPC64_HMI_H__
+#define __ASM_PPC64_HMI_H__
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+#define CORE_TB_RESYNC_REQ_BIT 63
+#define MAX_SUBCORE_PER_CORE 4
+
+/*
+ * sibling_subcore_state structure is used to co-ordinate all threads
+ * during HMI to avoid TB corruption. This structure is allocated once
+ * per core and shared by all threads on that core.
+ */
+struct sibling_subcore_state {
+ /* NOTE(review): presumably carries CORE_TB_RESYNC_REQ_BIT - confirm */
+ unsigned long flags;
+ /* One byte per subcore, MAX_SUBCORE_PER_CORE entries. */
+ u8 in_guest[MAX_SUBCORE_PER_CORE];
+};
+
+/* Defined elsewhere when CONFIG_KVM_BOOK3S_HV_POSSIBLE is set. */
+extern void wait_for_subcore_guest_exit(void);
+extern void wait_for_tb_resync(void);
+#else
+/* Empty stubs so callers build without CONFIG_KVM_BOOK3S_HV_POSSIBLE. */
+static inline void wait_for_subcore_guest_exit(void) { }
+static inline void wait_for_tb_resync(void) { }
+#endif
+
+struct pt_regs;
+extern long hmi_handle_debugtrig(struct pt_regs *regs);
+
+#endif /* __ASM_PPC64_HMI_H__ */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 623f2971ce0e..86326587e58d 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -1,190 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_HUGETLB_H
#define _ASM_POWERPC_HUGETLB_H
#ifdef CONFIG_HUGETLB_PAGE
#include <asm/page.h>
-#include <asm-generic/hugetlb.h>
-
-extern struct kmem_cache *hugepte_cache;
#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * This should work for other subarchs too. But right now we use the
- * new format only for 64bit book3s
- */
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!hugepd_ok(hpd));
- /*
- * We have only four bits to encode, MMU page size
- */
- BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
- return (pte_t *)(hpd.pd & ~HUGEPD_SHIFT_MASK);
-}
-
-static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
-{
- return (hpd.pd & HUGEPD_SHIFT_MASK) >> 2;
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
- return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
-}
-
-#else
-
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!hugepd_ok(hpd));
- return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
- return hpd.pd & HUGEPD_SHIFT_MASK;
-}
-
+#include <asm/book3s/64/hugetlb.h>
+#elif defined(CONFIG_PPC_E500)
+#include <asm/nohash/hugetlb-e500.h>
+#elif defined(CONFIG_PPC_8xx)
+#include <asm/nohash/32/hugetlb-8xx.h>
#endif /* CONFIG_PPC_BOOK3S_64 */
+extern bool hugetlb_disabled;
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
- unsigned pdshift)
+static inline bool hugepages_supported(void)
{
- /*
- * On FSL BookE, we have multiple higher-level table entries that
- * point to the same hugepte. Just use the first one since they're all
- * identical. So for that case, idx=0.
- */
- unsigned long idx = 0;
-
- pte_t *dir = hugepd_page(*hpdp);
-#ifndef CONFIG_PPC_FSL_BOOK3E
- idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp);
-#endif
-
- return dir + idx;
-}
+ if (hugetlb_disabled)
+ return false;
-pte_t *huge_pte_offset_and_shift(struct mm_struct *mm,
- unsigned long addr, unsigned *shift);
+ return HPAGE_SHIFT != 0;
+}
+#define hugepages_supported hugepages_supported
-void flush_dcache_icache_hugepage(struct page *page);
+void __init hugetlbpage_init_defaultsize(void);
-#if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT)
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len);
-#else
+
static inline int is_hugepage_only_range(struct mm_struct *mm,
unsigned long addr,
unsigned long len)
{
+ if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) && !radix_enabled())
+ return slice_is_hugepage_only_range(mm, addr, len);
return 0;
}
-#endif
-
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte);
-void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-
-void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
- unsigned long end, unsigned long floor,
- unsigned long ceiling);
-
-/*
- * The version of vma_mmu_pagesize() in arch/powerpc/mm/hugetlbpage.c needs
- * to override the version in mm/hugetlb.c
- */
-#define vma_mmu_pagesize vma_mmu_pagesize
-
-/*
- * If the arch doesn't supply something else, assume that hugepage
- * size aligned regions are ok without further preparation.
- */
-static inline int prepare_hugepage_range(struct file *file,
- unsigned long addr, unsigned long len)
-{
- struct hstate *h = hstate_file(file);
- if (len & ~huge_page_mask(h))
- return -EINVAL;
- if (addr & ~huge_page_mask(h))
- return -EINVAL;
- return 0;
-}
-
-static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm)
-{
-}
+#define is_hugepage_only_range is_hugepage_only_range
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned long sz);
-static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte)
-{
- set_pte_at(mm, addr, ptep, pte);
-}
-
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
+ unsigned long addr, pte_t *ptep,
+ unsigned long sz)
{
-#ifdef CONFIG_PPC64
return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 1));
-#else
- return __pte(pte_update(ptep, ~0UL, 0));
-#endif
}
-static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
{
pte_t pte;
- pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
- flush_tlb_page(vma, addr);
-}
-
-static inline int huge_pte_none(pte_t pte)
-{
- return pte_none(pte);
-}
-
-static inline pte_t huge_pte_wrprotect(pte_t pte)
-{
- return pte_wrprotect(pte);
-}
-
-static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep,
- pte_t pte, int dirty)
-{
-#ifdef HUGETLB_NEED_PRELOAD
- /*
- * The "return 1" forces a call of update_mmu_cache, which will write a
- * TLB entry. Without this, platforms that don't do a write of the TLB
- * entry in the TLB miss handler asm will fault ad infinitum.
- */
- ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- return 1;
-#else
- return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
-#endif
-}
+ unsigned long sz = huge_page_size(hstate_vma(vma));
-static inline pte_t huge_ptep_get(pte_t *ptep)
-{
- return *ptep;
+ pte = huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, sz);
+ flush_hugetlb_page(vma, addr);
+ return pte;
}
-static inline int arch_prepare_hugepage(struct page *page)
-{
- return 0;
-}
-
-static inline void arch_release_hugepage(struct page *page)
-{
-}
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty);
-static inline void arch_clear_hugepage_flags(struct page *page)
-{
-}
+void gigantic_hugetlb_cma_reserve(void) __init;
+#include <asm-generic/hugetlb.h>
#else /* ! CONFIG_HUGETLB_PAGE */
static inline void flush_hugetlb_page(struct vm_area_struct *vma,
@@ -192,25 +77,13 @@ static inline void flush_hugetlb_page(struct vm_area_struct *vma,
{
}
-#define hugepd_shift(x) 0
-static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr,
- unsigned pdshift)
+static inline void __init gigantic_hugetlb_cma_reserve(void)
{
- return 0;
}
-#endif /* CONFIG_HUGETLB_PAGE */
-/*
- * FSL Book3E platforms require special gpage handling - the gpages
- * are reserved early in the boot process by memblock instead of via
- * the .dts as on IBM platforms.
- */
-#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_PPC_FSL_BOOK3E)
-extern void __init reserve_hugetlb_gpages(void);
-#else
-static inline void reserve_hugetlb_gpages(void)
+static inline void __init hugetlbpage_init_defaultsize(void)
{
}
-#endif
+#endif /* CONFIG_HUGETLB_PAGE */
#endif /* _ASM_POWERPC_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 85bc8c0d257b..9aef16149d92 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_HVCALL_H
#define _ASM_POWERPC_HVCALL_H
#ifdef __KERNEL__
@@ -78,6 +79,7 @@
#define H_NOT_ENOUGH_RESOURCES -44
#define H_R_STATE -45
#define H_RESCINDED -46
+#define H_ABORTED -54
#define H_P2 -55
#define H_P3 -56
#define H_P4 -57
@@ -86,7 +88,9 @@
#define H_P7 -60
#define H_P8 -61
#define H_P9 -62
+#define H_NOOP -63
#define H_TOO_BIG -64
+#define H_UNSUPPORTED -67
#define H_OVERLAP -68
#define H_INTERRUPT -69
#define H_BAD_DATA -70
@@ -94,6 +98,20 @@
#define H_SG_LIST -72
#define H_OP_MODE -73
#define H_COP_HW -74
+#define H_STATE -75
+#define H_IN_USE -77
+
+#define H_INVALID_ELEMENT_ID -79
+#define H_INVALID_ELEMENT_SIZE -80
+#define H_INVALID_ELEMENT_VALUE -81
+#define H_INPUT_BUFFER_NOT_DEFINED -82
+#define H_INPUT_BUFFER_TOO_SMALL -83
+#define H_OUTPUT_BUFFER_NOT_DEFINED -84
+#define H_OUTPUT_BUFFER_TOO_SMALL -85
+#define H_PARTITION_PAGE_TABLE_NOT_DEFINED -86
+#define H_GUEST_VCPU_STATE_NOT_HV_OWNED -87
+
+
#define H_UNSUPPORTED_FLAG_START -256
#define H_UNSUPPORTED_FLAG_END -511
#define H_MULTI_THREADS_ACTIVE -9005
@@ -152,6 +170,14 @@
#define H_VASI_RESUMED 5
#define H_VASI_COMPLETED 6
+/* VASI signal codes. Only the Cancel code is valid for H_VASI_SIGNAL. */
+#define H_VASI_SIGNAL_CANCEL 1
+#define H_VASI_SIGNAL_ABORT 2
+#define H_VASI_SIGNAL_SUSPEND 3
+#define H_VASI_SIGNAL_COMPLETE 4
+#define H_VASI_SIGNAL_ENABLE 5
+#define H_VASI_SIGNAL_FAILOVER 6
+
/* Each control block has to be on a 4K boundary */
#define H_CB_ALIGNMENT 4096
@@ -234,15 +260,17 @@
#define H_CREATE_RPT 0x1A4
#define H_REMOVE_RPT 0x1A8
#define H_REGISTER_RPAGES 0x1AC
-#define H_DISABLE_AND_GETC 0x1B0
+#define H_DISABLE_AND_GET 0x1B0
#define H_ERROR_DATA 0x1B4
#define H_GET_HCA_INFO 0x1B8
#define H_GET_PERF_COUNT 0x1BC
#define H_MANAGE_TRACE 0x1C0
+#define H_GET_CPU_CHARACTERISTICS 0x1C8
#define H_FREE_LOGICAL_LAN_BUFFER 0x1D4
#define H_QUERY_INT_STATE 0x1E4
#define H_POLL_PENDING 0x1D8
#define H_ILLAN_ATTRIBUTES 0x244
+#define H_ADD_LOGICAL_LAN_BUFFERS 0x248
#define H_MODIFY_HEA_QP 0x250
#define H_QUERY_HEA_QP 0x254
#define H_QUERY_HEA 0x258
@@ -257,35 +285,257 @@
#define H_ADD_CONN 0x284
#define H_DEL_CONN 0x288
#define H_JOIN 0x298
+#define H_VASI_SIGNAL 0x2A0
#define H_VASI_STATE 0x2A4
+#define H_VIOCTL 0x2A8
#define H_ENABLE_CRQ 0x2B0
#define H_GET_EM_PARMS 0x2B8
#define H_SET_MPP 0x2D0
#define H_GET_MPP 0x2D4
+#define H_REG_SUB_CRQ 0x2DC
#define H_HOME_NODE_ASSOCIATIVITY 0x2EC
+#define H_FREE_SUB_CRQ 0x2E0
+#define H_SEND_SUB_CRQ 0x2E4
+#define H_SEND_SUB_CRQ_INDIRECT 0x2E8
#define H_BEST_ENERGY 0x2F4
#define H_XIRR_X 0x2FC
#define H_RANDOM 0x300
#define H_COP 0x304
#define H_GET_MPP_X 0x314
#define H_SET_MODE 0x31C
-#define MAX_HCALL_OPCODE H_SET_MODE
+#define H_BLOCK_REMOVE 0x328
+#define H_CLEAR_HPT 0x358
+#define H_REQUEST_VMC 0x360
+#define H_RESIZE_HPT_PREPARE 0x36C
+#define H_RESIZE_HPT_COMMIT 0x370
+#define H_REGISTER_PROC_TBL 0x37C
+#define H_SIGNAL_SYS_RESET 0x380
+#define H_ALLOCATE_VAS_WINDOW 0x388
+#define H_MODIFY_VAS_WINDOW 0x38C
+#define H_DEALLOCATE_VAS_WINDOW 0x390
+#define H_QUERY_VAS_WINDOW 0x394
+#define H_QUERY_VAS_CAPABILITIES 0x398
+#define H_QUERY_NX_CAPABILITIES 0x39C
+#define H_GET_NX_FAULT 0x3A0
+#define H_INT_GET_SOURCE_INFO 0x3A8
+#define H_INT_SET_SOURCE_CONFIG 0x3AC
+#define H_INT_GET_SOURCE_CONFIG 0x3B0
+#define H_INT_GET_QUEUE_INFO 0x3B4
+#define H_INT_SET_QUEUE_CONFIG 0x3B8
+#define H_INT_GET_QUEUE_CONFIG 0x3BC
+#define H_INT_SET_OS_REPORTING_LINE 0x3C0
+#define H_INT_GET_OS_REPORTING_LINE 0x3C4
+#define H_INT_ESB 0x3C8
+#define H_INT_SYNC 0x3CC
+#define H_INT_RESET 0x3D0
+#define H_SCM_READ_METADATA 0x3E4
+#define H_SCM_WRITE_METADATA 0x3E8
+#define H_SCM_BIND_MEM 0x3EC
+#define H_SCM_UNBIND_MEM 0x3F0
+#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
+#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
+#define H_SCM_UNBIND_ALL 0x3FC
+#define H_SCM_HEALTH 0x400
+#define H_SCM_PERFORMANCE_STATS 0x418
+#define H_PKS_GET_CONFIG 0x41C
+#define H_PKS_SET_PASSWORD 0x420
+#define H_PKS_GEN_PASSWORD 0x424
+#define H_PKS_WRITE_OBJECT 0x42C
+#define H_PKS_GEN_KEY 0x430
+#define H_PKS_READ_OBJECT 0x434
+#define H_PKS_REMOVE_OBJECT 0x438
+#define H_PKS_CONFIRM_OBJECT_FLUSHED 0x43C
+#define H_RPT_INVALIDATE 0x448
+#define H_SCM_FLUSH 0x44C
+#define H_GET_ENERGY_SCALE_INFO 0x450
+#define H_PKS_SIGNED_UPDATE 0x454
+#define H_HTM 0x458
+#define H_WATCHDOG 0x45C
+#define H_GUEST_GET_CAPABILITIES 0x460
+#define H_GUEST_SET_CAPABILITIES 0x464
+#define H_GUEST_CREATE 0x470
+#define H_GUEST_CREATE_VCPU 0x474
+#define H_GUEST_GET_STATE 0x478
+#define H_GUEST_SET_STATE 0x47C
+#define H_GUEST_RUN_VCPU 0x480
+#define H_GUEST_COPY_MEMORY 0x484
+#define H_GUEST_DELETE 0x488
+#define MAX_HCALL_OPCODE H_GUEST_DELETE
+
+/* Scope args for H_SCM_UNBIND_ALL */
+#define H_UNBIND_SCOPE_ALL (0x1)
+#define H_UNBIND_SCOPE_DRC (0x2)
+
+/* H_VIOCTL functions */
+#define H_GET_VIOA_DUMP_SIZE 0x01
+#define H_GET_VIOA_DUMP 0x02
+#define H_GET_ILLAN_NUM_VLAN_IDS 0x03
+#define H_GET_ILLAN_VLAN_ID_LIST 0x04
+#define H_GET_ILLAN_SWITCH_ID 0x05
+#define H_DISABLE_MIGRATION 0x06
+#define H_ENABLE_MIGRATION 0x07
+#define H_GET_PARTNER_INFO 0x08
+#define H_GET_PARTNER_WWPN_LIST 0x09
+#define H_DISABLE_ALL_VIO_INTS 0x0A
+#define H_DISABLE_VIO_INTERRUPT 0x0B
+#define H_ENABLE_VIO_INTERRUPT 0x0C
+#define H_GET_SESSION_TOKEN 0x19
+#define H_SESSION_ERR_DETECTED 0x1A
+
/* Platform specific hcalls, used by KVM */
#define H_RTAS 0xf000
+/*
+ * Platform specific hcalls, used by QEMU/SLOF. These are ignored by
+ * KVM and only kept here so we can identify them during tracing.
+ */
+#define H_LOGICAL_MEMOP 0xF001
+#define H_CAS 0XF002
+#define H_UPDATE_DT 0XF003
+
/* "Platform specific hcalls", provided by PHYP */
#define H_GET_24X7_CATALOG_PAGE 0xF078
#define H_GET_24X7_DATA 0xF07C
#define H_GET_PERF_COUNTER_INFO 0xF080
+/* Platform-specific hcalls used for nested HV KVM */
+#define H_SET_PARTITION_TABLE 0xF800
+#define H_ENTER_NESTED 0xF804
+#define H_TLB_INVALIDATE 0xF808
+#define H_COPY_TOFROM_GUEST 0xF80C
+
+/* Flags for H_SVM_PAGE_IN */
+#define H_PAGE_IN_SHARED 0x1
+
+/* Platform-specific hcalls used by the Ultravisor */
+#define H_SVM_PAGE_IN 0xEF00
+#define H_SVM_PAGE_OUT 0xEF04
+#define H_SVM_INIT_START 0xEF08
+#define H_SVM_INIT_DONE 0xEF0C
+#define H_SVM_INIT_ABORT 0xEF14
+
/* Values for 2nd argument to H_SET_MODE */
#define H_SET_MODE_RESOURCE_SET_CIABR 1
-#define H_SET_MODE_RESOURCE_SET_DAWR 2
+#define H_SET_MODE_RESOURCE_SET_DAWR0 2
#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3
#define H_SET_MODE_RESOURCE_LE 4
+#define H_SET_MODE_RESOURCE_SET_DAWR1 5
+
+/* Values for argument to H_SIGNAL_SYS_RESET */
+#define H_SIGNAL_SYS_RESET_ALL -1
+#define H_SIGNAL_SYS_RESET_ALL_OTHERS -2
+/* >= 0 values are CPU number */
+
+/* H_GET_CPU_CHARACTERISTICS return values */
+#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0
+#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1
+#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2
+#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3
+#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4
+#define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5
+#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6
+#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7
+#define H_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) // IBM bit 9
+#define H_CPU_CHAR_BCCTR_LINK_FLUSH_ASSIST (1ull << 52) // IBM bit 11
+
+#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0
+#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1
+#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2
+#define H_CPU_BEHAV_FAVOUR_SECURITY_H (1ull << 60) // IBM bit 3
+#define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5
+#define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6
+#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7
+#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8
+#define H_CPU_BEHAV_NO_STF_BARRIER (1ull << 54) // IBM bit 9
+
+/* Flag values used in H_REGISTER_PROC_TBL hcall */
+#define PROC_TABLE_OP_MASK 0x18
+#define PROC_TABLE_DEREG 0x10
+#define PROC_TABLE_NEW 0x18
+#define PROC_TABLE_TYPE_MASK 0x06
+#define PROC_TABLE_HPT_SLB 0x00
+#define PROC_TABLE_HPT_PT 0x02
+#define PROC_TABLE_RADIX 0x04
+#define PROC_TABLE_GTSE 0x01
+
+/*
+ * Defines for
+ * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information.
+ */
-#ifndef __ASSEMBLY__
+/* Type of translation to invalidate (type) */
+#define H_RPTI_TYPE_NESTED 0x0001 /* Invalidate nested guest partition-scope */
+#define H_RPTI_TYPE_TLB 0x0002 /* Invalidate TLB */
+#define H_RPTI_TYPE_PWC 0x0004 /* Invalidate Page Walk Cache */
+/* Invalidate caching of Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+#define H_RPTI_TYPE_PRT 0x0008
+/* Invalidate caching of Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+#define H_RPTI_TYPE_PAT 0x0008
+#define H_RPTI_TYPE_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+ H_RPTI_TYPE_PRT)
+#define H_RPTI_TYPE_NESTED_ALL (H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+ H_RPTI_TYPE_PAT)
+
+/* Invalidation targets (target) */
+#define H_RPTI_TARGET_CMMU 0x01 /* All virtual processors in the partition */
+#define H_RPTI_TARGET_CMMU_LOCAL 0x02 /* Current virtual processor */
+/* All nest/accelerator agents in use by the partition */
+#define H_RPTI_TARGET_NMMU 0x04
+
+/* Page size mask (page sizes) */
+#define H_RPTI_PAGE_4K 0x01
+#define H_RPTI_PAGE_64K 0x02
+#define H_RPTI_PAGE_2M 0x04
+#define H_RPTI_PAGE_1G 0x08
+#define H_RPTI_PAGE_ALL (-1UL)
+
+/* Flags for H_GUEST_{S,G}ET_STATE */
+#define H_GUEST_FLAGS_WIDE (1UL << (63 - 0))
+#define H_GUEST_FLAGS_HOST_WIDE (1UL << (63 - 1))
+
+/* Flag values used for H_GUEST_{S,G}ET_CAPABILITIES */
+#define H_GUEST_CAP_COPY_MEM (1UL << (63 - 0))
+#define H_GUEST_CAP_POWER9 (1UL << (63 - 1))
+#define H_GUEST_CAP_POWER10 (1UL << (63 - 2))
+#define H_GUEST_CAP_POWER11 (1UL << (63 - 3))
+#define H_GUEST_CAP_BITMAP2 (1UL << (63 - 63))
+
+/*
+ * Defines for H_HTM - Macros for hardware trace macro (HTM) function.
+ */
+#define H_HTM_FLAGS_HARDWARE_TARGET (1ul << 63)
+#define H_HTM_FLAGS_LOGICAL_TARGET (1ul << 62)
+#define H_HTM_FLAGS_PROCID_TARGET (1ul << 61)
+#define H_HTM_FLAGS_NOWRAP (1ul << 60)
+
+#define H_HTM_OP_SHIFT (63-15)
+#define H_HTM_OP(x) ((unsigned long)(x)<<H_HTM_OP_SHIFT)
+#define H_HTM_OP_CAPABILITIES 0x01
+#define H_HTM_OP_STATUS 0x02
+#define H_HTM_OP_SETUP 0x03
+#define H_HTM_OP_CONFIGURE 0x04
+#define H_HTM_OP_START 0x05
+#define H_HTM_OP_STOP 0x06
+#define H_HTM_OP_DECONFIGURE 0x07
+#define H_HTM_OP_DUMP_DETAILS 0x08
+#define H_HTM_OP_DUMP_DATA 0x09
+#define H_HTM_OP_DUMP_SYSMEM_CONF 0x0a
+#define H_HTM_OP_DUMP_SYSPROC_CONF 0x0b
+
+#define H_HTM_TYPE_SHIFT (63-31)
+#define H_HTM_TYPE(x) ((unsigned long)(x)<<H_HTM_TYPE_SHIFT)
+#define H_HTM_TYPE_NEST 0x01
+#define H_HTM_TYPE_CORE 0x02
+#define H_HTM_TYPE_LLAT 0x03
+#define H_HTM_TYPE_GLOBAL 0xff
+
+#define H_HTM_TARGET_NODE_INDEX(x) ((unsigned long)(x)<<(63-15))
+#define H_HTM_TARGET_NODAL_CHIP_INDEX(x) ((unsigned long)(x)<<(63-31))
+#define H_HTM_TARGET_CORE_INDEX_ON_CHIP(x) ((unsigned long)(x)<<(63-47))
+
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
/**
* plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments
@@ -297,6 +547,9 @@
*/
long plpar_hcall_norets(unsigned long opcode, ...);
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
/**
* plpar_hcall: - Make a pseries hypervisor call
* @opcode: The hypervisor call to make.
@@ -308,7 +561,7 @@ long plpar_hcall_norets(unsigned long opcode, ...);
* Used for all but the craziest of phyp interfaces (see plpar_hcall9)
*/
#define PLPAR_HCALL_BUFSIZE 4
-long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...);
/**
* plpar_hcall_raw: - Make a hypervisor call without calculating hcall stats
@@ -322,7 +575,7 @@ long plpar_hcall(unsigned long opcode, unsigned long *retbuf, ...);
* plpar_hcall, but plpar_hcall_raw works in real mode and does not
* calculate hypervisor call statistics.
*/
-long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
+long plpar_hcall_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL_BUFSIZE], ...);
/**
* plpar_hcall9: - Make a pseries hypervisor call with up to 9 return arguments
@@ -333,18 +586,13 @@ long plpar_hcall_raw(unsigned long opcode, unsigned long *retbuf, ...);
* PLPAR_HCALL9_BUFSIZE to size the return argument buffer.
*/
#define PLPAR_HCALL9_BUFSIZE 9
-long plpar_hcall9(unsigned long opcode, unsigned long *retbuf, ...);
-long plpar_hcall9_raw(unsigned long opcode, unsigned long *retbuf, ...);
-
-/* For hcall instrumentation. One structure per-hcall, per-CPU */
-struct hcall_stats {
- unsigned long num_calls; /* number of calls (on this CPU) */
- unsigned long tb_total; /* total wall time (mftb) of calls. */
- unsigned long purr_total; /* total cpu time (PURR) of calls. */
- unsigned long tb_start;
- unsigned long purr_start;
-};
-#define HCALL_STAT_ARRAY_SIZE ((MAX_HCALL_OPCODE >> 2) + 1)
+long plpar_hcall9(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...);
+long plpar_hcall9_raw(unsigned long opcode, unsigned long retbuf[static PLPAR_HCALL9_BUFSIZE], ...);
+
+/* pseries hcall tracing */
+extern struct static_key hcall_tracepoint_key;
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args);
+void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf);
struct hvcall_mpp_data {
unsigned long entitled_mem;
@@ -359,7 +607,7 @@ struct hvcall_mpp_data {
unsigned long backing_mem;
};
-int h_get_mpp(struct hvcall_mpp_data *);
+long h_get_mpp(struct hvcall_mpp_data *mpp_data);
struct hvcall_mpp_x_data {
unsigned long coalesced_bytes;
@@ -391,38 +639,102 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc)
}
}
-#ifdef CONFIG_PPC_PSERIES
-extern int CMO_PrPSP;
-extern int CMO_SecPSP;
-extern unsigned long CMO_PageSize;
+struct h_cpu_char_result {
+ u64 character;
+ u64 behaviour;
+};
-static inline int cmo_get_primary_psp(void)
-{
- return CMO_PrPSP;
-}
+/*
+ * Register state for entering a nested guest with H_ENTER_NESTED.
+ * New members must be added at the end.
+ */
+struct hv_guest_state {
+ u64 version; /* version of this structure layout, must be first */
+ u32 lpid;
+ u32 vcpu_token;
+ /* These registers are hypervisor privileged (at least for writing) */
+ u64 lpcr;
+ u64 pcr;
+ u64 amor;
+ u64 dpdes;
+ u64 hfscr;
+ s64 tb_offset;
+ u64 dawr0;
+ u64 dawrx0;
+ u64 ciabr;
+ u64 hdec_expiry;
+ u64 purr;
+ u64 spurr;
+ u64 ic;
+ u64 vtb;
+ u64 hdar;
+ u64 hdsisr;
+ u64 heir;
+ u64 asdr;
+ /* These are OS privileged but need to be set late in guest entry */
+ u64 srr0;
+ u64 srr1;
+ u64 sprg[4];
+ u64 pidr;
+ u64 cfar;
+ u64 ppr;
+ /* Version 1 ends here */
+ u64 dawr1;
+ u64 dawrx1;
+ /* Version 2 ends here */
+};
-static inline int cmo_get_secondary_psp(void)
-{
- return CMO_SecPSP;
-}
+/* Latest version of hv_guest_state structure */
+#define HV_GUEST_STATE_VERSION 2
-static inline unsigned long cmo_get_page_size(void)
+static inline int hv_guest_state_size(unsigned int version)
{
- return CMO_PageSize;
+ switch (version) {
+ case 1:
+ return offsetofend(struct hv_guest_state, ppr);
+ case 2:
+ return offsetofend(struct hv_guest_state, dawrx1);
+ default:
+ return -1;
+ }
}
-extern long pSeries_enable_reloc_on_exc(void);
-extern long pSeries_disable_reloc_on_exc(void);
-
-extern long pseries_big_endian_exceptions(void);
-
-#else
-
-#define pSeries_enable_reloc_on_exc() do {} while (0)
-#define pSeries_disable_reloc_on_exc() do {} while (0)
-
-#endif /* CONFIG_PPC_PSERIES */
-
-#endif /* __ASSEMBLY__ */
+/*
+ * From the document "H_GetPerformanceCounterInfo Interface" v1.07
+ *
+ * H_GET_PERF_COUNTER_INFO argument
+ */
+struct hv_get_perf_counter_info_params {
+ __be32 counter_request; /* I */
+ __be32 starting_index; /* IO */
+ __be16 secondary_index; /* IO */
+ __be16 returned_values; /* O */
+ __be32 detail_rc; /* O, only needed when called via *_norets() */
+
+ /*
+ * O, size of each counter_value element in bytes, only set for version
+ * >= 0x3
+ */
+ __be16 cv_element_size;
+
+ /* I, 0 (zero) for versions < 0x3 */
+ __u8 counter_info_version_in;
+
+ /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */
+ __u8 counter_info_version_out;
+ __u8 reserved[0xC];
+ __u8 counter_value[];
+} __packed;
+
+#define HGPCI_REQ_BUFFER_SIZE 4096
+#define HGPCI_MAX_DATA_BYTES \
+ (HGPCI_REQ_BUFFER_SIZE - sizeof(struct hv_get_perf_counter_info_params))
+
+struct hv_gpci_request_buffer {
+ struct hv_get_perf_counter_info_params params;
+ uint8_t bytes[HGPCI_MAX_DATA_BYTES];
+} __packed;
+
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HVCALL_H */
diff --git a/arch/powerpc/include/asm/hvconsole.h b/arch/powerpc/include/asm/hvconsole.h
index 35ea69e8121f..d841a97010a0 100644
--- a/arch/powerpc/include/asm/hvconsole.h
+++ b/arch/powerpc/include/asm/hvconsole.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* hvconsole.h
* Copyright (C) 2004 Ryan S Arnold, IBM Corporation
*
* LPAR console support.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _PPC64_HVCONSOLE_H
@@ -34,8 +21,11 @@
* Vio firmware always attempts to fetch MAX_VIO_GET_CHARS chars. The 'count'
* parm is included to conform to put_chars() function pointer template
*/
-extern int hvc_get_chars(uint32_t vtermno, char *buf, int count);
-extern int hvc_put_chars(uint32_t vtermno, const char *buf, int count);
+extern ssize_t hvc_get_chars(uint32_t vtermno, u8 *buf, size_t count);
+extern ssize_t hvc_put_chars(uint32_t vtermno, const u8 *buf, size_t count);
+
+/* Provided by HVC VIO */
+void hvc_vio_init_early(void);
#endif /* __KERNEL__ */
#endif /* _PPC64_HVCONSOLE_H */
diff --git a/arch/powerpc/include/asm/hvcserver.h b/arch/powerpc/include/asm/hvcserver.h
index 67d7da3a4da4..2b20403e9fde 100644
--- a/arch/powerpc/include/asm/hvcserver.h
+++ b/arch/powerpc/include/asm/hvcserver.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* hvcserver.h
* Copyright (C) 2004 Ryan S Arnold, IBM Corporation
*
* PPC64 virtual I/O console server support.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _PPC64_HVCSERVER_H
diff --git a/arch/powerpc/include/asm/hvsi.h b/arch/powerpc/include/asm/hvsi.h
index d4a5315718ca..9058edcb632b 100644
--- a/arch/powerpc/include/asm/hvsi.h
+++ b/arch/powerpc/include/asm/hvsi.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _HVSI_H
#define _HVSI_H
@@ -63,7 +64,7 @@ struct hvsi_priv {
unsigned int inbuf_len; /* data in input buffer */
unsigned char inbuf[HVSI_INBUF_SIZE];
unsigned int inbuf_cur; /* Cursor in input buffer */
- unsigned int inbuf_pktlen; /* packet lenght from cursor */
+ size_t inbuf_pktlen; /* packet length from cursor */
atomic_t seqno; /* packet sequence number */
unsigned int opened:1; /* driver opened */
unsigned int established:1; /* protocol established */
@@ -71,24 +72,26 @@ struct hvsi_priv {
unsigned int mctrl_update:1; /* modem control updated */
unsigned short mctrl; /* modem control */
struct tty_struct *tty; /* tty structure */
- int (*get_chars)(uint32_t termno, char *buf, int count);
- int (*put_chars)(uint32_t termno, const char *buf, int count);
+ ssize_t (*get_chars)(uint32_t termno, u8 *buf, size_t count);
+ ssize_t (*put_chars)(uint32_t termno, const u8 *buf, size_t count);
uint32_t termno;
};
/* hvsi lib functions */
struct hvc_struct;
extern void hvsilib_init(struct hvsi_priv *pv,
- int (*get_chars)(uint32_t termno, char *buf, int count),
- int (*put_chars)(uint32_t termno, const char *buf,
- int count),
+ ssize_t (*get_chars)(uint32_t termno, u8 *buf,
+ size_t count),
+ ssize_t (*put_chars)(uint32_t termno, const u8 *buf,
+ size_t count),
int termno, int is_console);
extern int hvsilib_open(struct hvsi_priv *pv, struct hvc_struct *hp);
extern void hvsilib_close(struct hvsi_priv *pv, struct hvc_struct *hp);
extern int hvsilib_read_mctrl(struct hvsi_priv *pv);
extern int hvsilib_write_mctrl(struct hvsi_priv *pv, int dtr);
extern void hvsilib_establish(struct hvsi_priv *pv);
-extern int hvsilib_get_chars(struct hvsi_priv *pv, char *buf, int count);
-extern int hvsilib_put_chars(struct hvsi_priv *pv, const char *buf, int count);
+extern ssize_t hvsilib_get_chars(struct hvsi_priv *pv, u8 *buf, size_t count);
+extern ssize_t hvsilib_put_chars(struct hvsi_priv *pv, const u8 *buf,
+ size_t count);
#endif /* _HVSI_H */
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ac6432d9be46..66db0147d5b4 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -1,36 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC BookIII S hardware breakpoint definitions
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2010, IBM Corporation.
* Author: K.Prasad <prasad@linux.vnet.ibm.com>
- *
*/
#ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H
#define _PPC_BOOK3S_64_HW_BREAKPOINT_H
+#include <asm/cpu_has_feature.h>
+
#ifdef __KERNEL__
struct arch_hw_breakpoint {
unsigned long address;
u16 type;
u16 len; /* length of the target data symbol */
+ u16 hw_len; /* length programmed in hw */
+ u8 flags;
+ bool perf_single_step; /* temporarily uninstalled for a perf single step */
};
-/* Note: Don't change the the first 6 bits below as they are in the same order
+/* Note: Don't change the first 6 bits below as they are in the same order
* as the dabr and dabrx.
*/
#define HW_BRK_TYPE_READ 0x01
@@ -47,21 +38,48 @@ struct arch_hw_breakpoint {
#define HW_BRK_TYPE_PRIV_ALL (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
HW_BRK_TYPE_HYP)
+#define HW_BRK_FLAG_DISABLED 0x1
+
+/* Minimum granularity */
+#ifdef CONFIG_PPC_8xx
+#define HW_BREAKPOINT_SIZE 0x4
+#else
+#define HW_BREAKPOINT_SIZE 0x8
+#endif
+#define HW_BREAKPOINT_SIZE_QUADWORD 0x10
+
+#define DABR_MAX_LEN 8
+#define DAWR_MAX_LEN 512
+
+static inline int nr_wp_slots(void)
+{
+ return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
+}
+
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info);
+
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
+ int *type, int *size, unsigned long *ea);
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <linux/kdebug.h>
#include <asm/reg.h>
#include <asm/debug.h>
+struct perf_event_attr;
struct perf_event;
struct pmu;
struct perf_sample_data;
-
-#define HW_BREAKPOINT_ALIGN 0x7
+struct task_struct;
extern int hw_breakpoint_slots(int type);
extern int arch_bp_generic_fields(int type, int *gen_bp_type);
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
+extern int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data);
int arch_install_hw_breakpoint(struct perf_event *bp);
@@ -74,19 +92,37 @@ extern void ptrace_triggered(struct perf_event *bp,
struct perf_sample_data *data, struct pt_regs *regs);
static inline void hw_breakpoint_disable(void)
{
- struct arch_hw_breakpoint brk;
+ int i;
+ struct arch_hw_breakpoint null_brk = {0};
- brk.address = 0;
- brk.type = 0;
- brk.len = 0;
- __set_breakpoint(&brk);
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ __set_breakpoint(i, &null_brk);
}
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+int hw_breakpoint_handler(struct die_args *args);
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
static inline void thread_change_pc(struct task_struct *tsk,
struct pt_regs *regs) { }
+
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+
+#ifdef CONFIG_PPC_DAWR
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+ return dawr_force_enable;
+}
+int set_dawr(int nr, struct arch_hw_breakpoint *brk);
+#else
+static inline bool dawr_enabled(void) { return false; }
+static inline int set_dawr(int nr, struct arch_hw_breakpoint *brk) { return -1; }
+#endif
+
#endif /* __KERNEL__ */
#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b59ac27a6b7d..1078ba88efaf 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
@@ -17,70 +18,197 @@
* PACA flags in paca->irq_happened.
*
* This bits are set when interrupts occur while soft-disabled
- * and allow a proper replay. Additionally, PACA_IRQ_HARD_DIS
- * is set whenever we manually hard disable.
+ * and allow a proper replay.
+ *
+ * The PACA_IRQ_HARD_DIS is set whenever we hard disable. It is almost
+ * always in synch with the MSR[EE] state, except:
+ * - A window in interrupt entry, where hardware disables MSR[EE] and that
+ * must be "reconciled" with the soft mask state.
+ * - NMI interrupts that hit in awkward places, until they fix the state.
+ * - When local irqs are being enabled and state is being fixed up.
+ * - When returning from an interrupt there are some windows where this
+ * can become out of synch, but gets fixed before the RFI or before
+ * executing the next user instruction (see arch/powerpc/kernel/interrupt.c).
*/
#define PACA_IRQ_HARD_DIS 0x01
#define PACA_IRQ_DBELL 0x02
#define PACA_IRQ_EE 0x04
#define PACA_IRQ_DEC 0x08 /* Or FIT */
-#define PACA_IRQ_EE_EDGE 0x10 /* BookE only */
-#define PACA_IRQ_HMI 0x20
+#define PACA_IRQ_HMI 0x10
+#define PACA_IRQ_PMI 0x20
+#define PACA_IRQ_REPLAYING 0x40
+
+/*
+ * Some soft-masked interrupts must be hard masked until they are replayed
+ * (e.g., because the soft-masked handler does not clear the exception).
+ * Interrupt replay itself must remain hard masked too.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_PMI|PACA_IRQ_REPLAYING)
+#else
+#define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE|PACA_IRQ_REPLAYING)
+#endif
#endif /* CONFIG_PPC64 */
-#ifndef __ASSEMBLY__
+/*
+ * flags for paca->irq_soft_mask
+ */
+#define IRQS_ENABLED 0
+#define IRQS_DISABLED 1 /* local_irq_disable() interrupts */
+#define IRQS_PMI_DISABLED 2
+#define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED)
+
+#ifndef __ASSEMBLER__
+
+static inline void __hard_irq_enable(void)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(MSR_EE);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EIE);
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ __mtmsrd(MSR_EE | MSR_RI, 1);
+ else
+ mtmsr(mfmsr() | MSR_EE);
+}
+
+static inline void __hard_irq_disable(void)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(0);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EID);
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ __mtmsrd(MSR_RI, 1);
+ else
+ mtmsr(mfmsr() & ~MSR_EE);
+}
-extern void __replay_interrupt(unsigned int vector);
+static inline void __hard_EE_RI_disable(void)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(0);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_NRI);
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ __mtmsrd(0, 1);
+ else
+ mtmsr(mfmsr() & ~(MSR_EE | MSR_RI));
+}
-extern void timer_interrupt(struct pt_regs *);
-extern void performance_monitor_exception(struct pt_regs *regs);
-extern void WatchdogException(struct pt_regs *regs);
-extern void unknown_exception(struct pt_regs *regs);
+static inline void __hard_RI_enable(void)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ return;
+
+ if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EID);
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ __mtmsrd(MSR_RI, 1);
+ else
+ mtmsr(mfmsr() | MSR_RI);
+}
#ifdef CONFIG_PPC64
#include <asm/paca.h>
-static inline unsigned long arch_local_save_flags(void)
+static inline notrace unsigned long irq_soft_mask_return(void)
{
unsigned long flags;
asm volatile(
"lbz %0,%1(13)"
: "=r" (flags)
- : "i" (offsetof(struct paca_struct, soft_enabled)));
+ : "i" (offsetof(struct paca_struct, irq_soft_mask)));
return flags;
}
-static inline unsigned long arch_local_irq_disable(void)
+/*
+ * The "memory" clobber acts as both a compiler barrier
+ * for the critical section and as a clobber because
+ * we changed paca->irq_soft_mask
+ */
+static inline notrace void irq_soft_mask_set(unsigned long mask)
{
- unsigned long flags, zero;
+ /*
+ * The irq mask must always include the STD bit if any are set.
+ *
+ * Interrupts don't get replayed until the standard
+ * interrupt (local_irq_disable()) is unmasked.
+ *
+ * Other masks must only provide additional masking beyond
+ * the standard, and they are also not replayed until the
+ * standard interrupt becomes unmasked.
+ *
+ * This could be changed, but it will require partial
+ * unmasks to be replayed, among other things. For now, take
+ * the simple approach.
+ */
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON(mask && !(mask & IRQS_DISABLED));
asm volatile(
- "li %1,0; lbz %0,%2(13); stb %1,%2(13)"
- : "=r" (flags), "=&r" (zero)
- : "i" (offsetof(struct paca_struct, soft_enabled))
+ "stb %0,%1(13)"
+ :
+ : "r" (mask),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
: "memory");
+}
+
+static inline notrace unsigned long irq_soft_mask_set_return(unsigned long mask)
+{
+ unsigned long flags = irq_soft_mask_return();
+
+ irq_soft_mask_set(mask);
return flags;
}
+static inline notrace unsigned long irq_soft_mask_or_return(unsigned long mask)
+{
+ unsigned long flags = irq_soft_mask_return();
+
+ irq_soft_mask_set(flags | mask);
+
+ return flags;
+}
+
+static inline notrace unsigned long irq_soft_mask_andc_return(unsigned long mask)
+{
+ unsigned long flags = irq_soft_mask_return();
+
+ irq_soft_mask_set(flags & ~mask);
+
+ return flags;
+}
+
+static inline unsigned long arch_local_save_flags(void)
+{
+ return irq_soft_mask_return();
+}
+
+static inline void arch_local_irq_disable(void)
+{
+ irq_soft_mask_set(IRQS_DISABLED);
+}
+
extern void arch_local_irq_restore(unsigned long);
static inline void arch_local_irq_enable(void)
{
- arch_local_irq_restore(1);
+ arch_local_irq_restore(IRQS_ENABLED);
}
static inline unsigned long arch_local_irq_save(void)
{
- return arch_local_irq_disable();
+ return irq_soft_mask_or_return(IRQS_DISABLED);
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
{
- return flags == 0;
+ return flags & IRQS_DISABLED;
}
static inline bool arch_irqs_disabled(void)
@@ -88,51 +216,206 @@ static inline bool arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
}
-#ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory")
-#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory")
-#else
-#define __hard_irq_enable() __mtmsrd(local_paca->kernel_msr | MSR_EE, 1)
-#define __hard_irq_disable() __mtmsrd(local_paca->kernel_msr, 1)
-#endif
+static inline void set_pmi_irq_pending(void)
+{
+ /*
+ * Invoked from PMU callback functions to set PMI bit in the paca.
+ * This has to be called with irqs disabled (via hard_irq_disable()).
+ */
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ get_paca()->irq_happened |= PACA_IRQ_PMI;
+}
+
+static inline void clear_pmi_irq_pending(void)
+{
+ /*
+ * Invoked from PMU callback functions to clear the pending PMI bit
+ * in the paca.
+ */
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ get_paca()->irq_happened &= ~PACA_IRQ_PMI;
+}
-#define hard_irq_disable() do { \
- u8 _was_enabled; \
- __hard_irq_disable(); \
- _was_enabled = local_paca->soft_enabled; \
- local_paca->soft_enabled = 0; \
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
- if (_was_enabled) \
- trace_hardirqs_off(); \
+static inline bool pmi_irq_pending(void)
+{
+ /*
+ * Invoked from PMU callback functions to check if there is a pending
+ * PMI bit in the paca.
+ */
+ if (get_paca()->irq_happened & PACA_IRQ_PMI)
+ return true;
+
+ return false;
+}
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * To support disabling and enabling of irqs together with PMIs, a set of
+ * new powerpc_local_irq_pmu_save() and powerpc_local_irq_pmu_restore()
+ * macros is added. These macros are implemented using generic
+ * linux local_irq_* code from include/linux/irqflags.h.
+ */
+#define raw_local_irq_pmu_save(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ flags = irq_soft_mask_or_return(IRQS_DISABLED | \
+ IRQS_PMI_DISABLED); \
+ } while(0)
+
+#define raw_local_irq_pmu_restore(flags) \
+ do { \
+ typecheck(unsigned long, flags); \
+ arch_local_irq_restore(flags); \
+ } while(0)
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_off(); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ if (!raw_irqs_disabled_flags(flags)) \
+ trace_hardirqs_on(); \
+ raw_local_irq_pmu_restore(flags); \
+ } while(0)
+#else
+#define powerpc_local_irq_pmu_save(flags) \
+ do { \
+ raw_local_irq_pmu_save(flags); \
+ } while(0)
+#define powerpc_local_irq_pmu_restore(flags) \
+ do { \
+ raw_local_irq_pmu_restore(flags); \
+ } while (0)
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+#define hard_irq_disable() do { \
+ unsigned long flags; \
+ __hard_irq_disable(); \
+ flags = irq_soft_mask_set_return(IRQS_ALL_DISABLED); \
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS; \
+ if (!arch_irqs_disabled_flags(flags)) { \
+ asm volatile("std%X0 %1,%0" : "=m" (local_paca->saved_r1) \
+ : "r" (current_stack_pointer)); \
+ trace_hardirqs_off(); \
+ } \
} while(0)
+static inline bool __lazy_irq_pending(u8 irq_happened)
+{
+ return !!(irq_happened & ~PACA_IRQ_HARD_DIS);
+}
+
+/*
+ * Check if a lazy IRQ is pending. Should be called with IRQs hard disabled.
+ */
static inline bool lazy_irq_pending(void)
{
- return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS);
+ return __lazy_irq_pending(get_paca()->irq_happened);
}
/*
- * This is called by asynchronous interrupts to conditionally
- * re-enable hard interrupts when soft-disabled after having
- * cleared the source of the interrupt
+ * Check if a lazy IRQ is pending, with no debugging checks.
+ * Should be called with IRQs hard disabled.
+ * For use in RI disabled code or other constrained situations.
*/
-static inline void may_hard_irq_enable(void)
+static inline bool lazy_irq_pending_nocheck(void)
{
+ return __lazy_irq_pending(local_paca->irq_happened);
+}
+
+bool power_pmu_wants_prompt_pmi(void);
+
+/*
+ * This is called by asynchronous interrupts to check whether to
+ * conditionally re-enable hard interrupts after having cleared
+ * the source of the interrupt. They are kept disabled if there
+ * is a different soft-masked interrupt pending that requires hard
+ * masking.
+ */
+static inline bool should_hard_irq_enable(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+ WARN_ON(!(get_paca()->irq_happened & PACA_IRQ_HARD_DIS));
+ WARN_ON(mfmsr() & MSR_EE);
+ }
+
+ if (!IS_ENABLED(CONFIG_PERF_EVENTS))
+ return false;
+ /*
+ * If the PMU is not running, there is not much reason to enable
+ * MSR[EE] in irq handlers because any interrupts would just be
+ * soft-masked.
+ *
+ * TODO: Add test for 64e
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+ if (!power_pmu_wants_prompt_pmi())
+ return false;
+ /*
+ * If PMIs are disabled then IRQs should be disabled as well,
+ * so we shouldn't see this condition, check for it just in
+ * case because we are about to enable PMIs.
+ */
+ if (WARN_ON_ONCE(regs->softe & IRQS_PMI_DISABLED))
+ return false;
+ }
+
+ if (get_paca()->irq_happened & PACA_IRQ_MUST_HARD_MASK)
+ return false;
+
+ return true;
+}
+
+/*
+ * Do the hard enabling, only call this if should_hard_irq_enable is true.
+ * This allows PMI interrupts to profile irq handlers.
+ */
+static inline void do_hard_irq_enable(void)
+{
+ /*
+ * Asynch interrupts come in with IRQS_ALL_DISABLED,
+ * PACA_IRQ_HARD_DIS, and MSR[EE]=0.
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ irq_soft_mask_andc_return(IRQS_PMI_DISABLED);
get_paca()->irq_happened &= ~PACA_IRQ_HARD_DIS;
- if (!(get_paca()->irq_happened & PACA_IRQ_EE))
- __hard_irq_enable();
+ __hard_irq_enable();
}
static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
{
- return !regs->softe;
+ return (regs->softe & IRQS_DISABLED);
}
extern bool prep_irq_for_idle(void);
+extern bool prep_irq_for_idle_irqsoff(void);
+extern void irq_set_pending_from_srr1(unsigned long srr1);
+
+#define fini_irq_for_idle_irqsoff() trace_hardirqs_off();
+extern void force_external_irq_replay(void);
+
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+ regs->softe = val;
+}
#else /* CONFIG_PPC64 */
-#define SET_MSR_EE(x) mtmsr(x)
+static inline notrace unsigned long irq_soft_mask_return(void)
+{
+ return 0;
+}
static inline unsigned long arch_local_save_flags(void)
{
@@ -141,41 +424,34 @@ static inline unsigned long arch_local_save_flags(void)
static inline void arch_local_irq_restore(unsigned long flags)
{
-#if defined(CONFIG_BOOKE)
- asm volatile("wrtee %0" : : "r" (flags) : "memory");
-#else
- mtmsr(flags);
-#endif
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(flags);
+ else
+ mtmsr(flags);
}
static inline unsigned long arch_local_irq_save(void)
{
unsigned long flags = arch_local_save_flags();
-#ifdef CONFIG_BOOKE
- asm volatile("wrteei 0" : : : "memory");
-#else
- SET_MSR_EE(flags & ~MSR_EE);
-#endif
+
+ if (IS_ENABLED(CONFIG_BOOKE))
+ wrtee(0);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ wrtspr(SPRN_EID);
+ else
+ mtmsr(flags & ~MSR_EE);
+
return flags;
}
static inline void arch_local_irq_disable(void)
{
-#ifdef CONFIG_BOOKE
- asm volatile("wrteei 0" : : : "memory");
-#else
- arch_local_irq_save();
-#endif
+ __hard_irq_disable();
}
static inline void arch_local_irq_enable(void)
{
-#ifdef CONFIG_BOOKE
- asm volatile("wrteei 1" : : : "memory");
-#else
- unsigned long msr = mfmsr();
- SET_MSR_EE(msr | MSR_EE);
-#endif
+ __hard_irq_enable();
}
static inline bool arch_irqs_disabled_flags(unsigned long flags)
@@ -195,18 +471,51 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs)
return !(regs->msr & MSR_EE);
}
-static inline void may_hard_irq_enable(void) { }
+static __always_inline bool should_hard_irq_enable(struct pt_regs *regs)
+{
+ return false;
+}
+
+static inline void do_hard_irq_enable(void)
+{
+ BUILD_BUG();
+}
+static inline void clear_pmi_irq_pending(void) { }
+static inline void set_pmi_irq_pending(void) { }
+static inline bool pmi_irq_pending(void) { return false; }
+
+static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val)
+{
+}
#endif /* CONFIG_PPC64 */
-#define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST
+static inline unsigned long mtmsr_isync_irqsafe(unsigned long msr)
+{
+#ifdef CONFIG_PPC64
+ if (arch_irqs_disabled()) {
+ /*
+ * With soft-masking, MSR[EE] can change from 1 to 0
+ * asynchronously when irqs are disabled, and we don't want to
+ * set MSR[EE] back to 1 here if that has happened. A race-free
+ * way to do this is ensure EE is already 0. Another way it
+ * could be done is with a RESTART_TABLE handler, but that's
+ * probably overkill here.
+ */
+ msr &= ~MSR_EE;
+ mtmsr_isync(msr);
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ } else
+#endif
+ mtmsr_isync(msr);
-/*
- * interrupt-retrigger: should we handle this via lost interrupts and IPIs
- * or should we not care like we do now ? --BenH.
- */
-struct irq_chip;
+ return msr;
+}
+
+
+#define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_HW_IRQ_H */
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h
index 5b0c98bd46ab..d024447283a0 100644
--- a/arch/powerpc/include/asm/hydra.h
+++ b/arch/powerpc/include/asm/hydra.h
@@ -10,7 +10,7 @@
*
* © Copyright 1995 Apple Computer, Inc. All rights reserved.
*
- * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf
+ * It's available online from https://www.cpu.lu/~mlan/ftp/MacTech.pdf
* You can obtain paper copies of this book from computer bookstores or by
* writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San
* Francisco, CA 94104. Reference ISBN 1-55860-393-X.
@@ -89,14 +89,11 @@ extern volatile struct Hydra __iomem *Hydra;
#define HYDRA_INT_EXT2 13 /* PCI IRQX */
#define HYDRA_INT_EXT3 14 /* PCI IRQY */
#define HYDRA_INT_EXT4 15 /* PCI IRQZ */
-#define HYDRA_INT_EXT5 16 /* IDE Primay/Secondary */
+#define HYDRA_INT_EXT5 16 /* IDE Primary/Secondary */
#define HYDRA_INT_EXT6 17 /* IDE Secondary */
#define HYDRA_INT_EXT7 18 /* Power Off Request */
#define HYDRA_INT_SPARE 19
-extern int hydra_init(void);
-extern void macio_adb_init(void);
-
#endif /* __KERNEL__ */
#endif /* _ASMPPC_HYDRA_H */
diff --git a/arch/powerpc/include/asm/i8259.h b/arch/powerpc/include/asm/i8259.h
index c3fdfbd5a673..75481d363cd8 100644
--- a/arch/powerpc/include/asm/i8259.h
+++ b/arch/powerpc/include/asm/i8259.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_I8259_H
#define _ASM_POWERPC_I8259_H
#ifdef __KERNEL__
@@ -6,7 +7,7 @@
extern void i8259_init(struct device_node *node, unsigned long intack_addr);
extern unsigned int i8259_irq(void);
-extern struct irq_domain *i8259_get_host(void);
+struct irq_domain *__init i8259_get_host(void);
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_I8259_H */
diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h
index 088f95b2e14f..46fe406f461c 100644
--- a/arch/powerpc/include/asm/ibmebus.h
+++ b/arch/powerpc/include/asm/ibmebus.h
@@ -46,7 +46,9 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
-extern struct bus_type ibmebus_bus_type;
+struct platform_driver;
+
+extern const struct bus_type ibmebus_bus_type;
int ibmebus_register_driver(struct platform_driver *drv);
void ibmebus_unregister_driver(struct platform_driver *drv);
diff --git a/arch/powerpc/include/asm/icswx.h b/arch/powerpc/include/asm/icswx.h
new file mode 100644
index 000000000000..f6599ccb3012
--- /dev/null
+++ b/arch/powerpc/include/asm/icswx.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * ICSWX api
+ *
+ * Copyright (C) 2015 IBM Corp.
+ *
+ * This provides the Initiate Coprocessor Store Word Indexed (ICSWX)
+ * instruction. This instruction is used to communicate with PowerPC
+ * coprocessors. This also provides definitions of the structures used
+ * to communicate with the coprocessor.
+ *
+ * The RFC02130: Coprocessor Architecture document is the reference for
+ * everything in this file unless otherwise noted.
+ */
+#ifndef _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+#define _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_
+
+#include <asm/ppc-opcode.h> /* for PPC_ICSWX */
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE (0x3fffffffffffffff)
+#define CCB_ADDRESS (0xfffffffffffffff8)
+#define CCB_CM (0x0000000000000007)
+#define CCB_CM0 (0x0000000000000004)
+#define CCB_CM12 (0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS (0x0)
+#define CCB_CM0_LAST_IN_CHAIN (0x4)
+#define CCB_CM12_STORE (0x0)
+#define CCB_CM12_INTERRUPT (0x1)
+
+#define CCB_SIZE (0x10)
+#define CCB_ALIGN CCB_SIZE
+
+struct coprocessor_completion_block {
+ __be64 value;
+ __be64 address;
+} __packed __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V (0x80)
+#define CSB_F (0x04)
+#define CSB_CH (0x03)
+#define CSB_CE_INCOMPLETE (0x80)
+#define CSB_CE_TERMINATION (0x40)
+#define CSB_CE_TPBC (0x20)
+
+#define CSB_CC_SUCCESS (0)
+#define CSB_CC_INVALID_ALIGN (1)
+#define CSB_CC_OPERAND_OVERLAP (2)
+#define CSB_CC_DATA_LENGTH (3)
+#define CSB_CC_TRANSLATION (5)
+#define CSB_CC_PROTECTION (6)
+#define CSB_CC_RD_EXTERNAL (7)
+#define CSB_CC_INVALID_OPERAND (8)
+#define CSB_CC_PRIVILEGE (9)
+#define CSB_CC_INTERNAL (10)
+#define CSB_CC_WR_EXTERNAL (12)
+#define CSB_CC_NOSPC (13)
+#define CSB_CC_EXCESSIVE_DDE (14)
+#define CSB_CC_WR_TRANSLATION (15)
+#define CSB_CC_WR_PROTECTION (16)
+#define CSB_CC_UNKNOWN_CODE (17)
+#define CSB_CC_ABORT (18)
+#define CSB_CC_EXCEED_BYTE_COUNT (19) /* P9 or later */
+#define CSB_CC_TRANSPORT (20)
+#define CSB_CC_INVALID_CRB (21) /* P9 or later */
+#define CSB_CC_INVALID_DDE (30) /* P9 or later */
+#define CSB_CC_SEGMENTED_DDL (31)
+#define CSB_CC_PROGRESS_POINT (32)
+#define CSB_CC_DDE_OVERFLOW (33)
+#define CSB_CC_SESSION (34)
+#define CSB_CC_PROVISION (36)
+#define CSB_CC_CHAIN (37)
+#define CSB_CC_SEQUENCE (38)
+#define CSB_CC_HW (39)
+/* P9 DD2 NX Workbook 3.2 (Table 4-36): Address translation fault */
+#define CSB_CC_FAULT_ADDRESS (250)
+
+#define CSB_SIZE (0x10)
+#define CSB_ALIGN CSB_SIZE
+
+struct coprocessor_status_block {
+ u8 flags;
+ u8 cs;
+ u8 cc;
+ u8 ce;
+ __be32 count;
+ __be64 address;
+} __packed __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P (0x8000)
+
+#define DDE_SIZE (0x10)
+#define DDE_ALIGN DDE_SIZE
+
+struct data_descriptor_entry {
+ __be16 flags;
+ u8 count;
+ u8 index;
+ __be32 length;
+ __be64 address;
+} __packed __aligned(DDE_ALIGN);
+
+/* 4.3.2 NX-stamped Fault CRB */
+
+#define NX_STAMP_ALIGN (0x10)
+
+struct nx_fault_stamp {
+ __be64 fault_storage_addr;
+ __be16 reserved;
+ __u8 flags;
+ __u8 fault_status;
+ __be32 pswid;
+} __packed __aligned(NX_STAMP_ALIGN);
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE (0x80)
+#define CRB_ALIGN (0x100) /* Errata: requires 256-byte alignment */
+
+/* Coprocessor Status Block field
+ * ADDRESS address of CSB
+ * C CCB is valid
+ * AT 0 = addrs are virtual, 1 = addrs are phys
+ * M enable perf monitor
+ */
+#define CRB_CSB_ADDRESS (0xfffffffffffffff0)
+#define CRB_CSB_C (0x0000000000000008)
+#define CRB_CSB_AT (0x0000000000000002)
+#define CRB_CSB_M (0x0000000000000001)
+
+struct coprocessor_request_block {
+ __be32 ccw;
+ __be32 flags;
+ __be64 csb_addr;
+
+ struct data_descriptor_entry source;
+ struct data_descriptor_entry target;
+
+ struct coprocessor_completion_block ccb;
+
+ union {
+ struct nx_fault_stamp nx;
+ u8 reserved[16];
+ } stamp;
+
+ u8 reserved[32];
+
+ struct coprocessor_status_block csb;
+} __aligned(128);
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS (0xff000000)
+#define CCW_CT (0x00ff0000)
+#define CCW_CD (0x0000ffff)
+#define CCW_CL (0x0000c000)
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1 Initiate Coprocessor Store Word Indexed (ICSWX)
+ * Chapter 8.2.4.1 Condition Register 0
+ */
+
+#define ICSWX_INITIATED (0x8)
+#define ICSWX_BUSY (0x4)
+#define ICSWX_REJECTED (0x2)
+#define ICSWX_XERS0 (0x1) /* undefined or set from XER[SO]. */
+
+static inline int icswx(__be32 ccw, struct coprocessor_request_block *crb)
+{
+ __be64 ccw_reg = ccw;
+ u32 cr;
+
+ /* NB: the same structures are used by VAS-NX */
+ BUILD_BUG_ON(sizeof(*crb) != 128);
+
+ __asm__ __volatile__(
+ PPC_ICSWX(%1,0,%2) "\n"
+ "mfcr %0\n"
+ : "=r" (cr)
+ : "r" (ccw_reg), "r" (crb)
+ : "cr0", "memory");
+
+ return (int)((cr >> 28) & 0xf);
+}
+
+
+#endif /* _ARCH_POWERPC_INCLUDE_ASM_ICSWX_H_ */
diff --git a/arch/powerpc/include/asm/ide.h b/arch/powerpc/include/asm/ide.h
deleted file mode 100644
index da01b20aea59..000000000000
--- a/arch/powerpc/include/asm/ide.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- *
- * This file contains the powerpc architecture specific IDE code.
- */
-#ifndef _ASM_POWERPC_IDE_H
-#define _ASM_POWERPC_IDE_H
-
-#include <linux/compiler.h>
-#include <asm/io.h>
-
-#define __ide_mm_insw(p, a, c) readsw((void __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c) readsl((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c) writesw((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c) writesl((void __iomem *)(p), (a), (c))
-
-#endif /* _ASM_POWERPC_IDE_H */
diff --git a/arch/powerpc/include/asm/idle.h b/arch/powerpc/include/asm/idle.h
new file mode 100644
index 000000000000..00f360667391
--- /dev/null
+++ b/arch/powerpc/include/asm/idle.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_IDLE_H
+#define _ASM_POWERPC_IDLE_H
+#include <asm/runlatch.h>
+#include <asm/paca.h>
+
+#ifdef CONFIG_PPC_PSERIES
+DECLARE_PER_CPU(u64, idle_spurr_cycles);
+DECLARE_PER_CPU(u64, idle_entry_purr_snap);
+DECLARE_PER_CPU(u64, idle_entry_spurr_snap);
+
+static __always_inline void snapshot_purr_idle_entry(void)
+{
+ *this_cpu_ptr(&idle_entry_purr_snap) = mfspr(SPRN_PURR);
+}
+
+static __always_inline void snapshot_spurr_idle_entry(void)
+{
+ *this_cpu_ptr(&idle_entry_spurr_snap) = mfspr(SPRN_SPURR);
+}
+
+static __always_inline void update_idle_purr_accounting(void)
+{
+ u64 wait_cycles;
+ u64 in_purr = *this_cpu_ptr(&idle_entry_purr_snap);
+
+ wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
+ wait_cycles += mfspr(SPRN_PURR) - in_purr;
+ get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
+}
+
+static __always_inline void update_idle_spurr_accounting(void)
+{
+ u64 *idle_spurr_cycles_ptr = this_cpu_ptr(&idle_spurr_cycles);
+ u64 in_spurr = *this_cpu_ptr(&idle_entry_spurr_snap);
+
+ *idle_spurr_cycles_ptr += mfspr(SPRN_SPURR) - in_spurr;
+}
+
+static __always_inline void pseries_idle_prolog(void)
+{
+ ppc64_runlatch_off();
+ snapshot_purr_idle_entry();
+ snapshot_spurr_idle_entry();
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ get_lppaca()->idle = 1;
+}
+
+static __always_inline void pseries_idle_epilog(void)
+{
+ update_idle_purr_accounting();
+ update_idle_spurr_accounting();
+ get_lppaca()->idle = 0;
+ ppc64_runlatch_on();
+}
+
+static inline u64 read_this_idle_purr(void)
+{
+ /*
+ * If we are reading from an idle context, update the
+ * idle-purr cycles corresponding to the last idle period.
+ * Since the idle context is not yet over, take a fresh
+ * snapshot of the idle-purr.
+ */
+ if (unlikely(get_lppaca()->idle == 1)) {
+ update_idle_purr_accounting();
+ snapshot_purr_idle_entry();
+ }
+
+ return be64_to_cpu(get_lppaca()->wait_state_cycles);
+}
+
+static inline u64 read_this_idle_spurr(void)
+{
+ /*
+ * If we are reading from an idle context, update the
+ * idle-spurr cycles corresponding to the last idle period.
+ * Since the idle context is not yet over, take a fresh
+ * snapshot of the idle-spurr.
+ */
+ if (get_lppaca()->idle == 1) {
+ update_idle_spurr_accounting();
+ snapshot_spurr_idle_entry();
+ }
+
+ return *this_cpu_ptr(&idle_spurr_cycles);
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+#endif
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
new file mode 100644
index 000000000000..a656635df386
--- /dev/null
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_POWERPC_IMC_PMU_H
+#define __ASM_POWERPC_IMC_PMU_H
+
+/*
+ * IMC Nest Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/io.h>
+#include <asm/opal.h>
+
+/*
+ * Compatibility macros for IMC devices
+ */
+#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters"
+#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters"
+
+
+/*
+ * LDBAR: Counter address and Enable/Disable macro.
+ * perf/imc-pmu.c has the LDBAR layout information.
+ */
+#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL
+#define THREAD_IMC_ENABLE 0x8000000000000000ULL
+#define TRACE_IMC_ENABLE 0x4000000000000000ULL
+
+/*
+ * For debugfs interface for imc-mode and imc-command
+ */
+#define IMC_CNTL_BLK_OFFSET 0x3FC00
+#define IMC_CNTL_BLK_CMD_OFFSET 8
+#define IMC_CNTL_BLK_MODE_OFFSET 32
+
+/*
+ * Structure to hold memory address information for imc units.
+ */
+struct imc_mem_info {
+ u64 *vbase;
+ u32 id;
+};
+
+/*
+ * Place holder for nest pmu events and values.
+ */
+struct imc_events {
+ u32 value;
+ char *name;
+ char *unit;
+ char *scale;
+};
+
+/*
+ * Trace IMC hardware updates a 64-byte record on
+ * Core Performance Monitoring Counter (CPMC)
+ * overflow. Here is the layout for the trace imc record
+ *
+ * DW 0 : Timebase
+ * DW 1 : Program Counter
+ * DW 2 : PIDR information
+ * DW 3 : CPMC1
+ * DW 4 : CPMC2
+ * DW 5 : CPMC3
+ * DW 6 : CPMC4
+ * DW 7 : Timebase
+ * .....
+ *
+ * The following is the data structure to hold trace imc data.
+ */
+struct trace_imc_data {
+ __be64 tb1;
+ __be64 ip;
+ __be64 val;
+ __be64 cpmc1;
+ __be64 cpmc2;
+ __be64 cpmc3;
+ __be64 cpmc4;
+ __be64 tb2;
+};
+
+/* Event attribute array index */
+#define IMC_FORMAT_ATTR 0
+#define IMC_EVENT_ATTR 1
+#define IMC_CPUMASK_ATTR 2
+#define IMC_NULL_ATTR 3
+
+/* PMU Format attribute macros */
+#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
+
+/*
+ * Macro to mask bits 0:21 of first double word (which is the timebase) to
+ * compare with 8th double word (timebase) of trace imc record data.
+ */
+#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL
+
+/*
+ * Bit 0:1 in third DW of IMC trace record
+ * specifies the MSR[HV PR] values.
+ */
+#define IMC_TRACE_RECORD_VAL_HVPR(x) ((x) >> 62)
+
+/*
+ * Device tree parser code detects IMC pmu support and
+ * registers new IMC pmus. This structure will hold the
+ * pmu functions, events, counter memory information
+ * and attrs for each imc pmu and will be referenced at
+ * the time of pmu registration.
+ */
+struct imc_pmu {
+ struct pmu pmu;
+ struct imc_mem_info *mem_info;
+ struct imc_events *events;
+ /*
+ * Attribute groups for the PMU. Slot 0 is used for the
+ * format attribute, slot 1 for the event attribute and
+ * slot 2 for the cpumask attribute. Slot 3 is kept as
+ * NULL.
+ */
+ const struct attribute_group *attr_groups[4];
+ u32 counter_mem_size;
+ int domain;
+ /*
+ * flag to notify whether the memory is mmaped
+ * or allocated by kernel.
+ */
+ bool imc_counter_mmaped;
+};
+
+/*
+ * Structure to hold id, lock and reference count for the imc events which
+ * are inited.
+ */
+struct imc_pmu_ref {
+ spinlock_t lock;
+ unsigned int id;
+ int refc;
+};
+
+/*
+ * In-Memory Collection Counters type.
+ * Data comes from Device tree.
+ * Four device types are supported.
+ */
+
+enum {
+ IMC_TYPE_THREAD = 0x1,
+ IMC_TYPE_TRACE = 0x2,
+ IMC_TYPE_CORE = 0x4,
+ IMC_TYPE_CHIP = 0x10,
+};
+
+/*
+ * Domains for IMC PMUs
+ */
+#define IMC_DOMAIN_NEST 1
+#define IMC_DOMAIN_CORE 2
+#define IMC_DOMAIN_THREAD 3
+/* For trace-imc the domain is still thread but it operates in trace-mode */
+#define IMC_DOMAIN_TRACE 4
+
+extern int init_imc_pmu(struct device_node *parent,
+ struct imc_pmu *pmu_ptr, int pmu_id);
+extern void thread_imc_disable(void);
+extern int get_max_nest_dev(void);
+extern void unregister_thread_imc(void);
+#endif /* __ASM_POWERPC_IMC_PMU_H */
diff --git a/arch/powerpc/include/asm/immap_cpm2.h b/arch/powerpc/include/asm/immap_cpm2.h
index 7c64fda5357b..845d5b3fb212 100644
--- a/arch/powerpc/include/asm/immap_cpm2.h
+++ b/arch/powerpc/include/asm/immap_cpm2.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* CPM2 Internal Memory Map
* Copyright (c) 1999 Dan Malek (dmalek@jlc.net)
diff --git a/arch/powerpc/include/asm/immap_qe.h b/arch/powerpc/include/asm/immap_qe.h
deleted file mode 100644
index bedbff891423..000000000000
--- a/arch/powerpc/include/asm/immap_qe.h
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * QUICC Engine (QE) Internal Memory Map.
- * The Internal Memory Map for devices with QE on them. This
- * is the superset of all QE devices (8360, etc.).
-
- * Copyright (C) 2006. Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: Shlomi Gridish <gridish@freescale.com>
- * Li Yang <leoli@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_IMMAP_QE_H
-#define _ASM_POWERPC_IMMAP_QE_H
-#ifdef __KERNEL__
-
-#include <linux/kernel.h>
-#include <asm/io.h>
-
-#define QE_IMMAP_SIZE (1024 * 1024) /* 1MB from 1MB+IMMR */
-
-/* QE I-RAM */
-struct qe_iram {
- __be32 iadd; /* I-RAM Address Register */
- __be32 idata; /* I-RAM Data Register */
- u8 res0[0x04];
- __be32 iready; /* I-RAM Ready Register */
- u8 res1[0x70];
-} __attribute__ ((packed));
-
-/* QE Interrupt Controller */
-struct qe_ic_regs {
- __be32 qicr;
- __be32 qivec;
- __be32 qripnr;
- __be32 qipnr;
- __be32 qipxcc;
- __be32 qipycc;
- __be32 qipwcc;
- __be32 qipzcc;
- __be32 qimr;
- __be32 qrimr;
- __be32 qicnr;
- u8 res0[0x4];
- __be32 qiprta;
- __be32 qiprtb;
- u8 res1[0x4];
- __be32 qricr;
- u8 res2[0x20];
- __be32 qhivec;
- u8 res3[0x1C];
-} __attribute__ ((packed));
-
-/* Communications Processor */
-struct cp_qe {
- __be32 cecr; /* QE command register */
- __be32 ceccr; /* QE controller configuration register */
- __be32 cecdr; /* QE command data register */
- u8 res0[0xA];
- __be16 ceter; /* QE timer event register */
- u8 res1[0x2];
- __be16 cetmr; /* QE timers mask register */
- __be32 cetscr; /* QE time-stamp timer control register */
- __be32 cetsr1; /* QE time-stamp register 1 */
- __be32 cetsr2; /* QE time-stamp register 2 */
- u8 res2[0x8];
- __be32 cevter; /* QE virtual tasks event register */
- __be32 cevtmr; /* QE virtual tasks mask register */
- __be16 cercr; /* QE RAM control register */
- u8 res3[0x2];
- u8 res4[0x24];
- __be16 ceexe1; /* QE external request 1 event register */
- u8 res5[0x2];
- __be16 ceexm1; /* QE external request 1 mask register */
- u8 res6[0x2];
- __be16 ceexe2; /* QE external request 2 event register */
- u8 res7[0x2];
- __be16 ceexm2; /* QE external request 2 mask register */
- u8 res8[0x2];
- __be16 ceexe3; /* QE external request 3 event register */
- u8 res9[0x2];
- __be16 ceexm3; /* QE external request 3 mask register */
- u8 res10[0x2];
- __be16 ceexe4; /* QE external request 4 event register */
- u8 res11[0x2];
- __be16 ceexm4; /* QE external request 4 mask register */
- u8 res12[0x3A];
- __be32 ceurnr; /* QE microcode revision number register */
- u8 res13[0x244];
-} __attribute__ ((packed));
-
-/* QE Multiplexer */
-struct qe_mux {
- __be32 cmxgcr; /* CMX general clock route register */
- __be32 cmxsi1cr_l; /* CMX SI1 clock route low register */
- __be32 cmxsi1cr_h; /* CMX SI1 clock route high register */
- __be32 cmxsi1syr; /* CMX SI1 SYNC route register */
- __be32 cmxucr[4]; /* CMX UCCx clock route registers */
- __be32 cmxupcr; /* CMX UPC clock route register */
- u8 res0[0x1C];
-} __attribute__ ((packed));
-
-/* QE Timers */
-struct qe_timers {
- u8 gtcfr1; /* Timer 1 and Timer 2 global config register*/
- u8 res0[0x3];
- u8 gtcfr2; /* Timer 3 and timer 4 global config register*/
- u8 res1[0xB];
- __be16 gtmdr1; /* Timer 1 mode register */
- __be16 gtmdr2; /* Timer 2 mode register */
- __be16 gtrfr1; /* Timer 1 reference register */
- __be16 gtrfr2; /* Timer 2 reference register */
- __be16 gtcpr1; /* Timer 1 capture register */
- __be16 gtcpr2; /* Timer 2 capture register */
- __be16 gtcnr1; /* Timer 1 counter */
- __be16 gtcnr2; /* Timer 2 counter */
- __be16 gtmdr3; /* Timer 3 mode register */
- __be16 gtmdr4; /* Timer 4 mode register */
- __be16 gtrfr3; /* Timer 3 reference register */
- __be16 gtrfr4; /* Timer 4 reference register */
- __be16 gtcpr3; /* Timer 3 capture register */
- __be16 gtcpr4; /* Timer 4 capture register */
- __be16 gtcnr3; /* Timer 3 counter */
- __be16 gtcnr4; /* Timer 4 counter */
- __be16 gtevr1; /* Timer 1 event register */
- __be16 gtevr2; /* Timer 2 event register */
- __be16 gtevr3; /* Timer 3 event register */
- __be16 gtevr4; /* Timer 4 event register */
- __be16 gtps; /* Timer 1 prescale register */
- u8 res2[0x46];
-} __attribute__ ((packed));
-
-/* BRG */
-struct qe_brg {
- __be32 brgc[16]; /* BRG configuration registers */
- u8 res0[0x40];
-} __attribute__ ((packed));
-
-/* SPI */
-struct spi {
- u8 res0[0x20];
- __be32 spmode; /* SPI mode register */
- u8 res1[0x2];
- u8 spie; /* SPI event register */
- u8 res2[0x1];
- u8 res3[0x2];
- u8 spim; /* SPI mask register */
- u8 res4[0x1];
- u8 res5[0x1];
- u8 spcom; /* SPI command register */
- u8 res6[0x2];
- __be32 spitd; /* SPI transmit data register (cpu mode) */
- __be32 spird; /* SPI receive data register (cpu mode) */
- u8 res7[0x8];
-} __attribute__ ((packed));
-
-/* SI */
-struct si1 {
- __be16 siamr1; /* SI1 TDMA mode register */
- __be16 sibmr1; /* SI1 TDMB mode register */
- __be16 sicmr1; /* SI1 TDMC mode register */
- __be16 sidmr1; /* SI1 TDMD mode register */
- u8 siglmr1_h; /* SI1 global mode register high */
- u8 res0[0x1];
- u8 sicmdr1_h; /* SI1 command register high */
- u8 res2[0x1];
- u8 sistr1_h; /* SI1 status register high */
- u8 res3[0x1];
- __be16 sirsr1_h; /* SI1 RAM shadow address register high */
- u8 sitarc1; /* SI1 RAM counter Tx TDMA */
- u8 sitbrc1; /* SI1 RAM counter Tx TDMB */
- u8 sitcrc1; /* SI1 RAM counter Tx TDMC */
- u8 sitdrc1; /* SI1 RAM counter Tx TDMD */
- u8 sirarc1; /* SI1 RAM counter Rx TDMA */
- u8 sirbrc1; /* SI1 RAM counter Rx TDMB */
- u8 sircrc1; /* SI1 RAM counter Rx TDMC */
- u8 sirdrc1; /* SI1 RAM counter Rx TDMD */
- u8 res4[0x8];
- __be16 siemr1; /* SI1 TDME mode register 16 bits */
- __be16 sifmr1; /* SI1 TDMF mode register 16 bits */
- __be16 sigmr1; /* SI1 TDMG mode register 16 bits */
- __be16 sihmr1; /* SI1 TDMH mode register 16 bits */
- u8 siglmg1_l; /* SI1 global mode register low 8 bits */
- u8 res5[0x1];
- u8 sicmdr1_l; /* SI1 command register low 8 bits */
- u8 res6[0x1];
- u8 sistr1_l; /* SI1 status register low 8 bits */
- u8 res7[0x1];
- __be16 sirsr1_l; /* SI1 RAM shadow address register low 16 bits*/
- u8 siterc1; /* SI1 RAM counter Tx TDME 8 bits */
- u8 sitfrc1; /* SI1 RAM counter Tx TDMF 8 bits */
- u8 sitgrc1; /* SI1 RAM counter Tx TDMG 8 bits */
- u8 sithrc1; /* SI1 RAM counter Tx TDMH 8 bits */
- u8 sirerc1; /* SI1 RAM counter Rx TDME 8 bits */
- u8 sirfrc1; /* SI1 RAM counter Rx TDMF 8 bits */
- u8 sirgrc1; /* SI1 RAM counter Rx TDMG 8 bits */
- u8 sirhrc1; /* SI1 RAM counter Rx TDMH 8 bits */
- u8 res8[0x8];
- __be32 siml1; /* SI1 multiframe limit register */
- u8 siedm1; /* SI1 extended diagnostic mode register */
- u8 res9[0xBB];
-} __attribute__ ((packed));
-
-/* SI Routing Tables */
-struct sir {
- u8 tx[0x400];
- u8 rx[0x400];
- u8 res0[0x800];
-} __attribute__ ((packed));
-
-/* USB Controller */
-struct qe_usb_ctlr {
- u8 usb_usmod;
- u8 usb_usadr;
- u8 usb_uscom;
- u8 res1[1];
- __be16 usb_usep[4];
- u8 res2[4];
- __be16 usb_usber;
- u8 res3[2];
- __be16 usb_usbmr;
- u8 res4[1];
- u8 usb_usbs;
- __be16 usb_ussft;
- u8 res5[2];
- __be16 usb_usfrn;
- u8 res6[0x22];
-} __attribute__ ((packed));
-
-/* MCC */
-struct qe_mcc {
- __be32 mcce; /* MCC event register */
- __be32 mccm; /* MCC mask register */
- __be32 mccf; /* MCC configuration register */
- __be32 merl; /* MCC emergency request level register */
- u8 res0[0xF0];
-} __attribute__ ((packed));
-
-/* QE UCC Slow */
-struct ucc_slow {
- __be32 gumr_l; /* UCCx general mode register (low) */
- __be32 gumr_h; /* UCCx general mode register (high) */
- __be16 upsmr; /* UCCx protocol-specific mode register */
- u8 res0[0x2];
- __be16 utodr; /* UCCx transmit on demand register */
- __be16 udsr; /* UCCx data synchronization register */
- __be16 ucce; /* UCCx event register */
- u8 res1[0x2];
- __be16 uccm; /* UCCx mask register */
- u8 res2[0x1];
- u8 uccs; /* UCCx status register */
- u8 res3[0x24];
- __be16 utpt;
- u8 res4[0x52];
- u8 guemr; /* UCC general extended mode register */
-} __attribute__ ((packed));
-
-/* QE UCC Fast */
-struct ucc_fast {
- __be32 gumr; /* UCCx general mode register */
- __be32 upsmr; /* UCCx protocol-specific mode register */
- __be16 utodr; /* UCCx transmit on demand register */
- u8 res0[0x2];
- __be16 udsr; /* UCCx data synchronization register */
- u8 res1[0x2];
- __be32 ucce; /* UCCx event register */
- __be32 uccm; /* UCCx mask register */
- u8 uccs; /* UCCx status register */
- u8 res2[0x7];
- __be32 urfb; /* UCC receive FIFO base */
- __be16 urfs; /* UCC receive FIFO size */
- u8 res3[0x2];
- __be16 urfet; /* UCC receive FIFO emergency threshold */
- __be16 urfset; /* UCC receive FIFO special emergency
- threshold */
- __be32 utfb; /* UCC transmit FIFO base */
- __be16 utfs; /* UCC transmit FIFO size */
- u8 res4[0x2];
- __be16 utfet; /* UCC transmit FIFO emergency threshold */
- u8 res5[0x2];
- __be16 utftt; /* UCC transmit FIFO transmit threshold */
- u8 res6[0x2];
- __be16 utpt; /* UCC transmit polling timer */
- u8 res7[0x2];
- __be32 urtry; /* UCC retry counter register */
- u8 res8[0x4C];
- u8 guemr; /* UCC general extended mode register */
-} __attribute__ ((packed));
-
-struct ucc {
- union {
- struct ucc_slow slow;
- struct ucc_fast fast;
- u8 res[0x200]; /* UCC blocks are 512 bytes each */
- };
-} __attribute__ ((packed));
-
-/* MultiPHY UTOPIA POS Controllers (UPC) */
-struct upc {
- __be32 upgcr; /* UTOPIA/POS general configuration register */
- __be32 uplpa; /* UTOPIA/POS last PHY address */
- __be32 uphec; /* ATM HEC register */
- __be32 upuc; /* UTOPIA/POS UCC configuration */
- __be32 updc1; /* UTOPIA/POS device 1 configuration */
- __be32 updc2; /* UTOPIA/POS device 2 configuration */
- __be32 updc3; /* UTOPIA/POS device 3 configuration */
- __be32 updc4; /* UTOPIA/POS device 4 configuration */
- __be32 upstpa; /* UTOPIA/POS STPA threshold */
- u8 res0[0xC];
- __be32 updrs1_h; /* UTOPIA/POS device 1 rate select */
- __be32 updrs1_l; /* UTOPIA/POS device 1 rate select */
- __be32 updrs2_h; /* UTOPIA/POS device 2 rate select */
- __be32 updrs2_l; /* UTOPIA/POS device 2 rate select */
- __be32 updrs3_h; /* UTOPIA/POS device 3 rate select */
- __be32 updrs3_l; /* UTOPIA/POS device 3 rate select */
- __be32 updrs4_h; /* UTOPIA/POS device 4 rate select */
- __be32 updrs4_l; /* UTOPIA/POS device 4 rate select */
- __be32 updrp1; /* UTOPIA/POS device 1 receive priority low */
- __be32 updrp2; /* UTOPIA/POS device 2 receive priority low */
- __be32 updrp3; /* UTOPIA/POS device 3 receive priority low */
- __be32 updrp4; /* UTOPIA/POS device 4 receive priority low */
- __be32 upde1; /* UTOPIA/POS device 1 event */
- __be32 upde2; /* UTOPIA/POS device 2 event */
- __be32 upde3; /* UTOPIA/POS device 3 event */
- __be32 upde4; /* UTOPIA/POS device 4 event */
- __be16 uprp1;
- __be16 uprp2;
- __be16 uprp3;
- __be16 uprp4;
- u8 res1[0x8];
- __be16 uptirr1_0; /* Device 1 transmit internal rate 0 */
- __be16 uptirr1_1; /* Device 1 transmit internal rate 1 */
- __be16 uptirr1_2; /* Device 1 transmit internal rate 2 */
- __be16 uptirr1_3; /* Device 1 transmit internal rate 3 */
- __be16 uptirr2_0; /* Device 2 transmit internal rate 0 */
- __be16 uptirr2_1; /* Device 2 transmit internal rate 1 */
- __be16 uptirr2_2; /* Device 2 transmit internal rate 2 */
- __be16 uptirr2_3; /* Device 2 transmit internal rate 3 */
- __be16 uptirr3_0; /* Device 3 transmit internal rate 0 */
- __be16 uptirr3_1; /* Device 3 transmit internal rate 1 */
- __be16 uptirr3_2; /* Device 3 transmit internal rate 2 */
- __be16 uptirr3_3; /* Device 3 transmit internal rate 3 */
- __be16 uptirr4_0; /* Device 4 transmit internal rate 0 */
- __be16 uptirr4_1; /* Device 4 transmit internal rate 1 */
- __be16 uptirr4_2; /* Device 4 transmit internal rate 2 */
- __be16 uptirr4_3; /* Device 4 transmit internal rate 3 */
- __be32 uper1; /* Device 1 port enable register */
- __be32 uper2; /* Device 2 port enable register */
- __be32 uper3; /* Device 3 port enable register */
- __be32 uper4; /* Device 4 port enable register */
- u8 res2[0x150];
-} __attribute__ ((packed));
-
-/* SDMA */
-struct sdma {
- __be32 sdsr; /* Serial DMA status register */
- __be32 sdmr; /* Serial DMA mode register */
- __be32 sdtr1; /* SDMA system bus threshold register */
- __be32 sdtr2; /* SDMA secondary bus threshold register */
- __be32 sdhy1; /* SDMA system bus hysteresis register */
- __be32 sdhy2; /* SDMA secondary bus hysteresis register */
- __be32 sdta1; /* SDMA system bus address register */
- __be32 sdta2; /* SDMA secondary bus address register */
- __be32 sdtm1; /* SDMA system bus MSNUM register */
- __be32 sdtm2; /* SDMA secondary bus MSNUM register */
- u8 res0[0x10];
- __be32 sdaqr; /* SDMA address bus qualify register */
- __be32 sdaqmr; /* SDMA address bus qualify mask register */
- u8 res1[0x4];
- __be32 sdebcr; /* SDMA CAM entries base register */
- u8 res2[0x38];
-} __attribute__ ((packed));
-
-/* Debug Space */
-struct dbg {
- __be32 bpdcr; /* Breakpoint debug command register */
- __be32 bpdsr; /* Breakpoint debug status register */
- __be32 bpdmr; /* Breakpoint debug mask register */
- __be32 bprmrr0; /* Breakpoint request mode risc register 0 */
- __be32 bprmrr1; /* Breakpoint request mode risc register 1 */
- u8 res0[0x8];
- __be32 bprmtr0; /* Breakpoint request mode trb register 0 */
- __be32 bprmtr1; /* Breakpoint request mode trb register 1 */
- u8 res1[0x8];
- __be32 bprmir; /* Breakpoint request mode immediate register */
- __be32 bprmsr; /* Breakpoint request mode serial register */
- __be32 bpemr; /* Breakpoint exit mode register */
- u8 res2[0x48];
-} __attribute__ ((packed));
-
-/*
- * RISC Special Registers (Trap and Breakpoint). These are described in
- * the QE Developer's Handbook.
- */
-struct rsp {
- __be32 tibcr[16]; /* Trap/instruction breakpoint control regs */
- u8 res0[64];
- __be32 ibcr0;
- __be32 ibs0;
- __be32 ibcnr0;
- u8 res1[4];
- __be32 ibcr1;
- __be32 ibs1;
- __be32 ibcnr1;
- __be32 npcr;
- __be32 dbcr;
- __be32 dbar;
- __be32 dbamr;
- __be32 dbsr;
- __be32 dbcnr;
- u8 res2[12];
- __be32 dbdr_h;
- __be32 dbdr_l;
- __be32 dbdmr_h;
- __be32 dbdmr_l;
- __be32 bsr;
- __be32 bor;
- __be32 bior;
- u8 res3[4];
- __be32 iatr[4];
- __be32 eccr; /* Exception control configuration register */
- __be32 eicr;
- u8 res4[0x100-0xf8];
-} __attribute__ ((packed));
-
-struct qe_immap {
- struct qe_iram iram; /* I-RAM */
- struct qe_ic_regs ic; /* Interrupt Controller */
- struct cp_qe cp; /* Communications Processor */
- struct qe_mux qmx; /* QE Multiplexer */
- struct qe_timers qet; /* QE Timers */
- struct spi spi[0x2]; /* spi */
- struct qe_mcc mcc; /* mcc */
- struct qe_brg brg; /* brg */
- struct qe_usb_ctlr usb; /* USB */
- struct si1 si1; /* SI */
- u8 res11[0x800];
- struct sir sir; /* SI Routing Tables */
- struct ucc ucc1; /* ucc1 */
- struct ucc ucc3; /* ucc3 */
- struct ucc ucc5; /* ucc5 */
- struct ucc ucc7; /* ucc7 */
- u8 res12[0x600];
- struct upc upc1; /* MultiPHY UTOPIA POS Ctrlr 1*/
- struct ucc ucc2; /* ucc2 */
- struct ucc ucc4; /* ucc4 */
- struct ucc ucc6; /* ucc6 */
- struct ucc ucc8; /* ucc8 */
- u8 res13[0x600];
- struct upc upc2; /* MultiPHY UTOPIA POS Ctrlr 2*/
- struct sdma sdma; /* SDMA */
- struct dbg dbg; /* 0x104080 - 0x1040FF
- Debug Space */
- struct rsp rsp[0x2]; /* 0x104100 - 0x1042FF
- RISC Special Registers
- (Trap and Breakpoint) */
- u8 res14[0x300]; /* 0x104300 - 0x1045FF */
- u8 res15[0x3A00]; /* 0x104600 - 0x107FFF */
- u8 res16[0x8000]; /* 0x108000 - 0x110000 */
- u8 muram[0xC000]; /* 0x110000 - 0x11C000
- Multi-user RAM */
- u8 res17[0x24000]; /* 0x11C000 - 0x140000 */
- u8 res18[0xC0000]; /* 0x140000 - 0x200000 */
-} __attribute__ ((packed));
-
-extern struct qe_immap __iomem *qe_immr;
-extern phys_addr_t get_qe_base(void);
-
-/*
- * Returns the offset within the QE address space of the given pointer.
- *
- * Note that the QE does not support 36-bit physical addresses, so if
- * get_qe_base() returns a number above 4GB, the caller will probably fail.
- */
-static inline phys_addr_t immrbar_virt_to_phys(void *address)
-{
- void *q = (void *)qe_immr;
-
- /* Is it a MURAM address? */
- if ((address >= q) && (address < (q + QE_IMMAP_SIZE)))
- return get_qe_base() + (address - q);
-
- /* It's an address returned by kmalloc */
- return virt_to_phys(address);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_IMMAP_QE_H */
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
new file mode 100644
index 000000000000..ffa82167c860
--- /dev/null
+++ b/arch/powerpc/include/asm/inst.h
@@ -0,0 +1,166 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INST_H
+#define _ASM_POWERPC_INST_H
+
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/disassemble.h>
+#include <asm/uaccess.h>
+
+#define ___get_user_instr(gu_op, dest, ptr) /* read one instruction at user ptr into dest via gu_op */ \
+({ \
+ long __gui_ret; \
+ u32 __user *__gui_ptr = (u32 __user *)(ptr); /* parenthesised: ptr may be an expression */ \
+ ppc_inst_t __gui_inst; \
+ unsigned int __prefix, __suffix; \
+ \
+ __chk_user_ptr(ptr); \
+ __gui_ret = gu_op(__prefix, __gui_ptr); /* first word */ \
+ if (__gui_ret == 0) { \
+ if (IS_ENABLED(CONFIG_PPC64) && (__prefix >> 26) == OP_PREFIX) { \
+ __gui_ret = gu_op(__suffix, __gui_ptr + 1); /* prefixed: fetch the second word too */ \
+ __gui_inst = ppc_inst_prefix(__prefix, __suffix); \
+ } else { \
+ __gui_inst = ppc_inst(__prefix); \
+ } \
+ if (__gui_ret == 0) /* dest is written only when every read succeeded */ \
+ (dest) = __gui_inst; \
+ } \
+ __gui_ret; /* value of the statement expression: 0 or the gu_op error */ \
+})
+
+#define get_user_instr(x, ptr) ___get_user_instr(get_user, x, ptr)
+
+#define __get_user_instr(x, ptr) ___get_user_instr(__get_user, x, ptr)
+
+/*
+ * Instruction data type for POWER
+ */
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+ return x.val;
+}
+
+#define ppc_inst(x) ((ppc_inst_t){ .val = (x) })
+
+#else
+static inline u32 ppc_inst_val(ppc_inst_t x)
+{
+ return x;
+}
+#define ppc_inst(x) (x)
+#endif
+
+static inline int ppc_inst_primary_opcode(ppc_inst_t x)
+{
+ return ppc_inst_val(x) >> 26;
+}
+
+#ifdef CONFIG_PPC64
+#define ppc_inst_prefix(x, y) ((ppc_inst_t){ .val = (x), .suffix = (y) })
+
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
+{
+ return x.suffix;
+}
+
+#else
+#define ppc_inst_prefix(x, y) ((void)(y), ppc_inst(x)) /* !PPC64: y is evaluated and discarded */
+
+static inline u32 ppc_inst_suffix(ppc_inst_t x)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+static inline ppc_inst_t ppc_inst_read(const u32 *ptr)
+{
+ if (IS_ENABLED(CONFIG_PPC64) && (*ptr >> 26) == OP_PREFIX)
+ return ppc_inst_prefix(*ptr, *(ptr + 1));
+ else
+ return ppc_inst(*ptr);
+}
+
+static inline bool ppc_inst_prefixed(ppc_inst_t x)
+{
+ return IS_ENABLED(CONFIG_PPC64) && ppc_inst_primary_opcode(x) == OP_PREFIX;
+}
+
+static inline ppc_inst_t ppc_inst_swab(ppc_inst_t x)
+{
+ return ppc_inst_prefix(swab32(ppc_inst_val(x)), swab32(ppc_inst_suffix(x)));
+}
+
+static inline bool ppc_inst_equal(ppc_inst_t x, ppc_inst_t y)
+{
+ if (ppc_inst_val(x) != ppc_inst_val(y))
+ return false;
+ if (!ppc_inst_prefixed(x))
+ return true;
+ return ppc_inst_suffix(x) == ppc_inst_suffix(y);
+}
+
+static inline int ppc_inst_len(ppc_inst_t x)
+{
+ return ppc_inst_prefixed(x) ? 8 : 4;
+}
+
+/*
+ * Return the address of the next instruction, if the instruction @value was
+ * located at @location.
+ */
+static inline u32 *ppc_inst_next(u32 *location, u32 *value)
+{
+ ppc_inst_t tmp;
+
+ tmp = ppc_inst_read(value);
+
+ return (void *)location + ppc_inst_len(tmp);
+}
+
+static inline unsigned long ppc_inst_as_ulong(ppc_inst_t x)
+{
+ if (IS_ENABLED(CONFIG_PPC32))
+ return ppc_inst_val(x);
+ else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ return (u64)ppc_inst_suffix(x) << 32 | ppc_inst_val(x);
+ else
+ return (u64)ppc_inst_val(x) << 32 | ppc_inst_suffix(x);
+}
+
+static inline void ppc_inst_write(u32 *ptr, ppc_inst_t x)
+{
+ if (!ppc_inst_prefixed(x))
+ *ptr = ppc_inst_val(x);
+ else
+ *(u64 *)ptr = ppc_inst_as_ulong(x);
+}
+
+static inline int __copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+ unsigned int val, suffix;
+
+ __get_kernel_nofault(&val, src, u32, Efault);
+ if (IS_ENABLED(CONFIG_PPC64) && get_op(val) == OP_PREFIX) {
+ __get_kernel_nofault(&suffix, src + 1, u32, Efault);
+ *inst = ppc_inst_prefix(val, suffix);
+ } else {
+ *inst = ppc_inst(val);
+ }
+ return 0;
+Efault:
+ return -EFAULT;
+}
+
+static inline int copy_inst_from_kernel_nofault(ppc_inst_t *inst, u32 *src)
+{
+ if (unlikely(!is_kernel_addr((unsigned long)src)))
+ return -ERANGE;
+
+ return __copy_inst_from_kernel_nofault(inst, src);
+}
+
+#endif /* _ASM_POWERPC_INST_H */
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
new file mode 100644
index 000000000000..eb0e4a20b818
--- /dev/null
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -0,0 +1,680 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_INTERRUPT_H
+#define _ASM_POWERPC_INTERRUPT_H
+
+/* BookE/4xx */
+#define INTERRUPT_CRITICAL_INPUT 0x100
+
+/* BookE */
+#define INTERRUPT_DEBUG 0xd00
+#ifdef CONFIG_BOOKE
+#define INTERRUPT_PERFMON 0x260
+#define INTERRUPT_DOORBELL 0x280
+#endif
+
+/* BookS/4xx/8xx */
+#define INTERRUPT_MACHINE_CHECK 0x200
+
+/* BookS/8xx */
+#define INTERRUPT_SYSTEM_RESET 0x100
+
+/* BookS */
+#define INTERRUPT_DATA_SEGMENT 0x380
+#define INTERRUPT_INST_SEGMENT 0x480
+#define INTERRUPT_TRACE 0xd00
+#define INTERRUPT_H_DATA_STORAGE 0xe00
+#define INTERRUPT_HMI 0xe60
+#define INTERRUPT_H_FAC_UNAVAIL 0xf80
+#ifdef CONFIG_PPC_BOOK3S
+#define INTERRUPT_DOORBELL 0xa00
+#define INTERRUPT_PERFMON 0xf00
+#define INTERRUPT_ALTIVEC_UNAVAIL 0xf20
+#endif
+
+/* BookE/BookS/4xx/8xx */
+#define INTERRUPT_DATA_STORAGE 0x300
+#define INTERRUPT_INST_STORAGE 0x400
+#define INTERRUPT_EXTERNAL 0x500
+#define INTERRUPT_ALIGNMENT 0x600
+#define INTERRUPT_PROGRAM 0x700
+#define INTERRUPT_SYSCALL 0xc00
+#define INTERRUPT_TRACE 0xd00
+
+/* BookE/BookS/44x */
+#define INTERRUPT_FP_UNAVAIL 0x800
+
+/* BookE/BookS/44x/8xx */
+#define INTERRUPT_DECREMENTER 0x900
+
+#ifndef INTERRUPT_PERFMON
+#define INTERRUPT_PERFMON 0x0
+#endif
+
+/* 8xx */
+#define INTERRUPT_SOFT_EMU_8xx 0x1000
+#define INTERRUPT_INST_TLB_MISS_8xx 0x1100
+#define INTERRUPT_DATA_TLB_MISS_8xx 0x1200
+#define INTERRUPT_INST_TLB_ERROR_8xx 0x1300
+#define INTERRUPT_DATA_TLB_ERROR_8xx 0x1400
+#define INTERRUPT_DATA_BREAKPOINT_8xx 0x1c00
+#define INTERRUPT_INST_BREAKPOINT_8xx 0x1d00
+
+/* 603 */
+#define INTERRUPT_INST_TLB_MISS_603 0x1000
+#define INTERRUPT_DATA_LOAD_TLB_MISS_603 0x1100
+#define INTERRUPT_DATA_STORE_TLB_MISS_603 0x1200
+
+#ifndef __ASSEMBLER__
+
+#include <linux/context_tracking.h>
+#include <linux/hardirq.h>
+#include <asm/cputime.h>
+#include <asm/firmware.h>
+#include <asm/ftrace.h>
+#include <asm/kprobes.h>
+#include <asm/runlatch.h>
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+/*
+ * WARN/BUG is handled with a program interrupt so minimise checks here to
+ * avoid recursion and maximise the chance of getting the first oops handled.
+ */
+#define INT_SOFT_MASK_BUG_ON(regs, cond) \
+do { \
+ if ((user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \
+ BUG_ON(cond); \
+} while (0)
+#else
+#define INT_SOFT_MASK_BUG_ON(regs, cond)
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+extern char __end_soft_masked[];
+bool search_kernel_soft_mask_table(unsigned long addr);
+unsigned long search_kernel_restart_table(unsigned long addr);
+
+DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+ if (user_mode(regs))
+ return false;
+
+ if (regs->nip >= (unsigned long)__end_soft_masked)
+ return false;
+
+ return search_kernel_soft_mask_table(regs->nip);
+}
+
+static inline void srr_regs_clobbered(void)
+{
+ local_paca->srr_valid = 0;
+ local_paca->hsrr_valid = 0;
+}
+#else
+static inline unsigned long search_kernel_restart_table(unsigned long addr)
+{
+ return 0;
+}
+
+static inline bool is_implicit_soft_masked(struct pt_regs *regs)
+{
+ return false;
+}
+
+static inline void srr_regs_clobbered(void)
+{
+}
+#endif
+
+static inline void nap_adjust_return(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_970_NAP
+ if (unlikely(test_thread_local_flags(_TLF_NAPPING))) {
+ /* Can avoid a test-and-clear because NMIs do not call this */
+ clear_thread_local_flags(_TLF_NAPPING);
+ regs_set_return_ip(regs, (unsigned long)power4_idle_nap_return);
+ }
+#endif
+}
+
+static inline void booke_restore_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+ if (IS_ENABLED(CONFIG_PPC32) && unlikely(dbcr0 & DBCR0_IDM)) {
+ mtspr(SPRN_DBSR, -1);
+ mtspr(SPRN_DBCR0, global_dbcr0[smp_processor_id()]);
+ }
+#endif
+}
+
+static inline void interrupt_enter_prepare(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+
+ /*
+ * If the interrupt was taken with HARD_DIS clear, then enable MSR[EE].
+ * Asynchronous interrupts get here with HARD_DIS set (see below), so
+ * this enables MSR[EE] for synchronous interrupts. IRQs remain
+ * soft-masked. The interrupt handler may later call
+ * interrupt_cond_local_irq_enable() to achieve a regular process
+ * context.
+ */
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) {
+ INT_SOFT_MASK_BUG_ON(regs, !(regs->msr & MSR_EE));
+ __hard_irq_enable();
+ } else {
+ __hard_RI_enable();
+ }
+ /* Enable MSR[RI] early, to support kernel SLB and hash faults */
+#endif
+
+ if (!arch_irq_disabled_regs(regs))
+ trace_hardirqs_off();
+
+ if (user_mode(regs)) {
+ kuap_lock();
+ CT_WARN_ON(ct_state() != CT_STATE_USER);
+ user_exit_irqoff();
+
+ account_cpu_user_entry();
+ account_stolen_time();
+ } else {
+ kuap_save_and_lock(regs);
+ /*
+ * CT_WARN_ON comes here via program_check_exception,
+ * so avoid recursion.
+ */
+ if (TRAP(regs) != INTERRUPT_PROGRAM)
+ CT_WARN_ON(ct_state() != CT_STATE_KERNEL &&
+ ct_state() != CT_STATE_IDLE);
+ INT_SOFT_MASK_BUG_ON(regs, is_implicit_soft_masked(regs));
+ INT_SOFT_MASK_BUG_ON(regs, arch_irq_disabled_regs(regs) &&
+ search_kernel_restart_table(regs->nip));
+ }
+ INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
+ !(regs->msr & MSR_EE));
+
+ booke_restore_dbcr0();
+}
+
+/*
+ * Care should be taken to note that interrupt_exit_prepare and
+ * interrupt_async_exit_prepare do not necessarily return immediately to
+ * regs context (e.g., if regs is usermode, we don't necessarily return to
+ * user mode). Other interrupts might be taken between here and return,
+ * context switch / preemption may occur in the exit path after this, or a
+ * signal may be delivered, etc.
+ *
+ * The real interrupt exit code is platform specific, e.g.,
+ * interrupt_exit_user_prepare / interrupt_exit_kernel_prepare for 64s.
+ *
+ * However interrupt_nmi_exit_prepare does return directly to regs, because
+ * NMIs do not do "exit work" or replay soft-masked interrupts.
+ */
+static inline void interrupt_exit_prepare(struct pt_regs *regs)
+{
+}
+
+static inline void interrupt_async_enter_prepare(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC64
+ /* Ensure interrupt_enter_prepare does not enable MSR[EE] */
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+#endif
+ interrupt_enter_prepare(regs);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * RI=1 is set by interrupt_enter_prepare, so this thread flags access
+ * has to come afterward (it can cause SLB faults).
+ */
+ if (cpu_has_feature(CPU_FTR_CTRL) &&
+ !test_thread_local_flags(_TLF_RUNLATCH))
+ __ppc64_runlatch_on();
+#endif
+ irq_enter();
+}
+
+static inline void interrupt_async_exit_prepare(struct pt_regs *regs)
+{
+ /*
+ * Adjust at exit so the main handler sees the true NIA. This must
+ * come before irq_exit() because irq_exit can enable interrupts, and
+ * if another interrupt is taken before nap_adjust_return has run
+ * here, then that interrupt would return directly to idle nap return.
+ */
+ nap_adjust_return(regs);
+
+ irq_exit();
+ interrupt_exit_prepare(regs);
+}
+
+struct interrupt_nmi_state {
+#ifdef CONFIG_PPC64
+ u8 irq_soft_mask;
+ u8 irq_happened;
+ u8 ftrace_enabled;
+ u64 softe;
+#endif
+};
+
+static inline bool nmi_disables_ftrace(struct pt_regs *regs)
+{
+ /* Allow DEC and PMI to be traced when they are soft-NMI */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
+ if (TRAP(regs) == INTERRUPT_DECREMENTER)
+ return false;
+ if (TRAP(regs) == INTERRUPT_PERFMON)
+ return false;
+ }
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+ if (TRAP(regs) == INTERRUPT_PERFMON)
+ return false;
+ }
+
+ return true;
+}
+
+static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+#ifdef CONFIG_PPC64
+ state->irq_soft_mask = local_paca->irq_soft_mask;
+ state->irq_happened = local_paca->irq_happened;
+ state->softe = regs->softe;
+
+ /*
+ * Set IRQS_ALL_DISABLED unconditionally so irqs_disabled() does
+ * the right thing, and set IRQ_HARD_DIS. We do not want to reconcile
+ * because that goes through irq tracing which we don't want in NMI.
+ */
+ local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ if (!(regs->msr & MSR_EE) || is_implicit_soft_masked(regs)) {
+ /*
+ * Adjust regs->softe to be soft-masked if it had not been
+ * reconciled (e.g., interrupt entry with MSR[EE]=0 but softe
+ * not yet set disabled), or if it was in an implicit soft
+ * masked state. This makes arch_irq_disabled_regs(regs)
+ * behave as expected.
+ */
+ regs->softe = IRQS_ALL_DISABLED;
+ }
+
+ __hard_RI_enable();
+
+ /* Don't do any per-CPU operations until interrupt state is fixed */
+
+ if (nmi_disables_ftrace(regs)) {
+ state->ftrace_enabled = this_cpu_get_ftrace_enabled();
+ this_cpu_set_ftrace_enabled(0);
+ }
+#endif
+
+ /* If data relocations are enabled, it's safe to use nmi_enter() */
+ if (mfmsr() & MSR_DR) {
+ nmi_enter();
+ return;
+ }
+
+ /*
+ * But do not use nmi_enter() for pseries hash guest taking a real-mode
+ * NMI because not everything it touches is within the RMA limit.
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ !radix_enabled())
+ return;
+
+ /*
+ * Likewise, don't use it if we have some form of instrumentation (like
+ * KASAN shadow) that is not safe to access in real mode (even on radix)
+ */
+ if (IS_ENABLED(CONFIG_KASAN))
+ return;
+
+ /*
+ * Likewise, do not use it in real mode if percpu first chunk is not
+ * embedded. With CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK enabled there
+ * are chances where percpu allocation can come from vmalloc area.
+ */
+ if (percpu_first_chunk_is_paged)
+ return;
+
+ /* Otherwise, it should be safe to call it */
+ nmi_enter();
+}
+
+static inline void interrupt_nmi_exit_prepare(struct pt_regs *regs, struct interrupt_nmi_state *state)
+{
+ if (mfmsr() & MSR_DR) {
+ // nmi_exit if relocations are on
+ nmi_exit();
+ } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ !radix_enabled()) {
+ // no nmi_exit for a pseries hash guest taking a real mode exception
+ } else if (IS_ENABLED(CONFIG_KASAN)) {
+ // no nmi_exit for KASAN in real mode
+ } else if (percpu_first_chunk_is_paged) {
+ // no nmi_exit if percpu first chunk is not embedded
+ } else {
+ nmi_exit();
+ }
+
+ /*
+ * nmi does not call nap_adjust_return because nmi should not create
+ * new work to do (must use irq_work for that).
+ */
+
+#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S
+ if (arch_irq_disabled_regs(regs)) {
+ unsigned long rst = search_kernel_restart_table(regs->nip);
+ if (rst)
+ regs_set_return_ip(regs, rst);
+ }
+#endif
+
+ if (nmi_disables_ftrace(regs))
+ this_cpu_set_ftrace_enabled(state->ftrace_enabled);
+
+ /* Check we didn't change the pending interrupt mask. */
+ WARN_ON_ONCE((state->irq_happened | PACA_IRQ_HARD_DIS) != local_paca->irq_happened);
+ regs->softe = state->softe;
+ local_paca->irq_happened = state->irq_happened;
+ local_paca->irq_soft_mask = state->irq_soft_mask;
+#endif
+}
+
+/*
+ * Don't use noinstr here like x86, but rather add NOKPROBE_SYMBOL to each
+ * function definition. The reason for this is the noinstr section is placed
+ * after the main text section, i.e., very far away from the interrupt entry
+ * asm. That creates problems with fitting linker stubs when building large
+ * kernels.
+ */
+#define interrupt_handler __visible noinline notrace __no_kcsan __no_sanitize_address
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RAW - Declare raw interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RAW(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RAW - Define raw interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * This is a plain function which does no tracing, reconciling, etc.
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ *
+ * Raw interrupt handlers must not enable or disable interrupts, or
+ * schedule. Tracing and instrumentation (ftrace, lockdep, etc) are
+ * not advisable either; they may be possible in a pinch, but the
+ * trace will look odd at least.
+ *
+ * A raw handler may call one of the other interrupt handler functions
+ * to be converted into that interrupt context without these restrictions.
+ *
+ * On PPC64, _RAW handlers may return with fast_interrupt_return.
+ *
+ * Specific handlers may have additional restrictions.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RAW(func) \
+static __always_inline __no_sanitize_address __no_kcsan long \
+____##func(struct pt_regs *regs); \
+ \
+interrupt_handler long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ __hard_RI_enable(); \
+ \
+ ret = ____##func (regs); \
+ \
+ return ret; \
+} \
+NOKPROBE_SYMBOL(func); \
+ \
+static __always_inline __no_sanitize_address __no_kcsan long \
+____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER - Declare synchronous interrupt handler function
+ * @func: Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER(func) \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER - Define synchronous interrupt handler function
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER(func) \
+static __always_inline void ____##func(struct pt_regs *regs); \
+ \
+interrupt_handler void func(struct pt_regs *regs) \
+{ \
+ interrupt_enter_prepare(regs); \
+ \
+ ____##func (regs); \
+ \
+ interrupt_exit_prepare(regs); \
+} \
+NOKPROBE_SYMBOL(func); \
+ \
+static __always_inline void ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_RET - Declare synchronous interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_RET(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_RET - Define synchronous interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_RET(func) \
+static __always_inline long ____##func(struct pt_regs *regs); \
+ \
+interrupt_handler long func(struct pt_regs *regs) \
+{ \
+ long ret; \
+ \
+ interrupt_enter_prepare(regs); \
+ \
+ ret = ____##func (regs); \
+ \
+ interrupt_exit_prepare(regs); \
+ \
+ return ret; \
+} \
+NOKPROBE_SYMBOL(func); \
+ \
+static __always_inline long ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_ASYNC - Declare asynchronous interrupt handler function
+ * @func: Function name of the entry point
+ */
+#define DECLARE_INTERRUPT_HANDLER_ASYNC(func) \
+ __visible void func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_ASYNC - Define asynchronous interrupt handler function
+ * @func: Function name of the entry point
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_ASYNC(func) \
+static __always_inline void ____##func(struct pt_regs *regs); \
+ \
+interrupt_handler void func(struct pt_regs *regs) \
+{ \
+ interrupt_async_enter_prepare(regs); \
+ \
+ ____##func (regs); \
+ \
+ interrupt_async_exit_prepare(regs); \
+} \
+NOKPROBE_SYMBOL(func); \
+ \
+static __always_inline void ____##func(struct pt_regs *regs)
+
+/**
+ * DECLARE_INTERRUPT_HANDLER_NMI - Declare NMI interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ */
+#define DECLARE_INTERRUPT_HANDLER_NMI(func) \
+ __visible long func(struct pt_regs *regs)
+
+/**
+ * DEFINE_INTERRUPT_HANDLER_NMI - Define NMI interrupt handler function
+ * @func: Function name of the entry point
+ * @returns: Returns a value back to asm caller
+ *
+ * @func is called from ASM entry code.
+ *
+ * The macro is written so it acts as function definition. Append the
+ * body with a pair of curly brackets.
+ */
+#define DEFINE_INTERRUPT_HANDLER_NMI(func) \
+static __always_inline __no_sanitize_address __no_kcsan long \
+____##func(struct pt_regs *regs); \
+ \
+interrupt_handler long func(struct pt_regs *regs) \
+{ \
+ struct interrupt_nmi_state state; \
+ long ret; \
+ \
+ interrupt_nmi_enter_prepare(regs, &state); \
+ \
+ ret = ____##func (regs); \
+ \
+ interrupt_nmi_exit_prepare(regs, &state); \
+ \
+ return ret; \
+} \
+NOKPROBE_SYMBOL(func); \
+ \
+static __always_inline __no_sanitize_address __no_kcsan long \
+____##func(struct pt_regs *regs)
+
+
+/* Interrupt handlers */
+/* kernel/traps.c */
+DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception);
+#ifdef CONFIG_PPC_BOOK3S_64
+DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot);
+DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async);
+#endif
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception);
+DECLARE_INTERRUPT_HANDLER(SMIException);
+DECLARE_INTERRUPT_HANDLER(handle_hmi_exception);
+DECLARE_INTERRUPT_HANDLER(unknown_exception);
+DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception);
+DECLARE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception);
+DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception);
+DECLARE_INTERRUPT_HANDLER(RunModeException);
+DECLARE_INTERRUPT_HANDLER(single_step_exception);
+DECLARE_INTERRUPT_HANDLER(program_check_exception);
+DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt);
+DECLARE_INTERRUPT_HANDLER(alignment_exception);
+DECLARE_INTERRUPT_HANDLER(StackOverflow);
+DECLARE_INTERRUPT_HANDLER(stack_overflow_exception);
+DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception);
+DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm);
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception);
+DECLARE_INTERRUPT_HANDLER(DebugException);
+DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
+DECLARE_INTERRUPT_HANDLER(CacheLockingException);
+DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
+DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
+DECLARE_INTERRUPT_HANDLER_NMI(WatchdogException);
+DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
+
+/* slb.c */
+DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_segment_interrupt);
+
+/* hash_utils.c */
+DECLARE_INTERRUPT_HANDLER(do_hash_fault);
+
+/* fault.c */
+DECLARE_INTERRUPT_HANDLER(do_page_fault);
+DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv);
+
+/* process.c */
+DECLARE_INTERRUPT_HANDLER(do_break);
+
+/* time.c */
+DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt);
+
+/* mce.c */
+DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early);
+DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
+
+DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
+
+/* irq.c */
+DECLARE_INTERRUPT_HANDLER_ASYNC(do_IRQ);
+
+void __noreturn unrecoverable_exception(struct pt_regs *regs);
+
+void replay_system_reset(void);
+void replay_soft_interrupts(void);
+
+static inline void interrupt_cond_local_irq_enable(struct pt_regs *regs)
+{
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+}
+
+long system_call_exception(struct pt_regs *regs, unsigned long r0);
+notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
+#ifdef CONFIG_PPC64
+unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs);
+unsigned long interrupt_exit_user_restart(struct pt_regs *regs);
+unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs);
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_INTERRUPT_H */
diff --git a/arch/powerpc/include/asm/io-defs.h b/arch/powerpc/include/asm/io-defs.h
index 44d7927aec69..5c2be9b54a9d 100644
--- a/arch/powerpc/include/asm/io-defs.h
+++ b/arch/powerpc/include/asm/io-defs.h
@@ -1,60 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* This file is meant to be include multiple times by other headers */
-/* last 2 argments are used by platforms/cell/io-workarounds.[ch] */
-DEF_PCI_AC_RET(readb, u8, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_RET(readw, u16, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_RET(readl, u32, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_RET(readw_be, u16, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_RET(readl_be, u32, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_NORET(writeb, (u8 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-DEF_PCI_AC_NORET(writew, (u16 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-DEF_PCI_AC_NORET(writel, (u32 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-DEF_PCI_AC_NORET(writew_be, (u16 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-DEF_PCI_AC_NORET(writel_be, (u32 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-
-#ifdef __powerpc64__
-DEF_PCI_AC_RET(readq, u64, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_RET(readq_be, u64, (const PCI_IO_ADDR addr), (addr), mem, addr)
-DEF_PCI_AC_NORET(writeq, (u64 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-DEF_PCI_AC_NORET(writeq_be, (u64 val, PCI_IO_ADDR addr), (val, addr), mem, addr)
-#endif /* __powerpc64__ */
-
-DEF_PCI_AC_RET(inb, u8, (unsigned long port), (port), pio, port)
-DEF_PCI_AC_RET(inw, u16, (unsigned long port), (port), pio, port)
-DEF_PCI_AC_RET(inl, u32, (unsigned long port), (port), pio, port)
-DEF_PCI_AC_NORET(outb, (u8 val, unsigned long port), (val, port), pio, port)
-DEF_PCI_AC_NORET(outw, (u16 val, unsigned long port), (val, port), pio, port)
-DEF_PCI_AC_NORET(outl, (u32 val, unsigned long port), (val, port), pio, port)
-
-DEF_PCI_AC_NORET(readsb, (const PCI_IO_ADDR a, void *b, unsigned long c),
- (a, b, c), mem, a)
-DEF_PCI_AC_NORET(readsw, (const PCI_IO_ADDR a, void *b, unsigned long c),
- (a, b, c), mem, a)
-DEF_PCI_AC_NORET(readsl, (const PCI_IO_ADDR a, void *b, unsigned long c),
- (a, b, c), mem, a)
-DEF_PCI_AC_NORET(writesb, (PCI_IO_ADDR a, const void *b, unsigned long c),
- (a, b, c), mem, a)
-DEF_PCI_AC_NORET(writesw, (PCI_IO_ADDR a, const void *b, unsigned long c),
- (a, b, c), mem, a)
-DEF_PCI_AC_NORET(writesl, (PCI_IO_ADDR a, const void *b, unsigned long c),
- (a, b, c), mem, a)
-
-DEF_PCI_AC_NORET(insb, (unsigned long p, void *b, unsigned long c),
- (p, b, c), pio, p)
-DEF_PCI_AC_NORET(insw, (unsigned long p, void *b, unsigned long c),
- (p, b, c), pio, p)
-DEF_PCI_AC_NORET(insl, (unsigned long p, void *b, unsigned long c),
- (p, b, c), pio, p)
-DEF_PCI_AC_NORET(outsb, (unsigned long p, const void *b, unsigned long c),
- (p, b, c), pio, p)
-DEF_PCI_AC_NORET(outsw, (unsigned long p, const void *b, unsigned long c),
- (p, b, c), pio, p)
-DEF_PCI_AC_NORET(outsl, (unsigned long p, const void *b, unsigned long c),
- (p, b, c), pio, p)
-
-DEF_PCI_AC_NORET(memset_io, (PCI_IO_ADDR a, int c, unsigned long n),
- (a, c, n), mem, a)
-DEF_PCI_AC_NORET(memcpy_fromio, (void *d, const PCI_IO_ADDR s, unsigned long n),
- (d, s, n), mem, s)
-DEF_PCI_AC_NORET(memcpy_toio, (PCI_IO_ADDR d, const void *s, unsigned long n),
- (d, s, n), mem, d)
+DEF_PCI_AC_RET(inb, u8, (unsigned long port), (port))
+DEF_PCI_AC_RET(inw, u16, (unsigned long port), (port))
+DEF_PCI_AC_RET(inl, u32, (unsigned long port), (port))
+DEF_PCI_AC_NORET(outb, (u8 val, unsigned long port), (val, port))
+DEF_PCI_AC_NORET(outw, (u16 val, unsigned long port), (val, port))
+DEF_PCI_AC_NORET(outl, (u32 val, unsigned long port), (val, port))
+DEF_PCI_AC_NORET(insb, (unsigned long p, void *b, unsigned long c), (p, b, c))
+DEF_PCI_AC_NORET(insw, (unsigned long p, void *b, unsigned long c), (p, b, c))
+DEF_PCI_AC_NORET(insl, (unsigned long p, void *b, unsigned long c), (p, b, c))
+DEF_PCI_AC_NORET(outsb, (unsigned long p, const void *b, unsigned long c), (p, b, c))
+DEF_PCI_AC_NORET(outsw, (unsigned long p, const void *b, unsigned long c), (p, b, c))
+DEF_PCI_AC_NORET(outsl, (unsigned long p, const void *b, unsigned long c), (p, b, c))
diff --git a/arch/powerpc/include/asm/io-workarounds.h b/arch/powerpc/include/asm/io-workarounds.h
deleted file mode 100644
index f96dd096ff4e..000000000000
--- a/arch/powerpc/include/asm/io-workarounds.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Support PCI IO workaround
- *
- * (C) Copyright 2007-2008 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _IO_WORKAROUNDS_H
-#define _IO_WORKAROUNDS_H
-
-#include <linux/io.h>
-#include <asm/pci-bridge.h>
-
-/* Bus info */
-struct iowa_bus {
- struct pci_controller *phb;
- struct ppc_pci_io *ops;
- void *private;
-};
-
-void iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
- int (*)(struct iowa_bus *, void *), void *);
-struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
-struct iowa_bus *iowa_pio_find_bus(unsigned long);
-
-extern struct ppc_pci_io spiderpci_ops;
-extern int spiderpci_iowa_init(struct iowa_bus *, void *);
-
-#define SPIDER_PCI_REG_BASE 0xd000
-#define SPIDER_PCI_REG_SIZE 0x1000
-#define SPIDER_PCI_VCI_CNTL_STAT 0x0110
-#define SPIDER_PCI_DUMMY_READ 0x0810
-#define SPIDER_PCI_DUMMY_READ_BASE 0x0814
-
-#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 97d3869991ca..7a89754842d6 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -1,14 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_IO_H
#define _ASM_POWERPC_IO_H
#ifdef __KERNEL__
-#define ARCH_HAS_IOREMAP_WC
-
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/* Check of existence of legacy devices */
@@ -25,32 +20,24 @@ extern struct pci_dev *isa_bridge_pcidev;
#endif
#include <linux/device.h>
-#include <linux/io.h>
-
#include <linux/compiler.h>
+#include <linux/mm.h>
#include <asm/page.h>
#include <asm/byteorder.h>
#include <asm/synch.h>
#include <asm/delay.h>
+#include <asm/mmiowb.h>
#include <asm/mmu.h>
-#include <asm-generic/iomap.h>
-
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#endif
-
#define SIO_CONFIG_RA 0x398
#define SIO_CONFIG_RD 0x399
-#define SLOW_DOWN_IO
-
/* 32 bits uses slightly different variables for the various IO
* bases. Most of this file only uses _IO_BASE though which we
* define properly based on the platform
*/
#ifndef CONFIG_PCI
-#define _IO_BASE 0
+#define _IO_BASE POISON_POINTER_DELTA
#define _ISA_MEM_BASE 0
#define PCI_DRAM_OFFSET 0
#elif defined(CONFIG_PPC32)
@@ -78,8 +65,8 @@ extern resource_size_t isa_mem_base;
extern bool isa_io_special;
#ifdef CONFIG_PPC32
-#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
-#error CONFIG_PPC_INDIRECT_{PIO,MMIO} are not yet supported on 32 bits
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#error CONFIG_PPC_INDIRECT_PIO is not yet supported on 32 bits
#endif
#endif
@@ -93,43 +80,50 @@ extern bool isa_io_special;
*
* in_8, in_le16, in_be16, in_le32, in_be32, in_le64, in_be64
* out_8, out_le16, out_be16, out_le32, out_be32, out_le64, out_be64
- * _insb, _insw_ns, _insl_ns, _outsb, _outsw_ns, _outsl_ns
+ * _insb, _insw, _insl, _outsb, _outsw, _outsl
*
* Those operate directly on a kernel virtual address. Note that the prototype
* for the out_* accessors has the arguments in opposite order from the usual
* linux PCI accessors. Unlike those, they take the address first and the value
* next.
- *
- * Note: I might drop the _ns suffix on the stream operations soon as it is
- * simply normal for stream operations to not swap in the first place.
- *
*/
-#ifdef CONFIG_PPC64
-#define IO_SET_SYNC_FLAG() do { local_paca->io_sync = 1; } while(0)
-#else
-#define IO_SET_SYNC_FLAG()
-#endif
-
-/* gcc 4.0 and older doesn't have 'Z' constraint */
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
#define DEF_MMIO_IN_X(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
{ \
u##size ret; \
__asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync" \
- : "=r" (ret) : "r" (addr), "m" (*addr) : "memory"); \
+ : "=r" (ret) : "r" (addr) : "memory"); \
return ret; \
}
#define DEF_MMIO_OUT_X(name, size, insn) \
static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
- __asm__ __volatile__("sync;"#insn" %1,0,%2" \
- : "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \
- IO_SET_SYNC_FLAG(); \
+ __asm__ __volatile__("sync;"#insn" %1,0,%0" \
+ : : "r" (addr), "r" (val) : "memory"); \
+ mmiowb_set_pending(); \
+}
+
+#define DEF_MMIO_IN_D(name, size, insn) \
+static inline u##size name(const volatile u##size __iomem *addr) \
+{ \
+ u##size ret; \
+ __asm__ __volatile__("sync;"#insn" %0,0(%1);twi 0,%0,0;isync"\
+ : "=r" (ret) : "b" (addr) : "memory"); \
+ return ret; \
+}
+
+#define DEF_MMIO_OUT_D(name, size, insn) \
+static inline void name(volatile u##size __iomem *addr, u##size val) \
+{ \
+ __asm__ __volatile__("sync;"#insn" %1,0(%0)" \
+ : : "b" (addr), "r" (val) : "memory"); \
+ mmiowb_set_pending(); \
}
-#else /* newer gcc */
+#else
#define DEF_MMIO_IN_X(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
{ \
@@ -144,16 +138,15 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn" %1,%y0" \
: "=Z" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
+ mmiowb_set_pending(); \
}
-#endif
#define DEF_MMIO_IN_D(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
{ \
u##size ret; \
__asm__ __volatile__("sync;"#insn"%U1%X1 %0,%1;twi 0,%0,0;isync"\
- : "=r" (ret) : "m" (*addr) : "memory"); \
+ : "=r" (ret) : "m<>" (*addr) : "memory"); \
return ret; \
}
@@ -161,9 +154,10 @@ static inline u##size name(const volatile u##size __iomem *addr) \
static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \
- : "=m" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
+ : "=m<>" (*addr) : "r" (val) : "memory"); \
+ mmiowb_set_pending(); \
}
+#endif
DEF_MMIO_IN_D(in_8, 8, lbz);
DEF_MMIO_OUT_D(out_8, 8, stb);
@@ -191,24 +185,8 @@ DEF_MMIO_OUT_D(out_le32, 32, stw);
#endif /* __BIG_ENDIAN */
-/*
- * Cache inhibitied accessors for use in real mode, you don't want to use these
- * unless you know what you're doing.
- *
- * NB. These use the cpu byte ordering.
- */
-DEF_MMIO_OUT_X(out_rm8, 8, stbcix);
-DEF_MMIO_OUT_X(out_rm16, 16, sthcix);
-DEF_MMIO_OUT_X(out_rm32, 32, stwcix);
-DEF_MMIO_IN_X(in_rm8, 8, lbzcix);
-DEF_MMIO_IN_X(in_rm16, 16, lhzcix);
-DEF_MMIO_IN_X(in_rm32, 32, lwzcix);
-
#ifdef __powerpc64__
-DEF_MMIO_OUT_X(out_rm64, 64, stdcix);
-DEF_MMIO_IN_X(in_rm64, 64, ldcix);
-
#ifdef __BIG_ENDIAN__
DEF_MMIO_OUT_D(out_be64, 64, std);
DEF_MMIO_IN_D(in_be64, 64, ld);
@@ -246,19 +224,10 @@ static inline void out_be64(volatile u64 __iomem *addr, u64 val)
*/
extern void _insb(const volatile u8 __iomem *addr, void *buf, long count);
extern void _outsb(volatile u8 __iomem *addr,const void *buf,long count);
-extern void _insw_ns(const volatile u16 __iomem *addr, void *buf, long count);
-extern void _outsw_ns(volatile u16 __iomem *addr, const void *buf, long count);
-extern void _insl_ns(const volatile u32 __iomem *addr, void *buf, long count);
-extern void _outsl_ns(volatile u32 __iomem *addr, const void *buf, long count);
-
-/* The _ns naming is historical and will be removed. For now, just #define
- * the non _ns equivalent names
- */
-#define _insw _insw_ns
-#define _insl _insl_ns
-#define _outsw _outsw_ns
-#define _outsl _outsl_ns
-
+extern void _insw(const volatile u16 __iomem *addr, void *buf, long count);
+extern void _outsw(volatile u16 __iomem *addr, const void *buf, long count);
+extern void _insl(const volatile u32 __iomem *addr, void *buf, long count);
+extern void _outsl(volatile u32 __iomem *addr, const void *buf, long count);
/*
* memset_io, memcpy_toio, memcpy_fromio base implementations are out of line
@@ -279,9 +248,9 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
* for PowerPC is as close as possible to the x86 version of these, and thus
* provides fairly heavy weight barriers for the non-raw versions
*
- * In addition, they support a hook mechanism when CONFIG_PPC_INDIRECT_MMIO
- * or CONFIG_PPC_INDIRECT_PIO are set allowing the platform to provide its
- * own implementation of some or all of the accessors.
+ * In addition, they support a hook mechanism when CONFIG_PPC_INDIRECT_PIO
+ * is set allowing the platform to provide its own implementation of some
+ * of the accessors.
*/
/*
@@ -292,98 +261,96 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src,
#include <asm/eeh.h>
#endif
-/* Shortcut to the MMIO argument pointer */
-#define PCI_IO_ADDR volatile void __iomem *
-
-/* Indirect IO address tokens:
- *
- * When CONFIG_PPC_INDIRECT_MMIO is set, the platform can provide hooks
- * on all MMIOs. (Note that this is all 64 bits only for now)
- *
- * To help platforms who may need to differenciate MMIO addresses in
- * their hooks, a bitfield is reserved for use by the platform near the
- * top of MMIO addresses (not PIO, those have to cope the hard way).
- *
- * This bit field is 12 bits and is at the top of the IO virtual
- * addresses PCI_IO_INDIRECT_TOKEN_MASK.
- *
- * The kernel virtual space is thus:
- *
- * 0xD000000000000000 : vmalloc
- * 0xD000080000000000 : PCI PHB IO space
- * 0xD000080080000000 : ioremap
- * 0xD0000fffffffffff : end of ioremap region
- *
- * Since the top 4 bits are reserved as the region ID, we use thus
- * the next 12 bits and keep 4 bits available for the future if the
- * virtual address space is ever to be extended.
- *
- * The direct IO mapping operations will then mask off those bits
- * before doing the actual access, though that only happen when
- * CONFIG_PPC_INDIRECT_MMIO is set, thus be careful when you use that
- * mechanism
- *
- * For PIO, there is a separate CONFIG_PPC_INDIRECT_PIO which makes
- * all PIO functions call through a hook.
- */
-
-#ifdef CONFIG_PPC_INDIRECT_MMIO
-#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul
-#define PCI_IO_IND_TOKEN_SHIFT 48
-#define PCI_FIX_ADDR(addr) \
- ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK))
-#define PCI_GET_ADDR_TOKEN(addr) \
- (((unsigned long)(addr) & PCI_IO_IND_TOKEN_MASK) >> \
- PCI_IO_IND_TOKEN_SHIFT)
-#define PCI_SET_ADDR_TOKEN(addr, token) \
-do { \
- unsigned long __a = (unsigned long)(addr); \
- __a &= ~PCI_IO_IND_TOKEN_MASK; \
- __a |= ((unsigned long)(token)) << PCI_IO_IND_TOKEN_SHIFT; \
- (addr) = (void __iomem *)__a; \
-} while(0)
-#else
-#define PCI_FIX_ADDR(addr) (addr)
-#endif
-
+#define _IO_PORT(port) ((volatile void __iomem *)(_IO_BASE + (port)))
+#ifdef __powerpc64__
/*
- * Non ordered and non-swapping "raw" accessors
+ * Real mode versions of raw accessors. Those instructions are only supposed
+ * to be used in hypervisor real mode as per the architecture spec.
*/
+static inline void __raw_rm_writeb(u8 val, volatile void __iomem *paddr)
+{
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ stbcix %0,0,%1; \
+ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
+}
-static inline unsigned char __raw_readb(const volatile void __iomem *addr)
+static inline void __raw_rm_writew(u16 val, volatile void __iomem *paddr)
{
- return *(volatile unsigned char __force *)PCI_FIX_ADDR(addr);
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ sthcix %0,0,%1; \
+ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
}
-static inline unsigned short __raw_readw(const volatile void __iomem *addr)
+
+static inline void __raw_rm_writel(u32 val, volatile void __iomem *paddr)
{
- return *(volatile unsigned short __force *)PCI_FIX_ADDR(addr);
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ stwcix %0,0,%1; \
+ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
}
-static inline unsigned int __raw_readl(const volatile void __iomem *addr)
+
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
{
- return *(volatile unsigned int __force *)PCI_FIX_ADDR(addr);
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ stdcix %0,0,%1; \
+ .machine pop;"
+ : : "r" (val), "r" (paddr) : "memory");
}
-static inline void __raw_writeb(unsigned char v, volatile void __iomem *addr)
+
+static inline void __raw_rm_writeq_be(u64 val, volatile void __iomem *paddr)
{
- *(volatile unsigned char __force *)PCI_FIX_ADDR(addr) = v;
+ __raw_rm_writeq((__force u64)cpu_to_be64(val), paddr);
}
-static inline void __raw_writew(unsigned short v, volatile void __iomem *addr)
+
+static inline u8 __raw_rm_readb(volatile void __iomem *paddr)
{
- *(volatile unsigned short __force *)PCI_FIX_ADDR(addr) = v;
+ u8 ret;
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ lbzcix %0,0, %1; \
+ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
}
-static inline void __raw_writel(unsigned int v, volatile void __iomem *addr)
+
+static inline u16 __raw_rm_readw(volatile void __iomem *paddr)
{
- *(volatile unsigned int __force *)PCI_FIX_ADDR(addr) = v;
+ u16 ret;
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ lhzcix %0,0, %1; \
+ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
}
-#ifdef __powerpc64__
-static inline unsigned long __raw_readq(const volatile void __iomem *addr)
+static inline u32 __raw_rm_readl(volatile void __iomem *paddr)
{
- return *(volatile unsigned long __force *)PCI_FIX_ADDR(addr);
+ u32 ret;
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ lwzcix %0,0, %1; \
+ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
}
-static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
+
+static inline u64 __raw_rm_readq(volatile void __iomem *paddr)
{
- *(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v;
+ u64 ret;
+ __asm__ __volatile__(".machine push; \
+ .machine power6; \
+ ldcix %0,0, %1; \
+ .machine pop;"
+ : "=r" (ret) : "r" (paddr) : "memory");
+ return ret;
}
#endif /* __powerpc64__ */
@@ -418,13 +385,10 @@ static inline unsigned int name(unsigned int port) \
"5: li %0,-1\n" \
" b 4b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 2\n" \
- " .long 0b,5b\n" \
- " .long 1b,5b\n" \
- " .long 2b,5b\n" \
- " .long 3b,5b\n" \
- ".previous" \
+ EX_TABLE(0b, 5b) \
+ EX_TABLE(1b, 5b) \
+ EX_TABLE(2b, 5b) \
+ EX_TABLE(3b, 5b) \
: "=&r" (x) \
: "r" (port + _IO_BASE) \
: "memory"); \
@@ -439,11 +403,8 @@ static inline void name(unsigned int val, unsigned int port) \
"0:" op " %0,0,%1\n" \
"1: sync\n" \
"2:\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 2\n" \
- " .long 0b,2b\n" \
- " .long 1b,2b\n" \
- ".previous" \
+ EX_TABLE(0b, 2b) \
+ EX_TABLE(1b, 2b) \
: : "r" (val), "r" (port + _IO_BASE) \
: "memory"); \
}
@@ -472,30 +433,23 @@ __do_out_asm(_rec_outl, "stwbrx")
* possible to hook directly at the toplevel PIO operation if they have to
* be handled differently
*/
-#define __do_writeb(val, addr) out_8(PCI_FIX_ADDR(addr), val)
-#define __do_writew(val, addr) out_le16(PCI_FIX_ADDR(addr), val)
-#define __do_writel(val, addr) out_le32(PCI_FIX_ADDR(addr), val)
-#define __do_writeq(val, addr) out_le64(PCI_FIX_ADDR(addr), val)
-#define __do_writew_be(val, addr) out_be16(PCI_FIX_ADDR(addr), val)
-#define __do_writel_be(val, addr) out_be32(PCI_FIX_ADDR(addr), val)
-#define __do_writeq_be(val, addr) out_be64(PCI_FIX_ADDR(addr), val)
#ifdef CONFIG_EEH
-#define __do_readb(addr) eeh_readb(PCI_FIX_ADDR(addr))
-#define __do_readw(addr) eeh_readw(PCI_FIX_ADDR(addr))
-#define __do_readl(addr) eeh_readl(PCI_FIX_ADDR(addr))
-#define __do_readq(addr) eeh_readq(PCI_FIX_ADDR(addr))
-#define __do_readw_be(addr) eeh_readw_be(PCI_FIX_ADDR(addr))
-#define __do_readl_be(addr) eeh_readl_be(PCI_FIX_ADDR(addr))
-#define __do_readq_be(addr) eeh_readq_be(PCI_FIX_ADDR(addr))
+#define __do_readb(addr) eeh_readb(addr)
+#define __do_readw(addr) eeh_readw(addr)
+#define __do_readl(addr) eeh_readl(addr)
+#define __do_readq(addr) eeh_readq(addr)
+#define __do_readw_be(addr) eeh_readw_be(addr)
+#define __do_readl_be(addr) eeh_readl_be(addr)
+#define __do_readq_be(addr) eeh_readq_be(addr)
#else /* CONFIG_EEH */
-#define __do_readb(addr) in_8(PCI_FIX_ADDR(addr))
-#define __do_readw(addr) in_le16(PCI_FIX_ADDR(addr))
-#define __do_readl(addr) in_le32(PCI_FIX_ADDR(addr))
-#define __do_readq(addr) in_le64(PCI_FIX_ADDR(addr))
-#define __do_readw_be(addr) in_be16(PCI_FIX_ADDR(addr))
-#define __do_readl_be(addr) in_be32(PCI_FIX_ADDR(addr))
-#define __do_readq_be(addr) in_be64(PCI_FIX_ADDR(addr))
+#define __do_readb(addr) in_8(addr)
+#define __do_readw(addr) in_le16(addr)
+#define __do_readl(addr) in_le32(addr)
+#define __do_readq(addr) in_le64(addr)
+#define __do_readw_be(addr) in_be16(addr)
+#define __do_readl_be(addr) in_be32(addr)
+#define __do_readq_be(addr) in_be64(addr)
#endif /* !defined(CONFIG_EEH) */
#ifdef CONFIG_PPC32
@@ -506,64 +460,185 @@ __do_out_asm(_rec_outl, "stwbrx")
#define __do_inw(port) _rec_inw(port)
#define __do_inl(port) _rec_inl(port)
#else /* CONFIG_PPC32 */
-#define __do_outb(val, port) writeb(val,(PCI_IO_ADDR)_IO_BASE+port);
-#define __do_outw(val, port) writew(val,(PCI_IO_ADDR)_IO_BASE+port);
-#define __do_outl(val, port) writel(val,(PCI_IO_ADDR)_IO_BASE+port);
-#define __do_inb(port) readb((PCI_IO_ADDR)_IO_BASE + port);
-#define __do_inw(port) readw((PCI_IO_ADDR)_IO_BASE + port);
-#define __do_inl(port) readl((PCI_IO_ADDR)_IO_BASE + port);
+#define __do_outb(val, port) writeb(val,_IO_PORT(port));
+#define __do_outw(val, port) writew(val,_IO_PORT(port));
+#define __do_outl(val, port) writel(val,_IO_PORT(port));
+#define __do_inb(port) readb(_IO_PORT(port));
+#define __do_inw(port) readw(_IO_PORT(port));
+#define __do_inl(port) readl(_IO_PORT(port));
#endif /* !CONFIG_PPC32 */
#ifdef CONFIG_EEH
-#define __do_readsb(a, b, n) eeh_readsb(PCI_FIX_ADDR(a), (b), (n))
-#define __do_readsw(a, b, n) eeh_readsw(PCI_FIX_ADDR(a), (b), (n))
-#define __do_readsl(a, b, n) eeh_readsl(PCI_FIX_ADDR(a), (b), (n))
+#define __do_readsb(a, b, n) eeh_readsb(a, (b), (n))
+#define __do_readsw(a, b, n) eeh_readsw(a, (b), (n))
+#define __do_readsl(a, b, n) eeh_readsl(a, (b), (n))
#else /* CONFIG_EEH */
-#define __do_readsb(a, b, n) _insb(PCI_FIX_ADDR(a), (b), (n))
-#define __do_readsw(a, b, n) _insw(PCI_FIX_ADDR(a), (b), (n))
-#define __do_readsl(a, b, n) _insl(PCI_FIX_ADDR(a), (b), (n))
+#define __do_readsb(a, b, n) _insb(a, (b), (n))
+#define __do_readsw(a, b, n) _insw(a, (b), (n))
+#define __do_readsl(a, b, n) _insl(a, (b), (n))
#endif /* !CONFIG_EEH */
-#define __do_writesb(a, b, n) _outsb(PCI_FIX_ADDR(a),(b),(n))
-#define __do_writesw(a, b, n) _outsw(PCI_FIX_ADDR(a),(b),(n))
-#define __do_writesl(a, b, n) _outsl(PCI_FIX_ADDR(a),(b),(n))
-
-#define __do_insb(p, b, n) readsb((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
-#define __do_insw(p, b, n) readsw((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
-#define __do_insl(p, b, n) readsl((PCI_IO_ADDR)_IO_BASE+(p), (b), (n))
-#define __do_outsb(p, b, n) writesb((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
-#define __do_outsw(p, b, n) writesw((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
-#define __do_outsl(p, b, n) writesl((PCI_IO_ADDR)_IO_BASE+(p),(b),(n))
-
-#define __do_memset_io(addr, c, n) \
- _memset_io(PCI_FIX_ADDR(addr), c, n)
-#define __do_memcpy_toio(dst, src, n) \
- _memcpy_toio(PCI_FIX_ADDR(dst), src, n)
+#define __do_writesb(a, b, n) _outsb(a, (b), (n))
+#define __do_writesw(a, b, n) _outsw(a, (b), (n))
+#define __do_writesl(a, b, n) _outsl(a, (b), (n))
+
+#define __do_insb(p, b, n) readsb(_IO_PORT(p), (b), (n))
+#define __do_insw(p, b, n) readsw(_IO_PORT(p), (b), (n))
+#define __do_insl(p, b, n) readsl(_IO_PORT(p), (b), (n))
+#define __do_outsb(p, b, n) writesb(_IO_PORT(p),(b),(n))
+#define __do_outsw(p, b, n) writesw(_IO_PORT(p),(b),(n))
+#define __do_outsl(p, b, n) writesl(_IO_PORT(p),(b),(n))
#ifdef CONFIG_EEH
#define __do_memcpy_fromio(dst, src, n) \
- eeh_memcpy_fromio(dst, PCI_FIX_ADDR(src), n)
+ eeh_memcpy_fromio(dst, src, n)
#else /* CONFIG_EEH */
#define __do_memcpy_fromio(dst, src, n) \
- _memcpy_fromio(dst,PCI_FIX_ADDR(src),n)
+ _memcpy_fromio(dst, src, n)
#endif /* !CONFIG_EEH */
-#ifdef CONFIG_PPC_INDIRECT_PIO
-#define DEF_PCI_HOOK_pio(x) x
-#else
-#define DEF_PCI_HOOK_pio(x) NULL
-#endif
+static inline u8 readb(const volatile void __iomem *addr)
+{
+ return __do_readb(addr);
+}
+#define readb readb
+
+static inline u16 readw(const volatile void __iomem *addr)
+{
+ return __do_readw(addr);
+}
+#define readw readw
+
+static inline u32 readl(const volatile void __iomem *addr)
+{
+ return __do_readl(addr);
+}
+#define readl readl
+
+static inline u16 readw_be(const volatile void __iomem *addr)
+{
+ return __do_readw_be(addr);
+}
+
+static inline u32 readl_be(const volatile void __iomem *addr)
+{
+ return __do_readl_be(addr);
+}
+
+static inline void writeb(u8 val, volatile void __iomem *addr)
+{
+ out_8(addr, val);
+}
+#define writeb writeb
+
+static inline void writew(u16 val, volatile void __iomem *addr)
+{
+ out_le16(addr, val);
+}
+#define writew writew
+
+static inline void writel(u32 val, volatile void __iomem *addr)
+{
+ out_le32(addr, val);
+}
+#define writel writel
+
+static inline void writew_be(u16 val, volatile void __iomem *addr)
+{
+ out_be16(addr, val);
+}
+
+static inline void writel_be(u32 val, volatile void __iomem *addr)
+{
+ out_be32(addr, val);
+}
-#ifdef CONFIG_PPC_INDIRECT_MMIO
-#define DEF_PCI_HOOK_mem(x) x
+static inline void readsb(const volatile void __iomem *a, void *b, unsigned long c)
+{
+ __do_readsb(a, b, c);
+}
+#define readsb readsb
+
+static inline void readsw(const volatile void __iomem *a, void *b, unsigned long c)
+{
+ __do_readsw(a, b, c);
+}
+#define readsw readsw
+
+static inline void readsl(const volatile void __iomem *a, void *b, unsigned long c)
+{
+ __do_readsl(a, b, c);
+}
+#define readsl readsl
+
+static inline void writesb(volatile void __iomem *a, const void *b, unsigned long c)
+{
+ __do_writesb(a, b, c);
+}
+#define writesb writesb
+
+static inline void writesw(volatile void __iomem *a, const void *b, unsigned long c)
+{
+ __do_writesw(a, b, c);
+}
+#define writesw writesw
+
+static inline void writesl(volatile void __iomem *a, const void *b, unsigned long c)
+{
+ __do_writesl(a, b, c);
+}
+#define writesl writesl
+
+static inline void memset_io(volatile void __iomem *a, int c, unsigned long n)
+{
+ _memset_io(a, c, n);
+}
+#define memset_io memset_io
+
+static inline void memcpy_fromio(void *d, const volatile void __iomem *s, unsigned long n)
+{
+ __do_memcpy_fromio(d, s, n);
+}
+#define memcpy_fromio memcpy_fromio
+
+static inline void memcpy_toio(volatile void __iomem *d, const void *s, unsigned long n)
+{
+ _memcpy_toio(d, s, n);
+}
+#define memcpy_toio memcpy_toio
+
+#ifdef __powerpc64__
+static inline u64 readq(const volatile void __iomem *addr)
+{
+ return __do_readq(addr);
+}
+
+static inline u64 readq_be(const volatile void __iomem *addr)
+{
+ return __do_readq_be(addr);
+}
+
+static inline void writeq(u64 val, volatile void __iomem *addr)
+{
+ out_le64(addr, val);
+}
+
+static inline void writeq_be(u64 val, volatile void __iomem *addr)
+{
+ out_be64(addr, val);
+}
+#endif /* __powerpc64__ */
+
+#ifdef CONFIG_PPC_INDIRECT_PIO
+#define DEF_PCI_HOOK(x) x
#else
-#define DEF_PCI_HOOK_mem(x) NULL
+#define DEF_PCI_HOOK(x) NULL
#endif
/* Structure containing all the hooks */
extern struct ppc_pci_io {
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) ret (*name) at;
-#define DEF_PCI_AC_NORET(name, at, al, space, aa) void (*name) at;
+#define DEF_PCI_AC_RET(name, ret, at, al) ret (*name) at;
+#define DEF_PCI_AC_NORET(name, at, al) void (*name) at;
#include <asm/io-defs.h>
@@ -573,18 +648,18 @@ extern struct ppc_pci_io {
} ppc_pci_io;
/* The inline wrappers */
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
+#define DEF_PCI_AC_RET(name, ret, at, al) \
static inline ret name at \
{ \
- if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \
+ if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \
return ppc_pci_io.name al; \
return __do_##name al; \
}
-#define DEF_PCI_AC_NORET(name, at, al, space, aa) \
+#define DEF_PCI_AC_NORET(name, at, al) \
static inline void name at \
{ \
- if (DEF_PCI_HOOK_##space(ppc_pci_io.name) != NULL) \
+ if (DEF_PCI_HOOK(ppc_pci_io.name) != NULL) \
ppc_pci_io.name al; \
else \
__do_##name al; \
@@ -595,50 +670,86 @@ static inline void name at \
#undef DEF_PCI_AC_RET
#undef DEF_PCI_AC_NORET
-/* Some drivers check for the presence of readq & writeq with
- * a #ifdef, so we make them happy here.
- */
+// Signal to asm-generic/io.h that we have implemented these.
+#define inb inb
+#define inw inw
+#define inl inl
+#define outb outb
+#define outw outw
+#define outl outl
+#define insb insb
+#define insw insw
+#define insl insl
+#define outsb outsb
+#define outsw outsw
+#define outsl outsl
#ifdef __powerpc64__
#define readq readq
#define writeq writeq
#endif
/*
- * Convert a physical pointer to a virtual kernel pointer for /dev/mem
- * access
+ * We don't do relaxed operations yet, at least not with this semantic
*/
-#define xlate_dev_mem_ptr(p) __va(p)
-
+#define readb_relaxed(addr) readb(addr)
+#define readw_relaxed(addr) readw(addr)
+#define readl_relaxed(addr) readl(addr)
+#define readq_relaxed(addr) readq(addr)
+#define writeb_relaxed(v, addr) writeb(v, addr)
+#define writew_relaxed(v, addr) writew(v, addr)
+#define writel_relaxed(v, addr) writel(v, addr)
+#define writeq_relaxed(v, addr) writeq(v, addr)
+
+#ifndef CONFIG_GENERIC_IOMAP
/*
- * Convert a virtual cached pointer to an uncached pointer
+ * Here comes the implementation of the IOMAP interfaces.
*/
-#define xlate_dev_kmem_ptr(p) p
+static inline unsigned int ioread16be(const void __iomem *addr)
+{
+ return readw_be(addr);
+}
+#define ioread16be ioread16be
-/*
- * We don't do relaxed operations yet, at least not with this semantic
- */
-#define readb_relaxed(addr) readb(addr)
-#define readw_relaxed(addr) readw(addr)
-#define readl_relaxed(addr) readl(addr)
-#define readq_relaxed(addr) readq(addr)
+static inline unsigned int ioread32be(const void __iomem *addr)
+{
+ return readl_be(addr);
+}
+#define ioread32be ioread32be
-#ifdef CONFIG_PPC32
-#define mmiowb()
-#else
-/*
- * Enforce synchronisation of stores vs. spin_unlock
- * (this does it explicitly, though our implementation of spin_unlock
- * does it implicitely too)
- */
-static inline void mmiowb(void)
+#ifdef __powerpc64__
+static inline u64 ioread64be(const void __iomem *addr)
{
- unsigned long tmp;
+ return readq_be(addr);
+}
+#define ioread64be ioread64be
+#endif /* __powerpc64__ */
- __asm__ __volatile__("sync; li %0,0; stb %0,%1(13)"
- : "=&r" (tmp) : "i" (offsetof(struct paca_struct, io_sync))
- : "memory");
+static inline void iowrite16be(u16 val, void __iomem *addr)
+{
+ writew_be(val, addr);
}
-#endif /* !CONFIG_PPC32 */
+#define iowrite16be iowrite16be
+
+static inline void iowrite32be(u32 val, void __iomem *addr)
+{
+ writel_be(val, addr);
+}
+#define iowrite32be iowrite32be
+
+#ifdef __powerpc64__
+static inline void iowrite64be(u64 val, void __iomem *addr)
+{
+ writeq_be(val, addr);
+}
+#define iowrite64be iowrite64be
+#endif /* __powerpc64__ */
+
+struct pci_dev;
+void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
+#define pci_iounmap pci_iounmap
+void __iomem *ioport_map(unsigned long port, unsigned int len);
+#define ioport_map ioport_map
+#endif
static inline void iosync(void)
{
@@ -671,7 +782,6 @@ static inline void iosync(void)
#define IO_SPACE_LIMIT ~(0UL)
-
/**
* ioremap - map bus memory into CPU space
* @address: bus address of the memory
@@ -691,45 +801,42 @@ static inline void iosync(void)
* * ioremap_prot allows to specify the page flags as an argument and can
* also be hooked by the platform via ppc_md.
*
- * * ioremap_nocache is identical to ioremap
- *
* * ioremap_wc enables write combining
*
- * * iounmap undoes such a mapping and can be hooked
+ * * ioremap_wt enables write through
*
- * * __ioremap_at (and the pending __iounmap_at) are low level functions to
- * create hand-made mappings for use only by the PCI code and cannot
- * currently be hooked. Must be page aligned.
+ * * ioremap_coherent maps coherent cached memory
*
- * * __ioremap is the low level implementation used by ioremap and
- * ioremap_prot and cannot be hooked (but can be used by a hook on one
- * of the previous ones)
+ * * iounmap undoes such a mapping and can be hooked
*
* * __ioremap_caller is the same as above but takes an explicit caller
* reference rather than using __builtin_return_address(0)
*
- * * __iounmap, is the low level implementation used by iounmap and cannot
- * be hooked (but can be used by a hook on iounmap)
- *
*/
extern void __iomem *ioremap(phys_addr_t address, unsigned long size);
-extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
- unsigned long flags);
+#define ioremap ioremap
+#define ioremap_prot ioremap_prot
extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
-#define ioremap_nocache(addr, size) ioremap((addr), (size))
+#define ioremap_wc ioremap_wc
-extern void iounmap(volatile void __iomem *addr);
+#ifdef CONFIG_PPC32
+void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
+#define ioremap_wt ioremap_wt
+#endif
-extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
- unsigned long flags);
-extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
- unsigned long flags, void *caller);
+void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
+#define ioremap_cache(addr, size) \
+ ioremap_prot((addr), (size), PAGE_KERNEL)
+
+#define iounmap iounmap
-extern void __iounmap(volatile void __iomem *addr);
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size);
-extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
- unsigned long size, unsigned long flags);
-extern void __iounmap_at(void *ea, unsigned long size);
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot);
+
+extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
+ pgprot_t prot, void *caller);
/*
* When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation
@@ -744,8 +851,10 @@ extern void __iounmap_at(void *ea, unsigned long size);
#define mmio_read16be(addr) readw_be(addr)
#define mmio_read32be(addr) readl_be(addr)
+#define mmio_read64be(addr) readq_be(addr)
#define mmio_write16be(val, addr) writew_be(val, addr)
#define mmio_write32be(val, addr) writel_be(val, addr)
+#define mmio_write64be(val, addr) writeq_be(val, addr)
#define mmio_insb(addr, dst, count) readsb(addr, dst, count)
#define mmio_insw(addr, dst, count) readsw(addr, dst, count)
#define mmio_insl(addr, dst, count) readsl(addr, dst, count)
@@ -765,10 +874,13 @@ extern void __iounmap_at(void *ea, unsigned long size);
* almost all conceivable cases a device driver should not be using
* this function
*/
-static inline unsigned long virt_to_phys(volatile void * address)
+static inline unsigned long virt_to_phys(const volatile void * address)
{
+ WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && !virt_addr_valid(address));
+
return __pa((unsigned long)address);
}
+#define virt_to_phys virt_to_phys
/**
* phys_to_virt - map physical address to virtual
@@ -786,14 +898,10 @@ static inline void * phys_to_virt(unsigned long address)
{
return (void *)__va(address);
}
+#define phys_to_virt phys_to_virt
/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
-/*
- * 32 bits still uses virt_to_bus() for it's implementation of DMA
+ * 32 bits still uses virt_to_bus() for its implementation of DMA
* mappings se we have to keep it defined here. We also have some old
* drivers (shame shame shame) that use bus_to_virt() and haven't been
* fixed yet so I need to define it here.
@@ -806,6 +914,7 @@ static inline unsigned long virt_to_bus(volatile void * address)
return 0;
return __pa(address) + PCI_DRAM_OFFSET;
}
+#define virt_to_bus virt_to_bus
static inline void * bus_to_virt(unsigned long address)
{
@@ -813,8 +922,7 @@ static inline void * bus_to_virt(unsigned long address)
return NULL;
return __va(address - PCI_DRAM_OFFSET);
}
-
-#define page_to_bus(page) (page_to_phys(page) + PCI_DRAM_OFFSET)
+#define bus_to_virt bus_to_virt
#endif /* CONFIG_PPC32 */
@@ -851,8 +959,15 @@ static inline void * bus_to_virt(unsigned long address)
#define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
-void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
- size_t size, unsigned long flags);
+#include <asm-generic/io.h>
+
+#ifdef __powerpc64__
+static inline void __raw_writeq_be(unsigned long v, volatile void __iomem *addr)
+{
+ __raw_writeq((__force unsigned long)cpu_to_be64(v), addr);
+}
+#define __raw_writeq_be __raw_writeq_be
+#endif // __powerpc64__
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/io_event_irq.h b/arch/powerpc/include/asm/io_event_irq.h
index b1a9a1be3c21..290c7530d1b6 100644
--- a/arch/powerpc/include/asm/io_event_irq.h
+++ b/arch/powerpc/include/asm/io_event_irq.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2010, 2011 Mark Nelson and Tseng-Hui (Frank) Lin, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_IO_EVENT_IRQ_H
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 42632c7a2a4e..b410021ad4c6 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
* Rewrite, cleanup:
* Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_IOMMU_H
@@ -25,24 +12,70 @@
#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/device.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/bitops.h>
#include <asm/machdep.h>
#include <asm/types.h>
+#include <asm/pci-bridge.h>
+#include <asm/asm-const.h>
#define IOMMU_PAGE_SHIFT_4K 12
#define IOMMU_PAGE_SIZE_4K (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K)
#define IOMMU_PAGE_MASK_4K (~((1 << IOMMU_PAGE_SHIFT_4K) - 1))
-#define IOMMU_PAGE_ALIGN_4K(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE_4K)
+#define IOMMU_PAGE_ALIGN_4K(addr) ALIGN(addr, IOMMU_PAGE_SIZE_4K)
#define IOMMU_PAGE_SIZE(tblptr) (ASM_CONST(1) << (tblptr)->it_page_shift)
#define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1))
-#define IOMMU_PAGE_ALIGN(addr, tblptr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE(tblptr))
+#define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr))
+
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
+
+#define MIN_DDW_VPMEM_DMA_WINDOW SZ_2G
/* Boot time flags */
extern int iommu_is_off;
extern int iommu_force_on;
+struct iommu_table_ops {
+ /*
+ * When called with direction==DMA_NONE, it is equal to clear().
+ * uaddr is a linear map address.
+ */
+ int (*set)(struct iommu_table *tbl,
+ long index, long npages,
+ unsigned long uaddr,
+ enum dma_data_direction direction,
+ unsigned long attrs);
+#ifdef CONFIG_IOMMU_API
+ /*
+ * Exchanges existing TCE with new TCE plus direction bits;
+ * returns old TCE and DMA direction mask.
+ * @tce is a physical address.
+ */
+ int (*xchg_no_kill)(struct iommu_table *tbl,
+ long index,
+ unsigned long *hpa,
+ enum dma_data_direction *direction);
+
+ void (*tce_kill)(struct iommu_table *tbl,
+ unsigned long index,
+ unsigned long pages);
+
+ __be64 *(*useraddrptr)(struct iommu_table *tbl, long index, bool alloc);
+#endif
+ void (*clear)(struct iommu_table *tbl,
+ long index, long npages);
+ /* get() returns a physical address */
+ unsigned long (*get)(struct iommu_table *tbl, long index);
+ void (*flush)(struct iommu_table *tbl);
+ void (*free)(struct iommu_table *tbl);
+};
+
+/* These are used by VIO */
+extern struct iommu_table_ops iommu_table_lpar_multi_ops;
+extern struct iommu_table_ops iommu_table_pseries_ops;
+
/*
* IOMAP_MAX_ORDER defines the largest contiguous block
* of dma space we can get. IOMAP_MAX_ORDER = 13
@@ -63,6 +96,9 @@ struct iommu_pool {
struct iommu_table {
unsigned long it_busno; /* Bus number this table belongs to */
unsigned long it_size; /* Size of iommu table in entries */
+ unsigned long it_indirect_levels;
+ unsigned long it_level_size;
+ unsigned long it_allocated_size;
unsigned long it_offset; /* Offset into global table */
unsigned long it_base; /* mapped address of tce table */
unsigned long it_index; /* which iommu table this is */
@@ -74,12 +110,20 @@ struct iommu_table {
struct iommu_pool pools[IOMMU_NR_POOLS];
unsigned long *it_map; /* A simple allocation bitmap for now */
unsigned long it_page_shift;/* table iommu page size */
-#ifdef CONFIG_IOMMU_API
- struct iommu_group *it_group;
-#endif
- void (*set_bypass)(struct iommu_table *tbl, bool enable);
+ struct list_head it_group_list;/* List of iommu_table_group_link */
+ __be64 *it_userspace; /* userspace view of the table */
+ struct iommu_table_ops *it_ops;
+ struct kref it_kref;
+ int it_nid;
+ unsigned long it_reserved_start; /* Start of not-DMA-able (MMIO) area */
+ unsigned long it_reserved_end;
};
+#define IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry) \
+ ((tbl)->it_ops->useraddrptr((tbl), (entry), false))
+#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+ ((tbl)->it_ops->useraddrptr((tbl), (entry), true))
+
/* Pure 2^n version of get_order */
static inline __attribute_const__
int get_iommu_order(unsigned long size, struct iommu_table *tbl)
@@ -90,60 +134,140 @@ int get_iommu_order(unsigned long size, struct iommu_table *tbl)
struct scatterlist;
-static inline void set_iommu_table_base(struct device *dev, void *base)
+#ifdef CONFIG_PPC64
+
+static inline void set_iommu_table_base(struct device *dev,
+ struct iommu_table *base)
{
- dev->archdata.dma_data.iommu_table_base = base;
+ dev->archdata.iommu_table_base = base;
}
static inline void *get_iommu_table_base(struct device *dev)
{
- return dev->archdata.dma_data.iommu_table_base;
+ return dev->archdata.iommu_table_base;
}
-/* Frees table for an individual device node */
-extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
+
+extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
+extern int iommu_tce_table_put(struct iommu_table *tbl);
/* Initializes an iommu_table based in values set in the passed-in
* structure
*/
-extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
- int nid);
+extern struct iommu_table *iommu_init_table(struct iommu_table *tbl,
+ int nid, unsigned long res_start, unsigned long res_end);
+bool iommu_table_in_use(struct iommu_table *tbl);
+extern void iommu_table_reserve_pages(struct iommu_table *tbl,
+ unsigned long res_start, unsigned long res_end);
+extern void iommu_table_clear(struct iommu_table *tbl);
+
+#define IOMMU_TABLE_GROUP_MAX_TABLES 2
+
+struct iommu_table_group;
+
+struct iommu_table_group_ops {
+ unsigned long (*get_table_size)(
+ __u32 page_shift,
+ __u64 window_size,
+ __u32 levels);
+ long (*create_table)(struct iommu_table_group *table_group,
+ int num,
+ __u32 page_shift,
+ __u64 window_size,
+ __u32 levels,
+ struct iommu_table **ptbl);
+ long (*set_window)(struct iommu_table_group *table_group,
+ int num,
+ struct iommu_table *tblnew);
+ long (*unset_window)(struct iommu_table_group *table_group,
+ int num);
+ /* Switch ownership from platform code to external user (e.g. VFIO) */
+ long (*take_ownership)(struct iommu_table_group *table_group, struct device *dev);
+ /* Switch ownership from external user (e.g. VFIO) back to core */
+ void (*release_ownership)(struct iommu_table_group *table_group, struct device *dev);
+};
+
+struct iommu_table_group_link {
+ struct list_head next;
+ struct rcu_head rcu;
+ struct iommu_table_group *table_group;
+};
+
+struct iommu_table_group {
+ /* IOMMU properties */
+ __u32 tce32_start;
+ __u32 tce32_size;
+ __u64 pgsizes; /* Bitmap of supported page sizes */
+ __u32 max_dynamic_windows_supported;
+ __u32 max_levels;
+
+ struct iommu_group *group;
+ struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+ struct iommu_table_group_ops *ops;
+};
+
#ifdef CONFIG_IOMMU_API
-extern void iommu_register_group(struct iommu_table *tbl,
+
+extern void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num);
-extern int iommu_add_device(struct device *dev);
-extern void iommu_del_device(struct device *dev);
+extern int iommu_add_device(struct iommu_table_group *table_group,
+ struct device *dev);
+extern long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction);
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction);
+extern void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages);
+int dev_has_iommu_table(struct device *dev, void *data);
+
#else
-static inline void iommu_register_group(struct iommu_table *tbl,
+static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
unsigned long pe_num)
{
}
-static inline int iommu_add_device(struct device *dev)
+static inline int iommu_add_device(struct iommu_table_group *table_group,
+ struct device *dev)
{
return 0;
}
-static inline void iommu_del_device(struct device *dev)
+static inline int dev_has_iommu_table(struct device *dev, void *data)
{
+ return 0;
}
#endif /* !CONFIG_IOMMU_API */
-static inline void set_iommu_table_base_and_group(struct device *dev,
- void *base)
+u64 dma_iommu_get_required_mask(struct device *dev);
+#else
+
+static inline void *get_iommu_table_base(struct device *dev)
{
- set_iommu_table_base(dev, base);
- iommu_add_device(dev);
+ return NULL;
}
-extern int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
- struct scatterlist *sglist, int nelems,
- unsigned long mask, enum dma_data_direction direction,
- struct dma_attrs *attrs);
-extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction,
- struct dma_attrs *attrs);
+static inline int dma_iommu_dma_supported(struct device *dev, u64 mask)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PPC64 */
+
+extern int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ unsigned long mask,
+ enum dma_data_direction direction,
+ unsigned long attrs);
+extern void ppc_iommu_unmap_sg(struct iommu_table *tbl,
+ struct scatterlist *sglist,
+ int nelems,
+ enum dma_data_direction direction,
+ unsigned long attrs);
extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
size_t size, dma_addr_t *dma_handle,
@@ -154,23 +278,16 @@ extern dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
struct page *page, unsigned long offset,
size_t size, unsigned long mask,
enum dma_data_direction direction,
- struct dma_attrs *attrs);
+ unsigned long attrs);
extern void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
- struct dma_attrs *attrs);
+ unsigned long attrs);
-extern void iommu_init_early_pSeries(void);
-extern void iommu_init_early_dart(void);
+void __init iommu_init_early_pSeries(void);
+extern void iommu_init_early_dart(struct pci_controller_ops *controller_ops);
extern void iommu_init_early_pasemi(void);
-extern void alloc_dart_table(void);
#if defined(CONFIG_PPC64) && defined(CONFIG_PM)
-static inline void iommu_save(void)
-{
- if (ppc_md.iommu_save)
- ppc_md.iommu_save();
-}
-
static inline void iommu_restore(void)
{
if (ppc_md.iommu_restore)
@@ -179,25 +296,28 @@ static inline void iommu_restore(void)
#endif
/* The API to support IOMMU operations for VFIO */
-extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce_value,
- unsigned long npages);
-extern int iommu_tce_put_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce);
-extern int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
- unsigned long hwaddr, enum dma_data_direction direction);
-extern unsigned long iommu_clear_tce(struct iommu_table *tbl,
- unsigned long entry);
-extern int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
- unsigned long entry, unsigned long pages);
-extern int iommu_put_tce_user_mode(struct iommu_table *tbl,
- unsigned long entry, unsigned long tce);
+extern int iommu_tce_check_ioba(unsigned long page_shift,
+ unsigned long offset, unsigned long size,
+ unsigned long ioba, unsigned long npages);
+extern int iommu_tce_check_gpa(unsigned long page_shift,
+ unsigned long gpa);
+
+#define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \
+ (iommu_tce_check_ioba((tbl)->it_page_shift, \
+ (tbl)->it_offset, (tbl)->it_size, \
+ (ioba), (npages)) || (tce_value))
+#define iommu_tce_put_param_check(tbl, ioba, gpa) \
+ (iommu_tce_check_ioba((tbl)->it_page_shift, \
+ (tbl)->it_offset, (tbl)->it_size, \
+ (ioba), 1) || \
+ iommu_tce_check_gpa((tbl)->it_page_shift, (gpa)))
extern void iommu_flush_tce(struct iommu_table *tbl);
-extern int iommu_take_ownership(struct iommu_table *tbl);
-extern void iommu_release_ownership(struct iommu_table *tbl);
extern enum dma_data_direction iommu_tce_direction(unsigned long tce);
+extern unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir);
+
+extern const struct dma_map_ops dma_iommu_ops;
#endif /* __KERNEL__ */
#endif /* _ASM_IOMMU_H */
diff --git a/arch/powerpc/include/asm/ipic.h b/arch/powerpc/include/asm/ipic.h
index fb59829983b8..b47ca7dc7199 100644
--- a/arch/powerpc/include/asm/ipic.h
+++ b/arch/powerpc/include/asm/ipic.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* IPIC external definitions and structure.
*
* Maintainer: Kumar Gala <galak@kernel.crashing.org>
*
* Copyright 2005 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#ifdef __KERNEL__
#ifndef __ASM_IPIC_H__
@@ -69,11 +65,7 @@ enum ipic_mcp_irq {
IPIC_MCP_MU = 7,
};
-extern int ipic_set_priority(unsigned int irq, unsigned int priority);
-extern void ipic_set_highest_priority(unsigned int irq);
-extern void ipic_set_default_priority(void);
-extern void ipic_enable_mcp(enum ipic_mcp_irq mcp_irq);
-extern void ipic_disable_mcp(enum ipic_mcp_irq mcp_irq);
+void __init ipic_set_default_priority(void);
extern u32 ipic_get_mcp_status(void);
extern void ipic_clear_mcp_status(u32 mask);
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 41f13cec8a8f..aa3751960ffd 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -1,15 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifdef __KERNEL__
#ifndef _ASM_POWERPC_IRQ_H
#define _ASM_POWERPC_IRQ_H
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#include <linux/irqdomain.h>
#include <linux/threads.h>
#include <linux/list.h>
#include <linux/radix-tree.h>
@@ -20,22 +16,14 @@
extern atomic_t ppc_n_lost_interrupts;
-/* This number is used when no interrupt has been assigned */
-#define NO_IRQ (0)
-
/* Total number of virq in the platform */
#define NR_IRQS CONFIG_NR_IRQS
-/* Same thing, used by the generic IRQ code */
-#define NR_IRQS_LEGACY NUM_ISA_INTERRUPTS
+/* Number of irqs reserved for a legacy isa controller */
+#define NR_IRQS_LEGACY 16
extern irq_hw_number_t virq_to_hw(unsigned int virq);
-/**
- * irq_early_init - Init irq remapping subsystem
- */
-extern void irq_early_init(void);
-
static __inline__ int irq_canonicalize(int irq)
{
return irq;
@@ -43,37 +31,33 @@ static __inline__ int irq_canonicalize(int irq)
extern int distribute_irqs;
-struct irqaction;
struct pt_regs;
-#define __ARCH_HAS_DO_SOFTIRQ
-
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE
/*
* Per-cpu stacks for handling critical, debug and machine check
* level interrupts.
*/
-extern struct thread_info *critirq_ctx[NR_CPUS];
-extern struct thread_info *dbgirq_ctx[NR_CPUS];
-extern struct thread_info *mcheckirq_ctx[NR_CPUS];
-extern void exc_lvl_ctx_init(void);
-#else
-#define exc_lvl_ctx_init()
+extern void *critirq_ctx[NR_CPUS];
+extern void *dbgirq_ctx[NR_CPUS];
+extern void *mcheckirq_ctx[NR_CPUS];
#endif
/*
* Per-cpu stacks for handling hard and soft interrupts.
*/
-extern struct thread_info *hardirq_ctx[NR_CPUS];
-extern struct thread_info *softirq_ctx[NR_CPUS];
+extern void *hardirq_ctx[NR_CPUS];
+extern void *softirq_ctx[NR_CPUS];
-extern void irq_ctx_init(void);
-extern void call_do_softirq(struct thread_info *tp);
-extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
-extern void do_IRQ(struct pt_regs *regs);
-extern void __do_irq(struct pt_regs *regs);
+void __do_IRQ(struct pt_regs *regs);
int irq_choose_cpu(const struct cpumask *mask);
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
+extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
+ int exclude_cpu);
+#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
+#endif
+
#endif /* _ASM_IRQ_H */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/irq_regs.h b/arch/powerpc/include/asm/irq_regs.h
deleted file mode 100644
index ba94b51a0a70..000000000000
--- a/arch/powerpc/include/asm/irq_regs.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#include <asm-generic/irq_regs.h>
-
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
new file mode 100644
index 000000000000..c6d3078bd8c3
--- /dev/null
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_IRQ_WORK_H
+#define _ASM_POWERPC_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return true;
+}
+
+#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index f2149066fe5d..1351fb40fe74 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -1,73 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* IRQ flags handling
*/
#ifndef _ASM_IRQFLAGS_H
#define _ASM_IRQFLAGS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Get definitions for arch_local_save_flags(x), etc.
*/
#include <asm/hw_irq.h>
-#else
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_IRQSOFF_TRACER
-/*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
-#define TRACE_WITH_FRAME_BUFFER(func) \
- mflr r0; \
- stdu r1, -STACK_FRAME_OVERHEAD(r1); \
- std r0, 16(r1); \
- stdu r1, -STACK_FRAME_OVERHEAD(r1); \
- bl func; \
- ld r1, 0(r1); \
- ld r1, 0(r1);
-#else
-#define TRACE_WITH_FRAME_BUFFER(func) \
- bl func;
-#endif
-
-/*
- * These are calls to C code, so the caller must be prepared for volatiles to
- * be clobbered.
- */
-#define TRACE_ENABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on)
-#define TRACE_DISABLE_INTS TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
-
-/*
- * This is used by assembly code to soft-disable interrupts first and
- * reconcile irq state.
- *
- * NB: This may call C code, so the caller must be prepared for volatiles to
- * be clobbered.
- */
-#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACASOFTIRQEN(r13); \
- lbz __rB,PACAIRQHAPPENED(r13); \
- cmpwi cr0,__rA,0; \
- li __rA,0; \
- ori __rB,__rB,PACA_IRQ_HARD_DIS; \
- stb __rB,PACAIRQHAPPENED(r13); \
- beq 44f; \
- stb __rA,PACASOFTIRQEN(r13); \
- TRACE_DISABLE_INTS; \
-44:
-
-#else
-#define TRACE_ENABLE_INTS
-#define TRACE_DISABLE_INTS
-
-#define RECONCILE_IRQ_STATE(__rA, __rB) \
- lbz __rA,PACAIRQHAPPENED(r13); \
- li __rB,0; \
- ori __rA,__rA,PACA_IRQ_HARD_DIS; \
- stb __rB,PACASOFTIRQEN(r13); \
- stb __rA,PACAIRQHAPPENED(r13)
-#endif
#endif
#endif
diff --git a/arch/powerpc/include/asm/isa-bridge.h b/arch/powerpc/include/asm/isa-bridge.h
new file mode 100644
index 000000000000..47295894bf91
--- /dev/null
+++ b/arch/powerpc/include/asm/isa-bridge.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ISA_BRIDGE_H
+#define __ISA_BRIDGE_H
+
+#ifdef CONFIG_PPC64
+
+extern void isa_bridge_find_early(struct pci_controller *hose);
+extern void isa_bridge_init_non_pci(struct device_node *np);
+
+static inline int isa_vaddr_is_ioport(void __iomem *address)
+{
+ /* Check if address hits the reserved legacy IO range */
+ unsigned long ea = (unsigned long)address;
+ return ea >= ISA_IO_BASE && ea < ISA_IO_END;
+}
+
+#else
+
+static inline int isa_vaddr_is_ioport(void __iomem *address)
+{
+ /* No specific ISA handling on ppc32 at this stage, it
+ * all goes through PCI
+ */
+ return 0;
+}
+
+#endif
+
+#endif /* __ISA_BRIDGE_H */
+
diff --git a/arch/powerpc/include/asm/jump_label.h b/arch/powerpc/include/asm/jump_label.h
index efbf9a322a23..d4eaba459a0e 100644
--- a/arch/powerpc/include/asm/jump_label.h
+++ b/arch/powerpc/include/asm/jump_label.h
@@ -1,53 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_JUMP_LABEL_H
#define _ASM_POWERPC_JUMP_LABEL_H
/*
* Copyright 2010 Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/feature-fixups.h>
+#include <asm/asm-const.h>
#define JUMP_ENTRY_TYPE stringify_in_c(FTR_ENTRY_LONG)
#define JUMP_LABEL_NOP_SIZE 4
-static __always_inline bool arch_static_branch(struct static_key *key)
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\n\t"
- "nop\n\t"
+ asm goto("1:\n\t"
+ "nop # arch_static_branch\n\t"
".pushsection __jump_table, \"aw\"\n\t"
- JUMP_ENTRY_TYPE "1b, %l[l_yes], %c0\n\t"
+ ".long 1b - ., %l[l_yes] - .\n\t"
+ JUMP_ENTRY_TYPE "%c0 - .\n\t"
".popsection \n\t"
- : : "i" (key) : : l_yes);
+ : : "i" (&((char *)key)[branch]) : : l_yes);
+
return false;
l_yes:
return true;
}
-#ifdef CONFIG_PPC64
-typedef u64 jump_label_t;
-#else
-typedef u32 jump_label_t;
-#endif
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+ asm goto("1:\n\t"
+ "b %l[l_yes] # arch_static_branch_jump\n\t"
+ ".pushsection __jump_table, \"aw\"\n\t"
+ ".long 1b - ., %l[l_yes] - .\n\t"
+ JUMP_ENTRY_TYPE "%c0 - .\n\t"
+ ".popsection \n\t"
+ : : "i" (&((char *)key)[branch]) : : l_yes);
-struct jump_entry {
- jump_label_t code;
- jump_label_t target;
- jump_label_t key;
-};
+ return false;
+l_yes:
+ return true;
+}
#else
#define ARCH_STATIC_BRANCH(LABEL, KEY) \
1098: nop; \
.pushsection __jump_table, "aw"; \
- FTR_ENTRY_LONG 1098b, LABEL, KEY; \
+ .long 1098b - ., LABEL - .; \
+ FTR_ENTRY_LONG KEY - .; \
.popsection
#endif
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
new file mode 100644
index 000000000000..045804a86f98
--- /dev/null
+++ b/arch/powerpc/include/asm/kasan.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#if defined(CONFIG_KASAN) && !defined(CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX)
+#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn)
+#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn)
+#else
+#define _GLOBAL_KASAN(fn) _GLOBAL(fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn)
+#define EXPORT_SYMBOL_KASAN(fn)
+#endif
+
+#ifndef __ASSEMBLER__
+
+#include <asm/page.h>
+#include <linux/sizes.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+#if defined(CONFIG_EXECMEM) && defined(CONFIG_PPC32)
+#define KASAN_KERN_START ALIGN_DOWN(PAGE_OFFSET - SZ_256M, SZ_256M)
+#else
+#define KASAN_KERN_START PAGE_OFFSET
+#endif
+
+#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
+ (KASAN_KERN_START >> KASAN_SHADOW_SCALE_SHIFT))
+
+#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
+
+#ifdef CONFIG_PPC32
+#define KASAN_SHADOW_END (-(-KASAN_SHADOW_START >> KASAN_SHADOW_SCALE_SHIFT))
+#elif defined(CONFIG_PPC_BOOK3S_64)
+/*
+ * The shadow ends before the highest accessible address
+ * because we don't need a shadow for the shadow. Instead:
+ * c00e000000000000 << 3 + a80e000000000000 = c00fc00000000000
+ */
+#define KASAN_SHADOW_END 0xc00fc00000000000UL
+
+#else
+
+/*
+ * The shadow ends before the highest accessible address
+ * because we don't need a shadow for the shadow.
+ * But it doesn't hurt to have a shadow for the shadow,
+ * keep shadow end aligned eases things.
+ */
+#define KASAN_SHADOW_END 0xc000200000000000UL
+
+#endif
+
+#ifdef CONFIG_KASAN
+
+void kasan_early_init(void);
+void kasan_mmu_init(void);
+void kasan_init(void);
+void kasan_late_init(void);
+#else
+static inline void kasan_init(void) { }
+static inline void kasan_mmu_init(void) { }
+static inline void kasan_late_init(void) { }
+#endif
+
+void kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte);
+int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end);
+int kasan_init_region(void *start, size_t size);
+
+#endif /* __ASSEMBLER__ */
+#endif
diff --git a/arch/powerpc/include/asm/kdebug.h b/arch/powerpc/include/asm/kdebug.h
index ae6d206728af..0f7c1ef37d0d 100644
--- a/arch/powerpc/include/asm/kdebug.h
+++ b/arch/powerpc/include/asm/kdebug.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_KDEBUG_H
#define _ASM_POWERPC_KDEBUG_H
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/kdump.h b/arch/powerpc/include/asm/kdump.h
index c9776202d7ec..802644178f43 100644
--- a/arch/powerpc/include/asm/kdump.h
+++ b/arch/powerpc/include/asm/kdump.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC64_KDUMP_H
#define _PPC64_KDUMP_H
@@ -30,7 +31,7 @@
#endif /* CONFIG_CRASH_DUMP */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#if defined(CONFIG_CRASH_DUMP) && !defined(CONFIG_NONSTATIC_KERNEL)
extern void reserve_kdump_trampoline(void);
@@ -41,6 +42,6 @@ static inline void reserve_kdump_trampoline(void) { ; }
static inline void setup_kdump_trampoline(void) { ; }
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __PPC64_KDUMP_H */
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index 16d7e33d35e9..4bbf9f699aaa 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -1,8 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_KEXEC_H
#define _ASM_POWERPC_KEXEC_H
#ifdef __KERNEL__
-#if defined(CONFIG_FSL_BOOKE) || defined(CONFIG_44x)
+#if defined(CONFIG_PPC_85xx) || defined(CONFIG_44x)
/*
* On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory
@@ -48,13 +49,78 @@
#define KEXEC_STATE_IRQS_OFF 1
#define KEXEC_STATE_REAL_MODE 2
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/reg.h>
typedef void (*crash_shutdown_t)(void);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
+struct kimage;
+struct pt_regs;
+
+extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
+ master to copy new code to 0 */
+extern void default_machine_kexec(struct kimage *image);
+
+void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer,
+ unsigned long start_address) __noreturn;
+void kexec_copy_flush(struct kimage *image);
+
+#ifdef CONFIG_KEXEC_FILE
+extern const struct kexec_file_ops kexec_elf64_ops;
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ struct crash_mem *exclude_ranges;
+
+ unsigned long backup_start;
+ void *backup_buf;
+ void *fdt;
+};
+
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+ unsigned long cmdline_len);
+int setup_purgatory(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+
+#ifdef CONFIG_PPC64
+struct kexec_buf;
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len);
+#define arch_kexec_kernel_image_probe arch_kexec_kernel_image_probe
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+
+int arch_check_excluded_range(struct kimage *image, unsigned long start,
+ unsigned long end);
+#define arch_check_excluded_range arch_check_excluded_range
+
+
+int load_crashdump_segments_ppc64(struct kimage *image,
+ struct kexec_buf *kbuf);
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr);
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image, struct crash_mem *rmem);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem *rmem);
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_KEXEC_FILE */
+
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_RESERVE
+int __init overlaps_crashkernel(unsigned long start, unsigned long size);
+extern void arch_reserve_crashkernel(void);
+#else
+static inline void arch_reserve_crashkernel(void) {}
+static inline int overlaps_crashkernel(unsigned long start, unsigned long size) { return 0; }
+#endif
+
+#if defined(CONFIG_CRASH_DUMP)
/*
* This function is responsible for capturing register states if coming
* via panic or invoking dump using sysrq-trigger.
@@ -68,34 +134,43 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
ppc_save_regs(newregs);
}
-extern void kexec_smp_wait(void); /* get and clear naca physid, wait for
- master to copy new code to 0 */
+#ifdef CONFIG_CRASH_HOTPLUG
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
+#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
+
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
+
+unsigned int arch_crash_get_elfcorehdr_size(void);
+#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
+#endif /* CONFIG_CRASH_HOTPLUG */
+
extern int crashing_cpu;
extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *));
+extern void crash_ipi_callback(struct pt_regs *regs);
+extern int crash_wake_offline;
-struct kimage;
-struct pt_regs;
-extern void default_machine_kexec(struct kimage *image);
-extern int default_machine_kexec_prepare(struct kimage *image);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
extern int crash_shutdown_register(crash_shutdown_t handler);
extern int crash_shutdown_unregister(crash_shutdown_t handler);
+extern void default_machine_crash_shutdown(struct pt_regs *regs);
-extern void machine_kexec_simple(struct kimage *image);
+extern void crash_kexec_prepare(void);
extern void crash_kexec_secondary(struct pt_regs *regs);
-extern int overlaps_crashkernel(unsigned long start, unsigned long size);
-extern void reserve_crashkernel(void);
-extern void machine_kexec_mask_interrupts(void);
-
-#else /* !CONFIG_KEXEC */
-static inline void crash_kexec_secondary(struct pt_regs *regs) { }
-static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
+static inline bool kdump_in_progress(void)
{
- return 0;
+ return crashing_cpu >= 0;
}
-static inline void reserve_crashkernel(void) { ; }
+bool is_kdump_kernel(void);
+#define is_kdump_kernel is_kdump_kernel
+#if defined(CONFIG_PPC_RTAS)
+void crash_free_reserved_phys_range(unsigned long begin, unsigned long end);
+#define crash_free_reserved_phys_range crash_free_reserved_phys_range
+#endif /* CONFIG_PPC_RTAS */
+
+#else /* !CONFIG_CRASH_DUMP */
+static inline void crash_kexec_secondary(struct pt_regs *regs) { }
static inline int crash_shutdown_register(crash_shutdown_t handler)
{
@@ -107,7 +182,34 @@ static inline int crash_shutdown_unregister(crash_shutdown_t handler)
return 0;
}
-#endif /* CONFIG_KEXEC */
-#endif /* ! __ASSEMBLY__ */
+static inline bool kdump_in_progress(void)
+{
+ return false;
+}
+
+static inline void crash_ipi_callback(struct pt_regs *regs) { }
+
+static inline void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
+{
+}
+
+#endif /* CONFIG_CRASH_DUMP */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+int update_cpus_node(void *fdt);
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/kexec.h>
+#endif
+
+#ifndef reset_sprs
+#define reset_sprs reset_sprs
+static inline void reset_sprs(void)
+{
+}
+#endif
+
+#endif /* ! __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KEXEC_H */
diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
new file mode 100644
index 000000000000..14055896cbcb
--- /dev/null
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_KEXEC_RANGES_H
+#define _ASM_POWERPC_KEXEC_RANGES_H
+
+#define MEM_RANGE_CHUNK_SZ 2048 /* Memory ranges size chunk */
+
+void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges);
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges);
+int get_crash_memory_ranges(struct crash_mem **mem_ranges);
+int get_usable_memory_ranges(struct crash_mem **mem_ranges);
+#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/include/asm/keylargo.h b/arch/powerpc/include/asm/keylargo.h
index 2156315d8a90..debdf548009d 100644
--- a/arch/powerpc/include/asm/keylargo.h
+++ b/arch/powerpc/include/asm/keylargo.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_KEYLARGO_H
#define _ASM_POWERPC_KEYLARGO_H
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h
new file mode 100644
index 000000000000..1f7cab58ab2c
--- /dev/null
+++ b/arch/powerpc/include/asm/kfence.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * powerpc KFENCE support.
+ *
+ * Copyright (C) 2020 CS GROUP France
+ */
+
+#ifndef __ASM_POWERPC_KFENCE_H
+#define __ASM_POWERPC_KFENCE_H
+
+#include <linux/mm.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+#define ARCH_FUNC_PREFIX "."
+#endif
+
+extern bool kfence_early_init;
+extern bool kfence_disabled;
+
+static inline void disable_kfence(void)
+{
+ kfence_disabled = true;
+}
+
+static inline bool arch_kfence_init_pool(void)
+{
+ return !kfence_disabled;
+}
+
+static inline bool kfence_early_init_enabled(void)
+{
+ return IS_ENABLED(CONFIG_KFENCE) && kfence_early_init;
+}
+
+#ifdef CONFIG_PPC64
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ struct page *page = virt_to_page((void *)addr);
+
+ __kernel_map_pages(page, 1, !protect);
+
+ return true;
+}
+#else
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ pte_t *kpte = virt_to_kpte(addr);
+
+ if (protect) {
+ pte_update(&init_mm, addr, kpte, _PAGE_PRESENT, 0, 0);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+ } else {
+ pte_update(&init_mm, addr, kpte, 0, _PAGE_PRESENT, 0);
+ }
+
+ return true;
+}
+#endif
+
+#endif /* __ASM_POWERPC_KFENCE_H */
diff --git a/arch/powerpc/include/asm/kgdb.h b/arch/powerpc/include/asm/kgdb.h
index 9db24e77b9f4..f39531903325 100644
--- a/arch/powerpc/include/asm/kgdb.h
+++ b/arch/powerpc/include/asm/kgdb.h
@@ -21,14 +21,17 @@
#ifndef __POWERPC_KGDB_H__
#define __POWERPC_KGDB_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define BREAK_INSTR_SIZE 4
#define BUFMAX ((NUMREGBYTES * 2) + 512)
#define OUTBUFMAX ((NUMREGBYTES * 2) + 512)
+
+#define BREAK_INSTR 0x7d821008 /* twge r2, r2 */
+
static inline void arch_kgdb_breakpoint(void)
{
- asm(".long 0x7d821008"); /* twge r2, r2 */
+ asm(stringify_in_c(.long BREAK_INSTR));
}
#define CACHE_FLUSH_IS_SAFE 1
#define DBG_MAX_REG_NUM 70
@@ -49,7 +52,7 @@ static inline void arch_kgdb_breakpoint(void)
/* On non-E500 family PPC32 we determine the size by picking the last
* register we need, but on E500 we skip sections so we list what we
* need to store, and add it up. */
-#ifndef CONFIG_E500
+#ifndef CONFIG_PPC_E500
#define MAXREG (PT_FPSCR+1)
#else
/* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr*/
@@ -59,6 +62,6 @@ static inline void arch_kgdb_breakpoint(void)
/* CR/LR, R1, R2, R13-R31 inclusive. */
#define NUMCRITREGBYTES (23 * sizeof(int))
#endif /* 32/64 */
-#endif /* !(__ASSEMBLY__) */
+#endif /* !(__ASSEMBLER__) */
#endif /* !__POWERPC_KGDB_H__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/kmap_types.h b/arch/powerpc/include/asm/kmap_types.h
deleted file mode 100644
index 5acabbd7ac6f..000000000000
--- a/arch/powerpc/include/asm/kmap_types.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_POWERPC_KMAP_TYPES_H
-#define _ASM_POWERPC_KMAP_TYPES_H
-
-#ifdef __KERNEL__
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define KM_TYPE_NR 16
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_KMAP_TYPES_H */
diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h
index af15d4d8d604..dfe2e5ad3b21 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -1,23 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_KPROBES_H
#define _ASM_POWERPC_KPROBES_H
+
+#include <asm-generic/kprobes.h>
+
#ifdef __KERNEL__
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
@@ -29,51 +19,39 @@
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
+#include <linux/module.h>
#include <asm/probes.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
+#ifdef CONFIG_KPROBES
#define __ARCH_WANT_KPROBES_INSN_SLOT
struct pt_regs;
struct kprobe;
-typedef ppc_opcode_t kprobe_opcode_t;
-#define MAX_INSN_SIZE 1
+typedef u32 kprobe_opcode_t;
-#ifdef CONFIG_PPC64
-/*
- * 64bit powerpc uses function descriptors.
- * Handle cases where:
- * - User passes a <.symbol> or <module:.symbol>
- * - User passes a <symbol> or <module:symbol>
- * - User passes a non-existent symbol, kallsyms_lookup_name
- * returns 0. Don't deref the NULL pointer in that case
- */
-#define kprobe_lookup_name(name, addr) \
-{ \
- addr = (kprobe_opcode_t *)kallsyms_lookup_name(name); \
- if (addr) { \
- char *colon; \
- if ((colon = strchr(name, ':')) != NULL) { \
- colon++; \
- if (*colon != '\0' && *colon != '.') \
- addr = (kprobe_opcode_t *)ppc_function_entry(addr); \
- } else if (name[0] != '.') \
- addr = (kprobe_opcode_t *)ppc_function_entry(addr); \
- } else { \
- char dot_name[KSYM_NAME_LEN]; \
- dot_name[0] = '.'; \
- dot_name[1] = '\0'; \
- strncat(dot_name, name, KSYM_NAME_LEN - 2); \
- addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name); \
- } \
-}
-#endif
+extern kprobe_opcode_t optinsn_slot;
+
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_op_address[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_end[];
+
+/* Fixed instruction size for powerpc */
+#define MAX_INSN_SIZE 2
+#define MAX_OPTIMIZED_LENGTH sizeof(kprobe_opcode_t) /* 4 bytes */
+#define MAX_OPTINSN_SIZE (optprobe_template_end - optprobe_template_entry)
+#define RELATIVEJUMP_SIZE sizeof(kprobe_opcode_t) /* 4 bytes */
#define flush_insn_slot(p) do { } while (0)
#define kretprobe_blacklist_size 0
-void kretprobe_trampoline(void);
+void __kretprobe_trampoline(void);
extern void arch_remove_kprobe(struct kprobe *p);
/* Architecture specific copy of original instruction */
@@ -97,12 +75,21 @@ struct prev_kprobe {
struct kprobe_ctlblk {
unsigned long kprobe_status;
unsigned long kprobe_saved_msr;
- struct pt_regs jprobe_saved_regs;
struct prev_kprobe prev_kprobe;
};
-extern int kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data);
+struct arch_optimized_insn {
+ kprobe_opcode_t copied_insn[1];
+ /* detour buffer */
+ kprobe_opcode_t *insn;
+};
+
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_handler(struct pt_regs *regs);
+extern int kprobe_post_handler(struct pt_regs *regs);
+#else
+static inline int kprobe_handler(struct pt_regs *regs) { return 0; }
+static inline int kprobe_post_handler(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_KPROBES */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
new file mode 100644
index 000000000000..dab63b82a8d4
--- /dev/null
+++ b/arch/powerpc/include/asm/kup.h
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_H_
+#define _ASM_POWERPC_KUP_H_
+
+#define KUAP_READ 1
+#define KUAP_WRITE 2
+#define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE)
+
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+
+static __always_inline bool kuap_is_disabled(void);
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/kup.h>
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include <asm/nohash/32/kup-8xx.h>
+#endif
+
+#ifdef CONFIG_BOOKE
+#include <asm/nohash/kup-booke.h>
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#include <asm/book3s/32/kup.h>
+#endif
+
+#ifdef __ASSEMBLER__
+#ifndef CONFIG_PPC_KUAP
+.macro kuap_check_amr gpr1, gpr2
+.endm
+
+#endif
+
+#else /* !__ASSEMBLER__ */
+
+extern bool disable_kuep;
+extern bool disable_kuap;
+
+#include <linux/pgtable.h>
+
+void setup_kup(void);
+void setup_kuep(bool disabled);
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled);
+
+static __always_inline bool kuap_is_disabled(void)
+{
+ return !mmu_has_feature(MMU_FTR_KUAP);
+}
+#else
+static inline void setup_kuap(bool disabled) { }
+
+static __always_inline bool kuap_is_disabled(void) { return true; }
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return false;
+}
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs) { }
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { }
+
+/*
+ * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
+ * the L1D cache after user accesses. Only include the empty stubs for other
+ * platforms.
+ */
+#ifndef CONFIG_PPC_BOOK3S_64
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir) { }
+static __always_inline void prevent_user_access(unsigned long dir) { }
+static __always_inline unsigned long prevent_user_access_return(void) { return 0UL; }
+static __always_inline void restore_user_access(unsigned long flags) { }
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#endif /* CONFIG_PPC_KUAP */
+
+static __always_inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ if (kuap_is_disabled())
+ return false;
+
+ return __bad_kuap_fault(regs, address, is_write);
+}
+
+static __always_inline void kuap_lock(void)
+{
+#ifdef __kuap_lock
+ if (kuap_is_disabled())
+ return;
+
+ __kuap_lock();
+#endif
+}
+
+static __always_inline void kuap_save_and_lock(struct pt_regs *regs)
+{
+#ifdef __kuap_save_and_lock
+ if (kuap_is_disabled())
+ return;
+
+ __kuap_save_and_lock(regs);
+#endif
+}
+
+static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr)
+{
+ if (kuap_is_disabled())
+ return;
+
+ __kuap_kernel_restore(regs, amr);
+}
+
+static __always_inline unsigned long kuap_get_and_assert_locked(void)
+{
+#ifdef __kuap_get_and_assert_locked
+ if (!kuap_is_disabled())
+ return __kuap_get_and_assert_locked();
+#endif
+ return 0;
+}
+
+static __always_inline void kuap_assert_locked(void)
+{
+ if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG))
+ kuap_get_and_assert_locked();
+}
+
+static __always_inline void allow_read_from_user(const void __user *from, unsigned long size)
+{
+ barrier_nospec();
+ allow_user_access(NULL, from, size, KUAP_READ);
+}
+
+static __always_inline void allow_write_to_user(void __user *to, unsigned long size)
+{
+ allow_user_access(to, NULL, size, KUAP_WRITE);
+}
+
+static __always_inline void allow_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ barrier_nospec();
+ allow_user_access(to, from, size, KUAP_READ_WRITE);
+}
+
+static __always_inline void prevent_read_from_user(const void __user *from, unsigned long size)
+{
+ prevent_user_access(KUAP_READ);
+}
+
+static __always_inline void prevent_write_to_user(void __user *to, unsigned long size)
+{
+ prevent_user_access(KUAP_WRITE);
+}
+
+static __always_inline void prevent_read_write_user(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ prevent_user_access(KUAP_READ_WRITE);
+}
+
+static __always_inline void prevent_current_access_user(void)
+{
+ prevent_user_access(KUAP_READ_WRITE);
+}
+
+static __always_inline void prevent_current_read_from_user(void)
+{
+ prevent_user_access(KUAP_READ);
+}
+
+static __always_inline void prevent_current_write_to_user(void)
+{
+ prevent_user_access(KUAP_WRITE);
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_KUP_H_ */
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 465dfcb82c92..f9af8df09077 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -20,7 +9,7 @@
#ifndef __POWERPC_KVM_ASM_H__
#define __POWERPC_KVM_ASM_H__
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CONFIG_64BIT
#define PPC_STD(sreg, offset, areg) std sreg, (offset)(areg)
#define PPC_LD(treg, offset, areg) ld treg, (offset)(areg)
@@ -53,17 +42,17 @@
#define BOOKE_INTERRUPT_DEBUG 15
/* E500 */
-#define BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL 32
-#define BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST 33
-/*
- * TODO: Unify 32-bit and 64-bit kernel exception handlers to use same defines
- */
-#define BOOKE_INTERRUPT_SPE_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_SPE_FP_DATA BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
-#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL
-#define BOOKE_INTERRUPT_ALTIVEC_ASSIST \
- BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST
+#ifdef CONFIG_SPE_POSSIBLE
+#define BOOKE_INTERRUPT_SPE_UNAVAIL 32
+#define BOOKE_INTERRUPT_SPE_FP_DATA 33
#define BOOKE_INTERRUPT_SPE_FP_ROUND 34
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_INTERRUPT_ALTIVEC_UNAVAIL 32
+#define BOOKE_INTERRUPT_ALTIVEC_ASSIST 33
+#endif
+
#define BOOKE_INTERRUPT_PERFORMANCE_MONITOR 35
#define BOOKE_INTERRUPT_DOORBELL 36
#define BOOKE_INTERRUPT_DOORBELL_CRITICAL 37
@@ -84,13 +73,13 @@
#define BOOK3S_INTERRUPT_INST_STORAGE 0x400
#define BOOK3S_INTERRUPT_INST_SEGMENT 0x480
#define BOOK3S_INTERRUPT_EXTERNAL 0x500
-#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501
#define BOOK3S_INTERRUPT_EXTERNAL_HV 0x502
#define BOOK3S_INTERRUPT_ALIGNMENT 0x600
#define BOOK3S_INTERRUPT_PROGRAM 0x700
#define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800
#define BOOK3S_INTERRUPT_DECREMENTER 0x900
#define BOOK3S_INTERRUPT_HV_DECREMENTER 0x980
+#define BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER 0x1980
#define BOOK3S_INTERRUPT_DOORBELL 0xa00
#define BOOK3S_INTERRUPT_SYSCALL 0xc00
#define BOOK3S_INTERRUPT_TRACE 0xd00
@@ -99,12 +88,24 @@
#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40
#define BOOK3S_INTERRUPT_HMI 0xe60
#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80
+#define BOOK3S_INTERRUPT_H_VIRT 0xea0
#define BOOK3S_INTERRUPT_PERFMON 0xf00
#define BOOK3S_INTERRUPT_ALTIVEC 0xf20
#define BOOK3S_INTERRUPT_VSX 0xf40
#define BOOK3S_INTERRUPT_FAC_UNAVAIL 0xf60
#define BOOK3S_INTERRUPT_H_FAC_UNAVAIL 0xf80
+/* book3s_hv */
+
+#define BOOK3S_INTERRUPT_HV_SOFTPATCH 0x1500
+
+/*
+ * Special trap used to indicate to host that this is a
+ * passthrough interrupt that could not be handled
+ * completely in the guest.
+ */
+#define BOOK3S_INTERRUPT_HV_RM_HARD 0x5555
+
#define BOOK3S_IRQPRIO_SYSTEM_RESET 0
#define BOOK3S_IRQPRIO_DATA_SEGMENT 1
#define BOOK3S_IRQPRIO_INST_SEGMENT 2
@@ -122,8 +123,7 @@
#define BOOK3S_IRQPRIO_EXTERNAL 14
#define BOOK3S_IRQPRIO_DECREMENTER 15
#define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 16
-#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 17
-#define BOOK3S_IRQPRIO_MAX 18
+#define BOOK3S_IRQPRIO_MAX 17
#define BOOK3S_HFLAG_DCBZ32 0x1
#define BOOK3S_HFLAG_SLB 0x2
@@ -136,6 +136,7 @@
#define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
#define RESUME_FLAG_ARCH1 (1<<2)
+#define RESUME_FLAG_ARCH2 (1<<3)
#define RESUME_GUEST 0
#define RESUME_GUEST_NV RESUME_FLAG_NV
@@ -147,7 +148,11 @@
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
#define KVM_GUEST_MODE_HOST_HV 4
+#define KVM_GUEST_MODE_HV_P9 5 /* ISA >= v3.0 path */
#define KVM_INST_FETCH_FAILED -1
+/* Extract PO and XOP opcode fields */
+#define PO_XOP_OPCODE_MASK 0xfc0007fe
+
#endif /* __POWERPC_KVM_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6acf0c2a0f99..e1ff291ba891 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -23,6 +12,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/guest-state-buffer.h>
struct kvmppc_bat {
u64 raw;
@@ -69,6 +59,44 @@ struct hpte_cache {
int pagesize;
};
+/*
+ * Struct for a virtual core.
+ * Note: entry_exit_map combines a bitmap of threads that have entered
+ * in the bottom 8 bits and a bitmap of threads that have exited in the
+ * next 8 bits. This is so that we can atomically set the entry bit
+ * iff the exit map is 0 without taking a lock.
+ */
+struct kvmppc_vcore {
+ int n_runnable;
+ int num_threads;
+ int entry_exit_map;
+ int napping_threads;
+ int first_vcpuid;
+ u16 pcpu;
+ u16 last_cpu;
+ u8 vcore_state;
+ u8 in_guest;
+ struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
+ struct list_head preempt_list;
+ spinlock_t lock;
+ struct rcuwait wait;
+ spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
+ u64 stolen_tb;
+ u64 preempt_tb;
+ struct kvm_vcpu *runner;
+ struct kvm *kvm;
+ u64 tb_offset; /* guest timebase - host timebase */
+ u64 tb_offset_applied; /* timebase offset currently in force */
+ ulong lpcr;
+ u32 arch_compat;
+ ulong pcr;
+ ulong dpdes; /* doorbell state (POWER8) */
+ ulong vtb; /* virtual timebase */
+ ulong conferring_threads;
+ unsigned int halt_poll_ns;
+ atomic_t online_count;
+};
+
struct kvmppc_vcpu_book3s {
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
struct {
@@ -83,6 +111,7 @@ struct kvmppc_vcpu_book3s {
u64 sdr1;
u64 hior;
u64 msr_mask;
+ u64 vtb;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
u32 vsid_next;
@@ -106,10 +135,6 @@ struct kvmppc_vcpu_book3s {
spinlock_t mmu_lock;
};
-#define CONTEXT_HOST 0
-#define CONTEXT_GUEST 1
-#define CONTEXT_GUEST_END 2
-
#define VSID_REAL 0x07ffffffffc00000ULL
#define VSID_BAT 0x07ffffffffb00000ULL
#define VSID_64K 0x0800000000000000ULL
@@ -131,11 +156,12 @@ extern void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr);
extern void kvmppc_mmu_flush_segment(struct kvm_vcpu *vcpu, ulong eaddr, ulong seg_size);
extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu);
-extern int kvmppc_book3s_hv_page_fault(struct kvm_run *run,
- struct kvm_vcpu *vcpu, unsigned long addr,
- unsigned long status);
+extern int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
+ unsigned long addr, unsigned long status);
extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
unsigned long slb_v, unsigned long valid);
+extern int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
+ unsigned long gpa, gva_t ea, int is_store);
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
@@ -148,20 +174,72 @@ extern void kvmppc_mmu_hpte_sysexit(void);
extern int kvmppc_mmu_hv_init(void);
extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc);
+extern int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr);
+extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+ gva_t eaddr, void *to, void *from,
+ unsigned long n);
+extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+ void *to, unsigned long n);
+extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+ void *from, unsigned long n);
+extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 root,
+ u64 *pte_ret_p);
+extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 table,
+ int table_index, u64 *pte_ret_p);
+extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, bool data, bool iswrite);
+extern void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ unsigned int pshift, u64 lpid);
+extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+ unsigned int shift,
+ const struct kvm_memory_slot *memslot,
+ u64 lpid);
+extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested,
+ bool writing, unsigned long gpa,
+ u64 lpid);
+extern int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+ unsigned long gpa,
+ struct kvm_memory_slot *memslot,
+ bool writing,
+ pte_t *inserted_pte, unsigned int *levelp);
+extern int kvmppc_init_vm_radix(struct kvm *kvm);
+extern void kvmppc_free_radix(struct kvm *kvm);
+extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd,
+ u64 lpid);
+extern int kvmppc_radix_init(void);
+extern void kvmppc_radix_exit(void);
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn);
+extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn);
+extern bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn);
+extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+ struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_radix_flush_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
+extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+
/* XXX remove this export when load_last_inst() is generic */
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data);
extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec);
extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec);
extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags);
+extern void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac);
extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
bool upper, u32 val);
extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
-extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu);
-extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
- bool *writable);
+extern int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu);
+extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa,
+ bool writing, bool *writable, struct page **page);
extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
unsigned long *rmap, long pte_index, int realmode);
+extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
+ unsigned long gfn, unsigned long psize);
extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index);
void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
@@ -170,20 +248,27 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel,
pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
unsigned long pte_index, unsigned long avpn,
unsigned long *hpret);
-extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
+extern long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map);
+extern void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+ struct kvm_memory_slot *memslot,
+ unsigned long *map);
+extern unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm,
+ unsigned long lpcr);
extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr,
unsigned long mask);
extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr);
+extern int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu);
+extern int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu);
+extern void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu);
+
extern void kvmppc_entry_trampoline(void);
extern void kvmppc_hv_entry_trampoline(void);
extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst);
@@ -192,10 +277,112 @@ extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd);
extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm);
extern int kvmppc_hcall_impl_pr(unsigned long cmd);
extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd);
-extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu);
-extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu);
+extern void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu);
+
+long kvmppc_read_intr(void);
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
+#else
+static inline void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu) {}
+static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu) {}
+#endif
+
+extern unsigned long nested_capabilities;
+long kvmhv_nested_init(void);
+void kvmhv_nested_exit(void);
+void kvmhv_vm_nested_init(struct kvm *kvm);
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu);
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu);
+void kvmhv_flush_lpid(u64 lpid);
+void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1);
+void kvmhv_release_all_nested(struct kvm *kvm);
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
+ u64 time_limit, unsigned long lpcr);
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+ struct hv_guest_state *hr);
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu);
+
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
+
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+extern struct static_key_false __kvmhv_is_nestedv2;
+
+static inline bool kvmhv_is_nestedv2(void)
+{
+ return static_branch_unlikely(&__kvmhv_is_nestedv2);
+}
+
+static inline bool kvmhv_is_nestedv1(void)
+{
+ return !static_branch_likely(&__kvmhv_is_nestedv2);
+}
+
+#else
+
+static inline bool kvmhv_is_nestedv2(void)
+{
+ return false;
+}
+
+static inline bool kvmhv_is_nestedv1(void)
+{
+ return false;
+}
+
+#endif
+
+int __kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu, struct pt_regs *regs);
+int __kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu, struct pt_regs *regs);
+int __kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden);
+int __kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden);
+
+static inline int kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu,
+ struct pt_regs *regs)
+{
+ if (kvmhv_is_nestedv2())
+ return __kvmhv_nestedv2_reload_ptregs(vcpu, regs);
+ return 0;
+}
+static inline int kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu,
+ struct pt_regs *regs)
+{
+ if (kvmhv_is_nestedv2())
+ return __kvmhv_nestedv2_mark_dirty_ptregs(vcpu, regs);
+ return 0;
+}
+
+static inline int kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden)
+{
+ if (kvmhv_is_nestedv2())
+ return __kvmhv_nestedv2_mark_dirty(vcpu, iden);
+ return 0;
+}
+
+static inline int kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden)
+{
+ if (kvmhv_is_nestedv2())
+ return __kvmhv_nestedv2_cached_reload(vcpu, iden);
+ return 0;
+}
+
+extern int kvm_irq_bypass;
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
{
@@ -213,62 +400,74 @@ static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
- vcpu->arch.gpr[num] = val;
+ vcpu->arch.regs.gpr[num] = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_GPR(num));
}
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
- return vcpu->arch.gpr[num];
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_GPR(num)) < 0);
+ return vcpu->arch.regs.gpr[num];
}
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
- vcpu->arch.cr = val;
+ vcpu->arch.regs.ccr = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_CR);
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.cr;
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_CR) < 0);
+ return vcpu->arch.regs.ccr;
}
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.xer = val;
+ vcpu->arch.regs.xer = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_XER);
}
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.xer;
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_XER) < 0);
+ return vcpu->arch.regs.xer;
}
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.ctr = val;
+ vcpu->arch.regs.ctr = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_CTR);
}
static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.ctr;
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_CTR) < 0);
+ return vcpu->arch.regs.ctr;
}
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.lr = val;
+ vcpu->arch.regs.link = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LR);
}
static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.lr;
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_LR) < 0);
+ return vcpu->arch.regs.link;
}
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.pc = val;
+ vcpu->arch.regs.nip = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_NIA);
}
static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.pc;
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_NIA) < 0);
+ return vcpu->arch.regs.nip;
}
static inline u64 kvmppc_get_msr(struct kvm_vcpu *vcpu);
@@ -282,6 +481,147 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
return vcpu->arch.fault_dar;
}
+static inline u64 kvmppc_get_fpr(struct kvm_vcpu *vcpu, int i)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSRS(i)) < 0);
+ return vcpu->arch.fp.fpr[i][TS_FPROFFSET];
+}
+
+static inline void kvmppc_set_fpr(struct kvm_vcpu *vcpu, int i, u64 val)
+{
+ vcpu->arch.fp.fpr[i][TS_FPROFFSET] = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSRS(i));
+}
+
+static inline u64 kvmppc_get_fpscr(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_FPSCR) < 0);
+ return vcpu->arch.fp.fpscr;
+}
+
+static inline void kvmppc_set_fpscr(struct kvm_vcpu *vcpu, u64 val)
+{
+ vcpu->arch.fp.fpscr = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_FPSCR);
+}
+
+
+static inline u64 kvmppc_get_vsx_fpr(struct kvm_vcpu *vcpu, int i, int j)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSRS(i)) < 0);
+ return vcpu->arch.fp.fpr[i][j];
+}
+
+static inline void kvmppc_set_vsx_fpr(struct kvm_vcpu *vcpu, int i, int j,
+ u64 val)
+{
+ vcpu->arch.fp.fpr[i][j] = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSRS(i));
+}
+
+#ifdef CONFIG_ALTIVEC
+static inline void kvmppc_get_vsx_vr(struct kvm_vcpu *vcpu, int i, vector128 *v)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSRS(32 + i)) < 0);
+ *v = vcpu->arch.vr.vr[i];
+}
+
+static inline void kvmppc_set_vsx_vr(struct kvm_vcpu *vcpu, int i,
+ vector128 *val)
+{
+ vcpu->arch.vr.vr[i] = *val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSRS(32 + i));
+}
+
+static inline u32 kvmppc_get_vscr(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_VSCR) < 0);
+ return vcpu->arch.vr.vscr.u[3];
+}
+
+static inline void kvmppc_set_vscr(struct kvm_vcpu *vcpu, u32 val)
+{
+ vcpu->arch.vr.vscr.u[3] = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_VSCR);
+}
+#endif
+
+#define KVMPPC_BOOK3S_VCPU_ACCESSOR_SET(reg, size, iden) \
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \
+{ \
+ \
+ vcpu->arch.reg = val; \
+ kvmhv_nestedv2_mark_dirty(vcpu, iden); \
+}
+
+#define KVMPPC_BOOK3S_VCPU_ACCESSOR_GET(reg, size, iden) \
+static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
+{ \
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden) < 0); \
+ return vcpu->arch.reg; \
+}
+
+#define KVMPPC_BOOK3S_VCPU_ACCESSOR(reg, size, iden) \
+ KVMPPC_BOOK3S_VCPU_ACCESSOR_SET(reg, size, iden) \
+ KVMPPC_BOOK3S_VCPU_ACCESSOR_GET(reg, size, iden) \
+
+KVMPPC_BOOK3S_VCPU_ACCESSOR(pid, 32, KVMPPC_GSID_PIDR)
+KVMPPC_BOOK3S_VCPU_ACCESSOR(tar, 64, KVMPPC_GSID_TAR)
+KVMPPC_BOOK3S_VCPU_ACCESSOR(ebbhr, 64, KVMPPC_GSID_EBBHR)
+KVMPPC_BOOK3S_VCPU_ACCESSOR(ebbrr, 64, KVMPPC_GSID_EBBRR)
+KVMPPC_BOOK3S_VCPU_ACCESSOR(bescr, 64, KVMPPC_GSID_BESCR)
+KVMPPC_BOOK3S_VCPU_ACCESSOR(ic, 64, KVMPPC_GSID_IC)
+KVMPPC_BOOK3S_VCPU_ACCESSOR(vrsave, 64, KVMPPC_GSID_VRSAVE)
+
+
+#define KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(reg, size, iden) \
+static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \
+{ \
+ vcpu->arch.vcore->reg = val; \
+ kvmhv_nestedv2_mark_dirty(vcpu, iden); \
+}
+
+#define KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(reg, size, iden) \
+static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
+{ \
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden) < 0); \
+ return vcpu->arch.vcore->reg; \
+}
+
+#define KVMPPC_BOOK3S_VCORE_ACCESSOR(reg, size, iden) \
+ KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(reg, size, iden) \
+ KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(reg, size, iden) \
+
+
+KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB)
+KVMPPC_BOOK3S_VCORE_ACCESSOR(dpdes, 64, KVMPPC_GSID_DPDES)
+KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR)
+KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR)
+KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(tb_offset, 64, KVMPPC_GSID_TB_OFFSET)
+
+static inline u64 kvmppc_get_tb_offset(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.vcore->tb_offset;
+}
+
+static inline u64 kvmppc_get_dec_expires(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB) < 0);
+ return vcpu->arch.dec_expires;
+}
+
+static inline void kvmppc_set_dec_expires(struct kvm_vcpu *vcpu, u64 val)
+{
+ vcpu->arch.dec_expires = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_DEC_EXPIRY_TB);
+}
+
+/* Expiry time of vcpu DEC relative to host TB */
+static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
+{
+ return kvmppc_get_dec_expires(vcpu) - kvmppc_get_tb_offset(vcpu);
+}
+
static inline bool is_kvmppc_resume_guest(int r)
{
return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
@@ -294,6 +634,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
return !is_kvmppc_hv_enabled(vcpu->kvm);
}
+extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
+extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
+
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA
@@ -303,10 +646,54 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
/* TO = 31 for unconditional trap */
#define INS_TW 0x7fe00008
-/* LPIDs we support with this build -- runtime limit may be lower */
-#define KVMPPC_NR_LPIDS (LPID_RSVD + 1)
-
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000
+/*
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_IDS) space down to the
+ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
+ * (but not its actual threading mode, which is not available) to avoid
+ * collisions.
+ *
+ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
+ * 0) unchanged: if the guest is filling each VCORE completely then it will be
+ * using consecutive IDs and it will fill the space without any packing.
+ *
+ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
+ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
+ * added to avoid collisions.
+ *
+ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
+ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
+ * can be safely packed into the second half of each VCORE by adding an offset
+ * of (stride / 2).
+ *
+ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
+ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
+ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
+ *
+ * Finally, VCPU IDs from blocks 4..7 will only be seen if the guest is using a
+ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
+ * must be free to use.
+ *
+ * (The offsets for each block are stored in block_offsets[], indexed by the
+ * block number if the stride is 8. For cases where the guest's stride is less
+ * than 8, we can re-use the block_offsets array by multiplying the block
+ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
+ */
+static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
+{
+ const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
+ int stride = kvm->arch.emul_smt_mode;
+ int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
+ u32 packed_id;
+
+ if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
+ return 0;
+ packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
+ if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
+ return 0;
+ return packed_id;
+}
+
#endif /* __ASM_KVM_BOOK3S_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_32.h b/arch/powerpc/include/asm/kvm_book3s_32.h
index c720e0b3238d..e9d2e8463105 100644
--- a/arch/powerpc/include/asm/kvm_book3s_32.h
+++ b/arch/powerpc/include/asm/kvm_book3s_32.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
*
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0aa817933e6a..b936e174eefd 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
*
@@ -20,6 +9,105 @@
#ifndef __ASM_KVM_BOOK3S_64_H__
#define __ASM_KVM_BOOK3S_64_H__
+#include <linux/string.h>
+#include <asm/bitops.h>
+#include <asm/book3s/64/mmu-hash.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>
+
+/*
+ * Structure for a nested guest, that is, for a guest that is managed by
+ * one of our guests.
+ */
+struct kvm_nested_guest {
+ struct kvm *l1_host; /* L1 VM that owns this nested guest */
+ int l1_lpid; /* lpid L1 guest thinks this guest is */
+ int shadow_lpid; /* real lpid of this nested guest */
+ pgd_t *shadow_pgtable; /* our page table for this guest */
+ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */
+ u64 process_table; /* process table entry for this guest */
+ long refcnt; /* number of pointers to this struct */
+ struct mutex tlb_lock; /* serialize page faults and tlbies */
+ struct kvm_nested_guest *next;
+ cpumask_t need_tlb_flush;
+ short prev_cpu[NR_CPUS];
+ u8 radix; /* is this nested guest radix */
+};
+
+/*
+ * We define a nested rmap entry as a single 64-bit quantity
+ * 0xFFF0000000000000 12-bit lpid field
+ * 0x000FFFFFFFFFF000 40-bit guest 4k page frame number
+ * 0x0000000000000001 1-bit single entry flag
+ */
+#define RMAP_NESTED_LPID_MASK 0xFFF0000000000000UL
+#define RMAP_NESTED_LPID_SHIFT (52)
+#define RMAP_NESTED_GPA_MASK 0x000FFFFFFFFFF000UL
+#define RMAP_NESTED_IS_SINGLE_ENTRY 0x0000000000000001UL
+
+/* Structure for a nested guest rmap entry */
+struct rmap_nested {
+ struct llist_node list;
+ u64 rmap;
+};
+
+/*
+ * for_each_nest_rmap_safe - iterate over the list of nested rmap entries
+ * safe against removal of the list entry or NULL list
+ * @pos: a (struct rmap_nested *) to use as a loop cursor
+ * @node: pointer to the first entry
+ * NOTE: this can be NULL
+ * @rmapp: an (unsigned long *) in which to return the rmap entries on each
+ * iteration
+ * NOTE: this must point to already allocated memory
+ *
+ * The nested_rmap is a llist of (struct rmap_nested) entries pointed to by the
+ * rmap entry in the memslot. The list is always terminated by a "single entry"
+ * stored in the list element of the final entry of the llist. If there is ONLY
+ * a single entry then this is itself in the rmap entry of the memslot, not a
+ * llist head pointer.
+ *
+ * Note that the iterator below assumes that a nested rmap entry is always
+ * non-zero. This is true for our usage because the LPID field is always
+ * non-zero (zero is reserved for the host).
+ *
+ * This should be used to iterate over the list of rmap_nested entries with
+ * processing done on the u64 rmap value given by each iteration. This is safe
+ * against removal of list entries and it is always safe to call free on (pos).
+ *
+ * e.g.
+ * struct rmap_nested *cursor;
+ * struct llist_node *first;
+ * unsigned long rmap;
+ * for_each_nest_rmap_safe(cursor, first, &rmap) {
+ * do_something(rmap);
+ * free(cursor);
+ * }
+ */
+#define for_each_nest_rmap_safe(pos, node, rmapp) \
+ for ((pos) = llist_entry((node), typeof(*(pos)), list); \
+ (node) && \
+ (*(rmapp) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
+ ((u64) (node)) : ((pos)->rmap))) && \
+ (((node) = ((RMAP_NESTED_IS_SINGLE_ENTRY & ((u64) (node))) ? \
+ ((struct llist_node *) ((pos) = NULL)) : \
+ (pos)->list.next)), true); \
+ (pos) = llist_entry((node), typeof(*(pos)), list))
+
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+ bool create);
+void kvmhv_put_nested(struct kvm_nested_guest *gp);
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid);
+
+/* Encoding of first parameter for H_TLB_INVALIDATE */
+#define H_TLBIE_P1_ENC(ric, prs, r) (___PPC_RIC(ric) | ___PPC_PRS(prs) | \
+ ___PPC_R(r))
+
+/* Power architecture requires HPT is at least 256kiB, at most 64TiB */
+#define PPC_MIN_HPT_ORDER 18
+#define PPC_MAX_HPT_ORDER 46
+
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
static inline struct kvmppc_book3s_shadow_vcpu *svcpu_get(struct kvm_vcpu *vcpu)
{
@@ -33,14 +121,37 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
}
#endif
-#define SPAPR_TCE_SHIFT 12
-
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+static inline bool kvm_is_radix(struct kvm *kvm)
+{
+ return kvm->arch.radix;
+}
+
+static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
+{
+ bool radix;
+
+ if (vcpu->arch.nested)
+ radix = vcpu->arch.nested->radix;
+ else
+ radix = kvm_is_radix(vcpu->kvm);
+
+ return radix;
+}
+
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb);
+
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
#endif
-#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
+/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an errata.
+ */
+#define HDSISR_CANARY 0x7fff
/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
@@ -86,32 +197,100 @@ static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
return old == 0;
}
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
+static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
- int i, shift;
- unsigned int mask;
+ hpte_v &= ~HPTE_V_HVLOCK;
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hpte[0] = cpu_to_be64(hpte_v);
+}
- /* start from 1 ignoring MMU_PAGE_4K */
- for (i = 1; i < MMU_PAGE_COUNT; i++) {
+/* Without barrier */
+static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
+{
+ hpte_v &= ~HPTE_V_HVLOCK;
+ hpte[0] = cpu_to_be64(hpte_v);
+}
- /* invalid penc */
- if (mmu_psize_defs[psize].penc[i] == -1)
- continue;
- /*
- * encoding bits per actual page size
- * PTE LP actual page size
- * rrrr rrrz >=8KB
- * rrrr rrzz >=16KB
- * rrrr rzzz >=32KB
- * rrrr zzzz >=64KB
- * .......
- */
- shift = mmu_psize_defs[i].shift - LP_SHIFT;
- if (shift > LP_BITS)
- shift = LP_BITS;
- mask = (1 << shift) - 1;
- if ((lp & mask) == mmu_psize_defs[psize].penc[i])
- return i;
+/*
+ * These functions encode knowledge of the POWER7/8/9 hardware
+ * interpretations of the HPTE LP (large page size) field.
+ */
+static inline int kvmppc_hpte_page_shifts(unsigned long h, unsigned long l)
+{
+ unsigned int lphi;
+
+ if (!(h & HPTE_V_LARGE))
+ return 12; /* 4kB */
+ lphi = (l >> 16) & 0xf;
+ switch ((l >> 12) & 0xf) {
+ case 0:
+ return !lphi ? 24 : 0; /* 16MB */
+ break;
+ case 1:
+ return 16; /* 64kB */
+ break;
+ case 3:
+ return !lphi ? 34 : 0; /* 16GB */
+ break;
+ case 7:
+ return (16 << 8) + 12; /* 64kB in 4kB */
+ break;
+ case 8:
+ if (!lphi)
+ return (24 << 8) + 16; /* 16MB in 64kB */
+ if (lphi == 3)
+ return (24 << 8) + 12; /* 16MB in 4kB */
+ break;
+ }
+ return 0;
+}
+
+static inline int kvmppc_hpte_base_page_shift(unsigned long h, unsigned long l)
+{
+ return kvmppc_hpte_page_shifts(h, l) & 0xff;
+}
+
+static inline int kvmppc_hpte_actual_page_shift(unsigned long h, unsigned long l)
+{
+ int tmp = kvmppc_hpte_page_shifts(h, l);
+
+ if (tmp >= 0x100)
+ tmp >>= 8;
+ return tmp;
+}
+
+static inline unsigned long kvmppc_actual_pgsz(unsigned long v, unsigned long r)
+{
+ int shift = kvmppc_hpte_actual_page_shift(v, r);
+
+ if (shift)
+ return 1ul << shift;
+ return 0;
+}
+
+static inline int kvmppc_pgsize_lp_encoding(int base_shift, int actual_shift)
+{
+ switch (base_shift) {
+ case 12:
+ switch (actual_shift) {
+ case 12:
+ return 0;
+ case 16:
+ return 7;
+ case 24:
+ return 0x38;
+ }
+ break;
+ case 16:
+ switch (actual_shift) {
+ case 16:
+ return 1;
+ case 24:
+ return 8;
+ }
+ break;
+ case 24:
+ return 0;
}
return -1;
}
@@ -119,23 +298,15 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize)
static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
unsigned long pte_index)
{
- int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K;
- unsigned int penc;
+ int a_pgshift, b_pgshift;
unsigned long rb = 0, va_low, sllp;
- unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
-
- if (v & HPTE_V_LARGE) {
- for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) {
-
- /* valid entries have a shift value */
- if (!mmu_psize_defs[b_psize].shift)
- continue;
- a_psize = __hpte_actual_psize(lp, b_psize);
- if (a_psize != -1)
- break;
- }
+ b_pgshift = a_pgshift = kvmppc_hpte_page_shifts(v, r);
+ if (a_pgshift >= 0x100) {
+ b_pgshift &= 0xff;
+ a_pgshift >>= 8;
}
+
/*
* Ignore the top 14 bits of va
* v have top two bits covering segment size, hence move
@@ -148,7 +319,6 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
/* This covers 14..54 bits of va*/
rb = (v & ~0x7fUL) << 16; /* AVA field */
- rb |= v >> (62 - 8); /* B field */
/*
* AVA in v had cleared lower 23 bits. We need to derive
* that from pteg index
@@ -168,82 +338,42 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
va_low ^= v >> (SID_SHIFT_1T - 16);
va_low &= 0x7ff;
- switch (b_psize) {
- case MMU_PAGE_4K:
- sllp = ((mmu_psize_defs[a_psize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[a_psize].sllp & SLB_VSID_LP) >> 4);
- rb |= sllp << 5; /* AP field */
+ if (b_pgshift <= 12) {
+ if (a_pgshift > 12) {
+ sllp = (a_pgshift == 16) ? 5 : 4;
+ rb |= sllp << 5; /* AP field */
+ }
rb |= (va_low & 0x7ff) << 12; /* remaining 11 bits of AVA */
- break;
- default:
- {
+ } else {
int aval_shift;
/*
* remaining bits of AVA/LP fields
* Also contain the rr bits of LP
*/
- rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000;
+ rb |= (va_low << b_pgshift) & 0x7ff000;
/*
* Now clear not needed LP bits based on actual psize
*/
- rb &= ~((1ul << mmu_psize_defs[a_psize].shift) - 1);
+ rb &= ~((1ul << a_pgshift) - 1);
/*
* AVAL field 58..77 - base_page_shift bits of va
* we have space for 58..64 bits, Missing bits should
* be zero filled. +1 is to take care of L bit shift
*/
- aval_shift = 64 - (77 - mmu_psize_defs[b_psize].shift) + 1;
+ aval_shift = 64 - (77 - b_pgshift) + 1;
rb |= ((va_low << aval_shift) & 0xfe);
rb |= 1; /* L field */
- penc = mmu_psize_defs[b_psize].penc[a_psize];
- rb |= penc << 12; /* LP field */
- break;
- }
+ rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */
}
- rb |= (v >> 54) & 0x300; /* B field */
+ /*
+ * This sets both bits of the B field in the PTE. 0b1x values are
+ * reserved, but those will have been filtered by kvmppc_do_h_enter.
+ */
+ rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
return rb;
}
-static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l,
- bool is_base_size)
-{
-
- int size, a_psize;
- /* Look at the 8 bit LP value */
- unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1);
-
- /* only handle 4k, 64k and 16M pages for now */
- if (!(h & HPTE_V_LARGE))
- return 1ul << 12;
- else {
- for (size = 0; size < MMU_PAGE_COUNT; size++) {
- /* valid entries have a shift value */
- if (!mmu_psize_defs[size].shift)
- continue;
-
- a_psize = __hpte_actual_psize(lp, size);
- if (a_psize != -1) {
- if (is_base_size)
- return 1ul << mmu_psize_defs[size].shift;
- return 1ul << mmu_psize_defs[a_psize].shift;
- }
- }
-
- }
- return 0;
-}
-
-static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
-{
- return __hpte_page_size(h, l, 0);
-}
-
-static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l)
-{
- return __hpte_page_size(h, l, 1);
-}
-
static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
{
return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
@@ -265,78 +395,65 @@ static inline unsigned long hpte_make_readonly(unsigned long ptel)
return ptel;
}
-static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
+static inline bool hpte_cache_flags_ok(unsigned long hptel, bool is_ci)
{
- unsigned int wimg = ptel & HPTE_R_WIMG;
+ unsigned int wimg = hptel & HPTE_R_WIMG;
/* Handle SAO */
if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
cpu_has_feature(CPU_FTR_ARCH_206))
wimg = HPTE_R_M;
- if (!io_type)
+ if (!is_ci)
return wimg == HPTE_R_M;
-
- return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+ /*
+ * If the host mapping is cache inhibited, make sure hptel is also
+ * cache inhibited.
+ */
+ if (wimg & HPTE_R_W) /* FIXME!! is this ok for all guest. ? */
+ return false;
+ return !!(wimg & HPTE_R_I);
}
/*
* If it's present and writable, atomically set dirty and referenced bits and
- * return the PTE, otherwise return 0. If we find a transparent hugepage
- * and if it is marked splitting we return 0;
+ * return the PTE, otherwise return 0.
*/
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing,
- unsigned int hugepage)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *ptep, int writing)
{
pte_t old_pte, new_pte = __pte(0);
while (1) {
- old_pte = pte_val(*ptep);
/*
- * wait until _PAGE_BUSY is clear then set it atomically
+ * Make sure we don't reload from ptep
*/
- if (unlikely(old_pte & _PAGE_BUSY)) {
+ old_pte = READ_ONCE(*ptep);
+ /*
+ * wait until H_PAGE_BUSY is clear then set it atomically
+ */
+ if (unlikely(pte_val(old_pte) & H_PAGE_BUSY)) {
cpu_relax();
continue;
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /* If hugepage and is trans splitting return None */
- if (unlikely(hugepage &&
- pmd_trans_splitting(pte_pmd(old_pte))))
- return __pte(0);
-#endif
/* If pte is not present return None */
- if (unlikely(!(old_pte & _PAGE_PRESENT)))
+ if (unlikely(!pte_present(old_pte)))
return __pte(0);
new_pte = pte_mkyoung(old_pte);
if (writing && pte_write(old_pte))
new_pte = pte_mkdirty(new_pte);
- if (old_pte == __cmpxchg_u64((unsigned long *)ptep, old_pte,
- new_pte))
+ if (pte_xchg(ptep, old_pte, new_pte))
break;
}
return new_pte;
}
-
-/* Return HPTE cache control bits corresponding to Linux pte bits */
-static inline unsigned long hpte_cache_bits(unsigned long pte_val)
-{
-#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
- return pte_val & (HPTE_R_W | HPTE_R_I);
-#else
- return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
- ((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
-#endif
-}
-
static inline bool hpte_read_permission(unsigned long pp, unsigned long key)
{
if (key)
return PP_RWRX <= pp && pp <= PP_RXRX;
- return 1;
+ return true;
}
static inline bool hpte_write_permission(unsigned long pp, unsigned long key)
@@ -374,7 +491,7 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
if (pagesize <= PAGE_SIZE)
- return 1;
+ return true;
return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}
@@ -420,9 +537,164 @@ static inline void note_hpte_modification(struct kvm *kvm,
*/
static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
{
- return rcu_dereference_raw_notrace(kvm->memslots);
+ return rcu_dereference_raw_check(kvm->memslots[0]);
+}
+
+extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
+extern void kvmhv_radix_debugfs_init(struct kvm *kvm);
+
+extern void kvmhv_rm_send_ipi(int cpu);
+
+static inline unsigned long kvmppc_hpt_npte(struct kvm_hpt_info *hpt)
+{
+ /* HPTEs are 2**4 bytes long */
+ return 1UL << (hpt->order - 4);
}
+static inline unsigned long kvmppc_hpt_mask(struct kvm_hpt_info *hpt)
+{
+ /* 128 (2**7) bytes in each HPTEG */
+ return (1UL << (hpt->order - 7)) - 1;
+}
+
+/* Set bits in a dirty bitmap, which is in LE format */
+static inline void set_dirty_bits(unsigned long *map, unsigned long i,
+ unsigned long npages)
+{
+
+ if (npages >= 8)
+ memset((char *)map + i / 8, 0xff, npages / 8);
+ else
+ for (; npages; ++i, --npages)
+ __set_bit_le(i, map);
+}
+
+static inline void set_dirty_bits_atomic(unsigned long *map, unsigned long i,
+ unsigned long npages)
+{
+ if (npages >= 8)
+ memset((char *)map + i / 8, 0xff, npages / 8);
+ else
+ for (; npages; ++i, --npages)
+ set_bit_le(i, map);
+}
+
+static inline u64 sanitize_msr(u64 msr)
+{
+ msr &= ~MSR_HV;
+ msr |= MSR_ME;
+ return msr;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void copy_from_checkpoint(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
+ vcpu->arch.regs.xer = vcpu->arch.xer_tm;
+ vcpu->arch.regs.link = vcpu->arch.lr_tm;
+ vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
+ vcpu->arch.amr = vcpu->arch.amr_tm;
+ vcpu->arch.ppr = vcpu->arch.ppr_tm;
+ vcpu->arch.dscr = vcpu->arch.dscr_tm;
+ vcpu->arch.tar = vcpu->arch.tar_tm;
+ memcpy(vcpu->arch.regs.gpr, vcpu->arch.gpr_tm,
+ sizeof(vcpu->arch.regs.gpr));
+ vcpu->arch.fp = vcpu->arch.fp_tm;
+ vcpu->arch.vr = vcpu->arch.vr_tm;
+ vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static inline void copy_to_checkpoint(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
+ vcpu->arch.xer_tm = vcpu->arch.regs.xer;
+ vcpu->arch.lr_tm = vcpu->arch.regs.link;
+ vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
+ vcpu->arch.amr_tm = vcpu->arch.amr;
+ vcpu->arch.ppr_tm = vcpu->arch.ppr;
+ vcpu->arch.dscr_tm = vcpu->arch.dscr;
+ vcpu->arch.tar_tm = vcpu->arch.tar;
+ memcpy(vcpu->arch.gpr_tm, vcpu->arch.regs.gpr,
+ sizeof(vcpu->arch.regs.gpr));
+ vcpu->arch.fp_tm = vcpu->arch.fp;
+ vcpu->arch.vr_tm = vcpu->arch.vr;
+ vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+ unsigned long gpa, unsigned int level,
+ unsigned long mmu_seq, u64 lpid,
+ unsigned long *rmapp, struct rmap_nested **n_rmap);
+extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+ struct rmap_nested **n_rmap);
+extern void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long clr, unsigned long set,
+ unsigned long hpa, unsigned long nbytes);
+extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot,
+ unsigned long gpa, unsigned long hpa,
+ unsigned long nbytes);
+
+static inline pte_t *
+find_kvm_secondary_pte_unlocked(struct kvm *kvm, unsigned long ea,
+ unsigned *hshift)
+{
+ pte_t *pte;
+
+ pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
+ return pte;
+}
+
+static inline pte_t *find_kvm_secondary_pte(struct kvm *kvm, unsigned long ea,
+ unsigned *hshift)
+{
+ pte_t *pte;
+
+ VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+ "%s called with kvm mmu_lock not held \n", __func__);
+ pte = __find_linux_pte(kvm->arch.pgtable, ea, NULL, hshift);
+
+ return pte;
+}
+
+static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq,
+ unsigned long ea, unsigned *hshift)
+{
+ pte_t *pte;
+
+ VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+ "%s called with kvm mmu_lock not held \n", __func__);
+
+ if (mmu_invalidate_retry(kvm, mmu_seq))
+ return NULL;
+
+ pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift);
+
+ return pte;
+}
+
+extern pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
+ unsigned long ea, unsigned *hshift);
+
+int kvmhv_nestedv2_vcpu_create(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *io);
+void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu, struct kvmhv_nestedv2_io *io);
+int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit);
+int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1);
+int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu);
+int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa);
+
+int kvmhv_counters_tracepoint_regfunc(void);
+void kvmhv_counters_tracepoint_unregfunc(void);
+int kvmhv_get_l2_counters_status(void);
+void kvmhv_set_l2_counters_status(int cpu, bool status);
+u64 kvmhv_get_l1_to_l2_cs_time(void);
+u64 kvmhv_get_l2_to_l1_cs_time(void);
+u64 kvmhv_get_l2_runtime_agg(void);
+u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void);
+u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void);
+u64 kvmhv_get_l2_runtime_agg_vcpu(void);
+
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 5bdfb5dd3400..3435fe144908 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -25,7 +14,13 @@
#define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */
-#ifdef __ASSEMBLY__
+/* Maximum number of threads per physical core */
+#define MAX_SMT_THREADS 8
+
+/* Maximum number of subcores per physical core */
+#define MAX_SUBCORES 4
+
+#ifdef __ASSEMBLER__
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -63,7 +58,20 @@ kvmppc_resume_\intno:
#endif /* CONFIG_KVM_BOOK3S_HANDLER */
-#else /*__ASSEMBLY__ */
+#else /*__ASSEMBLER__ */
+
+struct kvmppc_vcore;
+
+/* Struct used for coordinating micro-threading (split-core) mode changes */
+struct kvm_split_mode {
+ unsigned long rpr;
+ unsigned long pmmar;
+ unsigned long ldbar;
+ u8 subcore_size;
+ u8 do_nap;
+ u8 napped[MAX_SMT_THREADS];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
+};
/*
* This struct goes in the PACA on 64-bit processors. It is used
@@ -88,10 +96,13 @@ struct kvmppc_host_state {
u8 hwthread_req;
u8 hwthread_state;
u8 host_ipi;
- u8 ptid;
+ u8 ptid; /* thread number within subcore when split */
+ u8 fake_suspend;
struct kvm_vcpu *kvm_vcpu;
struct kvmppc_vcore *kvm_vcore;
- unsigned long xics_phys;
+ void __iomem *xics_phys;
+ void __iomem *xive_tima_phys;
+ void __iomem *xive_tima_virt;
u32 saved_xirr;
u64 dabr;
u64 host_mmcr[7]; /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
@@ -100,6 +111,7 @@ struct kvmppc_host_state {
u64 host_spurr;
u64 host_dscr;
u64 dec_expires;
+ struct kvm_split_mode *kvm_split_mode;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
u64 cfar;
@@ -112,7 +124,7 @@ struct kvmppc_book3s_shadow_vcpu {
bool in_use;
ulong gpr[14];
u32 cr;
- u32 xer;
+ ulong xer;
ulong ctr;
ulong lr;
ulong pc;
@@ -138,11 +150,11 @@ struct kvmppc_book3s_shadow_vcpu {
#endif
};
-#endif /*__ASSEMBLY__ */
+#endif /*__ASSEMBLER__ */
/* Values for kvm_state */
#define KVM_HWTHREAD_IN_KERNEL 0
-#define KVM_HWTHREAD_IN_NAP 1
+#define KVM_HWTHREAD_IN_IDLE 1
#define KVM_HWTHREAD_IN_KVM 2
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_book3s_uvmem.h b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
new file mode 100644
index 000000000000..0a6319448cb6
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_book3s_uvmem.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KVM_BOOK3S_UVMEM_H__
+#define __ASM_KVM_BOOK3S_UVMEM_H__
+
+#ifdef CONFIG_PPC_UV
+int kvmppc_uvmem_init(void);
+void kvmppc_uvmem_free(void);
+bool kvmppc_uvmem_available(void);
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
+void kvmppc_uvmem_slot_free(struct kvm *kvm,
+ const struct kvm_memory_slot *slot);
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
+ unsigned long gra,
+ unsigned long flags,
+ unsigned long page_shift);
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm);
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+ struct kvm *kvm, bool skip_page_out);
+int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *new);
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *old);
+#else
+static inline int kvmppc_uvmem_init(void)
+{
+ return 0;
+}
+
+static inline void kvmppc_uvmem_free(void) { }
+
+static inline bool kvmppc_uvmem_available(void)
+{
+ return false;
+}
+
+static inline int
+kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+ return 0;
+}
+
+static inline void
+kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { }
+
+static inline unsigned long
+kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
+ unsigned long flags, unsigned long page_shift)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+ return -EFAULT;
+}
+
+static inline void
+kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
+ struct kvm *kvm, bool skip_page_out) { }
+
+static inline int kvmppc_uvmem_memslot_create(struct kvm *kvm,
+ const struct kvm_memory_slot *new)
+{
+ return H_UNSUPPORTED;
+}
+
+static inline void kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *old) { }
+
+#endif /* CONFIG_PPC_UV */
+#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index f7aa5cc395c4..7c3291aa8922 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
*
@@ -23,44 +12,45 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
-/* LPIDs we support with this build -- runtime limit may be lower */
+/*
+ * Number of available lpids. Only the low-order 6 bits of LPID register are
+ * implemented on e500mc+ cores.
+ */
#define KVMPPC_NR_LPIDS 64
#define KVMPPC_INST_EHPRIV 0x7c00021c
#define EHPRIV_OC_SHIFT 11
/* "ehpriv 1" : ehpriv with OC = 1 is used for debug emulation */
#define EHPRIV_OC_DEBUG 1
-#define KVMPPC_INST_EHPRIV_DEBUG (KVMPPC_INST_EHPRIV | \
- (EHPRIV_OC_DEBUG << EHPRIV_OC_SHIFT))
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
- vcpu->arch.gpr[num] = val;
+ vcpu->arch.regs.gpr[num] = val;
}
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
- return vcpu->arch.gpr[num];
+ return vcpu->arch.regs.gpr[num];
}
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
- vcpu->arch.cr = val;
+ vcpu->arch.regs.ccr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.cr;
+ return vcpu->arch.regs.ccr;
}
-static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
+static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.xer = val;
+ vcpu->arch.regs.xer = val;
}
-static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
+static inline ulong kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.xer;
+ return vcpu->arch.regs.xer;
}
static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
@@ -71,38 +61,50 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu)
static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.ctr = val;
+ vcpu->arch.regs.ctr = val;
}
static inline ulong kvmppc_get_ctr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.ctr;
+ return vcpu->arch.regs.ctr;
}
static inline void kvmppc_set_lr(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.lr = val;
+ vcpu->arch.regs.link = val;
}
static inline ulong kvmppc_get_lr(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.lr;
+ return vcpu->arch.regs.link;
}
static inline void kvmppc_set_pc(struct kvm_vcpu *vcpu, ulong val)
{
- vcpu->arch.pc = val;
+ vcpu->arch.regs.nip = val;
}
static inline ulong kvmppc_get_pc(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.pc;
+ return vcpu->arch.regs.nip;
}
+static inline void kvmppc_set_fpr(struct kvm_vcpu *vcpu, int i, u64 val)
+{
+ vcpu->arch.fp.fpr[i][TS_FPROFFSET] = val;
+}
+
+static inline u64 kvmppc_get_fpr(struct kvm_vcpu *vcpu, int i)
+{
+ return vcpu->arch.fp.fpr[i][TS_FPROFFSET];
+}
+
+#ifdef CONFIG_BOOKE
static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
{
return vcpu->arch.fault_dear;
}
+#endif
static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
{
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index e5f048bbcb7c..3acf2995d364 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -1,15 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2010-2011 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#ifndef ASM_KVM_BOOKE_HV_ASM_H
#define ASM_KVM_BOOKE_HV_ASM_H
-#ifdef __ASSEMBLY__
+#include <asm/feature-fixups.h>
+
+#ifdef __ASSEMBLER__
/*
* All exceptions from guest state must go through KVM
@@ -65,5 +64,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
.endm
-#endif /*__ASSEMBLY__ */
+#endif /*__ASSEMBLER__ */
#endif /* ASM_KVM_BOOKE_HV_ASM_H */
diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h
index 92daae132492..25df316b7ebf 100644
--- a/arch/powerpc/include/asm/kvm_fpu.h
+++ b/arch/powerpc/include/asm/kvm_fpu.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright Novell Inc. 2010
*
diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h
new file mode 100644
index 000000000000..68e499abdb24
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_guest.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_KVM_GUEST_H_
+#define _ASM_POWERPC_KVM_GUEST_H_
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+#include <linux/jump_label.h>
+
+DECLARE_STATIC_KEY_FALSE(kvm_guest);
+
+static inline bool is_kvm_guest(void)
+{
+ return static_branch_unlikely(&kvm_guest);
+}
+
+int __init check_kvm_guest(void);
+#else
+static inline bool is_kvm_guest(void) { return false; }
+static inline int check_kvm_guest(void) { return 0; }
+#endif
+
+#endif /* _ASM_POWERPC_KVM_GUEST_H_ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 98d9dd50d063..2d139c807577 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
*
@@ -35,31 +24,44 @@
#include <asm/page.h>
#include <asm/cacheflush.h>
#include <asm/hvcall.h>
+#include <asm/mce.h>
+#include <asm/guest-state-buffer.h>
+
+#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
#define KVM_MAX_VCPUS NR_CPUS
#define KVM_MAX_VCORES NR_CPUS
-#define KVM_USER_MEM_SLOTS 32
-#define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
-#ifdef CONFIG_KVM_MMIO
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#endif
+#include <asm/cputhreads.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_IDS (MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+/*
+ * Limit the nested partition table to 4096 entries (because that's what
+ * hardware supports). Both guest and host use this value.
+ */
+#define KVM_MAX_NESTED_GUESTS_SHIFT 12
+
+#else
+#define KVM_MAX_VCPU_IDS KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
+#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
/* These values are internal and can be increased later */
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 256
-#include <linux/mmu_notifier.h>
+/* PPC-specific vcpu->requests bit members */
+#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0)
+#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1)
+#define KVM_REQ_PENDING_TIMER KVM_ARCH_REQ(2)
-#define KVM_ARCH_WANT_MMU_NOTIFIER
-
-struct kvm;
-extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
-extern int kvm_unmap_hva_range(struct kvm *kvm,
- unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
-extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
-extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+#include <linux/mmu_notifier.h>
#define HPTEG_CACHE_NUM (1 << 15)
#define HPTEG_HASH_BITS_PTE 13
@@ -76,51 +78,54 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
/* Physical Address Mask - allowed range of real mode RAM access */
#define KVM_PAM 0x0fffffffffffffffULL
-struct kvm;
-struct kvm_run;
-struct kvm_vcpu;
-
struct lppaca;
struct slb_shadow;
struct dtl_entry;
struct kvmppc_vcpu_book3s;
struct kvmppc_book3s_shadow_vcpu;
+struct kvm_nested_guest;
struct kvm_vm_stat {
- u32 remote_tlb_flush;
+ struct kvm_vm_stat_generic generic;
+ u64 num_2M_pages;
+ u64 num_1G_pages;
};
struct kvm_vcpu_stat {
- u32 sum_exits;
- u32 mmio_exits;
- u32 signal_exits;
- u32 light_exits;
+ struct kvm_vcpu_stat_generic generic;
+ u64 sum_exits;
+ u64 mmio_exits;
+ u64 signal_exits;
+ u64 light_exits;
/* Account for special types of light exits: */
- u32 itlb_real_miss_exits;
- u32 itlb_virt_miss_exits;
- u32 dtlb_real_miss_exits;
- u32 dtlb_virt_miss_exits;
- u32 syscall_exits;
- u32 isi_exits;
- u32 dsi_exits;
- u32 emulated_inst_exits;
- u32 dec_exits;
- u32 ext_intr_exits;
- u32 halt_wakeup;
- u32 dbell_exits;
- u32 gdbell_exits;
- u32 ld;
- u32 st;
+ u64 itlb_real_miss_exits;
+ u64 itlb_virt_miss_exits;
+ u64 dtlb_real_miss_exits;
+ u64 dtlb_virt_miss_exits;
+ u64 syscall_exits;
+ u64 isi_exits;
+ u64 dsi_exits;
+ u64 emulated_inst_exits;
+ u64 dec_exits;
+ u64 ext_intr_exits;
+ u64 halt_successful_wait;
+ u64 dbell_exits;
+ u64 gdbell_exits;
+ u64 ld;
+ u64 st;
#ifdef CONFIG_PPC_BOOK3S
- u32 pf_storage;
- u32 pf_instruc;
- u32 sp_storage;
- u32 sp_instruc;
- u32 queue_intr;
- u32 ld_slow;
- u32 st_slow;
+ u64 pf_storage;
+ u64 pf_instruc;
+ u64 sp_storage;
+ u64 sp_instruc;
+ u64 queue_intr;
+ u64 ld_slow;
+ u64 st_slow;
#endif
+ u64 pthru_all;
+ u64 pthru_host;
+ u64 pthru_bad_aff;
};
enum kvm_exit_types {
@@ -144,6 +149,7 @@ enum kvm_exit_types {
EMULATED_TLBWE_EXITS,
EMULATED_RFI_EXITS,
EMULATED_RFCI_EXITS,
+ EMULATED_RFDI_EXITS,
DEC_EXITS,
EXT_INTR_EXITS,
HALT_WAKEUP,
@@ -171,22 +177,40 @@ struct kvmppc_pginfo {
atomic_t refcnt;
};
+struct kvmppc_spapr_tce_iommu_table {
+ struct rcu_head rcu;
+ struct list_head next;
+ struct iommu_table *tbl;
+ struct kref kref;
+};
+
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
u64 liobn;
- u32 window_size;
- struct page *pages[0];
-};
-
-struct kvm_rma_info {
- atomic_t use_count;
- unsigned long base_pfn;
+ struct rcu_head rcu;
+ u32 page_shift;
+ u64 offset; /* in pages */
+ u64 size; /* window size in pages */
+ struct list_head iommu_tables;
+ struct mutex alloc_lock;
+ struct page *pages[];
};
/* XICS components, defined in book3s_xics.c */
struct kvmppc_xics;
struct kvmppc_icp;
+extern struct kvm_device_ops kvm_xics_ops;
+
+/* XIVE components, defined in book3s_xive.c */
+struct kvmppc_xive;
+struct kvmppc_xive_vcpu;
+extern struct kvm_device_ops kvm_xive_ops;
+extern struct kvm_device_ops kvm_xive_native_ops;
+
+struct kvmppc_passthru_irqmap;
/*
* The reverse mapping array has one entry for each HPTE,
@@ -202,55 +226,82 @@ struct revmap_entry {
};
/*
- * We use the top bit of each memslot->arch.rmap entry as a lock bit,
- * and bit 32 as a present flag. The bottom 32 bits are the
- * index in the guest HPT of a HPTE that points to the page.
+ * The rmap array of size number of guest pages is allocated for each memslot.
+ * This array is used to store usage specific information about the guest page.
+ * Below are the encodings of the various possible usage types.
+ */
+/* Free bits which can be used to define a new usage */
+#define KVMPPC_RMAP_TYPE_MASK 0xff00000000000000
+#define KVMPPC_RMAP_NESTED 0xc000000000000000 /* Nested rmap array */
+#define KVMPPC_RMAP_HPT 0x0100000000000000 /* HPT guest */
+
+/*
+ * rmap usage definition for a hash page table (hpt) guest:
+ * 0x0000080000000000 Lock bit
+ * 0x0000018000000000 RC bits
+ * 0x0000000100000000 Present bit
+ * 0x00000000ffffffff HPT index bits
+ * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ * the page.
*/
-#define KVMPPC_RMAP_LOCK_BIT 63
+#define KVMPPC_RMAP_LOCK_BIT 43
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
-#define KVMPPC_RMAP_CHANGED (HPTE_R_C << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK 0x1f
-#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
-#define KVMPPC_GOT_PAGE 0x80
-
struct kvm_arch_memory_slot {
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned long *rmap;
- unsigned long *slot_phys;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
};
+struct kvm_hpt_info {
+ /* Host virtual (linear mapping) address of guest HPT */
+ unsigned long virt;
+ /* Array of reverse mapping entries for each guest HPTE */
+ struct revmap_entry *rev;
+ /* Guest HPT size is 2**(order) bytes */
+ u32 order;
+ /* 1 if HPT allocated with CMA, 0 otherwise */
+ int cma;
+};
+
+struct kvm_resize_hpt;
+
+/* Flag values for kvm_arch.secure_guest */
+#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
+#define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */
+#define KVMPPC_SECURE_INIT_ABORT 0x4 /* H_SVM_INIT_ABORT issued */
+
struct kvm_arch {
- unsigned int lpid;
+ u64 lpid;
+ unsigned int smt_mode; /* # vcpus per virtual core */
+ unsigned int emul_smt_mode; /* emulated SMT mode, on P9 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- unsigned long hpt_virt;
- struct revmap_entry *revmap;
+ unsigned int tlb_sets;
+ struct kvm_hpt_info hpt;
+ atomic64_t mmio_update;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
unsigned long host_sdr1;
- int tlbie_lock;
unsigned long lpcr;
- unsigned long rmor;
- struct kvm_rma_info *rma;
unsigned long vrma_slb_v;
- int rma_setup_done;
- int using_mmu_notifiers;
- u32 hpt_order;
+ int mmu_ready;
atomic_t vcpus_running;
u32 online_vcores;
- unsigned long hpt_npte;
- unsigned long hpt_mask;
atomic_t hpte_mod_interest;
- spinlock_t slot_phys_lock;
cpumask_t need_tlb_flush;
- int hpt_cma_alloc;
+ u8 radix;
+ u8 fwnmi_enabled;
+ u8 secure_guest;
+ u8 svm_enabled;
+ bool nested_enable;
+ bool dawr1_enabled;
+ pgd_t *pgtable;
+ u64 process_table;
+ struct kvm_resize_hpt *resize_hpt; /* protected by kvm->lock */
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
struct mutex hpt_mutex;
@@ -258,6 +309,7 @@ struct kvm_arch {
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
+ struct mutex rtas_token_lock;
DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
#endif
#ifdef CONFIG_KVM_MPIC
@@ -265,59 +317,46 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
+ struct kvmppc_xics *xics_device;
+ struct kvmppc_xive *xive; /* Current XIVE device in use */
+ struct {
+ struct kvmppc_xive *native;
+ struct kvmppc_xive *xics_on_xive;
+ } xive_devices;
+ struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ struct mutex uvmem_lock;
+ struct list_head uvmem_pfns;
+ struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */
+ u64 l1_ptcr;
+ struct idr kvm_nested_guest_idr;
/* This array can grow quite large, keep it at the end */
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
#endif
};
-/*
- * Struct for a virtual core.
- * Note: entry_exit_count combines an entry count in the bottom 8 bits
- * and an exit count in the next 8 bits. This is so that we can
- * atomically increment the entry count iff the exit count is 0
- * without taking the lock.
- */
-struct kvmppc_vcore {
- int n_runnable;
- int n_busy;
- int num_threads;
- int entry_exit_count;
- int n_woken;
- int nap_count;
- int napping_threads;
- int first_vcpuid;
- u16 pcpu;
- u16 last_cpu;
- u8 vcore_state;
- u8 in_guest;
- struct list_head runnable_threads;
- spinlock_t lock;
- wait_queue_head_t wq;
- u64 stolen_tb;
- u64 preempt_tb;
- struct kvm_vcpu *runner;
- struct kvm *kvm;
- u64 tb_offset; /* guest timebase - host timebase */
- ulong lpcr;
- u32 arch_compat;
- ulong pcr;
- ulong dpdes; /* doorbell state (POWER8) */
- void *mpp_buffer; /* Micro Partition Prefetch buffer */
- bool mpp_buffer_is_valid;
-};
+#define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff)
+#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
+#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
-#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
-#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
+/* This bit is used when a vcore exit is triggered from outside the vcore */
+#define VCORE_EXIT_REQ 0x10000
-/* Values for vcore_state */
+/*
+ * Values for vcore_state.
+ * Note that these are arranged such that lower values
+ * (< VCORE_SLEEPING) don't require stolen time accounting
+ * on load/unload, and higher values do.
+ */
#define VCORE_INACTIVE 0
-#define VCORE_SLEEPING 1
-#define VCORE_STARTING 2
-#define VCORE_RUNNING 3
-#define VCORE_EXITING 4
+#define VCORE_PREEMPT 1
+#define VCORE_PIGGYBACK 2
+#define VCORE_SLEEPING 3
+#define VCORE_RUNNING 4
+#define VCORE_EXITING 5
+#define VCORE_POLLING 6
/*
* Struct used to manage memory for a virtual processor area
@@ -341,7 +380,10 @@ struct kvmppc_pte {
bool may_read : 1;
bool may_write : 1;
bool may_execute : 1;
+ unsigned long wimg;
+ unsigned long rc;
u8 page_size; /* MMU_PAGE_xxx */
+ u8 page_shift;
};
struct kvmppc_mmu {
@@ -349,6 +391,7 @@ struct kvmppc_mmu {
void (*slbmte)(struct kvm_vcpu *vcpu, u64 rb, u64 rs);
u64 (*slbmfee)(struct kvm_vcpu *vcpu, u64 slb_nr);
u64 (*slbmfev)(struct kvm_vcpu *vcpu, u64 slb_nr);
+ int (*slbfee)(struct kvm_vcpu *vcpu, gva_t eaddr, ulong *ret_slb);
void (*slbie)(struct kvm_vcpu *vcpu, u64 slb_nr);
void (*slbia)(struct kvm_vcpu *vcpu);
/* book3s */
@@ -356,7 +399,6 @@ struct kvmppc_mmu {
u32 (*mfsrin)(struct kvm_vcpu *vcpu, u32 srnum);
int (*xlate)(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data, bool iswrite);
- void (*reset_msr)(struct kvm_vcpu *vcpu);
void (*tlbie)(struct kvm_vcpu *vcpu, ulong addr, bool large);
int (*esid_to_vsid)(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid);
u64 (*ea_to_vp)(struct kvm_vcpu *vcpu, gva_t eaddr, bool data);
@@ -378,7 +420,29 @@ struct kvmppc_slb {
u8 base_page_size; /* MMU_PAGE_xxx */
};
-# ifdef CONFIG_PPC_FSL_BOOK3E
+/* Struct used to accumulate timing information in HV real mode code */
+struct kvmhv_tb_accumulator {
+ u64 seqcount; /* used to synchronize access, also count * 2 */
+ u64 tb_total; /* total time in timebase ticks */
+ u64 tb_min; /* min time */
+ u64 tb_max; /* max time */
+};
+
+#ifdef CONFIG_PPC_BOOK3S_64
+struct kvmppc_irq_map {
+ u32 r_hwirq;
+ u32 v_hwirq;
+ struct irq_desc *desc;
+};
+
+#define KVMPPC_PIRQ_MAPPED 1024
+struct kvmppc_passthru_irqmap {
+ int n_mapped;
+ struct kvmppc_irq_map mapped[KVMPPC_PIRQ_MAPPED];
+};
+#endif
+
+# ifdef CONFIG_PPC_E500
#define KVMPPC_BOOKE_IAC_NUM 2
#define KVMPPC_BOOKE_DAC_NUM 2
# else
@@ -395,10 +459,72 @@ struct kvmppc_slb {
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
-#define KVMPPC_IRQ_XICS 2
+#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */
+
+#define MMIO_HPTE_CACHE_SIZE 4
+
+struct mmio_hpte_cache_entry {
+ unsigned long hpte_v;
+ unsigned long hpte_r;
+ unsigned long rpte;
+ unsigned long pte_index;
+ unsigned long eaddr;
+ unsigned long slb_v;
+ long mmio_update;
+ unsigned int slb_base_pshift;
+};
+
+struct mmio_hpte_cache {
+ struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
+ unsigned int index;
+};
+
+#define KVMPPC_VSX_COPY_NONE 0
+#define KVMPPC_VSX_COPY_WORD 1
+#define KVMPPC_VSX_COPY_DWORD 2
+#define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3
+#define KVMPPC_VSX_COPY_WORD_LOAD_DUMP 4
+
+#define KVMPPC_VMX_COPY_BYTE 8
+#define KVMPPC_VMX_COPY_HWORD 9
+#define KVMPPC_VMX_COPY_WORD 10
+#define KVMPPC_VMX_COPY_DWORD 11
struct openpic;
+/* W0 and W1 of a XIVE thread management context */
+union xive_tma_w01 {
+ struct {
+ u8 nsr;
+ u8 cppr;
+ u8 ipb;
+ u8 lsmfb;
+ u8 ack;
+ u8 inc;
+ u8 age;
+ u8 pipr;
+ };
+ __be64 w01;
+};
+
+ /* Nestedv2 H_GUEST_RUN_VCPU configuration */
+struct kvmhv_nestedv2_config {
+ struct kvmppc_gs_buff_info vcpu_run_output_cfg;
+ struct kvmppc_gs_buff_info vcpu_run_input_cfg;
+ u64 vcpu_run_output_size;
+};
+
+ /* Nestedv2 L1<->L0 communication state */
+struct kvmhv_nestedv2_io {
+ struct kvmhv_nestedv2_config cfg;
+ struct kvmppc_gs_buff *vcpu_run_output;
+ struct kvmppc_gs_buff *vcpu_run_input;
+ struct kvmppc_gs_msg *vcpu_message;
+ struct kvmppc_gs_msg *vcore_message;
+ struct kvmppc_gs_bitmap valids;
+};
+
struct kvm_vcpu_arch {
ulong host_stack;
u32 host_pid;
@@ -413,7 +539,11 @@ struct kvm_vcpu_arch {
struct kvmppc_book3s_shadow_vcpu *shadow_vcpu;
#endif
- ulong gpr[32];
+ /*
+ * This is passed along to the HV via H_ENTER_NESTED. Align to
+ * prevent it crossing a real 4K page.
+ */
+ struct pt_regs regs __aligned(512);
struct thread_fp_state fp;
@@ -448,23 +578,16 @@ struct kvm_vcpu_arch {
u32 qpr[32];
#endif
- ulong pc;
- ulong ctr;
- ulong lr;
#ifdef CONFIG_PPC_BOOK3S
ulong tar;
#endif
- ulong xer;
- u32 cr;
-
#ifdef CONFIG_PPC_BOOK3S
ulong hflags;
ulong guest_owned_ext;
ulong purr;
ulong spurr;
ulong ic;
- ulong vtb;
ulong dscr;
ulong amr;
ulong uamor;
@@ -472,12 +595,21 @@ struct kvm_vcpu_arch {
u32 ctrl;
u32 dabrx;
ulong dabr;
- ulong dawr;
- ulong dawrx;
+ ulong dawr0;
+ ulong dawrx0;
+ ulong dawr1;
+ ulong dawrx1;
+ ulong dexcr;
+ ulong hashkeyr;
+ ulong hashpkeyr;
ulong ciabr;
ulong cfar;
ulong ppr;
- ulong pspb;
+ u32 pspb;
+ u8 load_ebb;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ u8 load_tm;
+#endif
ulong fscr;
ulong shadow_fscr;
ulong ebbhr;
@@ -488,6 +620,9 @@ struct kvm_vcpu_arch {
ulong tcscr;
ulong acop;
ulong wort;
+ ulong tid;
+ ulong psscr;
+ ulong hfscr;
ulong shadow_srr1;
#endif
u32 vrsave; /* also USPRG0 */
@@ -501,7 +636,7 @@ struct kvm_vcpu_arch {
ulong mcsrr0;
ulong mcsrr1;
ulong mcsr;
- u32 dec;
+ ulong dec;
#ifdef CONFIG_BOOKE
u32 decar;
#endif
@@ -524,18 +659,22 @@ struct kvm_vcpu_arch {
u32 ccr1;
u32 dbsr;
- u64 mmcr[5];
+ u64 mmcr[4]; /* MMCR0, MMCR1, MMCR2, MMCR3 */
+ u64 mmcra;
+ u64 mmcrs;
u32 pmc[8];
u32 spmc[2];
u64 siar;
u64 sdar;
- u64 sier;
+ u64 sier[3];
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u64 tfhar;
u64 texasr;
u64 tfiar;
+ u64 orig_texasr;
u32 cr_tm;
+ u64 xer_tm;
u64 lr_tm;
u64 ctr_tm;
u64 amr_tm;
@@ -549,7 +688,6 @@ struct kvm_vcpu_arch {
struct thread_vr_state vr_tm;
u32 vrsave_tm; /* also USPRG0 */
-
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
@@ -563,13 +701,18 @@ struct kvm_vcpu_arch {
u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES];
u64 timing_last_exit;
- struct dentry *debugfs_exit_timing;
#endif
#ifdef CONFIG_PPC_BOOK3S
ulong fault_dar;
u32 fault_dsisr;
unsigned long intr_msr;
+ /*
+ * POWER9 and later: fault_gpa contains the guest real address of page
+ * fault for a radix guest, or segment descriptor (equivalent to result
+ * from slbmfev of SLB entry that translated the EA) for hash guests.
+ */
+ ulong fault_gpa;
#endif
#ifdef CONFIG_BOOKE
@@ -589,16 +732,30 @@ struct kvm_vcpu_arch {
u32 crit_save;
/* guest debug registers*/
struct debug_reg dbg_reg;
- /* hardware visible debug registers when in guest state */
- struct debug_reg shadow_dbg_reg;
#endif
gpa_t paddr_accessed;
gva_t vaddr_accessed;
pgd_t *pgdir;
- u8 io_gpr; /* GPR used as IO source/target */
- u8 mmio_is_bigendian;
+ u16 io_gpr; /* GPR used as IO source/target */
+ u8 mmio_host_swabbed;
u8 mmio_sign_extend;
+ /* conversion between single and double precision */
+ u8 mmio_sp64_extend;
+ /*
+ * Number of simulations for vsx.
+ * If we use 2*8bytes to simulate 1*16bytes,
+ * then the number should be 2 and
+ * mmio_copy_type=KVMPPC_VSX_COPY_DWORD.
+ * If we use 4*4bytes to simulate 1*16bytes,
+ * the number should be 4 and
+ * mmio_copy_type=KVMPPC_VSX_COPY_WORD.
+ */
+ u8 mmio_vsx_copy_nums;
+ u8 mmio_vsx_offset;
+ u8 mmio_vmx_copy_nums;
+ u8 mmio_vmx_offset;
+ u8 mmio_copy_type;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
@@ -608,26 +765,32 @@ struct kvm_vcpu_arch {
u8 hcall_needed;
u8 epr_flags; /* KVMPPC_EPR_xxx */
u8 epr_needed;
+ u8 external_oneshot; /* clear external irq after delivery */
u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */
struct hrtimer dec_timer;
- struct tasklet_struct tasklet;
u64 dec_jiffies;
- u64 dec_expires;
+ u64 dec_expires; /* Relative to guest timebase. */
unsigned long pending_exceptions;
u8 ceded;
u8 prodded;
- u32 last_inst;
+ u8 doorbell_request;
+ u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
+ unsigned long last_inst;
- wait_queue_head_t *wqp;
+ struct rcuwait wait;
+ struct rcuwait *waitp;
struct kvmppc_vcore *vcore;
int ret;
int trap;
int state;
int ptid;
+ int thread_cpu;
+ int prev_cpu;
bool timer_running;
wait_queue_head_t cpu_run;
+ struct machine_check_event mce_evt; /* Valid if trap == 0x200 */
struct kvm_vcpu_arch_shared *shared;
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
@@ -642,18 +805,25 @@ struct kvm_vcpu_arch {
struct openpic *mpic; /* KVM_IRQ_MPIC */
#ifdef CONFIG_KVM_XICS
struct kvmppc_icp *icp; /* XICS presentation controller */
+ struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
+ __be32 xive_cam_word; /* Cooked W2 in proper endian with valid bit */
+ u8 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+ u8 xive_esc_on; /* Is the escalation irq enabled ? */
+ union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+ u64 xive_esc_raddr; /* Escalation interrupt ESB real addr */
+ u64 xive_esc_vaddr; /* Escalation interrupt ESB virt addr */
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
struct kvm_vcpu_arch_shared shregs;
+ struct mmio_hpte_cache mmio_cache;
unsigned long pgfault_addr;
long pgfault_index;
unsigned long pgfault_hpte[2];
+ struct mmio_hpte_cache_entry *pgfault_cache;
- struct list_head run_list;
struct task_struct *run_task;
- struct kvm_run *kvm_run;
spinlock_t vpa_update_lock;
struct kvmppc_vpa vpa;
@@ -666,10 +836,51 @@ struct kvm_vcpu_arch {
spinlock_t tbacct_lock;
u64 busy_stolen;
u64 busy_preempt;
+
+ u64 emul_inst;
+
+ u32 online;
+
+ u64 hfscr_permitted; /* A mask of permitted HFSCR facilities */
+
+ /* For support of nested guests */
+ struct kvm_nested_guest *nested;
+ u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */
+ u32 nested_vcpu_id;
+ gpa_t nested_io_gpr;
+ /* For nested APIv2 guests */
+ struct kvmhv_nestedv2_io nestedv2_io;
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+ struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */
+ u64 cur_tb_start; /* when it started */
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+ struct kvmhv_tb_accumulator vcpu_entry;
+ struct kvmhv_tb_accumulator vcpu_exit;
+ struct kvmhv_tb_accumulator in_guest;
+ struct kvmhv_tb_accumulator hcall;
+ struct kvmhv_tb_accumulator pg_fault;
+ struct kvmhv_tb_accumulator guest_entry;
+ struct kvmhv_tb_accumulator guest_exit;
+#else
+ struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */
+ struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */
+ struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
+ struct kvmhv_tb_accumulator guest_time; /* guest execution */
+ struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */
+#endif
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ u64 l1_to_l2_cs;
+ u64 l2_to_l1_cs;
+ u64 l2_runtime_agg;
#endif
};
#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
+#define VCPU_VSX_FPR(vcpu, i, j) ((vcpu)->arch.fp.fpr[i][j])
+#define VCPU_VSX_VR(vcpu, i) ((vcpu)->arch.vr.vr[i])
/* Values for vcpu->arch.state */
#define KVMPPC_VCPU_NOTREADY 0
@@ -677,14 +888,23 @@ struct kvm_vcpu_arch {
#define KVMPPC_VCPU_BUSY_IN_HOST 2
/* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK 0x001f
-#define KVM_MMIO_REG_EXT_MASK 0xffe0
+#define KVM_MMIO_REG_MASK 0x003f
+#define KVM_MMIO_REG_EXT_MASK 0xffc0
#define KVM_MMIO_REG_GPR 0x0000
-#define KVM_MMIO_REG_FPR 0x0020
-#define KVM_MMIO_REG_QPR 0x0040
-#define KVM_MMIO_REG_FQPR 0x0060
+#define KVM_MMIO_REG_FPR 0x0040
+#define KVM_MMIO_REG_QPR 0x0080
+#define KVM_MMIO_REG_FQPR 0x00c0
+#define KVM_MMIO_REG_VSX 0x0100
+#define KVM_MMIO_REG_VMX 0x0180
+#define KVM_MMIO_REG_NESTED_GPR 0xffc0
+
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
+static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
#endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 336a91acb8b1..abe1b5e82547 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -19,35 +8,15 @@
#ifndef __POWERPC_KVM_PARA_H__
#define __POWERPC_KVM_PARA_H__
-#include <uapi/asm/kvm_para.h>
-
-#ifdef CONFIG_KVM_GUEST
-
-#include <linux/of.h>
+#include <asm/kvm_guest.h>
-static inline int kvm_para_available(void)
-{
- struct device_node *hyper_node;
-
- hyper_node = of_find_node_by_path("/hypervisor");
- if (!hyper_node)
- return 0;
-
- if (!of_device_is_compatible(hyper_node, "linux,kvm"))
- return 0;
-
- return 1;
-}
-
-#else
+#include <uapi/asm/kvm_para.h>
static inline int kvm_para_available(void)
{
- return 0;
+ return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
}
-#endif
-
static inline unsigned int kvm_arch_para_features(void)
{
unsigned long r;
@@ -61,6 +30,11 @@ static inline unsigned int kvm_arch_para_features(void)
return r;
}
+static inline unsigned int kvm_arch_para_hints(void)
+{
+ return 0;
+}
+
static inline bool kvm_check_and_clear_guest_paused(void)
{
return false;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index fb86a2299d8a..0953f2daa466 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -36,7 +25,16 @@
#endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/paca.h>
+#include <asm/xive.h>
+#include <asm/cpu_has_feature.h>
#endif
+#include <asm/inst.h>
+
+/*
+ * KVMPPC_INST_SW_BREAKPOINT is the debug instruction
+ * used to support software breakpoints.
+ */
+#define KVMPPC_INST_SW_BREAKPOINT 0x00dddd00
enum emulation_result {
EMULATE_DONE, /* no further processing */
@@ -46,7 +44,7 @@ enum emulation_result {
EMULATE_EXIT_USER, /* emulation requires exit to user-space */
};
-enum instruction_type {
+enum instruction_fetch_type {
INST_GENERIC,
INST_SC, /* system call */
};
@@ -61,35 +59,45 @@ enum xlate_readwrite {
XLATE_WRITE /* check for write permissions */
};
-extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
-extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+extern int kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
+extern int __kvmppc_vcpu_run(struct kvm_vcpu *vcpu);
extern void kvmppc_handler_highmem(void);
extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu);
-extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_load(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian);
-extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian);
-extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian, int mmio_sign_extend);
+extern int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes, int is_default_endian);
+extern int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
+ unsigned int rs, unsigned int bytes, int is_default_endian);
+extern int kvmppc_handle_store(struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes,
int is_default_endian);
+extern int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
+ int rs, unsigned int bytes,
+ int is_default_endian);
extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
- enum instruction_type type, u32 *inst);
+ enum instruction_fetch_type type,
+ unsigned long *inst);
extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
bool data);
extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
bool data);
-extern int kvmppc_emulate_instruction(struct kvm_run *run,
- struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu);
extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu);
-extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
+extern int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu);
extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
-extern void kvmppc_decrementer_func(unsigned long data);
+extern void kvmppc_decrementer_func(struct kvm_vcpu *vcpu);
extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
@@ -98,10 +106,7 @@ extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
unsigned int gtlb_idx);
-extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
-extern void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu);
-extern int kvmppc_mmu_init(struct kvm_vcpu *vcpu);
extern int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index,
@@ -112,11 +117,9 @@ extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr,
enum xlate_instdata xlid, enum xlate_readwrite xlrw,
struct kvmppc_pte *pte);
-extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm,
- unsigned int id);
+extern int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu);
extern void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu);
extern int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_check_processor_compat(void);
extern int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu,
struct kvm_translation *tr);
@@ -125,63 +128,89 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
+
+extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
+extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
+extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
+extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
+extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu,
+ ulong srr1_flags);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
-extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags,
+extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
+ ulong dear_flags,
ulong esr_flags);
extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
- ulong dear_flags,
- ulong esr_flags);
+ ulong srr1_flags,
+ ulong dar,
+ ulong dsisr);
extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
- ulong esr_flags);
+ ulong srr1_flags);
+
extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
extern int kvmppc_booke_init(void);
extern void kvmppc_booke_exit(void);
-extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
-extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
-extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
-extern void kvmppc_free_hpt(struct kvm *kvm);
-extern long kvmppc_prepare_vrma(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem);
+extern int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order);
+extern void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info);
+extern int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order);
+extern void kvmppc_free_hpt(struct kvm_hpt_info *info);
+extern void kvmppc_rmap_reset(struct kvm *kvm);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
-
-extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args);
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+ struct iommu_group *grp);
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+ struct iommu_group *grp);
+extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
+extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
+extern void kvmppc_setup_partition_table(struct kvm *kvm);
+
+extern int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce_64 *args);
+#define kvmppc_ioba_validate(stt, ioba, npages) \
+ (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
+ (stt)->size, (ioba), (npages)) ? \
+ H_PARAMETER : H_SUCCESS)
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
+extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+extern long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
-extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
-extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
+extern struct page *kvm_alloc_hpt_cma(unsigned long nr_pages);
+extern void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm);
extern void kvmppc_core_destroy_vm(struct kvm *kvm);
extern void kvmppc_core_free_memslot(struct kvm *kvm,
- struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont);
-extern int kvmppc_core_create_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- unsigned long npages);
+ struct kvm_memory_slot *slot);
extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem);
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change);
extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old);
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change);
extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
struct kvm_ppc_smmu_info *info);
extern void kvmppc_core_flush_memslot(struct kvm *kvm,
@@ -193,12 +222,17 @@ extern void kvmppc_bookehv_exit(void);
extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+extern int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt);
+extern int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt);
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq);
extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+
extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority);
extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
@@ -206,15 +240,22 @@ extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server,
extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq);
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu);
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu);
+
union kvmppc_one_reg {
u32 wval;
u64 dval;
vector128 vval;
u64 vsxval[2];
+ u32 vsx32val[4];
+ u16 vsx16val[8];
+ u8 vsx8val[16];
struct {
u64 addr;
u64 length;
} vpaval;
+ u64 xive_timaval[2];
};
struct kvmppc_ops {
@@ -227,48 +268,64 @@ struct kvmppc_ops {
union kvmppc_one_reg *val);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
+ void (*inject_interrupt)(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
void (*set_msr)(struct kvm_vcpu *vcpu, u64 msr);
- int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
- struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned int id);
+ int (*vcpu_run)(struct kvm_vcpu *vcpu);
+ int (*vcpu_create)(struct kvm_vcpu *vcpu);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
int (*check_requests)(struct kvm_vcpu *vcpu);
int (*get_dirty_log)(struct kvm *kvm, struct kvm_dirty_log *log);
void (*flush_memslot)(struct kvm *kvm, struct kvm_memory_slot *memslot);
int (*prepare_memory_region)(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem);
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change);
void (*commit_memory_region)(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old);
- int (*unmap_hva)(struct kvm *kvm, unsigned long hva);
- int (*unmap_hva_range)(struct kvm *kvm, unsigned long start,
- unsigned long end);
- int (*age_hva)(struct kvm *kvm, unsigned long hva);
- int (*test_age_hva)(struct kvm *kvm, unsigned long hva);
- void (*set_spte_hva)(struct kvm *kvm, unsigned long hva, pte_t pte);
- void (*mmu_destroy)(struct kvm_vcpu *vcpu);
- void (*free_memslot)(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont);
- int (*create_memslot)(struct kvm_memory_slot *slot,
- unsigned long npages);
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change);
+ bool (*unmap_gfn_range)(struct kvm *kvm, struct kvm_gfn_range *range);
+ bool (*age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
+ bool (*test_age_gfn)(struct kvm *kvm, struct kvm_gfn_range *range);
+ void (*free_memslot)(struct kvm_memory_slot *slot);
int (*init_vm)(struct kvm *kvm);
void (*destroy_vm)(struct kvm *kvm);
int (*get_smmu_info)(struct kvm *kvm, struct kvm_ppc_smmu_info *info);
- int (*emulate_op)(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ int (*emulate_op)(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
int (*emulate_mtspr)(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
int (*emulate_mfspr)(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu);
- long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
- unsigned long arg);
+ int (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl,
+ unsigned long arg);
int (*hcall_implemented)(unsigned long hcall);
+ int (*irq_bypass_add_producer)(struct irq_bypass_consumer *,
+ struct irq_bypass_producer *);
+ void (*irq_bypass_del_producer)(struct irq_bypass_consumer *,
+ struct irq_bypass_producer *);
+ int (*configure_mmu)(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg);
+ int (*get_rmmu_info)(struct kvm *kvm, struct kvm_ppc_rmmu_info *info);
+ int (*set_smt_mode)(struct kvm *kvm, unsigned long mode,
+ unsigned long flags);
+ void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr);
+ int (*enable_nested)(struct kvm *kvm);
+ int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size);
+ int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size);
+ int (*enable_svm)(struct kvm *kvm);
+ int (*svm_off)(struct kvm *kvm);
+ int (*enable_dawr1)(struct kvm *kvm);
+ bool (*hash_v3_possible)(void);
+ int (*create_vm_debugfs)(struct kvm *kvm);
+ int (*create_vcpu_debugfs)(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry);
};
extern struct kvmppc_ops *kvmppc_hv_ops;
extern struct kvmppc_ops *kvmppc_pr_ops;
static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu,
- enum instruction_type type, u32 *inst)
+ enum instruction_fetch_type type, ppc_inst_t *inst)
{
int ret = EMULATE_DONE;
u32 fetched_inst;
@@ -279,15 +336,30 @@ static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu,
ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst);
/* Write fetch_failed unswapped if the fetch failed */
- if (ret == EMULATE_DONE)
- fetched_inst = kvmppc_need_byteswap(vcpu) ?
- swab32(vcpu->arch.last_inst) :
- vcpu->arch.last_inst;
- else
- fetched_inst = vcpu->arch.last_inst;
+ if (ret != EMULATE_DONE) {
+ *inst = ppc_inst(KVM_INST_FETCH_FAILED);
+ return ret;
+ }
+
+#ifdef CONFIG_PPC64
+ /* Is this a prefixed instruction? */
+ if ((vcpu->arch.last_inst >> 32) != 0) {
+ u32 prefix = vcpu->arch.last_inst >> 32;
+ u32 suffix = vcpu->arch.last_inst;
+ if (kvmppc_need_byteswap(vcpu)) {
+ prefix = swab32(prefix);
+ suffix = swab32(suffix);
+ }
+ *inst = ppc_inst_prefix(prefix, suffix);
+ return EMULATE_DONE;
+ }
+#endif
- *inst = fetched_inst;
- return ret;
+ fetched_inst = kvmppc_need_byteswap(vcpu) ?
+ swab32(vcpu->arch.last_inst) :
+ vcpu->arch.last_inst;
+ *inst = ppc_inst(fetched_inst);
+ return EMULATE_DONE;
}
static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
@@ -295,6 +367,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
return kvm->arch.kvm_ops == kvmppc_hv_ops;
}
+extern int kvmppc_hwrng_present(void);
+
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
@@ -371,7 +445,15 @@ struct openpic;
extern void kvm_cma_reserve(void) __init;
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{
- paca[cpu].kvm_hstate.xics_phys = addr;
+ paca_ptrs[cpu]->kvm_hstate.xics_phys = (void __iomem *)addr;
+}
+
+static inline void kvmppc_set_xive_tima(int cpu,
+ unsigned long phys_addr,
+ void __iomem *virt_addr)
+{
+ paca_ptrs[cpu]->kvm_hstate.xive_tima_phys = (void __iomem *)phys_addr;
+ paca_ptrs[cpu]->kvm_hstate.xive_tima_virt = virt_addr;
}
static inline u32 kvmppc_get_xics_latch(void)
@@ -383,9 +465,100 @@ static inline u32 kvmppc_get_xics_latch(void)
return xirr;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+/*
+ * To avoid the need to unnecessarily exit fully to the host kernel, an IPI to
+ * a CPU thread that's running/napping inside of a guest is by default regarded
+ * as a request to wake the CPU (if needed) and continue execution within the
+ * guest, potentially to process new state like externally-generated
+ * interrupts or IPIs sent from within the guest itself (e.g. H_PROD/H_IPI).
+ *
+ * To force an exit to the host kernel, kvmppc_set_host_ipi() must be called
+ * prior to issuing the IPI to set the corresponding 'host_ipi' flag in the
+ * target CPU's PACA. To avoid unnecessary exits to the host, this flag should
+ * be immediately cleared via kvmppc_clear_host_ipi() by the IPI handler on
+ * the receiving side prior to processing the IPI work.
+ *
+ * NOTE:
+ *
+ * We currently issue an smp_mb() at the beginning of kvmppc_set_host_ipi().
+ * This is to guard against sequences such as the following:
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * 105: smp_muxed_ipi_set_message():
+ * 105: smp_mb()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * --105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 42: // returns to executing guest
+ * | // RE-ORDERED STORE COMPLETES
+ * ->105: message[CALL_FUNCTION] = 1
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ *
+ * We also issue an smp_mb() at the end of kvmppc_clear_host_ipi(). This is
+ * to guard against sequences such as the following (as well as to create
+ * a read-side pairing with the barrier in kvmppc_set_host_ipi()):
+ *
+ * CPU
+ * X: smp_muxed_ipi_set_message():
+ * X: smp_mb()
+ * X: message[RESCHEDULE] = 1
+ * X: doorbell_global_ipi(42):
+ * X: kvmppc_set_host_ipi(42)
+ * X: ppc_msgsnd_sync()/smp_mb()
+ * X: ppc_msgsnd() -> 42
+ * 42: doorbell_exception(): // from CPU X
+ * 42: ppc_msgsync()
+ * // STORE DEFERRED DUE TO RE-ORDERING
+ * -- 42: kvmppc_clear_host_ipi(42)
+ * | 42: smp_ipi_demux_relaxed()
+ * | 105: smp_muxed_ipi_set_message():
+ * | 105: smp_mb()
+ * | 105: message[CALL_FUNCTION] = 1
+ * | 105: doorbell_global_ipi(42):
+ * | 105: kvmppc_set_host_ipi(42)
+ * | // RE-ORDERED STORE COMPLETES
+ * -> 42: kvmppc_clear_host_ipi(42)
+ * 42: // returns to executing guest
+ * 105: ppc_msgsnd_sync()/smp_mb()
+ * 105: ppc_msgsnd() -> 42
+ * 42: local_paca->kvm_hstate.host_ipi == 0 // IPI ignored
+ * 105: // hangs waiting on 42 to process messages/call_single_queue
+ */
+static inline void kvmppc_set_host_ipi(int cpu)
{
- paca[cpu].kvm_hstate.host_ipi = host_ipi;
+ /*
+ * order stores of IPI messages vs. setting of host_ipi flag
+ *
+ * pairs with the barrier in kvmppc_clear_host_ipi()
+ */
+ smp_mb();
+ WRITE_ONCE(paca_ptrs[cpu]->kvm_hstate.host_ipi, 1);
+}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
+{
+ WRITE_ONCE(paca_ptrs[cpu]->kvm_hstate.host_ipi, 0);
+ /*
+ * order clearing of host_ipi flag vs. processing of IPI messages
+ *
+ * pairs with the barrier in kvmppc_set_host_ipi()
+ */
+ smp_mb();
}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -397,6 +570,8 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu);
+
#else
static inline void __init kvm_cma_reserve(void)
{}
@@ -404,12 +579,20 @@ static inline void __init kvm_cma_reserve(void)
static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
{}
+static inline void kvmppc_set_xive_tima(int cpu,
+ unsigned long phys_addr,
+ void __iomem *virt_addr)
+{}
+
static inline u32 kvmppc_get_xics_latch(void)
{
return 0;
}
-static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi)
+static inline void kvmppc_set_host_ipi(int cpu)
+{}
+
+static inline void kvmppc_clear_host_ipi(int cpu)
{}
static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu)
@@ -421,33 +604,274 @@ static inline bool kvm_hv_mode_active(void) { return false; }
#endif
+#ifdef CONFIG_PPC_PSERIES
+static inline bool kvmhv_on_pseries(void)
+{
+ return !cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool kvmhv_on_pseries(void)
+{
+ return false;
+}
+
+#endif
+
+#ifndef CONFIG_PPC_BOOK3S
+
+static inline bool kvmhv_is_nestedv2(void)
+{
+ return false;
+}
+
+static inline bool kvmhv_is_nestedv1(void)
+{
+ return false;
+}
+
+static inline int kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu,
+ struct pt_regs *regs)
+{
+ return 0;
+}
+static inline int kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu,
+ struct pt_regs *regs)
+{
+ return 0;
+}
+
+static inline int kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden)
+{
+ return 0;
+}
+
+static inline int kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden)
+{
+ return 0;
+}
+
+#endif
+
#ifdef CONFIG_KVM_XICS
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.irq_type == KVMPPC_IRQ_XICS;
}
+
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+ struct kvm *kvm)
+{
+ if (kvm && kvm_irq_bypass)
+ return kvm->arch.pimap;
+ return NULL;
+}
+
+extern void kvmppc_alloc_host_rm_ops(void);
+extern void kvmppc_free_host_rm_ops(void);
+extern void kvmppc_free_pimap(struct kvm *kvm);
+extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
-extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server);
-extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xics_ipi_action(void);
+extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq);
+extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
+ struct kvmppc_irq_map *irq_map,
+ struct kvmppc_passthru_irqmap *pimap,
+ bool *again);
+
+extern int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+ int level, bool line_status);
+
+extern int h_ipi_redirect;
#else
+static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
+ struct kvm *kvm)
+ { return NULL; }
+static inline void kvmppc_alloc_host_rm_ops(void) {}
+static inline void kvmppc_free_host_rm_ops(void) {}
+static inline void kvmppc_free_pimap(struct kvm *kvm) {}
+static inline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+ { return 0; }
static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
-static inline int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
- unsigned long server)
- { return -EINVAL; }
-static inline int kvm_vm_ioctl_xics_irq(struct kvm *kvm,
- struct kvm_irq_level *args)
- { return -ENOTTY; }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ { return 0; }
+#endif
+
+#ifdef CONFIG_KVM_XIVE
+/*
+ * Below the first "xive" is the "eXternal Interrupt Virtualization Engine"
+ * ie. P9 new interrupt controller, while the second "xive" is the legacy
+ * "eXternal Interrupt Vector Entry" which is the configuration of an
+ * interrupt on the "xics" interrupt controller on P8 and earlier. Those
+ * two functions consume or produce a legacy "XIVE" state from the
+ * new "XIVE" interrupt controller.
+ */
+extern int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+ u32 priority);
+extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+ u32 *priority);
+extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
+extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
+
+extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq);
+extern int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq);
+extern u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
+
+extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+ int level, bool line_status);
+extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+extern bool kvmppc_xive_native_supported(void);
+
+#else
+static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+ u32 priority) { return -1; }
+static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+ u32 *priority) { return -1; }
+static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
+static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
+
+static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ struct irq_desc *host_desc) { return -ENODEV; }
+static inline int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+ struct irq_desc *host_desc) { return -ENODEV; }
+static inline u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) { return 0; }
+static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { return -ENOENT; }
+
+static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
+ int level, bool line_status) { return -ENODEV; }
+static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { return true; }
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+ { return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return 0; }
+static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return -ENOENT; }
+
+#endif /* CONFIG_KVM_XIVE */
+
+#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
+static inline bool xics_on_xive(void)
+{
+ return xive_enabled() && cpu_has_feature(CPU_FTR_HVMODE);
+}
+#else
+static inline bool xics_on_xive(void)
+{
+ return false;
+}
#endif
+/*
+ * Prototypes for functions called only from assembler code.
+ * Having prototypes reduces sparse errors.
+ */
+long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce);
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+ unsigned int yield_count);
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
+void kvmhv_commence_exit(int trap);
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
+void kvmppc_subcore_enter_guest(void);
+void kvmppc_subcore_exit_guest(void);
+long kvmppc_realmode_hmi_handler(void);
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu);
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel);
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn);
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn);
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src);
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long slb_v, unsigned int status, bool data);
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu);
+
+/*
+ * Host-side operations we want to set up while running in real
+ * mode in the guest operating on the xics.
+ * Currently only VCPU wakeup is supported.
+ */
+
+union kvmppc_rm_state {
+ unsigned long raw;
+ struct {
+ u32 in_host;
+ u32 rm_action;
+ };
+};
+
+struct kvmppc_host_rm_core {
+ union kvmppc_rm_state rm_state;
+ void *rm_data;
+ char pad[112];
+};
+
+struct kvmppc_host_rm_ops {
+ struct kvmppc_host_rm_core *rm_core;
+ void (*vcpu_kick)(struct kvm_vcpu *vcpu);
+};
+
+extern struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+
static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_KVM_BOOKE_HV
@@ -500,13 +924,12 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu,
struct kvm_dirty_tlb *cfg);
long kvmppc_alloc_lpid(void);
-void kvmppc_claim_lpid(long lpid);
void kvmppc_free_lpid(long lpid);
void kvmppc_init_lpid(unsigned long nr_lpids);
-static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
+static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn)
{
- struct page *page;
+ struct folio *folio;
/*
* We can only access pages that the kernel maps
* as memory. Bail out for unmapped ones.
@@ -515,10 +938,10 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
return;
/* Clear i-cache for new pages */
- page = pfn_to_page(pfn);
- if (!test_bit(PG_arch_1, &page->flags)) {
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
+ folio = page_folio(pfn_to_page(pfn));
+ if (!test_bit(PG_dcache_clean, &folio->flags.f)) {
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags.f);
}
}
@@ -539,79 +962,85 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu)
#endif
}
-#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \
+#define KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_GET(reg, bookehv_spr) \
static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
{ \
return mfspr(bookehv_spr); \
} \
-#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \
+#define KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_SET(reg, bookehv_spr) \
static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \
{ \
mtspr(bookehv_spr, val); \
} \
-#define SHARED_WRAPPER_GET(reg, size) \
+#define KVMPPC_VCPU_SHARED_REGS_ACCESSOR_GET(reg, size, iden) \
static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \
{ \
+ if (iden) \
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden) < 0); \
if (kvmppc_shared_big_endian(vcpu)) \
- return be##size##_to_cpu(vcpu->arch.shared->reg); \
+ return be##size##_to_cpu((__be##size __force)vcpu->arch.shared->reg); \
else \
- return le##size##_to_cpu(vcpu->arch.shared->reg); \
+ return le##size##_to_cpu((__le##size __force)vcpu->arch.shared->reg); \
} \
-#define SHARED_WRAPPER_SET(reg, size) \
+#define KVMPPC_VCPU_SHARED_REGS_ACCESSOR_SET(reg, size, iden) \
static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \
{ \
if (kvmppc_shared_big_endian(vcpu)) \
- vcpu->arch.shared->reg = cpu_to_be##size(val); \
+ vcpu->arch.shared->reg = (u##size __force)cpu_to_be##size(val); \
else \
- vcpu->arch.shared->reg = cpu_to_le##size(val); \
+ vcpu->arch.shared->reg = (u##size __force)cpu_to_le##size(val); \
+ \
+ if (iden) \
+ kvmhv_nestedv2_mark_dirty(vcpu, iden); \
} \
-#define SHARED_WRAPPER(reg, size) \
- SHARED_WRAPPER_GET(reg, size) \
- SHARED_WRAPPER_SET(reg, size) \
+#define KVMPPC_VCPU_SHARED_REGS_ACCESSOR(reg, size, iden) \
+ KVMPPC_VCPU_SHARED_REGS_ACCESSOR_GET(reg, size, iden) \
+ KVMPPC_VCPU_SHARED_REGS_ACCESSOR_SET(reg, size, iden) \
-#define SPRNG_WRAPPER(reg, bookehv_spr) \
- SPRNG_WRAPPER_GET(reg, bookehv_spr) \
- SPRNG_WRAPPER_SET(reg, bookehv_spr) \
+#define KVMPPC_BOOKE_HV_SPRNG_ACCESSOR(reg, bookehv_spr) \
+ KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_GET(reg, bookehv_spr) \
+ KVMPPC_BOOKE_HV_SPRNG_ACCESSOR_SET(reg, bookehv_spr) \
#ifdef CONFIG_KVM_BOOKE_HV
-#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \
- SPRNG_WRAPPER(reg, bookehv_spr) \
+#define KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(reg, size, bookehv_spr, iden) \
+ KVMPPC_BOOKE_HV_SPRNG_ACCESSOR(reg, bookehv_spr) \
#else
-#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \
- SHARED_WRAPPER(reg, size) \
+#define KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(reg, size, bookehv_spr, iden) \
+ KVMPPC_VCPU_SHARED_REGS_ACCESSOR(reg, size, iden) \
#endif
-SHARED_WRAPPER(critical, 64)
-SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0)
-SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1)
-SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2)
-SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3)
-SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0)
-SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1)
-SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR)
-SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR)
-SHARED_WRAPPER_GET(msr, 64)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(critical, 64, 0)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg0, 64, SPRN_GSPRG0, KVMPPC_GSID_SPRG0)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg1, 64, SPRN_GSPRG1, KVMPPC_GSID_SPRG1)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg2, 64, SPRN_GSPRG2, KVMPPC_GSID_SPRG2)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(sprg3, 64, SPRN_GSPRG3, KVMPPC_GSID_SPRG3)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(srr0, 64, SPRN_GSRR0, KVMPPC_GSID_SRR0)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(srr1, 64, SPRN_GSRR1, KVMPPC_GSID_SRR1)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(dar, 64, SPRN_GDEAR, KVMPPC_GSID_DAR)
+KVMPPC_BOOKE_HV_SPRNG_OR_VCPU_SHARED_REGS_ACCESSOR(esr, 64, SPRN_GESR, 0)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR_GET(msr, 64, KVMPPC_GSID_MSR)
static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val)
{
if (kvmppc_shared_big_endian(vcpu))
vcpu->arch.shared->msr = cpu_to_be64(val);
else
vcpu->arch.shared->msr = cpu_to_le64(val);
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_MSR);
}
-SHARED_WRAPPER(dsisr, 32)
-SHARED_WRAPPER(int_pending, 32)
-SHARED_WRAPPER(sprg4, 64)
-SHARED_WRAPPER(sprg5, 64)
-SHARED_WRAPPER(sprg6, 64)
-SHARED_WRAPPER(sprg7, 64)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(dsisr, 32, KVMPPC_GSID_DSISR)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(int_pending, 32, 0)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg4, 64, 0)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg5, 64, 0)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg6, 64, 0)
+KVMPPC_VCPU_SHARED_REGS_ACCESSOR(sprg7, 64, 0)
static inline u32 kvmppc_get_sr(struct kvm_vcpu *vcpu, int nr)
{
@@ -647,10 +1076,22 @@ static inline void kvmppc_fix_ee_before_entry(void)
/* Only need to enable IRQs by hard enabling them after this */
local_paca->irq_happened = 0;
- local_paca->soft_enabled = 1;
+ irq_soft_mask_set(IRQS_ENABLED);
#endif
}
+static inline void kvmppc_fix_ee_after_exit(void)
+{
+#ifdef CONFIG_PPC64
+ /* Only need to enable IRQs by hard enabling them after this */
+ local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+#endif
+
+ trace_hardirqs_off();
+}
+
+
static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
{
ulong ea;
diff --git a/arch/powerpc/include/asm/kvm_types.h b/arch/powerpc/include/asm/kvm_types.h
new file mode 100644
index 000000000000..5d4bffea7d47
--- /dev/null
+++ b/arch/powerpc/include/asm/kvm_types.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PPC_KVM_TYPES_H
+#define _ASM_PPC_KVM_TYPES_H
+
+#if IS_MODULE(CONFIG_KVM_BOOK3S_64_PR) && IS_MODULE(CONFIG_KVM_BOOK3S_64_HV)
+#define KVM_SUB_MODULES kvm-pr,kvm-hv
+#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_PR)
+#define KVM_SUB_MODULES kvm-pr
+#elif IS_MODULE(CONFIG_KVM_BOOK3S_64_HV)
+#define KVM_SUB_MODULES kvm-hv
+#else
+#undef KVM_SUB_MODULES
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/libata-portmap.h b/arch/powerpc/include/asm/libata-portmap.h
index 4d8518049f4d..7c602da62560 100644
--- a/arch/powerpc/include/asm/libata-portmap.h
+++ b/arch/powerpc/include/asm/libata-portmap.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_POWERPC_LIBATA_PORTMAP_H
#define __ASM_POWERPC_LIBATA_PORTMAP_H
-#define ATA_PRIMARY_CMD 0x1F0
-#define ATA_PRIMARY_CTL 0x3F6
#define ATA_PRIMARY_IRQ(dev) pci_get_legacy_ide_irq(dev, 0)
-#define ATA_SECONDARY_CMD 0x170
-#define ATA_SECONDARY_CTL 0x376
#define ATA_SECONDARY_IRQ(dev) pci_get_legacy_ide_irq(dev, 1)
#endif
diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h
index e3ad5c72724a..b71b9582e754 100644
--- a/arch/powerpc/include/asm/linkage.h
+++ b/arch/powerpc/include/asm/linkage.h
@@ -1,8 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_LINKAGE_H
#define _ASM_POWERPC_LINKAGE_H
-#ifdef CONFIG_PPC64
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
+#include <asm/types.h>
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
#define cond_syscall(x) \
asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n" \
"\t.weak ." #x "\n\t.set ." #x ", .sys_ni_syscall\n")
@@ -10,6 +12,5 @@
asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \
"\t.globl ." #alias "\n\t.set ." #alias ", ." #name)
#endif
-#endif
#endif /* _ASM_POWERPC_LINKAGE_H */
diff --git a/arch/powerpc/include/asm/livepatch.h b/arch/powerpc/include/asm/livepatch.h
new file mode 100644
index 000000000000..d044a1fd4f44
--- /dev/null
+++ b/arch/powerpc/include/asm/livepatch.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * livepatch.h - powerpc-specific Kernel Live Patching Core
+ *
+ * Copyright (C) 2015-2016, SUSE, IBM Corp.
+ */
+#ifndef _ASM_POWERPC_LIVEPATCH_H
+#define _ASM_POWERPC_LIVEPATCH_H
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+#ifdef CONFIG_LIVEPATCH_64
+static inline void klp_init_thread_info(struct task_struct *p)
+{
+ /* + 1 to account for STACK_END_MAGIC */
+ task_thread_info(p)->livepatch_sp = end_of_stack(p) + 1;
+}
+#else
+static inline void klp_init_thread_info(struct task_struct *p) { }
+#endif
+
+#endif /* _ASM_POWERPC_LIVEPATCH_H */
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index b8da91363864..ec6ced6d7ced 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -1,76 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ARCH_POWERPC_LOCAL_H
#define _ARCH_POWERPC_LOCAL_H
+#ifdef CONFIG_PPC_BOOK3S_64
+
#include <linux/percpu.h>
#include <linux/atomic.h>
+#include <linux/irqflags.h>
+
+#include <asm/hw_irq.h>
typedef struct
{
- atomic_long_t a;
+ long v;
} local_t;
-#define LOCAL_INIT(i) { ATOMIC_LONG_INIT(i) }
-
-#define local_read(l) atomic_long_read(&(l)->a)
-#define local_set(l,i) atomic_long_set(&(l)->a, (i))
-
-#define local_add(i,l) atomic_long_add((i),(&(l)->a))
-#define local_sub(i,l) atomic_long_sub((i),(&(l)->a))
-#define local_inc(l) atomic_long_inc(&(l)->a)
-#define local_dec(l) atomic_long_dec(&(l)->a)
+#define LOCAL_INIT(i) { (i) }
-static __inline__ long local_add_return(long a, local_t *l)
+static __inline__ long local_read(const local_t *l)
{
- long t;
-
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_add_return\n\
- add %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
-
- return t;
+ return READ_ONCE(l->v);
}
-#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
-
-static __inline__ long local_sub_return(long a, local_t *l)
+static __inline__ void local_set(local_t *l, long i)
{
- long t;
+ WRITE_ONCE(l->v, i);
+}
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%2,0) " # local_sub_return\n\
- subf %0,%1,%0\n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%2 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (a), "r" (&(l->a.counter))
- : "cc", "memory");
+#define LOCAL_OP(op, c_op) \
+static __inline__ void local_##op(long i, local_t *l) \
+{ \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ l->v c_op i; \
+ powerpc_local_irq_pmu_restore(flags); \
+}
- return t;
+#define LOCAL_OP_RETURN(op, c_op) \
+static __inline__ long local_##op##_return(long a, local_t *l) \
+{ \
+ long t; \
+ unsigned long flags; \
+ \
+ powerpc_local_irq_pmu_save(flags); \
+ t = (l->v c_op a); \
+ powerpc_local_irq_pmu_restore(flags); \
+ \
+ return t; \
}
-static __inline__ long local_inc_return(local_t *l)
-{
- long t;
+#define LOCAL_OPS(op, c_op) \
+ LOCAL_OP(op, c_op) \
+ LOCAL_OP_RETURN(op, c_op)
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_inc_return\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+LOCAL_OPS(add, +=)
+LOCAL_OPS(sub, -=)
- return t;
-}
+#define local_add_negative(a, l) (local_add_return((a), (l)) < 0)
+#define local_inc_return(l) local_add_return(1LL, l)
+#define local_inc(l) local_inc_return(l)
/*
* local_inc_and_test - increment and test
@@ -80,96 +69,91 @@ static __inline__ long local_inc_return(local_t *l)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define local_inc_and_test(l) (local_inc_return(l) == 0)
+#define local_inc_and_test(l) (local_inc_return(l) == 0)
-static __inline__ long local_dec_return(local_t *l)
+#define local_dec_return(l) local_sub_return(1LL, l)
+#define local_dec(l) local_dec_return(l)
+#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
+#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+
+static __inline__ long local_cmpxchg(local_t *l, long o, long n)
{
long t;
+ unsigned long flags;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_return\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&(l->a.counter))
- : "cc", "xer", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ if (t == o)
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
return t;
}
-#define local_cmpxchg(l, o, n) \
- (cmpxchg_local(&((l)->a.counter), (o), (n)))
-#define local_xchg(l, n) (xchg_local(&((l)->a.counter), (n)))
-
-/**
- * local_add_unless - add unless the number is a given value
- * @l: pointer of type local_t
- * @a: the amount to add to v...
- * @u: ...unless v is equal to u.
- *
- * Atomically adds @a to @l, so long as it was not @u.
- * Returns non-zero if @l was not @u, and zero otherwise.
- */
-static __inline__ int local_add_unless(local_t *l, long a, long u)
+static __inline__ bool local_try_cmpxchg(local_t *l, long *po, long n)
{
- long t;
+ long o = *po, r;
+
+ r = local_cmpxchg(l, o, n);
+ if (unlikely(r != o))
+ *po = r;
- __asm__ __volatile__ (
-"1:" PPC_LLARX(%0,0,%1,0) " # local_add_unless\n\
- cmpw 0,%0,%3 \n\
- beq- 2f \n\
- add %0,%2,%0 \n"
- PPC405_ERR77(0,%2)
- PPC_STLCX "%0,0,%1 \n\
- bne- 1b \n"
-" subf %0,%2,%0 \n\
-2:"
- : "=&r" (t)
- : "r" (&(l->a.counter)), "r" (a), "r" (u)
- : "cc", "memory");
-
- return t != u;
+ return likely(r == o);
}
-#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+static __inline__ long local_xchg(local_t *l, long n)
+{
+ long t;
+ unsigned long flags;
-#define local_sub_and_test(a, l) (local_sub_return((a), (l)) == 0)
-#define local_dec_and_test(l) (local_dec_return((l)) == 0)
+ powerpc_local_irq_pmu_save(flags);
+ t = l->v;
+ l->v = n;
+ powerpc_local_irq_pmu_restore(flags);
-/*
- * Atomically test *l and decrement if it is greater than 0.
- * The function returns the old value of *l minus 1.
+ return t;
+}
+
+/**
+ * local_add_unless - add unless the number is already a given value
+ * @l: pointer of type local_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @l, if @l was not already @u.
+ * Returns true if the addition was done.
*/
-static __inline__ long local_dec_if_positive(local_t *l)
+static __inline__ bool local_add_unless(local_t *l, long a, long u)
{
- long t;
+ unsigned long flags;
+ bool ret = false;
- __asm__ __volatile__(
-"1:" PPC_LLARX(%0,0,%1,0) " # local_dec_if_positive\n\
- cmpwi %0,1\n\
- addi %0,%0,-1\n\
- blt- 2f\n"
- PPC405_ERR77(0,%1)
- PPC_STLCX "%0,0,%1\n\
- bne- 1b"
- "\n\
-2:" : "=&b" (t)
- : "r" (&(l->a.counter))
- : "cc", "memory");
+ powerpc_local_irq_pmu_save(flags);
+ if (l->v != u) {
+ l->v += a;
+ ret = true;
+ }
+ powerpc_local_irq_pmu_restore(flags);
- return t;
+ return ret;
}
+#define local_inc_not_zero(l) local_add_unless((l), 1, 0)
+
/* Use these for per-cpu local_t variables: on some archs they are
* much more efficient than these naive implementations. Note they take
* a variable, not an address.
*/
-#define __local_inc(l) ((l)->a.counter++)
-#define __local_dec(l) ((l)->a.counter++)
-#define __local_add(i,l) ((l)->a.counter+=(i))
-#define __local_sub(i,l) ((l)->a.counter-=(i))
+#define __local_inc(l) ((l)->v++)
+#define __local_dec(l) ((l)->v++)
+#define __local_add(i,l) ((l)->v+=(i))
+#define __local_sub(i,l) ((l)->v-=(i))
+
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#include <asm-generic/local.h>
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* _ARCH_POWERPC_LOCAL_H */
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
deleted file mode 100644
index 36c93b5cc239..000000000000
--- a/arch/powerpc/include/asm/local64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h
index d0a2a2f99564..f40a646bee3c 100644
--- a/arch/powerpc/include/asm/lppaca.h
+++ b/arch/powerpc/include/asm/lppaca.h
@@ -1,23 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* lppaca.h
* Copyright (C) 2001 Mike Corrigan IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_POWERPC_LPPACA_H
#define _ASM_POWERPC_LPPACA_H
+
#ifdef __KERNEL__
/*
@@ -34,16 +22,20 @@
#include <linux/threads.h>
#include <asm/types.h>
#include <asm/mmu.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
/*
- * We only have to have statically allocated lppaca structs on
- * legacy iSeries, which supports at most 64 cpus.
- */
-#define NR_LPPACAS 1
-
-/*
- * The Hypervisor barfs if the lppaca crosses a page boundary. A 1k
- * alignment is sufficient to prevent this
+ * The lppaca is the "virtual processor area" registered with the hypervisor,
+ * H_REGISTER_VPA etc.
+ *
+ * According to PAPR, the structure is 640 bytes long, must be L1 cache line
+ * aligned, and must not cross a 4kB boundary. Its size field must be at
+ * least 640 bytes (but may be more).
+ *
+ * Pre-v4.14 KVM hypervisors reject the VPA if its size field is smaller than
+ * 1kB, so we dynamically allocate 1kB and advertise size as 1kB, but keep
+ * this structure as the canonical 640 byte size.
*/
struct lppaca {
/* cacheline 1 contains read-only data */
@@ -70,7 +62,8 @@ struct lppaca {
u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */
u8 fpregs_in_use;
u8 pmcregs_in_use;
- u8 reserved8[28];
+ u8 l2_counters_enable; /* Enable usage of counters for KVM guest */
+ u8 reserved8[27];
__be64 wait_state_cycles; /* Wait cycles for this proc */
u8 reserved9[28];
__be16 slb_count; /* # of SLBs to maintain */
@@ -91,19 +84,25 @@ struct lppaca {
volatile __be32 dispersion_count; /* dispatch changed physical cpu */
volatile __be64 cmo_faults; /* CMO page fault count */
volatile __be64 cmo_fault_time; /* CMO page fault time */
- u8 reserved10[104];
+ u8 reserved10[64]; /* [S]PURR expropriated/donated */
+ volatile __be64 enqueue_dispatch_tb; /* Total TB enqueue->dispatch */
+ volatile __be64 ready_enqueue_tb; /* Total TB ready->enqueue */
+ volatile __be64 wait_ready_tb; /* Total TB wait->ready */
+ u8 reserved11[16];
/* cacheline 4-5 */
__be32 page_ins; /* CMO Hint - # page ins by OS */
- u8 reserved11[148];
- volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
- u8 reserved12[96];
-} __attribute__((__aligned__(0x400)));
-
-extern struct lppaca lppaca[];
+ u8 reserved12[28];
+ volatile __be64 l1_to_l2_cs_tb;
+ volatile __be64 l2_to_l1_cs_tb;
+ volatile __be64 l2_runtime_tb;
+ u8 reserved13[96];
+ volatile __be64 dtl_idx; /* Dispatch Trace Log head index */
+ u8 reserved14[96];
+} ____cacheline_aligned;
-#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr)
+#define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
/*
* We are using a non architected field to determine if a partition is
@@ -112,11 +111,23 @@ extern struct lppaca lppaca[];
*/
#define LPPACA_OLD_SHARED_PROC 2
-static inline bool lppaca_shared_proc(struct lppaca *l)
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * All CPUs should have the same shared proc value, so directly access the PACA
+ * to avoid false positives from DEBUG_PREEMPT.
+ */
+static inline bool lppaca_shared_proc(void)
{
+ struct lppaca *l = local_paca->lppaca_ptr;
+
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return false;
return !!(l->__old_status & LPPACA_OLD_SHARED_PROC);
}
+#define get_lppaca() (get_paca()->lppaca_ptr)
+#endif
+
/*
* SLB shadow buffer structure as defined in the PAPR. The save_area
* contains adjacent ESID and VSID pairs for each shadowed SLB. The
@@ -132,35 +143,6 @@ struct slb_shadow {
} save_area[SLB_NUM_BOLTED];
} ____cacheline_aligned;
-/*
- * Layout of entries in the hypervisor's dispatch trace log buffer.
- */
-struct dtl_entry {
- u8 dispatch_reason;
- u8 preempt_reason;
- __be16 processor_id;
- __be32 enqueue_to_dispatch_time;
- __be32 ready_to_enqueue_time;
- __be32 waiting_to_ready_time;
- __be64 timebase;
- __be64 fault_addr;
- __be64 srr0;
- __be64 srr1;
-};
-
-#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */
-#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry))
-
-extern struct kmem_cache *dtl_cache;
-
-/*
- * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
- * reading from the dispatch trace log. If other code wants to consume
- * DTL entries, it can set this pointer to a function that will get
- * called once for each DTL entry that gets processed.
- */
-extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
-
#endif /* CONFIG_PPC_BOOK3S */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_LPPACA_H */
diff --git a/arch/powerpc/include/asm/lv1call.h b/arch/powerpc/include/asm/lv1call.h
index f5117674bf92..ae70120953a8 100644
--- a/arch/powerpc/include/asm/lv1call.h
+++ b/arch/powerpc/include/asm/lv1call.h
@@ -1,28 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 hvcall interface.
*
* Copyright (C) 2006 Sony Computer Entertainment Inc.
* Copyright 2006 Sony Corp.
* Copyright 2003, 2004 (c) MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#if !defined(_ASM_POWERPC_LV1CALL_H)
#define _ASM_POWERPC_LV1CALL_H
-#if !defined(__ASSEMBLY__)
+#if !defined(__ASSEMBLER__)
#include <linux/types.h>
#include <linux/export.h>
@@ -223,7 +211,7 @@
{return _lv1_##name(LV1_##in##_IN_##out##_OUT_ARGS);}
#endif
-#endif /* !defined(__ASSEMBLY__) */
+#endif /* !defined(__ASSEMBLER__) */
/* lv1 call table */
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index b125ceab149c..3298eec123a3 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -1,161 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_MACHDEP_H
#define _ASM_POWERPC_MACHDEP_H
#ifdef __KERNEL__
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/seq_file.h>
+#include <linux/compiler.h>
#include <linux/init.h>
-#include <linux/dma-mapping.h>
#include <linux/export.h>
+#include <linux/time64.h>
-#include <asm/setup.h>
-
-/* We export this macro for external modules like Alsa to know if
- * ppc_md.feature_call is implemented or not
- */
-#define CONFIG_PPC_HAS_FEATURE_CALLS
+#include <asm/page.h>
struct pt_regs;
-struct pci_bus;
+struct pci_bus;
+struct device;
struct device_node;
struct iommu_table;
struct rtc_time;
struct file;
+struct pci_dev;
struct pci_controller;
struct kimage;
struct pci_host_bridge;
+struct seq_file;
struct machdep_calls {
- char *name;
+ const char *name;
+ const char *compatible;
+ const char * const *compatibles;
#ifdef CONFIG_PPC64
- void (*hpte_invalidate)(unsigned long slot,
- unsigned long vpn,
- int bpsize, int apsize,
- int ssize, int local);
- long (*hpte_updatepp)(unsigned long slot,
- unsigned long newpp,
- unsigned long vpn,
- int bpsize, int apsize,
- int ssize, int local);
- void (*hpte_updateboltedpp)(unsigned long newpp,
- unsigned long ea,
- int psize, int ssize);
- long (*hpte_insert)(unsigned long hpte_group,
- unsigned long vpn,
- unsigned long prpn,
- unsigned long rflags,
- unsigned long vflags,
- int psize, int apsize,
- int ssize);
- long (*hpte_remove)(unsigned long hpte_group);
- void (*hpte_removebolted)(unsigned long ea,
- int psize, int ssize);
- void (*flush_hash_range)(unsigned long number, int local);
- void (*hugepage_invalidate)(unsigned long vsid,
- unsigned long addr,
- unsigned char *hpte_slot_array,
- int psize, int ssize);
- /* special for kexec, to be called in real mode, linear mapping is
- * destroyed as well */
- void (*hpte_clear_all)(void);
-
- int (*tce_build)(struct iommu_table *tbl,
- long index,
- long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs);
- void (*tce_free)(struct iommu_table *tbl,
- long index,
- long npages);
- unsigned long (*tce_get)(struct iommu_table *tbl,
- long index);
- void (*tce_flush)(struct iommu_table *tbl);
-
- /* _rm versions are for real mode use only */
- int (*tce_build_rm)(struct iommu_table *tbl,
- long index,
- long npages,
- unsigned long uaddr,
- enum dma_data_direction direction,
- struct dma_attrs *attrs);
- void (*tce_free_rm)(struct iommu_table *tbl,
- long index,
- long npages);
- void (*tce_flush_rm)(struct iommu_table *tbl);
-
- void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller);
- void (*iounmap)(volatile void __iomem *token);
-
#ifdef CONFIG_PM
- void (*iommu_save)(void);
void (*iommu_restore)(void);
#endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
unsigned long (*memory_block_size)(void);
#endif
#endif /* CONFIG_PPC64 */
- void (*pci_dma_dev_setup)(struct pci_dev *dev);
- void (*pci_dma_bus_setup)(struct pci_bus *bus);
-
- /* Platform set_dma_mask and dma_get_required_mask overrides */
- int (*dma_set_mask)(struct device *dev, u64 dma_mask);
- u64 (*dma_get_required_mask)(struct device *dev);
+ void (*dma_set_mask)(struct device *dev, u64 dma_mask);
int (*probe)(void);
void (*setup_arch)(void); /* Optional, may be NULL */
- void (*init_early)(void);
/* Optional, may be NULL. */
void (*show_cpuinfo)(struct seq_file *m);
- void (*show_percpuinfo)(struct seq_file *m, int i);
/* Returns the current operating frequency of "cpu" in Hz */
unsigned long (*get_proc_freq)(unsigned int cpu);
void (*init_IRQ)(void);
- /* Return an irq, or NO_IRQ to indicate there are none pending. */
+ /* Return an irq, or 0 to indicate there are none pending. */
unsigned int (*get_irq)(void);
/* PCI stuff */
- /* Called after scanning the bus, before allocating resources */
+ /* Called after allocating resources */
void (*pcibios_fixup)(void);
- int (*pci_probe_mode)(struct pci_bus *);
void (*pci_irq_fixup)(struct pci_dev *dev);
int (*pcibios_root_bridge_prepare)(struct pci_host_bridge
*bridge);
+ /* finds all the pci_controllers present at boot */
+ void (*discover_phbs)(void);
+
/* To setup PHBs when using automatic OF platform driver for PCI */
int (*pci_setup_phb)(struct pci_controller *host);
-#ifdef CONFIG_PCI_MSI
- int (*msi_check_device)(struct pci_dev* dev,
- int nvec, int type);
- int (*setup_msi_irqs)(struct pci_dev *dev,
- int nvec, int type);
- void (*teardown_msi_irqs)(struct pci_dev *dev);
-#endif
-
- void (*restart)(char *cmd);
- void (*power_off)(void);
- void (*halt)(void);
+ void __noreturn (*restart)(char *cmd);
+ void __noreturn (*halt)(void);
void (*panic)(char *str);
- void (*cpu_die)(void);
long (*time_init)(void); /* Optional, may be NULL */
int (*set_rtc_time)(struct rtc_time *);
void (*get_rtc_time)(struct rtc_time *);
- unsigned long (*get_boot_time)(void);
- unsigned char (*rtc_read_val)(int addr);
- void (*rtc_write_val)(int addr, unsigned char val);
+ time64_t (*get_boot_time)(void);
void (*calibrate_decr)(void);
@@ -167,8 +83,8 @@ struct machdep_calls {
unsigned char (*nvram_read_val)(int addr);
void (*nvram_write_val)(int addr, unsigned char val);
ssize_t (*nvram_write)(char *buf, size_t count, loff_t *index);
- ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index);
- ssize_t (*nvram_size)(void);
+ ssize_t (*nvram_read)(char *buf, size_t count, loff_t *index);
+ ssize_t (*nvram_size)(void);
void (*nvram_sync)(void);
/* Exception handlers */
@@ -178,22 +94,24 @@ struct machdep_calls {
/* Early exception handlers called in realmode */
int (*hmi_exception_early)(struct pt_regs *regs);
+ long (*machine_check_early)(struct pt_regs *regs);
/* Called during machine check exception to retrive fixup address. */
bool (*mce_check_early_recovery)(struct pt_regs *regs);
+ void (*machine_check_log_err)(void);
+
/* Motherboard/chipset features. This is a kind of general purpose
* hook used to control some machine specific features (like reset
* lines, chip power control, etc...).
*/
long (*feature_call)(unsigned int feature, ...);
- /* Get legacy PCI/IDE interrupt mapping */
+ /* Get legacy PCI/IDE interrupt mapping */
int (*pci_get_legacy_ide_irq)(struct pci_dev *dev, int channel);
-
+
/* Get access protection for /dev/mem */
- pgprot_t (*phys_mem_access_prot)(struct file *file,
- unsigned long pfn,
+ pgprot_t (*phys_mem_access_prot)(unsigned long pfn,
unsigned long size,
pgprot_t vma_prot);
@@ -207,12 +125,12 @@ struct machdep_calls {
platform, called once per cpu. */
void (*enable_pmcs)(void);
- /* Set DABR for this platform, leave empty for default implemenation */
+ /* Set DABR for this platform, leave empty for default implementation */
int (*set_dabr)(unsigned long dabr,
unsigned long dabrx);
- /* Set DAWR for this platform, leave empty for default implemenation */
- int (*set_dawr)(unsigned long dawr,
+ /* Set DAWR for this platform, leave empty for default implementation */
+ int (*set_dawr)(int nr, unsigned long dawr,
unsigned long dawrx);
#ifdef CONFIG_PPC32 /* XXX for now */
@@ -220,8 +138,6 @@ struct machdep_calls {
May be NULL. */
void (*init)(void);
- void (*kgdb_map_scc)(void);
-
/*
* optional PCI "hooks"
*/
@@ -240,40 +156,38 @@ struct machdep_calls {
/* Called for each PCI bus in the system when it's probed */
void (*pcibios_fixup_bus)(struct pci_bus *);
- /* Called when pci_enable_device() is called. Returns 0 to
- * allow assignment/enabling of the device. */
- int (*pcibios_enable_device_hook)(struct pci_dev *);
-
/* Called after scan and before resource survey */
void (*pcibios_fixup_phb)(struct pci_controller *hose);
- /* Called during PCI resource reassignment */
- resource_size_t (*pcibios_window_alignment)(struct pci_bus *, unsigned long type);
+ /*
+ * Called after device has been added to bus and
+ * before sysfs has been created.
+ */
+ void (*pcibios_bus_add_device)(struct pci_dev *pdev);
- /* Reset the secondary bus of bridge */
- void (*pcibios_reset_secondary_bus)(struct pci_dev *dev);
+ resource_size_t (*pcibios_default_alignment)(void);
+
+#ifdef CONFIG_PCI_IOV
+ void (*pcibios_fixup_sriov)(struct pci_dev *pdev);
+ resource_size_t (*pcibios_iov_resource_alignment)(struct pci_dev *, int resno);
+ int (*pcibios_sriov_enable)(struct pci_dev *pdev, u16 num_vfs);
+ int (*pcibios_sriov_disable)(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
/* Called to shutdown machine specific hardware not already controlled
* by other drivers.
*/
void (*machine_shutdown)(void);
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void (*kexec_cpu_down)(int crash_shutdown, int secondary);
- /* Called to do what every setup is needed on image and the
- * reboot code buffer. Returns 0 on success.
- * Provide your own (maybe dummy) implementation if your platform
- * claims to support kexec.
- */
- int (*machine_kexec_prepare)(struct kimage *image);
-
/* Called to perform the _real_ kexec.
* Do NOT allocate memory or fail here. We are past the point of
* no return.
*/
void (*machine_kexec)(struct kimage *image);
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
#ifdef CONFIG_SUSPEND
/* These are called to disable and enable, respectively, IRQs when
@@ -284,27 +198,18 @@ struct machdep_calls {
void (*suspend_disable_irqs)(void);
void (*suspend_enable_irqs)(void);
#endif
- int (*suspend_disable_cpu)(void);
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ssize_t (*cpu_probe)(const char *, size_t);
ssize_t (*cpu_release)(const char *, size_t);
#endif
-#ifdef CONFIG_ARCH_RANDOM
- int (*get_random_long)(unsigned long *v);
-#endif
-
-#ifdef CONFIG_MEMORY_HOTREMOVE
- int (*remove_memory)(u64, u64);
-#endif
+ int (*get_random_seed)(unsigned long *v);
};
extern void e500_idle(void);
extern void power4_idle(void);
-extern void power7_idle(void);
extern void ppc6xx_idle(void);
-extern void book3e_idle(void);
/*
* ppc_md contains a copy of the machine description structure for the
@@ -314,48 +219,24 @@ extern void book3e_idle(void);
extern struct machdep_calls ppc_md;
extern struct machdep_calls *machine_id;
-#define __machine_desc __attribute__ ((__section__ (".machine.desc")))
+#define __machine_desc __section(".machine.desc")
#define define_machine(name) \
extern struct machdep_calls mach_##name; \
EXPORT_SYMBOL(mach_##name); \
struct machdep_calls mach_##name __machine_desc =
-#define machine_is(name) \
- ({ \
- extern struct machdep_calls mach_##name \
- __attribute__((weak)); \
- machine_id == &mach_##name; \
- })
-
-extern void probe_machine(void);
-
-extern char cmd_line[COMMAND_LINE_SIZE];
+static inline bool __machine_is(const struct machdep_calls *md)
+{
+ WARN_ON(!machine_id); // complain if used before probe_machine()
+ return machine_id == md;
+}
-#ifdef CONFIG_PPC_PMAC
-/*
- * Power macintoshes have either a CUDA, PMU or SMU controlling
- * system reset, power, NVRAM, RTC.
- */
-typedef enum sys_ctrler_kind {
- SYS_CTRLER_UNKNOWN = 0,
- SYS_CTRLER_CUDA = 1,
- SYS_CTRLER_PMU = 2,
- SYS_CTRLER_SMU = 3,
-} sys_ctrler_t;
-extern sys_ctrler_t sys_ctrler;
-
-#endif /* CONFIG_PPC_PMAC */
-
-
-/* Functions to produce codes on the leds.
- * The SRC code should be unique for the message category and should
- * be limited to the lower 24 bits (the upper 8 are set by these funcs),
- * and (for boot & dump) should be sorted numerically in the order
- * the events occur.
- */
-/* Print a boot progress message. */
-void ppc64_boot_msg(unsigned int src, const char *msg);
+#define machine_is(name) \
+ ({ \
+ extern struct machdep_calls mach_##name __weak; \
+ __machine_is(&mach_##name); \
+ })
static inline void log_error(char *buf, unsigned int err_type, int fatal)
{
diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h
index 27af7f8bbb8d..9203ff6acbf6 100644
--- a/arch/powerpc/include/asm/macio.h
+++ b/arch/powerpc/include/asm/macio.h
@@ -1,10 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MACIO_ASIC_H__
#define __MACIO_ASIC_H__
#ifdef __KERNEL__
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
-extern struct bus_type macio_bus_type;
+extern const struct bus_type macio_bus_type;
/* MacIO device driver is defined later */
struct macio_driver;
@@ -124,7 +126,7 @@ static inline struct pci_dev *macio_get_pci_dev(struct macio_dev *mdev)
struct macio_driver
{
int (*probe)(struct macio_dev* dev, const struct of_device_id *match);
- int (*remove)(struct macio_dev* dev);
+ void (*remove)(struct macio_dev *dev);
int (*suspend)(struct macio_dev* dev, pm_message_t state);
int (*resume)(struct macio_dev* dev);
diff --git a/arch/powerpc/include/asm/mc146818rtc.h b/arch/powerpc/include/asm/mc146818rtc.h
index f2741c8b59a1..d9e4ecd41009 100644
--- a/arch/powerpc/include/asm/mc146818rtc.h
+++ b/arch/powerpc/include/asm/mc146818rtc.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_MC146818RTC_H
#define _ASM_POWERPC_MC146818RTC_H
/*
* Machine dependent access functions for RTC registers.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index f97d8cb6bdf6..c9f0936bd3c9 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Machine check exception header file.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2013 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -24,48 +11,6 @@
#include <linux/bitops.h>
-/*
- * Machine Check bits on power7 and power8
- */
-#define P7_SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) /* P8 too */
-
-/* SRR1 bits for machine check (On Power7 and Power8) */
-#define P7_SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43, 45)) /* P8 too */
-
-#define P7_SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45))
-#define P7_SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) /* P8 too */
-#define P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45))
-
-/* SRR1 bits for machine check (On Power8) */
-#define P8_SRR1_MC_IFETCH_ERAT_MULTIHIT (0x4 << PPC_BITLSHIFT(45))
-
-/* DSISR bits for machine check (On Power7 and Power8) */
-#define P7_DSISR_MC_UE (PPC_BIT(48)) /* P8 too */
-#define P7_DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) /* P8 too */
-#define P7_DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) /* P8 too */
-#define P7_DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) /* P8 too */
-#define P7_DSISR_MC_SLB_PARITY_MFSLB (PPC_BIT(55)) /* P8 too */
-#define P7_DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) /* P8 too */
-#define P7_DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) /* P8 too */
-
-/*
- * DSISR bits for machine check (Power8) in addition to above.
- * Secondary DERAT Multihit
- */
-#define P8_DSISR_MC_ERAT_MULTIHIT_SEC (PPC_BIT(54))
-
-/* SLB error bits */
-#define P7_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_ERAT_MULTIHIT | \
- P7_DSISR_MC_SLB_PARITY_MFSLB | \
- P7_DSISR_MC_SLB_MULTIHIT | \
- P7_DSISR_MC_SLB_MULTIHIT_PARITY)
-
-#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
- P8_DSISR_MC_ERAT_MULTIHIT_SEC)
enum MCE_Version {
MCE_V1 = 1,
};
@@ -73,7 +18,7 @@ enum MCE_Version {
enum MCE_Severity {
MCE_SEV_NO_ERROR = 0,
MCE_SEV_WARNING = 1,
- MCE_SEV_ERROR_SYNC = 2,
+ MCE_SEV_SEVERE = 2,
MCE_SEV_FATAL = 3,
};
@@ -85,6 +30,10 @@ enum MCE_Disposition {
enum MCE_Initiator {
MCE_INITIATOR_UNKNOWN = 0,
MCE_INITIATOR_CPU = 1,
+ MCE_INITIATOR_PCI = 2,
+ MCE_INITIATOR_ISA = 3,
+ MCE_INITIATOR_MEMORY= 4,
+ MCE_INITIATOR_POWERMGM = 5,
};
enum MCE_ErrorType {
@@ -93,6 +42,19 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_SLB = 2,
MCE_ERROR_TYPE_ERAT = 3,
MCE_ERROR_TYPE_TLB = 4,
+ MCE_ERROR_TYPE_USER = 5,
+ MCE_ERROR_TYPE_RA = 6,
+ MCE_ERROR_TYPE_LINK = 7,
+ MCE_ERROR_TYPE_DCACHE = 8,
+ MCE_ERROR_TYPE_ICACHE = 9,
+};
+
+enum MCE_ErrorClass {
+ MCE_ECLASS_UNKNOWN = 0,
+ MCE_ECLASS_HARDWARE,
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_ECLASS_SOFTWARE,
+ MCE_ECLASS_SOFT_INDETERMINATE,
};
enum MCE_UeErrorType {
@@ -121,51 +83,106 @@ enum MCE_TlbErrorType {
MCE_TLB_ERROR_MULTIHIT = 2,
};
+enum MCE_UserErrorType {
+ MCE_USER_ERROR_INDETERMINATE = 0,
+ MCE_USER_ERROR_TLBIE = 1,
+ MCE_USER_ERROR_SCV = 2,
+};
+
+enum MCE_RaErrorType {
+ MCE_RA_ERROR_INDETERMINATE = 0,
+ MCE_RA_ERROR_IFETCH = 1,
+ MCE_RA_ERROR_IFETCH_FOREIGN = 2,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH = 3,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN = 4,
+ MCE_RA_ERROR_LOAD = 5,
+ MCE_RA_ERROR_STORE = 6,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 7,
+ MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN = 8,
+ MCE_RA_ERROR_LOAD_STORE_FOREIGN = 9,
+};
+
+enum MCE_LinkErrorType {
+ MCE_LINK_ERROR_INDETERMINATE = 0,
+ MCE_LINK_ERROR_IFETCH_TIMEOUT = 1,
+ MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT = 2,
+ MCE_LINK_ERROR_LOAD_TIMEOUT = 3,
+ MCE_LINK_ERROR_STORE_TIMEOUT = 4,
+ MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT = 5,
+};
+
struct machine_check_event {
- enum MCE_Version version:8; /* 0x00 */
- uint8_t in_use; /* 0x01 */
- enum MCE_Severity severity:8; /* 0x02 */
- enum MCE_Initiator initiator:8; /* 0x03 */
- enum MCE_ErrorType error_type:8; /* 0x04 */
- enum MCE_Disposition disposition:8; /* 0x05 */
- uint8_t reserved_1[2]; /* 0x06 */
- uint64_t gpr3; /* 0x08 */
- uint64_t srr0; /* 0x10 */
- uint64_t srr1; /* 0x18 */
- union { /* 0x20 */
+ enum MCE_Version version:8;
+ u8 in_use;
+ enum MCE_Severity severity:8;
+ enum MCE_Initiator initiator:8;
+ enum MCE_ErrorType error_type:8;
+ enum MCE_ErrorClass error_class:8;
+ enum MCE_Disposition disposition:8;
+ bool sync_error;
+ u16 cpu;
+ u64 gpr3;
+ u64 srr0;
+ u64 srr1;
+ union {
struct {
enum MCE_UeErrorType ue_error_type:8;
- uint8_t effective_address_provided;
- uint8_t physical_address_provided;
- uint8_t reserved_1[5];
- uint64_t effective_address;
- uint64_t physical_address;
- uint8_t reserved_2[8];
+ u8 effective_address_provided;
+ u8 physical_address_provided;
+ u8 ignore_event;
+ u8 reserved_1[4];
+ u64 effective_address;
+ u64 physical_address;
+ u8 reserved_2[8];
} ue_error;
struct {
enum MCE_SlbErrorType slb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} slb_error;
struct {
enum MCE_EratErrorType erat_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} erat_error;
struct {
enum MCE_TlbErrorType tlb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} tlb_error;
+
+ struct {
+ enum MCE_UserErrorType user_error_type:8;
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
+ } user_error;
+
+ struct {
+ enum MCE_RaErrorType ra_error_type:8;
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
+ } ra_error;
+
+ struct {
+ enum MCE_LinkErrorType link_error_type:8;
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
+ } link_error;
} u;
};
@@ -176,23 +193,76 @@ struct mce_error_info {
enum MCE_SlbErrorType slb_error_type:8;
enum MCE_EratErrorType erat_error_type:8;
enum MCE_TlbErrorType tlb_error_type:8;
+ enum MCE_UserErrorType user_error_type:8;
+ enum MCE_RaErrorType ra_error_type:8;
+ enum MCE_LinkErrorType link_error_type:8;
} u;
- uint8_t reserved[2];
+ enum MCE_Severity severity:8;
+ enum MCE_Initiator initiator:8;
+ enum MCE_ErrorClass error_class:8;
+ bool sync_error;
+ bool ignore_event;
};
-#define MAX_MC_EVT 100
+#define MAX_MC_EVT 10
+
+struct mce_info {
+ int mce_nest_count;
+ struct machine_check_event mce_event[MAX_MC_EVT];
+ /* Queue for delayed MCE events. */
+ int mce_queue_count;
+ struct machine_check_event mce_event_queue[MAX_MC_EVT];
+ /* Queue for delayed MCE UE events. */
+ int mce_ue_count;
+ struct machine_check_event mce_ue_event_queue[MAX_MC_EVT];
+};
/* Release flags for get_mce_event() */
#define MCE_EVENT_RELEASE true
#define MCE_EVENT_DONTRELEASE false
+struct pt_regs;
+struct notifier_block;
+
extern void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err, uint64_t nip,
- uint64_t addr);
+ uint64_t addr, uint64_t phys_addr);
extern int get_mce_event(struct machine_check_event *mce, bool release);
extern void release_mce_event(void);
extern void machine_check_queue_event(void);
-extern void machine_check_print_event_info(struct machine_check_event *evt);
-extern uint64_t get_mce_fault_addr(struct machine_check_event *evt);
+extern void machine_check_print_event_info(struct machine_check_event *evt,
+ bool user_mode, bool in_guest);
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
+extern void mce_common_process_ue(struct pt_regs *regs,
+ struct mce_error_info *mce_err);
+void mce_irq_work_queue(void);
+int mce_register_notifier(struct notifier_block *nb);
+int mce_unregister_notifier(struct notifier_block *nb);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void mce_run_irq_context_handlers(void);
+#else
+static inline void mce_run_irq_context_handlers(void) { };
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void set_mce_pending_irq_work(void);
+void clear_mce_pending_irq_work(void);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void flush_and_reload_slb(void);
+void flush_erat(void);
+long __machine_check_early_realmode_p7(struct pt_regs *regs);
+long __machine_check_early_realmode_p8(struct pt_regs *regs);
+long __machine_check_early_realmode_p9(struct pt_regs *regs);
+long __machine_check_early_realmode_p10(struct pt_regs *regs);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void mce_init(void);
+#else
+static inline void mce_init(void) { };
+#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/include/asm/mediabay.h b/arch/powerpc/include/asm/mediabay.h
index 11037a4133ee..230fda4707b8 100644
--- a/arch/powerpc/include/asm/mediabay.h
+++ b/arch/powerpc/include/asm/mediabay.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* mediabay.h: definitions for using the media bay
* on PowerBook 3400 and similar computers.
diff --git a/arch/powerpc/include/asm/mem_encrypt.h b/arch/powerpc/include/asm/mem_encrypt.h
new file mode 100644
index 000000000000..2f26b8fc8d29
--- /dev/null
+++ b/arch/powerpc/include/asm/mem_encrypt.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 IBM Corporation
+ */
+
+#ifndef _ASM_POWERPC_MEM_ENCRYPT_H
+#define _ASM_POWERPC_MEM_ENCRYPT_H
+
+#include <asm/svm.h>
+
+static inline bool force_dma_unencrypted(struct device *dev)
+{
+ return is_secure_guest();
+}
+
+int set_memory_encrypted(unsigned long addr, int numpages);
+int set_memory_decrypted(unsigned long addr, int numpages);
+
+#endif /* _ASM_POWERPC_MEM_ENCRYPT_H */
diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
new file mode 100644
index 000000000000..de7f79157918
--- /dev/null
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -0,0 +1,28 @@
+#ifndef _ASM_POWERPC_MEMBARRIER_H
+#define _ASM_POWERPC_MEMBARRIER_H
+
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ /*
+ * Only need the full barrier when switching between processes.
+ * Barrier when switching from kernel to userspace is not
+ * required here, given that it is implied by mmdrop(). Barrier
+ * when switching from userspace to kernel is not needed after
+ * store to rq->curr.
+ */
+ if (IS_ENABLED(CONFIG_SMP) &&
+ likely(!(atomic_read(&next->membarrier_state) &
+ (MEMBARRIER_STATE_PRIVATE_EXPEDITED |
+ MEMBARRIER_STATE_GLOBAL_EXPEDITED)) || !prev))
+ return;
+
+ /*
+ * The membarrier system call requires a full memory barrier
+ * after storing to rq->curr, before going back to user-space.
+ */
+ smp_mb();
+}
+
+#endif /* _ASM_POWERPC_MEMBARRIER_H */
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 8565c254151a..912f78a956a1 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -1,44 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_MMAN_H
#define _ASM_POWERPC_MMAN_H
#include <uapi/asm/mman.h>
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) && !defined(BUILD_VDSO)
#include <asm/cputable.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/firmware.h>
-/*
- * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
- * here. How important is the optimization?
- */
-static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot)
-{
- return (prot & PROT_SAO) ? VM_SAO : 0;
-}
-#define arch_calc_vm_prot_bits(prot) arch_calc_vm_prot_bits(prot)
-
-static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
+static inline vm_flags_t arch_calc_vm_prot_bits(unsigned long prot,
+ unsigned long pkey)
{
- return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#ifdef CONFIG_PPC_MEM_KEYS
+ return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
+#else
+ return ((prot & PROT_SAO) ? VM_SAO : 0);
+#endif
}
-#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
+#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
-static inline int arch_validate_prot(unsigned long prot)
+static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
{
if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
- return 0;
- if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO))
- return 0;
- return 1;
+ return false;
+ if (prot & PROT_SAO) {
+ if (!cpu_has_feature(CPU_FTR_SAO))
+ return false;
+ if (firmware_has_feature(FW_FEATURE_LPAR) &&
+ !IS_ENABLED(CONFIG_PPC_PROT_SAO_LPAR))
+ return false;
+ }
+ return true;
}
-#define arch_validate_prot(prot) arch_validate_prot(prot)
+#define arch_validate_prot arch_validate_prot
#endif /* CONFIG_PPC64 */
#endif /* _ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/asm/mmiowb.h b/arch/powerpc/include/asm/mmiowb.h
new file mode 100644
index 000000000000..74a00127eb20
--- /dev/null
+++ b/arch/powerpc/include/asm/mmiowb.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMIOWB_H
+#define _ASM_POWERPC_MMIOWB_H
+
+#ifdef CONFIG_MMIOWB
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/paca.h>
+
+#define arch_mmiowb_state() (&local_paca->mmiowb_state)
+#define mmiowb() mb()
+
+#endif /* CONFIG_MMIOWB */
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* _ASM_POWERPC_MMIOWB_H */
diff --git a/arch/powerpc/include/asm/mmu-40x.h b/arch/powerpc/include/asm/mmu-40x.h
deleted file mode 100644
index 34916865eaef..000000000000
--- a/arch/powerpc/include/asm/mmu-40x.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef _ASM_POWERPC_MMU_40X_H_
-#define _ASM_POWERPC_MMU_40X_H_
-
-/*
- * PPC40x support
- */
-
-#define PPC40X_TLB_SIZE 64
-
-/*
- * TLB entries are defined by a "high" tag portion and a "low" data
- * portion. On all architectures, the data portion is 32-bits.
- *
- * TLB entries are managed entirely under software control by reading,
- * writing, and searchoing using the 4xx-specific tlbre, tlbwr, and tlbsx
- * instructions.
- */
-
-#define TLB_LO 1
-#define TLB_HI 0
-
-#define TLB_DATA TLB_LO
-#define TLB_TAG TLB_HI
-
-/* Tag portion */
-
-#define TLB_EPN_MASK 0xFFFFFC00 /* Effective Page Number */
-#define TLB_PAGESZ_MASK 0x00000380
-#define TLB_PAGESZ(x) (((x) & 0x7) << 7)
-#define PAGESZ_1K 0
-#define PAGESZ_4K 1
-#define PAGESZ_16K 2
-#define PAGESZ_64K 3
-#define PAGESZ_256K 4
-#define PAGESZ_1M 5
-#define PAGESZ_4M 6
-#define PAGESZ_16M 7
-#define TLB_VALID 0x00000040 /* Entry is valid */
-
-/* Data portion */
-
-#define TLB_RPN_MASK 0xFFFFFC00 /* Real Page Number */
-#define TLB_PERM_MASK 0x00000300
-#define TLB_EX 0x00000200 /* Instruction execution allowed */
-#define TLB_WR 0x00000100 /* Writes permitted */
-#define TLB_ZSEL_MASK 0x000000F0
-#define TLB_ZSEL(x) (((x) & 0xF) << 4)
-#define TLB_ATTR_MASK 0x0000000F
-#define TLB_W 0x00000008 /* Caching is write-through */
-#define TLB_I 0x00000004 /* Caching is inhibited */
-#define TLB_M 0x00000002 /* Memory is coherent */
-#define TLB_G 0x00000001 /* Memory is guarded from prefetch */
-
-#ifndef __ASSEMBLY__
-
-typedef struct {
- unsigned int id;
- unsigned int active;
- unsigned long vdso_base;
-} mm_context_t;
-
-#endif /* !__ASSEMBLY__ */
-
-#define mmu_virtual_psize MMU_PAGE_4K
-#define mmu_linear_psize MMU_PAGE_256M
-
-#endif /* _ASM_POWERPC_MMU_40X_H_ */
diff --git a/arch/powerpc/include/asm/mmu-hash32.h b/arch/powerpc/include/asm/mmu-hash32.h
deleted file mode 100644
index 16f513e5cbd7..000000000000
--- a/arch/powerpc/include/asm/mmu-hash32.h
+++ /dev/null
@@ -1,93 +0,0 @@
-#ifndef _ASM_POWERPC_MMU_HASH32_H_
-#define _ASM_POWERPC_MMU_HASH32_H_
-/*
- * 32-bit hash table MMU support
- */
-
-/*
- * BATs
- */
-
-/* Block size masks */
-#define BL_128K 0x000
-#define BL_256K 0x001
-#define BL_512K 0x003
-#define BL_1M 0x007
-#define BL_2M 0x00F
-#define BL_4M 0x01F
-#define BL_8M 0x03F
-#define BL_16M 0x07F
-#define BL_32M 0x0FF
-#define BL_64M 0x1FF
-#define BL_128M 0x3FF
-#define BL_256M 0x7FF
-
-/* BAT Access Protection */
-#define BPP_XX 0x00 /* No access */
-#define BPP_RX 0x01 /* Read only */
-#define BPP_RW 0x02 /* Read/write */
-
-#ifndef __ASSEMBLY__
-/* Contort a phys_addr_t into the right format/bits for a BAT */
-#ifdef CONFIG_PHYS_64BIT
-#define BAT_PHYS_ADDR(x) ((u32)((x & 0x00000000fffe0000ULL) | \
- ((x & 0x0000000e00000000ULL) >> 24) | \
- ((x & 0x0000000100000000ULL) >> 30)))
-#else
-#define BAT_PHYS_ADDR(x) (x)
-#endif
-
-struct ppc_bat {
- u32 batu;
- u32 batl;
-};
-#endif /* !__ASSEMBLY__ */
-
-/*
- * Hash table
- */
-
-/* Values for PP (assumes Ks=0, Kp=1) */
-#define PP_RWXX 0 /* Supervisor read/write, User none */
-#define PP_RWRX 1 /* Supervisor read/write, User read */
-#define PP_RWRW 2 /* Supervisor read/write, User read/write */
-#define PP_RXRX 3 /* Supervisor read, User read */
-
-#ifndef __ASSEMBLY__
-
-/*
- * Hardware Page Table Entry
- * Note that the xpn and x bitfields are used only by processors that
- * support extended addressing; otherwise, those bits are reserved.
- */
-struct hash_pte {
- unsigned long v:1; /* Entry is valid */
- unsigned long vsid:24; /* Virtual segment identifier */
- unsigned long h:1; /* Hash algorithm indicator */
- unsigned long api:6; /* Abbreviated page index */
- unsigned long rpn:20; /* Real (physical) page number */
- unsigned long xpn:3; /* Real page number bits 0-2, optional */
- unsigned long r:1; /* Referenced */
- unsigned long c:1; /* Changed */
- unsigned long w:1; /* Write-thru cache mode */
- unsigned long i:1; /* Cache inhibited */
- unsigned long m:1; /* Memory coherence */
- unsigned long g:1; /* Guarded */
- unsigned long x:1; /* Real page number bit 3, optional */
- unsigned long pp:2; /* Page protection */
-};
-
-typedef struct {
- unsigned long id;
- unsigned long vdso_base;
-} mm_context_t;
-
-#endif /* !__ASSEMBLY__ */
-
-/* We happily ignore the smaller BATs on 601, we don't actually use
- * those definitions on hash32 at the moment anyway
- */
-#define mmu_virtual_psize MMU_PAGE_4K
-#define mmu_linear_psize MMU_PAGE_256M
-
-#endif /* _ASM_POWERPC_MMU_HASH32_H_ */
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
deleted file mode 100644
index d76514487d6f..000000000000
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ /dev/null
@@ -1,591 +0,0 @@
-#ifndef _ASM_POWERPC_MMU_HASH64_H_
-#define _ASM_POWERPC_MMU_HASH64_H_
-/*
- * PowerPC64 memory management structures
- *
- * Dave Engebretsen & Mike Corrigan <{engebret|mikejc}@us.ibm.com>
- * PPC64 rework.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/asm-compat.h>
-#include <asm/page.h>
-
-/*
- * This is necessary to get the definition of PGTABLE_RANGE which we
- * need for various slices related matters. Note that this isn't the
- * complete pgtable.h but only a portion of it.
- */
-#include <asm/pgtable-ppc64.h>
-#include <asm/bug.h>
-#include <asm/processor.h>
-
-/*
- * SLB
- */
-
-#define SLB_NUM_BOLTED 3
-#define SLB_CACHE_ENTRIES 8
-#define SLB_MIN_SIZE 32
-
-/* Bits in the SLB ESID word */
-#define SLB_ESID_V ASM_CONST(0x0000000008000000) /* valid */
-
-/* Bits in the SLB VSID word */
-#define SLB_VSID_SHIFT 12
-#define SLB_VSID_SHIFT_1T 24
-#define SLB_VSID_SSIZE_SHIFT 62
-#define SLB_VSID_B ASM_CONST(0xc000000000000000)
-#define SLB_VSID_B_256M ASM_CONST(0x0000000000000000)
-#define SLB_VSID_B_1T ASM_CONST(0x4000000000000000)
-#define SLB_VSID_KS ASM_CONST(0x0000000000000800)
-#define SLB_VSID_KP ASM_CONST(0x0000000000000400)
-#define SLB_VSID_N ASM_CONST(0x0000000000000200) /* no-execute */
-#define SLB_VSID_L ASM_CONST(0x0000000000000100)
-#define SLB_VSID_C ASM_CONST(0x0000000000000080) /* class */
-#define SLB_VSID_LP ASM_CONST(0x0000000000000030)
-#define SLB_VSID_LP_00 ASM_CONST(0x0000000000000000)
-#define SLB_VSID_LP_01 ASM_CONST(0x0000000000000010)
-#define SLB_VSID_LP_10 ASM_CONST(0x0000000000000020)
-#define SLB_VSID_LP_11 ASM_CONST(0x0000000000000030)
-#define SLB_VSID_LLP (SLB_VSID_L|SLB_VSID_LP)
-
-#define SLB_VSID_KERNEL (SLB_VSID_KP)
-#define SLB_VSID_USER (SLB_VSID_KP|SLB_VSID_KS|SLB_VSID_C)
-
-#define SLBIE_C (0x08000000)
-#define SLBIE_SSIZE_SHIFT 25
-
-/*
- * Hash table
- */
-
-#define HPTES_PER_GROUP 8
-
-#define HPTE_V_SSIZE_SHIFT 62
-#define HPTE_V_AVPN_SHIFT 7
-#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
-#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
-#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
-#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
-#define HPTE_V_LOCK ASM_CONST(0x0000000000000008)
-#define HPTE_V_LARGE ASM_CONST(0x0000000000000004)
-#define HPTE_V_SECONDARY ASM_CONST(0x0000000000000002)
-#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
-
-#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
-#define HPTE_R_TS ASM_CONST(0x4000000000000000)
-#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
-#define HPTE_R_RPN_SHIFT 12
-#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
-#define HPTE_R_PP ASM_CONST(0x0000000000000003)
-#define HPTE_R_N ASM_CONST(0x0000000000000004)
-#define HPTE_R_G ASM_CONST(0x0000000000000008)
-#define HPTE_R_M ASM_CONST(0x0000000000000010)
-#define HPTE_R_I ASM_CONST(0x0000000000000020)
-#define HPTE_R_W ASM_CONST(0x0000000000000040)
-#define HPTE_R_WIMG ASM_CONST(0x0000000000000078)
-#define HPTE_R_C ASM_CONST(0x0000000000000080)
-#define HPTE_R_R ASM_CONST(0x0000000000000100)
-#define HPTE_R_KEY_LO ASM_CONST(0x0000000000000e00)
-
-#define HPTE_V_1TB_SEG ASM_CONST(0x4000000000000000)
-#define HPTE_V_VRMA_MASK ASM_CONST(0x4001ffffff000000)
-
-/* Values for PP (assumes Ks=0, Kp=1) */
-#define PP_RWXX 0 /* Supervisor read/write, User none */
-#define PP_RWRX 1 /* Supervisor read/write, User read */
-#define PP_RWRW 2 /* Supervisor read/write, User read/write */
-#define PP_RXRX 3 /* Supervisor read, User read */
-#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
-
-/* Fields for tlbiel instruction in architecture 2.06 */
-#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
-#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
-#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
-#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
-#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
-#define TLBIEL_INVAL_SET_SHIFT 12
-
-#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
-
-#ifndef __ASSEMBLY__
-
-struct hash_pte {
- __be64 v;
- __be64 r;
-};
-
-extern struct hash_pte *htab_address;
-extern unsigned long htab_size_bytes;
-extern unsigned long htab_hash_mask;
-
-/*
- * Page size definition
- *
- * shift : is the "PAGE_SHIFT" value for that page size
- * sllp : is a bit mask with the value of SLB L || LP to be or'ed
- * directly to a slbmte "vsid" value
- * penc : is the HPTE encoding mask for the "LP" field:
- *
- */
-struct mmu_psize_def
-{
- unsigned int shift; /* number of bits */
- int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
- unsigned int tlbiel; /* tlbiel supported for that page size */
- unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
- unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
-};
-extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-
-static inline int shift_to_mmu_psize(unsigned int shift)
-{
- int psize;
-
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
- if (mmu_psize_defs[psize].shift == shift)
- return psize;
- return -1;
-}
-
-static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
-{
- if (mmu_psize_defs[mmu_psize].shift)
- return mmu_psize_defs[mmu_psize].shift;
- BUG();
-}
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * Segment sizes.
- * These are the values used by hardware in the B field of
- * SLB entries and the first dword of MMU hashtable entries.
- * The B field is 2 bits; the values 2 and 3 are unused and reserved.
- */
-#define MMU_SEGSIZE_256M 0
-#define MMU_SEGSIZE_1T 1
-
-/*
- * encode page number shift.
- * in order to fit the 78 bit va in a 64 bit variable we shift the va by
- * 12 bits. This enable us to address upto 76 bit va.
- * For hpt hash from a va we can ignore the page size bits of va and for
- * hpte encoding we ignore up to 23 bits of va. So ignoring lower 12 bits ensure
- * we work in all cases including 4k page size.
- */
-#define VPN_SHIFT 12
-
-/*
- * HPTE Large Page (LP) details
- */
-#define LP_SHIFT 12
-#define LP_BITS 8
-#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-static inline int segment_shift(int ssize)
-{
- if (ssize == MMU_SEGSIZE_256M)
- return SID_SHIFT;
- return SID_SHIFT_1T;
-}
-
-/*
- * The current system page and segment sizes
- */
-extern int mmu_linear_psize;
-extern int mmu_virtual_psize;
-extern int mmu_vmalloc_psize;
-extern int mmu_vmemmap_psize;
-extern int mmu_io_psize;
-extern int mmu_kernel_ssize;
-extern int mmu_highuser_ssize;
-extern u16 mmu_slb_size;
-extern unsigned long tce_alloc_start, tce_alloc_end;
-
-/*
- * If the processor supports 64k normal pages but not 64k cache
- * inhibited pages, we have to be prepared to switch processes
- * to use 4k pages when they create cache-inhibited mappings.
- * If this is the case, mmu_ci_restrictions will be set to 1.
- */
-extern int mmu_ci_restrictions;
-
-/*
- * This computes the AVPN and B fields of the first dword of a HPTE,
- * for use when we want to match an existing PTE. The bottom 7 bits
- * of the returned value are zero.
- */
-static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
- int ssize)
-{
- unsigned long v;
- /*
- * The AVA field omits the low-order 23 bits of the 78 bits VA.
- * These bits are not needed in the PTE, because the
- * low-order b of these bits are part of the byte offset
- * into the virtual page and, if b < 23, the high-order
- * 23-b of these bits are always used in selecting the
- * PTEGs to be searched
- */
- v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
- v <<= HPTE_V_AVPN_SHIFT;
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
- return v;
-}
-
-/*
- * This function sets the AVPN and L fields of the HPTE appropriately
- * using the base page size and actual page size.
- */
-static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
- int actual_psize, int ssize)
-{
- unsigned long v;
- v = hpte_encode_avpn(vpn, base_psize, ssize);
- if (actual_psize != MMU_PAGE_4K)
- v |= HPTE_V_LARGE;
- return v;
-}
-
-/*
- * This function sets the ARPN, and LP fields of the HPTE appropriately
- * for the page size. We assume the pa is already "clean" that is properly
- * aligned for the requested page size
- */
-static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
- int actual_psize)
-{
- /* A 4K page needs no special encoding */
- if (actual_psize == MMU_PAGE_4K)
- return pa & HPTE_R_RPN;
- else {
- unsigned int penc = mmu_psize_defs[base_psize].penc[actual_psize];
- unsigned int shift = mmu_psize_defs[actual_psize].shift;
- return (pa & ~((1ul << shift) - 1)) | (penc << LP_SHIFT);
- }
-}
-
-/*
- * Build a VPN_SHIFT bit shifted va given VSID, EA and segment size.
- */
-static inline unsigned long hpt_vpn(unsigned long ea,
- unsigned long vsid, int ssize)
-{
- unsigned long mask;
- int s_shift = segment_shift(ssize);
-
- mask = (1ul << (s_shift - VPN_SHIFT)) - 1;
- return (vsid << (s_shift - VPN_SHIFT)) | ((ea >> VPN_SHIFT) & mask);
-}
-
-/*
- * This hashes a virtual address
- */
-static inline unsigned long hpt_hash(unsigned long vpn,
- unsigned int shift, int ssize)
-{
- int mask;
- unsigned long hash, vsid;
-
- /* VPN_SHIFT can be atmost 12 */
- if (ssize == MMU_SEGSIZE_256M) {
- mask = (1ul << (SID_SHIFT - VPN_SHIFT)) - 1;
- hash = (vpn >> (SID_SHIFT - VPN_SHIFT)) ^
- ((vpn & mask) >> (shift - VPN_SHIFT));
- } else {
- mask = (1ul << (SID_SHIFT_1T - VPN_SHIFT)) - 1;
- vsid = vpn >> (SID_SHIFT_1T - VPN_SHIFT);
- hash = vsid ^ (vsid << 25) ^
- ((vpn & mask) >> (shift - VPN_SHIFT)) ;
- }
- return hash & 0x7fffffffffUL;
-}
-
-extern int __hash_page_4K(unsigned long ea, unsigned long access,
- unsigned long vsid, pte_t *ptep, unsigned long trap,
- unsigned int local, int ssize, int subpage_prot);
-extern int __hash_page_64K(unsigned long ea, unsigned long access,
- unsigned long vsid, pte_t *ptep, unsigned long trap,
- unsigned int local, int ssize);
-struct mm_struct;
-unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap);
-extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap);
-int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
- pte_t *ptep, unsigned long trap, int local, int ssize,
- unsigned int shift, unsigned int mmu_psize);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern int __hash_page_thp(unsigned long ea, unsigned long access,
- unsigned long vsid, pmd_t *pmdp, unsigned long trap,
- int local, int ssize, unsigned int psize);
-#else
-static inline int __hash_page_thp(unsigned long ea, unsigned long access,
- unsigned long vsid, pmd_t *pmdp,
- unsigned long trap, int local,
- int ssize, unsigned int psize)
-{
- BUG();
- return -1;
-}
-#endif
-extern void hash_failure_debug(unsigned long ea, unsigned long access,
- unsigned long vsid, unsigned long trap,
- int ssize, int psize, int lpsize,
- unsigned long pte);
-extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
- unsigned long pstart, unsigned long prot,
- int psize, int ssize);
-extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
-extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
-
-extern void hpte_init_native(void);
-extern void hpte_init_lpar(void);
-extern void hpte_init_beat(void);
-extern void hpte_init_beat_v3(void);
-
-extern void slb_initialize(void);
-extern void slb_flush_and_rebolt(void);
-
-extern void slb_vmalloc_update(void);
-extern void slb_set_size(u16 size);
-#endif /* __ASSEMBLY__ */
-
-/*
- * VSID allocation (256MB segment)
- *
- * We first generate a 37-bit "proto-VSID". Proto-VSIDs are generated
- * from mmu context id and effective segment id of the address.
- *
- * For user processes max context id is limited to ((1ul << 19) - 5)
- * for kernel space, we use the top 4 context ids to map address as below
- * NOTE: each context only support 64TB now.
- * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
- *
- * The proto-VSIDs are then scrambled into real VSIDs with the
- * multiplicative hash:
- *
- * VSID = (proto-VSID * VSID_MULTIPLIER) % VSID_MODULUS
- *
- * VSID_MULTIPLIER is prime, so in particular it is
- * co-prime to VSID_MODULUS, making this a 1:1 scrambling function.
- * Because the modulus is 2^n-1 we can compute it efficiently without
- * a divide or extra multiply (see below). The scramble function gives
- * robust scattering in the hash table (at least based on some initial
- * results).
- *
- * We also consider VSID 0 special. We use VSID 0 for slb entries mapping
- * bad address. This enables us to consolidate bad address handling in
- * hash_page.
- *
- * We also need to avoid the last segment of the last context, because that
- * would give a protovsid of 0x1fffffffff. That will result in a VSID 0
- * because of the modulo operation in vsid scramble. But the vmemmap
- * (which is what uses region 0xf) will never be close to 64TB in size
- * (it's 56 bytes per page of system memory).
- */
-
-#define CONTEXT_BITS 19
-#define ESID_BITS 18
-#define ESID_BITS_1T 6
-
-/*
- * 256MB segment
- * The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
- * available for user + kernel mapping. The top 4 contexts are used for
- * kernel mapping. Each segment contains 2^28 bytes. Each
- * context maps 2^46 bytes (64TB) so we can support 2^19-1 contexts
- * (19 == 37 + 28 - 46).
- */
-#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 5)
-
-/*
- * This should be computed such that protovosid * vsid_mulitplier
- * doesn't overflow 64 bits. It should also be co-prime to vsid_modulus
- */
-#define VSID_MULTIPLIER_256M ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_256M (CONTEXT_BITS + ESID_BITS)
-#define VSID_MODULUS_256M ((1UL<<VSID_BITS_256M)-1)
-
-#define VSID_MULTIPLIER_1T ASM_CONST(12538073) /* 24-bit prime */
-#define VSID_BITS_1T (CONTEXT_BITS + ESID_BITS_1T)
-#define VSID_MODULUS_1T ((1UL<<VSID_BITS_1T)-1)
-
-
-#define USER_VSID_RANGE (1UL << (ESID_BITS + SID_SHIFT))
-
-/*
- * This macro generates asm code to compute the VSID scramble
- * function. Used in slb_allocate() and do_stab_bolted. The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- * rt = register continaing the proto-VSID and into which the
- * VSID will be stored
- * rx = scratch register (clobbered)
- *
- * - rt and rx must be different registers
- * - The answer will end up in the low VSID_BITS bits of rt. The higher
- * bits may contain other garbage, so you may need to mask the
- * result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, size) \
- lis rx,VSID_MULTIPLIER_##size@h; \
- ori rx,rx,VSID_MULTIPLIER_##size@l; \
- mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
- \
- srdi rx,rt,VSID_BITS_##size; \
- clrldi rt,rt,(64-VSID_BITS_##size); \
- add rt,rt,rx; /* add high and low bits */ \
- /* NOTE: explanation based on VSID_BITS_##size = 36 \
- * Now, r3 == VSID (mod 2^36-1), and lies between 0 and \
- * 2^36-1+2^28-1. That in particular means that if r3 >= \
- * 2^36-1, then r3+1 has the 2^36 bit set. So, if r3+1 has \
- * the bit clear, r3 already has the answer we want, if it \
- * doesn't, the answer is the low 36 bits of r3+1. So in all \
- * cases the answer is the low 36 bits of (r3 + ((r3+1) >> 36))*/\
- addi rx,rt,1; \
- srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
- add rt,rt,rx
-
-/* 4 bits per slice and we have one slice per 1TB */
-#define SLICE_ARRAY_SIZE (PGTABLE_RANGE >> 41)
-
-#ifndef __ASSEMBLY__
-
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-/*
- * For the sub-page protection option, we extend the PGD with one of
- * these. Basically we have a 3-level tree, with the top level being
- * the protptrs array. To optimize speed and memory consumption when
- * only addresses < 4GB are being protected, pointers to the first
- * four pages of sub-page protection words are stored in the low_prot
- * array.
- * Each page of sub-page protection words protects 1GB (4 bytes
- * protects 64k). For the 3-level tree, each page of pointers then
- * protects 8TB.
- */
-struct subpage_prot_table {
- unsigned long maxaddr; /* only addresses < this are protected */
- unsigned int **protptrs[(TASK_SIZE_USER64 >> 43)];
- unsigned int *low_prot[4];
-};
-
-#define SBP_L1_BITS (PAGE_SHIFT - 2)
-#define SBP_L2_BITS (PAGE_SHIFT - 3)
-#define SBP_L1_COUNT (1 << SBP_L1_BITS)
-#define SBP_L2_COUNT (1 << SBP_L2_BITS)
-#define SBP_L2_SHIFT (PAGE_SHIFT + SBP_L1_BITS)
-#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
-
-extern void subpage_prot_free(struct mm_struct *mm);
-extern void subpage_prot_init_new_context(struct mm_struct *mm);
-#else
-static inline void subpage_prot_free(struct mm_struct *mm) {}
-static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
-
-typedef unsigned long mm_context_id_t;
-struct spinlock;
-
-typedef struct {
- mm_context_id_t id;
- u16 user_psize; /* page size index */
-
-#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
- unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
-#else
- u16 sllp; /* SLB page size encoding */
-#endif
- unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
-#ifdef CONFIG_PPC_ICSWX
- struct spinlock *cop_lockp; /* guard acop and cop_pid */
- unsigned long acop; /* mask of enabled coprocessor types */
- unsigned int cop_pid; /* pid value used with coprocessors */
-#endif /* CONFIG_PPC_ICSWX */
-#ifdef CONFIG_PPC_64K_PAGES
- /* for 4K PTE fragment support */
- void *pte_frag;
-#endif
-} mm_context_t;
-
-
-#if 0
-/*
- * The code below is equivalent to this function for arguments
- * < 2^VSID_BITS, which is all this should ever be called
- * with. However gcc is not clever enough to compute the
- * modulus (2^n-1) without a second multiply.
- */
-#define vsid_scramble(protovsid, size) \
- ((((protovsid) * VSID_MULTIPLIER_##size) % VSID_MODULUS_##size))
-
-#else /* 1 */
-#define vsid_scramble(protovsid, size) \
- ({ \
- unsigned long x; \
- x = (protovsid) * VSID_MULTIPLIER_##size; \
- x = (x >> VSID_BITS_##size) + (x & VSID_MODULUS_##size); \
- (x + ((x+1) >> VSID_BITS_##size)) & VSID_MODULUS_##size; \
- })
-#endif /* 1 */
-
-/* Returns the segment size indicator for a user address */
-static inline int user_segment_size(unsigned long addr)
-{
- /* Use 1T segments if possible for addresses >= 1T */
- if (addr >= (1UL << SID_SHIFT_1T))
- return mmu_highuser_ssize;
- return MMU_SEGSIZE_256M;
-}
-
-static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
- int ssize)
-{
- /*
- * Bad address. We return VSID 0 for that
- */
- if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
- return 0;
-
- if (ssize == MMU_SEGSIZE_256M)
- return vsid_scramble((context << ESID_BITS)
- | (ea >> SID_SHIFT), 256M);
- return vsid_scramble((context << ESID_BITS_1T)
- | (ea >> SID_SHIFT_1T), 1T);
-}
-
-/*
- * This is only valid for addresses >= PAGE_OFFSET
- *
- * For kernel space, we use the top 4 context ids to map address as below
- * 0x7fffc - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x7fffd - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x7fffe - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x7ffff - [ 0xf000000000000000 - 0xf0003fffffffffff ]
- */
-static inline unsigned long get_kernel_vsid(unsigned long ea, int ssize)
-{
- unsigned long context;
-
- /*
- * kernel take the top 4 context from the available range
- */
- context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1;
- return get_vsid(context, ea, ssize);
-}
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_MMU_HASH64_H_ */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 3d5abfe6ba67..5f9c5d436e17 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -1,35 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_MMU_H_
#define _ASM_POWERPC_MMU_H_
#ifdef __KERNEL__
#include <linux/types.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
+#include <asm/asm-const.h>
/*
* MMU features bit definitions
*/
/*
- * First half is MMU families
+ * MMU families
*/
#define MMU_FTR_HPTE_TABLE ASM_CONST(0x00000001)
#define MMU_FTR_TYPE_8xx ASM_CONST(0x00000002)
-#define MMU_FTR_TYPE_40x ASM_CONST(0x00000004)
#define MMU_FTR_TYPE_44x ASM_CONST(0x00000008)
#define MMU_FTR_TYPE_FSL_E ASM_CONST(0x00000010)
#define MMU_FTR_TYPE_47x ASM_CONST(0x00000020)
+/* Radix page table supported and enabled */
+#define MMU_FTR_TYPE_RADIX ASM_CONST(0x00000040)
+
+/*
+ * Individual features below.
+ */
+
+/*
+ * Supports KUAP feature
+ * key 0 controlling userspace addresses on radix
+ * Key 3 on hash
+ */
+#define MMU_FTR_KUAP ASM_CONST(0x00000200)
+
/*
- * This is individual features
+ * Supports KUEP feature
+ * key 0 controlling userspace addresses on radix
+ * Key 3 on hash
*/
+#define MMU_FTR_BOOK3S_KUEP ASM_CONST(0x00000400)
+
+/*
+ * Support for memory protection keys.
+ */
+#define MMU_FTR_PKEY ASM_CONST(0x00000800)
+
+/* Guest Translation Shootdown Enable */
+#define MMU_FTR_GTSE ASM_CONST(0x00001000)
+
+/*
+ * Support for 68 bit VA space. We added that from ISA 2.05
+ */
+#define MMU_FTR_68_BIT_VA ASM_CONST(0x00002000)
+/*
+ * Kernel read only support.
+ * We added the ppp value 0b110 in ISA 2.04.
+ */
+#define MMU_FTR_KERNEL_RO ASM_CONST(0x00004000)
+
+/*
+ * We need to clear the top 16 bits of va (from the remaining 64 bits) in
+ * tlbie* instructions
+ */
+#define MMU_FTR_TLBIE_CROP_VA ASM_CONST(0x00008000)
/* Enable use of high BAT registers */
#define MMU_FTR_USE_HIGH_BATS ASM_CONST(0x00010000)
/* Enable >32-bit physical addresses on 32-bit processor, only used
- * by CONFIG_6xx currently as BookE supports that from day 1
+ * by CONFIG_PPC_BOOK3S_32 currently as BookE supports that from day 1
*/
#define MMU_FTR_BIG_PHYS ASM_CONST(0x00020000)
@@ -55,15 +95,6 @@
*/
#define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000)
-/* Enable use of TLB reservation. Processor should support tlbsrx.
- * instruction and MAS0[WQ].
- */
-#define MMU_FTR_USE_TLBRSRV ASM_CONST(0x00800000)
-
-/* Use paired MAS registers (MAS7||MAS3, etc.)
- */
-#define MMU_FTR_USE_PAIRED_MAS ASM_CONST(0x01000000)
-
/* Doesn't support the B bit (1T segment) in SLBIE
*/
#define MMU_FTR_NO_SLBIE_B ASM_CONST(0x02000000)
@@ -88,51 +119,189 @@
*/
#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
+// NX paste RMA reject in DSI
+#define MMU_FTR_NX_DSI ASM_CONST(0x80000000)
+
/* MMU feature bit sets for various CPUs */
-#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
- MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
-#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2
-#define MMU_FTRS_PPC970 MMU_FTRS_POWER4
-#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 (MMU_FTR_HPTE_TABLE | MMU_FTR_TLBIEL | MMU_FTR_16M_PAGE)
+#define MMU_FTRS_POWER MMU_FTRS_DEFAULT_HPTE_ARCH_V2
+#define MMU_FTRS_PPC970 MMU_FTRS_POWER | MMU_FTR_TLBIE_CROP_VA
+#define MMU_FTRS_POWER5 MMU_FTRS_POWER | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6 MMU_FTRS_POWER5 | MMU_FTR_KERNEL_RO | MMU_FTR_68_BIT_VA
+#define MMU_FTRS_POWER7 MMU_FTRS_POWER6
+#define MMU_FTRS_POWER8 MMU_FTRS_POWER6
+#define MMU_FTRS_POWER9 MMU_FTRS_POWER6
+#define MMU_FTRS_POWER10 MMU_FTRS_POWER6
+#define MMU_FTRS_POWER11 MMU_FTRS_POWER6
#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE | MMU_FTR_NO_SLBIE_B
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/bug.h>
#include <asm/cputable.h>
+#include <asm/page.h>
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#include <asm/percpu.h>
-DECLARE_PER_CPU(int, next_tlbcam_idx);
+typedef pte_t *pgtable_t;
+
+enum {
+ MMU_FTRS_POSSIBLE =
+#if defined(CONFIG_PPC_BOOK3S_604)
+ MMU_FTR_HPTE_TABLE |
+#endif
+#ifdef CONFIG_PPC_8xx
+ MMU_FTR_TYPE_8xx |
#endif
+#ifdef CONFIG_PPC_47x
+ MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL |
+#elif defined(CONFIG_44x)
+ MMU_FTR_TYPE_44x |
+#endif
+#ifdef CONFIG_PPC_E500
+ MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX |
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+ MMU_FTR_USE_HIGH_BATS |
+#endif
+#ifdef CONFIG_PPC_83xx
+ MMU_FTR_NEED_DTLB_SW_LRU |
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ MMU_FTR_KERNEL_RO |
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ MMU_FTR_NO_SLBIE_B | MMU_FTR_16M_PAGE | MMU_FTR_TLBIEL |
+ MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_CI_LARGE_PAGE |
+ MMU_FTR_1T_SEGMENT | MMU_FTR_TLBIE_CROP_VA |
+ MMU_FTR_68_BIT_VA | MMU_FTR_HPTE_TABLE |
+#endif
+#ifdef CONFIG_PPC_RADIX_MMU
+ MMU_FTR_TYPE_RADIX |
+ MMU_FTR_GTSE | MMU_FTR_NX_DSI |
+#endif /* CONFIG_PPC_RADIX_MMU */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ MMU_FTR_KUAP |
+#endif /* CONFIG_PPC_KUAP */
+#ifdef CONFIG_PPC_MEM_KEYS
+ MMU_FTR_PKEY |
+#endif
+#ifdef CONFIG_PPC_KUEP
+ MMU_FTR_BOOK3S_KUEP |
+#endif /* CONFIG_PPC_KUEP */
+
+ 0,
+};
+
+#if defined(CONFIG_PPC_BOOK3S_604) && !defined(CONFIG_PPC_BOOK3S_603)
+#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE
+#endif
+#ifdef CONFIG_PPC_8xx
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_8xx
+#endif
+#ifdef CONFIG_PPC_47x
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_47x
+#elif defined(CONFIG_44x)
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_44x
+#endif
+#ifdef CONFIG_PPC_E500
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_FSL_E
+#endif
+
+/* BOOK3S_64 options */
+#if defined(CONFIG_PPC_RADIX_MMU) && !defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS MMU_FTR_TYPE_RADIX
+#elif !defined(CONFIG_PPC_RADIX_MMU) && defined(CONFIG_PPC_64S_HASH_MMU)
+#define MMU_FTRS_ALWAYS MMU_FTR_HPTE_TABLE
+#endif
+
+#ifndef MMU_FTRS_ALWAYS
+#define MMU_FTRS_ALWAYS 0
+#endif
+
+static __always_inline bool early_mmu_has_feature(unsigned long feature)
+{
+ if (MMU_FTRS_ALWAYS & feature)
+ return true;
+
+ return !!(MMU_FTRS_POSSIBLE & cur_cpu_spec->mmu_features & feature);
+}
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+#include <linux/jump_label.h>
+
+#define NUM_MMU_FTR_KEYS 32
+
+extern struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS];
+
+extern void mmu_feature_keys_init(void);
-static inline int mmu_has_feature(unsigned long feature)
+static __always_inline bool mmu_has_feature(unsigned long feature)
{
- return (cur_cpu_spec->mmu_features & feature);
+ int i;
+
+ BUILD_BUG_ON(!__builtin_constant_p(feature));
+ BUILD_BUG_ON(__builtin_popcountl(feature) > 1);
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECK_DEBUG
+ if (!static_key_feature_checks_initialized) {
+ printk("Warning! mmu_has_feature() used prior to jump label init!\n");
+ dump_stack();
+ return early_mmu_has_feature(feature);
+ }
+#endif
+
+ if (MMU_FTRS_ALWAYS & feature)
+ return true;
+
+ if (!(MMU_FTRS_POSSIBLE & feature))
+ return false;
+
+ i = __builtin_ctzl(feature);
+ return static_branch_likely(&mmu_feature_keys[i]);
}
static inline void mmu_clear_feature(unsigned long feature)
{
+ int i;
+
+ i = __builtin_ctzl(feature);
cur_cpu_spec->mmu_features &= ~feature;
+ static_branch_disable(&mmu_feature_keys[i]);
}
+#else
-extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
+static inline void mmu_feature_keys_init(void)
+{
-/* MMU initialization */
-extern void early_init_mmu(void);
-extern void early_init_mmu_secondary(void);
+}
-extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size);
+static __always_inline bool mmu_has_feature(unsigned long feature)
+{
+ return early_mmu_has_feature(feature);
+}
+
+static inline void mmu_clear_feature(unsigned long feature)
+{
+ cur_cpu_spec->mmu_features &= ~feature;
+}
+#endif /* CONFIG_JUMP_LABEL_FEATURE_CHECKS */
+
+extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup;
#ifdef CONFIG_PPC64
/* This is our real memory area size on ppc64 server, on embedded, we
* make it match the size of our bolted TLB area
*/
extern u64 ppc64_rma_size;
+
+/* Cleanup function used by kexec */
+extern void mmu_cleanup_all(void);
+extern void radix__mmu_cleanup_all(void);
+
+/* Functions for creating and updating partition table on POWER9 */
+extern void mmu_partition_table_init(void);
+extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1, bool flush);
#endif /* CONFIG_PPC64 */
struct mm_struct;
@@ -144,7 +313,26 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
}
#endif /* !CONFIG_DEBUG_VM */
-#endif /* !__ASSEMBLY__ */
+static __always_inline bool radix_enabled(void)
+{
+ return mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+
+static __always_inline bool early_radix_enabled(void)
+{
+ return early_mmu_has_feature(MMU_FTR_TYPE_RADIX);
+}
+
+static inline bool strict_kernel_rwx_enabled(void)
+{
+ return IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && rodata_enabled;
+}
+
+static inline bool strict_module_rwx_enabled(void)
+{
+ return IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && strict_kernel_rwx_enabled();
+}
+#endif /* !__ASSEMBLER__ */
/* The kernel uses the constants below to index into the page sizes array.
* The use of fixed constants for this purpose is better for performances
@@ -167,39 +355,48 @@ static inline void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
#define MMU_PAGE_64K 2
#define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */
#define MMU_PAGE_256K 4
-#define MMU_PAGE_1M 5
-#define MMU_PAGE_2M 6
-#define MMU_PAGE_4M 7
-#define MMU_PAGE_8M 8
-#define MMU_PAGE_16M 9
-#define MMU_PAGE_64M 10
-#define MMU_PAGE_256M 11
-#define MMU_PAGE_1G 12
-#define MMU_PAGE_16G 13
-#define MMU_PAGE_64G 14
-
-#define MMU_PAGE_COUNT 15
-
-#if defined(CONFIG_PPC_STD_MMU_64)
-/* 64-bit classic hash table MMU */
-# include <asm/mmu-hash64.h>
-#elif defined(CONFIG_PPC_STD_MMU_32)
-/* 32-bit classic hash table MMU */
-# include <asm/mmu-hash32.h>
-#elif defined(CONFIG_40x)
-/* 40x-style software loaded TLB */
-# include <asm/mmu-40x.h>
-#elif defined(CONFIG_44x)
-/* 44x-style software loaded TLB */
-# include <asm/mmu-44x.h>
-#elif defined(CONFIG_PPC_BOOK3E_MMU)
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-# include <asm/mmu-book3e.h>
-#elif defined (CONFIG_PPC_8xx)
-/* Motorola/Freescale 8xx software loaded TLB */
-# include <asm/mmu-8xx.h>
+#define MMU_PAGE_512K 5
+#define MMU_PAGE_1M 6
+#define MMU_PAGE_2M 7
+#define MMU_PAGE_4M 8
+#define MMU_PAGE_8M 9
+#define MMU_PAGE_16M 10
+#define MMU_PAGE_64M 11
+#define MMU_PAGE_256M 12
+#define MMU_PAGE_1G 13
+#define MMU_PAGE_16G 14
+#define MMU_PAGE_64G 15
+
+/*
+ * N.B. we need to change the type of hpte_page_sizes if this gets to be > 16
+ * Also we need to change the type of mm_context.low/high_slices_psize.
+ */
+#define MMU_PAGE_COUNT 16
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/mmu.h>
+#else /* CONFIG_PPC_BOOK3S_64 */
+
+#ifndef __ASSEMBLER__
+/* MMU initialization */
+extern void early_init_mmu(void);
+extern void early_init_mmu_secondary(void);
+extern void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size);
+static inline void mmu_early_init_devtree(void) { }
+
+static inline void pkey_early_init_devtree(void) {}
+
+extern void *abatron_pteptrs[2];
+#endif /* __ASSEMBLER__ */
#endif
+#if defined(CONFIG_PPC_BOOK3S_32)
+/* 32-bit classic hash table MMU */
+#include <asm/book3s/32/mmu-hash.h>
+#elif defined(CONFIG_PPC_MMU_NOHASH)
+#include <asm/nohash/mmu.h>
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MMU_H_ */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 73382eba02dc..a157ab513347 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_POWERPC_MMU_CONTEXT_H
#define __ASM_POWERPC_MMU_CONTEXT_H
#ifdef __KERNEL__
@@ -8,106 +9,284 @@
#include <linux/spinlock.h>
#include <asm/mmu.h>
#include <asm/cputable.h>
-#include <asm-generic/mm_hooks.h>
#include <asm/cputhreads.h>
/*
* Most of the context management is out of line
*/
+#define init_new_context init_new_context
extern int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
+#define destroy_context destroy_context
extern void destroy_context(struct mm_struct *mm);
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+struct mm_iommu_table_group_mem_t;
-extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
+extern bool mm_iommu_preregistered(struct mm_struct *mm);
+extern long mm_iommu_new(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem);
+extern long mm_iommu_put(struct mm_struct *mm,
+ struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_init(struct mm_struct *mm);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+ unsigned long ua, unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries);
+extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa);
+extern bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size);
+extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
+extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
+#else
+static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ unsigned int pageshift, unsigned long *size)
+{
+ return false;
+}
+static inline void mm_iommu_init(struct mm_struct *mm) { }
+#endif
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
-extern void set_context(unsigned long id, pgd_t *pgd);
#ifdef CONFIG_PPC_BOOK3S_64
-extern int __init_new_context(void);
+extern void radix__switch_mmu_context(struct mm_struct *prev,
+ struct mm_struct *next);
+static inline void switch_mmu_context(struct mm_struct *prev,
+ struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ if (radix_enabled())
+ return radix__switch_mmu_context(prev, next);
+ return switch_slb(tsk, next);
+}
+
+extern int hash__alloc_context_id(void);
+void __init hash__reserve_context_id(int id);
extern void __destroy_context(int context_id);
static inline void mmu_context_init(void) { }
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ int context_id;
+
+ int index = ea >> MAX_EA_BITS_PER_CONTEXT;
+
+ context_id = hash__alloc_context_id();
+ if (context_id < 0)
+ return context_id;
+
+ VM_WARN_ON(mm->context.extended_id[index]);
+ mm->context.extended_id[index] = context_id;
+ return context_id;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ int context_id;
+
+ context_id = get_user_context(&mm->context, ea);
+ if (!context_id)
+ return true;
+ return false;
+}
+#endif
+
#else
+extern void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
extern unsigned long __init_new_context(void);
extern void __destroy_context(unsigned long context_id);
extern void mmu_context_init(void);
+static inline int alloc_extended_context(struct mm_struct *mm,
+ unsigned long ea)
+{
+ /* non book3s_64 should never find this called */
+ WARN_ON(1);
+ return -ENOMEM;
+}
+
+static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
+{
+ return false;
+}
#endif
-extern void switch_cop(struct mm_struct *next);
-extern int use_cop(unsigned long acop, struct mm_struct *mm);
-extern void drop_cop(unsigned long acop, struct mm_struct *mm);
+#ifdef CONFIG_PPC_BOOK3S_64
+static inline void inc_mm_active_cpus(struct mm_struct *mm)
+{
+ atomic_inc(&mm->context.active_cpus);
+}
+
+static inline void dec_mm_active_cpus(struct mm_struct *mm)
+{
+ VM_WARN_ON_ONCE(atomic_read(&mm->context.active_cpus) <= 0);
+ atomic_dec(&mm->context.active_cpus);
+}
+
+static inline void mm_context_add_copro(struct mm_struct *mm)
+{
+ /*
+ * If any copro is in use, increment the active CPU count
+ * in order to force TLB invalidations to be global so as to
+ * propagate to the Nest MMU.
+ */
+ if (atomic_inc_return(&mm->context.copros) == 1)
+ inc_mm_active_cpus(mm);
+}
+
+static inline void mm_context_remove_copro(struct mm_struct *mm)
+{
+ int c;
+
+ /*
+ * When removing the last copro, we need to broadcast a global
+ * flush of the full mm, as the next TLBI may be local and the
+ * nMMU and/or PSL need to be cleaned up.
+ *
+ * Both the 'copros' and 'active_cpus' counts are looked at in
+ * radix__flush_all_mm() to determine the scope (local/global)
+ * of the TLBIs, so we need to flush first before decrementing
+ * 'copros'. If this API is used by several callers for the
+ * same context, it can lead to over-flushing. It's hopefully
+ * not common enough to be a problem.
+ *
+ * Skip on hash, as we don't know how to do the proper flush
+ * for the time being. Invalidations will remain global if
+ * used on hash. Note that we can't drop 'copros' either, as
+ * it could make some invalidations local with no flush
+ * in-between.
+ */
+ if (radix_enabled()) {
+ radix__flush_all_mm(mm);
+
+ c = atomic_dec_if_positive(&mm->context.copros);
+ /* Detect imbalance between add and remove */
+ WARN_ON(c < 0);
+
+ if (c == 0)
+ dec_mm_active_cpus(mm);
+ }
+}
/*
- * switch_mm is the entry point called from the architecture independent
- * code in kernel/sched/core.c
+ * vas_windows counter shows number of open windows in the mm
+ * context. During context switch, use this counter to clear the
+ * foreign real address mapping (CP_ABORT) for the thread / process
+ * that intends to use COPY/PASTE. When a process closes all windows,
+ * disable CP_ABORT which is expensive to run.
+ *
+ * For user context, register a copro so that TLBIs are seen by the
+ * nest MMU. mm_context_add/remove_vas_window() are used only for user
+ * space windows.
*/
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
- struct task_struct *tsk)
+static inline void mm_context_add_vas_window(struct mm_struct *mm)
+{
+ atomic_inc(&mm->context.vas_windows);
+ mm_context_add_copro(mm);
+}
+
+static inline void mm_context_remove_vas_window(struct mm_struct *mm)
{
- /* Mark this context has been used on the new CPU */
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
+ int v;
- /* 32-bit keeps track of the current PGDIR in the thread struct */
-#ifdef CONFIG_PPC32
- tsk->thread.pgdir = next->pgd;
-#endif /* CONFIG_PPC32 */
+ mm_context_remove_copro(mm);
+ v = atomic_dec_if_positive(&mm->context.vas_windows);
- /* 64-bit Book3E keeps track of current PGD in the PACA */
-#ifdef CONFIG_PPC_BOOK3E_64
- get_paca()->pgd = next->pgd;
+ /* Detect imbalance between add and remove */
+ WARN_ON(v < 0);
+}
+#else
+static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
+static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
+static inline void mm_context_add_copro(struct mm_struct *mm) { }
+static inline void mm_context_remove_copro(struct mm_struct *mm) { }
#endif
- /* Nothing else to do if we aren't actually switching */
- if (prev == next)
- return;
-
-#ifdef CONFIG_PPC_ICSWX
- /* Switch coprocessor context only if prev or next uses a coprocessor */
- if (prev->context.acop || next->context.acop)
- switch_cop(next);
-#endif /* CONFIG_PPC_ICSWX */
-
- /* We must stop all altivec streams before changing the HW
- * context
- */
-#ifdef CONFIG_ALTIVEC
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- asm volatile ("dssall");
-#endif /* CONFIG_ALTIVEC */
- /* The actual HW switching method differs between the various
- * sub architectures.
- */
-#ifdef CONFIG_PPC_STD_MMU_64
- switch_slb(tsk, next);
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end);
#else
- /* Out of line for now */
- switch_mmu_context(prev, next);
+static inline void do_h_rpt_invalidate_prt(unsigned long pid,
+ unsigned long lpid,
+ unsigned long type,
+ unsigned long pg_sizes,
+ unsigned long start,
+ unsigned long end) { }
#endif
-}
+extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ unsigned long flags;
-#define deactivate_mm(tsk,mm) do { } while (0)
+ local_irq_save(flags);
+ switch_mm_irqs_off(prev, next, tsk);
+ local_irq_restore(flags);
+}
+#define switch_mm_irqs_off switch_mm_irqs_off
/*
* After we have set current->mm to a new value, this activates
* the context for the new mm so we see the new mappings.
*/
+#define activate_mm activate_mm
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
- unsigned long flags;
-
- local_irq_save(flags);
- switch_mm(prev, next, current);
- local_irq_restore(flags);
+ switch_mm_irqs_off(prev, next, current);
}
/* We don't currently use enter_lazy_tlb() for anything */
+#ifdef CONFIG_PPC_BOOK3E_64
+#define enter_lazy_tlb enter_lazy_tlb
static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *tsk)
{
/* 64-bit Book3E keeps track of current PGD in the PACA */
-#ifdef CONFIG_PPC_BOOK3E_64
get_paca()->pgd = NULL;
+}
#endif
+
+extern void arch_exit_mmap(struct mm_struct *mm);
+
+#ifdef CONFIG_PPC_MEM_KEYS
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign);
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm);
+#else /* CONFIG_PPC_MEM_KEYS */
+static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
+ bool write, bool execute, bool foreign)
+{
+ /* by default, allow everything */
+ return true;
+}
+
+#define pkey_mm_init(mm)
+#define arch_dup_pkeys(oldmm, mm)
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags)
+{
+ return 0x0UL;
}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+static inline int arch_dup_mmap(struct mm_struct *oldmm,
+ struct mm_struct *mm)
+{
+ arch_dup_pkeys(oldmm, mm);
+ return 0;
+}
+
+#include <asm-generic/mmu_context.h>
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_MMU_CONTEXT_H */
diff --git a/arch/powerpc/include/asm/mmzone.h b/arch/powerpc/include/asm/mmzone.h
index 7b589178be46..049152f8d597 100644
--- a/arch/powerpc/include/asm/mmzone.h
+++ b/arch/powerpc/include/asm/mmzone.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Written by Kanoj Sarcar (kanoj@sgi.com) Aug 99
*
@@ -17,13 +18,7 @@
* flags field of the struct page
*/
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-
-extern struct pglist_data *node_data[];
-/*
- * Return a pointer to the node data for node n.
- */
-#define NODE_DATA(nid) (node_data[nid])
+#ifdef CONFIG_NUMA
/*
* Following are specific to this numa platform.
@@ -34,13 +29,14 @@ extern cpumask_var_t node_to_cpumask_map[];
#ifdef CONFIG_MEMORY_HOTPLUG
extern unsigned long max_pfn;
u64 memory_hotplug_max(void);
+u64 hot_add_drconf_memory_max(void);
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
#endif
#else
#define memory_hotplug_max() memblock_end_of_DRAM()
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
#endif /* __KERNEL__ */
#endif /* _ASM_MMZONE_H_ */
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index dcfcad139bcc..864e22deaa2c 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -1,25 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_MODULE_H
#define _ASM_POWERPC_MODULE_H
#ifdef __KERNEL__
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
#include <linux/list.h>
#include <asm/bug.h>
#include <asm-generic/module.h>
-
#ifndef __powerpc64__
/*
* Thanks to Paul M for explaining this.
*
* PPC can only do rel jumps +/- 32MB, and often the kernel and other
- * modules are furthur away than this. So, we jump to a table of
+ * modules are further away than this. So, we jump to a table of
* trampolines attached to the module (the Procedure Linkage Table)
* whenever that happens.
*/
@@ -34,36 +27,48 @@ struct ppc_plt_entry {
struct mod_arch_specific {
#ifdef __powerpc64__
unsigned int stubs_section; /* Index of stubs section in module */
+ unsigned int stub_count; /* Number of stubs used */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ unsigned int got_section; /* What section is the GOT? */
+ unsigned int pcpu_section; /* .data..percpu section */
+#else
unsigned int toc_section; /* What section is the TOC? */
bool toc_fixed; /* Have we fixed up .TOC.? */
-#ifdef CONFIG_DYNAMIC_FTRACE
- unsigned long toc;
- unsigned long tramp;
#endif
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+ /* For module function descriptor dereference */
+ unsigned long start_opd;
+ unsigned long end_opd;
+#endif
#else /* powerpc64 */
/* Indices of PLT sections within module. */
unsigned int core_plt_section;
unsigned int init_plt_section;
+#endif /* powerpc64 */
+
#ifdef CONFIG_DYNAMIC_FTRACE
unsigned long tramp;
+ unsigned long tramp_regs;
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ struct ftrace_ool_stub *ool_stubs;
+ unsigned int ool_stub_count;
+ unsigned int ool_stub_index;
+#endif
#endif
-#endif /* powerpc64 */
-
- /* List of BUG addresses, source line numbers and filenames */
- struct list_head bug_list;
- struct bug_entry *bug_table;
- unsigned int num_bugs;
};
/*
* Select ELF headers.
- * Make empty section for module_frob_arch_sections to expand.
+ * Make empty sections for module_frob_arch_sections to expand.
*/
#ifdef __powerpc64__
# ifdef MODULE
asm(".section .stubs,\"ax\",@nobits; .align 3; .previous");
+# ifdef CONFIG_PPC_KERNEL_PCREL
+ asm(".section .mygot,\"a\",@nobits; .align 3; .previous");
+# endif
# endif
#else
# ifdef MODULE
@@ -73,22 +78,15 @@ struct mod_arch_specific {
#endif
#ifdef CONFIG_DYNAMIC_FTRACE
-# ifdef MODULE
- asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous");
-# endif /* MODULE */
-#endif
-
-bool is_module_trampoline(u32 *insns);
-int module_trampoline_target(struct module *mod, u32 *trampoline,
+int module_trampoline_target(struct module *mod, unsigned long trampoline,
unsigned long *target);
-
-struct exception_table_entry;
-void sort_ex_table(struct exception_table_entry *start,
- struct exception_table_entry *finish);
-
-#if defined(CONFIG_MODVERSIONS) && defined(CONFIG_PPC64)
-#define ARCH_RELOCATES_KCRCTAB
-#define reloc_start PHYSICAL_START
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs);
+#else
+static inline int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+ return 0;
+}
#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MODULE_H */
diff --git a/arch/powerpc/include/asm/module.lds.h b/arch/powerpc/include/asm/module.lds.h
new file mode 100644
index 000000000000..cea5dc124be4
--- /dev/null
+++ b/arch/powerpc/include/asm/module.lds.h
@@ -0,0 +1,8 @@
+/* Force alignment of .toc section. */
+SECTIONS
+{
+ .toc 0 : ALIGN(256)
+ {
+ *(.got .toc)
+ }
+}
diff --git a/arch/powerpc/include/asm/mpc5121.h b/arch/powerpc/include/asm/mpc5121.h
index 4a69cd1d5041..9ae49e743b34 100644
--- a/arch/powerpc/include/asm/mpc5121.h
+++ b/arch/powerpc/include/asm/mpc5121.h
@@ -1,8 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* MPC5121 Prototypes and definitions
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.
*/
#ifndef __ASM_POWERPC_MPC5121_H__
@@ -60,4 +58,63 @@ struct mpc512x_lpc {
int mpc512x_cs_config(unsigned int cs, u32 val);
+/*
+ * SCLPC Module (LPB FIFO)
+ */
+struct mpc512x_lpbfifo {
+ u32 pkt_size; /* SCLPC Packet Size Register */
+ u32 start_addr; /* SCLPC Start Address Register */
+ u32 ctrl; /* SCLPC Control Register */
+ u32 enable; /* SCLPC Enable Register */
+ u32 reserved1;
+ u32 status; /* SCLPC Status Register */
+ u32 bytes_done; /* SCLPC Bytes Done Register */
+ u32 emb_sc; /* EMB Share Counter Register */
+ u32 emb_pc; /* EMB Pause Control Register */
+ u32 reserved2[7];
+ u32 data_word; /* LPC RX/TX FIFO Data Word Register */
+ u32 fifo_status; /* LPC RX/TX FIFO Status Register */
+ u32 fifo_ctrl; /* LPC RX/TX FIFO Control Register */
+ u32 fifo_alarm; /* LPC RX/TX FIFO Alarm Register */
+};
+
+#define MPC512X_SCLPC_START (1 << 31)
+#define MPC512X_SCLPC_CS(x) (((x) & 0x7) << 24)
+#define MPC512X_SCLPC_FLUSH (1 << 17)
+#define MPC512X_SCLPC_READ (1 << 16)
+#define MPC512X_SCLPC_DAI (1 << 8)
+#define MPC512X_SCLPC_BPT(x) ((x) & 0x3f)
+#define MPC512X_SCLPC_RESET (1 << 24)
+#define MPC512X_SCLPC_FIFO_RESET (1 << 16)
+#define MPC512X_SCLPC_ABORT_INT_ENABLE (1 << 9)
+#define MPC512X_SCLPC_NORM_INT_ENABLE (1 << 8)
+#define MPC512X_SCLPC_ENABLE (1 << 0)
+#define MPC512X_SCLPC_SUCCESS (1 << 24)
+#define MPC512X_SCLPC_FIFO_CTRL(x) (((x) & 0x7) << 24)
+#define MPC512X_SCLPC_FIFO_ALARM(x) ((x) & 0x3ff)
+
+enum lpb_dev_portsize {
+ LPB_DEV_PORTSIZE_UNDEFINED = 0,
+ LPB_DEV_PORTSIZE_1_BYTE = 1,
+ LPB_DEV_PORTSIZE_2_BYTES = 2,
+ LPB_DEV_PORTSIZE_4_BYTES = 4,
+ LPB_DEV_PORTSIZE_8_BYTES = 8
+};
+
+enum mpc512x_lpbfifo_req_dir {
+ MPC512X_LPBFIFO_REQ_DIR_READ,
+ MPC512X_LPBFIFO_REQ_DIR_WRITE
+};
+
+struct mpc512x_lpbfifo_request {
+ phys_addr_t dev_phys_addr; /* physical address of some device on LPB */
+ void *ram_virt_addr; /* virtual address of some region in RAM */
+ u32 size;
+ enum lpb_dev_portsize portsize;
+ enum mpc512x_lpbfifo_req_dir dir;
+ void (*callback)(struct mpc512x_lpbfifo_request *);
+};
+
+int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req);
+
#endif /* __ASM_POWERPC_MPC5121_H__ */
diff --git a/arch/powerpc/include/asm/mpc52xx.h b/arch/powerpc/include/asm/mpc52xx.h
index 0acc7c7c28d1..d7ffbd06797d 100644
--- a/arch/powerpc/include/asm/mpc52xx.h
+++ b/arch/powerpc/include/asm/mpc52xx.h
@@ -13,11 +13,10 @@
#ifndef __ASM_POWERPC_MPC52xx_H__
#define __ASM_POWERPC_MPC52xx_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/types.h>
-#include <asm/prom.h>
#include <asm/mpc5xxx.h>
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <linux/suspend.h>
@@ -31,7 +30,7 @@
/* Structures mapping of some unit register set */
/* ======================================================================== */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Memory Mapping Control */
struct mpc52xx_mmap_ctl {
@@ -259,14 +258,16 @@ struct mpc52xx_intr {
u32 per_error; /* INTR + 0x38 */
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* ========================================================================= */
/* Prototypes for MPC52xx sysdev */
/* ========================================================================= */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+
+struct device_node;
/* mpc52xx_common.c */
extern void mpc5200_setup_xlb_arbiter(void);
@@ -274,8 +275,7 @@ extern void mpc52xx_declare_of_platform_devices(void);
extern int mpc5200_psc_ac97_gpio_reset(int psc_number);
extern void mpc52xx_map_common_devices(void);
extern int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv);
-extern unsigned int mpc52xx_get_xtal_freq(struct device_node *node);
-extern void mpc52xx_restart(char *cmd);
+extern void __noreturn mpc52xx_restart(char *cmd);
/* mpc52xx_gpt.c */
struct mpc52xx_gpt_priv;
@@ -285,47 +285,6 @@ extern int mpc52xx_gpt_start_timer(struct mpc52xx_gpt_priv *gpt, u64 period,
extern u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt);
extern int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt);
-/* mpc52xx_lpbfifo.c */
-#define MPC52XX_LPBFIFO_FLAG_READ (0)
-#define MPC52XX_LPBFIFO_FLAG_WRITE (1<<0)
-#define MPC52XX_LPBFIFO_FLAG_NO_INCREMENT (1<<1)
-#define MPC52XX_LPBFIFO_FLAG_NO_DMA (1<<2)
-#define MPC52XX_LPBFIFO_FLAG_POLL_DMA (1<<3)
-
-struct mpc52xx_lpbfifo_request {
- struct list_head list;
-
- /* localplus bus address */
- unsigned int cs;
- size_t offset;
-
- /* Memory address */
- void *data;
- phys_addr_t data_phys;
-
- /* Details of transfer */
- size_t size;
- size_t pos; /* current position of transfer */
- int flags;
- int defer_xfer_start;
-
- /* What to do when finished */
- void (*callback)(struct mpc52xx_lpbfifo_request *);
-
- void *priv; /* Driver private data */
-
- /* statistics */
- int irq_count;
- int irq_ticks;
- u8 last_byte;
- int buffer_not_done_cnt;
-};
-
-extern int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req);
-extern void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req);
-extern void mpc52xx_lpbfifo_poll(void);
-extern int mpc52xx_lpbfifo_start_xfer(struct mpc52xx_lpbfifo_request *req);
-
/* mpc52xx_pic.c */
extern void mpc52xx_init_irq(void);
extern unsigned int mpc52xx_get_irq(void);
@@ -338,7 +297,7 @@ extern void __init mpc52xx_setup_pci(void);
static inline void mpc52xx_setup_pci(void) { }
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifdef CONFIG_PM
struct mpc52xx_suspend {
@@ -350,14 +309,14 @@ extern struct mpc52xx_suspend mpc52xx_suspend;
extern int __init mpc52xx_pm_init(void);
extern int mpc52xx_set_wakeup_gpio(u8 pin, u8 level);
-#ifdef CONFIG_PPC_LITE5200
-extern int __init lite5200_pm_init(void);
-
/* lite5200 calls mpc5200 suspend functions, so here they are */
extern int mpc52xx_pm_prepare(void);
extern int mpc52xx_pm_enter(suspend_state_t);
extern void mpc52xx_pm_finish(void);
extern char saved_sram[0x4000]; /* reuse buffer from mpc52xx suspend */
+
+#ifdef CONFIG_PPC_LITE5200
+int __init lite5200_pm_init(void);
#endif
#endif /* CONFIG_PM */
diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h
index d0ece257d310..ec995b289280 100644
--- a/arch/powerpc/include/asm/mpc52xx_psc.h
+++ b/arch/powerpc/include/asm/mpc52xx_psc.h
@@ -150,7 +150,10 @@
/* Structure of the hardware registers */
struct mpc52xx_psc {
- u8 mode; /* PSC + 0x00 */
+ union {
+ u8 mode; /* PSC + 0x00 */
+ u8 mr2;
+ };
u8 reserved0[3];
union { /* PSC + 0x04 */
u16 status;
@@ -258,8 +261,6 @@ struct mpc52xx_psc_fifo {
#define MPC512x_PSC_FIFO_FULL 0x2
#define MPC512x_PSC_FIFO_ALARM 0x4
#define MPC512x_PSC_FIFO_URERR 0x8
-#define MPC512x_PSC_FIFO_ORERR 0x01
-#define MPC512x_PSC_FIFO_MEMERROR 0x02
struct mpc512x_psc_fifo {
u32 reserved1[10];
diff --git a/arch/powerpc/include/asm/mpc5xxx.h b/arch/powerpc/include/asm/mpc5xxx.h
index 5ce9c5fa434a..44db26380435 100644
--- a/arch/powerpc/include/asm/mpc5xxx.h
+++ b/arch/powerpc/include/asm/mpc5xxx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -5,18 +6,19 @@
*
* Description:
* MPC5xxx Prototypes and definitions
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#ifndef __ASM_POWERPC_MPC5xxx_H__
#define __ASM_POWERPC_MPC5xxx_H__
-extern unsigned long mpc5xxx_get_bus_frequency(struct device_node *node);
+#include <linux/property.h>
+
+unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode);
+
+static inline unsigned long mpc5xxx_get_bus_frequency(struct device *dev)
+{
+ return mpc5xxx_fwnode_get_bus_frequency(dev_fwnode(dev));
+}
#endif /* __ASM_POWERPC_MPC5xxx_H__ */
diff --git a/arch/powerpc/include/asm/mpc6xx.h b/arch/powerpc/include/asm/mpc6xx.h
index effc2291beb2..6ed9f4ccc7b9 100644
--- a/arch/powerpc/include/asm/mpc6xx.h
+++ b/arch/powerpc/include/asm/mpc6xx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_POWERPC_MPC6xx_H
#define __ASM_POWERPC_MPC6xx_H
diff --git a/arch/powerpc/include/asm/mpc8260.h b/arch/powerpc/include/asm/mpc8260.h
deleted file mode 100644
index 03317e1e6185..000000000000
--- a/arch/powerpc/include/asm/mpc8260.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Since there are many different boards and no standard configuration,
- * we have a unique include file for each. Rather than change every
- * file that has to include MPC8260 configuration, they all include
- * this one and the configuration switching is done here.
- */
-#ifdef __KERNEL__
-#ifndef __ASM_POWERPC_MPC8260_H__
-#define __ASM_POWERPC_MPC8260_H__
-
-#define MPC82XX_BCR_PLDP 0x00800000 /* Pipeline Maximum Depth */
-
-#ifdef CONFIG_8260
-
-#if defined(CONFIG_PQ2ADS) || defined (CONFIG_PQ2FADS)
-#include <platforms/82xx/pq2ads.h>
-#endif
-
-#ifdef CONFIG_PCI_8260
-#include <platforms/82xx/m82xx_pci.h>
-#endif
-
-#endif /* CONFIG_8260 */
-#endif /* !__ASM_POWERPC_MPC8260_H__ */
-#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/mpc85xx.h b/arch/powerpc/include/asm/mpc85xx.h
index 3bef74a9914b..21aabc323015 100644
--- a/arch/powerpc/include/asm/mpc85xx.h
+++ b/arch/powerpc/include/asm/mpc85xx.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* MPC85xx cpu type detection
*
* Copyright 2011-2012 Freescale Semiconductor, Inc.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#ifndef __ASM_PPC_MPC85XX_H
@@ -61,6 +57,7 @@
#define SVR_T4240 0x824000
#define SVR_T4120 0x824001
#define SVR_T4160 0x824100
+#define SVR_T4080 0x824102
#define SVR_C291 0x850000
#define SVR_C292 0x850020
#define SVR_C293 0x850030
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 754f93d208fa..0c03a98986cd 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_MPIC_H
#define _ASM_POWERPC_MPIC_H
#ifdef __KERNEL__
@@ -34,10 +35,6 @@
#define MPIC_GREG_GCONF_BASE_MASK 0x000fffff
#define MPIC_GREG_GCONF_MCK 0x08000000
#define MPIC_GREG_GLOBAL_CONF_1 0x00030
-#define MPIC_GREG_GLOBAL_CONF_1_SIE 0x08000000
-#define MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK 0x70000000
-#define MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO(r) \
- (((r) << 28) & MPIC_GREG_GLOBAL_CONF_1_CLK_RATIO_MASK)
#define MPIC_GREG_VENDOR_0 0x00040
#define MPIC_GREG_VENDOR_1 0x00050
#define MPIC_GREG_VENDOR_2 0x00060
@@ -339,7 +336,7 @@ struct mpic
#endif
};
-extern struct bus_type mpic_subsys;
+extern const struct bus_type mpic_subsys;
/*
* MPIC flags (passed to mpic_alloc)
@@ -475,7 +472,7 @@ extern int mpic_cpu_get_priority(void);
extern void mpic_cpu_set_priority(int prio);
/* Request IPIs on primary mpic */
-extern void mpic_request_ipis(void);
+void __init mpic_request_ipis(void);
/* Send a message (IPI) to a given target (cpu number or MSG_*) */
void smp_mpic_message_pass(int target, int msg);
@@ -496,11 +493,5 @@ extern unsigned int mpic_get_coreint_irq(void);
/* Fetch Machine Check interrupt from primary mpic */
extern unsigned int mpic_get_mcirq(void);
-/* Set the EPIC clock ratio */
-void mpic_set_clk_ratio(struct mpic *mpic, u32 clock_ratio);
-
-/* Enable/Disable EPIC serial interrupt mode */
-void mpic_set_serial_int(struct mpic *mpic, int enable);
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_MPIC_H */
diff --git a/arch/powerpc/include/asm/mpic_msgr.h b/arch/powerpc/include/asm/mpic_msgr.h
index d4f471fb1031..cd25eeced208 100644
--- a/arch/powerpc/include/asm/mpic_msgr.h
+++ b/arch/powerpc/include/asm/mpic_msgr.h
@@ -1,11 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2011-2012, Meador Inge, Mentor Graphics Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the
- * License.
- *
*/
#ifndef _ASM_MPIC_MSGR_H
@@ -122,9 +117,9 @@ static inline void mpic_msgr_set_destination(struct mpic_msgr *msgr,
* @msgr: the message register whose IRQ is to be returned
*
* Returns the IRQ number associated with the given message register.
- * NO_IRQ is returned if this message register is not capable of
- * receiving interrupts. What message register can and cannot receive
- * interrupts is specified in the device tree for the system.
+ * 0 is returned if this message register is not capable of receiving
+ * interrupts. What message register can and cannot receive interrupts is
+ * specified in the device tree for the system.
*/
static inline int mpic_msgr_get_irq(struct mpic_msgr *msgr)
{
diff --git a/arch/powerpc/include/asm/mpic_timer.h b/arch/powerpc/include/asm/mpic_timer.h
index 0e23cd4ac8aa..d33e4149be17 100644
--- a/arch/powerpc/include/asm/mpic_timer.h
+++ b/arch/powerpc/include/asm/mpic_timer.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* arch/powerpc/include/asm/mpic_timer.h
*
@@ -7,11 +8,6 @@
*
* Author: Wang Dongsheng <Dongsheng.Wang@freescale.com>
* Li Yang <leoli@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#ifndef __MPIC_TIMER__
@@ -29,17 +25,17 @@ struct mpic_timer {
#ifdef CONFIG_MPIC_TIMER
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time);
+ time64_t time);
void mpic_start_timer(struct mpic_timer *handle);
void mpic_stop_timer(struct mpic_timer *handle);
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time);
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time);
void mpic_free_timer(struct mpic_timer *handle);
#else
struct mpic_timer *mpic_request_timer(irq_handler_t fn, void *dev,
- const struct timeval *time) { return NULL; }
+ time64_t time) { return NULL; }
void mpic_start_timer(struct mpic_timer *handle) { }
void mpic_stop_timer(struct mpic_timer *handle) { }
-void mpic_get_remain_time(struct mpic_timer *handle, struct timeval *time) { }
+void mpic_get_remain_time(struct mpic_timer *handle, time64_t *time) { }
void mpic_free_timer(struct mpic_timer *handle) { }
#endif
diff --git a/arch/powerpc/include/asm/msi_bitmap.h b/arch/powerpc/include/asm/msi_bitmap.h
index 97ac3f46ae0d..55c2f7db9cbd 100644
--- a/arch/powerpc/include/asm/msi_bitmap.h
+++ b/arch/powerpc/include/asm/msi_bitmap.h
@@ -1,14 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef _POWERPC_SYSDEV_MSI_BITMAP_H
#define _POWERPC_SYSDEV_MSI_BITMAP_H
/*
* Copyright 2008, Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2 of the
- * License.
- *
*/
#include <linux/of.h>
@@ -19,6 +14,7 @@ struct msi_bitmap {
unsigned long *bitmap;
spinlock_t lock;
unsigned int irq_count;
+ bool bitmap_from_slab;
};
int msi_bitmap_alloc_hwirqs(struct msi_bitmap *bmp, int num);
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
deleted file mode 100644
index 127ab23e1f6c..000000000000
--- a/arch/powerpc/include/asm/mutex.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm
- */
-#ifndef _ASM_POWERPC_MUTEX_H
-#define _ASM_POWERPC_MUTEX_H
-
-static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new)
-{
- int t;
-
- __asm__ __volatile__ (
-"1: lwarx %0,0,%1 # mutex trylock\n\
- cmpw 0,%0,%2\n\
- bne- 2f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %3,0,%1\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- "\n\
-2:"
- : "=&r" (t)
- : "r" (&v->counter), "r" (old), "r" (new)
- : "cc", "memory");
-
- return t;
-}
-
-static inline int __mutex_dec_return_lock(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%1 # mutex lock\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "memory");
-
- return t;
-}
-
-static inline int __mutex_inc_return_unlock(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%1 # mutex unlock\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "memory");
-
- return t;
-}
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(__mutex_dec_return_lock(count) < 0))
- fail_fn(count);
-}
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(__mutex_dec_return_lock(count) < 0))
- return -1;
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the count from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(__mutex_inc_return_unlock(count) <= 0))
- fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to 0, and return 1 (success), or if the count
- * was not 1, then return 0 (failure).
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- if (likely(__mutex_cmpxchg_lock(count, 1, 0) == 1))
- return 1;
- return 0;
-}
-
-#endif
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
new file mode 100644
index 000000000000..49a75340c3e0
--- /dev/null
+++ b/arch/powerpc/include/asm/nmi.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_NMI_H
+#define _ASM_NMI_H
+
+#ifdef CONFIG_PPC_WATCHDOG
+long soft_nmi_interrupt(struct pt_regs *regs);
+void watchdog_hardlockup_set_timeout_pct(u64 pct);
+#else
+static inline void watchdog_hardlockup_set_timeout_pct(u64 pct) {}
+#endif
+
+extern void hv_nmi_check_nonrecoverable(struct pt_regs *regs);
+
+#endif /* _ASM_NMI_H */
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
new file mode 100644
index 000000000000..014799557f60
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+
+#define PAGE_SHIFT_8M 23
+
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ flush_tlb_page(vma, vmaddr);
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ return shift_to_mmu_psize(shift);
+}
+
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned long sz);
+
+#define __HAVE_ARCH_HUGE_PTEP_GET
+static inline pte_t huge_ptep_get(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ if (ptep_is_8m_pmdp(mm, addr, ptep))
+ ptep = pte_offset_kernel((pmd_t *)ptep, ALIGN_DOWN(addr, SZ_8M));
+ return ptep_get(ptep);
+}
+
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz)
+{
+ pte_update(mm, addr, ptep, ~0UL, 0, 1);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+ unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+
+ pte_update(mm, addr, ptep, clr, set, 1);
+}
+
+#ifdef CONFIG_PPC_4K_PAGES
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
+{
+ size_t size = 1UL << shift;
+
+ if (size == SZ_16K)
+ return __pte(pte_val(entry) | _PAGE_SPS);
+ else
+ return __pte(pte_val(entry) | _PAGE_SPS | _PAGE_HUGE);
+}
+#define arch_make_huge_pte arch_make_huge_pte
+#endif
+
+#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
new file mode 100644
index 000000000000..08486b15b207
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_8XX_H_
+#define _ASM_POWERPC_KUP_8XX_H_
+
+#include <asm/bug.h>
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifndef __ASSEMBLER__
+
+#include <asm/reg.h>
+
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+ regs->kuap = mfspr(SPRN_MD_AP);
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+#define __kuap_save_and_lock __kuap_save_and_lock
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+}
+
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+ mtspr(SPRN_MD_AP, regs->kuap);
+}
+
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+ WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16);
+
+ return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
+
+static __always_inline void uaccess_begin_8xx(unsigned long val)
+{
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+ "i"(SPRN_MD_AP), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void uaccess_end_8xx(void)
+{
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : :
+ "i"(SPRN_MD_AP), "r"(MD_APG_KUAP), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
+{
+ uaccess_begin_8xx(MD_APG_INIT);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ uaccess_end_8xx();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags;
+
+ flags = mfspr(SPRN_MD_AP);
+
+ uaccess_end_8xx();
+
+ return flags;
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+ uaccess_begin_8xx(flags);
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000);
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_8XX_H_ */
diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
index bf52d704fc47..c3d192194324 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-44x.h
@@ -1,10 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_MMU_44X_H_
#define _ASM_POWERPC_MMU_44X_H_
/*
* PPC440 support
*/
-#include <asm/page.h>
+#include <asm/asm-const.h>
#define PPC44x_MMUCR_TID 0x000000ff
#define PPC44x_MMUCR_STS 0x00010000
@@ -99,7 +100,7 @@
#define PPC47x_TLB2_S_RW (PPC47x_TLB2_SW | PPC47x_TLB2_SR)
#define PPC47x_TLB2_IMG (PPC47x_TLB2_I | PPC47x_TLB2_M | PPC47x_TLB2_G)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern unsigned int tlb_44x_hwater;
extern unsigned int tlb_44x_index;
@@ -107,10 +108,13 @@ extern unsigned int tlb_44x_index;
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
+ void __user *vdso;
} mm_context_t;
-#endif /* !__ASSEMBLY__ */
+/* patch sites */
+extern s32 patch__tlb_44x_hwater_D, patch__tlb_44x_hwater_I;
+
+#endif /* !__ASSEMBLER__ */
#ifndef CONFIG_PPC_EARLY_DEBUG_44x
#define PPC44x_EARLY_TLBS 1
@@ -123,19 +127,19 @@ typedef struct {
/* Size of the TLBs used for pinning in lowmem */
#define PPC_PIN_SIZE (1 << 28) /* 256M */
-#if (PAGE_SHIFT == 12)
+#if defined(CONFIG_PPC_4K_PAGES)
#define PPC44x_TLBE_SIZE PPC44x_TLB_4K
#define PPC47x_TLBE_SIZE PPC47x_TLB0_4K
#define mmu_virtual_psize MMU_PAGE_4K
-#elif (PAGE_SHIFT == 14)
+#elif defined(CONFIG_PPC_16K_PAGES)
#define PPC44x_TLBE_SIZE PPC44x_TLB_16K
#define PPC47x_TLBE_SIZE PPC47x_TLB0_16K
#define mmu_virtual_psize MMU_PAGE_16K
-#elif (PAGE_SHIFT == 16)
+#elif defined(CONFIG_PPC_64K_PAGES)
#define PPC44x_TLBE_SIZE PPC44x_TLB_64K
#define PPC47x_TLBE_SIZE PPC47x_TLB0_64K
#define mmu_virtual_psize MMU_PAGE_64K
-#elif (PAGE_SHIFT == 18)
+#elif defined(CONFIG_PPC_256K_PAGES)
#define PPC44x_TLBE_SIZE PPC44x_TLB_256K
#define mmu_virtual_psize MMU_PAGE_256K
#else
diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 3d11d3ce79ec..f19115db8072 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_MMU_8XX_H_
#define _ASM_POWERPC_MMU_8XX_H_
/*
@@ -18,7 +19,6 @@
#define MI_RSV4I 0x08000000 /* Reserve 4 TLB entries */
#define MI_PPCS 0x02000000 /* Use MI_RPN prob/priv state */
#define MI_IDXMASK 0x00001f00 /* TLB index to be loaded */
-#define MI_RESETVAL 0x00000000 /* Value of register at reset */
/* These are the Ks and Kp from the PowerPC books. For proper operation,
* Ks = 0, Kp = 1.
@@ -27,6 +27,23 @@
#define MI_Ks 0x80000000 /* Should not be set */
#define MI_Kp 0x40000000 /* Should always be set */
+/*
+ * All pages' PP data bits are set to either 001 or 011 by copying _PAGE_EXEC
+ * into bit 21 in the ITLBmiss handler (bit 21 is the middle bit), which means
+ * respectively NA for All or X for Supervisor and no access for User.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all)
+ * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say
+ * "all User" rules, that will lead to NA for all.
+ * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED
+ * 0 => Kernel => 11 (all accesses performed as user, in accordance with page definition)
+ * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition)
+ * 2 => User => 11 (all accesses performed as user, in accordance with page definition)
+ * 3 => User+Accessed => 10 (all accesses performed according to swapped page definition) for KUEP
+ * 4-15 => Not Used
+ */
+#define MI_APG_INIT 0xde000000
+
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
* this register are used to create the TLB entry.
@@ -56,6 +73,7 @@
* additional information from the MI_EPN, and MI_TWC registers.
*/
#define SPRN_MI_RPN 790
+#define MI_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */
/* Define an RPN value for mapping kernel memory to large virtual
* pages for boot initialization. This has real page number of 0,
@@ -73,7 +91,6 @@
#define MD_TWAM 0x04000000 /* Use 4K page hardware assist */
#define MD_PPCS 0x02000000 /* Use MI_RPN prob/priv state */
#define MD_IDXMASK 0x00001f00 /* TLB index to be loaded */
-#define MD_RESETVAL 0x04000000 /* Value of register at reset */
#define SPRN_M_CASID 793 /* Address space ID (context) to match */
#define MC_ASIDMASK 0x0000000f /* Bits used for ASID value */
@@ -86,6 +103,10 @@
#define MD_Ks 0x80000000 /* Should not be set */
#define MD_Kp 0x40000000 /* Should always be set */
+/* See explanation above at the definition of MI_APG_INIT */
+#define MD_APG_INIT 0xdc000000
+#define MD_APG_KUAP 0xde000000
+
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
* this register are used to create the TLB entry.
@@ -129,21 +150,121 @@
* additional information from the MD_EPN, and MD_TWC registers.
*/
#define SPRN_MD_RPN 798
+#define MD_SPS16K 0x00000008 /* Small page size (0 = 4k, 1 = 16k) */
/* This is a temporary storage register that could be used to save
* a processor working register during a tablewalk.
*/
#define SPRN_M_TW 799
-#ifndef __ASSEMBLY__
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_4K
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_16K
+#define PTE_FRAG_NR 4
+#define PTE_FRAG_SIZE_SHIFT 12
+#define PTE_FRAG_SIZE (1UL << 12)
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define mmu_linear_psize MMU_PAGE_8M
+
+#define MODULES_END PAGE_OFFSET
+#define MODULES_SIZE (CONFIG_MODULES_SIZE * SZ_1M)
+#define MODULES_VADDR (MODULES_END - MODULES_SIZE)
+
+#ifndef __ASSEMBLER__
+
+#include <linux/mmdebug.h>
+#include <linux/sizes.h>
+
+void mmu_pin_tlb(unsigned long top, bool readonly);
+
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
+ void __user *vdso;
+ void *pte_frag;
} mm_context_t;
-#endif /* !__ASSEMBLY__ */
-#define mmu_virtual_psize MMU_PAGE_4K
-#define mmu_linear_psize MMU_PAGE_8M
+#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
+
+/*
+ * Page size definitions for 8xx
+ *
+ * shift : is the "PAGE_SHIFT" value for that page size
+ *
+ */
+struct mmu_psize_def {
+ unsigned int shift; /* number of bits */
+};
+
+extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+
+static inline int shift_to_mmu_psize(unsigned int shift)
+{
+ int psize;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize)
+ if (mmu_psize_defs[psize].shift == shift)
+ return psize;
+ return -1;
+}
+
+static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
+{
+ if (mmu_psize_defs[mmu_psize].shift)
+ return mmu_psize_defs[mmu_psize].shift;
+ BUG();
+}
+
+static inline bool arch_vmap_try_size(unsigned long addr, unsigned long end, u64 pfn,
+ unsigned int max_page_shift, unsigned long size)
+{
+ if (end - addr < size)
+ return false;
+
+ if ((1UL << max_page_shift) < size)
+ return false;
+
+ if (!IS_ALIGNED(addr, size))
+ return false;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), size))
+ return false;
+
+ return true;
+}
+
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
+ u64 pfn, unsigned int max_page_shift)
+{
+ if (arch_vmap_try_size(addr, end, pfn, max_page_shift, SZ_512K))
+ return SZ_512K;
+ if (PAGE_SIZE == SZ_16K)
+ return SZ_16K;
+ if (arch_vmap_try_size(addr, end, pfn, max_page_shift, SZ_16K))
+ return SZ_16K;
+ return PAGE_SIZE;
+}
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ if (size >= SZ_512K)
+ return 19;
+ else if (size >= SZ_16K)
+ return 14;
+ else
+ return PAGE_SHIFT;
+}
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+
+/* patch sites */
+extern s32 patch__itlbmiss_exit_1, patch__dtlbmiss_exit_1;
+extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
+
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_POWERPC_MMU_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
new file mode 100644
index 000000000000..11eac371e7e0
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGALLOC_32_H
+#define _ASM_POWERPC_PGALLOC_32_H
+
+#include <linux/threads.h>
+#include <linux/slab.h>
+
+/*
+ * We don't have any real pmd's, and this code never triggers because
+ * the pgd will always be present.
+ */
+/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */
+#define pmd_free(mm, x) do { } while (0)
+#define __pmd_free_tlb(tlb,x,a) do { } while (0)
+/* #define pgd_populate(mm, pmd, pte) BUG() */
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+ pte_t *pte)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pte_page)
+{
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
+}
+
+#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
new file mode 100644
index 000000000000..2d71e4b7cd09
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_32_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+#ifndef __ASSEMBLER__
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/mmu.h> /* For sub-arch specific PPC_PIN_SIZE */
+
+#endif /* __ASSEMBLER__ */
+
+#define PTE_INDEX_SIZE PTE_SHIFT
+#define PMD_INDEX_SIZE 0
+#define PUD_INDEX_SIZE 0
+#define PGD_INDEX_SIZE (32 - PGDIR_SHIFT)
+
+#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
+
+#ifndef __ASSEMBLER__
+#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE 0
+#define PUD_TABLE_SIZE 0
+#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define PMD_MASKED_BITS (PTE_TABLE_SIZE - 1)
+#endif /* __ASSEMBLER__ */
+
+#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus
+ *
+ * For any >32-bit physical address platform, we can use the following
+ * two level page table layout where the pgdir is 8KB and the MS 13 bits
+ * are an index to the second level table. The combined pgdir/pmd first
+ * level has 2048 entries and the second level has 512 64-bit PTE entries.
+ * -Matt
+ */
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
+#define PGDIR_MASK (~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS 0
+
+#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
+
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %08llx.\n", __FILE__, __LINE__, (unsigned long long)pgd_val(e))
+
+/*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start laying out kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+
+#define FIXADDR_SIZE 0
+#ifdef CONFIG_KASAN
+#include <asm/kasan.h>
+#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE)
+#else
+#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE))
+#endif
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_HIGHMEM
+#define IOREMAP_TOP PKMAP_BASE
+#else
+#define IOREMAP_TOP FIXADDR_START
+#endif
+
+/* PPC32 shares vmalloc area with ioremap */
+#define IOREMAP_START VMALLOC_START
+#define IOREMAP_END VMALLOC_END
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 16MB value just means that there will be a 64MB "hole" after the
+ * physical memory until the kernel virtual memory starts. That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leave a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * We no longer map larger than phys RAM with the BATs so we don't have
+ * to worry about the VMALLOC_OFFSET causing problems. We do have to worry
+ * about clashes between our early calls to ioremap() that start growing down
+ * from IOREMAP_TOP being run into the VM area allocations (growing upwards
+ * from VMALLOC_START). For this reason we have ioremap_bot to check when
+ * we actually run into our mappings setup in the early boot with the VM
+ * system. This really does become a problem for machines with good amounts
+ * of RAM. -- Cort
+ */
+#define VMALLOC_OFFSET (0x1000000) /* 16M */
+#ifdef PPC_PIN_SIZE
+#define VMALLOC_START (((ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#else
+#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#endif
+
+#ifdef CONFIG_KASAN_VMALLOC
+#define VMALLOC_END ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define VMALLOC_END ioremap_bot
+#endif
+
+/*
+ * Bits in a linux-style PTE. These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible.
+ */
+
+#if defined(CONFIG_44x)
+#include <asm/nohash/32/pte-44x.h>
+#elif defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT)
+#include <asm/nohash/pte-e500.h>
+#elif defined(CONFIG_PPC_85xx)
+#include <asm/nohash/32/pte-85xx.h>
+#elif defined(CONFIG_PPC_8xx)
+#include <asm/nohash/32/pte-8xx.h>
+#endif
+
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as PAGE_SHIFT here (ie, naturally aligned).
+ * Platforms that don't simply pre-define the value so we don't override it here.
+ */
+#ifndef PTE_RPN_SHIFT
+#define PTE_RPN_SHIFT (PAGE_SHIFT)
+#endif
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 36
+#else
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#define MAX_POSSIBLE_PHYSMEM_BITS 32
+#endif
+
+#ifndef __ASSEMBLER__
+
+#define pmd_none(pmd) (!pmd_val(pmd))
+#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
+#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ *pmdp = __pmd(0);
+}
+
+/*
+ * Note that on Book E processors, the pmd contains the kernel virtual
+ * (lowmem) address of the pte page. The physical address is less useful
+ * because everything runs with translation enabled (even the TLB miss
+ * handler). On everything else the pmd contains the physical address
+ * of the pte page. -- paulus
+ */
+#ifndef CONFIG_BOOKE
+#define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT)
+#else
+#define pmd_page_vaddr(pmd) \
+ ((const void *)((unsigned long)pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1)))
+#define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT)
+#endif
+
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs (32bit PTEs):
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <------------------ offset -------------------> < type -> E 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ *
+ * For 64bit PTEs, the offset is extended by 32bit.
+ */
+#define __swp_type(entry) ((entry).val & 0x1f)
+#define __swp_offset(entry) ((entry).val >> 5)
+#define __swp_entry(type, offset) ((swp_entry_t) { ((type) & 0x1f) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
+
+/* We borrow LSB 2 to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x000004
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h
index 4192b9bad901..da0469928273 100644
--- a/arch/powerpc/include/asm/pte-44x.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -1,5 +1,6 @@
-#ifndef _ASM_POWERPC_PTE_44x_H
-#define _ASM_POWERPC_PTE_44x_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_44x_H
+#define _ASM_POWERPC_NOHASH_32_PTE_44x_H
#ifdef __KERNEL__
/*
@@ -32,7 +33,7 @@
* - - - - - - U0 U1 U2 U3 W I M G E - UX UW UR SX SW SR
*
* Newer 440 cores (440x6 as used on AMCC 460EX/460GT) have additional
- * TLB2 storage attibute fields. Those are:
+ * TLB2 storage attribute fields. Those are:
*
* TLB2:
* 0...10 11 12 13 14 15 16...31
@@ -44,9 +45,6 @@
* - PRESENT *must* be in the bottom three bits because swap cache
* entries use the top 29 bits for TLB2.
*
- * - FILE *must* be in the bottom three bits because swap cache
- * entries use the top 29 bits for TLB2.
- *
* - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
* because it doesn't support SMP. However, some later 460 variants
* have -some- form of SMP support and so I keep the bit there for
@@ -58,31 +56,19 @@
* above bits. Note that the bit values are CPU specific, not architecture
* specific.
*
- * The kernel PTE entry holds an arch-dependent swp_entry structure under
- * certain situations. In other words, in such situations some portion of
- * the PTE bits are used as a swp_entry. In the PPC implementation, the
- * 3-24th LSB are shared with swp_entry, however the 0-2nd three LSB still
- * hold protection values. That means the three protection bits are
- * reserved for both PTE and SWAP entry at the most significant three
- * LSBs.
- *
- * There are three protection bits available for SWAP entry:
- * _PAGE_PRESENT
- * _PAGE_FILE
- * _PAGE_HASHPTE (if HW has)
- *
- * So those three bits have to be inside of 0-2nd LSB of PTE.
- *
+ * The kernel PTE entry can be an ordinary PTE mapping a page or a special swap
+ * PTE. In case of a swap PTE, LSB 2-24 are used to store information regarding
+ * the swap entry. However LSB 0-1 still hold protection values, for example,
+ * to distinguish swap PTEs from ordinary PTEs, and must be used with care.
*/
#define _PAGE_PRESENT 0x00000001 /* S: PTE valid */
-#define _PAGE_RW 0x00000002 /* S: Write permission */
-#define _PAGE_FILE 0x00000004 /* S: nonlinear file mapping */
+#define _PAGE_WRITE 0x00000002 /* S: Write permission */
#define _PAGE_EXEC 0x00000004 /* H: Execute permission */
-#define _PAGE_ACCESSED 0x00000008 /* S: Page referenced */
+#define _PAGE_READ 0x00000008 /* S: Read permission */
#define _PAGE_DIRTY 0x00000010 /* S: Page dirty */
#define _PAGE_SPECIAL 0x00000020 /* S: Special page */
-#define _PAGE_USER 0x00000040 /* S: User page */
+#define _PAGE_ACCESSED 0x00000040 /* S: Page referenced */
#define _PAGE_ENDIAN 0x00000080 /* H: E bit */
#define _PAGE_GUARDED 0x00000100 /* H: G bit */
#define _PAGE_COHERENT 0x00000200 /* H: M bit */
@@ -93,10 +79,25 @@
#define _PMD_PRESENT 0
#define _PMD_PRESENT_MASK (PAGE_MASK)
#define _PMD_BAD (~PAGE_MASK)
+#define _PMD_USER 0
/* ERPN in a PTE never gets cleared, ignore it */
#define _PTE_NONE_MASK 0xffffffff00000000ULL
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
+#endif
+
+#include <asm/pgtable-masks.h>
#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_44x_H */
+#endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-85xx.h b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
new file mode 100644
index 000000000000..14d64b4f3f14
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-85xx.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_85xx_H
+#define _ASM_POWERPC_NOHASH_32_PTE_85xx_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based
+ * processors
+ *
+ MMU Assist Register 3:
+
+ 32 33 34 35 36 ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+ RPN...................... 0 0 U0 U1 U2 U3 UX SX UW SW UR SR
+
+ - PRESENT *must* be in the bottom two bits because swap PTEs use
+ the top 30 bits.
+
+*/
+
+/* Definitions for FSL Book-E Cores */
+#define _PAGE_READ 0x00001 /* H: Read permission (SR) */
+#define _PAGE_PRESENT 0x00002 /* S: PTE contains a translation */
+#define _PAGE_WRITE 0x00004 /* S: Write permission (SW) */
+#define _PAGE_DIRTY 0x00008 /* S: Page dirty */
+#define _PAGE_EXEC 0x00010 /* H: SX permission */
+#define _PAGE_ACCESSED 0x00020 /* S: Page referenced */
+
+#define _PAGE_ENDIAN 0x00040 /* H: E bit */
+#define _PAGE_GUARDED 0x00080 /* H: G bit */
+#define _PAGE_COHERENT 0x00100 /* H: M bit */
+#define _PAGE_NO_CACHE 0x00200 /* H: I bit */
+#define _PAGE_WRITETHRU 0x00400 /* H: W bit */
+#define _PAGE_SPECIAL 0x00800 /* S: Special page */
+
+#define _PMD_PRESENT 0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD (~PAGE_MASK)
+#define _PMD_USER 0
+
+#define _PTE_NONE_MASK 0
+
+#define PTE_WIMGE_SHIFT (6)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
+#endif
+
+#include <asm/pgtable-masks.h>
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_NOHASH_32_PTE_85xx_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
new file mode 100644
index 000000000000..e2ea8ba9f8ca
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_8xx_H
+#define _ASM_POWERPC_NOHASH_32_PTE_8xx_H
+#ifdef __KERNEL__
+
+/*
+ * The PowerPC MPC8xx uses a TLB with hardware assisted, software tablewalk.
+ * We also use the two level tables, but we can put the real bits in them
+ * needed for the TLB and tablewalk. These definitions require Mx_CTR.PPM = 0,
+ * Mx_CTR.PPCS = 0, and MD_CTR.TWAM = 1. The level 2 descriptor has
+ * additional page protection (when Mx_CTR.PPCS = 1) that allows TLB hit
+ * based upon user/super access. The TLB does not have accessed nor write
+ * protect. We assume that if the TLB gets loaded with an entry it is
+ * accessed, and overload the changed bit for write protect. We use
+ * two bits in the software pte that are supposed to be set to zero in
+ * the TLB entry (24 and 25) for these indicators. Although the level 1
+ * descriptor contains the guarded and writethrough/copyback bits, we can
+ * set these at the page level since they get copied from the Mx_TWC
+ * register when the TLB entry is loaded. We will use bit 27 for guard, since
+ * that is where it exists in the MD_TWC, and bit 26 for writethrough.
+ * These will get masked from the level 2 descriptor at TLB load time, and
+ * copied to the MD_TWC before it gets loaded.
+ * Large page sizes added. We currently support two sizes, 4K and 8M.
+ * This also allows a TLB handler optimization because we can directly
+ * load the PMD into MD_TWC. The 8M pages are only used for kernel
+ * mapping of well known areas. The PMD (PGD) entries contain control
+ * flags in addition to the address, so care must be taken that the
+ * software no longer assumes these are only pointers.
+ */
+
+/* Definitions for 8xx embedded chips. */
+#define _PAGE_PRESENT 0x0001 /* V: Page is valid */
+#define _PAGE_NO_CACHE 0x0002 /* CI: cache inhibit */
+#define _PAGE_SH 0x0004 /* SH: No ASID (context) compare */
+#define _PAGE_SPS 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
+#define _PAGE_DIRTY 0x0100 /* C: page changed */
+
+/* These 4 software bits must be masked out when the L2 entry is loaded
+ * into the TLB.
+ */
+#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */
+#define _PAGE_ACCESSED 0x0020 /* Copied to L1 APG 1 entry in I/DTLB */
+#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */
+#define _PAGE_SPECIAL 0x0080 /* SW entry */
+
+#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
+#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
+
+#define _PAGE_HUGE 0x0800 /* Copied to L1 PS bit 29 */
+
+#define _PAGE_NAX (_PAGE_NA | _PAGE_EXEC)
+#define _PAGE_ROX (_PAGE_RO | _PAGE_EXEC)
+#define _PAGE_RW 0
+#define _PAGE_RWX _PAGE_EXEC
+
+/* cache related flags non existing on 8xx */
+#define _PAGE_COHERENT 0
+#define _PAGE_WRITETHRU 0
+
+#define _PAGE_KERNEL_RO (_PAGE_SH | _PAGE_RO)
+#define _PAGE_KERNEL_ROX (_PAGE_SH | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW (_PAGE_SH | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RWX (_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
+
+#define _PMD_PRESENT 0x0001
+#define _PMD_PRESENT_MASK _PMD_PRESENT
+#define _PMD_BAD 0x0f90
+#define _PMD_PAGE_MASK 0x000c
+#define _PMD_PAGE_8M 0x000c
+#define _PMD_PAGE_512K 0x0004
+#define _PMD_ACCESSED 0x0020 /* APG 1 */
+#define _PMD_USER 0x0040 /* APG 2 */
+
+#define _PTE_NONE_MASK 0
+
+#ifdef CONFIG_PPC_16K_PAGES
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_SPS)
+#else
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#endif
+
+#define _PAGE_BASE (_PAGE_BASE_NC)
+
+#include <asm/pgtable-masks.h>
+
+#ifndef __ASSEMBLER__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_RO);
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline int pte_read(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_RO) != _PAGE_NA;
+}
+
+#define pte_read pte_read
+
+static inline int pte_write(pte_t pte)
+{
+ return !(pte_val(pte) & _PAGE_RO);
+}
+
+#define pte_write pte_write
+
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_RO);
+}
+
+#define pte_mkwrite_novma pte_mkwrite_novma
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPS | _PAGE_HUGE);
+}
+
+#define pte_mkhuge pte_mkhuge
+
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ unsigned long clr, unsigned long set, int huge);
+
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pte_update(mm, addr, ptep, 0, _PAGE_RO, 0);
+}
+#define ptep_set_wrprotect ptep_set_wrprotect
+
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+ pte_t entry, unsigned long address, int psize)
+{
+ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_EXEC);
+ unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+ int huge = psize > mmu_virtual_psize ? 1 : 0;
+
+ pte_update(vma->vm_mm, address, ptep, clr, set, huge);
+
+ flush_tlb_page(vma, address);
+}
+#define __ptep_set_access_flags __ptep_set_access_flags
+
+static inline unsigned long __pte_leaf_size(pmd_t pmd, pte_t pte)
+{
+ pte_basic_t val = pte_val(pte);
+
+ if (pmd_val(pmd) & _PMD_PAGE_8M)
+ return SZ_8M;
+ if (val & _PAGE_HUGE)
+ return SZ_512K;
+ if (val & _PAGE_SPS)
+ return SZ_16K;
+ return SZ_4K;
+}
+
+#define __pte_leaf_size __pte_leaf_size
+
+/*
+ * On the 8xx, the page tables are a bit special. For 16k pages, we have
+ * 4 identical entries. For 512k pages, we have 128 entries as if it was
+ * 4k pages, but they are flagged as 512k pages for the hardware.
+ * For 8M pages, we have 1024 entries as if it was 4M pages (PMD_SIZE)
+ * but they are flagged as 8M pages for the hardware.
+ * For 4k pages, we have a single entry in the table.
+ */
+static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr);
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address);
+
+static inline bool ptep_is_8m_pmdp(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ return (pmd_t *)ptep == pmd_off(mm, ALIGN_DOWN(addr, SZ_8M));
+}
+
+static inline int number_of_cells_per_pte(pmd_t *pmd, pte_basic_t val, int huge)
+{
+ if (!huge)
+ return PAGE_SIZE / SZ_4K;
+ else if ((pmd_val(*pmd) & _PMD_PAGE_MASK) == _PMD_PAGE_8M)
+ return SZ_4M / SZ_4K;
+ else if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !(val & _PAGE_HUGE))
+ return SZ_16K / SZ_4K;
+ else
+ return SZ_512K / SZ_4K;
+}
+
+static inline pte_basic_t __pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+ unsigned long clr, unsigned long set, int huge)
+{
+ pte_basic_t *entry = (pte_basic_t *)p;
+ pte_basic_t old = pte_val(*p);
+ pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
+ int num, i;
+ pmd_t *pmd = pmd_off(mm, addr);
+
+ num = number_of_cells_per_pte(pmd, new, huge);
+
+ for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) {
+ *entry++ = new;
+ if (IS_ENABLED(CONFIG_PPC_16K_PAGES)) {
+ *entry++ = new;
+ *entry++ = new;
+ *entry++ = new;
+ }
+ }
+
+ return old;
+}
+
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ unsigned long clr, unsigned long set, int huge)
+{
+ pte_basic_t old;
+
+ if (huge && ptep_is_8m_pmdp(mm, addr, ptep)) {
+ pmd_t *pmdp = (pmd_t *)ptep;
+
+ old = __pte_update(mm, addr, pte_offset_kernel(pmdp, 0), clr, set, huge);
+ __pte_update(mm, addr, pte_offset_kernel(pmdp + 1, 0), clr, set, huge);
+ } else {
+ old = __pte_update(mm, addr, ptep, clr, set, huge);
+ }
+ return old;
+}
+#define pte_update pte_update
+
+#ifdef CONFIG_PPC_16K_PAGES
+#define ptep_get ptep_get
+static inline pte_t ptep_get(pte_t *ptep)
+{
+ pte_basic_t val = READ_ONCE(ptep->pte);
+ pte_t pte = {val, val, val, val};
+
+ return pte;
+}
+#endif /* CONFIG_PPC_16K_PAGES */
+
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
new file mode 100644
index 000000000000..e50b211becb3
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PGALLOC_64_H
+#define _ASM_POWERPC_PGALLOC_64_H
+/*
+ */
+
+#include <linux/slab.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+
+struct vmemmap_backing {
+ struct vmemmap_backing *list;
+ unsigned long phys;
+ unsigned long virt_addr;
+};
+extern struct vmemmap_backing *vmemmap_list;
+
+static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
+{
+ p4d_set(p4d, (unsigned long)pud);
+}
+
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+ kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
+}
+
+static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
+{
+ pud_set(pud, (unsigned long)pmd);
+}
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *pte)
+{
+ pmd_set(pmd, (unsigned long)pte);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+ pgtable_t pte_page)
+{
+ pmd_set(pmd, (unsigned long)pte_page);
+}
+
+static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+ return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
+{
+ kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
+}
+
+#define __pmd_free_tlb(tlb, pmd, addr) \
+ pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
+#define __pud_free_tlb(tlb, pud, addr) \
+ pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
+
+#endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
index 12798c9d4b4b..fb6fa1d4e074 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -1,5 +1,9 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC64_4K_H
-#define _ASM_POWERPC_PGTABLE_PPC64_4K_H
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+
+#include <asm-generic/pgtable-nop4d.h>
+
/*
* Entries per page directory level. The PTE level must use a 64b record
* for each page table entry. The PMD and PGD level use a 32b record for
@@ -10,12 +14,12 @@
#define PUD_INDEX_SIZE 9
#define PGD_INDEX_SIZE 9
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE)
#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE)
#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
@@ -27,9 +31,6 @@
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT PMD_SHIFT
-
/* PUD_SHIFT determines what a third-level page table entry can map */
#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
#define PUD_SIZE (1UL << PUD_SHIFT)
@@ -44,31 +45,49 @@
#define PMD_MASKED_BITS 0
/* Bits to mask out from a PUD to get to the PMD page */
#define PUD_MASKED_BITS 0
-/* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS 0
+/* Bits to mask out from a P4D to get to the PUD page */
+#define P4D_MASKED_BITS 0
/*
* 4-level page tables related bits
*/
-#define pgd_none(pgd) (!pgd_val(pgd))
-#define pgd_bad(pgd) (pgd_val(pgd) == 0)
-#define pgd_present(pgd) (pgd_val(pgd) != 0)
-#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0)
-#define pgd_page_vaddr(pgd) (pgd_val(pgd) & ~PGD_MASKED_BITS)
-#define pgd_page(pgd) virt_to_page(pgd_page_vaddr(pgd))
+#define p4d_none(p4d) (!p4d_val(p4d))
+#define p4d_bad(p4d) (p4d_val(p4d) == 0)
+#define p4d_present(p4d) (p4d_val(p4d) != 0)
+
+#ifndef __ASSEMBLER__
+
+static inline pud_t *p4d_pgtable(p4d_t p4d)
+{
+ return (pud_t *) (p4d_val(p4d) & ~P4D_MASKED_BITS);
+}
+
+static inline void p4d_clear(p4d_t *p4dp)
+{
+ *p4dp = __p4d(0);
+}
+
+static inline pte_t p4d_pte(p4d_t p4d)
+{
+ return __pte(p4d_val(p4d));
+}
+
+static inline p4d_t pte_p4d(pte_t pte)
+{
+ return __p4d(pte_val(pte));
+}
+extern struct page *p4d_page(p4d_t p4d);
-#define pud_offset(pgdp, addr) \
- (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
- (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+#endif /* !__ASSEMBLER__ */
#define pud_ERROR(e) \
- printk("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+ pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
/*
* On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
#define remap_4k_pfn(vma, addr, pfn, prot) \
remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
-#endif /* _ASM_POWERPC_PGTABLE_PPC64_4K_H */
+#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
new file mode 100644
index 000000000000..2deb955b7bc8
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_H
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the ppc64 non-hashed page table.
+ */
+
+#include <linux/sizes.h>
+
+#include <asm/nohash/64/pgtable-4k.h>
+#include <asm/barrier.h>
+#include <asm/asm-const.h>
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+ PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+
+#define PMD_CACHE_INDEX PMD_INDEX_SIZE
+#define PUD_CACHE_INDEX PUD_INDEX_SIZE
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ */
+#define KERN_VIRT_START ASM_CONST(0xc000100000000000)
+#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies a quarter of it on Book3E
+ * (we keep a quarter for the virtual memmap)
+ */
+#define VMALLOC_START KERN_VIRT_START
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2)
+#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
+
+/*
+ * The third quarter of the kernel virtual space is used for IO mappings,
+ * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
+ *
+ * ISA_IO_BASE = KERN_IO_START, 64K reserved area
+ * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
+ * IOREMAP_BASE = ISA_IO_BASE + 2G to KERN_IO_START + KERN_IO_SIZE
+ */
+#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
+#define KERN_IO_SIZE (KERN_VIRT_SIZE >> 2)
+#define FULL_IO_SIZE 0x80000000ul
+#define ISA_IO_BASE (KERN_IO_START)
+#define ISA_IO_END (KERN_IO_START + 0x10000ul)
+#define PHB_IO_BASE (ISA_IO_END)
+#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
+#define IOREMAP_BASE (PHB_IO_END)
+#define IOREMAP_START (ioremap_bot)
+#define IOREMAP_END (KERN_IO_START + KERN_IO_SIZE - FIXADDR_SIZE)
+#define FIXADDR_SIZE SZ_32M
+#define FIXADDR_TOP (IOREMAP_END + FIXADDR_SIZE)
+
+/*
+ * Defines the address of the vmemmap area, in its own region
+ * after the vmalloc space on Book3E
+ */
+#define VMEMMAP_BASE VMALLOC_END
+#define VMEMMAP_END KERN_IO_START
+#define vmemmap ((struct page *)VMEMMAP_BASE)
+
+
+/*
+ * Include the PTE bits definitions
+ */
+#include <asm/nohash/pte-e500.h>
+
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+
+#define H_PAGE_4K_PFN 0
+
+#ifndef __ASSEMBLER__
+/* pte_clear moved to later in this file */
+
+#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
+#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
+
+static inline void pmd_set(pmd_t *pmdp, unsigned long val)
+{
+ *pmdp = __pmd(val);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+ *pmdp = __pmd(0);
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
+#define pmd_none(pmd) (!pmd_val(pmd))
+#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
+ || (pmd_val(pmd) & PMD_BAD_BITS))
+#define pmd_present(pmd) (!pmd_none(pmd))
+#define pmd_page_vaddr(pmd) ((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS))
+extern struct page *pmd_page(pmd_t pmd);
+#define pmd_pfn(pmd) (page_to_pfn(pmd_page(pmd)))
+
+static inline void pud_set(pud_t *pudp, unsigned long val)
+{
+ *pudp = __pud(val);
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+ *pudp = __pud(0);
+}
+
+#define pud_none(pud) (!pud_val(pud))
+#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \
+ || (pud_val(pud) & PUD_BAD_BITS))
+#define pud_present(pud) (pud_val(pud) != 0)
+
+static inline pmd_t *pud_pgtable(pud_t pud)
+{
+ return (pmd_t *)(pud_val(pud) & ~PUD_MASKED_BITS);
+}
+
+extern struct page *pud_page(pud_t pud);
+
+static inline pte_t pud_pte(pud_t pud)
+{
+ return __pte(pud_val(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+ return __pud(pte_val(pte));
+}
+#define pud_write(pud) pte_write(pud_pte(pud))
+#define p4d_write(p4d) pte_write(p4d_pte(p4d)) /* pte_write() applied to the P4D entry viewed as a PTE */
+
+static inline void p4d_set(p4d_t *p4dp, unsigned long val)
+{
+ *p4dp = __p4d(val);
+}
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
+}
+
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep) \
+({ \
+ int __young = ptep_test_and_clear_young(__vma, __address, __ptep);\
+ __young; \
+})
+
+#define pmd_ERROR(e) \
+ pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+ pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
+ * are !pte_none() && !pte_present().
+ *
+ * Format of swap PTEs:
+ *
+ * 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * <-------------------------- offset ----------------------------
+ *
+ * 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 5 5 5 6 6 6 6
+ * 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3
+ * --------------> <----------- zero ------------> E < type -> 0 0
+ *
+ * E is the exclusive marker that is not stored in swap entries.
+ */
+#define MAX_SWAPFILES_CHECK() do { \
+ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+ } while (0)
+
+#define SWP_TYPE_BITS 5
+#define __swp_type(x) (((x).val >> 2) \
+ & ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x) ((x).val >> PTE_RPN_SHIFT)
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ (((type) & 0x1f) << 2) \
+ | ((offset) << PTE_RPN_SHIFT) })
+
+#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
+#define __swp_entry_to_pte(x) __pte((x).val)
+
+/* We borrow MSB 56 (LSB 7) to store the exclusive marker in swap PTEs. */
+#define _PAGE_SWP_EXCLUSIVE 0x80
+
+extern int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys);
+extern void vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size);
+void __patch_exception(int exc, unsigned long addr);
+#define patch_exception(exc, name) do { \
+ extern unsigned int name; \
+ __patch_exception((exc), (unsigned long)&name); \
+} while (0)
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/hugetlb-e500.h b/arch/powerpc/include/asm/nohash/hugetlb-e500.h
new file mode 100644
index 000000000000..cab0e1f1eea0
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/hugetlb-e500.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_HUGETLB_E500_H
+#define _ASM_POWERPC_NOHASH_HUGETLB_E500_H
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ if (shift & 1) /* Not a power of 4 */
+ return -EINVAL;
+
+ return shift_to_mmu_psize(shift);
+}
+
+static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
+{
+ unsigned int tsize = shift - _PAGE_PSIZE_SHIFT_OFFSET;
+ pte_basic_t val = (tsize << _PAGE_PSIZE_SHIFT) & _PAGE_PSIZE_MSK;
+
+ return __pte((pte_val(entry) & ~(pte_basic_t)_PAGE_PSIZE_MSK) | val);
+}
+#define arch_make_huge_pte arch_make_huge_pte
+
+#endif /* _ASM_POWERPC_NOHASH_HUGETLB_E500_H */
diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h
new file mode 100644
index 000000000000..d6bbb6d78bbe
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/kup-booke.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_BOOKE_H_
+#define _ASM_POWERPC_KUP_BOOKE_H_
+
+#include <asm/bug.h>
+#include <asm/mmu.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLER__
+
+.macro kuap_check_amr gpr1, gpr2
+.endm
+
+#else
+
+#include <linux/sched.h>
+
+#include <asm/reg.h>
+
+static __always_inline void __kuap_lock(void)
+{
+ mtspr(SPRN_PID, 0);
+ isync();
+}
+#define __kuap_lock __kuap_lock
+
+static __always_inline void __kuap_save_and_lock(struct pt_regs *regs)
+{
+ regs->kuap = mfspr(SPRN_PID);
+ mtspr(SPRN_PID, 0);
+ isync();
+}
+#define __kuap_save_and_lock __kuap_save_and_lock
+
+static __always_inline void kuap_user_restore(struct pt_regs *regs)
+{
+ if (kuap_is_disabled())
+ return;
+
+ mtspr(SPRN_PID, current->thread.pid);
+
+ /* Context synchronisation is performed by rfi */
+}
+
+static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap)
+{
+ if (regs->kuap)
+ mtspr(SPRN_PID, current->thread.pid);
+
+ /* Context synchronisation is performed by rfi */
+}
+
+#ifdef CONFIG_PPC_KUAP_DEBUG
+static __always_inline unsigned long __kuap_get_and_assert_locked(void)
+{
+ WARN_ON_ONCE(mfspr(SPRN_PID));
+
+ return 0;
+}
+#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked
+#endif
+
+static __always_inline void uaccess_begin_booke(unsigned long val)
+{
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+ "i"(SPRN_PID), "r"(val), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void uaccess_end_booke(void)
+{
+ asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : :
+ "i"(SPRN_PID), "r"(0), "i"(MMU_FTR_KUAP) : "memory");
+}
+
+static __always_inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size, unsigned long dir)
+{
+ uaccess_begin_booke(current->thread.pid);
+}
+
+static __always_inline void prevent_user_access(unsigned long dir)
+{
+ uaccess_end_booke();
+}
+
+static __always_inline unsigned long prevent_user_access_return(void)
+{
+ unsigned long flags = mfspr(SPRN_PID);
+
+ uaccess_end_booke();
+
+ return flags;
+}
+
+static __always_inline void restore_user_access(unsigned long flags)
+{
+ if (flags)
+ uaccess_begin_booke(current->thread.pid);
+}
+
+static __always_inline bool
+__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
+{
+ return !regs->kuap;
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_BOOKE_H_ */
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-e500.h
index cd4f04a74802..2fad5ff426a0 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-e500.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_MMU_BOOK3E_H_
#define _ASM_POWERPC_MMU_BOOK3E_H_
/*
@@ -74,7 +75,6 @@
#define MAS2_E 0x00000001
#define MAS2_WIMGE_MASK 0x0000001f
#define MAS2_EPN_MASK(size) (~0 << (size + 10))
-#define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags))
#define MAS3_RPN 0xFFFFF000
#define MAS3_U0 0x00000200
@@ -220,7 +220,17 @@
#define TLBILX_T_CLASS2 6
#define TLBILX_T_CLASS3 7
-#ifndef __ASSEMBLY__
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define MAS2_M_IF_NEEDED MAS2_M
+#else
+#define MAS2_M_IF_NEEDED 0
+#endif
+
+#ifndef __ASSEMBLER__
#include <asm/bug.h>
extern unsigned int tlbcam_index;
@@ -228,29 +238,17 @@ extern unsigned int tlbcam_index;
typedef struct {
unsigned int id;
unsigned int active;
- unsigned long vdso_base;
-#ifdef CONFIG_PPC_MM_SLICES
- u64 low_slices_psize; /* SLB page size encodings */
- u64 high_slices_psize; /* 4 bits per slice for now */
- u16 user_psize; /* page size index */
-#endif
-#ifdef CONFIG_PPC_64K_PAGES
- /* for 4K PTE fragment support */
- void *pte_frag;
-#endif
+ void __user *vdso;
} mm_context_t;
/* Page size definitions, common between 32 and 64-bit
*
* shift : is the "PAGE_SHIFT" value for that page size
- * penc : is the pte encoding mask
*
*/
struct mmu_psize_def
{
unsigned int shift; /* number of bits */
- unsigned int enc; /* PTE encoding */
- unsigned int ind; /* Corresponding indirect page size shift */
unsigned int flags;
#define MMU_PAGE_SIZE_DIRECT 0x1 /* Supported as a direct size */
#define MMU_PAGE_SIZE_INDIRECT 0x2 /* Supported as an indirect size */
@@ -279,8 +277,6 @@ static inline unsigned int mmu_psize_to_shift(unsigned int mmu_psize)
*/
#if defined(CONFIG_PPC_4K_PAGES)
#define mmu_virtual_psize MMU_PAGE_4K
-#elif defined(CONFIG_PPC_64K_PAGES)
-#define mmu_virtual_psize MMU_PAGE_64K
#else
#error Unsupported page size
#endif
@@ -304,8 +300,7 @@ extern unsigned long linear_map_top;
extern int book3e_htw_mode;
#define PPC_HTW_NONE 0
-#define PPC_HTW_IBM 1
-#define PPC_HTW_E6500 2
+#define PPC_HTW_E6500 1
/*
* 64-bit booke platforms don't load the tlb in the tlb miss handler code.
@@ -313,8 +308,16 @@ extern int book3e_htw_mode;
* return 1, indicating that the tlb requires preloading.
*/
#define HUGETLB_NEED_PRELOAD
+
+#define mmu_cleanup_all NULL
+
+#define MAX_PHYSMEM_BITS 44
+
#endif
-#endif /* !__ASSEMBLY__ */
+#include <asm/percpu.h>
+DECLARE_PER_CPU(int, next_tlbcam_idx);
+
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
new file mode 100644
index 000000000000..4cc795044103
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_MMU_H_
+#define _ASM_POWERPC_NOHASH_MMU_H_
+
+#if defined(CONFIG_44x)
+/* 44x-style software loaded TLB */
+#include <asm/nohash/32/mmu-44x.h>
+#elif defined(CONFIG_PPC_E500)
+/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
+#include <asm/nohash/mmu-e500.h>
+#elif defined (CONFIG_PPC_8xx)
+/* Motorola/Freescale 8xx software loaded TLB */
+#include <asm/nohash/32/mmu-8xx.h>
+#endif
+
+#endif /* _ASM_POWERPC_NOHASH_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
new file mode 100644
index 000000000000..4ef780b291bc
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_PGALLOC_H
+#define _ASM_POWERPC_NOHASH_PGALLOC_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
+#ifdef CONFIG_PPC64
+extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
+#else
+/* 44x etc which is BOOKE not BOOK3E */
+static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
+ unsigned long address)
+{
+
+}
+#endif /* !CONFIG_PPC64 */
+
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ pgd_t *pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+
+#ifdef CONFIG_PPC_8xx
+ memcpy(pgd + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD,
+ (MAX_PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
+#endif
+ return pgd;
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
+#ifdef CONFIG_PPC64
+#include <asm/nohash/64/pgalloc.h>
+#else
+#include <asm/nohash/32/pgalloc.h>
+#endif
+
+static inline void pgtable_free(void *table, int shift)
+{
+ if (!shift) {
+ pte_fragment_free((unsigned long *)table, 0);
+ } else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, table, 0);
+}
+#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
new file mode 100644
index 000000000000..5af168b7f292
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -0,0 +1,377 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_PGTABLE_H
+
+#ifndef __ASSEMBLER__
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+ unsigned long clr, unsigned long set, int huge);
+#endif
+
+#if defined(CONFIG_PPC64)
+#include <asm/nohash/64/pgtable.h>
+#else
+#include <asm/nohash/32/pgtable.h>
+#endif
+
+/*
+ * _PAGE_CHG_MASK is the mask of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+#ifndef __ASSEMBLER__
+
+extern int icache_44x_need_flush;
+
+#ifndef pte_huge_size
+static inline unsigned long pte_huge_size(pte_t pte)
+{
+ return PAGE_SIZE;
+}
+#endif
+
+/*
+ * PTE updates. This function is called whenever an existing
+ * valid PTE is updated. This does -not- include set_pte_at()
+ * which nowadays only sets a new PTE.
+ *
+ * Depending on the type of MMU, we may need to use atomic updates
+ * and the PTE may be either 32 or 64 bit wide. In the latter case,
+ * when using atomic updates, only the low part of the PTE is
+ * accessed atomically.
+ *
+ * In addition, on 44x, we also maintain a global flag indicating
+ * that an executable user mapping was modified, which is needed
+ * to properly flush the virtually tagged instruction cache of
+ * those implementations.
+ */
+#ifndef pte_update
+static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
+ unsigned long clr, unsigned long set, int huge)
+{
+ pte_basic_t old = pte_val(*p);
+ pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
+ unsigned long sz;
+ unsigned long pdsize;
+ int i;
+
+ if (new == old) /* clr/set would not change the PTE: skip the stores */
+ return old;
+
+ if (huge)
+ sz = pte_huge_size(__pte(old));
+ else
+ sz = PAGE_SIZE;
+
+ if (sz < PMD_SIZE) /* pdsize: largest table-level size that does not exceed sz */
+ pdsize = PAGE_SIZE;
+ else if (sz < PUD_SIZE)
+ pdsize = PMD_SIZE;
+ else if (sz < P4D_SIZE)
+ pdsize = PUD_SIZE;
+ else if (sz < PGDIR_SIZE)
+ pdsize = P4D_SIZE;
+ else
+ pdsize = PGDIR_SIZE;
+
+ for (i = 0; i < sz / pdsize; i++, p++) { /* write sz / pdsize consecutive entries starting at p */
+ *p = __pte(new);
+ if (new) /* an all-zero value is stored unchanged in every slot */
+ new += (unsigned long long)(pdsize / PAGE_SIZE) << PTE_RPN_SHIFT; /* otherwise advance the RPN field by pdsize / PAGE_SIZE per slot */
+ }
+
+ if (IS_ENABLED(CONFIG_44x) && !is_kernel_addr(addr) && (old & _PAGE_EXEC))
+ icache_44x_need_flush = 1;
+
+ /* huge pages use the old page table lock, so only check the lock for normal pages */
+ if (!huge)
+ assert_pte_locked(mm, addr);
+
+ return old; /* callers get the value the PTE held before the update */
+}
+#endif
+
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+ unsigned long old;
+
+ old = pte_update(vma->vm_mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+
+ return (old & _PAGE_ACCESSED) != 0;
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+
+#ifndef ptep_set_wrprotect
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
+}
+#endif
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ return __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+}
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+{
+ pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+/* Set the dirty and/or accessed bits atomically in a linux PTE */
+#ifndef __ptep_set_access_flags
+static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
+ pte_t *ptep, pte_t entry,
+ unsigned long address,
+ int psize)
+{
+ unsigned long set = pte_val(entry) &
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+ int huge = psize > mmu_virtual_psize ? 1 : 0;
+
+ pte_update(vma->vm_mm, address, ptep, 0, set, huge);
+
+ flush_tlb_page(vma, address);
+}
+#endif
+
+/* Generic accessors to PTE bits */
+#ifndef pte_mkwrite_novma
+static inline pte_t pte_mkwrite_novma(pte_t pte)
+{
+ /*
+ * write implies read, hence set both
+ */
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+#endif
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+#ifndef pte_wrprotect
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_WRITE);
+}
+#endif
+
+#ifndef pte_mkexec
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+#endif
+
+#ifndef pte_write
+static inline int pte_write(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_WRITE;
+}
+#endif
+static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
+static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline bool pte_hashpte(pte_t pte) { return false; }
+static inline bool pte_ci(pte_t pte) { return pte_val(pte) & _PAGE_NO_CACHE; }
+static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; }
+
+static inline int pte_present(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hw_valid(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline int pte_young(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_ACCESSED;
+}
+
+/*
+ * Don't just check for any non-zero bits in _PAGE_READ, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_READ. Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+#ifndef pte_read
+static inline bool pte_read(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_READ) == _PAGE_READ;
+}
+#endif
+
+/*
+ * We only find page table entries at the last level
+ * Hence no need for other accessors
+ */
+#define pte_access_permitted pte_access_permitted
+static inline bool pte_access_permitted(pte_t pte, bool write)
+{
+ /*
+ * Any access requires a present PTE that passes pte_read();
+ * a writable PTE is expected to have _PAGE_READ set as well.
+ */
+ if (!pte_present(pte) || !pte_read(pte))
+ return false;
+
+ if (write && !pte_write(pte))
+ return false;
+
+ return true;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
+ return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+ pgprot_val(pgprot)); }
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_exprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_EXEC);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+#ifndef pte_mkhuge
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return __pte(pte_val(pte));
+}
+#endif
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+static inline bool pte_swp_exclusive(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_SWP_EXCLUSIVE;
+}
+
+static inline pte_t pte_swp_mkexclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SWP_EXCLUSIVE);
+}
+
+static inline pte_t pte_swp_clear_exclusive(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SWP_EXCLUSIVE);
+}
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * a horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte, int percpu)
+{
+ /* Special case: 32-bit with 64-bit PTE. In this case, we
+ * can just store as long as we do the two halves in the right order
+ * with a barrier in between.
+ * In the percpu case, we also fall back to the simple update
+ */
+ if (IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_PTE_64BIT) && !percpu) {
+ __asm__ __volatile__("\
+ stw%X0 %2,%0\n\
+ mbar\n\
+ stw%X1 %L2,%1"
+ : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+ : "r" (pte) : "memory");
+ return;
+ }
+ /* Anything else just stores the PTE normally. That covers all 64-bit
+ * cases, and 32-bit non-hash with 32-bit PTEs.
+ */
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
+ ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte); /* all four sub-entries receive the same value */
+#else
+ *ptep = pte;
+#endif
+
+ /*
+ * With hardware tablewalk, a sync is needed to ensure that
+ * subsequent accesses see the PTE we just wrote. Unlike userspace
+ * mappings, we can't tolerate spurious faults, so make sure
+ * the new PTE will be seen the first time.
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64) && is_kernel_addr(addr))
+ mb();
+}
+
+/*
+ * Macros to change the cache-control bits of a page protection value.
+ */
+
+#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+ _PAGE_WRITETHRU)
+
+#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+ _PAGE_NO_CACHE | _PAGE_GUARDED))
+
+#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+ _PAGE_NO_CACHE))
+
+#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+ _PAGE_COHERENT))
+
+#if _PAGE_WRITETHRU != 0
+#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+ _PAGE_COHERENT | _PAGE_WRITETHRU))
+#else
+#define pgprot_cached_wthru(prot) pgprot_noncached(prot)
+#endif
+
+#define pgprot_cached_noncoherent(prot) \
+ (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
+
+#define pgprot_writecombine pgprot_noncached_wc
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+void unmap_kernel_page(unsigned long va);
+
+#endif /* __ASSEMBLER__ */
+#endif
diff --git a/arch/powerpc/include/asm/nohash/pte-e500.h b/arch/powerpc/include/asm/nohash/pte-e500.h
new file mode 100644
index 000000000000..b61efc3ee904
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pte-e500.h
@@ -0,0 +1,140 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_PTE_E500_H
+#define _ASM_POWERPC_NOHASH_PTE_E500_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for processors compliant with the Book3E
+ * architecture 2.06 or later. The position of the PTE bits
+ * matches the HW definition of the optional Embedded Page Table
+ * category.
+ */
+
+/* Architected bits */
+#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
+#define _PAGE_SW1 0x000002
+#define _PAGE_BAP_SR 0x000004
+#define _PAGE_BAP_UR 0x000008
+#define _PAGE_BAP_SW 0x000010
+#define _PAGE_BAP_UW 0x000020
+#define _PAGE_BAP_SX 0x000040
+#define _PAGE_BAP_UX 0x000080
+#define _PAGE_PSIZE_MSK 0x000f00
+#define _PAGE_TSIZE_4K 0x000100
+#define _PAGE_DIRTY 0x001000 /* C: page changed */
+#define _PAGE_SW0 0x002000
+#define _PAGE_U3 0x004000
+#define _PAGE_U2 0x008000
+#define _PAGE_U1 0x010000
+#define _PAGE_U0 0x020000
+#define _PAGE_ACCESSED 0x040000
+#define _PAGE_ENDIAN 0x080000
+#define _PAGE_GUARDED 0x100000
+#define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */
+#define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
+#define _PAGE_WRITETHRU 0x800000 /* W: cache write-through */
+
+#define _PAGE_PSIZE_SHIFT 7
+#define _PAGE_PSIZE_SHIFT_OFFSET 10
+
+/* "Higher level" linux bit combinations */
+#define _PAGE_EXEC (_PAGE_BAP_SX | _PAGE_BAP_UX) /* .. and was cache cleaned */
+#define _PAGE_READ (_PAGE_BAP_SR | _PAGE_BAP_UR) /* User read permission */
+#define _PAGE_WRITE (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
+
+#define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO (_PAGE_BAP_SR)
+#define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
+#define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX)
+
+#define _PAGE_NA 0
+#define _PAGE_NAX _PAGE_BAP_UX
+#define _PAGE_RO _PAGE_READ
+#define _PAGE_ROX (_PAGE_READ | _PAGE_BAP_UX)
+#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_BAP_UX)
+
+#define _PAGE_SPECIAL _PAGE_SW0
+
+#define PTE_RPN_SHIFT (24)
+
+#define PTE_WIMGE_SHIFT (19)
+#define PTE_BAP_SHIFT (2)
+
+/* On 32-bit, we never clear the top part of the PTE */
+#ifdef CONFIG_PPC32
+#define _PTE_NONE_MASK 0xffffffff00000000ULL
+#define _PMD_PRESENT 0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD (~PAGE_MASK)
+#define _PMD_USER 0
+#else
+#define _PTE_NONE_MASK 0
+#endif
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_TSIZE_4K)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
+#endif
+
+#include <asm/pgtable-masks.h>
+
+#ifndef __ASSEMBLER__
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte((pte_val(pte) & ~_PAGE_BAP_SX) | _PAGE_BAP_UX);
+}
+#define pte_mkexec pte_mkexec
+
+static inline unsigned long pte_huge_size(pte_t pte)
+{
+ pte_basic_t val = pte_val(pte);
+
+ return 1UL << (((val & _PAGE_PSIZE_MSK) >> _PAGE_PSIZE_SHIFT) + _PAGE_PSIZE_SHIFT_OFFSET);
+}
+#define pte_huge_size pte_huge_size
+
+static inline int pmd_leaf(pmd_t pmd)
+{
+ if (IS_ENABLED(CONFIG_PPC64))
+ return (long)pmd_val(pmd) > 0;
+ else
+ return pmd_val(pmd) & _PAGE_PSIZE_MSK;
+}
+#define pmd_leaf pmd_leaf
+
+static inline unsigned long pmd_leaf_size(pmd_t pmd)
+{
+ return pte_huge_size(__pte(pmd_val(pmd)));
+}
+#define pmd_leaf_size pmd_leaf_size
+
+#ifdef CONFIG_PPC64
+static inline int pud_leaf(pud_t pud)
+{
+ if (IS_ENABLED(CONFIG_PPC64))
+ return (long)pud_val(pud) > 0;
+ else
+ return pud_val(pud) & _PAGE_PSIZE_MSK;
+}
+#define pud_leaf pud_leaf
+
+static inline unsigned long pud_leaf_size(pud_t pud)
+{
+ return pte_huge_size(__pte(pud_val(pud)));
+}
+#define pud_leaf_size pud_leaf_size
+
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_NOHASH_PTE_E500_H */
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
new file mode 100644
index 000000000000..9a2cf83ea4f1
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_TLBFLUSH_H
+#define _ASM_POWERPC_NOHASH_TLBFLUSH_H
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb_mm(mm) flushes the TLB entries of the specified mm context
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - local_flush_tlb_mm(mm) flushes the specified mm context on
+ * the local processor
+ * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
+ *
+ */
+
+/*
+ * TLB flushing for software loaded TLB chips
+ *
+ * TODO: (CONFIG_PPC_85xx) determine if flush_tlb_range &
+ * flush_tlb_kernel_range are best implemented as tlbia vs
+ * specific tlbie's
+ */
+
+struct vm_area_struct;
+struct mm_struct;
+
+#define MMU_NO_CONTEXT ((unsigned int)-1)
+
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end);
+
+#ifdef CONFIG_PPC_8xx
+static inline void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned int pid = READ_ONCE(mm->context.id);
+
+ if (pid != MMU_NO_CONTEXT)
+ asm volatile ("sync; tlbia; isync" : : : "memory");
+}
+
+static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
+}
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ start &= PAGE_MASK;
+
+ if (end - start <= PAGE_SIZE)
+ asm volatile ("tlbie %0; sync" : : "r" (start) : "memory");
+ else
+ asm volatile ("sync; tlbia; isync" : : : "memory");
+}
+#else
+extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+extern void local_flush_tlb_mm(struct mm_struct *mm);
+extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize);
+
+extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int tsize, int ind);
+#endif
+
+#ifdef CONFIG_SMP
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int tsize, int ind);
+#else
+#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
+#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr)
+#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i)
+#endif
+
+#endif /* _ASM_POWERPC_NOHASH_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/nvram.h b/arch/powerpc/include/asm/nvram.h
index b0fe0fe4e626..eda7fac3500e 100644
--- a/arch/powerpc/include/asm/nvram.h
+++ b/arch/powerpc/include/asm/nvram.h
@@ -1,20 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* NVRAM definitions and access functions.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_NVRAM_H
#define _ASM_POWERPC_NVRAM_H
-
+#include <linux/types.h>
#include <linux/errno.h>
#include <linux/list.h>
#include <uapi/asm/nvram.h>
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
+struct err_log_info {
+ __be32 error_type;
+ __be32 seq_num;
+};
+
+struct nvram_os_partition {
+ const char *name;
+ int req_size; /* desired size, in bytes */
+ int min_size; /* minimum acceptable size (0 means req_size) */
+ long size; /* size of data portion (excluding err_log_info) */
+ long index; /* offset of data portion of partition */
+ bool os_partition; /* partition initialized by OS, not FW */
+};
+
+struct oops_log_info {
+ __be16 version;
+ __be16 report_length;
+ __be64 timestamp;
+} __attribute__((packed));
+
+extern struct nvram_os_partition oops_log_partition;
+
#ifdef CONFIG_PPC_PSERIES
+extern struct nvram_os_partition rtas_log_partition;
+
extern int nvram_write_error_log(char * buff, int length,
unsigned int err_type, unsigned int err_seq);
extern int nvram_read_error_log(char * buff, int length,
@@ -47,13 +74,21 @@ extern int pmac_get_partition(int partition);
extern u8 pmac_xpram_read(int xpaddr);
extern void pmac_xpram_write(int xpaddr, u8 data);
-/* Synchronize NVRAM */
-extern void nvram_sync(void);
+/* Initialize NVRAM OS partition */
+extern int __init nvram_init_os_partition(struct nvram_os_partition *part);
+
+/* Initialize NVRAM oops partition */
+extern void __init nvram_init_oops_partition(int rtas_partition_exists);
+
+/* Read a NVRAM partition */
+extern int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+ int length, unsigned int *err_type,
+ unsigned int *error_log_cnt);
-/* Determine NVRAM size */
-extern ssize_t nvram_get_size(void);
+/* Write to NVRAM OS partition */
+extern int nvram_write_os_partition(struct nvram_os_partition *part,
+ char *buff, int length,
+ unsigned int err_type,
+ unsigned int error_log_cnt);
-/* Normal access to NVRAM */
-extern unsigned char nvram_read_byte(int i);
-extern void nvram_write_byte(unsigned char c, int i);
#endif /* _ASM_POWERPC_NVRAM_H */
diff --git a/arch/powerpc/include/asm/ohare.h b/arch/powerpc/include/asm/ohare.h
index 0d030f9dea24..da3371fc348c 100644
--- a/arch/powerpc/include/asm/ohare.h
+++ b/arch/powerpc/include/asm/ohare.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_OHARE_H
#define _ASM_POWERPC_OHARE_H
#ifdef __KERNEL__
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
new file mode 100644
index 000000000000..d3eaa3425797
--- /dev/null
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -0,0 +1,1188 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * OPAL API definitions.
+ *
+ * Copyright 2011-2015 IBM Corp.
+ */
+
+#ifndef __OPAL_API_H
+#define __OPAL_API_H
+
+/****** OPAL APIs ******/
+
+/* Return codes */
+#define OPAL_SUCCESS 0
+#define OPAL_PARAMETER -1
+#define OPAL_BUSY -2
+#define OPAL_PARTIAL -3
+#define OPAL_CONSTRAINED -4
+#define OPAL_CLOSED -5
+#define OPAL_HARDWARE -6
+#define OPAL_UNSUPPORTED -7
+#define OPAL_PERMISSION -8
+#define OPAL_NO_MEM -9
+#define OPAL_RESOURCE -10
+#define OPAL_INTERNAL_ERROR -11
+#define OPAL_BUSY_EVENT -12
+#define OPAL_HARDWARE_FROZEN -13
+#define OPAL_WRONG_STATE -14
+#define OPAL_ASYNC_COMPLETION -15
+#define OPAL_EMPTY -16
+#define OPAL_I2C_TIMEOUT -17
+#define OPAL_I2C_INVALID_CMD -18
+#define OPAL_I2C_LBUS_PARITY -19
+#define OPAL_I2C_BKEND_OVERRUN -20
+#define OPAL_I2C_BKEND_ACCESS -21
+#define OPAL_I2C_ARBT_LOST -22
+#define OPAL_I2C_NACK_RCVD -23
+#define OPAL_I2C_STOP_ERR -24
+#define OPAL_XIVE_PROVISIONING -31
+#define OPAL_XIVE_FREE_ACTIVE -32
+#define OPAL_TIMEOUT -33
+
+/* API Tokens (in r0) */
+#define OPAL_INVALID_CALL -1
+#define OPAL_TEST 0
+#define OPAL_CONSOLE_WRITE 1
+#define OPAL_CONSOLE_READ 2
+#define OPAL_RTC_READ 3
+#define OPAL_RTC_WRITE 4
+#define OPAL_CEC_POWER_DOWN 5
+#define OPAL_CEC_REBOOT 6
+#define OPAL_READ_NVRAM 7
+#define OPAL_WRITE_NVRAM 8
+#define OPAL_HANDLE_INTERRUPT 9
+#define OPAL_POLL_EVENTS 10
+#define OPAL_PCI_SET_HUB_TCE_MEMORY 11
+#define OPAL_PCI_SET_PHB_TCE_MEMORY 12
+#define OPAL_PCI_CONFIG_READ_BYTE 13
+#define OPAL_PCI_CONFIG_READ_HALF_WORD 14
+#define OPAL_PCI_CONFIG_READ_WORD 15
+#define OPAL_PCI_CONFIG_WRITE_BYTE 16
+#define OPAL_PCI_CONFIG_WRITE_HALF_WORD 17
+#define OPAL_PCI_CONFIG_WRITE_WORD 18
+#define OPAL_SET_XIVE 19
+#define OPAL_GET_XIVE 20
+#define OPAL_GET_COMPLETION_TOKEN_STATUS 21 /* obsolete */
+#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER 22
+#define OPAL_PCI_EEH_FREEZE_STATUS 23
+#define OPAL_PCI_SHPC 24
+#define OPAL_CONSOLE_WRITE_BUFFER_SPACE 25
+#define OPAL_PCI_EEH_FREEZE_CLEAR 26
+#define OPAL_PCI_PHB_MMIO_ENABLE 27
+#define OPAL_PCI_SET_PHB_MEM_WINDOW 28
+#define OPAL_PCI_MAP_PE_MMIO_WINDOW 29
+#define OPAL_PCI_SET_PHB_TABLE_MEMORY 30
+#define OPAL_PCI_SET_PE 31
+#define OPAL_PCI_SET_PELTV 32
+#define OPAL_PCI_SET_MVE 33
+#define OPAL_PCI_SET_MVE_ENABLE 34
+#define OPAL_PCI_GET_XIVE_REISSUE 35
+#define OPAL_PCI_SET_XIVE_REISSUE 36
+#define OPAL_PCI_SET_XIVE_PE 37
+#define OPAL_GET_XIVE_SOURCE 38
+#define OPAL_GET_MSI_32 39
+#define OPAL_GET_MSI_64 40
+#define OPAL_START_CPU 41
+#define OPAL_QUERY_CPU_STATUS 42
+#define OPAL_WRITE_OPPANEL 43 /* unimplemented */
+#define OPAL_PCI_MAP_PE_DMA_WINDOW 44
+#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL 45
+#define OPAL_PCI_RESET 49
+#define OPAL_PCI_GET_HUB_DIAG_DATA 50
+#define OPAL_PCI_GET_PHB_DIAG_DATA 51
+#define OPAL_PCI_FENCE_PHB 52
+#define OPAL_PCI_REINIT 53
+#define OPAL_PCI_MASK_PE_ERROR 54
+#define OPAL_SET_SLOT_LED_STATUS 55
+#define OPAL_GET_EPOW_STATUS 56
+#define OPAL_SET_SYSTEM_ATTENTION_LED 57
+#define OPAL_RESERVED1 58
+#define OPAL_RESERVED2 59
+#define OPAL_PCI_NEXT_ERROR 60
+#define OPAL_PCI_EEH_FREEZE_STATUS2 61
+#define OPAL_PCI_POLL 62
+#define OPAL_PCI_MSI_EOI 63
+#define OPAL_PCI_GET_PHB_DIAG_DATA2 64
+#define OPAL_XSCOM_READ 65
+#define OPAL_XSCOM_WRITE 66
+#define OPAL_LPC_READ 67
+#define OPAL_LPC_WRITE 68
+#define OPAL_RETURN_CPU 69
+#define OPAL_REINIT_CPUS 70
+#define OPAL_ELOG_READ 71
+#define OPAL_ELOG_WRITE 72
+#define OPAL_ELOG_ACK 73
+#define OPAL_ELOG_RESEND 74
+#define OPAL_ELOG_SIZE 75
+#define OPAL_FLASH_VALIDATE 76
+#define OPAL_FLASH_MANAGE 77
+#define OPAL_FLASH_UPDATE 78
+#define OPAL_RESYNC_TIMEBASE 79
+#define OPAL_CHECK_TOKEN 80
+#define OPAL_DUMP_INIT 81
+#define OPAL_DUMP_INFO 82
+#define OPAL_DUMP_READ 83
+#define OPAL_DUMP_ACK 84
+#define OPAL_GET_MSG 85
+#define OPAL_CHECK_ASYNC_COMPLETION 86
+#define OPAL_SYNC_HOST_REBOOT 87
+#define OPAL_SENSOR_READ 88
+#define OPAL_GET_PARAM 89
+#define OPAL_SET_PARAM 90
+#define OPAL_DUMP_RESEND 91
+#define OPAL_ELOG_SEND 92 /* Deprecated */
+#define OPAL_PCI_SET_PHB_CAPI_MODE 93
+#define OPAL_DUMP_INFO2 94
+#define OPAL_WRITE_OPPANEL_ASYNC 95
+#define OPAL_PCI_ERR_INJECT 96
+#define OPAL_PCI_EEH_FREEZE_SET 97
+#define OPAL_HANDLE_HMI 98
+#define OPAL_CONFIG_CPU_IDLE_STATE 99
+#define OPAL_SLW_SET_REG 100
+#define OPAL_REGISTER_DUMP_REGION 101
+#define OPAL_UNREGISTER_DUMP_REGION 102
+#define OPAL_WRITE_TPO 103
+#define OPAL_READ_TPO 104
+#define OPAL_GET_DPO_STATUS 105
+#define OPAL_OLD_I2C_REQUEST 106 /* Deprecated */
+#define OPAL_IPMI_SEND 107
+#define OPAL_IPMI_RECV 108
+#define OPAL_I2C_REQUEST 109
+#define OPAL_FLASH_READ 110
+#define OPAL_FLASH_WRITE 111
+#define OPAL_FLASH_ERASE 112
+#define OPAL_PRD_MSG 113
+#define OPAL_LEDS_GET_INDICATOR 114
+#define OPAL_LEDS_SET_INDICATOR 115
+#define OPAL_CEC_REBOOT2 116
+#define OPAL_CONSOLE_FLUSH 117
+#define OPAL_GET_DEVICE_TREE 118
+#define OPAL_PCI_GET_PRESENCE_STATE 119
+#define OPAL_PCI_GET_POWER_STATE 120
+#define OPAL_PCI_SET_POWER_STATE 121
+#define OPAL_INT_GET_XIRR 122
+#define OPAL_INT_SET_CPPR 123
+#define OPAL_INT_EOI 124
+#define OPAL_INT_SET_MFRR 125
+#define OPAL_PCI_TCE_KILL 126
+#define OPAL_NMMU_SET_PTCR 127
+#define OPAL_XIVE_RESET 128
+#define OPAL_XIVE_GET_IRQ_INFO 129
+#define OPAL_XIVE_GET_IRQ_CONFIG 130
+#define OPAL_XIVE_SET_IRQ_CONFIG 131
+#define OPAL_XIVE_GET_QUEUE_INFO 132
+#define OPAL_XIVE_SET_QUEUE_INFO 133
+#define OPAL_XIVE_DONATE_PAGE 134
+#define OPAL_XIVE_ALLOCATE_VP_BLOCK 135
+#define OPAL_XIVE_FREE_VP_BLOCK 136
+#define OPAL_XIVE_GET_VP_INFO 137
+#define OPAL_XIVE_SET_VP_INFO 138
+#define OPAL_XIVE_ALLOCATE_IRQ 139
+#define OPAL_XIVE_FREE_IRQ 140
+#define OPAL_XIVE_SYNC 141
+#define OPAL_XIVE_DUMP 142
+#define OPAL_XIVE_GET_QUEUE_STATE 143
+#define OPAL_XIVE_SET_QUEUE_STATE 144
+#define OPAL_SIGNAL_SYSTEM_RESET 145
+#define OPAL_NPU_INIT_CONTEXT 146
+#define OPAL_NPU_DESTROY_CONTEXT 147
+#define OPAL_NPU_MAP_LPAR 148
+#define OPAL_IMC_COUNTERS_INIT 149
+#define OPAL_IMC_COUNTERS_START 150
+#define OPAL_IMC_COUNTERS_STOP 151
+#define OPAL_GET_POWERCAP 152
+#define OPAL_SET_POWERCAP 153
+#define OPAL_GET_POWER_SHIFT_RATIO 154
+#define OPAL_SET_POWER_SHIFT_RATIO 155
+#define OPAL_SENSOR_GROUP_CLEAR 156
+#define OPAL_PCI_SET_P2P 157
+#define OPAL_QUIESCE 158
+#define OPAL_NPU_SPA_SETUP 159
+#define OPAL_NPU_SPA_CLEAR_CACHE 160
+#define OPAL_NPU_TL_SET 161
+#define OPAL_SENSOR_READ_U64 162
+#define OPAL_SENSOR_GROUP_ENABLE 163
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
+#define OPAL_HANDLE_HMI2 166
+#define OPAL_NX_COPROC_INIT 167
+#define OPAL_XIVE_GET_VP_STATE 170
+#define OPAL_MPIPL_UPDATE 173
+#define OPAL_MPIPL_REGISTER_TAG 174
+#define OPAL_MPIPL_QUERY_TAG 175
+#define OPAL_SECVAR_GET 176
+#define OPAL_SECVAR_GET_NEXT 177
+#define OPAL_SECVAR_ENQUEUE_UPDATE 178
+#define OPAL_LAST 178
+
+#define QUIESCE_HOLD 1 /* Spin all calls at entry */
+#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
+#define QUIESCE_LOCK_BREAK 3 /* Set to ignore locks. */
+#define QUIESCE_RESUME 4 /* Un-quiesce */
+#define QUIESCE_RESUME_FAST_REBOOT 5 /* Un-quiesce, fast reboot */
+
+/* Device tree flags */
+
+/*
+ * Flags set in power-mgmt nodes in device tree describing
+ * idle states that are supported in the platform.
+ */
+
+#define OPAL_PM_TIMEBASE_STOP 0x00000002
+#define OPAL_PM_LOSE_HYP_CONTEXT 0x00002000
+#define OPAL_PM_LOSE_FULL_CONTEXT 0x00004000
+#define OPAL_PM_NAP_ENABLED 0x00010000
+#define OPAL_PM_SLEEP_ENABLED 0x00020000
+#define OPAL_PM_WINKLE_ENABLED 0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000 /* with workaround */
+#define OPAL_PM_STOP_INST_FAST 0x00100000
+#define OPAL_PM_STOP_INST_DEEP 0x00200000
+
+/*
+ * OPAL_CONFIG_CPU_IDLE_STATE parameters
+ */
+#define OPAL_CONFIG_IDLE_FASTSLEEP 1
+#define OPAL_CONFIG_IDLE_UNDO 0
+#define OPAL_CONFIG_IDLE_APPLY 1
+
+#ifndef __ASSEMBLER__
+
+/* Other enums */
+enum OpalFreezeState {
+ OPAL_EEH_STOPPED_NOT_FROZEN = 0,
+ OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
+ OPAL_EEH_STOPPED_DMA_FREEZE = 2,
+ OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3,
+ OPAL_EEH_STOPPED_RESET = 4,
+ OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
+ OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
+};
+
+enum OpalEehFreezeActionToken {
+ OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
+ OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3,
+
+ OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1,
+ OPAL_EEH_ACTION_SET_FREEZE_DMA = 2,
+ OPAL_EEH_ACTION_SET_FREEZE_ALL = 3
+};
+
+enum OpalPciStatusToken {
+ OPAL_EEH_NO_ERROR = 0,
+ OPAL_EEH_IOC_ERROR = 1,
+ OPAL_EEH_PHB_ERROR = 2,
+ OPAL_EEH_PE_ERROR = 3,
+ OPAL_EEH_PE_MMIO_ERROR = 4,
+ OPAL_EEH_PE_DMA_ERROR = 5
+};
+
+enum OpalPciErrorSeverity {
+ OPAL_EEH_SEV_NO_ERROR = 0,
+ OPAL_EEH_SEV_IOC_DEAD = 1,
+ OPAL_EEH_SEV_PHB_DEAD = 2,
+ OPAL_EEH_SEV_PHB_FENCED = 3,
+ OPAL_EEH_SEV_PE_ER = 4,
+ OPAL_EEH_SEV_INF = 5
+};
+
+enum OpalErrinjectType {
+ OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR = 0,
+ OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64 = 1,
+};
+
+enum OpalErrinjectFunc {
+ /* IOA bus specific errors */
+ OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR = 0,
+ OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA = 1,
+ OPAL_ERR_INJECT_FUNC_IOA_LD_IO_ADDR = 2,
+ OPAL_ERR_INJECT_FUNC_IOA_LD_IO_DATA = 3,
+ OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR = 4,
+ OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA = 5,
+ OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR = 6,
+ OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA = 7,
+ OPAL_ERR_INJECT_FUNC_IOA_ST_IO_ADDR = 8,
+ OPAL_ERR_INJECT_FUNC_IOA_ST_IO_DATA = 9,
+ OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR = 10,
+ OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA = 11,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR = 12,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA = 13,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER = 14,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET = 15,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR = 16,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA = 17,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER = 18,
+ OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET = 19,
+};
+
+enum OpalMmioWindowType {
+ OPAL_M32_WINDOW_TYPE = 1,
+ OPAL_M64_WINDOW_TYPE = 2,
+ OPAL_IO_WINDOW_TYPE = 3
+};
+
+enum OpalExceptionHandler {
+ OPAL_MACHINE_CHECK_HANDLER = 1,
+ OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
+ OPAL_SOFTPATCH_HANDLER = 3
+};
+
+enum OpalPendingState {
+ OPAL_EVENT_OPAL_INTERNAL = 0x1,
+ OPAL_EVENT_NVRAM = 0x2,
+ OPAL_EVENT_RTC = 0x4,
+ OPAL_EVENT_CONSOLE_OUTPUT = 0x8,
+ OPAL_EVENT_CONSOLE_INPUT = 0x10,
+ OPAL_EVENT_ERROR_LOG_AVAIL = 0x20,
+ OPAL_EVENT_ERROR_LOG = 0x40,
+ OPAL_EVENT_EPOW = 0x80,
+ OPAL_EVENT_LED_STATUS = 0x100,
+ OPAL_EVENT_PCI_ERROR = 0x200,
+ OPAL_EVENT_DUMP_AVAIL = 0x400,
+ OPAL_EVENT_MSG_PENDING = 0x800,
+};
+
+enum OpalThreadStatus {
+ OPAL_THREAD_INACTIVE = 0x0,
+ OPAL_THREAD_STARTED = 0x1,
+ OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
+};
+
+enum OpalPciBusCompare {
+ OpalPciBusAny = 0, /* Any bus number match */
+ OpalPciBus3Bits = 2, /* Match top 3 bits of bus number */
+ OpalPciBus4Bits = 3, /* Match top 4 bits of bus number */
+ OpalPciBus5Bits = 4, /* Match top 5 bits of bus number */
+ OpalPciBus6Bits = 5, /* Match top 6 bits of bus number */
+ OpalPciBus7Bits = 6, /* Match top 7 bits of bus number */
+ OpalPciBusAll = 7, /* Match bus number exactly */
+};
+
+enum OpalDeviceCompare {
+ OPAL_IGNORE_RID_DEVICE_NUMBER = 0,
+ OPAL_COMPARE_RID_DEVICE_NUMBER = 1
+};
+
+enum OpalFuncCompare {
+ OPAL_IGNORE_RID_FUNCTION_NUMBER = 0,
+ OPAL_COMPARE_RID_FUNCTION_NUMBER = 1
+};
+
+enum OpalPeAction {
+ OPAL_UNMAP_PE = 0,
+ OPAL_MAP_PE = 1
+};
+
+enum OpalPeltvAction {
+ OPAL_REMOVE_PE_FROM_DOMAIN = 0,
+ OPAL_ADD_PE_TO_DOMAIN = 1
+};
+
+enum OpalMveEnableAction {
+ OPAL_DISABLE_MVE = 0,
+ OPAL_ENABLE_MVE = 1
+};
+
+enum OpalM64Action {
+ OPAL_DISABLE_M64 = 0,
+ OPAL_ENABLE_M64_SPLIT = 1,
+ OPAL_ENABLE_M64_NON_SPLIT = 2
+};
+
+enum OpalPciResetScope {
+ OPAL_RESET_PHB_COMPLETE = 1,
+ OPAL_RESET_PCI_LINK = 2,
+ OPAL_RESET_PHB_ERROR = 3,
+ OPAL_RESET_PCI_HOT = 4,
+ OPAL_RESET_PCI_FUNDAMENTAL = 5,
+ OPAL_RESET_PCI_IODA_TABLE = 6
+};
+
+enum OpalPciReinitScope {
+ /*
+ * Note: we chose values that do not overlap
+ * OpalPciResetScope as OPAL v2 used the same
+ * enum for both
+ */
+ OPAL_REINIT_PCI_DEV = 1000
+};
+
+enum OpalPciResetState {
+ OPAL_DEASSERT_RESET = 0,
+ OPAL_ASSERT_RESET = 1
+};
+
+enum OpalPciSlotPresence {
+ OPAL_PCI_SLOT_EMPTY = 0,
+ OPAL_PCI_SLOT_PRESENT = 1
+};
+
+enum OpalPciSlotPower {
+ OPAL_PCI_SLOT_POWER_OFF = 0,
+ OPAL_PCI_SLOT_POWER_ON = 1,
+ OPAL_PCI_SLOT_OFFLINE = 2,
+ OPAL_PCI_SLOT_ONLINE = 3
+};
+
+enum OpalSlotLedType {
+ OPAL_SLOT_LED_TYPE_ID = 0, /* IDENTIFY LED */
+ OPAL_SLOT_LED_TYPE_FAULT = 1, /* FAULT LED */
+ OPAL_SLOT_LED_TYPE_ATTN = 2, /* System Attention LED */
+ OPAL_SLOT_LED_TYPE_MAX = 3
+};
+
+enum OpalSlotLedState {
+ OPAL_SLOT_LED_STATE_OFF = 0, /* LED is OFF */
+ OPAL_SLOT_LED_STATE_ON = 1 /* LED is ON */
+};
+
+/*
+ * Address cycle types for LPC accesses. These also correspond
+ * to the content of the first cell of the "reg" property for
+ * device nodes on the LPC bus
+ */
+enum OpalLPCAddressType {
+ OPAL_LPC_MEM = 0,
+ OPAL_LPC_IO = 1,
+ OPAL_LPC_FW = 2,
+};
+
+enum opal_msg_type {
+ OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc,
+ * additional params function-specific
+ */
+ OPAL_MSG_MEM_ERR = 1,
+ OPAL_MSG_EPOW = 2,
+ OPAL_MSG_SHUTDOWN = 3, /* params[0] = 1 reboot, 0 shutdown */
+ OPAL_MSG_HMI_EVT = 4,
+ OPAL_MSG_DPO = 5,
+ OPAL_MSG_PRD = 6,
+ OPAL_MSG_OCC = 7,
+ OPAL_MSG_PRD2 = 8,
+ OPAL_MSG_TYPE_MAX,
+};
+
+struct opal_msg {
+ __be32 msg_type;
+ __be32 reserved;
+ __be64 params[8];
+};
+
+/* System parameter permission */
+enum OpalSysparamPerm {
+ OPAL_SYSPARAM_READ = 0x1,
+ OPAL_SYSPARAM_WRITE = 0x2,
+ OPAL_SYSPARAM_RW = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE),
+};
+
+enum {
+ OPAL_IPMI_MSG_FORMAT_VERSION_1 = 1,
+};
+
+struct opal_ipmi_msg {
+ uint8_t version;
+ uint8_t netfn;
+ uint8_t cmd;
+ uint8_t data[];
+};
+
+/* FSP memory errors handling */
+enum OpalMemErr_Version {
+ OpalMemErr_V1 = 1,
+};
+
+enum OpalMemErrType {
+ OPAL_MEM_ERR_TYPE_RESILIENCE = 0,
+ OPAL_MEM_ERR_TYPE_DYN_DALLOC,
+};
+
+/* Memory Resilience error type */
+enum OpalMemErr_ResilErrType {
+ OPAL_MEM_RESILIENCE_CE = 0,
+ OPAL_MEM_RESILIENCE_UE,
+ OPAL_MEM_RESILIENCE_UE_SCRUB,
+};
+
+/* Dynamic Memory Deallocation type */
+enum OpalMemErr_DynErrType {
+ OPAL_MEM_DYNAMIC_DEALLOC = 0,
+};
+
+struct OpalMemoryErrorData {
+ enum OpalMemErr_Version version:8; /* 0x00 */
+ enum OpalMemErrType type:8; /* 0x01 */
+ __be16 flags; /* 0x02 */
+ uint8_t reserved_1[4]; /* 0x04 */
+
+ union {
+ /* Memory Resilience corrected/uncorrected error info */
+ struct {
+ enum OpalMemErr_ResilErrType resil_err_type:8;
+ uint8_t reserved_1[7];
+ __be64 physical_address_start;
+ __be64 physical_address_end;
+ } resilience;
+ /* Dynamic memory deallocation error info */
+ struct {
+ enum OpalMemErr_DynErrType dyn_err_type:8;
+ uint8_t reserved_1[7];
+ __be64 physical_address_start;
+ __be64 physical_address_end;
+ } dyn_dealloc;
+ } u;
+};
+
+/* HMI interrupt event */
+enum OpalHMI_Version {
+ OpalHMIEvt_V1 = 1,
+ OpalHMIEvt_V2 = 2,
+};
+
+enum OpalHMI_Severity {
+ OpalHMI_SEV_NO_ERROR = 0,
+ OpalHMI_SEV_WARNING = 1,
+ OpalHMI_SEV_ERROR_SYNC = 2,
+ OpalHMI_SEV_FATAL = 3,
+};
+
+enum OpalHMI_Disposition {
+ OpalHMI_DISPOSITION_RECOVERED = 0,
+ OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
+};
+
+enum OpalHMI_ErrType {
+ OpalHMI_ERROR_MALFUNC_ALERT = 0,
+ OpalHMI_ERROR_PROC_RECOV_DONE,
+ OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
+ OpalHMI_ERROR_PROC_RECOV_MASKED,
+ OpalHMI_ERROR_TFAC,
+ OpalHMI_ERROR_TFMR_PARITY,
+ OpalHMI_ERROR_HA_OVERFLOW_WARN,
+ OpalHMI_ERROR_XSCOM_FAIL,
+ OpalHMI_ERROR_XSCOM_DONE,
+ OpalHMI_ERROR_SCOM_FIR,
+ OpalHMI_ERROR_DEBUG_TRIG_FIR,
+ OpalHMI_ERROR_HYP_RESOURCE,
+ OpalHMI_ERROR_CAPP_RECOVERY,
+};
+
+enum OpalHMI_XstopType {
+ CHECKSTOP_TYPE_UNKNOWN = 0,
+ CHECKSTOP_TYPE_CORE = 1,
+ CHECKSTOP_TYPE_NX = 2,
+ CHECKSTOP_TYPE_NPU = 3
+};
+
+enum OpalHMI_CoreXstopReason {
+ CORE_CHECKSTOP_IFU_REGFILE = 0x00000001,
+ CORE_CHECKSTOP_IFU_LOGIC = 0x00000002,
+ CORE_CHECKSTOP_PC_DURING_RECOV = 0x00000004,
+ CORE_CHECKSTOP_ISU_REGFILE = 0x00000008,
+ CORE_CHECKSTOP_ISU_LOGIC = 0x00000010,
+ CORE_CHECKSTOP_FXU_LOGIC = 0x00000020,
+ CORE_CHECKSTOP_VSU_LOGIC = 0x00000040,
+ CORE_CHECKSTOP_PC_RECOV_IN_MAINT_MODE = 0x00000080,
+ CORE_CHECKSTOP_LSU_REGFILE = 0x00000100,
+ CORE_CHECKSTOP_PC_FWD_PROGRESS = 0x00000200,
+ CORE_CHECKSTOP_LSU_LOGIC = 0x00000400,
+ CORE_CHECKSTOP_PC_LOGIC = 0x00000800,
+ CORE_CHECKSTOP_PC_HYP_RESOURCE = 0x00001000,
+ CORE_CHECKSTOP_PC_HANG_RECOV_FAILED = 0x00002000,
+ CORE_CHECKSTOP_PC_AMBI_HANG_DETECTED = 0x00004000,
+ CORE_CHECKSTOP_PC_DEBUG_TRIG_ERR_INJ = 0x00008000,
+ CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ = 0x00010000,
+};
+
+enum OpalHMI_NestAccelXstopReason {
+ NX_CHECKSTOP_SHM_INVAL_STATE_ERR = 0x00000001,
+ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_1 = 0x00000002,
+ NX_CHECKSTOP_DMA_INVAL_STATE_ERR_2 = 0x00000004,
+ NX_CHECKSTOP_DMA_CH0_INVAL_STATE_ERR = 0x00000008,
+ NX_CHECKSTOP_DMA_CH1_INVAL_STATE_ERR = 0x00000010,
+ NX_CHECKSTOP_DMA_CH2_INVAL_STATE_ERR = 0x00000020,
+ NX_CHECKSTOP_DMA_CH3_INVAL_STATE_ERR = 0x00000040,
+ NX_CHECKSTOP_DMA_CH4_INVAL_STATE_ERR = 0x00000080,
+ NX_CHECKSTOP_DMA_CH5_INVAL_STATE_ERR = 0x00000100,
+ NX_CHECKSTOP_DMA_CH6_INVAL_STATE_ERR = 0x00000200,
+ NX_CHECKSTOP_DMA_CH7_INVAL_STATE_ERR = 0x00000400,
+ NX_CHECKSTOP_DMA_CRB_UE = 0x00000800,
+ NX_CHECKSTOP_DMA_CRB_SUE = 0x00001000,
+ NX_CHECKSTOP_PBI_ISN_UE = 0x00002000,
+};
+
+struct OpalHMIEvent {
+ uint8_t version; /* 0x00 */
+ uint8_t severity; /* 0x01 */
+ uint8_t type; /* 0x02 */
+ uint8_t disposition; /* 0x03 */
+ uint8_t reserved_1[4]; /* 0x04 */
+
+ __be64 hmer;
+ /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
+ __be64 tfmr;
+
+ /* version 2 and later */
+ union {
+ /*
+ * checkstop info (Core/NX).
+ * Valid for OpalHMI_ERROR_MALFUNC_ALERT.
+ */
+ struct {
+ uint8_t xstop_type; /* enum OpalHMI_XstopType */
+ uint8_t reserved_1[3];
+ __be32 xstop_reason;
+ union {
+ __be32 pir; /* for CHECKSTOP_TYPE_CORE */
+ __be32 chip_id; /* for CHECKSTOP_TYPE_NX */
+ } u;
+ } xstop_error;
+ } u;
+};
+
+/* OPAL_HANDLE_HMI2 out_flags */
+enum {
+ OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */
+ OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */
+ OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */
+ OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. */
+ OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */
+};
+
+enum {
+ OPAL_P7IOC_DIAG_TYPE_NONE = 0,
+ OPAL_P7IOC_DIAG_TYPE_RGC = 1,
+ OPAL_P7IOC_DIAG_TYPE_BI = 2,
+ OPAL_P7IOC_DIAG_TYPE_CI = 3,
+ OPAL_P7IOC_DIAG_TYPE_MISC = 4,
+ OPAL_P7IOC_DIAG_TYPE_I2C = 5,
+ OPAL_P7IOC_DIAG_TYPE_LAST = 6
+};
+
+struct OpalIoP7IOCErrorData {
+ __be16 type;
+
+ /* GEM */
+ __be64 gemXfir;
+ __be64 gemRfir;
+ __be64 gemRirqfir;
+ __be64 gemMask;
+ __be64 gemRwof;
+
+ /* LEM */
+ __be64 lemFir;
+ __be64 lemErrMask;
+ __be64 lemAction0;
+ __be64 lemAction1;
+ __be64 lemWof;
+
+ union {
+ struct OpalIoP7IOCRgcErrorData {
+ __be64 rgcStatus; /* 3E1C10 */
+ __be64 rgcLdcp; /* 3E1C18 */
+ }rgc;
+ struct OpalIoP7IOCBiErrorData {
+ __be64 biLdcp0; /* 3C0100, 3C0118 */
+ __be64 biLdcp1; /* 3C0108, 3C0120 */
+ __be64 biLdcp2; /* 3C0110, 3C0128 */
+ __be64 biFenceStatus; /* 3C0130, 3C0130 */
+
+ uint8_t biDownbound; /* BI Downbound or Upbound */
+ }bi;
+ struct OpalIoP7IOCCiErrorData {
+ __be64 ciPortStatus; /* 3Dn008 */
+ __be64 ciPortLdcp; /* 3Dn010 */
+
+ uint8_t ciPort; /* Index of CI port: 0/1 */
+ }ci;
+ };
+};
+
+/**
+ * This structure defines the overlay which will be used to store PHB error
+ * data upon request.
+ */
+enum {
+ OPAL_PHB_ERROR_DATA_VERSION_1 = 1,
+};
+
+enum {
+ OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
+ OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2,
+ OPAL_PHB_ERROR_DATA_TYPE_PHB4 = 3
+};
+
+enum {
+ OPAL_P7IOC_NUM_PEST_REGS = 128,
+ OPAL_PHB3_NUM_PEST_REGS = 256,
+ OPAL_PHB4_NUM_PEST_REGS = 512
+};
+
+struct OpalIoPhbErrorCommon {
+ __be32 version;
+ __be32 ioType;
+ __be32 len;
+};
+
+struct OpalIoP7IOCPhbErrorData {
+ struct OpalIoPhbErrorCommon common;
+
+ __be32 brdgCtl;
+
+ // P7IOC utl regs
+ __be32 portStatusReg;
+ __be32 rootCmplxStatus;
+ __be32 busAgentStatus;
+
+ // P7IOC cfg regs
+ __be32 deviceStatus;
+ __be32 slotStatus;
+ __be32 linkStatus;
+ __be32 devCmdStatus;
+ __be32 devSecStatus;
+
+ // cfg AER regs
+ __be32 rootErrorStatus;
+ __be32 uncorrErrorStatus;
+ __be32 corrErrorStatus;
+ __be32 tlpHdr1;
+ __be32 tlpHdr2;
+ __be32 tlpHdr3;
+ __be32 tlpHdr4;
+ __be32 sourceId;
+
+ __be32 rsv3;
+
+ // Record data about the call to allocate a buffer.
+ __be64 errorClass;
+ __be64 correlator;
+
+ //P7IOC MMIO Error Regs
+ __be64 p7iocPlssr; // n120
+ __be64 p7iocCsr; // n110
+ __be64 lemFir; // nC00
+ __be64 lemErrorMask; // nC18
+ __be64 lemWOF; // nC40
+ __be64 phbErrorStatus; // nC80
+ __be64 phbFirstErrorStatus; // nC88
+ __be64 phbErrorLog0; // nCC0
+ __be64 phbErrorLog1; // nCC8
+ __be64 mmioErrorStatus; // nD00
+ __be64 mmioFirstErrorStatus; // nD08
+ __be64 mmioErrorLog0; // nD40
+ __be64 mmioErrorLog1; // nD48
+ __be64 dma0ErrorStatus; // nD80
+ __be64 dma0FirstErrorStatus; // nD88
+ __be64 dma0ErrorLog0; // nDC0
+ __be64 dma0ErrorLog1; // nDC8
+ __be64 dma1ErrorStatus; // nE00
+ __be64 dma1FirstErrorStatus; // nE08
+ __be64 dma1ErrorLog0; // nE40
+ __be64 dma1ErrorLog1; // nE48
+ __be64 pestA[OPAL_P7IOC_NUM_PEST_REGS];
+ __be64 pestB[OPAL_P7IOC_NUM_PEST_REGS];
+};
+
+struct OpalIoPhb3ErrorData {
+ struct OpalIoPhbErrorCommon common;
+
+ __be32 brdgCtl;
+
+ /* PHB3 UTL regs */
+ __be32 portStatusReg;
+ __be32 rootCmplxStatus;
+ __be32 busAgentStatus;
+
+ /* PHB3 cfg regs */
+ __be32 deviceStatus;
+ __be32 slotStatus;
+ __be32 linkStatus;
+ __be32 devCmdStatus;
+ __be32 devSecStatus;
+
+ /* cfg AER regs */
+ __be32 rootErrorStatus;
+ __be32 uncorrErrorStatus;
+ __be32 corrErrorStatus;
+ __be32 tlpHdr1;
+ __be32 tlpHdr2;
+ __be32 tlpHdr3;
+ __be32 tlpHdr4;
+ __be32 sourceId;
+
+ __be32 rsv3;
+
+ /* Record data about the call to allocate a buffer */
+ __be64 errorClass;
+ __be64 correlator;
+
+ /* PHB3 MMIO Error Regs */
+ __be64 nFir; /* 000 */
+ __be64 nFirMask; /* 003 */
+ __be64 nFirWOF; /* 008 */
+ __be64 phbPlssr; /* 120 */
+ __be64 phbCsr; /* 110 */
+ __be64 lemFir; /* C00 */
+ __be64 lemErrorMask; /* C18 */
+ __be64 lemWOF; /* C40 */
+ __be64 phbErrorStatus; /* C80 */
+ __be64 phbFirstErrorStatus; /* C88 */
+ __be64 phbErrorLog0; /* CC0 */
+ __be64 phbErrorLog1; /* CC8 */
+ __be64 mmioErrorStatus; /* D00 */
+ __be64 mmioFirstErrorStatus; /* D08 */
+ __be64 mmioErrorLog0; /* D40 */
+ __be64 mmioErrorLog1; /* D48 */
+ __be64 dma0ErrorStatus; /* D80 */
+ __be64 dma0FirstErrorStatus; /* D88 */
+ __be64 dma0ErrorLog0; /* DC0 */
+ __be64 dma0ErrorLog1; /* DC8 */
+ __be64 dma1ErrorStatus; /* E00 */
+ __be64 dma1FirstErrorStatus; /* E08 */
+ __be64 dma1ErrorLog0; /* E40 */
+ __be64 dma1ErrorLog1; /* E48 */
+ __be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
+ __be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
+};
+
+struct OpalIoPhb4ErrorData {
+ struct OpalIoPhbErrorCommon common;
+
+ __be32 brdgCtl;
+
+ /* PHB4 cfg regs */
+ __be32 deviceStatus;
+ __be32 slotStatus;
+ __be32 linkStatus;
+ __be32 devCmdStatus;
+ __be32 devSecStatus;
+
+ /* cfg AER regs */
+ __be32 rootErrorStatus;
+ __be32 uncorrErrorStatus;
+ __be32 corrErrorStatus;
+ __be32 tlpHdr1;
+ __be32 tlpHdr2;
+ __be32 tlpHdr3;
+ __be32 tlpHdr4;
+ __be32 sourceId;
+
+ /* PHB4 ETU Error Regs */
+ __be64 nFir; /* 000 */
+ __be64 nFirMask; /* 003 */
+ __be64 nFirWOF; /* 008 */
+ __be64 phbPlssr; /* 120 */
+ __be64 phbCsr; /* 110 */
+ __be64 lemFir; /* C00 */
+ __be64 lemErrorMask; /* C18 */
+ __be64 lemWOF; /* C40 */
+ __be64 phbErrorStatus; /* C80 */
+ __be64 phbFirstErrorStatus; /* C88 */
+ __be64 phbErrorLog0; /* CC0 */
+ __be64 phbErrorLog1; /* CC8 */
+ __be64 phbTxeErrorStatus; /* D00 */
+ __be64 phbTxeFirstErrorStatus; /* D08 */
+ __be64 phbTxeErrorLog0; /* D40 */
+ __be64 phbTxeErrorLog1; /* D48 */
+ __be64 phbRxeArbErrorStatus; /* D80 */
+ __be64 phbRxeArbFirstErrorStatus; /* D88 */
+ __be64 phbRxeArbErrorLog0; /* DC0 */
+ __be64 phbRxeArbErrorLog1; /* DC8 */
+ __be64 phbRxeMrgErrorStatus; /* E00 */
+ __be64 phbRxeMrgFirstErrorStatus; /* E08 */
+ __be64 phbRxeMrgErrorLog0; /* E40 */
+ __be64 phbRxeMrgErrorLog1; /* E48 */
+ __be64 phbRxeTceErrorStatus; /* E80 */
+ __be64 phbRxeTceFirstErrorStatus; /* E88 */
+ __be64 phbRxeTceErrorLog0; /* EC0 */
+ __be64 phbRxeTceErrorLog1; /* EC8 */
+
+ /* PHB4 REGB Error Regs */
+ __be64 phbPblErrorStatus; /* 1900 */
+ __be64 phbPblFirstErrorStatus; /* 1908 */
+ __be64 phbPblErrorLog0; /* 1940 */
+ __be64 phbPblErrorLog1; /* 1948 */
+ __be64 phbPcieDlpErrorLog1; /* 1AA0 */
+ __be64 phbPcieDlpErrorLog2; /* 1AA8 */
+ __be64 phbPcieDlpErrorStatus; /* 1AB0 */
+ __be64 phbRegbErrorStatus; /* 1C00 */
+ __be64 phbRegbFirstErrorStatus; /* 1C08 */
+ __be64 phbRegbErrorLog0; /* 1C40 */
+ __be64 phbRegbErrorLog1; /* 1C48 */
+
+ __be64 pestA[OPAL_PHB4_NUM_PEST_REGS];
+ __be64 pestB[OPAL_PHB4_NUM_PEST_REGS];
+};
+
+enum {
+ OPAL_REINIT_CPUS_HILE_BE = (1 << 0),
+ OPAL_REINIT_CPUS_HILE_LE = (1 << 1),
+
+ /* These two define the base MMU mode of the host on P9
+ *
+ * On P9 Nimbus DD2.0 and Cumulus (and later), KVM can still
+ * create hash guests in "radix" mode with care (full core
+ * switch only).
+ */
+ OPAL_REINIT_CPUS_MMU_HASH = (1 << 2),
+ OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3),
+
+ OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED = (1 << 4),
+};
+
+typedef struct oppanel_line {
+ __be64 line;
+ __be64 line_len;
+} oppanel_line_t;
+
+enum opal_prd_msg_type {
+ OPAL_PRD_MSG_TYPE_INIT = 0, /* HBRT --> OPAL */
+ OPAL_PRD_MSG_TYPE_FINI, /* HBRT/kernel --> OPAL */
+ OPAL_PRD_MSG_TYPE_ATTN, /* HBRT <-- OPAL */
+ OPAL_PRD_MSG_TYPE_ATTN_ACK, /* HBRT --> OPAL */
+ OPAL_PRD_MSG_TYPE_OCC_ERROR, /* HBRT <-- OPAL */
+ OPAL_PRD_MSG_TYPE_OCC_RESET, /* HBRT <-- OPAL */
+};
+
+struct opal_prd_msg_header {
+ uint8_t type;
+ uint8_t pad[1];
+ __be16 size;
+};
+
+struct opal_prd_msg;
+
+#define OCC_RESET 0
+#define OCC_LOAD 1
+#define OCC_THROTTLE 2
+#define OCC_MAX_THROTTLE_STATUS 5
+
+struct opal_occ_msg {
+ __be64 type;
+ __be64 chip;
+ __be64 throttle_status;
+};
+
+/*
+ * SG entries
+ *
+ * WARNING: The current implementation requires each entry
+ * to represent a block that is 4k aligned *and* each block
+ * size except the last one in the list to be as well.
+ */
+struct opal_sg_entry {
+ __be64 data;
+ __be64 length;
+};
+
+/*
+ * Candidate image SG list.
+ *
+ * length = VER | length
+ */
+struct opal_sg_list {
+ __be64 length;
+ __be64 next;
+ struct opal_sg_entry entry[];
+};
+
+/*
+ * Dump region ID range usable by the OS
+ */
+#define OPAL_DUMP_REGION_HOST_START 0x80
+#define OPAL_DUMP_REGION_LOG_BUF 0x80
+#define OPAL_DUMP_REGION_HOST_END 0xFF
+
+/* CAPI modes for PHB */
+enum {
+ OPAL_PHB_CAPI_MODE_PCIE = 0,
+ OPAL_PHB_CAPI_MODE_CAPI = 1,
+ OPAL_PHB_CAPI_MODE_SNOOP_OFF = 2,
+ OPAL_PHB_CAPI_MODE_SNOOP_ON = 3,
+ OPAL_PHB_CAPI_MODE_DMA = 4,
+ OPAL_PHB_CAPI_MODE_DMA_TVT1 = 5,
+};
+
+/* OPAL I2C request */
+struct opal_i2c_request {
+ uint8_t type;
+#define OPAL_I2C_RAW_READ 0
+#define OPAL_I2C_RAW_WRITE 1
+#define OPAL_I2C_SM_READ 2
+#define OPAL_I2C_SM_WRITE 3
+ uint8_t flags;
+#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */
+ uint8_t subaddr_sz; /* Max 4 */
+ uint8_t reserved;
+ __be16 addr; /* 7 or 10 bit address */
+ __be16 reserved2;
+ __be32 subaddr; /* Sub-address if any */
+ __be32 size; /* Data size */
+ __be64 buffer_ra; /* Buffer real address */
+};
+
+/*
+ * EPOW status sharing (OPAL and the host)
+ *
+ * The host will pass to OPAL a buffer of length OPAL_SYSEPOW_MAX
+ * with individual elements being 16 bits wide to fetch the system
+ * wide EPOW status. Each element in the buffer will contain the
+ * EPOW status in its bit representation for a particular EPOW sub
+ * class as defined here. So multiple detailed EPOW status bits
+ * specific for any sub class can be represented in a single buffer
+ * element as its bit representation.
+ */
+
+/* System EPOW type */
+enum OpalSysEpow {
+ OPAL_SYSEPOW_POWER = 0, /* Power EPOW */
+ OPAL_SYSEPOW_TEMP = 1, /* Temperature EPOW */
+ OPAL_SYSEPOW_COOLING = 2, /* Cooling EPOW */
+ OPAL_SYSEPOW_MAX = 3, /* Max EPOW categories */
+};
+
+/* Power EPOW */
+enum OpalSysPower {
+ OPAL_SYSPOWER_UPS = 0x0001, /* System on UPS power */
+ OPAL_SYSPOWER_CHNG = 0x0002, /* System power config change */
+ OPAL_SYSPOWER_FAIL = 0x0004, /* System impending power failure */
+ OPAL_SYSPOWER_INCL = 0x0008, /* System incomplete power */
+};
+
+/* Temperature EPOW */
+enum OpalSysTemp {
+ OPAL_SYSTEMP_AMB = 0x0001, /* System over ambient temperature */
+ OPAL_SYSTEMP_INT = 0x0002, /* System over internal temperature */
+ OPAL_SYSTEMP_HMD = 0x0004, /* System over ambient humidity */
+};
+
+/* Cooling EPOW */
+enum OpalSysCooling {
+ OPAL_SYSCOOL_INSF = 0x0001, /* System insufficient cooling */
+};
+
+/* Argument to OPAL_CEC_REBOOT2() */
+enum {
+ OPAL_REBOOT_NORMAL = 0,
+ OPAL_REBOOT_PLATFORM_ERROR = 1,
+ OPAL_REBOOT_FULL_IPL = 2,
+ OPAL_REBOOT_MPIPL = 3,
+ OPAL_REBOOT_FAST = 4,
+};
+
+/* Argument to OPAL_PCI_TCE_KILL */
+enum {
+ OPAL_PCI_TCE_KILL_PAGES,
+ OPAL_PCI_TCE_KILL_PE,
+ OPAL_PCI_TCE_KILL_ALL,
+};
+
+/* The xive operation mode indicates the active "API" and
+ * corresponds to the "mode" parameter of the opal_xive_reset()
+ * call
+ */
+enum {
+ OPAL_XIVE_MODE_EMU = 0,
+ OPAL_XIVE_MODE_EXPL = 1,
+};
+
+/* Flags for OPAL_XIVE_GET_IRQ_INFO */
+enum {
+ OPAL_XIVE_IRQ_TRIGGER_PAGE = 0x00000001,
+ OPAL_XIVE_IRQ_STORE_EOI = 0x00000002,
+ OPAL_XIVE_IRQ_LSI = 0x00000004,
+ OPAL_XIVE_IRQ_SHIFT_BUG = 0x00000008, /* P9 DD1.0 workaround */
+ OPAL_XIVE_IRQ_MASK_VIA_FW = 0x00000010, /* P9 DD1.0 workaround */
+ OPAL_XIVE_IRQ_EOI_VIA_FW = 0x00000020, /* P9 DD1.0 workaround */
+ OPAL_XIVE_IRQ_STORE_EOI2 = 0x00000040,
+};
+
+/* Flags for OPAL_XIVE_GET/SET_QUEUE_INFO */
+enum {
+ OPAL_XIVE_EQ_ENABLED = 0x00000001,
+ OPAL_XIVE_EQ_ALWAYS_NOTIFY = 0x00000002,
+ OPAL_XIVE_EQ_ESCALATE = 0x00000004,
+};
+
+/* Flags for OPAL_XIVE_GET/SET_VP_INFO */
+enum {
+ OPAL_XIVE_VP_ENABLED = 0x00000001,
+ OPAL_XIVE_VP_SINGLE_ESCALATION = 0x00000002,
+};
+
+/* "Any chip" replacement for chip ID for allocation functions */
+enum {
+ OPAL_XIVE_ANY_CHIP = 0xffffffff,
+};
+
+/* Xive sync options */
+enum {
+ /* These bits are cumulative, arg is a girq */
+ XIVE_SYNC_EAS = 0x00000001, /* Sync irq source */
+ XIVE_SYNC_QUEUE = 0x00000002, /* Sync irq target */
+};
+
+/* Dump options */
+enum {
+ XIVE_DUMP_TM_HYP = 0,
+ XIVE_DUMP_TM_POOL = 1,
+ XIVE_DUMP_TM_OS = 2,
+ XIVE_DUMP_TM_USER = 3,
+ XIVE_DUMP_VP = 4,
+ XIVE_DUMP_EMU_STATE = 5,
+};
+
+/* "type" argument options for OPAL_IMC_COUNTERS_* calls */
+enum {
+ OPAL_IMC_COUNTERS_NEST = 1,
+ OPAL_IMC_COUNTERS_CORE = 2,
+ OPAL_IMC_COUNTERS_TRACE = 3,
+};
+
+
+/* PCI p2p descriptor */
+#define OPAL_PCI_P2P_ENABLE 0x1
+#define OPAL_PCI_P2P_LOAD 0x2
+#define OPAL_PCI_P2P_STORE 0x4
+
+/* MPIPL update operations */
+enum opal_mpipl_ops {
+ OPAL_MPIPL_ADD_RANGE = 0,
+ OPAL_MPIPL_REMOVE_RANGE = 1,
+ OPAL_MPIPL_REMOVE_ALL = 2,
+ OPAL_MPIPL_FREE_PRESERVED_MEMORY = 3,
+};
+
+/* Tag will point to various metadata areas. Kernel will
+ * use tag to get metadata value.
+ */
+enum opal_mpipl_tags {
+ OPAL_MPIPL_TAG_CPU = 0,
+ OPAL_MPIPL_TAG_OPAL = 1,
+ OPAL_MPIPL_TAG_KERNEL = 2,
+ OPAL_MPIPL_TAG_BOOT_MEM = 3,
+};
+
+/* Preserved memory details */
+struct opal_mpipl_region {
+ __be64 src;
+ __be64 dest;
+ __be64 size;
+};
+
+/* Structure version */
+#define OPAL_MPIPL_VERSION 0x01
+
+struct opal_mpipl_fadump {
+ u8 version;
+ u8 reserved[7];
+ __be32 crashing_pir; /* OPAL crashing CPU PIR */
+ __be32 cpu_data_version;
+ __be32 cpu_data_size;
+ __be32 region_cnt;
+ struct opal_mpipl_region region[];
+} __packed;
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* __OPAL_API_H */
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 86055e598269..0a398265ba04 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -1,770 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerNV OPAL definitions.
*
* Copyright 2011 IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#ifndef __OPAL_H
-#define __OPAL_H
+#ifndef _ASM_POWERPC_OPAL_H
+#define _ASM_POWERPC_OPAL_H
-#ifndef __ASSEMBLY__
-/*
- * SG entry
- *
- * WARNING: The current implementation requires each entry
- * to represent a block that is 4k aligned *and* each block
- * size except the last one in the list to be as well.
- */
-struct opal_sg_entry {
- __be64 data;
- __be64 length;
-};
-
-/* SG list */
-struct opal_sg_list {
- __be64 length;
- __be64 next;
- struct opal_sg_entry entry[];
-};
+#include <asm/opal-api.h>
-/* We calculate number of sg entries based on PAGE_SIZE */
-#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
-
-#endif /* __ASSEMBLY__ */
-
-/****** OPAL APIs ******/
-
-/* Return codes */
-#define OPAL_SUCCESS 0
-#define OPAL_PARAMETER -1
-#define OPAL_BUSY -2
-#define OPAL_PARTIAL -3
-#define OPAL_CONSTRAINED -4
-#define OPAL_CLOSED -5
-#define OPAL_HARDWARE -6
-#define OPAL_UNSUPPORTED -7
-#define OPAL_PERMISSION -8
-#define OPAL_NO_MEM -9
-#define OPAL_RESOURCE -10
-#define OPAL_INTERNAL_ERROR -11
-#define OPAL_BUSY_EVENT -12
-#define OPAL_HARDWARE_FROZEN -13
-#define OPAL_WRONG_STATE -14
-#define OPAL_ASYNC_COMPLETION -15
-
-/* API Tokens (in r0) */
-#define OPAL_INVALID_CALL -1
-#define OPAL_CONSOLE_WRITE 1
-#define OPAL_CONSOLE_READ 2
-#define OPAL_RTC_READ 3
-#define OPAL_RTC_WRITE 4
-#define OPAL_CEC_POWER_DOWN 5
-#define OPAL_CEC_REBOOT 6
-#define OPAL_READ_NVRAM 7
-#define OPAL_WRITE_NVRAM 8
-#define OPAL_HANDLE_INTERRUPT 9
-#define OPAL_POLL_EVENTS 10
-#define OPAL_PCI_SET_HUB_TCE_MEMORY 11
-#define OPAL_PCI_SET_PHB_TCE_MEMORY 12
-#define OPAL_PCI_CONFIG_READ_BYTE 13
-#define OPAL_PCI_CONFIG_READ_HALF_WORD 14
-#define OPAL_PCI_CONFIG_READ_WORD 15
-#define OPAL_PCI_CONFIG_WRITE_BYTE 16
-#define OPAL_PCI_CONFIG_WRITE_HALF_WORD 17
-#define OPAL_PCI_CONFIG_WRITE_WORD 18
-#define OPAL_SET_XIVE 19
-#define OPAL_GET_XIVE 20
-#define OPAL_GET_COMPLETION_TOKEN_STATUS 21 /* obsolete */
-#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER 22
-#define OPAL_PCI_EEH_FREEZE_STATUS 23
-#define OPAL_PCI_SHPC 24
-#define OPAL_CONSOLE_WRITE_BUFFER_SPACE 25
-#define OPAL_PCI_EEH_FREEZE_CLEAR 26
-#define OPAL_PCI_PHB_MMIO_ENABLE 27
-#define OPAL_PCI_SET_PHB_MEM_WINDOW 28
-#define OPAL_PCI_MAP_PE_MMIO_WINDOW 29
-#define OPAL_PCI_SET_PHB_TABLE_MEMORY 30
-#define OPAL_PCI_SET_PE 31
-#define OPAL_PCI_SET_PELTV 32
-#define OPAL_PCI_SET_MVE 33
-#define OPAL_PCI_SET_MVE_ENABLE 34
-#define OPAL_PCI_GET_XIVE_REISSUE 35
-#define OPAL_PCI_SET_XIVE_REISSUE 36
-#define OPAL_PCI_SET_XIVE_PE 37
-#define OPAL_GET_XIVE_SOURCE 38
-#define OPAL_GET_MSI_32 39
-#define OPAL_GET_MSI_64 40
-#define OPAL_START_CPU 41
-#define OPAL_QUERY_CPU_STATUS 42
-#define OPAL_WRITE_OPPANEL 43
-#define OPAL_PCI_MAP_PE_DMA_WINDOW 44
-#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL 45
-#define OPAL_PCI_RESET 49
-#define OPAL_PCI_GET_HUB_DIAG_DATA 50
-#define OPAL_PCI_GET_PHB_DIAG_DATA 51
-#define OPAL_PCI_FENCE_PHB 52
-#define OPAL_PCI_REINIT 53
-#define OPAL_PCI_MASK_PE_ERROR 54
-#define OPAL_SET_SLOT_LED_STATUS 55
-#define OPAL_GET_EPOW_STATUS 56
-#define OPAL_SET_SYSTEM_ATTENTION_LED 57
-#define OPAL_RESERVED1 58
-#define OPAL_RESERVED2 59
-#define OPAL_PCI_NEXT_ERROR 60
-#define OPAL_PCI_EEH_FREEZE_STATUS2 61
-#define OPAL_PCI_POLL 62
-#define OPAL_PCI_MSI_EOI 63
-#define OPAL_PCI_GET_PHB_DIAG_DATA2 64
-#define OPAL_XSCOM_READ 65
-#define OPAL_XSCOM_WRITE 66
-#define OPAL_LPC_READ 67
-#define OPAL_LPC_WRITE 68
-#define OPAL_RETURN_CPU 69
-#define OPAL_REINIT_CPUS 70
-#define OPAL_ELOG_READ 71
-#define OPAL_ELOG_WRITE 72
-#define OPAL_ELOG_ACK 73
-#define OPAL_ELOG_RESEND 74
-#define OPAL_ELOG_SIZE 75
-#define OPAL_FLASH_VALIDATE 76
-#define OPAL_FLASH_MANAGE 77
-#define OPAL_FLASH_UPDATE 78
-#define OPAL_RESYNC_TIMEBASE 79
-#define OPAL_DUMP_INIT 81
-#define OPAL_DUMP_INFO 82
-#define OPAL_DUMP_READ 83
-#define OPAL_DUMP_ACK 84
-#define OPAL_GET_MSG 85
-#define OPAL_CHECK_ASYNC_COMPLETION 86
-#define OPAL_SYNC_HOST_REBOOT 87
-#define OPAL_SENSOR_READ 88
-#define OPAL_GET_PARAM 89
-#define OPAL_SET_PARAM 90
-#define OPAL_DUMP_RESEND 91
-#define OPAL_DUMP_INFO2 94
-#define OPAL_PCI_EEH_FREEZE_SET 97
-#define OPAL_HANDLE_HMI 98
-#define OPAL_REGISTER_DUMP_REGION 101
-#define OPAL_UNREGISTER_DUMP_REGION 102
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/notifier.h>
-/* Other enums */
-enum OpalVendorApiTokens {
- OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999
-};
-
-enum OpalFreezeState {
- OPAL_EEH_STOPPED_NOT_FROZEN = 0,
- OPAL_EEH_STOPPED_MMIO_FREEZE = 1,
- OPAL_EEH_STOPPED_DMA_FREEZE = 2,
- OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3,
- OPAL_EEH_STOPPED_RESET = 4,
- OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5,
- OPAL_EEH_STOPPED_PERM_UNAVAIL = 6
-};
-
-enum OpalEehFreezeActionToken {
- OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1,
- OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2,
- OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3,
-
- OPAL_EEH_ACTION_SET_FREEZE_MMIO = 1,
- OPAL_EEH_ACTION_SET_FREEZE_DMA = 2,
- OPAL_EEH_ACTION_SET_FREEZE_ALL = 3
-};
-
-enum OpalPciStatusToken {
- OPAL_EEH_NO_ERROR = 0,
- OPAL_EEH_IOC_ERROR = 1,
- OPAL_EEH_PHB_ERROR = 2,
- OPAL_EEH_PE_ERROR = 3,
- OPAL_EEH_PE_MMIO_ERROR = 4,
- OPAL_EEH_PE_DMA_ERROR = 5
-};
-
-enum OpalPciErrorSeverity {
- OPAL_EEH_SEV_NO_ERROR = 0,
- OPAL_EEH_SEV_IOC_DEAD = 1,
- OPAL_EEH_SEV_PHB_DEAD = 2,
- OPAL_EEH_SEV_PHB_FENCED = 3,
- OPAL_EEH_SEV_PE_ER = 4,
- OPAL_EEH_SEV_INF = 5
-};
-
-enum OpalShpcAction {
- OPAL_SHPC_GET_LINK_STATE = 0,
- OPAL_SHPC_GET_SLOT_STATE = 1
-};
-
-enum OpalShpcLinkState {
- OPAL_SHPC_LINK_DOWN = 0,
- OPAL_SHPC_LINK_UP = 1
-};
-
-enum OpalMmioWindowType {
- OPAL_M32_WINDOW_TYPE = 1,
- OPAL_M64_WINDOW_TYPE = 2,
- OPAL_IO_WINDOW_TYPE = 3
-};
-
-enum OpalShpcSlotState {
- OPAL_SHPC_DEV_NOT_PRESENT = 0,
- OPAL_SHPC_DEV_PRESENT = 1
-};
-
-enum OpalExceptionHandler {
- OPAL_MACHINE_CHECK_HANDLER = 1,
- OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2,
- OPAL_SOFTPATCH_HANDLER = 3
-};
-
-enum OpalPendingState {
- OPAL_EVENT_OPAL_INTERNAL = 0x1,
- OPAL_EVENT_NVRAM = 0x2,
- OPAL_EVENT_RTC = 0x4,
- OPAL_EVENT_CONSOLE_OUTPUT = 0x8,
- OPAL_EVENT_CONSOLE_INPUT = 0x10,
- OPAL_EVENT_ERROR_LOG_AVAIL = 0x20,
- OPAL_EVENT_ERROR_LOG = 0x40,
- OPAL_EVENT_EPOW = 0x80,
- OPAL_EVENT_LED_STATUS = 0x100,
- OPAL_EVENT_PCI_ERROR = 0x200,
- OPAL_EVENT_DUMP_AVAIL = 0x400,
- OPAL_EVENT_MSG_PENDING = 0x800,
-};
-
-enum OpalMessageType {
- OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc,
- * additional params function-specific
- */
- OPAL_MSG_MEM_ERR,
- OPAL_MSG_EPOW,
- OPAL_MSG_SHUTDOWN,
- OPAL_MSG_HMI_EVT,
- OPAL_MSG_TYPE_MAX,
-};
-
-/* Machine check related definitions */
-enum OpalMCE_Version {
- OpalMCE_V1 = 1,
-};
-
-enum OpalMCE_Severity {
- OpalMCE_SEV_NO_ERROR = 0,
- OpalMCE_SEV_WARNING = 1,
- OpalMCE_SEV_ERROR_SYNC = 2,
- OpalMCE_SEV_FATAL = 3,
-};
-
-enum OpalMCE_Disposition {
- OpalMCE_DISPOSITION_RECOVERED = 0,
- OpalMCE_DISPOSITION_NOT_RECOVERED = 1,
-};
-
-enum OpalMCE_Initiator {
- OpalMCE_INITIATOR_UNKNOWN = 0,
- OpalMCE_INITIATOR_CPU = 1,
-};
-
-enum OpalMCE_ErrorType {
- OpalMCE_ERROR_TYPE_UNKNOWN = 0,
- OpalMCE_ERROR_TYPE_UE = 1,
- OpalMCE_ERROR_TYPE_SLB = 2,
- OpalMCE_ERROR_TYPE_ERAT = 3,
- OpalMCE_ERROR_TYPE_TLB = 4,
-};
-
-enum OpalMCE_UeErrorType {
- OpalMCE_UE_ERROR_INDETERMINATE = 0,
- OpalMCE_UE_ERROR_IFETCH = 1,
- OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
- OpalMCE_UE_ERROR_LOAD_STORE = 3,
- OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
-};
-
-enum OpalMCE_SlbErrorType {
- OpalMCE_SLB_ERROR_INDETERMINATE = 0,
- OpalMCE_SLB_ERROR_PARITY = 1,
- OpalMCE_SLB_ERROR_MULTIHIT = 2,
-};
-
-enum OpalMCE_EratErrorType {
- OpalMCE_ERAT_ERROR_INDETERMINATE = 0,
- OpalMCE_ERAT_ERROR_PARITY = 1,
- OpalMCE_ERAT_ERROR_MULTIHIT = 2,
-};
-
-enum OpalMCE_TlbErrorType {
- OpalMCE_TLB_ERROR_INDETERMINATE = 0,
- OpalMCE_TLB_ERROR_PARITY = 1,
- OpalMCE_TLB_ERROR_MULTIHIT = 2,
-};
-
-enum OpalThreadStatus {
- OPAL_THREAD_INACTIVE = 0x0,
- OPAL_THREAD_STARTED = 0x1,
- OPAL_THREAD_UNAVAILABLE = 0x2 /* opal-v3 */
-};
-
-enum OpalPciBusCompare {
- OpalPciBusAny = 0, /* Any bus number match */
- OpalPciBus3Bits = 2, /* Match top 3 bits of bus number */
- OpalPciBus4Bits = 3, /* Match top 4 bits of bus number */
- OpalPciBus5Bits = 4, /* Match top 5 bits of bus number */
- OpalPciBus6Bits = 5, /* Match top 6 bits of bus number */
- OpalPciBus7Bits = 6, /* Match top 7 bits of bus number */
- OpalPciBusAll = 7, /* Match bus number exactly */
-};
-
-enum OpalDeviceCompare {
- OPAL_IGNORE_RID_DEVICE_NUMBER = 0,
- OPAL_COMPARE_RID_DEVICE_NUMBER = 1
-};
-
-enum OpalFuncCompare {
- OPAL_IGNORE_RID_FUNCTION_NUMBER = 0,
- OPAL_COMPARE_RID_FUNCTION_NUMBER = 1
-};
-
-enum OpalPeAction {
- OPAL_UNMAP_PE = 0,
- OPAL_MAP_PE = 1
-};
-
-enum OpalPeltvAction {
- OPAL_REMOVE_PE_FROM_DOMAIN = 0,
- OPAL_ADD_PE_TO_DOMAIN = 1
-};
-
-enum OpalMveEnableAction {
- OPAL_DISABLE_MVE = 0,
- OPAL_ENABLE_MVE = 1
-};
-
-enum OpalM64EnableAction {
- OPAL_DISABLE_M64 = 0,
- OPAL_ENABLE_M64_SPLIT = 1,
- OPAL_ENABLE_M64_NON_SPLIT = 2
-};
-
-enum OpalPciResetScope {
- OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3,
- OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5,
- OPAL_PCI_IODA_TABLE_RESET = 6,
-};
-
-enum OpalPciReinitScope {
- OPAL_REINIT_PCI_DEV = 1000
-};
-
-enum OpalPciResetState {
- OPAL_DEASSERT_RESET = 0,
- OPAL_ASSERT_RESET = 1
-};
-
-enum OpalPciMaskAction {
- OPAL_UNMASK_ERROR_TYPE = 0,
- OPAL_MASK_ERROR_TYPE = 1
-};
-
-enum OpalSlotLedType {
- OPAL_SLOT_LED_ID_TYPE = 0,
- OPAL_SLOT_LED_FAULT_TYPE = 1
-};
-
-enum OpalLedAction {
- OPAL_TURN_OFF_LED = 0,
- OPAL_TURN_ON_LED = 1,
- OPAL_QUERY_LED_STATE_AFTER_BUSY = 2
-};
-
-enum OpalEpowStatus {
- OPAL_EPOW_NONE = 0,
- OPAL_EPOW_UPS = 1,
- OPAL_EPOW_OVER_AMBIENT_TEMP = 2,
- OPAL_EPOW_OVER_INTERNAL_TEMP = 3
-};
+/* We calculate number of sg entries based on PAGE_SIZE */
+#define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry))
-/*
- * Address cycle types for LPC accesses. These also correspond
- * to the content of the first cell of the "reg" property for
- * device nodes on the LPC bus
- */
-enum OpalLPCAddressType {
- OPAL_LPC_MEM = 0,
- OPAL_LPC_IO = 1,
- OPAL_LPC_FW = 2,
-};
-
-/* System parameter permission */
-enum OpalSysparamPerm {
- OPAL_SYSPARAM_READ = 0x1,
- OPAL_SYSPARAM_WRITE = 0x2,
- OPAL_SYSPARAM_RW = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE),
-};
-
-struct opal_msg {
- __be32 msg_type;
- __be32 reserved;
- __be64 params[8];
-};
-
-struct opal_machine_check_event {
- enum OpalMCE_Version version:8; /* 0x00 */
- uint8_t in_use; /* 0x01 */
- enum OpalMCE_Severity severity:8; /* 0x02 */
- enum OpalMCE_Initiator initiator:8; /* 0x03 */
- enum OpalMCE_ErrorType error_type:8; /* 0x04 */
- enum OpalMCE_Disposition disposition:8; /* 0x05 */
- uint8_t reserved_1[2]; /* 0x06 */
- uint64_t gpr3; /* 0x08 */
- uint64_t srr0; /* 0x10 */
- uint64_t srr1; /* 0x18 */
- union { /* 0x20 */
- struct {
- enum OpalMCE_UeErrorType ue_error_type:8;
- uint8_t effective_address_provided;
- uint8_t physical_address_provided;
- uint8_t reserved_1[5];
- uint64_t effective_address;
- uint64_t physical_address;
- uint8_t reserved_2[8];
- } ue_error;
-
- struct {
- enum OpalMCE_SlbErrorType slb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
- } slb_error;
-
- struct {
- enum OpalMCE_EratErrorType erat_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
- } erat_error;
-
- struct {
- enum OpalMCE_TlbErrorType tlb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
- } tlb_error;
- } u;
-};
-
-/* FSP memory errors handling */
-enum OpalMemErr_Version {
- OpalMemErr_V1 = 1,
-};
-
-enum OpalMemErrType {
- OPAL_MEM_ERR_TYPE_RESILIENCE = 0,
- OPAL_MEM_ERR_TYPE_DYN_DALLOC,
- OPAL_MEM_ERR_TYPE_SCRUB,
-};
-
-/* Memory Reilience error type */
-enum OpalMemErr_ResilErrType {
- OPAL_MEM_RESILIENCE_CE = 0,
- OPAL_MEM_RESILIENCE_UE,
- OPAL_MEM_RESILIENCE_UE_SCRUB,
-};
-
-/* Dynamic Memory Deallocation type */
-enum OpalMemErr_DynErrType {
- OPAL_MEM_DYNAMIC_DEALLOC = 0,
-};
-
-/* OpalMemoryErrorData->flags */
-#define OPAL_MEM_CORRECTED_ERROR 0x0001
-#define OPAL_MEM_THRESHOLD_EXCEEDED 0x0002
-#define OPAL_MEM_ACK_REQUIRED 0x8000
-
-struct OpalMemoryErrorData {
- enum OpalMemErr_Version version:8; /* 0x00 */
- enum OpalMemErrType type:8; /* 0x01 */
- __be16 flags; /* 0x02 */
- uint8_t reserved_1[4]; /* 0x04 */
-
- union {
- /* Memory Resilience corrected/uncorrected error info */
- struct {
- enum OpalMemErr_ResilErrType resil_err_type:8;
- uint8_t reserved_1[7];
- __be64 physical_address_start;
- __be64 physical_address_end;
- } resilience;
- /* Dynamic memory deallocation error info */
- struct {
- enum OpalMemErr_DynErrType dyn_err_type:8;
- uint8_t reserved_1[7];
- __be64 physical_address_start;
- __be64 physical_address_end;
- } dyn_dealloc;
- } u;
-};
-
-/* HMI interrupt event */
-enum OpalHMI_Version {
- OpalHMIEvt_V1 = 1,
-};
-
-enum OpalHMI_Severity {
- OpalHMI_SEV_NO_ERROR = 0,
- OpalHMI_SEV_WARNING = 1,
- OpalHMI_SEV_ERROR_SYNC = 2,
- OpalHMI_SEV_FATAL = 3,
-};
-
-enum OpalHMI_Disposition {
- OpalHMI_DISPOSITION_RECOVERED = 0,
- OpalHMI_DISPOSITION_NOT_RECOVERED = 1,
-};
-
-enum OpalHMI_ErrType {
- OpalHMI_ERROR_MALFUNC_ALERT = 0,
- OpalHMI_ERROR_PROC_RECOV_DONE,
- OpalHMI_ERROR_PROC_RECOV_DONE_AGAIN,
- OpalHMI_ERROR_PROC_RECOV_MASKED,
- OpalHMI_ERROR_TFAC,
- OpalHMI_ERROR_TFMR_PARITY,
- OpalHMI_ERROR_HA_OVERFLOW_WARN,
- OpalHMI_ERROR_XSCOM_FAIL,
- OpalHMI_ERROR_XSCOM_DONE,
- OpalHMI_ERROR_SCOM_FIR,
- OpalHMI_ERROR_DEBUG_TRIG_FIR,
- OpalHMI_ERROR_HYP_RESOURCE,
-};
-
-struct OpalHMIEvent {
- uint8_t version; /* 0x00 */
- uint8_t severity; /* 0x01 */
- uint8_t type; /* 0x02 */
- uint8_t disposition; /* 0x03 */
- uint8_t reserved_1[4]; /* 0x04 */
-
- __be64 hmer;
- /* TFMR register. Valid only for TFAC and TFMR_PARITY error type. */
- __be64 tfmr;
-};
-
-enum {
- OPAL_P7IOC_DIAG_TYPE_NONE = 0,
- OPAL_P7IOC_DIAG_TYPE_RGC = 1,
- OPAL_P7IOC_DIAG_TYPE_BI = 2,
- OPAL_P7IOC_DIAG_TYPE_CI = 3,
- OPAL_P7IOC_DIAG_TYPE_MISC = 4,
- OPAL_P7IOC_DIAG_TYPE_I2C = 5,
- OPAL_P7IOC_DIAG_TYPE_LAST = 6
-};
-
-struct OpalIoP7IOCErrorData {
- __be16 type;
-
- /* GEM */
- __be64 gemXfir;
- __be64 gemRfir;
- __be64 gemRirqfir;
- __be64 gemMask;
- __be64 gemRwof;
-
- /* LEM */
- __be64 lemFir;
- __be64 lemErrMask;
- __be64 lemAction0;
- __be64 lemAction1;
- __be64 lemWof;
-
- union {
- struct OpalIoP7IOCRgcErrorData {
- __be64 rgcStatus; /* 3E1C10 */
- __be64 rgcLdcp; /* 3E1C18 */
- }rgc;
- struct OpalIoP7IOCBiErrorData {
- __be64 biLdcp0; /* 3C0100, 3C0118 */
- __be64 biLdcp1; /* 3C0108, 3C0120 */
- __be64 biLdcp2; /* 3C0110, 3C0128 */
- __be64 biFenceStatus; /* 3C0130, 3C0130 */
-
- u8 biDownbound; /* BI Downbound or Upbound */
- }bi;
- struct OpalIoP7IOCCiErrorData {
- __be64 ciPortStatus; /* 3Dn008 */
- __be64 ciPortLdcp; /* 3Dn010 */
-
- u8 ciPort; /* Index of CI port: 0/1 */
- }ci;
- };
-};
-
-/**
- * This structure defines the overlay which will be used to store PHB error
- * data upon request.
- */
-enum {
- OPAL_PHB_ERROR_DATA_VERSION_1 = 1,
-};
-
-enum {
- OPAL_PHB_ERROR_DATA_TYPE_P7IOC = 1,
- OPAL_PHB_ERROR_DATA_TYPE_PHB3 = 2
-};
-
-enum {
- OPAL_P7IOC_NUM_PEST_REGS = 128,
- OPAL_PHB3_NUM_PEST_REGS = 256
-};
-
-struct OpalIoPhbErrorCommon {
- __be32 version;
- __be32 ioType;
- __be32 len;
-};
-
-struct OpalIoP7IOCPhbErrorData {
- struct OpalIoPhbErrorCommon common;
-
- __be32 brdgCtl;
-
- // P7IOC utl regs
- __be32 portStatusReg;
- __be32 rootCmplxStatus;
- __be32 busAgentStatus;
-
- // P7IOC cfg regs
- __be32 deviceStatus;
- __be32 slotStatus;
- __be32 linkStatus;
- __be32 devCmdStatus;
- __be32 devSecStatus;
-
- // cfg AER regs
- __be32 rootErrorStatus;
- __be32 uncorrErrorStatus;
- __be32 corrErrorStatus;
- __be32 tlpHdr1;
- __be32 tlpHdr2;
- __be32 tlpHdr3;
- __be32 tlpHdr4;
- __be32 sourceId;
-
- __be32 rsv3;
-
- // Record data about the call to allocate a buffer.
- __be64 errorClass;
- __be64 correlator;
-
- //P7IOC MMIO Error Regs
- __be64 p7iocPlssr; // n120
- __be64 p7iocCsr; // n110
- __be64 lemFir; // nC00
- __be64 lemErrorMask; // nC18
- __be64 lemWOF; // nC40
- __be64 phbErrorStatus; // nC80
- __be64 phbFirstErrorStatus; // nC88
- __be64 phbErrorLog0; // nCC0
- __be64 phbErrorLog1; // nCC8
- __be64 mmioErrorStatus; // nD00
- __be64 mmioFirstErrorStatus; // nD08
- __be64 mmioErrorLog0; // nD40
- __be64 mmioErrorLog1; // nD48
- __be64 dma0ErrorStatus; // nD80
- __be64 dma0FirstErrorStatus; // nD88
- __be64 dma0ErrorLog0; // nDC0
- __be64 dma0ErrorLog1; // nDC8
- __be64 dma1ErrorStatus; // nE00
- __be64 dma1FirstErrorStatus; // nE08
- __be64 dma1ErrorLog0; // nE40
- __be64 dma1ErrorLog1; // nE48
- __be64 pestA[OPAL_P7IOC_NUM_PEST_REGS];
- __be64 pestB[OPAL_P7IOC_NUM_PEST_REGS];
-};
-
-struct OpalIoPhb3ErrorData {
- struct OpalIoPhbErrorCommon common;
-
- __be32 brdgCtl;
-
- /* PHB3 UTL regs */
- __be32 portStatusReg;
- __be32 rootCmplxStatus;
- __be32 busAgentStatus;
-
- /* PHB3 cfg regs */
- __be32 deviceStatus;
- __be32 slotStatus;
- __be32 linkStatus;
- __be32 devCmdStatus;
- __be32 devSecStatus;
-
- /* cfg AER regs */
- __be32 rootErrorStatus;
- __be32 uncorrErrorStatus;
- __be32 corrErrorStatus;
- __be32 tlpHdr1;
- __be32 tlpHdr2;
- __be32 tlpHdr3;
- __be32 tlpHdr4;
- __be32 sourceId;
-
- __be32 rsv3;
-
- /* Record data about the call to allocate a buffer */
- __be64 errorClass;
- __be64 correlator;
-
- __be64 nFir; /* 000 */
- __be64 nFirMask; /* 003 */
- __be64 nFirWOF; /* 008 */
-
- /* PHB3 MMIO Error Regs */
- __be64 phbPlssr; /* 120 */
- __be64 phbCsr; /* 110 */
- __be64 lemFir; /* C00 */
- __be64 lemErrorMask; /* C18 */
- __be64 lemWOF; /* C40 */
- __be64 phbErrorStatus; /* C80 */
- __be64 phbFirstErrorStatus; /* C88 */
- __be64 phbErrorLog0; /* CC0 */
- __be64 phbErrorLog1; /* CC8 */
- __be64 mmioErrorStatus; /* D00 */
- __be64 mmioFirstErrorStatus; /* D08 */
- __be64 mmioErrorLog0; /* D40 */
- __be64 mmioErrorLog1; /* D48 */
- __be64 dma0ErrorStatus; /* D80 */
- __be64 dma0FirstErrorStatus; /* D88 */
- __be64 dma0ErrorLog0; /* DC0 */
- __be64 dma0ErrorLog1; /* DC8 */
- __be64 dma1ErrorStatus; /* E00 */
- __be64 dma1FirstErrorStatus; /* E08 */
- __be64 dma1ErrorLog0; /* E40 */
- __be64 dma1ErrorLog1; /* E48 */
- __be64 pestA[OPAL_PHB3_NUM_PEST_REGS];
- __be64 pestB[OPAL_PHB3_NUM_PEST_REGS];
-};
-
-enum {
- OPAL_REINIT_CPUS_HILE_BE = (1 << 0),
- OPAL_REINIT_CPUS_HILE_LE = (1 << 1),
-};
-
-typedef struct oppanel_line {
- const char * line;
- uint64_t line_len;
-} oppanel_line_t;
+/* Default time to sleep or delay between OPAL_BUSY/OPAL_BUSY_EVENT loops */
+#define OPAL_BUSY_DELAY_MS 10
/* /sys/firmware/opal */
extern struct kobject *opal_kobj;
@@ -774,18 +28,32 @@ extern struct device_node *opal_node;
/* API functions */
int64_t opal_invalid_call(void);
+int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
+ uint64_t lpcr);
+int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
+ uint64_t addr, uint64_t PE_mask);
+int64_t opal_npu_spa_clear_cache(uint64_t phb_id, uint32_t bdfn,
+ uint64_t PE_handle);
+int64_t opal_npu_tl_set(uint64_t phb_id, uint32_t bdfn, long cap,
+ uint64_t rate_phys, uint32_t size);
+
int64_t opal_console_write(int64_t term_number, __be64 *length,
const uint8_t *buffer);
int64_t opal_console_read(int64_t term_number, __be64 *length,
uint8_t *buffer);
int64_t opal_console_write_buffer_space(int64_t term_number,
__be64 *length);
+int64_t opal_console_flush(int64_t term_number);
int64_t opal_rtc_read(__be32 *year_month_day,
__be64 *hour_minute_second_millisecond);
int64_t opal_rtc_write(uint32_t year_month_day,
uint64_t hour_minute_second_millisecond);
+int64_t opal_tpo_read(uint64_t token, __be32 *year_mon_day, __be32 *hour_min);
+int64_t opal_tpo_write(uint64_t token, uint32_t year_mon_day,
+ uint32_t hour_min);
int64_t opal_cec_power_down(uint64_t request);
int64_t opal_cec_reboot(void);
+int64_t opal_cec_reboot2(uint32_t reboot_type, const char *diag);
int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset);
int64_t opal_handle_interrupt(uint64_t isn, __be64 *outstanding_event_mask);
@@ -819,6 +87,8 @@ int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
uint64_t eeh_action_token);
int64_t opal_pci_eeh_freeze_set(uint64_t phb_id, uint64_t pe_number,
uint64_t eeh_action_token);
+int64_t opal_pci_err_inject(uint64_t phb_id, uint32_t pe_no, uint32_t type,
+ uint32_t func, uint64_t addr, uint64_t mask);
int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state);
@@ -842,9 +112,6 @@ int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number, uint64_t bus_dev_fu
uint8_t pe_action);
int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe, uint32_t child_pe,
uint8_t state);
-int64_t opal_pci_set_mve(uint64_t phb_id, uint32_t mve_number, uint32_t pe_number);
-int64_t opal_pci_set_mve_enable(uint64_t phb_id, uint32_t mve_number,
- uint32_t state);
int64_t opal_pci_get_xive_reissue(uint64_t phb_id, uint32_t xive_number,
uint8_t *p_bit, uint8_t *q_bit);
int64_t opal_pci_set_xive_reissue(uint64_t phb_id, uint32_t xive_number,
@@ -869,7 +136,7 @@ int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t
int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number,
uint16_t dma_window_number, uint64_t pci_start_addr,
uint64_t pci_mem_size);
-int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state);
+int64_t opal_pci_reset(uint64_t id, uint8_t reset_scope, uint8_t assert_state);
int64_t opal_pci_get_hub_diag_data(uint64_t hub_id, void *diag_buffer,
uint64_t diag_buffer_len);
@@ -881,12 +148,14 @@ int64_t opal_pci_fence_phb(uint64_t phb_id);
int64_t opal_pci_reinit(uint64_t phb_id, uint64_t reinit_scope, uint64_t data);
int64_t opal_pci_mask_pe_error(uint64_t phb_id, uint16_t pe_number, uint8_t error_type, uint8_t mask_action);
int64_t opal_set_slot_led_status(uint64_t phb_id, uint64_t slot_id, uint8_t led_type, uint8_t led_action);
-int64_t opal_get_epow_status(__be64 *status);
+int64_t opal_get_epow_status(__be16 *epow_status, __be16 *num_epow_classes);
+int64_t opal_get_dpo_status(__be64 *dpo_timeout);
int64_t opal_set_system_attention_led(uint8_t led_action);
int64_t opal_pci_next_error(uint64_t phb_id, __be64 *first_frozen_pe,
__be16 *pci_error_type, __be16 *severity);
-int64_t opal_pci_poll(uint64_t phb_id);
+int64_t opal_pci_poll(uint64_t id);
int64_t opal_return_cpu(void);
+int64_t opal_check_token(uint64_t token);
int64_t opal_reinit_cpus(uint64_t flags);
int64_t opal_xscom_read(uint32_t gcid, uint64_t pcb_addr, __be64 *val);
@@ -914,6 +183,8 @@ int64_t opal_dump_ack(uint32_t dump_id);
int64_t opal_dump_resend_notification(void);
int64_t opal_get_msg(uint64_t buffer, uint64_t size);
+int64_t opal_write_oppanel_async(uint64_t token, oppanel_line_t *lines,
+ uint64_t num_lines);
int64_t opal_check_completion(uint64_t buffer, uint64_t size, uint64_t token);
int64_t opal_sync_host_reboot(void);
int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
@@ -921,52 +192,171 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer,
int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
uint64_t length);
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
+int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
int64_t opal_handle_hmi(void);
+int64_t opal_handle_hmi2(__be64 *out_flags);
int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
+int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
+int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
+int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
+ uint64_t msg_len);
+int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
+ uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+ struct opal_i2c_request *oreq);
+int64_t opal_prd_msg(struct opal_prd_msg *msg);
+int64_t opal_leds_get_ind(char *loc_code, __be64 *led_mask,
+ __be64 *led_value, __be64 *max_led_type);
+int64_t opal_leds_set_ind(uint64_t token, char *loc_code, const u64 led_mask,
+ const u64 led_value, __be64 *max_led_type);
+
+int64_t opal_flash_read(uint64_t id, uint64_t offset, uint64_t buf,
+ uint64_t size, uint64_t token);
+int64_t opal_flash_write(uint64_t id, uint64_t offset, uint64_t buf,
+ uint64_t size, uint64_t token);
+int64_t opal_flash_erase(uint64_t id, uint64_t offset, uint64_t size,
+ uint64_t token);
+int64_t opal_get_device_tree(uint32_t phandle, uint64_t buf, uint64_t len);
+int64_t opal_pci_get_presence_state(uint64_t id, uint64_t data);
+int64_t opal_pci_get_power_state(uint64_t id, uint64_t data);
+int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
+ uint64_t data);
+int64_t opal_pci_poll2(uint64_t id, uint64_t data);
+
+int64_t opal_int_get_xirr(__be32 *out_xirr, bool just_poll);
+int64_t opal_int_set_cppr(uint8_t cppr);
+int64_t opal_int_eoi(uint32_t xirr);
+int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
+int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
+ uint32_t pe_num, uint32_t tce_size,
+ uint64_t dma_addr, uint32_t npages);
+int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);
+int64_t opal_xive_reset(uint64_t version);
+int64_t opal_xive_get_irq_info(uint32_t girq,
+ __be64 *out_flags,
+ __be64 *out_eoi_page,
+ __be64 *out_trig_page,
+ __be32 *out_esb_shift,
+ __be32 *out_src_chip);
+int64_t opal_xive_get_irq_config(uint32_t girq, __be64 *out_vp,
+ uint8_t *out_prio, __be32 *out_lirq);
+int64_t opal_xive_set_irq_config(uint32_t girq, uint64_t vp, uint8_t prio,
+ uint32_t lirq);
+int64_t opal_xive_get_queue_info(uint64_t vp, uint32_t prio,
+ __be64 *out_qpage,
+ __be64 *out_qsize,
+ __be64 *out_qeoi_page,
+ __be32 *out_escalate_irq,
+ __be64 *out_qflags);
+int64_t opal_xive_set_queue_info(uint64_t vp, uint32_t prio,
+ uint64_t qpage,
+ uint64_t qsize,
+ uint64_t qflags);
+int64_t opal_xive_donate_page(uint32_t chip_id, uint64_t addr);
+int64_t opal_xive_alloc_vp_block(uint32_t alloc_order);
+int64_t opal_xive_free_vp_block(uint64_t vp);
+int64_t opal_xive_get_vp_info(uint64_t vp,
+ __be64 *out_flags,
+ __be64 *out_cam_value,
+ __be64 *out_report_cl_pair,
+ __be32 *out_chip_id);
+int64_t opal_xive_set_vp_info(uint64_t vp,
+ uint64_t flags,
+ uint64_t report_cl_pair);
+int64_t opal_xive_allocate_irq_raw(uint32_t chip_id);
+int64_t opal_xive_free_irq(uint32_t girq);
+int64_t opal_xive_sync(uint32_t type, uint32_t id);
+int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+ __be32 *out_qtoggle,
+ __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+ uint32_t qtoggle,
+ uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
+
+int64_t opal_imc_counters_init(uint32_t type, uint64_t address,
+ uint64_t cpu_pir);
+int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir);
+int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir);
+
+int opal_get_powercap(u32 handle, int token, u32 *pcap);
+int opal_set_powercap(u32 handle, int token, u32 pcap);
+int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr);
+int opal_set_power_shift_ratio(u32 handle, int token, u32 psr);
+int opal_sensor_group_clear(u32 group_hndl, int token);
+int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
+int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
+
+int opal_secvar_get(const char *key, uint64_t key_len, u8 *data,
+ uint64_t *data_size);
+int opal_secvar_get_next(const char *key, uint64_t *key_len,
+ uint64_t key_buf_size);
+int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
+ uint64_t data_size);
+
+s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
+s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
+s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, __be64 *addr);
+
+s64 opal_signal_system_reset(s32 cpu);
+s64 opal_quiesce(u64 shutdown_type, s32 cpu);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
int depth, void *data);
extern int early_init_dt_scan_recoverable_ranges(unsigned long node,
const char *uname, int depth, void *data);
+void __init opal_configure_cores(void);
-extern int opal_get_chars(uint32_t vtermno, char *buf, int count);
-extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len);
+extern ssize_t opal_get_chars(uint32_t vtermno, u8 *buf, size_t count);
+extern ssize_t opal_put_chars(uint32_t vtermno, const u8 *buf,
+ size_t total_len);
+extern ssize_t opal_put_chars_atomic(uint32_t vtermno, const u8 *buf,
+ size_t total_len);
+extern int opal_flush_chars(uint32_t vtermno, bool wait);
+extern int opal_flush_console(uint32_t vtermno);
extern void hvc_opal_init_early(void);
-extern int opal_notifier_register(struct notifier_block *nb);
-extern int opal_notifier_unregister(struct notifier_block *nb);
-
-extern int opal_message_notifier_register(enum OpalMessageType msg_type,
+extern int opal_message_notifier_register(enum opal_msg_type msg_type,
struct notifier_block *nb);
-extern void opal_notifier_enable(void);
-extern void opal_notifier_disable(void);
-extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val);
+extern int opal_message_notifier_unregister(enum opal_msg_type msg_type,
+ struct notifier_block *nb);
-extern int __opal_async_get_token(void);
extern int opal_async_get_token_interruptible(void);
-extern int __opal_async_release_token(int token);
extern int opal_async_release_token(int token);
extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg);
+extern int opal_async_wait_response_interruptible(uint64_t token,
+ struct opal_msg *msg);
extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data);
+extern int opal_get_sensor_data_u64(u32 sensor_hndl, u64 *sensor_data);
+extern int sensor_group_enable(u32 grp_hndl, bool enable);
struct rtc_time;
-extern int opal_set_rtc_time(struct rtc_time *tm);
-extern void opal_get_rtc_time(struct rtc_time *tm);
-extern unsigned long opal_get_boot_time(void);
+extern time64_t opal_get_boot_time(void);
extern void opal_nvram_init(void);
-extern void opal_flash_init(void);
-extern void opal_flash_term_callback(void);
+extern void opal_flash_update_init(void);
+extern void opal_flash_update_print_message(void);
extern int opal_elog_init(void);
extern void opal_platform_dump_init(void);
extern void opal_sys_param_init(void);
extern void opal_msglog_init(void);
+extern void opal_msglog_sysfs_init(void);
+extern int opal_async_comp_init(void);
+extern int opal_sensor_init(void);
+extern int opal_hmi_handler_init(void);
+extern int opal_event_init(void);
+int opal_power_control_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
extern int opal_hmi_exception_early(struct pt_regs *regs);
+extern int opal_hmi_exception_early2(struct pt_regs *regs);
extern int opal_handle_hmi_exception(struct pt_regs *regs);
extern void opal_shutdown(void);
@@ -974,17 +364,32 @@ extern int opal_resync_timebase(void);
extern void opal_lpc_init(void);
+extern void opal_kmsg_init(void);
+
+extern int opal_event_request(unsigned int opal_event_nr);
+
struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
unsigned long vmalloc_size);
void opal_free_sg_list(struct opal_sg_list *sg);
-/*
- * Dump region ID range usable by the OS
- */
-#define OPAL_DUMP_REGION_HOST_START 0x80
-#define OPAL_DUMP_REGION_LOG_BUF 0x80
-#define OPAL_DUMP_REGION_HOST_END 0xFF
+extern int opal_error_code(int rc);
+
+ssize_t opal_msglog_copy(char *to, loff_t pos, size_t count);
+
+static inline int opal_get_async_rc(struct opal_msg msg)
+{
+ if (msg.msg_type != OPAL_MSG_ASYNC_COMP)
+ return OPAL_PARAMETER;
+ else
+ return be64_to_cpu(msg.params[1]);
+}
+
+void opal_wake_poller(void);
+
+void opal_powercap_init(void);
+void opal_psr_init(void);
+void opal_sensor_groups_init(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
-#endif /* __OPAL_H */
+#endif /* _ASM_POWERPC_OPAL_H */
diff --git a/arch/powerpc/include/asm/oprofile_impl.h b/arch/powerpc/include/asm/oprofile_impl.h
deleted file mode 100644
index 61fe5d6f18e1..000000000000
--- a/arch/powerpc/include/asm/oprofile_impl.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * Based on alpha version.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef _ASM_POWERPC_OPROFILE_IMPL_H
-#define _ASM_POWERPC_OPROFILE_IMPL_H
-#ifdef __KERNEL__
-
-#define OP_MAX_COUNTER 8
-
-/* Per-counter configuration as set via oprofilefs. */
-struct op_counter_config {
- unsigned long enabled;
- unsigned long event;
- unsigned long count;
- /* Classic doesn't support per-counter user/kernel selection */
- unsigned long kernel;
- unsigned long user;
- unsigned long unit_mask;
-};
-
-/* System-wide configuration as set via oprofilefs. */
-struct op_system_config {
-#ifdef CONFIG_PPC64
- unsigned long mmcr0;
- unsigned long mmcr1;
- unsigned long mmcra;
-#ifdef CONFIG_OPROFILE_CELL
- /* Register for oprofile user tool to check cell kernel profiling
- * support.
- */
- unsigned long cell_support;
-#endif
-#endif
- unsigned long enable_kernel;
- unsigned long enable_user;
-};
-
-/* Per-arch configuration */
-struct op_powerpc_model {
- int (*reg_setup) (struct op_counter_config *,
- struct op_system_config *,
- int num_counters);
- int (*cpu_setup) (struct op_counter_config *);
- int (*start) (struct op_counter_config *);
- int (*global_start) (struct op_counter_config *);
- void (*stop) (void);
- void (*global_stop) (void);
- int (*sync_start)(void);
- int (*sync_stop)(void);
- void (*handle_interrupt) (struct pt_regs *,
- struct op_counter_config *);
- int num_counters;
-};
-
-extern struct op_powerpc_model op_model_fsl_emb;
-extern struct op_powerpc_model op_model_power4;
-extern struct op_powerpc_model op_model_7450;
-extern struct op_powerpc_model op_model_cell;
-extern struct op_powerpc_model op_model_pa6t;
-
-
-/* All the classic PPC parts use these */
-static inline unsigned int classic_ctr_read(unsigned int i)
-{
- switch(i) {
- case 0:
- return mfspr(SPRN_PMC1);
- case 1:
- return mfspr(SPRN_PMC2);
- case 2:
- return mfspr(SPRN_PMC3);
- case 3:
- return mfspr(SPRN_PMC4);
- case 4:
- return mfspr(SPRN_PMC5);
- case 5:
- return mfspr(SPRN_PMC6);
-
-/* No PPC32 chip has more than 6 so far */
-#ifdef CONFIG_PPC64
- case 6:
- return mfspr(SPRN_PMC7);
- case 7:
- return mfspr(SPRN_PMC8);
-#endif
- default:
- return 0;
- }
-}
-
-static inline void classic_ctr_write(unsigned int i, unsigned int val)
-{
- switch(i) {
- case 0:
- mtspr(SPRN_PMC1, val);
- break;
- case 1:
- mtspr(SPRN_PMC2, val);
- break;
- case 2:
- mtspr(SPRN_PMC3, val);
- break;
- case 3:
- mtspr(SPRN_PMC4, val);
- break;
- case 4:
- mtspr(SPRN_PMC5, val);
- break;
- case 5:
- mtspr(SPRN_PMC6, val);
- break;
-
-/* No PPC32 chip has more than 6, yet */
-#ifdef CONFIG_PPC64
- case 6:
- mtspr(SPRN_PMC7, val);
- break;
- case 7:
- mtspr(SPRN_PMC8, val);
- break;
-#endif
- default:
- break;
- }
-}
-
-
-extern void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth);
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_OPROFILE_IMPL_H */
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index a5139ea6910b..1d58da946739 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This control block defines the PACA which defines the processor
* specific data for each logical processor on the system.
* There are some pointers defined that are utilized by PLIC.
*
* C 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_PACA_H
#define _ASM_POWERPC_PACA_H
@@ -16,14 +12,26 @@
#ifdef CONFIG_PPC64
+#include <linux/cache.h>
+#include <linux/string.h>
#include <asm/types.h>
-#include <asm/lppaca.h>
#include <asm/mmu.h>
#include <asm/page.h>
+#ifdef CONFIG_PPC_BOOK3E_64
#include <asm/exception-64e.h>
+#else
+#include <asm/exception-64s.h>
+#endif
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_book3s_asm.h>
#endif
+#include <asm/accounting.h>
+#include <asm/hmi.h>
+#include <asm/cpuidle.h>
+#include <asm/atomic.h>
+#include <asm/mce.h>
+
+#include <asm-generic/mmiowb_types.h>
register struct paca_struct *local_paca asm("r13");
@@ -38,11 +46,11 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */
#define get_paca() local_paca
#endif
-#define get_lppaca() (get_paca()->lppaca_ptr)
#define get_slb_shadow() (get_paca()->slb_shadow_ptr)
struct task_struct;
-struct opal_machine_check_event;
+struct rtas_args;
+struct lppaca;
/*
* Defines the layout of the paca.
@@ -51,7 +59,7 @@ struct opal_machine_check_event;
* processor.
*/
struct paca_struct {
-#ifdef CONFIG_PPC_BOOK3S
+#ifdef CONFIG_PPC_PSERIES
/*
* Because hw_cpu_id, unlike other paca fields, is accessed
* routinely from other CPUs (from the IRQ code), we stick to
@@ -60,7 +68,8 @@ struct paca_struct {
*/
struct lppaca *lppaca_ptr; /* Pointer to LpPaca for PLIC */
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
+
/*
* MAGIC: the spinlock functions in arch/powerpc/lib/locks.c
* load lock_token and paca_index with a single lwz
@@ -75,7 +84,9 @@ struct paca_struct {
u16 lock_token; /* Constant 0x8000, used in locks */
#endif
+#ifndef CONFIG_PPC_KERNEL_PCREL
u64 kernel_toc; /* Kernel TOC address */
+#endif
u64 kernelbase; /* Base address of kernel */
u64 kernel_msr; /* MSR while running in kernel */
void *emergency_sp; /* pointer to emergency stack */
@@ -84,32 +95,40 @@ struct paca_struct {
u8 cpu_start; /* At startup, processor spins until */
/* this becomes non-zero. */
u8 kexec_state; /* set when kexec down has irqs off */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct slb_shadow *slb_shadow_ptr;
+#endif
struct dtl_entry *dispatch_log;
struct dtl_entry *dispatch_log_end;
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
u64 dscr_default; /* per-CPU default DSCR */
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
/*
* Now, starting in cacheline 2, the exception save areas
*/
/* used for most interrupts/exceptions */
- u64 exgen[13] __attribute__((aligned(0x80)));
- u64 exmc[13]; /* used for machine checks */
- u64 exslb[13]; /* used for SLB/segment table misses
- * on the linear mapping */
+ u64 exgen[EX_SIZE] __attribute__((aligned(0x80)));
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* SLB related definitions */
u16 vmalloc_sllp;
- u16 slb_cache_ptr;
+ u8 slb_cache_ptr;
+ u8 stab_rr; /* stab/slb round-robin counter */
+#ifdef CONFIG_DEBUG_VM
+ u8 in_kernel_slb_handler;
+#endif
+ u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */
+ u32 slb_kern_bitmap;
u32 slb_cache[SLB_CACHE_ENTRIES];
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif
+#endif /* CONFIG_PPC_BOOK3S_64 */
-#ifdef CONFIG_PPC_BOOK3E
- u64 exgen[8] __attribute__((aligned(0x80)));
+#ifdef CONFIG_PPC_BOOK3E_64
+ u64 exgen[8] __aligned(0x40);
/* Keep pgd in the same cacheline as the start of extlb */
- pgd_t *pgd __attribute__((aligned(0x80))); /* Current PGD */
+ pgd_t *pgd __aligned(0x40); /* Current PGD */
pgd_t *kernel_pgd; /* Kernel PGD */
/* Shared by all threads of a core -- points to tcd of first thread */
@@ -130,55 +149,88 @@ struct paca_struct {
void *dbg_kstack;
struct tlb_core_data tcd;
-#endif /* CONFIG_PPC_BOOK3E */
+#endif /* CONFIG_PPC_BOOK3E_64 */
- mm_context_t context;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
+ unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+#endif
/*
* then miscellaneous read-write fields
*/
struct task_struct *__current; /* Pointer to current */
u64 kstack; /* Saved Kernel stack addr */
- u64 stab_rr; /* stab/slb round-robin counter */
- u64 saved_r1; /* r1 save for RTAS calls or PM */
+ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */
u64 saved_msr; /* MSR saved here by enter_rtas */
+ u64 exit_save_r1; /* Syscall/interrupt R1 save */
+#ifdef CONFIG_PPC_BOOK3E_64
u16 trap_save; /* Used when bad stack is encountered */
- u8 soft_enabled; /* irq soft-enable flag */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ u8 hsrr_valid; /* HSRRs set for HRFID */
+ u8 srr_valid; /* SRRs set for RFID */
+#endif
+ u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
- u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
- u8 nap_state_lost; /* NV GPR values lost in power7_idle */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ u8 pmcregs_in_use; /* pseries puts this in lppaca */
+#endif
u64 sprg_vdso; /* Saved user-visible sprg */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
u64 tm_scratch; /* TM scratch area for reclaim */
#endif
#ifdef CONFIG_PPC_POWERNV
- /* Pointer to OPAL machine check event structure set by the
- * early exception handler for use by high level C handler
- */
- struct opal_machine_check_event *opal_mc_evt;
+ /* PowerNV idle fields */
+ /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+ unsigned long idle_lock; /* A value of 1 means acquired */
+ unsigned long idle_state;
+ union {
+ /* P7/P8 specific fields */
+ struct {
+ /* PNV_THREAD_RUNNING/NAP/SLEEP */
+ u8 thread_idle_state;
+ /* Mask to denote subcore sibling threads */
+ u8 subcore_sibling_mask;
+ };
+
+ /* P9 specific fields */
+ struct {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* The PSSCR value that the kernel requested before going to stop */
+ u64 requested_psscr;
+ /* Flag to request this thread not to stop */
+ atomic_t dont_stop;
+#endif
+ };
+ };
#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
- /* Exclusive emergency stack pointer for machine check exception. */
+ /* Non-maskable exceptions that are not performance critical */
+ u64 exnmi[EX_SIZE]; /* used for system reset (nmi) */
+ u64 exmc[EX_SIZE]; /* used for machine checks */
+ /* Exclusive stacks for system reset and machine check exception. */
+ void *nmi_emergency_sp;
void *mc_emergency_sp;
+
+ u16 in_nmi; /* In nmi handler */
+
/*
* Flag to check whether we are in machine check early handler
* and already using emergency stack.
*/
u16 in_mce;
- u8 hmi_event_available; /* HMI event is available */
+ u8 hmi_event_available; /* HMI event is available */
+ u8 hmi_p9_special_emu; /* HMI P9 special emulation */
+ u32 hmi_irqs; /* HMI irq stat */
#endif
+ u8 ftrace_enabled; /* Hard disable ftrace */
/* Stuff for accurate time accounting */
- u64 user_time; /* accumulated usermode TB ticks */
- u64 system_time; /* accumulated system TB ticks */
- u64 user_time_scaled; /* accumulated usermode SPURR ticks */
- u64 starttime; /* TB value snapshot */
- u64 starttime_user; /* TB value on exit to usermode */
- u64 startspurr; /* SPURR value snapshot */
- u64 utime_sspurr; /* ->user_time when ->startspurr set */
- u64 stolen_time; /* TB ticks taken by hypervisor */
+ struct cpu_accounting_data accounting;
u64 dtl_ridx; /* read index in dispatch log */
struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */
@@ -188,19 +240,58 @@ struct paca_struct {
struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
#endif
struct kvmppc_host_state kvm_hstate;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /*
+ * Bitmap for sibling subcore status. See kvm/book3s_hv_ras.c for
+ * more details
+ */
+ struct sibling_subcore_state *sibling_subcore_state;
+#endif
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * rfi fallback flush must be in its own cacheline to prevent
+ * other paca data leaking into the L1d
+ */
+ u64 exrfi[EX_SIZE] __aligned(0x80);
+ void *rfi_flush_fallback_area;
+ u64 l1d_flush_size;
#endif
-};
+#ifdef CONFIG_PPC_PSERIES
+ u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /* Capture SLB related old contents in MCE handler. */
+ struct slb_entry *mce_faulty_slbs;
+ u16 slb_save_cache_ptr;
+#endif
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_STACKPROTECTOR
+ unsigned long canary;
+#endif
+#ifdef CONFIG_MMIOWB
+ struct mmiowb_state mmiowb_state;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct mce_info *mce_info;
+ u8 mce_pending_irq_work;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+} ____cacheline_aligned;
-extern struct paca_struct *paca;
+extern void copy_mm_to_paca(struct mm_struct *mm);
+extern struct paca_struct **paca_ptrs;
extern void initialise_paca(struct paca_struct *new_paca, int cpu);
extern void setup_paca(struct paca_struct *new_paca);
-extern void allocate_pacas(void);
+extern void allocate_paca_ptrs(void);
+extern void allocate_paca(int cpu);
extern void free_unused_pacas(void);
#else /* CONFIG_PPC64 */
-static inline void allocate_pacas(void) { };
-static inline void free_unused_pacas(void) { };
+static inline void allocate_paca(int cpu) { }
+static inline void free_unused_pacas(void) { }
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 26fe1ae15212..b28fbb1d57eb 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -1,46 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_PAGE_H
#define _ASM_POWERPC_PAGE_H
/*
* Copyright (C) 2001,2005 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
#else
#include <asm/types.h>
#endif
-#include <asm/asm-compat.h>
-#include <asm/kdump.h>
+#include <asm/asm-const.h>
/*
* On regular PPC32 page size is 4K (but we support 4K/16K/64K/256K pages
- * on PPC44x). For PPC64 we support either 4K or 64K software
+ * on PPC44x and 4K/16K on 8xx). For PPC64 we support either 4K or 64K software
* page size. When using 64K pages however, whether we are really supporting
* 64K pages in HW or not is irrelevant to those definitions.
*/
-#if defined(CONFIG_PPC_256K_PAGES)
-#define PAGE_SHIFT 18
-#elif defined(CONFIG_PPC_64K_PAGES)
-#define PAGE_SHIFT 16
-#elif defined(CONFIG_PPC_16K_PAGES)
-#define PAGE_SHIFT 14
-#else
-#define PAGE_SHIFT 12
-#endif
+#include <vdso/page.h>
-#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_HUGETLB_PAGE
-extern unsigned int HPAGE_SHIFT;
-#else
+#ifndef __ASSEMBLER__
+#ifndef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT PAGE_SHIFT
+#elif defined(CONFIG_PPC_BOOK3S_64)
+extern unsigned int hpage_shift;
+#define HPAGE_SHIFT hpage_shift
+#elif defined(CONFIG_PPC_8xx)
+#define HPAGE_SHIFT 19 /* 512k pages */
+#elif defined(CONFIG_PPC_E500)
+#define HPAGE_SHIFT 22 /* 4M pages */
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
@@ -49,13 +41,6 @@ extern unsigned int HPAGE_SHIFT;
#endif
/*
- * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
- * assign PAGE_MASK to a larger type it gets extended the way we want
- * (i.e. with 1s in the high bits)
- */
-#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1))
-
-/*
* KERNELBASE is the virtual address of the start of the kernel, it's often
* the same as PAGE_OFFSET, but _might not be_.
*
@@ -90,16 +75,16 @@ extern unsigned int HPAGE_SHIFT;
#define LOAD_OFFSET ASM_CONST((CONFIG_KERNEL_START-CONFIG_PHYSICAL_START))
#if defined(CONFIG_NONSTATIC_KERNEL)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern phys_addr_t memstart_addr;
extern phys_addr_t kernstart_addr;
-#ifdef CONFIG_RELOCATABLE_PPC32
+#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC32)
extern long long virt_phys_offset;
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define PHYSICAL_START kernstart_addr
#else /* !CONFIG_NONSTATIC_KERNEL */
@@ -107,12 +92,13 @@ extern long long virt_phys_offset;
#endif
/* See Description below for VIRT_PHYS_OFFSET */
-#ifdef CONFIG_RELOCATABLE_PPC32
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+#ifdef CONFIG_RELOCATABLE
#define VIRT_PHYS_OFFSET virt_phys_offset
#else
#define VIRT_PHYS_OFFSET (KERNELBASE - PHYSICAL_START)
#endif
-
+#endif
#ifdef CONFIG_PPC64
#define MEMORY_START 0UL
@@ -124,21 +110,16 @@ extern long long virt_phys_offset;
#ifdef CONFIG_FLATMEM
#define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT))
-#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < max_mapnr)
#endif
-#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
-
/*
* On Book-E parts we need __va to parse the device tree and we can't
* determine MEMORY_START until then. However we can determine PHYSICAL_START
* from information at hand (program counter, TLB lookup).
*
- * On BookE with RELOCATABLE (RELOCATABLE_PPC32)
+ * On BookE with RELOCATABLE && PPC32
*
- * With RELOCATABLE_PPC32, we support loading the kernel at any physical
+ * With RELOCATABLE && PPC32, we support loading the kernel at any physical
* address without any restriction on the page alignment.
*
* We find the runtime address of _stext and relocate ourselves based on
@@ -204,17 +185,30 @@ extern long long virt_phys_offset;
* On non-Book-E PPC64 PAGE_OFFSET and MEMORY_START are constants so use
* the other definitions for __va & __pa.
*/
-#ifdef CONFIG_BOOKE
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + VIRT_PHYS_OFFSET))
-#define __pa(x) ((unsigned long)(x) - VIRT_PHYS_OFFSET)
+#define __pa(x) ((phys_addr_t)(unsigned long)(x) - VIRT_PHYS_OFFSET)
#else
#ifdef CONFIG_PPC64
+
+#define VIRTUAL_WARN_ON(x) WARN_ON(IS_ENABLED(CONFIG_DEBUG_VIRTUAL) && (x))
+
/*
* gcc miscompiles (unsigned long)(&static_var) - PAGE_OFFSET
* with -mcmodel=medium, so we use & and | instead of - and + on 64-bit.
+ * This also results in better code generation.
*/
-#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET))
-#define __pa(x) ((unsigned long)(x) & 0x0fffffffffffffffUL)
+#define __va(x) \
+({ \
+ VIRTUAL_WARN_ON((unsigned long)(x) >= PAGE_OFFSET); \
+ (void *)(unsigned long)((phys_addr_t)(x) | PAGE_OFFSET); \
+})
+
+#define __pa(x) \
+({ \
+ VIRTUAL_WARN_ON((unsigned long)(x) < PAGE_OFFSET); \
+ (unsigned long)(x) & 0x0fffffffffffffffUL; \
+})
#else /* 32-bit, non book E */
#define __va(x) ((void *)(unsigned long)((phys_addr_t)(x) + PAGE_OFFSET - MEMORY_START))
@@ -222,16 +216,32 @@ extern long long virt_phys_offset;
#endif
#endif
+#ifndef __ASSEMBLER__
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline const void *pfn_to_kaddr(unsigned long pfn)
+{
+ return __va(pfn << PAGE_SHIFT);
+}
+#endif
+
+#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
+#define virt_addr_valid(vaddr) ({ \
+ unsigned long _addr = (unsigned long)vaddr; \
+ _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \
+ pfn_valid(virt_to_pfn((void *)_addr)); \
+})
+
/*
* Unfortunately the PLT is in the BSS in the PPC32 ELF ABI,
* and needs to be executable. This means the whole heap ends
* up being executable.
*/
-#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_DEFAULT_FLAGS32 VM_DATA_FLAGS_TSK_EXEC
+#define VM_DATA_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC
#ifdef __powerpc64__
#include <asm/page_64.h>
@@ -239,158 +249,30 @@ extern long long virt_phys_offset;
#include <asm/page_32.h>
#endif
-/* align addr on a size boundary - adjust address up/down if needed */
-#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1)))
-#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1)))
-
-/* align addr on a size boundary - adjust address up if needed */
-#define _ALIGN(addr,size) _ALIGN_UP(addr,size)
-
/*
* Don't compare things with KERNELBASE or PAGE_OFFSET to test for
* "kernelness", use is_kernel_addr() - it should do what you want.
*/
#ifdef CONFIG_PPC_BOOK3E_64
#define is_kernel_addr(x) ((x) >= 0x8000000000000000ul)
-#else
+#elif defined(CONFIG_PPC_BOOK3S_64)
#define is_kernel_addr(x) ((x) >= PAGE_OFFSET)
-#endif
-
-#ifndef CONFIG_PPC_BOOK3S_64
-/*
- * Use the top bit of the higher-level page table entries to indicate whether
- * the entries we point to contain hugepages. This works because we know that
- * the page tables live in kernel space. If we ever decide to support having
- * page tables at arbitrary addresses, this breaks and will have to change.
- */
-#ifdef CONFIG_PPC64
-#define PD_HUGE 0x8000000000000000
-#else
-#define PD_HUGE 0x80000000
-#endif
-#endif /* CONFIG_PPC_BOOK3S_64 */
-
-/*
- * Some number of bits at the level of the page table that points to
- * a hugepte are used to encode the size. This masks those bits.
- */
-#define HUGEPD_SHIFT_MASK 0x3f
-
-#ifndef __ASSEMBLY__
-
-#undef STRICT_MM_TYPECHECKS
-
-#ifdef STRICT_MM_TYPECHECKS
-/* These are used to make use of C type-checking. */
-
-/* PTE level */
-typedef struct { pte_basic_t pte; } pte_t;
-#define pte_val(x) ((x).pte)
-#define __pte(x) ((pte_t) { (x) })
-
-/* 64k pages additionally define a bigger "real PTE" type that gathers
- * the "second half" part of the PTE for pseudo 64k pages
- */
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef struct { pte_t pte; } real_pte_t;
-#endif
-
-/* PMD level */
-#ifdef CONFIG_PPC64
-typedef struct { unsigned long pmd; } pmd_t;
-#define pmd_val(x) ((x).pmd)
-#define __pmd(x) ((pmd_t) { (x) })
-
-/* PUD level exusts only on 4k pages */
-#ifndef CONFIG_PPC_64K_PAGES
-typedef struct { unsigned long pud; } pud_t;
-#define pud_val(x) ((x).pud)
-#define __pud(x) ((pud_t) { (x) })
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-/* PGD level */
-typedef struct { unsigned long pgd; } pgd_t;
-#define pgd_val(x) ((x).pgd)
-#define __pgd(x) ((pgd_t) { (x) })
-
-/* Page protection bits */
-typedef struct { unsigned long pgprot; } pgprot_t;
-#define pgprot_val(x) ((x).pgprot)
-#define __pgprot(x) ((pgprot_t) { (x) })
-
#else
-
-/*
- * .. while these make it easier on the compiler
- */
-
-typedef pte_basic_t pte_t;
-#define pte_val(x) (x)
-#define __pte(x) (x)
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
-typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
-#else
-typedef pte_t real_pte_t;
+#define is_kernel_addr(x) ((x) >= TASK_SIZE)
#endif
+#ifndef __ASSEMBLER__
-#ifdef CONFIG_PPC64
-typedef unsigned long pmd_t;
-#define pmd_val(x) (x)
-#define __pmd(x) (x)
-
-#ifndef CONFIG_PPC_64K_PAGES
-typedef unsigned long pud_t;
-#define pud_val(x) (x)
-#define __pud(x) (x)
-#endif /* !CONFIG_PPC_64K_PAGES */
-#endif /* CONFIG_PPC64 */
-
-typedef unsigned long pgd_t;
-#define pgd_val(x) (x)
-#define pgprot_val(x) (x)
-
-typedef unsigned long pgprot_t;
-#define __pgd(x) (x)
-#define __pgprot(x) (x)
-
-#endif
-
-typedef struct { signed long pd; } hugepd_t;
-
-#ifdef CONFIG_HUGETLB_PAGE
#ifdef CONFIG_PPC_BOOK3S_64
-static inline int hugepd_ok(hugepd_t hpd)
-{
- /*
- * hugepd pointer, bottom two bits == 00 and next 4 bits
- * indicate size of table
- */
- return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
-}
+#include <asm/pgtable-be-types.h>
#else
-static inline int hugepd_ok(hugepd_t hpd)
-{
- return (hpd.pd > 0);
-}
+#include <asm/pgtable-types.h>
#endif
-#define is_hugepd(pdep) (hugepd_ok(*((hugepd_t *)(pdep))))
-int pgd_huge(pgd_t pgd);
-#else /* CONFIG_HUGETLB_PAGE */
-#define is_hugepd(pdep) 0
-#define pgd_huge(pgd) 0
-#endif /* CONFIG_HUGETLB_PAGE */
-
struct page;
extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg);
extern void copy_user_page(void *to, void *from, unsigned long vaddr,
struct page *p);
-extern int page_is_ram(unsigned long pfn);
extern int devmem_is_allowed(unsigned long pfn);
#ifdef CONFIG_PPC_SMLPAR
@@ -400,13 +282,14 @@ void arch_free_page(struct page *page, int order);
struct vm_area_struct;
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC64)
-typedef pte_t *pgtable_t;
-#else
-typedef struct page *pgtable_t;
-#endif
+extern unsigned long kernstart_virt_addr;
+
+static inline unsigned long kaslr_offset(void)
+{
+ return kernstart_virt_addr - KERNELBASE;
+}
#include <asm-generic/memory_model.h>
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
index 68d73b2a7bfc..25482405a811 100644
--- a/arch/powerpc/include/asm/page_32.h
+++ b/arch/powerpc/include/asm/page_32.h
@@ -1,6 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_PAGE_32_H
#define _ASM_POWERPC_PAGE_32_H
+#include <asm/cache.h>
+
#if defined(CONFIG_PHYSICAL_ALIGN) && (CONFIG_PHYSICAL_START != 0)
#if (CONFIG_PHYSICAL_START % CONFIG_PHYSICAL_ALIGN) != 0
#error "CONFIG_PHYSICAL_START must be a multiple of CONFIG_PHYSICAL_ALIGN"
@@ -9,23 +12,14 @@
#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define ARCH_DMA_MINALIGN L1_CACHE_BYTES
-#endif
-
-#ifdef CONFIG_PTE_64BIT
-#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */
-#else
-#define PTE_FLAGS_OFFSET 0
-#endif
-
-#ifdef CONFIG_PPC_256K_PAGES
+#if defined(CONFIG_PPC_256K_PAGES) || \
+ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
#define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2 - 2) /* 1/4 of a page */
#else
#define PTE_SHIFT (PAGE_SHIFT - PTE_T_LOG2) /* full page */
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* The basic type of a PTE - 64 bits for those CPUs with > 32 bit
* physical addressing.
@@ -36,9 +30,22 @@ typedef unsigned long long pte_basic_t;
typedef unsigned long pte_basic_t;
#endif
-struct page;
-extern void clear_pages(void *page, int order);
-static inline void clear_page(void *page) { clear_pages(page, 0); }
+#include <asm/bug.h>
+
+/*
+ * Clear page using the dcbz instruction, which doesn't cause any
+ * memory traffic (except to write out any cache lines which get
+ * displaced). This only works on cacheable memory.
+ */
+static inline void clear_page(void *addr)
+{
+ unsigned int i;
+
+ WARN_ON((unsigned long)addr & (L1_CACHE_BYTES - 1));
+
+ for (i = 0; i < PAGE_SIZE / L1_CACHE_BYTES; i++, addr += L1_CACHE_BYTES)
+ dcbz(addr);
+}
extern void copy_page(void *to, void *from);
#include <asm-generic/getorder.h>
@@ -46,6 +53,6 @@ extern void copy_page(void *to, void *from);
#define PGD_T_LOG2 (__builtin_ffs(sizeof(pgd_t)) - 1)
#define PTE_T_LOG2 (__builtin_ffs(sizeof(pte_t)) - 1)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_POWERPC_PAGE_32_H */
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index 88693cef4f3d..0f564a06bf68 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -1,15 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_PAGE_64_H
#define _ASM_POWERPC_PAGE_64_H
/*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <asm/asm-const.h>
+
/*
* We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux
* specific, every notion of page number shared with the firmware, TCEs,
@@ -37,25 +35,45 @@
#define ESID_MASK_1T 0xffffff0000000000UL
#define GET_ESID_1T(x) (((x) >> SID_SHIFT_1T) & SID_MASK_1T)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/cache.h>
typedef unsigned long pte_basic_t;
-static __inline__ void clear_page(void *addr)
+static inline void clear_page(void *addr)
{
- unsigned long lines, line_size;
-
- line_size = ppc64_caches.dline_size;
- lines = ppc64_caches.dlines_per_page;
-
- __asm__ __volatile__(
+ unsigned long iterations;
+ unsigned long onex, twox, fourx, eightx;
+
+ iterations = ppc64_caches.l1d.blocks_per_page / 8;
+
+ /*
+ * Some versions of gcc use multiply instructions to
+ * calculate the offsets so let's give it a hand to
+ * do better.
+ */
+ onex = ppc64_caches.l1d.block_size;
+ twox = onex << 1;
+ fourx = onex << 2;
+ eightx = onex << 3;
+
+ asm volatile(
"mtctr %1 # clear_page\n\
-1: dcbz 0,%0\n\
- add %0,%0,%3\n\
+ .balign 16\n\
+1: dcbz 0,%0\n\
+ dcbz %3,%0\n\
+ dcbz %4,%0\n\
+ dcbz %5,%0\n\
+ dcbz %6,%0\n\
+ dcbz %7,%0\n\
+ dcbz %8,%0\n\
+ dcbz %9,%0\n\
+ add %0,%0,%10\n\
bdnz+ 1b"
- : "=r" (addr)
- : "r" (lines), "0" (addr), "r" (line_size)
+ : "=&r" (addr)
+ : "r" (iterations), "0" (addr), "b" (onex), "b" (twox),
+ "b" (twox+onex), "b" (fourx), "b" (fourx+onex),
+ "b" (twox+fourx), "b" (eightx-onex), "r" (eightx)
: "ctr", "memory");
}
@@ -64,84 +82,7 @@ extern void copy_page(void *to, void *from);
/* Log 2 of page table size */
extern u64 ppc64_pft_size;
-#endif /* __ASSEMBLY__ */
-
-#ifdef CONFIG_PPC_MM_SLICES
-
-#define SLICE_LOW_SHIFT 28
-#define SLICE_HIGH_SHIFT 40
-
-#define SLICE_LOW_TOP (0x100000000ul)
-#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
-#define SLICE_NUM_HIGH (PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
-
-#define GET_LOW_SLICE_INDEX(addr) ((addr) >> SLICE_LOW_SHIFT)
-#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-
-/*
- * 1 bit per slice and we have one slice per 1TB
- * Right now we support only 64TB.
- * IF we change this we will have to change the type
- * of high_slices
- */
-#define SLICE_MASK_SIZE 8
-
-#ifndef __ASSEMBLY__
-
-struct slice_mask {
- u16 low_slices;
- u64 high_slices;
-};
-
-struct mm_struct;
-
-extern unsigned long slice_get_unmapped_area(unsigned long addr,
- unsigned long len,
- unsigned long flags,
- unsigned int psize,
- int topdown);
-
-extern unsigned int get_slice_psize(struct mm_struct *mm,
- unsigned long addr);
-
-extern void slice_init_context(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_user_psize(struct mm_struct *mm, unsigned int psize);
-extern void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long len, unsigned int psize);
-
-#define slice_mm_new_context(mm) ((mm)->context.id == MMU_NO_CONTEXT)
-
-#endif /* __ASSEMBLY__ */
-#else
-#define slice_init()
-#ifdef CONFIG_PPC_STD_MMU_64
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-#else /* CONFIG_PPC_STD_MMU_64 */
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr) MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr) MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize) do { BUG(); } while(0)
-#endif /* !CONFIG_PPC_STD_MMU_64 */
-
-#define slice_set_range_psize(mm, start, len, psize) \
- slice_set_user_psize((mm), (psize))
-#define slice_mm_new_context(mm) 1
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-
-#endif /* !CONFIG_HUGETLB_PAGE */
+#endif /* __ASSEMBLER__ */
#define VM_DATA_DEFAULT_FLAGS \
(is_32bit_task() ? \
@@ -153,11 +94,8 @@ do { \
* stack by default, so in the absence of a PT_GNU_STACK program header
* we turn execute permission off.
*/
-#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-
-#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \
- VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_STACK_DEFAULT_FLAGS32 VM_DATA_FLAGS_EXEC
+#define VM_STACK_DEFAULT_FLAGS64 VM_DATA_FLAGS_NON_EXEC
#define VM_STACK_DEFAULT_FLAGS \
(is_32bit_task() ? \
diff --git a/arch/powerpc/include/asm/papr-sysparm.h b/arch/powerpc/include/asm/papr-sysparm.h
new file mode 100644
index 000000000000..a3b5a0d05db6
--- /dev/null
+++ b/arch/powerpc/include/asm/papr-sysparm.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_PAPR_SYSPARM_H
+#define _ASM_POWERPC_PAPR_SYSPARM_H
+
+#include <uapi/asm/papr-sysparm.h>
+
+typedef struct {
+ u32 token;
+} papr_sysparm_t;
+
+#define mk_papr_sysparm(x_) ((papr_sysparm_t){ .token = x_, })
+
+/*
+ * Derived from the "Defined Parameters" table in PAPR 7.3.16 System
+ * Parameters Option. Where the spec says "characteristics", we use
+ * "attrs" in the symbolic names to keep them from getting too
+ * unwieldy.
+ */
+#define PAPR_SYSPARM_SHARED_PROC_LPAR_ATTRS mk_papr_sysparm(20)
+#define PAPR_SYSPARM_PROC_MODULE_INFO mk_papr_sysparm(43)
+#define PAPR_SYSPARM_COOP_MEM_OVERCOMMIT_ATTRS mk_papr_sysparm(44)
+#define PAPR_SYSPARM_TLB_BLOCK_INVALIDATE_ATTRS mk_papr_sysparm(50)
+#define PAPR_SYSPARM_LPAR_NAME mk_papr_sysparm(55)
+#define PAPR_SYSPARM_HVPIPE_ENABLE mk_papr_sysparm(64)
+
+/**
+ * struct papr_sysparm_buf - RTAS work area layout for system parameter functions.
+ *
+ * This is the memory layout of the buffers passed to/from
+ * ibm,get-system-parameter and ibm,set-system-parameter. It is
+ * distinct from the papr_sysparm_io_block structure that is passed
+ * between user space and the kernel.
+ */
+struct papr_sysparm_buf {
+ __be16 len;
+ u8 val[PAPR_SYSPARM_MAX_OUTPUT];
+};
+
+struct papr_sysparm_buf *papr_sysparm_buf_alloc(void);
+void papr_sysparm_buf_free(struct papr_sysparm_buf *buf);
+int papr_sysparm_set(papr_sysparm_t param, const struct papr_sysparm_buf *buf);
+int papr_sysparm_get(papr_sysparm_t param, struct papr_sysparm_buf *buf);
+
+#endif /* _ASM_POWERPC_PAPR_SYSPARM_H */
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
new file mode 100644
index 000000000000..b78b82d66057
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PARAVIRT_H
+#define _ASM_POWERPC_PARAVIRT_H
+
+#include <linux/jump_label.h>
+#include <asm/smp.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/hvcall.h>
+#endif
+
+#ifdef CONFIG_PPC_SPLPAR
+#include <linux/smp.h>
+#include <asm/kvm_guest.h>
+#include <asm/cputhreads.h>
+
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
+static inline bool is_shared_processor(void)
+{
+ return static_branch_unlikely(&shared_processor);
+}
+
+#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
+extern struct static_key paravirt_steal_enabled;
+extern struct static_key paravirt_steal_rq_enabled;
+
+u64 pseries_paravirt_steal_clock(int cpu);
+
+static inline u64 paravirt_steal_clock(int cpu)
+{
+ return pseries_paravirt_steal_clock(cpu);
+}
+#endif
+
+/* If bit 0 is set, the cpu has been ceded, conferred, or preempted */
+static inline u32 yield_count_of(int cpu)
+{
+ __be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count);
+ return be32_to_cpu(yield_count);
+}
+
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held: the lock slowpath yields if
+ * it can not acquire the lock, and unlock slow path might prod if a waiter has
+ * yielded. So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+ plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+}
+
+static inline void prod_cpu(int cpu)
+{
+ plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
+}
+
+static inline void yield_to_any(void)
+{
+ plpar_hcall_norets_notrace(H_CONFER, -1, 0);
+}
+
+static inline bool is_vcpu_idle(int vcpu)
+{
+ return lppaca_of(vcpu).idle;
+}
+
+static inline bool vcpu_is_dispatched(int vcpu)
+{
+ /*
+ * This is the yield_count. An "odd" value (low bit on) means that
+ * the processor is yielded (either because of an OS yield or a
+ * hypervisor preempt). An even value implies that the processor is
+ * currently executing.
+ */
+ return (!(yield_count_of(vcpu) & 1));
+}
+#else
+static inline bool is_shared_processor(void)
+{
+ return false;
+}
+
+static inline u32 yield_count_of(int cpu)
+{
+ return 0;
+}
+
+extern void ___bad_yield_to_preempted(void);
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+ ___bad_yield_to_preempted(); /* This would be a bug */
+}
+
+extern void ___bad_yield_to_any(void);
+static inline void yield_to_any(void)
+{
+ ___bad_yield_to_any(); /* This would be a bug */
+}
+
+extern void ___bad_prod_cpu(void);
+static inline void prod_cpu(int cpu)
+{
+ ___bad_prod_cpu(); /* This would be a bug */
+}
+
+static inline bool is_vcpu_idle(int vcpu)
+{
+ return false;
+}
+static inline bool vcpu_is_dispatched(int vcpu)
+{
+ return true;
+}
+#endif
+
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ /*
+ * The dispatch/yield bit alone is an imperfect indicator of
+ * whether the hypervisor has dispatched @cpu to run on a physical
+ * processor. When it is clear, @cpu is definitely not preempted.
+ * But when it is set, it means only that it *might* be, subject to
+ * other conditions. So we check other properties of the VM and
+ * @cpu first, resorting to the yield count last.
+ */
+
+ /*
+ * Hypervisor preemption isn't possible in dedicated processor
+ * mode by definition.
+ */
+ if (!is_shared_processor())
+ return false;
+
+ /*
+ * If the hypervisor has dispatched the target CPU on a physical
+ * processor, then the target CPU is definitely not preempted.
+ */
+ if (vcpu_is_dispatched(cpu))
+ return false;
+
+ /*
+ * If the target CPU is not dispatched and the guest OS
+ * has not marked the CPU idle, then it is hypervisor preempted.
+ */
+ if (!is_vcpu_idle(cpu))
+ return true;
+
+#ifdef CONFIG_PPC_SPLPAR
+ if (!is_kvm_guest()) {
+ int first_cpu, i;
+
+ /*
+ * The result of vcpu_is_preempted() is used in a
+ * speculative way, and is always subject to invalidation
+ * by events internal and external to Linux. While we can
+ * be called in preemptable context (in the Linux sense),
+ * we're not accessing per-cpu resources in a way that can
+ * race destructively with Linux scheduler preemption and
+ * migration, and callers can tolerate the potential for
+ * error introduced by sampling the CPU index without
+ * pinning the task to it. So it is permissible to use
+ * raw_smp_processor_id() here to defeat the preempt debug
+ * warnings that can arise from using smp_processor_id()
+ * in arbitrary contexts.
+ */
+ first_cpu = cpu_first_thread_sibling(raw_smp_processor_id());
+
+ /*
+ * The PowerVM hypervisor dispatches VMs on a whole core
+ * basis. So we know that a thread sibling of the executing CPU
+ * cannot have been preempted by the hypervisor, even if it
+ * has called H_CONFER, which will set the yield bit.
+ */
+ if (cpu_first_thread_sibling(cpu) == first_cpu)
+ return false;
+
+ /*
+ * The specific target CPU was marked by guest OS as idle, but
+ * then also check all other cpus in the core for PowerVM
+ * because it does core scheduling and one of the vcpu
+ * of the core getting preempted by hypervisor implies
+ * other vcpus can also be considered preempted.
+ */
+ first_cpu = cpu_first_thread_sibling(cpu);
+ for (i = first_cpu; i < first_cpu + threads_per_core; i++) {
+ if (i == cpu)
+ continue;
+ if (vcpu_is_dispatched(i))
+ return false;
+ if (!is_vcpu_idle(i))
+ return true;
+ }
+ }
+#endif
+
+ /*
+ * None of the threads in the target CPU's core are running, but none
+ * of them were preempted either. Hence assume the target CPU to be
+ * non-preempted.
+ */
+ return false;
+}
+
+static inline bool pv_is_native_spin_unlock(void)
+{
+ return !is_shared_processor();
+}
+
+#endif /* _ASM_POWERPC_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/paravirt_api_clock.h b/arch/powerpc/include/asm/paravirt_api_clock.h
new file mode 100644
index 000000000000..d25ca7ac57c7
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt_api_clock.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/paravirt.h>
diff --git a/arch/powerpc/include/asm/parport.h b/arch/powerpc/include/asm/parport.h
index a452968b29ea..42cc321ed754 100644
--- a/arch/powerpc/include/asm/parport.h
+++ b/arch/powerpc/include/asm/parport.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* parport.h: platform-specific PC-style parport initialisation
*
@@ -10,7 +11,7 @@
#define _ASM_POWERPC_PARPORT_H
#ifdef __KERNEL__
-#include <asm/prom.h>
+#include <linux/of_irq.h>
static int parport_pc_find_nonpci_ports (int autoirq, int autodma)
{
@@ -28,7 +29,7 @@ static int parport_pc_find_nonpci_ports (int autoirq, int autodma)
io1 = prop[1]; io2 = prop[2];
virq = irq_of_parse_and_map(np, 0);
- if (virq == NO_IRQ)
+ if (!virq)
continue;
if (parport_pc_probe_port(io1, io2, virq, autodma, NULL, 0)
diff --git a/arch/powerpc/include/asm/pasemi_dma.h b/arch/powerpc/include/asm/pasemi_dma.h
index eafa5a5f56de..712a0b32120f 100644
--- a/arch/powerpc/include/asm/pasemi_dma.h
+++ b/arch/powerpc/include/asm/pasemi_dma.h
@@ -1,22 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2006-2008 PA Semi, Inc
*
* Hardware register layout and descriptor formats for the on-board
* DMA engine on PA Semi PWRficient. Used by ethernet, function and security
* drivers.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef ASM_PASEMI_DMA_H
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 4ca90a39d6d0..1dae53130782 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -1,20 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_PCI_BRIDGE_H
#define _ASM_POWERPC_PCI_BRIDGE_H
#ifdef __KERNEL__
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/pci.h>
#include <linux/list.h>
#include <linux/ioport.h>
-#include <asm-generic/pci-bridge.h>
+#include <linux/numa.h>
+#include <linux/iommu.h>
struct device_node;
/*
+ * PCI controller operations
+ */
+struct pci_controller_ops {
+ void (*dma_dev_setup)(struct pci_dev *pdev);
+ void (*dma_bus_setup)(struct pci_bus *bus);
+ bool (*iommu_bypass_supported)(struct pci_dev *pdev,
+ u64 mask);
+
+ int (*probe_mode)(struct pci_bus *bus);
+
+ /* Called when pci_enable_device() is called. Returns true to
+ * allow assignment/enabling of the device. */
+ bool (*enable_device_hook)(struct pci_dev *pdev);
+
+ void (*disable_device)(struct pci_dev *pdev);
+
+ void (*release_device)(struct pci_dev *pdev);
+
+ /* Called during PCI resource reassignment */
+ resource_size_t (*window_alignment)(struct pci_bus *bus,
+ unsigned long type);
+ void (*setup_bridge)(struct pci_bus *bus,
+ unsigned long type);
+ void (*reset_secondary_bus)(struct pci_dev *pdev);
+
+#ifdef CONFIG_PCI_MSI
+ int (*setup_msi_irqs)(struct pci_dev *pdev,
+ int nvec, int type);
+ void (*teardown_msi_irqs)(struct pci_dev *pdev);
+#endif
+
+ void (*shutdown)(struct pci_controller *hose);
+
+ struct iommu_group *(*device_group)(struct pci_controller *hose,
+ struct pci_dev *pdev);
+};
+
+/*
* Structure of a PCI controller (host bridge)
*/
struct pci_controller {
@@ -34,7 +70,7 @@ struct pci_controller {
void __iomem *io_base_virt;
#ifdef CONFIG_PPC64
- void *io_base_alloc;
+ void __iomem *io_base_alloc;
#endif
resource_size_t io_base_phys;
resource_size_t pci_io_size;
@@ -46,6 +82,7 @@ struct pci_controller {
resource_size_t isa_mem_phys;
resource_size_t isa_mem_size;
+ struct pci_controller_ops controller_ops;
struct pci_ops *ops;
unsigned int __iomem *cfg_addr;
void __iomem *cfg_data;
@@ -89,9 +126,16 @@ struct pci_controller {
#ifdef CONFIG_PPC64
unsigned long buid;
+ struct pci_dn *pci_data;
#endif /* CONFIG_PPC64 */
void *private_data;
+
+ /* IRQ domain hierarchy */
+ struct irq_domain *dev_domain;
+
+ /* iommu_ops support */
+ struct iommu_device iommu;
};
/* These are used for config access before all the PCI probing
@@ -119,6 +163,10 @@ extern void setup_indirect_pci(struct pci_controller* hose,
extern int indirect_read_config(struct pci_bus *bus, unsigned int devfn,
int offset, int len, u32 *val);
+extern int __indirect_read_config(struct pci_controller *hose,
+ unsigned char bus_number, unsigned int devfn,
+ int offset, int len, u32 *val);
+
extern int indirect_write_config(struct pci_bus *bus, unsigned int devfn,
int offset, int len, u32 val);
@@ -127,19 +175,17 @@ static inline struct pci_controller *pci_bus_to_host(const struct pci_bus *bus)
return bus->sysdata;
}
-#ifndef CONFIG_PPC64
-
+#ifdef CONFIG_PPC_PMAC
extern int pci_device_from_OF_node(struct device_node *node,
u8 *bus, u8 *devfn);
-extern void pci_create_OF_bus_map(void);
+#endif
+#ifndef CONFIG_PPC64
-static inline int isa_vaddr_is_ioport(void __iomem *address)
-{
- /* No specific ISA handling on ppc32 at this stage, it
- * all goes through PCI
- */
- return 0;
-}
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+extern void pci_create_OF_bus_map(void);
+#else
+static inline void pci_create_OF_bus_map(void) {}
+#endif
#else /* CONFIG_PPC64 */
@@ -150,79 +196,73 @@ static inline int isa_vaddr_is_ioport(void __iomem *address)
struct iommu_table;
struct pci_dn {
+ int flags;
+#define PCI_DN_FLAG_IOV_VF 0x01
+#define PCI_DN_FLAG_DEAD 0x02 /* Device has been hot-removed */
+
int busno; /* pci bus number */
int devfn; /* pci device and function number */
+ int vendor_id; /* Vendor ID */
+ int device_id; /* Device ID */
+ int class_code; /* Device class code */
+ struct pci_dn *parent;
struct pci_controller *phb; /* for pci devices */
- struct iommu_table *iommu_table; /* for phb's or bridges */
- struct device_node *node; /* back-pointer to the device_node */
+ struct iommu_table_group *table_group; /* for phb's or bridges */
int pci_ext_config_space; /* for pci devices */
-
- bool force_32bit_msi;
-
- struct pci_dev *pcidev; /* back-pointer to the pci device */
#ifdef CONFIG_EEH
struct eeh_dev *edev; /* eeh device */
#endif
-#define IODA_INVALID_PE (-1)
-#ifdef CONFIG_PPC_POWERNV
- int pe_number;
-#endif
+#define IODA_INVALID_PE 0xFFFFFFFF
+ unsigned int pe_number;
+#ifdef CONFIG_PCI_IOV
+ u16 vfs_expanded; /* number of VFs IOV BAR expanded */
+ u16 num_vfs; /* number of VFs enabled*/
+ unsigned int *pe_num_map; /* PE# for the first VF PE or array */
+ bool m64_single_mode; /* Use M64 BAR in Single Mode */
+#define IODA_INVALID_M64 (-1)
+ int (*m64_map)[PCI_SRIOV_NUM_BARS]; /* Only used on powernv */
+ int last_allow_rc; /* Only used on pseries */
+#endif /* CONFIG_PCI_IOV */
+ int mps; /* Maximum Payload Size */
+ struct list_head child_list;
+ struct list_head list;
+ struct resource holes[PCI_SRIOV_NUM_BARS];
};
/* Get the pointer to a device_node's pci_dn */
#define PCI_DN(dn) ((struct pci_dn *) (dn)->data)
+extern struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+ int devfn);
extern struct pci_dn *pci_get_pdn(struct pci_dev *pdev);
+extern struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+ struct device_node *dn);
+extern void pci_remove_device_node_info(struct device_node *dn);
-extern void * update_dn_pci_info(struct device_node *dn, void *data);
-
-static inline int pci_device_from_OF_node(struct device_node *np,
- u8 *bus, u8 *devfn)
-{
- if (!PCI_DN(np))
- return -ENODEV;
- *bus = PCI_DN(np)->busno;
- *devfn = PCI_DN(np)->devfn;
- return 0;
-}
+#ifdef CONFIG_PCI_IOV
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev);
+void remove_sriov_vf_pdns(struct pci_dev *pdev);
+#endif
#if defined(CONFIG_EEH)
-static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn)
+static inline struct eeh_dev *pdn_to_eeh_dev(struct pci_dn *pdn)
{
- /*
- * For those OF nodes whose parent isn't PCI bridge, they
- * don't have PCI_DN actually. So we have to skip them for
- * any EEH operations.
- */
- if (!dn || !PCI_DN(dn))
- return NULL;
-
- return PCI_DN(dn)->edev;
+ return pdn ? pdn->edev : NULL;
}
#else
-#define of_node_to_eeh_dev(x) (NULL)
+#define pdn_to_eeh_dev(x) (NULL)
#endif
/** Find the bus corresponding to the indicated device node */
-extern struct pci_bus *pcibios_find_pci_bus(struct device_node *dn);
+extern struct pci_bus *pci_find_bus_by_node(struct device_node *dn);
/** Remove all of the PCI devices under this bus */
-extern void pcibios_remove_pci_devices(struct pci_bus *bus);
+extern void pci_hp_remove_devices(struct pci_bus *bus);
/** Discover new pci devices under this bus, and add them */
-extern void pcibios_add_pci_devices(struct pci_bus *bus);
-
-
-extern void isa_bridge_find_early(struct pci_controller *hose);
-
-static inline int isa_vaddr_is_ioport(void __iomem *address)
-{
- /* Check if address hits the reserved legacy IO range */
- unsigned long ea = (unsigned long)address;
- return ea >= ISA_IO_BASE && ea < ISA_IO_END;
-}
+extern void pci_hp_add_devices(struct pci_bus *bus);
extern int pcibios_unmap_io_space(struct pci_bus *bus);
extern int pcibios_map_io_space(struct pci_bus *bus);
@@ -230,7 +270,7 @@ extern int pcibios_map_io_space(struct pci_bus *bus);
#ifdef CONFIG_NUMA
#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = (NODE))
#else
-#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = -1)
+#define PHB_SET_NODE(PHB, NODE) ((PHB)->node = NUMA_NO_NODE)
#endif
#endif /* CONFIG_PPC64 */
@@ -239,6 +279,8 @@ extern int pcibios_map_io_space(struct pci_bus *bus);
extern struct pci_controller *pci_find_hose_for_OF_device(
struct device_node* node);
+extern struct pci_controller *pci_find_controller_for_domain(int domain_nr);
+
/* Fill up host controller resources from the OF node */
extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct device_node *dev, int primary);
@@ -246,6 +288,7 @@ extern void pci_process_bridge_OF_ranges(struct pci_controller *hose,
/* Allocate & free a PCI host bridge structure */
extern struct pci_controller *pcibios_alloc_controller(struct device_node *dev);
extern void pcibios_free_controller(struct pci_controller *phb);
+extern void pcibios_free_controller_deferred(struct pci_host_bridge *bridge);
#ifdef CONFIG_PCI
extern int pcibios_vaddr_is_ioport(void __iomem *address);
diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 1b0739bc14b5..46a9c4491ed0 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -1,28 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __ASM_POWERPC_PCI_H
#define __ASM_POWERPC_PCI_H
#ifdef __KERNEL__
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
+#include <linux/scatterlist.h>
#include <asm/machdep.h>
-#include <asm/scatterlist.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
-#include <asm-generic/pci-dma-compat.h>
-
-/* Return values for ppc_md.pci_probe_mode function */
+/* Return values for pci_controller_ops.probe_mode function */
#define PCI_PROBE_NONE -1 /* Don't look at this bus at all */
#define PCI_PROBE_NORMAL 0 /* Do normal PCI probing */
#define PCI_PROBE_DEVTREE 1 /* Instantiate from device tree */
@@ -30,8 +24,6 @@
#define PCIBIOS_MIN_IO 0x1000
#define PCIBIOS_MIN_MEM 0x10000000
-struct pci_dev;
-
/* Values for the `which' argument to sys_pciconfig_iobase syscall. */
#define IOBASE_BRIDGE_NUMBER 0
#define IOBASE_MEMORY 1
@@ -46,7 +38,6 @@ struct pci_dev;
#define pcibios_assign_all_busses() \
(pci_has_flag(PCI_REASSIGN_ALL_BUS))
-#define HAVE_ARCH_PCI_GET_LEGACY_IDE_IRQ
static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
{
if (ppc_md.pci_get_legacy_ide_irq)
@@ -55,11 +46,9 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
}
#ifdef CONFIG_PCI
-extern void set_pci_dma_ops(struct dma_map_ops *dma_ops);
-extern struct dma_map_ops *get_pci_dma_ops(void);
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops);
#else /* CONFIG_PCI */
#define set_pci_dma_ops(d)
-#define get_pci_dma_ops() NULL
#endif
#ifdef CONFIG_PPC64
@@ -71,36 +60,6 @@ extern struct dma_map_ops *get_pci_dma_ops(void);
*/
#define PCI_DISABLE_MWI
-#ifdef CONFIG_PCI
-static inline void pci_dma_burst_advice(struct pci_dev *pdev,
- enum pci_dma_burst_strategy *strat,
- unsigned long *strategy_parameter)
-{
- unsigned long cacheline_size;
- u8 byte;
-
- pci_read_config_byte(pdev, PCI_CACHE_LINE_SIZE, &byte);
- if (byte == 0)
- cacheline_size = 1024;
- else
- cacheline_size = (int) byte * 4;
-
- *strat = PCI_DMA_BURST_MULTIPLE;
- *strategy_parameter = cacheline_size;
-}
-#endif
-
-#else /* 32-bit */
-
-#ifdef CONFIG_PCI
-static inline void pci_dma_burst_advice(struct pci_dev *pdev,
- enum pci_dma_burst_strategy *strat,
- unsigned long *strategy_parameter)
-{
- *strat = PCI_DMA_BURST_INFINITY;
- *strategy_parameter = ~0UL;
-}
-#endif
#endif /* CONFIG_PPC64 */
extern int pci_domain_nr(struct pci_bus *bus);
@@ -109,12 +68,12 @@ extern int pci_domain_nr(struct pci_bus *bus);
extern int pci_proc_domain(struct pci_bus *bus);
struct vm_area_struct;
-/* Map a range of PCI memory or I/O space for a device into user space */
-int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine);
-/* Tell drivers/pci/proc.c that we have pci_mmap_page_range() */
-#define HAVE_PCI_MMAP 1
+/* Tell PCI code what kind of PCI resource mappings we support */
+#define HAVE_PCI_MMAP 1
+#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
+#define arch_can_pci_mmap_io() 1
+#define arch_can_pci_mmap_wc() 1
extern int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val,
size_t count);
@@ -123,27 +82,10 @@ extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val,
extern int pci_mmap_legacy_page_range(struct pci_bus *bus,
struct vm_area_struct *vma,
enum pci_mmap_state mmap_state);
-
+extern void pci_adjust_legacy_attr(struct pci_bus *bus,
+ enum pci_mmap_state mmap_type);
#define HAVE_PCI_LEGACY 1
-#ifdef CONFIG_PPC64
-
-/* The PCI address space does not equal the physical memory address
- * space (we have an IOMMU). The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (0)
-
-#else /* 32-bit */
-
-/* The PCI address space does equal the physical memory
- * address space (no IOMMU). The IDE and SCSI device layers use
- * this boolean for bounce buffer decisions.
- */
-#define PCI_DMA_BUS_IS_PHYS (1)
-
-#endif /* CONFIG_PPC64 */
-
extern void pcibios_claim_one_bus(struct pci_bus *b);
extern void pcibios_finish_adding_to_bus(struct pci_bus *bus);
@@ -156,27 +98,22 @@ extern int remove_phb_dynamic(struct pci_controller *phb);
extern struct pci_dev *of_create_pci_dev(struct device_node *node,
struct pci_bus *bus, int devfn);
+extern unsigned int pci_parse_of_flags(u32 addr0, int bridge);
+
extern void of_scan_pci_bridge(struct pci_dev *dev);
extern void of_scan_bus(struct device_node *node, struct pci_bus *bus);
extern void of_rescan_bus(struct device_node *node, struct pci_bus *bus);
-struct file;
-extern pgprot_t pci_phys_mem_access_prot(struct file *file,
- unsigned long pfn,
+extern pgprot_t pci_phys_mem_access_prot(unsigned long pfn,
unsigned long size,
pgprot_t prot);
-#define HAVE_ARCH_PCI_RESOURCE_TO_USER
-extern void pci_resource_to_user(const struct pci_dev *dev, int bar,
- const struct resource *rsrc,
- resource_size_t *start, resource_size_t *end);
-
extern resource_size_t pcibios_io_space_offset(struct pci_controller *hose);
-extern void pcibios_setup_bus_devices(struct pci_bus *bus);
extern void pcibios_setup_bus_self(struct pci_bus *bus);
extern void pcibios_setup_phb_io_space(struct pci_controller *hose);
extern void pcibios_scan_phb(struct pci_controller *hose);
#endif /* __KERNEL__ */
+
#endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
index 2cedefddba37..ecf5ac70cfae 100644
--- a/arch/powerpc/include/asm/percpu.h
+++ b/arch/powerpc/include/asm/percpu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_PERCPU_H_
#define _ASM_POWERPC_PERCPU_H_
#ifdef __powerpc64__
@@ -9,13 +10,23 @@
#ifdef CONFIG_SMP
-#include <asm/paca.h>
-
#define __my_cpu_offset local_paca->data_offset
#endif /* CONFIG_SMP */
#endif /* __powerpc64__ */
+#if defined(CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) && defined(CONFIG_SMP)
+#include <linux/jump_label.h>
+DECLARE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);
+
+#define percpu_first_chunk_is_paged \
+ (static_key_enabled(&__percpu_first_chunk_is_paged.key))
+#else
+#define percpu_first_chunk_is_paged false
+#endif
+
#include <asm-generic/percpu.h>
+#include <asm/paca.h>
+
#endif /* _ASM_POWERPC_PERCPU_H_ */
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index 0bb23725b1e7..164e910bf654 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Performance event support - hardware-specific disambiguation
*
@@ -7,15 +8,13 @@
* devices other than the core which provide their own performance counters.
*
* Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifdef CONFIG_PPC_PERF_CTRS
#include <asm/perf_event_server.h>
+#else
+static inline bool is_sier_available(void) { return false; }
+static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; }
#endif
#ifdef CONFIG_FSL_EMB_PERF_EVENT
@@ -26,6 +25,8 @@
#include <asm/ptrace.h>
#include <asm/reg.h>
+#define perf_arch_bpf_user_pt_regs(regs) &regs->user_regs
+
/*
* Overload regs->result to specify whether we should use the MSR (result
* is zero) or the SIAR (result is non zero).
@@ -34,7 +35,14 @@
do { \
(regs)->result = 0; \
(regs)->nip = __ip; \
- (regs)->gpr[1] = *(unsigned long *)__get_SP(); \
+ (regs)->gpr[1] = current_stack_frame(); \
asm volatile("mfmsr %0" : "=r" ((regs)->msr)); \
} while (0)
+
+/* To support perf_regs sier update */
+extern bool is_sier_available(void);
+extern unsigned long get_pmcs_ext_regs(int idx);
+/* To define perf extended regs mask value */
+extern u64 PERF_REG_EXTENDED_MASK;
+#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK
#endif
diff --git a/arch/powerpc/include/asm/perf_event_fsl_emb.h b/arch/powerpc/include/asm/perf_event_fsl_emb.h
index a58165450f6f..c4d9ceb03e8f 100644
--- a/arch/powerpc/include/asm/perf_event_fsl_emb.h
+++ b/arch/powerpc/include/asm/perf_event_fsl_emb.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Performance event support - Freescale embedded specific definitions.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
* Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 814622146d5a..af0f46e2373b 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Performance event support - PowerPC classic/server specific definitions.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
@@ -21,6 +17,13 @@
struct perf_event;
+struct mmcr_regs {
+ unsigned long mmcr0;
+ unsigned long mmcr1;
+ unsigned long mmcr2;
+ unsigned long mmcra;
+ unsigned long mmcr3;
+};
/*
* This struct provides the constants and functions needed to
* describe the PMU on a particular POWER-family CPU.
@@ -32,26 +35,44 @@ struct power_pmu {
unsigned long add_fields;
unsigned long test_adder;
int (*compute_mmcr)(u64 events[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[],
- struct perf_event *pevents[]);
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags);
int (*get_constraint)(u64 event_id, unsigned long *mskp,
- unsigned long *valp);
+ unsigned long *valp, u64 event_config1);
int (*get_alternatives)(u64 event_id, unsigned int flags,
u64 alt[]);
+ void (*get_mem_data_src)(union perf_mem_data_src *dsrc,
+ u32 flags, struct pt_regs *regs);
+ void (*get_mem_weight)(u64 *weight, u64 type);
+ unsigned long group_constraint_mask;
+ unsigned long group_constraint_val;
u64 (*bhrb_filter_map)(u64 branch_sample_type);
void (*config_bhrb)(u64 pmu_bhrb_filter);
- void (*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
+ void (*disable_pmc)(unsigned int pmc, struct mmcr_regs *mmcr);
int (*limited_pmc_event)(u64 event_id);
u32 flags;
const struct attribute_group **attr_groups;
int n_generic;
int *generic_events;
- int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+ u64 (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
+ int n_blacklist_ev;
+ int *blacklist_ev;
/* BHRB entries in the PMU */
int bhrb_nr;
+ /*
+ * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
+ * the pmu supports extended perf regs capability
+ */
+ int capabilities;
+ /*
+ * Function to check event code for values which are
+ * reserved. Function takes struct perf_event as input,
+ * since event code could be spread in attr.config*
+ */
+ int (*check_attr_config)(struct perf_event *ev);
};
/*
@@ -65,6 +86,11 @@ struct power_pmu {
#define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */
#define PPMU_HAS_SIER 0x00000040 /* Has SIER */
#define PPMU_ARCH_207S 0x00000080 /* PMC is architecture v2.07S */
+#define PPMU_NO_SIAR 0x00000100 /* Do not use SIAR */
+#define PPMU_ARCH_31 0x00000200 /* Has MMCR3, SIER2 and SIER3 */
+#define PPMU_P10_DD1 0x00000400 /* Is power10 DD1 processor version */
+#define PPMU_P10 0x00000800 /* For power10 pmu */
+#define PPMU_HAS_ATTR_CONFIG1 0x00001000 /* Using config1 attribute */
/*
* Values for flags to get_alternatives()
@@ -73,11 +99,11 @@ struct power_pmu {
#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */
#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */
-extern int register_power_pmu(struct power_pmu *);
+int __init register_power_pmu(struct power_pmu *pmu);
struct pt_regs;
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
extern unsigned long int read_bhrb(int n);
/*
@@ -85,7 +111,7 @@ extern unsigned long int read_bhrb(int n);
* if we have hardware PMU support.
*/
#ifdef CONFIG_PPC_PERF_CTRS
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
#endif
/*
@@ -136,16 +162,24 @@ extern ssize_t power_events_sysfs_show(struct device *dev,
* event 'cpu-cycles' can have two entries in sysfs: 'cpu-cycles' and
* 'PM_CYC' where the latter is the name by which the event is known in
* POWER CPU specification.
+ *
+ * Similarly, some hardware and cache events use the same event code. Eg.
+ * on POWER8, both "cache-references" and "L1-dcache-loads" events refer
+ * to the same event, PM_LD_REF_L1. The suffix, allows us to have two
+ * sysfs objects for the same event and thus two entries/aliases in sysfs.
*/
#define EVENT_VAR(_id, _suffix) event_attr_##_id##_suffix
#define EVENT_PTR(_id, _suffix) &EVENT_VAR(_id, _suffix).attr.attr
#define EVENT_ATTR(_name, _id, _suffix) \
- PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), PME_##_id, \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id, _suffix), _id, \
power_events_sysfs_show)
#define GENERIC_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _g)
#define GENERIC_EVENT_PTR(_id) EVENT_PTR(_id, _g)
+#define CACHE_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _c)
+#define CACHE_EVENT_PTR(_id) EVENT_PTR(_id, _c)
+
#define POWER_EVENT_ATTR(_name, _id) EVENT_ATTR(_name, _id, _p)
#define POWER_EVENT_PTR(_id) EVENT_PTR(_id, _p)
diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h
deleted file mode 100644
index 842846c1b711..000000000000
--- a/arch/powerpc/include/asm/pgalloc-32.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _ASM_POWERPC_PGALLOC_32_H
-#define _ASM_POWERPC_PGALLOC_32_H
-
-#include <linux/threads.h>
-
-/* For 32-bit, all levels of page tables are just drawn from get_free_page() */
-#define MAX_PGTABLE_INDEX_SIZE 0
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern pgd_t *pgd_alloc(struct mm_struct *mm);
-extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
-
-/*
- * We don't have any real pmd's, and this code never triggers because
- * the pgd will always be present..
- */
-/* #define pmd_alloc_one(mm,address) ({ BUG(); ((pmd_t *)2); }) */
-#define pmd_free(mm, x) do { } while (0)
-#define __pmd_free_tlb(tlb,x,a) do { } while (0)
-/* #define pgd_populate(mm, pmd, pte) BUG() */
-
-#ifndef CONFIG_BOOKE
-#define pmd_populate_kernel(mm, pmd, pte) \
- (pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
-#define pmd_populate(mm, pmd, pte) \
- (pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
-#define pmd_pgtable(pmd) pmd_page(pmd)
-#else
-#define pmd_populate_kernel(mm, pmd, pte) \
- (pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
-#define pmd_populate(mm, pmd, pte) \
- (pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
-#define pmd_pgtable(pmd) pmd_page(pmd)
-#endif
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long addr);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- BUG_ON(index_size); /* 32-bit doesn't use this */
- free_page((unsigned long)table);
-}
-
-#define check_pgt_cache() do { } while (0)
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_page_dtor(table);
- pgtable_free_tlb(tlb, page_address(table), 0);
-}
-#endif /* _ASM_POWERPC_PGALLOC_32_H */
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
deleted file mode 100644
index 4b0be20fcbfd..000000000000
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ /dev/null
@@ -1,245 +0,0 @@
-#ifndef _ASM_POWERPC_PGALLOC_64_H
-#define _ASM_POWERPC_PGALLOC_64_H
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/slab.h>
-#include <linux/cpumask.h>
-#include <linux/percpu.h>
-
-struct vmemmap_backing {
- struct vmemmap_backing *list;
- unsigned long phys;
- unsigned long virt_addr;
-};
-extern struct vmemmap_backing *vmemmap_list;
-
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) ({ \
- BUG_ON(!(shift)); \
- pgtable_cache[(shift) - 1]; \
- })
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE), GFP_KERNEL);
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
-#ifndef CONFIG_PPC_64K_PAGES
-
-#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD)
-
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return kmem_cache_alloc(PGT_CACHE(PUD_INDEX_SIZE),
- GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
- kmem_cache_free(PGT_CACHE(PUD_INDEX_SIZE), pud);
-}
-
-static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-{
- pud_set(pud, (unsigned long)pmd);
-}
-
-#define pmd_populate(mm, pmd, pte_page) \
- pmd_populate_kernel(mm, pmd, page_address(pte_page))
-#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
-{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
-{
- struct page *page;
- pte_t *pte;
-
- pte = pte_alloc_one_kernel(mm, address);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- if (!index_size)
- free_page((unsigned long)table);
- else {
- BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(index_size), table);
- }
-}
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else /* !CONFIG_SMP */
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif /* CONFIG_SMP */
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_page_dtor(table);
- pgtable_free_tlb(tlb, page_address(table), 0);
-}
-
-#else /* if CONFIG_PPC_64K_PAGES */
-/*
- * we support 16 fragments per PTE page.
- */
-#define PTE_FRAG_NR 16
-/*
- * We use a 2K PTE page fragment and another 2K for storing
- * real_pte_t hash index
- */
-#define PTE_FRAG_SIZE_SHIFT 12
-#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
-
-extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
-extern void page_table_free(struct mm_struct *, unsigned long *, int);
-extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
-#ifdef CONFIG_SMP
-extern void __tlb_remove_table(void *_table);
-#endif
-
-#define pud_populate(mm, pud, pmd) pud_set(pud, (unsigned long)pmd)
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
- pte_t *pte)
-{
- pmd_set(pmd, (unsigned long)pte);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
- pgtable_t pte_page)
-{
- pmd_set(pmd, (unsigned long)pte_page);
-}
-
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)(pmd_val(pmd) & ~PMD_MASKED_BITS);
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
- unsigned long address)
-{
- return (pte_t *)page_table_alloc(mm, address, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm,
- unsigned long address)
-{
- return (pgtable_t)page_table_alloc(mm, address, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- page_table_free(mm, (unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- page_table_free(mm, (unsigned long *)ptepage, 0);
-}
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, table, 0);
-}
-#endif /* CONFIG_PPC_64K_PAGES */
-
-static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
- GFP_KERNEL|__GFP_REPEAT);
-}
-
-static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
-{
- kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
-}
-
-#define __pmd_free_tlb(tlb, pmd, addr) \
- pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
-#define __pud_free_tlb(tlb, pud, addr) \
- pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#define check_pgt_cache() do { } while (0)
-
-#endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e9a9f60e596d..3a971e2a8c73 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -1,24 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_PGALLOC_H
#define _ASM_POWERPC_PGALLOC_H
-#ifdef __KERNEL__
#include <linux/mm.h>
-#include <asm-generic/tlb.h>
-#ifdef CONFIG_PPC_BOOK3E
-extern void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address);
-#else /* CONFIG_PPC_BOOK3E */
-static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
- unsigned long address)
+#ifndef MODULE
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
{
+ if (unlikely(mm == &init_mm))
+ return gfp;
+ return gfp | __GFP_ACCOUNT;
}
-#endif /* !CONFIG_PPC_BOOK3E */
+#else /* !MODULE */
+static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
+{
+ return gfp | __GFP_ACCOUNT;
+}
+#endif /* MODULE */
+
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+/* arch use pte_free_defer() implementation in arch/powerpc/mm/pgtable-frag.c */
+#define pte_free_defer pte_free_defer
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable);
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages, the allocation size will be
+ * (2^index_size * sizeof(pointer)) and allocations are drawn from
+ * the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) pgtable_cache[shift]
-#ifdef CONFIG_PPC64
-#include <asm/pgalloc-64.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgalloc.h>
#else
-#include <asm/pgalloc-32.h>
+#include <asm/nohash/pgalloc.h>
#endif
-#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
new file mode 100644
index 000000000000..6bd8f89b25dc
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_BE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_BE_TYPES_H
+
+#include <asm/cmpxchg.h>
+
+/* PTE level */
+typedef struct { __be64 pte; } pte_t;
+#define __pte(x) ((pte_t) { cpu_to_be64(x) })
+#define __pte_raw(x) ((pte_t) { (x) })
+static inline unsigned long pte_val(pte_t x)
+{
+ return be64_to_cpu(x.pte);
+}
+
+static inline __be64 pte_raw(pte_t x)
+{
+ return x.pte;
+}
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { __be64 pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { cpu_to_be64(x) })
+#define __pmd_raw(x) ((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+ return be64_to_cpu(x.pmd);
+}
+
+static inline __be64 pmd_raw(pmd_t x)
+{
+ return x.pmd;
+}
+
+/* 64 bit always use 4 level table. */
+typedef struct { __be64 pud; } pud_t;
+#define __pud(x) ((pud_t) { cpu_to_be64(x) })
+#define __pud_raw(x) ((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+ return be64_to_cpu(x.pud);
+}
+
+static inline __be64 pud_raw(pud_t x)
+{
+ return x.pud;
+}
+
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+typedef struct { __be64 pgd; } pgd_t;
+#define __pgd(x) ((pgd_t) { cpu_to_be64(x) })
+#define __pgd_raw(x) ((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+ return be64_to_cpu(x.pgd);
+}
+
+static inline __be64 pgd_raw(pgd_t x)
+{
+ return x.pgd;
+}
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
+{
+ unsigned long *p = (unsigned long *)ptep;
+ __be64 prev;
+
+ /* See comment in switch_mm_irqs_off() */
+ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pte_raw(old),
+ (__force unsigned long)pte_raw(new));
+
+ return pte_raw(old) == prev;
+}
+
+static inline bool pmd_xchg(pmd_t *pmdp, pmd_t old, pmd_t new)
+{
+ unsigned long *p = (unsigned long *)pmdp;
+ __be64 prev;
+
+ prev = (__force __be64)__cmpxchg_u64(p, (__force unsigned long)pmd_raw(old),
+ (__force unsigned long)pmd_raw(new));
+
+ return pmd_raw(old) == prev;
+}
+
+#endif /* _ASM_POWERPC_PGTABLE_BE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pgtable-masks.h b/arch/powerpc/include/asm/pgtable-masks.h
new file mode 100644
index 000000000000..6e8e2db26a5a
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-masks.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_MASKS_H
+#define _ASM_POWERPC_PGTABLE_MASKS_H
+
+#ifndef _PAGE_NA
+#define _PAGE_NA 0
+#define _PAGE_NAX _PAGE_EXEC
+#define _PAGE_RO _PAGE_READ
+#define _PAGE_ROX (_PAGE_READ | _PAGE_EXEC)
+#define _PAGE_RW (_PAGE_READ | _PAGE_WRITE)
+#define _PAGE_RWX (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
+#endif
+
+/* Permission flags for kernel mappings */
+#ifndef _PAGE_KERNEL_RO
+#define _PAGE_KERNEL_RO _PAGE_RO
+#define _PAGE_KERNEL_ROX _PAGE_ROX
+#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RWX (_PAGE_RWX | _PAGE_DIRTY)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
+#define PAGE_EXECONLY_X __pgprot(_PAGE_BASE | _PAGE_NAX)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_RWX)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_ROX)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_ROX)
+
+#endif /* _ASM_POWERPC_PGTABLE_MASKS_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
deleted file mode 100644
index 47edde8c3556..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ /dev/null
@@ -1,341 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC32_H
-#define _ASM_POWERPC_PGTABLE_PPC32_H
-
-#include <asm-generic/pgtable-nopmd.h>
-
-#ifndef __ASSEMBLY__
-#include <linux/sched.h>
-#include <linux/threads.h>
-#include <asm/io.h> /* For sub-arch specific PPC_PIN_SIZE */
-
-extern unsigned long va_to_phys(unsigned long address);
-extern pte_t *va_to_pte(unsigned long address);
-extern unsigned long ioremap_bot;
-
-#ifdef CONFIG_44x
-extern int icache_44x_need_flush;
-#endif
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * The normal case is that PTEs are 32-bits and we have a 1-page
- * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages. -- paulus
- *
- * For any >32-bit physical address platform, we can use the following
- * two level page table layout where the pgdir is 8KB and the MS 13 bits
- * are an index to the second level table. The combined pgdir/pmd first
- * level has 2048 entries and the second level has 512 64-bit PTE entries.
- * -Matt
- */
-/* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT (PAGE_SHIFT + PTE_SHIFT)
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/*
- * entries per page directory level: our page-table tree is two-level, so
- * we don't really have any PMD directory.
- */
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_SHIFT)
-#define PGD_TABLE_SIZE (sizeof(pgd_t) << (32 - PGDIR_SHIFT))
-#endif /* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE (1 << PTE_SHIFT)
-#define PTRS_PER_PMD 1
-#define PTRS_PER_PGD (1 << (32 - PGDIR_SHIFT))
-
-#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS 0
-
-#define pte_ERROR(e) \
- printk("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
- (unsigned long long)pte_val(e))
-#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/*
- * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
- * value (for now) on others, from where we can start layout kernel
- * virtual space that goes below PKMAP and FIXMAP
- */
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP PKMAP_BASE
-#else
-#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */
-#endif
-
-/*
- * ioremap_bot starts at that address. Early ioremaps move down from there,
- * until mem_init() at which point this becomes the top of the vmalloc
- * and ioremap space
- */
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP ((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
-#else
-#define IOREMAP_TOP KVIRT_TOP
-#endif
-
-/*
- * Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 16MB value just means that there will be a 64MB "hole" after the
- * physical memory until the kernel virtual memory starts. That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- *
- * We no longer map larger than phys RAM with the BATs so we don't have
- * to worry about the VMALLOC_OFFSET causing problems. We do have to worry
- * about clashes between our early calls to ioremap() that start growing down
- * from ioremap_base being run into the VM area allocations (growing upwards
- * from VMALLOC_START). For this reason we have ioremap_bot to check when
- * we actually run into our mappings setup in the early boot with the VM
- * system. This really does become a problem for machines with good amounts
- * of RAM. -- Cort
- */
-#define VMALLOC_OFFSET (0x1000000) /* 16M */
-#ifdef PPC_PIN_SIZE
-#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
-#else
-#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
-#endif
-#define VMALLOC_END ioremap_bot
-
-/*
- * Bits in a linux-style PTE. These match the bits in the
- * (hardware-defined) PowerPC PTE as closely as possible.
- */
-
-#if defined(CONFIG_40x)
-#include <asm/pte-40x.h>
-#elif defined(CONFIG_44x)
-#include <asm/pte-44x.h>
-#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
-#include <asm/pte-book3e.h>
-#elif defined(CONFIG_FSL_BOOKE)
-#include <asm/pte-fsl-booke.h>
-#elif defined(CONFIG_8xx)
-#include <asm/pte-8xx.h>
-#else /* CONFIG_6xx */
-#include <asm/pte-hash32.h>
-#endif
-
-/* And here we include common definitions */
-#include <asm/pte-common.h>
-
-#ifndef __ASSEMBLY__
-
-#define pte_clear(mm, addr, ptep) \
- do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
-
-#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
-#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
-#define pmd_clear(pmdp) do { pmd_val(*(pmdp)) = 0; } while (0)
-
-/*
- * When flushing the tlb entry for a page, we also need to flush the hash
- * table entry. flush_hash_pages is assembler (for speed) in hashtable.S.
- */
-extern int flush_hash_pages(unsigned context, unsigned long va,
- unsigned long pmdval, int count);
-
-/* Add an HPTE to the hash table */
-extern void add_hash_page(unsigned context, unsigned long va,
- unsigned long pmdval);
-
-/* Flush an entry from the TLB/hash table */
-extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
- unsigned long address);
-
-/*
- * PTE updates. This function is called whenever an existing
- * valid PTE is updated. This does -not- include set_pte_at()
- * which nowadays only sets a new PTE.
- *
- * Depending on the type of MMU, we may need to use atomic updates
- * and the PTE may be either 32 or 64 bit wide. In the later case,
- * when using atomic updates, only the low part of the PTE is
- * accessed atomically.
- *
- * In addition, on 44x, we also maintain a global flag indicating
- * that an executable user mapping was modified, which is needed
- * to properly flush the virtually tagged instruction cache of
- * those implementations.
- */
-#ifndef CONFIG_PTE_64BIT
-static inline unsigned long pte_update(pte_t *p,
- unsigned long clr,
- unsigned long set)
-{
-#ifdef PTE_ATOMIC_UPDATES
- unsigned long old, tmp;
-
- __asm__ __volatile__("\
-1: lwarx %0,0,%3\n\
- andc %1,%0,%4\n\
- or %1,%1,%5\n"
- PPC405_ERR77(0,%3)
-" stwcx. %1,0,%3\n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*p)
- : "r" (p), "r" (clr), "r" (set), "m" (*p)
- : "cc" );
-#else /* PTE_ATOMIC_UPDATES */
- unsigned long old = pte_val(*p);
- *p = __pte((old & ~clr) | set);
-#endif /* !PTE_ATOMIC_UPDATES */
-
-#ifdef CONFIG_44x
- if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
- icache_44x_need_flush = 1;
-#endif
- return old;
-}
-#else /* CONFIG_PTE_64BIT */
-static inline unsigned long long pte_update(pte_t *p,
- unsigned long clr,
- unsigned long set)
-{
-#ifdef PTE_ATOMIC_UPDATES
- unsigned long long old;
- unsigned long tmp;
-
- __asm__ __volatile__("\
-1: lwarx %L0,0,%4\n\
- lwzx %0,0,%3\n\
- andc %1,%L0,%5\n\
- or %1,%1,%6\n"
- PPC405_ERR77(0,%3)
-" stwcx. %1,0,%4\n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*p)
- : "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
- : "cc" );
-#else /* PTE_ATOMIC_UPDATES */
- unsigned long long old = pte_val(*p);
- *p = __pte((old & ~(unsigned long long)clr) | set);
-#endif /* !PTE_ATOMIC_UPDATES */
-
-#ifdef CONFIG_44x
- if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
- icache_44x_need_flush = 1;
-#endif
- return old;
-}
-#endif /* CONFIG_PTE_64BIT */
-
-/*
- * 2.6 calls this without flushing the TLB entry; this is wrong
- * for our hash-based implementation, we fix that up here.
- */
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
-{
- unsigned long old;
- old = pte_update(ptep, _PAGE_ACCESSED, 0);
-#if _PAGE_HASHPTE != 0
- if (old & _PAGE_HASHPTE) {
- unsigned long ptephys = __pa(ptep) & PAGE_MASK;
- flush_hash_pages(context, addr, ptephys, 1);
- }
-#endif
- return (old & _PAGE_ACCESSED) != 0;
-}
-#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
- __ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep)
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
-}
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0);
-}
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- ptep_set_wrprotect(mm, addr, ptep);
-}
-
-
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
-{
- unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
- pte_update(ptep, 0, bits);
-}
-
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
-
-/*
- * Note that on Book E processors, the pmd contains the kernel virtual
- * (lowmem) address of the pte page. The physical address is less useful
- * because everything runs with translation enabled (even the TLB miss
- * handler). On everything else the pmd contains the physical address
- * of the pte page. -- paulus
- */
-#ifndef CONFIG_BOOKE
-#define pmd_page_vaddr(pmd) \
- ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-#define pmd_page(pmd) \
- pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
-#else
-#define pmd_page_vaddr(pmd) \
- ((unsigned long) (pmd_val(pmd) & PAGE_MASK))
-#define pmd_page(pmd) \
- pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
-#endif
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-
-/* to find an entry in a page-table-directory */
-#define pgd_index(address) ((address) >> PGDIR_SHIFT)
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-
-/* Find an entry in the third-level page table.. */
-#define pte_index(address) \
- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, addr) \
- ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
-#define pte_offset_map(dir, addr) \
- ((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
-#define pte_unmap(pte) kunmap_atomic(pte)
-
-/*
- * Encode and decode a swap entry.
- * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the
- *_PAGE_HASHPTE bit (if used). -- paulus
- */
-#define __swp_type(entry) ((entry).val & 0x1f)
-#define __swp_offset(entry) ((entry).val >> 5)
-#define __swp_entry(type, offset) ((swp_entry_t) { (type) | ((offset) << 5) })
-#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-
-/* Encode and decode a nonlinear file mapping entry */
-#define PTE_FILE_MAX_BITS 29
-#define pte_to_pgoff(pte) (pte_val(pte) >> 3)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE })
-
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init() do { } while (0)
-
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
- pmd_t **pmdp);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_PGTABLE_PPC32_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
deleted file mode 100644
index a56b82fb0609..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ /dev/null
@@ -1,41 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC64_64K_H
-#define _ASM_POWERPC_PGTABLE_PPC64_64K_H
-
-#include <asm-generic/pgtable-nopud.h>
-
-
-#define PTE_INDEX_SIZE 8
-#define PMD_INDEX_SIZE 10
-#define PUD_INDEX_SIZE 0
-#define PGD_INDEX_SIZE 12
-
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE (sizeof(real_pte_t) << PTE_INDEX_SIZE)
-#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif /* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE (1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD (1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PGD (1 << PGD_INDEX_SIZE)
-
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT PAGE_SHIFT
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT (PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE (1UL << PMD_SHIFT)
-#define PMD_MASK (~(PMD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE)
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
-#define PGDIR_MASK (~(PGDIR_SIZE-1))
-
-/* Bits to mask out from a PMD to get to the PTE page */
-/* PMDs point to PTE table fragments which are 4K aligned. */
-#define PMD_MASKED_BITS 0xfff
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS 0x1ff
-
-#endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
deleted file mode 100644
index 7b3d54fae46f..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ /dev/null
@@ -1,580 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC64_H_
-#define _ASM_POWERPC_PGTABLE_PPC64_H_
-/*
- * This file contains the functions and defines necessary to modify and use
- * the ppc64 hashed page table.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/pgtable-ppc64-64k.h>
-#else
-#include <asm/pgtable-ppc64-4k.h>
-#endif
-#include <asm/barrier.h>
-
-#define FIRST_USER_ADDRESS 0
-
-/*
- * Size of EA range mapped by our pagetables.
- */
-#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
- PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
-#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1)
-#else
-#define PMD_CACHE_INDEX PMD_INDEX_SIZE
-#endif
-/*
- * Define the address range of the kernel non-linear virtual area
- */
-
-#ifdef CONFIG_PPC_BOOK3E
-#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
-#else
-#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#endif
-#define KERN_VIRT_SIZE ASM_CONST(0x0000100000000000)
-
-/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies half of it on hash CPUs and a quarter of it on Book3E
- * (we keep a quarter for the virtual memmap)
- */
-#define VMALLOC_START KERN_VIRT_START
-#ifdef CONFIG_PPC_BOOK3E
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 2)
-#else
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#endif
-#define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE)
-
-/*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
- * the ioremap space
- *
- * ISA_IO_BASE = KERN_IO_START, 64K reserved area
- * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
- * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
- */
-#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
-#define FULL_IO_SIZE 0x80000000ul
-#define ISA_IO_BASE (KERN_IO_START)
-#define ISA_IO_END (KERN_IO_START + 0x10000ul)
-#define PHB_IO_BASE (ISA_IO_END)
-#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
-#define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
-
-
-/*
- * Region IDs
- */
-#define REGION_SHIFT 60UL
-#define REGION_MASK (0xfUL << REGION_SHIFT)
-#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START))
-#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
-#define USER_REGION_ID (0UL)
-
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs and after the vmalloc space on Book3E
- */
-#ifdef CONFIG_PPC_BOOK3E
-#define VMEMMAP_BASE VMALLOC_END
-#define VMEMMAP_END KERN_IO_START
-#else
-#define VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
-#endif
-#define vmemmap ((struct page *)VMEMMAP_BASE)
-
-
-/*
- * Include the PTE bits definitions
- */
-#ifdef CONFIG_PPC_BOOK3S
-#include <asm/pte-hash64.h>
-#else
-#include <asm/pte-book3e.h>
-#endif
-#include <asm/pte-common.h>
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifndef __ASSEMBLY__
-
-/*
- * This is the default implementation of various PTE accessors, it's
- * used in all cases except Book3S with 64K pages where we have a
- * concept of sub-pages
- */
-#ifndef __real_pte
-
-#ifdef STRICT_MM_TYPECHECKS
-#define __real_pte(e,p) ((real_pte_t){(e)})
-#define __rpte_to_pte(r) ((r).pte)
-#else
-#define __real_pte(e,p) (e)
-#define __rpte_to_pte(r) (__pte(r))
-#endif
-#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> 12)
-
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
- do { \
- index = 0; \
- shift = mmu_psize_defs[psize].shift; \
-
-#define pte_iterate_hashed_end() } while(0)
-
-#ifdef CONFIG_PPC_HAS_HASH_64K
-#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr)
-#else
-#define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K
-#endif
-
-#endif /* __real_pte */
-
-
-/* pte_clear moved to later in this file */
-
-#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
-#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
-
-#define pmd_set(pmdp, pmdval) (pmd_val(*(pmdp)) = (pmdval))
-#define pmd_none(pmd) (!pmd_val(pmd))
-#define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
- || (pmd_val(pmd) & PMD_BAD_BITS))
-#define pmd_present(pmd) (pmd_val(pmd) != 0)
-#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0)
-#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS)
-extern struct page *pmd_page(pmd_t pmd);
-
-#define pud_set(pudp, pudval) (pud_val(*(pudp)) = (pudval))
-#define pud_none(pud) (!pud_val(pud))
-#define pud_bad(pud) (!is_kernel_addr(pud_val(pud)) \
- || (pud_val(pud) & PUD_BAD_BITS))
-#define pud_present(pud) (pud_val(pud) != 0)
-#define pud_clear(pudp) (pud_val(*(pudp)) = 0)
-#define pud_page_vaddr(pud) (pud_val(pud) & ~PUD_MASKED_BITS)
-#define pud_page(pud) virt_to_page(pud_page_vaddr(pud))
-
-#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
-
-/*
- * Find an entry in a page-table-directory. We combine the address region
- * (the high order N bits) and the pgd portion of the address.
- */
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
-
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-
-#define pmd_offset(pudp,addr) \
- (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
-
-#define pte_offset_kernel(dir,addr) \
- (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
-
-#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
-
-/* to find an entry in a kernel page-table-directory */
-/* This now only contains the vmalloc pages */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, unsigned long pte, int huge);
-
-/* Atomic PTE updates */
-static inline unsigned long pte_update(struct mm_struct *mm,
- unsigned long addr,
- pte_t *ptep, unsigned long clr,
- unsigned long set,
- int huge)
-{
-#ifdef PTE_ATOMIC_UPDATES
- unsigned long old, tmp;
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3 # pte_update\n\
- andi. %1,%0,%6\n\
- bne- 1b \n\
- andc %1,%0,%4 \n\
- or %1,%1,%7\n\
- stdcx. %1,0,%3 \n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*ptep)
- : "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
- : "cc" );
-#else
- unsigned long old = pte_val(*ptep);
- *ptep = __pte((old & ~clr) | set);
-#endif
- /* huge pages use the old page table lock */
- if (!huge)
- assert_pte_locked(mm, addr);
-
-#ifdef CONFIG_PPC_STD_MMU_64
- if (old & _PAGE_HASHPTE)
- hpte_need_flush(mm, addr, ptep, old, huge);
-#endif
-
- return old;
-}
-
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- unsigned long old;
-
- if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
- return 0;
- old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
- return (old & _PAGE_ACCESSED) != 0;
-}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
-({ \
- int __r; \
- __r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
- __r; \
-})
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
-
- if ((pte_val(*ptep) & _PAGE_RW) == 0)
- return;
-
- pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- if ((pte_val(*ptep) & _PAGE_RW) == 0)
- return;
-
- pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep) \
-({ \
- int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
- __ptep); \
- __young; \
-})
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pte_t *ptep)
-{
- unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
- return __pte(old);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t * ptep)
-{
- pte_update(mm, addr, ptep, ~0UL, 0, 0);
-}
-
-
-/* Set the dirty and/or accessed bits atomically in a linux PTE, this
- * function doesn't need to flush the hash entry
- */
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
-{
- unsigned long bits = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-
-#ifdef PTE_ATOMIC_UPDATES
- unsigned long old, tmp;
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%4\n\
- andi. %1,%0,%6\n\
- bne- 1b \n\
- or %0,%3,%0\n\
- stdcx. %0,0,%4\n\
- bne- 1b"
- :"=&r" (old), "=&r" (tmp), "=m" (*ptep)
- :"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
- :"cc");
-#else
- unsigned long old = pte_val(*ptep);
- *ptep = __pte(old | bits);
-#endif
-}
-
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
-
-#define pte_ERROR(e) \
- printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
- printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
-#define pgd_ERROR(e) \
- printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/* Encode and de-code a swap entry */
-#define __swp_type(entry) (((entry).val >> 1) & 0x3f)
-#define __swp_offset(entry) ((entry).val >> 8)
-#define __swp_entry(type, offset) ((swp_entry_t){((type)<< 1)|((offset)<<8)})
-#define __pte_to_swp_entry(pte) ((swp_entry_t){pte_val(pte) >> PTE_RPN_SHIFT})
-#define __swp_entry_to_pte(x) ((pte_t) { (x).val << PTE_RPN_SHIFT })
-#define pte_to_pgoff(pte) (pte_val(pte) >> PTE_RPN_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
-#define PTE_FILE_MAX_BITS (BITS_PER_LONG - PTE_RPN_SHIFT)
-
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
-void pgtable_cache_init(void);
-#endif /* __ASSEMBLY__ */
-
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
- _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
- _PAGE_THP_HUGE)
-
-#ifndef __ASSEMBLY__
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT and _PAGE_FILE bits of that are zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
- return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
- int index)
-{
- return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
- unsigned int index, unsigned int hidx)
-{
- hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
-struct page *realmode_pfn_to_page(unsigned long pfn);
-
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
- /*
- * The hpte hindex is stored in the pgtable whose address is in the
- * second half of the PMD
- *
- * Order this load with the test for pmd_trans_huge in the caller
- */
- smp_rmb();
- return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-
-extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, unsigned long old_pmd);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd);
-
-static inline int pmd_trans_huge(pmd_t pmd)
-{
- /*
- * leaf pte for huge page, bottom two bits != 00
- */
- return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_large(pmd_t pmd)
-{
- /*
- * leaf pte for huge page, bottom two bits != 00
- */
- if (pmd_trans_huge(pmd))
- return pmd_val(pmd) & _PAGE_PRESENT;
- return 0;
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
- if (pmd_trans_huge(pmd))
- return pmd_val(pmd) & _PAGE_SPLITTING;
- return 0;
-}
-
-extern int has_transparent_hugepage(void);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-static inline pte_t pmd_pte(pmd_t pmd)
-{
- return __pte(pmd_val(pmd));
-}
-
-static inline pmd_t pte_pmd(pte_t pte)
-{
- return __pmd(pte_val(pte));
-}
-
-static inline pte_t *pmdp_ptep(pmd_t *pmd)
-{
- return (pte_t *)pmd;
-}
-
-#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
-#define pmd_young(pmd) pte_young(pmd_pte(pmd))
-#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
-#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd) pte_write(pmd_pte(pmd))
-
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
-{
- /* Do nothing, mk_pmd() does this part. */
- return pmd;
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
- pmd_val(pmd) &= ~_PAGE_PRESENT;
- return pmd;
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
- pmd_val(pmd) |= _PAGE_SPLITTING;
- return pmd;
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
- return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp,
- pmd_t entry, int dirty);
-
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
- unsigned long addr,
- pmd_t *pmdp,
- unsigned long clr,
- unsigned long set);
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp)
-{
- unsigned long old;
-
- if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
- return 0;
- old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
- return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_GET_AND_CLEAR
-extern pmd_t pmdp_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_CLEAR_FLUSH
-extern pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
-{
-
- if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
- return;
-
- pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
-#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable);
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp);
-
-#define pmd_move_must_withdraw pmd_move_must_withdraw
-struct spinlock;
-static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
- struct spinlock *old_pmd_ptl)
-{
- /*
- * Archs like ppc64 use pgtable to store per pmd
- * specific information. So when we switch the pmd,
- * we should also withdraw and deposit the pgtable
- */
- return true;
-}
-
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
new file mode 100644
index 000000000000..f3086e39e7d2
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_PGTABLE_TYPES_H
+#define _ASM_POWERPC_PGTABLE_TYPES_H
+
+#if defined(__CHECKER__) || !defined(CONFIG_PPC32)
+#define STRICT_MM_TYPECHECKS
+#endif
+
+/* PTE level */
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
+typedef struct { pte_basic_t pte, pte1, pte2, pte3; } pte_t;
+#elif defined(STRICT_MM_TYPECHECKS)
+typedef struct { pte_basic_t pte; } pte_t;
+#else
+typedef pte_basic_t pte_t;
+#endif
+
+#if defined(STRICT_MM_TYPECHECKS) || \
+ (defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES))
+#define __pte(x) ((pte_t) { (x) })
+static inline pte_basic_t pte_val(pte_t x)
+{
+ return x.pte;
+}
+#else
+#define __pte(x) ((pte_t)(x))
+static inline pte_basic_t pte_val(pte_t x)
+{
+ return x;
+}
+#endif
+
+/* PMD level */
+#ifdef CONFIG_PPC64
+typedef struct { unsigned long pmd; } pmd_t;
+#define __pmd(x) ((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+ return x.pmd;
+}
+
+/* 64 bit always use 4 level table. */
+typedef struct { unsigned long pud; } pud_t;
+#define __pud(x) ((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+ return x.pud;
+}
+#endif /* CONFIG_PPC64 */
+
+/* PGD level */
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_PTE_64BIT)
+typedef struct { unsigned long long pgd; } pgd_t;
+
+static inline unsigned long long pgd_val(pgd_t x)
+{
+ return x.pgd;
+}
+#else
+typedef struct { unsigned long pgd; } pgd_t;
+
+static inline unsigned long pgd_val(pgd_t x)
+{
+ return x.pgd;
+}
+#endif
+#define __pgd(x) ((pgd_t) { (x) })
+
+/* Page protection bits */
+typedef struct { unsigned long pgprot; } pgprot_t;
+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) })
+
+/*
+ * With hash config 64k pages additionally define a bigger "real PTE" type that
+ * gathers the "second half" part of the PTE for pseudo 64k pages
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
+#else
+typedef struct { pte_t pte; } real_pte_t;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/cmpxchg.h>
+
+static inline bool pte_xchg(pte_t *ptep, pte_t old, pte_t new)
+{
+ unsigned long *p = (unsigned long *)ptep;
+
+ /* See comment in switch_mm_irqs_off() */
+ return pte_val(old) == __cmpxchg_u64(p, pte_val(old), pte_val(new));
+}
+#endif
+
+#endif /* _ASM_POWERPC_PGTABLE_TYPES_H */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index d98c1ecc3266..17fd7ff6e535 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -1,297 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_PGTABLE_H
#define _ASM_POWERPC_PGTABLE_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mmdebug.h>
+#include <linux/mmzone.h>
#include <asm/processor.h> /* For TASK_SIZE */
#include <asm/mmu.h>
#include <asm/page.h>
+#include <asm/tlbflush.h>
struct mm_struct;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
-#if defined(CONFIG_PPC64)
-# include <asm/pgtable-ppc64.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgtable.h>
#else
-# include <asm/pgtable-ppc32.h>
-#endif
+#include <asm/nohash/pgtable.h>
+#endif /* !CONFIG_PPC_BOOK3S */
-/*
- * We save the slot number & secondary bit in the second half of the
- * PTE page. We use the 8 bytes per each pte entry.
- */
-#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
-
-#ifndef __ASSEMBLY__
-
-#include <asm/tlbflush.h>
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
-/* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
-static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
-static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
-
-#ifdef CONFIG_NUMA_BALANCING
-
-static inline int pte_present(pte_t pte)
-{
- return pte_val(pte) & (_PAGE_PRESENT | _PAGE_NUMA);
-}
-
-#define pte_present_nonuma pte_present_nonuma
-static inline int pte_present_nonuma(pte_t pte)
-{
- return pte_val(pte) & (_PAGE_PRESENT);
-}
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
-#define pte_numa pte_numa
-static inline int pte_numa(pte_t pte)
-{
- return (pte_val(pte) &
- (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA;
-}
+#ifndef __ASSEMBLER__
-#define pte_mknonnuma pte_mknonnuma
-static inline pte_t pte_mknonnuma(pte_t pte)
-{
- pte_val(pte) &= ~_PAGE_NUMA;
- pte_val(pte) |= _PAGE_PRESENT | _PAGE_ACCESSED;
- return pte;
-}
+#define PFN_PTE_SHIFT PTE_RPN_SHIFT
-#define pte_mknuma pte_mknuma
-static inline pte_t pte_mknuma(pte_t pte)
-{
- /*
- * We should not set _PAGE_NUMA on non present ptes. Also clear the
- * present bit so that hash_page will return 1 and we collect this
- * as numa fault.
- */
- if (pte_present(pte)) {
- pte_val(pte) |= _PAGE_NUMA;
- pte_val(pte) &= ~_PAGE_PRESENT;
- } else
- VM_BUG_ON(1);
- return pte;
-}
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned int nr);
+#define set_ptes set_ptes
+#define update_mmu_cache(vma, addr, ptep) \
+ update_mmu_cache_range(NULL, vma, addr, ptep, 1)
-#define ptep_set_numa ptep_set_numa
-static inline void ptep_set_numa(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
-{
- if ((pte_val(*ptep) & _PAGE_PRESENT) == 0)
- VM_BUG_ON(1);
+#ifndef MAX_PTRS_PER_PGD
+#define MAX_PTRS_PER_PGD PTRS_PER_PGD
+#endif
- pte_update(mm, addr, ptep, _PAGE_PRESENT, _PAGE_NUMA, 0);
- return;
-}
+/* Keep this as a macro to avoid include dependency mess */
+#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_numa pmd_numa
-static inline int pmd_numa(pmd_t pmd)
+static inline unsigned long pte_pfn(pte_t pte)
{
- return pte_numa(pmd_pte(pmd));
+ return (pte_val(pte) & PTE_RPN_MASK) >> PTE_RPN_SHIFT;
}
-#define pmdp_set_numa pmdp_set_numa
-static inline void pmdp_set_numa(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
+/*
+ * Select all bits except the pfn
+ */
+#define pte_pgprot pte_pgprot
+static inline pgprot_t pte_pgprot(pte_t pte)
{
- if ((pmd_val(*pmdp) & _PAGE_PRESENT) == 0)
- VM_BUG_ON(1);
+ unsigned long pte_flags;
- pmd_hugepage_update(mm, addr, pmdp, _PAGE_PRESENT, _PAGE_NUMA);
- return;
+ pte_flags = pte_val(pte) & ~PTE_RPN_MASK;
+ return __pgprot(pte_flags);
}
-#define pmd_mknonnuma pmd_mknonnuma
-static inline pmd_t pmd_mknonnuma(pmd_t pmd)
+static inline pgprot_t pgprot_nx(pgprot_t prot)
{
- return pte_pmd(pte_mknonnuma(pmd_pte(pmd)));
+ return pte_pgprot(pte_exprotect(__pte(pgprot_val(prot))));
}
+#define pgprot_nx pgprot_nx
-#define pmd_mknuma pmd_mknuma
-static inline pmd_t pmd_mknuma(pmd_t pmd)
+#ifndef pmd_page_vaddr
+static inline const void *pmd_page_vaddr(pmd_t pmd)
{
- return pte_pmd(pte_mknuma(pmd_pte(pmd)));
+ return __va(pmd_val(pmd) & ~PMD_MASKED_BITS);
}
+#define pmd_page_vaddr pmd_page_vaddr
+#endif
+/*
+ * ZERO_PAGE is a global shared page that is always zero: used
+ * for zero-mapped memory areas etc..
+ */
+extern unsigned long empty_zero_page[];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-# else
-
-static inline int pte_present(pte_t pte)
-{
- return pte_val(pte) & _PAGE_PRESENT;
-}
-#endif /* CONFIG_NUMA_BALANCING */
+extern pgd_t swapper_pg_dir[];
-/* Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * Even if PTEs can be unsigned long long, a PFN is always an unsigned
- * long for now.
- */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
- return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
- pgprot_val(pgprot)); }
-static inline unsigned long pte_pfn(pte_t pte) {
- return pte_val(pte) >> PTE_RPN_SHIFT; }
+extern void paging_init(void);
+void poking_init(void);
-/* Keep these as a macros to avoid include dependency mess */
-#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
-
-/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte) {
- pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
-static inline pte_t pte_mkclean(pte_t pte) {
- pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
-static inline pte_t pte_mkold(pte_t pte) {
- pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) {
- pte_val(pte) |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte) {
- pte_val(pte) |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte) {
- pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkspecial(pte_t pte) {
- pte_val(pte) |= _PAGE_SPECIAL; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) {
- return pte; }
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
- pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
- return pte;
-}
+extern unsigned long ioremap_bot;
+extern const pgprot_t protection_map[16];
+/* can we use this in kvm */
+unsigned long vmalloc_to_phys(void *vmalloc_addr);
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- pte_t pte);
+void pgtable_cache_add(unsigned int shift);
-/* This low level function performs the actual PTE insertion
- * Setting the PTE depends on the MMU type and other factors. It's
- * an horrible mess that I'm not going to try to clean up now but
- * I'm keeping it in one place rather than spread around
- */
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep, pte_t pte, int percpu)
-{
-#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
- /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
- * helper pte_update() which does an atomic update. We need to do that
- * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
- * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
- * the hash bits instead (ie, same as the non-SMP case)
- */
- if (percpu)
- *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
- else
- pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
-
-#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
- /* Second case is 32-bit with 64-bit PTE. In this case, we
- * can just store as long as we do the two halves in the right order
- * with a barrier in between. This is possible because we take care,
- * in the hash code, to pre-invalidate if the PTE was already hashed,
- * which synchronizes us with any concurrent invalidation.
- * In the percpu case, we also fallback to the simple update preserving
- * the hash bits
- */
- if (percpu) {
- *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
- return;
- }
-#if _PAGE_HASHPTE != 0
- if (pte_val(*ptep) & _PAGE_HASHPTE)
- flush_hash_entry(mm, ptep, addr);
+#ifdef CONFIG_PPC32
+void __init *early_alloc_pgtable(unsigned long size);
#endif
- __asm__ __volatile__("\
- stw%U0%X0 %2,%0\n\
- eieio\n\
- stw%U0%X0 %L2,%1"
- : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
- : "r" (pte) : "memory");
-
-#elif defined(CONFIG_PPC_STD_MMU_32)
- /* Third case is 32-bit hash table in UP mode, we need to preserve
- * the _PAGE_HASHPTE bit since we may not have invalidated the previous
- * translation in the hash yet (done in a subsequent flush_tlb_xxx())
- * and see we need to keep track that this PTE needs invalidating
- */
- *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
- | (pte_val(pte) & ~_PAGE_HASHPTE));
+pte_t *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va);
+#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32)
+void mark_initmem_nx(void);
#else
- /* Anything else just stores the PTE normally. That covers all 64-bit
- * cases, and 32-bit non-hash with 32-bit PTEs.
- */
- *ptep = pte;
+static inline void mark_initmem_nx(void) { }
#endif
-}
-
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep, pte_t entry, int dirty);
-
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-
-#define _PAGE_CACHE_CTL (_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
- _PAGE_WRITETHRU)
-
-#define pgprot_noncached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
- _PAGE_NO_CACHE | _PAGE_GUARDED))
-
-#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
- _PAGE_NO_CACHE))
+int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep, pte_t entry, int dirty);
-#define pgprot_cached(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
- _PAGE_COHERENT))
-
-#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
- _PAGE_COHERENT | _PAGE_WRITETHRU))
-
-#define pgprot_cached_noncoherent(prot) \
- (__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
-
-#define pgprot_writecombine pgprot_noncached_wc
+pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size,
+ pgprot_t vma_prot);
struct file;
-extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t vma_prot);
+static inline pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+ unsigned long size, pgprot_t vma_prot)
+{
+ return __phys_mem_access_prot(pfn, size, vma_prot);
+}
#define __HAVE_PHYS_MEM_ACCESS_PROT
-/*
- * ZERO_PAGE is a global shared page that is always zero: used
- * for zero-mapped memory areas etc..
- */
-extern unsigned long empty_zero_page[];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
-extern pgd_t swapper_pg_dir[];
-
-extern void paging_init(void);
-
-/*
- * kern_addr_valid is intended to indicate whether an address is a valid
- * kernel address. Most 32-bit archs define it as always true (like this)
- * but most 64-bit archs actually perform a test. What should we do here?
- */
-#define kern_addr_valid(addr) (1)
-
-#include <asm-generic/pgtable.h>
-
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep);
/*
* This gets called at the end of handling a page fault, when
@@ -302,41 +131,77 @@ extern void paging_init(void);
* corresponding HPTE into the hash table ahead of time, instead of
* waiting for the inevitable extra hash-table miss exception.
*/
-extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
+static inline void update_mmu_cache_range(struct vm_fault *vmf,
+		struct vm_area_struct *vma, unsigned long address,
+		pte_t *ptep, unsigned int nr)
+{
+	/*
+	 * __update_mmu_cache() is only called in two cases: the hash page
+	 * table MMU when radix is not enabled, or an e500 build that also
+	 * has hugetlb support. @vmf and @nr are not used here.
+	 */
+	bool hash_mmu = mmu_has_feature(MMU_FTR_HPTE_TABLE) && !radix_enabled();
+	bool e500_hugetlb = IS_ENABLED(CONFIG_PPC_E500) &&
+			    IS_ENABLED(CONFIG_HUGETLB_PAGE);
+
+	if (!hash_mmu && !e500_hugetlb)
+		return;
+
+	__update_mmu_cache(vma, address, ptep);
+}
-extern int gup_hugepd(hugepd_t *hugepd, unsigned pdshift, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr);
+/*
+ * When used, PTE_FRAG_NR is defined in subarch pgtable.h
+ * so we are sure it is included when arriving here.
+ */
+#ifdef PTE_FRAG_NR
+static inline void *pte_frag_get(mm_context_t *ctx)
+{
+ return ctx->pte_frag; /* PTE_FRAG_NR variant: the value lives in the mm context */
+}
-extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr);
-#ifndef CONFIG_TRANSPARENT_HUGEPAGE
-#define pmd_large(pmd) 0
-#define has_transparent_hugepage() 0
+static inline void pte_frag_set(mm_context_t *ctx, void *p)
+{
+ ctx->pte_frag = p; /* PTE_FRAG_NR variant: remember p in the mm context */
+}
+#else
+#define PTE_FRAG_NR 1
+#define PTE_FRAG_SIZE_SHIFT PAGE_SHIFT
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+
+static inline void *pte_frag_get(mm_context_t *ctx)
+{
+ return NULL; /* fallback when the subarch defines no PTE_FRAG_NR: nothing is stored */
+}
+
+static inline void pte_frag_set(mm_context_t *ctx, void *p)
+{ /* fallback when the subarch defines no PTE_FRAG_NR: p is discarded */
+}
#endif
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
- unsigned *shift);
-static inline pte_t *lookup_linux_ptep(pgd_t *pgdir, unsigned long hva,
- unsigned long *pte_sizep)
+#define pmd_pgtable pmd_pgtable
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)pmd_page_vaddr(pmd); /* pmd_page_vaddr() result cast to pgtable_t */
+}
+
+#ifdef CONFIG_PPC64
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size);
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size);
+/*
+ * mm/memory_hotplug.c:mhp_supports_memmap_on_memory goes into detail on
+ * some of the restrictions. We don't check for PMD_SIZE because our
+ * vmemmap allocation code can fallback correctly. The pageblock
+ * alignment requirement is met using altmap->reserve blocks.
+ */
+#define arch_supports_memmap_on_memory arch_supports_memmap_on_memory
+static inline bool arch_supports_memmap_on_memory(unsigned long vmemmap_size)
{
- pte_t *ptep;
- unsigned long ps = *pte_sizep;
- unsigned int shift;
-
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
- if (!ptep)
- return NULL;
- if (shift)
- *pte_sizep = 1ul << shift;
- else
- *pte_sizep = PAGE_SIZE;
-
- if (ps > *pte_sizep)
- return NULL;
-
- return ptep;
+ if (!radix_enabled())
+ return false;
+ /*
+ * With 4K page size and 2M PMD_SIZE, we can align
+ * things better with memory block size value
+ * starting from 128MB. Hence align things with PMD_SIZE.
+ */
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES))
+ return IS_ALIGNED(vmemmap_size, PMD_SIZE);
+ return true;
}
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
+#endif /* CONFIG_PPC64 */
+
+#endif /* __ASSEMBLER__ */
+
#endif /* _ASM_POWERPC_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
new file mode 100644
index 000000000000..28e752138996
--- /dev/null
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_KEYS_H
+#define _ASM_POWERPC_KEYS_H
+
+#include <linux/jump_label.h>
+#include <asm/firmware.h>
+
+extern int num_pkey;
+extern u32 reserved_allocation_mask; /* bits set for reserved keys */
+
+#define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
+ VM_PKEY_BIT3 | VM_PKEY_BIT4)
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS | \
+ PKEY_DISABLE_WRITE | \
+ PKEY_DISABLE_EXECUTE)
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/pkeys.h>
+#else
+#error "Not supported"
+#endif
+
+
+/*
+ * Convert a protection key number into its vm_flags representation: the key
+ * is shifted up by VM_PKEY_SHIFT and anything outside ARCH_VM_PKEY_FLAGS is
+ * masked off.
+ */
+static inline vm_flags_t pkey_to_vmflag_bits(u16 pkey)
+{
+	vm_flags_t shifted = (vm_flags_t)pkey << VM_PKEY_SHIFT;
+
+	return shifted & ARCH_VM_PKEY_FLAGS;
+}
+
+/*
+ * Extract the protection key recorded in a VMA's vm_flags.
+ * Reports key 0 when the MMU has no MMU_FTR_PKEY feature.
+ */
+static inline int vma_pkey(struct vm_area_struct *vma)
+{
+	if (mmu_has_feature(MMU_FTR_PKEY))
+		return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
+
+	return 0;
+}
+
+static inline int arch_max_pkey(void)
+{
+ return num_pkey; /* the extern num_pkey declared near the top of this header */
+}
+
+#define pkey_alloc_mask(pkey) (0x1 << pkey)
+
+#define mm_pkey_allocation_map(mm) (mm->context.pkey_allocation_map)
+
+#define __mm_pkey_allocated(mm, pkey) { \
+ mm_pkey_allocation_map(mm) |= pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_free(mm, pkey) { \
+ mm_pkey_allocation_map(mm) &= ~pkey_alloc_mask(pkey); \
+}
+
+#define __mm_pkey_is_allocated(mm, pkey) \
+ (mm_pkey_allocation_map(mm) & pkey_alloc_mask(pkey))
+
+#define __mm_pkey_is_reserved(pkey) (reserved_allocation_mask & \
+ pkey_alloc_mask(pkey))
+
+/*
+ * Tell whether @pkey is currently allocated in @mm.
+ *
+ * Keys outside [0, arch_max_pkey()) and reserved keys are never reported
+ * as allocated.
+ */
+static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
+{
+	bool in_range = pkey >= 0 && pkey < arch_max_pkey();
+
+	/* The reserved check is only evaluated for an in-range key. */
+	if (!in_range || __mm_pkey_is_reserved(pkey))
+		return false;
+
+	return __mm_pkey_is_allocated(mm, pkey);
+}
+
+/*
+ * Returns a positive, 5-bit key on success, or -1 on failure.
+ * Relies on the mmap_lock to protect against concurrency in mm_pkey_alloc() and
+ * mm_pkey_free().
+ */
+static inline int mm_pkey_alloc(struct mm_struct *mm)
+{
+	/*
+	 * This is the single point where a pkey is checked for validity
+	 * from the hardware's point of view; everything else in the kernel
+	 * trusts the keys handed out here.
+	 */
+	const u32 exhausted = (u32)(~(0x0));
+	int pkey;
+
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return -1;
+
+	/*
+	 * ffz() is undefined when there is no zero bit, so a completely
+	 * full map has to be rejected before calling it.
+	 */
+	if (mm_pkey_allocation_map(mm) == exhausted)
+		return -1;
+
+	pkey = ffz((u32)mm_pkey_allocation_map(mm));
+	__mm_pkey_allocated(mm, pkey);
+
+	return pkey;
+}
+
+/*
+ * Release @pkey in @mm.
+ *
+ * Returns 0 on success, -1 when the MMU has no MMU_FTR_PKEY feature, and
+ * -EINVAL when mm_pkey_is_allocated() does not report the key as allocated.
+ */
+static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
+{
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return -1;
+
+	if (mm_pkey_is_allocated(mm, pkey)) {
+		__mm_pkey_free(mm, pkey);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Try to dedicate one of the protection keys to be used as an
+ * execute-only protection key.
+ */
+extern int execute_only_pkey(struct mm_struct *mm);
+extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
+ int prot, int pkey);
+/*
+ * Choose the pkey to use for an mprotect-style request.
+ *
+ * Returns 0 when the MMU has no MMU_FTR_PKEY feature. A @pkey other than -1
+ * is treated as the user's explicit choice (mprotect_pkey()) and returned
+ * untouched; only -1 is passed on to __arch_override_mprotect_pkey().
+ */
+static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
+					      int prot, int pkey)
+{
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return 0;
+
+	return (pkey != -1) ? pkey
+			    : __arch_override_mprotect_pkey(vma, prot, pkey);
+}
+
+extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val);
+/*
+ * Apply the access rights @init_val to @pkey for @tsk.
+ *
+ * Returns -EINVAL when the MMU has no MMU_FTR_PKEY feature. For pkey 0 no
+ * change is ever made: a zero @init_val yields 0, anything else -EINVAL.
+ * All other keys are handed to __arch_set_user_pkey_access().
+ */
+static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+					    unsigned long init_val)
+{
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return -EINVAL;
+
+	if (pkey != 0)
+		return __arch_set_user_pkey_access(tsk, pkey, init_val);
+
+	/*
+	 * pkey-0 is associated with every page in the kernel, so userspace
+	 * must not change its permissions: the kernel cannot operate if any
+	 * permission on pkey-0 is denied.
+	 */
+	if (init_val)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline bool arch_pkeys_enabled(void)
+{
+ return mmu_has_feature(MMU_FTR_PKEY); /* same feature test that gates every helper in this header */
+}
+
+extern void pkey_mm_init(struct mm_struct *mm);
+#endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 12c32c5f533d..f2b6cc4341bb 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -1,58 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H
#define _ASM_POWERPC_PLPAR_WRAPPERS_H
+#ifdef CONFIG_PPC_PSERIES
+
#include <linux/string.h>
#include <linux/irqflags.h>
+#include <linux/delay.h>
#include <asm/hvcall.h>
#include <asm/paca.h>
+#include <asm/lppaca.h>
#include <asm/page.h>
-/* Get state of physical CPU from query_cpu_stopped */
-int smp_query_cpu_stopped(unsigned int pcpu);
-#define QCSS_STOPPED 0
-#define QCSS_STOPPING 1
-#define QCSS_NOT_STOPPED 2
-#define QCSS_HARDWARE_ERROR -1
-#define QCSS_HARDWARE_BUSY -2
-
static inline long poll_pending(void)
{
return plpar_hcall_norets(H_POLL_PENDING);
}
-static inline u8 get_cede_latency_hint(void)
-{
- return get_lppaca()->cede_latency_hint;
-}
-
-static inline void set_cede_latency_hint(u8 latency_hint)
-{
- get_lppaca()->cede_latency_hint = latency_hint;
-}
-
static inline long cede_processor(void)
{
- return plpar_hcall_norets(H_CEDE);
-}
-
-static inline long extended_cede_processor(unsigned long latency_hint)
-{
- long rc;
- u8 old_latency_hint = get_cede_latency_hint();
-
- set_cede_latency_hint(latency_hint);
-
- rc = cede_processor();
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Ensure that H_CEDE returns with IRQs on */
- if (WARN_ON(!(mfmsr() & MSR_EE)))
- __hard_irq_enable();
-#endif
-
- set_cede_latency_hint(old_latency_hint);
-
- return rc;
+ /*
+ * We cannot call tracepoints inside RCU idle regions which
+ * means we must not trace H_CEDE.
+ */
+ return plpar_hcall_norets_notrace(H_CEDE);
}
static inline long vpa_call(unsigned long flags, unsigned long cpu,
@@ -93,36 +65,33 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
return vpa_call(H_VPA_REG_DTL, cpu, vpa);
}
-static inline long plpar_page_set_loaned(unsigned long vpa)
+/*
+ * Invokes H_HTM hcall with parameters passed from htm_hcall_wrapper.
+ * flags: Set to hardwareTarget.
+ * target: Specifies target using node index, nodal chip index and core index.
+ * operation : action to perform ie configure, start, stop, deconfigure, trace
+ * based on the HTM type.
+ * param1, param2, param3: parameters for each action.
+ */
+static inline long htm_call(unsigned long flags, unsigned long target,
+ unsigned long operation, unsigned long param1,
+ unsigned long param2, unsigned long param3)
{
- unsigned long cmo_page_sz = cmo_get_page_size();
- long rc = 0;
- int i;
-
- for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
- rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
-
- for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
- plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
- vpa + i - cmo_page_sz, 0);
-
- return rc;
+ return plpar_hcall_norets(H_HTM, flags, target, operation,
+ param1, param2, param3);
}
-static inline long plpar_page_set_active(unsigned long vpa)
+static inline long htm_hcall_wrapper(unsigned long flags, unsigned long nodeindex,
+ unsigned long nodalchipindex, unsigned long coreindexonchip,
+ unsigned long type, unsigned long htm_op, unsigned long param1, unsigned long param2,
+ unsigned long param3)
{
- unsigned long cmo_page_sz = cmo_get_page_size();
- long rc = 0;
- int i;
-
- for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
- rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
-
- for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
- plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
- vpa + i - cmo_page_sz, 0);
-
- return rc;
+ return htm_call(H_HTM_FLAGS_HARDWARE_TARGET | flags,
+ H_HTM_TARGET_NODE_INDEX(nodeindex) |
+ H_HTM_TARGET_NODAL_CHIP_INDEX(nodalchipindex) |
+ H_HTM_TARGET_CORE_INDEX_ON_CHIP(coreindexonchip),
+ H_HTM_OP(htm_op) | H_HTM_TYPE(type),
+ param1, param2, param3);
}
extern void vpa_init(int cpu);
@@ -202,6 +171,23 @@ static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex,
}
/*
+ * ptes must be 8*sizeof(unsigned long)
+ */
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+ unsigned long *ptes)
+
+{
+ long rc;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ rc = plpar_hcall9(H_READ, retbuf, flags | H_READ_4, ptex); /* H_READ_4 is always OR-ed into the caller's flags */
+
+ memcpy(ptes, retbuf, 8*sizeof(unsigned long)); /* copied unconditionally, whatever rc is */
+
+ return rc;
+}
+
+/*
* plpar_pte_read_4_raw can be called in real mode.
* ptes must be 8*sizeof(unsigned long)
*/
@@ -225,6 +211,18 @@ static inline long plpar_pte_protect(unsigned long flags, unsigned long ptex,
return plpar_hcall_norets(H_PROTECT, flags, ptex, avpn);
}
+static inline long plpar_resize_hpt_prepare(unsigned long flags,
+ unsigned long shift)
+{
+ return plpar_hcall_norets(H_RESIZE_HPT_PREPARE, flags, shift); /* single attempt; the hcall status is returned as-is */
+}
+
+static inline long plpar_resize_hpt_commit(unsigned long flags,
+ unsigned long shift)
+{
+ return plpar_hcall_norets(H_RESIZE_HPT_COMMIT, flags, shift); /* single attempt; the hcall status is returned as-is */
+}
+
static inline long plpar_tce_get(unsigned long liobn, unsigned long ioba,
unsigned long *tce_ret)
{
@@ -273,7 +271,7 @@ static inline long plpar_set_mode(unsigned long mflags, unsigned long resource,
static inline long enable_reloc_on_exceptions(void)
{
/* mflags = 3: Exceptions at 0xC000000000004000 */
- return plpar_set_mode(3, 3, 0, 0);
+ return plpar_set_mode(3, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
}
/*
@@ -284,7 +282,7 @@ static inline long enable_reloc_on_exceptions(void)
* returns H_SUCCESS.
*/
static inline long disable_reloc_on_exceptions(void) {
- return plpar_set_mode(0, 3, 0, 0);
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_ADDR_TRANS_MODE, 0, 0);
}
/*
@@ -297,7 +295,7 @@ static inline long disable_reloc_on_exceptions(void) {
static inline long enable_big_endian_exceptions(void)
{
/* mflags = 0: big endian exceptions */
- return plpar_set_mode(0, 4, 0, 0);
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_LE, 0, 0);
}
/*
@@ -310,17 +308,369 @@ static inline long enable_big_endian_exceptions(void)
static inline long enable_little_endian_exceptions(void)
{
/* mflags = 1: little endian exceptions */
- return plpar_set_mode(1, 4, 0, 0);
+ return plpar_set_mode(1, H_SET_MODE_RESOURCE_LE, 0, 0);
+}
+
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_CIABR, ciabr, 0); /* mflags 0; ciabr is the first value argument, the second is 0 */
+}
+
+static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+{
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0); /* mflags 0; dawr0 then dawrx0 as the two value arguments */
+}
+
+static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1)
+{
+ return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1); /* mflags 0; dawr1 then dawrx1 as the two value arguments */
+}
+
+static inline long plpar_signal_sys_reset(long cpu)
+{
+ return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); /* cpu is passed straight through as the only hcall argument */
+}
+
+/*
+ * Query H_GET_CPU_CHARACTERISTICS.
+ *
+ * On H_SUCCESS the first two return words are stored in @p->character and
+ * @p->behaviour; on any other status @p is left untouched. The hcall status
+ * is returned either way.
+ */
+static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf);
+
+	if (rc != H_SUCCESS)
+		return rc;
+
+	p->character = retbuf[0];
+	p->behaviour = retbuf[1];
+
+	return rc;
+}
+
+static inline long plpar_guest_create(unsigned long flags, unsigned long *guest_id)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ unsigned long token;
+ long rc;
+
+ token = -1UL; /* value sent on the first attempt; replaced by retbuf[0] on busy returns */
+ do {
+ rc = plpar_hcall(H_GUEST_CREATE, retbuf, flags, token);
+ if (rc == H_SUCCESS)
+ *guest_id = retbuf[0]; /* guest_id is written only on success */
+
+ if (rc == H_BUSY) {
+ token = retbuf[0];
+ cond_resched();
+ }
+
+ if (H_IS_LONG_BUSY(rc)) {
+ token = retbuf[0];
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY; /* fold long-busy into H_BUSY so the loop condition retries */
+ }
+
+ } while (rc == H_BUSY);
+
+ return rc;
+}
+
+/*
+ * Issue H_GUEST_CREATE_VCPU for @vcpu_id in guest @guest_id, retrying for
+ * as long as the hypervisor reports busy.
+ *
+ * @flags is forwarded to the hcall unchanged. It used to be dropped in
+ * favour of a literal 0, so callers passing 0 see no difference.
+ *
+ * H_BUSY is retried after cond_resched(); a long-busy status is retried
+ * after msleep() for the hinted time, so this may sleep. Returns the first
+ * status that is neither of those.
+ */
+static inline long plpar_guest_create_vcpu(unsigned long flags,
+					   unsigned long guest_id,
+					   unsigned long vcpu_id)
+{
+	long rc;
+
+	do {
+		rc = plpar_hcall_norets(H_GUEST_CREATE_VCPU, flags, guest_id,
+					vcpu_id);
+
+		if (rc == H_BUSY)
+			cond_resched();
+
+		if (H_IS_LONG_BUSY(rc)) {
+			msleep(get_longbusy_msecs(rc));
+			/* Treat long-busy like H_BUSY so the loop retries. */
+			rc = H_BUSY;
+		}
+
+	} while (rc == H_BUSY);
+
+	return rc;
+}
+
+/*
+ * H_GUEST_SET_STATE wrapper that retries through H_BUSY and long-busy
+ * returns, waiting with cpu_relax()/mdelay() rather than sleeping.
+ *
+ * When the hcall ends with H_INVALID_ELEMENT_ID, H_INVALID_ELEMENT_SIZE or
+ * H_INVALID_ELEMENT_VALUE, retbuf[0] is stored in *@failed_index; for any
+ * other status *@failed_index is left alone. Returns the final status.
+ */
+static inline long plpar_guest_set_state(unsigned long flags,
+					 unsigned long guest_id,
+					 unsigned long vcpu_id,
+					 unsigned long data_buffer,
+					 unsigned long data_size,
+					 unsigned long *failed_index)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	for (;;) {
+		rc = plpar_hcall(H_GUEST_SET_STATE, retbuf, flags, guest_id,
+				 vcpu_id, data_buffer, data_size);
+
+		if (rc == H_BUSY) {
+			cpu_relax();
+			continue;
+		}
+
+		if (H_IS_LONG_BUSY(rc)) {
+			mdelay(get_longbusy_msecs(rc));
+			continue;
+		}
+
+		break;
+	}
+
+	/* All three "invalid element" statuses report the index the same way. */
+	if (rc == H_INVALID_ELEMENT_ID ||
+	    rc == H_INVALID_ELEMENT_SIZE ||
+	    rc == H_INVALID_ELEMENT_VALUE)
+		*failed_index = retbuf[0];
+
+	return rc;
+}
+
+/*
+ * H_GUEST_GET_STATE wrapper that retries through H_BUSY and long-busy
+ * returns, waiting with cpu_relax()/mdelay() rather than sleeping.
+ *
+ * When the hcall ends with H_INVALID_ELEMENT_ID, H_INVALID_ELEMENT_SIZE or
+ * H_INVALID_ELEMENT_VALUE, retbuf[0] is stored in *@failed_index; for any
+ * other status *@failed_index is left alone. Returns the final status.
+ */
+static inline long plpar_guest_get_state(unsigned long flags,
+					 unsigned long guest_id,
+					 unsigned long vcpu_id,
+					 unsigned long data_buffer,
+					 unsigned long data_size,
+					 unsigned long *failed_index)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc;
+
+	for (;;) {
+		rc = plpar_hcall(H_GUEST_GET_STATE, retbuf, flags, guest_id,
+				 vcpu_id, data_buffer, data_size);
+
+		if (rc == H_BUSY) {
+			cpu_relax();
+			continue;
+		}
+
+		if (H_IS_LONG_BUSY(rc)) {
+			mdelay(get_longbusy_msecs(rc));
+			continue;
+		}
+
+		break;
+	}
+
+	/* All three "invalid element" statuses report the index the same way. */
+	if (rc == H_INVALID_ELEMENT_ID ||
+	    rc == H_INVALID_ELEMENT_SIZE ||
+	    rc == H_INVALID_ELEMENT_VALUE)
+		*failed_index = retbuf[0];
+
+	return rc;
+}
+
+/*
+ * Single-shot H_GUEST_RUN_VCPU wrapper (no busy retry).
+ *
+ * On H_SUCCESS retbuf[0] is stored in *@trap. On H_INVALID_ELEMENT_ID,
+ * H_INVALID_ELEMENT_SIZE or H_INVALID_ELEMENT_VALUE it is stored in
+ * *@failed_index instead. Any other status writes neither output.
+ * Returns the hcall status.
+ */
+static inline long plpar_guest_run_vcpu(unsigned long flags, unsigned long guest_id,
+					unsigned long vcpu_id, int *trap,
+					unsigned long *failed_index)
+{
+	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+	long rc = plpar_hcall(H_GUEST_RUN_VCPU, retbuf, flags, guest_id, vcpu_id);
+
+	if (rc == H_SUCCESS) {
+		*trap = retbuf[0];
+		return rc;
+	}
+
+	if (rc == H_INVALID_ELEMENT_ID ||
+	    rc == H_INVALID_ELEMENT_SIZE ||
+	    rc == H_INVALID_ELEMENT_VALUE)
+		*failed_index = retbuf[0];
+
+	return rc;
}
-static inline long plapr_set_ciabr(unsigned long ciabr)
+static inline long plpar_guest_delete(unsigned long flags, u64 guest_id)
{
- return plpar_set_mode(0, 1, ciabr, 0);
+ long rc;
+
+ do {
+ rc = plpar_hcall_norets(H_GUEST_DELETE, flags, guest_id);
+ if (rc == H_BUSY)
+ cond_resched();
+
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ }
+
+ } while (rc == H_BUSY);
+
+ return rc;
}
-static inline long plapr_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
+static inline long plpar_guest_set_capabilities(unsigned long flags,
+ unsigned long capabilities)
{
- return plpar_set_mode(0, 2, dawr0, dawrx0);
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ do {
+ rc = plpar_hcall(H_GUEST_SET_CAPABILITIES, retbuf, flags, capabilities);
+ if (rc == H_BUSY)
+ cond_resched();
+
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ }
+ } while (rc == H_BUSY);
+
+ return rc;
}
+static inline long plpar_guest_get_capabilities(unsigned long flags,
+ unsigned long *capabilities)
+{
+ unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+ long rc;
+
+ do {
+ rc = plpar_hcall(H_GUEST_GET_CAPABILITIES, retbuf, flags);
+ if (rc == H_BUSY)
+ cond_resched();
+
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY; /* fold long-busy into H_BUSY so the loop condition retries */
+ }
+ } while (rc == H_BUSY);
+
+ if (rc == H_SUCCESS)
+ *capabilities = retbuf[0]; /* written only on success; untouched for any other status */
+
+ return rc;
+}
+
+/*
+ * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately
+ *
+ * - Returns H_SUCCESS on success
+ * - For H_BUSY return value, we retry the hcall.
+ * - For any other hcall failures, attempt a full flush once before
+ * resorting to BUG().
+ *
+ * Note: This hcall is expected to fail only very rarely. The correct
+ * error recovery of killing the process/guest will be eventually
+ * needed.
+ */
+static inline long pseries_rpt_invalidate(u64 pid, u64 target, u64 type,
+ u64 page_sizes, u64 start, u64 end)
+{
+ long rc;
+ unsigned long all;
+
+ while (true) { /* only the H_BUSY 'continue' below ever re-enters this loop */
+ rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type,
+ page_sizes, start, end);
+ if (rc == H_BUSY) {
+ cpu_relax();
+ continue;
+ } else if (rc == H_SUCCESS)
+ return rc;
+
+ /* Flush request failed, try with a full flush once */
+ if (type & H_RPTI_TYPE_NESTED)
+ all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL;
+ else
+ all = H_RPTI_TYPE_ALL;
+retry: /* the full flush itself is repeated for as long as it reports H_BUSY */
+ rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target,
+ all, page_sizes, 0, -1UL); /* range widened to 0..-1UL */
+ if (rc == H_BUSY) {
+ cpu_relax();
+ goto retry;
+ } else if (rc == H_SUCCESS)
+ return rc;
+
+ BUG(); /* reached only when the full flush fails with something other than H_BUSY */
+ }
+}
+
+#else /* !CONFIG_PPC_PSERIES */
+/*
+ * Stubs used when CONFIG_PPC_PSERIES is not set: each one returns 0 without
+ * issuing an hcall and leaves any output pointer it is given (ptes,
+ * failed_index, trap, guest_id, capabilities) untouched.
+ * NOTE(review): callers that read those outputs after a 0 return would see
+ * whatever they held before the call - confirm none rely on them here.
+ */
+static inline long plpar_set_ciabr(unsigned long ciabr)
+{
+ return 0;
+}
+
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+ unsigned long *ptes)
+{
+ return 0;
+}
+
+static inline long pseries_rpt_invalidate(u64 pid, u64 target, u64 type,
+ u64 page_sizes, u64 start, u64 end)
+{
+ return 0;
+}
+
+static inline long plpar_guest_create_vcpu(unsigned long flags,
+ unsigned long guest_id,
+ unsigned long vcpu_id)
+{
+ return 0;
+}
+
+static inline long plpar_guest_get_state(unsigned long flags,
+ unsigned long guest_id,
+ unsigned long vcpu_id,
+ unsigned long data_buffer,
+ unsigned long data_size,
+ unsigned long *failed_index)
+{
+ return 0;
+}
+
+static inline long plpar_guest_set_state(unsigned long flags,
+ unsigned long guest_id,
+ unsigned long vcpu_id,
+ unsigned long data_buffer,
+ unsigned long data_size,
+ unsigned long *failed_index)
+{
+ return 0;
+}
+
+static inline long plpar_guest_run_vcpu(unsigned long flags, unsigned long guest_id,
+ unsigned long vcpu_id, int *trap,
+ unsigned long *failed_index)
+{
+ return 0;
+}
+
+static inline long plpar_guest_create(unsigned long flags, unsigned long *guest_id)
+{
+ return 0;
+}
+
+static inline long plpar_guest_delete(unsigned long flags, u64 guest_id)
+{
+ return 0;
+}
+
+static inline long plpar_guest_get_capabilities(unsigned long flags,
+ unsigned long *capabilities)
+{
+ return 0;
+}
+
+static inline long plpar_guest_set_capabilities(unsigned long flags,
+ unsigned long capabilities)
+{
+ return 0;
+}
+
+#endif /* CONFIG_PPC_PSERIES */
+
#endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
diff --git a/arch/powerpc/include/asm/plpks.h b/arch/powerpc/include/asm/plpks.h
new file mode 100644
index 000000000000..7a84069759b0
--- /dev/null
+++ b/arch/powerpc/include/asm/plpks.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 IBM Corporation
+ * Author: Nayna Jain <nayna@linux.ibm.com>
+ *
+ * Platform keystore for pseries LPAR(PLPKS).
+ */
+
+#ifndef _ASM_POWERPC_PLPKS_H
+#define _ASM_POWERPC_PLPKS_H
+
+#ifdef CONFIG_PSERIES_PLPKS
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+// Object policy flags from supported_policies
+#define PLPKS_OSSECBOOTAUDIT PPC_BIT32(1) // OS secure boot must be audit/enforce
+#define PLPKS_OSSECBOOTENFORCE PPC_BIT32(2) // OS secure boot must be enforce
+#define PLPKS_PWSET PPC_BIT32(3) // No access without password set
+#define PLPKS_WORLDREADABLE PPC_BIT32(4) // Readable without authentication
+#define PLPKS_IMMUTABLE PPC_BIT32(5) // Once written, object cannot be removed
+#define PLPKS_TRANSIENT PPC_BIT32(6) // Object does not persist through reboot
+#define PLPKS_SIGNEDUPDATE PPC_BIT32(7) // Object can only be modified by signed updates
+#define PLPKS_HVPROVISIONED PPC_BIT32(28) // Hypervisor has provisioned this object
+
+// Signature algorithm flags from signed_update_algorithms
+#define PLPKS_ALG_RSA2048 PPC_BIT(0)
+#define PLPKS_ALG_RSA4096 PPC_BIT(1)
+
+// Object label OS metadata flags
+#define PLPKS_VAR_LINUX 0x02
+#define PLPKS_VAR_COMMON 0x04
+
+// Flags for which consumer owns an object
+#define PLPKS_FW_OWNER 0x1
+#define PLPKS_BOOTLOADER_OWNER 0x2
+#define PLPKS_OS_OWNER 0x3
+
+// Flags for label metadata fields
+#define PLPKS_LABEL_VERSION 0
+#define PLPKS_MAX_LABEL_ATTR_SIZE 16
+#define PLPKS_MAX_NAME_SIZE 239
+#define PLPKS_MAX_DATA_SIZE 4000
+
+// Timeouts for PLPKS operations
+#define PLPKS_MAX_TIMEOUT (5 * USEC_PER_SEC)
+#define PLPKS_FLUSH_SLEEP 10000 // usec
+
+struct plpks_var {
+ char *component; // plpks_signed_update_var() is documented below as expecting NULL here
+ u8 *name;
+ u8 *data; // buffer that callers of plpks_read_*_var() allocate
+ u32 policy; // presumably PLPKS_* object policy flags - confirm
+ u16 namelen; // presumably the length of name in bytes - confirm
+ u16 datalen; // length of data; set to the object's size when no buffer is provided
+ u8 os; // presumably one of the PLPKS_VAR_* label flags - confirm
+};
+
+struct plpks_var_name {
+ u8 *name;
+ u16 namelen; // presumably the length of name in bytes - confirm
+};
+
+struct plpks_var_name_list {
+ u32 varcount; // presumably the number of entries in varlist - confirm
+ struct plpks_var_name varlist[]; // flexible array member, so it has to stay the last field
+};
+
+/**
+ * Updates the authenticated variable. It expects NULL as the component.
+ */
+int plpks_signed_update_var(struct plpks_var *var, u64 flags);
+
+/**
+ * Writes the specified var and its data to PKS.
+ * Any caller of PKS driver should present a valid component type for
+ * their variable.
+ */
+int plpks_write_var(struct plpks_var var);
+
+/**
+ * Removes the specified var and its data from PKS.
+ */
+int plpks_remove_var(char *component, u8 varos,
+ struct plpks_var_name vname);
+
+/**
+ * Returns the data for the specified os variable.
+ *
+ * Caller must allocate a buffer in var->data with length in var->datalen.
+ * If no buffer is provided, var->datalen will be populated with the object's
+ * size.
+ */
+int plpks_read_os_var(struct plpks_var *var);
+
+/**
+ * Returns the data for the specified firmware variable.
+ *
+ * Caller must allocate a buffer in var->data with length in var->datalen.
+ * If no buffer is provided, var->datalen will be populated with the object's
+ * size.
+ */
+int plpks_read_fw_var(struct plpks_var *var);
+
+/**
+ * Returns the data for the specified bootloader variable.
+ *
+ * Caller must allocate a buffer in var->data with length in var->datalen.
+ * If no buffer is provided, var->datalen will be populated with the object's
+ * size.
+ */
+int plpks_read_bootloader_var(struct plpks_var *var);
+
+/**
+ * Returns if PKS is available on this LPAR.
+ */
+bool plpks_is_available(void);
+
+/**
+ * Returns version of the Platform KeyStore.
+ */
+u8 plpks_get_version(void);
+
+/**
+ * Returns hypervisor storage overhead per object, not including the size of
+ * the object or label. Only valid for config version >= 2
+ */
+u16 plpks_get_objoverhead(void);
+
+/**
+ * Returns maximum password size. Must be >= 32 bytes
+ */
+u16 plpks_get_maxpwsize(void);
+
+/**
+ * Returns maximum object size supported by Platform KeyStore.
+ */
+u16 plpks_get_maxobjectsize(void);
+
+/**
+ * Returns maximum object label size supported by Platform KeyStore.
+ */
+u16 plpks_get_maxobjectlabelsize(void);
+
+/**
+ * Returns total size of the configured Platform KeyStore.
+ */
+u32 plpks_get_totalsize(void);
+
+/**
+ * Returns used space from the total size of the Platform KeyStore.
+ */
+u32 plpks_get_usedspace(void);
+
+/**
+ * Returns bitmask of policies supported by the hypervisor.
+ */
+u32 plpks_get_supportedpolicies(void);
+
+/**
+ * Returns maximum byte size of a single object supported by the hypervisor.
+ * Only valid for config version >= 3
+ */
+u32 plpks_get_maxlargeobjectsize(void);
+
+/**
+ * Returns bitmask of signature algorithms supported for signed updates.
+ * Only valid for config version >= 3
+ */
+u64 plpks_get_signedupdatealgorithms(void);
+
+/**
+ * Returns the length of the PLPKS password in bytes.
+ */
+u16 plpks_get_passwordlen(void);
+
+/**
+ * Called in early init to retrieve and clear the PLPKS password from the DT.
+ */
+void plpks_early_init_devtree(void);
+
+/**
+ * Populates the FDT with the PLPKS password to prepare for kexec.
+ */
+int plpks_populate_fdt(void *fdt);
+#else // CONFIG_PSERIES_PLPKS
+static inline bool plpks_is_available(void) { return false; } // always false when CONFIG_PSERIES_PLPKS is not set
+static inline u16 plpks_get_passwordlen(void) { BUILD_BUG(); } // NOTE(review): no return value; presumably must never be reachable in this config - confirm
+static inline void plpks_early_init_devtree(void) { } // nothing to do in this config
+static inline int plpks_populate_fdt(void *fdt) { BUILD_BUG(); } // NOTE(review): same pattern as plpks_get_passwordlen() - confirm
+#endif // CONFIG_PSERIES_PLPKS
+
+#endif // _ASM_POWERPC_PLPKS_H
diff --git a/arch/powerpc/include/asm/pmac_feature.h b/arch/powerpc/include/asm/pmac_feature.h
index 10902c9375d0..420e2878ae67 100644
--- a/arch/powerpc/include/asm/pmac_feature.h
+++ b/arch/powerpc/include/asm/pmac_feature.h
@@ -46,7 +46,7 @@
/* PowerSurge are the first generation of PCI Pmacs. This includes
* all of the Grand-Central based machines. We currently don't
- * differenciate most of them.
+ * differentiate most of them.
*/
#define PMAC_TYPE_PSURGE 0x10 /* PowerSurge */
#define PMAC_TYPE_ANS 0x11 /* Apple Network Server */
@@ -192,7 +192,7 @@ static inline long pmac_call_feature(int selector, struct device_node* node,
/* PMAC_FTR_BMAC_ENABLE (struct device_node* node, 0, int value)
* enable/disable the bmac (ethernet) cell of a mac-io ASIC, also drive
- * it's reset line
+ * its reset line
*/
#define PMAC_FTR_BMAC_ENABLE PMAC_FTR_DEF(6)
@@ -210,7 +210,7 @@ static inline long pmac_call_feature(int selector, struct device_node* node,
/* PMAC_FTR_SOUND_CHIP_ENABLE (struct device_node* node, 0, int value)
* enable/disable the sound chip, whatever it is and provided it can
- * acually be controlled
+ * actually be controlled
*/
#define PMAC_FTR_SOUND_CHIP_ENABLE PMAC_FTR_DEF(9)
@@ -401,5 +401,17 @@ extern u32 __iomem *uninorth_base;
*/
extern int pmac_get_uninorth_variant(void);
+/*
+ * Power macintoshes have either a CUDA, PMU or SMU controlling
+ * system reset, power, NVRAM, RTC.
+ */
+typedef enum sys_ctrler_kind {
+ SYS_CTRLER_UNKNOWN = 0, /* presumably "controller not identified"; note it is the zero value - confirm */
+ SYS_CTRLER_CUDA = 1,
+ SYS_CTRLER_PMU = 2,
+ SYS_CTRLER_SMU = 3,
+} sys_ctrler_t; /* type of the global sys_ctrler declared right after this */
+extern sys_ctrler_t sys_ctrler;
+
#endif /* __ASM_POWERPC_PMAC_FEATURE_H */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/pmac_low_i2c.h b/arch/powerpc/include/asm/pmac_low_i2c.h
index 01d71826d92f..21bd7297c87f 100644
--- a/arch/powerpc/include/asm/pmac_low_i2c.h
+++ b/arch/powerpc/include/asm/pmac_low_i2c.h
@@ -1,13 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* include/asm-ppc/pmac_low_i2c.h
*
* Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#ifndef __PMAC_LOW_I2C_H__
#define __PMAC_LOW_I2C_H__
diff --git a/arch/powerpc/include/asm/pmac_pfunc.h b/arch/powerpc/include/asm/pmac_pfunc.h
index 1330d6a58c57..cee4e9f5b8cf 100644
--- a/arch/powerpc/include/asm/pmac_pfunc.h
+++ b/arch/powerpc/include/asm/pmac_pfunc.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PMAC_PFUNC_H__
#define __PMAC_PFUNC_H__
@@ -244,6 +245,7 @@ extern void pmf_put_function(struct pmf_function *func);
extern int pmf_call_one(struct pmf_function *func, struct pmf_args *args);
+int pmac_pfunc_base_install(void);
/* Suspend/resume code called by via-pmu directly for now */
extern void pmac_pfunc_base_suspend(void);
diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index 5a9ede4962cb..3c09109e708e 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -1,20 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* pmc.h
* Copyright (C) 2004 David Gibson, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _POWERPC_PMC_H
#define _POWERPC_PMC_H
@@ -31,12 +18,29 @@ void ppc_enable_pmcs(void);
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/lppaca.h>
+#include <asm/firmware.h>
static inline void ppc_set_pmu_inuse(int inuse)
{
- get_lppaca()->pmcregs_in_use = inuse;
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+#ifdef CONFIG_PPC_PSERIES
+ get_lppaca()->pmcregs_in_use = inuse;
+#endif
+ }
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ get_paca()->pmcregs_in_use = inuse;
+#endif
+#endif
}
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static inline int ppc_get_pmu_inuse(void)
+{
+ return get_paca()->pmcregs_in_use;
+}
+#endif
+
extern void power4_enable_pmcs(void);
#else /* CONFIG_PPC64 */
diff --git a/arch/powerpc/include/asm/pmi.h b/arch/powerpc/include/asm/pmi.h
deleted file mode 100644
index b4e91fbf5081..000000000000
--- a/arch/powerpc/include/asm/pmi.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef _POWERPC_PMI_H
-#define _POWERPC_PMI_H
-
-/*
- * Definitions for talking with PMI device on PowerPC
- *
- * PMI (Platform Management Interrupt) is a way to communicate
- * with the BMC (Baseboard Management Controller) via interrupts.
- * Unlike IPMI it is bidirectional and has a low latency.
- *
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
- *
- * Author: Christian Krafft <krafft@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifdef __KERNEL__
-
-#define PMI_TYPE_FREQ_CHANGE 0x01
-#define PMI_TYPE_POWER_BUTTON 0x02
-#define PMI_READ_TYPE 0
-#define PMI_READ_DATA0 1
-#define PMI_READ_DATA1 2
-#define PMI_READ_DATA2 3
-#define PMI_WRITE_TYPE 4
-#define PMI_WRITE_DATA0 5
-#define PMI_WRITE_DATA1 6
-#define PMI_WRITE_DATA2 7
-
-#define PMI_ACK 0x80
-
-#define PMI_TIMEOUT 100
-
-typedef struct {
- u8 type;
- u8 data0;
- u8 data1;
- u8 data2;
-} pmi_message_t;
-
-struct pmi_handler {
- struct list_head node;
- u8 type;
- void (*handle_pmi_message) (pmi_message_t);
-};
-
-int pmi_register_handler(struct pmi_handler *);
-void pmi_unregister_handler(struct pmi_handler *);
-
-int pmi_send_message(pmi_message_t);
-
-#endif /* __KERNEL__ */
-#endif /* _POWERPC_PMI_H */
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
new file mode 100644
index 000000000000..9acd1fbf1197
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+// Copyright 2017 IBM Corp.
+#ifndef _ASM_PNV_OCXL_H
+#define _ASM_PNV_OCXL_H
+
+#include <linux/bitfield.h>
+#include <linux/pci.h>
+
+#define PNV_OCXL_TL_MAX_TEMPLATE 63
+#define PNV_OCXL_TL_BITS_PER_RATE 4
+#define PNV_OCXL_TL_RATE_BUF_SIZE ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
+
+#define PNV_OCXL_ATSD_TIMEOUT 1
+
+/* TLB Management Instructions */
+#define PNV_OCXL_ATSD_LNCH 0x00
+/* Radix Invalidate */
+#define PNV_OCXL_ATSD_LNCH_R PPC_BIT(0)
+/* Radix Invalidation Control
+ * 0b00 Just invalidate TLB.
+ * 0b01 Invalidate just Page Walk Cache.
+ * 0b10 Invalidate TLB, Page Walk Cache, and any
+ * caching of Partition and Process Table Entries.
+ */
+#define PNV_OCXL_ATSD_LNCH_RIC PPC_BITMASK(1, 2)
+/* Number and Page Size of translations to be invalidated */
+#define PNV_OCXL_ATSD_LNCH_LP PPC_BITMASK(3, 10)
+/* Invalidation Criteria
+ * 0b00 Invalidate just the target VA.
+ * 0b01 Invalidate matching PID.
+ */
+#define PNV_OCXL_ATSD_LNCH_IS PPC_BITMASK(11, 12)
+/* 0b1: Process Scope, 0b0: Partition Scope */
+#define PNV_OCXL_ATSD_LNCH_PRS PPC_BIT(13)
+/* Invalidation Flag */
+#define PNV_OCXL_ATSD_LNCH_B PPC_BIT(14)
+/* Actual Page Size to be invalidated
+ * 000 4KB
+ * 101 64KB
+ * 001 2MB
+ * 010 1GB
+ */
+#define PNV_OCXL_ATSD_LNCH_AP PPC_BITMASK(15, 17)
+/* Defines the large page select
+ * L=0b0 for 4KB pages
+ * L=0b1 for large pages)
+ */
+#define PNV_OCXL_ATSD_LNCH_L PPC_BIT(18)
+/* Process ID */
+#define PNV_OCXL_ATSD_LNCH_PID PPC_BITMASK(19, 38)
+/* NoFlush – Assumed to be 0b0 */
+#define PNV_OCXL_ATSD_LNCH_F PPC_BIT(39)
+#define PNV_OCXL_ATSD_LNCH_OCAPI_SLBI PPC_BIT(40)
+#define PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON PPC_BIT(41)
+#define PNV_OCXL_ATSD_AVA 0x08
+#define PNV_OCXL_ATSD_AVA_AVA PPC_BITMASK(0, 51)
+#define PNV_OCXL_ATSD_STAT 0x10
+
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported);
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+ char *rate_buf, int rate_buf_size);
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+ uint64_t rate_buf_phys, int rate_buf_size);
+
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+ void __iomem *tfc, void __iomem *pe_handle);
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+ void __iomem **dar, void __iomem **tfc,
+ void __iomem **pe_handle);
+
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data);
+void pnv_ocxl_spa_release(void *platform_data);
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
+
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+ uint64_t lpcr, void __iomem **arva);
+void pnv_ocxl_unmap_lpar(void __iomem *arva);
+void pnv_ocxl_tlb_invalidate(void __iomem *arva,
+ unsigned long pid,
+ unsigned long addr,
+ unsigned long page_size);
+#endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
new file mode 100644
index 000000000000..7e9a479951a3
--- /dev/null
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2014 IBM Corp.
+ */
+
+#ifndef _ASM_PNV_PCI_H
+#define _ASM_PNV_PCI_H
+
+#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
+#include <linux/irq.h>
+#include <linux/of.h>
+#include <asm/opal-api.h>
+
+#define PCI_SLOT_ID_PREFIX (1UL << 63)
+#define PCI_SLOT_ID(phb_id, bdfn) \
+ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id))
+#define PCI_PHB_SLOT_ID(phb_id) (phb_id)
+
+extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id);
+extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len);
+extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state);
+extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state);
+extern int pnv_pci_set_power_state(uint64_t id, uint8_t state,
+ struct opal_msg *msg);
+
+int64_t pnv_opal_pci_msi_eoi(struct irq_data *d);
+bool is_pnv_opal_msi(struct irq_chip *chip);
+
+struct pnv_php_slot {
+ struct hotplug_slot slot;
+ uint64_t id;
+ char *name;
+ int slot_no;
+ unsigned int flags;
+#define PNV_PHP_FLAG_BROKEN_PDC 0x1
+ struct kref kref;
+#define PNV_PHP_STATE_INITIALIZED 0
+#define PNV_PHP_STATE_REGISTERED 1
+#define PNV_PHP_STATE_POPULATED 2
+#define PNV_PHP_STATE_OFFLINE 3
+ int state;
+ int irq;
+ struct workqueue_struct *wq;
+ struct device_node *dn;
+ struct pci_dev *pdev;
+ struct pci_bus *bus;
+ bool power_state_check;
+ u8 attention_state;
+ void *fdt;
+ void *dt;
+ struct of_changeset ocs;
+ struct pnv_php_slot *parent;
+ struct list_head children;
+ struct list_head link;
+};
+extern struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn);
+extern int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
+ uint8_t state);
+
+#endif
diff --git a/arch/powerpc/include/asm/powernv.h b/arch/powerpc/include/asm/powernv.h
new file mode 100644
index 000000000000..e1a858718716
--- /dev/null
+++ b/arch/powerpc/include/asm/powernv.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2017 IBM Corp.
+ */
+
+#ifndef _ASM_POWERNV_H
+#define _ASM_POWERNV_H
+
+#ifdef CONFIG_PPC_POWERNV
+extern void powernv_set_nmmu_ptcr(unsigned long ptcr);
+
+void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val);
+
+void pnv_tm_init(void);
+#else
+static inline void powernv_set_nmmu_ptcr(unsigned long ptcr) { }
+
+static inline void pnv_tm_init(void) { }
+#endif
+
+#endif /* _ASM_POWERNV_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 6f8536208049..55ca49d18319 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -1,19 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2009 Freescale Semiconductor, Inc.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* provides masks and opcode images for use by code generation, emulation
* and for instructions that older assemblers might not know about
*/
#ifndef _ASM_POWERPC_PPC_OPCODE_H
#define _ASM_POWERPC_PPC_OPCODE_H
-#include <linux/stringify.h>
-#include <asm/asm-compat.h>
+#include <asm/asm-const.h>
#define __REG_R0 0
#define __REG_R1 1
@@ -81,175 +76,257 @@
#define __REGA0_R30 30
#define __REGA0_R31 31
+/* For use with PPC_RAW_() macros */
+#define _R0 0
+#define _R1 1
+#define _R2 2
+#define _R3 3
+#define _R4 4
+#define _R5 5
+#define _R6 6
+#define _R7 7
+#define _R8 8
+#define _R9 9
+#define _R10 10
+#define _R11 11
+#define _R12 12
+#define _R13 13
+#define _R14 14
+#define _R15 15
+#define _R16 16
+#define _R17 17
+#define _R18 18
+#define _R19 19
+#define _R20 20
+#define _R21 21
+#define _R22 22
+#define _R23 23
+#define _R24 24
+#define _R25 25
+#define _R26 26
+#define _R27 27
+#define _R28 28
+#define _R29 29
+#define _R30 30
+#define _R31 31
+
+#define IMM_L(i) ((uintptr_t)(i) & 0xffff)
+#define IMM_DS(i) ((uintptr_t)(i) & 0xfffc)
+#define IMM_DQ(i) ((uintptr_t)(i) & 0xfff0)
+#define IMM_D0(i) (((uintptr_t)(i) >> 16) & 0x3ffff)
+#define IMM_D1(i) IMM_L(i)
+
+/*
+ * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
+ * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the
+ * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
+ *
+ * XXX: should these mask out possible sign bits?
+ */
+#define IMM_H(i) ((uintptr_t)(i)>>16)
+#define IMM_HA(i) (((uintptr_t)(i)>>16) + \
+ (((uintptr_t)(i) & 0x8000) >> 15))
+
+/*
+ * 18-bit immediate helper for prefix 18-bit upper immediate si0 field.
+ */
+#define IMM_H18(i) (((uintptr_t)(i)>>16) & 0x3ffff)
+
+
/* opcode and xopcode for instructions */
-#define OP_TRAP 3
-#define OP_TRAP_64 2
+#define OP_PREFIX 1
+#define OP_TRAP_64 2
+#define OP_TRAP 3
+#define OP_SC 17
+#define OP_19 19
+#define OP_31 31
+#define OP_LWZ 32
+#define OP_LWZU 33
+#define OP_LBZ 34
+#define OP_LBZU 35
+#define OP_STW 36
+#define OP_STWU 37
+#define OP_STB 38
+#define OP_STBU 39
+#define OP_LHZ 40
+#define OP_LHZU 41
+#define OP_LHA 42
+#define OP_LHAU 43
+#define OP_STH 44
+#define OP_STHU 45
+#define OP_LMW 46
+#define OP_STMW 47
+#define OP_LFS 48
+#define OP_LFSU 49
+#define OP_LFD 50
+#define OP_LFDU 51
+#define OP_STFS 52
+#define OP_STFSU 53
+#define OP_STFD 54
+#define OP_STFDU 55
+#define OP_LQ 56
+#define OP_LD 58
+#define OP_STD 62
+
+#define OP_19_XOP_RFID 18
+#define OP_19_XOP_RFMCI 38
+#define OP_19_XOP_RFDI 39
+#define OP_19_XOP_RFI 50
+#define OP_19_XOP_RFCI 51
+#define OP_19_XOP_RFSCV 82
+#define OP_19_XOP_HRFID 274
+#define OP_19_XOP_URFID 306
+#define OP_19_XOP_STOP 370
+#define OP_19_XOP_DOZE 402
+#define OP_19_XOP_NAP 434
+#define OP_19_XOP_SLEEP 466
+#define OP_19_XOP_RVWINKLE 498
#define OP_31_XOP_TRAP 4
+#define OP_31_XOP_LDX 21
#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_LDUX 53
#define OP_31_XOP_DCBST 54
#define OP_31_XOP_LWZUX 55
#define OP_31_XOP_TRAP_64 68
#define OP_31_XOP_DCBF 86
#define OP_31_XOP_LBZX 87
+#define OP_31_XOP_STDX 149
#define OP_31_XOP_STWX 151
+#define OP_31_XOP_STDUX 181
+#define OP_31_XOP_STWUX 183
#define OP_31_XOP_STBX 215
#define OP_31_XOP_LBZUX 119
#define OP_31_XOP_STBUX 247
#define OP_31_XOP_LHZX 279
#define OP_31_XOP_LHZUX 311
+#define OP_31_XOP_MSGSNDP 142
+#define OP_31_XOP_MSGCLRP 174
+#define OP_31_XOP_MTMSR 146
+#define OP_31_XOP_MTMSRD 178
+#define OP_31_XOP_TLBIE 306
#define OP_31_XOP_MFSPR 339
+#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343
+#define OP_31_XOP_LWAUX 373
#define OP_31_XOP_LHAUX 375
#define OP_31_XOP_STHX 407
#define OP_31_XOP_STHUX 439
#define OP_31_XOP_MTSPR 467
#define OP_31_XOP_DCBI 470
+#define OP_31_XOP_LDBRX 532
#define OP_31_XOP_LWBRX 534
#define OP_31_XOP_TLBSYNC 566
+#define OP_31_XOP_STDBRX 660
#define OP_31_XOP_STWBRX 662
+#define OP_31_XOP_STFSX 663
+#define OP_31_XOP_STFSUX 695
+#define OP_31_XOP_STFDX 727
+#define OP_31_XOP_HASHCHK 754
+#define OP_31_XOP_STFDUX 759
#define OP_31_XOP_LHBRX 790
+#define OP_31_XOP_LFIWAX 855
+#define OP_31_XOP_LFIWZX 887
#define OP_31_XOP_STHBRX 918
+#define OP_31_XOP_STFIWX 983
+
+/* VSX Scalar Load Instructions */
+#define OP_31_XOP_LXSDX 588
+#define OP_31_XOP_LXSSPX 524
+#define OP_31_XOP_LXSIWAX 76
+#define OP_31_XOP_LXSIWZX 12
-#define OP_LWZ 32
-#define OP_LD 58
-#define OP_LWZU 33
-#define OP_LBZ 34
-#define OP_LBZU 35
-#define OP_STW 36
-#define OP_STWU 37
-#define OP_STD 62
-#define OP_STB 38
-#define OP_STBU 39
-#define OP_LHZ 40
-#define OP_LHZU 41
-#define OP_LHA 42
-#define OP_LHAU 43
-#define OP_STH 44
-#define OP_STHU 45
+/* VSX Scalar Store Instructions */
+#define OP_31_XOP_STXSDX 716
+#define OP_31_XOP_STXSSPX 652
+#define OP_31_XOP_STXSIWX 140
+
+/* VSX Vector Load Instructions */
+#define OP_31_XOP_LXVD2X 844
+#define OP_31_XOP_LXVW4X 780
+
+/* VSX Vector Load and Splat Instruction */
+#define OP_31_XOP_LXVDSX 332
+
+/* VSX Vector Store Instructions */
+#define OP_31_XOP_STXVD2X 972
+#define OP_31_XOP_STXVW4X 908
+
+#define OP_31_XOP_LFSX 535
+#define OP_31_XOP_LFSUX 567
+#define OP_31_XOP_LFDX 599
+#define OP_31_XOP_LFDUX 631
+
+/* VMX Vector Load Instructions */
+#define OP_31_XOP_LVX 103
+
+/* VMX Vector Store Instructions */
+#define OP_31_XOP_STVX 231
/* sorted alphabetically */
-#define PPC_INST_BHRBE 0x7c00025c
-#define PPC_INST_CLRBHRB 0x7c00035c
+#define PPC_INST_BCCTR_FLUSH 0x4c400420
+#define PPC_INST_COPY 0x7c20060c
#define PPC_INST_DCBA 0x7c0005ec
#define PPC_INST_DCBA_MASK 0xfc0007fe
-#define PPC_INST_DCBAL 0x7c2005ec
-#define PPC_INST_DCBZL 0x7c2007ec
-#define PPC_INST_ICBT 0x7c00002c
+#define PPC_INST_DSSALL 0x7e00066c
#define PPC_INST_ISEL 0x7c00001e
#define PPC_INST_ISEL_MASK 0xfc00003e
-#define PPC_INST_LDARX 0x7c0000a8
-#define PPC_INST_LOGMPP 0x7c0007e4
#define PPC_INST_LSWI 0x7c0004aa
#define PPC_INST_LSWX 0x7c00042a
-#define PPC_INST_LWARX 0x7c000028
#define PPC_INST_LWSYNC 0x7c2004ac
#define PPC_INST_SYNC 0x7c0004ac
#define PPC_INST_SYNC_MASK 0xfc0007fe
-#define PPC_INST_LXVD2X 0x7c000698
#define PPC_INST_MCRXR 0x7c000400
#define PPC_INST_MCRXR_MASK 0xfc0007fe
#define PPC_INST_MFSPR_PVR 0x7c1f42a6
-#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff
-#define PPC_INST_MFTMR 0x7c0002dc
-#define PPC_INST_MSGSND 0x7c00019c
-#define PPC_INST_MSGSNDP 0x7c00011c
-#define PPC_INST_MTTMR 0x7c0003dc
-#define PPC_INST_NOP 0x60000000
+#define PPC_INST_MFSPR_PVR_MASK 0xfc1ffffe
+#define PPC_INST_MTMSRD 0x7c000164
+#define PPC_INST_PASTE 0x7c20070d
+#define PPC_INST_PASTE_MASK 0xfc2007ff
#define PPC_INST_POPCNTB 0x7c0000f4
#define PPC_INST_POPCNTB_MASK 0xfc0007fe
-#define PPC_INST_POPCNTD 0x7c0003f4
-#define PPC_INST_POPCNTW 0x7c0002f4
-#define PPC_INST_RFCI 0x4c000066
-#define PPC_INST_RFDI 0x4c00004e
-#define PPC_INST_RFMCI 0x4c00004c
+#define PPC_INST_RFEBB 0x4c000124
+#define PPC_INST_RFID 0x4c000024
#define PPC_INST_MFSPR_DSCR 0x7c1102a6
-#define PPC_INST_MFSPR_DSCR_MASK 0xfc1fffff
+#define PPC_INST_MFSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR 0x7c1103a6
-#define PPC_INST_MTSPR_DSCR_MASK 0xfc1fffff
+#define PPC_INST_MTSPR_DSCR_MASK 0xfc1ffffe
#define PPC_INST_MFSPR_DSCR_USER 0x7c0302a6
-#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1fffff
+#define PPC_INST_MFSPR_DSCR_USER_MASK 0xfc1ffffe
#define PPC_INST_MTSPR_DSCR_USER 0x7c0303a6
-#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1fffff
-#define PPC_INST_SLBFEE 0x7c0007a7
-
+#define PPC_INST_MTSPR_DSCR_USER_MASK 0xfc1ffffe
#define PPC_INST_STRING 0x7c00042a
#define PPC_INST_STRING_MASK 0xfc0007fe
#define PPC_INST_STRING_GEN_MASK 0xfc00067e
-
#define PPC_INST_STSWI 0x7c0005aa
#define PPC_INST_STSWX 0x7c00052a
-#define PPC_INST_STXVD2X 0x7c000798
-#define PPC_INST_TLBIE 0x7c000264
-#define PPC_INST_TLBILX 0x7c000024
-#define PPC_INST_WAIT 0x7c00007c
-#define PPC_INST_TLBIVAX 0x7c000624
-#define PPC_INST_TLBSRX_DOT 0x7c0006a5
-#define PPC_INST_XXLOR 0xf0000510
-#define PPC_INST_XXSWAPD 0xf0000250
-#define PPC_INST_XVCPSGNDP 0xf0000780
#define PPC_INST_TRECHKPT 0x7c0007dd
#define PPC_INST_TRECLAIM 0x7c00075d
-#define PPC_INST_TABORT 0x7c00071d
-
-#define PPC_INST_NAP 0x4c000364
-#define PPC_INST_SLEEP 0x4c0003a4
-
-/* A2 specific instructions */
-#define PPC_INST_ERATWE 0x7c0001a6
-#define PPC_INST_ERATRE 0x7c000166
-#define PPC_INST_ERATILX 0x7c000066
-#define PPC_INST_ERATIVAX 0x7c000666
-#define PPC_INST_ERATSX 0x7c000126
-#define PPC_INST_ERATSX_DOT 0x7c000127
-
-/* Misc instructions for BPF compiler */
-#define PPC_INST_LD 0xe8000000
-#define PPC_INST_LHZ 0xa0000000
-#define PPC_INST_LHBRX 0x7c00062c
-#define PPC_INST_LWZ 0x80000000
-#define PPC_INST_STD 0xf8000000
-#define PPC_INST_STDU 0xf8000001
-#define PPC_INST_MFLR 0x7c0802a6
-#define PPC_INST_MTLR 0x7c0803a6
-#define PPC_INST_CMPWI 0x2c000000
-#define PPC_INST_CMPDI 0x2c200000
-#define PPC_INST_CMPLW 0x7c000040
-#define PPC_INST_CMPLWI 0x28000000
-#define PPC_INST_ADDI 0x38000000
-#define PPC_INST_ADDIS 0x3c000000
-#define PPC_INST_ADD 0x7c000214
-#define PPC_INST_SUB 0x7c000050
-#define PPC_INST_BLR 0x4e800020
-#define PPC_INST_BLRL 0x4e800021
-#define PPC_INST_MULLW 0x7c0001d6
-#define PPC_INST_MULHWU 0x7c000016
-#define PPC_INST_MULLI 0x1c000000
-#define PPC_INST_DIVWU 0x7c000396
-#define PPC_INST_RLWINM 0x54000000
-#define PPC_INST_RLDICR 0x78000004
-#define PPC_INST_SLW 0x7c000030
-#define PPC_INST_SRW 0x7c000430
-#define PPC_INST_AND 0x7c000038
-#define PPC_INST_ANDDOT 0x7c000039
-#define PPC_INST_OR 0x7c000378
-#define PPC_INST_XOR 0x7c000278
-#define PPC_INST_ANDI 0x70000000
-#define PPC_INST_ORI 0x60000000
-#define PPC_INST_ORIS 0x64000000
-#define PPC_INST_XORI 0x68000000
-#define PPC_INST_XORIS 0x6c000000
-#define PPC_INST_NEG 0x7c0000d0
-#define PPC_INST_BRANCH 0x48000000
+#define PPC_INST_TSR 0x7c0005dd
#define PPC_INST_BRANCH_COND 0x40800000
-#define PPC_INST_LBZCIX 0x7c0006aa
-#define PPC_INST_STBCIX 0x7c0007aa
+
+/* Prefixes */
+#define PPC_INST_LFS 0xc0000000
+#define PPC_INST_STFS 0xd0000000
+#define PPC_INST_LFD 0xc8000000
+#define PPC_INST_STFD 0xd8000000
+#define PPC_PREFIX_MLS 0x06000000
+#define PPC_PREFIX_8LS 0x04000000
+
+/* Prefixed instructions */
+#define PPC_INST_PADDI 0x38000000
+#define PPC_INST_PLD 0xe4000000
+#define PPC_INST_PSTD 0xf4000000
/* macros to insert fields into opcodes */
#define ___PPC_RA(a) (((a) & 0x1f) << 16)
#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+#define ___PPC_RC(c) (((c) & 0x1f) << 6)
#define ___PPC_RS(s) (((s) & 0x1f) << 21)
#define ___PPC_RT(t) ___PPC_RS(t)
+#define ___PPC_R(r) (((r) & 0x1) << 16)
+#define ___PPC_PRS(prs) (((prs) & 0x1) << 17)
+#define ___PPC_RIC(ric) (((ric) & 0x3) << 18)
#define __PPC_RA(a) ___PPC_RA(__REG_##a)
#define __PPC_RA0(a) ___PPC_RA(__REGA0_##a)
#define __PPC_RB(b) ___PPC_RB(__REG_##b)
@@ -259,140 +336,388 @@
#define __PPC_XB(b) ((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
#define __PPC_XS(s) ((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
#define __PPC_XT(s) __PPC_XS(s)
+#define __PPC_XSP(s) ((((s) & 0x1e) | (((s) >> 5) & 0x1)) << 21)
+#define __PPC_XTP(s) __PPC_XSP(s)
#define __PPC_T_TLB(t) (((t) & 0x3) << 21)
+#define __PPC_PL(p) (((p) & 0x3) << 16)
#define __PPC_WC(w) (((w) & 0x3) << 21)
#define __PPC_WS(w) (((w) & 0x1f) << 11)
#define __PPC_SH(s) __PPC_WS(s)
-#define __PPC_MB(s) (((s) & 0x1f) << 6)
+#define __PPC_SH64(s) (__PPC_SH(s) | (((s) & 0x20) >> 4))
+#define __PPC_MB(s) ___PPC_RC(s)
#define __PPC_ME(s) (((s) & 0x1f) << 1)
+#define __PPC_MB64(s) (__PPC_MB(s) | ((s) & 0x20))
+#define __PPC_ME64(s) __PPC_MB64(s)
#define __PPC_BI(s) (((s) & 0x1f) << 16)
#define __PPC_CT(t) (((t) & 0x0f) << 21)
+#define __PPC_SPR(r) ((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
+#define __PPC_RC21 (0x1 << 10)
+#define __PPC_PRFX_R(r) (((r) & 0x1) << 20)
+#define __PPC_EH(eh) (((eh) & 0x1) << 0)
/*
- * Only use the larx hint bit on 64bit CPUs. e500v1/v2 based CPUs will treat a
- * larx with EH set as an illegal instruction.
+ * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits
+ * has high bit set, high 16 bits must be adjusted. These macros do that (stolen
+ * from binutils).
*/
-#ifdef CONFIG_PPC64
-#define __PPC_EH(eh) (((eh) & 0x1) << 0)
-#else
-#define __PPC_EH(eh) 0
-#endif
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI((v) + 0x8000)
+#define PPC_HIGHER(v) (((v) >> 32) & 0xffff)
+#define PPC_HIGHEST(v) (((v) >> 48) & 0xffff)
+
+/* LI Field */
+#define PPC_LI_MASK 0x03fffffc
+#define PPC_LI(v) ((v) & PPC_LI_MASK)
-/* POWER8 Micro Partition Prefetch (MPP) parameters */
-/* Address mask is common for LOGMPP instruction and MPPR SPR */
-#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000
+/* Base instruction encoding */
+#define PPC_RAW_CP_ABORT (0x7c00068c)
+#define PPC_RAW_COPY(a, b) (PPC_INST_COPY | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DARN(t, l) (0x7c0005e6 | ___PPC_RT(t) | (((l) & 0x3) << 16))
+#define PPC_RAW_DCBAL(a, b) (0x7c2005ec | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_DCBZL(a, b) (0x7c2007ec | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_LQARX(t, a, b, eh) (0x7c000228 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_LDARX(t, a, b, eh) (0x7c0000a8 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_LWARX(t, a, b, eh) (0x7c000028 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_PHWSYNC (0x7c8004ac)
+#define PPC_RAW_PLWSYNC (0x7ca004ac)
+#define PPC_RAW_STQCX(t, a, b) (0x7c00016d | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MADDHD(t, a, b, c) (0x10000030 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MADDHDU(t, a, b, c) (0x10000031 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MADDLD(t, a, b, c) (0x10000033 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MSGSND(b) (0x7c00019c | ___PPC_RB(b))
+#define PPC_RAW_MSGSYNC (0x7c0006ec)
+#define PPC_RAW_MSGCLR(b) (0x7c0001dc | ___PPC_RB(b))
+#define PPC_RAW_MSGSNDP(b) (0x7c00011c | ___PPC_RB(b))
+#define PPC_RAW_MSGCLRP(b) (0x7c00015c | ___PPC_RB(b))
+#define PPC_RAW_PASTE(a, b) (0x7c20070d | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_POPCNTB(a, s) (PPC_INST_POPCNTB | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_POPCNTD(a, s) (0x7c0003f4 | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_POPCNTW(a, s) (0x7c0002f4 | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_RFCI (0x4c000066)
+#define PPC_RAW_RFDI (0x4c00004e)
+#define PPC_RAW_RFMCI (0x4c00004c)
+#define PPC_RAW_TLBILX_LPID (0x7c000024)
+#define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_WAIT_v203 (0x7c00007c)
+#define PPC_RAW_WAIT(w, p) (0x7c00003c | __PPC_WC(w) | __PPC_PL(p))
+#define PPC_RAW_TLBIE(lp, a) (0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp))
+#define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \
+ (0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \
+ (0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBIEL_v205(rb, l) (0x7c000224 | ___PPC_RB(rb) | (l << 21))
+#define PPC_RAW_TLBSRX_DOT(a, b) (0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_TLBIVAX(a, b) (0x7c000624 | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATWE(s, a, w) (0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_RAW_ERATRE(s, a, w) (0x7c000166 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_RAW_ERATILX(t, a, b) (0x7c000066 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATIVAX(s, a, b) (0x7c000666 | __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATSX(t, a, w) (0x7c000126 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATSX_DOT(t, a, w) (0x7c000127 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | __PPC_RT(t) | __PPC_RB(b))
+#define __PPC_RAW_SLBFEE_DOT(t, b) (0x7c0007a7 | ___PPC_RT(t) | ___PPC_RB(b))
+#define PPC_RAW_ICBT(c, a, b) (0x7c00002c | __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_LBZCIX(t, a, b) (0x7c0006aa | __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_STBCIX(s, a, b) (0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_DCBFPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
+#define PPC_RAW_DCBSTPS(a, b) (0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
+#define PPC_RAW_SC() (0x44000002)
+#define PPC_RAW_SYNC() (0x7c0004ac)
+#define PPC_RAW_ISYNC() (0x4c00012c)
+#define PPC_RAW_LWSYNC() (0x7c2004ac)
+
+/*
+ * Define what the VSX XX1 form instructions will look like, then add
+ * the 128 bit load store instructions based on that.
+ */
+#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
+#define PPC_RAW_STXVD2X(s, a, b) (0x7c000798 | VSX_XX1((s), a, b))
+#define PPC_RAW_LXVD2X(s, a, b) (0x7c000698 | VSX_XX1((s), a, b))
+#define PPC_RAW_MFVRD(a, t) (0x7c000066 | VSX_XX1((t) + 32, a, R0))
+#define PPC_RAW_MTVRD(t, a) (0x7c000166 | VSX_XX1((t) + 32, a, R0))
+#define PPC_RAW_VPMSUMW(t, a, b) (0x10000488 | VSX_XX3((t), a, b))
+#define PPC_RAW_VPMSUMD(t, a, b) (0x100004c8 | VSX_XX3((t), a, b))
+#define PPC_RAW_XXLOR(t, a, b) (0xf0000490 | VSX_XX3((t), a, b))
+#define PPC_RAW_XXSWAPD(t, a) (0xf0000250 | VSX_XX3((t), a, a))
+#define PPC_RAW_XVCPSGNDP(t, a, b) ((0xf0000780 | VSX_XX3((t), (a), (b))))
+#define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \
+ ((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+#define PPC_RAW_LXVP(xtp, a, i) (0x18000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_DQ(i))
+#define PPC_RAW_STXVP(xsp, a, i) (0x18000001 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_DQ(i))
+#define PPC_RAW_LXVPX(xtp, a, b) (0x7c00029a | __PPC_XTP(xtp) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STXVPX(xsp, a, b) (0x7c00039a | __PPC_XSP(xsp) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_PLXVP_P(xtp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PLXVP_S(xtp, i, a, pr) (0xe8000000 | __PPC_XTP(xtp) | ___PPC_RA(a) | IMM_D1(i))
+#define PPC_RAW_PSTXVP_P(xsp, i, a, pr) (PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_D0(i))
+#define PPC_RAW_PSTXVP_S(xsp, i, a, pr) (0xf8000000 | __PPC_XSP(xsp) | ___PPC_RA(a) | IMM_D1(i))
+#define PPC_RAW_NAP (0x4c000364)
+#define PPC_RAW_SLEEP (0x4c0003a4)
+#define PPC_RAW_WINKLE (0x4c0003e4)
+#define PPC_RAW_STOP (0x4c0002e4)
+#define PPC_RAW_CLRBHRB (0x7c00035c)
+#define PPC_RAW_MFBHRBE(r, n) (0x7c00025c | __PPC_RT(r) | (((n) & 0x3ff) << 11))
+#define PPC_RAW_TRECHKPT (PPC_INST_TRECHKPT)
+#define PPC_RAW_TRECLAIM(r) (PPC_INST_TRECLAIM | __PPC_RA(r))
+#define PPC_RAW_TABORT(r) (0x7c00071d | __PPC_RA(r))
+#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6))
+#define PPC_RAW_MTTMR(tmr, r) (0x7c0003dc | TMRN(tmr) | ___PPC_RS(r))
+#define PPC_RAW_MFTMR(tmr, r) (0x7c0002dc | TMRN(tmr) | ___PPC_RT(r))
+#define PPC_RAW_ICSWX(s, a, b) (0x7c00032d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ICSWEPX(s, a, b) (0x7c00076d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SLBIA(IH) (0x7c0003e4 | (((IH) & 0x7) << 21))
+#define PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb) \
+ (0x100000c7 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21)
+#define PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb) \
+ (0x10000006 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21)
+#define PPC_RAW_LD(r, base, i) (0xe8000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_LWA(r, base, i) (0xe8000002 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_LWZ(r, base, i) (0x80000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LWZX(t, a, b) (0x7c00002e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STD(r, base, i) (0xf8000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_STDCX(s, a, b) (0x7c0001ad | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LFSX(t, a, b) (0x7c00042e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STFSX(s, a, b) (0x7c00052e | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LFDX(t, a, b) (0x7c0004ae | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LVX(t, a, b) (0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STVX(s, a, b) (0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDE(t, a, b) (0x7c000114 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDZE(t, a) (0x7c000194 | ___PPC_RT(t) | ___PPC_RA(a))
+#define PPC_RAW_ADDME(t, a) (0x7c0001d4 | ___PPC_RT(t) | ___PPC_RA(a))
+#define PPC_RAW_ADD(t, a, b) (0x7c000214 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADD_DOT(t, a, b) (0x7c000214 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_ADDC(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDC_DOT(t, a, b) (0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_NOP() PPC_RAW_ORI(0, 0, 0)
+#define PPC_RAW_BLR() (0x4e800020)
+#define PPC_RAW_BLRL() (0x4e800021)
+#define PPC_RAW_MTLR(r) (0x7c0803a6 | ___PPC_RT(r))
+#define PPC_RAW_MFLR(t) (0x7c0802a6 | ___PPC_RT(t))
+#define PPC_RAW_BCTR() (0x4e800420)
+#define PPC_RAW_BCTRL() (0x4e800421)
+#define PPC_RAW_MTCTR(r) (0x7c0903a6 | ___PPC_RT(r))
+#define PPC_RAW_ADDI(d, a, i) (0x38000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_LI(r, i) PPC_RAW_ADDI(r, 0, i)
+#define PPC_RAW_ADDIS(d, a, i) (0x3c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC(d, a, i) (0x30000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC_DOT(d, a, i) (0x34000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_LIS(r, i) PPC_RAW_ADDIS(r, 0, i)
+#define PPC_RAW_STDX(r, base, b) (0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_STDU(r, base, i) (0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_RAW_STW(r, base, i) (0x90000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STWU(r, base, i) (0x94000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STH(r, base, i) (0xb0000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STB(r, base, i) (0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LBZ(r, base, i) (0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LHA(r, base, i) (0xa8000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPDI(a, i) (0x2c200000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPW(a, b) (0x7c000000 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPD(a, b) (0x7c200000 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPLWI(a, i) (0x28000000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPLDI(a, i) (0x28200000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPLW(a, b) (0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPLD(a, b) (0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUB(d, a, b) (0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b))
+#define PPC_RAW_SUBFC(d, a, b) (0x7c000010 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFE(d, a, b) (0x7c000110 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFIC(d, a, i) (0x20000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_SUBFZE(d, a) (0x7c000190 | ___PPC_RT(d) | ___PPC_RA(a))
+#define PPC_RAW_MULD(d, a, b) (0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULW(d, a, b) (0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULHWU(d, a, b) (0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULI(d, a, i) (0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_DIVW(d, a, b) (0x7c0003d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVWU(d, a, b) (0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVD(d, a, b) (0x7c0003d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDU(d, a, b) (0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDE(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDE_DOT(t, a, b) (0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_DIVDEU(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDEU_DOT(t, a, b) (0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_AND(d, a, b) (0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_ANDI(d, a, i) (0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ANDIS(d, a, i) (0x74000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_AND_DOT(d, a, b) (0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_OR(d, a, b) (0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_MR(d, a) PPC_RAW_OR(d, a, a)
+#define PPC_RAW_ORI(d, a, i) (0x60000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORIS(d, a, i) (0x64000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_NOR(d, a, b) (0x7c0000f8 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_XOR(d, a, b) (0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_XORI(d, a, i) (0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_XORIS(d, a, i) (0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_EXTSB(d, a) (0x7c000774 | ___PPC_RA(d) | ___PPC_RS(a))
+#define PPC_RAW_EXTSH(d, a) (0x7c000734 | ___PPC_RA(d) | ___PPC_RS(a))
+#define PPC_RAW_EXTSW(d, a) (0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a))
+#define PPC_RAW_SLW(d, a, s) (0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SLD(d, a, s) (0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRW(d, a, s) (0x7c000430 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAW(d, a, s) (0x7c000630 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAWI(d, a, i) (0x7c000670 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i))
+#define PPC_RAW_SRD(d, a, s) (0x7c000436 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAD(d, a, s) (0x7c000634 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRADI(d, a, i) (0x7c000674 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i))
+#define PPC_RAW_RLWINM(d, a, i, mb, me) (0x54000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLWINM_DOT(d, a, i, mb, me) \
+ (0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLDICL(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb))
+#define PPC_RAW_RLDICL_DOT(d, a, i, mb) (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb) | 0x1)
+#define PPC_RAW_RLDICR(d, a, i, me) (0x78000004 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
-/* Bits 60 and 61 of MPP SPR should be set to one of the following */
-/* Aborting the fetch is indeed setting 00 in the table size bits */
-#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60)
-#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60)
+/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
+#define PPC_RAW_SLWI(d, a, i) PPC_RAW_RLWINM(d, a, i, 0, 31-(i))
+/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
+#define PPC_RAW_SRWI(d, a, i) PPC_RAW_RLWINM(d, a, 32-(i), i, 31)
+/* sldi = rldicr Rx, Ry, n, 63-n */
+#define PPC_RAW_SLDI(d, a, i) PPC_RAW_RLDICR(d, a, i, 63-(i))
+/* srdi = rldicl Rx, Ry, 64-n, n */
+#define PPC_RAW_SRDI(d, a, i) PPC_RAW_RLDICL(d, a, 64-(i), i)
-/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */
-#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54)
-#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54)
-#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54)
+#define PPC_RAW_NEG(d, a) (0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a))
+
+#define PPC_RAW_MFSPR(d, spr) (0x7c0002a6 | ___PPC_RT(d) | __PPC_SPR(spr))
+#define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr))
+#define PPC_RAW_EIEIO() (0x7c0006ac)
+
+/* bcl 20,31,$+4 */
+#define PPC_RAW_BCL4() (0x429f0005)
+#define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset))
+#define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset))
+#define PPC_RAW_TW(t0, a, b) (0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0)
+#define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2))
+
+#ifdef CONFIG_PPC32
+#define PPC_RAW_STL PPC_RAW_STW
+#define PPC_RAW_STLU PPC_RAW_STWU
+#define PPC_RAW_LL PPC_RAW_LWZ
+#define PPC_RAW_CMPLI PPC_RAW_CMPWI
+#else
+#define PPC_RAW_STL PPC_RAW_STD
+#define PPC_RAW_STLU PPC_RAW_STDU
+#define PPC_RAW_LL PPC_RAW_LD
+#define PPC_RAW_CMPLI PPC_RAW_CMPDI
+#endif
/* Deal with instructions that older assemblers aren't aware of */
-#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \
- __PPC_RA(a) | __PPC_RB(b))
-#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \
- __PPC_RA(a) | __PPC_RB(b))
-#define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \
- ___PPC_RT(t) | ___PPC_RA(a) | \
- ___PPC_RB(b) | __PPC_EH(eh))
-#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \
- __PPC_RB(b))
-#define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \
- ___PPC_RT(t) | ___PPC_RA(a) | \
- ___PPC_RB(b) | __PPC_EH(eh))
-#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
- ___PPC_RB(b))
-#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
- ___PPC_RB(b))
-#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
- __PPC_RA(a) | __PPC_RS(s))
-#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_INST_POPCNTD | \
- __PPC_RA(a) | __PPC_RS(s))
-#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_INST_POPCNTW | \
- __PPC_RA(a) | __PPC_RS(s))
-#define PPC_RFCI stringify_in_c(.long PPC_INST_RFCI)
-#define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI)
-#define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI)
-#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_INST_TLBILX | \
- __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
+#define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT)
+#define PPC_COPY(a, b) stringify_in_c(.long PPC_RAW_COPY(a, b))
+#define PPC_DARN(t, l) stringify_in_c(.long PPC_RAW_DARN(t, l))
+#define PPC_DCBAL(a, b) stringify_in_c(.long PPC_RAW_DCBAL(a, b))
+#define PPC_DCBZL(a, b) stringify_in_c(.long PPC_RAW_DCBZL(a, b))
+#define PPC_DIVDE(t, a, b) stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
+#define PPC_DIVDEU(t, a, b) stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
+#define PPC_DSSALL stringify_in_c(.long PPC_INST_DSSALL)
+#define PPC_LQARX(t, a, b, eh) stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
+#define PPC_STQCX(t, a, b) stringify_in_c(.long PPC_RAW_STQCX(t, a, b))
+#define PPC_MADDHD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c))
+#define PPC_MADDHDU(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c))
+#define PPC_MADDLD(t, a, b, c) stringify_in_c(.long PPC_RAW_MADDLD(t, a, b, c))
+#define PPC_MSGSND(b) stringify_in_c(.long PPC_RAW_MSGSND(b))
+#define PPC_MSGSYNC stringify_in_c(.long PPC_RAW_MSGSYNC)
+#define PPC_MSGCLR(b) stringify_in_c(.long PPC_RAW_MSGCLR(b))
+#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_RAW_MSGSNDP(b))
+#define PPC_MSGCLRP(b) stringify_in_c(.long PPC_RAW_MSGCLRP(b))
+#define PPC_PASTE(a, b) stringify_in_c(.long PPC_RAW_PASTE(a, b))
+#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_RAW_POPCNTB(a, s))
+#define PPC_POPCNTD(a, s) stringify_in_c(.long PPC_RAW_POPCNTD(a, s))
+#define PPC_POPCNTW(a, s) stringify_in_c(.long PPC_RAW_POPCNTW(a, s))
+#define PPC_RFCI stringify_in_c(.long PPC_RAW_RFCI)
+#define PPC_RFDI stringify_in_c(.long PPC_RAW_RFDI)
+#define PPC_RFMCI stringify_in_c(.long PPC_RAW_RFMCI)
+#define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b))
#define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b)
#define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b)
+#define PPC_TLBILX_LPID stringify_in_c(.long PPC_RAW_TLBILX_LPID)
#define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b)
-#define PPC_WAIT(w) stringify_in_c(.long PPC_INST_WAIT | \
- __PPC_WC(w))
-#define PPC_TLBIE(lp,a) stringify_in_c(.long PPC_INST_TLBIE | \
- ___PPC_RB(a) | ___PPC_RS(lp))
-#define PPC_TLBSRX_DOT(a,b) stringify_in_c(.long PPC_INST_TLBSRX_DOT | \
- __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_TLBIVAX(a,b) stringify_in_c(.long PPC_INST_TLBIVAX | \
- __PPC_RA0(a) | __PPC_RB(b))
-
-#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_INST_ERATWE | \
- __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
-#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_INST_ERATRE | \
- __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
-#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_INST_ERATILX | \
- __PPC_T_TLB(t) | __PPC_RA0(a) | \
- __PPC_RB(b))
-#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_INST_ERATIVAX | \
- __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_INST_ERATSX | \
- __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_INST_ERATSX_DOT | \
- __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \
- __PPC_RT(t) | __PPC_RB(b))
-#define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \
- __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_WAIT_v203 stringify_in_c(.long PPC_RAW_WAIT_v203)
+#define PPC_WAIT(w, p) stringify_in_c(.long PPC_RAW_WAIT(w, p))
+#define PPC_TLBIE(lp, a) stringify_in_c(.long PPC_RAW_TLBIE(lp, a))
+#define PPC_TLBIE_5(rb, rs, ric, prs, r) \
+ stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r))
+#define PPC_TLBIEL(rb,rs,ric,prs,r) \
+ stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r))
+#define PPC_TLBIEL_v205(rb, l) stringify_in_c(.long PPC_RAW_TLBIEL_v205(rb, l))
+#define PPC_TLBSRX_DOT(a, b) stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b))
+#define PPC_TLBIVAX(a, b) stringify_in_c(.long PPC_RAW_TLBIVAX(a, b))
+
+#define PPC_ERATWE(s, a, w) stringify_in_c(.long PPC_RAW_ERATWE(s, a, w))
+#define PPC_ERATRE(s, a, w) stringify_in_c(.long PPC_RAW_ERATRE(s, a, w))
+#define PPC_ERATILX(t, a, b) stringify_in_c(.long PPC_RAW_ERATILX(t, a, b))
+#define PPC_ERATIVAX(s, a, b) stringify_in_c(.long PPC_RAW_ERATIVAX(s, a, b))
+#define PPC_ERATSX(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX(t, a, w))
+#define PPC_ERATSX_DOT(t, a, w) stringify_in_c(.long PPC_RAW_ERATSX_DOT(t, a, w))
+#define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_RAW_SLBFEE_DOT(t, b))
+#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long __PPC_RAW_SLBFEE_DOT(t, b))
+#define PPC_ICBT(c, a, b) stringify_in_c(.long PPC_RAW_ICBT(c, a, b))
/* PASemi instructions */
-#define LBZCIX(t,a,b) stringify_in_c(.long PPC_INST_LBZCIX | \
- __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
-#define STBCIX(s,a,b) stringify_in_c(.long PPC_INST_STBCIX | \
- __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define LBZCIX(t, a, b) stringify_in_c(.long PPC_RAW_LBZCIX(t, a, b))
+#define STBCIX(s, a, b) stringify_in_c(.long PPC_RAW_STBCIX(s, a, b))
+#define PPC_DCBFPS(a, b) stringify_in_c(.long PPC_RAW_DCBFPS(a, b))
+#define PPC_DCBSTPS(a, b) stringify_in_c(.long PPC_RAW_DCBSTPS(a, b))
+#define PPC_PHWSYNC stringify_in_c(.long PPC_RAW_PHWSYNC)
+#define PPC_PLWSYNC stringify_in_c(.long PPC_RAW_PLWSYNC)
+#define STXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_STXVD2X(s, a, b))
+#define LXVD2X(s, a, b) stringify_in_c(.long PPC_RAW_LXVD2X(s, a, b))
+#define MFVRD(a, t) stringify_in_c(.long PPC_RAW_MFVRD(a, t))
+#define MTVRD(t, a) stringify_in_c(.long PPC_RAW_MTVRD(t, a))
+#define VPMSUMW(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMW(t, a, b))
+#define VPMSUMD(t, a, b) stringify_in_c(.long PPC_RAW_VPMSUMD(t, a, b))
+#define XXLOR(t, a, b) stringify_in_c(.long PPC_RAW_XXLOR(t, a, b))
+#define XXSWAPD(t, a) stringify_in_c(.long PPC_RAW_XXSWAPD(t, a))
+#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_RAW_XVCPSGNDP(t, a, b)))
-/*
- * Define what the VSX XX1 form instructions will look like, then add
- * the 128 bit load store instructions based on that.
- */
-#define VSX_XX1(s, a, b) (__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
-#define VSX_XX3(t, a, b) (__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
-#define STXVD2X(s, a, b) stringify_in_c(.long PPC_INST_STXVD2X | \
- VSX_XX1((s), a, b))
-#define LXVD2X(s, a, b) stringify_in_c(.long PPC_INST_LXVD2X | \
- VSX_XX1((s), a, b))
-#define XXLOR(t, a, b) stringify_in_c(.long PPC_INST_XXLOR | \
- VSX_XX3((t), a, b))
-#define XXSWAPD(t, a) stringify_in_c(.long PPC_INST_XXSWAPD | \
- VSX_XX3((t), a, a))
-#define XVCPSGNDP(t, a, b) stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
- VSX_XX3((t), (a), (b))))
-
-#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
-#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+#define VPERMXOR(vrt, vra, vrb, vrc) \
+ stringify_in_c(.long (PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc)))
+
+#define PPC_NAP stringify_in_c(.long PPC_RAW_NAP)
+#define PPC_SLEEP stringify_in_c(.long PPC_RAW_SLEEP)
+#define PPC_WINKLE stringify_in_c(.long PPC_RAW_WINKLE)
+
+#define PPC_STOP stringify_in_c(.long PPC_RAW_STOP)
/* BHRB instructions */
-#define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB)
-#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_INST_BHRBE | \
- __PPC_RT(r) | \
- (((n) & 0x3ff) << 11))
+#define PPC_CLRBHRB stringify_in_c(.long PPC_RAW_CLRBHRB)
+#define PPC_MFBHRBE(r, n) stringify_in_c(.long PPC_RAW_MFBHRBE(r, n))
/* Transactional memory instructions */
-#define TRECHKPT stringify_in_c(.long PPC_INST_TRECHKPT)
-#define TRECLAIM(r) stringify_in_c(.long PPC_INST_TRECLAIM \
- | __PPC_RA(r))
-#define TABORT(r) stringify_in_c(.long PPC_INST_TABORT \
- | __PPC_RA(r))
+#define TRECHKPT stringify_in_c(.long PPC_RAW_TRECHKPT)
+#define TRECLAIM(r) stringify_in_c(.long PPC_RAW_TRECLAIM(r))
+#define TABORT(r) stringify_in_c(.long PPC_RAW_TABORT(r))
/* book3e thread control instructions */
-#define TMRN(x) ((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6))
-#define MTTMR(tmr, r) stringify_in_c(.long PPC_INST_MTTMR | \
- TMRN(tmr) | ___PPC_RS(r))
-#define MFTMR(tmr, r) stringify_in_c(.long PPC_INST_MFTMR | \
- TMRN(tmr) | ___PPC_RT(r))
+#define MTTMR(tmr, r) stringify_in_c(.long PPC_RAW_MTTMR(tmr, r))
+#define MFTMR(tmr, r) stringify_in_c(.long PPC_RAW_MFTMR(tmr, r))
+
+/* Coprocessor instructions */
+#define PPC_ICSWX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWX(s, a, b))
+#define PPC_ICSWEPX(s, a, b) stringify_in_c(.long PPC_RAW_ICSWEPX(s, a, b))
+
+#define PPC_SLBIA(IH) stringify_in_c(.long PPC_RAW_SLBIA(IH))
+
+/*
+ * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix
+ * implies CPU_FTR_ARCH_300). USER/GUEST invalidates may only be used by radix
+ * mode (on HPT these would also invalidate various SLBEs which may not be
+ * desired).
+ */
+#define PPC_ISA_3_0_INVALIDATE_ERAT PPC_SLBIA(7)
+#define PPC_RADIX_INVALIDATE_ERAT_USER PPC_SLBIA(3)
+#define PPC_RADIX_INVALIDATE_ERAT_GUEST PPC_SLBIA(6)
+
+#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb))
+
+#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb))
#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index db1e2b8eff3c..a8b7e8682f5b 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_PPC_PCI_H
#define _ASM_POWERPC_PPC_PCI_H
@@ -17,14 +13,8 @@
extern unsigned long isa_io_base;
-extern void pci_setup_phb_io(struct pci_controller *hose, int primary);
-extern void pci_setup_phb_io_dynamic(struct pci_controller *hose, int primary);
-
-
extern struct list_head hose_list;
-extern void find_and_init_phbs(void);
-
extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */
/** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
@@ -33,18 +23,31 @@ extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */
/* PCI device_node operations */
struct device_node;
-typedef void *(*traverse_func)(struct device_node *me, void *data);
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
- void *data);
+struct pci_dn;
-extern void pci_devs_phb_init(void);
+void *pci_traverse_device_nodes(struct device_node *start,
+ void *(*fn)(struct device_node *, void *),
+ void *data);
extern void pci_devs_phb_init_dynamic(struct pci_controller *phb);
+#if defined(CONFIG_IOMMU_API) && (defined(CONFIG_PPC_PSERIES) || \
+ defined(CONFIG_PPC_POWERNV))
+extern void ppc_iommu_register_device(struct pci_controller *phb);
+extern void ppc_iommu_unregister_device(struct pci_controller *phb);
+#else
+static inline void ppc_iommu_register_device(struct pci_controller *phb) { }
+static inline void ppc_iommu_unregister_device(struct pci_controller *phb) { }
+#endif
+
+
/* From rtas_pci.h */
extern void init_pci_config_tokens (void);
extern unsigned long get_phb_buid (struct device_node *);
extern int rtas_setup_phb(struct pci_controller *phb);
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val);
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val);
+
#ifdef CONFIG_EEH
void eeh_addr_cache_insert_dev(struct pci_dev *dev);
@@ -52,33 +55,32 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev);
struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr);
void eeh_slot_error_detail(struct eeh_pe *pe, int severity);
int eeh_pci_enable(struct eeh_pe *pe, int function);
-int eeh_reset_pe(struct eeh_pe *);
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed);
void eeh_save_bars(struct eeh_dev *edev);
-int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
-int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state);
-void eeh_pe_state_clear(struct eeh_pe *pe, int state);
+void eeh_pe_mark_isolated(struct eeh_pe *pe);
+void eeh_pe_state_clear(struct eeh_pe *pe, int state, bool include_passed);
+void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state);
void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
void eeh_sysfs_add_device(struct pci_dev *pdev);
void eeh_sysfs_remove_device(struct pci_dev *pdev);
-static inline const char *eeh_pci_name(struct pci_dev *pdev)
-{
- return pdev ? pci_name(pdev) : "<null>";
-}
+#endif /* CONFIG_EEH */
-static inline const char *eeh_driver_name(struct pci_dev *pdev)
-{
- return (pdev && pdev->driver) ? pdev->driver->name : "<null>";
-}
+#ifdef CONFIG_FSL_ULI1575
+void __init uli_init(void);
+#endif /* CONFIG_FSL_ULI1575 */
-#endif /* CONFIG_EEH */
+#define PCI_BUSNO(bdfn) (((bdfn) >> 8) & 0xff)
#else /* CONFIG_PCI */
-static inline void find_and_init_phbs(void) { }
static inline void init_pci_config_tokens(void) { }
#endif /* !CONFIG_PCI */
+#if !defined(CONFIG_PCI) || !defined(CONFIG_FSL_ULI1575)
+static inline void __init uli_init(void) {}
+#endif /* !defined(CONFIG_PCI) || !defined(CONFIG_FSL_ULI1575) */
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_PPC_PCI_H */
diff --git a/arch/powerpc/include/asm/ppc4xx.h b/arch/powerpc/include/asm/ppc4xx.h
index 033039a80c42..b37119e48543 100644
--- a/arch/powerpc/include/asm/ppc4xx.h
+++ b/arch/powerpc/include/asm/ppc4xx.h
@@ -1,18 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PPC4xx Prototypes and definitions
*
* Copyright 2008 DENX Software Engineering, Stefan Roese <sr@denx.de>
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#ifndef __ASM_POWERPC_PPC4xx_H__
#define __ASM_POWERPC_PPC4xx_H__
-extern void ppc4xx_reset_system(char *cmd);
+extern void __noreturn ppc4xx_reset_system(char *cmd);
#endif /* __ASM_POWERPC_PPC4xx_H__ */
diff --git a/arch/powerpc/include/asm/ppc4xx_ocm.h b/arch/powerpc/include/asm/ppc4xx_ocm.h
deleted file mode 100644
index 6ce904605538..000000000000
--- a/arch/powerpc/include/asm/ppc4xx_ocm.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * PowerPC 4xx OCM memory allocation support
- *
- * (C) Copyright 2009, Applied Micro Circuits Corporation
- * Victor Gallardo (vgallardo@amcc.com)
- *
- * See file CREDITS for list of people who contributed to this
- * project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- */
-
-#ifndef __ASM_POWERPC_PPC4XX_OCM_H__
-#define __ASM_POWERPC_PPC4XX_OCM_H__
-
-#define PPC4XX_OCM_NON_CACHED 0
-#define PPC4XX_OCM_CACHED 1
-
-#if defined(CONFIG_PPC4xx_OCM)
-
-void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int align,
- int flags, const char *owner);
-void ppc4xx_ocm_free(const void *virt);
-
-#else
-
-#define ppc4xx_ocm_alloc(phys, size, align, flags, owner) NULL
-#define ppc4xx_ocm_free(addr) ((void)0)
-
-#endif /* CONFIG_PPC4xx_OCM */
-
-#endif /* __ASM_POWERPC_PPC4XX_OCM_H__ */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 7e4612528546..46947c82a712 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -9,93 +9,89 @@
#include <asm/processor.h>
#include <asm/ppc-opcode.h>
#include <asm/firmware.h>
+#include <asm/feature-fixups.h>
+#include <asm/extable.h>
-#ifndef __ASSEMBLY__
-#error __FILE__ should only be used in assembler files
-#else
+#ifdef __ASSEMBLER__
#define SZL (BITS_PER_LONG/8)
/*
- * Stuff for accurate CPU time accounting.
- * These macros handle transitions between user and system state
- * in exception entry and exit and accumulate time to the
- * user_time and system_time fields in the paca.
+ * This expands to a sequence of operations with reg incrementing from
+ * start to end inclusive, of this form:
+ *
+ * op reg, (offset + (width * reg))(base)
+ *
+ * Note that offset is not the offset of the first operation unless start
+ * is zero (or width is zero).
*/
+.macro OP_REGS op, width, start, end, base, offset
+ .Lreg=\start
+ .rept (\end - \start + 1)
+ \op .Lreg, \offset + \width * .Lreg(\base)
+ .Lreg=.Lreg+1
+ .endr
+.endm
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-#define ACCOUNT_CPU_USER_ENTRY(ra, rb)
-#define ACCOUNT_CPU_USER_EXIT(ra, rb)
-#define ACCOUNT_STOLEN_TIME
-#else
-#define ACCOUNT_CPU_USER_ENTRY(ra, rb) \
- MFTB(ra); /* get timebase */ \
- ld rb,PACA_STARTTIME_USER(r13); \
- std ra,PACA_STARTTIME(r13); \
- subf rb,rb,ra; /* subtract start value */ \
- ld ra,PACA_USER_TIME(r13); \
- add ra,ra,rb; /* add on to user time */ \
- std ra,PACA_USER_TIME(r13); \
-
-#define ACCOUNT_CPU_USER_EXIT(ra, rb) \
- MFTB(ra); /* get timebase */ \
- ld rb,PACA_STARTTIME(r13); \
- std ra,PACA_STARTTIME_USER(r13); \
- subf rb,rb,ra; /* subtract start value */ \
- ld ra,PACA_SYSTEM_TIME(r13); \
- add ra,ra,rb; /* add on to system time */ \
- std ra,PACA_SYSTEM_TIME(r13)
-
-#ifdef CONFIG_PPC_SPLPAR
-#define ACCOUNT_STOLEN_TIME \
-BEGIN_FW_FTR_SECTION; \
- beq 33f; \
- /* from user - see if there are any DTL entries to process */ \
- ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \
- ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \
- addi r10,r10,LPPACA_DTLIDX; \
- LDX_BE r10,0,r10; /* get log write index */ \
- cmpd cr1,r11,r10; \
- beq+ cr1,33f; \
- bl accumulate_stolen_time; \
- ld r12,_MSR(r1); \
- andi. r10,r12,MSR_PR; /* Restore cr0 (coming from user) */ \
-33: \
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-
-#else /* CONFIG_PPC_SPLPAR */
-#define ACCOUNT_STOLEN_TIME
-
-#endif /* CONFIG_PPC_SPLPAR */
-
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+/*
+ * This expands to a sequence of register clears for regs start to end
+ * inclusive, of the form:
+ *
+ * li rN, 0
+ */
+.macro ZEROIZE_REGS start, end
+ .Lreg=\start
+ .rept (\end - \start + 1)
+ li .Lreg, 0
+ .Lreg=.Lreg+1
+ .endr
+.endm
/*
* Macros for storing registers into and loading registers from
* exception frames.
*/
#ifdef __powerpc64__
-#define SAVE_GPR(n, base) std n,GPR0+8*(n)(base)
-#define REST_GPR(n, base) ld n,GPR0+8*(n)(base)
-#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base)
-#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base)
+#define SAVE_GPRS(start, end, base) OP_REGS std, 8, start, end, base, GPR0
+#define REST_GPRS(start, end, base) OP_REGS ld, 8, start, end, base, GPR0
+#define SAVE_NVGPRS(base) SAVE_GPRS(14, 31, base)
+#define REST_NVGPRS(base) REST_GPRS(14, 31, base)
#else
-#define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base)
-#define REST_GPR(n, base) lwz n,GPR0+4*(n)(base)
-#define SAVE_NVGPRS(base) SAVE_GPR(13, base); SAVE_8GPRS(14, base); \
- SAVE_10GPRS(22, base)
-#define REST_NVGPRS(base) REST_GPR(13, base); REST_8GPRS(14, base); \
- REST_10GPRS(22, base)
+#define SAVE_GPRS(start, end, base) OP_REGS stw, 4, start, end, base, GPR0
+#define REST_GPRS(start, end, base) OP_REGS lwz, 4, start, end, base, GPR0
+#define SAVE_NVGPRS(base) SAVE_GPRS(13, 31, base)
+#define REST_NVGPRS(base) REST_GPRS(13, 31, base)
#endif
-#define SAVE_2GPRS(n, base) SAVE_GPR(n, base); SAVE_GPR(n+1, base)
-#define SAVE_4GPRS(n, base) SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
-#define SAVE_8GPRS(n, base) SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
-#define SAVE_10GPRS(n, base) SAVE_8GPRS(n, base); SAVE_2GPRS(n+8, base)
-#define REST_2GPRS(n, base) REST_GPR(n, base); REST_GPR(n+1, base)
-#define REST_4GPRS(n, base) REST_2GPRS(n, base); REST_2GPRS(n+2, base)
-#define REST_8GPRS(n, base) REST_4GPRS(n, base); REST_4GPRS(n+4, base)
-#define REST_10GPRS(n, base) REST_8GPRS(n, base); REST_2GPRS(n+8, base)
+#define ZEROIZE_GPRS(start, end) ZEROIZE_REGS start, end
+#ifdef __powerpc64__
+#define ZEROIZE_NVGPRS() ZEROIZE_GPRS(14, 31)
+#else
+#define ZEROIZE_NVGPRS() ZEROIZE_GPRS(13, 31)
+#endif
+#define ZEROIZE_GPR(n) ZEROIZE_GPRS(n, n)
+
+#define SAVE_GPR(n, base) SAVE_GPRS(n, n, base)
+#define REST_GPR(n, base) REST_GPRS(n, n, base)
+
+/* macros for handling user register sanitisation */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_SYSCALL_GPRS() ZEROIZE_GPR(0); \
+ ZEROIZE_GPRS(5, 12); \
+ ZEROIZE_NVGPRS()
+#define SANITIZE_GPR(n) ZEROIZE_GPR(n)
+#define SANITIZE_GPRS(start, end) ZEROIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS() ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS() REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_SYSCALL_GPRS()
+#define SANITIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS() REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
#define SAVE_FPR(n, base) stfd n,8*TS_FPRWIDTH*(n)(base)
#define SAVE_2FPRS(n, base) SAVE_FPR(n, base); SAVE_FPR(n+1, base)
@@ -183,32 +179,49 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
#define VCPU_GPR(n) __VCPU_GPR(__REG_##n)
#ifdef __KERNEL__
-#ifdef CONFIG_PPC64
+
+/*
+ * Used to name C functions called from asm
+ */
+#if defined(__powerpc64__) && defined(CONFIG_PPC_KERNEL_PCREL)
+#define CFUNC(name) name@notoc
+#else
+#define CFUNC(name) name
+#endif
+
+/*
+ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
+ * version below in the else case of the ifdef.
+ */
+#ifdef __powerpc64__
#define STACKFRAMESIZE 256
#define __STK_REG(i) (112 + ((i)-14)*8)
#define STK_REG(i) __STK_REG(__REG_##i)
-#if defined(_CALL_ELF) && _CALL_ELF == 2
+#ifdef CONFIG_PPC64_ELF_ABI_V2
#define STK_GOT 24
-#define __STK_PARAM(i) (32 + ((i)-3)*8)
+#define STK_PARAM_AREA 32
#else
#define STK_GOT 40
-#define __STK_PARAM(i) (48 + ((i)-3)*8)
+#define STK_PARAM_AREA 48
#endif
+
+#define __STK_PARAM(i) (STK_PARAM_AREA + ((i)-3)*8)
#define STK_PARAM(i) __STK_PARAM(__REG_##i)
-#if defined(_CALL_ELF) && _CALL_ELF == 2
+#ifdef CONFIG_PPC64_ELF_ABI_V2
#define _GLOBAL(name) \
- .section ".text"; \
.align 2 ; \
.type name,@function; \
.globl name; \
name:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define _GLOBAL_TOC _GLOBAL
+#else
#define _GLOBAL_TOC(name) \
- .section ".text"; \
.align 2 ; \
.type name,@function; \
.globl name; \
@@ -216,13 +229,7 @@ name: \
0: addis r2,r12,(.TOC.-0b)@ha; \
addi r2,r2,(.TOC.-0b)@l; \
.localentry name,.-name
-
-#define _KPROBE(name) \
- .section ".kprobes.text","a"; \
- .align 2 ; \
- .type name,@function; \
- .globl name; \
-name:
+#endif
#define DOTSYM(a) a
@@ -232,60 +239,57 @@ name:
#define GLUE(a,b) XGLUE(a,b)
#define _GLOBAL(name) \
- .section ".text"; \
.align 2 ; \
.globl name; \
.globl GLUE(.,name); \
- .section ".opd","aw"; \
+ .pushsection ".opd","aw"; \
name: \
.quad GLUE(.,name); \
.quad .TOC.@tocbase; \
.quad 0; \
- .previous; \
+ .popsection; \
.type GLUE(.,name),@function; \
GLUE(.,name):
#define _GLOBAL_TOC(name) _GLOBAL(name)
-#define _KPROBE(name) \
- .section ".kprobes.text","a"; \
- .align 2 ; \
- .globl name; \
- .globl GLUE(.,name); \
- .section ".opd","aw"; \
-name: \
- .quad GLUE(.,name); \
- .quad .TOC.@tocbase; \
- .quad 0; \
- .previous; \
- .type GLUE(.,name),@function; \
-GLUE(.,name):
-
#define DOTSYM(a) GLUE(.,a)
#endif
#else /* 32-bit */
-#define _ENTRY(n) \
- .globl n; \
-n:
-
#define _GLOBAL(n) \
- .text; \
- .stabs __stringify(n:F-1),N_FUN,0,0,n;\
.globl n; \
n:
#define _GLOBAL_TOC(name) _GLOBAL(name)
-#define _KPROBE(n) \
- .section ".kprobes.text","a"; \
- .globl n; \
-n:
+#define DOTSYM(a) a
#endif
+/*
+ * __kprobes (the C annotation) puts the symbol into the .kprobes.text
+ * section, which gets emitted at the end of regular text.
+ *
+ * _ASM_NOKPROBE_SYMBOL and NOKPROBE_SYMBOL just add the symbol to
+ * a blacklist. The former is for core kprobe functions/data, the
+ * latter is for those that incidentally must be excluded from probing
+ * and allows them to be linked at a more optimal location within text.
+ */
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE_SYMBOL(entry) \
+ .pushsection "_kprobe_blacklist","aw"; \
+ PPC_LONG (entry) ; \
+ .popsection
+#else
+#define _ASM_NOKPROBE_SYMBOL(entry)
+#endif
+
+#define FUNC_START(name) _GLOBAL(name)
+#define FUNC_END(name)
+
/*
* LOAD_REG_IMMEDIATE(rn, expr)
* Loads the value of the constant expression 'expr' into register 'rn'
@@ -318,26 +322,85 @@ n:
/* Be careful, this will clobber the lr register. */
#define LOAD_REG_ADDR_PIC(reg, name) \
- bl 0f; \
+ bcl 20,31,$+4; \
0: mflr reg; \
addis reg,reg,(name - 0b)@ha; \
addi reg,reg,(name - 0b)@l;
-#ifdef __powerpc64__
-#ifdef HAVE_AS_ATHIGH
+#if defined(__powerpc64__) && defined(HAVE_AS_ATHIGH)
#define __AS_ATHIGH high
#else
#define __AS_ATHIGH h
#endif
-#define LOAD_REG_IMMEDIATE(reg,expr) \
- lis reg,(expr)@highest; \
- ori reg,reg,(expr)@higher; \
- rldicr reg,reg,32,31; \
- oris reg,reg,(expr)@__AS_ATHIGH; \
- ori reg,reg,(expr)@l;
+.macro __LOAD_REG_IMMEDIATE_32 r, x
+ .if (\x) >= 0x8000 || (\x) < -0x8000
+ lis \r, (\x)@__AS_ATHIGH
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ li \r, (\x)@l
+ .endif
+.endm
+
+.macro __LOAD_REG_IMMEDIATE r, x
+ .if (\x) >= 0x80000000 || (\x) < -0x80000000
+ __LOAD_REG_IMMEDIATE_32 \r, (\x) >> 32
+ sldi \r, \r, 32
+ .if (\x) & 0xffff0000 != 0
+ oris \r, \r, (\x)@__AS_ATHIGH
+ .endif
+ .if (\x) & 0xffff != 0
+ ori \r, \r, (\x)@l
+ .endif
+ .else
+ __LOAD_REG_IMMEDIATE_32 \r, \x
+ .endif
+.endm
+
+#ifdef __powerpc64__
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define __LOAD_PACA_TOC(reg) \
+ li reg,-1
+#else
+#define __LOAD_PACA_TOC(reg) \
+ ld reg,PACATOC(r13)
+#endif
+
+#define LOAD_PACA_TOC() \
+ __LOAD_PACA_TOC(r2)
+
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg, tmp, expr) \
+ lis tmp, (expr)@highest; \
+ lis reg, (expr)@__AS_ATHIGH; \
+ ori tmp, tmp, (expr)@higher; \
+ ori reg, reg, (expr)@l; \
+ rldimi reg, tmp, 32, 0
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LOAD_REG_ADDR(reg,name) \
+ pla reg,name@pcrel
+
+#else
#define LOAD_REG_ADDR(reg,name) \
- ld reg,name@got(r2)
+ addis reg,r2,name@toc@ha; \
+ addi reg,reg,name@toc@l
+#endif
+
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * This is used in register-constrained interrupt handlers. Not to be used
+ * by BOOK3S. ld complains with "got/toc optimization is not supported" if r2
+ * is not used for the TOC offset, so use @got(tocreg). If the interrupt
+ * handlers saved r2 instead, LOAD_REG_ADDR could be used.
+ */
+#define LOAD_REG_ADDR_ALTTOC(reg,tocreg,name) \
+ ld reg,name@got(tocreg)
+#endif
#define LOAD_REG_ADDRBASE(reg,name) LOAD_REG_ADDR(reg,name)
#define ADDROFF(name) 0
@@ -345,13 +408,24 @@ n:
/* offsets for stack frame layout */
#define LRSAVE 16
+/*
+ * GCC stack frames follow a different pattern on 32 vs 64. This can be used
+ * to make asm frames be consistent with C.
+ */
+#define PPC_CREATE_STACK_FRAME(size) \
+ mflr r0; \
+ std r0,16(r1); \
+ stdu r1,-(size)(r1)
+
#else /* 32-bit */
-#define LOAD_REG_IMMEDIATE(reg,expr) \
+#define LOAD_REG_IMMEDIATE(reg, expr) __LOAD_REG_IMMEDIATE_32 reg, expr
+
+#define LOAD_REG_IMMEDIATE_SYM(reg,expr) \
lis reg,(expr)@ha; \
addi reg,reg,(expr)@l;
-#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE(reg, name)
+#define LOAD_REG_ADDR(reg,name) LOAD_REG_IMMEDIATE_SYM(reg, name)
#define LOAD_REG_ADDRBASE(reg, name) lis reg,name@ha
#define ADDROFF(name) name@l
@@ -359,51 +433,37 @@ n:
/* offsets for stack frame layout */
#define LRSAVE 4
-#endif
+#define PPC_CREATE_STACK_FRAME(size) \
+ stwu r1,-(size)(r1); \
+ mflr r0; \
+ stw r0,(size+4)(r1)
-/* various errata or part fixups */
-#ifdef CONFIG_PPC601_SYNC_FIX
-#define SYNC \
-BEGIN_FTR_SECTION \
- sync; \
- isync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#define SYNC_601 \
-BEGIN_FTR_SECTION \
- sync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#define ISYNC_601 \
-BEGIN_FTR_SECTION \
- isync; \
-END_FTR_SECTION_IFSET(CPU_FTR_601)
-#else
-#define SYNC
-#define SYNC_601
-#define ISYNC_601
#endif
-#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
+/* various errata or part fixups */
+#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500)
#define MFTB(dest) \
90: mfspr dest, SPRN_TBRL; \
BEGIN_FTR_SECTION_NESTED(96); \
cmpwi dest,0; \
beq- 90b; \
END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96)
-#elif defined(CONFIG_8xx)
-#define MFTB(dest) mftb dest
#else
-#define MFTB(dest) mfspr dest, SPRN_TBRL
+#define MFTB(dest) MFTBL(dest)
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#define MFTBL(dest) mftb dest
+#define MFTBU(dest) mftbu dest
+#else
+#define MFTBL(dest) mfspr dest, SPRN_TBRL
+#define MFTBU(dest) mfspr dest, SPRN_TBRU
#endif
#ifndef CONFIG_SMP
#define TLBSYNC
-#else /* CONFIG_SMP */
-/* tlbsync is not implemented on 601 */
-#define TLBSYNC \
-BEGIN_FTR_SECTION \
- tlbsync; \
- sync; \
-END_FTR_SECTION_IFCLR(CPU_FTR_601)
+#else
+#define TLBSYNC tlbsync; sync
#endif
#ifdef CONFIG_PPC64
@@ -413,24 +473,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
FTR_SECTION_ELSE_NESTED(848); \
mtocrf (FXM), RS; \
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848)
-
-/*
- * PPR restore macros used in entry_64.S
- * Used for P7 or later processors
- */
-#define HMT_MEDIUM_LOW_HAS_PPR \
-BEGIN_FTR_SECTION_NESTED(944) \
- HMT_MEDIUM_LOW; \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,944)
-
-#define SET_DEFAULT_THREAD_PPR(ra, rb) \
-BEGIN_FTR_SECTION_NESTED(945) \
- lis ra,INIT_PPR@highest; /* default ppr=3 */ \
- ld rb,PACACURRENT(r13); \
- sldi ra,ra,32; /* 11- 13 bits are used for ppr */ \
- std ra,TASKTHREADPPR(rb); \
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
-
#endif
/*
@@ -440,12 +482,15 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
* and they must be used.
*/
-#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx)
+#if !defined(CONFIG_44x) && !defined(CONFIG_PPC_8xx)
#define tlbia \
li r4,1024; \
mtctr r4; \
lis r4,KERNELBASE@h; \
+ .machine push; \
+ .machine "power4"; \
0: tlbie r4; \
+ .machine pop; \
addi r4,r4,0x1000; \
bdnz 0b
#endif
@@ -459,14 +504,29 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
/* The following stops all load and store data streams associated with stream
* ID (ie. streams created explicitly). The embedded and server mnemonics for
- * dcbt are different so we use machine "power4" here explicitly.
+ * dcbt are different so this must only be used for server.
*/
-#define DCBT_STOP_ALL_STREAM_IDS(scratch) \
-.machine push ; \
-.machine "power4" ; \
- lis scratch,0x60000000@h; \
- dcbt r0,scratch,0b01010; \
-.machine pop
+#define DCBT_BOOK3S_STOP_ALL_STREAM_IDS(scratch) \
+ lis scratch,0x60000000@h; \
+ .machine push; \
+ .machine power4; \
+ dcbt 0,scratch,0b01010; \
+ .machine pop;
+
+#define DCBT_SETUP_STREAMS(from, from_parms, to, to_parms, scratch) \
+ lis scratch,0x8000; /* GO=1 */ \
+ clrldi scratch,scratch,32; \
+ .machine push; \
+ .machine power4; \
+ /* setup read stream 0 */ \
+ dcbt 0,from,0b01000; /* addr from */ \
+ dcbt 0,from_parms,0b01010; /* length and depth from */ \
+ /* setup write stream 1 */ \
+ dcbtst 0,to,0b01000; /* addr to */ \
+ dcbtst 0,to_parms,0b01010; /* length and depth to */ \
+ eieio; \
+ dcbt 0,scratch,0b01010; /* all streams GO */ \
+ .machine pop;
/*
* toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
@@ -504,42 +564,19 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
ori rd,rd,((KERNELBASE>>48)&0xFFFF);\
rotldi rd,rd,48
#else
-/*
- * On APUS (Amiga PowerPC cpu upgrade board), we don't know the
- * physical base address of RAM at compile time.
- */
#define toreal(rd) tophys(rd,rd)
#define fromreal(rd) tovirt(rd,rd)
-#define tophys(rd,rs) \
-0: addis rd,rs,-PAGE_OFFSET@h; \
- .section ".vtop_fixup","aw"; \
- .align 1; \
- .long 0b; \
- .previous
-
-#define tovirt(rd,rs) \
-0: addis rd,rs,PAGE_OFFSET@h; \
- .section ".ptov_fixup","aw"; \
- .align 1; \
- .long 0b; \
- .previous
+#define tophys(rd, rs) addis rd, rs, -PAGE_OFFSET@h
+#define tovirt(rd, rs) addis rd, rs, PAGE_OFFSET@h
#endif
#ifdef CONFIG_PPC_BOOK3S_64
-#define RFI rfid
#define MTMSRD(r) mtmsrd r
#define MTMSR_EERI(reg) mtmsrd reg,1
#else
-#define FIX_SRR1(ra, rb)
-#ifndef CONFIG_40x
-#define RFI rfi
-#else
-#define RFI rfi; b . /* Prevent prefetch past rfi */
-#endif
#define MTMSRD(r) mtmsr r
#define MTMSR_EERI(reg) mtmsr reg
-#define CLR_TOP32(r)
#endif
#endif /* __KERNEL__ */
@@ -637,105 +674,105 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
/* AltiVec Registers (VPRs) */
-#define vr0 0
-#define vr1 1
-#define vr2 2
-#define vr3 3
-#define vr4 4
-#define vr5 5
-#define vr6 6
-#define vr7 7
-#define vr8 8
-#define vr9 9
-#define vr10 10
-#define vr11 11
-#define vr12 12
-#define vr13 13
-#define vr14 14
-#define vr15 15
-#define vr16 16
-#define vr17 17
-#define vr18 18
-#define vr19 19
-#define vr20 20
-#define vr21 21
-#define vr22 22
-#define vr23 23
-#define vr24 24
-#define vr25 25
-#define vr26 26
-#define vr27 27
-#define vr28 28
-#define vr29 29
-#define vr30 30
-#define vr31 31
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31
/* VSX Registers (VSRs) */
-#define vsr0 0
-#define vsr1 1
-#define vsr2 2
-#define vsr3 3
-#define vsr4 4
-#define vsr5 5
-#define vsr6 6
-#define vsr7 7
-#define vsr8 8
-#define vsr9 9
-#define vsr10 10
-#define vsr11 11
-#define vsr12 12
-#define vsr13 13
-#define vsr14 14
-#define vsr15 15
-#define vsr16 16
-#define vsr17 17
-#define vsr18 18
-#define vsr19 19
-#define vsr20 20
-#define vsr21 21
-#define vsr22 22
-#define vsr23 23
-#define vsr24 24
-#define vsr25 25
-#define vsr26 26
-#define vsr27 27
-#define vsr28 28
-#define vsr29 29
-#define vsr30 30
-#define vsr31 31
-#define vsr32 32
-#define vsr33 33
-#define vsr34 34
-#define vsr35 35
-#define vsr36 36
-#define vsr37 37
-#define vsr38 38
-#define vsr39 39
-#define vsr40 40
-#define vsr41 41
-#define vsr42 42
-#define vsr43 43
-#define vsr44 44
-#define vsr45 45
-#define vsr46 46
-#define vsr47 47
-#define vsr48 48
-#define vsr49 49
-#define vsr50 50
-#define vsr51 51
-#define vsr52 52
-#define vsr53 53
-#define vsr54 54
-#define vsr55 55
-#define vsr56 56
-#define vsr57 57
-#define vsr58 58
-#define vsr59 59
-#define vsr60 60
-#define vsr61 61
-#define vsr62 62
-#define vsr63 63
+#define vs0 0
+#define vs1 1
+#define vs2 2
+#define vs3 3
+#define vs4 4
+#define vs5 5
+#define vs6 6
+#define vs7 7
+#define vs8 8
+#define vs9 9
+#define vs10 10
+#define vs11 11
+#define vs12 12
+#define vs13 13
+#define vs14 14
+#define vs15 15
+#define vs16 16
+#define vs17 17
+#define vs18 18
+#define vs19 19
+#define vs20 20
+#define vs21 21
+#define vs22 22
+#define vs23 23
+#define vs24 24
+#define vs25 25
+#define vs26 26
+#define vs27 27
+#define vs28 28
+#define vs29 29
+#define vs30 30
+#define vs31 31
+#define vs32 32
+#define vs33 33
+#define vs34 34
+#define vs35 35
+#define vs36 36
+#define vs37 37
+#define vs38 38
+#define vs39 39
+#define vs40 40
+#define vs41 41
+#define vs42 42
+#define vs43 43
+#define vs44 44
+#define vs45 45
+#define vs46 46
+#define vs47 47
+#define vs48 48
+#define vs49 49
+#define vs50 50
+#define vs51 51
+#define vs52 52
+#define vs53 53
+#define vs54 54
+#define vs55 55
+#define vs56 56
+#define vs57 57
+#define vs58 58
+#define vs59 59
+#define vs60 60
+#define vs61 61
+#define vs62 62
+#define vs63 63
/* SPE Registers (EVPRs) */
@@ -772,11 +809,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
#define evr30 30
#define evr31 31
-/* some stab codes */
-#define N_FUN 36
-#define N_RSYM 64
-#define N_SLINE 68
-#define N_SO 100
+#define RFSCV .long 0x4c0000a4
/*
* Create an endian fixup trampoline
@@ -793,20 +826,81 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
* kernel is built for.
*/
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
#define FIXUP_ENDIAN
#else
+/*
+ * This version may be used in HV or non-HV context.
+ * MSR[EE] must be disabled.
+ */
#define FIXUP_ENDIAN \
tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
- b $+36; /* Skip trampoline if endian is good */ \
- .long 0x05009f42; /* bcl 20,31,$+4 */ \
- .long 0xa602487d; /* mflr r10 */ \
- .long 0x1c004a39; /* addi r10,r10,28 */ \
+ b 191f; /* Skip trampoline if endian is good */ \
.long 0xa600607d; /* mfmsr r11 */ \
.long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x00004039; /* li r10,0 */ \
+ .long 0x6401417d; /* mtmsrd r10,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
.long 0xa6035a7d; /* mtsrr0 r10 */ \
.long 0xa6037b7d; /* mtsrr1 r11 */ \
- .long 0x2400004c /* rfid */
-#endif /* !CONFIG_PPC_BOOK3E */
-#endif /* __ASSEMBLY__ */
+ .long 0x2400004c; /* rfid */ \
+191:
+
+/*
+ * This version may only be used with MSR[HV]=1
+ * - Does not clear MSR[RI], so more robust.
+ * - Slightly smaller and faster.
+ */
+#define FIXUP_ENDIAN_HV \
+ tdi 0,0,0x48; /* Reverse endian of b . + 8 */ \
+ b 191f; /* Skip trampoline if endian is good */ \
+ .long 0xa600607d; /* mfmsr r11 */ \
+ .long 0x01006b69; /* xori r11,r11,1 */ \
+ .long 0x05009f42; /* bcl 20,31,$+4 */ \
+ .long 0xa602487d; /* mflr r10 */ \
+ .long 0x14004a39; /* addi r10,r10,20 */ \
+ .long 0xa64b5a7d; /* mthsrr0 r10 */ \
+ .long 0xa64b7b7d; /* mthsrr1 r11 */ \
+ .long 0x2402004c; /* hrfid */ \
+191:
+
+#endif /* !CONFIG_PPC_BOOK3E_64 */
+
+#endif /* __ASSEMBLER__ */
+
+#define SOFT_MASK_TABLE(_start, _end) \
+ stringify_in_c(.section __soft_mask_table,"a";)\
+ stringify_in_c(.balign 8;) \
+ stringify_in_c(.llong (_start);) \
+ stringify_in_c(.llong (_end);) \
+ stringify_in_c(.previous)
+
+#define RESTART_TABLE(_start, _end, _target) \
+ stringify_in_c(.section __restart_table,"a";)\
+ stringify_in_c(.balign 8;) \
+ stringify_in_c(.llong (_start);) \
+ stringify_in_c(.llong (_end);) \
+ stringify_in_c(.llong (_target);) \
+ stringify_in_c(.previous)
+
+#ifdef CONFIG_PPC_E500
+#define BTB_FLUSH(reg) \
+ lis reg,BUCSR_INIT@h; \
+ ori reg,reg,BUCSR_INIT@l; \
+ mtspr SPRN_BUCSR,reg; \
+ isync;
+#else
+#define BTB_FLUSH(reg)
+#endif /* CONFIG_PPC_E500 */
+
+#if defined(CONFIG_PPC64_ELF_ABI_V1)
+#define STACK_FRAME_PARAMS 48
+#elif defined(CONFIG_PPC64_ELF_ABI_V2)
+#define STACK_FRAME_PARAMS 32
+#elif defined(CONFIG_PPC32)
+#define STACK_FRAME_PARAMS 8
+#endif
+
#endif /* _ASM_POWERPC_PPC_ASM_H */
diff --git a/arch/powerpc/include/asm/preempt.h b/arch/powerpc/include/asm/preempt.h
new file mode 100644
index 000000000000..000e2b9681f3
--- /dev/null
+++ b/arch/powerpc/include/asm/preempt.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_POWERPC_PREEMPT_H
+#define __ASM_POWERPC_PREEMPT_H
+
+#include <asm-generic/preempt.h>
+
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+#include <linux/jump_label.h>
+DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#define need_irq_preemption() \
+ (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched))
+#else
+#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION))
+#endif
+
+#endif /* __ASM_POWERPC_PREEMPT_H */
diff --git a/arch/powerpc/include/asm/probes.h b/arch/powerpc/include/asm/probes.h
index 3421637cfd7b..e77a2ed7d938 100644
--- a/arch/powerpc/include/asm/probes.h
+++ b/arch/powerpc/include/asm/probes.h
@@ -1,29 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_PROBES_H
#define _ASM_POWERPC_PROBES_H
#ifdef __KERNEL__
/*
* Definitions common to probes files
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corporation, 2012
*/
#include <linux/types.h>
+#include <asm/disassemble.h>
+#include <asm/ppc-opcode.h>
-typedef u32 ppc_opcode_t;
-#define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */
+#define BREAKPOINT_INSTRUCTION PPC_RAW_TRAP() /* trap */
/* Trap definitions per ISA */
#define IS_TW(instr) (((instr) & 0xfc0007fe) == 0x7c000008)
@@ -44,17 +32,52 @@ typedef u32 ppc_opcode_t;
#define MSR_SINGLESTEP (MSR_SE)
#endif
+static inline bool can_single_step(u32 inst) /* false for every opcode listed below, true otherwise */
+{
+ switch (get_op(inst)) { /* first level: value returned by get_op() */
+ case OP_TRAP_64: return false;
+ case OP_TRAP: return false;
+ case OP_SC: return false;
+ case OP_19: /* second level: only the get_xop() values listed here are rejected */
+ switch (get_xop(inst)) {
+ case OP_19_XOP_RFID: return false;
+ case OP_19_XOP_RFMCI: return false;
+ case OP_19_XOP_RFDI: return false;
+ case OP_19_XOP_RFI: return false;
+ case OP_19_XOP_RFCI: return false;
+ case OP_19_XOP_RFSCV: return false;
+ case OP_19_XOP_HRFID: return false;
+ case OP_19_XOP_URFID: return false;
+ case OP_19_XOP_STOP: return false;
+ case OP_19_XOP_DOZE: return false;
+ case OP_19_XOP_NAP: return false;
+ case OP_19_XOP_SLEEP: return false;
+ case OP_19_XOP_RVWINKLE: return false;
+ }
+ break; /* unlisted OP_19 extended opcode: leave the outer switch and return true */
+ case OP_31: /* second level: only the get_xop() values listed here are rejected */
+ switch (get_xop(inst)) {
+ case OP_31_XOP_TRAP: return false;
+ case OP_31_XOP_TRAP_64: return false;
+ case OP_31_XOP_MTMSR: return false;
+ case OP_31_XOP_MTMSRD: return false;
+ }
+ break; /* unlisted OP_31 extended opcode: leave the outer switch and return true */
+ }
+ return true; /* nothing above rejected the instruction */
+}
+
/* Enable single stepping for the current task */
static inline void enable_single_step(struct pt_regs *regs)
{
- regs->msr |= MSR_SINGLESTEP;
+ regs_set_return_msr(regs, regs->msr | MSR_SINGLESTEP);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
* We turn off Critical Input Exception(CE) to ensure that the single
* step will be for the instruction we have the probe on; if we don't,
* it is possible we'd get the single step reported for CE.
*/
- regs->msr &= ~MSR_CE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_CE);
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
#ifdef CONFIG_PPC_47x
isync();
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index dda7ac4c80bd..f156bdb43e2b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -1,15 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_POWERPC_PROCESSOR_H
#define _ASM_POWERPC_PROCESSOR_H
/*
* Copyright (C) 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <vdso/processor.h>
+
#include <asm/reg.h>
#ifdef CONFIG_VSX
@@ -31,18 +29,17 @@
#ifdef CONFIG_PPC64
/* Default SMT priority is set to 3. Use 11- 13bits to save priority. */
#define PPR_PRIORITY 3
-#ifdef __ASSEMBLY__
-#define INIT_PPR (PPR_PRIORITY << 50)
+#ifdef __ASSEMBLER__
+#define DEFAULT_PPR (PPR_PRIORITY << 50)
#else
-#define INIT_PPR ((u64)PPR_PRIORITY << 50)
-#endif /* __ASSEMBLY__ */
+#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50)
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_PPC64 */
-#ifndef __ASSEMBLY__
-#include <linux/compiler.h>
-#include <linux/cache.h>
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+#include <linux/thread_info.h>
#include <asm/ptrace.h>
-#include <asm/types.h>
#include <asm/hw_breakpoint.h>
/* We do _not_ want to define new machine types at all, those must die
@@ -68,92 +65,19 @@ extern int _chrp_type;
#endif /* defined(__KERNEL__) && defined(CONFIG_PPC32) */
-/*
- * Default implementation of macro that returns current
- * instruction pointer ("program counter").
- */
-#define current_text_addr() ({ __label__ _l; _l: &&_l;})
-
-/* Macros for adjusting thread priority (hardware multi-threading) */
-#define HMT_very_low() asm volatile("or 31,31,31 # very low priority")
-#define HMT_low() asm volatile("or 1,1,1 # low priority")
-#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority")
-#define HMT_medium() asm volatile("or 2,2,2 # medium priority")
-#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority")
-#define HMT_high() asm volatile("or 3,3,3 # high priority")
-
#ifdef __KERNEL__
-struct task_struct;
-void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
-void release_thread(struct task_struct *);
-
-/* Lazy FPU handling on uni-processor */
-extern struct task_struct *last_task_used_math;
-extern struct task_struct *last_task_used_altivec;
-extern struct task_struct *last_task_used_vsx;
-extern struct task_struct *last_task_used_spe;
-
-#ifdef CONFIG_PPC32
-
-#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START
-#error User TASK_SIZE overlaps with KERNEL_START address
-#endif
-#define TASK_SIZE (CONFIG_TASK_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
-#endif
-
#ifdef CONFIG_PPC64
-/* 64-bit user address space is 46-bits (64TB user VM) */
-#define TASK_SIZE_USER64 (0x0000400000000000UL)
-
-/*
- * 32-bit user address space is 4GB - 1 page
- * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
- */
-#define TASK_SIZE_USER32 (0x0000000100000000UL - (1*PAGE_SIZE))
-
-#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
- TASK_SIZE_USER32 : TASK_SIZE_USER64)
-#define TASK_SIZE TASK_SIZE_OF(current)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
-#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4))
-
-#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
- TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
+#include <asm/task_size_64.h>
+#else
+#include <asm/task_size_32.h>
#endif
-#ifdef __powerpc64__
-
-#define STACK_TOP_USER64 TASK_SIZE_USER64
-#define STACK_TOP_USER32 TASK_SIZE_USER32
-
-#define STACK_TOP (is_32bit_task() ? \
- STACK_TOP_USER32 : STACK_TOP_USER64)
-
-#define STACK_TOP_MAX STACK_TOP_USER64
-
-#else /* __powerpc64__ */
-
-#define STACK_TOP TASK_SIZE
-#define STACK_TOP_MAX STACK_TOP
-
-#endif /* __powerpc64__ */
-
-typedef struct {
- unsigned long seg;
-} mm_segment_t;
+struct task_struct;
+void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
#define TS_FPR(i) fp_state.fpr[i][TS_FPROFFSET]
-#define TS_TRANS_FPR(i) transact_fp.fpr[i][TS_FPROFFSET]
+#define TS_CKFPR(i) ckfp_state.fpr[i][TS_FPROFFSET]
/* FP and VSX 0-31 register set */
struct thread_fp_state {
@@ -213,36 +137,48 @@ struct thread_struct {
unsigned long ksp_vsid;
#endif
struct pt_regs *regs; /* Pointer to saved register state */
- mm_segment_t fs; /* for get_fs() validation */
#ifdef CONFIG_BOOKE
/* BookE base exception scratch space; align on cacheline */
unsigned long normsave[8] ____cacheline_aligned;
#endif
#ifdef CONFIG_PPC32
void *pgdir; /* root of page-table tree */
- unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
+#ifdef CONFIG_PPC_RTAS
+ unsigned long rtas_sp; /* stack pointer for when in RTAS */
+#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ unsigned long kuap; /* opened segments for user access */
+#endif
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long dar;
+ unsigned long dsisr;
+#ifdef CONFIG_PPC_BOOK3S_32
+ unsigned long r0, r3, r4, r5, r6, r8, r9, r11;
+ unsigned long lr, ctr;
+ unsigned long sr0;
+#endif
+#endif /* CONFIG_PPC32 */
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
+ unsigned long pid; /* value written in PID reg. at interrupt exit */
#endif
/* Debug Registers */
struct debug_reg debug;
+#ifdef CONFIG_PPC_FPU_REGS
struct thread_fp_state fp_state;
struct thread_fp_state *fp_save_area;
+#endif
int fpexc_mode; /* floating-point exception mode */
unsigned int align_ctl; /* alignment handling control */
-#ifdef CONFIG_PPC64
- unsigned long start_tb; /* Start purr when proc switched in */
- unsigned long accum_tb; /* Total accumilated purr for process */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- struct perf_event *ptrace_bps[HBP_NUM];
- /*
- * Helps identify source of single-step exception and subsequent
- * hw-breakpoint enablement
- */
- struct perf_event *last_hit_ubp;
+ struct perf_event *ptrace_bps[HBP_NUM_MAX];
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#endif
- struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
+ struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */
unsigned long trap_nr; /* last trap # on this thread */
+ u8 load_slb; /* Ages out SLB preload cache entries */
+ u8 load_fp;
#ifdef CONFIG_ALTIVEC
+ u8 load_vec;
struct thread_vr_state vr_state;
struct thread_vr_state *vr_save_area;
unsigned long vrsave;
@@ -250,42 +186,42 @@ struct thread_struct {
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
/* VSR status */
- int used_vsr; /* set if process has used altivec */
+ int used_vsr; /* set if process has used VSX */
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
- unsigned long evr[32]; /* upper 32-bits of SPE regs */
- u64 acc; /* Accumulator */
+ struct_group(spe,
+ unsigned long evr[32]; /* upper 32-bits of SPE regs */
+ u64 acc; /* Accumulator */
+ );
unsigned long spefscr; /* SPE & eFP status */
unsigned long spefscr_last; /* SPEFSCR value on last prctl
call or trap return */
int used_spe; /* set if process has used spe */
#endif /* CONFIG_SPE */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ u8 load_tm;
u64 tm_tfhar; /* Transaction fail handler addr */
u64 tm_texasr; /* Transaction exception & summary */
u64 tm_tfiar; /* Transaction fail instr address reg */
- unsigned long tm_orig_msr; /* Thread's MSR on ctx switch */
struct pt_regs ckpt_regs; /* Checkpointed registers */
unsigned long tm_tar;
unsigned long tm_ppr;
unsigned long tm_dscr;
+ unsigned long tm_amr;
/*
- * Transactional FP and VSX 0-31 register set.
- * NOTE: the sense of these is the opposite of the integer ckpt_regs!
+ * Checkpointed FP and VSX 0-31 register set.
*
* When a transaction is active/signalled/scheduled etc., *regs is the
* most recent set of/speculated GPRs with ckpt_regs being the older
* checkpointed regs to which we roll back if transaction aborts.
*
- * However, fpr[] is the checkpointed 'base state' of FP regs, and
- * transact_fpr[] is the new set of transactional values.
- * VRs work the same way.
+ * These are analogous to how ckpt_regs and pt_regs work
*/
- struct thread_fp_state transact_fp;
- struct thread_vr_state transact_vr;
- unsigned long transact_vrsave;
+ struct thread_fp_state ckfp_state; /* Checkpointed FP state */
+ struct thread_vr_state ckvr_state; /* Checkpointed VR state */
+ unsigned long ckvrsave; /* Checkpointed VRSAVE */
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
void* kvm_shadow_vcpu; /* KVM internal data */
@@ -295,8 +231,18 @@ struct thread_struct {
#endif
#ifdef CONFIG_PPC64
unsigned long dscr;
+ unsigned long fscr;
+ /*
+ * This member element dscr_inherit indicates that the process
+ * has explicitly attempted and changed the DSCR register value
+ * for itself. Hence the kernel won't use the default CPU DSCR value
+ * contained in the PACA structure anymore during process context
+ * switch. Once this variable is set, this behaviour will also be
+ * inherited by all the children of this process from that point
+ * onwards.
+ */
int dscr_inherit;
- unsigned long ppr; /* used to save/restore SMT priority */
+ unsigned long tidr;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
unsigned long tar;
@@ -308,15 +254,21 @@ struct thread_struct {
unsigned long sier;
unsigned long mmcr2;
unsigned mmcr0;
+
unsigned used_ebb;
+ unsigned long mmcr3;
+ unsigned long sier2;
+ unsigned long sier3;
+ unsigned long hashkeyr;
+ unsigned long dexcr;
+ unsigned long dexcr_onexec; /* Reset value to load on exec */
#endif
};
#define ARCH_MIN_TASKALIGN 16
#define INIT_SP (sizeof(init_stack) + (unsigned long) &init_stack)
-#define INIT_SP_LIMIT \
- (_ALIGN_UP(sizeof(init_thread_info), 16) + (unsigned long) &init_stack)
+#define INIT_SP_LIMIT ((unsigned long)&init_stack)
#ifdef CONFIG_SPE
#define SPEFSCR_INIT \
@@ -326,34 +278,39 @@ struct thread_struct {
#define SPEFSCR_INIT
#endif
-#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_BOOK3S_32
+#define SR0_INIT .sr0 = IS_ENABLED(CONFIG_PPC_KUEP) ? SR_NX : 0,
+#else
+#define SR0_INIT
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
#define INIT_THREAD { \
.ksp = INIT_SP, \
- .ksp_limit = INIT_SP_LIMIT, \
- .fs = KERNEL_DS, \
.pgdir = swapper_pg_dir, \
+ .kuap = ~0UL, /* KUAP_NONE */ \
.fpexc_mode = MSR_FE0 | MSR_FE1, \
SPEFSCR_INIT \
+ SR0_INIT \
+}
+#elif defined(CONFIG_PPC32)
+#define INIT_THREAD { \
+ .ksp = INIT_SP, \
+ .pgdir = swapper_pg_dir, \
+ .fpexc_mode = MSR_FE0 | MSR_FE1, \
+ SPEFSCR_INIT \
+ SR0_INIT \
}
#else
#define INIT_THREAD { \
.ksp = INIT_SP, \
- .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
- .fs = KERNEL_DS, \
.fpexc_mode = 0, \
- .ppr = INIT_PPR, \
}
#endif
-/*
- * Return saved PC of a blocked thread. For now, this is the "user" PC
- */
-#define thread_saved_pc(tsk) \
- ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
+#define task_pt_regs(tsk) ((tsk)->thread.regs)
-#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.regs)
-
-unsigned long get_wchan(struct task_struct *p);
+unsigned long __get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) ((tsk)->thread.regs? (tsk)->thread.regs->nip: 0)
#define KSTK_ESP(tsk) ((tsk)->thread.regs? (tsk)->thread.regs->gpr[1]: 0)
@@ -377,8 +334,16 @@ extern int set_endian(struct task_struct *tsk, unsigned int val);
extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
-extern void fp_enable(void);
-extern void vec_enable(void);
+#ifdef CONFIG_PPC_BOOK3S_64
+
+#define PPC_GET_DEXCR_ASPECT(tsk, asp) get_dexcr_prctl((tsk), (asp))
+#define PPC_SET_DEXCR_ASPECT(tsk, asp, val) set_dexcr_prctl((tsk), (asp), (val))
+
+int get_dexcr_prctl(struct task_struct *tsk, unsigned long asp);
+int set_dexcr_prctl(struct task_struct *tsk, unsigned long asp, unsigned long val);
+
+#endif
+
extern void load_fp_state(struct thread_fp_state *fp);
extern void store_fp_state(struct thread_fp_state *fp);
extern void load_vr_state(struct thread_vr_state *vr);
@@ -395,23 +360,45 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
}
#ifdef CONFIG_PPC64
-#define cpu_relax() do { HMT_low(); HMT_medium(); barrier(); } while (0)
-#else
-#define cpu_relax() barrier()
+
+#define spin_begin() \
+ asm volatile(ASM_FTR_IFCLR( \
+ "or 1,1,1", /* HMT_LOW */ \
+ "nop", /* v3.1 uses pause_short in cpu_relax instead */ \
+ %0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+
+#define spin_cpu_relax() \
+ asm volatile(ASM_FTR_IFCLR( \
+ "nop", /* Before v3.1 use priority nops in spin_begin/end */ \
+ PPC_WAIT(2, 0), /* aka pause_short */ \
+ %0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+
+#define spin_end() \
+ asm volatile(ASM_FTR_IFCLR( \
+ "or 2,2,2", /* HMT_MEDIUM */ \
+ "nop", \
+ %0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+
#endif
-#define cpu_relax_lowlatency() cpu_relax()
+/*
+ * Check that a certain kernel stack pointer is a valid (minimum sized)
+ * stack frame in task_struct p.
+ */
+int validate_sp(unsigned long sp, struct task_struct *p);
-/* Check that a certain kernel stack pointer is valid in task_struct p */
-int validate_sp(unsigned long sp, struct task_struct *p,
- unsigned long nbytes);
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes);
/*
* Prefetch macros.
*/
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
static inline void prefetch(const void *x)
{
@@ -429,37 +416,27 @@ static inline void prefetchw(const void *x)
__asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
}
-#define spin_lock_prefetch(x) prefetchw(x)
-
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
-#ifdef CONFIG_PPC64
-static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
-{
- if (is_32)
- return sp & 0x0ffffffffUL;
- return sp;
-}
-#else
-static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
-{
- return sp;
-}
+/* asm stubs */
+extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
+extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
+extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+#ifdef CONFIG_PPC_970_NAP
+extern void power4_idle_nap(void);
+void power4_idle_nap_return(void);
#endif
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern void power7_nap(int check_irq);
-extern void power7_sleep(void);
-extern void flush_instruction_cache(void);
-extern void hard_reset_now(void);
-extern void poweroff_now(void);
+
+extern void power7_idle_type(unsigned long type);
+extern void arch300_idle_type(unsigned long stop_psscr_val,
+ unsigned long stop_psscr_mask);
+void pnv_power9_force_smt4_catch(void);
+void pnv_power9_force_smt4_release(void);
+
extern int fix_alignment(struct pt_regs *);
-extern void cvt_fd(float *from, double *to);
-extern void cvt_df(double *from, float *to);
-extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
#ifdef CONFIG_PPC64
/*
@@ -472,6 +449,16 @@ extern void _nmask_and_or_msr(unsigned long nmask, unsigned long or_val);
#define NET_IP_ALIGN 0
#endif
+int do_mathemu(struct pt_regs *regs);
+int do_spe_mathemu(struct pt_regs *regs);
+int speround_handler(struct pt_regs *regs);
+
+/* VMX copying */
+int enter_vmx_usercopy(void);
+int exit_vmx_usercopy(void);
+int enter_vmx_ops(void);
+void *exit_vmx_ops(void *dest);
+
#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_POWERPC_PROCESSOR_H */
diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 74b79f07f041..f679a11a7e7f 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _POWERPC_PROM_H
#define _POWERPC_PROM_H
#ifdef __KERNEL__
@@ -9,22 +10,14 @@
* Copyright (C) 1996-2005 Paul Mackerras.
*
* Updates for PPC64 by Peter Bergner & David Engebretsen, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
-#include <asm/irq.h>
-#include <linux/atomic.h>
+#include <asm/firmware.h>
+
+struct device_node;
+struct property;
-/* These includes should be removed once implicit includes are cleaned up. */
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/platform_device.h>
+#define MIN_RMA 768 /* Minimum RMA (in MB) for CAS negotiation */
#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */
#define OF_DT_END_NODE 0x2 /* End node */
@@ -76,27 +69,25 @@ void of_parse_dma_window(struct device_node *dn, const __be32 *dma_window,
unsigned long *busno, unsigned long *phys,
unsigned long *size);
-extern void kdump_move_device_tree(void);
-
extern void of_instantiate_rtc(void);
extern int of_get_ibm_chip_id(struct device_node *np);
-/* The of_drconf_cell struct defines the layout of the LMB array
- * specified in the device tree property
- * ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory
- */
-struct of_drconf_cell {
- u64 base_addr;
- u32 drc_index;
- u32 reserved;
- u32 aa_index;
- u32 flags;
+struct of_drc_info {
+ char *drc_type;
+ char *drc_name_prefix;
+ u32 drc_index_start;
+ u32 drc_name_suffix_start;
+ u32 num_sequential_elems;
+ u32 sequential_inc;
+ u32 drc_power_domain;
+ u32 last_drc_index;
};
-#define DRCONF_MEM_ASSIGNED 0x00000008
-#define DRCONF_MEM_AI_INVALID 0x00000040
-#define DRCONF_MEM_RESERVED 0x00000080
+extern int of_read_drc_info_cell(struct property **prop,
+ const __be32 **curval, struct of_drc_info *data);
+
+extern unsigned int boot_cpu_node_count;
/*
* There are two methods for telling firmware what our capabilities are.
@@ -123,6 +114,9 @@ struct of_drconf_cell {
#define OV1_PPC_2_06 0x02 /* set if we support PowerPC 2.06 */
#define OV1_PPC_2_07 0x01 /* set if we support PowerPC 2.07 */
+#define OV1_PPC_3_00 0x80 /* set if we support PowerPC 3.00 */
+#define OV1_PPC_3_1 0x40 /* set if we support PowerPC 3.1 */
+
/* Option vector 2: Open Firmware options supported */
#define OV2_REAL_MODE 0x20 /* set if we want OF in real mode */
@@ -151,21 +145,36 @@ struct of_drconf_cell {
#define OV5_MSI 0x0201 /* PCIe/MSI support */
#define OV5_CMO 0x0480 /* Cooperative Memory Overcommitment */
#define OV5_XCMO 0x0440 /* Page Coalescing */
-#define OV5_TYPE1_AFFINITY 0x0580 /* Type 1 NUMA affinity */
+#define OV5_FORM1_AFFINITY 0x0580 /* FORM1 NUMA affinity */
#define OV5_PRRN 0x0540 /* Platform Resource Reassignment */
-#define OV5_PFO_HW_RNG 0x0E80 /* PFO Random Number Generator */
-#define OV5_PFO_HW_842 0x0E40 /* PFO Compression Accelerator */
-#define OV5_PFO_HW_ENCR 0x0E20 /* PFO Encryption Accelerator */
-#define OV5_SUB_PROCESSORS 0x0F01 /* 1,2,or 4 Sub-Processors supported */
+#define OV5_FORM2_AFFINITY 0x0520 /* Form2 NUMA affinity */
+#define OV5_HP_EVT 0x0604 /* Hot Plug Event support */
+#define OV5_RESIZE_HPT 0x0601 /* Hash Page Table resizing */
+#define OV5_PFO_HW_RNG 0x1180 /* PFO Random Number Generator */
+#define OV5_PFO_HW_842 0x1140 /* PFO Compression Accelerator */
+#define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */
+#define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */
+#define OV5_DRMEM_V2 0x1680 /* ibm,dynamic-reconfiguration-v2 */
+#define OV5_XIVE_SUPPORT 0x17C0 /* XIVE Exploitation Support Mask */
+#define OV5_XIVE_LEGACY 0x1700 /* XIVE legacy mode Only */
+#define OV5_XIVE_EXPLOIT 0x1740 /* XIVE exploitation mode Only */
+#define OV5_XIVE_EITHER 0x1780 /* XIVE legacy or exploitation mode */
+/* MMU Base Architecture */
+#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */
+#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */
+#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */
+#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */
+#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */
+#define OV5_NMMU 0x1820 /* Nest MMU Available */
+/* Hash Table Extensions */
+#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */
+#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */
+/* Radix Table Extensions */
+#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */
+#define OV5_DRC_INFO 0x1640 /* Redef Prop Structures: drc-info */
/* Option Vector 6: IBM PAPR hints */
#define OV6_LINUX 0x02 /* Linux is our OS */
-/*
- * The architecture vector has an array of PVR mask/value pairs,
- * followed by # option vectors - 1, followed by the option vectors.
- */
-extern unsigned char ibm_architecture_vec[];
-
#endif /* __KERNEL__ */
#endif /* _POWERPC_PROM_H */
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index a1bc7e758422..987e23a2bd28 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 platform declarations.
*
* Copyright (C) 2006 Sony Computer Entertainment Inc.
* Copyright 2006 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#if !defined(_ASM_POWERPC_PS3_H)
@@ -83,6 +71,7 @@ struct ps3_dma_region_ops;
* @bus_addr: The 'translated' bus address of the region.
* @len: The length in bytes of the region.
* @offset: The offset from the start of memory of the region.
+ * @dma_mask: Device dma_mask.
* @ioid: The IOID of the device who owns this region
* @chunk_list: Opaque variable used by the ioc page manager.
* @region_ops: struct ps3_dma_region_ops - dma region operations
@@ -97,6 +86,7 @@ struct ps3_dma_region {
enum ps3_dma_region_type region_type;
unsigned long len;
unsigned long offset;
+ u64 dma_mask;
/* driver variables (set by ps3_dma_region_create) */
unsigned long bus_addr;
@@ -244,7 +234,7 @@ enum lv1_result {
static inline const char* ps3_result(int result)
{
-#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT)
+#if defined(DEBUG) || defined(PS3_VERBOSE_RESULT) || defined(CONFIG_PS3_VERBOSE_RESULT)
switch (result) {
case LV1_SUCCESS:
return "LV1_SUCCESS (0)";
@@ -390,8 +380,8 @@ struct ps3_system_bus_driver {
enum ps3_match_sub_id match_sub_id;
struct device_driver core;
int (*probe)(struct ps3_system_bus_device *);
- int (*remove)(struct ps3_system_bus_device *);
- int (*shutdown)(struct ps3_system_bus_device *);
+ void (*remove)(struct ps3_system_bus_device *);
+ void (*shutdown)(struct ps3_system_bus_device *);
/* int (*suspend)(struct ps3_system_bus_device *, pm_message_t); */
/* int (*resume)(struct ps3_system_bus_device *); */
};
@@ -400,13 +390,9 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev);
int ps3_system_bus_driver_register(struct ps3_system_bus_driver *drv);
void ps3_system_bus_driver_unregister(struct ps3_system_bus_driver *drv);
-static inline struct ps3_system_bus_driver *ps3_drv_to_system_bus_drv(
- struct device_driver *_drv)
-{
- return container_of(_drv, struct ps3_system_bus_driver, core);
-}
+#define ps3_drv_to_system_bus_drv(_drv) container_of_const(_drv, struct ps3_system_bus_driver, core)
static inline struct ps3_system_bus_device *ps3_dev_to_system_bus_dev(
- struct device *_dev)
+ const struct device *_dev)
{
return container_of(_dev, struct ps3_system_bus_device, core);
}
@@ -435,10 +421,6 @@ static inline void *ps3_system_bus_get_drvdata(
return dev_get_drvdata(&dev->core);
}
-/* These two need global scope for get_dma_ops(). */
-
-extern struct bus_type ps3_system_bus_type;
-
/* system manager */
struct ps3_sys_manager_ops {
@@ -526,4 +508,12 @@ void ps3_sync_irq(int node);
u32 ps3_get_hw_thread_id(int cpu);
u64 ps3_get_spe_id(void *arg);
+void ps3_early_mm_init(void);
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_PS3GELIC
+void udbg_shutdown_ps3gelic(void);
+#else
+static inline void udbg_shutdown_ps3gelic(void) {}
+#endif
+
#endif
diff --git a/arch/powerpc/include/asm/ps3av.h b/arch/powerpc/include/asm/ps3av.h
index 0427b0b53d2d..c8b0f2ffcd35 100644
--- a/arch/powerpc/include/asm/ps3av.h
+++ b/arch/powerpc/include/asm/ps3av.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 AV backend support.
*
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* Copyright 2007 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_POWERPC_PS3AV_H_
@@ -104,7 +92,7 @@
#define PS3AV_CMD_AV_INPUTLEN_16 0x02
#define PS3AV_CMD_AV_INPUTLEN_20 0x0a
#define PS3AV_CMD_AV_INPUTLEN_24 0x0b
-/* alayout */
+/* av_layout */
#define PS3AV_CMD_AV_LAYOUT_32 (1 << 0)
#define PS3AV_CMD_AV_LAYOUT_44 (1 << 1)
#define PS3AV_CMD_AV_LAYOUT_48 (1 << 2)
@@ -738,6 +726,4 @@ extern int ps3av_video_mode2res(u32, u32 *, u32 *);
extern int ps3av_video_mute(int);
extern int ps3av_audio_mute(int);
extern int ps3av_audio_mute_analog(int);
-extern int ps3av_dev_open(void);
-extern int ps3av_dev_close(void);
#endif /* _ASM_POWERPC_PS3AV_H_ */
diff --git a/arch/powerpc/include/asm/ps3gpu.h b/arch/powerpc/include/asm/ps3gpu.h
index b2b89591907c..9645c30471b5 100644
--- a/arch/powerpc/include/asm/ps3gpu.h
+++ b/arch/powerpc/include/asm/ps3gpu.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 GPU declarations.
*
* Copyright 2009 Sony Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.
- * If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ASM_POWERPC_PS3GPU_H
diff --git a/arch/powerpc/include/asm/ps3stor.h b/arch/powerpc/include/asm/ps3stor.h
index 6fcaf714fa50..1d8279014f22 100644
--- a/arch/powerpc/include/asm/ps3stor.h
+++ b/arch/powerpc/include/asm/ps3stor.h
@@ -1,21 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PS3 Storage Devices
*
* Copyright (C) 2007 Sony Computer Entertainment Inc.
* Copyright 2007 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published
- * by the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _ASM_POWERPC_PS3STOR_H_
@@ -51,7 +39,7 @@ struct ps3_storage_device {
unsigned int num_regions;
unsigned long accessible_regions;
unsigned int region_idx; /* first accessible region */
- struct ps3_storage_region regions[0]; /* Must be last */
+ struct ps3_storage_region regions[]; /* Must be last */
};
static inline struct ps3_storage_device *to_ps3_storage_device(struct device *dev)
diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h
deleted file mode 100644
index ec0b0b0d1df9..000000000000
--- a/arch/powerpc/include/asm/pte-40x.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_40x_H
-#define _ASM_POWERPC_PTE_40x_H
-#ifdef __KERNEL__
-
-/*
- * At present, all PowerPC 400-class processors share a similar TLB
- * architecture. The instruction and data sides share a unified,
- * 64-entry, fully-associative TLB which is maintained totally under
- * software control. In addition, the instruction side has a
- * hardware-managed, 4-entry, fully-associative TLB which serves as a
- * first level to the shared TLB. These two TLBs are known as the UTLB
- * and ITLB, respectively (see "mmu.h" for definitions).
- *
- * There are several potential gotchas here. The 40x hardware TLBLO
- * field looks like this:
- *
- * 0 1 2 3 4 ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31
- * RPN..................... 0 0 EX WR ZSEL....... W I M G
- *
- * Where possible we make the Linux PTE bits match up with this
- *
- * - bits 20 and 21 must be cleared, because we use 4k pages (40x can
- * support down to 1k pages), this is done in the TLBMiss exception
- * handler.
- * - We use only zones 0 (for kernel pages) and 1 (for user pages)
- * of the 16 available. Bit 24-26 of the TLB are cleared in the TLB
- * miss handler. Bit 27 is PAGE_USER, thus selecting the correct
- * zone.
- * - PRESENT *must* be in the bottom two bits because swap cache
- * entries use the top 30 bits. Because 40x doesn't support SMP
- * anyway, M is irrelevant so we borrow it for PAGE_PRESENT. Bit 30
- * is cleared in the TLB miss handler before the TLB entry is loaded.
- * - All other bits of the PTE are loaded into TLBLO without
- * modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
- * software PTE bits. We actually use use bits 21, 24, 25, and
- * 30 respectively for the software bits: ACCESSED, DIRTY, RW, and
- * PRESENT.
- */
-
-#define _PAGE_GUARDED 0x001 /* G: page is guarded from prefetch */
-#define _PAGE_FILE 0x001 /* when !present: nonlinear file mapping */
-#define _PAGE_PRESENT 0x002 /* software: PTE contains a translation */
-#define _PAGE_NO_CACHE 0x004 /* I: caching is inhibited */
-#define _PAGE_WRITETHRU 0x008 /* W: caching is write-through */
-#define _PAGE_USER 0x010 /* matches one of the zone permission bits */
-#define _PAGE_SPECIAL 0x020 /* software: Special page */
-#define _PAGE_RW 0x040 /* software: Writes permitted */
-#define _PAGE_DIRTY 0x080 /* software: dirty page */
-#define _PAGE_HWWRITE 0x100 /* hardware: Dirty & RW, set in exception */
-#define _PAGE_EXEC 0x200 /* hardware: EX permission */
-#define _PAGE_ACCESSED 0x400 /* software: R: page referenced */
-
-#define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */
-#define _PMD_BAD 0x802
-#define _PMD_SIZE 0x0e0 /* size field, != 0 for large-page PMD entry */
-#define _PMD_SIZE_4M 0x0c0
-#define _PMD_SIZE_16M 0x0e0
-
-#define PMD_PAGE_SIZE(pmdval) (1024 << (((pmdval) & _PMD_SIZE) >> 4))
-
-/* Until my rework is finished, 40x still needs atomic PTE updates */
-#define PTE_ATOMIC_UPDATES 1
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
deleted file mode 100644
index d44826e4ff97..000000000000
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ /dev/null
@@ -1,67 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_8xx_H
-#define _ASM_POWERPC_PTE_8xx_H
-#ifdef __KERNEL__
-
-/*
- * The PowerPC MPC8xx uses a TLB with hardware assisted, software tablewalk.
- * We also use the two level tables, but we can put the real bits in them
- * needed for the TLB and tablewalk. These definitions require Mx_CTR.PPM = 0,
- * Mx_CTR.PPCS = 0, and MD_CTR.TWAM = 1. The level 2 descriptor has
- * additional page protection (when Mx_CTR.PPCS = 1) that allows TLB hit
- * based upon user/super access. The TLB does not have accessed nor write
- * protect. We assume that if the TLB get loaded with an entry it is
- * accessed, and overload the changed bit for write protect. We use
- * two bits in the software pte that are supposed to be set to zero in
- * the TLB entry (24 and 25) for these indicators. Although the level 1
- * descriptor contains the guarded and writethrough/copyback bits, we can
- * set these at the page level since they get copied from the Mx_TWC
- * register when the TLB entry is loaded. We will use bit 27 for guard, since
- * that is where it exists in the MD_TWC, and bit 26 for writethrough.
- * These will get masked from the level 2 descriptor at TLB load time, and
- * copied to the MD_TWC before it gets loaded.
- * Large page sizes added. We currently support two sizes, 4K and 8M.
- * This also allows a TLB hander optimization because we can directly
- * load the PMD into MD_TWC. The 8M pages are only used for kernel
- * mapping of well known areas. The PMD (PGD) entries contain control
- * flags in addition to the address, so care must be taken that the
- * software no longer assumes these are only pointers.
- */
-
-/* Definitions for 8xx embedded chips. */
-#define _PAGE_PRESENT 0x0001 /* Page is valid */
-#define _PAGE_FILE 0x0002 /* when !present: nonlinear file mapping */
-#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
-#define _PAGE_SHARED 0x0004 /* No ASID (context) compare */
-#define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */
-#define _PAGE_DIRTY 0x0100 /* C: page changed */
-
-/* These 4 software bits must be masked out when the entry is loaded
- * into the TLB, 1 SW bit left(0x0080).
- */
-#define _PAGE_GUARDED 0x0010 /* software: guarded access */
-#define _PAGE_ACCESSED 0x0020 /* software: page referenced */
-#define _PAGE_WRITETHRU 0x0040 /* software: caching is write through */
-
-/* Setting any bits in the nibble with the follow two controls will
- * require a TLB exception handler change. It is assumed unused bits
- * are always zero.
- */
-#define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */
-#define _PAGE_USER 0x0800 /* msb PP bits */
-
-#define _PMD_PRESENT 0x0001
-#define _PMD_BAD 0x0ff0
-#define _PMD_PAGE_MASK 0x000c
-#define _PMD_PAGE_8M 0x000c
-
-#define _PTE_NONE_MASK _PAGE_ACCESSED
-
-/* Until my rework is finished, 8xx still needs atomic PTE updates */
-#define PTE_ATOMIC_UPDATES 1
-
-/* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO (_PAGE_SHARED)
-#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
deleted file mode 100644
index 576ad88104cb..000000000000
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_BOOK3E_H
-#define _ASM_POWERPC_PTE_BOOK3E_H
-#ifdef __KERNEL__
-
-/* PTE bit definitions for processors compliant to the Book3E
- * architecture 2.06 or later. The position of the PTE bits
- * matches the HW definition of the optional Embedded Page Table
- * category.
- */
-
-/* Architected bits */
-#define _PAGE_PRESENT 0x000001 /* software: pte contains a translation */
-#define _PAGE_FILE 0x000002 /* (!present only) software: pte holds file offset */
-#define _PAGE_SW1 0x000002
-#define _PAGE_BAP_SR 0x000004
-#define _PAGE_BAP_UR 0x000008
-#define _PAGE_BAP_SW 0x000010
-#define _PAGE_BAP_UW 0x000020
-#define _PAGE_BAP_SX 0x000040
-#define _PAGE_BAP_UX 0x000080
-#define _PAGE_PSIZE_MSK 0x000f00
-#define _PAGE_PSIZE_4K 0x000200
-#define _PAGE_PSIZE_8K 0x000300
-#define _PAGE_PSIZE_16K 0x000400
-#define _PAGE_PSIZE_32K 0x000500
-#define _PAGE_PSIZE_64K 0x000600
-#define _PAGE_PSIZE_128K 0x000700
-#define _PAGE_PSIZE_256K 0x000800
-#define _PAGE_PSIZE_512K 0x000900
-#define _PAGE_PSIZE_1M 0x000a00
-#define _PAGE_PSIZE_2M 0x000b00
-#define _PAGE_PSIZE_4M 0x000c00
-#define _PAGE_PSIZE_8M 0x000d00
-#define _PAGE_PSIZE_16M 0x000e00
-#define _PAGE_PSIZE_32M 0x000f00
-#define _PAGE_DIRTY 0x001000 /* C: page changed */
-#define _PAGE_SW0 0x002000
-#define _PAGE_U3 0x004000
-#define _PAGE_U2 0x008000
-#define _PAGE_U1 0x010000
-#define _PAGE_U0 0x020000
-#define _PAGE_ACCESSED 0x040000
-#define _PAGE_ENDIAN 0x080000
-#define _PAGE_GUARDED 0x100000
-#define _PAGE_COHERENT 0x200000 /* M: enforce memory coherence */
-#define _PAGE_NO_CACHE 0x400000 /* I: cache inhibit */
-#define _PAGE_WRITETHRU 0x800000 /* W: cache write-through */
-
-/* "Higher level" linux bit combinations */
-#define _PAGE_EXEC _PAGE_BAP_UX /* .. and was cache cleaned */
-#define _PAGE_RW (_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
-#define _PAGE_KERNEL_RW (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
-#define _PAGE_KERNEL_RO (_PAGE_BAP_SR)
-#define _PAGE_KERNEL_RWX (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
-#define _PAGE_KERNEL_ROX (_PAGE_BAP_SR | _PAGE_BAP_SX)
-#define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
-
-#define _PAGE_HASHPTE 0
-#define _PAGE_BUSY 0
-
-#define _PAGE_SPECIAL _PAGE_SW0
-
-/* Flags to be preserved on PTE modifications */
-#define _PAGE_HPTEFLAGS _PAGE_BUSY
-
-/* Base page size */
-#ifdef CONFIG_PPC_64K_PAGES
-#define _PAGE_PSIZE _PAGE_PSIZE_64K
-#define PTE_RPN_SHIFT (28)
-#else
-#define _PAGE_PSIZE _PAGE_PSIZE_4K
-#define PTE_RPN_SHIFT (24)
-#endif
-
-#define PTE_WIMGE_SHIFT (19)
-#define PTE_BAP_SHIFT (2)
-
-/* On 32-bit, we never clear the top part of the PTE */
-#ifdef CONFIG_PPC32
-#define _PTE_NONE_MASK 0xffffffff00000000ULL
-#define _PMD_PRESENT 0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD (~PAGE_MASK)
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
deleted file mode 100644
index 8d1569c29042..000000000000
--- a/arch/powerpc/include/asm/pte-common.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/* Included from asm/pgtable-*.h only ! */
-
-/*
- * Some bits are only used on some cpu families... Make sure that all
- * the undefined gets a sensible default
- */
-#ifndef _PAGE_HASHPTE
-#define _PAGE_HASHPTE 0
-#endif
-#ifndef _PAGE_SHARED
-#define _PAGE_SHARED 0
-#endif
-#ifndef _PAGE_HWWRITE
-#define _PAGE_HWWRITE 0
-#endif
-#ifndef _PAGE_EXEC
-#define _PAGE_EXEC 0
-#endif
-#ifndef _PAGE_ENDIAN
-#define _PAGE_ENDIAN 0
-#endif
-#ifndef _PAGE_COHERENT
-#define _PAGE_COHERENT 0
-#endif
-#ifndef _PAGE_WRITETHRU
-#define _PAGE_WRITETHRU 0
-#endif
-#ifndef _PAGE_4K_PFN
-#define _PAGE_4K_PFN 0
-#endif
-#ifndef _PAGE_SAO
-#define _PAGE_SAO 0
-#endif
-#ifndef _PAGE_PSIZE
-#define _PAGE_PSIZE 0
-#endif
-#ifndef _PMD_PRESENT_MASK
-#define _PMD_PRESENT_MASK _PMD_PRESENT
-#endif
-#ifndef _PMD_SIZE
-#define _PMD_SIZE 0
-#define PMD_PAGE_SIZE(pmd) bad_call_to_PMD_PAGE_SIZE()
-#endif
-#ifndef _PAGE_KERNEL_RO
-#define _PAGE_KERNEL_RO 0
-#endif
-#ifndef _PAGE_KERNEL_ROX
-#define _PAGE_KERNEL_ROX (_PAGE_EXEC)
-#endif
-#ifndef _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
-#endif
-#ifndef _PAGE_KERNEL_RWX
-#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
-#endif
-#ifndef _PAGE_HPTEFLAGS
-#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
-#endif
-#ifndef _PTE_NONE_MASK
-#define _PTE_NONE_MASK _PAGE_HPTEFLAGS
-#endif
-
-/* Make sure we get a link error if PMD_PAGE_SIZE is ever called on a
- * kernel without large page PMD support
- */
-#ifndef __ASSEMBLY__
-extern unsigned long bad_call_to_PMD_PAGE_SIZE(void);
-#endif /* __ASSEMBLY__ */
-
-/* Location of the PFN in the PTE. Most 32-bit platforms use the same
- * as _PAGE_SHIFT here (ie, naturally aligned).
- * Platform who don't just pre-define the value so we don't override it here
- */
-#ifndef PTE_RPN_SHIFT
-#define PTE_RPN_SHIFT (PAGE_SHIFT)
-#endif
-
-/* The mask convered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
- */
-#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-#define PTE_RPN_MAX (1ULL << (64 - PTE_RPN_SHIFT))
-#define PTE_RPN_MASK (~((1ULL<<PTE_RPN_SHIFT)-1))
-#else
-#define PTE_RPN_MAX (1UL << (32 - PTE_RPN_SHIFT))
-#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
-#endif
-
-/* _PAGE_CHG_MASK masks of bits that are to be preserved across
- * pgprot changes
- */
-#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
- _PAGE_ACCESSED | _PAGE_SPECIAL)
-
-/* Mask of bits returned by pte_pgprot() */
-#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
- _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
- _PAGE_USER | _PAGE_ACCESSED | \
- _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
-
-/*
- * We define 2 sets of base prot bits, one for basic pages (ie,
- * cacheable kernel and user pages) and one for non cacheable
- * pages. We always set _PAGE_COHERENT when SMP is enabled or
- * the processor might need it for DMA coherency.
- */
-#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_STD_MMU)
-#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
-#else
-#define _PAGE_BASE (_PAGE_BASE_NC)
-#endif
-
-/* Permission masks used to generate the __P and __S table,
- *
- * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
- *
- * Write permissions imply read permissions for now (we could make write-only
- * pages on BookE but we don't bother for now). Execute permission control is
- * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
- */
-#define PAGE_NONE __pgprot(_PAGE_BASE)
-#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
-#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
-#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
-#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
- _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
- _PAGE_NO_CACHE | _PAGE_GUARDED)
-#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
- defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
-
-/*
- * Don't just check for any non zero bits in __PAGE_USER, since for book3e
- * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
- * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
- */
-#define pte_user(val) ((val & _PAGE_USER) == _PAGE_USER)
-
-/* Advertise special mapping type for AGP */
-#define PAGE_AGP (PAGE_KERNEL_NC)
-#define HAVE_PAGE_AGP
-
-/* Advertise support for _PAGE_SPECIAL */
-#define __HAVE_ARCH_PTE_SPECIAL
-
diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/pte-fsl-booke.h
deleted file mode 100644
index e84dd7ed505e..000000000000
--- a/arch/powerpc/include/asm/pte-fsl-booke.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_FSL_BOOKE_H
-#define _ASM_POWERPC_PTE_FSL_BOOKE_H
-#ifdef __KERNEL__
-
-/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based
- * processors
- *
- MMU Assist Register 3:
-
- 32 33 34 35 36 ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63
- RPN...................... 0 0 U0 U1 U2 U3 UX SX UW SW UR SR
-
- - PRESENT *must* be in the bottom three bits because swap cache
- entries use the top 29 bits.
-
- - FILE *must* be in the bottom three bits because swap cache
- entries use the top 29 bits.
-*/
-
-/* Definitions for FSL Book-E Cores */
-#define _PAGE_PRESENT 0x00001 /* S: PTE contains a translation */
-#define _PAGE_USER 0x00002 /* S: User page (maps to UR) */
-#define _PAGE_FILE 0x00002 /* S: when !present: nonlinear file mapping */
-#define _PAGE_RW 0x00004 /* S: Write permission (SW) */
-#define _PAGE_DIRTY 0x00008 /* S: Page dirty */
-#define _PAGE_EXEC 0x00010 /* H: SX permission */
-#define _PAGE_ACCESSED 0x00020 /* S: Page referenced */
-
-#define _PAGE_ENDIAN 0x00040 /* H: E bit */
-#define _PAGE_GUARDED 0x00080 /* H: G bit */
-#define _PAGE_COHERENT 0x00100 /* H: M bit */
-#define _PAGE_NO_CACHE 0x00200 /* H: I bit */
-#define _PAGE_WRITETHRU 0x00400 /* H: W bit */
-#define _PAGE_SPECIAL 0x00800 /* S: Special page */
-
-#define _PMD_PRESENT 0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD (~PAGE_MASK)
-
-#define PTE_WIMGE_SHIFT (6)
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/pte-hash32.h
deleted file mode 100644
index 4aad4132d0a8..000000000000
--- a/arch/powerpc/include/asm/pte-hash32.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_HASH32_H
-#define _ASM_POWERPC_PTE_HASH32_H
-#ifdef __KERNEL__
-
-/*
- * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
- * table containing PTEs, together with a set of 16 segment registers,
- * to define the virtual to physical address mapping.
- *
- * We use the hash table as an extended TLB, i.e. a cache of currently
- * active mappings. We maintain a two-level page table tree, much
- * like that used by the i386, for the sake of the Linux memory
- * management code. Low-level assembler code in hash_low_32.S
- * (procedure hash_page) is responsible for extracting ptes from the
- * tree and putting them into the hash table when necessary, and
- * updating the accessed and modified bits in the page table tree.
- */
-
-#define _PAGE_PRESENT 0x001 /* software: pte contains a translation */
-#define _PAGE_HASHPTE 0x002 /* hash_page has made an HPTE for this pte */
-#define _PAGE_FILE 0x004 /* when !present: nonlinear file mapping */
-#define _PAGE_USER 0x004 /* usermode access allowed */
-#define _PAGE_GUARDED 0x008 /* G: prohibit speculative access */
-#define _PAGE_COHERENT 0x010 /* M: enforce memory coherence (SMP systems) */
-#define _PAGE_NO_CACHE 0x020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU 0x040 /* W: cache write-through */
-#define _PAGE_DIRTY 0x080 /* C: page changed */
-#define _PAGE_ACCESSED 0x100 /* R: page referenced */
-#define _PAGE_RW 0x400 /* software: user write access allowed */
-#define _PAGE_SPECIAL 0x800 /* software: Special page */
-
-#ifdef CONFIG_PTE_64BIT
-/* We never clear the high word of the pte */
-#define _PTE_NONE_MASK (0xffffffff00000000ULL | _PAGE_HASHPTE)
-#else
-#define _PTE_NONE_MASK _PAGE_HASHPTE
-#endif
-
-#define _PMD_PRESENT 0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD (~PAGE_MASK)
-
-/* Hash table based platforms need atomic updates of the linux PTE */
-#define PTE_ATOMIC_UPDATES 1
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_HASH32_H */
diff --git a/arch/powerpc/include/asm/pte-hash64-4k.h b/arch/powerpc/include/asm/pte-hash64-4k.h
deleted file mode 100644
index c134e809aac3..000000000000
--- a/arch/powerpc/include/asm/pte-hash64-4k.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* To be include by pgtable-hash64.h only */
-
-/* PTE bits */
-#define _PAGE_HASHPTE 0x0400 /* software: pte has an associated HPTE */
-#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
-#define _PAGE_GROUP_IX 0x7000 /* software: HPTE index within group */
-#define _PAGE_F_SECOND _PAGE_SECONDARY
-#define _PAGE_F_GIX _PAGE_GROUP_IX
-#define _PAGE_SPECIAL 0x10000 /* software: special page */
-
-/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
- _PAGE_SECONDARY | _PAGE_GROUP_IX)
-
-/* shift to put page number into pte */
-#define PTE_RPN_SHIFT (17)
-
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
deleted file mode 100644
index 4f4ec2ab45c9..000000000000
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* To be include by pgtable-hash64.h only */
-
-/* Additional PTE bits (don't change without checking asm in hash_low.S) */
-#define _PAGE_SPECIAL 0x00000400 /* software: special page */
-#define _PAGE_HPTE_SUB 0x0ffff000 /* combo only: sub pages HPTE bits */
-#define _PAGE_HPTE_SUB0 0x08000000 /* combo only: first sub page */
-#define _PAGE_COMBO 0x10000000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN 0x20000000 /* PFN is for a single 4k page */
-
-/* For 64K page, we don't have a separate _PAGE_HASHPTE bit. Instead,
- * we set that to be the whole sub-bits mask. The C code will only
- * test this, so a multi-bit mask will work. For combo pages, this
- * is equivalent as effectively, the old _PAGE_HASHPTE was an OR of
- * all the sub bits. For real 64k pages, we now have the assembly set
- * _PAGE_HPTE_SUB0 in addition to setting the HIDX bits which overlap
- * that mask. This is fine as long as the HIDX bits are never set on
- * a PTE that isn't hashed, which is the case today.
- *
- * A little nit is for the huge page C code, which does the hashing
- * in C, we need to provide which bit to use.
- */
-#define _PAGE_HASHPTE _PAGE_HPTE_SUB
-
-/* Note the full page bits must be in the same location as for normal
- * 4k pages as the same assembly will be used to insert 64K pages
- * whether the kernel has CONFIG_PPC_64K_PAGES or not
- */
-#define _PAGE_F_SECOND 0x00008000 /* full page: hidx bits */
-#define _PAGE_F_GIX 0x00007000 /* full page: hidx bits */
-
-/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO)
-
-/* Shift to put page number into pte.
- *
- * That gives us a max RPN of 34 bits, which means a max of 50 bits
- * of addressable physical space, or 46 bits for the special 4k PFNs.
- */
-#define PTE_RPN_SHIFT (30)
-
-#ifndef __ASSEMBLY__
-
-/*
- * With 64K pages on hash table, we have a special PTE format that
- * uses a second "half" of the page table to encode sub-page information
- * in order to deal with 64K made of 4K HW pages. Thus we override the
- * generic accessors and iterators here
- */
-#define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
-{
- real_pte_t rpte;
-
- rpte.pte = pte;
- rpte.hidx = 0;
- if (pte_val(pte) & _PAGE_COMBO) {
- /*
- * Make sure we order the hidx load against the _PAGE_COMBO
- * check. The store side ordering is done in __hash_page_4K
- */
- smp_rmb();
- rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
- }
- return rpte;
-}
-
-static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
-{
- if ((pte_val(rpte.pte) & _PAGE_COMBO))
- return (rpte.hidx >> (index<<2)) & 0xf;
- return (pte_val(rpte.pte) >> 12) & 0xf;
-}
-
-#define __rpte_to_pte(r) ((r).pte)
-#define __rpte_sub_valid(rpte, index) \
- (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
-
-/* Trick: we set __end to va + 64k, which happens works for
- * a 16M page as well as we want only one iteration
- */
-#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift) \
- do { \
- unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT)); \
- unsigned __split = (psize == MMU_PAGE_4K || \
- psize == MMU_PAGE_64K_AP); \
- shift = mmu_psize_defs[psize].shift; \
- for (index = 0; vpn < __end; index++, \
- vpn += (1L << (shift - VPN_SHIFT))) { \
- if (!__split || __rpte_sub_valid(rpte, index)) \
- do {
-
-#define pte_iterate_hashed_end() } while(0); } } while(0)
-
-#define pte_pagesize_index(mm, addr, pte) \
- (((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
-
-#define remap_4k_pfn(vma, addr, pfn, prot) \
- (WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL : \
- remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, \
- __pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
-
-#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
deleted file mode 100644
index 2505d8eab15c..000000000000
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_HASH64_H
-#define _ASM_POWERPC_PTE_HASH64_H
-#ifdef __KERNEL__
-
-/*
- * Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
- *
- * Note: We only support user read/write permissions. Supervisor always
- * have full read/write to pages above PAGE_OFFSET (pages below that
- * always use the user access permissions).
- *
- * We could create separate kernel read-only if we used the 3 PP bits
- * combinations that newer processors provide but we currently don't.
- */
-#define _PAGE_PRESENT 0x0001 /* software: pte contains a translation */
-#define _PAGE_USER 0x0002 /* matches one of the PP bits */
-#define _PAGE_FILE 0x0002 /* (!present only) software: pte holds file offset */
-#define _PAGE_EXEC 0x0004 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED 0x0008
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
-#define _PAGE_NO_CACHE 0x0020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU 0x0040 /* W: cache write-through */
-#define _PAGE_DIRTY 0x0080 /* C: page changed */
-#define _PAGE_ACCESSED 0x0100 /* R: page referenced */
-#define _PAGE_RW 0x0200 /* software: user write access allowed */
-#define _PAGE_BUSY 0x0800 /* software: PTE & hash are busy */
-
-/*
- * Used for tracking numa faults
- */
-#define _PAGE_NUMA 0x00000010 /* Gather numa placement stats */
-
-
-/* No separate kernel read-only */
-#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
-#define _PAGE_KERNEL_RO _PAGE_KERNEL_RW
-
-/* Strong Access Ordering */
-#define _PAGE_SAO (_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
-
-/* No page size encoding in the linux PTE */
-#define _PAGE_PSIZE 0
-
-/* PTEIDX nibble */
-#define _PTEIDX_SECONDARY 0x8
-#define _PTEIDX_GROUP_IX 0x7
-
-/* Hash table based platforms need atomic updates of the linux PTE */
-#define PTE_ATOMIC_UPDATES 1
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/pte-hash64-64k.h>
-#else
-#include <asm/pte-hash64-4k.h>
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_PTE_HASH64_H */
diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
new file mode 100644
index 000000000000..73c22c579a79
--- /dev/null
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -0,0 +1,63 @@
+#ifndef _ASM_POWERPC_PTE_WALK_H
+#define _ASM_POWERPC_PTE_WALK_H
+
+#include <linux/sched.h>
+
+/* Don't use this directly */
+extern pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hshift);
+
+static inline pte_t *find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hshift)
+{
+ pte_t *pte;
+
+ VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
+ pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
+
+#if defined(CONFIG_DEBUG_VM) && \
+ !(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
+ /*
+ * We should not find huge page if these configs are not enabled.
+ */
+ if (hshift)
+ WARN_ON(*hshift);
+#endif
+ return pte;
+}
+
+static inline pte_t *find_init_mm_pte(unsigned long ea, unsigned *hshift)
+{
+ pgd_t *pgdir = init_mm.pgd;
+ return __find_linux_pte(pgdir, ea, NULL, hshift);
+}
+
+/*
+ * Convert a kernel vmap virtual address (vmalloc or ioremap space) to a
+ * physical address, without taking locks. This can be used in real-mode.
+ */
+static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
+{
+ pte_t *ptep;
+ phys_addr_t pa;
+ int hugepage_shift;
+
+ /*
+ * init_mm does not free page tables, and does not do THP. It may
+ * have huge pages from huge vmalloc / ioremap etc.
+ */
+ ptep = find_init_mm_pte(addr, &hugepage_shift);
+ if (WARN_ON(!ptep))
+ return 0;
+
+ pa = PFN_PHYS(pte_pfn(*ptep));
+
+ if (!hugepage_shift)
+ hugepage_shift = PAGE_SHIFT;
+
+ pa |= addr & ((1ul << hugepage_shift) - 1);
+
+ return pa;
+}
+
+#endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 279b80f3bb29..94aa1de2b06e 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2001 PPC64 Team, IBM Corp
*
@@ -14,17 +15,94 @@
*
* Note that the offsets of the fields in this struct correspond with
* the PT_* values below. This simplifies arch/powerpc/kernel/ptrace.c.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_PTRACE_H
#define _ASM_POWERPC_PTRACE_H
+#include <linux/err.h>
#include <uapi/asm/ptrace.h>
+#include <asm/asm-const.h>
+#include <asm/reg.h>
+#ifndef __ASSEMBLER__
+struct pt_regs
+{
+ union {
+ struct user_pt_regs user_regs;
+ struct {
+ unsigned long gpr[32];
+ unsigned long nip;
+ unsigned long msr;
+ unsigned long orig_gpr3;
+ unsigned long ctr;
+ unsigned long link;
+ unsigned long xer;
+ unsigned long ccr;
+#ifdef CONFIG_PPC64
+ unsigned long softe;
+#else
+ unsigned long mq;
+#endif
+ unsigned long trap;
+ union {
+ unsigned long dar;
+ unsigned long dear;
+ };
+ union {
+ unsigned long dsisr;
+ unsigned long esr;
+ };
+ unsigned long result;
+ };
+ };
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_KUAP)
+ union {
+ struct {
+#ifdef CONFIG_PPC64
+ unsigned long ppr;
+ unsigned long exit_result;
+#endif
+ union {
+#ifdef CONFIG_PPC_KUAP
+ unsigned long kuap;
+#endif
+#ifdef CONFIG_PPC_PKEY
+ unsigned long amr;
+#endif
+ };
+#ifdef CONFIG_PPC_PKEY
+ unsigned long iamr;
+#endif
+ };
+ unsigned long __pad[4]; /* Maintain 16 byte interrupt stack alignment */
+ };
+#endif
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+ struct { /* Must be a multiple of 16 bytes */
+ unsigned long mas0;
+ unsigned long mas1;
+ unsigned long mas2;
+ unsigned long mas3;
+ unsigned long mas6;
+ unsigned long mas7;
+ unsigned long srr0;
+ unsigned long srr1;
+ unsigned long csrr0;
+ unsigned long csrr1;
+ unsigned long dsrr0;
+ unsigned long dsrr1;
+ };
+#endif
+};
+#endif
+
+
+// Always displays as "REGS" in memory dumps
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define STACK_FRAME_REGS_MARKER ASM_CONST(0x52454753)
+#else
+#define STACK_FRAME_REGS_MARKER ASM_CONST(0x53474552)
+#endif
#ifdef __powerpc64__
@@ -40,12 +118,28 @@
#define USER_REDZONE_SIZE 512
#define KERNEL_REDZONE_SIZE 288
-#define STACK_FRAME_OVERHEAD 112 /* size of minimum stack frame */
#define STACK_FRAME_LR_SAVE 2 /* Location of LR in stack frame */
-#define STACK_FRAME_REGS_MARKER ASM_CONST(0x7265677368657265)
-#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + \
- STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
-#define STACK_FRAME_MARKER 12
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_MARKER STACK_FRAME_MIN_SIZE
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_SWITCH_FRAME_REGS (STACK_FRAME_MIN_SIZE + 16)
+#else
+/*
+ * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save
+ * area. This parameter area is not used by calls to C from interrupt entry,
+ * so the second from last one of those is used for the frame marker.
+ */
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 16)
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE
+#endif
/* Size of dummy stack frame allocated when calling signal handler. */
#define __SIGNAL_FRAMESIZE 128
@@ -55,93 +149,190 @@
#define USER_REDZONE_SIZE 0
#define KERNEL_REDZONE_SIZE 0
-#define STACK_FRAME_OVERHEAD 16 /* size of minimum stack frame */
+#define STACK_FRAME_MIN_SIZE 16
#define STACK_FRAME_LR_SAVE 1 /* Location of LR in stack frame */
-#define STACK_FRAME_REGS_MARKER ASM_CONST(0x72656773)
-#define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define STACK_FRAME_MARKER 2
+#define STACK_USER_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER (STACK_FRAME_MIN_SIZE - 8)
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS STACK_FRAME_MIN_SIZE
/* Size of stack frame allocated when calling signal handler. */
#define __SIGNAL_FRAMESIZE 64
#endif /* __powerpc64__ */
-#ifndef __ASSEMBLY__
+#define STACK_INT_FRAME_SIZE (KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE)
+#define STACK_INT_FRAME_MARKER_LONGS (STACK_INT_FRAME_MARKER/sizeof(long))
-#define GET_IP(regs) ((regs)->nip)
-#define GET_USP(regs) ((regs)->gpr[1])
-#define GET_FP(regs) (0)
-#define SET_FP(regs, val)
+#ifndef __ASSEMBLER__
+#include <asm/paca.h>
#ifdef CONFIG_SMP
extern unsigned long profile_pc(struct pt_regs *regs);
-#define profile_pc profile_pc
+#else
+#define profile_pc(regs) instruction_pointer(regs)
#endif
-#include <asm-generic/ptrace.h>
+long do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_leave(struct pt_regs *regs);
-#define kernel_stack_pointer(regs) ((regs)->gpr[1])
-static inline int is_syscall_success(struct pt_regs *regs)
+static inline void set_return_regs_changed(void)
{
- return !(regs->ccr & 0x10000000);
+#ifdef CONFIG_PPC_BOOK3S_64
+ WRITE_ONCE(local_paca->hsrr_valid, 0);
+ WRITE_ONCE(local_paca->srr_valid, 0);
+#endif
}
-static inline long regs_return_value(struct pt_regs *regs)
+static inline void regs_set_return_ip(struct pt_regs *regs, unsigned long ip)
{
- if (is_syscall_success(regs))
- return regs->gpr[3];
- else
- return -regs->gpr[3];
+ regs->nip = ip;
+ set_return_regs_changed();
+}
+
+static inline void regs_set_return_msr(struct pt_regs *regs, unsigned long msr)
+{
+ regs->msr = msr;
+ set_return_regs_changed();
+}
+
+static inline void regs_add_return_ip(struct pt_regs *regs, long offset)
+{
+ regs_set_return_ip(regs, regs->nip + offset);
+}
+
+static inline unsigned long instruction_pointer(struct pt_regs *regs)
+{
+ return regs->nip;
+}
+
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs_set_return_ip(regs, val);
+}
+
+static inline unsigned long user_stack_pointer(struct pt_regs *regs)
+{
+ return regs->gpr[1];
+}
+
+static inline unsigned long frame_pointer(struct pt_regs *regs)
+{
+ return 0;
}
-#ifdef __powerpc64__
-#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
-#else
#define user_mode(regs) (((regs)->msr & MSR_PR) != 0)
-#endif
#define force_successful_syscall_return() \
do { \
set_thread_flag(TIF_NOERROR); \
} while(0)
-struct task_struct;
-extern int ptrace_get_reg(struct task_struct *task, int regno,
- unsigned long *data);
-extern int ptrace_put_reg(struct task_struct *task, int regno,
- unsigned long data);
-
#define current_pt_regs() \
- ((struct pt_regs *)((unsigned long)current_thread_info() + THREAD_SIZE) - 1)
+ ((struct pt_regs *)((unsigned long)task_stack_page(current) + THREAD_SIZE) - 1)
+
/*
- * We use the least-significant bit of the trap field to indicate
- * whether we have saved the full set of registers, or only a
- * partial set. A 1 there means the partial set.
- * On 4xx we use the next bit to indicate whether the exception
+ * The 4 low bits (0xf) are available as flags to overload the trap word,
+ * because interrupt vectors have minimum alignment of 0x10. TRAP_FLAGS_MASK
+ * must cover the bits used as flags, including bit 0 which is used as the
+ * "norestart" bit.
+ */
+#ifdef __powerpc64__
+#define TRAP_FLAGS_MASK 0x1
+#else
+/*
+ * On 4xx we use bit 1 in the trap word to indicate whether the exception
* is a critical exception (1 means it is).
*/
-#define FULL_REGS(regs) (((regs)->trap & 1) == 0)
-#ifndef __powerpc64__
+#define TRAP_FLAGS_MASK 0xf
#define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0)
#define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0)
#define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0)
-#endif /* ! __powerpc64__ */
-#define TRAP(regs) ((regs)->trap & ~0xF)
-#ifdef __powerpc64__
-#define NV_REG_POISON 0xdeadbeefdeadbeefUL
-#define CHECK_FULL_REGS(regs) BUG_ON(regs->trap & 1)
-#else
-#define NV_REG_POISON 0xdeadbeef
-#define CHECK_FULL_REGS(regs) \
-do { \
- if ((regs)->trap & 1) \
- printk(KERN_CRIT "%s: partial register set\n", __func__); \
-} while (0)
#endif /* __powerpc64__ */
+#define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK)
+
+static __always_inline void set_trap(struct pt_regs *regs, unsigned long val)
+{
+ regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK);
+}
+
+static inline bool trap_is_scv(struct pt_regs *regs)
+{
+ return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
+}
+
+static inline bool trap_is_unsupported_scv(struct pt_regs *regs)
+{
+ return IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x7ff0;
+}
+
+static inline bool trap_is_syscall(struct pt_regs *regs)
+{
+ return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
+}
+
+static inline bool trap_norestart(struct pt_regs *regs)
+{
+ return regs->trap & 0x1;
+}
+
+static __always_inline void set_trap_norestart(struct pt_regs *regs)
+{
+ regs->trap |= 0x1;
+}
+
+#define kernel_stack_pointer(regs) ((regs)->gpr[1])
+static inline int is_syscall_success(struct pt_regs *regs)
+{
+ if (trap_is_scv(regs))
+ return !IS_ERR_VALUE((unsigned long)regs->gpr[3]);
+ else
+ return !(regs->ccr & 0x10000000);
+}
+
+static inline long regs_return_value(struct pt_regs *regs)
+{
+ if (trap_is_scv(regs))
+ return regs->gpr[3];
+
+ if (is_syscall_success(regs))
+ return regs->gpr[3];
+ else
+ return -regs->gpr[3];
+}
+
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->gpr[3] = rc;
+}
+
+static inline bool cpu_has_msr_ri(void)
+{
+ return !IS_ENABLED(CONFIG_BOOKE);
+}
+
+static inline bool regs_is_unrecoverable(struct pt_regs *regs)
+{
+ return unlikely(cpu_has_msr_ri() && !(regs->msr & MSR_RI));
+}
+
+static inline void regs_set_recoverable(struct pt_regs *regs)
+{
+ if (cpu_has_msr_ri())
+ regs_set_return_msr(regs, regs->msr | MSR_RI);
+}
+
+static inline void regs_set_unrecoverable(struct pt_regs *regs)
+{
+ if (cpu_has_msr_ri())
+ regs_set_return_msr(regs, regs->msr & ~MSR_RI);
+}
#define arch_has_single_step() (1)
-#define arch_has_block_step() (!cpu_has_feature(CPU_FTR_601))
-#define ARCH_HAS_USER_SINGLE_STEP_INFO
+#define arch_has_block_step() (true)
+#define ARCH_HAS_USER_SINGLE_STEP_REPORT
/*
* kprobe-based event tracer support
@@ -206,9 +397,28 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
return 0;
}
-#endif /* __ASSEMBLY__ */
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs: pt_regs of that context
+ * @n: function argument number (start from 0)
+ *
+ * We support up to 8 arguments and assume they are sent in through the GPRs.
+ * This will fail for fp/vector arguments, but those aren't usually found in
+ * kernel code. This is expected to be called from kprobes or ftrace with regs.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, unsigned int n)
+{
+#define NR_REG_ARGUMENTS 8
+ if (n < NR_REG_ARGUMENTS)
+ return regs_get_register(regs, offsetof(struct pt_regs, gpr[3 + n]));
+ return 0;
+}
+
+#endif /* __ASSEMBLER__ */
#ifndef __powerpc64__
+/* We need PT_SOFTE defined at all time to avoid #ifdefs */
+#define PT_SOFTE PT_MQ
#else /* __powerpc64__ */
#define PT_FPSCR32 (PT_FPR0 + 2*32 + 1) /* each FP reg occupies 2 32-bit userspace slots */
#define PT_VR0_32 164 /* each Vector reg occupies 4 slots in 32-bit */
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
deleted file mode 100644
index 32b9bfa0c9bd..000000000000
--- a/arch/powerpc/include/asm/qe.h
+++ /dev/null
@@ -1,740 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: Shlomi Gridish <gridish@freescale.com>
- * Li Yang <leoli@freescale.com>
- *
- * Description:
- * QUICC Engine (QE) external definitions and structure.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_QE_H
-#define _ASM_POWERPC_QE_H
-#ifdef __KERNEL__
-
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <asm/cpm.h>
-#include <asm/immap_qe.h>
-
-#define QE_NUM_OF_SNUM 256 /* There are 256 serial number in QE */
-#define QE_NUM_OF_BRGS 16
-#define QE_NUM_OF_PORTS 1024
-
-/* Memory partitions
-*/
-#define MEM_PART_SYSTEM 0
-#define MEM_PART_SECONDARY 1
-#define MEM_PART_MURAM 2
-
-/* Clocks and BRGs */
-enum qe_clock {
- QE_CLK_NONE = 0,
- QE_BRG1, /* Baud Rate Generator 1 */
- QE_BRG2, /* Baud Rate Generator 2 */
- QE_BRG3, /* Baud Rate Generator 3 */
- QE_BRG4, /* Baud Rate Generator 4 */
- QE_BRG5, /* Baud Rate Generator 5 */
- QE_BRG6, /* Baud Rate Generator 6 */
- QE_BRG7, /* Baud Rate Generator 7 */
- QE_BRG8, /* Baud Rate Generator 8 */
- QE_BRG9, /* Baud Rate Generator 9 */
- QE_BRG10, /* Baud Rate Generator 10 */
- QE_BRG11, /* Baud Rate Generator 11 */
- QE_BRG12, /* Baud Rate Generator 12 */
- QE_BRG13, /* Baud Rate Generator 13 */
- QE_BRG14, /* Baud Rate Generator 14 */
- QE_BRG15, /* Baud Rate Generator 15 */
- QE_BRG16, /* Baud Rate Generator 16 */
- QE_CLK1, /* Clock 1 */
- QE_CLK2, /* Clock 2 */
- QE_CLK3, /* Clock 3 */
- QE_CLK4, /* Clock 4 */
- QE_CLK5, /* Clock 5 */
- QE_CLK6, /* Clock 6 */
- QE_CLK7, /* Clock 7 */
- QE_CLK8, /* Clock 8 */
- QE_CLK9, /* Clock 9 */
- QE_CLK10, /* Clock 10 */
- QE_CLK11, /* Clock 11 */
- QE_CLK12, /* Clock 12 */
- QE_CLK13, /* Clock 13 */
- QE_CLK14, /* Clock 14 */
- QE_CLK15, /* Clock 15 */
- QE_CLK16, /* Clock 16 */
- QE_CLK17, /* Clock 17 */
- QE_CLK18, /* Clock 18 */
- QE_CLK19, /* Clock 19 */
- QE_CLK20, /* Clock 20 */
- QE_CLK21, /* Clock 21 */
- QE_CLK22, /* Clock 22 */
- QE_CLK23, /* Clock 23 */
- QE_CLK24, /* Clock 24 */
- QE_CLK_DUMMY
-};
-
-static inline bool qe_clock_is_brg(enum qe_clock clk)
-{
- return clk >= QE_BRG1 && clk <= QE_BRG16;
-}
-
-extern spinlock_t cmxgcr_lock;
-
-/* Export QE common operations */
-#ifdef CONFIG_QUICC_ENGINE
-extern void qe_reset(void);
-#else
-static inline void qe_reset(void) {}
-#endif
-
-/* QE PIO */
-#define QE_PIO_PINS 32
-
-struct qe_pio_regs {
- __be32 cpodr; /* Open drain register */
- __be32 cpdata; /* Data register */
- __be32 cpdir1; /* Direction register */
- __be32 cpdir2; /* Direction register */
- __be32 cppar1; /* Pin assignment register */
- __be32 cppar2; /* Pin assignment register */
-#ifdef CONFIG_PPC_85xx
- u8 pad[8];
-#endif
-};
-
-#define QE_PIO_DIR_IN 2
-#define QE_PIO_DIR_OUT 1
-extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin,
- int dir, int open_drain, int assignment,
- int has_irq);
-#ifdef CONFIG_QUICC_ENGINE
-extern int par_io_init(struct device_node *np);
-extern int par_io_of_config(struct device_node *np);
-extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
- int assignment, int has_irq);
-extern int par_io_data_set(u8 port, u8 pin, u8 val);
-#else
-static inline int par_io_init(struct device_node *np) { return -ENOSYS; }
-static inline int par_io_of_config(struct device_node *np) { return -ENOSYS; }
-static inline int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
- int assignment, int has_irq) { return -ENOSYS; }
-static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; }
-#endif /* CONFIG_QUICC_ENGINE */
-
-/*
- * Pin multiplexing functions.
- */
-struct qe_pin;
-#ifdef CONFIG_QE_GPIO
-extern struct qe_pin *qe_pin_request(struct device_node *np, int index);
-extern void qe_pin_free(struct qe_pin *qe_pin);
-extern void qe_pin_set_gpio(struct qe_pin *qe_pin);
-extern void qe_pin_set_dedicated(struct qe_pin *pin);
-#else
-static inline struct qe_pin *qe_pin_request(struct device_node *np, int index)
-{
- return ERR_PTR(-ENOSYS);
-}
-static inline void qe_pin_free(struct qe_pin *qe_pin) {}
-static inline void qe_pin_set_gpio(struct qe_pin *qe_pin) {}
-static inline void qe_pin_set_dedicated(struct qe_pin *pin) {}
-#endif /* CONFIG_QE_GPIO */
-
-#ifdef CONFIG_QUICC_ENGINE
-int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input);
-#else
-static inline int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol,
- u32 cmd_input)
-{
- return -ENOSYS;
-}
-#endif /* CONFIG_QUICC_ENGINE */
-
-/* QE internal API */
-enum qe_clock qe_clock_source(const char *source);
-unsigned int qe_get_brg_clk(void);
-int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier);
-int qe_get_snum(void);
-void qe_put_snum(u8 snum);
-unsigned int qe_get_num_of_risc(void);
-unsigned int qe_get_num_of_snums(void);
-
-static inline int qe_alive_during_sleep(void)
-{
- /*
- * MPC8568E reference manual says:
- *
- * "...power down sequence waits for all I/O interfaces to become idle.
- * In some applications this may happen eventually without actively
- * shutting down interfaces, but most likely, software will have to
- * take steps to shut down the eTSEC, QUICC Engine Block, and PCI
- * interfaces before issuing the command (either the write to the core
- * MSR[WE] as described above or writing to POWMGTCSR) to put the
- * device into sleep state."
- *
- * MPC8569E reference manual has a similar paragraph.
- */
-#ifdef CONFIG_PPC_85xx
- return 0;
-#else
- return 1;
-#endif
-}
-
-/* we actually use cpm_muram implementation, define this for convenience */
-#define qe_muram_init cpm_muram_init
-#define qe_muram_alloc cpm_muram_alloc
-#define qe_muram_alloc_fixed cpm_muram_alloc_fixed
-#define qe_muram_free cpm_muram_free
-#define qe_muram_addr cpm_muram_addr
-#define qe_muram_offset cpm_muram_offset
-
-/* Structure that defines QE firmware binary files.
- *
- * See Documentation/powerpc/qe_firmware.txt for a description of these
- * fields.
- */
-struct qe_firmware {
- struct qe_header {
- __be32 length; /* Length of the entire structure, in bytes */
- u8 magic[3]; /* Set to { 'Q', 'E', 'F' } */
- u8 version; /* Version of this layout. First ver is '1' */
- } header;
- u8 id[62]; /* Null-terminated identifier string */
- u8 split; /* 0 = shared I-RAM, 1 = split I-RAM */
- u8 count; /* Number of microcode[] structures */
- struct {
- __be16 model; /* The SOC model */
- u8 major; /* The SOC revision major */
- u8 minor; /* The SOC revision minor */
- } __attribute__ ((packed)) soc;
- u8 padding[4]; /* Reserved, for alignment */
- __be64 extended_modes; /* Extended modes */
- __be32 vtraps[8]; /* Virtual trap addresses */
- u8 reserved[4]; /* Reserved, for future expansion */
- struct qe_microcode {
- u8 id[32]; /* Null-terminated identifier */
- __be32 traps[16]; /* Trap addresses, 0 == ignore */
- __be32 eccr; /* The value for the ECCR register */
- __be32 iram_offset; /* Offset into I-RAM for the code */
- __be32 count; /* Number of 32-bit words of the code */
- __be32 code_offset; /* Offset of the actual microcode */
- u8 major; /* The microcode version major */
- u8 minor; /* The microcode version minor */
- u8 revision; /* The microcode version revision */
- u8 padding; /* Reserved, for alignment */
- u8 reserved[4]; /* Reserved, for future expansion */
- } __attribute__ ((packed)) microcode[1];
- /* All microcode binaries should be located here */
- /* CRC32 should be located here, after the microcode binaries */
-} __attribute__ ((packed));
-
-struct qe_firmware_info {
- char id[64]; /* Firmware name */
- u32 vtraps[8]; /* Virtual trap addresses */
- u64 extended_modes; /* Extended modes */
-};
-
-#ifdef CONFIG_QUICC_ENGINE
-/* Upload a firmware to the QE */
-int qe_upload_firmware(const struct qe_firmware *firmware);
-#else
-static inline int qe_upload_firmware(const struct qe_firmware *firmware)
-{
- return -ENOSYS;
-}
-#endif /* CONFIG_QUICC_ENGINE */
-
-/* Obtain information on the uploaded firmware */
-struct qe_firmware_info *qe_get_firmware_info(void);
-
-/* QE USB */
-int qe_usb_clock_set(enum qe_clock clk, int rate);
-
-/* Buffer descriptors */
-struct qe_bd {
- __be16 status;
- __be16 length;
- __be32 buf;
-} __attribute__ ((packed));
-
-#define BD_STATUS_MASK 0xffff0000
-#define BD_LENGTH_MASK 0x0000ffff
-
-/* Alignment */
-#define QE_INTR_TABLE_ALIGN 16 /* ??? */
-#define QE_ALIGNMENT_OF_BD 8
-#define QE_ALIGNMENT_OF_PRAM 64
-
-/* RISC allocation */
-#define QE_RISC_ALLOCATION_RISC1 0x1 /* RISC 1 */
-#define QE_RISC_ALLOCATION_RISC2 0x2 /* RISC 2 */
-#define QE_RISC_ALLOCATION_RISC3 0x4 /* RISC 3 */
-#define QE_RISC_ALLOCATION_RISC4 0x8 /* RISC 4 */
-#define QE_RISC_ALLOCATION_RISC1_AND_RISC2 (QE_RISC_ALLOCATION_RISC1 | \
- QE_RISC_ALLOCATION_RISC2)
-#define QE_RISC_ALLOCATION_FOUR_RISCS (QE_RISC_ALLOCATION_RISC1 | \
- QE_RISC_ALLOCATION_RISC2 | \
- QE_RISC_ALLOCATION_RISC3 | \
- QE_RISC_ALLOCATION_RISC4)
-
-/* QE extended filtering Table Lookup Key Size */
-enum qe_fltr_tbl_lookup_key_size {
- QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES
- = 0x3f, /* LookupKey parsed by the Generate LookupKey
- CMD is truncated to 8 bytes */
- QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES
- = 0x5f, /* LookupKey parsed by the Generate LookupKey
- CMD is truncated to 16 bytes */
-};
-
-/* QE FLTR extended filtering Largest External Table Lookup Key Size */
-enum qe_fltr_largest_external_tbl_lookup_key_size {
- QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE
- = 0x0,/* not used */
- QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES
- = QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES, /* 8 bytes */
- QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES
- = QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES, /* 16 bytes */
-};
-
-/* structure representing QE parameter RAM */
-struct qe_timer_tables {
- u16 tm_base; /* QE timer table base adr */
- u16 tm_ptr; /* QE timer table pointer */
- u16 r_tmr; /* QE timer mode register */
- u16 r_tmv; /* QE timer valid register */
- u32 tm_cmd; /* QE timer cmd register */
- u32 tm_cnt; /* QE timer internal cnt */
-} __attribute__ ((packed));
-
-#define QE_FLTR_TAD_SIZE 8
-
-/* QE extended filtering Termination Action Descriptor (TAD) */
-struct qe_fltr_tad {
- u8 serialized[QE_FLTR_TAD_SIZE];
-} __attribute__ ((packed));
-
-/* Communication Direction */
-enum comm_dir {
- COMM_DIR_NONE = 0,
- COMM_DIR_RX = 1,
- COMM_DIR_TX = 2,
- COMM_DIR_RX_AND_TX = 3
-};
-
-/* QE CMXUCR Registers.
- * There are two UCCs represented in each of the four CMXUCR registers.
- * These values are for the UCC in the LSBs
- */
-#define QE_CMXUCR_MII_ENET_MNG 0x00007000
-#define QE_CMXUCR_MII_ENET_MNG_SHIFT 12
-#define QE_CMXUCR_GRANT 0x00008000
-#define QE_CMXUCR_TSA 0x00004000
-#define QE_CMXUCR_BKPT 0x00000100
-#define QE_CMXUCR_TX_CLK_SRC_MASK 0x0000000F
-
-/* QE CMXGCR Registers.
-*/
-#define QE_CMXGCR_MII_ENET_MNG 0x00007000
-#define QE_CMXGCR_MII_ENET_MNG_SHIFT 12
-#define QE_CMXGCR_USBCS 0x0000000f
-#define QE_CMXGCR_USBCS_CLK3 0x1
-#define QE_CMXGCR_USBCS_CLK5 0x2
-#define QE_CMXGCR_USBCS_CLK7 0x3
-#define QE_CMXGCR_USBCS_CLK9 0x4
-#define QE_CMXGCR_USBCS_CLK13 0x5
-#define QE_CMXGCR_USBCS_CLK17 0x6
-#define QE_CMXGCR_USBCS_CLK19 0x7
-#define QE_CMXGCR_USBCS_CLK21 0x8
-#define QE_CMXGCR_USBCS_BRG9 0x9
-#define QE_CMXGCR_USBCS_BRG10 0xa
-
-/* QE CECR Commands.
-*/
-#define QE_CR_FLG 0x00010000
-#define QE_RESET 0x80000000
-#define QE_INIT_TX_RX 0x00000000
-#define QE_INIT_RX 0x00000001
-#define QE_INIT_TX 0x00000002
-#define QE_ENTER_HUNT_MODE 0x00000003
-#define QE_STOP_TX 0x00000004
-#define QE_GRACEFUL_STOP_TX 0x00000005
-#define QE_RESTART_TX 0x00000006
-#define QE_CLOSE_RX_BD 0x00000007
-#define QE_SWITCH_COMMAND 0x00000007
-#define QE_SET_GROUP_ADDRESS 0x00000008
-#define QE_START_IDMA 0x00000009
-#define QE_MCC_STOP_RX 0x00000009
-#define QE_ATM_TRANSMIT 0x0000000a
-#define QE_HPAC_CLEAR_ALL 0x0000000b
-#define QE_GRACEFUL_STOP_RX 0x0000001a
-#define QE_RESTART_RX 0x0000001b
-#define QE_HPAC_SET_PRIORITY 0x0000010b
-#define QE_HPAC_STOP_TX 0x0000020b
-#define QE_HPAC_STOP_RX 0x0000030b
-#define QE_HPAC_GRACEFUL_STOP_TX 0x0000040b
-#define QE_HPAC_GRACEFUL_STOP_RX 0x0000050b
-#define QE_HPAC_START_TX 0x0000060b
-#define QE_HPAC_START_RX 0x0000070b
-#define QE_USB_STOP_TX 0x0000000a
-#define QE_USB_RESTART_TX 0x0000000c
-#define QE_QMC_STOP_TX 0x0000000c
-#define QE_QMC_STOP_RX 0x0000000d
-#define QE_SS7_SU_FIL_RESET 0x0000000e
-/* jonathbr added from here down for 83xx */
-#define QE_RESET_BCS 0x0000000a
-#define QE_MCC_INIT_TX_RX_16 0x00000003
-#define QE_MCC_STOP_TX 0x00000004
-#define QE_MCC_INIT_TX_1 0x00000005
-#define QE_MCC_INIT_RX_1 0x00000006
-#define QE_MCC_RESET 0x00000007
-#define QE_SET_TIMER 0x00000008
-#define QE_RANDOM_NUMBER 0x0000000c
-#define QE_ATM_MULTI_THREAD_INIT 0x00000011
-#define QE_ASSIGN_PAGE 0x00000012
-#define QE_ADD_REMOVE_HASH_ENTRY 0x00000013
-#define QE_START_FLOW_CONTROL 0x00000014
-#define QE_STOP_FLOW_CONTROL 0x00000015
-#define QE_ASSIGN_PAGE_TO_DEVICE 0x00000016
-
-#define QE_ASSIGN_RISC 0x00000010
-#define QE_CR_MCN_NORMAL_SHIFT 6
-#define QE_CR_MCN_USB_SHIFT 4
-#define QE_CR_MCN_RISC_ASSIGN_SHIFT 8
-#define QE_CR_SNUM_SHIFT 17
-
-/* QE CECR Sub Block - sub block of QE command.
-*/
-#define QE_CR_SUBBLOCK_INVALID 0x00000000
-#define QE_CR_SUBBLOCK_USB 0x03200000
-#define QE_CR_SUBBLOCK_UCCFAST1 0x02000000
-#define QE_CR_SUBBLOCK_UCCFAST2 0x02200000
-#define QE_CR_SUBBLOCK_UCCFAST3 0x02400000
-#define QE_CR_SUBBLOCK_UCCFAST4 0x02600000
-#define QE_CR_SUBBLOCK_UCCFAST5 0x02800000
-#define QE_CR_SUBBLOCK_UCCFAST6 0x02a00000
-#define QE_CR_SUBBLOCK_UCCFAST7 0x02c00000
-#define QE_CR_SUBBLOCK_UCCFAST8 0x02e00000
-#define QE_CR_SUBBLOCK_UCCSLOW1 0x00000000
-#define QE_CR_SUBBLOCK_UCCSLOW2 0x00200000
-#define QE_CR_SUBBLOCK_UCCSLOW3 0x00400000
-#define QE_CR_SUBBLOCK_UCCSLOW4 0x00600000
-#define QE_CR_SUBBLOCK_UCCSLOW5 0x00800000
-#define QE_CR_SUBBLOCK_UCCSLOW6 0x00a00000
-#define QE_CR_SUBBLOCK_UCCSLOW7 0x00c00000
-#define QE_CR_SUBBLOCK_UCCSLOW8 0x00e00000
-#define QE_CR_SUBBLOCK_MCC1 0x03800000
-#define QE_CR_SUBBLOCK_MCC2 0x03a00000
-#define QE_CR_SUBBLOCK_MCC3 0x03000000
-#define QE_CR_SUBBLOCK_IDMA1 0x02800000
-#define QE_CR_SUBBLOCK_IDMA2 0x02a00000
-#define QE_CR_SUBBLOCK_IDMA3 0x02c00000
-#define QE_CR_SUBBLOCK_IDMA4 0x02e00000
-#define QE_CR_SUBBLOCK_HPAC 0x01e00000
-#define QE_CR_SUBBLOCK_SPI1 0x01400000
-#define QE_CR_SUBBLOCK_SPI2 0x01600000
-#define QE_CR_SUBBLOCK_RAND 0x01c00000
-#define QE_CR_SUBBLOCK_TIMER 0x01e00000
-#define QE_CR_SUBBLOCK_GENERAL 0x03c00000
-
-/* QE CECR Protocol - For non-MCC, specifies mode for QE CECR command */
-#define QE_CR_PROTOCOL_UNSPECIFIED 0x00 /* For all other protocols */
-#define QE_CR_PROTOCOL_HDLC_TRANSPARENT 0x00
-#define QE_CR_PROTOCOL_QMC 0x02
-#define QE_CR_PROTOCOL_UART 0x04
-#define QE_CR_PROTOCOL_ATM_POS 0x0A
-#define QE_CR_PROTOCOL_ETHERNET 0x0C
-#define QE_CR_PROTOCOL_L2_SWITCH 0x0D
-
-/* BRG configuration register */
-#define QE_BRGC_ENABLE 0x00010000
-#define QE_BRGC_DIVISOR_SHIFT 1
-#define QE_BRGC_DIVISOR_MAX 0xFFF
-#define QE_BRGC_DIV16 1
-
-/* QE Timers registers */
-#define QE_GTCFR1_PCAS 0x80
-#define QE_GTCFR1_STP2 0x20
-#define QE_GTCFR1_RST2 0x10
-#define QE_GTCFR1_GM2 0x08
-#define QE_GTCFR1_GM1 0x04
-#define QE_GTCFR1_STP1 0x02
-#define QE_GTCFR1_RST1 0x01
-
-/* SDMA registers */
-#define QE_SDSR_BER1 0x02000000
-#define QE_SDSR_BER2 0x01000000
-
-#define QE_SDMR_GLB_1_MSK 0x80000000
-#define QE_SDMR_ADR_SEL 0x20000000
-#define QE_SDMR_BER1_MSK 0x02000000
-#define QE_SDMR_BER2_MSK 0x01000000
-#define QE_SDMR_EB1_MSK 0x00800000
-#define QE_SDMR_ER1_MSK 0x00080000
-#define QE_SDMR_ER2_MSK 0x00040000
-#define QE_SDMR_CEN_MASK 0x0000E000
-#define QE_SDMR_SBER_1 0x00000200
-#define QE_SDMR_SBER_2 0x00000200
-#define QE_SDMR_EB1_PR_MASK 0x000000C0
-#define QE_SDMR_ER1_PR 0x00000008
-
-#define QE_SDMR_CEN_SHIFT 13
-#define QE_SDMR_EB1_PR_SHIFT 6
-
-#define QE_SDTM_MSNUM_SHIFT 24
-
-#define QE_SDEBCR_BA_MASK 0x01FFFFFF
-
-/* Communication Processor */
-#define QE_CP_CERCR_MEE 0x8000 /* Multi-user RAM ECC enable */
-#define QE_CP_CERCR_IEE 0x4000 /* Instruction RAM ECC enable */
-#define QE_CP_CERCR_CIR 0x0800 /* Common instruction RAM */
-
-/* I-RAM */
-#define QE_IRAM_IADD_AIE 0x80000000 /* Auto Increment Enable */
-#define QE_IRAM_IADD_BADDR 0x00080000 /* Base Address */
-#define QE_IRAM_READY 0x80000000 /* Ready */
-
-/* UPC */
-#define UPGCR_PROTOCOL 0x80000000 /* protocol ul2 or pl2 */
-#define UPGCR_TMS 0x40000000 /* Transmit master/slave mode */
-#define UPGCR_RMS 0x20000000 /* Receive master/slave mode */
-#define UPGCR_ADDR 0x10000000 /* Master MPHY Addr multiplexing */
-#define UPGCR_DIAG 0x01000000 /* Diagnostic mode */
-
-/* UCC GUEMR register */
-#define UCC_GUEMR_MODE_MASK_RX 0x02
-#define UCC_GUEMR_MODE_FAST_RX 0x02
-#define UCC_GUEMR_MODE_SLOW_RX 0x00
-#define UCC_GUEMR_MODE_MASK_TX 0x01
-#define UCC_GUEMR_MODE_FAST_TX 0x01
-#define UCC_GUEMR_MODE_SLOW_TX 0x00
-#define UCC_GUEMR_MODE_MASK (UCC_GUEMR_MODE_MASK_RX | UCC_GUEMR_MODE_MASK_TX)
-#define UCC_GUEMR_SET_RESERVED3 0x10 /* Bit 3 in the guemr is reserved but
- must be set 1 */
-
-/* structure representing UCC SLOW parameter RAM */
-struct ucc_slow_pram {
- __be16 rbase; /* RX BD base address */
- __be16 tbase; /* TX BD base address */
- u8 rbmr; /* RX bus mode register (same as CPM's RFCR) */
- u8 tbmr; /* TX bus mode register (same as CPM's TFCR) */
- __be16 mrblr; /* Rx buffer length */
- __be32 rstate; /* Rx internal state */
- __be32 rptr; /* Rx internal data pointer */
- __be16 rbptr; /* rb BD Pointer */
- __be16 rcount; /* Rx internal byte count */
- __be32 rtemp; /* Rx temp */
- __be32 tstate; /* Tx internal state */
- __be32 tptr; /* Tx internal data pointer */
- __be16 tbptr; /* Tx BD pointer */
- __be16 tcount; /* Tx byte count */
- __be32 ttemp; /* Tx temp */
- __be32 rcrc; /* temp receive CRC */
- __be32 tcrc; /* temp transmit CRC */
-} __attribute__ ((packed));
-
-/* General UCC SLOW Mode Register (GUMRH & GUMRL) */
-#define UCC_SLOW_GUMR_H_SAM_QMC 0x00000000
-#define UCC_SLOW_GUMR_H_SAM_SATM 0x00008000
-#define UCC_SLOW_GUMR_H_REVD 0x00002000
-#define UCC_SLOW_GUMR_H_TRX 0x00001000
-#define UCC_SLOW_GUMR_H_TTX 0x00000800
-#define UCC_SLOW_GUMR_H_CDP 0x00000400
-#define UCC_SLOW_GUMR_H_CTSP 0x00000200
-#define UCC_SLOW_GUMR_H_CDS 0x00000100
-#define UCC_SLOW_GUMR_H_CTSS 0x00000080
-#define UCC_SLOW_GUMR_H_TFL 0x00000040
-#define UCC_SLOW_GUMR_H_RFW 0x00000020
-#define UCC_SLOW_GUMR_H_TXSY 0x00000010
-#define UCC_SLOW_GUMR_H_4SYNC 0x00000004
-#define UCC_SLOW_GUMR_H_8SYNC 0x00000008
-#define UCC_SLOW_GUMR_H_16SYNC 0x0000000c
-#define UCC_SLOW_GUMR_H_RTSM 0x00000002
-#define UCC_SLOW_GUMR_H_RSYN 0x00000001
-
-#define UCC_SLOW_GUMR_L_TCI 0x10000000
-#define UCC_SLOW_GUMR_L_RINV 0x02000000
-#define UCC_SLOW_GUMR_L_TINV 0x01000000
-#define UCC_SLOW_GUMR_L_TEND 0x00040000
-#define UCC_SLOW_GUMR_L_TDCR_MASK 0x00030000
-#define UCC_SLOW_GUMR_L_TDCR_32 0x00030000
-#define UCC_SLOW_GUMR_L_TDCR_16 0x00020000
-#define UCC_SLOW_GUMR_L_TDCR_8 0x00010000
-#define UCC_SLOW_GUMR_L_TDCR_1 0x00000000
-#define UCC_SLOW_GUMR_L_RDCR_MASK 0x0000c000
-#define UCC_SLOW_GUMR_L_RDCR_32 0x0000c000
-#define UCC_SLOW_GUMR_L_RDCR_16 0x00008000
-#define UCC_SLOW_GUMR_L_RDCR_8 0x00004000
-#define UCC_SLOW_GUMR_L_RDCR_1 0x00000000
-#define UCC_SLOW_GUMR_L_RENC_NRZI 0x00000800
-#define UCC_SLOW_GUMR_L_RENC_NRZ 0x00000000
-#define UCC_SLOW_GUMR_L_TENC_NRZI 0x00000100
-#define UCC_SLOW_GUMR_L_TENC_NRZ 0x00000000
-#define UCC_SLOW_GUMR_L_DIAG_MASK 0x000000c0
-#define UCC_SLOW_GUMR_L_DIAG_LE 0x000000c0
-#define UCC_SLOW_GUMR_L_DIAG_ECHO 0x00000080
-#define UCC_SLOW_GUMR_L_DIAG_LOOP 0x00000040
-#define UCC_SLOW_GUMR_L_DIAG_NORM 0x00000000
-#define UCC_SLOW_GUMR_L_ENR 0x00000020
-#define UCC_SLOW_GUMR_L_ENT 0x00000010
-#define UCC_SLOW_GUMR_L_MODE_MASK 0x0000000F
-#define UCC_SLOW_GUMR_L_MODE_BISYNC 0x00000008
-#define UCC_SLOW_GUMR_L_MODE_AHDLC 0x00000006
-#define UCC_SLOW_GUMR_L_MODE_UART 0x00000004
-#define UCC_SLOW_GUMR_L_MODE_QMC 0x00000002
-
-/* General UCC FAST Mode Register */
-#define UCC_FAST_GUMR_TCI 0x20000000
-#define UCC_FAST_GUMR_TRX 0x10000000
-#define UCC_FAST_GUMR_TTX 0x08000000
-#define UCC_FAST_GUMR_CDP 0x04000000
-#define UCC_FAST_GUMR_CTSP 0x02000000
-#define UCC_FAST_GUMR_CDS 0x01000000
-#define UCC_FAST_GUMR_CTSS 0x00800000
-#define UCC_FAST_GUMR_TXSY 0x00020000
-#define UCC_FAST_GUMR_RSYN 0x00010000
-#define UCC_FAST_GUMR_RTSM 0x00002000
-#define UCC_FAST_GUMR_REVD 0x00000400
-#define UCC_FAST_GUMR_ENR 0x00000020
-#define UCC_FAST_GUMR_ENT 0x00000010
-
-/* UART Slow UCC Event Register (UCCE) */
-#define UCC_UART_UCCE_AB 0x0200
-#define UCC_UART_UCCE_IDLE 0x0100
-#define UCC_UART_UCCE_GRA 0x0080
-#define UCC_UART_UCCE_BRKE 0x0040
-#define UCC_UART_UCCE_BRKS 0x0020
-#define UCC_UART_UCCE_CCR 0x0008
-#define UCC_UART_UCCE_BSY 0x0004
-#define UCC_UART_UCCE_TX 0x0002
-#define UCC_UART_UCCE_RX 0x0001
-
-/* HDLC Slow UCC Event Register (UCCE) */
-#define UCC_HDLC_UCCE_GLR 0x1000
-#define UCC_HDLC_UCCE_GLT 0x0800
-#define UCC_HDLC_UCCE_IDLE 0x0100
-#define UCC_HDLC_UCCE_BRKE 0x0040
-#define UCC_HDLC_UCCE_BRKS 0x0020
-#define UCC_HDLC_UCCE_TXE 0x0010
-#define UCC_HDLC_UCCE_RXF 0x0008
-#define UCC_HDLC_UCCE_BSY 0x0004
-#define UCC_HDLC_UCCE_TXB 0x0002
-#define UCC_HDLC_UCCE_RXB 0x0001
-
-/* BISYNC Slow UCC Event Register (UCCE) */
-#define UCC_BISYNC_UCCE_GRA 0x0080
-#define UCC_BISYNC_UCCE_TXE 0x0010
-#define UCC_BISYNC_UCCE_RCH 0x0008
-#define UCC_BISYNC_UCCE_BSY 0x0004
-#define UCC_BISYNC_UCCE_TXB 0x0002
-#define UCC_BISYNC_UCCE_RXB 0x0001
-
-/* Gigabit Ethernet Fast UCC Event Register (UCCE) */
-#define UCC_GETH_UCCE_MPD 0x80000000
-#define UCC_GETH_UCCE_SCAR 0x40000000
-#define UCC_GETH_UCCE_GRA 0x20000000
-#define UCC_GETH_UCCE_CBPR 0x10000000
-#define UCC_GETH_UCCE_BSY 0x08000000
-#define UCC_GETH_UCCE_RXC 0x04000000
-#define UCC_GETH_UCCE_TXC 0x02000000
-#define UCC_GETH_UCCE_TXE 0x01000000
-#define UCC_GETH_UCCE_TXB7 0x00800000
-#define UCC_GETH_UCCE_TXB6 0x00400000
-#define UCC_GETH_UCCE_TXB5 0x00200000
-#define UCC_GETH_UCCE_TXB4 0x00100000
-#define UCC_GETH_UCCE_TXB3 0x00080000
-#define UCC_GETH_UCCE_TXB2 0x00040000
-#define UCC_GETH_UCCE_TXB1 0x00020000
-#define UCC_GETH_UCCE_TXB0 0x00010000
-#define UCC_GETH_UCCE_RXB7 0x00008000
-#define UCC_GETH_UCCE_RXB6 0x00004000
-#define UCC_GETH_UCCE_RXB5 0x00002000
-#define UCC_GETH_UCCE_RXB4 0x00001000
-#define UCC_GETH_UCCE_RXB3 0x00000800
-#define UCC_GETH_UCCE_RXB2 0x00000400
-#define UCC_GETH_UCCE_RXB1 0x00000200
-#define UCC_GETH_UCCE_RXB0 0x00000100
-#define UCC_GETH_UCCE_RXF7 0x00000080
-#define UCC_GETH_UCCE_RXF6 0x00000040
-#define UCC_GETH_UCCE_RXF5 0x00000020
-#define UCC_GETH_UCCE_RXF4 0x00000010
-#define UCC_GETH_UCCE_RXF3 0x00000008
-#define UCC_GETH_UCCE_RXF2 0x00000004
-#define UCC_GETH_UCCE_RXF1 0x00000002
-#define UCC_GETH_UCCE_RXF0 0x00000001
-
-/* UCC Protocol Specific Mode Register (UPSMR), when used for UART */
-#define UCC_UART_UPSMR_FLC 0x8000
-#define UCC_UART_UPSMR_SL 0x4000
-#define UCC_UART_UPSMR_CL_MASK 0x3000
-#define UCC_UART_UPSMR_CL_8 0x3000
-#define UCC_UART_UPSMR_CL_7 0x2000
-#define UCC_UART_UPSMR_CL_6 0x1000
-#define UCC_UART_UPSMR_CL_5 0x0000
-#define UCC_UART_UPSMR_UM_MASK 0x0c00
-#define UCC_UART_UPSMR_UM_NORMAL 0x0000
-#define UCC_UART_UPSMR_UM_MAN_MULTI 0x0400
-#define UCC_UART_UPSMR_UM_AUTO_MULTI 0x0c00
-#define UCC_UART_UPSMR_FRZ 0x0200
-#define UCC_UART_UPSMR_RZS 0x0100
-#define UCC_UART_UPSMR_SYN 0x0080
-#define UCC_UART_UPSMR_DRT 0x0040
-#define UCC_UART_UPSMR_PEN 0x0010
-#define UCC_UART_UPSMR_RPM_MASK 0x000c
-#define UCC_UART_UPSMR_RPM_ODD 0x0000
-#define UCC_UART_UPSMR_RPM_LOW 0x0004
-#define UCC_UART_UPSMR_RPM_EVEN 0x0008
-#define UCC_UART_UPSMR_RPM_HIGH 0x000C
-#define UCC_UART_UPSMR_TPM_MASK 0x0003
-#define UCC_UART_UPSMR_TPM_ODD 0x0000
-#define UCC_UART_UPSMR_TPM_LOW 0x0001
-#define UCC_UART_UPSMR_TPM_EVEN 0x0002
-#define UCC_UART_UPSMR_TPM_HIGH 0x0003
-
-/* UCC Protocol Specific Mode Register (UPSMR), when used for Ethernet */
-#define UCC_GETH_UPSMR_FTFE 0x80000000
-#define UCC_GETH_UPSMR_PTPE 0x40000000
-#define UCC_GETH_UPSMR_ECM 0x04000000
-#define UCC_GETH_UPSMR_HSE 0x02000000
-#define UCC_GETH_UPSMR_PRO 0x00400000
-#define UCC_GETH_UPSMR_CAP 0x00200000
-#define UCC_GETH_UPSMR_RSH 0x00100000
-#define UCC_GETH_UPSMR_RPM 0x00080000
-#define UCC_GETH_UPSMR_R10M 0x00040000
-#define UCC_GETH_UPSMR_RLPB 0x00020000
-#define UCC_GETH_UPSMR_TBIM 0x00010000
-#define UCC_GETH_UPSMR_RES1 0x00002000
-#define UCC_GETH_UPSMR_RMM 0x00001000
-#define UCC_GETH_UPSMR_CAM 0x00000400
-#define UCC_GETH_UPSMR_BRO 0x00000200
-#define UCC_GETH_UPSMR_SMM 0x00000080
-#define UCC_GETH_UPSMR_SGMM 0x00000020
-
-/* UCC Transmit On Demand Register (UTODR) */
-#define UCC_SLOW_TOD 0x8000
-#define UCC_FAST_TOD 0x8000
-
-/* UCC Bus Mode Register masks */
-/* Not to be confused with the Bundle Mode Register */
-#define UCC_BMR_GBL 0x20
-#define UCC_BMR_BO_BE 0x10
-#define UCC_BMR_CETM 0x04
-#define UCC_BMR_DTB 0x02
-#define UCC_BMR_BDB 0x01
-
-/* Function code masks */
-#define FC_GBL 0x20
-#define FC_DTB_LCL 0x02
-#define UCC_FAST_FUNCTION_CODE_GBL 0x20
-#define UCC_FAST_FUNCTION_CODE_DTB_LCL 0x02
-#define UCC_FAST_FUNCTION_CODE_BDB_LCL 0x01
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_QE_H */
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
deleted file mode 100644
index 25784cc959a0..000000000000
--- a/arch/powerpc/include/asm/qe_ic.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: Shlomi Gridish <gridish@freescale.com>
- * Li Yang <leoli@freescale.com>
- *
- * Description:
- * QE IC external definitions and structure.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_QE_IC_H
-#define _ASM_POWERPC_QE_IC_H
-
-#include <linux/irq.h>
-
-struct device_node;
-struct qe_ic;
-
-#define NUM_OF_QE_IC_GROUPS 6
-
-/* Flags when we init the QE IC */
-#define QE_IC_SPREADMODE_GRP_W 0x00000001
-#define QE_IC_SPREADMODE_GRP_X 0x00000002
-#define QE_IC_SPREADMODE_GRP_Y 0x00000004
-#define QE_IC_SPREADMODE_GRP_Z 0x00000008
-#define QE_IC_SPREADMODE_GRP_RISCA 0x00000010
-#define QE_IC_SPREADMODE_GRP_RISCB 0x00000020
-
-#define QE_IC_LOW_SIGNAL 0x00000100
-#define QE_IC_HIGH_SIGNAL 0x00000200
-
-#define QE_IC_GRP_W_PRI0_DEST_SIGNAL_HIGH 0x00001000
-#define QE_IC_GRP_W_PRI1_DEST_SIGNAL_HIGH 0x00002000
-#define QE_IC_GRP_X_PRI0_DEST_SIGNAL_HIGH 0x00004000
-#define QE_IC_GRP_X_PRI1_DEST_SIGNAL_HIGH 0x00008000
-#define QE_IC_GRP_Y_PRI0_DEST_SIGNAL_HIGH 0x00010000
-#define QE_IC_GRP_Y_PRI1_DEST_SIGNAL_HIGH 0x00020000
-#define QE_IC_GRP_Z_PRI0_DEST_SIGNAL_HIGH 0x00040000
-#define QE_IC_GRP_Z_PRI1_DEST_SIGNAL_HIGH 0x00080000
-#define QE_IC_GRP_RISCA_PRI0_DEST_SIGNAL_HIGH 0x00100000
-#define QE_IC_GRP_RISCA_PRI1_DEST_SIGNAL_HIGH 0x00200000
-#define QE_IC_GRP_RISCB_PRI0_DEST_SIGNAL_HIGH 0x00400000
-#define QE_IC_GRP_RISCB_PRI1_DEST_SIGNAL_HIGH 0x00800000
-#define QE_IC_GRP_W_DEST_SIGNAL_SHIFT (12)
-
-/* QE interrupt sources groups */
-enum qe_ic_grp_id {
- QE_IC_GRP_W = 0, /* QE interrupt controller group W */
- QE_IC_GRP_X, /* QE interrupt controller group X */
- QE_IC_GRP_Y, /* QE interrupt controller group Y */
- QE_IC_GRP_Z, /* QE interrupt controller group Z */
- QE_IC_GRP_RISCA, /* QE interrupt controller RISC group A */
- QE_IC_GRP_RISCB /* QE interrupt controller RISC group B */
-};
-
-#ifdef CONFIG_QUICC_ENGINE
-void qe_ic_init(struct device_node *node, unsigned int flags,
- void (*low_handler)(unsigned int irq, struct irq_desc *desc),
- void (*high_handler)(unsigned int irq, struct irq_desc *desc));
-unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
-unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
-#else
-static inline void qe_ic_init(struct device_node *node, unsigned int flags,
- void (*low_handler)(unsigned int irq, struct irq_desc *desc),
- void (*high_handler)(unsigned int irq, struct irq_desc *desc))
-{}
-static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
-{ return 0; }
-static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
-{ return 0; }
-#endif /* CONFIG_QUICC_ENGINE */
-
-void qe_ic_set_highest_priority(unsigned int virq, int high);
-int qe_ic_set_priority(unsigned int virq, unsigned int priority);
-int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high);
-
-static inline void qe_ic_cascade_low_ipic(unsigned int irq,
- struct irq_desc *desc)
-{
- struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
- unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic);
-
- if (cascade_irq != NO_IRQ)
- generic_handle_irq(cascade_irq);
-}
-
-static inline void qe_ic_cascade_high_ipic(unsigned int irq,
- struct irq_desc *desc)
-{
- struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
- unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic);
-
- if (cascade_irq != NO_IRQ)
- generic_handle_irq(cascade_irq);
-}
-
-static inline void qe_ic_cascade_low_mpic(unsigned int irq,
- struct irq_desc *desc)
-{
- struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
- unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic);
- struct irq_chip *chip = irq_desc_get_chip(desc);
-
- if (cascade_irq != NO_IRQ)
- generic_handle_irq(cascade_irq);
-
- chip->irq_eoi(&desc->irq_data);
-}
-
-static inline void qe_ic_cascade_high_mpic(unsigned int irq,
- struct irq_desc *desc)
-{
- struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
- unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic);
- struct irq_chip *chip = irq_desc_get_chip(desc);
-
- if (cascade_irq != NO_IRQ)
- generic_handle_irq(cascade_irq);
-
- chip->irq_eoi(&desc->irq_data);
-}
-
-static inline void qe_ic_cascade_muxed_mpic(unsigned int irq,
- struct irq_desc *desc)
-{
- struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
- unsigned int cascade_irq;
- struct irq_chip *chip = irq_desc_get_chip(desc);
-
- cascade_irq = qe_ic_get_high_irq(qe_ic);
- if (cascade_irq == NO_IRQ)
- cascade_irq = qe_ic_get_low_irq(qe_ic);
-
- if (cascade_irq != NO_IRQ)
- generic_handle_irq(cascade_irq);
-
- chip->irq_eoi(&desc->irq_data);
-}
-
-#endif /* _ASM_POWERPC_QE_IC_H */
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 000000000000..28a53fb69b38
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <linux/compiler.h>
+#include <asm/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#ifdef CONFIG_PPC64
+/*
+ * Use the EH=1 hint for accesses that result in the lock being acquired.
+ * The hardware is supposed to optimise this pattern by holding the lock
+ * cacheline longer, and releasing when a store to the same memory (the
+ * unlock) is performed.
+ */
+#define _Q_SPIN_EH_HINT 1
+#else
+#define _Q_SPIN_EH_HINT 0
+#endif
+
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * makes locks slightly more efficient when stealing.
+ *
+ * This is compile-time, so if true then there may always be stealers, so the
+ * nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
+
+/*
+ * Put a speculation barrier after testing the lock/node and finding it
+ * busy. Try to prevent pointless speculation in slow paths.
+ *
+ * Slows down the lockstorm microbenchmark with no stealing, where locking
+ * is purely FIFO through the queue. May have more benefit in real workloads
+ * where speculating into the wrong place could have a greater cost.
+ */
+#define _Q_SPIN_SPEC_BARRIER 0
+
+#ifdef CONFIG_PPC64
+/*
+ * Execute a miso instruction after passing the MCS lock ownership to the
+ * queue head. Miso is intended to make stores visible to other CPUs sooner.
+ *
+ * This seems to make the lockstorm microbenchmark nospin test go slightly
+ * faster on POWER10, but disable for now.
+ */
+#define _Q_SPIN_MISO 0
+#else
+#define _Q_SPIN_MISO 0
+#endif
+
+#ifdef CONFIG_PPC64
+/*
+ * This executes miso after an unlock of the lock word, having ownership
+ * pass to the next CPU sooner. This will slow the uncontended path to some
+ * degree. No evidence it helps yet.
+ */
+#define _Q_SPIN_MISO_UNLOCK 0
+#else
+#define _Q_SPIN_MISO_UNLOCK 0
+#endif
+
+/*
+ * Seems to slow down lockstorm microbenchmark, suspect queue node just
+ * has to become shared again right afterwards when its waiter spins on
+ * the lock field.
+ */
+#define _Q_SPIN_PREFETCH_NEXT 0
+
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+ return READ_ONCE(lock->val);
+}
+
+static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
+{
+ return !lock.val;
+}
+
+static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
+{
+ return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
+}
+
+static __always_inline u32 queued_spin_encode_locked_val(void)
+{
+ /* XXX: make this use lock value in paca like simple spinlocks? */
+ return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
+}
+
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
+{
+ u32 new = queued_spin_encode_locked_val();
+ u32 prev;
+
+ /* Trylock succeeds only when unlocked and no queued nodes */
+ asm volatile(
+"1: lwarx %0,0,%1,%3 # __queued_spin_trylock_nosteal \n"
+" cmpwi 0,%0,0 \n"
+" bne- 2f \n"
+" stwcx. %2,0,%1 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (new),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return likely(prev == 0);
+}
+
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
+{
+ u32 new = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ /* Trylock may get ahead of queued nodes if it finds unlocked */
+ asm volatile(
+"1: lwarx %0,0,%2,%5 # __queued_spin_trylock_steal \n"
+" andc. %1,%0,%4 \n"
+" bne- 2f \n"
+" and %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"2: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return likely(!(prev & ~_Q_TAIL_CPU_MASK));
+}
+
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+ if (!_Q_SPIN_TRY_LOCK_STEAL)
+ return __queued_spin_trylock_nosteal(lock);
+ else
+ return __queued_spin_trylock_steal(lock);
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
+{
+ if (!queued_spin_trylock(lock))
+ queued_spin_lock_slowpath(lock);
+}
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ smp_store_release(&lock->locked, 0);
+ if (_Q_SPIN_MISO_UNLOCK)
+ asm volatile("miso" ::: "memory");
+}
+
+#define arch_spin_is_locked(l) queued_spin_is_locked(l)
+#define arch_spin_is_contended(l) queued_spin_is_contended(l)
+#define arch_spin_value_unlocked(l) queued_spin_value_unlocked(l)
+#define arch_spin_lock(l) queued_spin_lock(l)
+#define arch_spin_trylock(l) queued_spin_trylock(l)
+#define arch_spin_unlock(l) queued_spin_unlock(l)
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void);
+#else
+static inline void pv_spinlocks_init(void) { }
+#endif
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
new file mode 100644
index 000000000000..4766a7aa03cb
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H
+#define _ASM_POWERPC_QSPINLOCK_TYPES_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+typedef struct qspinlock {
+ union {
+ u32 val;
+
+#ifdef __LITTLE_ENDIAN
+ struct {
+ u16 locked;
+ u8 reserved[2];
+ };
+#else
+ struct {
+ u8 reserved[2];
+ u16 locked;
+ };
+#endif
+ };
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { { .val = 0 } }
+
+/*
+ * Bitfields in the lock word:
+ *
+ * 0: locked bit
+ * 1-14: lock holder cpu
+ * 15: lock owner or queuer vcpus observed to be preempted bit
+ * 16: must queue bit
+ * 17-31: tail cpu (+1)
+ */
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+/* 0x00000001 */
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 1
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+
+/* 0x00007ffe */
+#define _Q_OWNER_CPU_OFFSET 1
+#define _Q_OWNER_CPU_BITS 14
+#define _Q_OWNER_CPU_MASK _Q_SET_MASK(OWNER_CPU)
+
+#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
+/* 0x00008000 */
+#define _Q_SLEEPY_OFFSET 15
+#define _Q_SLEEPY_BITS 1
+#define _Q_SLEEPY_VAL (1U << _Q_SLEEPY_OFFSET)
+
+/* 0x00010000 */
+#define _Q_MUST_Q_OFFSET 16
+#define _Q_MUST_Q_BITS 1
+#define _Q_MUST_Q_VAL (1U << _Q_MUST_Q_OFFSET)
+
+/* 0xfffe0000 */
+#define _Q_TAIL_CPU_OFFSET 17
+#define _Q_TAIL_CPU_BITS 15
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
+#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 0c0505956a29..3fe186635432 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Contains the definition of registers common to all PowerPC variants.
* If a register definition has been changed in a different PowerPC
@@ -11,23 +12,23 @@
#ifdef __KERNEL__
#include <linux/stringify.h>
+#include <linux/const.h>
#include <asm/cputable.h>
+#include <asm/asm-const.h>
+#include <asm/feature-fixups.h>
/* Pickup Book E specific registers. */
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE
#include <asm/reg_booke.h>
-#endif /* CONFIG_BOOKE || CONFIG_40x */
+#endif
#ifdef CONFIG_FSL_EMB_PERFMON
#include <asm/reg_fsl_emb.h>
#endif
-#ifdef CONFIG_8xx
#include <asm/reg_8xx.h>
-#endif /* CONFIG_8xx */
#define MSR_SF_LG 63 /* Enable 64 bit mode */
-#define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */
#define MSR_HV_LG 60 /* Hypervisor state */
#define MSR_TS_T_LG 34 /* Trans Mem state: Transactional */
#define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */
@@ -35,6 +36,7 @@
#define MSR_TM_LG 32 /* Trans Mem Available */
#define MSR_VEC_LG 25 /* Enable AltiVec */
#define MSR_VSX_LG 23 /* Enable VSX */
+#define MSR_S_LG 22 /* Secure state */
#define MSR_POW_LG 18 /* Enable Power Management */
#define MSR_WE_LG 18 /* Wait State Enable */
#define MSR_TGPR_LG 17 /* TLB Update registers in use */
@@ -58,7 +60,7 @@
#define MSR_RI_LG 1 /* Recoverable Exception */
#define MSR_LE_LG 0 /* Little Endian */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __MASK(X) (1<<(X))
#else
#define __MASK(X) (1UL<<(X))
@@ -66,13 +68,21 @@
#ifdef CONFIG_PPC64
#define MSR_SF __MASK(MSR_SF_LG) /* Enable 64 bit mode */
-#define MSR_ISF __MASK(MSR_ISF_LG) /* Interrupt 64b mode valid on 630 */
#define MSR_HV __MASK(MSR_HV_LG) /* Hypervisor state */
+#define MSR_S __MASK(MSR_S_LG) /* Secure state */
#else
/* so tests for these bits fail on 32-bit */
#define MSR_SF 0
-#define MSR_ISF 0
#define MSR_HV 0
+#define MSR_S 0
+#endif
+
+/*
+ * To be used in shared book E/book S, this avoids needing to worry about
+ * book S/book E in shared code
+ */
+#ifndef MSR_SPE
+#define MSR_SPE 0
#endif
#define MSR_VEC __MASK(MSR_VEC_LG) /* Enable AltiVec */
@@ -107,24 +117,32 @@
#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */
#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */
#define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */
-#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
#define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T)
#define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#else
+#define MSR_TM_ACTIVE(x) ((void)(x), 0)
+#endif
+
#if defined(CONFIG_PPC_BOOK3S_64)
#define MSR_64BIT MSR_SF
/* Server variant */
-#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
+#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_HV)
#ifdef __BIG_ENDIAN__
#define MSR_ __MSR
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV)
#else
#define MSR_ (__MSR | MSR_LE)
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE)
#endif
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)
-#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx)
+#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx)
/* Default MSR for kernel mode. */
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
@@ -134,6 +152,26 @@
#define MSR_64BIT 0
#endif
+/* Condition Register related */
+#define CR0_SHIFT 28
+#define CR0_MASK 0xF
+#define CR0_TBEGIN_FAILURE (0x2 << 28) /* 0b0010 */
+
+
+/* Power Management - Processor Stop Status and Control Register Fields */
+#define PSSCR_RL_MASK 0x0000000F /* Requested Level */
+#define PSSCR_MTL_MASK 0x000000F0 /* Maximum Transition Level */
+#define PSSCR_TR_MASK 0x00000300 /* Transition State */
+#define PSSCR_PSLL_MASK 0x000F0000 /* Power-Saving Level Limit */
+#define PSSCR_EC 0x00100000 /* Exit Criterion */
+#define PSSCR_ESL 0x00200000 /* Enable State Loss */
+#define PSSCR_SD 0x00400000 /* Status Disable */
+#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_PLS_SHIFT 60
+#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
+#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
+#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
+
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
#define FPSCR_FEX 0x40000000 /* FPU enabled exception summary */
@@ -195,14 +233,10 @@
/* Special Purpose Registers (SPRNs)*/
-#ifdef CONFIG_40x
-#define SPRN_PID 0x3B1 /* Process ID */
-#else
#define SPRN_PID 0x030 /* Process ID */
#ifdef CONFIG_BOOKE
#define SPRN_PID0 SPRN_PID/* Process ID Register 0 */
#endif
-#endif
#define SPRN_CTR 0x009 /* Count Register */
#define SPRN_DSCR 0x11
@@ -214,8 +248,28 @@
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
-#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
+
+#define TEXASR_FC_LG (63 - 7) /* Failure Code */
+#define TEXASR_AB_LG (63 - 31) /* Abort */
+#define TEXASR_SU_LG (63 - 32) /* Suspend */
+#define TEXASR_HV_LG (63 - 34) /* Hypervisor state*/
+#define TEXASR_PR_LG (63 - 35) /* Privilege level */
+#define TEXASR_FS_LG (63 - 36) /* failure summary */
+#define TEXASR_EX_LG (63 - 37) /* TFIAR exact bit */
+#define TEXASR_ROT_LG (63 - 38) /* ROT bit */
+
+#define TEXASR_ABORT __MASK(TEXASR_AB_LG) /* terminated by tabort or treclaim */
+#define TEXASR_SUSP __MASK(TEXASR_SU_LG) /* tx failed in suspended state */
+#define TEXASR_HV __MASK(TEXASR_HV_LG) /* MSR[HV] when failure occurred */
+#define TEXASR_PR __MASK(TEXASR_PR_LG) /* MSR[PR] when failure occurred */
+#define TEXASR_FS __MASK(TEXASR_FS_LG) /* TEXASR Failure Summary */
+#define TEXASR_EXACT __MASK(TEXASR_EX_LG) /* TFIAR value is exact */
+#define TEXASR_ROT __MASK(TEXASR_ROT_LG)
+#define TEXASR_FC (ASM_CONST(0xFF) << TEXASR_FC_LG)
+
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+
+#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
#define CTRL_CT 0xc0000000 /* current thread */
@@ -223,15 +277,16 @@
#define CTRL_CT1 0x40000000 /* thread 1 */
#define CTRL_TE 0x00c00000 /* thread enable */
#define CTRL_RUNLATCH 0x1
-#define SPRN_DAWR 0xB4
-#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */
+#define SPRN_DAWR0 0xB4
+#define SPRN_DAWR1 0xB5
#define SPRN_RPR 0xBA /* Relative Priority Register */
#define SPRN_CIABR 0xBB
#define CIABR_PRIV 0x3
#define CIABR_PRIV_USER 1
#define CIABR_PRIV_SUPER 2
#define CIABR_PRIV_HYPER 3
-#define SPRN_DAWRX 0xBC
+#define SPRN_DAWRX0 0xBC
+#define SPRN_DAWRX1 0xBD
#define DAWRX_USER __MASK(0)
#define DAWRX_KERNEL __MASK(1)
#define DAWRX_HYP __MASK(2)
@@ -250,14 +305,67 @@
#define SPRN_DAR 0x013 /* Data Address Register */
#define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */
#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */
-#define DSISR_NOHPTE 0x40000000 /* no translation found */
-#define DSISR_PROTFAULT 0x08000000 /* protection fault */
-#define DSISR_ISSTORE 0x02000000 /* access was a store */
-#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
-#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */
-#define DSISR_KEYFAULT 0x00200000 /* Key fault */
+#define DSISR_BAD_DIRECT_ST 0x80000000 /* Obsolete: Direct store error */
+#define DSISR_NOHPTE 0x40000000 /* no translation found */
+#define DSISR_ATTR_CONFLICT 0x20000000 /* P9: Process vs. Partition attr */
+#define DSISR_NOEXEC_OR_G 0x10000000 /* Alias of SRR1 bit, see below */
+#define DSISR_PROTFAULT 0x08000000 /* protection fault */
+#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */
+#define DSISR_ISSTORE 0x02000000 /* access was a store */
+#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */
+#define DSISR_NOSEGMENT 0x00200000 /* STAB miss (unsupported) */
+#define DSISR_KEYFAULT 0x00200000 /* Storage Key fault */
+#define DSISR_BAD_EXT_CTRL 0x00100000 /* Obsolete: External ctrl error */
+#define DSISR_UNSUPP_MMU 0x00080000 /* P9: Unsupported MMU config */
+#define DSISR_SET_RC 0x00040000 /* P9: Failed setting of R/C bits */
+#define DSISR_PRTABLE_FAULT 0x00020000 /* P9: Fault on process table */
+#define DSISR_ICSWX_NO_CT 0x00004000 /* P7: icswx unavailable cp type */
+#define DSISR_BAD_COPYPASTE 0x00000008 /* P9: Copy/Paste on wrong memtype */
+#define DSISR_BAD_AMO 0x00000004 /* P9: Incorrect AMO opcode */
+#define DSISR_BAD_CI_LDST 0x00000002 /* P8: Bad HV CI load/store */
+
+/*
+ * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always
+ * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1
+ * indicates an attempt at executing from a no-execute PTE
+ * or segment or from a guarded page.
+ *
+ * We add a definition here for completeness as we alias
+ * DSISR and SRR1 in do_page_fault.
+ */
+
+/*
+ * DSISR bits that are treated as a fault. Any bit set
+ * here will skip hash_page, and cause do_page_fault to
+ * trigger a SIGBUS or SIGSEGV:
+ */
+#define DSISR_BAD_FAULT_32S (DSISR_BAD_DIRECT_ST | \
+ DSISR_BADACCESS | \
+ DSISR_BAD_EXT_CTRL)
+#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \
+ DSISR_ATTR_CONFLICT | \
+ DSISR_UNSUPP_MMU | \
+ DSISR_PRTABLE_FAULT | \
+ DSISR_ICSWX_NO_CT | \
+ DSISR_BAD_COPYPASTE | \
+ DSISR_BAD_AMO | \
+ DSISR_BAD_CI_LDST)
+/*
+ * These bits are equivalent in SRR1 and DSISR for 0x400
+ * instruction access interrupts on Book3S
+ */
+#define DSISR_SRR1_MATCH_32S (DSISR_NOHPTE | \
+ DSISR_NOEXEC_OR_G | \
+ DSISR_PROTFAULT)
+#define DSISR_SRR1_MATCH_64S (DSISR_SRR1_MATCH_32S | \
+ DSISR_KEYFAULT | \
+ DSISR_UNSUPP_MMU | \
+ DSISR_SET_RC | \
+ DSISR_PRTABLE_FAULT)
+
#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */
#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */
+#define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/O) */
#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */
#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */
#define SPRN_TBU40 0x11E /* Timebase upper 40 bits (hyper, R/W) */
@@ -270,17 +378,33 @@
#define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */
#define SPRN_RMOR 0x138 /* Real mode offset register */
#define SPRN_HRMOR 0x139 /* Real mode offset register */
-#define SPRN_HSRR0 0x13A /* Hypervisor Save/Restore 0 */
-#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
+#define SPRN_HDEXCR_RO 0x1C7 /* Hypervisor DEXCR (non-privileged, readonly) */
+#define SPRN_HASHKEYR 0x1D4 /* Non-privileged hashst/hashchk key register */
+#define SPRN_HDEXCR 0x1D7 /* Hypervisor dynamic execution control register */
+#define SPRN_DEXCR_RO 0x32C /* DEXCR (non-privileged, readonly) */
+#define SPRN_ASDR 0x330 /* Access segment descriptor register */
+#define SPRN_DEXCR 0x33C /* Dynamic execution control register */
+#define DEXCR_PR_SBHE 0x80000000UL /* 0: Speculative Branch Hint Enable */
+#define DEXCR_PR_IBRTPD 0x10000000UL /* 3: Indirect Branch Recurrent Target Prediction Disable */
+#define DEXCR_PR_SRAPD 0x08000000UL /* 4: Subroutine Return Address Prediction Disable */
+#define DEXCR_PR_NPHIE 0x04000000UL /* 5: Non-Privileged Hash Instruction Enable */
+#define DEXCR_INIT DEXCR_PR_NPHIE /* Fixed DEXCR value to initialise all CPUs with */
#define SPRN_IC 0x350 /* Virtual Instruction Count */
#define SPRN_VTB 0x351 /* Virtual Time Base */
#define SPRN_LDBAR 0x352 /* LD Base Address Register */
#define SPRN_PMICR 0x354 /* Power Management Idle Control Reg */
#define SPRN_PMSR 0x355 /* Power Management Status Reg */
#define SPRN_PMMAR 0x356 /* Power Management Memory Activity Register */
+#define SPRN_PSSCR 0x357 /* Processor Stop Status and Control Register (ISA 3.0) */
+#define SPRN_PSSCR_PR 0x337 /* PSSCR ISA 3.0, privileged mode access */
+#define SPRN_TRIG2 0x372
#define SPRN_PMCR 0x374 /* Power Management Control Register */
+#define SPRN_RWMR 0x375 /* Region-Weighting Mode Register */
/* HFSCR and FSCR bit numbers are the same */
+#define FSCR_PREFIX_LG 13 /* Enable Prefix Instructions */
+#define FSCR_SCV_LG 12 /* Enable System Call Vectored */
+#define FSCR_MSGP_LG 10 /* Enable MSGP */
#define FSCR_TAR_LG 8 /* Enable Target Address Register */
#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
#define FSCR_TM_LG 5 /* Enable Transactional Memory */
@@ -290,10 +414,15 @@
#define FSCR_VECVSX_LG 1 /* Enable VMX/VSX */
#define FSCR_FP_LG 0 /* Enable Floating Point */
#define SPRN_FSCR 0x099 /* Facility Status & Control Register */
+#define FSCR_PREFIX __MASK(FSCR_PREFIX_LG)
+#define FSCR_SCV __MASK(FSCR_SCV_LG)
#define FSCR_TAR __MASK(FSCR_TAR_LG)
#define FSCR_EBB __MASK(FSCR_EBB_LG)
#define FSCR_DSCR __MASK(FSCR_DSCR_LG)
+#define FSCR_INTR_CAUSE (ASM_CONST(0xFF) << 56) /* interrupt cause */
#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */
+#define HFSCR_PREFIX __MASK(FSCR_PREFIX_LG)
+#define HFSCR_MSGP __MASK(FSCR_MSGP_LG)
#define HFSCR_TAR __MASK(FSCR_TAR_LG)
#define HFSCR_EBB __MASK(FSCR_EBB_LG)
#define HFSCR_TM __MASK(FSCR_TM_LG)
@@ -302,53 +431,73 @@
#define HFSCR_DSCR __MASK(FSCR_DSCR_LG)
#define HFSCR_VECVSX __MASK(FSCR_VECVSX_LG)
#define HFSCR_FP __MASK(FSCR_FP_LG)
+#define HFSCR_INTR_CAUSE FSCR_INTR_CAUSE
#define SPRN_TAR 0x32f /* Target Address Register */
#define SPRN_LPCR 0x13E /* LPAR Control Register */
-#define LPCR_VPM0 (1ul << (63-0))
-#define LPCR_VPM1 (1ul << (63-1))
-#define LPCR_ISL (1ul << (63-2))
-#define LPCR_VC_SH (63-2)
-#define LPCR_DPFD_SH (63-11)
-#define LPCR_DPFD (7ul << LPCR_DPFD_SH)
-#define LPCR_VRMASD (0x1ful << (63-16))
-#define LPCR_VRMA_L (1ul << (63-12))
-#define LPCR_VRMA_LP0 (1ul << (63-15))
-#define LPCR_VRMA_LP1 (1ul << (63-16))
-#define LPCR_VRMASD_SH (63-16)
-#define LPCR_RMLS 0x1C000000 /* impl dependent rmo limit sel */
-#define LPCR_RMLS_SH (63-37)
-#define LPCR_ILE 0x02000000 /* !HV irqs set MSR:LE */
-#define LPCR_AIL 0x01800000 /* Alternate interrupt location */
-#define LPCR_AIL_0 0x00000000 /* MMU off exception offset 0x0 */
-#define LPCR_AIL_3 0x01800000 /* MMU on exception offset 0xc00...4xxx */
-#define LPCR_ONL 0x00040000 /* online - PURR/SPURR count */
-#define LPCR_PECE 0x0001f000 /* powersave exit cause enable */
-#define LPCR_PECEDP 0x00010000 /* directed priv dbells cause exit */
-#define LPCR_PECEDH 0x00008000 /* directed hyp dbells cause exit */
-#define LPCR_PECE0 0x00004000 /* ext. exceptions can cause exit */
-#define LPCR_PECE1 0x00002000 /* decrementer can cause exit */
-#define LPCR_PECE2 0x00001000 /* machine check etc can cause exit */
-#define LPCR_MER 0x00000800 /* Mediated External Exception */
-#define LPCR_MER_SH 11
-#define LPCR_TC 0x00000200 /* Translation control */
-#define LPCR_LPES 0x0000000c
-#define LPCR_LPES0 0x00000008 /* LPAR Env selector 0 */
-#define LPCR_LPES1 0x00000004 /* LPAR Env selector 1 */
-#define LPCR_LPES_SH 2
-#define LPCR_RMI 0x00000002 /* real mode is cache inhibit */
-#define LPCR_HDICE 0x00000001 /* Hyp Decr enable (HV,PR,EE) */
+#define LPCR_VPM0 ASM_CONST(0x8000000000000000)
+#define LPCR_VPM1 ASM_CONST(0x4000000000000000)
+#define LPCR_ISL ASM_CONST(0x2000000000000000)
+#define LPCR_VC_SH 61
+#define LPCR_DPFD_SH 52
+#define LPCR_DPFD (ASM_CONST(7) << LPCR_DPFD_SH)
+#define LPCR_VRMASD_SH 47
+#define LPCR_VRMASD (ASM_CONST(0x1f) << LPCR_VRMASD_SH)
+#define LPCR_VRMA_L ASM_CONST(0x0008000000000000)
+#define LPCR_VRMA_LP0 ASM_CONST(0x0001000000000000)
+#define LPCR_VRMA_LP1 ASM_CONST(0x0000800000000000)
+#define LPCR_RMLS 0x1C000000 /* Implementation dependent RMO limit sel */
+#define LPCR_RMLS_SH 26
+#define LPCR_HAIL ASM_CONST(0x0000000004000000) /* HV AIL (ISAv3.1) */
+#define LPCR_ILE ASM_CONST(0x0000000002000000) /* !HV irqs set MSR:LE */
+#define LPCR_AIL ASM_CONST(0x0000000001800000) /* Alternate interrupt location */
+#define LPCR_AIL_0 ASM_CONST(0x0000000000000000) /* MMU off exception offset 0x0 */
+#define LPCR_AIL_3 ASM_CONST(0x0000000001800000) /* MMU on exception offset 0xc00...4xxx */
+#define LPCR_ONL ASM_CONST(0x0000000000040000) /* online - PURR/SPURR count */
+#define LPCR_LD ASM_CONST(0x0000000000020000) /* large decrementer */
+#define LPCR_PECE ASM_CONST(0x000000000001f000) /* powersave exit cause enable */
+#define LPCR_PECEDP ASM_CONST(0x0000000000010000) /* directed priv dbells cause exit */
+#define LPCR_PECEDH ASM_CONST(0x0000000000008000) /* directed hyp dbells cause exit */
+#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */
+#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */
+#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */
+#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */
+#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */
+#define LPCR_MER_SH 11
+#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
+#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
+#define LPCR_HEIC ASM_CONST(0x0000000000000010) /* Hypervisor External Interrupt Control */
+#define LPCR_LPES 0x0000000c
+#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
+#define LPCR_LPES1 ASM_CONST(0x0000000000000004) /* LPAR Env selector 1 */
+#define LPCR_LPES_SH 2
+#define LPCR_RMI ASM_CONST(0x0000000000000002) /* real mode is cache inhibit */
+#define LPCR_HVICE ASM_CONST(0x0000000000000002) /* P9: HV interrupt enable */
+#define LPCR_HDICE ASM_CONST(0x0000000000000001) /* Hyp Decr enable (HV,PR,EE) */
+#define LPCR_UPRT ASM_CONST(0x0000000000400000) /* Use Process Table (ISA 3) */
+#define LPCR_HR ASM_CONST(0x0000000000100000)
#ifndef SPRN_LPID
#define SPRN_LPID 0x13F /* Logical Partition Identifier */
#endif
-#define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */
-#define SPRN_HMER 0x150 /* Hardware m? error recovery */
-#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */
+#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */
+#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */
+#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */
#define SPRN_PCR 0x152 /* Processor compatibility register */
-#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
-#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
-#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+#define PCR_VEC_DIS (__MASK(63-0)) /* Vec. disable (bit NA since POWER8) */
+#define PCR_VSX_DIS (__MASK(63-1)) /* VSX disable (bit NA since POWER8) */
+#define PCR_TM_DIS (__MASK(63-2)) /* Trans. memory disable (POWER8) */
+#define PCR_MMA_DIS (__MASK(63-3)) /* Matrix-Multiply Accelerator */
+#define PCR_HIGH_BITS (PCR_MMA_DIS | PCR_VEC_DIS | PCR_VSX_DIS | PCR_TM_DIS)
+/*
+ * These bits are used in the function kvmppc_set_arch_compat() to specify and
+ * determine both the compatibility level which we want to emulate and the
+ * compatibility level which the host is capable of emulating.
+ */
+#define PCR_ARCH_300 0x10 /* Architecture 3.00 */
+#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
+#define PCR_LOW_BITS (PCR_ARCH_207 | PCR_ARCH_206 | PCR_ARCH_205 | PCR_ARCH_300)
+#define PCR_MASK ~(PCR_HIGH_BITS | PCR_LOW_BITS) /* PCR Reserved Bits */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
#define SPRN_TLBINDEXR 0x154 /* P7 TLB control register */
#define SPRN_TLBVPNR 0x155 /* P7 TLB control register */
@@ -371,9 +520,12 @@
#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */
#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */
#define SPRN_PPR 0x380 /* SMT Thread status Register */
+#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
#define SPRN_DEC 0x016 /* Decrement Register */
-#define SPRN_DER 0x095 /* Debug Enable Regsiter */
+#define SPRN_PIT 0x3DB /* Programmable Interval Timer (BOOKE) */
+
+#define SPRN_DER 0x095 /* Debug Enable Register */
#define DER_RSTE 0x40000000 /* Reset Interrupt */
#define DER_CHSTPE 0x20000000 /* Check Stop */
#define DER_MCIE 0x10000000 /* Machine Check Interrupt */
@@ -398,7 +550,7 @@
#define SPRN_DPDES 0x0B0 /* Directed Priv. Doorbell Exc. State */
#define SPRN_EAR 0x11A /* External Address Register */
#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */
-#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */
+#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Register */
#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */
#define HID0_HDICE_SH (63 - 23) /* 970 HDEC interrupt enable */
#define HID0_EMCP (1<<31) /* Enable Machine Check pin */
@@ -444,8 +596,11 @@
#define HID0_POWER8_1TO4LPAR __MASK(51)
#define HID0_POWER8_DYNLPARDIS __MASK(48)
+/* POWER9 HID0 bits */
+#define HID0_POWER9_RADIX __MASK(63 - 8)
+
#define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */
-#ifdef CONFIG_6xx
+#ifdef CONFIG_PPC_BOOK3S_32
#define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */
#define HID1_DFS (1<<22) /* 7447A Dynamic Frequency Scaling */
#define HID1_PC0 (1<<16) /* 7450 PLL_CFG[0] */
@@ -456,8 +611,10 @@
#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */
#define HID1_PS (1<<16) /* 750FX PLL selection */
#endif
-#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */
+#define SPRN_HID2_750FX 0x3F8 /* IBM 750FX HID2 Register */
#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */
+#define SPRN_HID2_G2_LE 0x3F3 /* G2_LE HID2 Register */
+#define HID2_G2_LE_HBE (1<<18) /* High BAT Enable (G2_LE) */
#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */
#define SPRN_IABR2 0x3FA /* 83xx */
#define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */
@@ -505,13 +662,15 @@
#define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */
#define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */
#define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */
+#ifndef SPRN_ICTRL
#define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */
+#endif
#define ICTRL_EICE 0x08000000 /* enable icache parity errs */
#define ICTRL_EDC 0x04000000 /* enable dcache parity errs */
#define ICTRL_EICP 0x00000100 /* enable icache par. check */
#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */
#define SPRN_IMMR 0x27E /* Internal Memory Map Register */
-#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */
+#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Register */
#define SPRN_L2CR2 0x3f8
#define L2CR_L2E 0x80000000 /* L2 enable */
#define L2CR_L2PE 0x40000000 /* L2 parity enable */
@@ -546,7 +705,7 @@
#define L2CR_L2DO_745x 0x00010000 /* L2 data only (745x) */
#define L2CR_L2REP_745x 0x00001000 /* L2 repl. algorithm (745x) */
#define L2CR_L2HWF_745x 0x00000800 /* L2 hardware flush (745x) */
-#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */
+#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Register */
#define L3CR_L3E 0x80000000 /* L3 enable */
#define L3CR_L3PE 0x40000000 /* L3 data parity enable */
#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */
@@ -576,6 +735,7 @@
#define SPRN_PIR 0x3FF /* Processor Identification Register */
#endif
#define SPRN_TIR 0x1BE /* Thread Identification Register */
+#define SPRN_PTCR 0x1D0 /* Partition table control Register */
#define SPRN_PSPB 0x09F /* Problem State Priority Boost reg */
#define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */
#define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */
@@ -601,31 +761,53 @@
#define SPRN_USPRG7 0x107 /* SPRG7 userspace read */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * Bits loaded from MSR upon interrupt.
+ * PPC (64-bit) bits 33-36,42-47 are interrupt dependent, the others are
+ * loaded from MSR. The exception is that SRESET and MCE do not always load
+ * bit 62 (RI) from MSR. Don't use PPC_BITMASK for this because 32-bit uses
+ * it.
+ */
+#define SRR1_MSR_BITS (~0x783f0000UL)
+#endif
+
#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
-#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
+#define SRR1_ISI_N_G_OR_CIP 0x10000000 /* ISI: Access is no-exec or G or CI for a prefixed instruction */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
+#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 and 9 */
+#define SRR1_WAKEMCE_RESVD 0x003c0000 /* Unused/reserved value used by MCE wakeup to indicate cause to idle wakeup handler */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
+#define SRR1_WAKEHVI 0x00240000 /* Hypervisor Virtualization Interrupt (P9) */
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
+#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
#define SRR1_WAKERESET 0x00100000 /* System reset */
+#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
-#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
- * may not be recoverable */
-#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
-#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
+#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */
+#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */
+#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */
+#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP 0x00020000 /* Trap */
#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
+#define SRR1_MCE_MCP 0x00080000 /* Machine check signal caused interrupt */
+#define SRR1_BOUNDARY 0x10000000 /* Prefixed instruction crosses 64-byte boundary */
+#define SRR1_PREFIXED 0x20000000 /* Exception caused by prefixed instruction */
+
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */
#define HSRR1_DENORM 0x00100000 /* Denorm exception */
+#define HSRR1_HISI_WRITE 0x00010000 /* HISI bcs couldn't update mem */
#define SPRN_TBCTL 0x35f /* PA6T Timebase control register */
#define TBCTL_FREEZE 0x0000000000000000ull /* Freeze all tbs */
@@ -641,7 +823,7 @@
#define THRM1_TIN (1 << 31)
#define THRM1_TIV (1 << 30)
#define THRM1_THRES(x) ((x&0x7f)<<23)
-#define THRM3_SITV(x) ((x&0x3fff)<<1)
+#define THRM3_SITV(x) ((x & 0x1fff) << 1)
#define THRM1_TID (1<<2)
#define THRM1_TIE (1<<1)
#define THRM1_V (1<<0)
@@ -686,6 +868,7 @@
#define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */
#define MMCR0_EBE 0x00100000UL /* Event based branch enable */
#define MMCR0_PMCC 0x000c0000UL /* PMC control */
+#define MMCR0_PMCCEXT ASM_CONST(0x00000200) /* PMCCEXT control */
#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */
#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/
#define MMCR0_PMCjCE ASM_CONST(0x00004000) /* PMCj count enable*/
@@ -701,7 +884,10 @@
#define MMCR0_FCWAIT 0x00000002UL /* freeze counter in WAIT state */
#define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */
#define SPRN_MMCR1 798
-#define SPRN_MMCR2 769
+#define SPRN_MMCR2 785
+#define SPRN_MMCR3 754
+#define SPRN_UMMCR2 769
+#define SPRN_UMMCR3 738
#define SPRN_MMCRA 0x312
#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */
#define MMCRA_SDAR_DCACHE_MISS 0x40000000UL
@@ -711,6 +897,7 @@
#define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */
#define MMCRA_SLOT_SHIFT 24
#define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define MMCRA_BHRB_DISABLE _UL(0x2000000000) // BHRB disable bit for ISA v3.1
#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */
#define POWER6_MMCRA_SIHV 0x0000040000000000ULL
#define POWER6_MMCRA_SIPR 0x0000020000000000ULL
@@ -728,6 +915,7 @@
#define SPRN_BESCR 806 /* Branch event status and control register */
#define BESCR_GE 0x8000000000000000ULL /* Global Enable */
#define SPRN_WORT 895 /* Workload optimization register - thread */
+#define SPRN_WORC 863 /* Workload optimization register - core */
#define SPRN_PMC1 787
#define SPRN_PMC2 788
@@ -737,13 +925,17 @@
#define SPRN_PMC6 792
#define SPRN_PMC7 793
#define SPRN_PMC8 794
-#define SPRN_SIAR 780
-#define SPRN_SDAR 781
#define SPRN_SIER 784
#define SIER_SIPR 0x2000000 /* Sampled MSR_PR */
#define SIER_SIHV 0x1000000 /* Sampled MSR_HV */
#define SIER_SIAR_VALID 0x0400000 /* SIAR contents valid */
#define SIER_SDAR_VALID 0x0200000 /* SDAR contents valid */
+#define SPRN_SIER2 752
+#define SPRN_SIER3 753
+#define SPRN_USIER2 736
+#define SPRN_USIER3 737
+#define SPRN_SIAR 796
+#define SPRN_SDAR 797
#define SPRN_TACR 888
#define SPRN_TCSCR 889
#define SPRN_CSIGR 890
@@ -911,7 +1103,7 @@
* - SPRG9 debug exception scratch
*
* All 32-bit:
- * - SPRG3 current thread_info pointer
+ * - SPRG3 current thread_struct physical addr pointer
* (virtual on BookE, physical on others)
*
* 32-bit classic:
@@ -920,15 +1112,6 @@
* - SPRG2 indicator that we are in RTAS
* - SPRG4 (603 only) pseudo TLB LRU data
*
- * 32-bit 40x:
- * - SPRG0 scratch for exception vectors
- * - SPRG1 scratch for exception vectors
- * - SPRG2 scratch for exception vectors
- * - SPRG4 scratch for exception vectors (not 403)
- * - SPRG5 scratch for exception vectors (not 403)
- * - SPRG6 scratch for exception vectors (not 403)
- * - SPRG7 scratch for exception vectors (not 403)
- *
* 32-bit 440 and FSL BookE:
* - SPRG0 scratch for exception vectors
* - SPRG1 scratch for exception vectors (*)
@@ -947,7 +1130,7 @@
* 32-bit 8xx:
* - SPRG0 scratch for exception vectors
* - SPRG1 scratch for exception vectors
- * - SPRG2 apparently unused but initialized
+ * - SPRG2 scratch for exception vectors
*
*/
#ifdef CONFIG_PPC64
@@ -1016,18 +1199,8 @@
#ifdef CONFIG_PPC_BOOK3S_32
#define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
#define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
-#define SPRN_SPRG_RTAS SPRN_SPRG2
-#define SPRN_SPRG_603_LRU SPRN_SPRG4
-#endif
-
-#ifdef CONFIG_40x
-#define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
-#define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2
-#define SPRN_SPRG_SCRATCH3 SPRN_SPRG4
-#define SPRN_SPRG_SCRATCH4 SPRN_SPRG5
-#define SPRN_SPRG_SCRATCH5 SPRN_SPRG6
-#define SPRN_SPRG_SCRATCH6 SPRN_SPRG7
+#define SPRN_SPRG_603_LRU SPRN_SPRG4
#endif
#ifdef CONFIG_BOOKE
@@ -1045,18 +1218,14 @@
#define SPRN_SPRG_WSCRATCH_MC SPRN_SPRG1
#define SPRN_SPRG_RSCRATCH4 SPRN_SPRG7R
#define SPRN_SPRG_WSCRATCH4 SPRN_SPRG7W
-#ifdef CONFIG_E200
-#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG6R
-#define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG6W
-#else
#define SPRN_SPRG_RSCRATCH_DBG SPRN_SPRG9
#define SPRN_SPRG_WSCRATCH_DBG SPRN_SPRG9
#endif
-#endif
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
#define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
#define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2
#endif
@@ -1129,6 +1298,11 @@
#define PVR_VER_E500MC 0x8023
#define PVR_VER_E5500 0x8024
#define PVR_VER_E6500 0x8040
+#define PVR_VER_7450 0x8000
+#define PVR_VER_7455 0x8001
+#define PVR_VER_7447 0x8002
+#define PVR_VER_7447A 0x8003
+#define PVR_VER_7448 0x8004
/*
* For the 8xx processors, all of them report the same PVR family for
@@ -1136,10 +1310,8 @@
* differentiated by the version number in the Communication Processor
* Module (CPM).
*/
-#define PVR_821 0x00500000
-#define PVR_823 PVR_821
-#define PVR_850 PVR_821
-#define PVR_860 PVR_821
+#define PVR_8xx 0x00500000
+
#define PVR_8240 0x00810100
#define PVR_8245 0x80811014
#define PVR_8260 PVR_8240
@@ -1166,7 +1338,12 @@
#define PVR_970GX 0x0045
#define PVR_POWER7p 0x004A
#define PVR_POWER8E 0x004B
+#define PVR_POWER8NVL 0x004C
#define PVR_POWER8 0x004D
+#define PVR_HX_C2000 0x0066
+#define PVR_POWER9 0x004E
+#define PVR_POWER10 0x0080
+#define PVR_POWER11 0x0082
#define PVR_BE 0x0070
#define PVR_PA6T 0x0090
@@ -1176,96 +1353,96 @@
#define PVR_ARCH_206 0x0f000003
#define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004
+#define PVR_ARCH_300 0x0f000005
+#define PVR_ARCH_31 0x0f000006
+#define PVR_ARCH_31_P11 0x0f000007
/* Macros for setting and retrieving special purpose registers */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+
+#if defined(CONFIG_PPC64) || defined(__CHECKER__)
+typedef struct {
+ u32 val;
+#ifdef CONFIG_PPC64
+ u32 suffix;
+#endif
+} __packed ppc_inst_t;
+#else
+typedef u32 ppc_inst_t;
+#endif
+
#define mfmsr() ({unsigned long rval; \
asm volatile("mfmsr %0" : "=r" (rval) : \
: "memory"); rval;})
#ifdef CONFIG_PPC_BOOK3S_64
#define __mtmsrd(v, l) asm volatile("mtmsrd %0," __stringify(l) \
: : "r" (v) : "memory")
-#define mtmsrd(v) __mtmsrd((v), 0)
-#define mtmsr(v) mtmsrd(v)
+#define mtmsr(v) __mtmsrd((v), 0)
+#define __MTMSR "mtmsrd"
#else
#define mtmsr(v) asm volatile("mtmsr %0" : \
: "r" ((unsigned long)(v)) \
: "memory")
+#define __mtmsrd(v, l) BUILD_BUG()
+#define __MTMSR "mtmsr"
#endif
+static inline void mtmsr_isync(unsigned long val)
+{
+ asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : :
+ "r" (val), "i" (CPU_FTR_ARCH_206) : "memory");
+}
+
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
: "=r" (rval)); rval;})
#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \
: "r" ((unsigned long)(v)) \
: "memory")
+#define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory")
-static inline unsigned long mfvtb (void)
+static inline void wrtee(unsigned long val)
{
-#ifdef CONFIG_PPC_BOOK3S_64
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- return mfspr(SPRN_VTB);
-#endif
- return 0;
+ if (__builtin_constant_p(val))
+ asm volatile("wrteei %0" : : "i" ((val & MSR_EE) ? 1 : 0) : "memory");
+ else
+ asm volatile("wrtee %0" : : "r" (val) : "memory");
}
-#ifdef __powerpc64__
-#if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E)
-#define mftb() ({unsigned long rval; \
- asm volatile( \
- "90: mfspr %0, %2;\n" \
- "97: cmpwi %0,0;\n" \
- " beq- 90b;\n" \
- "99:\n" \
- ".section __ftr_fixup,\"a\"\n" \
- ".align 3\n" \
- "98:\n" \
- " .llong %1\n" \
- " .llong %1\n" \
- " .llong 97b-98b\n" \
- " .llong 99b-98b\n" \
- " .llong 0\n" \
- " .llong 0\n" \
- ".previous" \
- : "=r" (rval) \
- : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \
- rval;})
-#else
-#define mftb() ({unsigned long rval; \
- asm volatile("mfspr %0, %1" : \
- "=r" (rval) : "i" (SPRN_TBRL)); rval;})
-#endif /* !CONFIG_PPC_CELL */
-
-#else /* __powerpc64__ */
-
-#if defined(CONFIG_8xx)
-#define mftbl() ({unsigned long rval; \
- asm volatile("mftbl %0" : "=r" (rval)); rval;})
-#define mftbu() ({unsigned long rval; \
- asm volatile("mftbu %0" : "=r" (rval)); rval;})
-#else
-#define mftbl() ({unsigned long rval; \
- asm volatile("mfspr %0, %1" : "=r" (rval) : \
- "i" (SPRN_TBRL)); rval;})
-#define mftbu() ({unsigned long rval; \
- asm volatile("mfspr %0, %1" : "=r" (rval) : \
- "i" (SPRN_TBRU)); rval;})
-#endif
-#endif /* !__powerpc64__ */
-
-#define mttbl(v) asm volatile("mttbl %0":: "r"(v))
-#define mttbu(v) asm volatile("mttbu %0":: "r"(v))
+extern unsigned long msr_check_and_set(unsigned long bits);
+extern bool strict_msr_control;
+extern void __msr_check_and_clear(unsigned long bits);
+static inline void msr_check_and_clear(unsigned long bits)
+{
+ if (strict_msr_control)
+ __msr_check_and_clear(bits);
+}
#ifdef CONFIG_PPC32
-#define mfsrin(v) ({unsigned int rval; \
- asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \
- rval;})
+static inline u32 mfsr(u32 idx)
+{
+ u32 val;
+
+ if (__builtin_constant_p(idx))
+ asm volatile("mfsr %0, %1" : "=r" (val): "i" (idx >> 28));
+ else
+ asm volatile("mfsrin %0, %1" : "=r" (val): "r" (idx));
+
+ return val;
+}
+
+static inline void mtsr(u32 val, u32 idx)
+{
+ if (__builtin_constant_p(idx))
+ asm volatile("mtsr %1, %0" : : "r" (val), "i" (idx >> 28));
+ else
+ asm volatile("mtsrin %0, %1" : : "r" (val), "r" (idx));
+}
#endif
-#define proc_trap() asm volatile("trap")
+extern unsigned long current_stack_frame(void);
-#define __get_SP() ({unsigned long sp; \
- asm volatile("mr %0,1": "=r" (sp)); sp;})
+register unsigned long current_stack_pointer asm("r1");
extern unsigned long scom970_read(unsigned int address);
extern void scom970_write(unsigned int address, unsigned long value);
@@ -1273,7 +1450,6 @@ extern void scom970_write(unsigned int address, unsigned long value);
struct pt_regs;
extern void ppc_save_regs(struct pt_regs *regs);
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_REG_H */
diff --git a/arch/powerpc/include/asm/reg_8xx.h b/arch/powerpc/include/asm/reg_8xx.h
index e8ea346b21d3..299ee7be0f67 100644
--- a/arch/powerpc/include/asm/reg_8xx.h
+++ b/arch/powerpc/include/asm/reg_8xx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Contains register definitions common to PowerPC 8xx CPUs. Notice
*/
@@ -14,6 +15,46 @@
#define SPRN_DC_ADR 569 /* Address needed for some commands */
#define SPRN_DC_DAT 570 /* Read-only data register */
+/* Misc Debug */
+#define SPRN_DPDR 630
+#define SPRN_MI_CAM 816
+#define SPRN_MI_RAM0 817
+#define SPRN_MI_RAM1 818
+#define SPRN_MD_CAM 824
+#define SPRN_MD_RAM0 825
+#define SPRN_MD_RAM1 826
+
+/* Special MSR manipulation registers */
+#define SPRN_EIE 80 /* External interrupt enable (EE=1, RI=1) */
+#define SPRN_EID 81 /* External interrupt disable (EE=0, RI=1) */
+#define SPRN_NRI 82 /* Non recoverable interrupt (EE=0, RI=0) */
+
+/* Debug registers */
+#define SPRN_CMPA 144
+#define SPRN_COUNTA 150
+#define SPRN_CMPE 152
+#define SPRN_CMPF 153
+#define SPRN_LCTRL1 156
+#define LCTRL1_CTE_GT 0xc0000000
+#define LCTRL1_CTF_LT 0x14000000
+#define LCTRL1_CRWE_RW 0x00000000
+#define LCTRL1_CRWE_RO 0x00040000
+#define LCTRL1_CRWE_WO 0x000c0000
+#define LCTRL1_CRWF_RW 0x00000000
+#define LCTRL1_CRWF_RO 0x00010000
+#define LCTRL1_CRWF_WO 0x00030000
+#define SPRN_LCTRL2 157
+#define LCTRL2_LW0EN 0x80000000
+#define LCTRL2_LW0LA_E 0x00000000
+#define LCTRL2_LW0LA_F 0x04000000
+#define LCTRL2_LW0LA_EandF 0x08000000
+#define LCTRL2_LW0LADC 0x02000000
+#define LCTRL2_SLW0EN 0x00000002
+#ifdef CONFIG_PPC_8xx
+#define SPRN_ICTRL 158
+#endif
+#define SPRN_BAR 159
+
/* Commands. Only the first few are available to the instruction cache.
*/
#define IDC_ENABLE 0x02000000 /* Cache enable */
diff --git a/arch/powerpc/include/asm/reg_a2.h b/arch/powerpc/include/asm/reg_a2.h
deleted file mode 100644
index 3ba9c6f096fc..000000000000
--- a/arch/powerpc/include/asm/reg_a2.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Register definitions specific to the A2 core
- *
- * Copyright (C) 2008 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef __ASM_POWERPC_REG_A2_H__
-#define __ASM_POWERPC_REG_A2_H__
-
-#define SPRN_TENSR 0x1b5
-#define SPRN_TENS 0x1b6 /* Thread ENable Set */
-#define SPRN_TENC 0x1b7 /* Thread ENable Clear */
-
-#define SPRN_A2_CCR0 0x3f0 /* Core Configuration Register 0 */
-#define SPRN_A2_CCR1 0x3f1 /* Core Configuration Register 1 */
-#define SPRN_A2_CCR2 0x3f2 /* Core Configuration Register 2 */
-#define SPRN_MMUCR0 0x3fc /* MMU Control Register 0 */
-#define SPRN_MMUCR1 0x3fd /* MMU Control Register 1 */
-#define SPRN_MMUCR2 0x3fe /* MMU Control Register 2 */
-#define SPRN_MMUCR3 0x3ff /* MMU Control Register 3 */
-
-#define SPRN_IAR 0x372
-
-#define SPRN_IUCR0 0x3f3
-#define IUCR0_ICBI_ACK 0x1000
-
-#define SPRN_XUCR0 0x3f6 /* Execution Unit Config Register 0 */
-
-#define A2_IERAT_SIZE 16
-#define A2_DERAT_SIZE 32
-
-/* A2 MMUCR0 bits */
-#define MMUCR0_ECL 0x80000000 /* Extended Class for TLB fills */
-#define MMUCR0_TID_NZ 0x40000000 /* TID is non-zero */
-#define MMUCR0_TS 0x10000000 /* Translation space for TLB fills */
-#define MMUCR0_TGS 0x20000000 /* Guest space for TLB fills */
-#define MMUCR0_TLBSEL 0x0c000000 /* TLB or ERAT target for TLB fills */
-#define MMUCR0_TLBSEL_U 0x00000000 /* TLBSEL = UTLB */
-#define MMUCR0_TLBSEL_I 0x08000000 /* TLBSEL = I-ERAT */
-#define MMUCR0_TLBSEL_D 0x0c000000 /* TLBSEL = D-ERAT */
-#define MMUCR0_LOCKSRSH 0x02000000 /* Use TLB lock on tlbsx. */
-#define MMUCR0_TID_MASK 0x000000ff /* TID field */
-
-/* A2 MMUCR1 bits */
-#define MMUCR1_IRRE 0x80000000 /* I-ERAT round robin enable */
-#define MMUCR1_DRRE 0x40000000 /* D-ERAT round robin enable */
-#define MMUCR1_REE 0x20000000 /* Reference Exception Enable*/
-#define MMUCR1_CEE 0x10000000 /* Change exception enable */
-#define MMUCR1_CSINV_ALL 0x00000000 /* Inval ERAT on all CS evts */
-#define MMUCR1_CSINV_NISYNC 0x04000000 /* Inval ERAT on all ex isync*/
-#define MMUCR1_CSINV_NEVER 0x0c000000 /* Don't inval ERAT on CS */
-#define MMUCR1_ICTID 0x00080000 /* IERAT class field as TID */
-#define MMUCR1_ITTID 0x00040000 /* IERAT thdid field as TID */
-#define MMUCR1_DCTID 0x00020000 /* DERAT class field as TID */
-#define MMUCR1_DTTID 0x00010000 /* DERAT thdid field as TID */
-#define MMUCR1_DCCD 0x00008000 /* DERAT class ignore */
-#define MMUCR1_TLBWE_BINV 0x00004000 /* back invalidate on tlbwe */
-
-/* A2 MMUCR2 bits */
-#define MMUCR2_PSSEL_SHIFT 4
-
-/* A2 MMUCR3 bits */
-#define MMUCR3_THID 0x0000000f /* Thread ID */
-
-/* *** ERAT TLB bits definitions */
-#define TLB0_EPN_MASK ASM_CONST(0xfffffffffffff000)
-#define TLB0_CLASS_MASK ASM_CONST(0x0000000000000c00)
-#define TLB0_CLASS_00 ASM_CONST(0x0000000000000000)
-#define TLB0_CLASS_01 ASM_CONST(0x0000000000000400)
-#define TLB0_CLASS_10 ASM_CONST(0x0000000000000800)
-#define TLB0_CLASS_11 ASM_CONST(0x0000000000000c00)
-#define TLB0_V ASM_CONST(0x0000000000000200)
-#define TLB0_X ASM_CONST(0x0000000000000100)
-#define TLB0_SIZE_MASK ASM_CONST(0x00000000000000f0)
-#define TLB0_SIZE_4K ASM_CONST(0x0000000000000010)
-#define TLB0_SIZE_64K ASM_CONST(0x0000000000000030)
-#define TLB0_SIZE_1M ASM_CONST(0x0000000000000050)
-#define TLB0_SIZE_16M ASM_CONST(0x0000000000000070)
-#define TLB0_SIZE_1G ASM_CONST(0x00000000000000a0)
-#define TLB0_THDID_MASK ASM_CONST(0x000000000000000f)
-#define TLB0_THDID_0 ASM_CONST(0x0000000000000001)
-#define TLB0_THDID_1 ASM_CONST(0x0000000000000002)
-#define TLB0_THDID_2 ASM_CONST(0x0000000000000004)
-#define TLB0_THDID_3 ASM_CONST(0x0000000000000008)
-#define TLB0_THDID_ALL ASM_CONST(0x000000000000000f)
-
-#define TLB1_RESVATTR ASM_CONST(0x00f0000000000000)
-#define TLB1_U0 ASM_CONST(0x0008000000000000)
-#define TLB1_U1 ASM_CONST(0x0004000000000000)
-#define TLB1_U2 ASM_CONST(0x0002000000000000)
-#define TLB1_U3 ASM_CONST(0x0001000000000000)
-#define TLB1_R ASM_CONST(0x0000800000000000)
-#define TLB1_C ASM_CONST(0x0000400000000000)
-#define TLB1_RPN_MASK ASM_CONST(0x000003fffffff000)
-#define TLB1_W ASM_CONST(0x0000000000000800)
-#define TLB1_I ASM_CONST(0x0000000000000400)
-#define TLB1_M ASM_CONST(0x0000000000000200)
-#define TLB1_G ASM_CONST(0x0000000000000100)
-#define TLB1_E ASM_CONST(0x0000000000000080)
-#define TLB1_VF ASM_CONST(0x0000000000000040)
-#define TLB1_UX ASM_CONST(0x0000000000000020)
-#define TLB1_SX ASM_CONST(0x0000000000000010)
-#define TLB1_UW ASM_CONST(0x0000000000000008)
-#define TLB1_SW ASM_CONST(0x0000000000000004)
-#define TLB1_UR ASM_CONST(0x0000000000000002)
-#define TLB1_SR ASM_CONST(0x0000000000000001)
-
-/* A2 erativax attributes definitions */
-#define ERATIVAX_RS_IS_ALL 0x000
-#define ERATIVAX_RS_IS_TID 0x040
-#define ERATIVAX_RS_IS_CLASS 0x080
-#define ERATIVAX_RS_IS_FULLMATCH 0x0c0
-#define ERATIVAX_CLASS_00 0x000
-#define ERATIVAX_CLASS_01 0x010
-#define ERATIVAX_CLASS_10 0x020
-#define ERATIVAX_CLASS_11 0x030
-#define ERATIVAX_PSIZE_4K (TLB_PSIZE_4K >> 1)
-#define ERATIVAX_PSIZE_64K (TLB_PSIZE_64K >> 1)
-#define ERATIVAX_PSIZE_1M (TLB_PSIZE_1M >> 1)
-#define ERATIVAX_PSIZE_16M (TLB_PSIZE_16M >> 1)
-#define ERATIVAX_PSIZE_1G (TLB_PSIZE_1G >> 1)
-
-/* A2 eratilx attributes definitions */
-#define ERATILX_T_ALL 0
-#define ERATILX_T_TID 1
-#define ERATILX_T_TGS 2
-#define ERATILX_T_FULLMATCH 3
-#define ERATILX_T_CLASS0 4
-#define ERATILX_T_CLASS1 5
-#define ERATILX_T_CLASS2 6
-#define ERATILX_T_CLASS3 7
-
-/* XUCR0 bits */
-#define XUCR0_TRACE_UM_T0 0x40000000 /* Thread 0 */
-#define XUCR0_TRACE_UM_T1 0x20000000 /* Thread 1 */
-#define XUCR0_TRACE_UM_T2 0x10000000 /* Thread 2 */
-#define XUCR0_TRACE_UM_T3 0x08000000 /* Thread 3 */
-
-/* A2 CCR0 register */
-#define A2_CCR0_PME_DISABLED 0x00000000
-#define A2_CCR0_PME_SLEEP 0x40000000
-#define A2_CCR0_PME_RVW 0x80000000
-#define A2_CCR0_PME_DISABLED2 0xc0000000
-
-/* A2 CCR2 register */
-#define A2_CCR2_ERAT_ONLY_MODE 0x00000001
-#define A2_CCR2_ENABLE_ICSWX 0x00000002
-#define A2_CCR2_ENABLE_PC 0x20000000
-#define A2_CCR2_ENABLE_TRACE 0x40000000
-
-#endif /* __ASM_POWERPC_REG_A2_H__ */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 1d653308a33c..56f9d3b1de85 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -1,13 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Contains register definitions common to the Book E PowerPC
- * specification. Notice that while the IBM-40x series of CPUs
- * are not true Book E PowerPCs, they borrowed a number of features
- * before Book E was finalized, and are included here as well. Unfortunately,
- * they sometimes used different locations than true Book E CPUs did.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
+ * specification.
*
* Copyright 2009-2010 Freescale Semiconductor, Inc.
*/
@@ -41,13 +35,10 @@
#if defined(CONFIG_PPC_BOOK3E_64)
#define MSR_64BIT MSR_CM
-#define MSR_ (MSR_ME | MSR_CE)
+#define MSR_ (MSR_ME | MSR_RI | MSR_CE)
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)
-#elif defined (CONFIG_40x)
-#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR|MSR_CE)
-#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
#else
#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_CE)
#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE)
@@ -160,7 +151,6 @@
#define SPRN_TLB3CFG 0x2B3 /* TLB 3 Config Register */
#define SPRN_EPR 0x2BE /* External Proxy Register */
#define SPRN_CCR1 0x378 /* Core Configuration Register 1 */
-#define SPRN_ZPR 0x3B0 /* Zone Protection Register (40x) */
#define SPRN_MAS7 0x3B0 /* MMU Assist Register 7 */
#define SPRN_MMUCR 0x3B2 /* MMU Control Register */
#define SPRN_CCR0 0x3B3 /* Core Configuration Register 0 */
@@ -169,7 +159,6 @@
#define SPRN_SGR 0x3B9 /* Storage Guarded Register */
#define SPRN_DCWR 0x3BA /* Data Cache Write-thru Register */
#define SPRN_SLER 0x3BB /* Little-endian real mode */
-#define SPRN_SU0R 0x3BC /* "User 0" real mode (40x) */
#define SPRN_DCMP 0x3D1 /* Data TLB Compare Register */
#define SPRN_ICDBDR 0x3D3 /* Instruction Cache Debug Data Register */
#define SPRN_EVPR 0x3D6 /* Exception Vector Prefix Register */
@@ -177,7 +166,6 @@
#define SPRN_L1CSR1 0x3F3 /* L1 Cache Control and Status Register 1 */
#define SPRN_MMUCSR0 0x3F4 /* MMU Control and Status Register 0 */
#define SPRN_MMUCFG 0x3F7 /* MMU Configuration Register */
-#define SPRN_PIT 0x3DB /* Programmable Interval Timer */
#define SPRN_BUCSR 0x3F5 /* Branch Unit Control and Status */
#define SPRN_L2CSR0 0x3F9 /* L2 Data Cache Control and Status Register 0 */
#define SPRN_L2CSR1 0x3FA /* L2 Data Cache Control and Status Register 1 */
@@ -187,10 +175,8 @@
#define SPRN_SVR 0x3FF /* System Version Register */
/*
- * SPRs which have conflicting definitions on true Book E versus classic,
- * or IBM 40x.
+ * SPRs which have conflicting definitions on true Book E versus classic.
*/
-#ifdef CONFIG_BOOKE
#define SPRN_CSRR0 0x03A /* Critical Save and Restore Register 0 */
#define SPRN_CSRR1 0x03B /* Critical Save and Restore Register 1 */
#define SPRN_DEAR 0x03D /* Data Error Address Register */
@@ -205,26 +191,7 @@
#define SPRN_DAC2 0x13D /* Data Address Compare 2 */
#define SPRN_TSR 0x150 /* Timer Status Register */
#define SPRN_TCR 0x154 /* Timer Control Register */
-#endif /* Book E */
-#ifdef CONFIG_40x
-#define SPRN_DBCR1 0x3BD /* Debug Control Register 1 */
-#define SPRN_ESR 0x3D4 /* Exception Syndrome Register */
-#define SPRN_DEAR 0x3D5 /* Data Error Address Register */
-#define SPRN_TSR 0x3D8 /* Timer Status Register */
-#define SPRN_TCR 0x3DA /* Timer Control Register */
-#define SPRN_SRR2 0x3DE /* Save/Restore Register 2 */
-#define SPRN_SRR3 0x3DF /* Save/Restore Register 3 */
-#define SPRN_DBSR 0x3F0 /* Debug Status Register */
-#define SPRN_DBCR0 0x3F2 /* Debug Control Register 0 */
-#define SPRN_DAC1 0x3F6 /* Data Address Compare 1 */
-#define SPRN_DAC2 0x3F7 /* Data Address Compare 2 */
-#define SPRN_CSRR0 SPRN_SRR2 /* Critical Save and Restore Register 0 */
-#define SPRN_CSRR1 SPRN_SRR3 /* Critical Save and Restore Register 1 */
-#endif
-
-#ifdef CONFIG_PPC_ICSWX
#define SPRN_HACOP 0x15F /* Hypervisor Available Coprocessor Register */
-#endif
/* Bit definitions for CCR1. */
#define CCR1_DPC 0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */
@@ -253,7 +220,7 @@
#define PPC47x_MCSR_FPR 0x00800000 /* FPR parity error */
#define PPC47x_MCSR_IPR 0x00400000 /* Imprecise Machine Check Exception */
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
/* All e500 */
#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */
#define MCSR_ICPERR 0x40000000UL /* I-Cache Parity Error */
@@ -288,20 +255,8 @@
#define MSRP_PMMP 0x00000004 /* Protect MSR[PMM] */
#endif
-#ifdef CONFIG_E200
-#define MCSR_MCP 0x80000000UL /* Machine Check Input Pin */
-#define MCSR_CP_PERR 0x20000000UL /* Cache Push Parity Error */
-#define MCSR_CPERR 0x10000000UL /* Cache Parity Error */
-#define MCSR_EXCP_ERR 0x08000000UL /* ISI, ITLB, or Bus Error on 1st insn
- fetch for an exception handler */
-#define MCSR_BUS_IRERR 0x00000010UL /* Read Bus Error on instruction fetch*/
-#define MCSR_BUS_DRERR 0x00000008UL /* Read Bus Error on data load */
-#define MCSR_BUS_WRERR 0x00000004UL /* Write Bus Error on buffered
- store or cache line push */
-#endif
-
/* Bit definitions for the HID1 */
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
/* e500v1/v2 */
#define HID1_PLL_CFG_MASK 0xfc000000 /* PLL_CFG input pins */
#define HID1_RFXE 0x00020000 /* Read fault exception enable */
@@ -315,10 +270,8 @@
#endif
/* Bit definitions for the DBSR. */
-/*
- * DBSR bits which have conflicting definitions on true Book E versus IBM 40x.
- */
-#ifdef CONFIG_BOOKE
+#define DBSR_IDE 0x80000000 /* Imprecise Debug Event */
+#define DBSR_MRR 0x30000000 /* Most Recent Reset */
#define DBSR_IC 0x08000000 /* Instruction Completion */
#define DBSR_BT 0x04000000 /* Branch Taken */
#define DBSR_IRPT 0x02000000 /* Exception Debug Event */
@@ -336,21 +289,6 @@
#define DBSR_CRET 0x00000020 /* Critical Return Debug Event */
#define DBSR_IAC12ATS 0x00000002 /* Instr Address Compare 1/2 Toggle */
#define DBSR_IAC34ATS 0x00000001 /* Instr Address Compare 3/4 Toggle */
-#endif
-#ifdef CONFIG_40x
-#define DBSR_IC 0x80000000 /* Instruction Completion */
-#define DBSR_BT 0x40000000 /* Branch taken */
-#define DBSR_IRPT 0x20000000 /* Exception Debug Event */
-#define DBSR_TIE 0x10000000 /* Trap Instruction debug Event */
-#define DBSR_IAC1 0x04000000 /* Instruction Address Compare 1 Event */
-#define DBSR_IAC2 0x02000000 /* Instruction Address Compare 2 Event */
-#define DBSR_IAC3 0x00080000 /* Instruction Address Compare 3 Event */
-#define DBSR_IAC4 0x00040000 /* Instruction Address Compare 4 Event */
-#define DBSR_DAC1R 0x01000000 /* Data Address Compare 1 Read Event */
-#define DBSR_DAC1W 0x00800000 /* Data Address Compare 1 Write Event */
-#define DBSR_DAC2R 0x00400000 /* Data Address Compare 2 Read Event */
-#define DBSR_DAC2W 0x00200000 /* Data Address Compare 2 Write Event */
-#endif
/* Bit definitions related to the ESR. */
#define ESR_MCI 0x80000000 /* Machine Check - Instruction */
@@ -372,69 +310,6 @@
#define ESR_SPV 0x00000080 /* Signal Processing operation */
/* Bit definitions related to the DBCR0. */
-#if defined(CONFIG_40x)
-#define DBCR0_EDM 0x80000000 /* External Debug Mode */
-#define DBCR0_IDM 0x40000000 /* Internal Debug Mode */
-#define DBCR0_RST 0x30000000 /* all the bits in the RST field */
-#define DBCR0_RST_SYSTEM 0x30000000 /* System Reset */
-#define DBCR0_RST_CHIP 0x20000000 /* Chip Reset */
-#define DBCR0_RST_CORE 0x10000000 /* Core Reset */
-#define DBCR0_RST_NONE 0x00000000 /* No Reset */
-#define DBCR0_IC 0x08000000 /* Instruction Completion */
-#define DBCR0_ICMP DBCR0_IC
-#define DBCR0_BT 0x04000000 /* Branch Taken */
-#define DBCR0_BRT DBCR0_BT
-#define DBCR0_EDE 0x02000000 /* Exception Debug Event */
-#define DBCR0_IRPT DBCR0_EDE
-#define DBCR0_TDE 0x01000000 /* TRAP Debug Event */
-#define DBCR0_IA1 0x00800000 /* Instr Addr compare 1 enable */
-#define DBCR0_IAC1 DBCR0_IA1
-#define DBCR0_IA2 0x00400000 /* Instr Addr compare 2 enable */
-#define DBCR0_IAC2 DBCR0_IA2
-#define DBCR0_IA12 0x00200000 /* Instr Addr 1-2 range enable */
-#define DBCR0_IA12X 0x00100000 /* Instr Addr 1-2 range eXclusive */
-#define DBCR0_IA3 0x00080000 /* Instr Addr compare 3 enable */
-#define DBCR0_IAC3 DBCR0_IA3
-#define DBCR0_IA4 0x00040000 /* Instr Addr compare 4 enable */
-#define DBCR0_IAC4 DBCR0_IA4
-#define DBCR0_IA34 0x00020000 /* Instr Addr 3-4 range Enable */
-#define DBCR0_IA34X 0x00010000 /* Instr Addr 3-4 range eXclusive */
-#define DBCR0_IA12T 0x00008000 /* Instr Addr 1-2 range Toggle */
-#define DBCR0_IA34T 0x00004000 /* Instr Addr 3-4 range Toggle */
-#define DBCR0_FT 0x00000001 /* Freeze Timers on debug event */
-
-#define dbcr_iac_range(task) ((task)->thread.debug.dbcr0)
-#define DBCR_IAC12I DBCR0_IA12 /* Range Inclusive */
-#define DBCR_IAC12X (DBCR0_IA12 | DBCR0_IA12X) /* Range Exclusive */
-#define DBCR_IAC12MODE (DBCR0_IA12 | DBCR0_IA12X) /* IAC 1-2 Mode Bits */
-#define DBCR_IAC34I DBCR0_IA34 /* Range Inclusive */
-#define DBCR_IAC34X (DBCR0_IA34 | DBCR0_IA34X) /* Range Exclusive */
-#define DBCR_IAC34MODE (DBCR0_IA34 | DBCR0_IA34X) /* IAC 3-4 Mode Bits */
-
-/* Bit definitions related to the DBCR1. */
-#define DBCR1_DAC1R 0x80000000 /* DAC1 Read Debug Event */
-#define DBCR1_DAC2R 0x40000000 /* DAC2 Read Debug Event */
-#define DBCR1_DAC1W 0x20000000 /* DAC1 Write Debug Event */
-#define DBCR1_DAC2W 0x10000000 /* DAC2 Write Debug Event */
-
-#define dbcr_dac(task) ((task)->thread.debug.dbcr1)
-#define DBCR_DAC1R DBCR1_DAC1R
-#define DBCR_DAC1W DBCR1_DAC1W
-#define DBCR_DAC2R DBCR1_DAC2R
-#define DBCR_DAC2W DBCR1_DAC2W
-
-/*
- * Are there any active Debug Events represented in the
- * Debug Control Registers?
- */
-#define DBCR0_ACTIVE_EVENTS (DBCR0_ICMP | DBCR0_IAC1 | DBCR0_IAC2 | \
- DBCR0_IAC3 | DBCR0_IAC4)
-#define DBCR1_ACTIVE_EVENTS (DBCR1_DAC1R | DBCR1_DAC2R | \
- DBCR1_DAC1W | DBCR1_DAC2W)
-#define DBCR_ACTIVE_EVENTS(dbcr0, dbcr1) (((dbcr0) & DBCR0_ACTIVE_EVENTS) || \
- ((dbcr1) & DBCR1_ACTIVE_EVENTS))
-
-#elif defined(CONFIG_BOOKE)
#define DBCR0_EDM 0x80000000 /* External Debug Mode */
#define DBCR0_IDM 0x40000000 /* Internal Debug Mode */
#define DBCR0_RST 0x30000000 /* all the bits in the RST field */
@@ -535,7 +410,6 @@
#define DBCR_ACTIVE_EVENTS(dbcr0, dbcr1) (((dbcr0) & DBCR0_ACTIVE_EVENTS) || \
((dbcr1) & DBCR1_ACTIVE_EVENTS))
-#endif /* #elif defined(CONFIG_BOOKE) */
/* Bit definitions related to the TCR. */
#define TCR_WP(x) (((x)&0x3)<<30) /* WDT Period */
@@ -562,7 +436,7 @@
#define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
#define TCR_ARE 0x00400000 /* Auto Reload Enable */
-#ifdef CONFIG_E500
+#ifdef CONFIG_PPC_E500
#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
(((tcr) & 0x1E0000) >> 15))
#else
@@ -667,60 +541,6 @@
#define EPC_EPID 0x00003fff
#define EPC_EPID_SHIFT 0
-/*
- * The IBM-403 is an even more odd special case, as it is much
- * older than the IBM-405 series. We put these down here incase someone
- * wishes to support these machines again.
- */
-#ifdef CONFIG_403GCX
-/* Special Purpose Registers (SPRNs)*/
-#define SPRN_TBHU 0x3CC /* Time Base High User-mode */
-#define SPRN_TBLU 0x3CD /* Time Base Low User-mode */
-#define SPRN_CDBCR 0x3D7 /* Cache Debug Control Register */
-#define SPRN_TBHI 0x3DC /* Time Base High */
-#define SPRN_TBLO 0x3DD /* Time Base Low */
-#define SPRN_DBCR 0x3F2 /* Debug Control Regsiter */
-#define SPRN_PBL1 0x3FC /* Protection Bound Lower 1 */
-#define SPRN_PBL2 0x3FE /* Protection Bound Lower 2 */
-#define SPRN_PBU1 0x3FD /* Protection Bound Upper 1 */
-#define SPRN_PBU2 0x3FF /* Protection Bound Upper 2 */
-
-
-/* Bit definitions for the DBCR. */
-#define DBCR_EDM DBCR0_EDM
-#define DBCR_IDM DBCR0_IDM
-#define DBCR_RST(x) (((x) & 0x3) << 28)
-#define DBCR_RST_NONE 0
-#define DBCR_RST_CORE 1
-#define DBCR_RST_CHIP 2
-#define DBCR_RST_SYSTEM 3
-#define DBCR_IC DBCR0_IC /* Instruction Completion Debug Evnt */
-#define DBCR_BT DBCR0_BT /* Branch Taken Debug Event */
-#define DBCR_EDE DBCR0_EDE /* Exception Debug Event */
-#define DBCR_TDE DBCR0_TDE /* TRAP Debug Event */
-#define DBCR_FER 0x00F80000 /* First Events Remaining Mask */
-#define DBCR_FT 0x00040000 /* Freeze Timers on Debug Event */
-#define DBCR_IA1 0x00020000 /* Instr. Addr. Compare 1 Enable */
-#define DBCR_IA2 0x00010000 /* Instr. Addr. Compare 2 Enable */
-#define DBCR_D1R 0x00008000 /* Data Addr. Compare 1 Read Enable */
-#define DBCR_D1W 0x00004000 /* Data Addr. Compare 1 Write Enable */
-#define DBCR_D1S(x) (((x) & 0x3) << 12) /* Data Adrr. Compare 1 Size */
-#define DAC_BYTE 0
-#define DAC_HALF 1
-#define DAC_WORD 2
-#define DAC_QUAD 3
-#define DBCR_D2R 0x00000800 /* Data Addr. Compare 2 Read Enable */
-#define DBCR_D2W 0x00000400 /* Data Addr. Compare 2 Write Enable */
-#define DBCR_D2S(x) (((x) & 0x3) << 8) /* Data Addr. Compare 2 Size */
-#define DBCR_SBT 0x00000040 /* Second Branch Taken Debug Event */
-#define DBCR_SED 0x00000020 /* Second Exception Debug Event */
-#define DBCR_STD 0x00000010 /* Second Trap Debug Event */
-#define DBCR_SIA 0x00000008 /* Second IAC Enable */
-#define DBCR_SDA 0x00000004 /* Second DAC Enable */
-#define DBCR_JOI 0x00000002 /* JTAG Serial Outbound Int. Enable */
-#define DBCR_JII 0x00000001 /* JTAG Serial Inbound Int. Enable */
-#endif /* 403GCX */
-
/* Some 476 specific registers */
#define SPRN_SSPCR 830
#define SPRN_USPCR 831
@@ -740,6 +560,12 @@
#define MMUBE1_VBE4 0x00000002
#define MMUBE1_VBE5 0x00000001
+#define TMRN_TMCFG0 16 /* Thread Management Configuration Register 0 */
+#define TMRN_TMCFG0_NPRIBITS 0x003f0000 /* Bits of thread priority */
+#define TMRN_TMCFG0_NPRIBITS_SHIFT 16
+#define TMRN_TMCFG0_NATHRD 0x00003f00 /* Number of active threads */
+#define TMRN_TMCFG0_NATHRD_SHIFT 8
+#define TMRN_TMCFG0_NTHRD 0x0000003f /* Number of threads */
#define TMRN_IMSR0 0x120 /* Initial MSR Register 0 (e6500) */
#define TMRN_IMSR1 0x121 /* Initial MSR Register 1 (e6500) */
#define TMRN_INIA0 0x140 /* Next Instruction Address Register 0 */
@@ -750,13 +576,16 @@
#define TEN_THREAD(x) (1 << (x))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define mftmr(rn) ({unsigned long rval; \
asm volatile(MFTMR(rn, %0) : "=r" (rval)); rval;})
#define mttmr(rn, v) asm volatile(MTTMR(rn, %0) : \
: "r" ((unsigned long)(v)) \
: "memory")
-#endif /* !__ASSEMBLY__ */
+
+extern unsigned long global_dbcr0[];
+
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_POWERPC_REG_BOOKE_H__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h
index 0e3ddf5177f6..ec459c3d9498 100644
--- a/arch/powerpc/include/asm/reg_fsl_emb.h
+++ b/arch/powerpc/include/asm/reg_fsl_emb.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Contains register definitions for the Freescale Embedded Performance
* Monitor.
@@ -6,13 +7,32 @@
#ifndef __ASM_POWERPC_REG_FSL_EMB_H__
#define __ASM_POWERPC_REG_FSL_EMB_H__
-#ifndef __ASSEMBLY__
+#include <linux/stringify.h>
+
+#ifndef __ASSEMBLER__
/* Performance Monitor Registers */
-#define mfpmr(rn) ({unsigned int rval; \
- asm volatile("mfpmr %0," __stringify(rn) \
- : "=r" (rval)); rval;})
-#define mtpmr(rn, v) asm volatile("mtpmr " __stringify(rn) ",%0" : : "r" (v))
-#endif /* __ASSEMBLY__ */
+/*
+ * Read performance monitor register @rn and return its value.
+ *
+ * @rn is passed with an "i" (immediate) constraint, so it must be a
+ * compile-time constant at every call site.
+ */
+static __always_inline unsigned int mfpmr(unsigned int rn)
+{
+ unsigned int rval;
+
+ /*
+  * volatile: the register contents change independently of the asm
+  * inputs, so the compiler must not merge, hoist or drop repeated reads.
+  * ".machine push/pop" scopes the e300 machine selection to this
+  * statement only.
+  */
+ asm volatile(".machine push; "
+ ".machine e300; "
+ "mfpmr %[rval], %[rn];"
+ ".machine pop;"
+ : [rval] "=r" (rval) : [rn] "i" (rn));
+
+ return rval;
+}
+
+/*
+ * Write @val to performance monitor register @rn.
+ *
+ * @rn is passed with an "i" (immediate) constraint, so it must be a
+ * compile-time constant at every call site.
+ */
+static __always_inline void mtpmr(unsigned int rn, unsigned int val)
+{
+ /*
+  * @val is an input operand: mtpmr reads the GPR and writes the PMR.
+  * Listing it as an output would leave the register contents
+  * unspecified on entry and allow the whole statement to be discarded
+  * as dead code. volatile keeps the register write in program order.
+  */
+ asm volatile(".machine push; "
+ ".machine e300; "
+ "mtpmr %[rn], %[val];"
+ ".machine pop;"
+ : : [val] "r" (val), [rn] "i" (rn));
+}
+#endif /* __ASSEMBLER__ */
/* Freescale Book E Performance Monitor APU Registers */
#define PMRN_PMC0 0x010 /* Performance Monitor Counter 0 */
diff --git a/arch/powerpc/include/asm/rheap.h b/arch/powerpc/include/asm/rheap.h
index 172381769cfc..8e83703d6736 100644
--- a/arch/powerpc/include/asm/rheap.h
+++ b/arch/powerpc/include/asm/rheap.h
@@ -83,6 +83,9 @@ extern int rh_get_stats(rh_info_t * info, int what, int max_stats,
/* Simple dump of remote heap info */
extern void rh_dump(rh_info_t * info);
+/* Simple dump of remote info block */
+void rh_dump_blk(rh_info_t *info, rh_block_t *blk);
+
/* Set owner of taken block */
extern int rh_set_owner(rh_info_t * info, unsigned long start, const char *owner);
diff --git a/arch/powerpc/include/asm/rio.h b/arch/powerpc/include/asm/rio.h
index b1d2deceeedb..0e57cda2a64c 100644
--- a/arch/powerpc/include/asm/rio.h
+++ b/arch/powerpc/include/asm/rio.h
@@ -1,19 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* RapidIO architecture support
*
* Copyright 2005 MontaVista Software, Inc.
* Matt Porter <mporter@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#ifndef ASM_PPC_RIO_H
#define ASM_PPC_RIO_H
-extern void platform_rio_init(void);
#ifdef CONFIG_FSL_RIO
extern int fsl_rio_mcheck_exception(struct pt_regs *);
#else
diff --git a/arch/powerpc/include/asm/rtas-types.h b/arch/powerpc/include/asm/rtas-types.h
new file mode 100644
index 000000000000..9d5b16803cbb
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas-types.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_RTAS_TYPES_H
+#define _ASM_POWERPC_RTAS_TYPES_H
+
+#include <linux/compiler_attributes.h>
+
+typedef __be32 rtas_arg_t;
+
+/*
+ * Argument block for an RTAS call. Every scalar member is a big-endian
+ * 32-bit word (rtas_arg_t is __be32) and the block is 8-byte aligned.
+ */
+struct rtas_args {
+ __be32 token; /* presumably the RTAS function token, cf. rtas_call() -- confirm */
+ __be32 nargs; /* presumably the number of input arguments in args[] -- confirm */
+ __be32 nret; /* presumably the number of return values -- confirm */
+ rtas_arg_t args[16];
+ rtas_arg_t *rets; /* Pointer to return values in args[]. */
+} __aligned(8);
+
+/*
+ * Location of the RTAS instance: entry point, base and size, plus the
+ * associated device tree node.
+ */
+struct rtas_t {
+ unsigned long entry; /* physical address pointer */
+ unsigned long base; /* physical address pointer */
+ unsigned long size;
+ struct device_node *dev; /* virtual address pointer */
+};
+
+/*
+ * Fixed header of an RTAS error/event log. The bit diagrams list fields
+ * from the most significant bit down; the masks are the ones used by the
+ * rtas_error_*() accessors.
+ */
+struct rtas_error_log {
+ /* Byte 0 */
+ u8 byte0; /* Architectural version */
+
+ /* Byte 1 */
+ u8 byte1;
+ /* XXXXXXXX
+  * XXX      3: Severity level of error    (mask 0xE0)
+  *    XX    2: Degree of recovery         (mask 0x18)
+  *      X   1: Extended log present?      (mask 0x04)
+  *       XX 2: Reserved
+  */
+
+ /* Byte 2 */
+ u8 byte2;
+ /* XXXXXXXX
+  * XXXX     4: Initiator of event         (mask 0xf0)
+  *     XXXX 4: Target of failed operation
+  */
+
+ /* Byte 3 */
+ u8 byte3; /* General event or error */
+ __be32 extended_log_length; /* length in bytes */
+ unsigned char buffer[1]; /* Start of extended log */
+ /* Variable length. */
+};
+
+/* RTAS general extended event log, Version 6. The extended log starts
+ * from "buffer" field of struct rtas_error_log defined above.
+ *
+ * The bit diagrams list fields from the most significant bit down.
+ */
+struct rtas_ext_event_log_v6 {
+ /* Byte 0 */
+ u8 byte0;
+ /* XXXXXXXX
+  * X        1: Log valid
+  *  X       1: Unrecoverable error
+  *   X      1: Recoverable (correctable or successfully retried)
+  *    X     1: Bypassed unrecoverable error (degraded operation)
+  *     X    1: Predictive error
+  *      X   1: "New" log (always 1 for data returned from RTAS)
+  *       X  1: Big Endian
+  *        X 1: Reserved
+  */
+
+ /* Byte 1 */
+ u8 byte1; /* reserved */
+
+ /* Byte 2 */
+ u8 byte2;
+ /* XXXXXXXX
+  * X        1: Set to 1 (indicating log is in PowerPC format)
+  *  XXX     3: Reserved
+  *     XXXX 4: Log format used for bytes 12-2047
+  */
+
+ /* Byte 3 */
+ u8 byte3; /* reserved */
+ /* Byte 4-11 */
+ u8 reserved[8]; /* reserved */
+ /* Byte 12-15 */
+ __be32 company_id; /* Company ID of the company */
+ /* that defines the format for */
+ /* the vendor specific log type */
+ /* Byte 16-end of log */
+ u8 vendor_log[1]; /* Start of vendor specific log */
+ /* Variable length. */
+};
+
+/* Vendor specific Platform Event Log Format, Version 6, section header */
+struct pseries_errorlog {
+ __be16 id; /* 0x00 2-byte ASCII section ID */
+ __be16 length; /* 0x02 Section length in bytes */
+ u8 version; /* 0x04 Section version */
+ u8 subtype; /* 0x05 Section subtype */
+ __be16 creator_component; /* 0x06 Creator component ID */
+ u8 data[]; /* 0x08 Start of section data */
+};
+
+/*
+ * RTAS pseries hotplug errorlog section.
+ *
+ * NOTE(review): the field meanings below are inferred from the
+ * PSERIES_HP_ELOG_* constant names in asm/rtas.h -- confirm against the
+ * code that parses this section.
+ */
+struct pseries_hp_errorlog {
+ u8 resource; /* presumably a PSERIES_HP_ELOG_RESOURCE_* value */
+ u8 action; /* presumably a PSERIES_HP_ELOG_ACTION_* value */
+ u8 id_type; /* presumably a PSERIES_HP_ELOG_ID_DRC_* value selecting the _drc_u member */
+ u8 reserved;
+ /* Big-endian identifier payload; only one member is meaningful at a time. */
+ union {
+ __be32 drc_index;
+ __be32 drc_count;
+ struct { __be32 count, index; } ic;
+ char drc_name[1];
+ } _drc_u;
+};
+
+#endif /* _ASM_POWERPC_RTAS_TYPES_H */
diff --git a/arch/powerpc/include/asm/rtas-work-area.h b/arch/powerpc/include/asm/rtas-work-area.h
new file mode 100644
index 000000000000..251a395dbd2e
--- /dev/null
+++ b/arch/powerpc/include/asm/rtas-work-area.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_RTAS_WORK_AREA_H
+#define _ASM_POWERPC_RTAS_WORK_AREA_H
+
+#include <linux/build_bug.h>
+#include <linux/sizes.h>
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+/**
+ * struct rtas_work_area - RTAS work area descriptor.
+ *
+ * Descriptor for a "work area" in PAPR terminology that satisfies
+ * RTAS addressing requirements.
+ */
+struct rtas_work_area {
+ /* private: Use the APIs provided below. */
+ char *buf;
+ size_t size;
+};
+
+enum {
+ /* Maximum allocation size, enforced at build time. */
+ RTAS_WORK_AREA_MAX_ALLOC_SZ = SZ_128K,
+};
+
+/**
+ * rtas_work_area_alloc() - Acquire a work area of the requested size.
+ * @size_: Allocation size. Must be compile-time constant and not more
+ * than %RTAS_WORK_AREA_MAX_ALLOC_SZ.
+ *
+ * Allocate a buffer suitable for passing to RTAS functions that have
+ * a memory address parameter, often (but not always) referred to as a
+ * "work area" in PAPR. Although callers are allowed to block while
+ * holding a work area, the amount of memory reserved for this purpose
+ * is limited, and allocations should be short-lived. A good guideline
+ * is to release any allocated work area before returning from a
+ * system call.
+ *
+ * This function does not fail. It blocks until the allocation
+ * succeeds. To prevent deadlocks, callers are discouraged from
+ * allocating more than one work area simultaneously in a single task
+ * context.
+ *
+ * Context: This function may sleep.
+ * Return: A &struct rtas_work_area descriptor for the allocated work area.
+ */
+#define rtas_work_area_alloc(size_) ({ \
+ static_assert(__builtin_constant_p(size_)); \
+ static_assert((size_) > 0); \
+ static_assert((size_) <= RTAS_WORK_AREA_MAX_ALLOC_SZ); \
+ __rtas_work_area_alloc(size_); \
+})
+
+/*
+ * Do not call __rtas_work_area_alloc() directly. Use
+ * rtas_work_area_alloc().
+ */
+struct rtas_work_area *__rtas_work_area_alloc(size_t size);
+
+/**
+ * rtas_work_area_free() - Release a work area.
+ * @area: Work area descriptor as returned from rtas_work_area_alloc().
+ *
+ * Return a work area buffer to the pool.
+ */
+void rtas_work_area_free(struct rtas_work_area *area);
+
+/* Return the buffer pointer stored in the work area descriptor @area. */
+static inline char *rtas_work_area_raw_buf(const struct rtas_work_area *area)
+{
+ return area->buf;
+}
+
+/* Return the size recorded in the work area descriptor @area. */
+static inline size_t rtas_work_area_size(const struct rtas_work_area *area)
+{
+ return area->size;
+}
+
+/* Return the result of __pa() applied to the buffer of work area @area. */
+static inline phys_addr_t rtas_work_area_phys(const struct rtas_work_area *area)
+{
+ return __pa(area->buf);
+}
+
+/*
+ * Early setup for the work area allocator. Call from
+ * rtas_initialize() only.
+ */
+
+#ifdef CONFIG_PPC_PSERIES
+void rtas_work_area_reserve_arena(phys_addr_t limit);
+#else /* CONFIG_PPC_PSERIES */
+static inline void rtas_work_area_reserve_arena(phys_addr_t limit) {}
+#endif /* CONFIG_PPC_PSERIES */
+
+#endif /* _ASM_POWERPC_RTAS_WORK_AREA_H */
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b390f55b0df1..d046bbd5017d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -1,81 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _POWERPC_RTAS_H
#define _POWERPC_RTAS_H
#ifdef __KERNEL__
+#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <asm/page.h>
+#include <asm/rtas-types.h>
+#include <linux/time.h>
+#include <linux/cpumask.h>
/*
* Definitions for talking to the RTAS on CHRP machines.
*
* Copyright (C) 2001 Peter Bergner
* Copyright (C) 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+enum rtas_function_index {
+ RTAS_FNIDX__CHECK_EXCEPTION,
+ RTAS_FNIDX__DISPLAY_CHARACTER,
+ RTAS_FNIDX__EVENT_SCAN,
+ RTAS_FNIDX__FREEZE_TIME_BASE,
+ RTAS_FNIDX__GET_POWER_LEVEL,
+ RTAS_FNIDX__GET_SENSOR_STATE,
+ RTAS_FNIDX__GET_TERM_CHAR,
+ RTAS_FNIDX__GET_TIME_OF_DAY,
+ RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE,
+ RTAS_FNIDX__IBM_CBE_START_PTCAL,
+ RTAS_FNIDX__IBM_CBE_STOP_PTCAL,
+ RTAS_FNIDX__IBM_CHANGE_MSI,
+ RTAS_FNIDX__IBM_CLOSE_ERRINJCT,
+ RTAS_FNIDX__IBM_CONFIGURE_BRIDGE,
+ RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR,
+ RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP,
+ RTAS_FNIDX__IBM_CONFIGURE_PE,
+ RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW,
+ RTAS_FNIDX__IBM_DISPLAY_MESSAGE,
+ RTAS_FNIDX__IBM_ERRINJCT,
+ RTAS_FNIDX__IBM_EXTI2C,
+ RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO,
+ RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2,
+ RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE,
+ RTAS_FNIDX__IBM_GET_INDICES,
+ RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY,
+ RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER,
+ RTAS_FNIDX__IBM_GET_VPD,
+ RTAS_FNIDX__IBM_GET_XIVE,
+ RTAS_FNIDX__IBM_INT_OFF,
+ RTAS_FNIDX__IBM_INT_ON,
+ RTAS_FNIDX__IBM_IO_QUIESCE_ACK,
+ RTAS_FNIDX__IBM_LPAR_PERFTOOLS,
+ RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE,
+ RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION,
+ RTAS_FNIDX__IBM_NMI_INTERLOCK,
+ RTAS_FNIDX__IBM_NMI_REGISTER,
+ RTAS_FNIDX__IBM_OPEN_ERRINJCT,
+ RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE,
+ RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER,
+ RTAS_FNIDX__IBM_OS_TERM,
+ RTAS_FNIDX__IBM_PARTNER_CONTROL,
+ RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION,
+ RTAS_FNIDX__IBM_PLATFORM_DUMP,
+ RTAS_FNIDX__IBM_POWER_OFF_UPS,
+ RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER,
+ RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW,
+ RTAS_FNIDX__IBM_READ_PCI_CONFIG,
+ RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE,
+ RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2,
+ RTAS_FNIDX__IBM_RECEIVE_HVPIPE_MSG,
+ RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW,
+ RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW,
+ RTAS_FNIDX__IBM_SCAN_LOG_DUMP,
+ RTAS_FNIDX__IBM_SEND_HVPIPE_MSG,
+ RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR,
+ RTAS_FNIDX__IBM_SET_EEH_OPTION,
+ RTAS_FNIDX__IBM_SET_SLOT_RESET,
+ RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER,
+ RTAS_FNIDX__IBM_SET_XIVE,
+ RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL,
+ RTAS_FNIDX__IBM_SUSPEND_ME,
+ RTAS_FNIDX__IBM_TUNE_DMA_PARMS,
+ RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT,
+ RTAS_FNIDX__IBM_UPDATE_NODES,
+ RTAS_FNIDX__IBM_UPDATE_PROPERTIES,
+ RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE,
+ RTAS_FNIDX__IBM_WRITE_PCI_CONFIG,
+ RTAS_FNIDX__NVRAM_FETCH,
+ RTAS_FNIDX__NVRAM_STORE,
+ RTAS_FNIDX__POWER_OFF,
+ RTAS_FNIDX__PUT_TERM_CHAR,
+ RTAS_FNIDX__QUERY_CPU_STOPPED_STATE,
+ RTAS_FNIDX__READ_PCI_CONFIG,
+ RTAS_FNIDX__RTAS_LAST_ERROR,
+ RTAS_FNIDX__SET_INDICATOR,
+ RTAS_FNIDX__SET_POWER_LEVEL,
+ RTAS_FNIDX__SET_TIME_FOR_POWER_ON,
+ RTAS_FNIDX__SET_TIME_OF_DAY,
+ RTAS_FNIDX__START_CPU,
+ RTAS_FNIDX__STOP_SELF,
+ RTAS_FNIDX__SYSTEM_REBOOT,
+ RTAS_FNIDX__THAW_TIME_BASE,
+ RTAS_FNIDX__WRITE_PCI_CONFIG,
+};
+
+/*
+ * Opaque handle for client code to refer to RTAS functions. All valid
+ * function handles are build-time constants prefixed with RTAS_FN_.
+ */
+typedef struct {
+ const enum rtas_function_index index;
+} rtas_fn_handle_t;
+
+
+#define rtas_fn_handle(x_) ((const rtas_fn_handle_t) { .index = x_, })
+
+#define RTAS_FN_CHECK_EXCEPTION rtas_fn_handle(RTAS_FNIDX__CHECK_EXCEPTION)
+#define RTAS_FN_DISPLAY_CHARACTER rtas_fn_handle(RTAS_FNIDX__DISPLAY_CHARACTER)
+#define RTAS_FN_EVENT_SCAN rtas_fn_handle(RTAS_FNIDX__EVENT_SCAN)
+#define RTAS_FN_FREEZE_TIME_BASE rtas_fn_handle(RTAS_FNIDX__FREEZE_TIME_BASE)
+#define RTAS_FN_GET_POWER_LEVEL rtas_fn_handle(RTAS_FNIDX__GET_POWER_LEVEL)
+#define RTAS_FN_GET_SENSOR_STATE rtas_fn_handle(RTAS_FNIDX__GET_SENSOR_STATE)
+#define RTAS_FN_GET_TERM_CHAR rtas_fn_handle(RTAS_FNIDX__GET_TERM_CHAR)
+#define RTAS_FN_GET_TIME_OF_DAY rtas_fn_handle(RTAS_FNIDX__GET_TIME_OF_DAY)
+#define RTAS_FN_IBM_ACTIVATE_FIRMWARE rtas_fn_handle(RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE)
+#define RTAS_FN_IBM_CBE_START_PTCAL rtas_fn_handle(RTAS_FNIDX__IBM_CBE_START_PTCAL)
+#define RTAS_FN_IBM_CBE_STOP_PTCAL rtas_fn_handle(RTAS_FNIDX__IBM_CBE_STOP_PTCAL)
+#define RTAS_FN_IBM_CHANGE_MSI rtas_fn_handle(RTAS_FNIDX__IBM_CHANGE_MSI)
+#define RTAS_FN_IBM_CLOSE_ERRINJCT rtas_fn_handle(RTAS_FNIDX__IBM_CLOSE_ERRINJCT)
+#define RTAS_FN_IBM_CONFIGURE_BRIDGE rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_BRIDGE)
+#define RTAS_FN_IBM_CONFIGURE_CONNECTOR rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR)
+#define RTAS_FN_IBM_CONFIGURE_KERNEL_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP)
+#define RTAS_FN_IBM_CONFIGURE_PE rtas_fn_handle(RTAS_FNIDX__IBM_CONFIGURE_PE)
+#define RTAS_FN_IBM_CREATE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_DISPLAY_MESSAGE rtas_fn_handle(RTAS_FNIDX__IBM_DISPLAY_MESSAGE)
+#define RTAS_FN_IBM_ERRINJCT rtas_fn_handle(RTAS_FNIDX__IBM_ERRINJCT)
+#define RTAS_FN_IBM_EXTI2C rtas_fn_handle(RTAS_FNIDX__IBM_EXTI2C)
+#define RTAS_FN_IBM_GET_CONFIG_ADDR_INFO rtas_fn_handle(RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO)
+#define RTAS_FN_IBM_GET_CONFIG_ADDR_INFO2 rtas_fn_handle(RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2)
+#define RTAS_FN_IBM_GET_DYNAMIC_SENSOR_STATE rtas_fn_handle(RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE)
+#define RTAS_FN_IBM_GET_INDICES rtas_fn_handle(RTAS_FNIDX__IBM_GET_INDICES)
+#define RTAS_FN_IBM_GET_RIO_TOPOLOGY rtas_fn_handle(RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY)
+#define RTAS_FN_IBM_GET_SYSTEM_PARAMETER rtas_fn_handle(RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER)
+#define RTAS_FN_IBM_GET_VPD rtas_fn_handle(RTAS_FNIDX__IBM_GET_VPD)
+#define RTAS_FN_IBM_GET_XIVE rtas_fn_handle(RTAS_FNIDX__IBM_GET_XIVE)
+#define RTAS_FN_IBM_INT_OFF rtas_fn_handle(RTAS_FNIDX__IBM_INT_OFF)
+#define RTAS_FN_IBM_INT_ON rtas_fn_handle(RTAS_FNIDX__IBM_INT_ON)
+#define RTAS_FN_IBM_IO_QUIESCE_ACK rtas_fn_handle(RTAS_FNIDX__IBM_IO_QUIESCE_ACK)
+#define RTAS_FN_IBM_LPAR_PERFTOOLS rtas_fn_handle(RTAS_FNIDX__IBM_LPAR_PERFTOOLS)
+#define RTAS_FN_IBM_MANAGE_FLASH_IMAGE rtas_fn_handle(RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE)
+#define RTAS_FN_IBM_MANAGE_STORAGE_PRESERVATION rtas_fn_handle(RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION)
+#define RTAS_FN_IBM_NMI_INTERLOCK rtas_fn_handle(RTAS_FNIDX__IBM_NMI_INTERLOCK)
+#define RTAS_FN_IBM_NMI_REGISTER rtas_fn_handle(RTAS_FNIDX__IBM_NMI_REGISTER)
+#define RTAS_FN_IBM_OPEN_ERRINJCT rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_ERRINJCT)
+#define RTAS_FN_IBM_OPEN_SRIOV_ALLOW_UNFREEZE rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE)
+#define RTAS_FN_IBM_OPEN_SRIOV_MAP_PE_NUMBER rtas_fn_handle(RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER)
+#define RTAS_FN_IBM_OS_TERM rtas_fn_handle(RTAS_FNIDX__IBM_OS_TERM)
+#define RTAS_FN_IBM_PARTNER_CONTROL rtas_fn_handle(RTAS_FNIDX__IBM_PARTNER_CONTROL)
+#define RTAS_FN_IBM_PHYSICAL_ATTESTATION rtas_fn_handle(RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION)
+#define RTAS_FN_IBM_PLATFORM_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_PLATFORM_DUMP)
+#define RTAS_FN_IBM_POWER_OFF_UPS rtas_fn_handle(RTAS_FNIDX__IBM_POWER_OFF_UPS)
+#define RTAS_FN_IBM_QUERY_INTERRUPT_SOURCE_NUMBER rtas_fn_handle(RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER)
+#define RTAS_FN_IBM_QUERY_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_READ_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__IBM_READ_PCI_CONFIG)
+#define RTAS_FN_IBM_READ_SLOT_RESET_STATE rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE)
+#define RTAS_FN_IBM_READ_SLOT_RESET_STATE2 rtas_fn_handle(RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2)
+#define RTAS_FN_IBM_RECEIVE_HVPIPE_MSG rtas_fn_handle(RTAS_FNIDX__IBM_RECEIVE_HVPIPE_MSG)
+#define RTAS_FN_IBM_REMOVE_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_RESET_PE_DMA_WINDOW rtas_fn_handle(RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW)
+#define RTAS_FN_IBM_SCAN_LOG_DUMP rtas_fn_handle(RTAS_FNIDX__IBM_SCAN_LOG_DUMP)
+#define RTAS_FN_IBM_SEND_HVPIPE_MSG rtas_fn_handle(RTAS_FNIDX__IBM_SEND_HVPIPE_MSG)
+#define RTAS_FN_IBM_SET_DYNAMIC_INDICATOR rtas_fn_handle(RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR)
+#define RTAS_FN_IBM_SET_EEH_OPTION rtas_fn_handle(RTAS_FNIDX__IBM_SET_EEH_OPTION)
+#define RTAS_FN_IBM_SET_SLOT_RESET rtas_fn_handle(RTAS_FNIDX__IBM_SET_SLOT_RESET)
+#define RTAS_FN_IBM_SET_SYSTEM_PARAMETER rtas_fn_handle(RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER)
+#define RTAS_FN_IBM_SET_XIVE rtas_fn_handle(RTAS_FNIDX__IBM_SET_XIVE)
+#define RTAS_FN_IBM_SLOT_ERROR_DETAIL rtas_fn_handle(RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL)
+#define RTAS_FN_IBM_SUSPEND_ME rtas_fn_handle(RTAS_FNIDX__IBM_SUSPEND_ME)
+#define RTAS_FN_IBM_TUNE_DMA_PARMS rtas_fn_handle(RTAS_FNIDX__IBM_TUNE_DMA_PARMS)
+#define RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT)
+#define RTAS_FN_IBM_UPDATE_NODES rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_NODES)
+#define RTAS_FN_IBM_UPDATE_PROPERTIES rtas_fn_handle(RTAS_FNIDX__IBM_UPDATE_PROPERTIES)
+#define RTAS_FN_IBM_VALIDATE_FLASH_IMAGE rtas_fn_handle(RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE)
+#define RTAS_FN_IBM_WRITE_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__IBM_WRITE_PCI_CONFIG)
+#define RTAS_FN_NVRAM_FETCH rtas_fn_handle(RTAS_FNIDX__NVRAM_FETCH)
+#define RTAS_FN_NVRAM_STORE rtas_fn_handle(RTAS_FNIDX__NVRAM_STORE)
+#define RTAS_FN_POWER_OFF rtas_fn_handle(RTAS_FNIDX__POWER_OFF)
+#define RTAS_FN_PUT_TERM_CHAR rtas_fn_handle(RTAS_FNIDX__PUT_TERM_CHAR)
+#define RTAS_FN_QUERY_CPU_STOPPED_STATE rtas_fn_handle(RTAS_FNIDX__QUERY_CPU_STOPPED_STATE)
+#define RTAS_FN_READ_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__READ_PCI_CONFIG)
+#define RTAS_FN_RTAS_LAST_ERROR rtas_fn_handle(RTAS_FNIDX__RTAS_LAST_ERROR)
+#define RTAS_FN_SET_INDICATOR rtas_fn_handle(RTAS_FNIDX__SET_INDICATOR)
+#define RTAS_FN_SET_POWER_LEVEL rtas_fn_handle(RTAS_FNIDX__SET_POWER_LEVEL)
+#define RTAS_FN_SET_TIME_FOR_POWER_ON rtas_fn_handle(RTAS_FNIDX__SET_TIME_FOR_POWER_ON)
+#define RTAS_FN_SET_TIME_OF_DAY rtas_fn_handle(RTAS_FNIDX__SET_TIME_OF_DAY)
+#define RTAS_FN_START_CPU rtas_fn_handle(RTAS_FNIDX__START_CPU)
+#define RTAS_FN_STOP_SELF rtas_fn_handle(RTAS_FNIDX__STOP_SELF)
+#define RTAS_FN_SYSTEM_REBOOT rtas_fn_handle(RTAS_FNIDX__SYSTEM_REBOOT)
+#define RTAS_FN_THAW_TIME_BASE rtas_fn_handle(RTAS_FNIDX__THAW_TIME_BASE)
+#define RTAS_FN_WRITE_PCI_CONFIG rtas_fn_handle(RTAS_FNIDX__WRITE_PCI_CONFIG)
+
#define RTAS_UNKNOWN_SERVICE (-1)
#define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */
-/* Buffer size for ppc_rtas system call. */
-#define RTAS_RMOBUF_MAX (64 * 1024)
-
-/* RTAS return status codes */
-#define RTAS_NOT_SUSPENDABLE -9004
-#define RTAS_BUSY -2 /* RTAS Busy */
-#define RTAS_EXTENDED_DELAY_MIN 9900
-#define RTAS_EXTENDED_DELAY_MAX 9905
+/* Memory set aside for sys_rtas to use with calls that need a work area. */
+#define RTAS_USER_REGION_SIZE (64 * 1024)
/*
- * In general to call RTAS use rtas_token("string") to lookup
- * an RTAS token for the given string (e.g. "event-scan").
- * To actually perform the call use
- * ret = rtas_call(token, n_in, n_out, ...)
- * Where n_in is the number of input parameters and
- * n_out is the number of output parameters
- *
- * If the "string" is invalid on this system, RTAS_UNKNOWN_SERVICE
- * will be returned as a token. rtas_call() does look for this
- * token and error out gracefully so rtas_call(rtas_token("str"), ...)
- * may be safely used for one-shot calls to RTAS.
+ * Common RTAS function return values, derived from the table "RTAS
+ * Status Word Values" in PAPR+ v2.13 7.2.8: "Return Codes". If a
+ * function can return a value in this table then generally it has the
+ * meaning listed here. More extended commentary in the documentation
+ * for rtas_call().
*
+ * RTAS functions may use negative and positive numbers not in this
+ * set for function-specific error and success conditions,
+ * respectively.
*/
-
-typedef __be32 rtas_arg_t;
-
-struct rtas_args {
- __be32 token;
- __be32 nargs;
- __be32 nret;
- rtas_arg_t args[16];
- rtas_arg_t *rets; /* Pointer to return values in args[]. */
-};
-
-struct rtas_t {
- unsigned long entry; /* physical address pointer */
- unsigned long base; /* physical address pointer */
- unsigned long size;
- arch_spinlock_t lock;
- struct rtas_args args;
- struct device_node *dev; /* virtual address pointer */
-};
-
-struct rtas_suspend_me_data {
- atomic_t working; /* number of cpus accessing this struct */
- atomic_t done;
- int token; /* ibm,suspend-me */
- atomic_t error;
- struct completion *complete; /* wait on this until working == 0 */
-};
+#define RTAS_SUCCESS 0 /* Success. */
+#define RTAS_HARDWARE_ERROR -1 /* Hardware or other unspecified error. */
+#define RTAS_BUSY -2 /* Retry immediately. */
+#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */
+#define RTAS_FUNC_NOT_SUPPORTED -5 /* Function not supported */
+#define RTAS_UNEXPECTED_STATE_CHANGE -7 /* Seems limited to EEH and slot reset. */
+#define RTAS_EXTENDED_DELAY_MIN 9900 /* Retry after delaying for ~1ms. */
+#define RTAS_EXTENDED_DELAY_MAX 9905 /* Retry after delaying for ~100s. */
+#define RTAS_ML_ISOLATION_ERROR -9000 /* Multi-level isolation error. */
+
+/* statuses specific to ibm,suspend-me */
+#define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */
+#define RTAS_NOT_SUSPENDABLE -9004 /* Partition not suspendable */
+#define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */
+#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */
/* RTAS event classes */
#define RTAS_INTERNAL_ERROR 0x80000000 /* set bit 0 */
#define RTAS_EPOW_WARNING 0x40000000 /* set bit 1 */
#define RTAS_HOTPLUG_EVENTS 0x10000000 /* set bit 3 */
#define RTAS_IO_EVENTS 0x08000000 /* set bit 4 */
+#define RTAS_HVPIPE_MSG_EVENTS 0x04000000 /* set bit 5 */
#define RTAS_EVENT_SCAN_ALL_EVENTS 0xffffffff
/* RTAS event severity */
@@ -124,7 +287,9 @@ struct rtas_suspend_me_data {
#define RTAS_TYPE_INFO 0xE2
#define RTAS_TYPE_DEALLOC 0xE3
#define RTAS_TYPE_DUMP 0xE4
-/* I don't add PowerMGM events right now, this is a different topic */
+#define RTAS_TYPE_HOTPLUG 0xE5
+#define RTAS_TYPE_HVPIPE 0xE6
+/* I don't add PowerMGM events right now, this is a different topic */
#define RTAS_TYPE_PMGM_POWER_SW_ON 0x60
#define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61
#define RTAS_TYPE_PMGM_LID_OPEN 0x62
@@ -149,31 +314,6 @@ struct rtas_suspend_me_data {
/* RTAS check-exception vector offset */
#define RTAS_VECTOR_EXTERNAL_INTERRUPT 0x500
-struct rtas_error_log {
- /* Byte 0 */
- uint8_t byte0; /* Architectural version */
-
- /* Byte 1 */
- uint8_t byte1;
- /* XXXXXXXX
- * XXX 3: Severity level of error
- * XX 2: Degree of recovery
- * X 1: Extended log present?
- * XX 2: Reserved
- */
-
- /* Byte 2 */
- uint8_t byte2;
- /* XXXXXXXX
- * XXXX 4: Initiator of event
- * XXXX 4: Target of failed operation
- */
- uint8_t byte3; /* General event or error*/
- __be32 extended_log_length; /* length in bytes */
- unsigned char buffer[1]; /* Start of extended log */
- /* Variable length. */
-};
-
static inline uint8_t rtas_error_severity(const struct rtas_error_log *elog)
{
return (elog->byte1 & 0xE0) >> 5;
@@ -184,11 +324,23 @@ static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog)
return (elog->byte1 & 0x18) >> 3;
}
+/*
+ * Overwrite the disposition field of @elog (byte1 bits 0x18, the field
+ * read by rtas_error_disposition()) with RTAS_DISP_FULLY_RECOVERED.
+ */
+static inline
+void rtas_set_disposition_recovered(struct rtas_error_log *elog)
+{
+ elog->byte1 &= ~0x18; /* clear the two disposition bits first */
+ elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3);
+}
+
static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
{
return (elog->byte1 & 0x04) >> 2;
}
+/* Return the upper four bits of byte2 of @elog, shifted down to bits 0-3. */
+static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
+{
+ return (elog->byte2 & 0xf0) >> 4;
+}
+
#define rtas_error_type(x) ((x)->byte3)
static inline
@@ -201,47 +353,6 @@ uint32_t rtas_error_extended_log_length(const struct rtas_error_log *elog)
#define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8))
-/* RTAS general extended event log, Version 6. The extended log starts
- * from "buffer" field of struct rtas_error_log defined above.
- */
-struct rtas_ext_event_log_v6 {
- /* Byte 0 */
- uint8_t byte0;
- /* XXXXXXXX
- * X 1: Log valid
- * X 1: Unrecoverable error
- * X 1: Recoverable (correctable or successfully retried)
- * X 1: Bypassed unrecoverable error (degraded operation)
- * X 1: Predictive error
- * X 1: "New" log (always 1 for data returned from RTAS)
- * X 1: Big Endian
- * X 1: Reserved
- */
-
- /* Byte 1 */
- uint8_t byte1; /* reserved */
-
- /* Byte 2 */
- uint8_t byte2;
- /* XXXXXXXX
- * X 1: Set to 1 (indicating log is in PowerPC format)
- * XXX 3: Reserved
- * XXXX 4: Log format used for bytes 12-2047
- */
-
- /* Byte 3 */
- uint8_t byte3; /* reserved */
- /* Byte 4-11 */
- uint8_t reserved[8]; /* reserved */
- /* Byte 12-15 */
- __be32 company_id; /* Company ID of the company */
- /* that defines the format for */
- /* the vendor specific log type */
- /* Byte 16-end of log */
- uint8_t vendor_log[1]; /* Start of vendor specific log */
- /* Variable length. */
-};
-
static
inline uint8_t rtas_ext_event_log_format(struct rtas_ext_event_log_v6 *ext_log)
{
@@ -270,19 +381,12 @@ inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log)
#define PSERIES_ELOG_SECT_ID_HMC_ID (('H' << 8) | 'M')
#define PSERIES_ELOG_SECT_ID_EPOW (('E' << 8) | 'P')
#define PSERIES_ELOG_SECT_ID_IO_EVENT (('I' << 8) | 'E')
+#define PSERIES_ELOG_SECT_ID_HVPIPE_EVENT (('P' << 8) | 'E')
#define PSERIES_ELOG_SECT_ID_MANUFACT_INFO (('M' << 8) | 'I')
#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
-
-/* Vendor specific Platform Event Log Format, Version 6, section header */
-struct pseries_errorlog {
- __be16 id; /* 0x00 2-byte ASCII section ID */
- __be16 length; /* 0x02 Section length in bytes */
- uint8_t version; /* 0x04 Section version */
- uint8_t subtype; /* 0x05 Section subtype */
- __be16 creator_component; /* 0x06 Creator component ID */
- uint8_t data[]; /* 0x08 Start of section data */
-};
+#define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C')
static
inline uint16_t pseries_errorlog_id(struct pseries_errorlog *sect)
@@ -296,6 +400,21 @@ inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect)
return be16_to_cpu(sect->length);
}
+#define PSERIES_HP_ELOG_RESOURCE_CPU 1
+#define PSERIES_HP_ELOG_RESOURCE_MEM 2
+#define PSERIES_HP_ELOG_RESOURCE_SLOT 3
+#define PSERIES_HP_ELOG_RESOURCE_PHB 4
+#define PSERIES_HP_ELOG_RESOURCE_PMEM 6
+#define PSERIES_HP_ELOG_RESOURCE_DT 7
+
+#define PSERIES_HP_ELOG_ACTION_ADD 1
+#define PSERIES_HP_ELOG_ACTION_REMOVE 2
+
+#define PSERIES_HP_ELOG_ID_DRC_NAME 1
+#define PSERIES_HP_ELOG_ID_DRC_INDEX 2
+#define PSERIES_HP_ELOG_ID_DRC_COUNT 3
+#define PSERIES_HP_ELOG_ID_DRC_IC 4
+
struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
uint16_t section_id);
@@ -307,55 +426,64 @@ extern void (*rtas_flash_term_hook)(int);
extern struct rtas_t rtas;
-extern void enter_rtas(unsigned long);
-extern int rtas_token(const char *service);
-extern int rtas_service_present(const char *service);
-extern int rtas_call(int token, int, int, int *, ...);
-extern void rtas_restart(char *cmd);
-extern void rtas_power_off(void);
-extern void rtas_halt(void);
-extern void rtas_os_term(char *str);
-extern int rtas_get_sensor(int sensor, int index, int *state);
-extern int rtas_get_power_level(int powerdomain, int *level);
-extern int rtas_set_power_level(int powerdomain, int level, int *setlevel);
-extern bool rtas_indicator_present(int token, int *maxindex);
-extern int rtas_set_indicator(int indicator, int index, int new_value);
-extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
-extern void rtas_progress(char *s, unsigned short hex);
-extern void rtas_initialize(void);
-extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
-extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
-extern int rtas_online_cpus_mask(cpumask_var_t cpus);
-extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
-extern int rtas_ibm_suspend_me(struct rtas_args *);
+s32 rtas_function_token(const rtas_fn_handle_t handle);
+static inline bool rtas_function_implemented(const rtas_fn_handle_t handle)
+{
+ return rtas_function_token(handle) != RTAS_UNKNOWN_SERVICE;
+}
+int rtas_token(const char *service);
+int rtas_call(int token, int nargs, int nret, int *outputs, ...);
+void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
+ int nret, ...);
+void __noreturn rtas_restart(char *cmd);
+void rtas_power_off(void);
+void __noreturn rtas_halt(void);
+void rtas_os_term(char *str);
+void rtas_activate_firmware(void);
+int rtas_get_sensor(int sensor, int index, int *state);
+int rtas_get_sensor_fast(int sensor, int index, int *state);
+int rtas_get_power_level(int powerdomain, int *level);
+int rtas_set_power_level(int powerdomain, int level, int *setlevel);
+bool rtas_indicator_present(int token, int *maxindex);
+int rtas_set_indicator(int indicator, int index, int new_value);
+int rtas_set_indicator_fast(int indicator, int index, int new_value);
+void rtas_progress(char *s, unsigned short hex);
+int rtas_ibm_suspend_me(int *fw_status);
+int rtas_error_rc(int rtas_rc);
struct rtc_time;
-extern unsigned long rtas_get_boot_time(void);
-extern void rtas_get_rtc_time(struct rtc_time *rtc_time);
-extern int rtas_set_rtc_time(struct rtc_time *rtc_time);
+time64_t rtas_get_boot_time(void);
+void rtas_get_rtc_time(struct rtc_time *rtc_time);
+int rtas_set_rtc_time(struct rtc_time *rtc_time);
-extern unsigned int rtas_busy_delay_time(int status);
-extern unsigned int rtas_busy_delay(int status);
+unsigned int rtas_busy_delay_time(int status);
+bool rtas_busy_delay(int status);
-extern int early_init_dt_scan_rtas(unsigned long node,
- const char *uname, int depth, void *data);
+int early_init_dt_scan_rtas(unsigned long node, const char *uname, int depth, void *data);
-extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
+void pSeries_log_error(char *buf, unsigned int err_type, int fatal);
#ifdef CONFIG_PPC_PSERIES
-extern int pseries_devicetree_update(s32 scope);
-extern void post_mobility_fixup(void);
+extern time64_t last_rtas_event;
+int clobbering_unread_rtas_event(void);
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
+#else
+static inline int clobbering_unread_rtas_event(void) { return 0; }
+static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+ return -EINVAL;
+}
#endif
#ifdef CONFIG_PPC_RTAS_DAEMON
-extern void rtas_cancel_event_scan(void);
+void rtas_cancel_event_scan(void);
#else
static inline void rtas_cancel_event_scan(void) { }
#endif
/* Error types logged. */
#define ERR_FLAG_ALREADY_LOGGED 0x0
-#define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */
+#define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */
#define ERR_TYPE_RTAS_LOG 0x2 /* from rtas event-scan */
#define ERR_TYPE_KERNEL_PANIC 0x4 /* from die()/panic() */
#define ERR_TYPE_KERNEL_PANIC_GZ 0x8 /* ditto, compressed */
@@ -365,7 +493,7 @@ static inline void rtas_cancel_event_scan(void) { }
(ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ)
#define RTAS_DEBUG KERN_DEBUG "RTAS: "
-
+
#define RTAS_ERROR_LOG_MAX 2048
/*
@@ -373,7 +501,7 @@ static inline void rtas_cancel_event_scan(void) { }
* for all rtas calls that require an error buffer argument.
* This includes 'check-exception' and 'rtas-last-error'.
*/
-extern int rtas_get_error_log_max(void);
+int rtas_get_error_log_max(void);
/* Event Scan Parameters */
#define EVENT_SCAN_ALL_EVENTS 0xf0000000
@@ -394,6 +522,13 @@ extern char rtas_data_buf[RTAS_DATA_BUF_SIZE];
/* RMO buffer reserved for user-space RTAS use */
extern unsigned long rtas_rmo_buf;
+extern struct mutex rtas_ibm_get_vpd_lock;
+extern struct mutex rtas_ibm_get_indices_lock;
+extern struct mutex rtas_ibm_set_dynamic_indicator_lock;
+extern struct mutex rtas_ibm_get_dynamic_sensor_state_lock;
+extern struct mutex rtas_ibm_physical_attestation_lock;
+extern struct mutex rtas_ibm_send_hvpipe_msg_lock;
+
#define GLOBAL_INTERRUPT_QUEUE 9005
/**
@@ -412,27 +547,33 @@ static inline u32 rtas_config_addr(int busno, int devfn, int reg)
(devfn << 8) | (reg & 0xff);
}
-extern void rtas_give_timebase(void);
-extern void rtas_take_timebase(void);
+void rtas_give_timebase(void);
+void rtas_take_timebase(void);
#ifdef CONFIG_PPC_RTAS
static inline int page_is_rtas_user_buf(unsigned long pfn)
{
unsigned long paddr = (pfn << PAGE_SHIFT);
- if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_RMOBUF_MAX))
+ if (paddr >= rtas_rmo_buf && paddr < (rtas_rmo_buf + RTAS_USER_REGION_SIZE))
return 1;
return 0;
}
/* Not the best place to put pSeries_coalesce_init, will be fixed when we
* move some of the rtas suspend-me stuff to pseries */
-extern void pSeries_coalesce_init(void);
+void pSeries_coalesce_init(void);
+void rtas_initialize(void);
#else
static inline int page_is_rtas_user_buf(unsigned long pfn) { return 0;}
static inline void pSeries_coalesce_init(void) { }
+static inline void rtas_initialize(void) { }
#endif
-extern int call_rtas(const char *, int, int, unsigned long *, ...);
+#ifdef CONFIG_HV_PERF_CTRS
+void read_24x7_sys_info(void);
+#else
+static inline void read_24x7_sys_info(void) { }
+#endif
#endif /* __KERNEL__ */
#endif /* _POWERPC_RTAS_H */
diff --git a/arch/powerpc/include/asm/rtc.h b/arch/powerpc/include/asm/rtc.h
deleted file mode 100644
index f5802926b6c0..000000000000
--- a/arch/powerpc/include/asm/rtc.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Real-time clock definitions and interfaces
- *
- * Author: Tom Rini <trini@mvista.com>
- *
- * 2002 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- *
- * Based on:
- * include/asm-m68k/rtc.h
- *
- * Copyright Richard Zidlicky
- * implementation details for genrtc/q40rtc driver
- *
- * And the old drivers/macintosh/rtc.c which was heavily based on:
- * Linux/SPARC Real Time Clock Driver
- * Copyright (C) 1996 Thomas K. Dyas (tdyas@eden.rutgers.edu)
- *
- * With additional work by Paul Mackerras and Franz Sirl.
- */
-
-#ifndef __ASM_POWERPC_RTC_H__
-#define __ASM_POWERPC_RTC_H__
-
-#ifdef __KERNEL__
-
-#include <linux/rtc.h>
-
-#include <asm/machdep.h>
-#include <asm/time.h>
-
-#define RTC_PIE 0x40 /* periodic interrupt enable */
-#define RTC_AIE 0x20 /* alarm interrupt enable */
-#define RTC_UIE 0x10 /* update-finished interrupt enable */
-
-/* some dummy definitions */
-#define RTC_BATT_BAD 0x100 /* battery bad */
-#define RTC_SQWE 0x08 /* enable square-wave output */
-#define RTC_DM_BINARY 0x04 /* all time/date values are BCD if clear */
-#define RTC_24H 0x02 /* 24 hour mode - else hours bit 7 means pm */
-#define RTC_DST_EN 0x01 /* auto switch DST - works f. USA only */
-
-static inline unsigned int get_rtc_time(struct rtc_time *time)
-{
- if (ppc_md.get_rtc_time)
- ppc_md.get_rtc_time(time);
- return RTC_24H;
-}
-
-/* Set the current date and time in the real time clock. */
-static inline int set_rtc_time(struct rtc_time *time)
-{
- if (ppc_md.set_rtc_time)
- return ppc_md.set_rtc_time(time);
- return -EINVAL;
-}
-
-static inline unsigned int get_rtc_ss(void)
-{
- struct rtc_time h;
-
- get_rtc_time(&h);
- return h.tm_sec;
-}
-
-static inline int get_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-static inline int set_rtc_pll(struct rtc_pll_info *pll)
-{
- return -EINVAL;
-}
-
-#endif /* __KERNEL__ */
-#endif /* __ASM_POWERPC_RTC_H__ */
diff --git a/arch/powerpc/include/asm/runlatch.h b/arch/powerpc/include/asm/runlatch.h
index 54e9b963876e..ceb66d761fe1 100644
--- a/arch/powerpc/include/asm/runlatch.h
+++ b/arch/powerpc/include/asm/runlatch.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
@@ -18,10 +19,9 @@ extern void __ppc64_runlatch_off(void);
do { \
if (cpu_has_feature(CPU_FTR_CTRL) && \
test_thread_local_flags(_TLF_RUNLATCH)) { \
- unsigned long msr = mfmsr(); \
__hard_irq_disable(); \
__ppc64_runlatch_off(); \
- if (msr & MSR_EE) \
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \
__hard_irq_enable(); \
} \
} while (0)
@@ -30,10 +30,9 @@ extern void __ppc64_runlatch_off(void);
do { \
if (cpu_has_feature(CPU_FTR_CTRL) && \
!test_thread_local_flags(_TLF_RUNLATCH)) { \
- unsigned long msr = mfmsr(); \
__hard_irq_disable(); \
__ppc64_runlatch_on(); \
- if (msr & MSR_EE) \
+ if (!(local_paca->irq_happened & PACA_IRQ_HARD_DIS)) \
__hard_irq_enable(); \
} \
} while (0)
diff --git a/arch/powerpc/include/asm/scom.h b/arch/powerpc/include/asm/scom.h
deleted file mode 100644
index f5cde45b1161..000000000000
--- a/arch/powerpc/include/asm/scom.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright 2010 Benjamin Herrenschmidt, IBM Corp
- * <benh@kernel.crashing.org>
- * and David Gibson, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef _ASM_POWERPC_SCOM_H
-#define _ASM_POWERPC_SCOM_H
-
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_PPC_SCOM
-
-/*
- * The SCOM bus is a sideband bus used for accessing various internal
- * registers of the processor or the chipset. The implementation details
- * differ between processors and platforms, and the access method as
- * well.
- *
- * This API allows to "map" ranges of SCOM register numbers associated
- * with a given SCOM controller. The later must be represented by a
- * device node, though some implementations might support NULL if there
- * is no possible ambiguity
- *
- * Then, scom_read/scom_write can be used to accesses registers inside
- * that range. The argument passed is a register number relative to
- * the beginning of the range mapped.
- */
-
-typedef void *scom_map_t;
-
-/* Value for an invalid SCOM map */
-#define SCOM_MAP_INVALID (NULL)
-
-/* The scom_controller data structure is what the platform passes
- * to the core code in scom_init, it provides the actual implementation
- * of all the SCOM functions
- */
-struct scom_controller {
- scom_map_t (*map)(struct device_node *ctrl_dev, u64 reg, u64 count);
- void (*unmap)(scom_map_t map);
-
- int (*read)(scom_map_t map, u64 reg, u64 *value);
- int (*write)(scom_map_t map, u64 reg, u64 value);
-};
-
-extern const struct scom_controller *scom_controller;
-
-/**
- * scom_init - Initialize the SCOM backend, called by the platform
- * @controller: The platform SCOM controller
- */
-static inline void scom_init(const struct scom_controller *controller)
-{
- scom_controller = controller;
-}
-
-/**
- * scom_map_ok - Test is a SCOM mapping is successful
- * @map: The result of scom_map to test
- */
-static inline int scom_map_ok(scom_map_t map)
-{
- return map != SCOM_MAP_INVALID;
-}
-
-/**
- * scom_map - Map a block of SCOM registers
- * @ctrl_dev: Device node of the SCOM controller
- * some implementations allow NULL here
- * @reg: first SCOM register to map
- * @count: Number of SCOM registers to map
- */
-
-static inline scom_map_t scom_map(struct device_node *ctrl_dev,
- u64 reg, u64 count)
-{
- return scom_controller->map(ctrl_dev, reg, count);
-}
-
-/**
- * scom_find_parent - Find the SCOM controller for a device
- * @dev: OF node of the device
- *
- * This is not meant for general usage, but in combination with
- * scom_map() allows to map registers not represented by the
- * device own scom-reg property. Useful for applying HW workarounds
- * on things not properly represented in the device-tree for example.
- */
-struct device_node *scom_find_parent(struct device_node *dev);
-
-
-/**
- * scom_map_device - Map a device's block of SCOM registers
- * @dev: OF node of the device
- * @index: Register bank index (index in "scom-reg" property)
- *
- * This function will use the device-tree binding for SCOM which
- * is to follow "scom-parent" properties until it finds a node with
- * a "scom-controller" property to find the controller. It will then
- * use the "scom-reg" property which is made of reg/count pairs,
- * each of them having a size defined by the controller's #scom-cells
- * property
- */
-extern scom_map_t scom_map_device(struct device_node *dev, int index);
-
-
-/**
- * scom_unmap - Unmap a block of SCOM registers
- * @map: Result of scom_map is to be unmapped
- */
-static inline void scom_unmap(scom_map_t map)
-{
- if (scom_map_ok(map))
- scom_controller->unmap(map);
-}
-
-/**
- * scom_read - Read a SCOM register
- * @map: Result of scom_map
- * @reg: Register index within that map
- * @value: Updated with the value read
- *
- * Returns 0 (success) or a negative error code
- */
-static inline int scom_read(scom_map_t map, u64 reg, u64 *value)
-{
- int rc;
-
- rc = scom_controller->read(map, reg, value);
- if (rc)
- *value = 0xfffffffffffffffful;
- return rc;
-}
-
-/**
- * scom_write - Write to a SCOM register
- * @map: Result of scom_map
- * @reg: Register index within that map
- * @value: Value to write
- *
- * Returns 0 (success) or a negative error code
- */
-static inline int scom_write(scom_map_t map, u64 reg, u64 value)
-{
- return scom_controller->write(map, reg, value);
-}
-
-
-#endif /* CONFIG_PPC_SCOM */
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_SCOM_H */
diff --git a/arch/powerpc/include/asm/seccomp.h b/arch/powerpc/include/asm/seccomp.h
new file mode 100644
index 000000000000..ac2033f134f0
--- /dev/null
+++ b/arch/powerpc/include/asm/seccomp.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SECCOMP_H
+#define _ASM_POWERPC_SECCOMP_H
+
+#include <linux/unistd.h>
+
+#define __NR_seccomp_sigreturn_32 __NR_sigreturn
+
+#include <asm-generic/seccomp.h>
+
+#ifdef __LITTLE_ENDIAN__
+#define __SECCOMP_ARCH_LE __AUDIT_ARCH_LE
+#define __SECCOMP_ARCH_LE_NAME "le"
+#else
+#define __SECCOMP_ARCH_LE 0
+#define __SECCOMP_ARCH_LE_NAME
+#endif
+
+#ifdef CONFIG_PPC64
+# define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_PPC64 | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "ppc64" __SECCOMP_ARCH_LE_NAME
+# ifdef CONFIG_COMPAT
+# define SECCOMP_ARCH_COMPAT (AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_COMPAT_NR NR_syscalls
+# define SECCOMP_ARCH_COMPAT_NAME "ppc" __SECCOMP_ARCH_LE_NAME
+# endif
+#else /* !CONFIG_PPC64 */
+# define SECCOMP_ARCH_NATIVE (AUDIT_ARCH_PPC | __SECCOMP_ARCH_LE)
+# define SECCOMP_ARCH_NATIVE_NR NR_syscalls
+# define SECCOMP_ARCH_NATIVE_NAME "ppc" __SECCOMP_ARCH_LE_NAME
+#endif
+
+#endif /* _ASM_POWERPC_SECCOMP_H */
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index a5e930aca804..f43f3a6b0051 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -1,25 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SECTIONS_H
#define _ASM_POWERPC_SECTIONS_H
#ifdef __KERNEL__
#include <linux/elf.h>
#include <linux/uaccess.h>
+
+#ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS
+typedef struct func_desc func_desc_t;
+#endif
+
#include <asm-generic/sections.h>
+extern char __head_end[];
+extern char __srwx_boundary[];
+extern char __exittext_begin[], __exittext_end[];
+
+/* Patch sites */
+extern s32 patch__call_flush_branch_caches1;
+extern s32 patch__call_flush_branch_caches2;
+extern s32 patch__call_flush_branch_caches3;
+extern s32 patch__flush_count_cache_return;
+extern s32 patch__flush_link_stack_return;
+extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_p9;
+extern s32 patch__memset_nocache, patch__memcpy_nocache;
+
+extern long flush_branch_caches;
+extern long kvm_flush_link_stack;
+
#ifdef __powerpc64__
extern char __start_interrupts[];
extern char __end_interrupts[];
-extern char __prom_init_toc_start[];
-extern char __prom_init_toc_end[];
+#ifdef CONFIG_PPC_POWERNV
+extern char start_real_trampolines[];
+extern char end_real_trampolines[];
+extern char start_virt_trampolines[];
+extern char end_virt_trampolines[];
+#endif
-static inline int in_kernel_text(unsigned long addr)
+/*
+ * This assumes the kernel is never compiled -mcmodel=small or
+ * the total .toc is always less than 64k.
+ */
+static inline unsigned long kernel_toc_addr(void)
{
- if (addr >= (unsigned long)_stext && addr < (unsigned long)__init_end)
- return 1;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ BUILD_BUG();
+ return -1UL;
+#else
+ unsigned long toc_ptr;
- return 0;
+ asm volatile("mr %0, 2" : "=r" (toc_ptr));
+ return toc_ptr;
+#endif
}
static inline int overlaps_interrupt_vector_text(unsigned long start,
@@ -39,30 +75,8 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end)
(unsigned long)_stext < end;
}
-static inline int overlaps_kvm_tmp(unsigned long start, unsigned long end)
-{
-#ifdef CONFIG_KVM_GUEST
- extern char kvm_tmp[];
- return start < (unsigned long)kvm_tmp &&
- (unsigned long)&kvm_tmp[1024 * 1024] < end;
#else
- return 0;
-#endif
-}
-
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
-#undef dereference_function_descriptor
-static inline void *dereference_function_descriptor(void *ptr)
-{
- struct ppc64_opd_entry *desc = ptr;
- void *p;
-
- if (!probe_kernel_address(&desc->funcaddr, p))
- ptr = p;
- return ptr;
-}
-#endif
-
+static inline unsigned long kernel_toc_addr(void) { BUILD_BUG(); return -1UL; }
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h
new file mode 100644
index 000000000000..a2ff556916c6
--- /dev/null
+++ b/arch/powerpc/include/asm/secure_boot.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Secure boot definitions
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#ifndef _ASM_POWER_SECURE_BOOT_H
+#define _ASM_POWER_SECURE_BOOT_H
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+bool is_ppc_secureboot_enabled(void);
+bool is_ppc_trustedboot_enabled(void);
+
+#else
+
+static inline bool is_ppc_secureboot_enabled(void)
+{
+ return false;
+}
+
+static inline bool is_ppc_trustedboot_enabled(void)
+{
+ return false;
+}
+
+#endif
+#endif
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
new file mode 100644
index 000000000000..27574f218b37
--- /dev/null
+++ b/arch/powerpc/include/asm/security_features.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Security related feature bit definitions.
+ *
+ * Copyright 2018, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SECURITY_FEATURES_H
+#define _ASM_POWERPC_SECURITY_FEATURES_H
+
+
+extern u64 powerpc_security_features;
+extern bool rfi_flush;
+
+/* These are bit flags */
+enum stf_barrier_type {
+ STF_BARRIER_NONE = 0x1,
+ STF_BARRIER_FALLBACK = 0x2,
+ STF_BARRIER_EIEIO = 0x4,
+ STF_BARRIER_SYNC_ORI = 0x8,
+};
+
+void setup_stf_barrier(void);
+void do_stf_barrier_fixups(enum stf_barrier_type types);
+void setup_count_cache_flush(void);
+
+static inline void security_ftr_set(u64 feature)
+{
+ powerpc_security_features |= feature;
+}
+
+static inline void security_ftr_clear(u64 feature)
+{
+ powerpc_security_features &= ~feature;
+}
+
+static inline bool security_ftr_enabled(u64 feature)
+{
+ return !!(powerpc_security_features & feature);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+enum stf_barrier_type stf_barrier_type_get(void);
+#else
+static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; }
+#endif
+
+// Features indicating support for Spectre/Meltdown mitigations
+
+// The L1-D cache can be flushed with ori r30,r30,0
+#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull
+
+// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2)
+#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull
+
+// ori r31,r31,0 acts as a speculation barrier
+#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull
+
+// Speculation past bctr is disabled
+#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull
+
+// Entries in L1-D are private to a SMT thread
+#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull
+
+// Indirect branch prediction cache disabled
+#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull
+
+// bcctr 2,0,0 triggers a hardware assisted count cache flush
+#define SEC_FTR_BCCTR_FLUSH_ASSIST 0x0000000000000800ull
+
+// bcctr 2,0,0 triggers a hardware assisted link stack flush
+#define SEC_FTR_BCCTR_LINK_FLUSH_ASSIST 0x0000000000002000ull
+
+// Features indicating need for Spectre/Meltdown mitigations
+
+// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest)
+#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull
+
+// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace)
+#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull
+
+// A speculation barrier should be used for bounds checks (Spectre variant 1)
+#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull
+
+// Firmware configuration indicates user favours security over performance
+#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull
+
+// Software required to flush count cache on context switch
+#define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull
+
+// Software required to flush link stack on context switch
+#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull
+
+// The L1-D cache should be flushed when entering the kernel
+#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull
+
+// The L1-D cache should be flushed after user accesses from the kernel
+#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull
+
+// The STF flush should be executed on privilege state switch
+#define SEC_FTR_STF_BARRIER 0x0000000000010000ull
+
+// Features enabled by default
+#define SEC_FTR_DEFAULT \
+ (SEC_FTR_L1D_FLUSH_HV | \
+ SEC_FTR_L1D_FLUSH_PR | \
+ SEC_FTR_BNDS_CHK_SPEC_BAR | \
+ SEC_FTR_L1D_FLUSH_ENTRY | \
+ SEC_FTR_L1D_FLUSH_UACCESS | \
+ SEC_FTR_STF_BARRIER | \
+ SEC_FTR_FAVOUR_SECURITY)
+
+#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h
new file mode 100644
index 000000000000..4828e0ab7e3c
--- /dev/null
+++ b/arch/powerpc/include/asm/secvar.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * PowerPC secure variable operations.
+ */
+#ifndef SECVAR_OPS_H
+#define SECVAR_OPS_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/sysfs.h>
+
+extern const struct secvar_operations *secvar_ops;
+
+struct secvar_operations {
+ int (*get)(const char *key, u64 key_len, u8 *data, u64 *data_size);
+ int (*get_next)(const char *key, u64 *key_len, u64 keybufsize);
+ int (*set)(const char *key, u64 key_len, u8 *data, u64 data_size);
+ ssize_t (*format)(char *buf, size_t bufsize);
+ int (*max_size)(u64 *max_size);
+ const struct attribute **config_attrs;
+
+ // NULL-terminated array of fixed variable names
+ // Only used if get_next() isn't provided
+ const char * const *var_names;
+};
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+int set_secvar_ops(const struct secvar_operations *ops);
+
+#else
+
+static inline int set_secvar_ops(const struct secvar_operations *ops) { return 0; }
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/serial.h b/arch/powerpc/include/asm/serial.h
index 3e8589b43cb2..cd6c18d0e66e 100644
--- a/arch/powerpc/include/asm/serial.h
+++ b/arch/powerpc/include/asm/serial.h
@@ -1,8 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_SERIAL_H
#define _ASM_POWERPC_SERIAL_H
diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
new file mode 100644
index 000000000000..9c8d5747755d
--- /dev/null
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SET_MEMORY_H
+#define _ASM_POWERPC_SET_MEMORY_H
+
+#define SET_MEMORY_RO 0
+#define SET_MEMORY_RW 1
+#define SET_MEMORY_NX 2
+#define SET_MEMORY_X 3
+#define SET_MEMORY_NP 4 /* Set memory non present */
+#define SET_MEMORY_P 5 /* Set memory present */
+#define SET_MEMORY_ROX 6
+
+int change_memory_attr(unsigned long addr, int numpages, long action);
+
+static inline int __must_check set_memory_ro(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_RO);
+}
+
+static inline int __must_check set_memory_rw(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_RW);
+}
+
+static inline int __must_check set_memory_nx(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_NX);
+}
+
+static inline int __must_check set_memory_x(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_X);
+}
+
+static inline int __must_check set_memory_np(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_NP);
+}
+
+static inline int __must_check set_memory_p(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_P);
+}
+
+static inline int __must_check set_memory_rox(unsigned long addr, int numpages)
+{
+ return change_memory_attr(addr, numpages, SET_MEMORY_ROX);
+}
+#define set_memory_rox set_memory_rox
+
+#endif
diff --git a/arch/powerpc/include/asm/setjmp.h b/arch/powerpc/include/asm/setjmp.h
index 279d03a1eec6..f798e80e4106 100644
--- a/arch/powerpc/include/asm/setjmp.h
+++ b/arch/powerpc/include/asm/setjmp.h
@@ -1,18 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright © 2008 Michael Neuling IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#ifndef _ASM_POWERPC_SETJMP_H
#define _ASM_POWERPC_SETJMP_H
#define JMP_BUF_LEN 23
-extern long setjmp(long *);
-extern void longjmp(long *, long);
+typedef long jmp_buf[JMP_BUF_LEN];
+
+extern int setjmp(jmp_buf env) __attribute__((returns_twice));
+extern void longjmp(jmp_buf env, int val) __attribute__((noreturn));
#endif /* _ASM_POWERPC_SETJMP_H */
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 11ba86e17631..50a92b24628d 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -1,20 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SETUP_H
#define _ASM_POWERPC_SETUP_H
#include <uapi/asm/setup.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern void ppc_printk_progress(char *s, unsigned short hex);
-extern unsigned int rtas_data;
-extern int mem_init_done; /* set on boot once kmalloc can be called */
-extern int init_bootmem_done; /* set once bootmem is available */
extern unsigned long long memory_limit;
-extern unsigned long klimit;
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
struct device_node;
-extern void note_scsi_host(struct device_node *, void *);
/* Used in very early kernel initialization. */
extern unsigned long reloc_offset(void);
@@ -24,11 +19,77 @@ extern void reloc_got2(unsigned long);
#define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x)))
void check_for_initrd(void);
-void do_init_bootmem(void);
+void mem_topology_setup(void);
+void initmem_init(void);
void setup_panic(void);
#define ARCH_PANIC_TIMEOUT 180
-#endif /* !__ASSEMBLY__ */
+#ifdef CONFIG_PPC_PSERIES
+extern bool pseries_reloc_on_exception(void);
+extern bool pseries_enable_reloc_on_exc(void);
+extern void pseries_disable_reloc_on_exc(void);
+extern void pseries_big_endian_exceptions(void);
+void __init pseries_little_endian_exceptions(void);
+#else
+static inline bool pseries_reloc_on_exception(void) { return false; }
+static inline bool pseries_enable_reloc_on_exc(void) { return false; }
+static inline void pseries_disable_reloc_on_exc(void) {}
+static inline void pseries_big_endian_exceptions(void) {}
+static inline void pseries_little_endian_exceptions(void) {}
+#endif /* CONFIG_PPC_PSERIES */
+
+void rfi_flush_enable(bool enable);
+
+/* These are bit flags */
+enum l1d_flush_type {
+ L1D_FLUSH_NONE = 0x1,
+ L1D_FLUSH_FALLBACK = 0x2,
+ L1D_FLUSH_ORI = 0x4,
+ L1D_FLUSH_MTTRIG = 0x8,
+};
+
+void setup_rfi_flush(enum l1d_flush_type, bool enable);
+void setup_entry_flush(bool enable);
+void setup_uaccess_flush(bool enable);
+void do_rfi_flush_fixups(enum l1d_flush_type types);
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+void __init setup_barrier_nospec(void);
+#else
+static inline void setup_barrier_nospec(void) { }
+#endif
+void do_uaccess_flush_fixups(enum l1d_flush_type types);
+void do_entry_flush_fixups(enum l1d_flush_type types);
+void do_barrier_nospec_fixups(bool enable);
+extern bool barrier_nospec_enabled;
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+void do_barrier_nospec_fixups_range(bool enable, void *start, void *end);
+#else
+static inline void do_barrier_nospec_fixups_range(bool enable, void *start, void *end) { }
+#endif
+
+#ifdef CONFIG_PPC_E500
+void __init setup_spectre_v2(void);
+#else
+static inline void setup_spectre_v2(void) {}
+#endif
+void __init do_btb_flush_fixups(void);
+
+#ifdef CONFIG_PPC32
+unsigned long __init early_init(unsigned long dt_ptr);
+void __init machine_init(u64 dt_ptr);
+#endif
+void __init early_setup(unsigned long dt_ptr);
+void early_setup_secondary(void);
+
+/* prom_init (OpenFirmware) */
+unsigned long __init prom_init(unsigned long r3, unsigned long r4,
+ unsigned long pp, unsigned long r6,
+ unsigned long r7, unsigned long kbase);
+
+extern struct seq_buf ppc_hw_desc;
+
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_POWERPC_SETUP_H */
diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
index d89beaba26ff..8b957aabb826 100644
--- a/arch/powerpc/include/asm/sfp-machine.h
+++ b/arch/powerpc/include/asm/sfp-machine.h
@@ -213,30 +213,18 @@
* respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
* (i.e. carry out) is not stored anywhere, and is lost.
*/
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "%r" ((USItype)(ah)), \
- "%r" ((USItype)(al)), \
- "rI" ((USItype)(bl))); \
- else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \
- __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "%r" ((USItype)(ah)), \
- "%r" ((USItype)(al)), \
- "rI" ((USItype)(bl))); \
+ __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else \
- __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "%r" ((USItype)(ah)), \
- "r" ((USItype)(bh)), \
- "%r" ((USItype)(al)), \
- "rI" ((USItype)(bl))); \
+ __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
} while (0)
/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
@@ -248,44 +236,24 @@
* and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
* and is lost.
*/
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "r" ((USItype)(bh)), \
- "rI" ((USItype)(al)), \
- "r" ((USItype)(bl))); \
- else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "r" ((USItype)(bh)), \
- "rI" ((USItype)(al)), \
- "r" ((USItype)(bl))); \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "r" ((USItype)(ah)), \
- "rI" ((USItype)(al)), \
- "r" ((USItype)(bl))); \
- else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "r" ((USItype)(ah)), \
- "rI" ((USItype)(al)), \
- "r" ((USItype)(bl))); \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+ else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
+ : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else \
- __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
- : "=r" ((USItype)(sh)), \
- "=&r" ((USItype)(sl)) \
- : "r" ((USItype)(ah)), \
- "r" ((USItype)(bh)), \
- "rI" ((USItype)(al)), \
- "r" ((USItype)(bl))); \
+ __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
+ : "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
} while (0)
/* asm fragments for mul and div */
@@ -294,13 +262,10 @@
* UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
* word product in HIGH_PROD and LOW_PROD.
*/
-#define umul_ppmm(ph, pl, m0, m1) \
+#define umul_ppmm(ph, pl, m0, m1) \
do { \
USItype __m0 = (m0), __m1 = (m1); \
- __asm__ ("mulhwu %0,%1,%2" \
- : "=r" ((USItype)(ph)) \
- : "%r" (__m0), \
- "r" (__m1)); \
+ __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
(pl) = __m0 * __m1; \
} while (0)
@@ -312,9 +277,10 @@
* significant bit of DENOMINATOR must be 1, then the pre-processor symbol
* UDIV_NEEDS_NORMALIZATION is defined to 1.
*/
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
- UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+ UWtype __d1, __d0, __q1, __q0; \
+ UWtype __r1, __r0, __m; \
__d1 = __ll_highpart (d); \
__d0 = __ll_lowpart (d); \
\
@@ -325,7 +291,7 @@
if (__r1 < __m) \
{ \
__q1--, __r1 += (d); \
- if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \
+ if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
if (__r1 < __m) \
__q1--, __r1 += (d); \
} \
diff --git a/arch/powerpc/include/asm/shmparam.h b/arch/powerpc/include/asm/shmparam.h
index 5cda42a6d39e..bc0968839565 100644
--- a/arch/powerpc/include/asm/shmparam.h
+++ b/arch/powerpc/include/asm/shmparam.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SHMPARAM_H
#define _ASM_POWERPC_SHMPARAM_H
diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h
index 9322c28aebd2..922d43700fb4 100644
--- a/arch/powerpc/include/asm/signal.h
+++ b/arch/powerpc/include/asm/signal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SIGNAL_H
#define _ASM_POWERPC_SIGNAL_H
@@ -5,6 +6,12 @@
#include <uapi/asm/signal.h>
#include <uapi/asm/ptrace.h>
-extern unsigned long get_tm_stackpointer(struct pt_regs *regs);
+struct pt_regs;
+void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
+
+unsigned long get_min_sigframe_size_32(void);
+unsigned long get_min_sigframe_size_64(void);
+unsigned long get_min_sigframe_size(void);
+unsigned long get_min_sigframe_size_compat(void);
#endif /* _ASM_POWERPC_SIGNAL_H */
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
new file mode 100644
index 000000000000..4dd12dcb9ef8
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_H
+
+/*
+ * Simple spin lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ * Rework to support virtual processors
+ *
+ * Type of int is used as a full 64b word is not necessary.
+ *
+ * (the type definitions are in asm/simple_spinlock_types.h)
+ */
+#include <linux/irqflags.h>
+#include <linux/kcsan-checks.h>
+#include <asm/paravirt.h>
+#include <asm/paca.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC64
+/* use 0x800000yy when locked, where yy == CPU number */
+#ifdef __BIG_ENDIAN__
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+#else
+#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
+#endif
+#else
+#define LOCK_TOKEN 1
+#endif
+
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+ return lock.slock == 0;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+ return !arch_spin_value_unlocked(READ_ONCE(*lock));
+}
+
+/*
+ * This returns the old value in the lock, so we succeeded
+ * in getting the lock if the return value is 0.
+ */
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+{
+ unsigned long tmp, token;
+ unsigned int eh = IS_ENABLED(CONFIG_PPC64);
+
+ token = LOCK_TOKEN;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2,%[eh]\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n\
+ stwcx. %1,0,%2\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:"
+ : "=&r" (tmp)
+ : "r" (token), "r" (&lock->slock), [eh] "n" (eh)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+ return __arch_spin_trylock(lock) == 0;
+}
+
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor. Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held. Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+void splpar_spin_yield(arch_spinlock_t *lock);
+void splpar_rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {}
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {}
+#endif
+
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ else
+ barrier();
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+ if (is_shared_processor())
+ splpar_rw_yield(lock);
+ else
+ barrier();
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+ while (1) {
+ if (likely(__arch_spin_trylock(lock) == 0))
+ break;
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_spin_yield(lock);
+ } while (unlikely(lock->slock != 0));
+ HMT_medium();
+ }
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+ kcsan_mb();
+ __asm__ __volatile__("# arch_spin_unlock\n\t"
+ PPC_RELEASE_BARRIER: : :"memory");
+ lock->slock = 0;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#ifdef CONFIG_PPC64
+#define __DO_SIGN_EXTEND "extsw %0,%0\n"
+#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
+#else
+#define __DO_SIGN_EXTEND
+#define WRLOCK_TOKEN (-1)
+#endif
+
+/*
+ * This returns the old value in the lock + 1,
+ * so we got a read lock if the return value is > 0.
+ */
+static inline long __arch_read_trylock(arch_rwlock_t *rw)
+{
+ long tmp;
+ unsigned int eh = IS_ENABLED(CONFIG_PPC64);
+
+ __asm__ __volatile__(
+"1: lwarx %0,0,%1,%[eh]\n"
+ __DO_SIGN_EXTEND
+" addic. %0,%0,1\n\
+ ble- 2f\n"
+" stwcx. %0,0,%1\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:" : "=&r" (tmp)
+ : "r" (&rw->lock), [eh] "n" (eh)
+ : "cr0", "xer", "memory");
+
+ return tmp;
+}
+
+/*
+ * This returns the old value in the lock,
+ * so we got the write lock if the return value is 0.
+ */
+static inline long __arch_write_trylock(arch_rwlock_t *rw)
+{
+ long tmp, token;
+ unsigned int eh = IS_ENABLED(CONFIG_PPC64);
+
+ token = WRLOCK_TOKEN;
+ __asm__ __volatile__(
+"1: lwarx %0,0,%2,%[eh]\n\
+ cmpwi 0,%0,0\n\
+ bne- 2f\n"
+" stwcx. %1,0,%2\n\
+ bne- 1b\n"
+ PPC_ACQUIRE_BARRIER
+"2:" : "=&r" (tmp)
+ : "r" (token), "r" (&rw->lock), [eh] "n" (eh)
+ : "cr0", "memory");
+
+ return tmp;
+}
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+ while (1) {
+ if (likely(__arch_read_trylock(rw) > 0))
+ break;
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
+ } while (unlikely(rw->lock < 0));
+ HMT_medium();
+ }
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ while (1) {
+ if (likely(__arch_write_trylock(rw) == 0))
+ break;
+ do {
+ HMT_low();
+ if (is_shared_processor())
+ splpar_rw_yield(rw);
+ } while (unlikely(rw->lock != 0));
+ HMT_medium();
+ }
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+ return __arch_read_trylock(rw) > 0;
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+ return __arch_write_trylock(rw) == 0;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ long tmp;
+
+ __asm__ __volatile__(
+ "# read_unlock\n\t"
+ PPC_RELEASE_BARRIER
+"1: lwarx %0,0,%1\n\
+ addic %0,%0,-1\n"
+" stwcx. %0,0,%1\n\
+ bne- 1b"
+ : "=&r"(tmp)
+ : "r"(&rw->lock)
+ : "cr0", "xer", "memory");
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ __asm__ __volatile__("# write_unlock\n\t"
+ PPC_RELEASE_BARRIER: : :"memory");
+ rw->lock = 0;
+}
+
+#define arch_spin_relax(lock) spin_yield(lock)
+#define arch_read_relax(lock) rw_yield(lock)
+#define arch_write_relax(lock) rw_yield(lock)
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
new file mode 100644
index 000000000000..391fc19f7272
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+# error "Please do not include this file directly."
+#endif
+
+typedef struct {
+ volatile unsigned int slock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
+
+typedef struct {
+ volatile signed int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 5a6614a7f0b2..e41b9ea42122 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* smp.h: PowerPC-specific SMP code.
*
@@ -6,11 +7,6 @@
*
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 1996-2001 Cort Dougan <cort@fsmlabs.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_SMP_H
@@ -22,7 +18,7 @@
#include <linux/kernel.h>
#include <linux/irqreturn.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -30,20 +26,30 @@
#include <asm/percpu.h>
extern int boot_cpuid;
+extern int boot_cpu_hwid; /* PPC64 only */
+extern int boot_core_hwid;
extern int spinning_secondaries;
+extern u32 *cpu_to_phys_id;
+extern bool coregroup_enabled;
-extern void cpu_die(void);
extern int cpu_to_chip_id(int cpu);
+extern int *chip_id_lookup_table;
+
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+DECLARE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
#ifdef CONFIG_SMP
struct smp_ops_t {
void (*message_pass)(int cpu, int msg);
#ifdef CONFIG_PPC_SMP_MUXED_IPI
- void (*cause_ipi)(int cpu, unsigned long data);
+ void (*cause_ipi)(int cpu);
#endif
- int (*probe)(void);
+ int (*cause_nmi_ipi)(int cpu);
+ void (*probe)(void);
int (*kick_cpu)(int nr);
+ int (*prepare_cpu)(int nr);
void (*setup_cpu)(int nr);
void (*bringup_done)(void);
void (*take_timebase)(void);
@@ -51,23 +57,32 @@ struct smp_ops_t {
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int nr);
int (*cpu_bootable)(unsigned int nr);
+#ifdef CONFIG_HOTPLUG_CPU
+ void (*cpu_offline_self)(void);
+#endif
};
+extern struct task_struct *secondary_current;
+
+void start_secondary(void *unused);
+extern int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
+extern int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us);
extern void smp_send_debugger_break(void);
-extern void start_secondary_resume(void);
+extern void __noreturn start_secondary_resume(void);
extern void smp_generic_give_timebase(void);
extern void smp_generic_take_timebase(void);
DECLARE_PER_CPU(unsigned int, cpu_pvr);
#ifdef CONFIG_HOTPLUG_CPU
-extern void migrate_irqs(void);
int generic_cpu_disable(void);
void generic_cpu_die(unsigned int cpu);
-void generic_mach_cpu_die(void);
void generic_set_cpu_dead(unsigned int cpu);
void generic_set_cpu_up(unsigned int cpu);
int generic_check_cpu_restart(unsigned int cpu);
+int is_cpu_dead(unsigned int cpu);
+#else
+#define generic_set_cpu_up(i) do { } while (0)
#endif
#ifdef CONFIG_PPC64
@@ -77,7 +92,7 @@ int generic_check_cpu_restart(unsigned int cpu);
/* 32-bit */
extern int smp_hw_index[];
-#define raw_smp_processor_id() (current_thread_info()->cpu)
+#define raw_smp_processor_id() (current_thread_info()->cpu)
#define hard_smp_processor_id() (smp_hw_index[smp_processor_id()])
static inline int get_hard_smp_processor_id(int cpu)
@@ -92,7 +107,9 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
#endif
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
static inline struct cpumask *cpu_sibling_mask(int cpu)
{
@@ -104,29 +121,65 @@ static inline struct cpumask *cpu_core_mask(int cpu)
return per_cpu(cpu_core_map, cpu);
}
+static inline struct cpumask *cpu_l2_cache_mask(int cpu)
+{
+ return per_cpu(cpu_l2_cache_map, cpu);
+}
+
+static inline struct cpumask *cpu_smallcore_mask(int cpu)
+{
+ return per_cpu(cpu_smallcore_map, cpu);
+}
+
extern int cpu_to_core_id(int cpu);
+extern bool has_big_cores;
+extern bool thread_group_shares_l2;
+extern bool thread_group_shares_l3;
+
+#define cpu_smt_mask cpu_smt_mask
+#ifdef CONFIG_SCHED_SMT
+static inline const struct cpumask *cpu_smt_mask(int cpu)
+{
+ if (has_big_cores)
+ return per_cpu(cpu_smallcore_map, cpu);
+
+ return per_cpu(cpu_sibling_map, cpu);
+}
+#endif /* CONFIG_SCHED_SMT */
+
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
*
* Make sure this matches openpic_request_IPIs in open_pic.c, or what shows up
* in /proc/interrupts will be wrong!!! --Troy */
-#define PPC_MSG_CALL_FUNCTION 0
-#define PPC_MSG_RESCHEDULE 1
+#define PPC_MSG_CALL_FUNCTION 0
+#define PPC_MSG_RESCHEDULE 1
#define PPC_MSG_TICK_BROADCAST 2
-#define PPC_MSG_DEBUGGER_BREAK 3
+#define PPC_MSG_NMI_IPI 3
+
+/* This is only used by the powernv kernel */
+#define PPC_MSG_RM_HOST_ACTION 4
+
+#define NMI_IPI_ALL_OTHERS -2
+
+#ifdef CONFIG_NMI_IPI
+extern int smp_handle_nmi_ipi(struct pt_regs *regs);
+#else
+static inline int smp_handle_nmi_ipi(struct pt_regs *regs) { return 0; }
+#endif
/* for irq controllers that have dedicated ipis per message (4) */
extern int smp_request_message_ipi(int virq, int message);
extern const char *smp_ipi_name[];
/* for irq controllers with only a single ipi */
-extern void smp_muxed_ipi_set_data(int cpu, unsigned long data);
extern void smp_muxed_ipi_message_pass(int cpu, int msg);
+extern void smp_muxed_ipi_set_message(int cpu, int msg);
extern irqreturn_t smp_ipi_demux(void);
+extern irqreturn_t smp_ipi_demux_relaxed(void);
void smp_init_pSeries(void);
void smp_init_cell(void);
-void smp_init_celleb(void);
void smp_setup_cpu_maps(void);
extern int __cpu_disable(void);
@@ -136,28 +189,34 @@ extern void __cpu_die(unsigned int cpu);
/* for UP */
#define hard_smp_processor_id() get_hard_smp_processor_id(0)
#define smp_setup_cpu_maps()
-static inline void inhibit_secondary_onlining(void) {}
-static inline void uninhibit_secondary_onlining(void) {}
+#define thread_group_shares_l2 0
+#define thread_group_shares_l3 0
static inline const struct cpumask *cpu_sibling_mask(int cpu)
{
return cpumask_of(cpu);
}
+static inline const struct cpumask *cpu_smallcore_mask(int cpu)
+{
+ return cpumask_of(cpu);
+}
+
+static inline const struct cpumask *cpu_l2_cache_mask(int cpu)
+{
+ return cpumask_of(cpu);
+}
#endif /* CONFIG_SMP */
#ifdef CONFIG_PPC64
static inline int get_hard_smp_processor_id(int cpu)
{
- return paca[cpu].hw_cpu_id;
+ return paca_ptrs[cpu]->hw_cpu_id;
}
static inline void set_hard_smp_processor_id(int cpu, int phys)
{
- paca[cpu].hw_cpu_id = phys;
+ paca_ptrs[cpu]->hw_cpu_id = phys;
}
-
-extern void smp_release_cpus(void);
-
#else
/* 32-bit */
#ifndef CONFIG_SMP
@@ -174,9 +233,15 @@ static inline void set_hard_smp_processor_id(int cpu, int phys)
#endif /* !CONFIG_SMP */
#endif /* !CONFIG_PPC64 */
+#if defined(CONFIG_PPC64) && (defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE))
+extern void smp_release_cpus(void);
+#else
+static inline void smp_release_cpus(void) { }
+#endif
+
extern int smt_enabled_at_boot;
-extern int smp_mpic_probe(void);
+extern void smp_mpic_probe(void);
extern void smp_mpic_setup_cpu(int cpu);
extern int smp_generic_kick_cpu(int nr);
extern int smp_generic_cpu_bootable(unsigned int nr);
@@ -195,13 +260,13 @@ extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
* 64-bit but defining them all here doesn't harm
*/
extern void generic_secondary_smp_init(void);
-extern void generic_secondary_thread_init(void);
extern unsigned long __secondary_hold_spinloop;
extern unsigned long __secondary_hold_acknowledge;
extern char __secondary_hold;
+extern unsigned int booting_thread_hwid;
extern void __early_start(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_SMP_H) */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 6e909f3e6a46..2ac6ab903023 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _SMU_H
#define _SMU_H
@@ -107,7 +108,7 @@
/*
* i2c commands
*
- * To issue an i2c command, first is to send a parameter block to the
+ * To issue an i2c command, first is to send a parameter block to
* the SMU. This is a command of type 0x9a with 9 bytes of header
* eventually followed by data for a write:
*
@@ -154,7 +155,7 @@
*
* The Darwin I2C driver is less subtle though. On any non-success status
* from the response command, it waits 5ms and tries again up to 20 times,
- * it doesn't differenciate between fatal errors or "busy" status.
+ * it doesn't differentiate between fatal errors or "busy" status.
*
* This driver provides an asynchronous paramblock based i2c command
* interface to be used either directly by low level code or by a higher
@@ -185,7 +186,7 @@
* x = processor mask
* y = op. point index
* z = processor freq. step index
- * I haven't yet decyphered result codes
+ * I haven't yet deciphered result codes
*
*/
#define SMU_CMD_POWER_COMMAND 0xaa
@@ -455,7 +456,7 @@ extern void smu_poll(void);
/*
* Init routine, presence check....
*/
-extern int smu_init(void);
+int __init smu_init(void);
extern int smu_present(void);
struct platform_device;
extern struct platform_device *smu_get_ofdev(void);
@@ -471,14 +472,7 @@ extern int smu_get_rtc_time(struct rtc_time *time, int spinwait);
extern int smu_set_rtc_time(struct rtc_time *time, int spinwait);
/*
- * SMU command buffer absolute address, exported by pmac_setup,
- * this is allocated very early during boot.
- */
-extern unsigned long smu_cmdbuf_abs;
-
-
-/*
- * Kenrel asynchronous i2c interface
+ * Kernel asynchronous i2c interface
*/
#define SMU_I2C_READ_MAX 0x1d
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index f6fc0ee813d7..d072866842e4 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SPARSEMEM_H
#define _ASM_POWERPC_SPARSEMEM_H 1
#ifdef __KERNEL__
@@ -5,19 +6,17 @@
#ifdef CONFIG_SPARSEMEM
/*
* SECTION_SIZE_BITS 2^N: how big each section will be
- * MAX_PHYSADDR_BITS 2^N: how much physical address space we have
* MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
*/
#define SECTION_SIZE_BITS 24
-#define MAX_PHYSADDR_BITS 46
-#define MAX_PHYSMEM_BITS 46
-
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_MEMORY_HOTPLUG
-extern int create_section_mapping(unsigned long start, unsigned long end);
extern int remove_section_mapping(unsigned long start, unsigned long end);
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+
#ifdef CONFIG_NUMA
extern int hot_add_scn_to_nid(unsigned long scn_addr);
#else
@@ -27,6 +26,5 @@ static inline int hot_add_scn_to_nid(unsigned long scn_addr)
}
#endif /* CONFIG_NUMA */
#endif /* CONFIG_MEMORY_HOTPLUG */
-
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_SPARSEMEM_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 4dbe072eecbe..7dafca8e3f02 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -1,314 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __ASM_SPINLOCK_H
#define __ASM_SPINLOCK_H
#ifdef __KERNEL__
-/*
- * Simple spin lock operations.
- *
- * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
- * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
- * Rework to support virtual processors
- *
- * Type of int is used as a full 64b word is not necessary.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-#include <linux/irqflags.h>
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#include <asm/hvcall.h>
-#endif
-#include <asm/asm-compat.h>
-#include <asm/synch.h>
-#include <asm/ppc-opcode.h>
-
-#define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */
-
-#ifdef CONFIG_PPC64
-/* use 0x800000yy when locked, where yy == CPU number */
-#ifdef __BIG_ENDIAN__
-#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
#else
-#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
+#include <asm/simple_spinlock.h>
#endif
-#else
-#define LOCK_TOKEN 1
-#endif
-
-#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
-#define CLEAR_IO_SYNC (get_paca()->io_sync = 0)
-#define SYNC_IO do { \
- if (unlikely(get_paca()->io_sync)) { \
- mb(); \
- get_paca()->io_sync = 0; \
- } \
- } while (0)
-#else
-#define CLEAR_IO_SYNC
-#define SYNC_IO
-#endif
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
- return lock.slock == 0;
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
- smp_mb();
- return !arch_spin_value_unlocked(*lock);
-}
-
-/*
- * This returns the old value in the lock, so we succeeded
- * in getting the lock if the return value is 0.
- */
-static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
-{
- unsigned long tmp, token;
- token = LOCK_TOKEN;
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0,0,%2,1) "\n\
- cmpwi 0,%0,0\n\
- bne- 2f\n\
- stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ACQUIRE_BARRIER
-"2:"
- : "=&r" (tmp)
- : "r" (token), "r" (&lock->slock)
- : "cr0", "memory");
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock() smp_mb()
- return tmp;
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- CLEAR_IO_SYNC;
- return __arch_spin_trylock(lock) == 0;
-}
-
-/*
- * On a system with shared processors (that is, where a physical
- * processor is multiplexed between several virtual processors),
- * there is no point spinning on a lock if the holder of the lock
- * isn't currently scheduled on a physical processor. Instead
- * we detect this situation and ask the hypervisor to give the
- * rest of our timeslice to the lock holder.
- *
- * So that we can tell which virtual processor is holding a lock,
- * we put 0x80000000 | smp_processor_id() in the lock when it is
- * held. Conveniently, we have a word in the paca that holds this
- * value.
- */
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-#define SHARED_PROCESSOR (lppaca_shared_proc(local_paca->lppaca_ptr))
-extern void __spin_yield(arch_spinlock_t *lock);
-extern void __rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-#define __spin_yield(x) barrier()
-#define __rw_yield(x) barrier()
-#define SHARED_PROCESSOR 0
-#endif
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- CLEAR_IO_SYNC;
- while (1) {
- if (likely(__arch_spin_trylock(lock) == 0))
- break;
- do {
- HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
- } while (unlikely(lock->slock != 0));
- HMT_medium();
- }
-}
-
-static inline
-void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
- unsigned long flags_dis;
-
- CLEAR_IO_SYNC;
- while (1) {
- if (likely(__arch_spin_trylock(lock) == 0))
- break;
- local_save_flags(flags_dis);
- local_irq_restore(flags);
- do {
- HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
- } while (unlikely(lock->slock != 0));
- HMT_medium();
- local_irq_restore(flags_dis);
- }
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- SYNC_IO;
- __asm__ __volatile__("# arch_spin_unlock\n\t"
- PPC_RELEASE_BARRIER: : :"memory");
- lock->slock = 0;
-}
-
-#ifdef CONFIG_PPC64
-extern void arch_spin_unlock_wait(arch_spinlock_t *lock);
-#else
-#define arch_spin_unlock_wait(lock) \
- do { while (arch_spin_is_locked(lock)) cpu_relax(); } while (0)
+#ifndef CONFIG_PPC_QUEUED_SPINLOCKS
+static inline void pv_spinlocks_init(void) { }
#endif
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-#define arch_read_can_lock(rw) ((rw)->lock >= 0)
-#define arch_write_can_lock(rw) (!(rw)->lock)
-
-#ifdef CONFIG_PPC64
-#define __DO_SIGN_EXTEND "extsw %0,%0\n"
-#define WRLOCK_TOKEN LOCK_TOKEN /* it's negative */
-#else
-#define __DO_SIGN_EXTEND
-#define WRLOCK_TOKEN (-1)
-#endif
-
-/*
- * This returns the old value in the lock + 1,
- * so we got a read lock if the return value is > 0.
- */
-static inline long __arch_read_trylock(arch_rwlock_t *rw)
-{
- long tmp;
-
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0,0,%1,1) "\n"
- __DO_SIGN_EXTEND
-" addic. %0,%0,1\n\
- ble- 2f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b\n"
- PPC_ACQUIRE_BARRIER
-"2:" : "=&r" (tmp)
- : "r" (&rw->lock)
- : "cr0", "xer", "memory");
-
- return tmp;
-}
-
-/*
- * This returns the old value in the lock,
- * so we got the write lock if the return value is 0.
- */
-static inline long __arch_write_trylock(arch_rwlock_t *rw)
-{
- long tmp, token;
-
- token = WRLOCK_TOKEN;
- __asm__ __volatile__(
-"1: " PPC_LWARX(%0,0,%2,1) "\n\
- cmpwi 0,%0,0\n\
- bne- 2f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %1,0,%2\n\
- bne- 1b\n"
- PPC_ACQUIRE_BARRIER
-"2:" : "=&r" (tmp)
- : "r" (token), "r" (&rw->lock)
- : "cr0", "memory");
-
- return tmp;
-}
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
- while (1) {
- if (likely(__arch_read_trylock(rw) > 0))
- break;
- do {
- HMT_low();
- if (SHARED_PROCESSOR)
- __rw_yield(rw);
- } while (unlikely(rw->lock < 0));
- HMT_medium();
- }
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
- while (1) {
- if (likely(__arch_write_trylock(rw) == 0))
- break;
- do {
- HMT_low();
- if (SHARED_PROCESSOR)
- __rw_yield(rw);
- } while (unlikely(rw->lock != 0));
- HMT_medium();
- }
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
- return __arch_read_trylock(rw) > 0;
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
- return __arch_write_trylock(rw) == 0;
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
- long tmp;
-
- __asm__ __volatile__(
- "# read_unlock\n\t"
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%1\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- : "=&r"(tmp)
- : "r"(&rw->lock)
- : "cr0", "xer", "memory");
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
- __asm__ __volatile__("# write_unlock\n\t"
- PPC_RELEASE_BARRIER: : :"memory");
- rw->lock = 0;
-}
-
-#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
-#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-
-#define arch_spin_relax(lock) __spin_yield(lock)
-#define arch_read_relax(lock) __rw_yield(lock)
-#define arch_write_relax(lock) __rw_yield(lock)
-
#endif /* __KERNEL__ */
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 2351adc4fdc4..569765fa16bc 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -1,20 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SPINLOCK_TYPES_H
#define _ASM_POWERPC_SPINLOCK_TYPES_H
-#ifndef __LINUX_SPINLOCK_TYPES_H
-# error "please don't include this file directly"
+#ifndef __LINUX_SPINLOCK_TYPES_RAW_H
+# error "Please do not include this file directly."
#endif
-typedef struct {
- volatile unsigned int slock;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
-
-typedef struct {
- volatile signed int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+#else
+#include <asm/simple_spinlock_types.h>
+#endif
#endif
diff --git a/arch/powerpc/include/asm/spu.h b/arch/powerpc/include/asm/spu.h
index 37b7ca39ec9f..96ad4510c895 100644
--- a/arch/powerpc/include/asm/spu.h
+++ b/arch/powerpc/include/asm/spu.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SPU core / file system interface and HW structures
*
* (C) Copyright IBM Deutschland Entwicklung GmbH 2005
*
* Author: Arnd Bergmann <arndb@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _SPU_H
@@ -27,6 +14,8 @@
#include <linux/workqueue.h>
#include <linux/device.h>
#include <linux/mutex.h>
+#include <asm/reg.h>
+#include <asm/copro.h>
#define LS_SIZE (256 * 1024)
#define LS_ADDR_MASK (LS_SIZE - 1)
@@ -212,20 +201,6 @@ int spu_64k_pages_available(void);
struct mm_struct;
extern void spu_flush_all_slbs(struct mm_struct *mm);
-/* This interface allows a profiler (e.g., OProfile) to store a ref
- * to spu context information that it creates. This caching technique
- * avoids the need to recreate this information after a save/restore operation.
- *
- * Assumes the caller has already incremented the ref count to
- * profile_info; then spu_context_destroy must call kref_put
- * on prof_info_kref.
- */
-void spu_set_profile_private_kref(struct spu_context *ctx,
- struct kref *prof_info_kref,
- void ( * prof_info_release) (struct kref *kref));
-
-void *spu_get_profile_private_kref(struct spu_context *ctx);
-
/* system callbacks from the SPU */
struct spu_syscall_block {
u64 nr_ret;
@@ -274,30 +249,8 @@ void unregister_spu_syscalls(struct spufs_calls *calls);
int spu_add_dev_attr(struct device_attribute *attr);
void spu_remove_dev_attr(struct device_attribute *attr);
-int spu_add_dev_attr_group(struct attribute_group *attrs);
-void spu_remove_dev_attr_group(struct attribute_group *attrs);
-
-int spu_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
- unsigned long dsisr, unsigned *flt);
-
-/*
- * Notifier blocks:
- *
- * oprofile can get notified when a context switch is performed
- * on an spe. The notifer function that gets called is passed
- * a pointer to the SPU structure as well as the object-id that
- * identifies the binary running on that SPU now.
- *
- * For a context save, the object-id that is passed is zero,
- * identifying that the kernel will run from that moment on.
- *
- * For a context restore, the object-id is the value written
- * to object-id spufs file from user space and the notifer
- * function can assume that spu->ctx is valid.
- */
-struct notifier_block;
-int spu_switch_event_register(struct notifier_block * n);
-int spu_switch_event_unregister(struct notifier_block * n);
+int spu_add_dev_attr_group(const struct attribute_group *attrs);
+void spu_remove_dev_attr_group(const struct attribute_group *attrs);
extern void notify_spus_active(void);
extern void do_notify_spus_active(void);
diff --git a/arch/powerpc/include/asm/spu_csa.h b/arch/powerpc/include/asm/spu_csa.h
index a40fd491250c..1b3271a03392 100644
--- a/arch/powerpc/include/asm/spu_csa.h
+++ b/arch/powerpc/include/asm/spu_csa.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* spu_csa.h: Definitions for SPU context save area (CSA).
*
* (C) Copyright IBM 2005
*
* Author: Mark Nutter <mnutter@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _SPU_CSA_H_
@@ -56,7 +43,7 @@
#define SPU_DECR_STATUS_RUNNING 0x1
#define SPU_DECR_STATUS_WRAPPED 0x2
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/**
* spu_reg128 - generic 128-bit register definition.
*/
@@ -241,12 +228,6 @@ struct spu_priv2_collapsed {
*/
struct spu_state {
struct spu_lscsa *lscsa;
-#ifdef CONFIG_SPU_FS_64K_LS
- int use_big_pages;
- /* One struct page per 64k page */
-#define SPU_LSCSA_NUM_BIG_PAGES (sizeof(struct spu_lscsa) / 0x10000)
- struct page *lscsa_pages[SPU_LSCSA_NUM_BIG_PAGES];
-#endif
struct spu_problem_collapsed prob;
struct spu_priv1_collapsed priv1;
struct spu_priv2_collapsed priv2;
@@ -262,5 +243,5 @@ struct spu_state {
#endif /* !__SPU__ */
#endif /* __KERNEL__ */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _SPU_CSA_H_ */
diff --git a/arch/powerpc/include/asm/spu_info.h b/arch/powerpc/include/asm/spu_info.h
index 7146b78e40f1..732431034a63 100644
--- a/arch/powerpc/include/asm/spu_info.h
+++ b/arch/powerpc/include/asm/spu_info.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* SPU info structures
*
* (C) Copyright 2006 IBM Corp.
*
* Author: Dwayne Grant McConnell <decimal@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _SPU_INFO_H
#define _SPU_INFO_H
diff --git a/arch/powerpc/include/asm/spu_priv1.h b/arch/powerpc/include/asm/spu_priv1.h
index d8f5c60f61c1..66b111fa1cd1 100644
--- a/arch/powerpc/include/asm/spu_priv1.h
+++ b/arch/powerpc/include/asm/spu_priv1.h
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Defines an spu hypervisor abstraction layer.
*
* Copyright 2006 Sony Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#if !defined(_SPU_PRIV1_H)
@@ -227,9 +215,6 @@ spu_disable_spu (struct spu_context *ctx)
* and only intended to be used by the platform setup code.
*/
-extern const struct spu_priv1_ops spu_priv1_mmio_ops;
-extern const struct spu_priv1_ops spu_priv1_beat_ops;
-
extern const struct spu_management_ops spu_management_of_ops;
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index f593b0f9b627..e3d0e714ff28 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <asm/inst.h>
struct pt_regs;
@@ -16,12 +13,164 @@ struct pt_regs;
* we don't allow putting a breakpoint on an mtmsrd instruction.
* Similarly we don't allow breakpoints on rfid instructions.
* These macros tell us if an instruction is a mtmsrd or rfid.
- * Note that IS_MTMSRD returns true for both an mtmsr (32-bit)
- * and an mtmsrd (64-bit).
+ * Note that these return true for both mtmsr/rfi (32-bit)
+ * and mtmsrd/rfid (64-bit).
+ */
+#define IS_MTMSRD(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x7c000124)
+#define IS_RFID(instr) ((ppc_inst_val(instr) & 0xfc0007be) == 0x4c000024)
+
+enum instruction_type {
+ COMPUTE, /* arith/logical/CR op, etc. */
+ LOAD, /* load and store types need to be contiguous */
+ LOAD_MULTI,
+ LOAD_FP,
+ LOAD_VMX,
+ LOAD_VSX,
+ STORE,
+ STORE_MULTI,
+ STORE_FP,
+ STORE_VMX,
+ STORE_VSX,
+ LARX,
+ STCX,
+ BRANCH,
+ MFSPR,
+ MTSPR,
+ CACHEOP,
+ BARRIER,
+ SYSCALL,
+ SYSCALL_VECTORED_0,
+ MFMSR,
+ MTMSR,
+ RFI,
+ INTERRUPT,
+ UNKNOWN
+};
+
+#define INSTR_TYPE_MASK 0x1f
+
+#define OP_IS_LOAD(type) ((LOAD <= (type) && (type) <= LOAD_VSX) || (type) == LARX)
+#define OP_IS_STORE(type) ((STORE <= (type) && (type) <= STORE_VSX) || (type) == STCX)
+#define OP_IS_LOAD_STORE(type) (LOAD <= (type) && (type) <= STCX)
+
+/* Compute flags, ORed in with type */
+#define SETREG 0x20
+#define SETCC 0x40
+#define SETXER 0x80
+
+/* Branch flags, ORed in with type */
+#define SETLK 0x20
+#define BRTAKEN 0x40
+#define DECCTR 0x80
+
+/* Load/store flags, ORed in with type */
+#define SIGNEXT 0x20
+#define UPDATE 0x40 /* matches bit in opcode 31 instructions */
+#define BYTEREV 0x80
+#define FPCONV 0x100
+
+/* Barrier type field, ORed in with type */
+#define BARRIER_MASK 0xe0
+#define BARRIER_SYNC 0x00
+#define BARRIER_ISYNC 0x20
+#define BARRIER_EIEIO 0x40
+#define BARRIER_LWSYNC 0x60
+#define BARRIER_PTESYNC 0x80
+
+/* Cacheop values, ORed in with type */
+#define CACHEOP_MASK 0x700
+#define DCBST 0
+#define DCBF 0x100
+#define DCBTST 0x200
+#define DCBT 0x300
+#define ICBI 0x400
+#define DCBZ 0x500
+
+/* VSX flags values */
+#define VSX_FPCONV 1 /* do floating point SP/DP conversion */
+#define VSX_SPLAT 2 /* store loaded value into all elements */
+#define VSX_LDLEFT 4 /* load VSX register from left */
+#define VSX_CHECK_VEC 8 /* check MSR_VEC not MSR_VSX for reg >= 32 */
+
+/* Prefixed flag, ORed in with type */
+#define PREFIXED 0x800
+
+/* Size field in type word */
+#define SIZE(n) ((n) << 12)
+#define GETSIZE(w) ((w) >> 12)
+
+#define GETTYPE(t) ((t) & INSTR_TYPE_MASK)
+#define GETLENGTH(t) (((t) & PREFIXED) ? 8 : 4)
+
+#define MKOP(t, f, s) ((t) | (f) | SIZE(s))
+
+/* Prefix instruction operands */
+#define GET_PREFIX_RA(i) (((i) >> 16) & 0x1f)
+#define GET_PREFIX_R(i) ((i) & (1ul << 20))
+
+extern s32 patch__exec_instr;
+
+struct instruction_op {
+ int type;
+ int reg;
+ unsigned long val;
+ /* For LOAD/STORE/LARX/STCX */
+ unsigned long ea;
+ int update_reg;
+ /* For MFSPR */
+ int spr;
+ u32 ccval;
+ u32 xerval;
+ u8 element_size; /* for VSX/VMX loads/stores */
+ u8 vsx_flags;
+};
+
+union vsx_reg {
+ u8 b[16];
+ u16 h[8];
+ u32 w[4];
+ unsigned long d[2];
+ float fp[4];
+ double dp[2];
+ __vector128 v;
+};
+
+/*
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
+ */
+extern int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+ ppc_inst_t instr);
+
+/*
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
+ */
+void emulate_update_regs(struct pt_regs *reg, struct instruction_op *op);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * arithmetic/logical instructions, loads and stores,
+ * cache operations and barriers.
+ *
+ * Returns 1 if the instruction was emulated successfully,
+ * 0 if it could not be emulated, or -1 for an instruction that
+ * should not be emulated (rfid, mtmsrd clearing MSR_RI, etc.).
+ */
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr);
+
+/*
+ * Emulate a load or store instruction by reading/writing the
+ * memory of the current process. FP/VMX/VSX registers are assumed
+ * to hold live values if the appropriate enable bit in regs->msr is
+ * set; otherwise this will use the saved values in the thread struct
+ * for user-mode accesses.
*/
-#define IS_MTMSRD(instr) (((instr) & 0xfc0007be) == 0x7c000124)
-#define IS_RFID(instr) (((instr) & 0xfc0007fe) == 0x4c000024)
-#define IS_RFI(instr) (((instr) & 0xfc0007fe) == 0x4c000064)
+extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op);
-/* Emulate instructions that cause a transfer of control. */
-extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs);
diff --git a/arch/powerpc/include/asm/stackprotector.h b/arch/powerpc/include/asm/stackprotector.h
new file mode 100644
index 000000000000..283c34647856
--- /dev/null
+++ b/arch/powerpc/include/asm/stackprotector.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <asm/reg.h>
+#include <asm/current.h>
+#include <asm/paca.h>
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ unsigned long canary = get_random_canary();
+
+ current->stack_canary = canary;
+#ifdef CONFIG_PPC64
+ get_paca()->canary = canary;
+#endif
+}
+
+#endif /* _ASM_STACKPROTECTOR_H */
diff --git a/arch/powerpc/include/asm/stacktrace.h b/arch/powerpc/include/asm/stacktrace.h
new file mode 100644
index 000000000000..6149b53b3bc8
--- /dev/null
+++ b/arch/powerpc/include/asm/stacktrace.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Stack trace functions.
+ *
+ * Copyright 2018, Murilo Opsfelder Araujo, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_STACKTRACE_H
+#define _ASM_POWERPC_STACKTRACE_H
+
+void show_user_instructions(struct pt_regs *regs);
+
+#endif /* _ASM_POWERPC_STACKTRACE_H */
diff --git a/arch/powerpc/include/asm/static_call.h b/arch/powerpc/include/asm/static_call.h
new file mode 100644
index 000000000000..e3d5d3823dac
--- /dev/null
+++ b/arch/powerpc/include/asm/static_call.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_STATIC_CALL_H
+#define _ASM_POWERPC_STATIC_CALL_H
+
+#define __PPC_SCT(name, inst) \
+ asm(".pushsection .text, \"ax\" \n" \
+ ".align 5 \n" \
+ ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+ STATIC_CALL_TRAMP_STR(name) ": \n" \
+ inst " \n" \
+ " lis 12,2f@ha \n" \
+ " lwz 12,2f@l(12) \n" \
+ " mtctr 12 \n" \
+ " bctr \n" \
+ "1: li 3, 0 \n" \
+ " blr \n" \
+ "2: .long 0 \n" \
+ ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
+ ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
+ ".popsection \n")
+
+#define PPC_SCT_RET0 20 /* Offset of label 1 */
+#define PPC_SCT_DATA 28 /* Offset of label 2 */
+
+#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) __PPC_SCT(name, "b " #func)
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) __PPC_SCT(name, "blr")
+#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) __PPC_SCT(name, "b .+20")
+
+#define CALL_INSN_SIZE 4
+
+#endif /* _ASM_POWERPC_STATIC_CALL_H */
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index e40010abcaf1..60ba22770f51 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -1,19 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_STRING_H
#define _ASM_POWERPC_STRING_H
#ifdef __KERNEL__
-#define __HAVE_ARCH_STRCPY
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRNCPY
-#define __HAVE_ARCH_STRLEN
-#define __HAVE_ARCH_STRCMP
#define __HAVE_ARCH_STRNCMP
-#define __HAVE_ARCH_STRCAT
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_MEMSET16
+#endif
+
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMMOVE
-#define __HAVE_ARCH_MEMCMP
-#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
extern char * strcpy(char *,const char *);
extern char * strncpy(char *,const char *, __kernel_size_t);
@@ -26,7 +28,65 @@ extern void * memcpy(void *,const void *,__kernel_size_t);
extern void * memmove(void *,const void *,__kernel_size_t);
extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
+void memcpy_flushcache(void *dest, const void *src, size_t size);
+
+#ifdef CONFIG_KASAN
+/* __mem variants are used by KASAN to implement instrumented memintrinsics. */
+#ifdef CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX
+#define __memset memset
+#define __memcpy memcpy
+#define __memmove memmove
+#else /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
+void *__memset(void *s, int c, __kernel_size_t count);
+void *__memcpy(void *to, const void *from, __kernel_size_t n);
+void *__memmove(void *to, const void *from, __kernel_size_t n);
+#ifndef __SANITIZE_ADDRESS__
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+#endif /* !__SANITIZE_ADDRESS__ */
+#endif /* CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX */
+#endif /* CONFIG_KASAN */
+
+#ifdef CONFIG_PPC64
+#ifndef CONFIG_KASAN
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+ return __memset16(p, v, n * 2);
+}
+
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+ return __memset32(p, v, n * 4);
+}
+
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+ return __memset64(p, v, n * 8);
+}
+#endif
+#else
+#ifndef CONFIG_KASAN
+#define __HAVE_ARCH_STRLEN
+#endif
+extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
+#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_STRING_H */
diff --git a/arch/powerpc/include/asm/svm.h b/arch/powerpc/include/asm/svm.h
new file mode 100644
index 000000000000..a02bd54b8948
--- /dev/null
+++ b/arch/powerpc/include/asm/svm.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * SVM helper functions
+ *
+ * Copyright 2018 Anshuman Khandual, IBM Corporation.
+ */
+
+#ifndef _ASM_POWERPC_SVM_H
+#define _ASM_POWERPC_SVM_H
+
+#ifdef CONFIG_PPC_SVM
+
+#include <asm/reg.h>
+
+static inline bool is_secure_guest(void)
+{
+ return mfmsr() & MSR_S;
+}
+
+void dtl_cache_ctor(void *addr);
+#define get_dtl_cache_ctor() (is_secure_guest() ? dtl_cache_ctor : NULL)
+
+#else /* CONFIG_PPC_SVM */
+
+static inline bool is_secure_guest(void)
+{
+ return false;
+}
+
+#define get_dtl_cache_ctor() NULL
+
+#endif /* CONFIG_PPC_SVM */
+#endif /* _ASM_POWERPC_SVM_H */
diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h
index 96f59de61855..f4cfdc1246d3 100644
--- a/arch/powerpc/include/asm/swab.h
+++ b/arch/powerpc/include/asm/swab.h
@@ -1,38 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_SWAB_H
#define _ASM_POWERPC_SWAB_H
#include <uapi/asm/swab.h>
-static __inline__ __u16 ld_le16(const volatile __u16 *addr)
-{
- __u16 val;
-
- __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
- return val;
-}
-
-static __inline__ void st_le16(volatile __u16 *addr, const __u16 val)
-{
- __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
-static __inline__ __u32 ld_le32(const volatile __u32 *addr)
-{
- __u32 val;
-
- __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr));
- return val;
-}
-
-static __inline__ void st_le32(volatile __u32 *addr, const __u32 val)
-{
- __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr));
-}
-
#endif /* _ASM_POWERPC_SWAB_H */
diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h
index de99d6e29430..4203b5e0a88e 100644
--- a/arch/powerpc/include/asm/swiotlb.h
+++ b/arch/powerpc/include/asm/swiotlb.h
@@ -1,11 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2009 Becky Bruce, Freescale Semiconductor
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
*/
#ifndef __ASM_SWIOTLB_H
@@ -13,14 +8,8 @@
#include <linux/swiotlb.h>
-extern struct dma_map_ops swiotlb_dma_ops;
-
-static inline void dma_mark_clean(void *addr, size_t size) {}
-
extern unsigned int ppc_swiotlb_enable;
-int __init swiotlb_setup_bus_notifier(void);
-
-extern void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev);
+extern unsigned int ppc_swiotlb_flags;
#ifdef CONFIG_SWIOTLB
void swiotlb_detect_4g(void);
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 58abeda64cb7..fc933807ddc8 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -1,9 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
*/
#ifndef _ASM_POWERPC_SWITCH_TO_H
#define _ASM_POWERPC_SWITCH_TO_H
+#include <linux/sched.h>
+#include <asm/reg.h>
+
struct thread_struct;
struct task_struct;
struct pt_regs;
@@ -12,74 +16,97 @@ extern struct task_struct *__switch_to(struct task_struct *,
struct task_struct *);
#define switch_to(prev, next, last) ((last) = __switch_to((prev), (next)))
-struct thread_struct;
extern struct task_struct *_switch(struct thread_struct *prev,
struct thread_struct *next);
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void save_early_sprs(struct thread_struct *prev)
-{
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- prev->tar = mfspr(SPRN_TAR);
- if (cpu_has_feature(CPU_FTR_DSCR))
- prev->dscr = mfspr(SPRN_DSCR);
-}
-#else
-static inline void save_early_sprs(struct thread_struct *prev) {}
-#endif
-extern void enable_kernel_fp(void);
-extern void enable_kernel_altivec(void);
-extern int emulate_altivec(struct pt_regs *);
-extern void __giveup_vsx(struct task_struct *);
-extern void giveup_vsx(struct task_struct *);
-extern void enable_kernel_spe(void);
-extern void giveup_spe(struct task_struct *);
-extern void load_up_spe(struct task_struct *);
extern void switch_booke_debug_regs(struct debug_reg *new_debug);
-#ifndef CONFIG_SMP
-extern void discard_lazy_cpu_state(void);
+extern int emulate_altivec(struct pt_regs *);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void restore_math(struct pt_regs *regs);
#else
-static inline void discard_lazy_cpu_state(void)
+static inline void restore_math(struct pt_regs *regs)
{
}
#endif
+void restore_tm_state(struct pt_regs *regs);
+
+extern void flush_all_to_thread(struct task_struct *);
+extern void giveup_all(struct task_struct *);
+
#ifdef CONFIG_PPC_FPU
+extern void enable_kernel_fp(void);
extern void flush_fp_to_thread(struct task_struct *);
extern void giveup_fpu(struct task_struct *);
+extern void save_fpu(struct task_struct *);
+static inline void disable_kernel_fp(void)
+{
+ msr_check_and_clear(MSR_FP);
+}
#else
+static inline void save_fpu(struct task_struct *t) { }
static inline void flush_fp_to_thread(struct task_struct *t) { }
-static inline void giveup_fpu(struct task_struct *t) { }
+static inline void enable_kernel_fp(void)
+{
+ BUILD_BUG();
+}
#endif
#ifdef CONFIG_ALTIVEC
+extern void enable_kernel_altivec(void);
extern void flush_altivec_to_thread(struct task_struct *);
extern void giveup_altivec(struct task_struct *);
-extern void giveup_altivec_notask(void);
+extern void save_altivec(struct task_struct *);
+static inline void disable_kernel_altivec(void)
+{
+ msr_check_and_clear(MSR_VEC);
+}
#else
-static inline void flush_altivec_to_thread(struct task_struct *t)
+static inline void save_altivec(struct task_struct *t) { }
+static inline void __giveup_altivec(struct task_struct *t) { }
+static inline void enable_kernel_altivec(void)
{
+ BUILD_BUG();
}
-static inline void giveup_altivec(struct task_struct *t)
+
+static inline void disable_kernel_altivec(void)
{
+ BUILD_BUG();
}
#endif
#ifdef CONFIG_VSX
+extern void enable_kernel_vsx(void);
extern void flush_vsx_to_thread(struct task_struct *);
+static inline void disable_kernel_vsx(void)
+{
+ msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
+}
#else
-static inline void flush_vsx_to_thread(struct task_struct *t)
+static inline void enable_kernel_vsx(void)
{
+ BUILD_BUG();
+}
+
+static inline void disable_kernel_vsx(void)
+{
+ BUILD_BUG();
}
#endif
#ifdef CONFIG_SPE
+extern void enable_kernel_spe(void);
extern void flush_spe_to_thread(struct task_struct *);
-#else
-static inline void flush_spe_to_thread(struct task_struct *t)
+extern void giveup_spe(struct task_struct *);
+extern void __giveup_spe(struct task_struct *);
+static inline void disable_kernel_spe(void)
{
+ msr_check_and_clear(MSR_SPE);
}
+#else
+static inline void __giveup_spe(struct task_struct *t) { }
#endif
static inline void clear_task_ebb(struct task_struct *t)
@@ -98,4 +125,9 @@ static inline void clear_task_ebb(struct task_struct *t)
#endif
}
+void kvmppc_save_user_regs(void);
+void kvmppc_save_current_sprs(void);
+
+extern int set_thread_tidr(struct task_struct *t);
+
#endif /* _ASM_POWERPC_SWITCH_TO_H */
diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index e682a7143edb..0d3ccb34adfb 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -1,34 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_SYNCH_H
#define _ASM_POWERPC_SYNCH_H
#ifdef __KERNEL__
-#include <linux/stringify.h>
+#include <asm/cputable.h>
#include <asm/feature-fixups.h>
+#include <asm/ppc-opcode.h>
-#if defined(__powerpc64__) || defined(CONFIG_PPC_E500MC)
-#define __SUBARCH_HAS_LWSYNC
-#endif
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern unsigned int __start___lwsync_fixup, __stop___lwsync_fixup;
extern void do_lwsync_fixups(unsigned long value, void *fixup_start,
void *fixup_end);
-extern void do_final_fixups(void);
static inline void eieio(void)
{
- __asm__ __volatile__ ("eieio" : : : "memory");
+ if (IS_ENABLED(CONFIG_BOOKE))
+ __asm__ __volatile__ ("mbar" : : : "memory");
+ else
+ __asm__ __volatile__ ("eieio" : : : "memory");
}
static inline void isync(void)
{
__asm__ __volatile__ ("isync" : : : "memory");
}
-#endif /* __ASSEMBLY__ */
+
+static inline void ppc_after_tlbiel_barrier(void)
+{
+ asm volatile("ptesync": : :"memory");
+ /*
+ * POWER9, POWER10 need a cp_abort after tlbiel to ensure the copy is
+ * invalidated correctly. If this is not done, the paste can take data
+ * from the physical address that was translated at copy time.
+ *
+ * POWER9 in practice does not need this, because address spaces with
+ * accelerators mapped will use tlbie (which does invalidate the copy)
+ * to invalidate translations. It's not possible to limit POWER10 this
+ * way due to local copy-paste.
+ */
+ asm volatile(ASM_FTR_IFSET(PPC_CP_ABORT, "", %0) : : "i" (CPU_FTR_ARCH_31) : "memory");
+}
+#endif /* __ASSEMBLER__ */
#if defined(__powerpc64__)
# define LWSYNC lwsync
-#elif defined(CONFIG_E500)
+#elif defined(CONFIG_PPC_E500)
# define LWSYNC \
START_LWSYNC_SECTION(96); \
sync; \
@@ -44,7 +60,7 @@ static inline void isync(void)
MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
#define PPC_ACQUIRE_BARRIER "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
#define PPC_RELEASE_BARRIER stringify_in_c(LWSYNC) "\n"
-#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n"
+#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n"
#define PPC_ATOMIC_EXIT_BARRIER "\n" stringify_in_c(sync) "\n"
#else
#define PPC_ACQUIRE_BARRIER
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index b54b2add07be..4b3c52ed6e9d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -1,29 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Access to user system call parameters and results
*
* Copyright (C) 2008 Red Hat, Inc. All rights reserved.
*
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License v.2.
- *
* See asm-generic/syscall.h for descriptions of what we must do here.
*/
#ifndef _ASM_SYSCALL_H
#define _ASM_SYSCALL_H 1
+#include <uapi/linux/audit.h>
#include <linux/sched.h>
+#include <linux/thread_info.h>
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+typedef long (*syscall_fn)(const struct pt_regs *);
+#else
+typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
+ unsigned long, unsigned long, unsigned long);
+#endif
/* ftrace syscalls requires exporting the sys_call_table */
-#ifdef CONFIG_FTRACE_SYSCALLS
-extern const unsigned long *sys_call_table;
-#endif /* CONFIG_FTRACE_SYSCALLS */
+extern const syscall_fn sys_call_table[];
+extern const syscall_fn compat_sys_call_table[];
+
+static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
+{
+ /*
+ * Note that we are returning an int here. That means 0xffffffff, i.e.
+ * 32-bit negative 1, will be interpreted as -1 on a 64-bit kernel.
+ * This is important for seccomp so that compat tasks can set r0 = -1
+ * to reject the syscall.
+ */
+ if (trap_is_syscall(regs))
+ return regs->gpr[0];
+ else
+ return -1;
+}
-static inline long syscall_get_nr(struct task_struct *task,
- struct pt_regs *regs)
+static inline void syscall_set_nr(struct task_struct *task, struct pt_regs *regs, int nr)
{
- return TRAP(regs) == 0xc00 ? regs->gpr[0] : -1L;
+ /*
+ * Unlike syscall_get_nr(), syscall_set_nr() can be called only when
+ * the target task is stopped for tracing on entering syscall, so
+ * there is no need to have the same check syscall_get_nr() has.
+ */
+ regs->gpr[0] = nr;
}
static inline void syscall_rollback(struct task_struct *task,
@@ -35,7 +58,17 @@ static inline void syscall_rollback(struct task_struct *task,
static inline long syscall_get_error(struct task_struct *task,
struct pt_regs *regs)
{
- return (regs->ccr & 0x10000000) ? -regs->gpr[3] : 0;
+ if (trap_is_scv(regs)) {
+ unsigned long error = regs->gpr[3];
+
+ return IS_ERR_VALUE(error) ? error : 0;
+ } else {
+ /*
+ * If the system call failed,
+ * regs->gpr[3] contains a positive ERRORCODE.
+ */
+ return (regs->ccr & 0x10000000UL) ? -regs->gpr[3] : 0;
+ }
}
static inline long syscall_get_return_value(struct task_struct *task,
@@ -48,42 +81,62 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- if (error) {
- regs->ccr |= 0x10000000L;
- regs->gpr[3] = -error;
+ if (trap_is_scv(regs)) {
+ regs->gpr[3] = (long) error ?: val;
} else {
- regs->ccr &= ~0x10000000L;
- regs->gpr[3] = val;
+ /*
+ * In the general case it's not obvious that we must deal with
+ * CCR here, as the syscall exit path will also do that for us.
+ * However there are some places, eg. the signal code, which
+ * check ccr to decide if the value in r3 is actually an error.
+ */
+ if (error) {
+ regs->ccr |= 0x10000000L;
+ regs->gpr[3] = error;
+ } else {
+ regs->ccr &= ~0x10000000L;
+ regs->gpr[3] = val;
+ }
}
}
static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
unsigned long *args)
{
- BUG_ON(i + n > 6);
-#ifdef CONFIG_PPC64
- if (test_tsk_thread_flag(task, TIF_32BIT)) {
- /*
- * Zero-extend 32-bit argument values. The high bits are
- * garbage ignored by the actual syscall dispatch.
- */
- while (n-- > 0)
- args[n] = (u32) regs->gpr[3 + i + n];
- return;
+ unsigned long val, mask = -1UL;
+ unsigned int n = 6;
+
+ if (is_tsk_32bit_task(task))
+ mask = 0xffffffff;
+
+ while (n--) {
+ if (n == 0)
+ val = regs->orig_gpr3;
+ else
+ val = regs->gpr[3 + n];
+
+ args[n] = val & mask;
}
-#endif
- memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
}
static inline void syscall_set_arguments(struct task_struct *task,
struct pt_regs *regs,
- unsigned int i, unsigned int n,
const unsigned long *args)
{
- BUG_ON(i + n > 6);
- memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+ memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
+
+ /* Also copy the first argument into orig_gpr3 */
+ regs->orig_gpr3 = args[0];
}
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ if (is_tsk_32bit_task(task))
+ return AUDIT_ARCH_PPC;
+ else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ return AUDIT_ARCH_PPC64LE;
+ else
+ return AUDIT_ARCH_PPC64;
+}
#endif /* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/syscall_wrapper.h b/arch/powerpc/include/asm/syscall_wrapper.h
new file mode 100644
index 000000000000..67486c67e8a2
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall_wrapper.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
+ *
+ * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
+ */
+
+#ifndef __ASM_POWERPC_SYSCALL_WRAPPER_H
+#define __ASM_POWERPC_SYSCALL_WRAPPER_H
+
+struct pt_regs;
+
+#define SC_POWERPC_REGS_TO_ARGS(x, ...) \
+ __MAP(x,__SC_ARGS \
+ ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5] \
+ ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
+
+#define __SYSCALL_DEFINEx(x, name, ...) \
+ long sys##name(const struct pt_regs *regs); \
+ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+ long sys##name(const struct pt_regs *regs) \
+ { \
+ return __se_sys##name(SC_POWERPC_REGS_TO_ARGS(x,__VA_ARGS__)); \
+ } \
+ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
+ { \
+ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \
+ __MAP(x,__SC_TEST,__VA_ARGS__); \
+ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
+ return ret; \
+ } \
+ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ long sys_##sname(const struct pt_regs *__unused); \
+ ALLOW_ERROR_INJECTION(sys_##sname, ERRNO); \
+ long sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name) \
+ long sys_##name(const struct pt_regs *regs); \
+ long __weak sys_##name(const struct pt_regs *regs) \
+ { \
+ return sys_ni_syscall(); \
+ }
+
+#endif // __ASM_POWERPC_SYSCALL_WRAPPER_H
diff --git a/arch/powerpc/include/asm/syscalls.h b/arch/powerpc/include/asm/syscalls.h
index 23be8f1e7e64..6d51b007b59e 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_POWERPC_SYSCALLS_H
#define __ASM_POWERPC_SYSCALLS_H
#ifdef __KERNEL__
@@ -5,17 +6,156 @@
#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/types.h>
+#include <linux/compat.h>
+
+#include <asm/syscall.h>
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#endif
+#include <asm/unistd.h>
+#include <asm/ucontext.h>
+
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+long sys_ni_syscall(void);
+#else
+long sys_ni_syscall(const struct pt_regs *regs);
+#endif
struct rtas_args;
-asmlinkage unsigned long sys_mmap(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, off_t offset);
-asmlinkage unsigned long sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff);
-asmlinkage long ppc64_personality(unsigned long personality);
-asmlinkage int ppc_rtas(struct rtas_args __user *uargs);
+/*
+ * long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ * High and low parts are swapped depending on endian mode,
+ * so define a macro (similar to mips linux32) to handle that.
+ */
+#ifdef __LITTLE_ENDIAN__
+#define merge_64(low, high) (((u64)high << 32) | low)
+#else
+#define merge_64(high, low) (((u64)high << 32) | low)
+#endif
+
+/*
+ * PowerPC architecture-specific syscalls
+ */
+
+#ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+
+long sys_rtas(struct rtas_args __user *uargs);
+
+#ifdef CONFIG_PPC64
+long sys_ppc64_personality(unsigned long personality);
+#ifdef CONFIG_COMPAT
+long compat_sys_ppc64_personality(unsigned long personality);
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
+
+long sys_swapcontext(struct ucontext __user *old_ctx,
+ struct ucontext __user *new_ctx, long ctx_size);
+long sys_mmap(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, off_t offset);
+long sys_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff);
+long sys_switch_endian(void);
+
+#ifdef CONFIG_PPC32
+long sys_sigreturn(void);
+long sys_debug_setcontext(struct ucontext __user *ctx, int ndbg,
+ struct sig_dbg_op __user *dbg);
+#endif
+
+long sys_rt_sigreturn(void);
+
+long sys_subpage_prot(unsigned long addr,
+ unsigned long len, u32 __user *map);
+
+#ifdef CONFIG_COMPAT
+long compat_sys_swapcontext(struct ucontext32 __user *old_ctx,
+ struct ucontext32 __user *new_ctx,
+ int ctx_size);
+long compat_sys_old_getrlimit(unsigned int resource,
+ struct compat_rlimit __user *rlim);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+#endif /* CONFIG_COMPAT */
+
+/*
+ * Architecture specific signatures required by long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ * The following signatures provide a machine long parameter for
+ * each register that will be supplied. The implementation is
+ * responsible for combining parameter pairs.
+ */
+
+#ifdef CONFIG_PPC32
+long sys_ppc_pread64(unsigned int fd,
+ char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 pos1, u32 pos2);
+long sys_ppc_pwrite64(unsigned int fd,
+ const char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 pos1, u32 pos2);
+long sys_ppc_readahead(int fd, u32 r4,
+ u32 offset1, u32 offset2, u32 count);
+long sys_ppc_truncate64(const char __user *path, u32 reg4,
+ unsigned long len1, unsigned long len2);
+long sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
+ unsigned long len1, unsigned long len2);
+long sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+ size_t len, int advice);
+long sys_ppc_sync_file_range2(int fd, unsigned int flags,
+ unsigned int offset1,
+ unsigned int offset2,
+ unsigned int nbytes1,
+ unsigned int nbytes2);
+long sys_ppc_fallocate(int fd, int mode, u32 offset1, u32 offset2,
+ u32 len1, u32 len2);
+#endif
+#ifdef CONFIG_COMPAT
+long compat_sys_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff);
+long compat_sys_ppc_pread64(unsigned int fd,
+ char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_pwrite64(unsigned int fd,
+ const char __user *ubuf, compat_size_t count,
+ u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_readahead(int fd, u32 r4,
+ u32 offset1, u32 offset2, u32 count);
+long compat_sys_ppc_truncate64(const char __user *path, u32 reg4,
+ unsigned long len1, unsigned long len2);
+long compat_sys_ppc_ftruncate64(unsigned int fd, u32 reg4,
+ unsigned long len1, unsigned long len2);
+long compat_sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+ size_t len, int advice);
+long compat_sys_ppc_sync_file_range2(int fd, unsigned int flags,
+ unsigned int offset1,
+ unsigned int offset2,
+ unsigned int nbytes1,
+ unsigned int nbytes2);
+#endif /* CONFIG_COMPAT */
+
+#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT)
+long sys_ppc_fadvise64_64(int fd, int advice,
+ u32 offset_high, u32 offset_low,
+ u32 len_high, u32 len_low);
+#endif
+
+#else
+
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+#define __SYSCALL(nr, entry) \
+ long entry(const struct pt_regs *regs);
+
+#ifdef CONFIG_PPC64
+#include <asm/syscall_table_64.h>
+#else
+#include <asm/syscall_table_32.h>
+#endif /* CONFIG_PPC64 */
+
+#endif /* CONFIG_ARCH_HAS_SYSCALL_WRAPPER */
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_SYSCALLS_H */
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/include/asm/syscalls_32.h
index a27c914d5802..749255568be9 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/include/asm/syscalls_32.h
@@ -1,5 +1,6 @@
-#ifndef _PPC64_PPC32_H
-#define _PPC64_PPC32_H
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_SYSCALLS_32_H
+#define _ASM_POWERPC_SYSCALLS_32_H
#include <linux/compat.h>
#include <asm/siginfo.h>
@@ -7,11 +8,6 @@
/*
* Data types and macros for providing 32b PowerPC support.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
/* These are here to support 32-bit syscalls on a 64-bit kernel. */
@@ -61,4 +57,4 @@ struct ucontext32 {
struct mcontext32 uc_mcontext;
};
-#endif /* _PPC64_PPC32_H */
+#endif // _ASM_POWERPC_SYSCALLS_32_H
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h
deleted file mode 100644
index 542bc0f0673f..000000000000
--- a/arch/powerpc/include/asm/systbl.h
+++ /dev/null
@@ -1,364 +0,0 @@
-/*
- * List of powerpc syscalls. For the meaning of the _SPU suffix see
- * arch/powerpc/platforms/cell/spu_callbacks.c
- */
-
-SYSCALL(restart_syscall)
-SYSCALL(exit)
-PPC_SYS(fork)
-SYSCALL_SPU(read)
-SYSCALL_SPU(write)
-COMPAT_SYS_SPU(open)
-SYSCALL_SPU(close)
-SYSCALL_SPU(waitpid)
-SYSCALL_SPU(creat)
-SYSCALL_SPU(link)
-SYSCALL_SPU(unlink)
-COMPAT_SYS(execve)
-SYSCALL_SPU(chdir)
-COMPAT_SYS_SPU(time)
-SYSCALL_SPU(mknod)
-SYSCALL_SPU(chmod)
-SYSCALL_SPU(lchown)
-SYSCALL(ni_syscall)
-OLDSYS(stat)
-COMPAT_SYS_SPU(lseek)
-SYSCALL_SPU(getpid)
-COMPAT_SYS(mount)
-SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount)
-SYSCALL_SPU(setuid)
-SYSCALL_SPU(getuid)
-COMPAT_SYS_SPU(stime)
-COMPAT_SYS(ptrace)
-SYSCALL_SPU(alarm)
-OLDSYS(fstat)
-SYSCALL(pause)
-COMPAT_SYS(utime)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(access)
-SYSCALL_SPU(nice)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(sync)
-SYSCALL_SPU(kill)
-SYSCALL_SPU(rename)
-SYSCALL_SPU(mkdir)
-SYSCALL_SPU(rmdir)
-SYSCALL_SPU(dup)
-SYSCALL_SPU(pipe)
-COMPAT_SYS_SPU(times)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(brk)
-SYSCALL_SPU(setgid)
-SYSCALL_SPU(getgid)
-SYSCALL(signal)
-SYSCALL_SPU(geteuid)
-SYSCALL_SPU(getegid)
-SYSCALL(acct)
-SYSCALL(umount)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(ioctl)
-COMPAT_SYS_SPU(fcntl)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(setpgid)
-SYSCALL(ni_syscall)
-SYSX(sys_ni_syscall,sys_olduname,sys_olduname)
-SYSCALL_SPU(umask)
-SYSCALL_SPU(chroot)
-COMPAT_SYS(ustat)
-SYSCALL_SPU(dup2)
-SYSCALL_SPU(getppid)
-SYSCALL_SPU(getpgrp)
-SYSCALL_SPU(setsid)
-SYS32ONLY(sigaction)
-SYSCALL_SPU(sgetmask)
-SYSCALL_SPU(ssetmask)
-SYSCALL_SPU(setreuid)
-SYSCALL_SPU(setregid)
-#define compat_sys_sigsuspend sys_sigsuspend
-SYS32ONLY(sigsuspend)
-SYSX(sys_ni_syscall,compat_sys_sigpending,sys_sigpending)
-SYSCALL_SPU(sethostname)
-COMPAT_SYS_SPU(setrlimit)
-SYSX(sys_ni_syscall,compat_sys_old_getrlimit,sys_old_getrlimit)
-COMPAT_SYS_SPU(getrusage)
-COMPAT_SYS_SPU(gettimeofday)
-COMPAT_SYS_SPU(settimeofday)
-SYSCALL_SPU(getgroups)
-SYSCALL_SPU(setgroups)
-SYSX(sys_ni_syscall,sys_ni_syscall,ppc_select)
-SYSCALL_SPU(symlink)
-OLDSYS(lstat)
-SYSCALL_SPU(readlink)
-SYSCALL(uselib)
-SYSCALL(swapon)
-SYSCALL(reboot)
-SYSX(sys_ni_syscall,compat_sys_old_readdir,sys_old_readdir)
-SYSCALL_SPU(mmap)
-SYSCALL_SPU(munmap)
-COMPAT_SYS_SPU(truncate)
-COMPAT_SYS_SPU(ftruncate)
-SYSCALL_SPU(fchmod)
-SYSCALL_SPU(fchown)
-SYSCALL_SPU(getpriority)
-SYSCALL_SPU(setpriority)
-SYSCALL(ni_syscall)
-COMPAT_SYS(statfs)
-COMPAT_SYS(fstatfs)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(socketcall)
-SYSCALL_SPU(syslog)
-COMPAT_SYS_SPU(setitimer)
-COMPAT_SYS_SPU(getitimer)
-COMPAT_SYS_SPU(newstat)
-COMPAT_SYS_SPU(newlstat)
-COMPAT_SYS_SPU(newfstat)
-SYSX(sys_ni_syscall,sys_uname,sys_uname)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(vhangup)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(wait4)
-SYSCALL(swapoff)
-COMPAT_SYS_SPU(sysinfo)
-COMPAT_SYS(ipc)
-SYSCALL_SPU(fsync)
-SYS32ONLY(sigreturn)
-PPC_SYS(clone)
-SYSCALL_SPU(setdomainname)
-SYSCALL_SPU(newuname)
-SYSCALL(ni_syscall)
-COMPAT_SYS_SPU(adjtimex)
-SYSCALL_SPU(mprotect)
-SYSX(sys_ni_syscall,compat_sys_sigprocmask,sys_sigprocmask)
-SYSCALL(ni_syscall)
-SYSCALL(init_module)
-SYSCALL(delete_module)
-SYSCALL(ni_syscall)
-SYSCALL(quotactl)
-SYSCALL_SPU(getpgid)
-SYSCALL_SPU(fchdir)
-SYSCALL_SPU(bdflush)
-SYSCALL_SPU(sysfs)
-SYSX_SPU(ppc64_personality,ppc64_personality,sys_personality)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(setfsuid)
-SYSCALL_SPU(setfsgid)
-SYSCALL_SPU(llseek)
-COMPAT_SYS_SPU(getdents)
-SYSX_SPU(sys_select,ppc32_select,sys_select)
-SYSCALL_SPU(flock)
-SYSCALL_SPU(msync)
-COMPAT_SYS_SPU(readv)
-COMPAT_SYS_SPU(writev)
-SYSCALL_SPU(getsid)
-SYSCALL_SPU(fdatasync)
-COMPAT_SYS(sysctl)
-SYSCALL_SPU(mlock)
-SYSCALL_SPU(munlock)
-SYSCALL_SPU(mlockall)
-SYSCALL_SPU(munlockall)
-SYSCALL_SPU(sched_setparam)
-SYSCALL_SPU(sched_getparam)
-SYSCALL_SPU(sched_setscheduler)
-SYSCALL_SPU(sched_getscheduler)
-SYSCALL_SPU(sched_yield)
-SYSCALL_SPU(sched_get_priority_max)
-SYSCALL_SPU(sched_get_priority_min)
-COMPAT_SYS_SPU(sched_rr_get_interval)
-COMPAT_SYS_SPU(nanosleep)
-SYSCALL_SPU(mremap)
-SYSCALL_SPU(setresuid)
-SYSCALL_SPU(getresuid)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(poll)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(setresgid)
-SYSCALL_SPU(getresgid)
-SYSCALL_SPU(prctl)
-COMPAT_SYS(rt_sigreturn)
-COMPAT_SYS(rt_sigaction)
-COMPAT_SYS(rt_sigprocmask)
-COMPAT_SYS(rt_sigpending)
-COMPAT_SYS(rt_sigtimedwait)
-COMPAT_SYS(rt_sigqueueinfo)
-COMPAT_SYS(rt_sigsuspend)
-COMPAT_SYS_SPU(pread64)
-COMPAT_SYS_SPU(pwrite64)
-SYSCALL_SPU(chown)
-SYSCALL_SPU(getcwd)
-SYSCALL_SPU(capget)
-SYSCALL_SPU(capset)
-COMPAT_SYS(sigaltstack)
-SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-PPC_SYS(vfork)
-COMPAT_SYS_SPU(getrlimit)
-COMPAT_SYS_SPU(readahead)
-SYS32ONLY(mmap2)
-SYS32ONLY(truncate64)
-SYS32ONLY(ftruncate64)
-SYSX(sys_ni_syscall,sys_stat64,sys_stat64)
-SYSX(sys_ni_syscall,sys_lstat64,sys_lstat64)
-SYSX(sys_ni_syscall,sys_fstat64,sys_fstat64)
-SYSCALL(pciconfig_read)
-SYSCALL(pciconfig_write)
-SYSCALL(pciconfig_iobase)
-SYSCALL(ni_syscall)
-SYSCALL_SPU(getdents64)
-SYSCALL_SPU(pivot_root)
-SYSX(sys_ni_syscall,compat_sys_fcntl64,sys_fcntl64)
-SYSCALL_SPU(madvise)
-SYSCALL_SPU(mincore)
-SYSCALL_SPU(gettid)
-SYSCALL_SPU(tkill)
-SYSCALL_SPU(setxattr)
-SYSCALL_SPU(lsetxattr)
-SYSCALL_SPU(fsetxattr)
-SYSCALL_SPU(getxattr)
-SYSCALL_SPU(lgetxattr)
-SYSCALL_SPU(fgetxattr)
-SYSCALL_SPU(listxattr)
-SYSCALL_SPU(llistxattr)
-SYSCALL_SPU(flistxattr)
-SYSCALL_SPU(removexattr)
-SYSCALL_SPU(lremovexattr)
-SYSCALL_SPU(fremovexattr)
-COMPAT_SYS_SPU(futex)
-COMPAT_SYS_SPU(sched_setaffinity)
-COMPAT_SYS_SPU(sched_getaffinity)
-SYSCALL(ni_syscall)
-SYSCALL(ni_syscall)
-SYS32ONLY(sendfile64)
-COMPAT_SYS_SPU(io_setup)
-SYSCALL_SPU(io_destroy)
-COMPAT_SYS_SPU(io_getevents)
-COMPAT_SYS_SPU(io_submit)
-SYSCALL_SPU(io_cancel)
-SYSCALL(set_tid_address)
-SYSX_SPU(sys_fadvise64,ppc32_fadvise64,sys_fadvise64)
-SYSCALL(exit_group)
-COMPAT_SYS(lookup_dcookie)
-SYSCALL_SPU(epoll_create)
-SYSCALL_SPU(epoll_ctl)
-SYSCALL_SPU(epoll_wait)
-SYSCALL_SPU(remap_file_pages)
-SYSX_SPU(sys_timer_create,compat_sys_timer_create,sys_timer_create)
-COMPAT_SYS_SPU(timer_settime)
-COMPAT_SYS_SPU(timer_gettime)
-SYSCALL_SPU(timer_getoverrun)
-SYSCALL_SPU(timer_delete)
-COMPAT_SYS_SPU(clock_settime)
-COMPAT_SYS_SPU(clock_gettime)
-COMPAT_SYS_SPU(clock_getres)
-COMPAT_SYS_SPU(clock_nanosleep)
-SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext)
-SYSCALL_SPU(tgkill)
-COMPAT_SYS_SPU(utimes)
-COMPAT_SYS_SPU(statfs64)
-COMPAT_SYS_SPU(fstatfs64)
-SYSX(sys_ni_syscall,ppc_fadvise64_64,ppc_fadvise64_64)
-PPC_SYS_SPU(rtas)
-OLDSYS(debug_setcontext)
-SYSCALL(ni_syscall)
-COMPAT_SYS(migrate_pages)
-COMPAT_SYS(mbind)
-COMPAT_SYS(get_mempolicy)
-COMPAT_SYS(set_mempolicy)
-COMPAT_SYS(mq_open)
-SYSCALL(mq_unlink)
-COMPAT_SYS(mq_timedsend)
-COMPAT_SYS(mq_timedreceive)
-COMPAT_SYS(mq_notify)
-COMPAT_SYS(mq_getsetattr)
-COMPAT_SYS(kexec_load)
-SYSCALL(add_key)
-SYSCALL(request_key)
-COMPAT_SYS(keyctl)
-COMPAT_SYS(waitid)
-SYSCALL(ioprio_set)
-SYSCALL(ioprio_get)
-SYSCALL(inotify_init)
-SYSCALL(inotify_add_watch)
-SYSCALL(inotify_rm_watch)
-SYSCALL(spu_run)
-SYSCALL(spu_create)
-COMPAT_SYS(pselect6)
-COMPAT_SYS(ppoll)
-SYSCALL_SPU(unshare)
-SYSCALL_SPU(splice)
-SYSCALL_SPU(tee)
-COMPAT_SYS_SPU(vmsplice)
-COMPAT_SYS_SPU(openat)
-SYSCALL_SPU(mkdirat)
-SYSCALL_SPU(mknodat)
-SYSCALL_SPU(fchownat)
-COMPAT_SYS_SPU(futimesat)
-SYSX_SPU(sys_newfstatat,sys_fstatat64,sys_fstatat64)
-SYSCALL_SPU(unlinkat)
-SYSCALL_SPU(renameat)
-SYSCALL_SPU(linkat)
-SYSCALL_SPU(symlinkat)
-SYSCALL_SPU(readlinkat)
-SYSCALL_SPU(fchmodat)
-SYSCALL_SPU(faccessat)
-COMPAT_SYS_SPU(get_robust_list)
-COMPAT_SYS_SPU(set_robust_list)
-COMPAT_SYS_SPU(move_pages)
-SYSCALL_SPU(getcpu)
-COMPAT_SYS(epoll_pwait)
-COMPAT_SYS_SPU(utimensat)
-COMPAT_SYS_SPU(signalfd)
-SYSCALL_SPU(timerfd_create)
-SYSCALL_SPU(eventfd)
-COMPAT_SYS_SPU(sync_file_range2)
-COMPAT_SYS(fallocate)
-SYSCALL(subpage_prot)
-COMPAT_SYS_SPU(timerfd_settime)
-COMPAT_SYS_SPU(timerfd_gettime)
-COMPAT_SYS_SPU(signalfd4)
-SYSCALL_SPU(eventfd2)
-SYSCALL_SPU(epoll_create1)
-SYSCALL_SPU(dup3)
-SYSCALL_SPU(pipe2)
-SYSCALL(inotify_init1)
-SYSCALL_SPU(perf_event_open)
-COMPAT_SYS_SPU(preadv)
-COMPAT_SYS_SPU(pwritev)
-COMPAT_SYS(rt_tgsigqueueinfo)
-SYSCALL(fanotify_init)
-COMPAT_SYS(fanotify_mark)
-SYSCALL_SPU(prlimit64)
-SYSCALL_SPU(socket)
-SYSCALL_SPU(bind)
-SYSCALL_SPU(connect)
-SYSCALL_SPU(listen)
-SYSCALL_SPU(accept)
-SYSCALL_SPU(getsockname)
-SYSCALL_SPU(getpeername)
-SYSCALL_SPU(socketpair)
-SYSCALL_SPU(send)
-SYSCALL_SPU(sendto)
-COMPAT_SYS_SPU(recv)
-COMPAT_SYS_SPU(recvfrom)
-SYSCALL_SPU(shutdown)
-COMPAT_SYS_SPU(setsockopt)
-COMPAT_SYS_SPU(getsockopt)
-COMPAT_SYS_SPU(sendmsg)
-COMPAT_SYS_SPU(recvmsg)
-COMPAT_SYS_SPU(recvmmsg)
-SYSCALL_SPU(accept4)
-SYSCALL_SPU(name_to_handle_at)
-COMPAT_SYS_SPU(open_by_handle_at)
-COMPAT_SYS_SPU(clock_adjtime)
-SYSCALL_SPU(syncfs)
-COMPAT_SYS_SPU(sendmmsg)
-SYSCALL_SPU(setns)
-COMPAT_SYS(process_vm_readv)
-COMPAT_SYS(process_vm_writev)
-SYSCALL(finit_module)
-SYSCALL(ni_syscall) /* sys_kcmp */
-SYSCALL_SPU(sched_setattr)
-SYSCALL_SPU(sched_getattr)
-SYSCALL_SPU(renameat2)
diff --git a/arch/powerpc/include/asm/systemcfg.h b/arch/powerpc/include/asm/systemcfg.h
new file mode 100644
index 000000000000..2f9b1d6a5c98
--- /dev/null
+++ b/arch/powerpc/include/asm/systemcfg.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _SYSTEMCFG_H
+#define _SYSTEMCFG_H
+
+/*
+ * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM
+ * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
+ * IBM Corp.
+ */
+
+#ifdef CONFIG_PPC64
+
+/*
+ * If the major version changes we are incompatible.
+ * Minor version changes are a hint.
+ */
+#define SYSTEMCFG_MAJOR 1
+#define SYSTEMCFG_MINOR 1
+
+#include <linux/types.h>
+
+struct systemcfg {
+ __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */
+ struct { /* Systemcfg version numbers */
+ __u32 major; /* Major number 0x10 */
+ __u32 minor; /* Minor number 0x14 */
+ } version;
+
+ /* Note about the platform flags: it now only contains the lpar
+ * bit. The actual platform number is dead and buried
+ */
+ __u32 platform; /* Platform flags 0x18 */
+ __u32 processor; /* Processor type 0x1C */
+ __u64 processorCount; /* # of physical processors 0x20 */
+ __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */
+ __u64 tb_orig_stamp; /* (NU) Timebase at boot 0x30 */
+ __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
+ __u64 tb_to_xs; /* (NU) Inverse of TB to 2^20 0x40 */
+ __u64 stamp_xsec; /* (NU) 0x48 */
+ __u64 tb_update_count; /* (NU) Timebase atomicity ctr 0x50 */
+ __u32 tz_minuteswest; /* (NU) Min. west of Greenwich 0x58 */
+ __u32 tz_dsttime; /* (NU) Type of dst correction 0x5C */
+ __u32 dcache_size; /* L1 d-cache size 0x60 */
+ __u32 dcache_line_size; /* L1 d-cache line size 0x64 */
+ __u32 icache_size; /* L1 i-cache size 0x68 */
+ __u32 icache_line_size; /* L1 i-cache line size 0x6C */
+};
+
+extern struct systemcfg *systemcfg;
+
+#endif /* CONFIG_PPC64 */
+#endif /* _SYSTEMCFG_H */
diff --git a/arch/powerpc/include/asm/task_size_32.h b/arch/powerpc/include/asm/task_size_32.h
new file mode 100644
index 000000000000..de7290ee770f
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_32.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_32_H
+#define _ASM_POWERPC_TASK_SIZE_32_H
+
+#if CONFIG_TASK_SIZE > CONFIG_KERNEL_START
+#error User TASK_SIZE overlaps with KERNEL_START address
+#endif
+
+#define TASK_SIZE (CONFIG_TASK_SIZE)
+
+/*
+ * This decides where the kernel will search for a free chunk of vm space during
+ * mmap's.
+ */
+#define TASK_UNMAPPED_BASE (TASK_SIZE / 8 * 3)
+
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#define STACK_TOP TASK_SIZE
+#define STACK_TOP_MAX STACK_TOP
+
+#endif /* _ASM_POWERPC_TASK_SIZE_32_H */
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
new file mode 100644
index 000000000000..5a709951c901
--- /dev/null
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_TASK_SIZE_64_H
+#define _ASM_POWERPC_TASK_SIZE_64_H
+
+/*
+ * 64-bit user address space can have multiple limits
+ * For now supported values are:
+ */
+#define TASK_SIZE_64TB (0x0000400000000000UL)
+#define TASK_SIZE_128TB (0x0000800000000000UL)
+#define TASK_SIZE_512TB (0x0002000000000000UL)
+#define TASK_SIZE_1PB (0x0004000000000000UL)
+#define TASK_SIZE_2PB (0x0008000000000000UL)
+
+/*
+ * With 52 bits in the address we can support up to 4PB of range.
+ */
+#define TASK_SIZE_4PB (0x0010000000000000UL)
+
+/*
+ * For now 512TB is only supported with book3s and 64K linux page size.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+/*
+ * Max value currently used:
+ */
+#define TASK_SIZE_USER64 TASK_SIZE_4PB
+#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
+#define TASK_CONTEXT_SIZE TASK_SIZE_512TB
+#else
+#define TASK_SIZE_USER64 TASK_SIZE_64TB
+#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
+
+/*
+ * We don't need to allocate extended context ids for 4K page size, because we
+ * limit the max effective address on this config to 64TB.
+ */
+#define TASK_CONTEXT_SIZE TASK_SIZE_64TB
+#endif
+
+/*
+ * 32-bit user address space is 4GB - 1 page
+ * (this 1 page is needed so referencing of 0xFFFFFFFF generates EFAULT
+ */
+#define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE))
+
+#define TASK_SIZE (is_32bit_task() ? TASK_SIZE_USER32 : TASK_SIZE_USER64)
+
+#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
+#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
+
+/*
+ * This decides where the kernel will search for a free chunk of vm space during
+ * mmap's.
+ */
+#define TASK_UNMAPPED_BASE \
+ ((is_32bit_task()) ? TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64)
+
+/*
+ * Initial task size value for user applications. For book3s 64 we start
+ * with 128TB and conditionally enable up to 512TB
+ */
+#ifdef CONFIG_PPC_BOOK3S_64
+#define DEFAULT_MAP_WINDOW \
+ ((is_32bit_task()) ? TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
+#else
+#define DEFAULT_MAP_WINDOW TASK_SIZE
+#endif
+
+#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
+#define STACK_TOP_USER32 TASK_SIZE_USER32
+#define STACK_TOP_MAX TASK_SIZE_USER64
+#define STACK_TOP (is_32bit_task() ? STACK_TOP_USER32 : STACK_TOP_USER64)
+
+#define arch_get_mmap_base(addr, base) \
+ (((addr) > DEFAULT_MAP_WINDOW) ? (base) + TASK_SIZE - DEFAULT_MAP_WINDOW : (base))
+
+#define arch_get_mmap_end(addr, len, flags) \
+ (((addr) > DEFAULT_MAP_WINDOW) || \
+ (((flags) & MAP_FIXED) && ((addr) + (len) > DEFAULT_MAP_WINDOW)) ? TASK_SIZE : \
+ DEFAULT_MAP_WINDOW)
+
+#endif /* _ASM_POWERPC_TASK_SIZE_64_H */
diff --git a/arch/powerpc/include/asm/tce.h b/arch/powerpc/include/asm/tce.h
index 743f36b38e5d..0c34d2756d92 100644
--- a/arch/powerpc/include/asm/tce.h
+++ b/arch/powerpc/include/asm/tce.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
* Rewrite, cleanup:
* Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef _ASM_POWERPC_TCE_H
@@ -31,19 +18,8 @@
*/
#define TCE_VB 0
#define TCE_PCI 1
-#define TCE_PCI_SWINV_CREATE 2
-#define TCE_PCI_SWINV_FREE 4
-#define TCE_PCI_SWINV_PAIR 8
-
-/* TCE page size is 4096 bytes (1 << 12) */
-
-#define TCE_SHIFT 12
-#define TCE_PAGE_SIZE (1 << TCE_SHIFT)
#define TCE_ENTRY_SIZE 8 /* each TCE is 64 bits */
-
-#define TCE_RPN_MASK 0xfffffffffful /* 40-bit RPN (4K pages) */
-#define TCE_RPN_SHIFT 12
#define TCE_VALID 0x800 /* TCE valid */
#define TCE_ALLIO 0x400 /* TCE valid for all lpars */
#define TCE_PCI_WRITE 0x2 /* write from PCI allowed */
diff --git a/arch/powerpc/include/asm/termios.h b/arch/powerpc/include/asm/termios.h
deleted file mode 100644
index b8353e2032d0..000000000000
--- a/arch/powerpc/include/asm/termios.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Liberally adapted from alpha/termios.h. In particular, the c_cc[]
- * fields have been reordered so that termio & termios share the
- * common subset in the same order (for brain dead programs that don't
- * know or care about the differences).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifndef _ASM_POWERPC_TERMIOS_H
-#define _ASM_POWERPC_TERMIOS_H
-
-#include <uapi/asm/termios.h>
-
-/* ^C ^\ del ^U ^D 1 0 0 0 0 ^W ^R ^Z ^Q ^S ^V ^U */
-#define INIT_C_CC "\003\034\177\025\004\001\000\000\000\000\027\022\032\021\023\026\025"
-
-#include <asm-generic/termios-base.h>
-
-#endif /* _ASM_POWERPC_TERMIOS_H */
diff --git a/arch/powerpc/include/asm/text-patching.h b/arch/powerpc/include/asm/text-patching.h
new file mode 100644
index 000000000000..e7f14720f630
--- /dev/null
+++ b/arch/powerpc/include/asm/text-patching.h
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_CODE_PATCHING_H
+#define _ASM_POWERPC_CODE_PATCHING_H
+
+/*
+ * Copyright 2008, Michael Ellerman, IBM Corporation.
+ */
+
+#include <asm/types.h>
+#include <asm/ppc-opcode.h>
+#include <linux/string.h>
+#include <linux/kallsyms.h>
+#include <asm/asm-compat.h>
+#include <asm/inst.h>
+
+/* Flags for create_branch:
+ * "b" == create_branch(addr, target, 0);
+ * "ba" == create_branch(addr, target, BRANCH_ABSOLUTE);
+ * "bl" == create_branch(addr, target, BRANCH_SET_LINK);
+ * "bla" == create_branch(addr, target, BRANCH_ABSOLUTE | BRANCH_SET_LINK);
+ */
+#define BRANCH_SET_LINK 0x1
+#define BRANCH_ABSOLUTE 0x2
+
+/*
+ * The PowerPC branch instruction layout is:
+ *
+ * 0 6 30 31
+ * +---------+----------------+---+---+
+ * | opcode | LI |AA |LK |
+ * +---------+----------------+---+---+
+ * Where AA = 0 and LK = 0
+ *
+ * LI is a signed 24-bit integer. The real branch offset is computed
+ * by: imm32 = SignExtend(LI:'0b00', 32);
+ *
+ * So the maximum forward branch should be:
+ * (0x007fffff << 2) = 0x01fffffc = 0x1fffffc
+ * The maximum backward branch should be:
+ * (0xff800000 << 2) = 0xfe000000 = -0x2000000
+ *
+ * Return true when @offset lies in [-0x2000000, 0x1fffffc] and its low
+ * two bits are clear (i.e. it is a multiple of 4), false otherwise.
+ */
+static inline bool is_offset_in_branch_range(long offset)
+{
+ return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3));
+}
+
+/*
+ * Return true when @offset lies in [-0x8000, 0x7fff] and its low two bits
+ * are clear, false otherwise. Because of the alignment test the largest
+ * accepted value is 0x7ffc.
+ */
+static inline bool is_offset_in_cond_branch_range(long offset)
+{
+ return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3);
+}
+
+/*
+ * Build a branch instruction located at @addr that goes to @target and
+ * store it in *@instr.
+ *
+ * @flags is a combination of BRANCH_ABSOLUTE and BRANCH_SET_LINK. Without
+ * BRANCH_ABSOLUTE the encoded offset is @target relative to @addr; with it
+ * @target itself is encoded.
+ *
+ * Returns 0 on success, or 1 when the offset is not accepted by
+ * is_offset_in_branch_range(). In the failure case *@instr is left as
+ * ppc_inst(0).
+ */
+static inline int create_branch(ppc_inst_t *instr, const u32 *addr,
+ unsigned long target, int flags)
+{
+ long offset;
+
+ /* Start from a known value so a failed call never leaves stale data. */
+ *instr = ppc_inst(0);
+ offset = target;
+ if (! (flags & BRANCH_ABSOLUTE))
+ offset = offset - (unsigned long)addr;
+
+ /* Check we can represent the target in the instruction format */
+ if (!is_offset_in_branch_range(offset))
+ return 1;
+
+ /*
+ * Mask out the flags and target, so they don't step on each other.
+ * 0x48000000 places the value 18 in the top six (opcode) bits.
+ */
+ *instr = ppc_inst(0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC));
+
+ return 0;
+}
+
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+ unsigned long target, int flags);
+int patch_branch(u32 *addr, unsigned long target, int flags);
+int patch_instruction(u32 *addr, ppc_inst_t instr);
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr);
+
+/*
+ * The data patching functions patch_uint() and patch_ulong(), etc., must be
+ * called on aligned addresses.
+ *
+ * The instruction patching functions patch_instruction() and similar must be
+ * called on addresses satisfying instruction alignment requirements.
+ */
+
+#ifdef CONFIG_PPC64
+
+int patch_uint(void *addr, unsigned int val);
+int patch_ulong(void *addr, unsigned long val);
+
+#define patch_u64 patch_ulong
+
+#else
+
+/*
+ * !CONFIG_PPC64 variant: patch the unsigned int @val at @addr.
+ *
+ * Returns -EINVAL when @addr is not aligned to sizeof(unsigned int),
+ * otherwise the result of patch_instruction() with @val wrapped as an
+ * instruction.
+ */
+static inline int patch_uint(void *addr, unsigned int val)
+{
+ if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int)))
+ return -EINVAL;
+
+ return patch_instruction(addr, ppc_inst(val));
+}
+
+/*
+ * !CONFIG_PPC64 variant: patch the unsigned long @val at @addr.
+ *
+ * Returns -EINVAL when @addr is not aligned to sizeof(unsigned long),
+ * otherwise the result of patch_instruction() with @val wrapped as an
+ * instruction.
+ * NOTE(review): this presumably relies on unsigned long being the same
+ * width as an instruction on these builds - confirm.
+ */
+static inline int patch_ulong(void *addr, unsigned long val)
+{
+ if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long)))
+ return -EINVAL;
+
+ return patch_instruction(addr, ppc_inst(val));
+}
+
+#endif
+
+#define patch_u32 patch_uint
+
+/*
+ * A patch site stores a signed 32-bit offset relative to its own location.
+ * Return the address obtained by adding that offset to the address of @site.
+ */
+static inline unsigned long patch_site_addr(s32 *site)
+{
+ return (unsigned long)site + *site;
+}
+
+/*
+ * Patch @instr at the address that @site resolves to (see
+ * patch_site_addr()). Returns whatever patch_instruction() returns.
+ */
+static inline int patch_instruction_site(s32 *site, ppc_inst_t instr)
+{
+ return patch_instruction((u32 *)patch_site_addr(site), instr);
+}
+
+/*
+ * Patch a branch to @target, using @flags, at the address that @site
+ * resolves to (see patch_site_addr()). Returns whatever patch_branch()
+ * returns.
+ */
+static inline int patch_branch_site(s32 *site, unsigned long target, int flags)
+{
+ return patch_branch((u32 *)patch_site_addr(site), target, flags);
+}
+
+/*
+ * Read the word at @addr, clear the bits in @clr, set the bits in @set and
+ * patch the result back to @addr. Bits present in both masks end up set,
+ * because @set is applied after @clr. Returns whatever patch_instruction()
+ * returns.
+ */
+static inline int modify_instruction(unsigned int *addr, unsigned int clr,
+ unsigned int set)
+{
+ return patch_instruction(addr, ppc_inst((*addr & ~clr) | set));
+}
+
+/*
+ * Same as modify_instruction(), applied to the address that @site resolves
+ * to (see patch_site_addr()).
+ */
+static inline int modify_instruction_site(s32 *site, unsigned int clr, unsigned int set)
+{
+ return modify_instruction((unsigned int *)patch_site_addr(site), clr, set);
+}
+
+/* Return the primary opcode of @instr, limited to its low six bits. */
+static inline unsigned int branch_opcode(ppc_inst_t instr)
+{
+ return ppc_inst_primary_opcode(instr) & 0x3F;
+}
+
+/* Return non-zero when @instr has primary opcode 18. */
+static inline int instr_is_branch_iform(ppc_inst_t instr)
+{
+ return branch_opcode(instr) == 18;
+}
+
+/* Return non-zero when @instr has primary opcode 16. */
+static inline int instr_is_branch_bform(ppc_inst_t instr)
+{
+ return branch_opcode(instr) == 16;
+}
+
+int instr_is_relative_branch(ppc_inst_t instr);
+int instr_is_relative_link_branch(ppc_inst_t instr);
+unsigned long branch_target(const u32 *instr);
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src);
+bool is_conditional_branch(ppc_inst_t instr);
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 (PPC_RAW_LIS(_R2, 0))
+#define ADDIS_R2_R12 (PPC_RAW_ADDIS(_R2, _R12, 0))
+#define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0))
+
+
+/*
+ * Return the address to use as the entry point of @func.
+ *
+ * - CONFIG_PPC64_ELF_ABI_V2: the local entry point, i.e. @func advanced
+ *   past the two-instruction global entry sequence when that sequence is
+ *   found at @func, otherwise @func unchanged.
+ * - CONFIG_PPC64_ELF_ABI_V1: the ->addr member of the function descriptor
+ *   that @func points to.
+ * - otherwise: @func itself.
+ */
+static inline unsigned long ppc_function_entry(void *func)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ u32 *insn = func;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ /*
+ * Only the upper 16 bits selected by OP_RT_RA_MASK are compared, so
+ * the XXXX part of each instruction does not affect the match.
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (unsigned long)(insn + 2);
+ else
+ return (unsigned long)func;
+#elif defined(CONFIG_PPC64_ELF_ABI_V1)
+ /*
+ * On PPC64 ABIv1 the function pointer actually points to the
+ * function's descriptor. The first entry in the descriptor is the
+ * address of the function text.
+ */
+ return ((struct func_desc *)func)->addr;
+#else
+ return (unsigned long)func;
+#endif
+}
+
+/*
+ * Return the global entry point of @func: @func itself under
+ * CONFIG_PPC64_ELF_ABI_V2, and the result of ppc_function_entry() in every
+ * other configuration.
+ */
+static inline unsigned long ppc_global_function_entry(void *func)
+{
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ /* PPC64 ABIv2 the global entry point is at the address */
+ return (unsigned long)func;
+#else
+ /* All other cases there is no change vs ppc_function_entry() */
+ return ppc_function_entry(func);
+#endif
+}
+
+/*
+ * Wrapper around kallsyms_lookup_name() to return function entry address:
+ * - For ABIv1, we lookup the dot variant, falling back to the name as
+ *   given when the dot variant we constructed is not found.
+ * - For ABIv2, we return the local entry point.
+ * - Otherwise the plain kallsyms_lookup_name() result is returned.
+ *
+ * Returns 0 when the symbol is not found, or (ABIv1 only) when @name is
+ * KSYM_NAME_LEN characters or longer.
+ */
+static inline unsigned long ppc_kallsyms_lookup_name(const char *name)
+{
+ unsigned long addr;
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+ /* check for dot variant */
+ /* One extra byte leaves room for the '.' that may be prepended. */
+ char dot_name[1 + KSYM_NAME_LEN];
+ bool dot_appended = false;
+
+ /* Refuse names that do not fit in KSYM_NAME_LEN. */
+ if (strnlen(name, KSYM_NAME_LEN) >= KSYM_NAME_LEN)
+ return 0;
+
+ if (name[0] != '.') {
+ /* Build ".<name>" */
+ dot_name[0] = '.';
+ dot_name[1] = '\0';
+ strlcat(dot_name, name, sizeof(dot_name));
+ dot_appended = true;
+ } else {
+ /* Already a dot symbol: copy it unchanged. */
+ dot_name[0] = '\0';
+ strlcat(dot_name, name, sizeof(dot_name));
+ }
+ addr = kallsyms_lookup_name(dot_name);
+ if (!addr && dot_appended)
+ /* Let's try the original non-dot symbol lookup */
+ addr = kallsyms_lookup_name(name);
+#elif defined(CONFIG_PPC64_ELF_ABI_V2)
+ addr = kallsyms_lookup_name(name);
+ /* Only translate a successful lookup; 0 stays 0. */
+ if (addr)
+ addr = ppc_function_entry((void *)addr);
+#else
+ addr = kallsyms_lookup_name(name);
+#endif
+ return addr;
+}
+
+/*
+ * Some instruction encodings commonly used in dynamic ftracing
+ * and function live patching.
+ */
+
+/* This must match the definition of STK_GOT in <asm/ppc_asm.h> */
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define R2_STACK_OFFSET 24
+#else
+#define R2_STACK_OFFSET 40
+#endif
+
+#define PPC_INST_LD_TOC PPC_RAW_LD(_R2, _R1, R2_STACK_OFFSET)
+
+/* usually preceded by a mflr r0 */
+#define PPC_INST_STD_LR PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF)
+
+#endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index b034ecdb7c74..b0f200aba2b3 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* thread_info.h: PowerPC low-level thread information
* adapted from the i386 version by Paul Mackerras
*
@@ -8,43 +9,64 @@
#ifndef _ASM_POWERPC_THREAD_INFO_H
#define _ASM_POWERPC_THREAD_INFO_H
+#include <asm/asm-const.h>
+#include <asm/page.h>
+
#ifdef __KERNEL__
-/* We have 8k stacks on ppc32 and 16k on ppc64 */
+#if defined(CONFIG_KASAN) && CONFIG_THREAD_SHIFT < 15
+#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
+#else
+#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT
+#endif
-#if defined(CONFIG_PPC64)
-#define THREAD_SHIFT 14
-#elif defined(CONFIG_PPC_256K_PAGES)
-#define THREAD_SHIFT 15
+#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT
+#define THREAD_SHIFT PAGE_SHIFT
#else
-#define THREAD_SHIFT 13
+#define THREAD_SHIFT MIN_THREAD_SHIFT
#endif
#define THREAD_SIZE (1 << THREAD_SHIFT)
-#ifdef CONFIG_PPC64
-#define CURRENT_THREAD_INFO(dest, sp) clrrdi dest, sp, THREAD_SHIFT
+/*
+ * By aligning VMAP'd stacks to 2 * THREAD_SIZE, we can detect overflow by
+ * checking sp & (1 << THREAD_SHIFT), which we can do cheaply in the entry
+ * assembly.
+ */
+#ifdef CONFIG_VMAP_STACK
+#define THREAD_ALIGN_SHIFT (THREAD_SHIFT + 1)
#else
-#define CURRENT_THREAD_INFO(dest, sp) rlwinm dest, sp, 0, 0, 31-THREAD_SHIFT
+#define THREAD_ALIGN_SHIFT THREAD_SHIFT
#endif
-#ifndef __ASSEMBLY__
+#define THREAD_ALIGN (1 << THREAD_ALIGN_SHIFT)
+
+#ifndef __ASSEMBLER__
#include <linux/cache.h>
#include <asm/processor.h>
-#include <asm/page.h>
-#include <linux/stringify.h>
+#include <asm/accounting.h>
+#include <asm/ppc_asm.h>
+#define SLB_PRELOAD_NR 16U
/*
* low level task data.
*/
struct thread_info {
- struct task_struct *task; /* main task structure */
- struct exec_domain *exec_domain; /* execution domain */
- int cpu; /* cpu we're on */
int preempt_count; /* 0 => preemptable,
<0 => BUG */
- struct restart_block restart_block;
+#ifdef CONFIG_SMP
+ unsigned int cpu;
+#endif
unsigned long local_flags; /* private flags for thread */
+#ifdef CONFIG_LIVEPATCH_64
+ unsigned long *livepatch_sp;
+#endif
+#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
+ struct cpu_accounting_data accounting;
+#endif
+ unsigned char slb_preload_nr;
+ unsigned char slb_preload_tail;
+ u32 slb_preload_esid[SLB_PRELOAD_NR];
/* low level flags - has atomic operations done on it */
unsigned long flags ____cacheline_aligned_in_smp;
@@ -55,32 +77,19 @@ struct thread_info {
*/
#define INIT_THREAD_INFO(tsk) \
{ \
- .task = &tsk, \
- .exec_domain = &default_exec_domain, \
- .cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
.flags = 0, \
}
-#define init_thread_info (init_thread_union.thread_info)
-#define init_stack (init_thread_union.stack)
-
#define THREAD_SIZE_ORDER (THREAD_SHIFT - PAGE_SHIFT)
/* how to get the thread information struct from C */
-static inline struct thread_info *current_thread_info(void)
-{
- register unsigned long sp asm("r1");
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
- /* gcc4, at least, is smart enough to turn this into a single
- * rlwinm for ppc32 and clrrdi for ppc64 */
- return (struct thread_info *)(sp & ~(THREAD_SIZE-1));
-}
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* thread information flag bit numbers
@@ -88,13 +97,13 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
- TIF_NEED_RESCHED */
-#define TIF_32BIT 4 /* 32 bit binary */
+#define TIF_NOTIFY_SIGNAL 3 /* signal notifications exist */
+#define TIF_SYSCALL_EMU 4 /* syscall emulation active */
#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
+#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
-#define TIF_NOHZ 9 /* in adaptive nohz mode */
+#define TIF_NEED_RESCHED_LAZY 9 /* Scheduler driven lazy preemption */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_RESTOREALL 11 /* Restore all regs (implies NOERROR) */
#define TIF_NOERROR 12 /* Force successful syscall return */
@@ -107,14 +116,19 @@ static inline struct thread_info *current_thread_info(void)
#if defined(CONFIG_PPC64)
#define TIF_ELF2ABI 18 /* function descriptors must die! */
#endif
+#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_32BIT 20 /* 32 bit binary */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1<<TIF_NEED_RESCHED_LAZY)
+#define _TIF_NOTIFY_SIGNAL (1<<TIF_NOTIFY_SIGNAL)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_32BIT (1<<TIF_32BIT)
#define _TIF_RESTORE_TM (1<<TIF_RESTORE_TM)
+#define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
@@ -124,53 +138,36 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
-#define _TIF_NOHZ (1<<TIF_NOHZ)
-#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
+#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
+#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ _TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_RESTORE_TM)
+ _TIF_NEED_RESCHED_LAZY | _TIF_NOTIFY_RESUME | \
+ _TIF_UPROBE | _TIF_RESTORE_TM | \
+ _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL)
+
#define _TIF_PERSYSCALL_MASK (_TIF_RESTOREALL|_TIF_NOERROR)
/* Bits in local_flags */
/* Don't move TLF_NAPPING without adjusting the code in entry_32.S */
#define TLF_NAPPING 0 /* idle thread enabled NAP mode */
#define TLF_SLEEPING 1 /* suspend code enabled SLEEP mode */
-#define TLF_RESTORE_SIGMASK 2 /* Restore signal mask in do_signal */
#define TLF_LAZY_MMU 3 /* tlb_batch is active */
#define TLF_RUNLATCH 4 /* Is the runlatch enabled? */
#define _TLF_NAPPING (1 << TLF_NAPPING)
#define _TLF_SLEEPING (1 << TLF_SLEEPING)
-#define _TLF_RESTORE_SIGMASK (1 << TLF_RESTORE_SIGMASK)
#define _TLF_LAZY_MMU (1 << TLF_LAZY_MMU)
#define _TLF_RUNLATCH (1 << TLF_RUNLATCH)
-#ifndef __ASSEMBLY__
-#define HAVE_SET_RESTORE_SIGMASK 1
-static inline void set_restore_sigmask(void)
-{
- struct thread_info *ti = current_thread_info();
- ti->local_flags |= _TLF_RESTORE_SIGMASK;
- WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
-}
-static inline void clear_restore_sigmask(void)
-{
- current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
-}
-static inline bool test_restore_sigmask(void)
-{
- return current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK;
-}
-static inline bool test_and_clear_restore_sigmask(void)
+#ifndef __ASSEMBLER__
+
+static inline void clear_thread_local_flags(unsigned int flags)
{
struct thread_info *ti = current_thread_info();
- if (!(ti->local_flags & _TLF_RESTORE_SIGMASK))
- return false;
- ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
- return true;
+ ti->local_flags &= ~flags;
}
static inline bool test_thread_local_flags(unsigned int flags)
@@ -179,10 +176,14 @@ static inline bool test_thread_local_flags(unsigned int flags)
return (ti->local_flags & flags) != 0;
}
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_COMPAT
#define is_32bit_task() (test_thread_flag(TIF_32BIT))
+#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT))
+#define clear_tsk_compat_task(tsk) (clear_tsk_thread_flag(tsk, TIF_32BIT)) /* clears TIF_32BIT on @tsk, not on a caller-scope 'p' */
#else
-#define is_32bit_task() (1)
+#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32))
+#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32))
+#define clear_tsk_compat_task(tsk) do { } while (0)
#endif
#if defined(CONFIG_PPC64)
@@ -191,7 +192,48 @@ static inline bool test_thread_local_flags(unsigned int flags)
#define is_elf2_task() (0)
#endif
-#endif /* !__ASSEMBLY__ */
+/*
+ * Walks up the stack frames to make sure that the specified object is
+ * entirely contained by a single stack frame.
+ *
+ * @stack and @stackend bound the frame pointers that are followed; @obj and
+ * @len describe the object being checked.
+ *
+ * Returns:
+ * GOOD_FRAME if within a frame
+ * BAD_STACK if placed across a frame boundary (or outside stack)
+ */
+static inline int arch_within_stack_frames(const void * const stack,
+ const void * const stackend,
+ const void *obj, unsigned long len)
+{
+ const void *params;
+ const void *frame;
+
+ /*
+ * Start one frame up from the current one: params is the word loaded
+ * through current_stack_pointer plus STACK_FRAME_PARAMS, and frame is
+ * the word loaded through that same pointer once more.
+ * NOTE(review): presumably this skips the frame of this helper's
+ * immediate caller - confirm.
+ */
+ params = *(const void * const *)current_stack_pointer + STACK_FRAME_PARAMS;
+ frame = **(const void * const * const *)current_stack_pointer;
+
+ /*
+ * low -----------------------------------------------------------> high
+ * [backchain][metadata][params][local vars][saved registers][backchain]
+ * ^------------------------------------^
+ * | allows copies only in this region |
+ * | |
+ * params frame
+ * The metadata region contains the saved LR, CR etc.
+ */
+ while (stack <= frame && frame < stackend) {
+ /*
+ * The first frame whose end lies at or beyond the object decides
+ * the result: the object must also start at or after params.
+ */
+ if (obj + len <= frame)
+ return obj >= params ? GOOD_FRAME : BAD_STACK;
+ /* Move up to the next frame via the back chain. */
+ params = frame + STACK_FRAME_PARAMS;
+ frame = *(const void * const *)frame;
+ }
+
+ /* Ran out of frames inside [stack, stackend) without a match. */
+ return BAD_STACK;
+}
+
+#ifdef CONFIG_PPC32
+extern void *emergency_ctx[];
+#endif
+
+#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 03cbada59d3a..7991ab1d4cb8 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Common time prototypes and such for all ppc machines.
*
* Written by Cort Dougan (cort@cs.nmt.edu) to merge
* Paul Mackerras' version and mine for PReP and Pmac.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef __POWERPC_TIME_H
@@ -18,21 +14,24 @@
#include <linux/percpu.h>
#include <asm/processor.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/vdso/timebase.h>
/* time.c */
+extern u64 decrementer_max;
+
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
extern struct clock_event_device decrementer_clockevent;
-struct rtc_time;
-extern void to_tm(int tim, struct rtc_time * tm);
-extern void GregorianDay(struct rtc_time *tm);
-extern void tick_broadcast_ipi_handler(void);
extern void generic_calibrate_decr(void);
-extern void set_dec_cpu6(unsigned int val);
+#ifdef CONFIG_PPC_SPLPAR
+extern u64 get_boot_tb(void);
+#endif
/* Some sane defaults: 125 MHz timebase, 1GHz processor */
extern unsigned long ppc_proc_freq;
@@ -40,107 +39,19 @@ extern unsigned long ppc_proc_freq;
extern unsigned long ppc_tb_freq;
#define DEFAULT_TB_FREQ 125000000UL
+extern bool tb_invalid;
+
struct div_result {
u64 result_high;
u64 result_low;
};
-/* Accessor functions for the timebase (RTC on 601) registers. */
-/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
-#ifdef CONFIG_6xx
-#define __USE_RTC() (!cpu_has_feature(CPU_FTR_USE_TB))
-#else
-#define __USE_RTC() 0
-#endif
-
-#ifdef CONFIG_PPC64
-
-/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */
-#define get_tbl get_tb
-
-#else
-
-static inline unsigned long get_tbl(void)
-{
-#if defined(CONFIG_403GCX)
- unsigned long tbl;
- asm volatile("mfspr %0, 0x3dd" : "=r" (tbl));
- return tbl;
-#else
- return mftbl();
-#endif
-}
-
-static inline unsigned int get_tbu(void)
-{
-#ifdef CONFIG_403GCX
- unsigned int tbu;
- asm volatile("mfspr %0, 0x3dc" : "=r" (tbu));
- return tbu;
-#else
- return mftbu();
-#endif
-}
-#endif /* !CONFIG_PPC64 */
-
-static inline unsigned int get_rtcl(void)
-{
- unsigned int rtcl;
-
- asm volatile("mfrtcl %0" : "=r" (rtcl));
- return rtcl;
-}
-
-static inline u64 get_rtc(void)
-{
- unsigned int hi, lo, hi2;
-
- do {
- asm volatile("mfrtcu %0; mfrtcl %1; mfrtcu %2"
- : "=r" (hi), "=r" (lo), "=r" (hi2));
- } while (hi2 != hi);
- return (u64)hi * 1000000000 + lo;
-}
-
static inline u64 get_vtb(void)
{
-#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_ARCH_207S))
- return mfvtb();
-#endif
- return 0;
-}
-
-#ifdef CONFIG_PPC64
-static inline u64 get_tb(void)
-{
- return mftb();
-}
-#else /* CONFIG_PPC64 */
-static inline u64 get_tb(void)
-{
- unsigned int tbhi, tblo, tbhi2;
+ return mfspr(SPRN_VTB);
- do {
- tbhi = get_tbu();
- tblo = get_tbl();
- tbhi2 = get_tbu();
- } while (tbhi != tbhi2);
-
- return ((u64)tbhi << 32) | tblo;
-}
-#endif /* !CONFIG_PPC64 */
-
-static inline u64 get_tb_or_rtc(void)
-{
- return __USE_RTC() ? get_rtc() : get_tb();
-}
-
-static inline void set_tb(unsigned int upper, unsigned int lower)
-{
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, upper);
- mtspr(SPRN_TBWL, lower);
+ return 0;
}
/* Accessor functions for the decrementer register.
@@ -149,13 +60,9 @@ static inline void set_tb(unsigned int upper, unsigned int lower)
* in auto-reload mode. The problem is PIT stops counting when it
* hits zero. If it would wrap, we could use it just like a decrementer.
*/
-static inline unsigned int get_dec(void)
+static inline u64 get_dec(void)
{
-#if defined(CONFIG_40x)
- return (mfspr(SPRN_PIT));
-#else
- return (mfspr(SPRN_DEC));
-#endif
+ return mfspr(SPRN_DEC);
}
/*
@@ -163,27 +70,17 @@ static inline unsigned int get_dec(void)
* in when the decrementer generates its interrupt: on the 1 to 0
* transition for Book E/4xx, but on the 0 to -1 transition for others.
*/
-static inline void set_dec(int val)
+static inline void set_dec(u64 val)
{
-#if defined(CONFIG_40x)
- mtspr(SPRN_PIT, val);
-#elif defined(CONFIG_8xx_CPU6)
- set_dec_cpu6(val - 1);
-#else
-#ifndef CONFIG_BOOKE
- --val;
-#endif
- mtspr(SPRN_DEC, val);
-#endif /* not 40x or 8xx_CPU6 */
+ if (IS_ENABLED(CONFIG_BOOKE))
+ mtspr(SPRN_DEC, val);
+ else
+ mtspr(SPRN_DEC, val - 1);
}
static inline unsigned long tb_ticks_since(unsigned long tstamp)
{
- if (__USE_RTC()) {
- int delta = get_rtcl() - (unsigned int) tstamp;
- return delta < 0 ? delta + 1000000000 : delta;
- }
- return get_tbl() - tstamp;
+ return mftb() - tstamp;
}
#define mulhwu(x,y) \
@@ -193,23 +90,31 @@ static inline unsigned long tb_ticks_since(unsigned long tstamp)
#define mulhdu(x,y) \
({unsigned long z; asm ("mulhdu %0,%1,%2" : "=r" (z) : "r" (x), "r" (y)); z;})
#else
-extern u64 mulhdu(u64, u64);
+#define mulhdu(x, y) mul_u64_u64_shr(x, y, 64)
#endif
-extern void div128_by_32(u64 dividend_high, u64 dividend_low,
- unsigned divisor, struct div_result *dr);
+extern void secondary_cpu_time_init(void);
+extern void __init time_init(void);
+
+DECLARE_PER_CPU(u64, decrementers_next_tb);
-/* Used to store Processor Utilization register (purr) values */
+/* Return the current CPU's value of the per-CPU decrementers_next_tb. */
+static inline u64 timer_get_next_tb(void)
+{
+ return __this_cpu_read(decrementers_next_tb);
+}
-struct cpu_usage {
- u64 current_tb; /* Holds the current purr register values */
-};
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now);
+#endif
-DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array);
+/* Convert timebase ticks to nanoseconds */
+unsigned long long tb_to_ns(unsigned long long tb_ticks);
-extern void secondary_cpu_time_init(void);
+void timer_broadcast_interrupt(void);
-DECLARE_PER_CPU(u64, decrementers_next_tb);
+/* SPLPAR and VIRT_CPU_ACCOUNTING_NATIVE */
+void pseries_accumulate_stolen_time(void);
+u64 pseries_calculate_stolen_time(u64 stop_tb);
#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index 2cf846edb3fc..14b4489de52c 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_TIMEX_H
#define _ASM_POWERPC_TIMEX_H
@@ -8,7 +9,7 @@
*/
#include <asm/cputable.h>
-#include <asm/reg.h>
+#include <asm/vdso/timebase.h>
#define CLOCK_TICK_RATE 1024000 /* Underlying HZ */
@@ -16,43 +17,9 @@ typedef unsigned long cycles_t;
static inline cycles_t get_cycles(void)
{
-#ifdef __powerpc64__
return mftb();
-#else
- cycles_t ret;
-
- /*
- * For the "cycle" counter we use the timebase lower half.
- * Currently only used on SMP.
- */
-
- ret = 0;
-
- __asm__ __volatile__(
-#ifdef CONFIG_8xx
- "97: mftb %0\n"
-#else
- "97: mfspr %0, %2\n"
-#endif
- "99:\n"
- ".section __ftr_fixup,\"a\"\n"
- ".align 2\n"
- "98:\n"
- " .long %1\n"
- " .long 0\n"
- " .long 97b-98b\n"
- " .long 99b-98b\n"
- " .long 0\n"
- " .long 0\n"
- ".previous"
-#ifdef CONFIG_8xx
- : "=r" (ret) : "i" (CPU_FTR_601));
-#else
- : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL));
-#endif
- return ret;
-#endif
}
+#define get_cycles get_cycles
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TIMEX_H */
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e2b428b0f7ba..2058e8d3e013 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -1,23 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* TLB shootdown specifics for powerpc
*
* Copyright (C) 2002 Anton Blanchard, IBM Corp.
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_TLB_H
#define _ASM_POWERPC_TLB_H
#ifdef __KERNEL__
#ifndef __powerpc64__
-#include <asm/pgtable.h>
+#include <linux/pgtable.h>
#endif
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
#ifndef __powerpc64__
#include <asm/page.h>
#include <asm/mmu.h>
@@ -25,25 +19,76 @@
#include <linux/pagemap.h>
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
+static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
+ unsigned long address);
+#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
+#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
+/*
+ * book3s:
+ * Hash does not use the linux page-tables, so we can avoid
+ * the TLB invalidate for page-table freeing, Radix otoh does use the
+ * page-tables and needs the TLBI.
+ *
+ * nohash:
+ * We still do TLB invalidate in the __pte_free_tlb routine before we
+ * add the page table pages to mmu gather table batch.
+ */
+#define tlb_needs_table_invalidate() radix_enabled()
+#define __HAVE_ARCH_TLB_REMOVE_TABLE
/* Get the generic bits... */
#include <asm-generic/tlb.h>
-extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
- unsigned long address);
-
static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
unsigned long address)
{
-#ifdef CONFIG_PPC_STD_MMU_32
+#ifdef CONFIG_PPC_BOOK3S_32
if (pte_val(*ptep) & _PAGE_HASHPTE)
flush_hash_entry(tlb->mm, ptep, address);
#endif
}
+#ifdef CONFIG_SMP
+/*
+ * SMP variant: return non-zero when the CPUs in @mm's cpumask are a subset
+ * of the sibling mask of the CPU we are running on.
+ */
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+ return cpumask_subset(mm_cpumask(mm),
+ topology_sibling_cpumask(smp_processor_id()));
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * CONFIG_PPC_BOOK3S_64 variant: @mm is not thread local when its
+ * context.active_cpus count is above 1; otherwise it is thread local when
+ * the current CPU is set in @mm's cpumask.
+ */
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+ if (atomic_read(&mm->context.active_cpus) > 1)
+ return false;
+ return cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm));
+}
+#else /* CONFIG_PPC_BOOK3S_64 */
+/*
+ * SMP variant for everything but CONFIG_PPC_BOOK3S_64: return non-zero
+ * when @mm's cpumask equals the mask containing only the current CPU.
+ */
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+ return cpumask_equal(mm_cpumask(mm),
+ cpumask_of(smp_processor_id()));
+}
+#endif /* !CONFIG_PPC_BOOK3S_64 */
+
+#else /* CONFIG_SMP */
+/* !CONFIG_SMP variant: always reports @mm as core local. */
+static inline int mm_is_core_local(struct mm_struct *mm)
+{
+ return 1;
+}
+
+/* !CONFIG_SMP variant: always reports @mm as thread local. */
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+ return 1;
+}
+#endif
+
+/*
+ * The self-referential #define makes this override visible to the
+ * preprocessor. The function reports page table moves as supported exactly
+ * when radix_enabled() is true.
+ */
+#define arch_supports_page_table_move arch_supports_page_table_move
+static inline bool arch_supports_page_table_move(void)
+{
+ return radix_enabled();
+}
+
#endif /* __KERNEL__ */
#endif /* __ASM_POWERPC_TLB_H */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index 2def01ed0cb2..61fba43bf8b2 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -1,175 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_TLBFLUSH_H
#define _ASM_POWERPC_TLBFLUSH_H
-/*
- * TLB flushing:
- *
- * - flush_tlb_mm(mm) flushes the specified mm context TLB's
- * - flush_tlb_page(vma, vmaddr) flushes one page
- * - local_flush_tlb_mm(mm, full) flushes the specified mm context on
- * the local processor
- * - local_flush_tlb_page(vma, vmaddr) flushes one page on the local processor
- * - flush_tlb_page_nohash(vma, vmaddr) flushes one page if SW loaded TLB
- * - flush_tlb_range(vma, start, end) flushes a range of pages
- * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#ifdef __KERNEL__
-
-#ifdef CONFIG_PPC_MMU_NOHASH
-/*
- * TLB flushing for software loaded TLB chips
- *
- * TODO: (CONFIG_FSL_BOOKE) determine if flush_tlb_range &
- * flush_tlb_kernel_range are best implemented as tlbia vs
- * specific tlbie's
- */
-
-struct vm_area_struct;
-struct mm_struct;
-
-#define MMU_NO_CONTEXT ((unsigned int)-1)
-
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-
-extern void local_flush_tlb_mm(struct mm_struct *mm);
-extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-
-extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- int tsize, int ind);
-
-#ifdef CONFIG_SMP
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- int tsize, int ind);
-#else
-#define flush_tlb_mm(mm) local_flush_tlb_mm(mm)
-#define flush_tlb_page(vma,addr) local_flush_tlb_page(vma,addr)
-#define __flush_tlb_page(mm,addr,p,i) __local_flush_tlb_page(mm,addr,p,i)
-#endif
-#define flush_tlb_page_nohash(vma,addr) flush_tlb_page(vma,addr)
-
-#elif defined(CONFIG_PPC_STD_MMU_32)
-
-/*
- * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
- */
-extern void flush_tlb_mm(struct mm_struct *mm);
-extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr);
-extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end);
-extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- flush_tlb_page(vma, vmaddr);
-}
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
- flush_tlb_mm(mm);
-}
-
-#elif defined(CONFIG_PPC_STD_MMU_64)
-
-#define MMU_NO_CONTEXT 0
-
-/*
- * TLB flushing for 64-bit hash-MMU CPUs
- */
-
-#include <linux/percpu.h>
-#include <asm/page.h>
-
-#define PPC64_TLB_BATCH_NR 192
-
-struct ppc64_tlb_batch {
- int active;
- unsigned long index;
- struct mm_struct *mm;
- real_pte_t pte[PPC64_TLB_BATCH_NR];
- unsigned long vpn[PPC64_TLB_BATCH_NR];
- unsigned int psize;
- int ssize;
-};
-DECLARE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
-
-extern void __flush_tlb_pending(struct ppc64_tlb_batch *batch);
-
-#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-
-static inline void arch_enter_lazy_mmu_mode(void)
-{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-
- batch->active = 1;
-}
-
-static inline void arch_leave_lazy_mmu_mode(void)
-{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-
- if (batch->index)
- __flush_tlb_pending(batch);
- batch->active = 0;
-}
-
-#define arch_flush_lazy_mmu_mode() do {} while (0)
-
-
-extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize,
- int ssize, int local);
-extern void flush_hash_range(unsigned long number, int local);
-
-
-static inline void local_flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void local_flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_page_nohash(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
-}
-
-static inline void flush_tlb_range(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
-{
-}
-
-static inline void flush_tlb_kernel_range(unsigned long start,
- unsigned long end)
-{
-}
-
-/* Private function for use by PCI IO mapping code */
-extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
- unsigned long end);
-extern void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd,
- unsigned long addr);
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/tlbflush.h>
#else
-#error Unsupported MMU type
-#endif
+#include <asm/nohash/tlbflush.h>
+#endif /* !CONFIG_PPC_BOOK3S */
-#endif /*__KERNEL__ */
#endif /* _ASM_POWERPC_TLBFLUSH_H */
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index c22d704b6d41..d700affba448 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Transactional memory support routines to reclaim and recheckpoint
* transactional process state.
@@ -7,21 +8,15 @@
#include <uapi/asm/tm.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-extern void do_load_up_transact_fpu(struct thread_struct *thread);
-extern void do_load_up_transact_altivec(struct thread_struct *thread);
-#endif
-
-extern void tm_enable(void);
extern void tm_reclaim(struct thread_struct *thread,
- unsigned long orig_msr, uint8_t cause);
+ uint8_t cause);
extern void tm_reclaim_current(uint8_t cause);
-extern void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
-extern void tm_abort(uint8_t cause);
+extern void tm_recheckpoint(struct thread_struct *thread);
extern void tm_save_sprs(struct thread_struct *thread);
extern void tm_restore_sprs(struct thread_struct *thread);
-#endif /* __ASSEMBLY__ */
+extern bool tm_suspend_disabled;
+
+#endif /* __ASSEMBLER__ */
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 5f1048eaa5b6..f19ca44512d1 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_TOPOLOGY_H
#define _ASM_POWERPC_TOPOLOGY_H
#ifdef __KERNEL__
@@ -5,6 +6,7 @@
struct device;
struct device_node;
+struct drmem_lmb;
#ifdef CONFIG_NUMA
@@ -16,8 +18,6 @@ struct device_node;
#include <asm/mmzone.h>
-#define parent_node(node) (node)
-
#define cpumask_of_node(node) ((node) == -1 ? \
cpu_all_mask : \
node_to_cpumask_map[node])
@@ -36,6 +36,7 @@ static inline int pcibus_to_node(struct pci_bus *bus)
cpu_all_mask : \
cpumask_of_node(pcibus_to_node(bus)))
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc);
extern int __node_distance(int, int);
#define node_distance(a, b) __node_distance(a, b)
@@ -44,8 +45,36 @@ extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
+{
+ numa_cpu_lookup_table[cpu] = node;
+}
+
+static inline int early_cpu_to_node(int cpu)
+{
+ int nid;
+
+ nid = numa_cpu_lookup_table[cpu];
+
+ /*
+ * Fall back to node 0 if nid is unset (it should be, except bugs).
+ * This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
+ */
+ return (nid < 0) ? 0 : nid;
+}
+
+int of_drconf_to_nid_single(struct drmem_lmb *lmb);
+void update_numa_distance(struct device_node *node);
+
+extern void map_cpu_to_node(int cpu, int node);
+#ifdef CONFIG_HOTPLUG_CPU
+extern void unmap_cpu_from_node(unsigned long cpu);
+#endif /* CONFIG_HOTPLUG_CPU */
+
#else
+static inline int early_cpu_to_node(int cpu) { return 0; }
+
static inline void dump_numa_cpu_topology(void) {}
static inline int sysfs_add_device_to_node(struct device *dev, int nid)
@@ -57,25 +86,44 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
int nid)
{
}
-#endif /* CONFIG_NUMA */
-#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
-extern int start_topology_update(void);
-extern int stop_topology_update(void);
-extern int prrn_is_enabled(void);
-#else
-static inline int start_topology_update(void)
+static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
+
+static inline int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
return 0;
}
-static inline int stop_topology_update(void)
+
+static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
- return 0;
+ return first_online_node;
}
-static inline int prrn_is_enabled(void)
+
+static inline void update_numa_distance(struct device_node *node) {}
+
+#ifdef CONFIG_SMP
+static inline void map_cpu_to_node(int cpu, int node) {}
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void unmap_cpu_from_node(unsigned long cpu) {}
+#endif /* CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_SMP */
+
+#endif /* CONFIG_NUMA */
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
+void find_and_update_cpu_nid(int cpu);
+extern int cpu_to_coregroup_id(int cpu);
+#else
+static inline void find_and_update_cpu_nid(int cpu) {}
+static inline int cpu_to_coregroup_id(int cpu)
{
+#ifdef CONFIG_SMP
+ return cpu_to_core_id(cpu);
+#else
return 0;
+#endif
}
+
#endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
#include <asm-generic/topology.h>
@@ -83,14 +131,47 @@ static inline int prrn_is_enabled(void)
#ifdef CONFIG_SMP
#include <asm/cputable.h>
+struct cpumask *cpu_coregroup_mask(int cpu);
+
#ifdef CONFIG_PPC64
#include <asm/smp.h>
#define topology_physical_package_id(cpu) (cpu_to_chip_id(cpu))
-#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
+
+#define topology_sibling_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_core_id(cpu) (cpu_to_core_id(cpu))
+
+#endif
#endif
+
+#ifdef CONFIG_HOTPLUG_SMT
+#include <linux/cpu_smt.h>
+#include <linux/cpumask.h>
+#include <asm/cputhreads.h>
+
+static inline bool topology_is_primary_thread(unsigned int cpu)
+{
+ return cpu == cpu_first_thread_sibling(cpu);
+}
+#define topology_is_primary_thread topology_is_primary_thread
+
+static inline bool topology_smt_thread_allowed(unsigned int cpu)
+{
+ return cpu_thread_in_core(cpu) < cpu_smt_num_threads;
+}
+
+#define topology_is_core_online topology_is_core_online
+static inline bool topology_is_core_online(unsigned int cpu)
+{
+ int i, first_cpu = cpu_first_thread_sibling(cpu);
+
+ for (i = first_cpu; i < first_cpu + threads_per_core; ++i) {
+ if (cpu_online(i))
+ return true;
+ }
+ return false;
+}
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index c15da6073cb8..a7b69b25296b 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM powerpc
@@ -53,16 +54,34 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
TP_ARGS(regs)
);
+#ifdef CONFIG_PPC_DOORBELL
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs)
+);
+#endif
+
#ifdef CONFIG_PPC_PSERIES
-extern void hcall_tracepoint_regfunc(void);
+extern int hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
-TRACE_EVENT_FN(hcall_entry,
+TRACE_EVENT_FN_COND(hcall_entry,
TP_PROTO(unsigned long opcode, unsigned long *args),
TP_ARGS(opcode, args),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
),
@@ -76,16 +95,17 @@ TRACE_EVENT_FN(hcall_entry,
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
-TRACE_EVENT_FN(hcall_exit,
+TRACE_EVENT_FN_COND(hcall_exit,
- TP_PROTO(unsigned long opcode, unsigned long retval,
- unsigned long *retbuf),
+ TP_PROTO(unsigned long opcode, long retval, unsigned long *retbuf),
TP_ARGS(opcode, retval, retbuf),
+ TP_CONDITION(cpu_online(raw_smp_processor_id())),
+
TP_STRUCT__entry(
__field(unsigned long, opcode)
- __field(unsigned long, retval)
+ __field(long, retval)
),
TP_fast_assign(
@@ -93,14 +113,117 @@ TRACE_EVENT_FN(hcall_exit,
__entry->retval = retval;
),
- TP_printk("opcode=%lu retval=%lu", __entry->opcode, __entry->retval),
+ TP_printk("opcode=%lu retval=%ld", __entry->opcode, __entry->retval),
hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);
#endif
+#ifdef CONFIG_PPC_RTAS
+
+#include <asm/rtas-types.h>
+
+TRACE_EVENT(rtas_input,
+
+ TP_PROTO(struct rtas_args *rtas_args, const char *name),
+
+ TP_ARGS(rtas_args, name),
+
+ TP_STRUCT__entry(
+ __field(__u32, nargs)
+ __string(name, name)
+ __dynamic_array(__u32, inputs, be32_to_cpu(rtas_args->nargs))
+ ),
+
+ TP_fast_assign(
+ __entry->nargs = be32_to_cpu(rtas_args->nargs);
+ __assign_str(name);
+ be32_to_cpu_array(__get_dynamic_array(inputs), rtas_args->args, __entry->nargs);
+ ),
+
+ TP_printk("%s arguments: %s", __get_str(name),
+ __print_array(__get_dynamic_array(inputs), __entry->nargs, 4)
+ )
+);
+
+TRACE_EVENT(rtas_output,
+
+ TP_PROTO(struct rtas_args *rtas_args, const char *name),
+
+ TP_ARGS(rtas_args, name),
+
+ TP_STRUCT__entry(
+ __field(__u32, nr_other)
+ __field(__s32, status)
+ __string(name, name)
+ __dynamic_array(__u32, other_outputs, be32_to_cpu(rtas_args->nret) - 1)
+ ),
+
+ TP_fast_assign(
+ __entry->nr_other = be32_to_cpu(rtas_args->nret) - 1;
+ __entry->status = be32_to_cpu(rtas_args->rets[0]);
+ __assign_str(name);
+ be32_to_cpu_array(__get_dynamic_array(other_outputs),
+ &rtas_args->rets[1], __entry->nr_other);
+ ),
+
+ TP_printk("%s status: %d, other outputs: %s", __get_str(name), __entry->status,
+ __print_array(__get_dynamic_array(other_outputs),
+ __entry->nr_other, 4)
+ )
+);
+
+DECLARE_EVENT_CLASS(rtas_parameter_block,
+
+ TP_PROTO(struct rtas_args *rtas_args),
+
+ TP_ARGS(rtas_args),
+
+ TP_STRUCT__entry(
+ __field(u32, token)
+ __field(u32, nargs)
+ __field(u32, nret)
+ __array(__u32, params, 16)
+ ),
+
+ TP_fast_assign(
+ __entry->token = be32_to_cpu(rtas_args->token);
+ __entry->nargs = be32_to_cpu(rtas_args->nargs);
+ __entry->nret = be32_to_cpu(rtas_args->nret);
+ be32_to_cpu_array(__entry->params, rtas_args->args, ARRAY_SIZE(rtas_args->args));
+ ),
+
+ TP_printk("token=%u nargs=%u nret=%u params:"
+ " [0]=0x%08x [1]=0x%08x [2]=0x%08x [3]=0x%08x"
+ " [4]=0x%08x [5]=0x%08x [6]=0x%08x [7]=0x%08x"
+ " [8]=0x%08x [9]=0x%08x [10]=0x%08x [11]=0x%08x"
+ " [12]=0x%08x [13]=0x%08x [14]=0x%08x [15]=0x%08x",
+ __entry->token, __entry->nargs, __entry->nret,
+ __entry->params[0], __entry->params[1], __entry->params[2], __entry->params[3],
+ __entry->params[4], __entry->params[5], __entry->params[6], __entry->params[7],
+ __entry->params[8], __entry->params[9], __entry->params[10], __entry->params[11],
+ __entry->params[12], __entry->params[13], __entry->params[14], __entry->params[15]
+ )
+);
+
+DEFINE_EVENT(rtas_parameter_block, rtas_ll_entry,
+
+ TP_PROTO(struct rtas_args *rtas_args),
+
+ TP_ARGS(rtas_args)
+);
+
+DEFINE_EVENT(rtas_parameter_block, rtas_ll_exit,
+
+ TP_PROTO(struct rtas_args *rtas_args),
+
+ TP_ARGS(rtas_args)
+);
+
+#endif /* CONFIG_PPC_RTAS */
+
#ifdef CONFIG_PPC_POWERNV
-extern void opal_tracepoint_regfunc(void);
+extern int opal_tracepoint_regfunc(void);
extern void opal_tracepoint_unregfunc(void);
TRACE_EVENT_FN(opal_entry,
@@ -144,6 +267,75 @@ TRACE_EVENT_FN(opal_exit,
);
#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
+TRACE_EVENT(hash_fault,
+
+ TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap),
+ TP_ARGS(addr, access, trap),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, access)
+ __field(unsigned long, trap)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->access = access;
+ __entry->trap = trap;
+ ),
+
+ TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx",
+ __entry->addr, __entry->access, __entry->trap)
+);
+#endif
+
+TRACE_EVENT(tlbie,
+
+ TP_PROTO(unsigned long lpid, unsigned long local, unsigned long rb,
+ unsigned long rs, unsigned long ric, unsigned long prs,
+ unsigned long r),
+ TP_ARGS(lpid, local, rb, rs, ric, prs, r),
+ TP_STRUCT__entry(
+ __field(unsigned long, lpid)
+ __field(unsigned long, local)
+ __field(unsigned long, rb)
+ __field(unsigned long, rs)
+ __field(unsigned long, ric)
+ __field(unsigned long, prs)
+ __field(unsigned long, r)
+ ),
+
+ TP_fast_assign(
+ __entry->lpid = lpid;
+ __entry->local = local;
+ __entry->rb = rb;
+ __entry->rs = rs;
+ __entry->ric = ric;
+ __entry->prs = prs;
+ __entry->r = r;
+ ),
+
+ TP_printk("lpid=%ld, local=%ld, rb=0x%lx, rs=0x%lx, ric=0x%lx, "
+ "prs=0x%lx, r=0x%lx", __entry->lpid, __entry->local,
+ __entry->rb, __entry->rs, __entry->ric, __entry->prs,
+ __entry->r)
+);
+
+TRACE_EVENT(tlbia,
+
+ TP_PROTO(unsigned long id),
+ TP_ARGS(id),
+ TP_STRUCT__entry(
+ __field(unsigned long, id)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ ),
+
+ TP_printk("ctx.id=0x%lx", __entry->id)
+);
+
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/powerpc/include/asm/trace_clock.h b/arch/powerpc/include/asm/trace_clock.h
new file mode 100644
index 000000000000..ef70c2f7974d
--- /dev/null
+++ b/arch/powerpc/include/asm/trace_clock.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#ifndef _ASM_PPC_TRACE_CLOCK_H
+#define _ASM_PPC_TRACE_CLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+extern u64 notrace trace_clock_ppc_tb(void);
+
+#define ARCH_TRACE_CLOCKS { trace_clock_ppc_tb, "ppc-tb", 0 },
+
+#endif /* _ASM_PPC_TRACE_CLOCK_H */
diff --git a/arch/powerpc/include/asm/tsi108.h b/arch/powerpc/include/asm/tsi108.h
index f8b60793b7a9..8a2b6427d300 100644
--- a/arch/powerpc/include/asm/tsi108.h
+++ b/arch/powerpc/include/asm/tsi108.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* common routine and memory layout for Tundra TSI108(Grendel) host bridge
* memory controller.
@@ -6,11 +7,6 @@
* Alex Bounine (alexandreb@tundra.com)
*
* Copyright 2004-2006 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef __PPC_KERNEL_TSI108_H
@@ -77,17 +73,13 @@
* nodes if your board uses the Broadcom PHYs
*/
#define TSI108_PHY_MV88E 0 /* Marvel 88Exxxx PHY */
-#define TSI108_PHY_BCM54XX 1 /* Broardcom BCM54xx PHY */
+#define TSI108_PHY_BCM54XX 1 /* Broadcom BCM54xx PHY */
/* Global variables */
extern u32 tsi108_pci_cfg_base;
/* Exported functions */
-extern int tsi108_bridge_init(struct pci_controller *hose, uint phys_csr_base);
-extern unsigned long tsi108_get_mem_size(void);
-extern unsigned long tsi108_get_cpu_clk(void);
-extern unsigned long tsi108_get_sdc_clk(void);
extern int tsi108_direct_write_config(struct pci_bus *bus, unsigned int devfn,
int offset, int len, u32 val);
extern int tsi108_direct_read_config(struct pci_bus *bus, unsigned int devfn,
diff --git a/arch/powerpc/include/asm/tsi108_irq.h b/arch/powerpc/include/asm/tsi108_irq.h
index 6ed93979fbe4..df602ca4cc52 100644
--- a/arch/powerpc/include/asm/tsi108_irq.h
+++ b/arch/powerpc/include/asm/tsi108_irq.h
@@ -1,24 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* (C) Copyright 2005 Tundra Semiconductor Corp.
* Alex Bounine, <alexandreb at tundra.com).
*
* See file CREDITS for list of people who contributed to this
* project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
*/
/*
diff --git a/arch/powerpc/include/asm/tsi108_pci.h b/arch/powerpc/include/asm/tsi108_pci.h
index 5653d7cc3e24..fb6f62669154 100644
--- a/arch/powerpc/include/asm/tsi108_pci.h
+++ b/arch/powerpc/include/asm/tsi108_pci.h
@@ -1,21 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2007 IBM Corp
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
*/
#ifndef _ASM_POWERPC_TSI108_PCI_H
@@ -39,7 +24,7 @@
extern int tsi108_setup_pci(struct device_node *dev, u32 cfg_phys, int primary);
extern void tsi108_pci_int_init(struct device_node *node);
-extern void tsi108_irq_cascade(unsigned int irq, struct irq_desc *desc);
+extern void tsi108_irq_cascade(struct irq_desc *desc);
extern void tsi108_clear_pci_cfg_error(void);
#endif /* _ASM_POWERPC_TSI108_PCI_H */
diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h
index bfb6ded38ffa..55d7ba6d910b 100644
--- a/arch/powerpc/include/asm/types.h
+++ b/arch/powerpc/include/asm/types.h
@@ -1,30 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file is never included by application software unless
* explicitly requested (e.g., via linux/types.h) in which case the
* application is Linux specific so (user-) name space pollution is
* not a major issue. However, for interoperability, libraries still
* need to be careful to avoid a name clashes.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_TYPES_H
#define _ASM_POWERPC_TYPES_H
#include <uapi/asm/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef __vector128 vector128;
-typedef struct {
- unsigned long entry;
- unsigned long toc;
- unsigned long env;
-} func_descr_t;
-
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_POWERPC_TYPES_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43a7c00..4f5a46a77fa2 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -1,84 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ARCH_POWERPC_UACCESS_H
#define _ARCH_POWERPC_UACCESS_H
-#ifdef __KERNEL__
-#ifndef __ASSEMBLY__
-
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <asm/asm-compat.h>
#include <asm/processor.h>
#include <asm/page.h>
+#include <asm/extable.h>
+#include <asm/kup.h>
+#include <asm/asm-compat.h>
-#define VERIFY_READ 0
-#define VERIFY_WRITE 1
-
-/*
- * The fs value determines whether argument validity checking should be
- * performed or not. If get_fs() == USER_DS, checking is performed, with
- * get_fs() == KERNEL_DS, checking is bypassed.
- *
- * For historical reasons, these macros are grossly misnamed.
- *
- * The fs/ds values are now the highest legal address in the "segment".
- * This simplifies the checking in the routines below.
- */
-
-#define MAKE_MM_SEG(s) ((mm_segment_t) { (s) })
-
-#define KERNEL_DS MAKE_MM_SEG(~0UL)
#ifdef __powerpc64__
/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
-#define USER_DS MAKE_MM_SEG(TASK_SIZE_USER64 - 1)
-#else
-#define USER_DS MAKE_MM_SEG(TASK_SIZE - 1)
-#endif
-
-#define get_ds() (KERNEL_DS)
-#define get_fs() (current->thread.fs)
-#define set_fs(val) (current->thread.fs = (val))
-
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
-#define user_addr_max() (get_fs().seg)
-
-#ifdef __powerpc64__
-/*
- * This check is sufficient because there is a large enough
- * gap between user addresses and the kernel addresses
- */
-#define __access_ok(addr, size, segment) \
- (((addr) <= (segment).seg) && ((size) <= (segment).seg))
-
-#else
-
-#define __access_ok(addr, size, segment) \
- (((addr) <= (segment).seg) && \
- (((size) == 0) || (((size) - 1) <= ((segment).seg - (addr)))))
-
+#define TASK_SIZE_MAX TASK_SIZE_USER64
#endif
-#define access_ok(type, addr, size) \
- (__chk_user_ptr(addr), \
- __access_ok((__force unsigned long)(addr), (size), get_fs()))
-
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path. This means when everything is well,
- * we don't even have to jump over them. Further, they do not intrude
- * on our cache or tlb entries.
- */
-
-struct exception_table_entry {
- unsigned long insn;
- unsigned long fixup;
-};
+#include <asm-generic/access_ok.h>
/*
* These are the main single-value transfer routines. They automatically
@@ -99,128 +34,212 @@ struct exception_table_entry {
* exception handling means that it's no longer "just"...)
*
*/
-#define get_user(x, ptr) \
- __get_user_check((x), (ptr), sizeof(*(ptr)))
-#define put_user(x, ptr) \
- __put_user_check((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user(x, ptr) \
- __get_user_nocheck((x), (ptr), sizeof(*(ptr)))
-#define __put_user(x, ptr) \
- __put_user_nocheck((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_inatomic(x, ptr) \
- __get_user_nosleep((x), (ptr), sizeof(*(ptr)))
-#define __put_user_inatomic(x, ptr) \
- __put_user_nosleep((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-
-#define __get_user_unaligned __get_user
-#define __put_user_unaligned __put_user
+#define __put_user(x, ptr) \
+({ \
+ long __pu_err; \
+ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
+ __typeof__(*(ptr)) __pu_val = (__typeof__(*(ptr)))(x); \
+ __typeof__(sizeof(*(ptr))) __pu_size = sizeof(*(ptr)); \
+ \
+ might_fault(); \
+ do { \
+ __label__ __pu_failed; \
+ \
+ allow_write_to_user(__pu_addr, __pu_size); \
+ __put_user_size_goto(__pu_val, __pu_addr, __pu_size, __pu_failed); \
+ prevent_write_to_user(__pu_addr, __pu_size); \
+ __pu_err = 0; \
+ break; \
+ \
+__pu_failed: \
+ prevent_write_to_user(__pu_addr, __pu_size); \
+ __pu_err = -EFAULT; \
+ } while (0); \
+ \
+ __pu_err; \
+})
-extern long __put_user_bad(void);
+#define put_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __user *_pu_addr = (ptr); \
+ \
+ access_ok(_pu_addr, sizeof(*(ptr))) ? \
+ __put_user(x, _pu_addr) : -EFAULT; \
+})
/*
* We don't tell gcc that we are accessing memory, but this is OK
* because we do not write to any memory gcc knows about, so there
* are no aliasing issues.
*/
-#define __put_user_asm(x, addr, err, op) \
- __asm__ __volatile__( \
- "1: " op " %1,0(%2) # put_user\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: li %0,%3\n" \
- " b 2b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,3b\n" \
- ".previous" \
- : "=r" (err) \
- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __put_user_asm_goto(x, addr, label, op) \
+ asm goto( \
+ "1: " op " %0,0(%1) # put_user\n" \
+ EX_TABLE(1b, %l2) \
+ : \
+ : "r" (x), "b" (addr) \
+ : \
+ : label)
+#else
+#define __put_user_asm_goto(x, addr, label, op) \
+ asm goto( \
+ "1: " op "%U1%X1 %0,%1 # put_user\n" \
+ EX_TABLE(1b, %l2) \
+ : \
+ : "r" (x), "m<>" (*addr) \
+ : \
+ : label)
+#endif
#ifdef __powerpc64__
-#define __put_user_asm2(x, ptr, retval) \
- __put_user_asm(x, ptr, retval, "std")
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __put_user_asm2_goto(x, ptr, label) \
+ __put_user_asm_goto(x, ptr, label, "std")
+#else
+#define __put_user_asm2_goto(x, addr, label) \
+ asm goto ("1: std%U1%X1 %0,%1 # put_user\n" \
+ EX_TABLE(1b, %l2) \
+ : \
+ : "r" (x), DS_FORM_CONSTRAINT (*addr) \
+ : \
+ : label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
#else /* __powerpc64__ */
-#define __put_user_asm2(x, addr, err) \
- __asm__ __volatile__( \
- "1: stw %1,0(%2)\n" \
- "2: stw %1+1,4(%2)\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: li %0,%3\n" \
- " b 3b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,4b\n" \
- PPC_LONG "2b,4b\n" \
- ".previous" \
- : "=r" (err) \
- : "r" (x), "b" (addr), "i" (-EFAULT), "0" (err))
+#define __put_user_asm2_goto(x, addr, label) \
+ asm goto( \
+ "1: stw%X1 %0, %1\n" \
+ "2: stw%X1 %L0, %L1\n" \
+ EX_TABLE(1b, %l2) \
+ EX_TABLE(2b, %l2) \
+ : \
+ : "r" (x), "m" (*addr) \
+ : \
+ : label)
#endif /* __powerpc64__ */
-#define __put_user_size(x, ptr, size, retval) \
+#define __put_user_size_goto(x, ptr, size, label) \
do { \
- retval = 0; \
+ __typeof__(*(ptr)) __user *__pus_addr = (ptr); \
+ \
switch (size) { \
- case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
- case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
- case 4: __put_user_asm(x, ptr, retval, "stw"); break; \
- case 8: __put_user_asm2(x, ptr, retval); break; \
- default: __put_user_bad(); \
+ case 1: __put_user_asm_goto(x, __pus_addr, label, "stb"); break; \
+ case 2: __put_user_asm_goto(x, __pus_addr, label, "sth"); break; \
+ case 4: __put_user_asm_goto(x, __pus_addr, label, "stw"); break; \
+ case 8: __put_user_asm2_goto(x, __pus_addr, label); break; \
+ default: BUILD_BUG(); \
} \
} while (0)
-#define __put_user_nocheck(x, ptr, size) \
-({ \
- long __pu_err; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- if (!is_kernel_addr((unsigned long)__pu_addr)) \
- might_fault(); \
- __chk_user_ptr(ptr); \
- __put_user_size((x), __pu_addr, (size), __pu_err); \
- __pu_err; \
-})
+/*
+ * This does an atomic 128 byte aligned load from userspace.
+ * Upto caller to do enable_kernel_vmx() before calling!
+ */
+#define __get_user_atomic_128_aligned(kaddr, uaddr, err) \
+ __asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine altivec\n" \
+ "1: lvx 0,0,%1 # get user\n" \
+ " stvx 0,0,%2 # put kernel\n" \
+ ".machine pop\n" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: li %0,%3\n" \
+ " b 2b\n" \
+ ".previous\n" \
+ EX_TABLE(1b, 3b) \
+ : "=r" (err) \
+ : "b" (uaddr), "b" (kaddr), "i" (-EFAULT), "0" (err))
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
+/* -mprefixed can generate offsets beyond range, fall back hack */
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __get_user_asm_goto(x, addr, label, op) \
+ asm_goto_output( \
+ "1: "op" %0,0(%1) # get_user\n" \
+ EX_TABLE(1b, %l2) \
+ : "=r" (x) \
+ : "b" (addr) \
+ : \
+ : label)
+#else
+#define __get_user_asm_goto(x, addr, label, op) \
+ asm_goto_output( \
+ "1: "op"%U1%X1 %0, %1 # get_user\n" \
+ EX_TABLE(1b, %l2) \
+ : "=r" (x) \
+ : "m<>" (*addr) \
+ : \
+ : label)
+#endif
-#define __put_user_check(x, ptr, size) \
-({ \
- long __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- might_fault(); \
- if (access_ok(VERIFY_WRITE, __pu_addr, size)) \
- __put_user_size((x), __pu_addr, (size), __pu_err); \
- __pu_err; \
-})
+#ifdef __powerpc64__
+#ifdef CONFIG_PPC_KERNEL_PREFIXED
+#define __get_user_asm2_goto(x, addr, label) \
+ __get_user_asm_goto(x, addr, label, "ld")
+#else
+#define __get_user_asm2_goto(x, addr, label) \
+ asm_goto_output( \
+ "1: ld%U1%X1 %0, %1 # get_user\n" \
+ EX_TABLE(1b, %l2) \
+ : "=r" (x) \
+ : DS_FORM_CONSTRAINT (*addr) \
+ : \
+ : label)
+#endif // CONFIG_PPC_KERNEL_PREFIXED
+#else /* __powerpc64__ */
+#define __get_user_asm2_goto(x, addr, label) \
+ asm_goto_output( \
+ "1: lwz%X1 %0, %1\n" \
+ "2: lwz%X1 %L0, %L1\n" \
+ EX_TABLE(1b, %l2) \
+ EX_TABLE(2b, %l2) \
+ : "=&r" (x) \
+ : "m" (*addr) \
+ : \
+ : label)
+#endif /* __powerpc64__ */
-#define __put_user_nosleep(x, ptr, size) \
-({ \
- long __pu_err; \
- __typeof__(*(ptr)) __user *__pu_addr = (ptr); \
- __chk_user_ptr(ptr); \
- __put_user_size((x), __pu_addr, (size), __pu_err); \
- __pu_err; \
-})
+#define __get_user_size_goto(x, ptr, size, label) \
+do { \
+ BUILD_BUG_ON(size > sizeof(x)); \
+ switch (size) { \
+ case 1: __get_user_asm_goto(x, (u8 __user *)ptr, label, "lbz"); break; \
+ case 2: __get_user_asm_goto(x, (u16 __user *)ptr, label, "lhz"); break; \
+ case 4: __get_user_asm_goto(x, (u32 __user *)ptr, label, "lwz"); break; \
+ case 8: __get_user_asm2_goto(x, (u64 __user *)ptr, label); break; \
+ default: x = 0; BUILD_BUG(); \
+ } \
+} while (0)
+#define __get_user_size_allowed(x, ptr, size, retval) \
+do { \
+ __label__ __gus_failed; \
+ \
+ __get_user_size_goto(x, ptr, size, __gus_failed); \
+ retval = 0; \
+ break; \
+__gus_failed: \
+ x = 0; \
+ retval = -EFAULT; \
+} while (0)
-extern long __get_user_bad(void);
+#else /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
#define __get_user_asm(x, addr, err, op) \
__asm__ __volatile__( \
- "1: "op" %1,0(%2) # get_user\n" \
+ "1: "op"%U2%X2 %1, %2 # get_user\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%3\n" \
" li %1,0\n" \
" b 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,3b\n" \
- ".previous" \
+ EX_TABLE(1b, 3b) \
: "=r" (err), "=r" (x) \
- : "b" (addr), "i" (-EFAULT), "0" (err))
+ : "m<>" (*addr), "i" (-EFAULT), "0" (err))
#ifdef __powerpc64__
#define __get_user_asm2(x, addr, err) \
@@ -228,8 +247,8 @@ extern long __get_user_bad(void);
#else /* __powerpc64__ */
#define __get_user_asm2(x, addr, err) \
__asm__ __volatile__( \
- "1: lwz %1,0(%2)\n" \
- "2: lwz %1+1,4(%2)\n" \
+ "1: lwz%X2 %1, %2\n" \
+ "2: lwz%X2 %L1, %L2\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: li %0,%3\n" \
@@ -237,218 +256,260 @@ extern long __get_user_bad(void);
" li %1+1,0\n" \
" b 3b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,4b\n" \
- PPC_LONG "2b,4b\n" \
- ".previous" \
+ EX_TABLE(1b, 4b) \
+ EX_TABLE(2b, 4b) \
: "=r" (err), "=&r" (x) \
- : "b" (addr), "i" (-EFAULT), "0" (err))
+ : "m" (*addr), "i" (-EFAULT), "0" (err))
#endif /* __powerpc64__ */
-#define __get_user_size(x, ptr, size, retval) \
+#define __get_user_size_allowed(x, ptr, size, retval) \
do { \
retval = 0; \
- __chk_user_ptr(ptr); \
- if (size > sizeof(x)) \
- (x) = __get_user_bad(); \
+ BUILD_BUG_ON(size > sizeof(x)); \
switch (size) { \
- case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
- case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
- case 4: __get_user_asm(x, ptr, retval, "lwz"); break; \
- case 8: __get_user_asm2(x, ptr, retval); break; \
- default: (x) = __get_user_bad(); \
+ case 1: __get_user_asm(x, (u8 __user *)ptr, retval, "lbz"); break; \
+ case 2: __get_user_asm(x, (u16 __user *)ptr, retval, "lhz"); break; \
+ case 4: __get_user_asm(x, (u32 __user *)ptr, retval, "lwz"); break; \
+ case 8: __get_user_asm2(x, (u64 __user *)ptr, retval); break; \
+ default: x = 0; BUILD_BUG(); \
} \
} while (0)
-#define __get_user_nocheck(x, ptr, size) \
-({ \
- long __gu_err; \
- unsigned long __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
- __chk_user_ptr(ptr); \
- if (!is_kernel_addr((unsigned long)__gu_addr)) \
- might_fault(); \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
+#define __get_user_size_goto(x, ptr, size, label) \
+do { \
+ long __gus_retval; \
+ \
+ __get_user_size_allowed(x, ptr, size, __gus_retval); \
+ if (__gus_retval) \
+ goto label; \
+} while (0)
-#ifndef __powerpc64__
-#define __get_user64_nocheck(x, ptr, size) \
-({ \
- long __gu_err; \
- long long __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
- __chk_user_ptr(ptr); \
- if (!is_kernel_addr((unsigned long)__gu_addr)) \
- might_fault(); \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
-#endif /* __powerpc64__ */
+#endif /* CONFIG_CC_HAS_ASM_GOTO_OUTPUT */
-#define __get_user_check(x, ptr, size) \
-({ \
- long __gu_err = -EFAULT; \
- unsigned long __gu_val = 0; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
- might_fault(); \
- if (access_ok(VERIFY_READ, __gu_addr, (size))) \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
- __gu_err; \
-})
+/*
+ * This is a type: either unsigned long, if the argument fits into
+ * that type, or otherwise unsigned long long.
+ */
+#define __long_type(x) \
+ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL))
-#define __get_user_nosleep(x, ptr, size) \
+#define __get_user(x, ptr) \
({ \
long __gu_err; \
- unsigned long __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
- __chk_user_ptr(ptr); \
- __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
+ __long_type(*(ptr)) __gu_val; \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(sizeof(*(ptr))) __gu_size = sizeof(*(ptr)); \
+ \
+ might_fault(); \
+ allow_read_from_user(__gu_addr, __gu_size); \
+ __get_user_size_allowed(__gu_val, __gu_addr, __gu_size, __gu_err); \
+ prevent_read_from_user(__gu_addr, __gu_size); \
(x) = (__typeof__(*(ptr)))__gu_val; \
+ \
__gu_err; \
})
+#define get_user(x, ptr) \
+({ \
+ __typeof__(*(ptr)) __user *_gu_addr = (ptr); \
+ \
+ access_ok(_gu_addr, sizeof(*(ptr))) ? \
+ __get_user(x, _gu_addr) : \
+ ((x) = (__force __typeof__(*(ptr)))0, -EFAULT); \
+})
/* more complex routines */
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
-#ifndef __powerpc64__
+#ifdef __powerpc64__
+static inline unsigned long
+raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
+{
+ unsigned long ret;
+
+ allow_read_write_user(to, from, n);
+ ret = __copy_tofrom_user(to, from, n);
+ prevent_read_write_user(to, from, n);
+ return ret;
+}
+#endif /* __powerpc64__ */
-static inline unsigned long copy_from_user(void *to,
+static inline unsigned long raw_copy_from_user(void *to,
const void __user *from, unsigned long n)
{
- unsigned long over;
-
- if (access_ok(VERIFY_READ, from, n))
- return __copy_tofrom_user((__force void __user *)to, from, n);
- if ((unsigned long)from < TASK_SIZE) {
- over = (unsigned long)from + n - TASK_SIZE;
- return __copy_tofrom_user((__force void __user *)to, from,
- n - over) + over;
- }
- return n;
+ unsigned long ret;
+
+ allow_read_from_user(from, n);
+ ret = __copy_tofrom_user((__force void __user *)to, from, n);
+ prevent_read_from_user(from, n);
+ return ret;
}
-static inline unsigned long copy_to_user(void __user *to,
- const void *from, unsigned long n)
+static inline unsigned long
+raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
- unsigned long over;
-
- if (access_ok(VERIFY_WRITE, to, n))
- return __copy_tofrom_user(to, (__force void __user *)from, n);
- if ((unsigned long)to < TASK_SIZE) {
- over = (unsigned long)to + n - TASK_SIZE;
- return __copy_tofrom_user(to, (__force void __user *)from,
- n - over) + over;
- }
- return n;
+ unsigned long ret;
+
+ allow_write_to_user(to, n);
+ ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
+ prevent_write_to_user(to, n);
+ return ret;
}
-#else /* __powerpc64__ */
+unsigned long __arch_clear_user(void __user *addr, unsigned long size);
-#define __copy_in_user(to, from, size) \
- __copy_tofrom_user((to), (from), (size))
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+ unsigned long ret;
-extern unsigned long copy_from_user(void *to, const void __user *from,
- unsigned long n);
-extern unsigned long copy_to_user(void __user *to, const void *from,
- unsigned long n);
-extern unsigned long copy_in_user(void __user *to, const void __user *from,
- unsigned long n);
+ might_fault();
+ allow_write_to_user(addr, size);
+ ret = __arch_clear_user(addr, size);
+ prevent_write_to_user(addr, size);
+ return ret;
+}
-#endif /* __powerpc64__ */
+static inline unsigned long clear_user(void __user *addr, unsigned long size)
+{
+ return likely(access_ok(addr, size)) ? __clear_user(addr, size) : size;
+}
-static inline unsigned long __copy_from_user_inatomic(void *to,
- const void __user *from, unsigned long n)
+extern long strncpy_from_user(char *dst, const char __user *src, long count);
+extern __must_check long strnlen_user(const char __user *str, long n);
+
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
{
- if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
-
- switch (n) {
- case 1:
- __get_user_size(*(u8 *)to, from, 1, ret);
- break;
- case 2:
- __get_user_size(*(u16 *)to, from, 2, ret);
- break;
- case 4:
- __get_user_size(*(u32 *)to, from, 4, ret);
- break;
- case 8:
- __get_user_size(*(u64 *)to, from, 8, ret);
- break;
- }
- if (ret == 0)
- return 0;
- }
- return __copy_tofrom_user((__force void __user *)to, from, n);
+ return copy_mc_generic(to, from, size);
}
+#define copy_mc_to_kernel copy_mc_to_kernel
-static inline unsigned long __copy_to_user_inatomic(void __user *to,
- const void *from, unsigned long n)
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
{
- if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
-
- switch (n) {
- case 1:
- __put_user_size(*(u8 *)from, (u8 __user *)to, 1, ret);
- break;
- case 2:
- __put_user_size(*(u16 *)from, (u16 __user *)to, 2, ret);
- break;
- case 4:
- __put_user_size(*(u32 *)from, (u32 __user *)to, 4, ret);
- break;
- case 8:
- __put_user_size(*(u64 *)from, (u64 __user *)to, 8, ret);
- break;
+ if (check_copy_size(from, n, true)) {
+ if (access_ok(to, n)) {
+ allow_write_to_user(to, n);
+ n = copy_mc_generic((void __force *)to, from, n);
+ prevent_write_to_user(to, n);
}
- if (ret == 0)
- return 0;
}
- return __copy_tofrom_user(to, (__force const void __user *)from, n);
+
+ return n;
}
+#endif
+
+extern long __copy_from_user_flushcache(void *dst, const void __user *src,
+ unsigned size);
-static inline unsigned long __copy_from_user(void *to,
- const void __user *from, unsigned long size)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+
might_fault();
- return __copy_from_user_inatomic(to, from, size);
+
+ allow_read_write_user((void __user *)ptr, ptr, len);
+ return true;
}
+#define user_access_begin user_access_begin
+#define user_access_end prevent_current_access_user
+#define user_access_save prevent_user_access_return
+#define user_access_restore restore_user_access
-static inline unsigned long __copy_to_user(void __user *to,
- const void *from, unsigned long size)
+static __must_check __always_inline bool
+user_read_access_begin(const void __user *ptr, size_t len)
{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+
might_fault();
- return __copy_to_user_inatomic(to, from, size);
-}
-extern unsigned long __clear_user(void __user *addr, unsigned long size);
+ allow_read_from_user(ptr, len);
+ return true;
+}
+#define user_read_access_begin user_read_access_begin
+#define user_read_access_end prevent_current_read_from_user
-static inline unsigned long clear_user(void __user *addr, unsigned long size)
+static __must_check __always_inline bool
+user_write_access_begin(const void __user *ptr, size_t len)
{
+ if (unlikely(!access_ok(ptr, len)))
+ return false;
+
might_fault();
- if (likely(access_ok(VERIFY_WRITE, addr, size)))
- return __clear_user(addr, size);
- if ((unsigned long)addr < TASK_SIZE) {
- unsigned long over = (unsigned long)addr + size - TASK_SIZE;
- return __clear_user(addr, size - over) + over;
- }
- return size;
+
+ allow_write_to_user((void __user *)ptr, len);
+ return true;
}
+#define user_write_access_begin user_write_access_begin
+#define user_write_access_end prevent_current_write_to_user
+
+#define unsafe_get_user(x, p, e) do { \
+ __long_type(*(p)) __gu_val; \
+ __typeof__(*(p)) __user *__gu_addr = (p); \
+ \
+ __get_user_size_goto(__gu_val, __gu_addr, sizeof(*(p)), e); \
+ (x) = (__typeof__(*(p)))__gu_val; \
+} while (0)
-extern long strncpy_from_user(char *dst, const char __user *src, long count);
-extern __must_check long strlen_user(const char __user *str);
-extern __must_check long strnlen_user(const char __user *str, long n);
+#define unsafe_put_user(x, p, e) \
+ __put_user_size_goto((__typeof__(*(p)))(x), (p), sizeof(*(p)), e)
+
+#define unsafe_copy_from_user(d, s, l, e) \
+do { \
+ u8 *_dst = (u8 *)(d); \
+ const u8 __user *_src = (const u8 __user *)(s); \
+ size_t _len = (l); \
+ int _i; \
+ \
+ for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \
+ unsafe_get_user(*(u64 *)(_dst + _i), (u64 __user *)(_src + _i), e); \
+ if (_len & 4) { \
+ unsafe_get_user(*(u32 *)(_dst + _i), (u32 __user *)(_src + _i), e); \
+ _i += 4; \
+ } \
+ if (_len & 2) { \
+ unsafe_get_user(*(u16 *)(_dst + _i), (u16 __user *)(_src + _i), e); \
+ _i += 2; \
+ } \
+ if (_len & 1) \
+ unsafe_get_user(*(u8 *)(_dst + _i), (u8 __user *)(_src + _i), e); \
+} while (0)
+
+#define unsafe_copy_to_user(d, s, l, e) \
+do { \
+ u8 __user *_dst = (u8 __user *)(d); \
+ const u8 *_src = (const u8 *)(s); \
+ size_t _len = (l); \
+ int _i; \
+ \
+ for (_i = 0; _i < (_len & ~(sizeof(u64) - 1)); _i += sizeof(u64)) \
+ unsafe_put_user(*(u64 *)(_src + _i), (u64 __user *)(_dst + _i), e); \
+ if (_len & 4) { \
+ unsafe_put_user(*(u32*)(_src + _i), (u32 __user *)(_dst + _i), e); \
+ _i += 4; \
+ } \
+ if (_len & 2) { \
+ unsafe_put_user(*(u16*)(_src + _i), (u16 __user *)(_dst + _i), e); \
+ _i += 2; \
+ } \
+ if (_len & 1) \
+ unsafe_put_user(*(u8*)(_src + _i), (u8 __user *)(_dst + _i), e); \
+} while (0)
+
+#define __get_kernel_nofault(dst, src, type, err_label) \
+ __get_user_size_goto(*((type *)(dst)), \
+ (__force type __user *)(src), sizeof(type), err_label)
-#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
+#define __put_kernel_nofault(dst, src, type, err_label) \
+ __put_user_size_goto(*((type *)(src)), \
+ (__force type __user *)(dst), sizeof(type), err_label)
#endif /* _ARCH_POWERPC_UACCESS_H */
diff --git a/arch/powerpc/include/asm/ucc.h b/arch/powerpc/include/asm/ucc.h
deleted file mode 100644
index 6927ac26516e..000000000000
--- a/arch/powerpc/include/asm/ucc.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: Shlomi Gridish <gridish@freescale.com>
- * Li Yang <leoli@freescale.com>
- *
- * Description:
- * Internal header file for UCC unit routines.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef __UCC_H__
-#define __UCC_H__
-
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#define STATISTICS
-
-#define UCC_MAX_NUM 8
-
-/* Slow or fast type for UCCs.
-*/
-enum ucc_speed_type {
- UCC_SPEED_TYPE_FAST = UCC_GUEMR_MODE_FAST_RX | UCC_GUEMR_MODE_FAST_TX,
- UCC_SPEED_TYPE_SLOW = UCC_GUEMR_MODE_SLOW_RX | UCC_GUEMR_MODE_SLOW_TX
-};
-
-/* ucc_set_type
- * Sets UCC to slow or fast mode.
- *
- * ucc_num - (In) number of UCC (0-7).
- * speed - (In) slow or fast mode for UCC.
- */
-int ucc_set_type(unsigned int ucc_num, enum ucc_speed_type speed);
-
-int ucc_set_qe_mux_mii_mng(unsigned int ucc_num);
-
-int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock,
- enum comm_dir mode);
-
-int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask);
-
-/* QE MUX clock routing for UCC
-*/
-static inline int ucc_set_qe_mux_grant(unsigned int ucc_num, int set)
-{
- return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_GRANT);
-}
-
-static inline int ucc_set_qe_mux_tsa(unsigned int ucc_num, int set)
-{
- return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_TSA);
-}
-
-static inline int ucc_set_qe_mux_bkpt(unsigned int ucc_num, int set)
-{
- return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_BKPT);
-}
-
-#endif /* __UCC_H__ */
diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h
deleted file mode 100644
index 72ea9bab07df..000000000000
--- a/arch/powerpc/include/asm/ucc_fast.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Internal header file for UCC FAST unit routines.
- *
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: Shlomi Gridish <gridish@freescale.com>
- * Li Yang <leoli@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef __UCC_FAST_H__
-#define __UCC_FAST_H__
-
-#include <linux/kernel.h>
-
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#include <asm/ucc.h>
-
-/* Receive BD's status */
-#define R_E 0x80000000 /* buffer empty */
-#define R_W 0x20000000 /* wrap bit */
-#define R_I 0x10000000 /* interrupt on reception */
-#define R_L 0x08000000 /* last */
-#define R_F 0x04000000 /* first */
-
-/* transmit BD's status */
-#define T_R 0x80000000 /* ready bit */
-#define T_W 0x20000000 /* wrap bit */
-#define T_I 0x10000000 /* interrupt on completion */
-#define T_L 0x08000000 /* last */
-
-/* Rx Data buffer must be 4 bytes aligned in most cases */
-#define UCC_FAST_RX_ALIGN 4
-#define UCC_FAST_MRBLR_ALIGNMENT 4
-#define UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT 8
-
-/* Sizes */
-#define UCC_FAST_URFS_MIN_VAL 0x88
-#define UCC_FAST_RECEIVE_VIRTUAL_FIFO_SIZE_FUDGE_FACTOR 8
-
-/* ucc_fast_channel_protocol_mode - UCC FAST mode */
-enum ucc_fast_channel_protocol_mode {
- UCC_FAST_PROTOCOL_MODE_HDLC = 0x00000000,
- UCC_FAST_PROTOCOL_MODE_RESERVED01 = 0x00000001,
- UCC_FAST_PROTOCOL_MODE_RESERVED_QMC = 0x00000002,
- UCC_FAST_PROTOCOL_MODE_RESERVED02 = 0x00000003,
- UCC_FAST_PROTOCOL_MODE_RESERVED_UART = 0x00000004,
- UCC_FAST_PROTOCOL_MODE_RESERVED03 = 0x00000005,
- UCC_FAST_PROTOCOL_MODE_RESERVED_EX_MAC_1 = 0x00000006,
- UCC_FAST_PROTOCOL_MODE_RESERVED_EX_MAC_2 = 0x00000007,
- UCC_FAST_PROTOCOL_MODE_RESERVED_BISYNC = 0x00000008,
- UCC_FAST_PROTOCOL_MODE_RESERVED04 = 0x00000009,
- UCC_FAST_PROTOCOL_MODE_ATM = 0x0000000A,
- UCC_FAST_PROTOCOL_MODE_RESERVED05 = 0x0000000B,
- UCC_FAST_PROTOCOL_MODE_ETHERNET = 0x0000000C,
- UCC_FAST_PROTOCOL_MODE_RESERVED06 = 0x0000000D,
- UCC_FAST_PROTOCOL_MODE_POS = 0x0000000E,
- UCC_FAST_PROTOCOL_MODE_RESERVED07 = 0x0000000F
-};
-
-/* ucc_fast_transparent_txrx - UCC Fast Transparent TX & RX */
-enum ucc_fast_transparent_txrx {
- UCC_FAST_GUMR_TRANSPARENT_TTX_TRX_NORMAL = 0x00000000,
- UCC_FAST_GUMR_TRANSPARENT_TTX_TRX_TRANSPARENT = 0x18000000
-};
-
-/* UCC fast diagnostic mode */
-enum ucc_fast_diag_mode {
- UCC_FAST_DIAGNOSTIC_NORMAL = 0x0,
- UCC_FAST_DIAGNOSTIC_LOCAL_LOOP_BACK = 0x40000000,
- UCC_FAST_DIAGNOSTIC_AUTO_ECHO = 0x80000000,
- UCC_FAST_DIAGNOSTIC_LOOP_BACK_AND_ECHO = 0xC0000000
-};
-
-/* UCC fast Sync length (transparent mode only) */
-enum ucc_fast_sync_len {
- UCC_FAST_SYNC_LEN_NOT_USED = 0x0,
- UCC_FAST_SYNC_LEN_AUTOMATIC = 0x00004000,
- UCC_FAST_SYNC_LEN_8_BIT = 0x00008000,
- UCC_FAST_SYNC_LEN_16_BIT = 0x0000C000
-};
-
-/* UCC fast RTS mode */
-enum ucc_fast_ready_to_send {
- UCC_FAST_SEND_IDLES_BETWEEN_FRAMES = 0x00000000,
- UCC_FAST_SEND_FLAGS_BETWEEN_FRAMES = 0x00002000
-};
-
-/* UCC fast receiver decoding mode */
-enum ucc_fast_rx_decoding_method {
- UCC_FAST_RX_ENCODING_NRZ = 0x00000000,
- UCC_FAST_RX_ENCODING_NRZI = 0x00000800,
- UCC_FAST_RX_ENCODING_RESERVED0 = 0x00001000,
- UCC_FAST_RX_ENCODING_RESERVED1 = 0x00001800
-};
-
-/* UCC fast transmitter encoding mode */
-enum ucc_fast_tx_encoding_method {
- UCC_FAST_TX_ENCODING_NRZ = 0x00000000,
- UCC_FAST_TX_ENCODING_NRZI = 0x00000100,
- UCC_FAST_TX_ENCODING_RESERVED0 = 0x00000200,
- UCC_FAST_TX_ENCODING_RESERVED1 = 0x00000300
-};
-
-/* UCC fast CRC length */
-enum ucc_fast_transparent_tcrc {
- UCC_FAST_16_BIT_CRC = 0x00000000,
- UCC_FAST_CRC_RESERVED0 = 0x00000040,
- UCC_FAST_32_BIT_CRC = 0x00000080,
- UCC_FAST_CRC_RESERVED1 = 0x000000C0
-};
-
-/* Fast UCC initialization structure */
-struct ucc_fast_info {
- int ucc_num;
- enum qe_clock rx_clock;
- enum qe_clock tx_clock;
- u32 regs;
- int irq;
- u32 uccm_mask;
- int bd_mem_part;
- int brkpt_support;
- int grant_support;
- int tsa;
- int cdp;
- int cds;
- int ctsp;
- int ctss;
- int tci;
- int txsy;
- int rtsm;
- int revd;
- int rsyn;
- u16 max_rx_buf_length;
- u16 urfs;
- u16 urfet;
- u16 urfset;
- u16 utfs;
- u16 utfet;
- u16 utftt;
- u16 ufpt;
- enum ucc_fast_channel_protocol_mode mode;
- enum ucc_fast_transparent_txrx ttx_trx;
- enum ucc_fast_tx_encoding_method tenc;
- enum ucc_fast_rx_decoding_method renc;
- enum ucc_fast_transparent_tcrc tcrc;
- enum ucc_fast_sync_len synl;
-};
-
-struct ucc_fast_private {
- struct ucc_fast_info *uf_info;
- struct ucc_fast __iomem *uf_regs; /* a pointer to the UCC regs. */
- u32 __iomem *p_ucce; /* a pointer to the event register in memory. */
- u32 __iomem *p_uccm; /* a pointer to the mask register in memory. */
-#ifdef CONFIG_UGETH_TX_ON_DEMAND
- u16 __iomem *p_utodr; /* pointer to the transmit on demand register */
-#endif
- int enabled_tx; /* Whether channel is enabled for Tx (ENT) */
- int enabled_rx; /* Whether channel is enabled for Rx (ENR) */
- int stopped_tx; /* Whether channel has been stopped for Tx
- (STOP_TX, etc.) */
- int stopped_rx; /* Whether channel has been stopped for Rx */
- u32 ucc_fast_tx_virtual_fifo_base_offset;/* pointer to base of Tx
- virtual fifo */
- u32 ucc_fast_rx_virtual_fifo_base_offset;/* pointer to base of Rx
- virtual fifo */
-#ifdef STATISTICS
- u32 tx_frames; /* Transmitted frames counter. */
- u32 rx_frames; /* Received frames counter (only frames
- passed to application). */
- u32 tx_discarded; /* Discarded tx frames counter (frames that
- were discarded by the driver due to errors).
- */
- u32 rx_discarded; /* Discarded rx frames counter (frames that
- were discarded by the driver due to errors).
- */
-#endif /* STATISTICS */
- u16 mrblr; /* maximum receive buffer length */
-};
-
-/* ucc_fast_init
- * Initializes Fast UCC according to user provided parameters.
- *
- * uf_info - (In) pointer to the fast UCC info structure.
- * uccf_ret - (Out) pointer to the fast UCC structure.
- */
-int ucc_fast_init(struct ucc_fast_info * uf_info, struct ucc_fast_private ** uccf_ret);
-
-/* ucc_fast_free
- * Frees all resources for fast UCC.
- *
- * uccf - (In) pointer to the fast UCC structure.
- */
-void ucc_fast_free(struct ucc_fast_private * uccf);
-
-/* ucc_fast_enable
- * Enables a fast UCC port.
- * This routine enables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccf - (In) pointer to the fast UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_fast_enable(struct ucc_fast_private * uccf, enum comm_dir mode);
-
-/* ucc_fast_disable
- * Disables a fast UCC port.
- * This routine disables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccf - (In) pointer to the fast UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_fast_disable(struct ucc_fast_private * uccf, enum comm_dir mode);
-
-/* ucc_fast_irq
- * Handles interrupts on fast UCC.
- * Called from the general interrupt routine to handle interrupts on fast UCC.
- *
- * uccf - (In) pointer to the fast UCC structure.
- */
-void ucc_fast_irq(struct ucc_fast_private * uccf);
-
-/* ucc_fast_transmit_on_demand
- * Immediately forces a poll of the transmitter for data to be sent.
- * Typically, the hardware performs a periodic poll for data that the
- * transmit routine has set up to be transmitted. In cases where
- * this polling cycle is not soon enough, this optional routine can
- * be invoked to force a poll right away, instead. Proper use for
- * each transmission for which this functionality is desired is to
- * call the transmit routine and then this routine right after.
- *
- * uccf - (In) pointer to the fast UCC structure.
- */
-void ucc_fast_transmit_on_demand(struct ucc_fast_private * uccf);
-
-u32 ucc_fast_get_qe_cr_subblock(int uccf_num);
-
-void ucc_fast_dump_regs(struct ucc_fast_private * uccf);
-
-#endif /* __UCC_FAST_H__ */
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
deleted file mode 100644
index c44131e68e11..000000000000
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: Shlomi Gridish <gridish@freescale.com>
- * Li Yang <leoli@freescale.com>
- *
- * Description:
- * Internal header file for UCC SLOW unit routines.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef __UCC_SLOW_H__
-#define __UCC_SLOW_H__
-
-#include <linux/kernel.h>
-
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#include <asm/ucc.h>
-
-/* transmit BD's status */
-#define T_R 0x80000000 /* ready bit */
-#define T_PAD 0x40000000 /* add pads to short frames */
-#define T_W 0x20000000 /* wrap bit */
-#define T_I 0x10000000 /* interrupt on completion */
-#define T_L 0x08000000 /* last */
-
-#define T_A 0x04000000 /* Address - the data transmitted as address
- chars */
-#define T_TC 0x04000000 /* transmit CRC */
-#define T_CM 0x02000000 /* continuous mode */
-#define T_DEF 0x02000000 /* collision on previous attempt to transmit */
-#define T_P 0x01000000 /* Preamble - send Preamble sequence before
- data */
-#define T_HB 0x01000000 /* heartbeat */
-#define T_NS 0x00800000 /* No Stop */
-#define T_LC 0x00800000 /* late collision */
-#define T_RL 0x00400000 /* retransmission limit */
-#define T_UN 0x00020000 /* underrun */
-#define T_CT 0x00010000 /* CTS lost */
-#define T_CSL 0x00010000 /* carrier sense lost */
-#define T_RC 0x003c0000 /* retry count */
-
-/* Receive BD's status */
-#define R_E 0x80000000 /* buffer empty */
-#define R_W 0x20000000 /* wrap bit */
-#define R_I 0x10000000 /* interrupt on reception */
-#define R_L 0x08000000 /* last */
-#define R_C 0x08000000 /* the last byte in this buffer is a cntl
- char */
-#define R_F 0x04000000 /* first */
-#define R_A 0x04000000 /* the first byte in this buffer is address
- byte */
-#define R_CM 0x02000000 /* continuous mode */
-#define R_ID 0x01000000 /* buffer close on reception of idles */
-#define R_M 0x01000000 /* Frame received because of promiscuous
- mode */
-#define R_AM 0x00800000 /* Address match */
-#define R_DE 0x00800000 /* Address match */
-#define R_LG 0x00200000 /* Break received */
-#define R_BR 0x00200000 /* Frame length violation */
-#define R_NO 0x00100000 /* Rx Non Octet Aligned Packet */
-#define R_FR 0x00100000 /* Framing Error (no stop bit) character
- received */
-#define R_PR 0x00080000 /* Parity Error character received */
-#define R_AB 0x00080000 /* Frame Aborted */
-#define R_SH 0x00080000 /* frame is too short */
-#define R_CR 0x00040000 /* CRC Error */
-#define R_OV 0x00020000 /* Overrun */
-#define R_CD 0x00010000 /* CD lost */
-#define R_CL 0x00010000 /* this frame is closed because of a
- collision */
-
-/* Rx Data buffer must be 4 bytes aligned in most cases.*/
-#define UCC_SLOW_RX_ALIGN 4
-#define UCC_SLOW_MRBLR_ALIGNMENT 4
-#define UCC_SLOW_PRAM_SIZE 0x100
-#define ALIGNMENT_OF_UCC_SLOW_PRAM 64
-
-/* UCC Slow Channel Protocol Mode */
-enum ucc_slow_channel_protocol_mode {
- UCC_SLOW_CHANNEL_PROTOCOL_MODE_QMC = 0x00000002,
- UCC_SLOW_CHANNEL_PROTOCOL_MODE_UART = 0x00000004,
- UCC_SLOW_CHANNEL_PROTOCOL_MODE_BISYNC = 0x00000008,
-};
-
-/* UCC Slow Transparent Transmit CRC (TCRC) */
-enum ucc_slow_transparent_tcrc {
- /* 16-bit CCITT CRC (HDLC). (X16 + X12 + X5 + 1) */
- UCC_SLOW_TRANSPARENT_TCRC_CCITT_CRC16 = 0x00000000,
- /* CRC16 (BISYNC). (X16 + X15 + X2 + 1) */
- UCC_SLOW_TRANSPARENT_TCRC_CRC16 = 0x00004000,
- /* 32-bit CCITT CRC (Ethernet and HDLC) */
- UCC_SLOW_TRANSPARENT_TCRC_CCITT_CRC32 = 0x00008000,
-};
-
-/* UCC Slow oversampling rate for transmitter (TDCR) */
-enum ucc_slow_tx_oversampling_rate {
- /* 1x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_1 = 0x00000000,
- /* 8x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_8 = 0x00010000,
- /* 16x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_16 = 0x00020000,
- /* 32x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_32 = 0x00030000,
-};
-
-/* UCC Slow Oversampling rate for receiver (RDCR)
-*/
-enum ucc_slow_rx_oversampling_rate {
- /* 1x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_1 = 0x00000000,
- /* 8x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_8 = 0x00004000,
- /* 16x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_16 = 0x00008000,
- /* 32x clock mode */
- UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_32 = 0x0000c000,
-};
-
-/* UCC Slow Transmitter encoding method (TENC)
-*/
-enum ucc_slow_tx_encoding_method {
- UCC_SLOW_TRANSMITTER_ENCODING_METHOD_TENC_NRZ = 0x00000000,
- UCC_SLOW_TRANSMITTER_ENCODING_METHOD_TENC_NRZI = 0x00000100
-};
-
-/* UCC Slow Receiver decoding method (RENC)
-*/
-enum ucc_slow_rx_decoding_method {
- UCC_SLOW_RECEIVER_DECODING_METHOD_RENC_NRZ = 0x00000000,
- UCC_SLOW_RECEIVER_DECODING_METHOD_RENC_NRZI = 0x00000800
-};
-
-/* UCC Slow Diagnostic mode (DIAG)
-*/
-enum ucc_slow_diag_mode {
- UCC_SLOW_DIAG_MODE_NORMAL = 0x00000000,
- UCC_SLOW_DIAG_MODE_LOOPBACK = 0x00000040,
- UCC_SLOW_DIAG_MODE_ECHO = 0x00000080,
- UCC_SLOW_DIAG_MODE_LOOPBACK_ECHO = 0x000000c0
-};
-
-struct ucc_slow_info {
- int ucc_num;
- int protocol; /* QE_CR_PROTOCOL_xxx */
- enum qe_clock rx_clock;
- enum qe_clock tx_clock;
- phys_addr_t regs;
- int irq;
- u16 uccm_mask;
- int data_mem_part;
- int init_tx;
- int init_rx;
- u32 tx_bd_ring_len;
- u32 rx_bd_ring_len;
- int rx_interrupts;
- int brkpt_support;
- int grant_support;
- int tsa;
- int cdp;
- int cds;
- int ctsp;
- int ctss;
- int rinv;
- int tinv;
- int rtsm;
- int rfw;
- int tci;
- int tend;
- int tfl;
- int txsy;
- u16 max_rx_buf_length;
- enum ucc_slow_transparent_tcrc tcrc;
- enum ucc_slow_channel_protocol_mode mode;
- enum ucc_slow_diag_mode diag;
- enum ucc_slow_tx_oversampling_rate tdcr;
- enum ucc_slow_rx_oversampling_rate rdcr;
- enum ucc_slow_tx_encoding_method tenc;
- enum ucc_slow_rx_decoding_method renc;
-};
-
-struct ucc_slow_private {
- struct ucc_slow_info *us_info;
- struct ucc_slow __iomem *us_regs; /* Ptr to memory map of UCC regs */
- struct ucc_slow_pram *us_pram; /* a pointer to the parameter RAM */
- u32 us_pram_offset;
- int enabled_tx; /* Whether channel is enabled for Tx (ENT) */
- int enabled_rx; /* Whether channel is enabled for Rx (ENR) */
- int stopped_tx; /* Whether channel has been stopped for Tx
- (STOP_TX, etc.) */
- int stopped_rx; /* Whether channel has been stopped for Rx */
- struct list_head confQ; /* frames passed to chip waiting for tx */
- u32 first_tx_bd_mask; /* mask is used in Tx routine to save status
- and length for first BD in a frame */
- u32 tx_base_offset; /* first BD in Tx BD table offset (In MURAM) */
- u32 rx_base_offset; /* first BD in Rx BD table offset (In MURAM) */
- struct qe_bd *confBd; /* next BD for confirm after Tx */
- struct qe_bd *tx_bd; /* next BD for new Tx request */
- struct qe_bd *rx_bd; /* next BD to collect after Rx */
- void *p_rx_frame; /* accumulating receive frame */
- u16 *p_ucce; /* a pointer to the event register in memory.
- */
- u16 *p_uccm; /* a pointer to the mask register in memory */
- u16 saved_uccm; /* a saved mask for the RX Interrupt bits */
-#ifdef STATISTICS
- u32 tx_frames; /* Transmitted frames counters */
- u32 rx_frames; /* Received frames counters (only frames
- passed to application) */
- u32 rx_discarded; /* Discarded frames counters (frames that
- were discarded by the driver due to
- errors) */
-#endif /* STATISTICS */
-};
-
-/* ucc_slow_init
- * Initializes Slow UCC according to provided parameters.
- *
- * us_info - (In) pointer to the slow UCC info structure.
- * uccs_ret - (Out) pointer to the slow UCC structure.
- */
-int ucc_slow_init(struct ucc_slow_info * us_info, struct ucc_slow_private ** uccs_ret);
-
-/* ucc_slow_free
- * Frees all resources for slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_free(struct ucc_slow_private * uccs);
-
-/* ucc_slow_enable
- * Enables a fast UCC port.
- * This routine enables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccs - (In) pointer to the slow UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode);
-
-/* ucc_slow_disable
- * Disables a fast UCC port.
- * This routine disables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccs - (In) pointer to the slow UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode);
-
-/* ucc_slow_poll_transmitter_now
- * Immediately forces a poll of the transmitter for data to be sent.
- * Typically, the hardware performs a periodic poll for data that the
- * transmit routine has set up to be transmitted. In cases where
- * this polling cycle is not soon enough, this optional routine can
- * be invoked to force a poll right away, instead. Proper use for
- * each transmission for which this functionality is desired is to
- * call the transmit routine and then this routine right after.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_poll_transmitter_now(struct ucc_slow_private * uccs);
-
-/* ucc_slow_graceful_stop_tx
- * Smoothly stops transmission on a specified slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs);
-
-/* ucc_slow_stop_tx
- * Stops transmission on a specified slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_stop_tx(struct ucc_slow_private * uccs);
-
-/* ucc_slow_restart_tx
- * Restarts transmitting on a specified slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_restart_tx(struct ucc_slow_private *uccs);
-
-u32 ucc_slow_get_qe_cr_subblock(int uccs_num);
-
-#endif /* __UCC_SLOW_H__ */
diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index b51fba10e733..a8681b12864f 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* (c) 2001, 2006 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_UDBG_H
@@ -19,45 +15,42 @@ extern void (*udbg_flush)(void);
extern int (*udbg_getc)(void);
extern int (*udbg_getc_poll)(void);
-extern void udbg_puts(const char *s);
-extern int udbg_write(const char *s, int n);
+void udbg_puts(const char *s);
+int udbg_write(const char *s, int n);
-extern void register_early_udbg_console(void);
-extern void udbg_printf(const char *fmt, ...)
+void register_early_udbg_console(void);
+void udbg_printf(const char *fmt, ...)
__attribute__ ((format (printf, 1, 2)));
-extern void udbg_progress(char *s, unsigned short hex);
+void udbg_progress(char *s, unsigned short hex);
-extern void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride);
-extern void udbg_uart_init_pio(unsigned long port, unsigned int stride);
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride);
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride);
-extern void udbg_uart_setup(unsigned int speed, unsigned int clock);
-extern unsigned int udbg_probe_uart_speed(unsigned int clock);
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock);
+unsigned int __init udbg_probe_uart_speed(unsigned int clock);
struct device_node;
-extern void udbg_scc_init(int force_scc);
-extern int udbg_adb_init(int force_btext);
-extern void udbg_adb_init_early(void);
-
-extern void __init udbg_early_init(void);
-extern void __init udbg_init_debug_lpar(void);
-extern void __init udbg_init_debug_lpar_hvsi(void);
-extern void __init udbg_init_pmac_realmode(void);
-extern void __init udbg_init_maple_realmode(void);
-extern void __init udbg_init_pas_realmode(void);
-extern void __init udbg_init_rtas_panel(void);
-extern void __init udbg_init_rtas_console(void);
-extern void __init udbg_init_debug_beat(void);
-extern void __init udbg_init_btext(void);
-extern void __init udbg_init_44x_as1(void);
-extern void __init udbg_init_40x_realmode(void);
-extern void __init udbg_init_cpm(void);
-extern void __init udbg_init_usbgecko(void);
-extern void __init udbg_init_wsp(void);
-extern void __init udbg_init_memcons(void);
-extern void __init udbg_init_ehv_bc(void);
-extern void __init udbg_init_ps3gelic(void);
-extern void __init udbg_init_debug_opal_raw(void);
-extern void __init udbg_init_debug_opal_hvsi(void);
+void __init udbg_scc_init(int force_scc);
+int udbg_adb_init(int force_btext);
+void udbg_adb_init_early(void);
+
+void __init udbg_early_init(void);
+void __init udbg_init_debug_lpar(void);
+void __init udbg_init_debug_lpar_hvsi(void);
+void __init udbg_init_pmac_realmode(void);
+void __init udbg_init_pas_realmode(void);
+void __init udbg_init_rtas_panel(void);
+void __init udbg_init_rtas_console(void);
+void __init udbg_init_btext(void);
+void __init udbg_init_44x_as1(void);
+void __init udbg_init_cpm(void);
+void __init udbg_init_usbgecko(void);
+void __init udbg_init_memcons(void);
+void __init udbg_init_ehv_bc(void);
+void __init udbg_init_ps3gelic(void);
+void __init udbg_init_debug_opal_raw(void);
+void __init udbg_init_debug_opal_hvsi(void);
+void __init udbg_init_debug_16550(void);
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/include/asm/uic.h b/arch/powerpc/include/asm/uic.h
index 597edfcae3d6..7b7bd15b1c5c 100644
--- a/arch/powerpc/include/asm/uic.h
+++ b/arch/powerpc/include/asm/uic.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* IBM PPC4xx UIC external definitions and structure.
*
* Maintainer: David Gibson <dwg@au1.ibm.com>
* Copyright 2007 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#ifndef _ASM_POWERPC_UIC_H
#define _ASM_POWERPC_UIC_H
diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
new file mode 100644
index 000000000000..b66f6db7be6c
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor API.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
+#define _ASM_POWERPC_ULTRAVISOR_API_H
+
+#include <asm/hvcall.h>
+
+/* Return codes */
+#define U_BUSY H_BUSY
+#define U_FUNCTION H_FUNCTION
+#define U_NOT_AVAILABLE H_NOT_AVAILABLE
+#define U_P2 H_P2
+#define U_P3 H_P3
+#define U_P4 H_P4
+#define U_P5 H_P5
+#define U_PARAMETER H_PARAMETER
+#define U_PERMISSION H_PERMISSION
+#define U_SUCCESS H_SUCCESS
+
+/* opcodes */
+#define UV_WRITE_PATE 0xF104
+#define UV_RETURN 0xF11C
+#define UV_ESM 0xF110
+#define UV_REGISTER_MEM_SLOT 0xF120
+#define UV_UNREGISTER_MEM_SLOT 0xF124
+#define UV_PAGE_IN 0xF128
+#define UV_PAGE_OUT 0xF12C
+#define UV_SHARE_PAGE 0xF130
+#define UV_UNSHARE_PAGE 0xF134
+#define UV_UNSHARE_ALL_PAGES 0xF140
+#define UV_PAGE_INVAL 0xF138
+#define UV_SVM_TERMINATE 0xF13C
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
new file mode 100644
index 000000000000..790b0e63681f
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor definitions
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_H
+#define _ASM_POWERPC_ULTRAVISOR_H
+
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
+#include <asm/firmware.h>
+
+int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+ int depth, void *data);
+
+/*
+ * In ultravisor enabled systems, PTCR becomes ultravisor privileged only for
+ * writing and an attempt to write to it will cause a Hypervisor Emulation
+ * Assistance interrupt.
+ */
+static inline void set_ptcr_when_no_uv(u64 val)
+{
+ if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+ mtspr(SPRN_PTCR, val);
+}
+
+static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
+{
+ return ucall_norets(UV_WRITE_PATE, lpid, dw0, dw1);
+}
+
+static inline int uv_share_page(u64 pfn, u64 npages)
+{
+ return ucall_norets(UV_SHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_page(u64 pfn, u64 npages)
+{
+ return ucall_norets(UV_UNSHARE_PAGE, pfn, npages);
+}
+
+static inline int uv_unshare_all_pages(void)
+{
+ return ucall_norets(UV_UNSHARE_ALL_PAGES);
+}
+
+static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
+ u64 page_shift)
+{
+ return ucall_norets(UV_PAGE_IN, lpid, src_ra, dst_gpa, flags,
+ page_shift);
+}
+
+static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
+ u64 page_shift)
+{
+ return ucall_norets(UV_PAGE_OUT, lpid, dst_ra, src_gpa, flags,
+ page_shift);
+}
+
+static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
+ u64 flags, u64 slotid)
+{
+ return ucall_norets(UV_REGISTER_MEM_SLOT, lpid, start_gpa,
+ size, flags, slotid);
+}
+
+static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
+{
+ return ucall_norets(UV_UNREGISTER_MEM_SLOT, lpid, slotid);
+}
+
+static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
+{
+ return ucall_norets(UV_PAGE_INVAL, lpid, gpa, page_shift);
+}
+
+static inline int uv_svm_terminate(u64 lpid)
+{
+ return ucall_norets(UV_SVM_TERMINATE, lpid);
+}
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/include/asm/unaligned.h b/arch/powerpc/include/asm/unaligned.h
deleted file mode 100644
index 8296381ae432..000000000000
--- a/arch/powerpc/include/asm/unaligned.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef _ASM_POWERPC_UNALIGNED_H
-#define _ASM_POWERPC_UNALIGNED_H
-
-#ifdef __KERNEL__
-
-/*
- * The PowerPC can do unaligned accesses itself based on its endian mode.
- */
-#include <linux/unaligned/access_ok.h>
-#include <linux/unaligned/generic.h>
-
-#ifdef __LITTLE_ENDIAN__
-#define get_unaligned __get_unaligned_le
-#define put_unaligned __put_unaligned_le
-#else
-#define get_unaligned __get_unaligned_be
-#define put_unaligned __put_unaligned_be
-#endif
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_UNALIGNED_H */
diff --git a/arch/powerpc/include/asm/uninorth.h b/arch/powerpc/include/asm/uninorth.h
index d12b11d7641e..6949b5daa37d 100644
--- a/arch/powerpc/include/asm/uninorth.h
+++ b/arch/powerpc/include/asm/uninorth.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* uninorth.h: definitions for using the "UniNorth" host bridge chip
* from Apple. This chip is used on "Core99" machines
@@ -132,7 +133,7 @@
/* This one _might_ return the CPU number of the CPU reading it;
* the bootROM decides whether to boot or to sleep/spinloop depending
- * on this register beeing 0 or not
+ * on this register being 0 or not
*/
#define UNI_N_CPU_NUMBER 0x0050
@@ -143,7 +144,7 @@
#define UNI_N_HWINIT_STATE_SLEEPING 0x01
#define UNI_N_HWINIT_STATE_RUNNING 0x02
/* This last bit appears to be used by the bootROM to know the second
- * CPU has started and will enter it's sleep loop with IP=0
+ * CPU has started and will enter its sleep loop with IP=0
*/
#define UNI_N_HWINIT_STATE_CPU1_FLAG 0x10000000
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 5ce5552ab9f5..b873fbb6d712 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -1,28 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains the system call numbers.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_UNISTD_H_
#define _ASM_POWERPC_UNISTD_H_
#include <uapi/asm/unistd.h>
-
-#define __NR_syscalls 358
-
-#define __NR__exit __NR_exit
#define NR_syscalls __NR_syscalls
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/linkage.h>
+#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_OLD_READDIR
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_SYS_ALARM
@@ -30,8 +23,8 @@
#define __ARCH_WANT_SYS_IPC
#define __ARCH_WANT_SYS_PAUSE
#define __ARCH_WANT_SYS_SIGNAL
-#define __ARCH_WANT_SYS_TIME
-#define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_TIME32
+#define __ARCH_WANT_SYS_UTIME32
#define __ARCH_WANT_SYS_WAITPID
#define __ARCH_WANT_SYS_SOCKETCALL
#define __ARCH_WANT_SYS_FADVISE64
@@ -45,15 +38,19 @@
#define __ARCH_WANT_SYS_SIGPROCMASK
#ifdef CONFIG_PPC32
#define __ARCH_WANT_OLD_STAT
+#define __ARCH_WANT_SYS_OLD_SELECT
#endif
#ifdef CONFIG_PPC64
-#define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME
#define __ARCH_WANT_SYS_NEWFSTATAT
+#define __ARCH_WANT_COMPAT_STAT
+#define __ARCH_WANT_COMPAT_FALLOCATE
#define __ARCH_WANT_COMPAT_SYS_SENDFILE
#endif
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/include/asm/uprobes.h b/arch/powerpc/include/asm/uprobes.h
index 7422a999a39a..4fea116d3d37 100644
--- a/arch/powerpc/include/asm/uprobes.h
+++ b/arch/powerpc/include/asm/uprobes.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _ASM_UPROBES_H
#define _ASM_UPROBES_H
/*
* User-space Probes (UProbes) for powerpc
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corporation, 2007-2012
*
* Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
@@ -25,9 +12,9 @@
#include <linux/notifier.h>
#include <asm/probes.h>
-typedef ppc_opcode_t uprobe_opcode_t;
+typedef u32 uprobe_opcode_t;
-#define MAX_UINSN_BYTES 4
+#define MAX_UINSN_BYTES 8
#define UPROBE_XOL_SLOT_BYTES (MAX_UINSN_BYTES)
/* The following alias is needed for reference from arch-agnostic code */
@@ -36,8 +23,8 @@ typedef ppc_opcode_t uprobe_opcode_t;
struct arch_uprobe {
union {
- u32 insn;
- u32 ixol;
+ u32 insn[2];
+ u32 ixol[2];
};
};
diff --git a/arch/powerpc/include/asm/user.h b/arch/powerpc/include/asm/user.h
index 3fd4545dd74e..7fae7e597ba4 100644
--- a/arch/powerpc/include/asm/user.h
+++ b/arch/powerpc/include/asm/user.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_USER_H
#define _ASM_POWERPC_USER_H
@@ -30,7 +31,7 @@
* to write an integer number of pages.
*/
struct user {
- struct pt_regs regs; /* entire machine state */
+ struct user_pt_regs regs; /* entire machine state */
size_t u_tsize; /* text size (pages) */
size_t u_dsize; /* data size (pages) */
size_t u_ssize; /* stack size (pages) */
@@ -43,9 +44,4 @@ struct user {
char u_comm[32]; /* user command name */
};
-#define NBPG PAGE_SIZE
-#define UPAGES 1
-#define HOST_TEXT_START_ADDR (u.start_code)
-#define HOST_DATA_START_ADDR (u.start_data)
-#define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG)
#endif /* _ASM_POWERPC_USER_H */
diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
new file mode 100644
index 000000000000..c36f71e01c0f
--- /dev/null
+++ b/arch/powerpc/include/asm/vas.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016-17 IBM Corp.
+ */
+
+#ifndef _ASM_POWERPC_VAS_H
+#define _ASM_POWERPC_VAS_H
+#include <linux/sched/mm.h>
+#include <linux/mmu_context.h>
+#include <asm/icswx.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * Min and max FIFO sizes are based on Version 1.05 Section 3.1.4.25
+ * (Local FIFO Size Register) of the VAS workbook.
+ */
+#define VAS_RX_FIFO_SIZE_MIN (1 << 10) /* 1KB */
+#define VAS_RX_FIFO_SIZE_MAX (8 << 20) /* 8MB */
+
+/*
+ * Threshold Control Mode: Have paste operation fail if the number of
+ * requests in receive FIFO exceeds a threshold.
+ *
+ * NOTE: No special error code yet if paste is rejected because of these
+ * limits. So users can't distinguish between this and other errors.
+ */
+#define VAS_THRESH_DISABLED 0
+#define VAS_THRESH_FIFO_GT_HALF_FULL 1
+#define VAS_THRESH_FIFO_GT_QTR_FULL 2
+#define VAS_THRESH_FIFO_GT_EIGHTH_FULL 3
+
+/*
+ * VAS window Linux status bits
+ */
+#define VAS_WIN_ACTIVE 0x0 /* Used in platform independent */
+ /* vas mmap() */
+/* Window is closed in the hypervisor due to lost credit */
+#define VAS_WIN_NO_CRED_CLOSE 0x00000001
+/* Window is closed due to migration */
+#define VAS_WIN_MIGRATE_CLOSE 0x00000002
+
+/*
+ * Get/Set bit fields
+ */
+#define GET_FIELD(m, v) (((v) & (m)) >> MASK_LSH(m))
+#define MASK_LSH(m) (__builtin_ffsl(m) - 1)
+#define SET_FIELD(m, v, val) \
+ (((v) & ~(m)) | ((((typeof(v))(val)) << MASK_LSH(m)) & (m)))
+
+/*
+ * Co-processor Engine type.
+ */
+enum vas_cop_type {
+ VAS_COP_TYPE_FAULT,
+ VAS_COP_TYPE_842,
+ VAS_COP_TYPE_842_HIPRI,
+ VAS_COP_TYPE_GZIP,
+ VAS_COP_TYPE_GZIP_HIPRI,
+ VAS_COP_TYPE_FTW,
+ VAS_COP_TYPE_MAX,
+};
+
+/*
+ * User space VAS windows are opened by tasks and take references
+ * to pid and mm until windows are closed.
+ * Stores pid, mm, and tgid for each window.
+ */
+struct vas_user_win_ref {
+ struct pid *pid; /* PID of owner */
+ struct pid *tgid; /* Thread group ID of owner */
+ struct mm_struct *mm; /* Linux process mm_struct */
+ struct mutex mmap_mutex; /* protects paste address mmap() */
+ /* with DLPAR close/open windows */
+ struct vm_area_struct *vma; /* Save VMA and used in DLPAR ops */
+};
+
+/*
+ * Common VAS window struct on PowerNV and PowerVM
+ */
+struct vas_window {
+ u32 winid;
+ u32 wcreds_max; /* Window credits */
+ u32 status; /* Window status used in OS */
+ enum vas_cop_type cop;
+ struct vas_user_win_ref task_ref;
+ char *dbgname;
+ struct dentry *dbgdir;
+};
+
+/*
+ * User space window operations used for PowerNV and PowerVM
+ */
+struct vas_user_win_ops {
+ struct vas_window * (*open_win)(int vas_id, u64 flags,
+ enum vas_cop_type);
+ u64 (*paste_addr)(struct vas_window *);
+ int (*close_win)(struct vas_window *);
+};
+
+static inline void put_vas_user_win_ref(struct vas_user_win_ref *ref)
+{
+ /* Drop references to pid, tgid, and mm */
+ put_pid(ref->pid);
+ put_pid(ref->tgid);
+ if (ref->mm)
+ mmdrop(ref->mm);
+}
+
+static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
+{
+ mm_context_add_vas_window(ref->mm);
+ /*
+ * Even a process that has no foreign real address mapping can
+ * use an unpaired COPY instruction (to no real effect). Issue
+ * CP_ABORT to clear any pending COPY and prevent a covert
+ * channel.
+ *
+ * __switch_to() will issue CP_ABORT on future context switches
+ * if process / thread has any open VAS window (Use
+ * current->mm->context.vas_windows).
+ */
+ asm volatile(PPC_CP_ABORT);
+}
+
+/*
+ * Receive window attributes specified by the (in-kernel) owner of window.
+ */
+struct vas_rx_win_attr {
+ u64 rx_fifo;
+ int rx_fifo_size;
+ int wcreds_max;
+
+ bool pin_win;
+ bool rej_no_credit;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_win_ord_mode;
+ bool rx_win_ord_mode;
+ bool data_stamp;
+ bool nx_win;
+ bool fault_win;
+ bool user_win;
+ bool notify_disable;
+ bool intr_disable;
+ bool notify_early;
+
+ int lnotify_lpid;
+ int lnotify_pid;
+ int lnotify_tid;
+ u32 pswid;
+
+ int tc_mode;
+};
+
+/*
+ * Window attributes specified by the in-kernel owner of a send window.
+ */
+struct vas_tx_win_attr {
+ enum vas_cop_type cop;
+ int wcreds_max;
+ int lpid;
+ int pidr; /* hardware PID (from SPRN_PID) */
+ int pswid;
+ int rsvd_txbuf_count;
+ int tc_mode;
+
+ bool user_win;
+ bool pin_win;
+ bool rej_no_credit;
+ bool rsvd_txbuf_enable;
+ bool tx_wcred_mode;
+ bool rx_wcred_mode;
+ bool tx_win_ord_mode;
+ bool rx_win_ord_mode;
+};
+
+#ifdef CONFIG_PPC_POWERNV
+/*
+ * Helper to map a chip id to VAS id.
+ * For POWER9, this is a 1:1 mapping. In the future this may be a 1:N
+ * mapping in which case, we will need to update this helper.
+ *
+ * Return the VAS id or -1 if no matching vasid is found.
+ */
+int chip_to_vas_id(int chipid);
+
+/*
+ * Helper to initialize receive window attributes to defaults for an
+ * NX window.
+ */
+void vas_init_rx_win_attr(struct vas_rx_win_attr *rxattr, enum vas_cop_type cop);
+
+/*
+ * Open a VAS receive window for the instance of VAS identified by @vasid
+ * Use @attr to initialize the attributes of the window.
+ *
+ * Return a handle to the window or ERR_PTR() on error.
+ */
+struct vas_window *vas_rx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_rx_win_attr *attr);
+
+/*
+ * Helper to initialize send window attributes to defaults for an NX window.
+ */
+extern void vas_init_tx_win_attr(struct vas_tx_win_attr *txattr,
+ enum vas_cop_type cop);
+
+/*
+ * Open a VAS send window for the instance of VAS identified by @vasid
+ * and the co-processor type @cop. Use @attr to initialize attributes
+ * of the window.
+ *
+ * Note: The instance of VAS must already have an open receive window for
+ * the coprocessor type @cop.
+ *
+ * Return a handle to the send window or ERR_PTR() on error.
+ */
+struct vas_window *vas_tx_win_open(int vasid, enum vas_cop_type cop,
+ struct vas_tx_win_attr *attr);
+
+/*
+ * Close the send or receive window identified by @win. For receive windows
+ * return -EAGAIN if there are active send windows attached to this receive
+ * window.
+ */
+int vas_win_close(struct vas_window *win);
+
+/*
+ * Copy the co-processor request block (CRB) @crb into the local L2 cache.
+ */
+int vas_copy_crb(void *crb, int offset);
+
+/*
+ * Paste a previously copied CRB (see vas_copy_crb()) from the L2 cache to
+ * the hardware address associated with the window @win. @re is expected/
+ * assumed to be true for NX windows.
+ */
+int vas_paste_crb(struct vas_window *win, int offset, bool re);
+
+int vas_register_api_powernv(struct module *mod, enum vas_cop_type cop_type,
+ const char *name);
+void vas_unregister_api_powernv(void);
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+
+/* VAS Capabilities */
+#define VAS_GZIP_QOS_FEAT 0x1
+#define VAS_GZIP_DEF_FEAT 0x2
+#define VAS_GZIP_QOS_FEAT_BIT PPC_BIT(VAS_GZIP_QOS_FEAT) /* Bit 1 */
+#define VAS_GZIP_DEF_FEAT_BIT PPC_BIT(VAS_GZIP_DEF_FEAT) /* Bit 2 */
+
+/* NX Capabilities */
+#define VAS_NX_GZIP_FEAT 0x1
+#define VAS_NX_GZIP_FEAT_BIT PPC_BIT(VAS_NX_GZIP_FEAT) /* Bit 1 */
+
+/*
+ * These structs are used to retrieve overall VAS capabilities that
+ * the hypervisor provides.
+ */
+struct hv_vas_all_caps {
+ __be64 descriptor;
+ __be64 feat_type;
+} __packed __aligned(0x1000);
+
+struct vas_all_caps {
+ u64 descriptor;
+ u64 feat_type;
+};
+
+int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result);
+int vas_register_api_pseries(struct module *mod,
+ enum vas_cop_type cop_type, const char *name);
+void vas_unregister_api_pseries(void);
+#endif
+
+/*
+ * Register / unregister coprocessor type to VAS API which will be exported
+ * to user space. Applications can use this API to open / close windows
+ * which can be used to send / receive requests directly to the coprocessor.
+ *
+ * Only NX GZIP coprocessor type is supported now, but this API can be
+ * used for others in future.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+ const char *name,
+ const struct vas_user_win_ops *vops);
+void vas_unregister_coproc_api(void);
+
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref);
+void vas_update_csb(struct coprocessor_request_block *crb,
+ struct vas_user_win_ref *task_ref);
+void vas_dump_crb(struct coprocessor_request_block *crb);
+#endif /* _ASM_POWERPC_VAS_H */
diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h
index c53f5f6d1761..07af32576072 100644
--- a/arch/powerpc/include/asm/vdso.h
+++ b/arch/powerpc/include/asm/vdso.h
@@ -1,61 +1,38 @@
-#ifndef __PPC64_VDSO_H__
-#define __PPC64_VDSO_H__
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_H
+#define _ASM_POWERPC_VDSO_H
-#ifdef __KERNEL__
-
-/* Default link addresses for the vDSOs */
-#define VDSO32_LBASE 0x0
-#define VDSO64_LBASE 0x0
+#define VDSO_VERSION_STRING LINUX_2.6.15
+#define __VDSO_PAGES 4
-/* Default map addresses for 32bit vDSO */
-#define VDSO32_MBASE 0x100000
+#ifndef __ASSEMBLER__
-#define VDSO_VERSION_STRING LINUX_2.6.15
+#ifdef CONFIG_PPC64
+#include <generated/vdso64-offsets.h>
+#endif
-/* Define if 64 bits VDSO has procedure descriptors */
-#undef VDS64_HAS_DESCRIPTORS
+#ifdef CONFIG_VDSO32
+#include <generated/vdso32-offsets.h>
+#endif
-#ifndef __ASSEMBLY__
+#define VDSO64_SYMBOL(base, name) ((unsigned long)(base) + (vdso64_offset_##name))
-/* Offsets relative to thread->vdso_base */
-extern unsigned long vdso64_rt_sigtramp;
-extern unsigned long vdso32_sigtramp;
-extern unsigned long vdso32_rt_sigtramp;
+#define VDSO32_SYMBOL(base, name) ((unsigned long)(base) + (vdso32_offset_##name))
int vdso_getcpu_init(void);
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef __VDSO64__
-#ifdef VDS64_HAS_DESCRIPTORS
-#define V_FUNCTION_BEGIN(name) \
- .globl name; \
- .section ".opd","a"; \
- .align 3; \
- name: \
- .quad .name,.TOC.@tocbase,0; \
- .previous; \
- .globl .name; \
- .type .name,@function; \
- .name: \
-
-#define V_FUNCTION_END(name) \
- .size .name,.-.name;
-
-#define V_LOCAL_FUNC(name) (.name)
-
-#else /* VDS64_HAS_DESCRIPTORS */
-
#define V_FUNCTION_BEGIN(name) \
.globl name; \
+ .type name,@function; \
name: \
#define V_FUNCTION_END(name) \
.size name,.-name;
#define V_LOCAL_FUNC(name) (name)
-
-#endif /* VDS64_HAS_DESCRIPTORS */
#endif /* __VDSO64__ */
#ifdef __VDSO32__
@@ -72,8 +49,6 @@ int vdso_getcpu_init(void);
#endif /* __VDSO32__ */
-#endif /* __ASSEMBLY__ */
-
-#endif /* __KERNEL__ */
+#endif /* __ASSEMBLER__ */
-#endif /* __PPC64_VDSO_H__ */
+#endif /* _ASM_POWERPC_VDSO_H */
diff --git a/arch/powerpc/include/asm/vdso/arch_data.h b/arch/powerpc/include/asm/vdso/arch_data.h
new file mode 100644
index 000000000000..c240a6b87518
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/arch_data.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM
+ * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
+ * IBM Corp.
+ */
+#ifndef _ASM_POWERPC_VDSO_ARCH_DATA_H
+#define _ASM_POWERPC_VDSO_ARCH_DATA_H
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+
+#define SYSCALL_MAP_SIZE ((NR_syscalls + 31) / 32)
+
+#ifdef CONFIG_PPC64
+
+struct vdso_arch_data {
+ __u64 tb_ticks_per_sec; /* Timebase ticks / sec */
+ __u32 dcache_block_size; /* L1 d-cache block size */
+ __u32 icache_block_size; /* L1 i-cache block size */
+ __u32 dcache_log_block_size; /* L1 d-cache log block size */
+ __u32 icache_log_block_size; /* L1 i-cache log block size */
+ __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */
+ __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */
+};
+
+#else /* CONFIG_PPC64 */
+
+struct vdso_arch_data {
+ __u64 tb_ticks_per_sec; /* Timebase ticks / sec */
+ __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */
+ __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */
+};
+
+#endif /* CONFIG_PPC64 */
+
+#endif /* _ASM_POWERPC_VDSO_ARCH_DATA_H */
diff --git a/arch/powerpc/include/asm/vdso/clocksource.h b/arch/powerpc/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..c1ba56b82ee5
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/clocksource.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_CLOCKSOURCE_H
+#define _ASM_POWERPC_VDSO_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES VDSO_CLOCKMODE_ARCHTIMER
+
+#endif
diff --git a/arch/powerpc/include/asm/vdso/getrandom.h b/arch/powerpc/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..4c24976061f4
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/getrandom.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
+ */
+#ifndef _ASM_POWERPC_VDSO_GETRANDOM_H
+#define _ASM_POWERPC_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/vdso_datapage.h>
+
+static __always_inline int do_syscall_3(const unsigned long _r0, const unsigned long _r3,
+ const unsigned long _r4, const unsigned long _r5)
+{
+ register long r0 asm("r0") = _r0;
+ register unsigned long r3 asm("r3") = _r3;
+ register unsigned long r4 asm("r4") = _r4;
+ register unsigned long r5 asm("r5") = _r5;
+ register int ret asm ("r3");
+
+ asm volatile(
+ " sc\n"
+ " bns+ 1f\n"
+ " neg %0, %0\n"
+ "1:\n"
+ : "=r" (ret), "+r" (r4), "+r" (r5), "+r" (r0)
+ : "r" (r3)
+ : "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr");
+
+ return ret;
+}
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+ return do_syscall_3(__NR_getrandom, (unsigned long)buffer,
+ (unsigned long)len, (unsigned long)flags);
+}
+
+static __always_inline const struct vdso_rng_data *__arch_get_vdso_u_rng_data(void)
+{
+ struct vdso_rng_data *data;
+
+ asm (
+ " bcl 20, 31, .+4 ;"
+ "0: mflr %0 ;"
+ " addis %0, %0, (vdso_u_rng_data - 0b)@ha ;"
+ " addi %0, %0, (vdso_u_rng_data - 0b)@l ;"
+ : "=r" (data) : : "lr"
+ );
+
+ return data;
+}
+#define __arch_get_vdso_u_rng_data __arch_get_vdso_u_rng_data
+
+ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state,
+ size_t opaque_len);
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_VDSO_GETRANDOM_H */
diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..ab3df12c8d94
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
+#define _ASM_POWERPC_VDSO_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/vdso/timebase.h>
+#include <asm/barrier.h>
+#include <asm/unistd.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+#define VDSO_HAS_TIME 1
+
+/*
+ * powerpc specific delta calculation.
+ *
+ * This variant removes the masking of the subtraction because the
+ * clocksource mask of all VDSO capable clocksources on powerpc is U64_MAX
+ * which would result in a pointless operation. The compiler cannot
+ * optimize it away as the mask comes from the vdso data and is not compile
+ * time constant.
+ */
+#define VDSO_DELTA_NOMASK 1
+
+static __always_inline int do_syscall_2(const unsigned long _r0, const unsigned long _r3,
+ const unsigned long _r4)
+{
+ register long r0 asm("r0") = _r0;
+ register unsigned long r3 asm("r3") = _r3;
+ register unsigned long r4 asm("r4") = _r4;
+ register int ret asm ("r3");
+
+ asm volatile(
+ " sc\n"
+ " bns+ 1f\n"
+ " neg %0, %0\n"
+ "1:\n"
+ : "=r" (ret), "+r" (r4), "+r" (r0)
+ : "r" (r3)
+ : "memory", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr");
+
+ return ret;
+}
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv, struct timezone *_tz)
+{
+ return do_syscall_2(__NR_gettimeofday, (unsigned long)_tv, (unsigned long)_tz);
+}
+
+#ifdef __powerpc64__
+
+static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
+}
+
+#else
+
+#define BUILD_VDSO32 1
+
+static __always_inline
+int clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_gettime64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ return do_syscall_2(__NR_clock_getres_time64, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ return do_syscall_2(__NR_clock_gettime, _clkid, (unsigned long)_ts);
+}
+
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ return do_syscall_2(__NR_clock_getres, _clkid, (unsigned long)_ts);
+}
+#endif
+
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_time_data *vd)
+{
+ return get_tb();
+}
+
+static inline bool vdso_clocksource_ok(const struct vdso_clock *vc)
+{
+ return true;
+}
+#define vdso_clocksource_ok vdso_clocksource_ok
+
+#ifndef __powerpc64__
+static __always_inline u64 vdso_shift_ns(u64 ns, unsigned long shift)
+{
+ u32 hi = ns >> 32;
+ u32 lo = ns;
+
+ lo >>= shift;
+ lo |= hi << (32 - shift);
+ hi >>= shift;
+
+ if (likely(hi == 0))
+ return lo;
+
+ return ((u64)hi << 32) | lo;
+}
+#define vdso_shift_ns vdso_shift_ns
+#endif
+
+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_time_data *vd);
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+ const struct vdso_time_data *vd);
+#else
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+ const struct vdso_time_data *vd);
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_time_data *vd);
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+ const struct vdso_time_data *vd);
+#endif
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+ const struct vdso_time_data *vd);
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time,
+ const struct vdso_time_data *vd);
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/powerpc/include/asm/vdso/processor.h b/arch/powerpc/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..c1f3d7aaf3ee
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/processor.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_VDSO_PROCESSOR_H
+#define _ASM_POWERPC_VDSO_PROCESSOR_H
+
+#ifndef __ASSEMBLER__
+
+/* Macros for adjusting thread priority (hardware multi-threading) */
+#ifdef CONFIG_PPC64
+#define HMT_very_low() asm volatile("or 31, 31, 31 # very low priority")
+#define HMT_low() asm volatile("or 1, 1, 1 # low priority")
+#define HMT_medium_low() asm volatile("or 6, 6, 6 # medium low priority")
+#define HMT_medium() asm volatile("or 2, 2, 2 # medium priority")
+#define HMT_medium_high() asm volatile("or 5, 5, 5 # medium high priority")
+#define HMT_high() asm volatile("or 3, 3, 3 # high priority")
+#else
+#define HMT_very_low()
+#define HMT_low()
+#define HMT_medium_low()
+#define HMT_medium()
+#define HMT_medium_high()
+#define HMT_high()
+#endif
+
+#ifdef CONFIG_PPC64
+#define cpu_relax() \
+ asm volatile(ASM_FTR_IFCLR( \
+ /* Pre-POWER10 uses low ; medium priority nops */ \
+ "or 1,1,1 ; or 2,2,2", \
+ /* POWER10 onward uses pause_short (wait 2,0) */ \
+ PPC_WAIT(2, 0), \
+ %0) :: "i" (CPU_FTR_ARCH_31) : "memory")
+#else
+#define cpu_relax() barrier()
+#endif
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_VDSO_PROCESSOR_H */
diff --git a/arch/powerpc/include/asm/vdso/timebase.h b/arch/powerpc/include/asm/vdso/timebase.h
new file mode 100644
index 000000000000..e9245f86a46c
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/timebase.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Common timebase prototypes and such for all ppc machines.
+ */
+
+#ifndef _ASM_POWERPC_VDSO_TIMEBASE_H
+#define _ASM_POWERPC_VDSO_TIMEBASE_H
+
+#include <asm/reg.h>
+
+/*
+ * We use __powerpc64__ here because we want the compat VDSO to use the 32-bit
+ * version below in the else case of the ifdef.
+ */
+#if defined(__powerpc64__) && (defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_E500))
+#define mftb() ({unsigned long rval; \
+ asm volatile( \
+ "90: mfspr %0, %2;\n" \
+ ASM_FTR_IFSET( \
+ "97: cmpwi %0,0;\n" \
+ " beq- 90b;\n", "", %1) \
+ : "=r" (rval) \
+ : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
+ rval;})
+#elif defined(CONFIG_PPC_8xx)
+#define mftb() ({unsigned long rval; \
+ asm volatile("mftbl %0" : "=r" (rval)); rval;})
+#else
+#define mftb() ({unsigned long rval; \
+ asm volatile("mfspr %0, %1" : \
+ "=r" (rval) : "i" (SPRN_TBRL)); rval;})
+#endif /* !CONFIG_PPC_CELL */
+
+#if defined(CONFIG_PPC_8xx)
+#define mftbu() ({unsigned long rval; \
+ asm volatile("mftbu %0" : "=r" (rval)); rval;})
+#else
+#define mftbu() ({unsigned long rval; \
+ asm volatile("mfspr %0, %1" : "=r" (rval) : \
+ "i" (SPRN_TBRU)); rval;})
+#endif
+
+#define mttbl(v) asm volatile("mttbl %0":: "r"(v))
+#define mttbu(v) asm volatile("mttbu %0":: "r"(v))
+
+static __always_inline u64 get_tb(void)
+{
+ unsigned int tbhi, tblo, tbhi2;
+
+ /*
+ * We use __powerpc64__ here not CONFIG_PPC64 because we want the compat
+ * VDSO to use the 32-bit compatible version in the while loop below.
+ */
+ if (__is_defined(__powerpc64__))
+ return mftb();
+
+ do {
+ tbhi = mftbu();
+ tblo = mftb();
+ tbhi2 = mftbu();
+ } while (tbhi != tbhi2);
+
+ return ((u64)tbhi << 32) | tblo;
+}
+
+static inline void set_tb(unsigned int upper, unsigned int lower)
+{
+ mtspr(SPRN_TBWL, 0);
+ mtspr(SPRN_TBWU, upper);
+ mtspr(SPRN_TBWL, lower);
+}
+
+#endif /* _ASM_POWERPC_VDSO_TIMEBASE_H */
diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..bee18e8660a0
--- /dev/null
+++ b/arch/powerpc/include/asm/vdso/vsyscall.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_VDSO_VSYSCALL_H
+#define _ASM_POWERPC_VDSO_VSYSCALL_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/vdso_datapage.h>
+
+/* The asm-generic header needs to be included after the definitions above */
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* _ASM_POWERPC_VDSO_VSYSCALL_H */
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index b73a8199f161..441264af0e36 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _VDSO_DATAPAGE_H
#define _VDSO_DATAPAGE_H
#ifdef __KERNEL__
@@ -6,119 +7,23 @@
* Copyright (C) 2002 Peter Bergner <bergner@vnet.ibm.com>, IBM
* Copyright (C) 2005 Benjamin Herrenschmidy <benh@kernel.crashing.org>,
* IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#ifndef __ASSEMBLER__
-/*
- * Note about this structure:
- *
- * This structure was historically called systemcfg and exposed to
- * userland via /proc/ppc64/systemcfg. Unfortunately, this became an
- * ABI issue as some proprietary software started relying on being able
- * to mmap() it, thus we have to keep the base layout at least for a
- * few kernel versions.
- *
- * However, since ppc32 doesn't suffer from this backward handicap,
- * a simpler version of the data structure is used there with only the
- * fields actually used by the vDSO.
- *
- */
-
-/*
- * If the major version changes we are incompatible.
- * Minor version changes are a hint.
- */
-#define SYSTEMCFG_MAJOR 1
-#define SYSTEMCFG_MINOR 1
-
-#ifndef __ASSEMBLY__
-
-#include <linux/unistd.h>
-#include <linux/time.h>
-
-#define SYSCALL_MAP_SIZE ((__NR_syscalls + 31) / 32)
-
-/*
- * So here is the ppc64 backward compatible version
- */
-
-#ifdef CONFIG_PPC64
-
-struct vdso_data {
- __u8 eye_catcher[16]; /* Eyecatcher: SYSTEMCFG:PPC64 0x00 */
- struct { /* Systemcfg version numbers */
- __u32 major; /* Major number 0x10 */
- __u32 minor; /* Minor number 0x14 */
- } version;
-
- /* Note about the platform flags: it now only contains the lpar
- * bit. The actual platform number is dead and buried
- */
- __u32 platform; /* Platform flags 0x18 */
- __u32 processor; /* Processor type 0x1C */
- __u64 processorCount; /* # of physical processors 0x20 */
- __u64 physicalMemorySize; /* Size of real memory(B) 0x28 */
- __u64 tb_orig_stamp; /* Timebase at boot 0x30 */
- __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
- __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
- __u64 stamp_xsec; /* 0x48 */
- __u64 tb_update_count; /* Timebase atomicity ctr 0x50 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
- __u32 tz_dsttime; /* Type of dst correction 0x5C */
- __u32 dcache_size; /* L1 d-cache size 0x60 */
- __u32 dcache_line_size; /* L1 d-cache line size 0x64 */
- __u32 icache_size; /* L1 i-cache size 0x68 */
- __u32 icache_line_size; /* L1 i-cache line size 0x6C */
-
- /* those additional ones don't have to be located anywhere
- * special as they were not part of the original systemcfg
- */
- __u32 dcache_block_size; /* L1 d-cache block size */
- __u32 icache_block_size; /* L1 i-cache block size */
- __u32 dcache_log_block_size; /* L1 d-cache log block size */
- __u32 icache_log_block_size; /* L1 i-cache log block size */
- __s32 wtom_clock_sec; /* Wall to monotonic clock */
- __s32 wtom_clock_nsec;
- struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
- __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
- __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */
- __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
-};
-
-#else /* CONFIG_PPC64 */
-
-/*
- * And here is the simpler 32 bits version
- */
-struct vdso_data {
- __u64 tb_orig_stamp; /* Timebase at boot 0x30 */
- __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */
- __u64 tb_to_xs; /* Inverse of TB to 2^20 0x40 */
- __u64 stamp_xsec; /* 0x48 */
- __u32 tb_update_count; /* Timebase atomicity ctr 0x50 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x58 */
- __u32 tz_dsttime; /* Type of dst correction 0x5C */
- __s32 wtom_clock_sec; /* Wall to monotonic clock */
- __s32 wtom_clock_nsec;
- struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */
- __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */
- __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */
- __u32 dcache_block_size; /* L1 d-cache block size */
- __u32 icache_block_size; /* L1 i-cache block size */
- __u32 dcache_log_block_size; /* L1 d-cache log block size */
- __u32 icache_log_block_size; /* L1 i-cache log block size */
-};
+#include <vdso/datapage.h>
-#endif /* CONFIG_PPC64 */
+#else /* __ASSEMBLER__ */
-extern struct vdso_data *vdso_data;
+.macro get_datapage ptr symbol
+ bcl 20, 31, .+4
+999:
+ mflr \ptr
+ addis \ptr, \ptr, (\symbol - 999b)@ha
+ addi \ptr, \ptr, (\symbol - 999b)@l
+.endm
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _SYSTEMCFG_H */
diff --git a/arch/powerpc/include/asm/vermagic.h b/arch/powerpc/include/asm/vermagic.h
new file mode 100644
index 000000000000..6f250fe506bd
--- /dev/null
+++ b/arch/powerpc/include/asm/vermagic.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_VERMAGIC_H
+#define _ASM_VERMAGIC_H
+
+#ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+#define MODULE_ARCH_VERMAGIC_FTRACE "patchable-function-entry "
+#elif defined(CONFIG_MPROFILE_KERNEL)
+#define MODULE_ARCH_VERMAGIC_FTRACE "mprofile-kernel "
+#else
+#define MODULE_ARCH_VERMAGIC_FTRACE ""
+#endif
+
+#ifdef CONFIG_RELOCATABLE
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE "relocatable "
+#else
+#define MODULE_ARCH_VERMAGIC_RELOCATABLE ""
+#endif
+
+#define MODULE_ARCH_VERMAGIC \
+ MODULE_ARCH_VERMAGIC_FTRACE MODULE_ARCH_VERMAGIC_RELOCATABLE
+
+#endif /* _ASM_VERMAGIC_H */
diff --git a/arch/powerpc/include/asm/vga.h b/arch/powerpc/include/asm/vga.h
index a2eac409c1ec..f2dc40e1c52a 100644
--- a/arch/powerpc/include/asm/vga.h
+++ b/arch/powerpc/include/asm/vga.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_VGA_H_
#define _ASM_POWERPC_VGA_H_
@@ -25,25 +26,26 @@
static inline void scr_writew(u16 val, volatile u16 *addr)
{
- st_le16(addr, val);
+ *addr = cpu_to_le16(val);
}
static inline u16 scr_readw(volatile const u16 *addr)
{
- return ld_le16(addr);
+ return le16_to_cpu(*addr);
}
-#define VT_BUF_HAVE_MEMCPYW
-#define scr_memcpyw memcpy
+#define VT_BUF_HAVE_MEMSETW
+static inline void scr_memsetw(u16 *s, u16 v, unsigned int n)
+{
+ memset16(s, cpu_to_le16(v), n / 2);
+}
#endif /* !CONFIG_VGA_CONSOLE && !CONFIG_MDA_CONSOLE */
-extern unsigned long vgacon_remap_base;
-
#ifdef __powerpc64__
#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap((x), s))
#else
-#define VGA_MAP_MEM(x,s) (x + vgacon_remap_base)
+#define VGA_MAP_MEM(x,s) (x)
#endif
#define vga_readb(x) (*(x))
diff --git a/arch/powerpc/include/asm/video.h b/arch/powerpc/include/asm/video.h
new file mode 100644
index 000000000000..e1770114ffc3
--- /dev/null
+++ b/arch/powerpc/include/asm/video.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_VIDEO_H_
+#define _ASM_VIDEO_H_
+
+#include <asm/page.h>
+
+static inline pgprot_t pgprot_framebuffer(pgprot_t prot,
+ unsigned long vm_start, unsigned long vm_end,
+ unsigned long offset)
+{
+ return __phys_mem_access_prot(PHYS_PFN(offset), vm_end - vm_start, prot);
+}
+#define pgprot_framebuffer pgprot_framebuffer
+
+#include <asm-generic/video.h>
+
+#endif /* _ASM_VIDEO_H_ */
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 4f9b7ca0710f..7c444150c5ad 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* IBM PowerPC Virtual I/O Infrastructure Support.
*
* Copyright (c) 2003 IBM Corp.
* Dave Engebretsen engebret@us.ibm.com
* Santiago Leon santil@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifndef _ASM_POWERPC_VIO_H
@@ -19,9 +15,9 @@
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/mod_devicetable.h>
+#include <linux/scatterlist.h>
#include <asm/hvcall.h>
-#include <asm/scatterlist.h>
/*
* Architecture-specific constants for drivers to
@@ -43,7 +39,7 @@
*/
#define VIO_CMO_MIN_ENT 1562624
-extern struct bus_type vio_bus_type;
+extern const struct bus_type vio_bus_type;
struct iommu_table;
@@ -117,7 +113,8 @@ struct vio_driver {
const char *name;
const struct vio_device_id *id_table;
int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
- int (*remove)(struct vio_dev *dev);
+ void (*remove)(struct vio_dev *dev);
+ void (*shutdown)(struct vio_dev *dev);
/* A driver must have a get_desired_dma() function to
* be loaded in a CMO environment if it uses DMA.
*/
@@ -159,15 +156,8 @@ static inline int vio_enable_interrupts(struct vio_dev *dev)
}
#endif
-static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
-{
- return container_of(drv, struct vio_driver, driver);
-}
-
-static inline struct vio_dev *to_vio_dev(struct device *dev)
-{
- return container_of(dev, struct vio_dev, dev);
-}
+#define to_vio_driver(__drv) container_of_const(__drv, struct vio_driver, driver)
+#define to_vio_dev(__dev) container_of_const(__dev, struct vio_dev, dev)
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_VIO_H */
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
new file mode 100644
index 000000000000..59ed89890c90
--- /dev/null
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -0,0 +1,24 @@
+#ifndef _ASM_POWERPC_VMALLOC_H
+#define _ASM_POWERPC_VMALLOC_H
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static __always_inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ /* HPT does not cope with large pages in the vmalloc area */
+ return radix_enabled();
+}
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static __always_inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return radix_enabled();
+}
+
+#endif
+
+#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/include/asm/vphn.h b/arch/powerpc/include/asm/vphn.h
new file mode 100644
index 000000000000..8c2f795eea68
--- /dev/null
+++ b/arch/powerpc/include/asm/vphn.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_VPHN_H
+#define _ASM_POWERPC_VPHN_H
+
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
+#define VPHN_REGISTER_COUNT 6
+
+/*
+ * 6 64-bit registers unpacked into up to 24 be32 associativity values. To
+ * form the complete property we have to add the length in the first cell.
+ */
+#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1)
+
+/*
+ * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags:
+ * 1 for retrieving associativity information for a guest cpu
+ * 2 for retrieving associativity information for a host/hypervisor cpu
+ */
+#define VPHN_FLAG_VCPU 1
+#define VPHN_FLAG_PCPU 2
+
+long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity);
+
+#endif // _ASM_POWERPC_VPHN_H
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 9a5c928bb3c6..54653a863414 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -4,9 +4,10 @@
/*
* Word-at-a-time interfaces for PowerPC.
*/
-
-#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/wordpart.h>
#include <asm/asm-compat.h>
+#include <asm/extable.h>
#ifdef __BIG_ENDIAN__
@@ -33,41 +34,79 @@ static inline long find_zero(unsigned long mask)
return leading_zero_bits >> 3;
}
-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
unsigned long rhs = val | c->low_bits;
*data = rhs;
return (val + c->high_bits) & ~rhs;
}
+static inline unsigned long zero_bytemask(unsigned long mask)
+{
+ return ~1ul << __fls(mask);
+}
+
#else
+#ifdef CONFIG_64BIT
+
+/* unused */
struct word_at_a_time {
- const unsigned long one_bits, high_bits;
};
-#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+#define WORD_AT_A_TIME_CONSTANTS { }
-#ifdef CONFIG_64BIT
+/* This will give us 0xff for a NULL char and 0x00 elsewhere */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+ unsigned long ret;
+ unsigned long zero = 0;
-/* Alan Modra's little-endian strlen tail for 64-bit */
-#define create_zero_mask(mask) (mask)
+ asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));
+ *bits = ret;
-static inline unsigned long find_zero(unsigned long mask)
+ return ret;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+ return bits;
+}
+
+/* Alan Modra's little-endian strlen tail for 64-bit */
+static inline unsigned long create_zero_mask(unsigned long bits)
{
unsigned long leading_zero_bits;
long trailing_zero_bit_mask;
- asm ("addi %1,%2,-1\n\t"
- "andc %1,%1,%2\n\t"
- "popcntd %0,%1"
- : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
- : "r" (mask));
- return leading_zero_bits >> 3;
+ asm("addi %1,%2,-1\n\t"
+ "andc %1,%1,%2\n\t"
+ "popcntd %0,%1"
+ : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
+ : "b" (bits));
+
+ return leading_zero_bits;
+}
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+ return mask >> 3;
+}
+
+/* This assumes that we never ask for an all 1s bitmask */
+static inline unsigned long zero_bytemask(unsigned long mask)
+{
+ return (1UL << mask) - 1;
}
#else /* 32-bit case */
+struct word_at_a_time {
+ const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
/*
* This is largely generic for little-endian machines, but the
* optimal byte mask counting is probably going to be something
@@ -96,8 +135,6 @@ static inline unsigned long find_zero(unsigned long mask)
return count_masked_bytes(mask);
}
-#endif
-
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
@@ -114,6 +151,56 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+#endif /* CONFIG_64BIT */
+
+#endif /* __BIG_ENDIAN__ */
+
+/*
+ * We use load_unaligned_zeropad() in a selftest, which builds a userspace
+ * program. Some linker scripts seem to discard the .fixup section, so allow
+ * the test code to use a different section name.
+ */
+#ifndef FIXUP_SECTION
+#define FIXUP_SECTION ".fixup"
+#endif
+
+static inline unsigned long load_unaligned_zeropad(const void *addr)
+{
+ unsigned long ret, offset, tmp;
+
+ asm(
+ "1: " PPC_LL "%[ret], 0(%[addr])\n"
+ "2:\n"
+ ".section " FIXUP_SECTION ",\"ax\"\n"
+ "3: "
+#ifdef __powerpc64__
+ "clrrdi %[tmp], %[addr], 3\n\t"
+ "clrlsldi %[offset], %[addr], 61, 3\n\t"
+ "ld %[ret], 0(%[tmp])\n\t"
+#ifdef __BIG_ENDIAN__
+ "sld %[ret], %[ret], %[offset]\n\t"
+#else
+ "srd %[ret], %[ret], %[offset]\n\t"
+#endif
+#else
+ "clrrwi %[tmp], %[addr], 2\n\t"
+ "clrlslwi %[offset], %[addr], 30, 3\n\t"
+ "lwz %[ret], 0(%[tmp])\n\t"
+#ifdef __BIG_ENDIAN__
+ "slw %[ret], %[ret], %[offset]\n\t"
+#else
+ "srw %[ret], %[ret], %[offset]\n\t"
+#endif
#endif
+ "b 2b\n"
+ ".previous\n"
+ EX_TABLE(1b, 3b)
+ : [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
+ : [addr] "b" (addr), "m" (*(unsigned long *)addr));
+
+ return ret;
+}
+
+#undef FIXUP_SECTION
#endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h
index 282d43a0c855..60ef312dab05 100644
--- a/arch/powerpc/include/asm/xics.h
+++ b/arch/powerpc/include/asm/xics.h
@@ -1,5 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Common definitions accross all variants of ICP and ICS interrupt
+ * Common definitions across all variants of ICP and ICS interrupt
* controllers.
*/
@@ -29,17 +30,25 @@
/* Native ICP */
#ifdef CONFIG_PPC_ICP_NATIVE
extern int icp_native_init(void);
+extern void icp_native_flush_interrupt(void);
#else
static inline int icp_native_init(void) { return -ENODEV; }
#endif
/* PAPR ICP */
#ifdef CONFIG_PPC_ICP_HV
-extern int icp_hv_init(void);
+int __init icp_hv_init(void);
#else
static inline int icp_hv_init(void) { return -ENODEV; }
#endif
+#ifdef CONFIG_PPC_POWERNV
+int __init icp_opal_init(void);
+extern void icp_opal_flush_interrupt(void);
+#else
+static inline int icp_opal_init(void) { return -ENODEV; }
+#endif
+
/* ICP ops */
struct icp_ops {
unsigned int (*get_irq)(void);
@@ -48,15 +57,19 @@ struct icp_ops {
void (*teardown_cpu)(void);
void (*flush_ipi)(void);
#ifdef CONFIG_SMP
- void (*cause_ipi)(int cpu, unsigned long data);
+ void (*cause_ipi)(int cpu);
irq_handler_t ipi_action;
#endif
};
extern const struct icp_ops *icp_ops;
+#ifdef CONFIG_PPC_ICS_NATIVE
/* Native ICS */
extern int ics_native_init(void);
+#else
+static inline int ics_native_init(void) { return -ENODEV; }
+#endif
/* RTAS ICS */
#ifdef CONFIG_PPC_ICS_RTAS
@@ -75,10 +88,11 @@ static inline int ics_opal_init(void) { return -ENODEV; }
/* ICS instance, hooked up to chip_data of an irq */
struct ics {
struct list_head link;
- int (*map)(struct ics *ics, unsigned int virq);
+ int (*check)(struct ics *ics, unsigned int hwirq);
void (*mask_unknown)(struct ics *ics, unsigned long vec);
long (*get_server)(struct ics *ics, unsigned long vec);
int (*host_match)(struct ics *ics, struct device_node *node);
+ struct irq_chip *chip;
char data[];
};
@@ -97,7 +111,7 @@ DECLARE_PER_CPU(struct xics_cppr, xics_cppr);
static inline void xics_push_cppr(unsigned int vec)
{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+ struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);
if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
return;
@@ -110,7 +124,7 @@ static inline void xics_push_cppr(unsigned int vec)
static inline unsigned char xics_pop_cppr(void)
{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+ struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);
if (WARN_ON(os_cppr->index < 1))
return LOWEST_PRIORITY;
@@ -120,7 +134,7 @@ static inline unsigned char xics_pop_cppr(void)
static inline void xics_set_base_cppr(unsigned char cppr)
{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+ struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);
/* we only really want to set the priority when there's
* just one cppr value on the stack
@@ -132,7 +146,7 @@ static inline void xics_set_base_cppr(unsigned char cppr)
static inline unsigned char xics_cppr_top(void)
{
- struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
+ struct xics_cppr *os_cppr = this_cpu_ptr(&xics_cppr);
return os_cppr->stack[os_cppr->index];
}
@@ -144,13 +158,14 @@ extern void xics_setup_cpu(void);
extern void xics_update_irq_servers(void);
extern void xics_set_cpu_giq(unsigned int gserver, unsigned int join);
extern void xics_mask_unknown_vec(unsigned int vec);
-extern irqreturn_t xics_ipi_dispatch(int cpu);
-extern int xics_smp_probe(void);
+extern void xics_smp_probe(void);
extern void xics_register_ics(struct ics *ics);
extern void xics_teardown_cpu(void);
extern void xics_kexec_teardown_cpu(int secondary);
extern void xics_migrate_irqs_away(void);
extern void icp_native_eoi(struct irq_data *d);
+extern int xics_set_irq_type(struct irq_data *d, unsigned int flow_type);
+extern int xics_retrigger(struct irq_data *data);
#ifdef CONFIG_SMP
extern int xics_get_irq_server(unsigned int virq, const struct cpumask *cpumask,
unsigned int strict_check);
diff --git a/arch/powerpc/include/asm/xilinx_intc.h b/arch/powerpc/include/asm/xilinx_intc.h
deleted file mode 100644
index 343612f8fece..000000000000
--- a/arch/powerpc/include/asm/xilinx_intc.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Xilinx intc external definitions
- *
- * Copyright 2007 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_XILINX_INTC_H
-#define _ASM_POWERPC_XILINX_INTC_H
-
-#ifdef __KERNEL__
-
-extern void __init xilinx_intc_init_tree(void);
-extern unsigned int xilinx_intc_get_irq(void);
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_XILINX_INTC_H */
diff --git a/arch/powerpc/include/asm/xilinx_pci.h b/arch/powerpc/include/asm/xilinx_pci.h
deleted file mode 100644
index 7a8275caf6af..000000000000
--- a/arch/powerpc/include/asm/xilinx_pci.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Xilinx pci external definitions
- *
- * Copyright 2009 Roderick Colenbrander
- * Copyright 2009 Secret Lab Technologies Ltd.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#ifndef INCLUDE_XILINX_PCI
-#define INCLUDE_XILINX_PCI
-
-#ifdef CONFIG_XILINX_PCI
-extern void __init xilinx_pci_init(void);
-#else
-static inline void __init xilinx_pci_init(void) { return; }
-#endif
-
-#endif /* INCLUDE_XILINX_PCI */
diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h
new file mode 100644
index 000000000000..cf8bb6ac4463
--- /dev/null
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_XIVE_REGS_H
+#define _ASM_POWERPC_XIVE_REGS_H
+
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
+#define XIVE_ESB_GET 0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
+
+/*
+ * Load-after-store ordering
+ *
+ * Adding this offset to the load address will enforce
+ * load-after-store ordering. This is required to use StoreEOI.
+ */
+#define XIVE_ESB_LD_ST_MO 0x40 /* Load-after-store ordering */
+
+#define XIVE_ESB_VAL_P 0x2
+#define XIVE_ESB_VAL_Q 0x1
+#define XIVE_ESB_INVALID 0xFF
+
+/*
+ * Thread Management (aka "TM") registers
+ */
+
+/* TM register offsets */
+#define TM_QW0_USER 0x000 /* All rings */
+#define TM_QW1_OS 0x010 /* Ring 0..2 */
+#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
+#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
+
+/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
+#define TM_NSR 0x0 /* + + - + */
+#define TM_CPPR 0x1 /* - + - + */
+#define TM_IPB 0x2 /* - + + + */
+#define TM_LSMFB 0x3 /* - + + + */
+#define TM_ACK_CNT 0x4 /* - + - - */
+#define TM_INC 0x5 /* - + - + */
+#define TM_AGE 0x6 /* - + - + */
+#define TM_PIPR 0x7 /* - + - + */
+
+#define TM_WORD0 0x0
+#define TM_WORD1 0x4
+
+/*
+ * QW word 2 contains the valid bit at the top and other fields
+ * depending on the QW.
+ */
+#define TM_WORD2 0x8
+#define TM_QW0W2_VU PPC_BIT32(0)
+#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) // XX 2,31 ?
+#define TM_QW1W2_VO PPC_BIT32(0)
+#define TM_QW1W2_HO PPC_BIT32(1) /* P10 XIVE2 */
+#define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31)
+#define TM_QW2W2_VP PPC_BIT32(0)
+#define TM_QW2W2_HP PPC_BIT32(1) /* P10 XIVE2 */
+#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31)
+#define TM_QW3W2_VT PPC_BIT32(0)
+#define TM_QW3W2_HT PPC_BIT32(1) /* P10 XIVE2 */
+#define TM_QW3W2_LP PPC_BIT32(6)
+#define TM_QW3W2_LE PPC_BIT32(7)
+#define TM_QW3W2_T PPC_BIT32(31)
+
+/*
+ * In addition to normal loads to "peek" and writes (only when invalid)
+ * using 4 and 8 bytes accesses, the above registers support these
+ * "special" byte operations:
+ *
+ * - Byte load from QW0[NSR] - User level NSR (EBB)
+ * - Byte store to QW0[NSR] - User level NSR (EBB)
+ * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
+ * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
+ * otherwise VT||0000000
+ * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
+ *
+ * Then we have all these "special" CI ops at these offset that trigger
+ * all sorts of side effects:
+ */
+#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg*/
+#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
+#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */
+#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user context */
+#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
+#define TM_SPC_PULL_OS_CTX 0x818 /* Load32/Load64 Pull/Invalidate OS context to reg */
+#define TM_SPC_PULL_POOL_CTX 0x828 /* Load32/Load64 Pull/Invalidate Pool context to reg*/
+#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
+#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd line */
+#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
+#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even line */
+#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
+/* XXX more... */
+
+/* NSR fields for the various QW ack types */
+#define TM_QW0_NSR_EB PPC_BIT8(0)
+#define TM_QW1_NSR_EO PPC_BIT8(0)
+#define TM_QW3_NSR_HE PPC_BITMASK8(0,1)
+#define TM_QW3_NSR_HE_NONE 0
+#define TM_QW3_NSR_HE_POOL 1
+#define TM_QW3_NSR_HE_PHYS 2
+#define TM_QW3_NSR_HE_LSI 3
+#define TM_QW3_NSR_I PPC_BIT8(2)
+#define TM_QW3_NSR_GRP_LVL PPC_BIT8(3,7)
+
+#endif /* _ASM_POWERPC_XIVE_REGS_H */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
new file mode 100644
index 000000000000..efb0f5effcc6
--- /dev/null
+++ b/arch/powerpc/include/asm/xive.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2016,2017 IBM Corporation.
+ */
+#ifndef _ASM_POWERPC_XIVE_H
+#define _ASM_POWERPC_XIVE_H
+
+#include <asm/opal-api.h>
+
+#define XIVE_INVALID_VP 0xffffffff
+
+#ifdef CONFIG_PPC_XIVE
+
+/*
+ * Thread Interrupt Management Area (TIMA)
+ *
+ * This is a global MMIO region divided in 4 pages of varying access
+ * permissions, providing access to per-cpu interrupt management
+ * functions. It always identifies the CPU doing the access based
+ * on the PowerBus initiator ID, thus we always access via the
+ * same offset regardless of where the code is executing
+ */
+extern void __iomem *xive_tima;
+extern unsigned long xive_tima_os;
+
+/*
+ * Offset in the TM area of our current execution level (provided by
+ * the backend)
+ */
+extern u32 xive_tima_offset;
+
+/*
+ * Per-irq data (irq_get_handler_data for normal IRQs), IPIs
+ * have it stored in the xive_cpu structure. We also cache
+ * for normal interrupts the current target CPU.
+ *
+ * This structure is setup by the backend for each interrupt.
+ */
+struct xive_irq_data {
+ u64 flags;
+ u64 eoi_page;
+ void __iomem *eoi_mmio;
+ u64 trig_page;
+ void __iomem *trig_mmio;
+ u32 esb_shift;
+ int src_chip;
+ u32 hw_irq;
+
+ /* Setup/used by frontend */
+ int target;
+ /*
+ * saved_p means that there is a queue entry for this interrupt
+ * in some CPU's queue (not including guest vcpu queues), even
+ * if P is not set in the source ESB.
+ * stale_p means that there is no queue entry for this interrupt
+ * in some CPU's queue, even if P is set in the source ESB.
+ */
+ bool saved_p;
+ bool stale_p;
+};
+#define XIVE_IRQ_FLAG_STORE_EOI 0x01
+#define XIVE_IRQ_FLAG_LSI 0x02
+/* #define XIVE_IRQ_FLAG_SHIFT_BUG 0x04 */ /* P9 DD1.0 workaround */
+/* #define XIVE_IRQ_FLAG_MASK_FW 0x08 */ /* P9 DD1.0 workaround */
+/* #define XIVE_IRQ_FLAG_EOI_FW 0x10 */ /* P9 DD1.0 workaround */
+#define XIVE_IRQ_FLAG_H_INT_ESB 0x20
+
+/* Special flag set by KVM for escalation interrupts */
+#define XIVE_IRQ_FLAG_NO_EOI 0x80
+
+#define XIVE_INVALID_CHIP_ID -1
+
+/* A queue tracking structure in a CPU */
+struct xive_q {
+ __be32 *qpage;
+ u32 msk;
+ u32 idx;
+ u32 toggle;
+ u64 eoi_phys;
+ u32 esc_irq;
+ atomic_t count;
+ atomic_t pending_count;
+ u64 guest_qaddr;
+ u32 guest_qshift;
+};
+
+/* Global enable flags for the XIVE support */
+extern bool __xive_enabled;
+
+static inline bool xive_enabled(void) { return __xive_enabled; }
+
+bool xive_spapr_init(void);
+bool xive_native_init(void);
+void xive_smp_probe(void);
+int xive_smp_prepare_cpu(unsigned int cpu);
+void xive_smp_setup_cpu(void);
+void xive_smp_disable_cpu(void);
+void xive_teardown_cpu(void);
+void xive_shutdown(void);
+void xive_flush_interrupt(void);
+
+/* xmon hook */
+void xmon_xive_do_dump(int cpu);
+int xmon_xive_get_irq_config(u32 hw_irq, struct irq_data *d);
+void xmon_xive_get_irq_all(void);
+
+/* APIs used by KVM */
+u32 xive_native_default_eq_shift(void);
+u32 xive_native_alloc_vp_block(u32 max_vcpus);
+void xive_native_free_vp_block(u32 vp_base);
+int xive_native_populate_irq_data(u32 hw_irq,
+ struct xive_irq_data *data);
+void xive_cleanup_irq_data(struct xive_irq_data *xd);
+void xive_native_free_irq(u32 irq);
+int xive_native_configure_irq(u32 hw_irq, u32 target, u8 prio, u32 sw_irq);
+
+int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
+ __be32 *qpage, u32 order, bool can_escalate);
+void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
+
+void xive_native_sync_source(u32 hw_irq);
+void xive_native_sync_queue(u32 hw_irq);
+bool is_xive_irq(struct irq_chip *chip);
+int xive_native_enable_vp(u32 vp_id, bool single_escalation);
+int xive_native_disable_vp(u32 vp_id);
+int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
+bool xive_native_has_single_escalation(void);
+bool xive_native_has_save_restore(void);
+
+int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags);
+
+int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+ u32 *qindex);
+int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+ u32 qindex);
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+bool xive_native_has_queue_state_support(void);
+extern u32 xive_native_alloc_irq_on_chip(u32 chip_id);
+
+static inline u32 xive_native_alloc_irq(void)
+{
+ return xive_native_alloc_irq_on_chip(OPAL_XIVE_ANY_CHIP);
+}
+
+#else
+
+static inline bool xive_enabled(void) { return false; }
+
+static inline bool xive_spapr_init(void) { return false; }
+static inline bool xive_native_init(void) { return false; }
+static inline void xive_smp_probe(void) { }
+static inline int xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
+static inline void xive_smp_setup_cpu(void) { }
+static inline void xive_smp_disable_cpu(void) { }
+static inline void xive_shutdown(void) { }
+static inline void xive_flush_interrupt(void) { }
+
+static inline u32 xive_native_alloc_vp_block(u32 max_vcpus) { return XIVE_INVALID_VP; }
+static inline void xive_native_free_vp_block(u32 vp_base) { }
+
+#endif
+
+#endif /* _ASM_POWERPC_XIVE_H */
diff --git a/arch/powerpc/include/asm/xmon.h b/arch/powerpc/include/asm/xmon.h
index 5eb8e599e5cc..535cdb1e411a 100644
--- a/arch/powerpc/include/asm/xmon.h
+++ b/arch/powerpc/include/asm/xmon.h
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef __ASM_POWERPC_XMON_H
#define __ASM_POWERPC_XMON_H
/*
* Copyrignt (C) 2006 IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#ifdef __KERNEL__
@@ -16,18 +12,18 @@
#ifdef CONFIG_XMON
extern void xmon_setup(void);
-extern void xmon_register_spus(struct list_head *list);
struct pt_regs;
extern int xmon(struct pt_regs *excp);
extern irqreturn_t xmon_irq(int, void *);
#else
-static inline void xmon_setup(void) { };
-static inline void xmon_register_spus(struct list_head *list) { };
+static inline void xmon_setup(void) { }
#endif
#if defined(CONFIG_XMON) && defined(CONFIG_SMP)
extern int cpus_are_in_xmon(void);
#endif
+extern __printf(1, 2) void xmon_printf(const char *format, ...);
+
#endif /* __KERNEL __ */
#endif /* __ASM_POWERPC_XMON_H */
diff --git a/arch/powerpc/include/asm/xor.h b/arch/powerpc/include/asm/xor.h
index 0abb97f3be10..37d05c11d09c 100644
--- a/arch/powerpc/include/asm/xor.h
+++ b/arch/powerpc/include/asm/xor.h
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
@@ -23,17 +11,8 @@
#ifdef CONFIG_ALTIVEC
#include <asm/cputable.h>
-
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in);
+#include <asm/cpu_has_feature.h>
+#include <asm/xor_altivec.h>
static struct xor_block_template xor_block_altivec = {
.name = "altivec",
diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h
new file mode 100644
index 000000000000..294620a25f80
--- /dev/null
+++ b/arch/powerpc/include/asm/xor_altivec.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_XOR_ALTIVEC_H
+#define _ASM_POWERPC_XOR_ALTIVEC_H
+
+#ifdef CONFIG_ALTIVEC
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
+
+#endif
+#endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index 7a3f795ac218..353b70b1998f 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,46 +1,3 @@
-# UAPI Header export list
-include include/uapi/asm-generic/Kbuild.asm
-
-header-y += auxvec.h
-header-y += bitsperlong.h
-header-y += bootx.h
-header-y += byteorder.h
-header-y += cputable.h
-header-y += elf.h
-header-y += epapr_hcalls.h
-header-y += errno.h
-header-y += fcntl.h
-header-y += ioctl.h
-header-y += ioctls.h
-header-y += ipcbuf.h
-header-y += kvm.h
-header-y += kvm_para.h
-header-y += mman.h
-header-y += msgbuf.h
-header-y += nvram.h
-header-y += param.h
-header-y += perf_event.h
-header-y += poll.h
-header-y += posix_types.h
-header-y += ps3fb.h
-header-y += ptrace.h
-header-y += resource.h
-header-y += seccomp.h
-header-y += sembuf.h
-header-y += setup.h
-header-y += shmbuf.h
-header-y += sigcontext.h
-header-y += siginfo.h
-header-y += signal.h
-header-y += socket.h
-header-y += sockios.h
-header-y += spu_info.h
-header-y += stat.h
-header-y += statfs.h
-header-y += swab.h
-header-y += termbits.h
-header-y += termios.h
-header-y += tm.h
-header-y += types.h
-header-y += ucontext.h
-header-y += unistd.h
+# SPDX-License-Identifier: GPL-2.0
+generated-y += unistd_32.h
+generated-y += unistd_64.h
diff --git a/arch/powerpc/include/uapi/asm/auxvec.h b/arch/powerpc/include/uapi/asm/auxvec.h
index ce17d2c9eb4e..aa7c16215453 100644
--- a/arch/powerpc/include/uapi/asm/auxvec.h
+++ b/arch/powerpc/include/uapi/asm/auxvec.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_AUXVEC_H
#define _ASM_POWERPC_AUXVEC_H
@@ -16,6 +17,39 @@
*/
#define AT_SYSINFO_EHDR 33
-#define AT_VECTOR_SIZE_ARCH 6 /* entries in ARCH_DLINFO */
+/*
+ * AT_*CACHEBSIZE above represent the cache *block* size which is
+ * the size that is affected by the cache management instructions.
+ *
+ * It doesn't necessarily match the cache *line* size which is
+ * more of a performance tuning hint. Additionally the latter can
+ * be different for the different cache levels.
+ *
+ * The set of entries below represent more extensive information
+ * about the caches, in the form of two entry per cache type,
+ * one entry containing the cache size in bytes, and the other
+ * containing the cache line size in bytes in the bottom 16 bits
+ * and the cache associativity in the next 16 bits.
+ *
+ * The associativity is such that if N is the 16-bit value, the
+ * cache is N way set associative. A value of 0xffff means fully
+ * associative, a value of 1 means directly mapped.
+ *
+ * For all these fields, a value of 0 means that the information
+ * is not known.
+ */
+
+#define AT_L1I_CACHESIZE 40
+#define AT_L1I_CACHEGEOMETRY 41
+#define AT_L1D_CACHESIZE 42
+#define AT_L1D_CACHEGEOMETRY 43
+#define AT_L2_CACHESIZE 44
+#define AT_L2_CACHEGEOMETRY 45
+#define AT_L3_CACHESIZE 46
+#define AT_L3_CACHEGEOMETRY 47
+
+#define AT_MINSIGSTKSZ 51 /* stack needed for signal delivery */
+
+#define AT_VECTOR_SIZE_ARCH 15 /* entries in ARCH_DLINFO */
#endif
diff --git a/arch/powerpc/include/uapi/asm/bitsperlong.h b/arch/powerpc/include/uapi/asm/bitsperlong.h
index 5f1659032c40..46ece3ecff31 100644
--- a/arch/powerpc/include/uapi/asm/bitsperlong.h
+++ b/arch/powerpc/include/uapi/asm/bitsperlong.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __ASM_POWERPC_BITSPERLONG_H
#define __ASM_POWERPC_BITSPERLONG_H
diff --git a/arch/powerpc/include/uapi/asm/bootx.h b/arch/powerpc/include/uapi/asm/bootx.h
index 6e51cf0708a1..1b8c121071d9 100644
--- a/arch/powerpc/include/uapi/asm/bootx.h
+++ b/arch/powerpc/include/uapi/asm/bootx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* This file describes the structure passed from the BootX application
* (for MacOS) when it is used to boot Linux.
@@ -107,7 +108,7 @@ typedef struct boot_infos
/* ALL BELOW NEW (vers. 4) */
/* This defines the physical memory. Valid with BOOT_ARCH_NUBUS flag
- (non-PCI) only. On PCI, memory is contiguous and it's size is in the
+ (non-PCI) only. On PCI, memory is contiguous and its size is in the
device-tree. */
boot_info_map_entry_t
physMemoryMap[MAX_MEM_MAP_SIZE]; /* Where the phys memory is */
diff --git a/arch/powerpc/include/uapi/asm/byteorder.h b/arch/powerpc/include/uapi/asm/byteorder.h
index ca931d074000..8ef66f7d9db9 100644
--- a/arch/powerpc/include/uapi/asm/byteorder.h
+++ b/arch/powerpc/include/uapi/asm/byteorder.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_BYTEORDER_H
#define _ASM_POWERPC_BYTEORDER_H
diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index de2c0e4ee1aa..731b97dc2d15 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_POWERPC_CPUTABLE_H
#define _UAPI__ASM_POWERPC_CPUTABLE_H
@@ -31,6 +32,7 @@
#define PPC_FEATURE_PSERIES_PERFMON_COMPAT \
0x00000040
+/* Reserved - do not use 0x00000004 */
#define PPC_FEATURE_TRUE_LE 0x00000002
#define PPC_FEATURE_PPC_LE 0x00000001
@@ -42,5 +44,20 @@
#define PPC_FEATURE2_ISEL 0x08000000
#define PPC_FEATURE2_TAR 0x04000000
#define PPC_FEATURE2_VEC_CRYPTO 0x02000000
+#define PPC_FEATURE2_HTM_NOSC 0x01000000
+#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */
+#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
+#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */
+#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
+#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* TM w/out suspended state */
+#define PPC_FEATURE2_ARCH_3_1 0x00040000 /* ISA 3.1 */
+#define PPC_FEATURE2_MMA 0x00020000 /* Matrix Multiply Assist */
+
+/*
+ * IMPORTANT!
+ * All future PPC_FEATURE definitions should be allocated in cooperation with
+ * OPAL / skiboot firmware, in accordance with the ibm,powerpc-cpu-features
+ * device tree binding.
+ */
#endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
diff --git a/arch/powerpc/include/uapi/asm/eeh.h b/arch/powerpc/include/uapi/asm/eeh.h
new file mode 100644
index 000000000000..3b5c47ff3fc4
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/eeh.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2015
+ *
+ * Authors: Gavin Shan <gwshan@linux.vnet.ibm.com>
+ */
+
+#ifndef _ASM_POWERPC_EEH_H
+#define _ASM_POWERPC_EEH_H
+
+/* PE states */
+#define EEH_PE_STATE_NORMAL 0 /* Normal state */
+#define EEH_PE_STATE_RESET 1 /* PE reset asserted */
+#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */
+#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA only */
+#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */
+
+/* EEH error types and functions */
+#define EEH_ERR_TYPE_32 0 /* 32-bits error */
+#define EEH_ERR_TYPE_64 1 /* 64-bits error */
+#define EEH_ERR_FUNC_MIN 0
+#define EEH_ERR_FUNC_LD_MEM_ADDR 0 /* Memory load */
+#define EEH_ERR_FUNC_LD_MEM_DATA 1
+#define EEH_ERR_FUNC_LD_IO_ADDR 2 /* IO load */
+#define EEH_ERR_FUNC_LD_IO_DATA 3
+#define EEH_ERR_FUNC_LD_CFG_ADDR 4 /* Config load */
+#define EEH_ERR_FUNC_LD_CFG_DATA 5
+#define EEH_ERR_FUNC_ST_MEM_ADDR 6 /* Memory store */
+#define EEH_ERR_FUNC_ST_MEM_DATA 7
+#define EEH_ERR_FUNC_ST_IO_ADDR 8 /* IO store */
+#define EEH_ERR_FUNC_ST_IO_DATA 9
+#define EEH_ERR_FUNC_ST_CFG_ADDR 10 /* Config store */
+#define EEH_ERR_FUNC_ST_CFG_DATA 11
+#define EEH_ERR_FUNC_DMA_RD_ADDR 12 /* DMA read */
+#define EEH_ERR_FUNC_DMA_RD_DATA 13
+#define EEH_ERR_FUNC_DMA_RD_MASTER 14
+#define EEH_ERR_FUNC_DMA_RD_TARGET 15
+#define EEH_ERR_FUNC_DMA_WR_ADDR 16 /* DMA write */
+#define EEH_ERR_FUNC_DMA_WR_DATA 17
+#define EEH_ERR_FUNC_DMA_WR_MASTER 18
+#define EEH_ERR_FUNC_DMA_WR_TARGET 19
+#define EEH_ERR_FUNC_MAX 19
+
+#endif /* _ASM_POWERPC_EEH_H */
diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 59dad113897b..a5377f494fa3 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* ELF register definitions..
*
@@ -91,6 +92,14 @@
#define ELF_NGREG 48 /* includes nip, msr, lr, etc. */
#define ELF_NFPREG 33 /* includes fpscr */
+#define ELF_NVMX 34 /* includes all vector registers */
+#define ELF_NVSX 32 /* includes all VSX registers */
+#define ELF_NTMSPRREG 3 /* include tfhar, tfiar, texasr */
+#define ELF_NEBB 3 /* includes ebbrr, ebbhr, bescr */
+#define ELF_NPMU 5 /* includes siar, sdar, sier, mmcr2, mmcr0 */
+#define ELF_NPKEY 3 /* includes amr, iamr, uamor */
+#define ELF_NDEXCR 2 /* includes dexcr, hdexcr */
+#define ELF_NHASHKEYR 1 /* includes hashkeyr */
typedef unsigned long elf_greg_t64;
typedef elf_greg_t64 elf_gregset_t64[ELF_NGREG];
@@ -157,29 +166,6 @@ typedef elf_vrreg_t elf_vrregset_t32[ELF_NVRREG32];
typedef elf_fpreg_t elf_vsrreghalf_t32[ELF_NVSRHALFREG];
#endif
-
-/*
- * The requirements here are:
- * - keep the final alignment of sp (sp & 0xf)
- * - make sure the 32-bit value at the first 16 byte aligned position of
- * AUXV is greater than 16 for glibc compatibility.
- * AT_IGNOREPPC is used for that.
- * - for compatibility with glibc ARCH_DLINFO must always be defined on PPC,
- * even if DLINFO_ARCH_ITEMS goes to zero or is undefined.
- * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes
- */
-#define ARCH_DLINFO \
-do { \
- /* Handle glibc compatibility. */ \
- NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
- NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC); \
- /* Cache size items */ \
- NEW_AUX_ENT(AT_DCACHEBSIZE, dcache_bsize); \
- NEW_AUX_ENT(AT_ICACHEBSIZE, icache_bsize); \
- NEW_AUX_ENT(AT_UCACHEBSIZE, ucache_bsize); \
- VDSO_AUX_ENT(AT_SYSINFO_EHDR, current->mm->context.vdso_base); \
-} while (0)
-
/* PowerPC64 relocations defined by the ABIs */
#define R_PPC64_NONE R_PPC_NONE
#define R_PPC64_ADDR32 R_PPC_ADDR32 /* 32bit absolute address. */
@@ -295,6 +281,12 @@ do { \
#define R_PPC64_TLSLD 108
#define R_PPC64_TOCSAVE 109
+#define R_PPC64_REL24_NOTOC 116
+#define R_PPC64_ENTRY 118
+
+#define R_PPC64_PCREL34 132
+#define R_PPC64_GOT_PCREL34 133
+
#define R_PPC64_REL16 249
#define R_PPC64_REL16_LO 250
#define R_PPC64_REL16_HI 251
@@ -303,12 +295,4 @@ do { \
/* Keep this the last entry. */
#define R_PPC64_NUM 253
-/* There's actually a third entry here, but it's unused */
-struct ppc64_opd_entry
-{
- unsigned long funcaddr;
- unsigned long r2;
-};
-
-
#endif /* _UAPI_ASM_POWERPC_ELF_H */
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
index 7f9c74b46704..90a0ee6d0bb3 100644
--- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: ((GPL-2.0+ WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* ePAPR hcall interface
*
@@ -78,7 +79,7 @@
#define EV_SUCCESS 0
#define EV_EPERM 1 /* Operation not permitted */
#define EV_ENOENT 2 /* Entry Not Found */
-#define EV_EIO 3 /* I/O error occured */
+#define EV_EIO 3 /* I/O error occurred */
#define EV_EAGAIN 4 /* The operation had insufficient
* resources to complete and should be
* retried
@@ -89,7 +90,7 @@
#define EV_ENODEV 7 /* No such device */
#define EV_EINVAL 8 /* An argument supplied to the hcall
was out of range or invalid */
-#define EV_INTERNAL 9 /* An internal error occured */
+#define EV_INTERNAL 9 /* An internal error occurred */
#define EV_CONFIG 10 /* A configuration error was detected */
#define EV_INVALID_STATE 11 /* The object is in an invalid state */
#define EV_UNIMPLEMENTED 12 /* Unimplemented hypercall */
diff --git a/arch/powerpc/include/uapi/asm/errno.h b/arch/powerpc/include/uapi/asm/errno.h
index 8c145fd17d86..4ba87de32be0 100644
--- a/arch/powerpc/include/uapi/asm/errno.h
+++ b/arch/powerpc/include/uapi/asm/errno.h
@@ -1,11 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_ERRNO_H
#define _ASM_POWERPC_ERRNO_H
+#undef EDEADLOCK
#include <asm-generic/errno.h>
#undef EDEADLOCK
#define EDEADLOCK 58 /* File locking deadlock error */
-#define _LAST_ERRNO 516
-
#endif /* _ASM_POWERPC_ERRNO_H */
diff --git a/arch/powerpc/include/uapi/asm/fcntl.h b/arch/powerpc/include/uapi/asm/fcntl.h
index ce5c4516d404..65ce08322a89 100644
--- a/arch/powerpc/include/uapi/asm/fcntl.h
+++ b/arch/powerpc/include/uapi/asm/fcntl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_FCNTL_H
#define _ASM_FCNTL_H
diff --git a/arch/powerpc/include/uapi/asm/ioctl.h b/arch/powerpc/include/uapi/asm/ioctl.h
index 57d68304218b..d623af4b9cd6 100644
--- a/arch/powerpc/include/uapi/asm/ioctl.h
+++ b/arch/powerpc/include/uapi/asm/ioctl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_IOCTL_H
#define _ASM_POWERPC_IOCTL_H
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 49a25796a61a..b5211e413829 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_IOCTLS_H
#define _ASM_POWERPC_IOCTLS_H
@@ -22,10 +23,10 @@
#define TCSETSW _IOW('t', 21, struct termios)
#define TCSETSF _IOW('t', 22, struct termios)
-#define TCGETA _IOR('t', 23, struct termio)
-#define TCSETA _IOW('t', 24, struct termio)
-#define TCSETAW _IOW('t', 25, struct termio)
-#define TCSETAF _IOW('t', 28, struct termio)
+#define TCGETA 0x40147417 /* _IOR('t', 23, struct termio) */
+#define TCSETA 0x80147418 /* _IOW('t', 24, struct termio) */
+#define TCSETAW 0x80147419 /* _IOW('t', 25, struct termio) */
+#define TCSETAF 0x8014741c /* _IOW('t', 28, struct termio) */
#define TCSBRK _IO('t', 29)
#define TCXONC _IO('t', 30)
@@ -100,6 +101,9 @@
#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816 _IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816 _IOWR('T', 0x43, struct serial_iso7816)
#define TIOCSERCONFIG 0x5453
#define TIOCSERGWILD 0x5454
diff --git a/arch/powerpc/include/uapi/asm/ipcbuf.h b/arch/powerpc/include/uapi/asm/ipcbuf.h
index 2c3e1d94db1d..21e1e0ec0ba2 100644
--- a/arch/powerpc/include/uapi/asm/ipcbuf.h
+++ b/arch/powerpc/include/uapi/asm/ipcbuf.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_IPCBUF_H
#define _ASM_POWERPC_IPCBUF_H
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index e0e49dbb145d..077c5437f521 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright IBM Corp. 2007
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
@@ -27,7 +15,9 @@
#define __KVM_HAVE_PPC_SMT
#define __KVM_HAVE_IRQCHIP
#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_GUEST_DEBUG
+
+/* Not always available, but if it is, this is the correct offset. */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
struct kvm_regs {
__u64 pc;
@@ -57,6 +47,12 @@ struct kvm_regs {
#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
+#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
+#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
+#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
+
/*
* Feature bits indicate which sections of the sregs struct are valid,
* both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
@@ -333,6 +329,15 @@ struct kvm_create_spapr_tce {
__u32 window_size;
};
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+ __u64 liobn;
+ __u32 page_shift;
+ __u32 flags;
+ __u64 offset; /* in pages */
+ __u64 size; /* in pages */
+};
+
/* for KVM_ALLOCATE_RMA */
struct kvm_allocate_rma {
__u64 rma_size;
@@ -404,6 +409,53 @@ struct kvm_get_htab_header {
__u16 n_invalid;
};
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+ __u64 flags;
+ __u64 process_table; /* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enb. */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+ struct kvm_ppc_radix_geom {
+ __u8 page_shift;
+ __u8 level_bits[4];
+ __u8 pad[3];
+ } geometries[8];
+ __u32 ap_encodings[8];
+};
+
+/* For KVM_PPC_GET_CPU_CHAR */
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+/*
+ * Values for character and character_mask.
+ * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ */
+#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63)
+#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60)
+#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59)
+#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58)
+#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57)
+#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56)
+#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54)
+
+#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63)
+#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62)
+#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61)
+#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58)
+
/* Per-vcpu XICS interrupt controller state */
#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
@@ -416,6 +468,8 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
@@ -476,6 +530,11 @@ struct kvm_get_htab_header {
/* FP and vector status/control registers */
#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accessed via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
/* Virtual processor areas */
@@ -557,6 +616,25 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
+#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
+
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6)
+#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7)
+#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
@@ -581,9 +659,12 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL 2
+#define KVM_DEV_XICS_NR_SERVERS 1
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
@@ -593,5 +674,96 @@ struct kvm_get_htab_header {
#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
#define KVM_XICS_MASKED (1ULL << 41)
#define KVM_XICS_PENDING (1ULL << 42)
+#define KVM_XICS_PRESENTED (1ULL << 43)
+#define KVM_XICS_QUEUED (1ULL << 44)
+
+/* POWER9 XIVE Native Interrupt Controller */
+#define KVM_DEV_XIVE_GRP_CTRL 1
+#define KVM_DEV_XIVE_RESET 1
+#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_NR_SERVERS 3
+#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */
+
+/* Layout of 64-bit XIVE source attribute values */
+#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
+#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1)
+
+/* Layout of 64-bit XIVE source configuration attribute values */
+#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0
+#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7
+#define KVM_XIVE_SOURCE_SERVER_SHIFT 3
+#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL
+#define KVM_XIVE_SOURCE_MASKED_SHIFT 32
+#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL
+#define KVM_XIVE_SOURCE_EISN_SHIFT 33
+#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL
+
+/* Layout of 64-bit EQ identifier */
+#define KVM_XIVE_EQ_PRIORITY_SHIFT 0
+#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
+#define KVM_XIVE_EQ_SERVER_SHIFT 3
+#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL
+
+/* Layout of EQ configuration values (64 bytes) */
+struct kvm_ppc_xive_eq {
+ __u32 flags; /* presumably KVM_XIVE_EQ_* bits - TODO confirm */
+ __u32 qshift;
+ __u64 qaddr;
+ __u32 qtoggle;
+ __u32 qindex;
+ __u8 pad[40]; /* 24 bytes of fields above + 40 = 64 bytes total */
+};
+
+#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001
+
+#define KVM_XIVE_TIMA_PAGE_OFFSET 0
+#define KVM_XIVE_ESB_PAGE_OFFSET 4
+
+/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
+struct kvm_ppc_pvinfo {
+ /* out */
+ __u32 flags; /* presumably KVM_PPC_PVINFO_FLAGS_* bits - TODO confirm */
+ __u32 hcall[4];
+ __u8 pad[108]; /* 4 + 16 + 108 = 128 bytes total */
+};
+
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
+
+struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ]; /* KVM_PPC_PAGE_SIZES_MAX_SZ (8) slots */
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
+#define KVM_PPC_1T_SEGMENTS 0x00000002
+#define KVM_PPC_NO_HASH 0x00000004
+
+struct kvm_ppc_smmu_info {
+ __u64 flags; /* presumably the KVM_PPC_* bits defined just above - TODO confirm */
+ __u32 slb_size;
+ __u16 data_keys; /* # storage keys supported for data */
+ __u16 instr_keys; /* # storage keys supported for instructions */
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; /* KVM_PPC_PAGE_SIZES_MAX_SZ (8) slots */
+};
+
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad; /* explicit padding: 8 + 4 + 4 = 16 bytes total */
+};
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h
index 91e42f09b323..ac596064d4c7 100644
--- a/arch/powerpc/include/uapi/asm/kvm_para.h
+++ b/arch/powerpc/include/uapi/asm/kvm_para.h
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright IBM Corp. 2008
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
@@ -30,7 +18,7 @@
* Struct fields are always 32 or 64 bit aligned, depending on them being 32
* or 64 bit wide respectively.
*
- * See Documentation/virtual/kvm/ppc-pv.txt
+ * See Documentation/virt/kvm/ppc-pv.rst
*/
struct kvm_vcpu_arch_shared {
__u64 scratch1;
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index 6ea26df0a73c..c0c737215b00 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -20,12 +21,15 @@
#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
+
#define MCL_CURRENT 0x2000 /* lock all currently mapped pages */
#define MCL_FUTURE 0x4000 /* lock all additions to address space */
-
-#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
-#define MAP_NONBLOCK 0x10000 /* do not block on IO */
-#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
-#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
-
+#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */
+
+/* Override any generic PKEY permission defines */
+#define PKEY_DISABLE_EXECUTE 0x4
+#undef PKEY_ACCESS_MASK
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE |\
+ PKEY_DISABLE_EXECUTE)
#endif /* _UAPI_ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h
index dd76743c7537..7919b2ba41b5 100644
--- a/arch/powerpc/include/uapi/asm/msgbuf.h
+++ b/arch/powerpc/include/uapi/asm/msgbuf.h
@@ -1,6 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_MSGBUF_H
#define _ASM_POWERPC_MSGBUF_H
+#include <asm/ipcbuf.h>
+
/*
* The msqid64_ds structure for the PowerPC architecture.
* Note extra padding because this structure is passed back and forth
@@ -9,18 +12,18 @@
struct msqid64_ds {
struct ipc64_perm msg_perm;
-#ifndef __powerpc64__
- unsigned int __unused1;
-#endif
- __kernel_time_t msg_stime; /* last msgsnd time */
-#ifndef __powerpc64__
- unsigned int __unused2;
-#endif
- __kernel_time_t msg_rtime; /* last msgrcv time */
-#ifndef __powerpc64__
- unsigned int __unused3;
+#ifdef __powerpc64__
+ long msg_stime; /* last msgsnd time */
+ long msg_rtime; /* last msgrcv time */
+ long msg_ctime; /* last change time */
+#else
+ unsigned long msg_stime_high;
+ unsigned long msg_stime; /* last msgsnd time */
+ unsigned long msg_rtime_high;
+ unsigned long msg_rtime; /* last msgrcv time */
+ unsigned long msg_ctime_high;
+ unsigned long msg_ctime; /* last change time */
#endif
- __kernel_time_t msg_ctime; /* last change time */
unsigned long msg_cbytes; /* current number of bytes on queue */
unsigned long msg_qnum; /* number of messages in queue */
unsigned long msg_qbytes; /* max number of bytes on queue */
diff --git a/arch/powerpc/include/uapi/asm/nvram.h b/arch/powerpc/include/uapi/asm/nvram.h
index 608bdc8aedd1..c92c7f056a91 100644
--- a/arch/powerpc/include/uapi/asm/nvram.h
+++ b/arch/powerpc/include/uapi/asm/nvram.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* NVRAM definitions and access functions.
*
diff --git a/arch/powerpc/include/uapi/asm/opal-prd.h b/arch/powerpc/include/uapi/asm/opal-prd.h
new file mode 100644
index 000000000000..11abcf0192ca
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/opal-prd.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * OPAL Runtime Diagnostics interface driver
+ * Supported on POWERNV platform
+ *
+ * (C) Copyright IBM 2015
+ *
+ * Author: Vaidyanathan Srinivasan <svaidy at linux.vnet.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _UAPI_ASM_POWERPC_OPAL_PRD_H_
+#define _UAPI_ASM_POWERPC_OPAL_PRD_H_
+
+#include <linux/types.h>
+
+/**
+ * The version of the kernel interface of the PRD system. This describes the
+ * interface available for the /dev/opal-prd device. The actual PRD message
+ * layout and content is private to the firmware <--> userspace interface, so
+ * is not covered by this versioning.
+ *
+ * Future interface versions are backwards-compatible; if a later kernel
+ * version is encountered, functionality provided in earlier versions
+ * will work.
+ */
+#define OPAL_PRD_KERNEL_VERSION 1
+
+#define OPAL_PRD_GET_INFO _IOR('o', 0x01, struct opal_prd_info)
+#define OPAL_PRD_SCOM_READ _IOR('o', 0x02, struct opal_prd_scom)
+#define OPAL_PRD_SCOM_WRITE _IOW('o', 0x03, struct opal_prd_scom)
+
+#ifndef __ASSEMBLER__
+
+struct opal_prd_info {
+ __u64 version; /* presumably OPAL_PRD_KERNEL_VERSION - TODO confirm against the driver */
+ __u64 reserved[3];
+};
+
+struct opal_prd_scom { /* argument of OPAL_PRD_SCOM_READ / OPAL_PRD_SCOM_WRITE */
+ __u64 chip;
+ __u64 addr;
+ __u64 data;
+ __s64 rc; /* signed, unlike the other fields */
+};
+
+#endif /* __ASSEMBLER__ */
+
+#endif /* _UAPI_ASM_POWERPC_OPAL_PRD_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-hvpipe.h b/arch/powerpc/include/uapi/asm/papr-hvpipe.h
new file mode 100644
index 000000000000..f8794139d06a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-hvpipe.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_HVPIPE_H_
+#define _UAPI_PAPR_HVPIPE_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+/*
+ * This header is included in payload between OS and the user
+ * space.
+ * flags: OS notifies the user space whether the hvpipe is
+ * closed or the buffer has the payload.
+ */
+struct papr_hvpipe_hdr {
+ __u8 version;
+ __u8 reserved[3];
+ __u32 flags; /* HVPIPE_* flag values */
+ __u8 reserved2[40]; /* 1 + 3 + 4 + 40 = 48 bytes total */
+};
+
+/*
+ * ioctl for /dev/papr-hvpipe
+ */
+#define PAPR_HVPIPE_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 9, __u32)
+
+/*
+ * hvpipe_hdr flags used for read()
+ */
+#define HVPIPE_MSG_AVAILABLE 0x01 /* Payload is available */
+#define HVPIPE_LOST_CONNECTION 0x02 /* Pipe connection is closed/unavailable */
+
+#endif /* _UAPI_PAPR_HVPIPE_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-indices.h b/arch/powerpc/include/uapi/asm/papr-indices.h
new file mode 100644
index 000000000000..c2999d89d52a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-indices.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_INDICES_H_
+#define _UAPI_PAPR_INDICES_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+#define LOC_CODE_SIZE 80 /* 79 characters plus null */
+#define RTAS_GET_INDICES_BUF_SIZE SZ_4K /* NOTE(review): SZ_4K is not defined in this header; confirm an include provides it */
+
+struct papr_indices_io_block {
+ union {
+ struct {
+ __u8 is_sensor; /* 0 for indicator and 1 for sensor */
+ __u32 indice_type; /* NOTE(review): likely preceded by 3 bytes of implicit padding - confirm */
+ } indices;
+ struct {
+ __u32 token; /* Sensor or indicator token */
+ __u32 state; /* get / set state */
+ /*
+ * PAPR+ 12.3.2.4 Converged Location Code Rules - Length
+ * Restrictions. 79 characters plus null.
+ */
+ char location_code_str[LOC_CODE_SIZE]; /* location code */
+ } dynamic_param;
+ };
+};
+
+/*
+ * ioctls for /dev/papr-indices.
+ * PAPR_INDICES_IOC_GET: Returns a get-indices handle fd to read data
+ * PAPR_DYNAMIC_SENSOR_IOC_GET: Gets the state of the input sensor
+ * PAPR_DYNAMIC_INDICATOR_IOC_SET: Sets the new state for the input indicator
+ */
+#define PAPR_INDICES_IOC_GET _IOW(PAPR_MISCDEV_IOC_ID, 3, struct papr_indices_io_block)
+#define PAPR_DYNAMIC_SENSOR_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 4, struct papr_indices_io_block)
+#define PAPR_DYNAMIC_INDICATOR_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 5, struct papr_indices_io_block)
+
+
+#endif /* _UAPI_PAPR_INDICES_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-miscdev.h b/arch/powerpc/include/uapi/asm/papr-miscdev.h
new file mode 100644
index 000000000000..49a2a270b7f3
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-miscdev.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_MISCDEV_H_
+#define _UAPI_PAPR_MISCDEV_H_
+
+enum {
+ PAPR_MISCDEV_IOC_ID = 0xb2, /* ioctl type argument used by the papr-* _IOW()/_IOWR() definitions */
+};
+
+#endif /* _UAPI_PAPR_MISCDEV_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-physical-attestation.h b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
new file mode 100644
index 000000000000..ea746837bb9a
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-physical-attestation.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+#define _UAPI_PAPR_PHYSICAL_ATTESTATION_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+#define PAPR_PHYATTEST_MAX_INPUT 4084 /* Max 4K buffer: 4K-12 */
+
+/*
+ * Defined in PAPR 2.13+ 21.6 Attestation Command Structures.
+ * User space passes this struct and the max size should be 4K.
+ */
+struct papr_phy_attest_io_block {
+ __u8 version;
+ __u8 command;
+ __u8 TCG_major_ver;
+ __u8 TCG_minor_ver;
+ __be32 length; /* big-endian */
+ __be32 correlator; /* big-endian */
+ __u8 payload[PAPR_PHYATTEST_MAX_INPUT]; /* 12 header bytes + 4084 = 4096 bytes total */
+};
+
+/*
+ * ioctl for /dev/papr-physical-attestation. Returns an attestation
+ * command fd handle
+ */
+#define PAPR_PHY_ATTEST_IOC_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 8, struct papr_phy_attest_io_block)
+
+#endif /* _UAPI_PAPR_PHYSICAL_ATTESTATION_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-platform-dump.h b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
new file mode 100644
index 000000000000..8a1c060e89a9
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-platform-dump.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_PLATFORM_DUMP_H_
+#define _UAPI_PAPR_PLATFORM_DUMP_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+/*
+ * ioctl for /dev/papr-platform-dump. Returns a platform-dump handle fd
+ * corresponding to dump tag.
+ */
+#define PAPR_PLATFORM_DUMP_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 6, __u64)
+#define PAPR_PLATFORM_DUMP_IOC_INVALIDATE _IOW(PAPR_MISCDEV_IOC_ID, 7, __u64)
+
+#endif /* _UAPI_PAPR_PLATFORM_DUMP_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-sysparm.h b/arch/powerpc/include/uapi/asm/papr-sysparm.h
new file mode 100644
index 000000000000..f733467b1534
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-sysparm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_SYSPARM_H_
+#define _UAPI_PAPR_SYSPARM_H_
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+enum {
+ PAPR_SYSPARM_MAX_INPUT = 1024,
+ PAPR_SYSPARM_MAX_OUTPUT = 4000, /* size of papr_sysparm_io_block.data[] */
+};
+
+struct papr_sysparm_io_block {
+ __u32 parameter; /* system parameter number, e.g. 60 */
+ __u16 length; /* length of valid data in @data */
+ __u8 data[PAPR_SYSPARM_MAX_OUTPUT]; /* parameter value; also input to PAPR_SYSPARM_IOC_GET */
+};
+
+/**
+ * PAPR_SYSPARM_IOC_GET - Retrieve the value of a PAPR system parameter.
+ *
+ * Uses _IOWR because of one corner case: Retrieving the value of the
+ * "OS Service Entitlement Status" parameter (60) requires the caller
+ * to supply input data (a date string) in the buffer passed to
+ * firmware. So the @length and @data of the incoming
+ * papr_sysparm_io_block are always used to initialize the work area
+ * supplied to ibm,get-system-parameter. No other parameters are known
+ * to parameterize the result this way, and callers are encouraged
+ * (but not required) to zero-initialize @length and @data in the
+ * common case.
+ *
+ * On error the contents of the ioblock are indeterminate.
+ *
+ * Return:
+ * 0: Success; @length is the length of valid data in @data, not to exceed @PAPR_SYSPARM_MAX_OUTPUT.
+ * -EIO: Platform error. (-1)
+ * -EINVAL: Incorrect data length or format. (-9999)
+ * -EPERM: The calling partition is not allowed to access this parameter. (-9002)
+ * -EOPNOTSUPP: Parameter not supported on this platform (-3)
+ */
+#define PAPR_SYSPARM_IOC_GET _IOWR(PAPR_MISCDEV_IOC_ID, 1, struct papr_sysparm_io_block)
+
+/**
+ * PAPR_SYSPARM_IOC_SET - Update the value of a PAPR system parameter.
+ *
+ * The contents of the ioblock are unchanged regardless of success.
+ *
+ * Return:
+ * 0: Success; the parameter has been updated.
+ * -EIO: Platform error. (-1)
+ * -EINVAL: Incorrect data length or format. (-9999)
+ * -EPERM: The calling partition is not allowed to access this parameter. (-9002)
+ * -EOPNOTSUPP: Parameter not supported on this platform (-3)
+ */
+#define PAPR_SYSPARM_IOC_SET _IOW(PAPR_MISCDEV_IOC_ID, 2, struct papr_sysparm_io_block)
+
+#endif /* _UAPI_PAPR_SYSPARM_H_ */
diff --git a/arch/powerpc/include/uapi/asm/papr-vpd.h b/arch/powerpc/include/uapi/asm/papr-vpd.h
new file mode 100644
index 000000000000..1c88e87cb420
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/papr-vpd.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_PAPR_VPD_H_
+#define _UAPI_PAPR_VPD_H_
+
+#include <asm/ioctl.h>
+#include <asm/papr-miscdev.h>
+
+struct papr_location_code {
+ /*
+ * PAPR+ v2.13 12.3.2.4 Converged Location Code Rules - Length
+ * Restrictions. 79 characters plus nul.
+ */
+ char str[80];
+};
+
+/*
+ * ioctl for /dev/papr-vpd. Returns a VPD handle fd corresponding to
+ * the location code.
+ */
+#define PAPR_VPD_IOC_CREATE_HANDLE _IOW(PAPR_MISCDEV_IOC_ID, 0, struct papr_location_code)
+
+#endif /* _UAPI_PAPR_VPD_H_ */
diff --git a/arch/powerpc/include/uapi/asm/param.h b/arch/powerpc/include/uapi/asm/param.h
deleted file mode 100644
index 965d45427975..000000000000
--- a/arch/powerpc/include/uapi/asm/param.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/param.h>
diff --git a/arch/powerpc/include/uapi/asm/perf_event.h b/arch/powerpc/include/uapi/asm/perf_event.h
index 80a4d40cf5bc..ce488e48db44 100644
--- a/arch/powerpc/include/uapi/asm/perf_event.h
+++ b/arch/powerpc/include/uapi/asm/perf_event.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright 2013 Michael Ellerman, IBM Corp.
*
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..749a2e3af89e
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_POWERPC_PERF_REGS_H
+#define _UAPI_ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+ PERF_REG_POWERPC_R0,
+ PERF_REG_POWERPC_R1,
+ PERF_REG_POWERPC_R2,
+ PERF_REG_POWERPC_R3,
+ PERF_REG_POWERPC_R4,
+ PERF_REG_POWERPC_R5,
+ PERF_REG_POWERPC_R6,
+ PERF_REG_POWERPC_R7,
+ PERF_REG_POWERPC_R8,
+ PERF_REG_POWERPC_R9,
+ PERF_REG_POWERPC_R10,
+ PERF_REG_POWERPC_R11,
+ PERF_REG_POWERPC_R12,
+ PERF_REG_POWERPC_R13,
+ PERF_REG_POWERPC_R14,
+ PERF_REG_POWERPC_R15,
+ PERF_REG_POWERPC_R16,
+ PERF_REG_POWERPC_R17,
+ PERF_REG_POWERPC_R18,
+ PERF_REG_POWERPC_R19,
+ PERF_REG_POWERPC_R20,
+ PERF_REG_POWERPC_R21,
+ PERF_REG_POWERPC_R22,
+ PERF_REG_POWERPC_R23,
+ PERF_REG_POWERPC_R24,
+ PERF_REG_POWERPC_R25,
+ PERF_REG_POWERPC_R26,
+ PERF_REG_POWERPC_R27,
+ PERF_REG_POWERPC_R28,
+ PERF_REG_POWERPC_R29,
+ PERF_REG_POWERPC_R30,
+ PERF_REG_POWERPC_R31,
+ PERF_REG_POWERPC_NIP,
+ PERF_REG_POWERPC_MSR,
+ PERF_REG_POWERPC_ORIG_R3,
+ PERF_REG_POWERPC_CTR,
+ PERF_REG_POWERPC_LINK,
+ PERF_REG_POWERPC_XER,
+ PERF_REG_POWERPC_CCR,
+ PERF_REG_POWERPC_SOFTE,
+ PERF_REG_POWERPC_TRAP,
+ PERF_REG_POWERPC_DAR,
+ PERF_REG_POWERPC_DSISR,
+ PERF_REG_POWERPC_SIER,
+ PERF_REG_POWERPC_MMCRA,
+ /* Extended registers (MMCR0 through SIAR) */
+ PERF_REG_POWERPC_MMCR0,
+ PERF_REG_POWERPC_MMCR1,
+ PERF_REG_POWERPC_MMCR2,
+ PERF_REG_POWERPC_MMCR3,
+ PERF_REG_POWERPC_SIER2,
+ PERF_REG_POWERPC_SIER3,
+ PERF_REG_POWERPC_PMC1,
+ PERF_REG_POWERPC_PMC2,
+ PERF_REG_POWERPC_PMC3,
+ PERF_REG_POWERPC_PMC4,
+ PERF_REG_POWERPC_PMC5,
+ PERF_REG_POWERPC_PMC6,
+ PERF_REG_POWERPC_SDAR,
+ PERF_REG_POWERPC_SIAR,
+ /* Max mask value for interrupt regs without extended regs (one past MMCRA) */
+ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
+ /* Max mask value for interrupt regs including extended regs (one past SIAR) */
+ PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1,
+};
+
+#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
+ * includes 11 SPRs from MMCR0 to SIAR excluding the
+ * unsupported SPRs MMCR3, SIER2 and SIER3.
+ */
+#define PERF_REG_PMU_MASK_300 \
+ ((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \
+ (1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \
+ (1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \
+ (1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \
+ (1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \
+ (1ULL << PERF_REG_POWERPC_SIAR))
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
+ * includes 14 SPRs from MMCR0 to SIAR.
+ */
+#define PERF_REG_PMU_MASK_31 \
+ (PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \
+ (1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3))
+
+#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/include/uapi/asm/poll.h b/arch/powerpc/include/uapi/asm/poll.h
deleted file mode 100644
index c98509d3149e..000000000000
--- a/arch/powerpc/include/uapi/asm/poll.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/poll.h>
diff --git a/arch/powerpc/include/uapi/asm/posix_types.h b/arch/powerpc/include/uapi/asm/posix_types.h
index 2958c5b97b2d..9c0342312544 100644
--- a/arch/powerpc/include/uapi/asm/posix_types.h
+++ b/arch/powerpc/include/uapi/asm/posix_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_POSIX_TYPES_H
#define _ASM_POWERPC_POSIX_TYPES_H
@@ -11,11 +12,6 @@
typedef unsigned long __kernel_old_dev_t;
#define __kernel_old_dev_t __kernel_old_dev_t
#else
-typedef unsigned int __kernel_size_t;
-typedef int __kernel_ssize_t;
-typedef long __kernel_ptrdiff_t;
-#define __kernel_size_t __kernel_size_t
-
typedef short __kernel_ipc_pid_t;
#define __kernel_ipc_pid_t __kernel_ipc_pid_t
#endif
diff --git a/arch/powerpc/include/uapi/asm/ps3fb.h b/arch/powerpc/include/uapi/asm/ps3fb.h
index e7233a849680..b1c6b0cd9e80 100644
--- a/arch/powerpc/include/uapi/asm/ps3fb.h
+++ b/arch/powerpc/include/uapi/asm/ps3fb.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (C) 2006 Sony Computer Entertainment Inc.
* Copyright 2006, 2007 Sony Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published
- * by the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _ASM_POWERPC_PS3FB_H_
diff --git a/arch/powerpc/include/uapi/asm/ptrace.h b/arch/powerpc/include/uapi/asm/ptrace.h
index 77d2ed35b111..01e630149d48 100644
--- a/arch/powerpc/include/uapi/asm/ptrace.h
+++ b/arch/powerpc/include/uapi/asm/ptrace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* Copyright (C) 2001 PPC64 Team, IBM Corp
*
@@ -26,9 +27,14 @@
#include <linux/types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
-struct pt_regs {
+#ifdef __KERNEL__
+struct user_pt_regs
+#else
+struct pt_regs
+#endif
+{
unsigned long gpr[32];
unsigned long nip;
unsigned long msr;
@@ -51,7 +57,7 @@ struct pt_regs {
unsigned long result; /* Result of a system call */
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
@@ -136,7 +142,7 @@ struct pt_regs {
#endif /* __powerpc64__ */
/*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * Get/set all the altivec registers v0..v31, vscr, vrsave, in one go.
* The transfer totals 34 quadword. Quadwords 0-31 contain the
* corresponding vector registers. Quadword 32 contains the vscr as the
* last word (offset 12) within that quadword. Quadword 33 contains the
@@ -159,6 +165,10 @@ struct pt_regs {
#define PTRACE_GETVSRREGS 0x1b
#define PTRACE_SETVSRREGS 0x1c
+/* Syscall emulation defines */
+#define PTRACE_SYSEMU 0x1d
+#define PTRACE_SYSEMU_SINGLESTEP 0x1e
+
/*
* Get or set a debug register. The first 16 are DABR registers and the
* second 16 are IABR registers.
@@ -190,7 +200,7 @@ struct pt_regs {
#define PPC_PTRACE_SETHWDEBUG 0x88
#define PPC_PTRACE_DELHWDEBUG 0x87
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct ppc_debug_info {
__u32 version; /* Only version 1 exists to date */
@@ -202,7 +212,7 @@ struct ppc_debug_info {
__u64 features;
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* features will have bits indication whether there is support for:
@@ -212,8 +222,9 @@ struct ppc_debug_info {
#define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x0000000000000004
#define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x0000000000000008
#define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x0000000000000010
+#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x0000000000000020
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct ppc_hw_breakpoint {
__u32 version; /* currently, version must be 1 */
@@ -225,7 +236,7 @@ struct ppc_hw_breakpoint {
__u64 condition_value; /* contents of the DVC register */
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Trigger Type
diff --git a/arch/powerpc/include/uapi/asm/resource.h b/arch/powerpc/include/uapi/asm/resource.h
deleted file mode 100644
index 04bc4db8921b..000000000000
--- a/arch/powerpc/include/uapi/asm/resource.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/resource.h>
diff --git a/arch/powerpc/include/uapi/asm/seccomp.h b/arch/powerpc/include/uapi/asm/seccomp.h
deleted file mode 100644
index 00c1d9133cfe..000000000000
--- a/arch/powerpc/include/uapi/asm/seccomp.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#ifndef _ASM_POWERPC_SECCOMP_H
-#define _ASM_POWERPC_SECCOMP_H
-
-#include <linux/unistd.h>
-
-#define __NR_seccomp_read __NR_read
-#define __NR_seccomp_write __NR_write
-#define __NR_seccomp_exit __NR_exit
-#define __NR_seccomp_sigreturn __NR_rt_sigreturn
-
-#define __NR_seccomp_read_32 __NR_read
-#define __NR_seccomp_write_32 __NR_write
-#define __NR_seccomp_exit_32 __NR_exit
-#define __NR_seccomp_sigreturn_32 __NR_sigreturn
-
-#endif /* _ASM_POWERPC_SECCOMP_H */
diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h
index 99a41938ae3d..85e96ccb5f0f 100644
--- a/arch/powerpc/include/uapi/asm/sembuf.h
+++ b/arch/powerpc/include/uapi/asm/sembuf.h
@@ -1,6 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_SEMBUF_H
#define _ASM_POWERPC_SEMBUF_H
+#include <asm/ipcbuf.h>
+
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -14,20 +17,20 @@
* between kernel and user space.
*
* Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
- * - 2 miscellaneous 32-bit values
+ * - 2 miscellaneous 32/64-bit values
*/
struct semid64_ds {
struct ipc64_perm sem_perm; /* permissions .. see ipc.h */
#ifndef __powerpc64__
- unsigned long __unused1;
-#endif
- __kernel_time_t sem_otime; /* last semop time */
-#ifndef __powerpc64__
- unsigned long __unused2;
+ unsigned long sem_otime_high;
+ unsigned long sem_otime; /* last semop time */
+ unsigned long sem_ctime_high;
+ unsigned long sem_ctime; /* last change time */
+#else
+ long sem_otime; /* last semop time */
+ long sem_ctime; /* last change time */
#endif
- __kernel_time_t sem_ctime; /* last change time */
unsigned long sem_nsems; /* no. of semaphores in array */
unsigned long __unused3;
unsigned long __unused4;
diff --git a/arch/powerpc/include/uapi/asm/setup.h b/arch/powerpc/include/uapi/asm/setup.h
index ae3fb68cb28e..c54940b09d06 100644
--- a/arch/powerpc/include/uapi/asm/setup.h
+++ b/arch/powerpc/include/uapi/asm/setup.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_POWERPC_SETUP_H
#define _UAPI_ASM_POWERPC_SETUP_H
diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h
index 8efa39698b6c..439a3a02ba64 100644
--- a/arch/powerpc/include/uapi/asm/shmbuf.h
+++ b/arch/powerpc/include/uapi/asm/shmbuf.h
@@ -1,6 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_SHMBUF_H
#define _ASM_POWERPC_SHMBUF_H
+#include <asm/ipcbuf.h>
+#include <asm/posix_types.h>
+
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -15,28 +19,25 @@
* between kernel and user space.
*
* Pad space is left for:
- * - 64-bit time_t to solve y2038 problem
* - 2 miscellaneous 32-bit values
*/
struct shmid64_ds {
struct ipc64_perm shm_perm; /* operation perms */
-#ifndef __powerpc64__
- unsigned long __unused1;
-#endif
- __kernel_time_t shm_atime; /* last attach time */
-#ifndef __powerpc64__
- unsigned long __unused2;
-#endif
- __kernel_time_t shm_dtime; /* last detach time */
-#ifndef __powerpc64__
- unsigned long __unused3;
-#endif
- __kernel_time_t shm_ctime; /* last change time */
-#ifndef __powerpc64__
+#ifdef __powerpc64__
+ long shm_atime; /* last attach time */
+ long shm_dtime; /* last detach time */
+ long shm_ctime; /* last change time */
+#else
+ unsigned long shm_atime_high;
+ unsigned long shm_atime; /* last attach time */
+ unsigned long shm_dtime_high;
+ unsigned long shm_dtime; /* last detach time */
+ unsigned long shm_ctime_high;
+ unsigned long shm_ctime; /* last change time */
unsigned long __unused4;
#endif
- size_t shm_segsz; /* size of segment (bytes) */
+ __kernel_size_t shm_segsz; /* size of segment (bytes) */
__kernel_pid_t shm_cpid; /* pid of creator */
__kernel_pid_t shm_lpid; /* pid of last operator */
unsigned long shm_nattch; /* no. of current attaches */
diff --git a/arch/powerpc/include/uapi/asm/sigcontext.h b/arch/powerpc/include/uapi/asm/sigcontext.h
index 9c1f24fd5d11..630aeda56d59 100644
--- a/arch/powerpc/include/uapi/asm/sigcontext.h
+++ b/arch/powerpc/include/uapi/asm/sigcontext.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_SIGCONTEXT_H
#define _ASM_POWERPC_SIGCONTEXT_H
@@ -21,14 +22,18 @@ struct sigcontext {
#endif
unsigned long handler;
unsigned long oldmask;
- struct pt_regs __user *regs;
+#ifdef __KERNEL__
+ struct user_pt_regs __user *regs;
+#else
+ struct pt_regs *regs;
+#endif
#ifdef __powerpc64__
elf_gregset_t gp_regs;
elf_fpregset_t fp_regs;
/*
* To maintain compatibility with current implementations the sigcontext is
* extended by appending a pointer (v_regs) to a quadword type (elf_vrreg_t)
- * followed by an unstructured (vmx_reserve) field of 69 doublewords. This
+ * followed by an unstructured (vmx_reserve) field of 101 doublewords. This
* allows the array of vector registers to be quadword aligned independent of
* the alignment of the containing sigcontext or ucontext. It is the
* responsibility of the code setting the sigcontext to set this pointer to
@@ -80,7 +85,7 @@ struct sigcontext {
* registers and vscr/vrsave.
*/
elf_vrreg_t __user *v_regs;
- long vmx_reserve[ELF_NVRREG+ELF_NVRREG+32+1];
+ long vmx_reserve[ELF_NVRREG + ELF_NVRREG + 1 + 32];
#endif
};
diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h
deleted file mode 100644
index ccce3ef5cd86..000000000000
--- a/arch/powerpc/include/uapi/asm/siginfo.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_POWERPC_SIGINFO_H
-#define _ASM_POWERPC_SIGINFO_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifdef __powerpc64__
-# define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int))
-#endif
-
-#include <asm-generic/siginfo.h>
-
-#undef NSIGTRAP
-#define NSIGTRAP 4
-
-#endif /* _ASM_POWERPC_SIGINFO_H */
diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h
index 6c69ee94fd8d..a5dfe84f50ab 100644
--- a/arch/powerpc/include/uapi/asm/signal.h
+++ b/arch/powerpc/include/uapi/asm/signal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_ASM_POWERPC_SIGNAL_H
#define _UAPI_ASM_POWERPC_SIGNAL_H
@@ -59,34 +60,15 @@ typedef struct {
#define SIGRTMIN 32
#define SIGRTMAX _NSIG
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK is not currently supported, but will allow sigaltstack(2).
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP 0x00000001U
-#define SA_NOCLDWAIT 0x00000002U
-#define SA_SIGINFO 0x00000004U
-#define SA_ONSTACK 0x08000000U
-#define SA_RESTART 0x10000000U
-#define SA_NODEFER 0x40000000U
-#define SA_RESETHAND 0x80000000U
-
-#define SA_NOMASK SA_NODEFER
-#define SA_ONESHOT SA_RESETHAND
-
#define SA_RESTORER 0x04000000U
+#ifdef __powerpc64__
+#define MINSIGSTKSZ 8192
+#define SIGSTKSZ 32768
+#else
#define MINSIGSTKSZ 2048
#define SIGSTKSZ 8192
+#endif
#include <asm-generic/signal-defs.h>
@@ -109,7 +91,7 @@ struct sigaction {
typedef struct sigaltstack {
void __user *ss_sp;
int ss_flags;
- size_t ss_size;
+ __kernel_size_t ss_size;
} stack_t;
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a9c3e2e18c05..12aa0c43e775 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_SOCKET_H
#define _ASM_POWERPC_SOCKET_H
@@ -8,83 +9,13 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <asm/sockios.h>
-
-/* For setsockopt(2) */
-#define SOL_SOCKET 1
-
-#define SO_DEBUG 1
-#define SO_REUSEADDR 2
-#define SO_TYPE 3
-#define SO_ERROR 4
-#define SO_DONTROUTE 5
-#define SO_BROADCAST 6
-#define SO_SNDBUF 7
-#define SO_RCVBUF 8
-#define SO_SNDBUFFORCE 32
-#define SO_RCVBUFFORCE 33
-#define SO_KEEPALIVE 9
-#define SO_OOBINLINE 10
-#define SO_NO_CHECK 11
-#define SO_PRIORITY 12
-#define SO_LINGER 13
-#define SO_BSDCOMPAT 14
-#define SO_REUSEPORT 15
#define SO_RCVLOWAT 16
#define SO_SNDLOWAT 17
-#define SO_RCVTIMEO 18
-#define SO_SNDTIMEO 19
+#define SO_RCVTIMEO_OLD 18
+#define SO_SNDTIMEO_OLD 19
#define SO_PASSCRED 20
#define SO_PEERCRED 21
-/* Security levels - as per NRL IPv6 - don't actually do anything */
-#define SO_SECURITY_AUTHENTICATION 22
-#define SO_SECURITY_ENCRYPTION_TRANSPORT 23
-#define SO_SECURITY_ENCRYPTION_NETWORK 24
-
-#define SO_BINDTODEVICE 25
-
-/* Socket filtering */
-#define SO_ATTACH_FILTER 26
-#define SO_DETACH_FILTER 27
-#define SO_GET_FILTER SO_ATTACH_FILTER
-
-#define SO_PEERNAME 28
-#define SO_TIMESTAMP 29
-#define SCM_TIMESTAMP SO_TIMESTAMP
-
-#define SO_ACCEPTCONN 30
-
-#define SO_PEERSEC 31
-#define SO_PASSSEC 34
-#define SO_TIMESTAMPNS 35
-#define SCM_TIMESTAMPNS SO_TIMESTAMPNS
-
-#define SO_MARK 36
-
-#define SO_TIMESTAMPING 37
-#define SCM_TIMESTAMPING SO_TIMESTAMPING
-
-#define SO_PROTOCOL 38
-#define SO_DOMAIN 39
-
-#define SO_RXQ_OVFL 40
-
-#define SO_WIFI_STATUS 41
-#define SCM_WIFI_STATUS SO_WIFI_STATUS
-#define SO_PEEK_OFF 42
-
-/* Instruct lower device to use last 4-bytes of skb data as FCS */
-#define SO_NOFCS 43
-
-#define SO_LOCK_FILTER 44
-
-#define SO_SELECT_ERR_QUEUE 45
-
-#define SO_BUSY_POLL 46
-
-#define SO_MAX_PACING_RATE 47
-
-#define SO_BPF_EXTENSIONS 48
+#include <asm-generic/socket.h>
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/sockios.h b/arch/powerpc/include/uapi/asm/sockios.h
deleted file mode 100644
index 55cef7675a31..000000000000
--- a/arch/powerpc/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef _ASM_POWERPC_SOCKIOS_H
-#define _ASM_POWERPC_SOCKIOS_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* _ASM_POWERPC_SOCKIOS_H */
diff --git a/arch/powerpc/include/uapi/asm/spu_info.h b/arch/powerpc/include/uapi/asm/spu_info.h
index ed071bf97707..45f97150587b 100644
--- a/arch/powerpc/include/uapi/asm/spu_info.h
+++ b/arch/powerpc/include/uapi/asm/spu_info.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* SPU info structures
*
* (C) Copyright 2006 IBM Corp.
*
* Author: Dwayne Grant McConnell <decimal@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _UAPI_SPU_INFO_H
diff --git a/arch/powerpc/include/uapi/asm/stat.h b/arch/powerpc/include/uapi/asm/stat.h
index 84880b80cc1c..d50901664239 100644
--- a/arch/powerpc/include/uapi/asm/stat.h
+++ b/arch/powerpc/include/uapi/asm/stat.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_STAT_H
#define _ASM_POWERPC_STAT_H
/*
@@ -28,18 +29,18 @@ struct __old_kernel_stat {
struct stat {
unsigned long st_dev;
- ino_t st_ino;
+ __kernel_ino_t st_ino;
#ifdef __powerpc64__
unsigned long st_nlink;
- mode_t st_mode;
+ __kernel_mode_t st_mode;
#else
- mode_t st_mode;
+ __kernel_mode_t st_mode;
unsigned short st_nlink;
#endif
- uid_t st_uid;
- gid_t st_gid;
+ __kernel_uid32_t st_uid;
+ __kernel_gid32_t st_gid;
unsigned long st_rdev;
- off_t st_size;
+ long st_size;
unsigned long st_blksize;
unsigned long st_blocks;
unsigned long st_atime;
diff --git a/arch/powerpc/include/uapi/asm/statfs.h b/arch/powerpc/include/uapi/asm/statfs.h
deleted file mode 100644
index 5244834583a4..000000000000
--- a/arch/powerpc/include/uapi/asm/statfs.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_POWERPC_STATFS_H
-#define _ASM_POWERPC_STATFS_H
-
-#include <asm-generic/statfs.h>
-
-#endif
diff --git a/arch/powerpc/include/uapi/asm/swab.h b/arch/powerpc/include/uapi/asm/swab.h
index b6c368aa5c05..17b16c44d20c 100644
--- a/arch/powerpc/include/uapi/asm/swab.h
+++ b/arch/powerpc/include/uapi/asm/swab.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
diff --git a/arch/powerpc/include/uapi/asm/termbits.h b/arch/powerpc/include/uapi/asm/termbits.h
index 549d700e18f2..21dc86dcb2f1 100644
--- a/arch/powerpc/include/uapi/asm/termbits.h
+++ b/arch/powerpc/include/uapi/asm/termbits.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_TERMBITS_H
#define _ASM_POWERPC_TERMBITS_H
@@ -8,8 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
-typedef unsigned char cc_t;
-typedef unsigned int speed_t;
+#include <asm-generic/termbits-common.h>
+
typedef unsigned int tcflag_t;
/*
@@ -63,115 +64,72 @@ struct ktermios {
#define VDISCARD 16
/* c_iflag bits */
-#define IGNBRK 0000001
-#define BRKINT 0000002
-#define IGNPAR 0000004
-#define PARMRK 0000010
-#define INPCK 0000020
-#define ISTRIP 0000040
-#define INLCR 0000100
-#define IGNCR 0000200
-#define ICRNL 0000400
-#define IXON 0001000
-#define IXOFF 0002000
-#define IXANY 0004000
-#define IUCLC 0010000
-#define IMAXBEL 0020000
-#define IUTF8 0040000
+#define IXON 0x0200
+#define IXOFF 0x0400
+#define IUCLC 0x1000
+#define IMAXBEL 0x2000
+#define IUTF8 0x4000
/* c_oflag bits */
-#define OPOST 0000001
-#define ONLCR 0000002
-#define OLCUC 0000004
-
-#define OCRNL 0000010
-#define ONOCR 0000020
-#define ONLRET 0000040
-
-#define OFILL 00000100
-#define OFDEL 00000200
-#define NLDLY 00001400
-#define NL0 00000000
-#define NL1 00000400
-#define NL2 00001000
-#define NL3 00001400
-#define TABDLY 00006000
-#define TAB0 00000000
-#define TAB1 00002000
-#define TAB2 00004000
-#define TAB3 00006000
-#define XTABS 00006000 /* required by POSIX to == TAB3 */
-#define CRDLY 00030000
-#define CR0 00000000
-#define CR1 00010000
-#define CR2 00020000
-#define CR3 00030000
-#define FFDLY 00040000
-#define FF0 00000000
-#define FF1 00040000
-#define BSDLY 00100000
-#define BS0 00000000
-#define BS1 00100000
-#define VTDLY 00200000
-#define VT0 00000000
-#define VT1 00200000
+#define ONLCR 0x00002
+#define OLCUC 0x00004
+#define NLDLY 0x00300
+#define NL0 0x00000
+#define NL1 0x00100
+#define NL2 0x00200
+#define NL3 0x00300
+#define TABDLY 0x00c00
+#define TAB0 0x00000
+#define TAB1 0x00400
+#define TAB2 0x00800
+#define TAB3 0x00c00
+#define XTABS 0x00c00 /* required by POSIX to == TAB3 */
+#define CRDLY 0x03000
+#define CR0 0x00000
+#define CR1 0x01000
+#define CR2 0x02000
+#define CR3 0x03000
+#define FFDLY 0x04000
+#define FF0 0x00000
+#define FF1 0x04000
+#define BSDLY 0x08000
+#define BS0 0x00000
+#define BS1 0x08000
+#define VTDLY 0x10000
+#define VT0 0x00000
+#define VT1 0x10000
/* c_cflag bit meaning */
-#define CBAUD 0000377
-#define B0 0000000 /* hang up */
-#define B50 0000001
-#define B75 0000002
-#define B110 0000003
-#define B134 0000004
-#define B150 0000005
-#define B200 0000006
-#define B300 0000007
-#define B600 0000010
-#define B1200 0000011
-#define B1800 0000012
-#define B2400 0000013
-#define B4800 0000014
-#define B9600 0000015
-#define B19200 0000016
-#define B38400 0000017
-#define EXTA B19200
-#define EXTB B38400
-#define CBAUDEX 0000000
-#define B57600 00020
-#define B115200 00021
-#define B230400 00022
-#define B460800 00023
-#define B500000 00024
-#define B576000 00025
-#define B921600 00026
-#define B1000000 00027
-#define B1152000 00030
-#define B1500000 00031
-#define B2000000 00032
-#define B2500000 00033
-#define B3000000 00034
-#define B3500000 00035
-#define B4000000 00036
-#define BOTHER 00037
-
-#define CIBAUD 077600000
-#define IBSHIFT 16 /* Shift from CBAUD to CIBAUD */
-
-#define CSIZE 00001400
-#define CS5 00000000
-#define CS6 00000400
-#define CS7 00001000
-#define CS8 00001400
-
-#define CSTOPB 00002000
-#define CREAD 00004000
-#define PARENB 00010000
-#define PARODD 00020000
-#define HUPCL 00040000
-
-#define CLOCAL 00100000
-#define CMSPAR 010000000000 /* mark or space (stick) parity */
-#define CRTSCTS 020000000000 /* flow control */
+#define CBAUD 0x000000ff
+#define CBAUDEX 0x00000000
+#define BOTHER 0x0000001f
+#define B57600 0x00000010
+#define B115200 0x00000011
+#define B230400 0x00000012
+#define B460800 0x00000013
+#define B500000 0x00000014
+#define B576000 0x00000015
+#define B921600 0x00000016
+#define B1000000 0x00000017
+#define B1152000 0x00000018
+#define B1500000 0x00000019
+#define B2000000 0x0000001a
+#define B2500000 0x0000001b
+#define B3000000 0x0000001c
+#define B3500000 0x0000001d
+#define B4000000 0x0000001e
+#define CSIZE 0x00000300
+#define CS5 0x00000000
+#define CS6 0x00000100
+#define CS7 0x00000200
+#define CS8 0x00000300
+#define CSTOPB 0x00000400
+#define CREAD 0x00000800
+#define PARENB 0x00001000
+#define PARODD 0x00002000
+#define HUPCL 0x00004000
+#define CLOCAL 0x00008000
+#define CIBAUD 0x00ff0000
/* c_lflag bits */
#define ISIG 0x00000080
@@ -191,17 +149,6 @@ struct ktermios {
#define IEXTEN 0x00000400
#define EXTPROC 0x10000000
-/* Values for the ACTION argument to `tcflow'. */
-#define TCOOFF 0
-#define TCOON 1
-#define TCIOFF 2
-#define TCION 3
-
-/* Values for the QUEUE_SELECTOR argument to `tcflush'. */
-#define TCIFLUSH 0
-#define TCOFLUSH 1
-#define TCIOFLUSH 2
-
/* Values for the OPTIONAL_ACTIONS argument to `tcsetattr'. */
#define TCSANOW 0
#define TCSADRAIN 1
diff --git a/arch/powerpc/include/uapi/asm/termios.h b/arch/powerpc/include/uapi/asm/termios.h
index 6cca5cdfec04..5d07fc89bcb6 100644
--- a/arch/powerpc/include/uapi/asm/termios.h
+++ b/arch/powerpc/include/uapi/asm/termios.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* Liberally adapted from alpha/termios.h. In particular, the c_cc[]
* fields have been reordered so that termio & termios share the
diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h
index 5d836b7c1176..e1bf0e2fac43 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_TM_H
#define _ASM_POWERPC_TM_H
@@ -11,7 +12,7 @@
#define TM_CAUSE_RESCHED 0xde
#define TM_CAUSE_TLBI 0xdc
#define TM_CAUSE_FAC_UNAV 0xda
-#define TM_CAUSE_SYSCALL 0xd8 /* future use */
+#define TM_CAUSE_SYSCALL 0xd8
#define TM_CAUSE_MISC 0xd6 /* future use */
#define TM_CAUSE_SIGNAL 0xd4
#define TM_CAUSE_ALIGNMENT 0xd2
diff --git a/arch/powerpc/include/uapi/asm/types.h b/arch/powerpc/include/uapi/asm/types.h
index 4b8ab990a3c1..9dbf55e38ea5 100644
--- a/arch/powerpc/include/uapi/asm/types.h
+++ b/arch/powerpc/include/uapi/asm/types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* This file is never included by application software unless
* explicitly requested (e.g., via linux/types.h) in which case the
@@ -27,14 +28,14 @@
# include <asm-generic/int-ll64.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
typedef struct {
__u32 u[4];
} __attribute__((aligned(16))) __vector128;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_POWERPC_TYPES_H */
diff --git a/arch/powerpc/include/uapi/asm/ucontext.h b/arch/powerpc/include/uapi/asm/ucontext.h
index d9a4ddf0cc86..6f14a96d4985 100644
--- a/arch/powerpc/include/uapi/asm/ucontext.h
+++ b/arch/powerpc/include/uapi/asm/ucontext.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_POWERPC_UCONTEXT_H
#define _ASM_POWERPC_UCONTEXT_H
diff --git a/arch/powerpc/include/uapi/asm/unistd.h b/arch/powerpc/include/uapi/asm/unistd.h
index 2d526f7b48da..5f84e3dc98d0 100644
--- a/arch/powerpc/include/uapi/asm/unistd.h
+++ b/arch/powerpc/include/uapi/asm/unistd.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* This file contains the system call numbers.
*
@@ -9,376 +10,10 @@
#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
#define _UAPI_ASM_POWERPC_UNISTD_H_
-
-#define __NR_restart_syscall 0
-#define __NR_exit 1
-#define __NR_fork 2
-#define __NR_read 3
-#define __NR_write 4
-#define __NR_open 5
-#define __NR_close 6
-#define __NR_waitpid 7
-#define __NR_creat 8
-#define __NR_link 9
-#define __NR_unlink 10
-#define __NR_execve 11
-#define __NR_chdir 12
-#define __NR_time 13
-#define __NR_mknod 14
-#define __NR_chmod 15
-#define __NR_lchown 16
-#define __NR_break 17
-#define __NR_oldstat 18
-#define __NR_lseek 19
-#define __NR_getpid 20
-#define __NR_mount 21
-#define __NR_umount 22
-#define __NR_setuid 23
-#define __NR_getuid 24
-#define __NR_stime 25
-#define __NR_ptrace 26
-#define __NR_alarm 27
-#define __NR_oldfstat 28
-#define __NR_pause 29
-#define __NR_utime 30
-#define __NR_stty 31
-#define __NR_gtty 32
-#define __NR_access 33
-#define __NR_nice 34
-#define __NR_ftime 35
-#define __NR_sync 36
-#define __NR_kill 37
-#define __NR_rename 38
-#define __NR_mkdir 39
-#define __NR_rmdir 40
-#define __NR_dup 41
-#define __NR_pipe 42
-#define __NR_times 43
-#define __NR_prof 44
-#define __NR_brk 45
-#define __NR_setgid 46
-#define __NR_getgid 47
-#define __NR_signal 48
-#define __NR_geteuid 49
-#define __NR_getegid 50
-#define __NR_acct 51
-#define __NR_umount2 52
-#define __NR_lock 53
-#define __NR_ioctl 54
-#define __NR_fcntl 55
-#define __NR_mpx 56
-#define __NR_setpgid 57
-#define __NR_ulimit 58
-#define __NR_oldolduname 59
-#define __NR_umask 60
-#define __NR_chroot 61
-#define __NR_ustat 62
-#define __NR_dup2 63
-#define __NR_getppid 64
-#define __NR_getpgrp 65
-#define __NR_setsid 66
-#define __NR_sigaction 67
-#define __NR_sgetmask 68
-#define __NR_ssetmask 69
-#define __NR_setreuid 70
-#define __NR_setregid 71
-#define __NR_sigsuspend 72
-#define __NR_sigpending 73
-#define __NR_sethostname 74
-#define __NR_setrlimit 75
-#define __NR_getrlimit 76
-#define __NR_getrusage 77
-#define __NR_gettimeofday 78
-#define __NR_settimeofday 79
-#define __NR_getgroups 80
-#define __NR_setgroups 81
-#define __NR_select 82
-#define __NR_symlink 83
-#define __NR_oldlstat 84
-#define __NR_readlink 85
-#define __NR_uselib 86
-#define __NR_swapon 87
-#define __NR_reboot 88
-#define __NR_readdir 89
-#define __NR_mmap 90
-#define __NR_munmap 91
-#define __NR_truncate 92
-#define __NR_ftruncate 93
-#define __NR_fchmod 94
-#define __NR_fchown 95
-#define __NR_getpriority 96
-#define __NR_setpriority 97
-#define __NR_profil 98
-#define __NR_statfs 99
-#define __NR_fstatfs 100
-#define __NR_ioperm 101
-#define __NR_socketcall 102
-#define __NR_syslog 103
-#define __NR_setitimer 104
-#define __NR_getitimer 105
-#define __NR_stat 106
-#define __NR_lstat 107
-#define __NR_fstat 108
-#define __NR_olduname 109
-#define __NR_iopl 110
-#define __NR_vhangup 111
-#define __NR_idle 112
-#define __NR_vm86 113
-#define __NR_wait4 114
-#define __NR_swapoff 115
-#define __NR_sysinfo 116
-#define __NR_ipc 117
-#define __NR_fsync 118
-#define __NR_sigreturn 119
-#define __NR_clone 120
-#define __NR_setdomainname 121
-#define __NR_uname 122
-#define __NR_modify_ldt 123
-#define __NR_adjtimex 124
-#define __NR_mprotect 125
-#define __NR_sigprocmask 126
-#define __NR_create_module 127
-#define __NR_init_module 128
-#define __NR_delete_module 129
-#define __NR_get_kernel_syms 130
-#define __NR_quotactl 131
-#define __NR_getpgid 132
-#define __NR_fchdir 133
-#define __NR_bdflush 134
-#define __NR_sysfs 135
-#define __NR_personality 136
-#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
-#define __NR_setfsuid 138
-#define __NR_setfsgid 139
-#define __NR__llseek 140
-#define __NR_getdents 141
-#define __NR__newselect 142
-#define __NR_flock 143
-#define __NR_msync 144
-#define __NR_readv 145
-#define __NR_writev 146
-#define __NR_getsid 147
-#define __NR_fdatasync 148
-#define __NR__sysctl 149
-#define __NR_mlock 150
-#define __NR_munlock 151
-#define __NR_mlockall 152
-#define __NR_munlockall 153
-#define __NR_sched_setparam 154
-#define __NR_sched_getparam 155
-#define __NR_sched_setscheduler 156
-#define __NR_sched_getscheduler 157
-#define __NR_sched_yield 158
-#define __NR_sched_get_priority_max 159
-#define __NR_sched_get_priority_min 160
-#define __NR_sched_rr_get_interval 161
-#define __NR_nanosleep 162
-#define __NR_mremap 163
-#define __NR_setresuid 164
-#define __NR_getresuid 165
-#define __NR_query_module 166
-#define __NR_poll 167
-#define __NR_nfsservctl 168
-#define __NR_setresgid 169
-#define __NR_getresgid 170
-#define __NR_prctl 171
-#define __NR_rt_sigreturn 172
-#define __NR_rt_sigaction 173
-#define __NR_rt_sigprocmask 174
-#define __NR_rt_sigpending 175
-#define __NR_rt_sigtimedwait 176
-#define __NR_rt_sigqueueinfo 177
-#define __NR_rt_sigsuspend 178
-#define __NR_pread64 179
-#define __NR_pwrite64 180
-#define __NR_chown 181
-#define __NR_getcwd 182
-#define __NR_capget 183
-#define __NR_capset 184
-#define __NR_sigaltstack 185
-#define __NR_sendfile 186
-#define __NR_getpmsg 187 /* some people actually want streams */
-#define __NR_putpmsg 188 /* some people actually want streams */
-#define __NR_vfork 189
-#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */
-#define __NR_readahead 191
-#ifndef __powerpc64__ /* these are 32-bit only */
-#define __NR_mmap2 192
-#define __NR_truncate64 193
-#define __NR_ftruncate64 194
-#define __NR_stat64 195
-#define __NR_lstat64 196
-#define __NR_fstat64 197
-#endif
-#define __NR_pciconfig_read 198
-#define __NR_pciconfig_write 199
-#define __NR_pciconfig_iobase 200
-#define __NR_multiplexer 201
-#define __NR_getdents64 202
-#define __NR_pivot_root 203
-#ifndef __powerpc64__
-#define __NR_fcntl64 204
-#endif
-#define __NR_madvise 205
-#define __NR_mincore 206
-#define __NR_gettid 207
-#define __NR_tkill 208
-#define __NR_setxattr 209
-#define __NR_lsetxattr 210
-#define __NR_fsetxattr 211
-#define __NR_getxattr 212
-#define __NR_lgetxattr 213
-#define __NR_fgetxattr 214
-#define __NR_listxattr 215
-#define __NR_llistxattr 216
-#define __NR_flistxattr 217
-#define __NR_removexattr 218
-#define __NR_lremovexattr 219
-#define __NR_fremovexattr 220
-#define __NR_futex 221
-#define __NR_sched_setaffinity 222
-#define __NR_sched_getaffinity 223
-/* 224 currently unused */
-#define __NR_tuxcall 225
#ifndef __powerpc64__
-#define __NR_sendfile64 226
-#endif
-#define __NR_io_setup 227
-#define __NR_io_destroy 228
-#define __NR_io_getevents 229
-#define __NR_io_submit 230
-#define __NR_io_cancel 231
-#define __NR_set_tid_address 232
-#define __NR_fadvise64 233
-#define __NR_exit_group 234
-#define __NR_lookup_dcookie 235
-#define __NR_epoll_create 236
-#define __NR_epoll_ctl 237
-#define __NR_epoll_wait 238
-#define __NR_remap_file_pages 239
-#define __NR_timer_create 240
-#define __NR_timer_settime 241
-#define __NR_timer_gettime 242
-#define __NR_timer_getoverrun 243
-#define __NR_timer_delete 244
-#define __NR_clock_settime 245
-#define __NR_clock_gettime 246
-#define __NR_clock_getres 247
-#define __NR_clock_nanosleep 248
-#define __NR_swapcontext 249
-#define __NR_tgkill 250
-#define __NR_utimes 251
-#define __NR_statfs64 252
-#define __NR_fstatfs64 253
-#ifndef __powerpc64__
-#define __NR_fadvise64_64 254
-#endif
-#define __NR_rtas 255
-#define __NR_sys_debug_setcontext 256
-/* Number 257 is reserved for vserver */
-#define __NR_migrate_pages 258
-#define __NR_mbind 259
-#define __NR_get_mempolicy 260
-#define __NR_set_mempolicy 261
-#define __NR_mq_open 262
-#define __NR_mq_unlink 263
-#define __NR_mq_timedsend 264
-#define __NR_mq_timedreceive 265
-#define __NR_mq_notify 266
-#define __NR_mq_getsetattr 267
-#define __NR_kexec_load 268
-#define __NR_add_key 269
-#define __NR_request_key 270
-#define __NR_keyctl 271
-#define __NR_waitid 272
-#define __NR_ioprio_set 273
-#define __NR_ioprio_get 274
-#define __NR_inotify_init 275
-#define __NR_inotify_add_watch 276
-#define __NR_inotify_rm_watch 277
-#define __NR_spu_run 278
-#define __NR_spu_create 279
-#define __NR_pselect6 280
-#define __NR_ppoll 281
-#define __NR_unshare 282
-#define __NR_splice 283
-#define __NR_tee 284
-#define __NR_vmsplice 285
-#define __NR_openat 286
-#define __NR_mkdirat 287
-#define __NR_mknodat 288
-#define __NR_fchownat 289
-#define __NR_futimesat 290
-#ifdef __powerpc64__
-#define __NR_newfstatat 291
+#include <asm/unistd_32.h>
#else
-#define __NR_fstatat64 291
+#include <asm/unistd_64.h>
#endif
-#define __NR_unlinkat 292
-#define __NR_renameat 293
-#define __NR_linkat 294
-#define __NR_symlinkat 295
-#define __NR_readlinkat 296
-#define __NR_fchmodat 297
-#define __NR_faccessat 298
-#define __NR_get_robust_list 299
-#define __NR_set_robust_list 300
-#define __NR_move_pages 301
-#define __NR_getcpu 302
-#define __NR_epoll_pwait 303
-#define __NR_utimensat 304
-#define __NR_signalfd 305
-#define __NR_timerfd_create 306
-#define __NR_eventfd 307
-#define __NR_sync_file_range2 308
-#define __NR_fallocate 309
-#define __NR_subpage_prot 310
-#define __NR_timerfd_settime 311
-#define __NR_timerfd_gettime 312
-#define __NR_signalfd4 313
-#define __NR_eventfd2 314
-#define __NR_epoll_create1 315
-#define __NR_dup3 316
-#define __NR_pipe2 317
-#define __NR_inotify_init1 318
-#define __NR_perf_event_open 319
-#define __NR_preadv 320
-#define __NR_pwritev 321
-#define __NR_rt_tgsigqueueinfo 322
-#define __NR_fanotify_init 323
-#define __NR_fanotify_mark 324
-#define __NR_prlimit64 325
-#define __NR_socket 326
-#define __NR_bind 327
-#define __NR_connect 328
-#define __NR_listen 329
-#define __NR_accept 330
-#define __NR_getsockname 331
-#define __NR_getpeername 332
-#define __NR_socketpair 333
-#define __NR_send 334
-#define __NR_sendto 335
-#define __NR_recv 336
-#define __NR_recvfrom 337
-#define __NR_shutdown 338
-#define __NR_setsockopt 339
-#define __NR_getsockopt 340
-#define __NR_sendmsg 341
-#define __NR_recvmsg 342
-#define __NR_recvmmsg 343
-#define __NR_accept4 344
-#define __NR_name_to_handle_at 345
-#define __NR_open_by_handle_at 346
-#define __NR_clock_adjtime 347
-#define __NR_syncfs 348
-#define __NR_sendmmsg 349
-#define __NR_setns 350
-#define __NR_process_vm_readv 351
-#define __NR_process_vm_writev 352
-#define __NR_finit_module 353
-#define __NR_kcmp 354
-#define __NR_sched_setattr 355
-#define __NR_sched_getattr 356
-#define __NR_renameat2 357
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/arch/powerpc/include/uapi/asm/vas-api.h b/arch/powerpc/include/uapi/asm/vas-api.h
new file mode 100644
index 000000000000..7c81301ecdba
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/vas-api.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Copyright 2019 IBM Corp.
+ */
+
+#ifndef _UAPI_MISC_VAS_H
+#define _UAPI_MISC_VAS_H
+
+#include <linux/types.h>
+
+#include <asm/ioctl.h>
+
+#define VAS_MAGIC 'v'
+#define VAS_TX_WIN_OPEN _IOW(VAS_MAGIC, 0x20, struct vas_tx_win_open_attr)
+
+/* Flags to VAS TX open window ioctl */
+/* To allocate a window with QoS credit, otherwise use default credit */
+#define VAS_TX_WIN_FLAG_QOS_CREDIT 0x0000000000000001
+
+struct vas_tx_win_open_attr {
+ __u32 version;
+ __s16 vas_id; /* specific instance of vas or -1 for default */
+ __u16 reserved1;
+ __u64 flags;
+ __u64 reserved2[6];
+};
+
+#endif /* _UAPI_MISC_VAS_H */
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index c5f676c3c224..d71179d3ffe9 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/fsl_booke_entry_mapping.S b/arch/powerpc/kernel/85xx_entry_mapping.S
index f22e7e44fbf3..dedc17fac8f8 100644
--- a/arch/powerpc/kernel/fsl_booke_entry_mapping.S
+++ b/arch/powerpc/kernel/85xx_entry_mapping.S
@@ -1,6 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/* 1. Find the index of the entry we're executing in */
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r6 /* Make it accessible */
mfmsr r7
rlwinm r4,r7,27,31,31 /* extract MSR[IS] */
@@ -84,7 +85,7 @@ skpinv: addi r6,r6,1 /* Increment */
addi r6,r6,10
slw r6,r8,r6 /* convert to mask */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r7
mfspr r8,SPRN_MAS3
@@ -116,7 +117,7 @@ skpinv: addi r6,r6,1 /* Increment */
xori r6,r4,1
slwi r6,r6,5 /* setup new context with other address space */
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r9
rlwimi r7,r9,0,20,31
addi r7,r7,(2f - 1b)
@@ -152,32 +153,24 @@ skpinv: addi r6,r6,1 /* Increment */
tlbivax 0,r9
TLBSYNC
-/* The mapping only needs to be cache-coherent on SMP */
-#ifdef CONFIG_SMP
-#define M_IF_SMP MAS2_M
-#else
-#define M_IF_SMP 0
-#endif
-
#if defined(ENTRY_MAPPING_BOOT_SETUP)
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
mtspr SPRN_MAS0,r6
lis r6,(MAS1_VALID|MAS1_IPROT)@h
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
mtspr SPRN_MAS1,r6
- lis r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@h
- ori r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_SMP)@l
+ lis r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+ ori r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+ and r6,r6,r20
+ ori r6,r6,MAS2_M_IF_NEEDED@l
mtspr SPRN_MAS2,r6
mtspr SPRN_MAS3,r8
tlbwe
-/* 7. Jump to KERNELBASE mapping */
- lis r6,(KERNELBASE & ~0xfff)@h
- ori r6,r6,(KERNELBASE & ~0xfff)@l
- rlwinm r7,r25,0,0x03ffffff
- add r6,r7,r6
+/* 7. Jump to kernstart_virt_addr mapping */
+ mr r6,r20
#elif defined(ENTRY_MAPPING_KEXEC_SETUP)
/*
@@ -214,7 +207,7 @@ next_tlb_setup:
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r9
rlwimi r6,r9,0,20,31
addi r6,r6,(2f - 1b)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 670c312d914e..2f0a2e69c607 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -1,120 +1,158 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux kernel.
#
-CFLAGS_prom.o = -I$(src)/../../../scripts/dtc/libfdt
-CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
-ifeq ($(CONFIG_PPC64),y)
-CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
-endif
-ifeq ($(CONFIG_PPC32),y)
+ifdef CONFIG_PPC32
CFLAGS_prom_init.o += -fPIC
CFLAGS_btext.o += -fPIC
endif
+CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+CFLAGS_prom_init.o += -fno-stack-protector
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
-CFLAGS_REMOVE_prom.o = -pg -mno-sched-epilog
-# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog
-# timers used by tracing
-CFLAGS_REMOVE_time.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
+endif
+
+KASAN_SANITIZE_early_32.o := n
+KASAN_SANITIZE_cputable.o := n
+KASAN_SANITIZE_prom_init.o := n
+KASAN_SANITIZE_btext.o := n
+KASAN_SANITIZE_paca.o := n
+KASAN_SANITIZE_setup_64.o := n
+KASAN_SANITIZE_mce.o := n
+KASAN_SANITIZE_mce_power.o := n
+KASAN_SANITIZE_udbg.o := n
+KASAN_SANITIZE_udbg_16550.o := n
+
+# we have to be particularly careful in ppc64 to exclude code that
+# runs with translations off, as we cannot access the shadow with
+# translations off. However, ppc32 can sanitize this.
+ifdef CONFIG_PPC64
+KASAN_SANITIZE_traps.o := n
endif
-obj-y := cputable.o ptrace.o syscalls.o \
- irq.o align.o signal_32.o pmc.o vdso.o \
+ifdef CONFIG_KASAN
+CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
+#ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
+# Remove stack protector to avoid triggering unneeded stack canary
+# checks due to randomize_kstack_offset.
+CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong
+CFLAGS_syscall.o += -fno-stack-protector
+#endif
+
+obj-y := cputable.o syscalls.o switch.o \
+ irq.o align.o signal_$(BITS).o pmc.o vdso.o \
process.o systbl.o idle.o \
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
- udbg.o misc.o io.o dma.o \
- misc_$(CONFIG_WORD_SIZE).o vdso32/
-obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
- signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ udbg.o misc.o io.o misc_$(BITS).o \
+ prom_parse.o firmware.o \
+ hw_breakpoint_constraints.o interrupt.o \
+ kdebugfs.o stacktrace.o syscall.o
+obj-y += ptrace/
+obj-$(CONFIG_PPC64) += setup_64.o irq_64.o\
+ paca.o nvram_64.o note.o
+obj-$(CONFIG_PPC32) += sys_ppc32.o
+obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o
+obj-$(CONFIG_VDSO32) += vdso32_wrapper.o
+obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_PPC_DAWR) += dawr.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
+obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o
obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
-obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
-obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
-obj-$(CONFIG_PPC64) += vdso64/
+obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o
+obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o
+obj-$(CONFIG_PPC64) += vdso64_wrapper.o
obj-$(CONFIG_ALTIVEC) += vecemu.o
-obj-$(CONFIG_PPC_970_NAP) += idle_power4.o
-obj-$(CONFIG_PPC_P7_NAP) += idle_power7.o
-obj-$(CONFIG_PPC_OF) += of_platform.o prom_parse.o
+obj-$(CONFIG_PPC_BOOK3S_IDLE) += idle_book3s.o
procfs-y := proc_powerpc.o
obj-$(CONFIG_PROC_FS) += $(procfs-y)
rtaspci-$(CONFIG_PPC64)-$(CONFIG_PCI) := rtas_pci.o
-obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
+obj-$(CONFIG_PPC_RTAS) += rtas_entry.o rtas.o rtas-rtc.o $(rtaspci-y-y)
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
-obj-$(CONFIG_IBMVIO) += vio.o
-obj-$(CONFIG_IBMEBUS) += ibmebus.o
-obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+obj-$(CONFIG_PPC_DT_CPU_FTRS) += dt_cpu_ftrs.o
+obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_FA_DUMP) += fadump.o
-ifeq ($(CONFIG_PPC32),y)
-obj-$(CONFIG_E500) += idle_e500.o
-endif
-obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o
+obj-$(CONFIG_PPC_85xx) += idle_85xx.o
+obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
obj-$(CONFIG_TAU) += tau_6xx.o
obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
-ifeq ($(CONFIG_FSL_BOOKE),y)
-obj-$(CONFIG_HIBERNATION) += swsusp_booke.o
+ifdef CONFIG_PPC_85xx
+obj-$(CONFIG_HIBERNATION) += swsusp_85xx.o
else
-obj-$(CONFIG_HIBERNATION) += swsusp_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_HIBERNATION) += swsusp_$(BITS).o
endif
obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o
-obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o
+obj-$(CONFIG_MODULES) += module.o module_$(BITS).o
obj-$(CONFIG_44x) += cpu_setup_44x.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o
+obj-$(CONFIG_PPC_E500) += cpu_setup_e500.o
obj-$(CONFIG_PPC_DOORBELL) += dbell.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
-extra-y := head_$(CONFIG_WORD_SIZE).o
-extra-$(CONFIG_40x) := head_40x.o
-extra-$(CONFIG_44x) := head_44x.o
-extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o
-extra-$(CONFIG_8xx) := head_8xx.o
-extra-y += vmlinux.lds
+obj-$(CONFIG_PPC64) += head_64.o
+obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o
+obj-$(CONFIG_44x) += head_44x.o
+obj-$(CONFIG_PPC_8xx) += head_8xx.o
+obj-$(CONFIG_PPC_85xx) += head_85xx.o
+always-$(KBUILD_BUILTIN) += vmlinux.lds
-obj-$(CONFIG_RELOCATABLE_PPC32) += reloc_32.o
+obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o static_call.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_MODULES) += ppc_ksyms.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o
obj-$(CONFIG_UPROBES) += uprobes.o
+obj-$(CONFIG_RETHOOK) += rethook.o
obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o
-obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o
+obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o
pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
-obj-$(CONFIG_PCI) += pci_$(CONFIG_WORD_SIZE).o $(pci64-y) \
+obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
- machine_kexec_$(CONFIG_WORD_SIZE).o
+
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
-obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o
-
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-y += trace/
ifneq ($(CONFIG_PPC_INDIRECT_PIO),y)
obj-y += iomap.o
@@ -122,47 +160,64 @@ endif
obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
-obj-$(CONFIG_PPC64) += $(obj64-y)
-obj-$(CONFIG_PPC32) += $(obj32-y)
-
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
obj-y += ppc_save_regs.o
-endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y += ucall.o
+endif
-# Disable GCOV in odd or sensitive code
+obj-$(CONFIG_PPC_SECURE_BOOT) += secure_boot.o ima_arch.o secvar-ops.o
+obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
-GCOV_PROFILE_ftrace.o := n
-GCOV_PROFILE_machine_kexec_64.o := n
-GCOV_PROFILE_machine_kexec_32.o := n
+KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
+UBSAN_SANITIZE_prom_init.o := n
GCOV_PROFILE_kprobes.o := n
+KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
+UBSAN_SANITIZE_kprobes.o := n
+GCOV_PROFILE_kprobes-ftrace.o := n
+KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
+UBSAN_SANITIZE_kprobes-ftrace.o := n
+UBSAN_SANITIZE_vdso.o := n
+
+# Necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_cputable.o := n
+KCOV_INSTRUMENT_setup_64.o := n
+KCOV_INSTRUMENT_paca.o := n
+
+CFLAGS_setup_64.o += -fno-stack-protector
+CFLAGS_paca.o += -fno-stack-protector
+
+obj-$(CONFIG_PPC_FPU) += fpu.o
+obj-$(CONFIG_ALTIVEC) += vector.o
+
+obj-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
+obj64-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_entry_64.o
+ifdef KBUILD_BUILTIN
+always-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
+endif
-extra-$(CONFIG_PPC_FPU) += fpu.o
-extra-$(CONFIG_ALTIVEC) += vector.o
-extra-$(CONFIG_PPC64) += entry_64.o
-extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-
-extra-y += systbl_chk.i
-$(obj)/systbl.o: systbl_chk
-
-quiet_cmd_systbl_chk = CALL $<
- cmd_systbl_chk = $(CONFIG_SHELL) $< $(obj)/systbl_chk.i
+obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
-PHONY += systbl_chk
-systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
- $(call cmd,systbl_chk)
+quiet_cmd_prom_init_check = PROMCHK $@
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
-ifeq ($(CONFIG_PPC_OF_BOOT_TRAMPOLINE),y)
-$(obj)/built-in.o: prom_init_check
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+ $(call if_changed,prom_init_check)
+targets += prom_init_check
-quiet_cmd_prom_init_check = CALL $<
- cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+clean-files := vmlinux.lds
-PHONY += prom_init_check
-prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
- $(call cmd,prom_init_check)
-endif
+# Force dependency (incbin is bad)
+$(obj)/vdso32_wrapper.o : $(obj)/vdso/vdso32.so.dbg
+$(obj)/vdso64_wrapper.o : $(obj)/vdso/vdso64.so.dbg
-clean-files := vmlinux.lds
+# for cleaning
+subdir- += vdso
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 34f55524d456..3e37ece06739 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* align.c - handle alignment exceptions for the Power PC.
*
* Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
@@ -10,22 +11,20 @@
* Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
* <benh@kernel.crashing.org>
* Merge ppc32 and ppc64 implementations
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/cache.h>
#include <asm/cputable.h>
#include <asm/emulated_ops.h>
#include <asm/switch_to.h>
#include <asm/disassemble.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
+#include <asm/inst.h>
struct aligninfo {
unsigned char len;
@@ -39,366 +38,9 @@ struct aligninfo {
#define LD 0 /* load */
#define ST 1 /* store */
#define SE 2 /* sign-extend value, or FP ld/st as word */
-#define F 4 /* to/from fp regs */
-#define U 8 /* update index register */
-#define M 0x10 /* multiple load/store */
#define SW 0x20 /* byte swap */
-#define S 0x40 /* single-precision fp or... */
-#define SX 0x40 /* ... byte count in XER */
-#define HARD 0x80 /* string, stwcx. */
#define E4 0x40 /* SPE endianness is word */
#define E8 0x80 /* SPE endianness is double word */
-#define SPLT 0x80 /* VSX SPLAT load */
-
-/* DSISR bits reported for a DCBZ instruction: */
-#define DCBZ 0x5f /* 8xx/82xx dcbz faults when cache not enabled */
-
-/*
- * The PowerPC stores certain bits of the instruction that caused the
- * alignment exception in the DSISR register. This array maps those
- * bits to information about the operand length and what the
- * instruction would do.
- */
-static struct aligninfo aligninfo[128] = {
- { 4, LD }, /* 00 0 0000: lwz / lwarx */
- INVALID, /* 00 0 0001 */
- { 4, ST }, /* 00 0 0010: stw */
- INVALID, /* 00 0 0011 */
- { 2, LD }, /* 00 0 0100: lhz */
- { 2, LD+SE }, /* 00 0 0101: lha */
- { 2, ST }, /* 00 0 0110: sth */
- { 4, LD+M }, /* 00 0 0111: lmw */
- { 4, LD+F+S }, /* 00 0 1000: lfs */
- { 8, LD+F }, /* 00 0 1001: lfd */
- { 4, ST+F+S }, /* 00 0 1010: stfs */
- { 8, ST+F }, /* 00 0 1011: stfd */
- { 16, LD }, /* 00 0 1100: lq */
- { 8, LD }, /* 00 0 1101: ld/ldu/lwa */
- INVALID, /* 00 0 1110 */
- { 8, ST }, /* 00 0 1111: std/stdu */
- { 4, LD+U }, /* 00 1 0000: lwzu */
- INVALID, /* 00 1 0001 */
- { 4, ST+U }, /* 00 1 0010: stwu */
- INVALID, /* 00 1 0011 */
- { 2, LD+U }, /* 00 1 0100: lhzu */
- { 2, LD+SE+U }, /* 00 1 0101: lhau */
- { 2, ST+U }, /* 00 1 0110: sthu */
- { 4, ST+M }, /* 00 1 0111: stmw */
- { 4, LD+F+S+U }, /* 00 1 1000: lfsu */
- { 8, LD+F+U }, /* 00 1 1001: lfdu */
- { 4, ST+F+S+U }, /* 00 1 1010: stfsu */
- { 8, ST+F+U }, /* 00 1 1011: stfdu */
- { 16, LD+F }, /* 00 1 1100: lfdp */
- INVALID, /* 00 1 1101 */
- { 16, ST+F }, /* 00 1 1110: stfdp */
- INVALID, /* 00 1 1111 */
- { 8, LD }, /* 01 0 0000: ldx */
- INVALID, /* 01 0 0001 */
- { 8, ST }, /* 01 0 0010: stdx */
- INVALID, /* 01 0 0011 */
- INVALID, /* 01 0 0100 */
- { 4, LD+SE }, /* 01 0 0101: lwax */
- INVALID, /* 01 0 0110 */
- INVALID, /* 01 0 0111 */
- { 4, LD+M+HARD+SX }, /* 01 0 1000: lswx */
- { 4, LD+M+HARD }, /* 01 0 1001: lswi */
- { 4, ST+M+HARD+SX }, /* 01 0 1010: stswx */
- { 4, ST+M+HARD }, /* 01 0 1011: stswi */
- INVALID, /* 01 0 1100 */
- { 8, LD+U }, /* 01 0 1101: ldu */
- INVALID, /* 01 0 1110 */
- { 8, ST+U }, /* 01 0 1111: stdu */
- { 8, LD+U }, /* 01 1 0000: ldux */
- INVALID, /* 01 1 0001 */
- { 8, ST+U }, /* 01 1 0010: stdux */
- INVALID, /* 01 1 0011 */
- INVALID, /* 01 1 0100 */
- { 4, LD+SE+U }, /* 01 1 0101: lwaux */
- INVALID, /* 01 1 0110 */
- INVALID, /* 01 1 0111 */
- INVALID, /* 01 1 1000 */
- INVALID, /* 01 1 1001 */
- INVALID, /* 01 1 1010 */
- INVALID, /* 01 1 1011 */
- INVALID, /* 01 1 1100 */
- INVALID, /* 01 1 1101 */
- INVALID, /* 01 1 1110 */
- INVALID, /* 01 1 1111 */
- INVALID, /* 10 0 0000 */
- INVALID, /* 10 0 0001 */
- INVALID, /* 10 0 0010: stwcx. */
- INVALID, /* 10 0 0011 */
- INVALID, /* 10 0 0100 */
- INVALID, /* 10 0 0101 */
- INVALID, /* 10 0 0110 */
- INVALID, /* 10 0 0111 */
- { 4, LD+SW }, /* 10 0 1000: lwbrx */
- INVALID, /* 10 0 1001 */
- { 4, ST+SW }, /* 10 0 1010: stwbrx */
- INVALID, /* 10 0 1011 */
- { 2, LD+SW }, /* 10 0 1100: lhbrx */
- { 4, LD+SE }, /* 10 0 1101 lwa */
- { 2, ST+SW }, /* 10 0 1110: sthbrx */
- { 16, ST }, /* 10 0 1111: stq */
- INVALID, /* 10 1 0000 */
- INVALID, /* 10 1 0001 */
- INVALID, /* 10 1 0010 */
- INVALID, /* 10 1 0011 */
- INVALID, /* 10 1 0100 */
- INVALID, /* 10 1 0101 */
- INVALID, /* 10 1 0110 */
- INVALID, /* 10 1 0111 */
- INVALID, /* 10 1 1000 */
- INVALID, /* 10 1 1001 */
- INVALID, /* 10 1 1010 */
- INVALID, /* 10 1 1011 */
- INVALID, /* 10 1 1100 */
- INVALID, /* 10 1 1101 */
- INVALID, /* 10 1 1110 */
- { 0, ST+HARD }, /* 10 1 1111: dcbz */
- { 4, LD }, /* 11 0 0000: lwzx */
- INVALID, /* 11 0 0001 */
- { 4, ST }, /* 11 0 0010: stwx */
- INVALID, /* 11 0 0011 */
- { 2, LD }, /* 11 0 0100: lhzx */
- { 2, LD+SE }, /* 11 0 0101: lhax */
- { 2, ST }, /* 11 0 0110: sthx */
- INVALID, /* 11 0 0111 */
- { 4, LD+F+S }, /* 11 0 1000: lfsx */
- { 8, LD+F }, /* 11 0 1001: lfdx */
- { 4, ST+F+S }, /* 11 0 1010: stfsx */
- { 8, ST+F }, /* 11 0 1011: stfdx */
- { 16, LD+F }, /* 11 0 1100: lfdpx */
- { 4, LD+F+SE }, /* 11 0 1101: lfiwax */
- { 16, ST+F }, /* 11 0 1110: stfdpx */
- { 4, ST+F }, /* 11 0 1111: stfiwx */
- { 4, LD+U }, /* 11 1 0000: lwzux */
- INVALID, /* 11 1 0001 */
- { 4, ST+U }, /* 11 1 0010: stwux */
- INVALID, /* 11 1 0011 */
- { 2, LD+U }, /* 11 1 0100: lhzux */
- { 2, LD+SE+U }, /* 11 1 0101: lhaux */
- { 2, ST+U }, /* 11 1 0110: sthux */
- INVALID, /* 11 1 0111 */
- { 4, LD+F+S+U }, /* 11 1 1000: lfsux */
- { 8, LD+F+U }, /* 11 1 1001: lfdux */
- { 4, ST+F+S+U }, /* 11 1 1010: stfsux */
- { 8, ST+F+U }, /* 11 1 1011: stfdux */
- INVALID, /* 11 1 1100 */
- { 4, LD+F }, /* 11 1 1101: lfiwzx */
- INVALID, /* 11 1 1110 */
- INVALID, /* 11 1 1111 */
-};
-
-/*
- * The dcbz (data cache block zero) instruction
- * gives an alignment fault if used on non-cacheable
- * memory. We handle the fault mainly for the
- * case when we are running with the cache disabled
- * for debugging.
- */
-static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
-{
- long __user *p;
- int i, size;
-
-#ifdef __powerpc64__
- size = ppc64_caches.dline_size;
-#else
- size = L1_CACHE_BYTES;
-#endif
- p = (long __user *) (regs->dar & -size);
- if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, size))
- return -EFAULT;
- for (i = 0; i < size / sizeof(long); ++i)
- if (__put_user_inatomic(0, p+i))
- return -EFAULT;
- return 1;
-}
-
-/*
- * Emulate load & store multiple instructions
- * On 64-bit machines, these instructions only affect/use the
- * bottom 4 bytes of each register, and the loads clear the
- * top 4 bytes of the affected register.
- */
-#ifdef __BIG_ENDIAN__
-#ifdef CONFIG_PPC64
-#define REG_BYTE(rp, i) *((u8 *)((rp) + ((i) >> 2)) + ((i) & 3) + 4)
-#else
-#define REG_BYTE(rp, i) *((u8 *)(rp) + (i))
-#endif
-#endif
-
-#ifdef __LITTLE_ENDIAN__
-#define REG_BYTE(rp, i) (*(((u8 *)((rp) + ((i)>>2)) + ((i)&3))))
-#endif
-
-#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
-
-static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int nb,
- unsigned int flags, unsigned int instr,
- unsigned long swiz)
-{
- unsigned long *rptr;
- unsigned int nb0, i, bswiz;
- unsigned long p;
-
- /*
- * We do not try to emulate 8 bytes multiple as they aren't really
- * available in our operating environments and we don't try to
- * emulate multiples operations in kernel land as they should never
- * be used/generated there at least not on unaligned boundaries
- */
- if (unlikely((nb > 4) || !user_mode(regs)))
- return 0;
-
- /* lmw, stmw, lswi/x, stswi/x */
- nb0 = 0;
- if (flags & HARD) {
- if (flags & SX) {
- nb = regs->xer & 127;
- if (nb == 0)
- return 1;
- } else {
- unsigned long pc = regs->nip ^ (swiz & 4);
-
- if (__get_user_inatomic(instr,
- (unsigned int __user *)pc))
- return -EFAULT;
- if (swiz == 0 && (flags & SW))
- instr = cpu_to_le32(instr);
- nb = (instr >> 11) & 0x1f;
- if (nb == 0)
- nb = 32;
- }
- if (nb + reg * 4 > 128) {
- nb0 = nb + reg * 4 - 128;
- nb = 128 - reg * 4;
- }
-#ifdef __LITTLE_ENDIAN__
- /*
- * String instructions are endian neutral but the code
- * below is not. Force byte swapping on so that the
- * effects of swizzling are undone in the load/store
- * loops below.
- */
- flags ^= SW;
-#endif
- } else {
- /* lwm, stmw */
- nb = (32 - reg) * 4;
- }
-
- if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
- return -EFAULT; /* bad address */
-
- rptr = &regs->gpr[reg];
- p = (unsigned long) addr;
- bswiz = (flags & SW)? 3: 0;
-
- if (!(flags & ST)) {
- /*
- * This zeroes the top 4 bytes of the affected registers
- * in 64-bit mode, and also zeroes out any remaining
- * bytes of the last register for lsw*.
- */
- memset(rptr, 0, ((nb + 3) / 4) * sizeof(unsigned long));
- if (nb0 > 0)
- memset(&regs->gpr[0], 0,
- ((nb0 + 3) / 4) * sizeof(unsigned long));
-
- for (i = 0; i < nb; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
-
- } else {
- for (i = 0; i < nb; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- if (nb0 > 0) {
- rptr = &regs->gpr[0];
- addr += nb;
- for (i = 0; i < nb0; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
- return -EFAULT;
- }
- }
- return 1;
-}
-
-/*
- * Emulate floating-point pair loads and stores.
- * Only POWER6 has these instructions, and it does true little-endian,
- * so we don't need the address swizzling.
- */
-static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
- unsigned int flags)
-{
- char *ptr0 = (char *) &current->thread.TS_FPR(reg);
- char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: FRS/FRT must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-
-#ifdef CONFIG_PPC64
-static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
- unsigned int reg, unsigned int flags)
-{
- char *ptr0 = (char *)&regs->gpr[reg];
- char *ptr1 = (char *)&regs->gpr[reg+1];
- int i, ret, sw = 0;
-
- if (reg & 1)
- return 0; /* invalid form: GPR must be even */
- if (flags & SW)
- sw = 7;
- ret = 0;
- for (i = 0; i < 8; ++i) {
- if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
- } else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
- }
- }
- if (ret)
- return -EFAULT;
- return 1; /* exception handled and fixed up */
-}
-#endif /* CONFIG_PPC64 */
#ifdef CONFIG_SPE
@@ -463,9 +105,8 @@ static struct aligninfo spe_aligninfo[32] = {
* so we don't need the address swizzling.
*/
static int emulate_spe(struct pt_regs *regs, unsigned int reg,
- unsigned int instr)
+ ppc_inst_t ppc_instr)
{
- int ret;
union {
u64 ll;
u32 w[2];
@@ -474,8 +115,9 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
} data, temp;
unsigned char __user *p, *addr;
unsigned long *evr = &current->thread.evr[reg];
- unsigned int nb, flags;
+ unsigned int nb, flags, instr;
+ instr = ppc_inst_val(ppc_instr);
instr = (instr >> 1) & 0x1f;
/* DAR has the operand effective address */
@@ -484,12 +126,6 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
nb = spe_aligninfo[instr].len;
flags = spe_aligninfo[instr].flags;
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
- addr, nb)))
- return -EFAULT;
-
/* userland only */
if (unlikely(!user_mode(regs)))
return 0;
@@ -527,24 +163,27 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
}
} else {
temp.ll = data.ll = 0;
- ret = 0;
p = addr;
+ if (!user_read_access_begin(addr, nb))
+ return -EFAULT;
+
switch (nb) {
case 8:
- ret |= __get_user_inatomic(temp.v[0], p++);
- ret |= __get_user_inatomic(temp.v[1], p++);
- ret |= __get_user_inatomic(temp.v[2], p++);
- ret |= __get_user_inatomic(temp.v[3], p++);
+ unsafe_get_user(temp.v[0], p++, Efault_read);
+ unsafe_get_user(temp.v[1], p++, Efault_read);
+ unsafe_get_user(temp.v[2], p++, Efault_read);
+ unsafe_get_user(temp.v[3], p++, Efault_read);
+ fallthrough;
case 4:
- ret |= __get_user_inatomic(temp.v[4], p++);
- ret |= __get_user_inatomic(temp.v[5], p++);
+ unsafe_get_user(temp.v[4], p++, Efault_read);
+ unsafe_get_user(temp.v[5], p++, Efault_read);
+ fallthrough;
case 2:
- ret |= __get_user_inatomic(temp.v[6], p++);
- ret |= __get_user_inatomic(temp.v[7], p++);
- if (unlikely(ret))
- return -EFAULT;
+ unsafe_get_user(temp.v[6], p++, Efault_read);
+ unsafe_get_user(temp.v[7], p++, Efault_read);
}
+ user_read_access_end();
switch (instr) {
case EVLDD:
@@ -611,119 +250,43 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg,
/* Store result to memory or update registers */
if (flags & ST) {
- ret = 0;
p = addr;
+
+ if (!user_write_access_begin(addr, nb))
+ return -EFAULT;
+
switch (nb) {
case 8:
- ret |= __put_user_inatomic(data.v[0], p++);
- ret |= __put_user_inatomic(data.v[1], p++);
- ret |= __put_user_inatomic(data.v[2], p++);
- ret |= __put_user_inatomic(data.v[3], p++);
+ unsafe_put_user(data.v[0], p++, Efault_write);
+ unsafe_put_user(data.v[1], p++, Efault_write);
+ unsafe_put_user(data.v[2], p++, Efault_write);
+ unsafe_put_user(data.v[3], p++, Efault_write);
+ fallthrough;
case 4:
- ret |= __put_user_inatomic(data.v[4], p++);
- ret |= __put_user_inatomic(data.v[5], p++);
+ unsafe_put_user(data.v[4], p++, Efault_write);
+ unsafe_put_user(data.v[5], p++, Efault_write);
+ fallthrough;
case 2:
- ret |= __put_user_inatomic(data.v[6], p++);
- ret |= __put_user_inatomic(data.v[7], p++);
+ unsafe_put_user(data.v[6], p++, Efault_write);
+ unsafe_put_user(data.v[7], p++, Efault_write);
}
- if (unlikely(ret))
- return -EFAULT;
+ user_write_access_end();
} else {
*evr = data.w[0];
regs->gpr[reg] = data.w[1];
}
return 1;
-}
-#endif /* CONFIG_SPE */
-#ifdef CONFIG_VSX
-/*
- * Emulate VSX instructions...
- */
-static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
- unsigned int areg, struct pt_regs *regs,
- unsigned int flags, unsigned int length,
- unsigned int elsize)
-{
- char *ptr;
- unsigned long *lptr;
- int ret = 0;
- int sw = 0;
- int i, j;
+Efault_read:
+ user_read_access_end();
+ return -EFAULT;
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
-
- flush_vsx_to_thread(current);
-
- if (reg < 32)
- ptr = (char *) &current->thread.fp_state.fpr[reg][0];
- else
- ptr = (char *) &current->thread.vr_state.vr[reg - 32];
-
- lptr = (unsigned long *) ptr;
-
-#ifdef __LITTLE_ENDIAN__
- if (flags & SW) {
- elsize = length;
- sw = length-1;
- } else {
- /*
- * The elements are BE ordered, even in LE mode, so process
- * them in reverse order.
- */
- addr += length - elsize;
-
- /* 8 byte memory accesses go in the top 8 bytes of the VR */
- if (length == 8)
- ptr += 8;
- }
-#else
- if (flags & SW)
- sw = elsize-1;
-#endif
-
- for (j = 0; j < length; j += elsize) {
- for (i = 0; i < elsize; ++i) {
- if (flags & ST)
- ret |= __put_user(ptr[i^sw], addr + i);
- else
- ret |= __get_user(ptr[i^sw], addr + i);
- }
- ptr += elsize;
-#ifdef __LITTLE_ENDIAN__
- addr -= elsize;
-#else
- addr += elsize;
-#endif
- }
-
-#ifdef __BIG_ENDIAN__
-#define VSX_HI 0
-#define VSX_LO 1
-#else
-#define VSX_HI 1
-#define VSX_LO 0
-#endif
-
- if (!ret) {
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- /* Splat load copies the same data to top and bottom 8 bytes */
- if (flags & SPLT)
- lptr[VSX_LO] = lptr[VSX_HI];
- /* For 8 byte loads, zero the low 8 bytes */
- else if (!(flags & ST) && (8 == length))
- lptr[VSX_LO] = 0;
- } else
- return -EFAULT;
-
- return 1;
+Efault_write:
+ user_write_access_end();
+ return -EFAULT;
}
-#endif
+#endif /* CONFIG_SPE */
/*
* Called on alignment exception. Attempts to fixup
@@ -731,312 +294,71 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
* Return 1 on success
* Return 0 if unable to handle the interrupt
* Return -EFAULT if data address is bad
+ * Other negative return values indicate that the instruction can't
+ * be emulated, and the process should be given a SIGBUS.
*/
int fix_alignment(struct pt_regs *regs)
{
- unsigned int instr, nb, flags, instruction = 0;
- unsigned int reg, areg;
- unsigned int dsisr;
- unsigned char __user *addr;
- unsigned long p, swiz;
- int ret, i;
- union data {
- u64 ll;
- double dd;
- unsigned char v[8];
- struct {
-#ifdef __LITTLE_ENDIAN__
- int low32;
- unsigned hi32;
-#else
- unsigned hi32;
- int low32;
-#endif
- } x32;
- struct {
-#ifdef __LITTLE_ENDIAN__
- short low16;
- unsigned char hi48[6];
-#else
- unsigned char hi48[6];
- short low16;
-#endif
- } x16;
- } data;
-
- /*
- * We require a complete register set, if not, then our assembly
- * is broken
- */
- CHECK_FULL_REGS(regs);
+ ppc_inst_t instr;
+ struct instruction_op op;
+ int r, type;
- dsisr = regs->dsisr;
-
- /* Some processors don't provide us with a DSISR we can use here,
- * let's make one up from the instruction
- */
- if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
- unsigned long pc = regs->nip;
+ if (is_kernel_addr(regs->nip))
+ r = copy_inst_from_kernel_nofault(&instr, (void *)regs->nip);
+ else
+ r = __get_user_instr(instr, (void __user *)regs->nip);
- if (cpu_has_feature(CPU_FTR_PPC_LE) && (regs->msr & MSR_LE))
- pc ^= 4;
- if (unlikely(__get_user_inatomic(instr,
- (unsigned int __user *)pc)))
- return -EFAULT;
- if (cpu_has_feature(CPU_FTR_REAL_LE) && (regs->msr & MSR_LE))
- instr = cpu_to_le32(instr);
- dsisr = make_dsisr(instr);
- instruction = instr;
+ if (unlikely(r))
+ return -EFAULT;
+ if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
+ /* We don't handle PPC little-endian any more... */
+ if (cpu_has_feature(CPU_FTR_PPC_LE))
+ return -EIO;
+ instr = ppc_inst_swab(instr);
}
- /* extract the operation and registers from the dsisr */
- reg = (dsisr >> 5) & 0x1f; /* source/dest register */
- areg = dsisr & 0x1f; /* register to update */
-
#ifdef CONFIG_SPE
- if ((instr >> 26) == 0x4) {
+ if (ppc_inst_primary_opcode(instr) == 0x4) {
+ int reg = (ppc_inst_val(instr) >> 21) & 0x1f;
PPC_WARN_ALIGNMENT(spe, regs);
return emulate_spe(regs, reg, instr);
}
#endif
- instr = (dsisr >> 10) & 0x7f;
- instr |= (dsisr >> 13) & 0x60;
-
- /* Lookup the operation in our table */
- nb = aligninfo[instr].len;
- flags = aligninfo[instr].flags;
-
- /* ldbrx/stdbrx overlap lfs/stfs in the DSISR unfortunately */
- if (IS_XFORM(instruction) && ((instruction >> 1) & 0x3ff) == 532) {
- nb = 8;
- flags = LD+SW;
- } else if (IS_XFORM(instruction) &&
- ((instruction >> 1) & 0x3ff) == 660) {
- nb = 8;
- flags = ST+SW;
- }
-
- /* Byteswap little endian loads and stores */
- swiz = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE)) {
- flags ^= SW;
-#ifdef __BIG_ENDIAN__
- /*
- * So-called "PowerPC little endian" mode works by
- * swizzling addresses rather than by actually doing
- * any byte-swapping. To emulate this, we XOR each
- * byte address with 7. We also byte-swap, because
- * the processor's address swizzling depends on the
- * operand size (it xors the address with 7 for bytes,
- * 6 for halfwords, 4 for words, 0 for doublewords) but
- * we will xor with 7 and load/store each byte separately.
- */
- if (cpu_has_feature(CPU_FTR_PPC_LE))
- swiz = 7;
-#endif
- }
-
- /* DAR has the operand effective address */
- addr = (unsigned char __user *)regs->dar;
-#ifdef CONFIG_VSX
- if ((instruction & 0xfc00003e) == 0x7c000018) {
- unsigned int elsize;
-
- /* Additional register addressing bit (64 VSX vs 32 FPR/GPR) */
- reg |= (instruction & 0x1) << 5;
- /* Simple inline decoder instead of a table */
- /* VSX has only 8 and 16 byte memory accesses */
- nb = 8;
- if (instruction & 0x200)
- nb = 16;
-
- /* Vector stores in little-endian mode swap individual
- elements, so process them separately */
- elsize = 4;
- if (instruction & 0x80)
- elsize = 8;
-
- flags = 0;
- if ((regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE))
- flags |= SW;
- if (instruction & 0x100)
- flags |= ST;
- if (instruction & 0x040)
- flags |= U;
- /* splat load needs a special decoder */
- if ((instruction & 0x400) == 0){
- flags |= SPLT;
- nb = 8;
- }
- PPC_WARN_ALIGNMENT(vsx, regs);
- return emulate_vsx(addr, reg, areg, regs, flags, nb, elsize);
- }
-#endif
- /* A size of 0 indicates an instruction we don't support, with
- * the exception of DCBZ which is handled as a special case here
- */
- if (instr == DCBZ) {
- PPC_WARN_ALIGNMENT(dcbz, regs);
- return emulate_dcbz(regs, addr);
- }
- if (unlikely(nb == 0))
- return 0;
-
- /* Load/Store Multiple instructions are handled in their own
- * function
- */
- if (flags & M) {
- PPC_WARN_ALIGNMENT(multiple, regs);
- return emulate_multiple(regs, addr, reg, nb,
- flags, instr, swiz);
- }
-
- /* Verify the address of the operand */
- if (unlikely(user_mode(regs) &&
- !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
- addr, nb)))
- return -EFAULT;
-
- /* Force the fprs into the save area so we can reference them */
- if (flags & F) {
- /* userland only */
- if (unlikely(!user_mode(regs)))
- return 0;
- flush_fp_to_thread(current);
- }
-
- if ((nb == 16)) {
- if (flags & F) {
- /* Special case for 16-byte FP loads and stores */
- PPC_WARN_ALIGNMENT(fp_pair, regs);
- return emulate_fp_pair(addr, reg, flags);
- } else {
-#ifdef CONFIG_PPC64
- /* Special case for 16-byte loads and stores */
- PPC_WARN_ALIGNMENT(lq_stq, regs);
- return emulate_lq_stq(regs, addr, reg, flags);
-#else
- return 0;
-#endif
- }
- }
-
- PPC_WARN_ALIGNMENT(unaligned, regs);
-
- /* If we are loading, get the data from user space, else
- * get it from register values
+ /*
+ * ISA 3.0 (such as P9) copy, copy_first, paste and paste_last alignment
+ * check.
+ *
+ * Send a SIGBUS to the process that caused the fault.
+ *
+ * We do not emulate these because paste may contain additional metadata
+ * when pasting to a co-processor. Furthermore, paste_last is the
+ * synchronisation point for preceding copy/paste sequences.
*/
- if (!(flags & ST)) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- data.ll = 0;
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __get_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
+ if ((ppc_inst_val(instr) & 0xfc0006fe) == (PPC_INST_COPY & 0xfc0006fe))
+ return -EIO;
- } else if (flags & F) {
- data.ll = current->thread.TS_FPR(reg);
- if (flags & S) {
- /* Single-precision FP store requires conversion... */
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_df(&data.dd, (float *)&data.x32.low32);
- preempt_enable();
-#else
- return 0;
-#endif
- }
- } else
- data.ll = regs->gpr[reg];
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return -EINVAL;
- if (flags & SW) {
- switch (nb) {
- case 8:
- data.ll = swab64(data.ll);
- break;
- case 4:
- data.x32.low32 = swab32(data.x32.low32);
- break;
- case 2:
- data.x16.low16 = swab16(data.x16.low16);
- break;
- }
- }
-
- /* Perform other misc operations like sign extension
- * or floating point single precision conversion
- */
- switch (flags & ~(U|SW)) {
- case LD+SE: /* sign extending integer loads */
- case LD+F+SE: /* sign extend for lfiwax */
- if ( nb == 2 )
- data.ll = data.x16.low16;
- else /* nb must be 4 */
- data.ll = data.x32.low32;
- break;
-
- /* Single-precision FP load requires conversion... */
- case LD+F+S:
-#ifdef CONFIG_PPC_FPU
- preempt_disable();
- enable_kernel_fp();
- cvt_fd((float *)&data.x32.low32, &data.dd);
- preempt_enable();
-#else
- return 0;
-#endif
- break;
+ type = GETTYPE(op.type);
+ if (!OP_IS_LOAD_STORE(type)) {
+ if (op.type != CACHEOP + DCBZ)
+ return -EINVAL;
+ PPC_WARN_ALIGNMENT(dcbz, regs);
+ WARN_ON_ONCE(!user_mode(regs));
+ r = emulate_dcbz(op.ea, regs);
+ } else {
+ if (type == LARX || type == STCX)
+ return -EIO;
+ PPC_WARN_ALIGNMENT(unaligned, regs);
+ r = emulate_loadstore(regs, &op);
}
- /* Store result to memory or update registers */
- if (flags & ST) {
- unsigned int start = 0;
-
- switch (nb) {
- case 4:
- start = offsetof(union data, x32.low32);
- break;
- case 2:
- start = offsetof(union data, x16.low16);
- break;
- }
-
- ret = 0;
- p = (unsigned long)addr;
-
- for (i = 0; i < nb; i++)
- ret |= __put_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
- } else if (flags & F)
- current->thread.TS_FPR(reg) = data.ll;
- else
- regs->gpr[reg] = data.ll;
-
- /* Update RA as needed */
- if (flags & U)
- regs->gpr[areg] = regs->dar;
-
- return 1;
+ if (!r)
+ return 1;
+ return r;
}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9d7dede2847c..a4bc80b30410 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This program is used to generate definitions needed by
* assembly language modules.
@@ -6,13 +7,10 @@
* generate asm statements containing #defines,
* compile this file to assembler, and then extract the
* #defines from the assembly-language output.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#define COMPILE_OFFSETS
+#include <linux/compat.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -31,17 +29,16 @@
#include <asm/io.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/rtas.h>
#include <asm/vdso_datapage.h>
+#include <asm/dbell.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
#include <asm/cache.h>
-#include <asm/compat.h>
#include <asm/mmu.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
@@ -58,324 +55,279 @@
#endif
#ifdef CONFIG_PPC32
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE
#include "head_booke.h"
#endif
#endif
-#if defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_E500)
#include "../mm/mmu_decl.h"
#endif
+#ifdef CONFIG_PPC_8xx
+#include <asm/fixmap.h>
+#endif
+
+#ifdef CONFIG_XMON
+#include "../xmon/xmon_bpts.h"
+#endif
+
+#define STACK_PT_REGS_OFFSET(sym, val) \
+ DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val))
+
int main(void)
{
- DEFINE(THREAD, offsetof(struct task_struct, thread));
- DEFINE(MM, offsetof(struct task_struct, mm));
- DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id));
+ OFFSET(THREAD, task_struct, thread);
+ OFFSET(MM, task_struct, mm);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(TASK_CANARY, task_struct, stack_canary);
#ifdef CONFIG_PPC64
- DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
- DEFINE(SIGSEGV, SIGSEGV);
- DEFINE(NMI_MASK, NMI_MASK);
- DEFINE(THREAD_DSCR, offsetof(struct thread_struct, dscr));
- DEFINE(THREAD_DSCR_INHERIT, offsetof(struct thread_struct, dscr_inherit));
- DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
-#else
- DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
- DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
- DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
+ OFFSET(PACA_CANARY, paca_struct, canary);
+#endif
+#endif
+#ifdef CONFIG_PPC32
+#ifdef CONFIG_PPC_RTAS
+ OFFSET(RTAS_SP, thread_struct, rtas_sp);
+#endif
#endif /* CONFIG_PPC64 */
+ OFFSET(TASK_STACK, task_struct, stack);
+#ifdef CONFIG_SMP
+ OFFSET(TASK_CPU, task_struct, thread_info.cpu);
+#endif
+
+#ifdef CONFIG_LIVEPATCH_64
+ OFFSET(TI_livepatch_sp, thread_info, livepatch_sp);
+#endif
- DEFINE(KSP, offsetof(struct thread_struct, ksp));
- DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
+ OFFSET(KSP, thread_struct, ksp);
+ OFFSET(PT_REGS, thread_struct, regs);
#ifdef CONFIG_BOOKE
- DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
+ OFFSET(THREAD_NORMSAVES, thread_struct, normsave[0]);
+#endif
+#ifdef CONFIG_PPC_FPU
+ OFFSET(THREAD_FPEXC_MODE, thread_struct, fpexc_mode);
+ OFFSET(THREAD_FPSTATE, thread_struct, fp_state.fpr);
+ OFFSET(THREAD_FPSAVEAREA, thread_struct, fp_save_area);
#endif
- DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode));
- DEFINE(THREAD_FPSTATE, offsetof(struct thread_struct, fp_state));
- DEFINE(THREAD_FPSAVEAREA, offsetof(struct thread_struct, fp_save_area));
- DEFINE(FPSTATE_FPSCR, offsetof(struct thread_fp_state, fpscr));
+ OFFSET(FPSTATE_FPSCR, thread_fp_state, fpscr);
+ OFFSET(THREAD_LOAD_FP, thread_struct, load_fp);
#ifdef CONFIG_ALTIVEC
- DEFINE(THREAD_VRSTATE, offsetof(struct thread_struct, vr_state));
- DEFINE(THREAD_VRSAVEAREA, offsetof(struct thread_struct, vr_save_area));
- DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave));
- DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr));
- DEFINE(VRSTATE_VSCR, offsetof(struct thread_vr_state, vscr));
+ OFFSET(THREAD_VRSTATE, thread_struct, vr_state.vr);
+ OFFSET(THREAD_VRSAVEAREA, thread_struct, vr_save_area);
+ OFFSET(THREAD_USED_VR, thread_struct, used_vr);
+ OFFSET(VRSTATE_VSCR, thread_vr_state, vscr);
+ OFFSET(THREAD_LOAD_VEC, thread_struct, load_vec);
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
- DEFINE(THREAD_USED_VSR, offsetof(struct thread_struct, used_vsr));
+ OFFSET(THREAD_USED_VSR, thread_struct, used_vsr);
#endif /* CONFIG_VSX */
#ifdef CONFIG_PPC64
- DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid));
+ OFFSET(KSP_VSID, thread_struct, ksp_vsid);
#else /* CONFIG_PPC64 */
- DEFINE(PGDIR, offsetof(struct thread_struct, pgdir));
+ OFFSET(PGDIR, thread_struct, pgdir);
+ OFFSET(SRR0, thread_struct, srr0);
+ OFFSET(SRR1, thread_struct, srr1);
+ OFFSET(DAR, thread_struct, dar);
+ OFFSET(DSISR, thread_struct, dsisr);
+#ifdef CONFIG_PPC_BOOK3S_32
+ OFFSET(THR0, thread_struct, r0);
+ OFFSET(THR3, thread_struct, r3);
+ OFFSET(THR4, thread_struct, r4);
+ OFFSET(THR5, thread_struct, r5);
+ OFFSET(THR6, thread_struct, r6);
+ OFFSET(THR8, thread_struct, r8);
+ OFFSET(THR9, thread_struct, r9);
+ OFFSET(THR11, thread_struct, r11);
+ OFFSET(THLR, thread_struct, lr);
+ OFFSET(THCTR, thread_struct, ctr);
+ OFFSET(THSR0, thread_struct, sr0);
+#endif
#ifdef CONFIG_SPE
- DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0]));
- DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc));
- DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr));
- DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe));
+ OFFSET(THREAD_EVR0, thread_struct, evr[0]);
+ OFFSET(THREAD_ACC, thread_struct, acc);
+ OFFSET(THREAD_USED_SPE, thread_struct, used_spe);
#endif /* CONFIG_SPE */
#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, debug.dbcr0));
-#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
- DEFINE(THREAD_KVM_SVCPU, offsetof(struct thread_struct, kvm_shadow_vcpu));
+ OFFSET(THREAD_KVM_SVCPU, thread_struct, kvm_shadow_vcpu);
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
- DEFINE(THREAD_KVM_VCPU, offsetof(struct thread_struct, kvm_vcpu));
+ OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- DEFINE(THREAD_TAR, offsetof(struct thread_struct, tar));
- DEFINE(THREAD_BESCR, offsetof(struct thread_struct, bescr));
- DEFINE(THREAD_EBBHR, offsetof(struct thread_struct, ebbhr));
- DEFINE(THREAD_EBBRR, offsetof(struct thread_struct, ebbrr));
- DEFINE(THREAD_SIAR, offsetof(struct thread_struct, siar));
- DEFINE(THREAD_SDAR, offsetof(struct thread_struct, sdar));
- DEFINE(THREAD_SIER, offsetof(struct thread_struct, sier));
- DEFINE(THREAD_MMCR0, offsetof(struct thread_struct, mmcr0));
- DEFINE(THREAD_MMCR2, offsetof(struct thread_struct, mmcr2));
-#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- DEFINE(PACATMSCRATCH, offsetof(struct paca_struct, tm_scratch));
- DEFINE(THREAD_TM_TFHAR, offsetof(struct thread_struct, tm_tfhar));
- DEFINE(THREAD_TM_TEXASR, offsetof(struct thread_struct, tm_texasr));
- DEFINE(THREAD_TM_TFIAR, offsetof(struct thread_struct, tm_tfiar));
- DEFINE(THREAD_TM_TAR, offsetof(struct thread_struct, tm_tar));
- DEFINE(THREAD_TM_PPR, offsetof(struct thread_struct, tm_ppr));
- DEFINE(THREAD_TM_DSCR, offsetof(struct thread_struct, tm_dscr));
- DEFINE(PT_CKPT_REGS, offsetof(struct thread_struct, ckpt_regs));
- DEFINE(THREAD_TRANSACT_VRSTATE, offsetof(struct thread_struct,
- transact_vr));
- DEFINE(THREAD_TRANSACT_VRSAVE, offsetof(struct thread_struct,
- transact_vrsave));
- DEFINE(THREAD_TRANSACT_FPSTATE, offsetof(struct thread_struct,
- transact_fp));
- /* Local pt_regs on stack for Transactional Memory funcs. */
- DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
- sizeof(struct pt_regs) + 16);
+ OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
+ OFFSET(THREAD_TM_TFHAR, thread_struct, tm_tfhar);
+ OFFSET(THREAD_TM_TEXASR, thread_struct, tm_texasr);
+ OFFSET(THREAD_TM_TFIAR, thread_struct, tm_tfiar);
+ OFFSET(THREAD_TM_TAR, thread_struct, tm_tar);
+ OFFSET(THREAD_TM_PPR, thread_struct, tm_ppr);
+ OFFSET(THREAD_TM_DSCR, thread_struct, tm_dscr);
+ OFFSET(THREAD_TM_AMR, thread_struct, tm_amr);
+ OFFSET(PT_CKPT_REGS, thread_struct, ckpt_regs);
+ OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
+ OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
+ OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
+ /* Local pt_regs on stack in int frame form, plus 16 bytes for TM */
+ DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16);
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
- DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
- DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
- DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
- DEFINE(TI_TASK, offsetof(struct thread_info, task));
- DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
+ OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
#ifdef CONFIG_PPC64
- DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
- DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size));
- DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page));
- DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size));
- DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size));
- DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page));
+ OFFSET(DCACHEL1BLOCKSIZE, ppc64_caches, l1d.block_size);
+ OFFSET(DCACHEL1LOGBLOCKSIZE, ppc64_caches, l1d.log_block_size);
/* paca */
- DEFINE(PACA_SIZE, sizeof(struct paca_struct));
- DEFINE(PACA_LOCK_TOKEN, offsetof(struct paca_struct, lock_token));
- DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index));
- DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start));
- DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack));
- DEFINE(PACACURRENT, offsetof(struct paca_struct, __current));
- DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr));
- DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr));
- DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1));
- DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc));
- DEFINE(PACAKBASE, offsetof(struct paca_struct, kernelbase));
- DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
- DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
- DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
- DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
-#ifdef CONFIG_PPC_MM_SLICES
- DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
- context.low_slices_psize));
- DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
- context.high_slices_psize));
- DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifdef CONFIG_PPC_BOOK3E
- DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
- DEFINE(PACA_KERNELPGD, offsetof(struct paca_struct, kernel_pgd));
- DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
- DEFINE(PACA_EXTLB, offsetof(struct paca_struct, extlb));
- DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
- DEFINE(PACA_EXCRIT, offsetof(struct paca_struct, excrit));
- DEFINE(PACA_EXDBG, offsetof(struct paca_struct, exdbg));
- DEFINE(PACA_MC_STACK, offsetof(struct paca_struct, mc_kstack));
- DEFINE(PACA_CRIT_STACK, offsetof(struct paca_struct, crit_kstack));
- DEFINE(PACA_DBG_STACK, offsetof(struct paca_struct, dbg_kstack));
- DEFINE(PACA_TCD_PTR, offsetof(struct paca_struct, tcd_ptr));
-
- DEFINE(TCD_ESEL_NEXT,
- offsetof(struct tlb_core_data, esel_next));
- DEFINE(TCD_ESEL_MAX,
- offsetof(struct tlb_core_data, esel_max));
- DEFINE(TCD_ESEL_FIRST,
- offsetof(struct tlb_core_data, esel_first));
- DEFINE(TCD_LOCK, offsetof(struct tlb_core_data, lock));
-#endif /* CONFIG_PPC_BOOK3E */
-
-#ifdef CONFIG_PPC_STD_MMU_64
- DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache));
- DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr));
- DEFINE(PACAVMALLOCSLLP, offsetof(struct paca_struct, vmalloc_sllp));
-#ifdef CONFIG_PPC_MM_SLICES
- DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
-#else
- DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
-#endif /* CONFIG_PPC_MM_SLICES */
- DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
- DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
- DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb));
- DEFINE(PACALPPACAPTR, offsetof(struct paca_struct, lppaca_ptr));
- DEFINE(PACA_SLBSHADOWPTR, offsetof(struct paca_struct, slb_shadow_ptr));
- DEFINE(SLBSHADOW_STACKVSID,
- offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid));
- DEFINE(SLBSHADOW_STACKESID,
- offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid));
- DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area));
- DEFINE(LPPACA_PMCINUSE, offsetof(struct lppaca, pmcregs_in_use));
- DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx));
- DEFINE(LPPACA_YIELDCOUNT, offsetof(struct lppaca, yield_count));
- DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx));
-#endif /* CONFIG_PPC_STD_MMU_64 */
- DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp));
+ OFFSET(PACAPACAINDEX, paca_struct, paca_index);
+ OFFSET(PACAPROCSTART, paca_struct, cpu_start);
+ OFFSET(PACAKSAVE, paca_struct, kstack);
+ OFFSET(PACACURRENT, paca_struct, __current);
+ DEFINE(PACA_THREAD_INFO, offsetof(struct paca_struct, __current) +
+ offsetof(struct task_struct, thread_info));
+ OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
+ OFFSET(PACAR1, paca_struct, saved_r1);
+#ifndef CONFIG_PPC_KERNEL_PCREL
+ OFFSET(PACATOC, paca_struct, kernel_toc);
+#endif
+ OFFSET(PACAKBASE, paca_struct, kernelbase);
+ OFFSET(PACAKMSR, paca_struct, kernel_msr);
+#ifdef CONFIG_PPC_BOOK3S_64
+ OFFSET(PACAHSRR_VALID, paca_struct, hsrr_valid);
+ OFFSET(PACASRR_VALID, paca_struct, srr_valid);
+#endif
+ OFFSET(PACAIRQSOFTMASK, paca_struct, irq_soft_mask);
+ OFFSET(PACAIRQHAPPENED, paca_struct, irq_happened);
+ OFFSET(PACA_FTRACE_ENABLED, paca_struct, ftrace_enabled);
+
+#ifdef CONFIG_PPC_BOOK3E_64
+ OFFSET(PACAPGD, paca_struct, pgd);
+ OFFSET(PACA_KERNELPGD, paca_struct, kernel_pgd);
+ OFFSET(PACA_EXGEN, paca_struct, exgen);
+ OFFSET(PACA_EXTLB, paca_struct, extlb);
+ OFFSET(PACA_EXMC, paca_struct, exmc);
+ OFFSET(PACA_EXCRIT, paca_struct, excrit);
+ OFFSET(PACA_EXDBG, paca_struct, exdbg);
+ OFFSET(PACA_MC_STACK, paca_struct, mc_kstack);
+ OFFSET(PACA_CRIT_STACK, paca_struct, crit_kstack);
+ OFFSET(PACA_DBG_STACK, paca_struct, dbg_kstack);
+ OFFSET(PACA_TCD_PTR, paca_struct, tcd_ptr);
+
+ OFFSET(TCD_ESEL_NEXT, tlb_core_data, esel_next);
+ OFFSET(TCD_ESEL_MAX, tlb_core_data, esel_max);
+ OFFSET(TCD_ESEL_FIRST, tlb_core_data, esel_first);
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ OFFSET(PACA_EXGEN, paca_struct, exgen);
+ OFFSET(PACA_EXMC, paca_struct, exmc);
+ OFFSET(PACA_EXNMI, paca_struct, exnmi);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ OFFSET(PACA_SLBSHADOWPTR, paca_struct, slb_shadow_ptr);
+ OFFSET(SLBSHADOW_STACKVSID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid);
+ OFFSET(SLBSHADOW_STACKESID, slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid);
+ OFFSET(SLBSHADOW_SAVEAREA, slb_shadow, save_area);
+#endif
+ OFFSET(LPPACA_PMCINUSE, lppaca, pmcregs_in_use);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ OFFSET(PACA_PMCINUSE, paca_struct, pmcregs_in_use);
+#endif
+ OFFSET(LPPACA_YIELDCOUNT, lppaca, yield_count);
+#endif /* CONFIG_PPC_BOOK3S_64 */
+ OFFSET(PACAEMERGSP, paca_struct, emergency_sp);
#ifdef CONFIG_PPC_BOOK3S_64
- DEFINE(PACAMCEMERGSP, offsetof(struct paca_struct, mc_emergency_sp));
- DEFINE(PACA_IN_MCE, offsetof(struct paca_struct, in_mce));
-#endif
- DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id));
- DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state));
- DEFINE(PACA_DSCR, offsetof(struct paca_struct, dscr_default));
- DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime));
- DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user));
- DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time));
- DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time));
- DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save));
- DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost));
- DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso));
+ OFFSET(PACAMCEMERGSP, paca_struct, mc_emergency_sp);
+ OFFSET(PACA_NMI_EMERG_SP, paca_struct, nmi_emergency_sp);
+ OFFSET(PACA_IN_MCE, paca_struct, in_mce);
+ OFFSET(PACA_IN_NMI, paca_struct, in_nmi);
+ OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area);
+ OFFSET(PACA_EXRFI, paca_struct, exrfi);
+ OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size);
+
+#endif
+ OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id);
+ OFFSET(PACAKEXECSTATE, paca_struct, kexec_state);
+ OFFSET(PACA_DSCR_DEFAULT, paca_struct, dscr_default);
+ OFFSET(PACA_EXIT_SAVE_R1, paca_struct, exit_save_r1);
+#ifdef CONFIG_PPC_BOOK3E_64
+ OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
+#endif
+ OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
+#else /* CONFIG_PPC64 */
#endif /* CONFIG_PPC64 */
/* RTAS */
- DEFINE(RTASBASE, offsetof(struct rtas_t, base));
- DEFINE(RTASENTRY, offsetof(struct rtas_t, entry));
+ OFFSET(RTASBASE, rtas_t, base);
+ OFFSET(RTASENTRY, rtas_t, entry);
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
- DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#ifdef CONFIG_PPC64
- /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
- DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
- DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-
- /* hcall statistics */
- DEFINE(HCALL_STAT_SIZE, sizeof(struct hcall_stats));
- DEFINE(HCALL_STAT_CALLS, offsetof(struct hcall_stats, num_calls));
- DEFINE(HCALL_STAT_TB, offsetof(struct hcall_stats, tb_total));
- DEFINE(HCALL_STAT_PURR, offsetof(struct hcall_stats, purr_total));
-#endif /* CONFIG_PPC64 */
- DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0]));
- DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1]));
- DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2]));
- DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3]));
- DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4]));
- DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5]));
- DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6]));
- DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7]));
- DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8]));
- DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9]));
- DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10]));
- DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11]));
- DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12]));
- DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13]));
-#ifndef CONFIG_PPC64
- DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14]));
- DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15]));
- DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16]));
- DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17]));
- DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18]));
- DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19]));
- DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20]));
- DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21]));
- DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22]));
- DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23]));
- DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24]));
- DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25]));
- DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26]));
- DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27]));
- DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28]));
- DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29]));
- DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30]));
- DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31]));
-#endif /* CONFIG_PPC64 */
+ DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE);
+ STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
+ STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
+ STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
+ STACK_PT_REGS_OFFSET(GPR3, gpr[3]);
+ STACK_PT_REGS_OFFSET(GPR4, gpr[4]);
+ STACK_PT_REGS_OFFSET(GPR5, gpr[5]);
+ STACK_PT_REGS_OFFSET(GPR6, gpr[6]);
+ STACK_PT_REGS_OFFSET(GPR7, gpr[7]);
+ STACK_PT_REGS_OFFSET(GPR8, gpr[8]);
+ STACK_PT_REGS_OFFSET(GPR9, gpr[9]);
+ STACK_PT_REGS_OFFSET(GPR10, gpr[10]);
+ STACK_PT_REGS_OFFSET(GPR11, gpr[11]);
+ STACK_PT_REGS_OFFSET(GPR12, gpr[12]);
+ STACK_PT_REGS_OFFSET(GPR13, gpr[13]);
/*
* Note: these symbols include _ because they overlap with special
* register names
*/
- DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip));
- DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr));
- DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr));
- DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link));
- DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr));
- DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer));
- DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
- DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
- DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3));
- DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result));
- DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap));
-#ifndef CONFIG_PPC64
- DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq));
- /*
- * The PowerPC 400-class & Book-E processors have neither the DAR
- * nor the DSISR SPRs. Hence, we overload them to hold the similar
- * DEAR and ESR SPRs for such processors. For critical interrupts
- * we use them to hold SRR0 and SRR1.
- */
- DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar));
- DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr));
-#else /* CONFIG_PPC64 */
- DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe));
+ STACK_PT_REGS_OFFSET(_NIP, nip);
+ STACK_PT_REGS_OFFSET(_MSR, msr);
+ STACK_PT_REGS_OFFSET(_CTR, ctr);
+ STACK_PT_REGS_OFFSET(_LINK, link);
+ STACK_PT_REGS_OFFSET(_CCR, ccr);
+ STACK_PT_REGS_OFFSET(_XER, xer);
+ STACK_PT_REGS_OFFSET(_DAR, dar);
+ STACK_PT_REGS_OFFSET(_DEAR, dear);
+ STACK_PT_REGS_OFFSET(_DSISR, dsisr);
+ STACK_PT_REGS_OFFSET(_ESR, esr);
+ STACK_PT_REGS_OFFSET(ORIG_GPR3, orig_gpr3);
+ STACK_PT_REGS_OFFSET(RESULT, result);
+ STACK_PT_REGS_OFFSET(_TRAP, trap);
+#ifdef CONFIG_PPC64
+ STACK_PT_REGS_OFFSET(SOFTE, softe);
+ STACK_PT_REGS_OFFSET(_PPR, ppr);
+#endif
- /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
- DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
- DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
-#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_PKEY
+ STACK_PT_REGS_OFFSET(STACK_REGS_AMR, amr);
+ STACK_PT_REGS_OFFSET(STACK_REGS_IAMR, iamr);
+#endif
-#if defined(CONFIG_PPC32)
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
- DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
- DEFINE(MAS0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
+#if defined(CONFIG_PPC32) && defined(CONFIG_BOOKE)
+ STACK_PT_REGS_OFFSET(MAS0, mas0);
/* we overload MMUCR for 44x on MAS0 since they are mutually exclusive */
- DEFINE(MMUCR, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas0));
- DEFINE(MAS1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas1));
- DEFINE(MAS2, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas2));
- DEFINE(MAS3, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas3));
- DEFINE(MAS6, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas6));
- DEFINE(MAS7, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, mas7));
- DEFINE(_SRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr0));
- DEFINE(_SRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, srr1));
- DEFINE(_CSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr0));
- DEFINE(_CSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, csrr1));
- DEFINE(_DSRR0, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr0));
- DEFINE(_DSRR1, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, dsrr1));
- DEFINE(SAVED_KSP_LIMIT, STACK_INT_FRAME_SIZE+offsetof(struct exception_regs, saved_ksp_limit));
-#endif
-#endif
- DEFINE(CLONE_VM, CLONE_VM);
- DEFINE(CLONE_UNTRACED, CLONE_UNTRACED);
-
-#ifndef CONFIG_PPC64
- DEFINE(MM_PGD, offsetof(struct mm_struct, pgd));
-#endif /* ! CONFIG_PPC64 */
+ STACK_PT_REGS_OFFSET(MMUCR, mas0);
+ STACK_PT_REGS_OFFSET(MAS1, mas1);
+ STACK_PT_REGS_OFFSET(MAS2, mas2);
+ STACK_PT_REGS_OFFSET(MAS3, mas3);
+ STACK_PT_REGS_OFFSET(MAS6, mas6);
+ STACK_PT_REGS_OFFSET(MAS7, mas7);
+ STACK_PT_REGS_OFFSET(_SRR0, srr0);
+ STACK_PT_REGS_OFFSET(_SRR1, srr1);
+ STACK_PT_REGS_OFFSET(_CSRR0, csrr0);
+ STACK_PT_REGS_OFFSET(_CSRR1, csrr1);
+ STACK_PT_REGS_OFFSET(_DSRR0, dsrr0);
+ STACK_PT_REGS_OFFSET(_DSRR1, dsrr1);
+#endif
/* About the CPU features table */
- DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features));
- DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup));
- DEFINE(CPU_SPEC_RESTORE, offsetof(struct cpu_spec, cpu_restore));
+ OFFSET(CPU_SPEC_FEATURES, cpu_spec, cpu_features);
+ OFFSET(CPU_SPEC_SETUP, cpu_spec, cpu_setup);
+ OFFSET(CPU_SPEC_RESTORE, cpu_spec, cpu_restore);
- DEFINE(pbe_address, offsetof(struct pbe, address));
- DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
- DEFINE(pbe_next, offsetof(struct pbe, next));
+ OFFSET(pbe_address, pbe, address);
+ OFFSET(pbe_orig_address, pbe, orig_address);
+ OFFSET(pbe_next, pbe, next);
#ifndef CONFIG_PPC64
DEFINE(TASK_SIZE, TASK_SIZE);
@@ -383,205 +335,185 @@ int main(void)
#endif /* ! CONFIG_PPC64 */
/* datapage offsets for use by vdso */
- DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct vdso_data, tb_orig_stamp));
- DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct vdso_data, tb_ticks_per_sec));
- DEFINE(CFG_TB_TO_XS, offsetof(struct vdso_data, tb_to_xs));
- DEFINE(CFG_STAMP_XSEC, offsetof(struct vdso_data, stamp_xsec));
- DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct vdso_data, tb_update_count));
- DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct vdso_data, tz_minuteswest));
- DEFINE(CFG_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime));
- DEFINE(CFG_SYSCALL_MAP32, offsetof(struct vdso_data, syscall_map_32));
- DEFINE(WTOM_CLOCK_SEC, offsetof(struct vdso_data, wtom_clock_sec));
- DEFINE(WTOM_CLOCK_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
- DEFINE(STAMP_XTIME, offsetof(struct vdso_data, stamp_xtime));
- DEFINE(STAMP_SEC_FRAC, offsetof(struct vdso_data, stamp_sec_fraction));
- DEFINE(CFG_ICACHE_BLOCKSZ, offsetof(struct vdso_data, icache_block_size));
- DEFINE(CFG_DCACHE_BLOCKSZ, offsetof(struct vdso_data, dcache_block_size));
- DEFINE(CFG_ICACHE_LOGBLOCKSZ, offsetof(struct vdso_data, icache_log_block_size));
- DEFINE(CFG_DCACHE_LOGBLOCKSZ, offsetof(struct vdso_data, dcache_log_block_size));
+ OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec);
#ifdef CONFIG_PPC64
- DEFINE(CFG_SYSCALL_MAP64, offsetof(struct vdso_data, syscall_map_64));
- DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec));
- DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec));
- DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec));
- DEFINE(TSPC64_TV_SEC, offsetof(struct timespec, tv_sec));
- DEFINE(TSPC64_TV_NSEC, offsetof(struct timespec, tv_nsec));
- DEFINE(TSPC32_TV_SEC, offsetof(struct compat_timespec, tv_sec));
- DEFINE(TSPC32_TV_NSEC, offsetof(struct compat_timespec, tv_nsec));
+ OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size);
+ OFFSET(CFG_DCACHE_BLOCKSZ, vdso_arch_data, dcache_block_size);
+ OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_arch_data, icache_log_block_size);
+ OFFSET(CFG_DCACHE_LOGBLOCKSZ, vdso_arch_data, dcache_log_block_size);
+ OFFSET(CFG_SYSCALL_MAP64, vdso_arch_data, syscall_map);
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, compat_syscall_map);
#else
- DEFINE(TVAL32_TV_SEC, offsetof(struct timeval, tv_sec));
- DEFINE(TVAL32_TV_USEC, offsetof(struct timeval, tv_usec));
- DEFINE(TSPC32_TV_SEC, offsetof(struct timespec, tv_sec));
- DEFINE(TSPC32_TV_NSEC, offsetof(struct timespec, tv_nsec));
+ OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map);
#endif
- /* timeval/timezone offsets for use by vdso */
- DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest));
- DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime));
-
- /* Other bits used by the vdso */
- DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
- DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
#ifdef CONFIG_BUG
DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry));
#endif
- DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
- DEFINE(PTE_SIZE, sizeof(pte_t));
-
#ifdef CONFIG_KVM
- DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
- DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
- DEFINE(VCPU_GUEST_PID, offsetof(struct kvm_vcpu, arch.pid));
- DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
- DEFINE(VCPU_VRSAVE, offsetof(struct kvm_vcpu, arch.vrsave));
- DEFINE(VCPU_FPRS, offsetof(struct kvm_vcpu, arch.fp.fpr));
+ OFFSET(VCPU_HOST_STACK, kvm_vcpu, arch.host_stack);
+ OFFSET(VCPU_HOST_PID, kvm_vcpu, arch.host_pid);
+ OFFSET(VCPU_GUEST_PID, kvm_vcpu, arch.pid);
+ OFFSET(VCPU_GPRS, kvm_vcpu, arch.regs.gpr);
+ OFFSET(VCPU_VRSAVE, kvm_vcpu, arch.vrsave);
+ OFFSET(VCPU_FPRS, kvm_vcpu, arch.fp.fpr);
#ifdef CONFIG_ALTIVEC
- DEFINE(VCPU_VRS, offsetof(struct kvm_vcpu, arch.vr.vr));
+ OFFSET(VCPU_VRS, kvm_vcpu, arch.vr.vr);
#endif
- DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
- DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
- DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+ OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
+ OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
+ OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
#ifdef CONFIG_PPC_BOOK3S
- DEFINE(VCPU_TAR, offsetof(struct kvm_vcpu, arch.tar));
+ OFFSET(VCPU_TAR, kvm_vcpu, arch.tar);
#endif
- DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
- DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
+ OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.shregs.msr));
- DEFINE(VCPU_SRR0, offsetof(struct kvm_vcpu, arch.shregs.srr0));
- DEFINE(VCPU_SRR1, offsetof(struct kvm_vcpu, arch.shregs.srr1));
- DEFINE(VCPU_SPRG0, offsetof(struct kvm_vcpu, arch.shregs.sprg0));
- DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
- DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
- DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
-#endif
- DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
- DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
- DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
- DEFINE(VCPU_SHARED_SPRG6, offsetof(struct kvm_vcpu_arch_shared, sprg6));
- DEFINE(VCPU_SHARED_SPRG7, offsetof(struct kvm_vcpu_arch_shared, sprg7));
- DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid));
- DEFINE(VCPU_SHADOW_PID1, offsetof(struct kvm_vcpu, arch.shadow_pid1));
- DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared));
- DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
- DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
+ OFFSET(VCPU_MSR, kvm_vcpu, arch.shregs.msr);
+ OFFSET(VCPU_SRR0, kvm_vcpu, arch.shregs.srr0);
+ OFFSET(VCPU_SRR1, kvm_vcpu, arch.shregs.srr1);
+ OFFSET(VCPU_SPRG0, kvm_vcpu, arch.shregs.sprg0);
+ OFFSET(VCPU_SPRG1, kvm_vcpu, arch.shregs.sprg1);
+ OFFSET(VCPU_SPRG2, kvm_vcpu, arch.shregs.sprg2);
+ OFFSET(VCPU_SPRG3, kvm_vcpu, arch.shregs.sprg3);
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ OFFSET(VCPU_TB_RMENTRY, kvm_vcpu, arch.rm_entry);
+ OFFSET(VCPU_TB_RMINTR, kvm_vcpu, arch.rm_intr);
+ OFFSET(VCPU_TB_RMEXIT, kvm_vcpu, arch.rm_exit);
+ OFFSET(VCPU_TB_GUEST, kvm_vcpu, arch.guest_time);
+ OFFSET(VCPU_TB_CEDE, kvm_vcpu, arch.cede_time);
+ OFFSET(VCPU_CUR_ACTIVITY, kvm_vcpu, arch.cur_activity);
+ OFFSET(VCPU_ACTIVITY_START, kvm_vcpu, arch.cur_tb_start);
+ OFFSET(TAS_SEQCOUNT, kvmhv_tb_accumulator, seqcount);
+ OFFSET(TAS_TOTAL, kvmhv_tb_accumulator, tb_total);
+ OFFSET(TAS_MIN, kvmhv_tb_accumulator, tb_min);
+ OFFSET(TAS_MAX, kvmhv_tb_accumulator, tb_max);
+#endif
+ OFFSET(VCPU_SHARED_SPRG3, kvm_vcpu_arch_shared, sprg3);
+ OFFSET(VCPU_SHARED_SPRG4, kvm_vcpu_arch_shared, sprg4);
+ OFFSET(VCPU_SHARED_SPRG5, kvm_vcpu_arch_shared, sprg5);
+ OFFSET(VCPU_SHARED_SPRG6, kvm_vcpu_arch_shared, sprg6);
+ OFFSET(VCPU_SHARED_SPRG7, kvm_vcpu_arch_shared, sprg7);
+ OFFSET(VCPU_SHADOW_PID, kvm_vcpu, arch.shadow_pid);
+ OFFSET(VCPU_SHADOW_PID1, kvm_vcpu, arch.shadow_pid1);
+ OFFSET(VCPU_SHARED, kvm_vcpu, arch.shared);
+ OFFSET(VCPU_SHARED_MSR, kvm_vcpu_arch_shared, msr);
+ OFFSET(VCPU_SHADOW_MSR, kvm_vcpu, arch.shadow_msr);
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_KVM_BOOK3S_PR_POSSIBLE)
- DEFINE(VCPU_SHAREDBE, offsetof(struct kvm_vcpu, arch.shared_big_endian));
+ OFFSET(VCPU_SHAREDBE, kvm_vcpu, arch.shared_big_endian);
#endif
- DEFINE(VCPU_SHARED_MAS0, offsetof(struct kvm_vcpu_arch_shared, mas0));
- DEFINE(VCPU_SHARED_MAS1, offsetof(struct kvm_vcpu_arch_shared, mas1));
- DEFINE(VCPU_SHARED_MAS2, offsetof(struct kvm_vcpu_arch_shared, mas2));
- DEFINE(VCPU_SHARED_MAS7_3, offsetof(struct kvm_vcpu_arch_shared, mas7_3));
- DEFINE(VCPU_SHARED_MAS4, offsetof(struct kvm_vcpu_arch_shared, mas4));
- DEFINE(VCPU_SHARED_MAS6, offsetof(struct kvm_vcpu_arch_shared, mas6));
+ OFFSET(VCPU_SHARED_MAS0, kvm_vcpu_arch_shared, mas0);
+ OFFSET(VCPU_SHARED_MAS1, kvm_vcpu_arch_shared, mas1);
+ OFFSET(VCPU_SHARED_MAS2, kvm_vcpu_arch_shared, mas2);
+ OFFSET(VCPU_SHARED_MAS7_3, kvm_vcpu_arch_shared, mas7_3);
+ OFFSET(VCPU_SHARED_MAS4, kvm_vcpu_arch_shared, mas4);
+ OFFSET(VCPU_SHARED_MAS6, kvm_vcpu_arch_shared, mas6);
- DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm));
- DEFINE(KVM_LPID, offsetof(struct kvm, arch.lpid));
+ OFFSET(VCPU_KVM, kvm_vcpu, kvm);
+ OFFSET(KVM_LPID, kvm, arch.lpid);
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
- DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
- DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
- DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
- DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
- DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
- DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
- DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
- DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
- DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
- DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
- DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
- DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
- DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+ OFFSET(KVM_SDR1, kvm, arch.sdr1);
+ OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
+ OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
+ OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
+ OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
+ OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
+ OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
+ OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
+ OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
+ OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
+ OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
+ OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
+ OFFSET(VCPU_CPU, kvm_vcpu, cpu);
+ OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
#endif
#ifdef CONFIG_PPC_BOOK3S
- DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
- DEFINE(VCPU_PURR, offsetof(struct kvm_vcpu, arch.purr));
- DEFINE(VCPU_SPURR, offsetof(struct kvm_vcpu, arch.spurr));
- DEFINE(VCPU_IC, offsetof(struct kvm_vcpu, arch.ic));
- DEFINE(VCPU_VTB, offsetof(struct kvm_vcpu, arch.vtb));
- DEFINE(VCPU_DSCR, offsetof(struct kvm_vcpu, arch.dscr));
- DEFINE(VCPU_AMR, offsetof(struct kvm_vcpu, arch.amr));
- DEFINE(VCPU_UAMOR, offsetof(struct kvm_vcpu, arch.uamor));
- DEFINE(VCPU_IAMR, offsetof(struct kvm_vcpu, arch.iamr));
- DEFINE(VCPU_CTRL, offsetof(struct kvm_vcpu, arch.ctrl));
- DEFINE(VCPU_DABR, offsetof(struct kvm_vcpu, arch.dabr));
- DEFINE(VCPU_DABRX, offsetof(struct kvm_vcpu, arch.dabrx));
- DEFINE(VCPU_DAWR, offsetof(struct kvm_vcpu, arch.dawr));
- DEFINE(VCPU_DAWRX, offsetof(struct kvm_vcpu, arch.dawrx));
- DEFINE(VCPU_CIABR, offsetof(struct kvm_vcpu, arch.ciabr));
- DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
- DEFINE(VCPU_DEC, offsetof(struct kvm_vcpu, arch.dec));
- DEFINE(VCPU_DEC_EXPIRES, offsetof(struct kvm_vcpu, arch.dec_expires));
- DEFINE(VCPU_PENDING_EXC, offsetof(struct kvm_vcpu, arch.pending_exceptions));
- DEFINE(VCPU_CEDED, offsetof(struct kvm_vcpu, arch.ceded));
- DEFINE(VCPU_PRODDED, offsetof(struct kvm_vcpu, arch.prodded));
- DEFINE(VCPU_MMCR, offsetof(struct kvm_vcpu, arch.mmcr));
- DEFINE(VCPU_PMC, offsetof(struct kvm_vcpu, arch.pmc));
- DEFINE(VCPU_SPMC, offsetof(struct kvm_vcpu, arch.spmc));
- DEFINE(VCPU_SIAR, offsetof(struct kvm_vcpu, arch.siar));
- DEFINE(VCPU_SDAR, offsetof(struct kvm_vcpu, arch.sdar));
- DEFINE(VCPU_SIER, offsetof(struct kvm_vcpu, arch.sier));
- DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
- DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
- DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
- DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
- DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
- DEFINE(VCPU_INTR_MSR, offsetof(struct kvm_vcpu, arch.intr_msr));
- DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
- DEFINE(VCPU_TRAP, offsetof(struct kvm_vcpu, arch.trap));
- DEFINE(VCPU_CFAR, offsetof(struct kvm_vcpu, arch.cfar));
- DEFINE(VCPU_PPR, offsetof(struct kvm_vcpu, arch.ppr));
- DEFINE(VCPU_FSCR, offsetof(struct kvm_vcpu, arch.fscr));
- DEFINE(VCPU_SHADOW_FSCR, offsetof(struct kvm_vcpu, arch.shadow_fscr));
- DEFINE(VCPU_PSPB, offsetof(struct kvm_vcpu, arch.pspb));
- DEFINE(VCPU_EBBHR, offsetof(struct kvm_vcpu, arch.ebbhr));
- DEFINE(VCPU_EBBRR, offsetof(struct kvm_vcpu, arch.ebbrr));
- DEFINE(VCPU_BESCR, offsetof(struct kvm_vcpu, arch.bescr));
- DEFINE(VCPU_CSIGR, offsetof(struct kvm_vcpu, arch.csigr));
- DEFINE(VCPU_TACR, offsetof(struct kvm_vcpu, arch.tacr));
- DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
- DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
- DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
- DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
- DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
- DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
- DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
- DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
- DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
- DEFINE(VCORE_TB_OFFSET, offsetof(struct kvmppc_vcore, tb_offset));
- DEFINE(VCORE_LPCR, offsetof(struct kvmppc_vcore, lpcr));
- DEFINE(VCORE_PCR, offsetof(struct kvmppc_vcore, pcr));
- DEFINE(VCORE_DPDES, offsetof(struct kvmppc_vcore, dpdes));
- DEFINE(VCPU_SLB_E, offsetof(struct kvmppc_slb, orige));
- DEFINE(VCPU_SLB_V, offsetof(struct kvmppc_slb, origv));
+ OFFSET(VCPU_PURR, kvm_vcpu, arch.purr);
+ OFFSET(VCPU_SPURR, kvm_vcpu, arch.spurr);
+ OFFSET(VCPU_IC, kvm_vcpu, arch.ic);
+ OFFSET(VCPU_DSCR, kvm_vcpu, arch.dscr);
+ OFFSET(VCPU_AMR, kvm_vcpu, arch.amr);
+ OFFSET(VCPU_UAMOR, kvm_vcpu, arch.uamor);
+ OFFSET(VCPU_IAMR, kvm_vcpu, arch.iamr);
+ OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
+ OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
+ OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
+ OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+ OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
+ OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
+ OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
+ OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
+ OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
+ OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
+ OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+ OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
+ OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
+ OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
+ OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
+ OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
+ OFFSET(VCPU_SDAR, kvm_vcpu, arch.sdar);
+ OFFSET(VCPU_SIER, kvm_vcpu, arch.sier);
+ OFFSET(VCPU_SLB, kvm_vcpu, arch.slb);
+ OFFSET(VCPU_SLB_MAX, kvm_vcpu, arch.slb_max);
+ OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
+ OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
+ OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
+ OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
+ OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
+ OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
+ OFFSET(VCPU_CFAR, kvm_vcpu, arch.cfar);
+ OFFSET(VCPU_PPR, kvm_vcpu, arch.ppr);
+ OFFSET(VCPU_FSCR, kvm_vcpu, arch.fscr);
+ OFFSET(VCPU_PSPB, kvm_vcpu, arch.pspb);
+ OFFSET(VCPU_EBBHR, kvm_vcpu, arch.ebbhr);
+ OFFSET(VCPU_EBBRR, kvm_vcpu, arch.ebbrr);
+ OFFSET(VCPU_BESCR, kvm_vcpu, arch.bescr);
+ OFFSET(VCPU_CSIGR, kvm_vcpu, arch.csigr);
+ OFFSET(VCPU_TACR, kvm_vcpu, arch.tacr);
+ OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr);
+ OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop);
+ OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
+ OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
+ OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
+ OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
+ OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads);
+ OFFSET(VCORE_KVM, kvmppc_vcore, kvm);
+ OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset);
+ OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied);
+ OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr);
+ OFFSET(VCORE_PCR, kvmppc_vcore, pcr);
+ OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes);
+ OFFSET(VCORE_VTB, kvmppc_vcore, vtb);
+ OFFSET(VCPU_SLB_E, kvmppc_slb, orige);
+ OFFSET(VCPU_SLB_V, kvmppc_slb, origv);
DEFINE(VCPU_SLB_SIZE, sizeof(struct kvmppc_slb));
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- DEFINE(VCPU_TFHAR, offsetof(struct kvm_vcpu, arch.tfhar));
- DEFINE(VCPU_TFIAR, offsetof(struct kvm_vcpu, arch.tfiar));
- DEFINE(VCPU_TEXASR, offsetof(struct kvm_vcpu, arch.texasr));
- DEFINE(VCPU_GPR_TM, offsetof(struct kvm_vcpu, arch.gpr_tm));
- DEFINE(VCPU_FPRS_TM, offsetof(struct kvm_vcpu, arch.fp_tm.fpr));
- DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
- DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
- DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
- DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
- DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
- DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
- DEFINE(VCPU_PPR_TM, offsetof(struct kvm_vcpu, arch.ppr_tm));
- DEFINE(VCPU_DSCR_TM, offsetof(struct kvm_vcpu, arch.dscr_tm));
- DEFINE(VCPU_TAR_TM, offsetof(struct kvm_vcpu, arch.tar_tm));
+ OFFSET(VCPU_TFHAR, kvm_vcpu, arch.tfhar);
+ OFFSET(VCPU_TFIAR, kvm_vcpu, arch.tfiar);
+ OFFSET(VCPU_TEXASR, kvm_vcpu, arch.texasr);
+ OFFSET(VCPU_ORIG_TEXASR, kvm_vcpu, arch.orig_texasr);
+ OFFSET(VCPU_GPR_TM, kvm_vcpu, arch.gpr_tm);
+ OFFSET(VCPU_FPRS_TM, kvm_vcpu, arch.fp_tm.fpr);
+ OFFSET(VCPU_VRS_TM, kvm_vcpu, arch.vr_tm.vr);
+ OFFSET(VCPU_VRSAVE_TM, kvm_vcpu, arch.vrsave_tm);
+ OFFSET(VCPU_CR_TM, kvm_vcpu, arch.cr_tm);
+ OFFSET(VCPU_XER_TM, kvm_vcpu, arch.xer_tm);
+ OFFSET(VCPU_LR_TM, kvm_vcpu, arch.lr_tm);
+ OFFSET(VCPU_CTR_TM, kvm_vcpu, arch.ctr_tm);
+ OFFSET(VCPU_AMR_TM, kvm_vcpu, arch.amr_tm);
+ OFFSET(VCPU_PPR_TM, kvm_vcpu, arch.ppr_tm);
+ OFFSET(VCPU_DSCR_TM, kvm_vcpu, arch.dscr_tm);
+ OFFSET(VCPU_TAR_TM, kvm_vcpu, arch.tar_tm);
#endif
#ifdef CONFIG_PPC_BOOK3S_64
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
- DEFINE(PACA_SVCPU, offsetof(struct paca_struct, shadow_vcpu));
+ OFFSET(PACA_SVCPU, paca_struct, shadow_vcpu);
# define SVCPU_FIELD(x, f) DEFINE(x, offsetof(struct paca_struct, shadow_vcpu.f))
#else
# define SVCPU_FIELD(x, f)
@@ -640,18 +572,33 @@ int main(void)
HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
- HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys);
- HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr);
HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
HSTATE_FIELD(HSTATE_PTID, ptid);
- HSTATE_FIELD(HSTATE_MMCR, host_mmcr);
- HSTATE_FIELD(HSTATE_PMC, host_pmc);
+ HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
+ HSTATE_FIELD(HSTATE_MMCR0, host_mmcr[0]);
+ HSTATE_FIELD(HSTATE_MMCR1, host_mmcr[1]);
+ HSTATE_FIELD(HSTATE_MMCRA, host_mmcr[2]);
+ HSTATE_FIELD(HSTATE_SIAR, host_mmcr[3]);
+ HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]);
+ HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]);
+ HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]);
+ HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]);
+ HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]);
+ HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]);
+ HSTATE_FIELD(HSTATE_PMC4, host_pmc[3]);
+ HSTATE_FIELD(HSTATE_PMC5, host_pmc[4]);
+ HSTATE_FIELD(HSTATE_PMC6, host_pmc[5]);
HSTATE_FIELD(HSTATE_PURR, host_purr);
HSTATE_FIELD(HSTATE_SPURR, host_spurr);
HSTATE_FIELD(HSTATE_DSCR, host_dscr);
HSTATE_FIELD(HSTATE_DABR, dabr);
HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
- DEFINE(IPI_PRIORITY, IPI_PRIORITY);
+ HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
+ OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr);
+ OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar);
+ OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar);
+ OFFSET(KVM_SPLIT_DO_NAP, kvm_split_mode, do_nap);
+ OFFSET(KVM_SPLIT_NAPPED, kvm_split_mode, napped);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -661,76 +608,80 @@ int main(void)
#endif /* CONFIG_PPC_BOOK3S_64 */
#else /* CONFIG_PPC_BOOK3S */
- DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
- DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
- DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
- DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
- DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
- DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9));
- DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
- DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
- DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
- DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
+ OFFSET(VCPU_CR, kvm_vcpu, arch.regs.ccr);
+ OFFSET(VCPU_XER, kvm_vcpu, arch.regs.xer);
+ OFFSET(VCPU_LR, kvm_vcpu, arch.regs.link);
+ OFFSET(VCPU_CTR, kvm_vcpu, arch.regs.ctr);
+ OFFSET(VCPU_PC, kvm_vcpu, arch.regs.nip);
+ OFFSET(VCPU_SPRG9, kvm_vcpu, arch.sprg9);
+ OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
+ OFFSET(VCPU_FAULT_DEAR, kvm_vcpu, arch.fault_dear);
+ OFFSET(VCPU_FAULT_ESR, kvm_vcpu, arch.fault_esr);
+ OFFSET(VCPU_CRIT_SAVE, kvm_vcpu, arch.crit_save);
#endif /* CONFIG_PPC_BOOK3S */
#endif /* CONFIG_KVM */
#ifdef CONFIG_KVM_GUEST
- DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared,
- scratch1));
- DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared,
- scratch2));
- DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared,
- scratch3));
- DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared,
- int_pending));
- DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr));
- DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared,
- critical));
- DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr));
+ OFFSET(KVM_MAGIC_SCRATCH1, kvm_vcpu_arch_shared, scratch1);
+ OFFSET(KVM_MAGIC_SCRATCH2, kvm_vcpu_arch_shared, scratch2);
+ OFFSET(KVM_MAGIC_SCRATCH3, kvm_vcpu_arch_shared, scratch3);
+ OFFSET(KVM_MAGIC_INT, kvm_vcpu_arch_shared, int_pending);
+ OFFSET(KVM_MAGIC_MSR, kvm_vcpu_arch_shared, msr);
+ OFFSET(KVM_MAGIC_CRITICAL, kvm_vcpu_arch_shared, critical);
+ OFFSET(KVM_MAGIC_SR, kvm_vcpu_arch_shared, sr);
#endif
#ifdef CONFIG_44x
DEFINE(PGD_T_LOG2, PGD_T_LOG2);
DEFINE(PTE_T_LOG2, PTE_T_LOG2);
#endif
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam));
- DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0));
- DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
- DEFINE(TLBCAM_MAS2, offsetof(struct tlbcam, MAS2));
- DEFINE(TLBCAM_MAS3, offsetof(struct tlbcam, MAS3));
- DEFINE(TLBCAM_MAS7, offsetof(struct tlbcam, MAS7));
+ OFFSET(TLBCAM_MAS0, tlbcam, MAS0);
+ OFFSET(TLBCAM_MAS1, tlbcam, MAS1);
+ OFFSET(TLBCAM_MAS2, tlbcam, MAS2);
+ OFFSET(TLBCAM_MAS3, tlbcam, MAS3);
+ OFFSET(TLBCAM_MAS7, tlbcam, MAS7);
#endif
#if defined(CONFIG_KVM) && defined(CONFIG_SPE)
- DEFINE(VCPU_EVR, offsetof(struct kvm_vcpu, arch.evr[0]));
- DEFINE(VCPU_ACC, offsetof(struct kvm_vcpu, arch.acc));
- DEFINE(VCPU_SPEFSCR, offsetof(struct kvm_vcpu, arch.spefscr));
- DEFINE(VCPU_HOST_SPEFSCR, offsetof(struct kvm_vcpu, arch.host_spefscr));
+ OFFSET(VCPU_EVR, kvm_vcpu, arch.evr[0]);
+ OFFSET(VCPU_ACC, kvm_vcpu, arch.acc);
+ OFFSET(VCPU_SPEFSCR, kvm_vcpu, arch.spefscr);
+ OFFSET(VCPU_HOST_SPEFSCR, kvm_vcpu, arch.host_spefscr);
#endif
#ifdef CONFIG_KVM_BOOKE_HV
- DEFINE(VCPU_HOST_MAS4, offsetof(struct kvm_vcpu, arch.host_mas4));
- DEFINE(VCPU_HOST_MAS6, offsetof(struct kvm_vcpu, arch.host_mas6));
- DEFINE(VCPU_EPLC, offsetof(struct kvm_vcpu, arch.eplc));
+ OFFSET(VCPU_HOST_MAS4, kvm_vcpu, arch.host_mas4);
+ OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
#endif
#ifdef CONFIG_KVM_EXIT_TIMING
- DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu,
- arch.timing_exit.tv32.tbu));
- DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu,
- arch.timing_exit.tv32.tbl));
- DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu,
- arch.timing_last_enter.tv32.tbu));
- DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu,
- arch.timing_last_enter.tv32.tbl));
+ OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
+ OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
+ OFFSET(VCPU_TIMING_LAST_ENTER_TBU, kvm_vcpu, arch.timing_last_enter.tv32.tbu);
+ OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
#endif
-#ifdef CONFIG_PPC_POWERNV
- DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3));
- DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0));
- DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1));
- DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt));
+ DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
+
+#ifdef CONFIG_PPC_8xx
+ DEFINE(VIRT_IMMR_BASE, (u64)__fix_to_virt(FIX_IMMR_BASE));
+#endif
+
+#ifdef CONFIG_XMON
+ DEFINE(BPT_SIZE, BPT_SIZE);
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub));
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call);
+#endif
#endif
return 0;
diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c
index a4dab7cab348..92298d6a3a37 100644
--- a/arch/powerpc/kernel/audit.c
+++ b/arch/powerpc/kernel/audit.c
@@ -1,8 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/types.h>
#include <linux/audit.h>
#include <asm/unistd.h>
+#include "audit_32.h"
+
static unsigned dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
@@ -40,21 +43,22 @@ int audit_classify_arch(int arch)
int audit_classify_syscall(int abi, unsigned syscall)
{
#ifdef CONFIG_PPC64
- extern int ppc32_classify_syscall(unsigned);
if (abi == AUDIT_ARCH_PPC)
return ppc32_classify_syscall(syscall);
#endif
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 0;
+ return AUDITSC_NATIVE;
}
}
diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h
new file mode 100644
index 000000000000..c6c79c3041ab
--- /dev/null
+++ b/arch/powerpc/kernel/audit_32.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __AUDIT_32_H__
+#define __AUDIT_32_H__
+
+extern int ppc32_classify_syscall(unsigned);
+
+#endif
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 41c011cb6070..7f63f1cdc6c3 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Procedures for drawing on the screen early on in the boot process.
*
@@ -7,14 +8,15 @@
#include <linux/string.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/font.h>
#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
#include <asm/sections.h>
-#include <asm/prom.h>
#include <asm/btext.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/udbg.h>
@@ -25,7 +27,7 @@
static void scrollscreen(void);
#endif
-#define __force_data __attribute__((__section__(".data")))
+#define __force_data __section(".data")
static int g_loc_X __force_data;
static int g_loc_Y __force_data;
@@ -40,12 +42,7 @@ static unsigned char *logicalDisplayBase __force_data;
unsigned long disp_BAT[2] __initdata = {0, 0};
-#define cmapsz (16*256)
-
-static unsigned char vga_font[cmapsz];
-
-int boot_text_mapped __force_data = 0;
-int force_printk_to_btext = 0;
+static int boot_text_mapped __force_data;
extern void rmci_on(void);
extern void rmci_off(void);
@@ -73,7 +70,7 @@ static inline void rmci_maybe_off(void)
* the display during identify_machine() and MMU_Init()
*
* The display is mapped to virtual address 0xD0000000, rather
- * than 1:1, because some some CHRP machines put the frame buffer
+ * than 1:1, because some CHRP machines put the frame buffer
* in the region starting at 0xC0000000 (PAGE_OFFSET).
* This mapping is temporary and will disappear as soon as the
* setup done by MMU_Init() is applied.
@@ -94,19 +91,10 @@ void __init btext_prepare_BAT(void)
boot_text_mapped = 0;
return;
}
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
- /* 603, 604, G3, G4, ... */
- lowbits = addr & ~0xFF000000UL;
- addr &= 0xFF000000UL;
- disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
- disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
- } else {
- /* 601 */
- lowbits = addr & ~0xFF800000UL;
- addr &= 0xFF800000UL;
- disp_BAT[0] = vaddr | (_PAGE_NO_CACHE | PP_RWXX) | 4;
- disp_BAT[1] = addr | BL_8M | 0x40;
- }
+ lowbits = addr & ~0xFF000000UL;
+ addr &= 0xFF000000UL;
+ disp_BAT[0] = vaddr | (BL_16M<<2) | 2;
+ disp_BAT[1] = addr | (_PAGE_NO_CACHE | _PAGE_GUARDED | BPP_RW);
logicalDisplayBase = (void *) (vaddr + lowbits);
}
#endif
@@ -156,20 +144,20 @@ void btext_map(void)
/* By default, we are no longer mapped */
boot_text_mapped = 0;
- if (dispDeviceBase == 0)
+ if (!dispDeviceBase)
return;
base = ((unsigned long) dispDeviceBase) & 0xFFFFF000UL;
offset = ((unsigned long) dispDeviceBase) - base;
size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ dispDeviceRect[0];
- vbase = __ioremap(base, size, _PAGE_NO_CACHE);
- if (vbase == 0)
+ vbase = ioremap_wc(base, size);
+ if (!vbase)
return;
logicalDisplayBase = vbase + offset;
boot_text_mapped = 1;
}
-int btext_initialize(struct device_node *np)
+static int __init btext_initialize(struct device_node *np)
{
unsigned int width, height, depth, pitch;
unsigned long address = 0;
@@ -231,20 +219,12 @@ int btext_initialize(struct device_node *np)
int __init btext_find_display(int allow_nonstdout)
{
- const char *name;
- struct device_node *np = NULL;
+ struct device_node *np = of_stdout;
int rc = -ENODEV;
- name = of_get_property(of_chosen, "linux,stdout-path", NULL);
- if (name != NULL) {
- np = of_find_node_by_path(name);
- if (np != NULL) {
- if (strcmp(np->type, "display") != 0) {
- printk("boot stdout isn't a display !\n");
- of_node_put(np);
- np = NULL;
- }
- }
+ if (!of_node_is_type(np, "display")) {
+ printk("boot stdout isn't a display !\n");
+ np = NULL;
}
if (np)
rc = btext_initialize(np);
@@ -252,13 +232,15 @@ int __init btext_find_display(int allow_nonstdout)
return rc;
for_each_node_by_type(np, "display") {
- if (of_get_property(np, "linux,opened", NULL)) {
- printk("trying %s ...\n", np->full_name);
+ if (of_property_read_bool(np, "linux,opened")) {
+ printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
}
- if (rc == 0)
+ if (rc == 0) {
+ of_node_put(np);
break;
+ }
}
return rc;
}
@@ -269,7 +251,7 @@ static unsigned char * calc_base(int x, int y)
unsigned char *base;
base = logicalDisplayBase;
- if (base == 0)
+ if (!base)
base = dispDeviceBase;
base += (x + dispDeviceRect[0]) * (dispDeviceDepth >> 3);
base += (y + dispDeviceRect[1]) * dispDeviceRowBytes;
@@ -280,7 +262,7 @@ static unsigned char * calc_base(int x, int y)
void btext_update_display(unsigned long phys, int width, int height,
int depth, int pitch)
{
- if (dispDeviceBase == 0)
+ if (!dispDeviceBase)
return;
/* check it's the same frame buffer (within 256MB) */
@@ -306,7 +288,7 @@ void btext_update_display(unsigned long phys, int width, int height,
}
EXPORT_SYMBOL(btext_update_display);
-void btext_clearscreen(void)
+void __init btext_clearscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -324,7 +306,7 @@ void btext_clearscreen(void)
rmci_maybe_off();
}
-void btext_flushscreen(void)
+void __init btext_flushscreen(void)
{
unsigned int *base = (unsigned int *)calc_base(0, 0);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -343,7 +325,7 @@ void btext_flushscreen(void)
__asm__ __volatile__ ("sync" ::: "memory");
}
-void btext_flushline(void)
+void __init btext_flushline(void)
{
unsigned int *base = (unsigned int *)calc_base(0, g_loc_Y << 4);
unsigned long width = ((dispDeviceRect[2] - dispDeviceRect[0]) *
@@ -422,7 +404,7 @@ static unsigned int expand_bits_16[4] = {
};
-static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
+static void draw_byte_32(const unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0xFFFFFFFFUL;
@@ -443,7 +425,7 @@ static void draw_byte_32(unsigned char *font, unsigned int *base, int rb)
}
}
-static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
+static inline void draw_byte_16(const unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0xFFFFFFFFUL;
@@ -461,7 +443,7 @@ static inline void draw_byte_16(unsigned char *font, unsigned int *base, int rb)
}
}
-static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
+static inline void draw_byte_8(const unsigned char *font, unsigned int *base, int rb)
{
int l, bits;
int fg = 0x0F0F0F0FUL;
@@ -480,7 +462,8 @@ static inline void draw_byte_8(unsigned char *font, unsigned int *base, int rb)
static noinline void draw_byte(unsigned char c, long locX, long locY)
{
unsigned char *base = calc_base(locX << 3, locY << 4);
- unsigned char *font = &vga_font[((unsigned int)c) * 16];
+ unsigned int font_index = c * 16;
+ const unsigned char *font = font_sun_8x16.data + font_index;
int rb = dispDeviceRowBytes;
rmci_maybe_on();
@@ -558,7 +541,7 @@ void btext_drawstring(const char *c)
btext_drawchar(*c++);
}
-void btext_drawtext(const char *c, unsigned int len)
+void __init btext_drawtext(const char *c, unsigned int len)
{
if (!boot_text_mapped)
return;
@@ -566,7 +549,7 @@ void btext_drawtext(const char *c, unsigned int len)
btext_drawchar(*c++);
}
-void btext_drawhex(unsigned long v)
+void __init btext_drawhex(unsigned long v)
{
if (!boot_text_mapped)
return;
@@ -598,349 +581,3 @@ void __init udbg_init_btext(void)
*/
udbg_putc = btext_drawchar;
}
-
-static unsigned char vga_font[cmapsz] = {
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x81, 0xa5, 0x81, 0x81, 0xbd,
-0x99, 0x81, 0x81, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xff,
-0xdb, 0xff, 0xff, 0xc3, 0xe7, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x6c, 0xfe, 0xfe, 0xfe, 0xfe, 0x7c, 0x38, 0x10,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x7c, 0xfe,
-0x7c, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18,
-0x3c, 0x3c, 0xe7, 0xe7, 0xe7, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x3c, 0x7e, 0xff, 0xff, 0x7e, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xe7, 0xc3, 0xc3, 0xe7, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x42, 0x42, 0x66, 0x3c, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc3, 0x99, 0xbd,
-0xbd, 0x99, 0xc3, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x1e, 0x0e,
-0x1a, 0x32, 0x78, 0xcc, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x66, 0x66, 0x66, 0x66, 0x3c, 0x18, 0x7e, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x33, 0x3f, 0x30, 0x30, 0x30,
-0x30, 0x70, 0xf0, 0xe0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x63,
-0x7f, 0x63, 0x63, 0x63, 0x63, 0x67, 0xe7, 0xe6, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x18, 0x18, 0xdb, 0x3c, 0xe7, 0x3c, 0xdb, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfe, 0xf8,
-0xf0, 0xe0, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x06, 0x0e,
-0x1e, 0x3e, 0xfe, 0x3e, 0x1e, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66,
-0x66, 0x00, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xdb,
-0xdb, 0xdb, 0x7b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0x60, 0x38, 0x6c, 0xc6, 0xc6, 0x6c, 0x38, 0x0c, 0xc6,
-0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0xfe, 0xfe, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c,
-0x7e, 0x18, 0x18, 0x18, 0x7e, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x7e, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x0c, 0xfe, 0x0c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x60, 0xfe, 0x60, 0x30, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0xc0,
-0xc0, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x24, 0x66, 0xff, 0x66, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x38, 0x7c, 0x7c, 0xfe, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xfe, 0x7c, 0x7c,
-0x38, 0x38, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x66, 0x66, 0x24, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c,
-0x6c, 0xfe, 0x6c, 0x6c, 0x6c, 0xfe, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x7c, 0xc6, 0xc2, 0xc0, 0x7c, 0x06, 0x06, 0x86, 0xc6, 0x7c,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xc6, 0x0c, 0x18,
-0x30, 0x60, 0xc6, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c,
-0x6c, 0x38, 0x76, 0xdc, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x30, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x18, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x18,
-0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x3c, 0xff, 0x3c, 0x66, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x02, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xce, 0xde, 0xf6, 0xe6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x38, 0x78, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0x06, 0x06, 0x3c, 0x06, 0x06, 0x06, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x1c, 0x3c, 0x6c, 0xcc, 0xfe,
-0x0c, 0x0c, 0x0c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xfc, 0x06, 0x06, 0x06, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x60, 0xc0, 0xc0, 0xfc, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0x06, 0x06, 0x0c, 0x18,
-0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0xc6, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x06, 0x06, 0x0c, 0x78,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00,
-0x00, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x18, 0x00, 0x00, 0x00, 0x18, 0x18, 0x30, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x06,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00,
-0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60,
-0x30, 0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x60, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0x0c, 0x18, 0x18, 0x18, 0x00, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xde, 0xde,
-0xde, 0xdc, 0xc0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x66, 0x66, 0x66, 0x66, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0xc2, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc2, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x6c,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x6c, 0xf8, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68, 0x60, 0x62, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x66, 0x62, 0x68, 0x78, 0x68,
-0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xde, 0xc6, 0xc6, 0x66, 0x3a, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x0c,
-0x0c, 0x0c, 0x0c, 0x0c, 0xcc, 0xcc, 0xcc, 0x78, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xe6, 0x66, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0x66, 0xe6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x60, 0x60, 0x60, 0x60, 0x60,
-0x60, 0x62, 0x66, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xe7,
-0xff, 0xff, 0xdb, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66,
-0x66, 0x66, 0x7c, 0x60, 0x60, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xd6, 0xde, 0x7c,
-0x0c, 0x0e, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66, 0x66, 0x7c, 0x6c,
-0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6,
-0xc6, 0x60, 0x38, 0x0c, 0x06, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xdb, 0x99, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xc3, 0xc3, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x66,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0x66, 0x3c, 0x18, 0x18,
-0x3c, 0x66, 0xc3, 0xc3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xff, 0xc3, 0x86, 0x0c, 0x18, 0x30, 0x60, 0xc1, 0xc3, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x30, 0x30, 0x30, 0x30, 0x30,
-0x30, 0x30, 0x30, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80,
-0xc0, 0xe0, 0x70, 0x38, 0x1c, 0x0e, 0x06, 0x02, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x3c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0x00,
-0x30, 0x30, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x78, 0x6c, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc0, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x0c, 0x0c, 0x3c, 0x6c, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0xcc, 0x78, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x6c, 0x76, 0x66, 0x66, 0x66, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x18, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x06, 0x00, 0x0e, 0x06, 0x06,
-0x06, 0x06, 0x06, 0x06, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0xe0, 0x60,
-0x60, 0x66, 0x6c, 0x78, 0x78, 0x6c, 0x66, 0xe6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe6, 0xff, 0xdb,
-0xdb, 0xdb, 0xdb, 0xdb, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x66, 0x66,
-0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x7c, 0x0c, 0x0c, 0x1e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xdc, 0x76, 0x66, 0x60, 0x60, 0x60, 0xf0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xc6, 0x60,
-0x38, 0x0c, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x30,
-0x30, 0xfc, 0x30, 0x30, 0x30, 0x30, 0x36, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0xc3, 0xc3,
-0xc3, 0x66, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc3, 0xc3, 0xc3, 0xdb, 0xdb, 0xff, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0x3c, 0x66, 0xc3,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xfe, 0xcc, 0x18, 0x30, 0x60, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x0e, 0x18, 0x18, 0x18, 0x70, 0x18, 0x18, 0x18, 0x18, 0x0e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x00, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0x18,
-0x18, 0x18, 0x0e, 0x18, 0x18, 0x18, 0x18, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0xc6,
-0xc6, 0xc6, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x66,
-0xc2, 0xc0, 0xc0, 0xc0, 0xc2, 0x66, 0x3c, 0x0c, 0x06, 0x7c, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xcc, 0x00, 0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38,
-0x00, 0x78, 0x0c, 0x7c, 0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x3c, 0x66, 0x60, 0x60, 0x66, 0x3c, 0x0c, 0x06,
-0x3c, 0x00, 0x00, 0x00, 0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xfe,
-0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x7c, 0xc6, 0xfe, 0xc0, 0xc0, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x66, 0x00, 0x00, 0x38, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x3c, 0x66,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x60, 0x30, 0x18, 0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x10, 0x38, 0x6c, 0xc6, 0xc6,
-0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x38, 0x00,
-0x38, 0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x30, 0x60, 0x00, 0xfe, 0x66, 0x60, 0x7c, 0x60, 0x60, 0x66, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6e, 0x3b, 0x1b,
-0x7e, 0xd8, 0xdc, 0x77, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x6c,
-0xcc, 0xcc, 0xfe, 0xcc, 0xcc, 0xcc, 0xcc, 0xce, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x10, 0x38, 0x6c, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0x00, 0x7c, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18,
-0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x30, 0x78, 0xcc, 0x00, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x76,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x30, 0x18, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00,
-0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7e, 0x06, 0x0c, 0x78, 0x00,
-0x00, 0xc6, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0xc6, 0x00, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6,
-0xc6, 0xc6, 0xc6, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e,
-0xc3, 0xc0, 0xc0, 0xc0, 0xc3, 0x7e, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x38, 0x6c, 0x64, 0x60, 0xf0, 0x60, 0x60, 0x60, 0x60, 0xe6, 0xfc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc3, 0x66, 0x3c, 0x18, 0xff, 0x18,
-0xff, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfc, 0x66, 0x66,
-0x7c, 0x62, 0x66, 0x6f, 0x66, 0x66, 0x66, 0xf3, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x0e, 0x1b, 0x18, 0x18, 0x18, 0x7e, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xd8, 0x70, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0x78, 0x0c, 0x7c,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x18, 0x30,
-0x00, 0x38, 0x18, 0x18, 0x18, 0x18, 0x18, 0x3c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x18, 0x30, 0x60, 0x00, 0x7c, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x30, 0x60, 0x00, 0xcc, 0xcc, 0xcc,
-0xcc, 0xcc, 0xcc, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc,
-0x00, 0xdc, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x00, 0x00, 0x00, 0x00,
-0x76, 0xdc, 0x00, 0xc6, 0xe6, 0xf6, 0xfe, 0xde, 0xce, 0xc6, 0xc6, 0xc6,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3c, 0x6c, 0x6c, 0x3e, 0x00, 0x7e, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x30, 0x30, 0x00, 0x30, 0x30, 0x60, 0xc0, 0xc6, 0xc6, 0x7c,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc0,
-0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0xfe, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30, 0x60, 0xce, 0x9b, 0x06,
-0x0c, 0x1f, 0x00, 0x00, 0x00, 0xc0, 0xc0, 0xc2, 0xc6, 0xcc, 0x18, 0x30,
-0x66, 0xce, 0x96, 0x3e, 0x06, 0x06, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18,
-0x00, 0x18, 0x18, 0x18, 0x3c, 0x3c, 0x3c, 0x18, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x6c, 0xd8, 0x6c, 0x36, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd8, 0x6c, 0x36,
-0x6c, 0xd8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x44, 0x11, 0x44,
-0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44, 0x11, 0x44,
-0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa,
-0x55, 0xaa, 0x55, 0xaa, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77,
-0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0xdd, 0x77, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x18, 0xf8,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xf6, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0x06, 0xf6,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf6, 0x06, 0xfe, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xfe, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xf8, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x37,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x37, 0x30, 0x3f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x3f, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xf7, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x37, 0x30, 0x37, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x00, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36,
-0x36, 0xf7, 0x00, 0xf7, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0xff,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xff, 0x00, 0xff, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x3f,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x1f, 0x18, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x18, 0x1f, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3f,
-0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x36, 0x36, 0x36, 0xff, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36, 0x36,
-0x18, 0x18, 0x18, 0x18, 0x18, 0xff, 0x18, 0xff, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0xf8,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x1f, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
-0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xf0, 0xf0, 0xf0,
-0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-0x0f, 0x0f, 0x0f, 0x0f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x76, 0xdc, 0xd8, 0xd8, 0xd8, 0xdc, 0x76, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x78, 0xcc, 0xcc, 0xcc, 0xd8, 0xcc, 0xc6, 0xc6, 0xc6, 0xcc,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfe, 0xc6, 0xc6, 0xc0, 0xc0, 0xc0,
-0xc0, 0xc0, 0xc0, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0xfe, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0xfe, 0xc6, 0x60, 0x30, 0x18, 0x30, 0x60, 0xc6, 0xfe,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0xd8, 0xd8,
-0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x66, 0x66, 0x66, 0x66, 0x66, 0x7c, 0x60, 0x60, 0xc0, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7e, 0x18, 0x3c, 0x66, 0x66,
-0x66, 0x3c, 0x18, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38,
-0x6c, 0xc6, 0xc6, 0xfe, 0xc6, 0xc6, 0x6c, 0x38, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x38, 0x6c, 0xc6, 0xc6, 0xc6, 0x6c, 0x6c, 0x6c, 0x6c, 0xee,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x30, 0x18, 0x0c, 0x3e, 0x66,
-0x66, 0x66, 0x66, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x7e, 0xdb, 0xdb, 0xdb, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x03, 0x06, 0x7e, 0xdb, 0xdb, 0xf3, 0x7e, 0x60, 0xc0,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x30, 0x60, 0x60, 0x7c, 0x60,
-0x60, 0x60, 0x30, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c,
-0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0xc6, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00, 0x00, 0xfe, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x7e, 0x18,
-0x18, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30,
-0x18, 0x0c, 0x06, 0x0c, 0x18, 0x30, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x0c, 0x18, 0x30, 0x60, 0x30, 0x18, 0x0c, 0x00, 0x7e,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x1b, 0x1b, 0x1b, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
-0x18, 0x18, 0x18, 0x18, 0xd8, 0xd8, 0xd8, 0x70, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x7e, 0x00, 0x18, 0x18, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0xdc, 0x00,
-0x76, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0x6c, 0x6c,
-0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x18, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0x0c, 0x0c,
-0x0c, 0x0c, 0x0c, 0xec, 0x6c, 0x6c, 0x3c, 0x1c, 0x00, 0x00, 0x00, 0x00,
-0x00, 0xd8, 0x6c, 0x6c, 0x6c, 0x6c, 0x6c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x70, 0xd8, 0x30, 0x60, 0xc8, 0xf8, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-0x00, 0x00, 0x00, 0x00,
-};
-
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 40198d50b4c2..0fcc463b02e2 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -1,15 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Processor cache information made available to userspace via sysfs;
* intended to be compatible with x86 intel_cacheinfo implementation.
*
* Copyright 2008 IBM Corporation
* Author: Nathan Lynch
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) "cacheinfo: " fmt
+
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
@@ -19,7 +18,8 @@
#include <linux/of.h>
#include <linux/percpu.h>
#include <linux/slab.h>
-#include <asm/prom.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
#include "cacheinfo.h"
@@ -61,12 +61,22 @@ struct cache_type_info {
};
/* These are used to index the cache_type_info array. */
-#define CACHE_TYPE_UNIFIED 0
-#define CACHE_TYPE_INSTRUCTION 1
-#define CACHE_TYPE_DATA 2
+#define CACHE_TYPE_UNIFIED 0 /* cache-size, cache-block-size, etc. */
+#define CACHE_TYPE_UNIFIED_D 1 /* d-cache-size, d-cache-block-size, etc */
+#define CACHE_TYPE_INSTRUCTION 2
+#define CACHE_TYPE_DATA 3
static const struct cache_type_info cache_type_info[] = {
{
+ /* Embedded systems that use cache-size, cache-block-size,
+ * etc. for the Unified (typically L2) cache. */
+ .name = "Unified",
+ .size_prop = "cache-size",
+ .line_size_props = { "cache-line-size",
+ "cache-block-size", },
+ .nr_sets_prop = "cache-sets",
+ },
+ {
/* PowerPC Processor binding says the [di]-cache-*
* must be equal on unified caches, so just use
* d-cache properties. */
@@ -109,6 +119,7 @@ struct cache {
struct cpumask shared_cpu_map; /* online CPUs using this cache */
int type; /* split cache disambiguation */
int level; /* level not explicit in device tree */
+ int group_id; /* id of the group of threads that share this cache */
struct list_head list; /* global list of cache objects */
struct cache *next_local; /* next cache of >= level */
};
@@ -131,22 +142,24 @@ static const char *cache_type_string(const struct cache *cache)
}
static void cache_init(struct cache *cache, int type, int level,
- struct device_node *ofnode)
+ struct device_node *ofnode, int group_id)
{
cache->type = type;
cache->level = level;
cache->ofnode = of_node_get(ofnode);
+ cache->group_id = group_id;
INIT_LIST_HEAD(&cache->list);
list_add(&cache->list, &cache_list);
}
-static struct cache *new_cache(int type, int level, struct device_node *ofnode)
+static struct cache *new_cache(int type, int level,
+ struct device_node *ofnode, int group_id)
{
struct cache *cache;
cache = kzalloc(sizeof(*cache), GFP_KERNEL);
if (cache)
- cache_init(cache, type, level, ofnode);
+ cache_init(cache, type, level, ofnode, group_id);
return cache;
}
@@ -157,10 +170,10 @@ static void release_cache_debugcheck(struct cache *cache)
list_for_each_entry(iter, &cache_list, list)
WARN_ONCE(iter->next_local == cache,
- "cache for %s(%s) refers to cache for %s(%s)\n",
- iter->ofnode->full_name,
+ "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n",
+ iter->ofnode,
cache_type_string(iter),
- cache->ofnode->full_name,
+ cache->ofnode,
cache_type_string(cache));
}
@@ -169,8 +182,8 @@ static void release_cache(struct cache *cache)
if (!cache)
return;
- pr_debug("freeing L%d %s cache for %s\n", cache->level,
- cache_type_string(cache), cache->ofnode->full_name);
+ pr_debug("freeing L%d %s cache for %pOFP\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
release_cache_debugcheck(cache);
list_del(&cache->list);
@@ -184,8 +197,8 @@ static void cache_cpu_set(struct cache *cache, int cpu)
while (next) {
WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
- "CPU %i already accounted in %s(%s)\n",
- cpu, next->ofnode->full_name,
+ "CPU %i already accounted in %pOFP(%s)\n",
+ cpu, next->ofnode,
cache_type_string(next));
cpumask_set_cpu(cpu, &next->shared_cpu_map);
next = next->next_local;
@@ -293,24 +306,29 @@ static struct cache *cache_find_first_sibling(struct cache *cache)
{
struct cache *iter;
- if (cache->type == CACHE_TYPE_UNIFIED)
+ if (cache->type == CACHE_TYPE_UNIFIED ||
+ cache->type == CACHE_TYPE_UNIFIED_D)
return cache;
list_for_each_entry(iter, &cache_list, list)
- if (iter->ofnode == cache->ofnode && iter->next_local == cache)
+ if (iter->ofnode == cache->ofnode &&
+ iter->group_id == cache->group_id &&
+ iter->next_local == cache)
return iter;
return cache;
}
-/* return the first cache on a local list matching node */
-static struct cache *cache_lookup_by_node(const struct device_node *node)
+/* return the first cache on a local list matching node and thread-group id */
+static struct cache *cache_lookup_by_node_group(const struct device_node *node,
+ int group_id)
{
struct cache *cache = NULL;
struct cache *iter;
list_for_each_entry(iter, &cache_list, list) {
- if (iter->ofnode != node)
+ if (iter->ofnode != node ||
+ iter->group_id != group_id)
continue;
cache = cache_find_first_sibling(iter);
break;
@@ -324,28 +342,40 @@ static bool cache_node_is_unified(const struct device_node *np)
return of_get_property(np, "cache-unified", NULL);
}
-static struct cache *cache_do_one_devnode_unified(struct device_node *node,
- int level)
+/*
+ * Unified caches can have two different sets of tags. Most embedded
+ * use cache-size, etc. for the unified cache size, but open firmware systems
+ * use d-cache-size, etc. Check on initialization for which type we have, and
+ * return the appropriate structure type. Assume it's embedded if it isn't
+ * open firmware. If it's yet a 3rd type, then there will be missing entries
+ * in /sys/devices/system/cpu/cpu0/cache/index2/, and this code will need
+ * to be extended further.
+ */
+static int cache_is_unified_d(const struct device_node *np)
{
- struct cache *cache;
-
- pr_debug("creating L%d ucache for %s\n", level, node->full_name);
+ return of_get_property(np,
+ cache_type_info[CACHE_TYPE_UNIFIED_D].size_prop, NULL) ?
+ CACHE_TYPE_UNIFIED_D : CACHE_TYPE_UNIFIED;
+}
- cache = new_cache(CACHE_TYPE_UNIFIED, level, node);
+static struct cache *cache_do_one_devnode_unified(struct device_node *node, int group_id,
+ int level)
+{
+ pr_debug("creating L%d ucache for %pOFP\n", level, node);
- return cache;
+ return new_cache(cache_is_unified_d(node), level, node, group_id);
}
-static struct cache *cache_do_one_devnode_split(struct device_node *node,
+static struct cache *cache_do_one_devnode_split(struct device_node *node, int group_id,
int level)
{
struct cache *dcache, *icache;
- pr_debug("creating L%d dcache and icache for %s\n", level,
- node->full_name);
+ pr_debug("creating L%d dcache and icache for %pOFP\n", level,
+ node);
- dcache = new_cache(CACHE_TYPE_DATA, level, node);
- icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node);
+ dcache = new_cache(CACHE_TYPE_DATA, level, node, group_id);
+ icache = new_cache(CACHE_TYPE_INSTRUCTION, level, node, group_id);
if (!dcache || !icache)
goto err;
@@ -359,31 +389,32 @@ err:
return NULL;
}
-static struct cache *cache_do_one_devnode(struct device_node *node, int level)
+static struct cache *cache_do_one_devnode(struct device_node *node, int group_id, int level)
{
struct cache *cache;
if (cache_node_is_unified(node))
- cache = cache_do_one_devnode_unified(node, level);
+ cache = cache_do_one_devnode_unified(node, group_id, level);
else
- cache = cache_do_one_devnode_split(node, level);
+ cache = cache_do_one_devnode_split(node, group_id, level);
return cache;
}
static struct cache *cache_lookup_or_instantiate(struct device_node *node,
+ int group_id,
int level)
{
struct cache *cache;
- cache = cache_lookup_by_node(node);
+ cache = cache_lookup_by_node_group(node, group_id);
WARN_ONCE(cache && cache->level != level,
"cache level mismatch on lookup (got %d, expected %d)\n",
cache->level, level);
if (!cache)
- cache = cache_do_one_devnode(node, level);
+ cache = cache_do_one_devnode(node, group_id, level);
return cache;
}
@@ -397,15 +428,53 @@ static void link_cache_lists(struct cache *smaller, struct cache *bigger)
}
smaller->next_local = bigger;
+
+ /*
+ * The cache->next_local list sorts by level ascending:
+ * L1d -> L1i -> L2 -> L3 ...
+ */
+ WARN_ONCE((smaller->level == 1 && bigger->level > 2) ||
+ (smaller->level > 1 && bigger->level != smaller->level + 1),
+ "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n",
+ smaller->level, smaller->ofnode, bigger->level, bigger->ofnode);
}
static void do_subsidiary_caches_debugcheck(struct cache *cache)
{
- WARN_ON_ONCE(cache->level != 1);
- WARN_ON_ONCE(strcmp(cache->ofnode->type, "cpu"));
+ WARN_ONCE(cache->level != 1,
+ "instantiating cache chain from L%d %s cache for "
+ "%pOFP instead of an L1\n", cache->level,
+ cache_type_string(cache), cache->ofnode);
+ WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"),
+ "instantiating cache chain from node %pOFP of type '%s' "
+ "instead of a cpu node\n", cache->ofnode,
+ of_node_get_device_type(cache->ofnode));
}
-static void do_subsidiary_caches(struct cache *cache)
+/*
+ * If sub-groups of threads in a core containing @cpu_id share the
+ * L@level-cache (information obtained via "ibm,thread-groups"
+ * device-tree property), then we identify the group by the first
+ * thread-sibling in the group. We define this to be the group-id.
+ *
+ * In the absence of any thread-group information for L@level-cache,
+ * this function returns -1.
+ */
+static int get_group_id(unsigned int cpu_id, int level)
+{
+ if (has_big_cores && level == 1)
+ return cpumask_first(per_cpu(thread_group_l1_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l2 && level == 2)
+ return cpumask_first(per_cpu(thread_group_l2_cache_map,
+ cpu_id));
+ else if (thread_group_shares_l3 && level == 3)
+ return cpumask_first(per_cpu(thread_group_l3_cache_map,
+ cpu_id));
+ return -1;
+}
+
+static void do_subsidiary_caches(struct cache *cache, unsigned int cpu_id)
{
struct device_node *subcache_node;
int level = cache->level;
@@ -414,9 +483,11 @@ static void do_subsidiary_caches(struct cache *cache)
while ((subcache_node = of_find_next_cache_node(cache->ofnode))) {
struct cache *subcache;
+ int group_id;
level++;
- subcache = cache_lookup_or_instantiate(subcache_node, level);
+ group_id = get_group_id(cpu_id, level);
+ subcache = cache_lookup_or_instantiate(subcache_node, group_id, level);
of_node_put(subcache_node);
if (!subcache)
break;
@@ -430,6 +501,7 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cpu_cache = NULL;
+ int group_id;
pr_debug("creating cache object(s) for CPU %i\n", cpu_id);
@@ -438,11 +510,13 @@ static struct cache *cache_chain_instantiate(unsigned int cpu_id)
if (!cpu_node)
goto out;
- cpu_cache = cache_lookup_or_instantiate(cpu_node, 1);
+ group_id = get_group_id(cpu_id, 1);
+
+ cpu_cache = cache_lookup_or_instantiate(cpu_node, group_id, 1);
if (!cpu_cache)
goto out;
- do_subsidiary_caches(cpu_cache);
+ do_subsidiary_caches(cpu_cache, cpu_id);
cache_cpu_set(cpu_cache, cpu_id);
out:
@@ -603,38 +677,49 @@ static ssize_t level_show(struct kobject *k, struct kobj_attribute *attr, char *
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
-static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+static ssize_t
+show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
{
struct cache_index_dir *index;
struct cache *cache;
- int len;
- int n = 0;
+ const struct cpumask *mask;
index = kobj_to_cache_index_dir(k);
cache = index->cache;
- len = PAGE_SIZE - 2;
- if (len > 1) {
- n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map);
- buf[n++] = '\n';
- buf[n] = '\0';
- }
- return n;
+ mask = &cache->shared_cpu_map;
+
+ return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ return show_shared_cpumap(k, attr, buf, false);
+}
+
+static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+ return show_shared_cpumap(k, attr, buf, true);
}
static struct kobj_attribute cache_shared_cpu_map_attr =
__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
+static struct kobj_attribute cache_shared_cpu_list_attr =
+ __ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
/* Attributes which should always be created -- the kobject/sysfs core
- * does this automatically via kobj_type->default_attrs. This is the
+ * does this automatically via kobj_type->default_groups. This is the
* minimum data required to uniquely identify a cache.
*/
static struct attribute *cache_index_default_attrs[] = {
&cache_type_attr.attr,
&cache_level_attr.attr,
&cache_shared_cpu_map_attr.attr,
+ &cache_shared_cpu_list_attr.attr,
NULL,
};
+ATTRIBUTE_GROUPS(cache_index_default);
/* Attributes which should be created if the cache device node has the
* right properties -- see cacheinfo_create_index_opt_attrs
@@ -650,15 +735,14 @@ static const struct sysfs_ops cache_index_ops = {
.show = cache_index_show,
};
-static struct kobj_type cache_index_type = {
+static const struct kobj_type cache_index_type = {
.release = cache_index_release,
.sysfs_ops = &cache_index_ops,
- .default_attrs = cache_index_default_attrs,
+ .default_groups = cache_index_default_groups,
};
static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
{
- const char *cache_name;
const char *cache_type;
struct cache *cache;
char *buf;
@@ -669,7 +753,6 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
return;
cache = dir->cache;
- cache_name = cache->ofnode->full_name;
cache_type = cache_type_string(cache);
/* We don't want to create an attribute that can't provide a
@@ -686,14 +769,14 @@ static void cacheinfo_create_index_opt_attrs(struct cache_index_dir *dir)
rc = attr->show(&dir->kobj, attr, buf);
if (rc <= 0) {
pr_debug("not creating %s attribute for "
- "%s(%s) (rc = %zd)\n",
- attr->attr.name, cache_name,
+ "%pOFP(%s) (rc = %zd)\n",
+ attr->attr.name, cache->ofnode,
cache_type, rc);
continue;
}
if (sysfs_create_file(&dir->kobj, &attr->attr))
- pr_debug("could not create %s attribute for %s(%s)\n",
- attr->attr.name, cache_name, cache_type);
+ pr_debug("could not create %s attribute for %pOFP(%s)\n",
+ attr->attr.name, cache->ofnode, cache_type);
}
kfree(buf);
@@ -707,23 +790,21 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index,
index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
if (!index_dir)
- goto err;
+ return;
index_dir->cache = cache;
rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
cache_dir->kobj, "index%d", index);
- if (rc)
- goto err;
+ if (rc) {
+ kobject_put(&index_dir->kobj);
+ return;
+ }
index_dir->next = cache_dir->index;
cache_dir->index = index_dir;
cacheinfo_create_index_opt_attrs(index_dir);
-
- return;
-err:
- kfree(index_dir);
}
static void cacheinfo_sysfs_populate(unsigned int cpu_id,
@@ -765,13 +846,15 @@ static struct cache *cache_lookup_by_cpu(unsigned int cpu_id)
{
struct device_node *cpu_node;
struct cache *cache;
+ int group_id;
cpu_node = of_get_cpu_node(cpu_id, NULL);
WARN_ONCE(!cpu_node, "no OF node found for CPU %i\n", cpu_id);
if (!cpu_node)
return NULL;
- cache = cache_lookup_by_node(cpu_node);
+ group_id = get_group_id(cpu_id, 1);
+ cache = cache_lookup_by_node_group(cpu_node, group_id);
of_node_put(cpu_node);
return cache;
@@ -810,8 +893,8 @@ static void cache_cpu_clear(struct cache *cache, int cpu)
struct cache *next = cache->next_local;
WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
- "CPU %i not accounted in %s(%s)\n",
- cpu, cache->ofnode->full_name,
+ "CPU %i not accounted in %pOFP(%s)\n",
+ cpu, cache->ofnode,
cache_type_string(cache));
cpumask_clear_cpu(cpu, &cache->shared_cpu_map);
@@ -846,4 +929,25 @@ void cacheinfo_cpu_offline(unsigned int cpu_id)
if (cache)
cache_cpu_clear(cache, cpu_id);
}
+
+void cacheinfo_teardown(void)
+{
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu)
+ cacheinfo_cpu_offline(cpu);
+}
+
+void cacheinfo_rebuild(void)
+{
+ unsigned int cpu;
+
+ lockdep_assert_cpus_held();
+
+ for_each_online_cpu(cpu)
+ cacheinfo_cpu_online(cpu);
+}
+
#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h
index a7b74d36acd7..52bd3fc6642d 100644
--- a/arch/powerpc/kernel/cacheinfo.h
+++ b/arch/powerpc/kernel/cacheinfo.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PPC_CACHEINFO_H
#define _PPC_CACHEINFO_H
@@ -5,4 +6,8 @@
extern void cacheinfo_cpu_online(unsigned int cpu_id);
extern void cacheinfo_cpu_offline(unsigned int cpu_id);
+/* Allow migration/suspend to tear down and rebuild the hierarchy. */
+extern void cacheinfo_teardown(void);
+extern void cacheinfo_rebuild(void);
+
#endif /* _PPC_CACHEINFO_H */
diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c
index 108ff14e2122..57b38c592b9f 100644
--- a/arch/powerpc/kernel/compat_audit.c
+++ b/arch/powerpc/kernel/compat_audit.c
@@ -1,6 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
#undef __powerpc64__
+#include <linux/audit_arch.h>
#include <asm/unistd.h>
+#include "audit_32.h"
+
unsigned ppc32_dir_class[] = {
#include <asm-generic/audit_dir_write.h>
~0U
@@ -30,14 +34,16 @@ int ppc32_classify_syscall(unsigned syscall)
{
switch(syscall) {
case __NR_open:
- return 2;
+ return AUDITSC_OPEN;
case __NR_openat:
- return 3;
+ return AUDITSC_OPENAT;
case __NR_socketcall:
- return 4;
+ return AUDITSC_SOCKETCALL;
case __NR_execve:
- return 5;
+ return AUDITSC_EXECVE;
+ case __NR_openat2:
+ return AUDITSC_OPENAT2;
default:
- return 1;
+ return AUDITSC_COMPAT;
}
}
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
index e32b4a9a2c22..e1d705ea2cf5 100644
--- a/arch/powerpc/kernel/cpu_setup_44x.S
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Valentine Barshak <vbarshak@ru.mvista.com>
@@ -5,12 +6,6 @@
*
* Based on cpu_setup_6xx code by
* Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f8cd9fba4d35..ab3ca74e6730 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -1,14 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cputable.h>
@@ -16,6 +13,7 @@
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
_GLOBAL(__setup_cpu_603)
mflr r5
@@ -23,10 +21,20 @@ BEGIN_MMU_FTR_SECTION
li r10,0
mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */
END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
BEGIN_FTR_SECTION
bl __init_fpu_registers
END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
bl setup_common_caches
+
+ /*
+ * This assumes that all cores using __setup_cpu_603 with
+ * MMU_FTR_USE_HIGH_BATS are G2_LE compatible
+ */
+BEGIN_MMU_FTR_SECTION
+ bl setup_g2_le_hid2
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
mtlr r5
blr
_GLOBAL(__setup_cpu_604)
@@ -84,7 +92,7 @@ _GLOBAL(__setup_cpu_745x)
blr
/* Enable caches for 603's, 604, 750 & 7400 */
-setup_common_caches:
+SYM_FUNC_START_LOCAL(setup_common_caches)
mfspr r11,SPRN_HID0
andi. r0,r11,HID0_DCE
ori r11,r11,HID0_ICE|HID0_DCE
@@ -98,11 +106,12 @@ setup_common_caches:
sync
isync
blr
+SYM_FUNC_END(setup_common_caches)
/* 604, 604e, 604ev, ...
* Enable superscalar execution & branch history table
*/
-setup_604_hid0:
+SYM_FUNC_START_LOCAL(setup_604_hid0)
mfspr r11,SPRN_HID0
ori r11,r11,HID0_SIED|HID0_BHTE
ori r8,r11,HID0_BTCD
@@ -113,6 +122,17 @@ setup_604_hid0:
sync
isync
blr
+SYM_FUNC_END(setup_604_hid0)
+
+/* Enable high BATs for G2_LE and derivatives like e300cX */
+SYM_FUNC_START_LOCAL(setup_g2_le_hid2)
+ mfspr r11,SPRN_HID2_G2_LE
+ oris r11,r11,HID2_G2_LE_HBE@h
+ mtspr SPRN_HID2_G2_LE,r11
+ sync
+ isync
+ blr
+SYM_FUNC_END(setup_g2_le_hid2)
/* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
* erratas we work around here.
@@ -128,13 +148,14 @@ setup_604_hid0:
* needed once we have applied workaround #5 (though it's
* not set by Apple's firmware at least).
*/
-setup_7400_workarounds:
+SYM_FUNC_START_LOCAL(setup_7400_workarounds)
mfpvr r3
rlwinm r3,r3,0,20,31
cmpwi 0,r3,0x0207
ble 1f
blr
-setup_7410_workarounds:
+SYM_FUNC_END(setup_7400_workarounds)
+SYM_FUNC_START_LOCAL(setup_7410_workarounds)
mfpvr r3
rlwinm r3,r3,0,20,31
cmpwi 0,r3,0x0100
@@ -154,14 +175,15 @@ setup_7410_workarounds:
sync
isync
blr
+SYM_FUNC_END(setup_7410_workarounds)
/* 740/750/7400/7410
- * Enable Store Gathering (SGE), Address Brodcast (ABE),
+ * Enable Store Gathering (SGE), Address Broadcast (ABE),
* Branch History Table (BHTE), Branch Target ICache (BTIC)
* Dynamic Power Management (DPM), Speculative (SPD)
* Clear Instruction cache throttling (ICTC)
*/
-setup_750_7400_hid0:
+SYM_FUNC_START_LOCAL(setup_750_7400_hid0)
mfspr r11,SPRN_HID0
ori r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
oris r11,r11,HID0_DPM@h
@@ -180,12 +202,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
sync
isync
blr
+SYM_FUNC_END(setup_750_7400_hid0)
/* 750cx specific
* Looks like we have to disable NAP feature for some PLL settings...
* (waiting for confirmation)
*/
-setup_750cx:
+SYM_FUNC_START_LOCAL(setup_750cx)
mfspr r10, SPRN_HID1
rlwinm r10,r10,4,28,31
cmpwi cr0,r10,7
@@ -199,11 +222,13 @@ setup_750cx:
andc r6,r6,r7
stw r6,CPU_SPEC_FEATURES(r4)
blr
+SYM_FUNC_END(setup_750cx)
/* 750fx specific
*/
-setup_750fx:
+SYM_FUNC_START_LOCAL(setup_750fx)
blr
+SYM_FUNC_END(setup_750fx)
/* MPC 745x
* Enable Store Gathering (SGE), Branch Folding (FOLD)
@@ -215,7 +240,7 @@ setup_750fx:
* Clear Instruction cache throttling (ICTC)
* Enable L2 HW prefetch
*/
-setup_745x_specifics:
+SYM_FUNC_START_LOCAL(setup_745x_specifics)
/* We check for the presence of an L3 cache setup by
* the firmware. If any, we disable NAP capability as
* it's known to be bogus on rev 2.1 and earlier
@@ -226,7 +251,7 @@ BEGIN_FTR_SECTION
beq 1f
END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
lwz r6,CPU_SPEC_FEATURES(r4)
- andi. r0,r6,CPU_FTR_L3_DISABLE_NAP
+ andis. r0,r6,CPU_FTR_L3_DISABLE_NAP@h
beq 1f
li r7,CPU_FTR_CAN_NAP
andc r6,r6,r7
@@ -273,6 +298,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
sync
isync
blr
+SYM_FUNC_END(setup_745x_specifics)
/*
* Initialize the FPU registers. This is needed to work around an errata
@@ -291,6 +317,7 @@ _GLOBAL(__init_fpu_registers)
mtmsr r10
isync
blr
+_ASM_NOKPROBE_SYMBOL(__init_fpu_registers)
/* Definitions for the table use to save CPU states */
@@ -325,7 +352,7 @@ _GLOBAL(__save_cpu_setup)
lis r5,cpu_state_storage@h
ori r5,r5,cpu_state_storage@l
- /* Save HID0 (common to all CONFIG_6xx cpus) */
+ /* Save HID0 (common to all CONFIG_PPC_BOOK3S_32 cpus) */
mfspr r3,SPRN_HID0
stw r3,CS_HID0(r5)
@@ -374,7 +401,7 @@ _GLOBAL(__save_cpu_setup)
andi. r3,r3,0xff00
cmpwi cr0,r3,0x0200
bne 1f
- mfspr r4,SPRN_HID2
+ mfspr r4,SPRN_HID2_750FX
stw r4,CS_HID2(r5)
1:
mtcr r7
@@ -469,7 +496,7 @@ _GLOBAL(__restore_cpu_setup)
bne 4f
lwz r4,CS_HID2(r5)
rlwinm r4,r4,0,19,17
- mtspr SPRN_HID2,r4
+ mtspr SPRN_HID2_750FX,r4
sync
4:
lwz r4,CS_HID1(r5)
@@ -486,4 +513,4 @@ _GLOBAL(__restore_cpu_setup)
1:
mtcr r7
blr
-
+_ASM_NOKPROBE_SYMBOL(__restore_cpu_setup)
diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_e500.S
index 4f1393d20079..077cfccc3461 100644
--- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
+++ b/arch/powerpc/kernel/cpu_setup_e500.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Kumar Gala <galak@kernel.crashing.org>
@@ -5,19 +6,17 @@
*
* Based on cpu_setup_6xx code by
* Benjamin Herrenschmidt <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/linkage.h>
+
+#include <asm/page.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/ppc_asm.h>
-#include <asm/mmu-book3e.h>
+#include <asm/nohash/mmu-e500.h>
#include <asm/asm-offsets.h>
+#include <asm/mpc85xx.h>
_GLOBAL(__e500_icache_setup)
mfspr r0, SPRN_L1CSR1
@@ -91,6 +90,7 @@ _GLOBAL(setup_altivec_idle)
blr
+#ifdef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e6500)
mflr r6
#ifdef CONFIG_PPC64
@@ -107,14 +107,11 @@ _GLOBAL(__setup_cpu_e6500)
bl __setup_cpu_e5500
mtlr r6
blr
+#endif /* CONFIG_PPC_E500MC */
#ifdef CONFIG_PPC32
-_GLOBAL(__setup_cpu_e200)
- /* enable dedicated debug exception handling resources (Debug APU) */
- mfspr r3,SPRN_HID0
- ori r3,r3,HID0_DAPUEN@l
- mtspr SPRN_HID0,r3
- b __setup_e200_ivors
+#ifdef CONFIG_PPC_E500
+#ifndef CONFIG_PPC_E500MC
_GLOBAL(__setup_cpu_e500v1)
_GLOBAL(__setup_cpu_e500v2)
mflr r4
@@ -129,6 +126,7 @@ _GLOBAL(__setup_cpu_e500v2)
#endif
mtlr r4
blr
+#else /* CONFIG_PPC_E500MC */
_GLOBAL(__setup_cpu_e500mc)
_GLOBAL(__setup_cpu_e5500)
mflr r5
@@ -152,14 +150,16 @@ _GLOBAL(__setup_cpu_e5500)
* the feature on the primary core, avoid doing it on the
* secondary core.
*/
- andis. r6, r3, CPU_FTR_EMB_HV@h
+ andi. r6, r3, CPU_FTR_EMB_HV
beq 2f
rlwinm r3, r3, 0, ~CPU_FTR_EMB_HV
stw r3, CPU_SPEC_FEATURES(r4)
2:
mtlr r5
blr
-#endif
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_PPC_E500 */
+#endif /* CONFIG_PPC32 */
#ifdef CONFIG_PPC_BOOK3E_64
_GLOBAL(__restore_cpu_e6500)
@@ -223,3 +223,115 @@ _GLOBAL(__setup_cpu_e5500)
mtlr r5
blr
#endif
+
+/* flush L1 data cache, it can apply to e500v2, e500mc and e5500 */
+_GLOBAL(flush_dcache_L1)
+ mfmsr r10
+ wrteei 0
+
+ mfspr r3,SPRN_L1CFG0
+ rlwinm r5,r3,9,3 /* Extract cache block size */
+ twlgti r5,1 /* Only 32 and 64 byte cache blocks
+ * are currently defined.
+ */
+ li r4,32
+ subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) -
+ * log2(number of ways)
+ */
+ slw r5,r4,r5 /* r5 = cache block size */
+
+ rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */
+ mulli r7,r7,13 /* An 8-way cache will require 13
+ * loads per set.
+ */
+ slw r7,r7,r6
+
+ /* save off HID0 and set DCFA */
+ mfspr r8,SPRN_HID0
+ ori r9,r8,HID0_DCFA@l
+ mtspr SPRN_HID0,r9
+ isync
+
+ LOAD_REG_IMMEDIATE(r6, KERNELBASE)
+ mr r4, r6
+ mtctr r7
+
+1: lwz r3,0(r4) /* Load... */
+ add r4,r4,r5
+ bdnz 1b
+
+ msync
+ mr r4, r6
+ mtctr r7
+
+1: dcbf 0,r4 /* ...and flush. */
+ add r4,r4,r5
+ bdnz 1b
+
+ /* restore HID0 */
+ mtspr SPRN_HID0,r8
+ isync
+
+ wrtee r10
+
+ blr
+
+SYM_FUNC_START_LOCAL(has_L2_cache)
+ /* skip L2 cache on P2040/P2040E as they have no L2 cache */
+ mfspr r3, SPRN_SVR
+ /* shift right by 8 bits and clear E bit of SVR */
+ rlwinm r4, r3, 24, ~0x800
+
+ lis r3, SVR_P2040@h
+ ori r3, r3, SVR_P2040@l
+ cmpw r4, r3
+ beq 1f
+
+ li r3, 1
+ blr
+1:
+ li r3, 0
+ blr
+SYM_FUNC_END(has_L2_cache)
+
+/* flush backside L2 cache */
+SYM_FUNC_START_LOCAL(flush_backside_L2_cache)
+ mflr r10
+ bl has_L2_cache
+ mtlr r10
+ cmpwi r3, 0
+ beq 2f
+
+ /* Flush the L2 cache */
+ mfspr r3, SPRN_L2CSR0
+ ori r3, r3, L2CSR0_L2FL@l
+ msync
+ isync
+ mtspr SPRN_L2CSR0,r3
+ isync
+
+ /* check if it is complete */
+1: mfspr r3,SPRN_L2CSR0
+ andi. r3, r3, L2CSR0_L2FL@l
+ bne 1b
+2:
+ blr
+SYM_FUNC_END(flush_backside_L2_cache)
+
+_GLOBAL(cpu_down_flush_e500v2)
+ mflr r0
+ bl flush_dcache_L1
+ mtlr r0
+ blr
+
+_GLOBAL(cpu_down_flush_e500mc)
+_GLOBAL(cpu_down_flush_e5500)
+ mflr r0
+ bl flush_dcache_L1
+ bl flush_backside_L2_cache
+ mtlr r0
+ blr
+
+/* L1 Data Cache of e6500 contains no modified data, no flush is required */
+_GLOBAL(cpu_down_flush_e6500)
+ blr
diff --git a/arch/powerpc/kernel/cpu_setup_pa6t.S b/arch/powerpc/kernel/cpu_setup_pa6t.S
index d62cb9cae4e9..e6bfd4490e19 100644
--- a/arch/powerpc/kernel/cpu_setup_pa6t.S
+++ b/arch/powerpc/kernel/cpu_setup_pa6t.S
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2006-2007 PA Semi, Inc
*
* Maintained by: Olof Johansson <olof@lixom.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
deleted file mode 100644
index 46733535cc0b..000000000000
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * This file contains low level CPU setup functions.
- * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cache.h>
-
-/* Entry: r3 = crap, r4 = ptr to cputable entry
- *
- * Note that we can be called twice for pseudo-PVRs
- */
-_GLOBAL(__setup_cpu_power7)
- mflr r11
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- mfspr r3,SPRN_LPCR
- bl __init_LPCR
- bl __init_tlb_power7
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power7)
- mflr r11
- mfmsr r3
- rldicl. r0,r3,4,63
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- mfspr r3,SPRN_LPCR
- bl __init_LPCR
- bl __init_tlb_power7
- mtlr r11
- blr
-
-_GLOBAL(__setup_cpu_power8)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- bl __init_hvmode_206
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- bl __init_LPCR
- bl __init_HFSCR
- bl __init_tlb_power8
- bl __init_PMU_HV
- mtlr r11
- blr
-
-_GLOBAL(__restore_cpu_power8)
- mflr r11
- bl __init_FSCR
- bl __init_PMU
- mfmsr r3
- rldicl. r0,r3,4,63
- mtlr r11
- beqlr
- li r0,0
- mtspr SPRN_LPID,r0
- mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- bl __init_LPCR
- bl __init_HFSCR
- bl __init_tlb_power8
- bl __init_PMU_HV
- mtlr r11
- blr
-
-__init_hvmode_206:
- /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
- mfmsr r3
- rldicl. r0,r3,4,63
- bnelr
- ld r5,CPU_SPEC_FEATURES(r4)
- LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE)
- xor r5,r5,r6
- std r5,CPU_SPEC_FEATURES(r4)
- blr
-
-__init_LPCR:
- /* Setup a sane LPCR:
- * Called with initial LPCR in R3
- *
- * LPES = 0b01 (HSRR0/1 used for 0x500)
- * PECE = 0b111
- * DPFD = 4
- * HDICE = 0
- * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
- * VRMASD = 0b10000 (L=1, LP=00)
- *
- * Other bits untouched for now
- */
- li r5,1
- rldimi r3,r5, LPCR_LPES_SH, 64-LPCR_LPES_SH-2
- ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2)
- li r5,4
- rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3
- clrrdi r3,r3,1 /* clear HDICE */
- li r5,4
- rldimi r3,r5, LPCR_VC_SH, 0
- li r5,0x10
- rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5
- mtspr SPRN_LPCR,r3
- isync
- blr
-
-__init_FSCR:
- mfspr r3,SPRN_FSCR
- ori r3,r3,FSCR_TAR|FSCR_DSCR|FSCR_EBB
- mtspr SPRN_FSCR,r3
- blr
-
-__init_HFSCR:
- mfspr r3,SPRN_HFSCR
- ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
- HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB
- mtspr SPRN_HFSCR,r3
- blr
-
-/*
- * Clear the TLB using the specified IS form of tlbiel instruction
- * (invalidate by congruence class). P7 has 128 CCs., P8 has 512.
- *
- * r3 = IS field
- */
-__init_tlb_power7:
- li r3,0xc00 /* IS field = 0b11 */
-_GLOBAL(__flush_tlb_power7)
- li r6,128
- mtctr r6
- mr r7,r3 /* IS field */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-__init_tlb_power8:
- li r3,0xc00 /* IS field = 0b11 */
-_GLOBAL(__flush_tlb_power8)
- li r6,512
- mtctr r6
- mr r7,r3 /* IS field */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1: blr
-
-__init_PMU_HV:
- li r5,0
- mtspr SPRN_MMCRC,r5
- mtspr SPRN_MMCRH,r5
- blr
-
-__init_PMU:
- li r5,0
- mtspr SPRN_MMCRS,r5
- mtspr SPRN_MMCRA,r5
- mtspr SPRN_MMCR0,r5
- mtspr SPRN_MMCR1,r5
- mtspr SPRN_MMCR2,r5
- blr
diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c
new file mode 100644
index 000000000000..98bd4e6c1770
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2020, Jordan Niethe, IBM Corporation.
+ *
+ * This file contains low level CPU setup functions.
+ * Originally written in assembly by Benjamin Herrenschmidt & various other
+ * authors.
+ */
+
+#include <asm/reg.h>
+#include <asm/synch.h>
+#include <linux/bitops.h>
+#include <asm/cputable.h>
+#include <asm/cpu_setup.h>
+
+/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */
+static bool init_hvmode_206(struct cpu_spec *t)
+{
+ u64 msr;
+
+ msr = mfmsr();
+ if (msr & MSR_HV)
+ return true;
+
+ t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST);
+ return false;
+}
+
+static void init_LPCR_ISA300(u64 lpcr, u64 lpes)
+{
+ /* POWER9 has no VRMASD */
+ lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES;
+ lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2;
+ lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD;
+ lpcr &= ~LPCR_HDICE; /* clear HDICE */
+ lpcr |= (4ull << LPCR_VC_SH);
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+}
+
+/*
+ * Setup a sane LPCR:
+ * Called with initial LPCR and desired LPES 2-bit value
+ *
+ * LPES = 0b01 (HSRR0/1 used for 0x500)
+ * PECE = 0b111
+ * DPFD = 4
+ * HDICE = 0
+ * VC = 0b100 (VPM0=1, VPM1=0, ISL=0)
+ * VRMASD = 0b10000 (L=1, LP=00)
+ *
+ * Other bits untouched for now
+ */
+static void init_LPCR_ISA206(u64 lpcr, u64 lpes)
+{
+ lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD;
+ init_LPCR_ISA300(lpcr, lpes);
+}
+
+static void init_FSCR(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_TAR|FSCR_EBB;
+ mtspr(SPRN_FSCR, fscr);
+}
+
+static void init_FSCR_power9(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_SCV;
+ mtspr(SPRN_FSCR, fscr);
+ init_FSCR();
+}
+
+static void init_FSCR_power10(void)
+{
+ u64 fscr;
+
+ fscr = mfspr(SPRN_FSCR);
+ fscr |= FSCR_PREFIX;
+ mtspr(SPRN_FSCR, fscr);
+ init_FSCR_power9();
+}
+
+static void init_HFSCR(void)
+{
+ u64 hfscr;
+
+ hfscr = mfspr(SPRN_HFSCR);
+ hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\
+ HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP;
+ mtspr(SPRN_HFSCR, hfscr);
+}
+
+static void init_PMU_HV(void)
+{
+ mtspr(SPRN_MMCRC, 0);
+}
+
+static void init_PMU_HV_ISA207(void)
+{
+ mtspr(SPRN_MMCRH, 0);
+}
+
+static void init_PMU(void)
+{
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+}
+
+static void init_PMU_ISA207(void)
+{
+ mtspr(SPRN_MMCRS, 0);
+}
+
+static void init_PMU_ISA31(void)
+{
+ mtspr(SPRN_MMCR3, 0);
+ mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+}
+
+static void init_DEXCR(void)
+{
+ mtspr(SPRN_DEXCR, DEXCR_INIT);
+ mtspr(SPRN_HASHKEYR, 0);
+}
+
+/*
+ * Note that we can be called twice of pseudo-PVRs.
+ * The parameter offset is not used.
+ */
+
+void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t)
+{
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __restore_cpu_power7(void)
+{
+ u64 msr;
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH);
+}
+
+void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR();
+ init_PMU();
+ init_PMU_ISA207();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+ init_HFSCR();
+ init_PMU_HV();
+ init_PMU_HV_ISA207();
+}
+
+void __restore_cpu_power8(void)
+{
+ u64 msr;
+
+ init_FSCR();
+ init_PMU();
+ init_PMU_ISA207();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */
+ init_HFSCR();
+ init_PMU_HV();
+ init_PMU_HV_ISA207();
+}
+
+void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR_power9();
+ init_PMU();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __restore_cpu_power9(void)
+{
+ u64 msr;
+
+ init_FSCR_power9();
+ init_PMU();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t)
+{
+ init_FSCR_power10();
+ init_PMU();
+ init_PMU_ISA31();
+ init_DEXCR();
+
+ if (!init_hvmode_206(t))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
+
+void __restore_cpu_power10(void)
+{
+ u64 msr;
+
+ init_FSCR_power10();
+ init_PMU();
+ init_PMU_ISA31();
+ init_DEXCR();
+
+ msr = mfmsr();
+ if (!(msr & MSR_HV))
+ return;
+
+ mtspr(SPRN_PSSCR, 0);
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_PID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_PCR, PCR_MASK);
+ init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\
+ LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0);
+ init_HFSCR();
+ init_PMU_HV();
+}
diff --git a/arch/powerpc/kernel/cpu_setup_ppc970.S b/arch/powerpc/kernel/cpu_setup_ppc970.S
index 12fac8df01c5..f0c07e70f0b6 100644
--- a/arch/powerpc/kernel/cpu_setup_ppc970.S
+++ b/arch/powerpc/kernel/cpu_setup_ppc970.S
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low level CPU setup functions.
* Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/processor.h>
diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h
new file mode 100644
index 000000000000..5ea14605bb41
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifdef CONFIG_PPC_47x
+#include "cpu_specs_47x.h"
+#elif defined(CONFIG_44x)
+#include "cpu_specs_44x.h"
+#endif
+
+#ifdef CONFIG_PPC_8xx
+#include "cpu_specs_8xx.h"
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#include "cpu_specs_e500mc.h"
+#elif defined(CONFIG_PPC_85xx)
+#include "cpu_specs_85xx.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+#include "cpu_specs_book3s_32.h"
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include "cpu_specs_book3s_64.h"
+#endif
diff --git a/arch/powerpc/kernel/cpu_specs_44x.h b/arch/powerpc/kernel/cpu_specs_44x.h
new file mode 100644
index 000000000000..69c4cdc0cdee
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_44x.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ {
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000850,
+ .cpu_name = "440GR Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000858,
+ .cpu_name = "440EP Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ {
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008d3,
+ .cpu_name = "440GR Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ff7,
+ .pvr_value = 0x400008d4,
+ .cpu_name = "440EP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x400008db,
+ .cpu_name = "440EP Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440ep,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440GRX */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D0,
+ .cpu_name = "440GRX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440grx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
+ .pvr_mask = 0xf0000ffb,
+ .pvr_value = 0x200008D8,
+ .cpu_name = "440EPX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440epx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GP Rev. B */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000440,
+ .cpu_name = "440GP Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440gp",
+ },
+ { /* 440GP Rev. C */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x40000481,
+ .cpu_name = "440GP Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440gp",
+ },
+ { /* 440GX Rev. A */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000850,
+ .cpu_name = "440GX Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. B */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000851,
+ .cpu_name = "440GX Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. C */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000892,
+ .cpu_name = "440GX Rev. C",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440GX Rev. F */
+ .pvr_mask = 0xf0000fff,
+ .pvr_value = 0x50000894,
+ .cpu_name = "440GX Rev. F",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440gx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SP Rev. A */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53200891,
+ .cpu_name = "440SP Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. A */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400890,
+ .cpu_name = "440SPe Rev. A",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 440SPe Rev. B */
+ .pvr_mask = 0xfff00fff,
+ .pvr_value = 0x53400891,
+ .cpu_name = "440SPe Rev. B",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_440spe,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020002,
+ .cpu_name = "460EX",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460EX Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020004,
+ .cpu_name = "460EX Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460ex,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT */
+ .pvr_mask = 0xffff0006,
+ .pvr_value = 0x13020000,
+ .cpu_name = "460GT",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460GT Rev B */
+ .pvr_mask = 0xffff0007,
+ .pvr_value = 0x13020005,
+ .cpu_name = "460GT Rev. B",
+ .cpu_features = CPU_FTRS_440x6,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460gt,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 460SX */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x13541800,
+ .cpu_name = "460SX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_460sx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* 464 in APM821xx */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x12C41C80,
+ .cpu_name = "APM821XX",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE |
+ PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_apm821xx,
+ .machine_check = machine_check_440A,
+ .platform = "ppc440",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 44x PPC)",
+ .cpu_features = CPU_FTRS_44X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_44x,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_4xx,
+ .platform = "ppc440",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_47x.h b/arch/powerpc/kernel/cpu_specs_47x.h
new file mode 100644
index 000000000000..3143cd504a51
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_47x.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* 476 DD2 core */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x11a52080,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476fpe */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x7ff50000,
+ .cpu_name = "476fpe",
+ .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 iss */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00050000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* 476 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x11a50000,
+ .cpu_name = "476",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .mmu_features = MMU_FTR_TYPE_47x | MMU_FTR_USE_TLBIVAX_BCAST |
+ MMU_FTR_LOCK_BCAST_INVAL,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic 47x PPC)",
+ .cpu_features = CPU_FTRS_47X,
+ .cpu_user_features = COMMON_USER_BOOKE,
+ .mmu_features = MMU_FTR_TYPE_47x,
+ .icache_bsize = 32,
+ .dcache_bsize = 128,
+ .machine_check = machine_check_47x,
+ .platform = "ppc470",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_85xx.h b/arch/powerpc/kernel/cpu_specs_85xx.h
new file mode 100644
index 000000000000..aaae202c1a89
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_85xx.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* e500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80200000,
+ .cpu_name = "e500",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e500v1,
+ .machine_check = machine_check_e500,
+ .platform = "ppc8540",
+ },
+ { /* e500v2 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80210000,
+ .cpu_name = "e500v2",
+ .cpu_features = CPU_FTRS_E500_2,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP |
+ PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e500v2,
+ .machine_check = machine_check_e500,
+ .platform = "ppc8548",
+ .cpu_down_flush = cpu_down_flush_e500v2,
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic E500 PPC)",
+ .cpu_features = CPU_FTRS_E500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_SPE_COMP |
+ PPC_FEATURE_HAS_EFP_SINGLE_COMP,
+ .mmu_features = MMU_FTR_TYPE_FSL_E,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_e500,
+ .platform = "powerpc",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_8xx.h b/arch/powerpc/kernel/cpu_specs_8xx.h
new file mode 100644
index 000000000000..93ddbc202ba3
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_8xx.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* 8xx */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = PVR_8xx,
+ .cpu_name = "8xx",
+ /*
+ * CPU_FTR_MAYBE_CAN_DOZE is possible,
+ * if the 8xx code is there....
+ */
+ .cpu_features = CPU_FTRS_8XX,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_TYPE_8xx,
+ .icache_bsize = 16,
+ .dcache_bsize = 16,
+ .machine_check = machine_check_8xx,
+ .platform = "ppc823",
+ },
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_32.h b/arch/powerpc/kernel/cpu_specs_book3s_32.h
new file mode 100644
index 000000000000..3714634d194a
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_32.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ */
+
+#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+ PPC_FEATURE_HAS_MMU)
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC_BOOK3S_603
+ { /* 603 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00030000,
+ .cpu_name = "603",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603e */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00060000,
+ .cpu_name = "603e",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 603ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00070000,
+ .cpu_name = "603ev",
+ .cpu_features = CPU_FTRS_603,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* 82xx (8240, 8245, 8260 are all 603e cores) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00810000,
+ .cpu_name = "82xx",
+ .cpu_features = CPU_FTRS_82XX,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = 0,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+ { /* All G2_LE (603e core, plus some) have the same pvr */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00820000,
+ .cpu_name = "G2_LE",
+ .cpu_features = CPU_FTRS_G2_LE,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+#ifdef CONFIG_PPC_83xx
+ { /* e300c1 (a 603e core, plus some) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00830000,
+ .cpu_name = "e300c1",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .platform = "ppc603",
+ },
+ { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00840000,
+ .cpu_name = "e300c2",
+ .cpu_features = CPU_FTRS_E300C2,
+ .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .platform = "ppc603",
+ },
+ { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00850000,
+ .cpu_name = "e300c3",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .num_pmcs = 4,
+ .platform = "ppc603",
+ },
+ { /* e300c4 (e300c1, plus one IU) */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00860000,
+ .cpu_name = "e300c4",
+ .cpu_features = CPU_FTRS_E300,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_USE_HIGH_BATS | MMU_FTR_NEED_DTLB_SW_LRU,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .cpu_setup = __setup_cpu_603,
+ .machine_check = machine_check_83xx,
+ .num_pmcs = 4,
+ .platform = "ppc603",
+ },
+#endif
+#endif /* CONFIG_PPC_BOOK3S_603 */
+#ifdef CONFIG_PPC_BOOK3S_604
+ { /* 604 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00040000,
+ .cpu_name = "604",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 2,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604e */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x00090000,
+ .cpu_name = "604e",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604r */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00090000,
+ .cpu_name = "604r",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 604ev */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x000a0000,
+ .cpu_name = "604ev",
+ .cpu_features = CPU_FTRS_604,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_604,
+ .machine_check = machine_check_generic,
+ .platform = "ppc604",
+ },
+ { /* 740/750 (0x4202, don't support TAU ?) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00084202,
+ .cpu_name = "740/750",
+ .cpu_features = CPU_FTRS_740_NOTAU,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CX (80100 and 8010x?) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00080100,
+ .cpu_name = "750CX",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CX (82201 and 82202) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00082200,
+ .cpu_name = "750CX",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CXe (82214) */
+ .pvr_mask = 0xfffffff0,
+ .pvr_value = 0x00082210,
+ .cpu_name = "750CXe",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CXe "Gekko" (83214) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00083214,
+ .cpu_name = "750CXe",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750cx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750CL (and "Broadway") */
+ .pvr_mask = 0xfffff0e0,
+ .pvr_value = 0x00087000,
+ .cpu_name = "750CL",
+ .cpu_features = CPU_FTRS_750CL,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 745/755 */
+ .pvr_mask = 0xfffff000,
+ .pvr_value = 0x00083000,
+ .cpu_name = "745/755",
+ .cpu_features = CPU_FTRS_750,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX rev 1.x */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x70000100,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX1,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX rev 2.0 must disable HID0[DPM] */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x70000200,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX2,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750FX (All revs except 2.0) */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x70000000,
+ .cpu_name = "750FX",
+ .cpu_features = CPU_FTRS_750FX,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 750GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x70020000,
+ .cpu_name = "750GX",
+ .cpu_features = CPU_FTRS_750GX,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750fx,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 740/750 (L2CR bit need fixup for 740) */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00080000,
+ .cpu_name = "740/750",
+ .cpu_features = CPU_FTRS_740,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_750,
+ .machine_check = machine_check_generic,
+ .platform = "ppc750",
+ },
+ { /* 7400 rev 1.1 ? (no TAU) */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x000c1101,
+ .cpu_name = "7400 (1.1)",
+ .cpu_features = CPU_FTRS_7400_NOTAU,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7400 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x000c0000,
+ .cpu_name = "7400",
+ .cpu_features = CPU_FTRS_7400,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7400,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7410 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x800c0000,
+ .cpu_name = "7410",
+ .cpu_features = CPU_FTRS_7400,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_7410,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7400",
+ },
+ { /* 7450 2.0 - no doze/nap */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80000200,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_20,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7450 2.1 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80000201,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_21,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7450 2.3 and newer */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80000000,
+ .cpu_name = "7450",
+ .cpu_features = CPU_FTRS_7450_23,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 rev 1.x */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x80010100,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455_1,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 rev 2.0 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80010200,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455_20,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7455 others */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80010000,
+ .cpu_name = "7455",
+ .cpu_features = CPU_FTRS_7455,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.0 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80020100,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447_10,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.1 */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x80020101,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447_10,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447/7457 Rev 1.2 and later */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80020000,
+ .cpu_name = "7447/7457",
+ .cpu_features = CPU_FTRS_7447,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7447A */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80030000,
+ .cpu_name = "7447A",
+ .cpu_features = CPU_FTRS_7447A,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* 7448 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80040000,
+ .cpu_name = "7448",
+ .cpu_features = CPU_FTRS_7448,
+ .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP |
+ PPC_FEATURE_PPC_LE,
+ .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_G4,
+ .cpu_setup = __setup_cpu_745x,
+ .machine_check = machine_check_generic,
+ .platform = "ppc7450",
+ },
+ { /* default match, we assume split I/D cache & TB (non-601)... */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "(generic PPC)",
+ .cpu_features = CPU_FTRS_CLASSIC32,
+ .cpu_user_features = COMMON_USER,
+ .mmu_features = MMU_FTR_HPTE_TABLE,
+ .icache_bsize = 32,
+ .dcache_bsize = 32,
+ .machine_check = machine_check_generic,
+ .platform = "ppc603",
+ },
+#endif /* CONFIG_PPC_BOOK3S_604 */
+};
diff --git a/arch/powerpc/kernel/cpu_specs_book3s_64.h b/arch/powerpc/kernel/cpu_specs_book3s_64.h
new file mode 100644
index 000000000000..98d4274a1b6b
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_book3s_64.h
@@ -0,0 +1,530 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+/* NOTE:
+ * Unlike ppc32, ppc64 will only call cpu_setup() for the boot CPU, it's
+ * the responsibility of the appropriate CPU save/restore functions to
+ * eventually copy these settings over. Those save/restore aren't yet
+ * part of the cputable though. That has to be fixed for both ppc32
+ * and ppc64
+ */
+#define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
+ PPC_FEATURE_HAS_MMU | PPC_FEATURE_64)
+#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
+#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR)
+#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_PSERIES_PERFMON_COMPAT)
+#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_HTM_COMP | \
+ PPC_FEATURE2_HTM_NOSC_COMP | \
+ PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
+#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
+ PPC_FEATURE_TRUE_LE | \
+ PPC_FEATURE_HAS_ALTIVEC_COMP)
+#define COMMON_USER_POWER9 COMMON_USER_POWER8
+#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128 | \
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV)
+#define COMMON_USER_POWER10 COMMON_USER_POWER9
+#define COMMON_USER2_POWER10 (PPC_FEATURE2_ARCH_3_1 | \
+ PPC_FEATURE2_MMA | \
+ PPC_FEATURE2_ARCH_3_00 | \
+ PPC_FEATURE2_HAS_IEEE128 | \
+ PPC_FEATURE2_DARN | \
+ PPC_FEATURE2_SCV | \
+ PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_DSCR | \
+ PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
+ PPC_FEATURE2_VEC_CRYPTO)
+
+#define COMMON_USER_POWER11 COMMON_USER_POWER10
+#define COMMON_USER2_POWER11 COMMON_USER2_POWER10
+
+static struct cpu_spec cpu_specs[] __initdata = {
+ { /* PPC970 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00390000,
+ .cpu_name = "PPC970",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970FX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003c0000,
+ .cpu_name = "PPC970FX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x00440100,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970MP */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00440000,
+ .cpu_name = "PPC970MP",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970MP,
+ .cpu_restore = __restore_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* PPC970GX */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00450000,
+ .cpu_name = "PPC970GX",
+ .cpu_features = CPU_FTRS_PPC970,
+ .cpu_user_features = COMMON_USER_POWER4 | PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .mmu_features = MMU_FTRS_PPC970,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 8,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_ppc970,
+ .platform = "ppc970",
+ },
+ { /* Power5 GR */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003a0000,
+ .cpu_name = "POWER5 (gr)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power5",
+ },
+ { /* Power5++ */
+ .pvr_mask = 0xffffff00,
+ .pvr_value = 0x003b0300,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .platform = "power5+",
+ },
+ { /* Power5 GS */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003b0000,
+ .cpu_name = "POWER5+ (gs)",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power5+",
+ },
+ { /* POWER6 in P5+ mode; 2.04-compliant processor */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000001,
+ .cpu_name = "POWER5+",
+ .cpu_features = CPU_FTRS_POWER5,
+ .cpu_user_features = COMMON_USER_POWER5_PLUS,
+ .mmu_features = MMU_FTRS_POWER5,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .platform = "power5+",
+ },
+ { /* Power6 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003e0000,
+ .cpu_name = "POWER6 (raw)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6 | PPC_FEATURE_POWER6_EXT,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power6x",
+ },
+ { /* 2.05-compliant processor, i.e. Power6 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000002,
+ .cpu_name = "POWER6 (architected)",
+ .cpu_features = CPU_FTRS_POWER6,
+ .cpu_user_features = COMMON_USER_POWER6,
+ .mmu_features = MMU_FTRS_POWER6,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .platform = "power6",
+ },
+ { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000003,
+ .cpu_name = "POWER7 (architected)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7",
+ },
+ { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000004,
+ .cpu_name = "POWER8 (architected)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* 2.07-compliant processor, HeXin C2000 processor */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00660000,
+ .cpu_name = "HX-C2000",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* 3.00-compliant processor, i.e. Power9 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000005,
+ .cpu_name = "POWER9 (architected)",
+ .cpu_features = CPU_FTRS_POWER9,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .platform = "power9",
+ },
+ { /* 3.1-compliant processor, i.e. Power10 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000006,
+ .cpu_name = "POWER10 (architected)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .platform = "power10",
+ },
+ { /* 3.1-compliant processor, i.e. Power11 "architected" mode */
+ .pvr_mask = 0xffffffff,
+ .pvr_value = 0x0f000007,
+ .cpu_name = "Power11 (architected)",
+ .cpu_features = CPU_FTRS_POWER11,
+ .cpu_user_features = COMMON_USER_POWER11,
+ .cpu_user_features2 = COMMON_USER2_POWER11,
+ .mmu_features = MMU_FTRS_POWER11,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .platform = "power11",
+ },
+ { /* Power7 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x003f0000,
+ .cpu_name = "POWER7 (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7",
+ },
+ { /* Power7+ */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004A0000,
+ .cpu_name = "POWER7+ (raw)",
+ .cpu_features = CPU_FTRS_POWER7,
+ .cpu_user_features = COMMON_USER_POWER7,
+ .cpu_user_features2 = COMMON_USER2_POWER7,
+ .mmu_features = MMU_FTRS_POWER7,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power7,
+ .cpu_restore = __restore_cpu_power7,
+ .machine_check_early = __machine_check_early_realmode_p7,
+ .platform = "power7+",
+ },
+ { /* Power8E */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004b0000,
+ .cpu_name = "POWER8E (raw)",
+ .cpu_features = CPU_FTRS_POWER8E,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power8NVL */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004c0000,
+ .cpu_name = "POWER8NVL (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power8 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004d0000,
+ .cpu_name = "POWER8 (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
+ { /* Power9 DD2.0 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0200,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_0,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD 2.1 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0201,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_1,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.2 */
+ .pvr_mask = 0xffffefff,
+ .pvr_value = 0x004e0202,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_2,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power9 DD2.3 or later */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004e0000,
+ .cpu_name = "POWER9 (raw)",
+ .cpu_features = CPU_FTRS_POWER9_DD2_3,
+ .cpu_user_features = COMMON_USER_POWER9,
+ .cpu_user_features2 = COMMON_USER2_POWER9,
+ .mmu_features = MMU_FTRS_POWER9,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power9,
+ .cpu_restore = __restore_cpu_power9,
+ .machine_check_early = __machine_check_early_realmode_p9,
+ .platform = "power9",
+ },
+ { /* Power10 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00800000,
+ .cpu_name = "POWER10 (raw)",
+ .cpu_features = CPU_FTRS_POWER10,
+ .cpu_user_features = COMMON_USER_POWER10,
+ .cpu_user_features2 = COMMON_USER2_POWER10,
+ .mmu_features = MMU_FTRS_POWER10,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .machine_check_early = __machine_check_early_realmode_p10,
+ .platform = "power10",
+ },
+ { /* Power11 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00820000,
+ .cpu_name = "Power11 (raw)",
+ .cpu_features = CPU_FTRS_POWER11,
+ .cpu_user_features = COMMON_USER_POWER11,
+ .cpu_user_features2 = COMMON_USER2_POWER11,
+ .mmu_features = MMU_FTRS_POWER11,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .cpu_setup = __setup_cpu_power10,
+ .cpu_restore = __restore_cpu_power10,
+ .machine_check_early = __machine_check_early_realmode_p10,
+ .platform = "power11",
+ },
+ { /* Cell Broadband Engine */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x00700000,
+ .cpu_name = "Cell Broadband Engine",
+ .cpu_features = CPU_FTRS_CELL,
+ .cpu_user_features = COMMON_USER_PPC64 | PPC_FEATURE_CELL |
+ PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_SMT,
+ .mmu_features = MMU_FTRS_CELL,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 4,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "ppc-cell-be",
+ },
+ { /* PA Semi PA6T */
+ .pvr_mask = 0x7fff0000,
+ .pvr_value = 0x00900000,
+ .cpu_name = "PA6T",
+ .cpu_features = CPU_FTRS_PA6T,
+ .cpu_user_features = COMMON_USER_PA6T,
+ .mmu_features = MMU_FTRS_PA6T,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_PA6T,
+ .cpu_setup = __setup_cpu_pa6t,
+ .cpu_restore = __restore_cpu_pa6t,
+ .platform = "pa6t",
+ },
+ { /* default match */
+ .pvr_mask = 0x00000000,
+ .pvr_value = 0x00000000,
+ .cpu_name = "POWER5 (compatible)",
+ .cpu_features = CPU_FTRS_COMPATIBLE,
+ .cpu_user_features = COMMON_USER_PPC64,
+ .mmu_features = MMU_FTRS_POWER,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .platform = "power5",
+ }
+};
diff --git a/arch/powerpc/kernel/cpu_specs_e500mc.h b/arch/powerpc/kernel/cpu_specs_e500mc.h
new file mode 100644
index 000000000000..2ae8e9a7b461
--- /dev/null
+++ b/arch/powerpc/kernel/cpu_specs_e500mc.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
+ *
+ * Modifications for ppc64:
+ * Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
+ */
+
+#ifdef CONFIG_PPC64
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_HAS_FPU | PPC_FEATURE_64 | \
+ PPC_FEATURE_BOOKE)
+#else
+#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
+ PPC_FEATURE_BOOKE)
+#endif
+
+static struct cpu_spec cpu_specs[] __initdata = {
+#ifdef CONFIG_PPC32
+ { /* e500mc */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80230000,
+ .cpu_name = "e500mc",
+ .cpu_features = CPU_FTRS_E500MC,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e500mc,
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce500mc",
+ .cpu_down_flush = cpu_down_flush_e500mc,
+ },
+#endif /* CONFIG_PPC32 */
+ { /* e5500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80240000,
+ .cpu_name = "e5500",
+ .cpu_features = CPU_FTRS_E5500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 4,
+ .cpu_setup = __setup_cpu_e5500,
+#ifndef CONFIG_PPC32
+ .cpu_restore = __restore_cpu_e5500,
+#endif
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce5500",
+ .cpu_down_flush = cpu_down_flush_e5500,
+ },
+ { /* e6500 */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x80400000,
+ .cpu_name = "e6500",
+ .cpu_features = CPU_FTRS_E6500,
+ .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
+ PPC_FEATURE_HAS_ALTIVEC_COMP,
+ .cpu_user_features2 = PPC_FEATURE2_ISEL,
+ .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | MMU_FTR_USE_TLBILX,
+ .icache_bsize = 64,
+ .dcache_bsize = 64,
+ .num_pmcs = 6,
+ .cpu_setup = __setup_cpu_e6500,
+#ifndef CONFIG_PPC32
+ .cpu_restore = __restore_cpu_e6500,
+#endif
+ .machine_check = machine_check_e500mc,
+ .platform = "ppce6500",
+ .cpu_down_flush = cpu_down_flush_e6500,
+ },
+};
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 9b6dcaaec1a3..6f6801da9dc1 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
*
* Modifications for ppc64:
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
@@ -15,2073 +11,38 @@
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/jump_label.h>
+#include <linux/of.h>
-#include <asm/oprofile_impl.h>
#include <asm/cputable.h>
-#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */
+#include <asm/mce.h>
#include <asm/mmu.h>
#include <asm/setup.h>
+#include <asm/cpu_setup.h>
+
+static struct cpu_spec the_cpu_spec __ro_after_init;
-struct cpu_spec* cur_cpu_spec = NULL;
+struct cpu_spec *cur_cpu_spec __ro_after_init = NULL;
EXPORT_SYMBOL(cur_cpu_spec);
/* The platform string corresponding to the real PVR */
const char *powerpc_base_platform;
-/* NOTE:
- * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's
- * the responsibility of the appropriate CPU save/restore functions to
- * eventually copy these settings over. Those save/restore aren't yet
- * part of the cputable though. That has to be fixed for both ppc32
- * and ppc64
- */
-#ifdef CONFIG_PPC32
-extern void __setup_cpu_e200(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500v1(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500v2(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e500mc(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440ep(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440epx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec);
-extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec);
-extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750cx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_750fx(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_7400(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec);
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_a2(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_pa6t(void);
-extern void __restore_cpu_ppc970(void);
-extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power7(void);
-extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_power8(void);
-extern void __restore_cpu_a2(void);
-extern void __flush_tlb_power7(unsigned long inval_selector);
-extern void __flush_tlb_power8(unsigned long inval_selector);
-extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
-extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
-#endif /* CONFIG_PPC64 */
-#if defined(CONFIG_E500)
-extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_e6500(unsigned long offset, struct cpu_spec* spec);
-extern void __restore_cpu_e5500(void);
-extern void __restore_cpu_e6500(void);
-#endif /* CONFIG_E500 */
-
-/* This table only contains "desktop" CPUs, it need to be filled with embedded
- * ones as well...
- */
-#define COMMON_USER (PPC_FEATURE_32 | PPC_FEATURE_HAS_FPU | \
- PPC_FEATURE_HAS_MMU)
-#define COMMON_USER_PPC64 (COMMON_USER | PPC_FEATURE_64)
-#define COMMON_USER_POWER4 (COMMON_USER_PPC64 | PPC_FEATURE_POWER4)
-#define COMMON_USER_POWER5 (COMMON_USER_PPC64 | PPC_FEATURE_POWER5 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
-#define COMMON_USER_POWER5_PLUS (COMMON_USER_PPC64 | PPC_FEATURE_POWER5_PLUS|\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP)
-#define COMMON_USER_POWER6 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_05 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER_POWER7 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER2_POWER7 (PPC_FEATURE2_DSCR)
-#define COMMON_USER_POWER8 (COMMON_USER_PPC64 | PPC_FEATURE_ARCH_2_06 |\
- PPC_FEATURE_SMT | PPC_FEATURE_ICACHE_SNOOP | \
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_PSERIES_PERFMON_COMPAT)
-#define COMMON_USER2_POWER8 (PPC_FEATURE2_ARCH_2_07 | \
- PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_DSCR | \
- PPC_FEATURE2_ISEL | PPC_FEATURE2_TAR | \
- PPC_FEATURE2_VEC_CRYPTO)
-#define COMMON_USER_PA6T (COMMON_USER_PPC64 | PPC_FEATURE_PA6T |\
- PPC_FEATURE_TRUE_LE | \
- PPC_FEATURE_HAS_ALTIVEC_COMP)
-#ifdef CONFIG_PPC_BOOK3E_64
-#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
-#else
-#define COMMON_USER_BOOKE (PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | \
- PPC_FEATURE_BOOKE)
-#endif
-
-static struct cpu_spec __initdata cpu_specs[] = {
-#ifdef CONFIG_PPC_BOOK3S_64
- { /* Power4 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00350000,
- .cpu_name = "POWER4 (gp)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
- { /* Power4+ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00380000,
- .cpu_name = "POWER4+ (gq)",
- .cpu_features = CPU_FTRS_POWER4,
- .cpu_user_features = COMMON_USER_POWER4,
- .mmu_features = MMU_FTRS_POWER4,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power4",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power4",
- },
- { /* PPC970 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00390000,
- .cpu_name = "PPC970",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970FX */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003c0000,
- .cpu_name = "PPC970FX",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970MP DD1.0 - no DEEPNAP, use regular 970 init */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x00440100,
- .cpu_name = "PPC970MP",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970MP",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970MP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00440000,
- .cpu_name = "PPC970MP",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970MP,
- .cpu_restore = __restore_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970MP",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* PPC970GX */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00450000,
- .cpu_name = "PPC970GX",
- .cpu_features = CPU_FTRS_PPC970,
- .cpu_user_features = COMMON_USER_POWER4 |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .mmu_features = MMU_FTRS_PPC970,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 8,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_ppc970,
- .oprofile_cpu_type = "ppc64/970",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "ppc970",
- },
- { /* Power5 GR */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003a0000,
- .cpu_name = "POWER5 (gr)",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power5",
- .oprofile_type = PPC_OPROFILE_POWER4,
- /* SIHV / SIPR bits are implemented on POWER4+ (GQ)
- * and above but only works on POWER5 and above
- */
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
- .platform = "power5",
- },
- { /* Power5++ */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x003b0300,
- .cpu_name = "POWER5+ (gs)",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .oprofile_cpu_type = "ppc64/power5++",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
- .platform = "power5+",
- },
- { /* Power5 GS */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003b0000,
- .cpu_name = "POWER5+ (gs)",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power5+",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = MMCRA_SIHV,
- .oprofile_mmcra_sipr = MMCRA_SIPR,
- .platform = "power5+",
- },
- { /* POWER6 in P5+ mode; 2.04-compliant processor */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000001,
- .cpu_name = "POWER5+",
- .cpu_features = CPU_FTRS_POWER5,
- .cpu_user_features = COMMON_USER_POWER5_PLUS,
- .mmu_features = MMU_FTRS_POWER5,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power5+",
- },
- { /* Power6 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003e0000,
- .cpu_name = "POWER6 (raw)",
- .cpu_features = CPU_FTRS_POWER6,
- .cpu_user_features = COMMON_USER_POWER6 |
- PPC_FEATURE_POWER6_EXT,
- .mmu_features = MMU_FTRS_POWER6,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power6",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_mmcra_sihv = POWER6_MMCRA_SIHV,
- .oprofile_mmcra_sipr = POWER6_MMCRA_SIPR,
- .oprofile_mmcra_clear = POWER6_MMCRA_THRM |
- POWER6_MMCRA_OTHER,
- .platform = "power6x",
- },
- { /* 2.05-compliant processor, i.e. Power6 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000002,
- .cpu_name = "POWER6 (architected)",
- .cpu_features = CPU_FTRS_POWER6,
- .cpu_user_features = COMMON_USER_POWER6,
- .mmu_features = MMU_FTRS_POWER6,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .platform = "power6",
- },
- { /* 2.06-compliant processor, i.e. Power7 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000003,
- .cpu_name = "POWER7 (architected)",
- .cpu_features = CPU_FTRS_POWER7,
- .cpu_user_features = COMMON_USER_POWER7,
- .cpu_user_features2 = COMMON_USER2_POWER7,
- .mmu_features = MMU_FTRS_POWER7,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_POWER4,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
- .machine_check_early = __machine_check_early_realmode_p7,
- .platform = "power7",
- },
- { /* 2.07-compliant processor, i.e. Power8 "architected" mode */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x0f000004,
- .cpu_name = "POWER8 (architected)",
- .cpu_features = CPU_FTRS_POWER8,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .oprofile_type = PPC_OPROFILE_INVALID,
- .oprofile_cpu_type = "ppc64/ibm-compat-v1",
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Power7 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x003f0000,
- .cpu_name = "POWER7 (raw)",
- .cpu_features = CPU_FTRS_POWER7,
- .cpu_user_features = COMMON_USER_POWER7,
- .cpu_user_features2 = COMMON_USER2_POWER7,
- .mmu_features = MMU_FTRS_POWER7,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power7",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
- .machine_check_early = __machine_check_early_realmode_p7,
- .platform = "power7",
- },
- { /* Power7+ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004A0000,
- .cpu_name = "POWER7+ (raw)",
- .cpu_features = CPU_FTRS_POWER7,
- .cpu_user_features = COMMON_USER_POWER7,
- .cpu_user_features2 = COMMON_USER2_POWER7,
- .mmu_features = MMU_FTRS_POWER7,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power7",
- .oprofile_type = PPC_OPROFILE_POWER4,
- .cpu_setup = __setup_cpu_power7,
- .cpu_restore = __restore_cpu_power7,
- .flush_tlb = __flush_tlb_power7,
- .machine_check_early = __machine_check_early_realmode_p7,
- .platform = "power7+",
- },
- { /* Power8E */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004b0000,
- .cpu_name = "POWER8E (raw)",
- .cpu_features = CPU_FTRS_POWER8E,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Power8 DD1: Does not support doorbell IPIs */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x004d0100,
- .cpu_name = "POWER8 (raw)",
- .cpu_features = CPU_FTRS_POWER8_DD1,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Power8 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x004d0000,
- .cpu_name = "POWER8 (raw)",
- .cpu_features = CPU_FTRS_POWER8,
- .cpu_user_features = COMMON_USER_POWER8,
- .cpu_user_features2 = COMMON_USER2_POWER8,
- .mmu_features = MMU_FTRS_POWER8,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/power8",
- .oprofile_type = PPC_OPROFILE_INVALID,
- .cpu_setup = __setup_cpu_power8,
- .cpu_restore = __restore_cpu_power8,
- .flush_tlb = __flush_tlb_power8,
- .machine_check_early = __machine_check_early_realmode_p8,
- .platform = "power8",
- },
- { /* Cell Broadband Engine */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00700000,
- .cpu_name = "Cell Broadband Engine",
- .cpu_features = CPU_FTRS_CELL,
- .cpu_user_features = COMMON_USER_PPC64 |
- PPC_FEATURE_CELL | PPC_FEATURE_HAS_ALTIVEC_COMP |
- PPC_FEATURE_SMT,
- .mmu_features = MMU_FTRS_CELL,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .oprofile_cpu_type = "ppc64/cell-be",
- .oprofile_type = PPC_OPROFILE_CELL,
- .platform = "ppc-cell-be",
- },
- { /* PA Semi PA6T */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00900000,
- .cpu_name = "PA6T",
- .cpu_features = CPU_FTRS_PA6T,
- .cpu_user_features = COMMON_USER_PA6T,
- .mmu_features = MMU_FTRS_PA6T,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_PA6T,
- .cpu_setup = __setup_cpu_pa6t,
- .cpu_restore = __restore_cpu_pa6t,
- .oprofile_cpu_type = "ppc64/pa6t",
- .oprofile_type = PPC_OPROFILE_PA6T,
- .platform = "pa6t",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "POWER4 (compatible)",
- .cpu_features = CPU_FTRS_COMPATIBLE,
- .cpu_user_features = COMMON_USER_PPC64,
- .mmu_features = MMU_FTRS_DEFAULT_HPTE_ARCH_V2,
- .icache_bsize = 128,
- .dcache_bsize = 128,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_IBM,
- .platform = "power4",
- }
-#endif /* CONFIG_PPC_BOOK3S_64 */
+#include "cpu_specs.h"
-#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
- { /* 601 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00010000,
- .cpu_name = "601",
- .cpu_features = CPU_FTRS_PPC601,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_601_INSTR |
- PPC_FEATURE_UNIFIED_CACHE | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_generic,
- .platform = "ppc601",
- },
- { /* 603 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00030000,
- .cpu_name = "603",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 603e */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00060000,
- .cpu_name = "603e",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 603ev */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00070000,
- .cpu_name = "603ev",
- .cpu_features = CPU_FTRS_603,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* 604 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00040000,
- .cpu_name = "604",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 2,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 604e */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x00090000,
- .cpu_name = "604e",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 604r */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00090000,
- .cpu_name = "604r",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 604ev */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x000a0000,
- .cpu_name = "604ev",
- .cpu_features = CPU_FTRS_604,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_604,
- .machine_check = machine_check_generic,
- .platform = "ppc604",
- },
- { /* 740/750 (0x4202, don't support TAU ?) */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x00084202,
- .cpu_name = "740/750",
- .cpu_features = CPU_FTRS_740_NOTAU,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CX (80100 and 8010x?) */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x00080100,
- .cpu_name = "750CX",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CX (82201 and 82202) */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x00082200,
- .cpu_name = "750CX",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CXe (82214) */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x00082210,
- .cpu_name = "750CXe",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CXe "Gekko" (83214) */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x00083214,
- .cpu_name = "750CXe",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750cx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750CL (and "Broadway") */
- .pvr_mask = 0xfffff0e0,
- .pvr_value = 0x00087000,
- .cpu_name = "750CL",
- .cpu_features = CPU_FTRS_750CL,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 745/755 */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x00083000,
- .cpu_name = "745/755",
- .cpu_features = CPU_FTRS_750,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 750FX rev 1.x */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x70000100,
- .cpu_name = "750FX",
- .cpu_features = CPU_FTRS_750FX1,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 750FX rev 2.0 must disable HID0[DPM] */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x70000200,
- .cpu_name = "750FX",
- .cpu_features = CPU_FTRS_750FX2,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 750FX (All revs except 2.0) */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x70000000,
- .cpu_name = "750FX",
- .cpu_features = CPU_FTRS_750FX,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750fx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 750GX */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x70020000,
- .cpu_name = "750GX",
- .cpu_features = CPU_FTRS_750GX,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750fx,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- .oprofile_cpu_type = "ppc/750",
- .oprofile_type = PPC_OPROFILE_G4,
- },
- { /* 740/750 (L2CR bit need fixup for 740) */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00080000,
- .cpu_name = "740/750",
- .cpu_features = CPU_FTRS_740,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_IBM,
- .cpu_setup = __setup_cpu_750,
- .machine_check = machine_check_generic,
- .platform = "ppc750",
- },
- { /* 7400 rev 1.1 ? (no TAU) */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x000c1101,
- .cpu_name = "7400 (1.1)",
- .cpu_features = CPU_FTRS_7400_NOTAU,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_7400,
- .machine_check = machine_check_generic,
- .platform = "ppc7400",
- },
- { /* 7400 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x000c0000,
- .cpu_name = "7400",
- .cpu_features = CPU_FTRS_7400,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_7400,
- .machine_check = machine_check_generic,
- .platform = "ppc7400",
- },
- { /* 7410 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x800c0000,
- .cpu_name = "7410",
- .cpu_features = CPU_FTRS_7400,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_7410,
- .machine_check = machine_check_generic,
- .platform = "ppc7400",
- },
- { /* 7450 2.0 - no doze/nap */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80000200,
- .cpu_name = "7450",
- .cpu_features = CPU_FTRS_7450_20,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7450 2.1 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80000201,
- .cpu_name = "7450",
- .cpu_features = CPU_FTRS_7450_21,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7450 2.3 and newer */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80000000,
- .cpu_name = "7450",
- .cpu_features = CPU_FTRS_7450_23,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7455 rev 1.x */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x80010100,
- .cpu_name = "7455",
- .cpu_features = CPU_FTRS_7455_1,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7455 rev 2.0 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80010200,
- .cpu_name = "7455",
- .cpu_features = CPU_FTRS_7455_20,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7455 others */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80010000,
- .cpu_name = "7455",
- .cpu_features = CPU_FTRS_7455,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447/7457 Rev 1.0 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80020100,
- .cpu_name = "7447/7457",
- .cpu_features = CPU_FTRS_7447_10,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447/7457 Rev 1.1 */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x80020101,
- .cpu_name = "7447/7457",
- .cpu_features = CPU_FTRS_7447_10,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447/7457 Rev 1.2 and later */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80020000,
- .cpu_name = "7447/7457",
- .cpu_features = CPU_FTRS_7447,
- .cpu_user_features = COMMON_USER | PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7447A */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80030000,
- .cpu_name = "7447A",
- .cpu_features = CPU_FTRS_7447A,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 7448 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80040000,
- .cpu_name = "7448",
- .cpu_features = CPU_FTRS_7448,
- .cpu_user_features = COMMON_USER |
- PPC_FEATURE_HAS_ALTIVEC_COMP | PPC_FEATURE_PPC_LE,
- .mmu_features = MMU_FTR_HPTE_TABLE | MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 6,
- .pmc_type = PPC_PMC_G4,
- .cpu_setup = __setup_cpu_745x,
- .oprofile_cpu_type = "ppc/7450",
- .oprofile_type = PPC_OPROFILE_G4,
- .machine_check = machine_check_generic,
- .platform = "ppc7450",
- },
- { /* 82xx (8240, 8245, 8260 are all 603e cores) */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00810000,
- .cpu_name = "82xx",
- .cpu_features = CPU_FTRS_82XX,
- .cpu_user_features = COMMON_USER,
- .mmu_features = 0,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* All G2_LE (603e core, plus some) have the same pvr */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00820000,
- .cpu_name = "G2_LE",
- .cpu_features = CPU_FTRS_G2_LE,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* e300c1 (a 603e core, plus some) on 83xx */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00830000,
- .cpu_name = "e300c1",
- .cpu_features = CPU_FTRS_E300,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00840000,
- .cpu_name = "e300c2",
- .cpu_features = CPU_FTRS_E300C2,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_USE_HIGH_BATS |
- MMU_FTR_NEED_DTLB_SW_LRU,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
- { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00850000,
- .cpu_name = "e300c3",
- .cpu_features = CPU_FTRS_E300,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS |
- MMU_FTR_NEED_DTLB_SW_LRU,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e300",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .platform = "ppc603",
- },
- { /* e300c4 (e300c1, plus one IU) */
- .pvr_mask = 0x7fff0000,
- .pvr_value = 0x00860000,
- .cpu_name = "e300c4",
- .cpu_features = CPU_FTRS_E300,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_USE_HIGH_BATS |
- MMU_FTR_NEED_DTLB_SW_LRU,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_603,
- .machine_check = machine_check_generic,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e300",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .platform = "ppc603",
- },
- { /* default match, we assume split I/D cache & TB (non-601)... */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic PPC)",
- .cpu_features = CPU_FTRS_CLASSIC32,
- .cpu_user_features = COMMON_USER,
- .mmu_features = MMU_FTR_HPTE_TABLE,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_generic,
- .platform = "ppc603",
- },
-#endif /* CONFIG_PPC_BOOK3S_32 */
-#ifdef CONFIG_8xx
- { /* 8xx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00500000,
- .cpu_name = "8xx",
- /* CPU_FTR_MAYBE_CAN_DOZE is possible,
- * if the 8xx code is there.... */
- .cpu_features = CPU_FTRS_8XX,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_8xx,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .platform = "ppc823",
- },
-#endif /* CONFIG_8xx */
-#ifdef CONFIG_40x
- { /* 403GC */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x00200200,
- .cpu_name = "403GC",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 403GCX */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x00201400,
- .cpu_name = "403GCX",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_NO_TB,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 403G ?? */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00200000,
- .cpu_name = "403G ??",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 16,
- .dcache_bsize = 16,
- .machine_check = machine_check_4xx,
- .platform = "ppc403",
- },
- { /* 405GP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40110000,
- .cpu_name = "405GP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STB 03xxx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40130000,
- .cpu_name = "STB03xxx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STB 04xxx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41810000,
- .cpu_name = "STB04xxx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* NP405L */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41610000,
- .cpu_name = "NP405L",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* NP4GS3 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x40B10000,
- .cpu_name = "NP4GS3",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* NP405H */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41410000,
- .cpu_name = "NP405H",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405GPr */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x50910000,
- .cpu_name = "405GPr",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* STBx25xx */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x51510000,
- .cpu_name = "STBx25xx",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405LP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41F10000,
- .cpu_name = "405LP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* Xilinx Virtex-II Pro */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x20010000,
- .cpu_name = "Virtex-II Pro",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* Xilinx Virtex-4 FX */
- .pvr_mask = 0xfffff000,
- .pvr_value = 0x20011000,
- .cpu_name = "Virtex-4 FX",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EP */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x51210000,
- .cpu_name = "405EP",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. A/B with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910007,
- .cpu_name = "405EX Rev. A/B",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. C without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x1291000d,
- .cpu_name = "405EX Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. C with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x1291000f,
- .cpu_name = "405EX Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. D without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910003,
- .cpu_name = "405EX Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EX Rev. D with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910005,
- .cpu_name = "405EX Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. A/B without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910001,
- .cpu_name = "405EXr Rev. A/B",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. C without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910009,
- .cpu_name = "405EXr Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. C with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x1291000b,
- .cpu_name = "405EXr Rev. C",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. D without Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910000,
- .cpu_name = "405EXr Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* 405EXr Rev. D with Security */
- .pvr_mask = 0xffff000f,
- .pvr_value = 0x12910002,
- .cpu_name = "405EXr Rev. D",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- {
- /* 405EZ */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x41510000,
- .cpu_name = "405EZ",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* APM8018X */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x7ff11432,
- .cpu_name = "APM8018X",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic 40x PPC)",
- .cpu_features = CPU_FTRS_40X,
- .cpu_user_features = PPC_FEATURE_32 |
- PPC_FEATURE_HAS_MMU | PPC_FEATURE_HAS_4xxMAC,
- .mmu_features = MMU_FTR_TYPE_40x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc405",
- }
+void __init set_cur_cpu_spec(struct cpu_spec *s)
+{
+ struct cpu_spec *t = &the_cpu_spec;
-#endif /* CONFIG_40x */
-#ifdef CONFIG_44x
- {
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000850,
- .cpu_name = "440GR Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000858,
- .cpu_name = "440EP Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440ep,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- {
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x400008d3,
- .cpu_name = "440GR Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* Matches both physical and logical PVR for 440EP (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000ff7,
- .pvr_value = 0x400008d4,
- .cpu_name = "440EP Rev. C",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440ep,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* Use logical PVR for 440EP (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x400008db,
- .cpu_name = "440EP Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440ep,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* 440GRX */
- .pvr_mask = 0xf0000ffb,
- .pvr_value = 0x200008D0,
- .cpu_name = "440GRX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440grx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* Use logical PVR for 440EPx (logical pvr = pvr | 0x8) */
- .pvr_mask = 0xf0000ffb,
- .pvr_value = 0x200008D8,
- .cpu_name = "440EPX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440epx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GP Rev. B */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000440,
- .cpu_name = "440GP Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440gp",
- },
- { /* 440GP Rev. C */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x40000481,
- .cpu_name = "440GP Rev. C",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440gp",
- },
- { /* 440GX Rev. A */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000850,
- .cpu_name = "440GX Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GX Rev. B */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000851,
- .cpu_name = "440GX Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GX Rev. C */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000892,
- .cpu_name = "440GX Rev. C",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440GX Rev. F */
- .pvr_mask = 0xf0000fff,
- .pvr_value = 0x50000894,
- .cpu_name = "440GX Rev. F",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440gx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440SP Rev. A */
- .pvr_mask = 0xfff00fff,
- .pvr_value = 0x53200891,
- .cpu_name = "440SP Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- },
- { /* 440SPe Rev. A */
- .pvr_mask = 0xfff00fff,
- .pvr_value = 0x53400890,
- .cpu_name = "440SPe Rev. A",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440spe,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440SPe Rev. B */
- .pvr_mask = 0xfff00fff,
- .pvr_value = 0x53400891,
- .cpu_name = "440SPe Rev. B",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440spe,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 440 in Xilinx Virtex-5 FXT */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x7ff21910,
- .cpu_name = "440 in Virtex-5 FXT",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_440x5,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460EX */
- .pvr_mask = 0xffff0006,
- .pvr_value = 0x13020002,
- .cpu_name = "460EX",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460ex,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460EX Rev B */
- .pvr_mask = 0xffff0007,
- .pvr_value = 0x13020004,
- .cpu_name = "460EX Rev. B",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460ex,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460GT */
- .pvr_mask = 0xffff0006,
- .pvr_value = 0x13020000,
- .cpu_name = "460GT",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460gt,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460GT Rev B */
- .pvr_mask = 0xffff0007,
- .pvr_value = 0x13020005,
- .cpu_name = "460GT Rev. B",
- .cpu_features = CPU_FTRS_440x6,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460gt,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 460SX */
- .pvr_mask = 0xffffff00,
- .pvr_value = 0x13541800,
- .cpu_name = "460SX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_460sx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 464 in APM821xx */
- .pvr_mask = 0xfffffff0,
- .pvr_value = 0x12C41C80,
- .cpu_name = "APM821XX",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_apm821xx,
- .machine_check = machine_check_440A,
- .platform = "ppc440",
- },
- { /* 476 DD2 core */
- .pvr_mask = 0xffffffff,
- .pvr_value = 0x11a52080,
- .cpu_name = "476",
- .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* 476fpe */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x7ff50000,
- .cpu_name = "476fpe",
- .cpu_features = CPU_FTRS_47X | CPU_FTR_476_DD2,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* 476 iss */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x00050000,
- .cpu_name = "476",
- .cpu_features = CPU_FTRS_47X,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* 476 others */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x11a50000,
- .cpu_name = "476",
- .cpu_features = CPU_FTRS_47X,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_FPU,
- .mmu_features = MMU_FTR_TYPE_47x |
- MMU_FTR_USE_TLBIVAX_BCAST | MMU_FTR_LOCK_BCAST_INVAL,
- .icache_bsize = 32,
- .dcache_bsize = 128,
- .machine_check = machine_check_47x,
- .platform = "ppc470",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic 44x PPC)",
- .cpu_features = CPU_FTRS_44X,
- .cpu_user_features = COMMON_USER_BOOKE,
- .mmu_features = MMU_FTR_TYPE_44x,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_4xx,
- .platform = "ppc440",
- }
-#endif /* CONFIG_44x */
-#ifdef CONFIG_E200
- { /* e200z5 */
- .pvr_mask = 0xfff00000,
- .pvr_value = 0x81000000,
- .cpu_name = "e200z5",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- },
- { /* e200z6 */
- .pvr_mask = 0xfff00000,
- .pvr_value = 0x81100000,
- .cpu_name = "e200z6",
- /* xxx - galak: add CPU_FTR_MAYBE_CAN_DOZE */
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- },
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic E200 PPC)",
- .cpu_features = CPU_FTRS_E200,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_EFP_SINGLE |
- PPC_FEATURE_UNIFIED_CACHE,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .dcache_bsize = 32,
- .cpu_setup = __setup_cpu_e200,
- .machine_check = machine_check_e200,
- .platform = "ppc5554",
- }
-#endif /* CONFIG_E200 */
-#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_E500
-#ifdef CONFIG_PPC32
- { /* e500 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80200000,
- .cpu_name = "e500",
- .cpu_features = CPU_FTRS_E500,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e500v1,
- .machine_check = machine_check_e500,
- .platform = "ppc8540",
- },
- { /* e500v2 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80210000,
- .cpu_name = "e500v2",
- .cpu_features = CPU_FTRS_E500_2,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP |
- PPC_FEATURE_HAS_EFP_DOUBLE_COMP,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e500v2,
- .machine_check = machine_check_e500,
- .platform = "ppc8548",
- },
- { /* e500mc */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80230000,
- .cpu_name = "e500mc",
- .cpu_features = CPU_FTRS_E500MC,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
- MMU_FTR_USE_TLBILX,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500mc",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e500mc,
- .machine_check = machine_check_e500mc,
- .platform = "ppce500mc",
- },
-#endif /* CONFIG_PPC32 */
- { /* e5500 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80240000,
- .cpu_name = "e5500",
- .cpu_features = CPU_FTRS_E5500,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
- MMU_FTR_USE_TLBILX,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 4,
- .oprofile_cpu_type = "ppc/e500mc",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e5500,
-#ifndef CONFIG_PPC32
- .cpu_restore = __restore_cpu_e5500,
-#endif
- .machine_check = machine_check_e500mc,
- .platform = "ppce5500",
- },
- { /* e6500 */
- .pvr_mask = 0xffff0000,
- .pvr_value = 0x80400000,
- .cpu_name = "e6500",
- .cpu_features = CPU_FTRS_E6500,
- .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU |
- PPC_FEATURE_HAS_ALTIVEC_COMP,
- .cpu_user_features2 = PPC_FEATURE2_ISEL,
- .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS |
- MMU_FTR_USE_TLBILX,
- .icache_bsize = 64,
- .dcache_bsize = 64,
- .num_pmcs = 6,
- .oprofile_cpu_type = "ppc/e6500",
- .oprofile_type = PPC_OPROFILE_FSL_EMB,
- .cpu_setup = __setup_cpu_e6500,
-#ifndef CONFIG_PPC32
- .cpu_restore = __restore_cpu_e6500,
-#endif
- .machine_check = machine_check_e500mc,
- .platform = "ppce6500",
- },
-#ifdef CONFIG_PPC32
- { /* default match */
- .pvr_mask = 0x00000000,
- .pvr_value = 0x00000000,
- .cpu_name = "(generic E500 PPC)",
- .cpu_features = CPU_FTRS_E500,
- .cpu_user_features = COMMON_USER_BOOKE |
- PPC_FEATURE_HAS_SPE_COMP |
- PPC_FEATURE_HAS_EFP_SINGLE_COMP,
- .mmu_features = MMU_FTR_TYPE_FSL_E,
- .icache_bsize = 32,
- .dcache_bsize = 32,
- .machine_check = machine_check_e500,
- .platform = "powerpc",
- }
-#endif /* CONFIG_PPC32 */
-#endif /* CONFIG_E500 */
-};
+ t = PTRRELOC(t);
+ /*
+ * use memcpy() instead of *t = *s so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
-static struct cpu_spec the_cpu_spec;
+ *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
+}
static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
struct cpu_spec *s)
@@ -2092,8 +53,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
t = PTRRELOC(t);
old = *t;
- /* Copy everything, then do fixups */
- *t = *s;
+ /*
+ * Copy everything, then do fixups. Use memcpy() instead of *t = *s
+ * so that GCC replaces it by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
/*
* If we are overriding a previous value derived from the real
@@ -2103,30 +67,18 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
if (old.num_pmcs && !s->num_pmcs) {
t->num_pmcs = old.num_pmcs;
t->pmc_type = old.pmc_type;
- t->oprofile_type = old.oprofile_type;
- t->oprofile_mmcra_sihv = old.oprofile_mmcra_sihv;
- t->oprofile_mmcra_sipr = old.oprofile_mmcra_sipr;
- t->oprofile_mmcra_clear = old.oprofile_mmcra_clear;
/*
- * If we have passed through this logic once before and
- * have pulled the default case because the real PVR was
- * not found inside cpu_specs[], then we are possibly
- * running in compatibility mode. In that case, let the
- * oprofiler know which set of compatibility counters to
- * pull from by making sure the oprofile_cpu_type string
- * is set to that of compatibility mode. If the
- * oprofile_cpu_type already has a value, then we are
- * possibly overriding a real PVR with a logical one,
- * and, in that case, keep the current value for
- * oprofile_cpu_type.
+ * Let's ensure that the
+ * fix for the PMAO bug is enabled on compatibility mode.
*/
- if (old.oprofile_cpu_type != NULL) {
- t->oprofile_cpu_type = old.oprofile_cpu_type;
- t->oprofile_type = old.oprofile_type;
- }
+ t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG;
}
+ /* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */
+ if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32))
+ t->mmu_features |= MMU_FTR_KUAP;
+
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
/*
@@ -2156,6 +108,8 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
struct cpu_spec *s = cpu_specs;
int i;
+ BUILD_BUG_ON(!ARRAY_SIZE(cpu_specs));
+
s = PTRRELOC(s);
for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
@@ -2167,3 +121,62 @@ struct cpu_spec * __init identify_cpu(unsigned long offset, unsigned int pvr)
return NULL;
}
+
+/*
+ * Used by cpufeatures to get the name for CPUs with a PVR table.
+ * If they don't have a PVR table, cpufeatures gets the name from
+ * cpu device-tree node.
+ */
+void __init identify_cpu_name(unsigned int pvr)
+{
+ struct cpu_spec *s = cpu_specs;
+ struct cpu_spec *t = &the_cpu_spec;
+ int i;
+
+ s = PTRRELOC(s);
+ t = PTRRELOC(t);
+
+ for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) {
+ if ((pvr & s->pvr_mask) == s->pvr_value) {
+ t->cpu_name = s->cpu_name;
+ return;
+ }
+ }
+}
+
+
+#ifdef CONFIG_JUMP_LABEL_FEATURE_CHECKS
+struct static_key_true cpu_feature_keys[NUM_CPU_FTR_KEYS] = {
+ [0 ... NUM_CPU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL_GPL(cpu_feature_keys);
+
+void __init cpu_feature_keys_init(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_CPU_FTR_KEYS; i++) {
+ unsigned long f = 1ul << i;
+
+ if (!(cur_cpu_spec->cpu_features & f))
+ static_branch_disable(&cpu_feature_keys[i]);
+ }
+}
+
+struct static_key_true mmu_feature_keys[NUM_MMU_FTR_KEYS] = {
+ [0 ... NUM_MMU_FTR_KEYS - 1] = STATIC_KEY_TRUE_INIT
+};
+EXPORT_SYMBOL(mmu_feature_keys);
+
+void __init mmu_feature_keys_init(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_MMU_FTR_KEYS; i++) {
+ unsigned long f = 1ul << i;
+
+ if (!(cur_cpu_spec->mmu_features & f))
+ static_branch_disable(&mmu_feature_keys[i]);
+ }
+}
+#endif
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index 7a13f378ca2c..103b6605dd68 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -1,25 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Routines for doing kexec-based kdump.
*
* Copyright (C) 2005, IBM Corp.
*
* Created by: Michael Ellerman
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
#undef DEBUG
#include <linux/crash_dump.h>
-#include <linux/bootmem.h>
+#include <linux/io.h>
#include <linux/memblock.h>
-#include <asm/code-patching.h>
+#include <linux/of.h>
+#include <asm/text-patching.h>
#include <asm/kdump.h>
-#include <asm/prom.h>
#include <asm/firmware.h>
-#include <asm/uaccess.h>
+#include <linux/uio.h>
#include <asm/rtas.h>
+#include <asm/inst.h>
+#include <asm/fadump.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -36,7 +36,7 @@ void __init reserve_kdump_trampoline(void)
static void __init create_trampoline(unsigned long addr)
{
- unsigned int *p = (unsigned int *)addr;
+ u32 *p = (u32 *)addr;
/* The maximum range of a single instruction branch, is the current
* instruction's address + (32 MB - 4) bytes. For the trampoline we
@@ -46,8 +46,8 @@ static void __init create_trampoline(unsigned long addr)
* branch to "addr" we jump to ("addr" + 32 MB). Although it requires
* two instructions it doesn't require any registers.
*/
- patch_instruction(p, PPC_INST_NOP);
- patch_branch(++p, addr + PHYSICAL_START, 0);
+ patch_instruction(p, ppc_inst(PPC_RAW_NOP()));
+ patch_branch(p + 1, addr + PHYSICAL_START, 0);
}
void __init setup_kdump_trampoline(void)
@@ -69,33 +69,8 @@ void __init setup_kdump_trampoline(void)
}
#endif /* CONFIG_NONSTATIC_KERNEL */
-static size_t copy_oldmem_vaddr(void *vaddr, char *buf, size_t csize,
- unsigned long offset, int userbuf)
-{
- if (userbuf) {
- if (copy_to_user((char __user *)buf, (vaddr + offset), csize))
- return -EFAULT;
- } else
- memcpy(buf, (vaddr + offset), csize);
-
- return csize;
-}
-
-/**
- * copy_oldmem_page - copy one page from "oldmem"
- * @pfn: page frame number to be copied
- * @buf: target memory address for the copy; this can be in kernel address
- * space or user address space (see @userbuf)
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page (based on pfn) to begin the copy
- * @userbuf: if set, @buf is in user address space, use copy_to_user(),
- * otherwise @buf is in kernel address space, use memcpy().
- *
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
- size_t csize, unsigned long offset, int userbuf)
+ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn,
+ size_t csize, unsigned long offset)
{
void *vaddr;
phys_addr_t paddr;
@@ -108,16 +83,27 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (memblock_is_region_memory(paddr, csize)) {
vaddr = __va(paddr);
- csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ csize = copy_to_iter(vaddr + offset, csize, iter);
} else {
- vaddr = __ioremap(paddr, PAGE_SIZE, 0);
- csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
+ vaddr = ioremap_cache(paddr, PAGE_SIZE);
+ csize = copy_to_iter(vaddr + offset, csize, iter);
iounmap(vaddr);
}
return csize;
}
+/*
+ * Return true only when kexec based kernel dump capturing method is used.
+ * This ensures all restrictions applied for kdump case are not automatically
+ * applied for fadump case.
+ */
+bool is_kdump_kernel(void)
+{
+ return !is_fadump_active() && elfcorehdr_addr != ELFCORE_ADDR_MAX;
+}
+EXPORT_SYMBOL_GPL(is_kdump_kernel);
+
#ifdef CONFIG_PPC_RTAS
/*
* The crashkernel region will almost always overlap the RTAS region, so
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
new file mode 100644
index 000000000000..909a05cd2809
--- /dev/null
+++ b/arch/powerpc/kernel/dawr.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * DAWR infrastructure
+ *
+ * Copyright 2019, Michael Neuling, IBM Corporation.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <asm/machdep.h>
+#include <asm/hvcall.h>
+#include <asm/firmware.h>
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+int set_dawr(int nr, struct arch_hw_breakpoint *brk)
+{
+ unsigned long dawr, dawrx, mrd;
+
+ dawr = brk->address;
+
+ dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE))
+ << (63 - 58);
+ dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) << (63 - 59);
+ dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) >> 3;
+ /*
+ * DAWR length is stored in field MDR bits 48:53. Matches range in
+ * doublewords (64 bits) biased by -1 eg. 0b000000=1DW and
+ * 0b111111=64DW.
+ * brk->hw_len is in bytes.
+ * This aligns up to double word size, shifts and does the bias.
+ */
+ mrd = ((brk->hw_len + 7) >> 3) - 1;
+ dawrx |= (mrd & 0x3f) << (63 - 53);
+
+ if (ppc_md.set_dawr)
+ return ppc_md.set_dawr(nr, dawr, dawrx);
+
+ if (nr == 0) {
+ mtspr(SPRN_DAWR0, dawr);
+ mtspr(SPRN_DAWRX0, dawrx);
+ } else {
+ mtspr(SPRN_DAWR1, dawr);
+ mtspr(SPRN_DAWRX1, dawrx);
+ }
+
+ return 0;
+}
+
+static void disable_dawrs_cb(void *info)
+{
+ struct arch_hw_breakpoint null_brk = {0};
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_dawr(i, &null_brk);
+}
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0};
+ size_t rc;
+
+ /* Send error to user if the hypervisor won't allow us to write DAWR */
+ if (!dawr_force_enable &&
+ firmware_has_feature(FW_FEATURE_LPAR) &&
+ set_dawr(0, &null_brk) != H_SUCCESS)
+ return -ENODEV;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function(disable_dawrs_cb, NULL, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ arch_debugfs_dir,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index d55c76c571f3..5712dd846263 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Author: Kumar Gala <galak@kernel.crashing.org>
*
* Copyright 2009 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -16,42 +12,36 @@
#include <linux/hardirq.h>
#include <asm/dbell.h>
+#include <asm/interrupt.h>
#include <asm/irq_regs.h>
+#include <asm/kvm_ppc.h>
+#include <asm/trace.h>
#ifdef CONFIG_SMP
-void doorbell_setup_this_cpu(void)
-{
- unsigned long tag = mfspr(SPRN_DOORBELL_CPUTAG) & PPC_DBELL_TAG_MASK;
-
- smp_muxed_ipi_set_data(smp_processor_id(), tag);
-}
-
-void doorbell_cause_ipi(int cpu, unsigned long data)
-{
- /* Order previous accesses vs. msgsnd, which is treated as a store */
- mb();
- ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, data);
-}
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- irq_enter();
+ trace_doorbell_entry(regs);
+
+ ppc_msgsync();
- may_hard_irq_enable();
+ if (should_hard_irq_enable(regs))
+ do_hard_irq_enable();
- __get_cpu_var(irq_stat).doorbell_irqs++;
+ kvmppc_clear_host_ipi(smp_processor_id());
+ __this_cpu_inc(irq_stat.doorbell_irqs);
- smp_ipi_demux();
+ smp_ipi_demux_relaxed(); /* already performed the barrier */
+
+ trace_doorbell_exit(regs);
- irq_exit();
set_irq_regs(old_regs);
}
#else /* CONFIG_SMP */
-void doorbell_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception)
{
printk(KERN_WARNING "Received doorbell on non-smp system\n");
}
#endif /* CONFIG_SMP */
-
diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c
new file mode 100644
index 000000000000..3a0358e91c60
--- /dev/null
+++ b/arch/powerpc/kernel/dexcr.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
+
+#include <asm/cpu_has_feature.h>
+#include <asm/cputable.h>
+#include <asm/processor.h>
+#include <asm/reg.h>
+
+/*
+ * Seed the current task's on-exec DEXCR value from the hardware register.
+ *
+ * Registered as an early initcall. Does nothing on CPUs without
+ * CPU_FTR_ARCH_31; otherwise copies the value read from SPRN_DEXCR into
+ * current->thread.dexcr_onexec. Always returns 0.
+ */
+static int __init init_task_dexcr(void)
+{
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_31))
+		return 0;
+
+	current->thread.dexcr_onexec = mfspr(SPRN_DEXCR);
+
+	return 0;
+}
+early_initcall(init_task_dexcr);
+
+/* Allow thread local configuration of these by default */
+#define DEXCR_PRCTL_EDITABLE ( \
+ DEXCR_PR_IBRTPD | \
+ DEXCR_PR_SRAPD | \
+ DEXCR_PR_NPHIE)
+
+/*
+ * Translate a PR_PPC_DEXCR_* selector into its DEXCR_PR_* aspect mask.
+ *
+ * On success the mask is stored through @aspect and 0 is returned.
+ * An unrecognised @which leaves *@aspect untouched and returns -ENODEV.
+ */
+static int prctl_to_aspect(unsigned long which, unsigned int *aspect)
+{
+	switch (which) {
+	case PR_PPC_DEXCR_SBHE:
+		*aspect = DEXCR_PR_SBHE;
+		return 0;
+	case PR_PPC_DEXCR_IBRTPD:
+		*aspect = DEXCR_PR_IBRTPD;
+		return 0;
+	case PR_PPC_DEXCR_SRAPD:
+		*aspect = DEXCR_PR_SRAPD;
+		return 0;
+	case PR_PPC_DEXCR_NPHIE:
+		*aspect = DEXCR_PR_NPHIE;
+		return 0;
+	}
+
+	return -ENODEV;
+}
+
+/*
+ * Report the state of one DEXCR aspect as PR_PPC_DEXCR_CTRL_* flags.
+ *
+ * @task:  task whose thread.dexcr_onexec supplies the *_ONEXEC state
+ * @which: PR_PPC_DEXCR_* aspect selector
+ *
+ * Returns the error from prctl_to_aspect() for an unknown selector.
+ * Otherwise returns a flag word holding EDITABLE (if the aspect is in
+ * DEXCR_PRCTL_EDITABLE), one of SET/CLEAR taken from the value read with
+ * mfspr(SPRN_DEXCR), and one of SET_ONEXEC/CLEAR_ONEXEC taken from @task.
+ */
+int get_dexcr_prctl(struct task_struct *task, unsigned long which)
+{
+	unsigned int aspect;
+	int flags = 0;
+	int err;
+
+	err = prctl_to_aspect(which, &aspect);
+	if (err)
+		return err;
+
+	if (aspect & DEXCR_PRCTL_EDITABLE)
+		flags |= PR_PPC_DEXCR_CTRL_EDITABLE;
+
+	flags |= (aspect & mfspr(SPRN_DEXCR)) ?
+			PR_PPC_DEXCR_CTRL_SET : PR_PPC_DEXCR_CTRL_CLEAR;
+
+	flags |= (aspect & task->thread.dexcr_onexec) ?
+			PR_PPC_DEXCR_CTRL_SET_ONEXEC :
+			PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC;
+
+	return flags;
+}
+
+/*
+ * Apply a PR_PPC_DEXCR_CTRL_* control word to one DEXCR aspect.
+ *
+ * @task:  task whose thread.dexcr_onexec is updated by the *_ONEXEC controls
+ * @which: PR_PPC_DEXCR_* aspect selector
+ * @ctrl:  PR_PPC_DEXCR_CTRL_* flags
+ *
+ * Returns 0 on success, the error from prctl_to_aspect() for an unknown
+ * selector, -EPERM if the aspect is not in DEXCR_PRCTL_EDITABLE or if
+ * CLEAR_ONEXEC of NPHIE is requested without CAP_SYS_ADMIN, and -EINVAL if
+ * @ctrl has bits outside PR_PPC_DEXCR_CTRL_MASK or requests both members of
+ * a SET/CLEAR pair.
+ *
+ * NOTE(review): the live register is accessed with mfspr()/mtspr() while the
+ * on-exec value lives in @task; presumably callers pass current - confirm
+ * against the prctl() entry point.
+ */
+int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl)
+{
+	unsigned int aspect;
+	unsigned long val;
+	int rc;
+
+	rc = prctl_to_aspect(which, &aspect);
+	if (rc)
+		return rc;
+
+	if (!(aspect & DEXCR_PRCTL_EDITABLE))
+		return -EPERM;
+
+	if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK)
+		return -EINVAL;
+
+	/* SET and CLEAR are mutually exclusive, and so is the ONEXEC pair */
+	if (((ctrl & PR_PPC_DEXCR_CTRL_SET) &&
+	     (ctrl & PR_PPC_DEXCR_CTRL_CLEAR)) ||
+	    ((ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC) &&
+	     (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC)))
+		return -EINVAL;
+
+	/*
+	 * We do not want an unprivileged process being able to disable
+	 * a setuid process's hash check instructions
+	 */
+	if (aspect == DEXCR_PR_NPHIE &&
+	    (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) &&
+	    !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	/* Compute the new live value first, write it back last */
+	val = mfspr(SPRN_DEXCR);
+
+	if (ctrl & PR_PPC_DEXCR_CTRL_SET)
+		val |= aspect;
+	else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR)
+		val &= ~aspect;
+
+	if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC)
+		task->thread.dexcr_onexec |= aspect;
+	else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC)
+		task->thread.dexcr_onexec &= ~aspect;
+
+	mtspr(SPRN_DEXCR, val);
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index 54d0116256f7..0359ab72cd3b 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
*
@@ -5,9 +6,67 @@
* busses using the iommu infrastructure
*/
-#include <linux/export.h>
+#include <linux/dma-direct.h>
+#include <linux/pci.h>
#include <asm/iommu.h>
+#ifdef CONFIG_ARCH_HAS_DMA_MAP_DIRECT
+#define can_map_direct(dev, addr) \
+ ((dev)->bus_dma_limit >= phys_to_dma((dev), (addr)))
+
+/*
+ * Return true when @dev has a non-zero bus_dma_limit and the DMA address
+ * of @addr (per can_map_direct()) does not exceed that limit; false
+ * otherwise, including the common case of no limit being set.
+ */
+bool arch_dma_map_phys_direct(struct device *dev, phys_addr_t addr)
+{
+	if (unlikely(dev->bus_dma_limit))
+		return can_map_direct(dev, addr);
+
+	return false;
+}
+
+#define is_direct_handle(dev, h) ((h) >= (dev)->archdata.dma_offset)
+
+/*
+ * Return true when @dev has a non-zero bus_dma_limit and @dma_handle is at
+ * or above dev->archdata.dma_offset (is_direct_handle()); false otherwise.
+ */
+bool arch_dma_unmap_phys_direct(struct device *dev, dma_addr_t dma_handle)
+{
+	if (unlikely(dev->bus_dma_limit))
+		return is_direct_handle(dev, dma_handle);
+
+	return false;
+}
+
+/*
+ * Return true only if @dev has a non-zero bus_dma_limit and every one of
+ * the @nents entries of @sg passes can_map_direct() for the address
+ * sg_phys() + offset + length of that entry.
+ */
+bool arch_dma_map_sg_direct(struct device *dev, struct scatterlist *sg,
+			    int nents)
+{
+	struct scatterlist *ent;
+	int idx;
+
+	if (likely(!dev->bus_dma_limit))
+		return false;
+
+	for_each_sg(sg, ent, nents, idx) {
+		phys_addr_t end = sg_phys(ent) + ent->offset + ent->length;
+
+		/* One entry beyond the limit disqualifies the whole list */
+		if (!can_map_direct(dev, end))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Return true only if @dev has a non-zero bus_dma_limit and, for every one
+ * of the @nents entries of @sg, dma_address + length satisfies
+ * is_direct_handle().
+ */
+bool arch_dma_unmap_sg_direct(struct device *dev, struct scatterlist *sg,
+			      int nents)
+{
+	struct scatterlist *ent;
+	int idx;
+
+	if (likely(!dev->bus_dma_limit))
+		return false;
+
+	for_each_sg(sg, ent, nents, idx) {
+		dma_addr_t end = ent->dma_address + ent->length;
+
+		/* One non-direct entry disqualifies the whole list */
+		if (!is_direct_handle(dev, end))
+			return false;
+	}
+
+	return true;
+}
+#endif /* CONFIG_ARCH_HAS_DMA_MAP_DIRECT */
+
/*
* Generic iommu implementation
*/
@@ -18,7 +77,7 @@
*/
static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return iommu_alloc_coherent(dev, get_iommu_table_base(dev), size,
dma_handle, dev->coherent_dma_mask, flag,
@@ -27,7 +86,7 @@ static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
static void dma_iommu_free_coherent(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
iommu_free_coherent(get_iommu_table_base(dev), size, vaddr, dma_handle);
}
@@ -40,16 +99,16 @@ static void dma_iommu_free_coherent(struct device *dev, size_t size,
static dma_addr_t dma_iommu_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction direction,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
return iommu_map_page(dev, get_iommu_table_base(dev), page, offset,
- size, device_to_mask(dev), direction, attrs);
+ size, dma_get_mask(dev), direction, attrs);
}
static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
iommu_unmap_page(get_iommu_table_base(dev), dma_handle, size, direction,
attrs);
@@ -58,28 +117,55 @@ static void dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
- return iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
- device_to_mask(dev), direction, attrs);
+ return ppc_iommu_map_sg(dev, get_iommu_table_base(dev), sglist, nelems,
+ dma_get_mask(dev), direction, attrs);
}
static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
int nelems, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+ unsigned long attrs)
+{
+ ppc_iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems,
+ direction, attrs);
+}
+
+static bool dma_iommu_bypass_supported(struct device *dev, u64 mask)
{
- iommu_unmap_sg(get_iommu_table_base(dev), sglist, nelems, direction,
- attrs);
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pci_controller *phb = pci_bus_to_host(pdev->bus);
+
+ if (!phb->controller_ops.iommu_bypass_supported)
+ return false;
+ return phb->controller_ops.iommu_bypass_supported(pdev, mask);
}
/* We support DMA to/from any memory page via the iommu */
-static int dma_iommu_dma_supported(struct device *dev, u64 mask)
+int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
- struct iommu_table *tbl = get_iommu_table_base(dev);
+ struct iommu_table *tbl;
+
+ if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
+ /*
+ * dma_iommu_bypass_supported() sets dma_max when there is
+ * 1:1 mapping but it is somehow limited.
+ * ibm,pmemory is one example.
+ */
+ dev->dma_ops_bypass = dev->bus_dma_limit == 0;
+ if (!dev->dma_ops_bypass)
+ dev_warn(dev,
+ "iommu: 64-bit OK but direct DMA is limited by %llx\n",
+ dev->bus_dma_limit);
+ else
+ dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+ return 1;
+ }
+
+ tbl = get_iommu_table_base(dev);
if (!tbl) {
- dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
- ", table unavailable\n", mask);
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
return 0;
}
@@ -88,32 +174,48 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask)
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
mask, tbl->it_offset << tbl->it_page_shift);
return 0;
- } else
- return 1;
+ }
+
+ dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
+ dev->dma_ops_bypass = false;
+ return 1;
}
-static u64 dma_iommu_get_required_mask(struct device *dev)
+u64 dma_iommu_get_required_mask(struct device *dev)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
u64 mask;
+
+ if (dev_is_pci(dev)) {
+ u64 bypass_mask = dma_direct_get_required_mask(dev);
+
+ if (dma_iommu_dma_supported(dev, bypass_mask)) {
+ dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask);
+ return bypass_mask;
+ }
+ }
+
if (!tbl)
return 0;
- mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1);
+ mask = 1ULL << (fls_long(tbl->it_offset + tbl->it_size) +
+ tbl->it_page_shift - 1);
mask += mask - 1;
return mask;
}
-struct dma_map_ops dma_iommu_ops = {
+const struct dma_map_ops dma_iommu_ops = {
.alloc = dma_iommu_alloc_coherent,
.free = dma_iommu_free_coherent,
- .mmap = dma_direct_mmap_coherent,
.map_sg = dma_iommu_map_sg,
.unmap_sg = dma_iommu_unmap_sg,
.dma_supported = dma_iommu_dma_supported,
.map_page = dma_iommu_map_page,
.unmap_page = dma_iommu_unmap_page,
.get_required_mask = dma_iommu_get_required_mask,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
+ .alloc_pages_op = dma_common_alloc_pages,
+ .free_pages = dma_common_free_pages,
};
-EXPORT_SYMBOL(dma_iommu_ops);
diff --git a/arch/powerpc/kernel/dma-mask.c b/arch/powerpc/kernel/dma-mask.c
new file mode 100644
index 000000000000..5b07ca7b73aa
--- /dev/null
+++ b/arch/powerpc/kernel/dma-mask.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
+#include <linux/export.h>
+#include <asm/machdep.h>
+
+/*
+ * Forward a DMA mask change to the platform's ppc_md.dma_set_mask hook.
+ * Does nothing when the platform does not provide the hook.
+ */
+void arch_dma_set_mask(struct device *dev, u64 dma_mask)
+{
+	if (!ppc_md.dma_set_mask)
+		return;
+
+	ppc_md.dma_set_mask(dev, dma_mask);
+}
+EXPORT_SYMBOL(arch_dma_set_mask);
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index bd1a2aba599f..ba256c37bcc0 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -1,127 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Contains routines needed to support swiotlb for ppc.
*
* Copyright (C) 2009-2010 Freescale Semiconductor, Inc.
* Author: Becky Bruce
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
*/
-
-#include <linux/dma-mapping.h>
#include <linux/memblock.h>
-#include <linux/pfn.h>
-#include <linux/of_platform.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
-
#include <asm/machdep.h>
#include <asm/swiotlb.h>
-#include <asm/dma.h>
unsigned int ppc_swiotlb_enable;
+unsigned int ppc_swiotlb_flags;
-static u64 swiotlb_powerpc_get_required(struct device *dev)
-{
- u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr;
-
- end = memblock_end_of_DRAM();
- if (max_direct_dma_addr && end > max_direct_dma_addr)
- end = max_direct_dma_addr;
- end += get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-/*
- * At the moment, all platforms that use this code only require
- * swiotlb to be used if we're operating on HIGHMEM. Since
- * we don't ever call anything other than map_sg, unmap_sg,
- * map_page, and unmap_page on highmem, use normal dma_ops
- * for everything else.
- */
-struct dma_map_ops swiotlb_dma_ops = {
- .alloc = dma_direct_alloc_coherent,
- .free = dma_direct_free_coherent,
- .mmap = dma_direct_mmap_coherent,
- .map_sg = swiotlb_map_sg_attrs,
- .unmap_sg = swiotlb_unmap_sg_attrs,
- .dma_supported = swiotlb_dma_supported,
- .map_page = swiotlb_map_page,
- .unmap_page = swiotlb_unmap_page,
- .sync_single_for_cpu = swiotlb_sync_single_for_cpu,
- .sync_single_for_device = swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
- .get_required_mask = swiotlb_powerpc_get_required,
-};
-
-void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev)
-{
- struct pci_controller *hose;
- struct dev_archdata *sd;
-
- hose = pci_bus_to_host(pdev->bus);
- sd = &pdev->dev.archdata;
- sd->max_direct_dma_addr =
- hose->dma_window_base_cur + hose->dma_window_size;
-}
-
-static int ppc_swiotlb_bus_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
- struct dev_archdata *sd;
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- sd = &dev->archdata;
- sd->max_direct_dma_addr = 0;
-
- /* May need to bounce if the device can't address all of DRAM */
- if ((dma_get_mask(dev) + 1) < memblock_end_of_DRAM())
- set_dma_ops(dev, &swiotlb_dma_ops);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ppc_swiotlb_plat_bus_notifier = {
- .notifier_call = ppc_swiotlb_bus_notify,
- .priority = 0,
-};
-
-int __init swiotlb_setup_bus_notifier(void)
-{
- bus_register_notifier(&platform_bus_type,
- &ppc_swiotlb_plat_bus_notifier);
- return 0;
-}
-
-void swiotlb_detect_4g(void)
+void __init swiotlb_detect_4g(void)
{
if ((memblock_end_of_DRAM() - 1) > 0xffffffff)
ppc_swiotlb_enable = 1;
}
-static int __init swiotlb_late_init(void)
+static int __init check_swiotlb_enabled(void)
{
- if (ppc_swiotlb_enable) {
+ if (ppc_swiotlb_enable)
swiotlb_print_info();
- set_pci_dma_ops(&swiotlb_dma_ops);
- ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_swiotlb;
- } else {
- swiotlb_free();
- }
+ else
+ swiotlb_exit();
return 0;
}
-subsys_initcall(swiotlb_late_init);
+subsys_initcall(check_swiotlb_enabled);
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
deleted file mode 100644
index ee78f6e49d64..000000000000
--- a/arch/powerpc/kernel/dma.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
- *
- * Provide default implementations of the DMA mapping callbacks for
- * directly mapped busses.
- */
-
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
-#include <linux/gfp.h>
-#include <linux/memblock.h>
-#include <linux/export.h>
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/bug.h>
-#include <asm/machdep.h>
-
-/*
- * Generic direct DMA implementation
- *
- * This implementation supports a per-device offset that can be applied if
- * the address at which memory is visible to devices is not 0. Platform code
- * can set archdata.dma_data to an unsigned long holding the offset. By
- * default the offset is PCI_DRAM_OFFSET.
- */
-
-
-void *dma_direct_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- struct dma_attrs *attrs)
-{
- void *ret;
-#ifdef CONFIG_NOT_COHERENT_CACHE
- ret = __dma_alloc_coherent(dev, size, dma_handle, flag);
- if (ret == NULL)
- return NULL;
- *dma_handle += get_dma_offset(dev);
- return ret;
-#else
- struct page *page;
- int node = dev_to_node(dev);
-
- /* ignore region specifiers */
- flag &= ~(__GFP_HIGHMEM);
-
- page = alloc_pages_node(node, flag, get_order(size));
- if (page == NULL)
- return NULL;
- ret = page_address(page);
- memset(ret, 0, size);
- *dma_handle = __pa(ret) + get_dma_offset(dev);
-
- return ret;
-#endif
-}
-
-void dma_direct_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- struct dma_attrs *attrs)
-{
-#ifdef CONFIG_NOT_COHERENT_CACHE
- __dma_free_coherent(size, vaddr);
-#else
- free_pages((unsigned long)vaddr, get_order(size));
-#endif
-}
-
-int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t handle, size_t size,
- struct dma_attrs *attrs)
-{
- unsigned long pfn;
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pfn = __dma_get_coherent_pfn((unsigned long)cpu_addr);
-#else
- pfn = page_to_pfn(virt_to_page(cpu_addr));
-#endif
- return remap_pfn_range(vma, vma->vm_start,
- pfn + vma->vm_pgoff,
- vma->vm_end - vma->vm_start,
- vma->vm_page_prot);
-}
-
-static int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- struct dma_attrs *attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = sg_phys(sg) + get_dma_offset(dev);
- sg->dma_length = sg->length;
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
- }
-
- return nents;
-}
-
-static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nents, enum dma_data_direction direction,
- struct dma_attrs *attrs)
-{
-}
-
-static int dma_direct_dma_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_PPC64
- /* Could be improved so platforms can set the limit in case
- * they have limited DMA windows
- */
- return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1);
-#else
- return 1;
-#endif
-}
-
-static u64 dma_direct_get_required_mask(struct device *dev)
-{
- u64 end, mask;
-
- end = memblock_end_of_DRAM() + get_dma_offset(dev);
-
- mask = 1ULL << (fls64(end) - 1);
- mask += mask - 1;
-
- return mask;
-}
-
-static inline dma_addr_t dma_direct_map_page(struct device *dev,
- struct page *page,
- unsigned long offset,
- size_t size,
- enum dma_data_direction dir,
- struct dma_attrs *attrs)
-{
- BUG_ON(dir == DMA_NONE);
- __dma_sync_page(page, offset, size, dir);
- return page_to_phys(page) + offset + get_dma_offset(dev);
-}
-
-static inline void dma_direct_unmap_page(struct device *dev,
- dma_addr_t dma_address,
- size_t size,
- enum dma_data_direction direction,
- struct dma_attrs *attrs)
-{
-}
-
-#ifdef CONFIG_NOT_COHERENT_CACHE
-static inline void dma_direct_sync_sg(struct device *dev,
- struct scatterlist *sgl, int nents,
- enum dma_data_direction direction)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i)
- __dma_sync_page(sg_page(sg), sg->offset, sg->length, direction);
-}
-
-static inline void dma_direct_sync_single(struct device *dev,
- dma_addr_t dma_handle, size_t size,
- enum dma_data_direction direction)
-{
- __dma_sync(bus_to_virt(dma_handle), size, direction);
-}
-#endif
-
-struct dma_map_ops dma_direct_ops = {
- .alloc = dma_direct_alloc_coherent,
- .free = dma_direct_free_coherent,
- .mmap = dma_direct_mmap_coherent,
- .map_sg = dma_direct_map_sg,
- .unmap_sg = dma_direct_unmap_sg,
- .dma_supported = dma_direct_dma_supported,
- .map_page = dma_direct_map_page,
- .unmap_page = dma_direct_unmap_page,
- .get_required_mask = dma_direct_get_required_mask,
-#ifdef CONFIG_NOT_COHERENT_CACHE
- .sync_single_for_cpu = dma_direct_sync_single,
- .sync_single_for_device = dma_direct_sync_single,
- .sync_sg_for_cpu = dma_direct_sync_sg,
- .sync_sg_for_device = dma_direct_sync_sg,
-#endif
-};
-EXPORT_SYMBOL(dma_direct_ops);
-
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-
-int __dma_set_mask(struct device *dev, u64 dma_mask)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL))
- return dma_ops->set_dma_mask(dev, dma_mask);
- if (!dev->dma_mask || !dma_supported(dev, dma_mask))
- return -EIO;
- *dev->dma_mask = dma_mask;
- return 0;
-}
-int dma_set_mask(struct device *dev, u64 dma_mask)
-{
- if (ppc_md.dma_set_mask)
- return ppc_md.dma_set_mask(dev, dma_mask);
- return __dma_set_mask(dev, dma_mask);
-}
-EXPORT_SYMBOL(dma_set_mask);
-
-u64 dma_get_required_mask(struct device *dev)
-{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
-
- if (ppc_md.dma_get_required_mask)
- return ppc_md.dma_get_required_mask(dev);
-
- if (unlikely(dma_ops == NULL))
- return 0;
-
- if (dma_ops->get_required_mask)
- return dma_ops->get_required_mask(dev);
-
- return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-
-static int __init dma_init(void)
-{
- dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
-#ifdef CONFIG_PCI
- dma_debug_add_bus(&pci_bus_type);
-#endif
-#ifdef CONFIG_IBMVIO
- dma_debug_add_bus(&vio_bus_type);
-#endif
-
- return 0;
-}
-fs_initcall(dma_init);
-
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
new file mode 100644
index 000000000000..3af6c06af02f
--- /dev/null
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -0,0 +1,1128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017, Nicholas Piggin, IBM Corporation
+ */
+
+#define pr_fmt(fmt) "dt-cpu-ftrs: " fmt
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <linux/libfdt.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+
+#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/mce.h>
+#include <asm/mmu.h>
+#include <asm/setup.h>
+
+
+/* Device-tree visible constants follow */
+#define ISA_V3_0B 3000
+#define ISA_V3_1 3100
+
+#define USABLE_PR (1U << 0)
+#define USABLE_OS (1U << 1)
+#define USABLE_HV (1U << 2)
+
+#define HV_SUPPORT_HFSCR (1U << 0)
+#define OS_SUPPORT_FSCR (1U << 0)
+
+/* For parsing, we define all bits set as "NONE" case */
+#define HV_SUPPORT_NONE 0xffffffffU
+#define OS_SUPPORT_NONE 0xffffffffU
+
+/*
+ * One CPU feature as handled by the feat_*() enable routines in this file.
+ * A bit number of -1 in the *_bit_nr fields is treated as "no bit".
+ */
+struct dt_cpu_feature {
+ /* feature name, printed in diagnostics */
+ const char *name;
+ /* presumably an ISA level such as ISA_V3_0B / ISA_V3_1 - confirm */
+ uint32_t isa;
+ /* mask of USABLE_PR / USABLE_OS / USABLE_HV */
+ uint32_t usable_privilege;
+ /* HV_SUPPORT_NONE, or a mask that may contain HV_SUPPORT_HFSCR */
+ uint32_t hv_support;
+ /* OS_SUPPORT_NONE, or a mask that may contain OS_SUPPORT_FSCR */
+ uint32_t os_support;
+ /* bit number to set in HFSCR */
+ uint32_t hfscr_bit_nr;
+ /* bit number to set in FSCR */
+ uint32_t fscr_bit_nr;
+ /* hwcap bit: word = nr / 32 (0: cpu_user_features, 1: cpu_user_features2) */
+ uint32_t hwcap_bit_nr;
+ /* fdt parsing */
+ unsigned long node;
+ int enabled;
+ int disabled;
+};
+
+#define MMU_FTRS_HASH_BASE (MMU_FTRS_POWER8)
+
+#define COMMON_USER_BASE (PPC_FEATURE_32 | PPC_FEATURE_64 | \
+ PPC_FEATURE_ARCH_2_06 |\
+ PPC_FEATURE_ICACHE_SNOOP)
+#define COMMON_USER2_BASE (PPC_FEATURE2_ARCH_2_07 | \
+ PPC_FEATURE2_ISEL)
+/*
+ * Set up the base CPU
+ */
+
+static int hv_mode;
+
+/*
+ * SPR values that __restore_cpu_cpufeatures() writes back to LPCR, HFSCR,
+ * FSCR and PCR.
+ */
+static struct {
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 pcr;
+} system_registers;
+
+static void (*init_pmu_registers)(void);
+
+/*
+ * Reprogram the executing CPU's SPRs from system_registers.
+ *
+ * LPCR and FSCR are always written. When hv_mode is set, LPID is set to 0,
+ * AMOR to ~0, and HFSCR and PCR are written as well. Finally the
+ * init_pmu_registers hook is called if one has been installed.
+ * This function is installed as base_cpu_spec.cpu_restore.
+ */
+static void __restore_cpu_cpufeatures(void)
+{
+ mtspr(SPRN_LPCR, system_registers.lpcr);
+ if (hv_mode) {
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+ mtspr(SPRN_HFSCR, system_registers.hfscr);
+ mtspr(SPRN_PCR, system_registers.pcr);
+ }
+ mtspr(SPRN_FSCR, system_registers.fscr);
+
+ /* PMU setup is optional; the hook is set by the feat_enable_pmu_*() routines */
+ if (init_pmu_registers)
+ init_pmu_registers();
+}
+
+static char dt_cpu_name[64];
+
+/*
+ * Baseline cpu_spec that cpufeatures_setup_cpu() installs as the current
+ * CPU spec. The feat_*() routines then OR feature bits into it and fill in
+ * fields such as platform, machine_check_early, num_pmcs and pmc_type.
+ */
+static struct cpu_spec __initdata base_cpu_spec = {
+ .cpu_name = NULL,
+ .cpu_features = CPU_FTRS_DT_CPU_BASE,
+ .cpu_user_features = COMMON_USER_BASE,
+ .cpu_user_features2 = COMMON_USER2_BASE,
+ .mmu_features = 0,
+ .icache_bsize = 32, /* minimum block size, fixed by */
+ .dcache_bsize = 32, /* cache info init. */
+ .num_pmcs = 0,
+ .pmc_type = PPC_PMC_DEFAULT,
+ .cpu_setup = NULL,
+ .cpu_restore = __restore_cpu_cpufeatures,
+ .machine_check_early = NULL,
+ .platform = NULL,
+};
+
+/*
+ * Install base_cpu_spec as the current CPU spec and put the facility
+ * registers into a known state.
+ *
+ * The PVR mask is set to all ones and the PVR value to the one read from
+ * the CPU. hv_mode is derived from MSR_HV; when set, CPU_FTR_HVMODE is
+ * added and HFSCR is zeroed. FSCR is zeroed and PCR is set to PCR_MASK
+ * in all cases.
+ */
+static void __init cpufeatures_setup_cpu(void)
+{
+ set_cur_cpu_spec(&base_cpu_spec);
+
+ cur_cpu_spec->pvr_mask = -1;
+ cur_cpu_spec->pvr_value = mfspr(SPRN_PVR);
+
+ /* Initialize the base environment -- clear FSCR/HFSCR. */
+ hv_mode = !!(mfmsr() & MSR_HV);
+ if (hv_mode) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+ mtspr(SPRN_HFSCR, 0);
+ }
+ mtspr(SPRN_FSCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ /*
+ * LPCR does not get cleared, to match behaviour with secondaries
+ * in __restore_cpu_cpufeatures. Once the idle code is fixed, this
+ * could clear LPCR too.
+ */
+}
+
+/*
+ * Enable a feature using only the generic HFSCR/FSCR recipes.
+ *
+ * For each of the hypervisor and OS levels, a support value of *_NONE
+ * needs no action, a value containing the HFSCR (resp. FSCR) flag sets the
+ * feature's bit in that register, and anything else has no known recipe
+ * and makes the function return 0. On success the feature's hwcap bit is
+ * advertised when it is usable in problem state, and 1 is returned.
+ */
+static int __init feat_try_enable_unknown(struct dt_cpu_feature *f)
+{
+	if (f->hv_support != HV_SUPPORT_NONE) {
+		u64 hfscr;
+
+		/* Does not have a known recipe */
+		if (!(f->hv_support & HV_SUPPORT_HFSCR))
+			return 0;
+
+		hfscr = mfspr(SPRN_HFSCR) | (1UL << f->hfscr_bit_nr);
+		mtspr(SPRN_HFSCR, hfscr);
+	}
+
+	if (f->os_support != OS_SUPPORT_NONE) {
+		u64 fscr;
+
+		/* Does not have a known recipe */
+		if (!(f->os_support & OS_SUPPORT_FSCR))
+			return 0;
+
+		fscr = mfspr(SPRN_FSCR) | (1UL << f->fscr_bit_nr);
+		mtspr(SPRN_FSCR, fscr);
+	}
+
+	/* Nothing to advertise unless usable in PR state with a hwcap bit */
+	if (!(f->usable_privilege & USABLE_PR) || f->hwcap_bit_nr == -1)
+		return 1;
+
+	switch (f->hwcap_bit_nr / 32) {
+	case 0:
+		cur_cpu_spec->cpu_user_features |= 1U << (f->hwcap_bit_nr % 32);
+		break;
+	case 1:
+		cur_cpu_spec->cpu_user_features2 |= 1U << (f->hwcap_bit_nr % 32);
+		break;
+	default:
+		pr_err("%s could not advertise to user (no hwcap bits)\n", f->name);
+		break;
+	}
+
+	return 1;
+}
+
+/*
+ * Generic feature enable.
+ *
+ * Sets the feature's HFSCR bit when hypervisor support is not
+ * HV_SUPPORT_NONE and a bit number is given, likewise its FSCR bit for OS
+ * support, and advertises its hwcap bit when it is usable in problem
+ * state. A bit number of -1 means "no bit". Always returns 1.
+ */
+static int __init feat_enable(struct dt_cpu_feature *f)
+{
+	if (f->hv_support != HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) {
+		u64 hfscr = mfspr(SPRN_HFSCR) | (1UL << f->hfscr_bit_nr);
+
+		mtspr(SPRN_HFSCR, hfscr);
+	}
+
+	if (f->os_support != OS_SUPPORT_NONE && f->fscr_bit_nr != -1) {
+		u64 fscr = mfspr(SPRN_FSCR) | (1UL << f->fscr_bit_nr);
+
+		mtspr(SPRN_FSCR, fscr);
+	}
+
+	/* Nothing to advertise unless usable in PR state with a hwcap bit */
+	if (!(f->usable_privilege & USABLE_PR) || f->hwcap_bit_nr == -1)
+		return 1;
+
+	switch (f->hwcap_bit_nr / 32) {
+	case 0:
+		cur_cpu_spec->cpu_user_features |= 1U << (f->hwcap_bit_nr % 32);
+		break;
+	case 1:
+		cur_cpu_spec->cpu_user_features2 |= 1U << (f->hwcap_bit_nr % 32);
+		break;
+	default:
+		pr_err("CPU feature: %s could not advertise to user (no hwcap bits)\n", f->name);
+		break;
+	}
+
+	return 1;
+}
+
+/* Handler that enables nothing: ignores @f and always returns 0. */
+static int __init feat_disable(struct dt_cpu_feature *f)
+{
+ return 0;
+}
+
+/*
+ * Enable hypervisor mode support.
+ *
+ * Returns 0 with an error message if the CPU is not running with MSR_HV
+ * (hv_mode clear). Otherwise sets LPID to 0 and AMOR to ~0, clears
+ * LPCR_LPES0 in LPCR, sets CPU_FTR_HVMODE and returns 1.
+ */
+static int __init feat_enable_hv(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ if (!hv_mode) {
+ pr_err("CPU feature hypervisor present in device tree but HV mode not enabled in the CPU. Ignoring.\n");
+ return 0;
+ }
+
+ mtspr(SPRN_LPID, 0);
+ mtspr(SPRN_AMOR, ~0);
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_LPES0; /* HV external interrupts */
+ mtspr(SPRN_LPCR, lpcr);
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+
+ return 1;
+}
+
+/* Set PPC_FEATURE_TRUE_LE in the user feature word; always returns 1. */
+static int __init feat_enable_le(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_TRUE_LE;
+ return 1;
+}
+
+/*
+ * Set CPU_FTR_SMT in the kernel feature word and PPC_FEATURE_SMT in the
+ * user feature word; always returns 1.
+ */
+static int __init feat_enable_smt(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_SMT;
+ return 1;
+}
+
+/*
+ * Set the PECE0, PECE1 and PECE2 wakeup bits in LPCR (wakeup modes for
+ * ISA 207); always returns 1.
+ */
+static int __init feat_enable_idle_nap(struct dt_cpu_feature *f)
+{
+	u64 val = mfspr(SPRN_LPCR);
+
+	val |= LPCR_PECE0 | LPCR_PECE1 | LPCR_PECE2;
+	mtspr(SPRN_LPCR, val);
+
+	return 1;
+}
+
+/*
+ * Set the PECE0, PECE1 and PECE2 wakeup bits in LPCR (wakeup modes for
+ * ISAv3.0B); always returns 1.
+ */
+static int __init feat_enable_idle_stop(struct dt_cpu_feature *f)
+{
+	u64 val = mfspr(SPRN_LPCR);
+
+	val |= LPCR_PECE0 | LPCR_PECE1 | LPCR_PECE2;
+	mtspr(SPRN_LPCR, val);
+
+	return 1;
+}
+
+/*
+ * Enable the hash MMU.
+ *
+ * Returns 0 without touching anything when CONFIG_PPC_64S_HASH_MMU is not
+ * enabled. Otherwise updates LPCR (clears ISL and VPM1, sets VPM0 and the
+ * 0x10 VRMASD value), adds MMU_FTRS_HASH_BASE to the MMU features and
+ * PPC_FEATURE_HAS_MMU to the user features, and returns 1.
+ */
+static int __init feat_enable_mmu_hash(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_ISL;
+
+ /* VRMASD */
+ lpcr |= LPCR_VPM0;
+ lpcr &= ~LPCR_VPM1;
+ lpcr |= 0x10UL << LPCR_VRMASD_SH; /* L=1 LP=00 */
+ mtspr(SPRN_LPCR, lpcr);
+
+ cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+ return 1;
+}
+
+/*
+ * Enable the v3 hash MMU.
+ *
+ * Returns 0 without touching anything when CONFIG_PPC_64S_HASH_MMU is not
+ * enabled. Otherwise clears ISL, UPRT and HR in LPCR, adds
+ * MMU_FTRS_HASH_BASE to the MMU features and PPC_FEATURE_HAS_MMU to the
+ * user features, and returns 1.
+ */
+static int __init feat_enable_mmu_hash_v3(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ if (!IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ return 0;
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~(LPCR_ISL | LPCR_UPRT | LPCR_HR);
+ mtspr(SPRN_LPCR, lpcr);
+
+ cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+ return 1;
+}
+
+
+/*
+ * Enable the radix MMU.
+ *
+ * Returns 0 when CONFIG_PPC_RADIX_MMU is not enabled. Otherwise adds the
+ * KERNEL_RO, TYPE_RADIX and GTSE MMU feature bits plus
+ * PPC_FEATURE_HAS_MMU in the user features, and returns 1.
+ */
+static int __init feat_enable_mmu_radix(struct dt_cpu_feature *f)
+{
+	if (!IS_ENABLED(CONFIG_PPC_RADIX_MMU))
+		return 0;
+
+	cur_cpu_spec->mmu_features |= MMU_FTR_KERNEL_RO |
+				      MMU_FTR_TYPE_RADIX |
+				      MMU_FTR_GTSE;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
+
+	return 1;
+}
+
+/*
+ * Enable the DSCR feature without setting its FSCR bit.
+ *
+ * Overwrites f->fscr_bit_nr with -1 before calling feat_enable(), then
+ * replaces the LPCR DPFD field with the value 4. Always returns 1.
+ */
+static int __init feat_enable_dscr(struct dt_cpu_feature *f)
+{
+ u64 lpcr;
+
+ /*
+ * Linux relies on FSCR[DSCR] being clear, so that we can take the
+ * facility unavailable interrupt and track the task's usage of DSCR.
+ * See facility_unavailable_exception().
+ * Clear the bit here so that feat_enable() doesn't set it.
+ */
+ f->fscr_bit_nr = -1;
+
+ feat_enable(f);
+
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr &= ~LPCR_DPFD;
+ lpcr |= (4UL << LPCR_DPFD_SH);
+ mtspr(SPRN_LPCR, lpcr);
+
+ return 1;
+}
+
+/* Set PPC_BIT(60) in HFSCR; called by the feat_enable_pmu_*() routines. */
+static void __init hfscr_pmu_enable(void)
+{
+	u64 val = mfspr(SPRN_HFSCR) | PPC_BIT(60);
+
+	mtspr(SPRN_HFSCR, val);
+}
+
+/*
+ * Put the POWER8 PMU control registers into their initial state: MMCRC and
+ * MMCRH are zeroed only in hv_mode; MMCRA, MMCR1, MMCR2 and MMCRS are
+ * zeroed and MMCR0 is set to MMCR0_FC. Not __init because it is also
+ * stored in init_pmu_registers for __restore_cpu_cpufeatures().
+ */
+static void init_pmu_power8(void)
+{
+ if (hv_mode) {
+ mtspr(SPRN_MMCRC, 0);
+ mtspr(SPRN_MMCRH, 0);
+ }
+
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+ mtspr(SPRN_MMCRS, 0);
+}
+
+/*
+ * Set the platform name to "power8" and install the POWER8 early
+ * machine-check handler; always returns 1.
+ */
+static int __init feat_enable_mce_power8(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power8";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p8;
+
+ return 1;
+}
+
+/*
+ * Enable the POWER8 PMU.
+ *
+ * Sets the PMU bit in HFSCR, initialises the PMU registers now and
+ * registers init_pmu_power8() as the init_pmu_registers hook, then
+ * advertises CPU_FTR_MMCRA and PPC_FEATURE_PSERIES_PERFMON_COMPAT.
+ * CPU_FTR_PMAO_BUG is added when the PVR version is PVR_POWER8E.
+ * Records 6 PMCs of type PPC_PMC_IBM. Always returns 1.
+ */
+static int __init feat_enable_pmu_power8(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power8();
+ init_pmu_registers = init_pmu_power8;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+ if (pvr_version_is(PVR_POWER8E))
+ cur_cpu_spec->cpu_features |= CPU_FTR_PMAO_BUG;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+
+ return 1;
+}
+
+/*
+ * Put the POWER9 PMU control registers into their initial state: MMCRC is
+ * zeroed only in hv_mode; MMCRA, MMCR1 and MMCR2 are zeroed and MMCR0 is
+ * set to MMCR0_FC. Also called from init_pmu_power10().
+ */
+static void init_pmu_power9(void)
+{
+ if (hv_mode)
+ mtspr(SPRN_MMCRC, 0);
+
+ mtspr(SPRN_MMCRA, 0);
+ mtspr(SPRN_MMCR0, MMCR0_FC);
+ mtspr(SPRN_MMCR1, 0);
+ mtspr(SPRN_MMCR2, 0);
+}
+
+/*
+ * Set the platform name to "power9" and install the POWER9 early
+ * machine-check handler; always returns 1.
+ */
+static int __init feat_enable_mce_power9(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power9";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p9;
+
+ return 1;
+}
+
+/*
+ * Enable the POWER9 PMU.
+ *
+ * Sets the PMU bit in HFSCR, initialises the PMU registers now and
+ * registers init_pmu_power9() as the init_pmu_registers hook, then
+ * advertises CPU_FTR_MMCRA and PPC_FEATURE_PSERIES_PERFMON_COMPAT and
+ * records 6 PMCs of type PPC_PMC_IBM. Always returns 1.
+ */
+static int __init feat_enable_pmu_power9(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power9();
+ init_pmu_registers = init_pmu_power9;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+
+ return 1;
+}
+
+/*
+ * POWER10 PMU register initialisation: run the POWER9 sequence, then zero
+ * MMCR3 and rewrite MMCRA with MMCRA_BHRB_DISABLE and MMCR0 with
+ * MMCR0_FC | MMCR0_PMCCEXT (replacing the values written by
+ * init_pmu_power9()).
+ */
+static void init_pmu_power10(void)
+{
+ init_pmu_power9();
+
+ mtspr(SPRN_MMCR3, 0);
+ mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+ mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMCCEXT);
+}
+
+/*
+ * Enable the POWER10 PMU.
+ *
+ * Sets the PMU bit in HFSCR, initialises the PMU registers now and
+ * registers init_pmu_power10() as the init_pmu_registers hook, then
+ * advertises CPU_FTR_MMCRA and PPC_FEATURE_PSERIES_PERFMON_COMPAT and
+ * records 6 PMCs of type PPC_PMC_IBM. Always returns 1.
+ */
+static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
+{
+ hfscr_pmu_enable();
+
+ init_pmu_power10();
+ init_pmu_registers = init_pmu_power10;
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+ cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+ cur_cpu_spec->num_pmcs = 6;
+ cur_cpu_spec->pmc_type = PPC_PMC_IBM;
+
+ return 1;
+}
+
+/*
+ * Set the platform name to "power10" and install the POWER10 early
+ * machine-check handler; always returns 1.
+ */
+static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power10";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+
+ return 1;
+}
+
+/*
+ * Set the platform name to "power11"; the early machine-check handler
+ * installed is the same POWER10 one used by feat_enable_mce_power10().
+ * Always returns 1.
+ */
+static int __init feat_enable_mce_power11(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->platform = "power11";
+ cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+
+ return 1;
+}
+
+/*
+ * Enable transactional memory.
+ *
+ * With CONFIG_PPC_TRANSACTIONAL_MEM: runs the generic feat_enable(), adds
+ * PPC_FEATURE2_HTM_NOSC to the second user feature word and returns 1.
+ * Without it: does nothing and returns 0.
+ */
+static int __init feat_enable_tm(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+	feat_enable(f);
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_HTM_NOSC;
+
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Enable floating point: run the generic feat_enable() and clear
+ * CPU_FTR_FPU_UNAVAILABLE from the CPU features. Always returns 1.
+ */
+static int __init feat_enable_fp(struct dt_cpu_feature *f)
+{
+ feat_enable(f);
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_FPU_UNAVAILABLE;
+
+ return 1;
+}
+
+/*
+ * Enable the vector facility.
+ *
+ * With CONFIG_ALTIVEC: runs the generic feat_enable(), adds
+ * CPU_FTR_ALTIVEC and CPU_FTR_VMX_COPY to the CPU features and
+ * PPC_FEATURE_HAS_ALTIVEC to the user features, and returns 1.
+ * Without it: does nothing and returns 0.
+ */
+static int __init feat_enable_vector(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_ALTIVEC
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_ALTIVEC;
+	cur_cpu_spec->cpu_features |= CPU_FTR_VMX_COPY;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
+
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Enable VSX.
+ *
+ * With CONFIG_VSX: runs the generic feat_enable(), adds CPU_FTR_VSX to the
+ * CPU features and PPC_FEATURE_HAS_VSX to the user features, and
+ * returns 1. Without it: does nothing and returns 0.
+ */
+static int __init feat_enable_vsx(struct dt_cpu_feature *f)
+{
+#ifdef CONFIG_VSX
+	feat_enable(f);
+	cur_cpu_spec->cpu_features |= CPU_FTR_VSX;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_VSX;
+
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+/*
+ * Feature handler for "processor-utilization-of-resources-register":
+ * sets both CPU_FTR_PURR and CPU_FTR_SPURR. Does not call feat_enable().
+ * Always returns 1.
+ */
+static int __init feat_enable_purr(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->cpu_features |= CPU_FTR_PURR | CPU_FTR_SPURR;
+
+ return 1;
+}
+
+/*
+ * Feature handler for "event-based-branch": overrides the node's
+ * hwcap_bit_nr with -1 and then runs the generic enable.
+ * Always returns 1.
+ */
+static int __init feat_enable_ebb(struct dt_cpu_feature *f)
+{
+ /*
+ * PPC_FEATURE2_EBB is enabled in PMU init code because it has
+ * historically been related to the PMU facility. This may have
+ * to be decoupled if EBB becomes more generic. For now, follow
+ * existing convention.
+ */
+ /* NOTE(review): -1 presumably makes feat_enable() skip the hwcap bit - confirm */
+ f->hwcap_bit_nr = -1;
+ feat_enable(f);
+
+ return 1;
+}
+
+/*
+ * Feature handler for the processor control (doorbell) facility: runs the
+ * generic enable, sets CPU_FTR_DBELL and turns on LPCR_PECEDH in the LPCR.
+ * Always returns 1.
+ */
+static int __init feat_enable_dbell(struct dt_cpu_feature *f)
+{
+	/* P9 has an HFSCR for privileged state */
+	feat_enable(f);
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_DBELL;
+
+	/* hyp doorbell wakeup */
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECEDH);
+
+	return 1;
+}
+
+/*
+ * Feature handler for "hypervisor-virtualization-interrupt": sets the
+ * HVICE, HEIC and PECE_HVEE bits in the LPCR. Always returns 1.
+ */
+static int __init feat_enable_hvi(struct dt_cpu_feature *f)
+{
+	/*
+	 * POWER9 XIVE interrupts, including those in OPAL XICS compatibility
+	 * mode, are always delivered as hypervisor virtualization interrupts
+	 * (HVI) rather than EE.
+	 *
+	 * LPES0 is deliberately not set here: should an EE reach the host
+	 * anyway, the EE handler would not expect it to be delivered in
+	 * LPES0 mode (e.g., using SRR[01]). That could happen if there is a
+	 * bug in interrupt controller code, or the IC is misconfigured in
+	 * systemsim.
+	 */
+	const u64 hvi_bits = LPCR_HVICE |	/* enable hvi interrupts */
+			     LPCR_HEIC |	/* disable ee interrupts when MSR_HV */
+			     LPCR_PECE_HVEE;	/* hvi can wake from stop */
+
+	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | hvi_bits);
+
+	return 1;
+}
+
+/*
+ * Feature handler for "cache-inhibited-large-page": sets
+ * MMU_FTR_CI_LARGE_PAGE in the MMU feature mask. Always returns 1.
+ */
+static int __init feat_enable_large_ci(struct dt_cpu_feature *f)
+{
+ cur_cpu_spec->mmu_features |= MMU_FTR_CI_LARGE_PAGE;
+
+ return 1;
+}
+
+/*
+ * Feature handler for "matrix-multiply-assist": runs the generic enable
+ * and clears the PCR_MMA_DIS bit in the PCR. Always returns 1.
+ */
+static int __init feat_enable_mma(struct dt_cpu_feature *f)
+{
+	feat_enable(f);
+
+	/* Read-modify-write of the PCR with the MMA disable bit dropped */
+	mtspr(SPRN_PCR, mfspr(SPRN_PCR) & ~PCR_MMA_DIS);
+
+	return 1;
+}
+
+/*
+ * One entry of the table that maps a device tree feature name to the
+ * code that enables it.
+ */
+struct dt_cpu_feature_match {
+ /* Feature node name; compared with strcmp() against the node's name */
+ const char *name;
+ /* Handler; a non-zero return means the feature was enabled */
+ int (*enable)(struct dt_cpu_feature *f);
+ /* Bits ORed into cur_cpu_spec->cpu_features when enable() succeeds */
+ u64 cpu_ftr_bit_mask;
+};
+
+/*
+ * Known device tree CPU features, searched in order by
+ * cpufeatures_process_feature().
+ *
+ * NOTE(review): "system-call-vectored" is listed twice (feat_enable near
+ * the top, feat_disable further down). cpufeatures_process_feature()
+ * leaves its loop at the first entry whose name matches, whether or not
+ * the handler succeeds, so the second entry is never reached by that
+ * lookup. Confirm which handler is intended.
+ */
+static struct dt_cpu_feature_match __initdata
+ dt_cpu_feature_match_table[] = {
+ {"hypervisor", feat_enable_hv, 0},
+ {"big-endian", feat_enable, 0},
+ {"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
+ {"smt", feat_enable_smt, 0},
+ {"interrupt-facilities", feat_enable, 0},
+ {"system-call-vectored", feat_enable, 0},
+ {"timer-facilities", feat_enable, 0},
+ {"timer-facilities-v3", feat_enable, 0},
+ {"debug-facilities", feat_enable, 0},
+ {"come-from-address-register", feat_enable, CPU_FTR_CFAR},
+ {"branch-tracing", feat_enable, 0},
+ {"floating-point", feat_enable_fp, 0},
+ {"vector", feat_enable_vector, 0},
+ {"vector-scalar", feat_enable_vsx, 0},
+ {"vector-scalar-v3", feat_enable, 0},
+ {"decimal-floating-point", feat_enable, 0},
+ {"decimal-integer", feat_enable, 0},
+ {"quadword-load-store", feat_enable, 0},
+ {"vector-crypto", feat_enable, 0},
+ {"mmu-hash", feat_enable_mmu_hash, 0},
+ {"mmu-radix", feat_enable_mmu_radix, 0},
+ {"mmu-hash-v3", feat_enable_mmu_hash_v3, 0},
+ {"virtual-page-class-key-protection", feat_enable, 0},
+ {"transactional-memory", feat_enable_tm, CPU_FTR_TM},
+ {"transactional-memory-v3", feat_enable_tm, 0},
+ {"tm-suspend-hypervisor-assist", feat_enable, CPU_FTR_P9_TM_HV_ASSIST},
+ {"tm-suspend-xer-so-bug", feat_enable, CPU_FTR_P9_TM_XER_SO_BUG},
+ {"idle-nap", feat_enable_idle_nap, 0},
+ /* alignment-interrupt-dsisr ignored */
+ {"idle-stop", feat_enable_idle_stop, 0},
+ {"machine-check-power8", feat_enable_mce_power8, 0},
+ {"performance-monitor-power8", feat_enable_pmu_power8, 0},
+ {"data-stream-control-register", feat_enable_dscr, CPU_FTR_DSCR},
+ {"event-based-branch", feat_enable_ebb, 0},
+ {"target-address-register", feat_enable, 0},
+ {"branch-history-rolling-buffer", feat_enable, 0},
+ {"control-register", feat_enable, CPU_FTR_CTRL},
+ {"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
+ {"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
+ {"processor-utilization-of-resources-register", feat_enable_purr, 0},
+ {"no-execute", feat_enable, 0},
+ {"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+ {"cache-inhibited-large-page", feat_enable_large_ci, 0},
+ {"coprocessor-icswx", feat_enable, 0},
+ {"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
+ {"program-priority-register", feat_enable, CPU_FTR_HAS_PPR},
+ {"wait", feat_enable, 0},
+ {"atomic-memory-operations", feat_enable, 0},
+ {"branch-v3", feat_enable, 0},
+ {"copy-paste", feat_enable, 0},
+ {"decimal-floating-point-v3", feat_enable, 0},
+ {"decimal-integer-v3", feat_enable, 0},
+ {"fixed-point-v3", feat_enable, 0},
+ {"floating-point-v3", feat_enable, 0},
+ {"group-start-register", feat_enable, 0},
+ {"pc-relative-addressing", feat_enable, 0},
+ {"machine-check-power9", feat_enable_mce_power9, 0},
+ {"machine-check-power10", feat_enable_mce_power10, 0},
+ {"machine-check-power11", feat_enable_mce_power11, 0},
+ {"performance-monitor-power9", feat_enable_pmu_power9, 0},
+ {"performance-monitor-power10", feat_enable_pmu_power10, 0},
+ {"performance-monitor-power11", feat_enable_pmu_power10, 0},
+ {"event-based-branch-v3", feat_enable, 0},
+ {"random-number-generator", feat_enable, 0},
+ {"system-call-vectored", feat_disable, 0},
+ {"trace-interrupt-v3", feat_enable, 0},
+ {"vector-v3", feat_enable, 0},
+ {"vector-binary128", feat_enable, 0},
+ {"vector-binary16", feat_enable, 0},
+ {"wait-v3", feat_enable, 0},
+ {"prefix-instructions", feat_enable, 0},
+ {"matrix-multiply-assist", feat_enable_mma, 0},
+ {"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1},
+};
+
+static bool __initdata using_dt_cpu_ftrs;
+static bool __initdata enable_unknown = true;
+
+/*
+ * Early parameter handler for "dt_cpu_ftrs=".
+ *
+ *   off   - clear using_dt_cpu_ftrs
+ *   known - clear enable_unknown, so unknown features are not enabled
+ *
+ * Returns 0 for a missing or recognised value and 1 for anything else.
+ */
+static int __init dt_cpu_ftrs_parse(char *str)
+{
+	if (!str)
+		return 0;
+
+	if (strcmp(str, "off") == 0) {
+		using_dt_cpu_ftrs = false;
+		return 0;
+	}
+
+	if (strcmp(str, "known") == 0) {
+		enable_unknown = false;
+		return 0;
+	}
+
+	/* Unrecognised value */
+	return 1;
+}
+early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
+
+/*
+ * Log the ISA level and set the architecture feature bits it implies:
+ * CPU_FTR_ARCH_300 / PPC_FEATURE2_ARCH_3_00 when isa >= ISA_V3_0B, and
+ * CPU_FTR_ARCH_31 / PPC_FEATURE2_ARCH_3_1 when isa >= ISA_V3_1.
+ *
+ * @isa: value of the "isa" property of the cpu-features node
+ */
+static void __init cpufeatures_setup_start(u32 isa)
+{
+ pr_info("setup for ISA %d\n", isa);
+
+ if (isa >= ISA_V3_0B) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
+ }
+
+ if (isa >= ISA_V3_1) {
+ cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31;
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1;
+ }
+}
+
+/*
+ * Try to enable one device tree feature.
+ *
+ * The feature name is looked up in dt_cpu_feature_match_table. For the
+ * first entry with a matching name the handler is called; if it returns
+ * non-zero the entry's cpu_ftr_bit_mask is ORed into the CPU features.
+ * A name that is not in the table is handed to feat_try_enable_unknown(),
+ * but only while enable_unknown is set.
+ *
+ * Returns true if the feature was enabled, false otherwise.
+ */
+static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
+{
+	const struct dt_cpu_feature_match *entry;
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(dt_cpu_feature_match_table); idx++) {
+		entry = &dt_cpu_feature_match_table[idx];
+		if (strcmp(f->name, entry->name) != 0)
+			continue;
+
+		/* Known feature: the first matching entry decides */
+		if (!entry->enable(f)) {
+			pr_info("not enabling: %s (disabled or unsupported by kernel)\n",
+				f->name);
+			return false;
+		}
+
+		cur_cpu_spec->cpu_features |= entry->cpu_ftr_bit_mask;
+		pr_debug("enabling: %s\n", f->name);
+		return true;
+	}
+
+	/* Not in the table */
+	if (!enable_unknown || !feat_try_enable_unknown(f)) {
+		pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
+			f->name);
+		return false;
+	}
+
+	pr_debug("enabling: %s (unknown)\n", f->name);
+
+	return true;
+}
+
+/*
+ * Handle the POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flags. Does nothing for any other PVR version.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+	unsigned long rev = pvr & 0xfff;
+	bool stq_bug;
+
+	if (PVR_VER(pvr) != PVR_POWER9)
+		return;
+
+	/*
+	 * Set the tlbie feature flag for anything below
+	 * Nimbus DD 2.3 and Cumulus DD 1.3
+	 */
+	if (!(pvr & 0xe000)) {
+		/* Nimbus */
+		stq_bug = rev < 0x203;
+	} else if (!(pvr & 0xc000)) {
+		/* Cumulus */
+		stq_bug = rev < 0x103;
+	} else {
+		/* Unrecognised chip type: warn and assume the bug is present */
+		WARN_ONCE(1, "Unknown PVR");
+		stq_bug = true;
+	}
+
+	if (stq_bug)
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+
+	/* The ERAT flag is set for every POWER9 */
+	cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+}
+
+/*
+ * Apply feature bits that depend on the exact processor version (PVR)
+ * and so cannot come from the cpufeatures device tree, then update the
+ * tlbie workaround flags.
+ */
+static __init void cpufeatures_cpu_quirks(void)
+{
+	unsigned long pvr = mfspr(SPRN_PVR);
+	bool pvr_is_4e = (pvr & 0xffff0000) == 0x004e0000;
+
+	/*
+	 * Not all quirks can be derived from the cpufeatures device tree.
+	 */
+	switch (pvr & 0xffffefff) {
+	case 0x004e0200:
+		/* DD2.0 has no feature flag */
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+		break;
+	case 0x004e0201:
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_RADIX_PREFETCH_BUG;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+		break;
+	case 0x004e0202:
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+		cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
+		break;
+	case 0x004e0203:
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
+		cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+		break;
+	default:
+		/* DD2.1 and up have DD2_1 */
+		if (pvr_is_4e)
+			cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
+		break;
+	}
+
+	if (pvr_is_4e)
+		cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
+
+	update_tlbie_feature_flag(pvr);
+}
+
+/*
+ * Final step of device tree feature setup: apply the PVR based quirks,
+ * force CPU_FTR_HVMODE on if hv_mode is set but the bit is not, record
+ * the platform name, snapshot LPCR/HFSCR/FSCR/PCR into system_registers
+ * and log the resulting feature masks.
+ */
+static void __init cpufeatures_setup_finished(void)
+{
+ cpufeatures_cpu_quirks();
+
+ if (hv_mode && !(cur_cpu_spec->cpu_features & CPU_FTR_HVMODE)) {
+ pr_err("hypervisor not present in device tree but HV mode is enabled in the CPU. Enabling.\n");
+ cur_cpu_spec->cpu_features |= CPU_FTR_HVMODE;
+ }
+
+ /* Make sure powerpc_base_platform is non-NULL */
+ powerpc_base_platform = cur_cpu_spec->platform;
+
+ /* Save the SPR values as they stand after all handlers have run */
+ system_registers.lpcr = mfspr(SPRN_LPCR);
+ system_registers.hfscr = mfspr(SPRN_HFSCR);
+ system_registers.fscr = mfspr(SPRN_FSCR);
+ system_registers.pcr = mfspr(SPRN_PCR);
+
+ pr_info("final cpu/mmu features = 0x%016lx 0x%08x\n",
+ cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
+}
+
+/*
+ * Check the flat device tree's /chosen "bootargs" property for the
+ * substring "dt_cpu_ftrs=off".
+ *
+ * Returns non-zero if it is present, and 0 if it is absent or if the
+ * chosen node or the bootargs property cannot be found.
+ */
+static int __init disabled_on_cmdline(void)
+{
+	unsigned long root = of_get_flat_dt_root();
+	unsigned long chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+	const char *bootargs;
+
+	if (chosen == -FDT_ERR_NOTFOUND)
+		return false;
+
+	bootargs = of_get_flat_dt_prop(chosen, "bootargs", NULL);
+
+	return bootargs && strstr(bootargs, "dt_cpu_ftrs=off");
+}
+
+/*
+ * Flat device tree scan callback: returns 1 for a node that is compatible
+ * with "ibm,powerpc-cpu-features" and has an "isa" property, 0 otherwise.
+ */
+static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
+					int depth, void *data)
+{
+	if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+		return 0;
+
+	if (!of_get_flat_dt_prop(node, "isa", NULL))
+		return 0;
+
+	return 1;
+}
+
+/* Report whether device tree CPU features are in use (using_dt_cpu_ftrs). */
+bool __init dt_cpu_ftrs_in_use(void)
+{
+ return using_dt_cpu_ftrs;
+}
+
+/*
+ * Decide whether CPU features are taken from the device tree.
+ *
+ * @fdt: flattened device tree blob
+ *
+ * Returns false, leaving using_dt_cpu_ftrs clear, when the FDT fails
+ * verification, when no "ibm,powerpc-cpu-features" node with an "isa"
+ * property exists, or when "dt_cpu_ftrs=off" is in the bootargs.
+ * Otherwise calls cpufeatures_setup_cpu(), sets using_dt_cpu_ftrs and
+ * returns true.
+ */
+bool __init dt_cpu_ftrs_init(void *fdt)
+{
+ using_dt_cpu_ftrs = false;
+
+ /* Setup and verify the FDT, if it fails we just bail */
+ if (!early_init_dt_verify(fdt, __pa(fdt)))
+ return false;
+
+ if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
+ return false;
+
+ if (disabled_on_cmdline())
+ return false;
+
+ cpufeatures_setup_cpu();
+
+ using_dt_cpu_ftrs = true;
+ return true;
+}
+
+static int nr_dt_cpu_features;
+static struct dt_cpu_feature *dt_cpu_features;
+
+/*
+ * Parse one feature subnode into dt_cpu_features[i].
+ *
+ * @node:  flat device tree offset of the feature node
+ * @uname: node name, stored as the feature name
+ * @i:     index of the slot in dt_cpu_features to fill
+ *
+ * "isa" and "usable-privilege" are mandatory. "hv-support", "os-support",
+ * "hfscr-bit-nr", "fscr-bit-nr" and "hwcap-bit-nr" are optional and
+ * default to HV_SUPPORT_NONE, OS_SUPPORT_NONE and -1 respectively. The
+ * combination is then checked for consistency; on any problem a warning
+ * is logged and the function returns early, leaving the feature neither
+ * enabled nor disabled.
+ *
+ * A feature without a "dependencies" property is processed here
+ * immediately; the others are left for cpufeatures_deps_enable().
+ *
+ * Always returns 0.
+ */
+static int __init process_cpufeatures_node(unsigned long node,
+					  const char *uname, int i)
+{
+	const __be32 *prop;
+	struct dt_cpu_feature *f;
+	int len;
+
+	f = &dt_cpu_features[i];
+
+	f->node = node;
+
+	f->name = uname;
+
+	prop = of_get_flat_dt_prop(node, "isa", &len);
+	if (!prop) {
+		pr_warn("%s: missing isa property\n", uname);
+		return 0;
+	}
+	f->isa = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "usable-privilege", &len);
+	if (!prop) {
+		/* Terminate the message so it is not merged with the next one */
+		pr_warn("%s: missing usable-privilege property\n", uname);
+		return 0;
+	}
+	f->usable_privilege = be32_to_cpup(prop);
+
+	prop = of_get_flat_dt_prop(node, "hv-support", &len);
+	if (prop)
+		f->hv_support = be32_to_cpup(prop);
+	else
+		f->hv_support = HV_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "os-support", &len);
+	if (prop)
+		f->os_support = be32_to_cpup(prop);
+	else
+		f->os_support = OS_SUPPORT_NONE;
+
+	prop = of_get_flat_dt_prop(node, "hfscr-bit-nr", &len);
+	if (prop)
+		f->hfscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->hfscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "fscr-bit-nr", &len);
+	if (prop)
+		f->fscr_bit_nr = be32_to_cpup(prop);
+	else
+		f->fscr_bit_nr = -1;
+	prop = of_get_flat_dt_prop(node, "hwcap-bit-nr", &len);
+	if (prop)
+		f->hwcap_bit_nr = be32_to_cpup(prop);
+	else
+		f->hwcap_bit_nr = -1;
+
+	/* Hypervisor-level properties must agree with USABLE_HV */
+	if (f->usable_privilege & USABLE_HV) {
+		if (!(mfmsr() & MSR_HV)) {
+			pr_warn("%s: HV feature passed to guest\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_NONE && f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hfscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->hv_support == HV_SUPPORT_HFSCR) {
+			if (f->hfscr_bit_nr == -1) {
+				pr_warn("%s: missing hfscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->hv_support != HV_SUPPORT_NONE || f->hfscr_bit_nr != -1) {
+			pr_warn("%s: unwanted hv_support/hfscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	/* OS-level properties must agree with USABLE_OS */
+	if (f->usable_privilege & USABLE_OS) {
+		if (f->os_support == OS_SUPPORT_NONE && f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted fscr_bit_nr\n", uname);
+			return 0;
+		}
+
+		if (f->os_support == OS_SUPPORT_FSCR) {
+			if (f->fscr_bit_nr == -1) {
+				pr_warn("%s: missing fscr_bit_nr\n", uname);
+				return 0;
+			}
+		}
+	} else {
+		if (f->os_support != OS_SUPPORT_NONE || f->fscr_bit_nr != -1) {
+			pr_warn("%s: unwanted os_support/fscr_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	/* A hwcap bit only makes sense for a feature usable in problem state */
+	if (!(f->usable_privilege & USABLE_PR)) {
+		if (f->hwcap_bit_nr != -1) {
+			pr_warn("%s: unwanted hwcap_bit_nr\n", uname);
+			return 0;
+		}
+	}
+
+	/* Do all the independent features in the first pass */
+	if (!of_get_flat_dt_prop(node, "dependencies", &len)) {
+		if (cpufeatures_process_feature(f))
+			f->enabled = 1;
+		else
+			f->disabled = 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Enable a feature that has dependencies, resolving them first.
+ *
+ * Features already marked enabled or disabled are skipped. Each phandle
+ * in the "dependencies" property is matched against the nodes in
+ * dt_cpu_features and the matching feature is resolved recursively. If
+ * any dependency ends up disabled, this feature is marked disabled too.
+ * Otherwise the feature itself is processed and marked according to the
+ * result.
+ */
+static void __init cpufeatures_deps_enable(struct dt_cpu_feature *f)
+{
+	const __be32 *prop;
+	int len;
+	int nr_deps;
+	int i;
+
+	if (f->enabled || f->disabled)
+		return;
+
+	prop = of_get_flat_dt_prop(f->node, "dependencies", &len);
+	if (!prop) {
+		/* Terminate the message so it is not merged with the next one */
+		pr_warn("%s: missing dependencies property\n", f->name);
+		return;
+	}
+
+	/* The property is an array of 32-bit phandles */
+	nr_deps = len / sizeof(int);
+
+	for (i = 0; i < nr_deps; i++) {
+		unsigned long phandle = be32_to_cpu(prop[i]);
+		int j;
+
+		for (j = 0; j < nr_dt_cpu_features; j++) {
+			struct dt_cpu_feature *d = &dt_cpu_features[j];
+
+			if (of_get_flat_dt_phandle(d->node) == phandle) {
+				cpufeatures_deps_enable(d);
+				if (d->disabled) {
+					f->disabled = 1;
+					return;
+				}
+			}
+		}
+	}
+
+	if (cpufeatures_process_feature(f))
+		f->enabled = 1;
+	else
+		f->disabled = 1;
+}
+
+/*
+ * Subnode scan callback: parse the node into the slot indexed by the
+ * running counter that @data points to, then advance the counter.
+ * Always returns 0.
+ */
+static int __init scan_cpufeatures_subnodes(unsigned long node,
+					    const char *uname,
+					    void *data)
+{
+	int *next_slot = data;
+
+	process_cpufeatures_node(node, uname, (*next_slot)++);
+
+	return 0;
+}
+
+/*
+ * Subnode scan callback that only counts: increments the int that @data
+ * points to once per call. Always returns 0.
+ */
+static int __init count_cpufeatures_subnodes(unsigned long node,
+ const char *uname,
+ void *data)
+{
+ int *count = data;
+
+ (*count)++;
+
+ return 0;
+}
+
+/*
+ * Flat device tree scan callback used by dt_cpu_ftrs_scan().
+ *
+ * For a node compatible with "ibm,powerpc-cpu-features" it counts the
+ * feature subnodes, allocates the dt_cpu_features array, enables the
+ * features (those without dependencies first, then the rest), picks up
+ * an optional "display-name", runs the final setup step and frees the
+ * array again. Always returns 0.
+ *
+ * NOTE(review): nr_dt_cpu_features is not reset before counting in the
+ * code visible here, so it would accumulate if more than one node
+ * matched - confirm only one such node can exist.
+ */
+static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
+ *uname, int depth, void *data)
+{
+ const __be32 *prop;
+ int count, i;
+ u32 isa;
+
+ /* We are scanning "ibm,powerpc-cpu-features" nodes only */
+ if (!of_flat_dt_is_compatible(node, "ibm,powerpc-cpu-features"))
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "isa", NULL);
+ if (!prop)
+ /* We checked before, "can't happen" */
+ return 0;
+
+ isa = be32_to_cpup(prop);
+
+ /* Count and allocate space for cpu features */
+ of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
+ &nr_dt_cpu_features);
+ dt_cpu_features =
+ memblock_alloc_or_panic(
+ sizeof(struct dt_cpu_feature) * nr_dt_cpu_features,
+ PAGE_SIZE);
+
+ cpufeatures_setup_start(isa);
+
+ /* Scan nodes into dt_cpu_features and enable those without deps */
+ count = 0;
+ of_scan_flat_dt_subnodes(node, scan_cpufeatures_subnodes, &count);
+
+ /* Recursive enable remaining features with dependencies */
+ for (i = 0; i < nr_dt_cpu_features; i++) {
+ struct dt_cpu_feature *f = &dt_cpu_features[i];
+
+ cpufeatures_deps_enable(f);
+ }
+
+ /* A non-empty "display-name" replaces the CPU name */
+ prop = of_get_flat_dt_prop(node, "display-name", NULL);
+ if (prop && strlen((char *)prop) != 0) {
+ strscpy(dt_cpu_name, (char *)prop, sizeof(dt_cpu_name));
+ cur_cpu_spec->cpu_name = dt_cpu_name;
+ }
+
+ cpufeatures_setup_finished();
+
+ /* The feature array is only needed during this scan */
+ memblock_free(dt_cpu_features,
+ sizeof(struct dt_cpu_feature) * nr_dt_cpu_features);
+
+ return 0;
+}
+
+/*
+ * Scan the flat device tree for CPU feature nodes and enable them.
+ * Does nothing unless device tree CPU features are in use.
+ */
+void __init dt_cpu_ftrs_scan(void)
+{
+	if (using_dt_cpu_ftrs)
+		of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
+}
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 000000000000..03f1135ef64f
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Early init before relocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+
+/*
+ * We're called here very early in the boot.
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings). -- paulus
+ *
+ * Returns the value of kernstart_virt_addr plus the relocation offset.
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+ unsigned long kva, offset = reloc_offset();
+
+ /* Read kernstart_virt_addr through its relocated address */
+ kva = *PTRRELOC(&kernstart_virt_addr);
+
+ /* First zero the BSS, but only when kernstart_virt_addr is KERNELBASE */
+ if (kva == KERNELBASE)
+ memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+
+ /*
+ * Identify the CPU type and fix up code sections
+ * that depend on which cpu we have.
+ */
+ identify_cpu(offset, mfspr(SPRN_PVR));
+
+ apply_feature_fixups();
+
+ return kva + offset;
+}
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 59a64f8dc85f..bb836f02101c 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1,28 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright IBM Corporation 2001, 2005, 2006
* Copyright Dave Engebretsen & Todd Inglett 2001
* Copyright Linas Vepstas 2005, 2006
* Copyright 2001-2012 IBM Corporation.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
* Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
@@ -35,9 +21,9 @@
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
+#include <linux/debugfs.h>
#include <linux/atomic.h>
-#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
@@ -45,10 +31,11 @@
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
+#include <asm/pte-walk.h>
/** Overview:
- * EEH, or "Extended Error Handling" is a PCI bridge technology for
+ * EEH, or "Enhanced Error Handling" is a PCI bridge technology for
* dealing with PCI bus errors that can't be dealt with within the
* usual PCI framework, except by check-stopping the CPU. Systems
* that are designed for high-availability/reliability cannot afford
@@ -104,11 +91,26 @@
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);
+/*
+ * Maximum number of times a PE may be frozen. If a particular PE's
+ * frozen count in the last hour exceeds this limit, the PE will
+ * be forced offline permanently.
+ */
+u32 eeh_max_freezes = 5;
+
+/*
+ * Controls whether a recovery event should be scheduled when an
+ * isolated device is discovered. This is only really useful for
+ * debugging problems with the EEH core.
+ */
+bool eeh_debugfs_no_recover;
+
/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;
/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
+EXPORT_SYMBOL_GPL(confirm_error_lock);
/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);
@@ -117,7 +119,7 @@ static DEFINE_MUTEX(eeh_dev_mutex);
* not dynamically alloced, so that it ends up in RMO where RTAS
* can access it.
*/
-#define EEH_PCI_REGS_LOG_LEN 4096
+#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
/*
@@ -137,52 +139,61 @@ struct eeh_stats {
static struct eeh_stats eeh_stats;
-#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
-
static int __init eeh_setup(char *str)
{
if (!strcmp(str, "off"))
eeh_add_flag(EEH_FORCE_DISABLED);
+ else if (!strcmp(str, "early_log"))
+ eeh_add_flag(EEH_EARLY_DUMP_LOG);
return 1;
}
__setup("eeh=", eeh_setup);
-/**
- * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
- * @edev: device to report data for
- * @buf: point to buffer in which to log
- * @len: amount of room in buffer
- *
- * This routine captures assorted PCI configuration space data,
- * and puts them into a buffer for RTAS error logging.
+/*
+ * Log the EEH recovery status: disabled by kernel parameter
+ * (EEH_FORCE_DISABLED), enabled (EEH_ENABLED), or disabled because
+ * neither flag is set.
+ */
+void eeh_show_enabled(void)
+{
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ pr_info("EEH: Recovery disabled by kernel parameter.\n");
+ else if (eeh_has_flag(EEH_ENABLED))
+ pr_info("EEH: Capable adapter found: recovery enabled.\n");
+ else
+ pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
+/*
+ * This routine captures assorted PCI configuration space data
+ * for the indicated PCI device, and puts them into a buffer
+ * for RTAS error logging.
*/
-static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
+static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
- struct device_node *dn = eeh_dev_to_of_node(edev);
u32 cfg;
int cap, i;
int n = 0, l = 0;
char buffer[128];
- n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
- pr_warn("EEH: of node=%s\n", dn->full_name);
+ n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
+ edev->pe->phb->global_number, edev->bdfn >> 8,
+ PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
+ pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
+ edev->pe->phb->global_number, edev->bdfn >> 8,
+ PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
- eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
+ eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
- eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
+ eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
/* Gather bridge-specific registers */
if (edev->mode & EEH_DEV_BRIDGE) {
- eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
+ eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
- eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
+ eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
pr_warn("EEH: Bridge control: %04x\n", cfg);
}
@@ -190,11 +201,11 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
/* Dump out the PCI-X command and status regs */
cap = edev->pcix_cap;
if (cap) {
- eeh_ops->read_config(dn, cap, 4, &cfg);
+ eeh_ops->read_config(edev, cap, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
- eeh_ops->read_config(dn, cap+4, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4, 4, &cfg);
n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
pr_warn("EEH: PCI-X status: %08x\n", cfg);
}
@@ -206,7 +217,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
pr_warn("EEH: PCI-E capabilities and status follow:\n");
for (i=0; i<=8; i++) {
- eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
if ((i % 4) == 0) {
@@ -233,7 +244,7 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
pr_warn("EEH: PCI-E AER capability register set follows:\n");
for (i=0; i<=13; i++) {
- eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
+ eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
if ((i % 4) == 0) {
@@ -255,6 +266,18 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
return n;
}
+/*
+ * PE traversal callback: append the config space dump of every device
+ * in @pe to pci_regs_buf.
+ *
+ * @flag points to a size_t holding the number of bytes already written;
+ * it is advanced by the length each device adds. Always returns NULL.
+ */
+static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
+{
+ struct eeh_dev *edev, *tmp;
+ size_t *plen = flag;
+
+ /* Each device writes after the previous one, into the space left */
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
+ EEH_PCI_REGS_LOG_LEN - *plen);
+
+ return NULL;
+}
+
/**
* eeh_slot_error_detail - Generate combined log including driver log and error log
* @pe: EEH PE
@@ -268,7 +291,6 @@ static size_t eeh_gather_pci_data(struct eeh_dev *edev, char *buf, size_t len)
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
size_t loglen = 0;
- struct eeh_dev *edev, *tmp;
/*
* When the PHB is fenced or dead, it's pointless to collect
@@ -278,17 +300,37 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*
* For pHyp, we have to enable IO for log retrieval. Otherwise,
* 0xFF's is always returned from PCI config space.
+ *
+ * When the @severity is EEH_LOG_PERM, the PE is going to be
+ * removed. Prior to that, the drivers for devices included in
+ * the PE will be closed. The drivers rely on working IO path
+ * to bring the devices to quiet state. Otherwise, PCI traffic
+ * from those devices after they are removed is likely to cause
+ * another unexpected EEH error.
*/
if (!(pe->type & EEH_PE_PHB)) {
- if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
+ if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
+ severity == EEH_LOG_PERM)
eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+
+ /*
+ * The config space of some PCI devices can't be accessed
+ * when their PEs are in frozen state. Otherwise, fenced
+ * PHB might be seen. Those PEs are identified with flag
+ * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
+ * is set automatically when the PE is put to EEH_PE_ISOLATED.
+ *
+ * Restoring BARs possibly triggers PCI config access in
+ * (OPAL) firmware and then causes fenced PHB. If the
+ * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
+ * pointless to restore BARs and dump config space.
+ */
eeh_ops->configure_bridge(pe);
- eeh_pe_restore_bars(pe);
+ if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
+ eeh_pe_restore_bars(pe);
- pci_regs_buf[0] = 0;
- eeh_pe_for_each_dev(pe, edev, tmp) {
- loglen += eeh_gather_pci_data(edev, pci_regs_buf + loglen,
- EEH_PCI_REGS_LOG_LEN - loglen);
+ pci_regs_buf[0] = 0;
+ eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
}
}
@@ -304,20 +346,7 @@ void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
*/
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
- pte_t *ptep;
- unsigned long pa;
- int hugepage_shift;
-
- /*
- * We won't find hugepages here, iomem
- */
- ptep = find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
- if (!ptep)
- return token;
- WARN_ON(hugepage_shift);
- pa = pte_pfn(*ptep) << PAGE_SHIFT;
-
- return pa | (token & (PAGE_SIZE-1));
+ return ppc_find_vmap_phys(token);
}
/*
@@ -337,7 +366,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Find the PHB PE */
phb_pe = eeh_phb_pe_get(pe->phb);
if (!phb_pe) {
- pr_warn("%s Can't find PE for PHB#%d\n",
+ pr_warn("%s Can't find PE for PHB#%x\n",
__func__, pe->phb->global_number);
return -EEXIST;
}
@@ -352,28 +381,32 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Check PHB state */
ret = eeh_ops->get_state(phb_pe, NULL);
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
- (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
+ (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
ret = 0;
goto out;
}
/* Isolate the PHB and send event */
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
- dump_stack();
eeh_send_failure_event(phb_pe);
-
return 1;
out:
eeh_serialize_unlock(flags);
return ret;
}
+/*
+ * Return the driver string for @pdev's device, or "<null>" when @pdev
+ * is NULL.
+ */
+static inline const char *eeh_driver_name(struct pci_dev *pdev)
+{
+	return pdev ? dev_driver_string(&pdev->dev) : "<null>";
+}
+
/**
* eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
* @edev: eeh device
@@ -391,13 +424,12 @@ out:
int eeh_dev_check_failure(struct eeh_dev *edev)
{
int ret;
- int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe, *parent_pe, *phb_pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
- const char *location;
+ const char *location = NULL;
eeh_stats.total_mmio_ffs++;
@@ -408,20 +440,13 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
eeh_stats.no_dn++;
return 0;
}
- dn = eeh_dev_to_of_node(edev);
dev = eeh_dev_to_pci_dev(edev);
- pe = edev->pe;
+ pe = eeh_dev_to_pe(edev);
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check for %s %s\n",
- eeh_pci_name(dev), dn->full_name);
- return 0;
- }
-
- if (!pe->addr && !pe->config_addr) {
- eeh_stats.no_cfg_addr++;
+ eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -451,13 +476,16 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
rc = 1;
if (pe->state & EEH_PE_ISOLATED) {
pe->check_count++;
- if (pe->check_count % EEH_MAX_FAILS == 0) {
- location = of_get_property(dn, "ibm,loc-code", NULL);
- printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
- "location=%s driver=%s pci addr=%s\n",
- pe->check_count, location,
- eeh_driver_name(dev), eeh_pci_name(dev));
- printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ if (pe->check_count == EEH_MAX_FAILS) {
+ dn = pci_device_to_OF_node(dev);
+ if (dn)
+ location = of_get_property(dn, "ibm,loc-code",
+ NULL);
+ eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
+ pe->check_count,
+ location ? location : "unknown",
+ eeh_driver_name(dev));
+ eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -478,10 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* We will punt with the following conditions: Failure to get
* PE's state, EEH not support and Permanently unavailable
* state, PE is in good state.
+ *
+ * On the pSeries, after reaching the threshold, get_state might
+ * return EEH_STATE_NOT_SUPPORT. However, it's possible that the
+ * device state remains uncleared if the device is not marked
+ * pci_channel_io_perm_failure. Therefore, consider logging the
+ * event to let device removal happen.
+ *
*/
if ((ret < 0) ||
- (ret == EEH_STATE_NOT_SUPPORT) ||
- ((ret & active_flags) == active_flags)) {
+ (ret == EEH_STATE_NOT_SUPPORT &&
+ dev->error_state == pci_channel_io_perm_failure) ||
+ eeh_state_active(ret)) {
eeh_stats.false_positives++;
pe->false_positives++;
rc = 0;
@@ -501,9 +537,12 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Frozen parent PE ? */
ret = eeh_ops->get_state(parent_pe, NULL);
- if (ret > 0 &&
- (ret & active_flags) != active_flags)
+ if (ret > 0 && !eeh_state_active(ret)) {
pe = parent_pe;
+ pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
+ pe->phb->global_number, pe->addr,
+ pe->phb->global_number, parent_pe->addr);
+ }
/* Next parent level */
parent_pe = parent_pe->parent;
@@ -515,20 +554,15 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* with other functions on this device, and functions under
* bridges.
*/
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
/* Most EEH events are due to device driver bugs. Having
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- phb_pe = eeh_phb_pe_get(pe->phb);
- pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
- pe->phb->global_number, pe->addr);
- pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
- dump_stack();
-
+ pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+ __func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -542,17 +576,16 @@ EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
/**
* eeh_check_failure - Check if all 1's data is due to EEH slot freeze
- * @token: I/O token, should be address in the form 0xA....
- * @val: value, should be all 1's (XXX why do we need this arg??)
+ * @token: I/O address
*
- * Check for an EEH failure at the given token address. Call this
+ * Check for an EEH failure at the given I/O address. Call this
* routine if the result of a read was all 0xff's and you want to
- * find out if this is due to an EEH slot freeze event. This routine
+ * find out if this is due to an EEH slot freeze event. This routine
* will query firmware for the EEH status.
*
* Note this routine is safe to call in an interrupt context.
*/
-unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
+int eeh_check_failure(const volatile void __iomem *token)
{
unsigned long addr;
struct eeh_dev *edev;
@@ -562,19 +595,18 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
edev = eeh_addr_cache_get_dev(addr);
if (!edev) {
eeh_stats.no_device++;
- return val;
+ return 0;
}
- eeh_dev_check_failure(edev);
- return val;
+ return eeh_dev_check_failure(edev);
}
-
EXPORT_SYMBOL(eeh_check_failure);
/**
* eeh_pci_enable - Enable MMIO or DMA transfers for this slot
* @pe: EEH PE
+ * @function: EEH option
*
* This routine should be called to reenable frozen MMIO or DMA
* so that it would work correctly again. It's useful while doing
@@ -582,49 +614,118 @@ EXPORT_SYMBOL(eeh_check_failure);
*/
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
- int rc, flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+ int active_flag, rc;
/*
* pHyp doesn't allow to enable IO or DMA on unfrozen PE.
* Also, it's pointless to enable them on unfrozen PE. So
- * we have the check here.
+ * we have to check before enabling IO or DMA.
*/
- if (function == EEH_OPT_THAW_MMIO ||
- function == EEH_OPT_THAW_DMA) {
+ switch (function) {
+ case EEH_OPT_THAW_MMIO:
+ active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
+ break;
+ case EEH_OPT_THAW_DMA:
+ active_flag = EEH_STATE_DMA_ACTIVE;
+ break;
+ case EEH_OPT_DISABLE:
+ case EEH_OPT_ENABLE:
+ case EEH_OPT_FREEZE_PE:
+ active_flag = 0;
+ break;
+ default:
+ pr_warn("%s: Invalid function %d\n",
+ __func__, function);
+ return -EINVAL;
+ }
+
+ /*
+ * Check if IO or DMA has been enabled before
+ * enabling them.
+ */
+ if (active_flag) {
rc = eeh_ops->get_state(pe, NULL);
if (rc < 0)
return rc;
- /* Needn't to enable or already enabled */
- if ((rc == EEH_STATE_NOT_SUPPORT) ||
- ((rc & flags) == flags))
+ /* Needn't enable it at all */
+ if (rc == EEH_STATE_NOT_SUPPORT)
+ return 0;
+
+ /* It's already enabled */
+ if (rc & active_flag)
return 0;
}
+
+ /* Issue the request */
rc = eeh_ops->set_option(pe, function);
if (rc)
pr_warn("%s: Unexpected state change %d on "
- "PHB#%d-PE#%x, err=%d\n",
+ "PHB#%x-PE#%x, err=%d\n",
__func__, function, pe->phb->global_number,
pe->addr, rc);
- rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (rc <= 0)
- return rc;
+ /* Check if the request is finished successfully */
+ if (active_flag) {
+ rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ if (rc < 0)
+ return rc;
- if ((function == EEH_OPT_THAW_MMIO) &&
- (rc & EEH_STATE_MMIO_ENABLED))
- return 0;
+ if (rc & active_flag)
+ return 0;
- if ((function == EEH_OPT_THAW_DMA) &&
- (rc & EEH_STATE_DMA_ENABLED))
- return 0;
+ return -EIO;
+ }
return rc;
}
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+ void *userdata)
+{
+ struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
+ struct pci_dev *dev = userdata;
+
+ /*
+ * The caller should have disabled and saved the
+ * state for the specified device
+ */
+ if (!pdev || pdev == dev)
+ return;
+
+ /* Ensure we have D0 power state */
+ pci_set_power_state(pdev, PCI_D0);
+
+ /* Save device state */
+ pci_save_state(pdev);
+
+ /*
+ * Disable device to avoid any DMA traffic and
+ * interrupt from the device
+ */
+ pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
+}
+
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+{
+ struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
+ struct pci_dev *dev = userdata;
+
+ if (!pdev)
+ return;
+
+ /* Apply customization from firmware */
+ if (eeh_ops->restore_config)
+ eeh_ops->restore_config(edev);
+
+ /* The caller should restore state for the specified device */
+ if (pdev != dev)
+ pci_restore_state(pdev);
+}
+
/**
- * pcibios_set_pcie_slot_reset - Set PCI-E reset state
+ * pcibios_set_pcie_reset_state - Set PCI-E reset state
* @dev: pci device struct
* @state: reset state to enter
*
@@ -634,7 +735,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
- struct eeh_pe *pe = edev->pe;
+ struct eeh_pe *pe = eeh_dev_to_pe(edev);
if (!pe) {
pr_err("%s: No PE found on PCI device %s\n",
@@ -645,23 +746,41 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
switch (state) {
case pcie_deassert_reset:
eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
+ eeh_unfreeze_pe(pe);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+ eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
break;
case pcie_hot_reset:
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+ eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+ eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
+ eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+ eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
+ if (!(pe->type & EEH_PE_VF))
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
break;
default:
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
return -EINVAL;
- };
+ }
return 0;
}
/**
- * eeh_set_pe_freset - Check the required reset for the indicated device
- * @data: EEH device
+ * eeh_set_dev_freset - Check the required reset for the indicated device
+ * @edev: EEH device
* @flag: return value
*
* Each device might have its preferred reset type: fundamental or
@@ -669,80 +788,106 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-static void *eeh_set_dev_freset(void *data, void *flag)
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
- struct eeh_dev *edev = (struct eeh_dev *)data;
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
+}
- return NULL;
+static void eeh_pe_refreeze_passed(struct eeh_pe *root)
+{
+ struct eeh_pe *pe;
+ int state;
+
+ eeh_for_each_pe(root, pe) {
+ if (eeh_pe_passed(pe)) {
+ state = eeh_ops->get_state(pe, NULL);
+ if (state &
+ (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
+ pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
+ pe->phb->global_number, pe->addr);
+ eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
+ }
+ }
+ }
}
/**
- * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
+ * @include_passed: include passed-through devices?
+ *
+ * This function executes a full reset procedure on a PE, including setting
+ * the appropriate flags, performing a fundamental or hot reset, and then
+ * deactivating the reset status. It is designed to be used within the EEH
+ * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
+ * only performs a single operation at a time.
*
- * Assert the PCI #RST line for 1/4 second.
+ * This function will attempt to reset a PE three times before failing.
*/
-static void eeh_reset_pe_once(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
{
+ int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+ int type = EEH_RESET_HOT;
unsigned int freset = 0;
+ int i, state = 0, ret;
- /* Determine type of EEH reset required for
- * Partitionable Endpoint, a hot-reset (1)
- * or a fundamental reset (3).
- * A fundamental reset required by any device under
- * Partitionable Endpoint trumps hot-reset.
+ /*
+ * Determine the type of reset to perform - hot or fundamental.
+ * Hot reset is the default operation, unless any device under the
+ * PE requires a fundamental reset.
*/
eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
if (freset)
- eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
- else
- eeh_ops->reset(pe, EEH_RESET_HOT);
-
- eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-}
-
-/**
- * eeh_reset_pe - Reset the indicated PE
- * @pe: EEH PE
- *
- * This routine should be called to reset indicated device, including
- * PE. A PE might include multiple PCI devices and sometimes PCI bridges
- * might be involved as well.
- */
-int eeh_reset_pe(struct eeh_pe *pe)
-{
- int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
- int i, rc;
+ type = EEH_RESET_FUNDAMENTAL;
- /* Take three shots at resetting the bus */
- for (i=0; i<3; i++) {
- eeh_reset_pe_once(pe);
+ /* Mark the PE as in reset state and block config space accesses */
+ eeh_pe_state_mark(pe, reset_state);
- /*
- * EEH_PE_ISOLATED is expected to be removed after
- * BAR restore.
- */
- rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if ((rc & flags) == flags)
- return 0;
-
- if (rc < 0) {
- pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
- __func__, pe->phb->global_number, pe->addr);
- return -1;
+ /* Make three attempts at resetting the bus */
+ for (i = 0; i < 3; i++) {
+ ret = eeh_pe_reset(pe, type, include_passed);
+ if (!ret)
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
+ include_passed);
+ if (ret) {
+ ret = -EIO;
+ pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n",
+ state, pe->phb->global_number, pe->addr, i + 1);
+ continue;
}
- pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
- i+1, pe->phb->global_number, pe->addr, rc);
+ if (i)
+ pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
+ pe->phb->global_number, pe->addr, i + 1);
+
+ /* Wait until the PE is in a functioning state */
+ state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ if (state < 0) {
+ pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x",
+ pe->phb->global_number, pe->addr);
+ ret = -ENOTRECOVERABLE;
+ break;
+ }
+ if (eeh_state_active(state))
+ break;
+ else
+ pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
+ pe->phb->global_number, pe->addr, state, i + 1);
}
- return -1;
+ /* Resetting the PE may have unfrozen child PEs. If those PEs have been
+ * (potentially) passed through to a guest, re-freeze them:
+ */
+ if (!include_passed)
+ eeh_pe_refreeze_passed(pe);
+
+ eeh_pe_state_clear(pe, reset_state, true);
+ return ret;
}
/**
@@ -757,14 +902,12 @@ int eeh_reset_pe(struct eeh_pe *pe)
void eeh_save_bars(struct eeh_dev *edev)
{
int i;
- struct device_node *dn;
if (!edev)
return;
- dn = eeh_dev_to_of_node(edev);
for (i = 0; i < 16; i++)
- eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
+ eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]);
/*
* For PCI bridges including root port, we need enable bus
@@ -776,56 +919,6 @@ void eeh_save_bars(struct eeh_dev *edev)
edev->config_space[1] |= PCI_COMMAND_MASTER;
}
-/**
- * eeh_ops_register - Register platform dependent EEH operations
- * @ops: platform dependent EEH operations
- *
- * Register the platform dependent EEH operation callback
- * functions. The platform should call this function before
- * any other EEH operations.
- */
-int __init eeh_ops_register(struct eeh_ops *ops)
-{
- if (!ops->name) {
- pr_warn("%s: Invalid EEH ops name for %p\n",
- __func__, ops);
- return -EINVAL;
- }
-
- if (eeh_ops && eeh_ops != ops) {
- pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
- __func__, eeh_ops->name, ops->name);
- return -EEXIST;
- }
-
- eeh_ops = ops;
-
- return 0;
-}
-
-/**
- * eeh_ops_unregister - Unreigster platform dependent EEH operations
- * @name: name of EEH platform operations
- *
- * Unregister the platform dependent EEH operation callback
- * functions.
- */
-int __exit eeh_ops_unregister(const char *name)
-{
- if (!name || !strlen(name)) {
- pr_warn("%s: Invalid EEH ops name\n",
- __func__);
- return -EINVAL;
- }
-
- if (eeh_ops && !strcmp(eeh_ops->name, name)) {
- eeh_ops = NULL;
- return 0;
- }
-
- return -EEXIST;
-}
-
static int eeh_reboot_notifier(struct notifier_block *nb,
unsigned long action, void *unused)
{
@@ -837,187 +930,114 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
+static int eeh_device_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ /*
+ * Note: It's not possible to perform EEH device addition (i.e.
+ * {pseries,pnv}_pcibios_bus_add_device()) here because it depends on
+ * the device's resources, which have not yet been set up.
+ */
+ case BUS_NOTIFY_DEL_DEVICE:
+ eeh_remove_device(to_pci_dev(dev));
+ break;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block eeh_device_nb = {
+ .notifier_call = eeh_device_notifier,
+};
+
/**
- * eeh_init - EEH initialization
+ * eeh_init - System wide EEH initialization
+ * @ops: struct to trace EEH operation callback functions
*
- * Initialize EEH by trying to enable it for all of the adapters in the system.
- * As a side effect we can determine here if eeh is supported at all.
- * Note that we leave EEH on so failed config cycles won't cause a machine
- * check. If a user turns off EEH for a particular adapter they are really
- * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
- * grant access to a slot if EEH isn't enabled, and so we always enable
- * EEH for all slots/all devices.
- *
- * The eeh-force-off option disables EEH checking globally, for all slots.
- * Even if force-off is set, the EEH hardware is still enabled, so that
- * newer systems can boot.
+ * It's the platform's job to call this from an arch_initcall().
*/
-int eeh_init(void)
+int eeh_init(struct eeh_ops *ops)
{
struct pci_controller *hose, *tmp;
- struct device_node *phb;
- static int cnt = 0;
int ret = 0;
- /*
- * We have to delay the initialization on PowerNV after
- * the PCI hierarchy tree has been built because the PEs
- * are figured out based on PCI devices instead of device
- * tree nodes
- */
- if (machine_is(powernv) && cnt++ <= 0)
- return ret;
+ /* the platform should only initialise EEH once */
+ if (WARN_ON(eeh_ops))
+ return -EEXIST;
+ if (WARN_ON(!ops))
+ return -ENOENT;
+ eeh_ops = ops;
/* Register reboot notifier */
ret = register_reboot_notifier(&eeh_reboot_nb);
if (ret) {
- pr_warn("%s: Failed to register notifier (%d)\n",
+ pr_warn("%s: Failed to register reboot notifier (%d)\n",
__func__, ret);
return ret;
}
- /* call platform initialization function */
- if (!eeh_ops) {
- pr_warn("%s: Platform EEH operation not found\n",
- __func__);
- return -EEXIST;
- } else if ((ret = eeh_ops->init())) {
- pr_warn("%s: Failed to call platform init function (%d)\n",
+ ret = bus_register_notifier(&pci_bus_type, &eeh_device_nb);
+ if (ret) {
+ pr_warn("%s: Failed to register bus notifier (%d)\n",
__func__, ret);
return ret;
}
- /* Initialize EEH event */
- ret = eeh_event_init();
- if (ret)
- return ret;
+ /* Initialize PHB PEs */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ eeh_phb_pe_create(hose);
- /* Enable EEH for all adapters */
- if (eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) {
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node) {
- phb = hose->dn;
- traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
- }
- } else if (eeh_has_flag(EEH_PROBE_MODE_DEV)) {
- list_for_each_entry_safe(hose, tmp,
- &hose_list, list_node)
- pci_walk_bus(hose->bus, eeh_ops->dev_probe, NULL);
- } else {
- pr_warn("%s: Invalid probe mode %x",
- __func__, eeh_subsystem_flags);
- return -EINVAL;
- }
-
- /*
- * Call platform post-initialization. Actually, It's good chance
- * to inform platform that EEH is ready to supply service if the
- * I/O cache stuff has been built up.
- */
- if (eeh_ops->post_init) {
- ret = eeh_ops->post_init();
- if (ret)
- return ret;
- }
-
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else
- pr_warn("EEH: No capable adapters found\n");
-
- return ret;
-}
-
-core_initcall_sync(eeh_init);
-
-/**
- * eeh_add_device_early - Enable EEH for the indicated device_node
- * @dn: device node for which to set up EEH
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices that were added after system boot (e.g. hotplug, dlpar).
- * This routine must be called before any i/o is performed to the
- * adapter (inluding any config-space i/o).
- * Whether this actually enables EEH or not for this device depends
- * on the CEC architecture, type of the device, on earlier boot
- * command-line arguments & etc.
- */
-void eeh_add_device_early(struct device_node *dn)
-{
- struct pci_controller *phb;
-
- /*
- * If we're doing EEH probe based on PCI device, we
- * would delay the probe until late stage because
- * the PCI device isn't available this moment.
- */
- if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
- return;
-
- if (!of_node_to_eeh_dev(dn))
- return;
- phb = of_node_to_eeh_dev(dn)->phb;
-
- /* USB Bus children of PCI devices will not have BUID's */
- if (NULL == phb || 0 == phb->buid)
- return;
+ eeh_addr_cache_init();
- eeh_ops->of_probe(dn, NULL);
-}
-
-/**
- * eeh_add_device_tree_early - Enable EEH for the indicated device
- * @dn: device node
- *
- * This routine must be used to perform EEH initialization for the
- * indicated PCI device that was added after system boot (e.g.
- * hotplug, dlpar).
- */
-void eeh_add_device_tree_early(struct device_node *dn)
-{
- struct device_node *sib;
-
- for_each_child_of_node(dn, sib)
- eeh_add_device_tree_early(sib);
- eeh_add_device_early(dn);
+ /* Initialize EEH event */
+ return eeh_event_init();
}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
/**
- * eeh_add_device_late - Perform EEH initialization for the indicated pci device
+ * eeh_probe_device() - Perform EEH initialization for the indicated pci device
* @dev: pci device for which to set up EEH
*
* This routine must be used to complete EEH initialization for PCI
* devices that were added after system boot (e.g. hotplug, dlpar).
*/
-void eeh_add_device_late(struct pci_dev *dev)
+void eeh_probe_device(struct pci_dev *dev)
{
- struct device_node *dn;
struct eeh_dev *edev;
- if (!dev || !eeh_enabled())
- return;
-
pr_debug("EEH: Adding device %s\n", pci_name(dev));
- dn = pci_device_to_OF_node(dev);
- edev = of_node_to_eeh_dev(dn);
- if (edev->pdev == dev) {
- pr_debug("EEH: Already referenced !\n");
+ /*
+ * pci_dev_to_eeh_dev() can only work if eeh_probe_dev() was
+ * already called for this device.
+ */
+ if (WARN_ON_ONCE(pci_dev_to_eeh_dev(dev))) {
+ pci_dbg(dev, "Already bound to an eeh_dev!\n");
+ return;
+ }
+
+ edev = eeh_ops->probe(dev);
+ if (!edev) {
+ pr_debug("EEH: Adding device failed\n");
return;
}
/*
- * The EEH cache might not be removed correctly because of
- * unbalanced kref to the device during unplug time, which
- * relies on pcibios_release_device(). So we have to remove
- * that here explicitly.
+ * FIXME: We rely on pcibios_release_device() to remove the
+ * existing EEH state. The release function is only called if
+ * the pci_dev's refcount drops to zero so if something is
+ * keeping a ref to a device (e.g. a filesystem) we need to
+ * remove the old EEH state.
+ *
+ * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
*/
- if (edev->pdev) {
- eeh_rmv_from_parent_pe(edev);
+ if (edev->pdev && edev->pdev != dev) {
+ eeh_pe_tree_remove(edev);
eeh_addr_cache_rmv_dev(edev->pdev);
eeh_sysfs_remove_device(edev->pdev);
- edev->mode &= ~EEH_DEV_SYSFS;
/*
* We definitely should have the PCI device removed
@@ -1025,71 +1045,16 @@ void eeh_add_device_late(struct pci_dev *dev)
* into error handler afterwards.
*/
edev->mode |= EEH_DEV_NO_HANDLER;
-
- edev->pdev = NULL;
- dev->dev.archdata.edev = NULL;
}
+ /* bind the pdev and the edev together */
edev->pdev = dev;
dev->dev.archdata.edev = edev;
-
- /*
- * We have to do the EEH probe here because the PCI device
- * hasn't been created yet in the early stage.
- */
- if (eeh_has_flag(EEH_PROBE_MODE_DEV))
- eeh_ops->dev_probe(dev, NULL);
-
eeh_addr_cache_insert_dev(dev);
+ eeh_sysfs_add_device(dev);
}
/**
- * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to perform EEH initialization for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_device_tree_late(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_add_device_late(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_device_tree_late(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
-
-/**
- * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
- * @bus: PCI bus
- *
- * This routine must be used to add EEH sysfs files for PCI
- * devices which are attached to the indicated PCI bus. The PCI bus
- * is added after system boot through hotplug or dlpar.
- */
-void eeh_add_sysfs_files(struct pci_bus *bus)
-{
- struct pci_dev *dev;
-
- list_for_each_entry(dev, &bus->devices, bus_list) {
- eeh_sysfs_add_device(dev);
- if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
- struct pci_bus *subbus = dev->subordinate;
- if (subbus)
- eeh_add_sysfs_files(subbus);
- }
- }
-}
-EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
-
-/**
* eeh_remove_device - Undo EEH setup for the indicated pci device
* @dev: pci device to be removed
*
@@ -1108,10 +1073,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
- pr_debug("EEH: Removing device %s\n", pci_name(dev));
+ dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
- pr_debug("EEH: Not referenced !\n");
+ dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1122,11 +1087,13 @@ void eeh_remove_device(struct pci_dev *dev)
* from the parent PE during the BAR resotre.
*/
edev->pdev = NULL;
- dev->dev.archdata.edev = NULL;
- if (!(edev->pe->state & EEH_PE_KEEP))
- eeh_rmv_from_parent_pe(edev);
- else
- edev->mode |= EEH_DEV_DISCONNECTED;
+
+ /*
+ * eeh_sysfs_remove_device() uses pci_dev_to_eeh_dev() so we need to
+ * remove the sysfs files before clearing dev.archdata.edev
+ */
+ if (edev->mode & EEH_DEV_SYSFS)
+ eeh_sysfs_remove_device(dev);
/*
* We're removing from the PCI subsystem, that means
@@ -1137,8 +1104,95 @@ void eeh_remove_device(struct pci_dev *dev)
edev->mode |= EEH_DEV_NO_HANDLER;
eeh_addr_cache_rmv_dev(dev);
- eeh_sysfs_remove_device(dev);
- edev->mode &= ~EEH_DEV_SYSFS;
+
+ /*
+ * The flag "in_error" is used to trace EEH devices for VFs
+ * in error state or not. It's set in eeh_report_error(). If
+ * it's not set, eeh_report_{reset,resume}() won't be called
+ * for the VF EEH device.
+ */
+ edev->in_error = false;
+ dev->dev.archdata.edev = NULL;
+ if (!(edev->pe->state & EEH_PE_KEEP))
+ eeh_pe_tree_remove(edev);
+ else
+ edev->mode |= EEH_DEV_DISCONNECTED;
+}
+
+int eeh_unfreeze_pe(struct eeh_pe *pe)
+{
+ int ret;
+
+ ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
+ if (ret) {
+ pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
+ __func__, ret, pe->phb->global_number, pe->addr);
+ return ret;
+ }
+
+ ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
+ if (ret) {
+ pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
+ __func__, ret, pe->phb->global_number, pe->addr);
+ return ret;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(eeh_unfreeze_pe);
+
+
+static struct pci_device_id eeh_reset_ids[] = {
+ { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */
+ { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */
+ { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */
+ { 0 }
+};
+
+static int eeh_pe_change_owner(struct eeh_pe *pe)
+{
+ struct eeh_dev *edev, *tmp;
+ struct pci_dev *pdev;
+ struct pci_device_id *id;
+ int ret;
+
+ /* Check PE state */
+ ret = eeh_ops->get_state(pe, NULL);
+ if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
+ return 0;
+
+ /* Unfrozen PE, nothing to do */
+ if (eeh_state_active(ret))
+ return 0;
+
+ /* Frozen PE, check if it needs PE level reset */
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
+
+ for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
+ if (id->vendor != PCI_ANY_ID &&
+ id->vendor != pdev->vendor)
+ continue;
+ if (id->device != PCI_ANY_ID &&
+ id->device != pdev->device)
+ continue;
+ if (id->subvendor != PCI_ANY_ID &&
+ id->subvendor != pdev->subsystem_vendor)
+ continue;
+ if (id->subdevice != PCI_ANY_ID &&
+ id->subdevice != pdev->subsystem_device)
+ continue;
+
+ return eeh_pe_reset_and_recover(pe);
+ }
+ }
+
+ ret = eeh_unfreeze_pe(pe);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
+ return ret;
}
/**
@@ -1153,26 +1207,33 @@ void eeh_remove_device(struct pci_dev *dev)
int eeh_dev_open(struct pci_dev *pdev)
{
struct eeh_dev *edev;
+ int ret = -ENODEV;
- mutex_lock(&eeh_dev_mutex);
+ guard(mutex)(&eeh_dev_mutex);
/* No PCI device ? */
if (!pdev)
- goto out;
+ return ret;
/* No EEH device or PE ? */
edev = pci_dev_to_eeh_dev(pdev);
if (!edev || !edev->pe)
- goto out;
+ return ret;
+
+ /*
+ * The PE might have been put into frozen state, but we
+ * didn't detect that yet. The passed through PCI devices
+ * in frozen PE won't work properly. Clear the frozen state
+ * in advance.
+ */
+ ret = eeh_pe_change_owner(edev->pe);
+ if (ret)
+ return ret;
/* Increase PE's pass through count */
atomic_inc(&edev->pe->pass_dev_cnt);
- mutex_unlock(&eeh_dev_mutex);
return 0;
-out:
- mutex_unlock(&eeh_dev_mutex);
- return -ENODEV;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);
@@ -1188,45 +1249,25 @@ void eeh_dev_release(struct pci_dev *pdev)
{
struct eeh_dev *edev;
- mutex_lock(&eeh_dev_mutex);
+ guard(mutex)(&eeh_dev_mutex);
/* No PCI device ? */
if (!pdev)
- goto out;
+ return;
/* No EEH device ? */
edev = pci_dev_to_eeh_dev(pdev);
if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
- goto out;
+ return;
/* Decrease PE's pass through count */
- atomic_dec(&edev->pe->pass_dev_cnt);
- WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
-out:
- mutex_unlock(&eeh_dev_mutex);
+ WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
+ eeh_pe_change_owner(edev->pe);
}
EXPORT_SYMBOL(eeh_dev_release);
#ifdef CONFIG_IOMMU_API
-static int dev_has_iommu_table(struct device *dev, void *data)
-{
- struct pci_dev *pdev = to_pci_dev(dev);
- struct pci_dev **ppdev = data;
- struct iommu_table *tbl;
-
- if (!dev)
- return 0;
-
- tbl = get_iommu_table_base(dev);
- if (tbl && tbl->it_group) {
- *ppdev = pdev;
- return 1;
- }
-
- return 0;
-}
-
/**
* eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
* @group: IOMMU group
@@ -1276,25 +1317,28 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
/*
* EEH functionality could possibly be disabled, just
- * return error for the case. And the EEH functinality
+ * return error for the case. And the EEH functionality
* isn't expected to be disabled on one specific PE.
*/
switch (option) {
case EEH_OPT_ENABLE:
- if (eeh_enabled())
+ if (eeh_enabled()) {
+ ret = eeh_pe_change_owner(pe);
break;
+ }
ret = -EIO;
break;
case EEH_OPT_DISABLE:
break;
case EEH_OPT_THAW_MMIO:
case EEH_OPT_THAW_DMA:
+ case EEH_OPT_FREEZE_PE:
if (!eeh_ops || !eeh_ops->set_option) {
ret = -ENOENT;
break;
}
- ret = eeh_ops->set_option(pe, option);
+ ret = eeh_pci_enable(pe, option);
break;
default:
pr_debug("%s: Option %d out of range (%d, %d)\n",
@@ -1325,6 +1369,17 @@ int eeh_pe_get_state(struct eeh_pe *pe)
if (!eeh_ops || !eeh_ops->get_state)
return -ENOENT;
+ /*
+ * If the parent PE is owned by the host kernel and is undergoing
+ * error recovery, we should return the PE state as temporarily
+ * unavailable so that the error recovery on the guest is suspended
+ * until the recovery completes on the host.
+ */
+ if (pe->parent &&
+ !(pe->state & EEH_PE_REMOVED) &&
+ (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
+ return EEH_PE_STATE_UNAVAIL;
+
result = eeh_ops->get_state(pe, NULL);
rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
dma_en = !!(result & EEH_STATE_DMA_ENABLED);
@@ -1345,16 +1400,54 @@ int eeh_pe_get_state(struct eeh_pe *pe)
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
+static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
+{
+ struct eeh_dev *edev, *tmp;
+ struct pci_dev *pdev;
+ int ret = 0;
+
+ eeh_pe_restore_bars(pe);
+
+ /*
+ * Reenable PCI devices as the devices passed
+ * through are always enabled before the reset.
+ */
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
+
+ ret = pci_reenable_device(pdev);
+ if (ret) {
+ pr_warn("%s: Failure %d reenabling %s\n",
+ __func__, ret, pci_name(pdev));
+ return ret;
+ }
+ }
+
+ /* The PE is still in frozen state */
+ if (include_passed || !eeh_pe_passed(pe)) {
+ ret = eeh_unfreeze_pe(pe);
+ } else
+ pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
+ pe->phb->global_number, pe->addr);
+ if (!ret)
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
+ return ret;
+}
+
+
/**
* eeh_pe_reset - Issue PE reset according to specified type
* @pe: EEH PE
* @option: reset type
+ * @include_passed: include passed-through devices?
*
* The routine is called to reset the specified PE with the
* indicated type, either fundamental reset or hot reset.
* PE reset is the most important part for error recovery.
*/
-int eeh_pe_reset(struct eeh_pe *pe, int option)
+int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
{
int ret = 0;
@@ -1368,23 +1461,22 @@ int eeh_pe_reset(struct eeh_pe *pe, int option)
switch (option) {
case EEH_RESET_DEACTIVATE:
ret = eeh_ops->reset(pe, option);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
if (ret)
break;
- /*
- * The PE is still in frozen state and we need to clear
- * that. It's good to clear frozen state after deassert
- * to avoid messy IO access during reset, which might
- * cause recursive frozen PE.
- */
- ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
- if (!ret)
- ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
- if (!ret)
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ ret = eeh_pe_reenable_devices(pe, include_passed);
break;
case EEH_RESET_HOT:
case EEH_RESET_FUNDAMENTAL:
+ /*
+ * Proactively freeze the PE to drop all MMIO access
+ * during reset. Such access must be blocked because it
+ * always causes a recursive EEH error.
+ */
+ eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
+
+ eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
ret = eeh_ops->reset(pe, option);
break;
default:
@@ -1412,14 +1504,45 @@ int eeh_pe_configure(struct eeh_pe *pe)
/* Invalid PE ? */
if (!pe)
return -ENODEV;
-
- /* Restore config space for the affected devices */
- eeh_pe_restore_bars(pe);
+ else
+ ret = eeh_ops->configure_bridge(pe);
return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);
+/**
+ * eeh_pe_inject_err - Inject a PCI error into the given PE
+ * @pe: PE to inject the error into
+ * @type: error type
+ * @func: error function
+ * @addr: address
+ * @mask: address mask
+ *
+ * Injects the PCI error selected by @type and @func into @pe. This is
+ * meant for testing.
+ *
+ * Return: -ENODEV if @pe is NULL, -ENOENT if no err_inject operation is
+ * available, -EINVAL if @func lies outside
+ * [EEH_ERR_FUNC_MIN, EEH_ERR_FUNC_MAX], otherwise whatever the
+ * err_inject operation returns.
+ */
+int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
+		      unsigned long addr, unsigned long mask)
+{
+	/* No PE to act on */
+	if (!pe)
+		return -ENODEV;
+
+	/* The backend must provide an injection hook */
+	if (!(eeh_ops && eeh_ops->err_inject))
+		return -ENOENT;
+
+	/* Only error functions inside the supported window are accepted */
+	if (!(func >= EEH_ERR_FUNC_MIN && func <= EEH_ERR_FUNC_MAX))
+		return -EINVAL;
+
+	return eeh_ops->err_inject(pe, type, func, addr, mask);
+}
+EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
+
+#ifdef CONFIG_PROC_FS
static int proc_eeh_show(struct seq_file *m, void *v)
{
if (!eeh_enabled()) {
@@ -1446,20 +1569,136 @@ static int proc_eeh_show(struct seq_file *m, void *v)
return 0;
}
+#endif /* CONFIG_PROC_FS */
-static int proc_eeh_open(struct inode *inode, struct file *file)
+static int eeh_break_device(struct pci_dev *pdev)
{
- return single_open(file, proc_eeh_show, NULL);
+ struct resource *bar = NULL;
+ void __iomem *mapped;
+ u16 old, bit;
+ int i, pos;
+
+ /* Do we have an MMIO BAR to disable? */
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ struct resource *r = &pdev->resource[i];
+
+ if (!r->flags || !r->start)
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ continue;
+ if (r->flags & IORESOURCE_UNSET)
+ continue;
+
+ bar = r;
+ break;
+ }
+
+ if (!bar) {
+ pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+ return -ENXIO;
+ }
+
+ pci_err(pdev, "Going to break: %pR\n", bar);
+
+ if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+ return -ENXIO;
+#else
+ /*
+ * VFs don't have a per-function COMMAND register, so the best
+ * we can do is clear the Memory Space Enable bit in the PF's
+ * SRIOV control reg.
+ *
+ * Unfortunately, this requires that we have a PF (i.e doesn't
+ * work for a passed-through VF) and it has the potential side
+ * effect of also causing an EEH on every other VF under the
+ * PF. Oh well.
+ */
+ pdev = pdev->physfn;
+ if (!pdev)
+ return -ENXIO; /* passed through VFs have no PF */
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pos += PCI_SRIOV_CTRL;
+ bit = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+ } else {
+ bit = PCI_COMMAND_MEMORY;
+ pos = PCI_COMMAND;
+ }
+
+ /*
+ * Process here is:
+ *
+ * 1. Disable Memory space.
+ *
+ * 2. Perform an MMIO to the device. This should result in an error
+ * (CA / UR) being raised by the device which results in an EEH
+ * PE freeze. Using the in_8() accessor skips the EEH detection
+ * hook, so the EEH detection machinery won't be
+ * triggered here. This is to match the usual behaviour of EEH
+ * where the HW will asynchronously freeze a PE and it's up to
+ * the kernel to notice and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ * since recovery will probably fail if we don't. For normal
+ * devices the COMMAND register is reset as a part of
+ * re-initialising the device.
+ *
+ * Breaking stuff is the point so who cares if it's racy ;)
+ */
+ pci_read_config_word(pdev, pos, &old);
+
+ mapped = ioremap(bar->start, PAGE_SIZE);
+ if (!mapped) {
+ pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+ return -ENXIO;
+ }
+
+ pci_write_config_word(pdev, pos, old & ~bit);
+ in_8(mapped);
+ pci_write_config_word(pdev, pos, old);
+
+ iounmap(mapped);
+
+ return 0;
}
-static const struct file_operations proc_eeh_operations = {
- .open = proc_eeh_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+/* Non-static entry point: forwards @pdev to eeh_break_device() and returns its result. */
+int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
+{
+ return eeh_break_device(pdev);
+}
#ifdef CONFIG_DEBUG_FS
+
+
+/*
+ * eeh_debug_lookup_pdev - Find the PCI device named by a debugfs write
+ *
+ * Copies at most sizeof(buf) - 1 bytes of user input, parses it as
+ * "<domain>:<bus>:<dev>.<fn>" (all fields hexadecimal) and looks the
+ * device up with pci_get_domain_bus_and_slot().
+ *
+ * Returns the pci_dev on success; the callers in this file release it
+ * with pci_dev_put(). On failure returns an ERR_PTR(): the error from
+ * simple_write_to_buffer(), -EFAULT if nothing was copied, -EINVAL if the
+ * input does not parse, or -ENODEV if no such device exists.
+ */
+static struct pci_dev *eeh_debug_lookup_pdev(struct file *filp,
+					     const char __user *user_buf,
+					     size_t count, loff_t *ppos)
+{
+	uint32_t domain, bus, dev, fn;
+	struct pci_dev *pdev;
+	char buf[20];
+	ssize_t len;
+	int ret;
+
+	/* Zero-fill and never write the last byte so buf stays NUL terminated */
+	memset(buf, 0, sizeof(buf));
+	len = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
+	if (len < 0)
+		return ERR_PTR(len);	/* propagate the real copy error */
+	if (!len)
+		return ERR_PTR(-EFAULT);
+
+	ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+	if (ret != 4) {
+		pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+		return ERR_PTR(-EINVAL);
+	}
+
+	pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+	if (!pdev)
+		return ERR_PTR(-ENODEV);
+
+	return pdev;
+}
+
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
if (val)
@@ -1467,10 +1706,6 @@ static int eeh_enable_dbgfs_set(void *data, u64 val)
else
eeh_add_flag(EEH_FORCE_DISABLED);
- /* Notify the backend */
- if (eeh_ops->post_init)
- eeh_ops->post_init();
-
return 0;
}
@@ -1483,18 +1718,208 @@ static int eeh_enable_dbgfs_get(void *data, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
- eeh_enable_dbgfs_set, "0x%llx\n");
+DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
+ eeh_enable_dbgfs_set, "0x%llx\n");
+
+/*
+ * eeh_force_recover_write - debugfs write handler for "eeh_force_recover"
+ *
+ * Accepts either the literal "hwcheck", which queues a failure event with
+ * a NULL PE, or "<phbid>:<pe_no>" (both hexadecimal), which queues a
+ * failure event for that PE.
+ *
+ * Returns @count on success, the error from simple_write_to_buffer(),
+ * -EFAULT if nothing was copied, -EINVAL if the input does not parse, or
+ * -ENODEV if the PHB or PE cannot be found.
+ */
+static ssize_t eeh_force_recover_write(struct file *filp,
+				       const char __user *user_buf,
+				       size_t count, loff_t *ppos)
+{
+	struct pci_controller *hose;
+	uint32_t phbid, pe_no;
+	struct eeh_pe *pe;
+	char buf[20];
+	ssize_t len;
+	int ret;
+
+	/*
+	 * Zero-fill and never write the last byte so that strncmp() and
+	 * sscanf() below always see a NUL-terminated string.
+	 */
+	memset(buf, 0, sizeof(buf));
+	len = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count);
+	if (len < 0)
+		return len;
+	if (!len)
+		return -EFAULT;
+
+	/*
+	 * When PE is NULL the event is a "special" event. Rather than
+	 * recovering a specific PE it forces the EEH core to scan for failed
+	 * PHBs and recovers each. This needs to be done before any device
+	 * recoveries can occur.
+	 */
+	if (!strncmp(buf, "hwcheck", 7)) {
+		__eeh_send_failure_event(NULL);
+		return count;
+	}
+
+	ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
+	if (ret != 2)
+		return -EINVAL;
+
+	hose = pci_find_controller_for_domain(phbid);
+	if (!hose)
+		return -ENODEV;
+
+	/* Retrieve PE */
+	pe = eeh_pe_get(hose, pe_no);
+	if (!pe)
+		return -ENODEV;
+
+	/*
+	 * We don't do any state checking here since the detection
+	 * process is async to the recovery process. The recovery
+	 * thread *should* not break even if we schedule a recovery
+	 * from an odd state (e.g. PE removed, or recovery of a
+	 * non-isolated PE)
+	 */
+	__eeh_send_failure_event(pe);
+
+	return count;
+}
+
+static const struct file_operations eeh_force_recover_fops = {
+ .open = simple_open,
+ .write = eeh_force_recover_write,
+};
+
+/*
+ * Read handler used by the eeh_dev_check, eeh_dev_break and
+ * eeh_dev_can_recover file operations: hands back a one-line description
+ * of the expected input format.
+ */
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+				     char __user *user_buf,
+				     size_t count, loff_t *ppos)
+{
+	static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+	const size_t usage_len = sizeof(usage) - 1;	/* drop the trailing NUL */
+
+	return simple_read_from_buffer(user_buf, count, ppos, usage, usage_len);
+}
+
+/*
+ * Write handler for the eeh_dev_check file operations: looks up the PCI
+ * device named by the user, runs eeh_dev_check_failure() on its eeh_dev
+ * and logs the result. Returns @count, the lookup error, or -ENODEV when
+ * the device has no eeh_dev.
+ */
+static ssize_t eeh_dev_check_write(struct file *filp,
+				   const char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{
+	struct pci_dev *pdev;
+	struct eeh_dev *edev;
+	ssize_t rc = count;
+	int failed;
+
+	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	edev = pci_dev_to_eeh_dev(pdev);
+	if (edev) {
+		failed = eeh_dev_check_failure(edev);
+		pci_info(pdev, "eeh_dev_check_failure(%s) = %d\n",
+			 pci_name(pdev), failed);
+	} else {
+		pci_err(pdev, "No eeh_dev for this device!\n");
+		rc = -ENODEV;
+	}
+
+	/* Single exit: drop the reference taken by the lookup */
+	pci_dev_put(pdev);
+
+	return rc;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+ .open = simple_open,
+ .write = eeh_dev_check_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+/*
+ * Write handler for the eeh_dev_break file operations: looks up the PCI
+ * device named by the user and passes it to eeh_break_device(). Returns
+ * @count on success, otherwise the lookup error or the negative value
+ * returned by eeh_break_device().
+ */
+static ssize_t eeh_dev_break_write(struct file *filp,
+				   const char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{
+	struct pci_dev *target;
+	int err;
+
+	target = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(target))
+		return PTR_ERR(target);
+
+	err = eeh_break_device(target);
+	pci_dev_put(target);
+
+	if (err >= 0)
+		return count;
+
+	return err;
+}
+
+static const struct file_operations eeh_dev_break_fops = {
+ .open = simple_open,
+ .write = eeh_dev_break_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+/*
+ * Write handler for the eeh_dev_can_recover file operations: reports
+ * whether the driver bound to the named PCI device implements the
+ * callbacks needed for recovery. Returns @count if it does, -EOPNOTSUPP
+ * if it does not, or the lookup error.
+ */
+static ssize_t eeh_dev_can_recover(struct file *filp,
+				   const char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{
+	struct pci_driver *drv;
+	struct pci_dev *pdev;
+	ssize_t ret;	/* signed: carries either @count or a negative errno */
+
+	pdev = eeh_debug_lookup_pdev(filp, user_buf, count, ppos);
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	/*
+	 * In order for error recovery to work the driver needs to implement
+	 * .error_detected(), so it can quiesce IO to the device, and
+	 * .slot_reset() so it can re-initialise the device after a reset.
+	 *
+	 * Ideally they'd implement .resume() too, but some drivers which
+	 * we need to support (notably IPR) don't so I guess we can tolerate
+	 * that.
+	 *
+	 * .mmio_enabled() is mostly there as a work-around for devices which
+	 * take forever to re-init after a hot reset. Implementing that is
+	 * strictly optional.
+	 */
+	drv = pci_dev_driver(pdev);
+	if (drv &&
+	    drv->err_handler &&
+	    drv->err_handler->error_detected &&
+	    drv->err_handler->slot_reset) {
+		ret = count;
+	} else {
+		ret = -EOPNOTSUPP;
+	}
+
+	pci_dev_put(pdev);
+
+	return ret;
+}
+
+static const struct file_operations eeh_dev_can_recover_fops = {
+ .open = simple_open,
+ .write = eeh_dev_can_recover,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
{
if (machine_is(pseries) || machine_is(powernv)) {
- proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
+ proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
- debugfs_create_file("eeh_enable", 0600,
- powerpc_debugfs_root, NULL,
- &eeh_enable_dbgfs_ops);
+ debugfs_create_file_unsafe("eeh_enable", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_enable_dbgfs_ops);
+ debugfs_create_u32("eeh_max_freezes", 0600,
+ arch_debugfs_dir, &eeh_max_freezes);
+ debugfs_create_bool("eeh_disable_recovery", 0600,
+ arch_debugfs_dir,
+ &eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_dev_check", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_dev_check_fops);
+ debugfs_create_file_unsafe("eeh_dev_break", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_dev_break_fops);
+ debugfs_create_file_unsafe("eeh_force_recover", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_force_recover_fops);
+ debugfs_create_file_unsafe("eeh_dev_can_recover", 0600,
+ arch_debugfs_dir, NULL,
+ &eeh_dev_can_recover_fops);
+ eeh_cache_debugfs_init();
#endif
}
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 07d8a2423a61..2f9dbf8ad2ee 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -1,22 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI address cache; allows the lookup of PCI devices based on I/O address
*
* Copyright IBM Corporation 2004
* Copyright Linas Vepstas <linas@austin.ibm.com> 2004
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/list.h>
@@ -25,11 +12,14 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
+#include <linux/debugfs.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
/**
+ * DOC: Overview
+ *
* The pci address cache subsystem. This subsystem places
* PCI device address resources into a red-black tree, sorted
* according to the address range, so that given only an i/o
@@ -46,13 +36,14 @@
* than any hash algo I could think of for this problem, even
* with the penalty of slow pointer chases for d-cache misses).
*/
+
struct pci_io_addr_range {
struct rb_node rb_node;
- unsigned long addr_lo;
- unsigned long addr_hi;
+ resource_size_t addr_lo;
+ resource_size_t addr_hi;
struct eeh_dev *edev;
struct pci_dev *pcidev;
- unsigned int flags;
+ unsigned long flags;
};
static struct pci_io_addr_cache {
@@ -84,8 +75,7 @@ static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
* @addr: mmio (PIO) phys address or i/o port number
*
* Given an mmio phys address, or a port number, find a pci device
- * that implements this address. Be sure to pci_dev_put the device
- * when finished. I/O port numbers are assumed to be offset
+ * that implements this address. I/O port numbers are assumed to be offset
* from zero (that is, they do *not* have pci_io_addr added in).
* It is safe to call this function within an interrupt.
*/
@@ -114,9 +104,9 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
while (n) {
struct pci_io_addr_range *piar;
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
+ pr_info("PCI: %s addr range %d [%pap-%pap]: %s\n",
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
- piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
+ &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
cnt++;
n = rb_next(n);
}
@@ -125,8 +115,8 @@ static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
/* Insert address range into the rb tree. */
static struct pci_io_addr_range *
-eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
- unsigned long ahi, unsigned int flags)
+eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
+ resource_size_t ahi, unsigned long flags)
{
struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
struct rb_node *parent = NULL;
@@ -158,10 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
piar->pcidev = dev;
piar->flags = flags;
-#ifdef DEBUG
- pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
- alo, ahi, pci_name(dev));
-#endif
+ eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+ &alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -171,38 +159,30 @@ eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
- struct device_node *dn;
struct eeh_dev *edev;
int i;
- dn = pci_device_to_OF_node(dev);
- if (!dn) {
- pr_warn("PCI: no pci dn found for dev=%s\n",
- pci_name(dev));
- return;
- }
-
- edev = of_node_to_eeh_dev(dn);
+ edev = pci_dev_to_eeh_dev(dev);
if (!edev) {
- pr_warn("PCI: no EEH dev found for dn=%s\n",
- dn->full_name);
+ pr_warn("PCI: no EEH dev found for %s\n",
+ pci_name(dev));
return;
}
/* Skip any devices for which EEH is not enabled. */
if (!edev->pe) {
-#ifdef DEBUG
- pr_info("PCI: skip building address cache for=%s - %s\n",
- pci_name(dev), dn->full_name);
-#endif
+ dev_dbg(&dev->dev, "EEH: Skip building address cache\n");
return;
}
- /* Walk resources on this device, poke them into the tree */
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- unsigned long start = pci_resource_start(dev,i);
- unsigned long end = pci_resource_end(dev,i);
- unsigned int flags = pci_resource_flags(dev,i);
+ /*
+ * Walk resources on this device, poke the first 7 (6 normal BAR and 1
+ * ROM BAR) into the tree.
+ */
+ for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
+ resource_size_t start = pci_resource_start(dev,i);
+ resource_size_t end = pci_resource_end(dev,i);
+ unsigned long flags = pci_resource_flags(dev,i);
/* We are interested only bus addresses, not dma or other stuff */
if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
@@ -225,10 +205,6 @@ void eeh_addr_cache_insert_dev(struct pci_dev *dev)
{
unsigned long flags;
- /* Ignore PCI bridges */
- if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
- return;
-
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
__eeh_addr_cache_insert_dev(dev);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
@@ -245,6 +221,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
+ eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+ &piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -272,40 +250,39 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
*
- * Build a cache of pci i/o addresses. This cache will be used to
+ * Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
*/
-void eeh_addr_cache_build(void)
+void eeh_addr_cache_init(void)
{
- struct device_node *dn;
- struct eeh_dev *edev;
- struct pci_dev *dev = NULL;
-
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
+}
- for_each_pci_dev(dev) {
- dn = pci_device_to_OF_node(dev);
- if (!dn)
- continue;
-
- edev = of_node_to_eeh_dev(dn);
- if (!edev)
- continue;
+static int eeh_addr_cache_show(struct seq_file *s, void *v)
+{
+ struct pci_io_addr_range *piar;
+ struct rb_node *n;
+ unsigned long flags;
- dev->dev.archdata.edev = edev;
- edev->pdev = dev;
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
+ for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);
- eeh_addr_cache_insert_dev(dev);
- eeh_sysfs_add_device(dev);
+ seq_printf(s, "%s addr range [%pap-%pap]: %s\n",
+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
+ &piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
-#ifdef DEBUG
- /* Verify tree built up above, echo back the list of addrs. */
- eeh_addr_cache_print(&pci_io_addr_cache_root);
-#endif
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(eeh_addr_cache);
+
+void __init eeh_cache_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("eeh_address_cache", 0400,
+ arch_debugfs_dir, NULL,
+ &eeh_addr_cache_fops);
}
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
deleted file mode 100644
index e5274ee9a75f..000000000000
--- a/arch/powerpc/kernel/eeh_dev.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * The file intends to implement dynamic creation of EEH device, which will
- * be bound with OF node and PCI device simutaneously. The EEH devices would
- * be foundamental information for EEH core components to work proerly. Besides,
- * We have to support multiple situations where dynamic creation of EEH device
- * is required:
- *
- * 1) Before PCI emunation starts, we need create EEH devices according to the
- * PCI sensitive OF nodes.
- * 2) When PCI emunation is done, we need do the binding between PCI device and
- * the associated EEH device.
- * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
- * will be created while PCI sensitive OF node is detected from DR.
- * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
- * PHB is newly inserted, we also need create EEH devices accordingly.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-/**
- * eeh_dev_init - Create EEH device according to OF node
- * @dn: device node
- * @data: PHB
- *
- * It will create EEH device according to the given OF node. The function
- * might be called by PCI emunation, DR, PHB hotplug.
- */
-void *eeh_dev_init(struct device_node *dn, void *data)
-{
- struct pci_controller *phb = data;
- struct eeh_dev *edev;
-
- /* Allocate EEH device */
- edev = kzalloc(sizeof(*edev), GFP_KERNEL);
- if (!edev) {
- pr_warn("%s: out of memory\n",
- __func__);
- return NULL;
- }
-
- /* Associate EEH device with OF node */
- PCI_DN(dn)->edev = edev;
- edev->dn = dn;
- edev->phb = phb;
- INIT_LIST_HEAD(&edev->list);
-
- return NULL;
-}
-
-/**
- * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
- * @phb: PHB
- *
- * Scan the PHB OF node and its child association, then create the
- * EEH devices accordingly
- */
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
-{
- struct device_node *dn = phb->dn;
-
- /* EEH PE for PHB */
- eeh_phb_pe_create(phb);
-
- /* EEH device for PHB */
- eeh_dev_init(dn, phb);
-
- /* EEH devices for children OF nodes */
- traverse_pci_devices(dn, eeh_dev_init, phb);
-}
-
-/**
- * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
- *
- * Scan all the existing PHBs and create EEH devices for their OF
- * nodes and their children OF nodes
- */
-static int __init eeh_dev_phb_init(void)
-{
- struct pci_controller *phb, *tmp;
-
- list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
- eeh_dev_phb_init_dynamic(phb);
-
- pr_info("EEH: devices created\n");
-
- return 0;
-}
-
-core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 6a0dcee8e931..ef78ff77cf8f 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PCI Error Recovery Driver for RPA-compliant PPC64 platform.
* Copyright IBM Corp. 2004 2005
* Copyright Linas Vepstas <linas@linas.org> 2004, 2005
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
#include <linux/delay.h>
@@ -27,25 +11,85 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
-/**
- * eeh_pcid_name - Retrieve name of PCI device driver
- * @pdev: PCI device
- *
- * This routine is used to retrieve the name of PCI device driver
- * if that's valid.
- */
-static inline const char *eeh_pcid_name(struct pci_dev *pdev)
+struct eeh_rmv_data {
+ struct list_head removed_vf_list;
+ int removed_dev_count;
+};
+
+/*
+ * eeh_result_priority - Rank a pci_ers_result for merging
+ *
+ * Maps each known result to a rank from 1 (PCI_ERS_RESULT_NONE) to
+ * 6 (PCI_ERS_RESULT_NEED_RESET); pci_ers_merge_result() keeps the result
+ * with the larger rank. An unknown value warns once and ranks 0.
+ */
+static int eeh_result_priority(enum pci_ers_result result)
+{
+	switch (result) {
+	case PCI_ERS_RESULT_NONE:
+		return 1;
+	case PCI_ERS_RESULT_NO_AER_DRIVER:
+		return 2;
+	case PCI_ERS_RESULT_RECOVERED:
+		return 3;
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		return 4;
+	case PCI_ERS_RESULT_DISCONNECT:
+		return 5;
+	case PCI_ERS_RESULT_NEED_RESET:
+		return 6;
+	default:
+		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", result);
+		return 0;
+	}
+}
+
+/*
+ * pci_ers_result_name - Human-readable name of a pci_ers_result
+ *
+ * Returns a constant string for each known result. An unknown value
+ * warns once and yields "unknown".
+ */
+static const char *pci_ers_result_name(enum pci_ers_result result)
+{
+	switch (result) {
+	case PCI_ERS_RESULT_NONE:
+		return "none";
+	case PCI_ERS_RESULT_CAN_RECOVER:
+		return "can recover";
+	case PCI_ERS_RESULT_NEED_RESET:
+		return "need reset";
+	case PCI_ERS_RESULT_DISCONNECT:
+		return "disconnect";
+	case PCI_ERS_RESULT_RECOVERED:
+		return "recovered";
+	case PCI_ERS_RESULT_NO_AER_DRIVER:
+		return "no AER driver";
+	default:
+		WARN_ONCE(1, "Unknown result type: %d\n", result);
+		return "unknown";
+	}
+}
+
+/*
+ * Combine two recovery results: @new replaces @old only when
+ * eeh_result_priority() ranks it strictly higher; ties keep @old.
+ */
+static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
+						enum pci_ers_result new)
+{
+	const int old_rank = eeh_result_priority(old);
+	const int new_rank = eeh_result_priority(new);
+
+	return new_rank > old_rank ? new : old;
+}
+
+/* True when @edev is NULL or has EEH_DEV_REMOVED set in its mode. */
+static bool eeh_dev_removed(struct eeh_dev *edev)
+{
+	if (!edev)
+		return true;
+
+	return (edev->mode & EEH_DEV_REMOVED) != 0;
+}
+
+static bool eeh_edev_actionable(struct eeh_dev *edev)
{
- if (pdev && pdev->dev.driver)
- return pdev->dev.driver->name;
- return "";
+ if (!edev->pdev)
+ return false;
+ if (edev->pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+ if (eeh_dev_removed(edev))
+ return false;
+ if (eeh_pe_passed(edev->pe))
+ return false;
+
+ return true;
}
/**
@@ -59,13 +103,13 @@ static inline const char *eeh_pcid_name(struct pci_dev *pdev)
*/
static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
{
- if (!pdev || !pdev->driver)
+ if (!pdev || !pdev->dev.driver)
return NULL;
- if (!try_module_get(pdev->driver->driver.owner))
+ if (!try_module_get(pdev->dev.driver->owner))
return NULL;
- return pdev->driver;
+ return to_pci_driver(pdev->dev.driver);
}
/**
@@ -77,34 +121,12 @@ static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
*/
static inline void eeh_pcid_put(struct pci_dev *pdev)
{
- if (!pdev || !pdev->driver)
+ if (!pdev || !pdev->dev.driver)
return;
- module_put(pdev->driver->driver.owner);
+ module_put(pdev->dev.driver->owner);
}
-#if 0
-static void print_device_node_tree(struct pci_dn *pdn, int dent)
-{
- int i;
- struct device_node *pc;
-
- if (!pdn)
- return;
- for (i = 0; i < dent; i++)
- printk(" ");
- printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
- pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
- pdn->eeh_pe_config_addr, pdn->node->full_name);
- dent += 3;
- pc = pdn->node->child;
- while (pc) {
- print_device_node_tree(PCI_DN(pc), dent);
- pc = pc->sibling;
- }
-}
-#endif
-
/**
* eeh_disable_irq - Disable interrupt for the recovering device
* @dev: PCI device
@@ -115,22 +137,20 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
* do real work because EEH should freeze DMA transfers for those PCI
* devices encountering EEH errors, which includes MSI or MSI-X.
*/
-static void eeh_disable_irq(struct pci_dev *dev)
+static void eeh_disable_irq(struct eeh_dev *edev)
{
- struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
/* Don't disable MSI and MSI-X interrupts. They are
* effectively disabled by the DMA Stopped state
* when an EEH error occurs.
*/
- if (dev->msi_enabled || dev->msix_enabled)
+ if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
return;
- if (!irq_has_action(dev->irq))
+ if (!irq_has_action(edev->pdev->irq))
return;
edev->mode |= EEH_DEV_IRQ_DISABLED;
- disable_irq_nosync(dev->irq);
+ disable_irq_nosync(edev->pdev->irq);
}
/**
@@ -140,10 +160,8 @@ static void eeh_disable_irq(struct pci_dev *dev)
* This routine must be called to enable interrupt while failed
* device could be resumed.
*/
-static void eeh_enable_irq(struct pci_dev *dev)
+static void eeh_enable_irq(struct eeh_dev *edev)
{
- struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
-
if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
edev->mode &= ~EEH_DEV_IRQ_DISABLED;
/*
@@ -156,7 +174,7 @@ static void eeh_enable_irq(struct pci_dev *dev)
* into it.
*
* That's just wrong.The warning in the core code is
- * there to tell people to fix their assymetries in
+ * there to tell people to fix their asymmetries in
* their own code, not by abusing the core information
* to avoid it.
*
@@ -166,222 +184,315 @@ static void eeh_enable_irq(struct pci_dev *dev)
*
* tglx
*/
- if (irqd_irq_disabled(irq_get_irq_data(dev->irq)))
- enable_irq(dev->irq);
+ if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
+ enable_irq(edev->pdev->irq);
}
}
-static bool eeh_dev_removed(struct eeh_dev *edev)
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
- /* EEH device removed ? */
- if (!edev || (edev->mode & EEH_DEV_REMOVED))
- return true;
+ struct pci_dev *pdev;
+
+ if (!edev)
+ return;
+
+ /*
+ * We cannot access the config space on some adapters.
+ * Otherwise, it will cause fenced PHB. We don't save
+ * the content in their config space and will restore
+ * from the initial config space saved when the EEH
+ * device is created.
+ */
+ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
+ return;
+
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ return;
- return false;
+ pci_save_state(pdev);
}
-/**
- * eeh_report_error - Report pci error to each device driver
- * @data: eeh device
- * @userdata: return value
- *
- * Report an EEH error to each device driver, collect up and
- * merge the device driver responses. Cumulative response
- * passed back in "userdata".
- */
-static void *eeh_report_error(void *data, void *userdata)
+static void eeh_set_channel_state(struct eeh_pe *root, pci_channel_state_t s)
{
- struct eeh_dev *edev = (struct eeh_dev *)data;
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver;
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
- if (!dev || eeh_dev_removed(edev))
- return NULL;
- dev->error_state = pci_channel_io_frozen;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ if (eeh_edev_actionable(edev))
+ edev->pdev->error_state = s;
+}
- driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
- eeh_disable_irq(dev);
+ eeh_for_each_pe(root, pe) {
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ if (!eeh_edev_actionable(edev))
+ continue;
- if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
- return NULL;
+ if (!eeh_pcid_get(edev->pdev))
+ continue;
+
+ if (enable)
+ eeh_enable_irq(edev);
+ else
+ eeh_disable_irq(edev);
+
+ eeh_pcid_put(edev->pdev);
+ }
}
+}
- rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+ struct pci_dev *,
+ struct pci_driver *);
+static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
+ enum pci_ers_result *result)
+{
+ struct pci_dev *pdev;
+ struct pci_driver *driver;
+ enum pci_ers_result new_result;
+
+ pdev = edev->pdev;
+ if (pdev)
+ get_device(&pdev->dev);
+ if (!pdev) {
+ eeh_edev_info(edev, "no device");
+ *result = PCI_ERS_RESULT_DISCONNECT;
+ return;
+ }
+ device_lock(&pdev->dev);
+ if (eeh_edev_actionable(edev)) {
+ driver = eeh_pcid_get(pdev);
+
+ if (!driver)
+ eeh_edev_info(edev, "no driver");
+ else if (!driver->err_handler)
+ eeh_edev_info(edev, "driver not EEH aware");
+ else if (edev->mode & EEH_DEV_NO_HANDLER)
+ eeh_edev_info(edev, "driver bound too late");
+ else {
+ new_result = fn(edev, pdev, driver);
+ eeh_edev_info(edev, "%s driver reports: '%s'",
+ driver->name,
+ pci_ers_result_name(new_result));
+ if (result)
+ *result = pci_ers_merge_result(*result,
+ new_result);
+ }
+ if (driver)
+ eeh_pcid_put(pdev);
+ } else {
+ eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
+ !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
+ }
+ device_unlock(&pdev->dev);
+ if (edev->pdev != pdev)
+ eeh_edev_warn(edev, "Device changed during processing!\n");
+ put_device(&pdev->dev);
+}
- /* A driver that needs a reset trumps all others */
- if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
- if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+static void eeh_pe_report(const char *name, struct eeh_pe *root,
+ eeh_report_fn fn, enum pci_ers_result *result)
+{
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
- eeh_pcid_put(dev);
- return NULL;
+ pr_info("EEH: Beginning: '%s'\n", name);
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ eeh_pe_report_edev(edev, fn, result);
+ if (result)
+ pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
+ name, pci_ers_result_name(*result));
+ else
+ pr_info("EEH: Finished:'%s'", name);
}
/**
- * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
- * @data: eeh device
- * @userdata: return value
+ * eeh_report_error - Report pci error to each device driver
+ * @edev: eeh device
+ * @driver: device's PCI driver
*
- * Tells each device driver that IO ports, MMIO and config space I/O
- * are now enabled. Collects up and merges the device driver responses.
- * Cumulative response passed back in "userdata".
+ * Report an EEH error to each device driver.
*/
-static void *eeh_report_mmio_enabled(void *data, void *userdata)
+static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+ struct pci_dev *pdev,
+ struct pci_driver *driver)
{
- struct eeh_dev *edev = (struct eeh_dev *)data;
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver;
+ enum pci_ers_result rc;
- if (!dev || eeh_dev_removed(edev))
- return NULL;
+ if (!driver->err_handler->error_detected)
+ return PCI_ERS_RESULT_NONE;
- driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
+ driver->name);
+ rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
- if (!driver->err_handler ||
- !driver->err_handler->mmio_enabled ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
- eeh_pcid_put(dev);
- return NULL;
- }
-
- rc = driver->err_handler->mmio_enabled(dev);
-
- /* A driver that needs a reset trumps all others */
- if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
- if (*res == PCI_ERS_RESULT_NONE) *res = rc;
+ edev->in_error = true;
+ pci_uevent_ers(pdev, rc);
+ return rc;
+}
- eeh_pcid_put(dev);
- return NULL;
+/**
+ * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
+ * @edev: eeh device
+ * @driver: device's PCI driver
+ *
+ * Tells each device driver that IO ports, MMIO and config space I/O
+ * are now enabled.
+ */
+static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+ struct pci_dev *pdev,
+ struct pci_driver *driver)
+{
+ if (!driver->err_handler->mmio_enabled)
+ return PCI_ERS_RESULT_NONE;
+ eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
+ return driver->err_handler->mmio_enabled(pdev);
}
/**
* eeh_report_reset - Tell device that slot has been reset
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
*
* This routine must be called while EEH tries to reset particular
* PCI device so that the associated PCI device driver could take
* some actions, usually to save data the driver needs so that the
* driver can work again while the device is recovered.
*/
-static void *eeh_report_reset(void *data, void *userdata)
+static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+ struct pci_dev *pdev,
+ struct pci_driver *driver)
{
- struct eeh_dev *edev = (struct eeh_dev *)data;
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- enum pci_ers_result rc, *res = userdata;
- struct pci_driver *driver;
+ if (!driver->err_handler->slot_reset || !edev->in_error)
+ return PCI_ERS_RESULT_NONE;
+ eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
+ return driver->err_handler->slot_reset(pdev);
+}
- if (!dev || eeh_dev_removed(edev))
- return NULL;
- dev->error_state = pci_channel_io_normal;
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+{
+ struct pci_dev *pdev;
- driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!edev)
+ return;
- eeh_enable_irq(dev);
+ pci_lock_rescan_remove();
- if (!driver->err_handler ||
- !driver->err_handler->slot_reset ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
- eeh_pcid_put(dev);
- return NULL;
+ /*
+ * The content in the config space isn't saved because of
+ * the blocked config space on some adapters. We have
+ * to restore the initial saved config space when the
+ * EEH device is created.
+ */
+ if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
+ if (list_is_last(&edev->entry, &edev->pe->edevs))
+ eeh_pe_restore_bars(edev->pe);
+
+ pci_unlock_rescan_remove();
+ return;
+ }
+
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev) {
+ pci_unlock_rescan_remove();
+ return;
}
- rc = driver->err_handler->slot_reset(dev);
- if ((*res == PCI_ERS_RESULT_NONE) ||
- (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
- if (*res == PCI_ERS_RESULT_DISCONNECT &&
- rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
+ pci_restore_state(pdev);
- eeh_pcid_put(dev);
- return NULL;
+ pci_unlock_rescan_remove();
}
/**
* eeh_report_resume - Tell device to resume normal operations
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
*
* This routine must be called to notify the device driver that it
* could resume so that the device driver can do some initialization
* to make the recovered device work again.
*/
-static void *eeh_report_resume(void *data, void *userdata)
+static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+ struct pci_dev *pdev,
+ struct pci_driver *driver)
{
- struct eeh_dev *edev = (struct eeh_dev *)data;
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_driver *driver;
-
- if (!dev || eeh_dev_removed(edev))
- return NULL;
- dev->error_state = pci_channel_io_normal;
-
- driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ if (!driver->err_handler->resume || !edev->in_error)
+ return PCI_ERS_RESULT_NONE;
- eeh_enable_irq(dev);
+ eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
+ driver->err_handler->resume(pdev);
- if (!driver->err_handler ||
- !driver->err_handler->resume ||
- (edev->mode & EEH_DEV_NO_HANDLER)) {
- edev->mode &= ~EEH_DEV_NO_HANDLER;
- eeh_pcid_put(dev);
- return NULL;
- }
-
- driver->err_handler->resume(dev);
-
- eeh_pcid_put(dev);
- return NULL;
+ pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
+#ifdef CONFIG_PCI_IOV
+ if (eeh_ops->notify_resume)
+ eeh_ops->notify_resume(edev);
+#endif
+ return PCI_ERS_RESULT_NONE;
}
/**
* eeh_report_failure - Tell device driver that device is dead.
- * @data: eeh device
- * @userdata: return value
+ * @edev: eeh device
+ * @driver: device's PCI driver
*
* This informs the device driver that the device is permanently
* dead, and that no further recovery attempts will be made on it.
*/
-static void *eeh_report_failure(void *data, void *userdata)
+static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+ struct pci_dev *pdev,
+ struct pci_driver *driver)
{
- struct eeh_dev *edev = (struct eeh_dev *)data;
- struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_driver *driver;
+ enum pci_ers_result rc;
- if (!dev || eeh_dev_removed(edev))
- return NULL;
- dev->error_state = pci_channel_io_perm_failure;
+ if (!driver->err_handler->error_detected)
+ return PCI_ERS_RESULT_NONE;
- driver = eeh_pcid_get(dev);
- if (!driver) return NULL;
+ eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
+ driver->name);
+ rc = driver->err_handler->error_detected(pdev,
+ pci_channel_io_perm_failure);
- eeh_disable_irq(dev);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
+ return rc;
+}
- if (!driver->err_handler ||
- !driver->err_handler->error_detected) {
- eeh_pcid_put(dev);
+static void *eeh_add_virt_device(struct eeh_dev *edev)
+{
+ struct pci_driver *driver;
+ struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+
+ if (!(edev->physfn)) {
+ eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
- driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ if (driver->err_handler) {
+ eeh_pcid_put(dev);
+ return NULL;
+ }
+ eeh_pcid_put(dev);
+ }
- eeh_pcid_put(dev);
+#ifdef CONFIG_PCI_IOV
+ pci_iov_add_virtfn(edev->physfn, edev->vf_index);
+#endif
return NULL;
}
-static void *eeh_rmv_device(void *data, void *userdata)
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
- struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- int *removed = (int *)userdata;
+ struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
/*
* Actually, we should remove the PCI bridges as well.
@@ -390,43 +501,46 @@ static void *eeh_rmv_device(void *data, void *userdata)
* support EEH. So we just care about PCI devices for
* simplicity here.
*/
- if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
- return NULL;
-
- /*
- * We rely on count-based pcibios_release_device() to
- * detach permanently offlined PEs. Unfortunately, that's
- * not reliable enough. We might have the permanently
- * offlined PEs attached, but we needn't take care of
- * them and their child devices.
- */
- if (eeh_dev_removed(edev))
- return NULL;
+ if (!eeh_edev_actionable(edev) ||
+ (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
+ return;
- driver = eeh_pcid_get(dev);
- if (driver) {
- eeh_pcid_put(dev);
- if (driver->err_handler)
- return NULL;
+ if (rmv_data) {
+ driver = eeh_pcid_get(dev);
+ if (driver) {
+ if (driver->err_handler &&
+ driver->err_handler->error_detected &&
+ driver->err_handler->slot_reset) {
+ eeh_pcid_put(dev);
+ return;
+ }
+ eeh_pcid_put(dev);
+ }
}
/* Remove it from PCI subsystem */
- pr_debug("EEH: Removing %s without EEH sensitive driver\n",
- pci_name(dev));
- edev->bus = dev->bus;
+ pr_info("EEH: Removing %s without EEH sensitive driver\n",
+ pci_name(dev));
edev->mode |= EEH_DEV_DISCONNECTED;
- (*removed)++;
+ if (rmv_data)
+ rmv_data->removed_dev_count++;
- pci_lock_rescan_remove();
- pci_stop_and_remove_bus_device(dev);
- pci_unlock_rescan_remove();
-
- return NULL;
+ if (edev->physfn) {
+#ifdef CONFIG_PCI_IOV
+ pci_iov_remove_virtfn(edev->physfn, edev->vf_index);
+ edev->pdev = NULL;
+#endif
+ if (rmv_data)
+ list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
+ } else {
+ pci_lock_rescan_remove();
+ pci_stop_and_remove_bus_device(dev);
+ pci_unlock_rescan_remove();
+ }
}
-static void *eeh_pe_detach_dev(void *data, void *userdata)
+static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
{
- struct eeh_pe *pe = (struct eeh_pe *)data;
struct eeh_dev *edev, *tmp;
eeh_pe_for_each_dev(pe, edev, tmp) {
@@ -434,7 +548,7 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
continue;
edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
- eeh_rmv_from_parent_pe(edev);
+ eeh_pe_tree_remove(edev);
}
return NULL;
@@ -447,55 +561,84 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
* PE reset (for 3 times), we try to clear the frozen state
* for 3 times as well.
*/
-static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
+static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
{
- struct eeh_pe *pe = (struct eeh_pe *)data;
- int i, rc;
+ struct eeh_pe *pe;
+ int i;
- for (i = 0; i < 3; i++) {
- rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
- if (rc)
- continue;
- rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
- if (!rc)
- break;
+ eeh_for_each_pe(root, pe) {
+ if (include_passed || !eeh_pe_passed(pe)) {
+ for (i = 0; i < 3; i++)
+ if (!eeh_unfreeze_pe(pe))
+ break;
+ if (i >= 3)
+ return -EIO;
+ }
}
+ eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
+ return 0;
+}
- /* The PE has been isolated, clear it */
- if (rc) {
- pr_warn("%s: Can't clear frozen PHB#%x-PE#%x (%d)\n",
- __func__, pe->phb->global_number, pe->addr, rc);
- return (void *)pe;
+int eeh_pe_reset_and_recover(struct eeh_pe *pe)
+{
+ int ret;
+
+ /* Bail if the PE is being recovered */
+ if (pe->state & EEH_PE_RECOVERING)
+ return 0;
+
+ /* Put the PE into recovery mode */
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+
+ /* Save states */
+ eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
+
+ /* Issue reset */
+ ret = eeh_pe_reset_full(pe, true);
+ if (ret) {
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+ return ret;
}
- return NULL;
-}
+ /* Unfreeze the PE */
+ ret = eeh_clear_pe_frozen_state(pe, true);
+ if (ret) {
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+ return ret;
+ }
-static int eeh_clear_pe_frozen_state(struct eeh_pe *pe)
-{
- void *rc;
+ /* Restore device state */
+ eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
- rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL);
- if (!rc)
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ /* Clear recovery mode */
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
- return rc ? -EIO : 0;
+ return 0;
}
/**
* eeh_reset_device - Perform actual reset of a pci slot
+ * @driver_eeh_aware: Does the device's driver provide EEH support?
* @pe: EEH PE
* @bus: PCI bus corresponding to the isolcated slot
+ * @rmv_data: Optional, list to record removed devices
*
* This routine must be called to do reset on the indicated PE.
* During the reset, udev might be invoked because those affected
* PCI devices will be removed and then added.
*/
-static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+ struct eeh_rmv_data *rmv_data,
+ bool driver_eeh_aware)
{
- struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
- struct timeval tstamp;
- int cnt, rc, removed = 0;
+ time64_t tstamp;
+ int cnt, rc;
+ struct eeh_dev *edev;
+ struct eeh_pe *tmp_pe;
+ bool any_passed = false;
+
+ eeh_for_each_pe(pe, tmp_pe)
+ any_passed |= eeh_pe_passed(tmp_pe);
/* pcibios will clear the counter; save the value */
cnt = pe->freeze_count;
@@ -505,15 +648,13 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* We don't remove the corresponding PE instances because
* we need the information afterwords. The attached EEH
* devices are expected to be attached soon when calling
- * into pcibios_add_pci_devices().
+ * into pci_hp_add_devices().
*/
eeh_pe_state_mark(pe, EEH_PE_KEEP);
- if (bus) {
- pci_lock_rescan_remove();
- pcibios_remove_pci_devices(bus);
- pci_unlock_rescan_remove();
- } else if (frozen_bus) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
+ if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
+ } else {
+ pci_hp_remove_devices(bus);
}
/*
@@ -525,24 +666,19 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- eeh_pe_state_mark(pe, EEH_PE_RESET);
- rc = eeh_reset_pe(pe);
- if (rc) {
- eeh_pe_state_clear(pe, EEH_PE_RESET);
+ rc = eeh_pe_reset_full(pe, false);
+ if (rc)
return rc;
- }
-
- pci_lock_rescan_remove();
/* Restore PE */
eeh_ops->configure_bridge(pe);
eeh_pe_restore_bars(pe);
- eeh_pe_state_clear(pe, EEH_PE_RESET);
/* Clear frozen state */
- rc = eeh_clear_pe_frozen_state(pe);
- if (rc)
+ rc = eeh_clear_pe_frozen_state(pe, false);
+ if (rc) {
return rc;
+ }
/* Give the system 5 seconds to finish running the user-space
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
@@ -550,8 +686,9 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (bus) {
- pr_info("EEH: Sleep 5s ahead of complete hotplug\n");
+ if (!driver_eeh_aware || rmv_data->removed_dev_count) {
+ pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
+ (driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
/*
@@ -559,21 +696,21 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
* PE. We should disconnect it so the binding can be
* rebuilt when adding PCI devices.
*/
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- pcibios_add_pci_devices(bus);
- } else if (frozen_bus && removed) {
- pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
- ssleep(5);
-
- eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
- pcibios_add_pci_devices(frozen_bus);
+ if (pe->type & EEH_PE_VF) {
+ eeh_add_virt_device(edev);
+ } else {
+ if (!driver_eeh_aware)
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ pci_hp_add_devices(bus);
+ }
}
- eeh_pe_state_clear(pe, EEH_PE_KEEP);
+ eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
pe->tstamp = tstamp;
pe->freeze_count = cnt;
- pci_unlock_rescan_remove();
return 0;
}
@@ -582,42 +719,252 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
*/
#define MAX_WAIT_FOR_RECOVERY 300
-static void eeh_handle_normal_event(struct eeh_pe *pe)
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: This needs to be recursive to ensure the leaf PEs get removed
+ * before their parents do. Although this is possible to do iteratively
+ * we don't since this is easier to read and we need to guarantee
+ * the leaf nodes will be handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+ struct eeh_pe *child_pe, *tmp;
+
+ list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
+ eeh_pe_cleanup(child_pe);
+
+ if (pe->state & EEH_PE_KEEP)
+ return;
+
+ if (!(pe->state & EEH_PE_INVALID))
+ return;
+
+ if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ }
+}
+
+/**
+ * eeh_slot_presence_check - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+ u8 state;
+ int rc;
+
+ if (!pdev)
+ return false;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return true;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->get_adapter_status)
+ return true;
+
+ /* set the attention indicator while we've got the slot ops */
+ if (ops->set_attention_status)
+ ops->set_attention_status(slot->hotplug, 1);
+
+ rc = ops->get_adapter_status(slot->hotplug, &state);
+ if (rc)
+ return true;
+
+ return !!state;
+}
+
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+
+ if (!pdev)
+ return;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->set_attention_status)
+ return;
+
+ ops->set_attention_status(slot->hotplug, 0);
+}
+
+/**
+ * eeh_handle_normal_event - Handle EEH events on a specific PE
+ * @pe: EEH PE - which should not be used after we return, as it may
+ * have been invalidated.
+ *
+ * Attempts to recover the given PE. If recovery fails or the PE has failed
+ * too many times, remove the PE.
+ *
+ * While PHB detects address or data parity errors on particular PCI
+ * slot, the associated PE will be frozen. Besides, DMA's occurring
+ * to wild addresses (which usually happen due to bugs in device
+ * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
+ * #PERR or other misc PCI-related errors also can trigger EEH errors.
+ *
+ * Recovery process consists of unplugging the device driver (which
+ * generated hotplug events to userspace), then issuing a PCI #RST to
+ * the device, then reconfiguring the PCI config space for all bridges
+ * & devices under this slot, and then finally restarting the device
+ * drivers (which cause a second set of hotplug events to go out to
+ * userspace).
+ */
+void eeh_handle_normal_event(struct eeh_pe *pe)
{
- struct pci_bus *frozen_bus;
+ struct pci_bus *bus;
+ struct eeh_dev *edev, *tmp;
+ struct eeh_pe *tmp_pe;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+ struct eeh_rmv_data rmv_data =
+ {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+ int devices = 0;
- frozen_bus = eeh_pe_bus_get(pe);
- if (!frozen_bus) {
- pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ pci_lock_rescan_remove();
+
+ bus = eeh_pe_bus_get(pe);
+ if (!bus) {
+ pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
+ pci_unlock_rescan_remove();
return;
}
+ /*
+ * When devices are hot-removed we might get an EEH due to
+ * a driver attempting to touch the MMIO space of a removed
+ * device. In this case we don't have a device to recover
+ * so suppress the event if we can't find any present devices.
+ *
+ * The hotplug driver should take care of tearing down the
+ * device itself.
+ */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ if (eeh_slot_presence_check(edev->pdev))
+ devices++;
+
+ if (!devices) {
+ pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pe->phb->global_number, pe->addr);
+ /*
+ * The device has been removed, so tear down its state. On powernv
+ * the hotplug driver would take care of that, but not on pseries,
+ * so permanently disable the card as it has been hot-removed.
+ *
+ * In the case of powernv, note that the removal of device
+ * is covered by pci rescan lock, so no problem even if hotplug
+ * driver attempts to remove the device.
+ */
+ goto recover_failed;
+ }
+
+ /* Log the event */
+ if (pe->type & EEH_PE_PHB) {
+ pr_err("EEH: Recovering PHB#%x, location: %s\n",
+ pe->phb->global_number, eeh_pe_loc_get(pe));
+ } else {
+ struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+ pr_err("EEH: Recovering PHB#%x-PE#%x\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ }
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Print the saved stack trace now that we've verified there's
+ * something to recover.
+ */
+ if (pe->trace_entries) {
+ void **ptrs = (void **) pe->stack_trace;
+ int i;
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+
+ /* FIXME: Use the same format as dump_stack() */
+ pr_err("EEH: Call Trace:\n");
+ for (i = 0; i < pe->trace_entries; i++)
+ pr_err("EEH: [%p] %pS\n", ptrs[i], ptrs[i]);
+
+ pe->trace_entries = 0;
+ }
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
- if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
- goto excess_failures;
- pr_warn("EEH: This PCI device has failed %d times in the last hour\n",
- pe->freeze_count);
+ if (pe->freeze_count > eeh_max_freezes) {
+ pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
+ pe->phb->global_number, pe->addr,
+ pe->freeze_count);
+
+ goto recover_failed;
+ }
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
* status ... if any child can't handle the reset, then the entire
* slot is dlpar removed and added.
+ *
+ * When the PHB is fenced, we have to issue a reset to recover from
+ * the error. Override the result if necessary to have partial
+ * hotplug for this case.
*/
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_pe_dev_traverse(pe, eeh_report_error, &result);
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if (result == PCI_ERS_RESULT_DISCONNECT)
+ goto recover_failed;
+
+ /*
+ * Errors logged on a PHB are always fences which need a full
+ * PHB reset to clear so force that to happen.
+ */
+ if ((pe->type & EEH_PE_PHB) && result != PCI_ERS_RESULT_NONE)
+ result = PCI_ERS_RESULT_NEED_RESET;
/* Get the current PCI slot state. This can take a long time,
- * sometimes over 3 seconds for certain systems.
+ * sometimes over 300 seconds for certain systems.
*/
- rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY * 1000);
if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
pr_warn("EEH: Permanent failure\n");
- goto hard_fail;
+ goto recover_failed;
}
/* Since rtas may enable MMIO when posting the error log,
@@ -633,11 +980,10 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
*/
if (result == PCI_ERS_RESULT_NONE) {
pr_info("EEH: Reset with hotplug activity\n");
- rc = eeh_reset_device(pe, frozen_bus);
+ rc = eeh_reset_device(pe, bus, NULL, false);
if (rc) {
- pr_warn("%s: Unable to reset, err=%d\n",
- __func__, rc);
- goto hard_fail;
+ pr_warn("%s: Unable to reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
}
@@ -645,24 +991,23 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
-
if (rc < 0)
- goto hard_fail;
+ goto recover_failed;
+
if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
- eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
+ eeh_pe_report("mmio_enabled", pe,
+ eeh_report_mmio_enabled, &result);
}
}
-
- /* If all devices reported they can proceed, then re-enable DMA */
if (result == PCI_ERS_RESULT_CAN_RECOVER) {
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
-
if (rc < 0)
- goto hard_fail;
+ goto recover_failed;
+
if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
@@ -671,95 +1016,134 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
* is still in frozen state. Clear it before
* resuming the PE.
*/
- eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
+ eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
result = PCI_ERS_RESULT_RECOVERED;
}
}
- /* If any device has a hard failure, then shut off everything. */
- if (result == PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: Device driver gave up\n");
- goto hard_fail;
- }
-
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
- rc = eeh_reset_device(pe, NULL);
+ rc = eeh_reset_device(pe, bus, &rmv_data, true);
if (rc) {
- pr_warn("%s: Cannot reset, err=%d\n",
- __func__, rc);
- goto hard_fail;
+ pr_warn("%s: Cannot reset, err=%d\n", __func__, rc);
+ goto recover_failed;
}
- pr_info("EEH: Notify device drivers "
- "the completion of reset\n");
result = PCI_ERS_RESULT_NONE;
- eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
- /* All devices should claim they have recovered by now. */
- if ((result != PCI_ERS_RESULT_RECOVERED) &&
- (result != PCI_ERS_RESULT_NONE)) {
- pr_warn("EEH: Not recovered\n");
- goto hard_fail;
- }
+ if ((result == PCI_ERS_RESULT_RECOVERED) ||
+ (result == PCI_ERS_RESULT_NONE)) {
+ /*
+ * For those hot removed VFs, we should add back them after PF
+ * get recovered properly.
+ */
+ list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+ rmv_entry) {
+ eeh_add_virt_device(edev);
+ list_del(&edev->rmv_entry);
+ }
- /* Tell all device drivers that they can resume operations */
- pr_info("EEH: Notify device driver to resume\n");
- eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
+ /* Tell all device drivers that they can resume operations */
+ pr_info("EEH: Notify device driver to resume\n");
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+ eeh_for_each_pe(pe, tmp_pe) {
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+ edev->in_error = false;
+ }
+ }
- return;
+ pr_info("EEH: Recovery successful.\n");
+ goto out;
+ }
-excess_failures:
+recover_failed:
/*
* About 90% of all real-life EEH failures in the field
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
- pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
- "last hour and has been permanently disabled.\n"
- "Please try reseating or replacing it.\n",
- pe->phb->global_number, pe->addr,
- pe->freeze_count);
- goto perm_error;
-
-hard_fail:
- pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
- "Please try reseating or replacing it\n",
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
pe->phb->global_number, pe->addr);
-perm_error:
eeh_slot_error_detail(pe, EEH_LOG_PERM);
/* Notify all devices that they're about to go down. */
- eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
/* Mark the PE to be removed permanently */
- pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
/*
* Shut down the device drivers for good. We mark
* all removed devices correctly to avoid access
* the their PCI config any more.
*/
- if (frozen_bus) {
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- pci_lock_rescan_remove();
- pcibios_remove_pci_devices(frozen_bus);
+ bus = eeh_pe_bus_get(pe);
+ if (bus)
+ pci_hp_remove_devices(bus);
+ else
+ pr_err("%s: PCI bus for PHB#%x-PE#%x disappeared\n",
+ __func__, pe->phb->global_number, pe->addr);
+
+ /* The passed PE should no longer be used */
pci_unlock_rescan_remove();
+ return;
}
+
+out:
+ /*
+ * Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
+ * we don't want to modify the PE tree structure so we do it here.
+ */
+ eeh_pe_cleanup(pe);
+
+ /* clear the slot attention LED for all recovered devices */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ eeh_clear_slot_attention(edev->pdev);
+
+ eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
+
+ pci_unlock_rescan_remove();
}
-static void eeh_handle_special_event(void)
+/**
+ * eeh_handle_special_event - Handle EEH events without a specific failing PE
+ *
+ * Called when an EEH event is detected but can't be narrowed down to a
+ * specific PE. Iterates through possible failures and handles them as
+ * necessary.
+ */
+void eeh_handle_special_event(void)
{
- struct eeh_pe *pe, *phb_pe;
+ struct eeh_pe *pe, *phb_pe, *tmp_pe;
+ struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
int rc;
+ pci_lock_rescan_remove();
do {
rc = eeh_ops->next_error(&pe);
@@ -776,7 +1160,7 @@ static void eeh_handle_special_event(void)
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe) continue;
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
}
eeh_serialize_unlock(flags);
@@ -791,20 +1175,20 @@ static void eeh_handle_special_event(void)
/* Purge all events of the PHB */
eeh_remove_event(pe, true);
- if (rc == EEH_NEXT_ERR_DEAD_PHB)
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
- else
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+ if (rc != EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
break;
case EEH_NEXT_ERR_NONE:
+ pci_unlock_rescan_remove();
return;
default:
pr_warn("%s: Invalid value %d from next_error()\n",
__func__, rc);
+ pci_unlock_rescan_remove();
return;
}
@@ -815,10 +1199,22 @@ static void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ pci_unlock_rescan_remove();
eeh_handle_normal_event(pe);
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
- } else {
pci_lock_rescan_remove();
+ } else {
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
+ /* Notify all devices to be down */
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
+ eeh_pe_report(
+ "error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+
list_for_each_entry(hose, &hose_list, list_node) {
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe ||
@@ -826,13 +1222,17 @@ static void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
- /* Notify all devices to be down */
bus = eeh_pe_bus_get(phb_pe);
- eeh_pe_dev_traverse(pe,
- eeh_report_failure, NULL);
- pcibios_remove_pci_devices(bus);
+ if (!bus) {
+ pr_err("%s: Cannot find PCI bus for "
+ "PHB#%x-PE#%x\n",
+ __func__,
+ pe->phb->global_number,
+ pe->addr);
+ break;
+ }
+ pci_hp_remove_devices(bus);
}
- pci_unlock_rescan_remove();
}
/*
@@ -842,29 +1242,6 @@ static void eeh_handle_special_event(void)
if (rc == EEH_NEXT_ERR_DEAD_IOC)
break;
} while (rc != EEH_NEXT_ERR_NONE);
-}
-/**
- * eeh_handle_event - Reset a PCI device after hard lockup.
- * @pe: EEH PE
- *
- * While PHB detects address or data parity errors on particular PCI
- * slot, the associated PE will be frozen. Besides, DMA's occurring
- * to wild addresses (which usually happen due to bugs in device
- * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
- * #PERR or other misc PCI-related errors also can trigger EEH errors.
- *
- * Recovery process consists of unplugging the device driver (which
- * generated hotplug events to userspace), then issuing a PCI #RST to
- * the device, then reconfiguring the PCI config space for all bridges
- * & devices under this slot, and then finally restarting the device
- * drivers (which cause a second set of hotplug events to go out to
- * userspace).
- */
-void eeh_handle_event(struct eeh_pe *pe)
-{
- if (pe)
- eeh_handle_normal_event(pe);
- else
- eeh_handle_special_event();
+ pci_unlock_rescan_remove();
}
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 4eefb6e34dbb..c23a454af08a 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -1,17 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (c) 2005 Linas Vepstas <linas@linas.org>
*/
@@ -35,8 +23,8 @@
*/
static DEFINE_SPINLOCK(eeh_eventlist_lock);
-static struct semaphore eeh_eventlist_sem;
-LIST_HEAD(eeh_eventlist);
+static DECLARE_COMPLETION(eeh_eventlist_event);
+static LIST_HEAD(eeh_eventlist);
/**
* eeh_event_handler - Dispatch EEH events.
@@ -52,10 +40,9 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_pe *pe;
while (!kthread_should_stop()) {
- if (down_interruptible(&eeh_eventlist_sem))
+ if (wait_for_completion_interruptible(&eeh_eventlist_event))
break;
/* Fetch EEH event from the queue */
@@ -71,21 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
- pe = event->pe;
- if (pe) {
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
- if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%d\n",
- pe->phb->global_number);
- else
- pr_info("EEH: Detected PCI bus error on "
- "PHB#%d-PE#%x\n",
- pe->phb->global_number, pe->addr);
- eeh_handle_event(pe);
- eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
- } else {
- eeh_handle_event(NULL);
- }
+ if (event->pe)
+ eeh_handle_normal_event(event->pe);
+ else
+ eeh_handle_special_event();
kfree(event);
}
@@ -104,9 +80,6 @@ int eeh_event_init(void)
struct task_struct *t;
int ret = 0;
- /* Initialize semaphore */
- sema_init(&eeh_eventlist_sem, 0);
-
t = kthread_run(eeh_event_handler, NULL, "eehd");
if (IS_ERR(t)) {
ret = PTR_ERR(t);
@@ -126,7 +99,7 @@ int eeh_event_init(void)
* the actual event will be delivered in a normal context
* (from a workqueue).
*/
-int eeh_send_failure_event(struct eeh_pe *pe)
+int __eeh_send_failure_event(struct eeh_pe *pe)
{
unsigned long flags;
struct eeh_event *event;
@@ -138,17 +111,49 @@ int eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
+ /*
+ * Mark the PE as recovering before inserting it in the queue.
+ * This prevents the PE from being free()ed by a hotplug driver
+ * while the PE is sitting in the event queue.
+ */
+ if (pe) {
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Save the current stack trace so we can dump it from the
+ * event handler thread.
+ */
+ pe->trace_entries = stack_trace_save(pe->stack_trace,
+ ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ }
+
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);
spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
/* For EEH deamon to knick in */
- up(&eeh_eventlist_sem);
+ complete(&eeh_eventlist_event);
return 0;
}
+int eeh_send_failure_event(struct eeh_pe *pe)
+{
+ /*
+ * If we've manually suppressed recovery events via debugfs
+ * then just drop it on the floor.
+ */
+ if (eeh_debugfs_no_recover) {
+ pr_err("EEH: Event dropped due to no_recover setting\n");
+ return 0;
+ }
+
+ return __eeh_send_failure_event(pe);
+}
+
/**
* eeh_remove_event - Remove EEH event from the queue
* @pe: Event binding to the PE
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 00e3844525a6..e740101fadf3 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* The file intends to implement PE based on the information from
* platforms. Basically, there have 3 types of PEs: PHB/Bus/Device.
@@ -6,26 +7,13 @@
* PE is only meaningful in one PHB domain.
*
* Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <linux/string.h>
@@ -36,10 +24,10 @@ static int eeh_pe_aux_size = 0;
static LIST_HEAD(eeh_phb_pe);
/**
- * eeh_set_pe_aux_size - Set PE auxillary data size
- * @size: PE auxillary data size
+ * eeh_set_pe_aux_size - Set PE auxiliary data size
+ * @size: PE auxiliary data size in bytes
*
- * Set PE auxillary data size
+ * Set PE auxiliary data size.
*/
void eeh_set_pe_aux_size(int size)
{
@@ -75,7 +63,6 @@ static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
pe->type = type;
pe->phb = phb;
INIT_LIST_HEAD(&pe->child_list);
- INIT_LIST_HEAD(&pe->child);
INIT_LIST_HEAD(&pe->edevs);
pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
@@ -104,12 +91,63 @@ int eeh_phb_pe_create(struct pci_controller *phb)
/* Put it into the list */
list_add_tail(&pe->child, &eeh_phb_pe);
- pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+ pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number);
return 0;
}
/**
+ * eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in millisecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+int eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+ int ret;
+ int mwait;
+
+ /*
+ * According to PAPR, the state of PE might be temporarily
+ * unavailable. Under the circumstance, we have to wait
+ * for indicated time determined by firmware. The maximal
+ * wait time is 5 minutes, which is acquired from the original
+ * EEH implementation. Also, the original implementation
+ * also defined the minimal wait time as 1 second.
+ */
+#define EEH_STATE_MIN_WAIT_TIME (1000)
+#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
+
+ while (1) {
+ ret = eeh_ops->get_state(pe, &mwait);
+
+ if (ret != EEH_STATE_UNAVAILABLE)
+ return ret;
+
+ if (max_wait <= 0) {
+ pr_warn("%s: Timeout when getting PE's state (%d)\n",
+ __func__, max_wait);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (mwait < EEH_STATE_MIN_WAIT_TIME) {
+ pr_warn("%s: Firmware returned bad wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MIN_WAIT_TIME;
+ } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+ pr_warn("%s: Firmware returned too long wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MAX_WAIT_TIME;
+ }
+
+ msleep(min(mwait, max_wait));
+ max_wait -= mwait;
+ }
+}
+
+/**
* eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
* @phb: PCI controller
*
@@ -142,8 +180,7 @@ struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
* The function is used to retrieve the next PE in the
* hierarchy PE tree.
*/
-static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
- struct eeh_pe *root)
+struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root)
{
struct list_head *next = pe->child_list.next;
@@ -173,12 +210,12 @@ static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
* to be traversed.
*/
void *eeh_pe_traverse(struct eeh_pe *root,
- eeh_traverse_func fn, void *flag)
+ eeh_pe_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
void *ret;
- for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
+ eeh_for_each_pe(root, pe) {
ret = fn(pe, flag);
if (ret) return ret;
}
@@ -195,58 +232,41 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
- eeh_traverse_func fn, void *flag)
+void eeh_pe_dev_traverse(struct eeh_pe *root,
+ eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
- void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
- return NULL;
+ return;
}
/* Traverse root PE */
- for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
- eeh_pe_for_each_dev(pe, edev, tmp) {
- ret = fn(edev, flag);
- if (ret)
- return ret;
- }
- }
-
- return NULL;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ fn(edev, flag);
}
/**
* __eeh_pe_get - Check the PE address
- * @data: EEH PE
- * @flag: EEH device
*
* For one particular PE, it can be identified by PE address
* or tranditional BDF address. BDF address is composed of
* Bus/Device/Function number. The extra data referred by flag
* indicates which type of address should be used.
*/
-static void *__eeh_pe_get(void *data, void *flag)
+static void *__eeh_pe_get(struct eeh_pe *pe, void *flag)
{
- struct eeh_pe *pe = (struct eeh_pe *)data;
- struct eeh_dev *edev = (struct eeh_dev *)flag;
+ int *target_pe = flag;
- /* Unexpected PHB PE */
+ /* PHB PEs are special and should be ignored */
if (pe->type & EEH_PE_PHB)
return NULL;
- /* We prefer PE address */
- if (edev->pe_config_addr &&
- (edev->pe_config_addr == pe->addr))
- return pe;
-
- /* Try BDF address */
- if (edev->config_addr &&
- (edev->config_addr == pe->config_addr))
+ if (*target_pe == pe->addr)
return pe;
return NULL;
@@ -254,7 +274,8 @@ static void *__eeh_pe_get(void *data, void *flag)
/**
* eeh_pe_get - Search PE based on the given address
- * @edev: EEH device
+ * @phb: PCI controller
+ * @pe_no: PE number
*
* Search the corresponding PE based on the specified address which
* is included in the eeh device. The function is used to check if
@@ -263,63 +284,29 @@ static void *__eeh_pe_get(void *data, void *flag)
* which is composed of PCI bus/device/function number, or unified
* PE address.
*/
-struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
+struct eeh_pe *eeh_pe_get(struct pci_controller *phb, int pe_no)
{
- struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
- struct eeh_pe *pe;
-
- pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
+ struct eeh_pe *root = eeh_phb_pe_get(phb);
- return pe;
+ return eeh_pe_traverse(root, __eeh_pe_get, &pe_no);
}
/**
- * eeh_pe_get_parent - Retrieve the parent PE
+ * eeh_pe_tree_insert - Add EEH device to parent PE
* @edev: EEH device
+ * @new_pe_parent: PE to create additional PEs under
*
- * The whole PEs existing in the system are organized as hierarchy
- * tree. The function is used to retrieve the parent PE according
- * to the parent EEH device.
- */
-static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
-{
- struct device_node *dn;
- struct eeh_dev *parent;
-
- /*
- * It might have the case for the indirect parent
- * EEH device already having associated PE, but
- * the direct parent EEH device doesn't have yet.
- */
- dn = edev->dn->parent;
- while (dn) {
- /* We're poking out of PCI territory */
- if (!PCI_DN(dn)) return NULL;
-
- parent = of_node_to_eeh_dev(dn);
- /* We're poking out of PCI territory */
- if (!parent) return NULL;
-
- if (parent->pe)
- return parent->pe;
-
- dn = dn->parent;
- }
-
- return NULL;
-}
-
-/**
- * eeh_add_to_parent_pe - Add EEH device to parent PE
- * @edev: EEH device
+ * Add EEH device to the PE in edev->pe_config_addr. If a PE already
+ * exists with that address then @edev is added to that PE. Otherwise
+ * a new PE is created and inserted into the PE tree as a child of
+ * @new_pe_parent.
*
- * Add EEH device to the parent PE. If the parent PE already
- * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
- * we have to create new PE to hold the EEH device and the new
- * PE will be linked to its parent PE as well.
+ * If @new_pe_parent is NULL then the new PE will be inserted under
+ * directly under the PHB.
*/
-int eeh_add_to_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
{
+ struct pci_controller *hose = edev->controller;
struct eeh_pe *pe, *parent;
/*
@@ -328,52 +315,48 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* PE should be composed of PCI bus and its subordinate
* components.
*/
- pe = eeh_pe_get(edev);
- if (pe && !(pe->type & EEH_PE_INVALID)) {
- if (!edev->pe_config_addr) {
- pr_err("%s: PE with addr 0x%x already exists\n",
- __func__, edev->config_addr);
- return -EEXIST;
- }
-
- /* Mark the PE as type of PCI bus */
- pe->type = EEH_PE_BUS;
- edev->pe = pe;
+ pe = eeh_pe_get(hose, edev->pe_config_addr);
+ if (pe) {
+ if (pe->type & EEH_PE_INVALID) {
+ list_add_tail(&edev->entry, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
- /* Put the edev to PE */
- list_add_tail(&edev->list, &pe->edevs);
- pr_debug("EEH: Add %s to Bus PE#%x\n",
- edev->dn->full_name, pe->addr);
+ eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n",
+ pe->parent->addr);
+ } else {
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
- return 0;
- } else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->list, &pe->edevs);
- edev->pe = pe;
- /*
- * We're running to here because of PCI hotplug caused by
- * EEH recovery. We need clear EEH_PE_INVALID until the top.
- */
- parent = pe;
- while (parent) {
- if (!(parent->type & EEH_PE_INVALID))
- break;
- parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
- parent = parent->parent;
+ /* Put the edev to PE */
+ list_add_tail(&edev->entry, &pe->edevs);
+ eeh_edev_dbg(edev, "Added to bus PE\n");
}
- pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
- edev->dn->full_name, pe->addr, pe->parent->addr);
-
return 0;
}
/* Create a new EEH PE */
- pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
+ if (edev->physfn)
+ pe = eeh_pe_alloc(hose, EEH_PE_VF);
+ else
+ pe = eeh_pe_alloc(hose, EEH_PE_DEVICE);
if (!pe) {
pr_err("%s: out of memory!\n", __func__);
return -ENOMEM;
}
- pe->addr = edev->pe_config_addr;
- pe->config_addr = edev->config_addr;
+
+ pe->addr = edev->pe_config_addr;
/*
* Put the new EEH PE into hierarchy tree. If the parent
@@ -381,34 +364,35 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* to PHB directly. Otherwise, we have to associate the
* PE with its parent.
*/
- parent = eeh_pe_get_parent(edev);
- if (!parent) {
- parent = eeh_phb_pe_get(edev->phb);
- if (!parent) {
+ if (!new_pe_parent) {
+ new_pe_parent = eeh_phb_pe_get(hose);
+ if (!new_pe_parent) {
pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
- __func__, edev->phb->global_number);
+ __func__, hose->global_number);
edev->pe = NULL;
kfree(pe);
return -EEXIST;
}
}
- pe->parent = parent;
+
+ /* link new PE into the tree */
+ pe->parent = new_pe_parent;
+ list_add_tail(&pe->child, &new_pe_parent->child_list);
/*
* Put the newly created PE into the child list and
* link the EEH device accordingly.
*/
- list_add_tail(&pe->child, &parent->child_list);
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
- edev->dn->full_name, pe->addr, pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
+ new_pe_parent->addr);
return 0;
}
/**
- * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * eeh_pe_tree_remove - Remove one EEH device from the associated PE
* @edev: EEH device
*
* The PE hierarchy tree might be changed when doing PCI hotplug.
@@ -416,21 +400,21 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* during EEH recovery. So we have to call the function remove the
* corresponding PE accordingly if necessary.
*/
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_remove(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
+ bool keep, recover;
int cnt;
- if (!edev->pe) {
- pr_debug("%s: No PE found for EEH device %s\n",
- __func__, edev->dn->full_name);
+ pe = eeh_dev_to_pe(edev);
+ if (!pe) {
+ eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
/* Remove the EEH device */
- pe = edev->pe;
edev->pe = NULL;
- list_del(&edev->list);
+ list_del(&edev->entry);
/*
* Check if the parent PE includes any EEH devices.
@@ -440,10 +424,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
+
+ /* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
- if (!(pe->state & EEH_PE_KEEP)) {
+ /*
+ * XXX: KEEP is set while resetting a PE. I don't think it's
+ * ever set without RECOVERING also being set. I could
+ * be wrong though so catch that with a WARN.
+ */
+ keep = !!(pe->state & EEH_PE_KEEP);
+ recover = !!(pe->state & EEH_PE_RECOVERING);
+ WARN_ON(keep && !recover);
+
+ if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -452,6 +447,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
+ /*
+ * Mark the PE as invalid. At the end of the recovery
+ * process any invalid PEs will be garbage collected.
+ *
+ * We need to delay the free()ing of them since we can
+ * remove edev's while traversing the PE tree which
+ * might trigger the removal of a PE and we can't
+ * deal with that (yet).
+ */
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -485,16 +489,16 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
void eeh_pe_update_time_stamp(struct eeh_pe *pe)
{
- struct timeval tstamp;
+ time64_t tstamp;
if (!pe) return;
if (pe->freeze_count <= 0) {
pe->freeze_count = 0;
- do_gettimeofday(&pe->tstamp);
+ pe->tstamp = ktime_get_seconds();
} else {
- do_gettimeofday(&tstamp);
- if (tstamp.tv_sec - pe->tstamp.tv_sec > 3600) {
+ tstamp = ktime_get_seconds();
+ if (tstamp - pe->tstamp > 3600) {
pe->tstamp = tstamp;
pe->freeze_count = 0;
}
@@ -502,62 +506,56 @@ void eeh_pe_update_time_stamp(struct eeh_pe *pe)
}
/**
- * __eeh_pe_state_mark - Mark the state for the PE
- * @data: EEH PE
- * @flag: state
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
*
- * The function is used to mark the indicated state for the given
- * PE. Also, the associated PCI devices will be put into IO frozen
- * state as well.
+ * EEH error affects the current PE and its child PEs. The function
+ * is used to mark appropriate state for the affected PEs and the
+ * associated devices.
*/
-static void *__eeh_pe_state_mark(void *data, void *flag)
+void eeh_pe_state_mark(struct eeh_pe *root, int state)
{
- struct eeh_pe *pe = (struct eeh_pe *)data;
- int state = *((int *)flag);
- struct eeh_dev *edev, *tmp;
- struct pci_dev *pdev;
-
- /* Keep the state of permanently removed PE intact */
- if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
- (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
- return NULL;
-
- pe->state |= state;
-
- /* Offline PCI devices if applicable */
- if (state != EEH_PE_ISOLATED)
- return NULL;
-
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- pdev->error_state = pci_channel_io_frozen;
- }
+ struct eeh_pe *pe;
- return NULL;
+ eeh_for_each_pe(root, pe)
+ if (!(pe->state & EEH_PE_REMOVED))
+ pe->state |= state;
}
+EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
/**
- * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * eeh_pe_mark_isolated
* @pe: EEH PE
*
- * EEH error affects the current PE and its child PEs. The function
- * is used to mark appropriate state for the affected PEs and the
- * associated devices.
+ * Record that a PE has been isolated by marking the PE and its children as
+ * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
+ * as pci_channel_io_frozen.
*/
-void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+void eeh_pe_mark_isolated(struct eeh_pe *root)
{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+ struct eeh_pe *pe;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ eeh_pe_state_mark(root, EEH_PE_ISOLATED);
+ eeh_for_each_pe(root, pe) {
+ list_for_each_entry(edev, &pe->edevs, entry) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+ /* Block PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state |= EEH_PE_CFG_BLOCKED;
+ }
}
+EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
-static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
- struct eeh_dev *edev = data;
int mode = *((int *)flag);
edev->mode |= mode;
-
- return NULL;
}
/**
@@ -572,45 +570,52 @@ void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
}
/**
- * __eeh_pe_state_clear - Clear state for the PE
+ * eeh_pe_state_clear - Clear state for the PE
* @data: EEH PE
- * @flag: state
+ * @state: state
+ * @include_passed: include passed-through devices?
*
* The function is used to clear the indicated state from the
* given PE. Besides, we also clear the check count of the PE
* as well.
*/
-static void *__eeh_pe_state_clear(void *data, void *flag)
+void eeh_pe_state_clear(struct eeh_pe *root, int state, bool include_passed)
{
- struct eeh_pe *pe = (struct eeh_pe *)data;
- int state = *((int *)flag);
+ struct eeh_pe *pe;
+ struct eeh_dev *edev, *tmp;
+ struct pci_dev *pdev;
- /* Keep the state of permanently removed PE intact */
- if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
- (state & EEH_PE_ISOLATED))
- return NULL;
+ eeh_for_each_pe(root, pe) {
+ /* Keep the state of permanently removed PE intact */
+ if (pe->state & EEH_PE_REMOVED)
+ continue;
+
+ if (!include_passed && eeh_pe_passed(pe))
+ continue;
- pe->state &= ~state;
+ pe->state &= ~state;
+
+ /*
+ * Special treatment on clearing isolated state. Clear
+ * check count since last isolation and put all affected
+ * devices to normal state.
+ */
+ if (!(state & EEH_PE_ISOLATED))
+ continue;
- /* Clear check count since last isolation */
- if (state & EEH_PE_ISOLATED)
pe->check_count = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
- return NULL;
-}
+ pdev->error_state = pci_channel_io_normal;
+ }
-/**
- * eeh_pe_state_clear - Clear state for the PE and its children
- * @pe: PE
- * @state: state to be cleared
- *
- * When the PE and its children has been recovered from error,
- * we need clear the error state for that. The function is used
- * for the purpose.
- */
-void eeh_pe_state_clear(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
+ /* Unblock PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state &= ~EEH_PE_CFG_BLOCKED;
+ }
}
/*
@@ -624,8 +629,7 @@ void eeh_pe_state_clear(struct eeh_pe *pe, int state)
* blocked on normal path during the stage. So we need utilize
* eeh operations, which is always permitted.
*/
-static void eeh_bridge_check_link(struct eeh_dev *edev,
- struct device_node *dn)
+static void eeh_bridge_check_link(struct eeh_dev *edev)
{
int cap;
uint32_t val;
@@ -638,44 +642,41 @@ static void eeh_bridge_check_link(struct eeh_dev *edev,
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
- pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, edev->phb->global_number,
- edev->config_addr >> 8,
- PCI_SLOT(edev->config_addr & 0xFF),
- PCI_FUNC(edev->config_addr & 0xFF));
+ eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
- eeh_ops->read_config(dn, cap + PCI_EXP_SLTSTA, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
- pr_debug(" No card in the slot (0x%04x) !\n", val);
+ eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
/* Check power status if we have the capability */
- eeh_ops->read_config(dn, cap + PCI_EXP_SLTCAP, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val);
if (val & PCI_EXP_SLTCAP_PCP) {
- eeh_ops->read_config(dn, cap + PCI_EXP_SLTCTL, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
- pr_debug(" In power-off state, power it on ...\n");
+ eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
- eeh_ops->write_config(dn, cap + PCI_EXP_SLTCTL, 2, val);
+ eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val);
msleep(2 * 1000);
}
}
/* Enable link */
- eeh_ops->read_config(dn, cap + PCI_EXP_LNKCTL, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val);
val &= ~PCI_EXP_LNKCTL_LD;
- eeh_ops->write_config(dn, cap + PCI_EXP_LNKCTL, 2, val);
+ eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
/* Check link */
- eeh_ops->read_config(dn, cap + PCI_EXP_LNKCAP, 4, &val);
- if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- pr_debug(" No link reporting capability (0x%08x) \n", val);
- msleep(1000);
- return;
+ if (edev->pdev) {
+ if (!edev->pdev->link_active_reporting) {
+ eeh_edev_dbg(edev, "No link reporting capability\n");
+ msleep(1000);
+ return;
+ }
}
/* Wait the link is up until timeout (5s) */
@@ -684,23 +685,22 @@ static void eeh_bridge_check_link(struct eeh_dev *edev,
msleep(20);
timeout += 20;
- eeh_ops->read_config(dn, cap + PCI_EXP_LNKSTA, 2, &val);
+ eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val);
if (val & PCI_EXP_LNKSTA_DLLLA)
break;
}
if (val & PCI_EXP_LNKSTA_DLLLA)
- pr_debug(" Link up (%s)\n",
+ eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
- pr_debug(" Link not ready (0x%04x)\n", val);
+ eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
-static void eeh_restore_bridge_bars(struct eeh_dev *edev,
- struct device_node *dn)
+static void eeh_restore_bridge_bars(struct eeh_dev *edev)
{
int i;
@@ -709,49 +709,49 @@ static void eeh_restore_bridge_bars(struct eeh_dev *edev,
* Bus numbers and windows: 0x18 - 0x30
*/
for (i = 4; i < 13; i++)
- eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
/* Rom: 0x38 */
- eeh_ops->write_config(dn, 14*4, 4, edev->config_space[14]);
+ eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]);
/* Cache line & Latency timer: 0xC 0xD */
- eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
- SAVED_BYTE(PCI_LATENCY_TIMER));
+ eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
+ SAVED_BYTE(PCI_LATENCY_TIMER));
/* Max latency, min grant, interrupt ping and line: 0x3C */
- eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
/* PCI Command: 0x4 */
- eeh_ops->write_config(dn, PCI_COMMAND, 4, edev->config_space[1]);
+ eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] |
+ PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
/* Check the PCIe link is ready */
- eeh_bridge_check_link(edev, dn);
+ eeh_bridge_check_link(edev);
}
-static void eeh_restore_device_bars(struct eeh_dev *edev,
- struct device_node *dn)
+static void eeh_restore_device_bars(struct eeh_dev *edev)
{
int i;
u32 cmd;
for (i = 4; i < 10; i++)
- eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
+ eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
/* 12 == Expansion ROM Address */
- eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
+ eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]);
- eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
+ eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
SAVED_BYTE(PCI_CACHE_LINE_SIZE));
- eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
+ eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
SAVED_BYTE(PCI_LATENCY_TIMER));
/* max latency, min grant, interrupt pin and line */
- eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
+ eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
/*
* Restore PERR & SERR bits, some devices require it,
* don't touch the other command bits
*/
- eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
+ eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd);
if (edev->config_space[1] & PCI_COMMAND_PARITY)
cmd |= PCI_COMMAND_PARITY;
else
@@ -760,7 +760,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
cmd |= PCI_COMMAND_SERR;
else
cmd &= ~PCI_COMMAND_SERR;
- eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
+ eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd);
}
/**
@@ -772,21 +772,16 @@ static void eeh_restore_device_bars(struct eeh_dev *edev,
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static void *eeh_restore_one_device_bars(void *data, void *flag)
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
- struct eeh_dev *edev = (struct eeh_dev *)data;
- struct device_node *dn = eeh_dev_to_of_node(edev);
-
/* Do special restore for bridges */
if (edev->mode & EEH_DEV_BRIDGE)
- eeh_restore_bridge_bars(edev, dn);
+ eeh_restore_bridge_bars(edev);
else
- eeh_restore_device_bars(edev, dn);
+ eeh_restore_device_bars(edev);
if (eeh_ops->restore_config)
- eeh_ops->restore_config(dn);
-
- return NULL;
+ eeh_ops->restore_config(edev);
}
/**
@@ -817,32 +812,29 @@ void eeh_pe_restore_bars(struct eeh_pe *pe)
const char *eeh_pe_loc_get(struct eeh_pe *pe)
{
struct pci_bus *bus = eeh_pe_bus_get(pe);
- struct device_node *dn = pci_bus_to_OF_node(bus);
+ struct device_node *dn;
const char *loc = NULL;
- if (!dn)
- goto out;
+ while (bus) {
+ dn = pci_bus_to_OF_node(bus);
+ if (!dn) {
+ bus = bus->parent;
+ continue;
+ }
- /* PHB PE or root PE ? */
- if (pci_is_root_bus(bus)) {
- loc = of_get_property(dn, "ibm,loc-code", NULL);
- if (!loc)
+ if (pci_is_root_bus(bus))
loc = of_get_property(dn, "ibm,io-base-loc-code", NULL);
+ else
+ loc = of_get_property(dn, "ibm,slot-location-code",
+ NULL);
+
if (loc)
- goto out;
+ return loc;
- /* Check the root port */
- dn = dn->child;
- if (!dn)
- goto out;
+ bus = bus->parent;
}
- loc = of_get_property(dn, "ibm,loc-code", NULL);
- if (!loc)
- loc = of_get_property(dn, "ibm,slot-location-code", NULL);
-
-out:
- return loc ? loc : "N/A";
+ return "N/A";
}
/**
@@ -857,25 +849,24 @@ out:
*/
struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
{
- struct pci_bus *bus = NULL;
struct eeh_dev *edev;
struct pci_dev *pdev;
+ struct pci_bus *bus = NULL;
- if (pe->type & EEH_PE_PHB) {
- bus = pe->phb->bus;
- } else if (pe->type & EEH_PE_BUS ||
- pe->type & EEH_PE_DEVICE) {
- if (pe->bus) {
- bus = pe->bus;
- goto out;
- }
+ if (pe->type & EEH_PE_PHB)
+ return pe->phb->bus;
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- bus = pdev->bus;
- }
+ /* The primary bus might be cached during probe time */
+ if (pe->state & EEH_PE_PRI_BUS)
+ return pe->bus;
+
+ /* Retrieve the parent PCI bus of first (top) PCI device */
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
+ pci_lock_rescan_remove();
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ bus = pdev->bus;
+ pci_unlock_rescan_remove();
-out:
return bus;
}
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index e2595ba4b720..706e1eb95efe 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -1,27 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
* Copyright IBM Corporation 2007
* Copyright Linas Vepstas <linas@austin.ibm.com> 2007
*
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT. See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
* Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
*/
+#include <linux/of.h>
#include <linux/pci.h>
#include <linux/stat.h>
#include <asm/ppc-pci.h>
@@ -30,7 +15,7 @@
/**
* EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic
* @_name: name of file in sysfs directory
- * @_memb: name of member in struct pci_dn to access
+ * @_memb: name of member in struct eeh_dev to access
* @_format: printf format for display
*
* All of the attributes look very similar, so just
@@ -48,12 +33,107 @@ static ssize_t eeh_show_##_name(struct device *dev, \
\
return sprintf(buf, _format "\n", edev->_memb); \
} \
-static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
+static DEVICE_ATTR(_name, 0444, eeh_show_##_name, NULL);
EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
-EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
+static ssize_t eeh_pe_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ int state;
+
+ if (!edev || !edev->pe)
+ return -ENODEV;
+
+ state = eeh_ops->get_state(edev->pe, NULL);
+ return sprintf(buf, "0x%08x 0x%08x\n",
+ state, edev->pe->state);
+}
+
+static ssize_t eeh_pe_state_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ if (!edev || !edev->pe)
+ return -ENODEV;
+
+ /* Nothing to do if it's not frozen */
+ if (!(edev->pe->state & EEH_PE_ISOLATED))
+ return count;
+
+ if (eeh_unfreeze_pe(edev->pe))
+ return -EIO;
+ eeh_pe_state_clear(edev->pe, EEH_PE_ISOLATED, true);
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(eeh_pe_state);
+
+#if defined(CONFIG_PCI_IOV) && defined(CONFIG_PPC_PSERIES)
+static ssize_t eeh_notify_resume_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ struct pci_dn *pdn = pci_get_pdn(pdev);
+
+ if (!edev || !edev->pe)
+ return -ENODEV;
+
+ return sprintf(buf, "%d\n", pdn->last_allow_rc);
+}
+
+static ssize_t eeh_notify_resume_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+
+ if (!edev || !edev->pe || !eeh_ops->notify_resume)
+ return -ENODEV;
+
+ if (eeh_ops->notify_resume(edev))
+ return -EIO;
+
+ return count;
+}
+static DEVICE_ATTR_RW(eeh_notify_resume);
+
+static int eeh_notify_resume_add(struct pci_dev *pdev)
+{
+ struct device_node *np;
+ int rc = 0;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ rc = device_create_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+
+ return rc;
+}
+
+static void eeh_notify_resume_remove(struct pci_dev *pdev)
+{
+ struct device_node *np;
+
+ np = pci_device_to_OF_node(pdev->is_physfn ? pdev : pdev->physfn);
+
+ if (of_property_read_bool(np, "ibm,is-open-sriov-pf"))
+ device_remove_file(&pdev->dev, &dev_attr_eeh_notify_resume);
+}
+#else
+static inline int eeh_notify_resume_add(struct pci_dev *pdev) { return 0; }
+static inline void eeh_notify_resume_remove(struct pci_dev *pdev) { }
+#endif /* CONFIG_PCI_IOV && CONFIG PPC_PSERIES*/
+
void eeh_sysfs_add_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
@@ -66,11 +146,12 @@ void eeh_sysfs_add_device(struct pci_dev *pdev)
return;
rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
- rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+ rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_state);
+ rc += eeh_notify_resume_add(pdev);
if (rc)
- printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
+ pr_warn("EEH: Unable to create sysfs entries\n");
else if (edev)
edev->mode |= EEH_DEV_SYSFS;
}
@@ -79,20 +160,23 @@ void eeh_sysfs_remove_device(struct pci_dev *pdev)
{
struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ WARN_ON(eeh_enabled());
+ return;
+ }
+
+ edev->mode &= ~EEH_DEV_SYSFS;
+
/*
* The parent directory might have been removed. We needn't
* continue for that case.
*/
- if (!pdev->dev.kobj.sd) {
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
+ if (!pdev->dev.kobj.sd)
return;
- }
device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
- device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
+ device_remove_file(&pdev->dev, &dev_attr_eeh_pe_state);
- if (edev)
- edev->mode &= ~EEH_DEV_SYSFS;
+ eeh_notify_resume_remove(pdev);
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 22b45a4955cd..f4a8c9877249 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,17 +12,14 @@
*
* This file contains the system call entry code, context switch
* code, and exception/interrupt return code for PowerPC.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/errno.h>
+#include <linux/err.h>
#include <linux/sys.h>
#include <linux/threads.h>
+#include <linux/linkage.h>
+
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -30,1015 +28,344 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
-#include <asm/ftrace.h>
#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
+#include <asm/barrier.h>
+#include <asm/kup.h>
+#include <asm/bug.h>
+#include <asm/interrupt.h>
-#undef SHOW_SYSCALLS
-#undef SHOW_SYSCALLS_TASK
+#include "head_32.h"
/*
- * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
*/
-#if MSR_KERNEL >= 0x10000
-#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
-#else
-#define LOAD_MSR_KERNEL(r, x) li r,(x)
-#endif
-
-#ifdef CONFIG_BOOKE
- .globl mcheck_transfer_to_handler
-mcheck_transfer_to_handler:
- mfspr r0,SPRN_DSRR0
- stw r0,_DSRR0(r11)
- mfspr r0,SPRN_DSRR1
- stw r0,_DSRR1(r11)
- /* fall through */
-
- .globl debug_transfer_to_handler
-debug_transfer_to_handler:
- mfspr r0,SPRN_CSRR0
- stw r0,_CSRR0(r11)
- mfspr r0,SPRN_CSRR1
- stw r0,_CSRR1(r11)
- /* fall through */
-
- .globl crit_transfer_to_handler
-crit_transfer_to_handler:
-#ifdef CONFIG_PPC_BOOK3E_MMU
- mfspr r0,SPRN_MAS0
- stw r0,MAS0(r11)
- mfspr r0,SPRN_MAS1
- stw r0,MAS1(r11)
- mfspr r0,SPRN_MAS2
- stw r0,MAS2(r11)
- mfspr r0,SPRN_MAS3
- stw r0,MAS3(r11)
- mfspr r0,SPRN_MAS6
- stw r0,MAS6(r11)
-#ifdef CONFIG_PHYS_64BIT
- mfspr r0,SPRN_MAS7
- stw r0,MAS7(r11)
-#endif /* CONFIG_PHYS_64BIT */
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-#ifdef CONFIG_44x
- mfspr r0,SPRN_MMUCR
- stw r0,MMUCR(r11)
-#endif
- mfspr r0,SPRN_SRR0
- stw r0,_SRR0(r11)
- mfspr r0,SPRN_SRR1
- stw r0,_SRR1(r11)
-
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
- mfspr r8,SPRN_SPRG_THREAD
- lwz r0,KSP_LIMIT(r8)
- stw r0,SAVED_KSP_LIMIT(r11)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
- stw r0,KSP_LIMIT(r8)
- /* fall through */
-#endif
-
-#ifdef CONFIG_40x
- .globl crit_transfer_to_handler
-crit_transfer_to_handler:
- lwz r0,crit_r10@l(0)
- stw r0,GPR10(r11)
- lwz r0,crit_r11@l(0)
- stw r0,GPR11(r11)
- mfspr r0,SPRN_SRR0
- stw r0,crit_srr0@l(0)
- mfspr r0,SPRN_SRR1
- stw r0,crit_srr1@l(0)
-
- /* set the stack limit to the current stack
- * and set the limit to protect the thread_info
- * struct
- */
- mfspr r8,SPRN_SPRG_THREAD
- lwz r0,KSP_LIMIT(r8)
- stw r0,saved_ksp_limit@l(0)
- rlwimi r0,r1,0,0,(31-THREAD_SHIFT)
- stw r0,KSP_LIMIT(r8)
- /* fall through */
-#endif
/*
- * This code finishes saving the registers to the exception frame
- * and jumps to the appropriate handler for the exception, turning
- * on address translation.
- * Note that we rely on the caller having set cr0.eq iff the exception
- * occurred in kernel mode (i.e. MSR:PR = 0).
+ * Align to 4k in order to ensure that all functions modyfing srr0/srr1
+ * fit into one page in order to not encounter a TLB miss between the
+ * modification of srr0/srr1 and the associated rfi.
*/
- .globl transfer_to_handler_full
-transfer_to_handler_full:
- SAVE_NVGPRS(r11)
- /* fall through */
-
- .globl transfer_to_handler
-transfer_to_handler:
- stw r2,GPR2(r11)
- stw r12,_NIP(r11)
- stw r9,_MSR(r11)
- andi. r2,r9,MSR_PR
- mfctr r12
- mfspr r2,SPRN_XER
- stw r12,_CTR(r11)
- stw r2,_XER(r11)
- mfspr r12,SPRN_SPRG_THREAD
- addi r2,r12,-THREAD
- tovirt(r2,r2) /* set r2 to current */
- beq 2f /* if from user, fix up THREAD.regs */
- addi r11,r1,STACK_FRAME_OVERHEAD
- stw r11,PT_REGS(r12)
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
- /* Check to see if the dbcr0 register is set up to debug. Use the
- internal debug mode bit to do this. */
- lwz r12,THREAD_DBCR0(r12)
- andis. r12,r12,DBCR0_IDM@h
- beq+ 3f
- /* From user and task is ptraced - load up global dbcr0 */
- li r12,-1 /* clear all pending debug events */
- mtspr SPRN_DBSR,r12
- lis r11,global_dbcr0@ha
- tophys(r11,r11)
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
- slwi r9,r9,3
- add r11,r11,r9
-#endif
- lwz r12,0(r11)
- mtspr SPRN_DBCR0,r12
- lwz r12,4(r11)
- addi r12,r12,-1
- stw r12,4(r11)
-#endif
- b 3f
+ .align 12
-2: /* if from kernel, check interrupted DOZE/NAP mode and
- * check for stack overflow
- */
- lwz r9,KSP_LIMIT(r12)
- cmplw r1,r9 /* if r1 <= ksp_limit */
- ble- stack_ovf /* then the kernel stack overflowed */
-5:
-#if defined(CONFIG_6xx) || defined(CONFIG_E500)
- CURRENT_THREAD_INFO(r9, r1)
- tophys(r9,r9) /* check local flags */
- lwz r12,TI_LOCAL_FLAGS(r9)
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_E500)
+ .globl prepare_transfer_to_handler
+prepare_transfer_to_handler:
+ /* if from kernel, check interrupted DOZE/NAP mode */
+ lwz r12,TI_LOCAL_FLAGS(r2)
mtcrf 0x01,r12
bt- 31-TLF_NAPPING,4f
bt- 31-TLF_SLEEPING,7f
-#endif /* CONFIG_6xx || CONFIG_E500 */
- .globl transfer_to_handler_cont
-transfer_to_handler_cont:
-3:
- mflr r9
- lwz r11,0(r9) /* virtual address of handler */
- lwz r9,4(r9) /* where to go when done */
-#ifdef CONFIG_TRACE_IRQFLAGS
- lis r12,reenable_mmu@h
- ori r12,r12,reenable_mmu@l
- mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r10
- SYNC
- RFI
-reenable_mmu: /* re-enable mmu so we can */
- mfmsr r10
- lwz r12,_MSR(r1)
- xor r10,r10,r12
- andi. r10,r10,MSR_EE /* Did EE change? */
- beq 1f
-
- /*
- * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
- * If from user mode there is only one stack frame on the stack, and
- * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
- * stack frame to make trace_hardirqs_off happy.
- *
- * This is handy because we also need to save a bunch of GPRs,
- * r3 can be different from GPR3(r1) at this point, r9 and r11
- * contains the old MSR and handler address respectively,
- * r4 & r5 can contain page fault arguments that need to be passed
- * along as well. r12, CCR, CTR, XER etc... are left clobbered as
- * they aren't useful past this point (aren't syscall arguments),
- * the rest is restored from the exception frame.
- */
- stwu r1,-32(r1)
- stw r9,8(r1)
- stw r11,12(r1)
- stw r3,16(r1)
- stw r4,20(r1)
- stw r5,24(r1)
- bl trace_hardirqs_off
- lwz r5,24(r1)
- lwz r4,20(r1)
- lwz r3,16(r1)
- lwz r11,12(r1)
- lwz r9,8(r1)
- addi r1,r1,32
- lwz r0,GPR0(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
-1: mtctr r11
- mtlr r9
- bctr /* jump to handler */
-#else /* CONFIG_TRACE_IRQFLAGS */
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
-#endif /* CONFIG_TRACE_IRQFLAGS */
+ blr
-#if defined (CONFIG_6xx) || defined(CONFIG_E500)
4: rlwinm r12,r12,0,~_TLF_NAPPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
b power_save_ppc32_restore
7: rlwinm r12,r12,0,~_TLF_SLEEPING
- stw r12,TI_LOCAL_FLAGS(r9)
+ stw r12,TI_LOCAL_FLAGS(r2)
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
+ REST_GPR(2, r11)
b fast_exception_return
+_ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
+#endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */
+
+#if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
+SYM_FUNC_START(__kuep_lock)
+ lwz r9, THREAD+THSR0(r2)
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+SYM_FUNC_END(__kuep_lock)
+
+SYM_FUNC_START_LOCAL(__kuep_unlock)
+ lwz r9, THREAD+THSR0(r2)
+ rlwinm r9,r9,0,~SR_NX
+ update_user_segments_by_4 r9, r10, r11, r12
+ blr
+SYM_FUNC_END(__kuep_unlock)
+
+.macro kuep_lock
+ bl __kuep_lock
+.endm
+.macro kuep_unlock
+ bl __kuep_unlock
+.endm
+#else
+.macro kuep_lock
+.endm
+.macro kuep_unlock
+.endm
#endif
-/*
- * On kernel stack overflow, load up an initial stack pointer
- * and call StackOverflow(regs), which should not return.
- */
-stack_ovf:
- /* sometimes we use a statically-allocated stack, which is OK. */
- lis r12,_end@h
- ori r12,r12,_end@l
- cmplw r1,r12
- ble 5b /* r1 <= &_end is OK */
- SAVE_NVGPRS(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- lis r1,init_thread_union@ha
- addi r1,r1,init_thread_union@l
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
- lis r9,StackOverflow@ha
- addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
- FIX_SRR1(r10,r12)
- mtspr SPRN_SRR0,r9
- mtspr SPRN_SRR1,r10
- SYNC
- RFI
+ .globl transfer_to_syscall
+transfer_to_syscall:
+ stw r3, ORIG_GPR3(r1)
+ stw r11, GPR1(r1)
+ stw r11, 0(r1)
+ mflr r12
+ stw r12, _LINK(r1)
+#ifdef CONFIG_BOOKE
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#endif
+ lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ SAVE_GPR(2, r1)
+ addi r12,r12,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r1)
+ li r2, INTERRUPT_SYSCALL
+ stw r12,STACK_INT_FRAME_MARKER(r1)
+ stw r2,_TRAP(r1)
+ SAVE_GPR(0, r1)
+ SAVE_GPRS(3, 8, r1)
+ addi r2,r10,-THREAD
+ SAVE_NVGPRS(r1)
+ kuep_lock
-/*
- * Handle a system call.
- */
- .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "entry_32.S",N_SO,0,0,0f
-0:
+ /* Calling convention has r3 = regs, r4 = orig r0 */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ mr r4,r0
+ bl system_call_exception
-_GLOBAL(DoSyscall)
- stw r3,ORIG_GPR3(r1)
- li r12,0
- stw r12,RESULT(r1)
- lwz r11,_CCR(r1) /* Clear SO bit in CR */
- rlwinm r11,r11,0,4,2
- stw r11,_CCR(r1)
-#ifdef SHOW_SYSCALLS
- bl do_show_syscall
-#endif /* SHOW_SYSCALLS */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Return from syscalls can (and generally will) hard enable
- * interrupts. You aren't supposed to call a syscall with
- * interrupts disabled in the first place. However, to ensure
- * that we get it right vs. lockdep if it happens, we force
- * that hard enable here with appropriate tracing if we see
- * that we have been called with interrupts off
- */
- mfmsr r11
- andi. r12,r11,MSR_EE
- bne+ 1f
- /* We came in with interrupts disabled, we enable them now */
- bl trace_hardirqs_on
- mfmsr r11
- lwz r0,GPR0(r1)
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- ori r11,r11,MSR_EE
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtmsr r11
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
- CURRENT_THREAD_INFO(r10, r1)
- lwz r11,TI_FLAGS(r10)
- andi. r11,r11,_TIF_SYSCALL_T_OR_A
- bne- syscall_dotrace
-syscall_dotrace_cont:
- cmplwi 0,r0,NR_syscalls
- lis r10,sys_call_table@h
- ori r10,r10,sys_call_table@l
- slwi r0,r0,2
- bge- 66f
- lwzx r10,r10,r0 /* Fetch system call handler [ptr] */
- mtlr r10
- addi r9,r1,STACK_FRAME_OVERHEAD
- PPC440EP_ERR42
- blrl /* Call handler */
- .globl ret_from_syscall
ret_from_syscall:
-#ifdef SHOW_SYSCALLS
- bl do_show_syscall_exit
-#endif
- mr r6,r3
- CURRENT_THREAD_INFO(r12, r1)
- /* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
- /* Note: We don't bother telling lockdep about it */
- SYNC
- MTMSRD(r10)
- lwz r9,TI_FLAGS(r12)
- li r8,-_LAST_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
- cmplw 0,r3,r8
- blt+ syscall_exit_cont
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-syscall_exit_cont:
- lwz r8,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't happen though but we
- * want to catch the bugger if it does right ?
- */
- andi. r10,r8,MSR_EE
- bne+ 1f
- stw r3,GPR3(r1)
- bl trace_hardirqs_off
- lwz r3,GPR3(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* If the process has its own DBCR0 value, load it up. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r5,0
+ bl syscall_exit_prepare
+#ifdef CONFIG_PPC_47x
lis r4,icache_44x_need_flush@ha
lwz r5,icache_44x_need_flush@l(r4)
cmplwi cr0,r5,0
- bne- 2f
-1:
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_47x)
-#endif /* CONFIG_44x */
-BEGIN_FTR_SECTION
- lwarx r7,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
+ bne- .L44x_icache_flush
+#endif /* CONFIG_PPC_47x */
+.L44x_icache_flush_return:
+ kuep_unlock
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
- mtcr r5
lwz r7,_NIP(r1)
- FIX_SRR1(r8, r0)
- lwz r2,GPR2(r1)
- lwz r1,GPR1(r1)
+ lwz r8,_MSR(r1)
+ cmpwi r3,0
+ REST_GPR(3, r1)
+syscall_exit_finish:
mtspr SPRN_SRR0,r7
mtspr SPRN_SRR1,r8
- SYNC
- RFI
+
+ bne 3f
+ mtcr r5
+
+1: REST_GPR(2, r1)
+ REST_GPR(1, r1)
+ rfi
+
+3: mtcr r5
+ lwz r4,_CTR(r1)
+ lwz r5,_XER(r1)
+ REST_NVGPRS(r1)
+ mtctr r4
+ mtxer r5
+ REST_GPR(0, r1)
+ REST_GPRS(3, 12, r1)
+ b 1b
+
#ifdef CONFIG_44x
-2: li r7,0
+.L44x_icache_flush:
+ li r7,0
iccci r0,r0
stw r7,icache_44x_need_flush@l(r4)
- b 1b
+ b .L44x_icache_flush_return
#endif /* CONFIG_44x */
-66: li r3,-ENOSYS
- b ret_from_syscall
-
.globl ret_from_fork
ret_from_fork:
REST_NVGPRS(r1)
bl schedule_tail
- li r3,0
+ li r3,0 /* fork() return value */
b ret_from_syscall
- .globl ret_from_kernel_thread
-ret_from_kernel_thread:
- REST_NVGPRS(r1)
+ .globl ret_from_kernel_user_thread
+ret_from_kernel_user_thread:
bl schedule_tail
- mtlr r14
+ mtctr r14
mr r3,r15
PPC440EP_ERR42
- blrl
+ bctrl
li r3,0
b ret_from_syscall
-/* Traced system call support */
-syscall_dotrace:
- SAVE_NVGPRS(r1)
- li r0,0xc00
- stw r0,_TRAP(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
+ .globl start_kernel_thread
+start_kernel_thread:
+ bl schedule_tail
+ mtctr r14
+ mr r3,r15
+ PPC440EP_ERR42
+ bctrl
/*
- * Restore argument registers possibly just changed.
- * We use the return value of do_syscall_trace_enter
- * for call number to look up in the table (r0).
- */
- mr r0,r3
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- REST_NVGPRS(r1)
- b syscall_dotrace_cont
-
-syscall_exit_work:
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmplw 0,r3,r8
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- lwz r11,_CCR(r1) /* Load CR */
- neg r3,r3
- oris r11,r11,0x1000 /* Set SO bit in CR */
- stw r11,_CCR(r1)
-
-1: stw r6,RESULT(r1) /* Save result */
- stw r3,GPR3(r1) /* Update return value */
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: lwarx r8,0,r12
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r12
-#endif
- stwcx. r8,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything which requires enabling interrupts? */
- andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
- beq ret_from_except
-
- /* Re-enable interrupts. There is no need to trace that with
- * lockdep as we are supposed to have IRQs on at this point
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
*/
- ori r10,r10,MSR_EE
- SYNC
- MTMSRD(r10)
-
- /* Save NVGPRS if they're not saved already */
- lwz r4,_TRAP(r1)
- andi. r4,r4,1
- beq 5f
- SAVE_NVGPRS(r1)
- li r4,0xc00
- stw r4,_TRAP(r1)
-5:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except_full
-
-#ifdef SHOW_SYSCALLS
-do_show_syscall:
-#ifdef SHOW_SYSCALLS_TASK
- lis r11,show_syscalls_task@ha
- lwz r11,show_syscalls_task@l(r11)
- cmp 0,r2,r11
- bnelr
-#endif
- stw r31,GPR31(r1)
- mflr r31
- lis r3,7f@ha
- addi r3,r3,7f@l
- lwz r4,GPR0(r1)
- lwz r5,GPR3(r1)
- lwz r6,GPR4(r1)
- lwz r7,GPR5(r1)
- lwz r8,GPR6(r1)
- lwz r9,GPR7(r1)
- bl printk
- lis r3,77f@ha
- addi r3,r3,77f@l
- lwz r4,GPR8(r1)
- mr r5,r2
- bl printk
- lwz r0,GPR0(r1)
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtlr r31
- lwz r31,GPR31(r1)
- blr
-
-do_show_syscall_exit:
-#ifdef SHOW_SYSCALLS_TASK
- lis r11,show_syscalls_task@ha
- lwz r11,show_syscalls_task@l(r11)
- cmp 0,r2,r11
- bnelr
-#endif
- stw r31,GPR31(r1)
- mflr r31
- stw r3,RESULT(r1) /* Save result */
- mr r4,r3
- lis r3,79f@ha
- addi r3,r3,79f@l
- bl printk
- lwz r3,RESULT(r1)
- mtlr r31
- lwz r31,GPR31(r1)
- blr
-
-7: .string "syscall %d(%x, %x, %x, %x, %x, "
-77: .string "%x), current=%p\n"
-79: .string " -> %x\n"
- .align 2,0
-
-#ifdef SHOW_SYSCALLS_TASK
- .data
- .globl show_syscalls_task
-show_syscalls_task:
- .long -1
- .text
-#endif
-#endif /* SHOW_SYSCALLS */
-
-/*
- * The fork/clone functions need to copy the full register set into
- * the child process. Therefore we need to save all the nonvolatile
- * registers (r13 - r31) before calling the C code.
- */
- .globl ppc_fork
-ppc_fork:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_fork
-
- .globl ppc_vfork
-ppc_vfork:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_vfork
-
- .globl ppc_clone
-ppc_clone:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_clone
-
- .globl ppc_swapcontext
-ppc_swapcontext:
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */
- stw r0,_TRAP(r1) /* register set saved */
- b sys_swapcontext
-
-/*
- * Top-level page fault handling.
- * This is in assembler because if do_page_fault tells us that
- * it is a bad kernel page fault, we want to save the non-volatile
- * registers before calling bad_page_fault.
- */
- .globl handle_page_fault
-handle_page_fault:
- stw r4,_DAR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_page_fault
- cmpwi r3,0
- beq+ ret_from_except
- SAVE_NVGPRS(r1)
- lwz r0,_TRAP(r1)
- clrrwi r0,r0,1
- stw r0,_TRAP(r1)
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except_full
-
-/*
- * This routine switches between two different tasks. The process
- * state of one is saved on its kernel stack. Then the state
- * of the other is restored from its kernel stack. The memory
- * management hardware is updated to the second process's state.
- * Finally, we can return to the second process.
- * On entry, r3 points to the THREAD for the current task, r4
- * points to the THREAD for the new task.
- *
- * This routine is always called with interrupts disabled.
- *
- * Note: there are two ways to get to the "going out" portion
- * of this code; either by coming in via the entry (_switch)
- * or via "fork" which must set up an environment equivalent
- * to the "_switch" path. If you change this , you'll have to
- * change the fork code also.
- *
- * The code which creates the new task context is in 'copy_thread'
- * in arch/ppc/kernel/process.c
- */
-_GLOBAL(_switch)
- stwu r1,-INT_FRAME_SIZE(r1)
- mflr r0
- stw r0,INT_FRAME_SIZE+4(r1)
- /* r3-r12 are caller saved -- Cort */
- SAVE_NVGPRS(r1)
- stw r0,_NIP(r1) /* Return to switch caller */
- mfmsr r11
- li r0,MSR_FP /* Disable floating-point */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_VEC@h /* Disable altivec */
- mfspr r12,SPRN_VRSAVE /* save vrsave register value */
- stw r12,THREAD+THREAD_VRSAVE(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_SPE@h /* Disable SPE */
- mfspr r12,SPRN_SPEFSCR /* save spefscr register value */
- stw r12,THREAD+THREAD_SPEFSCR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
- and. r0,r0,r11 /* FP or altivec or SPE enabled? */
- beq+ 1f
- andc r11,r11,r0
- MTMSRD(r11)
- isync
-1: stw r11,_MSR(r1)
- mfcr r10
- stw r10,_CCR(r1)
- stw r1,KSP(r3) /* Set old stack pointer */
-
-#ifdef CONFIG_SMP
- /* We need a sync somewhere here to make sure that if the
- * previous task gets rescheduled on another CPU, it sees all
- * stores it has performed on this one.
- */
- sync
-#endif /* CONFIG_SMP */
-
- tophys(r0,r4)
- CLR_TOP32(r0)
- mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
- lwz r1,KSP(r4) /* Load new stack pointer */
-
- /* save the old current 'last' for return value */
- mr r3,r2
- addi r2,r4,-THREAD /* Update current */
-
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_VRSAVE(r2)
- mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_SPE
-BEGIN_FTR_SECTION
- lwz r0,THREAD+THREAD_SPEFSCR(r2)
- mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
-END_FTR_SECTION_IFSET(CPU_FTR_SPE)
-#endif /* CONFIG_SPE */
-
- lwz r0,_CCR(r1)
- mtcrf 0xFF,r0
- /* r3-r12 are destroyed -- Cort */
- REST_NVGPRS(r1)
-
- lwz r4,_NIP(r1) /* Return to _switch caller in new task */
- mtlr r4
- addi r1,r1,INT_FRAME_SIZE
- blr
+100: trap
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
.globl fast_exception_return
fast_exception_return:
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#ifndef CONFIG_BOOKE
andi. r10,r9,MSR_RI /* check for recoverable interrupt */
- beq 1f /* if not, we've got problems */
+ beq 3f /* if not, we've got problems */
#endif
-2: REST_4GPRS(3, r11)
- lwz r10,_CCR(r11)
- REST_GPR(1, r11)
+2: lwz r10,_CCR(r11)
+ REST_GPRS(1, 6, r11)
mtcr r10
lwz r10,_LINK(r11)
mtlr r10
+ /* Clear the exception marker on the stack to avoid confusing stacktrace */
+ li r10, 0
+ stw r10, 8(r11)
REST_GPR(10, r11)
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
mtspr SPRN_SRR1,r9
mtspr SPRN_SRR0,r12
REST_GPR(9, r11)
REST_GPR(12, r11)
- lwz r11,GPR11(r11)
- SYNC
- RFI
-
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
-/* check if the exception happened in a restartable section */
-1: lis r3,exc_exit_restart_end@ha
- addi r3,r3,exc_exit_restart_end@l
- cmplw r12,r3
- bge 3f
- lis r4,exc_exit_restart@ha
- addi r4,r4,exc_exit_restart@l
- cmplw r12,r4
- blt 3f
- lis r3,fee_restarts@ha
- tophys(r3,r3)
- lwz r5,fee_restarts@l(r3)
- addi r5,r5,1
- stw r5,fee_restarts@l(r3)
- mr r12,r4 /* restart at exc_exit_restart */
- b 2b
-
- .section .bss
- .align 2
-fee_restarts:
- .space 4
- .previous
+ REST_GPR(11, r11)
+ rfi
+_ASM_NOKPROBE_SYMBOL(fast_exception_return)
/* aargh, a nonrecoverable interrupt, panic */
/* aargh, we don't know which trap this is */
-/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
-BEGIN_FTR_SECTION
- b 2b
-END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- lis r10,MSR_KERNEL@h
- ori r10,r10,MSR_KERNEL@l
- bl transfer_to_handler_full
- .long nonrecoverable_exception
- .long ret_from_except
-#endif
-
- .globl ret_from_except_full
-ret_from_except_full:
- REST_NVGPRS(r1)
- /* fall through */
-
- .globl ret_from_except
-ret_from_except:
- /* Hard-disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt. */
- /* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
- SYNC /* Some chip revs have problems here... */
- MTMSRD(r10) /* disable interrupts */
-
- lwz r3,_MSR(r1) /* Returning to user mode? */
- andi. r0,r3,MSR_PR
- beq resume_kernel
-
-user_exc_return: /* r10 contains MSR_KERNEL here */
- /* Check current_thread_info()->flags */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
- andi. r0,r9,_TIF_USER_WORK_MASK
- bne do_work
-
-restore_user:
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- /* Check whether this process has its own DBCR0 value. The internal
- debug mode bit tells us that dbcr0 should be loaded. */
- lwz r0,THREAD+THREAD_DBCR0(r2)
- andis. r10,r0,DBCR0_IDM@h
- bnel- load_dbcr0
-#endif
+ prepare_transfer_to_handler
+ bl unrecoverable_exception
+ trap /* should not get here */
+
+ .globl interrupt_return
+interrupt_return:
+ lwz r4,_MSR(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ andi. r0,r4,MSR_PR
+ beq .Lkernel_interrupt_return
+ bl interrupt_exit_user_prepare
+ cmpwi r3,0
+ kuep_unlock
+ bne- .Lrestore_nvgprs
- b restore
+.Lfast_user_interrupt_return:
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
-/* N.B. the only way to get here is from the beq following ret_from_except. */
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_FLAGS(r9)
- andis. r0,r8,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
+BEGIN_FTR_SECTION
+ stwcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ lwarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
+ lwz r3,_CCR(r1)
+ lwz r4,_LINK(r1)
+ lwz r5,_CTR(r1)
+ lwz r6,_XER(r1)
+ li r0,0
- lwz r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
+ /*
+ * Leaving a stale exception marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ stw r0,8(r1)
+ REST_GPRS(7, 12, r1)
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/4 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
+ mtcr r3
+ mtlr r4
mtctr r5
-2: lwzx r0,r6,r4
- stwx r0,r6,r3
- addi r6,r6,4
- bdnz 2b
-
- /* Do real store operation to complete stwu */
- lwz r5,GPR1(r1)
- stw r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: lwarx r8,0,r5
- andc r8,r8,r11
-#ifdef CONFIG_IBM405_ERR77
- dcbt 0,r5
-#endif
- stwcx. r8,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPT
- /* check current_thread_info->preempt_count */
- lwz r0,TI_PREEMPT(r9)
- cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore
- andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore
- lwz r3,_MSR(r1)
- andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore /* don't schedule if so */
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Lockdep thinks irqs are enabled, we need to call
- * preempt_schedule_irq with IRQs off, so we inform lockdep
- * now that we -did- turn them off already
- */
- bl trace_hardirqs_off
-#endif
-1: bl preempt_schedule_irq
- CURRENT_THREAD_INFO(r9, r1)
- lwz r3,TI_FLAGS(r9)
- andi. r0,r3,_TIF_NEED_RESCHED
- bne- 1b
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* And now, to properly rebalance the above, we tell lockdep they
- * are being turned back on, which will happen when we return
- */
- bl trace_hardirqs_on
-#endif
-#endif /* CONFIG_PREEMPT */
+ mtspr SPRN_XER,r6
- /* interrupts are hard-disabled at this point */
-restore:
-#ifdef CONFIG_44x
-BEGIN_MMU_FTR_SECTION
- b 1f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
- lis r4,icache_44x_need_flush@ha
- lwz r5,icache_44x_need_flush@l(r4)
- cmplwi cr0,r5,0
- beq+ 1f
- li r6,0
- iccci r0,r0
- stw r6,icache_44x_need_flush@l(r4)
-1:
-#endif /* CONFIG_44x */
+ REST_GPRS(2, 6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
- lwz r9,_MSR(r1)
-#ifdef CONFIG_TRACE_IRQFLAGS
- /* Lockdep doesn't know about the fact that IRQs are temporarily turned
- * off in this assembly code while peeking at TI_FLAGS() and such. However
- * we need to inform it if the exception turned interrupts off, and we
- * are about to trun them back on.
- *
- * The problem here sadly is that we don't know whether the exceptions was
- * one that turned interrupts off or not. So we always tell lockdep about
- * turning them on here when we go back to wherever we came from with EE
- * on, even if that may meen some redudant calls being tracked. Maybe later
- * we could encode what the exception did somewhere or test the exception
- * type in the pt_regs but that sounds overkill
- */
- andi. r10,r9,MSR_EE
- beq 1f
- /*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
- stwu r1,-32(r1)
- mflr r0
- stw r0,4(r1)
- stwu r1,-32(r1)
- bl trace_hardirqs_on
- lwz r1,0(r1)
- lwz r1,0(r1)
- lwz r9,_MSR(r1)
-1:
-#endif /* CONFIG_TRACE_IRQFLAGS */
+.Lrestore_nvgprs:
+ REST_NVGPRS(r1)
+ b .Lfast_user_interrupt_return
- lwz r0,GPR0(r1)
- lwz r2,GPR2(r1)
- REST_4GPRS(3, r1)
- REST_2GPRS(7, r1)
+.Lkernel_interrupt_return:
+ bl interrupt_exit_kernel_prepare
- lwz r10,_XER(r1)
- lwz r11,_CTR(r1)
- mtspr SPRN_XER,r10
- mtctr r11
+.Lfast_kernel_interrupt_return:
+ cmpwi cr1,r3,0
+ lwz r11,_NIP(r1)
+ lwz r12,_MSR(r1)
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
- PPC405_ERR77(0,r1)
BEGIN_FTR_SECTION
- lwarx r11,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
- stwcx. r0,0,r1 /* to clear the reservation */
+ stwcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ lwarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ lwz r3,_LINK(r1)
+ lwz r4,_CTR(r1)
+ lwz r5,_XER(r1)
+ lwz r6,_CCR(r1)
+ li r0,0
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
- andi. r10,r9,MSR_RI /* check if this exception occurred */
- beql nonrecoverable /* at a bad place (MSR:RI = 0) */
+ REST_GPRS(7, 12, r1)
- lwz r10,_CCR(r1)
- lwz r11,_LINK(r1)
- mtcrf 0xFF,r10
- mtlr r11
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
/*
- * Once we put values in SRR0 and SRR1, we are in a state
- * where exceptions are not recoverable, since taking an
- * exception will trash SRR0 and SRR1. Therefore we clear the
- * MSR:RI bit to indicate this. If we do take an exception,
- * we can't return to the point of the exception but we
- * can restart the exception exit path at the label
- * exc_exit_restart below. -- paulus
+ * Leaving a stale exception marker on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
- SYNC
- MTMSRD(r10) /* clear the RI bit */
- .globl exc_exit_restart
-exc_exit_restart:
- lwz r12,_NIP(r1)
- FIX_SRR1(r9,r10)
- mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r9
- REST_4GPRS(9, r1)
- lwz r1,GPR1(r1)
- .globl exc_exit_restart_end
-exc_exit_restart_end:
- SYNC
- RFI
+ stw r0,8(r1)
-#else /* !(CONFIG_4xx || CONFIG_BOOKE) */
- /*
- * This is a bit different on 4xx/Book-E because it doesn't have
- * the RI bit in the MSR.
- * The TLB miss handler checks if we have interrupted
- * the exception exit path and restarts it if so
- * (well maybe one day it will... :).
+ REST_GPRS(2, 5, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ rfi
+
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * SPRG Scratch0 as temporary storage to hold the store
+ * data, as interrupts are disabled here so it won't be clobbered.
*/
- lwz r11,_LINK(r1)
- mtlr r11
- lwz r10,_CCR(r1)
- mtcrf 0xff,r10
- REST_2GPRS(9, r1)
- .globl exc_exit_restart
-exc_exit_restart:
- lwz r11,_NIP(r1)
- lwz r12,_MSR(r1)
-exc_exit_start:
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r12
- REST_2GPRS(11, r1)
- lwz r1,GPR1(r1)
- .globl exc_exit_restart_end
-exc_exit_restart_end:
- PPC405_ERR77_SYNC
+ mtcr r6
+#ifdef CONFIG_BOOKE
+ mtspr SPRN_SPRG_WSCRATCH0, r9
+#else
+ mtspr SPRN_SPRG_SCRATCH0, r9
+#endif
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ stw r9,0(r1) /* perform store component of stwu */
+#ifdef CONFIG_BOOKE
+ mfspr r9, SPRN_SPRG_RSCRATCH0
+#else
+ mfspr r9, SPRN_SPRG_SCRATCH0
+#endif
rfi
- b . /* prevent prefetch past rfi */
+_ASM_NOKPROBE_SYMBOL(interrupt_return)
+
+#ifdef CONFIG_BOOKE
/*
* Returning from a critical interrupt in user mode doesn't need
@@ -1053,39 +380,23 @@ exc_exit_restart_end:
* time of the critical interrupt.
*
*/
-#ifdef CONFIG_40x
-#define PPC_40x_TURN_OFF_MSR_DR \
- /* avoid any possible TLB misses here by turning off MSR.DR, we \
- * assume the instructions here are mapped by a pinned TLB entry */ \
- li r10,MSR_IR; \
- mtmsr r10; \
- isync; \
- tophys(r1, r1);
-#else
-#define PPC_40x_TURN_OFF_MSR_DR
-#endif
#define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
- bne user_exc_return; \
- lwz r0,GPR0(r1); \
- lwz r2,GPR2(r1); \
- REST_4GPRS(3, r1); \
- REST_2GPRS(7, r1); \
+ bne interrupt_return; \
+ REST_GPR(0, r1); \
+ REST_GPRS(2, 8, r1); \
lwz r10,_XER(r1); \
lwz r11,_CTR(r1); \
mtspr SPRN_XER,r10; \
mtctr r11; \
- PPC405_ERR77(0,r1); \
stwcx. r0,0,r1; /* to clear the reservation */ \
lwz r11,_LINK(r1); \
mtlr r11; \
lwz r10,_CCR(r1); \
mtcrf 0xff,r10; \
- PPC_40x_TURN_OFF_MSR_DR; \
lwz r9,_DEAR(r1); \
lwz r10,_ESR(r1); \
mtspr SPRN_DEAR,r9; \
@@ -1094,12 +405,8 @@ exc_exit_restart_end:
lwz r12,_MSR(r1); \
mtspr exc_lvl_srr0,r11; \
mtspr exc_lvl_srr1,r12; \
- lwz r9,GPR9(r1); \
- lwz r12,GPR12(r1); \
- lwz r10,GPR10(r1); \
- lwz r11,GPR11(r1); \
- lwz r1,GPR1(r1); \
- PPC405_ERR77_SYNC; \
+ REST_GPRS(9, 12, r1); \
+ REST_GPR(1, r1); \
exc_lvl_rfi; \
b .; /* prevent prefetch past exc_lvl_rfi */
@@ -1109,7 +416,7 @@ exc_exit_restart_end:
mtspr SPRN_##exc_lvl_srr0,r9; \
mtspr SPRN_##exc_lvl_srr1,r10;
-#if defined(CONFIG_PPC_BOOK3E_MMU)
+#if defined(CONFIG_PPC_E500)
#ifdef CONFIG_PHYS_64BIT
#define RESTORE_MAS7 \
lwz r11,MAS7(r1); \
@@ -1137,324 +444,27 @@ exc_exit_restart_end:
#define RESTORE_MMU_REGS
#endif
-#ifdef CONFIG_40x
.globl ret_from_crit_exc
ret_from_crit_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lis r10,saved_ksp_limit@ha;
- lwz r10,saved_ksp_limit@l(r10);
- tovirt(r9,r9);
- stw r10,KSP_LIMIT(r9)
- lis r9,crit_srr0@ha;
- lwz r9,crit_srr0@l(r9);
- lis r10,crit_srr1@ha;
- lwz r10,crit_srr1@l(r10);
- mtspr SPRN_SRR0,r9;
- mtspr SPRN_SRR1,r10;
- RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
-#endif /* CONFIG_40x */
-
-#ifdef CONFIG_BOOKE
- .globl ret_from_crit_exc
-ret_from_crit_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc)
.globl ret_from_debug_exc
ret_from_debug_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
- lwz r9,THREAD_INFO-THREAD(r9)
- CURRENT_THREAD_INFO(r10, r1)
- lwz r10,TI_PREEMPT(r10)
- stw r10,TI_PREEMPT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_DSRR0, SPRN_DSRR1, PPC_RFDI)
+_ASM_NOKPROBE_SYMBOL(ret_from_debug_exc)
.globl ret_from_mcheck_exc
ret_from_mcheck_exc:
- mfspr r9,SPRN_SPRG_THREAD
- lwz r10,SAVED_KSP_LIMIT(r1)
- stw r10,KSP_LIMIT(r9)
RESTORE_xSRR(SRR0,SRR1);
RESTORE_xSRR(CSRR0,CSRR1);
RESTORE_xSRR(DSRR0,DSRR1);
RESTORE_MMU_REGS;
RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI)
+_ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc)
#endif /* CONFIG_BOOKE */
-
-/*
- * Load the DBCR0 value for a task that is being ptraced,
- * having first saved away the global DBCR0. Note that r0
- * has the dbcr0 value to set upon entry to this.
- */
-load_dbcr0:
- mfmsr r10 /* first disable debug exceptions */
- rlwinm r10,r10,0,~MSR_DE
- mtmsr r10
- isync
- mfspr r10,SPRN_DBCR0
- lis r11,global_dbcr0@ha
- addi r11,r11,global_dbcr0@l
-#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_CPU(r9)
- slwi r9,r9,3
- add r11,r11,r9
-#endif
- stw r10,0(r11)
- mtspr SPRN_DBCR0,r0
- lwz r10,4(r11)
- addi r10,r10,1
- stw r10,4(r11)
- li r11,-1
- mtspr SPRN_DBSR,r11 /* clear all pending debug events */
- blr
-
- .section .bss
- .align 4
-global_dbcr0:
- .space 8*NR_CPUS
- .previous
-#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */
-
-do_work: /* r10 contains MSR_KERNEL here */
- andi. r0,r9,_TIF_NEED_RESCHED
- beq do_user_signal
-
-do_resched: /* r10 contains MSR_KERNEL here */
- /* Note: We don't need to inform lockdep that we are enabling
- * interrupts here. As far as it knows, they are already enabled
- */
- ori r10,r10,MSR_EE
- SYNC
- MTMSRD(r10) /* hard-enable interrupts */
- bl schedule
-recheck:
- /* Note: And we don't tell it we are disabling them again
- * neither. Those disable/enable cycles used to peek at
- * TI_FLAGS aren't advertised.
- */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
- SYNC
- MTMSRD(r10) /* disable interrupts */
- CURRENT_THREAD_INFO(r9, r1)
- lwz r9,TI_FLAGS(r9)
- andi. r0,r9,_TIF_NEED_RESCHED
- bne- do_resched
- andi. r0,r9,_TIF_USER_WORK_MASK
- beq restore_user
-do_user_signal: /* r10 contains MSR_KERNEL here */
- ori r10,r10,MSR_EE
- SYNC
- MTMSRD(r10) /* hard-enable interrupts */
- /* save r13-r31 in the exception frame, if not already done */
- lwz r3,_TRAP(r1)
- andi. r0,r3,1
- beq 2f
- SAVE_NVGPRS(r1)
- rlwinm r3,r3,0,0,30
- stw r3,_TRAP(r1)
-2: addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r9
- bl do_notify_resume
- REST_NVGPRS(r1)
- b recheck
-
-/*
- * We come here when we are at the end of handling an exception
- * that occurred at a place where taking an exception will lose
- * state information, such as the contents of SRR0 and SRR1.
- */
-nonrecoverable:
- lis r10,exc_exit_restart_end@ha
- addi r10,r10,exc_exit_restart_end@l
- cmplw r12,r10
- bge 3f
- lis r11,exc_exit_restart@ha
- addi r11,r11,exc_exit_restart@l
- cmplw r12,r11
- blt 3f
- lis r10,ee_restarts@ha
- lwz r12,ee_restarts@l(r10)
- addi r12,r12,1
- stw r12,ee_restarts@l(r10)
- mr r12,r11 /* restart at exc_exit_restart */
- blr
-3: /* OK, we can't recover, kill this process */
- /* but the 601 doesn't implement the RI bit, so assume it's OK */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_601)
- lwz r3,_TRAP(r1)
- andi. r0,r3,1
- beq 4f
- SAVE_NVGPRS(r1)
- rlwinm r3,r3,0,0,30
- stw r3,_TRAP(r1)
-4: addi r3,r1,STACK_FRAME_OVERHEAD
- bl nonrecoverable_exception
- /* shouldn't return */
- b 4b
-
- .section .bss
- .align 2
-ee_restarts:
- .space 4
- .previous
-
-/*
- * PROM code for specific machines follows. Put it
- * here so it's easy to add arch-specific sections later.
- * -- Cort
- */
-#ifdef CONFIG_PPC_RTAS
-/*
- * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
- * called with the MMU off.
- */
-_GLOBAL(enter_rtas)
- stwu r1,-INT_FRAME_SIZE(r1)
- mflr r0
- stw r0,INT_FRAME_SIZE+4(r1)
- LOAD_REG_ADDR(r4, rtas)
- lis r6,1f@ha /* physical return address for rtas */
- addi r6,r6,1f@l
- tophys(r6,r6)
- tophys(r7,r1)
- lwz r8,RTASENTRY(r4)
- lwz r4,RTASBASE(r4)
- mfmsr r9
- stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
- SYNC /* disable interrupts so SRR0/1 */
- MTMSRD(r0) /* don't get trashed */
- li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
- mtlr r6
- mtspr SPRN_SPRG_RTAS,r7
- mtspr SPRN_SRR0,r8
- mtspr SPRN_SRR1,r9
- RFI
-1: tophys(r9,r1)
- lwz r8,INT_FRAME_SIZE+4(r9) /* get return address */
- lwz r9,8(r9) /* original msr value */
- FIX_SRR1(r9,r0)
- addi r1,r1,INT_FRAME_SIZE
- li r0,0
- mtspr SPRN_SPRG_RTAS,r0
- mtspr SPRN_SRR0,r8
- mtspr SPRN_SRR1,r9
- RFI /* return to caller */
-
- .globl machine_check_in_rtas
-machine_check_in_rtas:
- twi 31,0,0
- /* XXX load up BATs and panic */
-
-#endif /* CONFIG_PPC_RTAS */
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
- /*
- * It is required that _mcount on PPC32 must preserve the
- * link register. But we have r0 to play with. We use r0
- * to push the return address back to the caller of mcount
- * into the ctr register, restore the link register and
- * then jump back using the ctr register.
- */
- mflr r0
- mtctr r0
- lwz r0, 4(r1)
- mtlr r0
- bctr
-
-_GLOBAL(ftrace_caller)
- MCOUNT_SAVE_FRAME
- /* r3 ends up with link register */
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-#else
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
-
- MCOUNT_SAVE_FRAME
-
- subi r3, r3, MCOUNT_INSN_SIZE
- LOAD_REG_ADDR(r5, ftrace_trace_function)
- lwz r5,0(r5)
-
- mtctr r5
- bctrl
- nop
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- b ftrace_graph_caller
-#endif
- MCOUNT_RESTORE_FRAME
- bctr
-#endif
-
-_GLOBAL(ftrace_stub)
- blr
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
- /* load r4 with local address */
- lwz r4, 44(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* get the parent address */
- addi r3, r1, 52
-
- bl prepare_ftrace_return
- nop
-
- MCOUNT_RESTORE_FRAME
- /* old link register ends up in ctr reg */
- bctr
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- stwu r1, -32(r1)
- stw r3, 20(r1)
- stw r4, 16(r1)
- stw r31, 12(r1)
- mr r31, r1
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- lwz r3, 20(r1)
- lwz r4, 16(r1)
- lwz r31,12(r1)
- lwz r1, 0(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#endif /* CONFIG_MCOUNT */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
deleted file mode 100644
index 5bbd1bc8c3b0..000000000000
--- a/arch/powerpc/kernel/entry_64.S
+++ /dev/null
@@ -1,1289 +0,0 @@
-/*
- * PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
- * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
- * Adapted for Power Macintosh by Paul Mackerras.
- * Low-level exception handlers and MMU support
- * rewritten by Paul Mackerras.
- * Copyright (C) 1996 Paul Mackerras.
- * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
- *
- * This file contains the system call entry code, context switch
- * code, and exception/interrupt return code for PowerPC.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/errno.h>
-#include <asm/unistd.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/firmware.h>
-#include <asm/bug.h>
-#include <asm/ptrace.h>
-#include <asm/irqflags.h>
-#include <asm/ftrace.h>
-#include <asm/hw_irq.h>
-#include <asm/context_tracking.h>
-
-/*
- * System calls.
- */
- .section ".toc","aw"
-SYS_CALL_TABLE:
- .tc sys_call_table[TC],sys_call_table
-
-/* This value is used to mark exception frames on the stack. */
-exception_marker:
- .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
- .section ".text"
- .align 7
-
-#undef SHOW_SYSCALLS
-
- .globl system_call_common
-system_call_common:
- andi. r10,r12,MSR_PR
- mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
- ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- std r0,GPR0(r1)
- std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
- ACCOUNT_CPU_USER_ENTRY(r10, r11)
-2: std r2,GPR2(r1)
- std r3,GPR3(r1)
- mfcr r2
- std r4,GPR4(r1)
- std r5,GPR5(r1)
- std r6,GPR6(r1)
- std r7,GPR7(r1)
- std r8,GPR8(r1)
- li r11,0
- std r11,GPR9(r1)
- std r11,GPR10(r1)
- std r11,GPR11(r1)
- std r11,GPR12(r1)
- std r11,_XER(r1)
- std r11,_CTR(r1)
- std r9,GPR13(r1)
- mflr r10
- /*
- * This clears CR0.SO (bit 28), which is the error indication on
- * return from this system call.
- */
- rldimi r2,r11,28,(63-28)
- li r11,0xc01
- std r10,_LINK(r1)
- std r11,_TRAP(r1)
- std r3,ORIG_GPR3(r1)
- std r2,_CCR(r1)
- ld r2,PACATOC(r13)
- addi r9,r1,STACK_FRAME_OVERHEAD
- ld r11,exception_marker@toc(r2)
- std r11,-16(r9) /* "regshere" marker */
-#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
-BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
- ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
- ld r11,PACA_DTL_RIDX(r13) /* get log read index */
- addi r10,r10,LPPACA_DTLIDX
- LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
- bl accumulate_stolen_time
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-33:
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
-#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE && CONFIG_PPC_SPLPAR */
-
- /*
- * A syscall should always be called with interrupts enabled
- * so we just unconditionally hard-enable here. When some kind
- * of irq tracing is used, we additionally check that condition
- * is correct
- */
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_BUG)
- lbz r10,PACASOFTIRQEN(r13)
- xori r10,r10,1
-1: tdnei r10,0
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- ld r11,PACAKMSR(r13)
- ori r11,r11,MSR_EE
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* We do need to set SOFTE in the stack frame or the return
- * from interrupt will be painful
- */
- li r10,1
- std r10,SOFTE(r1)
-
-#ifdef SHOW_SYSCALLS
- bl do_show_syscall
- REST_GPR(0,r1)
- REST_4GPRS(3,r1)
- REST_2GPRS(7,r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
-#endif
- CURRENT_THREAD_INFO(r11, r1)
- ld r10,TI_FLAGS(r11)
- andi. r11,r10,_TIF_SYSCALL_T_OR_A
- bne syscall_dotrace
-.Lsyscall_dotrace_cont:
- cmpldi 0,r0,NR_syscalls
- bge- syscall_enosys
-
-system_call: /* label this so stack traces look sane */
-/*
- * Need to vector to 32 Bit or default sys_call_table here,
- * based on caller's run-mode / personality.
- */
- ld r11,SYS_CALL_TABLE@toc(2)
- andi. r10,r10,_TIF_32BIT
- beq 15f
- addi r11,r11,8 /* use 32-bit syscall entries */
- clrldi r3,r3,32
- clrldi r4,r4,32
- clrldi r5,r5,32
- clrldi r6,r6,32
- clrldi r7,r7,32
- clrldi r8,r8,32
-15:
- slwi r0,r0,4
- ldx r12,r11,r0 /* Fetch system call handler [ptr] */
- mtctr r12
- bctrl /* Call handler */
-
-syscall_exit:
- std r3,RESULT(r1)
-#ifdef SHOW_SYSCALLS
- bl do_show_syscall_exit
- ld r3,RESULT(r1)
-#endif
- CURRENT_THREAD_INFO(r12, r1)
-
- ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- unrecov_restore
-#endif
- /*
- * Disable interrupts so current_thread_info()->flags can't change,
- * and so that we don't get interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- ld r10,PACAKMSR(r13)
- /*
- * For performance reasons we clear RI the same time that we
- * clear EE. We only need to clear RI just before we restore r13
- * below, but batching it with EE saves us one expensive mtmsrd call.
- * We have to be careful to restore RI if we branch anywhere from
- * here (eg syscall_exit_work).
- */
- li r9,MSR_RI
- andc r11,r10,r9
- mtmsrd r11,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- ld r9,TI_FLAGS(r12)
- li r11,-_LAST_ERRNO
- andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_USER_WORK_MASK|_TIF_PERSYSCALL_MASK)
- bne- syscall_exit_work
- cmpld r3,r11
- ld r5,_CCR(r1)
- bge- syscall_error
-.Lsyscall_error_cont:
- ld r7,_NIP(r1)
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
- andi. r6,r8,MSR_PR
- ld r4,_LINK(r1)
-
- beq- 1f
- ACCOUNT_CPU_USER_EXIT(r11, r12)
- HMT_MEDIUM_LOW_HAS_PPR
- ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
-1: ld r2,GPR2(r1)
- ld r1,GPR1(r1)
- mtlr r4
- mtcr r5
- mtspr SPRN_SRR0,r7
- mtspr SPRN_SRR1,r8
- RFI
- b . /* prevent speculative execution */
-
-syscall_error:
- oris r5,r5,0x1000 /* Set SO bit in CR */
- neg r3,r3
- std r5,_CCR(r1)
- b .Lsyscall_error_cont
-
-/* Traced system call support */
-syscall_dotrace:
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_enter
- /*
- * Restore argument registers possibly just changed.
- * We use the return value of do_syscall_trace_enter
- * for the call number to look up in the table (r0).
- */
- mr r0,r3
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r5,GPR5(r1)
- ld r6,GPR6(r1)
- ld r7,GPR7(r1)
- ld r8,GPR8(r1)
- addi r9,r1,STACK_FRAME_OVERHEAD
- CURRENT_THREAD_INFO(r10, r1)
- ld r10,TI_FLAGS(r10)
- b .Lsyscall_dotrace_cont
-
-syscall_enosys:
- li r3,-ENOSYS
- b syscall_exit
-
-syscall_exit_work:
-#ifdef CONFIG_PPC_BOOK3S
- mtmsrd r10,1 /* Restore RI */
-#endif
- /* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
- If TIF_NOERROR is set, just save r3 as it is. */
-
- andi. r0,r9,_TIF_RESTOREALL
- beq+ 0f
- REST_NVGPRS(r1)
- b 2f
-0: cmpld r3,r11 /* r10 is -LAST_ERRNO */
- blt+ 1f
- andi. r0,r9,_TIF_NOERROR
- bne- 1f
- ld r5,_CCR(r1)
- neg r3,r3
- oris r5,r5,0x1000 /* Set SO bit in CR */
- std r5,_CCR(r1)
-1: std r3,GPR3(r1)
-2: andi. r0,r9,(_TIF_PERSYSCALL_MASK)
- beq 4f
-
- /* Clear per-syscall TIF flags if any are set. */
-
- li r11,_TIF_PERSYSCALL_MASK
- addi r12,r12,TI_FLAGS
-3: ldarx r10,0,r12
- andc r10,r10,r11
- stdcx. r10,0,r12
- bne- 3b
- subi r12,r12,TI_FLAGS
-
-4: /* Anything else left to do? */
- SET_DEFAULT_THREAD_PPR(r3, r10) /* Set thread.ppr = 3 */
- andi. r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
- beq ret_from_except_lite
-
- /* Re-enable interrupts */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 1
-#else
- ld r10,PACAKMSR(r13)
- ori r10,r10,MSR_EE
- mtmsrd r10,1
-#endif /* CONFIG_PPC_BOOK3E */
-
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_syscall_trace_leave
- b ret_from_except
-
-/* Save non-volatile GPRs, if not already saved. */
-_GLOBAL(save_nvgprs)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- beqlr-
- SAVE_NVGPRS(r1)
- clrrdi r0,r11,1
- std r0,_TRAP(r1)
- blr
-
-
-/*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace. Therefore we need
- * to save all the nonvolatile registers (r14 - r31) before calling
- * the C code. Similarly, fork, vfork and clone need the full
- * register state on the stack so that it can be copied to the child.
- */
-
-_GLOBAL(ppc_fork)
- bl save_nvgprs
- bl sys_fork
- b syscall_exit
-
-_GLOBAL(ppc_vfork)
- bl save_nvgprs
- bl sys_vfork
- b syscall_exit
-
-_GLOBAL(ppc_clone)
- bl save_nvgprs
- bl sys_clone
- b syscall_exit
-
-_GLOBAL(ppc32_swapcontext)
- bl save_nvgprs
- bl compat_sys_swapcontext
- b syscall_exit
-
-_GLOBAL(ppc64_swapcontext)
- bl save_nvgprs
- bl sys_swapcontext
- b syscall_exit
-
-_GLOBAL(ret_from_fork)
- bl schedule_tail
- REST_NVGPRS(r1)
- li r3,0
- b syscall_exit
-
-_GLOBAL(ret_from_kernel_thread)
- bl schedule_tail
- REST_NVGPRS(r1)
- mtlr r14
- mr r3,r15
-#if defined(_CALL_ELF) && _CALL_ELF == 2
- mr r12,r14
-#endif
- blrl
- li r3,0
- b syscall_exit
-
-/*
- * This routine switches between two different tasks. The process
- * state of one is saved on its kernel stack. Then the state
- * of the other is restored from its kernel stack. The memory
- * management hardware is updated to the second process's state.
- * Finally, we can return to the second process, via ret_from_except.
- * On entry, r3 points to the THREAD for the current task, r4
- * points to the THREAD for the new task.
- *
- * Note: there are two ways to get to the "going out" portion
- * of this code; either by coming in via the entry (_switch)
- * or via "fork" which must set up an environment equivalent
- * to the "_switch" path. If you change this you'll have to change
- * the fork code also.
- *
- * The code which creates the new task context is in 'copy_thread'
- * in arch/powerpc/kernel/process.c
- */
- .align 7
-_GLOBAL(_switch)
- mflr r0
- std r0,16(r1)
- stdu r1,-SWITCH_FRAME_SIZE(r1)
- /* r3-r13 are caller saved -- Cort */
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
- mflr r20 /* Return to switch caller */
- mfmsr r22
- li r0, MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_VSX@h /* Disable VSX */
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- oris r0,r0,MSR_VEC@h /* Disable altivec */
- mfspr r24,SPRN_VRSAVE /* save vrsave register value */
- std r24,THREAD_VRSAVE(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
- and. r0,r0,r22
- beq+ 1f
- andc r22,r22,r0
- MTMSRD(r22)
- isync
-1: std r20,_NIP(r1)
- mfcr r23
- std r23,_CCR(r1)
- std r1,KSP(r3) /* Set old stack pointer */
-
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_FTR_SECTION
- /* Event based branch registers */
- mfspr r0, SPRN_BESCR
- std r0, THREAD_BESCR(r3)
- mfspr r0, SPRN_EBBHR
- std r0, THREAD_EBBHR(r3)
- mfspr r0, SPRN_EBBRR
- std r0, THREAD_EBBRR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-#endif
-
-#ifdef CONFIG_SMP
- /* We need a sync somewhere here to make sure that if the
- * previous task gets rescheduled on another CPU, it sees all
- * stores it has performed on this one.
- */
- sync
-#endif /* CONFIG_SMP */
-
- /*
- * If we optimise away the clear of the reservation in system
- * calls because we know the CPU tracks the address of the
- * reservation, then we need to clear it here to cover the
- * case that the kernel context switch path has no larx
- * instructions.
- */
-BEGIN_FTR_SECTION
- ldarx r6,0,r1
-END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
-
-#ifdef CONFIG_PPC_BOOK3S
-/* Cancel all explict user streams as they will have no use after context
- * switch and will stop the HW from creating streams itself
- */
- DCBT_STOP_ALL_STREAM_IDS(r6)
-#endif
-
- addi r6,r4,-THREAD /* Convert THREAD to 'current' */
- std r6,PACACURRENT(r13) /* Set new 'current' */
-
- ld r8,KSP(r4) /* new stack pointer */
-#ifdef CONFIG_PPC_BOOK3S
-BEGIN_FTR_SECTION
- clrrdi r6,r8,28 /* get its ESID */
- clrrdi r9,r1,28 /* get current sp ESID */
-FTR_SECTION_ELSE
- clrrdi r6,r8,40 /* get its 1T ESID */
- clrrdi r9,r1,40 /* get current sp 1T ESID */
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
- clrldi. r0,r6,2 /* is new ESID c00000000? */
- cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
- cror eq,4*cr1+eq,eq
- beq 2f /* if yes, don't slbie it */
-
- /* Bolt in the new stack SLB entry */
- ld r7,KSP_VSID(r4) /* Get new stack's VSID */
- oris r0,r6,(SLB_ESID_V)@h
- ori r0,r0,(SLB_NUM_BOLTED-1)@l
-BEGIN_FTR_SECTION
- li r9,MMU_SEGSIZE_1T /* insert B field */
- oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
- rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-
- /* Update the last bolted SLB. No write barriers are needed
- * here, provided we only update the current CPU's SLB shadow
- * buffer.
- */
- ld r9,PACA_SLBSHADOWPTR(r13)
- li r12,0
- std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
- li r12,SLBSHADOW_STACKVSID
- STDX_BE r7,r12,r9 /* Save VSID */
- li r12,SLBSHADOW_STACKESID
- STDX_BE r0,r12,r9 /* Save ESID */
-
- /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
- * we have 1TB segments, the only CPUs known to have the errata
- * only support less than 1TB of system memory and we'll never
- * actually hit this code path.
- */
-
- slbie r6
- slbie r6 /* Workaround POWER5 < DD2.1 issue */
- slbmte r7,r0
- isync
-2:
-#endif /* !CONFIG_PPC_BOOK3S */
-
- CURRENT_THREAD_INFO(r7, r8) /* base of new stack */
- /* Note: this uses SWITCH_FRAME_SIZE rather than INT_FRAME_SIZE
- because we don't need to leave the 288-byte ABI gap at the
- top of the kernel stack. */
- addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
-
- mr r1,r8 /* start using new stack pointer */
- std r7,PACAKSAVE(r13)
-
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_FTR_SECTION
- /* Event based branch registers */
- ld r0, THREAD_BESCR(r4)
- mtspr SPRN_BESCR, r0
- ld r0, THREAD_EBBHR(r4)
- mtspr SPRN_EBBHR, r0
- ld r0, THREAD_EBBRR(r4)
- mtspr SPRN_EBBRR, r0
-
- ld r0,THREAD_TAR(r4)
- mtspr SPRN_TAR,r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-#endif
-
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
- ld r0,THREAD_VRSAVE(r4)
- mtspr SPRN_VRSAVE,r0 /* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
- lwz r6,THREAD_DSCR_INHERIT(r4)
- ld r0,THREAD_DSCR(r4)
- cmpwi r6,0
- bne 1f
- ld r0,PACA_DSCR(r13)
-1:
-BEGIN_FTR_SECTION_NESTED(70)
- mfspr r8, SPRN_FSCR
- rldimi r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG)
- mtspr SPRN_FSCR, r8
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
- cmpd r0,r25
- beq 2f
- mtspr SPRN_DSCR,r0
-2:
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
-
- ld r6,_CCR(r1)
- mtcrf 0xFF,r6
-
- /* r3-r13 are destroyed -- Cort */
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
-
- /* convert old thread to its task_struct for return value */
- addi r3,r3,-THREAD
- ld r7,_NIP(r1) /* Return to _switch caller in new task */
- mtlr r7
- addi r1,r1,SWITCH_FRAME_SIZE
- blr
-
- .align 7
-_GLOBAL(ret_from_except)
- ld r11,_TRAP(r1)
- andi. r0,r11,1
- bne ret_from_except_lite
- REST_NVGPRS(r1)
-
-_GLOBAL(ret_from_except_lite)
- /*
- * Disable interrupts so that current_thread_info()->flags
- * can't change between when we test it and when we return
- * from the interrupt.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-
- CURRENT_THREAD_INFO(r9, r1)
- ld r3,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3E
- ld r10,PACACURRENT(r13)
-#endif /* CONFIG_PPC_BOOK3E */
- ld r4,TI_FLAGS(r9)
- andi. r3,r3,MSR_PR
- beq resume_kernel
-#ifdef CONFIG_PPC_BOOK3E
- lwz r3,(THREAD+THREAD_DBCR0)(r10)
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* Check current_thread_info()->flags */
- andi. r0,r4,_TIF_USER_WORK_MASK
-#ifdef CONFIG_PPC_BOOK3E
- bne 1f
- /*
- * Check to see if the dbcr0 register is set up to debug.
- * Use the internal debug mode bit to do this.
- */
- andis. r0,r3,DBCR0_IDM@h
- beq restore
- mfmsr r0
- rlwinm r0,r0,0,~MSR_DE /* Clear MSR.DE */
- mtmsr r0
- mtspr SPRN_DBCR0,r3
- li r10, -1
- mtspr SPRN_DBSR,r10
- b restore
-#else
- beq restore
-#endif
-1: andi. r0,r4,_TIF_NEED_RESCHED
- beq 2f
- bl restore_interrupts
- SCHEDULE_USER
- b ret_from_except_lite
-2:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- andi. r0,r4,_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM
- bne 3f /* only restore TM if nothing else to do */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl restore_tm_state
- b restore
-3:
-#endif
- bl save_nvgprs
- bl restore_interrupts
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_notify_resume
- b ret_from_except
-
-resume_kernel:
- /* check current_thread_info, _TIF_EMULATE_STACK_STORE */
- andis. r8,r4,_TIF_EMULATE_STACK_STORE@h
- beq+ 1f
-
- addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */
-
- lwz r3,GPR1(r1)
- subi r3,r3,INT_FRAME_SIZE /* dst: Allocate a trampoline exception frame */
- mr r4,r1 /* src: current exception frame */
- mr r1,r3 /* Reroute the trampoline frame to r1 */
-
- /* Copy from the original to the trampoline. */
- li r5,INT_FRAME_SIZE/8 /* size: INT_FRAME_SIZE */
- li r6,0 /* start offset: 0 */
- mtctr r5
-2: ldx r0,r6,r4
- stdx r0,r6,r3
- addi r6,r6,8
- bdnz 2b
-
- /* Do real store operation to complete stwu */
- lwz r5,GPR1(r1)
- std r8,0(r5)
-
- /* Clear _TIF_EMULATE_STACK_STORE flag */
- lis r11,_TIF_EMULATE_STACK_STORE@h
- addi r5,r9,TI_FLAGS
-0: ldarx r4,0,r5
- andc r4,r4,r11
- stdcx. r4,0,r5
- bne- 0b
-1:
-
-#ifdef CONFIG_PREEMPT
- /* Check if we need to preempt */
- andi. r0,r4,_TIF_NEED_RESCHED
- beq+ restore
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
- ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
- bne restore
-
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first and reconcile irq state.
- */
- RECONCILE_IRQ_STATE(r3,r4)
-1: bl preempt_schedule_irq
-
- /* Re-test flags and eventually loop */
- CURRENT_THREAD_INFO(r9, r1)
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
-
- /*
- * arch_local_irq_restore() from preempt_schedule_irq above may
- * enable hard interrupt but we really should disable interrupts
- * when we return from the interrupt, and so that we don't get
- * interrupted after loading SRR0/1.
- */
-#ifdef CONFIG_PPC_BOOK3E
- wrteei 0
-#else
- ld r10,PACAKMSR(r13) /* Get kernel MSR without EE */
- mtmsrd r10,1 /* Update machine state */
-#endif /* CONFIG_PPC_BOOK3E */
-#endif /* CONFIG_PREEMPT */
-
- .globl fast_exc_return_irq
-fast_exc_return_irq:
-restore:
- /*
- * This is the main kernel exit path. First we check if we
- * are about to re-enable interrupts
- */
- ld r5,SOFTE(r1)
- lbz r6,PACASOFTIRQEN(r13)
- cmpwi cr0,r5,0
- beq restore_irq_off
-
- /* We are enabling, were we already enabled ? Yes, just return */
- cmpwi cr0,r6,1
- beq cr0,do_restore
-
- /*
- * We are about to soft-enable interrupts (we are hard disabled
- * at this point). We check if there's anything that needs to
- * be replayed first.
- */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bne- restore_check_irq_replay
-
- /*
- * Get here when nothing happened while soft-disabled, just
- * soft-enable and move-on. We will hard-enable as a side
- * effect of rfi
- */
-restore_no_replay:
- TRACE_ENABLE_INTS
- li r0,1
- stb r0,PACASOFTIRQEN(r13);
-
- /*
- * Final return path. BookE is handled in a different file
- */
-do_restore:
-#ifdef CONFIG_PPC_BOOK3E
- b exception_return_book3e
-#else
- /*
- * Clear the reservation. If we know the CPU tracks the address of
- * the reservation then we can potentially save some cycles and use
- * a larx. On POWER6 and POWER7 this is significantly faster.
- */
-BEGIN_FTR_SECTION
- stdcx. r0,0,r1 /* to clear the reservation */
-FTR_SECTION_ELSE
- ldarx r4,0,r1
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
-
- /*
- * Some code path such as load_up_fpu or altivec return directly
- * here. They run entirely hard disabled and do not alter the
- * interrupt state. They also don't use lwarx/stwcx. and thus
- * are known not to leave dangling reservations.
- */
- .globl fast_exception_return
-fast_exception_return:
- ld r3,_MSR(r1)
- ld r4,_CTR(r1)
- ld r0,_LINK(r1)
- mtctr r4
- mtlr r0
- ld r4,_XER(r1)
- mtspr SPRN_XER,r4
-
- REST_8GPRS(5, r1)
-
- andi. r0,r3,MSR_RI
- beq- unrecov_restore
-
- /* Load PPR from thread struct before we clear MSR:RI */
-BEGIN_FTR_SECTION
- ld r2,PACACURRENT(r13)
- ld r2,TASKTHREADPPR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
- /*
- * Clear RI before restoring r13. If we are returning to
- * userspace and we take an exception after restoring r13,
- * we end up corrupting the userspace r13 value.
- */
- ld r4,PACAKMSR(r13) /* Get kernel MSR without EE */
- andc r4,r4,r0 /* r0 contains MSR_RI here */
- mtmsrd r4,1
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* TM debug */
- std r3, PACATMSCRATCH(r13) /* Stash returned-to MSR */
-#endif
- /*
- * r13 is our per cpu area, only restore it if we are returning to
- * userspace the value stored in the stack frame may belong to
- * another CPU.
- */
- andi. r0,r3,MSR_PR
- beq 1f
-BEGIN_FTR_SECTION
- mtspr SPRN_PPR,r2 /* Restore PPR */
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ACCOUNT_CPU_USER_EXIT(r2, r4)
- REST_GPR(13, r1)
-1:
- mtspr SPRN_SRR1,r3
-
- ld r2,_CCR(r1)
- mtcrf 0xFF,r2
- ld r2,_NIP(r1)
- mtspr SPRN_SRR0,r2
-
- ld r0,GPR0(r1)
- ld r2,GPR2(r1)
- ld r3,GPR3(r1)
- ld r4,GPR4(r1)
- ld r1,GPR1(r1)
-
- rfid
- b . /* prevent speculative execution */
-
-#endif /* CONFIG_PPC_BOOK3E */
-
- /*
- * We are returning to a context with interrupts soft disabled.
- *
- * However, we may also about to hard enable, so we need to
- * make sure that in this case, we also clear PACA_IRQ_HARD_DIS
- * or that bit can get out of sync and bad things will happen
- */
-restore_irq_off:
- ld r3,_MSR(r1)
- lbz r7,PACAIRQHAPPENED(r13)
- andi. r0,r3,MSR_EE
- beq 1f
- rlwinm r7,r7,0,~PACA_IRQ_HARD_DIS
- stb r7,PACAIRQHAPPENED(r13)
-1: li r0,0
- stb r0,PACASOFTIRQEN(r13);
- TRACE_DISABLE_INTS
- b do_restore
-
- /*
- * Something did happen, check if a re-emit is needed
- * (this also clears paca->irq_happened)
- */
-restore_check_irq_replay:
- /* XXX: We could implement a fast path here where we check
- * for irq_happened being just 0x01, in which case we can
- * clear it and return. That means that we would potentially
- * miss a decrementer having wrapped all the way around.
- *
- * Still, this might be useful for things like hash_page
- */
- bl __check_irq_replay
- cmpwi cr0,r3,0
- beq restore_no_replay
-
- /*
- * We need to re-emit an interrupt. We do so by re-using our
- * existing exception frame. We first change the trap value,
- * but we need to ensure we preserve the low nibble of it
- */
- ld r4,_TRAP(r1)
- clrldi r4,r4,60
- or r4,r4,r3
- std r4,_TRAP(r1)
-
- /*
- * Then find the right handler and call it. Interrupts are
- * still soft-disabled and we keep them that way.
- */
- cmpwi cr0,r3,0x500
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl do_IRQ
- b ret_from_except
-1: cmpwi cr0,r3,0xe60
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl handle_hmi_exception
- b ret_from_except
-1: cmpwi cr0,r3,0x900
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl timer_interrupt
- b ret_from_except
-#ifdef CONFIG_PPC_DOORBELL
-1:
-#ifdef CONFIG_PPC_BOOK3E
- cmpwi cr0,r3,0x280
-#else
- BEGIN_FTR_SECTION
- cmpwi cr0,r3,0xe80
- FTR_SECTION_ELSE
- cmpwi cr0,r3,0xa00
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-#endif /* CONFIG_PPC_BOOK3E */
- bne 1f
- addi r3,r1,STACK_FRAME_OVERHEAD;
- bl doorbell_exception
- b ret_from_except
-#endif /* CONFIG_PPC_DOORBELL */
-1: b ret_from_except /* What else to do here ? */
-
-unrecov_restore:
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b unrecov_restore
-
-#ifdef CONFIG_PPC_RTAS
-/*
- * On CHRP, the Run-Time Abstraction Services (RTAS) have to be
- * called with the MMU off.
- *
- * In addition, we need to be in 32b mode, at least for now.
- *
- * Note: r3 is an input parameter to rtas, so don't trash it...
- */
-_GLOBAL(enter_rtas)
- mflr r0
- std r0,16(r1)
- stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
-
- /* Because RTAS is running in 32b mode, it clobbers the high order half
- * of all registers that it saves. We therefore save those registers
- * RTAS might touch to the stack. (r0, r3-r13 are caller saved)
- */
- SAVE_GPR(2, r1) /* Save the TOC */
- SAVE_GPR(13, r1) /* Save paca */
- SAVE_8GPRS(14, r1) /* Save the non-volatiles */
- SAVE_10GPRS(22, r1) /* ditto */
-
- mfcr r4
- std r4,_CCR(r1)
- mfctr r5
- std r5,_CTR(r1)
- mfspr r6,SPRN_XER
- std r6,_XER(r1)
- mfdar r7
- std r7,_DAR(r1)
- mfdsisr r8
- std r8,_DSISR(r1)
-
- /* Temporary workaround to clear CR until RTAS can be modified to
- * ignore all bits.
- */
- li r0,0
- mtcr r0
-
-#ifdef CONFIG_BUG
- /* There is no way it is acceptable to get here with interrupts enabled,
- * check it with the asm equivalent of WARN_ON
- */
- lbz r0,PACASOFTIRQEN(r13)
-1: tdnei r0,0
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,BUGFLAG_WARNING
-#endif
-
- /* Hard-disable interrupts */
- mfmsr r6
- rldicl r7,r6,48,1
- rotldi r7,r7,16
- mtmsrd r7,1
-
- /* Unfortunately, the stack pointer and the MSR are also clobbered,
- * so they are saved in the PACA which allows us to restore
- * our original state after RTAS returns.
- */
- std r1,PACAR1(r13)
- std r6,PACASAVEDMSR(r13)
-
- /* Setup our real return addr */
- LOAD_REG_ADDR(r4,rtas_return_loc)
- clrldi r4,r4,2 /* convert to realmode address */
- mtlr r4
-
- li r0,0
- ori r0,r0,MSR_EE|MSR_SE|MSR_BE|MSR_RI
- andc r0,r6,r0
-
- li r9,1
- rldicr r9,r9,MSR_SF_LG,(63-MSR_SF_LG)
- ori r9,r9,MSR_IR|MSR_DR|MSR_FE0|MSR_FE1|MSR_FP|MSR_RI|MSR_LE
- andc r6,r0,r9
- sync /* disable interrupts so SRR0/1 */
- mtmsrd r0 /* don't get trashed */
-
- LOAD_REG_ADDR(r4, rtas)
- ld r5,RTASENTRY(r4) /* get the rtas->entry value */
- ld r4,RTASBASE(r4) /* get the rtas->base value */
-
- mtspr SPRN_SRR0,r5
- mtspr SPRN_SRR1,r6
- rfid
- b . /* prevent speculative execution */
-
-rtas_return_loc:
- FIXUP_ENDIAN
-
- /* relocation is off at this point */
- GET_PACA(r4)
- clrldi r4,r4,2 /* convert to realmode address */
-
- bcl 20,31,$+4
-0: mflr r3
- ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
-
- mfmsr r6
- li r0,MSR_RI
- andc r6,r6,r0
- sync
- mtmsrd r6
-
- ld r1,PACAR1(r4) /* Restore our SP */
- ld r4,PACASAVEDMSR(r4) /* Restore our MSR */
-
- mtspr SPRN_SRR0,r3
- mtspr SPRN_SRR1,r4
- rfid
- b . /* prevent speculative execution */
-
- .align 3
-1: .llong rtas_restore_regs
-
-rtas_restore_regs:
- /* relocation is on at this point */
- REST_GPR(2, r1) /* Restore the TOC */
- REST_GPR(13, r1) /* Restore paca */
- REST_8GPRS(14, r1) /* Restore the non-volatiles */
- REST_10GPRS(22, r1) /* ditto */
-
- GET_PACA(r13)
-
- ld r4,_CCR(r1)
- mtcr r4
- ld r5,_CTR(r1)
- mtctr r5
- ld r6,_XER(r1)
- mtspr SPRN_XER,r6
- ld r7,_DAR(r1)
- mtdar r7
- ld r8,_DSISR(r1)
- mtdsisr r8
-
- addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
- ld r0,16(r1) /* get return address */
-
- mtlr r0
- blr /* return to caller */
-
-#endif /* CONFIG_PPC_RTAS */
-
-_GLOBAL(enter_prom)
- mflr r0
- std r0,16(r1)
- stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
-
- /* Because PROM is running in 32b mode, it clobbers the high order half
- * of all registers that it saves. We therefore save those registers
- * PROM might touch to the stack. (r0, r3-r13 are caller saved)
- */
- SAVE_GPR(2, r1)
- SAVE_GPR(13, r1)
- SAVE_8GPRS(14, r1)
- SAVE_10GPRS(22, r1)
- mfcr r10
- mfmsr r11
- std r10,_CCR(r1)
- std r11,_MSR(r1)
-
- /* Put PROM address in SRR0 */
- mtsrr0 r4
-
- /* Setup our trampoline return addr in LR */
- bcl 20,31,$+4
-0: mflr r4
- addi r4,r4,(1f - 0b)
- mtlr r4
-
- /* Prepare a 32-bit mode big endian MSR
- */
-#ifdef CONFIG_PPC_BOOK3E
- rlwinm r11,r11,0,1,31
- mtsrr1 r11
- rfi
-#else /* CONFIG_PPC_BOOK3E */
- LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_ISF | MSR_LE)
- andc r11,r11,r12
- mtsrr1 r11
- rfid
-#endif /* CONFIG_PPC_BOOK3E */
-
-1: /* Return from OF */
- FIXUP_ENDIAN
-
- /* Just make sure that r1 top 32 bits didn't get
- * corrupt by OF
- */
- rldicl r1,r1,0,32
-
- /* Restore the MSR (back to 64 bits) */
- ld r0,_MSR(r1)
- MTMSRD(r0)
- isync
-
- /* Restore other registers */
- REST_GPR(2, r1)
- REST_GPR(13, r1)
- REST_8GPRS(14, r1)
- REST_10GPRS(22, r1)
- ld r4,_CCR(r1)
- mtcr r4
-
- addi r1,r1,PROM_FRAME_SIZE
- ld r0,16(r1)
- mtlr r0
- blr
-
-#ifdef CONFIG_FUNCTION_TRACER
-#ifdef CONFIG_DYNAMIC_FTRACE
-_GLOBAL(mcount)
-_GLOBAL(_mcount)
- blr
-
-_GLOBAL_TOC(ftrace_caller)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
- subi r3, r3, MCOUNT_INSN_SIZE
-.globl ftrace_call
-ftrace_call:
- bl ftrace_stub
- nop
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-.globl ftrace_graph_call
-ftrace_graph_call:
- b ftrace_graph_stub
-_GLOBAL(ftrace_graph_stub)
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-_GLOBAL(ftrace_stub)
- blr
-#else
-_GLOBAL_TOC(_mcount)
- /* Taken from output of objdump from lib64/glibc */
- mflr r3
- ld r11, 0(r1)
- stdu r1, -112(r1)
- std r3, 128(r1)
- ld r4, 16(r11)
-
- subi r3, r3, MCOUNT_INSN_SIZE
- LOAD_REG_ADDR(r5,ftrace_trace_function)
- ld r5,0(r5)
- ld r5,0(r5)
- mtctr r5
- bctrl
- nop
-
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- b ftrace_graph_caller
-#endif
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
-_GLOBAL(ftrace_stub)
- blr
-
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-_GLOBAL(ftrace_graph_caller)
- /* load r4 with local address */
- ld r4, 128(r1)
- subi r4, r4, MCOUNT_INSN_SIZE
-
- /* get the parent address */
- ld r11, 112(r1)
- addi r3, r11, 16
-
- bl prepare_ftrace_return
- nop
-
- ld r0, 128(r1)
- mtlr r0
- addi r1, r1, 112
- blr
-
-_GLOBAL(return_to_handler)
- /* need to save return values */
- std r4, -24(r1)
- std r3, -16(r1)
- std r31, -8(r1)
- mr r31, r1
- stdu r1, -112(r1)
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- ld r1, 0(r1)
- ld r4, -24(r1)
- ld r3, -16(r1)
- ld r31, -8(r1)
-
- /* Jump back to real return address */
- blr
-
-_GLOBAL(mod_return_to_handler)
- /* need to save return values */
- std r4, -32(r1)
- std r3, -24(r1)
- /* save TOC */
- std r2, -16(r1)
- std r31, -8(r1)
- mr r31, r1
- stdu r1, -112(r1)
-
- /*
- * We are in a module using the module's TOC.
- * Switch to our TOC to run inside the core kernel.
- */
- ld r2, PACATOC(r13)
-
- bl ftrace_return_to_handler
- nop
-
- /* return value has real return address */
- mtlr r3
-
- ld r1, 0(r1)
- ld r4, -32(r1)
- ld r3, -24(r1)
- ld r2, -16(r1)
- ld r31, -8(r1)
-
- /* Jump back to real return address */
- blr
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 9f1ebf7338f1..6a414ed5a411 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2012 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <linux/threads.h>
#include <asm/epapr_hcalls.h>
#include <asm/reg.h>
@@ -20,12 +17,17 @@
#ifndef CONFIG_PPC64
/* epapr_ev_idle() was derived from e500_idle() */
_GLOBAL(epapr_ev_idle)
- CURRENT_THREAD_INFO(r3, r1)
- PPC_LL r4, TI_LOCAL_FLAGS(r3) /* set napping bit */
+ PPC_LL r4, TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4, r4,_TLF_NAPPING /* so when we take an exception */
- PPC_STL r4, TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */
+#ifdef CONFIG_BOOKE
wrteei 1
+#else
+ mfmsr r4
+ ori r4, r4, MSR_EE
+ mtmsr r4
+#endif
idle_loop:
LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
@@ -53,3 +55,4 @@ epapr_hypercall_start:
nop
nop
blr
+EXPORT_SYMBOL(epapr_hypercall_start)
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 59e4ba74975d..247ab2acaccc 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -1,19 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* ePAPR para-virtualization support.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright (C) 2012 Freescale Semiconductor, Inc.
*/
@@ -21,8 +9,9 @@
#include <linux/of_fdt.h>
#include <asm/epapr_hcalls.h>
#include <asm/cacheflush.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
#include <asm/machdep.h>
+#include <asm/inst.h>
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
extern void epapr_ev_idle(void);
@@ -48,7 +37,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
return -1;
for (i = 0; i < (len / 4); i++) {
- u32 inst = be32_to_cpu(insts[i]);
+ ppc_inst_t inst = ppc_inst(be32_to_cpu(insts[i]));
patch_instruction(epapr_hypercall_start + i, inst);
#if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
patch_instruction(epapr_ev_idle_start + i, inst);
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index bb9cac6c8051..63f6b9f513a4 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -1,14 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Boot code and exception vectors for Book3E processors
*
* Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/linkage.h>
#include <linux/threads.h>
#include <asm/reg.h>
#include <asm/page.h>
@@ -17,7 +14,6 @@
#include <asm/cputable.h>
#include <asm/setup.h>
#include <asm/thread_info.h>
-#include <asm/reg_a2.h>
#include <asm/exception-64e.h>
#include <asm/bug.h>
#include <asm/irqflags.h>
@@ -27,6 +23,12 @@
#include <asm/hw_irq.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/feature-fixups.h>
+#include <asm/context_tracking.h>
+
+/* 64e interrupt returns always use SRR registers */
+#define fast_interrupt_return fast_interrupt_return_srr
+#define interrupt_return interrupt_return_srr
/* XXX This will ultimately add space for a special exception save
* structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -64,10 +66,7 @@
#define SPECIAL_EXC_LOAD(reg, name) \
ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
-special_reg_save:
- lbz r9,PACAIRQHAPPENED(r13)
- RECONCILE_IRQ_STATE(r3,r4)
-
+SYM_CODE_START_LOCAL(special_reg_save)
/*
* We only need (or have stack space) to save this stuff if
* we interrupted the kernel.
@@ -76,17 +75,6 @@ special_reg_save:
andi. r3,r3,MSR_PR
bnelr
- /* Copy info into temporary exception thread info */
- ld r11,PACAKSAVE(r13)
- CURRENT_THREAD_INFO(r11, r11)
- CURRENT_THREAD_INFO(r12, r1)
- ld r10,TI_FLAGS(r11)
- std r10,TI_FLAGS(r12)
- ld r10,TI_PREEMPT(r11)
- std r10,TI_PREEMPT(r12)
- ld r10,TI_TASK(r11)
- std r10,TI_TASK(r12)
-
/*
* Advance to the next TLB exception frame for handler
* types that don't do it automatically.
@@ -132,27 +120,25 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS5,r10
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- SPECIAL_EXC_STORE(r9,IRQHAPPENED)
-
mfspr r10,SPRN_DEAR
SPECIAL_EXC_STORE(r10,DEAR)
mfspr r10,SPRN_ESR
SPECIAL_EXC_STORE(r10,ESR)
- lbz r10,PACASOFTIRQEN(r13)
- SPECIAL_EXC_STORE(r10,SOFTE)
ld r10,_NIP(r1)
SPECIAL_EXC_STORE(r10,CSRR0)
ld r10,_MSR(r1)
SPECIAL_EXC_STORE(r10,CSRR1)
blr
+SYM_CODE_END(special_reg_save)
-ret_from_level_except:
+SYM_CODE_START_LOCAL(ret_from_level_except)
ld r3,_MSR(r1)
andi. r3,r3,MSR_PR
beq 1f
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
1:
LOAD_REG_ADDR(r11,extlb_level_exc)
@@ -206,27 +192,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_MAS8,r10
END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
- lbz r6,PACASOFTIRQEN(r13)
- ld r5,SOFTE(r1)
-
- /* Interrupts had better not already be enabled... */
- twnei r6,0
-
- cmpwi cr0,r5,0
- beq 1f
-
- TRACE_ENABLE_INTS
- stb r5,PACASOFTIRQEN(r13)
-1:
- /*
- * Restore PACAIRQHAPPENED rather than setting it based on
- * the return MSR[EE], since we could have interrupted
- * __check_irq_replay() or other inconsistent transitory
- * states that must remain that way.
- */
- SPECIAL_EXC_LOAD(r10,IRQHAPPENED)
- stb r10,PACAIRQHAPPENED(r13)
-
SPECIAL_EXC_LOAD(r10,DEAR)
mtspr SPRN_DEAR,r10
SPECIAL_EXC_LOAD(r10,ESR)
@@ -234,8 +199,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
stdcx. r0,0,r1 /* to clear the reservation */
- REST_4GPRS(2, r1)
- REST_4GPRS(6, r1)
+ REST_GPRS(2, 9, r1)
ld r10,_CTR(r1)
ld r11,_XER(r1)
@@ -243,6 +207,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mtxer r11
blr
+SYM_CODE_END(ret_from_level_except)
.macro ret_from_level srr0 srr1 paca_ex scratch
bl ret_from_level_except
@@ -253,17 +218,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mtlr r10
mtcr r11
- ld r10,GPR10(r1)
- ld r11,GPR11(r1)
- ld r12,GPR12(r1)
+ REST_GPRS(10, 12, r1)
mtspr \scratch,r0
std r10,\paca_ex+EX_R10(r13);
std r11,\paca_ex+EX_R11(r13);
ld r10,_NIP(r1)
ld r11,_MSR(r1)
- ld r0,GPR0(r1)
- ld r1,GPR1(r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
mtspr \srr0,r10
mtspr \srr1,r11
ld r10,\paca_ex+EX_R10(r13)
@@ -271,13 +234,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r13,\scratch
.endm
-ret_from_crit_except:
+SYM_CODE_START_LOCAL(ret_from_crit_except)
ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH
rfci
+SYM_CODE_END(ret_from_crit_except)
-ret_from_mc_except:
+SYM_CODE_START_LOCAL(ret_from_mc_except)
ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH
rfmci
+SYM_CODE_END(ret_from_mc_except)
/* Exception prolog code for all exceptions */
#define EXCEPTION_PROLOG(n, intnum, type, addition) \
@@ -295,7 +260,8 @@ ret_from_mc_except:
andi. r10,r11,MSR_PR; /* save stack pointer */ \
beq 1f; /* branch around if supervisor */ \
ld r1,PACAKSAVE(r13); /* get kernel stack coming from usr */\
-1: cmpdi cr1,r1,0; /* check if SP makes sense */ \
+1: type##_BTB_FLUSH \
+ cmpdi cr1,r1,0; /* check if SP makes sense */ \
bge- cr1,exc_##n##_bad_stack;/* bad stack (TODO: out of line) */ \
mfspr r10,SPRN_##type##_SRR0; /* read SRR0 before touching stack */
@@ -327,6 +293,22 @@ ret_from_mc_except:
#define SPRN_MC_SRR0 SPRN_MCSRR0
#define SPRN_MC_SRR1 SPRN_MCSRR1
+#define GEN_BTB_FLUSH \
+ START_BTB_FLUSH_SECTION \
+ beq 1f; \
+ BTB_FLUSH(r10) \
+ 1: \
+ END_BTB_FLUSH_SECTION
+
+#define CRIT_BTB_FLUSH \
+ START_BTB_FLUSH_SECTION \
+ BTB_FLUSH(r10) \
+ END_BTB_FLUSH_SECTION
+
+#define DBG_BTB_FLUSH CRIT_BTB_FLUSH
+#define MC_BTB_FLUSH CRIT_BTB_FLUSH
+#define GDBELL_BTB_FLUSH GEN_BTB_FLUSH
+
#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition) \
EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
@@ -351,10 +333,16 @@ ret_from_mc_except:
#define PROLOG_ADDITION_NONE_MC(n)
#define PROLOG_ADDITION_MASKABLE_GEN(n) \
- lbz r10,PACASOFTIRQEN(r13); /* are irqs soft-disabled ? */ \
- cmpwi cr0,r10,0; /* yes -> go out of line */ \
- beq masked_interrupt_book3e_##n
+ lbz r10,PACAIRQSOFTMASK(r13); /* are irqs soft-masked? */ \
+ andi. r10,r10,IRQS_DISABLED; /* yes -> go out of line */ \
+ bne masked_interrupt_book3e_##n
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
#define PROLOG_ADDITION_2REGS_GEN(n) \
std r14,PACA_EXGEN+EX_R14(r13); \
std r15,PACA_EXGEN+EX_R15(r13)
@@ -374,46 +362,43 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13); \
std r15,PACA_EXMC+EX_R15(r13)
-
/* Core exception code for all exceptions except TLB misses. */
#define EXCEPTION_COMMON_LVL(n, scratch, excf) \
exc_##n##_common: \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ SAVE_GPR(0, r1); /* save r0 in stackframe */ \
+ SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \
std r10,_NIP(r1); /* save SRR0 to stackframe */ \
std r11,_MSR(r1); /* save SRR1 to stackframe */ \
beq 2f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r10,r11);/* accounting (uses cr0+eq) */ \
2: ld r3,excf+EX_R10(r13); /* get back r10 */ \
ld r4,excf+EX_R11(r13); /* get back r11 */ \
mfspr r5,scratch; /* get back r13 */ \
- std r12,GPR12(r1); /* save r12 in stackframe */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
+ SAVE_GPR(12, r1); /* save r12 in stackframe */ \
+ LOAD_PACA_TOC(); /* get kernel TOC into r2 */ \
mflr r6; /* save LR in stackframe */ \
mfctr r7; /* save CTR in stackframe */ \
mfspr r8,SPRN_XER; /* save XER in stackframe */ \
ld r9,excf+EX_R1(r13); /* load orig r1 back from PACA */ \
lwz r10,excf+EX_CR(r13); /* load orig CR back from PACA */ \
- lbz r11,PACASOFTIRQEN(r13); /* get current IRQ softe */ \
- ld r12,exception_marker@toc(r2); \
- li r0,0; \
+ lbz r11,PACAIRQSOFTMASK(r13); /* get current IRQ softe */ \
+ LOAD_REG_IMMEDIATE(r12, STACK_FRAME_REGS_MARKER); \
+ ZEROIZE_GPR(0); \
std r3,GPR10(r1); /* save r10 to stackframe */ \
std r4,GPR11(r1); /* save r11 to stackframe */ \
std r5,GPR13(r1); /* save it to stackframe */ \
std r6,_LINK(r1); \
std r7,_CTR(r1); \
std r8,_XER(r1); \
- li r3,(n)+1; /* indicate partial regs in trap */ \
+ li r3,(n); /* regs.trap vector */ \
std r9,0(r1); /* store stack frame back link */ \
std r10,_CCR(r1); /* store orig CR in stackframe */ \
std r9,GPR1(r1); /* store stack frame back link */ \
std r11,SOFTE(r1); /* and save it to stackframe */ \
- std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \
+ std r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */ \
std r3,_TRAP(r1); /* set trap number */ \
- std r0,RESULT(r1); /* clear regs->result */
+ std r0,RESULT(r1); /* clear regs->result */ \
+ SAVE_NVGPRS(r1); \
+ SANITIZE_NVGPRS(); /* minimise speculation influence */
#define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
@@ -424,28 +409,6 @@ exc_##n##_common: \
#define EXCEPTION_COMMON_DBG(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_DBG_SCRATCH, PACA_EXDBG)
-/*
- * This is meant for exceptions that don't immediately hard-enable. We
- * set a bit in paca->irq_happened to ensure that a subsequent call to
- * arch_local_irq_restore() will properly hard-enable and avoid the
- * fast-path, and then reconcile irq state.
- */
-#define INTS_DISABLE RECONCILE_IRQ_STATE(r3,r4)
-
-/*
- * This is called by exceptions that don't use INTS_DISABLE (that did not
- * touch irq indicators in the PACA). This will restore MSR:EE to it's
- * previous value
- *
- * XXX In the long run, we may want to open-code it in order to separate the
- * load from the wrtee, thus limiting the latency caused by the dependency
- * but at this point, I'll favor code clarity until we have a near to final
- * implementation
- */
-#define INTS_RESTORE_HARD \
- ld r11,_MSR(r1); \
- wrtee r11;
-
/* XXX FIXME: Restore r14/r15 when necessary */
#define BAD_STACK_TRAMPOLINE(n) \
exc_##n##_bad_stack: \
@@ -453,7 +416,7 @@ exc_##n##_bad_stack: \
sth r1,PACA_TRAP_SAVE(r13); /* store trap */ \
b bad_stack_book3e; /* bad stack error */
-/* WARNING: If you change the layout of this stub, make sure you chcek
+/* WARNING: If you change the layout of this stub, make sure you check
* the debug exception handler which handles single stepping
* into exceptions from userspace, and the MM code in
* arch/powerpc/mm/tlb_nohash.c which patches the branch here
@@ -479,7 +442,7 @@ exc_##n##_bad_stack: \
* interrupts happen before the wait instruction.
*/
#define CHECK_NAPPING() \
- CURRENT_THREAD_INFO(r11, r1); \
+ ld r11, PACA_THREAD_INFO(r13); \
ld r10,TI_LOCAL_FLAGS(r11); \
andi. r9,r10,_TLF_NAPPING; \
beq+ 1f; \
@@ -494,18 +457,11 @@ exc_##n##_bad_stack: \
START_EXCEPTION(label); \
NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
EXCEPTION_COMMON(trapnum) \
- INTS_DISABLE; \
ack(r8); \
CHECK_NAPPING(); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
+ addi r3,r1,STACK_INT_FRAME_REGS; \
bl hdlr; \
- b ret_from_except_lite;
-
-/* This value is used to mark exception frames on the stack. */
- .section ".toc","aw"
-exception_marker:
- .tc ID_EXC_MARKER[TC],STACK_FRAME_REGS_MARKER
-
+ b interrupt_return
/*
* And here we have the exception vectors !
@@ -529,8 +485,8 @@ interrupt_base_book3e: /* fake trap */
EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */
EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */
EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */
- EXCEPTION_STUB(0x1c0, data_tlb_miss)
- EXCEPTION_STUB(0x1e0, instruction_tlb_miss)
+ EXCEPTION_STUB(0x1c0, data_tlb_miss_bolted)
+ EXCEPTION_STUB(0x1e0, instruction_tlb_miss_bolted)
EXCEPTION_STUB(0x200, altivec_unavailable)
EXCEPTION_STUB(0x220, altivec_assist)
EXCEPTION_STUB(0x260, perfmon)
@@ -542,19 +498,18 @@ interrupt_base_book3e: /* fake trap */
EXCEPTION_STUB(0x320, ehpriv)
EXCEPTION_STUB(0x340, lrat_error)
- .globl interrupt_end_book3e
-interrupt_end_book3e:
+ .globl __end_interrupts
+__end_interrupts:
/* Critical Input Interrupt */
START_EXCEPTION(critical_input);
CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x100)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_nmi_exception
b ret_from_crit_except
/* Machine Check Interrupt */
@@ -562,10 +517,9 @@ interrupt_end_book3e:
MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_MC(0x000)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl machine_check_exception
b ret_from_mc_except
@@ -575,8 +529,11 @@ interrupt_end_book3e:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x300)
- INTS_DISABLE
b storage_fault_common
/* Instruction Storage Interrupt */
@@ -585,8 +542,11 @@ interrupt_end_book3e:
PROLOG_ADDITION_2REGS)
li r15,0
mr r14,r10
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x400)
- INTS_DISABLE
b storage_fault_common
/* External Input Interrupt */
@@ -599,6 +559,10 @@ interrupt_end_book3e:
PROLOG_ADDITION_2REGS)
mfspr r14,SPRN_DEAR
mfspr r15,SPRN_ESR
+ std r14,_DEAR(r1)
+ std r15,_ESR(r1)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
EXCEPTION_COMMON(0x600)
b alignment_more /* no room, go out of line */
@@ -607,14 +571,13 @@ interrupt_end_book3e:
NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
PROLOG_ADDITION_1REG)
mfspr r14,SPRN_ESR
- EXCEPTION_COMMON(0x700)
- INTS_DISABLE
- std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
+ std r14,_ESR(r1)
ld r14,PACA_EXGEN+EX_R14(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON(0x700)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl program_check_exception
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Floating Point Unavailable Interrupt */
START_EXCEPTION(fp_unavailable);
@@ -626,16 +589,14 @@ interrupt_end_book3e:
andi. r0,r12,MSR_PR;
beq- 1f
bl load_up_fpu
- b fast_exception_return
-1: INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ b fast_interrupt_return
+1: addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_fp_unavailable_exception
- b ret_from_except
+ b interrupt_return
/* Altivec Unavailable Interrupt */
START_EXCEPTION(altivec_unavailable);
- NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_SPE_ALTIVEC_UNAVAIL,
+ NORMAL_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_ALTIVEC_UNAVAIL,
PROLOG_ADDITION_NONE)
/* we can probably do a shorter exception entry for that one... */
EXCEPTION_COMMON(0x200)
@@ -645,33 +606,30 @@ BEGIN_FTR_SECTION
andi. r0,r12,MSR_PR;
beq- 1f
bl load_up_altivec
- b fast_exception_return
+ b fast_interrupt_return
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl altivec_unavailable_exception
- b ret_from_except
+ b interrupt_return
/* AltiVec Assist */
START_EXCEPTION(altivec_assist);
NORMAL_EXCEPTION_PROLOG(0x220,
- BOOKE_INTERRUPT_SPE_FP_DATA_ALTIVEC_ASSIST,
+ BOOKE_INTERRUPT_ALTIVEC_ASSIST,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x220)
- INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
bl altivec_assist_exception
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ REST_NVGPRS(r1)
#else
bl unknown_exception
#endif
- b ret_from_except
+ b interrupt_return
/* Decrementer Interrupt */
@@ -687,14 +645,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x9f0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_BOOKE_WDT
bl WatchdogException
#else
- bl unknown_exception
+ bl unknown_nmi_exception
#endif
b ret_from_crit_except
@@ -711,11 +668,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0xf20)
- INTS_DISABLE
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Debug exception as a critical interrupt*/
START_EXCEPTION(debug_crit);
@@ -735,10 +690,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_PACA_TOC(r15)
+ LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+ LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -768,15 +731,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_CRIT(0xd00)
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
ld r14,PACA_EXCRIT+EX_R14(r13)
ld r15,PACA_EXCRIT+EX_R15(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON_CRIT(0xd00)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl DebugException
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
kernel_dbg_exc:
b . /* NYI */
@@ -799,10 +761,18 @@ kernel_dbg_exc:
andis. r15,r14,(DBSR_IC|DBSR_BT)@h
beq+ 1f
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,interrupt_end_book3e)
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_PACA_TOC(r15)
+ LOAD_REG_ADDR_ALTTOC(r14, r15, interrupt_base_book3e)
+ LOAD_REG_ADDR_ALTTOC(r15, r15, __end_interrupts)
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -832,26 +802,30 @@ kernel_dbg_exc:
* normal exception
*/
mfspr r14,SPRN_DBSR
- EXCEPTION_COMMON_DBG(0xd08)
- INTS_DISABLE
std r14,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
ld r14,PACA_EXDBG+EX_R14(r13)
ld r15,PACA_EXDBG+EX_R15(r13)
- bl save_nvgprs
+ EXCEPTION_COMMON_DBG(0xd08)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl DebugException
- b ret_from_except
+ REST_NVGPRS(r1)
+ b interrupt_return
START_EXCEPTION(perfmon);
NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x260)
- INTS_DISABLE
CHECK_NAPPING()
- addi r3,r1,STACK_FRAME_OVERHEAD
+ addi r3,r1,STACK_INT_FRAME_REGS
+ /*
+ * XXX: When performance_monitor_exception is taken as a soft-NMI (Linux
+ * irqs disabled), returning through interrupt_return may be risky and
+ * could cause bugs in return or elsewhere. That case should just
+ * restore registers and return. There is a workaround for one known
+ * problem in interrupt_exit_kernel_prepare().
+ */
bl performance_monitor_exception
- b ret_from_except_lite
+ b interrupt_return
/* Doorbell interrupt */
MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
@@ -862,11 +836,10 @@ kernel_dbg_exc:
CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x2a0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_nmi_exception
b ret_from_crit_except
/*
@@ -877,22 +850,19 @@ kernel_dbg_exc:
GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x2c0)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Guest Doorbell critical Interrupt */
START_EXCEPTION(guest_doorbell_crit);
CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON_CRIT(0x2e0)
- bl save_nvgprs
bl special_reg_save
CHECK_NAPPING();
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_nmi_exception
b ret_from_crit_except
/* Hypervisor call */
@@ -900,61 +870,99 @@ kernel_dbg_exc:
NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x310)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* Embedded Hypervisor privileged */
START_EXCEPTION(ehpriv);
NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x320)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl save_nvgprs
- INTS_RESTORE_HARD
+ addi r3,r1,STACK_INT_FRAME_REGS
bl unknown_exception
- b ret_from_except
+ b interrupt_return
/* LRAT Error interrupt */
START_EXCEPTION(lrat_error);
NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x340)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .save_nvgprs
- INTS_RESTORE_HARD
- bl .unknown_exception
- b .ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl unknown_exception
+ b interrupt_return
+
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_PACA_TOC(r11)
+ LOAD_REG_ADDR_ALTTOC(r14, r11, __start___restart_table)
+ LOAD_REG_ADDR_ALTTOC(r15, r11, __stop___restart_table)
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r11, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r15, r11, __stop___restart_table)
+#endif
+300:
+ cmpd r14,r15
+ beq 302f
+ ld r11,0(r14)
+ cmpld r10,r11
+ blt 301f
+ ld r11,8(r14)
+ cmpld r10,r11
+ bge 301f
+ ld r11,16(r14)
+ b 303f
+301:
+ addi r14,r14,24
+ b 300b
+302:
+ li r11,0
+303:
+.endm
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
* accordingly and if the interrupt is level sensitive, we hard disable.
+ * Hard disable (full_mask) corresponds to PACA_IRQ_MUST_HARD_MASK, so
+ * keep these in sync.
*/
.macro masked_interrupt_book3e paca_irq full_mask
+ std r14,PACA_EXGEN+EX_R14(r13)
+ std r15,PACA_EXGEN+EX_R15(r13)
+
lbz r10,PACAIRQHAPPENED(r13)
+ .if \full_mask == 1
+ ori r10,r10,\paca_irq | PACA_IRQ_HARD_DIS
+ .else
ori r10,r10,\paca_irq
+ .endif
stb r10,PACAIRQHAPPENED(r13)
.if \full_mask == 1
- rldicl r10,r11,48,1 /* clear MSR_EE */
- rotldi r11,r10,16
+ xori r11,r11,MSR_EE /* clear MSR_EE */
mtspr SPRN_SRR1,r11
.endif
+ mfspr r10,SPRN_SRR0
+ SEARCH_RESTART_TABLE
+ cmpdi r11,0
+ beq 1f
+ mtspr SPRN_SRR0,r11 /* return to restart address */
+1:
+
lwz r11,PACA_EXGEN+EX_CR(r13)
mtcr r11
ld r10,PACA_EXGEN+EX_R10(r13)
ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r14,PACA_EXGEN+EX_R14(r13)
+ ld r15,PACA_EXGEN+EX_R15(r13)
mfspr r13,SPRN_SPRG_GEN_SCRATCH
rfi
b .
.endm
masked_interrupt_book3e_0x500:
- // XXX When adding support for EPR, use PACA_IRQ_EE_EDGE
masked_interrupt_book3e PACA_IRQ_EE 1
masked_interrupt_book3e_0x900:
@@ -970,126 +978,26 @@ masked_interrupt_book3e_0x2c0:
masked_interrupt_book3e PACA_IRQ_DBELL 0
/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains either 0x500,0x900,0x260 or 0x280
- * to indicate the kind of interrupt. MSR:EE is already off.
- * We generate a stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
- */
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about.
- */
- mflr r10
- mfmsr r11
- mfcr r4
- mtspr SPRN_SPRG_GEN_SCRATCH,r13;
- std r1,PACA_EXGEN+EX_R1(r13);
- stw r4,PACA_EXGEN+EX_CR(r13);
- ori r11,r11,MSR_EE
- subi r1,r1,INT_FRAME_SIZE;
- cmpwi cr0,r3,0x500
- beq exc_0x500_common
- cmpwi cr0,r3,0x900
- beq exc_0x900_common
- cmpwi cr0,r3,0x280
- beq exc_0x280_common
- blr
-
-
-/*
* This is called from 0x300 and 0x400 handlers after the prologs with
* r14 and r15 containing the fault address and error code, with the
* original values stashed away in the PACA
*/
-storage_fault_common:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- mr r4,r14
- mr r5,r15
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
+SYM_CODE_START_LOCAL(storage_fault_common)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl do_page_fault
- cmpdi r3,0
- bne- 1f
- b ret_from_except_lite
-1: bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except
+ b interrupt_return
+SYM_CODE_END(storage_fault_common)
/*
* Alignment exception doesn't fit entirely in the 0x100 bytes so it
* continues here.
*/
-alignment_more:
- std r14,_DAR(r1)
- std r15,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r14,PACA_EXGEN+EX_R14(r13)
- ld r15,PACA_EXGEN+EX_R15(r13)
- bl save_nvgprs
- INTS_RESTORE_HARD
+SYM_CODE_START_LOCAL(alignment_more)
+ addi r3,r1,STACK_INT_FRAME_REGS
bl alignment_exception
- b ret_from_except
-
-/*
- * We branch here from entry_64.S for the last stage of the exception
- * return code path. MSR:EE is expected to be off at that point
- */
-_GLOBAL(exception_return_book3e)
- b 1f
-
-/* This is the return from load_up_fpu fast path which could do with
- * less GPR restores in fact, but for now we have a single return path
- */
- .globl fast_exception_return
-fast_exception_return:
- wrteei 0
-1: mr r0,r13
- ld r10,_MSR(r1)
- REST_4GPRS(2, r1)
- andi. r6,r10,MSR_PR
- REST_2GPRS(6, r1)
- beq 1f
- ACCOUNT_CPU_USER_EXIT(r10, r11)
- ld r0,GPR13(r1)
-
-1: stdcx. r0,0,r1 /* to clear the reservation */
-
- ld r8,_CCR(r1)
- ld r9,_LINK(r1)
- ld r10,_CTR(r1)
- ld r11,_XER(r1)
- mtcr r8
- mtlr r9
- mtctr r10
- mtxer r11
- REST_2GPRS(8, r1)
- ld r10,GPR10(r1)
- ld r11,GPR11(r1)
- ld r12,GPR12(r1)
- mtspr SPRN_SPRG_GEN_SCRATCH,r0
-
- std r10,PACA_EXGEN+EX_R10(r13);
- std r11,PACA_EXGEN+EX_R11(r13);
- ld r10,_NIP(r1)
- ld r11,_MSR(r1)
- ld r0,GPR0(r1)
- ld r1,GPR1(r1)
- mtspr SPRN_SRR0,r10
- mtspr SPRN_SRR1,r11
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- mfspr r13,SPRN_SPRG_GEN_SCRATCH
- rfi
+ REST_NVGPRS(r1)
+ b interrupt_return
+SYM_CODE_END(alignment_more)
/*
* Trampolines used when spotting a bad kernel stack pointer in
@@ -1128,8 +1036,7 @@ BAD_STACK_TRAMPOLINE(0xe00)
BAD_STACK_TRAMPOLINE(0xf00)
BAD_STACK_TRAMPOLINE(0xf20)
- .globl bad_stack_book3e
-bad_stack_book3e:
+_GLOBAL(bad_stack_book3e)
/* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
mfspr r10,SPRN_SRR0; /* read SRR0 before touching stack */
ld r1,PACAEMERGSP(r13)
@@ -1142,19 +1049,16 @@ bad_stack_book3e:
std r11,_CCR(r1)
mfspr r10,SPRN_DEAR
mfspr r11,SPRN_ESR
- std r10,_DAR(r1)
- std r11,_DSISR(r1)
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- std r9,GPR9(r1); /* save r9 in stackframe */ \
+ std r10,_DEAR(r1)
+ std r11,_ESR(r1)
+ SAVE_GPR(0, r1); /* save r0 in stackframe */ \
+ SAVE_GPRS(2, 9, r1); /* save r2 - r9 in stackframe */ \
ld r3,PACA_EXGEN+EX_R10(r13);/* get back r10 */ \
ld r4,PACA_EXGEN+EX_R11(r13);/* get back r11 */ \
mfspr r5,SPRN_SPRG_GEN_SCRATCH;/* get back r13 XXX can be wrong */ \
std r3,GPR10(r1); /* save r10 to stackframe */ \
std r4,GPR11(r1); /* save r11 to stackframe */ \
- std r12,GPR12(r1); /* save r12 in stackframe */ \
+ SAVE_GPR(12, r1); /* save r12 in stackframe */ \
std r5,GPR13(r1); /* save it to stackframe */ \
mflr r10
mfctr r11
@@ -1162,16 +1066,15 @@ bad_stack_book3e:
std r10,_LINK(r1)
std r11,_CTR(r1)
std r12,_XER(r1)
- SAVE_10GPRS(14,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_NVGPRS(r1)
lhz r12,PACA_TRAP_SAVE(r13)
std r12,_TRAP(r1)
addi r11,r1,INT_FRAME_SIZE
std r11,0(r1)
- li r12,0
+ ZEROIZE_GPR(12)
std r12,0(r11)
- ld r2,PACATOC(r13)
-1: addi r3,r1,STACK_FRAME_OVERHEAD
+ LOAD_PACA_TOC()
+1: addi r3,r1,STACK_INT_FRAME_REGS
bl kernel_bad_stack
b 1b
@@ -1212,7 +1115,7 @@ found_iprot:
* r3 = MAS0_TLBSEL (for the iprot array)
* r4 = SPRN_TLBnCFG
*/
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r6 /* Make it accessible */
mfmsr r7
rlwinm r5,r7,27,31,31 /* extract MSR[IS] */
@@ -1281,7 +1184,7 @@ skpinv: addi r6,r6,1 /* Increment */
mfmsr r6
xori r6,r6,MSR_IS
mtspr SPRN_SRR1,r6
- bl 1f /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
1: mflr r6
addi r6,r6,(2f - 1b)
mtspr SPRN_SRR0,r6
@@ -1313,13 +1216,6 @@ skpinv: addi r6,r6,1 /* Increment */
sync
isync
-/* The mapping only needs to be cache-coherent on SMP */
-#ifdef CONFIG_SMP
-#define M_IF_SMP MAS2_M
-#else
-#define M_IF_SMP 0
-#endif
-
/* 6. Setup KERNELBASE mapping in TLB[0]
*
* r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
@@ -1332,7 +1228,7 @@ skpinv: addi r6,r6,1 /* Increment */
ori r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
mtspr SPRN_MAS1,r6
- LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_SMP)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
mtspr SPRN_MAS2,r6
rlwinm r5,r5,0,0,25
@@ -1348,7 +1244,10 @@ skpinv: addi r6,r6,1 /* Increment */
* r4 = MAS0 w/TLBSEL & ESEL for the temp mapping
*/
/* Now we branch to the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r6,2f)
+ bcl 20,31,$+4 /* Find our address */
+1: mflr r6
+ addi r6,r6,(2f - 1b)
+ tovirt(r6,r6)
lis r7,MSR_KERNEL@h
ori r7,r7,MSR_KERNEL@l
mtspr SPRN_SRR0,r6
@@ -1391,8 +1290,7 @@ have_hes:
* ever takes any parameters, the SCOM code must also be updated to
* provide them.
*/
- .globl a2_tlbinit_code_start
-a2_tlbinit_code_start:
+_GLOBAL(a2_tlbinit_code_start)
ori r11,r3,MAS0_WQ_ALLWAYS
oris r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
@@ -1414,7 +1312,12 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch to the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_PACA_TOC(r5)
+ LOAD_REG_ADDR_ALTTOC(r3, r5, 1f)
+#else
+ LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
+#endif
mtctr r3
bctr
@@ -1580,14 +1483,15 @@ _GLOBAL(book3e_secondary_thread_init)
mflr r28
b 3b
-init_core_book3e:
+_GLOBAL(init_core_book3e)
/* Establish the interrupt vector base */
- LOAD_REG_IMMEDIATE(r3, interrupt_base_book3e)
+ tovirt(r2,r2)
+ LOAD_REG_ADDR(r3, interrupt_base_book3e)
mtspr SPRN_IVPR,r3
sync
blr
-init_thread_book3e:
+SYM_CODE_START_LOCAL(init_thread_book3e)
lis r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
mtspr SPRN_EPCR,r3
@@ -1601,6 +1505,7 @@ init_thread_book3e:
mtspr SPRN_TSR,r3
blr
+SYM_CODE_END(init_thread_book3e)
_GLOBAL(__setup_base_ivors)
SET_IVOR(0, 0x020) /* Critical Input */
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 050f79a4a168..b7229430ca94 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This file contains the 64-bit "server" PowerPC variant
* of the low level exception handling including exception
@@ -12,429 +13,1175 @@
*
*/
+#include <linux/linkage.h>
#include <asm/hw_irq.h>
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
+#include <asm/cpuidle.h>
+#include <asm/head-64.h>
+#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
- * We layout physical memory as follows:
- * 0x0000 - 0x00ff : Secondary processor spin code
- * 0x0100 - 0x17ff : pSeries Interrupt prologs
- * 0x1800 - 0x4000 : interrupt support common interrupt prologs
- * 0x4000 - 0x5fff : pSeries interrupts with IR=1,DR=1
- * 0x6000 - 0x6fff : more interrupt support including for IR=1,DR=1
- * 0x7000 - 0x7fff : FWNMI data area
- * 0x8000 - 0x8fff : Initial (CPU0) segment table
- * 0x9000 - : Early init and support code
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
+ * EXC_COMMON_BEGIN - After switching to virtual, relocated mode.
*/
- /* Syscall routine is used twice, in reloc-off and reloc-on paths */
-#define SYSCALL_PSERIES_1 \
-BEGIN_FTR_SECTION \
- cmpdi r0,0x1ebe ; \
- beq- 1f ; \
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
- mr r9,r13 ; \
- GET_PACA(r13) ; \
- mfspr r11,SPRN_SRR0 ; \
-0:
-
-#define SYSCALL_PSERIES_2_RFID \
- mfspr r12,SPRN_SRR1 ; \
- ld r10,PACAKBASE(r13) ; \
- LOAD_HANDLER(r10, system_call_entry) ; \
- mtspr SPRN_SRR0,r10 ; \
- ld r10,PACAKMSR(r13) ; \
- mtspr SPRN_SRR1,r10 ; \
- rfid ; \
- b . ; /* prevent speculative execution */
-
-#define SYSCALL_PSERIES_3 \
- /* Fast LE/BE switch system call */ \
-1: mfspr r12,SPRN_SRR1 ; \
- xori r12,r12,MSR_LE ; \
- mtspr SPRN_SRR1,r12 ; \
- rfid ; /* return to userspace */ \
- b . ; /* prevent speculative execution */
-
-#if defined(CONFIG_RELOCATABLE)
- /*
- * We can't branch directly; in the direct case we use LR
- * and system_call_entry restores LR. (We thus need to move
- * LR to r10 in the RFID case too.)
- */
-#define SYSCALL_PSERIES_2_DIRECT \
- mflr r10 ; \
- ld r12,PACAKBASE(r13) ; \
- LOAD_HANDLER(r12, system_call_entry_direct) ; \
- mtctr r12 ; \
- mfspr r12,SPRN_SRR1 ; \
- /* Re-use of r13... No spare regs to do this */ \
- li r13,MSR_RI ; \
- mtmsrd r13,1 ; \
- GET_PACA(r13) ; /* get r13 back */ \
- bctr ;
-#else
- /* We can branch directly */
-#define SYSCALL_PSERIES_2_DIRECT \
- mfspr r12,SPRN_SRR1 ; \
- li r10,MSR_RI ; \
- mtmsrd r10,1 ; /* Set RI (EE=0) */ \
- b system_call_entry_direct ;
-#endif
+
+#define EXC_REAL_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .balign IFETCH_ALIGN_BYTES; \
+ .global name; \
+ _ASM_NOKPROBE_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name, text); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#define EXC_REAL_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
/*
- * This is the start of the interrupt handlers for pSeries
- * This code runs with relocation off.
- * Code from here to __end_interrupts gets copied down to real
- * address 0x100 when we are running a relocatable kernel.
- * Therefore any relative branches in this section must only
- * branch to labels in this section.
+ * We're short on space and time in the exception prolog, so we can't
+ * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
+ * Instead we get the base of the kernel from paca->kernelbase and OR in the low
+ * part of label. This requires that the label be within 64KB of kernelbase, and
+ * that kernelbase be 64K aligned.
*/
- . = 0x100
- .globl __start_interrupts
-__start_interrupts:
-
- .globl system_reset_pSeries;
-system_reset_pSeries:
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13)
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
- /* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
- */
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- beq 9f
-
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal, we could try to use the
- * PIR to locate a PACA, then use an emergency stack etc...
- * OPAL v3 based powernv platforms have new idle states
- * which fall in this catagory.
- */
- bgt cr1,8f
- GET_PACA(r13)
+#define LOAD_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); /* get high part of &label */ \
+ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
- lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
-#endif
+#define __LOAD_HANDLER(reg, label, section) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label, section))@l
- beq cr1,2f
- b power7_wakeup_noloss
-2: b power7_wakeup_loss
+/*
+ * Branches from unrelocated code (e.g., interrupts) to labels outside
+ * head-y require >64K offsets.
+ */
+#define __LOAD_FAR_HANDLER(reg, label, section) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label, section))@l; \
+ addis reg,reg,(ABS_ADDR(label, section))@h
- /* Fast Sleep wakeup on PowerNV */
-8: GET_PACA(r13)
- b power7_wakeup_tb_loss
+/*
+ * Interrupt code generation macros
+ */
+#define IVEC .L_IVEC_\name\() /* Interrupt vector address */
+#define IHSRR .L_IHSRR_\name\() /* Sets SRR or HSRR registers */
+#define IHSRR_IF_HVMODE .L_IHSRR_IF_HVMODE_\name\() /* HSRR if HV else SRR */
+#define IAREA .L_IAREA_\name\() /* PACA save area */
+#define IVIRT .L_IVIRT_\name\() /* Has virt mode entry point */
+#define IISIDE .L_IISIDE_\name\() /* Uses SRR0/1 not DAR/DSISR */
+#define ICFAR .L_ICFAR_\name\() /* Uses CFAR */
+#define ICFAR_IF_HVMODE .L_ICFAR_IF_HVMODE_\name\() /* Uses CFAR if HV */
+#define IDAR .L_IDAR_\name\() /* Uses DAR (or SRR0) */
+#define IDSISR .L_IDSISR_\name\() /* Uses DSISR (or SRR1) */
+#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
+#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
+#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
+#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
+#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
+#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
+#define ISTACK .L_ISTACK_\name\() /* Set regular kernel stack */
+#define __ISTACK(name) .L_ISTACK_ ## name
+#define IKUAP .L_IKUAP_\name\() /* Do KUAP lock */
+#define IMSR_R12 .L_IMSR_R12_\name\() /* Assumes MSR saved to r12 */
+
+#define INT_DEFINE_BEGIN(n) \
+.macro int_define_ ## n name
+
+#define INT_DEFINE_END(n) \
+.endm ; \
+int_define_ ## n n ; \
+do_define_int n
+
+.macro do_define_int name
+ .ifndef IVEC
+ .error "IVEC not defined"
+ .endif
+ .ifndef IHSRR
+ IHSRR=0
+ .endif
+ .ifndef IHSRR_IF_HVMODE
+ IHSRR_IF_HVMODE=0
+ .endif
+ .ifndef IAREA
+ IAREA=PACA_EXGEN
+ .endif
+ .ifndef IVIRT
+ IVIRT=1
+ .endif
+ .ifndef IISIDE
+ IISIDE=0
+ .endif
+ .ifndef ICFAR
+ ICFAR=1
+ .endif
+ .ifndef ICFAR_IF_HVMODE
+ ICFAR_IF_HVMODE=0
+ .endif
+ .ifndef IDAR
+ IDAR=0
+ .endif
+ .ifndef IDSISR
+ IDSISR=0
+ .endif
+ .ifndef IBRANCH_TO_COMMON
+ IBRANCH_TO_COMMON=1
+ .endif
+ .ifndef IREALMODE_COMMON
+ IREALMODE_COMMON=0
+ .else
+ .if ! IBRANCH_TO_COMMON
+ .error "IREALMODE_COMMON=1 but IBRANCH_TO_COMMON=0"
+ .endif
+ .endif
+ .ifndef IMASK
+ IMASK=0
+ .endif
+ .ifndef IKVM_REAL
+ IKVM_REAL=0
+ .endif
+ .ifndef IKVM_VIRT
+ IKVM_VIRT=0
+ .endif
+ .ifndef ISTACK
+ ISTACK=1
+ .endif
+ .ifndef IKUAP
+ IKUAP=1
+ .endif
+ .ifndef IMSR_R12
+ IMSR_R12=0
+ .endif
+.endm
-9:
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
- NOTEST, 0x100)
-
- . = 0x200
-machine_check_pSeries_1:
- /* This is moved out of line as it can be patched by FW, but
- * some code path might still want to branch into the original
- * vector
- */
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13) /* save r13 */
-#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
- /* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
- */
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- beq 9f
-
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal. let's just stay stuck here */
- OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
- bgt cr1,.
-9:
- OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR)
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
- EXCEPTION_PROLOG_0(PACA_EXMC)
-BEGIN_FTR_SECTION
- b machine_check_pSeries_early
-FTR_SECTION_ELSE
- b machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+/*
+ * All interrupts which set HSRR registers, as well as SRESET and MCE and
+ * syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
+ * so they all generally need to test whether they were taken in guest context.
+ *
+ * Note: SRESET and MCE may also be sent to the guest by the hypervisor, and be
+ * taken with MSR[HV]=0.
+ *
+ * Interrupts which set SRR registers (with the above exceptions) do not
+ * elevate to MSR[HV]=1 mode, though most can be taken when running with
+ * MSR[HV]=1 (e.g., bare metal kernel and userspace). So these interrupts do
+ * not need to test whether a guest is running because they get delivered to
+ * the guest directly, including nested HV KVM guests.
+ *
+ * The exception is PR KVM, where the guest runs with MSR[PR]=1 and the host
+ * runs with MSR[HV]=0, so the host takes all interrupts on behalf of the
+ * guest. PR KVM runs with LPCR[AIL]=0 which causes interrupts to always be
+ * delivered to the real-mode entry point, therefore such interrupts only test
+ * KVM in their real mode handlers, and only when PR KVM is possible.
+ *
+ * Interrupts that are taken in MSR[HV]=0 and escalate to MSR[HV]=1 are always
+ * delivered in real-mode when the MMU is in hash mode because the MMU
+ * registers are not set appropriately to translate host addresses. In nested
+ * radix mode these can be delivered in virt-mode as the host translations are
+ * used implicitly (see: effective LPID, effective PID).
+ */
- . = 0x300
- .globl data_access_pSeries
-data_access_pSeries:
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
- KVMTEST, 0x300)
+/*
+ * If an interrupt is taken while a guest is running, it is immediately routed
+ * to KVM to handle.
+ */
- . = 0x380
- .globl data_access_slb_pSeries
-data_access_slb_pSeries:
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_DAR
-#ifdef __DISABLED__
- /* Keep that around for when we re-implement dynamic VSIDs */
- cmpdi r3,0
- bge slb_miss_user_pseries
-#endif /* __DISABLED__ */
- mfspr r12,SPRN_SRR1
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
+.macro KVMTEST name handler
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,0
+ /* HSRR variants have the 0x2 bit added to their trap number */
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ li r10,(IVEC + 0x2)
+ FTR_SECTION_ELSE
+ li r10,(IVEC)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ li r10,(IVEC + 0x2)
+ .else
+ li r10,(IVEC)
+ .endif
+ bne \handler
#endif
+.endm
- STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ * These are normal interrupts (synchronous and asynchronous) handled by
+ * the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ * / intended for host or guest kernel, but KVM must always be involved
+ * because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ * These occur when maskable asynchronous interrupts are taken with the
+ * irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ * This is done if IREALMODE_COMMON=1. MCE and HMI use these to handle
+ * errors in real mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ * This is done if IBRANCH_TO_COMMON=0.
+ */
- . = 0x480
- .globl instruction_access_slb_pSeries
-instruction_access_slb_pSeries:
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
-#ifdef __DISABLED__
- /* Keep that around for when we re-implement dynamic VSIDs */
- cmpdi r3,0
- bge slb_miss_user_pseries
-#endif /* __DISABLED__ */
- mfspr r12,SPRN_SRR1
+.macro GEN_BRANCH_TO_COMMON name, virt
+ .if IREALMODE_COMMON
+ LOAD_HANDLER(r10, \name\()_common)
+ mtctr r10
+ bctr
+ .else
+ .if \virt
#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
+ b \name\()_common_virt
#else
- mfctr r11
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, slb_miss_realmode)
+ LOAD_HANDLER(r10, \name\()_common_virt)
mtctr r10
bctr
#endif
+ .else
+ LOAD_HANDLER(r10, \name\()_common_real)
+ mtctr r10
+ bctr
+ .endif
+ .endif
+.endm
+
+.macro GEN_INT_ENTRY name, virt, ool=0
+ SET_SCRATCH0(r13) /* save r13 */
+ GET_PACA(r13)
+ std r9,IAREA+EX_R9(r13) /* save r9 */
+BEGIN_FTR_SECTION
+ mfspr r9,SPRN_PPR
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ std r10,IAREA+EX_R10(r13) /* save r10 */
+ .if ICFAR
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_CFAR
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .elseif ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ BEGIN_FTR_SECTION_NESTED(69)
+ mfspr r10,SPRN_CFAR
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+FTR_SECTION_ELSE
+ BEGIN_FTR_SECTION_NESTED(69)
+ li r10,0
+ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 69)
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .endif
+ .if \ool
+ .if !\virt
+ b tramp_real_\name
+ .pushsection .text
+ TRAMP_REAL_BEGIN(tramp_real_\name)
+ .else
+ b tramp_virt_\name
+ .pushsection .text
+ TRAMP_VIRT_BEGIN(tramp_virt_\name)
+ .endif
+ .endif
+
+BEGIN_FTR_SECTION
+ std r9,IAREA+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ .if ICFAR || ICFAR_IF_HVMODE
+BEGIN_FTR_SECTION
+ std r10,IAREA+EX_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ .endif
+ INTERRUPT_TO_KERNEL
+ mfctr r10
+ std r10,IAREA+EX_CTR(r13)
+ mfcr r9
+ std r11,IAREA+EX_R11(r13) /* save r11 - r12 */
+ std r12,IAREA+EX_R12(r13)
- /* We open code these as we can't have a ". = x" (even with
- * x = "." within a feature section
+ /*
+ * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+ * because a d-side MCE will clobber those registers so is
+ * not recoverable if they are live.
*/
- . = 0x500;
- .globl hardware_interrupt_pSeries;
- .globl hardware_interrupt_hv;
-hardware_interrupt_pSeries:
-hardware_interrupt_hv:
- HMT_MEDIUM_PPR_DISCARD
+ GET_SCRATCH0(r10)
+ std r10,IAREA+EX_R13(r13)
+ .if IDAR && !IISIDE
+ .if IHSRR
+ mfspr r10,SPRN_HDAR
+ .else
+ mfspr r10,SPRN_DAR
+ .endif
+ std r10,IAREA+EX_DAR(r13)
+ .endif
+ .if IDSISR && !IISIDE
+ .if IHSRR
+ mfspr r10,SPRN_HDSISR
+ .else
+ mfspr r10,SPRN_DSISR
+ .endif
+ stw r10,IAREA+EX_DSISR(r13)
+ .endif
+
+ .if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
- _MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
- EXC_HV, SOFTEN_TEST_HV)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
FTR_SECTION_ELSE
- _MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
- EXC_STD, SOFTEN_TEST_HV_201)
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
- STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
+ .if IBRANCH_TO_COMMON
+ GEN_BRANCH_TO_COMMON \name \virt
+ .endif
- STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
+ .if \ool
+ .popsection
+ .endif
+.endm
- STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
+/*
+ * __GEN_COMMON_ENTRY is required to receive the branch from interrupt
+ * entry, except in the case of the real-mode handlers which require
+ * __GEN_REALMODE_COMMON_ENTRY.
+ *
+ * This switches to virtual mode and sets MSR[RI].
+ */
+.macro __GEN_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name kvm_interrupt
+ .endif
+
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ /* MSR[RI] is clear iff using SRR regs */
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ xori r10,r10,MSR_RI
+ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+ .elseif ! IHSRR
+ xori r10,r10,MSR_RI
+ .endif
+ mtmsrd r10
- . = 0x900
- .globl decrementer_pSeries
-decrementer_pSeries:
- _MASKABLE_EXCEPTION_PSERIES(0x900, decrementer, EXC_STD, SOFTEN_TEST_PR)
+ .if IVIRT
+ .if IKVM_VIRT
+ b 1f /* skip the virt test coming from real */
+ .endif
- STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
+ .balign IFETCH_ALIGN_BYTES
+DEFINE_FIXED_SYMBOL(\name\()_common_virt, text)
+\name\()_common_virt:
+ .if IKVM_VIRT
+ KVMTEST \name kvm_interrupt
+1:
+ .endif
+ .endif /* IVIRT */
+.endm
- MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
+/*
+ * Don't switch to virt mode. Used for early MCE and HMI handlers that
+ * want to run in real mode.
+ */
+.macro __GEN_REALMODE_COMMON_ENTRY name
+DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
+\name\()_common_real:
+ .if IKVM_REAL
+ KVMTEST \name kvm_interrupt
+ .endif
+.endm
+
+.macro __GEN_COMMON_BODY name
+ .if IMASK
+ .if ! ISTACK
+ .error "No support for masked interrupt to use custom stack"
+ .endif
+
+ /* If coming from user, skip soft-mask tests. */
+ andi. r10,r12,MSR_PR
+ bne 3f
+
+ /*
+ * Kernel code running below __end_soft_masked may be
+ * implicitly soft-masked if it is within the regions
+ * in the soft mask table.
+ */
+ LOAD_HANDLER(r10, __end_soft_masked)
+ cmpld r11,r10
+ bge+ 1f
+
+ /* SEARCH_SOFT_MASK_TABLE clobbers r9,r10,r12 */
+ mtctr r12
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+ SEARCH_SOFT_MASK_TABLE
+ cmpdi r12,0
+ mfctr r12 /* Restore r12 to SRR1 */
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ beq 1f /* Not in soft-mask table */
+ li r10,IMASK
+ b 2f /* In soft-mask table, always mask */
+
+ /* Test the soft mask state against our interrupt's bit */
+1: lbz r10,PACAIRQSOFTMASK(r13)
+2: andi. r10,r10,IMASK
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if IVEC == 0x500 || IVEC == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif IVEC == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif IVEC == 0xa00 || IVEC == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif IVEC == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif IVEC == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ .if ISTACK
+ andi. r10,r12,MSR_PR /* See if coming from user */
+3: mr r10,r1 /* Save r1 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
+ beq- 100f
+ ld r1,PACAKSAVE(r13) /* kernel stack to use */
+100: tdgei r1,-INT_FRAME_SIZE /* trap if r1 is in userspace */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+ .endif
- STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
+ std r9,_CCR(r1) /* save CR in stackframe */
+ std r11,_NIP(r1) /* save SRR0 in stackframe */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+ SANITIZE_GPR(0)
- . = 0xc00
- .globl system_call_pSeries
-system_call_pSeries:
- HMT_MEDIUM
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
- SET_SCRATCH0(r13)
- GET_PACA(r13)
- std r9,PACA_EXGEN+EX_R9(r13)
- std r10,PACA_EXGEN+EX_R10(r13)
- mfcr r9
- KVMTEST(0xc00)
- GET_SCRATCH0(r13)
+ /* Mark our [H]SRRs valid for return */
+ li r10,1
+ .if IHSRR_IF_HVMODE
+ BEGIN_FTR_SECTION
+ stb r10,PACAHSRR_VALID(r13)
+ FTR_SECTION_ELSE
+ stb r10,PACASRR_VALID(r13)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif IHSRR
+ stb r10,PACAHSRR_VALID(r13)
+ .else
+ stb r10,PACASRR_VALID(r13)
+ .endif
+
+ .if ISTACK
+ .if IKUAP
+ kuap_save_amr_and_lock r9, r10, cr1, cr0
+ .endif
+ beq 101f /* if from kernel mode */
+BEGIN_FTR_SECTION
+ ld r9,IAREA+EX_PPR(r13) /* Read PPR from paca */
+ std r9,_PPR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+101:
+ .else
+ .if IKUAP
+ kuap_save_amr_and_lock r9, r10, cr1
+ .endif
+ .endif
+
+ /* Save original regs values from save area to stack frame. */
+ ld r9,IAREA+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,IAREA+EX_R10(r13)
+ std r9,GPR9(r1)
+ std r10,GPR10(r1)
+ ld r9,IAREA+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,IAREA+EX_R12(r13)
+ ld r11,IAREA+EX_R13(r13)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+ .if !IMSR_R12
+ SANITIZE_GPRS(9, 12)
+ .else
+ SANITIZE_GPRS(9, 11)
+ .endif
+
+ SAVE_NVGPRS(r1)
+ SANITIZE_NVGPRS()
+
+ .if IDAR
+ .if IISIDE
+ ld r10,_NIP(r1)
+ .else
+ ld r10,IAREA+EX_DAR(r13)
+ .endif
+ std r10,_DAR(r1)
+ .endif
+
+ .if IDSISR
+ .if IISIDE
+ ld r10,_MSR(r1)
+ lis r11,DSISR_SRR1_MATCH_64S@h
+ and r10,r10,r11
+ .else
+ lwz r10,IAREA+EX_DSISR(r13)
+ .endif
+ std r10,_DSISR(r1)
+ .endif
+
+BEGIN_FTR_SECTION
+ .if ICFAR || ICFAR_IF_HVMODE
+ ld r10,IAREA+EX_CFAR(r13)
+ .else
+ li r10,0
+ .endif
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r10,IAREA+EX_CTR(r13)
+ std r10,_CTR(r1)
+ SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe */
+ SANITIZE_GPRS(2, 8)
+ mflr r9 /* Get LR, later save to stack */
+ LOAD_PACA_TOC() /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+ lbz r10,PACAIRQSOFTMASK(r13)
+ mfspr r11,SPRN_XER /* save XER in stackframe */
+ std r10,SOFTE(r1)
+ std r11,_XER(r1)
+ li r9,IVEC
+ std r9,_TRAP(r1) /* set trap number */
+ li r10,0
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r10,RESULT(r1) /* clear regs->result */
+ std r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame */
+.endm
+
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ *
+ * If ISTACK=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting are not done for the !ISTACK case (XXX why not?)
+ */
+.macro GEN_COMMON name
+ __GEN_COMMON_ENTRY \name
+ __GEN_COMMON_BODY \name
+.endm
+
+/*
+ * SEARCH_RESTART_TABLE - look up r11 in the restart table.
+ *
+ * Walks the 24-byte entries between __start___restart_table and
+ * __stop___restart_table. Each entry is read as three 8-byte values at
+ * offsets 0, 8 and 16. An entry matches when
+ * (value at 0) <= r11 < (value at 8), compared as unsigned.
+ *
+ * In: r11 = address to look up
+ * Out: r12 = the matching entry's value at offset 16, or 0 if no entry
+ * matched (presumably the restart address, going by the table
+ * name - confirm against the table's definition)
+ * Clobbers: r9, r10, r12 and cr0 (via cmpd/cmpld). With CONFIG_RELOCATABLE
+ * r2 is parked in r12 around the TOC-relative loads and then put back.
+ * Uses numeric local labels 300-303.
+ */
+.macro SEARCH_RESTART_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2 /* keep the caller's r2 while the TOC is loaded */
+ LOAD_PACA_TOC()
+ LOAD_REG_ADDR(r9, __start___restart_table)
+ LOAD_REG_ADDR(r10, __stop___restart_table)
+ mr r2,r12 /* put the caller's r2 back */
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___restart_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___restart_table)
#endif
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_RFID
- SYSCALL_PSERIES_3
- KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
+300:
+ cmpd r9,r10 /* reached the end of the table? */
+ beq 302f
+ ld r12,0(r9) /* lower bound (inclusive) */
+ cmpld r11,r12
+ blt 301f /* r11 below the range: next entry */
+ ld r12,8(r9) /* upper bound (exclusive) */
+ cmpld r11,r12
+ bge 301f /* r11 at or past the range: next entry */
+ ld r12,16(r9) /* match: return the entry's third value */
+ b 303f
+301:
+ addi r9,r9,24 /* step to the next 24-byte entry */
+ b 300b
+302:
+ li r12,0 /* no entry matched */
+303:
+.endm
- STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
+/*
+ * SEARCH_SOFT_MASK_TABLE - test whether r11 falls inside a soft-mask table
+ * entry.
+ *
+ * Walks the 16-byte entries between __start___soft_mask_table and
+ * __stop___soft_mask_table. Each entry is read as two 8-byte values at
+ * offsets 0 and 8. An entry matches when
+ * (value at 0) <= r11 < (value at 8), compared as unsigned.
+ *
+ * In: r11 = address to look up
+ * Out: r12 = 1 if some entry matched, 0 otherwise
+ * Clobbers: r9, r10, r12 and cr0 (via cmpd/cmpld). With CONFIG_RELOCATABLE
+ * r2 is parked in r12 around the TOC-relative loads and then put back.
+ * Uses numeric local labels 300-303.
+ */
+.macro SEARCH_SOFT_MASK_TABLE
+#ifdef CONFIG_RELOCATABLE
+ mr r12,r2 /* keep the caller's r2 while the TOC is loaded */
+ LOAD_PACA_TOC()
+ LOAD_REG_ADDR(r9, __start___soft_mask_table)
+ LOAD_REG_ADDR(r10, __stop___soft_mask_table)
+ mr r2,r12 /* put the caller's r2 back */
+#else
+ LOAD_REG_IMMEDIATE_SYM(r9, r12, __start___soft_mask_table)
+ LOAD_REG_IMMEDIATE_SYM(r10, r12, __stop___soft_mask_table)
+#endif
+300:
+ cmpd r9,r10 /* reached the end of the table? */
+ beq 302f
+ ld r12,0(r9) /* lower bound (inclusive) */
+ cmpld r11,r12
+ blt 301f /* r11 below the range: next entry */
+ ld r12,8(r9) /* upper bound (exclusive) */
+ cmpld r11,r12
+ bge 301f /* r11 at or past the range: next entry */
+ li r12,1 /* match */
+ b 303f
+301:
+ addi r9,r9,16 /* step to the next 16-byte entry */
+ b 300b
+302:
+ li r12,0 /* no entry matched */
+303:
+.endm
+
+/*
+ * Restore all registers including H/SRR0/1 saved in a stack frame of a
+ * standard exception.
+ *
+ * hsrr: when non-zero, _MSR/_NIP from the frame are written to HSRR1/HSRR0
+ * and PACAHSRR_VALID is cleared; otherwise (the default) they are
+ * written to SRR1/SRR0 and PACASRR_VALID is cleared.
+ *
+ * On entry r1 points at the saved frame and r13 at the paca. r9 and r10 are
+ * used as scratch before the GPRs are reloaded. CTR, XER, LR and CR are
+ * restored from the frame, then r2-r13 and r0, and finally r1 itself.
+ */
+.macro EXCEPTION_RESTORE_REGS hsrr=0
+ /* Move original SRR0 and SRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ li r10,0 /* zero byte stored to the paca *_VALID flag below */
+ .if \hsrr
+ mtspr SPRN_HSRR1,r9
+ stb r10,PACAHSRR_VALID(r13)
+ .else
+ mtspr SPRN_SRR1,r9
+ stb r10,PACASRR_VALID(r13)
+ .endif
+ ld r9,_NIP(r1)
+ .if \hsrr
+ mtspr SPRN_HSRR0,r9
+ .else
+ mtspr SPRN_SRR0,r9
+ .endif
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ ld r9,_CCR(r1)
+ mtcr r9
+ SANITIZE_RESTORE_NVGPRS()
+ REST_GPRS(2, 13, r1)
+ REST_GPR(0, r1)
+ /*
+ * restore original r1. Done last because r1 is the base register for
+ * every load from the frame above.
+ */
+ ld r1,GPR1(r1)
+.endm
- /* At 0xe??? we have a bunch of hypervisor exceptions, we branch
- * out of line to handle them
+/*
+ * EARLY_BOOT_FIXUP - Fix real-mode interrupt with wrong endian in early boot.
+ *
+ * There's a short window during boot where although the kernel is running
+ * little endian, any exceptions will cause the CPU to switch back to big
+ * endian. For example a WARN() boils down to a trap instruction, which will
+ * cause a program check, and we end up here but with the CPU in big endian
+ * mode. The first instruction of the program check handler (in GEN_INT_ENTRY
+ * below) is an mtsprg, which when executed in the wrong endian is an lhzu with
+ * a ~3GB displacement from r3. The content of r3 is random, so that is a load
+ * from some random location, and depending on the system can easily lead to a
+ * checkstop, or an infinitely recursive page fault.
+ *
+ * So to handle that case we have a trampoline here that can detect we are in
+ * the wrong endian and flip us back to the correct endian. We can't flip
+ * MSR[LE] using mtmsr, so we have to use rfid. That requires backing up SRR0/1
+ * as well as a GPR. To do that we use SPRG0/2/3, as SPRG1 is already used for
+ * the paca. SPRG3 is user readable, but this trampoline is only active very
+ * early in boot, and SPRG3 will be reinitialised in vdso_getcpu_init() before
+ * userspace starts.
+ */
+.macro EARLY_BOOT_FIXUP
+BEGIN_FTR_SECTION
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ tdi 0,0,0x48 // Trap never, or in reverse endian: b . + 8
+ b 2f // Skip trampoline if endian is correct
+ .long 0xa643707d // mtsprg 0, r11 Backup r11
+ .long 0xa6027a7d // mfsrr0 r11
+ .long 0xa643727d // mtsprg 2, r11 Backup SRR0 in SPRG2
+ .long 0xa6027b7d // mfsrr1 r11
+ .long 0xa643737d // mtsprg 3, r11 Backup SRR1 in SPRG3
+ .long 0xa600607d // mfmsr r11
+ .long 0x01006b69 // xori r11, r11, 1 Invert MSR[LE]
+ .long 0xa6037b7d // mtsrr1 r11
+ /*
+ * This is 'li r11,1f' where 1f is the absolute address of that
+ * label, byteswapped into the SI field of the instruction.
*/
- . = 0xe00
-hv_data_storage_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_data_storage_hv
+ .long 0x00006039 | \
+ ((ABS_ADDR(1f, real_vectors) & 0x00ff) << 24) | \
+ ((ABS_ADDR(1f, real_vectors) & 0xff00) << 8)
+ .long 0xa6037a7d // mtsrr0 r11
+ .long 0x2400004c // rfid
+1:
+ mfsprg r11, 3
+ mtsrr1 r11 // Restore SRR1
+ mfsprg r11, 2
+ mtsrr0 r11 // Restore SRR0
+ mfsprg r11, 0 // Restore r11
+2:
+#endif
+ /*
+ * program check could hit at any time, and pseries can not block
+ * MSR[ME] in early boot. So check if there is anything useful in r13
+ * yet, and spin forever if not.
+ */
+ mtsprg 0, r11
+ mfcr r11
+ cmpdi r13, 0
+ beq .
+ mtcr r11
+ mfsprg r11, 0
+END_FTR_SECTION(0, 1) // nop out after boot
+.endm
- . = 0xe20
-hv_instr_storage_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_instr_storage_hv
+/*
+ * There are a few constraints to be concerned with.
+ * - Real mode exceptions code/data must be located at their physical location.
+ * - Virtual mode exceptions must be mapped at their 0xc000... location.
+ * - Fixed location code must not call directly beyond the __end_interrupts
+ * area when built with CONFIG_RELOCATABLE. LOAD_HANDLER / bctr sequence
+ * must be used.
+ * - LOAD_HANDLER targets must be within first 64K of physical 0 /
+ * virtual 0xc00...
+ * - Conditional branch targets must be within +/-32K of caller.
+ *
+ * "Virtual exceptions" run with relocation on (MSR_IR=1, MSR_DR=1), and
+ * therefore don't have to run in physically located code or rfid to
+ * virtual mode kernel code. However on relocatable kernels they do have
+ * to branch to KERNELBASE offset because the rest of the kernel (outside
+ * the exception vectors) may be located elsewhere.
+ *
+ * Virtual exceptions correspond with physical, except their entry points
+ * are offset by 0xc000000000000000 and also tend to get an added 0x4000
+ * offset applied. Virtual exceptions are enabled with the Alternate
+ * Interrupt Location (AIL) bit set in the LPCR. However this does not
+ * guarantee they will be delivered virtually. Some conditions (see the ISA)
+ * cause exceptions to be delivered in real mode.
+ *
+ * The scv instructions are a special case. They get a 0x3000 offset applied.
+ * scv exceptions have unique reentrancy properties, see below.
+ *
+ * It's impossible to receive interrupts below 0x300 via AIL.
+ *
+ * KVM: None of the virtual exceptions are from the guest. Anything that
+ * escalated to HV=1 from HV=0 is delivered via real mode handlers.
+ *
+ *
+ * We lay out physical memory as follows:
+ * 0x0000 - 0x00ff : Secondary processor spin code
+ * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
+ * 0x1900 - 0x2fff : Real mode trampolines
+ * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x5900 - 0x6fff : Relon mode trampolines
+ * 0x7000 - 0x7fff : FWNMI data area
+ * 0x8000 - .... : Common interrupt handlers, remaining early
+ * setup code, rest of kernel.
+ *
+ * We could reclaim 0x4000-0x42ff for real mode trampolines if the space
+ * is necessary. Until then it's more consistent to explicitly put VIRT_NONE
+ * vectors there.
+ */
+OPEN_FIXED_SECTION(real_vectors, 0x0100, 0x1900)
+OPEN_FIXED_SECTION(real_trampolines, 0x1900, 0x3000)
+OPEN_FIXED_SECTION(virt_vectors, 0x3000, 0x5900)
+OPEN_FIXED_SECTION(virt_trampolines, 0x5900, 0x7000)
- . = 0xe40
-emulation_assist_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b emulation_assist_hv
+#ifdef CONFIG_PPC_POWERNV
+ .globl start_real_trampolines
+ .globl end_real_trampolines
+ .globl start_virt_trampolines
+ .globl end_virt_trampolines
+#endif
- . = 0xe60
-hv_exception_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_early
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+/*
+ * Data area reserved for FWNMI option.
+ * This address (0x7000) is fixed by the RPA.
+ * pseries and powernv need to keep the whole page from
+ * 0x7000 to 0x8000 free for use by the firmware
+ */
+ZERO_FIXED_SECTION(fwnmi_page, 0x7000, 0x8000)
+OPEN_TEXT_SECTION(0x8000)
+#else
+OPEN_TEXT_SECTION(0x7000)
+#endif
+
+USE_FIXED_SECTION(real_vectors)
+
+/*
+ * This is the start of the interrupt handlers for pSeries
+ * This code runs with relocation off.
+ * Code from here to __end_interrupts gets copied down to real
+ * address 0x100 when we are running a relocatable kernel.
+ * Therefore any relative branches in this section must only
+ * branch to labels in this section.
+ */
+ .globl __start_interrupts
+__start_interrupts:
+
+/**
+ * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
+ * This is a synchronous interrupt invoked with the "scv" instruction. The
+ * system call does not alter the HV bit, so it is directed to the OS.
+ *
+ * Handling:
+ * scv instructions enter the kernel without changing EE, RI, ME, or HV.
+ * In particular, this means we can take a maskable interrupt at any point
+ * in the scv handler, which is unlike any other interrupt. This is solved
+ * by treating the instruction addresses in the handler as being soft-masked,
+ * by adding a SOFT_MASK_TABLE entry for them.
+ *
+ * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
+ * ensure scv is never executed with relocation off, which means AIL-0
+ * should never happen.
+ *
+ * Before leaving the following inside-__end_soft_masked text, at least one of the
+ * following must be true:
+ * - MSR[PR]=1 (i.e., return to userspace)
+ * - MSR_EE|MSR_RI is clear (no reentrant exceptions)
+ * - Standard kernel environment is set up (stack, paca, etc)
+ *
+ * KVM:
+ * These interrupts do not elevate HV 0->1, so HV is not involved. PR KVM
+ * ensures that FSCR[SCV] is disabled whenever it has to force AIL off.
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/arch/powerpc/syscall64-abi.rst
+ */
+EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+ /* SCV 0 */
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_tramp
+#else
+ b system_call_vectored_common
+#endif
+ nop
+
+ /* SCV 1 - 127 */
+ .rept 127
+ mr r9,r13
+ GET_PACA(r13)
+ mflr r11
+ mfctr r12
+ li r10,IRQS_ALL_DISABLED
+ stb r10,PACAIRQSOFTMASK(r13)
+ li r0,-1 /* cause failure */
+#ifdef CONFIG_RELOCATABLE
+ b system_call_vectored_sigill_tramp
+#else
+ b system_call_vectored_sigill
+#endif
+ .endr
+EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+
+// Treat scv vectors as soft-masked, see comment above.
+// Use absolute values rather than labels here, so they don't get relocated,
+// because this code runs unrelocated.
+SOFT_MASK_TABLE(0xc000000000003000, 0xc000000000004000)
+
+#ifdef CONFIG_RELOCATABLE
+TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_common, virt_trampolines)
+ mtctr r10
+ bctr
+
+TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
+ __LOAD_HANDLER(r10, system_call_vectored_sigill, virt_trampolines)
+ mtctr r10
+ bctr
+#endif
- . = 0xe80
-hv_doorbell_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_doorbell_hv
- /* We need to deal with the Altivec unavailable exception
- * here which is at 0xf20, thus in the middle of the
- * prolog code of the PerformanceMonitor one. A little
- * trickery is thus necessary
+/* No virt vectors corresponding with 0x0..0x100 */
+EXC_VIRT_NONE(0x4000, 0x100)
+
+
+/**
+ * Interrupt 0x100 - System Reset Interrupt (SRESET aka NMI).
+ * This is a non-maskable, asynchronous interrupt always taken in real-mode.
+ * It is caused by:
+ * - Wake from power-saving state, on powernv.
+ * - An NMI from another CPU, triggered by firmware or hypercall.
+ * - As crash/debug signal injected from BMC, firmware or hypervisor.
+ *
+ * Handling:
+ * Power-save wakeup is the only performance critical path, so this is
+ * determined as quickly as possible first. In this case volatile registers
+ * can be discarded and SPRs like CFAR don't need to be read.
+ *
+ * If not a powersave wakeup, then it's run as a regular interrupt, however
+ * it uses its own stack and PACA save area to preserve the regular kernel
+ * environment for debugging.
+ *
+ * This interrupt is not maskable, so triggering it when MSR[RI] is clear,
+ * or SCRATCH0 is in use, etc. may cause a crash. It's also not entirely
+ * correct to switch to virtual mode to run the regular interrupt handler
+ * because it might be interrupted when the MMU is in a bad state (e.g., SLB
+ * is clear).
+ *
+ * FWNMI:
+ * PAPR specifies a "fwnmi" facility which sends the sreset to a different
+ * entry point with a different register set up. Some hypervisors will
+ * send the sreset to 0x100 in the guest if it is not fwnmi capable.
+ *
+ * KVM:
+ * Unlike most SRR interrupts, this may be taken by the host while executing
+ * in a guest, so a KVM test is required. KVM will pull the CPU out of guest
+ * mode and then raise the sreset.
+ */
+INT_DEFINE_BEGIN(system_reset)
+ IVEC=0x100
+ IAREA=PACA_EXNMI
+ IVIRT=0 /* no virt entry point */
+ ISTACK=0
+ IKVM_REAL=1
+INT_DEFINE_END(system_reset)
+
+EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * If running native on arch 2.06 or later, check if we are waking up
+ * from nap/sleep/winkle, and branch to idle handler. This tests SRR1
+ * bits 46:47. A non-0 value indicates that we are coming from a power
+ * saving state. The idle wakeup handler initially runs in real mode,
+ * but we branch to the 0xc000... address so we can turn on relocation
+ * with mtmsrd later, after SPRs are restored.
+ *
+ * Careful to minimise cost for the fast path (idle wakeup) while
+ * also avoiding clobbering CFAR for the debug path (non-idle).
+ *
+ * For the idle wake case volatile registers can be clobbered, which
+ * is why we use those initially. If it turns out to not be an idle
+ * wake, carefully put everything back the way it was, so we can use
+ * common exception macros to handle it.
*/
- . = 0xf00
-performance_monitor_pseries_trampoline:
+BEGIN_FTR_SECTION
SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b performance_monitor_pSeries
+ GET_PACA(r13)
+ std r3,PACA_EXNMI+0*8(r13)
+ std r4,PACA_EXNMI+1*8(r13)
+ std r5,PACA_EXNMI+2*8(r13)
+ mfspr r3,SPRN_SRR1
+ mfocrf r4,0x80
+ rlwinm. r5,r3,47-31,30,31
+ bne+ system_reset_idle_wake
+ /* Not powersave wakeup. Restore regs for regular interrupt handler. */
+ mtocrf 0x80,r4
+ ld r3,PACA_EXNMI+0*8(r13)
+ ld r4,PACA_EXNMI+1*8(r13)
+ ld r5,PACA_EXNMI+2*8(r13)
+ GET_SCRATCH0(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
- . = 0xf20
-altivec_unavailable_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b altivec_unavailable_pSeries
+ GEN_INT_ENTRY system_reset, virt=0
+ /*
+ * In theory, we should not enable relocation here if it was disabled
+ * in SRR1, because the MMU may not be configured to support it (e.g.,
+ * SLB may have been cleared). In practice, there should only be a few
+ * small windows where that's the case, and sreset is considered to
+ * be dangerous anyway.
+ */
+EXC_REAL_END(system_reset, 0x100, 0x100)
+EXC_VIRT_NONE(0x4100, 0x100)
- . = 0xf40
-vsx_unavailable_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b vsx_unavailable_pSeries
+#ifdef CONFIG_PPC_P7_NAP
+TRAMP_REAL_BEGIN(system_reset_idle_wake)
+ /* We are waking up from idle, so may clobber any volatile register */
+ cmpwi cr1,r5,2
+ bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
+ __LOAD_FAR_HANDLER(r12, DOTSYM(idle_return_gpr_loss), real_trampolines)
+ mtctr r12
+ bctr
+#endif
- . = 0xf60
-facility_unavailable_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b facility_unavailable_pSeries
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Vectors for the FWNMI option. Share common code.
+ */
+TRAMP_REAL_BEGIN(system_reset_fwnmi)
+ GEN_INT_ENTRY system_reset, virt=0
- . = 0xf80
-hv_facility_unavailable_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b facility_unavailable_hv
+#endif /* CONFIG_PPC_PSERIES */
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_HV(0x1200, 0x1202, cbe_system_error)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
-#endif /* CONFIG_CBE_RAS */
+EXC_COMMON_BEGIN(system_reset_common)
+ __GEN_COMMON_ENTRY system_reset
+ /*
+ * Increment paca->in_nmi. When the interrupt entry wrapper later
+ * enables MSR_RI, then SLB or MCE will be able to recover, but a nested
+ * NMI will notice in_nmi and not recover because of the use of the NMI
+ * stack. in_nmi reentrancy is tested in system_reset_exception.
+ */
+ lhz r10,PACA_IN_NMI(r13)
+ addi r10,r10,1
+ sth r10,PACA_IN_NMI(r13)
- STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
- KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
+ mr r10,r1
+ ld r1,PACA_NMI_EMERG_SP(r13)
+ subi r1,r1,INT_FRAME_SIZE
+ __GEN_COMMON_BODY system_reset
- . = 0x1500
- .global denorm_exception_hv
-denorm_exception_hv:
- HMT_MEDIUM_PPR_DISCARD
- mtspr SPRN_SPRG_HSCRATCH0,r13
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(system_reset_exception)
-#ifdef CONFIG_PPC_DENORMALISATION
- mfspr r10,SPRN_HSRR1
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
- addi r11,r11,-4 /* HSRR0 is next instruction */
- bne+ denorm_assist
-#endif
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
- KVMTEST(0x1500)
- EXCEPTION_PROLOG_PSERIES_1(denorm_common, EXC_HV)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1500)
+ /*
+ * MSR_RI is clear, now we can decrement paca->in_nmi.
+ */
+ lhz r10,PACA_IN_NMI(r13)
+ subi r10,r10,1
+ sth r10,PACA_IN_NMI(r13)
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_HV(0x1600, 0x1602, cbe_maintenance)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
-#endif /* CONFIG_CBE_RAS */
+ kuap_kernel_restore r9, r10
+ EXCEPTION_RESTORE_REGS
+ RFI_TO_USER_OR_KERNEL
- STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1802)
-#else
- . = 0x1800
-#endif /* CONFIG_CBE_RAS */
+/**
+ * Interrupt 0x200 - Machine Check Interrupt (MCE).
+ * This is a non-maskable interrupt always taken in real-mode. It can be
+ * synchronous or asynchronous, caused by hardware or software, and it may be
+ * taken in a power-saving state.
+ *
+ * Handling:
+ * Similarly to system reset, this uses its own stack and PACA save area,
+ * the difference is re-entrancy is allowed on the machine check stack.
+ *
+ * machine_check_early is run in real mode, and carefully decodes the
+ * machine check and tries to handle it (e.g., flush the SLB if there was an
+ * error detected there), determines if it was recoverable and logs the
+ * event.
+ *
+ * This early code does not "reconcile" irq soft-mask state like SRESET or
+ * regular interrupts do, so irqs_disabled() among other things may not work
+ * properly (irq disable/enable already doesn't work because irq tracing can
+ * not work in real mode).
+ *
+ * Then, depending on the execution context when the interrupt is taken, there
+ * are 3 main actions:
+ * - Executing in kernel mode. The event is queued with irq_work, which means
+ * it is handled when it is next safe to do so (i.e., the kernel has enabled
+ * interrupts), which could be immediately when the interrupt returns. This
+ * avoids nasty issues like switching to virtual mode when the MMU is in a
+ * bad state, or when executing OPAL code. (SRESET is exposed to such issues,
+ * but it has different priorities). Check to see if the CPU was in power
+ * save, and return via the wake up code if it was.
+ *
+ * - Executing in user mode. machine_check_exception is run like a normal
+ * interrupt handler, which processes the data generated by the early handler.
+ *
+ * - Executing in guest mode. The interrupt is run with its KVM test, and
+ * branches to KVM to deal with. KVM may queue the event for the host
+ * to report later.
+ *
+ * This interrupt is not maskable, so if it triggers when MSR[RI] is clear,
+ * or SCRATCH0 is in use, it may cause a crash.
+ *
+ * KVM:
+ * See SRESET.
+ */
+INT_DEFINE_BEGIN(machine_check_early)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IDAR=1
+ IDSISR=1
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+INT_DEFINE_END(machine_check_early)
+
+INT_DEFINE_BEGIN(machine_check)
+ IVEC=0x200
+ IAREA=PACA_EXMC
+ IVIRT=0 /* no virt entry point */
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(machine_check)
+
+EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
+ EARLY_BOOT_FIXUP
+ GEN_INT_ENTRY machine_check_early, virt=0
+EXC_REAL_END(machine_check, 0x200, 0x100)
+EXC_VIRT_NONE(0x4200, 0x100)
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+ /* See comment at machine_check exception, don't turn on RI */
+ GEN_INT_ENTRY machine_check_early, virt=0
+#endif
-/*** Out of line interrupts support ***/
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r9,0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Decrement paca->in_mce now RI is clear. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ EXCEPTION_RESTORE_REGS
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+ __GEN_REALMODE_COMMON_ENTRY machine_check_early
- .align 7
- /* moved from 0x200 */
-machine_check_pSeries_early:
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
/*
- * Register contents:
- * R13 = PACA
- * R9 = CR
- * Original R9 to R13 is saved on PACA_EXMC
- *
* Switch to mc_emergency stack and handle re-entrancy (we limit
* the nested MCE upto level 4 to avoid stack overflow).
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -450,710 +1197,1201 @@ BEGIN_FTR_SECTION
* NOTE: We are here with MSR_ME=0 (off), which means we risk a
* checkstop if we get another machine check exception before we do
* rfid with MSR_ME=1.
+ *
+ * This interrupt can wake directly from idle. If that is the case,
+ * the machine check is handled then the idle wakeup code is called
+ * to restore state.
*/
- mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi r10,0 /* Are we in nested machine check */
- bne 0f /* Yes, we are. */
- /* First machine check entry */
- ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
-0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
- /* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,4
- bgt 2f /* Check if we hit limit of 4 */
- std r11,GPR1(r1) /* Save r1 on the stack. */
- std r11,0(r1) /* make stack chain pointer */
- mfspr r11,SPRN_SRR0 /* Save SRR0 */
- std r11,_NIP(r1)
- mfspr r11,SPRN_SRR1 /* Save SRR1 */
- std r11,_MSR(r1)
- mfspr r11,SPRN_DAR /* Save DAR */
- std r11,_DAR(r1)
- mfspr r11,SPRN_DSISR /* Save DSISR */
- std r11,_DSISR(r1)
- std r9,_CCR(r1) /* Save CR in stackframe */
- /* Save r9 through r13 from EXMC save area to stack frame. */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
- mfmsr r11 /* get MSR value */
- ori r11,r11,MSR_ME /* turn on ME bit */
- ori r11,r11,MSR_RI /* turn on RI bit */
- ld r12,PACAKBASE(r13) /* get high part of &label */
- LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r11
- rfid
- b . /* prevent speculative execution */
-2:
- /* Stack overflow. Stay on emergency stack and panic.
- * Keep the ME bit off while panic-ing, so that if we hit
- * another machine check we checkstop.
- */
- addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
- ld r11,PACAKMSR(r13)
- ld r12,PACAKBASE(r13)
- LOAD_HANDLER(r12, unrecover_mce)
- li r10,MSR_ME
- andc r11,r11,r10 /* Turn off MSR_ME */
- b 1b
- b . /* prevent speculative execution */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-machine_check_pSeries:
- .globl machine_check_fwnmi
-machine_check_fwnmi:
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_0(PACA_EXMC)
-machine_check_pSeries_0:
- EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST, 0x200)
- EXCEPTION_PROLOG_PSERIES_1(machine_check_common, EXC_STD)
- KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
- KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
- KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
+ mr r10,r1 /* Save r1 */
+ bne 1f
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+ bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+
+ __GEN_COMMON_BODY machine_check_early
-#ifdef CONFIG_PPC_DENORMALISATION
-denorm_assist:
BEGIN_FTR_SECTION
-/*
- * To denormalise we need to move a copy of the register to itself.
- * For POWER6 do that here for all FP regs.
- */
- mfmsr r10
- ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
- xori r10,r10,(MSR_FE0|MSR_FE1)
- mtmsrd r10
- sync
+ bl enable_machine_check
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+ addi r3,r1,STACK_INT_FRAME_REGS
+BEGIN_FTR_SECTION
+ bl CFUNC(machine_check_early_boot)
+END_FTR_SECTION(0, 1) // nop out after boot
+ bl CFUNC(machine_check_early)
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
-#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
-#define FMR4(n) FMR2(n) ; FMR2(n+2)
-#define FMR8(n) FMR4(n) ; FMR4(n+4)
-#define FMR16(n) FMR8(n) ; FMR8(n+8)
-#define FMR32(n) FMR16(n) ; FMR16(n+16)
- FMR32(0)
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
+ */
+BEGIN_FTR_SECTION
+ rlwinm. r11,r12,47-31,30,31
+ bne machine_check_idle_common
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
-FTR_SECTION_ELSE
-/*
- * To denormalise we need to move a copy of the register to itself.
- * For POWER7 do that here for the first 32 VSX registers only.
- */
- mfmsr r10
- oris r10,r10,MSR_VSX@h
- mtmsrd r10
- sync
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ /*
+ * Check if we are coming from guest. If yes, then run the normal
+ * exception handler which will take the
+ * machine_check_kvm->kvm_interrupt branch to deliver the MC event
+ * to guest.
+ */
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne mce_deliver /* continue if we are. */
+#endif
-#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
-#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
-#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
-#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
-#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
- XVCPSGNDP32(0)
+ /*
+ * Check if we are coming from userspace. If yes, then run the normal
+ * exception handler which will deliver the MC event to this kernel.
+ */
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne mce_deliver /* continue in V mode if we are. */
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
+ /*
+ * At this point we are coming from kernel context.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ beq unrecoverable_mce
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq unrecoverable_mce /* if !handled then panic */
+
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl CFUNC(machine_check_queue_event)
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+
+mce_deliver:
+ /*
+ * This is a host user or guest MCE. Restore all registers, then
+ * run the "late" handler. For host user, this will run the
+ * machine_check_exception handler in virtual mode like a normal
+ * interrupt handler. For guest, this will trigger the KVM test
+ * and branch to the KVM interrupt similarly to other interrupts.
+ */
BEGIN_FTR_SECTION
- b denorm_done
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
-/*
- * To denormalise we need to move a copy of the register to itself.
- * For POWER8 we need to do that for all 64 VSX registers
- */
- XVCPSGNDP32(32)
-denorm_done:
- mtspr SPRN_HSRR0,r11
- mtcrf 0x80,r9
- ld r9,PACA_EXGEN+EX_R9(r13)
- RESTORE_PPR_PACA(PACA_EXGEN, r10)
-BEGIN_FTR_SECTION
- ld r10,PACA_EXGEN+EX_CFAR(r13)
+ ld r10,ORIG_GPR3(r1)
mtspr SPRN_CFAR,r10
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- ld r12,PACA_EXGEN+EX_R12(r13)
- ld r13,PACA_EXGEN+EX_R13(r13)
- HRFID
- b .
-#endif
+ MACHINE_CHECK_HANDLER_WINDUP
+ GEN_INT_ENTRY machine_check, virt=0
- .align 7
- /* moved from 0xe00 */
- STD_EXCEPTION_HV_OOL(0xe02, h_data_storage)
- KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0xe02)
- STD_EXCEPTION_HV_OOL(0xe22, h_instr_storage)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe22)
- STD_EXCEPTION_HV_OOL(0xe42, emulation_assist)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe42)
- MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
-
- MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
-
- /* moved from 0xf00 */
- STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
- STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
- STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
- STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
- KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
- STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
- KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
+EXC_COMMON_BEGIN(machine_check_common)
+ /*
+ * Machine check is different because we use a different
+ * save area: PACA_EXMC instead of PACA_EXGEN.
+ */
+ GEN_COMMON machine_check
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(machine_check_exception_async)
+ b interrupt_return_srr
-/*
- * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
- * - If it was a decrementer interrupt, we bump the dec to max and and return.
- * - If it was a doorbell we return immediately since doorbells are edge
- * triggered and won't automatically refire.
- * - If it was a HMI we return immediately since we handled it in realmode
- * and it won't refire.
- * - else we hard disable and return.
- * This is called with r10 containing the value to OR to the paca field.
- */
-#define MASKED_INTERRUPT(_H) \
-masked_##_H##interrupt: \
- std r11,PACA_EXGEN+EX_R11(r13); \
- lbz r11,PACAIRQHAPPENED(r13); \
- or r11,r11,r10; \
- stb r11,PACAIRQHAPPENED(r13); \
- cmpwi r10,PACA_IRQ_DEC; \
- bne 1f; \
- lis r10,0x7fff; \
- ori r10,r10,0xffff; \
- mtspr SPRN_DEC,r10; \
- b 2f; \
-1: cmpwi r10,PACA_IRQ_DBELL; \
- beq 2f; \
- cmpwi r10,PACA_IRQ_HMI; \
- beq 2f; \
- mfspr r10,SPRN_##_H##SRR1; \
- rldicl r10,r10,48,1; /* clear MSR_EE */ \
- rotldi r10,r10,16; \
- mtspr SPRN_##_H##SRR1,r10; \
-2: mtcrf 0x80,r9; \
- ld r9,PACA_EXGEN+EX_R9(r13); \
- ld r10,PACA_EXGEN+EX_R10(r13); \
- ld r11,PACA_EXGEN+EX_R11(r13); \
- GET_SCRATCH0(r13); \
- ##_H##rfid; \
- b .
-
- MASKED_INTERRUPT()
- MASKED_INTERRUPT(H)
+#ifdef CONFIG_PPC_P7_NAP
/*
- * Called from arch_local_irq_enable when an interrupt needs
- * to be resent. r3 contains 0x500, 0x900, 0xa00 or 0xe80 to indicate
- * which kind of interrupt. MSR:EE is already off. We generate a
- * stackframe like if a real interrupt had happened.
- *
- * Note: While MSR:EE is off, we need to make sure that _MSR
- * in the generated frame has EE set to 1 or the exception
- * handler will not properly re-enable them.
+ * This is an idle wakeup. Low level machine check has already been
+ * done. Queue the event then call the idle code to do the wake up.
*/
-_GLOBAL(__replay_interrupt)
- /* We are going to jump to the exception common code which
- * will retrieve various register values from the PACA which
- * we don't give a damn about, so we don't bother storing them.
+EXC_COMMON_BEGIN(machine_check_idle_common)
+ bl CFUNC(machine_check_queue_event)
+
+ /*
+ * GPR-loss wakeups are relatively straightforward, because the
+ * idle sleep code has saved all non-volatile registers on its
+ * own stack, and r1 in PACAR1.
+ *
+ * For no-loss wakeups the r1 and lr registers used by the
+ * early machine check handler have to be restored first. r2 is
+ * the kernel TOC, so no need to restore it.
+ *
+ * Then decrement MCE nesting after finishing with the stack.
*/
- mfmsr r12
- mflr r11
- mfcr r9
- ori r12,r12,MSR_EE
- cmpwi r3,0x900
- beq decrementer_common
- cmpwi r3,0x500
- beq hardware_interrupt_common
-BEGIN_FTR_SECTION
- cmpwi r3,0xe80
- beq h_doorbell_common
-FTR_SECTION_ELSE
- cmpwi r3,0xa00
- beq doorbell_super_common
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
- blr
+ ld r3,_MSR(r1)
+ ld r4,_LINK(r1)
+ ld r1,GPR1(r1)
-#ifdef CONFIG_PPC_PSERIES
-/*
- * Vectors for the FWNMI option. Share common code.
- */
- .globl system_reset_fwnmi
- .align 7
-system_reset_fwnmi:
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13) /* save r13 */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
- NOTEST, 0x100)
+ lhz r11,PACA_IN_MCE(r13)
+ subi r11,r11,1
+ sth r11,PACA_IN_MCE(r13)
-#endif /* CONFIG_PPC_PSERIES */
+ mtlr r4
+ rlwinm r10,r3,47-31,30,31
+ cmpwi cr1,r10,2
+ bltlr cr1 /* no state loss, return to idle caller with r3=SRR1 */
+ b idle_return_gpr_loss
+#endif
-#ifdef __DISABLED__
-/*
- * This is used for when the SLB miss handler has to go virtual,
- * which doesn't happen for now anymore but will once we re-implement
- * dynamic VSIDs for shared page tables
- */
-slb_miss_user_pseries:
- std r10,PACA_EXGEN+EX_R10(r13)
- std r11,PACA_EXGEN+EX_R11(r13)
- std r12,PACA_EXGEN+EX_R12(r13)
- GET_SCRATCH0(r10)
- ld r11,PACA_EXSLB+EX_R9(r13)
- ld r12,PACA_EXSLB+EX_R3(r13)
- std r10,PACA_EXGEN+EX_R13(r13)
- std r11,PACA_EXGEN+EX_R9(r13)
- std r12,PACA_EXGEN+EX_R3(r13)
- clrrdi r12,r13,32
- mfmsr r10
- mfspr r11,SRR0 /* save SRR0 */
- ori r12,r12,slb_miss_user_common@l /* virt addr of handler */
- ori r10,r10,MSR_IR|MSR_DR|MSR_RI
- mtspr SRR0,r12
- mfspr r12,SRR1 /* and SRR1 */
- mtspr SRR1,r10
- rfid
- b . /* prevent spec. execution */
-#endif /* __DISABLED__ */
+EXC_COMMON_BEGIN(unrecoverable_mce)
+ /*
+ * We are going down. But there are chances that we might get hit by
+ * another MCE during panic path and we may run into unstable state
+ * with no way out. Hence, turn ME bit off while going down, so that
+ * when another MCE is hit during panic path, system will checkstop
+ * and hypervisor will get restarted cleanly by SP.
+ */
+BEGIN_FTR_SECTION
+ li r10,0 /* clear MSR_RI */
+ mtmsrd r10,1
+ bl CFUNC(disable_machine_check)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+ ld r10,PACAKMSR(r13)
+ li r3,MSR_ME
+ andc r10,r10,r3
+ mtmsrd r10
+
+ lhz r12,PACA_IN_MCE(r13)
+ subi r12,r12,1
+ sth r12,PACA_IN_MCE(r13)
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-kvmppc_skip_interrupt:
/*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
+ * Invoke machine_check_exception to print MCE event and panic.
+ * This is the NMI version of the handler because we are called from
+ * the early handler which is a true NMI.
*/
- mfspr r13, SPRN_SRR0
- addi r13, r13, 4
- mtspr SPRN_SRR0, r13
- GET_SCRATCH0(r13)
- rfid
- b .
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(machine_check_exception)
-kvmppc_skip_Hinterrupt:
/*
- * Here all GPRs are unchanged from when the interrupt happened
- * except for r13, which is saved in SPRG_SCRATCH0.
+ * We will not reach here. Even if we did, there is no way out.
+ * Call unrecoverable_exception and die.
*/
- mfspr r13, SPRN_HSRR0
- addi r13, r13, 4
- mtspr SPRN_HSRR0, r13
- GET_SCRATCH0(r13)
- hrfid
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unrecoverable_exception)
b .
-#endif
-/*
- * Code from here down to __end_handlers is invoked from the
- * exception prologs above. Because the prologs assemble the
- * addresses of these handlers using the LOAD_HANDLER macro,
- * which uses an ori instruction, these handlers must be in
- * the first 64k of the kernel image.
+
+/**
+ * Interrupt 0x300 - Data Storage Interrupt (DSI).
+ * This is a synchronous interrupt generated due to a data access exception,
+ * e.g., a load or store which does not have a valid page table entry with
+ * permissions. DAWR matches also fault here, as do RC updates, and minor misc
+ * errors e.g., copy/paste, AMO, certain invalid CI accesses, etc.
+ *
+ * Handling:
+ * - Hash MMU
+ * Go to do_hash_fault, which attempts to fill the HPT from an entry in the
+ * Linux page table. Hash faults can hit in kernel mode in a fairly
+ * arbitrary state (e.g., interrupts disabled, locks held) when accessing
+ * "non-bolted" regions, e.g., vmalloc space. However these should always be
+ * backed by Linux page table entries.
+ *
+ * If no entry is found the Linux page fault handler is invoked (by
+ * do_hash_fault). Linux page faults can happen in kernel mode due to user
+ * copy operations of course.
+ *
+ * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
+ * MMU context, which may cause a DSI in the host, which must go to the
+ * KVM handler. MSR[IR] is not enabled, so the real-mode handler will
+ * always be used regardless of AIL setting.
+ *
+ * - Radix MMU
+ * The hardware loads from the Linux page table directly, so a fault goes
+ * immediately to Linux page fault.
+ *
+ * Conditions like DAWR match are handled on the way in to Linux page fault.
*/
+INT_DEFINE_BEGIN(data_access)
+ IVEC=0x300
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access)
+
+EXC_REAL_BEGIN(data_access, 0x300, 0x80)
+ GEN_INT_ENTRY data_access, virt=0
+EXC_REAL_END(data_access, 0x300, 0x80)
+EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
+ GEN_INT_ENTRY data_access, virt=1
+EXC_VIRT_END(data_access, 0x4300, 0x80)
+/*
+ * Common handler body for vector 0x300: dispatch to do_break() when the
+ * saved DSISR has the DSISR_DABRMATCH bit set, otherwise to the
+ * hash-fault or page-fault C handler.
+ */
+EXC_COMMON_BEGIN(data_access_common)
+ GEN_COMMON data_access
+ /* r4 = DSISR value saved in the frame at r1 */
+ ld r4,_DSISR(r1)
+ /* r3 = r1 + STACK_INT_FRAME_REGS, argument for whichever C handler runs */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ /* Test the DSISR_DABRMATCH bit (upper halfword); r0 is scratch */
+ andis. r0,r4,DSISR_DABRMATCH@h
+ bne- 1f
+ /*
+ * With hash MMU support built in, the feature section picks
+ * do_hash_fault or do_page_fault based on MMU_FTR_TYPE_RADIX;
+ * without it, do_page_fault is called unconditionally.
+ */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+ bl CFUNC(do_hash_fault)
+MMU_FTR_SECTION_ELSE
+ bl CFUNC(do_page_fault)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
+#else
+ bl CFUNC(do_page_fault)
+#endif
+ b interrupt_return_srr
-/*** Common interrupt handlers ***/
+ /* DABRMATCH path: r3 was already set up above */
+1: bl CFUNC(do_break)
+ /*
+ * do_break() may have changed the NV GPRS while handling a breakpoint.
+ * If so, we need to restore them with their updated values.
+ */
+ HANDLER_RESTORE_NVGPRS()
+ b interrupt_return_srr
- STD_EXCEPTION_COMMON(0x100, system_reset, system_reset_exception)
- STD_EXCEPTION_COMMON_ASYNC(0x500, hardware_interrupt, do_IRQ)
- STD_EXCEPTION_COMMON_ASYNC(0x900, decrementer, timer_interrupt)
- STD_EXCEPTION_COMMON(0x980, hdecrementer, hdec_interrupt)
-#ifdef CONFIG_PPC_DOORBELL
- STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, doorbell_exception)
+/**
+ * Interrupt 0x380 - Data Segment Interrupt (DSLB).
+ * This is a synchronous interrupt in response to an MMU fault missing SLB
+ * entry for HPT, or an address outside RPT translation range.
+ *
+ * Handling:
+ * - HPT:
+ * This refills the SLB, or reports an access fault similarly to a bad page
+ * fault. When coming from user-mode, the SLB handler may access any kernel
+ * data, though it may itself take a DSLB. When coming from kernel mode,
+ * recursive faults must be avoided so access is restricted to the kernel
+ * image text/data, kernel stack, and any data allocated below
+ * ppc64_bolted_size (first segment). The kernel handler must avoid stomping
+ * on user-handler data structures.
+ *
+ * KVM: Same as 0x300, DSLB must test for KVM guest.
+ */
+INT_DEFINE_BEGIN(data_access_slb)
+ IVEC=0x380
+ IDAR=1
+ IKVM_REAL=1
+INT_DEFINE_END(data_access_slb)
+
+EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
+ GEN_INT_ENTRY data_access_slb, virt=0
+EXC_REAL_END(data_access_slb, 0x380, 0x80)
+EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
+ GEN_INT_ENTRY data_access_slb, virt=1
+EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
+EXC_COMMON_BEGIN(data_access_slb_common)
+ GEN_COMMON data_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_slb_fault)
+ cmpdi r3,0
+ bne- 1f
+ b fast_interrupt_return_srr
+1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- STD_EXCEPTION_COMMON_ASYNC(0xa00, doorbell_super, unknown_exception)
+ li r3,-EFAULT
#endif
- STD_EXCEPTION_COMMON(0xb00, trap_0b, unknown_exception)
- STD_EXCEPTION_COMMON(0xd00, single_step, single_step_exception)
- STD_EXCEPTION_COMMON(0xe00, trap_0e, unknown_exception)
- STD_EXCEPTION_COMMON(0xe40, emulation_assist, emulation_assist_interrupt)
- STD_EXCEPTION_COMMON_ASYNC(0xe60, hmi_exception, handle_hmi_exception)
-#ifdef CONFIG_PPC_DOORBELL
- STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, doorbell_exception)
-#else
- STD_EXCEPTION_COMMON_ASYNC(0xe80, h_doorbell, unknown_exception)
+ std r3,RESULT(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_bad_segment_interrupt)
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x400 - Instruction Storage Interrupt (ISI).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSI, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR), and status in SRR1 (rather than DSISR).
+ */
+INT_DEFINE_BEGIN(instruction_access)
+ IVEC=0x400
+ IISIDE=1
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
#endif
- STD_EXCEPTION_COMMON_ASYNC(0xf00, performance_monitor, performance_monitor_exception)
- STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, instruction_breakpoint_exception)
- STD_EXCEPTION_COMMON(0x1502, denorm, unknown_exception)
-#ifdef CONFIG_ALTIVEC
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, altivec_assist_exception)
+INT_DEFINE_END(instruction_access)
+
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
+ GEN_INT_ENTRY instruction_access, virt=0
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
+ GEN_INT_ENTRY instruction_access, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+EXC_COMMON_BEGIN(instruction_access_common)
+ GEN_COMMON instruction_access
+ addi r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+ bl CFUNC(do_hash_fault)
+MMU_FTR_SECTION_ELSE
+ bl CFUNC(do_page_fault)
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- STD_EXCEPTION_COMMON(0x1700, altivec_assist, unknown_exception)
+ bl CFUNC(do_page_fault)
#endif
-#ifdef CONFIG_CBE_RAS
- STD_EXCEPTION_COMMON(0x1200, cbe_system_error, cbe_system_error_exception)
- STD_EXCEPTION_COMMON(0x1600, cbe_maintenance, cbe_maintenance_exception)
- STD_EXCEPTION_COMMON(0x1800, cbe_thermal, cbe_thermal_exception)
-#endif /* CONFIG_CBE_RAS */
+ b interrupt_return_srr
- /*
- * Relocation-on interrupts: A subset of the interrupts can be delivered
- * with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
- * it. Addresses are the same as the original interrupt addresses, but
- * offset by 0xc000000000004000.
- * It's impossible to receive interrupts below 0x300 via this mechanism.
- * KVM: None of these traps are from the guest ; anything that escalated
- * to HV=1 from HV=0 is delivered via real mode handlers.
- */
- /*
- * This uses the standard macro, since the original 0x300 vector
- * only has extra guff for STAB-based processors -- which never
- * come here.
- */
- STD_RELON_EXCEPTION_PSERIES(0x4300, 0x300, data_access)
- . = 0x4380
- .globl data_access_slb_relon_pSeries
-data_access_slb_relon_pSeries:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_DAR
- mfspr r12,SPRN_SRR1
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
-#else
- /*
- * We can't just use a direct branch to slb_miss_realmode
- * because the distance from here to there depends on where
- * the kernel ends up being put.
- */
- mfctr r11
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
+/**
+ * Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
+ * This is a synchronous interrupt in response to an MMU fault due to an
+ * instruction fetch.
+ *
+ * Handling:
+ * Similar to DSLB, though in response to fetch. The faulting address is found
+ * in SRR0 (rather than DAR).
+ */
+INT_DEFINE_BEGIN(instruction_access_slb)
+ IVEC=0x480
+ IISIDE=1
+ IDAR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
#endif
-
- STD_RELON_EXCEPTION_PSERIES(0x4400, 0x400, instruction_access)
- . = 0x4480
- .globl instruction_access_slb_relon_pSeries
-instruction_access_slb_relon_pSeries:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- std r3,PACA_EXSLB+EX_R3(r13)
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r12,SPRN_SRR1
-#ifndef CONFIG_RELOCATABLE
- b slb_miss_realmode
+INT_DEFINE_END(instruction_access_slb)
+
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
+ GEN_INT_ENTRY instruction_access_slb, virt=0
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
+ GEN_INT_ENTRY instruction_access_slb, virt=1
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
+EXC_COMMON_BEGIN(instruction_access_slb_common)
+ GEN_COMMON instruction_access_slb
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+ /* HPT case, do SLB fault */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_slb_fault)
+ cmpdi r3,0
+ bne- 1f
+ b fast_interrupt_return_srr
+1: /* Error case */
+MMU_FTR_SECTION_ELSE
+ /* Radix case, access is outside page table range */
+ li r3,-EFAULT
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- mfctr r11
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10, slb_miss_realmode)
- mtctr r10
- bctr
+ li r3,-EFAULT
#endif
-
- . = 0x4500
- .globl hardware_interrupt_relon_pSeries;
- .globl hardware_interrupt_relon_hv;
-hardware_interrupt_relon_pSeries:
-hardware_interrupt_relon_hv:
+ std r3,RESULT(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_bad_segment_interrupt)
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x500 - External Interrupt.
+ * This is an asynchronous maskable interrupt in response to an "external
+ * exception" from the interrupt controller or hypervisor (e.g., device
+ * interrupt). It is maskable in hardware by clearing MSR[EE], and
+ * soft-maskable with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * When running in HV mode, Linux sets up the LPCR[LPES] bit such that
+ * interrupts are delivered with HSRR registers, guests use SRRs, which
+ * requires IHSRR_IF_HVMODE.
+ *
+ * On bare metal POWER9 and later, Linux sets the LPCR[HVICE] bit such that
+ * external interrupts are delivered as Hypervisor Virtualization Interrupts
+ * rather than External Interrupts.
+ *
+ * Handling:
+ * This calls into Linux IRQ handler. NVGPRs are not saved to reduce overhead,
+ * because registers at the time of the interrupt are not so important as it is
+ * asynchronous.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not required because this is an asynchronous interrupt that in
+ * general won't have much bearing on the state of the CPU, with the possible
+ * exception of crash/debug IPIs, but those are generally moving to use SRESET
+ * IPIs. Unless this is an HV interrupt and KVM HV is possible, in which case
+ * it may be exiting the guest and need CFAR to be saved.
+ */
+INT_DEFINE_BEGIN(hardware_interrupt)
+ IVEC=0x500
+ IHSRR_IF_HVMODE=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+ ICFAR=0
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR_IF_HVMODE=1
+#endif
+INT_DEFINE_END(hardware_interrupt)
+
+EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
+ GEN_INT_ENTRY hardware_interrupt, virt=0
+EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
+EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
+ GEN_INT_ENTRY hardware_interrupt, virt=1
+EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
+EXC_COMMON_BEGIN(hardware_interrupt_common)
+ GEN_COMMON hardware_interrupt
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_IRQ)
BEGIN_FTR_SECTION
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x502, hardware_interrupt, EXC_HV, SOFTEN_TEST_HV)
+ b interrupt_return_hsrr
FTR_SECTION_ELSE
- _MASKABLE_RELON_EXCEPTION_PSERIES(0x500, hardware_interrupt, EXC_STD, SOFTEN_TEST_PR)
- ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
- STD_RELON_EXCEPTION_PSERIES(0x4600, 0x600, alignment)
- STD_RELON_EXCEPTION_PSERIES(0x4700, 0x700, program_check)
- STD_RELON_EXCEPTION_PSERIES(0x4800, 0x800, fp_unavailable)
- MASKABLE_RELON_EXCEPTION_PSERIES(0x4900, 0x900, decrementer)
- STD_RELON_EXCEPTION_HV(0x4980, 0x982, hdecrementer)
- MASKABLE_RELON_EXCEPTION_PSERIES(0x4a00, 0xa00, doorbell_super)
- STD_RELON_EXCEPTION_PSERIES(0x4b00, 0xb00, trap_0b)
-
- . = 0x4c00
- .globl system_call_relon_pSeries
-system_call_relon_pSeries:
- HMT_MEDIUM
- SYSCALL_PSERIES_1
- SYSCALL_PSERIES_2_DIRECT
- SYSCALL_PSERIES_3
-
- STD_RELON_EXCEPTION_PSERIES(0x4d00, 0xd00, single_step)
+ b interrupt_return_srr
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
- . = 0x4e00
- b . /* Can't happen, see v2.07 Book III-S section 6.5 */
- . = 0x4e20
- b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+/**
+ * Interrupt 0x600 - Alignment Interrupt
+ * This is a synchronous interrupt in response to data alignment fault.
+ */
+INT_DEFINE_BEGIN(alignment)
+ IVEC=0x600
+ IDAR=1
+ IDSISR=1
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(alignment)
+
+EXC_REAL_BEGIN(alignment, 0x600, 0x100)
+ GEN_INT_ENTRY alignment, virt=0
+EXC_REAL_END(alignment, 0x600, 0x100)
+EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
+ GEN_INT_ENTRY alignment, virt=1
+EXC_VIRT_END(alignment, 0x4600, 0x100)
+/*
+ * Common handler body for vector 0x600: call alignment_exception(),
+ * reload the non-volatile GPRs, then leave through interrupt_return_srr.
+ */
+EXC_COMMON_BEGIN(alignment_common)
+ GEN_COMMON alignment
+ /* r3 = r1 + STACK_INT_FRAME_REGS, argument for the C handler */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(alignment_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x700 - Program Interrupt (program check).
+ * This is a synchronous interrupt in response to various instruction faults:
+ * traps, privilege errors, TM errors, floating point exceptions.
+ *
+ * Handling:
+ * This interrupt may use the "emergency stack" in some cases when being taken
+ * from kernel context, which complicates handling.
+ */
+INT_DEFINE_BEGIN(program_check)
+ IVEC=0x700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(program_check)
+
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ EARLY_BOOT_FIXUP
+ GEN_INT_ENTRY program_check, virt=0
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
+ GEN_INT_ENTRY program_check, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+EXC_COMMON_BEGIN(program_check_common)
+ __GEN_COMMON_ENTRY program_check
- . = 0x4e40
-emulation_assist_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b emulation_assist_relon_hv
+ /*
+ * It's possible to receive a TM Bad Thing type program check with
+ * userspace register values (in particular r1), but with SRR1 reporting
+ * that we came from the kernel. Normally that would confuse the bad
+ * stack logic, and we would report a bad kernel stack pointer. Instead
+ * we switch to the emergency stack if we're taking a TM Bad Thing from
+ * the kernel.
+ */
- . = 0x4e60
- b . /* Can't happen, see v2.07 Book III-S section 6.5 */
+ andi. r10,r12,MSR_PR
+ bne .Lnormal_stack /* If userspace, go normal path */
- . = 0x4e80
-h_doorbell_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b h_doorbell_relon_hv
+ andis. r10,r12,(SRR1_PROGTM)@h
+ bne .Lemergency_stack /* If TM, emergency */
- . = 0x4f00
-performance_monitor_relon_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b performance_monitor_relon_pSeries
+ cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */
+ blt .Lnormal_stack /* normal path if not */
- . = 0x4f20
-altivec_unavailable_relon_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b altivec_unavailable_relon_pSeries
+ /* Use the emergency stack */
+.Lemergency_stack:
+ andi. r10,r12,MSR_PR /* Set CR0 correctly for label */
+ /* 3 in EXCEPTION_PROLOG_COMMON */
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ __ISTACK(program_check)=0
+ __GEN_COMMON_BODY program_check
+ b .Ldo_program_check
- . = 0x4f40
-vsx_unavailable_relon_pseries_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b vsx_unavailable_relon_pSeries
+.Lnormal_stack:
+ __ISTACK(program_check)=1
+ __GEN_COMMON_BODY program_check
- . = 0x4f60
-facility_unavailable_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b facility_unavailable_relon_pSeries
+.Ldo_program_check:
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(program_check_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_srr
- . = 0x4f80
-hv_facility_unavailable_relon_trampoline:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hv_facility_unavailable_relon_hv
- STD_RELON_EXCEPTION_PSERIES(0x5300, 0x1300, instruction_breakpoint)
-#ifdef CONFIG_PPC_DENORMALISATION
- . = 0x5500
- b denorm_exception_hv
+/*
+ * Interrupt 0x800 - Floating-Point Unavailable Interrupt.
+ * This is a synchronous interrupt in response to executing an fp instruction
+ * with MSR[FP]=0.
+ *
+ * Handling:
+ * This will load FP registers and enable the FP bit if coming from userspace,
+ * otherwise report a bad kernel use of FP.
+ */
+INT_DEFINE_BEGIN(fp_unavailable)
+ IVEC=0x800
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ IMSR_R12=1
+INT_DEFINE_END(fp_unavailable)
+
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
+ GEN_INT_ENTRY fp_unavailable, virt=0
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
+ GEN_INT_ENTRY fp_unavailable, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+/*
+ * Common handler body for vector 0x800. Three exits are visible here:
+ * - kernel_fp_unavailable_exception() followed by a trap,
+ * - load_up_fpu() then fast_interrupt_return_srr,
+ * - fp_unavailable_tm() then interrupt_return_srr (TM builds only).
+ */
+EXC_COMMON_BEGIN(fp_unavailable_common)
+ GEN_COMMON fp_unavailable
+ /*
+ * NOTE(review): this branch consumes a condition-register result that
+ * is not set in this block; presumably GEN_COMMON leaves the MSR_PR
+ * test in CR0 -- confirm in the macro definition.
+ */
+ bne 1f /* if from user, just load it up */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(kernel_fp_unavailable_exception)
+ /*
+ * Reached only if the call above returns: trap, with a bug entry
+ * emitted for the trap instruction at local label 0.
+ */
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+1:
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
+ * transaction), go do TM stuff
+ */
+ rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
+ bne- 2f
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+ bl CFUNC(load_up_fpu)
+ b fast_interrupt_return_srr
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+2: /* User process was in a transaction */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(fp_unavailable_tm)
+ b interrupt_return_srr
#endif
- STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
- /* Other future vectors */
- .align 7
- .globl __end_interrupts
-__end_interrupts:
- .align 7
-system_call_entry_direct:
-#if defined(CONFIG_RELOCATABLE)
- /* The first level prologue may have used LR to get here, saving
- * orig in r10. To save hacking/ifdeffing common code, restore here.
- */
- mtlr r10
+/**
+ * Interrupt 0x900 - Decrementer Interrupt.
+ * This is an asynchronous interrupt in response to a decrementer exception
+ * (e.g., DEC has wrapped below zero). It is maskable in hardware by clearing
+ * MSR[EE], and soft-maskable with IRQS_DISABLED mask (i.e.,
+ * local_irq_disable()).
+ *
+ * Handling:
+ * This calls into Linux timer handler. NVGPRs are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and bump the decrementer to a high value, leaving MSR[EE] enabled
+ * in the interrupted context.
+ * If PPC_WATCHDOG is configured, the soft masked handler will actually set
+ * things back up to run soft_nmi_interrupt as a regular interrupt handler
+ * on the emergency stack.
+ *
+ * CFAR is not required because this is asynchronous (see hardware_interrupt).
+ * A watchdog interrupt may like to have CFAR, but usually the interesting
+ * branch is long gone by that point (e.g., infinite loop).
+ */
+INT_DEFINE_BEGIN(decrementer)
+ IVEC=0x900
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
#endif
-system_call_entry:
- b system_call_common
+ ICFAR=0
+INT_DEFINE_END(decrementer)
+
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
+ GEN_INT_ENTRY decrementer, virt=0
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
+ GEN_INT_ENTRY decrementer, virt=1
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+/*
+ * Common handler body for vector 0x900: call timer_interrupt() and
+ * leave through interrupt_return_srr.
+ */
+EXC_COMMON_BEGIN(decrementer_common)
+ GEN_COMMON decrementer
+ /* r3 = r1 + STACK_INT_FRAME_REGS, argument for the C handler */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(timer_interrupt)
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0x980 - Hypervisor Decrementer Interrupt.
+ * This is an asynchronous interrupt, similar to 0x900 but for the HDEC
+ * register.
+ *
+ * Handling:
+ * Linux does not use this outside KVM where it's used to keep a host timer
+ * while the guest is given control of DEC. It should normally be caught by
+ * the KVM test and routed there.
+ */
+INT_DEFINE_BEGIN(hdecrementer)
+ IVEC=0x980
+ IHSRR=1
+ ISTACK=0
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(hdecrementer)
+
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
+ GEN_INT_ENTRY hdecrementer, virt=0
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
+ GEN_INT_ENTRY hdecrementer, virt=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+/*
+ * Common handler body for vector 0x980. No C handler is called: the
+ * code restores the registers saved in PACA_EXGEN and returns with
+ * HRFI_TO_KERNEL.
+ */
+EXC_COMMON_BEGIN(hdecrementer_common)
+ __GEN_COMMON_ENTRY hdecrementer
+ /*
+ * Hypervisor decrementer interrupts not caught by the KVM test
+ * shouldn't occur but are sometimes left pending on exit from a KVM
+ * guest. We don't need to do anything to clear them, as they are
+ * edge-triggered.
+ *
+ * Be careful to avoid touching the kernel stack.
+ */
+ /* Store a zero byte to PACAHSRR_VALID in the PACA (base r13) */
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
+ /* Reload CTR from the PACA_EXGEN save area, using r10 as scratch */
+ ld r10,PACA_EXGEN+EX_CTR(r13)
+ mtctr r10
+ /* Restore CR field 0 from r9 before r9 itself is reloaded */
+ mtcrf 0x80,r9
+ /*
+ * Reload r9-r13 from PACA_EXGEN. r13 goes last because it is the
+ * base register for every load in this sequence.
+ */
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFI_TO_KERNEL
-ppc64_runlatch_on_trampoline:
- b __ppc64_runlatch_on
-/*
- * Here r13 points to the paca, r9 contains the saved CR,
- * SRR0 and SRR1 are saved in r11 and r12,
- * r9 - r13 are saved in paca->exgen.
+/**
+ * Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsndp doorbell.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * Guests may use this for IPIs between threads in a core if the
+ * hypervisor supports it. NVGPRS are not saved (see 0x500).
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, leaving MSR[EE] enabled in the interrupted context because the
+ * doorbells are edge triggered.
+ *
+ * CFAR is not required, similarly to hardware_interrupt.
*/
- .align 7
- .globl data_access_common
-data_access_common:
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- b do_hash_page /* Try to handle as hpte fault */
-
- .align 7
- .globl h_data_storage_common
-h_data_storage_common:
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0xe00, PACA_EXGEN)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl unknown_exception
- b ret_from_except
+INT_DEFINE_BEGIN(doorbell_super)
+ IVEC=0xa00
+ IMASK=IRQS_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ ICFAR=0
+INT_DEFINE_END(doorbell_super)
+
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
+ GEN_INT_ENTRY doorbell_super, virt=0
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
+ GEN_INT_ENTRY doorbell_super, virt=1
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+/*
+ * Common handler body for vector 0xa00: call doorbell_exception() when
+ * CONFIG_PPC_DOORBELL is set, otherwise unknown_async_exception(), then
+ * leave through interrupt_return_srr.
+ */
+EXC_COMMON_BEGIN(doorbell_super_common)
+ GEN_COMMON doorbell_super
+ /* r3 = r1 + STACK_INT_FRAME_REGS, argument for either C handler */
+ addi r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_PPC_DOORBELL
+ bl CFUNC(doorbell_exception)
+#else
+ bl CFUNC(unknown_async_exception)
+#endif
+ b interrupt_return_srr
- .align 7
- .globl instruction_access_common
-instruction_access_common:
- EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,0x5820
- li r5,0x400
- b do_hash_page /* Try to handle as hpte fault */
- STD_EXCEPTION_COMMON(0xe20, h_instr_storage, unknown_exception)
+EXC_REAL_NONE(0xb00, 0x100)
+EXC_VIRT_NONE(0x4b00, 0x100)
-/*
- * Here is the common SLB miss user that is used when going to virtual
- * mode for SLB misses, that is currently not used
+/**
+ * Interrupt 0xc00 - System Call Interrupt (syscall, hcall).
+ * This is a synchronous interrupt invoked with the "sc" instruction. The
+ * system call is invoked with "sc 0" and does not alter the HV bit, so it
+ * is directed to the currently running OS. The hypercall is invoked with
+ * "sc 1" and it sets HV=1, so it elevates to hypervisor.
+ *
+ * In HPT, sc 1 always goes to 0xc00 real mode. In RADIX, sc 1 can go to
+ * 0x4c00 virtual mode.
+ *
+ * Handling:
+ * If the KVM test fires then it was due to a hypercall and is accordingly
+ * routed to KVM. Otherwise this executes a normal Linux system call.
+ *
+ * Call convention:
+ *
+ * syscall and hypercalls register conventions are documented in
+ * Documentation/arch/powerpc/syscall64-abi.rst and
+ * Documentation/arch/powerpc/papr_hcalls.rst respectively.
+ *
+ * The intersection of volatile registers that don't contain possible
+ * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry
+ * without saving, though xer is not a good idea to use, as hardware may
+ * interpret some bits so it may be costly to change them.
*/
-#ifdef __DISABLED__
- .align 7
- .globl slb_miss_user_common
-slb_miss_user_common:
- mflr r10
- std r3,PACA_EXGEN+EX_DAR(r13)
- stw r9,PACA_EXGEN+EX_CCR(r13)
- std r10,PACA_EXGEN+EX_LR(r13)
- std r11,PACA_EXGEN+EX_SRR0(r13)
- bl slb_allocate_user
+INT_DEFINE_BEGIN(system_call)
+ IVEC=0xc00
+ IKVM_REAL=1
+ IKVM_VIRT=1
+ ICFAR=0
+INT_DEFINE_END(system_call)
+
+.macro SYSTEM_CALL virt
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+ /*
+ * There is a little bit of juggling to get syscall and hcall
+ * working well. Save r13 in ctr to avoid using SPRG scratch
+ * register.
+ *
+ * Userspace syscalls have already saved the PPR, hcalls must save
+ * it before setting HMT_MEDIUM.
+ */
+ mtctr r13
+ GET_PACA(r13)
+ std r10,PACA_EXGEN+EX_R10(r13)
+ INTERRUPT_TO_KERNEL
+ KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
+ mfctr r9
+#else
+ mr r9,r13
+ GET_PACA(r13)
+ INTERRUPT_TO_KERNEL
+#endif
- ld r10,PACA_EXGEN+EX_LR(r13)
- ld r3,PACA_EXGEN+EX_R3(r13)
- lwz r9,PACA_EXGEN+EX_CCR(r13)
- ld r11,PACA_EXGEN+EX_SRR0(r13)
- mtlr r10
- beq- slb_miss_fault
+ /* We reach here with PACA in r13, r13 in r9. */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- unrecov_user_slb
- mfmsr r10
+ HMT_MEDIUM
-.machine push
-.machine "power4"
- mtcrf 0x80,r9
-.machine pop
+ .if ! \virt
+ __LOAD_HANDLER(r10, system_call_common_real, real_vectors)
+ mtctr r10
+ bctr
+ .else
+#ifdef CONFIG_RELOCATABLE
+ __LOAD_HANDLER(r10, system_call_common, virt_vectors)
+ mtctr r10
+ bctr
+#else
+ b system_call_common
+#endif
+ .endif
+.endm
- clrrdi r10,r10,2 /* clear RI before setting SRR0/1 */
- mtmsrd r10,1
+EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
+ SYSTEM_CALL 0
+EXC_REAL_END(system_call, 0xc00, 0x100)
+EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
+ SYSTEM_CALL 1
+EXC_VIRT_END(system_call, 0x4c00, 0x100)
- mtspr SRR0,r11
- mtspr SRR1,r12
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+TRAMP_REAL_BEGIN(kvm_hcall)
+ std r9,PACA_EXGEN+EX_R9(r13)
+ std r11,PACA_EXGEN+EX_R11(r13)
+ std r12,PACA_EXGEN+EX_R12(r13)
+ mfcr r9
+ mfctr r10
+ std r10,PACA_EXGEN+EX_R13(r13)
+ li r10,0
+ std r10,PACA_EXGEN+EX_CFAR(r13)
+ std r10,PACA_EXGEN+EX_CTR(r13)
+ /*
+ * Save the PPR (on systems that support it) before changing to
+ * HMT_MEDIUM. That allows the KVM code to save that value into the
+ * guest state (it is the guest's PPR value).
+ */
+BEGIN_FTR_SECTION
+ mfspr r10,SPRN_PPR
+ std r10,PACA_EXGEN+EX_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
- ld r9,PACA_EXGEN+EX_R9(r13)
- ld r10,PACA_EXGEN+EX_R10(r13)
- ld r11,PACA_EXGEN+EX_R11(r13)
- ld r12,PACA_EXGEN+EX_R12(r13)
- ld r13,PACA_EXGEN+EX_R13(r13)
- rfid
- b .
+ HMT_MEDIUM
-slb_miss_fault:
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXGEN)
- ld r4,PACA_EXGEN+EX_DAR(r13)
- li r5,0
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
- b handle_page_fault
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Requires __LOAD_FAR_HANDLER because kvmppc_hcall lives
+ * outside the head section.
+ */
+ __LOAD_FAR_HANDLER(r10, kvmppc_hcall, real_trampolines)
+ mtctr r10
+ bctr
+#else
+ b kvmppc_hcall
+#endif
+#endif
+
+/**
+ * Interrupt 0xd00 - Trace Interrupt.
+ * This is a synchronous interrupt in response to instruction step or
+ * breakpoint faults.
+ */
+INT_DEFINE_BEGIN(single_step)
+ IVEC=0xd00
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(single_step)
+
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+ GEN_INT_ENTRY single_step, virt=0
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+ GEN_INT_ENTRY single_step, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+EXC_COMMON_BEGIN(single_step_common)
+ GEN_COMMON single_step
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(single_step_exception)
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest data access.
+ *
+ * Handling:
+ * This should always get routed to KVM. In radix MMU mode, this is caused
+ * by a guest nested radix access that can't be performed due to the
+ * partition scope page table. In hash mode, this can be caused by guests
+ * running with translation disabled (virtual real mode) or with VPM enabled.
+ * KVM will update the page table structures or disallow the access.
+ */
+INT_DEFINE_BEGIN(h_data_storage)
+ IVEC=0xe00
+ IHSRR=1
+ IDAR=1
+ IDSISR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_data_storage)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+ GEN_INT_ENTRY h_data_storage, virt=0, ool=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+ GEN_INT_ENTRY h_data_storage, virt=1, ool=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+EXC_COMMON_BEGIN(h_data_storage_common)
+ GEN_COMMON h_data_storage
+ addi r3,r1,STACK_INT_FRAME_REGS
+BEGIN_MMU_FTR_SECTION
+ bl CFUNC(do_bad_page_fault_segv)
+MMU_FTR_SECTION_ELSE
+ bl CFUNC(unknown_exception)
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
+ b interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
+ * This is a synchronous interrupt in response to an MMU fault caused by a
+ * guest instruction fetch, similar to HDSI.
+ */
+INT_DEFINE_BEGIN(h_instr_storage)
+ IVEC=0xe20
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_instr_storage)
+
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+ GEN_INT_ENTRY h_instr_storage, virt=0, ool=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+ GEN_INT_ENTRY h_instr_storage, virt=1, ool=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+EXC_COMMON_BEGIN(h_instr_storage_common)
+ GEN_COMMON h_instr_storage
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unknown_exception)
+ b interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
+ */
+INT_DEFINE_BEGIN(emulation_assist)
+ IVEC=0xe40
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(emulation_assist)
+
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+ GEN_INT_ENTRY emulation_assist, virt=0, ool=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+ GEN_INT_ENTRY emulation_assist, virt=1, ool=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+EXC_COMMON_BEGIN(emulation_assist_common)
+ GEN_COMMON emulation_assist
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(emulation_assist_interrupt)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_hsrr
+
+
+/**
+ * Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
+ * This is an asynchronous interrupt caused by a Hypervisor Maintenance
+ * Exception. It is always taken in real mode but uses HSRR registers
+ * unlike SRESET and MCE.
+ *
+ * It is maskable in hardware by clearing MSR[EE], and partially soft-maskable
+ * with IRQS_DISABLED mask (i.e., local_irq_disable()).
+ *
+ * Handling:
+ * This is a special case: it is handled similarly to machine checks, with an
+ * initial real mode handler that is not soft-masked, which attempts to fix the
+ * problem. Then a regular handler which is soft-maskable and reports the
+ * problem.
+ *
+ * The emergency stack is used for the early real mode handler.
+ *
+ * XXX: unclear why MCE and HMI schemes could not be made common, e.g.,
+ * either use soft-masking for the MCE, or use irq_work for the HMI.
+ *
+ * KVM:
+ * Unlike MCE, this calls into KVM without calling the real mode handler
+ * first.
+ */
+INT_DEFINE_BEGIN(hmi_exception_early)
+ IVEC=0xe60
+ IHSRR=1
+ IREALMODE_COMMON=1
+ ISTACK=0
+ IKUAP=0 /* We don't touch AMR here, we never go to virtual mode */
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception_early)
+
+INT_DEFINE_BEGIN(hmi_exception)
+ IVEC=0xe60
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+INT_DEFINE_END(hmi_exception)
+
+EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
+ GEN_INT_ENTRY hmi_exception_early, virt=0, ool=1
+EXC_REAL_END(hmi_exception, 0xe60, 0x20)
+EXC_VIRT_NONE(0x4e60, 0x20)
+
+EXC_COMMON_BEGIN(hmi_exception_early_common)
+ __GEN_REALMODE_COMMON_ENTRY hmi_exception_early
+
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
-unrecov_user_slb:
- EXCEPTION_PROLOG_COMMON(0x4200, PACA_EXGEN)
- RECONCILE_IRQ_STATE(r10, r11)
- bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
+ __GEN_COMMON_BODY hmi_exception_early
-#endif /* __DISABLED__ */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(hmi_exception_realmode)
+ cmpdi cr0,r3,0
+ bne 1f
+ EXCEPTION_RESTORE_REGS hsrr=1
+ HRFI_TO_USER_OR_KERNEL
+1:
/*
- * Machine check is different because we use a different
- * save area: PACA_EXMC instead of PACA_EXGEN.
+ * Go to virtual mode and pull the HMI event information from
+ * firmware.
*/
- .align 7
- .globl machine_check_common
-machine_check_common:
+ EXCEPTION_RESTORE_REGS hsrr=1
+ GEN_INT_ENTRY hmi_exception, virt=0
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC)
- FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- b ret_from_except
+EXC_COMMON_BEGIN(hmi_exception_common)
+ GEN_COMMON hmi_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(handle_hmi_exception)
+ b interrupt_return_hsrr
- .align 7
- .globl alignment_common
-alignment_common:
- mfspr r10,SPRN_DAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_DSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl alignment_exception
- b ret_from_except
- .align 7
- .globl program_check_common
-program_check_common:
- EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl program_check_exception
- b ret_from_except
+/**
+ * Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
+ * This is an asynchronous interrupt in response to a msgsnd doorbell.
+ * Similar to the 0xa00 doorbell but for host rather than guest.
+ *
+ * CFAR is not required (similar to doorbell_interrupt), unless KVM HV
+ * is enabled, in which case it may be a guest exit. Most PowerNV kernels
+ * include KVM support so it would be nice if this could be dynamically
+ * patched out if KVM was not currently running any guests.
+ */
+INT_DEFINE_BEGIN(h_doorbell)
+ IVEC=0xe80
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
+#endif
+INT_DEFINE_END(h_doorbell)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+ GEN_INT_ENTRY h_doorbell, virt=0, ool=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+ GEN_INT_ENTRY h_doorbell, virt=1, ool=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+EXC_COMMON_BEGIN(h_doorbell_common)
+ GEN_COMMON h_doorbell
+ addi r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_PPC_DOORBELL
+ bl CFUNC(doorbell_exception)
+#else
+ bl CFUNC(unknown_async_exception)
+#endif
+ b interrupt_return_hsrr
- .align 7
- .globl fp_unavailable_common
-fp_unavailable_common:
- EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN)
- bne 1f /* if from user, just load it up */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl kernel_fp_unavailable_exception
- BUG_OPCODE
-1:
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-BEGIN_FTR_SECTION
- /* Test if 2 TM state bits are zero. If non-zero (ie. userspace was in
- * transaction), go do TM stuff
- */
- rldicl. r0, r12, (64-MSR_TS_LG), (64-2)
- bne- 2f
-END_FTR_SECTION_IFSET(CPU_FTR_TM)
+
+/**
+ * Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
+ * This is an asynchronous interrupt in response to an "external exception".
+ * Similar to 0x500 but for host only.
+ *
+ * Like h_doorbell, CFAR is only required for KVM HV because this can be
+ * a guest exit.
+ */
+INT_DEFINE_BEGIN(h_virt_irq)
+ IVEC=0xea0
+ IHSRR=1
+ IMASK=IRQS_DISABLED
+ IKVM_REAL=1
+ IKVM_VIRT=1
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ ICFAR=0
#endif
- bl load_up_fpu
- b fast_exception_return
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl fp_unavailable_tm
- b ret_from_except
+INT_DEFINE_END(h_virt_irq)
+
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+ GEN_INT_ENTRY h_virt_irq, virt=0, ool=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+ GEN_INT_ENTRY h_virt_irq, virt=1, ool=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+EXC_COMMON_BEGIN(h_virt_irq_common)
+ GEN_COMMON h_virt_irq
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(do_IRQ)
+ b interrupt_return_hsrr
+
+
+EXC_REAL_NONE(0xec0, 0x20)
+EXC_VIRT_NONE(0x4ec0, 0x20)
+EXC_REAL_NONE(0xee0, 0x20)
+EXC_VIRT_NONE(0x4ee0, 0x20)
+
+
+/*
+ * Interrupt 0xf00 - Performance Monitor Interrupt (PMI, PMU).
+ * This is an asynchronous interrupt in response to a PMU exception.
+ * It is maskable in hardware by clearing MSR[EE], and soft-maskable with
+ * IRQS_PMI_DISABLED mask (NOTE: NOT local_irq_disable()).
+ *
+ * Handling:
+ * This calls into the perf subsystem.
+ *
+ * Like the watchdog soft-nmi, it appears as an NMI to Linux, in that it
+ * runs under local_irq_disable. However it may be soft-masked in
+ * powerpc-specific code.
+ *
+ * If soft masked, the masked handler will note the pending interrupt for
+ * replay, and clear MSR[EE] in the interrupted context.
+ *
+ * CFAR is not used by perf interrupts so not required.
+ */
+INT_DEFINE_BEGIN(performance_monitor)
+ IVEC=0xf00
+ IMASK=IRQS_PMI_DISABLED
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
#endif
- .align 7
- .globl altivec_unavailable_common
-altivec_unavailable_common:
- EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN)
+ ICFAR=0
+INT_DEFINE_END(performance_monitor)
+
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+ GEN_INT_ENTRY performance_monitor, virt=0, ool=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+ GEN_INT_ENTRY performance_monitor, virt=1, ool=1
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+EXC_COMMON_BEGIN(performance_monitor_common)
+ GEN_COMMON performance_monitor
+ addi r3,r1,STACK_INT_FRAME_REGS
+ lbz r4,PACAIRQSOFTMASK(r13)
+ cmpdi r4,IRQS_ENABLED
+ bne 1f
+ bl CFUNC(performance_monitor_exception_async)
+ b interrupt_return_srr
+1:
+ bl CFUNC(performance_monitor_exception_nmi)
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
+
+ kuap_kernel_restore r9, r10
+
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
+
+/**
+ * Interrupt 0xf20 - Vector Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a vector (or altivec) instruction with MSR[VEC]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(altivec_unavailable)
+ IVEC=0xf20
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ IMSR_R12=1
+INT_DEFINE_END(altivec_unavailable)
+
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+ GEN_INT_ENTRY altivec_unavailable, virt=0, ool=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+ GEN_INT_ENTRY altivec_unavailable, virt=1, ool=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+EXC_COMMON_BEGIN(altivec_unavailable_common)
+ GEN_COMMON altivec_unavailable
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1166,29 +2404,44 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
- bl load_up_altivec
- b fast_exception_return
+ bl CFUNC(load_up_altivec)
+ b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl altivec_unavailable_tm
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(altivec_unavailable_tm)
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl altivec_unavailable_exception
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(altivec_unavailable_exception)
+ b interrupt_return_srr
- .align 7
- .globl vsx_unavailable_common
-vsx_unavailable_common:
- EXCEPTION_PROLOG_COMMON(0xf40, PACA_EXGEN)
+
+/**
+ * Interrupt 0xf40 - VSX Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing a VSX instruction with MSR[VSX]=0.
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(vsx_unavailable)
+ IVEC=0xf40
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+ IMSR_R12=1
+INT_DEFINE_END(vsx_unavailable)
+
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+ GEN_INT_ENTRY vsx_unavailable, virt=0, ool=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+ GEN_INT_ENTRY vsx_unavailable, virt=1, ool=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+EXC_COMMON_BEGIN(vsx_unavailable_common)
+ GEN_COMMON vsx_unavailable
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1204,495 +2457,629 @@ BEGIN_FTR_SECTION
b load_up_vsx
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl vsx_unavailable_tm
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(vsx_unavailable_tm)
+ b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl vsx_unavailable_exception
- b ret_from_except
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(vsx_unavailable_exception)
+ b interrupt_return_srr
- STD_EXCEPTION_COMMON(0xf60, facility_unavailable, facility_unavailable_exception)
- STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, facility_unavailable_exception)
- .align 7
- .globl __end_handlers
-__end_handlers:
+/**
+ * Interrupt 0xf60 - Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can be
+ * resolved by the OS (e.g., FSCR, MSR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(facility_unavailable)
+ IVEC=0xf60
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(facility_unavailable)
+
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+ GEN_INT_ENTRY facility_unavailable, virt=0, ool=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+ GEN_INT_ENTRY facility_unavailable, virt=1, ool=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+EXC_COMMON_BEGIN(facility_unavailable_common)
+ GEN_COMMON facility_unavailable
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(facility_unavailable_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+ b interrupt_return_srr
+
+
+/**
+ * Interrupt 0xf80 - Hypervisor Facility Unavailable Interrupt.
+ * This is a synchronous interrupt in response to
+ * executing an instruction without access to the facility that can only
+ * be resolved in HV mode (e.g., HFSCR).
+ * Similar to FP unavailable.
+ */
+INT_DEFINE_BEGIN(h_facility_unavailable)
+ IVEC=0xf80
+ IHSRR=1
+ IKVM_REAL=1
+ IKVM_VIRT=1
+INT_DEFINE_END(h_facility_unavailable)
+
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+ GEN_INT_ENTRY h_facility_unavailable, virt=0, ool=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+ GEN_INT_ENTRY h_facility_unavailable, virt=1, ool=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+EXC_COMMON_BEGIN(h_facility_unavailable_common)
+ GEN_COMMON h_facility_unavailable
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(facility_unavailable_exception)
+ /* XXX Shouldn't be necessary in practice */
+ HANDLER_RESTORE_NVGPRS()
+ b interrupt_return_hsrr
+
+
+EXC_REAL_NONE(0xfa0, 0x20)
+EXC_VIRT_NONE(0x4fa0, 0x20)
+EXC_REAL_NONE(0xfc0, 0x20)
+EXC_VIRT_NONE(0x4fc0, 0x20)
+EXC_REAL_NONE(0xfe0, 0x20)
+EXC_VIRT_NONE(0x4fe0, 0x20)
+
+EXC_REAL_NONE(0x1000, 0x100)
+EXC_VIRT_NONE(0x5000, 0x100)
+EXC_REAL_NONE(0x1100, 0x100)
+EXC_VIRT_NONE(0x5100, 0x100)
+EXC_REAL_NONE(0x1200, 0x100)
+EXC_VIRT_NONE(0x5200, 0x100)
+
+/**
+ * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt.
+ * This has been removed from the ISA before 2.01, which is the earliest
+ * 64-bit BookS ISA supported, however the G5 / 970 implements this
+ * interrupt with a non-architected feature available through the support
+ * processor interface.
+ */
+INT_DEFINE_BEGIN(instruction_breakpoint)
+ IVEC=0x1300
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
+#endif
+INT_DEFINE_END(instruction_breakpoint)
+
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+ GEN_INT_ENTRY instruction_breakpoint, virt=0
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+ GEN_INT_ENTRY instruction_breakpoint, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+EXC_COMMON_BEGIN(instruction_breakpoint_common)
+ GEN_COMMON instruction_breakpoint
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(instruction_breakpoint_exception)
+ b interrupt_return_srr
+
+
+EXC_REAL_NONE(0x1400, 0x100)
+EXC_VIRT_NONE(0x5400, 0x100)
+
+/**
+ * Interrupt 0x1500 - Soft Patch Interrupt
+ *
+ * Handling:
+ * This is an implementation specific interrupt which can be used for a
+ * range of exceptions.
+ *
+ * This interrupt handler is unique in that it runs the denormal assist
+ * code even for guests (and even in guest context) without going to KVM,
+ * for speed. POWER9 does not raise denorm exceptions, so this special case
+ * could be phased out in future to reduce special cases.
+ */
+INT_DEFINE_BEGIN(denorm_exception)
+ IVEC=0x1500
+ IHSRR=1
+ IBRANCH_TO_COMMON=0
+ IKVM_REAL=1
+INT_DEFINE_END(denorm_exception)
+
+EXC_REAL_BEGIN(denorm_exception, 0x1500, 0x100)
+ GEN_INT_ENTRY denorm_exception, virt=0
+#ifdef CONFIG_PPC_DENORMALISATION
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+#endif
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=0
+EXC_REAL_END(denorm_exception, 0x1500, 0x100)
+#ifdef CONFIG_PPC_DENORMALISATION
+EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
+ GEN_INT_ENTRY denorm_exception, virt=1
+ andis. r10,r12,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+ GEN_BRANCH_TO_COMMON denorm_exception, virt=1
+EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
+#else
+EXC_VIRT_NONE(0x5500, 0x100)
+#endif
- /* Equivalents to the above handlers for relocation-on interrupt vectors */
- STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
- MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
+#ifdef CONFIG_PPC_DENORMALISATION
+TRAMP_REAL_BEGIN(denorm_assist)
+BEGIN_FTR_SECTION
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER6 do that here for all FP regs.
+ */
+ mfmsr r10
+ ori r10,r10,(MSR_FP|MSR_FE0|MSR_FE1)
+ xori r10,r10,(MSR_FE0|MSR_FE1)
+ mtmsrd r10
+ sync
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
- STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
- STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
+ .Lreg=0
+ .rept 32
+ fmr .Lreg,.Lreg
+ .Lreg=.Lreg+1
+ .endr
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+FTR_SECTION_ELSE
/*
- * Data area reserved for FWNMI option.
- * This address (0x7000) is fixed by the RPA.
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER7 do that here for the first 32 VSX registers only.
*/
- .= 0x7000
- .globl fwnmi_data_area
-fwnmi_data_area:
+ mfmsr r10
+ oris r10,r10,MSR_VSX@h
+ mtmsrd r10
+ sync
- /* pseries and powernv need to keep the whole page from
- * 0x7000 to 0x8000 free for use by the firmware
- */
- . = 0x8000
-#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+ .Lreg=0
+ .rept 32
+ XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+ .Lreg=.Lreg+1
+ .endr
- .globl hmi_exception_early
-hmi_exception_early:
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
- mr r10,r1 /* Save r1 */
- ld r1,PACAEMERGSP(r13) /* Use emergency stack */
- subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- std r9,_CCR(r1) /* save CR in stackframe */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- std r11,_NIP(r1) /* save HSRR0 in stackframe */
- mfspr r12,SPRN_HSRR1 /* Save SRR1 */
- std r12,_MSR(r1) /* save SRR1 in stackframe */
- std r10,0(r1) /* make stack chain pointer */
- std r0,GPR0(r1) /* save r0 in stackframe */
- std r10,GPR1(r1) /* save r1 in stackframe */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl hmi_exception_realmode
- /* Windup the stack. */
- /* Clear MSR_RI before setting SRR0 and SRR1. */
- li r0,MSR_RI
- mfmsr r9 /* get MSR value */
- andc r9,r9,r0
- mtmsrd r9,1 /* Clear MSR_RI */
- /* Move original HSRR0 and HSRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_HSRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_HSRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- mtcr r11
- REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
- ld r1,GPR1(r1)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
- /*
- * Go to virtual mode and pull the HMI event information from
- * firmware.
- */
- .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_hv
+BEGIN_FTR_SECTION
+ b denorm_done
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+/*
+ * To denormalise we need to move a copy of the register to itself.
+ * For POWER8 we need to do that for all 64 VSX registers
+ */
+ .Lreg=32
+ .rept 32
+ XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+ .Lreg=.Lreg+1
+ .endr
-#ifdef CONFIG_PPC_POWERNV
-_GLOBAL(opal_mc_secondary_handler)
- HMT_MEDIUM_PPR_DISCARD
- SET_SCRATCH0(r13)
- GET_PACA(r13)
- clrldi r3,r3,2
- tovirt(r3,r3)
- std r3,PACA_OPAL_MC_EVT(r13)
- ld r13,OPAL_MC_SRR0(r3)
- mtspr SPRN_SRR0,r13
- ld r13,OPAL_MC_SRR1(r3)
- mtspr SPRN_SRR1,r13
- ld r3,OPAL_MC_GPR3(r3)
- GET_SCRATCH0(r13)
- b machine_check_pSeries
-#endif /* CONFIG_PPC_POWERNV */
+denorm_done:
+ mfspr r11,SPRN_HSRR0
+ subi r11,r11,4
+ mtspr SPRN_HSRR0,r11
+ mtcrf 0x80,r9
+ ld r9,PACA_EXGEN+EX_R9(r13)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_PPR(r13)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+BEGIN_FTR_SECTION
+ ld r10,PACA_EXGEN+EX_CFAR(r13)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ li r10,0
+ stb r10,PACAHSRR_VALID(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ HRFI_TO_UNKNOWN
+ b .
+#endif
+EXC_COMMON_BEGIN(denorm_exception_common)
+ GEN_COMMON denorm_exception
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unknown_exception)
+ b interrupt_return_hsrr
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r0,MSR_RI; \
- mfmsr r9; /* get MSR value */ \
- andc r9,r9,r0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Move original SRR0 and SRR1 into the respective regs */ \
- ld r9,_MSR(r1); \
- mtspr SPRN_SRR1,r9; \
- ld r3,_NIP(r1); \
- mtspr SPRN_SRR0,r3; \
- ld r9,_CTR(r1); \
- mtctr r9; \
- ld r9,_XER(r1); \
- mtxer r9; \
- ld r9,_LINK(r1); \
- mtlr r9; \
- REST_GPR(0, r1); \
- REST_8GPRS(2, r1); \
- REST_GPR(10, r1); \
- ld r11,_CCR(r1); \
- mtcr r11; \
- /* Decrement paca->in_mce. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- REST_GPR(11, r1); \
- REST_2GPRS(12, r1); \
- /* restore original r1. */ \
- ld r1,GPR1(r1)
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
- .align 7
- .globl machine_check_handle_early
-machine_check_handle_early:
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss or
- * supervisor state loss
- *
- * Go back to nap again if (b) is true.
- */
- rlwinm. r11,r12,47-31,30,31 /* Was it in power saving mode? */
- beq 4f /* No, it wasn;t */
- /* Thread was in power saving mode. Go back to nap again. */
- cmpwi r11,2
- bne 3f
- /* Supervisor state loss */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-3: bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- GET_PACA(r13)
- ld r1,PACAR1(r13)
- b power7_enter_nap_mode
-4:
-#endif
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
- andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
+EXC_REAL_NONE(0x1600, 0x100)
+EXC_VIRT_NONE(0x5600, 0x100)
-5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
- /*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
+
+INT_DEFINE_BEGIN(altivec_assist)
+ IVEC=0x1700
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ IKVM_REAL=1
#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- /*
- * We are going down. But there are chances that we might get hit by
- * another MCE during panic path and we may run into unstable state
- * with no way out. Hence, turn ME bit off while going down, so that
- * when another MCE is hit during panic path, system will checkstop
- * and hypervisor will get restarted cleanly by SP.
- */
- li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
- rfid
- b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
+INT_DEFINE_END(altivec_assist)
+
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+ GEN_INT_ENTRY altivec_assist, virt=0
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+ GEN_INT_ENTRY altivec_assist, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+EXC_COMMON_BEGIN(altivec_assist_common)
+ GEN_COMMON altivec_assist
+ addi r3,r1,STACK_INT_FRAME_REGS
+#ifdef CONFIG_ALTIVEC
+ bl CFUNC(altivec_assist_exception)
+ HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
+#else
+ bl CFUNC(unknown_exception)
+#endif
+ b interrupt_return_srr
- beq 1b /* if !handled then panic */
- /*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
- */
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- rfid
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- b machine_check_pSeries
-unrecover_mce:
- /* Invoke machine_check_exception to print MCE event and panic. */
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- /*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
- */
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
+EXC_REAL_NONE(0x1800, 0x100)
+EXC_VIRT_NONE(0x5800, 0x100)
+
+
+#ifdef CONFIG_PPC_WATCHDOG
+
+INT_DEFINE_BEGIN(soft_nmi)
+ IVEC=0x900
+ ISTACK=0
+ ICFAR=0
+INT_DEFINE_END(soft_nmi)
+
/*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contain the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * r3 is saved in paca->slb_r3
- * We assume we aren't going to take any exceptions during this procedure.
+ * Branch to soft_nmi_interrupt using the emergency stack. The emergency
+ * stack is one that is usable by maskable interrupts so long as MSR_EE
+ * remains off. It is used for recovery when something has corrupted the
+ * normal kernel stack, for example. The "soft NMI" must not use the process
+ * stack because we want irq disabled sections to avoid touching the stack
+ * at all (other than PMU interrupts), so use the emergency stack for this,
+ * and run it entirely with interrupts hard disabled.
*/
-slb_miss_realmode:
- mflr r10
-#ifdef CONFIG_RELOCATABLE
- mtctr r11
-#endif
-
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
+EXC_COMMON_BEGIN(soft_nmi_common)
+ mr r10,r1
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,INT_FRAME_SIZE
+ __GEN_COMMON_BODY soft_nmi
- bl slb_allocate_realmode
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(soft_nmi_interrupt)
- /* All done -- return from exception. */
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r9,0
+ mtmsrd r9,1
- ld r10,PACA_EXSLB+EX_LR(r13)
- ld r3,PACA_EXSLB+EX_R3(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
+ kuap_kernel_restore r9, r10
- mtlr r10
+ EXCEPTION_RESTORE_REGS hsrr=0
+ RFI_TO_KERNEL
- andi. r10,r12,MSR_RI /* check for unrecoverable exception */
- beq- 2f
+#endif /* CONFIG_PPC_WATCHDOG */
-.machine push
-.machine "power4"
+/*
+ * An interrupt came in while soft-disabled. We set paca->irq_happened, then:
+ * - If it was a decrementer interrupt, we bump the dec to max and return.
+ * - If it was a doorbell we return immediately since doorbells are edge
+ * triggered and won't automatically refire.
+ * - If it was a HMI we return immediately since we handled it in realmode
+ * and it won't refire.
+ * - Else it is one of PACA_IRQ_MUST_HARD_MASK, so hard disable and return.
+ * This is called with r10 containing the value to OR to the paca field.
+ */
+.macro MASKED_INTERRUPT hsrr=0
+ .if \hsrr
+masked_Hinterrupt:
+ .else
+masked_interrupt:
+ .endif
+ stw r9,PACA_EXGEN+EX_CCR(r13)
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ /*
+ * Ensure there was no previous MUST_HARD_MASK interrupt or
+ * HARD_DIS setting. If this does fire, the interrupt is still
+ * masked and MSR[EE] will be cleared on return, so no need to
+ * panic, but somebody probably enabled MSR[EE] under
+ * PACA_IRQ_HARD_DIS, mtmsr(mfmsr() | MSR_x) being a common
+ * cause.
+ */
+ lbz r9,PACAIRQHAPPENED(r13)
+ andi. r9,r9,(PACA_IRQ_MUST_HARD_MASK|PACA_IRQ_HARD_DIS)
+0: tdnei r9,0
+ EMIT_WARN_ENTRY 0b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+ lbz r9,PACAIRQHAPPENED(r13)
+ or r9,r9,r10
+ stb r9,PACAIRQHAPPENED(r13)
+
+ .if ! \hsrr
+ cmpwi r10,PACA_IRQ_DEC
+ bne 1f
+ LOAD_REG_IMMEDIATE(r9, 0x7fffffff)
+ mtspr SPRN_DEC,r9
+#ifdef CONFIG_PPC_WATCHDOG
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
+ b soft_nmi_common
+#else
+ b 2f
+#endif
+ .endif
+
+1: andi. r10,r10,PACA_IRQ_MUST_HARD_MASK
+ beq 2f
+ xori r12,r12,MSR_EE /* clear MSR_EE */
+ .if \hsrr
+ mtspr SPRN_HSRR1,r12
+ .else
+ mtspr SPRN_SRR1,r12
+ .endif
+ ori r9,r9,PACA_IRQ_HARD_DIS
+ stb r9,PACAIRQHAPPENED(r13)
+2: /* done */
+ li r9,0
+ .if \hsrr
+ stb r9,PACAHSRR_VALID(r13)
+ .else
+ stb r9,PACASRR_VALID(r13)
+ .endif
+
+ SEARCH_RESTART_TABLE
+ cmpdi r12,0
+ beq 3f
+ .if \hsrr
+ mtspr SPRN_HSRR0,r12
+ .else
+ mtspr SPRN_SRR0,r12
+ .endif
+3:
+
+ ld r9,PACA_EXGEN+EX_CTR(r13)
+ mtctr r9
+ lwz r9,PACA_EXGEN+EX_CCR(r13)
mtcrf 0x80,r9
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-.machine pop
-
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- rfid
- b . /* prevent speculative execution */
-
-2: mfspr r11,SPRN_SRR0
- ld r10,PACAKBASE(r13)
- LOAD_HANDLER(r10,unrecov_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- rfid
+ std r1,PACAR1(r13)
+ ld r9,PACA_EXGEN+EX_R9(r13)
+ ld r10,PACA_EXGEN+EX_R10(r13)
+ ld r11,PACA_EXGEN+EX_R11(r13)
+ ld r12,PACA_EXGEN+EX_R12(r13)
+ ld r13,PACA_EXGEN+EX_R13(r13)
+ /* May return to masked low address where r13 is not set up */
+ .if \hsrr
+ HRFI_TO_KERNEL
+ .else
+ RFI_TO_KERNEL
+ .endif
b .
+.endm
-unrecov_slb:
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- RECONCILE_IRQ_STATE(r10, r11)
- bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-
-#ifdef CONFIG_PPC_970_NAP
-power4_fixup_nap:
- andc r9,r9,r10
- std r9,TI_LOCAL_FLAGS(r11)
- ld r10,_LINK(r1) /* make idle task do the */
- std r10,_NIP(r1) /* equivalent of a blr */
+TRAMP_REAL_BEGIN(stf_barrier_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ sync
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ori 31,31,0
+ .rept 14
+ b 1f
+1:
+ .endr
+ blr
+
+/* Clobbers r10, r11, ctr */
+.macro L1D_DISPLACEMENT_FLUSH
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+ mtctr r11
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+
+ /*
+ * The load addresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
+ */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
+ bdnz 1b
+.endm
+
+TRAMP_REAL_BEGIN(entry_flush_fallback)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
blr
-#endif
/*
- * Hash table stuff
+ * The SCV entry flush happens with interrupts enabled, so it must disable
+ * to prevent EXRFI being clobbered by NMIs (e.g., soft_nmi_common). r10
+ * (containing LR) does not need to be preserved here because scv entry
+ * puts 0 in the pt_regs, CTR can be clobbered for the same reason.
*/
- .align 7
-do_hash_page:
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
-
- andis. r0,r4,0xa410 /* weird error? */
- bne- handle_page_fault /* if not, try to insert a HPTE */
- andis. r0,r4,DSISR_DABRMATCH@h
- bne- handle_dabr_fault
- CURRENT_THREAD_INFO(r11, r1)
- lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
- andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
- bne 77f /* then don't call hash_page now */
+TRAMP_REAL_BEGIN(scv_entry_flush_fallback)
+ li r10,0
+ mtmsrd r10,1
+ lbz r10,PACAIRQHAPPENED(r13)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ L1D_DISPLACEMENT_FLUSH
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ li r10,MSR_RI
+ mtmsrd r10,1
+ blr
+
+TRAMP_REAL_BEGIN(rfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r1,PACA_EXRFI+EX_R12(r13)
+ ld r1,PACAKSAVE(r13)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r1,PACA_EXRFI+EX_R12(r13)
+ GET_SCRATCH0(r13);
+ rfid
+
+TRAMP_REAL_BEGIN(hrfi_flush_fallback)
+ SET_SCRATCH0(r13);
+ GET_PACA(r13);
+ std r1,PACA_EXRFI+EX_R12(r13)
+ ld r1,PACAKSAVE(r13)
+ std r9,PACA_EXRFI+EX_R9(r13)
+ std r10,PACA_EXRFI+EX_R10(r13)
+ std r11,PACA_EXRFI+EX_R11(r13)
+ mfctr r9
+ L1D_DISPLACEMENT_FLUSH
+ mtctr r9
+ ld r9,PACA_EXRFI+EX_R9(r13)
+ ld r10,PACA_EXRFI+EX_R10(r13)
+ ld r11,PACA_EXRFI+EX_R11(r13)
+ ld r1,PACA_EXRFI+EX_R12(r13)
+ GET_SCRATCH0(r13);
+ hrfid
+
+TRAMP_REAL_BEGIN(rfscv_flush_fallback)
+ /* system call volatile */
+ mr r7,r13
+ GET_PACA(r13);
+ mr r8,r1
+ ld r1,PACAKSAVE(r13)
+ mfctr r9
+ ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+ ld r11,PACA_L1D_FLUSH_SIZE(r13)
+ srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+ mtctr r11
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+ /* order ld/st prior to dcbt stop all streams with flushing */
+ sync
+
/*
- * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
- * accessing a userspace segment (even from the kernel). We assume
- * kernel addresses always have the high bit set.
+ * The load addresses are at staggered offsets within cachelines,
+ * which suits some pipelines better (on others it should not
+ * hurt).
*/
- rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */
- rotldi r0,r3,15 /* Move high bit into MSR_PR posn */
- orc r0,r12,r0 /* MSR_PR | ~high_bit */
- rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */
- ori r4,r4,1 /* add _PAGE_PRESENT */
- rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */
+1:
+ ld r11,(0x80 + 8)*0(r10)
+ ld r11,(0x80 + 8)*1(r10)
+ ld r11,(0x80 + 8)*2(r10)
+ ld r11,(0x80 + 8)*3(r10)
+ ld r11,(0x80 + 8)*4(r10)
+ ld r11,(0x80 + 8)*5(r10)
+ ld r11,(0x80 + 8)*6(r10)
+ ld r11,(0x80 + 8)*7(r10)
+ addi r10,r10,0x80*8
+ bdnz 1b
+ mtctr r9
+ li r9,0
+ li r10,0
+ li r11,0
+ mr r1,r8
+ mr r13,r7
+ RFSCV
+
+USE_TEXT_SECTION()
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+kvm_interrupt:
/*
- * r3 contains the faulting address
- * r4 contains the required access permissions
- * r5 contains the trap number
- *
- * at return r3 = 0 for success, 1 for page fault, negative for error
+ * The conditional branch in KVMTEST can't reach all the way,
+ * make a stub.
*/
- bl hash_page /* build HPTE if possible */
- cmpdi r3,0 /* see if hash_page succeeded */
+ b kvmppc_interrupt
+#endif
- /* Success */
- beq fast_exc_return_irq /* Return from exception on success */
+_GLOBAL(do_uaccess_flush)
+ UACCESS_FLUSH_FIXUP_SECTION
+ nop
+ nop
+ nop
+ blr
+ L1D_DISPLACEMENT_FLUSH
+ blr
+_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
+EXPORT_SYMBOL(do_uaccess_flush)
- /* Error */
- blt- 13f
-/* Here we have a page fault that hash_page can't handle. */
-handle_page_fault:
-11: ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_page_fault
- cmpdi r3,0
- beq+ 12f
- bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
- bl bad_page_fault
- b ret_from_except
-
-/* We have a data breakpoint exception - handle it */
-handle_dabr_fault:
- bl save_nvgprs
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl do_break
-12: b ret_from_except_lite
-
-
-/* We have a page fault that hash_page could handle but HV refused
- * the PTE insertion
- */
-13: bl save_nvgprs
- mr r5,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- ld r4,_DAR(r1)
- bl low_hash_fault
- b ret_from_except
+MASKED_INTERRUPT
+MASKED_INTERRUPT hsrr=1
-/*
- * We come here as a result of a DSI at a point where we don't want
- * to call hash_page, such as when we are accessing memory (possibly
- * user memory) inside a PMU interrupt that occurred while interrupts
- * were soft-disabled. We want to invoke the exception handler for
- * the access, or panic if there isn't a handler.
- */
-77: bl save_nvgprs
- mr r4,r3
- addi r3,r1,STACK_FRAME_OVERHEAD
- li r5,SIGSEGV
- bl bad_page_fault
- b ret_from_except
+USE_FIXED_SECTION(virt_trampolines)
+ /*
+ * All code below __end_soft_masked is treated as soft-masked. If
+ * any code runs here with MSR[EE]=1, it must then cope with pending
+ * soft interrupt being raised (i.e., by ensuring it is replayed).
+ *
+ * The __end_interrupts marker must be past the out-of-line (OOL)
+ * handlers, so that they are copied to real address 0x100 when running
+ * a relocatable kernel. This ensures they can be reached from the short
+ * trampoline handlers (like 0x4f00, 0x4f20, etc.) which branch
+ * directly, without using LOAD_HANDLER().
+ */
+ .align 7
+ .globl __end_interrupts
+__end_interrupts:
+DEFINE_FIXED_SYMBOL(__end_interrupts, virt_trampolines)
+
+CLOSE_FIXED_SECTION(real_vectors);
+CLOSE_FIXED_SECTION(real_trampolines);
+CLOSE_FIXED_SECTION(virt_vectors);
+CLOSE_FIXED_SECTION(virt_trampolines);
+
+USE_TEXT_SECTION()
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+_GLOBAL(enable_machine_check)
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ ori r3,r3,MSR_ME
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
-/*
- * Here we have detected that the kernel stack pointer is bad.
- * R9 contains the saved CR, r13 points to the paca,
- * r10 contains the (bad) kernel stack pointer,
- * r11 and r12 contain the saved SRR0 and SRR1.
- * We switch to using an emergency stack, save the registers there,
- * and call kernel_bad_stack(), which panics.
- */
-bad_stack:
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,64+INT_FRAME_SIZE
- std r9,_CCR(r1)
- std r10,GPR1(r1)
- std r11,_NIP(r1)
- std r12,_MSR(r1)
- mfspr r11,SPRN_DAR
- mfspr r12,SPRN_DSISR
- std r11,_DAR(r1)
- std r12,_DSISR(r1)
- mflr r10
- mfctr r11
- mfxer r12
- std r10,_LINK(r1)
- std r11,_CTR(r1)
- std r12,_XER(r1)
- SAVE_GPR(0,r1)
- SAVE_GPR(2,r1)
- ld r10,EX_R3(r3)
- std r10,GPR3(r1)
- SAVE_GPR(4,r1)
- SAVE_4GPRS(5,r1)
- ld r9,EX_R9(r3)
- ld r10,EX_R10(r3)
- SAVE_2GPRS(9,r1)
- ld r9,EX_R11(r3)
- ld r10,EX_R12(r3)
- ld r11,EX_R13(r3)
- std r9,GPR11(r1)
- std r10,GPR12(r1)
- std r11,GPR13(r1)
-BEGIN_FTR_SECTION
- ld r10,EX_CFAR(r3)
- std r10,ORIG_GPR3(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
- SAVE_8GPRS(14,r1)
- SAVE_10GPRS(22,r1)
- lhz r12,PACA_TRAP_SAVE(r13)
- std r12,_TRAP(r1)
- addi r11,r1,INT_FRAME_SIZE
- std r11,0(r1)
- li r12,0
- std r12,0(r11)
- ld r2,PACATOC(r13)
- ld r11,exception_marker@toc(r2)
- std r12,RESULT(r1)
- std r11,STACK_FRAME_OVERHEAD-16(r1)
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl kernel_bad_stack
- b 1b
+/* MSR[RI] should be clear because this uses SRR[01] */
+SYM_FUNC_START_LOCAL(disable_machine_check)
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ li r4,MSR_ME
+ andc r3,r3,r4
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+SYM_FUNC_END(disable_machine_check)
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 742694c1d852..5782e743fd27 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Firmware Assisted dump: A robust mechanism to get reliable kernel crash
* dump with assistance from firmware. This approach does not use kexec,
@@ -6,20 +7,6 @@
* from phyp assisted dump implementation written by Linas Vepstas and
* Manish Ahuja
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2011 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -30,87 +17,217 @@
#include <linux/string.h>
#include <linux/memblock.h>
#include <linux/delay.h>
-#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/kobject.h>
#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/cma.h>
+#include <linux/hugetlb.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <asm/page.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
+#include <asm/fadump-internal.h>
#include <asm/setup.h>
+#include <asm/interrupt.h>
+#include <asm/prom.h>
+
+/*
+ * The CPU who acquired the lock to trigger the fadump crash should
+ * wait for other CPUs to enter.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT 500
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
+
+static struct kobject *fadump_kobj;
+
+static atomic_t cpus_in_fadump;
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges crash_memory_ranges[INIT_CRASHMEM_RANGES];
-int crash_mem_ranges;
-/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+#define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */
+#define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \
+ sizeof(struct fadump_memory_range))
+static struct fadump_memory_range rngs[RESERVED_RNGS_CNT];
+static struct fadump_mrange_info
+reserved_mrange_info = { "reserved", rngs, RESERVED_RNGS_SZ, 0, RESERVED_RNGS_CNT, true };
+
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node);
+
+#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
+/*
+ * fadump_cma_init() - Initialize CMA area from a fadump reserved memory
+ *
+ * This function initializes CMA area from fadump reserved memory.
+ * The total size of fadump reserved memory covers for boot memory size
+ * + cpu data size + hpte size and metadata.
+ * Initialize only the area equivalent to boot memory size for CMA use.
+ * The remaining portion of fadump reserved memory will not be given
+ * to CMA and pages for those will stay reserved. Boot memory size is
+ * aligned per CMA requirement to satisfy cma_init_reserved_mem() call.
+ * But for some reason even if it fails we still have the memory reservation
+ * with us and we can still continue doing fadump.
+ */
+void __init fadump_cma_init(void)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const int *token;
+ unsigned long long base, size, end;
+ int rc;
- if (depth != 1 || strcmp(uname, "rtas") != 0)
- return 0;
+ if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled ||
+ fw_dump.dump_active)
+ return;
+ /*
+ * Do not use CMA if user has provided fadump=nocma kernel parameter.
+ */
+ if (fw_dump.nocma || !fw_dump.boot_memory_size)
+ return;
/*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
+ * [base, end) should be reserved during early init in
+ * fadump_reserve_mem(). No need to check this here as
+ * cma_init_reserved_mem() already checks for overlap.
+ * Here we give the aligned chunk of this reserved memory to CMA.
*/
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
- return 1;
+ base = fw_dump.reserve_dump_area_start;
+ size = fw_dump.boot_memory_size;
+ end = base + size;
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = *token;
+ base = ALIGN(base, CMA_MIN_ALIGNMENT_BYTES);
+ end = ALIGN_DOWN(end, CMA_MIN_ALIGNMENT_BYTES);
+ size = end - base;
+
+ if (end <= base) {
+ pr_warn("%s: Too less memory to give to CMA\n", __func__);
+ return;
+ }
+
+ rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma);
+ if (rc) {
+ pr_err("Failed to init cma area for firmware-assisted dump,%d\n", rc);
+ /*
+ * Though the CMA init has failed we still have memory
+ * reservation with us. The reserved memory will be
+ * blocked from production system usage. Hence just return,
+ * so that we can continue with fadump.
+ */
+ return;
+ }
/*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
+ * If CMA activation fails, keep the pages reserved, instead of
+ * exposing them to buddy allocator. Same as 'fadump=nocma' case.
*/
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
-
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, a 32bit cell which defines
- * the ID of a supported section followed by two 32 bit cells which
- * gives teh size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
+ cma_reserve_pages_on_error(fadump_cma);
- if (!sections)
- return 1;
+ /*
+ * So we now have successfully initialized cma area for fadump.
+ */
+ pr_info("Initialized [0x%llx, %luMB] cma area from [0x%lx, %luMB] "
+ "bytes of memory reserved for firmware-assisted dump\n",
+ cma_get_base(fadump_cma), cma_get_size(fadump_cma) >> 20,
+ fw_dump.reserve_dump_area_start,
+ fw_dump.boot_memory_size >> 20);
+ return;
+}
+#endif /* CONFIG_CMA */
- num_sections = size / (3 * sizeof(u32));
+/*
+ * Additional parameters meant for capture kernel are placed in a dedicated area.
+ * If this is capture kernel boot, append these parameters to bootargs.
+ */
+void __init fadump_append_bootargs(void)
+{
+ char *append_args;
+ size_t len;
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
+ if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area)
+ return;
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(&sections[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(&sections[1], 2);
- break;
+ if (fw_dump.param_area < fw_dump.boot_mem_top) {
+ if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) {
+ pr_warn("WARNING: Can't use additional parameters area!\n");
+ fw_dump.param_area = 0;
+ return;
}
}
+ append_args = (char *)fw_dump.param_area;
+ len = strlen(boot_command_line);
+
+ /*
+ * Too late to fail even if cmdline size exceeds. Truncate additional parameters
+ * to cmdline size and proceed anyway.
+ */
+ if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1)
+ pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n");
+
+ pr_debug("Cmdline: %s\n", boot_command_line);
+ snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args);
+ pr_info("Updated cmdline: %s\n", boot_command_line);
+}
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if (depth == 0) {
+ early_init_dt_scan_reserved_ranges(node);
+ return 0;
+ }
+
+ if (depth != 1)
+ return 0;
+
+ if (strcmp(uname, "rtas") == 0) {
+ rtas_fadump_dt_scan(&fw_dump, node);
+ return 1;
+ }
+
+ if (strcmp(uname, "ibm,opal") == 0) {
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * If fadump is registered, check if the memory provided
+ * falls within boot memory area and reserved memory area.
+ */
+int is_fadump_memory_area(u64 addr, unsigned long size)
+{
+ u64 d_start, d_end;
+
+ if (!fw_dump.dump_registered)
+ return 0;
+
+ if (!size)
+ return 0;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ if (((addr + size) > d_start) && (addr <= d_end))
+ return 1;
+
+ return (addr <= fw_dump.boot_mem_top);
+}
+
+int should_fadump_crash(void)
+{
+ if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ return 0;
return 1;
}
@@ -119,78 +236,72 @@ int is_fadump_active(void)
return fw_dump.dump_active;
}
+/*
+ * Returns true, if there are no holes in memory area between d_start to d_end,
+ * false otherwise.
+ */
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
+{
+ phys_addr_t reg_start, reg_end;
+ bool ret = false;
+ u64 i, start, end;
+
+ for_each_mem_range(i, &reg_start, &reg_end) {
+ start = max_t(u64, d_start, reg_start);
+ end = min_t(u64, d_end, reg_end);
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
+ break;
+
+ if (end == d_end) {
+ ret = true;
+ break;
+ }
+
+ d_start = end + 1;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+ u64 d_start, d_end;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ return is_fadump_mem_area_contiguous(d_start, d_end);
+}
+
/* Print firmware assisted dump configurations for debugging purpose. */
-static void fadump_show_config(void)
+static void __init fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
if (!fw_dump.fadump_supported)
return;
- pr_debug("Fadump enabled : %s\n",
- (fw_dump.fadump_enabled ? "yes" : "no"));
- pr_debug("Dump Active : %s\n",
- (fw_dump.dump_active ? "yes" : "no"));
+ pr_debug("Fadump enabled : %s\n", str_yes_no(fw_dump.fadump_enabled));
+ pr_debug("Dump Active : %s\n", str_yes_no(fw_dump.dump_active));
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
-}
-
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = 0x00000001;
- fdm->header.dump_num_sections = 3;
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- (u32)offsetof(struct fadump_mem_struct, cpu_state_data);
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = FADUMP_REQUEST_FLAG;
- fdm->cpu_state_data.source_data_type = FADUMP_CPU_STATE_DATA;
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = fw_dump.cpu_state_data_size;
- fdm->cpu_state_data.destination_address = addr;
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = FADUMP_REQUEST_FLAG;
- fdm->hpte_region.source_data_type = FADUMP_HPTE_REGION;
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = fw_dump.hpte_region_size;
- fdm->hpte_region.destination_address = addr;
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = FADUMP_REQUEST_FLAG;
- fdm->rmr_region.source_data_type = FADUMP_REAL_MODE_REGION;
- fdm->rmr_region.source_address = RMA_START;
- fdm->rmr_region.source_len = fw_dump.boot_memory_size;
- fdm->rmr_region.destination_address = addr;
- addr += fw_dump.boot_memory_size;
-
- return addr;
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
+ }
}
/**
@@ -208,19 +319,51 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
* that is required for a kernel to boot successfully.
*
*/
-static inline unsigned long fadump_calculate_reserve_size(void)
+static __init u64 fadump_calculate_reserve_size(void)
{
- unsigned long size;
+ u64 base, size, bootmem_min;
+ int ret;
+
+ if (fw_dump.reserve_bootvar)
+ pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
/*
- * Check if the size is specified through fadump_reserve_mem= cmdline
- * option. If yes, then use that.
+ * Check if the size is specified through crashkernel= cmdline
+ * option. If yes, then use that but ignore base as fadump reserves
+ * memory at a predefined offset.
*/
- if (fw_dump.reserve_bootvar)
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &size, &base, NULL, NULL, NULL);
+ if (ret == 0 && size > 0) {
+ unsigned long max_size;
+
+ if (fw_dump.reserve_bootvar)
+ pr_info("Using 'crashkernel=' parameter for memory reservation.\n");
+
+ fw_dump.reserve_bootvar = (unsigned long)size;
+
+ /*
+ * Adjust if the boot memory size specified is above
+ * the upper limit.
+ */
+ max_size = memblock_phys_mem_size() / MAX_BOOT_MEM_RATIO;
+ if (fw_dump.reserve_bootvar > max_size) {
+ fw_dump.reserve_bootvar = max_size;
+ pr_info("Adjusted boot memory size to %luMB\n",
+ (fw_dump.reserve_bootvar >> 20));
+ }
+
return fw_dump.reserve_bootvar;
+ } else if (fw_dump.reserve_bootvar) {
+ /*
+ * 'fadump_reserve_mem=' is being used to reserve memory
+ * for firmware-assisted dump.
+ */
+ return fw_dump.reserve_bootvar;
+ }
/* divide by 20 to get 5% of value */
- size = memblock_end_of_DRAM() / 20;
+ size = memblock_phys_mem_size() / 20;
/* round it down in multiples of 256 */
size = size & ~0x0FFFFFFFUL;
@@ -229,108 +372,280 @@ static inline unsigned long fadump_calculate_reserve_size(void)
if (memory_limit && size > memory_limit)
size = memory_limit;
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ return (size > bootmem_min ? size : bootmem_min);
}
/*
* Calculate the total memory size required to be reserved for
* firmware-assisted dump registration.
*/
-static unsigned long get_fadump_area_size(void)
+static unsigned long __init get_fadump_area_size(void)
{
unsigned long size = 0;
size += fw_dump.cpu_state_data_size;
size += fw_dump.hpte_region_size;
+ /*
+ * Account for pagesize alignment of boot memory area destination address.
+ * This facilitates mmap reading of the first kernel's memory.
+ */
+ size = PAGE_ALIGN(size);
size += fw_dump.boot_memory_size;
size += sizeof(struct fadump_crash_info_header);
- size += sizeof(struct elfhdr); /* ELF core header.*/
- size += sizeof(struct elf_phdr); /* place holder for cpu notes */
- /* Program headers for crash memory regions. */
- size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
- size = PAGE_ALIGN(size);
+ /* This is to hold kernel metadata on platforms that support it */
+ size += (fw_dump.ops->fadump_get_metadata_size ?
+ fw_dump.ops->fadump_get_metadata_size() : 0);
return size;
}
+static int __init add_boot_mem_region(unsigned long rstart,
+ unsigned long rsize)
+{
+ int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns();
+ int i = fw_dump.boot_mem_regs_cnt++;
+
+ if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) {
+ fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns;
+ return 0;
+ }
+
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+ i, rstart, (rstart + rsize));
+ fw_dump.boot_mem_addr[i] = rstart;
+ fw_dump.boot_mem_sz[i] = rsize;
+ return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+ unsigned long msize)
+{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = mstart;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
+ while (msize) {
+ if (msize > max_size)
+ rsize = max_size;
+ else
+ rsize = msize;
+
+ ret = add_boot_mem_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ msize -= rsize;
+ rstart += rsize;
+ }
+
+ return ret;
+}
+
+static int __init fadump_get_boot_mem_regions(void)
+{
+ unsigned long size, cur_size, hole_size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
+ phys_addr_t reg_start, reg_end;
+ int ret = 1;
+ u64 i;
+
+ fw_dump.boot_mem_regs_cnt = 0;
+
+ last_end = 0;
+ hole_size = 0;
+ cur_size = 0;
+ for_each_mem_range(i, &reg_start, &reg_end) {
+ size = reg_end - reg_start;
+ hole_size += (reg_start - last_end);
+
+ if ((cur_size + size) >= mem_size) {
+ size = (mem_size - cur_size);
+ ret = add_boot_mem_regions(reg_start, size);
+ break;
+ }
+
+ mem_size -= size;
+ cur_size += size;
+ ret = add_boot_mem_regions(reg_start, size);
+ if (!ret)
+ break;
+
+ last_end = reg_end;
+ }
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+ return ret;
+}
+
+/*
+ * Returns true, if the given range overlaps with reserved memory ranges
+ * starting at idx. Also, updates idx to index of overlapping memory range
+ * with the given memory range.
+ * False, otherwise.
+ */
+static bool __init overlaps_reserved_ranges(u64 base, u64 end, int *idx)
+{
+ bool ret = false;
+ int i;
+
+ for (i = *idx; i < reserved_mrange_info.mem_range_cnt; i++) {
+ u64 rbase = reserved_mrange_info.mem_ranges[i].base;
+ u64 rend = rbase + reserved_mrange_info.mem_ranges[i].size;
+
+ if (end <= rbase)
+ break;
+
+ if ((end > rbase) && (base < rend)) {
+ *idx = i;
+ ret = true;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Locate a suitable memory area to reserve memory for FADump. While at it,
+ * lookup reserved-ranges & avoid overlap with them, as they are used by F/W.
+ */
+static u64 __init fadump_locate_reserve_mem(u64 base, u64 size)
+{
+ struct fadump_memory_range *mrngs;
+ phys_addr_t mstart, mend;
+ int idx = 0;
+ u64 i, ret = 0;
+
+ mrngs = reserved_mrange_info.mem_ranges;
+ for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+ &mstart, &mend, NULL) {
+ pr_debug("%llu) mstart: %llx, mend: %llx, base: %llx\n",
+ i, mstart, mend, base);
+
+ if (mstart > base)
+ base = PAGE_ALIGN(mstart);
+
+ while ((mend > base) && ((mend - base) >= size)) {
+ if (!overlaps_reserved_ranges(base, base+size, &idx)) {
+ ret = base;
+ goto out;
+ }
+
+ base = mrngs[idx].base + mrngs[idx].size;
+ base = PAGE_ALIGN(base);
+ }
+ }
+
+out:
+ return ret;
+}
+
int __init fadump_reserve_mem(void)
{
- unsigned long base, size, memory_boundary;
+ u64 base, size, mem_boundary, bootmem_min;
+ int ret = 1;
if (!fw_dump.fadump_enabled)
return 0;
if (!fw_dump.fadump_supported) {
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
- " this hardware\n");
- fw_dump.fadump_enabled = 0;
- return 0;
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+ goto error_out;
}
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = fdm_active->rmr_region.source_len;
- else
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+ if (!fw_dump.dump_active) {
+ fw_dump.boot_memory_size =
+ PAGE_ALIGN(fadump_calculate_reserve_size());
- /*
- * Calculate the memory boundary.
- * If memory_limit is less than actual memory boundary then reserve
- * the memory for fadump beyond the memory_limit and adjust the
- * memory_limit accordingly, so that the running kernel can run with
- * specified memory_limit.
- */
- if (memory_limit && memory_limit < memblock_end_of_DRAM()) {
- size = get_fadump_area_size();
- if ((memory_limit + size) < memblock_end_of_DRAM())
- memory_limit += size;
- else
- memory_limit = memblock_end_of_DRAM();
- printk(KERN_INFO "Adjusted memory_limit for firmware-assisted"
- " dump, now %#016llx\n", memory_limit);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ if (fw_dump.boot_memory_size < bootmem_min) {
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+ fw_dump.boot_memory_size, bootmem_min);
+ goto error_out;
+ }
+
+ if (!fadump_get_boot_mem_regions()) {
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ goto error_out;
+ }
}
+
if (memory_limit)
- memory_boundary = memory_limit;
+ mem_boundary = memory_limit;
else
- memory_boundary = memblock_end_of_DRAM();
+ mem_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_mem_top;
+ size = get_fadump_area_size();
+ fw_dump.reserve_dump_area_size = size;
if (fw_dump.dump_active) {
- printk(KERN_INFO "Firmware-assisted dump is active.\n");
+ pr_info("Firmware-assisted dump is active.\n");
+
+#ifdef CONFIG_HUGETLB_PAGE
+ /*
+ * FADump capture kernel doesn't care much about hugepages.
+ * In fact, handling hugepages in capture kernel is asking for
+ * trouble. So, disable HugeTLB support when fadump is active.
+ */
+ hugetlb_disabled = true;
+#endif
/*
* If last boot has crashed then reserve all the memory
- * above boot_memory_size so that we don't touch it until
+ * above boot memory size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- memblock_reserve(base, size);
- printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
- "for saving crash dump\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20));
-
- fw_dump.fadumphdr_addr =
- fdm_active->rmr_region.destination_address +
- fdm_active->rmr_region.source_len;
- pr_debug("fadumphdr_addr = %p\n",
- (void *) fw_dump.fadumphdr_addr);
+ fadump_reserve_crash_area(base);
+
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+ pr_debug("Reserve dump area start address: 0x%lx\n",
+ fw_dump.reserve_dump_area_start);
} else {
- /* Reserve the memory at the top of memory. */
- size = get_fadump_area_size();
- base = memory_boundary - size;
- memblock_reserve(base, size);
- printk(KERN_INFO "Reserved %ldMB of memory at %ldMB "
- "for firmware-assisted dump\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20));
+ /*
+ * Reserve memory at an offset closer to bottom of the RAM to
+ * minimize the impact of memory hot-remove operation.
+ */
+ base = fadump_locate_reserve_mem(base, size);
+
+ if (!base || (base + size > mem_boundary)) {
+ pr_err("Failed to find memory chunk for reservation!\n");
+ goto error_out;
+ }
+ fw_dump.reserve_dump_area_start = base;
+
+ /*
+ * Calculate the kernel metadata address and register it with
+ * f/w if the platform supports.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ goto error_out;
+
+ if (memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory!\n");
+ goto error_out;
+ }
+
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
}
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- return 1;
+
+ return ret;
+error_out:
+ fw_dump.fadump_enabled = 0;
+ fw_dump.reserve_dump_area_size = 0;
+ return 0;
}
/* Look for fadump= cmdline option. */
@@ -343,12 +658,20 @@ static int __init early_fadump_param(char *p)
fw_dump.fadump_enabled = 1;
else if (strncmp(p, "off", 3) == 0)
fw_dump.fadump_enabled = 0;
+ else if (strncmp(p, "nocma", 5) == 0) {
+ fw_dump.fadump_enabled = 1;
+ fw_dump.nocma = 1;
+ }
return 0;
}
early_param("fadump", early_fadump_param);
-/* Look for fadump_reserve_mem= cmdline option */
+/*
+ * Look for fadump_reserve_mem= cmdline option
+ * TODO: Remove references to the 'fadump_reserve_mem=' parameter
+ * as soon as users are accustomed to the 'crashkernel=' parameter.
+ */
static int __init early_fadump_reserve_mem(char *p)
{
if (p)
@@ -357,56 +680,43 @@ static int __init early_fadump_reserve_mem(char *p)
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static void register_fw_dump(struct fadump_mem_struct *fdm)
+void crash_fadump(struct pt_regs *regs, const char *str)
{
- int rc;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
+ unsigned int msecs;
+ struct fadump_crash_info_header *fdh = NULL;
+ int old_cpu, this_cpu;
+ /* Do not include first CPU */
+ unsigned int ncpus = num_online_cpus() - 1;
- } while (wait_time);
+ if (!should_fadump_crash())
+ return;
- switch (rc) {
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- break;
- }
-}
+ /*
+ * old_cpu == -1 means this is the first CPU which has come here,
+ * go ahead and trigger fadump.
+ *
+ * old_cpu != -1 means some other CPU is already on its way
+ * to trigger fadump, just keep looping here.
+ */
+ this_cpu = smp_processor_id();
+ old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
-void crash_fadump(struct pt_regs *regs, const char *str)
-{
- struct fadump_crash_info_header *fdh = NULL;
+ if (old_cpu != -1) {
+ atomic_inc(&cpus_in_fadump);
- if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
+ /*
+ * We can't loop here indefinitely. Wait as long as fadump
+ * is in force. If we race with fadump un-registration this
+ * loop will break and then we go down to normal panic path
+ * and reboot. If fadump is in force the first crashing
+ * cpu will definitely trigger fadump.
+ */
+ while (fw_dump.dump_registered)
+ cpu_relax();
return;
+ }
fdh = __va(fw_dump.fadumphdr_addr);
- crashing_cpu = smp_processor_id();
fdh->crashing_cpu = crashing_cpu;
crash_save_vmcoreinfo();
@@ -415,101 +725,22 @@ void crash_fadump(struct pt_regs *regs, const char *str)
else
ppc_save_regs(&fdh->regs);
- fdh->cpu_online_mask = *cpu_online_mask;
-
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
-}
+ fdh->cpu_mask = *cpu_online_mask;
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (reg_entry->reg_id != REG_ID("CPUEND")) {
- fadump_set_regval(regs, reg_entry->reg_id,
- reg_entry->reg_value);
- reg_entry++;
+ /*
+ * If we came in via system reset, wait a while for the secondary
+ * CPUs to enter.
+ */
+ if (TRAP(&(fdh->regs)) == INTERRUPT_SYSTEM_RESET) {
+ msecs = CRASH_TIMEOUT;
+ while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
+ mdelay(1);
}
- reg_entry++;
- return reg_entry;
-}
-
-static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
- void *data, size_t data_len)
-{
- struct elf_note note;
-
- note.n_namesz = strlen(name) + 1;
- note.n_descsz = data_len;
- note.n_type = type;
- memcpy(buf, &note, sizeof(note));
- buf += (sizeof(note) + 3)/4;
- memcpy(buf, name, note.n_namesz);
- buf += (note.n_namesz + 3)/4;
- memcpy(buf, data, note.n_descsz);
- buf += (note.n_descsz + 3)/4;
-
- return buf;
-}
-
-static void fadump_final_note(u32 *buf)
-{
- struct elf_note note;
- note.n_namesz = 0;
- note.n_descsz = 0;
- note.n_type = 0;
- memcpy(buf, &note, sizeof(note));
+ fw_dump.ops->fadump_trigger(fdh, str);
}
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -518,25 +749,23 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
* FIXME: How do i get PID? Do I really need it?
* prstatus.pr_pid = ????
*/
- elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
- buf = fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
- &prstatus, sizeof(prstatus));
+ elf_core_copy_regs(&prstatus.pr_reg, regs);
+ buf = append_elf_note(buf, NN_PRSTATUS, NT_PRSTATUS,
+ &prstatus, sizeof(prstatus));
return buf;
}
-static void fadump_update_elfcore_header(char *bufp)
+void __init fadump_update_elfcore_header(char *bufp)
{
- struct elfhdr *elf;
struct elf_phdr *phdr;
- elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/* First note is a place holder for cpu notes info. */
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
@@ -544,220 +773,152 @@ static void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+static void *__init fadump_alloc_buffer(unsigned long size)
{
- void *vaddr;
+ unsigned long count, i;
struct page *page;
- unsigned long order, count, i;
+ void *vaddr;
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (!vaddr)
return NULL;
- count = 1 << order;
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
- SetPageReserved(page + i);
+ mark_page_reserved(page + i);
return vaddr;
}
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+s32 __init fadump_setup_cpu_notes_buf(u32 num_cpus)
{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = fdm->cpu_state_data.destination_address;
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", reg_header->magic_number);
- pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
-
- vaddr += reg_header->num_cpu_offset;
- num_cpus = *((u32 *)(vaddr));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr =
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+ if (!fw_dump.cpu_notes_buf_vaddr) {
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+ fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
- fw_dump.cpu_notes_buf = __pa(note_buf);
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+ fw_dump.cpu_notes_buf_size,
+ fw_dump.cpu_notes_buf_vaddr);
+ return 0;
+}
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
+void fadump_free_cpu_notes_buf(void)
+{
+ if (!fw_dump.cpu_notes_buf_vaddr)
+ return;
- for (i = 0; i < num_cpus; i++) {
- if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, &regs);
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- }
- }
- fadump_final_note(note_buf);
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr = 0;
+ fw_dump.cpu_notes_buf_size = 0;
+}
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
+{
+ if (mrange_info->is_static) {
+ mrange_info->mem_range_cnt = 0;
+ return;
}
- return 0;
-
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- return rc;
+ kfree(mrange_info->mem_ranges);
+ memset((void *)((u64)mrange_info + RNG_NAME_SZ), 0,
+ (sizeof(struct fadump_mrange_info) - RNG_NAME_SZ));
}
/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
+ * Allocate or reallocate mem_ranges array in incremental units
+ * of PAGE_SIZE.
*/
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
+ struct fadump_memory_range *new_array;
+ u64 new_size;
+
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+ new_size, mrange_info->name);
+
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
+ if (new_array == NULL) {
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
+ mrange_info->name);
+ fadump_free_mem_ranges(mrange_info);
+ return -ENOMEM;
}
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
-
+ mrange_info->mem_ranges = new_array;
+ mrange_info->mem_ranges_sz = new_size;
+ mrange_info->max_mem_ranges = (new_size /
+ sizeof(struct fadump_memory_range));
return 0;
}
-
-static inline void fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
{
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
+ bool is_adjacent = false;
+ u64 start, size;
+
if (base == end)
- return;
+ return 0;
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
- crash_mem_ranges, base, end - 1, (end - base));
- crash_memory_ranges[crash_mem_ranges].base = base;
- crash_memory_ranges[crash_mem_ranges].size = end - base;
- crash_mem_ranges++;
-}
+ /*
+ * Fold adjacent memory ranges to bring down the memory ranges/
+ * PT_LOAD segments count.
+ */
+ if (mrange_info->mem_range_cnt) {
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
-static void fadump_exclude_reserved_area(unsigned long long start,
- unsigned long long end)
-{
- unsigned long long ra_start, ra_end;
+ /*
+ * Boot memory area needs separate PT_LOAD segment(s) as it
+ * is moved to a different location at the time of crash.
+ * So, fold only if the region is not boot memory area.
+ */
+ if ((start + size) == base && start >= fw_dump.boot_mem_top)
+ is_adjacent = true;
+ }
+ if (!is_adjacent) {
+ /* resize the array on reaching the limit */
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
+ int ret;
+
+ if (mrange_info->is_static) {
+ pr_err("Reached array size limit for %s memory ranges\n",
+ mrange_info->name);
+ return -ENOSPC;
+ }
+
+ ret = fadump_alloc_mem_ranges(mrange_info);
+ if (ret)
+ return ret;
+
+ /* Update to the new resized array */
+ mem_ranges = mrange_info->mem_ranges;
+ }
- ra_start = fw_dump.reserve_dump_area_start;
- ra_end = ra_start + fw_dump.reserve_dump_area_size;
+ start = base;
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
+ mrange_info->mem_range_cnt++;
+ }
- if ((ra_start < end) && (ra_end > start)) {
- if ((start < ra_start) && (end > ra_end)) {
- fadump_add_crash_memory(start, ra_start);
- fadump_add_crash_memory(ra_end, end);
- } else if (start < ra_start) {
- fadump_add_crash_memory(start, ra_start);
- } else if (ra_end < end) {
- fadump_add_crash_memory(ra_end, end);
- }
- } else
- fadump_add_crash_memory(start, end);
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
+ start, end - 1, (end - start));
+ return 0;
}
static int fadump_init_elfcore_header(char *bufp)
@@ -778,7 +939,14 @@ static int fadump_init_elfcore_header(char *bufp)
elf->e_entry = 0;
elf->e_phoff = sizeof(struct elfhdr);
elf->e_shoff = 0;
- elf->e_flags = ELF_CORE_EFLAGS;
+
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ elf->e_flags = 2;
+ else if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
+ elf->e_flags = 1;
+ else
+ elf->e_flags = 0;
+
elf->e_ehsize = sizeof(struct elfhdr);
elf->e_phentsize = sizeof(struct elf_phdr);
elf->e_phnum = 0;
@@ -790,78 +958,79 @@ static int fadump_init_elfcore_header(char *bufp)
}
/*
- * Traverse through memblock structure and setup crash memory ranges. These
- * ranges will be used create PT_LOAD program headers in elfcore header.
+ * If the given physical address falls within the boot memory region then
+ * return the relocated address that points to the dump region reserved
+ * for saving initial boot memory contents.
*/
-static void fadump_setup_crash_memory_ranges(void)
+static inline unsigned long fadump_relocate(unsigned long paddr)
{
- struct memblock_region *reg;
- unsigned long long start, end;
+ unsigned long raddr, rstart, rend, rlast, hole_size;
+ int i;
- pr_debug("Setup crash memory ranges.\n");
- crash_mem_ranges = 0;
- /*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
- */
- fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
+ hole_size = 0;
+ rlast = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ rstart = fw_dump.boot_mem_addr[i];
+ rend = rstart + fw_dump.boot_mem_sz[i];
+ hole_size += (rstart - rlast);
- for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
- if (start == RMA_START && end >= fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (paddr >= rstart && paddr < rend) {
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
+ break;
+ }
- /* add this range excluding the reserved dump area. */
- fadump_exclude_reserved_area(start, end);
+ rlast = rend;
}
+
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
+ return raddr;
}
-/*
- * If the given physical address falls within the boot memory region then
- * return the relocated address that points to the dump region reserved
- * for saving initial boot memory contents.
- */
-static inline unsigned long fadump_relocate(unsigned long paddr)
+static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start,
+ u64 size, unsigned long long offset)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return fdm.rmr_region.destination_address + paddr;
- else
- return paddr;
+ phdr->p_align = 0;
+ phdr->p_memsz = size;
+ phdr->p_filesz = size;
+ phdr->p_paddr = start;
+ phdr->p_offset = offset;
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_vaddr = (unsigned long)__va(start);
}
-static int fadump_create_elfcore_headers(char *bufp)
+static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh)
{
+ char *bufp;
struct elfhdr *elf;
struct elf_phdr *phdr;
- int i;
+ u64 boot_mem_dest_offset;
+ unsigned long long i, ra_start, ra_end, ra_size, mstart, mend;
+ bufp = (char *) fw_dump.elfcorehdr_addr;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
bufp += sizeof(struct elfhdr);
/*
- * setup ELF PT_NOTE, place holder for cpu notes info. The notes info
- * will be populated during second kernel boot after crash. Hence
- * this PT_NOTE will always be the first elf note.
+ * Set up ELF PT_NOTE, a placeholder for CPU notes information.
+ * The notes info will be populated later by platform-specific code.
+ * Hence, this PT_NOTE will always be the first ELF note.
*
* NOTE: Any new ELF note addition should be placed after this note.
*/
phdr = (struct elf_phdr *)bufp;
bufp += sizeof(struct elf_phdr);
phdr->p_type = PT_NOTE;
- phdr->p_flags = 0;
- phdr->p_vaddr = 0;
- phdr->p_align = 0;
-
- phdr->p_offset = 0;
- phdr->p_paddr = 0;
- phdr->p_filesz = 0;
- phdr->p_memsz = 0;
-
+ phdr->p_flags = 0;
+ phdr->p_vaddr = 0;
+ phdr->p_align = 0;
+ phdr->p_offset = 0;
+ phdr->p_paddr = 0;
+ phdr->p_filesz = 0;
+ phdr->p_memsz = 0;
+ /* Increment number of program headers. */
(elf->e_phnum)++;
/* setup ELF PT_NOTE for vmcoreinfo */
@@ -871,50 +1040,66 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = 0;
phdr->p_vaddr = 0;
phdr->p_align = 0;
-
- phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note());
- phdr->p_offset = phdr->p_paddr;
- phdr->p_memsz = vmcoreinfo_max_size;
- phdr->p_filesz = vmcoreinfo_max_size;
-
+ phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr;
+ phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size;
/* Increment number of program headers. */
(elf->e_phnum)++;
- /* setup PT_LOAD sections. */
-
- for (i = 0; i < crash_mem_ranges; i++) {
- unsigned long long mbase, msize;
- mbase = crash_memory_ranges[i].base;
- msize = crash_memory_ranges[i].size;
-
- if (!msize)
- continue;
-
+ /*
+ * Setup PT_LOAD sections. first include boot memory regions
+ * and then add rest of the memory regions.
+ */
+ boot_mem_dest_offset = fw_dump.boot_mem_dest_addr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
phdr = (struct elf_phdr *)bufp;
bufp += sizeof(struct elf_phdr);
- phdr->p_type = PT_LOAD;
- phdr->p_flags = PF_R|PF_W|PF_X;
- phdr->p_offset = mbase;
-
- if (mbase == RMA_START) {
- /*
- * The entire RMA region will be moved by firmware
- * to the specified destination_address. Hence set
- * the correct offset.
- */
- phdr->p_offset = fdm.rmr_region.destination_address;
+ populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i],
+ fw_dump.boot_mem_sz[i],
+ boot_mem_dest_offset);
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+ boot_mem_dest_offset += fw_dump.boot_mem_sz[i];
+ }
+
+ /* Memory reserved for fadump in first kernel */
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_size = get_fadump_area_size();
+ ra_end = ra_start + ra_size;
+
+ phdr = (struct elf_phdr *)bufp;
+ for_each_mem_range(i, &mstart, &mend) {
+ /* Boot memory regions already added, skip them now */
+ if (mstart < fw_dump.boot_mem_top) {
+ if (mend > fw_dump.boot_mem_top)
+ mstart = fw_dump.boot_mem_top;
+ else
+ continue;
}
- phdr->p_paddr = mbase;
- phdr->p_vaddr = (unsigned long)__va(mbase);
- phdr->p_filesz = msize;
- phdr->p_memsz = msize;
- phdr->p_align = 0;
+ /* Handle memblock regions overlaps with fadump reserved area */
+ if ((ra_start < mend) && (ra_end > mstart)) {
+ if ((mstart < ra_start) && (mend > ra_end)) {
+ populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
+ /* Increment number of program headers. */
+ (elf->e_phnum)++;
+ bufp += sizeof(struct elf_phdr);
+ phdr = (struct elf_phdr *)bufp;
+ populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
+ } else if (mstart < ra_start) {
+ populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart);
+ } else if (ra_end < mend) {
+ populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end);
+ }
+ } else {
+ /* No overlap with fadump reserved memory region */
+ populate_elf_pt_load(phdr, mstart, mend - mstart, mstart);
+ }
/* Increment number of program headers. */
(elf->e_phnum)++;
+ bufp += sizeof(struct elf_phdr);
+ phdr = (struct elf_phdr *) bufp;
}
- return 0;
}
static unsigned long init_fadump_header(unsigned long addr)
@@ -924,188 +1109,310 @@ static unsigned long init_fadump_header(unsigned long addr)
if (!addr)
return 0;
- fw_dump.fadumphdr_addr = addr;
fdh = __va(addr);
addr += sizeof(struct fadump_crash_info_header);
memset(fdh, 0, sizeof(struct fadump_crash_info_header));
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
- fdh->elfcorehdr_addr = addr;
+ fdh->version = FADUMP_HEADER_VERSION;
/* We will set the crashing cpu id in crash_fadump() during crash. */
- fdh->crashing_cpu = CPU_UNKNOWN;
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
+
+ /*
+ * The physical address and size of vmcoreinfo are required in the
+ * second kernel to prepare elfcorehdr.
+ */
+ fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note());
+ fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE;
+
+
+ fdh->pt_regs_sz = sizeof(struct pt_regs);
+ /*
+ * When LPAR is terminated by PYHP, ensure all possible CPUs'
+ * register data is processed while exporting the vmcore.
+ */
+ fdh->cpu_mask = *cpu_possible_mask;
+ fdh->cpu_mask_sz = sizeof(struct cpumask);
return addr;
}
-static void register_fadump(void)
+static int register_fadump(void)
{
unsigned long addr;
- void *vaddr;
/*
* If no memory is reserved then we can not register for firmware-
* assisted dump.
*/
if (!fw_dump.reserve_dump_area_size)
- return;
+ return -ENODEV;
- fadump_setup_crash_memory_ranges();
+ addr = fw_dump.fadumphdr_addr;
- addr = fdm.rmr_region.destination_address + fdm.rmr_region.source_len;
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
- vaddr = __va(addr);
-
- pr_debug("Creating ELF core headers at %#016lx\n", addr);
- fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- register_fw_dump(&fdm);
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fw_dump.ops->fadump_register(&fw_dump);
}
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
+void fadump_cleanup(void)
{
- int rc = 0;
- unsigned int wait_time;
+ if (!fw_dump.fadump_supported)
+ return;
+
+ /* Invalidate the registration only if dump is active. */
+ if (fw_dump.dump_active) {
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fw_dump.ops->fadump_invalidate(&fw_dump);
+ } else if (fw_dump.dump_registered) {
+ /* Un-register Firmware-assisted dump if it was registered. */
+ fw_dump.ops->fadump_unregister(&fw_dump);
+ }
- pr_debug("Un-register firmware-assisted dump\n");
+ if (fw_dump.ops->fadump_cleanup)
+ fw_dump.ops->fadump_cleanup(&fw_dump);
+}
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
+static void fadump_free_reserved_memory(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long pfn;
+ unsigned long time_limit = jiffies + HZ;
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
+ pr_info("freeing reserved memory (0x%llx - 0x%llx)\n",
+ PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ free_reserved_page(pfn_to_page(pfn));
+
+ if (time_after(jiffies, time_limit)) {
+ cond_resched();
+ time_limit = jiffies + HZ;
+ }
}
- fw_dump.dump_registered = 0;
- return 0;
}
-static int fadump_invalidate_dump(struct fadump_mem_struct *fdm)
+/*
+ * Skip memory holes and free memory that was actually reserved.
+ */
+static void fadump_release_reserved_area(u64 start, u64 end)
{
- int rc = 0;
- unsigned int wait_time;
+ unsigned long reg_spfn, reg_epfn;
+ u64 tstart, tend, spfn, epfn;
+ int i;
- pr_debug("Invalidating firmware-assisted dump registration\n");
+ spfn = PHYS_PFN(start);
+ epfn = PHYS_PFN(end);
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
+ tstart = max_t(u64, spfn, reg_spfn);
+ tend = min_t(u64, epfn, reg_epfn);
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
+ if (tstart < tend) {
+ fadump_free_reserved_memory(tstart, tend);
- if (rc) {
- printk(KERN_ERR "Failed to invalidate firmware-assisted dump "
- "rgistration. unexpected error(%d).\n", rc);
- return rc;
+ if (tend == epfn)
+ break;
+
+ spfn = tend;
+ }
}
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
}
-void fadump_cleanup(void)
+/*
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
+ */
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- /* Invalidate the registration only if dump is active. */
- if (fw_dump.dump_active) {
- init_fadump_mem_struct(&fdm,
- fdm_active->cpu_state_data.destination_address);
- fadump_invalidate_dump(&fdm);
+ struct fadump_memory_range *mem_ranges;
+ u64 base, size;
+ int i, j, idx;
+
+ if (!reserved_mrange_info.mem_range_cnt)
+ return;
+
+ /* Sort the memory ranges */
+ mem_ranges = mrange_info->mem_ranges;
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+ idx = i;
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+ if (mem_ranges[idx].base > mem_ranges[j].base)
+ idx = j;
+ }
+ if (idx != i)
+ swap(mem_ranges[idx], mem_ranges[i]);
+ }
+
+ /* Merge adjacent reserved ranges */
+ idx = 0;
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+ base = mem_ranges[i-1].base;
+ size = mem_ranges[i-1].size;
+ if (mem_ranges[i].base == (base + size))
+ mem_ranges[idx].size += mem_ranges[i].size;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ mem_ranges[idx] = mem_ranges[i];
+ }
}
+ mrange_info->mem_range_cnt = idx + 1;
}
/*
- * Release the memory that was reserved in early boot to preserve the memory
- * contents. The released memory will be available for general use.
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump.
*/
-static void fadump_release_memory(unsigned long begin, unsigned long end)
+static void __init early_init_dt_scan_reserved_ranges(unsigned long node)
{
- unsigned long addr;
- unsigned long ra_start, ra_end;
+ const __be32 *prop;
+ int len, ret = -1;
+ unsigned long i;
- ra_start = fw_dump.reserve_dump_area_start;
- ra_end = ra_start + fw_dump.reserve_dump_area_size;
+ /* reserved-ranges already scanned */
+ if (reserved_mrange_info.mem_range_cnt != 0)
+ return;
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- /*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
- */
- if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
- continue;
+ prop = of_get_flat_dt_prop(node, "reserved-ranges", &len);
+ if (!prop)
+ return;
- free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
+ /*
+ * Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ ret = fadump_add_mem_range(&reserved_mrange_info,
+ base, base + size);
+ if (ret < 0) {
+ pr_warn("some reserved ranges are ignored!\n");
+ break;
+ }
+ }
}
+
+ /* Compact reserved ranges */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
}
-static void fadump_invalidate_release_mem(void)
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crash'ed kernel's memory contents except reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
{
- unsigned long reserved_area_start, reserved_area_end;
- unsigned long destination_address;
+ u64 ra_start, ra_end, tstart;
+ int i, ret;
- mutex_lock(&fadump_mutex);
- if (!fw_dump.dump_active) {
- mutex_unlock(&fadump_mutex);
+ ra_start = fw_dump.reserve_dump_area_start;
+ ra_end = ra_start + fw_dump.reserve_dump_area_size;
+
+ /*
+ * If reserved ranges array limit is hit, overwrite the last reserved
+ * memory range with reserved dump area to ensure it is excluded from
+ * the memory being released (reused for next FADump registration).
+ */
+ if (reserved_mrange_info.mem_range_cnt ==
+ reserved_mrange_info.max_mem_ranges)
+ reserved_mrange_info.mem_range_cnt--;
+
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0)
return;
+
+ /* Get the reserved ranges list in order first. */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+ /* Exclude reserved ranges and release remaining memory */
+ tstart = begin;
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
+
+ if (tstart >= ra_end)
+ continue;
+
+ if (tstart < ra_start)
+ fadump_release_reserved_area(tstart, ra_start);
+ tstart = ra_end;
}
- destination_address = fdm_active->cpu_state_data.destination_address;
- fadump_cleanup();
- mutex_unlock(&fadump_mutex);
+ if (tstart < end)
+ fadump_release_reserved_area(tstart, end);
+}
+
+static void fadump_free_elfcorehdr_buf(void)
+{
+ if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0)
+ return;
/*
- * Save the current reserved memory bounds we will require them
- * later for releasing the memory for general use.
+ * Before freeing the memory of `elfcorehdr`, reset the global
+ * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing
+ * invalid memory.
*/
- reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
+ elfcorehdr_addr = ELFCORE_ADDR_ERR;
+ fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size);
+ fw_dump.elfcorehdr_addr = 0;
+ fw_dump.elfcorehdr_size = 0;
+}
+
+static void fadump_invalidate_release_mem(void)
+{
+ scoped_guard(mutex, &fadump_mutex) {
+ if (!fw_dump.dump_active)
+ return;
+ fadump_cleanup();
+ }
+
+ fadump_free_elfcorehdr_buf();
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+ fadump_free_cpu_notes_buf();
+
/*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
+ * Setup kernel metadata and initialize the kernel dump
+ * memory structure for FADump re-registration.
*/
- fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
-
- fadump_release_memory(reserved_area_start, reserved_area_end);
- if (fw_dump.cpu_notes_buf) {
- fadump_cpu_notes_buf_free(
- (unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ pr_warn("Failed to setup kernel metadata!\n");
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
-static ssize_t fadump_release_memory_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t release_mem_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
+ int input = -1;
+
if (!fw_dump.dump_active)
return -EPERM;
- if (buf[0] == '1') {
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
+ if (input == 1) {
/*
* Take away the '/proc/vmcore'. We are releasing the dump
* memory, hence it will not be valid anymore.
*/
+#ifdef CONFIG_PROC_VMCORE
vmcore_cleanup();
+#endif
fadump_invalidate_release_mem();
} else
@@ -1113,47 +1420,118 @@ static ssize_t fadump_release_memory_store(struct kobject *kobj,
return count;
}
-static ssize_t fadump_enabled_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+/* Release the reserved memory and disable the FADump */
+static void __init unregister_fadump(void)
+{
+ fadump_cleanup();
+ fadump_release_memory(fw_dump.reserve_dump_area_start,
+ fw_dump.reserve_dump_area_size);
+ fw_dump.fadump_enabled = 0;
+ kobject_put(fadump_kobj);
+}
+
+static ssize_t enabled_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.fadump_enabled);
}
-static ssize_t fadump_register_show(struct kobject *kobj,
- struct kobj_attribute *attr,
- char *buf)
+/*
+ * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates
+ * to usersapce that fadump re-registration is not required on memory
+ * hotplug events.
+ */
+static ssize_t hotplug_ready_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%d\n", 1);
+}
+
+static ssize_t mem_reserved_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%ld\n", fw_dump.reserve_dump_area_size);
+}
+
+static ssize_t registered_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", fw_dump.dump_registered);
}
-static ssize_t fadump_register_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
+static ssize_t bootargs_append_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area));
+}
+
+static ssize_t bootargs_append_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ char *params;
+
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
+ return -EPERM;
+
+ if (count >= COMMAND_LINE_SIZE)
+ return -EINVAL;
+
+ /*
+ * Fail here instead of handling this scenario with
+ * some silly workaround in capture kernel.
+ */
+ if (saved_command_line_len + count >= COMMAND_LINE_SIZE) {
+ pr_err("Appending parameters exceeds cmdline size!\n");
+ return -ENOSPC;
+ }
+
+ params = __va(fw_dump.param_area);
+ strscpy_pad(params, buf, COMMAND_LINE_SIZE);
+ /* Remove newline character at the end. */
+ if (params[count-1] == '\n')
+ params[count-1] = '\0';
+
+ return count;
+}
+
+static ssize_t registered_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
{
int ret = 0;
+ int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
+ if (kstrtoint(buf, 0, &input))
+ return -EINVAL;
+
mutex_lock(&fadump_mutex);
- switch (buf[0]) {
- case '0':
+ switch (input) {
+ case 0:
if (fw_dump.dump_registered == 0) {
- ret = -EINVAL;
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fw_dump.ops->fadump_unregister(&fw_dump);
break;
- case '1':
+ case 1:
if (fw_dump.dump_registered == 1) {
- ret = -EINVAL;
- goto unlock_out;
+ /* Un-register Firmware-assisted dump */
+ fw_dump.ops->fadump_unregister(&fw_dump);
}
/* Register Firmware-assisted dump */
- register_fadump();
+ ret = register_fadump();
break;
default:
ret = -EINVAL;
@@ -1167,116 +1545,246 @@ unlock_out:
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
-
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- fdm_ptr->cpu_state_data.destination_address,
- fdm_ptr->cpu_state_data.destination_address +
- fdm_ptr->cpu_state_data.source_len - 1,
- fdm_ptr->cpu_state_data.source_len,
- fdm_ptr->cpu_state_data.bytes_dumped);
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- fdm_ptr->hpte_region.destination_address,
- fdm_ptr->hpte_region.destination_address +
- fdm_ptr->hpte_region.source_len - 1,
- fdm_ptr->hpte_region.source_len,
- fdm_ptr->hpte_region.bytes_dumped);
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- fdm_ptr->rmr_region.destination_address,
- fdm_ptr->rmr_region.destination_address +
- fdm_ptr->rmr_region.source_len - 1,
- fdm_ptr->rmr_region.source_len,
- fdm_ptr->rmr_region.bytes_dumped);
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- fdm_ptr->cpu_state_data.destination_address))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- fdm_ptr->cpu_state_data.destination_address - 1,
- fdm_ptr->cpu_state_data.destination_address -
- fw_dump.reserve_dump_area_start,
- fdm_ptr->cpu_state_data.destination_address -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
return 0;
}
-static struct kobj_attribute fadump_release_attr = __ATTR(fadump_release_mem,
- 0200, NULL,
- fadump_release_memory_store);
-static struct kobj_attribute fadump_attr = __ATTR(fadump_enabled,
- 0444, fadump_enabled_show,
- NULL);
-static struct kobj_attribute fadump_register_attr = __ATTR(fadump_registered,
- 0644, fadump_register_show,
- fadump_register_store);
-
-static int fadump_region_open(struct inode *inode, struct file *file)
-{
- return single_open(file, fadump_region_show, inode->i_private);
-}
-
-static const struct file_operations fadump_region_fops = {
- .open = fadump_region_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+static struct kobj_attribute release_attr = __ATTR_WO(release_mem);
+static struct kobj_attribute enable_attr = __ATTR_RO(enabled);
+static struct kobj_attribute register_attr = __ATTR_RW(registered);
+static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved);
+static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready);
+static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append);
+
+static struct attribute *fadump_attrs[] = {
+ &enable_attr.attr,
+ &register_attr.attr,
+ &mem_reserved_attr.attr,
+ &hotplug_ready_attr.attr,
+ NULL,
};
-static void fadump_init_files(void)
+ATTRIBUTE_GROUPS(fadump);
+
+DEFINE_SHOW_ATTRIBUTE(fadump_region);
+
+static void __init fadump_init_files(void)
{
- struct dentry *debugfs_file;
int rc = 0;
- rc = sysfs_create_file(kernel_kobj, &fadump_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_enabled (%d)\n", rc);
+ fadump_kobj = kobject_create_and_add("fadump", kernel_kobj);
+ if (!fadump_kobj) {
+ pr_err("failed to create fadump kobject\n");
+ return;
+ }
- rc = sysfs_create_file(kernel_kobj, &fadump_register_attr.attr);
- if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_registered (%d)\n", rc);
+ if (fw_dump.param_area) {
+ rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr);
+ if (rc)
+ pr_err("unable to create bootargs_append sysfs file (%d)\n", rc);
+ }
- debugfs_file = debugfs_create_file("fadump_region", 0444,
- powerpc_debugfs_root, NULL,
- &fadump_region_fops);
- if (!debugfs_file)
- printk(KERN_ERR "fadump: unable to create debugfs file"
- " fadump_region\n");
+ debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL,
+ &fadump_region_fops);
if (fw_dump.dump_active) {
- rc = sysfs_create_file(kernel_kobj, &fadump_release_attr.attr);
+ rc = sysfs_create_file(fadump_kobj, &release_attr.attr);
if (rc)
- printk(KERN_ERR "fadump: unable to create sysfs file"
- " fadump_release_mem (%d)\n", rc);
+ pr_err("unable to create release_mem sysfs file (%d)\n",
+ rc);
+ }
+
+ rc = sysfs_create_groups(fadump_kobj, fadump_groups);
+ if (rc) {
+ pr_err("sysfs group creation failed (%d), unregistering FADump",
+ rc);
+ unregister_fadump();
+ return;
+ }
+
+ /*
+ * The FADump sysfs are moved from kernel_kobj to fadump_kobj need to
+ * create symlink at old location to maintain backward compatibility.
+ *
+ * - fadump_enabled -> fadump/enabled
+ * - fadump_registered -> fadump/registered
+ * - fadump_release_mem -> fadump/release_mem
+ */
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "enabled", "fadump_enabled");
+ if (rc) {
+ pr_err("unable to create fadump_enabled symlink (%d)", rc);
+ return;
+ }
+
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj, fadump_kobj,
+ "registered",
+ "fadump_registered");
+ if (rc) {
+ pr_err("unable to create fadump_registered symlink (%d)", rc);
+ sysfs_remove_link(kernel_kobj, "fadump_enabled");
+ return;
+ }
+
+ if (fw_dump.dump_active) {
+ rc = compat_only_sysfs_link_entry_to_kobj(kernel_kobj,
+ fadump_kobj,
+ "release_mem",
+ "fadump_release_mem");
+ if (rc)
+ pr_err("unable to create fadump_release_mem symlink (%d)",
+ rc);
+ }
+ return;
+}
+
+static int __init fadump_setup_elfcorehdr_buf(void)
+{
+ int elf_phdr_cnt;
+ unsigned long elfcorehdr_size;
+
+ /*
+ * Program header for CPU notes comes first, followed by one for
+ * vmcoreinfo, and the remaining program headers correspond to
+ * memory regions.
+ */
+ elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory);
+ elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr));
+ elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size);
+
+ fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size);
+ if (!fw_dump.elfcorehdr_addr) {
+ pr_err("Failed to allocate %lu bytes for elfcorehdr\n",
+ elfcorehdr_size);
+ return -ENOMEM;
+ }
+ fw_dump.elfcorehdr_size = elfcorehdr_size;
+ return 0;
+}
+
+/*
+ * Check if the fadump header of crashed kernel is compatible with fadump kernel.
+ *
+ * It checks the magic number, endianness, and size of non-primitive type
+ * members of fadump header to ensure safe dump collection.
+ */
+static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh)
+{
+ if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) {
+ pr_err("Old magic number, can't process the dump.\n");
+ return false;
+ }
+
+ if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
+ if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC))
+ pr_err("Endianness mismatch between the crashed and fadump kernels.\n");
+ else
+ pr_err("Fadump header is corrupted.\n");
+
+ return false;
+ }
+
+ /*
+ * Dump collection is not safe if the size of non-primitive type members
+ * of the fadump header do not match between crashed and fadump kernel.
+ */
+ if (fdh->pt_regs_sz != sizeof(struct pt_regs) ||
+ fdh->cpu_mask_sz != sizeof(struct cpumask)) {
+ pr_err("Fadump header size mismatch.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static void __init fadump_process(void)
+{
+ struct fadump_crash_info_header *fdh;
+
+ fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr);
+ if (!fdh) {
+ pr_err("Crash info header is empty.\n");
+ goto err_out;
}
+
+ /* Avoid processing the dump if fadump header isn't compatible */
+ if (!is_fadump_header_compatible(fdh))
+ goto err_out;
+
+ /* Allocate buffer for elfcorehdr */
+ if (fadump_setup_elfcorehdr_buf())
+ goto err_out;
+
+ fadump_populate_elfcorehdr(fdh);
+
+ /* Let platform update the CPU notes in elfcorehdr */
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
+ goto err_out;
+
+ /*
+ * elfcorehdr is now ready to be exported.
+ *
+ * set elfcorehdr_addr so that vmcore module will export the
+ * elfcorehdr through '/proc/vmcore'.
+ */
+ elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr);
return;
+
+err_out:
+ fadump_invalidate_release_mem();
+}
+
+/*
+ * Reserve memory to store additional parameters to be passed
+ * for fadump/capture kernel.
+ */
+void __init fadump_setup_param_area(void)
+{
+ phys_addr_t range_start, range_end;
+
+ if (!fw_dump.param_area_supported || fw_dump.dump_active)
+ return;
+
+ /* This memory can't be used by PFW or bootloader as it is shared across kernels */
+ if (early_radix_enabled()) {
+ /*
+ * Anywhere in the upper half should be good enough as all memory
+ * is accessible in real mode.
+ */
+ range_start = memblock_end_of_DRAM() / 2;
+ range_end = memblock_end_of_DRAM();
+ } else {
+ /*
+ * Memory range for passing additional parameters for HASH MMU
+ * must meet the following conditions:
+ * 1. The first memory block size must be higher than the
+ * minimum RMA (MIN_RMA) size. Bootloader can use memory
+ * upto RMA size. So it should be avoided.
+ * 2. The range should be between MIN_RMA and RMA size (ppc64_rma_size)
+ * 3. It must not overlap with the fadump reserved area.
+ */
+ if (ppc64_rma_size < MIN_RMA*1024*1024)
+ return;
+
+ range_start = MIN_RMA * 1024 * 1024;
+ range_end = min(ppc64_rma_size, fw_dump.boot_mem_top);
+ }
+
+ fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE,
+ COMMAND_LINE_SIZE,
+ range_start,
+ range_end);
+ if (!fw_dump.param_area) {
+ pr_warn("WARNING: Could not setup area to pass additional parameters!\n");
+ return;
+ }
+
+ memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE);
}
/*
@@ -1284,33 +1792,96 @@ static void fadump_init_files(void)
*/
int __init setup_fadump(void)
{
- if (!fw_dump.fadump_enabled)
- return 0;
-
- if (!fw_dump.fadump_supported) {
- printk(KERN_ERR "Firmware-assisted dump is not supported on"
- " this hardware\n");
+ if (!fw_dump.fadump_supported)
return 0;
- }
+ fadump_init_files();
fadump_show_config();
+
+ if (!fw_dump.fadump_enabled)
+ return 1;
+
/*
* If dump data is available then see if it is valid and prepare for
* saving it to the disk.
*/
if (fw_dump.dump_active) {
+ fadump_process();
+ }
+ /* Initialize the kernel dump memory structure and register with f/w */
+ else if (fw_dump.reserve_dump_area_size) {
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+ register_fadump();
+ }
+
+ /*
+ * In case of panic, fadump is triggered via ppc_panic_event()
+ * panic notifier. Setting crash_kexec_post_notifiers to 'true'
+ * lets panic() function take crash friendly path before panic
+ * notifiers are invoked.
+ */
+ crash_kexec_post_notifiers = true;
+
+ return 1;
+}
+/*
+ * Use subsys_initcall_sync() here because there is dependency with
+ * crash_save_vmcoreinfo_init(), which must run first to ensure vmcoreinfo initialization
+ * is done before registering with f/w.
+ */
+subsys_initcall_sync(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
+ return 0;
+
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ */
+int __init fadump_reserve_mem(void)
+{
+ if (fw_dump.dump_active) {
/*
- * if dump process fails then invalidate the registration
- * and release memory before proceeding for re-registration.
+ * If last boot has crashed then reserve all the memory
+ * above boot memory to preserve crash data.
*/
- if (process_fadump(fdm_active) < 0)
- fadump_invalidate_release_mem();
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
- fadump_init_files();
+ pr_info("Preserving crash data for processing in next boot.\n");
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
+ } else
+ pr_debug("FADump-aware kernel..\n");
return 1;
}
-subsys_initcall(setup_fadump);
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+ u64 i, mstart, mend, msize;
+
+ for_each_mem_range(i, &mstart, &mend) {
+ msize = mend - mstart;
+
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
+ }
+
+ pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index 2eae4478f7a1..8987eee33dc8 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Extracted from cputable.c
*
@@ -6,17 +7,37 @@
* Modifications for ppc64:
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
* Copyright (C) 2005 Stephen Rothwell, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/export.h>
#include <linux/cache.h>
+#include <linux/of.h>
#include <asm/firmware.h>
+#include <asm/kvm_guest.h>
+#ifdef CONFIG_PPC64
unsigned long powerpc_firmware_features __read_mostly;
EXPORT_SYMBOL_GPL(powerpc_firmware_features);
+#endif
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+DEFINE_STATIC_KEY_FALSE(kvm_guest);
+EXPORT_SYMBOL_GPL(kvm_guest);
+
+/*
+ * Enable the kvm_guest static key when a "/hypervisor" node exists and
+ * is compatible with "linux,kvm". Always returns 0.
+ */
+int __init check_kvm_guest(void)
+{
+	struct device_node *np = of_find_node_by_path("/hypervisor");
+
+	if (np) {
+		if (of_device_is_compatible(np, "linux,kvm"))
+			static_branch_enable(&kvm_guest);
+
+		/* Release the node obtained from of_find_node_by_path(). */
+		of_node_put(np);
+	}
+
+	return 0;
+}
+core_initcall(check_kvm_guest); // before kvm_guest_init()
+#endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 9ad236e5d2c9..2f8f3f93cbb6 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* FPU support code, moved here from head.S so that it can be used
* by chips which use other head-whatever.S files.
@@ -6,26 +7,31 @@
* Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
* Copyright (C) 1996 Paul Mackerras.
* Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/export.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
#ifdef CONFIG_VSX
+/*
+ * Restore the single register n from base. The "b 2f" sits inside a
+ * feature section keyed on CPU_FTR_VSX: when that branch is in effect the
+ * register is restored with REST_VSR(n,c,base), otherwise execution falls
+ * through to REST_FPR(n,base) and skips the VSX path via "b 3f".
+ * NOTE(review): presumably the feature section is patched out on CPUs
+ * without VSX - confirm against asm/feature-fixups.h.
+ */
+#define __REST_1FPVSR(n,c,base) \
+BEGIN_FTR_SECTION \
+ b 2f; \
+END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
+ REST_FPR(n,base); \
+ b 3f; \
+2: REST_VSR(n,c,base); \
+3:
+
#define __REST_32FPVSRS(n,c,base) \
BEGIN_FTR_SECTION \
b 2f; \
@@ -44,58 +50,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \
2: SAVE_32VSRS(n,c,base); \
3:
#else
+#define __REST_1FPVSR(n,b,base) REST_FPR(n, base)
#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base)
#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base)
#endif
+#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base)
#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base)
#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base)
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/* void do_load_up_transact_fpu(struct thread_struct *thread)
- *
- * This is similar to load_up_fpu but for the transactional version of the FP
- * register set. It doesn't mess with the task MSR or valid flags.
- * Furthermore, we don't do lazy FP with TM currently.
- */
-_GLOBAL(do_load_up_transact_fpu)
- mfmsr r6
- ori r5,r6,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r5,r5,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- SYNC
- MTMSRD(r5)
-
- addi r7,r3,THREAD_TRANSACT_FPSTATE
- lfd fr0,FPSTATE_FPSCR(r7)
- MTFSF_L(fr0)
- REST_32FPVSRS(0, R4, R7)
-
- /* FP/VSX off again */
- MTMSRD(r6)
- SYNC
-
- blr
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-
-/*
- * Enable use of the FPU, and VSX if possible, for the caller.
- */
-_GLOBAL(fp_enable)
- mfmsr r3
- ori r3,r3,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- SYNC
- MTMSRD(r3)
- isync /* (not necessary for arch 2.02 and later) */
- blr
-
/*
* Load state from memory into FP registers including FPSCR.
* Assumes the caller has enabled FP in the MSR.
@@ -105,6 +67,8 @@ _GLOBAL(load_fp_state)
MTFSF_L(fr0)
REST_32FPVSRS(0, R4, R3)
blr
+EXPORT_SYMBOL(load_fp_state)
+_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */
/*
* Store FP state into memory, including FPSCR
@@ -114,7 +78,9 @@ _GLOBAL(store_fp_state)
SAVE_32FPVSRS(0, R4, R3)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r3)
+ REST_1FPVSR(0, R4, R3)
blr
+EXPORT_SYMBOL(store_fp_state)
/*
* This task wants to use the FPU now.
@@ -127,43 +93,22 @@ _GLOBAL(store_fp_state)
*/
_GLOBAL(load_up_fpu)
mfmsr r5
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_FP|MSR_RI
+#else
ori r5,r5,MSR_FP
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
oris r5,r5,MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- SYNC
MTMSRD(r5) /* enable use of fpu now */
isync
-/*
- * For SMP, we don't do lazy FPU switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another. Instead we call giveup_fpu in switch_to.
- */
-#ifndef CONFIG_SMP
- LOAD_REG_ADDRBASE(r3, last_task_used_math)
- toreal(r3)
- PPC_LL r4,ADDROFF(last_task_used_math)(r3)
- PPC_LCMPI 0,r4,0
- beq 1f
- toreal(r4)
- addi r4,r4,THREAD /* want last_task_used_math->thread */
- addi r10,r4,THREAD_FPSTATE
- SAVE_32FPVSRS(0, R5, R10)
- mffs fr0
- stfd fr0,FPSTATE_FPSCR(r10)
- PPC_LL r5,PT_REGS(r4)
- toreal(r5)
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- li r10,MSR_FP|MSR_FE0|MSR_FE1
- andc r4,r4,r10 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
/* enable use of FP after return */
#ifdef CONFIG_PPC32
- mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ addi r5,r2,THREAD
lwz r4,THREAD_FPEXC_MODE(r5)
ori r9,r9,MSR_FP /* enable FP for current */
or r9,r9,r4
@@ -174,80 +119,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
ori r12,r12,MSR_FP
or r12,r12,r4
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
#endif
+ li r4,1
+ stb r4,THREAD_LOAD_FP(r5)
addi r10,r5,THREAD_FPSTATE
lfd fr0,FPSTATE_FPSCR(r10)
MTFSF_L(fr0)
REST_32FPVSRS(0, R4, R10)
-#ifndef CONFIG_SMP
- subi r4,r5,THREAD
- fromreal(r4)
- PPC_STL r4,ADDROFF(last_task_used_math)(r3)
-#endif /* CONFIG_SMP */
/* restore registers and return */
/* we haven't used ctr or xer or lr */
blr
+_ASM_NOKPROBE_SYMBOL(load_up_fpu)
/*
- * giveup_fpu(tsk)
- * Disable FP for the task given as the argument,
- * and save the floating-point registers in its thread_struct.
+ * save_fpu(tsk)
+ * Save the floating-point registers in its thread_struct.
* Enables the FPU for use in the kernel on return.
*/
-_GLOBAL(giveup_fpu)
- mfmsr r5
- ori r5,r5,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r5,r5,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- SYNC_601
- ISYNC_601
- MTMSRD(r5) /* enable use of fpu now */
- SYNC_601
- isync
- PPC_LCMPI 0,r3,0
- beqlr- /* if no previous owner, done */
+_GLOBAL(save_fpu)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r6,THREAD_FPSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r6,0
bne 2f
addi r6,r3,THREAD_FPSTATE
-2: PPC_LCMPI 0,r5,0
- SAVE_32FPVSRS(0, R4, R6)
+2: SAVE_32FPVSRS(0, R4, R6)
mffs fr0
stfd fr0,FPSTATE_FPSCR(r6)
- beq 1f
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- li r3,MSR_FP|MSR_FE0|MSR_FE1
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- oris r3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
- andc r4,r4,r3 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
- li r5,0
- LOAD_REG_ADDRBASE(r4,last_task_used_math)
- PPC_STL r5,ADDROFF(last_task_used_math)(r4)
-#endif /* CONFIG_SMP */
- blr
-
-/*
- * These are used in the alignment trap handler when emulating
- * single-precision loads and stores.
- */
-
-_GLOBAL(cvt_fd)
- lfs 0,0(r3)
- stfd 0,0(r4)
- blr
-
-_GLOBAL(cvt_df)
- lfd 0,0(r3)
- stfs 0,0(r4)
+ REST_1FPVSR(0, R4, R6)
blr
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
deleted file mode 100644
index 390311c0f03d..000000000000
--- a/arch/powerpc/kernel/ftrace.c
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
- * Code for replacing ftrace calls with jumps.
- *
- * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
- *
- * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
- *
- * Added function graph tracer code, taken from x86 that was written
- * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
- *
- */
-
-#define pr_fmt(fmt) "ftrace-powerpc: " fmt
-
-#include <linux/spinlock.h>
-#include <linux/hardirq.h>
-#include <linux/uaccess.h>
-#include <linux/module.h>
-#include <linux/ftrace.h>
-#include <linux/percpu.h>
-#include <linux/init.h>
-#include <linux/list.h>
-
-#include <asm/cacheflush.h>
-#include <asm/code-patching.h>
-#include <asm/ftrace.h>
-#include <asm/syscall.h>
-
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-static unsigned int
-ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
-{
- unsigned int op;
-
- addr = ppc_function_entry((void *)addr);
-
- /* if (link) set op to 'bl' else 'b' */
- op = create_branch((unsigned int *)ip, addr, link ? 1 : 0);
-
- return op;
-}
-
-static int
-ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
-{
- unsigned int replaced;
-
- /*
- * Note: Due to modules and __init, code can
- * disappear and change, we need to protect against faulting
- * as well as code changing. We do this by using the
- * probe_kernel_* functions.
- *
- * No real locking needed, this code is run through
- * kstop_machine, or before SMP starts.
- */
-
- /* read the text we want to modify */
- if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /* Make sure it is what we expect it to be */
- if (replaced != old)
- return -EINVAL;
-
- /* replace the text with the new text */
- if (patch_instruction((unsigned int *)ip, new))
- return -EPERM;
-
- return 0;
-}
-
-/*
- * Helper functions that are the same for both PPC64 and PPC32.
- */
-static int test_24bit_addr(unsigned long ip, unsigned long addr)
-{
- addr = ppc_function_entry((void *)addr);
-
- /* use the create_branch to verify that this offset can be branched */
- return create_branch((unsigned int *)ip, addr, 0);
-}
-
-#ifdef CONFIG_MODULES
-
-static int is_bl_op(unsigned int op)
-{
- return (op & 0xfc000003) == 0x48000001;
-}
-
-static unsigned long find_bl_target(unsigned long ip, unsigned int op)
-{
- static int offset;
-
- offset = (op & 0x03fffffc);
- /* make it signed */
- if (offset & 0x02000000)
- offset |= 0xfe000000;
-
- return ip + (long)offset;
-}
-
-#ifdef CONFIG_PPC64
-static int
-__ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned int op;
- unsigned long entry, ptr;
- unsigned long ip = rec->ip;
- void *tramp;
-
- /* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, sizeof(int)))
- return -EFAULT;
-
- /* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %x\n", op);
- return -EINVAL;
- }
-
- /* lets find where the pointer goes */
- tramp = (void *)find_bl_target(ip, op);
-
- pr_devel("ip:%lx jumps to %p", ip, tramp);
-
- if (!is_module_trampoline(tramp)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
-
- if (module_trampoline_target(mod, tramp, &ptr)) {
- pr_err("Failed to get trampoline target\n");
- return -EFAULT;
- }
-
- pr_devel("trampoline target %lx", ptr);
-
- entry = ppc_global_function_entry((void *)addr);
- /* This should match what was called */
- if (ptr != entry) {
- pr_err("addr %lx does not match expected %lx\n", ptr, entry);
- return -EINVAL;
- }
-
- /*
- * Our original call site looks like:
- *
- * bl <tramp>
- * ld r2,XX(r1)
- *
- * Milton Miller pointed out that we can not simply nop the branch.
- * If a task was preempted when calling a trace function, the nops
- * will remove the way to restore the TOC in r2 and the r2 TOC will
- * get corrupted.
- *
- * Use a b +8 to jump over the load.
- */
- op = 0x48000008; /* b +8 */
-
- if (patch_instruction((unsigned int *)ip, op))
- return -EPERM;
-
- return 0;
-}
-
-#else /* !PPC64 */
-static int
-__ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned int op;
- unsigned int jmp[4];
- unsigned long ip = rec->ip;
- unsigned long tramp;
-
- if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /* Make sure that that this is still a 24bit jump */
- if (!is_bl_op(op)) {
- pr_err("Not expected bl: opcode is %x\n", op);
- return -EINVAL;
- }
-
- /* lets find where the pointer goes */
- tramp = find_bl_target(ip, op);
-
- /*
- * On PPC32 the trampoline looks like:
- * 0x3d, 0x80, 0x00, 0x00 lis r12,sym@ha
- * 0x39, 0x8c, 0x00, 0x00 addi r12,r12,sym@l
- * 0x7d, 0x89, 0x03, 0xa6 mtctr r12
- * 0x4e, 0x80, 0x04, 0x20 bctr
- */
-
- pr_devel("ip:%lx jumps to %lx", ip, tramp);
-
- /* Find where the trampoline jumps to */
- if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) {
- pr_err("Failed to read %lx\n", tramp);
- return -EFAULT;
- }
-
- pr_devel(" %08x %08x ", jmp[0], jmp[1]);
-
- /* verify that this is what we expect it to be */
- if (((jmp[0] & 0xffff0000) != 0x3d800000) ||
- ((jmp[1] & 0xffff0000) != 0x398c0000) ||
- (jmp[2] != 0x7d8903a6) ||
- (jmp[3] != 0x4e800420)) {
- pr_err("Not a trampoline\n");
- return -EINVAL;
- }
-
- tramp = (jmp[1] & 0xffff) |
- ((jmp[0] & 0xffff) << 16);
- if (tramp & 0x8000)
- tramp -= 0x10000;
-
- pr_devel(" %lx ", tramp);
-
- if (tramp != addr) {
- pr_err("Trampoline location %08lx does not match addr\n",
- tramp);
- return -EINVAL;
- }
-
- op = PPC_INST_NOP;
-
- if (patch_instruction((unsigned int *)ip, op))
- return -EPERM;
-
- return 0;
-}
-#endif /* PPC64 */
-#endif /* CONFIG_MODULES */
-
-int ftrace_make_nop(struct module *mod,
- struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long ip = rec->ip;
- unsigned int old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- old = ftrace_call_replace(ip, addr, 1);
- new = PPC_INST_NOP;
- return ftrace_modify_code(ip, old, new);
- }
-
-#ifdef CONFIG_MODULES
- /*
- * Out of range jumps are called from modules.
- * We should either already have a pointer to the module
- * or it has been passed in.
- */
- if (!rec->arch.mod) {
- if (!mod) {
- pr_err("No module loaded addr=%lx\n", addr);
- return -EFAULT;
- }
- rec->arch.mod = mod;
- } else if (mod) {
- if (mod != rec->arch.mod) {
- pr_err("Record mod %p not equal to passed in mod %p\n",
- rec->arch.mod, mod);
- return -EINVAL;
- }
- /* nothing to do if mod == rec->arch.mod */
- } else
- mod = rec->arch.mod;
-
- return __ftrace_make_nop(mod, rec, addr);
-#else
- /* We should not get here without modules */
- return -EINVAL;
-#endif /* CONFIG_MODULES */
-}
-
-#ifdef CONFIG_MODULES
-#ifdef CONFIG_PPC64
-static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned int op[2];
- void *ip = (void *)rec->ip;
-
- /* read where this goes */
- if (probe_kernel_read(op, ip, sizeof(op)))
- return -EFAULT;
-
- /*
- * We expect to see:
- *
- * b +8
- * ld r2,XX(r1)
- *
- * The load offset is different depending on the ABI. For simplicity
- * just mask it out when doing the compare.
- */
- if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) {
- pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]);
- return -EINVAL;
- }
-
- /* If we never set up a trampoline to ftrace_caller, then bail */
- if (!rec->arch.mod->arch.tramp) {
- pr_err("No ftrace trampoline\n");
- return -EINVAL;
- }
-
- /* Ensure branch is within 24 bits */
- if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
- pr_err("Branch out of range\n");
- return -EINVAL;
- }
-
- if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- return 0;
-}
-#else
-static int
-__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned int op;
- unsigned long ip = rec->ip;
-
- /* read where this goes */
- if (probe_kernel_read(&op, (void *)ip, MCOUNT_INSN_SIZE))
- return -EFAULT;
-
- /* It should be pointing to a nop */
- if (op != PPC_INST_NOP) {
- pr_err("Expected NOP but have %x\n", op);
- return -EINVAL;
- }
-
- /* If we never set up a trampoline to ftrace_caller, then bail */
- if (!rec->arch.mod->arch.tramp) {
- pr_err("No ftrace trampoline\n");
- return -EINVAL;
- }
-
- /* create the branch to the trampoline */
- op = create_branch((unsigned int *)ip,
- rec->arch.mod->arch.tramp, BRANCH_SET_LINK);
- if (!op) {
- pr_err("REL24 out of range!\n");
- return -EINVAL;
- }
-
- pr_devel("write to %lx\n", rec->ip);
-
- if (patch_instruction((unsigned int *)ip, op))
- return -EPERM;
-
- return 0;
-}
-#endif /* CONFIG_PPC64 */
-#endif /* CONFIG_MODULES */
-
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
-{
- unsigned long ip = rec->ip;
- unsigned int old, new;
-
- /*
- * If the calling address is more that 24 bits away,
- * then we had to use a trampoline to make the call.
- * Otherwise just update the call site.
- */
- if (test_24bit_addr(ip, addr)) {
- /* within range */
- old = PPC_INST_NOP;
- new = ftrace_call_replace(ip, addr, 1);
- return ftrace_modify_code(ip, old, new);
- }
-
-#ifdef CONFIG_MODULES
- /*
- * Out of range jumps are called from modules.
- * Being that we are converting from nop, it had better
- * already have a module defined.
- */
- if (!rec->arch.mod) {
- pr_err("No module loaded\n");
- return -EINVAL;
- }
-
- return __ftrace_make_call(rec, addr);
-#else
- /* We should not get here without modules */
- return -EINVAL;
-#endif /* CONFIG_MODULES */
-}
-
-int ftrace_update_ftrace_func(ftrace_func_t func)
-{
- unsigned long ip = (unsigned long)(&ftrace_call);
- unsigned int old, new;
- int ret;
-
- old = *(unsigned int *)&ftrace_call;
- new = ftrace_call_replace(ip, (unsigned long)func, 1);
- ret = ftrace_modify_code(ip, old, new);
-
- return ret;
-}
-
-static int __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
-{
- unsigned long ftrace_addr = (unsigned long)FTRACE_ADDR;
- int ret;
-
- ret = ftrace_update_record(rec, enable);
-
- switch (ret) {
- case FTRACE_UPDATE_IGNORE:
- return 0;
- case FTRACE_UPDATE_MAKE_CALL:
- return ftrace_make_call(rec, ftrace_addr);
- case FTRACE_UPDATE_MAKE_NOP:
- return ftrace_make_nop(NULL, rec, ftrace_addr);
- }
-
- return 0;
-}
-
-void ftrace_replace_code(int enable)
-{
- struct ftrace_rec_iter *iter;
- struct dyn_ftrace *rec;
- int ret;
-
- for (iter = ftrace_rec_iter_start(); iter;
- iter = ftrace_rec_iter_next(iter)) {
- rec = ftrace_rec_iter_record(iter);
- ret = __ftrace_replace_code(rec, enable);
- if (ret) {
- ftrace_bug(ret, rec->ip);
- return;
- }
- }
-}
-
-void arch_ftrace_update_code(int command)
-{
- if (command & FTRACE_UPDATE_CALLS)
- ftrace_replace_code(1);
- else if (command & FTRACE_DISABLE_CALLS)
- ftrace_replace_code(0);
-
- if (command & FTRACE_UPDATE_TRACE_FUNC)
- ftrace_update_ftrace_func(ftrace_trace_function);
-
- if (command & FTRACE_START_FUNC_RET)
- ftrace_enable_ftrace_graph_caller();
- else if (command & FTRACE_STOP_FUNC_RET)
- ftrace_disable_ftrace_graph_caller();
-}
-
-int __init ftrace_dyn_arch_init(void)
-{
- return 0;
-}
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-extern void ftrace_graph_call(void);
-extern void ftrace_graph_stub(void);
-
-int ftrace_enable_ftrace_graph_caller(void)
-{
- unsigned long ip = (unsigned long)(&ftrace_graph_call);
- unsigned long addr = (unsigned long)(&ftrace_graph_caller);
- unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- unsigned int old, new;
-
- old = ftrace_call_replace(ip, stub, 0);
- new = ftrace_call_replace(ip, addr, 0);
-
- return ftrace_modify_code(ip, old, new);
-}
-
-int ftrace_disable_ftrace_graph_caller(void)
-{
- unsigned long ip = (unsigned long)(&ftrace_graph_call);
- unsigned long addr = (unsigned long)(&ftrace_graph_caller);
- unsigned long stub = (unsigned long)(&ftrace_graph_stub);
- unsigned int old, new;
-
- old = ftrace_call_replace(ip, addr, 0);
- new = ftrace_call_replace(ip, stub, 0);
-
- return ftrace_modify_code(ip, old, new);
-}
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
-#ifdef CONFIG_PPC64
-extern void mod_return_to_handler(void);
-#endif
-
-/*
- * Hook the return address and push it in the stack of return addrs
- * in current thread info.
- */
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
-{
- unsigned long old;
- int faulted;
- struct ftrace_graph_ent trace;
- unsigned long return_hooker = (unsigned long)&return_to_handler;
-
- if (unlikely(ftrace_graph_is_dead()))
- return;
-
- if (unlikely(atomic_read(&current->tracing_graph_pause)))
- return;
-
-#ifdef CONFIG_PPC64
- /* non core kernel code needs to save and restore the TOC */
- if (REGION_ID(self_addr) != KERNEL_REGION_ID)
- return_hooker = (unsigned long)&mod_return_to_handler;
-#endif
-
- return_hooker = ppc_function_entry((void *)return_hooker);
-
- /*
- * Protect against fault, even if it shouldn't
- * happen. This tool is too much intrusive to
- * ignore such a protection.
- */
- asm volatile(
- "1: " PPC_LL "%[old], 0(%[parent])\n"
- "2: " PPC_STL "%[return_hooker], 0(%[parent])\n"
- " li %[faulted], 0\n"
- "3:\n"
-
- ".section .fixup, \"ax\"\n"
- "4: li %[faulted], 1\n"
- " b 3b\n"
- ".previous\n"
-
- ".section __ex_table,\"a\"\n"
- PPC_LONG_ALIGN "\n"
- PPC_LONG "1b,4b\n"
- PPC_LONG "2b,4b\n"
- ".previous"
-
- : [old] "=&r" (old), [faulted] "=r" (faulted)
- : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
- : "memory"
- );
-
- if (unlikely(faulted)) {
- ftrace_graph_stop();
- WARN_ON(1);
- return;
- }
-
- trace.func = self_addr;
- trace.depth = current->curr_ret_stack + 1;
-
- /* Only trace if the calling function expects to */
- if (!ftrace_graph_entry(&trace)) {
- *parent = old;
- return;
- }
-
- if (ftrace_push_return_trace(old, self_addr, &trace.depth, 0) == -EBUSY)
- *parent = old;
-}
-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-
-#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_PPC64)
-unsigned long __init arch_syscall_addr(int nr)
-{
- return sys_call_table[nr*2];
-}
-#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
new file mode 100644
index 000000000000..9cba7dbf58dd
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.h
@@ -0,0 +1,212 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __HEAD_32_H__
+#define __HEAD_32_H__
+
+#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+
+/*
+ * Exception entry code. This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ *
+ * EXCEPTION_PROLOG simply chains the three prolog stages defined below.
+ * trapno and name are forwarded to EXCEPTION_PROLOG_2; a non-zero
+ * handle_dar_dsisr makes stages 0 and 2 also save DAR and DSISR.
+ */
+.macro EXCEPTION_PROLOG trapno name handle_dar_dsisr=0
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=\handle_dar_dsisr
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 \trapno \name handle_dar_dsisr=\handle_dar_dsisr
+.endm
+
+/*
+ * Stage 0: free up r10/r11 and save the exception SPRs.
+ *
+ * r10 and r11 are parked in SPRG_SCRATCH0/SPRG_SCRATCH1. SRR0 and SRR1
+ * (and DAR/DSISR when handle_dar_dsisr is non-zero) are stored at the
+ * SRR0/SRR1/DAR/DSISR offsets from the pointer held in SPRG_THREAD.
+ *
+ * On exit: r10 = CR as it was on entry, r11 = SRR1 & MSR_PR, and cr0.eq
+ * is set when MSR_PR was clear in SRR1.
+ */
+.macro EXCEPTION_PROLOG_0 handle_dar_dsisr=0
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfspr r10, SPRN_SPRG_THREAD
+ .if \handle_dar_dsisr
+ mfspr r11, SPRN_DAR
+ stw r11, DAR(r10)
+ mfspr r11, SPRN_DSISR
+ stw r11, DSISR(r10)
+ .endif
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+ stw r11, SRR1(r10)
+ /* Grab CR before the andi. below overwrites cr0. */
+ mfcr r10
+ andi. r11, r11, MSR_PR
+.endm
+
+/*
+ * Stage 1: choose the stack for the exception frame.
+ *
+ * The incoming r1 is parked in SPRG_SCRATCH2. Expects cr0.eq as left by
+ * EXCEPTION_PROLOG_0: when set, the frame is carved out just below the
+ * current r1; otherwise r1 is reloaded from the TASK_STACK field reached
+ * through SPRG_THREAD and pointed at the top frame of that stack.
+ * With CONFIG_VMAP_STACK, a set bit (32 - THREAD_ALIGN_SHIFT) in the new
+ * r1 diverts to vmap_stack_overflow.
+ */
+.macro EXCEPTION_PROLOG_1
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ subi r1, r1, INT_FRAME_SIZE /* use r1 if kernel */
+ beq 1f
+ mfspr r1,SPRN_SPRG_THREAD
+ lwz r1,TASK_STACK-THREAD(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+1:
+#ifdef CONFIG_VMAP_STACK
+ /* Copy the low bits of r1 into CR so a single bit can be tested. */
+ mtcrf 0x3f, r1
+ bt 32 - THREAD_ALIGN_SHIFT, vmap_stack_overflow
+#endif
+.endm
+
+/*
+ * Stage 2: switch to translated mode and fill in the exception frame.
+ *
+ * Uses rfi with SRR1 = MSR_KERNEL & ~MSR_RI and SRR0 = the local label 1
+ * to continue at \name\()_virt, which is emitted in .text. Expects r1 to
+ * point at the frame, r10 to hold the saved CR, and SPRG_SCRATCH0/1/2 to
+ * hold the original r10/r11/r1. It then saves CR, r1, r9-r12 and LR into
+ * the frame, copies DAR/DSISR from the thread save area when
+ * handle_dar_dsisr is non-zero, and leaves r9 = saved SRR1 and
+ * r12 = saved SRR0 for COMMON_EXCEPTION_PROLOG_END.
+ */
+.macro EXCEPTION_PROLOG_2 trapno name handle_dar_dsisr=0
+#ifdef CONFIG_PPC_8xx
+ .if \handle_dar_dsisr
+ li r11, RPN_PATTERN
+ mtspr SPRN_DAR, r11 /* Tag DAR, to be used in DTLB Error */
+ .endif
+#endif
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~MSR_RI) /* re-enable MMU */
+ mtspr SPRN_SRR1, r11
+ lis r11, 1f@h
+ ori r11, r11, 1f@l
+ mtspr SPRN_SRR0, r11
+ /* r11 = the value parked in SPRG_SCRATCH2 (r1 at exception time) */
+ mfspr r11, SPRN_SPRG_SCRATCH2
+ rfi
+
+ .text
+\name\()_virt:
+1:
+ /* Store the original r1 both in the GPR1 slot and at offset 0 of the frame. */
+ stw r11,GPR1(r1)
+ stw r11,0(r1)
+ mr r11, r1
+ stw r10,_CCR(r11) /* save registers */
+ stw r12,GPR12(r11)
+ stw r9,GPR9(r11)
+ /* Original r10/r11 were parked in the scratch SPRGs by EXCEPTION_PROLOG_0. */
+ mfspr r10,SPRN_SPRG_SCRATCH0
+ mfspr r12,SPRN_SPRG_SCRATCH1
+ stw r10,GPR10(r11)
+ stw r12,GPR11(r11)
+ mflr r10
+ stw r10,_LINK(r11)
+ /* SPRG_THREAD holds a physical address; convert it before dereferencing. */
+ mfspr r12, SPRN_SPRG_THREAD
+ tovirt(r12, r12)
+ .if \handle_dar_dsisr
+ lwz r10, DAR(r12)
+ stw r10, _DAR(r11)
+ lwz r10, DSISR(r12)
+ stw r10, _DSISR(r11)
+ .endif
+ lwz r9, SRR1(r12)
+ lwz r12, SRR0(r12)
+#ifdef CONFIG_PPC_8xx
+ mtspr SPRN_EID, r2 /* Set MSR_RI */
+#else
+ li r10, MSR_KERNEL /* can take exceptions */
+ mtmsr r10 /* (except for mach check in rtas) */
+#endif
+ COMMON_EXCEPTION_PROLOG_END \trapno
+_ASM_NOKPROBE_SYMBOL(\name\()_virt)
+.endm
+
+/*
+ * Finish filling in the exception frame at r1.
+ *
+ * Expects r12 = interrupted NIP and r9 = interrupted MSR. Stores r0, r2,
+ * r3-r8, the non-volatile GPRs, NIP, MSR, CTR and XER, writes the
+ * STACK_FRAME_REGS_MARKER and trapno into the frame, then leaves
+ * r2 = tovirt(SPRG_THREAD) - THREAD and r3 = r1 + STACK_INT_FRAME_REGS.
+ * NOTE(review): r2 is presumably the current task_struct pointer and r3
+ * the pt_regs argument for the C handler - confirm against callers.
+ */
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+ stw r0,GPR0(r1)
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r10,STACK_INT_FRAME_MARKER(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+ SAVE_GPRS(3, 8, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+ stw r9,_MSR(r1)
+ mfctr r10
+ mfspr r2,SPRN_SPRG_THREAD
+ stw r10,_CTR(r1)
+ tovirt(r2, r2)
+ mfspr r10,SPRN_XER
+ addi r2, r2, -THREAD
+ stw r10,_XER(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+.endm
+
+/*
+ * On CONFIG_PPC_BOOK3S_32, call prepare_transfer_to_handler when MSR_PR
+ * is clear in r9; expands to nothing on other configurations.
+ *
+ * The numeric label 777 is deliberately defined twice. "bne 777f" binds
+ * to the nearest following definition: with CONFIG_PPC_KUEP that is the
+ * one in front of "bl __kuep_lock", so the MSR_PR-set path calls
+ * __kuep_lock while the other path skips it via "b 778f". Without
+ * CONFIG_PPC_KUEP only the final 777 exists and the MSR_PR-set path
+ * does nothing.
+ */
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_BOOK3S_32
+ andi. r12,r9,MSR_PR
+ bne 777f
+ bl prepare_transfer_to_handler
+#ifdef CONFIG_PPC_KUEP
+ b 778f
+777:
+ bl __kuep_lock
+778:
+#endif
+777:
+#endif
+.endm
+
+/*
+ * System call entry.
+ *
+ * Saves SRR1/SRR0 in r9/r12, then uses rfi with SRR1 = MSR_KERNEL and
+ * SRR0 = the local label 1 to continue with the new MSR. Before the rfi,
+ * r11 receives the caller's r1, r1 is pointed at the top frame of the
+ * stack found through SPRG_THREAD, and r10 holds the virtual address of
+ * that thread pointer. After the switch it stores NIP and CR (with the
+ * SO bit cleared) into the frame and branches to transfer_to_syscall.
+ *
+ * The trapno argument is not referenced in the macro body.
+ */
+.macro SYSCALL_ENTRY trapno
+ mfspr r9, SPRN_SRR1
+ mfspr r12, SPRN_SRR0
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */
+ lis r10, 1f@h
+ ori r10, r10, 1f@l
+ mtspr SPRN_SRR1, r11
+ mtspr SPRN_SRR0, r10
+ mfspr r10,SPRN_SPRG_THREAD
+ mr r11, r1
+ lwz r1,TASK_STACK-THREAD(r10)
+ tovirt(r10, r10)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ rfi
+1:
+ stw r12,_NIP(r1)
+ mfcr r12
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ stw r12,_CCR(r1)
+ b transfer_to_syscall /* jump to handler */
+.endm
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ *
+ * START_EXCEPTION(n, label) switches to the __HEAD section, moves the
+ * location counter to n and defines label there. The CONFIG_PPC_BOOK3S
+ * variant additionally emits "DO_KVM n" ahead of the label.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define START_EXCEPTION(n, label) \
+ __HEAD; \
+ . = n; \
+ DO_KVM n; \
+label:
+
+#else
+#define START_EXCEPTION(n, label) \
+ __HEAD; \
+ . = n; \
+label:
+
+#endif
+
+/*
+ * EXCEPTION(n, label, hdlr): complete vector for a simple exception.
+ * Emits label at offset n, runs EXCEPTION_PROLOG with n as the trap
+ * number and label as the name, expands prepare_transfer_to_handler,
+ * calls hdlr and finally branches to interrupt_return.
+ */
+#define EXCEPTION(n, label, hdlr) \
+ START_EXCEPTION(n, label) \
+ EXCEPTION_PROLOG n label; \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b interrupt_return
+
+/*
+ * Emits the vmap_stack_overflow handler that EXCEPTION_PROLOG_1 branches
+ * to under CONFIG_VMAP_STACK.
+ *
+ * Loads r1 from emergency_ctx (on SMP, offset by the TASK_CPU value
+ * shifted left by 3), points it at the top frame of that stack, then
+ * runs EXCEPTION_PROLOG_2 with trap number 0 and calls
+ * stack_overflow_exception before branching to interrupt_return.
+ * NOTE(review): the "- PAGE_OFFSET" presumably turns the symbol into a
+ * physical address because translation is still off here - confirm.
+ */
+.macro vmap_stack_overflow_exception
+ __HEAD
+vmap_stack_overflow:
+#ifdef CONFIG_SMP
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, TASK_CPU - THREAD(r1)
+ slwi r1, r1, 3
+ addis r1, r1, emergency_ctx-PAGE_OFFSET@ha
+#else
+ lis r1, emergency_ctx-PAGE_OFFSET@ha
+#endif
+ lwz r1, emergency_ctx-PAGE_OFFSET@l(r1)
+ addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE
+ EXCEPTION_PROLOG_2 0 vmap_stack_overflow
+ prepare_transfer_to_handler
+ bl stack_overflow_exception
+ b interrupt_return
+.endm
+
+#endif /* __HEAD_32_H__ */
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
deleted file mode 100644
index 7d7d8635227a..000000000000
--- a/arch/powerpc/kernel/head_40x.S
+++ /dev/null
@@ -1,982 +0,0 @@
-/*
- * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org>
- * Initial PowerPC version.
- * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu>
- * Rewritten for PReP
- * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
- * Low-level exception handers, MMU support, and rewrite.
- * Copyright (c) 1997 Dan Malek <dmalek@jlc.net>
- * PowerPC 8xx modifications.
- * Copyright (c) 1998-1999 TiVo, Inc.
- * PowerPC 403GCX modifications.
- * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
- * PowerPC 403GCX/405GP modifications.
- * Copyright 2000 MontaVista Software Inc.
- * PPC405 modifications
- * PowerPC 403GCX/405GP modifications.
- * Author: MontaVista Software, Inc.
- * frank_rowand@mvista.com or source@mvista.com
- * debbie_chu@mvista.com
- *
- *
- * Module name: head_4xx.S
- *
- * Description:
- * Kernel execution entry point code.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/init.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ptrace.h>
-
-/* As with the other PowerPC ports, it is expected that when code
- * execution begins here, the following registers contain valid, yet
- * optional, information:
- *
- * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.)
- * r4 - Starting address of the init RAM disk
- * r5 - Ending address of the init RAM disk
- * r6 - Start of kernel command line string (e.g. "mem=96m")
- * r7 - End of kernel command line string
- *
- * This is all going to change RSN when we add bi_recs....... -- Dan
- */
- __HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
-
- mr r31,r3 /* save device tree ptr */
-
- /* We have to turn on the MMU right away so we get cache modes
- * set correctly.
- */
- bl initial_mmu
-
-/* We now have the lower 16 Meg mapped into TLB entries, and the caches
- * ready to work.
- */
-turn_on_mmu:
- lis r0,MSR_KERNEL@h
- ori r0,r0,MSR_KERNEL@l
- mtspr SPRN_SRR1,r0
- lis r0,start_here@h
- ori r0,r0,start_here@l
- mtspr SPRN_SRR0,r0
- SYNC
- rfi /* enables MMU */
- b . /* prevent prefetch past rfi */
-
-/*
- * This area is used for temporarily saving registers during the
- * critical exception prolog.
- */
- . = 0xc0
-crit_save:
-_ENTRY(crit_r10)
- .space 4
-_ENTRY(crit_r11)
- .space 4
-_ENTRY(crit_srr0)
- .space 4
-_ENTRY(crit_srr1)
- .space 4
-_ENTRY(saved_ksp_limit)
- .space 4
-
-/*
- * Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
- * the physical address of the current task thread_struct.
- * Note that we have to have decremented r1 before we write to any fields
- * of the exception frame, since a critical interrupt could occur at any
- * time, and it will write to the area immediately below the current r1.
- */
-#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mtspr SPRN_SPRG_SCRATCH2,r1; \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- beq 1f; \
- mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\
- addi r1,r1,THREAD_SIZE; \
-1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
- tophys(r11,r1); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH2; \
- mfspr r12,SPRN_SRR0; \
- stw r10,GPR1(r11); \
- mfspr r9,SPRN_SRR1; \
- stw r10,0(r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Exception prolog for critical exceptions. This is a little different
- * from the normal exception prolog above since a critical exception
- * can potentially occur at any point during normal exception processing.
- * Thus we cannot use the same SPRG registers as the normal prolog above.
- * Instead we use a couple of words of memory at low physical addresses.
- * This is OK since we don't support SMP on these processors.
- */
-#define CRITICAL_EXCEPTION_PROLOG \
- stw r10,crit_r10@l(0); /* save two registers to work with */\
- stw r11,crit_r11@l(0); \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR3; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- lis r11,critirq_ctx@ha; \
- tophys(r11,r11); \
- lwz r11,critirq_ctx@l(r11); \
- beq 1f; \
- /* COMING FROM USER MODE */ \
- mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
-1: addi r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc an excpt frm */\
- tophys(r11,r11); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\
- stw r12,_DEAR(r11); /* since they may have had stuff */\
- mfspr r9,SPRN_ESR; /* in them at the point where the */\
- stw r9,_ESR(r11); /* exception was taken */\
- mfspr r12,SPRN_SRR2; \
- stw r1,GPR1(r11); \
- mfspr r9,SPRN_SRR3; \
- stw r1,0(r11); \
- tovirt(r1,r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
- /*
- * State at this point:
- * r9 saved in stack frame, now saved SRR3 & ~MSR_WE
- * r10 saved in crit_r10 and in stack frame, trashed
- * r11 saved in crit_r11 and in stack frame,
- * now phys stack/exception frame pointer
- * r12 saved in stack frame, now saved SRR2
- * CR saved in stack frame, CR0.EQ = !SRR3.PR
- * LR, DEAR, ESR in stack frame
- * r1 saved in stack frame, now virt stack/excframe pointer
- * r0, r3-r8 saved in stack frame
- */
-
-/*
- * Exception vectors.
- */
-#define START_EXCEPTION(n, label) \
- . = n; \
-label:
-
-#define EXCEPTION(n, label, hdlr, xfer) \
- START_EXCEPTION(n, label); \
- NORMAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define CRITICAL_EXCEPTION(n, label, hdlr) \
- START_EXCEPTION(n, label); \
- CRITICAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- copyee(r10, r9); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
-
-/*
- * 0x0100 - Critical Interrupt Exception
- */
- CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception)
-
-/*
- * 0x0200 - Machine Check Exception
- */
- CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-
-/*
- * 0x0300 - Data Storage Exception
- * This happens for just a few reasons. U0 set (but we don't do that),
- * or zone protection fault (user violation, write to protected page).
- * If this is just an update of modified status, we do that quickly
- * and exit. Otherwise, we call heavywight functions to do the work.
- */
- START_EXCEPTION(0x0300, DataStorage)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG_SCRATCH3, r12
- mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
-
- /* First, check if it was a zone fault (which means a user
- * tried to access a kernel or read-protected page - always
- * a SEGV). All other faults here must be stores, so no
- * need to check ESR_DST as well. */
- mfspr r10, SPRN_ESR
- andis. r10, r10, ESR_DIZ@h
- bne 2f
-
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG_THREAD
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r11, 0(r11) /* Get L1 entry */
- rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
-
- andi. r9, r11, _PAGE_RW /* Is it writeable? */
- beq 2f /* Bail if not */
-
- /* Update 'changed'.
- */
- ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
- stw r11, 0(r12) /* Update Linux page table */
-
- /* Most of the Linux PTE is ready to load into the TLB LO.
- * We set ZSEL, where only the LS-bit determines user access.
- * We set execute, because we don't have the granularity to
- * properly set this at the page level (Linux problem).
- * If shared is set, we cause a zero PID->TID load.
- * Many of these bits are software only. Bits we don't set
- * here we (properly should) assume have the appropriate value.
- */
- li r12, 0x0ce2
- andc r11, r11, r12 /* Make sure 20, 21 are zero */
-
- /* find the TLB index that caused the fault. It has to be here.
- */
- tlbsx r9, 0, r10
-
- tlbwe r11, r9, TLB_DATA /* Load TLB LO */
-
- /* Done...restore registers and get out of here.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- PPC405_ERR77_SYNC
- rfi /* Should sync shadow TLBs */
- b . /* prevent prefetch past rfi */
-
-2:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b DataAccess
-
-/*
- * 0x0400 - Instruction Storage Exception
- * This is caused by a fetch from non-execute or guarded pages.
- */
- START_EXCEPTION(0x0400, InstructionAccess)
- NORMAL_EXCEPTION_PROLOG
- mr r4,r12 /* Pass SRR0 as arg2 */
- li r5,0 /* Pass zero as arg3 */
- EXC_XFER_LITE(0x400, handle_page_fault)
-
-/* 0x0500 - External Interrupt Exception */
- EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
-
-/* 0x0600 - Alignment Exception */
- START_EXCEPTION(0x0600, Alignment)
- NORMAL_EXCEPTION_PROLOG
- mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
- stw r4,_DEAR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
-
-/* 0x0700 - Program Exception */
- START_EXCEPTION(0x0700, ProgramCheck)
- NORMAL_EXCEPTION_PROLOG
- mfspr r4,SPRN_ESR /* Grab the ESR and save it */
- stw r4,_ESR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x700, program_check_exception)
-
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
-
-/* 0x0C00 - System Call Exception */
- START_EXCEPTION(0x0C00, SystemCall)
- NORMAL_EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
-
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
-
-/* 0x1000 - Programmable Interval Timer (PIT) Exception */
- . = 0x1000
- b Decrementer
-
-/* 0x1010 - Fixed Interval Timer (FIT) Exception
-*/
- . = 0x1010
- b FITException
-
-/* 0x1020 - Watchdog Timer (WDT) Exception
-*/
- . = 0x1020
- b WDTException
-
-/* 0x1100 - Data TLB Miss Exception
- * As the name implies, translation is not in the MMU, so search the
- * page tables and fix it. The only purpose of this function is to
- * load TLB entries from the page table if they exist.
- */
- START_EXCEPTION(0x1100, DTLBMiss)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG_SCRATCH3, r12
- mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
- mfspr r10, SPRN_DEAR /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG_THREAD
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
-
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
-
- /* Create TLB tag. This is the faulting address plus a static
- * set of bits. These are size, valid, E, U0.
- */
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
-
- b finish_tlb_load
-
-2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
- beq 5f
-
- /* Create TLB tag. This is the faulting address, plus a static
- * set of bits (valid, E, U0) plus the size from the PMD.
- */
- ori r9, r9, 0x40
- rlwimi r10, r9, 0, 20, 31
- mr r11, r12
-
- b finish_tlb_load
-
-5:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b DataAccess
-
-/* 0x1200 - Instruction TLB Miss Exception
- * Nearly the same as above, except we get our information from different
- * registers and bailout to a different point.
- */
- START_EXCEPTION(0x1200, ITLBMiss)
- mtspr SPRN_SPRG_SCRATCH0, r10 /* Save some working registers */
- mtspr SPRN_SPRG_SCRATCH1, r11
-#ifdef CONFIG_403GCX
- stw r12, 0(r0)
- stw r9, 4(r0)
- mfcr r11
- mfspr r12, SPRN_PID
- stw r11, 8(r0)
- stw r12, 12(r0)
-#else
- mtspr SPRN_SPRG_SCRATCH3, r12
- mtspr SPRN_SPRG_SCRATCH4, r9
- mfcr r11
- mfspr r12, SPRN_PID
- mtspr SPRN_SPRG_SCRATCH6, r11
- mtspr SPRN_SPRG_SCRATCH5, r12
-#endif
- mfspr r10, SPRN_SRR0 /* Get faulting address */
-
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- li r9, 0
- mtspr SPRN_PID, r9 /* TLB will have 0 TID */
- b 4f
-
- /* Get the PGD for the current thread.
- */
-3:
- mfspr r11,SPRN_SPRG_THREAD
- lwz r11,PGDIR(r11)
-4:
- tophys(r11, r11)
- rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */
- lwz r12, 0(r11) /* Get L1 entry */
- andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */
- beq 2f /* Bail if no table */
-
- rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */
- lwz r11, 0(r12) /* Get Linux PTE */
- andi. r9, r11, _PAGE_PRESENT
- beq 5f
-
- ori r11, r11, _PAGE_ACCESSED
- stw r11, 0(r12)
-
- /* Create TLB tag. This is the faulting address plus a static
- * set of bits. These are size, valid, E, U0.
- */
- li r12, 0x00c0
- rlwimi r10, r12, 0, 20, 31
-
- b finish_tlb_load
-
-2: /* Check for possible large-page pmd entry */
- rlwinm. r9, r12, 2, 22, 24
- beq 5f
-
- /* Create TLB tag. This is the faulting address, plus a static
- * set of bits (valid, E, U0) plus the size from the PMD.
- */
- ori r9, r9, 0x40
- rlwimi r10, r9, 0, 20, 31
- mr r11, r12
-
- b finish_tlb_load
-
-5:
- /* The bailout. Restore registers to pre-exception conditions
- * and call the heavyweights to help us out.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- b InstructionAccess
-
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
-#ifdef CONFIG_IBM405_ERR51
- /* 405GP errata 51 */
- START_EXCEPTION(0x1700, Trap_17)
- b DTLBMiss
-#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
-#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
-
-/* Check for a single step debug exception while in an exception
- * handler before state has been saved. This is to catch the case
- * where an instruction that we are trying to single step causes
- * an exception (eg ITLB/DTLB miss) and thus the first instruction of
- * the exception handler generates a single step debug exception.
- *
- * If we get a debug trap on the first instruction of an exception handler,
- * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is
- * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR).
- * The exception handler was handling a non-critical interrupt, so it will
- * save (and later restore) the MSR via SPRN_SRR1, which will still have
- * the MSR_DE bit set.
- */
- /* 0x2000 - Debug Exception */
- START_EXCEPTION(0x2000, DebugTrap)
- CRITICAL_EXCEPTION_PROLOG
-
- /*
- * If this is a single step or branch-taken exception in an
- * exception entry sequence, it was probably meant to apply to
- * the code where the exception occurred (since exception entry
- * doesn't turn off DE automatically). We simulate the effect
- * of turning off DE on entry to an exception handler by turning
- * off DE in the SRR3 value and clearing the debug status.
- */
- mfspr r10,SPRN_DBSR /* check single-step/branch taken */
- andis. r10,r10,DBSR_IC@h
- beq+ 2f
-
- andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */
- beq 1f /* branch and fix it up */
-
- mfspr r10,SPRN_SRR2 /* Faulting instruction address */
- cmplwi r10,0x2100
- bgt+ 2f /* address above exception vectors */
-
- /* here it looks like we got an inappropriate debug exception. */
-1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */
- lis r10,DBSR_IC@h /* clear the IC event */
- mtspr SPRN_DBSR,r10
- /* restore state and get out */
- lwz r10,_CCR(r11)
- lwz r0,GPR0(r11)
- lwz r1,GPR1(r11)
- mtcrf 0x80,r10
- mtspr SPRN_SRR2,r12
- mtspr SPRN_SRR3,r9
- lwz r9,GPR9(r11)
- lwz r12,GPR12(r11)
- lwz r10,crit_r10@l(0)
- lwz r11,crit_r11@l(0)
- PPC405_ERR77_SYNC
- rfci
- b .
-
- /* continue normal handling for a critical exception... */
-2: mfspr r4,SPRN_DBSR
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_TEMPLATE(DebugException, 0x2002, \
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
-
- /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
-Decrementer:
- NORMAL_EXCEPTION_PROLOG
- lis r0,TSR_PIS@h
- mtspr SPRN_TSR,r0 /* Clear the PIT exception */
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_LITE(0x1000, timer_interrupt)
-
- /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
-FITException:
- NORMAL_EXCEPTION_PROLOG
- addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_EE(0x1010, unknown_exception)
-
- /* Watchdog Timer (WDT) Exception. (from 0x1020) */
-WDTException:
- CRITICAL_EXCEPTION_PROLOG;
- addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
- (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- NOCOPY, crit_transfer_to_handler,
- ret_from_crit_exc)
-
-/*
- * The other Data TLB exceptions bail out to this point
- * if they can't resolve the lightweight TLB fault.
- */
-DataAccess:
- NORMAL_EXCEPTION_PROLOG
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
- stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
- EXC_XFER_LITE(0x300, handle_page_fault)
-
-/* Other PowerPC processors, namely those derived from the 6xx-series
- * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved.
- * However, for the 4xx-series processors these are neither defined nor
- * reserved.
- */
-
- /* Damn, I came up one instruction too many to fit into the
- * exception space :-). Both the instruction and data TLB
- * miss get to this point to load the TLB.
- * r10 - TLB_TAG value
- * r11 - Linux PTE
- * r12, r9 - available to use
- * PID - loaded with proper value when we get here
- * Upon exit, we reload everything and RFI.
- * Actually, it will fit now, but oh well.....a common place
- * to load the TLB.
- */
-tlb_4xx_index:
- .long 0
-finish_tlb_load:
- /* load the next available TLB index.
- */
- lwz r9, tlb_4xx_index@l(0)
- addi r9, r9, 1
- andi. r9, r9, (PPC40X_TLB_SIZE-1)
- stw r9, tlb_4xx_index@l(0)
-
-6:
- /*
- * Clear out the software-only bits in the PTE to generate the
- * TLB_DATA value. These are the bottom 2 bits of the RPM, the
- * top 3 bits of the zone field, and M.
- */
- li r12, 0x0ce2
- andc r11, r11, r12
-
- tlbwe r11, r9, TLB_DATA /* Load TLB LO */
- tlbwe r10, r9, TLB_TAG /* Load TLB HI */
-
- /* Done...restore registers and get out of here.
- */
-#ifdef CONFIG_403GCX
- lwz r12, 12(r0)
- lwz r11, 8(r0)
- mtspr SPRN_PID, r12
- mtcr r11
- lwz r9, 4(r0)
- lwz r12, 0(r0)
-#else
- mfspr r12, SPRN_SPRG_SCRATCH5
- mfspr r11, SPRN_SPRG_SCRATCH6
- mtspr SPRN_PID, r12
- mtcr r11
- mfspr r9, SPRN_SPRG_SCRATCH4
- mfspr r12, SPRN_SPRG_SCRATCH3
-#endif
- mfspr r11, SPRN_SPRG_SCRATCH1
- mfspr r10, SPRN_SPRG_SCRATCH0
- PPC405_ERR77_SYNC
- rfi /* Should sync shadow TLBs */
- b . /* prevent prefetch past rfi */
-
-/* This is where the main kernel code starts.
- */
-start_here:
-
- /* ptr to current */
- lis r2,init_task@h
- ori r2,r2,init_task@l
-
- /* ptr to phys current thread */
- tophys(r4,r2)
- addi r4,r4,THREAD /* init task's THREAD */
- mtspr SPRN_SPRG_THREAD,r4
-
- /* stack */
- lis r1,init_thread_union@ha
- addi r1,r1,init_thread_union@l
- li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
-
- bl early_init /* We have to do this with MMU on */
-
-/*
- * Decide what sort of machine this is and initialize the MMU.
- */
- li r3,0
- mr r4,r31
- bl machine_init
- bl MMU_init
-
-/* Go back to running unmapped so we can load up new values
- * and change to using our exception vectors.
- * On the 4xx, all we have to do is invalidate the TLB to clear
- * the old 16M byte TLB mappings.
- */
- lis r4,2f@h
- ori r4,r4,2f@l
- tophys(r4,r4)
- lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h
- ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l
- mtspr SPRN_SRR0,r4
- mtspr SPRN_SRR1,r3
- rfi
- b . /* prevent prefetch past rfi */
-
-/* Load up the kernel context */
-2:
- sync /* Flush to memory before changing TLB */
- tlbia
- isync /* Flush shadow TLBs */
-
- /* set up the PTE pointers for the Abatron bdiGDB.
- */
- lis r6, swapper_pg_dir@h
- ori r6, r6, swapper_pg_dir@l
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* Must match your Abatron config file */
- tophys(r5,r5)
- stw r6, 0(r5)
-
-/* Now turn on the MMU for real! */
- lis r4,MSR_KERNEL@h
- ori r4,r4,MSR_KERNEL@l
- lis r3,start_kernel@h
- ori r3,r3,start_kernel@l
- mtspr SPRN_SRR0,r3
- mtspr SPRN_SRR1,r4
- rfi /* enable MMU and jump to start_kernel */
- b . /* prevent prefetch past rfi */
-
-/* Set up the initial MMU state so we can do the first level of
- * kernel initialization. This maps the first 16 MBytes of memory 1:1
- * virtual to physical and more importantly sets the cache mode.
- */
-initial_mmu:
- tlbia /* Invalidate all TLB entries */
- isync
-
- /* We should still be executing code at physical address 0x0000xxxx
- * at this point. However, start_here is at virtual address
- * 0xC000xxxx. So, set up a TLB mapping to cover this once
- * translation is enabled.
- */
-
- lis r3,KERNELBASE@h /* Load the kernel virtual address */
- ori r3,r3,KERNELBASE@l
- tophys(r4,r3) /* Load the kernel physical address */
-
- iccci r0,r3 /* Invalidate the i-cache before use */
-
- /* Load the kernel PID.
- */
- li r0,0
- mtspr SPRN_PID,r0
- sync
-
- /* Configure and load one entry into TLB slots 63 */
- clrrwi r4,r4,10 /* Mask off the real page number */
- ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */
-
- clrrwi r3,r3,10 /* Mask off the effective page number */
- ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M))
-
- li r0,63 /* TLB slot 63 */
-
- tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */
- tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */
-
- isync
-
- /* Establish the exception vector base
- */
- lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */
- tophys(r0,r4) /* Use the physical address */
- mtspr SPRN_EVPR,r0
-
- blr
-
-_GLOBAL(abort)
- mfspr r13,SPRN_DBCR0
- oris r13,r13,DBCR0_RST_SYSTEM@h
- mtspr SPRN_DBCR0,r13
-
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
-#endif
- sync
- mtspr SPRN_PID,r3
- isync /* Need an isync to flush shadow */
- /* TLBs after changing PID */
- blr
-
-/* We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index b5061abbd2e0..25642e802ed3 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Kernel execution entry point code.
*
@@ -21,24 +22,20 @@
* debbie_chu@mvista.com
* Copyright 2002-2005 MontaVista Software, Inc.
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
#include <asm/synch.h>
+#include <asm/code-patching-asm.h>
#include "head_booke.h"
@@ -54,8 +51,8 @@
*
*/
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
/*
* Reserve a word at a fixed location to store the address
* of abatron_pteptrs
@@ -72,7 +69,7 @@ _ENTRY(_start);
* address.
* r21 will be loaded with the physical runtime address of _stext
*/
- bl 0f /* Get our runtime address */
+ bcl 20,31,$+4 /* Get our runtime address */
0: mflr r21 /* Make it accessible */
addis r21,r21,(_stext - 0b)@ha
addi r21,r21,(_stext - 0b)@l /* Get our current runtime base */
@@ -111,7 +108,7 @@ _ENTRY(_start);
lis r1,init_thread_union@h
ori r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
bl early_init
@@ -201,6 +198,9 @@ _ENTRY(_start);
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -262,8 +262,7 @@ interrupt_base:
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, \
- do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, BOOKE_INTERRUPT_EXTERNAL, ExternalInput, do_IRQ)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -276,24 +275,22 @@ interrupt_base:
FP_UNAVAILABLE_EXCEPTION
#else
EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
- FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ FloatingPointUnavailable, unknown_exception)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
- AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ AuxillaryProcessorUnavailable, unknown_exception)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, unknown_exception)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
@@ -317,8 +314,8 @@ interrupt_base:
* kernel page tables.
*/
lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
+ cmplw cr7, r10, r11
+ blt+ cr7, 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
@@ -336,12 +333,16 @@ interrupt_base:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
/* Mask of required permission bits. Note that while we
- * do copy ESR:ST to _PAGE_RW position as trying to write
+ * do copy ESR:ST to _PAGE_WRITE position as trying to write
* to an RO page is pretty common, we don't do it with
* _PAGE_DIRTY. We could do it, but it's a fairly rare
* event so I'd rather take the overhead when it happens
@@ -354,7 +355,7 @@ interrupt_base:
* place or can we save a couple of instructions here ?
*/
mfspr r12,SPRN_ESR
- li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ
rlwimi r13,r12,10,30,30
/* Load the PTE */
@@ -376,15 +377,14 @@ interrupt_base:
/* Load the next available TLB index */
lwz r13,tlb_44x_index@l(r10)
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Increment, rollover, and store TLB index */
addi r13,r13,1
+ patch_site 0f, patch__tlb_44x_hwater_D
/* Compare with watermark (instruction gets patched) */
- .globl tlb_44x_patch_hwater_D
-tlb_44x_patch_hwater_D:
- cmpwi 0,r13,1 /* reserve entries */
+0: cmpwi 0,r13,1 /* reserve entries */
ble 5f
li r13,0
5:
@@ -428,8 +428,8 @@ tlb_44x_patch_hwater_D:
* kernel page tables.
*/
lis r11, PAGE_OFFSET@h
- cmplw r10, r11
- blt+ 3f
+ cmplw cr7, r10, r11
+ blt+ cr7, 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
@@ -447,6 +447,10 @@ tlb_44x_patch_hwater_D:
mfspr r12,SPRN_MMUCR
mfspr r13,SPRN_PID /* Get PID */
rlwimi r12,r13,0,24,31 /* Set TID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r13,0
+ beq 2f /* KUAP Fault */
+#endif
4:
mtspr SPRN_MMUCR,r12
@@ -472,15 +476,14 @@ tlb_44x_patch_hwater_D:
/* Load the next available TLB index */
lwz r13,tlb_44x_index@l(r10)
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Increment, rollover, and store TLB index */
addi r13,r13,1
+ patch_site 0f, patch__tlb_44x_hwater_I
/* Compare with watermark (instruction gets patched) */
- .globl tlb_44x_patch_hwater_I
-tlb_44x_patch_hwater_I:
- cmpwi 0,r13,1 /* reserve entries */
+0: cmpwi 0,r13,1 /* reserve entries */
ble 5f
li r13,0
5:
@@ -512,6 +515,7 @@ tlb_44x_patch_hwater_I:
* r11 - PTE high word value
* r12 - PTE low word value
* r13 - TLB index
+ * cr7 - Result of comparison with PAGE_OFFSET
* MMUCR - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
*/
@@ -530,12 +534,12 @@ finish_tlb_load_44x:
tlbwe r10,r13,PPC44x_TLB_PAGEID /* Write PAGEID */
/* And WS 2 */
- li r10,0xf85 /* Mask to apply from PTE */
- rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ li r10,0xf84 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */
and r11,r12,r10 /* Mask PTE bits to keep */
- andi. r10,r12,_PAGE_USER /* User page ? */
- beq 1f /* nope, leave U bits empty */
+ bge cr7,1f /* User page ? no, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+ rlwinm r11,r11,0,~PPC44x_TLB_SX /* Clear SX if User page */
1: tlbwe r11,r13,PPC44x_TLB_ATTRIB /* Write ATTRIB */
/* Done...restore registers and get out of here.
@@ -564,8 +568,8 @@ finish_tlb_load_44x:
* kernel page tables.
*/
lis r11,PAGE_OFFSET@h
- cmplw cr0,r10,r11
- blt+ 3f
+ cmplw cr7,r10,r11
+ blt+ cr7,3f
lis r11,swapper_pg_dir@h
ori r11,r11, swapper_pg_dir@l
li r12,0 /* MMUCR = 0 */
@@ -575,10 +579,14 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG3
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Mask of required permission bits. Note that while we
- * do copy ESR:ST to _PAGE_RW position as trying to write
+ * do copy ESR:ST to _PAGE_WRITE position as trying to write
* to an RO page is pretty common, we don't do it with
* _PAGE_DIRTY. We could do it, but it's a fairly rare
* event so I'd rather take the overhead when it happens
@@ -591,7 +599,7 @@ finish_tlb_load_44x:
* place or can we save a couple of instructions here ?
*/
mfspr r12,SPRN_ESR
- li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ li r13,_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_READ
rlwimi r13,r12,10,30,30
/* Load the PTE */
@@ -661,8 +669,8 @@ finish_tlb_load_44x:
* kernel page tables.
*/
lis r11,PAGE_OFFSET@h
- cmplw cr0,r10,r11
- blt+ 3f
+ cmplw cr7,r10,r11
+ blt+ cr7,3f
lis r11,swapper_pg_dir@h
ori r11,r11, swapper_pg_dir@l
li r12,0 /* MMUCR = 0 */
@@ -672,6 +680,10 @@ finish_tlb_load_44x:
3: mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
mfspr r12,SPRN_PID /* Get PID */
+#ifdef CONFIG_PPC_KUAP
+ cmpwi r12,0
+ beq 2f /* KUAP Fault */
+#endif
4: mtspr SPRN_MMUCR,r12 /* Set MMUCR */
/* Make up the required permissions */
@@ -732,6 +744,7 @@ finish_tlb_load_44x:
* r11 - PTE high word value
* r12 - PTE low word value
* r13 - free to use
+ * cr7 - Result of comparison with PAGE_OFFSET
* MMUCR - loaded with proper value when we get here
* Upon exit, we reload everything and RFI.
*/
@@ -741,12 +754,12 @@ finish_tlb_load_47x:
tlbwe r11,r13,1
/* And make up word 2 */
- li r10,0xf85 /* Mask to apply from PTE */
- rlwimi r10,r12,29,30,30 /* DIRTY -> SW position */
+ li r10,0xf84 /* Mask to apply from PTE */
+ rlwimi r10,r12,29,30,31 /* DIRTY,READ -> SW,SR position */
and r11,r12,r10 /* Mask PTE bits to keep */
- andi. r10,r12,_PAGE_USER /* User page ? */
- beq 1f /* nope, leave U bits empty */
+ bge cr7,1f /* User page ? no, leave U bits empty */
rlwimi r11,r11,3,26,28 /* yes, copy S bits to U */
+ rlwinm r11,r11,0,~PPC47x_TLB2_SX /* Clear SX if User page */
1: tlbwe r11,r13,2
/* Done...restore registers and get out of here.
@@ -784,20 +797,6 @@ _GLOBAL(__fixup_440A_mcheck)
sync
blr
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
/*
* Init CPU state. This is called at boot time or for secondary CPUs
* to setup initial TLB entries, setup IVORs, etc...
@@ -806,7 +805,7 @@ _GLOBAL(set_context)
_GLOBAL(init_cpu_state)
mflr r22
#ifdef CONFIG_PPC_47x
- /* We use the PVR to differenciate 44x cores from 476 */
+ /* We use the PVR to differentiate 44x cores from 476 */
mfspr r3,SPRN_PVR
srwi r3,r3,16
cmplwi cr0,r3,PVR_476FPE@h
@@ -863,7 +862,7 @@ _GLOBAL(init_cpu_state)
wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */
sync
- bl invstr /* Find our address */
+ bcl 20,31,$+4 /* Find our address */
invstr: mflr r5 /* Make it accessible */
tlbsx r23,0,r5 /* Find entry we are in */
li r4,0 /* Start at TLB entry 0 */
@@ -1012,20 +1011,20 @@ _GLOBAL(start_secondary_47x)
*/
lis r1,temp_boot_stack@h
ori r1,r1,temp_boot_stack@l
- addi r1,r1,1024-STACK_FRAME_OVERHEAD
+ addi r1,r1,1024-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
bl mmu_init_secondary
/* Now we can get our task struct and real stack pointer */
- /* Get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* Get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* Current stack pointer */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
@@ -1055,7 +1054,7 @@ head_start_47x:
sync
/* Find the entry we are running from */
- bl 1f
+ bcl 20,31,$+4
1: mflr r23
tlbsx r23,0,r23
tlbre r24,r23,0
@@ -1243,33 +1242,8 @@ head_start_common:
isync
blr
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align PAGE_SHIFT
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-
-/*
- * To support >32-bit physical addresses, we use an 8KB pgdir.
- */
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
-
#ifdef CONFIG_SMP
+ .data
.align 12
temp_boot_stack:
.space 1024
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index d48125d0c048..63432a33ec49 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -15,19 +16,16 @@
* This file contains the entry point for the 64-bit kernel along
* with some early initialization code common to all 64-bit powerpc
* variants.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/linkage.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
+#include <asm/head-64.h>
#include <asm/asm-offsets.h>
#include <asm/bug.h>
#include <asm/cputable.h>
@@ -40,6 +38,14 @@
#include <asm/kvm_book3s_asm.h>
#include <asm/ptrace.h>
#include <asm/hw_irq.h>
+#include <asm/cputhreads.h>
+#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
/* The physical memory is laid out such that the secondary processor
* spin code sits at 0x0000...0x00ff. On server, the vectors follow
@@ -51,21 +57,39 @@
*
* For pSeries or server processors:
* 1. The MMU is off & open firmware is running in real mode.
- * 2. The kernel is entered at __start
+ * 2. The primary CPU enters at __start.
+ * 3. If the RTAS supports "query-cpu-stopped-state", then secondary
+ * CPUs will enter as directed by "start-cpu" RTAS call, which is
+ * generic_secondary_smp_init, with PIR in r3.
+ * 4. Else the secondary CPUs will enter at secondary_hold (0x60) as
+ * directed by the "start-cpu" RTAS call, with PIR in r3.
* -or- For OPAL entry:
- * 1. The MMU is off, processor in HV mode, primary CPU enters at 0
- * with device-tree in gpr3. We also get OPAL base in r8 and
- * entry in r9 for debugging purposes
- * 2. Secondary processors enter at 0x60 with PIR in gpr3
+ * 1. The MMU is off, processor in HV mode.
+ * 2. The primary CPU enters at 0 with device-tree in r3, OPAL base
+ * in r8, and entry in r9 for debugging purposes.
+ * 3. Secondary CPUs enter as directed by OPAL_START_CPU call, which
+ * is at generic_secondary_smp_init, with PIR in r3.
*
* For Book3E processors:
* 1. The MMU is on running in AS0 in a state defined in ePAPR
* 2. The kernel is entered at __start
*/
- .text
- .globl _stext
-_stext:
+/*
+ * boot_from_prom and prom_init run at the physical address. Everything
+ * after prom and kexec entry run at the virtual address (PAGE_OFFSET).
+ * Secondaries run at the virtual address from generic_secondary_common_init
+ * onward.
+ */
+
+OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
+USE_FIXED_SECTION(first_256B)
+ /*
+ * Offsets are relative to the start of the fixed section, and
+ * first_256B starts at 0. Offsets are a bit easier to use here
+ * than the fixed section entry macros.
+ */
+ . = 0x0
_GLOBAL(__start)
/* NOP this out unconditionally */
BEGIN_FTR_SECTION
@@ -83,26 +107,34 @@ END_FTR_SECTION(0, 1)
.balign 8
.globl __secondary_hold_spinloop
__secondary_hold_spinloop:
- .llong 0x0
+ .8byte 0x0
/* Secondary processors write this value with their cpu # */
/* after they enter the spin loop immediately below. */
.globl __secondary_hold_acknowledge
__secondary_hold_acknowledge:
- .llong 0x0
+ .8byte 0x0
#ifdef CONFIG_RELOCATABLE
/* This flag is set to 1 by a loader if the kernel should run
* at the loaded address instead of the linked address. This
- * is used by kexec-tools to keep the the kdump kernel in the
+ * is used by kexec-tools to keep the kdump kernel in the
* crash_kernel region. The loader is responsible for
* observing the alignment requirement.
*/
+
+#ifdef CONFIG_RELOCATABLE_TEST
+#define RUN_AT_LOAD_DEFAULT 1 /* Test relocation, do not copy to 0 */
+#else
+#define RUN_AT_LOAD_DEFAULT 0x72756e30 /* "run0" -- relocate to 0 by default */
+#endif
+
/* Do not move this variable as kexec-tools knows about it. */
. = 0x5c
.globl __run_at_load
__run_at_load:
- .long 0x72756e30 /* "run0" -- relocate to 0 by default */
+DEFINE_FIXED_SYMBOL(__run_at_load, first_256B)
+ .long RUN_AT_LOAD_DEFAULT
#endif
. = 0x60
@@ -118,7 +150,7 @@ __run_at_load:
.globl __secondary_hold
__secondary_hold:
FIXUP_ENDIAN
-#ifndef CONFIG_PPC_BOOK3E
+#ifndef CONFIG_PPC_BOOK3E_64
mfmsr r24
ori r24,r24,MSR_RI
mtmsrd r24 /* RI on */
@@ -131,20 +163,16 @@ __secondary_hold:
/* Tell the master cpu we're here */
/* Relocation is off & we are located at an address less */
/* than 0x100, so only need to grab low order offset. */
- std r24,__secondary_hold_acknowledge-_stext(0)
+ std r24,(ABS_ADDR(__secondary_hold_acknowledge, first_256B))(0)
sync
- li r26,0
-#ifdef CONFIG_PPC_BOOK3E
- tovirt(r26,r26)
-#endif
/* All secondary cpus wait here until told to start. */
-100: ld r12,__secondary_hold_spinloop-_stext(r26)
+100: ld r12,(ABS_ADDR(__secondary_hold_spinloop, first_256B))(0)
cmpdi 0,r12,0
beq 100b
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
-#ifdef CONFIG_PPC_BOOK3E
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
+#ifdef CONFIG_PPC_BOOK3E_64
tovirt(r12,r12)
#endif
mtctr r12
@@ -153,7 +181,7 @@ __secondary_hold:
* it may be the case that other platforms have r4 right to
* begin with, this gives us some safety in case it is not
*/
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
mr r4,r25
#else
li r4,0
@@ -162,14 +190,10 @@ __secondary_hold:
isync
bctr
#else
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
-
-/* This value is used to mark exception frames on the stack. */
- .section ".toc","aw"
-exception_marker:
- .tc ID_72656773_68657265[TC],0x7265677368657265
- .text
+CLOSE_FIXED_SECTION(first_256B)
/*
* On server, we include the exception vectors code here as it
@@ -178,10 +202,76 @@ exception_marker:
*/
#ifdef CONFIG_PPC_BOOK3S
#include "exceptions-64s.S"
+#else
+OPEN_TEXT_SECTION(0x100)
#endif
-#ifdef CONFIG_PPC_BOOK3E
+USE_TEXT_SECTION()
+
+#include "interrupt_64.S"
+
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * The booting_thread_hwid holds the thread id we want to boot in cpu
+ * hotplug case. It is set by cpu hotplug code, and is invalid by default.
+ * The thread id is the same as the initial value of SPRN_PIR[THREAD_ID]
+ * bit field.
+ */
+ .globl booting_thread_hwid
+booting_thread_hwid:
+ .long INVALID_THREAD_HWID
+ .align 3
+/*
+ * start a thread in the same core
+ * input parameters:
+ * r3 = the thread physical id
+ * r4 = the entry point where thread starts
+ */
+_GLOBAL(book3e_start_thread)
+ LOAD_REG_IMMEDIATE(r5, MSR_KERNEL)
+ cmpwi r3, 0
+ beq 10f
+ cmpwi r3, 1
+ beq 11f
+ /* If the thread id is invalid, just exit. */
+ b 13f
+10:
+ MTTMR(TMRN_IMSR0, 5)
+ MTTMR(TMRN_INIA0, 4)
+ b 12f
+11:
+ MTTMR(TMRN_IMSR1, 5)
+ MTTMR(TMRN_INIA1, 4)
+12:
+ isync
+ li r6, 1
+ sld r6, r6, r3
+ mtspr SPRN_TENS, r6
+13:
+ blr
+
+/*
+ * stop a thread in the same core
+ * input parameter:
+ * r3 = the thread physical id
+ */
+_GLOBAL(book3e_stop_thread)
+ cmpwi r3, 0
+ beq 10f
+ cmpwi r3, 1
+ beq 10f
+ /* If the thread id is invalid, just exit. */
+ b 13f
+10:
+ li r4, 1
+ sld r4, r4, r3
+ mtspr SPRN_TENC, r4
+13:
+ blr
+
_GLOBAL(fsl_secondary_thread_init)
+ mfspr r4,SPRN_BUCSR
+
/* Enable branch prediction */
lis r3,BUCSR_INIT@h
ori r3,r3,BUCSR_INIT@l
@@ -196,29 +286,38 @@ _GLOBAL(fsl_secondary_thread_init)
* number. There are two threads per core, so shift everything
* but the low bit right by two bits so that the cpu numbering is
* continuous.
+ *
+ * If the old value of BUCSR is non-zero, this thread has run
+ * before. Thus, we assume we are coming from kexec or a similar
+ * scenario, and PIR is already set to the correct value. This
+ * is a bit of a hack, but there are limited opportunities for
+ * getting information into the thread and the alternatives
+ * seemed like they'd be overkill. We can't tell just by looking
+ * at the old PIR value which state it's in, since the same value
+ * could be valid for one thread out of reset and for a different
+ * thread in Linux.
*/
+
mfspr r3, SPRN_PIR
+ cmpwi r4,0
+ bne 1f
rlwimi r3, r3, 30, 2, 30
mtspr SPRN_PIR, r3
-#endif
-
-_GLOBAL(generic_secondary_thread_init)
+1:
mr r24,r3
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
-#ifdef CONFIG_PPC_BOOK3E
/* Book3E initialization */
mr r3,r24
bl book3e_secondary_thread_init
-#endif
+ bl relative_toc
+
b generic_secondary_common_init
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
/*
* On pSeries and most other platforms, secondary processors spin
* in the following code.
@@ -230,21 +329,70 @@ _GLOBAL(generic_secondary_thread_init)
*/
_GLOBAL(generic_secondary_smp_init)
FIXUP_ENDIAN
+
+ li r13,0
+
+ /* Poison TOC */
+ li r2,-1
+
mr r24,r3
mr r25,r4
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* Book3E initialization */
mr r3,r24
mr r4,r25
bl book3e_secondary_core_init
+ /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */
+/*
+ * After common core init has finished, check if the current thread is the
+ * one we wanted to boot. If not, start the specified thread and stop the
+ * current thread.
+ */
+ LOAD_REG_ADDR(r4, booting_thread_hwid)
+ lwz r3, 0(r4)
+ li r5, INVALID_THREAD_HWID
+ cmpw r3, r5
+ beq 20f
+
+ /*
+ * The value of booting_thread_hwid has been stored in r3,
+ * so make it invalid.
+ */
+ stw r5, 0(r4)
+
+ /*
+ * Get the current thread id and check if it is the one we wanted.
+ * If not, start the one specified in booting_thread_hwid and stop
+ * the current thread.
+ */
+ mfspr r8, SPRN_TIR
+ cmpw r3, r8
+ beq 20f
+
+ /* start the specified thread */
+ LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init))
+ bl book3e_start_thread
+
+ /* stop the current thread */
+ mr r3, r8
+ bl book3e_stop_thread
+10:
+ b 10b
+20:
+#else
+ /* Now the MMU is off, can branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
+ bl relative_toc
#endif
generic_secondary_common_init:
@@ -252,19 +400,24 @@ generic_secondary_common_init:
* physical cpu id in r24, we need to search the pacas to find
* which logical id maps to our physical one.
*/
- LOAD_REG_ADDR(r13, paca) /* Load paca pointer */
- ld r13,0(r13) /* Get base vaddr of paca array */
#ifndef CONFIG_SMP
- addi r13,r13,PACA_SIZE /* know r13 if used accidentally */
b kexec_wait /* wait for next kernel if !SMP */
#else
+ LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointer */
+ ld r8,0(r8) /* Get base vaddr of array */
+#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS)
+ LOAD_REG_IMMEDIATE(r7, NR_CPUS)
+#else
LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */
lwz r7,0(r7) /* also the max paca allocated */
+#endif
li r5,0 /* logical cpu id */
-1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
+1:
+ sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r9,r8 /* r13 = paca_ptrs[cpu id] */
+ lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */
cmpw r6,r24 /* Compare to our id */
beq 2f
- addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */
addi r5,r5,1
cmpw r5,r7 /* Check if more pacas exist */
blt 1b
@@ -273,7 +426,7 @@ generic_secondary_common_init:
b kexec_wait /* next kernel might do better */
2: SET_PACA(r13)
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
addi r12,r13,PACA_EXTLB /* and TLB exc frame in another */
mtspr SPRN_SPRG_TLB_EXFRAME,r12
#endif
@@ -281,13 +434,17 @@ generic_secondary_common_init:
/* From now on, r24 is expected to be logical cpuid */
mr r24,r5
+ /* Create a temp kernel stack for use before relocation is on. */
+ ld r1,PACAEMERGSP(r13)
+ subi r1,r1,STACK_FRAME_MIN_SIZE
+
/* See if we need to call a cpu state restore handler */
LOAD_REG_ADDR(r23, cur_cpu_spec)
ld r23,0(r23)
ld r12,CPU_SPEC_RESTORE(r23)
cmpdi 0,r12,0
beq 3f
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
+#ifdef CONFIG_PPC64_ELF_ABI_V1
ld r12,0(r12)
#endif
mtctr r12
@@ -309,10 +466,6 @@ generic_secondary_common_init:
sync /* order paca.run and cur_cpu_spec */
isync /* In case code patching happened */
- /* Create a temp kernel stack for use before relocation is on. */
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
-
b __secondary_start
#endif /* SMP */
@@ -321,7 +474,7 @@ generic_secondary_common_init:
* Assumes we're mapped EA == RA if the MMU is on.
*/
#ifdef CONFIG_PPC_BOOK3S
-__mmu_off:
+SYM_FUNC_START_LOCAL(__mmu_off)
mfmsr r3
andi. r0,r3,MSR_IR|MSR_DR
beqlr
@@ -332,8 +485,34 @@ __mmu_off:
sync
rfid
b . /* prevent speculative execution */
-#endif
+SYM_FUNC_END(__mmu_off)
+
+SYM_FUNC_START_LOCAL(start_initialization_book3s)
+ mflr r25
+
+ /* Setup some critical 970 SPRs before switching MMU off */
+ mfspr r0,SPRN_PVR
+ srwi r0,r0,16
+ cmpwi r0,0x39 /* 970 */
+ beq 1f
+ cmpwi r0,0x3c /* 970FX */
+ beq 1f
+ cmpwi r0,0x44 /* 970MP */
+ beq 1f
+ cmpwi r0,0x45 /* 970GX */
+ bne 2f
+1: bl __cpu_preinit_ppc970
+2:
+ /* Switch off MMU if not already off */
+ bl __mmu_off
+
+ /* Now the MMU is off, can return to our PAGE_OFFSET address */
+ tovirt(r25,r25)
+ mtlr r25
+ blr
+SYM_FUNC_END(start_initialization_book3s)
+#endif
/*
* Here is our main kernel entry point. We support currently 2 kind of entries
@@ -350,14 +529,11 @@ __start_initialization_multiplatform:
/* Make sure we are running in 64 bits mode */
bl enable_64b_mode
- /* Get TOC pointer (current runtime address) */
- bl relative_toc
+ /* Zero r13 (paca) so early program check / mce don't use it */
+ li r13,0
- /* find out where we are now */
- bcl 20,31,$+4
-0: mflr r26 /* r26 = runtime addr here */
- addis r26,r26,(_stext - 0b)@ha
- addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+ /* Poison TOC */
+ li r2,-1
/*
* Are we booted from a PROM Of-type client-interface ?
@@ -375,31 +551,41 @@ __start_initialization_multiplatform:
mr r29,r9
#endif
-#ifdef CONFIG_PPC_BOOK3E
+ /* Get TOC pointer (current runtime address) */
+ bl relative_toc
+
+ /* These functions return to the virtual (PAGE_OFFSET) address */
+#ifdef CONFIG_PPC_BOOK3E_64
bl start_initialization_book3e
- b __after_prom_start
#else
- /* Setup some critical 970 SPRs before switching MMU off */
- mfspr r0,SPRN_PVR
- srwi r0,r0,16
- cmpwi r0,0x39 /* 970 */
- beq 1f
- cmpwi r0,0x3c /* 970FX */
- beq 1f
- cmpwi r0,0x44 /* 970MP */
- beq 1f
- cmpwi r0,0x45 /* 970GX */
- bne 2f
-1: bl __cpu_preinit_ppc970
-2:
+ bl start_initialization_book3s
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+ /* Get TOC pointer, virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+
+ /* OPAL doesn't pass base address in r4, have to derive it. */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
- /* Switch off MMU if not already off */
- bl __mmu_off
b __after_prom_start
-#endif /* CONFIG_PPC_BOOK3E */
+__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* Get TOC pointer, non-virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/* Save parameters */
mr r31,r3
mr r30,r4
@@ -429,24 +615,28 @@ __boot_from_prom:
/* Do all of the interaction with OF client interface */
mr r8,r26
- bl prom_init
+ bl CFUNC(prom_init)
#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
/* We never return. We also hit that trap if trying to boot
* from OF while CONFIG_PPC_OF_BOOT_TRAMPOLINE isn't selected */
trap
+ .previous
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
/* process relocations for the final address of the kernel */
- lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
- sldi r25,r25,32
- lwz r7,__run_at_load-_stext(r26)
+ lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
cmplwi cr0,r7,1 /* flagged to stay where we are ? */
- bne 1f
- add r25,r25,r26
+ mr r25,r26 /* then use current kernel base */
+ beq 1f
+ LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */
1: mr r3,r25
bl relocate
+#if defined(CONFIG_PPC_BOOK3E_64)
+ /* IVPR needs to be set after relocation. */
+ bl init_core_book3e
+#endif
#endif
/*
@@ -456,17 +646,12 @@ __after_prom_start:
*
* Note: This process overwrites the OF exception vectors.
*/
- li r3,0 /* target addr */
-#ifdef CONFIG_PPC_BOOK3E
- tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
-#endif
- mr. r4,r26 /* In some cases the loader may */
- beq 9f /* have already put us at zero */
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
+ mr r4,r26 /* Load the virtual source address into r4 */
+ cmpld r3,r4 /* Check if source == dest */
+ beq 9f /* If so skip the copy */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
-#ifdef CONFIG_PPC_BOOK3E
- tovirt(r6,r6) /* on booke, we already run at PAGE_OFFSET */
-#endif
#ifdef CONFIG_RELOCATABLE
/*
@@ -474,32 +659,45 @@ __after_prom_start:
* variable __run_at_load, if it is set the kernel is treated as relocatable
* kernel, otherwise it will be moved to PHYSICAL_START
*/
- lwz r7,__run_at_load-_stext(r26)
+ lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
cmplwi cr0,r7,1
bne 3f
+#ifdef CONFIG_PPC_BOOK3E_64
+ LOAD_REG_ADDR(r5, __end_interrupts)
+ LOAD_REG_ADDR(r11, _stext)
+ sub r5,r5,r11
+#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, __end_interrupts - _stext)
+ LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+#endif
b 5f
3:
#endif
- lis r5,(copy_to_here - _stext)@ha
- addi r5,r5,(copy_to_here - _stext)@l /* # bytes of memory to copy */
+ /* # bytes of memory to copy */
+ lis r5,(ABS_ADDR(copy_to_here, text))@ha
+ addi r5,r5,(ABS_ADDR(copy_to_here, text))@l
bl copy_and_flush /* copy the first n bytes */
/* this includes the code being */
/* executed here. */
- addis r8,r3,(4f - _stext)@ha /* Jump to the copy of this code */
- addi r12,r8,(4f - _stext)@l /* that we just made */
+ /* Jump to the copy of this code that we just made */
+ addis r8,r3,(ABS_ADDR(4f, text))@ha
+ addi r12,r8,(ABS_ADDR(4f, text))@l
mtctr r12
bctr
.balign 8
-p_end: .llong _end - _stext
+p_end: .8byte _end - copy_to_here
-4: /* Now copy the rest of the kernel up to _end */
- addis r5,r26,(p_end - _stext)@ha
- ld r5,(p_end - _stext)@l(r5) /* get _end */
+4:
+ /*
+ * Now copy the rest of the kernel up to _end, add
+ * _end - copy_to_here to the copy limit and run again.
+ */
+ addis r8,r26,(ABS_ADDR(p_end, text))@ha
+ ld r8,(ABS_ADDR(p_end, text))@l(r8)
+ add r5,r5,r8
5: bl copy_and_flush /* copy the rest */
9: b start_here_multiplatform
@@ -540,6 +738,8 @@ _GLOBAL(copy_and_flush)
isync
blr
+_ASM_NOKPROBE_SYMBOL(copy_and_flush); /* Called in real mode */
+
.align 8
copy_to_here:
@@ -577,9 +777,15 @@ _GLOBAL(pmac_secondary_start)
sync
slbia
- /* get TOC pointer (real address) */
+ /* Branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
bl relative_toc
- tovirt(r2,r2)
/* Copy some CPU settings from CPU 0 */
bl __restore_cpu_ppc970
@@ -590,23 +796,23 @@ _GLOBAL(pmac_secondary_start)
mtmsrd r3 /* RI on */
/* Set up a paca value for this processor. */
- LOAD_REG_ADDR(r4,paca) /* Load paca pointer */
- ld r4,0(r4) /* Get base vaddr of paca array */
- mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */
- add r13,r13,r4 /* for this processor. */
+ LOAD_REG_ADDR(r4,paca_ptrs) /* Load paca pointer */
+ ld r4,0(r4) /* Get base vaddr of paca_ptrs array */
+ sldi r5,r24,3 /* get paca_ptrs[] index from cpu id */
+ ldx r13,r5,r4 /* r13 = paca_ptrs[cpu id] */
SET_PACA(r13) /* Save vaddr of paca in an SPRG*/
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
/* Create a temp kernel stack for use before relocation is on. */
ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ subi r1,r1,STACK_FRAME_MIN_SIZE
b __secondary_start
@@ -633,21 +839,19 @@ __secondary_start:
/* Set thread priority to MEDIUM */
HMT_MEDIUM
- /* Initialize the kernel stack */
- LOAD_REG_ADDR(r3, current_set)
- sldi r28,r24,3 /* get current_set[cpu#] */
- ldx r14,r3,r28
- addi r14,r14,THREAD_SIZE-STACK_FRAME_OVERHEAD
- std r14,PACAKSAVE(r13)
-
- /* Do early setup for that CPU (SLB and hash table pointer) */
- bl early_setup_secondary
+ /*
+ * Do early setup for this CPU, in particular initialising the MMU so we
+ * can turn it on below. This is a call to C, which is OK, we're still
+ * running on the emergency stack.
+ */
+ bl CFUNC(early_setup_secondary)
/*
- * setup the new stack pointer, but *don't* use this until
- * translation is on.
+ * The primary has initialized our kernel stack for us in the paca, grab
+ * it and put it in r1. We must *not* use it until we turn on the MMU
+ * below, because it may not be inside the RMO.
*/
- mr r1, r14
+ ld r1, PACAKSAVE(r13)
/* Clear backchain so we get nice backtraces */
li r7,0
@@ -656,7 +860,8 @@ __secondary_start:
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- stb r7,PACASOFTIRQEN(r13)
+ li r7,IRQS_DISABLED
+ stb r7,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
@@ -666,7 +871,7 @@ __secondary_start:
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
/*
@@ -675,10 +880,10 @@ __secondary_start:
* before going into C code.
*/
start_secondary_prolog:
- ld r2,PACATOC(r13)
+ LOAD_PACA_TOC()
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
/*
* Reset stack pointer and call start_secondary
@@ -689,26 +894,26 @@ _GLOBAL(start_secondary_resume)
ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
#endif
/*
* This subroutine clobbers r11 and r12
*/
-enable_64b_mode:
+SYM_FUNC_START_LOCAL(enable_64b_mode)
mfmsr r11 /* grab the current MSR */
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
oris r11,r11,0x8000 /* CM bit set, we'll set ICM later */
mtmsr r11
-#else /* CONFIG_PPC_BOOK3E */
- li r12,(MSR_64BIT | MSR_ISF)@highest
- sldi r12,r12,48
+#else /* CONFIG_PPC_BOOK3E_64 */
+ LOAD_REG_IMMEDIATE(r12, MSR_64BIT)
or r11,r11,r12
mtmsrd r11
isync
#endif
blr
+SYM_FUNC_END(enable_64b_mode)
/*
* This puts the TOC pointer into r2, offset by 0x8000 (as expected
@@ -719,10 +924,15 @@ enable_64b_mode:
* TOC in -mcmodel=medium mode. After we relocate to 0 but before
* the MMU is on we need our TOC to be a virtual address otherwise
* these pointers will be real addresses which may get stored and
- * accessed later with the MMU on. We use tovirt() at the call
- * sites to handle this.
+ * accessed later with the MMU on. We branch to the virtual address
+ * while still in real mode then call relative_toc again to handle
+ * this.
*/
_GLOBAL(relative_toc)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ tdnei r2,-1
+ blr
+#else
mflr r0
bcl 20,31,$+4
0: mflr r11
@@ -732,15 +942,16 @@ _GLOBAL(relative_toc)
blr
.balign 8
-p_toc: .llong __toc_start + 0x8000 - 0b
+p_toc: .8byte .TOC. - 0b
+#endif
/*
* This is where the main kernel code starts.
*/
+__REF
start_here_multiplatform:
- /* set up the TOC */
- bl relative_toc
- tovirt(r2,r2)
+ /* Adjust TOC for moved kernel. Could adjust when moving it instead. */
+ bl relative_toc
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
@@ -767,7 +978,7 @@ start_here_multiplatform:
std r29,8(r11);
#endif
-#ifndef CONFIG_PPC_BOOK3E
+#ifndef CONFIG_PPC_BOOK3E_64
mfmsr r6
ori r6,r6,MSR_RI
mtmsrd r6 /* RI on */
@@ -780,35 +991,34 @@ start_here_multiplatform:
std r0,0(r4)
#endif
- /* The following gets the stack set up with the regs */
- /* pointing to the real addr of the kernel stack. This is */
- /* all done to support the C function call below which sets */
- /* up the htab. This is done because we have relocated the */
- /* kernel but are still running in real mode. */
-
- LOAD_REG_ADDR(r3,init_thread_union)
-
/* set up a stack pointer */
- addi r1,r3,THREAD_SIZE
+ LOAD_REG_ADDR(r3,init_thread_union)
+ LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
+ add r1,r3,r1
li r0,0
- stdu r0,-STACK_FRAME_OVERHEAD(r1)
+ stdu r0,-STACK_FRAME_MIN_SIZE(r1)
/*
* Do very early kernel initializations, including initial hash table
* and SLB setup before we turn on relocation.
*/
+#ifdef CONFIG_KASAN
+ bl CFUNC(kasan_early_init)
+#endif
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl early_setup /* also sets r13 and SPRG_PACA */
+ LOAD_REG_ADDR(r12, DOTSYM(early_setup))
+ mtctr r12
+ bctrl /* also sets r13 and SPRG_PACA */
LOAD_REG_ADDR(r3, start_here_common)
ld r4,PACAKMSR(r13)
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- RFI
+ RFI_TO_KERNEL
b . /* prevent speculative execution */
-
+
/* This is where all platforms converge execution */
start_here_common:
@@ -816,37 +1026,20 @@ start_here_common:
std r1,PACAKSAVE(r13)
/* Load the TOC (virtual address) */
- ld r2,PACATOC(r13)
-
- /* Do more system initializations in virtual mode */
- bl setup_system
+ LOAD_PACA_TOC()
/* Mark interrupts soft and hard disabled (they might be enabled
* in the PACA when doing hotplug)
*/
- li r0,0
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_DISABLED
+ stb r0,PACAIRQSOFTMASK(r13)
li r0,PACA_IRQ_HARD_DIS
stb r0,PACAIRQHAPPENED(r13)
/* Generic kernel entry */
- bl start_kernel
+ bl CFUNC(start_kernel)
/* Not reached */
- BUG_OPCODE
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the bss, which is page-aligned.
- */
- .section ".bss"
-
- .align PAGE_SHIFT
-
- .globl empty_zero_page
-empty_zero_page:
- .space PAGE_SIZE
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
+ .previous
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_85xx.S
index b497188a94a1..f9a73fae6464 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Kernel execution entry point code.
*
@@ -23,25 +24,23 @@
* PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004 Freescale Semiconductor, Inc
* PowerPC e500 modifications, Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
#include <linux/threads.h>
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
#include "head_booke.h"
/* As with the other PowerPC ports, it is expected that when code
@@ -56,8 +55,8 @@
*
*/
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
/*
* Reserve a word at a fixed location to store the address
* of abatron_pteptrs
@@ -81,7 +80,7 @@ _ENTRY(_start);
mr r23,r3
mr r25,r4
- bl 0f
+ bcl 20,31,$+4
0: mflr r8
addis r3,r8,(is_second_reloc - 0b)@ha
lwz r19,(is_second_reloc - 0b)@l(r3)
@@ -115,7 +114,7 @@ _ENTRY(_start);
1:
/*
- * We have the runtime (virutal) address of our base.
+ * We have the runtime (virtual) address of our base.
* We calculate our shift of offset from a 64M page.
* We could map the 64M page we belong to at PAGE_OFFSET and
* get going from there.
@@ -131,7 +130,7 @@ _ENTRY(_start);
/*
* For the second relocation, we already set the right tlb entries
- * for the kernel space, so skip the code in fsl_booke_entry_mapping.S
+ * for the kernel space, so skip the code in 85xx_entry_mapping.S
*/
cmpwi r19,1
beq set_ivor
@@ -156,10 +155,12 @@ _ENTRY(_start);
* if needed
*/
-_ENTRY(__early_start)
+_GLOBAL(__early_start)
+ LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+ lwz r20,0(r20)
#define ENTRY_MAPPING_BOOT_SETUP
-#include "fsl_booke_entry_mapping.S"
+#include "85xx_entry_mapping.S"
#undef ENTRY_MAPPING_BOOT_SETUP
set_ivor:
@@ -187,18 +188,8 @@ set_ivor:
/* Setup the defaults for TLB entries */
li r2,(MAS4_TSIZED(BOOK3E_PAGESZ_4K))@l
-#ifdef CONFIG_E200
- oris r2,r2,MAS4_TLBSELD(1)@h
-#endif
mtspr SPRN_MAS4, r2
-#if 0
- /* Enable DOZE */
- mfspr r2,SPRN_HID0
- oris r2,r2,HID0_DOZE@h
- mtspr SPRN_HID0, r2
-#endif
-
#if !defined(CONFIG_BDI_SWITCH)
/*
* The Abatron BDI JTAG debugger does not tolerate others
@@ -239,13 +230,17 @@ set_ivor:
lis r1,init_thread_union@h
ori r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
- CURRENT_THREAD_INFO(r22, r1)
- stw r24, TI_CPU(r22)
+#ifdef CONFIG_SMP
+ stw r24, TASK_CPU(r2)
+#endif
bl early_init
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
#ifdef CONFIG_RELOCATABLE
mr r3,r30
mr r4,r31
@@ -282,8 +277,8 @@ set_ivor:
ori r6, r6, swapper_pg_dir@l
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- lis r4, KERNELBASE@h
- ori r4, r4, KERNELBASE@l
+ lis r3, kernstart_virt_addr@ha
+ lwz r4, kernstart_virt_addr@l(r3)
stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */
stw r6, 0(r5)
@@ -299,9 +294,10 @@ set_ivor:
/* Macros to hide the PTE size differences
*
* FIND_PTE -- walks the page tables given EA & pgdir pointer
- * r10 -- EA of fault
+ * r10 -- free
* r11 -- PGDIR pointer
* r12 -- free
+ * r13 -- EA of fault
* label 2: is the bailout case
*
* if we find the pte (fall through):
@@ -312,34 +308,34 @@ set_ivor:
#ifdef CONFIG_PTE_64BIT
#ifdef CONFIG_HUGETLB_PAGE
#define FIND_PTE \
- rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
- lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
+ rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \
+ add r12, r11, r12; \
+ lwz r11, 4(r12); /* Get pgd/pmd entry */ \
+ rlwinm. r10, r11, 32 - _PAGE_PSIZE_SHIFT, 0x1e; /* get tsize*/ \
+ bne 1000f; /* Huge page (leaf entry) */ \
rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
- blt 1000f; /* Normal non-huge page */ \
beq 2f; /* Bail if no table */ \
- oris r11, r11, PD_HUGE@h; /* Put back address bit */ \
- andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \
- xor r12, r10, r11; /* drop size bits from pointer */ \
- b 1001f; \
-1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
+ rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \
li r10, 0; /* clear r10 */ \
-1001: lwz r11, 4(r12); /* Get pte entry */
+ lwz r11, 4(r12); /* Get pte entry */ \
+1000:
#else
#define FIND_PTE \
- rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \
- lwzx r11, r12, r11; /* Get pgd/pmd entry */ \
+ rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \
+ add r12, r11, r12; \
+ lwz r11, 4(r12); /* Get pgd/pmd entry */ \
rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \
beq 2f; /* Bail if no table */ \
- rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \
+ rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \
lwz r11, 4(r12); /* Get pte entry */
#endif /* HUGEPAGE */
#else /* !PTE_64BIT */
#define FIND_PTE \
- rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
+ rlwimi r11, r13, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \
lwz r11, 0(r11); /* Get L1 entry */ \
rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \
beq 2f; /* Bail if no table */ \
- rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \
+ rlwimi r12, r13, 22, 20, 29; /* Compute PTE address */ \
lwz r11, 0(r12); /* Get Linux PTE */
#endif
@@ -365,32 +361,30 @@ interrupt_base:
CRITICAL_EXCEPTION(0x0100, CRITICAL, CriticalInput, unknown_exception)
/* Machine Check Interrupt */
-#ifdef CONFIG_E200
- /* no RFMCI, MCSRRs on E200 */
- CRITICAL_EXCEPTION(0x0200, MACHINE_CHECK, MachineCheck, \
- machine_check_exception)
-#else
MCHECK_EXCEPTION(0x0200, MachineCheck, machine_check_exception)
-#endif
/* Data Storage Interrupt */
START_EXCEPTION(DataStorage)
- NORMAL_EXCEPTION_PROLOG(DATA_STORAGE)
- mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
+ NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE)
+ mfspr r5,SPRN_ESR /* Grab the ESR, save it */
stw r5,_ESR(r11)
- mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
+ mfspr r4,SPRN_DEAR /* Grab the DEAR, save it */
+ stw r4, _DEAR(r11)
andis. r10,r5,(ESR_ILK|ESR_DLK)@h
bne 1f
- EXC_XFER_LITE(0x0300, handle_page_fault)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
1:
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+ prepare_transfer_to_handler
+ bl CacheLockingException
+ b interrupt_return
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
/* External Input Interrupt */
- EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(0x0500, EXTERNAL, ExternalInput, do_IRQ)
/* Alignment Interrupt */
ALIGNMENT_EXCEPTION
@@ -402,32 +396,22 @@ interrupt_base:
#ifdef CONFIG_PPC_FPU
FP_UNAVAILABLE_EXCEPTION
#else
-#ifdef CONFIG_E200
- /* E200 treats 'normal' floating point instructions as FP Unavail exception */
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- program_check_exception, EXC_XFER_EE)
-#else
- EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- unknown_exception, EXC_XFER_EE)
-#endif
+ EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, emulation_assist_interrupt)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL SPRN_SRR1
/* Auxiliary Processor Unavailable Interrupt */
- EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, unknown_exception)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
- EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x3100, FIT, FixedIntervalTimer, unknown_exception)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
@@ -451,13 +435,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM BOOKE_INTERRUPT_DTLB_MISS SPRN_SRR1
- mfspr r10, SPRN_DEAR /* Get faulting address */
+START_BTB_FLUSH_SECTION
+ mfspr r11, SPRN_SRR1
+ andi. r10,r11,MSR_PR
+ beq 1f
+ BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+ mfspr r13, SPRN_DEAR /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
lis r11, PAGE_OFFSET@h
- cmplw 5, r10, r11
+ cmplw 5, r13, r11
blt 5, 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
@@ -473,30 +464,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
4:
- /* Mask of required permission bits. Note that while we
- * do copy ESR:ST to _PAGE_RW position as trying to write
- * to an RO page is pretty common, we don't do it with
- * _PAGE_DIRTY. We could do it, but it's a fairly rare
- * event so I'd rather take the overhead when it happens
- * rather than adding an instruction here. We should measure
- * whether the whole thing is worth it in the first place
- * as we could avoid loading SPRN_ESR completely in the first
- * place...
- *
- * TODO: Is it worth doing that mfspr & rlwimi in the first
- * place or can we save a couple of instructions here ?
- */
- mfspr r12,SPRN_ESR
+ FIND_PTE
+
#ifdef CONFIG_PTE_64BIT
- li r13,_PAGE_PRESENT
+ li r13,_PAGE_PRESENT|_PAGE_BAP_SR
oris r13,r13,_PAGE_ACCESSED@h
#else
- li r13,_PAGE_PRESENT|_PAGE_ACCESSED
+ li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED
#endif
- rlwimi r13,r12,11,29,29
-
- FIND_PTE
andc. r13,r13,r11 /* Check permission */
#ifdef CONFIG_PTE_64BIT
@@ -508,7 +490,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
#endif
- bne 2f /* Bail if permission/valid mismach */
+ bne 2f /* Bail if permission/valid mismatch */
/* Jump to common tlb load */
b finish_tlb_load
@@ -545,13 +527,21 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfcr r13
stw r13, THREAD_NORMSAVE(3)(r10)
DO_KVM BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR1
- mfspr r10, SPRN_SRR0 /* Get faulting address */
+START_BTB_FLUSH_SECTION
+ mfspr r11, SPRN_SRR1
+ andi. r10,r11,MSR_PR
+ beq 1f
+ BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+
+ mfspr r13, SPRN_SRR0 /* Get faulting address */
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
*/
lis r11, PAGE_OFFSET@h
- cmplw 5, r10, r11
+ cmplw 5, r13, r11
blt 5, 3f
lis r11, swapper_pg_dir@h
ori r11, r11, swapper_pg_dir@l
@@ -560,6 +550,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
rlwinm r12,r12,0,16,1
mtspr SPRN_MAS1,r12
+ FIND_PTE
/* Make up the required permissions for kernel code */
#ifdef CONFIG_PTE_64BIT
li r13,_PAGE_PRESENT | _PAGE_BAP_SX
@@ -574,6 +565,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r11,SPRN_SPRG_THREAD
lwz r11,PGDIR(r11)
+#ifdef CONFIG_PPC_KUAP
+ mfspr r12, SPRN_MAS1
+ rlwinm. r12,r12,0,0x3fff0000
+ beq 2f /* KUAP fault */
+#endif
+
+ FIND_PTE
/* Make up the required permissions for user code */
#ifdef CONFIG_PTE_64BIT
li r13,_PAGE_PRESENT | _PAGE_BAP_UX
@@ -583,7 +581,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
4:
- FIND_PTE
andc. r13,r13,r11 /* Check permission */
#ifdef CONFIG_PTE_64BIT
@@ -595,7 +592,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
#endif
#endif
- bne 2f /* Bail if permission mismach */
+ bne 2f /* Bail if permission mismatch */
/* Jump to common TLB load point */
b finish_tlb_load
@@ -613,40 +610,48 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
mfspr r10, SPRN_SPRG_RSCRATCH0
b InstructionStorage
+/* Define SPE handlers for e500v2 */
#ifdef CONFIG_SPE
/* SPE Unavailable */
START_EXCEPTION(SPEUnavailable)
- NORMAL_EXCEPTION_PROLOG(SPE_ALTIVEC_UNAVAIL)
+ NORMAL_EXCEPTION_PROLOG(0x2010, SPE_UNAVAIL)
beq 1f
bl load_up_spe
b fast_exception_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x2010, KernelSPE)
-#else
- EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \
- unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+1: prepare_transfer_to_handler
+ bl KernelSPE
+ b interrupt_return
+#elif defined(CONFIG_SPE_POSSIBLE)
+ EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, unknown_exception)
+#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
- EXCEPTION(0x2030, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
- SPEFloatingPointException, EXC_XFER_EE)
+ START_EXCEPTION(SPEFloatingPointData)
+ NORMAL_EXCEPTION_PROLOG(0x2030, SPE_FP_DATA)
+ prepare_transfer_to_handler
+ bl SPEFloatingPointException
+ REST_NVGPRS(r1)
+ b interrupt_return
/* SPE Floating Point Round */
- EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- SPEFloatingPointRoundException, EXC_XFER_EE)
-#else
- EXCEPTION(0x2040, SPE_FP_DATA_ALTIVEC_ASSIST, SPEFloatingPointData,
- unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- unknown_exception, EXC_XFER_EE)
-#endif /* CONFIG_SPE */
+ START_EXCEPTION(SPEFloatingPointRound)
+ NORMAL_EXCEPTION_PROLOG(0x2050, SPE_FP_ROUND)
+ prepare_transfer_to_handler
+ bl SPEFloatingPointRoundException
+ REST_NVGPRS(r1)
+ b interrupt_return
+#elif defined(CONFIG_SPE_POSSIBLE)
+ EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData, unknown_exception)
+ EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, unknown_exception)
+#endif /* CONFIG_SPE_POSSIBLE */
+
/* Performance Monitor */
EXCEPTION(0x2060, PERFORMANCE_MONITOR, PerformanceMonitor, \
- performance_monitor_exception, EXC_XFER_STD)
+ performance_monitor_exception)
- EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception, EXC_XFER_STD)
+ EXCEPTION(0x2070, DOORBELL, Doorbell, doorbell_exception)
CRITICAL_EXCEPTION(0x2080, DOORBELL_CRITICAL, \
CriticalDoorbell, unknown_exception)
@@ -661,10 +666,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
unknown_exception)
/* Hypercall */
- EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception)
/* Embedded Hypervisor Privilege */
- EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception)
interrupt_end:
@@ -698,8 +703,7 @@ finish_tlb_load:
/* Get the next_tlbcam_idx percpu var */
#ifdef CONFIG_SMP
- lwz r12, THREAD_INFO-THREAD(r12)
- lwz r15, TI_CPU(r12)
+ lwz r15, TASK_CPU-THREAD(r12)
lis r14, __per_cpu_offset@h
ori r14, r14, __per_cpu_offset@l
rlwinm r15, r15, 2, 0, 29
@@ -729,17 +733,12 @@ finish_tlb_load:
lwz r15, 0(r14)
100: stw r15, 0(r17)
- /*
- * Calc MAS1_TSIZE from r10 (which has pshift encoded)
- * tlb_enc = (pshift - 10).
- */
- subi r15, r10, 10
mfspr r16, SPRN_MAS1
- rlwimi r16, r15, 7, 20, 24
+ rlwimi r16, r10, MAS1_TSIZE_SHIFT, MAS1_TSIZE_MASK
mtspr SPRN_MAS1, r16
/* copy the pshift for use later */
- mr r14, r10
+ addi r14, r10, _PAGE_PSIZE_SHIFT_OFFSET
/* fall through */
@@ -766,14 +765,15 @@ BEGIN_MMU_FTR_SECTION
mtspr SPRN_MAS7, r10
END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#else
- li r10, (_PAGE_EXEC | _PAGE_PRESENT)
+ li r10, (_PAGE_EXEC | _PAGE_READ)
mr r13, r11
rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */
and r12, r11, r10
- andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */
+ mcrf cr0, cr5 /* Test for user page */
slwi r10, r12, 1
or r10, r10, r12
- iseleq r12, r12, r10
+ rlwinm r10, r10, 0, ~_PAGE_EXEC /* Clear SX on user pages */
+ isellt r12, r10, r12
rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */
mtspr SPRN_MAS3, r13
#endif
@@ -794,31 +794,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
#endif
3: mtspr SPRN_MAS2, r12
-#ifdef CONFIG_E200
- /* Round robin TLB1 entries assignment */
- mfspr r12, SPRN_MAS0
-
- /* Extract TLB1CFG(NENTRY) */
- mfspr r11, SPRN_TLB1CFG
- andi. r11, r11, 0xfff
-
- /* Extract MAS0(NV) */
- andi. r13, r12, 0xfff
- addi r13, r13, 1
- cmpw 0, r13, r11
- addi r12, r12, 1
-
- /* check if we need to wrap */
- blt 7f
-
- /* wrap back to first free tlbcam entry */
- lis r13, tlbcam_index@ha
- lwz r13, tlbcam_index@l(r13)
- rlwimi r12, r13, 0, 20, 31
-7:
- mtspr SPRN_MAS0,r12
-#endif /* CONFIG_E200 */
-
tlb_write_entry:
tlbwe
@@ -855,29 +830,6 @@ _GLOBAL(load_up_spe)
oris r5,r5,MSR_SPE@h
mtmsr r5 /* enable use of SPE now */
isync
-/*
- * For SMP, we don't do lazy SPE switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another. Instead we call giveup_spe in switch_to.
- */
-#ifndef CONFIG_SMP
- lis r3,last_task_used_spe@ha
- lwz r4,last_task_used_spe@l(r3)
- cmpi 0,r4,0
- beq 1f
- addi r4,r4,THREAD /* want THREAD of last_task_used_spe */
- SAVE_32EVRS(0,r10,r4,THREAD_EVR0)
- evxor evr10, evr10, evr10 /* clear out evr10 */
- evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */
- li r5,THREAD_ACC
- evstddx evr10, r4, r5 /* save off accumulator */
- lwz r5,PT_REGS(r4)
- lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r10,MSR_SPE@h
- andc r4,r4,r10 /* disable SPE for previous task */
- stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* !CONFIG_SMP */
/* enable use of SPE after return */
oris r9,r9,MSR_SPE@h
mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
@@ -887,17 +839,13 @@ _GLOBAL(load_up_spe)
evlddx evr4,r10,r5
evmra evr4,evr4
REST_32EVRS(0,r10,r5,THREAD_EVR0)
-#ifndef CONFIG_SMP
- subi r4,r5,THREAD
- stw r4,last_task_used_spe@l(r3)
-#endif /* !CONFIG_SMP */
blr
/*
* SPE unavailable trap from kernel - print a message, but let
* the task use SPE in the kernel until it returns to user mode.
*/
-KernelSPE:
+SYM_FUNC_START_LOCAL(KernelSPE)
lwz r3,_MSR(r1)
oris r3,r3,MSR_SPE@h
stw r3,_MSR(r1) /* enable use of SPE after return */
@@ -906,21 +854,22 @@ KernelSPE:
ori r3,r3,87f@l
mr r4,r2 /* current */
lwz r5,_NIP(r1)
- bl printk
+ bl _printk
#endif
- b ret_from_except
+ b interrupt_return
#ifdef CONFIG_PRINTK
87: .string "SPE used in kernel (task=%p, pc=%x) \n"
#endif
.align 4,0
+SYM_FUNC_END(KernelSPE)
#endif /* CONFIG_SPE */
/*
* Translate the effec addr in r3 to phys addr. The phys addr will be put
* into r3(higher 32bit) and r4(lower 32bit)
*/
-get_phys_addr:
+SYM_FUNC_START_LOCAL(get_phys_addr)
mfmsr r8
mfspr r9,SPRN_PID
rlwinm r9,r9,16,0x3fff0000 /* turn PID into MAS6[SPID] */
@@ -942,24 +891,14 @@ get_phys_addr:
mfspr r3,SPRN_MAS7
#endif
blr
+SYM_FUNC_END(get_phys_addr)
/*
* Global functions
*/
-/* Adjust or setup IVORs for e200 */
-_GLOBAL(__setup_e200_ivors)
- li r3,DebugDebug@l
- mtspr SPRN_IVOR15,r3
- li r3,SPEUnavailable@l
- mtspr SPRN_IVOR32,r3
- li r3,SPEFloatingPointData@l
- mtspr SPRN_IVOR33,r3
- li r3,SPEFloatingPointRound@l
- mtspr SPRN_IVOR34,r3
- sync
- blr
-
+#ifdef CONFIG_PPC_E500
+#ifndef CONFIG_PPC_E500MC
/* Adjust or setup IVORs for e500v1/v2 */
_GLOBAL(__setup_e500_ivors)
li r3,DebugCrit@l
@@ -974,7 +913,7 @@ _GLOBAL(__setup_e500_ivors)
mtspr SPRN_IVOR35,r3
sync
blr
-
+#else
/* Adjust or setup IVORs for e500mc */
_GLOBAL(__setup_e500mc_ivors)
li r3,DebugDebug@l
@@ -1000,19 +939,15 @@ _GLOBAL(__setup_ehv_ivors)
mtspr SPRN_IVOR41,r3
sync
blr
+#endif /* CONFIG_PPC_E500MC */
+#endif /* CONFIG_PPC_E500 */
#ifdef CONFIG_SPE
/*
- * extern void giveup_spe(struct task_struct *prev)
+ * extern void __giveup_spe(struct task_struct *prev)
*
*/
-_GLOBAL(giveup_spe)
- mfmsr r5
- oris r5,r5,MSR_SPE@h
- mtmsr r5 /* enable use of SPE now */
- isync
- cmpi 0,r3,0
- beqlr- /* if no previous owner, done */
+_GLOBAL(__giveup_spe)
addi r3,r3,THREAD /* want THREAD of task */
lwz r5,PT_REGS(r3)
cmpi 0,r5,0
@@ -1022,16 +957,11 @@ _GLOBAL(giveup_spe)
li r4,THREAD_ACC
evstddx evr6, r4, r3 /* save off accumulator */
beq 1f
- lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ lwz r4,_MSR-STACK_INT_FRAME_REGS(r5)
lis r3,MSR_SPE@h
andc r4,r4,r3 /* disable SPE for previous task */
- stw r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+ stw r4,_MSR-STACK_INT_FRAME_REGS(r5)
1:
-#ifndef CONFIG_SMP
- li r5,0
- lis r4,last_task_used_spe@ha
- stw r5,last_task_used_spe@l(r4)
-#endif /* !CONFIG_SMP */
blr
#endif /* CONFIG_SPE */
@@ -1053,94 +983,6 @@ _GLOBAL(abort)
mtspr SPRN_DBCR0,r13
isync
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
-
-_GLOBAL(flush_dcache_L1)
- mfspr r3,SPRN_L1CFG0
-
- rlwinm r5,r3,9,3 /* Extract cache block size */
- twlgti r5,1 /* Only 32 and 64 byte cache blocks
- * are currently defined.
- */
- li r4,32
- subfic r6,r5,2 /* r6 = log2(1KiB / cache block size) -
- * log2(number of ways)
- */
- slw r5,r4,r5 /* r5 = cache block size */
-
- rlwinm r7,r3,0,0xff /* Extract number of KiB in the cache */
- mulli r7,r7,13 /* An 8-way cache will require 13
- * loads per set.
- */
- slw r7,r7,r6
-
- /* save off HID0 and set DCFA */
- mfspr r8,SPRN_HID0
- ori r9,r8,HID0_DCFA@l
- mtspr SPRN_HID0,r9
- isync
-
- lis r4,KERNELBASE@h
- mtctr r7
-
-1: lwz r3,0(r4) /* Load... */
- add r4,r4,r5
- bdnz 1b
-
- msync
- lis r4,KERNELBASE@h
- mtctr r7
-
-1: dcbf 0,r4 /* ...and flush. */
- add r4,r4,r5
- bdnz 1b
-
- /* restore HID0 */
- mtspr SPRN_HID0,r8
- isync
-
- blr
-
-/* Flush L1 d-cache, invalidate and disable d-cache and i-cache */
-_GLOBAL(__flush_disable_L1)
- mflr r10
- bl flush_dcache_L1 /* Flush L1 d-cache */
- mtlr r10
-
- mfspr r4, SPRN_L1CSR0 /* Invalidate and disable d-cache */
- li r5, 2
- rlwimi r4, r5, 0, 3
-
- msync
- isync
- mtspr SPRN_L1CSR0, r4
- isync
-
-1: mfspr r4, SPRN_L1CSR0 /* Wait for the invalidate to finish */
- andi. r4, r4, 2
- bne 1b
-
- mfspr r4, SPRN_L1CSR1 /* Invalidate and disable i-cache */
- li r5, 2
- rlwimi r4, r5, 0, 3
-
- mtspr SPRN_L1CSR1, r4
- isync
-
- blr
-
#ifdef CONFIG_SMP
/* When we get here, r24 needs to hold the CPU # */
.globl __secondary_start
@@ -1163,7 +1005,12 @@ __secondary_start:
mr r5,r25 /* phys kernel start */
rlwinm r5,r5,0,~0x3ffffff /* aligned 64M */
subf r4,r5,r4 /* memstart_addr - phys kernel start */
- li r5,0 /* no device tree */
+ lis r7,KERNELBASE@h
+ ori r7,r7,KERNELBASE@l
+ cmpw r20,r7 /* if kernstart_virt_addr != KERNELBASE, randomized */
+ beq 2f
+ li r4,0
+2: li r5,0 /* no device tree */
li r6,0 /* not boot cpu */
bl restore_to_as0
@@ -1176,13 +1023,13 @@ __secondary_start:
mr r4,r24 /* Why? */
bl call_setup_cpu
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- lwz r1,secondary_ti@l(r1)
- lwz r2,TI_TASK(r1)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ lwz r2,secondary_current@l(r2)
+ lwz r1,TASK_STACK(r2)
/* stack */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
stw r0,0(r1)
@@ -1211,6 +1058,54 @@ __secondary_hold_acknowledge:
#endif
/*
+ * Create a 64M tlb by address and entry
+ * r3 - entry
+ * r4 - virtual address
+ * r5/r6 - physical address
+ */
+_GLOBAL(create_kaslr_tlb_entry)
+ lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */
+ rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */
+ mtspr SPRN_MAS0,r7 /* Write MAS0 */
+
+ lis r3,(MAS1_VALID|MAS1_IPROT)@h
+ ori r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+ mtspr SPRN_MAS1,r3 /* Write MAS1 */
+
+ lis r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+ ori r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+ and r3,r3,r4
+ ori r3,r3,MAS2_M_IF_NEEDED@l
+ mtspr SPRN_MAS2,r3 /* Write MAS2(EPN) */
+
+#ifdef CONFIG_PHYS_64BIT
+ ori r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX)
+ mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */
+ mtspr SPRN_MAS7,r5
+#else
+ ori r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX)
+ mtspr SPRN_MAS3,r8 /* Write MAS3(RPN) */
+#endif
+
+ tlbwe /* Write TLB */
+ isync
+ sync
+ blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+ mfmsr r7
+ rlwinm r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r7
+ rfi
+
+/*
* Create a tlb entry with the same effective and physical address as
* the tlb entry used by the current running code. But set the TS to 1.
* Then switch to the address space 1. It will return with the r3 set to
@@ -1235,7 +1130,7 @@ _GLOBAL(switch_to_as1)
bne 1b
/* Get the tlb entry used by the current running code */
- bl 0f
+ bcl 20,31,$+4
0: mflr r4
tlbsx 0,r4
@@ -1269,7 +1164,7 @@ _GLOBAL(switch_to_as1)
_GLOBAL(restore_to_as0)
mflr r0
- bl 0f
+ bcl 20,31,$+4
0: mflr r9
addi r9,r9,1f - 0b
@@ -1315,25 +1210,3 @@ _GLOBAL(restore_to_as0)
*/
3: mr r3,r5
bl _start
-
-/*
- * We put a few things here that have to be page-aligned. This stuff
- * goes at the beginning of the data segment, which is page-aligned.
- */
- .data
- .align 12
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
-/*
- * Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 7ee876d2adb5..393e19ee1322 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,38 +12,40 @@
*
* This file contains low-level support and setup for PowerPC 8xx
* embedded processors, including trap and interrupt dispatch.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/init.h>
+#include <linux/magic.h>
+#include <linux/pgtable.h>
+#include <linux/sizes.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/cache.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
+#include <asm/code-patching-asm.h>
+#include <asm/interrupt.h>
+
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN 0x00f0
+
+#include "head_32.h"
+
+#define PAGE_SHIFT_512K 19
+#define PAGE_SHIFT_8M 23
-/* Macro to make the code more readable. */
-#ifdef CONFIG_8xx_CPU6
-#define DO_8xx_CPU6(val, reg) \
- li reg, val; \
- stw reg, 12(r0); \
- lwz reg, 12(r0);
-#else
-#define DO_8xx_CPU6(val, reg)
-#endif
__HEAD
-_ENTRY(_stext);
-_ENTRY(_start);
+_GLOBAL(_stext);
+_GLOBAL(_start);
/* MPC8xx
* This port was done on an MBX board with an 860. Right now I only
@@ -65,13 +68,6 @@ _ENTRY(_start);
* 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to
* the "internal" processor registers before MMU_init is called.
*
- * The TLB code currently contains a major hack. Since I use the condition
- * code register, I have to save and restore it. I am out of registers, so
- * I just store it in memory location 0 (the TLB handlers are not reentrant).
- * To avoid making any decisions, I need to use the "segment" valid bit
- * in the first level table, but that would require many changes to the
- * Linux page directory/table functions that I don't want to do right now.
- *
* -- Dan
*/
.globl __start
@@ -94,541 +90,354 @@ turn_on_mmu:
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
rfi /* enables MMU */
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- CLR_TOP32(r11); \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
+#ifdef CONFIG_PERF_EVENTS
+ .align 4
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
+ .globl itlb_miss_counter
+itlb_miss_counter:
+ .space 4
+
+ .globl dtlb_miss_counter
+dtlb_miss_counter:
+ .space 4
+
+ .globl instruction_counter
+instruction_counter:
+ .space 4
+#endif
/* System reset */
- EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, system_reset_exception)
/* Machine check */
- . = 0x200
-MachineCheck:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,0x00f0
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0x200, machine_check_exception)
-
-/* Data access exception.
- * This is "never generated" by the MPC8xx. We jump to it for other
- * translation errors.
- */
- . = 0x300
-DataAccess:
- EXCEPTION_PROLOG
- mfspr r10,SPRN_DSISR
- stw r10,_DSISR(r11)
- mr r5,r10
- mfspr r4,SPRN_DAR
- li r10,0x00f0
- mtspr SPRN_DAR,r10 /* Tag DAR, to be used in DTLB Error */
- EXC_XFER_LITE(0x300, handle_page_fault)
-
-/* Instruction access exception.
- * This is "never generated" by the MPC8xx. We jump to it for other
- * translation errors.
- */
- . = 0x400
-InstructionAccess:
- EXCEPTION_PROLOG
- mr r4,r12
- mr r5,r9
- EXC_XFER_LITE(0x400, handle_page_fault)
+ START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+ EXCEPTION_PROLOG INTERRUPT_MACHINE_CHECK MachineCheck handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl machine_check_exception
+ b interrupt_return
/* External interrupt */
- EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
/* Alignment exception */
- . = 0x600
-Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- li r5,0x00f0
- mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+ EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Program check exception */
- EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
-
-/* No FPU on MPC8xx. This exception is not supposed to happen.
-*/
- EXCEPTION(0x800, FPUnavailable, unknown_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+ EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Decrementer */
- EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
-
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
/* System call */
- . = 0xc00
-SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+ SYSCALL_ENTRY INTERRUPT_SYSCALL
/* Single step - not used on 601 */
- EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
*/
- EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_SOFT_EMU_8xx, SoftEmu)
+ EXCEPTION_PROLOG INTERRUPT_SOFT_EMU_8xx SoftEmu
+ prepare_transfer_to_handler
+ bl emulation_assist_interrupt
+ REST_NVGPRS(r1)
+ b interrupt_return
- . = 0x1100
/*
* For the MPC8xx, this is a software tablewalk to load the instruction
- * TLB. It is modelled after the example in the Motorola manual. The task
- * switch loads the M_TWB register with the pointer to the first level table.
- * If we discover there is no second level table (value is zero) or if there
+ * TLB. The task switch loads the M_TWB register with the pointer to the first
+ * level table.
+ * If there is no second level table (value is zero) or if there
* is an invalid pte, we load that into the TLB, which causes another fault
* into the TLB Error interrupt where we can handle such problems.
* We have to use the MD_xxx registers for the tablewalk because the
* equivalent MI_xxx registers only perform the attribute functions.
*/
-InstructionTLBMiss:
-#ifdef CONFIG_8xx_CPU6
- stw r3, 8(r0)
-#endif
- DO_8xx_CPU6(0x3f80, r3)
- mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
- mfcr r10
-#ifdef CONFIG_8xx_CPU6
- stw r10, 0(r0)
- stw r11, 4(r0)
-#else
- mtspr SPRN_DAR, r10
- mtspr SPRN_SPRG2, r11
-#endif
- mfspr r10, SPRN_SRR0 /* Get effective address of fault */
+
#ifdef CONFIG_8xx_CPU15
- addi r11, r10, 0x1000
- tlbie r11
- addi r11, r10, -0x1000
- tlbie r11
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp) \
+ addi tmp, addr, PAGE_SIZE; \
+ tlbie tmp; \
+ addi tmp, addr, -PAGE_SIZE; \
+ tlbie tmp
+#else
+#define INVALIDATE_ADJACENT_PAGES_CPU15(addr, tmp)
#endif
- DO_8xx_CPU6(0x3780, r3)
- mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */
- mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
-#ifdef CONFIG_MODULES
- /* Only modules will cause ITLB Misses as we always
- * pin the first 8MB of kernel memory */
- andi. r11, r10, 0x0800 /* Address >= 0x80000000 */
- beq 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- rlwimi r10, r11, 0, 2, 19
-3:
-#endif
- lwz r11, 0(r10) /* Get the level 1 entry */
- rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
- beq 2f /* If zero, don't try to find a pte */
+ START_EXCEPTION(INTERRUPT_INST_TLB_MISS_8xx, InstructionTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH2, r10
+ mtspr SPRN_M_TW, r11
- /* We have a pte table, so load the MI_TWC with the attributes
- * for this "segment."
- */
- ori r11,r11,1 /* Set valid bit */
- DO_8xx_CPU6(0x2b80, r3)
- mtspr SPRN_MI_TWC, r11 /* Set segment attributes */
- DO_8xx_CPU6(0x3b80, r3)
- mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
- mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
- lwz r10, 0(r11) /* Get the pte */
-
-#ifdef CONFIG_SWAP
- andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
- cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
- bne- cr0, 2f
-#endif
+ mfspr r10, SPRN_SRR0 /* Get effective address of fault */
+ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
+ mtspr SPRN_MD_EPN, r10
+ mfspr r10, SPRN_M_TWB /* Get level 1 table */
+ lwz r11, 0(r10) /* Get level 1 entry */
+ mtspr SPRN_MD_TWC, r11
+ mfspr r10, SPRN_MD_TWC
+ lwz r10, 0(r10) /* Get the pte */
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
+ mtspr SPRN_MI_TWC, r11
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 21 and 28 must be clear.
- * Software indicator bits 24, 25, 26, and 27 must be
+ * Software indicator bits 20 and 23 must be clear.
+ * Software indicator bits 22, 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
- li r11, 0x00f0
- rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
- DO_8xx_CPU6(0x2d80, r3)
+ rlwinm r10, r10, 0, ~0x0f00 /* Clear bits 20-23 */
+ rlwimi r10, r10, 4, 0x0400 /* Copy _PAGE_EXEC into bit 21 */
+ ori r10, r10, RPN_PATTERN | 0x200 /* Set 22 and 24-27 */
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
- mfspr r10, SPRN_DAR
- mtcr r10
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_SPRG2
-#else
- lwz r11, 0(r0)
- mtcr r11
- lwz r11, 4(r0)
- lwz r3, 8(r0)
-#endif
- mfspr r10, SPRN_M_TW
+0: mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
+ rfi
+ patch_site 0b, patch__itlbmiss_exit_1
+
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__itlbmiss_perf
+0: lwz r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
-2:
- mfspr r11, SPRN_SRR1
- /* clear all error bits as TLB Miss
- * sets a few unconditionally
- */
- rlwinm r11, r11, 0, 0xffff
- mtspr SPRN_SRR1, r11
-
- /* Restore registers */
-#ifndef CONFIG_8xx_CPU6
- mfspr r10, SPRN_DAR
- mtcr r10
- li r11, 0x00f0
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_SPRG2
-#else
- lwz r11, 0(r0)
- mtcr r11
- lwz r11, 4(r0)
- lwz r3, 8(r0)
#endif
- mfspr r10, SPRN_M_TW
- b InstructionAccess
- . = 0x1200
-DataStoreTLBMiss:
-#ifdef CONFIG_8xx_CPU6
- stw r3, 8(r0)
-#endif
- DO_8xx_CPU6(0x3f80, r3)
- mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
- mfcr r10
-#ifdef CONFIG_8xx_CPU6
- stw r10, 0(r0)
- stw r11, 4(r0)
-#else
- mtspr SPRN_DAR, r10
- mtspr SPRN_SPRG2, r11
-#endif
- mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */
+ START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss)
+ mtspr SPRN_SPRG_SCRATCH2, r10
+ mtspr SPRN_M_TW, r11
- /* If we are faulting a kernel address, we have to use the
- * kernel page tables.
- */
- andi. r11, r10, 0x0800
- beq 3f
- lis r11, swapper_pg_dir@h
- ori r11, r11, swapper_pg_dir@l
- rlwimi r10, r11, 0, 2, 19
-3:
- lwz r11, 0(r10) /* Get the level 1 entry */
- rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
- beq 2f /* If zero, don't try to find a pte */
+ mfspr r10, SPRN_M_TWB /* Get level 1 table */
+ lwz r11, 0(r10) /* Get level 1 entry */
- /* We have a pte table, so load fetch the pte from the table.
- */
- ori r11, r11, 1 /* Set valid bit in physical L2 page */
- DO_8xx_CPU6(0x3b80, r3)
- mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
- mfspr r10, SPRN_MD_TWC /* ....and get the pte address */
+ mtspr SPRN_MD_TWC, r11
+ mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
- /* Insert the Guarded flag into the TWC from the Linux PTE.
+ /* Insert Guarded and Accessed flags into the TWC from the Linux PTE.
* It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, 27, 27
- /* Insert the WriteThru flag into the TWC from the Linux PTE.
- * It is bit 25 in the Linux PTE and bit 30 in the TWC
- */
- rlwimi r11, r10, 32-5, 30, 30
- DO_8xx_CPU6(0x3b80, r3)
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
+ rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MD_TWC, r11
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
- /* Honour kernel RO, User NA */
- /* 0x200 == Extended encoding, bit 22 */
- rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
- /* r11 = (r10 & _PAGE_RW) >> 1 */
- rlwinm r11, r10, 32-1, 0x200
- or r10, r11, r10
- /* invert RW and 0x200 bits */
- xori r10, r10, _PAGE_RW | 0x200
-
/* The Linux PTE won't go exactly into the MMU TLB.
- * Software indicator bits 22 and 28 must be clear.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
-2: li r11, 0x00f0
- rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
- DO_8xx_CPU6(0x3d80, r3)
+ li r11, RPN_PATTERN
+ rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
+ mtspr SPRN_DAR, r11 /* Tag DAR */
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
- mfspr r10, SPRN_DAR
- mtcr r10
- mtspr SPRN_DAR, r11 /* Tag DAR */
- mfspr r11, SPRN_SPRG2
-#else
- mtspr SPRN_DAR, r11 /* Tag DAR */
- lwz r11, 0(r0)
- mtcr r11
- lwz r11, 4(r0)
- lwz r3, 8(r0)
-#endif
- mfspr r10, SPRN_M_TW
+
+0: mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
rfi
+ patch_site 0b, patch__dtlbmiss_exit_1
+
+#ifdef CONFIG_PERF_EVENTS
+ patch_site 0f, patch__dtlbmiss_perf
+0: lwz r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, 1
+ stw r10, (dtlb_miss_counter - PAGE_OFFSET)@l(0)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ mfspr r11, SPRN_M_TW
+ rfi
+#endif
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
* addresses. There is nothing to do but handle a big time error fault.
*/
- . = 0x1300
-InstructionTLBError:
- b InstructionAccess
+ START_EXCEPTION(INTERRUPT_INST_TLB_ERROR_8xx, InstructionTLBError)
+ /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */
+ EXCEPTION_PROLOG INTERRUPT_INST_STORAGE InstructionTLBError
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ andis. r10,r9,SRR1_ISI_NOPT@h
+ beq+ .Litlbie
+ tlbie r12
+.Litlbie:
+ stw r12, _DAR(r11)
+ stw r5, _DSISR(r11)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* This is the data TLB error on the MPC8xx. This could be due to
- * many reasons, including a dirty update to a pte. We can catch that
- * one here, but anything else is an error. First, we track down the
- * Linux pte. If it is valid, write access is allowed, but the
- * page dirty bit is not set, we will set it and reload the TLB. For
- * any other case, we bail out to a higher level function that can
- * handle it.
+ * many reasons, including a dirty update to a pte. We bail out to
+ * a higher level function that can handle it.
*/
- . = 0x1400
-DataTLBError:
-#ifdef CONFIG_8xx_CPU6
- stw r3, 8(r0)
-#endif
- DO_8xx_CPU6(0x3f80, r3)
- mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
- mfcr r10
- stw r10, 0(r0)
- stw r11, 4(r0)
-
- mfspr r10, SPRN_DAR
- cmpwi cr0, r10, 0x00f0
- beq- FixupDAR /* must be a buggy dcbX, icbi insn. */
-DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */
- mfspr r10, SPRN_M_TW /* Restore registers */
- lwz r11, 0(r0)
- mtcr r11
- lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
- lwz r3, 8(r0)
+ START_EXCEPTION(INTERRUPT_DATA_TLB_ERROR_8xx, DataTLBError)
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ mfspr r11, SPRN_DAR
+ cmpwi cr1, r11, RPN_PATTERN
+ beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */
+DARFixed:/* Return from dcbx instruction bug workaround */
+ mfspr r11, SPRN_DSISR
+ rlwinm r11, r11, 0, DSISR_NOHPTE
+ cmpwi cr1, r11, 0
+ beq+ cr1, .Ldtlbie
+ mfspr r11, SPRN_DAR
+ tlbie r11
+ rlwinm r11, r11, 16, 0xffff
+ cmplwi cr1, r11, TASK_SIZE@h
+ bge- cr1, FixupPGD
+.Ldtlbie:
+ EXCEPTION_PROLOG_1
+ /* 0x300 is DataAccess exception, needed by bad_page_fault() */
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
+
+#ifdef CONFIG_VMAP_STACK
+ vmap_stack_overflow_exception
#endif
- b DataAccess
-
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
* using them.
*/
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
+ START_EXCEPTION(INTERRUPT_DATA_BREAKPOINT_8xx, DataBreakpoint)
+ EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ mfspr r11, SPRN_SRR0
+ cmplwi cr1, r11, (.Ldtlbie - PAGE_OFFSET)@l
+ cmplwi cr7, r11, (.Litlbie - PAGE_OFFSET)@l
+ cror 4*cr1+eq, 4*cr1+eq, 4*cr7+eq
+ bne cr1, 1f
+ mtcr r10
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+
+1: EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_BREAKPOINT_8xx DataBreakpoint handle_dar_dsisr=1
+ mfspr r4,SPRN_BAR
+ stw r4,_DAR(r11)
+ prepare_transfer_to_handler
+ bl do_break
+ REST_NVGPRS(r1)
+ b interrupt_return
+
+#ifdef CONFIG_PERF_EVENTS
+ START_EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, InstructionBreakpoint)
+ mtspr SPRN_SPRG_SCRATCH0, r10
+ lwz r10, (instruction_counter - PAGE_OFFSET)@l(0)
+ addi r10, r10, -1
+ stw r10, (instruction_counter - PAGE_OFFSET)@l(0)
+ lis r10, 0xffff
+ ori r10, r10, 0x01
+ mtspr SPRN_COUNTA, r10
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ rfi
+#else
+ EXCEPTION(INTERRUPT_INST_BREAKPOINT_8xx, Trap_1d, unknown_exception)
+#endif
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+ __HEAD
. = 0x2000
+FixupPGD:
+ mtspr SPRN_M_TW, r10
+ mfspr r10, SPRN_DAR
+ mtspr SPRN_MD_EPN, r10
+ mfspr r11, SPRN_M_TWB /* Get level 1 table */
+ lwz r10, 0(r11) /* Get the level 1 entry */
+ cmpwi cr1, r10, 0
+ bne cr1, 1f
+
+ rlwinm r10, r11, 0, 20, 31
+ oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha
+ lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r10) /* Get the level 1 entry */
+ cmpwi cr1, r10, 0
+ beq cr1, 1f
+ stw r10, 0(r11) /* Set the level 1 entry */
+ mfspr r10, SPRN_M_TW
+ mtcr r10
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ rfi
+1:
+ mfspr r10, SPRN_M_TW
+ b .Ldtlbie
+
/* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions
* by decoding the registers used by the dcbx instruction and adding them.
- * DAR is set to the calculated address and r10 also holds the EA on exit.
+ * DAR is set to the calculated address.
*/
- /* define if you don't want to use self modifying code */
-#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
+ mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
mfspr r10, SPRN_SRR0
- andis. r11, r10, 0x8000 /* Address >= 0x80000000 */
- DO_8xx_CPU6(0x3780, r3)
mtspr SPRN_MD_EPN, r10
- mfspr r11, SPRN_M_TWB /* Get level 1 table entry address */
- beq- 3f /* Branch if user space */
- lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
- ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
- rlwimi r11, r10, 32-20, 0xffc /* r11 = r11&~0xffc|(r10>>20)&0xffc */
-3: lwz r11, 0(r11) /* Get the level 1 entry */
- DO_8xx_CPU6(0x3b80, r3)
- mtspr SPRN_MD_TWC, r11 /* Load pte table base address */
- mfspr r11, SPRN_MD_TWC /* ....and get the pte address */
+ rlwinm r11, r10, 16, 0xfff8
+ cmpli cr1, r11, TASK_SIZE@h
+ mfspr r11, SPRN_M_TWB /* Get level 1 table */
+ blt+ cr1, 3f
+
+ /* create physical page address from effective address */
+ tophys(r11, r10)
+ mfspr r11, SPRN_M_TWB /* Get level 1 table */
+ rlwinm r11, r11, 0, 20, 31
+ oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r11, r11, (swapper_pg_dir - PAGE_OFFSET)@l
+3:
+ lwz r11, 0(r11) /* Get the level 1 entry */
+ rlwinm r11, r11, 0, ~_PMD_PAGE_8M
+ mtspr SPRN_MD_TWC, r11
+ mfspr r11, SPRN_MD_TWC
lwz r11, 0(r11) /* Get the pte */
/* concat physical page address(r11) and page offset(r10) */
- rlwimi r11, r10, 0, 20, 31
+ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31
lwz r11,0(r11)
/* Check if it really is a dcbx instruction. */
/* dcbt and dcbtst does not generate DTLB Misses/Errors,
* no need to include them here */
- srwi r10, r11, 26 /* check if major OP code is 31 */
- cmpwi cr0, r10, 31
- bne- 141f
- rlwinm r10, r11, 0, 21, 30
- cmpwi cr0, r10, 2028 /* Is dcbz? */
- beq+ 142f
- cmpwi cr0, r10, 940 /* Is dcbi? */
- beq+ 142f
- cmpwi cr0, r10, 108 /* Is dcbst? */
- beq+ 144f /* Fix up store bit! */
- cmpwi cr0, r10, 172 /* Is dcbf? */
- beq+ 142f
- cmpwi cr0, r10, 1964 /* Is icbi? */
- beq+ 142f
-141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */
+ xoris r10, r11, 0x7c00 /* check if major OP code is 31 */
+ rlwinm r10, r10, 0, 21, 5
+ cmpwi cr1, r10, 2028 /* Is dcbz? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 940 /* Is dcbi? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 108 /* Is dcbst? */
+ beq+ cr1, 144f /* Fix up store bit! */
+ cmpwi cr1, r10, 172 /* Is dcbf? */
+ beq+ cr1, 142f
+ cmpwi cr1, r10, 1964 /* Is icbi? */
+ beq+ cr1, 142f
+141: mfspr r10,SPRN_M_TW
b DARFixed /* Nope, go back to normal TLB processing */
144: mfspr r10, SPRN_DSISR
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
-#ifdef CONFIG_8xx_CPU6
- lwz r3, 8(r0) /* restore r3 from memory */
-#endif
-#ifndef NO_SELF_MODIFYING_CODE
- andis. r10,r11,0x1f /* test if reg RA is r0 */
- li r10,modified_instr@l
- dcbtst r0,r10 /* touch for store */
- rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
- oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
- ori r11,r11,532
- stw r11,0(r10) /* store add/and instruction */
- dcbf 0,r10 /* flush new instr. to memory. */
- icbi 0,r10 /* invalidate instr. cache line */
- lwz r11, 4(r0) /* restore r11 from memory */
- mfspr r10, SPRN_M_TW /* restore r10 from M_TW */
- isync /* Wait until new instr is loaded from memory */
-modified_instr:
- .space 4 /* this is where the add instr. is stored */
- bne+ 143f
- subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
-143: mtdar r10 /* store faulting EA in DAR */
- b DARFixed /* Go back to normal TLB handling */
-#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -670,26 +479,33 @@ modified_instr:
add r10, r10, r30 ;b 151f
add r10, r10, r31
151:
- rlwinm. r11,r11,19,24,28 /* offset into jump table for reg RA */
- beq 152f /* if reg RA is zero, don't add it */
+ rlwinm r11,r11,19,24,28 /* offset into jump table for reg RA */
+ cmpwi cr1, r11, 0
+ beq cr1, 152f /* if reg RA is zero, don't add it */
addi r11, r11, 150b@l /* add start of table */
mtctr r11 /* load ctr with jump address */
rlwinm r11,r11,0,16,10 /* make sure we don't execute this more than once */
bctr /* jump into table */
152:
mfdar r11
+ mtdar r10
mtctr r11 /* restore ctr reg from DAR */
- mtdar r10 /* save fault EA to DAR */
+ mfspr r11, SPRN_SPRG_THREAD
+ stw r10, DAR(r11)
+ mfspr r10, SPRN_DSISR
+ stw r10, DSISR(r11)
+ mfspr r10,SPRN_M_TW
b DARFixed /* Go back to normal TLB handling */
/* special handling for r10,r11 since these are modified already */
-153: lwz r11, 4(r0) /* load r11 from memory */
- b 155f
-154: mfspr r11, SPRN_M_TW /* load r10 from M_TW */
-155: add r10, r10, r11 /* add it */
+153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */
+ add r10, r10, r11 /* add it */
+ mfctr r11 /* restore r11 */
+ b 151b
+154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */
+ add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
-#endif
/*
* This is where the main kernel code starts.
@@ -707,14 +523,25 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
+ lis r0, STACK_END_MAGIC@h
+ ori r0, r0, STACK_END_MAGIC@l
+ stw r0, 0(r1)
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
+
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
+ tophys(r6,r6)
+ mtspr SPRN_M_TWB, r6
bl early_init /* We have to do this with MMU on */
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -730,17 +557,6 @@ start_here:
* init's THREAD like the context switch code does, but this is
* easier......until someone changes init's static structures.
*/
- lis r6, swapper_pg_dir@h
- ori r6, r6, swapper_pg_dir@l
- tophys(r6,r6)
-#ifdef CONFIG_8xx_CPU6
- lis r4, cpu6_errata_word@h
- ori r4, r4, cpu6_errata_word@l
- li r3, 0x3980
- stw r3, 12(r4)
- lwz r3, 12(r4)
-#endif
- mtspr SPRN_M_TWB, r6
lis r4,2f@h
ori r4,r4,2f@l
tophys(r4,r4)
@@ -750,18 +566,42 @@ start_here:
rfi
/* Load up the kernel context */
2:
- SYNC /* Force all PTE updates to finish */
+#ifdef CONFIG_PIN_TLB_IMMR
+ lis r0, MD_TWAM@h
+ oris r0, r0, 0x1f00
+ mtspr SPRN_MD_CTR, r0
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ tlbie r0
+ mtspr SPRN_MD_EPN, r0
+ LOAD_REG_IMMEDIATE(r0, MD_SVALID | MD_PS512K | MD_GUARDED)
+ mtspr SPRN_MD_TWC, r0
+ mfspr r0, SPRN_IMMR
+ rlwinm r0, r0, 0, 0xfff80000
+ ori r0, r0, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_RPN, r0
+ lis r0, (MD_TWAM | MD_RSV4I)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
+#ifndef CONFIG_PIN_TLB_TEXT
+ li r0, 0
+ mtspr SPRN_MI_CTR, r0
+#endif
+#if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR)
+ lis r0, MD_TWAM@h
+ mtspr SPRN_MD_CTR, r0
+#endif
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
- TLBSYNC /* ... on all CPUs */
/* set up the PTE pointers for the Abatron bdiGDB.
*/
- tovirt(r6,r6)
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* Must match your Abatron config file */
+ stw r5, 0xf0(0) /* Must match your Abatron config file */
tophys(r5,r5)
+ lis r6, swapper_pg_dir@h
+ ori r6, r6, swapper_pg_dir@l
stw r6, 0(r5)
/* Now turn on the MMU for real! */
@@ -777,96 +617,50 @@ start_here:
* virtual to physical. Also, set the cache mode since that is defined
* by TLB entries and perform any additional mapping (like of the IMMR).
* If configured to pin some TLBs, we pin the first 8 Mbytes of kernel,
- * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by
+ * 24 Mbytes of data, and the 512k IMMR space. Anything not covered by
* these mappings is mapped by page tables.
*/
-initial_mmu:
- tlbia /* Invalidate all TLB entries */
-/* Always pin the first 8 MB ITLB to prevent ITLB
- misses while mucking around with SRR0/SRR1 in asm
-*/
- lis r8, MI_RSV4I@h
- ori r8, r8, 0x1c00
+SYM_FUNC_START_LOCAL(initial_mmu)
+ li r8, 0
+ mtspr SPRN_MI_CTR, r8 /* remove PINNED ITLB entries */
+ lis r10, MD_TWAM@h
+ mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */
- mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
-
-#ifdef CONFIG_PIN_TLB
- lis r10, (MD_RSV4I | MD_RESETVAL)@h
- ori r10, r10, 0x1c00
- mr r8, r10
-#else
- lis r10, MD_RESETVAL@h
-#endif
-#ifndef CONFIG_8xx_COPYBACK
- oris r10, r10, MD_WTDEF@h
-#endif
- mtspr SPRN_MD_CTR, r10 /* Set data TLB control */
+ tlbia /* Invalidate all TLB entries */
- /* Now map the lower 8 Meg into the TLBs. For this quick hack,
- * we can load the instruction and data TLB registers with the
- * same values.
- */
- lis r8, KERNELBASE@h /* Create vaddr for TLB */
- ori r8, r8, MI_EVALID /* Mark it valid */
- mtspr SPRN_MI_EPN, r8
- mtspr SPRN_MD_EPN, r8
- li r8, MI_PS8MEG /* Set 8M byte page */
- ori r8, r8, MI_SVALID /* Make it valid */
- mtspr SPRN_MI_TWC, r8
- mtspr SPRN_MD_TWC, r8
- li r8, MI_BOOTINIT /* Create RPN for address 0 */
- mtspr SPRN_MI_RPN, r8 /* Store TLB entry */
- mtspr SPRN_MD_RPN, r8
- lis r8, MI_Kp@h /* Set the protection mode */
+ lis r8, MI_APG_INIT@h /* Set protection modes */
+ ori r8, r8, MI_APG_INIT@l
mtspr SPRN_MI_AP, r8
+ lis r8, MD_APG_INIT@h
+ ori r8, r8, MD_APG_INIT@l
mtspr SPRN_MD_AP, r8
- /* Map another 8 MByte at the IMMR to get the processor
- * internal registers (among other things).
- */
-#ifdef CONFIG_PIN_TLB
- addi r10, r10, 0x0100
- mtspr SPRN_MD_CTR, r10
-#endif
- mfspr r9, 638 /* Get current IMMR */
- andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */
-
- mr r8, r9 /* Create vaddr for TLB */
- ori r8, r8, MD_EVALID /* Mark it valid */
- mtspr SPRN_MD_EPN, r8
- li r8, MD_PS8MEG /* Set 8M byte page */
- ori r8, r8, MD_SVALID /* Make it valid */
- mtspr SPRN_MD_TWC, r8
- mr r8, r9 /* Create paddr for TLB */
- ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
- mtspr SPRN_MD_RPN, r8
-
-#ifdef CONFIG_PIN_TLB
- /* Map two more 8M kernel data pages.
- */
- addi r10, r10, 0x0100
- mtspr SPRN_MD_CTR, r10
-
- lis r8, KERNELBASE@h /* Create vaddr for TLB */
- addis r8, r8, 0x0080 /* Add 8M */
- ori r8, r8, MI_EVALID /* Mark it valid */
- mtspr SPRN_MD_EPN, r8
- li r9, MI_PS8MEG /* Set 8M byte page */
- ori r9, r9, MI_SVALID /* Make it valid */
- mtspr SPRN_MD_TWC, r9
- li r11, MI_BOOTINIT /* Create RPN for address 0 */
- addis r11, r11, 0x0080 /* Add 8M */
+ /* Map the lower RAM (up to 32 Mbytes) into the ITLB and DTLB */
+ lis r8, MI_RSV4I@h
+ ori r8, r8, 0x1c00
+ oris r12, r10, MD_RSV4I@h
+ ori r12, r12, 0x1c00
+ li r9, 4 /* up to 4 pages of 8M */
+ mtctr r9
+ lis r9, KERNELBASE@h /* Create vaddr for TLB */
+ li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID
+ li r11, MI_BOOTINIT /* Create RPN for address 0 */
+1:
+ mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
+ addi r8, r8, 0x100
+ ori r0, r9, MI_EVALID /* Mark it valid */
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r10
+ mtspr SPRN_MI_RPN, r11 /* Store TLB entry */
+ mtspr SPRN_MD_CTR, r12
+ addi r12, r12, 0x100
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r10
mtspr SPRN_MD_RPN, r11
+ addis r9, r9, 0x80
+ addis r11, r11, 0x80
- addi r10, r10, 0x0100
- mtspr SPRN_MD_CTR, r10
-
- addis r8, r8, 0x0080 /* Add 8M */
- mtspr SPRN_MD_EPN, r8
- mtspr SPRN_MD_TWC, r9
- addis r11, r11, 0x0080 /* Add 8M */
- mtspr SPRN_MD_RPN, r11
-#endif
+ bdnz 1b
/* Since the cache is enabled according to the information we
* just loaded into the TLB, invalidate and enable the caches here.
@@ -877,102 +671,119 @@ initial_mmu:
mtspr SPRN_DC_CST, r8
lis r8, IDC_ENABLE@h
mtspr SPRN_IC_CST, r8
-#ifdef CONFIG_8xx_COPYBACK
mtspr SPRN_DC_CST, r8
+ /* Disable debug mode entry on breakpoints */
+ mfspr r8, SPRN_DER
+#ifdef CONFIG_PERF_EVENTS
+ rlwinm r8, r8, 0, ~0xc
#else
- /* For a debug option, I left this here to easily enable
- * the write through cache mode
- */
- lis r8, DC_SFWT@h
- mtspr SPRN_DC_CST, r8
- lis r8, IDC_ENABLE@h
- mtspr SPRN_DC_CST, r8
+ rlwinm r8, r8, 0, ~0x8
#endif
+ mtspr SPRN_DER, r8
blr
+SYM_FUNC_END(initial_mmu)
-
-/*
- * Set up to use a given MMU context.
- * r3 is context number, r4 is PGD pointer.
- *
- * We place the physical address of the new task page directory loaded
- * into the MMU base register, and set the ASID compare register with
- * the new "context."
- */
-_GLOBAL(set_context)
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
+#ifdef CONFIG_PIN_TLB
+_GLOBAL(mmu_pin_tlb)
+ lis r9, (1f - PAGE_OFFSET)@h
+ ori r9, r9, (1f - PAGE_OFFSET)@l
+ mfmsr r10
+ mflr r11
+ li r12, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ rlwinm r0, r10, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
+ isync
+ .align 4
+ mtspr SPRN_SRR0, r9
+ mtspr SPRN_SRR1, r12
+ rfi
+1:
+ li r5, 0
+ lis r6, MD_TWAM@h
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MD_CTR, r6
+ tlbia
+
+#ifdef CONFIG_PIN_TLB_TEXT
+ LOAD_REG_IMMEDIATE(r5, 28 << 8)
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
+ LOAD_REG_ADDR(r9, _sinittext)
+ li r0, 4
+ mtctr r0
+
+2: ori r0, r6, MI_EVALID
+ mtspr SPRN_MI_CTR, r5
+ mtspr SPRN_MI_EPN, r0
+ mtspr SPRN_MI_TWC, r7
+ mtspr SPRN_MI_RPN, r8
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+ lis r0, MI_RSV4I@h
+ mtspr SPRN_MI_CTR, r0
#endif
-#ifdef CONFIG_8xx_CPU6
- lis r6, cpu6_errata_word@h
- ori r6, r6, cpu6_errata_word@l
- tophys (r4, r4)
- li r7, 0x3980
- stw r7, 12(r6)
- lwz r7, 12(r6)
- mtspr SPRN_M_TWB, r4 /* Update MMU base address */
- li r7, 0x3380
- stw r7, 12(r6)
- lwz r7, 12(r6)
- mtspr SPRN_M_CASID, r3 /* Update context */
+ LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
+#ifdef CONFIG_PIN_TLB_DATA
+ LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
+ li r8, 0
+#ifdef CONFIG_PIN_TLB_IMMR
+ li r0, 3
#else
- mtspr SPRN_M_CASID,r3 /* Update context */
- tophys (r4, r4)
- mtspr SPRN_M_TWB, r4 /* and pgd */
+ li r0, 4
#endif
- SYNC
- blr
-
-#ifdef CONFIG_8xx_CPU6
-/* It's here because it is unique to the 8xx.
- * It is important we get called with interrupts disabled. I used to
- * do that, but it appears that all code that calls this already had
- * interrupt disabled.
- */
- .globl set_dec_cpu6
-set_dec_cpu6:
- lis r7, cpu6_errata_word@h
- ori r7, r7, cpu6_errata_word@l
- li r4, 0x2c00
- stw r4, 8(r7)
- lwz r4, 8(r7)
- mtspr 22, r3 /* Update Decrementer */
- SYNC
- blr
+ mtctr r0
+ cmpwi r4, 0
+ beq 4f
+ LOAD_REG_ADDR(r9, _sinittext)
+
+2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r9
+ bdnzt lt, 2b
+4:
+2: ori r0, r6, MD_EVALID
+ ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r12
+ addi r5, r5, 0x100
+ addis r6, r6, SZ_8M@h
+ addis r8, r8, SZ_8M@h
+ cmplw r6, r3
+ bdnzt lt, 2b
#endif
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
- .data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space 4096
-
-/* Room for two PTE table poiners, usually the kernel and current user
- * pointer to their respective root page table (pgdir).
- */
-abatron_pteptrs:
- .space 8
-
-#ifdef CONFIG_8xx_CPU6
- .globl cpu6_errata_word
-cpu6_errata_word:
- .space 16
+#ifdef CONFIG_PIN_TLB_IMMR
+ LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
+ LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)
+ mfspr r8, SPRN_IMMR
+ rlwinm r8, r8, 0, 0xfff80000
+ ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
+ _PAGE_NO_CACHE | _PAGE_PRESENT
+ mtspr SPRN_MD_CTR, r5
+ mtspr SPRN_MD_EPN, r0
+ mtspr SPRN_MD_TWC, r7
+ mtspr SPRN_MD_RPN, r8
+#endif
+#if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA)
+ lis r0, (MD_RSV4I | MD_TWAM)@h
+ mtspr SPRN_MD_CTR, r0
+#endif
+ mtspr SPRN_SRR1, r10
+ mtspr SPRN_SRR0, r11
+ rfi
#endif
-
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_book3s_32.S
index dc0488b6f6e1..cb2bca76be53 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -13,19 +14,15 @@
* This file contains the low-level support and setup for the
* PowerPC platform, including trap and interrupt dispatch.
* (The PPC 8xx embedded CPUs use head_8xx.S instead.)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/init.h>
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
@@ -34,8 +31,11 @@
#include <asm/ptrace.h>
#include <asm/bug.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/feature-fixups.h>
+#include <asm/interrupt.h>
+
+#include "head_32.h"
-/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -45,24 +45,19 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
- beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
- mtspr SPRN_DBAT##n##L,RB; \
-1:
+ mtspr SPRN_DBAT##n##L,RB
__HEAD
- .stabs "arch/powerpc/kernel/",N_SO,0,0,0f
- .stabs "head_32.S",N_SO,0,0,0f
-0:
-_ENTRY(_stext);
+_GLOBAL(_stext);
/*
* _start is defined this way because the XCOFF loader in the OpenFirmware
* on the powermac expects the entry point to be a procedure descriptor.
*/
-_ENTRY(_start);
+_GLOBAL(_start);
/*
* These are here for legacy reasons, the kernel used to
* need to look like a coff function entry for the pmac
@@ -158,6 +153,9 @@ __after_mmu_off:
bl flush_tlbs
bl initial_bats
+ bl load_segment_registers
+ bl reloc_offset
+ bl early_hash_table
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -174,10 +172,8 @@ __after_mmu_off:
bl reloc_offset
li r24,0 /* cpu# */
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_6xx
bl reloc_offset
bl init_idle_6xx
-#endif /* CONFIG_6xx */
/*
@@ -203,13 +199,12 @@ __after_mmu_off:
*/
turn_on_mmu:
mfmsr r0
- ori r0,r0,MSR_DR|MSR_IR
+ ori r0,r0,MSR_DR|MSR_IR|MSR_RI
mtspr SPRN_SRR1,r0
lis r0,start_here@h
ori r0,r0,start_here@l
mtspr SPRN_SRR0,r0
- SYNC
- RFI /* enables MMU */
+ rfi /* enables MMU */
/*
* We need __secondary_hold as a place to hold the other cpus on
@@ -240,108 +235,10 @@ __secondary_hold_spinloop:
__secondary_hold_acknowledge:
.long -1
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,THREAD_INFO-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- CLR_TOP32(r11); \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
- stw r0,GPR0(r11); \
- lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
- addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
- stw r10,8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
- DO_KVM n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
/* System reset */
/* core99 pmac starts the seconary here by changing the vector, and
- putting it back to what it was (unknown_exception) when done. */
- EXCEPTION(0x100, Reset, unknown_exception, EXC_XFER_STD)
+ putting it back to what it was (unknown_async_exception) when done. */
+ EXCEPTION(INTERRUPT_SYSTEM_RESET, Reset, unknown_async_exception)
/* Machine check */
/*
@@ -351,89 +248,117 @@ i##n: \
* registers that might have bad values includes all the GPRs
* and all the BATs. We indicate that we are in RTAS by putting
* a non-zero value, the address of the exception frame to use,
- * in SPRG2. The machine check handler checks SPRG2 and uses its
- * value if it is non-zero. If we ever needed to free up SPRG2,
- * we could use a field in the thread_info or thread_struct instead.
+ * in thread.rtas_sp. The machine check handler checks thread.rtas_sp
+ * and uses its value if it is non-zero.
* (Other exception handlers assume that r1 is a valid kernel stack
* pointer when we take an exception from supervisor mode.)
* -- paulus.
*/
- . = 0x200
- DO_KVM 0x200
- mtspr SPRN_SPRG_SCRATCH0,r10
- mtspr SPRN_SPRG_SCRATCH1,r11
- mfcr r10
+ START_EXCEPTION(INTERRUPT_MACHINE_CHECK, MachineCheck)
+ EXCEPTION_PROLOG_0
#ifdef CONFIG_PPC_CHRP
- mfspr r11,SPRN_SPRG_RTAS
- cmpwi 0,r11,0
- bne 7f
+ mtspr SPRN_SPRG_SCRATCH2,r1
+ mfspr r1, SPRN_SPRG_THREAD
+ lwz r1, RTAS_SP(r1)
+ cmpwi cr1, r1, 0
+ bne cr1, 7f
+ mfspr r1, SPRN_SPRG_SCRATCH2
#endif /* CONFIG_PPC_CHRP */
EXCEPTION_PROLOG_1
-7: EXCEPTION_PROLOG_2
- addi r3,r1,STACK_FRAME_OVERHEAD
+7: EXCEPTION_PROLOG_2 0x200 MachineCheck
#ifdef CONFIG_PPC_CHRP
- mfspr r4,SPRN_SPRG_RTAS
- cmpwi cr1,r4,0
- bne cr1,1f
-#endif
- EXC_XFER_STD(0x200, machine_check_exception)
-#ifdef CONFIG_PPC_CHRP
-1: b machine_check_in_rtas
+ beq cr1, 1f
+ twi 31, 0, 0
#endif
+1: prepare_transfer_to_handler
+ bl machine_check_exception
+ b interrupt_return
/* Data access exception. */
- . = 0x300
- DO_KVM 0x300
-DataAccess:
- EXCEPTION_PROLOG
- mfspr r10,SPRN_DSISR
- stw r10,_DSISR(r11)
- andis. r0,r10,0xa470 /* weird error? */
- bne 1f /* if not, try to put a PTE */
- mfspr r4,SPRN_DAR /* into the hash table */
- rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
- bl hash_page
-1: lwz r5,_DSISR(r11) /* get DSISR value */
- mfspr r4,SPRN_DAR
- EXC_XFER_LITE(0x300, handle_page_fault)
+ START_EXCEPTION(INTERRUPT_DATA_STORAGE, DataAccess)
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+ mtspr SPRN_SPRG_SCRATCH2,r10
+ mfspr r10, SPRN_SPRG_THREAD
+ stw r11, THR11(r10)
+ mfspr r10, SPRN_DSISR
+ mfcr r11
+ andis. r10, r10, (DSISR_BAD_FAULT_32S | DSISR_DABRMATCH)@h
+ mfspr r10, SPRN_SPRG_THREAD
+ beq hash_page_dsi
+.Lhash_page_dsi_cont:
+ mtcr r11
+ lwz r11, THR11(r10)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+MMU_FTR_SECTION_ELSE
+ b 1f
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
+1: EXCEPTION_PROLOG_0 handle_dar_dsisr=1
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataAccess handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ lwz r5, _DSISR(r1)
+ andis. r0, r5, DSISR_DABRMATCH@h
+ bne- 1f
+ bl do_page_fault
+ b interrupt_return
+1: bl do_break
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Instruction access exception. */
- . = 0x400
- DO_KVM 0x400
-InstructionAccess:
- EXCEPTION_PROLOG
- andis. r0,r9,0x4000 /* no pte found? */
- beq 1f /* if so, try to put a PTE */
- li r3,0 /* into the hash table */
- mr r4,r12 /* SRR0 is fault address */
- bl hash_page
-1: mr r4,r12
- mr r5,r9
- EXC_XFER_LITE(0x400, handle_page_fault)
+ START_EXCEPTION(INTERRUPT_INST_STORAGE, InstructionAccess)
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfspr r10, SPRN_SPRG_THREAD
+ mfspr r11, SPRN_SRR0
+ stw r11, SRR0(r10)
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+ stw r11, SRR1(r10)
+ mfcr r10
+#ifdef CONFIG_PPC_BOOK3S_604
+BEGIN_MMU_FTR_SECTION
+ andis. r11, r11, SRR1_ISI_NOPT@h /* no pte found? */
+ bne hash_page_isi
+.Lhash_page_isi_cont:
+ mfspr r11, SPRN_SRR1 /* check whether user or kernel */
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
+ andi. r11, r11, MSR_PR
+
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2 INTERRUPT_INST_STORAGE InstructionAccess
+ andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */
+ stw r5, _DSISR(r11)
+ stw r12, _DAR(r11)
+ prepare_transfer_to_handler
+ bl do_page_fault
+ b interrupt_return
/* External interrupt */
- EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_EXTERNAL, HardwareInterrupt, do_IRQ)
/* Alignment exception */
- . = 0x600
- DO_KVM 0x600
-Alignment:
- EXCEPTION_PROLOG
- mfspr r4,SPRN_DAR
- stw r4,_DAR(r11)
- mfspr r5,SPRN_DSISR
- stw r5,_DSISR(r11)
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ START_EXCEPTION(INTERRUPT_ALIGNMENT, Alignment)
+ EXCEPTION_PROLOG INTERRUPT_ALIGNMENT Alignment handle_dar_dsisr=1
+ prepare_transfer_to_handler
+ bl alignment_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Program check exception */
- EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
+ START_EXCEPTION(INTERRUPT_PROGRAM, ProgramCheck)
+ EXCEPTION_PROLOG INTERRUPT_PROGRAM ProgramCheck
+ prepare_transfer_to_handler
+ bl program_check_exception
+ REST_NVGPRS(r1)
+ b interrupt_return
/* Floating-point unavailable */
- . = 0x800
- DO_KVM 0x800
-FPUnavailable:
+ START_EXCEPTION(0x800, FPUnavailable)
+#ifdef CONFIG_PPC_FPU
BEGIN_FTR_SECTION
/*
* Certain Freescale cores don't have a FPU and treat fp instructions
@@ -441,29 +366,29 @@ BEGIN_FTR_SECTION
*/
b ProgramCheck
END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG INTERRUPT_FP_UNAVAIL FPUnavailable
beq 1f
bl load_up_fpu /* if from user, just load it up */
b fast_exception_return
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+1: prepare_transfer_to_handler
+ bl kernel_fp_unavailable_exception
+ b interrupt_return
+#else
+ b ProgramCheck
+#endif
/* Decrementer */
- EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
+ EXCEPTION(INTERRUPT_DECREMENTER, Decrementer, timer_interrupt)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception)
/* System call */
- . = 0xc00
- DO_KVM 0xc00
-SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ START_EXCEPTION(INTERRUPT_SYSCALL, SystemCall)
+ SYSCALL_ENTRY INTERRUPT_SYSCALL
-/* Single step - not used on 601 */
- EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(INTERRUPT_TRACE, SingleStep, single_step_exception)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception)
/*
* The Altivec unavailable trap is at 0x0f20. Foo.
@@ -473,66 +398,47 @@ SystemCall:
* non-altivec kernel running on a machine with altivec just
* by executing an altivec instruction.
*/
- . = 0xf00
- DO_KVM 0xf00
+ START_EXCEPTION(INTERRUPT_PERFMON, PerformanceMonitorTrap)
b PerformanceMonitor
- . = 0xf20
- DO_KVM 0xf20
+ START_EXCEPTION(INTERRUPT_ALTIVEC_UNAVAIL, AltiVecUnavailableTrap)
b AltiVecUnavailable
+ __HEAD
/*
* Handle TLB miss for instruction on 603/603e.
* Note: we get an alternate set of r0 - r3 to use automatically.
*/
- . = 0x1000
+ . = INTERRUPT_INST_TLB_MISS_603
InstructionTLBMiss:
-/*
- * r0: scratch
- * r1: linux style pte ( later becomes ppc hardware pte )
- * r2: ptr to linux-style pte
- * r3: scratch
- */
/* Get PTE (linux-style) and check access */
- mfspr r3,SPRN_IMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
- cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
- lwz r2,PGDIR(r2)
- bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
+ mfspr r0,SPRN_IMISS
+ mfspr r2, SPRN_SDR1
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+ rlwinm r2, r2, 28, 0xfffff000
+ rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
+#ifdef CONFIG_EXECMEM
+ rlwinm r3, r0, 4, 0xf
+ subi r3, r3, (TASK_SIZE >> 28) & 0xf
+#endif
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
beq- InstructionAddressInvalid /* return if no mapping */
- rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r0,0(r2) /* get linux-style pte */
- andc. r1,r1,r0 /* check access & ~permission */
+ rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */
+ lwz r2,0(r2) /* get linux-style pte */
+ andc. r1,r1,r2 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
- ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+#ifdef CONFIG_EXECMEM
+ rlwimi r2, r3, 1, 31, 31 /* userspace ? -> PP lsb */
+#endif
+ ori r1, r1, 0xe06 /* clear out reserved bits */
+ andc r1, r2, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- tlbli r3
+ tlbli r0
mfspr r3,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r3
rfi
@@ -559,69 +465,64 @@ InstructionAddressInvalid:
/*
* Handle TLB miss for DATA Load operation on 603/603e
*/
- . = 0x1100
+ . = INTERRUPT_DATA_LOAD_TLB_MISS_603
DataLoadTLBMiss:
-/*
- * r0: scratch
- * r1: linux style pte ( later becomes ppc hardware pte )
- * r2: ptr to linux-style pte
- * r3: scratch
- */
/* Get PTE (linux-style) and check access */
- mfspr r3,SPRN_DMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
- cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */
- lwz r2,PGDIR(r2)
- bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
- lwz r2,0(r2) /* get pmd entry */
+ mfspr r0,SPRN_DMISS
+ mfspr r2, SPRN_SDR1
+ rlwinm r1, r2, 28, 0xfffff000
+ rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r1) /* get pmd entry */
+ rlwinm r3, r0, 4, 0xf
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
- beq- DataAddressInvalid /* return if no mapping */
- rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r0,0(r2) /* get linux-style pte */
- andc. r1,r1,r0 /* check access & ~permission */
+ subi r3, r3, (TASK_SIZE >> 28) & 0xf
+ beq- 2f /* bail if no mapping */
+1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */
+ lwz r2,0(r2) /* get linux-style pte */
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ
+ andc. r1,r1,r2 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
- stw r0,0(r2) /* update PTE (accessed bit) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r2,r0,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
- and r1,r1,r2 /* writable if _RW and _DIRTY */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
+ rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */
+ rlwimi r2,r3,2,30,31 /* userspace ? -> PP */
+ rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */
+ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r1,r2,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mtspr SPRN_RPA,r1
- mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
- mtcrf 0x80,r2
BEGIN_MMU_FTR_SECTION
- li r0,1
+ li r3,1
mfspr r1,SPRN_SPRG_603_LRU
- rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
- slw r0,r0,r2
- xor r1,r0,r1
- srw r0,r1,r2
+ rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */
+ slw r3,r3,r2
+ xor r1,r3,r1
+ srw r3,r1,r2
mtspr SPRN_SPRG_603_LRU,r1
mfspr r2,SPRN_SRR1
- rlwimi r2,r0,31-14,14,14
+ rlwimi r2,r3,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
- tlbld r3
+ mtcrf 0x80,r2
+ tlbld r0
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+ tlbld r0
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
+2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+ rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r2) /* get pmd entry */
+ cmpwi cr0,r2,0
+ beq- DataAddressInvalid /* return if no mapping */
+ stw r2,0(r1)
+ rlwinm. r2,r2,0,0,19 /* extract address of pte page */
+ b 1b
DataAddressInvalid:
mfspr r3,SPRN_SRR1
rlwinm r1,r3,9,6,6 /* Get load/store bit */
@@ -643,45 +544,28 @@ DataAddressInvalid:
/*
* Handle TLB miss for DATA Store on 603/603e
*/
- . = 0x1200
+ . = INTERRUPT_DATA_STORE_TLB_MISS_603
DataStoreTLBMiss:
-/*
- * r0: scratch
- * r1: linux style pte ( later becomes ppc hardware pte )
- * r2: ptr to linux-style pte
- * r3: scratch
- */
/* Get PTE (linux-style) and check access */
- mfspr r3,SPRN_DMISS
- lis r1,PAGE_OFFSET@h /* check if kernel address */
- cmplw 0,r1,r3
- mfspr r2,SPRN_SPRG_THREAD
- li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */
- lwz r2,PGDIR(r2)
- bge- 112f
- mfspr r2,SPRN_SRR1 /* and MSR_PR bit from SRR1 */
- rlwimi r1,r2,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */
- lis r2,swapper_pg_dir@ha /* if kernel address, use */
- addi r2,r2,swapper_pg_dir@l /* kernel page table */
-112: tophys(r2,r2)
- rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
- lwz r2,0(r2) /* get pmd entry */
+ mfspr r0,SPRN_DMISS
+ mfspr r2, SPRN_SDR1
+ rlwinm r1, r2, 28, 0xfffff000
+ rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r1) /* get pmd entry */
+ rlwinm r3, r0, 4, 0xf
rlwinm. r2,r2,0,0,19 /* extract address of pte page */
- beq- DataAddressInvalid /* return if no mapping */
- rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */
- lwz r0,0(r2) /* get linux-style pte */
- andc. r1,r1,r0 /* check access & ~permission */
+ subi r3, r3, (TASK_SIZE >> 28) & 0xf
+ beq- 2f /* bail if no mapping */
+1:
+ rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */
+ lwz r2,0(r2) /* get linux-style pte */
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+ andc. r1,r1,r2 /* check access & ~permission */
bne- DataAddressInvalid /* return if access not permitted */
- ori r0,r0,_PAGE_ACCESSED|_PAGE_DIRTY
- /*
- * NOTE! We are assuming this is not an SMP system, otherwise
- * we would need to update the pte atomically with lwarx/stwcx.
- */
- stw r0,0(r2) /* update PTE (accessed/dirty bits) */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- li r1,0xe05 /* clear out reserved bits & PP lsb */
- andc r1,r0,r1 /* PP = user? 2: 0 */
+ rlwimi r2,r3,1,31,31 /* userspace ? -> PP lsb */
+ li r1,0xe06 /* clear out reserved bits & PP msb */
+ andc r1,r2,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -689,83 +573,182 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
mtcrf 0x80,r2
BEGIN_MMU_FTR_SECTION
- li r0,1
+ li r3,1
mfspr r1,SPRN_SPRG_603_LRU
- rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */
- slw r0,r0,r2
- xor r1,r0,r1
- srw r0,r1,r2
+ rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */
+ slw r3,r3,r2
+ xor r1,r3,r1
+ srw r3,r1,r2
mtspr SPRN_SPRG_603_LRU,r1
mfspr r2,SPRN_SRR1
- rlwimi r2,r0,31-14,14,14
+ rlwimi r2,r3,31-14,14,14
mtspr SPRN_SRR1,r2
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
- tlbld r3
+ mtcrf 0x80,r2
+ tlbld r0
+ rfi
+MMU_FTR_SECTION_ELSE
+ mfspr r2,SPRN_SRR1 /* Need to restore CR0 */
+ mtcrf 0x80,r2
+ tlbld r0
rfi
+ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
+
+2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha
+ addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
+ rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */
+ lwz r2,0(r2) /* get pmd entry */
+ cmpwi cr0,r2,0
+ beq- DataAddressInvalid /* return if no mapping */
+ stw r2,0(r1)
+ rlwinm r2,r2,0,0,19 /* extract address of pte page */
+ b 1b
#ifndef CONFIG_ALTIVEC
#define altivec_assist_exception unknown_exception
#endif
- EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE)
- EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
- EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, MOLTrampoline, unknown_exception, EXC_XFER_EE_LITE)
-
- .globl mol_trampoline
- .set mol_trampoline, i0x2f00
+#ifndef CONFIG_TAU_INT
+#define TAUException unknown_async_exception
+#endif
+
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception)
+ EXCEPTION(0x1400, SMI, SMIException)
+ EXCEPTION(0x1500, Trap_15, unknown_exception)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception)
+ EXCEPTION(0x1700, Trap_17, TAUException)
+ EXCEPTION(0x1800, Trap_18, unknown_exception)
+ EXCEPTION(0x1900, Trap_19, unknown_exception)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception)
+ EXCEPTION(0x2000, RunMode, RunModeException)
+ EXCEPTION(0x2100, Trap_21, unknown_exception)
+ EXCEPTION(0x2200, Trap_22, unknown_exception)
+ EXCEPTION(0x2300, Trap_23, unknown_exception)
+ EXCEPTION(0x2400, Trap_24, unknown_exception)
+ EXCEPTION(0x2500, Trap_25, unknown_exception)
+ EXCEPTION(0x2600, Trap_26, unknown_exception)
+ EXCEPTION(0x2700, Trap_27, unknown_exception)
+ EXCEPTION(0x2800, Trap_28, unknown_exception)
+ EXCEPTION(0x2900, Trap_29, unknown_exception)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception)
+ __HEAD
. = 0x3000
+#ifdef CONFIG_PPC_BOOK3S_604
+.macro save_regs_thread thread
+ stw r0, THR0(\thread)
+ stw r3, THR3(\thread)
+ stw r4, THR4(\thread)
+ stw r5, THR5(\thread)
+ stw r6, THR6(\thread)
+ stw r8, THR8(\thread)
+ stw r9, THR9(\thread)
+ mflr r0
+ stw r0, THLR(\thread)
+ mfctr r0
+ stw r0, THCTR(\thread)
+.endm
+
+.macro restore_regs_thread thread
+ lwz r0, THLR(\thread)
+ mtlr r0
+ lwz r0, THCTR(\thread)
+ mtctr r0
+ lwz r0, THR0(\thread)
+ lwz r3, THR3(\thread)
+ lwz r4, THR4(\thread)
+ lwz r5, THR5(\thread)
+ lwz r6, THR6(\thread)
+ lwz r8, THR8(\thread)
+ lwz r9, THR9(\thread)
+.endm
+
+hash_page_dsi:
+ save_regs_thread r10
+ mfdsisr r3
+ mfdar r4
+ mfsrr0 r5
+ mfsrr1 r9
+ rlwinm r3, r3, 32 - 15, _PAGE_WRITE /* DSISR_STORE -> _PAGE_WRITE */
+ ori r3, r3, _PAGE_PRESENT | _PAGE_READ
+ bl hash_page
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ b .Lhash_page_dsi_cont
+
+hash_page_isi:
+ mr r11, r10
+ mfspr r10, SPRN_SPRG_THREAD
+ save_regs_thread r10
+ li r3, _PAGE_PRESENT | _PAGE_EXEC
+ lwz r4, SRR0(r10)
+ lwz r9, SRR1(r10)
+ bl hash_page
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ mr r10, r11
+ b .Lhash_page_isi_cont
+
+ .globl fast_hash_page_return
+fast_hash_page_return:
+ andis. r10, r9, SRR1_ISI_NOPT@h /* Set on ISI, cleared on DSI */
+ mfspr r10, SPRN_SPRG_THREAD
+ restore_regs_thread r10
+ bne 1f
+
+ /* DSI */
+ mtcr r11
+ lwz r11, THR11(r10)
+ mfspr r10, SPRN_SPRG_SCRATCH2
+ rfi
+
+1: /* ISI */
+ mtcr r11
+ mfspr r11, SPRN_SPRG_SCRATCH1
+ mfspr r10, SPRN_SPRG_SCRATCH0
+ rfi
+#endif /* CONFIG_PPC_BOOK3S_604 */
+
+#ifdef CONFIG_VMAP_STACK
+ vmap_stack_overflow_exception
+#endif
+
+ __HEAD
AltiVecUnavailable:
- EXCEPTION_PROLOG
+ EXCEPTION_PROLOG 0xf20 AltiVecUnavailable
#ifdef CONFIG_ALTIVEC
beq 1f
bl load_up_altivec /* if from user, just load it up */
b fast_exception_return
#endif /* CONFIG_ALTIVEC */
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
+1: prepare_transfer_to_handler
+ bl altivec_unavailable_exception
+ b interrupt_return
+ __HEAD
PerformanceMonitor:
- EXCEPTION_PROLOG
- addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_STD(0xf00, performance_monitor_exception)
+ EXCEPTION_PROLOG 0xf00 PerformanceMonitor
+ prepare_transfer_to_handler
+ bl performance_monitor_exception
+ b interrupt_return
+ __HEAD
/*
* This code is jumped to from the startup code to copy
* the kernel image to physical address PHYSICAL_START.
*/
relocate_kernel:
- addis r9,r26,klimit@ha /* fetch klimit */
- lwz r25,klimit@l(r9)
- addis r25,r25,-KERNELBASE@h
lis r3,PHYSICAL_START@h /* Destination base address */
li r6,0 /* Destination offset */
li r5,0x4000 /* # bytes of memory to copy */
@@ -773,7 +756,8 @@ relocate_kernel:
addi r0,r3,4f@l /* jump to the address of 4f */
mtctr r0 /* in copy and do the rest. */
bctr /* jump to the copy */
-4: mr r5,r25
+4: lis r5,_end-KERNELBASE@h
+ ori r5,r5,_end-KERNELBASE@l
bl copy_and_flush /* copy the rest */
b turn_on_mmu
@@ -783,7 +767,7 @@ relocate_kernel:
* r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset
* on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5.
*/
-_ENTRY(copy_and_flush)
+_GLOBAL(copy_and_flush)
addi r5,r5,-4
addi r6,r6,-4
4: li r0,L1_CACHE_BYTES/4
@@ -826,7 +810,6 @@ __secondary_start_pmac_0:
set to map the 0xf0000000 - 0xffffffff region */
mfmsr r0
rlwinm r0,r0,0,28,26 /* clear DR (0x10) */
- SYNC
mtmsr r0
isync
@@ -838,44 +821,44 @@ __secondary_start:
lis r3,-KERNELBASE@h
mr r4,r24
bl call_setup_cpu /* Call setup_cpu for this CPU */
-#ifdef CONFIG_6xx
lis r3,-KERNELBASE@h
bl init_idle_6xx
-#endif /* CONFIG_6xx */
- /* get current_thread_info and current */
- lis r1,secondary_ti@ha
- tophys(r1,r1)
- lwz r1,secondary_ti@l(r1)
- tophys(r2,r1)
- lwz r2,TI_TASK(r2)
+ /* get current's stack and current */
+ lis r2,secondary_current@ha
+ tophys(r2,r2)
+ lwz r2,secondary_current@l(r2)
+ tophys(r1,r2)
+ lwz r1,TASK_STACK(r1)
/* stack */
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r0,0
tophys(r3,r1)
stw r0,0(r3)
/* load up the MMU */
+ bl load_segment_registers
bl load_up_mmu
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* phys address of our thread_struct */
- CLR_TOP32(r4)
mtspr SPRN_SPRG_THREAD,r4
- li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+BEGIN_MMU_FTR_SECTION
+ lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+ rlwinm r4, r4, 4, 0xffff01ff
+ mtspr SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
/* enable MMU and jump to start_secondary */
li r4,MSR_KERNEL
- FIX_SRR1(r4,r5)
lis r3,start_secondary@h
ori r3,r3,start_secondary@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
- RFI
+ rfi
#endif /* CONFIG_SMP */
#ifdef CONFIG_KVM_BOOK3S_HANDLER
@@ -883,46 +866,37 @@ __secondary_start:
#endif
/*
- * Those generic dummy functions are kept for CPUs not
- * included in CONFIG_6xx
- */
-#if !defined(CONFIG_6xx)
-_ENTRY(__save_cpu_setup)
- blr
-_ENTRY(__restore_cpu_setup)
- blr
-#endif /* !defined(CONFIG_6xx) */
-
-
-/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
-load_up_mmu:
+SYM_FUNC_START_LOCAL(early_hash_table)
sync /* Force all PTE updates to finish */
isync
tlbia /* Clear all TLB entries */
sync /* wait for tlbia/tlbie to finish */
TLBSYNC /* ... on all CPUs */
/* Load the SDR1 register (hash table base & size) */
+ lis r6, early_hash - PAGE_OFFSET@h
+ ori r6, r6, 3 /* 256kB table */
+ mtspr SPRN_SDR1, r6
+ blr
+SYM_FUNC_END(early_hash_table)
+
+SYM_FUNC_START_LOCAL(load_up_mmu)
+ sync /* Force all PTE updates to finish */
+ isync
+ tlbia /* Clear all TLB entries */
+ sync /* wait for tlbia/tlbie to finish */
+ TLBSYNC /* ... on all CPUs */
+BEGIN_MMU_FTR_SECTION
+ /* Load the SDR1 register (hash table base & size) */
lis r6,_SDR1@ha
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
- li r0,16 /* load up segment register values */
- mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
- li r4,0
-3: mtsrin r3,r4
- addi r3,r3,0x111 /* increment VSID */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
-/* Load the BAT registers with the values set up by MMU_init.
- MMU_init takes care of whether we're on a 601 or not. */
- mfpvr r3
- srwi r3,r3,16
- cmpwi r3,1
+/* Load the BAT registers with the values set up by MMU_init. */
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -937,6 +911,31 @@ BEGIN_MMU_FTR_SECTION
LOAD_BAT(7,r3,r4,r5)
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+SYM_FUNC_END(load_up_mmu)
+
+_GLOBAL(load_segment_registers)
+ li r0, NUM_USER_SEGMENTS /* load up user segment register values */
+ mtctr r0 /* for context 0 */
+#ifdef CONFIG_PPC_KUEP
+ lis r3, SR_NX@h /* Kp = 0, Ks = 0, VSID = 0 */
+#else
+ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#endif
+ li r4, 0
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
+ mtctr r0 /* for context 0 */
+ rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */
+ rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */
+ oris r3, r3, SR_KP@h /* Kp = 1 */
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ blr
/*
* This is where the main kernel code starts.
@@ -949,25 +948,32 @@ start_here:
/* ptr to phys current thread */
tophys(r4,r2)
addi r4,r4,THREAD /* init task's THREAD */
- CLR_TOP32(r4)
mtspr SPRN_SPRG_THREAD,r4
- li r3,0
- mtspr SPRN_SPRG_RTAS,r3 /* 0 => not in RTAS */
+BEGIN_MMU_FTR_SECTION
+ lis r4, (swapper_pg_dir - PAGE_OFFSET)@h
+ ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l
+ rlwinm r4, r4, 4, 0xffff01ff
+ mtspr SPRN_SDR1, r4
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
li r0,0
- stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+ stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
/*
* Do early platform-specific initialization,
* and set up the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
bl __save_cpu_setup
bl MMU_init
+ bl MMU_init_hw_patch
/*
* Go back to running unmapped so we can load up new values
@@ -978,11 +984,11 @@ start_here:
ori r4,r4,2f@l
tophys(r4,r4)
li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
- FIX_SRR1(r3,r5)
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
- SYNC
- RFI
+ rfi
/* Load up the kernel context */
2: bl load_up_mmu
@@ -993,7 +999,7 @@ start_here:
*/
lis r5, abatron_pteptrs@h
ori r5, r5, abatron_pteptrs@l
- stw r5, 0xf0(r0) /* This much match your Abatron config */
+ stw r5, 0xf0(0) /* This much match your Abatron config */
lis r6, swapper_pg_dir@h
ori r6, r6, swapper_pg_dir@l
tophys(r5, r5)
@@ -1002,52 +1008,11 @@ start_here:
/* Now turn on the MMU for real! */
li r4,MSR_KERNEL
- FIX_SRR1(r4,r5)
lis r3,start_kernel@h
ori r3,r3,start_kernel@l
mtspr SPRN_SRR0,r3
mtspr SPRN_SRR1,r4
- SYNC
- RFI
-
-/*
- * void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next);
- *
- * Set up the segment registers for a new context.
- */
-_ENTRY(switch_mmu_context)
- lwz r3,MMCONTEXTID(r4)
- cmpwi cr0,r3,0
- blt- 4f
- mulli r3,r3,897 /* multiply context by skew factor */
- rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
- addis r3,r3,0x6000 /* Set Ks, Ku bits */
- li r0,NUM_USER_SEGMENTS
- mtctr r0
-
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is passed as second argument.
- */
- lwz r4,MM_PGD(r4)
- lis r5, KERNELBASE@h
- lwz r5, 0xf0(r5)
- stw r4, 0x4(r5)
-#endif
- li r4,0
- isync
-3:
- mtsrin r3,r4
- addi r3,r3,0x111 /* next VSID */
- rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
- sync
- isync
- blr
-4: trap
- EMIT_BUG_ENTRY 4b,__FILE__,__LINE__,0
- blr
+ rfi
/*
* An undocumented "feature" of 604e requires that the v bit
@@ -1057,12 +1022,8 @@ _ENTRY(switch_mmu_context)
* this makes sure it's done.
* -- Cort
*/
-clear_bats:
+SYM_FUNC_START_LOCAL(clear_bats)
li r10,0
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi r9, 1
- beq 1f
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
@@ -1072,7 +1033,6 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-1:
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1106,52 +1066,70 @@ BEGIN_MMU_FTR_SECTION
mtspr SPRN_IBAT7L,r10
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+SYM_FUNC_END(clear_bats)
+
+_GLOBAL(update_bats)
+ lis r4, 1f@h
+ ori r4, r4, 1f@l
+ tophys(r4, r4)
+ mfmsr r6
+ mflr r7
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR)
+ rlwinm r0, r6, 0, ~MSR_RI
+ rlwinm r0, r0, 0, ~MSR_EE
+ mtmsr r0
-flush_tlbs:
+ .align 4
+ mtspr SPRN_SRR0, r4
+ mtspr SPRN_SRR1, r3
+ rfi
+1: bl clear_bats
+ lis r3, BATS@ha
+ addi r3, r3, BATS@l
+ tophys(r3, r3)
+ LOAD_BAT(0, r3, r4, r5)
+ LOAD_BAT(1, r3, r4, r5)
+ LOAD_BAT(2, r3, r4, r5)
+ LOAD_BAT(3, r3, r4, r5)
+BEGIN_MMU_FTR_SECTION
+ LOAD_BAT(4, r3, r4, r5)
+ LOAD_BAT(5, r3, r4, r5)
+ LOAD_BAT(6, r3, r4, r5)
+ LOAD_BAT(7, r3, r4, r5)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+ li r3, MSR_KERNEL & ~(MSR_IR | MSR_DR | MSR_RI)
+ mtmsr r3
+ mtspr SPRN_SRR0, r7
+ mtspr SPRN_SRR1, r6
+ rfi
+
+SYM_FUNC_START_LOCAL(flush_tlbs)
lis r10, 0x40
1: addic. r10, r10, -0x1000
tlbie r10
bgt 1b
sync
blr
+SYM_FUNC_END(flush_tlbs)
-mmu_off:
+SYM_FUNC_START_LOCAL(mmu_off)
addi r4, r3, __after_mmu_off - _start
mfmsr r3
andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */
beqlr
andc r3,r3,r0
+
+ .align 4
mtspr SPRN_SRR0,r4
mtspr SPRN_SRR1,r3
sync
- RFI
+ rfi
+SYM_FUNC_END(mmu_off)
-/*
- * On 601, we use 3 BATs to map up to 24M of RAM at _PAGE_OFFSET
- * (we keep one for debugging) and on others, we use one 256M BAT.
- */
-initial_bats:
+/* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */
+SYM_FUNC_START_LOCAL(initial_bats)
lis r11,PAGE_OFFSET@h
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- bne 4f
- ori r11,r11,4 /* set up BAT registers for 601 */
- li r8,0x7f /* valid, block length = 8MB */
- mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
- mtspr SPRN_IBAT0L,r8 /* lower BAT register */
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT1U,r11
- mtspr SPRN_IBAT1L,r8
- addis r11,r11,0x800000@h
- addis r8,r8,0x800000@h
- mtspr SPRN_IBAT2U,r11
- mtspr SPRN_IBAT2L,r8
- isync
- blr
-
-4: tophys(r8,r11)
+ tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1159,16 +1137,16 @@ initial_bats:
#endif /* CONFIG_SMP */
ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */
- mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */
+ mtspr SPRN_DBAT0L,r8 /* N.B. 6xx have valid */
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
isync
blr
-
+SYM_FUNC_END(initial_bats)
#ifdef CONFIG_BOOTX_TEXT
-setup_disp_bat:
+SYM_FUNC_START_LOCAL(setup_disp_bat)
/*
* setup the display bat prepared for us in prom.c
*/
@@ -1181,20 +1159,14 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- beq 1f
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
blr
-1: mtspr SPRN_IBAT3L,r8
- mtspr SPRN_IBAT3U,r11
- blr
+SYM_FUNC_END(setup_disp_bat)
#endif /* CONFIG_BOOTX_TEXT */
#ifdef CONFIG_PPC_EARLY_DEBUG_CPM
-setup_cpm_bat:
+SYM_FUNC_START_LOCAL(setup_cpm_bat)
lis r8, 0xf000
ori r8, r8, 0x002a
mtspr SPRN_DBAT1L, r8
@@ -1204,10 +1176,11 @@ setup_cpm_bat:
mtspr SPRN_DBAT1U, r11
blr
+SYM_FUNC_END(setup_cpm_bat)
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
-setup_usbgecko_bat:
+SYM_FUNC_START_LOCAL(setup_usbgecko_bat)
/* prepare a BAT for early io */
#if defined(CONFIG_GAMECUBE)
lis r8, 0x0c00
@@ -1226,71 +1199,7 @@ setup_usbgecko_bat:
mtspr SPRN_DBAT1L, r8
mtspr SPRN_DBAT1U, r11
blr
+SYM_FUNC_END(setup_usbgecko_bat)
#endif
-#ifdef CONFIG_8260
-/* Jump into the system reset for the rom.
- * We first disable the MMU, and then jump to the ROM reset address.
- *
- * r3 is the board info structure, r4 is the location for starting.
- * I use this for building a small kernel that can load other kernels,
- * rather than trying to write or rely on a rom monitor that can tftp load.
- */
- .globl m8260_gorom
-m8260_gorom:
- mfmsr r0
- rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */
- sync
- mtmsr r0
- sync
- mfspr r11, SPRN_HID0
- lis r10, 0
- ori r10,r10,HID0_ICE|HID0_DCE
- andc r11, r11, r10
- mtspr SPRN_HID0, r11
- isync
- li r5, MSR_ME|MSR_RI
- lis r6,2f@h
- addis r6,r6,-KERNELBASE@h
- ori r6,r6,2f@l
- mtspr SPRN_SRR0,r6
- mtspr SPRN_SRR1,r5
- isync
- sync
- rfi
-2:
- mtlr r4
- blr
-#endif
-
-
-/*
- * We put a few things here that have to be page-aligned.
- * This stuff goes at the beginning of the data segment,
- * which is page-aligned.
- */
.data
- .globl sdata
-sdata:
- .globl empty_zero_page
-empty_zero_page:
- .space 4096
-
- .globl swapper_pg_dir
-swapper_pg_dir:
- .space PGD_TABLE_SIZE
-
- .globl intercept_table
-intercept_table:
- .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700
- .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0
- .long 0, 0, 0, i0x1300, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
- .long 0, 0, 0, 0, 0, 0, 0, 0
-
-/* Room for two PTE pointers, usually the kernel and current user pointers
- * to their respective root page table.
- */
-abatron_pteptrs:
- .space 8
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index a620203f7de3..75471fb6fb10 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -1,9 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __HEAD_BOOKE_H__
#define __HEAD_BOOKE_H__
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/thread_info.h> /* for THREAD_SHIFT */
+
+#ifdef __ASSEMBLER__
/*
* Macros used for common Book-e exception handling
@@ -31,7 +35,17 @@
*/
#define THREAD_NORMSAVE(offset) (THREAD_NORMSAVES + (offset * 4))
-#define NORMAL_EXCEPTION_PROLOG(intno) \
+#ifdef CONFIG_PPC_E500
+#define BOOKE_CLEAR_BTB(reg) \
+START_BTB_FLUSH_SECTION \
+ BTB_FLUSH(reg) \
+END_BTB_FLUSH_SECTION
+#else
+#define BOOKE_CLEAR_BTB(reg)
+#endif
+
+
+#define NORMAL_EXCEPTION_PROLOG(trapno, intno) \
mtspr SPRN_SPRG_WSCRATCH0, r10; /* save one register */ \
mfspr r10, SPRN_SPRG_THREAD; \
stw r11, THREAD_NORMSAVE(0)(r10); \
@@ -40,10 +54,13 @@
mfspr r11, SPRN_SRR1; \
DO_KVM BOOKE_INTERRUPT_##intno SPRN_SRR1; \
andi. r11, r11, MSR_PR; /* check whether user or kernel */\
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL); \
+ mtmsr r11; \
mr r11, r1; \
beq 1f; \
+ BOOKE_CLEAR_BTB(r11) \
/* if from user, start at top of this thread's kernel stack */ \
- lwz r11, THREAD_INFO-THREAD(r10); \
+ lwz r11, TASK_STACK - THREAD(r10); \
ALLOC_STACK_FRAME(r11, THREAD_SIZE); \
1 : subi r11, r11, INT_FRAME_SIZE; /* Allocate exception frame */ \
stw r13, _CCR(r11); /* save various registers */ \
@@ -62,19 +79,76 @@
stw r1, 0(r11); \
mr r1, r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha;/* exception frame marker */ \
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/* To handle the additional exception priority levels on 40x and Book-E
+ COMMON_EXCEPTION_PROLOG_END trapno
+
+.macro COMMON_EXCEPTION_PROLOG_END trapno
+ stw r0,GPR0(r1)
+ lis r10, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l
+ stw r10, STACK_INT_FRAME_MARKER(r1)
+ li r10, \trapno
+ stw r10,_TRAP(r1)
+ SAVE_GPRS(3, 8, r1)
+ SAVE_NVGPRS(r1)
+ stw r2,GPR2(r1)
+ stw r12,_NIP(r1)
+ stw r9,_MSR(r1)
+ mfctr r10
+ mfspr r2,SPRN_SPRG_THREAD
+ stw r10,_CTR(r1)
+ tovirt(r2, r2)
+ mfspr r10,SPRN_XER
+ addi r2, r2, -THREAD
+ stw r10,_XER(r1)
+ addi r3,r1,STACK_INT_FRAME_REGS
+.endm
+
+.macro prepare_transfer_to_handler
+#ifdef CONFIG_PPC_E500
+ andi. r12,r9,MSR_PR
+ bne 777f
+ bl prepare_transfer_to_handler
+777:
+#endif
+.endm
+
+.macro SYSCALL_ENTRY trapno intno srr1
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtspr SPRN_SPRG_WSCRATCH0, r10
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13 /* save CR in r13 for now */
+ mfspr r11, SPRN_SRR1
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, 1975f
+ b kvmppc_handler_\intno\()_\srr1
+1975:
+ mr r12, r13
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+ mfcr r12
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#else
+ mfcr r12
+#endif
+ mfspr r9, SPRN_SRR1
+ BOOKE_CLEAR_BTB(r11)
+ mr r11, r1
+ lwz r1, TASK_STACK - THREAD(r10)
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ ALLOC_STACK_FRAME(r1, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r1)
+ mfspr r12,SPRN_SRR0
+ stw r12,_NIP(r1)
+ b transfer_to_syscall /* jump to handler */
+.endm
+
+/* To handle the additional exception priority levels on Book-E
* processors we allocate a stack per additional priority level.
*
- * On 40x critical is the only additional level
* On 44x/e500 we have critical and machine check
- * On e200 we have critical and debug (machine check occurs via critical)
*
* Additionally we reserve a SPRG for each priority level so we can free up a
* GPR to use as the base for indirect access to the exception stacks. This
@@ -90,23 +164,21 @@
#define MC_STACK_BASE mcheckirq_ctx
#define CRIT_STACK_BASE critirq_ctx
-/* only on e500mc/e200 */
+/* only on e500mc */
#define DBG_STACK_BASE dbgirq_ctx
-#define EXC_LVL_FRAME_OVERHEAD (THREAD_SIZE - INT_FRAME_SIZE - EXC_LVL_SIZE)
-
#ifdef CONFIG_SMP
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
mfspr r8,SPRN_PIR; \
slwi r8,r8,2; \
addis r8,r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#else
#define BOOKE_LOAD_EXC_LEVEL_STACK(level) \
lis r8,level##_STACK_BASE@ha; \
lwz r8,level##_STACK_BASE@l(r8); \
- addi r8,r8,EXC_LVL_FRAME_OVERHEAD;
+ addi r8,r8,THREAD_SIZE - INT_FRAME_SIZE;
#endif
/*
@@ -117,7 +189,7 @@
* registers as the normal prolog above. Instead we use a portion of the
* critical/machine check exception stack at low physical addresses.
*/
-#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, intno, exc_level_srr0, exc_level_srr1) \
+#define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, trapno, intno, exc_level_srr0, exc_level_srr1) \
mtspr SPRN_SPRG_WSCRATCH_##exc_level,r8; \
BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
stw r9,GPR9(r8); /* save various registers */\
@@ -127,10 +199,13 @@
stw r9,_CCR(r8); /* save CR on stack */\
mfspr r11,exc_level_srr1; /* check whether user or kernel */\
DO_KVM BOOKE_INTERRUPT_##intno exc_level_srr1; \
+ BOOKE_CLEAR_BTB(r10) \
andi. r11,r11,MSR_PR; \
+ LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)); \
+ mtmsr r11; \
mfspr r11,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
- addi r11,r11,EXC_LVL_FRAME_OVERHEAD; /* allocate stack frame */\
+ lwz r11, TASK_STACK - THREAD(r11); /* this thread's kernel stack */\
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE; /* allocate stack frame */\
beq 1f; \
/* COMING FROM USER MODE */ \
stw r9,_CCR(r11); /* save CR */\
@@ -142,13 +217,7 @@
stw r10,GPR11(r11); \
b 2f; \
/* COMING FROM PRIV MODE */ \
-1: lwz r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r11); \
- lwz r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_FLAGS-EXC_LVL_FRAME_OVERHEAD(r8); \
- stw r10,TI_PREEMPT-EXC_LVL_FRAME_OVERHEAD(r8); \
- lwz r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r11); \
- stw r9,TI_TASK-EXC_LVL_FRAME_OVERHEAD(r8); \
- mr r11,r8; \
+1: mr r11, r8; \
2: mfspr r8,SPRN_SPRG_RSCRATCH_##exc_level; \
stw r12,GPR12(r11); /* save various registers */\
mflr r10; \
@@ -163,16 +232,44 @@
stw r1,0(r11); \
mr r1,r11; \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-#define CRITICAL_EXCEPTION_PROLOG(intno) \
- EXC_LEVEL_EXCEPTION_PROLOG(CRIT, intno, SPRN_CSRR0, SPRN_CSRR1)
-#define DEBUG_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(DBG, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
-#define MCHECK_EXCEPTION_PROLOG \
- EXC_LEVEL_EXCEPTION_PROLOG(MC, MACHINE_CHECK, \
+ COMMON_EXCEPTION_PROLOG_END trapno
+
+#define SAVE_xSRR(xSRR) \
+ mfspr r0,SPRN_##xSRR##0; \
+ stw r0,_##xSRR##0(r1); \
+ mfspr r0,SPRN_##xSRR##1; \
+ stw r0,_##xSRR##1(r1)
+
+
+.macro SAVE_MMU_REGS
+#ifdef CONFIG_PPC_E500
+ mfspr r0,SPRN_MAS0
+ stw r0,MAS0(r1)
+ mfspr r0,SPRN_MAS1
+ stw r0,MAS1(r1)
+ mfspr r0,SPRN_MAS2
+ stw r0,MAS2(r1)
+ mfspr r0,SPRN_MAS3
+ stw r0,MAS3(r1)
+ mfspr r0,SPRN_MAS6
+ stw r0,MAS6(r1)
+#ifdef CONFIG_PHYS_64BIT
+ mfspr r0,SPRN_MAS7
+ stw r0,MAS7(r1)
+#endif /* CONFIG_PHYS_64BIT */
+#endif /* CONFIG_PPC_E500 */
+#ifdef CONFIG_44x
+ mfspr r0,SPRN_MMUCR
+ stw r0,MMUCR(r1)
+#endif
+.endm
+
+#define CRITICAL_EXCEPTION_PROLOG(trapno, intno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(CRIT, trapno+2, intno, SPRN_CSRR0, SPRN_CSRR1)
+#define DEBUG_EXCEPTION_PROLOG(trapno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(DBG, trapno+8, DEBUG, SPRN_DSRR0, SPRN_DSRR1)
+#define MCHECK_EXCEPTION_PROLOG(trapno) \
+ EXC_LEVEL_EXCEPTION_PROLOG(MC, trapno+4, MACHINE_CHECK, \
SPRN_MCSRR0, SPRN_MCSRR1)
/*
@@ -199,58 +296,34 @@
.align 5; \
label:
-#define EXCEPTION(n, intno, label, hdlr, xfer) \
+#define EXCEPTION(n, intno, label, hdlr) \
START_EXCEPTION(label); \
- NORMAL_EXCEPTION_PROLOG(intno); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
+ NORMAL_EXCEPTION_PROLOG(n, intno); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b interrupt_return
#define CRITICAL_EXCEPTION(n, intno, label, hdlr) \
START_EXCEPTION(label); \
- CRITICAL_EXCEPTION_PROLOG(intno); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
+ CRITICAL_EXCEPTION_PROLOG(n, intno); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_crit_exc
#define MCHECK_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(label); \
- MCHECK_EXCEPTION_PROLOG; \
+ MCHECK_EXCEPTION_PROLOG(n); \
mfspr r5,SPRN_ESR; \
stw r5,_ESR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, mcheck_transfer_to_handler, \
- ret_from_mcheck_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- copyee(r10, r9); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
- ret_from_except)
+ SAVE_xSRR(DSRR); \
+ SAVE_xSRR(CSRR); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl hdlr; \
+ b ret_from_mcheck_exc
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -267,7 +340,7 @@ label:
*/
#define DEBUG_DEBUG_EXCEPTION \
START_EXCEPTION(DebugDebug); \
- DEBUG_EXCEPTION_PROLOG; \
+ DEBUG_EXCEPTION_PROLOG(2000); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -315,12 +388,17 @@ label:
\
/* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\
+ SAVE_xSRR(CSRR); \
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl DebugException; \
+ b ret_from_debug_exc
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
- CRITICAL_EXCEPTION_PROLOG(DEBUG); \
+ CRITICAL_EXCEPTION_PROLOG(2000,DEBUG); \
\
/* \
* If there is a single step or branch-taken exception in an \
@@ -368,78 +446,81 @@ label:
\
/* continue normal handling for a critical exception... */ \
2: mfspr r4,SPRN_DBSR; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ stw r4,_ESR(r11); /* DebugException takes DBSR in _ESR */\
+ SAVE_MMU_REGS; \
+ SAVE_xSRR(SRR); \
+ prepare_transfer_to_handler; \
+ bl DebugException; \
+ b ret_from_crit_exc
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
- NORMAL_EXCEPTION_PROLOG(DATA_STORAGE); \
+ NORMAL_EXCEPTION_PROLOG(0x300, DATA_STORAGE); \
mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
stw r5,_ESR(r11); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR */ \
- EXC_XFER_LITE(0x0300, handle_page_fault)
+ stw r4, _DEAR(r11); \
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
+/*
+ * Instruction TLB Error interrupt handlers may call InstructionStorage
+ * directly without clearing ESR, so the ESR at this point may be left over
+ * from a prior interrupt.
+ *
+ * In any case, do_page_fault for BOOK3E does not use ESR and always expects
+ * dsisr to be 0. ESR_DST from a prior store in particular would confuse fault
+ * handling.
+ */
#define INSTRUCTION_STORAGE_EXCEPTION \
START_EXCEPTION(InstructionStorage) \
- NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \
- mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \
+ NORMAL_EXCEPTION_PROLOG(0x400, INST_STORAGE); \
+ li r5,0; /* Store 0 in regs->esr (dsisr) */ \
stw r5,_ESR(r11); \
- mr r4,r12; /* Pass SRR0 as arg2 */ \
- li r5,0; /* Pass zero as arg3 */ \
- EXC_XFER_LITE(0x0400, handle_page_fault)
+ stw r12, _DEAR(r11); /* Set regs->dear (dar) to SRR0 */ \
+ prepare_transfer_to_handler; \
+ bl do_page_fault; \
+ b interrupt_return
#define ALIGNMENT_EXCEPTION \
START_EXCEPTION(Alignment) \
- NORMAL_EXCEPTION_PROLOG(ALIGNMENT); \
+ NORMAL_EXCEPTION_PROLOG(0x600, ALIGNMENT); \
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE(0x0600, alignment_exception)
+ prepare_transfer_to_handler; \
+ bl alignment_exception; \
+ REST_NVGPRS(r1); \
+ b interrupt_return
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
- NORMAL_EXCEPTION_PROLOG(PROGRAM); \
+ NORMAL_EXCEPTION_PROLOG(0x700, PROGRAM); \
mfspr r4,SPRN_ESR; /* Grab the ESR and save it */ \
stw r4,_ESR(r11); \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_STD(0x0700, program_check_exception)
+ prepare_transfer_to_handler; \
+ bl program_check_exception; \
+ REST_NVGPRS(r1); \
+ b interrupt_return
#define DECREMENTER_EXCEPTION \
START_EXCEPTION(Decrementer) \
- NORMAL_EXCEPTION_PROLOG(DECREMENTER); \
+ NORMAL_EXCEPTION_PROLOG(0x900, DECREMENTER); \
lis r0,TSR_DIS@h; /* Setup the DEC interrupt mask */ \
mtspr SPRN_TSR,r0; /* Clear the DEC interrupt */ \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_LITE(0x0900, timer_interrupt)
+ prepare_transfer_to_handler; \
+ bl timer_interrupt; \
+ b interrupt_return
#define FP_UNAVAILABLE_EXCEPTION \
START_EXCEPTION(FloatingPointUnavailable) \
- NORMAL_EXCEPTION_PROLOG(FP_UNAVAIL); \
+ NORMAL_EXCEPTION_PROLOG(0x800, FP_UNAVAIL); \
beq 1f; \
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
-1: addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
-
-#ifndef __ASSEMBLY__
-struct exception_regs {
- unsigned long mas0;
- unsigned long mas1;
- unsigned long mas2;
- unsigned long mas3;
- unsigned long mas6;
- unsigned long mas7;
- unsigned long srr0;
- unsigned long srr1;
- unsigned long csrr0;
- unsigned long csrr1;
- unsigned long dsrr0;
- unsigned long dsrr1;
- unsigned long saved_ksp_limit;
-};
-
-/* ensure this structure is always sized to a multiple of the stack alignment */
-#define STACK_EXC_LVL_FRAME_SIZE _ALIGN_UP(sizeof (struct exception_regs), 16)
-
-#endif /* __ASSEMBLY__ */
+1: prepare_transfer_to_handler; \
+ bl kernel_fp_unavailable_exception; \
+ b interrupt_return
+
+#endif /* __ASSEMBLER__ */
#endif /* __HEAD_BOOKE_H__ */
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 0bb5918faaaf..a1318ce18d0e 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -1,25 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
* using the CPU's debug registers. Derived from
* "arch/x86/kernel/hw_breakpoint.c"
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2010 IBM Corporation
* Author: K.Prasad <prasad@linux.vnet.ibm.com>
- *
*/
#include <linux/hw_breakpoint.h>
@@ -29,17 +15,23 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
-#include <asm/uaccess.h>
+#include <asm/debug.h>
+#include <asm/hvcall.h>
+#include <asm/inst.h>
+#include <linux/uaccess.h>
/*
* Stores the breakpoints currently in use on each breakpoint address
* register for every cpu
*/
-static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM_MAX]);
/*
* Returns total number of data or instruction breakpoints available.
@@ -47,10 +39,11 @@ static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
int hw_breakpoint_slots(int type)
{
if (type == TYPE_DATA)
- return HBP_NUM;
+ return nr_wp_slots();
return 0; /* no instruction breakpoints available */
}
+
/*
* Install a perf counter breakpoint.
*
@@ -63,16 +56,26 @@ int hw_breakpoint_slots(int type)
int arch_install_hw_breakpoint(struct perf_event *bp)
{
struct arch_hw_breakpoint *info = counter_arch_bp(bp);
- struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
- *slot = bp;
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
+ return -EBUSY;
/*
* Do not install DABR values if the instruction must be single-stepped.
* If so, DABR will be populated in single_step_dabr_instruction().
*/
- if (current->thread.last_hit_ubp != bp)
- __set_breakpoint(info);
+ if (!info->perf_single_step)
+ __set_breakpoint(i, info);
return 0;
}
@@ -88,40 +91,35 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
*/
void arch_uninstall_hw_breakpoint(struct perf_event *bp)
{
- struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+ struct arch_hw_breakpoint null_brk = {0};
+ struct perf_event **slot;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ slot = this_cpu_ptr(&bp_per_reg[i]);
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
+ }
- if (*slot != bp) {
- WARN_ONCE(1, "Can't find the breakpoint");
+ if (WARN_ONCE(i == nr_wp_slots(), "Can't find any breakpoint slot"))
return;
- }
- *slot = NULL;
- hw_breakpoint_disable();
+ __set_breakpoint(i, &null_brk);
}
-/*
- * Perform cleanup of arch-specific counters during unregistration
- * of the perf-event
- */
-void arch_unregister_hw_breakpoint(struct perf_event *bp)
+static bool is_ptrace_bp(struct perf_event *bp)
{
- /*
- * If the breakpoint is unregistered between a hw_breakpoint_handler()
- * and the single_step_dabr_instruction(), then cleanup the breakpoint
- * restoration variables to prevent dangling pointers.
- */
- if (bp->ctx && bp->ctx->task)
- bp->ctx->task->thread.last_hit_ubp = NULL;
+ return bp->overflow_handler == ptrace_triggered;
}
/*
* Check for virtual address in kernel space.
*/
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw)
{
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
-
- return is_kernel_addr(info->address);
+ return is_kernel_addr(hw->address);
}
int arch_bp_generic_fields(int type, int *gen_bp_type)
@@ -137,83 +135,258 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
}
/*
+ * Watchpoint match range is always doubleword(8 bytes) aligned on
+ * powerpc. If the given range is crossing doubleword boundary, we
+ * need to increase the length such that the next doubleword also gets
+ * covered. For example:
+ *
+ * address len = 6 bytes
+ * |=========.
+ * |------------v--|------v--------|
+ * | | | | | | | | | | | | | | | | |
+ * |---------------|---------------|
+ * <---8 bytes--->
+ *
+ * In this case, we should configure hw as:
+ * start_addr = address & ~(HW_BREAKPOINT_SIZE - 1)
+ * len = 16 bytes
+ *
+ * @start_addr is inclusive but @end_addr is exclusive.
+ */
+static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
+{
+ u16 max_len = DABR_MAX_LEN;
+ u16 hw_len;
+ unsigned long start_addr, end_addr;
+
+ start_addr = ALIGN_DOWN(hw->address, HW_BREAKPOINT_SIZE);
+ end_addr = ALIGN(hw->address + hw->len, HW_BREAKPOINT_SIZE);
+ hw_len = end_addr - start_addr;
+
+ if (dawr_enabled()) {
+ max_len = DAWR_MAX_LEN;
+ /* DAWR region can't cross 512 bytes boundary on p10 predecessors */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512)))
+ return -EINVAL;
+ } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ /* 8xx can setup a range without limitation */
+ max_len = U16_MAX;
+ }
+
+ if (hw_len > max_len)
+ return -EINVAL;
+
+ hw->hw_len = hw_len;
+ return 0;
+}
+
+/*
* Validate the arch-specific HW Breakpoint register settings
*/
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int hw_breakpoint_arch_parse(struct perf_event *bp,
+ const struct perf_event_attr *attr,
+ struct arch_hw_breakpoint *hw)
{
- int ret = -EINVAL, length_max;
- struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ int ret = -EINVAL;
- if (!bp)
+ if (!bp || !attr->bp_len)
return ret;
- info->type = HW_BRK_TYPE_TRANSLATE;
- if (bp->attr.bp_type & HW_BREAKPOINT_R)
- info->type |= HW_BRK_TYPE_READ;
- if (bp->attr.bp_type & HW_BREAKPOINT_W)
- info->type |= HW_BRK_TYPE_WRITE;
- if (info->type == HW_BRK_TYPE_TRANSLATE)
+ hw->type = HW_BRK_TYPE_TRANSLATE;
+ if (attr->bp_type & HW_BREAKPOINT_R)
+ hw->type |= HW_BRK_TYPE_READ;
+ if (attr->bp_type & HW_BREAKPOINT_W)
+ hw->type |= HW_BRK_TYPE_WRITE;
+ if (hw->type == HW_BRK_TYPE_TRANSLATE)
/* must set at least read or write */
return ret;
- if (!(bp->attr.exclude_user))
- info->type |= HW_BRK_TYPE_USER;
- if (!(bp->attr.exclude_kernel))
- info->type |= HW_BRK_TYPE_KERNEL;
- if (!(bp->attr.exclude_hv))
- info->type |= HW_BRK_TYPE_HYP;
- info->address = bp->attr.bp_addr;
- info->len = bp->attr.bp_len;
-
- /*
- * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
- * and breakpoint addresses are aligned to nearest double-word
- * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
- * 'symbolsize' should satisfy the check below.
- */
- length_max = 8; /* DABR */
- if (cpu_has_feature(CPU_FTR_DAWR)) {
- length_max = 512 ; /* 64 doublewords */
- /* DAWR region can't cross 512 boundary */
- if ((bp->attr.bp_addr >> 10) !=
- ((bp->attr.bp_addr + bp->attr.bp_len - 1) >> 10))
- return -EINVAL;
- }
- if (info->len >
- (length_max - (info->address & HW_BREAKPOINT_ALIGN)))
- return -EINVAL;
- return 0;
+ if (!attr->exclude_user)
+ hw->type |= HW_BRK_TYPE_USER;
+ if (!attr->exclude_kernel)
+ hw->type |= HW_BRK_TYPE_KERNEL;
+ if (!attr->exclude_hv)
+ hw->type |= HW_BRK_TYPE_HYP;
+ hw->address = attr->bp_addr;
+ hw->len = attr->bp_len;
+
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
+ return hw_breakpoint_validate_len(hw);
}
/*
* Restores the breakpoint on the debug registers.
* Invoke this function if it is known that the execution context is
* about to change to cause loss of MSR_SE settings.
+ *
+ * The perf watchpoint will simply re-trigger once the thread is started again,
+ * and the watchpoint handler will set up MSR_SE and perf_single_step as
+ * needed.
*/
void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
{
struct arch_hw_breakpoint *info;
+ int i;
+
+ preempt_disable();
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ struct perf_event *bp = __this_cpu_read(bp_per_reg[i]);
+
+ if (unlikely(bp && counter_arch_bp(bp)->perf_single_step))
+ goto reset;
+ }
+ goto out;
+
+reset:
+ regs_set_return_msr(regs, regs->msr & ~MSR_SE);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = counter_arch_bp(__this_cpu_read(bp_per_reg[i]));
+ __set_breakpoint(i, info);
+ info->perf_single_step = false;
+ }
+
+out:
+ preempt_enable();
+}
+
+static bool is_larx_stcx_instr(int type)
+{
+ return type == LARX || type == STCX;
+}
+
+static bool is_octword_vsx_instr(int type, int size)
+{
+ return ((type == LOAD_VSX || type == STORE_VSX) && size == 32);
+}
+
+/*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+static void handler_error(struct perf_event *bp)
+{
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at 0x%lx will be disabled.",
+ counter_arch_bp(bp)->address);
+ perf_event_disable_inatomic(bp);
+}
+
+static void larx_stcx_err(struct perf_event *bp)
+{
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n",
+ counter_arch_bp(bp)->address);
+ perf_event_disable_inatomic(bp);
+}
+
+static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp,
+ int *hit, ppc_inst_t instr)
+{
+ int i;
+ int stepped;
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+
+ counter_arch_bp(bp[i])->perf_single_step = true;
+ bp[i] = NULL;
+ }
+ regs_set_return_msr(regs, regs->msr | MSR_SE);
+ return false;
+ }
+
+ stepped = emulate_step(regs, instr);
+ if (!stepped) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ handler_error(bp[i]);
+ bp[i] = NULL;
+ }
+ return false;
+ }
+ return true;
+}
+
+static void handle_p10dd1_spurious_exception(struct perf_event **bp,
+ int *hit, unsigned long ea)
+{
+ int i;
+ unsigned long hw_end_addr;
+
+ /*
+ * Handle spurious exception only when any bp_per_reg is set.
+ * Otherwise this might be created by xmon and not actually a
+ * spurious exception.
+ */
+ for (i = 0; i < nr_wp_slots(); i++) {
+ struct arch_hw_breakpoint *info;
+
+ if (!bp[i])
+ continue;
+
+ info = counter_arch_bp(bp[i]);
- if (likely(!tsk->thread.last_hit_ubp))
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ /*
+ * Ending address of DAWR range is less than starting
+ * address of op.
+ */
+ if ((hw_end_addr - 1) >= ea)
+ continue;
+
+ /*
+ * Those addresses need to be in the same or in two
+ * consecutive 512B blocks;
+ */
+ if (((hw_end_addr - 1) >> 10) != (ea >> 10))
+ continue;
+
+ /*
+ * 'op address + 64B' generates an address that has a
+ * carry into bit 52 (crosses 2K boundary).
+ */
+ if ((ea & 0x800) == ((ea + 64) & 0x800))
+ continue;
+
+ break;
+ }
+
+ if (i == nr_wp_slots())
return;
- info = counter_arch_bp(tsk->thread.last_hit_ubp);
- regs->msr &= ~MSR_SE;
- __set_breakpoint(info);
- tsk->thread.last_hit_ubp = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (bp[i]) {
+ hit[i] = 1;
+ counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ }
+ }
}
/*
- * Handle debug exception notifications.
+ * Handle a DABR or DAWR exception.
+ *
+ * Called in atomic context.
*/
-int __kprobes hw_breakpoint_handler(struct die_args *args)
+int hw_breakpoint_handler(struct die_args *args)
{
+ bool err = false;
int rc = NOTIFY_STOP;
- struct perf_event *bp;
+ struct perf_event *bp[HBP_NUM_MAX] = { NULL };
struct pt_regs *regs = args->regs;
- int stepped = 1;
- struct arch_hw_breakpoint *info;
- unsigned int instr;
- unsigned long dar = regs->dar;
+ int i;
+ int hit[HBP_NUM_MAX] = {0};
+ int nr_hit = 0;
+ bool ptrace_bp = false;
+ ppc_inst_t instr = ppc_inst(0);
+ int type = 0;
+ int size = 0;
+ unsigned long ea = 0;
/* Disable breakpoints during exception handling */
hw_breakpoint_disable();
@@ -226,10 +399,48 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
*/
rcu_read_lock();
- bp = __get_cpu_var(bp_per_reg);
- if (!bp)
- goto out;
- info = counter_arch_bp(bp);
+ if (!IS_ENABLED(CONFIG_PPC_8xx))
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ struct arch_hw_breakpoint *info;
+
+ bp[i] = __this_cpu_read(bp_per_reg[i]);
+ if (!bp[i])
+ continue;
+
+ info = counter_arch_bp(bp[i]);
+ info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
+ if (wp_check_constraints(regs, instr, ea, type, size, info)) {
+ if (!IS_ENABLED(CONFIG_PPC_8xx) &&
+ ppc_inst_equal(instr, ppc_inst(0))) {
+ handler_error(bp[i]);
+ bp[i] = NULL;
+ err = 1;
+ continue;
+ }
+
+ if (is_ptrace_bp(bp[i]))
+ ptrace_bp = true;
+ hit[i] = 1;
+ nr_hit++;
+ }
+ }
+
+ if (err)
+ goto reset;
+
+ if (!nr_hit) {
+ /* Workaround for Power10 DD1 */
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 &&
+ is_octword_vsx_instr(type, size)) {
+ handle_p10dd1_spurious_exception(bp, hit, ea);
+ } else {
+ rc = NOTIFY_DONE;
+ goto out;
+ }
+ }
/*
* Return early after invoking user-callback function without restoring
@@ -237,102 +448,115 @@ int __kprobes hw_breakpoint_handler(struct die_args *args)
* one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
* generated in do_dabr().
*/
- if (bp->overflow_handler == ptrace_triggered) {
- perf_bp_event(bp, regs);
+ if (ptrace_bp) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i] || !is_ptrace_bp(bp[i]))
+ continue;
+ perf_bp_event(bp[i], regs);
+ bp[i] = NULL;
+ }
rc = NOTIFY_DONE;
- goto out;
+ goto reset;
}
- /*
- * Verify if dar lies within the address range occupied by the symbol
- * being watched to filter extraneous exceptions. If it doesn't,
- * we still need to single-step the instruction, but we don't
- * generate an event.
- */
- info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
- if (!((bp->attr.bp_addr <= dar) &&
- (dar - bp->attr.bp_addr < bp->attr.bp_len)))
- info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
- goto out;
+ if (!IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (is_larx_stcx_instr(type)) {
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ larx_stcx_err(bp[i]);
+ bp[i] = NULL;
+ }
+ goto reset;
+ }
+
+ if (!stepping_handler(regs, bp, hit, instr))
+ goto reset;
}
- stepped = 0;
- instr = 0;
- if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
- stepped = emulate_step(regs, instr);
-
- /*
- * emulate_step() could not execute it. We've failed in reliably
- * handling the hw-breakpoint. Unregister it and throw a warning
- * message to let the user know about it.
- */
- if (!stepped) {
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", info->address);
- perf_event_disable(bp);
- goto out;
- }
/*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!hit[i])
+ continue;
+ if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp[i], regs);
+ }
+
+reset:
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (!bp[i])
+ continue;
+ __set_breakpoint(i, counter_arch_bp(bp[i]));
+ }
- __set_breakpoint(info);
out:
rcu_read_unlock();
return rc;
}
+NOKPROBE_SYMBOL(hw_breakpoint_handler);
/*
* Handle single-step exceptions following a DABR hit.
+ *
+ * Called in atomic context.
*/
-int __kprobes single_step_dabr_instruction(struct die_args *args)
+static int single_step_dabr_instruction(struct die_args *args)
{
struct pt_regs *regs = args->regs;
- struct perf_event *bp = NULL;
- struct arch_hw_breakpoint *info;
+ bool found = false;
- bp = current->thread.last_hit_ubp;
/*
* Check if we are single-stepping as a result of a
* previous HW Breakpoint exception
*/
- if (!bp)
- return NOTIFY_DONE;
+ for (int i = 0; i < nr_wp_slots(); i++) {
+ struct perf_event *bp;
+ struct arch_hw_breakpoint *info;
- info = counter_arch_bp(bp);
+ bp = __this_cpu_read(bp_per_reg[i]);
- /*
- * We shall invoke the user-defined callback function in the single
- * stepping handler to confirm to 'trigger-after-execute' semantics
- */
- if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
- perf_bp_event(bp, regs);
+ if (!bp)
+ continue;
+
+ info = counter_arch_bp(bp);
+
+ if (!info->perf_single_step)
+ continue;
- __set_breakpoint(info);
- current->thread.last_hit_ubp = NULL;
+ found = true;
+
+ /*
+ * We shall invoke the user-defined callback function in the
+ * single stepping handler to conform to 'trigger-after-execute'
+ * semantics
+ */
+ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ))
+ perf_bp_event(bp, regs);
+
+ info->perf_single_step = false;
+ __set_breakpoint(i, counter_arch_bp(bp));
+ }
/*
* If the process was being single-stepped by ptrace, let the
* other single-step actions occur (e.g. generate SIGTRAP).
*/
- if (test_thread_flag(TIF_SINGLESTEP))
+ if (!found || test_thread_flag(TIF_SINGLESTEP))
return NOTIFY_DONE;
return NOTIFY_STOP;
}
+NOKPROBE_SYMBOL(single_step_dabr_instruction);
/*
* Handle debug exception notifications.
+ *
+ * Called in atomic context.
*/
-int __kprobes hw_breakpoint_exceptions_notify(
+int hw_breakpoint_exceptions_notify(
struct notifier_block *unused, unsigned long val, void *data)
{
int ret = NOTIFY_DONE;
@@ -348,19 +572,39 @@ int __kprobes hw_breakpoint_exceptions_notify(
return ret;
}
+NOKPROBE_SYMBOL(hw_breakpoint_exceptions_notify);
/*
* Release the user breakpoints used by ptrace
*/
void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
+ int i;
struct thread_struct *t = &tsk->thread;
- unregister_hw_breakpoint(t->ptrace_bps[0]);
- t->ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
}
void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+void ptrace_triggered(struct perf_event *bp,
+ struct perf_sample_data *data, struct pt_regs *regs)
+{
+ struct perf_event_attr attr;
+
+ /*
+ * Disable the breakpoint request here since ptrace has defined a
+ * one-shot behaviour for breakpoint exceptions in PPC64.
+ * The SIGTRAP signal is generated automatically for us in do_dabr().
+ * We don't have to do anything about that here
+ */
+ attr = bp->attr;
+ attr.disabled = true;
+ modify_user_hw_breakpoint(bp, &attr);
+}
diff --git a/arch/powerpc/kernel/hw_breakpoint_constraints.c b/arch/powerpc/kernel/hw_breakpoint_constraints.c
new file mode 100644
index 000000000000..9e51801c4915
--- /dev/null
+++ b/arch/powerpc/kernel/hw_breakpoint_constraints.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <linux/kernel.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/sstep.h>
+#include <asm/cache.h>
+
+static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ return ((info->address <= dar) && (dar - info->address < info->len));
+}
+
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ return ((ea < info->address + info->len) &&
+ (ea + size > info->address));
+}
+
+static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+
+ hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
+ hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
+
+ return ((hw_start_addr <= dar) && (hw_end_addr > dar));
+}
+
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+ struct arch_hw_breakpoint *info)
+{
+ unsigned long hw_start_addr, hw_end_addr;
+ unsigned long align_size = HW_BREAKPOINT_SIZE;
+
+ /*
+ * On p10 predecessors, quadword is handled differently than
+ * other instructions.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) && size == 16)
+ align_size = HW_BREAKPOINT_SIZE_QUADWORD;
+
+ hw_start_addr = ALIGN_DOWN(info->address, align_size);
+ hw_end_addr = ALIGN(info->address + info->len, align_size);
+
+ return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
+}
+
+/*
+ * If hw has multiple DAWR registers, we also need to check all
+ * dawrx constraint bits to confirm this is _really_ a valid event.
+ * If type is UNKNOWN, but privilege level matches, consider it as
+ * a positive match.
+ */
+static bool check_dawrx_constraints(struct pt_regs *regs, int type,
+ struct arch_hw_breakpoint *info)
+{
+ if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
+ return false;
+
+ /*
+ * The Cache Management instructions other than dcbz never
+ * cause a match. i.e. if type is CACHEOP, the instruction
+ * is dcbz, and dcbz is treated as Store.
+ */
+ if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
+ return false;
+
+ if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
+ return false;
+
+ if (user_mode(regs) && !(info->type & HW_BRK_TYPE_USER))
+ return false;
+
+ return true;
+}
+
+/*
+ * Return true if the event is valid wrt dawr configuration,
+ * including extraneous exception. Otherwise return false.
+ */
+bool wp_check_constraints(struct pt_regs *regs, ppc_inst_t instr,
+ unsigned long ea, int type, int size,
+ struct arch_hw_breakpoint *info)
+{
+ bool in_user_range = dar_in_user_range(regs->dar, info);
+ bool dawrx_constraints;
+
+ /*
+ * 8xx supports only one breakpoint and thus we can
+ * unconditionally return true.
+ */
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ if (!in_user_range)
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+
+ if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return true;
+ }
+
+ dawrx_constraints = check_dawrx_constraints(regs, type, info);
+
+ if (type == UNKNOWN) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ !dar_in_hw_range(regs->dar, info))
+ return false;
+
+ return dawrx_constraints;
+ }
+
+ if (ea_user_range_overlaps(ea, size, info))
+ return dawrx_constraints;
+
+ if (ea_hw_range_overlaps(ea, size, info)) {
+ if (dawrx_constraints) {
+ info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+ return true;
+ }
+ }
+ return false;
+}
+
+void wp_get_instr_detail(struct pt_regs *regs, ppc_inst_t *instr,
+ int *type, int *size, unsigned long *ea)
+{
+ struct instruction_op op;
+ int err;
+
+ pagefault_disable();
+ err = __get_user_instr(*instr, (void __user *)regs->nip);
+ pagefault_enable();
+
+ if (err)
+ return;
+
+ analyse_instr(&op, regs, *instr);
+ *type = GETTYPE(op.type);
+ *ea = op.ea;
+
+ if (!(regs->msr & MSR_64BIT))
+ *ea &= 0xffffffffUL;
+
+
+ *size = GETSIZE(op.type);
+ if (*type == CACHEOP) {
+ *size = l1_dcache_bytes();
+ *ea &= ~(*size - 1);
+ } else if (*type == LOAD_VMX || *type == STORE_VMX) {
+ *ea &= ~(*size - 1);
+ }
+}
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index d7216c9abda1..e527cd3ef128 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Idle daemon for PowerPC. Idle daemon will handle any action
* that needs to be taken when the system becomes idle.
@@ -12,11 +13,6 @@
* Copyright (c) 2003 Dave Engebretsen <engebret@us.ibm.com>
*
* 32-bit and 64-bit versions merged by Paul Mackerras <paulus@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
@@ -41,18 +37,10 @@ static int __init powersave_off(char *arg)
{
ppc_md.power_save = NULL;
cpuidle_disable = IDLE_POWERSAVE_OFF;
- return 0;
+ return 1;
}
__setup("powersave=off", powersave_off);
-#ifdef CONFIG_HOTPLUG_CPU
-void arch_cpu_idle_dead(void)
-{
- sched_preempt_enable_no_resched();
- cpu_die();
-}
-#endif
-
void arch_cpu_idle(void)
{
ppc64_runlatch_off();
@@ -63,10 +51,9 @@ void arch_cpu_idle(void)
* Some power_save functions return with
* interrupts enabled, some don't.
*/
- if (irqs_disabled())
- local_irq_enable();
+ if (!irqs_disabled())
+ raw_local_irq_disable();
} else {
- local_irq_enable();
/*
* Go into low thread priority and possibly
* low power mode.
@@ -81,11 +68,36 @@ void arch_cpu_idle(void)
int powersave_nap;
+#ifdef CONFIG_PPC_970_NAP
+void power4_idle(void)
+{
+ if (!cpu_has_feature(CPU_FTR_CAN_NAP))
+ return;
+
+ if (!powersave_nap)
+ return;
+
+ if (!prep_irq_for_idle())
+ return;
+
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile(PPC_DSSALL " ; sync" ::: "memory");
+
+ power4_idle_nap();
+
+ /*
+ * power4_idle_nap returns with interrupts enabled (soft and hard), so
+ * we return to our caller with interrupts enabled too. Our caller
+ * can cope with either interrupts disabled or enabled upon return.
+ */
+}
+#endif
+
#ifdef CONFIG_SYSCTL
/*
* Register the sysctl to set/clear powersave_nap.
*/
-static struct ctl_table powersave_nap_ctl_table[] = {
+static const struct ctl_table powersave_nap_ctl_table[] = {
{
.procname = "powersave-nap",
.data = &powersave_nap,
@@ -93,21 +105,12 @@ static struct ctl_table powersave_nap_ctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {}
-};
-static struct ctl_table powersave_nap_sysctl_root[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = powersave_nap_ctl_table,
- },
- {}
};
static int __init
register_powersave_nap_sysctl(void)
{
- register_sysctl_table(powersave_nap_sysctl_root);
+ register_sysctl("kernel", powersave_nap_ctl_table);
return 0;
}
diff --git a/arch/powerpc/kernel/idle_book3e.S b/arch/powerpc/kernel/idle_64e.S
index 48c21acef915..0fc680e03dee 100644
--- a/arch/powerpc/kernel/idle_book3e.S
+++ b/arch/powerpc/kernel/idle_64e.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2010 IBM Corp, Benjamin Herrenschmidt <benh@kernel.crashing.org>
*
- * Generic idle routine for Book3E processors
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * Generic idle routine for 64 bits e500 processors
*/
#include <linux/threads.h>
@@ -17,10 +13,9 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/epapr_hcalls.h>
+#include <asm/hw_irq.h>
/* 64-bit version only for now */
-#ifdef CONFIG_PPC64
-
.macro BOOK3E_IDLE name loop
_GLOBAL(\name)
/* Save LR for later */
@@ -35,7 +30,7 @@ _GLOBAL(\name)
*/
lbz r3,PACAIRQHAPPENED(r13)
cmpwi cr0,r3,0
- bnelr
+ bne 2f
/* Now we are going to mark ourselves as soft and hard enabled in
* order to be able to take interrupts while asleep. We inform lockdep
@@ -46,8 +41,8 @@ _GLOBAL(\name)
bl trace_hardirqs_on
addi r1,r1,128
#endif
- li r0,1
- stb r0,PACASOFTIRQEN(r13)
+ li r0,IRQS_ENABLED
+ stb r0,PACAIRQSOFTMASK(r13)
/* Interrupts will make us return to LR, so get something we want
* in there
@@ -62,7 +57,7 @@ _GLOBAL(\name)
1: /* Let's set the _TLF_NAPPING flag so interrupts make us return
* to the right spot
*/
- CURRENT_THREAD_INFO(r11, r1)
+ ld r11, PACACURRENT(r13)
ld r10,TI_LOCAL_FLAGS(r11)
ori r10,r10,_TLF_NAPPING
std r10,TI_LOCAL_FLAGS(r11)
@@ -71,11 +66,16 @@ _GLOBAL(\name)
wrteei 1
\loop
+2:
+ lbz r10,PACAIRQHAPPENED(r13)
+ ori r10,r10,PACA_IRQ_HARD_DIS
+ stb r10,PACAIRQHAPPENED(r13)
+ blr
.endm
.macro BOOK3E_IDLE_LOOP
1:
- PPC_WAIT(0)
+ PPC_WAIT_v203
b 1b
.endm
@@ -96,6 +96,4 @@ epapr_ev_idle_start:
BOOK3E_IDLE epapr_ev_idle EPAPR_EV_IDLE_LOOP
-BOOK3E_IDLE book3e_idle BOOK3E_IDLE_LOOP
-
-#endif /* CONFIG_PPC64 */
+BOOK3E_IDLE e500_idle BOOK3E_IDLE_LOOP
diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S
index 1686916cc7f0..3c097356366b 100644
--- a/arch/powerpc/kernel/idle_6xx.S
+++ b/arch/powerpc/kernel/idle_6xx.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains the power_save function for 6xx & 7xxx CPUs
* rewritten in assembler
@@ -6,11 +7,6 @@
* it will have PLL 1 set to low speed mode (used during NAP/DOZE).
* if this is not the case some additional changes will have to
* be done to check a runtime var (a bit like powersave-nap)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -20,6 +16,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
.text
@@ -132,13 +129,12 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM)
mtspr SPRN_HID0,r4
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
- lwz r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
+ lwz r8,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- stw r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
+ stw r8,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
mfmsr r7
ori r7,r7,MSR_EE
oris r7,r7,MSR_POW@h
@@ -149,17 +145,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
/*
* Return from NAP/DOZE mode, restore some CPU specific registers,
- * we are called with DR/IR still off and r2 containing physical
- * address of current. R11 points to the exception frame (physical
- * address). We have to preserve r10.
+ * R11 points to the exception frame. We have to preserve r10.
*/
_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in ppc6xx_idle: */
stw r9,_NIP(r11) /* make it do a blr */
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r11)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
+ lwz r11,TASK_CPU(r2) /* get cpu number * 4 */
slwi r11,r11,2
#else
li r11,0
@@ -171,7 +164,7 @@ BEGIN_FTR_SECTION
mfspr r9,SPRN_HID0
andis. r9,r9,HID0_NAP@h
beq 1f
- addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha
+ addis r9, r11, nap_save_msscr0@ha
lwz r9,nap_save_msscr0@l(r9)
mtspr SPRN_MSSCR0, r9
sync
@@ -179,11 +172,12 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR)
BEGIN_FTR_SECTION
- addis r9,r11,(nap_save_hid1-KERNELBASE)@ha
+ addis r9, r11, nap_save_hid1@ha
lwz r9,nap_save_hid1@l(r9)
mtspr SPRN_HID1, r9
END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX)
- b transfer_to_handler_cont
+ blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
.data
diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_85xx.S
index 15448668988d..9e1bc4502c50 100644
--- a/arch/powerpc/kernel/idle_e500.S
+++ b/arch/powerpc/kernel/idle_85xx.S
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
* Dave Liu <daveliu@freescale.com>
* copy from idle_6xx.S and modify for e500 based processor,
* implement the power_save function in idle.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
@@ -17,14 +13,14 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
.text
_GLOBAL(e500_idle)
- CURRENT_THREAD_INFO(r3, r1)
- lwz r4,TI_LOCAL_FLAGS(r3) /* set napping bit */
+ lwz r4,TI_LOCAL_FLAGS(r2) /* set napping bit */
ori r4,r4,_TLF_NAPPING /* so when we take an exception */
- stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */
+ stw r4,TI_LOCAL_FLAGS(r2) /* it will return to our caller */
#ifdef CONFIG_PPC_E500MC
wrteei 1
@@ -58,15 +54,6 @@ BEGIN_FTR_SECTION
mtlr r0
lis r3,HID0_NAP@h
END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
-BEGIN_FTR_SECTION
- msync
- li r7,L2CSR0_L2FL@l
- mtspr SPRN_L2CSR0,r7
-2:
- mfspr r7,SPRN_L2CSR0
- andi. r4,r7,L2CSR0_L2FL@l
- bne 2b
-END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
1:
/* Go to NAP or DOZE now */
mfspr r4,SPRN_HID0
@@ -87,20 +74,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_L2CSR|CPU_FTR_CAN_NAP)
/*
* Return from NAP/DOZE mode, restore some CPU specific registers,
- * r2 containing physical address of current.
- * r11 points to the exception frame (physical address).
+ * r2 containing address of current.
+ * r11 points to the exception frame.
* We have to preserve r10.
*/
_GLOBAL(power_save_ppc32_restore)
lwz r9,_LINK(r11) /* interrupted in e500_idle */
stw r9,_NIP(r11) /* make it do a blr */
-
-#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r12, r1)
- lwz r11,TI_CPU(r12) /* get cpu number * 4 */
- slwi r11,r11,2
-#else
- li r11,0
-#endif
-
- b transfer_to_handler_cont
+ blr
+_ASM_NOKPROBE_SYMBOL(power_save_ppc32_restore)
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
new file mode 100644
index 000000000000..3d97fb833834
--- /dev/null
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2018, IBM Corporation.
+ *
+ * This file contains general idle entry/exit functions to save
+ * and restore stack and NVGPRs which allows C code to call idle
+ * states that lose GPRs, and it will return transparently with
+ * SRR1 wakeup reason return value.
+ *
+ * The platform / CPU caller must ensure SPRs and any other non-GPR
+ * state is saved and restored correctly, handle KVM, interrupts, etc.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cpuidle.h>
+#include <asm/thread_info.h> /* TLF_NAPPING */
+
+#ifdef CONFIG_PPC_P7_NAP
+/*
+ * Desired PSSCR in r3
+ *
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
+ *
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
+ */
+_GLOBAL(isa300_idle_stop_noloss)
+ mtspr SPRN_PSSCR,r3
+ PPC_STOP
+ li r3,0
+ blr
+
+/*
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
+ */
+_GLOBAL(isa300_idle_stop_mayloss)
+ mtspr SPRN_PSSCR,r3
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
+ /* 168 bytes */
+ PPC_STOP
+ b . /* catch bugs */
+
+/*
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt handler can call this in order
+ * to return to the idle sleep function's caller with r3 as the return code.
+ *
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
+ */
+_GLOBAL(idle_return_gpr_loss)
+ ld r1,PACAR1(r13)
+ ld r4,-8*20(r1)
+ ld r5,-8*21(r1)
+ mtlr r4
+ mtcr r5
+ /*
+ * KVM nap requires r2 to be saved, rather than just restoring it
+ * from PACATOC. This could be avoided for that less common case
+ * if KVM saved its r2.
+ */
+ ld r2,-8*1(r1)
+ ld r14,-8*2(r1)
+ ld r15,-8*3(r1)
+ ld r16,-8*4(r1)
+ ld r17,-8*5(r1)
+ ld r18,-8*6(r1)
+ ld r19,-8*7(r1)
+ ld r20,-8*8(r1)
+ ld r21,-8*9(r1)
+ ld r22,-8*10(r1)
+ ld r23,-8*11(r1)
+ ld r24,-8*12(r1)
+ ld r25,-8*13(r1)
+ ld r26,-8*14(r1)
+ ld r27,-8*15(r1)
+ ld r28,-8*16(r1)
+ ld r29,-8*17(r1)
+ ld r30,-8*18(r1)
+ ld r31,-8*19(r1)
+ blr
+
+/*
+ * This is the sequence required to execute idle instructions, as
+ * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ * We have to store a GPR somewhere, ptesync, then reload it, and create
+ * a false dependency on the result of the load. It doesn't matter which
+ * GPR we store, or where we store it. We have already stored r2 to the
+ * stack at -8(r1) in isa206_idle_insn_mayloss, so use that.
+ */
+#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
+ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
+ std r2,-8(r1); \
+ ptesync; \
+ ld r2,-8(r1); \
+236: cmpd cr0,r2,r2; \
+ bne 236b; \
+ IDLE_INST; \
+ b . /* catch bugs */
+
+/*
+ * Desired instruction type in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
+ *
+ * This must be called in real-mode (MSR_IDLE).
+ */
+_GLOBAL(isa206_idle_insn_mayloss)
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /*
+ * Use the stack red zone rather than a new frame for saving regs since
+ * in the case of no GPR loss the wakeup code branches directly back to
+ * the caller without deallocating the stack frame first.
+ */
+ std r2,-8*1(r1)
+ std r14,-8*2(r1)
+ std r15,-8*3(r1)
+ std r16,-8*4(r1)
+ std r17,-8*5(r1)
+ std r18,-8*6(r1)
+ std r19,-8*7(r1)
+ std r20,-8*8(r1)
+ std r21,-8*9(r1)
+ std r22,-8*10(r1)
+ std r23,-8*11(r1)
+ std r24,-8*12(r1)
+ std r25,-8*13(r1)
+ std r26,-8*14(r1)
+ std r27,-8*15(r1)
+ std r28,-8*16(r1)
+ std r29,-8*17(r1)
+ std r30,-8*18(r1)
+ std r31,-8*19(r1)
+ std r4,-8*20(r1)
+ std r5,-8*21(r1)
+ cmpwi r3,PNV_THREAD_NAP
+ bne 1f
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
+1: cmpwi r3,PNV_THREAD_SLEEP
+ bne 2f
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
+2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
+#endif
+
+#ifdef CONFIG_PPC_970_NAP
+_GLOBAL(power4_idle_nap)
+ LOAD_REG_IMMEDIATE(r7, MSR_KERNEL|MSR_EE|MSR_POW)
+ ld r9,PACA_THREAD_INFO(r13)
+ ld r8,TI_LOCAL_FLAGS(r9)
+ ori r8,r8,_TLF_NAPPING
+ std r8,TI_LOCAL_FLAGS(r9)
+ /*
+ * NAPPING bit is set, from this point onward power4_fixup_nap
+ * will cause exceptions to return to power4_idle_nap_return.
+ */
+1: sync
+ isync
+ mtmsrd r7
+ isync
+ b 1b
+
+ .globl power4_idle_nap_return
+power4_idle_nap_return:
+ blr
+#endif
diff --git a/arch/powerpc/kernel/idle_power4.S b/arch/powerpc/kernel/idle_power4.S
deleted file mode 100644
index f57a19348bdd..000000000000
--- a/arch/powerpc/kernel/idle_power4.S
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * This file contains the power_save function for 970-family CPUs.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/irqflags.h>
-
-#undef DEBUG
-
- .text
-
-_GLOBAL(power4_idle)
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_CAN_NAP)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
-
- /* Hard disable interrupts */
- mfmsr r7
- rldicl r0,r7,48,1
- rotldi r0,r0,16
- mtmsrd r0,1
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- bnelr
-
- /* Soft-enable interrupts */
-#ifdef CONFIG_TRACE_IRQFLAGS
- mflr r0
- std r0,16(r1)
- stdu r1,-128(r1)
- bl trace_hardirqs_on
- addi r1,r1,128
- ld r0,16(r1)
- mtlr r0
- mfmsr r7
-#endif /* CONFIG_TRACE_IRQFLAGS */
-
- li r0,1
- stb r0,PACASOFTIRQEN(r13) /* we'll hard-enable shortly */
-BEGIN_FTR_SECTION
- DSSALL
- sync
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- CURRENT_THREAD_INFO(r9, r1)
- ld r8,TI_LOCAL_FLAGS(r9) /* set napping bit */
- ori r8,r8,_TLF_NAPPING /* so when we take an exception */
- std r8,TI_LOCAL_FLAGS(r9) /* it will return to our caller */
- ori r7,r7,MSR_EE
- oris r7,r7,MSR_POW@h
-1: sync
- isync
- mtmsrd r7
- isync
- b 1b
-
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
deleted file mode 100644
index be05841396cf..000000000000
--- a/arch/powerpc/kernel/idle_power7.S
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * This file contains the power_save function for Power7 CPUs.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/ppc-opcode.h>
-#include <asm/hw_irq.h>
-#include <asm/kvm_book3s_asm.h>
-#include <asm/opal.h>
-
-#undef DEBUG
-
-/* Idle state entry routines */
-
-#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
- /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
- ptesync; \
- ld r0,0(r1); \
-1: cmp cr0,r0,r0; \
- bne 1b; \
- IDLE_INST; \
- b .
-
- .text
-
-/*
- * Pass requested state in r3:
- * 0 - nap
- * 1 - sleep
- *
- * To check IRQ_HAPPENED in r4
- * 0 - don't check
- * 1 - check
- */
-_GLOBAL(power7_powersave_common)
- /* Use r3 to pass state nap/sleep/winkle */
- /* NAP is a state loss, we create a regs frame on the
- * stack, fill it up with the state we care about and
- * stick a pointer to it in PACAR1. We really only
- * need to save PC, some CR bits and the NV GPRs,
- * but for now an interrupt frame will do.
- */
- mflr r0
- std r0,16(r1)
- stdu r1,-INT_FRAME_SIZE(r1)
- std r0,_LINK(r1)
- std r0,_NIP(r1)
-
-#ifndef CONFIG_SMP
- /* Make sure FPU, VSX etc... are flushed as we may lose
- * state when going to nap mode
- */
- bl discard_lazy_cpu_state
-#endif /* CONFIG_SMP */
-
- /* Hard disable interrupts */
- mfmsr r9
- rldicl r9,r9,48,1
- rotldi r9,r9,16
- mtmsrd r9,1 /* hard-disable interrupts */
-
- /* Check if something happened while soft-disabled */
- lbz r0,PACAIRQHAPPENED(r13)
- cmpwi cr0,r0,0
- beq 1f
- cmpwi cr0,r4,0
- beq 1f
- addi r1,r1,INT_FRAME_SIZE
- ld r0,16(r1)
- mtlr r0
- blr
-
-1: /* We mark irqs hard disabled as this is the state we'll
- * be in when returning and we need to tell arch_local_irq_restore()
- * about it
- */
- li r0,PACA_IRQ_HARD_DIS
- stb r0,PACAIRQHAPPENED(r13)
-
- /* We haven't lost state ... yet */
- li r0,0
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* Continue saving state */
- SAVE_GPR(2, r1)
- SAVE_NVGPRS(r1)
- mfcr r4
- std r4,_CCR(r1)
- std r9,_MSR(r1)
- std r1,PACAR1(r13)
-
-_GLOBAL(power7_enter_nap_mode)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're napping */
- li r4,KVM_HWTHREAD_IN_NAP
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- cmpwi cr0,r3,1
- beq 2f
- IDLE_STATE_ENTER_SEQ(PPC_NAP)
- /* No return */
-2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
- /* No return */
-
-_GLOBAL(power7_idle)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDRBASE(r3,powersave_nap)
- lwz r4,ADDROFF(powersave_nap)(r3)
- cmpwi 0,r4,0
- beqlr
- li r3, 1
- /* fall through */
-
-_GLOBAL(power7_nap)
- mr r4,r3
- li r3,0
- b power7_powersave_common
- /* No return */
-
-_GLOBAL(power7_sleep)
- li r3,1
- li r4,1
- b power7_powersave_common
- /* No return */
-
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1 - stack pointer
- * r3 - opal token
- */
-opal_call_realmode:
- mflr r12
- std r12,_LINK(r1)
- ld r2,PACATOC(r13)
- /* Set opal return address */
- LOAD_REG_ADDR(r0,return_from_opal_call)
- mtlr r0
- /* Handle endian-ness */
- li r0,MSR_LE
- mfmsr r12
- andc r12,r12,r0
- mtspr SPRN_HSRR1,r12
- mr r0,r3 /* Move opal token to r0 */
- LOAD_REG_ADDR(r11,opal)
- ld r12,8(r11)
- ld r2,0(r11)
- mtspr SPRN_HSRR0,r12
- hrfid
-
-return_from_opal_call:
- FIXUP_ENDIAN
- ld r0,_LINK(r1)
- mtlr r0
- blr
-
-#define CHECK_HMI_INTERRUPT \
- mfspr r0,SPRN_SRR1; \
-BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r0,45-31,0xf; /* extract wake reason field (P8) */ \
-FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r0,45-31,0xe; /* P7 wake reason field is 3 bits */ \
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
- cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne 20f; \
- /* Invoke opal call to handle hmi */ \
- ld r2,PACATOC(r13); \
- ld r1,PACAR1(r13); \
- std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
- bl opal_call_realmode; \
- ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
-20: nop;
-
-
-_GLOBAL(power7_wakeup_tb_loss)
- ld r2,PACATOC(r13);
- ld r1,PACAR1(r13)
-
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- /* Time base re-sync */
- li r3,OPAL_RESYNC_TIMEBASE
- bl opal_call_realmode;
-
- /* TODO: Check r3 for failure */
-
- REST_NVGPRS(r1)
- REST_GPR(2, r1)
- ld r3,_CCR(r1)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtcr r3
- mfspr r3,SPRN_SRR1 /* Return SRR1 */
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
-
-_GLOBAL(power7_wakeup_loss)
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- REST_NVGPRS(r1)
- REST_GPR(2, r1)
- ld r3,_CCR(r1)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtcr r3
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
-
-_GLOBAL(power7_wakeup_noloss)
- lbz r0,PACA_NAPSTATELOST(r13)
- cmpwi r0,0
- bne power7_wakeup_loss
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r1,PACAR1(r13)
- ld r4,_MSR(r1)
- ld r5,_NIP(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtspr SPRN_SRR1,r4
- mtspr SPRN_SRR0,r5
- rfid
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
new file mode 100644
index 000000000000..b7029beed847
--- /dev/null
+++ b/arch/powerpc/kernel/ima_arch.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+
+#include <linux/ima.h>
+#include <asm/secure_boot.h>
+
+bool arch_ima_get_secureboot(void)
+{
+ return is_ppc_secureboot_enabled();
+}
+
+/*
+ * The "secure_rules" are enabled only on "secureboot" enabled systems.
+ * These rules verify the file signatures against known good values.
+ * The "appraise_type=imasig|modsig" option allows the known good signature
+ * to be stored as an xattr or as an appended signature.
+ *
+ * To avoid duplicate signature verification as much as possible, the IMA
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG
+ * is not enabled.
+ */
+static const char *const secure_rules[] = {
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG
+ "appraise func=MODULE_CHECK appraise_type=imasig|modsig",
+#endif
+ NULL
+};
+
+/*
+ * The "trusted_rules" are enabled only on "trustedboot" enabled systems.
+ * These rules add the kexec kernel image and kernel modules file hashes to
+ * the IMA measurement list.
+ */
+static const char *const trusted_rules[] = {
+ "measure func=KEXEC_KERNEL_CHECK",
+ "measure func=MODULE_CHECK",
+ NULL
+};
+
+/*
+ * The "secure_and_trusted_rules" contains rules for both the secure boot and
+ * trusted boot. The "template=ima-modsig" option includes the appended
+ * signature, when available, in the IMA measurement list.
+ */
+static const char *const secure_and_trusted_rules[] = {
+ "measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
+ "measure func=MODULE_CHECK template=ima-modsig",
+ "appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG
+ "appraise func=MODULE_CHECK appraise_type=imasig|modsig",
+#endif
+ NULL
+};
+
+/*
+ * Returns the relevant IMA arch-specific policies based on the system secure
+ * boot state.
+ */
+const char *const *arch_get_ima_policy(void)
+{
+ if (is_ppc_secureboot_enabled()) {
+ if (IS_ENABLED(CONFIG_MODULE_SIG))
+ set_module_sig_enforced();
+
+ if (is_ppc_trustedboot_enabled())
+ return secure_and_trusted_rules;
+ else
+ return secure_rules;
+ } else if (is_ppc_trustedboot_enabled()) {
+ return trusted_rules;
+ }
+
+ return NULL;
+}
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
new file mode 100644
index 000000000000..e0c681d0b076
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt.c
@@ -0,0 +1,509 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/context_tracking.h>
+#include <linux/err.h>
+#include <linux/compat.h>
+#include <linux/rseq.h>
+#include <linux/sched/debug.h> /* for show_regs */
+
+#include <asm/kup.h>
+#include <asm/cputime.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+#include <asm/paca.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/signal.h>
+#include <asm/switch_to.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+#if defined(CONFIG_PPC_ADV_DEBUG_REGS) && defined(CONFIG_PPC32)
+unsigned long global_dbcr0[NR_CPUS];
+#endif
+
+#if defined(CONFIG_PREEMPT_DYNAMIC)
+DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched);
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant);
+static inline bool exit_must_hard_disable(void)
+{
+ return static_branch_unlikely(&interrupt_exit_not_reentrant);
+}
+#else
+static inline bool exit_must_hard_disable(void)
+{
+ return true;
+}
+#endif
+
+/*
+ * local irqs must be disabled. Returns false if the caller must re-enable
+ * them, check for new work, and try again.
+ *
+ * This should be called with local irqs disabled, but if they were previously
+ * enabled when the interrupt handler returns (indicating a process-context /
+ * synchronous interrupt) then irqs_enabled should be true.
+ *
+ * If restartable is true then EE/RI can be left on because interrupts are handled
+ * with a restart sequence.
+ */
+static notrace __always_inline bool prep_irq_for_enabled_exit(bool restartable)
+{
+ bool must_hard_disable = (exit_must_hard_disable() || !restartable);
+
+ /* This must be done with RI=1 because tracing may touch vmaps */
+ trace_hardirqs_on();
+
+ if (must_hard_disable)
+ __hard_EE_RI_disable();
+
+#ifdef CONFIG_PPC64
+ /* This pattern matches prep_irq_for_idle */
+ if (unlikely(lazy_irq_pending_nocheck())) {
+ if (must_hard_disable) {
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ __hard_RI_enable();
+ }
+ trace_hardirqs_off();
+
+ return false;
+ }
+#endif
+ return true;
+}
+
+static notrace void booke_load_dbcr0(void)
+{
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+ unsigned long dbcr0 = current->thread.debug.dbcr0;
+
+ if (likely(!(dbcr0 & DBCR0_IDM)))
+ return;
+
+ /*
+ * Check to see if the dbcr0 register is set up to debug.
+ * Use the internal debug mode bit to do this.
+ */
+ mtmsr(mfmsr() & ~MSR_DE);
+ if (IS_ENABLED(CONFIG_PPC32)) {
+ isync();
+ global_dbcr0[smp_processor_id()] = mfspr(SPRN_DBCR0);
+ }
+ mtspr(SPRN_DBCR0, dbcr0);
+ mtspr(SPRN_DBSR, -1);
+#endif
+}
+
+static notrace void check_return_regs_valid(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ unsigned long trap, srr0, srr1;
+ static bool warned;
+ u8 *validp;
+ char *h;
+
+ if (trap_is_scv(regs))
+ return;
+
+ trap = TRAP(regs);
+ // EE in HV mode sets HSRRs like 0xea0
+ if (cpu_has_feature(CPU_FTR_HVMODE) && trap == INTERRUPT_EXTERNAL)
+ trap = 0xea0;
+
+ switch (trap) {
+ case 0x980:
+ case INTERRUPT_H_DATA_STORAGE:
+ case 0xe20:
+ case 0xe40:
+ case INTERRUPT_HMI:
+ case 0xe80:
+ case 0xea0:
+ case INTERRUPT_H_FAC_UNAVAIL:
+ case 0x1200:
+ case 0x1500:
+ case 0x1600:
+ case 0x1800:
+ validp = &local_paca->hsrr_valid;
+ if (!READ_ONCE(*validp))
+ return;
+
+ srr0 = mfspr(SPRN_HSRR0);
+ srr1 = mfspr(SPRN_HSRR1);
+ h = "H";
+
+ break;
+ default:
+ validp = &local_paca->srr_valid;
+ if (!READ_ONCE(*validp))
+ return;
+
+ srr0 = mfspr(SPRN_SRR0);
+ srr1 = mfspr(SPRN_SRR1);
+ h = "";
+ break;
+ }
+
+ if (srr0 == regs->nip && srr1 == regs->msr)
+ return;
+
+ /*
+ * A NMI / soft-NMI interrupt may have come in after we found
+ * srr_valid and before the SRRs are loaded. The interrupt then
+ * comes in and clobbers SRRs and clears srr_valid. Then we load
+ * the SRRs here and test them above and find they don't match.
+ *
+ * Test validity again after that, to catch such false positives.
+ *
+ * This test in general will have some window for false negatives
+ * and may not catch and fix all such cases if an NMI comes in
+ * later and clobbers SRRs without clearing srr_valid, but hopefully
+ * such things will get caught most of the time, statistically
+ * enough to be able to get a warning out.
+ */
+ if (!READ_ONCE(*validp))
+ return;
+
+ if (!data_race(warned)) {
+ data_race(warned = true);
+ printk("%sSRR0 was: %lx should be: %lx\n", h, srr0, regs->nip);
+ printk("%sSRR1 was: %lx should be: %lx\n", h, srr1, regs->msr);
+ show_regs(regs);
+ }
+
+ WRITE_ONCE(*validp, 0); /* fixup */
+#endif
+}
+
+static notrace unsigned long
+interrupt_exit_user_prepare_main(unsigned long ret, struct pt_regs *regs)
+{
+ unsigned long ti_flags;
+
+again:
+ ti_flags = read_thread_flags();
+ while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
+ local_irq_enable();
+ if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) {
+ schedule();
+ } else {
+ /*
+ * SIGPENDING must restore signal handler function
+ * argument GPRs, and some non-volatiles (e.g., r1).
+ * Restore all for now. This could be made lighter.
+ */
+ if (ti_flags & _TIF_SIGPENDING)
+ ret |= _TIF_RESTOREALL;
+ do_notify_resume(regs, ti_flags);
+ }
+ local_irq_disable();
+ ti_flags = read_thread_flags();
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && IS_ENABLED(CONFIG_PPC_FPU)) {
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely((ti_flags & _TIF_RESTORE_TM))) {
+ restore_tm_state(regs);
+ } else {
+ unsigned long mathflags = MSR_FP;
+
+ if (cpu_has_feature(CPU_FTR_VSX))
+ mathflags |= MSR_VEC | MSR_VSX;
+ else if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ mathflags |= MSR_VEC;
+
+ /*
+ * If userspace MSR has all available FP bits set,
+ * then they are live and no need to restore. If not,
+ * it means the regs were given up and restore_math
+ * may decide to restore them (to avoid taking an FP
+ * fault).
+ */
+ if ((regs->msr & mathflags) != mathflags)
+ restore_math(regs);
+ }
+ }
+
+ check_return_regs_valid(regs);
+
+ user_enter_irqoff();
+ if (!prep_irq_for_enabled_exit(true)) {
+ user_exit_irqoff();
+ local_irq_enable();
+ local_irq_disable();
+ goto again;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ booke_load_dbcr0();
+
+ account_cpu_user_exit();
+
+ /* Restore user access locks last */
+ kuap_user_restore(regs);
+
+ return ret;
+}
+
+/*
+ * This should be called after a syscall returns, with r3 the return value
+ * from the syscall. If this function returns non-zero, the system call
+ * exit assembly should additionally load all GPR registers and CTR and XER
+ * from the interrupt frame.
+ *
+ * The function graph tracer can not trace the return side of this function,
+ * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
+ */
+notrace unsigned long syscall_exit_prepare(unsigned long r3,
+ struct pt_regs *regs,
+ long scv)
+{
+ unsigned long ti_flags;
+ unsigned long ret = 0;
+ bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv;
+
+ CT_WARN_ON(ct_state() == CT_STATE_USER);
+
+ kuap_assert_locked();
+
+ regs->result = r3;
+
+ /* Check whether the syscall is issued inside a restartable sequence */
+ rseq_syscall(regs);
+
+ ti_flags = read_thread_flags();
+
+ if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && is_not_scv) {
+ if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
+ r3 = -r3;
+ regs->ccr |= 0x10000000; /* Set SO bit in CR */
+ }
+ }
+
+ if (unlikely(ti_flags & _TIF_PERSYSCALL_MASK)) {
+ if (ti_flags & _TIF_RESTOREALL)
+ ret = _TIF_RESTOREALL;
+ else
+ regs->gpr[3] = r3;
+ clear_bits(_TIF_PERSYSCALL_MASK, &current_thread_info()->flags);
+ } else {
+ regs->gpr[3] = r3;
+ }
+
+ if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) {
+ do_syscall_trace_leave(regs);
+ ret |= _TIF_RESTOREALL;
+ }
+
+ local_irq_disable();
+ ret = interrupt_exit_user_prepare_main(ret, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long syscall_exit_restart(unsigned long r3, struct pt_regs *regs)
+{
+ /*
+ * This is called when detecting a soft-pending interrupt as well as
+ * an alternate-return interrupt. So we can't just have the alternate
+ * return path clear SRR1[MSR] and set PACA_IRQ_HARD_DIS (unless
+ * the soft-pending case were to fix things up as well). RI might be
+ * disabled, in which case it gets re-enabled by __hard_irq_disable().
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result = interrupt_exit_user_prepare_main(regs->exit_result, regs);
+
+ return regs->exit_result;
+}
+#endif
+
+notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs)
+{
+ unsigned long ret;
+
+ BUG_ON(regs_is_unrecoverable(regs));
+ BUG_ON(arch_irq_disabled_regs(regs));
+ CT_WARN_ON(ct_state() == CT_STATE_USER);
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * AMR can only have been unlocked if we interrupted the kernel.
+ */
+ kuap_assert_locked();
+
+ local_irq_disable();
+
+ ret = interrupt_exit_user_prepare_main(0, regs);
+
+#ifdef CONFIG_PPC64
+ regs->exit_result = ret;
+#endif
+
+ return ret;
+}
+
+void preempt_schedule_irq(void);
+
+notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
+{
+ unsigned long ret = 0;
+ unsigned long kuap;
+ bool stack_store = read_thread_flags() & _TIF_EMULATE_STACK_STORE;
+
+ if (regs_is_unrecoverable(regs))
+ unrecoverable_exception(regs);
+ /*
+ * CT_WARN_ON comes here via program_check_exception, so avoid
+ * recursion.
+ *
+ * Skip the assertion on PMIs on 64e to work around a problem caused
+ * by NMI PMIs incorrectly taking this interrupt return path, it's
+ * possible for this to hit after interrupt exit to user switches
+ * context to user. See also the comment in the performance monitor
+ * handler in exceptions-64e.S
+ */
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) &&
+ TRAP(regs) != INTERRUPT_PROGRAM &&
+ TRAP(regs) != INTERRUPT_PERFMON)
+ CT_WARN_ON(ct_state() == CT_STATE_USER);
+
+ kuap = kuap_get_and_assert_locked();
+
+ local_irq_disable();
+
+ if (!arch_irq_disabled_regs(regs)) {
+ /* Returning to a kernel context with local irqs enabled. */
+ WARN_ON_ONCE(!(regs->msr & MSR_EE));
+again:
+ if (need_irq_preemption()) {
+ /* Return to preemptible kernel context */
+ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) {
+ if (preempt_count() == 0)
+ preempt_schedule_irq();
+ }
+ }
+
+ check_return_regs_valid(regs);
+
+ /*
+ * Stack store exit can't be restarted because the interrupt
+ * stack frame might have been clobbered.
+ */
+ if (!prep_irq_for_enabled_exit(unlikely(stack_store))) {
+ /*
+ * Replay pending soft-masked interrupts now. Don't
+ * just local_irq_enable(); local_irq_disable(); because
+ * if we are returning from an asynchronous interrupt
+ * here, another one might hit after irqs are enabled,
+ * and it would exit via this same path allowing
+ * another to fire, and so on unbounded.
+ */
+ hard_irq_disable();
+ replay_soft_interrupts();
+ /* Took an interrupt, may have more exit work to do. */
+ goto again;
+ }
+#ifdef CONFIG_PPC64
+ /*
+ * An interrupt may clear MSR[EE] and set this concurrently,
+ * but it will be marked pending and the exit will be retried.
+ * This leaves a racy window where MSR[EE]=0 and HARD_DIS is
+ * clear, until interrupt_exit_kernel_restart() calls
+ * hard_irq_disable(), which will set HARD_DIS again.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+
+ } else {
+ check_return_regs_valid(regs);
+
+ if (unlikely(stack_store))
+ __hard_EE_RI_disable();
+#endif /* CONFIG_PPC64 */
+ }
+
+ if (unlikely(stack_store)) {
+ clear_bits(_TIF_EMULATE_STACK_STORE, &current_thread_info()->flags);
+ ret = 1;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ local_paca->tm_scratch = regs->msr;
+#endif
+
+ /*
+ * 64s does not want to mfspr(SPRN_AMR) here, because this comes after
+ * mtmsr, which would cause Read-After-Write stalls. Hence, take the
+ * AMR value from the check above.
+ */
+ kuap_kernel_restore(regs, kuap);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+notrace unsigned long interrupt_exit_user_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ trace_hardirqs_off();
+ user_exit_irqoff();
+ account_cpu_user_entry();
+
+ BUG_ON(!user_mode(regs));
+
+ regs->exit_result |= interrupt_exit_user_prepare(regs);
+
+ return regs->exit_result;
+}
+
+/*
+ * No real need to return a value here because the stack store case does not
+ * get restarted.
+ */
+notrace unsigned long interrupt_exit_kernel_restart(struct pt_regs *regs)
+{
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ set_kuap(AMR_KUAP_BLOCKED);
+#endif
+
+ if (regs->softe == IRQS_ENABLED)
+ trace_hardirqs_off();
+
+ BUG_ON(user_mode(regs));
+
+ return interrupt_exit_kernel_prepare(regs);
+}
+#endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
new file mode 100644
index 000000000000..1ad059a9e2fe
--- /dev/null
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -0,0 +1,772 @@
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/feature-fixups.h>
+#include <asm/head-64.h>
+#include <asm/hw_irq.h>
+#include <asm/kup.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+
+ .align 7
+
+.macro DEBUG_SRR_VALID srr
+#ifdef CONFIG_PPC_RFI_SRR_DEBUG
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
+ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_SRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
+ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ mfspr r11,SPRN_HSRR1
+ ld r12,_MSR(r1)
+100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+ .endif
+#endif
+.endm
+
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+ .globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
+ SCV_INTERRUPT_TO_KERNEL
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_LINK(r1)
+ std r11,_NIP(r1) /* Saved LR is also the next instruction */
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+ LOAD_PACA_TOC()
+ mfcr r12
+ li r11,0
+ /* Save syscall parameters in r3-r8 */
+ SAVE_GPRS(3, 8, r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
+
+ li r11,\trapnr
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ std r3,ORIG_GPR3(r1)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
+ /* Calling convention has r3 = regs, r4 = orig r0 */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ mr r4,r0
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /*
+ * scv enters with MSR[EE]=1 and is immediately considered soft-masked.
+ * The entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED,
+ * and interrupts may be masked and pending already.
+ * system_call_exception() will call trace_hardirqs_off() which means
+ * interrupts could already have been blocked before trace_hardirqs_off,
+ * but this is the best we can do.
+ */
+
+ /*
+ * Zero user registers to prevent influencing speculative execution
+ * state of kernel code.
+ */
+ SANITIZE_SYSCALL_GPRS()
+ bl CFUNC(system_call_exception)
+
+.Lsyscall_vectored_\name\()_exit:
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r5,1 /* scv */
+ bl CFUNC(syscall_exit_prepare)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Lsyscall_vectored_\name\()_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- syscall_vectored_\name\()_restart
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+ ld r2,_CCR(r1)
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ SANITIZE_RESTORE_NVGPRS()
+ cmpdi r3,0
+ bne .Lsyscall_vectored_\name\()_restore_regs
+
+ /* rfscv returns with LR->NIA and CTR->MSR */
+ mtlr r4
+ mtctr r5
+
+ /* Could zero these as per ABI, but we may consider a stricter ABI
+ * which preserves these if libc implementations can benefit, so
+ * restore them for now until further measurement is done. */
+ REST_GPR(0, r1)
+ REST_GPRS(4, 8, r1)
+ /* Zero volatile regs that may contain sensitive kernel data */
+ ZEROIZE_GPRS(9, 12)
+ mtspr SPRN_XER,r0
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
+ RFSCV_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+
+ ld r3,_CTR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_XER(r1)
+
+ HANDLER_RESTORE_NVGPRS()
+ REST_GPR(0, r1)
+ mtcr r2
+ mtctr r3
+ mtlr r4
+ mtspr SPRN_XER,r5
+ REST_GPRS(2, 13, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+.Lsyscall_vectored_\name\()_rst_end:
+
+syscall_vectored_\name\()_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ ld r3,RESULT(r1)
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(syscall_exit_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Lsyscall_vectored_\name\()_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_vectored_\name\()_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_vectored_\name\()_rst_start, .Lsyscall_vectored_\name\()_rst_end, syscall_vectored_\name\()_restart)
+
+.endm
+
+system_call_vectored common 0x3000
+
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+#endif /* CONFIG_PPC_BOOK3S */
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl system_call_common_real
+system_call_common_real:
+_ASM_NOKPROBE_SYMBOL(system_call_common_real)
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ mtmsrd r10
+
+ .balign IFETCH_ALIGN_BYTES
+ .globl system_call_common
+system_call_common:
+_ASM_NOKPROBE_SYMBOL(system_call_common)
+ mr r10,r1
+ ld r1,PACAKSAVE(r13)
+ std r10,0(r1)
+ std r11,_NIP(r1)
+ std r12,_MSR(r1)
+ std r0,GPR0(r1)
+ std r10,GPR1(r1)
+ std r2,GPR2(r1)
+#ifdef CONFIG_PPC_E500
+START_BTB_FLUSH_SECTION
+ BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+#endif
+ LOAD_PACA_TOC()
+ mfcr r12
+ li r11,0
+ /* Save syscall parameters in r3-r8 */
+ SAVE_GPRS(3, 8, r1)
+ /* Zero r9-r12, this should only be required when restoring all GPRs */
+ std r11,GPR9(r1)
+ std r11,GPR10(r1)
+ std r11,GPR11(r1)
+ std r11,GPR12(r1)
+ std r9,GPR13(r1)
+ SAVE_NVGPRS(r1)
+ std r11,_XER(r1)
+ std r11,_CTR(r1)
+ mflr r10
+
+ /*
+ * This clears CR0.SO (bit 28), which is the error indication on
+ * return from this system call.
+ */
+ rldimi r12,r11,28,(63-28)
+ li r11,0xc00
+ std r10,_LINK(r1)
+ std r11,_TRAP(r1)
+ std r12,_CCR(r1)
+ std r3,ORIG_GPR3(r1)
+ LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+ std r11,STACK_INT_FRAME_MARKER(r1) /* "regs" marker */
+ /* Calling convention has r3 = regs, r4 = orig r0 */
+ addi r3,r1,STACK_INT_FRAME_REGS
+ mr r4,r0
+
+#ifdef CONFIG_PPC_BOOK3S
+ li r11,1
+ stb r11,PACASRR_VALID(r13)
+#endif
+
+ /*
+ * We always enter kernel from userspace with irq soft-mask enabled and
+ * nothing pending. system_call_exception() will call
+ * trace_hardirqs_off().
+ */
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_PPC_BOOK3S
+ li r12,-1 /* Set MSR_EE and MSR_RI */
+ mtmsrd r12,1
+#else
+ wrteei 1
+#endif
+
+ /*
+ * Zero user registers to prevent influencing speculative execution
+ * state of kernel code.
+ */
+ SANITIZE_SYSCALL_GPRS()
+ bl CFUNC(system_call_exception)
+
+.Lsyscall_exit:
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r5,0 /* !scv */
+ bl CFUNC(syscall_exit_prepare)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Lsyscall_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- syscall_restart
+#endif
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+ ld r2,_CCR(r1)
+ ld r6,_LINK(r1)
+ mtlr r6
+
+#ifdef CONFIG_PPC_BOOK3S
+ lbz r4,PACASRR_VALID(r13)
+ cmpdi r4,0
+ bne 1f
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
+ ld r4,_NIP(r1)
+ ld r5,_MSR(r1)
+ mtspr SPRN_SRR0,r4
+ mtspr SPRN_SRR1,r5
+1:
+ DEBUG_SRR_VALID srr
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ SANITIZE_RESTORE_NVGPRS()
+ cmpdi r3,0
+ bne .Lsyscall_restore_regs
+ /* Zero volatile regs that may contain sensitive kernel data */
+ ZEROIZE_GPR(0)
+ ZEROIZE_GPRS(4, 12)
+ mtctr r0
+ mtspr SPRN_XER,r0
+.Lsyscall_restore_regs_cont:
+
+BEGIN_FTR_SECTION
+ HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
+ mtcr r2
+ REST_GPRS(2, 3, r1)
+ REST_GPR(13, r1)
+ REST_GPR(1, r1)
+ RFI_TO_USER
+ b . /* prevent speculative execution */
+
+.Lsyscall_restore_regs:
+ ld r3,_CTR(r1)
+ ld r4,_XER(r1)
+ HANDLER_RESTORE_NVGPRS()
+ mtctr r3
+ mtspr SPRN_XER,r4
+ REST_GPR(0, r1)
+ REST_GPRS(4, 12, r1)
+ b .Lsyscall_restore_regs_cont
+.Lsyscall_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+syscall_restart:
+_ASM_NOKPROBE_SYMBOL(syscall_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ ld r3,RESULT(r1)
+ addi r4,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(syscall_exit_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Lsyscall_rst_start
+1:
+
+SOFT_MASK_TABLE(.Lsyscall_rst_start, 1b)
+RESTART_TABLE(.Lsyscall_rst_start, .Lsyscall_rst_end, syscall_restart)
+#endif
+
+ /*
+ * If MSR EE/RI was never enabled, IRQs not reconciled, NVGPRs not
+ * touched, no exit work created, then this can be used.
+ */
+ .balign IFETCH_ALIGN_BYTES
+ .globl fast_interrupt_return_srr
+fast_interrupt_return_srr:
+_ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
+ kuap_check_amr r3, r4
+ ld r5,_MSR(r1)
+ andi. r0,r5,MSR_PR
+#ifdef CONFIG_PPC_BOOK3S
+ beq 1f
+ kuap_user_restore r3, r4
+ b .Lfast_user_interrupt_return_srr
+1: kuap_kernel_restore r3, r4
+ andi. r0,r5,MSR_RI
+ li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
+ bne+ .Lfast_kernel_interrupt_return_srr
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(unrecoverable_exception)
+ b . /* should not get here */
+#else
+ bne .Lfast_user_interrupt_return_srr
+ b .Lfast_kernel_interrupt_return_srr
+#endif
+
+.macro interrupt_return_macro srr
+ .balign IFETCH_ALIGN_BYTES
+ .globl interrupt_return_\srr
+interrupt_return_\srr\():
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
+ ld r4,_MSR(r1)
+ andi. r0,r4,MSR_PR
+ beq interrupt_return_\srr\()_kernel
+interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(interrupt_exit_user_prepare)
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+ cmpdi r3,0
+ bne- .Lrestore_nvgprs_\srr
+.Lrestore_nvgprs_\srr\()_cont:
+#endif
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+#ifdef CONFIG_PPC_BOOK3S
+.Linterrupt_return_\srr\()_user_rst_start:
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- interrupt_return_\srr\()_user_restart
+#endif
+ li r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
+
+.Lfast_user_interrupt_return_\srr\():
+ SANITIZE_RESTORE_NVGPRS()
+#ifdef CONFIG_PPC_BOOK3S
+ .ifc \srr,srr
+ lbz r4,PACASRR_VALID(r13)
+ .else
+ lbz r4,PACAHSRR_VALID(r13)
+ .endif
+ cmpdi r4,0
+ li r4,0
+ bne 1f
+#endif
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ .ifc \srr,srr
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACASRR_VALID(r13)
+#endif
+ .else
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACAHSRR_VALID(r13)
+#endif
+ .endif
+ DEBUG_SRR_VALID \srr
+
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ lbz r4,PACAIRQSOFTMASK(r13)
+ tdnei r4,IRQS_ENABLED
+#endif
+
+BEGIN_FTR_SECTION
+ ld r10,_PPR(r1)
+ mtspr SPRN_PPR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ ld r3,_CCR(r1)
+ ld r4,_LINK(r1)
+ ld r5,_CTR(r1)
+ ld r6,_XER(r1)
+ li r0,0
+
+ REST_GPRS(7, 13, r1)
+
+ mtcr r3
+ mtlr r4
+ mtctr r5
+ mtspr SPRN_XER,r6
+
+ REST_GPRS(2, 6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+ RFI_TO_USER
+ .else
+ HRFI_TO_USER
+ .endif
+ b . /* prevent speculative execution */
+.Linterrupt_return_\srr\()_user_rst_end:
+
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+.Lrestore_nvgprs_\srr\():
+ REST_NVGPRS(r1)
+ b .Lrestore_nvgprs_\srr\()_cont
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_user_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ addi r3,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(interrupt_exit_user_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Linterrupt_return_\srr\()_user_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_user_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr\()_user_rst_end, interrupt_return_\srr\()_user_restart)
+#endif
+
+ .balign IFETCH_ALIGN_BYTES
+interrupt_return_\srr\()_kernel:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
+ addi r3,r1,STACK_INT_FRAME_REGS
+ bl CFUNC(interrupt_exit_kernel_prepare)
+
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+.Linterrupt_return_\srr\()_kernel_rst_start:
+ ld r11,SOFTE(r1)
+ cmpwi r11,IRQS_ENABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ beq .Linterrupt_return_\srr\()_soft_enabled
+
+ /*
+ * Returning to soft-disabled context.
+ * Check if a MUST_HARD_MASK interrupt has become pending, in which
+ * case we need to disable MSR[EE] in the return context.
+ *
+ * The MSR[EE] check catches among other things the short incoherency
+ * in hard_irq_disable() between clearing MSR[EE] and setting
+ * PACA_IRQ_HARD_DIS.
+ */
+ ld r12,_MSR(r1)
+ andi. r10,r12,MSR_EE
+ beq .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r10,r11,PACA_IRQ_MUST_HARD_MASK
+ bne 1f // HARD_MASK is pending
+ // No HARD_MASK pending, clear possible HARD_DIS set by interrupt
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ stb r11,PACAIRQHAPPENED(r13)
+ b .Lfast_kernel_interrupt_return_\srr\()
+
+
+1: /* Must clear MSR_EE from _MSR */
+#ifdef CONFIG_PPC_BOOK3S
+ li r10,0
+ /* Clear valid before changing _MSR */
+ .ifc \srr,srr
+ stb r10,PACASRR_VALID(r13)
+ .else
+ stb r10,PACAHSRR_VALID(r13)
+ .endif
+#endif
+ xori r12,r12,MSR_EE
+ std r12,_MSR(r1)
+ b .Lfast_kernel_interrupt_return_\srr\()
+
+.Linterrupt_return_\srr\()_soft_enabled:
+ /*
+ * In the soft-enabled case, need to double-check that we have no
+ * pending interrupts that might have come in before we reached the
+ * restart section of code, and restart the exit so those can be
+ * handled.
+ *
+ * If there are none, it is possible that the interrupt still
+ * has PACA_IRQ_HARD_DIS set, which needs to be cleared for the
+ * interrupted context. This clear will not clobber a new pending
+ * interrupt coming in, because we're in the restart section, so
+ * any such interrupt would return to the restart location.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+ lbz r11,PACAIRQHAPPENED(r13)
+ andi. r11,r11,(~PACA_IRQ_HARD_DIS)@l
+ bne- interrupt_return_\srr\()_kernel_restart
+#endif
+ li r11,0
+ stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
+
+.Lfast_kernel_interrupt_return_\srr\():
+ SANITIZE_RESTORE_NVGPRS()
+ cmpdi cr1,r3,0
+#ifdef CONFIG_PPC_BOOK3S
+ .ifc \srr,srr
+ lbz r4,PACASRR_VALID(r13)
+ .else
+ lbz r4,PACAHSRR_VALID(r13)
+ .endif
+ cmpdi r4,0
+ li r4,0
+ bne 1f
+#endif
+ ld r11,_NIP(r1)
+ ld r12,_MSR(r1)
+ .ifc \srr,srr
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACASRR_VALID(r13)
+#endif
+ .else
+ mtspr SPRN_HSRR0,r11
+ mtspr SPRN_HSRR1,r12
+1:
+#ifdef CONFIG_PPC_BOOK3S
+ stb r4,PACAHSRR_VALID(r13)
+#endif
+ .endif
+ DEBUG_SRR_VALID \srr
+
+BEGIN_FTR_SECTION
+ stdcx. r0,0,r1 /* to clear the reservation */
+FTR_SECTION_ELSE
+ ldarx r0,0,r1
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+ ld r3,_LINK(r1)
+ ld r4,_CTR(r1)
+ ld r5,_XER(r1)
+ ld r6,_CCR(r1)
+ li r0,0
+
+ REST_GPRS(7, 12, r1)
+
+ mtlr r3
+ mtctr r4
+ mtspr SPRN_XER,r5
+
+ /*
+ * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
+ * the reliable stack unwinder later on. Clear it.
+ */
+ std r0,STACK_INT_FRAME_MARKER(r1)
+
+ REST_GPRS(2, 5, r1)
+
+ bne- cr1,1f /* emulate stack store */
+ mtcr r6
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ .ifc \srr,srr
+ RFI_TO_KERNEL
+ .else
+ HRFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+
+1: /*
+ * Emulate stack store with update. New r1 value was already calculated
+ * and updated in our interrupt regs by emulate_loadstore, but we can't
+ * store the previous value of r1 to the stack before re-loading our
+ * registers from it, otherwise they could be clobbered. Use
+ * PACA_EXGEN as temporary storage to hold the store data, as
+ * interrupts are disabled here so it won't be clobbered.
+ */
+ mtcr r6
+ std r9,PACA_EXGEN+0(r13)
+ addi r9,r1,INT_FRAME_SIZE /* get original r1 */
+ REST_GPR(6, r1)
+ REST_GPR(0, r1)
+ REST_GPR(1, r1)
+ std r9,0(r1) /* perform store component of stdu */
+ ld r9,PACA_EXGEN+0(r13)
+
+ .ifc \srr,srr
+ RFI_TO_KERNEL
+ .else
+ HRFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
+.Linterrupt_return_\srr\()_kernel_rst_end:
+
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_\srr\()_kernel_restart:
+_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
+ GET_PACA(r13)
+ ld r1,PACA_EXIT_SAVE_R1(r13)
+ LOAD_PACA_TOC()
+ addi r3,r1,STACK_INT_FRAME_REGS
+ li r11,IRQS_ALL_DISABLED
+ stb r11,PACAIRQSOFTMASK(r13)
+ bl CFUNC(interrupt_exit_kernel_restart)
+ std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
+ b .Linterrupt_return_\srr\()_kernel_rst_start
+1:
+
+SOFT_MASK_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, 1b)
+RESTART_TABLE(.Linterrupt_return_\srr\()_kernel_rst_start, .Linterrupt_return_\srr\()_kernel_rst_end, interrupt_return_\srr\()_kernel_restart)
+#endif
+
+.endm
+
+interrupt_return_macro srr
+#ifdef CONFIG_PPC_BOOK3S
+interrupt_return_macro hsrr
+
+ .globl __end_soft_masked
+__end_soft_masked:
+DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
+#endif /* CONFIG_PPC_BOOK3S */
+
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
+ li r3,0 /* fork() return value */
+ b .Lsyscall_vectored_common_exit
+#endif
+
+_GLOBAL(ret_from_fork)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
+ li r3,0 /* fork() return value */
+ b .Lsyscall_exit
+
+_GLOBAL(ret_from_kernel_user_thread)
+ bl CFUNC(schedule_tail)
+ mtctr r14
+ mr r3,r15
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ mr r12,r14
+#endif
+ bctrl
+ li r3,0
+ /*
+ * It does not matter whether this returns via the scv or sc path
+ * because it returns as execve() and therefore has no calling ABI
+ * (i.e., it sets registers according to the exec()ed entry point).
+ */
+ b .Lsyscall_exit
+
+_GLOBAL(start_kernel_thread)
+ bl CFUNC(schedule_tail)
+ mtctr r14
+ mr r3,r15
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ mr r12,r14
+#endif
+ bctrl
+ /*
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
+ */
+100: trap
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
deleted file mode 100644
index 24b968f8e4d8..000000000000
--- a/arch/powerpc/kernel/io-workarounds.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Support PCI IO workaround
- *
- * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
- * IBM, Corp.
- * (C) Copyright 2007-2008 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/sched.h> /* for init_mm */
-
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/pgtable.h>
-#include <asm/ppc-pci.h>
-#include <asm/io-workarounds.h>
-
-#define IOWA_MAX_BUS 8
-
-static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
-static unsigned int iowa_bus_count;
-
-static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
-{
- int i, j;
- struct resource *res;
- unsigned long vstart, vend;
-
- for (i = 0; i < iowa_bus_count; i++) {
- struct iowa_bus *bus = &iowa_busses[i];
- struct pci_controller *phb = bus->phb;
-
- if (vaddr) {
- vstart = (unsigned long)phb->io_base_virt;
- vend = vstart + phb->pci_io_size - 1;
- if ((vaddr >= vstart) && (vaddr <= vend))
- return bus;
- }
-
- if (paddr)
- for (j = 0; j < 3; j++) {
- res = &phb->mem_resources[j];
- if (paddr >= res->start && paddr <= res->end)
- return bus;
- }
- }
-
- return NULL;
-}
-
-#ifdef CONFIG_PPC_INDIRECT_MMIO
-struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
-{
- unsigned hugepage_shift;
- struct iowa_bus *bus;
- int token;
-
- token = PCI_GET_ADDR_TOKEN(addr);
-
- if (token && token <= iowa_bus_count)
- bus = &iowa_busses[token - 1];
- else {
- unsigned long vaddr, paddr;
- pte_t *ptep;
-
- vaddr = (unsigned long)PCI_FIX_ADDR(addr);
- if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
- return NULL;
-
- ptep = find_linux_pte_or_hugepte(init_mm.pgd, vaddr,
- &hugepage_shift);
- if (ptep == NULL)
- paddr = 0;
- else {
- /*
- * we don't have hugepages backing iomem
- */
- WARN_ON(hugepage_shift);
- paddr = pte_pfn(*ptep) << PAGE_SHIFT;
- }
- bus = iowa_pci_find(vaddr, paddr);
-
- if (bus == NULL)
- return NULL;
- }
-
- return bus;
-}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
-{
- return NULL;
-}
-#endif /* !CONFIG_PPC_INDIRECT_MMIO */
-
-#ifdef CONFIG_PPC_INDIRECT_PIO
-struct iowa_bus *iowa_pio_find_bus(unsigned long port)
-{
- unsigned long vaddr = (unsigned long)pci_io_base + port;
- return iowa_pci_find(vaddr, 0);
-}
-#else
-struct iowa_bus *iowa_pio_find_bus(unsigned long port)
-{
- return NULL;
-}
-#endif
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \
-static ret iowa_##name at \
-{ \
- struct iowa_bus *bus; \
- bus = iowa_##space##_find_bus(aa); \
- if (bus && bus->ops && bus->ops->name) \
- return bus->ops->name al; \
- return __do_##name al; \
-}
-
-#define DEF_PCI_AC_NORET(name, at, al, space, aa) \
-static void iowa_##name at \
-{ \
- struct iowa_bus *bus; \
- bus = iowa_##space##_find_bus(aa); \
- if (bus && bus->ops && bus->ops->name) { \
- bus->ops->name al; \
- return; \
- } \
- __do_##name al; \
-}
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-static const struct ppc_pci_io iowa_pci_io = {
-
-#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name,
-#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name,
-
-#include <asm/io-defs.h>
-
-#undef DEF_PCI_AC_RET
-#undef DEF_PCI_AC_NORET
-
-};
-
-#ifdef CONFIG_PPC_INDIRECT_MMIO
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller)
-{
- struct iowa_bus *bus;
- void __iomem *res = __ioremap_caller(addr, size, flags, caller);
- int busno;
-
- bus = iowa_pci_find(0, (unsigned long)addr);
- if (bus != NULL) {
- busno = bus - iowa_busses;
- PCI_SET_ADDR_TOKEN(res, busno + 1);
- }
- return res;
-}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-#define iowa_ioremap NULL
-#endif /* !CONFIG_PPC_INDIRECT_MMIO */
-
-/* Enable IO workaround */
-static void io_workaround_init(void)
-{
- static int io_workaround_inited;
-
- if (io_workaround_inited)
- return;
- ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
-}
-
-/* Register new bus to support workaround */
-void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops,
- int (*initfunc)(struct iowa_bus *, void *), void *data)
-{
- struct iowa_bus *bus;
- struct device_node *np = phb->dn;
-
- io_workaround_init();
-
- if (iowa_bus_count >= IOWA_MAX_BUS) {
- pr_err("IOWA:Too many pci bridges, "
- "workarounds disabled for %s\n", np->full_name);
- return;
- }
-
- bus = &iowa_busses[iowa_bus_count];
- bus->phb = phb;
- bus->ops = ops;
- bus->private = data;
-
- if (initfunc)
- if ((*initfunc)(bus, data))
- return;
-
- iowa_bus_count++;
-
- pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
-}
-
diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c
index 2a2b4aeab80f..bcc201c01514 100644
--- a/arch/powerpc/kernel/io.c
+++ b/arch/powerpc/kernel/io.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* I/O string operations
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -10,11 +11,6 @@
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*
* Rewritten in C by Stephen Rothwell.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/types.h>
@@ -35,13 +31,14 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count)
if (unlikely(count <= 0))
return;
- asm volatile("sync");
+
+ mb();
do {
- tmp = *port;
+ tmp = *(const volatile u8 __force *)port;
eieio();
*tbuf++ = tmp;
} while (--count != 0);
- asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+ data_barrier(tmp);
}
EXPORT_SYMBOL(_insb);
@@ -51,75 +48,80 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count)
if (unlikely(count <= 0))
return;
- asm volatile("sync");
+
+ mb();
do {
- *port = *tbuf++;
+ *(volatile u8 __force *)port = *tbuf++;
} while (--count != 0);
- asm volatile("sync");
+ mb();
}
EXPORT_SYMBOL(_outsb);
-void _insw_ns(const volatile u16 __iomem *port, void *buf, long count)
+void _insw(const volatile u16 __iomem *port, void *buf, long count)
{
u16 *tbuf = buf;
u16 tmp;
if (unlikely(count <= 0))
return;
- asm volatile("sync");
+
+ mb();
do {
- tmp = *port;
+ tmp = *(const volatile u16 __force *)port;
eieio();
*tbuf++ = tmp;
} while (--count != 0);
- asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+ data_barrier(tmp);
}
-EXPORT_SYMBOL(_insw_ns);
+EXPORT_SYMBOL(_insw);
-void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count)
+void _outsw(volatile u16 __iomem *port, const void *buf, long count)
{
const u16 *tbuf = buf;
if (unlikely(count <= 0))
return;
- asm volatile("sync");
+
+ mb();
do {
- *port = *tbuf++;
+ *(volatile u16 __force *)port = *tbuf++;
} while (--count != 0);
- asm volatile("sync");
+ mb();
}
-EXPORT_SYMBOL(_outsw_ns);
+EXPORT_SYMBOL(_outsw);
-void _insl_ns(const volatile u32 __iomem *port, void *buf, long count)
+void _insl(const volatile u32 __iomem *port, void *buf, long count)
{
u32 *tbuf = buf;
u32 tmp;
if (unlikely(count <= 0))
return;
- asm volatile("sync");
+
+ mb();
do {
- tmp = *port;
+ tmp = *(const volatile u32 __force *)port;
eieio();
*tbuf++ = tmp;
} while (--count != 0);
- asm volatile("twi 0,%0,0; isync" : : "r" (tmp));
+ data_barrier(tmp);
}
-EXPORT_SYMBOL(_insl_ns);
+EXPORT_SYMBOL(_insl);
-void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count)
+void _outsl(volatile u32 __iomem *port, const void *buf, long count)
{
const u32 *tbuf = buf;
if (unlikely(count <= 0))
return;
- asm volatile("sync");
+
+ mb();
do {
- *port = *tbuf++;
+ *(volatile u32 __force *)port = *tbuf++;
} while (--count != 0);
- asm volatile("sync");
+ mb();
}
-EXPORT_SYMBOL(_outsl_ns);
+EXPORT_SYMBOL(_outsl);
#define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0)
@@ -131,7 +133,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n)
lc |= lc << 8;
lc |= lc << 16;
- __asm__ __volatile__ ("sync" : : : "memory");
+ mb();
while(n && !IO_CHECK_ALIGN(p, 4)) {
*((volatile u8 *)p) = c;
p++;
@@ -147,7 +149,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n)
p++;
n--;
}
- __asm__ __volatile__ ("sync" : : : "memory");
+ mb();
}
EXPORT_SYMBOL(_memset_io);
@@ -156,7 +158,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src,
{
void *vsrc = (void __force *) src;
- __asm__ __volatile__ ("sync" : : : "memory");
+ mb();
while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) {
*((u8 *)dest) = *((volatile u8 *)vsrc);
eieio();
@@ -178,7 +180,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src,
dest++;
n--;
}
- __asm__ __volatile__ ("sync" : : : "memory");
+ mb();
}
EXPORT_SYMBOL(_memcpy_fromio);
@@ -186,7 +188,7 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
{
void *vdest = (void __force *) dest;
- __asm__ __volatile__ ("sync" : : : "memory");
+ mb();
while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) {
*((volatile u8 *)vdest) = *((u8 *)src);
src++;
@@ -205,6 +207,6 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n)
vdest++;
n--;
}
- __asm__ __volatile__ ("sync" : : : "memory");
+ mb();
}
EXPORT_SYMBOL(_memcpy_toio);
diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c
index 12e48d56f771..72862a4d3a5d 100644
--- a/arch/powerpc/kernel/iomap.c
+++ b/arch/powerpc/kernel/iomap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* ppc64 "iomap" interface implementation.
*
@@ -8,114 +9,13 @@
#include <linux/export.h>
#include <asm/io.h>
#include <asm/pci-bridge.h>
-
-/*
- * Here comes the ppc64 implementation of the IOMAP
- * interfaces.
- */
-unsigned int ioread8(void __iomem *addr)
-{
- return readb(addr);
-}
-unsigned int ioread16(void __iomem *addr)
-{
- return readw(addr);
-}
-unsigned int ioread16be(void __iomem *addr)
-{
- return readw_be(addr);
-}
-unsigned int ioread32(void __iomem *addr)
-{
- return readl(addr);
-}
-unsigned int ioread32be(void __iomem *addr)
-{
- return readl_be(addr);
-}
-EXPORT_SYMBOL(ioread8);
-EXPORT_SYMBOL(ioread16);
-EXPORT_SYMBOL(ioread16be);
-EXPORT_SYMBOL(ioread32);
-EXPORT_SYMBOL(ioread32be);
-
-void iowrite8(u8 val, void __iomem *addr)
-{
- writeb(val, addr);
-}
-void iowrite16(u16 val, void __iomem *addr)
-{
- writew(val, addr);
-}
-void iowrite16be(u16 val, void __iomem *addr)
-{
- writew_be(val, addr);
-}
-void iowrite32(u32 val, void __iomem *addr)
-{
- writel(val, addr);
-}
-void iowrite32be(u32 val, void __iomem *addr)
-{
- writel_be(val, addr);
-}
-EXPORT_SYMBOL(iowrite8);
-EXPORT_SYMBOL(iowrite16);
-EXPORT_SYMBOL(iowrite16be);
-EXPORT_SYMBOL(iowrite32);
-EXPORT_SYMBOL(iowrite32be);
-
-/*
- * These are the "repeat read/write" functions. Note the
- * non-CPU byte order. We do things in "IO byteorder"
- * here.
- *
- * FIXME! We could make these do EEH handling if we really
- * wanted. Not clear if we do.
- */
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- readsb(addr, dst, count);
-}
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- readsw(addr, dst, count);
-}
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- readsl(addr, dst, count);
-}
-EXPORT_SYMBOL(ioread8_rep);
-EXPORT_SYMBOL(ioread16_rep);
-EXPORT_SYMBOL(ioread32_rep);
-
-void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- writesb(addr, src, count);
-}
-void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- writesw(addr, src, count);
-}
-void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- writesl(addr, src, count);
-}
-EXPORT_SYMBOL(iowrite8_rep);
-EXPORT_SYMBOL(iowrite16_rep);
-EXPORT_SYMBOL(iowrite32_rep);
+#include <asm/isa-bridge.h>
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *) (port + _IO_BASE);
}
-
-void ioport_unmap(void __iomem *addr)
-{
- /* Nothing to do */
-}
EXPORT_SYMBOL(ioport_map);
-EXPORT_SYMBOL(ioport_unmap);
#ifdef CONFIG_PCI
void pci_iounmap(struct pci_dev *dev, void __iomem *addr)
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index a10642a0d861..244eb4857e7f 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
*
@@ -6,20 +7,6 @@
* and Ben. Herrenschmidt, IBM Corporation
*
* Dynamic DMA mapping support, bus-independent parts.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
@@ -29,6 +16,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/dma-mapping.h>
#include <linux/bitmap.h>
#include <linux/iommu-helper.h>
@@ -38,8 +26,9 @@
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/sched.h>
+#include <linux/debugfs.h>
+#include <linux/vmalloc.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
@@ -47,9 +36,49 @@
#include <asm/fadump.h>
#include <asm/vio.h>
#include <asm/tce.h>
+#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
#define DBG(...)
+#ifdef CONFIG_IOMMU_DEBUGFS
+/*
+ * Getter behind the debugfs "weight" file: stores into *val the result of
+ * bitmap_weight() over the table's allocation bitmap (tbl->it_map, covering
+ * tbl->it_size bits). Always returns 0.
+ */
+static int iommu_debugfs_weight_get(void *data, u64 *val)
+{
+	const struct iommu_table *table = data;
+
+	*val = bitmap_weight(table->it_map, table->it_size);
+
+	return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(iommu_debugfs_fops_weight, iommu_debugfs_weight_get, NULL, "%llu\n");
+
+/*
+ * Create a debugfs directory for @tbl under iommu_debugfs_dir, named after
+ * tbl->it_index in zero-padded hex, and populate it with read-only (0400)
+ * entries: the bitmap "weight" file plus the table fields listed below.
+ */
+static void iommu_debugfs_add(struct iommu_table *tbl)
+{
+	char name[10];
+	struct dentry *liobn_entry;
+
+	/*
+	 * "%08lx" only sets a minimum width: an it_index that needs more hex
+	 * digits than name[] can hold would overrun the buffer with sprintf().
+	 * Bound the write to the buffer size instead.
+	 */
+	snprintf(name, sizeof(name), "%08lx", tbl->it_index);
+	liobn_entry = debugfs_create_dir(name, iommu_debugfs_dir);
+
+	debugfs_create_file_unsafe("weight", 0400, liobn_entry, tbl, &iommu_debugfs_fops_weight);
+	debugfs_create_ulong("it_size", 0400, liobn_entry, &tbl->it_size);
+	debugfs_create_ulong("it_page_shift", 0400, liobn_entry, &tbl->it_page_shift);
+	debugfs_create_ulong("it_reserved_start", 0400, liobn_entry, &tbl->it_reserved_start);
+	debugfs_create_ulong("it_reserved_end", 0400, liobn_entry, &tbl->it_reserved_end);
+	debugfs_create_ulong("it_indirect_levels", 0400, liobn_entry, &tbl->it_indirect_levels);
+	debugfs_create_ulong("it_level_size", 0400, liobn_entry, &tbl->it_level_size);
+}
+
+/*
+ * Remove the debugfs directory of @tbl: rebuild the directory name from
+ * tbl->it_index (zero-padded hex) and look it up under iommu_debugfs_dir.
+ */
+static void iommu_debugfs_del(struct iommu_table *tbl)
+{
+	char name[10];
+
+	/*
+	 * "%08lx" only sets a minimum width; bound the write so a wide
+	 * it_index cannot overrun name[].
+	 */
+	snprintf(name, sizeof(name), "%08lx", tbl->it_index);
+	debugfs_lookup_and_remove(name, iommu_debugfs_dir);
+}
+#else
+/* No-op stand-ins used when CONFIG_IOMMU_DEBUGFS is not defined. */
+static void iommu_debugfs_add(struct iommu_table *tbl){}
+static void iommu_debugfs_del(struct iommu_table *tbl){}
+#endif
+
static int novmerge;
static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int);
@@ -127,8 +156,7 @@ static ssize_t fail_iommu_store(struct device *dev,
return count;
}
-static DEVICE_ATTR(fail_iommu, S_IRUGO|S_IWUSR, fail_iommu_show,
- fail_iommu_store);
+static DEVICE_ATTR_RW(fail_iommu);
static int fail_iommu_bus_notify(struct notifier_block *nb,
unsigned long action, void *data)
@@ -146,17 +174,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb,
return 0;
}
-static struct notifier_block fail_iommu_bus_notifier = {
+/*
+ * PCI and VIO buses need separate notifier_block structs, since they're linked
+ * list nodes. Sharing a notifier_block would mean that any notifiers later
+ * registered for PCI buses would also get called by VIO buses and vice versa.
+ */
+static struct notifier_block fail_iommu_pci_bus_notifier = {
.notifier_call = fail_iommu_bus_notify
};
+#ifdef CONFIG_IBMVIO
+static struct notifier_block fail_iommu_vio_bus_notifier = {
+ .notifier_call = fail_iommu_bus_notify
+};
+#endif
+
static int __init fail_iommu_setup(void)
{
#ifdef CONFIG_PCI
- bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier);
#endif
#ifdef CONFIG_IBMVIO
- bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier);
+ bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier);
#endif
return 0;
@@ -185,12 +224,11 @@ static unsigned long iommu_range_alloc(struct device *dev,
int largealloc = npages > 15;
int pass = 0;
unsigned long align_mask;
- unsigned long boundary_size;
unsigned long flags;
unsigned int pool_nr;
struct iommu_pool *pool;
- align_mask = 0xffffffffffffffffl >> (64 - align_order);
+ align_mask = (1ull << align_order) - 1;
/* This allocator was derived from x86_64's bit string search */
@@ -198,17 +236,17 @@ static unsigned long iommu_range_alloc(struct device *dev,
if (unlikely(npages == 0)) {
if (printk_ratelimit())
WARN_ON(1);
- return DMA_ERROR_CODE;
+ return DMA_MAPPING_ERROR;
}
if (should_fail_iommu(dev))
- return DMA_ERROR_CODE;
+ return DMA_MAPPING_ERROR;
/*
* We don't need to disable preemption here because any CPU can
* safely use any IOMMU pool.
*/
- pool_nr = __raw_get_cpu_var(iommu_pool_hash) & (tbl->nr_pools - 1);
+ pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1);
if (largealloc)
pool = &(tbl->large_pool);
@@ -249,15 +287,9 @@ again:
}
}
- if (dev)
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- 1 << tbl->it_page_shift);
- else
- boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift);
- /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
-
n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset,
- boundary_size >> tbl->it_page_shift, align_mask);
+ dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift),
+ align_mask);
if (n == -1) {
if (likely(pass == 0)) {
/* First try the pool from the start */
@@ -275,10 +307,19 @@ again:
pass++;
goto again;
+ } else if (pass == tbl->nr_pools + 1) {
+ /* Last resort: try largepool */
+ spin_unlock(&pool->lock);
+ pool = &tbl->large_pool;
+ spin_lock(&pool->lock);
+ pool->hint = pool->start;
+ pass++;
+ goto again;
+
} else {
/* Give up */
spin_unlock_irqrestore(&(pool->lock), flags);
- return DMA_ERROR_CODE;
+ return DMA_MAPPING_ERROR;
}
}
@@ -307,38 +348,38 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
void *page, unsigned int npages,
enum dma_data_direction direction,
unsigned long mask, unsigned int align_order,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned long entry;
- dma_addr_t ret = DMA_ERROR_CODE;
+ dma_addr_t ret = DMA_MAPPING_ERROR;
int build_fail;
entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
- if (unlikely(entry == DMA_ERROR_CODE))
- return DMA_ERROR_CODE;
+ if (unlikely(entry == DMA_MAPPING_ERROR))
+ return DMA_MAPPING_ERROR;
entry += tbl->it_offset; /* Offset into real TCE table */
ret = entry << tbl->it_page_shift; /* Set the return dma address */
/* Put the TCEs in the HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
(unsigned long)page &
IOMMU_PAGE_MASK(tbl), direction, attrs);
- /* ppc_md.tce_build() only returns non-zero for transient errors.
+ /* tbl->it_ops->set() only returns non-zero for transient errors.
* Clean up the table bitmap in this case and return
- * DMA_ERROR_CODE. For all other errors the functionality is
+ * DMA_MAPPING_ERROR. For all other errors the functionality is
* not altered.
*/
if (unlikely(build_fail)) {
__iommu_free(tbl, ret, npages);
- return DMA_ERROR_CODE;
+ return DMA_MAPPING_ERROR;
}
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
/* Make sure updates are seen by hardware */
mb();
@@ -408,7 +449,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
if (!iommu_free_check(tbl, dma_addr, npages))
return;
- ppc_md.tce_free(tbl, entry, npages);
+ tbl->it_ops->clear(tbl, entry, npages);
spin_lock_irqsave(&(pool->lock), flags);
bitmap_clear(tbl->it_map, free_entry, npages);
@@ -424,14 +465,14 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
* not do an mb() here on purpose, it is not needed on any of
* the current platforms.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}
-int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
- struct scatterlist *sglist, int nelems,
- unsigned long mask, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+int ppc_iommu_map_sg(struct device *dev, struct iommu_table *tbl,
+ struct scatterlist *sglist, int nelems,
+ unsigned long mask, enum dma_data_direction direction,
+ unsigned long attrs)
{
dma_addr_t dma_next = 0, dma_addr;
struct scatterlist *s, *outs, *segstart;
@@ -443,7 +484,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
BUG_ON(direction == DMA_NONE);
if ((nelems == 0) || !tbl)
- return 0;
+ return -EINVAL;
outs = s = segstart = &sglist[0];
outcount = 1;
@@ -478,8 +519,9 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen);
/* Handle failure */
- if (unlikely(entry == DMA_ERROR_CODE)) {
- if (printk_ratelimit())
+ if (unlikely(entry == DMA_MAPPING_ERROR)) {
+ if (!(attrs & DMA_ATTR_NO_WARN) &&
+ printk_ratelimit())
dev_info(dev, "iommu_alloc failed, tbl %p "
"vaddr %lx npages %lu\n", tbl, vaddr,
npages);
@@ -489,13 +531,13 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
/* Convert entry to a dma_addr_t */
entry += tbl->it_offset;
dma_addr = entry << tbl->it_page_shift;
- dma_addr |= (s->offset & ~IOMMU_PAGE_MASK(tbl));
+ dma_addr |= (vaddr & ~IOMMU_PAGE_MASK(tbl));
DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n",
npages, entry, dma_addr);
/* Insert into HW table */
- build_fail = ppc_md.tce_build(tbl, entry, npages,
+ build_fail = tbl->it_ops->set(tbl, entry, npages,
vaddr & IOMMU_PAGE_MASK(tbl),
direction, attrs);
if(unlikely(build_fail))
@@ -534,17 +576,16 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
}
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
DBG("mapped %d elements:\n", outcount);
- /* For the sake of iommu_unmap_sg, we clear out the length in the
+ /* For the sake of ppc_iommu_unmap_sg, we clear out the length in the
* next entry of the sglist if we didn't fill the list completely
*/
if (outcount < incount) {
outs = sg_next(outs);
- outs->dma_address = DMA_ERROR_CODE;
outs->dma_length = 0;
}
@@ -562,19 +603,18 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl,
npages = iommu_num_pages(s->dma_address, s->dma_length,
IOMMU_PAGE_SIZE(tbl));
__iommu_free(tbl, vaddr, npages);
- s->dma_address = DMA_ERROR_CODE;
s->dma_length = 0;
}
if (s == outs)
break;
}
- return 0;
+ return -EIO;
}
-void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
+ int nelems, enum dma_data_direction direction,
+ unsigned long attrs)
{
struct scatterlist *sg;
@@ -600,11 +640,11 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
* do not do an mb() here, the affected platforms do not need it
* when freeing.
*/
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
}
-static void iommu_table_clear(struct iommu_table *tbl)
+void iommu_table_clear(struct iommu_table *tbl)
{
/*
* In case of firmware assisted dump system goes through clean
@@ -613,17 +653,17 @@ static void iommu_table_clear(struct iommu_table *tbl)
*/
if (!is_kdump_kernel() || is_fadump_active()) {
/* Clear the table in case firmware left allocations in it */
- ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
+ tbl->it_ops->clear(tbl, tbl->it_offset, tbl->it_size);
return;
}
#ifdef CONFIG_CRASH_DUMP
- if (ppc_md.tce_get) {
+ if (tbl->it_ops->get) {
unsigned long index, tceval, tcecount = 0;
/* Reserve the existing mappings left by the first kernel. */
for (index = 0; index < tbl->it_size; index++) {
- tceval = ppc_md.tce_get(tbl, index + tbl->it_offset);
+ tceval = tbl->it_ops->get(tbl, index + tbl->it_offset);
/*
* Freed TCE entry contains 0x7fffffffffffffff on JS20
*/
@@ -645,34 +685,64 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
+/*
+ * Mark entries of @tbl as reserved in the allocation bitmap (tbl->it_map).
+ *
+ * Bit 0 is set whenever the table starts at offset 0. The requested range
+ * [res_start, res_end) is then clamped to the table window
+ * [it_offset, it_offset + it_size). If nothing remains after clamping, an
+ * empty range (start == end == it_offset) is recorded and no further bits
+ * are set; otherwise the clamped range is stored in it_reserved_start /
+ * it_reserved_end and each of its entries is set in the bitmap.
+ */
+void iommu_table_reserve_pages(struct iommu_table *tbl,
+		unsigned long res_start, unsigned long res_end)
+{
+	const unsigned long first = tbl->it_offset;
+	const unsigned long limit = tbl->it_offset + tbl->it_size;
+	unsigned long entry;
+
+	WARN_ON_ONCE(res_end < res_start);
+
+	/*
+	 * Reserve page 0 so it will not be used for any mappings.
+	 * This keeps buggy drivers that treat page 0 as invalid from
+	 * crashing the machine or even losing data.
+	 */
+	if (first == 0)
+		set_bit(0, tbl->it_map);
+
+	/* Clamp the requested range to what this table actually covers */
+	if (res_start < first)
+		res_start = first;
+	if (res_end > limit)
+		res_end = limit;
+
+	/* Nothing left after clamping: record an empty reserved range */
+	if (res_start >= res_end) {
+		tbl->it_reserved_start = first;
+		tbl->it_reserved_end = first;
+		return;
+	}
+
+	tbl->it_reserved_start = res_start;
+	tbl->it_reserved_end = res_end;
+
+	/* Bitmap bits are relative to the start of the table */
+	for (entry = res_start; entry < res_end; entry++)
+		set_bit(entry - first, tbl->it_map);
+}
+
+
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+ unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
- struct page *page;
unsigned int i;
struct iommu_pool *p;
+ BUG_ON(!tbl->it_ops);
+
/* number of bytes needed for the bitmap */
sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
- page = alloc_pages_node(nid, GFP_KERNEL, get_order(sz));
- if (!page)
- panic("iommu_init_table: Can't allocate %ld bytes\n", sz);
- tbl->it_map = page_address(page);
- memset(tbl->it_map, 0, sz);
+ tbl->it_map = vzalloc_node(sz, nid);
+ if (!tbl->it_map) {
+ pr_err("%s: Can't allocate %ld bytes\n", __func__, sz);
+ return NULL;
+ }
- /*
- * Reserve page 0 so it will not be used for any mappings.
- * This avoids buggy drivers that consider page 0 to be invalid
- * to crash the machine or even lose data.
- */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -700,54 +770,82 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
iommu_table_clear(tbl);
if (!welcomed) {
- printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
- novmerge ? "disabled" : "enabled");
+ pr_info("IOMMU table initialized, virtual merging %s\n",
+ str_disabled_enabled(novmerge));
welcomed = 1;
}
+ iommu_debugfs_add(tbl);
+
return tbl;
}
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+bool iommu_table_in_use(struct iommu_table *tbl)
{
- unsigned long bitmap_sz;
- unsigned int order;
+ unsigned long start = 0, end;
- if (!tbl || !tbl->it_map) {
- printk(KERN_ERR "%s: expected TCE map for %s\n", __func__,
- node_name);
- return;
- }
-
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
+ /* ignore reserved bit0 */
if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ start = 1;
-#ifdef CONFIG_IOMMU_API
- if (tbl->it_group) {
- iommu_group_put(tbl->it_group);
- BUG_ON(tbl->it_group);
+ /* Simple case with no reserved MMIO32 region */
+ if (!tbl->it_reserved_start && !tbl->it_reserved_end)
+ return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size;
+
+ end = tbl->it_reserved_start - tbl->it_offset;
+ if (find_next_bit(tbl->it_map, end, start) != end)
+ return true;
+
+ start = tbl->it_reserved_end - tbl->it_offset;
+ end = tbl->it_size;
+ return find_next_bit(tbl->it_map, end, start) != end;
+}
+
+static void iommu_table_free(struct kref *kref)
+{
+ struct iommu_table *tbl;
+
+ tbl = container_of(kref, struct iommu_table, it_kref);
+
+ if (tbl->it_ops->free)
+ tbl->it_ops->free(tbl);
+
+ if (!tbl->it_map) {
+ kfree(tbl);
+ return;
}
-#endif
- /* verify that table contains no entries */
- if (!bitmap_empty(tbl->it_map, tbl->it_size))
- pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+ iommu_debugfs_del(tbl);
- /* calculate bitmap size in bytes */
- bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
+ /* verify that table contains no entries */
+ if (iommu_table_in_use(tbl))
+ pr_warn("%s: Unexpected TCEs\n", __func__);
/* free bitmap */
- order = get_order(bitmap_sz);
- free_pages((unsigned long) tbl->it_map, order);
+ vfree(tbl->it_map);
/* free table */
kfree(tbl);
}
+/*
+ * Try to take a reference on @tbl through its it_kref.
+ * Returns @tbl when kref_get_unless_zero() succeeds, NULL otherwise.
+ */
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+	return kref_get_unless_zero(&tbl->it_kref) ? tbl : NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+/*
+ * Drop a reference on @tbl; iommu_table_free() is the kref release callback.
+ * A NULL @tbl triggers a WARN and returns 0; otherwise the result of
+ * kref_put() is returned.
+ */
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+	if (!WARN_ON(!tbl))
+		return kref_put(&tbl->it_kref, iommu_table_free);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address passed here
* comprises a page address and offset into that page. The dma_addr_t
@@ -756,9 +854,9 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
struct page *page, unsigned long offset, size_t size,
unsigned long mask, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
- dma_addr_t dma_handle = DMA_ERROR_CODE;
+ dma_addr_t dma_handle = DMA_MAPPING_ERROR;
void *vaddr;
unsigned long uaddr;
unsigned int npages, align;
@@ -767,9 +865,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
align = 0;
if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
((unsigned long)vaddr & ~PAGE_MASK) == 0)
@@ -778,8 +876,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
mask >> tbl->it_page_shift, align,
attrs);
- if (dma_handle == DMA_ERROR_CODE) {
- if (printk_ratelimit()) {
+ if (dma_handle == DMA_MAPPING_ERROR) {
+ if (!(attrs & DMA_ATTR_NO_WARN) &&
+ printk_ratelimit()) {
dev_info(dev, "iommu_alloc failed, tbl %p "
"vaddr %p npages %d\n", tbl, vaddr,
npages);
@@ -793,7 +892,7 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl,
void iommu_unmap_page(struct iommu_table *tbl, dma_addr_t dma_handle,
size_t size, enum dma_data_direction direction,
- struct dma_attrs *attrs)
+ unsigned long attrs)
{
unsigned int npages;
@@ -819,6 +918,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
unsigned int order;
unsigned int nio_pages, io_order;
struct page *page;
+ int tcesize = (1 << tbl->it_page_shift);
size = PAGE_ALIGN(size);
order = get_order(size);
@@ -845,15 +945,17 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
memset(ret, 0, size);
/* Set up tces to cover the allocated range */
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
+
io_order = get_iommu_order(size, tbl);
mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
- mask >> tbl->it_page_shift, io_order, NULL);
- if (mapping == DMA_ERROR_CODE) {
+ mask >> tbl->it_page_shift, io_order, 0);
+ if (mapping == DMA_MAPPING_ERROR) {
free_pages((unsigned long)ret, order);
return NULL;
}
- *dma_handle = mapping;
+
+ *dma_handle = mapping | ((u64)ret & (tcesize - 1));
return ret;
}
@@ -864,24 +966,57 @@ void iommu_free_coherent(struct iommu_table *tbl, size_t size,
unsigned int nio_pages;
size = PAGE_ALIGN(size);
- nio_pages = size >> tbl->it_page_shift;
+ nio_pages = IOMMU_PAGE_ALIGN(size, tbl) >> tbl->it_page_shift;
iommu_free(tbl, dma_handle, nio_pages);
size = PAGE_ALIGN(size);
free_pages((unsigned long)vaddr, get_order(size));
}
}
+/*
+ * Translate a DMA direction into TCE permission bits:
+ *   DMA_BIDIRECTIONAL -> TCE_PCI_READ | TCE_PCI_WRITE
+ *   DMA_FROM_DEVICE   -> TCE_PCI_WRITE
+ *   DMA_TO_DEVICE     -> TCE_PCI_READ
+ * Any other direction yields 0.
+ */
+unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir)
+{
+	unsigned long perm = 0;
+
+	if (dir == DMA_BIDIRECTIONAL)
+		perm = TCE_PCI_READ | TCE_PCI_WRITE;
+	else if (dir == DMA_FROM_DEVICE)
+		perm = TCE_PCI_WRITE;
+	else if (dir == DMA_TO_DEVICE)
+		perm = TCE_PCI_READ;
+
+	return perm;
+}
+EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm);
+
#ifdef CONFIG_IOMMU_API
+
+/*
+ * Report whether @dev is mapped by an IOMMU.
+ *
+ * @data is treated as a struct pci_dev **. When device_iommu_mapped(dev) is
+ * true, the pci_dev containing @dev is stored through it and 1 is returned.
+ * Returns 0 for a NULL @dev or a device that is not IOMMU-mapped, leaving
+ * *data untouched.
+ *
+ * NOTE(review): to_pci_dev() is applied without a dev_is_pci() check, so
+ * this presumably is only ever called for PCI devices - confirm at callers.
+ */
+int dev_has_iommu_table(struct device *dev, void *data)
+{
+	struct pci_dev **ppdev = data;
+
+	/* Validate dev before deriving the enclosing pci_dev from it */
+	if (!dev)
+		return 0;
+
+	if (!device_iommu_mapped(dev))
+		return 0;
+
+	*ppdev = to_pci_dev(dev);
+	return 1;
+}
+
+
/*
* SPAPR TCE API
*/
static void group_release(void *iommu_data)
{
- struct iommu_table *tbl = iommu_data;
- tbl->it_group = NULL;
+ struct iommu_table_group *table_group = iommu_data;
+
+ table_group->group = NULL;
}
-void iommu_register_group(struct iommu_table *tbl,
+void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number, unsigned long pe_num)
{
struct iommu_group *grp;
@@ -893,8 +1028,8 @@ void iommu_register_group(struct iommu_table *tbl,
PTR_ERR(grp));
return;
}
- tbl->it_group = grp;
- iommu_group_set_iommudata(grp, tbl, group_release);
+ table_group->group = grp;
+ iommu_group_set_iommudata(grp, table_group, group_release);
name = kasprintf(GFP_KERNEL, "domain%d-pe%lx",
pci_domain_number, pe_num);
if (!name)
@@ -919,260 +1054,264 @@ EXPORT_SYMBOL_GPL(iommu_tce_direction);
void iommu_flush_tce(struct iommu_table *tbl)
{
/* Flush/invalidate TLB caches if necessary */
- if (ppc_md.tce_flush)
- ppc_md.tce_flush(tbl);
+ if (tbl->it_ops->flush)
+ tbl->it_ops->flush(tbl);
/* Make sure updates are seen by hardware */
mb();
}
EXPORT_SYMBOL_GPL(iommu_flush_tce);
-int iommu_tce_clear_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce_value,
- unsigned long npages)
+int iommu_tce_check_ioba(unsigned long page_shift,
+ unsigned long offset, unsigned long size,
+ unsigned long ioba, unsigned long npages)
{
- /* ppc_md.tce_free() does not support any value but 0 */
- if (tce_value)
- return -EINVAL;
+ unsigned long mask = (1UL << page_shift) - 1;
- if (ioba & ~IOMMU_PAGE_MASK(tbl))
+ if (ioba & mask)
return -EINVAL;
- ioba >>= tbl->it_page_shift;
- if (ioba < tbl->it_offset)
+ ioba >>= page_shift;
+ if (ioba < offset)
return -EINVAL;
- if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
+ if ((ioba + 1) > (offset + size))
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);
-int iommu_tce_put_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce)
+int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
{
- if (!(tce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- return -EINVAL;
-
- if (tce & ~(IOMMU_PAGE_MASK(tbl) | TCE_PCI_WRITE | TCE_PCI_READ))
- return -EINVAL;
+ unsigned long mask = (1UL << page_shift) - 1;
- if (ioba & ~IOMMU_PAGE_MASK(tbl))
- return -EINVAL;
-
- ioba >>= tbl->it_page_shift;
- if (ioba < tbl->it_offset)
- return -EINVAL;
-
- if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
+ if (gpa & mask)
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-unsigned long iommu_clear_tce(struct iommu_table *tbl, unsigned long entry)
+long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
+ unsigned long entry, unsigned long *hpa,
+ enum dma_data_direction *direction)
{
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
-
- spin_lock(&(pool->lock));
-
- oldtce = ppc_md.tce_get(tbl, entry);
- if (oldtce & (TCE_PCI_WRITE | TCE_PCI_READ))
- ppc_md.tce_free(tbl, entry, 1);
- else
- oldtce = 0;
+ long ret;
+ unsigned long size = 0;
- spin_unlock(&(pool->lock));
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction);
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL)) &&
+ !mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
+ &size))
+ SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
- return oldtce;
+ return ret;
}
-EXPORT_SYMBOL_GPL(iommu_clear_tce);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
-int iommu_clear_tces_and_put_pages(struct iommu_table *tbl,
+void iommu_tce_kill(struct iommu_table *tbl,
unsigned long entry, unsigned long pages)
{
- unsigned long oldtce;
- struct page *page;
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
- for ( ; pages; --pages, ++entry) {
- oldtce = iommu_clear_tce(tbl, entry);
- if (!oldtce)
- continue;
+int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
+{
+ /*
+ * The sysfs entries should be populated before
+ * binding IOMMU group. If sysfs entries aren't
+ * ready, we simply bail.
+ */
+ if (!device_is_registered(dev))
+ return -ENOENT;
- page = pfn_to_page(oldtce >> PAGE_SHIFT);
- WARN_ON(!page);
- if (page) {
- if (oldtce & TCE_PCI_WRITE)
- SetPageDirty(page);
- put_page(page);
- }
+ if (device_iommu_mapped(dev)) {
+ pr_debug("%s: Skipping device %s with iommu group %d\n",
+ __func__, dev_name(dev),
+ iommu_group_id(dev->iommu_group));
+ return -EBUSY;
}
- return 0;
+ pr_debug("%s: Adding %s to iommu group %d\n",
+ __func__, dev_name(dev), iommu_group_id(table_group->group));
+ /*
+ * This is still not adding devices via the IOMMU bus notifier because
+ * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
+ * pcibios_scan_phb() first (and this guy adds devices and triggers
+ * the notifier) and only then it calls pci_bus_add_devices() which
+ * configures DMA for buses which also creates PEs and IOMMU groups.
+ */
+ return iommu_probe_device(dev);
}
-EXPORT_SYMBOL_GPL(iommu_clear_tces_and_put_pages);
+EXPORT_SYMBOL_GPL(iommu_add_device);
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
/*
- * hwaddr is a kernel virtual address here (0xc... bazillion),
- * tce_build converts it to a physical address.
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
*/
-int iommu_tce_build(struct iommu_table *tbl, unsigned long entry,
- unsigned long hwaddr, enum dma_data_direction direction)
+static int
+spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev)
{
- int ret = -EBUSY;
- unsigned long oldtce;
- struct iommu_pool *pool = get_pool(tbl, entry);
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_table_group *table_group;
+ struct iommu_group *grp;
- spin_lock(&(pool->lock));
+ /* At first attach the ownership is already set */
+ if (!domain)
+ return 0;
- oldtce = ppc_md.tce_get(tbl, entry);
- /* Add new entry if it is not busy */
- if (!(oldtce & (TCE_PCI_WRITE | TCE_PCI_READ)))
- ret = ppc_md.tce_build(tbl, entry, 1, hwaddr, direction, NULL);
+ grp = iommu_group_get(dev);
+ table_group = iommu_group_get_iommudata(grp);
+ /*
+ * The domain being set to PLATFORM from earlier
+ * BLOCKED. The table_group ownership has to be released.
+ */
+ table_group->ops->release_ownership(table_group, dev);
+ iommu_group_put(grp);
- spin_unlock(&(pool->lock));
+ return 0;
+}
- /* if (unlikely(ret))
- pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << tbl->it_page_shift,
- hwaddr, ret); */
+static const struct iommu_domain_ops spapr_tce_platform_domain_ops = {
+ .attach_dev = spapr_tce_platform_iommu_attach_dev,
+};
- return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_tce_build);
+static struct iommu_domain spapr_tce_platform_domain = {
+ .type = IOMMU_DOMAIN_PLATFORM,
+ .ops = &spapr_tce_platform_domain_ops,
+};
-int iommu_put_tce_user_mode(struct iommu_table *tbl, unsigned long entry,
- unsigned long tce)
+static int
+spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev)
{
- int ret;
- struct page *page = NULL;
- unsigned long hwaddr, offset = tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
- enum dma_data_direction direction = iommu_tce_direction(tce);
-
- ret = get_user_pages_fast(tce & PAGE_MASK, 1,
- direction != DMA_TO_DEVICE, &page);
- if (unlikely(ret != 1)) {
- /* pr_err("iommu_tce: get_user_pages_fast failed tce=%lx ioba=%lx ret=%d\n",
- tce, entry << tbl->it_page_shift, ret); */
- return -EFAULT;
- }
- hwaddr = (unsigned long) page_address(page) + offset;
+ struct iommu_group *grp = iommu_group_get(dev);
+ struct iommu_table_group *table_group;
+ int ret = -EINVAL;
- ret = iommu_tce_build(tbl, entry, hwaddr, direction);
- if (ret)
- put_page(page);
-
- if (ret < 0)
- pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%d\n",
- __func__, entry << tbl->it_page_shift, tce, ret);
+ /*
+ * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain
+ * also sets the dma_api ops
+ */
+ table_group = iommu_group_get_iommudata(grp);
+ ret = table_group->ops->take_ownership(table_group, dev);
+ iommu_group_put(grp);
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_put_tce_user_mode);
-int iommu_take_ownership(struct iommu_table *tbl)
-{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+static const struct iommu_domain_ops spapr_tce_blocked_domain_ops = {
+ .attach_dev = spapr_tce_blocked_iommu_attach_dev,
+};
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+static struct iommu_domain spapr_tce_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &spapr_tce_blocked_domain_ops,
+};
- if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
- pr_err("iommu_tce: it_map is not empty");
- return -EBUSY;
+static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+ switch (cap) {
+ case IOMMU_CAP_CACHE_COHERENCY:
+ return true;
+ default:
+ break;
}
- memset(tbl->it_map, 0xff, sz);
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
-
- /*
- * Disable iommu bypass, otherwise the user can DMA to all of
- * our physical memory via the bypass window instead of just
- * the pages that has been explicitly mapped into the iommu
- */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, false);
-
- return 0;
+ return false;
}
-EXPORT_SYMBOL_GPL(iommu_take_ownership);
-void iommu_release_ownership(struct iommu_table *tbl)
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
{
- unsigned long sz = (tbl->it_size + 7) >> 3;
+ struct pci_dev *pdev;
+ struct pci_controller *hose;
- iommu_clear_tces_and_put_pages(tbl, tbl->it_offset, tbl->it_size);
- memset(tbl->it_map, 0, sz);
+ if (!dev_is_pci(dev))
+ return ERR_PTR(-ENODEV);
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
- /* The kernel owns the device now, we can restore the iommu bypass */
- if (tbl->set_bypass)
- tbl->set_bypass(tbl, true);
+ return &hose->iommu;
}
-EXPORT_SYMBOL_GPL(iommu_release_ownership);
-int iommu_add_device(struct device *dev)
+static void spapr_tce_iommu_release_device(struct device *dev)
{
- struct iommu_table *tbl;
+}
- /*
- * The sysfs entries should be populated before
- * binding IOMMU group. If sysfs entries isn't
- * ready, we simply bail.
- */
- if (!device_is_registered(dev))
- return -ENOENT;
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+ struct pci_controller *hose;
+ struct pci_dev *pdev;
- if (dev->iommu_group) {
- pr_debug("%s: Skipping device %s with iommu group %d\n",
- __func__, dev_name(dev),
- iommu_group_id(dev->iommu_group));
- return -EBUSY;
- }
+ pdev = to_pci_dev(dev);
+ hose = pdev->bus->sysdata;
- tbl = get_iommu_table_base(dev);
- if (!tbl || !tbl->it_group) {
- pr_debug("%s: Skipping device %s with no tbl\n",
- __func__, dev_name(dev));
- return 0;
- }
+ if (!hose->controller_ops.device_group)
+ return ERR_PTR(-ENOENT);
- pr_debug("%s: Adding %s to iommu group %d\n",
- __func__, dev_name(dev),
- iommu_group_id(tbl->it_group));
+ return hose->controller_ops.device_group(hose, pdev);
+}
- if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
- pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
- __func__, IOMMU_PAGE_SIZE(tbl),
- PAGE_SIZE, dev_name(dev));
- return -EINVAL;
- }
+/*
+ * IOMMU callbacks for PHB IOMMU devices; this is the ops table handed to
+ * iommu_device_register() by ppc_iommu_register_device(). It wires up the
+ * platform domain as the default domain, the blocked domain, and the
+ * capable/probe/release/device_group handlers defined above.
+ */
+static const struct iommu_ops spapr_tce_iommu_ops = {
+ .default_domain = &spapr_tce_platform_domain,
+ .blocked_domain = &spapr_tce_blocked_domain,
+ .capable = spapr_tce_iommu_capable,
+ .probe_device = spapr_tce_iommu_probe_device,
+ .release_device = spapr_tce_iommu_release_device,
+ .device_group = spapr_tce_iommu_device_group,
+};
+
+/*
+ * sysfs attribute groups passed to iommu_device_sysfs_add() for each PHB.
+ * The single group is named "spapr-tce-iommu" and its attribute list is
+ * empty (only the NULL terminator).
+ */
+static struct attribute *spapr_tce_iommu_attrs[] = {
+ NULL,
+};
+
+static struct attribute_group spapr_tce_iommu_group = {
+ .name = "spapr-tce-iommu",
+ .attrs = spapr_tce_iommu_attrs,
+};
+
+/* NULL-terminated list of groups, as passed to iommu_device_sysfs_add() */
+static const struct attribute_group *spapr_tce_iommu_groups[] = {
+ &spapr_tce_iommu_group,
+ NULL,
+};
+
+/*
+ * Register the IOMMU device embedded in @phb: add its sysfs entry, named
+ * "iommu-phb%04x" from phb->global_number with phb->parent as parent, then
+ * register it with spapr_tce_iommu_ops.
+ *
+ * NOTE(review): the return values of iommu_device_sysfs_add() and
+ * iommu_device_register() are not checked here - confirm that failures can
+ * safely be ignored, given that ppc_iommu_unregister_device() undoes both
+ * steps unconditionally.
+ */
+void ppc_iommu_register_device(struct pci_controller *phb)
+{
+ iommu_device_sysfs_add(&phb->iommu, phb->parent,
+ spapr_tce_iommu_groups, "iommu-phb%04x",
+ phb->global_number);
+ iommu_device_register(&phb->iommu, &spapr_tce_iommu_ops,
+ phb->parent);
+}
- return iommu_group_add_device(tbl->it_group, dev);
+void ppc_iommu_unregister_device(struct pci_controller *phb)
+{
+ iommu_device_unregister(&phb->iommu);
+ iommu_device_sysfs_remove(&phb->iommu);
}
-EXPORT_SYMBOL_GPL(iommu_add_device);
-void iommu_del_device(struct device *dev)
+/*
+ * This registers IOMMU devices of PHBs. This needs to happen
+ * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
+ * before subsys_initcall(iommu_subsys_init).
+ */
+static int __init spapr_tce_setup_phb_iommus_initcall(void)
{
- /*
- * Some devices might not have IOMMU table and group
- * and we needn't detach them from the associated
- * IOMMU groups
- */
- if (!dev->iommu_group) {
- pr_debug("iommu_tce: skipping device %s with no tbl\n",
- dev_name(dev));
- return;
- }
+ struct pci_controller *hose;
- iommu_group_remove_device(dev);
+ list_for_each_entry(hose, &hose_list, list_node) {
+ ppc_iommu_register_device(hose);
+ }
+ return 0;
}
-EXPORT_SYMBOL_GPL(iommu_del_device);
+postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
+#endif
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4c5891de162e..a0e8b998c9b5 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from arch/i386/kernel/irq.c
* Copyright (C) 1992 Linus Torvalds
@@ -8,11 +9,6 @@
* Adapted for Power Macintosh by Paul Mackerras
* Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* This file contains the code used by various IRQ handling routines:
* asking for different IRQ's should be done through these routines
* instead of just grabbing them. Thus setups with different IRQ numbers
@@ -24,7 +20,7 @@
* mask register (of which only 16 are defined), hence the weird shifting
* and complement of the cached_irq_mask. I want to be able to stuff
* this right into the SIU SMASK register.
- * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx
+ * Many of the prep/chrp functions are conditional compiled on CONFIG_PPC_8xx
* to reduce code space and undefined function references.
*/
@@ -50,351 +46,112 @@
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/mutex.h>
-#include <linux/bootmem.h>
#include <linux/pci.h>
#include <linux/debugfs.h>
#include <linux/of.h>
#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/cache.h>
-#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
#include <asm/smp.h>
-#include <asm/debug.h>
+#include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
+#include <asm/ppc_asm.h>
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#include <asm/firmware.h>
-#include <asm/lv1call.h>
-#endif
#define CREATE_TRACE_POINTS
#include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
-int __irq_offset_value;
-
#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(__irq_offset_value);
atomic_t ppc_n_lost_interrupts;
#ifdef CONFIG_TAU_INT
extern int tau_initialized;
-extern int tau_interrupts(int);
+u32 tau_interrupts(unsigned long cpu);
#endif
#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-
-int distribute_irqs = 1;
-
-static inline notrace unsigned long get_irq_happened(void)
-{
- unsigned long happened;
-
- __asm__ __volatile__("lbz %0,%1(13)"
- : "=r" (happened) : "i" (offsetof(struct paca_struct, irq_happened)));
-
- return happened;
-}
-
-static inline notrace void set_soft_enabled(unsigned long enable)
-{
- __asm__ __volatile__("stb %0,%1(13)"
- : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled)));
-}
-
-static inline notrace int decrementer_check_overflow(void)
-{
- u64 now = get_tb_or_rtc();
- u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
-
- return now >= *next_tb;
-}
-
-/* This is called whenever we are re-enabling interrupts
- * and returns either 0 (nothing to do) or 500/900/280/a00/e80 if
- * there's an EE, DEC or DBELL to generate.
- *
- * This is called in two contexts: From arch_local_irq_restore()
- * before soft-enabling interrupts, and from the exception exit
- * path when returning from an interrupt from a soft-disabled to
- * a soft enabled context. In both case we have interrupts hard
- * disabled.
- *
- * We take care of only clearing the bits we handled in the
- * PACA irq_happened field since we can only re-emit one at a
- * time and we don't want to "lose" one.
- */
-notrace unsigned int __check_irq_replay(void)
-{
- /*
- * We use local_paca rather than get_paca() to avoid all
- * the debug_smp_processor_id() business in this low level
- * function
- */
- unsigned char happened = local_paca->irq_happened;
-
- /* Clear bit 0 which we wouldn't clear otherwise */
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
-
- /*
- * Force the delivery of pending soft-disabled interrupts on PS3.
- * Any HV call will have this side effect.
- */
- if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
- u64 tmp, tmp2;
- lv1_get_version_info(&tmp, &tmp2);
- }
-
- /*
- * We may have missed a decrementer interrupt. We check the
- * decrementer itself rather than the paca irq_happened field
- * in case we also had a rollover while hard disabled
- */
- local_paca->irq_happened &= ~PACA_IRQ_DEC;
- if ((happened & PACA_IRQ_DEC) || decrementer_check_overflow())
- return 0x900;
-
- /* Finally check if an external interrupt happened */
- local_paca->irq_happened &= ~PACA_IRQ_EE;
- if (happened & PACA_IRQ_EE)
- return 0x500;
-
-#ifdef CONFIG_PPC_BOOK3E
- /* Finally check if an EPR external interrupt happened
- * this bit is typically set if we need to handle another
- * "edge" interrupt from within the MPIC "EPR" handler
- */
- local_paca->irq_happened &= ~PACA_IRQ_EE_EDGE;
- if (happened & PACA_IRQ_EE_EDGE)
- return 0x500;
-
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- if (happened & PACA_IRQ_DBELL)
- return 0x280;
-#else
- local_paca->irq_happened &= ~PACA_IRQ_DBELL;
- if (happened & PACA_IRQ_DBELL) {
- if (cpu_has_feature(CPU_FTR_HVMODE))
- return 0xe80;
- return 0xa00;
- }
-#endif /* CONFIG_PPC_BOOK3E */
-
- /* Check if an hypervisor Maintenance interrupt happened */
- local_paca->irq_happened &= ~PACA_IRQ_HMI;
- if (happened & PACA_IRQ_HMI)
- return 0xe60;
-
- /* There should be nothing left ! */
- BUG_ON(local_paca->irq_happened != 0);
-
- return 0;
-}
-
-notrace void arch_local_irq_restore(unsigned long en)
-{
- unsigned char irq_happened;
- unsigned int replay;
-
- /* Write the new soft-enabled value */
- set_soft_enabled(en);
- if (!en)
- return;
- /*
- * From this point onward, we can take interrupts, preempt,
- * etc... unless we got hard-disabled. We check if an event
- * happened. If none happened, we know we can just return.
- *
- * We may have preempted before the check below, in which case
- * we are checking the "new" CPU instead of the old one. This
- * is only a problem if an event happened on the "old" CPU.
- *
- * External interrupt events will have caused interrupts to
- * be hard-disabled, so there is no problem, we
- * cannot have preempted.
- */
- irq_happened = get_irq_happened();
- if (!irq_happened)
- return;
-
- /*
- * We need to hard disable to get a trusted value from
- * __check_irq_replay(). We also need to soft-disable
- * again to avoid warnings in there due to the use of
- * per-cpu variables.
- *
- * We know that if the value in irq_happened is exactly 0x01
- * then we are already hard disabled (there are other less
- * common cases that we'll ignore for now), so we skip the
- * (expensive) mtmsrd.
- */
- if (unlikely(irq_happened != PACA_IRQ_HARD_DIS))
- __hard_irq_disable();
-#ifdef CONFIG_TRACE_IRQFLAGS
- else {
- /*
- * We should already be hard disabled here. We had bugs
- * where that wasn't the case so let's dbl check it and
- * warn if we are wrong. Only do that when IRQ tracing
- * is enabled as mfmsr() can be costly.
- */
- if (WARN_ON(mfmsr() & MSR_EE))
- __hard_irq_disable();
- }
-#endif /* CONFIG_TRACE_IRQFLAG */
-
- set_soft_enabled(0);
-
- /*
- * Check if anything needs to be re-emitted. We haven't
- * soft-enabled yet to avoid warnings in decrementer_check_overflow
- * accessing per-cpu variables
- */
- replay = __check_irq_replay();
-
- /* We can soft-enable now */
- set_soft_enabled(1);
-
- /*
- * And replay if we have to. This will return with interrupts
- * hard-enabled.
- */
- if (replay) {
- __replay_interrupt(replay);
- return;
- }
-
- /* Finally, let's ensure we are hard enabled */
- __hard_irq_enable();
-}
-EXPORT_SYMBOL(arch_local_irq_restore);
-
-/*
- * This is specifically called by assembly code to re-enable interrupts
- * if they are currently disabled. This is typically called before
- * schedule() or do_signal() when returning to userspace. We do it
- * in C to avoid the burden of dealing with lockdep etc...
- *
- * NOTE: This is called with interrupts hard disabled but not marked
- * as such in paca->irq_happened, so we need to resync this.
- */
-void notrace restore_interrupts(void)
-{
- if (irqs_disabled()) {
- local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
- local_irq_enable();
- } else
- __hard_irq_enable();
-}
-
-/*
- * This is a helper to use when about to go into idle low-power
- * when the latter has the side effect of re-enabling interrupts
- * (such as calling H_CEDE under pHyp).
- *
- * You call this function with interrupts soft-disabled (this is
- * already the case when ppc_md.power_save is called). The function
- * will return whether to enter power save or just return.
- *
- * In the former case, it will have notified lockdep of interrupts
- * being re-enabled and generally sanitized the lazy irq state,
- * and in the latter case it will leave with interrupts hard
- * disabled and marked as such, so the local_irq_enable() call
- * in arch_cpu_idle() will properly re-enable everything.
- */
-bool prep_irq_for_idle(void)
-{
- /*
- * First we need to hard disable to ensure no interrupt
- * occurs before we effectively enter the low power state
- */
- hard_irq_disable();
-
- /*
- * If anything happened while we were soft-disabled,
- * we return now and do not enter the low power state.
- */
- if (lazy_irq_pending())
- return false;
-
- /* Tell lockdep we are about to re-enable */
- trace_hardirqs_on();
-
- /*
- * Mark interrupts as soft-enabled and clear the
- * PACA_IRQ_HARD_DIS from the pending mask since we
- * are about to hard enable as well as a side effect
- * of entering the low power state.
- */
- local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
- local_paca->soft_enabled = 1;
-
- /* Tell the caller to enter the low power state */
- return true;
-}
-
-#endif /* CONFIG_PPC64 */
-
int arch_show_interrupts(struct seq_file *p, int prec)
{
int j;
#if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT)
if (tau_initialized) {
- seq_printf(p, "%*s: ", prec, "TAU");
+ seq_printf(p, "%*s:", prec, "TAU");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", tau_interrupts(j));
+ seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10);
seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n");
}
#endif /* CONFIG_PPC32 && CONFIG_TAU_INT */
- seq_printf(p, "%*s: ", prec, "LOC");
+ seq_printf(p, "%*s:", prec, "LOC");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event);
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10);
seq_printf(p, " Local timer interrupts for timer event device\n");
- seq_printf(p, "%*s: ", prec, "LOC");
+ seq_printf(p, "%*s:", prec, "BCT");
+ for_each_online_cpu(j)
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10);
+ seq_printf(p, " Broadcast timer interrupts for timer event device\n");
+
+ seq_printf(p, "%*s:", prec, "LOC");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others);
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10);
seq_printf(p, " Local timer interrupts for others\n");
- seq_printf(p, "%*s: ", prec, "SPU");
+ seq_printf(p, "%*s:", prec, "SPU");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs);
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10);
seq_printf(p, " Spurious interrupts\n");
- seq_printf(p, "%*s: ", prec, "PMI");
+ seq_printf(p, "%*s:", prec, "PMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs);
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10);
seq_printf(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s: ", prec, "MCE");
+ seq_printf(p, "%*s:", prec, "MCE");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10);
seq_printf(p, " Machine check exceptions\n");
+#ifdef CONFIG_PPC_BOOK3S_64
if (cpu_has_feature(CPU_FTR_HVMODE)) {
- seq_printf(p, "%*s: ", prec, "HMI");
+ seq_printf(p, "%*s:", prec, "HMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- per_cpu(irq_stat, j).hmi_exceptions);
+ seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10);
seq_printf(p, " Hypervisor Maintenance Interrupts\n");
}
+#endif
+
+ seq_printf(p, "%*s:", prec, "NMI");
+ for_each_online_cpu(j)
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10);
+ seq_printf(p, " System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+ seq_printf(p, "%*s:", prec, "WDG");
+ for_each_online_cpu(j)
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10);
+ seq_printf(p, " Watchdog soft-NMI interrupts\n");
+#endif
#ifdef CONFIG_PPC_DOORBELL
if (cpu_has_feature(CPU_FTR_DBELL)) {
- seq_printf(p, "%*s: ", prec, "DBL");
+ seq_printf(p, "%*s:", prec, "DBL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs);
+ seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10);
seq_printf(p, " Doorbell interrupts\n");
}
#endif
@@ -409,11 +166,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
{
u64 sum = per_cpu(irq_stat, cpu).timer_irqs_event;
+ sum += per_cpu(irq_stat, cpu).broadcast_irqs_event;
sum += per_cpu(irq_stat, cpu).pmu_irqs;
sum += per_cpu(irq_stat, cpu).mce_exceptions;
sum += per_cpu(irq_stat, cpu).spurious_irqs;
sum += per_cpu(irq_stat, cpu).timer_irqs_others;
- sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+#ifdef CONFIG_PPC_BOOK3S_64
+ sum += paca_ptrs[cpu]->hmi_irqs;
+#endif
+ sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+ sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
#ifdef CONFIG_PPC_DOORBELL
sum += per_cpu(irq_stat, cpu).doorbell_irqs;
#endif
@@ -421,217 +185,169 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
return sum;
}
-#ifdef CONFIG_HOTPLUG_CPU
-void migrate_irqs(void)
+static inline void check_stack_overflow(unsigned long sp)
{
- struct irq_desc *desc;
- unsigned int irq;
- static int warned;
- cpumask_var_t mask;
- const struct cpumask *map = cpu_online_mask;
-
- alloc_cpumask_var(&mask, GFP_KERNEL);
-
- for_each_irq_desc(irq, desc) {
- struct irq_data *data;
- struct irq_chip *chip;
-
- data = irq_desc_get_irq_data(desc);
- if (irqd_is_per_cpu(data))
- continue;
-
- chip = irq_data_get_irq_chip(data);
-
- cpumask_and(mask, data->affinity, map);
- if (cpumask_any(mask) >= nr_cpu_ids) {
- printk("Breaking affinity for irq %i\n", irq);
- cpumask_copy(mask, map);
- }
- if (chip->irq_set_affinity)
- chip->irq_set_affinity(data, mask, true);
- else if (desc->action && !(warned++))
- printk("Cannot set affinity for irq %i\n", irq);
- }
+ if (!IS_ENABLED(CONFIG_DEBUG_STACKOVERFLOW))
+ return;
- free_cpumask_var(mask);
+ sp &= THREAD_SIZE - 1;
- local_irq_enable();
- mdelay(1);
- local_irq_disable();
+ /* check for stack overflow: is there less than 1/4th free? */
+ if (unlikely(sp < THREAD_SIZE / 4)) {
+ pr_err("do_IRQ: stack overflow: %ld\n", sp);
+ dump_stack();
+ }
}
-#endif
-static inline void check_stack_overflow(void)
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+static __always_inline void call_do_softirq(const void *sp)
{
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
- long sp;
-
- sp = __get_SP() & (THREAD_SIZE-1);
-
- /* check for stack overflow: is there less than 2KB free? */
- if (unlikely(sp < (sizeof(struct thread_info) + 2048))) {
- printk("do_IRQ: stack overflow: %ld\n",
- sp - sizeof(struct thread_info));
- dump_stack();
- }
+ /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. */
+ asm volatile (
+ PPC_STLU " %%r1, %[offset](%[sp]) ;"
+ "mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
+ "bl %[callee] ;"
#endif
+ PPC_LL " %%r1, 0(%%r1) ;"
+ : // Outputs
+ : // Inputs
+ [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
+ [callee] "i" (__do_softirq)
+ : // Clobbers
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+ "cr7", "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12"
+ );
}
+#endif
-void __do_irq(struct pt_regs *regs)
+DEFINE_STATIC_CALL_RET0(ppc_get_irq, *ppc_md.get_irq);
+
+static void __do_irq(struct pt_regs *regs, unsigned long oldsp)
{
unsigned int irq;
- irq_enter();
-
trace_irq_entry(regs);
- check_stack_overflow();
+ check_stack_overflow(oldsp);
/*
* Query the platform PIC for the interrupt & ack it.
*
* This will typically lower the interrupt line to the CPU
*/
- irq = ppc_md.get_irq();
+ irq = static_call(ppc_get_irq)();
/* We can hard enable interrupts now to allow perf interrupts */
- may_hard_irq_enable();
+ if (should_hard_irq_enable(regs))
+ do_hard_irq_enable();
/* And finally process it */
- if (unlikely(irq == NO_IRQ))
- __get_cpu_var(irq_stat).spurious_irqs++;
+ if (unlikely(!irq))
+ __this_cpu_inc(irq_stat.spurious_irqs);
else
generic_handle_irq(irq);
trace_irq_exit(regs);
+}
- irq_exit();
+static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
+{
+ register unsigned long r3 asm("r3") = (unsigned long)regs;
+
+ /* Temporarily switch r1 to sp, call __do_irq() then restore r1. */
+ asm volatile (
+ PPC_STLU " %%r1, %[offset](%[sp]) ;"
+ "mr %%r4, %%r1 ;"
+ "mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
+ "bl %[callee] ;"
+#endif
+ PPC_LL " %%r1, 0(%%r1) ;"
+ : // Outputs
+ "+r" (r3)
+ : // Inputs
+ [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
+ [callee] "i" (__do_irq)
+ : // Clobbers
+ "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
+ "cr7", "r0", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12"
+ );
}
-void do_IRQ(struct pt_regs *regs)
+void __do_IRQ(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- struct thread_info *curtp, *irqtp, *sirqtp;
+ void *cursp, *irqsp;
/* Switch to the irq stack to handle this */
- curtp = current_thread_info();
- irqtp = hardirq_ctx[raw_smp_processor_id()];
- sirqtp = softirq_ctx[raw_smp_processor_id()];
-
- /* Already there ? */
- if (unlikely(curtp == irqtp || curtp == sirqtp)) {
- __do_irq(regs);
- set_irq_regs(old_regs);
- return;
- }
-
- /* Prepare the thread_info in the irq stack */
- irqtp->task = curtp->task;
- irqtp->flags = 0;
-
- /* Copy the preempt_count so that the [soft]irq checks work. */
- irqtp->preempt_count = curtp->preempt_count;
-
- /* Switch stack and call */
- call_do_irq(regs, irqtp);
-
- /* Restore stack limit */
- irqtp->task = NULL;
+ cursp = (void *)(current_stack_pointer & ~(THREAD_SIZE - 1));
+ irqsp = hardirq_ctx[raw_smp_processor_id()];
- /* Copy back updates to the thread_info */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+ /* Already there ? If not switch stack and call */
+ if (unlikely(cursp == irqsp))
+ __do_irq(regs, current_stack_pointer);
+ else
+ call_do_irq(regs, irqsp);
set_irq_regs(old_regs);
}
-void __init init_IRQ(void)
+DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
{
- if (ppc_md.init_IRQ)
- ppc_md.init_IRQ();
-
- exc_lvl_ctx_init();
-
- irq_ctx_init();
+ __do_IRQ(regs);
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-struct thread_info *critirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *dbgirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *mcheckirq_ctx[NR_CPUS] __read_mostly;
+static void *__init alloc_vm_stack(void)
+{
+ return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
+ NUMA_NO_NODE, (void *)_RET_IP_);
+}
-void exc_lvl_ctx_init(void)
+static void __init vmap_irqstack_init(void)
{
- struct thread_info *tp;
- int i, cpu_nr;
+ int i;
for_each_possible_cpu(i) {
-#ifdef CONFIG_PPC64
- cpu_nr = i;
-#else
-#ifdef CONFIG_SMP
- cpu_nr = get_hard_smp_processor_id(i);
-#else
- cpu_nr = 0;
-#endif
-#endif
-
- memset((void *)critirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = critirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
-#ifdef CONFIG_BOOKE
- memset((void *)dbgirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = dbgirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = 0;
-
- memset((void *)mcheckirq_ctx[cpu_nr], 0, THREAD_SIZE);
- tp = mcheckirq_ctx[cpu_nr];
- tp->cpu = cpu_nr;
- tp->preempt_count = HARDIRQ_OFFSET;
-#endif
+ softirq_ctx[i] = alloc_vm_stack();
+ hardirq_ctx[i] = alloc_vm_stack();
}
}
-#endif
-struct thread_info *softirq_ctx[NR_CPUS] __read_mostly;
-struct thread_info *hardirq_ctx[NR_CPUS] __read_mostly;
-void irq_ctx_init(void)
+void __init init_IRQ(void)
{
- struct thread_info *tp;
- int i;
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ vmap_irqstack_init();
- for_each_possible_cpu(i) {
- memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
- tp = softirq_ctx[i];
- tp->cpu = i;
+ if (ppc_md.init_IRQ)
+ ppc_md.init_IRQ();
- memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
- tp = hardirq_ctx[i];
- tp->cpu = i;
- }
+ if (!WARN_ON(!ppc_md.get_irq))
+ static_call_update(ppc_get_irq, ppc_md.get_irq);
}
-void do_softirq_own_stack(void)
-{
- struct thread_info *curtp, *irqtp;
+#ifdef CONFIG_BOOKE
+void *critirq_ctx[NR_CPUS] __read_mostly;
+void *dbgirq_ctx[NR_CPUS] __read_mostly;
+void *mcheckirq_ctx[NR_CPUS] __read_mostly;
+#endif
- curtp = current_thread_info();
- irqtp = softirq_ctx[smp_processor_id()];
- irqtp->task = curtp->task;
- irqtp->flags = 0;
- call_do_softirq(irqtp);
- irqtp->task = NULL;
+void *softirq_ctx[NR_CPUS] __read_mostly;
+void *hardirq_ctx[NR_CPUS] __read_mostly;
- /* Set any flag that may have been set on the
- * alternate stack
- */
- if (irqtp->flags)
- set_bits(irqtp->flags, &curtp->flags);
+#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+void do_softirq_own_stack(void)
+{
+ call_do_softirq(softirq_ctx[smp_processor_id()]);
}
+#endif
irq_hw_number_t virq_to_hw(unsigned int virq)
{
@@ -675,18 +391,3 @@ int irq_choose_cpu(const struct cpumask *mask)
return hard_smp_processor_id();
}
#endif
-
-int arch_early_irq_init(void)
-{
- return 0;
-}
-
-#ifdef CONFIG_PPC64
-static int __init setup_noirqdistrib(char *str)
-{
- distribute_irqs = 0;
- return 1;
-}
-
-__setup("noirqdistrib", setup_noirqdistrib);
-#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
new file mode 100644
index 000000000000..d5c48d1b0a31
--- /dev/null
+++ b/arch/powerpc/kernel/irq_64.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Derived from arch/i386/kernel/irq.c
+ * Copyright (C) 1992 Linus Torvalds
+ * Adapted from arch/i386 by Gary Thomas
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Updated and modified by Cort Dougan <cort@fsmlabs.com>
+ * Copyright (C) 1996-2001 Cort Dougan
+ * Adapted for Power Macintosh by Paul Mackerras
+ * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+ *
+ * This file contains the code used by various IRQ handling routines:
+ * asking for different IRQ's should be done through these routines
+ * instead of just grabbing them. Thus setups with different IRQ numbers
+ * shouldn't result in any weird surprises, and installing new handlers
+ * should be easier.
+ */
+
+#undef DEBUG
+
+#include <linux/export.h>
+#include <linux/threads.h>
+#include <linux/kernel_stat.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/timex.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/seq_file.h>
+#include <linux/cpumask.h>
+#include <linux/profile.h>
+#include <linux/bitops.h>
+#include <linux/list.h>
+#include <linux/radix-tree.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/vmalloc.h>
+#include <linux/pgtable.h>
+#include <linux/static_call.h>
+
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/cache.h>
+#include <asm/ptrace.h>
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/smp.h>
+#include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
+#include <asm/ppc_asm.h>
+
+#include <asm/paca.h>
+#include <asm/firmware.h>
+#include <asm/lv1call.h>
+#include <asm/dbell.h>
+#include <asm/trace.h>
+#include <asm/cpu_has_feature.h>
+
+int distribute_irqs = 1;
+
+static inline void next_interrupt(struct pt_regs *regs)
+{
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+ WARN_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+ }
+
+ /*
+ * We are responding to the next interrupt, so interrupt-off
+ * latencies should be reset here.
+ */
+ lockdep_hardirq_exit();
+ trace_hardirqs_on();
+ trace_hardirqs_off();
+ lockdep_hardirq_enter();
+}
+
+static inline bool irq_happened_test_and_clear(u8 irq)
+{
+ if (local_paca->irq_happened & irq) {
+ local_paca->irq_happened &= ~irq;
+ return true;
+ }
+ return false;
+}
+
+static __no_kcsan void __replay_soft_interrupts(void)
+{
+ struct pt_regs regs;
+
+ /*
+ * We use local_paca rather than get_paca() to avoid all the
+ * debug_smp_processor_id() business in this low level function.
+ */
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+ WARN_ON(!(local_paca->irq_happened & PACA_IRQ_HARD_DIS));
+ WARN_ON(local_paca->irq_happened & PACA_IRQ_REPLAYING);
+ }
+
+ /*
+ * PACA_IRQ_REPLAYING prevents interrupt handlers from enabling
+ * MSR[EE] to get PMIs, which can result in more IRQs becoming
+ * pending.
+ */
+ local_paca->irq_happened |= PACA_IRQ_REPLAYING;
+
+ ppc_save_regs(&regs);
+ regs.softe = IRQS_ENABLED;
+ regs.msr |= MSR_EE;
+
+ /*
+ * Force the delivery of pending soft-disabled interrupts on PS3.
+ * Any HV call will have this side effect.
+ */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+ u64 tmp, tmp2;
+ lv1_get_version_info(&tmp, &tmp2);
+ }
+
+ /*
+ * Check if an hypervisor Maintenance interrupt happened.
+ * This is a higher priority interrupt than the others, so
+ * replay it first.
+ */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+ irq_happened_test_and_clear(PACA_IRQ_HMI)) {
+ regs.trap = INTERRUPT_HMI;
+ handle_hmi_exception(&regs);
+ next_interrupt(&regs);
+ }
+
+ if (irq_happened_test_and_clear(PACA_IRQ_DEC)) {
+ regs.trap = INTERRUPT_DECREMENTER;
+ timer_interrupt(&regs);
+ next_interrupt(&regs);
+ }
+
+ if (irq_happened_test_and_clear(PACA_IRQ_EE)) {
+ regs.trap = INTERRUPT_EXTERNAL;
+ do_IRQ(&regs);
+ next_interrupt(&regs);
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_DOORBELL) &&
+ irq_happened_test_and_clear(PACA_IRQ_DBELL)) {
+ regs.trap = INTERRUPT_DOORBELL;
+ doorbell_exception(&regs);
+ next_interrupt(&regs);
+ }
+
+ /* Book3E does not support soft-masking PMI interrupts */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S) &&
+ irq_happened_test_and_clear(PACA_IRQ_PMI)) {
+ regs.trap = INTERRUPT_PERFMON;
+ performance_monitor_exception(&regs);
+ next_interrupt(&regs);
+ }
+
+ local_paca->irq_happened &= ~PACA_IRQ_REPLAYING;
+}
+
+__no_kcsan void replay_soft_interrupts(void)
+{
+ irq_enter(); /* See comment in arch_local_irq_restore */
+ __replay_soft_interrupts();
+ irq_exit();
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
+static inline __no_kcsan void replay_soft_interrupts_irqrestore(void)
+{
+ unsigned long kuap_state = get_kuap();
+
+ /*
+ * Check if anything calls local_irq_enable/restore() when KUAP is
+ * disabled (user access enabled). We handle that case here by saving
+ * and re-locking AMR but we shouldn't get here in the first place,
+ * hence the warning.
+ */
+ kuap_assert_locked();
+
+ if (kuap_state != AMR_KUAP_BLOCKED)
+ set_kuap(AMR_KUAP_BLOCKED);
+
+ __replay_soft_interrupts();
+
+ if (kuap_state != AMR_KUAP_BLOCKED)
+ set_kuap(kuap_state);
+}
+#else
+#define replay_soft_interrupts_irqrestore() __replay_soft_interrupts()
+#endif
+
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
+{
+ unsigned char irq_happened;
+
+ /* Write the new soft-enabled value if it is a disable */
+ if (mask) {
+ irq_soft_mask_set(mask);
+ return;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ WARN_ON_ONCE(in_nmi());
+ WARN_ON_ONCE(in_hardirq());
+ WARN_ON_ONCE(local_paca->irq_happened & PACA_IRQ_REPLAYING);
+ }
+
+again:
+ /*
+ * After the stb, interrupts are unmasked and there are no interrupts
+ * pending replay. The restart sequence makes this atomic with
+ * respect to soft-masked interrupts. If this was just a simple code
+ * sequence, a soft-masked interrupt could become pending right after
+ * the comparison and before the stb.
+ *
+ * This allows interrupts to be unmasked without hard disabling, and
+ * also without new hard interrupts coming in ahead of pending ones.
+ */
+ asm goto(
+"1: \n"
+" lbz 9,%0(13) \n"
+" cmpwi 9,0 \n"
+" bne %l[happened] \n"
+" stb 9,%1(13) \n"
+"2: \n"
+ RESTART_TABLE(1b, 2b, 1b)
+ : : "i" (offsetof(struct paca_struct, irq_happened)),
+ "i" (offsetof(struct paca_struct, irq_soft_mask))
+ : "cr0", "r9"
+ : happened);
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!(mfmsr() & MSR_EE));
+
+ /*
+ * If we came here from the replay below, we might have a preempt
+ * pending (due to preempt_enable_no_resched()). Have to check now.
+ */
+ preempt_check_resched();
+
+ return;
+
+happened:
+ irq_happened = READ_ONCE(local_paca->irq_happened);
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(!irq_happened);
+
+ if (irq_happened == PACA_IRQ_HARD_DIS) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+ irq_soft_mask_set(IRQS_ENABLED);
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ preempt_check_resched();
+ return;
+ }
+
+ /* Have interrupts to replay, need to hard disable first */
+ if (!(irq_happened & PACA_IRQ_HARD_DIS)) {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (!(mfmsr() & MSR_EE)) {
+ /*
+ * An interrupt could have come in and cleared
+ * MSR[EE] and set IRQ_HARD_DIS, so check
+ * IRQ_HARD_DIS again and warn if it is still
+ * clear.
+ */
+ irq_happened = READ_ONCE(local_paca->irq_happened);
+ WARN_ON_ONCE(!(irq_happened & PACA_IRQ_HARD_DIS));
+ }
+ }
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+ } else {
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) {
+ if (WARN_ON_ONCE(mfmsr() & MSR_EE))
+ __hard_irq_disable();
+ }
+ }
+
+ /*
+ * Disable preempt here, so that the below preempt_enable will
+ * perform resched if required (a replayed interrupt may set
+ * need_resched).
+ */
+ preempt_disable();
+ irq_soft_mask_set(IRQS_ALL_DISABLED);
+ trace_hardirqs_off();
+
+ /*
+ * Now enter interrupt context. The interrupt handlers themselves
+ * also call irq_enter/exit (which is okay, they can nest). But call
+ * it here now to hold off softirqs until the below irq_exit(). If
+ * we allowed replayed handlers to run softirqs, that enables irqs,
+ * which must replay interrupts, which recurses in here and makes
+ * things more complicated. The recursion is limited to 2, and it can
+ * be made to work, but it's complicated.
+ *
+ * local_bh_disable can not be used here because interrupts taken in
+ * idle are not in the right context (RCU, tick, etc) to run softirqs
+ * so irq_enter must be called.
+ */
+ irq_enter();
+
+ replay_soft_interrupts_irqrestore();
+
+ irq_exit();
+
+ if (unlikely(local_paca->irq_happened != PACA_IRQ_HARD_DIS)) {
+ /*
+ * The softirq processing in irq_exit() may enable interrupts
+ * temporarily, which can result in MSR[EE] being enabled and
+ * more irqs becoming pending. Go around again if that happens.
+ */
+ trace_hardirqs_on();
+ preempt_enable_no_resched();
+ goto again;
+ }
+
+ trace_hardirqs_on();
+ irq_soft_mask_set(IRQS_ENABLED);
+ local_paca->irq_happened = 0;
+ __hard_irq_enable();
+ preempt_enable();
+}
+EXPORT_SYMBOL(arch_local_irq_restore);
+
+/*
+ * This is a helper to use when about to go into idle low-power
+ * when the latter has the side effect of re-enabling interrupts
+ * (such as calling H_CEDE under pHyp).
+ *
+ * You call this function with interrupts soft-disabled (this is
+ * already the case when ppc_md.power_save is called). The function
+ * will return whether to enter power save or just return.
+ *
+ * In the former case, it will have generally sanitized the lazy irq
+ * state, and in the latter case it will leave with interrupts hard
+ * disabled and marked as such, so the local_irq_enable() call
+ * in arch_cpu_idle() will properly re-enable everything.
+ */
+__cpuidle bool prep_irq_for_idle(void)
+{
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /*
+ * Mark interrupts as soft-enabled and clear the
+ * PACA_IRQ_HARD_DIS from the pending mask since we
+ * are about to hard enable as well as a side effect
+ * of entering the low power state.
+ */
+ local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
+ irq_soft_mask_set(IRQS_ENABLED);
+
+ /* Tell the caller to enter the low power state */
+ return true;
+}
+
+#ifdef CONFIG_PPC_BOOK3S
+/*
+ * This is for idle sequences that return with IRQs off, but the
+ * idle state itself wakes on interrupt. Tell the irq tracer that
+ * IRQs are enabled for the duration of idle so it does not get long
+ * off times. Must be paired with fini_irq_for_idle_irqsoff.
+ */
+bool prep_irq_for_idle_irqsoff(void)
+{
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * First we need to hard disable to ensure no interrupt
+ * occurs before we effectively enter the low power state
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /*
+ * If anything happened while we were soft-disabled,
+ * we return now and do not enter the low power state.
+ */
+ if (lazy_irq_pending())
+ return false;
+
+ /* Tell lockdep we are about to re-enable */
+ trace_hardirqs_on();
+
+ return true;
+}
+
+/*
+ * Take the SRR1 wakeup reason, index into this table to find the
+ * appropriate irq_happened bit.
+ *
+ * System reset exceptions taken in idle state also come through here,
+ * but they are NMI interrupts so do not need to wait for IRQs to be
+ * restored, and should be taken as early as practical. These are marked
+ * with 0xff in the table. The Power ISA specifies 0100b as the system
+ * reset interrupt reason.
+ */
+#define IRQ_SYSTEM_RESET 0xff
+
+static const u8 srr1_to_lazyirq[0x10] = {
+ 0, 0, 0,
+ PACA_IRQ_DBELL,
+ IRQ_SYSTEM_RESET,
+ PACA_IRQ_DBELL,
+ PACA_IRQ_DEC,
+ 0,
+ PACA_IRQ_EE,
+ PACA_IRQ_EE,
+ PACA_IRQ_HMI,
+ 0, 0, 0, 0, 0 };
+
+void replay_system_reset(void)
+{
+ struct pt_regs regs;
+
+ ppc_save_regs(&regs);
+ regs.trap = 0x100;
+ get_paca()->in_nmi = 1;
+ system_reset_exception(&regs);
+ get_paca()->in_nmi = 0;
+}
+EXPORT_SYMBOL_GPL(replay_system_reset);
+
+void irq_set_pending_from_srr1(unsigned long srr1)
+{
+ unsigned int idx = (srr1 & SRR1_WAKEMASK_P8) >> 18;
+ u8 reason = srr1_to_lazyirq[idx];
+
+ /*
+ * Take the system reset now, which is immediately after registers
+ * are restored from idle. It's an NMI, so interrupts need not be
+ * re-enabled before it is taken.
+ */
+ if (unlikely(reason == IRQ_SYSTEM_RESET)) {
+ replay_system_reset();
+ return;
+ }
+
+ if (reason == PACA_IRQ_DBELL) {
+ /*
+ * When doorbell triggers a system reset wakeup, the message
+ * is not cleared, so if the doorbell interrupt is replayed
+ * and the IPI handled, the doorbell interrupt would still
+ * fire when EE is enabled.
+ *
+ * To avoid taking the superfluous doorbell interrupt,
+ * execute a msgclr here before the interrupt is replayed.
+ */
+ ppc_msgclr(PPC_DBELL_MSGTYPE);
+ }
+
+ /*
+ * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0,
+ * so this can be called unconditionally with the SRR1 wake
+ * reason as returned by the idle code, which uses 0 to mean no
+ * interrupt.
+ *
+ * If a future CPU was to designate this as an interrupt reason,
+ * then a new index for no interrupt must be assigned.
+ */
+ local_paca->irq_happened |= reason;
+}
+#endif /* CONFIG_PPC_BOOK3S */
+
+/*
+ * Force a replay of the external interrupt handler on this CPU.
+ */
+void force_external_irq_replay(void)
+{
+ /*
+ * This must only be called with interrupts soft-disabled,
+ * the replay will happen when re-enabling.
+ */
+ WARN_ON(!arch_irqs_disabled());
+
+ /*
+ * Interrupts must always be hard disabled before irq_happened is
+ * modified (to prevent lost update in case of interrupt between
+ * load and store).
+ */
+ __hard_irq_disable();
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ /* Indicate in the PACA that we have an interrupt to replay */
+ local_paca->irq_happened |= PACA_IRQ_EE;
+}
+
+static int __init setup_noirqdistrib(char *str)
+{
+ distribute_irqs = 0;
+ return 1;
+}
+
+__setup("noirqdistrib", setup_noirqdistrib);
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index 0f1997097960..5c064485197a 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Routines for tracking a legacy ISA bridge
*
@@ -6,11 +7,6 @@
* Some bits and pieces moved over from pci_64.c
*
* Copyrigh 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define DEBUG
@@ -22,13 +18,15 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
+#include <linux/of_address.h>
+#include <linux/vmalloc.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
+#include <asm/isa-bridge.h>
unsigned long isa_io_base; /* NULL if no ISA bus */
EXPORT_SYMBOL(isa_io_base);
@@ -41,82 +39,66 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
#define ISA_SPACE_MASK 0x1
#define ISA_SPACE_IO 0x1
-static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
- unsigned long phb_io_base_phys)
+static void remap_isa_base(phys_addr_t pa, unsigned long size)
+{
+ WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK);
+ WARN_ON_ONCE(pa & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ if (slab_is_available()) {
+ if (vmap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa,
+ pgprot_noncached(PAGE_KERNEL)))
+ vunmap_range(ISA_IO_BASE, ISA_IO_BASE + size);
+ } else {
+ early_ioremap_range(ISA_IO_BASE, pa, size,
+ pgprot_noncached(PAGE_KERNEL));
+ }
+}
+
+static int process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys)
{
- /* We should get some saner parsing here and remove these structs */
- struct pci_address {
- u32 a_hi;
- u32 a_mid;
- u32 a_lo;
- };
-
- struct isa_address {
- u32 a_hi;
- u32 a_lo;
- };
-
- struct isa_range {
- struct isa_address isa_addr;
- struct pci_address pci_addr;
- unsigned int size;
- };
-
- const struct isa_range *range;
- unsigned long pci_addr;
- unsigned int isa_addr;
unsigned int size;
- int rlen = 0;
+ struct of_range_parser parser;
+ struct of_range range;
- range = of_get_property(isa_node, "ranges", &rlen);
- if (range == NULL || (rlen < sizeof(struct isa_range)))
+ if (of_range_parser_init(&parser, isa_node))
goto inval_range;
- /* From "ISA Binding to 1275"
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 1: an ISA address
- * cells 2 - 4: a PCI address
- * (size depending on dev->n_addr_cells)
- * cell 5: the size of the range
- */
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
- range++;
- rlen -= sizeof(struct isa_range);
- if (rlen < sizeof(struct isa_range))
- goto inval_range;
- }
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
- goto inval_range;
+ for_each_of_range(&parser, &range) {
+ if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO)
+ continue;
- isa_addr = range->isa_addr.a_lo;
- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
- range->pci_addr.a_lo;
+ if (range.cpu_addr == OF_BAD_ADDR) {
+ pr_err("ISA: Bad CPU mapping: %s\n", __func__);
+ return -EINVAL;
+ }
- /* Assume these are both zero. Note: We could fix that and
- * do a proper parsing instead ... oh well, that will do for
- * now as nobody uses fancy mappings for ISA bridges
- */
- if ((pci_addr != 0) || (isa_addr != 0)) {
- printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
- __func__);
- return;
- }
+ /* We need page alignment */
+ if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) {
+ pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node);
+ return -EINVAL;
+ }
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(range->size);
- if (size > 0x10000)
- size = 0x10000;
+ /* Align size and make sure it's cropped to 64K */
+ size = PAGE_ALIGN(range.size);
+ if (size > 0x10000)
+ size = 0x10000;
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, _PAGE_NO_CACHE|_PAGE_GUARDED);
- return;
+ if (!phb_io_base_phys)
+ phb_io_base_phys = range.cpu_addr;
+
+ remap_isa_base(phb_io_base_phys, size);
+ return 0;
+ }
inval_range:
- printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
- "mapping 64k\n");
- __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
+ if (phb_io_base_phys) {
+ pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n");
+ remap_isa_base(phb_io_base_phys, 0x10000);
+ return 0;
+ }
+ return -EINVAL;
}
@@ -158,12 +140,41 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
isa_bridge_devnode = np;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(np, hose->io_base_phys);
+ process_ISA_OF_ranges(np, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (early) is %s\n", np->full_name);
+ pr_debug("ISA bridge (early) is %pOF\n", np);
+}
+
+/**
+ * isa_bridge_init_non_pci - Find and map the ISA IO space for an ISA
+ * bridge that is set up without a PCI host bridge IO base, using the
+ * "ranges" property of @np. Does nothing if an ISA bridge has
+ * already been registered.
+ */
+void __init isa_bridge_init_non_pci(struct device_node *np)
+{
+ int ret;
+
+ /* If we already have an ISA bridge, bail off */
+ if (isa_bridge_devnode != NULL)
+ return;
+
+ ret = process_ISA_OF_ranges(np, 0);
+ if (ret)
+ return;
+
+ /* Got it */
+ isa_bridge_devnode = np;
+
+ /* Set the global ISA io base to indicate we have an ISA bridge
+ * and map it
+ */
+ isa_io_base = ISA_IO_BASE;
+
+ pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
/**
@@ -180,13 +191,13 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
isa_bridge_pcidev = pdev;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
+ process_ISA_OF_ranges(devnode, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
- pr_debug("ISA bridge (late) is %s on %s\n",
- devnode->full_name, pci_name(pdev));
+ pr_debug("ISA bridge (late) is %pOF on %s\n",
+ devnode, pci_name(pdev));
}
/**
@@ -209,7 +220,7 @@ static void isa_bridge_remove(void)
isa_bridge_pcidev = NULL;
/* Unmap the ISA area */
- __iounmap_at((void *)ISA_IO_BASE, 0x10000);
+ vunmap_range(ISA_IO_BASE, ISA_IO_BASE + 0x10000);
}
/**
@@ -235,8 +246,7 @@ static int isa_bridge_notify(struct notifier_block *nb, unsigned long action,
/* Check if we have no ISA device, and this happens to be one,
* register it as such if it has an OF device
*/
- if (!isa_bridge_devnode && devnode && devnode->type &&
- !strcmp(devnode->type, "isa"))
+ if (!isa_bridge_devnode && of_node_is_type(devnode, "isa"))
isa_bridge_find_late(pdev, devnode);
return 0;
diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c
index a1ed8a8c7cb4..2659e1ac8604 100644
--- a/arch/powerpc/kernel/jump_label.c
+++ b/arch/powerpc/kernel/jump_label.c
@@ -1,25 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2010 Michael Ellerman, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/jump_label.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
+#include <asm/inst.h>
-#ifdef HAVE_JUMP_LABEL
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
- u32 *addr = (u32 *)(unsigned long)entry->code;
+ u32 *addr = (u32 *)jump_entry_code(entry);
- if (type == JUMP_LABEL_ENABLE)
- patch_branch(addr, entry->target, 0);
+ if (type == JUMP_LABEL_JMP)
+ patch_branch(addr, jump_entry_target(entry), 0);
else
- patch_instruction(addr, PPC_INST_NOP);
+ patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
}
-#endif
diff --git a/arch/powerpc/kernel/kdebugfs.c b/arch/powerpc/kernel/kdebugfs.c
new file mode 100644
index 000000000000..36d3124d5a8b
--- /dev/null
+++ b/arch/powerpc/kernel/kdebugfs.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/debugfs.h>
+#include <linux/export.h>
+#include <linux/init.h>
+
+struct dentry *arch_debugfs_dir;
+EXPORT_SYMBOL(arch_debugfs_dir);
+
+static int __init arch_kdebugfs_init(void)
+{
+ arch_debugfs_dir = debugfs_create_dir("powerpc", NULL);
+ return 0;
+}
+arch_initcall(arch_kdebugfs_init);
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 8504657379f1..5081334b7bd2 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PowerPC backend to the KGDB stub.
*
@@ -8,10 +9,6 @@
* PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
* Sergei Shtylyov <sshtylyov@ru.mvista.com>
* Copyright (C) 2007-2008 Wind River Systems, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program as licensed "as is" without any warranty of any
- * kind, whether express or implied.
*/
#include <linux/kernel.h>
@@ -24,7 +21,9 @@
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/debug.h>
+#include <asm/text-patching.h>
#include <linux/slab.h>
+#include <asm/inst.h>
/*
* This table contains the mapping between PowerPC hardware trap types, and
@@ -46,9 +45,9 @@ static struct hard_trap_info
{ 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */
{ 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */
-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+#ifdef CONFIG_BOOKE
{ 0x2002, 0x05 /* SIGTRAP */ }, /* debug */
-#if defined(CONFIG_FSL_BOOKE)
+#if defined(CONFIG_PPC_85xx)
{ 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* spe unavailable */
{ 0x2030, 0x08 /* SIGFPE */ }, /* spe fp data */
@@ -58,18 +57,18 @@ static struct hard_trap_info
{ 0x2900, 0x08 /* SIGFPE */ }, /* apu unavailable */
{ 0x3100, 0x0e /* SIGALRM */ }, /* fixed interval timer */
{ 0x3200, 0x02 /* SIGINT */ }, /* watchdog */
-#else /* ! CONFIG_FSL_BOOKE */
+#else /* ! CONFIG_PPC_85xx */
{ 0x1000, 0x0e /* SIGALRM */ }, /* prog interval timer */
{ 0x1010, 0x0e /* SIGALRM */ }, /* fixed interval timer */
{ 0x1020, 0x02 /* SIGINT */ }, /* watchdog */
{ 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */
{ 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */
#endif
-#else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
+#else /* !CONFIG_BOOKE */
{ 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
{ 0x1000, 0x04 /* SIGILL */ }, /* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
{ 0x0f00, 0x04 /* SIGILL */ }, /* performance monitor */
{ 0x0f20, 0x08 /* SIGFPE */ }, /* altivec unavailable */
{ 0x1300, 0x05 /* SIGTRAP */ }, /* instruction address break */
@@ -116,14 +115,14 @@ int kgdb_skipexception(int exception, struct pt_regs *regs)
return kgdb_isremovedbreak(regs->nip);
}
-static int kgdb_call_nmi_hook(struct pt_regs *regs)
+static int kgdb_debugger_ipi(struct pt_regs *regs)
{
kgdb_nmicallback(raw_smp_processor_id(), regs);
return 0;
}
#ifdef CONFIG_SMP
-void kgdb_roundup_cpus(unsigned long flags)
+void kgdb_roundup_cpus(void)
{
smp_send_debugger_break();
}
@@ -144,47 +143,19 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
return 0;
- if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
- regs->nip += BREAK_INSTR_SIZE;
+ if (*(u32 *)regs->nip == BREAK_INSTR)
+ regs_add_return_ip(regs, BREAK_INSTR_SIZE);
return 1;
}
-static DEFINE_PER_CPU(struct thread_info, kgdb_thread_info);
static int kgdb_singlestep(struct pt_regs *regs)
{
- struct thread_info *thread_info, *exception_thread_info;
- struct thread_info *backup_current_thread_info =
- &__get_cpu_var(kgdb_thread_info);
-
if (user_mode(regs))
return 0;
- /*
- * On Book E and perhaps other processors, singlestep is handled on
- * the critical exception stack. This causes current_thread_info()
- * to fail, since it it locates the thread_info by masking off
- * the low bits of the current stack pointer. We work around
- * this issue by copying the thread_info from the kernel stack
- * before calling kgdb_handle_exception, and copying it back
- * afterwards. On most processors the copy is avoided since
- * exception_thread_info == thread_info.
- */
- thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
- exception_thread_info = current_thread_info();
-
- if (thread_info != exception_thread_info) {
- /* Save the original current_thread_info. */
- memcpy(backup_current_thread_info, exception_thread_info, sizeof *thread_info);
- memcpy(exception_thread_info, thread_info, sizeof *thread_info);
- }
-
kgdb_handle_exception(0, SIGTRAP, 0, regs);
- if (thread_info != exception_thread_info)
- /* Restore current_thread_info lastly. */
- memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
-
return 1;
}
@@ -220,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs)
void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
{
struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
- STACK_FRAME_OVERHEAD);
+ STACK_INT_FRAME_REGS);
unsigned long *ptr = gdb_regs;
int reg;
@@ -237,7 +208,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
for (reg = 14; reg < 32; reg++)
PACK64(ptr, regs->gpr[reg]);
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
#ifdef CONFIG_SPE
for (reg = 0; reg < 32; reg++)
PACK64(ptr, p->thread.evr[reg]);
@@ -263,7 +234,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
#define GDB_SIZEOF_REG sizeof(unsigned long)
#define GDB_SIZEOF_REG_U32 sizeof(u32)
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
#define GDB_SIZEOF_FLOAT_REG sizeof(unsigned long)
#else
#define GDB_SIZEOF_FLOAT_REG sizeof(u64)
@@ -358,7 +329,7 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
if (regno >= 32 && regno < 64) {
/* FP registers 32 -> 63 */
-#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
if (current)
memcpy(mem, &current->thread.evr[regno-32],
dbg_reg_def[regno].size);
@@ -384,7 +355,7 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
if (regno >= 32 && regno < 64) {
/* FP registers 32 -> 63 */
-#if defined(CONFIG_FSL_BOOKE) && defined(CONFIG_SPE)
+#if defined(CONFIG_PPC_85xx) && defined(CONFIG_SPE)
memcpy(&current->thread.evr[regno-32], mem,
dbg_reg_def[regno].size);
#else
@@ -398,11 +369,11 @@ int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
{
- regs->nip = pc;
+ regs_set_return_ip(regs, pc);
}
/*
- * This function does PowerPC specific procesing for interfacing to gdb.
+ * This function does PowerPC specific processing for interfacing to gdb.
*/
int kgdb_arch_handle_exception(int vector, int signo, int err_code,
char *remcom_in_buffer, char *remcom_out_buffer,
@@ -420,7 +391,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
case 'c':
/* handle the optional parameter */
if (kgdb_hex2long(&ptr, &addr))
- linux_regs->nip = addr;
+ regs_set_return_ip(linux_regs, addr);
atomic_set(&kgdb_cpu_doing_single_step, -1);
/* set the trace bit if we're stepping */
@@ -428,9 +399,9 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
mtspr(SPRN_DBCR0,
mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
- linux_regs->msr |= MSR_DE;
+ regs_set_return_msr(linux_regs, linux_regs->msr | MSR_DE);
#else
- linux_regs->msr |= MSR_SE;
+ regs_set_return_msr(linux_regs, linux_regs->msr | MSR_SE);
#endif
atomic_set(&kgdb_cpu_doing_single_step,
raw_smp_processor_id());
@@ -441,12 +412,41 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
return -1;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ u32 instr, *addr = (u32 *)bpt->bpt_addr;
+ int err;
+
+ err = get_kernel_nofault(instr, addr);
+ if (err)
+ return err;
+
+ err = patch_instruction(addr, ppc_inst(BREAK_INSTR));
+ if (err)
+ return -EFAULT;
+
+ *(u32 *)bpt->saved_instr = instr;
+
+ return 0;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr = *(unsigned int *)bpt->saved_instr;
+ u32 *addr = (u32 *)bpt->bpt_addr;
+
+ err = patch_instruction(addr, ppc_inst(instr));
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* Global data
*/
-struct kgdb_arch arch_kgdb_ops = {
- .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
-};
+const struct kgdb_arch arch_kgdb_ops;
static int kgdb_not_implemented(struct pt_regs *regs)
{
@@ -471,7 +471,7 @@ int kgdb_arch_init(void)
old__debugger_break_match = __debugger_break_match;
old__debugger_fault_handler = __debugger_fault_handler;
- __debugger_ipi = kgdb_call_nmi_hook;
+ __debugger_ipi = kgdb_debugger_ipi;
__debugger = kgdb_debugger;
__debugger_bpt = kgdb_handle_breakpoint;
__debugger_sstep = kgdb_singlestep;
diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c
new file mode 100644
index 000000000000..f8208c027148
--- /dev/null
+++ b/arch/powerpc/kernel/kprobes-ftrace.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+/* Ftrace callback handler for kprobes */
+void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip,
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ struct pt_regs *regs;
+ int bit;
+
+ if (unlikely(kprobe_ftrace_disabled))
+ return;
+
+ bit = ftrace_test_recursion_trylock(nip, parent_nip);
+ if (bit < 0)
+ return;
+
+ regs = ftrace_get_regs(fregs);
+ p = get_kprobe((kprobe_opcode_t *)nip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto out;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ /*
+ * On powerpc, NIP is *before* this instruction for the
+ * pre handler
+ */
+ regs_add_return_ip(regs, -MCOUNT_INSN_SIZE);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ /*
+ * Emulate singlestep (and also recover regs->nip)
+ * as if there is a nop
+ */
+ regs_add_return_ip(regs, MCOUNT_INSN_SIZE);
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ }
+ /*
+ * If pre_handler returns !0, it changes regs->nip. We have to
+ * skip emulating post_handler.
+ */
+ __this_cpu_write(current_kprobe, NULL);
+ }
+out:
+ ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 2f72af82513c..c0d9f12cb441 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Kernel Probes (KProbes)
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright (C) IBM Corporation, 2002, 2004
*
* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
@@ -29,29 +16,140 @@
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kdebug.h>
#include <linux/slab.h>
-#include <asm/code-patching.h>
+#include <linux/set_memory.h>
+#include <linux/execmem.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/sstep.h>
-#include <asm/uaccess.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
+#include <linux/uaccess.h>
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
-int __kprobes arch_prepare_kprobe(struct kprobe *p)
+bool arch_within_kprobe_blacklist(unsigned long addr)
+{
+ return (addr >= (unsigned long)__kprobes_text_start &&
+ addr < (unsigned long)__kprobes_text_end) ||
+ (addr >= (unsigned long)_stext &&
+ addr < (unsigned long)__head_end);
+}
+
+kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset)
+{
+ kprobe_opcode_t *addr = NULL;
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ /* PPC64 ABIv2 needs local entry point */
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+ if (addr && !offset) {
+#ifdef CONFIG_KPROBES_ON_FTRACE
+ unsigned long faddr;
+ /*
+ * Per livepatch.h, ftrace location is always within the first
+ * 16 bytes of a function on powerpc with -mprofile-kernel.
+ */
+ faddr = ftrace_location_range((unsigned long)addr,
+ (unsigned long)addr + 16);
+ if (faddr)
+ addr = (kprobe_opcode_t *)faddr;
+ else
+#endif
+ addr = (kprobe_opcode_t *)ppc_function_entry(addr);
+ }
+#elif defined(CONFIG_PPC64_ELF_ABI_V1)
+ /*
+ * 64bit powerpc ABIv1 uses function descriptors:
+ * - Check for the dot variant of the symbol first.
+ * - If that fails, try looking up the symbol provided.
+ *
+ * This ensures we always get to the actual symbol and not
+ * the descriptor.
+ *
+ * Also handle <module:symbol> format.
+ */
+ char dot_name[MODULE_NAME_LEN + 1 + KSYM_NAME_LEN];
+ bool dot_appended = false;
+ const char *c;
+ ssize_t ret = 0;
+ int len = 0;
+
+ if ((c = strnchr(name, MODULE_NAME_LEN, ':')) != NULL) {
+ c++;
+ len = c - name;
+ memcpy(dot_name, name, len);
+ } else
+ c = name;
+
+ if (*c != '\0' && *c != '.') {
+ dot_name[len++] = '.';
+ dot_appended = true;
+ }
+ ret = strscpy(dot_name + len, c, KSYM_NAME_LEN);
+ if (ret > 0)
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(dot_name);
+
+ /* Fallback to the original non-dot symbol lookup */
+ if (!addr && dot_appended)
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+#else
+ addr = (kprobe_opcode_t *)kallsyms_lookup_name(name);
+#endif
+
+ return addr;
+}
+
+static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset)
+{
+ unsigned long ip = ftrace_location(addr);
+
+ if (ip)
+ return offset <= (ip - addr);
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return offset <= 8;
+ return !offset;
+}
+
+/* XXX try and fold the magic of kprobe_lookup_name() in this */
+kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
+ bool *on_func_entry)
+{
+ *on_func_entry = arch_kprobe_on_func_entry(addr, offset);
+ return (kprobe_opcode_t *)(addr + offset);
+}
+
+int arch_prepare_kprobe(struct kprobe *p)
{
int ret = 0;
- kprobe_opcode_t insn = *p->addr;
+ struct kprobe *prev;
+ ppc_inst_t insn = ppc_inst_read(p->addr);
if ((unsigned long)p->addr & 0x03) {
printk("Attempt to register kprobe at an unaligned address\n");
ret = -EINVAL;
- } else if (IS_MTMSRD(insn) || IS_RFID(insn) || IS_RFI(insn)) {
- printk("Cannot register a kprobe on rfi/rfid or mtmsr[d]\n");
+ } else if (!can_single_step(ppc_inst_val(insn))) {
+ printk("Cannot register a kprobe on instructions that can't be single stepped\n");
+ ret = -EINVAL;
+ } else if ((unsigned long)p->addr & ~PAGE_MASK &&
+ ppc_inst_prefixed(ppc_inst_read(p->addr - 1))) {
+ printk("Cannot register a kprobe on the second word of prefixed instruction\n");
+ ret = -EINVAL;
+ }
+ prev = get_kprobe(p->addr - 1);
+
+ /*
+ * When prev is a ftrace-based kprobe, we don't have an insn, and it
+ * doesn't probe for prefixed instruction.
+ */
+ if (prev && !kprobe_ftrace(prev) &&
+ ppc_inst_prefixed(ppc_inst_read(prev->ainsn.insn))) {
+ printk("Cannot register a kprobe on the second word of prefixed instruction\n");
ret = -EINVAL;
}
@@ -64,40 +162,37 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
}
if (!ret) {
- memcpy(p->ainsn.insn, p->addr,
- MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
- p->opcode = *p->addr;
- flush_icache_range((unsigned long)p->ainsn.insn,
- (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t));
+ patch_instruction(p->ainsn.insn, insn);
+ p->opcode = ppc_inst_val(insn);
}
p->ainsn.boostable = 0;
return ret;
}
+NOKPROBE_SYMBOL(arch_prepare_kprobe);
-void __kprobes arch_arm_kprobe(struct kprobe *p)
+void arch_arm_kprobe(struct kprobe *p)
{
- *p->addr = BREAKPOINT_INSTRUCTION;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(BREAKPOINT_INSTRUCTION)));
}
+NOKPROBE_SYMBOL(arch_arm_kprobe);
-void __kprobes arch_disarm_kprobe(struct kprobe *p)
+void arch_disarm_kprobe(struct kprobe *p)
{
- *p->addr = p->opcode;
- flush_icache_range((unsigned long) p->addr,
- (unsigned long) p->addr + sizeof(kprobe_opcode_t));
+ WARN_ON_ONCE(patch_instruction(p->addr, ppc_inst(p->opcode)));
}
+NOKPROBE_SYMBOL(arch_disarm_kprobe);
-void __kprobes arch_remove_kprobe(struct kprobe *p)
+void arch_remove_kprobe(struct kprobe *p)
{
if (p->ainsn.insn) {
free_insn_slot(p->ainsn.insn, 0);
p->ainsn.insn = NULL;
}
}
+NOKPROBE_SYMBOL(arch_remove_kprobe);
-static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
+static nokprobe_inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
{
enable_single_step(regs);
@@ -107,46 +202,85 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
* variant as values in regs could play a part in
* if the trap is taken or not
*/
- regs->nip = (unsigned long)p->ainsn.insn;
+ regs_set_return_ip(regs, (unsigned long)p->ainsn.insn);
}
-static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
kcb->prev_kprobe.kp = kprobe_running();
kcb->prev_kprobe.status = kcb->kprobe_status;
kcb->prev_kprobe.saved_msr = kcb->kprobe_saved_msr;
}
-static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+static nokprobe_inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_saved_msr = kcb->prev_kprobe.saved_msr;
}
-static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = p;
+ __this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_msr = regs->msr;
}
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
- struct pt_regs *regs)
+static int try_to_emulate(struct kprobe *p, struct pt_regs *regs)
{
- ri->ret_addr = (kprobe_opcode_t *)regs->link;
+ int ret;
+ ppc_inst_t insn = ppc_inst_read(p->ainsn.insn);
- /* Replace the return addr with trampoline addr */
- regs->link = (unsigned long)kretprobe_trampoline;
+ /* regs->nip is also adjusted if emulate_step returns 1 */
+ ret = emulate_step(regs, insn);
+ if (ret > 0) {
+ /*
+ * Once this instruction has been boosted
+ * successfully, set the boostable flag
+ */
+ if (unlikely(p->ainsn.boostable == 0))
+ p->ainsn.boostable = 1;
+ } else if (ret < 0) {
+ /*
+ * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
+ * So, we should never get here... but, its still
+ * good to catch them, just in case...
+ */
+ printk("Can't step on instruction %08lx\n", ppc_inst_as_ulong(insn));
+ BUG();
+ } else {
+ /*
+ * If we haven't previously emulated this instruction, then it
+ * can't be boosted. Note it down so we don't try to do so again.
+ *
+ * If, however, we had emulated this instruction in the past,
+ * then this is just an error with the current run (for
+ * instance, exceptions due to a load/store). We return 0 so
+ * that this is now single-stepped, but continue to try
+ * emulating it in subsequent probe hits.
+ */
+ if (unlikely(p->ainsn.boostable != 1))
+ p->ainsn.boostable = -1;
+ }
+
+ return ret;
}
+NOKPROBE_SYMBOL(try_to_emulate);
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+int kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
unsigned int *addr = (unsigned int *)regs->nip;
struct kprobe_ctlblk *kcb;
+ if (user_mode(regs))
+ return 0;
+
+ if (!IS_ENABLED(CONFIG_BOOKE) &&
+ (!(regs->msr & MSR_IR) || !(regs->msr & MSR_DR)))
+ return 0;
+
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
@@ -154,63 +288,21 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
preempt_disable();
kcb = get_kprobe_ctlblk();
- /* Check we're not actually recursing */
- if (kprobe_running()) {
- p = get_kprobe(addr);
- if (p) {
- kprobe_opcode_t insn = *p->ainsn.insn;
- if (kcb->kprobe_status == KPROBE_HIT_SS &&
- is_trap(insn)) {
- /* Turn off 'trace' bits */
- regs->msr &= ~MSR_SINGLESTEP;
- regs->msr |= kcb->kprobe_saved_msr;
- goto no_kprobe;
- }
- /* We have reentered the kprobe_handler(), since
- * another probe was hit while within the handler.
- * We here save the original kprobes variables and
- * just single step on the instruction of the new probe
- * without calling any user handlers.
- */
- save_previous_kprobe(kcb);
- set_current_kprobe(p, regs, kcb);
- kcb->kprobe_saved_msr = regs->msr;
- kprobes_inc_nmissed_count(p);
- prepare_singlestep(p, regs);
- kcb->kprobe_status = KPROBE_REENTER;
- return 1;
- } else {
- if (*addr != BREAKPOINT_INSTRUCTION) {
- /* If trap variant, then it belongs not to us */
- kprobe_opcode_t cur_insn = *addr;
- if (is_trap(cur_insn))
- goto no_kprobe;
- /* The breakpoint instruction was removed by
- * another cpu right after we hit, no further
- * handling of this interrupt is appropriate
- */
- ret = 1;
- goto no_kprobe;
- }
- p = __get_cpu_var(current_kprobe);
- if (p->break_handler && p->break_handler(p, regs)) {
- goto ss_probe;
- }
- }
- goto no_kprobe;
- }
-
p = get_kprobe(addr);
if (!p) {
- if (*addr != BREAKPOINT_INSTRUCTION) {
+ unsigned int instr;
+
+ if (get_kernel_nofault(instr, addr))
+ goto no_kprobe;
+
+ if (instr != BREAKPOINT_INSTRUCTION) {
/*
* PowerPC has multiple variants of the "trap"
* instruction. If the current instruction is a
* trap variant, it could belong to someone else
*/
- kprobe_opcode_t cur_insn = *addr;
- if (is_trap(cur_insn))
- goto no_kprobe;
+ if (is_trap(instr))
+ goto no_kprobe;
/*
* The breakpoint instruction was removed right
* after we hit it. Another cpu has removed
@@ -224,133 +316,71 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
goto no_kprobe;
}
+ /* Check we're not actually recursing */
+ if (kprobe_running()) {
+ kprobe_opcode_t insn = *p->ainsn.insn;
+ if (kcb->kprobe_status == KPROBE_HIT_SS && is_trap(insn)) {
+ /* Turn off 'trace' bits */
+ regs_set_return_msr(regs,
+ (regs->msr & ~MSR_SINGLESTEP) |
+ kcb->kprobe_saved_msr);
+ goto no_kprobe;
+ }
+
+ /*
+ * We have reentered the kprobe_handler(), since another probe
+ * was hit while within the handler. We here save the original
+ * kprobes variables and just single step on the instruction of
+ * the new probe without calling any user handlers.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p, regs, kcb);
+ kprobes_inc_nmissed_count(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ if (p->ainsn.boostable >= 0) {
+ ret = try_to_emulate(p, regs);
+
+ if (ret > 0) {
+ restore_previous_kprobe(kcb);
+ preempt_enable();
+ return 1;
+ }
+ }
+ prepare_singlestep(p, regs);
+ return 1;
+ }
+
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
set_current_kprobe(p, regs, kcb);
- if (p->pre_handler && p->pre_handler(p, regs))
- /* handler has already set things up, so skip ss setup */
+ if (p->pre_handler && p->pre_handler(p, regs)) {
+ /* handler changed execution path, so skip ss setup */
+ reset_current_kprobe();
+ preempt_enable();
return 1;
+ }
-ss_probe:
if (p->ainsn.boostable >= 0) {
- unsigned int insn = *p->ainsn.insn;
+ ret = try_to_emulate(p, regs);
- /* regs->nip is also adjusted if emulate_step returns 1 */
- ret = emulate_step(regs, insn);
if (ret > 0) {
- /*
- * Once this instruction has been boosted
- * successfully, set the boostable flag
- */
- if (unlikely(p->ainsn.boostable == 0))
- p->ainsn.boostable = 1;
-
if (p->post_handler)
p->post_handler(p, regs, 0);
kcb->kprobe_status = KPROBE_HIT_SSDONE;
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
return 1;
- } else if (ret < 0) {
- /*
- * We don't allow kprobes on mtmsr(d)/rfi(d), etc.
- * So, we should never get here... but, its still
- * good to catch them, just in case...
- */
- printk("Can't step on instruction %x\n", insn);
- BUG();
- } else if (ret == 0)
- /* This instruction can't be boosted */
- p->ainsn.boostable = -1;
+ }
}
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
no_kprobe:
- preempt_enable_no_resched();
+ preempt_enable();
return ret;
}
-
-/*
- * Function return probe trampoline:
- * - init_kprobes() establishes a probepoint here
- * - When the probed function returns, this probe
- * causes the handlers to fire
- */
-static void __used kretprobe_trampoline_holder(void)
-{
- asm volatile(".global kretprobe_trampoline\n"
- "kretprobe_trampoline:\n"
- "nop\n");
-}
-
-/*
- * Called when the probe at kretprobe trampoline is hit
- */
-static int __kprobes trampoline_probe_handler(struct kprobe *p,
- struct pt_regs *regs)
-{
- struct kretprobe_instance *ri = NULL;
- struct hlist_head *head, empty_rp;
- struct hlist_node *tmp;
- unsigned long flags, orig_ret_address = 0;
- unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
-
- INIT_HLIST_HEAD(&empty_rp);
- kretprobe_hash_lock(current, &head, &flags);
-
- /*
- * It is possible to have multiple instances associated with a given
- * task either because an multiple functions in the call path
- * have a return probe installed on them, and/or more than one return
- * return probe was registered for a target function.
- *
- * We can handle this because:
- * - instances are always inserted at the head of the list
- * - when multiple return probes are registered for the same
- * function, the first instance's ret_addr will point to the
- * real return address, and all the rest will point to
- * kretprobe_trampoline
- */
- hlist_for_each_entry_safe(ri, tmp, head, hlist) {
- if (ri->task != current)
- /* another task is sharing our hash bucket */
- continue;
-
- if (ri->rp && ri->rp->handler)
- ri->rp->handler(ri, regs);
-
- orig_ret_address = (unsigned long)ri->ret_addr;
- recycle_rp_inst(ri, &empty_rp);
-
- if (orig_ret_address != trampoline_address)
- /*
- * This is the real return address. Any other
- * instances associated with this task are for
- * other calls deeper on the call stack
- */
- break;
- }
-
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
- regs->nip = orig_ret_address;
-
- reset_current_kprobe();
- kretprobe_hash_unlock(current, &flags);
- preempt_enable_no_resched();
-
- hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
- hlist_del(&ri->hlist);
- kfree(ri);
- }
- /*
- * By returning a non-zero value, we are telling
- * kprobe_handler() that we don't want the post_handler
- * to run (and have re-enabled preemption)
- */
- return 1;
-}
+NOKPROBE_SYMBOL(kprobe_handler);
/*
* Called after single-stepping. p->addr is the address of the
@@ -360,16 +390,18 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+int kprobe_post_handler(struct pt_regs *regs)
{
+ int len;
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (!cur)
+ if (!cur || user_mode(regs))
return 0;
+ len = ppc_inst_len(ppc_inst_read(cur->ainsn.insn));
/* make sure we got here for instruction we have a kprobe on */
- if (((unsigned long)cur->ainsn.insn + 4) != regs->nip)
+ if (((unsigned long)cur->ainsn.insn + len) != regs->nip)
return 0;
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
@@ -378,8 +410,8 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
}
/* Adjust nip to after the single-stepped instruction */
- regs->nip = (unsigned long)cur->addr + 4;
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_ip(regs, (unsigned long)cur->addr + len);
+ regs_set_return_msr(regs, regs->msr | kcb->kprobe_saved_msr);
/*Restore back the original saved kprobes variables and continue. */
if (kcb->kprobe_status == KPROBE_REENTER) {
@@ -388,7 +420,7 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs)
}
reset_current_kprobe();
out:
- preempt_enable_no_resched();
+ preempt_enable();
/*
* if somebody else is singlestepping across a probe point, msr
@@ -400,8 +432,9 @@ out:
return 1;
}
+NOKPROBE_SYMBOL(kprobe_post_handler);
-int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -417,40 +450,25 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* and allow the page fault handler to continue as a
* normal page fault.
*/
- regs->nip = (unsigned long)cur->addr;
- regs->msr &= ~MSR_SINGLESTEP; /* Turn off 'trace' bits */
- regs->msr |= kcb->kprobe_saved_msr;
+ regs_set_return_ip(regs, (unsigned long)cur->addr);
+ /* Turn off 'trace' bits */
+ regs_set_return_msr(regs,
+ (regs->msr & ~MSR_SINGLESTEP) |
+ kcb->kprobe_saved_msr);
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
else
reset_current_kprobe();
- preempt_enable_no_resched();
+ preempt_enable();
break;
case KPROBE_HIT_ACTIVE:
case KPROBE_HIT_SSDONE:
/*
- * We increment the nmissed count for accounting,
- * we can also use npre/npostfault count for accounting
- * these specific fault cases.
- */
- kprobes_inc_nmissed_count(cur);
-
- /*
- * We come here because instructions in the pre/post
- * handler caused the page_fault, this could happen
- * if handler tries to access user space by
- * copy_from_user(), get_user() etc. Let the
- * user-specified handler try to fix it first.
- */
- if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
- return 1;
-
- /*
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
if ((entry = search_exception_tables(regs->nip)) != NULL) {
- regs->nip = entry->fixup;
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
@@ -464,96 +482,13 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
}
return 0;
}
+NOKPROBE_SYMBOL(kprobe_fault_handler);
-/*
- * Wrapper routine to for handling exceptions.
- */
-int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
- unsigned long val, void *data)
-{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode(args->regs))
- return ret;
-
- switch (val) {
- case DIE_BPT:
- if (kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_SSTEP:
- if (post_kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- default:
- break;
- }
- return ret;
-}
-
-unsigned long arch_deref_entry_point(void *entry)
-{
- return ppc_global_function_entry(entry);
-}
-
-int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct jprobe *jp = container_of(p, struct jprobe, kp);
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- memcpy(&kcb->jprobe_saved_regs, regs, sizeof(struct pt_regs));
-
- /* setup return addr to the jprobe handler routine */
- regs->nip = arch_deref_entry_point(jp->entry);
-#ifdef CONFIG_PPC64
-#if defined(_CALL_ELF) && _CALL_ELF == 2
- regs->gpr[12] = (unsigned long)jp->entry;
-#else
- regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
-#endif
-#endif
-
- return 1;
-}
-
-void __used __kprobes jprobe_return(void)
-{
- asm volatile("trap" ::: "memory");
-}
-
-static void __used __kprobes jprobe_return_end(void)
-{
-};
-
-int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
-{
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
-
- /*
- * FIXME - we should ideally be validating that we got here 'cos
- * of the "trap" in jprobe_return() above, before restoring the
- * saved regs...
- */
- memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
- preempt_enable_no_resched();
- return 1;
-}
-
-static struct kprobe trampoline_p = {
- .addr = (kprobe_opcode_t *) &kretprobe_trampoline,
- .pre_handler = trampoline_probe_handler
-};
-
-int __init arch_init_kprobes(void)
-{
- return register_kprobe(&trampoline_p);
-}
-
-int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+int arch_trampoline_kprobe(struct kprobe *p)
{
- if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+ if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline)
return 1;
return 0;
}
+NOKPROBE_SYMBOL(arch_trampoline_kprobe);
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 33aa4ddf597d..7209d00a9c25 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -1,30 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
* Copyright 2010-2011 Freescale Semiconductor, Inc.
*
* Authors:
* Alexander Graf <agraf@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kmemleak.h>
#include <linux/kvm_para.h>
#include <linux/slab.h>
#include <linux/of.h>
+#include <linux/pagemap.h>
#include <asm/reg.h>
#include <asm/sections.h>
@@ -74,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
static int kvm_tmp_index;
-static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
-static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -92,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -101,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
-static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -115,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
-static void kvm_patch_ins_nop(u32 *inst)
+static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
-static void kvm_patch_ins_b(u32 *inst, int addr)
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@@ -138,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
-static u32 *kvm_alloc(int len)
+static u32 * __init kvm_alloc(int len)
{
u32 *p;
- if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -161,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
-static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -214,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
-static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -275,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -332,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
-static void kvm_patch_ins_wrteei_0(u32 *inst)
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -373,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
-static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -409,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
-static void kvm_map_magic_page(void *data)
+static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -424,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
-static void kvm_check_ins(u32 *inst, u32 features)
+static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -464,7 +455,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt);
break;
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
case KVM_INST_MFSPR(SPRN_MAS0):
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_lwz(inst, magic_var(mas0), inst_rt);
@@ -493,7 +484,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_lwz(inst, magic_var(mas7_3), inst_rt);
break;
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
case KVM_INST_MFSPR(SPRN_SPRG4):
#ifdef CONFIG_BOOKE
@@ -566,7 +557,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
case KVM_INST_MTSPR(SPRN_DSISR):
kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt);
break;
-#ifdef CONFIG_PPC_BOOK3E_MMU
+#ifdef CONFIG_PPC_E500
case KVM_INST_MTSPR(SPRN_MAS0):
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_stw(inst, magic_var(mas0), inst_rt);
@@ -595,7 +586,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
kvm_patch_ins_stw(inst, magic_var(mas7_3), inst_rt);
break;
-#endif /* CONFIG_PPC_BOOK3E_MMU */
+#endif /* CONFIG_PPC_E500 */
case KVM_INST_MTSPR(SPRN_SPRG4):
if (features & KVM_MAGIC_FEAT_MAS0_TO_SPRG7)
@@ -641,20 +632,19 @@ static void kvm_check_ins(u32 *inst, u32 features)
#endif
}
- switch (inst_no_rt & ~KVM_MASK_RB) {
#ifdef CONFIG_PPC_BOOK3S_32
+ switch (inst_no_rt & ~KVM_MASK_RB) {
case KVM_INST_MTSRIN:
if (features & KVM_MAGIC_FEAT_SR) {
u32 inst_rb = _inst & KVM_MASK_RB;
kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb);
}
break;
- break;
-#endif
}
+#endif
- switch (_inst) {
#ifdef CONFIG_BOOKE
+ switch (_inst) {
case KVM_INST_WRTEEI_0:
kvm_patch_ins_wrteei_0(inst);
break;
@@ -662,25 +652,25 @@ static void kvm_check_ins(u32 *inst, u32 features)
case KVM_INST_WRTEEI_1:
kvm_patch_ins_wrtee(inst, 0, 1);
break;
-#endif
}
+#endif
}
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
-static void kvm_use_magic_page(void)
+static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
- u32 tmp;
u32 features;
/* Tell the host to map the magic page to -4096 on all CPUs */
on_each_cpu(kvm_map_magic_page, &features, 1);
/* Quick self-test to see if the mapping works */
- if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) {
+ if (fault_in_readable((const char __user *)KVM_MAGIC_PAGE,
+ sizeof(u32))) {
kvm_patching_worked = false;
return;
}
@@ -711,19 +701,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
-static __init void kvm_free_tmp(void)
-{
- free_reserved_area(&kvm_tmp[kvm_tmp_index],
- &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
- goto free_tmp;
+ return 0;
if (!epapr_paravirt_enabled)
- goto free_tmp;
+ return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -733,9 +717,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
-free_tmp:
- kvm_free_tmp();
-
return 0;
}
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index e100ff324a85..7af6f8b50c5d 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
* Copyright 2010-2011 Freescale Semiconductor, Inc.
@@ -23,6 +12,7 @@
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
#define KVM_MAGIC_PAGE (-4096)
@@ -202,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+#ifdef CONFIG_BOOKE
+
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -295,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -344,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .balign 4
+ .global kvm_tmp
+kvm_tmp:
+ .space (64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
.global kvm_template_end
kvm_template_end:
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..f2e03ed423d0 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -1,20 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
L2CR functions
Copyright © 1997-1998 by PowerLogix R & D, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
Thur, Dec. 12, 1998.
@@ -45,6 +33,7 @@
#include <asm/ppc_asm.h>
#include <asm/cache.h>
#include <asm/page.h>
+#include <asm/feature-fixups.h>
/* Usage:
@@ -107,7 +96,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L2CR)
/* Stop DST streams */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
sync
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
@@ -181,7 +170,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
isync
@@ -267,7 +256,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
sync
/* Restore MSR (restores EE and DR bits to original state) */
- SYNC
mtmsr r7
isync
@@ -304,7 +292,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
isync
/* Stop DST streams */
- DSSALL
+ PPC_DSSALL
sync
/* Get the current enable bit of the L3CR into r4 */
@@ -328,7 +316,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
mtctr r4
li r4,0
1:
- lwzx r0,r0,r4
+ lwzx r0,0,r4
dcbf 0,r4
addi r4,r4,32 /* Go to start of next cache line */
bdnz 1b
@@ -388,7 +376,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
1: bdnz 1b
/* Restore MSR (restores EE and DR bits to original state) */
-4: SYNC
+4:
mtmsr r7
isync
blr
@@ -413,7 +401,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_L3CR)
_GLOBAL(__flush_disable_L1)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
@@ -466,5 +454,6 @@ _GLOBAL(__inval_enable_L1)
sync
blr
+_ASM_NOKPROBE_SYMBOL(__inval_enable_L1)
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index 936258881c98..ae1906bfe8a5 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -1,19 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
#include <linux/serial_core.h>
#include <linux/console.h>
#include <linux/pci.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of_irq.h>
#include <linux/serial_reg.h>
#include <asm/io.h>
#include <asm/mmu.h>
-#include <asm/prom.h>
#include <asm/serial.h>
#include <asm/udbg.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
+#include <asm/early_ioremap.h>
#undef DEBUG
@@ -33,9 +35,10 @@ static struct legacy_serial_info {
unsigned int clock;
int irq_check_parent;
phys_addr_t taddr;
+ void __iomem *early_addr;
} legacy_serial_infos[MAX_LEGACY_SERIAL_PORTS];
-static struct of_device_id legacy_serial_parents[] __initdata = {
+static const struct of_device_id legacy_serial_parents[] __initconst = {
{.type = "soc",},
{.type = "tsi-bridge",},
{.type = "opb", },
@@ -51,9 +54,10 @@ static int legacy_serial_console = -1;
static const upf_t legacy_port_flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
UPF_SHARE_IRQ | UPF_FIXED_PORT;
-static unsigned int tsi_serial_in(struct uart_port *p, int offset)
+static u32 tsi_serial_in(struct uart_port *p, unsigned int offset)
{
- unsigned int tmp;
+ u32 tmp;
+
offset = offset << p->regshift;
if (offset == UART_IIR) {
tmp = readl(p->membase + (UART_IIR & ~3));
@@ -62,7 +66,7 @@ static unsigned int tsi_serial_in(struct uart_port *p, int offset)
return readb(p->membase + offset);
}
-static void tsi_serial_out(struct uart_port *p, int offset, int value)
+static void tsi_serial_out(struct uart_port *p, unsigned int offset, u32 value)
{
offset = offset << p->regshift;
if (!((offset == UART_IER) && (value & UART_IER_UUE)))
@@ -74,6 +78,8 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
phys_addr_t taddr, unsigned long irq,
upf_t flags, int irq_check_parent)
{
+ struct plat_serial8250_port *legacy_port;
+ struct legacy_serial_info *legacy_info;
const __be32 *clk, *spd, *rs;
u32 clock = BASE_BAUD * 16;
u32 shift = 0;
@@ -107,16 +113,17 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
if (index >= legacy_serial_count)
legacy_serial_count = index + 1;
+ legacy_port = &legacy_serial_ports[index];
+ legacy_info = &legacy_serial_infos[index];
+
/* Check if there is a port who already claimed our slot */
- if (legacy_serial_infos[index].np != NULL) {
+ if (legacy_info->np != NULL) {
/* if we still have some room, move it, else override */
if (legacy_serial_count < MAX_LEGACY_SERIAL_PORTS) {
printk(KERN_DEBUG "Moved legacy port %d -> %d\n",
index, legacy_serial_count);
- legacy_serial_ports[legacy_serial_count] =
- legacy_serial_ports[index];
- legacy_serial_infos[legacy_serial_count] =
- legacy_serial_infos[index];
+ legacy_serial_ports[legacy_serial_count] = *legacy_port;
+ legacy_serial_infos[legacy_serial_count] = *legacy_info;
legacy_serial_count++;
} else {
printk(KERN_DEBUG "Replacing legacy port %d\n", index);
@@ -124,36 +131,32 @@ static int __init add_legacy_port(struct device_node *np, int want_index,
}
/* Now fill the entry */
- memset(&legacy_serial_ports[index], 0,
- sizeof(struct plat_serial8250_port));
+ memset(legacy_port, 0, sizeof(*legacy_port));
if (iotype == UPIO_PORT)
- legacy_serial_ports[index].iobase = base;
+ legacy_port->iobase = base;
else
- legacy_serial_ports[index].mapbase = base;
-
- legacy_serial_ports[index].iotype = iotype;
- legacy_serial_ports[index].uartclk = clock;
- legacy_serial_ports[index].irq = irq;
- legacy_serial_ports[index].flags = flags;
- legacy_serial_ports[index].regshift = shift;
- legacy_serial_infos[index].taddr = taddr;
- legacy_serial_infos[index].np = of_node_get(np);
- legacy_serial_infos[index].clock = clock;
- legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0;
- legacy_serial_infos[index].irq_check_parent = irq_check_parent;
+ legacy_port->mapbase = base;
+
+ legacy_port->iotype = iotype;
+ legacy_port->uartclk = clock;
+ legacy_port->irq = irq;
+ legacy_port->flags = flags;
+ legacy_port->regshift = shift;
+ legacy_info->taddr = taddr;
+ legacy_info->np = of_node_get(np);
+ legacy_info->clock = clock;
+ legacy_info->speed = spd ? be32_to_cpup(spd) : 0;
+ legacy_info->irq_check_parent = irq_check_parent;
if (iotype == UPIO_TSI) {
- legacy_serial_ports[index].serial_in = tsi_serial_in;
- legacy_serial_ports[index].serial_out = tsi_serial_out;
+ legacy_port->serial_in = tsi_serial_in;
+ legacy_port->serial_out = tsi_serial_out;
}
- printk(KERN_DEBUG "Found legacy serial port %d for %s\n",
- index, np->full_name);
- printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n",
+ printk(KERN_DEBUG "Found legacy serial port %d for %pOF\n", index, np);
+ printk(KERN_DEBUG " %s=%pa, taddr=%pa, irq=%lx, clk=%d, speed=%d\n",
(iotype == UPIO_PORT) ? "port" : "mem",
- (unsigned long long)base, (unsigned long long)taddr, irq,
- legacy_serial_ports[index].uartclk,
- legacy_serial_infos[index].speed);
+ &base, &taddr, irq, legacy_port->uartclk, legacy_info->speed);
return index;
}
@@ -168,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np,
/* We only support ports that have a clock frequency properly
* encoded in the device-tree.
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* if reg-offset don't try to use it */
- if ((of_get_property(np, "reg-offset", NULL) != NULL))
+ if (of_property_present(np, "reg-offset"))
return -1;
/* if rtas uses this device, don't try to use it as well */
- if (of_get_property(np, "used-by-rtas", NULL) != NULL)
+ if (of_property_read_bool(np, "used-by-rtas"))
return -1;
/* Get the address */
@@ -191,12 +194,12 @@ static int __init add_legacy_soc_port(struct device_node *np,
/* Add port, irq will be dealt with later. We passed a translated
* IO port value. It will be fixed up later along with the irq
*/
- if (tsi && !strcmp(tsi->type, "tsi-bridge"))
+ if (of_node_is_type(tsi, "tsi-bridge"))
return add_legacy_port(np, -1, UPIO_TSI, addr, addr,
- NO_IRQ, legacy_port_flags, 0);
+ 0, legacy_port_flags, 0);
else
return add_legacy_port(np, -1, UPIO_MEM, addr, addr,
- NO_IRQ, legacy_port_flags, 0);
+ 0, legacy_port_flags, 0);
}
static int __init add_legacy_isa_port(struct device_node *np,
@@ -207,7 +210,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
int index = -1;
u64 taddr;
- DBG(" -> add_legacy_isa_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_isa_port(%pOF)\n", np);
/* Get the ISA port number */
reg = of_get_property(np, "reg", NULL);
@@ -233,7 +236,8 @@ static int __init add_legacy_isa_port(struct device_node *np,
*
* Note: Don't even try on P8 lpc, we know it's not directly mapped
*/
- if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc")) {
+ if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") ||
+ of_property_present(isa_brg, "ranges")) {
taddr = of_translate_address(np, reg);
if (taddr == OF_BAD_ADDR)
taddr = 0;
@@ -242,7 +246,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
/* Add port, irq will be dealt with later */
return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]),
- taddr, NO_IRQ, legacy_port_flags, 0);
+ taddr, 0, legacy_port_flags, 0);
}
@@ -255,7 +259,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
unsigned int flags;
int iotype, index = -1, lindex = 0;
- DBG(" -> add_legacy_pci_port(%s)\n", np->full_name);
+ DBG(" -> add_legacy_pci_port(%pOF)\n", np);
/* We only support ports that have a clock frequency properly
* encoded in the device-tree (that is have an fcode). Anything
@@ -264,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
* compatible UARTs on PCI need all sort of quirks (port offsets
* etc...) that this code doesn't know about
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* Get the PCI address. Assume BAR 0 */
@@ -314,7 +318,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
/* Add port, irq will be dealt with later. We passed a translated
* IO port value. It will be fixed up later along with the irq
*/
- return add_legacy_port(np, index, iotype, base, addr, NO_IRQ,
+ return add_legacy_port(np, index, iotype, base, addr, 0,
legacy_port_flags, np != pci_dev);
}
#endif
@@ -323,17 +327,16 @@ static void __init setup_legacy_serial_console(int console)
{
struct legacy_serial_info *info = &legacy_serial_infos[console];
struct plat_serial8250_port *port = &legacy_serial_ports[console];
- void __iomem *addr;
unsigned int stride;
stride = 1 << port->regshift;
/* Check if a translated MMIO address has been found */
if (info->taddr) {
- addr = ioremap(info->taddr, 0x1000);
- if (addr == NULL)
+ info->early_addr = early_ioremap(info->taddr, 0x1000);
+ if (info->early_addr == NULL)
return;
- udbg_uart_init_mmio(addr, stride);
+ udbg_uart_init_mmio(info->early_addr, stride);
} else {
/* Check if it's PIO and we support untranslated PIO */
if (port->iotype == UPIO_PORT && isa_io_special)
@@ -351,6 +354,33 @@ static void __init setup_legacy_serial_console(int console)
udbg_uart_setup(info->speed, info->clock);
}
+static int __init ioremap_legacy_serial_console(void)
+{
+ struct plat_serial8250_port *port;
+ struct legacy_serial_info *info;
+ void __iomem *vaddr;
+
+ if (legacy_serial_console < 0)
+ return 0;
+
+ info = &legacy_serial_infos[legacy_serial_console];
+ port = &legacy_serial_ports[legacy_serial_console];
+
+ if (!info->early_addr)
+ return 0;
+
+ vaddr = ioremap(info->taddr, 0x1000);
+ if (WARN_ON(!vaddr))
+ return -ENOMEM;
+
+ udbg_uart_init_mmio(vaddr, 1 << port->regshift);
+ early_iounmap(info->early_addr, 0x1000);
+ info->early_addr = NULL;
+
+ return 0;
+}
+early_initcall(ioremap_legacy_serial_console);
+
/*
* This is called very early, as part of setup_system() or eventually
* setup_arch(), basically before anything else in this file. This function
@@ -370,10 +400,12 @@ void __init find_legacy_serial_ports(void)
/* Now find out if one of these is out firmware console */
path = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (path == NULL)
+ path = of_get_property(of_chosen, "stdout-path", NULL);
if (path != NULL) {
stdout = of_find_node_by_path(path);
if (stdout)
- DBG("stdout is %s\n", stdout->full_name);
+ DBG("stdout is %pOF\n", stdout);
} else {
DBG(" no linux,stdout-path !\n");
}
@@ -396,8 +428,7 @@ void __init find_legacy_serial_ports(void)
/* Next, fill our array with ISA ports */
for_each_node_by_type(np, "serial") {
struct device_node *isa = of_get_parent(np);
- if (isa && (!strcmp(isa->name, "isa") ||
- !strcmp(isa->name, "lpc"))) {
+ if (of_node_name_eq(isa, "isa") || of_node_name_eq(isa, "lpc")) {
if (of_device_is_available(np)) {
index = add_legacy_isa_port(np, isa);
if (index >= 0 && np == stdout)
@@ -411,11 +442,12 @@ void __init find_legacy_serial_ports(void)
/* Next, try to locate PCI ports */
for (np = NULL; (np = of_find_all_nodes(np));) {
struct device_node *pci, *parent = of_get_parent(np);
- if (parent && !strcmp(parent->name, "isa")) {
+ if (of_node_name_eq(parent, "isa")) {
of_node_put(parent);
continue;
}
- if (strcmp(np->name, "serial") && strcmp(np->type, "serial")) {
+ if (!of_node_name_eq(np, "serial") &&
+ !of_node_is_type(np, "serial")) {
of_node_put(parent);
continue;
}
@@ -439,6 +471,8 @@ void __init find_legacy_serial_ports(void)
}
#endif
+ of_node_put(stdout);
+
DBG("legacy_serial_console = %d\n", legacy_serial_console);
if (legacy_serial_console >= 0)
setup_legacy_serial_console(legacy_serial_console);
@@ -462,22 +496,28 @@ static void __init fixup_port_irq(int index,
DBG("fixup_port_irq(%d)\n", index);
virq = irq_of_parse_and_map(np, 0);
- if (virq == NO_IRQ && legacy_serial_infos[index].irq_check_parent) {
+ if (!virq && legacy_serial_infos[index].irq_check_parent) {
np = of_get_parent(np);
if (np == NULL)
return;
virq = irq_of_parse_and_map(np, 0);
of_node_put(np);
}
- if (virq == NO_IRQ)
+ if (!virq)
return;
port->irq = virq;
-#ifdef CONFIG_SERIAL_8250_FSL
- if (of_device_is_compatible(np, "fsl,ns16550"))
- port->handle_irq = fsl8250_handle_irq;
-#endif
+ if (IS_ENABLED(CONFIG_SERIAL_8250) &&
+ of_device_is_compatible(np, "fsl,ns16550")) {
+ if (IS_REACHABLE(CONFIG_SERIAL_8250_FSL)) {
+ port->handle_irq = fsl8250_handle_irq;
+ port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
+ } else {
+ pr_warn_once("Not activating Freescale specific workaround for device %pOFP\n",
+ np);
+ }
+ }
}
static void __init fixup_port_pio(int index,
@@ -543,7 +583,7 @@ static int __init serial_dev_init(void)
struct plat_serial8250_port *port = &legacy_serial_ports[i];
struct device_node *np = legacy_serial_infos[i].np;
- if (port->irq == NO_IRQ)
+ if (!port->irq)
fixup_port_irq(i, np, port);
if (port->iotype == UPIO_PORT)
fixup_port_pio(i, np, port);
@@ -593,8 +633,10 @@ static int __init check_legacy_serial_console(void)
/* We are getting a weird phandle from OF ... */
/* ... So use the full path instead */
name = of_get_property(of_chosen, "linux,stdout-path", NULL);
+ if (name == NULL)
+ name = of_get_property(of_chosen, "stdout-path", NULL);
if (name == NULL) {
- DBG(" no linux,stdout-path !\n");
+ DBG(" no stdout-path !\n");
return -ENODEV;
}
prom_stdout = of_find_node_by_path(name);
@@ -602,7 +644,7 @@ static int __init check_legacy_serial_console(void)
DBG(" can't find stdout package %s !\n", name);
return -ENODEV;
}
- DBG("stdout is %s\n", prom_stdout->full_name);
+ DBG("stdout is %pOF\n", prom_stdout);
name = of_get_property(prom_stdout, "name", NULL);
if (!name) {
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
deleted file mode 100644
index 015ae55c1868..000000000000
--- a/arch/powerpc/kernel/machine_kexec.c
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Code to handle transition of Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- */
-
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-#include <linux/threads.h>
-#include <linux/memblock.h>
-#include <linux/of.h>
-#include <linux/irq.h>
-#include <linux/ftrace.h>
-
-#include <asm/machdep.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/sections.h>
-
-void machine_kexec_mask_interrupts(void) {
- unsigned int i;
- struct irq_desc *desc;
-
- for_each_irq_desc(i, desc) {
- struct irq_chip *chip;
-
- chip = irq_desc_get_chip(desc);
- if (!chip)
- continue;
-
- if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
- chip->irq_eoi(&desc->irq_data);
-
- if (chip->irq_mask)
- chip->irq_mask(&desc->irq_data);
-
- if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
- chip->irq_disable(&desc->irq_data);
- }
-}
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
- default_machine_crash_shutdown(regs);
-}
-
-/*
- * Do what every setup is needed on image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
- if (ppc_md.machine_kexec_prepare)
- return ppc_md.machine_kexec_prepare(image);
- else
- return default_machine_kexec_prepare(image);
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-}
-
-void arch_crash_save_vmcoreinfo(void)
-{
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(node_data);
- VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
- VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
- VMCOREINFO_SYMBOL(vmemmap_list);
- VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
- VMCOREINFO_SYMBOL(mmu_psize_defs);
- VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
- VMCOREINFO_OFFSET(vmemmap_backing, list);
- VMCOREINFO_OFFSET(vmemmap_backing, phys);
- VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
- VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
- VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
- int save_ftrace_enabled;
-
- save_ftrace_enabled = __ftrace_enabled_save();
-
- if (ppc_md.machine_kexec)
- ppc_md.machine_kexec(image);
- else
- default_machine_kexec(image);
-
- __ftrace_enabled_restore(save_ftrace_enabled);
-
- /* Fall back to normal restart if we're still alive. */
- machine_restart(NULL);
- for(;;);
-}
-
-void __init reserve_crashkernel(void)
-{
- unsigned long long crash_size, crash_base;
- int ret;
-
- /* use common parsing */
- ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
- &crash_size, &crash_base);
- if (ret == 0 && crash_size > 0) {
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- }
-
- if (crashk_res.end == crashk_res.start) {
- crashk_res.start = crashk_res.end = 0;
- return;
- }
-
- /* We might have got these values via the command line or the
- * device tree, either way sanitise them now. */
-
- crash_size = resource_size(&crashk_res);
-
-#ifndef CONFIG_NONSTATIC_KERNEL
- if (crashk_res.start != KDUMP_KERNELBASE)
- printk("Crash kernel location must be 0x%x\n",
- KDUMP_KERNELBASE);
-
- crashk_res.start = KDUMP_KERNELBASE;
-#else
- if (!crashk_res.start) {
-#ifdef CONFIG_PPC64
- /*
- * On 64bit we split the RMO in half but cap it at half of
- * a small SLB (128MB) since the crash kernel needs to place
- * itself and some stacks to be in the first segment.
- */
- crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
-#else
- crashk_res.start = KDUMP_KERNELBASE;
-#endif
- }
-
- crash_base = PAGE_ALIGN(crashk_res.start);
- if (crash_base != crashk_res.start) {
- printk("Crash kernel base must be aligned to 0x%lx\n",
- PAGE_SIZE);
- crashk_res.start = crash_base;
- }
-
-#endif
- crash_size = PAGE_ALIGN(crash_size);
- crashk_res.end = crashk_res.start + crash_size - 1;
-
- /* The crash region must not overlap the current kernel */
- if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
- printk(KERN_WARNING
- "Crash kernel can not overlap current kernel\n");
- crashk_res.start = crashk_res.end = 0;
- return;
- }
-
- /* Crash kernel trumps memory limit */
- if (memory_limit && memory_limit <= crashk_res.end) {
- memory_limit = crashk_res.end + 1;
- printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
- memory_limit);
- }
-
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crashk_res.start >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
-
- memblock_reserve(crashk_res.start, crash_size);
-}
-
-int overlaps_crashkernel(unsigned long start, unsigned long size)
-{
- return (start + size) > crashk_res.start && start <= crashk_res.end;
-}
-
-/* Values we need to export to the second kernel via the device tree. */
-static phys_addr_t kernel_end;
-static phys_addr_t crashk_base;
-static phys_addr_t crashk_size;
-static unsigned long long mem_limit;
-
-static struct property kernel_end_prop = {
- .name = "linux,kernel-end",
- .length = sizeof(phys_addr_t),
- .value = &kernel_end,
-};
-
-static struct property crashk_base_prop = {
- .name = "linux,crashkernel-base",
- .length = sizeof(phys_addr_t),
- .value = &crashk_base
-};
-
-static struct property crashk_size_prop = {
- .name = "linux,crashkernel-size",
- .length = sizeof(phys_addr_t),
- .value = &crashk_size,
-};
-
-static struct property memory_limit_prop = {
- .name = "linux,memory-limit",
- .length = sizeof(unsigned long long),
- .value = &mem_limit,
-};
-
-#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG)
-
-static void __init export_crashk_values(struct device_node *node)
-{
- struct property *prop;
-
- /* There might be existing crash kernel properties, but we can't
- * be sure what's in them, so remove them. */
- prop = of_find_property(node, "linux,crashkernel-base", NULL);
- if (prop)
- of_remove_property(node, prop);
-
- prop = of_find_property(node, "linux,crashkernel-size", NULL);
- if (prop)
- of_remove_property(node, prop);
-
- if (crashk_res.start != 0) {
- crashk_base = cpu_to_be_ulong(crashk_res.start),
- of_add_property(node, &crashk_base_prop);
- crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
- of_add_property(node, &crashk_size_prop);
- }
-
- /*
- * memory_limit is required by the kexec-tools to limit the
- * crash regions to the actual memory used.
- */
- mem_limit = cpu_to_be_ulong(memory_limit);
- of_update_property(node, &memory_limit_prop);
-}
-
-static int __init kexec_setup(void)
-{
- struct device_node *node;
- struct property *prop;
-
- node = of_find_node_by_path("/chosen");
- if (!node)
- return -ENOENT;
-
- /* remove any stale properties so ours can be found */
- prop = of_find_property(node, kernel_end_prop.name, NULL);
- if (prop)
- of_remove_property(node, prop);
-
- /* information needed by userspace when using default_machine_kexec */
- kernel_end = cpu_to_be_ulong(__pa(_end));
- of_add_property(node, &kernel_end_prop);
-
- export_crashk_values(node);
-
- of_node_put(node);
- return 0;
-}
-late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index a7fd4cb78b78..219f28637a3e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Machine check exception handling.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2013 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -22,24 +9,42 @@
#undef DEBUG
#define pr_fmt(fmt) "mce: " fmt
+#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/irq_work.h>
+#include <linux/extable.h>
+#include <linux/ftrace.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
#include <asm/mce.h>
+#include <asm/nmi.h>
+
+#include "setup.h"
-static DEFINE_PER_CPU(int, mce_nest_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
+static void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
-/* Queue for delayed MCE events. */
-static DEFINE_PER_CPU(int, mce_queue_count);
-static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+static DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
-static void machine_check_process_queued_event(struct irq_work *work);
-struct irq_work mce_event_process_work = {
- .func = machine_check_process_queued_event,
-};
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
@@ -58,24 +63,40 @@ static void mce_set_error_info(struct machine_check_event *mce,
case MCE_ERROR_TYPE_TLB:
mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
break;
+ case MCE_ERROR_TYPE_USER:
+ mce->u.user_error.user_error_type = mce_err->u.user_error_type;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce->u.ra_error.ra_error_type = mce_err->u.ra_error_type;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce->u.link_error.link_error_type = mce_err->u.link_error_type;
+ break;
case MCE_ERROR_TYPE_UNKNOWN:
default:
break;
}
}
+void mce_irq_work_queue(void)
+{
+ /* Raise decrementer interrupt */
+ arch_irq_work_raise();
+ set_mce_pending_irq_work();
+}
+
/*
* Decode and save high level MCE information into per cpu buffer which
* is an array of machine_check_event structure.
*/
void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
- uint64_t nip, uint64_t addr)
+ uint64_t nip, uint64_t addr, uint64_t phys_addr)
{
- uint64_t srr1;
- int index = __get_cpu_var(mce_nest_count)++;
- struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
+ int index = local_paca->mce_info->mce_nest_count++;
+ struct machine_check_event *mce;
+ mce = &local_paca->mce_info->mce_event[index];
/*
* Return if we don't have enough space to log mce event.
* mce_nest_count may go beyond MAX_MC_EVT but that's ok,
@@ -90,20 +111,32 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
+ mce->cpu = get_paca()->paca_index;
- mce->initiator = MCE_INITIATOR_CPU;
- if (handled)
+ /* Mark it recovered if we have handled it and MSR(RI=1). */
+ if (handled && (regs->msr & MSR_RI))
mce->disposition = MCE_DISPOSITION_RECOVERED;
else
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
- mce->severity = MCE_SEV_ERROR_SYNC;
- srr1 = regs->msr;
+ mce->initiator = mce_err->initiator;
+ mce->severity = mce_err->severity;
+ mce->sync_error = mce_err->sync_error;
+ mce->error_class = mce_err->error_class;
/*
* Populate the mce error_type and type-specific error_type.
*/
mce_set_error_info(mce, mce_err);
+ if (mce->error_type == MCE_ERROR_TYPE_UE)
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
+
+ /*
+ * Raise irq work, So that we don't miss to log the error for
+ * unrecoverable errors.
+ */
+ if (mce->disposition == MCE_DISPOSITION_NOT_RECOVERED)
+ mce_irq_work_queue();
if (!addr)
return;
@@ -117,9 +150,23 @@ void save_mce_event(struct pt_regs *regs, long handled,
} else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
mce->u.erat_error.effective_address_provided = true;
mce->u.erat_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_USER) {
+ mce->u.user_error.effective_address_provided = true;
+ mce->u.user_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_RA) {
+ mce->u.ra_error.effective_address_provided = true;
+ mce->u.ra_error.effective_address = addr;
+ } else if (mce->error_type == MCE_ERROR_TYPE_LINK) {
+ mce->u.link_error.effective_address_provided = true;
+ mce->u.link_error.effective_address = addr;
} else if (mce->error_type == MCE_ERROR_TYPE_UE) {
mce->u.ue_error.effective_address_provided = true;
mce->u.ue_error.effective_address = addr;
+ if (phys_addr != ULONG_MAX) {
+ mce->u.ue_error.physical_address_provided = true;
+ mce->u.ue_error.physical_address = phys_addr;
+ machine_check_ue_event(mce);
+ }
}
return;
}
@@ -143,7 +190,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
*/
int get_mce_event(struct machine_check_event *mce, bool release)
{
- int index = __get_cpu_var(mce_nest_count) - 1;
+ int index = local_paca->mce_info->mce_nest_count - 1;
struct machine_check_event *mc_evt;
int ret = 0;
@@ -153,7 +200,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
/* Check if we have MCE info to process. */
if (index < MAX_MC_EVT) {
- mc_evt = &__get_cpu_var(mce_event[index]);
+ mc_evt = &local_paca->mce_info->mce_event[index];
/* Copy the event structure and release the original */
if (mce)
*mce = *mc_evt;
@@ -163,7 +210,7 @@ int get_mce_event(struct machine_check_event *mce, bool release)
}
/* Decrement the count to free the slot. */
if (release)
- __get_cpu_var(mce_nest_count)--;
+ local_paca->mce_info->mce_nest_count--;
return ret;
}
@@ -173,6 +220,28 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+static void machine_check_ue_work(void)
+{
+ schedule_work(&mce_ue_event_work);
+}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+static void machine_check_ue_event(struct machine_check_event *evt)
+{
+ int index;
+
+ index = local_paca->mce_info->mce_ue_count++;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ local_paca->mce_info->mce_ue_count--;
+ return;
+ }
+ memcpy(&local_paca->mce_info->mce_ue_event_queue[index],
+ evt, sizeof(*evt));
+}
+
/*
* Queue up the MCE event which then can be handled later.
*/
@@ -184,41 +253,131 @@ void machine_check_queue_event(void)
if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
return;
- index = __get_cpu_var(mce_queue_count)++;
+ index = local_paca->mce_info->mce_queue_count++;
/* If queue is full, just return for now. */
if (index >= MAX_MC_EVT) {
- __get_cpu_var(mce_queue_count)--;
+ local_paca->mce_info->mce_queue_count--;
return;
}
- __get_cpu_var(mce_event_queue[index]) = evt;
+ memcpy(&local_paca->mce_info->mce_event_queue[index],
+ &evt, sizeof(evt));
- /* Queue irq work to process this event later. */
- irq_work_queue(&mce_event_process_work);
+ mce_irq_work_queue();
+}
+
+void mce_common_process_ue(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
+{
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs_set_return_ip(regs, extable_fixup(entry));
+ }
}
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
*/
-static void machine_check_process_queued_event(struct irq_work *work)
+static void machine_process_ue_event(struct work_struct *work)
{
int index;
+ struct machine_check_event *evt;
+
+ while (local_paca->mce_info->mce_ue_count > 0) {
+ index = local_paca->mce_info->mce_ue_count - 1;
+ evt = &local_paca->mce_info->mce_ue_event_queue[index];
+ blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * This should probably queued elsewhere, but
+ * oh! well
+ *
+ * Don't report this machine check because the caller has a
+ * asked us to ignore the event, it has a fixup handler which
+ * will do the appropriate error handling and reporting.
+ */
+ if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.ignore_event) {
+ local_paca->mce_info->mce_ue_count--;
+ continue;
+ }
+
+ if (evt->u.ue_error.physical_address_provided) {
+ unsigned long pfn;
+
+ pfn = evt->u.ue_error.physical_address >>
+ PAGE_SHIFT;
+ memory_failure(pfn, 0);
+ } else
+ pr_warn("Failed to identify bad address from "
+ "where the uncorrectable error (UE) "
+ "was generated\n");
+ }
+#endif
+ local_paca->mce_info->mce_ue_count--;
+ }
+}
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_check_process_queued_event(void)
+{
+ int index;
+ struct machine_check_event *evt;
+
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
/*
* For now just print it to console.
* TODO: log this error event to FSP or nvram.
*/
- while (__get_cpu_var(mce_queue_count) > 0) {
- index = __get_cpu_var(mce_queue_count) - 1;
- machine_check_print_event_info(
- &__get_cpu_var(mce_event_queue[index]));
- __get_cpu_var(mce_queue_count)--;
+ while (local_paca->mce_info->mce_queue_count > 0) {
+ index = local_paca->mce_info->mce_queue_count - 1;
+ evt = &local_paca->mce_info->mce_event_queue[index];
+
+ if (evt->error_type == MCE_ERROR_TYPE_UE &&
+ evt->u.ue_error.ignore_event) {
+ local_paca->mce_info->mce_queue_count--;
+ continue;
+ }
+ machine_check_print_event_info(evt, false, false);
+ local_paca->mce_info->mce_queue_count--;
}
}
-void machine_check_print_event_info(struct machine_check_event *evt)
+void set_mce_pending_irq_work(void)
+{
+ local_paca->mce_pending_irq_work = 1;
+}
+
+void clear_mce_pending_irq_work(void)
{
- const char *level, *sevstr, *subtype;
+ local_paca->mce_pending_irq_work = 0;
+}
+
+void mce_run_irq_context_handlers(void)
+{
+ if (unlikely(local_paca->mce_pending_irq_work)) {
+ if (ppc_md.machine_check_log_err)
+ ppc_md.machine_check_log_err();
+ machine_check_process_queued_event();
+ machine_check_ue_work();
+ clear_mce_pending_irq_work();
+ }
+}
+
+void machine_check_print_event_info(struct machine_check_event *evt,
+ bool user_mode, bool in_guest)
+{
+ const char *level, *sevstr, *subtype, *err_type, *initiator;
+ uint64_t ea = 0, pa = 0;
+ int n = 0;
+ char dar_str[50];
+ char pa_str[50];
static const char *mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
@@ -241,6 +400,38 @@ void machine_check_print_event_info(struct machine_check_event *evt)
"Parity",
"Multihit",
};
+ static const char *mc_user_types[] = {
+ "Indeterminate",
+ "tlbie(l) invalid",
+ "scv invalid",
+ };
+ static const char *mc_ra_types[] = {
+ "Indeterminate",
+ "Instruction fetch (bad)",
+ "Instruction fetch (foreign/control memory)",
+ "Page table walk ifetch (bad)",
+ "Page table walk ifetch (foreign/control memory)",
+ "Load (bad)",
+ "Store (bad)",
+ "Page table walk Load/Store (bad)",
+ "Page table walk Load/Store (foreign/control memory)",
+ "Load/Store (foreign/control memory)",
+ };
+ static const char *mc_link_types[] = {
+ "Indeterminate",
+ "Instruction fetch (timeout)",
+ "Page table walk ifetch (timeout)",
+ "Load (timeout)",
+ "Store (timeout)",
+ "Page table walk Load/Store (timeout)",
+ };
+ static const char *mc_error_class[] = {
+ "Unknown",
+ "Hardware error",
+ "Probable Hardware error (some chance of software cause)",
+ "Software error",
+ "Probable Software error (some chance of hardware cause)",
+ };
/* Print things out */
if (evt->version != MCE_V1) {
@@ -255,9 +446,9 @@ void machine_check_print_event_info(struct machine_check_event *evt)
break;
case MCE_SEV_WARNING:
level = KERN_WARNING;
- sevstr = "";
+ sevstr = "Warning";
break;
- case MCE_SEV_ERROR_SYNC:
+ case MCE_SEV_SEVERE:
level = KERN_ERR;
sevstr = "Severe";
break;
@@ -268,85 +459,313 @@ void machine_check_print_event_info(struct machine_check_event *evt)
break;
}
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "[Not recovered");
- printk("%s Initiator: %s\n", level,
- evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
+ switch(evt->initiator) {
+ case MCE_INITIATOR_CPU:
+ initiator = "CPU";
+ break;
+ case MCE_INITIATOR_PCI:
+ initiator = "PCI";
+ break;
+ case MCE_INITIATOR_ISA:
+ initiator = "ISA";
+ break;
+ case MCE_INITIATOR_MEMORY:
+ initiator = "Memory";
+ break;
+ case MCE_INITIATOR_POWERMGM:
+ initiator = "Power Management";
+ break;
+ case MCE_INITIATOR_UNKNOWN:
+ default:
+ initiator = "Unknown";
+ break;
+ }
+
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
+ err_type = "UE";
subtype = evt->u.ue_error.ue_error_type <
ARRAY_SIZE(mc_ue_types) ?
mc_ue_types[evt->u.ue_error.ue_error_type]
: "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
if (evt->u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ue_error.effective_address);
+ ea = evt->u.ue_error.effective_address;
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physial address: %016llx\n",
- level, evt->u.ue_error.physical_address);
+ pa = evt->u.ue_error.physical_address;
break;
case MCE_ERROR_TYPE_SLB:
+ err_type = "SLB";
subtype = evt->u.slb_error.slb_error_type <
ARRAY_SIZE(mc_slb_types) ?
mc_slb_types[evt->u.slb_error.slb_error_type]
: "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
if (evt->u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.slb_error.effective_address);
+ ea = evt->u.slb_error.effective_address;
break;
case MCE_ERROR_TYPE_ERAT:
+ err_type = "ERAT";
subtype = evt->u.erat_error.erat_error_type <
ARRAY_SIZE(mc_erat_types) ?
mc_erat_types[evt->u.erat_error.erat_error_type]
: "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
if (evt->u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.erat_error.effective_address);
+ ea = evt->u.erat_error.effective_address;
break;
case MCE_ERROR_TYPE_TLB:
+ err_type = "TLB";
subtype = evt->u.tlb_error.tlb_error_type <
ARRAY_SIZE(mc_tlb_types) ?
mc_tlb_types[evt->u.tlb_error.tlb_error_type]
: "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
if (evt->u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.tlb_error.effective_address);
+ ea = evt->u.tlb_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ err_type = "User";
+ subtype = evt->u.user_error.user_error_type <
+ ARRAY_SIZE(mc_user_types) ?
+ mc_user_types[evt->u.user_error.user_error_type]
+ : "Unknown";
+ if (evt->u.user_error.effective_address_provided)
+ ea = evt->u.user_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ err_type = "Real address";
+ subtype = evt->u.ra_error.ra_error_type <
+ ARRAY_SIZE(mc_ra_types) ?
+ mc_ra_types[evt->u.ra_error.ra_error_type]
+ : "Unknown";
+ if (evt->u.ra_error.effective_address_provided)
+ ea = evt->u.ra_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ err_type = "Link";
+ subtype = evt->u.link_error.link_error_type <
+ ARRAY_SIZE(mc_link_types) ?
+ mc_link_types[evt->u.link_error.link_error_type]
+ : "Unknown";
+ if (evt->u.link_error.effective_address_provided)
+ ea = evt->u.link_error.effective_address;
+ break;
+ case MCE_ERROR_TYPE_DCACHE:
+ err_type = "D-Cache";
+ subtype = "Unknown";
+ break;
+ case MCE_ERROR_TYPE_ICACHE:
+ err_type = "I-Cache";
+ subtype = "Unknown";
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
+ err_type = "Unknown";
+ subtype = "";
break;
}
+
+ dar_str[0] = pa_str[0] = '\0';
+ if (ea && evt->srr0 != ea) {
+ /* Load/Store address */
+ n = sprintf(dar_str, "DAR: %016llx ", ea);
+ if (pa)
+ sprintf(dar_str + n, "paddr: %016llx ", pa);
+ } else if (pa) {
+ sprintf(pa_str, " paddr: %016llx", pa);
+ }
+
+ printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+ level, evt->cpu, sevstr, in_guest ? "Guest" : "",
+ err_type, subtype, dar_str,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "Not recovered");
+
+ if (in_guest || user_mode) {
+ printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+ level, evt->cpu, current->pid, current->comm,
+ in_guest ? "Guest " : "", evt->srr0, pa_str);
+ } else {
+ printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+ level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+ }
+
+ printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
+ subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+ mc_error_class[evt->error_class] : "Unknown";
+ printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /* Display faulty slb contents for SLB errors. */
+ if (evt->error_type == MCE_ERROR_TYPE_SLB && !in_guest)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
}
+EXPORT_SYMBOL_GPL(machine_check_print_event_info);
-uint64_t get_mce_fault_addr(struct machine_check_event *evt)
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain srr0 and srr1.
+ *
+ * Returns whatever the platform's machine_check_early hook returns, or 0
+ * when the platform does not provide one.
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early)
{
- switch (evt->error_type) {
- case MCE_ERROR_TYPE_UE:
- if (evt->u.ue_error.effective_address_provided)
- return evt->u.ue_error.effective_address;
+ long handled = 0;
+
+ hv_nmi_check_nonrecoverable(regs);
+
+ /*
+ * See if platform is capable of handling machine check.
+ */
+ if (ppc_md.machine_check_early)
+ handled = ppc_md.machine_check_early(regs);
+
+ return handled;
+}
+
+/*
+ * Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9.
+ *
+ * The variable has static storage, so it starts out as DTRIG_UNKNOWN (0)
+ * until init_debug_trig_function() selects one of the other values;
+ * hmi_handle_debugtrig() reads it on every HMI.
+ */
+static enum {
+ DTRIG_UNKNOWN,
+ DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */
+ DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */
+} hmer_debug_trig_function;
+
+/*
+ * Initcall that decides what HMER_DEBUG_TRIG means on this machine and
+ * stores the answer in hmer_debug_trig_function.
+ *
+ * The "ibm,hmi-special-triggers" strings of the current CPU's device-tree
+ * node are consulted first; only when that property is not found is the
+ * PVR used as a fallback. Always returns 0.
+ */
+static int init_debug_trig_function(void)
+{
+ int pvr;
+ struct device_node *cpun;
+ struct property *prop = NULL;
+ const char *str;
+
+ /* First look in the device tree */
+ preempt_disable();
+ cpun = of_get_cpu_node(smp_processor_id(), NULL);
+ if (cpun) {
+ of_property_for_each_string(cpun, "ibm,hmi-special-triggers",
+ prop, str) {
+ if (strcmp(str, "bit17-vector-ci-load") == 0)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ else if (strcmp(str, "bit17-tm-suspend-escape") == 0)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ }
+ of_node_put(cpun);
+ }
+ preempt_enable();
+
+ /* If we found the property, don't look at PVR */
+ if (prop)
+ goto out;
+
+ pvr = mfspr(SPRN_PVR);
+ /* Check for POWER9 Nimbus (scale-out) */
+ if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) {
+ /* DD2.2 and later */
+ if ((pvr & 0xfff) >= 0x202)
+ hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE;
+ /* DD2.0 and DD2.1 - used for vector CI load emulation */
+ else if ((pvr & 0xfff) >= 0x200)
+ hmer_debug_trig_function = DTRIG_VECTOR_CI;
+ }
+
+ out:
+ /* Only report the selection; DTRIG_UNKNOWN is left silent. */
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ pr_debug("HMI debug trigger used for vector CI load\n");
break;
- case MCE_ERROR_TYPE_SLB:
- if (evt->u.slb_error.effective_address_provided)
- return evt->u.slb_error.effective_address;
+ case DTRIG_SUSPEND_ESCAPE:
+ pr_debug("HMI debug trigger used for TM suspend escape\n");
break;
- case MCE_ERROR_TYPE_ERAT:
- if (evt->u.erat_error.effective_address_provided)
- return evt->u.erat_error.effective_address;
+ default:
break;
- case MCE_ERROR_TYPE_TLB:
- if (evt->u.tlb_error.effective_address_provided)
- return evt->u.tlb_error.effective_address;
+ }
+ return 0;
+}
+__initcall(init_debug_trig_function);
+
+/*
+ * Handle HMIs that occur as a result of a debug trigger.
+ *
+ * regs may be NULL; it is only dereferenced (via user_mode()) after a
+ * NULL check.
+ *
+ * Return values:
+ * -1 means this is not a HMI cause that we know about
+ * 0 means no further handling is required
+ * 1 means further handling is required
+ */
+long hmi_handle_debugtrig(struct pt_regs *regs)
+{
+ unsigned long hmer = mfspr(SPRN_HMER);
+ long ret = 0;
+
+ /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */
+ if (!((hmer & HMER_DEBUG_TRIG)
+ && hmer_debug_trig_function != DTRIG_UNKNOWN))
+ return -1;
+
+ /* Drop the bit from the local copy as well as from the register. */
+ hmer &= ~HMER_DEBUG_TRIG;
+ /* HMER is a write-AND register */
+ mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG);
+
+ switch (hmer_debug_trig_function) {
+ case DTRIG_VECTOR_CI:
+ /*
+ * Now to avoid problems with soft-disable we
+ * only do the emulation if we are coming from
+ * host user space
+ */
+ if (regs && user_mode(regs))
+ ret = local_paca->hmi_p9_special_emu = 1;
+
break;
+
+ /* DTRIG_SUSPEND_ESCAPE ends up here: nothing to flag, ret stays 0 */
default:
- case MCE_ERROR_TYPE_UNKNOWN:
break;
}
- return 0;
+
+ /*
+ * See if any other HMI causes remain to be handled
+ */
+ if (hmer & mfspr(SPRN_HMEER))
+ return -1;
+
+ return ret;
+}
+
+/*
+ * Real-mode HMI entry point. Counts the HMI in the local paca, then lets
+ * hmi_handle_debugtrig() try to consume it before falling back to the
+ * platform's early handler.
+ *
+ * Return values:
+ * 0 or 1 as returned by hmi_handle_debugtrig() when it recognised the
+ * HMI as a debug trigger (0: no further handling, 1: further
+ * handling required)
+ * 1 in every other case, after the optional platform hook has run
+ */
+DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode)
+{
+ int ret;
+
+ local_paca->hmi_irqs++;
+
+ ret = hmi_handle_debugtrig(regs);
+ if (ret >= 0)
+ return ret;
+
+ wait_for_subcore_guest_exit();
+
+ /* The platform hook is optional. */
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(regs);
+
+ wait_for_tb_resync();
+
+ return 1;
+}
+
+/*
+ * Allocate one struct mce_info per possible CPU from memblock, below
+ * min(ppc64_bolted_size(), ppc64_rma_size) and on the CPU's early NUMA
+ * node, and publish it through that CPU's paca. Panics if any of the
+ * allocations fails.
+ */
+void __init mce_init(void)
+{
+ u64 limit = min(ppc64_bolted_size(), ppc64_rma_size);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct mce_info *info;
+
+ info = memblock_alloc_try_nid(sizeof(*info), __alignof__(*info),
+ MEMBLOCK_LOW_LIMIT, limit,
+ early_cpu_to_node(cpu));
+ if (!info)
+ panic("Failed to allocate memory for MCE event data\n");
+ paca_ptrs[cpu]->mce_info = info;
+ }
}
-EXPORT_SYMBOL(get_mce_fault_addr);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index aa9aff3d6ad3..71e8f2a92e36 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Machine check exception handling CPU-side for power7 and power8
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright 2013 IBM Corporation
* Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
*/
@@ -24,183 +11,676 @@
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/extable.h>
+#include <linux/pgtable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
+#include <asm/pte-walk.h>
+#include <asm/sstep.h>
+#include <asm/exception-64s.h>
+#include <asm/extable.h>
+#include <asm/inst.h>
-/* flush SLBs and reload */
-static void flush_and_reload_slb(void)
+/*
+ * Convert an address related to an mm to a PFN. NOTE: we are in real
+ * mode, we could potentially race with page table updates.
+ *
+ * The page tables walked are current->mm's when regs describes user mode
+ * and init_mm's otherwise. Returns ULONG_MAX when no PTE is found or when
+ * the PTE is not present or is special.
+ */
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
- struct slb_shadow *slb;
- unsigned long i, n;
+ pte_t *ptep, pte;
+ unsigned int shift;
+ unsigned long pfn, flags;
+ struct mm_struct *mm;
+
+ if (user_mode(regs))
+ mm = current->mm;
+ else
+ mm = &init_mm;
+
+ /* Interrupts stay off for the whole lookup and PTE read. */
+ local_irq_save(flags);
+ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+ if (!ptep) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+ pte = READ_ONCE(*ptep);
- /* Invalidate all SLBs */
- asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+ if (!pte_present(pte) || pte_special(pte)) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+
+ if (shift <= PAGE_SHIFT)
+ pfn = pte_pfn(pte);
+ else {
+ /*
+ * Mapping larger than a base page: rpnmask selects the
+ * address bits from PAGE_SHIFT up to (not including) shift,
+ * which are OR-ed into the PTE value before the PFN is taken.
+ */
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pfn = pte_pfn(__pte(pte_val(pte) | (addr & rpnmask)));
+ }
+out:
+ local_irq_restore(flags);
+ return pfn;
+}
+/*
+ * Return true when the paca's kvm_hstate.in_guest is set. Always false
+ * when CONFIG_KVM_BOOK3S_HANDLER is not enabled.
+ */
+static bool mce_in_guest(void)
+{
#ifdef CONFIG_KVM_BOOK3S_HANDLER
/*
- * If machine check is hit when in guest or in transition, we will
- * only flush the SLBs and continue.
+ * If machine check is hit when in guest context or low level KVM
+ * code, avoid looking up any translations or making any attempts
+ * to recover, just record the event and pass to KVM.
*/
if (get_paca()->kvm_hstate.in_guest)
- return;
+ return true;
#endif
+ return false;
+}
- /* For host kernel, reload the SLBs from shadow SLB buffer. */
- slb = get_slb_shadow();
- if (!slb)
+/*
+ * Flush SLBs and reload the bolted entries.
+ *
+ * Does nothing when early_radix_enabled() is true. Otherwise all SLB
+ * entries are invalidated, and the bolted ones are restored only if an
+ * SLB shadow exists. Only built with CONFIG_PPC_64S_HASH_MMU.
+ */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void flush_and_reload_slb(void)
+{
+ if (early_radix_enabled())
return;
- n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE);
+ /* Invalidate all SLBs */
+ slb_flush_all_realmode();
- /* Load up the SLB entries from shadow SLB */
- for (i = 0; i < n; i++) {
- unsigned long rb = be64_to_cpu(slb->save_area[i].esid);
- unsigned long rs = be64_to_cpu(slb->save_area[i].vsid);
+ /*
+ * This probably shouldn't happen, but it may be possible it's
+ * called in early boot before SLB shadows are allocated.
+ */
+ if (!get_slb_shadow())
+ return;
- rb = (rb & ~0xFFFul) | i;
- asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
- }
+ slb_restore_bolted_realmode();
}
+#endif
-static long mce_handle_derror(uint64_t dsisr, uint64_t slb_error_bits)
+/*
+ * Flush the ERAT.
+ *
+ * With CONFIG_PPC_64S_HASH_MMU on a CPU without CPU_FTR_ARCH_300 this is
+ * done by flush_and_reload_slb(); in every other case the
+ * PPC_ISA_3_0_INVALIDATE_ERAT instruction sequence is issued.
+ */
+void flush_erat(void)
{
- long handled = 1;
-
- /*
- * flush and reload SLBs for SLB errors and flush TLBs for TLB errors.
- * reset the error bits whenever we handle them so that at the end
- * we can check whether we handled all of them or not.
- * */
- if (dsisr & slb_error_bits) {
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
flush_and_reload_slb();
- /* reset error bits */
- dsisr &= ~(slb_error_bits);
- }
- if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
- /* reset error bits */
- dsisr &= ~P7_DSISR_MC_TLB_MULTIHIT_MFTLB;
+ return;
}
- /* Any other errors we don't understand? */
- if (dsisr & 0xffffffffUL)
- handled = 0;
-
- return handled;
+#endif
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
}
-static long mce_handle_derror_p7(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P7_DSISR_MC_SLB_ERRORS);
-}
+/* Selector values accepted by mce_flush(). */
+#define MCE_FLUSH_SLB 1
+#define MCE_FLUSH_TLB 2
+#define MCE_FLUSH_ERAT 3
-static long mce_handle_common_ierror(uint64_t srr1)
+/*
+ * Flush the translation structure selected by 'what' (one of the
+ * MCE_FLUSH_* values).
+ *
+ * Returns 1 when a flush was performed and 0 when 'what' was not
+ * handled. MCE_FLUSH_SLB is only handled when CONFIG_PPC_64S_HASH_MMU is
+ * enabled; without it the request falls through and 0 is returned.
+ */
+static int mce_flush(int what)
{
- long handled = 0;
-
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case 0:
- break;
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- /* flush and reload SLBs for SLB errors. */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (what == MCE_FLUSH_SLB) {
flush_and_reload_slb();
- handled = 1;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb) {
- cur_cpu_spec->flush_tlb(TLBIEL_INVAL_PAGE);
- handled = 1;
- }
- break;
- default:
- break;
+ return 1;
+ }
+#endif
+ if (what == MCE_FLUSH_ERAT) {
+ flush_erat();
+ return 1;
+ }
+ if (what == MCE_FLUSH_TLB) {
+ tlbiel_all();
+ return 1;
}
- return handled;
+ return 0;
}
-static long mce_handle_ierror_p7(uint64_t srr1)
+/*
+ * Non-zero when PPC_BIT(42) is set in srr1. Presumably this marks a
+ * machine check caused by a load/store rather than an instruction
+ * fetch -- confirm against the users of this macro.
+ */
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+/*
+ * One row of an SRR1 decode table, as walked by mce_handle_ierror().
+ * A row matches when (srr1 & srr1_mask) == srr1_value; a row whose
+ * srr1_mask is 0 terminates the table. Rows are initialised
+ * positionally, so the field order below is significant.
+ */
+struct mce_ierror_table {
+ unsigned long srr1_mask;
+ unsigned long srr1_value;
+ bool nip_valid; /* nip is a valid indicator of faulting address */
+ unsigned int error_type; /* MCE_ERROR_TYPE_* */
+ unsigned int error_subtype; /* subtype stored for that error_type */
+ unsigned int error_class; /* MCE_ECLASS_* */
+ unsigned int initiator; /* MCE_INITIATOR_* */
+ unsigned int severity; /* MCE_SEV_* */
+ bool sync_error; /* copied to mce_error_info.sync_error */
+};
+
+/*
+ * SRR1 decode rows for the "p7" CPUs (going by the table name). Every
+ * row is { srr1_mask, srr1_value, nip_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table.
+ */
+static const struct mce_ierror_table mce_p7_ierror_table[] = {
+{ 0x00000000001c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000001c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000001c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000001c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000001c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000001c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000001c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+/*
+ * SRR1 decode rows for the "p8" CPUs (going by the table name). Every
+ * row is { srr1_mask, srr1_value, nip_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table. Compared with the p7 table the mask also covers bit
+ * 0x08000000, which the two LINK rows require to be set.
+ */
+static const struct mce_ierror_table mce_p8_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+/*
+ * SRR1 decode rows for the "p9" CPUs (going by the table name). Every
+ * row is { srr1_mask, srr1_value, nip_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table.
+ *
+ * The rows are initialised positionally, so each one must list all nine
+ * fields in struct order: a missing or misplaced value silently shifts
+ * every field after it.
+ */
+static const struct mce_ierror_table mce_p9_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008000000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008040000, true,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+/*
+ * error_class was missing from this row, which moved MCE_INITIATOR_CPU
+ * into error_class, MCE_SEV_FATAL into initiator and 'false' into
+ * severity. Hardware class matches the other LINK rows.
+ */
+{ 0x00000000081c0000, 0x0000000008180000, false,
+ MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+/*
+ * MCE_ECLASS_HARDWARE used to sit in front of the error type here, so
+ * it was stored as error_type and the type/subtype each slid one field
+ * to the right. It now follows the subtype like in every other row.
+ */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+/*
+ * SRR1 decode rows for the "p10" CPUs (going by the table name). Every
+ * row is { srr1_mask, srr1_value, nip_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table.
+ *
+ * The rows are initialised positionally, so each one must list all nine
+ * fields in struct order: a misplaced value silently shifts the fields
+ * around it.
+ */
+static const struct mce_ierror_table mce_p10_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008080000, true,
+ MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+/*
+ * MCE_ECLASS_HARDWARE used to sit in front of the error type here, so
+ * it was stored as error_type and the type/subtype each slid one field
+ * to the right. It now follows the subtype like in every other row.
+ */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
+/*
+ * One row of a DSISR decode table. Rows are initialised positionally,
+ * so the field order below is significant. The tables in this file end
+ * with a row whose dsisr_value is 0.
+ * NOTE(review): dsisr_value looks like a single-bit mask tested against
+ * DSISR -- confirm against the code that walks these tables.
+ */
+struct mce_derror_table {
+ unsigned long dsisr_value;
+ bool dar_valid; /* dar is a valid indicator of faulting address */
+ unsigned int error_type; /* MCE_ERROR_TYPE_* */
+ unsigned int error_subtype; /* subtype for that error_type */
+ unsigned int error_class; /* MCE_ECLASS_* */
+ unsigned int initiator; /* MCE_INITIATOR_* */
+ unsigned int severity; /* MCE_SEV_* */
+ bool sync_error;
+};
+
+/*
+ * DSISR decode rows for the "p7" CPUs (going by the table name). Every
+ * row is { dsisr_value, dar_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table. Note that the rows are not sorted by dsisr_value (0x80
+ * comes before 0x100); presumably the order is the match priority --
+ * keep it unless the walker is checked.
+ */
+static const struct mce_derror_table mce_p7_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * DSISR decode rows for the "p8" CPUs (going by the table name). Every
+ * row is { dsisr_value, dar_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table. The SLB multihit row is deliberately listed before the SLB
+ * parity row (see the "Before PARITY" note).
+ */
+static const struct mce_derror_table mce_p8_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000200, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * DSISR decode rows for the "p9" CPUs (going by the table name). Every
+ * row is { dsisr_value, dar_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table. The SLB multihit row is deliberately listed before the SLB
+ * parity row (see the "Before PARITY" note).
+ */
+static const struct mce_derror_table mce_p9_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00002000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00001000, true,
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * DSISR decode rows for the "p10" CPUs (going by the table name). Every
+ * row is { dsisr_value, dar_valid, error_type, error_subtype,
+ * error_class, initiator, severity, sync_error }; the all-zero row ends
+ * the table. Unlike the p9 table there are no LINK rows (0x2000 and
+ * 0x1000). The SLB multihit row is deliberately listed before the SLB
+ * parity row (see the "Before PARITY" note).
+ */
+static const struct mce_derror_table mce_p10_derror_table[] = {
+{ 0x00008000, false,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
+/*
+ * Decode the instruction at regs->nip to recover the effective address
+ * it accessed, and translate that address to a physical one.
+ *
+ * On success returns 0 with *addr set to the effective address and
+ * *phys_addr set to the physical address of the page containing it, or
+ * to ULONG_MAX when the effective address could not be translated.
+ * Returns -1 with *addr set to 0 (and *phys_addr untouched) when the NIP
+ * cannot be translated or analyse_instr() does not return 0.
+ */
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
+ uint64_t *phys_addr)
{
- long handled = 0;
-
- handled = mce_handle_common_ierror(srr1);
-
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- flush_and_reload_slb();
- handled = 1;
+ /*
+ * Carefully look at the NIP to determine
+ * the instruction to analyse. Reading the NIP
+ * in real-mode is tricky and can lead to recursive
+ * faults
+ */
+ ppc_inst_t instr;
+ unsigned long pfn, instr_addr;
+ struct instruction_op op;
+ struct pt_regs tmp = *regs;
+
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ instr_addr = (pfn << PAGE_SHIFT) + (regs->nip & ~PAGE_MASK);
+ instr = ppc_inst_read((u32 *)instr_addr);
+ if (!analyse_instr(&op, &tmp, instr)) {
+ pfn = addr_to_pfn(regs, op.ea);
+ *addr = op.ea;
+ /*
+ * addr_to_pfn() reports failure as ULONG_MAX. Shifting
+ * that sentinel would yield a plausible-looking but
+ * bogus physical address, so pass the sentinel on
+ * unchanged instead.
+ */
+ if (pfn != ULONG_MAX)
+ *phys_addr = (pfn << PAGE_SHIFT);
+ else
+ *phys_addr = ULONG_MAX;
+ return 0;
+ }
+ /*
+ * analyse_instr() might fail if the instruction
+ * is not a load/store, although this is unexpected
+ * for load/store errors or if we got the NIP
+ * wrong
+ */
}
- return handled;
+ *addr = 0;
+ return -1;
}
-static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
+static int mce_handle_ierror(struct pt_regs *regs, unsigned long srr1,
+ const struct mce_ierror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
- switch (P7_SRR1_MC_IFETCH(srr1)) {
- case P7_SRR1_MC_IFETCH_SLB_PARITY:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- break;
- case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- break;
- case P7_SRR1_MC_IFETCH_UE:
- case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
- break;
- case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
- break;
- }
-}
+ int handled = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].srr1_mask; i++) {
+ if ((srr1 & table[i].srr1_mask) != table[i].srr1_value)
+ continue;
+
+ if (!mce_in_guest()) {
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
+ handled = mce_flush(MCE_FLUSH_SLB);
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ handled = mce_flush(MCE_FLUSH_ERAT);
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ handled = mce_flush(MCE_FLUSH_TLB);
+ break;
+ }
+ }
-static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
-{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->sync_error = table[i].sync_error;
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].nip_valid && !mce_in_guest()) {
+ *addr = regs->nip;
+ if (mce_err->sync_error &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ unsigned long pfn;
+
+ if (get_paca()->in_mce < MAX_MCE_DEPTH) {
+ pfn = addr_to_pfn(regs, regs->nip);
+ if (pfn != ULONG_MAX) {
+ *phys_addr =
+ (pfn << PAGE_SHIFT);
+ }
+ }
+ }
+ }
+ return handled;
}
+
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
+ mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
+
+ return 0;
}
-static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
+static int mce_handle_derror(struct pt_regs *regs,
+ const struct mce_derror_table table[],
+ struct mce_error_info *mce_err, uint64_t *addr,
+ uint64_t *phys_addr)
{
- if (dsisr & P7_DSISR_MC_UE) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
- mce_err->error_type = MCE_ERROR_TYPE_UE;
- mce_err->u.ue_error_type =
- MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
- } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
- } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
- mce_err->error_type = MCE_ERROR_TYPE_TLB;
- mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
- } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
- mce_err->error_type = MCE_ERROR_TYPE_SLB;
- mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
+ uint64_t dsisr = regs->dsisr;
+ int handled = 0;
+ int found = 0;
+ int i;
+
+ *addr = 0;
+
+ for (i = 0; table[i].dsisr_value; i++) {
+ if (!(dsisr & table[i].dsisr_value))
+ continue;
+
+ if (!mce_in_guest()) {
+ /* attempt to correct the error */
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_SLB:
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
+#endif
+ if (mce_flush(MCE_FLUSH_SLB))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ if (mce_flush(MCE_FLUSH_ERAT))
+ handled = 1;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ if (mce_flush(MCE_FLUSH_TLB))
+ handled = 1;
+ break;
+ }
+ }
+
+ /*
+ * Attempt to handle multiple conditions, but only return
+ * one. Ensure uncorrectable errors are first in the table
+ * to match.
+ */
+ if (found)
+ continue;
+
+ /* now fill in mce_error_info */
+ mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
+ switch (table[i].error_type) {
+ case MCE_ERROR_TYPE_UE:
+ mce_err->u.ue_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_SLB:
+ mce_err->u.slb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_ERAT:
+ mce_err->u.erat_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_TLB:
+ mce_err->u.tlb_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_USER:
+ mce_err->u.user_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_RA:
+ mce_err->u.ra_error_type = table[i].error_subtype;
+ break;
+ case MCE_ERROR_TYPE_LINK:
+ mce_err->u.link_error_type = table[i].error_subtype;
+ break;
+ }
+ mce_err->sync_error = table[i].sync_error;
+ mce_err->severity = table[i].severity;
+ mce_err->initiator = table[i].initiator;
+ if (table[i].dar_valid)
+ *addr = regs->dar;
+ else if (mce_err->sync_error && !mce_in_guest() &&
+ table[i].error_type == MCE_ERROR_TYPE_UE) {
+ /*
+ * We do a maximum of 4 nested MCE calls, see
+ * kernel/exception-64s.h
+ */
+ if (get_paca()->in_mce < MAX_MCE_DEPTH)
+ mce_find_instr_ea_and_phys(regs, addr,
+ phys_addr);
+ }
+ found = 1;
}
+
+ if (found)
+ return handled;
+
+ mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
+ mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
+
+ return 0;
}
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
{
- long handled = 0;
+ if (mce_in_guest())
+ return 0;
+
+ mce_common_process_ue(regs, mce_err);
+ if (mce_err->ignore_event)
+ return 1;
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -211,103 +691,101 @@ static long mce_handle_ue_error(struct pt_regs *regs)
if (ppc_md.mce_check_early_recovery) {
if (ppc_md.mce_check_early_recovery(regs))
- handled = 1;
+ return 1;
}
- return handled;
+
+ return 0;
}
-long __machine_check_early_realmode_p7(struct pt_regs *regs)
+static long mce_handle_error(struct pt_regs *regs,
+ unsigned long srr1,
+ const struct mce_derror_table dtable[],
+ const struct mce_ierror_table itable[])
{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
+ struct mce_error_info mce_err = { 0 };
+ uint64_t addr, phys_addr = ULONG_MAX;
+ long handled;
- srr1 = regs->msr;
- nip = regs->nip;
+ if (SRR1_MC_LOADSTORE(srr1))
+ handled = mce_handle_derror(regs, dtable, &mce_err, &addr,
+ &phys_addr);
+ else
+ handled = mce_handle_ierror(regs, srr1, itable, &mce_err, &addr,
+ &phys_addr);
- /*
- * Handle memory errors depending whether this was a load/store or
- * ifetch exception. Also, populate the mce error_type and
- * type-specific error_type from either SRR1 or DSISR, depending
- * whether this was a load/store or ifetch exception
- */
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p7(regs->dsisr);
- mce_get_derror_p7(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p7(srr1);
- mce_get_ierror_p7(&mce_error_info, srr1);
- addr = regs->nip;
- }
+ if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
+ handled = mce_handle_ue_error(regs, &mce_err);
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
return handled;
}
-static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
+long __machine_check_early_realmode_p7(struct pt_regs *regs)
{
- mce_get_common_ierror(mce_err, srr1);
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
+ /* P7 DD1 leaves top bits of DSISR undefined */
+ regs->dsisr &= 0x0000ffff;
+
+ return mce_handle_error(regs, regs->msr,
+ mce_p7_derror_table, mce_p7_ierror_table);
}
-static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
+long __machine_check_early_realmode_p8(struct pt_regs *regs)
{
- mce_get_derror_p7(mce_err, dsisr);
- if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
- mce_err->error_type = MCE_ERROR_TYPE_ERAT;
- mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
- }
+ return mce_handle_error(regs, regs->msr,
+ mce_p8_derror_table, mce_p8_ierror_table);
}
-static long mce_handle_ierror_p8(uint64_t srr1)
+long __machine_check_early_realmode_p9(struct pt_regs *regs)
{
- long handled = 0;
+ unsigned long srr1 = regs->msr;
- handled = mce_handle_common_ierror(srr1);
+ /*
+ * On POWER9 DD2.1 and below, it's possible to get a machine check
+ * caused by a paste instruction where only DSISR bit 25 is set. This
+ * will result in the MCE handler seeing an unknown event and the kernel
+ * crashing. An MCE that occurs like this is spurious, so we don't need
+ * to do anything in terms of servicing it. If there is something that
+ * needs to be serviced, the CPU will raise the MCE again with the
+ * correct DSISR so that it can be serviced properly. So detect this
+ * case and mark it as handled.
+ */
+ if (SRR1_MC_LOADSTORE(regs->msr) && regs->dsisr == 0x02000000)
+ return 1;
- if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
- flush_and_reload_slb();
- handled = 1;
+ /*
+ * Async machine check due to bad real address from store or foreign
+ * link time out comes with the load/store bit (PPC bit 42) set in
+ * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're
+ * directed to the ierror table so it will find the cause (which
+ * describes it correctly as a store error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ ((srr1 & 0x081c0000) == 0x08140000 ||
+ (srr1 & 0x081c0000) == 0x08180000)) {
+ srr1 &= ~PPC_BIT(42);
}
- return handled;
-}
-static long mce_handle_derror_p8(uint64_t dsisr)
-{
- return mce_handle_derror(dsisr, P8_DSISR_MC_SLB_ERRORS);
+ return mce_handle_error(regs, srr1,
+ mce_p9_derror_table, mce_p9_ierror_table);
}
-long __machine_check_early_realmode_p8(struct pt_regs *regs)
+long __machine_check_early_realmode_p10(struct pt_regs *regs)
{
- uint64_t srr1, nip, addr;
- long handled = 1;
- struct mce_error_info mce_error_info = { 0 };
-
- srr1 = regs->msr;
- nip = regs->nip;
-
- if (P7_SRR1_MC_LOADSTORE(srr1)) {
- handled = mce_handle_derror_p8(regs->dsisr);
- mce_get_derror_p8(&mce_error_info, regs->dsisr);
- addr = regs->dar;
- } else {
- handled = mce_handle_ierror_p8(srr1);
- mce_get_ierror_p8(&mce_error_info, srr1);
- addr = regs->nip;
- }
+ unsigned long srr1 = regs->msr;
- /* Handle UE error. */
- if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ /*
+ * Async machine check due to bad real address from store comes with
+ * the load/store bit (PPC bit 42) set in SRR1, but the cause comes in
+ * SRR1 not DSISR. Clear bit 42 so we're directed to the ierror table
+ * so it will find the cause (which describes it correctly as a store
+ * error).
+ */
+ if (SRR1_MC_LOADSTORE(srr1) &&
+ (srr1 & 0x081c0000) == 0x08140000) {
+ srr1 &= ~PPC_BIT(42);
+ }
- save_mce_event(regs, handled, &mce_error_info, nip, addr);
- return handled;
+ return mce_handle_error(regs, srr1,
+ mce_p10_derror_table, mce_p10_ierror_table);
}
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 7ce26d45777e..29e1440d14cc 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -8,12 +9,8 @@
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
*
* setjmp/longjmp code by Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
#include <asm/asm-compat.h>
@@ -24,32 +21,23 @@
/*
* Returns (address we are running at) - (address we were linked at)
* for use before the text and data are mapped to KERNELBASE.
- */
-
-_GLOBAL(reloc_offset)
- mflr r0
- bl 1f
-1: mflr r3
- PPC_LL r4,(2f-1b)(r3)
- subf r3,r4,r3
- mtlr r0
- blr
- .align 3
-2: PPC_LONG 1b
-
-/*
* add_reloc_offset(x) returns x + reloc_offset().
*/
+
+_GLOBAL(reloc_offset)
+ li r3, 0
_GLOBAL(add_reloc_offset)
mflr r0
- bl 1f
+ bcl 20,31,$+4
1: mflr r5
PPC_LL r4,(2f-1b)(r5)
subf r5,r4,r5
add r3,r3,r5
mtlr r0
blr
+_ASM_NOKPROBE_SYMBOL(reloc_offset)
+_ASM_NOKPROBE_SYMBOL(add_reloc_offset)
.align 3
2: PPC_LONG 1b
@@ -59,6 +47,10 @@ _GLOBAL(setjmp)
PPC_STL r0,0(r3)
PPC_STL r1,SZL(r3)
PPC_STL r2,2*SZL(r3)
+#ifdef CONFIG_PPC32
+ mfcr r12
+ stmw r12, 3*SZL(r3)
+#else
mfcr r0
PPC_STL r0,3*SZL(r3)
PPC_STL r13,4*SZL(r3)
@@ -80,14 +72,16 @@ _GLOBAL(setjmp)
PPC_STL r29,20*SZL(r3)
PPC_STL r30,21*SZL(r3)
PPC_STL r31,22*SZL(r3)
+#endif
li r3,0
blr
_GLOBAL(longjmp)
- PPC_LCMPI r4,0
- bne 1f
- li r4,1
-1: PPC_LL r13,4*SZL(r3)
+#ifdef CONFIG_PPC32
+ lmw r12, 3*SZL(r3)
+ mtcrf 0x38, r12
+#else
+ PPC_LL r13,4*SZL(r3)
PPC_LL r14,5*SZL(r3)
PPC_LL r15,6*SZL(r3)
PPC_LL r16,7*SZL(r3)
@@ -108,9 +102,17 @@ _GLOBAL(longjmp)
PPC_LL r31,22*SZL(r3)
PPC_LL r0,3*SZL(r3)
mtcrf 0x38,r0
+#endif
PPC_LL r0,0(r3)
PPC_LL r1,SZL(r3)
PPC_LL r2,2*SZL(r3)
mtlr r0
- mr r3,r4
+ mr. r3, r4
+ bnelr
+ li r3, 1
+ blr
+
+_GLOBAL(current_stack_frame)
+ PPC_LL r3,0(r1)
blr
+EXPORT_SYMBOL(current_stack_frame)
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 7c6bb4b17b49..acb727f54e9d 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -5,19 +6,9 @@
* Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
* and Paul Mackerras.
*
- * kexec bits:
- * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * PPC44x port. Copyright (C) 2011, IBM Corporation
- * Author: Suzuki Poulose <suzuki@in.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
@@ -30,95 +21,13 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/processor.h>
-#include <asm/kexec.h>
#include <asm/bug.h>
#include <asm/ptrace.h>
+#include <asm/feature-fixups.h>
.text
/*
- * We store the saved ksp_limit in the unused part
- * of the STACK_FRAME_OVERHEAD
- */
-_GLOBAL(call_do_softirq)
- mflr r0
- stw r0,4(r1)
- lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r3,THREAD_INFO_GAP
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
- bl __do_softirq
- lwz r10,8(r1)
- lwz r1,0(r1)
- lwz r0,4(r1)
- stw r10,THREAD+KSP_LIMIT(r2)
- mtlr r0
- blr
-
-/*
- * void call_do_irq(struct pt_regs *regs, struct thread_info *irqtp);
- */
-_GLOBAL(call_do_irq)
- mflr r0
- stw r0,4(r1)
- lwz r10,THREAD+KSP_LIMIT(r2)
- addi r11,r4,THREAD_INFO_GAP
- stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- stw r10,8(r1)
- stw r11,THREAD+KSP_LIMIT(r2)
- bl __do_irq
- lwz r10,8(r1)
- lwz r1,0(r1)
- lwz r0,4(r1)
- stw r10,THREAD+KSP_LIMIT(r2)
- mtlr r0
- blr
-
-/*
- * This returns the high 64 bits of the product of two 64-bit numbers.
- */
-_GLOBAL(mulhdu)
- cmpwi r6,0
- cmpwi cr1,r3,0
- mr r10,r4
- mulhwu r4,r4,r5
- beq 1f
- mulhwu r0,r10,r6
- mullw r7,r10,r5
- addc r7,r0,r7
- addze r4,r4
-1: beqlr cr1 /* all done if high part of A is 0 */
- mr r10,r3
- mullw r9,r3,r5
- mulhwu r3,r3,r5
- beq 2f
- mullw r0,r10,r6
- mulhwu r8,r10,r6
- addc r7,r0,r7
- adde r4,r4,r8
- addze r3,r3
-2: addc r4,r4,r9
- addze r3,r3
- blr
-
-/*
- * sub_reloc_offset(x) returns x - reloc_offset().
- */
-_GLOBAL(sub_reloc_offset)
- mflr r0
- bl 1f
-1: mflr r5
- lis r4,1b@ha
- addi r4,r4,1b@l
- subf r5,r4,r5
- subf r3,r5,r3
- mtlr r0
- blr
-
-/*
* reloc_got2 runs through the .got2 section adding an offset
* to each entry.
*/
@@ -132,7 +41,7 @@ _GLOBAL(reloc_got2)
srwi. r8,r8,2
beqlr
mtctr r8
- bl 1f
+ bcl 20,31,$+4
1: mflr r0
lis r4,1b@ha
addi r4,r4,1b@l
@@ -166,7 +75,7 @@ _GLOBAL(call_setup_cpu)
mtctr r5
bctr
-#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_6xx)
+#if defined(CONFIG_CPU_FREQ_PMAC) && defined(CONFIG_PPC_BOOK3S_32)
/* This gets called by via-pmu.c to switch the PLL selection
* on 750fx CPU. This function should really be moved to some
@@ -196,10 +105,13 @@ _GLOBAL(low_choose_750fx_pll)
or r4,r4,r5
mtspr SPRN_HID1,r4
+#ifdef CONFIG_SMP
/* Store new HID1 image */
- CURRENT_THREAD_INFO(r6, r1)
- lwz r6,TI_CPU(r6)
+ lwz r6,TASK_CPU(r2)
slwi r6,r6,2
+#else
+ li r6, 0
+#endif
addis r6,r6,nap_save_hid1@ha
stw r4,nap_save_hid1@l(r6)
@@ -236,303 +148,7 @@ _GLOBAL(low_choose_7447a_dfs)
mtmsr r7
blr
-#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_6xx */
-
-/*
- * complement mask on the msr then "or" some values on.
- * _nmask_and_or_msr(nmask, value_to_or)
- */
-_GLOBAL(_nmask_and_or_msr)
- mfmsr r0 /* Get current msr */
- andc r0,r0,r3 /* And off the bits set in r3 (first parm) */
- or r0,r0,r4 /* Or on the bits in r4 (second parm) */
- SYNC /* Some chip revs have problems here... */
- mtmsr r0 /* Update machine state */
- isync
- blr /* Done */
-
-#ifdef CONFIG_40x
-
-/*
- * Do an IO access in real mode
- */
-_GLOBAL(real_readb)
- mfmsr r7
- ori r0,r7,MSR_DR
- xori r0,r0,MSR_DR
- sync
- mtmsr r0
- sync
- isync
- lbz r3,0(r3)
- sync
- mtmsr r7
- sync
- isync
- blr
-
- /*
- * Do an IO access in real mode
- */
-_GLOBAL(real_writeb)
- mfmsr r7
- ori r0,r7,MSR_DR
- xori r0,r0,MSR_DR
- sync
- mtmsr r0
- sync
- isync
- stb r3,0(r4)
- sync
- mtmsr r7
- sync
- isync
- blr
-
-#endif /* CONFIG_40x */
-
-
-/*
- * Flush instruction cache.
- * This is a no-op on the 601.
- */
-_GLOBAL(flush_instruction_cache)
-#if defined(CONFIG_8xx)
- isync
- lis r5, IDC_INVALL@h
- mtspr SPRN_IC_CST, r5
-#elif defined(CONFIG_4xx)
-#ifdef CONFIG_403GCX
- li r3, 512
- mtctr r3
- lis r4, KERNELBASE@h
-1: iccci 0, r4
- addi r4, r4, 16
- bdnz 1b
-#else
- lis r3, KERNELBASE@h
- iccci 0,r3
-#endif
-#elif CONFIG_FSL_BOOKE
-BEGIN_FTR_SECTION
- mfspr r3,SPRN_L1CSR0
- ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
- /* msync; isync recommended here */
- mtspr SPRN_L1CSR0,r3
- isync
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
- mfspr r3,SPRN_L1CSR1
- ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
- mtspr SPRN_L1CSR1,r3
-#else
- mfspr r3,SPRN_PVR
- rlwinm r3,r3,16,16,31
- cmpwi 0,r3,1
- beqlr /* for 601, do nothing */
- /* 603/604 processor - use invalidate-all bit in HID0 */
- mfspr r3,SPRN_HID0
- ori r3,r3,HID0_ICFI
- mtspr SPRN_HID0,r3
-#endif /* CONFIG_8xx/4xx */
- isync
- blr
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_KPROBE(flush_icache_range)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- li r5,L1_CACHE_BYTES-1
- andc r3,r3,r5
- subf r4,r3,r4
- add r4,r4,r5
- srwi. r4,r4,L1_CACHE_SHIFT
- beqlr
- mtctr r4
- mr r6,r3
-1: dcbst 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- sync /* wait for dcbst's to get to ram */
-#ifndef CONFIG_44x
- mtctr r4
-2: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 2b
-#else
- /* Flash invalidate on 44x because we are passed kmapped addresses and
- this doesn't work for userspace pages due to the virtually tagged
- icache. Sigh. */
- iccci 0, r0
-#endif
- sync /* additional sync needed on g4 */
- isync
- blr
-/*
- * Write any modified data cache blocks out to memory.
- * Does not invalidate the corresponding cache lines (especially for
- * any corresponding instruction cache).
- *
- * clean_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(clean_dcache_range)
- li r5,L1_CACHE_BYTES-1
- andc r3,r3,r5
- subf r4,r3,r4
- add r4,r4,r5
- srwi. r4,r4,L1_CACHE_SHIFT
- beqlr
- mtctr r4
-
-1: dcbst 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- sync /* wait for dcbst's to get to ram */
- blr
-
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
- * Does not invalidate the corresponding instruction cache blocks.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_dcache_range)
- li r5,L1_CACHE_BYTES-1
- andc r3,r3,r5
- subf r4,r3,r4
- add r4,r4,r5
- srwi. r4,r4,L1_CACHE_SHIFT
- beqlr
- mtctr r4
-
-1: dcbf 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- sync /* wait for dcbst's to get to ram */
- blr
-
-/*
- * Like above, but invalidate the D-cache. This is used by the 8xx
- * to invalidate the cache so the PPC core doesn't get stale data
- * from the CPM (no cache snooping here :-).
- *
- * invalidate_dcache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(invalidate_dcache_range)
- li r5,L1_CACHE_BYTES-1
- andc r3,r3,r5
- subf r4,r3,r4
- add r4,r4,r5
- srwi. r4,r4,L1_CACHE_SHIFT
- beqlr
- mtctr r4
-
-1: dcbi 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- sync /* wait for dcbi's to get to ram */
- blr
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
- *
- * void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
- li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
- mtctr r4
- mr r6,r3
-0: dcbst 0,r3 /* Write line to ram */
- addi r3,r3,L1_CACHE_BYTES
- bdnz 0b
- sync
-#ifdef CONFIG_44x
- /* We don't flush the icache on 44x. Those have a virtual icache
- * and we don't have access to the virtual address here (it's
- * not the page vaddr but where it's mapped in user space). The
- * flushing of the icache on these is handled elsewhere, when
- * a change in the address space occurs, before returning to
- * user space
- */
-BEGIN_MMU_FTR_SECTION
- blr
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
-#endif /* CONFIG_44x */
- mtctr r4
-1: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 1b
- sync
- isync
- blr
-
-#ifndef CONFIG_BOOKE
-/*
- * Flush a particular page from the data cache to RAM, identified
- * by its physical address. We turn off the MMU so we can just use
- * the physical address (this may be a highmem page without a kernel
- * mapping).
- *
- * void __flush_dcache_icache_phys(unsigned long physaddr)
- */
-_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
- mfmsr r10
- rlwinm r0,r10,0,28,26 /* clear DR */
- mtmsr r0
- isync
- rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
- li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
- mtctr r4
- mr r6,r3
-0: dcbst 0,r3 /* Write line to ram */
- addi r3,r3,L1_CACHE_BYTES
- bdnz 0b
- sync
- mtctr r4
-1: icbi 0,r6
- addi r6,r6,L1_CACHE_BYTES
- bdnz 1b
- sync
- mtmsr r10 /* restore DR */
- isync
- blr
-#endif /* CONFIG_BOOKE */
-
-/*
- * Clear pages using the dcbz instruction, which doesn't cause any
- * memory traffic (except to write out any cache lines which get
- * displaced). This only works on cacheable memory.
- *
- * void clear_pages(void *page, int order) ;
- */
-_GLOBAL(clear_pages)
- li r0,PAGE_SIZE/L1_CACHE_BYTES
- slw r0,r0,r4
- mtctr r0
-1: dcbz 0,r3
- addi r3,r3,L1_CACHE_BYTES
- bdnz 1b
- blr
+#endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */
/*
* Copy a whole page. We use the dcbz instruction on the destination
@@ -551,7 +167,12 @@ _GLOBAL(clear_pages)
stwu r9,16(r3)
_GLOBAL(copy_page)
+ rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
+
+0: twnei r5, 0 /* WARN if r3 is not cache aligned */
+ EMIT_WARN_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
addi r4,r4,-4
li r5,4
@@ -594,25 +215,7 @@ _GLOBAL(copy_page)
li r0,MAX_COPY_PREFETCH
li r11,4
b 2b
-
-/*
- * void atomic_clear_mask(atomic_t mask, atomic_t *addr)
- * void atomic_set_mask(atomic_t mask, atomic_t *addr);
- */
-_GLOBAL(atomic_clear_mask)
-10: lwarx r5,0,r4
- andc r5,r5,r3
- PPC405_ERR77(0,r4)
- stwcx. r5,0,r4
- bne- 10b
- blr
-_GLOBAL(atomic_set_mask)
-10: lwarx r5,0,r4
- or r5,r5,r3
- PPC405_ERR77(0,r4)
- stwcx. r5,0,r4
- bne- 10b
- blr
+EXPORT_SYMBOL(copy_page)
/*
* Extended precision shifts.
@@ -640,6 +243,7 @@ _GLOBAL(__ashrdi3)
sraw r3,r3,r5 # MSW = MSW >> count
or r4,r4,r7 # LSW |= t2
blr
+EXPORT_SYMBOL(__ashrdi3)
_GLOBAL(__ashldi3)
subfic r6,r5,32
@@ -651,6 +255,7 @@ _GLOBAL(__ashldi3)
slw r4,r4,r5 # LSW = LSW << count
or r3,r3,r7 # MSW |= t2
blr
+EXPORT_SYMBOL(__ashldi3)
_GLOBAL(__lshrdi3)
subfic r6,r5,32
@@ -662,6 +267,7 @@ _GLOBAL(__lshrdi3)
srw r3,r3,r5 # MSW = MSW >> count
or r4,r4,r7 # LSW |= t2
blr
+EXPORT_SYMBOL(__lshrdi3)
/*
* 64-bit comparison: __cmpdi2(s64 a, s64 b)
@@ -677,6 +283,7 @@ _GLOBAL(__cmpdi2)
bltlr
li r3,2
blr
+EXPORT_SYMBOL(__cmpdi2)
/*
* 64-bit comparison: __ucmpdi2(u64 a, u64 b)
* Returns 0 if a < b, 1 if a == b, 2 if a > b.
@@ -691,6 +298,7 @@ _GLOBAL(__ucmpdi2)
bltlr
li r3,2
blr
+EXPORT_SYMBOL(__ucmpdi2)
_GLOBAL(__bswapdi2)
rotlwi r9,r4,8
@@ -702,509 +310,15 @@ _GLOBAL(__bswapdi2)
mr r3,r9
mr r4,r10
blr
-
-_GLOBAL(abs)
- srawi r4,r3,31
- xor r3,r3,r4
- sub r3,r3,r4
- blr
+EXPORT_SYMBOL(__bswapdi2)
#ifdef CONFIG_SMP
_GLOBAL(start_secondary_resume)
/* Reset stack */
- CURRENT_THREAD_INFO(r1, r1)
- addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+ rlwinm r1, r1, 0, 0, 31 - THREAD_SHIFT
+ addi r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
li r3,0
stw r3,0(r1) /* Zero the stack frame pointer */
bl start_secondary
b .
#endif /* CONFIG_SMP */
-
-/*
- * This routine is just here to keep GCC happy - sigh...
- */
-_GLOBAL(__main)
- blr
-
-#ifdef CONFIG_KEXEC
- /*
- * Must be relocatable PIC code callable as a C function.
- */
- .globl relocate_new_kernel
-relocate_new_kernel:
- /* r3 = page_list */
- /* r4 = reboot_code_buffer */
- /* r5 = start_address */
-
-#ifdef CONFIG_FSL_BOOKE
-
- mr r29, r3
- mr r30, r4
- mr r31, r5
-
-#define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
-#undef ENTRY_MAPPING_KEXEC_SETUP
-
- mr r3, r29
- mr r4, r30
- mr r5, r31
-
- li r0, 0
-#elif defined(CONFIG_44x)
-
- /* Save our parameters */
- mr r29, r3
- mr r30, r4
- mr r31, r5
-
-#ifdef CONFIG_PPC_47x
- /* Check for 47x cores */
- mfspr r3,SPRN_PVR
- srwi r3,r3,16
- cmplwi cr0,r3,PVR_476@h
- beq setup_map_47x
- cmplwi cr0,r3,PVR_476_ISS@h
- beq setup_map_47x
-#endif /* CONFIG_PPC_47x */
-
-/*
- * Code for setting up 1:1 mapping for PPC440x for KEXEC
- *
- * We cannot switch off the MMU on PPC44x.
- * So we:
- * 1) Invalidate all the mappings except the one we are running from.
- * 2) Create a tmp mapping for our code in the other address space(TS) and
- * jump to it. Invalidate the entry we started in.
- * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
- * 4) Jump to the 1:1 mapping in original TS.
- * 5) Invalidate the tmp mapping.
- *
- * - Based on the kexec support code for FSL BookE
- *
- */
-
- /*
- * Load the PID with kernel PID (0).
- * Also load our MSR_IS and TID to MMUCR for TLB search.
- */
- li r3, 0
- mtspr SPRN_PID, r3
- mfmsr r4
- andi. r4,r4,MSR_IS@l
- beq wmmucr
- oris r3,r3,PPC44x_MMUCR_STS@h
-wmmucr:
- mtspr SPRN_MMUCR,r3
- sync
-
- /*
- * Invalidate all the TLB entries except the current entry
- * where we are running from
- */
- bl 0f /* Find our address */
-0: mflr r5 /* Make it accessible */
- tlbsx r23,0,r5 /* Find entry we are in */
- li r4,0 /* Start at TLB entry 0 */
- li r3,0 /* Set PAGEID inval value */
-1: cmpw r23,r4 /* Is this our entry? */
- beq skip /* If so, skip the inval */
- tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
-skip:
- addi r4,r4,1 /* Increment */
- cmpwi r4,64 /* Are we done? */
- bne 1b /* If not, repeat */
- isync
-
- /* Create a temp mapping and jump to it */
- andi. r6, r23, 1 /* Find the index to use */
- addi r24, r6, 1 /* r24 will contain 1 or 2 */
-
- mfmsr r9 /* get the MSR */
- rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
- xori r7, r5, 1 /* Use the other address space */
-
- /* Read the current mapping entries */
- tlbre r3, r23, PPC44x_TLB_PAGEID
- tlbre r4, r23, PPC44x_TLB_XLAT
- tlbre r5, r23, PPC44x_TLB_ATTRIB
-
- /* Save our current XLAT entry */
- mr r25, r4
-
- /* Extract the TLB PageSize */
- li r10, 1 /* r10 will hold PageSize */
- rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
-
- /* XXX: As of now we use 256M, 4K pages */
- cmpwi r11, PPC44x_TLB_256M
- bne tlb_4k
- rotlwi r10, r10, 28 /* r10 = 256M */
- b write_out
-tlb_4k:
- cmpwi r11, PPC44x_TLB_4K
- bne default
- rotlwi r10, r10, 12 /* r10 = 4K */
- b write_out
-default:
- rotlwi r10, r10, 10 /* r10 = 1K */
-
-write_out:
- /*
- * Write out the tmp 1:1 mapping for this code in other address space
- * Fixup EPN = RPN , TS=other address space
- */
- insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
-
- /* Write out the tmp mapping entries */
- tlbwe r3, r24, PPC44x_TLB_PAGEID
- tlbwe r4, r24, PPC44x_TLB_XLAT
- tlbwe r5, r24, PPC44x_TLB_ATTRIB
-
- subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
- not r10, r11 /* Mask for PageNum */
-
- /* Switch to other address space in MSR */
- insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
-
- bl 1f
-1: mflr r8
- addi r8, r8, (2f-1b) /* Find the target offset */
-
- /* Jump to the tmp mapping */
- mtspr SPRN_SRR0, r8
- mtspr SPRN_SRR1, r9
- rfi
-
-2:
- /* Invalidate the entry we were executing from */
- li r3, 0
- tlbwe r3, r23, PPC44x_TLB_PAGEID
-
- /* attribute fields. rwx for SUPERVISOR mode */
- li r5, 0
- ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
- /* Create 1:1 mapping in 256M pages */
- xori r7, r7, 1 /* Revert back to Original TS */
-
- li r8, 0 /* PageNumber */
- li r6, 3 /* TLB Index, start at 3 */
-
-next_tlb:
- rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
- mr r4, r3 /* RPN = EPN */
- ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
- insrwi r3, r7, 1, 23 /* Set TS from r7 */
-
- tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
- tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
- tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
-
- addi r8, r8, 1 /* Increment PN */
- addi r6, r6, 1 /* Increment TLB Index */
- cmpwi r8, 8 /* Are we done ? */
- bne next_tlb
- isync
-
- /* Jump to the new mapping 1:1 */
- li r9,0
- insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
-
- bl 1f
-1: mflr r8
- and r8, r8, r11 /* Get our offset within page */
- addi r8, r8, (2f-1b)
-
- and r5, r25, r10 /* Get our target PageNum */
- or r8, r8, r5 /* Target jump address */
-
- mtspr SPRN_SRR0, r8
- mtspr SPRN_SRR1, r9
- rfi
-2:
- /* Invalidate the tmp entry we used */
- li r3, 0
- tlbwe r3, r24, PPC44x_TLB_PAGEID
- sync
- b ppc44x_map_done
-
-#ifdef CONFIG_PPC_47x
-
- /* 1:1 mapping for 47x */
-
-setup_map_47x:
-
- /*
- * Load the kernel pid (0) to PID and also to MMUCR[TID].
- * Also set the MSR IS->MMUCR STS
- */
- li r3, 0
- mtspr SPRN_PID, r3 /* Set PID */
- mfmsr r4 /* Get MSR */
- andi. r4, r4, MSR_IS@l /* TS=1? */
- beq 1f /* If not, leave STS=0 */
- oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
-1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
- sync
-
- /* Find the entry we are running from */
- bl 2f
-2: mflr r23
- tlbsx r23, 0, r23
- tlbre r24, r23, 0 /* TLB Word 0 */
- tlbre r25, r23, 1 /* TLB Word 1 */
- tlbre r26, r23, 2 /* TLB Word 2 */
-
-
- /*
- * Invalidates all the tlb entries by writing to 256 RPNs(r4)
- * of 4k page size in all 4 ways (0-3 in r3).
- * This would invalidate the entire UTLB including the one we are
- * running from. However the shadow TLB entries would help us
- * to continue the execution, until we flush them (rfi/isync).
- */
- addis r3, 0, 0x8000 /* specify the way */
- addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
- addi r5, 0, 0
- b clear_utlb_entry
-
- /* Align the loop to speed things up. from head_44x.S */
- .align 6
-
-clear_utlb_entry:
-
- tlbwe r4, r3, 0
- tlbwe r5, r3, 1
- tlbwe r5, r3, 2
- addis r3, r3, 0x2000 /* Increment the way */
- cmpwi r3, 0
- bne clear_utlb_entry
- addis r3, 0, 0x8000
- addis r4, r4, 0x100 /* Increment the EPN */
- cmpwi r4, 0
- bne clear_utlb_entry
-
- /* Create the entries in the other address space */
- mfmsr r5
- rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
- xori r7, r7, 1 /* r7 = !TS */
-
- insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
-
- /*
- * write out the TLB entries for the tmp mapping
- * Use way '0' so that we could easily invalidate it later.
- */
- lis r3, 0x8000 /* Way '0' */
-
- tlbwe r24, r3, 0
- tlbwe r25, r3, 1
- tlbwe r26, r3, 2
-
- /* Update the msr to the new TS */
- insrwi r5, r7, 1, 26
-
- bl 1f
-1: mflr r6
- addi r6, r6, (2f-1b)
-
- mtspr SPRN_SRR0, r6
- mtspr SPRN_SRR1, r5
- rfi
-
- /*
- * Now we are in the tmp address space.
- * Create a 1:1 mapping for 0-2GiB in the original TS.
- */
-2:
- li r3, 0
- li r4, 0 /* TLB Word 0 */
- li r5, 0 /* TLB Word 1 */
- li r6, 0
- ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
-
- li r8, 0 /* PageIndex */
-
- xori r7, r7, 1 /* revert back to original TS */
-
-write_utlb:
- rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
- /* ERPN = 0 as we don't use memory above 2G */
-
- mr r4, r5 /* EPN = RPN */
- ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
- insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
-
- tlbwe r4, r3, 0 /* Write out the entries */
- tlbwe r5, r3, 1
- tlbwe r6, r3, 2
- addi r8, r8, 1
- cmpwi r8, 8 /* Have we completed ? */
- bne write_utlb
-
- /* make sure we complete the TLB write up */
- isync
-
- /*
- * Prepare to jump to the 1:1 mapping.
- * 1) Extract page size of the tmp mapping
- * DSIZ = TLB_Word0[22:27]
- * 2) Calculate the physical address of the address
- * to jump to.
- */
- rlwinm r10, r24, 0, 22, 27
-
- cmpwi r10, PPC47x_TLB0_4K
- bne 0f
- li r10, 0x1000 /* r10 = 4k */
- bl 1f
-
-0:
- /* Defaults to 256M */
- lis r10, 0x1000
-
- bl 1f
-1: mflr r4
- addi r4, r4, (2f-1b) /* virtual address of 2f */
-
- subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
- not r10, r11 /* Pagemask = ~(offsetmask) */
-
- and r5, r25, r10 /* Physical page */
- and r6, r4, r11 /* offset within the current page */
-
- or r5, r5, r6 /* Physical address for 2f */
-
- /* Switch the TS in MSR to the original one */
- mfmsr r8
- insrwi r8, r7, 1, 26
-
- mtspr SPRN_SRR1, r8
- mtspr SPRN_SRR0, r5
- rfi
-
-2:
- /* Invalidate the tmp mapping */
- lis r3, 0x8000 /* Way '0' */
-
- clrrwi r24, r24, 12 /* Clear the valid bit */
- tlbwe r24, r3, 0
- tlbwe r25, r3, 1
- tlbwe r26, r3, 2
-
- /* Make sure we complete the TLB write and flush the shadow TLB */
- isync
-
-#endif
-
-ppc44x_map_done:
-
-
- /* Restore the parameters */
- mr r3, r29
- mr r4, r30
- mr r5, r31
-
- li r0, 0
-#else
- li r0, 0
-
- /*
- * Set Machine Status Register to a known status,
- * switch the MMU off and jump to 1: in a single step.
- */
-
- mr r8, r0
- ori r8, r8, MSR_RI|MSR_ME
- mtspr SPRN_SRR1, r8
- addi r8, r4, 1f - relocate_new_kernel
- mtspr SPRN_SRR0, r8
- sync
- rfi
-
-1:
-#endif
- /* from this point address translation is turned off */
- /* and interrupts are disabled */
-
- /* set a new stack at the bottom of our page... */
- /* (not really needed now) */
- addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
- stw r0, 0(r1)
-
- /* Do the copies */
- li r6, 0 /* checksum */
- mr r0, r3
- b 1f
-
-0: /* top, read another word for the indirection page */
- lwzu r0, 4(r3)
-
-1:
- /* is it a destination page? (r8) */
- rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
- beq 2f
-
- rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
- b 0b
-
-2: /* is it an indirection page? (r3) */
- rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
- beq 2f
-
- rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
- subi r3, r3, 4
- b 0b
-
-2: /* are we done? */
- rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
- beq 2f
- b 3f
-
-2: /* is it a source page? (r9) */
- rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
- beq 0b
-
- rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
- li r7, PAGE_SIZE / 4
- mtctr r7
- subi r9, r9, 4
- subi r8, r8, 4
-9:
- lwzu r0, 4(r9) /* do the copy */
- xor r6, r6, r0
- stwu r0, 4(r8)
- dcbst 0, r8
- sync
- icbi 0, r8
- bdnz 9b
-
- addi r9, r9, 4
- addi r8, r8, 4
- b 0b
-
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * execute a serializing instruction here.
- */
- isync
- sync
-
- mfspr r3, SPRN_PIR /* current core we are running on */
- mr r4, r5 /* load physical address of chunk called */
-
- /* jump to the entry point, usually the setup routine */
- mtlr r5
- blrl
-
-1: b 1b
-
-relocate_new_kernel_end:
-
- .globl relocate_new_kernel_size
-relocate_new_kernel_size:
- .long relocate_new_kernel_end - relocate_new_kernel
-#endif
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4e314b90c75d..a997c7f43dc0 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions.
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -6,14 +7,10 @@
* and Paul Mackerras.
* Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
* PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/export.h>
+#include <linux/linkage.h>
#include <linux/sys.h>
#include <asm/unistd.h>
#include <asm/errno.h>
@@ -26,221 +23,13 @@
#include <asm/thread_info.h>
#include <asm/kexec.h>
#include <asm/ptrace.h>
+#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
.text
-_GLOBAL(call_do_softirq)
- mflr r0
- std r0,16(r1)
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
- mr r1,r3
- bl __do_softirq
- ld r1,0(r1)
- ld r0,16(r1)
- mtlr r0
- blr
-
-_GLOBAL(call_do_irq)
- mflr r0
- std r0,16(r1)
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
- mr r1,r4
- bl __do_irq
- ld r1,0(r1)
- ld r0,16(r1)
- mtlr r0
- blr
-
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start through stop-1 inclusive
- */
-
-_KPROBE(flush_icache_range)
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1LINESIZE(r10)/* Get cache line size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of cache line size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-1: dcbst 0,r6
- add r6,r6,r7
- bdnz 1b
- sync
-
-/* Now invalidate the instruction cache */
-
- lwz r7,ICACHEL1LINESIZE(r10) /* Get Icache line size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5
- lwz r9,ICACHEL1LOGLINESIZE(r10) /* Get log-2 of Icache line size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-2: icbi 0,r6
- add r6,r6,r7
- bdnz 2b
- isync
- blr
- .previous .text
-/*
- * Like above, but only do the D-cache.
- *
- * flush_dcache_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_range)
-
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- blr
-
-/*
- * Like above, but works on non-mapped physical addresses.
- * Use only for non-LPAR setups ! It also assumes real mode
- * is cacheable. Used for flushing out the DART before using
- * it as uncacheable memory
- *
- * flush_dcache_phys_range(unsigned long start, unsigned long stop)
- *
- * flush all bytes from start to stop-1 inclusive
- */
-_GLOBAL(flush_dcache_phys_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGLINESIZE(r10) /* Get log-2 of dcache line size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- mfmsr r5 /* Disable MMU Data Relocation */
- ori r0,r5,MSR_DR
- xori r0,r0,MSR_DR
- sync
- mtmsr r0
- sync
- isync
- mtctr r8
-0: dcbst 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- mtmsr r5 /* Re-enable MMU Data Relocation */
- sync
- isync
- blr
-
-_GLOBAL(flush_inval_dcache_range)
- ld r10,PPC64_CACHES@toc(r2)
- lwz r7,DCACHEL1LINESIZE(r10) /* Get dcache line size */
- addi r5,r7,-1
- andc r6,r3,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,DCACHEL1LOGLINESIZE(r10)/* Get log-2 of dcache line size */
- srw. r8,r8,r9 /* compute line count */
- beqlr /* nothing to do? */
- sync
- isync
- mtctr r8
-0: dcbf 0,r6
- add r6,r6,r7
- bdnz 0b
- sync
- isync
- blr
-
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- * void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory
- *
- * Different systems have different cache line sizes
- */
-
-BEGIN_FTR_SECTION
- PURGE_PREFETCHED_INS
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-
-/* Flush the dcache */
- ld r7,PPC64_CACHES@toc(r2)
- clrrdi r3,r3,PAGE_SHIFT /* Page align */
- lwz r4,DCACHEL1LINESPERPAGE(r7) /* Get # dcache lines per page */
- lwz r5,DCACHEL1LINESIZE(r7) /* Get dcache line size */
- mr r6,r3
- mtctr r4
-0: dcbst 0,r6
- add r6,r6,r5
- bdnz 0b
- sync
-
-/* Now invalidate the icache */
-
- lwz r4,ICACHEL1LINESPERPAGE(r7) /* Get # icache lines per page */
- lwz r5,ICACHEL1LINESIZE(r7) /* Get icache line size */
- mtctr r4
-1: icbi 0,r3
- add r3,r3,r5
- bdnz 1b
- isync
- blr
-
_GLOBAL(__bswapdi2)
+EXPORT_SYMBOL(__bswapdi2)
srdi r8,r3,32
rlwinm r7,r3,8,0xffffffff
rlwimi r7,r3,24,0,7
@@ -285,7 +74,7 @@ _GLOBAL(rmci_off)
blr
#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
-#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE)
+#ifdef CONFIG_PPC_PMAC
/*
* Do an IO access in real mode
@@ -348,7 +137,7 @@ _GLOBAL(real_writeb)
sync
isync
blr
-#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */
+#endif // CONFIG_PPC_PMAC
#ifdef CONFIG_PPC_PASEMI
@@ -385,7 +174,7 @@ _GLOBAL(real_205_writeb)
#endif /* CONFIG_PPC_PASEMI */
-#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE)
+#ifdef CONFIG_CPU_FREQ_PMAC64
/*
* SCOM access functions for 970 (FX only for now)
*
@@ -403,7 +192,7 @@ _GLOBAL(scom970_read)
xori r0,r0,MSR_EE
mtmsrd r0,1
- /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits
* (including parity). On current CPUs they must be 0'd,
* and finally or in RW bit
*/
@@ -437,7 +226,7 @@ _GLOBAL(scom970_write)
xori r0,r0,MSR_EE
mtmsrd r0,1
- /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits
+ /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits
* (including parity). On current CPUs they must be 0'd.
*/
@@ -454,7 +243,7 @@ _GLOBAL(scom970_write)
/* restore interrupts */
mtmsrd r5,1
blr
-#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */
+#endif // CONFIG_CPU_FREQ_PMAC64
/* kexec_wait(phys_cpu)
*
@@ -467,17 +256,28 @@ _GLOBAL(scom970_write)
* Physical (hardware) cpu id should be in r3.
*/
_GLOBAL(kexec_wait)
- bl 1f
+ bcl 20,31,$+4
1: mflr r5
addi r5,r5,kexec_flag-1b
99: HMT_LOW
-#ifdef CONFIG_KEXEC /* use no memory without kexec */
+#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */
lwz r4,0(r5)
cmpwi 0,r4,0
- bnea 0x60
+ beq 99b
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r10,0x60
+ mfmsr r11
+ clrrdi r11,r11,1 /* Clear MSR_LE */
+ mtsrr0 r10
+ mtsrr1 r11
+ rfid
+#else
+ /* Create TLB entry in book3e_secondary_core_init */
+ li r4,0
+ ba 0x60
+#endif
#endif
- b 99b
/* this can be in text because we won't change it until we are
* running in real anyways
@@ -486,7 +286,47 @@ kexec_flag:
.long 0
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_PPC_BOOK3E_64
+/*
+ * BOOK3E has no real MMU mode, so we have to setup the initial TLB
+ * for a core to identity map v:0 to p:0. This current implementation
+ * assumes that 1G is enough for kexec.
+ */
+kexec_create_tlb:
+ /*
+ * Invalidate all non-IPROT TLB entries to avoid any TLB conflict.
+ * IPROT TLB entries should be >= PAGE_OFFSET and thus not conflict.
+ */
+ PPC_TLBILX_ALL(0,R0)
+ sync
+ isync
+
+ mfspr r10,SPRN_TLB1CFG
+ andi. r10,r10,TLBnCFG_N_ENTRY /* Extract # entries */
+ subi r10,r10,1 /* Last entry: no conflict with kernel text */
+ lis r9,MAS0_TLBSEL(1)@h
+ rlwimi r9,r10,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r9) */
+
+/* Set up a temp identity mapping v:0 to p:0 and return to it. */
+ mtspr SPRN_MAS0,r9
+
+ lis r9,(MAS1_VALID|MAS1_IPROT)@h
+ ori r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
+ mtspr SPRN_MAS1,r9
+
+ LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
+ mtspr SPRN_MAS2,r9
+
+ LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
+ mtspr SPRN_MAS3,r9
+ li r9,0
+ mtspr SPRN_MAS7,r9
+
+ tlbwe
+ isync
+ blr
+#endif
/* kexec_smp_wait(void)
*
@@ -504,7 +344,6 @@ _GLOBAL(kexec_smp_wait)
li r4,KEXEC_STATE_REAL_MODE
stb r4,PACAKEXECSTATE(r13)
- SYNC
b kexec_wait
@@ -515,7 +354,11 @@ _GLOBAL(kexec_smp_wait)
*
* don't overwrite r3 here, it is live for kexec_wait above.
*/
-real_mode: /* assume normal blr return */
+SYM_FUNC_START_LOCAL(real_mode) /* assume normal blr return */
+#ifdef CONFIG_PPC_BOOK3E_64
+ /* Create an identity mapping. */
+ b kexec_create_tlb
+#else
1: li r9,MSR_RI
li r10,MSR_DR|MSR_IR
mflr r11 /* return address to SRR0 */
@@ -527,10 +370,12 @@ real_mode: /* assume normal blr return */
mtspr SPRN_SRR1,r10
mtspr SPRN_SRR0,r11
rfid
-
+#endif
+SYM_FUNC_END(real_mode)
/*
- * kexec_sequence(newstack, start, image, control, clear_all())
+ * kexec_sequence(newstack, start, image, control, clear_all(),
+ copy_with_mmu_off)
*
* does the grungy work with stack switching and real mode switches
* also does simple calls to other code
@@ -541,7 +386,7 @@ _GLOBAL(kexec_sequence)
std r0,16(r1)
/* switch stacks to newstack -- &kexec_stack.stack */
- stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+ stdu r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3)
mr r1,r3
li r0,0
@@ -558,7 +403,7 @@ _GLOBAL(kexec_sequence)
std r26,-48(r1)
std r25,-56(r1)
- stdu r1,-STACK_FRAME_OVERHEAD-64(r1)
+ stdu r1,-STACK_FRAME_MIN_SIZE-64(r1)
/* save args into preserved regs */
mr r31,r3 /* newstack (both) */
@@ -566,23 +411,36 @@ _GLOBAL(kexec_sequence)
mr r29,r5 /* image (virt) */
mr r28,r6 /* control, unused */
mr r27,r7 /* clear_all() fn desc */
- mr r26,r8 /* spare */
+ mr r26,r8 /* copy_with_mmu_off */
lhz r25,PACAHWCPUID(r13) /* get our phys cpu from paca */
/* disable interrupts, we are overwriting kernel data next */
+#ifdef CONFIG_PPC_BOOK3E_64
+ wrteei 0
+#else
mfmsr r3
rlwinm r3,r3,0,17,15
mtmsrd r3,1
+#endif
+ /* We need to turn the MMU off unless we are in hash mode
+ * under a hypervisor
+ */
+ cmpdi r26,0
+ beq 1f
+ bl real_mode
+1:
/* copy dest pages, flush whole dest image */
mr r3,r29
- bl kexec_copy_flush /* (image) */
+ bl CFUNC(kexec_copy_flush) /* (image) */
- /* turn off mmu */
+ /* turn off mmu now if not done earlier */
+ cmpdi r26,0
+ bne 1f
bl real_mode
/* copy 0x100 bytes starting at start to 0 */
- li r3,0
+1: li r3,0
mr r4,r30 /* start, aka phys mem offset */
li r5,0x100
li r6,0
@@ -594,14 +452,17 @@ _GLOBAL(kexec_sequence)
li r6,1
stw r6,kexec_flag-1b(5)
+ cmpdi r27,0
+ beq 1f
+
/* clear out hardware hash page table and tlb */
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
+#ifdef CONFIG_PPC64_ELF_ABI_V1
ld r12,0(r27) /* deref function descriptor */
#else
mr r12,r27
#endif
mtctr r12
- bctrl /* ppc_md.hpte_clear_all(void); */
+ bctrl /* mmu_hash_ops.hpte_clear_all(void); */
/*
* kexec image calling is:
@@ -628,37 +489,9 @@ _GLOBAL(kexec_sequence)
* are the boot cpu ?????
* other device tree differences (prop sizes, va vs pa, etc)...
*/
- mr r3,r25 # my phys cpu
+1: mr r3,r25 # my phys cpu
mr r4,r30 # start, aka phys mem offset
mtlr 4
li r5,0
blr /* image->start(physid, image->start, 0); */
-#endif /* CONFIG_KEXEC */
-
-#ifdef CONFIG_MODULES
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-
-#ifdef CONFIG_MODVERSIONS
-.weak __crc_TOC.
-.section "___kcrctab+TOC.","a"
-.globl __kcrctab_TOC.
-__kcrctab_TOC.:
- .llong __crc_TOC.
-#endif
-
-/*
- * Export a fake .TOC. since both modpost and depmod will complain otherwise.
- * Both modpost and depmod strip the leading . so we do the same here.
- */
-.section "__ksymtab_strings","a"
-__kstrtab_TOC.:
- .asciz "TOC."
-
-.section "___ksymtab+TOC.","a"
-/* This symbol name is important: it's used by modpost to find exported syms */
-.globl __ksymtab_TOC.
-__ksymtab_TOC.:
- .llong 0 /* .value */
- .llong __kstrtab_TOC.
-#endif /* ELFv2 */
-#endif /* MODULES */
+#endif /* CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index 9547381b631a..baeb24c102c8 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -1,33 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for powerpc.
Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
Copyright (C) 2008 Freescale Semiconductor, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/elf.h>
#include <linux/moduleloader.h>
#include <linux/err.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/bug.h>
#include <asm/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/firmware.h>
#include <linux/sort.h>
#include <asm/setup.h>
-
-LIST_HEAD(module_bug_list);
+#include <asm/sections.h>
static const Elf_Shdr *find_section(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
@@ -47,6 +34,11 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs, struct module *me)
{
const Elf_Shdr *sect;
+ int rc;
+
+ rc = module_finalize_ftrace(me, sechdrs);
+ if (rc)
+ return rc;
/* Apply feature fixups */
sect = find_section(hdr, sechdrs, "__ftr_fixup");
@@ -67,7 +59,23 @@ int module_finalize(const Elf_Ehdr *hdr,
do_feature_fixups(powerpc_firmware_features,
(void *)sect->sh_addr,
(void *)sect->sh_addr + sect->sh_size);
-#endif
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+ sect = find_section(hdr, sechdrs, ".opd");
+ if (sect != NULL) {
+ me->arch.start_opd = sect->sh_addr;
+ me->arch.end_opd = sect->sh_addr + sect->sh_size;
+ }
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+ sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
+ if (sect != NULL)
+ do_barrier_nospec_fixups_range(barrier_nospec_enabled,
+ (void *)sect->sh_addr,
+ (void *)sect->sh_addr + sect->sh_size);
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
sect = find_section(hdr, sechdrs, "__lwsync_fixup");
if (sect != NULL)
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index 6cff040bf456..f930e3395a7f 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -1,20 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for PPC.
Copyright (C) 2001 Rusty Russell.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
+*/
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
#include <linux/module.h>
#include <linux/moduleloader.h>
#include <linux/elf.h>
@@ -27,12 +18,7 @@
#include <linux/bug.h>
#include <linux/sort.h>
#include <asm/setup.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
+#include <asm/text-patching.h>
/* Count how many different relocations (different symbol, different
addend) */
@@ -82,21 +68,6 @@ static int relacmp(const void *_x, const void *_y)
return 0;
}
-static void relaswap(void *_x, void *_y, int size)
-{
- uint32_t *x, *y, tmp;
- int i;
-
- y = (uint32_t *)_x;
- x = (uint32_t *)_y;
-
- for (i = 0; i < sizeof(Elf32_Rela) / sizeof(uint32_t); i++) {
- tmp = x[i];
- x[i] = y[i];
- y[i] = tmp;
- }
-}
-
/* Get the potential trampolines size required of the init and
non-init sections */
static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
@@ -112,28 +83,28 @@ static unsigned long get_plt_size(const Elf32_Ehdr *hdr,
for (i = 1; i < hdr->e_shnum; i++) {
/* If it's called *.init*, and we're not init, we're
not interested */
- if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
+ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != NULL)
!= is_init)
continue;
/* We don't want to look at debug sections. */
- if (strstr(secstrings + sechdrs[i].sh_name, ".debug") != 0)
+ if (strstr(secstrings + sechdrs[i].sh_name, ".debug"))
continue;
if (sechdrs[i].sh_type == SHT_RELA) {
- DEBUGP("Found relocations in section %u\n", i);
- DEBUGP("Ptr: %p. Number: %u\n",
+ pr_debug("Found relocations in section %u\n", i);
+ pr_debug("Ptr: %p. Number: %u\n",
(void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size / sizeof(Elf32_Rela));
/* Sort the relocation information based on a symbol and
* addend key. This is a stable O(n*log n) complexity
- * alogrithm but it will reduce the complexity of
+ * algorithm but it will reduce the complexity of
* count_relocs() to linear complexity O(n)
*/
sort((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size / sizeof(Elf32_Rela),
- sizeof(Elf32_Rela), relacmp, relaswap);
+ sizeof(Elf32_Rela), relacmp, NULL);
ret += count_relocs((void *)hdr
+ sechdrs[i].sh_offset,
@@ -161,7 +132,7 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
me->arch.core_plt_section = i;
}
if (!me->arch.core_plt_section || !me->arch.init_plt_section) {
- printk("Module doesn't contain .plt or .init.plt sections.\n");
+ pr_err("Module doesn't contain .plt or .init.plt sections.\n");
return -ENOEXEC;
}
@@ -175,24 +146,24 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr,
static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val)
{
- if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16)
- && entry->jump[1] == 0x398c0000 + (val & 0xffff))
- return 1;
- return 0;
+ if (entry->jump[0] != PPC_RAW_LIS(_R12, PPC_HA(val)))
+ return 0;
+ if (entry->jump[1] != PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)))
+ return 0;
+ return 1;
}
/* Set up a trampoline in the PLT to bounce us to the distant function */
static uint32_t do_plt_call(void *location,
Elf32_Addr val,
- Elf32_Shdr *sechdrs,
+ const Elf32_Shdr *sechdrs,
struct module *mod)
{
struct ppc_plt_entry *entry;
- DEBUGP("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
+ pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
/* Init, or core PLT? */
- if (location >= mod->module_core
- && location < mod->module_core + mod->core_size)
+ if (within_module_core((unsigned long)location, mod))
entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
else
entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
@@ -203,15 +174,25 @@ static uint32_t do_plt_call(void *location,
entry++;
}
- entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */
- entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/
- entry->jump[2] = 0x7d8903a6; /* mtctr r12 */
- entry->jump[3] = 0x4e800420; /* bctr */
+ if (patch_instruction(&entry->jump[0], ppc_inst(PPC_RAW_LIS(_R12, PPC_HA(val)))))
+ return 0;
+ if (patch_instruction(&entry->jump[1], ppc_inst(PPC_RAW_ADDI(_R12, _R12, PPC_LO(val)))))
+ return 0;
+ if (patch_instruction(&entry->jump[2], ppc_inst(PPC_RAW_MTCTR(_R12))))
+ return 0;
+ if (patch_instruction(&entry->jump[3], ppc_inst(PPC_RAW_BCTR())))
+ return 0;
- DEBUGP("Initialized plt for 0x%x at %p\n", val, entry);
+ pr_debug("Initialized plt for 0x%x at %p\n", val, entry);
return (uint32_t)entry;
}
+static int patch_location_16(uint32_t *loc, u16 value)
+{
+ loc = PTR_ALIGN_DOWN(loc, sizeof(u32));
+ return patch_instruction(loc, ppc_inst((*loc & 0xffff0000) | value));
+}
+
int apply_relocate_add(Elf32_Shdr *sechdrs,
const char *strtab,
unsigned int symindex,
@@ -224,7 +205,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
uint32_t *location;
uint32_t value;
- DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ pr_debug("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
@@ -245,44 +226,46 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
case R_PPC_ADDR16_LO:
/* Low half of the symbol */
- *(uint16_t *)location = value;
+ if (patch_location_16(location, PPC_LO(value)))
+ return -EFAULT;
break;
case R_PPC_ADDR16_HI:
/* Higher half of the symbol */
- *(uint16_t *)location = (value >> 16);
+ if (patch_location_16(location, PPC_HI(value)))
+ return -EFAULT;
break;
case R_PPC_ADDR16_HA:
- /* Sign-adjusted lower 16 bits: PPC ELF ABI says:
- (((x >> 16) + ((x & 0x8000) ? 1 : 0))) & 0xFFFF.
- This is the same, only sane.
- */
- *(uint16_t *)location = (value + 0x8000) >> 16;
+ if (patch_location_16(location, PPC_HA(value)))
+ return -EFAULT;
break;
case R_PPC_REL24:
if ((int)(value - (uint32_t)location) < -0x02000000
- || (int)(value - (uint32_t)location) >= 0x02000000)
+ || (int)(value - (uint32_t)location) >= 0x02000000) {
value = do_plt_call(location, value,
sechdrs, module);
+ if (!value)
+ return -EFAULT;
+ }
/* Only replace bits 2 through 26 */
- DEBUGP("REL24 value = %08X. location = %08X\n",
+ pr_debug("REL24 value = %08X. location = %08X\n",
value, (uint32_t)location);
- DEBUGP("Location before: %08X.\n",
+ pr_debug("Location before: %08X.\n",
*(uint32_t *)location);
- *(uint32_t *)location
- = (*(uint32_t *)location & ~0x03fffffc)
- | ((value - (uint32_t)location)
- & 0x03fffffc);
- DEBUGP("Location after: %08X.\n",
+ value = (*(uint32_t *)location & ~PPC_LI_MASK) |
+ PPC_LI(value - (uint32_t)location);
+
+ if (patch_instruction(location, ppc_inst(value)))
+ return -EFAULT;
+
+ pr_debug("Location after: %08X.\n",
*(uint32_t *)location);
- DEBUGP("ie. jump to %08X+%08X = %08X\n",
- *(uint32_t *)location & 0x03fffffc,
- (uint32_t)location,
- (*(uint32_t *)location & 0x03fffffc)
- + (uint32_t)location);
+ pr_debug("ie. jump to %08X+%08X = %08X\n",
+ *(uint32_t *)PPC_LI((uint32_t)location), (uint32_t)location,
+ (*(uint32_t *)PPC_LI((uint32_t)location)) + (uint32_t)location);
break;
case R_PPC_REL32:
@@ -291,17 +274,67 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
break;
default:
- printk("%s: unknown ADD relocation: %u\n",
+ pr_err("%s: unknown ADD relocation: %u\n",
module->name,
ELF32_R_TYPE(rela[i].r_info));
return -ENOEXEC;
}
}
+
+ return 0;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
- module->arch.tramp =
- do_plt_call(module->module_core,
- (unsigned long)ftrace_caller,
- sechdrs, module);
+notrace int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
+{
+ ppc_inst_t jmp[4];
+
+ /* Find where the trampoline jumps to */
+ if (copy_inst_from_kernel_nofault(jmp, (void *)addr))
+ return -EFAULT;
+ if (__copy_inst_from_kernel_nofault(jmp + 1, (void *)addr + 4))
+ return -EFAULT;
+ if (__copy_inst_from_kernel_nofault(jmp + 2, (void *)addr + 8))
+ return -EFAULT;
+ if (__copy_inst_from_kernel_nofault(jmp + 3, (void *)addr + 12))
+ return -EFAULT;
+
+ /* verify that this is what we expect it to be */
+ if ((ppc_inst_val(jmp[0]) & 0xffff0000) != PPC_RAW_LIS(_R12, 0))
+ return -EINVAL;
+ if ((ppc_inst_val(jmp[1]) & 0xffff0000) != PPC_RAW_ADDI(_R12, _R12, 0))
+ return -EINVAL;
+ if (ppc_inst_val(jmp[2]) != PPC_RAW_MTCTR(_R12))
+ return -EINVAL;
+ if (ppc_inst_val(jmp[3]) != PPC_RAW_BCTR())
+ return -EINVAL;
+
+ addr = (ppc_inst_val(jmp[1]) & 0xffff) | ((ppc_inst_val(jmp[0]) & 0xffff) << 16);
+ if (addr & 0x8000)
+ addr -= 0x10000;
+
+ *target = addr;
+
+ return 0;
+}
+
+int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
+{
+ module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base,
+ (unsigned long)ftrace_caller,
+ sechdrs, module);
+ if (!module->arch.tramp)
+ return -ENOENT;
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base,
+ (unsigned long)ftrace_regs_caller,
+ sechdrs, module);
+ if (!module->arch.tramp_regs)
+ return -ENOENT;
#endif
+
return 0;
}
+#endif
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index d807ee626af9..2a44bc8e2439 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -1,20 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/* Kernel module help for PPC64.
Copyright (C) 2001, 2003 Rusty Russell IBM Corporation.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
+*/
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
#include <linux/module.h>
#include <linux/elf.h>
#include <linux/moduleloader.h>
@@ -23,11 +14,14 @@
#include <linux/ftrace.h>
#include <linux/bug.h>
#include <linux/uaccess.h>
+#include <linux/kernel.h>
#include <asm/module.h>
#include <asm/firmware.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
#include <linux/sort.h>
#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/inst.h>
/* FIXME: We don't do .init separately. To do this, we'd need to have
a separate r2 value in the init and core section, and stub between
@@ -36,29 +30,26 @@
Using a magic allocator which places modules within 32MB solves
this, and makes other things simpler. Anton?
--RR. */
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(fmt , ...)
-#endif
-#if defined(_CALL_ELF) && _CALL_ELF == 2
-#define R2_STACK_OFFSET 24
+bool module_elf_check_arch(Elf_Ehdr *hdr)
+{
+ unsigned long abi_level = hdr->e_flags & 0x3;
-/* An address is simply the address of the function. */
-typedef unsigned long func_desc_t;
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ return abi_level == 2;
+ else
+ return abi_level < 2;
+}
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
static func_desc_t func_desc(unsigned long addr)
{
- return addr;
-}
-static unsigned long func_addr(unsigned long addr)
-{
- return addr;
-}
-static unsigned long stub_func_addr(func_desc_t func)
-{
- return func;
+ func_desc_t desc = {
+ .addr = addr,
+ };
+
+ return desc;
}
/* PowerPC64 specific values for the Elf64_Sym st_other field. */
@@ -75,141 +66,100 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
}
#else
-#define R2_STACK_OFFSET 40
-
-/* An address is address of the OPD entry, which contains address of fn. */
-typedef struct ppc64_opd_entry func_desc_t;
static func_desc_t func_desc(unsigned long addr)
{
- return *(struct ppc64_opd_entry *)addr;
+ return *(struct func_desc *)addr;
}
-static unsigned long func_addr(unsigned long addr)
+static unsigned int local_entry_offset(const Elf64_Sym *sym)
{
- return func_desc(addr).funcaddr;
+ return 0;
}
-static unsigned long stub_func_addr(func_desc_t func)
+
+void *dereference_module_function_descriptor(struct module *mod, void *ptr)
{
- return func.funcaddr;
+ if (ptr < (void *)mod->arch.start_opd ||
+ ptr >= (void *)mod->arch.end_opd)
+ return ptr;
+
+ return dereference_function_descriptor(ptr);
}
-static unsigned int local_entry_offset(const Elf64_Sym *sym)
+#endif
+
+static unsigned long func_addr(unsigned long addr)
{
- return 0;
+ return func_desc(addr).addr;
+}
+
+static unsigned long stub_func_addr(func_desc_t func)
+{
+ return func.addr;
}
-#endif
+
+#define STUB_MAGIC 0x73747562 /* stub */
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
-struct ppc64_stub_entry
-{
- /* 28 byte jump instruction sequence (7 instructions). We only
- * need 6 instructions on ABIv2 but we always allocate 7 so
- * so we don't have to modify the trampoline load instruction. */
+struct ppc64_stub_entry {
+ /*
+ * 28 byte jump instruction sequence (7 instructions) that can
+ * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned
+ * with PCREL kernels that use prefix instructions in the stub.
+ */
u32 jump[7];
- u32 unused;
+ /* Used by ftrace to identify stubs */
+ u32 magic;
/* Data for the above code */
func_desc_t funcdata;
+} __aligned(8);
+
+struct ppc64_got_entry {
+ u64 addr;
};
/*
* PPC64 uses 24 bit jumps, but we need to jump into other modules or
* the kernel which may be further. So we jump to a stub.
*
- * For ELFv1 we need to use this to set up the new r2 value (aka TOC
- * pointer). For ELFv2 it's the callee's responsibility to set up the
- * new r2, but for both we need to save the old r2.
+ * Target address and TOC are loaded from function descriptor in the
+ * ppc64_stub_entry.
*
- * We could simply patch the new r2 value and function pointer into
- * the stub, but it's significantly shorter to put these values at the
- * end of the stub code, and patch the stub address (32-bits relative
- * to the TOC ptr, r2) into the stub.
+ * r12 is used to generate the target address, which is required for the
+ * ELFv2 global entry point calling convention.
+ *
+ * TOC handling:
+ * - PCREL does not have a TOC.
+ * - ELFv2 non-PCREL just has to save r2, the callee is responsible for
+ * setting its own TOC pointer at the global entry address.
+ * - ELFv1 must load the new TOC pointer from the function descriptor.
*/
-
static u32 ppc64_stub_insns[] = {
- 0x3d620000, /* addis r11,r2, <high> */
- 0x396b0000, /* addi r11,r11, <low> */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* pld r12,addr */
+ PPC_PREFIX_8LS | __PPC_PRFX_R(1),
+ PPC_INST_PLD | ___PPC_RT(_R12),
+#else
+ PPC_RAW_ADDIS(_R11, _R2, 0),
+ PPC_RAW_ADDI(_R11, _R11, 0),
/* Save current r2 value in magic place on the stack. */
- 0xf8410000|R2_STACK_OFFSET, /* std r2,R2_STACK_OFFSET(r1) */
- 0xe98b0020, /* ld r12,32(r11) */
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ PPC_RAW_STD(_R2, _R1, R2_STACK_OFFSET),
+ PPC_RAW_LD(_R12, _R11, 32),
+#ifdef CONFIG_PPC64_ELF_ABI_V1
/* Set up new r2 from function descriptor */
- 0xe84b0028, /* ld r2,40(r11) */
+ PPC_RAW_LD(_R2, _R11, 40),
#endif
- 0x7d8903a6, /* mtctr r12 */
- 0x4e800420 /* bctr */
-};
-
-#ifdef CONFIG_DYNAMIC_FTRACE
-
-static u32 ppc64_stub_mask[] = {
- 0xffff0000,
- 0xffff0000,
- 0xffffffff,
- 0xffffffff,
-#if !defined(_CALL_ELF) || _CALL_ELF != 2
- 0xffffffff,
#endif
- 0xffffffff,
- 0xffffffff
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
};
-bool is_module_trampoline(u32 *p)
-{
- unsigned int i;
- u32 insns[ARRAY_SIZE(ppc64_stub_insns)];
-
- BUILD_BUG_ON(sizeof(ppc64_stub_insns) != sizeof(ppc64_stub_mask));
-
- if (probe_kernel_read(insns, p, sizeof(insns)))
- return -EFAULT;
-
- for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
- u32 insna = insns[i];
- u32 insnb = ppc64_stub_insns[i];
- u32 mask = ppc64_stub_mask[i];
-
- if ((insna & mask) != (insnb & mask))
- return false;
- }
-
- return true;
-}
-
-int module_trampoline_target(struct module *mod, u32 *trampoline,
- unsigned long *target)
-{
- u32 buf[2];
- u16 upper, lower;
- long offset;
- void *toc_entry;
-
- if (probe_kernel_read(buf, trampoline, sizeof(buf)))
- return -EFAULT;
-
- upper = buf[0] & 0xffff;
- lower = buf[1] & 0xffff;
-
- /* perform the addis/addi, both signed */
- offset = ((short)upper << 16) + (short)lower;
-
- /*
- * Now get the address this trampoline jumps to. This
- * is always 32 bytes into our trampoline stub.
- */
- toc_entry = (void *)mod->arch.toc + offset + 32;
-
- if (probe_kernel_read(target, toc_entry, sizeof(*target)))
- return -EFAULT;
-
- return 0;
-}
-
-#endif
-
-/* Count how many different 24-bit relocations (different symbol,
- different addend) */
-static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+/*
+ * Count how many different r_type relocations (different symbol,
+ * different addend).
+ */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num,
+ unsigned long r_type)
{
unsigned int i, r_info, r_addend, _count_relocs;
@@ -218,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
r_info = 0;
r_addend = 0;
for (i = 0; i < num; i++)
- /* Only count 24-bit relocs, others don't need stubs */
- if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ /* Only count r_type relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) == r_type &&
(r_info != ELF64_R_SYM(rela[i].r_info) ||
r_addend != rela[i].r_addend)) {
_count_relocs++;
@@ -253,61 +203,155 @@ static int relacmp(const void *_x, const void *_y)
return 0;
}
-static void relaswap(void *_x, void *_y, int size)
-{
- uint64_t *x, *y, tmp;
- int i;
-
- y = (uint64_t *)_x;
- x = (uint64_t *)_y;
-
- for (i = 0; i < sizeof(Elf64_Rela) / sizeof(uint64_t); i++) {
- tmp = x[i];
- x[i] = y[i];
- y[i] = tmp;
- }
-}
-
/* Get size of potential trampolines required. */
static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
- const Elf64_Shdr *sechdrs)
+ const Elf64_Shdr *sechdrs,
+ char *secstrings,
+ struct module *me)
{
- /* One extra reloc so it's always 0-funcaddr terminated */
- unsigned long relocs = 1;
+ unsigned long relocs = 0;
unsigned i;
/* Every relocated section... */
for (i = 1; i < hdr->e_shnum; i++) {
if (sechdrs[i].sh_type == SHT_RELA) {
- DEBUGP("Found relocations in section %u\n", i);
- DEBUGP("Ptr: %p. Number: %lu\n",
+ pr_debug("Found relocations in section %u\n", i);
+ pr_debug("Ptr: %p. Number: %Lu\n",
(void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size / sizeof(Elf64_Rela));
/* Sort the relocation information based on a symbol and
* addend key. This is a stable O(n*log n) complexity
- * alogrithm but it will reduce the complexity of
+ * algorithm but it will reduce the complexity of
* count_relocs() to linear complexity O(n)
*/
sort((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size / sizeof(Elf64_Rela),
- sizeof(Elf64_Rela), relacmp, relaswap);
+ sizeof(Elf64_Rela), relacmp, NULL);
relocs += count_relocs((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size
- / sizeof(Elf64_Rela));
+ / sizeof(Elf64_Rela),
+ R_PPC_REL24);
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_REL24_NOTOC);
+#endif
}
}
-#ifdef CONFIG_DYNAMIC_FTRACE
- /* make the trampoline to the ftrace_caller */
- relocs++;
+ /* stubs for ftrace_caller and ftrace_regs_caller */
+ relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS);
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ /* stubs for the function tracer */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) {
+ me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long);
+ me->arch.ool_stub_index = 0;
+ relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub),
+ sizeof(struct ppc64_stub_entry)) /
+ sizeof(struct ppc64_stub_entry);
+ break;
+ }
+ }
#endif
- DEBUGP("Looks like a total of %lu stubs, max\n", relocs);
+ pr_debug("Looks like a total of %lu stubs, max\n", relocs);
return relocs * sizeof(struct ppc64_stub_entry);
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+static int count_pcpu_relocs(const Elf64_Shdr *sechdrs,
+ const Elf64_Rela *rela, unsigned int num,
+ unsigned int symindex, unsigned int pcpu)
+{
+ unsigned int i, r_info, r_addend, _count_relocs;
+
+ _count_relocs = 0;
+ r_info = 0;
+ r_addend = 0;
+
+ for (i = 0; i < num; i++) {
+ Elf64_Sym *sym;
+
+ /* This is the symbol it is referring to */
+ sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ + ELF64_R_SYM(rela[i].r_info);
+
+ if (sym->st_shndx == pcpu &&
+ (r_info != ELF64_R_SYM(rela[i].r_info) ||
+ r_addend != rela[i].r_addend)) {
+ _count_relocs++;
+ r_info = ELF64_R_SYM(rela[i].r_info);
+ r_addend = rela[i].r_addend;
+ }
+ }
+
+ return _count_relocs;
+}
+
+/* Get size of potential GOT required. */
+static unsigned long get_got_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs,
+ struct module *me)
+{
+ /* One extra reloc so it's always 0-addr terminated */
+ unsigned long relocs = 1;
+ unsigned int i, symindex = 0;
+
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ symindex = i;
+ break;
+ }
+ }
+ WARN_ON_ONCE(!symindex);
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ pr_debug("Found relocations in section %u\n", i);
+ pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+
+ /*
+ * Sort the relocation information based on a symbol and
+ * addend key. This is a stable O(n*log n) complexity
+ * algorithm but it will reduce the complexity of
+ * count_relocs() to linear complexity O(n)
+ */
+ sort((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela),
+ sizeof(Elf64_Rela), relacmp, NULL);
+
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_GOT_PCREL34);
+
+ /*
+ * Percpu data access typically gets linked with
+ * REL34 relocations, but the percpu section gets
+ * moved at load time and requires that to be
+ * converted to GOT linkage.
+ */
+ if (IS_ENABLED(CONFIG_SMP) && symindex)
+ relocs += count_pcpu_relocs(sechdrs,
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ symindex, me->arch.pcpu_section);
+ }
+ }
+
+ pr_debug("Looks like a total of %lu GOT entries, max\n", relocs);
+ return relocs * sizeof(struct ppc64_got_entry);
+}
+#else /* CONFIG_PPC_KERNEL_PCREL */
+
/* Still needed for ELFv2, for .TOC. */
static void dedotify_versions(struct modversion_info *vers,
unsigned long size)
@@ -317,18 +361,31 @@ static void dedotify_versions(struct modversion_info *vers,
for (end = (void *)vers + size; vers < end; vers++)
if (vers->name[0] == '.') {
memmove(vers->name, vers->name+1, strlen(vers->name));
-#ifdef ARCH_RELOCATES_KCRCTAB
- /* The TOC symbol has no CRC computed. To avoid CRC
- * check failing, we must force it to the expected
- * value (see CRC check in module.c).
- */
- if (!strcmp(vers->name, "TOC."))
- vers->crc = -(unsigned long)reloc_start;
-#endif
}
}
-/* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */
+/* Same as normal versions, remove a leading dot if present. */
+static void dedotify_ext_version_names(char *str_seq, unsigned long size)
+{
+ unsigned long out = 0;
+ unsigned long in;
+ char last = '\0';
+
+ for (in = 0; in < size; in++) {
+ /* Skip one leading dot */
+ if (last == '\0' && str_seq[in] == '.')
+ in++;
+ last = str_seq[in];
+ str_seq[out++] = last;
+ }
+ /* Zero the trailing portion of the names table for robustness */
+ memset(&str_seq[out], 0, size - out);
+}
+
+/*
+ * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC.
+ * seem to be defined (value set later).
+ */
static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
{
unsigned int i;
@@ -336,8 +393,11 @@ static void dedotify(Elf64_Sym *syms, unsigned int numsyms, char *strtab)
for (i = 1; i < numsyms; i++) {
if (syms[i].st_shndx == SHN_UNDEF) {
char *name = strtab + syms[i].st_name;
- if (name[0] == '.')
- memmove(name, name+1, strlen(name));
+ if (name[0] == '.') {
+ if (strcmp(name+1, "TOC.") == 0)
+ syms[i].st_shndx = SHN_ABS;
+ syms[i].st_name++;
+ }
}
}
}
@@ -353,12 +413,19 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
numsyms = sechdrs[symindex].sh_size / sizeof(Elf64_Sym);
for (i = 1; i < numsyms; i++) {
- if (syms[i].st_shndx == SHN_UNDEF
+ if (syms[i].st_shndx == SHN_ABS
&& strcmp(strtab + syms[i].st_name, "TOC.") == 0)
return &syms[i];
}
return NULL;
}
+#endif /* CONFIG_PPC_KERNEL_PCREL */
+
+bool module_init_section(const char *name)
+{
+ /* We don't handle .init for the moment: always return false. */
+ return false;
+}
int module_frob_arch_sections(Elf64_Ehdr *hdr,
Elf64_Shdr *sechdrs,
@@ -369,88 +436,266 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
/* Find .toc and .stubs sections, symtab and strtab */
for (i = 1; i < hdr->e_shnum; i++) {
- char *p;
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
- else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0)
+ me->arch.pcpu_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) {
+ me->arch.got_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
+#else
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
- else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
-
- /* We don't handle .init for the moment: rename to _init */
- while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
- p[0] = '_';
+ else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0)
+ dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset,
+ sechdrs[i].sh_size);
if (sechdrs[i].sh_type == SHT_SYMTAB)
dedotify((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size / sizeof(Elf64_Sym),
(void *)hdr
+ sechdrs[sechdrs[i].sh_link].sh_offset);
+#endif
}
if (!me->arch.stubs_section) {
- printk("%s: doesn't contain .stubs.\n", me->name);
+ pr_err("%s: doesn't contain .stubs.\n", me->name);
+ return -ENOEXEC;
+ }
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ if (!me->arch.got_section) {
+ pr_err("%s: doesn't contain .mygot.\n", me->name);
return -ENOEXEC;
}
+ /* Override the got size */
+ sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me);
+#else
/* If we don't have a .toc, just use .stubs. We need to set r2
to some reasonable value in case the module calls out to
other functions via a stub, or if a function pointer escapes
the module by some means. */
if (!me->arch.toc_section)
me->arch.toc_section = me->arch.stubs_section;
+#endif
/* Override the stubs size */
- sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+ sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me);
+
return 0;
}
-/* r2 is the TOC pointer: it actually points 0x8000 into the TOC (this
- gives the value maximum span in an instruction which uses a signed
- offset) */
-static inline unsigned long my_r2(Elf64_Shdr *sechdrs, struct module *me)
+#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)
+
+static u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)),
+ PPC_RAW_NOP(), /* align the prefix insn */
+ /* paddi r12,r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(0),
+ PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+#else
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+#endif
+};
+
+/*
+ * For mprofile-kernel we use a special stub for ftrace_caller() because we
+ * can't rely on r2 containing this module's TOC when we enter the stub.
+ *
+ * That can happen if the function calling us didn't need to use the toc. In
+ * that case it won't have setup r2, and the r2 value will be either the
+ * kernel's toc, or possibly another modules toc.
+ *
+ * To deal with that this stub uses the kernel toc, which is always accessible
+ * via the paca (in r13). The target (ftrace_caller()) is responsible for
+ * saving and restoring the toc before returning.
+ */
+static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
+ unsigned long addr,
+ struct module *me)
+{
+ long reladdr;
+
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
+ return 0;
+ }
+
+ BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump));
+ memcpy(entry->jump, stub_insns, sizeof(stub_insns));
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to kernel base (from the paca) */
+ reladdr = addr - local_paca->kernelbase;
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %ps out of range of 34-bit relative address.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[2] |= IMM_H18(reladdr);
+ entry->jump[3] |= IMM_L(reladdr);
+ } else {
+ /* Stub uses address relative to kernel toc (from the paca) */
+ reladdr = addr - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address of %ps out of range of kernel_toc.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[1] |= PPC_HA(reladdr);
+ entry->jump[2] |= PPC_LO(reladdr);
+ }
+
+ /* Even though we don't use funcdata in the stub, it's needed elsewhere. */
+ entry->funcdata = func_desc(addr);
+ entry->magic = STUB_MAGIC;
+
+ return 1;
+}
+
+static bool is_mprofile_ftrace_call(const char *name)
{
- return sechdrs[me->arch.toc_section].sh_addr + 0x8000;
+ if (!strcmp("_mcount", name))
+ return true;
+#ifdef CONFIG_DYNAMIC_FTRACE
+ if (!strcmp("ftrace_caller", name))
+ return true;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ if (!strcmp("ftrace_regs_caller", name))
+ return true;
+#endif
+#endif
+
+ return false;
}
+#else
+static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
+ unsigned long addr,
+ struct module *me)
+{
+ return 0;
+}
+
+static bool is_mprofile_ftrace_call(const char *name)
+{
+ return false;
+}
+#endif
-/* Both low and high 16 bits are added as SIGNED additions, so if low
- 16 bits has high bit set, high 16 bits must be adjusted. These
- macros do that (stolen from binutils). */
-#define PPC_LO(v) ((v) & 0xffff)
-#define PPC_HI(v) (((v) >> 16) & 0xffff)
-#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+/*
+ * r2 is the TOC pointer: it actually points 0x8000 into the TOC (this gives the
+ * value maximum span in an instruction which uses a signed offset). Round down
+ * to a 256 byte boundary for the odd case where we are setting up r2 without a
+ * .toc section.
+ */
+static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
+{
+#ifndef CONFIG_PPC_KERNEL_PCREL
+ return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
+#else
+ return -1;
+#endif
+}
/* Patch stub to reference function and correct r2 value. */
-static inline int create_stub(Elf64_Shdr *sechdrs,
+static inline int create_stub(const Elf64_Shdr *sechdrs,
struct ppc64_stub_entry *entry,
unsigned long addr,
- struct module *me)
+ struct module *me,
+ const char *name)
{
long reladdr;
+ func_desc_t desc;
+ int i;
- memcpy(entry->jump, ppc64_stub_insns, sizeof(ppc64_stub_insns));
+ if (is_mprofile_ftrace_call(name))
+ return create_ftrace_stub(entry, addr, me);
- /* Stub uses address relative to r2. */
- reladdr = (unsigned long)entry - my_r2(sechdrs, me);
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- printk("%s: Address %p of stub out of range of %p.\n",
- me->name, (void *)reladdr, (void *)my_r2);
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
return 0;
}
- DEBUGP("Stub %p get data from reladdr %li\n", entry, reladdr);
- entry->jump[0] |= PPC_HA(reladdr);
- entry->jump[1] |= PPC_LO(reladdr);
- entry->funcdata = func_desc(addr);
+ BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump));
+ for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
+ if (patch_instruction(&entry->jump[i],
+ ppc_inst(ppc64_stub_insns[i])))
+ return 0;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to itself! */
+ reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata);
+ BUILD_BUG_ON(reladdr != 32);
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %p out of range of 34-bit relative address.\n",
+ me->name, (void *)reladdr);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ /* May not even need this if we're relative to 0 */
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr),
+ entry->jump[1] | IMM_L(reladdr))))
+ return 0;
+
+ } else {
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+ return 0;
+
+ if (patch_instruction(&entry->jump[1],
+ ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+ return 0;
+ }
+
+ // func_desc_t is 8 bytes if ABIv2, else 16 bytes
+ desc = func_desc(addr);
+ for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) {
+ if (patch_u32(((u32 *)&entry->funcdata) + i, ((u32 *)&desc)[i]))
+ return 0;
+ }
+
+ if (patch_u32(&entry->magic, STUB_MAGIC))
+ return 0;
+
return 1;
}
/* Create stub to jump to function described in this OPD/ptr: we need the
stub to set up the TOC ptr (r2) for the function. */
-static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
+static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
unsigned long addr,
- struct module *me)
+ struct module *me,
+ const char *name)
{
struct ppc64_stub_entry *stubs;
unsigned int i, num_stubs;
@@ -459,31 +704,90 @@ static unsigned long stub_for_addr(Elf64_Shdr *sechdrs,
/* Find this stub, or if that fails, the next avail. entry */
stubs = (void *)sechdrs[me->arch.stubs_section].sh_addr;
- for (i = 0; stub_func_addr(stubs[i].funcdata); i++) {
- BUG_ON(i >= num_stubs);
+ for (i = 0; i < me->arch.stub_count; i++) {
+ if (WARN_ON(i >= num_stubs))
+ return 0;
if (stub_func_addr(stubs[i].funcdata) == func_addr(addr))
return (unsigned long)&stubs[i];
}
- if (!create_stub(sechdrs, &stubs[i], addr, me))
+ if (!create_stub(sechdrs, &stubs[i], addr, me, name))
return 0;
+ me->arch.stub_count++;
return (unsigned long)&stubs[i];
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/* Create GOT to load the location described in this ptr */
+static unsigned long got_for_addr(const Elf64_Shdr *sechdrs,
+ unsigned long addr,
+ struct module *me,
+ const char *name)
+{
+ struct ppc64_got_entry *got;
+ unsigned int i, num_got;
+
+ if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return addr;
+
+ num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got);
+
+ /* Find this GOT entry, or if that fails, the next avail. entry */
+ got = (void *)sechdrs[me->arch.got_section].sh_addr;
+ for (i = 0; got[i].addr; i++) {
+ if (WARN_ON(i >= num_got))
+ return 0;
+
+ if (got[i].addr == addr)
+ return (unsigned long)&got[i];
+ }
+
+ got[i].addr = addr;
+
+ return (unsigned long)&got[i];
+}
+#endif
+
/* We expect a noop next: if it is, replace it with instruction to
restore r2. */
-static int restore_r2(u32 *instruction, struct module *me)
+static int restore_r2(const char *name, u32 *instruction, struct module *me)
{
- if (*instruction != PPC_INST_NOP) {
- printk("%s: Expect noop after relocate, got %08x\n",
- me->name, *instruction);
+ u32 *prev_insn = instruction - 1;
+ u32 insn_val = *instruction;
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
return 0;
+
+ if (is_mprofile_ftrace_call(name))
+ return 0;
+
+ /*
+ * Make sure the branch isn't a sibling call. Sibling calls aren't
+ * "link" branches and they don't return, so they don't need the r2
+ * restore afterwards.
+ */
+ if (!instr_is_relative_link_branch(ppc_inst(*prev_insn)))
+ return 0;
+
+ /*
+ * For livepatch, the restore r2 instruction might have already been
+ * written previously, if the referenced symbol is in a previously
+ * unloaded module which is now being loaded again. In that case, skip
+ * the warning and the instruction write.
+ */
+ if (insn_val == PPC_INST_LD_TOC)
+ return 0;
+
+ if (insn_val != PPC_RAW_NOP()) {
+ pr_err("%s: Expected nop after call, got %08x at %pS\n",
+ me->name, insn_val, instruction);
+ return -ENOEXEC;
}
+
/* ld r2,R2_STACK_OFFSET(r1) */
- *instruction = 0xe8410000 | R2_STACK_OFFSET;
- return 1;
+ return patch_instruction(instruction, ppc_inst(PPC_INST_LD_TOC));
}
int apply_relocate_add(Elf64_Shdr *sechdrs,
@@ -498,9 +802,10 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
unsigned long *location;
unsigned long value;
- DEBUGP("Applying ADD relocate section %u to %u\n", relsec,
+ pr_debug("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
+#ifndef CONFIG_PPC_KERNEL_PCREL
/* First time we're called, we can fix up .TOC. */
if (!me->arch.toc_fixed) {
sym = find_dot_toc(sechdrs, strtab, symindex);
@@ -510,7 +815,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
sym->st_value = my_r2(sechdrs, me);
me->arch.toc_fixed = true;
}
-
+#endif
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -519,7 +824,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+ ELF64_R_SYM(rela[i].r_info);
- DEBUGP("RELOC at %p: %li-type as %s (%lu) + %li\n",
+ pr_debug("RELOC at %p: %li-type as %s (0x%lx) + %li\n",
location, (long)ELF64_R_TYPE(rela[i].r_info),
strtab + sym->st_name, (unsigned long)sym->st_value,
(long)rela[i].r_addend);
@@ -538,6 +843,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(unsigned long *)location = value;
break;
+#ifndef CONFIG_PPC_KERNEL_PCREL
case R_PPC64_TOC:
*(unsigned long *)location = my_r2(sechdrs, me);
break;
@@ -546,7 +852,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Subtract TOC pointer */
value -= my_r2(sechdrs, me);
if (value + 0x8000 > 0xffff) {
- printk("%s: bad TOC16 relocation (%lu)\n",
+ pr_err("%s: bad TOC16 relocation (0x%lx)\n",
me->name, value);
return -ENOEXEC;
}
@@ -567,7 +873,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Subtract TOC pointer */
value -= my_r2(sechdrs, me);
if ((value & 3) != 0 || value + 0x8000 > 0xffff) {
- printk("%s: bad TOC16_DS relocation (%lu)\n",
+ pr_err("%s: bad TOC16_DS relocation (0x%lx)\n",
me->name, value);
return -ENOEXEC;
}
@@ -580,7 +886,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Subtract TOC pointer */
value -= my_r2(sechdrs, me);
if ((value & 3) != 0) {
- printk("%s: bad TOC16_LO_DS relocation (%lu)\n",
+ pr_err("%s: bad TOC16_LO_DS relocation (0x%lx)\n",
me->name, value);
return -ENOEXEC;
}
@@ -597,15 +903,23 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
= (*((uint16_t *) location) & ~0xffff)
| (value & 0xffff);
break;
+#endif
case R_PPC_REL24:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* PCREL still generates REL24 for mcount */
+ case R_PPC64_REL24_NOTOC:
+#endif
/* FIXME: Handle weak symbols here --RR */
- if (sym->st_shndx == SHN_UNDEF) {
+ if (sym->st_shndx == SHN_UNDEF ||
+ sym->st_shndx == SHN_LIVEPATCH) {
/* External: go via stub */
- value = stub_for_addr(sechdrs, value, me);
+ value = stub_for_addr(sechdrs, value, me,
+ strtab + sym->st_name);
if (!value)
return -ENOENT;
- if (!restore_r2((u32 *)location + 1, me))
+ if (restore_r2(strtab + sym->st_name,
+ (u32 *)location + 1, me))
return -ENOEXEC;
} else
value += local_entry_offset(sym);
@@ -613,15 +927,17 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
/* Convert value to relative */
value -= (unsigned long)location;
if (value + 0x2000000 > 0x3ffffff || (value & 3) != 0){
- printk("%s: REL24 %li out of range!\n",
+ pr_err("%s: REL24 %li out of range!\n",
me->name, (long int)value);
return -ENOEXEC;
}
/* Only replace bits 2 through 26 */
- *(uint32_t *)location
- = (*(uint32_t *)location & ~0x03fffffc)
- | (value & 0x03fffffc);
+ value = (*(uint32_t *)location & ~PPC_LI_MASK) | PPC_LI(value);
+
+ if (patch_instruction((u32 *)location, ppc_inst(value)))
+ return -EFAULT;
+
break;
case R_PPC64_REL64:
@@ -629,6 +945,59 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*location = value - (unsigned long)location;
break;
+ case R_PPC64_REL32:
+ /* 32 bits relative (used by relative exception tables) */
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x80000000 > 0xffffffff) {
+ pr_err("%s: REL32 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+ *(u32 *)location = value;
+ break;
+
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_PCREL34: {
+ unsigned long absvalue = value;
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+
+ if (value + 0x200000000 > 0x3ffffffff) {
+ if (sym->st_shndx != me->arch.pcpu_section) {
+ pr_err("%s: REL34 %li out of range!\n",
+ me->name, (long)value);
+ return -ENOEXEC;
+ }
+
+ /*
+ * per-cpu section is special cased because
+ * it is moved during loading, so has to be
+ * converted to use GOT.
+ */
+ value = got_for_addr(sechdrs, absvalue, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+
+ /* Turn pla into pld */
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x02000000),
+ (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000)))
+ return -EFAULT;
+ }
+
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value),
+ (*((u32 *)location + 1) & ~0xffff) | IMM_L(value))))
+ return -EFAULT;
+
+ break;
+ }
+
+#else
case R_PPC64_TOCSAVE:
/*
* Marker reloc indicates we don't have to save r2.
@@ -636,6 +1005,36 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* it.
*/
break;
+#endif
+
+ case R_PPC64_ENTRY:
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ break;
+
+ /*
+ * Optimize ELFv2 large code model entry point if
+ * the TOC is within 2GB range of current location.
+ */
+ value = my_r2(sechdrs, me) - (unsigned long)location;
+ if (value + 0x80008000 > 0xffffffff)
+ break;
+ /*
+ * Check for the large code model prolog sequence:
+ * ld r2, ...(r12)
+ * add r2, r2, r12
+ */
+ if ((((uint32_t *)location)[0] & ~0xfffc) != PPC_RAW_LD(_R2, _R12, 0))
+ break;
+ if (((uint32_t *)location)[1] != PPC_RAW_ADD(_R2, _R2, _R12))
+ break;
+ /*
+ * If found, replace it with:
+ * addis r2, r12, (.TOC.-func)@ha
+ * addi r2, r2, (.TOC.-func)@l
+ */
+ ((uint32_t *)location)[0] = PPC_RAW_ADDIS(_R2, _R12, PPC_HA(value));
+ ((uint32_t *)location)[1] = PPC_RAW_ADDI(_R2, _R2, PPC_LO(value));
+ break;
case R_PPC64_REL16_HA:
/* Subtract location pointer */
@@ -654,20 +1053,110 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
| (value & 0xffff);
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_GOT_PCREL34:
+ value = got_for_addr(sechdrs, value, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+ ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) |
+ ((value >> 16) & 0x3ffff);
+ ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) |
+ (value & 0xffff);
+ break;
+#endif
+
default:
- printk("%s: Unknown ADD relocation: %lu\n",
+ pr_err("%s: Unknown ADD relocation: %lu\n",
me->name,
(unsigned long)ELF64_R_TYPE(rela[i].r_info));
return -ENOEXEC;
}
}
+ return 0;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
- me->arch.toc = my_r2(sechdrs, me);
- me->arch.tramp = stub_for_addr(sechdrs,
- (unsigned long)ftrace_caller,
- me);
+int module_trampoline_target(struct module *mod, unsigned long addr,
+ unsigned long *target)
+{
+ struct ppc64_stub_entry *stub;
+ func_desc_t funcdata;
+ u32 magic;
+
+ if (!within_module_core(addr, mod)) {
+ pr_err("%s: stub %lx not in module %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
+
+ stub = (struct ppc64_stub_entry *)addr;
+
+ if (copy_from_kernel_nofault(&magic, &stub->magic,
+ sizeof(magic))) {
+ pr_err("%s: fault reading magic for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
+
+ if (magic != STUB_MAGIC) {
+ pr_err("%s: bad magic for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
+
+ if (copy_from_kernel_nofault(&funcdata, &stub->funcdata,
+ sizeof(funcdata))) {
+ pr_err("%s: fault reading funcdata for stub %lx for %s\n", __func__, addr, mod->name);
+ return -EFAULT;
+ }
+
+ *target = stub_func_addr(funcdata);
+
+ return 0;
+}
+
+static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me)
+{
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ unsigned int total_stubs, num_stubs;
+ struct ppc64_stub_entry *stub;
+
+ total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub);
+ num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub),
+ sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry);
+
+ if (WARN_ON(me->arch.stub_count + num_stubs > total_stubs))
+ return -1;
+
+ stub = (void *)sechdrs[me->arch.stubs_section].sh_addr;
+ me->arch.ool_stubs = (struct ftrace_ool_stub *)(stub + me->arch.stub_count);
+ me->arch.stub_count += num_stubs;
+#endif
+
+ return 0;
+}
+
+int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs)
+{
+ mod->arch.tramp = stub_for_addr(sechdrs,
+ (unsigned long)ftrace_caller,
+ mod,
+ "ftrace_caller");
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ mod->arch.tramp_regs = stub_for_addr(sechdrs,
+ (unsigned long)ftrace_regs_caller,
+ mod,
+ "ftrace_regs_caller");
+ if (!mod->arch.tramp_regs)
+ return -ENOENT;
#endif
+ if (!mod->arch.tramp)
+ return -ENOENT;
+
+ if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod))
+ return -ENOENT;
+
return 0;
}
+#endif
diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c
index 8bbc12d20f5c..a5d25bebcab9 100644
--- a/arch/powerpc/kernel/msi.c
+++ b/arch/powerpc/kernel/msi.c
@@ -1,10 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2006-2007, Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -13,9 +9,12 @@
#include <asm/machdep.h>
-int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- if (!ppc_md.setup_msi_irqs || !ppc_md.teardown_msi_irqs) {
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (!phb->controller_ops.setup_msi_irqs ||
+ !phb->controller_ops.teardown_msi_irqs) {
pr_debug("msi: Platform doesn't provide MSI callbacks.\n");
return -ENOSYS;
}
@@ -24,20 +23,17 @@ int arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
- if (ppc_md.msi_check_device) {
- pr_debug("msi: Using platform check routine.\n");
- return ppc_md.msi_check_device(dev, nvec, type);
- }
-
- return 0;
-}
-
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
- return ppc_md.setup_msi_irqs(dev, nvec, type);
+ return phb->controller_ops.setup_msi_irqs(dev, nvec, type);
}
void arch_teardown_msi_irqs(struct pci_dev *dev)
{
- ppc_md.teardown_msi_irqs(dev);
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ /*
+ * We can be called even when arch_setup_msi_irqs() returns -ENOSYS,
+ * so check the pointer again.
+ */
+ if (phb->controller_ops.teardown_msi_irqs)
+ phb->controller_ops.teardown_msi_irqs(dev);
}
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 000000000000..bcdad15395dd
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT 0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+ .long PPC_CAPABILITIES_BITMAP)
diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c
index 28b898e68185..f9c6568a9137 100644
--- a/arch/powerpc/kernel/nvram_64.c
+++ b/arch/powerpc/kernel/nvram_64.c
@@ -1,22 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* c 2001 PPC 64 Team, IBM Corp
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* /dev/nvram driver for PPC64
- *
- * This perhaps should live in drivers/char
- *
- * TODO: Split the /dev/nvram part (that one can use
- * drivers/char/generic_nvram.c) from the arch & partition
- * parsing code.
*/
-#include <linux/module.h>
-
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/fs.h>
@@ -26,10 +14,14 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
-#include <asm/uaccess.h>
+#include <linux/kmsg_dump.h>
+#include <linux/pagemap.h>
+#include <linux/pstore.h>
+#include <linux/zlib.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
#include <asm/nvram.h>
#include <asm/rtas.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#undef DEBUG_NVRAM
@@ -54,153 +46,663 @@ struct nvram_partition {
static LIST_HEAD(nvram_partitions);
-static loff_t dev_nvram_llseek(struct file *file, loff_t offset, int origin)
+#ifdef CONFIG_PPC_PSERIES
+struct nvram_os_partition rtas_log_partition = {
+ .name = "ibm,rtas-log",
+ .req_size = 2079,
+ .min_size = 1055,
+ .index = -1,
+ .os_partition = true
+};
+#endif
+
+struct nvram_os_partition oops_log_partition = {
+ .name = "lnx,oops-log",
+ .req_size = 4000,
+ .min_size = 2000,
+ .index = -1,
+ .os_partition = true
+};
+
+static const char *nvram_os_partitions[] = {
+#ifdef CONFIG_PPC_PSERIES
+ "ibm,rtas-log",
+#endif
+ "lnx,oops-log",
+ NULL
+};
+
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ struct kmsg_dump_detail *detail);
+
+static struct kmsg_dumper nvram_kmsg_dumper = {
+ .dump = oops_to_nvram
+};
+
+/*
+ * For capturing and compressing an oops or panic report...
+
+ * big_oops_buf[] holds the uncompressed text we're capturing.
+ *
+ * oops_buf[] holds the compressed text, preceded by an oops header.
+ * oops header has u16 holding the version of oops header (to differentiate
+ * between old and new format header) followed by u16 holding the length of
+ * the compressed* text (*Or uncompressed, if compression fails.) and u64
+ * holding the timestamp. oops_buf[] gets written to NVRAM.
+ *
+ * oops_log_info points to the header. oops_data points to the compressed text.
+ *
+ * +- oops_buf
+ * | +- oops_data
+ * v v
+ * +-----------+-----------+-----------+------------------------+
+ * | version | length | timestamp | text |
+ * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
+ * +-----------+-----------+-----------+------------------------+
+ * ^
+ * +- oops_log_info
+ *
+ * We preallocate these buffers during init to avoid kmalloc during oops/panic.
+ */
+static size_t big_oops_buf_sz;
+static char *big_oops_buf, *oops_buf;
+static char *oops_data;
+static size_t oops_data_sz;
+
+/* Compression parameters */
+#define COMPR_LEVEL 6
+#define WINDOW_BITS 12
+#define MEM_LEVEL 4
+static struct z_stream_s stream;
+
+#ifdef CONFIG_PSTORE
+#ifdef CONFIG_PPC_POWERNV
+static struct nvram_os_partition skiboot_partition = {
+ .name = "ibm,skiboot",
+ .index = -1,
+ .os_partition = false
+};
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+static struct nvram_os_partition of_config_partition = {
+ .name = "of-config",
+ .index = -1,
+ .os_partition = false
+};
+#endif
+
+static struct nvram_os_partition common_partition = {
+ .name = "common",
+ .index = -1,
+ .os_partition = false
+};
+
+static enum pstore_type_id nvram_type_ids[] = {
+ PSTORE_TYPE_DMESG,
+ PSTORE_TYPE_PPC_COMMON,
+ -1,
+ -1,
+ -1
+};
+static int read_type;
+#endif
+
+/* nvram_write_os_partition
+ *
+ * We need to buffer the error logs into nvram to ensure that we have
+ * the failure information to decode. If we have a severe error there
+ * is no way to guarantee that the OS or the machine is in a state to
+ * get back to user land and write the error to disk. For example if
+ * the SCSI device driver causes a Machine Check by writing to a bad
+ * IO address, there is no way of guaranteeing that the device driver
+ * is in any state that it would also be able to write the error data
+ * captured to disk, thus we buffer it in NVRAM for analysis on the
+ * next boot.
+ *
+ * In NVRAM the partition containing the error log buffer will look like:
+ * Header (in bytes):
+ * +-----------+----------+--------+------------+------------------+
+ * | signature | checksum | length | name | data |
+ * |0 |1 |2 3|4 15|16 length-1|
+ * +-----------+----------+--------+------------+------------------+
+ *
+ * The 'data' section would look like (in bytes):
+ * +--------------+------------+-----------------------------------+
+ * | event_logged | sequence # | error log |
+ * |0 3|4 7|8 error_log_size-1|
+ * +--------------+------------+-----------------------------------+
+ *
+ * event_logged: 0 if event has not been logged to syslog, 1 if it has
+ * sequence #: The unique sequence # for each event. (until it wraps)
+ * error log: The error log from event_scan
+ */
+int nvram_write_os_partition(struct nvram_os_partition *part,
+ char *buff, int length,
+ unsigned int err_type,
+ unsigned int error_log_cnt)
{
- int size;
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
- if (ppc_md.nvram_size == NULL)
- return -ENODEV;
- size = ppc_md.nvram_size();
+ if (part->index == -1)
+ return -ESPIPE;
- switch (origin) {
- case 1:
- offset += file->f_pos;
- break;
- case 2:
- offset += size;
- break;
+ if (length > part->size)
+ length = part->size;
+
+ info.error_type = cpu_to_be32(err_type);
+ info.seq_num = cpu_to_be32(error_log_cnt);
+
+ tmp_index = part->index;
+
+ rc = ppc_md.nvram_write((char *)&info, sizeof(info), &tmp_index);
+ if (rc <= 0) {
+ pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
+ return rc;
}
- if (offset < 0)
- return -EINVAL;
- file->f_pos = offset;
- return file->f_pos;
-}
+ rc = ppc_md.nvram_write(buff, length, &tmp_index);
+ if (rc <= 0) {
+ pr_err("%s: Failed nvram_write (%d)\n", __func__, rc);
+ return rc;
+ }
-static ssize_t dev_nvram_read(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
+ return 0;
+}
+
+/* nvram_read_partition
+ *
+ * Reads nvram partition for at most 'length'
+ */
+int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+ int length, unsigned int *err_type,
+ unsigned int *error_log_cnt)
{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
+ int rc;
+ loff_t tmp_index;
+ struct err_log_info info;
+
+ if (part->index == -1)
+ return -1;
- if (!ppc_md.nvram_size) {
- ret = -ENODEV;
- goto out;
+ if (length > part->size)
+ length = part->size;
+
+ tmp_index = part->index;
+
+ if (part->os_partition) {
+ rc = ppc_md.nvram_read((char *)&info, sizeof(info), &tmp_index);
+ if (rc <= 0) {
+ pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
+ return rc;
+ }
}
- size = ppc_md.nvram_size();
- if (size < 0) {
- ret = size;
- goto out;
+ rc = ppc_md.nvram_read(buff, length, &tmp_index);
+ if (rc <= 0) {
+ pr_err("%s: Failed nvram_read (%d)\n", __func__, rc);
+ return rc;
}
- if (*ppos >= size) {
- ret = 0;
- goto out;
+ if (part->os_partition) {
+ *error_log_cnt = be32_to_cpu(info.seq_num);
+ *err_type = be32_to_cpu(info.error_type);
}
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
+ return 0;
+}
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp) {
- ret = -ENOMEM;
- goto out;
+/* nvram_init_os_partition
+ *
+ * This sets up a partition with an "OS" signature.
+ *
+ * The general strategy is the following:
+ * 1.) If a partition with the indicated name already exists...
+ * - If it's large enough, use it.
+ * - Otherwise, recycle it and keep going.
+ * 2.) Search for a free partition that is large enough.
+ * 3.) If there's not a free partition large enough, recycle any obsolete
+ * OS partitions and try again.
+ * 4.) Will first try getting a chunk that will satisfy the requested size.
+ * 5.) If a chunk of the requested size cannot be allocated, then try finding
+ * a chunk that will satisfy the minimum needed.
+ *
+ * Returns 0 on success, else -1.
+ */
+int __init nvram_init_os_partition(struct nvram_os_partition *part)
+{
+ loff_t p;
+ int size;
+
+ /* Look for ours */
+ p = nvram_find_partition(part->name, NVRAM_SIG_OS, &size);
+
+ /* Found one but too small, remove it */
+ if (p && size < part->min_size) {
+ pr_info("nvram: Found too small %s partition,"
+ " removing it...\n", part->name);
+ nvram_remove_partition(part->name, NVRAM_SIG_OS, NULL);
+ p = 0;
}
- ret = ppc_md.nvram_read(tmp, count, ppos);
- if (ret <= 0)
- goto out;
+ /* Create one if we didn't find */
+ if (!p) {
+ p = nvram_create_partition(part->name, NVRAM_SIG_OS,
+ part->req_size, part->min_size);
+ if (p == -ENOSPC) {
+ pr_info("nvram: No room to create %s partition, "
+ "deleting any obsolete OS partitions...\n",
+ part->name);
+ nvram_remove_partition(NULL, NVRAM_SIG_OS,
+ nvram_os_partitions);
+ p = nvram_create_partition(part->name, NVRAM_SIG_OS,
+ part->req_size, part->min_size);
+ }
+ }
+
+ if (p <= 0) {
+ pr_err("nvram: Failed to find or create %s"
+ " partition, err %d\n", part->name, (int)p);
+ return -1;
+ }
+
+ part->index = p;
+ part->size = nvram_get_partition_size(p) - sizeof(struct err_log_info);
- if (copy_to_user(buf, tmp, ret))
- ret = -EFAULT;
+ return 0;
+}
-out:
- kfree(tmp);
+/* Derived from logfs_compress() */
+static int nvram_compress(const void *in, void *out, size_t inlen,
+ size_t outlen)
+{
+ int err, ret;
+
+ ret = -EIO;
+ err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
+ MEM_LEVEL, Z_DEFAULT_STRATEGY);
+ if (err != Z_OK)
+ goto error;
+
+ stream.next_in = in;
+ stream.avail_in = inlen;
+ stream.total_in = 0;
+ stream.next_out = out;
+ stream.avail_out = outlen;
+ stream.total_out = 0;
+
+ err = zlib_deflate(&stream, Z_FINISH);
+ if (err != Z_STREAM_END)
+ goto error;
+
+ err = zlib_deflateEnd(&stream);
+ if (err != Z_OK)
+ goto error;
+
+ if (stream.total_out >= stream.total_in)
+ goto error;
+
+ ret = stream.total_out;
+error:
return ret;
+}
+/* Compress the text from big_oops_buf into oops_buf. */
+static int zip_oops(size_t text_len)
+{
+ struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
+ int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
+ oops_data_sz);
+ if (zipped_len < 0) {
+ pr_err("nvram: compression failed; returned %d\n", zipped_len);
+ pr_err("nvram: logging uncompressed oops/panic report\n");
+ return -1;
+ }
+ oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+ oops_hdr->report_length = cpu_to_be16(zipped_len);
+ oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+ return 0;
}
-static ssize_t dev_nvram_write(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
+#ifdef CONFIG_PSTORE
+static int nvram_pstore_open(struct pstore_info *psi)
{
- ssize_t ret;
- char *tmp = NULL;
- ssize_t size;
+ /* Reset the iterator to start reading partitions again */
+ read_type = -1;
+ return 0;
+}
- ret = -ENODEV;
- if (!ppc_md.nvram_size)
- goto out;
+/**
+ * nvram_pstore_write - pstore write callback for nvram
+ * @record: pstore record to write, with @id to be set
+ *
+ * Called by pstore_dump() when an oops or panic report is logged in the
+ * printk buffer.
+ * Returns 0 on successful write.
+ */
+static int nvram_pstore_write(struct pstore_record *record)
+{
+ int rc;
+ unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
+ struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
- ret = 0;
- size = ppc_md.nvram_size();
- if (*ppos >= size || size < 0)
- goto out;
+ /* part 1 has the recent messages from printk buffer */
+ if (record->part > 1 || (record->type != PSTORE_TYPE_DMESG))
+ return -1;
- count = min_t(size_t, count, size - *ppos);
- count = min(count, PAGE_SIZE);
+ if (clobbering_unread_rtas_event())
+ return -1;
- ret = -ENOMEM;
- tmp = kmalloc(count, GFP_KERNEL);
- if (!tmp)
- goto out;
+ oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+ oops_hdr->report_length = cpu_to_be16(record->size);
+ oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
- ret = -EFAULT;
- if (copy_from_user(tmp, buf, count))
- goto out;
+ if (record->compressed)
+ err_type = ERR_TYPE_KERNEL_PANIC_GZ;
- ret = ppc_md.nvram_write(tmp, count, ppos);
+ rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
+ (int) (sizeof(*oops_hdr) + record->size), err_type,
+ record->count);
-out:
- kfree(tmp);
- return ret;
+ if (rc != 0)
+ return rc;
+ record->id = record->part;
+ return 0;
}
-static long dev_nvram_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg)
+/*
+ * Reads the oops/panic report, rtas, of-config and common partition.
+ * Returns the length of the data we read from each partition.
+ * Returns 0 if we've been called before.
+ */
+static ssize_t nvram_pstore_read(struct pstore_record *record)
{
- switch(cmd) {
-#ifdef CONFIG_PPC_PMAC
- case OBSOLETE_PMAC_NVRAM_GET_OFFSET:
- printk(KERN_WARNING "nvram: Using obsolete PMAC_NVRAM_GET_OFFSET ioctl\n");
- case IOC_NVRAM_GET_OFFSET: {
- int part, offset;
-
- if (!machine_is(powermac))
- return -EINVAL;
- if (copy_from_user(&part, (void __user*)arg, sizeof(part)) != 0)
- return -EFAULT;
- if (part < pmac_nvram_OF || part > pmac_nvram_NR)
- return -EINVAL;
- offset = pmac_get_partition(part);
- if (offset < 0)
- return offset;
- if (copy_to_user((void __user*)arg, &offset, sizeof(offset)) != 0)
- return -EFAULT;
+ struct oops_log_info *oops_hdr;
+ unsigned int err_type, id_no, size = 0;
+ struct nvram_os_partition *part = NULL;
+ char *buff = NULL;
+ int sig = 0;
+ loff_t p;
+
+ read_type++;
+
+ switch (nvram_type_ids[read_type]) {
+ case PSTORE_TYPE_DMESG:
+ part = &oops_log_partition;
+ record->type = PSTORE_TYPE_DMESG;
+ break;
+ case PSTORE_TYPE_PPC_COMMON:
+ sig = NVRAM_SIG_SYS;
+ part = &common_partition;
+ record->type = PSTORE_TYPE_PPC_COMMON;
+ record->id = PSTORE_TYPE_PPC_COMMON;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+ break;
+#ifdef CONFIG_PPC_PSERIES
+ case PSTORE_TYPE_PPC_RTAS:
+ part = &rtas_log_partition;
+ record->type = PSTORE_TYPE_PPC_RTAS;
+ record->time.tv_sec = last_rtas_event;
+ record->time.tv_nsec = 0;
+ break;
+ case PSTORE_TYPE_PPC_OF:
+ sig = NVRAM_SIG_OF;
+ part = &of_config_partition;
+ record->type = PSTORE_TYPE_PPC_OF;
+ record->id = PSTORE_TYPE_PPC_OF;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+ break;
+#endif
+#ifdef CONFIG_PPC_POWERNV
+ case PSTORE_TYPE_PPC_OPAL:
+ sig = NVRAM_SIG_FW;
+ part = &skiboot_partition;
+ record->type = PSTORE_TYPE_PPC_OPAL;
+ record->id = PSTORE_TYPE_PPC_OPAL;
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+ break;
+#endif
+ default:
return 0;
}
-#endif /* CONFIG_PPC_PMAC */
- default:
- return -EINVAL;
+
+ if (!part->os_partition) {
+ p = nvram_find_partition(part->name, sig, &size);
+ if (p <= 0) {
+ pr_err("nvram: Failed to find partition %s, "
+ "err %d\n", part->name, (int)p);
+ return 0;
+ }
+ part->index = p;
+ part->size = size;
+ }
+
+ buff = kmalloc(part->size, GFP_KERNEL);
+
+ if (!buff)
+ return -ENOMEM;
+
+ if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
+ kfree(buff);
+ return 0;
}
+
+ record->count = 0;
+
+ if (part->os_partition)
+ record->id = id_no;
+
+ if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
+ size_t length, hdr_size;
+
+ oops_hdr = (struct oops_log_info *)buff;
+ if (be16_to_cpu(oops_hdr->version) < OOPS_HDR_VERSION) {
+ /* Old format oops header had 2-byte record size */
+ hdr_size = sizeof(u16);
+ length = be16_to_cpu(oops_hdr->version);
+ record->time.tv_sec = 0;
+ record->time.tv_nsec = 0;
+ } else {
+ hdr_size = sizeof(*oops_hdr);
+ length = be16_to_cpu(oops_hdr->report_length);
+ record->time.tv_sec = be64_to_cpu(oops_hdr->timestamp);
+ record->time.tv_nsec = 0;
+ }
+ record->buf = kmemdup(buff + hdr_size, length, GFP_KERNEL);
+ kfree(buff);
+ if (record->buf == NULL)
+ return -ENOMEM;
+
+ record->ecc_notice_size = 0;
+ if (err_type == ERR_TYPE_KERNEL_PANIC_GZ)
+ record->compressed = true;
+ else
+ record->compressed = false;
+ return length;
+ }
+
+ record->buf = buff;
+ return part->size;
}
-const struct file_operations nvram_fops = {
- .owner = THIS_MODULE,
- .llseek = dev_nvram_llseek,
- .read = dev_nvram_read,
- .write = dev_nvram_write,
- .unlocked_ioctl = dev_nvram_ioctl,
+static struct pstore_info nvram_pstore_info = {
+ .owner = THIS_MODULE,
+ .name = "nvram",
+ .flags = PSTORE_FLAGS_DMESG,
+ .open = nvram_pstore_open,
+ .read = nvram_pstore_read,
+ .write = nvram_pstore_write,
};
-static struct miscdevice nvram_dev = {
- NVRAM_MINOR,
- "nvram",
- &nvram_fops
-};
+static int __init nvram_pstore_init(void)
+{
+ int rc = 0;
+
+ if (machine_is(pseries)) {
+ nvram_type_ids[2] = PSTORE_TYPE_PPC_RTAS;
+ nvram_type_ids[3] = PSTORE_TYPE_PPC_OF;
+ } else
+ nvram_type_ids[2] = PSTORE_TYPE_PPC_OPAL;
+
+ nvram_pstore_info.buf = oops_data;
+ nvram_pstore_info.bufsize = oops_data_sz;
+
+ rc = pstore_register(&nvram_pstore_info);
+ if (rc && (rc != -EPERM))
+ /* Print error only when pstore.backend == nvram */
+ pr_err("nvram: pstore_register() failed, returned %d. "
+ "Defaults to kmsg_dump\n", rc);
+
+ return rc;
+}
+#else
+static int __init nvram_pstore_init(void)
+{
+ return -1;
+}
+#endif
+
+void __init nvram_init_oops_partition(int rtas_partition_exists)
+{
+ int rc;
+
+ rc = nvram_init_os_partition(&oops_log_partition);
+ if (rc != 0) {
+#ifdef CONFIG_PPC_PSERIES
+ if (!rtas_partition_exists) {
+ pr_err("nvram: Failed to initialize oops partition!");
+ return;
+ }
+ pr_notice("nvram: Using %s partition to log both"
+ " RTAS errors and oops/panic reports\n",
+ rtas_log_partition.name);
+ memcpy(&oops_log_partition, &rtas_log_partition,
+ sizeof(rtas_log_partition));
+#else
+ pr_err("nvram: Failed to initialize oops partition!");
+ return;
+#endif
+ }
+ oops_buf = kmalloc(oops_log_partition.size, GFP_KERNEL);
+ if (!oops_buf) {
+ pr_err("nvram: No memory for %s partition\n",
+ oops_log_partition.name);
+ return;
+ }
+ oops_data = oops_buf + sizeof(struct oops_log_info);
+ oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
+
+ rc = nvram_pstore_init();
+ if (!rc)
+ return;
+
+ /*
+ * Figure compression (preceded by elimination of each line's <n>
+ * severity prefix) will reduce the oops/panic report to at most
+ * 45% of its original size.
+ */
+ big_oops_buf_sz = (oops_data_sz * 100) / 45;
+ big_oops_buf = kmalloc(big_oops_buf_sz, GFP_KERNEL);
+ if (big_oops_buf) {
+ stream.workspace = kmalloc(zlib_deflate_workspacesize(
+ WINDOW_BITS, MEM_LEVEL), GFP_KERNEL);
+ if (!stream.workspace) {
+ pr_err("nvram: No memory for compression workspace; "
+ "skipping compression of %s partition data\n",
+ oops_log_partition.name);
+ kfree(big_oops_buf);
+ big_oops_buf = NULL;
+ }
+ } else {
+ pr_err("No memory for uncompressed %s data; "
+ "skipping compression\n", oops_log_partition.name);
+ stream.workspace = NULL;
+ }
+
+ rc = kmsg_dump_register(&nvram_kmsg_dumper);
+ if (rc != 0) {
+ pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
+ kfree(oops_buf);
+ kfree(big_oops_buf);
+ kfree(stream.workspace);
+ }
+}
+
+/*
+ * This is our kmsg_dump callback, called after an oops or panic report
+ * has been written to the printk buffer. We want to capture as much
+ * of the printk buffer as possible. First, capture as much as we can
+ * that we think will compress sufficiently to fit in the lnx,oops-log
+ * partition. If that's too much, go back and capture uncompressed text.
+ */
+static void oops_to_nvram(struct kmsg_dumper *dumper,
+ struct kmsg_dump_detail *detail)
+{
+ struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
+ static unsigned int oops_count = 0;
+ static struct kmsg_dump_iter iter;
+ static bool panicking = false;
+ static DEFINE_SPINLOCK(lock);
+ unsigned long flags;
+ size_t text_len;
+ unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ;
+ int rc = -1;
+
+ switch (detail->reason) {
+ case KMSG_DUMP_SHUTDOWN:
+ /* These are almost always orderly shutdowns. */
+ return;
+ case KMSG_DUMP_OOPS:
+ break;
+ case KMSG_DUMP_PANIC:
+ panicking = true;
+ break;
+ case KMSG_DUMP_EMERG:
+ if (panicking)
+ /* Panic report already captured. */
+ return;
+ break;
+ default:
+ pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n",
+ __func__, (int) detail->reason);
+ return;
+ }
+
+ if (clobbering_unread_rtas_event())
+ return;
+
+ if (!spin_trylock_irqsave(&lock, flags))
+ return;
+
+ if (big_oops_buf) {
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false,
+ big_oops_buf, big_oops_buf_sz, &text_len);
+ rc = zip_oops(text_len);
+ }
+ if (rc != 0) {
+ kmsg_dump_rewind(&iter);
+ kmsg_dump_get_buffer(&iter, false,
+ oops_data, oops_data_sz, &text_len);
+ err_type = ERR_TYPE_KERNEL_PANIC;
+ oops_hdr->version = cpu_to_be16(OOPS_HDR_VERSION);
+ oops_hdr->report_length = cpu_to_be16(text_len);
+ oops_hdr->timestamp = cpu_to_be64(ktime_get_real_seconds());
+ }
+
+ (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
+ (int) (sizeof(*oops_hdr) + text_len), err_type,
+ ++oops_count);
+
+ spin_unlock_irqrestore(&lock, flags);
+}
#ifdef DEBUG_NVRAM
static void __init nvram_print_partitions(char * label)
@@ -253,7 +755,7 @@ static unsigned char __init nvram_checksum(struct nvram_header *p)
* Per the criteria passed via nvram_remove_partition(), should this
* partition be removed? 1=remove, 0=keep
*/
-static int nvram_can_remove_partition(struct nvram_partition *part,
+static int __init nvram_can_remove_partition(struct nvram_partition *part,
const char *name, int sig, const char *exceptions[])
{
if (part->header.signature != sig)
@@ -292,7 +794,7 @@ int __init nvram_remove_partition(const char *name, int sig,
/* Make partition a free partition */
part->header.signature = NVRAM_SIG_FREE;
- strncpy(part->header.name, "wwwwwwwwwwww", 12);
+ memset(part->header.name, 'w', 12);
part->header.checksum = nvram_checksum(&part->header);
rc = nvram_write_header(part);
if (rc <= 0) {
@@ -310,8 +812,8 @@ int __init nvram_remove_partition(const char *name, int sig,
}
if (prev) {
prev->header.length += part->header.length;
- prev->header.checksum = nvram_checksum(&part->header);
- rc = nvram_write_header(part);
+ prev->header.checksum = nvram_checksum(&prev->header);
+ rc = nvram_write_header(prev);
if (rc <= 0) {
printk(KERN_ERR "nvram_remove_partition: nvram_write failed (%d)\n", rc);
return rc;
@@ -349,9 +851,11 @@ loff_t __init nvram_create_partition(const char *name, int sig,
long size = 0;
int rc;
+ BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
+
/* Convert sizes from bytes to blocks */
- req_size = _ALIGN_UP(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
- min_size = _ALIGN_UP(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ req_size = ALIGN(req_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
+ min_size = ALIGN(min_size, NVRAM_BLOCK_LEN) / NVRAM_BLOCK_LEN;
/* If no minimum size specified, make it the same as the
* requested size
@@ -386,22 +890,22 @@ loff_t __init nvram_create_partition(const char *name, int sig,
return -ENOSPC;
/* Create our OS partition */
- new_part = kmalloc(sizeof(*new_part), GFP_KERNEL);
+ new_part = kzalloc(sizeof(*new_part), GFP_KERNEL);
if (!new_part) {
- pr_err("nvram_create_os_partition: kmalloc failed\n");
+ pr_err("%s: kmalloc failed\n", __func__);
return -ENOMEM;
}
new_part->index = free_part->index;
new_part->header.signature = sig;
new_part->header.length = size;
- strncpy(new_part->header.name, name, 12);
+ memcpy(new_part->header.name, name, strnlen(name, sizeof(new_part->header.name)));
new_part->header.checksum = nvram_checksum(&new_part->header);
rc = nvram_write_header(new_part);
if (rc <= 0) {
- pr_err("nvram_create_os_partition: nvram_write_header "
- "failed (%d)\n", rc);
+ pr_err("%s: nvram_write_header failed (%d)\n", __func__, rc);
+ kfree(new_part);
return rc;
}
list_add_tail(&new_part->partition, &free_part->partition);
@@ -413,8 +917,8 @@ loff_t __init nvram_create_partition(const char *name, int sig,
free_part->header.checksum = nvram_checksum(&free_part->header);
rc = nvram_write_header(free_part);
if (rc <= 0) {
- pr_err("nvram_create_os_partition: nvram_write_header "
- "failed (%d)\n", rc);
+ pr_err("%s: nvram_write_header failed (%d)\n",
+ __func__, rc);
return rc;
}
} else {
@@ -428,11 +932,12 @@ loff_t __init nvram_create_partition(const char *name, int sig,
tmp_index += NVRAM_BLOCK_LEN) {
rc = ppc_md.nvram_write(nv_init_vals, NVRAM_BLOCK_LEN, &tmp_index);
if (rc <= 0) {
- pr_err("nvram_create_partition: nvram_write failed (%d)\n", rc);
+ pr_err("%s: nvram_write failed (%d)\n",
+ __func__, rc);
return rc;
}
}
-
+
return new_part->index + NVRAM_HEADER_LEN;
}
@@ -525,7 +1030,7 @@ int __init nvram_scan_partitions(void)
"detected: 0-length partition\n");
goto out;
}
- tmp_part = kmalloc(sizeof(struct nvram_partition), GFP_KERNEL);
+ tmp_part = kmalloc(sizeof(*tmp_part), GFP_KERNEL);
err = -ENOMEM;
if (!tmp_part) {
printk(KERN_ERR "nvram_scan_partitions: kmalloc failed\n");
@@ -548,30 +1053,3 @@ int __init nvram_scan_partitions(void)
kfree(header);
return err;
}
-
-static int __init nvram_init(void)
-{
- int rc;
-
- BUILD_BUG_ON(NVRAM_BLOCK_LEN != 16);
-
- if (ppc_md.nvram_size == NULL || ppc_md.nvram_size() <= 0)
- return -ENODEV;
-
- rc = misc_register(&nvram_dev);
- if (rc != 0) {
- printk(KERN_ERR "nvram_init: failed to register device\n");
- return rc;
- }
-
- return rc;
-}
-
-void __exit nvram_cleanup(void)
-{
- misc_deregister( &nvram_dev );
-}
-
-module_init(nvram_init);
-module_exit(nvram_cleanup);
-MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
deleted file mode 100644
index a7b743076720..000000000000
--- a/arch/powerpc/kernel/of_platform.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp.
- * <benh@kernel.crashing.org>
- * and Arnd Bergmann, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#undef DEBUG
-
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/mod_devicetable.h>
-#include <linux/pci.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/atomic.h>
-
-#include <asm/errno.h>
-#include <asm/topology.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-#include <asm/eeh.h>
-
-#ifdef CONFIG_PPC_OF_PLATFORM_PCI
-
-/* The probing of PCI controllers from of_platform is currently
- * 64 bits only, mostly due to gratuitous differences between
- * the 32 and 64 bits PCI code on PowerPC and the 32 bits one
- * lacking some bits needed here.
- */
-
-static int of_pci_phb_probe(struct platform_device *dev)
-{
- struct pci_controller *phb;
-
- /* Check if we can do that ... */
- if (ppc_md.pci_setup_phb == NULL)
- return -ENODEV;
-
- pr_info("Setting up PCI bus %s\n", dev->dev.of_node->full_name);
-
- /* Alloc and setup PHB data structure */
- phb = pcibios_alloc_controller(dev->dev.of_node);
- if (!phb)
- return -ENODEV;
-
- /* Setup parent in sysfs */
- phb->parent = &dev->dev;
-
- /* Setup the PHB using arch provided callback */
- if (ppc_md.pci_setup_phb(phb)) {
- pcibios_free_controller(phb);
- return -ENODEV;
- }
-
- /* Process "ranges" property */
- pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0);
-
- /* Init pci_dn data structures */
- pci_devs_phb_init_dynamic(phb);
-
- /* Create EEH devices for the PHB */
- eeh_dev_phb_init_dynamic(phb);
-
- /* Register devices with EEH */
- if (dev->dev.of_node->child)
- eeh_add_device_tree_early(dev->dev.of_node);
-
- /* Scan the bus */
- pcibios_scan_phb(phb);
- if (phb->bus == NULL)
- return -ENXIO;
-
- /* Claim resources. This might need some rework as well depending
- * whether we are doing probe-only or not, like assigning unassigned
- * resources etc...
- */
- pcibios_claim_one_bus(phb->bus);
-
- /* Finish EEH setup */
- eeh_add_device_tree_late(phb->bus);
-
- /* Add probed PCI devices to the device model */
- pci_bus_add_devices(phb->bus);
-
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(phb->bus);
-
- return 0;
-}
-
-static struct of_device_id of_pci_phb_ids[] = {
- { .type = "pci", },
- { .type = "pcix", },
- { .type = "pcie", },
- { .type = "pciex", },
- { .type = "ht", },
- {}
-};
-
-static struct platform_driver of_pci_phb_driver = {
- .probe = of_pci_phb_probe,
- .driver = {
- .name = "of-pci",
- .owner = THIS_MODULE,
- .of_match_table = of_pci_phb_ids,
- },
-};
-
-static __init int of_pci_phb_init(void)
-{
- return platform_driver_register(&of_pci_phb_driver);
-}
-
-device_initcall(of_pci_phb_init);
-
-#endif /* CONFIG_PPC_OF_PLATFORM_PCI */
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000000000000..2e83702bf9ba
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2017, Anju T, IBM Corp.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/jump_label.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <asm/kprobes.h>
+#include <asm/ptrace.h>
+#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+#include <asm/inst.h>
+
+#define TMPL_CALL_HDLR_IDX (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP_IDX (optprobe_template_op_address - optprobe_template_entry)
+#define TMPL_INSN_IDX (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX (optprobe_template_end - optprobe_template_entry)
+
+static bool insn_page_in_use;
+
+void *alloc_optinsn_page(void)
+{
+ if (insn_page_in_use)
+ return NULL;
+ insn_page_in_use = true;
+ return &optinsn_slot;
+}
+
+void free_optinsn_page(void *page)
+{
+ insn_page_in_use = false;
+}
+
+/*
+ * Check if we can optimize this probe. Returns NIP post-emulation if this can
+ * be optimized and 0 otherwise.
+ */
+static unsigned long can_optimize(struct kprobe *p)
+{
+ struct pt_regs regs;
+ struct instruction_op op;
+ unsigned long nip = 0;
+ unsigned long addr = (unsigned long)p->addr;
+
+ /*
+ * kprobe placed for kretprobe during boot time
+ * has a 'nop' instruction, which can be emulated.
+ * So further checks can be skipped.
+ */
+ if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline)
+ return addr + sizeof(kprobe_opcode_t);
+
+ /*
+ * We only support optimizing kernel addresses, but not
+ * module addresses.
+ *
+ * FIXME: Optimize kprobes placed in module addresses.
+ */
+ if (!is_kernel_addr(addr))
+ return 0;
+
+ memset(&regs, 0, sizeof(struct pt_regs));
+ regs.nip = addr;
+ regs.trap = 0x0;
+ regs.msr = MSR_KERNEL;
+
+ /*
+ * Kprobes placed on conditional branch instructions are
+ * not optimized, as we can't predict the nip in advance with
+ * dummy pt_regs and cannot ensure that the return branch
+ * from the detour buffer falls within branch range (i.e. 32MB).
+ * A branch back from the trampoline is set up in the detour buffer
+ * to the nip returned by the analyse_instr() here.
+ *
+ * Ensure that the instruction is not a conditional branch,
+ * and that it can be emulated.
+ */
+ if (!is_conditional_branch(ppc_inst_read(p->ainsn.insn)) &&
+ analyse_instr(&op, &regs, ppc_inst_read(p->ainsn.insn)) == 1) {
+ emulate_update_regs(&regs, &op);
+ nip = regs.nip;
+ }
+
+ return nip;
+}
+
+static void optimized_callback(struct optimized_kprobe *op,
+ struct pt_regs *regs)
+{
+ /* This is possible if op is under delayed unoptimizing */
+ if (kprobe_disabled(&op->kp))
+ return;
+
+ preempt_disable();
+
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(&op->kp);
+ } else {
+ __this_cpu_write(current_kprobe, &op->kp);
+ regs_set_return_ip(regs, (unsigned long)op->kp.addr);
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ opt_pre_handler(&op->kp, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+
+ preempt_enable();
+}
+NOKPROBE_SYMBOL(optimized_callback);
+
+void arch_remove_optimized_kprobe(struct optimized_kprobe *op)
+{
+ if (op->optinsn.insn) {
+ free_optinsn_slot(op->optinsn.insn, 1);
+ op->optinsn.insn = NULL;
+ }
+}
+
+static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+ patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HI(val))));
+ patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
+}
+
+/*
+ * Generate instructions to load provided immediate 64-bit value
+ * to register 'reg' and patch these instructions at 'addr'.
+ */
+static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr)
+{
+ patch_instruction(addr++, ppc_inst(PPC_RAW_LIS(reg, PPC_HIGHEST(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_HIGHER(val))));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_SLDI(reg, reg, 32)));
+ patch_instruction(addr++, ppc_inst(PPC_RAW_ORIS(reg, reg, PPC_HI(val))));
+ patch_instruction(addr, ppc_inst(PPC_RAW_ORI(reg, reg, PPC_LO(val))));
+}
+
+static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr)
+{
+ if (IS_ENABLED(CONFIG_PPC64))
+ patch_imm64_load_insns(val, reg, addr);
+ else
+ patch_imm32_load_insns(val, reg, addr);
+}
+
+int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
+{
+ ppc_inst_t branch_op_callback, branch_emulate_step, temp;
+ unsigned long op_callback_addr, emulate_step_addr;
+ kprobe_opcode_t *buff;
+ long b_offset;
+ unsigned long nip, size;
+ int rc, i;
+
+ nip = can_optimize(p);
+ if (!nip)
+ return -EILSEQ;
+
+ /* Allocate instruction slot for detour buffer */
+ buff = get_optinsn_slot();
+ if (!buff)
+ return -ENOMEM;
+
+ /*
+ * OPTPROBE uses 'b' instruction to branch to optinsn.insn.
+ *
+ * The target address has to be relatively nearby, to permit use
+ * of branch instruction in powerpc, because the address is specified
+ * in an immediate field in the instruction opcode itself, ie 24 bits
+ * in the opcode specify the address. Therefore the address should
+ * be within 32MB on either side of the current instruction.
+ */
+ b_offset = (unsigned long)buff - (unsigned long)p->addr;
+ if (!is_offset_in_branch_range(b_offset))
+ goto error;
+
+ /* Check if the return address is also within 32MB range */
+ b_offset = (unsigned long)(buff + TMPL_RET_IDX) - nip;
+ if (!is_offset_in_branch_range(b_offset))
+ goto error;
+
+ /* Setup template */
+ /* We can optimize this via patch_instruction_window later */
+ size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
+ pr_devel("Copying template to %p, size %lu\n", buff, size);
+ for (i = 0; i < size; i++) {
+ rc = patch_instruction(buff + i, ppc_inst(*(optprobe_template_entry + i)));
+ if (rc < 0)
+ goto error;
+ }
+
+ /*
+ * Fixup the template with instructions to:
+ * 1. load the address of the optimized_kprobe structure (op) into r3
+ */
+ patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX);
+
+ /*
+ * 2. branch to optimized_callback() and emulate_step()
+ */
+ op_callback_addr = ppc_kallsyms_lookup_name("optimized_callback");
+ emulate_step_addr = ppc_kallsyms_lookup_name("emulate_step");
+ if (!op_callback_addr || !emulate_step_addr) {
+ WARN(1, "Unable to lookup optimized_callback()/emulate_step()\n");
+ goto error;
+ }
+
+ rc = create_branch(&branch_op_callback, buff + TMPL_CALL_HDLR_IDX,
+ op_callback_addr, BRANCH_SET_LINK);
+
+ rc |= create_branch(&branch_emulate_step, buff + TMPL_EMULATE_IDX,
+ emulate_step_addr, BRANCH_SET_LINK);
+
+ if (rc)
+ goto error;
+
+ patch_instruction(buff + TMPL_CALL_HDLR_IDX, branch_op_callback);
+ patch_instruction(buff + TMPL_EMULATE_IDX, branch_emulate_step);
+
+ /*
+ * 3. load instruction to be emulated into relevant register, and
+ */
+ temp = ppc_inst_read(p->ainsn.insn);
+ patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX);
+
+ /*
+ * 4. branch back from trampoline
+ */
+ patch_branch(buff + TMPL_RET_IDX, nip, 0);
+
+ flush_icache_range((unsigned long)buff, (unsigned long)(&buff[TMPL_END_IDX]));
+
+ op->optinsn.insn = buff;
+
+ return 0;
+
+error:
+ free_optinsn_slot(buff, 0);
+ return -ERANGE;
+
+}
+
+int arch_prepared_optinsn(struct arch_optimized_insn *optinsn)
+{
+ return optinsn->insn != NULL;
+}
+
+/*
+ * On powerpc, Optprobes always replaces one instruction (4 bytes
+ * aligned and 4 bytes long). It is impossible to encounter another
+ * kprobe in this address range. So always return 0.
+ */
+int arch_check_optimized_kprobe(struct optimized_kprobe *op)
+{
+ return 0;
+}
+
+void arch_optimize_kprobes(struct list_head *oplist)
+{
+ ppc_inst_t instr;
+ struct optimized_kprobe *op;
+ struct optimized_kprobe *tmp;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /*
+ * Backup instructions which will be replaced
+ * by jump address
+ */
+ memcpy(op->optinsn.copied_insn, op->kp.addr, RELATIVEJUMP_SIZE);
+ create_branch(&instr, op->kp.addr, (unsigned long)op->optinsn.insn, 0);
+ patch_instruction(op->kp.addr, instr);
+ list_del_init(&op->list);
+ }
+}
+
+void arch_unoptimize_kprobe(struct optimized_kprobe *op)
+{
+ arch_arm_kprobe(&op->kp);
+}
+
+void arch_unoptimize_kprobes(struct list_head *oplist, struct list_head *done_list)
+{
+ struct optimized_kprobe *op;
+ struct optimized_kprobe *tmp;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ arch_unoptimize_kprobe(op);
+ list_move(&op->list, done_list);
+ }
+}
+
+int arch_within_optimized_kprobe(struct optimized_kprobe *op, kprobe_opcode_t *addr)
+{
+ return (op->kp.addr <= addr &&
+ op->kp.addr + (RELATIVEJUMP_SIZE / sizeof(kprobe_opcode_t)) > addr);
+}
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 000000000000..35932f45fb4e
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2017, Anju T, IBM Corp.
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_PPC64
+#define SAVE_30GPRS(base) SAVE_GPRS(2, 31, base)
+#define REST_30GPRS(base) REST_GPRS(2, 31, base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop; nop; nop
+#else
+#define SAVE_30GPRS(base) stmw r2, GPR2(base)
+#define REST_30GPRS(base) lmw r2, GPR2(base)
+#define TEMPLATE_FOR_IMM_LOAD_INSNS nop; nop; nop
+#endif
+
+#define OPT_SLOT_SIZE 65536
+
+ .balign 4
+
+ /*
+ * Reserve an area to allocate slots for detour buffer.
+ * This is part of .text section (rather than vmalloc area)
+ * as this needs to be within 32MB of the probed address.
+ */
+ .global optinsn_slot
+optinsn_slot:
+ .space OPT_SLOT_SIZE
+
+ /*
+ * Optprobe template:
+ * This template gets copied into one of the slots in optinsn_slot
+ * and gets fixed up with real optprobe structures et al.
+ */
+ .global optprobe_template_entry
+optprobe_template_entry:
+ /* Create an in-memory pt_regs */
+ PPC_STLU r1,-INT_FRAME_SIZE(r1)
+ SAVE_GPR(0,r1)
+ /* Save the previous SP into stack */
+ addi r0,r1,INT_FRAME_SIZE
+ PPC_STL r0,GPR1(r1)
+ SAVE_30GPRS(r1)
+ /* Save SPRS */
+ mfmsr r5
+ PPC_STL r5,_MSR(r1)
+ li r5,0x700
+ PPC_STL r5,_TRAP(r1)
+ li r5,0
+ PPC_STL r5,ORIG_GPR3(r1)
+ PPC_STL r5,RESULT(r1)
+ mfctr r5
+ PPC_STL r5,_CTR(r1)
+ mflr r5
+ PPC_STL r5,_LINK(r1)
+ mfspr r5,SPRN_XER
+ PPC_STL r5,_XER(r1)
+ mfcr r5
+ PPC_STL r5,_CCR(r1)
+#ifdef CONFIG_PPC64
+ lbz r5,PACAIRQSOFTMASK(r13)
+ std r5,SOFTE(r1)
+#endif
+
+ /*
+ * We may get here from a module, so load the kernel TOC in r2.
+ * The original TOC gets restored when pt_regs is restored
+ * further below.
+ */
+#ifdef CONFIG_PPC64
+ LOAD_PACA_TOC()
+#endif
+
+ .global optprobe_template_op_address
+optprobe_template_op_address:
+ /*
+ * Parameters to optimized_callback():
+ * 1. optimized_kprobe structure in r3
+ */
+ TEMPLATE_FOR_IMM_LOAD_INSNS
+
+ /* 2. pt_regs pointer in r4 */
+ addi r4,r1,STACK_INT_FRAME_REGS
+
+ .global optprobe_template_call_handler
+optprobe_template_call_handler:
+ /* Branch to optimized_callback() */
+ nop
+
+ /*
+ * Parameters for instruction emulation:
+ * 1. Pass SP in register r3.
+ */
+ addi r3,r1,STACK_INT_FRAME_REGS
+
+ .global optprobe_template_insn
+optprobe_template_insn:
+ /* 2. Pass instruction to be emulated in r4 */
+ TEMPLATE_FOR_IMM_LOAD_INSNS
+
+ .global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+ /* Branch to emulate_step() */
+ nop
+
+ /*
+ * All done.
+ * Now, restore the registers...
+ */
+ PPC_LL r5,_MSR(r1)
+ mtmsr r5
+ PPC_LL r5,_CTR(r1)
+ mtctr r5
+ PPC_LL r5,_LINK(r1)
+ mtlr r5
+ PPC_LL r5,_XER(r1)
+ mtxer r5
+ PPC_LL r5,_CCR(r1)
+ mtcr r5
+ REST_GPR(0,r1)
+ REST_30GPRS(r1)
+ /* Restore the previous SP */
+ addi r1,r1,INT_FRAME_SIZE
+
+ .global optprobe_template_ret
+optprobe_template_ret:
+ /* ... and jump back from trampoline */
+ nop
+
+ .global optprobe_template_end
+optprobe_template_end:
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index d6e195e8cd4c..7502066c3c53 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -1,131 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/smp.h>
#include <linux/export.h>
#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/numa.h>
+#include <linux/pgtable.h>
#include <asm/lppaca.h>
#include <asm/paca.h>
#include <asm/sections.h>
-#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+#include "setup.h"
+
+#ifndef CONFIG_SMP
+#define boot_cpuid 0
+#endif
+
+static void *__init alloc_paca_data(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ void *ptr;
+ int nid;
+
+ /*
+ * boot_cpuid paca is allocated very early before cpu_to_node is up.
+ * Set bottom-up mode, because the boot CPU should be on node-0,
+ * which will put its paca in the right place.
+ */
+ if (cpu == boot_cpuid) {
+ nid = NUMA_NO_NODE;
+ memblock_set_bottom_up(true);
+ } else {
+ nid = early_cpu_to_node(cpu);
+ }
+
+ ptr = memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+ limit, nid);
+ if (!ptr)
+ panic("cannot allocate paca data");
+
+ if (cpu == boot_cpuid)
+ memblock_set_bottom_up(false);
+
+ return ptr;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+
+#define LPPACA_SIZE 0x400
-/* This symbol is provided by the linker - let it fill in the paca
- * field correctly */
-extern unsigned long __toc_start;
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit,
+ int cpu)
+{
+ size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+ static unsigned long shared_lppaca_size;
+ static void *shared_lppaca;
+ void *ptr;
+
+ if (!shared_lppaca) {
+ memblock_set_bottom_up(true);
+
+ /*
+ * See Documentation/arch/powerpc/ultravisor.rst for more details.
+ *
+ * UV/HV data sharing is in PAGE_SIZE granularity. In order to
+ * minimize the number of pages shared, align the allocation to
+ * PAGE_SIZE.
+ */
+ shared_lppaca =
+ memblock_alloc_try_nid(shared_lppaca_total_size,
+ PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!shared_lppaca)
+ panic("cannot allocate shared data");
+
+ memblock_set_bottom_up(false);
+ uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+ shared_lppaca_total_size >> PAGE_SHIFT);
+ }
+
+ ptr = shared_lppaca + shared_lppaca_size;
+ shared_lppaca_size += size;
-#ifdef CONFIG_PPC_BOOK3S
+ /*
+ * This is very early in boot, so no harm done if the kernel crashes at
+ * this point.
+ */
+ BUG_ON(shared_lppaca_size > shared_lppaca_total_size);
+
+ return ptr;
+}
/*
- * The structure which the hypervisor knows about - this structure
- * should not cross a page boundary. The vpa_init/register_vpa call
- * is now known to fail if the lppaca structure crosses a page
- * boundary. The lppaca is also used on POWER5 pSeries boxes.
- * The lppaca is 640 bytes long, and cannot readily
- * change since the hypervisor knows its layout, so a 1kB alignment
- * will suffice to ensure that it doesn't cross a page boundary.
+ * See asm/lppaca.h for more detail.
+ *
+ * lppaca structures must be 1kB in size, L1 cache line aligned,
+ * and not cross 4kB boundary. A 1kB size and 1kB alignment will satisfy
+ * these requirements.
*/
-struct lppaca lppaca[] = {
- [0 ... (NR_LPPACAS-1)] = {
+static inline void init_lppaca(struct lppaca *lppaca)
+{
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
+
+ *lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(sizeof(struct lppaca)),
+ .size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
- .page_ins = 0,
- },
+ .page_ins = 0, };
};
-static struct lppaca *extra_lppacas;
-static long __initdata lppaca_size;
-
-static void __init allocate_lppacas(int nr_cpus, unsigned long limit)
+static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
- if (nr_cpus <= NR_LPPACAS)
- return;
+ struct lppaca *lp;
- lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) *
- (nr_cpus - NR_LPPACAS));
- extra_lppacas = __va(memblock_alloc_base(lppaca_size,
- PAGE_SIZE, limit));
-}
+ BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
-static struct lppaca * __init new_lppaca(int cpu)
-{
- struct lppaca *lp;
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ return NULL;
- if (cpu < NR_LPPACAS)
- return &lppaca[cpu];
+ if (is_secure_guest())
+ lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu);
+ else
+ lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
- lp = extra_lppacas + (cpu - NR_LPPACAS);
- *lp = lppaca[0];
+ init_lppaca(lp);
return lp;
}
+#endif /* CONFIG_PPC_PSERIES */
-static void __init free_lppacas(void)
-{
- long new_size = 0, nr;
-
- if (!lppaca_size)
- return;
- nr = num_possible_cpus() - NR_LPPACAS;
- if (nr > 0)
- new_size = PAGE_ALIGN(nr * sizeof(struct lppaca));
- if (new_size >= lppaca_size)
- return;
-
- memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size);
- lppaca_size = new_size;
-}
-
-#else
-
-static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { }
-static inline void free_lppacas(void) { }
-
-#endif /* CONFIG_PPC_BOOK3S */
-
-#ifdef CONFIG_PPC_STD_MMU_64
-
+#ifdef CONFIG_PPC_64S_HASH_MMU
/*
- * 3 persistent SLBs are registered here. The buffer will be zero
+ * 3 persistent SLBs are allocated here. The buffer will be zero
* initially, hence will all be invaild until we actually write them.
*
* If you make the number of persistent SLB entries dynamic, please also
* update PR KVM to flush and restore them accordingly.
*/
-static struct slb_shadow *slb_shadow;
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit)
+static struct slb_shadow * __init new_slb_shadow(int cpu, unsigned long limit)
{
- int size = PAGE_ALIGN(sizeof(struct slb_shadow) * nr_cpus);
- slb_shadow = __va(memblock_alloc_base(size, PAGE_SIZE, limit));
- memset(slb_shadow, 0, size);
-}
+ struct slb_shadow *s;
-static struct slb_shadow * __init init_slb_shadow(int cpu)
-{
- struct slb_shadow *s = &slb_shadow[cpu];
+ if (cpu != boot_cpuid) {
+ /*
+ * Boot CPU comes here before early_radix_enabled
+ * is parsed (e.g., for disable_radix). So allocate
+ * always and this will be fixed up in free_unused_pacas.
+ */
+ if (early_radix_enabled())
+ return NULL;
+ }
+
+ s = alloc_paca_data(sizeof(*s), L1_CACHE_BYTES, limit, cpu);
s->persistent = cpu_to_be32(SLB_NUM_BOLTED);
s->buffer_length = cpu_to_be32(sizeof(*s));
return s;
}
-
-#else /* CONFIG_PPC_STD_MMU_64 */
-
-static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
-
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_PPC_64S_HASH_MMU */
/* The Paca is an array with one entry per processor. Each contains an
* lppaca, which contains the information shared between the
@@ -136,24 +178,22 @@ static void __init allocate_slb_shadows(int nr_cpus, int limit) { }
* processors. The processor VPD array needs one entry per physical
* processor (not thread).
*/
-struct paca_struct *paca;
-EXPORT_SYMBOL(paca);
+struct paca_struct **paca_ptrs __read_mostly;
+EXPORT_SYMBOL(paca_ptrs);
void __init initialise_paca(struct paca_struct *new_paca, int cpu)
{
- /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
- * of the TOC can be addressed using a single machine instruction.
- */
- unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
-
-#ifdef CONFIG_PPC_BOOK3S
- new_paca->lppaca_ptr = new_lppaca(cpu);
-#else
+#ifdef CONFIG_PPC_PSERIES
+ new_paca->lppaca_ptr = NULL;
+#endif
+#ifdef CONFIG_PPC_BOOK3E_64
new_paca->kernel_pgd = swapper_pg_dir;
#endif
new_paca->lock_token = 0x8000;
new_paca->paca_index = cpu;
- new_paca->kernel_toc = kernel_toc;
+#ifndef CONFIG_PPC_KERNEL_PCREL
+ new_paca->kernel_toc = kernel_toc_addr();
+#endif
new_paca->kernelbase = (unsigned long) _stext;
/* Only set MSR:IR/DR when MMU is initialized */
new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
@@ -161,11 +201,11 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
new_paca->kexec_state = KEXEC_STATE_NONE;
new_paca->__current = &init_task;
new_paca->data_offset = 0xfeeeeeeeeeeeeeeeULL;
-#ifdef CONFIG_PPC_STD_MMU_64
- new_paca->slb_shadow_ptr = init_slb_shadow(cpu);
-#endif /* CONFIG_PPC_STD_MMU_64 */
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ new_paca->slb_shadow_ptr = NULL;
+#endif
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* For now -- if we have threads this will be adjusted later */
new_paca->tcd_ptr = &new_paca->tcd;
#endif
@@ -177,66 +217,107 @@ void setup_paca(struct paca_struct *new_paca)
/* Setup r13 */
local_paca = new_paca;
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* On Book3E, initialize the TLB miss exception frames */
mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb);
#else
- /* In HV mode, we setup both HPACA and PACA to avoid problems
+ /*
+ * In HV mode, we setup both HPACA and PACA to avoid problems
* if we do a GET_PACA() before the feature fixups have been
- * applied
+ * applied.
+ *
+ * Normally you should test against CPU_FTR_HVMODE, but CPU features
+ * are not yet set up when we first reach here.
*/
- if (cpu_has_feature(CPU_FTR_HVMODE))
+ if (mfmsr() & MSR_HV)
mtspr(SPRN_SPRG_HPACA, local_paca);
#endif
mtspr(SPRN_SPRG_PACA, local_paca);
}
-static int __initdata paca_size;
+static int __initdata paca_nr_cpu_ids;
+static int __initdata paca_ptrs_size;
+static int __initdata paca_struct_size;
-void __init allocate_pacas(void)
+void __init allocate_paca_ptrs(void)
{
- int cpu, limit;
+ paca_nr_cpu_ids = nr_cpu_ids;
- /*
- * We can't take SLB misses on the paca, and we want to access them
- * in real mode, so allocate them within the RMA and also within
- * the first segment.
- */
- limit = min(0x10000000ULL, ppc64_rma_size);
+ paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ paca_ptrs = memblock_alloc_raw(paca_ptrs_size, SMP_CACHE_BYTES);
+ if (!paca_ptrs)
+ panic("Failed to allocate %d bytes for paca pointers\n",
+ paca_ptrs_size);
- paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
+ memset(paca_ptrs, 0x88, paca_ptrs_size);
+}
- paca = __va(memblock_alloc_base(paca_size, PAGE_SIZE, limit));
- memset(paca, 0, paca_size);
+void __init allocate_paca(int cpu)
+{
+ u64 limit;
+ struct paca_struct *paca;
- printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n",
- paca_size, nr_cpu_ids, paca);
+ BUG_ON(cpu >= paca_nr_cpu_ids);
- allocate_lppacas(nr_cpu_ids, limit);
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * We access pacas in real mode, and cannot take SLB faults
+ * on them when in virtual mode, so allocate them accordingly.
+ */
+ limit = min(ppc64_bolted_size(), ppc64_rma_size);
+#else
+ limit = ppc64_rma_size;
+#endif
- allocate_slb_shadows(nr_cpu_ids, limit);
+ paca = alloc_paca_data(sizeof(struct paca_struct), L1_CACHE_BYTES,
+ limit, cpu);
+ paca_ptrs[cpu] = paca;
- /* Can't use for_each_*_cpu, as they aren't functional yet */
- for (cpu = 0; cpu < nr_cpu_ids; cpu++)
- initialise_paca(&paca[cpu], cpu);
+ initialise_paca(paca, cpu);
+#ifdef CONFIG_PPC_PSERIES
+ paca->lppaca_ptr = new_lppaca(cpu, limit);
+#endif
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ paca->slb_shadow_ptr = new_slb_shadow(cpu, limit);
+#endif
+ paca_struct_size += sizeof(struct paca_struct);
}
void __init free_unused_pacas(void)
{
- int new_size;
-
- new_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpu_ids);
-
- if (new_size >= paca_size)
- return;
-
- memblock_free(__pa(paca) + new_size, paca_size - new_size);
+ int new_ptrs_size;
+
+ new_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
+ if (new_ptrs_size < paca_ptrs_size)
+ memblock_phys_free(__pa(paca_ptrs) + new_ptrs_size,
+ paca_ptrs_size - new_ptrs_size);
+
+ paca_nr_cpu_ids = nr_cpu_ids;
+ paca_ptrs_size = new_ptrs_size;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ if (early_radix_enabled()) {
+ /* Ugly fixup, see new_slb_shadow() */
+ memblock_phys_free(__pa(paca_ptrs[boot_cpuid]->slb_shadow_ptr),
+ sizeof(struct slb_shadow));
+ paca_ptrs[boot_cpuid]->slb_shadow_ptr = NULL;
+ }
+#endif
- printk(KERN_DEBUG "Freed %u bytes for unused pacas\n",
- paca_size - new_size);
+ printk(KERN_DEBUG "Allocated %u bytes for %u pacas\n",
+ paca_ptrs_size + paca_struct_size, nr_cpu_ids);
+}
- paca_size = new_size;
+#ifdef CONFIG_PPC_64S_HASH_MMU
+void copy_mm_to_paca(struct mm_struct *mm)
+{
+ mm_context_t *context = &mm->context;
- free_lppacas();
+ VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+ memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+ LOW_SLICE_ARRAY_SZ);
+ memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+ TASK_SLICE_ARRAY_SZ(context));
}
+#endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index b2814e23e1ed..eac84d687b53 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Contains common pci routines for ALL ppc platform
* (based on pci_32.c and pci_64.c)
@@ -9,97 +10,198 @@
* Rework, based on alpha PCI code.
*
* Common pmac/prep/chrp pci routines. -- Cort
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/of_address.h>
#include <linux/of_pci.h>
#include <linux/mm.h>
+#include <linux/shmem_fs.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/irq.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/vgaarb.h>
+#include <linux/numa.h>
+#include <linux/msi.h>
+#include <linux/irqdomain.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/byteorder.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/eeh.h>
+#include <asm/setup.h>
+
+#include "../../../drivers/pci/pci.h"
+/* hose_spinlock protects accesses to the phb_bitmap. */
static DEFINE_SPINLOCK(hose_spinlock);
LIST_HEAD(hose_list);
-/* XXX kill that some day ... */
-static int global_phb_number; /* Global phb counter */
+/* For dynamic PHB numbering on get_phb_number(): max number of PHBs. */
+#define MAX_PHBS 0x10000
+
+/*
+ * For dynamic PHB numbering: used/free PHBs tracking bitmap.
+ * Accesses to this bitmap should be protected by hose_spinlock.
+ */
+static DECLARE_BITMAP(phb_bitmap, MAX_PHBS);
/* ISA Memory physical address */
resource_size_t isa_mem_base;
+EXPORT_SYMBOL(isa_mem_base);
-static struct dma_map_ops *pci_dma_ops = &dma_direct_ops;
+static const struct dma_map_ops *pci_dma_ops;
-void set_pci_dma_ops(struct dma_map_ops *dma_ops)
+void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops)
{
pci_dma_ops = dma_ops;
}
-struct dma_map_ops *get_pci_dma_ops(void)
+static int get_phb_number(struct device_node *dn)
{
- return pci_dma_ops;
+ int ret, phb_id = -1;
+ u64 prop;
+
+ /*
+ * Try fixed PHB numbering first, by checking archs and reading
+ * the respective device-tree properties. Firstly, try reading
+ * standard "linux,pci-domain", then try reading "ibm,opal-phbid"
+ * (only present in powernv OPAL environment), then try device-tree
+ * alias and, as a last resort, use the lower bits of the "reg" property.
+ */
+ ret = of_get_pci_domain_nr(dn);
+ if (ret >= 0) {
+ prop = ret;
+ ret = 0;
+ }
+ if (ret)
+ ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop);
+
+ if (ret) {
+ ret = of_alias_get_id(dn, "pci");
+ if (ret >= 0) {
+ prop = ret;
+ ret = 0;
+ }
+ }
+ if (ret) {
+ u32 prop_32;
+ ret = of_property_read_u32_index(dn, "reg", 1, &prop_32);
+ prop = prop_32;
+ }
+
+ if (!ret)
+ phb_id = (int)(prop & (MAX_PHBS - 1));
+
+ spin_lock(&hose_spinlock);
+
+ /* We need to be sure to not use the same PHB number twice. */
+ if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap))
+ goto out_unlock;
+
+ /* If everything fails, fall back to dynamic PHB numbering. */
+ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS);
+ BUG_ON(phb_id >= MAX_PHBS);
+ set_bit(phb_id, phb_bitmap);
+
+out_unlock:
+ spin_unlock(&hose_spinlock);
+
+ return phb_id;
}
-EXPORT_SYMBOL(get_pci_dma_ops);
struct pci_controller *pcibios_alloc_controller(struct device_node *dev)
{
struct pci_controller *phb;
- phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL);
+ phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL);
if (phb == NULL)
return NULL;
+
+ phb->global_number = get_phb_number(dev);
+
spin_lock(&hose_spinlock);
- phb->global_number = global_phb_number++;
list_add_tail(&phb->list_node, &hose_list);
spin_unlock(&hose_spinlock);
- phb->dn = dev;
- phb->is_dynamic = mem_init_done;
+
+ phb->dn = of_node_get(dev);
+ phb->is_dynamic = slab_is_available();
#ifdef CONFIG_PPC64
if (dev) {
int nid = of_node_to_nid(dev);
if (nid < 0 || !node_online(nid))
- nid = -1;
+ nid = NUMA_NO_NODE;
PHB_SET_NODE(phb, nid);
}
#endif
return phb;
}
+EXPORT_SYMBOL_GPL(pcibios_alloc_controller);
void pcibios_free_controller(struct pci_controller *phb)
{
spin_lock(&hose_spinlock);
+
+ /* Clear bit of phb_bitmap to allow reuse of this PHB number. */
+ if (phb->global_number < MAX_PHBS)
+ clear_bit(phb->global_number, phb_bitmap);
+ of_node_put(phb->dn);
list_del(&phb->list_node);
spin_unlock(&hose_spinlock);
if (phb->is_dynamic)
kfree(phb);
}
+EXPORT_SYMBOL_GPL(pcibios_free_controller);
+
+/*
+ * This function is used to call pcibios_free_controller()
+ * in a deferred manner: a callback from the PCI subsystem.
+ *
+ * _*DO NOT*_ call pcibios_free_controller() explicitly if
+ * this is used (or it may access an invalid *phb pointer).
+ *
+ * The callback occurs when all references to the root bus
+ * are dropped (e.g., child buses/devices and their users).
+ *
+ * It's called as .release_fn() of 'struct pci_host_bridge'
+ * which is associated with the 'struct pci_controller.bus'
+ * (root bus) - it expects .release_data to hold a pointer
+ * to 'struct pci_controller'.
+ *
+ * In order to use it, register .release_fn()/release_data
+ * like this:
+ *
+ * pci_set_host_bridge_release(bridge,
+ * pcibios_free_controller_deferred,
+ * (void *) phb);
+ *
+ * e.g. in the pcibios_root_bridge_prepare() callback from
+ * pci_create_root_bus().
+ */
+void pcibios_free_controller_deferred(struct pci_host_bridge *bridge)
+{
+ struct pci_controller *phb = (struct pci_controller *)
+ bridge->release_data;
+
+ pr_debug("domain %d, dynamic %d\n", phb->global_number, phb->is_dynamic);
+
+ pcibios_free_controller(phb);
+}
+EXPORT_SYMBOL_GPL(pcibios_free_controller_deferred);
/*
* The function is used to return the minimal alignment
@@ -110,8 +212,10 @@ void pcibios_free_controller(struct pci_controller *phb)
resource_size_t pcibios_window_alignment(struct pci_bus *bus,
unsigned long type)
{
- if (ppc_md.pcibios_window_alignment)
- return ppc_md.pcibios_window_alignment(bus, type);
+ struct pci_controller *phb = pci_bus_to_host(bus);
+
+ if (phb->controller_ops.window_alignment)
+ return phb->controller_ops.window_alignment(bus, type);
/*
* PCI core will figure out the default
@@ -121,16 +225,61 @@ resource_size_t pcibios_window_alignment(struct pci_bus *bus,
return 1;
}
+void pcibios_setup_bridge(struct pci_bus *bus, unsigned long type)
+{
+ struct pci_controller *hose = pci_bus_to_host(bus);
+
+ if (hose->controller_ops.setup_bridge)
+ hose->controller_ops.setup_bridge(bus, type);
+}
+
void pcibios_reset_secondary_bus(struct pci_dev *dev)
{
- if (ppc_md.pcibios_reset_secondary_bus) {
- ppc_md.pcibios_reset_secondary_bus(dev);
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (phb->controller_ops.reset_secondary_bus) {
+ phb->controller_ops.reset_secondary_bus(dev);
return;
}
pci_reset_secondary_bus(dev);
}
+resource_size_t pcibios_default_alignment(void)
+{
+ if (ppc_md.pcibios_default_alignment)
+ return ppc_md.pcibios_default_alignment();
+
+ return 0;
+}
+
+#ifdef CONFIG_PCI_IOV
+resource_size_t pcibios_iov_resource_alignment(struct pci_dev *pdev, int resno)
+{
+ if (ppc_md.pcibios_iov_resource_alignment)
+ return ppc_md.pcibios_iov_resource_alignment(pdev, resno);
+
+ return pci_iov_resource_size(pdev, resno);
+}
+
+int pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+ if (ppc_md.pcibios_sriov_enable)
+ return ppc_md.pcibios_sriov_enable(pdev, num_vfs);
+
+ return 0;
+}
+
+int pcibios_sriov_disable(struct pci_dev *pdev)
+{
+ if (ppc_md.pcibios_sriov_disable)
+ return ppc_md.pcibios_sriov_disable(pdev);
+
+ return 0;
+}
+
+#endif /* CONFIG_PCI_IOV */
+
static resource_size_t pcibios_io_size(const struct pci_controller *hose)
{
#ifdef CONFIG_PPC64
@@ -212,6 +361,66 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
return NULL;
}
+struct pci_controller *pci_find_controller_for_domain(int domain_nr)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node)
+ if (hose->global_number == domain_nr)
+ return hose;
+
+ return NULL;
+}
+
+struct pci_intx_virq {
+ int virq;
+ struct kref kref;
+ struct list_head list_node;
+};
+
+static LIST_HEAD(intx_list);
+static DEFINE_MUTEX(intx_mutex);
+
+static void ppc_pci_intx_release(struct kref *kref)
+{
+ struct pci_intx_virq *vi = container_of(kref, struct pci_intx_virq, kref);
+
+ list_del(&vi->list_node);
+ irq_dispose_mapping(vi->virq);
+ kfree(vi);
+}
+
+static int ppc_pci_unmap_irq_line(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct pci_dev *pdev = to_pci_dev(data);
+
+ if (action == BUS_NOTIFY_DEL_DEVICE) {
+ struct pci_intx_virq *vi;
+
+ mutex_lock(&intx_mutex);
+ list_for_each_entry(vi, &intx_list, list_node) {
+ if (vi->virq == pdev->irq) {
+ kref_put(&vi->kref, ppc_pci_intx_release);
+ break;
+ }
+ }
+ mutex_unlock(&intx_mutex);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_pci_unmap_irq_notifier = {
+ .notifier_call = ppc_pci_unmap_irq_line,
+};
+
+static int ppc_pci_register_irq_notifier(void)
+{
+ return bus_register_notifier(&pci_bus_type, &ppc_pci_unmap_irq_notifier);
+}
+arch_initcall(ppc_pci_register_irq_notifier);
+
/*
* Reads the interrupt pin to determine if interrupt is use by card.
* If the interrupt is used, then gets the interrupt line from the
@@ -219,16 +428,19 @@ struct pci_controller* pci_find_hose_for_OF_device(struct device_node* node)
*/
static int pci_read_irq_line(struct pci_dev *pci_dev)
{
- struct of_phandle_args oirq;
- unsigned int virq;
+ int virq;
+ struct pci_intx_virq *vi, *vitmp;
+
+ /* Preallocate vi as rewind is complex if this fails after mapping */
+ vi = kzalloc(sizeof(struct pci_intx_virq), GFP_KERNEL);
+ if (!vi)
+ return -1;
pr_debug("PCI: Try to map irq for %s...\n", pci_name(pci_dev));
-#ifdef DEBUG
- memset(&oirq, 0xff, sizeof(oirq));
-#endif
/* Try to get a mapping from the device-tree */
- if (of_irq_parse_pci(pci_dev, &oirq)) {
+ virq = of_irq_parse_and_map_pci(pci_dev, 0, 0);
+ if (virq <= 0) {
u8 line, pin;
/* If that fails, let's fall back to what is in the config
@@ -239,144 +451,77 @@ static int pci_read_irq_line(struct pci_dev *pci_dev)
* function.
*/
if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_PIN, &pin))
- return -1;
+ goto error_exit;
if (pin == 0)
- return -1;
+ goto error_exit;
if (pci_read_config_byte(pci_dev, PCI_INTERRUPT_LINE, &line) ||
line == 0xff || line == 0) {
- return -1;
+ goto error_exit;
}
pr_debug(" No map ! Using line %d (pin %d) from PCI config\n",
line, pin);
virq = irq_create_mapping(NULL, line);
- if (virq != NO_IRQ)
+ if (virq)
irq_set_irq_type(virq, IRQ_TYPE_LEVEL_LOW);
- } else {
- pr_debug(" Got one, spec %d cells (0x%08x 0x%08x...) on %s\n",
- oirq.args_count, oirq.args[0], oirq.args[1],
- of_node_full_name(oirq.np));
-
- virq = irq_create_of_mapping(&oirq);
}
- if(virq == NO_IRQ) {
+
+ if (!virq) {
pr_debug(" Failed to map !\n");
- return -1;
+ goto error_exit;
}
pr_debug(" Mapped to linux irq %d\n", virq);
pci_dev->irq = virq;
- return 0;
-}
-
-/*
- * Platform support for /proc/bus/pci/X/Y mmap()s,
- * modelled on the sparc64 implementation by Dave Miller.
- * -- paulus.
- */
-
-/*
- * Adjust vm_pgoff of VMA such that it is the physical page offset
- * corresponding to the 32-bit pci bus offset for DEV requested by the user.
- *
- * Basically, the user finds the base address for his device which he wishes
- * to mmap. They read the 32-bit value from the config space base register,
- * add whatever PAGE_SIZE multiple offset they wish, and feed this into the
- * offset parameter of mmap on /proc/bus/pci/XXX for that device.
- *
- * Returns negative error code on failure, zero on success.
- */
-static struct resource *__pci_mmap_make_offset(struct pci_dev *dev,
- resource_size_t *offset,
- enum pci_mmap_state mmap_state)
-{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- unsigned long io_offset = 0;
- int i, res_bit;
-
- if (hose == NULL)
- return NULL; /* should never happen */
-
- /* If memory, add on the PCI bridge address offset */
- if (mmap_state == pci_mmap_mem) {
-#if 0 /* See comment in pci_resource_to_user() for why this is disabled */
- *offset += hose->pci_mem_offset;
-#endif
- res_bit = IORESOURCE_MEM;
- } else {
- io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
- *offset += io_offset;
- res_bit = IORESOURCE_IO;
+ mutex_lock(&intx_mutex);
+ list_for_each_entry(vitmp, &intx_list, list_node) {
+ if (vitmp->virq == virq) {
+ kref_get(&vitmp->kref);
+ kfree(vi);
+ vi = NULL;
+ break;
+ }
}
-
- /*
- * Check that the offset requested corresponds to one of the
- * resources of the device.
- */
- for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
- struct resource *rp = &dev->resource[i];
- int flags = rp->flags;
-
- /* treat ROM as memory (should be already) */
- if (i == PCI_ROM_RESOURCE)
- flags |= IORESOURCE_MEM;
-
- /* Active and same type? */
- if ((flags & res_bit) == 0)
- continue;
-
- /* In the range of this resource? */
- if (*offset < (rp->start & PAGE_MASK) || *offset > rp->end)
- continue;
-
- /* found it! construct the final physical address */
- if (mmap_state == pci_mmap_io)
- *offset += hose->io_base_phys - io_offset;
- return rp;
+ if (vi) {
+ vi->virq = virq;
+ kref_init(&vi->kref);
+ list_add_tail(&vi->list_node, &intx_list);
}
+ mutex_unlock(&intx_mutex);
- return NULL;
+ return 0;
+error_exit:
+ kfree(vi);
+ return -1;
}
/*
- * Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
- * device mapping.
+ * Platform support for /proc/bus/pci/X/Y mmap()s.
+ * -- paulus.
*/
-static pgprot_t __pci_mmap_set_pgprot(struct pci_dev *dev, struct resource *rp,
- pgprot_t protection,
- enum pci_mmap_state mmap_state,
- int write_combine)
+int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma)
{
+ struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+ resource_size_t ioaddr = pci_resource_start(pdev, bar);
- /* Write combine is always 0 on non-memory space mappings. On
- * memory space, if the user didn't pass 1, we check for a
- * "prefetchable" resource. This is a bit hackish, but we use
- * this to workaround the inability of /sysfs to provide a write
- * combine bit
- */
- if (mmap_state != pci_mmap_mem)
- write_combine = 0;
- else if (write_combine == 0) {
- if (rp->flags & IORESOURCE_PREFETCH)
- write_combine = 1;
- }
+ if (!hose)
+ return -EINVAL;
- /* XXX would be nice to have a way to ask for write-through */
- if (write_combine)
- return pgprot_noncached_wc(protection);
- else
- return pgprot_noncached(protection);
+ /* Convert to an offset within this PCI controller */
+ ioaddr -= (unsigned long)hose->io_base_virt - _IO_BASE;
+
+ vma->vm_pgoff += (ioaddr + hose->io_base_phys) >> PAGE_SHIFT;
+ return 0;
}
/*
- * This one is used by /dev/mem and fbdev who have no clue about the
+ * This one is used by /dev/mem and video who have no clue about the
* PCI device, it tries to find the PCI device first and calls the
* above routine
*/
-pgprot_t pci_phys_mem_access_prot(struct file *file,
- unsigned long pfn,
+pgprot_t pci_phys_mem_access_prot(unsigned long pfn,
unsigned long size,
pgprot_t prot)
{
@@ -419,40 +564,6 @@ pgprot_t pci_phys_mem_access_prot(struct file *file,
return prot;
}
-
-/*
- * Perform the actual remap of the pages for a PCI device mapping, as
- * appropriate for this architecture. The region in the process to map
- * is described by vm_start and vm_end members of VMA, the base physical
- * address is found in vm_pgoff.
- * The pci device structure is provided so that architectures may make mapping
- * decisions on a per-device or per-bus basis.
- *
- * Returns a negative error code on failure, zero on success.
- */
-int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
- enum pci_mmap_state mmap_state, int write_combine)
-{
- resource_size_t offset =
- ((resource_size_t)vma->vm_pgoff) << PAGE_SHIFT;
- struct resource *rp;
- int ret;
-
- rp = __pci_mmap_make_offset(dev, &offset, mmap_state);
- if (rp == NULL)
- return -EINVAL;
-
- vma->vm_pgoff = offset >> PAGE_SHIFT;
- vma->vm_page_prot = __pci_mmap_set_pgprot(dev, rp,
- vma->vm_page_prot,
- mmap_state, write_combine);
-
- ret = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
- vma->vm_end - vma->vm_start, vma->vm_page_prot);
-
- return ret;
-}
-
/* This provides legacy IO read access on a bus */
int pci_legacy_read(struct pci_bus *bus, loff_t port, u32 *val, size_t size)
{
@@ -595,39 +706,25 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar,
const struct resource *rsrc,
resource_size_t *start, resource_size_t *end)
{
- struct pci_controller *hose = pci_bus_to_host(dev->bus);
- resource_size_t offset = 0;
+ struct pci_bus_region region;
- if (hose == NULL)
+ if (rsrc->flags & IORESOURCE_IO) {
+ pcibios_resource_to_bus(dev->bus, &region,
+ (struct resource *) rsrc);
+ *start = region.start;
+ *end = region.end;
return;
+ }
- if (rsrc->flags & IORESOURCE_IO)
- offset = (unsigned long)hose->io_base_virt - _IO_BASE;
-
- /* We pass a fully fixed up address to userland for MMIO instead of
- * a BAR value because X is lame and expects to be able to use that
- * to pass to /dev/mem !
- *
- * That means that we'll have potentially 64 bits values where some
- * userland apps only expect 32 (like X itself since it thinks only
- * Sparc has 64 bits MMIO) but if we don't do that, we break it on
- * 32 bits CHRPs :-(
- *
- * Hopefully, the sysfs insterface is immune to that gunk. Once X
- * has been fixed (and the fix spread enough), we can re-enable the
- * 2 lines below and pass down a BAR value to userland. In that case
- * we'll also have to re-enable the matching code in
- * __pci_mmap_make_offset().
+ /* We pass a CPU physical address to userland for MMIO instead of a
+ * BAR value because X is lame and expects to be able to use that
+ * to pass to /dev/mem!
*
- * BenH.
+ * That means we may have 64-bit values where some apps only expect
+ * 32 (like X itself since it thinks only Sparc has 64-bit MMIO).
*/
-#if 0
- else if (rsrc->flags & IORESOURCE_MEM)
- offset = hose->pci_mem_offset;
-#endif
-
- *start = rsrc->start - offset;
- *end = rsrc->end - offset;
+ *start = rsrc->start;
+ *end = rsrc->end;
}
/**
@@ -662,8 +759,8 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
struct of_pci_range range;
struct of_pci_range_parser parser;
- printk(KERN_INFO "PCI host bridge %s %s ranges:\n",
- dev->full_name, primary ? "(primary)" : "");
+ printk(KERN_INFO "PCI host bridge %pOF %s ranges:\n",
+ dev, primary ? "(primary)" : "");
/* Check for ranges property */
if (of_pci_range_parser_init(&parser, dev))
@@ -723,7 +820,7 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
" MEM 0x%016llx..0x%016llx -> 0x%016llx %s\n",
range.cpu_addr, range.cpu_addr + range.size - 1,
range.pci_addr,
- (range.pci_space & 0x40000000) ?
+ (range.flags & IORESOURCE_PREFETCH) ?
"Prefetch" : "");
/* We support only 3 memory ranges */
@@ -747,7 +844,11 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose,
break;
}
if (res != NULL) {
- of_pci_range_to_resource(&range, dev, res);
+ res->name = dev->full_name;
+ res->flags = range.flags;
+ res->start = range.cpu_addr;
+ res->end = range.cpu_addr + range.size - 1;
+ res->parent = res->child = res->sibling = NULL;
}
}
}
@@ -778,6 +879,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
static void pcibios_fixup_resources(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct resource *res;
int i;
if (!hose) {
@@ -785,9 +887,13 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
pci_name(dev));
return;
}
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- struct resource *res = dev->resource + i;
+
+ if (dev->is_virtfn)
+ return;
+
+ pci_dev_for_each_resource(dev, res, i) {
struct pci_bus_region reg;
+
if (!res->flags)
continue;
@@ -801,23 +907,15 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
(reg.start == 0 && !pci_has_flag(PCI_PROBE_ONLY))) {
/* Only print message if not re-assigning */
if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC))
- pr_debug("PCI:%s Resource %d %016llx-%016llx [%x] "
- "is unassigned\n",
- pci_name(dev), i,
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned int)res->flags);
+ pr_debug("PCI:%s Resource %d %pR is unassigned\n",
+ pci_name(dev), i, res);
res->end -= res->start;
res->start = 0;
res->flags |= IORESOURCE_UNSET;
continue;
}
- pr_debug("PCI:%s Resource %d %016llx-%016llx [%x]\n",
- pci_name(dev), i,
- (unsigned long long)res->start,\
- (unsigned long long)res->end,
- (unsigned int)res->flags);
+ pr_debug("PCI:%s Resource %d %pR\n", pci_name(dev), i, res);
}
/* Call machine specific resource fixup */
@@ -921,11 +1019,7 @@ static void pcibios_fixup_bridge(struct pci_bus *bus)
continue;
}
- pr_debug("PCI:%s Bus rsrc %d %016llx-%016llx [%x]\n",
- pci_name(dev), i,
- (unsigned long long)res->start,\
- (unsigned long long)res->end,
- (unsigned int)res->flags);
+ pr_debug("PCI:%s Bus rsrc %d %pR\n", pci_name(dev), i, res);
/* Try to detect uninitialized P2P bridge resources,
* and clear them out so they get re-assigned later
@@ -939,6 +1033,8 @@ static void pcibios_fixup_bridge(struct pci_bus *bus)
void pcibios_setup_bus_self(struct pci_bus *bus)
{
+ struct pci_controller *phb;
+
/* Fix up the bus resources for P2P bridges */
if (bus->self != NULL)
pcibios_fixup_bridge(bus);
@@ -950,12 +1046,14 @@ void pcibios_setup_bus_self(struct pci_bus *bus)
ppc_md.pcibios_fixup_bus(bus);
/* Setup bus DMA mappings */
- if (ppc_md.pci_dma_bus_setup)
- ppc_md.pci_dma_bus_setup(bus);
+ phb = pci_bus_to_host(bus);
+ if (phb->controller_ops.dma_bus_setup)
+ phb->controller_ops.dma_bus_setup(bus);
}
-static void pcibios_setup_device(struct pci_dev *dev)
+void pcibios_bus_add_device(struct pci_dev *dev)
{
+ struct pci_controller *phb;
/* Fixup NUMA node as it may not be setup yet by the generic
* code and is needed by the DMA init
*/
@@ -963,45 +1061,35 @@ static void pcibios_setup_device(struct pci_dev *dev)
/* Hook up default DMA ops */
set_dma_ops(&dev->dev, pci_dma_ops);
- set_dma_offset(&dev->dev, PCI_DRAM_OFFSET);
+ dev->dev.archdata.dma_offset = PCI_DRAM_OFFSET;
/* Additional platform DMA/iommu setup */
- if (ppc_md.pci_dma_dev_setup)
- ppc_md.pci_dma_dev_setup(dev);
+ phb = pci_bus_to_host(dev->bus);
+ if (phb->controller_ops.dma_dev_setup)
+ phb->controller_ops.dma_dev_setup(dev);
/* Read default IRQs and fixup if necessary */
pci_read_irq_line(dev);
if (ppc_md.pci_irq_fixup)
ppc_md.pci_irq_fixup(dev);
-}
-int pcibios_add_device(struct pci_dev *dev)
-{
- /*
- * We can only call pcibios_setup_device() after bus setup is complete,
- * since some of the platform specific DMA setup code depends on it.
- */
- if (dev->bus->is_added)
- pcibios_setup_device(dev);
- return 0;
+ if (ppc_md.pcibios_bus_add_device)
+ ppc_md.pcibios_bus_add_device(dev);
}
-void pcibios_setup_bus_devices(struct pci_bus *bus)
+int pcibios_device_add(struct pci_dev *dev)
{
- struct pci_dev *dev;
+ struct irq_domain *d;
- pr_debug("PCI: Fixup bus devices %d (%s)\n",
- bus->number, bus->self ? pci_name(bus->self) : "PHB");
+#ifdef CONFIG_PCI_IOV
+ if (ppc_md.pcibios_fixup_sriov)
+ ppc_md.pcibios_fixup_sriov(dev);
+#endif /* CONFIG_PCI_IOV */
- list_for_each_entry(dev, &bus->devices, bus_list) {
- /* Cardbus can call us to add new devices to a bus, so ignore
- * those who are already fully discovered
- */
- if (dev->is_added)
- continue;
-
- pcibios_setup_device(dev);
- }
+ d = dev_get_msi_domain(&dev->bus->dev);
+ if (d)
+ dev_set_msi_domain(&dev->dev, d);
+ return 0;
}
void pcibios_set_master(struct pci_dev *dev)
@@ -1017,21 +1105,11 @@ void pcibios_fixup_bus(struct pci_bus *bus)
*/
pci_read_bridge_bases(bus);
- /* Now fixup the bus bus */
+ /* Now fixup the bus */
pcibios_setup_bus_self(bus);
-
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
}
EXPORT_SYMBOL(pcibios_fixup_bus);
-void pci_fixup_cardbus(struct pci_bus *bus)
-{
- /* Now fixup devices on that bus */
- pcibios_setup_bus_devices(bus);
-}
-
-
static int skip_isa_ioresource_align(struct pci_dev *dev)
{
if (pci_has_flag(PCI_CAN_SKIP_ISA_ALIGN) &&
@@ -1099,10 +1177,8 @@ static int reparent_resources(struct resource *parent,
*pp = NULL;
for (p = res->child; p != NULL; p = p->sibling) {
p->parent = res;
- pr_debug("PCI: Reparented %s [%llx..%llx] under %s\n",
- p->name,
- (unsigned long long)p->start,
- (unsigned long long)p->end, res->name);
+ pr_debug("PCI: Reparented %s %pR under %s\n",
+ p->name, p, res->name);
}
return 0;
}
@@ -1140,7 +1216,7 @@ static int reparent_resources(struct resource *parent,
* as well.
*/
-void pcibios_allocate_bus_resources(struct pci_bus *bus)
+static void pcibios_allocate_bus_resources(struct pci_bus *bus)
{
struct pci_bus *b;
int i;
@@ -1171,16 +1247,13 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
}
}
- pr_debug("PCI: %s (bus %d) bridge rsrc %d: %016llx-%016llx "
- "[0x%x], parent %p (%s)\n",
- bus->self ? pci_name(bus->self) : "PHB",
- bus->number, i,
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned int)res->flags,
- pr, (pr && pr->name) ? pr->name : "nil");
+ pr_debug("PCI: %s (bus %d) bridge rsrc %d: %pR, parent %p (%s)\n",
+ bus->self ? pci_name(bus->self) : "PHB", bus->number,
+ i, res, pr, (pr && pr->name) ? pr->name : "nil");
if (pr && !(pr->flags & IORESOURCE_UNSET)) {
+ struct pci_dev *dev = bus->self;
+
if (request_resource(pr, res) == 0)
continue;
/*
@@ -1190,9 +1263,14 @@ void pcibios_allocate_bus_resources(struct pci_bus *bus)
*/
if (reparent_resources(pr, res) == 0)
continue;
+
+ if (dev && i < PCI_BRIDGE_RESOURCE_NUM &&
+ pci_claim_bridge_resource(dev,
+ i + PCI_BRIDGE_RESOURCES) == 0)
+ continue;
}
- pr_warning("PCI: Cannot allocate resource region "
- "%d of PCI bridge %d, will remap\n", i, bus->number);
+ pr_warn("PCI: Cannot allocate resource region %d of PCI bridge %d, will remap\n",
+ i, bus->number);
clear_resource:
/* The resource might be figured out when doing
* reassignment based on the resources required
@@ -1213,11 +1291,8 @@ static inline void alloc_resource(struct pci_dev *dev, int idx)
{
struct resource *pr, *r = &dev->resource[idx];
- pr_debug("PCI: Allocating %s: Resource %d: %016llx..%016llx [%x]\n",
- pci_name(dev), idx,
- (unsigned long long)r->start,
- (unsigned long long)r->end,
- (unsigned int)r->flags);
+ pr_debug("PCI: Allocating %s: Resource %d: %pR\n",
+ pci_name(dev), idx, r);
pr = pci_find_parent_resource(dev, r);
if (!pr || (pr->flags & IORESOURCE_UNSET) ||
@@ -1225,11 +1300,7 @@ static inline void alloc_resource(struct pci_dev *dev, int idx)
printk(KERN_WARNING "PCI: Cannot allocate resource region %d"
" of device %s, will remap\n", idx, pci_name(dev));
if (pr)
- pr_debug("PCI: parent is %p: %016llx-%016llx [%x]\n",
- pr,
- (unsigned long long)pr->start,
- (unsigned long long)pr->end,
- (unsigned int)pr->flags);
+ pr_debug("PCI: parent is %p: %pR\n", pr, pr);
/* We'll assign a new address later */
r->flags |= IORESOURCE_UNSET;
r->end -= r->start;
@@ -1347,8 +1418,10 @@ void __init pcibios_resource_survey(void)
/* Allocate and assign resources */
list_for_each_entry(b, &pci_root_buses, node)
pcibios_allocate_bus_resources(b);
- pcibios_allocate_resources(0);
- pcibios_allocate_resources(1);
+ if (!pci_has_flag(PCI_REASSIGN_ALL_RSRC)) {
+ pcibios_allocate_resources(0);
+ pcibios_allocate_resources(1);
+ }
/* Before we start assigning unassigned resource, we try to reserve
* the low IO area and the VGA memory area if they intersect the
@@ -1366,10 +1439,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource
@@ -1383,28 +1452,27 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
struct pci_bus *child_bus;
list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct resource *r;
int i;
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *r = &dev->resource[i];
-
+ pci_dev_for_each_resource(dev, r, i) {
if (r->parent || !r->start || !r->flags)
continue;
- pr_debug("PCI: Claiming %s: "
- "Resource %d: %016llx..%016llx [%x]\n",
- pci_name(dev), i,
- (unsigned long long)r->start,
- (unsigned long long)r->end,
- (unsigned int)r->flags);
+ pr_debug("PCI: Claiming %s: Resource %d: %pR\n",
+ pci_name(dev), i, r);
- pci_claim_resource(dev, i);
+ if (pci_claim_resource(dev, i) == 0)
+ continue;
+
+ pci_claim_bridge_resource(dev, i);
}
}
list_for_each_entry(child_bus, &bus->children, node)
pcibios_claim_one_bus(child_bus);
}
+EXPORT_SYMBOL_GPL(pcibios_claim_one_bus);
/* pcibios_finish_adding_to_bus
@@ -1421,29 +1489,37 @@ void pcibios_finish_adding_to_bus(struct pci_bus *bus)
/* Allocate bus and devices resources */
pcibios_allocate_bus_resources(bus);
pcibios_claim_one_bus(bus);
- if (!pci_has_flag(PCI_PROBE_ONLY))
- pci_assign_unassigned_bus_resources(bus);
-
- /* Fixup EEH */
- eeh_add_device_tree_late(bus);
+ if (!pci_has_flag(PCI_PROBE_ONLY)) {
+ if (bus->self)
+ pci_assign_unassigned_bridge_resources(bus->self);
+ else
+ pci_assign_unassigned_bus_resources(bus);
+ }
/* Add new devices to global lists. Register in proc, sysfs. */
pci_bus_add_devices(bus);
-
- /* sysfs files should only be added after devices are added */
- eeh_add_sysfs_files(bus);
}
EXPORT_SYMBOL_GPL(pcibios_finish_adding_to_bus);
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
- if (ppc_md.pcibios_enable_device_hook)
- if (ppc_md.pcibios_enable_device_hook(dev))
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (phb->controller_ops.enable_device_hook)
+ if (!phb->controller_ops.enable_device_hook(dev))
return -EINVAL;
return pci_enable_resources(dev, mask);
}
+void pcibios_disable_device(struct pci_dev *dev)
+{
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+
+ if (phb->controller_ops.disable_device)
+ phb->controller_ops.disable_device(dev);
+}
+
resource_size_t pcibios_io_space_offset(struct pci_controller *hose)
{
return (unsigned long) hose->io_base_virt - _IO_BASE;
@@ -1460,38 +1536,26 @@ static void pcibios_setup_phb_resources(struct pci_controller *hose,
res = &hose->io_resource;
if (!res->flags) {
- printk(KERN_WARNING "PCI: I/O resource not set for host"
- " bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ pr_debug("PCI: I/O resource not set for host"
+ " bridge %pOF (domain %d)\n",
+ hose->dn, hose->global_number);
} else {
offset = pcibios_io_space_offset(hose);
- pr_debug("PCI: PHB IO resource = %08llx-%08llx [%lx] off 0x%08llx\n",
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned long)res->flags,
- (unsigned long long)offset);
+ pr_debug("PCI: PHB IO resource = %pR off 0x%08llx\n",
+ res, (unsigned long long)offset);
pci_add_resource_offset(resources, res, offset);
}
/* Hookup PHB Memory resources */
for (i = 0; i < 3; ++i) {
res = &hose->mem_resources[i];
- if (!res->flags) {
- if (i == 0)
- printk(KERN_ERR "PCI: Memory resource 0 not set for "
- "host bridge %s (domain %d)\n",
- hose->dn->full_name, hose->global_number);
+ if (!res->flags)
continue;
- }
- offset = hose->mem_offset[i];
-
- pr_debug("PCI: PHB MEM resource %d = %08llx-%08llx [%lx] off 0x%08llx\n", i,
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned long)res->flags,
- (unsigned long long)offset);
+ offset = hose->mem_offset[i];
+ pr_debug("PCI: PHB MEM resource %d = %pR off 0x%08llx\n", i,
+ res, (unsigned long long)offset);
pci_add_resource_offset(resources, res, offset);
}
@@ -1561,7 +1625,6 @@ EARLY_PCI_OP(write, byte, u8)
EARLY_PCI_OP(write, word, u16)
EARLY_PCI_OP(write, dword, u32)
-extern int pci_bus_find_capability (struct pci_bus *bus, unsigned int devfn, int cap);
int early_find_capability(struct pci_controller *hose, int bus, int devfn,
int cap)
{
@@ -1586,7 +1649,7 @@ void pcibios_scan_phb(struct pci_controller *hose)
struct device_node *node = hose->dn;
int mode;
- pr_debug("PCI: Scanning PHB %s\n", of_node_full_name(node));
+ pr_debug("PCI: Scanning PHB %pOF\n", node);
/* Get some IO space for the new PHB */
pcibios_setup_phb_io_space(hose);
@@ -1612,8 +1675,8 @@ void pcibios_scan_phb(struct pci_controller *hose)
/* Get probe mode and perform scan */
mode = PCI_PROBE_NORMAL;
- if (node && ppc_md.pci_probe_mode)
- mode = ppc_md.pci_probe_mode(bus);
+ if (node && hose->controller_ops.probe_mode)
+ mode = hose->controller_ops.probe_mode(bus);
pr_debug(" probe mode: %d\n", mode);
if (mode == PCI_PROBE_DEVTREE)
of_scan_bus(node, bus);
@@ -1637,36 +1700,36 @@ void pcibios_scan_phb(struct pci_controller *hose)
pcie_bus_configure_settings(child);
}
}
+EXPORT_SYMBOL_GPL(pcibios_scan_phb);
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
- int i, class = dev->class >> 8;
- /* When configured as agent, programing interface = 1 */
+ int class = dev->class >> 8;
+ /* When configured as agent, programming interface = 1 */
int prog_if = dev->class & 0xf;
+ struct resource *r;
if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
class == PCI_CLASS_BRIDGE_OTHER) &&
(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
(prog_if == 0) &&
(dev->bus->parent == NULL)) {
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- dev->resource[i].start = 0;
- dev->resource[i].end = 0;
- dev->resource[i].flags = 0;
+ pci_dev_for_each_resource(dev, r) {
+ r->start = 0;
+ r->end = 0;
+ r->flags = 0;
}
}
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl);
-static void fixup_vga(struct pci_dev *pdev)
-{
- u16 cmd;
- pci_read_config_word(pdev, PCI_COMMAND, &cmd);
- if ((cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) || !vga_default_device())
- vga_set_default_device(pdev);
+static int __init discover_phbs(void)
+{
+ if (ppc_md.discover_phbs)
+ ppc_md.discover_phbs();
+ return 0;
}
-DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_DISPLAY_VGA, 8, fixup_vga);
+core_initcall(discover_phbs);
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 5b789177aa29..6f444d0822d8 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from "arch/powerpc/platforms/pseries/pci_dlpar.c"
*
@@ -7,20 +8,45 @@
* Updates, 2005, John Rose <johnrose@austin.ibm.com>
* Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
* Updates, 2013, Gavin Shan <shangw@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/of.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
#include <asm/eeh.h>
+static struct pci_bus *find_bus_among_children(struct pci_bus *bus,
+ struct device_node *dn)
+{
+ struct pci_bus *child = NULL;
+ struct pci_bus *tmp;
+
+ if (pci_bus_to_OF_node(bus) == dn)
+ return bus;
+
+ list_for_each_entry(tmp, &bus->children, node) {
+ child = find_bus_among_children(tmp, dn);
+ if (child)
+ break;
+ }
+
+ return child;
+}
+
+struct pci_bus *pci_find_bus_by_node(struct device_node *dn)
+{
+ struct pci_dn *pdn = PCI_DN(dn);
+
+ if (!pdn || !pdn->phb || !pdn->phb->bus)
+ return NULL;
+
+ return find_bus_among_children(pdn->phb->bus, dn);
+}
+EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
+
/**
* pcibios_release_device - release PCI device
* @dev: PCI device
@@ -29,37 +55,76 @@
*/
void pcibios_release_device(struct pci_dev *dev)
{
- eeh_remove_device(dev);
+ struct pci_controller *phb = pci_bus_to_host(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
+
+ if (phb->controller_ops.release_device)
+ phb->controller_ops.release_device(dev);
+
+ /* free()ing the pci_dn has been deferred to us, do it now */
+ if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+ pci_dbg(dev, "freeing dead pdn\n");
+ kfree(pdn);
+ }
}
/**
- * pcibios_remove_pci_devices - remove all devices under this bus
+ * pci_hp_remove_devices - remove all devices under this bus
* @bus: the indicated PCI bus
*
* Remove all of the PCI devices under this bus both from the
* linux pci device tree, and from the powerpc EEH address cache.
*/
-void pcibios_remove_pci_devices(struct pci_bus *bus)
+void pci_hp_remove_devices(struct pci_bus *bus)
{
struct pci_dev *dev, *tmp;
struct pci_bus *child_bus;
/* First go down child busses */
list_for_each_entry(child_bus, &bus->children, node)
- pcibios_remove_pci_devices(child_bus);
+ pci_hp_remove_devices(child_bus);
pr_debug("PCI: Removing devices on bus %04x:%02x\n",
pci_domain_nr(bus), bus->number);
- list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
+ list_for_each_entry_safe_reverse(dev, tmp, &bus->devices, bus_list) {
pr_debug(" Removing %s...\n", pci_name(dev));
pci_stop_and_remove_bus_device(dev);
}
}
+EXPORT_SYMBOL_GPL(pci_hp_remove_devices);
-EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
+static void traverse_siblings_and_scan_slot(struct device_node *start, struct pci_bus *bus)
+{
+ struct device_node *dn;
+ int slotno;
+
+ u32 class = 0;
+
+ if (!of_property_read_u32(start->child, "class-code", &class)) {
+ /* Call pci_scan_slot() for the non-bridge/EP case */
+ if (!((class >> 8) == PCI_CLASS_BRIDGE_PCI)) {
+ slotno = PCI_SLOT(PCI_DN(start->child)->devfn);
+ pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+ return;
+ }
+ }
+
+ /* Iterate all siblings */
+ for_each_child_of_node(start, dn) {
+ class = 0;
+
+ if (!of_property_read_u32(start->child, "class-code", &class)) {
+ /* Call pci_scan_slot() on each sibling node/bridge port */
+ if ((class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ slotno = PCI_SLOT(PCI_DN(dn)->devfn);
+ pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+ }
+ }
+ }
+}
/**
- * pcibios_add_pci_devices - adds new pci devices to bus
+ * pci_hp_add_devices - adds new pci devices to bus
* @bus: the indicated PCI bus
*
* This routine will find and fixup new pci devices under
@@ -69,22 +134,27 @@ EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
* is how this routine differs from other, similar pcibios
* routines.)
*/
-void pcibios_add_pci_devices(struct pci_bus * bus)
+void pci_hp_add_devices(struct pci_bus *bus)
{
- int slotno, mode, pass, max;
+ int mode, max;
struct pci_dev *dev;
+ struct pci_controller *phb;
struct device_node *dn = pci_bus_to_OF_node(bus);
- eeh_add_device_tree_early(dn);
+ if (!dn)
+ return;
+
+ phb = pci_bus_to_host(bus);
mode = PCI_PROBE_NORMAL;
- if (ppc_md.pci_probe_mode)
- mode = ppc_md.pci_probe_mode(bus);
+ if (phb->controller_ops.probe_mode)
+ mode = phb->controller_ops.probe_mode(bus);
if (mode == PCI_PROBE_DEVTREE) {
/* use ofdt-based probe */
of_rescan_bus(dn, bus);
- } else if (mode == PCI_PROBE_NORMAL) {
+ } else if (mode == PCI_PROBE_NORMAL &&
+ dn->child && PCI_DN(dn->child)) {
/*
* Use legacy probe. In the partial hotplug case, we
* probably have grandchildren devices unplugged. So
@@ -92,18 +162,20 @@ void pcibios_add_pci_devices(struct pci_bus * bus)
* order for fully rescan all the way down to pick them up.
* They can have been removed during partial hotplug.
*/
- slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
- pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
- pcibios_setup_bus_devices(bus);
+ traverse_siblings_and_scan_slot(dn, bus);
max = bus->busn_res.start;
- for (pass = 0; pass < 2; pass++) {
- list_for_each_entry(dev, &bus->devices, bus_list) {
- if (pci_is_bridge(dev))
- max = pci_scan_bridge(bus, dev,
- max, pass);
- }
- }
+ /*
+ * Scan bridges that are already configured. We don't touch
+ * them unless they are misconfigured (which will be done in
+ * the second scan below).
+ */
+ for_each_pci_bridge(dev, bus)
+ max = pci_scan_bridge(bus, dev, max, 0);
+
+ /* Scan bridges that need to be reconfigured */
+ for_each_pci_bridge(dev, bus)
+ max = pci_scan_bridge(bus, dev, max, 1);
}
pcibios_finish_adding_to_bus(bus);
}
-EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
+EXPORT_SYMBOL_GPL(pci_hp_add_devices);
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 432459c817fa..f8a3bd8cfae4 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common pmac/prep/chrp pci routines. -- Cort
*/
@@ -10,7 +11,8 @@
#include <linux/capability.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
+#include <linux/syscalls.h>
#include <linux/irq.h>
#include <linux/list.h>
#include <linux/of.h>
@@ -19,12 +21,11 @@
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/sections.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/byteorder.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/machdep.h>
#undef DEBUG
@@ -32,19 +33,16 @@
unsigned long isa_io_base = 0;
unsigned long pci_dram_offset = 0;
int pcibios_assign_bus_offset = 1;
-
-void pcibios_make_OF_bus_map(void);
+EXPORT_SYMBOL(isa_io_base);
+EXPORT_SYMBOL(pci_dram_offset);
static void fixup_cpc710_pci64(struct pci_dev* dev);
-static u8* pci_to_OF_bus_map;
/* By default, we don't re-assign bus numbers. We do this only on
* some pmacs
*/
static int pci_assign_all_buses;
-static int pci_bus_count;
-
/* This will remain NULL for now, until isa-bridge.c is made common
* to both 32-bit and 64-bit.
*/
@@ -64,6 +62,11 @@ fixup_cpc710_pci64(struct pci_dev* dev)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CPC710_PCI64, fixup_cpc710_pci64);
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
+
+static u8* pci_to_OF_bus_map;
+static int pci_bus_count;
+
/*
* Functions below are used on OpenFirmware machines.
*/
@@ -77,8 +80,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
return;
bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get bus-range for %s, "
- "assuming it starts at 0\n", node->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, "
+ "assuming it starts at 0\n", node);
pci_to_OF_bus_map[pci_bus] = 0;
} else
pci_to_OF_bus_map[pci_bus] = bus_range[0];
@@ -94,7 +97,8 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
reg = of_get_property(node, "reg", NULL);
if (!reg)
continue;
- dev = pci_get_bus_and_slot(pci_bus, ((reg[0] >> 8) & 0xff));
+ dev = pci_get_domain_bus_and_slot(0, pci_bus,
+ ((reg[0] >> 8) & 0xff));
if (!dev || !dev->subordinate) {
pci_dev_put(dev);
continue;
@@ -104,7 +108,7 @@ make_one_node_map(struct device_node* node, u8 pci_bus)
}
}
-void
+static void __init
pcibios_make_OF_bus_map(void)
{
int i;
@@ -148,14 +152,18 @@ pcibios_make_OF_bus_map(void)
}
#endif
}
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
+#ifdef CONFIG_PPC_PMAC
/*
* Returns the PCI device matching a given OF node
*/
int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
{
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
struct pci_dev *dev = NULL;
+#endif
const __be32 *reg;
int size;
@@ -170,6 +178,9 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
*bus = (be32_to_cpup(&reg[0]) >> 16) & 0xff;
*devfn = (be32_to_cpup(&reg[0]) >> 8) & 0xff;
+#ifndef CONFIG_PPC_PCI_OF_BUS_MAP
+ return 0;
+#else
/* Ok, here we need some tweak. If we have already renumbered
* all busses, we can't rely on the OF bus number any more.
* the pci_to_OF_bus_map is not enough as several PCI busses
@@ -187,9 +198,12 @@ int pci_device_from_OF_node(struct device_node *node, u8 *bus, u8 *devfn)
}
return -ENODEV;
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
}
EXPORT_SYMBOL(pci_device_from_OF_node);
+#endif
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
/* We create the "pci-OF-bus-map" property now so it appears in the
* /proc device tree
*/
@@ -199,9 +213,8 @@ pci_create_OF_bus_map(void)
struct property* of_prop;
struct device_node *dn;
- of_prop = (struct property*) alloc_bootmem(sizeof(struct property) + 256);
- if (!of_prop)
- return;
+ of_prop = memblock_alloc_or_panic(sizeof(struct property) + 256,
+ SMP_CACHE_BYTES);
dn = of_find_node_by_path("/");
if (dn) {
memset(of_prop, -1, sizeof(struct property) + 256);
@@ -212,6 +225,7 @@ pci_create_OF_bus_map(void)
of_node_put(dn);
}
}
+#endif // CONFIG_PPC_PCI_OF_BUS_MAP
void pcibios_setup_phb_io_space(struct pci_controller *hose)
{
@@ -227,23 +241,41 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose)
static int __init pcibios_init(void)
{
struct pci_controller *hose, *tmp;
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
int next_busno = 0;
+#endif
printk(KERN_INFO "PCI: Probing PCI hardware\n");
+#ifdef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
+ /*
+ * Enable PCI domains in /proc when PCI bus numbers are not unique
+ * across all PCI domains to prevent conflicts. And keep PCI domain 0
+ * backward compatible in /proc for video cards.
+ */
+ pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
+#endif
+
if (pci_has_flag(PCI_REASSIGN_ALL_BUS))
pci_assign_all_buses = 1;
/* Scan all of the recorded PCI controllers. */
list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
if (pci_assign_all_buses)
hose->first_busno = next_busno;
+#endif
hose->last_busno = 0xff;
pcibios_scan_phb(hose);
pci_bus_add_devices(hose->bus);
+#ifndef CONFIG_PPC_PCI_BUS_NUM_DOMAIN_DEPENDENT
if (pci_assign_all_buses || next_busno <= hose->last_busno)
next_busno = hose->last_busno + pcibios_assign_bus_offset;
+#endif
}
+
+#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_CHRP)
+#ifdef CONFIG_PPC_PCI_OF_BUS_MAP
pci_bus_count = next_busno;
/* OpenFirmware based machines need a map of OF bus
@@ -252,10 +284,16 @@ static int __init pcibios_init(void)
*/
if (pci_assign_all_buses)
pcibios_make_OF_bus_map();
+#endif
+#endif
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
@@ -282,7 +320,8 @@ pci_bus_to_hose(int bus)
* Note that the returned IO or memory base is a physical address
*/
-long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which,
+ unsigned long, bus, unsigned long, devfn)
{
struct pci_controller* hose;
long result = -EOPNOTSUPP;
@@ -306,5 +345,3 @@ long sys_pciconfig_iobase(long which, unsigned long bus, unsigned long devfn)
return result;
}
-
-
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 155013da27e0..e27342ef128b 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Port for PPC64 David Engebretsen, IBM Corp.
* Contains common pci routines for ppc64 platform, pSeries and iSeries brands.
*
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
* Rework, based on alpha PCI code.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
@@ -17,17 +13,16 @@
#include <linux/pci.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/irq.h>
#include <linux/vmalloc.h>
+#include <linux/of.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/byteorder.h>
#include <asm/machdep.h>
@@ -39,7 +34,7 @@
* ISA drivers use hard coded offsets. If no ISA bus exists nothing
* is mapped on the first 64K of IO space
*/
-unsigned long pci_io_base = ISA_IO_BASE;
+unsigned long pci_io_base;
EXPORT_SYMBOL(pci_io_base);
static int __init pcibios_init(void)
@@ -59,20 +54,26 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
- pci_bus_add_devices(hose->bus);
- }
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Add devices. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ pci_bus_add_devices(hose->bus);
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;
}
-subsys_initcall(pcibios_init);
+subsys_initcall_sync(pcibios_init);
int pcibios_unmap_io_space(struct pci_bus *bus)
{
@@ -82,7 +83,7 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
/* If this is not a PHB, we only flush the hash table over
* the area mapped by this bridge. We don't play with the PTE
- * mappings since we might have to deal with sub-page alignemnts
+ * mappings since we might have to deal with sub-page alignments
* so flushing the hash table is the only sane way to make sure
* that no hash entries are covering that removed bridge area
* while still allowing other busses overlapping those pages
@@ -91,15 +92,15 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
* to do an appropriate TLB flush here too
*/
if (bus->self) {
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_BOOK3S_64
struct resource *res = bus->resource[0];
#endif
pr_debug("IO unmapping for PCI-PCI bridge %s\n",
pci_name(bus->self));
-#ifdef CONFIG_PPC_STD_MMU_64
- __flush_hash_table_range(&init_mm, res->start + _IO_BASE,
+#ifdef CONFIG_PPC_BOOK3S_64
+ __flush_hash_table_range(res->start + _IO_BASE,
res->end + _IO_BASE + 1);
#endif
return 0;
@@ -108,29 +109,53 @@ int pcibios_unmap_io_space(struct pci_bus *bus)
/* Get the host bridge */
hose = pci_bus_to_host(bus);
- /* Check if we have IOs allocated */
- if (hose->io_base_alloc == NULL)
- return 0;
-
- pr_debug("IO unmapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO unmapping for PHB %pOF\n", hose->dn);
pr_debug(" alloc=0x%p\n", hose->io_base_alloc);
- /* This is a PHB, we fully unmap the IO area */
- vunmap(hose->io_base_alloc);
-
+ iounmap(hose->io_base_alloc);
return 0;
}
EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
-static int pcibios_map_phb_io_space(struct pci_controller *hose)
+void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size)
{
struct vm_struct *area;
+ unsigned long addr;
+
+ WARN_ON_ONCE(paddr & ~PAGE_MASK);
+ WARN_ON_ONCE(size & ~PAGE_MASK);
+
+ /*
+ * Let's allocate some IO space for that guy. Note that VM_IOREMAP is
+ * passed here, so the core applies its usual alignment tricks for
+ * that case. NOTE(review): this comment used to say the flag was
+ * deliberately not passed (see pcibios_map_phb_io_space()); confirm
+ * the alignment is wanted for cards with incomplete address decoding.
+ */
+ area = __get_vm_area_caller(size, VM_IOREMAP, PHB_IO_BASE, PHB_IO_END,
+ __builtin_return_address(0));
+ if (!area)
+ return NULL;
+
+ addr = (unsigned long)area->addr;
+ if (ioremap_page_range(addr, addr + size, paddr,
+ pgprot_noncached(PAGE_KERNEL))) {
+ vunmap_range(addr, addr + size);
+ return NULL;
+ }
+
+ return (void __iomem *)addr;
+}
+EXPORT_SYMBOL_GPL(ioremap_phb);
+
+static int pcibios_map_phb_io_space(struct pci_controller *hose)
+{
unsigned long phys_page;
unsigned long size_page;
unsigned long io_virt_offset;
- phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
- size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
+ phys_page = ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
+ size_page = ALIGN(hose->pci_io_size, PAGE_SIZE);
/* Make sure IO area address is clear */
hose->io_base_alloc = NULL;
@@ -145,24 +170,18 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose)
* with incomplete address decoding but I'd rather not deal with
* those outside of the reserved 64K legacy region.
*/
- area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
- if (area == NULL)
+ hose->io_base_alloc = ioremap_phb(phys_page, size_page);
+ if (!hose->io_base_alloc)
return -ENOMEM;
- hose->io_base_alloc = area->addr;
- hose->io_base_virt = (void __iomem *)(area->addr +
- hose->io_base_phys - phys_page);
+ hose->io_base_virt = hose->io_base_alloc +
+ hose->io_base_phys - phys_page;
- pr_debug("IO mapping for PHB %s\n", hose->dn->full_name);
+ pr_debug("IO mapping for PHB %pOF\n", hose->dn);
pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n",
hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
pr_debug(" size=0x%016llx (alloc=0x%016lx)\n",
hose->pci_io_size, size_page);
- /* Establish the mapping */
- if (__ioremap_at(phys_page, area->addr, size_page,
- _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
- return -ENOMEM;
-
/* Fixup hose IO resource */
io_virt_offset = pcibios_io_space_offset(hose);
hose->io_resource.start += io_virt_offset;
@@ -204,8 +223,8 @@ void pcibios_setup_phb_io_space(struct pci_controller *hose)
#define IOBASE_ISA_IO 3
#define IOBASE_ISA_MEM 4
-long sys_pciconfig_iobase(long which, unsigned long in_bus,
- unsigned long in_devfn)
+SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, in_bus,
+ unsigned long, in_devfn)
{
struct pci_controller* hose;
struct pci_bus *tmp_bus, *bus = NULL;
@@ -267,12 +286,13 @@ int pcibus_to_node(struct pci_bus *bus)
EXPORT_SYMBOL(pcibus_to_node);
#endif
-static void quirk_radeon_32bit_msi(struct pci_dev *dev)
+#ifdef CONFIG_PPC_PMAC
+int pci_device_from_OF_node(struct device_node *np, u8 *bus, u8 *devfn)
{
- struct pci_dn *pdn = pci_get_pdn(dev);
-
- if (pdn)
- pdn->force_32bit_msi = true;
+ if (!PCI_DN(np))
+ return -ENODEV;
+ *bus = PCI_DN(np)->busno;
+ *devfn = PCI_DN(np)->devfn;
+ return 0;
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x68f2, quirk_radeon_32bit_msi);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0xaa68, quirk_radeon_32bit_msi);
+#endif
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 1f61fab59d9b..38561d6a2079 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -1,23 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* pci_dn.c
*
* Copyright (C) 2001 Todd Inglett, IBM Corporation
*
* PCI manipulation via device_nodes.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
#include <linux/pci.h>
@@ -25,41 +12,290 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/gfp.h>
+#include <linux/of.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <asm/firmware.h>
+#include <asm/eeh.h>
+
+/*
+ * The function is used to find the firmware data of one
+ * specific PCI device, which is attached to the indicated
+ * PCI bus. For VFs, their firmware data is linked to that
+ * one of PF's bridge. For other devices, their firmware
+ * data is linked to that of their bridge.
+ */
+static struct pci_dn *pci_bus_to_pdn(struct pci_bus *bus)
+{
+ struct pci_bus *pbus;
+ struct device_node *dn;
+ struct pci_dn *pdn;
+
+ /*
+ * We probably have a virtual bus which doesn't
+ * have an associated bridge.
+ */
+ pbus = bus;
+ while (pbus) {
+ if (pci_is_root_bus(pbus) || pbus->self)
+ break;
+
+ pbus = pbus->parent;
+ }
+
+ /*
+ * Except for virtual buses, all PCI buses should
+ * have device nodes.
+ */
+ dn = pci_bus_to_OF_node(pbus);
+ pdn = dn ? PCI_DN(dn) : NULL;
+
+ return pdn;
+}
+
+struct pci_dn *pci_get_pdn_by_devfn(struct pci_bus *bus,
+ int devfn)
+{
+ struct device_node *dn = NULL;
+ struct pci_dn *parent, *pdn;
+ struct pci_dev *pdev = NULL;
+
+ /* Fast path: fetch from PCI device */
+ list_for_each_entry(pdev, &bus->devices, bus_list) {
+ if (pdev->devfn == devfn) {
+ if (pdev->dev.archdata.pci_data)
+ return pdev->dev.archdata.pci_data;
+
+ dn = pci_device_to_OF_node(pdev);
+ break;
+ }
+ }
+
+ /* Fast path: fetch from device node */
+ pdn = dn ? PCI_DN(dn) : NULL;
+ if (pdn)
+ return pdn;
+
+ /* Slow path: fetch from firmware data hierarchy */
+ parent = pci_bus_to_pdn(bus);
+ if (!parent)
+ return NULL;
+
+ list_for_each_entry(pdn, &parent->child_list, list) {
+ if (pdn->busno == bus->number &&
+ pdn->devfn == devfn)
+ return pdn;
+ }
+
+ return NULL;
+}
struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
{
- struct device_node *dn = pci_device_to_OF_node(pdev);
- if (!dn)
+ struct device_node *dn;
+ struct pci_dn *parent, *pdn;
+
+ /* Search device directly */
+ if (pdev->dev.archdata.pci_data)
+ return pdev->dev.archdata.pci_data;
+
+ /* Check device node */
+ dn = pci_device_to_OF_node(pdev);
+ pdn = dn ? PCI_DN(dn) : NULL;
+ if (pdn)
+ return pdn;
+
+ /*
+ * VFs don't have device nodes. We hook their
+ * firmware data to PF's bridge.
+ */
+ parent = pci_bus_to_pdn(pdev->bus);
+ if (!parent)
return NULL;
- return PCI_DN(dn);
+
+ list_for_each_entry(pdn, &parent->child_list, list) {
+ if (pdn->busno == pdev->bus->number &&
+ pdn->devfn == pdev->devfn)
+ return pdn;
+ }
+
+ return NULL;
}
-/*
- * Traverse_func that inits the PCI fields of the device node.
- * NOTE: this *must* be done before read/write config to the device.
- */
-void *update_dn_pci_info(struct device_node *dn, void *data)
+#ifdef CONFIG_EEH
+static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
+{
+ struct eeh_dev *edev;
+
+ /* Allocate EEH device */
+ edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+ if (!edev)
+ return NULL;
+
+ /* Associate EEH device with OF node */
+ pdn->edev = edev;
+ edev->pdn = pdn;
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
+
+ return edev;
+}
+#endif /* CONFIG_EEH */
+
+#ifdef CONFIG_PCI_IOV
+static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent,
+ int busno, int devfn)
+{
+ struct pci_dn *pdn;
+
+ /* Except PHB, we always have the parent */
+ if (!parent)
+ return NULL;
+
+ pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
+ if (!pdn)
+ return NULL;
+
+ pdn->phb = parent->phb;
+ pdn->parent = parent;
+ pdn->busno = busno;
+ pdn->devfn = devfn;
+ pdn->pe_number = IODA_INVALID_PE;
+ INIT_LIST_HEAD(&pdn->child_list);
+ INIT_LIST_HEAD(&pdn->list);
+ list_add_tail(&pdn->list, &parent->child_list);
+
+ return pdn;
+}
+
+struct pci_dn *add_sriov_vf_pdns(struct pci_dev *pdev)
+{
+ struct pci_dn *parent, *pdn;
+ int i;
+
+ /* Only support IOV for now */
+ if (WARN_ON(!pdev->is_physfn))
+ return NULL;
+
+ /* Check if VFs have been populated */
+ pdn = pci_get_pdn(pdev);
+ if (!pdn || (pdn->flags & PCI_DN_FLAG_IOV_VF))
+ return NULL;
+
+ pdn->flags |= PCI_DN_FLAG_IOV_VF;
+ parent = pci_bus_to_pdn(pdev->bus);
+ if (!parent)
+ return NULL;
+
+ for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
+ struct eeh_dev *edev __maybe_unused;
+
+ pdn = add_one_sriov_vf_pdn(parent,
+ pci_iov_virtfn_bus(pdev, i),
+ pci_iov_virtfn_devfn(pdev, i));
+ if (!pdn) {
+ dev_warn(&pdev->dev, "%s: Cannot create firmware data for VF#%d\n",
+ __func__, i);
+ return NULL;
+ }
+
+#ifdef CONFIG_EEH
+ /* Create the EEH device for the VF */
+ edev = eeh_dev_init(pdn);
+ BUG_ON(!edev);
+
+ /* FIXME: these should probably be populated by the EEH probe */
+ edev->physfn = pdev;
+ edev->vf_index = i;
+#endif /* CONFIG_EEH */
+ }
+ return pci_get_pdn(pdev);
+}
+
+void remove_sriov_vf_pdns(struct pci_dev *pdev)
+{
+ struct pci_dn *parent;
+ struct pci_dn *pdn, *tmp;
+ int i;
+
+ /* Only support IOV PF for now */
+ if (WARN_ON(!pdev->is_physfn))
+ return;
+
+ /* Check if VFs have been populated */
+ pdn = pci_get_pdn(pdev);
+ if (!pdn || !(pdn->flags & PCI_DN_FLAG_IOV_VF))
+ return;
+
+ pdn->flags &= ~PCI_DN_FLAG_IOV_VF;
+ parent = pci_bus_to_pdn(pdev->bus);
+ if (!parent)
+ return;
+
+ /*
+ * We might introduce flag to pci_dn in future
+ * so that we can release VF's firmware data in
+ * a batch mode.
+ */
+ for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
+ struct eeh_dev *edev __maybe_unused;
+
+ list_for_each_entry_safe(pdn, tmp,
+ &parent->child_list, list) {
+ if (pdn->busno != pci_iov_virtfn_bus(pdev, i) ||
+ pdn->devfn != pci_iov_virtfn_devfn(pdev, i))
+ continue;
+
+#ifdef CONFIG_EEH
+ /*
+ * Release EEH state for this VF. The PCI core
+ * has already torn down the pci_dev for this VF, but
+ * we're responsible to removing the eeh_dev since it
+ * has the same lifetime as the pci_dn that spawned it.
+ */
+ edev = pdn_to_eeh_dev(pdn);
+ if (edev) {
+ /*
+ * We allocate pci_dn's for the totalvfs count,
+ * but only the vfs that were activated
+ * have a configured PE.
+ */
+ if (edev->pe)
+ eeh_pe_tree_remove(edev);
+
+ pdn->edev = NULL;
+ kfree(edev);
+ }
+#endif /* CONFIG_EEH */
+
+ if (!list_empty(&pdn->list))
+ list_del(&pdn->list);
+
+ kfree(pdn);
+ }
+ }
+}
+#endif /* CONFIG_PCI_IOV */
+
+struct pci_dn *pci_add_device_node_info(struct pci_controller *hose,
+ struct device_node *dn)
{
- struct pci_controller *phb = data;
const __be32 *type = of_get_property(dn, "ibm,pci-config-space-type", NULL);
const __be32 *regs;
+ struct device_node *parent;
struct pci_dn *pdn;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev;
+#endif
- pdn = zalloc_maybe_bootmem(sizeof(*pdn), GFP_KERNEL);
+ pdn = kzalloc(sizeof(*pdn), GFP_KERNEL);
if (pdn == NULL)
return NULL;
dn->data = pdn;
- pdn->node = dn;
- pdn->phb = phb;
-#ifdef CONFIG_PPC_POWERNV
+ pdn->phb = hose;
pdn->pe_number = IODA_INVALID_PE;
-#endif
regs = of_get_property(dn, "reg", NULL);
if (regs) {
u32 addr = of_read_number(regs, 1);
@@ -69,9 +305,81 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
pdn->devfn = (addr >> 8) & 0xff;
}
+ /* vendor/device IDs and class code */
+ regs = of_get_property(dn, "vendor-id", NULL);
+ pdn->vendor_id = regs ? of_read_number(regs, 1) : 0;
+ regs = of_get_property(dn, "device-id", NULL);
+ pdn->device_id = regs ? of_read_number(regs, 1) : 0;
+ regs = of_get_property(dn, "class-code", NULL);
+ pdn->class_code = regs ? of_read_number(regs, 1) : 0;
+
+ /* Extended config space */
pdn->pci_ext_config_space = (type && of_read_number(type, 1) == 1);
- return NULL;
+
+ /* Create EEH device */
+#ifdef CONFIG_EEH
+ edev = eeh_dev_init(pdn);
+ if (!edev) {
+ kfree(pdn);
+ return NULL;
+ }
+#endif
+
+ /* Attach to parent node */
+ INIT_LIST_HEAD(&pdn->child_list);
+ INIT_LIST_HEAD(&pdn->list);
+ parent = of_get_parent(dn);
+ pdn->parent = parent ? PCI_DN(parent) : NULL;
+ of_node_put(parent);
+ if (pdn->parent)
+ list_add_tail(&pdn->list, &pdn->parent->child_list);
+
+ return pdn;
+}
+EXPORT_SYMBOL_GPL(pci_add_device_node_info);
+
+void pci_remove_device_node_info(struct device_node *dn)
+{
+ struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
+ struct device_node *parent;
+ struct pci_dev *pdev;
+#ifdef CONFIG_EEH
+ struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+
+ if (edev)
+ edev->pdn = NULL;
+#endif
+
+ if (!pdn)
+ return;
+
+ WARN_ON(!list_empty(&pdn->child_list));
+ list_del(&pdn->list);
+
+ /* Drop the parent pci_dn's ref to our backing dt node */
+ parent = of_get_parent(dn);
+ if (parent)
+ of_node_put(parent);
+
+ /*
+ * At this point we *might* still have a pci_dev that was
+ * instantiated from this pci_dn. So defer free()ing it until
+ * the pci_dev's release function is called.
+ */
+ pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+ pdn->busno, pdn->devfn);
+ if (pdev) {
+ /* NB: pdev has a ref to dn */
+ pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+ pdn->flags |= PCI_DN_FLAG_DEAD;
+ } else {
+ dn->data = NULL;
+ kfree(pdn);
+ }
+
+ pci_dev_put(pdev);
}
+EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
/*
* Traverse a device tree stopping each PCI device in the tree.
@@ -91,8 +399,9 @@ void *update_dn_pci_info(struct device_node *dn, void *data)
* one of these nodes we also assume its siblings are non-pci for
* performance.
*/
-void *traverse_pci_devices(struct device_node *start, traverse_func pre,
- void *data)
+void *pci_traverse_device_nodes(struct device_node *start,
+ void *(*fn)(struct device_node *, void *),
+ void *data)
{
struct device_node *dn, *nextdn;
void *ret;
@@ -107,8 +416,11 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
if (classp)
class = of_read_number(classp, 1);
- if (pre && ((ret = pre(dn, data)) != NULL))
- return ret;
+ if (fn) {
+ ret = fn(dn, data);
+ if (ret)
+ return ret;
+ }
/* If we are a PCI bridge, go down */
if (dn->child && ((class >> 8) == PCI_CLASS_BRIDGE_PCI ||
@@ -130,6 +442,19 @@ void *traverse_pci_devices(struct device_node *start, traverse_func pre,
}
return NULL;
}
+EXPORT_SYMBOL_GPL(pci_traverse_device_nodes);
+
+static void *add_pdn(struct device_node *dn, void *data)
+{
+ struct pci_controller *hose = data;
+ struct pci_dn *pdn;
+
+ pdn = pci_add_device_node_info(hose, dn);
+ if (!pdn)
+ return ERR_PTR(-ENOMEM);
+
+ return NULL;
+}
/**
* pci_devs_phb_init_dynamic - setup pci devices under this PHB
@@ -145,31 +470,27 @@ void pci_devs_phb_init_dynamic(struct pci_controller *phb)
struct pci_dn *pdn;
/* PHB nodes themselves must not match */
- update_dn_pci_info(dn, phb);
- pdn = dn->data;
+ pdn = pci_add_device_node_info(phb, dn);
if (pdn) {
pdn->devfn = pdn->busno = -1;
+ pdn->vendor_id = pdn->device_id = pdn->class_code = 0;
pdn->phb = phb;
+ phb->pci_data = pdn;
}
/* Update dn->phb ptrs for new phb and children devices */
- traverse_pci_devices(dn, update_dn_pci_info, phb);
+ pci_traverse_device_nodes(dn, add_pdn, phb);
}
-/**
- * pci_devs_phb_init - Initialize phbs and pci devs under them.
- *
- * This routine walks over all phb's (pci-host bridges) on the
- * system, and sets up assorted pci-related structures
- * (including pci info in the device node structs) for each
- * pci device found underneath. This routine runs once,
- * early in the boot sequence.
- */
-void __init pci_devs_phb_init(void)
+static void pci_dev_pdn_setup(struct pci_dev *pdev)
{
- struct pci_controller *phb, *tmp;
+ struct pci_dn *pdn;
+
+ if (pdev->dev.archdata.pci_data)
+ return;
- /* This must be done first so the device nodes have valid pci info! */
- list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
- pci_devs_phb_init_dynamic(phb);
+ /* Setup the fast path */
+ pdn = pci_get_pdn(pdev);
+ pdev->dev.archdata.pci_data = pdn;
}
+DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, pci_dev_pdn_setup);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 44562aa97f16..756043dd06e9 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Helper routines to scan the device tree for PCI devices and busses
*
@@ -8,16 +9,12 @@
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
* Rework, based on alpha PCI code.
* Copyright (c) 2009 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
*/
#include <linux/pci.h>
#include <linux/export.h>
+#include <linux/of.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
/**
* get_int_prop - Decode a u32 from a device tree property
@@ -37,29 +34,75 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit# 33222222 22221111 11111100 00000000
+ * 10987654 32109876 54321098 76543210
+ * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell: llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n is 0 if the address is relocatable, 1 otherwise
+ * p is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t is 1 if the address is aliased (for non-relocatable I/O),
+ * below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
+ * ss is the space code, denoting the address space:
+ * 00 denotes Configuration Space
+ * 01 denotes I/O Space
+ * 10 denotes 32-bit-address Memory Space
+ * 11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd is the 5-bit Device Number
+ * fff is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
*/
+#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_RELOC (1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
+#define OF_PCI_ADDR0_ALIAS (1UL<<29)
+#define OF_PCI_ADDR0_BUS 0x00FF0000UL
+#define OF_PCI_ADDR0_DEV 0x0000F800UL
+#define OF_PCI_ADDR0_FN 0x00000700UL
+#define OF_PCI_ADDR0_BARREG 0x000000FFUL
+
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
- unsigned int flags = 0;
+ unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
- if (addr0 & 0x02000000) {
+ if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (as == OF_PCI_ADDR0_SPACE_MMIO64)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+ if (addr0 & OF_PCI_ADDR0_ALIAS)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+ if (addr0 & OF_PCI_ADDR0_PREFETCH)
+ flags |= IORESOURCE_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_PREFETCH;
+
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
- if (!bridge && (addr0 & 0xff) == 0x30)
+ if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
- } else if (addr0 & 0x01000000)
+
+ } else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+
if (flags)
flags |= IORESOURCE_SIZEALIGN;
+
return flags;
}
@@ -80,10 +123,16 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
const __be32 *addrs;
u32 i;
int proplen;
+ bool mark_unset = false;
addrs = of_get_property(node, "assigned-addresses", &proplen);
- if (!addrs)
- return;
+ if (!addrs || !proplen) {
+ addrs = of_get_property(node, "reg", &proplen);
+ if (!addrs || !proplen)
+ return;
+ mark_unset = true;
+ }
+
pr_debug(" parse addresses (%d bytes) @ %p\n", proplen, addrs);
for (; proplen >= 20; proplen -= 20, addrs += 5) {
flags = pci_parse_of_flags(of_read_number(addrs, 1), 0);
@@ -102,12 +151,14 @@ static void of_pci_parse_addrs(struct device_node *node, struct pci_dev *dev)
res = &dev->resource[(i - PCI_BASE_ADDRESS_0) >> 2];
} else if (i == dev->rom_base_reg) {
res = &dev->resource[PCI_ROM_RESOURCE];
- flags |= IORESOURCE_READONLY | IORESOURCE_CACHEABLE;
+ flags |= IORESOURCE_READONLY;
} else {
printk(KERN_ERR "PCI: bad cfg reg num 0x%x\n", i);
continue;
}
res->flags = flags;
+ if (mark_unset)
+ res->flags |= IORESOURCE_UNSET;
res->name = pci_name(dev);
region.start = base;
region.end = base + size - 1;
@@ -125,17 +176,13 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
struct pci_bus *bus, int devfn)
{
struct pci_dev *dev;
- const char *type;
- struct pci_slot *slot;
dev = pci_alloc_dev(bus);
if (!dev)
return NULL;
- type = of_get_property(node, "device_type", NULL);
- if (type == NULL)
- type = "";
- pr_debug(" create device, devfn: %x, type: %s\n", devfn, type);
+ pr_debug(" create device, devfn: %x, type: %s\n", devfn,
+ of_node_get_device_type(node));
dev->dev.of_node = of_node_get(node);
dev->dev.parent = bus->bridge;
@@ -145,10 +192,7 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
dev->needs_freset = 0; /* pcie fundamental reset required */
set_pcie_port_type(dev);
- list_for_each_entry(slot, &dev->bus->slots, list)
- if (PCI_SLOT(dev->devfn) == slot->number)
- dev->slot = slot;
-
+ pci_dev_assign_slot(dev);
dev->vendor = get_int_prop(node, "vendor-id", 0xffff);
dev->device = get_int_prop(node, "device-id", 0xffff);
dev->subsystem_vendor = get_int_prop(node, "subsystem-vendor-id", 0);
@@ -171,18 +215,18 @@ struct pci_dev *of_create_pci_dev(struct device_node *node,
/* Early fixups, before probing the BARs */
pci_fixup_device(pci_fixup_early, dev);
- if (!strcmp(type, "pci") || !strcmp(type, "pciex")) {
+ if (of_node_is_type(node, "pci") || of_node_is_type(node, "pciex")) {
/* a PCI-PCI bridge */
dev->hdr_type = PCI_HEADER_TYPE_BRIDGE;
dev->rom_base_reg = PCI_ROM_ADDRESS1;
set_pcie_hotplug_bridge(dev);
- } else if (!strcmp(type, "cardbus")) {
+ } else if (of_node_is_type(node, "cardbus")) {
dev->hdr_type = PCI_HEADER_TYPE_CARDBUS;
} else {
dev->hdr_type = PCI_HEADER_TYPE_NORMAL;
dev->rom_base_reg = PCI_ROM_ADDRESS;
/* Maybe do a default OF mapping here */
- dev->irq = NO_IRQ;
+ dev->irq = 0;
}
of_pci_parse_addrs(node, dev);
@@ -200,13 +244,14 @@ EXPORT_SYMBOL(of_create_pci_dev);
* @dev: pci_dev structure for the bridge
*
* of_scan_bus() calls this routine for each PCI bridge that it finds, and
- * this routine in turn call of_scan_bus() recusively to scan for more child
+ * this routine in turn call of_scan_bus() recursively to scan for more child
* devices.
*/
void of_scan_pci_bridge(struct pci_dev *dev)
{
struct device_node *node = dev->dev.of_node;
struct pci_bus *bus;
+ struct pci_controller *phb;
const __be32 *busrange, *ranges;
int len, i, mode;
struct pci_bus_region region;
@@ -214,19 +259,19 @@ void of_scan_pci_bridge(struct pci_dev *dev)
unsigned int flags;
u64 size;
- pr_debug("of_scan_pci_bridge(%s)\n", node->full_name);
+ pr_debug("of_scan_pci_bridge(%pOF)\n", node);
/* parse bus-range property */
busrange = of_get_property(node, "bus-range", &len);
if (busrange == NULL || len != 8) {
- printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get bus-range for PCI-PCI bridge %pOF\n",
+ node);
return;
}
ranges = of_get_property(node, "ranges", &len);
if (ranges == NULL) {
- printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %s\n",
- node->full_name);
+ printk(KERN_DEBUG "Can't get ranges for PCI-PCI bridge %pOF\n",
+ node);
return;
}
@@ -236,8 +281,8 @@ void of_scan_pci_bridge(struct pci_dev *dev)
bus = pci_add_new_bus(dev->bus, dev,
of_read_number(busrange, 1));
if (!bus) {
- printk(KERN_ERR "Failed to create pci bus for %s\n",
- node->full_name);
+ printk(KERN_ERR "Failed to create pci bus for %pOF\n",
+ node);
return;
}
}
@@ -265,13 +310,13 @@ void of_scan_pci_bridge(struct pci_dev *dev)
res = bus->resource[0];
if (res->flags) {
printk(KERN_ERR "PCI: ignoring extra I/O range"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
} else {
if (i >= PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES) {
printk(KERN_ERR "PCI: too many memory ranges"
- " for bridge %s\n", node->full_name);
+ " for bridge %pOF\n", node);
continue;
}
res = bus->resource[i];
@@ -286,9 +331,11 @@ void of_scan_pci_bridge(struct pci_dev *dev)
bus->number);
pr_debug(" bus name: %s\n", bus->name);
+ phb = pci_bus_to_host(bus);
+
mode = PCI_PROBE_NORMAL;
- if (ppc_md.pci_probe_mode)
- mode = ppc_md.pci_probe_mode(bus);
+ if (phb->controller_ops.probe_mode)
+ mode = phb->controller_ops.probe_mode(bus);
pr_debug(" probe mode: %d\n", mode);
if (mode == PCI_PROBE_DEVTREE)
@@ -305,10 +352,10 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
const __be32 *reg;
int reglen, devfn;
#ifdef CONFIG_EEH
- struct eeh_dev *edev = of_node_to_eeh_dev(dn);
+ struct eeh_dev *edev = pdn_to_eeh_dev(PCI_DN(dn));
#endif
- pr_debug(" * %s\n", dn->full_name);
+ pr_debug(" * %pOF\n", dn);
if (!of_device_is_available(dn))
return NULL;
@@ -351,8 +398,8 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
struct device_node *child;
struct pci_dev *dev;
- pr_debug("of_scan_bus(%s) bus no %d...\n",
- node->full_name, bus->number);
+ pr_debug("of_scan_bus(%pOF) bus no %d...\n",
+ node, bus->number);
/* Scan direct children */
for_each_child_of_node(node, child) {
@@ -367,14 +414,10 @@ static void __of_scan_bus(struct device_node *node, struct pci_bus *bus,
*/
if (!rescan_existing)
pcibios_setup_bus_self(bus);
- pcibios_setup_bus_devices(bus);
/* Now scan child busses */
- list_for_each_entry(dev, &bus->devices, bus_list) {
- if (pci_is_bridge(dev)) {
- of_scan_pci_bridge(dev);
- }
- }
+ for_each_pci_bridge(dev, bus)
+ of_scan_pci_bridge(dev);
}
/**
diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c
index 58eaa3ddf7b9..9fabb4d9235e 100644
--- a/arch/powerpc/kernel/pmc.c
+++ b/arch/powerpc/kernel/pmc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/kernel/pmc.c
*
@@ -5,11 +6,6 @@
* Includes code formerly from arch/ppc/kernel/perfmon.c:
* Author: Andy Fleming
* Copyright (c) 2004 Freescale Semiconductor, Inc
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -29,7 +25,7 @@ static void dummy_perf(struct pt_regs *regs)
{
#if defined(CONFIG_FSL_EMB_PERFMON)
mtpmr(PMRN_PMGC0, mfpmr(PMRN_PMGC0) & ~PMGC0_PMIE);
-#elif defined(CONFIG_PPC64) || defined(CONFIG_6xx)
+#elif defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
if (cur_cpu_spec->pmc_type == PPC_PMC_IBM)
mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~(MMCR0_PMXE|MMCR0_PMAO));
#else
@@ -78,7 +74,7 @@ void release_pmc_hardware(void)
}
EXPORT_SYMBOL_GPL(release_pmc_hardware);
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
void power4_enable_pmcs(void)
{
unsigned long hid0;
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
deleted file mode 100644
index 48d17d6fca5b..000000000000
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ /dev/null
@@ -1,209 +0,0 @@
-#include <linux/export.h>
-#include <linux/threads.h>
-#include <linux/smp.h>
-#include <linux/sched.h>
-#include <linux/elfcore.h>
-#include <linux/string.h>
-#include <linux/interrupt.h>
-#include <linux/screen_info.h>
-#include <linux/vt_kern.h>
-#include <linux/nvram.h>
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/cacheflush.h>
-#include <asm/uaccess.h>
-#include <asm/io.h>
-#include <linux/atomic.h>
-#include <asm/checksum.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
-#include <linux/adb.h>
-#include <linux/cuda.h>
-#include <linux/pmu.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/irq.h>
-#include <asm/pmac_feature.h>
-#include <asm/dma.h>
-#include <asm/machdep.h>
-#include <asm/hw_irq.h>
-#include <asm/nvram.h>
-#include <asm/mmu_context.h>
-#include <asm/backlight.h>
-#include <asm/time.h>
-#include <asm/cputable.h>
-#include <asm/btext.h>
-#include <asm/div64.h>
-#include <asm/signal.h>
-#include <asm/dcr.h>
-#include <asm/ftrace.h>
-#include <asm/switch_to.h>
-#include <asm/epapr_hcalls.h>
-
-#ifdef CONFIG_PPC32
-extern void transfer_to_handler(void);
-extern void do_IRQ(struct pt_regs *regs);
-extern void machine_check_exception(struct pt_regs *regs);
-extern void alignment_exception(struct pt_regs *regs);
-extern void program_check_exception(struct pt_regs *regs);
-extern void single_step_exception(struct pt_regs *regs);
-extern int sys_sigreturn(struct pt_regs *regs);
-
-EXPORT_SYMBOL(clear_pages);
-EXPORT_SYMBOL(ISA_DMA_THRESHOLD);
-EXPORT_SYMBOL(DMA_MODE_READ);
-EXPORT_SYMBOL(DMA_MODE_WRITE);
-
-EXPORT_SYMBOL(transfer_to_handler);
-EXPORT_SYMBOL(do_IRQ);
-EXPORT_SYMBOL(machine_check_exception);
-EXPORT_SYMBOL(alignment_exception);
-EXPORT_SYMBOL(program_check_exception);
-EXPORT_SYMBOL(single_step_exception);
-EXPORT_SYMBOL(sys_sigreturn);
-#endif
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-#endif
-
-EXPORT_SYMBOL(strcpy);
-EXPORT_SYMBOL(strncpy);
-EXPORT_SYMBOL(strcat);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strncmp);
-
-#ifndef CONFIG_GENERIC_CSUM
-EXPORT_SYMBOL(csum_partial);
-EXPORT_SYMBOL(csum_partial_copy_generic);
-EXPORT_SYMBOL(ip_fast_csum);
-EXPORT_SYMBOL(csum_tcpudp_magic);
-#endif
-
-EXPORT_SYMBOL(__copy_tofrom_user);
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(copy_page);
-
-#if defined(CONFIG_PCI) && defined(CONFIG_PPC32)
-EXPORT_SYMBOL(isa_io_base);
-EXPORT_SYMBOL(isa_mem_base);
-EXPORT_SYMBOL(pci_dram_offset);
-#endif /* CONFIG_PCI */
-
-EXPORT_SYMBOL(start_thread);
-
-#ifdef CONFIG_PPC_FPU
-EXPORT_SYMBOL(giveup_fpu);
-EXPORT_SYMBOL(load_fp_state);
-EXPORT_SYMBOL(store_fp_state);
-#endif
-#ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL(giveup_altivec);
-EXPORT_SYMBOL(load_vr_state);
-EXPORT_SYMBOL(store_vr_state);
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL(giveup_vsx);
-EXPORT_SYMBOL_GPL(__giveup_vsx);
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
-EXPORT_SYMBOL(giveup_spe);
-#endif /* CONFIG_SPE */
-
-#ifndef CONFIG_PPC64
-EXPORT_SYMBOL(flush_instruction_cache);
-#endif
-EXPORT_SYMBOL(flush_dcache_range);
-EXPORT_SYMBOL(flush_icache_range);
-
-#ifdef CONFIG_SMP
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(smp_hw_index);
-#endif
-#endif
-
-#ifdef CONFIG_ADB
-EXPORT_SYMBOL(adb_request);
-EXPORT_SYMBOL(adb_register);
-EXPORT_SYMBOL(adb_unregister);
-EXPORT_SYMBOL(adb_poll);
-EXPORT_SYMBOL(adb_try_handler_change);
-#endif /* CONFIG_ADB */
-#ifdef CONFIG_ADB_CUDA
-EXPORT_SYMBOL(cuda_request);
-EXPORT_SYMBOL(cuda_poll);
-#endif /* CONFIG_ADB_CUDA */
-EXPORT_SYMBOL(to_tm);
-
-#ifdef CONFIG_PPC32
-long long __ashrdi3(long long, int);
-long long __ashldi3(long long, int);
-long long __lshrdi3(long long, int);
-EXPORT_SYMBOL(__ashrdi3);
-EXPORT_SYMBOL(__ashldi3);
-EXPORT_SYMBOL(__lshrdi3);
-int __ucmpdi2(unsigned long long, unsigned long long);
-EXPORT_SYMBOL(__ucmpdi2);
-int __cmpdi2(long long, long long);
-EXPORT_SYMBOL(__cmpdi2);
-#endif
-long long __bswapdi2(long long);
-EXPORT_SYMBOL(__bswapdi2);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memchr);
-
-#if defined(CONFIG_FB_VGA16_MODULE)
-EXPORT_SYMBOL(screen_info);
-#endif
-
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(timer_interrupt);
-EXPORT_SYMBOL(tb_ticks_per_jiffy);
-EXPORT_SYMBOL(cacheable_memcpy);
-EXPORT_SYMBOL(cacheable_memzero);
-#endif
-
-#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(switch_mmu_context);
-#endif
-
-#ifdef CONFIG_PPC_STD_MMU_32
-extern long mol_trampoline;
-EXPORT_SYMBOL(mol_trampoline); /* For MOL */
-EXPORT_SYMBOL(flush_hash_pages); /* For MOL */
-#ifdef CONFIG_SMP
-extern int mmu_hash_lock;
-EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
-#endif /* CONFIG_SMP */
-extern long *intercept_table;
-EXPORT_SYMBOL(intercept_table);
-#endif /* CONFIG_PPC_STD_MMU_32 */
-#ifdef CONFIG_PPC_DCR_NATIVE
-EXPORT_SYMBOL(__mtdcr);
-EXPORT_SYMBOL(__mfdcr);
-#endif
-EXPORT_SYMBOL(empty_zero_page);
-
-#ifdef CONFIG_PPC64
-EXPORT_SYMBOL(__arch_hweight8);
-EXPORT_SYMBOL(__arch_hweight16);
-EXPORT_SYMBOL(__arch_hweight32);
-EXPORT_SYMBOL(__arch_hweight64);
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S_64
-EXPORT_SYMBOL_GPL(mmu_psize_defs);
-#endif
-
-#ifdef CONFIG_EPAPR_PARAVIRT
-EXPORT_SYMBOL(epapr_hypercall_start);
-#endif
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 1b1787d52896..a9b9c32d0c1f 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -1,17 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 1996 Paul Mackerras.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* NOTE: assert(sizeof(buf) > 23 * sizeof(long))
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
+#include <asm/asm-compat.h>
/*
* Grab the register values as they are now.
@@ -24,52 +21,33 @@
* different ABIs, though).
*/
_GLOBAL(ppc_save_regs)
- PPC_STL r0,0*SZL(r3)
- PPC_STL r2,2*SZL(r3)
- PPC_STL r3,3*SZL(r3)
- PPC_STL r4,4*SZL(r3)
- PPC_STL r5,5*SZL(r3)
- PPC_STL r6,6*SZL(r3)
- PPC_STL r7,7*SZL(r3)
- PPC_STL r8,8*SZL(r3)
- PPC_STL r9,9*SZL(r3)
- PPC_STL r10,10*SZL(r3)
- PPC_STL r11,11*SZL(r3)
- PPC_STL r12,12*SZL(r3)
- PPC_STL r13,13*SZL(r3)
- PPC_STL r14,14*SZL(r3)
- PPC_STL r15,15*SZL(r3)
- PPC_STL r16,16*SZL(r3)
- PPC_STL r17,17*SZL(r3)
- PPC_STL r18,18*SZL(r3)
- PPC_STL r19,19*SZL(r3)
- PPC_STL r20,20*SZL(r3)
- PPC_STL r21,21*SZL(r3)
- PPC_STL r22,22*SZL(r3)
- PPC_STL r23,23*SZL(r3)
- PPC_STL r24,24*SZL(r3)
- PPC_STL r25,25*SZL(r3)
- PPC_STL r26,26*SZL(r3)
- PPC_STL r27,27*SZL(r3)
- PPC_STL r28,28*SZL(r3)
- PPC_STL r29,29*SZL(r3)
- PPC_STL r30,30*SZL(r3)
- PPC_STL r31,31*SZL(r3)
- /* go up one stack frame for SP */
- PPC_LL r4,0(r1)
- PPC_STL r4,1*SZL(r3)
+ /* This allows stack frame accessor macros and offsets to be used */
+ subi r3,r3,STACK_INT_FRAME_REGS
+ PPC_STL r0,GPR0(r3)
+#ifdef CONFIG_PPC32
+ stmw r2,GPR2(r3)
+#else
+ SAVE_GPRS(2, 31, r3)
+ lbz r0,PACAIRQSOFTMASK(r13)
+ PPC_STL r0,SOFTE(r3)
+#endif
+ /* store current SP */
+ PPC_STL r1,GPR1(r3)
/* get caller's LR */
+ PPC_LL r4,0(r1)
PPC_LL r0,LRSAVE(r4)
- PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_LINK(r3)
+ mflr r0
+ PPC_STL r0,_NIP(r3)
mfmsr r0
- PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_MSR(r3)
mfctr r0
- PPC_STL r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CTR(r3)
mfxer r0
- PPC_STL r0,_XER-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_XER(r3)
mfcr r0
- PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_CCR(r3)
li r0,0
- PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
+ PPC_STL r0,_TRAP(r3)
+ PPC_STL r0,ORIG_GPR3(r3)
blr
diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c
index c30612aad68e..d083b4517065 100644
--- a/arch/powerpc/kernel/proc_powerpc.c
+++ b/arch/powerpc/kernel/proc_powerpc.c
@@ -1,33 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Mike Corrigan & Dave Engebretsen IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/init.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/string.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/rtas.h>
-#include <asm/uaccess.h>
-#include <asm/prom.h>
+#include <asm/systemcfg.h>
+#include <linux/uaccess.h>
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
static loff_t page_map_seek(struct file *file, loff_t off, int whence)
{
@@ -38,7 +28,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes
loff_t *ppos)
{
return simple_read_from_buffer(buf, nbytes, ppos,
- PDE_DATA(file_inode(file)), PAGE_SIZE);
+ pde_data(file_inode(file)), PAGE_SIZE);
}
static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
@@ -46,25 +36,46 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma )
if ((vma->vm_end - vma->vm_start) > PAGE_SIZE)
return -EINVAL;
- remap_pfn_range(vma, vma->vm_start,
- __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT,
- PAGE_SIZE, vma->vm_page_prot);
- return 0;
+ return remap_pfn_range(vma, vma->vm_start,
+ __pa(pde_data(file_inode(file))) >> PAGE_SHIFT,
+ PAGE_SIZE, vma->vm_page_prot);
}
-static const struct file_operations page_map_fops = {
- .llseek = page_map_seek,
- .read = page_map_read,
- .mmap = page_map_mmap
+static const struct proc_ops page_map_proc_ops = {
+ .proc_lseek = page_map_seek,
+ .proc_read = page_map_read,
+ .proc_mmap = page_map_mmap,
};
+static union {
+ struct systemcfg data;
+ u8 page[PAGE_SIZE];
+} systemcfg_data_store __page_aligned_data;
+struct systemcfg *systemcfg = &systemcfg_data_store.data;
static int __init proc_ppc64_init(void)
{
struct proc_dir_entry *pde;
- pde = proc_create_data("powerpc/systemcfg", S_IFREG|S_IRUGO, NULL,
- &page_map_fops, vdso_data);
+ strscpy(systemcfg->eye_catcher, "SYSTEMCFG:PPC64");
+ systemcfg->version.major = SYSTEMCFG_MAJOR;
+ systemcfg->version.minor = SYSTEMCFG_MINOR;
+ systemcfg->processor = mfspr(SPRN_PVR);
+ /*
+ * Fake the old platform number for pSeries and add
+ * in LPAR bit if necessary
+ */
+ systemcfg->platform = 0x100;
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ systemcfg->platform |= 1;
+ systemcfg->physicalMemorySize = memblock_phys_mem_size();
+ systemcfg->dcache_size = ppc64_caches.l1d.size;
+ systemcfg->dcache_line_size = ppc64_caches.l1d.line_size;
+ systemcfg->icache_size = ppc64_caches.l1i.size;
+ systemcfg->icache_line_size = ppc64_caches.l1i.line_size;
+
+ pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL,
+ &page_map_proc_ops, systemcfg);
if (!pde)
return 1;
proc_set_size(pde, PAGE_SIZE);
@@ -73,7 +84,7 @@ static int __init proc_ppc64_init(void)
}
__initcall(proc_ppc64_init);
-#endif /* CONFIG_PPC64 */
+#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */
/*
* Create the ppc64 and ppc64/rtas directories early. This allows us to
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index bf44ae962ab8..eb23966ac0a9 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Derived from "arch/i386/kernel/process.c"
* Copyright (C) 1995 Linus Torvalds
@@ -7,15 +8,13 @@
*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -35,15 +34,15 @@
#include <linux/ftrace.h>
#include <linux/kernel_stat.h>
#include <linux/personality.h>
-#include <linux/random.h>
#include <linux/hw_breakpoint.h>
+#include <linux/uaccess.h>
+#include <linux/pkeys.h>
+#include <linux/seq_buf.h>
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/mmu.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/time.h>
#include <asm/runlatch.h>
@@ -53,8 +52,16 @@
#include <asm/debug.h>
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
+#include <asm/hw_irq.h>
#endif
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
+#include <asm/exec.h>
+#include <asm/livepatch.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
+#include <asm/stacktrace.h>
+#include <asm/hw_breakpoint.h>
+
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -65,17 +72,15 @@
#define TM_DEBUG(x...) do { } while(0)
#endif
-extern unsigned long _get_SP(void);
-
-#ifndef CONFIG_SMP
-struct task_struct *last_task_used_math = NULL;
-struct task_struct *last_task_used_altivec = NULL;
-struct task_struct *last_task_used_vsx = NULL;
-struct task_struct *last_task_used_spe = NULL;
-#endif
-
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void giveup_fpu_maybe_transactional(struct task_struct *tsk)
+/*
+ * Are we running in "Suspend disabled" mode? If so we have to block any
+ * sigreturn that would get us into suspended state, and we also warn in some
+ * other paths that we should never reach with suspend disabled.
+ */
+bool tm_suspend_disabled __ro_after_init = false;
+
+static void check_if_tm_restore_required(struct task_struct *tsk)
{
/*
* If we are saving the current thread's registers, and the
@@ -86,37 +91,85 @@ void giveup_fpu_maybe_transactional(struct task_struct *tsk)
if (tsk == current && tsk->thread.regs &&
MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
!test_thread_flag(TIF_RESTORE_TM)) {
- tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
+ regs_set_return_msr(&tsk->thread.ckpt_regs,
+ tsk->thread.regs->msr);
set_thread_flag(TIF_RESTORE_TM);
}
+}
+
+#else
+static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
- giveup_fpu(tsk);
+bool strict_msr_control;
+EXPORT_SYMBOL(strict_msr_control);
+
+static int __init enable_strict_msr_control(char *str)
+{
+ strict_msr_control = true;
+ pr_info("Enabling strict facility control\n");
+
+ return 0;
}
+early_param("ppc_strict_facility_enable", enable_strict_msr_control);
-void giveup_altivec_maybe_transactional(struct task_struct *tsk)
+/* notrace because it's called by restore_math */
+unsigned long notrace msr_check_and_set(unsigned long bits)
{
- /*
- * If we are saving the current thread's registers, and the
- * thread is in a transactional state, set the TIF_RESTORE_TM
- * bit so that we know to restore the registers before
- * returning to userspace.
- */
- if (tsk == current && tsk->thread.regs &&
- MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
- !test_thread_flag(TIF_RESTORE_TM)) {
- tsk->thread.tm_orig_msr = tsk->thread.regs->msr;
- set_thread_flag(TIF_RESTORE_TM);
- }
+ unsigned long oldmsr = mfmsr();
+ unsigned long newmsr;
+
+ newmsr = oldmsr | bits;
+
+ if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
+ newmsr |= MSR_VSX;
- giveup_altivec(tsk);
+ if (oldmsr != newmsr)
+ newmsr = mtmsr_isync_irqsafe(newmsr);
+
+ return newmsr;
}
+EXPORT_SYMBOL_GPL(msr_check_and_set);
-#else
-#define giveup_fpu_maybe_transactional(tsk) giveup_fpu(tsk)
-#define giveup_altivec_maybe_transactional(tsk) giveup_altivec(tsk)
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+/* notrace because it's called by restore_math */
+void notrace __msr_check_and_clear(unsigned long bits)
+{
+ unsigned long oldmsr = mfmsr();
+ unsigned long newmsr;
+
+ newmsr = oldmsr & ~bits;
+
+ if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
+ newmsr &= ~MSR_VSX;
+
+ if (oldmsr != newmsr)
+ mtmsr_isync_irqsafe(newmsr);
+}
+EXPORT_SYMBOL(__msr_check_and_clear);
#ifdef CONFIG_PPC_FPU
+static void __giveup_fpu(struct task_struct *tsk)
+{
+ unsigned long msr;
+
+ save_fpu(tsk);
+ msr = tsk->thread.regs->msr;
+ msr &= ~(MSR_FP|MSR_FE0|MSR_FE1);
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr &= ~MSR_VSX;
+ regs_set_return_msr(tsk->thread.regs, msr);
+}
+
+void giveup_fpu(struct task_struct *tsk)
+{
+ check_if_tm_restore_required(tsk);
+
+ msr_check_and_set(MSR_FP);
+ __giveup_fpu(tsk);
+ msr_check_and_clear(MSR_FP);
+}
+EXPORT_SYMBOL(giveup_fpu);
+
/*
* Make sure the floating-point register state in
* the thread_struct is up to date for task tsk.
@@ -134,52 +187,94 @@ void flush_fp_to_thread(struct task_struct *tsk)
*/
preempt_disable();
if (tsk->thread.regs->msr & MSR_FP) {
-#ifdef CONFIG_SMP
/*
* This should only ever be called for current or
* for a stopped child process. Since we save away
- * the FP register state on context switch on SMP,
+ * the FP register state on context switch,
* there is something wrong if a stopped child appears
* to still have its FP state in the CPU registers.
*/
BUG_ON(tsk != current);
-#endif
- giveup_fpu_maybe_transactional(tsk);
+ giveup_fpu(tsk);
}
preempt_enable();
}
}
EXPORT_SYMBOL_GPL(flush_fp_to_thread);
-#endif /* CONFIG_PPC_FPU */
void enable_kernel_fp(void)
{
+ unsigned long cpumsr;
+
WARN_ON(preemptible());
-#ifdef CONFIG_SMP
- if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
- giveup_fpu_maybe_transactional(current);
- else
- giveup_fpu(NULL); /* just enables FP for kernel */
-#else
- giveup_fpu_maybe_transactional(last_task_used_math);
-#endif /* CONFIG_SMP */
+ cpumsr = msr_check_and_set(MSR_FP);
+
+ if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
+ check_if_tm_restore_required(current);
+ /*
+ * If a thread has already been reclaimed then the
+ * checkpointed registers are on the CPU but have definitely
+ * been saved by the reclaim code. Don't need to and *cannot*
+ * giveup as this would save to the 'live' structure not the
+ * checkpointed structure.
+ */
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
+ return;
+ __giveup_fpu(current);
+ }
}
EXPORT_SYMBOL(enable_kernel_fp);
+#else
+static inline void __giveup_fpu(struct task_struct *tsk) { }
+#endif /* CONFIG_PPC_FPU */
#ifdef CONFIG_ALTIVEC
+static void __giveup_altivec(struct task_struct *tsk)
+{
+ unsigned long msr;
+
+ save_altivec(tsk);
+ msr = tsk->thread.regs->msr;
+ msr &= ~MSR_VEC;
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr &= ~MSR_VSX;
+ regs_set_return_msr(tsk->thread.regs, msr);
+}
+
+void giveup_altivec(struct task_struct *tsk)
+{
+ check_if_tm_restore_required(tsk);
+
+ msr_check_and_set(MSR_VEC);
+ __giveup_altivec(tsk);
+ msr_check_and_clear(MSR_VEC);
+}
+EXPORT_SYMBOL(giveup_altivec);
+
void enable_kernel_altivec(void)
{
+ unsigned long cpumsr;
+
WARN_ON(preemptible());
-#ifdef CONFIG_SMP
- if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
- giveup_altivec_maybe_transactional(current);
- else
- giveup_altivec_notask();
-#else
- giveup_altivec_maybe_transactional(last_task_used_altivec);
-#endif /* CONFIG_SMP */
+ cpumsr = msr_check_and_set(MSR_VEC);
+
+ if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
+ check_if_tm_restore_required(current);
+ /*
+ * If a thread has already been reclaimed then the
+ * checkpointed registers are on the CPU but have definitely
+ * been saved by the reclaim code. Don't need to and *cannot*
+ * giveup as this would save to the 'live' structure not the
+ * checkpointed structure.
+ */
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
+ return;
+ __giveup_altivec(current);
+ }
}
EXPORT_SYMBOL(enable_kernel_altivec);
@@ -192,10 +287,8 @@ void flush_altivec_to_thread(struct task_struct *tsk)
if (tsk->thread.regs) {
preempt_disable();
if (tsk->thread.regs->msr & MSR_VEC) {
-#ifdef CONFIG_SMP
BUG_ON(tsk != current);
-#endif
- giveup_altivec_maybe_transactional(tsk);
+ giveup_altivec(tsk);
}
preempt_enable();
}
@@ -204,39 +297,64 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
-#if 0
-/* not currently used, but some crazy RAID module might want to later */
-void enable_kernel_vsx(void)
+static void __giveup_vsx(struct task_struct *tsk)
{
- WARN_ON(preemptible());
+ unsigned long msr = tsk->thread.regs->msr;
-#ifdef CONFIG_SMP
- if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
- giveup_vsx(current);
- else
- giveup_vsx(NULL); /* just enable vsx for kernel - force */
-#else
- giveup_vsx(last_task_used_vsx);
-#endif /* CONFIG_SMP */
+ /*
+ * We should never be setting MSR_VSX without also setting
+ * MSR_FP and MSR_VEC
+ */
+ WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+ /* __giveup_fpu will clear MSR_VSX */
+ if (msr & MSR_FP)
+ __giveup_fpu(tsk);
+ if (msr & MSR_VEC)
+ __giveup_altivec(tsk);
}
-EXPORT_SYMBOL(enable_kernel_vsx);
-#endif
-void giveup_vsx(struct task_struct *tsk)
+static void giveup_vsx(struct task_struct *tsk)
{
- giveup_fpu_maybe_transactional(tsk);
- giveup_altivec_maybe_transactional(tsk);
+ check_if_tm_restore_required(tsk);
+
+ msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
__giveup_vsx(tsk);
+ msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
}
+void enable_kernel_vsx(void)
+{
+ unsigned long cpumsr;
+
+ WARN_ON(preemptible());
+
+ cpumsr = msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
+
+ if (current->thread.regs &&
+ (current->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP))) {
+ check_if_tm_restore_required(current);
+ /*
+ * If a thread has already been reclaimed then the
+ * checkpointed registers are on the CPU but have definitely
+ * been saved by the reclaim code. Don't need to and *cannot*
+ * giveup as this would save to the 'live' structure not the
+ * checkpointed structure.
+ */
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
+ return;
+ __giveup_vsx(current);
+ }
+}
+EXPORT_SYMBOL(enable_kernel_vsx);
+
void flush_vsx_to_thread(struct task_struct *tsk)
{
if (tsk->thread.regs) {
preempt_disable();
- if (tsk->thread.regs->msr & MSR_VSX) {
-#ifdef CONFIG_SMP
+ if (tsk->thread.regs->msr & (MSR_VSX|MSR_VEC|MSR_FP)) {
BUG_ON(tsk != current);
-#endif
giveup_vsx(tsk);
}
preempt_enable();
@@ -246,19 +364,26 @@ EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
+void giveup_spe(struct task_struct *tsk)
+{
+ check_if_tm_restore_required(tsk);
+
+ msr_check_and_set(MSR_SPE);
+ __giveup_spe(tsk);
+ msr_check_and_clear(MSR_SPE);
+}
+EXPORT_SYMBOL(giveup_spe);
void enable_kernel_spe(void)
{
WARN_ON(preemptible());
-#ifdef CONFIG_SMP
- if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
- giveup_spe(current);
- else
- giveup_spe(NULL); /* just enable SPE for kernel - force */
-#else
- giveup_spe(last_task_used_spe);
-#endif /* __SMP __ */
+ msr_check_and_set(MSR_SPE);
+
+ if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) {
+ check_if_tm_restore_required(current);
+ __giveup_spe(current);
+ }
}
EXPORT_SYMBOL(enable_kernel_spe);
@@ -267,9 +392,7 @@ void flush_spe_to_thread(struct task_struct *tsk)
if (tsk->thread.regs) {
preempt_disable();
if (tsk->thread.regs->msr & MSR_SPE) {
-#ifdef CONFIG_SMP
BUG_ON(tsk != current);
-#endif
tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
giveup_spe(tsk);
}
@@ -278,77 +401,287 @@ void flush_spe_to_thread(struct task_struct *tsk)
}
#endif /* CONFIG_SPE */
-#ifndef CONFIG_SMP
-/*
- * If we are doing lazy switching of CPU state (FP, altivec or SPE),
- * and the current task has some state, discard it.
- */
-void discard_lazy_cpu_state(void)
+static unsigned long msr_all_available;
+
+static int __init init_msr_all_available(void)
{
- preempt_disable();
- if (last_task_used_math == current)
- last_task_used_math = NULL;
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr_all_available |= MSR_FP;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ msr_all_available |= MSR_VEC;
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr_all_available |= MSR_VSX;
+ if (cpu_has_feature(CPU_FTR_SPE))
+ msr_all_available |= MSR_SPE;
+
+ return 0;
+}
+early_initcall(init_msr_all_available);
+
+void giveup_all(struct task_struct *tsk)
+{
+ unsigned long usermsr;
+
+ if (!tsk->thread.regs)
+ return;
+
+ check_if_tm_restore_required(tsk);
+
+ usermsr = tsk->thread.regs->msr;
+
+ if ((usermsr & msr_all_available) == 0)
+ return;
+
+ msr_check_and_set(msr_all_available);
+
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
+ if (usermsr & MSR_FP)
+ __giveup_fpu(tsk);
+ if (usermsr & MSR_VEC)
+ __giveup_altivec(tsk);
+ if (usermsr & MSR_SPE)
+ __giveup_spe(tsk);
+
+ msr_check_and_clear(msr_all_available);
+}
+EXPORT_SYMBOL(giveup_all);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_FPU
+static bool should_restore_fp(void)
+{
+ if (current->thread.load_fp) {
+ current->thread.load_fp++;
+ return true;
+ }
+ return false;
+}
+
+static void do_restore_fp(void)
+{
+ load_fp_state(&current->thread.fp_state);
+}
+#else
+static bool should_restore_fp(void) { return false; }
+static void do_restore_fp(void) { }
+#endif /* CONFIG_PPC_FPU */
+
#ifdef CONFIG_ALTIVEC
- if (last_task_used_altivec == current)
- last_task_used_altivec = NULL;
+static bool should_restore_altivec(void)
+{
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) {
+ current->thread.load_vec++;
+ return true;
+ }
+ return false;
+}
+
+static void do_restore_altivec(void)
+{
+ load_vr_state(&current->thread.vr_state);
+ current->thread.used_vr = 1;
+}
+#else
+static bool should_restore_altivec(void) { return false; }
+static void do_restore_altivec(void) { }
#endif /* CONFIG_ALTIVEC */
+
+static bool should_restore_vsx(void)
+{
+ if (cpu_has_feature(CPU_FTR_VSX))
+ return true;
+ return false;
+}
#ifdef CONFIG_VSX
- if (last_task_used_vsx == current)
- last_task_used_vsx = NULL;
+static void do_restore_vsx(void)
+{
+ current->thread.used_vsr = 1;
+}
+#else
+static void do_restore_vsx(void) { }
#endif /* CONFIG_VSX */
+
+/*
+ * The exception exit path calls restore_math() with interrupts hard disabled
+ * but the soft irq state not "reconciled". ftrace code that calls
+ * local_irq_save/restore causes warnings.
+ *
+ * Rather than complicate the exit path, just don't trace restore_math.
+ * Tracing could instead be supported by having ftrace entry code check for
+ * this un-reconciled condition where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not
+ * set, and temporarily fix it up for the duration of the ftrace call.
+ */
+void notrace restore_math(struct pt_regs *regs)
+{
+ unsigned long msr;
+ unsigned long new_msr = 0;
+
+ msr = regs->msr;
+
+ /*
+ * new_msr tracks the facilities that are to be restored. Only reload
+ * if the bit is not set in the user MSR (if it is set, the registers
+ * are live for the user thread).
+ */
+ if ((!(msr & MSR_FP)) && should_restore_fp())
+ new_msr |= MSR_FP;
+
+ if ((!(msr & MSR_VEC)) && should_restore_altivec())
+ new_msr |= MSR_VEC;
+
+ if ((!(msr & MSR_VSX)) && should_restore_vsx()) {
+ if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC))
+ new_msr |= MSR_VSX;
+ }
+
+ if (new_msr) {
+ unsigned long fpexc_mode = 0;
+
+ msr_check_and_set(new_msr);
+
+ if (new_msr & MSR_FP) {
+ do_restore_fp();
+
+ // This also covers VSX, because VSX implies FP
+ fpexc_mode = current->thread.fpexc_mode;
+ }
+
+ if (new_msr & MSR_VEC)
+ do_restore_altivec();
+
+ if (new_msr & MSR_VSX)
+ do_restore_vsx();
+
+ msr_check_and_clear(new_msr);
+
+ regs_set_return_msr(regs, regs->msr | new_msr | fpexc_mode);
+ }
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+static void save_all(struct task_struct *tsk)
+{
+ unsigned long usermsr;
+
+ if (!tsk->thread.regs)
+ return;
+
+ usermsr = tsk->thread.regs->msr;
+
+ if ((usermsr & msr_all_available) == 0)
+ return;
+
+ msr_check_and_set(msr_all_available);
+
+ WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
+ if (usermsr & MSR_FP)
+ save_fpu(tsk);
+
+ if (usermsr & MSR_VEC)
+ save_altivec(tsk);
+
+ if (usermsr & MSR_SPE)
+ __giveup_spe(tsk);
+
+ msr_check_and_clear(msr_all_available);
+}
+
+void flush_all_to_thread(struct task_struct *tsk)
+{
+ if (tsk->thread.regs) {
+ preempt_disable();
+ BUG_ON(tsk != current);
#ifdef CONFIG_SPE
- if (last_task_used_spe == current)
- last_task_used_spe = NULL;
+ if (tsk->thread.regs->msr & MSR_SPE)
+ tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
#endif
- preempt_enable();
+ save_all(tsk);
+
+ preempt_enable();
+ }
}
-#endif /* CONFIG_SMP */
+EXPORT_SYMBOL(flush_all_to_thread);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
void do_send_trap(struct pt_regs *regs, unsigned long address,
- unsigned long error_code, int signal_code, int breakpt)
+ unsigned long error_code, int breakpt)
{
- siginfo_t info;
-
- current->thread.trap_nr = signal_code;
+ current->thread.trap_nr = TRAP_HWBKPT;
if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
11, SIGSEGV) == NOTIFY_STOP)
return;
/* Deliver the signal to userspace */
- info.si_signo = SIGTRAP;
- info.si_errno = breakpt; /* breakpoint or watchpoint id */
- info.si_code = signal_code;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_ptrace_errno_trap(breakpt, /* breakpoint or watchpoint id */
+ (void __user *)address);
}
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
-void do_break (struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+
+static void do_break_handler(struct pt_regs *regs)
{
- siginfo_t info;
+ struct arch_hw_breakpoint null_brk = {0};
+ struct arch_hw_breakpoint *info;
+ ppc_inst_t instr = ppc_inst(0);
+ int type = 0;
+ int size = 0;
+ unsigned long ea;
+ int i;
+
+ /*
+ * If the underlying hw supports only one watchpoint, we know it
+ * caused the exception. 8xx also falls into this category.
+ */
+ if (nr_wp_slots() == 1) {
+ __set_breakpoint(0, &null_brk);
+ current->thread.hw_brk[0] = null_brk;
+ current->thread.hw_brk[0].flags |= HW_BRK_FLAG_DISABLED;
+ return;
+ }
+
+ /* Otherwise find out which DAWR caused the exception and disable it. */
+ wp_get_instr_detail(regs, &instr, &type, &size, &ea);
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ info = &current->thread.hw_brk[i];
+ if (!info->address)
+ continue;
+ if (wp_check_constraints(regs, instr, ea, type, size, info)) {
+ __set_breakpoint(i, &null_brk);
+ current->thread.hw_brk[i] = null_brk;
+ current->thread.hw_brk[i].flags |= HW_BRK_FLAG_DISABLED;
+ }
+ }
+}
+
+DEFINE_INTERRUPT_HANDLER(do_break)
+{
current->thread.trap_nr = TRAP_HWBKPT;
- if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
+ if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr,
11, SIGSEGV) == NOTIFY_STOP)
return;
if (debugger_break_match(regs))
return;
- /* Clear the breakpoint */
- hw_breakpoint_disable();
+ /*
+ * We reach here only when the watchpoint exception is generated by a ptrace
+ * event (or the hw is buggy!). Now if CONFIG_HAVE_HW_BREAKPOINT is set,
+ * watchpoint is already handled by hw_breakpoint_handler() so we don't
+ * have to do anything. But when CONFIG_HAVE_HW_BREAKPOINT is not set,
+ * we need to manually handle the watchpoint here.
+ */
+ if (!IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT))
+ do_break_handler(regs);
/* Deliver the signal to userspace */
- info.si_signo = SIGTRAP;
- info.si_errno = 0;
- info.si_code = TRAP_HWBKPT;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGTRAP, &info, current);
+ force_sig_fault(SIGTRAP, TRAP_HWBKPT, (void __user *)regs->dar);
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk);
+static DEFINE_PER_CPU(struct arch_hw_breakpoint, current_brk[HBP_NUM_MAX]);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/*
@@ -422,38 +755,52 @@ void switch_booke_debug_regs(struct debug_reg *new_debug)
EXPORT_SYMBOL_GPL(switch_booke_debug_regs);
#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
#ifndef CONFIG_HAVE_HW_BREAKPOINT
-static void set_debug_reg_defaults(struct thread_struct *thread)
+static void set_breakpoint(int i, struct arch_hw_breakpoint *brk)
{
- thread->hw_brk.address = 0;
- thread->hw_brk.type = 0;
- set_breakpoint(&thread->hw_brk);
+ preempt_disable();
+ __set_breakpoint(i, brk);
+ preempt_enable();
}
-#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+static void set_debug_reg_defaults(struct thread_struct *thread)
{
- mtspr(SPRN_DAC1, dabr);
-#ifdef CONFIG_PPC_47x
- isync();
-#endif
- return 0;
+ int i;
+ struct arch_hw_breakpoint null_brk = {0};
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ thread->hw_brk[i] = null_brk;
+ if (ppc_breakpoint_available())
+ set_breakpoint(i, &thread->hw_brk[i]);
+ }
}
-#elif defined(CONFIG_PPC_BOOK3S)
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+
+static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
+ struct arch_hw_breakpoint *b)
{
- mtspr(SPRN_DABR, dabr);
- if (cpu_has_feature(CPU_FTR_DABRX))
- mtspr(SPRN_DABRX, dabrx);
- return 0;
+ if (a->address != b->address)
+ return false;
+ if (a->type != b->type)
+ return false;
+ if (a->len != b->len)
+ return false;
+ /* No need to check hw_len; it is calculated from address and len. */
+ return true;
}
-#else
-static inline int __set_dabr(unsigned long dabr, unsigned long dabrx)
+
+static void switch_hw_breakpoint(struct task_struct *new)
{
- return -EINVAL;
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (likely(hw_brk_match(this_cpu_ptr(&current_brk[i]),
+ &new->thread.hw_brk[i])))
+ continue;
+
+ __set_breakpoint(i, &new->thread.hw_brk[i]);
+ }
}
-#endif
+#endif /* !CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
static inline int set_dabr(struct arch_hw_breakpoint *brk)
{
@@ -465,109 +812,170 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
if (ppc_md.set_dabr)
return ppc_md.set_dabr(dabr, dabrx);
- return __set_dabr(dabr, dabrx);
+ if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
+ mtspr(SPRN_DAC1, dabr);
+ if (IS_ENABLED(CONFIG_PPC_47x))
+ isync();
+ return 0;
+ } else if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
+ mtspr(SPRN_DABR, dabr);
+ if (cpu_has_feature(CPU_FTR_DABRX))
+ mtspr(SPRN_DABRX, dabrx);
+ return 0;
+ } else {
+ return -EINVAL;
+ }
}
-static inline int set_dawr(struct arch_hw_breakpoint *brk)
+static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
{
- unsigned long dawr, dawrx, mrd;
+ unsigned long lctrl1 = LCTRL1_CTE_GT | LCTRL1_CTF_LT | LCTRL1_CRWE_RW |
+ LCTRL1_CRWF_RW;
+ unsigned long lctrl2 = LCTRL2_LW0EN | LCTRL2_LW0LADC | LCTRL2_SLW0EN;
+ unsigned long start_addr = ALIGN_DOWN(brk->address, HW_BREAKPOINT_SIZE);
+ unsigned long end_addr = ALIGN(brk->address + brk->len, HW_BREAKPOINT_SIZE);
+
+ if (start_addr == 0)
+ lctrl2 |= LCTRL2_LW0LA_F;
+ else if (end_addr == 0)
+ lctrl2 |= LCTRL2_LW0LA_E;
+ else
+ lctrl2 |= LCTRL2_LW0LA_EandF;
- dawr = brk->address;
+ mtspr(SPRN_LCTRL2, 0);
- dawrx = (brk->type & (HW_BRK_TYPE_READ | HW_BRK_TYPE_WRITE)) \
- << (63 - 58); //* read/write bits */
- dawrx |= ((brk->type & (HW_BRK_TYPE_TRANSLATE)) >> 2) \
- << (63 - 59); //* translate */
- dawrx |= (brk->type & (HW_BRK_TYPE_PRIV_ALL)) \
- >> 3; //* PRIM bits */
- /* dawr length is stored in field MDR bits 48:53. Matches range in
- doublewords (64 bits) baised by -1 eg. 0b000000=1DW and
- 0b111111=64DW.
- brk->len is in bytes.
- This aligns up to double word size, shifts and does the bias.
- */
- mrd = ((brk->len + 7) >> 3) - 1;
- dawrx |= (mrd & 0x3f) << (63 - 53);
+ if ((brk->type & HW_BRK_TYPE_RDWR) == 0)
+ return 0;
+
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_READ)
+ lctrl1 |= LCTRL1_CRWE_RO | LCTRL1_CRWF_RO;
+ if ((brk->type & HW_BRK_TYPE_RDWR) == HW_BRK_TYPE_WRITE)
+ lctrl1 |= LCTRL1_CRWE_WO | LCTRL1_CRWF_WO;
+
+ mtspr(SPRN_CMPE, start_addr - 1);
+ mtspr(SPRN_CMPF, end_addr);
+ mtspr(SPRN_LCTRL1, lctrl1);
+ mtspr(SPRN_LCTRL2, lctrl2);
- if (ppc_md.set_dawr)
- return ppc_md.set_dawr(dawr, dawrx);
- mtspr(SPRN_DAWR, dawr);
- mtspr(SPRN_DAWRX, dawrx);
return 0;
}
-void __set_breakpoint(struct arch_hw_breakpoint *brk)
+static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
{
- __get_cpu_var(current_brk) = *brk;
-
- if (cpu_has_feature(CPU_FTR_DAWR))
- set_dawr(brk);
- else
+ if (dawr_enabled())
+ // Power8 or later
+ set_dawr(nr, brk);
+ else if (IS_ENABLED(CONFIG_PPC_8xx))
+ set_breakpoint_8xx(brk);
+ else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ // Power7 or earlier
set_dabr(brk);
+ else
+ // Shouldn't happen due to higher level checks
+ WARN_ON_ONCE(1);
}
-void set_breakpoint(struct arch_hw_breakpoint *brk)
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
{
- preempt_disable();
- __set_breakpoint(brk);
- preempt_enable();
+ memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
+ set_hw_breakpoint(nr, brk);
}
-#ifdef CONFIG_PPC64
-DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
-#endif
-
-static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
- struct arch_hw_breakpoint *b)
+/* Check if we have DAWR or DABR hardware */
+bool ppc_breakpoint_available(void)
{
- if (a->address != b->address)
- return false;
- if (a->type != b->type)
- return false;
- if (a->len != b->len)
- return false;
+ if (dawr_enabled())
+ return true; /* POWER8 DAWR or POWER9 forced DAWR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false; /* POWER9 with DAWR disabled */
+ /* DABR: Everything but POWER8 and POWER9 */
return true;
}
+EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
+
+/* Disable the breakpoint in hardware without touching current_brk[] */
+void suspend_breakpoints(void)
+{
+ struct arch_hw_breakpoint brk = {0};
+ int i;
+
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_hw_breakpoint(i, &brk);
+}
+
+/*
+ * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
+ * from current_brk[]
+ */
+void restore_breakpoints(void)
+{
+ int i;
+
+ if (!ppc_breakpoint_available())
+ return;
+
+ for (i = 0; i < nr_wp_slots(); i++)
+ set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i]));
+}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static void tm_reclaim_thread(struct thread_struct *thr,
- struct thread_info *ti, uint8_t cause)
+
+static inline bool tm_enabled(struct task_struct *tsk)
{
- unsigned long msr_diff = 0;
+ return tsk && tsk->thread.regs && (tsk->thread.regs->msr & MSR_TM);
+}
+static void tm_reclaim_thread(struct thread_struct *thr, uint8_t cause)
+{
/*
- * If FP/VSX registers have been already saved to the
- * thread_struct, move them to the transact_fp array.
- * We clear the TIF_RESTORE_TM bit since after the reclaim
- * the thread will no longer be transactional.
+ * Use the current MSR TM suspended bit to track if we have
+ * checkpointed state outstanding.
+ * On signal delivery, we'd normally reclaim the checkpointed
+ * state to obtain the stack pointer (see get_tm_stackpointer()).
+ * This will then directly return to userspace without going
+ * through __switch_to(). However, if the stack frame is bad,
+ * we need to exit this thread which calls __switch_to() which
+ * will again attempt to reclaim the already saved tm state.
+ * Hence we need to check that we've not already reclaimed
+ * this state.
+ * We do this using the current MSR, rather than tracking it in
+ * some specific thread_struct bit, as it has the additional
+ * benefit of checking for a potential TM bad thing exception.
*/
- if (test_ti_thread_flag(ti, TIF_RESTORE_TM)) {
- msr_diff = thr->tm_orig_msr & ~thr->regs->msr;
- if (msr_diff & MSR_FP)
- memcpy(&thr->transact_fp, &thr->fp_state,
- sizeof(struct thread_fp_state));
- if (msr_diff & MSR_VEC)
- memcpy(&thr->transact_vr, &thr->vr_state,
- sizeof(struct thread_vr_state));
- clear_ti_thread_flag(ti, TIF_RESTORE_TM);
- msr_diff &= MSR_FP | MSR_VEC | MSR_VSX | MSR_FE0 | MSR_FE1;
- }
+ if (!MSR_TM_SUSPENDED(mfmsr()))
+ return;
+
+ giveup_all(container_of(thr, struct task_struct, thread));
- tm_reclaim(thr, thr->regs->msr, cause);
+ tm_reclaim(thr, cause);
- /* Having done the reclaim, we now have the checkpointed
- * FP/VSX values in the registers. These might be valid
- * even if we have previously called enable_kernel_fp() or
- * flush_fp_to_thread(), so update thr->regs->msr to
- * indicate their current validity.
+ /*
+ * If we are in a transaction and FP is off then we can't have
+ * used FP inside that transaction. Hence the checkpointed
+ * state is the same as the live state. We need to copy the
+ * live state to the checkpointed state so that when the
+ * transaction is restored, the checkpointed state is correct
+ * and the aborted transaction sees the correct state. We use
+ * ckpt_regs.msr here as that's what tm_reclaim will use to
+ * determine if it's going to write the checkpointed state or
+ * not. So either this will write the checkpointed registers,
+ * or reclaim will. Similarly for VMX.
*/
- thr->regs->msr |= msr_diff;
+ if ((thr->ckpt_regs.msr & MSR_FP) == 0)
+ memcpy(&thr->ckfp_state, &thr->fp_state,
+ sizeof(struct thread_fp_state));
+ if ((thr->ckpt_regs.msr & MSR_VEC) == 0)
+ memcpy(&thr->ckvr_state, &thr->vr_state,
+ sizeof(struct thread_vr_state));
}
void tm_reclaim_current(uint8_t cause)
{
tm_enable();
- tm_reclaim_thread(&current->thread, current_thread_info(), cause);
+ tm_reclaim_thread(&current->thread, cause);
}
static inline void tm_reclaim_task(struct task_struct *tsk)
@@ -577,8 +985,8 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
*
* In switching we need to maintain a 2nd register state as
* oldtask->thread.ckpt_regs. We tm_reclaim(oldproc); this saves the
- * checkpointed (tbegin) state in ckpt_regs and saves the transactional
- * (current) FPRs into oldtask->thread.transact_fpr[].
+ * checkpointed (tbegin) state in ckpt_regs, ckfp_state and
+ * ckvr_state
*
* We also context switch (save) TFHAR/TEXASR/TFIAR in here.
*/
@@ -590,23 +998,17 @@ static inline void tm_reclaim_task(struct task_struct *tsk)
if (!MSR_TM_ACTIVE(thr->regs->msr))
goto out_and_saveregs;
- /* Stash the original thread MSR, as giveup_fpu et al will
- * modify it. We hold onto it to see whether the task used
- * FP & vector regs. If the TIF_RESTORE_TM flag is set,
- * tm_orig_msr is already set.
- */
- if (!test_ti_thread_flag(task_thread_info(tsk), TIF_RESTORE_TM))
- thr->tm_orig_msr = thr->regs->msr;
+ WARN_ON(tm_suspend_disabled);
- TM_DEBUG("--- tm_reclaim on pid %d (NIP=%lx, "
+ TM_DEBUG("---- tm_reclaim on pid %d (NIP=%lx, "
"ccr=%lx, msr=%lx, trap=%lx)\n",
tsk->pid, thr->regs->nip,
thr->regs->ccr, thr->regs->msr,
thr->regs->trap);
- tm_reclaim_thread(thr, task_thread_info(tsk), TM_CAUSE_RESCHED);
+ tm_reclaim_thread(thr, TM_CAUSE_RESCHED);
- TM_DEBUG("--- tm_reclaim on pid %d complete\n",
+ TM_DEBUG("---- tm_reclaim on pid %d complete\n",
tsk->pid);
out_and_saveregs:
@@ -618,14 +1020,15 @@ out_and_saveregs:
tm_save_sprs(thr);
}
-extern void __tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr);
+extern void __tm_recheckpoint(struct thread_struct *thread);
-void tm_recheckpoint(struct thread_struct *thread,
- unsigned long orig_msr)
+void tm_recheckpoint(struct thread_struct *thread)
{
unsigned long flags;
+ if (!(thread->regs->msr & MSR_TM))
+ return;
+
/* We really can't be interrupted here as the TEXASR registers can't
* change and later in the trecheckpoint code, we have a userspace R1.
* So let's hard disable over this region.
@@ -638,15 +1041,13 @@ void tm_recheckpoint(struct thread_struct *thread,
*/
tm_restore_sprs(thread);
- __tm_recheckpoint(thread, orig_msr);
+ __tm_recheckpoint(thread);
local_irq_restore(flags);
}
static inline void tm_recheckpoint_new_task(struct task_struct *new)
{
- unsigned long msr;
-
if (!cpu_has_feature(CPU_FTR_TM))
return;
@@ -655,51 +1056,49 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new)
* If the task was using FP, we non-lazily reload both the original and
* the speculative FP register states. This is because the kernel
* doesn't see if/when a TM rollback occurs, so if we take an FP
- * unavoidable later, we are unable to determine which set of FP regs
+ * unavailable later, we are unable to determine which set of FP regs
* need to be restored.
*/
- if (!new->thread.regs)
+ if (!tm_enabled(new))
return;
if (!MSR_TM_ACTIVE(new->thread.regs->msr)){
tm_restore_sprs(&new->thread);
return;
}
- msr = new->thread.tm_orig_msr;
/* Recheckpoint to restore original checkpointed register state. */
- TM_DEBUG("*** tm_recheckpoint of pid %d "
- "(new->msr 0x%lx, new->origmsr 0x%lx)\n",
- new->pid, new->thread.regs->msr, msr);
-
- /* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&new->thread, msr);
-
- /* This loads the speculative FP/VEC state, if used */
- if (msr & MSR_FP) {
- do_load_up_transact_fpu(&new->thread);
- new->thread.regs->msr |=
- (MSR_FP | new->thread.fpexc_mode);
- }
-#ifdef CONFIG_ALTIVEC
- if (msr & MSR_VEC) {
- do_load_up_transact_altivec(&new->thread);
- new->thread.regs->msr |= MSR_VEC;
- }
-#endif
- /* We may as well turn on VSX too since all the state is restored now */
- if (msr & MSR_VSX)
- new->thread.regs->msr |= MSR_VSX;
+ TM_DEBUG("*** tm_recheckpoint of pid %d (new->msr 0x%lx)\n",
+ new->pid, new->thread.regs->msr);
+
+ tm_recheckpoint(&new->thread);
+
+ /*
+ * The checkpointed state has been restored but the live state has
+ * not, ensure all the math functionality is turned off to trigger
+ * restore_math() to reload.
+ */
+ new->thread.regs->msr &= ~(MSR_FP | MSR_VEC | MSR_VSX);
TM_DEBUG("*** tm_recheckpoint of pid %d complete "
"(kernel msr 0x%lx)\n",
new->pid, mfmsr());
}
-static inline void __switch_to_tm(struct task_struct *prev)
+static inline void __switch_to_tm(struct task_struct *prev,
+ struct task_struct *new)
{
if (cpu_has_feature(CPU_FTR_TM)) {
- tm_enable();
- tm_reclaim_task(prev);
+ if (tm_enabled(prev) || tm_enabled(new))
+ tm_enable();
+
+ if (tm_enabled(prev)) {
+ prev->thread.load_tm++;
+ tm_reclaim_task(prev);
+ if (!MSR_TM_ACTIVE(prev->thread.regs->msr) && prev->thread.load_tm == 0)
+ prev->thread.regs->msr &= ~MSR_TM;
+ }
+
+ tm_recheckpoint_new_task(new);
}
}
@@ -721,117 +1120,198 @@ void restore_tm_state(struct pt_regs *regs)
{
unsigned long msr_diff;
+ /*
+ * This is the only moment we should clear TIF_RESTORE_TM as
+ * it is here that ckpt_regs.msr and pt_regs.msr become the same
+ * again, anything else could lead to an incorrect ckpt_msr being
+ * saved and therefore incorrect signal contexts.
+ */
clear_thread_flag(TIF_RESTORE_TM);
if (!MSR_TM_ACTIVE(regs->msr))
return;
- msr_diff = current->thread.tm_orig_msr & ~regs->msr;
+ msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
- if (msr_diff & MSR_FP) {
- fp_enable();
- load_fp_state(&current->thread.fp_state);
- regs->msr |= current->thread.fpexc_mode;
- }
- if (msr_diff & MSR_VEC) {
- vec_enable();
- load_vr_state(&current->thread.vr_state);
- }
- regs->msr |= msr_diff;
+
+ /* Ensure that restore_math() will restore */
+ if (msr_diff & MSR_FP)
+ current->thread.load_fp = 1;
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
+ current->thread.load_vec = 1;
+#endif
+ restore_math(regs);
+
+ regs_set_return_msr(regs, regs->msr | msr_diff);
}
-#else
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
#define tm_recheckpoint_new_task(new)
-#define __switch_to_tm(prev)
+#define __switch_to_tm(prev, new)
+void tm_reclaim_current(uint8_t cause) {}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+static inline void save_sprs(struct thread_struct *t)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ t->vrsave = mfspr(SPRN_VRSAVE);
+#endif
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE))
+ t->spefscr = mfspr(SPRN_SPEFSCR);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_DSCR))
+ t->dscr = mfspr(SPRN_DSCR);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ t->bescr = mfspr(SPRN_BESCR);
+ t->ebbhr = mfspr(SPRN_EBBHR);
+ t->ebbrr = mfspr(SPRN_EBBRR);
+
+ t->fscr = mfspr(SPRN_FSCR);
+
+ /*
+ * Note that the TAR is not available for use in the kernel.
+ * (To provide this, the TAR should be backed up/restored on
+ * exception entry/exit instead, and be in pt_regs. FIXME,
+ * this should be in pt_regs anyway (for debug).)
+ */
+ t->tar = mfspr(SPRN_TAR);
+ }
+
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+ t->hashkeyr = mfspr(SPRN_HASHKEYR);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ t->dexcr = mfspr(SPRN_DEXCR);
+#endif
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvmppc_save_user_regs(void)
+{
+ unsigned long usermsr;
+
+ if (!current->thread.regs)
+ return;
+
+ usermsr = current->thread.regs->msr;
+
+ /* Caller has enabled FP/VEC/VSX/TM in MSR */
+ if (usermsr & MSR_FP)
+ __giveup_fpu(current);
+ if (usermsr & MSR_VEC)
+ __giveup_altivec(current);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (usermsr & MSR_TM) {
+ current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
+ current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
+ current->thread.tm_texasr = mfspr(SPRN_TEXASR);
+ current->thread.regs->msr &= ~MSR_TM;
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_user_regs);
+
+void kvmppc_save_current_sprs(void)
+{
+ save_sprs(&current->thread);
+}
+EXPORT_SYMBOL_GPL(kvmppc_save_current_sprs);
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+static inline void restore_sprs(struct thread_struct *old_thread,
+ struct thread_struct *new_thread)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+ old_thread->vrsave != new_thread->vrsave)
+ mtspr(SPRN_VRSAVE, new_thread->vrsave);
+#endif
+#ifdef CONFIG_SPE
+ if (cpu_has_feature(CPU_FTR_SPE) &&
+ old_thread->spefscr != new_thread->spefscr)
+ mtspr(SPRN_SPEFSCR, new_thread->spefscr);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ u64 dscr = get_paca()->dscr_default;
+ if (new_thread->dscr_inherit)
+ dscr = new_thread->dscr;
+
+ if (old_thread->dscr != dscr)
+ mtspr(SPRN_DSCR, dscr);
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (old_thread->bescr != new_thread->bescr)
+ mtspr(SPRN_BESCR, new_thread->bescr);
+ if (old_thread->ebbhr != new_thread->ebbhr)
+ mtspr(SPRN_EBBHR, new_thread->ebbhr);
+ if (old_thread->ebbrr != new_thread->ebbrr)
+ mtspr(SPRN_EBBRR, new_thread->ebbrr);
+
+ if (old_thread->fscr != new_thread->fscr)
+ mtspr(SPRN_FSCR, new_thread->fscr);
+
+ if (old_thread->tar != new_thread->tar)
+ mtspr(SPRN_TAR, new_thread->tar);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+ old_thread->tidr != new_thread->tidr)
+ mtspr(SPRN_TIDR, new_thread->tidr);
+
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) &&
+ old_thread->hashkeyr != new_thread->hashkeyr)
+ mtspr(SPRN_HASHKEYR, new_thread->hashkeyr);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ old_thread->dexcr != new_thread->dexcr)
+ mtspr(SPRN_DEXCR, new_thread->dexcr);
+#endif
+
+}
+
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
struct thread_struct *new_thread, *old_thread;
struct task_struct *last;
-#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
struct ppc64_tlb_batch *batch;
#endif
- WARN_ON(!irqs_disabled());
+ new_thread = &new->thread;
+ old_thread = &current->thread;
- /* Back up the TAR and DSCR across context switches.
- * Note that the TAR is not available for use in the kernel. (To
- * provide this, the TAR should be backed up/restored on exception
- * entry/exit instead, and be in pt_regs. FIXME, this should be in
- * pt_regs anyway (for debug).)
- * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
- * these will change them.
- */
- save_early_sprs(&prev->thread);
+ WARN_ON(!irqs_disabled());
- __switch_to_tm(prev);
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ batch = this_cpu_ptr(&ppc64_tlb_batch);
+ if (batch->active) {
+ current_thread_info()->local_flags |= _TLF_LAZY_MMU;
+ if (batch->index)
+ __flush_tlb_pending(batch);
+ batch->active = 0;
+ }
-#ifdef CONFIG_SMP
- /* avoid complexity of lazy save/restore of fpu
- * by just saving it every time we switch out if
- * this task used the fpu during the last quantum.
- *
- * If it tries to use the fpu again, it'll trap and
- * reload its fp regs. So we don't have to do a restore
- * every switch, just a save.
- * -- Cort
- */
- if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
- giveup_fpu(prev);
-#ifdef CONFIG_ALTIVEC
/*
- * If the previous thread used altivec in the last quantum
- * (thus changing altivec regs) then save them.
- * We used to check the VRSAVE register but not all apps
- * set it, so we don't rely on it now (and in fact we need
- * to save & restore VSCR even if VRSAVE == 0). -- paulus
- *
- * On SMP we always save/restore altivec regs just to avoid the
- * complexity of changing processors.
- * -- Cort
- */
- if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
- giveup_altivec(prev);
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
- if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
- /* VMX and FPU registers are already save here */
- __giveup_vsx(prev);
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /*
- * If the previous thread used spe in the last quantum
- * (thus changing spe regs) then save them.
- *
- * On SMP we always save/restore spe regs just to avoid the
- * complexity of changing processors.
- */
- if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
- giveup_spe(prev);
-#endif /* CONFIG_SPE */
-
-#else /* CONFIG_SMP */
-#ifdef CONFIG_ALTIVEC
- /* Avoid the trap. On smp this this never happens since
- * we don't set last_task_used_altivec -- Cort
+ * On POWER9 the copy-paste buffer can only paste into
+ * foreign real addresses, so unprivileged processes can not
+ * see the data or use it in any way unless they have
+ * foreign real mappings. If the new process has the foreign
+ * real address mappings, we must issue a cp_abort to clear
+ * any state and prevent snooping, corruption or a covert
+ * channel. ISA v3.1 supports paste into local memory.
*/
- if (new->thread.regs && last_task_used_altivec == new)
- new->thread.regs->msr |= MSR_VEC;
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
- if (new->thread.regs && last_task_used_vsx == new)
- new->thread.regs->msr |= MSR_VSX;
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /* Avoid the trap. On smp this this never happens since
- * we don't set last_task_used_spe
- */
- if (new->thread.regs && last_task_used_spe == new)
- new->thread.regs->msr |= MSR_SPE;
-#endif /* CONFIG_SPE */
-
-#endif /* CONFIG_SMP */
+ if (new->mm && (cpu_has_feature(CPU_FTR_ARCH_31) ||
+ atomic_read(&new->mm->context.vas_windows)))
+ asm volatile(PPC_CP_ABORT);
+#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
switch_booke_debug_regs(&new->thread.debug);
@@ -841,109 +1321,156 @@ struct task_struct *__switch_to(struct task_struct *prev,
* schedule DABR
*/
#ifndef CONFIG_HAVE_HW_BREAKPOINT
- if (unlikely(!hw_brk_match(&__get_cpu_var(current_brk), &new->thread.hw_brk)))
- __set_breakpoint(&new->thread.hw_brk);
+ switch_hw_breakpoint(new);
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif
-
- new_thread = &new->thread;
- old_thread = &current->thread;
-
-#ifdef CONFIG_PPC64
/*
- * Collect processor utilization data per process
+ * We need to save SPRs before treclaim/trecheckpoint as these will
+ * change a number of them.
*/
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
- long unsigned start_tb, current_tb;
- start_tb = old_thread->start_tb;
- cu->current_tb = current_tb = mfspr(SPRN_PURR);
- old_thread->accum_tb += (current_tb - start_tb);
- new_thread->start_tb = current_tb;
- }
-#endif /* CONFIG_PPC64 */
+ save_sprs(&prev->thread);
-#ifdef CONFIG_PPC_BOOK3S_64
- batch = &__get_cpu_var(ppc64_tlb_batch);
- if (batch->active) {
- current_thread_info()->local_flags |= _TLF_LAZY_MMU;
- if (batch->index)
- __flush_tlb_pending(batch);
- batch->active = 0;
+ /* Save FPU, Altivec, VSX and SPE state */
+ giveup_all(prev);
+
+ __switch_to_tm(prev, new);
+
+ if (!radix_enabled()) {
+ /*
+ * We can't take a PMU exception inside _switch() since there
+ * is a window where the kernel stack SLB and the kernel stack
+ * are out of sync. Hard disable here.
+ */
+ hard_irq_disable();
}
-#endif /* CONFIG_PPC_BOOK3S_64 */
/*
- * We can't take a PMU exception inside _switch() since there is a
- * window where the kernel stack SLB and the kernel stack are out
- * of sync. Hard disable here.
+ * Call restore_sprs() and set_return_regs_changed() before calling
+ * _switch(). If we move it after _switch() then we miss out on calling
+ * it for new tasks. The reason for this is we manually create a stack
+ * frame for new tasks that directly returns through ret_from_fork() or
+ * ret_from_kernel_thread(). See copy_thread() for details.
*/
- hard_irq_disable();
+ restore_sprs(old_thread, new_thread);
+
+ set_return_regs_changed(); /* _switch changes stack (and regs) */
- tm_recheckpoint_new_task(new);
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ kuap_assert_locked();
last = _switch(old_thread, new_thread);
+ /*
+ * Nothing after _switch will be run for newly created tasks,
+ * because they switch directly to ret_from_fork/ret_from_kernel_thread
+ * etc. Code added here should have a comment explaining why that is
+ * okay.
+ */
+
#ifdef CONFIG_PPC_BOOK3S_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /*
+ * This applies to a process that was context switched while inside
+ * arch_enter_lazy_mmu_mode(), to re-activate the batch that was
+ * deactivated above, before _switch(). This will never be the case
+ * for new tasks.
+ */
if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
- batch = &__get_cpu_var(ppc64_tlb_batch);
+ batch = this_cpu_ptr(&ppc64_tlb_batch);
batch->active = 1;
}
+#endif
+
+ /*
+ * Math facilities are masked out of the child MSR in copy_thread.
+ * A new task does not need to restore_math because it will
+ * demand fault them.
+ */
+ if (current->thread.regs)
+ restore_math(current->thread.regs);
#endif /* CONFIG_PPC_BOOK3S_64 */
return last;
}
-static int instructions_to_print = 16;
+#define NR_INSN_TO_PRINT 16
static void show_instructions(struct pt_regs *regs)
{
int i;
- unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
- sizeof(int));
+ unsigned long nip = regs->nip;
+ unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
- printk("Instruction dump:");
+ printk("Code: ");
- for (i = 0; i < instructions_to_print; i++) {
- int instr;
-
- if (!(i % 8))
- printk("\n");
+ /*
+ * If we were executing with the MMU off for instructions, adjust pc
+ * rather than printing XXXXXXXX.
+ */
+ if (!IS_ENABLED(CONFIG_BOOKE) && !(regs->msr & MSR_IR)) {
+ pc = (unsigned long)phys_to_virt(pc);
+ nip = (unsigned long)phys_to_virt(regs->nip);
+ }
-#if !defined(CONFIG_BOOKE)
- /* If executing with the IMMU off, adjust pc rather
- * than print XXXXXXXX.
- */
- if (!(regs->msr & MSR_IR))
- pc = (unsigned long)phys_to_virt(pc);
-#endif
+ for (i = 0; i < NR_INSN_TO_PRINT; i++) {
+ int instr;
- /* We use __get_user here *only* to avoid an OOPS on a
- * bad address because the pc *should* only be a
- * kernel address.
- */
- if (!__kernel_text_address(pc) ||
- __get_user(instr, (unsigned int __user *)pc)) {
- printk(KERN_CONT "XXXXXXXX ");
+ if (get_kernel_nofault(instr, (const void *)pc)) {
+ pr_cont("XXXXXXXX ");
} else {
- if (regs->nip == pc)
- printk(KERN_CONT "<%08x> ", instr);
+ if (nip == pc)
+ pr_cont("<%08x> ", instr);
else
- printk(KERN_CONT "%08x ", instr);
+ pr_cont("%08x ", instr);
}
pc += sizeof(int);
}
- printk("\n");
+ pr_cont("\n");
}
-static struct regbit {
+void show_user_instructions(struct pt_regs *regs)
+{
+ unsigned long pc;
+ int n = NR_INSN_TO_PRINT;
+ struct seq_buf s;
+ char buf[96]; /* enough for 8 times 9 + 2 chars */
+
+ pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
+
+ seq_buf_init(&s, buf, sizeof(buf));
+
+ while (n) {
+ int i;
+
+ seq_buf_clear(&s);
+
+ for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
+ int instr;
+
+ if (copy_from_user_nofault(&instr, (void __user *)pc,
+ sizeof(instr))) {
+ seq_buf_printf(&s, "XXXXXXXX ");
+ continue;
+ }
+ seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
+ }
+
+ if (!seq_buf_has_overflowed(&s))
+ pr_info("%s[%d]: code: %s\n", current->comm,
+ current->pid, s.buffer);
+ }
+}
+
+struct regbit {
unsigned long bit;
const char *name;
-} msr_bits[] = {
+};
+
+static struct regbit msr_bits[] = {
#if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
{MSR_SF, "SF"},
{MSR_HV, "HV"},
@@ -973,89 +1500,118 @@ static struct regbit {
{0, NULL}
};
-static void printbits(unsigned long val, struct regbit *bits)
+static void print_bits(unsigned long val, struct regbit *bits, const char *sep)
{
- const char *sep = "";
+ const char *s = "";
- printk("<");
for (; bits->bit; ++bits)
if (val & bits->bit) {
- printk("%s%s", sep, bits->name);
- sep = ",";
+ pr_cont("%s%s", s, bits->name);
+ s = sep;
}
- printk(">");
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static struct regbit msr_tm_bits[] = {
+ {MSR_TS_T, "T"},
+ {MSR_TS_S, "S"},
+ {MSR_TM, "E"},
+ {0, NULL}
+};
+
+static void print_tm_bits(unsigned long val)
+{
+/*
+ * This only prints something if at least one of the TM bits is set.
+ * Inside the TM[], the output means:
+ * E: Enabled (bit 32)
+ * S: Suspended (bit 33)
+ * T: Transactional (bit 34)
+ */
+ if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
+ pr_cont(",TM[");
+ print_bits(val, msr_tm_bits, "");
+ pr_cont("]");
+ }
+}
+#else
+static void print_tm_bits(unsigned long val) {}
+#endif
+
+static void print_msr_bits(unsigned long val)
+{
+ pr_cont("<");
+ print_bits(val, msr_bits, ",");
+ print_tm_bits(val);
+ pr_cont(">");
}
#ifdef CONFIG_PPC64
#define REG "%016lx"
#define REGS_PER_LINE 4
-#define LAST_VOLATILE 13
#else
#define REG "%08lx"
#define REGS_PER_LINE 8
-#define LAST_VOLATILE 12
#endif
-void show_regs(struct pt_regs * regs)
+static void __show_regs(struct pt_regs *regs)
{
int i, trap;
- show_regs_print_info(KERN_DEFAULT);
-
- printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
+ printk("NIP: "REG" LR: "REG" CTR: "REG"\n",
regs->nip, regs->link, regs->ctr);
- printk("REGS: %p TRAP: %04lx %s (%s)\n",
+ printk("REGS: %px TRAP: %04lx %s (%s)\n",
regs, regs->trap, print_tainted(), init_utsname()->release);
- printk("MSR: "REG" ", regs->msr);
- printbits(regs->msr, msr_bits);
- printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
+ printk("MSR: "REG" ", regs->msr);
+ print_msr_bits(regs->msr);
+ pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
- if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
- printk("CFAR: "REG" ", regs->orig_gpr3);
- if (trap == 0x200 || trap == 0x300 || trap == 0x600)
-#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
-#else
- printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
-#endif
+ if (!trap_is_syscall(regs) && cpu_has_feature(CPU_FTR_CFAR))
+ pr_cont("CFAR: "REG" ", regs->orig_gpr3);
+ if (trap == INTERRUPT_MACHINE_CHECK ||
+ trap == INTERRUPT_DATA_STORAGE ||
+ trap == INTERRUPT_ALIGNMENT) {
+ if (IS_ENABLED(CONFIG_BOOKE))
+ pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr);
+ else
+ pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+ }
+
#ifdef CONFIG_PPC64
- printk("SOFTE: %ld ", regs->softe);
+ pr_cont("IRQMASK: %lx ", regs->softe);
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr))
- printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+ pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
#endif
for (i = 0; i < 32; i++) {
if ((i % REGS_PER_LINE) == 0)
- printk("\nGPR%02d: ", i);
- printk(REG " ", regs->gpr[i]);
- if (i == LAST_VOLATILE && !FULL_REGS(regs))
- break;
+ pr_cont("\nGPR%02d: ", i);
+ pr_cont(REG " ", regs->gpr[i]);
}
- printk("\n");
-#ifdef CONFIG_KALLSYMS
+ pr_cont("\n");
/*
* Lookup NIP late so we have the best chance of getting
* above info out without failing
*/
- printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
- printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
-#endif
- show_stack(current, (unsigned long *) regs->gpr[1]);
- if (!user_mode(regs))
- show_instructions(regs);
+ if (IS_ENABLED(CONFIG_KALLSYMS)) {
+ printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip);
+ printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link);
+ }
}
-void exit_thread(void)
+void show_regs(struct pt_regs *regs)
{
- discard_lazy_cpu_state();
+ show_regs_print_info(KERN_DEFAULT);
+ __show_regs(regs);
+ show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT);
+ if (!user_mode(regs))
+ show_instructions(regs);
}
void flush_thread(void)
{
- discard_lazy_cpu_state();
-
#ifdef CONFIG_HAVE_HW_BREAKPOINT
flush_ptrace_hw_breakpoint(current);
#else /* CONFIG_HAVE_HW_BREAKPOINT */
@@ -1063,30 +1619,107 @@ void flush_thread(void)
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
-void
-release_thread(struct task_struct *t)
+void arch_setup_new_exec(void)
{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!radix_enabled())
+ hash__setup_new_exec();
+#endif
+ /*
+ * If we exec out of a kernel thread then thread.regs will not be
+ * set. Do it now.
+ */
+ if (!current->thread.regs) {
+ struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
+ current->thread.regs = regs - 1;
+ }
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ current->thread.regs->amr = default_amr;
+ current->thread.regs->iamr = default_iamr;
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ current->thread.dexcr = current->thread.dexcr_onexec;
+ mtspr(SPRN_DEXCR, current->thread.dexcr);
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
}
+#ifdef CONFIG_PPC64
+/*
+ * Assign a TIDR (thread ID) for task @t and set it in the thread
+ * structure. For now, we only support setting TIDR for 'current' task.
+ *
+ * Since the TID value is a truncated form of its PID, it is possible
+ * (but unlikely) for 2 threads to have the same TID. In the unlikely event
+ * that 2 threads share the same TID and are waiting, one of the following
+ * cases will happen:
+ *
+ * 1. The correct thread is running, the wrong thread is not
+ * In this situation, the correct thread is woken and proceeds to pass its
+ * condition check.
+ *
+ * 2. Neither thread is running
+ * In this situation, neither thread will be woken. When scheduled, the waiting
+ * threads will execute either a wait, which will return immediately, followed
+ * by a condition check, which will pass for the correct thread and fail
+ * for the wrong thread, or they will execute the condition check immediately.
+ *
+ * 3. The wrong thread is running, the correct thread is not
+ * The wrong thread will be woken, but will fail its condition check and
+ * re-execute wait. The correct thread, when scheduled, will execute either
+ * its condition check (which will pass), or wait, which returns immediately
+ * when called the first time after the thread is scheduled, followed by its
+ * condition check (which will pass).
+ *
+ * 4. Both threads are running
+ * Both threads will be woken. The wrong thread will fail its condition check
+ * and execute another wait, while the correct thread will pass its condition
+ * check.
+ *
+ * @t: the task to set the thread ID for
+ */
+int set_thread_tidr(struct task_struct *t)
+{
+ if (!cpu_has_feature(CPU_FTR_P9_TIDR))
+ return -EINVAL;
+
+ if (t != current)
+ return -EINVAL;
+
+ if (t->thread.tidr)
+ return 0;
+
+ t->thread.tidr = (u16)task_pid_nr(t);
+ mtspr(SPRN_TIDR, t->thread.tidr);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(set_thread_tidr);
+
+#endif /* CONFIG_PPC64 */
+
/*
* this gets called so that we can store coprocessor state into memory and
* copy the current task into the new thread.
*/
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
- flush_fp_to_thread(src);
- flush_altivec_to_thread(src);
- flush_vsx_to_thread(src);
- flush_spe_to_thread(src);
+ flush_all_to_thread(src);
/*
* Flush TM state out so we can copy it. __switch_to_tm() does this
* flush but it removes the checkpointed state from the current CPU and
* transitions the CPU out of TM mode. Hence we need to call
* tm_recheckpoint_new_task() (on the same task) to restore the
* checkpointed state back and the TM mode.
+ *
+ * Can't pass dst because it isn't ready. Doesn't matter, passing
+ * dst is only important for __switch_to()
*/
- __switch_to_tm(src);
- tm_recheckpoint_new_task(src);
+ __switch_to_tm(src, src);
*dst = *src;
@@ -1097,10 +1730,13 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
{
-#ifdef CONFIG_PPC_STD_MMU_64
+#ifdef CONFIG_PPC_64S_HASH_MMU
unsigned long sp_vsid;
unsigned long llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ if (radix_enabled())
+ return;
+
if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
sp_vsid = get_kernel_vsid(sp, MMU_SEGSIZE_1T)
<< SLB_VSID_SHIFT_1T;
@@ -1115,55 +1751,89 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy a thread..
*/
-extern unsigned long dscr_default; /* defined in arch/powerpc/kernel/sysfs.c */
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long arg, struct task_struct *p)
+/*
+ * Copy architecture-specific thread state
+ */
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- struct pt_regs *childregs, *kregs;
+ struct pt_regs *kregs; /* Switch frame regs */
extern void ret_from_fork(void);
- extern void ret_from_kernel_thread(void);
+ extern void ret_from_fork_scv(void);
+ extern void ret_from_kernel_user_thread(void);
+ extern void start_kernel_thread(void);
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int i;
+#endif
+
+ klp_init_thread_info(p);
- /* Copy registers */
- sp -= sizeof(struct pt_regs);
- childregs = (struct pt_regs *) sp;
if (unlikely(p->flags & PF_KTHREAD)) {
- struct thread_info *ti = (void *)task_stack_page(p);
- memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gpr[1] = sp + sizeof(struct pt_regs);
- /* function */
- if (usp)
- childregs->gpr[14] = ppc_function_entry((void *)usp);
-#ifdef CONFIG_PPC64
- clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = 1;
-#endif
- childregs->gpr[15] = arg;
+ /* kernel thread */
+
+ /* Create initial minimum stack frame. */
+ sp -= STACK_FRAME_MIN_SIZE;
+ ((unsigned long *)sp)[0] = 0;
+
+ f = start_kernel_thread;
p->thread.regs = NULL; /* no user register state */
- ti->flags |= _TIF_RESTOREALL;
- f = ret_from_kernel_thread;
+ clear_tsk_compat_task(p);
} else {
- struct pt_regs *regs = current_pt_regs();
- CHECK_FULL_REGS(regs);
- *childregs = *regs;
- if (usp)
- childregs->gpr[1] = usp;
- p->thread.regs = childregs;
- childregs->gpr[3] = 0; /* Result from fork() */
- if (clone_flags & CLONE_SETTLS) {
+ /* user thread */
+ struct pt_regs *childregs;
+
+ /* Create initial user return stack frame. */
+ sp -= STACK_USER_INT_FRAME_SIZE;
+ *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
+
+ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
+
+ if (unlikely(args->fn)) {
+ /*
+ * A user space thread, but it first runs a kernel
+ * thread, and then returns as though it had called
+ * execve rather than fork, so user regs will be
+ * filled in (e.g., by kernel_execve()).
+ */
+ ((unsigned long *)sp)[0] = 0;
+ memset(childregs, 0, sizeof(struct pt_regs));
#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
- else
+ childregs->softe = IRQS_ENABLED;
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ f = ret_from_kernel_user_thread;
+ } else {
+ struct pt_regs *regs = current_pt_regs();
+ u64 clone_flags = args->flags;
+ unsigned long usp = args->stack;
+
+ /* Copy registers */
+ *childregs = *regs;
+ if (usp)
+ childregs->gpr[1] = usp;
+ ((unsigned long *)sp)[0] = childregs->gpr[1];
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON_ONCE(childregs->softe != IRQS_ENABLED);
+#endif
+ if (clone_flags & CLONE_SETTLS) {
+ unsigned long tls = args->tls;
+
+ if (!is_32bit_task())
+ childregs->gpr[13] = tls;
+ else
+ childregs->gpr[2] = tls;
+ }
+
+ if (trap_is_scv(regs))
+ f = ret_from_fork_scv;
+ else
+ f = ret_from_fork;
}
- f = ret_from_fork;
+ childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
+ p->thread.regs = childregs;
}
- sp -= STACK_FRAME_OVERHEAD;
/*
* The way this works is that at some point in the future
@@ -1173,38 +1843,62 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
* do some house keeping and then return from the fork or clone
* system call, using the stack frame created above.
*/
- ((unsigned long *)sp)[0] = 0;
- sp -= sizeof(struct pt_regs);
- kregs = (struct pt_regs *) sp;
- sp -= STACK_FRAME_OVERHEAD;
+ ((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f;
+ sp -= STACK_SWITCH_FRAME_SIZE;
+ ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
+ kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
+ kregs->nip = ppc_function_entry(f);
+ if (unlikely(args->fn)) {
+ /*
+ * Put kthread fn, arg parameters in non-volatile GPRs in the
+ * switch frame so they are loaded by _switch before it returns
+ * to ret_from_kernel_thread.
+ */
+ kregs->gpr[14] = ppc_function_entry((void *)args->fn);
+ kregs->gpr[15] = (unsigned long)args->fn_arg;
+ }
p->thread.ksp = sp;
-#ifdef CONFIG_PPC32
- p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
- _ALIGN_UP(sizeof(struct thread_info), 16);
-#endif
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- p->thread.ptrace_bps[0] = NULL;
+ for (i = 0; i < nr_wp_slots(); i++)
+ p->thread.ptrace_bps[i] = NULL;
#endif
+#ifdef CONFIG_PPC_FPU_REGS
p->thread.fp_save_area = NULL;
+#endif
#ifdef CONFIG_ALTIVEC
p->thread.vr_save_area = NULL;
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ p->thread.kuap = KUAP_NONE;
+#endif
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
+ p->thread.pid = MMU_NO_CONTEXT;
+#endif
setup_ksp_vsid(p, sp);
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_DSCR)) {
p->thread.dscr_inherit = current->thread.dscr_inherit;
- p->thread.dscr = current->thread.dscr;
+ p->thread.dscr = mfspr(SPRN_DSCR);
}
- if (cpu_has_feature(CPU_FTR_HAS_PPR))
- p->thread.ppr = INIT_PPR;
+
+ p->thread.tidr = 0;
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE))
+ p->thread.hashkeyr = current->thread.hashkeyr;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ p->thread.dexcr = mfspr(SPRN_DEXCR);
#endif
- kregs->nip = ppc_function_entry(f);
return 0;
}
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
/*
* Set up a thread for executing a new program
*/
@@ -1212,31 +1906,28 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
{
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
+ preload_new_slb_context(start, sp);
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
- * If we exec out of a kernel thread then thread.regs will not be
- * set. Do it now.
+ * Clear any transactional state, we're exec()ing. The cause is
+ * not important as there will never be a recheckpoint so it's not
+ * user visible.
*/
- if (!current->thread.regs) {
- struct pt_regs *regs = task_stack_page(current) + THREAD_SIZE;
- current->thread.regs = regs - 1;
- }
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+#endif
- memset(regs->gpr, 0, sizeof(regs->gpr));
+ memset(&regs->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
regs->ctr = 0;
regs->link = 0;
regs->xer = 0;
regs->ccr = 0;
regs->gpr[1] = sp;
- /*
- * We have just cleared all the nonvolatile GPRs, so make
- * FULL_REGS(regs) return true. This is necessary to allow
- * ptrace to examine the thread immediately after exec.
- */
- regs->trap &= ~1UL;
-
#ifdef CONFIG_PPC32
regs->mq = 0;
regs->nip = start;
@@ -1269,8 +1960,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
* address of _start and the second entry is the TOC
* value we need to use.
*/
- __get_user(entry, (unsigned long __user *)start);
- __get_user(toc, (unsigned long __user *)start+1);
+ get_user(entry, (unsigned long __user *)start);
+ get_user(toc, (unsigned long __user *)start+1);
/* Check whether the e_entry function descriptor entries
* need to be relocated before we can use them.
@@ -1281,26 +1972,31 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
}
regs->gpr[2] = toc;
}
- regs->nip = entry;
- regs->msr = MSR_USER64;
+ regs_set_return_ip(regs, entry);
+ regs_set_return_msr(regs, MSR_USER64);
} else {
- regs->nip = start;
regs->gpr[2] = 0;
- regs->msr = MSR_USER32;
+ regs_set_return_ip(regs, start);
+ regs_set_return_msr(regs, MSR_USER32);
}
+
#endif
- discard_lazy_cpu_state();
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
+ current->thread.load_slb = 0;
+ current->thread.load_fp = 0;
+#ifdef CONFIG_PPC_FPU_REGS
memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
+#endif
#ifdef CONFIG_ALTIVEC
memset(&current->thread.vr_state, 0, sizeof(current->thread.vr_state));
current->thread.vr_state.vscr.u[3] = 0x00010000; /* Java mode disabled */
current->thread.vr_save_area = NULL;
current->thread.vrsave = 0;
current->thread.used_vr = 0;
+ current->thread.load_vec = 0;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_SPE
memset(current->thread.evr, 0, sizeof(current->thread.evr));
@@ -1309,13 +2005,19 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.used_spe = 0;
#endif /* CONFIG_SPE */
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (cpu_has_feature(CPU_FTR_TM))
- regs->msr |= MSR_TM;
current->thread.tm_tfhar = 0;
current->thread.tm_texasr = 0;
current->thread.tm_tfiar = 0;
+ current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+ current->thread.hashkeyr = get_random_long();
+ mtspr(SPRN_HASHKEYR, current->thread.hashkeyr);
+ }
+#endif /* CONFIG_PPC_BOOK3S_64 */
}
+EXPORT_SYMBOL(start_thread);
#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
| PR_FP_EXC_RES | PR_FP_EXC_INV)
@@ -1329,7 +2031,6 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* fpexc_mode. fpexc_mode is also used for setting FP exception
* mode (asyn, precise, disabled) for 'Classic' FP. */
if (val & PR_FP_EXC_SW_ENABLE) {
-#ifdef CONFIG_SPE
if (cpu_has_feature(CPU_FTR_SPE)) {
/*
* When the sticky exception bits are set
@@ -1343,16 +2044,15 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
* anyway to restore the prctl settings from
* the saved environment.
*/
+#ifdef CONFIG_SPE
tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
tsk->thread.fpexc_mode = val &
(PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT);
+#endif
return 0;
} else {
return -EINVAL;
}
-#else
- return -EINVAL;
-#endif
}
/* on a CONFIG_SPE this does not hurt us. The bits that
@@ -1363,18 +2063,18 @@ int set_fpexc_mode(struct task_struct *tsk, unsigned int val)
if (val > PR_FP_EXC_PRECISE)
return -EINVAL;
tsk->thread.fpexc_mode = __pack_fe01(val);
- if (regs != NULL && (regs->msr & MSR_FP) != 0)
- regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1))
- | tsk->thread.fpexc_mode;
+ if (regs != NULL && (regs->msr & MSR_FP) != 0) {
+ regs_set_return_msr(regs, (regs->msr & ~(MSR_FE0|MSR_FE1))
+ | tsk->thread.fpexc_mode);
+ }
return 0;
}
int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
{
- unsigned int val;
+ unsigned int val = 0;
- if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE)
-#ifdef CONFIG_SPE
+ if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) {
if (cpu_has_feature(CPU_FTR_SPE)) {
/*
* When the sticky exception bits are set
@@ -1388,15 +2088,15 @@ int get_fpexc_mode(struct task_struct *tsk, unsigned long adr)
* anyway to restore the prctl settings from
* the saved environment.
*/
+#ifdef CONFIG_SPE
tsk->thread.spefscr_last = mfspr(SPRN_SPEFSCR);
val = tsk->thread.fpexc_mode;
+#endif
} else
return -EINVAL;
-#else
- return -EINVAL;
-#endif
- else
+ } else {
val = __unpack_fe01(tsk->thread.fpexc_mode);
+ }
return put_user(val, (unsigned int __user *) adr);
}
@@ -1412,9 +2112,9 @@ int set_endian(struct task_struct *tsk, unsigned int val)
return -EINVAL;
if (val == PR_ENDIAN_BIG)
- regs->msr &= ~MSR_LE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_LE);
else if (val == PR_ENDIAN_LITTLE || val == PR_ENDIAN_PPC_LITTLE)
- regs->msr |= MSR_LE;
+ regs_set_return_msr(regs, regs->msr | MSR_LE);
else
return -EINVAL;
@@ -1461,56 +2161,113 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p,
unsigned long stack_page;
unsigned long cpu = task_cpu(p);
- /*
- * Avoid crashing if the stack has overflowed and corrupted
- * task_cpu(p), which is in the thread_info struct.
- */
- if (cpu < NR_CPUS && cpu_possible(cpu)) {
- stack_page = (unsigned long) hardirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
-
- stack_page = (unsigned long) softirq_ctx[cpu];
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
- return 1;
- }
+ if (!hardirq_ctx[cpu] || !softirq_ctx[cpu])
+ return 0;
+
+ stack_page = (unsigned long)hardirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)softirq_ctx[cpu];
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC64
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
+
+ if (!paca_ptrs)
+ return 0;
+
+ if (!paca_ptrs[cpu]->emergency_sp)
+ return 0;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+ if (!paca_ptrs[cpu]->nmi_emergency_sp || !paca_ptrs[cpu]->mc_emergency_sp)
+ return 0;
+#endif
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+# ifdef CONFIG_PPC_BOOK3S_64
+ stack_page = (unsigned long)paca_ptrs[cpu]->nmi_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
+ stack_page = (unsigned long)paca_ptrs[cpu]->mc_emergency_sp - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+# endif
+
+ return 0;
+}
+#else
+static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
+{
+ unsigned long stack_page;
+ unsigned long cpu = task_cpu(p);
+
+ if (!IS_ENABLED(CONFIG_VMAP_STACK))
+ return 0;
+
+ stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE;
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
+ return 1;
+
return 0;
}
+#endif
-int validate_sp(unsigned long sp, struct task_struct *p,
- unsigned long nbytes)
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+ unsigned long nbytes)
{
unsigned long stack_page = (unsigned long)task_stack_page(p);
- if (sp >= stack_page + sizeof(struct thread_struct)
- && sp <= stack_page + THREAD_SIZE - nbytes)
+ if (sp < THREAD_SIZE)
+ return 0;
+
+ if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes)
return 1;
- return valid_irq_stack(sp, p, nbytes);
+ if (valid_irq_stack(sp, p, nbytes))
+ return 1;
+
+ return valid_emergency_stack(sp, p, nbytes);
}
-EXPORT_SYMBOL(validate_sp);
+int validate_sp(unsigned long sp, struct task_struct *p)
+{
+ return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE);
+}
-unsigned long get_wchan(struct task_struct *p)
+static unsigned long ___get_wchan(struct task_struct *p)
{
unsigned long ip, sp;
int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
-
sp = p->thread.ksp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, p))
return 0;
do {
- sp = *(unsigned long *)sp;
- if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+ sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ if (!validate_sp(sp, p) || task_is_running(p))
return 0;
if (count > 0) {
- ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
+ ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
if (!in_sched_functions(ip))
return ip;
}
@@ -1518,76 +2275,115 @@ unsigned long get_wchan(struct task_struct *p)
return 0;
}
+unsigned long __get_wchan(struct task_struct *p)
+{
+ unsigned long ret;
+
+ if (!try_get_task_stack(p))
+ return 0;
+
+ ret = ___get_wchan(p);
+
+ put_task_stack(p);
+
+ return ret;
+}
+
+static bool empty_user_regs(struct pt_regs *regs, struct task_struct *tsk)
+{
+ unsigned long stack_page;
+
+ // A non-empty pt_regs should never have a zero MSR or TRAP value.
+ if (regs->msr || regs->trap)
+ return false;
+
+ // Check it sits at the very base of the stack
+ stack_page = (unsigned long)task_stack_page(tsk);
+ if ((unsigned long)(regs + 1) != stack_page + THREAD_SIZE)
+ return false;
+
+ return true;
+}
+
static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH;
-void show_stack(struct task_struct *tsk, unsigned long *stack)
+void __no_sanitize_address show_stack(struct task_struct *tsk,
+ unsigned long *stack,
+ const char *loglvl)
{
unsigned long sp, ip, lr, newsp;
int count = 0;
int firstframe = 1;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- int curr_frame = current->curr_ret_stack;
- extern void return_to_handler(void);
- unsigned long rth = (unsigned long)return_to_handler;
- unsigned long mrth = -1;
-#ifdef CONFIG_PPC64
- extern void mod_return_to_handler(void);
- rth = *(unsigned long *)rth;
- mrth = (unsigned long)mod_return_to_handler;
- mrth = *(unsigned long *)mrth;
-#endif
-#endif
+ unsigned long ret_addr;
+ int ftrace_idx = 0;
- sp = (unsigned long) stack;
if (tsk == NULL)
tsk = current;
+
+ if (!try_get_task_stack(tsk))
+ return;
+
+ sp = (unsigned long) stack;
if (sp == 0) {
if (tsk == current)
- asm("mr %0,1" : "=r" (sp));
+ sp = current_stack_frame();
else
sp = tsk->thread.ksp;
}
lr = 0;
- printk("Call Trace:\n");
+ printk("%sCall Trace:\n", loglvl);
do {
- if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
- return;
+ if (!validate_sp(sp, tsk))
+ break;
stack = (unsigned long *) sp;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
if (!firstframe || ip != lr) {
- printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if ((ip == rth || ip == mrth) && curr_frame >= 0) {
- printk(" (%pS)",
- (void *)current->ret_stack[curr_frame].ret);
- curr_frame--;
- }
-#endif
+ printk("%s["REG"] ["REG"] %pS",
+ loglvl, sp, ip, (void *)ip);
+ ret_addr = ftrace_graph_ret_addr(current,
+ &ftrace_idx, ip, stack);
+ if (ret_addr != ip)
+ pr_cont(" (%pS)", (void *)ret_addr);
if (firstframe)
- printk(" (unreliable)");
- printk("\n");
+ pr_cont(" (unreliable)");
+ pr_cont("\n");
}
firstframe = 0;
/*
* See if this is an exception frame.
- * We look for the "regshere" marker in the current frame.
+ * We look for the "regs" marker in the current frame.
+ *
+ * STACK_SWITCH_FRAME_SIZE being the smallest frame that
+ * could hold a pt_regs, if that does not fit then it can't
+ * have regs.
*/
- if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE)
- && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE)
+ && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
struct pt_regs *regs = (struct pt_regs *)
- (sp + STACK_FRAME_OVERHEAD);
+ (sp + STACK_INT_FRAME_REGS);
+
lr = regs->link;
- printk("--- interrupt: %lx at %pS\n LR = %pS\n",
- regs->trap, (void *)regs->nip, (void *)lr);
+ printk("%s---- interrupt: %lx at %pS\n",
+ loglvl, regs->trap, (void *)regs->nip);
+
+ // Detect the case of an empty pt_regs at the very base
+ // of the stack and suppress showing it in full.
+ if (!empty_user_regs(regs, tsk)) {
+ __show_regs(regs);
+ printk("%s---- interrupt: %lx\n", loglvl, regs->trap);
+ }
+
firstframe = 1;
}
sp = newsp;
} while (count++ < kstack_depth_to_print);
+
+ put_task_stack(tsk);
}
#ifdef CONFIG_PPC64
@@ -1595,11 +2391,25 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
void notrace __ppc64_runlatch_on(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl |= CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ /*
+ * Least significant bit (RUN) is the only writable bit of
+ * the CTRL register, so we can avoid mfspr. 2.06 is not the
+ * earliest ISA where this is the case, but it's convenient.
+ */
+ mtspr(SPRN_CTRLT, CTRL_RUNLATCH);
+ } else {
+ unsigned long ctrl;
+
+ /*
+ * Some architectures (e.g., Cell) have writable fields other
+ * than RUN, so do the read-modify-write.
+ */
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl |= CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
ti->local_flags |= _TLF_RUNLATCH;
}
@@ -1608,67 +2418,24 @@ void notrace __ppc64_runlatch_on(void)
void notrace __ppc64_runlatch_off(void)
{
struct thread_info *ti = current_thread_info();
- unsigned long ctrl;
ti->local_flags &= ~_TLF_RUNLATCH;
- ctrl = mfspr(SPRN_CTRLF);
- ctrl &= ~CTRL_RUNLATCH;
- mtspr(SPRN_CTRLT, ctrl);
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ mtspr(SPRN_CTRLT, 0);
+ } else {
+ unsigned long ctrl;
+
+ ctrl = mfspr(SPRN_CTRLF);
+ ctrl &= ~CTRL_RUNLATCH;
+ mtspr(SPRN_CTRLT, ctrl);
+ }
}
#endif /* CONFIG_PPC64 */
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() & ~PAGE_MASK;
+ sp -= get_random_u32_below(PAGE_SIZE);
return sp & ~0xf;
}
-
-static inline unsigned long brk_rnd(void)
-{
- unsigned long rnd = 0;
-
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
- else
- rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
-
- return rnd << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- unsigned long base = mm->brk;
- unsigned long ret;
-
-#ifdef CONFIG_PPC_STD_MMU_64
- /*
- * If we are using 1TB segments and we are allowed to randomise
- * the heap, we can put it above 1TB so it is backed by a 1TB
- * segment. Otherwise the heap will be in the bottom 1TB
- * which always uses 256MB segments and this may result in a
- * performance penalty.
- */
- if (!is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
- base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
-#endif
-
- ret = PAGE_ALIGN(base + brk_rnd());
-
- if (ret < mm->brk)
- return mm->brk;
-
- return ret;
-}
-
-unsigned long randomize_et_dyn(unsigned long base)
-{
- unsigned long ret = PAGE_ALIGN(base + brk_rnd());
-
- if (ret < base)
- return base;
-
- return ret;
-}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4e139f8a69ef..9ed9dde7d231 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Procedures for creating, accessing and interpreting the device tree.
*
@@ -6,16 +7,10 @@
*
* Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
* {engebret|bergner}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
-#include <stdarg.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -23,7 +18,6 @@
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/pci.h>
-#include <linux/stringify.h>
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/bitops.h>
@@ -34,8 +28,10 @@
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+#include <linux/seq_buf.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/page.h>
#include <asm/processor.h>
@@ -45,17 +41,22 @@
#include <asm/smp.h>
#include <asm/mmu.h>
#include <asm/paca.h>
-#include <asm/pgtable.h>
-#include <asm/pci.h>
+#include <asm/powernv.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
-#include <asm/machdep.h>
+#include <asm/setup.h>
#include <asm/pci-bridge.h>
#include <asm/kexec.h>
#include <asm/opal.h>
#include <asm/fadump.h>
-#include <asm/debug.h>
+#include <asm/epapr_hcalls.h>
+#include <asm/firmware.h>
+#include <asm/dt_cpu_ftrs.h>
+#include <asm/drmem.h>
+#include <asm/ultravisor.h>
+#include <asm/prom.h>
+#include <asm/plpks.h>
#include <mm/mmu_decl.h>
@@ -65,11 +66,14 @@
#define DBG(fmt...)
#endif
+int *chip_id_lookup_table;
+
#ifdef CONFIG_PPC64
int __initdata iommu_is_off;
int __initdata iommu_force_on;
unsigned long tce_alloc_start, tce_alloc_end;
u64 ppc64_rma_size;
+unsigned int boot_cpu_node_count __ro_after_init;
#endif
static phys_addr_t first_memblock_size;
static int __initdata boot_cpu_count;
@@ -96,8 +100,8 @@ static inline int overlaps_initrd(unsigned long start, unsigned long size)
if (!initrd_start)
return 0;
- return (start + size) > _ALIGN_DOWN(initrd_start, PAGE_SIZE) &&
- start <= _ALIGN_UP(initrd_end, PAGE_SIZE);
+ return (start + size) > ALIGN_DOWN(initrd_start, PAGE_SIZE) &&
+ start <= ALIGN(initrd_end, PAGE_SIZE);
#else
return 0;
#endif
@@ -121,19 +125,22 @@ static void __init move_device_tree(void)
size = fdt_totalsize(initial_boot_params);
if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
- overlaps_crashkernel(start, size) ||
- overlaps_initrd(start, size)) {
- p = __va(memblock_alloc(size, PAGE_SIZE));
+ !memblock_is_memory(start + size - 1) ||
+ overlaps_crashkernel(start, size) || overlaps_initrd(start, size)) {
+ p = memblock_alloc_raw(size, PAGE_SIZE);
+ if (!p)
+ panic("Failed to allocate %lu bytes to move device tree\n",
+ size);
memcpy(p, initial_boot_params, size);
initial_boot_params = p;
- DBG("Moved device tree to 0x%p\n", p);
+ DBG("Moved device tree to 0x%px\n", p);
}
DBG("<- move_device_tree\n");
}
/*
- * ibm,pa-features is a per-cpu property that contains a string of
+ * ibm,pa/pi-features is a per-cpu property that contains a string of
* attribute descriptors, each of which has a 2 byte header plus up
* to 254 bytes worth of processor attribute bits. First header
* byte specifies the number of bytes following the header.
@@ -144,27 +151,57 @@ static void __init move_device_tree(void)
* pa-features property is missing, or a 1/0 to indicate if the feature
* is supported/not supported. Note that the bit numbers are
* big-endian to match the definition in PAPR.
+ * Note: the 'clear' flag clears the feature if the bit is set in the
+ * ibm,pa/pi-features property, it does not set the feature if the
+ * bit is clear.
*/
-static struct ibm_pa_feature {
+struct ibm_feature {
unsigned long cpu_features; /* CPU_FTR_xxx bit */
unsigned long mmu_features; /* MMU_FTR_xxx bit */
unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */
- unsigned char pabyte; /* byte number in ibm,pa-features */
+ unsigned int cpu_user_ftrs2; /* PPC_FEATURE2_xxx bit */
+ unsigned char pabyte; /* byte number in ibm,pa/pi-features */
unsigned char pabit; /* bit number (big-endian) */
- unsigned char invert; /* if 1, pa bit set => clear feature */
-} ibm_pa_features[] __initdata = {
- {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0},
- {0, 0, PPC_FEATURE_HAS_FPU, 0, 1, 0},
- {CPU_FTR_CTRL, 0, 0, 0, 3, 0},
- {CPU_FTR_NOEXECUTE, 0, 0, 0, 6, 0},
- {CPU_FTR_NODSISRALIGN, 0, 0, 1, 1, 1},
- {0, MMU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0},
- {CPU_FTR_REAL_LE, PPC_FEATURE_TRUE_LE, 5, 0, 0},
+ unsigned char clear; /* if 1, pa bit set => clear feature */
+};
+
+static struct ibm_feature ibm_pa_features[] __initdata = {
+ { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
+ { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
+ { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
+ { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
+ { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
+#ifdef CONFIG_PPC_RADIX_MMU
+ { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE },
+#endif
+ { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
+ .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
+ /*
+ * If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
+ * we don't want to turn on TM here, so we use the *_COMP versions
+ * which are 0 if the kernel doesn't support TM.
+ */
+ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
+ .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
+
+ { .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
+ { .pabyte = 68, .pabit = 5, .cpu_features = CPU_FTR_DEXCR_NPHIE },
+};
+
+/*
+ * ibm,pi-features property provides the support of processor specific
+ * options not described in ibm,pa-features. Right now use byte 0, bit 3
+ * which indicates the occurrence of DSI interrupt when the paste operation
+ * on the suspended NX window.
+ */
+static struct ibm_feature ibm_pi_features[] __initdata = {
+ { .pabyte = 0, .pabit = 3, .mmu_features = MMU_FTR_NX_DSI },
+ { .pabyte = 0, .pabit = 4, .cpu_features = CPU_FTR_DBELL, .clear = 1 },
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
unsigned long tablelen,
- struct ibm_pa_feature *fp,
+ struct ibm_feature *fp,
unsigned long ft_size)
{
unsigned long i, len, bit;
@@ -187,48 +224,47 @@ static void __init scan_features(unsigned long node, const unsigned char *ftrs,
if (fp->pabyte >= ftrs[0])
continue;
bit = (ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1;
- if (bit ^ fp->invert) {
+ if (bit && !fp->clear) {
cur_cpu_spec->cpu_features |= fp->cpu_features;
cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+ cur_cpu_spec->cpu_user_features2 |= fp->cpu_user_ftrs2;
cur_cpu_spec->mmu_features |= fp->mmu_features;
- } else {
+ } else if (bit == fp->clear) {
cur_cpu_spec->cpu_features &= ~fp->cpu_features;
cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+ cur_cpu_spec->cpu_user_features2 &= ~fp->cpu_user_ftrs2;
cur_cpu_spec->mmu_features &= ~fp->mmu_features;
}
}
}
-static void __init check_cpu_pa_features(unsigned long node)
+static void __init check_cpu_features(unsigned long node, char *name,
+ struct ibm_feature *fp,
+ unsigned long size)
{
const unsigned char *pa_ftrs;
int tablelen;
- pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
+ pa_ftrs = of_get_flat_dt_prop(node, name, &tablelen);
if (pa_ftrs == NULL)
return;
- scan_features(node, pa_ftrs, tablelen,
- ibm_pa_features, ARRAY_SIZE(ibm_pa_features));
+ scan_features(node, pa_ftrs, tablelen, fp, size);
}
-#ifdef CONFIG_PPC_STD_MMU_64
-static void __init check_cpu_slb_size(unsigned long node)
+#ifdef CONFIG_PPC_64S_HASH_MMU
+static void __init init_mmu_slb_size(unsigned long node)
{
const __be32 *slb_size_ptr;
- slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL);
- if (slb_size_ptr != NULL) {
- mmu_slb_size = be32_to_cpup(slb_size_ptr);
- return;
- }
- slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
- if (slb_size_ptr != NULL) {
+ slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL) ? :
+ of_get_flat_dt_prop(node, "ibm,slb-size", NULL);
+
+ if (slb_size_ptr)
mmu_slb_size = be32_to_cpup(slb_size_ptr);
- }
}
#else
-#define check_cpu_slb_size(node) do { } while(0)
+#define init_mmu_slb_size(node) do { } while(0)
#endif
static struct feature_property {
@@ -253,7 +289,7 @@ static struct feature_property {
};
#if defined(CONFIG_44x) && defined(CONFIG_PPC_FPU)
-static inline void identical_pvr_fixup(unsigned long node)
+static __init void identical_pvr_fixup(unsigned long node)
{
unsigned int pvr;
const char *model = of_get_flat_dt_prop(node, "model", NULL);
@@ -277,11 +313,11 @@ static inline void identical_pvr_fixup(unsigned long node)
static void __init check_cpu_feature_properties(unsigned long node)
{
- unsigned long i;
+ int i;
struct feature_property *fp = feature_properties;
const __be32 *prop;
- for (i = 0; i < ARRAY_SIZE(feature_properties); ++i, ++fp) {
+ for (i = 0; i < (int)ARRAY_SIZE(feature_properties); ++i, ++fp) {
prop = of_get_flat_dt_prop(node, fp->name, NULL);
if (prop && be32_to_cpup(prop) >= fp->min_value) {
cur_cpu_spec->cpu_features |= fp->cpu_feature;
@@ -295,6 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
void *data)
{
const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *cpu_version = NULL;
const __be32 *prop;
const __be32 *intserv;
int i, nthreads;
@@ -306,6 +343,9 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (type == NULL || strcmp(type, "cpu") != 0)
return 0;
+ if (IS_ENABLED(CONFIG_PPC64))
+ boot_cpu_node_count++;
+
/* Get physical cpuid */
intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
if (!intserv)
@@ -318,25 +358,10 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* NOTE: This must match the parsing done in smp_setup_cpu_maps.
*/
for (i = 0; i < nthreads; i++) {
- /*
- * version 2 of the kexec param format adds the phys cpuid of
- * booted proc.
- */
- if (fdt_version(initial_boot_params) >= 2) {
- if (be32_to_cpu(intserv[i]) ==
- fdt_boot_cpuid_phys(initial_boot_params)) {
- found = boot_cpu_count;
- found_thread = i;
- }
- } else {
- /*
- * Check if it's the boot-cpu, set it's hw index now,
- * unfortunately this format did not support booting
- * off secondary threads.
- */
- if (of_get_flat_dt_prop(node,
- "linux,boot-cpu", NULL) != NULL)
- found = boot_cpu_count;
+ if (be32_to_cpu(intserv[i]) ==
+ fdt_boot_cpuid_phys(initial_boot_params)) {
+ found = boot_cpu_count;
+ found_thread = i;
}
#ifdef CONFIG_SMP
/* logical cpu id is always 0 on UP kernels */
@@ -348,10 +373,30 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
if (found < 0)
return 0;
- DBG("boot cpu: logical %d physical %d\n", found,
- be32_to_cpu(intserv[found_thread]));
boot_cpuid = found;
- set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread]));
+
+ if (IS_ENABLED(CONFIG_PPC64))
+ boot_cpu_hwid = be32_to_cpu(intserv[found_thread]);
+
+ if (nr_cpu_ids % nthreads != 0) {
+ set_nr_cpu_ids(ALIGN(nr_cpu_ids, nthreads));
+ pr_warn("nr_cpu_ids was not a multiple of threads_per_core, adjusted to %d\n",
+ nr_cpu_ids);
+ }
+
+ if (boot_cpuid >= nr_cpu_ids) {
+ // Remember boot core for smp_setup_cpu_maps()
+ boot_core_hwid = be32_to_cpu(intserv[0]);
+
+ pr_warn("Boot CPU %d (core hwid %d) >= nr_cpu_ids, adjusted boot CPU to %d\n",
+ boot_cpuid, boot_core_hwid, found_thread);
+
+ // Adjust boot CPU to appear on logical core 0
+ boot_cpuid = found_thread;
+ }
+
+ DBG("boot cpu: logical %d physical %d\n", boot_cpuid,
+ be32_to_cpu(intserv[found_thread]));
/*
* PAPR defines "logical" PVR values for cpus that
@@ -366,33 +411,53 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
* A POWER6 partition in "POWER6 architected" mode
* uses the 0x0f000002 PVR value; in POWER5+ mode
* it uses 0x0f000001.
+ *
+ * If we're using device tree CPU feature discovery then we don't
+ * support the cpu-version property, and it's the responsibility of the
+ * firmware/hypervisor to provide the correct feature set for the
+ * architecture level via the ibm,powerpc-cpu-features binding.
*/
- prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
- if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000)
- identify_cpu(0, be32_to_cpup(prop));
+ if (!dt_cpu_ftrs_in_use()) {
+ prop = of_get_flat_dt_prop(node, "cpu-version", NULL);
+ if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) {
+ identify_cpu(0, be32_to_cpup(prop));
+ cpu_version = prop;
+ }
+
+ check_cpu_feature_properties(node);
+ check_cpu_features(node, "ibm,pa-features", ibm_pa_features,
+ ARRAY_SIZE(ibm_pa_features));
+ check_cpu_features(node, "ibm,pi-features", ibm_pi_features,
+ ARRAY_SIZE(ibm_pi_features));
+ }
identical_pvr_fixup(node);
- check_cpu_feature_properties(node);
- check_cpu_pa_features(node);
- check_cpu_slb_size(node);
+ // We can now add the CPU name & PVR to the hardware description
+ seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR));
+ if (cpu_version)
+ seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version));
+
+ init_mmu_slb_size(node);
#ifdef CONFIG_PPC64
- if (nthreads > 1)
- cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
- else
+ if (nthreads == 1)
cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT;
+ else if (!dt_cpu_ftrs_in_use())
+ cur_cpu_spec->cpu_features |= CPU_FTR_SMT;
#endif
+
return 0;
}
-int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
- int depth, void *data)
+static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
+ const char *uname,
+ int depth, void *data)
{
const unsigned long *lprop; /* All these set by kernel, so no need to convert endian */
/* Use common scan routine to determine if this is the chosen node */
- if (early_init_dt_scan_chosen(node, uname, depth, data) == 0)
+ if (early_init_dt_scan_chosen(data) < 0)
return 0;
#ifdef CONFIG_PPC64
@@ -417,7 +482,7 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
tce_alloc_end = *lprop;
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_CRASH_RESERVE
lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
if (lprop)
crashk_res.start = *lprop;
@@ -431,95 +496,109 @@ int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname,
return 1;
}
+/*
+ * Compare the range against max mem limit and update
+ * size if it cross the limit.
+ */
+
+#ifdef CONFIG_SPARSEMEM
+static bool __init validate_mem_limit(u64 base, u64 *size)
+{
+ u64 max_mem = 1UL << (MAX_PHYSMEM_BITS);
+
+ if (base >= max_mem)
+ return false;
+ if ((base + *size) > max_mem)
+ *size = max_mem - base;
+ return true;
+}
+#else
+static bool __init validate_mem_limit(u64 base, u64 *size)
+{
+ return true;
+}
+#endif
+
#ifdef CONFIG_PPC_PSERIES
/*
- * Interpret the ibm,dynamic-memory property in the
- * /ibm,dynamic-reconfiguration-memory node.
+ * Interpret the ibm dynamic reconfiguration memory LMBs.
* This contains a list of memory blocks along with NUMA affinity
* information.
*/
-static int __init early_init_dt_scan_drconf_memory(unsigned long node)
+static int __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm,
+ void *data)
{
- const __be32 *dm, *ls, *usm;
- int l;
- unsigned long n, flags;
- u64 base, size, memblock_size;
- unsigned int is_kexec_kdump = 0, rngs;
-
- ls = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
- if (ls == NULL || l < dt_root_size_cells * sizeof(__be32))
- return 0;
- memblock_size = dt_mem_next_cell(dt_root_size_cells, &ls);
+ u64 base, size;
+ int is_kexec_kdump = 0, rngs;
- dm = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &l);
- if (dm == NULL || l < sizeof(__be32))
- return 0;
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ rngs = 1;
- n = of_read_number(dm++, 1); /* number of entries */
- if (l < (n * (dt_root_addr_cells + 4) + 1) * sizeof(__be32))
+ /*
+ * Skip this block if the reserved bit is set in flags
+ * or if the block is not assigned to this partition.
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED) ||
+ !(lmb->flags & DRCONF_MEM_ASSIGNED))
return 0;
- /* check if this is a kexec/kdump kernel. */
- usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory",
- &l);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- base = dt_mem_next_cell(dt_root_addr_cells, &dm);
- flags = of_read_number(&dm[3], 1);
- /* skip DRC index, pad, assoc. list index, flags */
- dm += 4;
- /* skip this block if the reserved bit is set in flags (0x80)
- or if the block is not assigned to this partition (0x8) */
- if ((flags & 0x80) || !(flags & 0x8))
- continue;
- size = memblock_size;
- rngs = 1;
+ if (is_kexec_kdump) {
+ /*
+ * For each memblock in ibm,dynamic-memory, a
+ * corresponding entry in linux,drconf-usable-memory
+ * property contains a counter 'p' followed by 'p'
+ * (base, size) duple. Now read the counter from
+ * linux,drconf-usable-memory property
+ */
+ rngs = dt_mem_next_cell(dt_root_size_cells, usm);
+ if (!rngs) /* there are no (base, size) duple */
+ return 0;
+ }
+
+ do {
if (is_kexec_kdump) {
- /*
- * For each memblock in ibm,dynamic-memory, a corresponding
- * entry in linux,drconf-usable-memory property contains
- * a counter 'p' followed by 'p' (base, size) duple.
- * Now read the counter from
- * linux,drconf-usable-memory property
- */
- rngs = dt_mem_next_cell(dt_root_size_cells, &usm);
- if (!rngs) /* there are no (base, size) duple */
+ base = dt_mem_next_cell(dt_root_addr_cells, usm);
+ size = dt_mem_next_cell(dt_root_size_cells, usm);
+ }
+
+ if (iommu_is_off) {
+ if (base >= 0x80000000ul)
continue;
+ if ((base + size) > 0x80000000ul)
+ size = 0x80000000ul - base;
}
- do {
- if (is_kexec_kdump) {
- base = dt_mem_next_cell(dt_root_addr_cells,
- &usm);
- size = dt_mem_next_cell(dt_root_size_cells,
- &usm);
- }
- if (iommu_is_off) {
- if (base >= 0x80000000ul)
- continue;
- if ((base + size) > 0x80000000ul)
- size = 0x80000000ul - base;
- }
- memblock_add(base, size);
- } while (--rngs);
- }
- memblock_dump_all();
+
+ if (!validate_mem_limit(base, &size))
+ continue;
+
+ DBG("Adding: %llx -> %llx\n", base, size);
+ memblock_add(base, size);
+
+ if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
+ memblock_mark_hotplug(base, size);
+ } while (--rngs);
+
return 0;
}
-#else
-#define early_init_dt_scan_drconf_memory(node) 0
#endif /* CONFIG_PPC_PSERIES */
-static int __init early_init_dt_scan_memory_ppc(unsigned long node,
- const char *uname,
- int depth, void *data)
+static int __init early_init_dt_scan_memory_ppc(void)
{
- if (depth == 1 &&
- strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0)
- return early_init_dt_scan_drconf_memory(node);
-
- return early_init_dt_scan_memory(node, uname, depth, data);
+#ifdef CONFIG_PPC_PSERIES
+ const void *fdt = initial_boot_params;
+ int node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+
+ if (node > 0)
+ walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb);
+
+#endif
+
+ return early_init_dt_scan_memory();
}
/*
@@ -556,8 +635,10 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size)
}
/* Add the chunk to the MEMBLOCK list */
- if (add_mem_to_memblock)
- memblock_add(base, size);
+ if (add_mem_to_memblock) {
+ if (validate_mem_limit(base, &size))
+ memblock_add(base, size);
+ }
}
static void __init early_reserve_mem_dt(void)
@@ -566,6 +647,7 @@ static void __init early_reserve_mem_dt(void)
int len;
const __be32 *prop;
+ early_init_fdt_reserve_self();
early_init_fdt_scan_reserved_mem();
dt_root = of_get_flat_dt_root();
@@ -605,13 +687,15 @@ static void __init early_reserve_mem(void)
#ifdef CONFIG_BLK_DEV_INITRD
/* Then reserve the initrd, if any */
if (initrd_start && (initrd_end > initrd_start)) {
- memblock_reserve(_ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
- _ALIGN_UP(initrd_end, PAGE_SIZE) -
- _ALIGN_DOWN(initrd_start, PAGE_SIZE));
+ memblock_reserve(ALIGN_DOWN(__pa(initrd_start), PAGE_SIZE),
+ ALIGN(initrd_end, PAGE_SIZE) -
+ ALIGN_DOWN(initrd_start, PAGE_SIZE));
}
#endif /* CONFIG_BLK_DEV_INITRD */
-#ifdef CONFIG_PPC32
+ if (!IS_ENABLED(CONFIG_PPC32))
+ return;
+
/*
* Handle the case where we might be booting from an old kexec
* image that setup the mem_rsvmap as pairs of 32-bit values
@@ -632,17 +716,85 @@ static void __init early_reserve_mem(void)
}
return;
}
-#endif
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static bool tm_disabled __initdata;
+
+static int __init parse_ppc_tm(char *str)
+{
+ bool res;
+
+ if (kstrtobool(str, &res))
+ return -EINVAL;
+
+ tm_disabled = !res;
+
+ return 0;
+}
+early_param("ppc_tm", parse_ppc_tm);
+
+static void __init tm_init(void)
+{
+ if (tm_disabled) {
+ pr_info("Disabling hardware transactional memory (HTM)\n");
+ cur_cpu_spec->cpu_user_features2 &=
+ ~(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM);
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_TM;
+ return;
+ }
+
+ pnv_tm_init();
+}
+#else
+static void tm_init(void) { }
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+
+static int __init
+early_init_dt_scan_model(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ const char *prop;
+
+ if (depth != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "model", NULL);
+ if (prop)
+ seq_buf_printf(&ppc_hw_desc, "%s ", prop);
+
+ /* break now */
+ return 1;
+}
+
+#ifdef CONFIG_PPC64
+static void __init save_fscr_to_task(void)
+{
+ /*
+ * Ensure the init_task (pid 0, aka swapper) uses the value of FSCR we
+ * have configured via the device tree features or via __init_FSCR().
+ * That value will then be propagated to pid 1 (init) and all future
+ * processes.
+ */
+ if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ init_task.thread.fscr = mfspr(SPRN_FSCR);
+}
+#else
+static inline void save_fscr_to_task(void) {}
+#endif
+
+
void __init early_init_devtree(void *params)
{
- phys_addr_t limit;
+ phys_addr_t int_vector_size;
- DBG(" -> early_init_devtree(%p)\n", params);
+ DBG(" -> early_init_devtree(%px)\n", params);
- /* Setup flat device-tree pointer */
- initial_boot_params = params;
+ /* Too early to BUG_ON(), do it by hand */
+ if (!early_init_dt_verify(params, __pa(params)))
+ panic("BUG: Failed verifying flat device tree, bad version?");
+
+ of_scan_flat_dt(early_init_dt_scan_model, NULL);
#ifdef CONFIG_PPC_RTAS
/* Some machines might need RTAS info for debugging, grab it now. */
@@ -652,9 +804,12 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+ /* Scan tree for ultravisor feature */
+ of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -663,14 +818,22 @@ void __init early_init_devtree(void *params)
* device-tree, including the platform type, initrd location and
* size, TCE reserve, and more ...
*/
- of_scan_flat_dt(early_init_dt_scan_chosen_ppc, cmd_line);
+ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line);
+
+ /* Append additional parameters passed for fadump capture kernel */
+ fadump_append_bootargs();
/* Scan memory nodes and rebuild MEMBLOCKs */
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
- of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ early_init_dt_scan_root();
+ early_init_dt_scan_memory_ppc();
- /* Save command line for /proc/cmdline and then parse parameters */
- strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE);
+ /*
+ * As generic code authors expect to be able to use static keys
+ * in early_param() handlers, we initialize the static keys just
+ * before parsing early params (it's fine to call jump_label_init()
+ * more than once).
+ */
+ jump_label_init();
parse_early_param();
/* make sure we've parsed cmdline for mem= before this */
@@ -678,50 +841,64 @@ void __init early_init_devtree(void *params)
first_memblock_size = min_t(u64, first_memblock_size, memory_limit);
setup_initial_memory_limit(memstart_addr, first_memblock_size);
/* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */
- memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START);
+ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START);
+#ifdef CONFIG_PPC64
+ /* If relocatable, reserve at least 32k for interrupt vectors etc. */
+ int_vector_size = __end_interrupts - _stext;
+ int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size);
+#else
/* If relocatable, reserve first 32k for interrupt vectors etc. */
+ int_vector_size = SZ_32K;
+#endif
if (PHYSICAL_START > MEMORY_START)
- memblock_reserve(MEMORY_START, 0x8000);
+ memblock_reserve(MEMORY_START, int_vector_size);
reserve_kdump_trampoline();
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.
*/
if (fadump_reserve_mem() == 0)
#endif
- reserve_crashkernel();
+ arch_reserve_crashkernel();
early_reserve_mem();
- /*
- * Ensure that total memory size is page-aligned, because otherwise
- * mark_bootmem() gets upset.
- */
- limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE);
- memblock_enforce_memory_limit(limit);
+ if (memory_limit > memblock_phys_mem_size())
+ memory_limit = 0;
+
+ /* Align down to 16 MB which is large page size with hash page translation */
+ memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M);
+ memblock_enforce_memory_limit(memory_limit);
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES)
+ if (!early_radix_enabled())
+ memblock_cap_memory_range(0, 1UL << (H_MAX_PHYSMEM_BITS));
+#endif
memblock_allow_resize();
memblock_dump_all();
- DBG("Phys. mem: %llx\n", memblock_phys_mem_size());
+ DBG("Phys. mem: %llx\n", (unsigned long long)memblock_phys_mem_size());
/* We may need to relocate the flat tree, do it now.
* FIXME .. and the initrd too? */
move_device_tree();
- allocate_pacas();
-
DBG("Scanning CPUs ...\n");
+ dt_cpu_ftrs_scan();
+
/* Retrieve CPU related informations from the flat tree
* (altivec support, boot CPU ID, ...)
*/
of_scan_flat_dt(early_init_dt_scan_cpus, NULL);
if (boot_cpuid < 0) {
- printk("Failed to indentify boot CPU !\n");
+ printk("Failed to identify boot CPU !\n");
BUG();
}
+ save_fscr_to_task();
+
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
/* We'll later wait for secondaries to check in; there are
* NCPUS-1 non-boot CPUs :-)
@@ -729,10 +906,35 @@ void __init early_init_devtree(void *params)
spinning_secondaries = boot_cpu_count - 1;
#endif
+ mmu_early_init_devtree();
+
+ /* Setup param area for passing additional parameters to fadump capture kernel. */
+ fadump_setup_param_area();
+
#ifdef CONFIG_PPC_POWERNV
/* Scan and build the list of machine check recoverable ranges */
of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
#endif
+ epapr_paravirt_early_init();
+
+ /* Now try to figure out if we are running on LPAR and so on */
+ pseries_probe_fw_features();
+
+ /*
+ * Initialize pkey features and default AMR/IAMR values
+ */
+ pkey_early_init_devtree();
+
+#ifdef CONFIG_PPC_PS3
+ /* Identify PS3 firmware */
+ if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3"))
+ powerpc_firmware_features |= FW_FEATURE_PS3_POSSIBLE;
+#endif
+
+ /* If kexec left a PLPKS password in the DT, get it and clear it */
+ plpks_early_init_devtree();
+
+ tm_init();
DBG(" <- early_init_devtree()\n");
}
@@ -752,8 +954,8 @@ void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
* mess the memblock.
*/
add_mem_to_memblock = 0;
- of_scan_flat_dt(early_init_dt_scan_root, NULL);
- of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL);
+ early_init_dt_scan_root();
+ early_init_dt_scan_memory_ppc();
add_mem_to_memblock = 1;
if (size)
@@ -783,20 +985,23 @@ void __init early_get_first_memblock_info(void *params, phys_addr_t *size)
int of_get_ibm_chip_id(struct device_node *np)
{
of_node_get(np);
- while(np) {
- struct device_node *old = np;
- const __be32 *prop;
+ while (np) {
+ u32 chip_id;
- prop = of_get_property(np, "ibm,chip-id", NULL);
- if (prop) {
+ /*
+ * Skiboot may produce memory nodes that contain more than one
+ * cell in chip-id, we only read the first one here.
+ */
+ if (!of_property_read_u32(np, "ibm,chip-id", &chip_id)) {
of_node_put(np);
- return be32_to_cpup(prop);
+ return chip_id;
}
- np = of_get_parent(np);
- of_node_put(old);
+
+ np = of_get_next_parent(np);
}
return -1;
}
+EXPORT_SYMBOL(of_get_ibm_chip_id);
/**
* cpu_to_chip_id - Return the cpus chip-id
@@ -808,17 +1013,36 @@ int of_get_ibm_chip_id(struct device_node *np)
int cpu_to_chip_id(int cpu)
{
struct device_node *np;
+ int ret = -1, idx;
+
+ idx = cpu / threads_per_core;
+ if (chip_id_lookup_table && chip_id_lookup_table[idx] != -1)
+ return chip_id_lookup_table[idx];
np = of_get_cpu_node(cpu, NULL);
- if (!np)
- return -1;
+ if (np) {
+ ret = of_get_ibm_chip_id(np);
+ of_node_put(np);
- of_node_put(np);
- return of_get_ibm_chip_id(np);
+ if (chip_id_lookup_table)
+ chip_id_lookup_table[idx] = ret;
+ }
+
+ return ret;
}
EXPORT_SYMBOL(cpu_to_chip_id);
bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
{
+#ifdef CONFIG_SMP
+ /*
+ * Early firmware scanning must use this rather than
+ * get_hard_smp_processor_id because we don't have pacas allocated
+ * until memory topology is discovered.
+ */
+ if (cpu_to_phys_id != NULL)
+ return (int)phys_id == cpu_to_phys_id[cpu];
+#endif
+
return (int)phys_id == get_hard_smp_processor_id(cpu);
}
diff --git a/arch/powerpc/kernel/prom_entry_64.S b/arch/powerpc/kernel/prom_entry_64.S
new file mode 100644
index 000000000000..f1b8793d28c6
--- /dev/null
+++ b/arch/powerpc/kernel/prom_entry_64.S
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
+ * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
+ * Adapted for Power Macintosh by Paul Mackerras.
+ * Low-level exception handlers and MMU support
+ * rewritten by Paul Mackerras.
+ * Copyright (C) 1996 Paul Mackerras.
+ * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+ *
+ * This file contains the 64-bit prom entry code.
+ */
+#include <asm/asm-offsets.h>
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/exception-64s.h>
+#else
+#include <asm/exception-64e.h>
+#endif
+#include <asm/ppc_asm.h>
+
+.section ".text","ax",@progbits
+
+_GLOBAL(enter_prom)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
+
+ /* Because PROM is running in 32b mode, it clobbers the high order half
+ * of all registers that it saves. We therefore save those registers
+ * PROM might touch to the stack. (r0, r3-r13 are caller saved)
+ */
+ SAVE_GPR(2, r1)
+ SAVE_GPR(13, r1)
+ SAVE_NVGPRS(r1)
+ mfcr r10
+ mfmsr r11
+ std r10,_CCR(r1)
+ std r11,_MSR(r1)
+
+ /* Put PROM address in SRR0 */
+ mtsrr0 r4
+
+ /* Setup our trampoline return addr in LR */
+ bcl 20,31,$+4
+0: mflr r4
+ addi r4,r4,(1f - 0b)
+ mtlr r4
+
+ /* Prepare a 32-bit mode big endian MSR
+ */
+#ifdef CONFIG_PPC_BOOK3E_64
+ rlwinm r11,r11,0,1,31
+ mtsrr1 r11
+ rfi
+#else /* CONFIG_PPC_BOOK3E_64 */
+ LOAD_REG_IMMEDIATE(r12, MSR_SF | MSR_LE)
+ andc r11,r11,r12
+ mtsrr1 r11
+ RFI_TO_KERNEL
+#endif /* CONFIG_PPC_BOOK3E_64 */
+
+1: /* Return from OF */
+ FIXUP_ENDIAN
+
+ /* Just make sure that r1 top 32 bits didn't get
+ * corrupt by OF
+ */
+ rldicl r1,r1,0,32
+
+ /* Restore the MSR (back to 64 bits) */
+ ld r0,_MSR(r1)
+ MTMSRD(r0)
+ isync
+
+ /* Restore other registers */
+ REST_GPR(2, r1)
+ REST_GPR(13, r1)
+ REST_NVGPRS(r1)
+ ld r4,_CCR(r1)
+ mtcr r4
+
+ addi r1,r1,SWITCH_FRAME_SIZE
+ ld r0,16(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1a85d8f96739..827c958677f8 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Procedures for interfacing to Open Firmware.
*
@@ -6,16 +7,14 @@
*
* Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner.
* {engebret|bergner}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG_PROM
-#include <stdarg.h>
+/* we cannot use FORTIFY as it brings in new symbols */
+#define __NO_FORTIFY
+
+#include <linux/stdarg.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
@@ -24,28 +23,34 @@
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
-#include <linux/stringify.h>
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/bitops.h>
+#include <linux/pgtable.h>
+#include <linux/printk.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/page.h>
#include <asm/processor.h>
+#include <asm/interrupt.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/pci.h>
#include <asm/iommu.h>
#include <asm/btext.h>
#include <asm/sections.h>
-#include <asm/machdep.h>
-#include <asm/opal.h>
+#include <asm/setup.h>
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
+/* All of prom_init bss lives here */
+#define __prombss __section(".bss.prominit")
+
/*
* Eventually bump that one up
*/
@@ -85,22 +90,16 @@
#define OF_WORKAROUNDS 0
#else
#define OF_WORKAROUNDS of_workarounds
-int of_workarounds;
+static int of_workarounds __prombss;
#endif
#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
#define OF_WA_LONGTRAIL 2 /* work around longtrail bugs */
-#define PROM_BUG() do { \
- prom_printf("kernel BUG at %s line 0x%x!\n", \
- __FILE__, __LINE__); \
- __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
-} while (0)
-
#ifdef DEBUG_PROM
#define prom_debug(x...) prom_printf(x)
#else
-#define prom_debug(x...)
+#define prom_debug(x...) do { } while (0)
#endif
@@ -146,28 +145,43 @@ extern void copy_and_flush(unsigned long dest, unsigned long src,
unsigned long size, unsigned long offset);
/* prom structure */
-static struct prom_t __initdata prom;
-
-static unsigned long prom_entry __initdata;
+static struct prom_t __prombss prom;
-#define PROM_SCRATCH_SIZE 256
+static unsigned long __prombss prom_entry;
-static char __initdata of_stdout_device[256];
-static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
+static char __prombss of_stdout_device[256];
+static char __prombss prom_scratch[256];
-static unsigned long __initdata dt_header_start;
-static unsigned long __initdata dt_struct_start, dt_struct_end;
-static unsigned long __initdata dt_string_start, dt_string_end;
+static unsigned long __prombss dt_header_start;
+static unsigned long __prombss dt_struct_start, dt_struct_end;
+static unsigned long __prombss dt_string_start, dt_string_end;
-static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+static unsigned long __prombss prom_initrd_start, prom_initrd_end;
#ifdef CONFIG_PPC64
-static int __initdata prom_iommu_force_on;
-static int __initdata prom_iommu_off;
-static unsigned long __initdata prom_tce_alloc_start;
-static unsigned long __initdata prom_tce_alloc_end;
+static int __prombss prom_iommu_force_on;
+static int __prombss prom_iommu_off;
+static unsigned long __prombss prom_tce_alloc_start;
+static unsigned long __prombss prom_tce_alloc_end;
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+static bool __prombss prom_radix_disable;
+static bool __prombss prom_radix_gtse_disable;
+static bool __prombss prom_xive_disable;
+#endif
+
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
#endif
+struct platform_support {
+ bool hash_mmu;
+ bool radix_mmu;
+ bool radix_gtse;
+ bool xive;
+};
+
/* Platforms codes are now obsolete in the kernel. Now only used within this
* file and ultimately gone too. Feel free to change them if you need, they
* are not shared with anything outside of this file anymore
@@ -177,26 +191,25 @@ static unsigned long __initdata prom_tce_alloc_end;
#define PLATFORM_LPAR 0x0001
#define PLATFORM_POWERMAC 0x0400
#define PLATFORM_GENERIC 0x0500
-#define PLATFORM_OPAL 0x0600
-static int __initdata of_platform;
+static int __prombss of_platform;
-static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
+static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
-static unsigned long __initdata prom_memory_limit;
+static unsigned long __prombss prom_memory_limit;
-static unsigned long __initdata alloc_top;
-static unsigned long __initdata alloc_top_high;
-static unsigned long __initdata alloc_bottom;
-static unsigned long __initdata rmo_top;
-static unsigned long __initdata ram_top;
+static unsigned long __prombss alloc_top;
+static unsigned long __prombss alloc_top_high;
+static unsigned long __prombss alloc_bottom;
+static unsigned long __prombss rmo_top;
+static unsigned long __prombss ram_top;
-static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
-static int __initdata mem_reserve_cnt;
+static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __prombss mem_reserve_cnt;
-static cell_t __initdata regbuf[1024];
+static cell_t __prombss regbuf[1024];
-static bool rtas_has_query_cpu_stopped;
+static bool __prombss rtas_has_query_cpu_stopped;
/*
@@ -210,6 +223,162 @@ static bool rtas_has_query_cpu_stopped;
#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR)
#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR)
+/* Copied from lib/string.c and lib/kstrtox.c */
+
+static int __init prom_strcmp(const char *cs, const char *ct)
+{
+ unsigned char c1, c2;
+
+ while (1) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ }
+ return 0;
+}
+
+static ssize_t __init prom_strscpy_pad(char *dest, const char *src, size_t n)
+{
+ ssize_t rc;
+ size_t i;
+
+ if (n == 0 || n > INT_MAX)
+ return -E2BIG;
+
+ // Copy up to n bytes
+ for (i = 0; i < n && src[i] != '\0'; i++)
+ dest[i] = src[i];
+
+ rc = i;
+
+ // If we copied all n then we have run out of space for the nul
+ if (rc == n) {
+ // Rewind by one character to ensure nul termination
+ i--;
+ rc = -E2BIG;
+ }
+
+ for (; i < n; i++)
+ dest[i] = '\0';
+
+ return rc;
+}
+
+static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+
+static size_t __init prom_strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+
+static char __init *prom_strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = prom_strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = prom_strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!prom_memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
+{
+ size_t dsize = prom_strlen(dest);
+ size_t len = prom_strlen(src);
+ size_t res = dsize + len;
+
+ /* This would be a bug */
+ if (dsize >= count)
+ return count;
+
+ dest += dsize;
+ count -= dsize;
+ if (len >= count)
+ len = count-1;
+ memcpy(dest, src, len);
+ dest[len] = 0;
+ return res;
+
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static int __init prom_strtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+#endif
/* This is the one and *ONLY* place where we actually call open
* firmware.
@@ -289,6 +458,10 @@ static void __init prom_print(const char *msg)
}
+/*
+ * Both prom_print_hex & prom_print_dec takes an unsigned long as input so that
+ * we do not need __udivdi3 or __umoddi3 on 32bits.
+ */
static void __init prom_print_hex(unsigned long val)
{
int i, nibbles = sizeof(val)*2;
@@ -322,12 +495,14 @@ static void __init prom_print_dec(unsigned long val)
call_prom("write", 3, 1, prom.stdout, buf+i, size);
}
+__printf(1, 2)
static void __init prom_printf(const char *format, ...)
{
const char *p, *q, *s;
va_list args;
unsigned long v;
long vs;
+ int n = 0;
va_start(args, format);
for (p = format; *p != 0; p = q) {
@@ -346,6 +521,10 @@ static void __init prom_printf(const char *format, ...)
++q;
if (*q == 0)
break;
+ while (*q == 'l') {
+ ++q;
+ ++n;
+ }
switch (*q) {
case 's':
++q;
@@ -354,42 +533,59 @@ static void __init prom_printf(const char *format, ...)
break;
case 'x':
++q;
- v = va_arg(args, unsigned long);
+ switch (n) {
+ case 0:
+ v = va_arg(args, unsigned int);
+ break;
+ case 1:
+ v = va_arg(args, unsigned long);
+ break;
+ case 2:
+ default:
+ v = va_arg(args, unsigned long long);
+ break;
+ }
prom_print_hex(v);
break;
- case 'd':
+ case 'u':
++q;
- vs = va_arg(args, int);
- if (vs < 0) {
- prom_print("-");
- vs = -vs;
+ switch (n) {
+ case 0:
+ v = va_arg(args, unsigned int);
+ break;
+ case 1:
+ v = va_arg(args, unsigned long);
+ break;
+ case 2:
+ default:
+ v = va_arg(args, unsigned long long);
+ break;
}
- prom_print_dec(vs);
+ prom_print_dec(v);
break;
- case 'l':
+ case 'd':
++q;
- if (*q == 0)
+ switch (n) {
+ case 0:
+ vs = va_arg(args, int);
break;
- else if (*q == 'x') {
- ++q;
- v = va_arg(args, unsigned long);
- prom_print_hex(v);
- } else if (*q == 'u') { /* '%lu' */
- ++q;
- v = va_arg(args, unsigned long);
- prom_print_dec(v);
- } else if (*q == 'd') { /* %ld */
- ++q;
+ case 1:
vs = va_arg(args, long);
- if (vs < 0) {
- prom_print("-");
- vs = -vs;
- }
- prom_print_dec(vs);
+ break;
+ case 2:
+ default:
+ vs = va_arg(args, long long);
+ break;
}
+ if (vs < 0) {
+ prom_print("-");
+ vs = -vs;
+ }
+ prom_print_dec(vs);
break;
}
}
+ va_end(args);
}
@@ -460,19 +656,19 @@ static int __init prom_next_node(phandle *nodep)
}
}
-static int inline prom_getprop(phandle node, const char *pname,
- void *value, size_t valuelen)
+static inline int __init prom_getprop(phandle node, const char *pname,
+ void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static int inline prom_getproplen(phandle node, const char *pname)
+static inline int __init prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
-static void add_string(char **str, const char *q)
+static void __init add_string(char **str, const char *q)
{
char *p = *str;
@@ -482,10 +678,10 @@ static void add_string(char **str, const char *q)
*str = p;
}
-static char *tohex(unsigned int x)
+static char *__init tohex(unsigned int x)
{
- static char digits[] = "0123456789abcdef";
- static char result[9];
+ static const char digits[] __initconst = "0123456789abcdef";
+ static char result[9] __prombss;
int i;
result[8] = 0;
@@ -514,36 +710,35 @@ static int __init prom_setprop(phandle node, const char *nodename,
add_string(&p, tohex((u32)(unsigned long) value));
add_string(&p, tohex(valuelen));
add_string(&p, tohex(ADDR(pname)));
- add_string(&p, tohex(strlen(pname)));
+ add_string(&p, tohex(prom_strlen(pname)));
add_string(&p, "property");
*p = 0;
return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
}
/* We can't use the standard versions because of relocation headaches. */
-#define isxdigit(c) (('0' <= (c) && (c) <= '9') \
- || ('a' <= (c) && (c) <= 'f') \
- || ('A' <= (c) && (c) <= 'F'))
+#define prom_isxdigit(c) \
+ (('0' <= (c) && (c) <= '9') || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
-#define isdigit(c) ('0' <= (c) && (c) <= '9')
-#define islower(c) ('a' <= (c) && (c) <= 'z')
-#define toupper(c) (islower(c) ? ((c) - 'a' + 'A') : (c))
+#define prom_isdigit(c) ('0' <= (c) && (c) <= '9')
+#define prom_islower(c) ('a' <= (c) && (c) <= 'z')
+#define prom_toupper(c) (prom_islower(c) ? ((c) - 'a' + 'A') : (c))
-static unsigned long prom_strtoul(const char *cp, const char **endp)
+static unsigned long __init prom_strtoul(const char *cp, const char **endp)
{
unsigned long result = 0, base = 10, value;
if (*cp == '0') {
base = 8;
cp++;
- if (toupper(*cp) == 'X') {
+ if (prom_toupper(*cp) == 'X') {
cp++;
base = 16;
}
}
- while (isxdigit(*cp) &&
- (value = isdigit(*cp) ? *cp - '0' : toupper(*cp) - 'A' + 10) < base) {
+ while (prom_isxdigit(*cp) &&
+ (value = prom_isdigit(*cp) ? *cp - '0' : prom_toupper(*cp) - 'A' + 10) < base) {
result = result * base + value;
cp++;
}
@@ -554,7 +749,7 @@ static unsigned long prom_strtoul(const char *cp, const char **endp)
return result;
}
-static unsigned long prom_memparse(const char *ptr, const char **retptr)
+static unsigned long __init prom_memparse(const char *ptr, const char **retptr)
{
unsigned long ret = prom_strtoul(ptr, retptr);
int shift = 0;
@@ -594,141 +789,353 @@ static void __init early_cmdline_parse(void)
prom_cmd_line[0] = 0;
p = prom_cmd_line;
- if ((long)prom.chosen > 0)
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-#ifdef CONFIG_CMDLINE
- if (l <= 0 || p[0] == '\0') /* dbl check */
- strlcpy(prom_cmd_line,
- CONFIG_CMDLINE, sizeof(prom_cmd_line));
-#endif /* CONFIG_CMDLINE */
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
+ prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+ sizeof(prom_cmd_line));
+
prom_printf("command line: %s\n", prom_cmd_line);
#ifdef CONFIG_PPC64
- opt = strstr(prom_cmd_line, "iommu=");
+ opt = prom_strstr(prom_cmd_line, "iommu=");
if (opt) {
prom_printf("iommu opt is: %s\n", opt);
opt += 6;
while (*opt && *opt == ' ')
opt++;
- if (!strncmp(opt, "off", 3))
+ if (!prom_strncmp(opt, "off", 3))
prom_iommu_off = 1;
- else if (!strncmp(opt, "force", 5))
+ else if (!prom_strncmp(opt, "force", 5))
prom_iommu_force_on = 1;
}
#endif
- opt = strstr(prom_cmd_line, "mem=");
+ opt = prom_strstr(prom_cmd_line, "mem=");
if (opt) {
opt += 4;
prom_memory_limit = prom_memparse(opt, (const char **)&opt);
#ifdef CONFIG_PPC64
- /* Align to 16 MB == size of ppc64 large page */
- prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000);
+ /* Align down to 16 MB which is large page size with hash page translation */
+ prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M);
#endif
}
+
+#ifdef CONFIG_PPC_PSERIES
+ prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+ opt = prom_strstr(prom_cmd_line, "disable_radix");
+ if (opt) {
+ opt += 13;
+ if (*opt && *opt == '=') {
+ bool val;
+
+ if (prom_strtobool(++opt, &val))
+ prom_radix_disable = false;
+ else
+ prom_radix_disable = val;
+ } else
+ prom_radix_disable = true;
+ }
+ if (prom_radix_disable)
+ prom_debug("Radix disabled from cmdline\n");
+
+ opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on");
+ if (opt) {
+ prom_radix_gtse_disable = true;
+ prom_debug("Radix GTSE disabled from cmdline\n");
+ }
+
+ opt = prom_strstr(prom_cmd_line, "xive=off");
+ if (opt) {
+ prom_xive_disable = true;
+ prom_debug("XIVE disabled from cmdline\n");
+ }
+#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+ opt = prom_strstr(prom_cmd_line, "svm=");
+ if (opt) {
+ bool val;
+
+ opt += sizeof("svm=") - 1;
+ if (!prom_strtobool(opt, &val))
+ prom_svm_enable = val;
+ }
+#endif /* CONFIG_PPC_SVM */
}
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* The architecture vector has an array of PVR mask/value pairs,
* followed by # option vectors - 1, followed by the option vectors.
*
* See prom.h for the definition of the bits specified in the
* architecture vector.
- *
- * Because the description vector contains a mix of byte and word
- * values, we declare it as an unsigned char array, and use this
- * macro to put word values in.
*/
-#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \
- ((x) >> 8) & 0xff, (x) & 0xff
-
-unsigned char ibm_architecture_vec[] = {
- W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
- W(0xffff0000), W(0x003e0000), /* POWER6 */
- W(0xffff0000), W(0x003f0000), /* POWER7 */
- W(0xffff0000), W(0x004b0000), /* POWER8E */
- W(0xffff0000), W(0x004d0000), /* POWER8 */
- W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
- W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
- W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
- W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */
- 6 - 1, /* 6 option vectors */
-
- /* option vector 1: processor architectures supported */
- 3 - 2, /* length */
- 0, /* don't ignore, don't halt */
- OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
- OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+/* Firmware expects the value to be n - 1, where n is the # of vectors */
+#define NUM_VECTORS(n) ((n) - 1)
+
+/*
+ * Firmware expects 1 + n - 2, where n is the length of the option vector in
+ * bytes. The 1 accounts for the length byte itself, the - 2 .. ?
+ */
+#define VECTOR_LENGTH(n) (1 + (n) - 2)
+
+struct option_vector1 {
+ u8 byte1;
+ u8 arch_versions;
+ u8 arch_versions3;
+} __packed;
+
+struct option_vector2 {
+ u8 byte1;
+ __be16 reserved;
+ __be32 real_base;
+ __be32 real_size;
+ __be32 virt_base;
+ __be32 virt_size;
+ __be32 load_base;
+ __be32 min_rma;
+ __be32 min_load;
+ u8 min_rma_percent;
+ u8 max_pft_size;
+} __packed;
+
+struct option_vector3 {
+ u8 byte1;
+ u8 byte2;
+} __packed;
+
+struct option_vector4 {
+ u8 byte1;
+ u8 min_vp_cap;
+} __packed;
+
+struct option_vector5 {
+ u8 byte1;
+ u8 byte2;
+ u8 byte3;
+ u8 cmo;
+ u8 associativity;
+ u8 bin_opts;
+ u8 micro_checkpoint;
+ u8 reserved0;
+ __be32 max_cpus;
+ __be16 papr_level;
+ __be16 reserved1;
+ u8 platform_facilities;
+ u8 reserved2;
+ __be16 reserved3;
+ u8 subprocessors;
+ u8 byte22;
+ u8 intarch;
+ u8 mmu;
+ u8 hash_ext;
+ u8 radix_ext;
+} __packed;
+
+struct option_vector6 {
+ u8 reserved;
+ u8 secondary_pteg;
+ u8 os_name;
+} __packed;
+
+struct option_vector7 {
+ u8 os_id[256];
+} __packed;
+
+struct ibm_arch_vec {
+ struct { __be32 mask, val; } pvrs[16];
+
+ u8 num_vectors;
+
+ u8 vec1_len;
+ struct option_vector1 vec1;
+
+ u8 vec2_len;
+ struct option_vector2 vec2;
+
+ u8 vec3_len;
+ struct option_vector3 vec3;
+
+ u8 vec4_len;
+ struct option_vector4 vec4;
+
+ u8 vec5_len;
+ struct option_vector5 vec5;
+
+ u8 vec6_len;
+ struct option_vector6 vec6;
+
+ u8 vec7_len;
+ struct option_vector7 vec7;
+} __packed;
+
+static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
+ .pvrs = {
+ {
+ .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
+ .val = cpu_to_be32(0x003a0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER6 */
+ .val = cpu_to_be32(0x003e0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER7 */
+ .val = cpu_to_be32(0x003f0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8E */
+ .val = cpu_to_be32(0x004b0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */
+ .val = cpu_to_be32(0x004c0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8 */
+ .val = cpu_to_be32(0x004d0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER9 */
+ .val = cpu_to_be32(0x004e0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER10 */
+ .val = cpu_to_be32(0x00800000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER11 */
+ .val = cpu_to_be32(0x00820000),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* P11 compliant */
+ .val = cpu_to_be32(0x0f000007),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 3.1-compliant */
+ .val = cpu_to_be32(0x0f000006),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 3.00-compliant */
+ .val = cpu_to_be32(0x0f000005),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
+ .val = cpu_to_be32(0x0f000004),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */
+ .val = cpu_to_be32(0x0f000003),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */
+ .val = cpu_to_be32(0x0f000002),
+ },
+ {
+ .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */
+ .val = cpu_to_be32(0x0f000001),
+ },
+ },
+
+ .num_vectors = NUM_VECTORS(6),
+
+ .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)),
+ .vec1 = {
+ .byte1 = 0,
+ .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
+ OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+ .arch_versions3 = OV1_PPC_3_00 | OV1_PPC_3_1,
+ },
+
+ .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
/* option vector 2: Open Firmware options supported */
- 34 - 2, /* length */
- OV2_REAL_MODE,
- 0, 0,
- W(0xffffffff), /* real_base */
- W(0xffffffff), /* real_size */
- W(0xffffffff), /* virt_base */
- W(0xffffffff), /* virt_size */
- W(0xffffffff), /* load_base */
- W(256), /* 256MB min RMA */
- W(0xffffffff), /* full client load */
- 0, /* min RMA percentage of total RAM */
- 48, /* max log_2(hash table size) */
+ .vec2 = {
+ .byte1 = OV2_REAL_MODE,
+ .reserved = 0,
+ .real_base = cpu_to_be32(0xffffffff),
+ .real_size = cpu_to_be32(0xffffffff),
+ .virt_base = cpu_to_be32(0xffffffff),
+ .virt_size = cpu_to_be32(0xffffffff),
+ .load_base = cpu_to_be32(0xffffffff),
+ .min_rma = cpu_to_be32(MIN_RMA),
+ .min_load = cpu_to_be32(0xffffffff), /* full client load */
+ .min_rma_percent = 0, /* min RMA percentage of total RAM */
+ .max_pft_size = 48, /* max log_2(hash table size) */
+ },
+ .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)),
/* option vector 3: processor options supported */
- 3 - 2, /* length */
- 0, /* don't ignore, don't halt */
- OV3_FP | OV3_VMX | OV3_DFP,
+ .vec3 = {
+ .byte1 = 0, /* don't ignore, don't halt */
+ .byte2 = OV3_FP | OV3_VMX | OV3_DFP,
+ },
+ .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)),
/* option vector 4: IBM PAPR implementation */
- 3 - 2, /* length */
- 0, /* don't halt */
- OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+ .vec4 = {
+ .byte1 = 0, /* don't halt */
+ .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+ },
+ .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)),
/* option vector 5: PAPR/OF options */
- 19 - 2, /* length */
- 0, /* don't ignore, don't halt */
- OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
- OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
+ .vec5 = {
+ .byte1 = 0, /* don't ignore, don't halt */
+ .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
+ OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
#ifdef CONFIG_PCI_MSI
- /* PCIe/MSI support. Without MSI full PCIe is not supported */
- OV5_FEAT(OV5_MSI),
+ /* PCIe/MSI support. Without MSI full PCIe is not supported */
+ OV5_FEAT(OV5_MSI),
#else
- 0,
+ 0,
#endif
- 0,
+ .byte3 = 0,
+ .cmo =
#ifdef CONFIG_PPC_SMLPAR
- OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
+ OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
#else
- 0,
+ 0,
#endif
- OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
- 0,
- 0,
- 0,
- /* WARNING: The offset of the "number of cores" field below
- * must match by the macro below. Update the definition if
- * the structure layout changes.
- */
-#define IBM_ARCH_VEC_NRCORES_OFFSET 125
- W(NR_CPUS), /* number of cores supported */
- 0,
- 0,
- 0,
- 0,
- OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
- OV5_FEAT(OV5_PFO_HW_842),
- OV5_FEAT(OV5_SUB_PROCESSORS),
+ .associativity = OV5_FEAT(OV5_FORM1_AFFINITY) | OV5_FEAT(OV5_PRRN) |
+ OV5_FEAT(OV5_FORM2_AFFINITY),
+ .bin_opts = OV5_FEAT(OV5_RESIZE_HPT) | OV5_FEAT(OV5_HP_EVT),
+ .micro_checkpoint = 0,
+ .reserved0 = 0,
+ .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */
+ .papr_level = 0,
+ .reserved1 = 0,
+ .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842),
+ .reserved2 = 0,
+ .reserved3 = 0,
+ .subprocessors = 1,
+ .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
+ .intarch = 0,
+ .mmu = 0,
+ .hash_ext = 0,
+ .radix_ext = 0,
+ },
+
/* option vector 6: IBM PAPR hints */
- 4 - 2, /* length */
- 0,
- 0,
- OV6_LINUX,
+ .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)),
+ .vec6 = {
+ .reserved = 0,
+ .secondary_pteg = 0,
+ .os_name = OV6_LINUX,
+ },
+ /* option vector 7: OS Identification */
+ .vec7_len = VECTOR_LENGTH(sizeof(struct option_vector7)),
};
+static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
+
/* Old method - ELF header with PT_NOTE sections only works on BE */
#ifdef __BIG_ENDIAN__
-static struct fake_elf {
+static const struct fake_elf {
Elf32_Ehdr elfhdr;
Elf32_Phdr phdr[2];
struct chrpnote {
@@ -761,7 +1168,7 @@ static struct fake_elf {
u32 ignore_me;
} rpadesc;
} rpanote;
-} fake_elf = {
+} fake_elf __initconst = {
.elfhdr = {
.e_ident = { 0x7f, 'E', 'L', 'F',
ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
@@ -826,7 +1233,7 @@ static int __init prom_count_smt_threads(void)
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu"))
+ if (prom_strcmp(type, "cpu"))
continue;
/*
* There is an entry for each smt thread, each entry being
@@ -853,13 +1260,152 @@ static int __init prom_count_smt_threads(void)
}
+static void __init prom_parse_mmu_model(u8 val,
+ struct platform_support *support)
+{
+ switch (val) {
+ case OV5_FEAT(OV5_MMU_DYNAMIC):
+ case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */
+ prom_debug("MMU - either supported\n");
+ support->radix_mmu = !prom_radix_disable;
+ support->hash_mmu = true;
+ break;
+ case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */
+ prom_debug("MMU - radix only\n");
+ if (prom_radix_disable) {
+ /*
+ * If we __have__ to do radix, we're better off ignoring
+ * the command line rather than not booting.
+ */
+ prom_printf("WARNING: Ignoring cmdline option disable_radix\n");
+ }
+ support->radix_mmu = true;
+ break;
+ case OV5_FEAT(OV5_MMU_HASH):
+ prom_debug("MMU - hash only\n");
+ support->hash_mmu = true;
+ break;
+ default:
+ prom_debug("Unknown mmu support option: 0x%x\n", val);
+ break;
+ }
+}
+
+static void __init prom_parse_xive_model(u8 val,
+ struct platform_support *support)
+{
+ switch (val) {
+ case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
+ prom_debug("XIVE - either mode supported\n");
+ support->xive = !prom_xive_disable;
+ break;
+ case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
+ prom_debug("XIVE - exploitation mode supported\n");
+ if (prom_xive_disable) {
+ /*
+ * If we __have__ to do XIVE, we're better off ignoring
+ * the command line rather than not booting.
+ */
+ prom_printf("WARNING: Ignoring cmdline option xive=off\n");
+ }
+ support->xive = true;
+ break;
+ case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
+ prom_debug("XIVE - legacy mode supported\n");
+ break;
+ default:
+ prom_debug("Unknown xive support option: 0x%x\n", val);
+ break;
+ }
+}
+
+static void __init prom_parse_platform_support(u8 index, u8 val,
+ struct platform_support *support)
+{
+ switch (index) {
+ case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */
+ prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
+ break;
+ case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
+ if (val & OV5_FEAT(OV5_RADIX_GTSE))
+ support->radix_gtse = !prom_radix_gtse_disable;
+ break;
+ case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
+ prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
+ support);
+ break;
+ }
+}
+
+static void __init prom_check_platform_support(void)
+{
+ struct platform_support supported = {
+ .hash_mmu = false,
+ .radix_mmu = false,
+ .radix_gtse = false,
+ .xive = false
+ };
+ int prop_len = prom_getproplen(prom.chosen,
+ "ibm,arch-vec-5-platform-support");
+
+ /*
+ * First copy the architecture vec template
+ *
+ * use memcpy() instead of *vec = *vec_template so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
+ sizeof(ibm_architecture_vec));
+
+ prom_strscpy_pad(ibm_architecture_vec.vec7.os_id, linux_banner, 256);
+
+ if (prop_len > 1) {
+ int i;
+ u8 vec[8];
+ prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
+ prop_len);
+ if (prop_len > sizeof(vec))
+ prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
+ prop_len);
+ prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec));
+ for (i = 0; i < prop_len; i += 2) {
+ prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]);
+ prom_parse_platform_support(vec[i], vec[i + 1], &supported);
+ }
+ }
+
+ if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
+ /* Radix preferred - Check if GTSE is also supported */
+ prom_debug("Asking for radix\n");
+ ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
+ if (supported.radix_gtse)
+ ibm_architecture_vec.vec5.radix_ext =
+ OV5_FEAT(OV5_RADIX_GTSE);
+ else
+ prom_debug("Radix GTSE isn't supported\n");
+ } else if (supported.hash_mmu) {
+ /* Default to hash mmu (if we can) */
+ prom_debug("Asking for hash\n");
+ ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH);
+ } else {
+ /* We're probably on a legacy hypervisor */
+ prom_debug("Assuming legacy hash support\n");
+ }
+
+ if (supported.xive) {
+ prom_debug("Asking for XIVE\n");
+ ibm_architecture_vec.vec5.intarch = OV5_FEAT(OV5_XIVE_EXPLOIT);
+ }
+}
static void __init prom_send_capabilities(void)
{
ihandle root;
prom_arg_t ret;
u32 cores;
- unsigned char *ptcores;
+
+ /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */
+ prom_check_platform_support();
root = call_prom("open", 1, 1, ADDR("/"));
if (root != 0) {
@@ -870,37 +1416,18 @@ static void __init prom_send_capabilities(void)
* divide NR_CPUS.
*/
- /* The core value may start at an odd address. If such a word
- * access is made at a cache line boundary, this leads to an
- * exception which may not be handled at this time.
- * Forcing a per byte access to avoid exception.
- */
- ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
- cores = 0;
- cores |= ptcores[0] << 24;
- cores |= ptcores[1] << 16;
- cores |= ptcores[2] << 8;
- cores |= ptcores[3];
- if (cores != NR_CPUS) {
- prom_printf("WARNING ! "
- "ibm_architecture_vec structure inconsistent: %lu!\n",
- cores);
- } else {
- cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
- prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
- cores, NR_CPUS);
- ptcores[0] = (cores >> 24) & 0xff;
- ptcores[1] = (cores >> 16) & 0xff;
- ptcores[2] = (cores >> 8) & 0xff;
- ptcores[3] = cores & 0xff;
- }
+ cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+ prom_printf("Max number of cores passed to firmware: %u (NR_CPUS = %d)\n",
+ cores, NR_CPUS);
+
+ ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores);
/* try calling the ibm,client-architecture-support method */
prom_printf("Calling ibm,client-architecture-support...");
if (call_prom_ret("call-method", 3, 2, &ret,
ADDR("ibm,client-architecture-support"),
root,
- ADDR(ibm_architecture_vec)) == 0) {
+ ADDR(&ibm_architecture_vec)) == 0) {
/* the call exists... */
if (ret)
prom_printf("\nWARNING: ibm,client-architecture"
@@ -930,7 +1457,7 @@ static void __init prom_send_capabilities(void)
}
#endif /* __BIG_ENDIAN__ */
}
-#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+#endif /* CONFIG_PPC_PSERIES */
/*
* Memory allocation strategy... our layout is normally:
@@ -971,19 +1498,19 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
unsigned long addr = 0;
if (align)
- base = _ALIGN_UP(base, align);
- prom_debug("alloc_up(%x, %x)\n", size, align);
+ base = ALIGN(base, align);
+ prom_debug("%s(%lx, %lx)\n", __func__, size, align);
if (ram_top == 0)
prom_panic("alloc_up() called with mem not initialized\n");
if (align)
- base = _ALIGN_UP(alloc_bottom, align);
+ base = ALIGN(alloc_bottom, align);
else
base = alloc_bottom;
for(; (base + size) <= alloc_top;
- base = _ALIGN_UP(base + 0x100000, align)) {
- prom_debug(" trying: 0x%x\n\r", base);
+ base = ALIGN(base + 0x100000, align)) {
+ prom_debug(" trying: 0x%lx\n\r", base);
addr = (unsigned long)prom_claim(base, size, 0);
if (addr != PROM_ERROR && addr != 0)
break;
@@ -995,12 +1522,12 @@ static unsigned long __init alloc_up(unsigned long size, unsigned long align)
return 0;
alloc_bottom = addr + size;
- prom_debug(" -> %x\n", addr);
- prom_debug(" alloc_bottom : %x\n", alloc_bottom);
- prom_debug(" alloc_top : %x\n", alloc_top);
- prom_debug(" alloc_top_hi : %x\n", alloc_top_high);
- prom_debug(" rmo_top : %x\n", rmo_top);
- prom_debug(" ram_top : %x\n", ram_top);
+ prom_debug(" -> %lx\n", addr);
+ prom_debug(" alloc_bottom : %lx\n", alloc_bottom);
+ prom_debug(" alloc_top : %lx\n", alloc_top);
+ prom_debug(" alloc_top_hi : %lx\n", alloc_top_high);
+ prom_debug(" rmo_top : %lx\n", rmo_top);
+ prom_debug(" ram_top : %lx\n", ram_top);
return addr;
}
@@ -1015,14 +1542,14 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
{
unsigned long base, addr = 0;
- prom_debug("alloc_down(%x, %x, %s)\n", size, align,
+ prom_debug("%s(%lx, %lx, %s)\n", __func__, size, align,
highmem ? "(high)" : "(low)");
if (ram_top == 0)
prom_panic("alloc_down() called with mem not initialized\n");
if (highmem) {
/* Carve out storage for the TCE table. */
- addr = _ALIGN_DOWN(alloc_top_high - size, align);
+ addr = ALIGN_DOWN(alloc_top_high - size, align);
if (addr <= alloc_bottom)
return 0;
/* Will we bump into the RMO ? If yes, check out that we
@@ -1040,10 +1567,10 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
goto bail;
}
- base = _ALIGN_DOWN(alloc_top - size, align);
+ base = ALIGN_DOWN(alloc_top - size, align);
for (; base > alloc_bottom;
- base = _ALIGN_DOWN(base - 0x100000, align)) {
- prom_debug(" trying: 0x%x\n\r", base);
+ base = ALIGN_DOWN(base - 0x100000, align)) {
+ prom_debug(" trying: 0x%lx\n\r", base);
addr = (unsigned long)prom_claim(base, size, 0);
if (addr != PROM_ERROR && addr != 0)
break;
@@ -1054,12 +1581,12 @@ static unsigned long __init alloc_down(unsigned long size, unsigned long align,
alloc_top = addr;
bail:
- prom_debug(" -> %x\n", addr);
- prom_debug(" alloc_bottom : %x\n", alloc_bottom);
- prom_debug(" alloc_top : %x\n", alloc_top);
- prom_debug(" alloc_top_hi : %x\n", alloc_top_high);
- prom_debug(" rmo_top : %x\n", rmo_top);
- prom_debug(" ram_top : %x\n", ram_top);
+ prom_debug(" -> %lx\n", addr);
+ prom_debug(" alloc_bottom : %lx\n", alloc_bottom);
+ prom_debug(" alloc_top : %lx\n", alloc_top);
+ prom_debug(" alloc_top_hi : %lx\n", alloc_top_high);
+ prom_debug(" rmo_top : %lx\n", rmo_top);
+ prom_debug(" ram_top : %lx\n", ram_top);
return addr;
}
@@ -1108,8 +1635,8 @@ static void __init reserve_mem(u64 base, u64 size)
* have our terminator with "size" set to 0 since we are
* dumb and just copy this entire array to the boot params
*/
- base = _ALIGN_DOWN(base, PAGE_SIZE);
- top = _ALIGN_UP(top, PAGE_SIZE);
+ base = ALIGN_DOWN(base, PAGE_SIZE);
+ top = ALIGN(top, PAGE_SIZE);
size = top - base;
if (cnt >= (MEM_RESERVE_MAP_SIZE - 1))
@@ -1126,7 +1653,7 @@ static void __init reserve_mem(u64 base, u64 size)
static void __init prom_init_mem(void)
{
phandle node;
- char *path, type[64];
+ char type[64];
unsigned int plen;
cell_t *p, *endp;
__be32 val;
@@ -1147,7 +1674,6 @@ static void __init prom_init_mem(void)
prom_debug("root_size_cells: %x\n", rsc);
prom_debug("scanning memory:\n");
- path = prom_scratch;
for (node = 0; prom_next_node(&node); ) {
type[0] = 0;
@@ -1160,7 +1686,7 @@ static void __init prom_init_mem(void)
*/
prom_getprop(node, "name", type, sizeof(type));
}
- if (strcmp(type, "memory"))
+ if (prom_strcmp(type, "memory"))
continue;
plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf));
@@ -1172,9 +1698,10 @@ static void __init prom_init_mem(void)
endp = p + (plen / sizeof(cell_t));
#ifdef DEBUG_PROM
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
- prom_debug(" node %s :\n", path);
+ memset(prom_scratch, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, prom_scratch,
+ sizeof(prom_scratch) - 1);
+ prom_debug(" node %s :\n", prom_scratch);
#endif /* DEBUG_PROM */
while ((endp - p) >= (rac + rsc)) {
@@ -1185,7 +1712,7 @@ static void __init prom_init_mem(void)
if (size == 0)
continue;
- prom_debug(" %x %x\n", base, size);
+ prom_debug(" %lx %lx\n", base, size);
if (base == 0 && (of_platform & PLATFORM_LPAR))
rmo_top = size;
if ((base + size) > ram_top)
@@ -1205,12 +1732,12 @@ static void __init prom_init_mem(void)
if (prom_memory_limit) {
if (prom_memory_limit <= alloc_bottom) {
- prom_printf("Ignoring mem=%x <= alloc_bottom.\n",
- prom_memory_limit);
+ prom_printf("Ignoring mem=%lx <= alloc_bottom.\n",
+ prom_memory_limit);
prom_memory_limit = 0;
} else if (prom_memory_limit >= ram_top) {
- prom_printf("Ignoring mem=%x >= ram_top.\n",
- prom_memory_limit);
+ prom_printf("Ignoring mem=%lx >= ram_top.\n",
+ prom_memory_limit);
prom_memory_limit = 0;
} else {
ram_top = prom_memory_limit;
@@ -1242,12 +1769,13 @@ static void __init prom_init_mem(void)
alloc_bottom = PAGE_ALIGN(prom_initrd_end);
prom_printf("memory layout at init:\n");
- prom_printf(" memory_limit : %x (16 MB aligned)\n", prom_memory_limit);
- prom_printf(" alloc_bottom : %x\n", alloc_bottom);
- prom_printf(" alloc_top : %x\n", alloc_top);
- prom_printf(" alloc_top_hi : %x\n", alloc_top_high);
- prom_printf(" rmo_top : %x\n", rmo_top);
- prom_printf(" ram_top : %x\n", ram_top);
+ prom_printf(" memory_limit : %lx (16 MB aligned)\n",
+ prom_memory_limit);
+ prom_printf(" alloc_bottom : %lx\n", alloc_bottom);
+ prom_printf(" alloc_top : %lx\n", alloc_top);
+ prom_printf(" alloc_top_hi : %lx\n", alloc_top_high);
+ prom_printf(" rmo_top : %lx\n", rmo_top);
+ prom_printf(" ram_top : %lx\n", ram_top);
}
static void __init prom_close_stdin(void)
@@ -1261,87 +1789,47 @@ static void __init prom_close_stdin(void)
}
}
-#ifdef CONFIG_PPC_POWERNV
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-static u64 __initdata prom_opal_base;
-static u64 __initdata prom_opal_entry;
-#endif
-
-/*
- * Allocate room for and instantiate OPAL
- */
-static void __init prom_instantiate_opal(void)
+#ifdef CONFIG_PPC_SVM
+static int __init prom_rtas_hcall(uint64_t args)
{
- phandle opal_node;
- ihandle opal_inst;
- u64 base, entry;
- u64 size = 0, align = 0x10000;
- __be64 val64;
- u32 rets[2];
-
- prom_debug("prom_instantiate_opal: start...\n");
+ register uint64_t arg1 asm("r3") = H_RTAS;
+ register uint64_t arg2 asm("r4") = args;
- opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
- prom_debug("opal_node: %x\n", opal_node);
- if (!PHANDLE_VALID(opal_node))
- return;
-
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
- size = be64_to_cpu(val64);
- if (size == 0)
- return;
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
- align = be64_to_cpu(val64);
+ asm volatile("sc 1\n" : "=r" (arg1) :
+ "r" (arg1),
+ "r" (arg2) :);
+ srr_regs_clobbered();
- base = alloc_down(size, align, 0);
- if (base == 0) {
- prom_printf("OPAL allocation failed !\n");
- return;
- }
+ return arg1;
+}
- opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
- if (!IHANDLE_VALID(opal_inst)) {
- prom_printf("opening opal package failed (%x)\n", opal_inst);
- return;
- }
+static struct rtas_args __prombss os_term_args;
- prom_printf("instantiating opal at 0x%x...", base);
+static void __init prom_rtas_os_term(char *str)
+{
+ phandle rtas_node;
+ __be32 val;
+ u32 token;
- if (call_prom_ret("call-method", 4, 3, rets,
- ADDR("load-opal-runtime"),
- opal_inst,
- base >> 32, base & 0xffffffff) != 0
- || (rets[0] == 0 && rets[1] == 0)) {
- prom_printf(" failed\n");
+ prom_debug("%s: start...\n", __func__);
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
return;
- }
- entry = (((u64)rets[0]) << 32) | rets[1];
-
- prom_printf(" done\n");
-
- reserve_mem(base, size);
- prom_debug("opal base = 0x%x\n", base);
- prom_debug("opal align = 0x%x\n", align);
- prom_debug("opal entry = 0x%x\n", entry);
- prom_debug("opal size = 0x%x\n", (long)size);
-
- prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
- &base, sizeof(base));
- prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
- &entry, sizeof(entry));
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- prom_opal_base = base;
- prom_opal_entry = entry;
-#endif
- prom_debug("prom_instantiate_opal: end...\n");
+ val = 0;
+ prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
+ token = be32_to_cpu(val);
+ prom_debug("ibm,os-term: %x\n", token);
+ if (token == 0)
+ prom_panic("Could not get token for ibm,os-term\n");
+ os_term_args.token = cpu_to_be32(token);
+ os_term_args.nargs = cpu_to_be32(1);
+ os_term_args.nret = cpu_to_be32(1);
+ os_term_args.args[0] = cpu_to_be32(__pa(str));
+ prom_rtas_hcall((uint64_t)&os_term_args);
}
-
-#endif /* CONFIG_PPC_POWERNV */
+#endif /* CONFIG_PPC_SVM */
/*
* Allocate room for and instantiate RTAS
@@ -1404,7 +1892,7 @@ static void __init prom_instantiate_rtas(void)
prom_debug("rtas base = 0x%x\n", base);
prom_debug("rtas entry = 0x%x\n", entry);
- prom_debug("rtas size = 0x%x\n", (long)size);
+ prom_debug("rtas size = 0x%x\n", size);
prom_debug("prom_instantiate_rtas: end...\n");
}
@@ -1417,34 +1905,54 @@ static void __init prom_instantiate_sml(void)
{
phandle ibmvtpm_node;
ihandle ibmvtpm_inst;
- u32 entry = 0, size = 0;
+ u32 entry = 0, size = 0, succ = 0;
u64 base;
+ __be32 val;
prom_debug("prom_instantiate_sml: start...\n");
- ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/ibm,vtpm"));
+ ibmvtpm_node = call_prom("finddevice", 1, 1, ADDR("/vdevice/vtpm"));
prom_debug("ibmvtpm_node: %x\n", ibmvtpm_node);
if (!PHANDLE_VALID(ibmvtpm_node))
return;
- ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/ibm,vtpm"));
+ ibmvtpm_inst = call_prom("open", 1, 1, ADDR("/vdevice/vtpm"));
if (!IHANDLE_VALID(ibmvtpm_inst)) {
prom_printf("opening vtpm package failed (%x)\n", ibmvtpm_inst);
return;
}
- if (call_prom_ret("call-method", 2, 2, &size,
- ADDR("sml-get-handover-size"),
- ibmvtpm_inst) != 0 || size == 0) {
- prom_printf("SML get handover size failed\n");
- return;
+ if (prom_getprop(ibmvtpm_node, "ibm,sml-efi-reformat-supported",
+ &val, sizeof(val)) != PROM_ERROR) {
+ if (call_prom_ret("call-method", 2, 2, &succ,
+ ADDR("reformat-sml-to-efi-alignment"),
+ ibmvtpm_inst) != 0 || succ == 0) {
+ prom_printf("Reformat SML to EFI alignment failed\n");
+ return;
+ }
+
+ if (call_prom_ret("call-method", 2, 2, &size,
+ ADDR("sml-get-allocated-size"),
+ ibmvtpm_inst) != 0 || size == 0) {
+ prom_printf("SML get allocated size failed\n");
+ return;
+ }
+ } else {
+ if (call_prom_ret("call-method", 2, 2, &size,
+ ADDR("sml-get-handover-size"),
+ ibmvtpm_inst) != 0 || size == 0) {
+ prom_printf("SML get handover size failed\n");
+ return;
+ }
}
base = alloc_down(size, PAGE_SIZE, 0);
if (base == 0)
prom_panic("Could not allocate memory for sml\n");
- prom_printf("instantiating sml at 0x%x...", base);
+ prom_printf("instantiating sml at 0x%llx...", base);
+
+ memset((void *)base, 0, size);
if (call_prom_ret("call-method", 4, 2, &entry,
ADDR("sml-handover"),
@@ -1456,13 +1964,13 @@ static void __init prom_instantiate_sml(void)
reserve_mem(base, size);
- prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-base",
+ prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-base",
&base, sizeof(base));
- prom_setprop(ibmvtpm_node, "/ibm,vtpm", "linux,sml-size",
+ prom_setprop(ibmvtpm_node, "/vdevice/vtpm", "linux,sml-size",
&size, sizeof(size));
- prom_debug("sml base = 0x%x\n", base);
- prom_debug("sml size = 0x%x\n", (long)size);
+ prom_debug("sml base = 0x%llx\n", base);
+ prom_debug("sml size = 0x%x\n", size);
prom_debug("prom_instantiate_sml: end...\n");
}
@@ -1502,19 +2010,19 @@ static void __init prom_initialize_tce_table(void)
prom_getprop(node, "device_type", type, sizeof(type));
prom_getprop(node, "model", model, sizeof(model));
- if ((type[0] == 0) || (strstr(type, "pci") == NULL))
+ if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL))
continue;
/* Keep the old logic intact to avoid regression. */
if (compatible[0] != 0) {
- if ((strstr(compatible, "python") == NULL) &&
- (strstr(compatible, "Speedwagon") == NULL) &&
- (strstr(compatible, "Winnipeg") == NULL))
+ if ((prom_strstr(compatible, "python") == NULL) &&
+ (prom_strstr(compatible, "Speedwagon") == NULL) &&
+ (prom_strstr(compatible, "Winnipeg") == NULL))
continue;
} else if (model[0] != 0) {
- if ((strstr(model, "ython") == NULL) &&
- (strstr(model, "peedwagon") == NULL) &&
- (strstr(model, "innipeg") == NULL))
+ if ((prom_strstr(model, "ython") == NULL) &&
+ (prom_strstr(model, "peedwagon") == NULL) &&
+ (prom_strstr(model, "innipeg") == NULL))
continue;
}
@@ -1530,16 +2038,8 @@ static void __init prom_initialize_tce_table(void)
* size to 4 MB. This is enough to map 2GB of PCI DMA space.
* By doing this, we avoid the pitfalls of trying to DMA to
* MMIO space and the DMA alias hole.
- *
- * On POWER4, firmware sets the TCE region by assuming
- * each TCE table is 8MB. Using this memory for anything
- * else will impact performance, so we always allocate 8MB.
- * Anton
*/
- if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p))
- minsize = 8UL << 20;
- else
- minsize = 4UL << 20;
+ minsize = 4UL << 20;
/* Align to the greater of the align or size */
align = max(minalign, minsize);
@@ -1550,10 +2050,10 @@ static void __init prom_initialize_tce_table(void)
local_alloc_bottom = base;
/* It seems OF doesn't null-terminate the path :-( */
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/* Call OF to setup the TCE hardware */
if (call_prom("package-to-path", 3, 1, node,
- path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
+ path, sizeof(prom_scratch) - 1) == PROM_ERROR) {
prom_printf("package-to-path failed\n");
}
@@ -1563,7 +2063,7 @@ static void __init prom_initialize_tce_table(void)
prom_debug("TCE table: %s\n", path);
prom_debug("\tnode = 0x%x\n", node);
- prom_debug("\tbase = 0x%x\n", base);
+ prom_debug("\tbase = 0x%llx\n", base);
prom_debug("\tsize = 0x%x\n", minsize);
/* Initialize the table to have a one-to-one mapping
@@ -1650,12 +2150,12 @@ static void __init prom_hold_cpus(void)
}
prom_debug("prom_hold_cpus: start...\n");
- prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
- prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
- prom_debug(" 1) acknowledge = 0x%x\n",
+ prom_debug(" 1) spinloop = 0x%lx\n", (unsigned long)spinloop);
+ prom_debug(" 1) *spinloop = 0x%lx\n", *spinloop);
+ prom_debug(" 1) acknowledge = 0x%lx\n",
(unsigned long)acknowledge);
- prom_debug(" 1) *acknowledge = 0x%x\n", *acknowledge);
- prom_debug(" 1) secondary_hold = 0x%x\n", secondary_hold);
+ prom_debug(" 1) *acknowledge = 0x%lx\n", *acknowledge);
+ prom_debug(" 1) secondary_hold = 0x%lx\n", secondary_hold);
/* Set the common spinloop variable, so all of the secondary cpus
* will block when they are awakened from their OF spinloop.
@@ -1671,19 +2171,19 @@ static void __init prom_hold_cpus(void)
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu") != 0)
+ if (prom_strcmp(type, "cpu") != 0)
continue;
/* Skip non-configured cpus. */
if (prom_getprop(node, "status", type, sizeof(type)) > 0)
- if (strcmp(type, "okay") != 0)
+ if (prom_strcmp(type, "okay") != 0)
continue;
reg = cpu_to_be32(-1); /* make sparse happy */
prom_getprop(node, "reg", &reg, sizeof(reg));
cpu_no = be32_to_cpu(reg);
- prom_debug("cpu hw idx = %lu\n", cpu_no);
+ prom_debug("cpu hw idx = %u\n", cpu_no);
/* Init the acknowledge var which will be reset by
* the secondary cpu when it awakens from its OF
@@ -1693,7 +2193,7 @@ static void __init prom_hold_cpus(void)
if (cpu_no != prom.cpu) {
/* Primary Thread of non-boot cpu or any thread */
- prom_printf("starting cpu hw idx %lu... ", cpu_no);
+ prom_printf("starting cpu hw idx %u... ", cpu_no);
call_prom("start-cpu", 3, 0, node,
secondary_hold, cpu_no);
@@ -1704,11 +2204,11 @@ static void __init prom_hold_cpus(void)
if (*acknowledge == cpu_no)
prom_printf("done\n");
else
- prom_printf("failed: %x\n", *acknowledge);
+ prom_printf("failed: %lx\n", *acknowledge);
}
#ifdef CONFIG_SMP
else
- prom_printf("boot cpu hw idx %lu\n", cpu_no);
+ prom_printf("boot cpu hw idx %u\n", cpu_no);
#endif /* CONFIG_SMP */
}
@@ -1752,9 +2252,9 @@ static void __init prom_find_mmu(void)
return;
version[sizeof(version) - 1] = 0;
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0)
of_workarounds = OF_WA_CLAIM;
- else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) {
of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
} else
@@ -1787,26 +2287,24 @@ static void __init prom_init_stdout(void)
call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
prom_printf("OF stdout device is: %s\n", of_stdout_device);
prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
- path, strlen(path) + 1);
+ path, prom_strlen(path) + 1);
/* instance-to-package fails on PA-Semi */
stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
if (stdout_node != PROM_ERROR) {
val = cpu_to_be32(stdout_node);
- prom_setprop(prom.chosen, "/chosen", "linux,stdout-package",
- &val, sizeof(val));
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(stdout_node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") == 0)
+ if (prom_strcmp(type, "display") == 0)
prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
}
}
static int __init prom_find_machine_type(void)
{
- char compat[256];
+ static char compat[256] __prombss;
int len, i = 0;
#ifdef CONFIG_PPC64
phandle rtas;
@@ -1820,29 +2318,25 @@ static int __init prom_find_machine_type(void)
compat[len] = 0;
while (i < len) {
char *p = &compat[i];
- int sl = strlen(p);
+ int sl = prom_strlen(p);
if (sl == 0)
break;
- if (strstr(p, "Power Macintosh") ||
- strstr(p, "MacRISC"))
+ if (prom_strstr(p, "Power Macintosh") ||
+ prom_strstr(p, "MacRISC"))
return PLATFORM_POWERMAC;
#ifdef CONFIG_PPC64
/* We must make sure we don't detect the IBM Cell
* blades as pSeries due to some firmware issues,
* so we do it here.
*/
- if (strstr(p, "IBM,CBEA") ||
- strstr(p, "IBM,CPBW-1.0"))
+ if (prom_strstr(p, "IBM,CBEA") ||
+ prom_strstr(p, "IBM,CPBW-1.0"))
return PLATFORM_GENERIC;
#endif /* CONFIG_PPC64 */
i += sl + 1;
}
}
#ifdef CONFIG_PPC64
- /* Try to detect OPAL */
- if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
- return PLATFORM_OPAL;
-
/* Try to figure out if it's an IBM pSeries or any other
* PAPR compliant platform. We assume it is if :
* - /device_type is "chrp" (please, do NOT use that for future
@@ -1853,7 +2347,7 @@ static int __init prom_find_machine_type(void)
compat, sizeof(compat)-1);
if (len <= 0)
return PLATFORM_GENERIC;
- if (strcmp(compat, "chrp"))
+ if (prom_strcmp(compat, "chrp"))
return PLATFORM_GENERIC;
/* Default to pSeries. We need to know if we are running LPAR */
@@ -1891,7 +2385,7 @@ static void __init prom_check_displays(void)
ihandle ih;
int i;
- static unsigned char default_colors[] = {
+ static const unsigned char default_colors[] __initconst = {
0x00, 0x00, 0x00,
0x00, 0x00, 0xaa,
0x00, 0xaa, 0x00,
@@ -1915,19 +2409,19 @@ static void __init prom_check_displays(void)
for (node = 0; prom_next_node(&node); ) {
memset(type, 0, sizeof(type));
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") != 0)
+ if (prom_strcmp(type, "display") != 0)
continue;
/* It seems OF doesn't null-terminate the path :-( */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/*
* leave some room at the end of the path for appending extra
* arguments
*/
if (call_prom("package-to-path", 3, 1, node, path,
- PROM_SCRATCH_SIZE-10) == PROM_ERROR)
+ sizeof(prom_scratch) - 10) == PROM_ERROR)
continue;
prom_printf("found display : %s, opening... ", path);
@@ -1963,13 +2457,23 @@ static void __init prom_check_displays(void)
u32 width, height, pitch, addr;
prom_printf("Setting btext !\n");
- prom_getprop(node, "width", &width, 4);
- prom_getprop(node, "height", &height, 4);
- prom_getprop(node, "linebytes", &pitch, 4);
- prom_getprop(node, "address", &addr, 4);
+
+ if (prom_getprop(node, "width", &width, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "height", &height, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "linebytes", &pitch, 4) == PROM_ERROR)
+ return;
+
+ if (prom_getprop(node, "address", &addr, 4) == PROM_ERROR)
+ return;
+
prom_printf("W=%d H=%d LB=%d addr=0x%x\n",
width, height, pitch, addr);
btext_setup_display(width, height, 8, pitch, addr);
+ btext_prepare_BAT();
}
#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
}
@@ -1982,11 +2486,11 @@ static void __init *make_room(unsigned long *mem_start, unsigned long *mem_end,
{
void *ret;
- *mem_start = _ALIGN(*mem_start, align);
+ *mem_start = ALIGN(*mem_start, align);
while ((*mem_start + needed) > *mem_end) {
unsigned long room, chunk;
- prom_debug("Chunk exhausted, claiming more at %x...\n",
+ prom_debug("Chunk exhausted, claiming more at %lx...\n",
alloc_bottom);
room = alloc_top - alloc_bottom;
if (room > DEVTREE_CHUNK_SIZE)
@@ -2019,9 +2523,9 @@ static unsigned long __init dt_find_string(char *str)
s = os = (char *)dt_string_start;
s += 4;
while (s < (char *)dt_string_end) {
- if (strcmp(s, str) == 0)
+ if (prom_strcmp(s, str) == 0)
return s - os;
- s += strlen(s) + 1;
+ s += prom_strlen(s) + 1;
}
return 0;
}
@@ -2054,7 +2558,7 @@ static void __init scan_dt_build_strings(phandle node,
}
/* skip "name" */
- if (strcmp(namep, "name") == 0) {
+ if (prom_strcmp(namep, "name") == 0) {
*mem_start = (unsigned long)namep;
prev_name = "name";
continue;
@@ -2066,7 +2570,7 @@ static void __init scan_dt_build_strings(phandle node,
namep = sstart + soff;
} else {
/* Trim off some if we can */
- *mem_start = (unsigned long)namep + strlen(namep) + 1;
+ *mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
dt_string_end = *mem_start;
}
prev_name = namep;
@@ -2087,7 +2591,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
unsigned long soff;
unsigned char *valp;
- static char pname[MAX_PROPERTY_NAME];
+ static char pname[MAX_PROPERTY_NAME] __prombss;
int l, room, has_phandle = 0;
dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
@@ -2118,13 +2622,13 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
*lp++ = *p;
}
*lp = 0;
- *mem_start = _ALIGN((unsigned long)lp + 1, 4);
+ *mem_start = ALIGN((unsigned long)lp + 1, 4);
}
/* get it again for debugging */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ memset(path, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
/* get and store all properties */
prev_name = "";
@@ -2135,7 +2639,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
break;
/* skip "name" */
- if (strcmp(pname, "name") == 0) {
+ if (prom_strcmp(pname, "name") == 0) {
prev_name = "name";
continue;
}
@@ -2164,20 +2668,17 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
/* push property content */
valp = make_room(mem_start, mem_end, l, 4);
call_prom("getprop", 4, 1, node, pname, valp, l);
- *mem_start = _ALIGN(*mem_start, 4);
+ *mem_start = ALIGN(*mem_start, 4);
- if (!strcmp(pname, "phandle"))
+ if (!prom_strcmp(pname, "phandle"))
has_phandle = 1;
}
- /* Add a "linux,phandle" property if no "phandle" property already
- * existed (can happen with OPAL)
- */
+ /* Add a "phandle" property if none already exist */
if (!has_phandle) {
- soff = dt_find_string("linux,phandle");
+ soff = dt_find_string("phandle");
if (soff == 0)
- prom_printf("WARNING: Can't find string index for"
- " <linux-phandle> node %s\n", path);
+ prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
else {
dt_push_token(OF_DT_PROP, mem_start, mem_end);
dt_push_token(4, mem_start, mem_end);
@@ -2212,7 +2713,7 @@ static void __init flatten_device_tree(void)
room = alloc_top - alloc_bottom - 0x4000;
if (room > DEVTREE_CHUNK_SIZE)
room = DEVTREE_CHUNK_SIZE;
- prom_debug("starting device tree allocs at %x\n", alloc_bottom);
+ prom_debug("starting device tree allocs at %lx\n", alloc_bottom);
/* Now try to claim that */
mem_start = (unsigned long)alloc_up(room, PAGE_SIZE);
@@ -2226,7 +2727,7 @@ static void __init flatten_device_tree(void)
prom_panic ("couldn't get device tree root\n");
/* Build header and make room for mem rsv map */
- mem_start = _ALIGN(mem_start, 4);
+ mem_start = ALIGN(mem_start, 4);
hdr = make_room(&mem_start, &mem_end,
sizeof(struct boot_param_header), 4);
dt_header_start = (unsigned long)hdr;
@@ -2237,10 +2738,10 @@ static void __init flatten_device_tree(void)
dt_string_start = mem_start;
mem_start += 4; /* hole */
- /* Add "linux,phandle" in there, we'll need it */
+ /* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- strcpy(namep, "linux,phandle");
- mem_start = (unsigned long)namep + strlen(namep) + 1;
+ prom_strscpy_pad(namep, "phandle", sizeof("phandle"));
+ mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
/* Build string array */
prom_printf("Building dt strings...\n");
@@ -2275,7 +2776,7 @@ static void __init flatten_device_tree(void)
int i;
prom_printf("reserved memory map:\n");
for (i = 0; i < mem_reserve_cnt; i++)
- prom_printf(" %x - %x\n",
+ prom_printf(" %llx - %llx\n",
be64_to_cpu(mem_reserve_map[i].base),
be64_to_cpu(mem_reserve_map[i].size));
}
@@ -2285,97 +2786,12 @@ static void __init flatten_device_tree(void)
*/
mem_reserve_cnt = MEM_RESERVE_MAP_SIZE;
- prom_printf("Device tree strings 0x%x -> 0x%x\n",
+ prom_printf("Device tree strings 0x%lx -> 0x%lx\n",
dt_string_start, dt_string_end);
- prom_printf("Device tree struct 0x%x -> 0x%x\n",
+ prom_printf("Device tree struct 0x%lx -> 0x%lx\n",
dt_struct_start, dt_struct_end);
}
-#ifdef CONFIG_PPC_MAPLE
-/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property.
- * The values are bad, and it doesn't even have the right number of cells. */
-static void __init fixup_device_tree_maple(void)
-{
- phandle isa;
- u32 rloc = 0x01002000; /* IO space; PCI device = 4 */
- u32 isa_ranges[6];
- char *name;
-
- name = "/ht@0/isa@4";
- isa = call_prom("finddevice", 1, 1, ADDR(name));
- if (!PHANDLE_VALID(isa)) {
- name = "/ht@0/isa@6";
- isa = call_prom("finddevice", 1, 1, ADDR(name));
- rloc = 0x01003000; /* IO space; PCI device = 6 */
- }
- if (!PHANDLE_VALID(isa))
- return;
-
- if (prom_getproplen(isa, "ranges") != 12)
- return;
- if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges))
- == PROM_ERROR)
- return;
-
- if (isa_ranges[0] != 0x1 ||
- isa_ranges[1] != 0xf4000000 ||
- isa_ranges[2] != 0x00010000)
- return;
-
- prom_printf("Fixing up bogus ISA range on Maple/Apache...\n");
-
- isa_ranges[0] = 0x1;
- isa_ranges[1] = 0x0;
- isa_ranges[2] = rloc;
- isa_ranges[3] = 0x0;
- isa_ranges[4] = 0x0;
- isa_ranges[5] = 0x00010000;
- prom_setprop(isa, name, "ranges",
- isa_ranges, sizeof(isa_ranges));
-}
-
-#define CPC925_MC_START 0xf8000000
-#define CPC925_MC_LENGTH 0x1000000
-/* The values for memory-controller don't have right number of cells */
-static void __init fixup_device_tree_maple_memory_controller(void)
-{
- phandle mc;
- u32 mc_reg[4];
- char *name = "/hostbridge@f8000000";
- u32 ac, sc;
-
- mc = call_prom("finddevice", 1, 1, ADDR(name));
- if (!PHANDLE_VALID(mc))
- return;
-
- if (prom_getproplen(mc, "reg") != 8)
- return;
-
- prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac));
- prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc));
- if ((ac != 2) || (sc != 2))
- return;
-
- if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR)
- return;
-
- if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH)
- return;
-
- prom_printf("Fixing up bogus hostbridge on Maple...\n");
-
- mc_reg[0] = 0x0;
- mc_reg[1] = CPC925_MC_START;
- mc_reg[2] = 0x0;
- mc_reg[3] = CPC925_MC_LENGTH;
- prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg));
-}
-#else
-#define fixup_device_tree_maple()
-#define fixup_device_tree_maple_memory_controller()
-#endif
-
-#ifdef CONFIG_PPC_CHRP
/*
* Pegasos and BriQ lacks the "ranges" property in the isa node
* Pegasos needs decimal IRQ 14/15, not hexadecimal
@@ -2426,12 +2842,8 @@ static void __init fixup_device_tree_chrp(void)
}
}
}
-#else
-#define fixup_device_tree_chrp()
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC)
-static void __init fixup_device_tree_pmac(void)
+static void __init fixup_device_tree_pmac64(void)
{
phandle u3, i2c, mpic;
u32 u3_rev;
@@ -2470,11 +2882,27 @@ static void __init fixup_device_tree_pmac(void)
prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent",
&parent, sizeof(parent));
}
-#else
-#define fixup_device_tree_pmac()
-#endif
-#ifdef CONFIG_PPC_EFIKA
+static void __init fixup_device_tree_pmac(void)
+{
+ __be32 val = 1;
+ char type[8];
+ phandle node;
+
+ // Some pmacs are missing #size-cells on escc or i2s nodes
+ for (node = 0; prom_next_node(&node); ) {
+ type[0] = '\0';
+ prom_getprop(node, "device_type", type, sizeof(type));
+ if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s"))
+ continue;
+
+ if (prom_getproplen(node, "#size-cells") != PROM_ERROR)
+ continue;
+
+ prom_setprop(node, NULL, "#size-cells", &val, sizeof(val));
+ }
+}
+
/*
* The MPC5200 FEC driver requires an phy-handle property to tell it how
* to talk to the phy. If the phy-handle property is missing, then this
@@ -2494,7 +2922,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
/* Check if the phy-handle property exists - bail if it does */
rv = prom_getprop(node, "phy-handle", prop, sizeof(prop));
- if (!rv)
+ if (rv <= 0)
return;
/*
@@ -2520,7 +2948,7 @@ static void __init fixup_device_tree_efika_add_phy(void)
" 0x3 encode-int encode+"
" s\" interrupts\" property"
" finish-device");
- };
+ }
/* Check for a PHY device node - if missing then create one and
* give it's phandle to the ethernet node */
@@ -2562,7 +2990,7 @@ static void __init fixup_device_tree_efika(void)
rv = prom_getprop(node, "model", prop, sizeof(prop));
if (rv == PROM_ERROR)
return;
- if (strcmp(prop, "EFIKA5K2"))
+ if (prom_strcmp(prop, "EFIKA5K2"))
return;
prom_printf("Applying EFIKA device tree fixups\n");
@@ -2570,13 +2998,13 @@ static void __init fixup_device_tree_efika(void)
/* Claiming to be 'chrp' is death */
node = call_prom("finddevice", 1, 1, ADDR("/"));
rv = prom_getprop(node, "device_type", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0))
+ if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0))
prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
/* CODEGEN,description is exposed in /proc/cpuinfo so
fix that too */
rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strstr(prop, "CHRP")))
+ if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP")))
prom_setprop(node, "/", "CODEGEN,description",
"Efika 5200B PowerPC System",
sizeof("Efika 5200B PowerPC System"));
@@ -2606,17 +3034,99 @@ static void __init fixup_device_tree_efika(void)
/* Make sure ethernet phy-handle property exists */
fixup_device_tree_efika_add_phy();
}
-#else
-#define fixup_device_tree_efika()
-#endif
+
+/*
+ * CFE supplied on Nemo is broken in several ways, biggest
+ * problem is that it reassigns ISA interrupts to unused mpic ints.
+ * Add an interrupt-controller property for the io-bridge to use
+ * and correct the ints so we can attach them to an irq_domain
+ */
+static void __init fixup_device_tree_pasemi(void)
+{
+ u32 interrupts[2], parent, rval, val = 0;
+ char *name, *pci_name;
+ phandle iob, node;
+
+ /* Find the root pci node */
+ name = "/pxp@0,e0000000";
+ iob = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(iob))
+ return;
+
+ /* check if interrupt-controller node set yet */
+ if (prom_getproplen(iob, "interrupt-controller") !=PROM_ERROR)
+ return;
+
+ prom_printf("adding interrupt-controller property for SB600...\n");
+
+ prom_setprop(iob, name, "interrupt-controller", &val, 0);
+
+ pci_name = "/pxp@0,e0000000/pci@11";
+ node = call_prom("finddevice", 1, 1, ADDR(pci_name));
+ parent = ADDR(iob);
+
+ for( ; prom_next_node(&node); ) {
+ /* scan each node for one with an interrupt */
+ if (!PHANDLE_VALID(node))
+ continue;
+
+ rval = prom_getproplen(node, "interrupts");
+ if (rval == 0 || rval == PROM_ERROR)
+ continue;
+
+ prom_getprop(node, "interrupts", &interrupts, sizeof(interrupts));
+ if ((interrupts[0] < 212) || (interrupts[0] > 222))
+ continue;
+
+ /* found a node, update both interrupts and interrupt-parent */
+ if ((interrupts[0] >= 212) && (interrupts[0] <= 215))
+ interrupts[0] -= 203;
+ if ((interrupts[0] >= 216) && (interrupts[0] <= 220))
+ interrupts[0] -= 213;
+ if (interrupts[0] == 221)
+ interrupts[0] = 14;
+ if (interrupts[0] == 222)
+ interrupts[0] = 8;
+
+ prom_setprop(node, pci_name, "interrupts", interrupts,
+ sizeof(interrupts));
+ prom_setprop(node, pci_name, "interrupt-parent", &parent,
+ sizeof(parent));
+ }
+
+ /*
+ * The io-bridge has device_type set to 'io-bridge' change it to 'isa'
+ * so that generic isa-bridge code can add the SB600 and its on-board
+ * peripherals.
+ */
+ name = "/pxp@0,e0000000/io-bridge@0";
+ iob = call_prom("finddevice", 1, 1, ADDR(name));
+ if (!PHANDLE_VALID(iob))
+ return;
+
+ /* device_type is already set, just change it. */
+
+ prom_printf("Changing device_type of SB600 node...\n");
+
+ prom_setprop(iob, name, "device_type", "isa", sizeof("isa"));
+}
static void __init fixup_device_tree(void)
{
- fixup_device_tree_maple();
- fixup_device_tree_maple_memory_controller();
- fixup_device_tree_chrp();
- fixup_device_tree_pmac();
- fixup_device_tree_efika();
+ if (IS_ENABLED(CONFIG_PPC_CHRP))
+ fixup_device_tree_chrp();
+
+ if (IS_ENABLED(CONFIG_PPC_PMAC))
+ fixup_device_tree_pmac();
+
+ if (IS_ENABLED(CONFIG_PPC_PMAC) && IS_ENABLED(CONFIG_PPC64))
+ fixup_device_tree_pmac64();
+
+ if (IS_ENABLED(CONFIG_PPC_EFIKA))
+ fixup_device_tree_efika();
+
+ if (IS_ENABLED(CONFIG_PPC_PASEMI_NEMO))
+ fixup_device_tree_pasemi();
}
static void __init prom_find_boot_cpu(void)
@@ -2632,10 +3142,13 @@ static void __init prom_find_boot_cpu(void)
cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu);
+ if (!PHANDLE_VALID(cpu_pkg))
+ return;
+
prom_getprop(cpu_pkg, "reg", &rval, sizeof(rval));
prom.cpu = be32_to_cpu(rval);
- prom_debug("Booting CPU hw index = %lu\n", prom.cpu);
+ prom_debug("Booting CPU hw index = %d\n", prom.cpu);
}
static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
@@ -2657,59 +3170,62 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
reserve_mem(prom_initrd_start,
prom_initrd_end - prom_initrd_start);
- prom_debug("initrd_start=0x%x\n", prom_initrd_start);
- prom_debug("initrd_end=0x%x\n", prom_initrd_end);
+ prom_debug("initrd_start=0x%lx\n", prom_initrd_start);
+ prom_debug("initrd_end=0x%lx\n", prom_initrd_end);
}
#endif /* CONFIG_BLK_DEV_INITRD */
}
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_RELOCATABLE
-static void reloc_toc(void)
-{
-}
-
-static void unreloc_toc(void)
-{
-}
-#else
-static void __reloc_toc(unsigned long offset, unsigned long nr_entries)
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int __init enter_secure_mode(unsigned long kbase, unsigned long fdt)
{
- unsigned long i;
- unsigned long *toc_entry;
+ register unsigned long r3 asm("r3") = UV_ESM;
+ register unsigned long r4 asm("r4") = kbase;
+ register unsigned long r5 asm("r5") = fdt;
- /* Get the start of the TOC by using r2 directly. */
- asm volatile("addi %0,2,-0x8000" : "=b" (toc_entry));
+ asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
- for (i = 0; i < nr_entries; i++) {
- *toc_entry = *toc_entry + offset;
- toc_entry++;
- }
+ return r3;
}
-static void reloc_toc(void)
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
{
- unsigned long offset = reloc_offset();
- unsigned long nr_entries =
- (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
+ int ret;
- __reloc_toc(offset, nr_entries);
+ if (!prom_svm_enable)
+ return;
- mb();
-}
+ /* Switch to secure mode. */
+ prom_printf("Switching to secure mode.\n");
-static void unreloc_toc(void)
-{
- unsigned long offset = reloc_offset();
- unsigned long nr_entries =
- (__prom_init_toc_end - __prom_init_toc_start) / sizeof(long);
+ /*
+ * The ultravisor will do an integrity check of the kernel image but we
+ * relocated it so the check will fail. Restore the original image by
+ * relocating it back to the kernel virtual base address.
+ */
+ relocate(KERNELBASE);
+
+ ret = enter_secure_mode(kbase, fdt);
- mb();
+ /* Relocate the kernel again. */
+ relocate(kbase);
- __reloc_toc(-offset, nr_entries);
+ if (ret != U_SUCCESS) {
+ prom_printf("Returned %d from switching to secure mode.\n", ret);
+ prom_rtas_os_term("Switch to secure mode failed.\n");
+ }
}
-#endif
-#endif
+#else
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+}
+#endif /* CONFIG_PPC_SVM */
/*
* We enter here early on, when the Open Firmware prom is still
@@ -2726,8 +3242,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
#ifdef CONFIG_PPC32
unsigned long offset = reloc_offset();
reloc_got2(offset);
-#else
- reloc_toc();
#endif
/*
@@ -2772,7 +3286,12 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
prom_check_initrd(r3, r4);
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+ /*
+ * Do early parsing of command line
+ */
+ early_cmdline_parse();
+
+#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
@@ -2788,11 +3307,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
copy_and_flush(0, kbase, 0x100, 0);
/*
- * Do early parsing of command line
- */
- early_cmdline_parse();
-
- /*
* Initialize memory management within prom_init
*/
prom_init_mem();
@@ -2821,15 +3335,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
* On non-powermacs, try to instantiate RTAS. PowerMacs don't
* have a usable RTAS implementation.
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_instantiate_rtas();
-#ifdef CONFIG_PPC_POWERNV
- if (of_platform == PLATFORM_OPAL)
- prom_instantiate_opal();
-#endif /* CONFIG_PPC_POWERNV */
-
#ifdef CONFIG_PPC64
/* instantiate sml */
prom_instantiate_sml();
@@ -2840,10 +3348,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*
* PowerMacs use a different mechanism to spin CPUs
*
- * (This must be done after instanciating RTAS)
+ * (This must be done after instantiating RTAS)
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_hold_cpus();
/*
@@ -2887,18 +3394,16 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
/*
* in case stdin is USB and still active on IBM machines...
* Unfortunately quiesce crashes on some powermacs if we have
- * closed stdin already (in particular the powerbook 101). It
- * appears that the OPAL version of OFW doesn't like it either.
+ * closed stdin already (in particular the powerbook 101).
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_close_stdin();
/*
* Call OF "quiesce" method to shut down pending DMA's from
* devices etc...
*/
- prom_printf("Calling quiesce...\n");
+ prom_printf("Quiescing Open Firmware ...\n");
call_prom("quiesce", 0, 0);
/*
@@ -2908,25 +3413,17 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
*/
hdr = dt_header_start;
- /* Don't print anything after quiesce under OPAL, it crashes OFW */
- if (of_platform != PLATFORM_OPAL) {
- prom_printf("returning from prom_init\n");
- prom_debug("->dt_header_start=0x%x\n", hdr);
- }
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
+ prom_debug("->dt_header_start=0x%lx\n", hdr);
#ifdef CONFIG_PPC32
reloc_got2(-offset);
-#else
- unreloc_toc();
#endif
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
- __start(hdr, kbase, 0, 0, 0,
- prom_opal_base, prom_opal_entry);
-#else
+ /* Move to secure memory if we're supposed to be secure guests. */
+ setup_secure_guest(kbase, hdr);
+
__start(hdr, kbase, 0, 0, 0, 0, 0);
-#endif
return 0;
}
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index fe8e54b9ef7d..3090b97258ae 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -1,11 +1,8 @@
#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
#
# Copyright © 2008 IBM Corporation
#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version
-# 2 of the License, or (at your option) any later version.
# This script checks prom_init.o to see what external symbols it
# is using, if it finds symbols not in the whitelist it returns
@@ -16,29 +13,53 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
+has_renamed_memintrinsics()
+{
+ grep -q "^CONFIG_KASAN=y$" "${KCONFIG_CONFIG}" && \
+ ! grep -q "^CONFIG_CC_HAS_KASAN_MEMINTRINSIC_PREFIX=y" "${KCONFIG_CONFIG}"
+}
+
+if has_renamed_memintrinsics
+then
+ MEM_FUNCS="__memcpy __memset"
+else
+ MEM_FUNCS="memcpy memset"
+fi
+
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
-_end enter_prom memcpy memset reloc_offset __secondary_hold
+_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+logo_linux_clut224 btext_prepare_BAT
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
-__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
+btext_setup_display TOC. relocate"
NM="$1"
OBJ="$2"
ERROR=0
-for UNDEF in $($NM -u $OBJ | awk '{print $2}')
+check_section()
+{
+ file=$1
+ section=$2
+ size=$(objdump -h -j "$section" "$file" 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
+ size=${size:-0}
+ if [ "$size" -ne 0 ]; then
+ ERROR=1
+ echo "Error: Section $section not empty in prom_init.c" >&2
+ fi
+}
+
+for UNDEF in $($NM -u "$OBJ" | awk '{print $2}')
do
# On 64-bit nm gives us the function descriptors, which have
# a leading . on the name, so strip it off here.
UNDEF="${UNDEF#.}"
- if [ $KBUILD_VERBOSE ]; then
- if [ $KBUILD_VERBOSE -ne 0 ]; then
- echo "Checking prom_init.o symbol '$UNDEF'"
- fi
- fi
+ case "$KBUILD_VERBOSE" in
+ *1*)
+ echo "Checking prom_init.o symbol '$UNDEF'" ;;
+ esac
OK=0
for WHITE in $WHITELIST
@@ -50,24 +71,14 @@ do
done
# ignore register save/restore funcitons
- if [ "${UNDEF:0:9}" = "_restgpr_" ]; then
- OK=1
- fi
- if [ "${UNDEF:0:10}" = "_restgpr0_" ]; then
- OK=1
- fi
- if [ "${UNDEF:0:11}" = "_rest32gpr_" ]; then
- OK=1
- fi
- if [ "${UNDEF:0:9}" = "_savegpr_" ]; then
- OK=1
- fi
- if [ "${UNDEF:0:10}" = "_savegpr0_" ]; then
+ case $UNDEF in
+ _restgpr_*|_restgpr0_*|_rest32gpr_*)
OK=1
- fi
- if [ "${UNDEF:0:11}" = "_save32gpr_" ]; then
+ ;;
+ _savegpr_*|_savegpr0_*|_save32gpr_*)
OK=1
- fi
+ ;;
+ esac
if [ $OK -eq 0 ]; then
ERROR=1
@@ -76,4 +87,8 @@ do
fi
done
+check_section "$OBJ" .data
+check_section "$OBJ" .bss
+check_section "$OBJ" .init.data
+
exit $ERROR
diff --git a/arch/powerpc/kernel/prom_parse.c b/arch/powerpc/kernel/prom_parse.c
index 6295e646f78c..9cb7f88df563 100644
--- a/arch/powerpc/kernel/prom_parse.c
+++ b/arch/powerpc/kernel/prom_parse.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#undef DEBUG
#include <linux/kernel.h>
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
deleted file mode 100644
index 2e3d2bf536c5..000000000000
--- a/arch/powerpc/kernel/ptrace.c
+++ /dev/null
@@ -1,1821 +0,0 @@
-/*
- * PowerPC version
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Derived from "arch/m68k/kernel/ptrace.c"
- * Copyright (C) 1994 by Hamish Macdonald
- * Taken from linux/kernel/ptrace.c and modified for M680x0.
- * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
- *
- * Modified by Cort Dougan (cort@hq.fsmlabs.com)
- * and Paul Mackerras (paulus@samba.org).
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <linux/ptrace.h>
-#include <linux/regset.h>
-#include <linux/tracehook.h>
-#include <linux/elf.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
-#include <linux/seccomp.h>
-#include <linux/audit.h>
-#include <trace/syscall.h>
-#include <linux/hw_breakpoint.h>
-#include <linux/perf_event.h>
-#include <linux/context_tracking.h>
-
-#include <asm/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/switch_to.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
-/*
- * The parameter save area on the stack is used to store arguments being passed
- * to callee function and is located at fixed offset from stack pointer.
- */
-#ifdef CONFIG_PPC32
-#define PARAMETER_SAVE_AREA_OFFSET 24 /* bytes */
-#else /* CONFIG_PPC32 */
-#define PARAMETER_SAVE_AREA_OFFSET 48 /* bytes */
-#endif
-
-struct pt_regs_offset {
- const char *name;
- int offset;
-};
-
-#define STR(s) #s /* convert to string */
-#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
-#define GPR_OFFSET_NAME(num) \
- {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
-#define REG_OFFSET_END {.name = NULL, .offset = 0}
-
-static const struct pt_regs_offset regoffset_table[] = {
- GPR_OFFSET_NAME(0),
- GPR_OFFSET_NAME(1),
- GPR_OFFSET_NAME(2),
- GPR_OFFSET_NAME(3),
- GPR_OFFSET_NAME(4),
- GPR_OFFSET_NAME(5),
- GPR_OFFSET_NAME(6),
- GPR_OFFSET_NAME(7),
- GPR_OFFSET_NAME(8),
- GPR_OFFSET_NAME(9),
- GPR_OFFSET_NAME(10),
- GPR_OFFSET_NAME(11),
- GPR_OFFSET_NAME(12),
- GPR_OFFSET_NAME(13),
- GPR_OFFSET_NAME(14),
- GPR_OFFSET_NAME(15),
- GPR_OFFSET_NAME(16),
- GPR_OFFSET_NAME(17),
- GPR_OFFSET_NAME(18),
- GPR_OFFSET_NAME(19),
- GPR_OFFSET_NAME(20),
- GPR_OFFSET_NAME(21),
- GPR_OFFSET_NAME(22),
- GPR_OFFSET_NAME(23),
- GPR_OFFSET_NAME(24),
- GPR_OFFSET_NAME(25),
- GPR_OFFSET_NAME(26),
- GPR_OFFSET_NAME(27),
- GPR_OFFSET_NAME(28),
- GPR_OFFSET_NAME(29),
- GPR_OFFSET_NAME(30),
- GPR_OFFSET_NAME(31),
- REG_OFFSET_NAME(nip),
- REG_OFFSET_NAME(msr),
- REG_OFFSET_NAME(ctr),
- REG_OFFSET_NAME(link),
- REG_OFFSET_NAME(xer),
- REG_OFFSET_NAME(ccr),
-#ifdef CONFIG_PPC64
- REG_OFFSET_NAME(softe),
-#else
- REG_OFFSET_NAME(mq),
-#endif
- REG_OFFSET_NAME(trap),
- REG_OFFSET_NAME(dar),
- REG_OFFSET_NAME(dsisr),
- REG_OFFSET_END,
-};
-
-/**
- * regs_query_register_offset() - query register offset from its name
- * @name: the name of a register
- *
- * regs_query_register_offset() returns the offset of a register in struct
- * pt_regs from its name. If the name is invalid, this returns -EINVAL;
- */
-int regs_query_register_offset(const char *name)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (!strcmp(roff->name, name))
- return roff->offset;
- return -EINVAL;
-}
-
-/**
- * regs_query_register_name() - query register name from its offset
- * @offset: the offset of a register in struct pt_regs.
- *
- * regs_query_register_name() returns the name of a register from its
- * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
- */
-const char *regs_query_register_name(unsigned int offset)
-{
- const struct pt_regs_offset *roff;
- for (roff = regoffset_table; roff->name != NULL; roff++)
- if (roff->offset == offset)
- return roff->name;
- return NULL;
-}
-
-/*
- * does not yet catch signals sent when the child dies.
- * in exit.c or in signal.c.
- */
-
-/*
- * Set of msr bits that gdb can change on behalf of a process.
- */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-#define MSR_DEBUGCHANGE 0
-#else
-#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
-#endif
-
-/*
- * Max register writeable via put_reg
- */
-#ifdef CONFIG_PPC32
-#define PT_MAX_PUT_REG PT_MQ
-#else
-#define PT_MAX_PUT_REG PT_CCR
-#endif
-
-static unsigned long get_user_msr(struct task_struct *task)
-{
- return task->thread.regs->msr | task->thread.fpexc_mode;
-}
-
-static int set_user_msr(struct task_struct *task, unsigned long msr)
-{
- task->thread.regs->msr &= ~MSR_DEBUGCHANGE;
- task->thread.regs->msr |= msr & MSR_DEBUGCHANGE;
- return 0;
-}
-
-#ifdef CONFIG_PPC64
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- *data = task->thread.dscr;
- return 0;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- task->thread.dscr = dscr;
- task->thread.dscr_inherit = 1;
- return 0;
-}
-#else
-static int get_user_dscr(struct task_struct *task, unsigned long *data)
-{
- return -EIO;
-}
-
-static int set_user_dscr(struct task_struct *task, unsigned long dscr)
-{
- return -EIO;
-}
-#endif
-
-/*
- * We prevent mucking around with the reserved area of trap
- * which are used internally by the kernel.
- */
-static int set_user_trap(struct task_struct *task, unsigned long trap)
-{
- task->thread.regs->trap = trap & 0xfff0;
- return 0;
-}
-
-/*
- * Get contents of register REGNO in task TASK.
- */
-int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
-{
- if ((task->thread.regs == NULL) || !data)
- return -EIO;
-
- if (regno == PT_MSR) {
- *data = get_user_msr(task);
- return 0;
- }
-
- if (regno == PT_DSCR)
- return get_user_dscr(task, data);
-
- if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
- *data = ((unsigned long *)task->thread.regs)[regno];
- return 0;
- }
-
- return -EIO;
-}
-
-/*
- * Write contents of register REGNO in task TASK.
- */
-int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
-{
- if (task->thread.regs == NULL)
- return -EIO;
-
- if (regno == PT_MSR)
- return set_user_msr(task, data);
- if (regno == PT_TRAP)
- return set_user_trap(task, data);
- if (regno == PT_DSCR)
- return set_user_dscr(task, data);
-
- if (regno <= PT_MAX_PUT_REG) {
- ((unsigned long *)task->thread.regs)[regno] = data;
- return 0;
- }
- return -EIO;
-}
-
-static int gpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int i, ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /* We have a partial register set. Fill 14-31 with bogus values */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, offsetof(struct pt_regs, msr));
- if (!ret) {
- unsigned long msr = get_user_msr(target);
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &msr,
- offsetof(struct pt_regs, msr),
- offsetof(struct pt_regs, msr) +
- sizeof(msr));
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
- if (!ret)
- ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
-
- return ret;
-}
-
-static int gpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long reg;
- int ret;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- target->thread.regs,
- 0, PT_MSR * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_MSR * sizeof(reg),
- (PT_MSR + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_msr(target, reg);
- }
-
- BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
- offsetof(struct pt_regs, msr) + sizeof(long));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.regs->orig_gpr3,
- PT_ORIG_R3 * sizeof(reg),
- (PT_MAX_PUT_REG + 1) * sizeof(reg));
-
- if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_MAX_PUT_REG + 1) * sizeof(reg),
- PT_TRAP * sizeof(reg));
-
- if (!ret && count > 0) {
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
- PT_TRAP * sizeof(reg),
- (PT_TRAP + 1) * sizeof(reg));
- if (!ret)
- ret = set_user_trap(target, reg);
- }
-
- if (!ret)
- ret = user_regset_copyin_ignore(
- &pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-
- return ret;
-}
-
-static int fpr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-#endif
- flush_fp_to_thread(target);
-
-#ifdef CONFIG_VSX
- /* copy to local buffer then write that out */
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.TS_FPR(i);
- buf[32] = target->thread.fp_state.fpscr;
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
-
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32][0]));
-
- return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-static int fpr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
-#ifdef CONFIG_VSX
- u64 buf[33];
- int i;
-#endif
- flush_fp_to_thread(target);
-
-#ifdef CONFIG_VSX
- /* copy to local buffer then write that out */
- i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
- if (i)
- return i;
- for (i = 0; i < 32 ; i++)
- target->thread.TS_FPR(i) = buf[i];
- target->thread.fp_state.fpscr = buf[32];
- return 0;
-#else
- BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
- offsetof(struct thread_fp_state, fpr[32][0]));
-
- return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fp_state, 0, -1);
-#endif
-}
-
-#ifdef CONFIG_ALTIVEC
-/*
- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
- * The transfer totals 34 quadword. Quadwords 0-31 contain the
- * corresponding vector registers. Quadword 32 contains the vscr as the
- * last word (offset 12) within that quadword. Quadword 33 contains the
- * vrsave as the first word (offset 0) within the quadword.
- *
- * This definition of the VMX state is compatible with the current PPC32
- * ptrace interface. This allows signal handling and ptrace to use the
- * same structures. This also simplifies the implementation of a bi-arch
- * (combined (32- and 64-bit) gdb.
- */
-
-static int vr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_altivec_to_thread(target);
- return target->thread.used_vr ? regset->n : 0;
-}
-
-static int vr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret) {
- /*
- * Copy out only the low-order word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.vrsave;
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- }
-
- return ret;
-}
-
-static int vr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_altivec_to_thread(target);
-
- BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
- offsetof(struct thread_vr_state, vr[32]));
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.vr_state, 0,
- 33 * sizeof(vector128));
- if (!ret && count > 0) {
- /*
- * We use only the first word of vrsave.
- */
- union {
- elf_vrreg_t reg;
- u32 word;
- } vrsave;
- memset(&vrsave, 0, sizeof(vrsave));
- vrsave.word = target->thread.vrsave;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave,
- 33 * sizeof(vector128), -1);
- if (!ret)
- target->thread.vrsave = vrsave.word;
- }
-
- return ret;
-}
-#endif /* CONFIG_ALTIVEC */
-
-#ifdef CONFIG_VSX
-/*
- * Currently to set and and get all the vsx state, you need to call
- * the fp and VMX calls as well. This only get/sets the lower 32
- * 128bit VSX registers.
- */
-
-static int vsr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_vsx_to_thread(target);
- return target->thread.used_vsr ? regset->n : 0;
-}
-
-static int vsr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- u64 buf[32];
- int ret, i;
-
- flush_vsx_to_thread(target);
-
- for (i = 0; i < 32 ; i++)
- buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
-
- return ret;
-}
-
-static int vsr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- u64 buf[32];
- int ret,i;
-
- flush_vsx_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- buf, 0, 32 * sizeof(double));
- for (i = 0; i < 32 ; i++)
- target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
-
-
- return ret;
-}
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_SPE
-
-/*
- * For get_evrregs/set_evrregs functions 'data' has the following layout:
- *
- * struct {
- * u32 evr[32];
- * u64 acc;
- * u32 spefscr;
- * }
- */
-
-static int evr_active(struct task_struct *target,
- const struct user_regset *regset)
-{
- flush_spe_to_thread(target);
- return target->thread.used_spe ? regset->n : 0;
-}
-
-static int evr_get(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-
-static int evr_set(struct task_struct *target, const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret;
-
- flush_spe_to_thread(target);
-
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.evr,
- 0, sizeof(target->thread.evr));
-
- BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
- offsetof(struct thread_struct, spefscr));
-
- if (!ret)
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.acc,
- sizeof(target->thread.evr), -1);
-
- return ret;
-}
-#endif /* CONFIG_SPE */
-
-
-/*
- * These are our native regset flavors.
- */
-enum powerpc_regset {
- REGSET_GPR,
- REGSET_FPR,
-#ifdef CONFIG_ALTIVEC
- REGSET_VMX,
-#endif
-#ifdef CONFIG_VSX
- REGSET_VSX,
-#endif
-#ifdef CONFIG_SPE
- REGSET_SPE,
-#endif
-};
-
-static const struct user_regset native_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(long), .align = sizeof(long),
- .get = gpr_get, .set = gpr_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_VSX
- [REGSET_VSX] = {
- .core_note_type = NT_PPC_VSX, .n = 32,
- .size = sizeof(double), .align = sizeof(double),
- .active = vsr_active, .get = vsr_get, .set = vsr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_native_view = {
- .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
- .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
-};
-
-#ifdef CONFIG_PPC64
-#include <linux/compat.h>
-
-static int gpr32_get(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- void *kbuf, void __user *ubuf)
-{
- const unsigned long *regs = &target->thread.regs->gpr[0];
- compat_ulong_t *k = kbuf;
- compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
- int i;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- if (!FULL_REGS(target->thread.regs)) {
- /* We have a partial register set. Fill 14-31 with bogus values */
- for (i = 14; i < 32; i++)
- target->thread.regs->gpr[i] = NV_REG_POISON;
- }
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_MSR; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- if (count > 0 && pos == PT_MSR) {
- reg = get_user_msr(target);
- if (kbuf)
- *k++ = reg;
- else if (__put_user(reg, u++))
- return -EFAULT;
- ++pos;
- --count;
- }
-
- if (kbuf)
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- *k++ = regs[pos++];
- else
- for (; count > 0 && pos < PT_REGS_COUNT; --count)
- if (__put_user((compat_ulong_t) regs[pos++], u++))
- return -EFAULT;
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- PT_REGS_COUNT * sizeof(reg), -1);
-}
-
-static int gpr32_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- unsigned long *regs = &target->thread.regs->gpr[0];
- const compat_ulong_t *k = kbuf;
- const compat_ulong_t __user *u = ubuf;
- compat_ulong_t reg;
-
- if (target->thread.regs == NULL)
- return -EIO;
-
- CHECK_FULL_REGS(target->thread.regs);
-
- pos /= sizeof(reg);
- count /= sizeof(reg);
-
- if (kbuf)
- for (; count > 0 && pos < PT_MSR; --count)
- regs[pos++] = *k++;
- else
- for (; count > 0 && pos < PT_MSR; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
-
-
- if (count > 0 && pos == PT_MSR) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_msr(target, reg);
- ++pos;
- --count;
- }
-
- if (kbuf) {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
- regs[pos++] = *k++;
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- ++k;
- } else {
- for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
- if (__get_user(reg, u++))
- return -EFAULT;
- regs[pos++] = reg;
- }
- for (; count > 0 && pos < PT_TRAP; --count, ++pos)
- if (__get_user(reg, u++))
- return -EFAULT;
- }
-
- if (count > 0 && pos == PT_TRAP) {
- if (kbuf)
- reg = *k++;
- else if (__get_user(reg, u++))
- return -EFAULT;
- set_user_trap(target, reg);
- ++pos;
- --count;
- }
-
- kbuf = k;
- ubuf = u;
- pos *= sizeof(reg);
- count *= sizeof(reg);
- return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
- (PT_TRAP + 1) * sizeof(reg), -1);
-}
-
-/*
- * These are the regset flavors matching the CONFIG_PPC32 native set.
- */
-static const struct user_regset compat_regsets[] = {
- [REGSET_GPR] = {
- .core_note_type = NT_PRSTATUS, .n = ELF_NGREG,
- .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
- .get = gpr32_get, .set = gpr32_set
- },
- [REGSET_FPR] = {
- .core_note_type = NT_PRFPREG, .n = ELF_NFPREG,
- .size = sizeof(double), .align = sizeof(double),
- .get = fpr_get, .set = fpr_set
- },
-#ifdef CONFIG_ALTIVEC
- [REGSET_VMX] = {
- .core_note_type = NT_PPC_VMX, .n = 34,
- .size = sizeof(vector128), .align = sizeof(vector128),
- .active = vr_active, .get = vr_get, .set = vr_set
- },
-#endif
-#ifdef CONFIG_SPE
- [REGSET_SPE] = {
- .core_note_type = NT_PPC_SPE, .n = 35,
- .size = sizeof(u32), .align = sizeof(u32),
- .active = evr_active, .get = evr_get, .set = evr_set
- },
-#endif
-};
-
-static const struct user_regset_view user_ppc_compat_view = {
- .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
- .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
-};
-#endif /* CONFIG_PPC64 */
-
-const struct user_regset_view *task_user_regset_view(struct task_struct *task)
-{
-#ifdef CONFIG_PPC64
- if (test_tsk_thread_flag(task, TIF_32BIT))
- return &user_ppc_compat_view;
-#endif
- return &user_ppc_native_view;
-}
-
-
-void user_enable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_BT;
- task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_BE;
- regs->msr |= MSR_SE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_enable_block_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- task->thread.debug.dbcr0 &= ~DBCR0_IC;
- task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
- regs->msr |= MSR_DE;
-#else
- regs->msr &= ~MSR_SE;
- regs->msr |= MSR_BE;
-#endif
- }
- set_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-void user_disable_single_step(struct task_struct *task)
-{
- struct pt_regs *regs = task->thread.regs;
-
- if (regs != NULL) {
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * The logic to disable single stepping should be as
- * simple as turning off the Instruction Complete flag.
- * And, after doing so, if all debug flags are off, turn
- * off DBCR0(IDM) and MSR(DE) .... Torez
- */
- task->thread.debug.dbcr0 &= ~(DBCR0_IC|DBCR0_BT);
- /*
- * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
- */
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- /*
- * All debug events were off.....
- */
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- regs->msr &= ~MSR_DE;
- }
-#else
- regs->msr &= ~(MSR_SE | MSR_BE);
-#endif
- }
- clear_tsk_thread_flag(task, TIF_SINGLESTEP);
-}
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
-void ptrace_triggered(struct perf_event *bp,
- struct perf_sample_data *data, struct pt_regs *regs)
-{
- struct perf_event_attr attr;
-
- /*
- * Disable the breakpoint request here since ptrace has defined a
- * one-shot behaviour for breakpoint exceptions in PPC64.
- * The SIGTRAP signal is generated automatically for us in do_dabr().
- * We don't have to do anything about that here
- */
- attr = bp->attr;
- attr.disabled = true;
- modify_user_hw_breakpoint(bp, &attr);
-}
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
-int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
- unsigned long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret;
- struct thread_struct *thread = &(task->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- struct arch_hw_breakpoint hw_brk;
-#endif
-
- /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
- * For embedded processors we support one DAC and no IAC's at the
- * moment.
- */
- if (addr > 0)
- return -EINVAL;
-
- /* The bottom 3 bits in dabr are flags */
- if ((data & ~0x7UL) >= TASK_SIZE)
- return -EIO;
-
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
- * It was assumed, on previous implementations, that 3 bits were
- * passed together with the data address, fitting the design of the
- * DABR register, as follows:
- *
- * bit 0: Read flag
- * bit 1: Write flag
- * bit 2: Breakpoint translation
- *
- * Thus, we use them here as so.
- */
-
- /* Ensure breakpoint translation bit is set */
- if (data && !(data & HW_BRK_TYPE_TRANSLATE))
- return -EIO;
- hw_brk.address = data & (~HW_BRK_TYPE_DABR);
- hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
- hw_brk.len = 8;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if ((!data) || !(hw_brk.type & HW_BRK_TYPE_RDWR)) {
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- }
- return 0;
- }
- if (bp) {
- attr = bp->attr;
- attr.bp_addr = hw_brk.address;
- arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
-
- /* Enable breakpoint */
- attr.disabled = false;
-
- ret = modify_user_hw_breakpoint(bp, &attr);
- if (ret) {
- return ret;
- }
- thread->ptrace_bps[0] = bp;
- thread->hw_brk = hw_brk;
- return 0;
- }
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = hw_brk.address;
- arch_bp_generic_fields(hw_brk.type,
- &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, task);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
- task->thread.hw_brk = hw_brk;
-#else /* CONFIG_PPC_ADV_DEBUG_REGS */
- /* As described above, it was assumed 3 bits were passed with the data
- * address, but we will assume only the mode bits will be passed
- * as to not cause alignment restrictions for DAC-based processors.
- */
-
- /* DAC's hold the whole address without any mode flags */
- task->thread.debug.dac1 = data & ~0x3UL;
-
- if (task->thread.debug.dac1 == 0) {
- dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
- if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
- task->thread.debug.dbcr1)) {
- task->thread.regs->msr &= ~MSR_DE;
- task->thread.debug.dbcr0 &= ~DBCR0_IDM;
- }
- return 0;
- }
-
- /* Read or Write bits must be set */
-
- if (!(data & 0x3UL))
- return -EINVAL;
-
- /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0
- register */
- task->thread.debug.dbcr0 |= DBCR0_IDM;
-
- /* Check for write and read flags and set DBCR0
- accordingly */
- dbcr_dac(task) &= ~(DBCR_DAC1R|DBCR_DAC1W);
- if (data & 0x1UL)
- dbcr_dac(task) |= DBCR_DAC1R;
- if (data & 0x2UL)
- dbcr_dac(task) |= DBCR_DAC1W;
- task->thread.regs->msr |= MSR_DE;
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
- return 0;
-}
-
-/*
- * Called by kernel/ptrace.c when detaching..
- *
- * Make sure single step bits etc are not set.
- */
-void ptrace_disable(struct task_struct *child)
-{
- /* make sure the single step bit is not set. */
- user_disable_single_step(child);
-}
-
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-static long set_instruction_bp(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int slot;
- int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
- int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
- int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
- int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- slot2_in_use = 1;
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- slot4_in_use = 1;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
-
- /* Make sure range is valid. */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
-
- /* We need a pair of IAC regsisters */
- if ((!slot1_in_use) && (!slot2_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.iac2 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC12X;
- else
- dbcr_iac_range(child) |= DBCR_IAC12I;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if ((!slot3_in_use) && (!slot4_in_use)) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.iac4 = bp_info->addr2;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- if (bp_info->addr_mode ==
- PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- dbcr_iac_range(child) |= DBCR_IAC34X;
- else
- dbcr_iac_range(child) |= DBCR_IAC34I;
-#endif
- } else
- return -ENOSPC;
- } else {
- /* We only need one. If possible leave a pair free in
- * case a range is needed later
- */
- if (!slot1_in_use) {
- /*
- * Don't use iac1 if iac1-iac2 are free and either
- * iac3 or iac4 (but not both) are free
- */
- if (slot2_in_use || (slot3_in_use == slot4_in_use)) {
- slot = 1;
- child->thread.debug.iac1 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC1;
- goto out;
- }
- }
- if (!slot2_in_use) {
- slot = 2;
- child->thread.debug.iac2 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC2;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- } else if (!slot3_in_use) {
- slot = 3;
- child->thread.debug.iac3 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC3;
- } else if (!slot4_in_use) {
- slot = 4;
- child->thread.debug.iac4 = bp_info->addr;
- child->thread.debug.dbcr0 |= DBCR0_IAC4;
-#endif
- } else
- return -ENOSPC;
- }
-out:
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot;
-}
-
-static int del_instruction_bp(struct task_struct *child, int slot)
-{
- switch (slot) {
- case 1:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
- /* address range - clear slots 1 & 2 */
- child->thread.debug.iac2 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
- }
- child->thread.debug.iac1 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
- break;
- case 2:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC12MODE)
- /* used in a range */
- return -EINVAL;
- child->thread.debug.iac2 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
- break;
-#if CONFIG_PPC_ADV_DEBUG_IACS > 2
- case 3:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
- /* address range - clear slots 3 & 4 */
- child->thread.debug.iac4 = 0;
- dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
- }
- child->thread.debug.iac3 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
- break;
- case 4:
- if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
- return -ENOENT;
-
- if (dbcr_iac_range(child) & DBCR_IAC34MODE)
- /* Used in a range */
- return -EINVAL;
- child->thread.debug.iac4 = 0;
- child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
- break;
-#endif
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
-{
- int byte_enable =
- (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
- & 0xf;
- int condition_mode =
- bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
- int slot;
-
- if (byte_enable && (condition_mode == 0))
- return -EINVAL;
-
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
- slot = 1;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC1R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC1W;
- child->thread.debug.dac1 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc1 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC1BE_SHIFT) |
- (condition_mode << DBCR2_DVC1M_SHIFT));
- }
-#endif
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- /* Both dac1 and dac2 are part of a range */
- return -ENOSPC;
-#endif
- } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
- slot = 2;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- dbcr_dac(child) |= DBCR_DAC2R;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- dbcr_dac(child) |= DBCR_DAC2W;
- child->thread.debug.dac2 = (unsigned long)bp_info->addr;
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- if (byte_enable) {
- child->thread.debug.dvc2 =
- (unsigned long)bp_info->condition_value;
- child->thread.debug.dbcr2 |=
- ((byte_enable << DBCR2_DVC2BE_SHIFT) |
- (condition_mode << DBCR2_DVC2M_SHIFT));
- }
-#endif
- } else
- return -ENOSPC;
- child->thread.debug.dbcr0 |= DBCR0_IDM;
- child->thread.regs->msr |= MSR_DE;
-
- return slot + 4;
-}
-
-static int del_dac(struct task_struct *child, int slot)
-{
- if (slot == 1) {
- if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
- return -ENOENT;
-
- child->thread.debug.dac1 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
- child->thread.debug.dac2 = 0;
- child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
- }
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc1 = 0;
-#endif
- } else if (slot == 2) {
- if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
- return -ENOENT;
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
- /* Part of a range */
- return -EINVAL;
- child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
-#endif
-#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
- child->thread.debug.dvc2 = 0;
-#endif
- child->thread.debug.dac2 = 0;
- dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
- } else
- return -EINVAL;
-
- return 0;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
-static int set_dac_range(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
- int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
-
- /* We don't allow range watchpoints to be used with DVC */
- if (bp_info->condition_mode)
- return -EINVAL;
-
- /*
- * Best effort to verify the address range. The user/supervisor bits
- * prevent trapping in kernel space, but let's fail on an obvious bad
- * range. The simple test on the mask is not fool-proof, and any
- * exclusive range will spill over into kernel space.
- */
- if (bp_info->addr >= TASK_SIZE)
- return -EIO;
- if (mode == PPC_BREAKPOINT_MODE_MASK) {
- /*
- * dac2 is a bitmask. Don't allow a mask that makes a
- * kernel space address from a valid dac1 value
- */
- if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
- return -EIO;
- } else {
- /*
- * For range breakpoints, addr2 must also be a valid address
- */
- if (bp_info->addr2 >= TASK_SIZE)
- return -EIO;
- }
-
- if (child->thread.debug.dbcr0 &
- (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
- return -ENOSPC;
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
- child->thread.debug.dac1 = bp_info->addr;
- child->thread.debug.dac2 = bp_info->addr2;
- if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12M;
- else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
- child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
- else /* PPC_BREAKPOINT_MODE_MASK */
- child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
- child->thread.regs->msr |= MSR_DE;
-
- return 5;
-}
-#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
-
-static long ppc_set_hwdebug(struct task_struct *child,
- struct ppc_hw_breakpoint *bp_info)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int len = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
- struct perf_event_attr attr;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- struct arch_hw_breakpoint brk;
-#endif
-
- if (bp_info->version != 1)
- return -ENOTSUPP;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- /*
- * Check for invalid flags and combinations
- */
- if ((bp_info->trigger_type == 0) ||
- (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
- PPC_BREAKPOINT_TRIGGER_RW)) ||
- (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
- (bp_info->condition_mode &
- ~(PPC_BREAKPOINT_CONDITION_MODE |
- PPC_BREAKPOINT_CONDITION_BE_ALL)))
- return -EINVAL;
-#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
- if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-#endif
-
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
- if ((bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE) ||
- (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE))
- return -EINVAL;
- return set_instruction_bp(child, bp_info);
- }
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- return set_dac(child, bp_info);
-
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- return set_dac_range(child, bp_info);
-#else
- return -EINVAL;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_DVCS */
- /*
- * We only support one data breakpoint
- */
- if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
- (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
- bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
- return -EINVAL;
-
- if ((unsigned long)bp_info->addr >= TASK_SIZE)
- return -EIO;
-
- brk.address = bp_info->addr & ~7UL;
- brk.type = HW_BRK_TYPE_TRANSLATE;
- brk.len = 8;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
- brk.type |= HW_BRK_TYPE_READ;
- if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
- brk.type |= HW_BRK_TYPE_WRITE;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- /*
- * Check if the request is for 'range' breakpoints. We can
- * support it if range < 8 bytes.
- */
- if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
- len = bp_info->addr2 - bp_info->addr;
- else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
- len = 1;
- else
- return -EINVAL;
- bp = thread->ptrace_bps[0];
- if (bp)
- return -ENOSPC;
-
- /* Create a new breakpoint request if one doesn't exist already */
- hw_breakpoint_init(&attr);
- attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
- attr.bp_len = len;
- arch_bp_generic_fields(brk.type, &attr.bp_type);
-
- thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
- ptrace_triggered, NULL, child);
- if (IS_ERR(bp)) {
- thread->ptrace_bps[0] = NULL;
- return PTR_ERR(bp);
- }
-
- return 1;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
- return -EINVAL;
-
- if (child->thread.hw_brk.address)
- return -ENOSPC;
-
- child->thread.hw_brk = brk;
-
- return 1;
-#endif /* !CONFIG_PPC_ADV_DEBUG_DVCS */
-}
-
-static long ppc_del_hwdebug(struct task_struct *child, long data)
-{
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- int ret = 0;
- struct thread_struct *thread = &(child->thread);
- struct perf_event *bp;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- int rc;
-
- if (data <= 4)
- rc = del_instruction_bp(child, (int)data);
- else
- rc = del_dac(child, (int)data - 4);
-
- if (!rc) {
- if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
- child->thread.debug.dbcr1)) {
- child->thread.debug.dbcr0 &= ~DBCR0_IDM;
- child->thread.regs->msr &= ~MSR_DE;
- }
- }
- return rc;
-#else
- if (data != 1)
- return -EINVAL;
-
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- bp = thread->ptrace_bps[0];
- if (bp) {
- unregister_hw_breakpoint(bp);
- thread->ptrace_bps[0] = NULL;
- } else
- ret = -ENOENT;
- return ret;
-#else /* CONFIG_HAVE_HW_BREAKPOINT */
- if (child->thread.hw_brk.address == 0)
- return -ENOENT;
-
- child->thread.hw_brk.address = 0;
- child->thread.hw_brk.type = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-
- return 0;
-#endif
-}
-
-long arch_ptrace(struct task_struct *child, long request,
- unsigned long addr, unsigned long data)
-{
- int ret = -EPERM;
- void __user *datavp = (void __user *) data;
- unsigned long __user *datalp = datavp;
-
- switch (request) {
- /* read the word at location addr in the USER area. */
- case PTRACE_PEEKUSR: {
- unsigned long index, tmp;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_get_reg(child, (int) index, &tmp);
- if (ret)
- break;
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&tmp, &child->thread.TS_FPR(fpidx),
- sizeof(long));
- else
- tmp = child->thread.fp_state.fpscr;
- }
- ret = put_user(tmp, datalp);
- break;
- }
-
- /* write the word at location addr in the USER area */
- case PTRACE_POKEUSR: {
- unsigned long index;
-
- ret = -EIO;
- /* convert to index and check */
-#ifdef CONFIG_PPC32
- index = addr >> 2;
- if ((addr & 3) || (index > PT_FPSCR)
- || (child->thread.regs == NULL))
-#else
- index = addr >> 3;
- if ((addr & 7) || (index > PT_FPSCR))
-#endif
- break;
-
- CHECK_FULL_REGS(child->thread.regs);
- if (index < PT_FPR0) {
- ret = ptrace_put_reg(child, index, data);
- } else {
- unsigned int fpidx = index - PT_FPR0;
-
- flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&child->thread.TS_FPR(fpidx), &data,
- sizeof(long));
- else
- child->thread.fp_state.fpscr = data;
- ret = 0;
- }
- break;
- }
-
- case PPC_PTRACE_GETHWDBGINFO: {
- struct ppc_debug_info dbginfo;
-
- dbginfo.version = 1;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- dbginfo.num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
- dbginfo.num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
- dbginfo.num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
- dbginfo.data_bp_alignment = 4;
- dbginfo.sizeof_condition = 4;
- dbginfo.features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
- PPC_DEBUG_FEATURE_INSN_BP_MASK;
-#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
- dbginfo.features |=
- PPC_DEBUG_FEATURE_DATA_BP_RANGE |
- PPC_DEBUG_FEATURE_DATA_BP_MASK;
-#endif
-#else /* !CONFIG_PPC_ADV_DEBUG_REGS */
- dbginfo.num_instruction_bps = 0;
- dbginfo.num_data_bps = 1;
- dbginfo.num_condition_regs = 0;
-#ifdef CONFIG_PPC64
- dbginfo.data_bp_alignment = 8;
-#else
- dbginfo.data_bp_alignment = 4;
-#endif
- dbginfo.sizeof_condition = 0;
-#ifdef CONFIG_HAVE_HW_BREAKPOINT
- dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (cpu_has_feature(CPU_FTR_DAWR))
- dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
-#else
- dbginfo.features = 0;
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-
- if (!access_ok(VERIFY_WRITE, datavp,
- sizeof(struct ppc_debug_info)))
- return -EFAULT;
- ret = __copy_to_user(datavp, &dbginfo,
- sizeof(struct ppc_debug_info)) ?
- -EFAULT : 0;
- break;
- }
-
- case PPC_PTRACE_SETHWDEBUG: {
- struct ppc_hw_breakpoint bp_info;
-
- if (!access_ok(VERIFY_READ, datavp,
- sizeof(struct ppc_hw_breakpoint)))
- return -EFAULT;
- ret = __copy_from_user(&bp_info, datavp,
- sizeof(struct ppc_hw_breakpoint)) ?
- -EFAULT : 0;
- if (!ret)
- ret = ppc_set_hwdebug(child, &bp_info);
- break;
- }
-
- case PPC_PTRACE_DELHWDEBUG: {
- ret = ppc_del_hwdebug(child, data);
- break;
- }
-
- case PTRACE_GET_DEBUGREG: {
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
- unsigned long dabr_fake;
-#endif
- ret = -EINVAL;
- /* We only support one DABR and no IABRS at the moment */
- if (addr > 0)
- break;
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
- ret = put_user(child->thread.debug.dac1, datalp);
-#else
- dabr_fake = ((child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
- (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
- ret = put_user(dabr_fake, datalp);
-#endif
- break;
- }
-
- case PTRACE_SET_DEBUGREG:
- ret = ptrace_set_debugreg(child, addr, data);
- break;
-
-#ifdef CONFIG_PPC64
- case PTRACE_GETREGS64:
-#endif
- case PTRACE_GETREGS: /* Get all pt_regs from the child. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct pt_regs),
- datavp);
-
-#ifdef CONFIG_PPC64
- case PTRACE_SETREGS64:
-#endif
- case PTRACE_SETREGS: /* Set all gp regs in the child. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_GPR,
- 0, sizeof(struct pt_regs),
- datavp);
-
- case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
- case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_FPR,
- 0, sizeof(elf_fpregset_t),
- datavp);
-
-#ifdef CONFIG_ALTIVEC
- case PTRACE_GETVRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-
- case PTRACE_SETVRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VMX,
- 0, (33 * sizeof(vector128) +
- sizeof(u32)),
- datavp);
-#endif
-#ifdef CONFIG_VSX
- case PTRACE_GETVSRREGS:
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-
- case PTRACE_SETVSRREGS:
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_VSX,
- 0, 32 * sizeof(double),
- datavp);
-#endif
-#ifdef CONFIG_SPE
- case PTRACE_GETEVRREGS:
- /* Get the child spe register state. */
- return copy_regset_to_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-
- case PTRACE_SETEVRREGS:
- /* Set the child spe register state. */
- return copy_regset_from_user(child, &user_ppc_native_view,
- REGSET_SPE, 0, 35 * sizeof(u32),
- datavp);
-#endif
-
- default:
- ret = ptrace_request(child, request, addr, data);
- break;
- }
- return ret;
-}
-
-/*
- * We must return the syscall number to actually look up in the table.
- * This can be -1L to skip running any syscall at all.
- */
-long do_syscall_trace_enter(struct pt_regs *regs)
-{
- long ret = 0;
-
- user_exit();
-
- secure_computing_strict(regs->gpr[0]);
-
- if (test_thread_flag(TIF_SYSCALL_TRACE) &&
- tracehook_report_syscall_entry(regs))
- /*
- * Tracing decided this syscall should not happen.
- * We'll return a bogus call number to get an ENOSYS
- * error, but leave the original number in regs->gpr[0].
- */
- ret = -1L;
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_enter(regs, regs->gpr[0]);
-
-#ifdef CONFIG_PPC64
- if (!is_32bit_task())
- audit_syscall_entry(AUDIT_ARCH_PPC64,
- regs->gpr[0],
- regs->gpr[3], regs->gpr[4],
- regs->gpr[5], regs->gpr[6]);
- else
-#endif
- audit_syscall_entry(AUDIT_ARCH_PPC,
- regs->gpr[0],
- regs->gpr[3] & 0xffffffff,
- regs->gpr[4] & 0xffffffff,
- regs->gpr[5] & 0xffffffff,
- regs->gpr[6] & 0xffffffff);
-
- return ret ?: regs->gpr[0];
-}
-
-void do_syscall_trace_leave(struct pt_regs *regs)
-{
- int step;
-
- audit_syscall_exit(regs);
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_exit(regs, regs->result);
-
- step = test_thread_flag(TIF_SINGLESTEP);
- if (step || test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, step);
-
- user_enter();
-}
diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile
new file mode 100644
index 000000000000..77abd1a5a508
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
+obj-y += ptrace.o ptrace-view.o
+obj-y += ptrace-fpu.o
+obj-$(CONFIG_COMPAT) += ptrace32.o
+obj-$(CONFIG_VSX) += ptrace-vsx.o
+ifneq ($(CONFIG_VSX),y)
+obj-y += ptrace-novsx.o
+endif
+obj-$(CONFIG_ALTIVEC) += ptrace-altivec.o
+obj-$(CONFIG_SPE) += ptrace-spe.o
+obj-$(CONFIG_PPC_TRANSACTIONAL_MEM) += ptrace-tm.o
+obj-$(CONFIG_PPC_ADV_DEBUG_REGS) += ptrace-adv.o
+ifneq ($(CONFIG_PPC_ADV_DEBUG_REGS),y)
+obj-y += ptrace-noadv.o
+endif
diff --git a/arch/powerpc/kernel/ptrace/ptrace-adv.c b/arch/powerpc/kernel/ptrace/ptrace-adv.c
new file mode 100644
index 000000000000..399f5d94a3df
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-adv.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include "ptrace-decl.h"
+
+void user_enable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_BT;
+ task->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_enable_block_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ task->thread.debug.dbcr0 &= ~DBCR0_IC;
+ task->thread.debug.dbcr0 = DBCR0_IDM | DBCR0_BT;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
+ }
+ set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void user_disable_single_step(struct task_struct *task)
+{
+ struct pt_regs *regs = task->thread.regs;
+
+ if (regs != NULL) {
+ /*
+ * The logic to disable single stepping should be as
+ * simple as turning off the Instruction Complete flag.
+ * And, after doing so, if all debug flags are off, turn
+ * off DBCR0(IDM) and MSR(DE) .... Torez
+ */
+ task->thread.debug.dbcr0 &= ~(DBCR0_IC | DBCR0_BT);
+ /*
+ * Test to see if any of the DBCR_ACTIVE_EVENTS bits are set.
+ */
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ /*
+ * All debug events were off.....
+ */
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+ }
+ }
+ clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+ dbginfo->version = 1;
+ dbginfo->num_instruction_bps = CONFIG_PPC_ADV_DEBUG_IACS;
+ dbginfo->num_data_bps = CONFIG_PPC_ADV_DEBUG_DACS;
+ dbginfo->num_condition_regs = CONFIG_PPC_ADV_DEBUG_DVCS;
+ dbginfo->data_bp_alignment = 4;
+ dbginfo->sizeof_condition = 4;
+ dbginfo->features = PPC_DEBUG_FEATURE_INSN_BP_RANGE |
+ PPC_DEBUG_FEATURE_INSN_BP_MASK;
+ if (IS_ENABLED(CONFIG_PPC_ADV_DEBUG_DAC_RANGE))
+ dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE |
+ PPC_DEBUG_FEATURE_DATA_BP_MASK;
+}
+
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp)
+{
+ /* We only support one DABR and no IABRS at the moment */
+ if (addr > 0)
+ return -EINVAL;
+ return put_user(child->thread.debug.dac1, datalp);
+}
+
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+ struct pt_regs *regs = task->thread.regs;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /* For ppc64 we support one DABR and no IABR's at the moment (ppc64).
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* As described above, it was assumed 3 bits were passed with the data
+ * address, but we will assume only the mode bits will be passed
+ * as to not cause alignment restrictions for DAC-based processors.
+ */
+
+ /* DAC's hold the whole address without any mode flags */
+ task->thread.debug.dac1 = data & ~0x3UL;
+
+ if (task->thread.debug.dac1 == 0) {
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (!DBCR_ACTIVE_EVENTS(task->thread.debug.dbcr0,
+ task->thread.debug.dbcr1)) {
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
+ task->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ }
+ return 0;
+ }
+
+ /* Read or Write bits must be set */
+
+ if (!(data & 0x3UL))
+ return -EINVAL;
+
+ /* Set the Internal Debugging flag (IDM bit 1) for the DBCR0 register */
+ task->thread.debug.dbcr0 |= DBCR0_IDM;
+
+ /* Check for write and read flags and set DBCR0 accordingly */
+ dbcr_dac(task) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+ if (data & 0x1UL)
+ dbcr_dac(task) |= DBCR_DAC1R;
+ if (data & 0x2UL)
+ dbcr_dac(task) |= DBCR_DAC1W;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
+ return 0;
+}
+
+static long set_instruction_bp(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int slot;
+ int slot1_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC1) != 0);
+ int slot2_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC2) != 0);
+ int slot3_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC3) != 0);
+ int slot4_in_use = ((child->thread.debug.dbcr0 & DBCR0_IAC4) != 0);
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ slot2_in_use = 1;
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ slot4_in_use = 1;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT) {
+ /* Make sure range is valid. */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+
+ /* We need a pair of IAC regsisters */
+ if (!slot1_in_use && !slot2_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.iac2 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC12X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC12I;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if ((!slot3_in_use) && (!slot4_in_use)) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.iac4 = bp_info->addr2;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ if (bp_info->addr_mode ==
+ PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ dbcr_iac_range(child) |= DBCR_IAC34X;
+ else
+ dbcr_iac_range(child) |= DBCR_IAC34I;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ } else {
+ /* We only need one. If possible leave a pair free in
+ * case a range is needed later
+ */
+ if (!slot1_in_use) {
+ /*
+ * Don't use iac1 if iac1-iac2 are free and either
+ * iac3 or iac4 (but not both) are free
+ */
+ if (slot2_in_use || slot3_in_use == slot4_in_use) {
+ slot = 1;
+ child->thread.debug.iac1 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC1;
+ goto out;
+ }
+ }
+ if (!slot2_in_use) {
+ slot = 2;
+ child->thread.debug.iac2 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC2;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ } else if (!slot3_in_use) {
+ slot = 3;
+ child->thread.debug.iac3 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC3;
+ } else if (!slot4_in_use) {
+ slot = 4;
+ child->thread.debug.iac4 = bp_info->addr;
+ child->thread.debug.dbcr0 |= DBCR0_IAC4;
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ }
+out:
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+ return slot;
+}
+
+static int del_instruction_bp(struct task_struct *child, int slot)
+{
+ switch (slot) {
+ case 1:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC1) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE) {
+ /* address range - clear slots 1 & 2 */
+ child->thread.debug.iac2 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC12MODE;
+ }
+ child->thread.debug.iac1 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC1;
+ break;
+ case 2:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC2) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC12MODE)
+ /* used in a range */
+ return -EINVAL;
+ child->thread.debug.iac2 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case 3:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC3) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE) {
+ /* address range - clear slots 3 & 4 */
+ child->thread.debug.iac4 = 0;
+ dbcr_iac_range(child) &= ~DBCR_IAC34MODE;
+ }
+ child->thread.debug.iac3 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC3;
+ break;
+ case 4:
+ if ((child->thread.debug.dbcr0 & DBCR0_IAC4) == 0)
+ return -ENOENT;
+
+ if (dbcr_iac_range(child) & DBCR_IAC34MODE)
+ /* Used in a range */
+ return -EINVAL;
+ child->thread.debug.iac4 = 0;
+ child->thread.debug.dbcr0 &= ~DBCR0_IAC4;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int byte_enable =
+ (bp_info->condition_mode >> PPC_BREAKPOINT_CONDITION_BE_SHIFT)
+ & 0xf;
+ int condition_mode =
+ bp_info->condition_mode & PPC_BREAKPOINT_CONDITION_MODE;
+ int slot;
+
+ if (byte_enable && condition_mode == 0)
+ return -EINVAL;
+
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) {
+ slot = 1;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC1R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC1W;
+ child->thread.debug.dac1 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc1 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC1BE_SHIFT) |
+ (condition_mode << DBCR2_DVC1M_SHIFT));
+ }
+#endif
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ } else if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ /* Both dac1 and dac2 are part of a range */
+ return -ENOSPC;
+#endif
+ } else if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) {
+ slot = 2;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ dbcr_dac(child) |= DBCR_DAC2R;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ dbcr_dac(child) |= DBCR_DAC2W;
+ child->thread.debug.dac2 = (unsigned long)bp_info->addr;
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ if (byte_enable) {
+ child->thread.debug.dvc2 =
+ (unsigned long)bp_info->condition_value;
+ child->thread.debug.dbcr2 |=
+ ((byte_enable << DBCR2_DVC2BE_SHIFT) |
+ (condition_mode << DBCR2_DVC2M_SHIFT));
+ }
+#endif
+ } else {
+ return -ENOSPC;
+ }
+ child->thread.debug.dbcr0 |= DBCR0_IDM;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+ return slot + 4;
+}
+
+static int del_dac(struct task_struct *child, int slot)
+{
+ if (slot == 1) {
+ if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0)
+ return -ENOENT;
+
+ child->thread.debug.dac1 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC1R | DBCR_DAC1W);
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE) {
+ child->thread.debug.dac2 = 0;
+ child->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
+ }
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC1M | DBCR2_DVC1BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc1 = 0;
+#endif
+ } else if (slot == 2) {
+ if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0)
+ return -ENOENT;
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ if (child->thread.debug.dbcr2 & DBCR2_DAC12MODE)
+ /* Part of a range */
+ return -EINVAL;
+ child->thread.debug.dbcr2 &= ~(DBCR2_DVC2M | DBCR2_DVC2BE);
+#endif
+#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
+ child->thread.debug.dvc2 = 0;
+#endif
+ child->thread.debug.dac2 = 0;
+ dbcr_dac(child) &= ~(DBCR_DAC2R | DBCR_DAC2W);
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+static int set_dac_range(struct task_struct *child,
+ struct ppc_hw_breakpoint *bp_info)
+{
+ int mode = bp_info->addr_mode & PPC_BREAKPOINT_MODE_MASK;
+
+ /* We don't allow range watchpoints to be used with DVC */
+ if (bp_info->condition_mode)
+ return -EINVAL;
+
+ /*
+ * Best effort to verify the address range. The user/supervisor bits
+ * prevent trapping in kernel space, but let's fail on an obvious bad
+ * range. The simple test on the mask is not fool-proof, and any
+ * exclusive range will spill over into kernel space.
+ */
+ if (bp_info->addr >= TASK_SIZE)
+ return -EIO;
+ if (mode == PPC_BREAKPOINT_MODE_MASK) {
+ /*
+ * dac2 is a bitmask. Don't allow a mask that makes a
+ * kernel space address from a valid dac1 value
+ */
+ if (~((unsigned long)bp_info->addr2) >= TASK_SIZE)
+ return -EIO;
+ } else {
+ /*
+ * For range breakpoints, addr2 must also be a valid address
+ */
+ if (bp_info->addr2 >= TASK_SIZE)
+ return -EIO;
+ }
+
+ if (child->thread.debug.dbcr0 &
+ (DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W))
+ return -ENOSPC;
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1R | DBCR0_IDM);
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ child->thread.debug.dbcr0 |= (DBCR0_DAC1W | DBCR0_IDM);
+ child->thread.debug.dac1 = bp_info->addr;
+ child->thread.debug.dac2 = bp_info->addr2;
+ if (mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12M;
+ else if (mode == PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE)
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MX;
+ else /* PPC_BREAKPOINT_MODE_MASK */
+ child->thread.debug.dbcr2 |= DBCR2_DAC12MM;
+ regs_set_return_msr(child->thread.regs, child->thread.regs->msr | MSR_DE);
+
+ return 5;
+}
+#endif /* CONFIG_PPC_ADV_DEBUG_DAC_RANGE */
+
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * Check for invalid flags and combinations
+ */
+ if (bp_info->trigger_type == 0 ||
+ (bp_info->trigger_type & ~(PPC_BREAKPOINT_TRIGGER_EXECUTE |
+ PPC_BREAKPOINT_TRIGGER_RW)) ||
+ (bp_info->addr_mode & ~PPC_BREAKPOINT_MODE_MASK) ||
+ (bp_info->condition_mode &
+ ~(PPC_BREAKPOINT_CONDITION_MODE |
+ PPC_BREAKPOINT_CONDITION_BE_ALL)))
+ return -EINVAL;
+#if CONFIG_PPC_ADV_DEBUG_DVCS == 0
+ if (bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+#endif
+
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_EXECUTE) {
+ if (bp_info->trigger_type != PPC_BREAKPOINT_TRIGGER_EXECUTE ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+ return set_instruction_bp(child, bp_info);
+ }
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ return set_dac(child, bp_info);
+
+#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
+ return set_dac_range(child, bp_info);
+#else
+ return -EINVAL;
+#endif
+}
+
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+ int rc;
+
+ if (data <= 4)
+ rc = del_instruction_bp(child, (int)data);
+ else
+ rc = del_dac(child, (int)data - 4);
+
+ if (!rc) {
+ if (!DBCR_ACTIVE_EVENTS(child->thread.debug.dbcr0,
+ child->thread.debug.dbcr1)) {
+ child->thread.debug.dbcr0 &= ~DBCR0_IDM;
+ regs_set_return_msr(child->thread.regs,
+ child->thread.regs->msr & ~MSR_DE);
+ }
+ }
+ return rc;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-altivec.c b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
new file mode 100644
index 000000000000..0d9bc4bd4972
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-altivec.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+ * The transfer totals 34 quadwords. Quadwords 0-31 contain the
+ * corresponding vector registers. Quadword 32 contains the vscr as the
+ * last word (offset 12) within that quadword. Quadword 33 contains the
+ * vrsave as the first word (offset 0) within the quadword.
+ *
+ * This definition of the VMX state is compatible with the current PPC32
+ * ptrace interface. This allows signal handling and ptrace to use the
+ * same structures. This also simplifies the implementation of a bi-arch
+ * (combined 32- and 64-bit) gdb.
+ */
+
+/*
+ * Report how many registers of the VMX regset are active for @target:
+ * regset->n once the task has used VMX (thread.used_vr), 0 otherwise.
+ */
+int vr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	flush_altivec_to_thread(target);
+
+	if (!target->thread.used_vr)
+		return 0;
+
+	return regset->n;
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_get(struct task_struct *target, const struct user_regset *regset,
+	   struct membuf to)
+{
+	union {
+		elf_vrreg_t reg;
+		u32 word;
+	} vrsave_slot;
+
+	/* vscr has to sit where vr[32] would, so one write covers vr[] + vscr. */
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
+
+	flush_altivec_to_thread(target);
+
+	/* Quadwords 0-32: the 32 vector registers followed by vscr. */
+	membuf_write(&to, &target->thread.vr_state, 33 * sizeof(vector128));
+
+	/* Quadword 33: vrsave in the first word, remaining bytes zeroed. */
+	memset(&vrsave_slot, 0, sizeof(vrsave_slot));
+	vrsave_slot.word = target->thread.vrsave;
+
+	return membuf_write(&to, &vrsave_slot, sizeof(vrsave_slot));
+}
+
+/*
+ * Regardless of transactions, 'vr_state' holds the current running
+ * value of all the VMX registers and 'ckvr_state' holds the last
+ * checkpointed value of all the VMX registers for the current
+ * transaction to fall back on in case it aborts.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ * };
+ */
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+	   unsigned int pos, unsigned int count,
+	   const void *kbuf, const void __user *ubuf)
+{
+	union {
+		elf_vrreg_t reg;
+		u32 word;
+	} vrsave_slot;
+	int vrsave_start, vrsave_end;
+	int err;
+
+	/* vscr has to sit where vr[32] would, so one copy covers vr[] + vscr. */
+	BUILD_BUG_ON(offsetof(struct thread_vr_state, vscr) !=
+		     offsetof(struct thread_vr_state, vr[32]));
+
+	flush_altivec_to_thread(target);
+
+	/* Quadwords 0-32 land directly in the thread's vr_state. */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.vr_state, 0,
+				 33 * sizeof(vector128));
+	if (err || count == 0)
+		return err;
+
+	/*
+	 * Quadword 33 is staged in a scratch slot seeded with the current
+	 * vrsave; only the first word of the slot is written back.
+	 */
+	memset(&vrsave_slot, 0, sizeof(vrsave_slot));
+	vrsave_slot.word = target->thread.vrsave;
+
+	vrsave_start = 33 * sizeof(vector128);
+	vrsave_end = vrsave_start + sizeof(vrsave_slot);
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &vrsave_slot,
+				 vrsave_start, vrsave_end);
+	if (err)
+		return err;
+
+	target->thread.vrsave = vrsave_slot.word;
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-decl.h b/arch/powerpc/kernel/ptrace/ptrace-decl.h
new file mode 100644
index 000000000000..4171a5727197
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-decl.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <linux/regset.h>
+
+/*
+ * Set of msr bits that gdb can change on behalf of a process.
+ */
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+#define MSR_DEBUGCHANGE 0
+#else
+#define MSR_DEBUGCHANGE (MSR_SE | MSR_BE)
+#endif
+
+/*
+ * Max register writeable via put_reg
+ */
+#ifdef CONFIG_PPC32
+#define PT_MAX_PUT_REG PT_MQ
+#else
+#define PT_MAX_PUT_REG PT_CCR
+#endif
+
+/*
+ * Byte offset of field @f within struct thread_vr_state (TVSO),
+ * struct thread_fp_state (TFSO) and struct thread_struct (TSO).
+ */
+#define TVSO(f) (offsetof(struct thread_vr_state, f))
+#define TFSO(f) (offsetof(struct thread_fp_state, f))
+#define TSO(f) (offsetof(struct thread_struct, f))
+
+/*
+ * These are our native regset flavors.
+ *
+ * Most enumerators are only present when the matching CONFIG_* option is
+ * enabled, so the numeric value of a given REGSET_* constant depends on
+ * the kernel configuration.
+ */
+enum powerpc_regset {
+ REGSET_GPR,
+ REGSET_FPR,
+#ifdef CONFIG_ALTIVEC
+ REGSET_VMX,
+#endif
+#ifdef CONFIG_VSX
+ REGSET_VSX,
+#endif
+#ifdef CONFIG_SPE
+ REGSET_SPE,
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ REGSET_TM_CGPR, /* TM checkpointed GPR registers */
+ REGSET_TM_CFPR, /* TM checkpointed FPR registers */
+ REGSET_TM_CVMX, /* TM checkpointed VMX registers */
+ REGSET_TM_CVSX, /* TM checkpointed VSX registers */
+ REGSET_TM_SPR, /* TM specific SPR registers */
+ REGSET_TM_CTAR, /* TM checkpointed TAR register */
+ REGSET_TM_CPPR, /* TM checkpointed PPR register */
+ REGSET_TM_CDSCR, /* TM checkpointed DSCR register */
+#endif
+#ifdef CONFIG_PPC64
+ REGSET_PPR, /* PPR register */
+ REGSET_DSCR, /* DSCR register */
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ REGSET_TAR, /* TAR register */
+ REGSET_EBB, /* EBB registers */
+ REGSET_PMR, /* Performance Monitor Registers */
+ REGSET_DEXCR, /* DEXCR registers */
+ /* HASHKEYR additionally requires CONFIG_CHECKPOINT_RESTORE. */
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ REGSET_HASHKEYR, /* HASHKEYR register */
+#endif
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ REGSET_PKEY, /* AMR register */
+#endif
+};
+
+/* ptrace-(no)vsx */
+
+user_regset_get2_fn fpr_get;
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-vsx */
+
+int vsr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn vsr_get;
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-altivec */
+
+int vr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn vr_get;
+int vr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-spe */
+
+int evr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn evr_get;
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace */
+
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to,
+ unsigned long *regs);
+int gpr32_set_common(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf,
+ unsigned long *regs);
+
+/* ptrace-tm */
+
+/*
+ * flush_tmregs_to_thread() is only declared as a real function when
+ * CONFIG_PPC_TRANSACTIONAL_MEM is set; otherwise it is an empty inline
+ * stub so callers need no #ifdef of their own.
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void flush_tmregs_to_thread(struct task_struct *tsk);
+#else
+static inline void flush_tmregs_to_thread(struct task_struct *tsk) { }
+#endif
+
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cgpr_get;
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cfpr_get;
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cvmx_get;
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_cvsx_get;
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_spr_get;
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_tar_get;
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_ppr_get;
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset);
+user_regset_get2_fn tm_dscr_get;
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+user_regset_get2_fn tm_cgpr32_get;
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf);
+
+/* ptrace-view */
+
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data);
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data);
+
+extern const struct user_regset_view user_ppc_native_view;
+
+/* ptrace-fpu */
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data);
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data);
+
+/* ptrace-(no)adv */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo);
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+ unsigned long __user *datalp);
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data);
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info);
+long ppc_del_hwdebug(struct task_struct *child, long data);
diff --git a/arch/powerpc/kernel/ptrace/ptrace-fpu.c b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
new file mode 100644
index 000000000000..09c49632bfe5
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-fpu.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * ptrace_get_fpr - read one floating point register slot of @child.
+ * @child: task to read from.
+ * @index: PT_* register index; values above PT_FPSCR are rejected.
+ * @data: receives the value that was read.
+ *
+ * Returns 0 on success or -EIO when @index is greater than PT_FPSCR.
+ * Without CONFIG_PPC_FPU_REGS, *@data is always set to 0.
+ */
+int ptrace_get_fpr(struct task_struct *child, int index, unsigned long *data)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ /*
+ * Unsigned on purpose or not, an index below PT_FPR0 wraps to a large
+ * value here and ends up in the FPSCR branch below.
+ * NOTE(review): presumably callers never pass index < PT_FPR0 - confirm.
+ */
+ unsigned int fpidx = index - PT_FPR0;
+#endif
+
+ if (index > PT_FPSCR)
+ return -EIO;
+
+#ifdef CONFIG_PPC_FPU_REGS
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0)) {
+ if (IS_ENABLED(CONFIG_PPC32))
+ // On 32-bit the index we are passed refers to 32-bit words
+ *data = ((u32 *)child->thread.fp_state.fpr)[fpidx];
+ else
+ memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+ } else
+ *data = child->thread.fp_state.fpscr;
+#else
+ *data = 0;
+#endif
+
+ return 0;
+}
+
+/*
+ * ptrace_put_fpr - write one floating point register slot of @child.
+ * @child: task to modify.
+ * @index: PT_* register index; values above PT_FPSCR are rejected.
+ * @data: value to store.
+ *
+ * Returns 0 on success or -EIO when @index is greater than PT_FPSCR.
+ * Without CONFIG_PPC_FPU_REGS the write is dropped and 0 is returned.
+ */
+int ptrace_put_fpr(struct task_struct *child, int index, unsigned long data)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ /*
+ * An index below PT_FPR0 wraps to a large unsigned value here and
+ * ends up in the FPSCR branch below.
+ * NOTE(review): presumably callers never pass index < PT_FPR0 - confirm.
+ */
+ unsigned int fpidx = index - PT_FPR0;
+#endif
+
+ if (index > PT_FPSCR)
+ return -EIO;
+
+#ifdef CONFIG_PPC_FPU_REGS
+ flush_fp_to_thread(child);
+ if (fpidx < (PT_FPSCR - PT_FPR0)) {
+ if (IS_ENABLED(CONFIG_PPC32))
+ // On 32-bit the index we are passed refers to 32-bit words
+ ((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
+ else
+ memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+ } else
+ child->thread.fp_state.fpscr = data;
+#endif
+
+ return 0;
+}
+
diff --git a/arch/powerpc/kernel/ptrace/ptrace-noadv.c b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
new file mode 100644
index 000000000000..a5dd7d2e2c9e
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-noadv.c
@@ -0,0 +1,298 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/hw_breakpoint.h>
+
+#include <asm/debug.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Arm single stepping for @task: set MSR_SE and clear MSR_BE in the
+ * task's return MSR (when it has user regs) and flag it TIF_SINGLESTEP.
+ */
+void user_enable_single_step(struct task_struct *task)
+{
+	struct pt_regs *regs = task->thread.regs;
+
+	if (regs) {
+		unsigned long msr = regs->msr;
+
+		msr &= ~MSR_BE;
+		msr |= MSR_SE;
+		regs_set_return_msr(regs, msr);
+	}
+
+	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+/*
+ * Arm block stepping for @task: set MSR_BE and clear MSR_SE in the
+ * task's return MSR (when it has user regs) and flag it TIF_SINGLESTEP.
+ */
+void user_enable_block_step(struct task_struct *task)
+{
+	struct pt_regs *regs = task->thread.regs;
+
+	if (regs) {
+		unsigned long msr = regs->msr;
+
+		msr &= ~MSR_SE;
+		msr |= MSR_BE;
+		regs_set_return_msr(regs, msr);
+	}
+
+	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+/*
+ * Disarm single and block stepping for @task: clear both MSR_SE and
+ * MSR_BE in the task's return MSR (when it has user regs) and drop the
+ * TIF_SINGLESTEP flag.
+ */
+void user_disable_single_step(struct task_struct *task)
+{
+	struct pt_regs *regs = task->thread.regs;
+
+	if (regs) {
+		unsigned long msr = regs->msr;
+
+		msr &= ~(MSR_SE | MSR_BE);
+		regs_set_return_msr(regs, msr);
+	}
+
+	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+}
+
+/*
+ * ppc_gethwdinfo - fill @dbginfo with the debug facilities reported to
+ * ptrace by this (non-ADV_DEBUG) implementation.
+ */
+void ppc_gethwdinfo(struct ppc_debug_info *dbginfo)
+{
+	dbginfo->version = 1;
+
+	/* No instruction breakpoints and no condition registers here. */
+	dbginfo->num_instruction_bps = 0;
+	dbginfo->num_condition_regs = 0;
+	dbginfo->sizeof_condition = 0;
+
+	dbginfo->num_data_bps = ppc_breakpoint_available() ? nr_wp_slots() : 0;
+	dbginfo->data_bp_alignment = sizeof(long);
+
+	/* Start from an empty feature mask and add what is available. */
+	dbginfo->features = 0;
+	if (IS_ENABLED(CONFIG_HAVE_HW_BREAKPOINT)) {
+		dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_RANGE;
+		if (dawr_enabled())
+			dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
+	}
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		dbginfo->features |= PPC_DEBUG_FEATURE_DATA_BP_ARCH_31;
+}
+
+/*
+ * ptrace_get_debugreg - report the first watchpoint of @child as a
+ * DABR-style value.
+ * @child: task to query.
+ * @addr: debug register number; only 0 is accepted.
+ * @datalp: user pointer receiving the value.
+ *
+ * Returns -EINVAL for a non-zero @addr, otherwise the put_user() result.
+ */
+int ptrace_get_debugreg(struct task_struct *child, unsigned long addr,
+			unsigned long __user *datalp)
+{
+	const struct arch_hw_breakpoint *brk = &child->thread.hw_brk[0];
+	unsigned long dabr_image;
+
+	/* We only support one DABR and no IABRS at the moment */
+	if (addr > 0)
+		return -EINVAL;
+
+	/* Address with the HW_BRK_TYPE_DABR bits replaced by the type bits. */
+	dabr_image = brk->address & (~HW_BRK_TYPE_DABR);
+	dabr_image |= brk->type & HW_BRK_TYPE_DABR;
+
+	return put_user(dabr_image, datalp);
+}
+
+/*
+ * ptrace_set_debugreg - set or clear the single watchpoint exposed through
+ * the legacy debug register interface.
+ * @task: task the watchpoint applies to.
+ * @addr: debug register number; only 0 is accepted.
+ * @data: DABR-style value: address plus flags in the bottom 3 bits.
+ *
+ * This interface fakes a DABR, and there is only one DABR. So even if the
+ * hardware supports more than one watchpoint, only slot 0 is used here.
+ *
+ * Returns 0 on success, -EINVAL for a non-zero @addr, -EIO when the
+ * address is not below TASK_SIZE or a non-zero @data lacks
+ * HW_BRK_TYPE_TRANSLATE, -ENODEV when (without CONFIG_HAVE_HW_BREAKPOINT)
+ * no breakpoint facility is available, or the error reported by the
+ * perf hw_breakpoint calls.
+ */
+int ptrace_set_debugreg(struct task_struct *task, unsigned long addr, unsigned long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret;
+ struct thread_struct *thread = &task->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ bool set_bp = true;
+ struct arch_hw_breakpoint hw_brk;
+
+ /* For ppc64 we support one DABR and no IABRs at the moment.
+ * For embedded processors we support one DAC and no IAC's at the
+ * moment.
+ */
+ if (addr > 0)
+ return -EINVAL;
+
+ /* The bottom 3 bits in dabr are flags */
+ if ((data & ~0x7UL) >= TASK_SIZE)
+ return -EIO;
+
+ /* For processors using DABR (i.e. 970), the bottom 3 bits are flags.
+ * It was assumed, on previous implementations, that 3 bits were
+ * passed together with the data address, fitting the design of the
+ * DABR register, as follows:
+ *
+ * bit 0: Read flag
+ * bit 1: Write flag
+ * bit 2: Breakpoint translation
+ *
+ * Thus, we use them here as so.
+ */
+
+ /* Ensure breakpoint translation bit is set */
+ if (data && !(data & HW_BRK_TYPE_TRANSLATE))
+ return -EIO;
+ hw_brk.address = data & (~HW_BRK_TYPE_DABR);
+ hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
+ hw_brk.len = DABR_MAX_LEN;
+ hw_brk.hw_len = DABR_MAX_LEN;
+ /* A zero value, or one without any HW_BRK_TYPE_RDWR bit, is a removal. */
+ set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ bp = thread->ptrace_bps[0];
+ if (!set_bp) {
+ /* Removal: unregister the perf event backing slot 0, if any. */
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[0] = NULL;
+ }
+ return 0;
+ }
+ if (bp) {
+ /* A perf event already exists for slot 0: retarget it in place. */
+ attr = bp->attr;
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
+
+ /* Enable breakpoint */
+ attr.disabled = false;
+
+ ret = modify_user_hw_breakpoint(bp, &attr);
+ if (ret)
+ return ret;
+
+ thread->ptrace_bps[0] = bp;
+ thread->hw_brk[0] = hw_brk;
+ return 0;
+ }
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = hw_brk.address;
+ attr.bp_len = DABR_MAX_LEN;
+ arch_bp_generic_fields(hw_brk.type,
+ &attr.bp_type);
+
+ thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+ ptrace_triggered, NULL, task);
+ /* Never leave an ERR_PTR behind in the slot. */
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[0] = NULL;
+ return PTR_ERR(bp);
+ }
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+ if (set_bp && (!ppc_breakpoint_available()))
+ return -ENODEV;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ /* Remember the request; ptrace_get_debugreg() reports hw_brk[0]. */
+ task->thread.hw_brk[0] = hw_brk;
+ return 0;
+}
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+/*
+ * Return the index of the first thread->ptrace_bps[] slot that holds no
+ * perf event, scanning nr_wp_slots() entries, or -1 when all are taken.
+ */
+static int find_empty_ptrace_bp(struct thread_struct *thread)
+{
+	int slot = 0;
+
+	while (slot < nr_wp_slots()) {
+		if (thread->ptrace_bps[slot] == NULL)
+			return slot;
+		slot++;
+	}
+
+	return -1;
+}
+#endif
+
+/*
+ * Return the index of the first thread->hw_brk[] slot whose address is 0,
+ * scanning nr_wp_slots() entries, or -1 when every slot has an address.
+ */
+static int find_empty_hw_brk(struct thread_struct *thread)
+{
+	int slot = 0;
+
+	while (slot < nr_wp_slots()) {
+		if (thread->hw_brk[slot].address == 0)
+			return slot;
+		slot++;
+	}
+
+	return -1;
+}
+
+/*
+ * ppc_set_hwdebug - install a data watchpoint described by @bp_info.
+ * @child: task the watchpoint is installed for.
+ * @bp_info: watchpoint description supplied by the tracer.
+ *
+ * Returns the slot index + 1 as a handle on success (ppc_del_hwdebug()
+ * takes that handle), -ENOTSUPP for a version other than 1, -EINVAL for
+ * an unsupported trigger/condition/address mode, -EIO when the address is
+ * not below TASK_SIZE, -ENOSPC when no slot is free, -ENODEV when no
+ * breakpoint facility is available, or the error from
+ * register_user_hw_breakpoint().
+ */
+long ppc_set_hwdebug(struct task_struct *child, struct ppc_hw_breakpoint *bp_info)
+{
+ int i;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int len = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+ struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ struct arch_hw_breakpoint brk;
+
+ if (bp_info->version != 1)
+ return -ENOTSUPP;
+ /*
+ * Only data breakpoints are supported: at least one
+ * PPC_BREAKPOINT_TRIGGER_RW bit, no other trigger bit, no condition.
+ */
+ if ((bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_RW) == 0 ||
+ (bp_info->trigger_type & ~PPC_BREAKPOINT_TRIGGER_RW) != 0 ||
+ bp_info->condition_mode != PPC_BREAKPOINT_CONDITION_NONE)
+ return -EINVAL;
+
+ if ((unsigned long)bp_info->addr >= TASK_SIZE)
+ return -EIO;
+
+ brk.address = ALIGN_DOWN(bp_info->addr, HW_BREAKPOINT_SIZE);
+ brk.type = HW_BRK_TYPE_TRANSLATE | HW_BRK_TYPE_PRIV_ALL;
+ brk.len = DABR_MAX_LEN;
+ brk.hw_len = DABR_MAX_LEN;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
+ brk.type |= HW_BRK_TYPE_READ;
+ if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
+ brk.type |= HW_BRK_TYPE_WRITE;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ /* Range mode watches addr..addr2; exact mode watches a single byte. */
+ if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
+ len = bp_info->addr2 - bp_info->addr;
+ else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
+ len = 1;
+ else
+ return -EINVAL;
+
+ i = find_empty_ptrace_bp(thread);
+ if (i < 0)
+ return -ENOSPC;
+
+ /* Create a new breakpoint request if one doesn't exist already */
+ hw_breakpoint_init(&attr);
+ attr.bp_addr = (unsigned long)bp_info->addr;
+ attr.bp_len = len;
+ arch_bp_generic_fields(brk.type, &attr.bp_type);
+
+ bp = register_user_hw_breakpoint(&attr, ptrace_triggered, NULL, child);
+ thread->ptrace_bps[i] = bp;
+ /* Never leave an ERR_PTR behind in the slot. */
+ if (IS_ERR(bp)) {
+ thread->ptrace_bps[i] = NULL;
+ return PTR_ERR(bp);
+ }
+
+ return i + 1;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ /*
+ * Only reached without CONFIG_HAVE_HW_BREAKPOINT: every path through
+ * the block above returns.
+ */
+ if (bp_info->addr_mode != PPC_BREAKPOINT_MODE_EXACT)
+ return -EINVAL;
+
+ i = find_empty_hw_brk(&child->thread);
+ if (i < 0)
+ return -ENOSPC;
+
+ if (!ppc_breakpoint_available())
+ return -ENODEV;
+
+ child->thread.hw_brk[i] = brk;
+
+ return i + 1;
+}
+
+/*
+ * ppc_del_hwdebug - remove the watchpoint identified by handle @data.
+ * @child: task owning the watchpoint.
+ * @data: handle of the watchpoint, i.e. slot index + 1.
+ *
+ * Returns 0 on success, -EINVAL when @data is outside 1..nr_wp_slots()
+ * and -ENOENT when the addressed slot is empty.
+ */
+long ppc_del_hwdebug(struct task_struct *child, long data)
+{
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ int ret = 0;
+ struct thread_struct *thread = &child->thread;
+ struct perf_event *bp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+ if (data < 1 || data > nr_wp_slots())
+ return -EINVAL;
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ /* The slot is backed by a perf event: unregister it and clear the slot. */
+ bp = thread->ptrace_bps[data - 1];
+ if (bp) {
+ unregister_hw_breakpoint(bp);
+ thread->ptrace_bps[data - 1] = NULL;
+ } else {
+ ret = -ENOENT;
+ }
+ return ret;
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
+ /* Empty slot: address 0 and HW_BRK_FLAG_DISABLED not set. */
+ if (!(child->thread.hw_brk[data - 1].flags & HW_BRK_FLAG_DISABLED) &&
+ child->thread.hw_brk[data - 1].address == 0)
+ return -ENOENT;
+
+ child->thread.hw_brk[data - 1].address = 0;
+ child->thread.hw_brk[data - 1].type = 0;
+ child->thread.hw_brk[data - 1].flags = 0;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
+ return 0;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-novsx.c b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
new file mode 100644
index 000000000000..7433f3db979a
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-novsx.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ /* fpscr must sit where fpr[32] would, so one write covers all 33 u64. */
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ return membuf_write(&to, &target->thread.fp_state, 33 * sizeof(u64));
+#else
+ /* No FPU register state configured: write 33 u64 from empty_zero_page. */
+ return membuf_write(&to, &empty_zero_page, 33 * sizeof(u64));
+#endif
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+#ifdef CONFIG_PPC_FPU_REGS
+ /* fpscr must sit where fpr[32] would, so one copy covers all 33 u64. */
+ BUILD_BUG_ON(offsetof(struct thread_fp_state, fpscr) !=
+ offsetof(struct thread_fp_state, fpr[32]));
+
+ flush_fp_to_thread(target);
+
+ /* End position -1: presumably "up to the end of the regset" - see regset.h. */
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fp_state, 0, -1);
+#else
+ /* No FPU register state configured: the write is accepted and dropped. */
+ return 0;
+#endif
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-spe.c b/arch/powerpc/kernel/ptrace/ptrace-spe.c
new file mode 100644
index 000000000000..47034d069045
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-spe.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * For the evr_get/evr_set functions the data has the following layout:
+ *
+ * struct {
+ * u32 evr[32];
+ * u64 acc;
+ * u32 spefscr;
+ * }
+ */
+
+/*
+ * Report how many registers of the SPE regset are active for @target:
+ * regset->n once the task has used SPE (thread.used_spe), 0 otherwise.
+ */
+int evr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	flush_spe_to_thread(target);
+
+	if (!target->thread.used_spe)
+		return 0;
+
+	return regset->n;
+}
+
+/*
+ * Copy the SPE state of @target out to @to: the evr[] array first, then
+ * acc and spefscr as one contiguous chunk.
+ */
+int evr_get(struct task_struct *target, const struct user_regset *regset,
+	    struct membuf to)
+{
+	struct thread_struct *thread = &target->thread;
+
+	/* spefscr must directly follow acc for the combined write below. */
+	BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+		     offsetof(struct thread_struct, spefscr));
+
+	flush_spe_to_thread(target);
+
+	membuf_write(&to, &thread->evr, sizeof(thread->evr));
+
+	return membuf_write(&to, &thread->acc, sizeof(u64) + sizeof(u32));
+}
+
+/*
+ * Copy SPE state into @target: the evr[] array first, then everything
+ * after it starting at thread.acc. Returns 0 or the copy-in error.
+ */
+int evr_set(struct task_struct *target, const struct user_regset *regset,
+	    unsigned int pos, unsigned int count,
+	    const void *kbuf, const void __user *ubuf)
+{
+	struct thread_struct *thread = &target->thread;
+	int err;
+
+	/* spefscr must directly follow acc for the second copy below. */
+	BUILD_BUG_ON(offsetof(struct thread_struct, acc) + sizeof(u64) !=
+		     offsetof(struct thread_struct, spefscr));
+
+	flush_spe_to_thread(target);
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &thread->evr,
+				 0, sizeof(thread->evr));
+	if (err)
+		return err;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &thread->acc,
+				  sizeof(thread->evr), -1);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c
new file mode 100644
index 000000000000..447bff87fd21
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c
@@ -0,0 +1,788 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Bring the TM state of @tsk into its thread_struct.
+ *
+ * Nothing is done unless the CPU has TM and @tsk is the current task; a
+ * task that is not current will have been flushed already to its
+ * thread_struct during __switch_to().
+ */
+void flush_tmregs_to_thread(struct task_struct *tsk)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return;
+
+	if (tsk != current)
+		return;
+
+	if (!MSR_TM_SUSPENDED(mfmsr())) {
+		/* Not in TM: save the TM SPRs from live. */
+		tm_enable();
+		tm_save_sprs(&tsk->thread);
+		return;
+	}
+
+	/* A reclaim flushes ALL the state. */
+	tm_reclaim_current(TM_CAUSE_SIGNAL);
+}
+
+/* Checkpointed MSR of @task as shown to userspace: ckpt MSR | fpexc_mode. */
+static unsigned long get_user_ckpt_msr(struct task_struct *task)
+{
+	unsigned long user_msr = task->thread.ckpt_regs.msr;
+
+	user_msr |= task->thread.fpexc_mode;
+
+	return user_msr;
+}
+
+/*
+ * Update the checkpointed MSR of @task from @msr. Only the bits in
+ * MSR_DEBUGCHANGE are taken from @msr; all others keep their value.
+ * Always returns 0.
+ */
+static int set_user_ckpt_msr(struct task_struct *task, unsigned long msr)
+{
+	struct pt_regs *ckpt = &task->thread.ckpt_regs;
+	unsigned long kept = ckpt->msr & ~MSR_DEBUGCHANGE;
+
+	ckpt->msr = kept | (msr & MSR_DEBUGCHANGE);
+
+	return 0;
+}
+
+/* Store @trap into the checkpointed regs of @task via set_trap(); returns 0. */
+static int set_user_ckpt_trap(struct task_struct *task, unsigned long trap)
+{
+	struct pt_regs *ckpt = &task->thread.ckpt_regs;
+
+	set_trap(ckpt, trap);
+
+	return 0;
+}
+
+/**
+ * tm_cgpr_active - get active number of registers in CGPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * registers in transaction checkpointed GPR category.
+ */
+int tm_cgpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* The regset is only populated while a transaction is active. */
+	if (MSR_TM_ACTIVE(target->thread.regs->msr))
+		return regset->n;
+
+	return 0;
+}
+
+/**
+ * tm_cgpr_get - get CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds all the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function gets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ /*
+ * Separate cursors for the msr (and softe) slots are set up before
+ * the bulk write below goes through 'to', so those slots can be
+ * stored individually afterwards.
+ */
+ struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+ struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
+#endif
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ membuf_write(&to, &target->thread.ckpt_regs, sizeof(struct user_pt_regs));
+
+ /* The msr slot gets the user view: checkpointed MSR | fpexc_mode. */
+ membuf_store(&to_msr, get_user_ckpt_msr(target));
+#ifdef CONFIG_PPC64
+ /* The softe slot is always reported as 1. */
+ membuf_store(&to_softe, 0x1ul);
+#endif
+ /* Zero-fill from the end of user_pt_regs up to ELF_NGREG registers. */
+ return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
+ sizeof(struct user_pt_regs));
+}
+
+/**
+ * tm_cgpr_set - set the CGPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed GPR registers.
+ *
+ * When the transaction is active, 'ckpt_regs' holds the checkpointed
+ * GPR register values for the current transaction to fall back on if it
+ * aborts in between. This function sets those checkpointed GPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * struct pt_regs ckpt_regs;
+ * };
+ */
+int tm_cgpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ return -ENODEV;
+
+ if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+ return -ENODATA;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+
+ /* Everything before the MSR slot goes straight into ckpt_regs. */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs,
+ 0, PT_MSR * sizeof(reg));
+
+ /* MSR is staged in 'reg' and filtered through set_user_ckpt_msr(). */
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_msr(target, reg);
+ }
+
+ /* orig_gpr3 must directly follow msr for the next copy to line up. */
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ /* PT_ORIG_R3 up to and including PT_MAX_PUT_REG are copied directly. */
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.ckpt_regs.orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ /* Slots between PT_MAX_PUT_REG and PT_TRAP are skipped, not written. */
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ /* The trap slot is staged in 'reg' and applied via set_user_ckpt_trap(). */
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_ckpt_trap(target, reg);
+ }
+
+ /* Whatever follows the trap slot is skipped. */
+ if (!ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+/**
+ * tm_cfpr_active - get active number of registers in CFPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * registers in transaction checkpointed FPR category.
+ */
+int tm_cfpr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* The regset is only populated while a transaction is active. */
+	if (MSR_TM_ACTIVE(target->thread.regs->msr))
+		return regset->n;
+
+	return 0;
+}
+
+/**
+ * tm_cfpr_get - get CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed FPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	u64 image[33];
+	int reg;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Gather the 32 checkpointed FPRs plus fpscr, then emit them at once. */
+	for (reg = 0; reg < 32; reg++)
+		image[reg] = target->thread.TS_CKFPR(reg);
+	image[32] = target->thread.ckfp_state.fpscr;
+
+	return membuf_write(&to, image, sizeof(image));
+}
+
+/**
+ * tm_cfpr_set - set CFPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed FPR registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * FPR register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * FPR registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ *};
+ */
+int tm_cfpr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	u64 image[33];
+	int reg;
+	int err;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Seed the staging buffer with the current checkpointed values. */
+	for (reg = 0; reg < 32; reg++)
+		image[reg] = target->thread.TS_CKFPR(reg);
+	image[32] = target->thread.ckfp_state.fpscr;
+
+	/* Overlay the caller's data, then write the buffer back on success. */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, image, 0, -1);
+	if (err)
+		return err;
+
+	for (reg = 0; reg < 32; reg++)
+		target->thread.TS_CKFPR(reg) = image[reg];
+	target->thread.ckfp_state.fpscr = image[32];
+
+	return 0;
+}
+
+/**
+ * tm_cvmx_active - get active number of registers in CVMX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * registers in checkpointed VMX category.
+ */
+int tm_cvmx_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* The full regset is reported only while MSR_TM_ACTIVE() holds. */
+	if (MSR_TM_ACTIVE(target->thread.regs->msr))
+		return regset->n;
+
+	return 0;
+}
+
+/**
+ * tm_cvmx_get - get CVMX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	union {
+		elf_vrreg_t reg;
+		u32 word;
+	} ckvrsave_slot;
+
+	/* vscr has to sit where vr[32] would be for the 33-entry copy below. */
+	BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/*
+	 * vrsave occupies a full vector-sized slot in the layout; only its
+	 * low-order word carries data, the remainder is zero.
+	 */
+	memset(&ckvrsave_slot, 0, sizeof(ckvrsave_slot));
+	ckvrsave_slot.word = target->thread.ckvrsave;
+
+	/* vr[0..31] and vscr go out as one 33-element run, then vrsave. */
+	membuf_write(&to, &target->thread.ckvr_state, 33 * sizeof(vector128));
+	return membuf_write(&to, &ckvrsave_slot, sizeof(ckvrsave_slot));
+}
+
+/**
+ * tm_cvmx_set - set CVMX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VMX registers.
+ *
+ * When the transaction is active 'ckvr_state' and 'ckvrsave' hold
+ * the checkpointed values for the current transaction to fall
+ * back on if it aborts in between. The userspace interface buffer
+ * layout is as follows.
+ *
+ * struct data {
+ * vector128 vr[32];
+ * vector128 vscr;
+ * vector128 vrsave;
+ *};
+ */
+int tm_cvmx_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	union {
+		elf_vrreg_t reg;
+		u32 word;
+	} ckvrsave_slot;
+	int err;
+
+	/* vscr has to sit where vr[32] would be for the 33-entry copy below. */
+	BUILD_BUG_ON(TVSO(vscr) != TVSO(vr[32]));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* vr[0..31] plus vscr land directly in the checkpointed VMX state. */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.ckvr_state,
+				 0, 33 * sizeof(vector128));
+	if (err || !count)
+		return err;
+
+	/*
+	 * We use only the low-order word of vrsave.  Preload the slot with
+	 * the current value so a short write leaves it intact.
+	 */
+	memset(&ckvrsave_slot, 0, sizeof(ckvrsave_slot));
+	ckvrsave_slot.word = target->thread.ckvrsave;
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ckvrsave_slot,
+				 33 * sizeof(vector128), -1);
+	if (err)
+		return err;
+
+	target->thread.ckvrsave = ckvrsave_slot.word;
+	return 0;
+}
+
+/**
+ * tm_cvsx_active - get active number of registers in CVSX
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks for the active number of available
+ * registers in transaction checkpointed VSX category.
+ */
+int tm_cvsx_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	/* used_vsr is sampled only after the VSX state has been flushed. */
+	flush_vsx_to_thread(target);
+	if (!target->thread.used_vsr)
+		return 0;
+
+	return regset->n;
+}
+
+/**
+ * tm_cvsx_get - get CVSX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * values for the current transaction to fall back on if it aborts
+ * in between. This function gets those checkpointed VSX registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	u64 vsr_low[32];
+	int n;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	/* Gather the TS_VSRLOWOFFSET doubleword of each checkpointed entry. */
+	for (n = 0; n < 32; n++)
+		vsr_low[n] = target->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET];
+
+	return membuf_write(&to, vsr_low, 32 * sizeof(double));
+}
+
+/**
+ * tm_cvsx_set - set CVSX registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets in transaction checkpointed VSX registers.
+ *
+ * When the transaction is active 'ckfp_state' holds the checkpointed
+ * VSX register values for the current transaction to fall back on
+ * if it aborts in between. This function sets these checkpointed
+ * VSX registers. The userspace interface buffer layout is as follows.
+ *
+ * struct data {
+ * u64 vsx[32];
+ *};
+ */
+int tm_cvsx_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	u64 vsr_low[32];
+	int n, err;
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	/* Flush the state */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+	flush_vsx_to_thread(target);
+
+	/* Preload with current values so uncovered entries are preserved. */
+	for (n = 0; n < 32; n++)
+		vsr_low[n] = target->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET];
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 vsr_low, 0, 32 * sizeof(double));
+	if (err)
+		return err;
+
+	/* Nothing is committed unless the copy-in succeeded. */
+	for (n = 0; n < 32; n++)
+		target->thread.ckfp_state.fpr[n][TS_VSRLOWOFFSET] = vsr_low[n];
+
+	return 0;
+}
+
+/**
+ * tm_spr_active - get active number of registers in TM SPR
+ * @target: The target task.
+ * @regset: The user regset structure.
+ *
+ * This function checks the active number of available
+ * registers in the transactional memory SPR category.
+ */
+int tm_spr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	/* No MSR check here: the count depends on the CPU feature alone. */
+	if (cpu_has_feature(CPU_FTR_TM))
+		return regset->n;
+
+	return -ENODEV;
+}
+
+/**
+ * tm_spr_get - get the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * This function gets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	const struct thread_struct *t = &target->thread;
+
+	/* Build tests: the three SPRs are laid out back to back, then ckpt_regs */
+	BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+	BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+	BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* Flush the states */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* Emit in layout order: TFHAR, TEXASR, TFIAR. */
+	membuf_write(&to, &t->tm_tfhar, sizeof(u64));
+	membuf_write(&to, &t->tm_texasr, sizeof(u64));
+	return membuf_write(&to, &t->tm_tfiar, sizeof(u64));
+}
+
+/**
+ * tm_spr_set - set the TM related SPR registers
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @pos: The buffer position.
+ * @count: Number of bytes to copy.
+ * @kbuf: Kernel buffer to copy into.
+ * @ubuf: User buffer to copy from.
+ *
+ * This function sets transactional memory related SPR registers.
+ * The userspace interface buffer layout is as follows.
+ *
+ * struct {
+ * u64 tm_tfhar;
+ * u64 tm_texasr;
+ * u64 tm_tfiar;
+ * };
+ */
+int tm_spr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	struct thread_struct *t = &target->thread;
+	int err;
+
+	/* Build tests: the three SPRs are laid out back to back, then ckpt_regs */
+	BUILD_BUG_ON(TSO(tm_tfhar) + sizeof(u64) != TSO(tm_texasr));
+	BUILD_BUG_ON(TSO(tm_texasr) + sizeof(u64) != TSO(tm_tfiar));
+	BUILD_BUG_ON(TSO(tm_tfiar) + sizeof(u64) != TSO(ckpt_regs));
+
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	/* Flush the states */
+	flush_tmregs_to_thread(target);
+	flush_fp_to_thread(target);
+	flush_altivec_to_thread(target);
+
+	/* TFHAR: bytes [0, 8) of the regset */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &t->tm_tfhar, 0, sizeof(u64));
+	if (err)
+		return err;
+
+	/* TEXASR: bytes [8, 16) */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &t->tm_texasr, sizeof(u64),
+				 2 * sizeof(u64));
+	if (err)
+		return err;
+
+	/* TFIAR: bytes [16, 24) */
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &t->tm_tfiar,
+				  2 * sizeof(u64), 3 * sizeof(u64));
+}
+
+/*
+ * tm_tar_active - number of registers currently exposed by this regset.
+ *
+ * Returns -ENODEV without CPU_FTR_TM, regset->n while MSR_TM_ACTIVE() holds
+ * for the target's MSR, and 0 otherwise.
+ */
+int tm_tar_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	return regset->n;
+}
+
+/*
+ * tm_tar_get - copy thread.tm_tar (one u64) out to @to.
+ *
+ * Returns -ENODEV without CPU_FTR_TM and -ENODATA unless MSR_TM_ACTIVE()
+ * holds for the target's MSR.
+ */
+int tm_tar_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ?
+		membuf_write(&to, &target->thread.tm_tar, sizeof(u64)) :
+		-ENODATA;
+}
+
+/*
+ * tm_tar_set - copy one u64 from the caller into thread.tm_tar.
+ *
+ * Returns -ENODEV without CPU_FTR_TM, -ENODATA unless MSR_TM_ACTIVE() holds
+ * for the target's MSR, otherwise the result of user_regset_copyin().
+ */
+int tm_tar_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_tar, 0, sizeof(u64));
+}
+
+/*
+ * tm_ppr_active - number of registers currently exposed by this regset.
+ *
+ * Returns -ENODEV without CPU_FTR_TM, regset->n while MSR_TM_ACTIVE() holds
+ * for the target's MSR, and 0 otherwise.
+ */
+int tm_ppr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	return regset->n;
+}
+
+
+/*
+ * tm_ppr_get - copy thread.tm_ppr (one u64) out to @to.
+ *
+ * Returns -ENODEV without CPU_FTR_TM and -ENODATA unless MSR_TM_ACTIVE()
+ * holds for the target's MSR.
+ */
+int tm_ppr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ?
+		membuf_write(&to, &target->thread.tm_ppr, sizeof(u64)) :
+		-ENODATA;
+}
+
+/*
+ * tm_ppr_set - copy one u64 from the caller into thread.tm_ppr.
+ *
+ * Returns -ENODEV without CPU_FTR_TM, -ENODATA unless MSR_TM_ACTIVE() holds
+ * for the target's MSR, otherwise the result of user_regset_copyin().
+ */
+int tm_ppr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_ppr, 0, sizeof(u64));
+}
+
+/*
+ * tm_dscr_active - number of registers currently exposed by this regset.
+ *
+ * Returns -ENODEV without CPU_FTR_TM, regset->n while MSR_TM_ACTIVE() holds
+ * for the target's MSR, and 0 otherwise.
+ */
+int tm_dscr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return 0;
+
+	return regset->n;
+}
+
+/*
+ * tm_dscr_get - copy thread.tm_dscr (one u64) out to @to.
+ *
+ * Returns -ENODEV without CPU_FTR_TM and -ENODATA unless MSR_TM_ACTIVE()
+ * holds for the target's MSR.
+ */
+int tm_dscr_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	return MSR_TM_ACTIVE(target->thread.regs->msr) ?
+		membuf_write(&to, &target->thread.tm_dscr, sizeof(u64)) :
+		-ENODATA;
+}
+
+/*
+ * tm_dscr_set - copy one u64 from the caller into thread.tm_dscr.
+ *
+ * Returns -ENODEV without CPU_FTR_TM, -ENODATA unless MSR_TM_ACTIVE() holds
+ * for the target's MSR, otherwise the result of user_regset_copyin().
+ */
+int tm_dscr_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	if (!cpu_has_feature(CPU_FTR_TM))
+		return -ENODEV;
+
+	if (!MSR_TM_ACTIVE(target->thread.regs->msr))
+		return -ENODATA;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.tm_dscr, 0, sizeof(u64));
+}
+
+/*
+ * tm_cgpr32_get - 32-bit view of the checkpointed general-purpose registers.
+ * @target: The target task.
+ * @regset: The user regset structure.
+ * @to: Destination of copy.
+ *
+ * Emits thread.ckpt_regs through gpr32_get_common() and returns that
+ * helper's result.
+ */
+int tm_cgpr32_get(struct task_struct *target, const struct user_regset *regset,
+		struct membuf to)
+{
+	/*
+	 * 'to' is handed to gpr32_get_common() by value, so this function's
+	 * copy still points at the start of the buffer afterwards.  Zeroing
+	 * ELF_NGREG words from here would wipe everything the helper just
+	 * stored.  The helper already zero-fills the tail beyond
+	 * PT_REGS_COUNT itself, so its result is returned as is.
+	 */
+	return gpr32_get_common(target, regset, to,
+				&target->thread.ckpt_regs.gpr[0]);
+}
+
+/*
+ * tm_cgpr32_set - 32-bit write path for the checkpointed GPRs.
+ *
+ * Thin wrapper: hands thread.ckpt_regs.gpr to gpr32_set_common() and
+ * returns its result.
+ */
+int tm_cgpr32_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	unsigned long *ckpt_gprs = target->thread.ckpt_regs.gpr;
+
+	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+				ckpt_gprs);
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
new file mode 100644
index 000000000000..0310f9097e39
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/nospec.h>
+#include <linux/pkeys.h>
+
+#include "ptrace-decl.h"
+
+/* One entry of the register-name <-> struct pt_regs offset lookup table. */
+struct pt_regs_offset {
+ const char *name; /* register name; NULL in the terminating entry */
+ int offset; /* offsetof() the register within struct pt_regs */
+};
+
+/* Helpers for building regoffset_table entries. */
+#define STR(s) #s /* convert to string */
+/* Entry named after the struct pt_regs member @r itself. */
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+/* Two entries for GPR @num, "r<num>" and "gpr<num>", both at gpr[num]. */
+#define GPR_OFFSET_NAME(num) \
+ {.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
+ {.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
+/* Sentinel: the lookup loops stop at the entry whose name is NULL. */
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+/*
+ * Name/offset table searched linearly by regs_query_register_offset() and
+ * regs_query_register_name().  Each GPR appears under two names; slot 39 of
+ * the special registers is softe on PPC64 and mq otherwise.  The table is
+ * terminated by REG_OFFSET_END.
+ */
+static const struct pt_regs_offset regoffset_table[] = {
+ GPR_OFFSET_NAME(0),
+ GPR_OFFSET_NAME(1),
+ GPR_OFFSET_NAME(2),
+ GPR_OFFSET_NAME(3),
+ GPR_OFFSET_NAME(4),
+ GPR_OFFSET_NAME(5),
+ GPR_OFFSET_NAME(6),
+ GPR_OFFSET_NAME(7),
+ GPR_OFFSET_NAME(8),
+ GPR_OFFSET_NAME(9),
+ GPR_OFFSET_NAME(10),
+ GPR_OFFSET_NAME(11),
+ GPR_OFFSET_NAME(12),
+ GPR_OFFSET_NAME(13),
+ GPR_OFFSET_NAME(14),
+ GPR_OFFSET_NAME(15),
+ GPR_OFFSET_NAME(16),
+ GPR_OFFSET_NAME(17),
+ GPR_OFFSET_NAME(18),
+ GPR_OFFSET_NAME(19),
+ GPR_OFFSET_NAME(20),
+ GPR_OFFSET_NAME(21),
+ GPR_OFFSET_NAME(22),
+ GPR_OFFSET_NAME(23),
+ GPR_OFFSET_NAME(24),
+ GPR_OFFSET_NAME(25),
+ GPR_OFFSET_NAME(26),
+ GPR_OFFSET_NAME(27),
+ GPR_OFFSET_NAME(28),
+ GPR_OFFSET_NAME(29),
+ GPR_OFFSET_NAME(30),
+ GPR_OFFSET_NAME(31),
+ REG_OFFSET_NAME(nip),
+ REG_OFFSET_NAME(msr),
+ REG_OFFSET_NAME(ctr),
+ REG_OFFSET_NAME(link),
+ REG_OFFSET_NAME(xer),
+ REG_OFFSET_NAME(ccr),
+#ifdef CONFIG_PPC64
+ REG_OFFSET_NAME(softe),
+#else
+ REG_OFFSET_NAME(mq),
+#endif
+ REG_OFFSET_NAME(trap),
+ REG_OFFSET_NAME(dar),
+ REG_OFFSET_NAME(dsisr),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL;
+ */
+int regs_query_register_offset(const char *name)
+{
+	const struct pt_regs_offset *entry = regoffset_table;
+
+	/* Linear scan; the table ends at the entry with a NULL name. */
+	while (entry->name) {
+		if (strcmp(entry->name, name) == 0)
+			return entry->offset;
+		entry++;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * regs_query_register_name() - query register name from its offset
+ * @offset: the offset of a register in struct pt_regs.
+ *
+ * regs_query_register_name() returns the name of a register from its
+ * offset in struct pt_regs. If the @offset is invalid, this returns NULL;
+ */
+const char *regs_query_register_name(unsigned int offset)
+{
+	const struct pt_regs_offset *entry = regoffset_table;
+
+	/* First match wins, so a GPR resolves to its "r<num>" spelling. */
+	while (entry->name) {
+		if (entry->offset == offset)
+			return entry->name;
+		entry++;
+	}
+
+	return NULL;
+}
+
+/*
+ * does not yet catch signals sent when the child dies.
+ * in exit.c or in signal.c.
+ */
+
+/* MSR as shown to userspace: the saved MSR with thread.fpexc_mode OR-ed in. */
+static unsigned long get_user_msr(struct task_struct *task)
+{
+	unsigned long saved_msr = task->thread.regs->msr;
+
+	return saved_msr | task->thread.fpexc_mode;
+}
+
+/*
+ * Update the task's return MSR from a user-supplied value.  Only the bits
+ * in MSR_DEBUGCHANGE are taken from @msr; every other bit keeps its current
+ * value.  Always returns 0.
+ */
+static __always_inline int set_user_msr(struct task_struct *task, unsigned long msr)
+{
+	unsigned long preserved = task->thread.regs->msr & ~MSR_DEBUGCHANGE;
+	unsigned long requested = msr & MSR_DEBUGCHANGE;
+
+	regs_set_return_msr(task->thread.regs, preserved | requested);
+	return 0;
+}
+
+#ifdef CONFIG_PPC64
+/* Read the task's saved DSCR value into *data; always returns 0. */
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ *data = task->thread.dscr;
+ return 0;
+}
+
+/* Store a new DSCR value and set thread.dscr_inherit; always returns 0. */
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ task->thread.dscr = dscr;
+ task->thread.dscr_inherit = 1;
+ return 0;
+}
+#else
+/* Without CONFIG_PPC64 both DSCR accessors fail with -EIO. */
+static int get_user_dscr(struct task_struct *task, unsigned long *data)
+{
+ return -EIO;
+}
+
+static int set_user_dscr(struct task_struct *task, unsigned long dscr)
+{
+ return -EIO;
+}
+#endif
+
+/*
+ * Set the trap value of the task's saved registers via set_trap().
+ * Always returns 0.
+ *
+ * We prevent mucking around with the reserved area of trap, which is
+ * used internally by the kernel.
+ * NOTE(review): the masking is presumably done inside set_trap(); it is
+ * not visible in this function - confirm.
+ */
+static __always_inline int set_user_trap(struct task_struct *task, unsigned long trap)
+{
+ set_trap(task->thread.regs, trap);
+ return 0;
+}
+
+/*
+ * Read register REGNO of task TASK into *DATA.
+ *
+ * Returns 0 on success, or -EIO if the task has no saved user registers,
+ * DATA is NULL, or REGNO lies outside struct user_pt_regs.  A PT_DSCR
+ * request returns whatever get_user_dscr() returns.
+ */
+int ptrace_get_reg(struct task_struct *task, int regno, unsigned long *data)
+{
+	unsigned int nr_user_regs;
+
+	if (!task->thread.regs)
+		return -EIO;
+	if (!data)
+		return -EIO;
+
+	if (regno == PT_MSR) {
+		*data = get_user_msr(task);
+		return 0;
+	}
+
+	if (regno == PT_DSCR)
+		return get_user_dscr(task, data);
+
+	/*
+	 * softe mirrors the paca->irq_soft_mask state.  Since irq_soft_mask
+	 * is no longer used as a flag, always let userspace see softe as 1,
+	 * which means interrupts are not soft-disabled.
+	 */
+	if (IS_ENABLED(CONFIG_PPC64) && regno == PT_SOFTE) {
+		*data = 1;
+		return 0;
+	}
+
+	nr_user_regs = sizeof(struct user_pt_regs) / sizeof(unsigned long);
+	if (regno >= nr_user_regs)
+		return -EIO;
+
+	/* Sanitise the index against speculation before using it. */
+	regno = array_index_nospec(regno, nr_user_regs);
+	*data = ((unsigned long *)task->thread.regs)[regno];
+	return 0;
+}
+
+/*
+ * Write DATA to register REGNO of task TASK.
+ *
+ * Returns -EIO if the task has no saved user registers or REGNO is above
+ * PT_MAX_PUT_REG; PT_MSR, PT_TRAP and PT_DSCR return the result of their
+ * dedicated setter; any other accepted register returns 0.
+ */
+int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+{
+	if (!task->thread.regs)
+		return -EIO;
+
+	/* Registers with a dedicated setter are dispatched first. */
+	switch (regno) {
+	case PT_MSR:
+		return set_user_msr(task, data);
+	case PT_TRAP:
+		return set_user_trap(task, data);
+	case PT_DSCR:
+		return set_user_dscr(task, data);
+	default:
+		break;
+	}
+
+	if (regno > PT_MAX_PUT_REG)
+		return -EIO;
+
+	/* Sanitise the index against speculation before using it. */
+	regno = array_index_nospec(regno, PT_MAX_PUT_REG + 1);
+	((unsigned long *)task->thread.regs)[regno] = data;
+	return 0;
+}
+
+/*
+ * gpr_get - copy the native general-purpose register set out to @to.
+ *
+ * Writes struct user_pt_regs from the target's saved registers, then stores
+ * get_user_msr() into the msr slot and (on PPC64) 1 into the softe slot, and
+ * zero-fills the remainder up to ELF_NGREG longs.  Returns -EIO when the
+ * task has no thread.regs, otherwise the result of the final membuf_zero().
+ */
+static int gpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ /* Slot cursors are derived from 'to' before the bulk write below uses it. */
+ struct membuf to_msr = membuf_at(&to, offsetof(struct pt_regs, msr));
+#ifdef CONFIG_PPC64
+ struct membuf to_softe = membuf_at(&to, offsetof(struct pt_regs, softe));
+#endif
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ membuf_write(&to, target->thread.regs, sizeof(struct user_pt_regs));
+
+ /* Replace the raw msr with the user-visible value. */
+ membuf_store(&to_msr, get_user_msr(target));
+#ifdef CONFIG_PPC64
+ /* softe is always reported as 1, as in ptrace_get_reg(). */
+ membuf_store(&to_softe, 0x1ul);
+#endif
+ return membuf_zero(&to, ELF_NGREG * sizeof(unsigned long) -
+ sizeof(struct user_pt_regs));
+}
+
+/*
+ * gpr_set - write the native general-purpose register set.
+ *
+ * The input is consumed in position order: registers below PT_MSR go
+ * straight into thread.regs, MSR goes through set_user_msr(), orig_gpr3 up
+ * to PT_MAX_PUT_REG is copied directly, any slots between PT_MAX_PUT_REG
+ * and PT_TRAP are skipped, trap goes through set_user_trap(), and the rest
+ * of the input is ignored.  Returns -EIO when the task has no thread.regs,
+ * otherwise 0 or the first error from a copy-in/setter.
+ */
+static int gpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count, const void *kbuf,
+ const void __user *ubuf)
+{
+ unsigned long reg;
+ int ret;
+
+ if (target->thread.regs == NULL)
+ return -EIO;
+
+ /* Slots [0, PT_MSR) are copied in place. */
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.regs,
+ 0, PT_MSR * sizeof(reg));
+
+ /* MSR is staged in 'reg' so set_user_msr() can filter it. */
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_MSR * sizeof(reg),
+ (PT_MSR + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_msr(target, reg);
+ }
+
+ /* The next direct copy relies on orig_gpr3 immediately following msr. */
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct pt_regs, msr) + sizeof(long));
+
+ if (!ret)
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.regs->orig_gpr3,
+ PT_ORIG_R3 * sizeof(reg),
+ (PT_MAX_PUT_REG + 1) * sizeof(reg));
+
+ /* Skip input for any slots between the last writable one and trap. */
+ if (PT_MAX_PUT_REG + 1 < PT_TRAP && !ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_MAX_PUT_REG + 1) * sizeof(reg),
+ PT_TRAP * sizeof(reg));
+
+ /* trap is staged in 'reg' and applied through set_user_trap(). */
+ if (!ret && count > 0) {
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &reg,
+ PT_TRAP * sizeof(reg),
+ (PT_TRAP + 1) * sizeof(reg));
+ if (!ret)
+ ret = set_user_trap(target, reg);
+ }
+
+ /* Everything after trap is accepted but discarded. */
+ if (!ret)
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC64
+/* Copy the saved PPR (one u64) out to @to; -EINVAL if there are no saved regs. */
+static int ppr_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	struct pt_regs *regs = target->thread.regs;
+
+	if (!regs)
+		return -EINVAL;
+
+	return membuf_write(&to, &regs->ppr, sizeof(u64));
+}
+
+/* Copy one u64 from the caller into the saved PPR; -EINVAL if there are no saved regs. */
+static int ppr_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	struct pt_regs *regs = target->thread.regs;
+
+	if (!regs)
+		return -EINVAL;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &regs->ppr, 0, sizeof(u64));
+}
+
+/* Copy thread.dscr (one u64) out to @to. */
+static int dscr_get(struct task_struct *target, const struct user_regset *regset,
+		    struct membuf to)
+{
+	const struct thread_struct *t = &target->thread;
+
+	return membuf_write(&to, &t->dscr, sizeof(u64));
+}
+/* Copy one u64 from the caller into thread.dscr. */
+static int dscr_set(struct task_struct *target, const struct user_regset *regset,
+		    unsigned int pos, unsigned int count, const void *kbuf,
+		    const void __user *ubuf)
+{
+	struct thread_struct *t = &target->thread;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &t->dscr, 0, sizeof(u64));
+}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Copy thread.tar (one u64) out to @to. */
+static int tar_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	const struct thread_struct *t = &target->thread;
+
+	return membuf_write(&to, &t->tar, sizeof(u64));
+}
+/* Copy one u64 from the caller into thread.tar. */
+static int tar_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	struct thread_struct *t = &target->thread;
+
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &t->tar, 0, sizeof(u64));
+}
+
+/*
+ * Number of EBB registers currently exposed: -ENODEV without
+ * CPU_FTR_ARCH_207S, regset->n once thread.used_ebb is set, else 0.
+ */
+static int ebb_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return target->thread.used_ebb ? regset->n : 0;
+}
+
+/*
+ * Copy the three EBB registers (ebbrr, ebbhr, bescr) out to @to.
+ * Returns -ENODEV without CPU_FTR_ARCH_207S and -ENODATA when
+ * thread.used_ebb is not set.
+ */
+static int ebb_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	const struct thread_struct *t = &target->thread;
+
+	/* Build tests: the single 3-word write needs the fields adjacent */
+	BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+	BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	if (!t->used_ebb)
+		return -ENODATA;
+
+	return membuf_write(&to, &t->ebbrr, 3 * sizeof(unsigned long));
+}
+
+/*
+ * ebb_set - write the EBB registers (ebbrr, ebbhr, bescr) of the target.
+ *
+ * Returns -ENODEV without CPU_FTR_ARCH_207S, -ENODATA when the target has
+ * not used EBB, otherwise 0 or the first error from user_regset_copyin().
+ */
+static int ebb_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	int ret;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(ebbrr) + sizeof(unsigned long) != TSO(ebbhr));
+	BUILD_BUG_ON(TSO(ebbhr) + sizeof(unsigned long) != TSO(bescr));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	/*
+	 * The regset only exists once the task has used EBB: ebb_active()
+	 * reports zero registers and ebb_get() returns -ENODATA otherwise.
+	 * This check used to be inverted, refusing writes exactly when the
+	 * regset was reported as present.
+	 */
+	if (!target->thread.used_ebb)
+		return -ENODATA;
+
+	/* ebbrr: word 0 */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.ebbrr,
+				 0, sizeof(unsigned long));
+	if (ret)
+		return ret;
+
+	/* ebbhr: word 1 */
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.ebbhr, sizeof(unsigned long),
+				 2 * sizeof(unsigned long));
+	if (ret)
+		return ret;
+
+	/* bescr: word 2 */
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &target->thread.bescr,
+				  2 * sizeof(unsigned long),
+				  3 * sizeof(unsigned long));
+}
+/* Number of PMU registers exposed: regset->n, or -ENODEV without CPU_FTR_ARCH_207S. */
+static int pmu_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return regset->n;
+
+	return -ENODEV;
+}
+
+/*
+ * Copy the five PMU registers (siar, sdar, sier, mmcr2, mmcr0) out to @to.
+ * Returns -ENODEV without CPU_FTR_ARCH_207S.
+ */
+static int pmu_get(struct task_struct *target, const struct user_regset *regset,
+		   struct membuf to)
+{
+	const struct thread_struct *t = &target->thread;
+
+	/* Build tests: the single 5-word write needs the fields adjacent */
+	BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+	BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+	BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+	BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	return membuf_write(&to, &t->siar, 5 * sizeof(unsigned long));
+}
+
+/*
+ * Write the five PMU registers in layout order (siar, sdar, sier, mmcr2,
+ * mmcr0), stopping at the first copy-in error.  Returns -ENODEV without
+ * CPU_FTR_ARCH_207S.
+ */
+static int pmu_set(struct task_struct *target, const struct user_regset *regset,
+		   unsigned int pos, unsigned int count, const void *kbuf,
+		   const void __user *ubuf)
+{
+	struct thread_struct *t = &target->thread;
+	int err;
+
+	/* Build tests */
+	BUILD_BUG_ON(TSO(siar) + sizeof(unsigned long) != TSO(sdar));
+	BUILD_BUG_ON(TSO(sdar) + sizeof(unsigned long) != TSO(sier));
+	BUILD_BUG_ON(TSO(sier) + sizeof(unsigned long) != TSO(mmcr2));
+	BUILD_BUG_ON(TSO(mmcr2) + sizeof(unsigned long) != TSO(mmcr0));
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return -ENODEV;
+
+	/* siar: word 0 */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &t->siar,
+				 0, sizeof(unsigned long));
+	if (err)
+		return err;
+
+	/* sdar: word 1 */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &t->sdar, sizeof(unsigned long),
+				 2 * sizeof(unsigned long));
+	if (err)
+		return err;
+
+	/* sier: word 2 */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &t->sier, 2 * sizeof(unsigned long),
+				 3 * sizeof(unsigned long));
+	if (err)
+		return err;
+
+	/* mmcr2: word 3 */
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &t->mmcr2, 3 * sizeof(unsigned long),
+				 4 * sizeof(unsigned long));
+	if (err)
+		return err;
+
+	/* mmcr0: word 4 */
+	return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				  &t->mmcr0, 4 * sizeof(unsigned long),
+				  5 * sizeof(unsigned long));
+}
+
+/* Number of DEXCR registers exposed: regset->n, or -ENODEV without CPU_FTR_ARCH_31. */
+static int dexcr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return regset->n;
+
+	return -ENODEV;
+}
+
+/*
+ * dexcr_get - report the DEXCR regset as two u64 values.
+ *
+ * First the low 32 bits of the target's thread.dexcr, then the low 32 bits
+ * of SPRN_HDEXCR_RO as read on the executing CPU.  Returns -ENODEV without
+ * CPU_FTR_ARCH_31, otherwise the result of the last membuf_store().
+ */
+static int dexcr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ return -ENODEV;
+
+ membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr));
+
+ /*
+ * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably
+ * change it between CPUs of the same guest.
+ */
+ return membuf_store(&to, (u64)lower_32_bits(mfspr(SPRN_HDEXCR_RO)));
+}
+
+#ifdef CONFIG_CHECKPOINT_RESTORE
+/* Number of HASHKEYR registers exposed: regset->n, or -ENODEV without CPU_FTR_ARCH_31. */
+static int hashkeyr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return regset->n;
+
+	return -ENODEV;
+}
+
+/* Store thread.hashkeyr into @to; -ENODEV without CPU_FTR_ARCH_31. */
+static int hashkeyr_get(struct task_struct *target, const struct user_regset *regset,
+			struct membuf to)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return membuf_store(&to, target->thread.hashkeyr);
+
+	return -ENODEV;
+}
+
+/* Copy one unsigned long from the caller into thread.hashkeyr; -ENODEV without CPU_FTR_ARCH_31. */
+static int hashkeyr_set(struct task_struct *target, const struct user_regset *regset,
+			unsigned int pos, unsigned int count, const void *kbuf,
+			const void __user *ubuf)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+					  &target->thread.hashkeyr,
+					  0, sizeof(unsigned long));
+
+	return -ENODEV;
+}
+#endif /* CONFIG_CHECKPOINT_RESTORE */
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_MEM_KEYS
+/* Number of pkey registers exposed: regset->n, or -ENODEV when pkeys are not enabled. */
+static int pkey_active(struct task_struct *target, const struct user_regset *regset)
+{
+	if (arch_pkeys_enabled())
+		return regset->n;
+
+	return -ENODEV;
+}
+
+/*
+ * pkey_get - report the memory-protection-key registers.
+ *
+ * Stores, in order, the target's saved amr and iamr followed by the global
+ * default_uamor.  Returns -ENODEV when pkeys are not enabled, otherwise the
+ * result of the last membuf_store().
+ */
+static int pkey_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+
+ if (!arch_pkeys_enabled())
+ return -ENODEV;
+
+ membuf_store(&to, target->thread.regs->amr);
+ membuf_store(&to, target->thread.regs->iamr);
+ return membuf_store(&to, default_uamor);
+}
+
+/*
+ * pkey_set - update the target's AMR from userspace.
+ *
+ * Accepts exactly one u64 at position 0.  Returns -ENODEV when pkeys are
+ * not enabled, -EINVAL for any other position/size, the copy-in error if
+ * one occurs, and 0 on success.
+ */
+static int pkey_set(struct task_struct *target, const struct user_regset *regset,
+		    unsigned int pos, unsigned int count, const void *kbuf,
+		    const void __user *ubuf)
+{
+	struct pt_regs *regs;
+	u64 requested;
+	int err;
+
+	if (!arch_pkeys_enabled())
+		return -ENODEV;
+
+	/* Only the AMR can be set from userspace */
+	if (pos || count != sizeof(requested))
+		return -EINVAL;
+
+	err = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &requested, 0, sizeof(requested));
+	if (err)
+		return err;
+
+	/*
+	 * UAMOR determines which bits of the AMR can be set from userspace.
+	 * UAMOR value 0b11 indicates that the AMR value can be modified
+	 * from userspace. If the kernel is using a specific key, we avoid
+	 * userspace modifying the AMR value for that key by masking them
+	 * via UAMOR 0b00.
+	 *
+	 * Keep the current AMR bits for the keys the kernel is using
+	 * (the ~default_uamor bits) and take the rest from the request.
+	 */
+	regs = target->thread.regs;
+	regs->amr = (regs->amr & ~default_uamor) |
+		    (requested & default_uamor);
+
+	return 0;
+}
+#endif /* CONFIG_PPC_MEM_KEYS */
+
+/*
+ * Regsets of the native view, indexed by the REGSET_* constants.  Each entry
+ * gives the note type, the number of registers (.n), the per-register size
+ * and alignment, and the accessor callbacks.  Entries without .active are
+ * always treated as present by their accessors; REGSET_DEXCR has no .set
+ * callback.  Which entries exist depends on the CONFIG_* options below.
+ */
+static const struct user_regset native_regsets[] = {
+ [REGSET_GPR] = {
+ USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .regset_get = gpr_get, .set = gpr_set
+ },
+ [REGSET_FPR] = {
+ USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .regset_get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ USER_REGSET_NOTE_TYPE(PPC_VMX), .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .regset_get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_VSX
+ [REGSET_VSX] = {
+ USER_REGSET_NOTE_TYPE(PPC_VSX), .n = 32,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = vsr_active, .regset_get = vsr_get, .set = vsr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ USER_REGSET_NOTE_TYPE(PPC_SPE), .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .regset_get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CGPR), .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active, .regset_get = tm_cgpr_get, .set = tm_cgpr_set
+ },
+ [REGSET_TM_CFPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CFPR), .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CVMX), .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CVSX), .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_SPR), .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CTAR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CPPR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CDSCR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_PPR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ USER_REGSET_NOTE_TYPE(PPC_DSCR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TAR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ USER_REGSET_NOTE_TYPE(PPC_EBB), .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .regset_get = ebb_get, .set = ebb_set
+ },
+ [REGSET_PMR] = {
+ USER_REGSET_NOTE_TYPE(PPC_PMU), .n = ELF_NPMU,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pmu_active, .regset_get = pmu_get, .set = pmu_set
+ },
+ [REGSET_DEXCR] = {
+ USER_REGSET_NOTE_TYPE(PPC_DEXCR), .n = ELF_NDEXCR,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = dexcr_active, .regset_get = dexcr_get
+ },
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ [REGSET_HASHKEYR] = {
+ USER_REGSET_NOTE_TYPE(PPC_HASHKEYR), .n = ELF_NHASHKEYR,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = hashkeyr_active, .regset_get = hashkeyr_get, .set = hashkeyr_set
+ },
+#endif
+#endif
+#ifdef CONFIG_PPC_MEM_KEYS
+ [REGSET_PKEY] = {
+ USER_REGSET_NOTE_TYPE(PPC_PKEY), .n = ELF_NPKEY,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = pkey_active, .regset_get = pkey_get, .set = pkey_set
+ },
+#endif
+};
+
+/* Regset view that exposes the native_regsets table defined above. */
+const struct user_regset_view user_ppc_native_view = {
+ .name = UTS_MACHINE, .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI,
+ .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets)
+};
+
+#include <linux/compat.h>
+
+/*
+ * Store the registers in @regs to @to, each one cast down to a u32.
+ *
+ * The slot at index PT_MSR is taken from get_user_msr(target) instead of
+ * @regs, and the entries from PT_REGS_COUNT up to ELF_NGREG are zero-filled.
+ *
+ * Returns the value of the final membuf_zero() call.
+ */
+int gpr32_get_common(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to, unsigned long *regs)
+{
+ int i;
+
+ for (i = 0; i < PT_MSR; i++)
+ membuf_store(&to, (u32)regs[i]);
+ membuf_store(&to, (u32)get_user_msr(target));
+ /* i == PT_MSR here; step past that slot, which was stored just above */
+ for (i++ ; i < PT_REGS_COUNT; i++)
+ membuf_store(&to, (u32)regs[i]);
+ return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32));
+}
+
+/*
+ * Kernel-buffer variant of the 32-bit GPR write.
+ *
+ * @pos and @count arrive as byte values and are converted to units of
+ * compat_ulong_t. Values are then consumed from @kbuf one at a time:
+ * - indices below PT_MSR are stored straight into @regs,
+ * - index PT_MSR goes through set_user_msr(),
+ * - indices up to and including PT_MAX_PUT_REG are stored into @regs,
+ * - indices after that and below PT_TRAP are consumed but not stored,
+ * - index PT_TRAP goes through set_user_trap().
+ * Whatever is left is handed to user_regset_copyin_ignore().
+ *
+ * Always returns 0.
+ */
+static int gpr32_set_common_kernel(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, unsigned long *regs)
+{
+ const compat_ulong_t *k = kbuf;
+
+ /* work in register-sized units from here on */
+ pos /= sizeof(compat_ulong_t);
+ count /= sizeof(compat_ulong_t);
+
+ for (; count > 0 && pos < PT_MSR; --count)
+ regs[pos++] = *k++;
+
+ if (count > 0 && pos == PT_MSR) {
+ set_user_msr(target, *k++);
+ ++pos;
+ --count;
+ }
+
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count)
+ regs[pos++] = *k++;
+ /* skip over the source values for slots that are not written */
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ ++k;
+
+ if (count > 0 && pos == PT_TRAP) {
+ set_user_trap(target, *k++);
+ ++pos;
+ --count;
+ }
+
+ /* convert the cursor back to bytes for the generic helper */
+ kbuf = k;
+ pos *= sizeof(compat_ulong_t);
+ count *= sizeof(compat_ulong_t);
+ user_regset_copyin_ignore(&pos, &count, &kbuf, NULL,
+ (PT_TRAP + 1) * sizeof(compat_ulong_t), -1);
+ return 0;
+}
+
+/*
+ * User-buffer variant of the 32-bit GPR write.
+ *
+ * Follows the same per-index rules as gpr32_set_common_kernel(), but each
+ * value is fetched from @ubuf with unsafe_get_user() inside a
+ * user_read_access_begin()/user_read_access_end() window.
+ *
+ * Returns 0 on success, or -EFAULT if user_read_access_begin() fails or any
+ * unsafe_get_user() branches to the Efault label.
+ */
+static int gpr32_set_common_user(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void __user *ubuf, unsigned long *regs)
+{
+ const compat_ulong_t __user *u = ubuf;
+ const void *kbuf = NULL;
+ compat_ulong_t reg;
+
+ /* count is still the byte count at this point */
+ if (!user_read_access_begin(u, count))
+ return -EFAULT;
+
+ /* work in register-sized units from here on */
+ pos /= sizeof(reg);
+ count /= sizeof(reg);
+
+ for (; count > 0 && pos < PT_MSR; --count) {
+ unsafe_get_user(reg, u++, Efault);
+ regs[pos++] = reg;
+ }
+
+ if (count > 0 && pos == PT_MSR) {
+ unsafe_get_user(reg, u++, Efault);
+ set_user_msr(target, reg);
+ ++pos;
+ --count;
+ }
+
+ for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) {
+ unsafe_get_user(reg, u++, Efault);
+ regs[pos++] = reg;
+ }
+ /* values for these slots are read but then discarded */
+ for (; count > 0 && pos < PT_TRAP; --count, ++pos)
+ unsafe_get_user(reg, u++, Efault);
+
+ if (count > 0 && pos == PT_TRAP) {
+ unsafe_get_user(reg, u++, Efault);
+ set_user_trap(target, reg);
+ ++pos;
+ --count;
+ }
+ user_read_access_end();
+
+ /* convert the cursor back to bytes for the generic helper */
+ ubuf = u;
+ pos *= sizeof(reg);
+ count *= sizeof(reg);
+ user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ (PT_TRAP + 1) * sizeof(reg), -1);
+ return 0;
+
+Efault:
+ /* close the user access window before reporting the fault */
+ user_read_access_end();
+ return -EFAULT;
+}
+
+/*
+ * Store 32-bit register values into @regs, taking the data from @kbuf when
+ * one is supplied and from the user pointer @ubuf otherwise.
+ */
+int gpr32_set_common(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf,
+		     unsigned long *regs)
+{
+	if (!kbuf)
+		return gpr32_set_common_user(target, regset, pos, count,
+					     ubuf, regs);
+
+	return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs);
+}
+
+/*
+ * regset_get handler of the compat GPR regset: returns -EIO when the task
+ * has no thread.regs, otherwise forwards to gpr32_get_common().
+ */
+static int gpr32_get(struct task_struct *target,
+		     const struct user_regset *regset,
+		     struct membuf to)
+{
+	struct pt_regs *user_regs = target->thread.regs;
+
+	if (!user_regs)
+		return -EIO;
+
+	return gpr32_get_common(target, regset, to, &user_regs->gpr[0]);
+}
+
+/*
+ * set handler of the compat GPR regset: returns -EIO when the task has no
+ * thread.regs, otherwise forwards to gpr32_set_common().
+ */
+static int gpr32_set(struct task_struct *target,
+		     const struct user_regset *regset,
+		     unsigned int pos, unsigned int count,
+		     const void *kbuf, const void __user *ubuf)
+{
+	struct pt_regs *user_regs = target->thread.regs;
+
+	if (!user_regs)
+		return -EIO;
+
+	return gpr32_set_common(target, regset, pos, count, kbuf, ubuf,
+				&user_regs->gpr[0]);
+}
+
+/*
+ * These are the regset flavors matching the CONFIG_PPC32 native set.
+ *
+ * The table is referenced by user_ppc_compat_view. Only REGSET_GPR and
+ * REGSET_TM_CGPR name 32-bit specific handlers (gpr32_*, tm_cgpr32_*).
+ */
+static const struct user_regset compat_regsets[] = {
+ [REGSET_GPR] = {
+ USER_REGSET_NOTE_TYPE(PRSTATUS), .n = ELF_NGREG,
+ .size = sizeof(compat_long_t), .align = sizeof(compat_long_t),
+ .regset_get = gpr32_get, .set = gpr32_set
+ },
+ [REGSET_FPR] = {
+ USER_REGSET_NOTE_TYPE(PRFPREG), .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .regset_get = fpr_get, .set = fpr_set
+ },
+#ifdef CONFIG_ALTIVEC
+ [REGSET_VMX] = {
+ USER_REGSET_NOTE_TYPE(PPC_VMX), .n = 34,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = vr_active, .regset_get = vr_get, .set = vr_set
+ },
+#endif
+#ifdef CONFIG_SPE
+ [REGSET_SPE] = {
+ USER_REGSET_NOTE_TYPE(PPC_SPE), .n = 35,
+ .size = sizeof(u32), .align = sizeof(u32),
+ .active = evr_active, .regset_get = evr_get, .set = evr_set
+ },
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ [REGSET_TM_CGPR] = {
+ /*
+ * NOTE(review): .size/.align use sizeof(long) here, whereas
+ * REGSET_GPR above uses sizeof(compat_long_t) - confirm this is
+ * intended for the tm_cgpr32_* handlers.
+ */
+ USER_REGSET_NOTE_TYPE(PPC_TM_CGPR), .n = ELF_NGREG,
+ .size = sizeof(long), .align = sizeof(long),
+ .active = tm_cgpr_active,
+ .regset_get = tm_cgpr32_get, .set = tm_cgpr32_set
+ },
+ [REGSET_TM_CFPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CFPR), .n = ELF_NFPREG,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cfpr_active, .regset_get = tm_cfpr_get, .set = tm_cfpr_set
+ },
+ [REGSET_TM_CVMX] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CVMX), .n = ELF_NVMX,
+ .size = sizeof(vector128), .align = sizeof(vector128),
+ .active = tm_cvmx_active, .regset_get = tm_cvmx_get, .set = tm_cvmx_set
+ },
+ [REGSET_TM_CVSX] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CVSX), .n = ELF_NVSX,
+ .size = sizeof(double), .align = sizeof(double),
+ .active = tm_cvsx_active, .regset_get = tm_cvsx_get, .set = tm_cvsx_set
+ },
+ [REGSET_TM_SPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_SPR), .n = ELF_NTMSPRREG,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_spr_active, .regset_get = tm_spr_get, .set = tm_spr_set
+ },
+ [REGSET_TM_CTAR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CTAR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_tar_active, .regset_get = tm_tar_get, .set = tm_tar_set
+ },
+ [REGSET_TM_CPPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CPPR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_ppr_active, .regset_get = tm_ppr_get, .set = tm_ppr_set
+ },
+ [REGSET_TM_CDSCR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TM_CDSCR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = tm_dscr_active, .regset_get = tm_dscr_get, .set = tm_dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC64
+ [REGSET_PPR] = {
+ USER_REGSET_NOTE_TYPE(PPC_PPR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = ppr_get, .set = ppr_set
+ },
+ [REGSET_DSCR] = {
+ USER_REGSET_NOTE_TYPE(PPC_DSCR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = dscr_get, .set = dscr_set
+ },
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ [REGSET_TAR] = {
+ USER_REGSET_NOTE_TYPE(PPC_TAR), .n = 1,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .regset_get = tar_get, .set = tar_set
+ },
+ [REGSET_EBB] = {
+ USER_REGSET_NOTE_TYPE(PPC_EBB), .n = ELF_NEBB,
+ .size = sizeof(u64), .align = sizeof(u64),
+ .active = ebb_active, .regset_get = ebb_get, .set = ebb_set
+ },
+#endif
+};
+
+/* Regset view that exposes the compat_regsets table as an EM_PPC machine. */
+static const struct user_regset_view user_ppc_compat_view = {
+ .name = "ppc", .e_machine = EM_PPC, .ei_osabi = ELF_OSABI,
+ .regsets = compat_regsets, .n = ARRAY_SIZE(compat_regsets)
+};
+
+/*
+ * Pick the regset view for @task: the compat view for a 32-bit task on a
+ * CONFIG_COMPAT kernel, the native view in every other case.
+ */
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+	if (!IS_ENABLED(CONFIG_COMPAT) || !is_tsk_32bit_task(task))
+		return &user_ppc_native_view;
+
+	return &user_ppc_compat_view;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-vsx.c b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000000..7df08004c47d
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace-vsx.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/regset.h>
+
+#include <asm/switch_to.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ * flush_fp_to_thread() is called first; the 32 TS_FPR() values and
+ * fp_state.fpscr are then gathered into a local buffer in the layout above
+ * and written to @to. Returns the result of membuf_write().
+ */
+int fpr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ /* copy to local buffer then write that out */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+ return membuf_write(&to, buf, 33 * sizeof(u64));
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last checkpointed
+ * value of all FPR registers for the current transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 fpr[32];
+ * u64 fpscr;
+ * };
+ *
+ * The local buffer is first seeded with the current TS_FPR()/fpscr values,
+ * so the thread state written back at the end still holds the old value for
+ * anything user_regset_copyin() did not touch.
+ *
+ * Returns 0 on success or the nonzero result of user_regset_copyin(), in
+ * which case the thread state is left unmodified.
+ */
+int fpr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[33];
+ int i;
+
+ flush_fp_to_thread(target);
+
+ /* snapshot the current values into the local buffer */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.TS_FPR(i);
+ buf[32] = target->thread.fp_state.fpscr;
+
+ /* bring the caller's data into the local buffer, then write it back below */
+ i = user_regset_copyin(&pos, &count, &kbuf, &ubuf, buf, 0, -1);
+ if (i)
+ return i;
+
+ for (i = 0; i < 32 ; i++)
+ target->thread.TS_FPR(i) = buf[i];
+ target->thread.fp_state.fpscr = buf[32];
+ return 0;
+}
+
+/*
+ * Currently, to set and get all the VSX state, you need to call
+ * the FP and VMX calls as well. This only gets/sets the lower 32
+ * 128-bit VSX registers.
+ */
+
+/*
+ * .active callback for the VSX regset: after flushing VSX state to the
+ * thread, report regset->n when the task has used_vsr set and 0 otherwise.
+ */
+int vsr_active(struct task_struct *target, const struct user_regset *regset)
+{
+	flush_vsx_to_thread(target);
+
+	if (!target->thread.used_vsr)
+		return 0;
+
+	return regset->n;
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ *
+ * Each vsx[i] is read from fp_state.fpr[i][TS_VSRLOWOFFSET] after the TM,
+ * FP, Altivec and VSX flush helpers have been called. Returns the result of
+ * membuf_write().
+ */
+int vsr_get(struct task_struct *target, const struct user_regset *regset,
+ struct membuf to)
+{
+ u64 buf[32];
+ int i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ /* length is spelled as 32 doubles although buf is declared as 32 u64s */
+ return membuf_write(&to, buf, 32 * sizeof(double));
+}
+
+/*
+ * Regardless of transactions, 'fp_state' holds the current running
+ * value of all FPR registers and 'ckfp_state' holds the last
+ * checkpointed value of all FPR registers for the current
+ * transaction.
+ *
+ * Userspace interface buffer layout:
+ *
+ * struct data {
+ * u64 vsx[32];
+ * };
+ *
+ * The local buffer is seeded from fp_state.fpr[i][TS_VSRLOWOFFSET] before
+ * user_regset_copyin() runs, and is written back only when that call
+ * returns 0. Returns the result of user_regset_copyin().
+ */
+int vsr_set(struct task_struct *target, const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ u64 buf[32];
+ int ret, i;
+
+ flush_tmregs_to_thread(target);
+ flush_fp_to_thread(target);
+ flush_altivec_to_thread(target);
+ flush_vsx_to_thread(target);
+
+ /* snapshot the current values into the local buffer */
+ for (i = 0; i < 32 ; i++)
+ buf[i] = target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ buf, 0, 32 * sizeof(double));
+ /* commit to the thread state only if the copy-in succeeded */
+ if (!ret)
+ for (i = 0; i < 32 ; i++)
+ target->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c
new file mode 100644
index 000000000000..c6997df63287
--- /dev/null
+++ b/arch/powerpc/kernel/ptrace/ptrace.c
@@ -0,0 +1,447 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Derived from "arch/m68k/kernel/ptrace.c"
+ * Copyright (C) 1994 by Hamish Macdonald
+ * Taken from linux/kernel/ptrace.c and modified for M680x0.
+ * linux/kernel/ptrace.c is by Ross Biro 1/23/92, edited by Linus Torvalds
+ *
+ * Modified by Cort Dougan (cort@hq.fsmlabs.com)
+ * and Paul Mackerras (paulus@samba.org).
+ */
+
+#include <linux/regset.h>
+#include <linux/ptrace.h>
+#include <linux/audit.h>
+#include <linux/context_tracking.h>
+#include <linux/syscalls.h>
+
+#include <asm/switch_to.h>
+#include <asm/debug.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
+#include "ptrace-decl.h"
+
+/*
+ * Called by kernel/ptrace.c when detaching.
+ *
+ * Make sure single step bits etc. are not set; at present this only calls
+ * user_disable_single_step() on @child.
+ */
+void ptrace_disable(struct task_struct *child)
+{
+ /* make sure the single step bit is not set. */
+ user_disable_single_step(child);
+}
+
+/*
+ * Architecture-specific ptrace request handler.
+ *
+ * @child: the traced task
+ * @request: the ptrace request code selecting a case below
+ * @addr: request argument; a byte offset into the USER area for
+ * PTRACE_PEEKUSR/PTRACE_POKEUSR
+ * @data: request argument; used either as a value or as a user pointer
+ *
+ * Requests without a case below are forwarded to ptrace_request().
+ *
+ * Returns the result of the selected handler. PEEKUSR/POKEUSR return -EIO
+ * for an @addr that is not a multiple of sizeof(long) or when the child has
+ * no thread.regs; the hwdebug requests return -EFAULT when the user copy
+ * fails.
+ */
+long arch_ptrace(struct task_struct *child, long request,
+ unsigned long addr, unsigned long data)
+{
+ int ret = -EPERM;
+ void __user *datavp = (void __user *) data;
+ unsigned long __user *datalp = datavp;
+
+ switch (request) {
+ /* read the word at location addr in the USER area. */
+ case PTRACE_PEEKUSR: {
+ unsigned long index, tmp;
+
+ ret = -EIO;
+ /* convert to index and check */
+ index = addr / sizeof(long);
+ if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
+ break;
+
+ /* indices below PT_FPR0 use the GPR accessor, the rest the FPR one */
+ if (index < PT_FPR0)
+ ret = ptrace_get_reg(child, (int) index, &tmp);
+ else
+ ret = ptrace_get_fpr(child, index, &tmp);
+
+ if (ret)
+ break;
+ ret = put_user(tmp, datalp);
+ break;
+ }
+
+ /* write the word at location addr in the USER area */
+ case PTRACE_POKEUSR: {
+ unsigned long index;
+
+ ret = -EIO;
+ /* convert to index and check */
+ index = addr / sizeof(long);
+ if ((addr & (sizeof(long) - 1)) || !child->thread.regs)
+ break;
+
+ if (index < PT_FPR0)
+ ret = ptrace_put_reg(child, index, data);
+ else
+ ret = ptrace_put_fpr(child, index, data);
+ break;
+ }
+
+ /* copy a struct ppc_debug_info filled by ppc_gethwdinfo() to user space */
+ case PPC_PTRACE_GETHWDBGINFO: {
+ struct ppc_debug_info dbginfo;
+
+ ppc_gethwdinfo(&dbginfo);
+
+ if (copy_to_user(datavp, &dbginfo,
+ sizeof(struct ppc_debug_info)))
+ return -EFAULT;
+ return 0;
+ }
+
+ /* read a struct ppc_hw_breakpoint from user space and install it */
+ case PPC_PTRACE_SETHWDEBUG: {
+ struct ppc_hw_breakpoint bp_info;
+
+ if (copy_from_user(&bp_info, datavp,
+ sizeof(struct ppc_hw_breakpoint)))
+ return -EFAULT;
+ return ppc_set_hwdebug(child, &bp_info);
+ }
+
+ case PPC_PTRACE_DELHWDEBUG: {
+ ret = ppc_del_hwdebug(child, data);
+ break;
+ }
+
+ case PTRACE_GET_DEBUGREG:
+ ret = ptrace_get_debugreg(child, addr, datalp);
+ break;
+
+ case PTRACE_SET_DEBUGREG:
+ ret = ptrace_set_debugreg(child, addr, data);
+ break;
+
+ /* on CONFIG_PPC64 the *REGS64 requests share the code of *REGS */
+#ifdef CONFIG_PPC64
+ case PTRACE_GETREGS64:
+#endif
+ case PTRACE_GETREGS: /* Get all pt_regs from the child. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+#ifdef CONFIG_PPC64
+ case PTRACE_SETREGS64:
+#endif
+ case PTRACE_SETREGS: /* Set all gp regs in the child. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_GPR,
+ 0, sizeof(struct user_pt_regs),
+ datavp);
+
+ case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+ case PTRACE_SETFPREGS: /* Set the child FPU state (FPR0...31 + FPSCR) */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_FPR,
+ 0, sizeof(elf_fpregset_t),
+ datavp);
+
+#ifdef CONFIG_ALTIVEC
+ /* transfer size: 33 vector128 entries followed by one u32 */
+ case PTRACE_GETVRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+
+ case PTRACE_SETVRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VMX,
+ 0, (33 * sizeof(vector128) +
+ sizeof(u32)),
+ datavp);
+#endif
+#ifdef CONFIG_VSX
+ case PTRACE_GETVSRREGS:
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+
+ case PTRACE_SETVSRREGS:
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_VSX,
+ 0, 32 * sizeof(double),
+ datavp);
+#endif
+#ifdef CONFIG_SPE
+ case PTRACE_GETEVRREGS:
+ /* Get the child spe register state. */
+ return copy_regset_to_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+
+ case PTRACE_SETEVRREGS:
+ /* Set the child spe register state. */
+ return copy_regset_from_user(child, &user_ppc_native_view,
+ REGSET_SPE, 0, 35 * sizeof(u32),
+ datavp);
+#endif
+
+ /* everything else is handled by the generic ptrace code */
+ default:
+ ret = ptrace_request(child, request, addr, data);
+ break;
+ }
+ return ret;
+}
+
+#ifdef CONFIG_SECCOMP
+/*
+ * Run seccomp for the syscall described by @regs when TIF_SECCOMP is set.
+ *
+ * Returns 0 when TIF_SECCOMP is clear or __secure_computing() returns 0,
+ * and -1 when __secure_computing() returns nonzero. Without CONFIG_SECCOMP
+ * this is a stub that always returns 0.
+ */
+static int do_seccomp(struct pt_regs *regs)
+{
+ if (!test_thread_flag(TIF_SECCOMP))
+ return 0;
+
+ /*
+ * The ABI we present to seccomp tracers is that r3 contains
+ * the syscall return value and orig_gpr3 contains the first
+ * syscall parameter. This is different to the ptrace ABI where
+ * both r3 and orig_gpr3 contain the first syscall parameter.
+ */
+ regs->gpr[3] = -ENOSYS;
+
+ /*
+ * We use the __ version here because we have already checked
+ * TIF_SECCOMP. If this fails, there is nothing left to do, we
+ * have already loaded -ENOSYS into r3, or seccomp has put
+ * something else in r3 (via SECCOMP_RET_ERRNO/TRACE).
+ */
+ if (__secure_computing())
+ return -1;
+
+ /*
+ * The syscall was allowed by seccomp, restore the register
+ * state to what audit expects.
+ * Note that we use orig_gpr3, which means a seccomp tracer can
+ * modify the first syscall parameter (in orig_gpr3) and also
+ * allow the syscall to proceed.
+ */
+ regs->gpr[3] = regs->orig_gpr3;
+
+ return 0;
+}
+#else
+static inline int do_seccomp(struct pt_regs *regs) { return 0; }
+#endif /* CONFIG_SECCOMP */
+
+/**
+ * do_syscall_trace_enter() - Do syscall tracing on kernel entry.
+ * @regs: the pt_regs of the task to trace (current)
+ *
+ * Performs various types of tracing on syscall entry. This includes seccomp,
+ * ptrace, syscall tracepoints and audit.
+ *
+ * The pt_regs are potentially visible to userspace via ptrace, so their
+ * contents are ABI.
+ *
+ * One or more of the tracers may modify the contents of pt_regs, in particular
+ * to modify arguments or even the syscall number itself.
+ *
+ * It's also possible that a tracer can choose to reject the system call. In
+ * that case this function will return an illegal syscall number, and will put
+ * an appropriate return value in regs->gpr[3].
+ *
+ * Return: the (possibly changed) syscall number, or -1 when the syscall is
+ * to be skipped.
+ */
+long do_syscall_trace_enter(struct pt_regs *regs)
+{
+ u32 flags;
+
+ flags = read_thread_flags() & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE);
+
+ if (flags) {
+ int rc = ptrace_report_syscall_entry(regs);
+
+ if (unlikely(flags & _TIF_SYSCALL_EMU)) {
+ /*
+ * A nonzero return code from
+ * ptrace_report_syscall_entry() tells us to prevent
+ * the syscall execution, but we are not going to
+ * execute it anyway.
+ *
+ * Returning -1 will skip the syscall execution. We want
+ * to avoid clobbering any registers, so we don't goto
+ * the skip label below.
+ */
+ return -1;
+ }
+
+ if (rc) {
+ /*
+ * The tracer decided to abort the syscall. Note that
+ * the tracer may also just change regs->gpr[0] to an
+ * invalid syscall number, that is handled below on the
+ * exit path.
+ */
+ goto skip;
+ }
+ }
+
+ /* Run seccomp after ptrace; allow it to set gpr[3]. */
+ if (do_seccomp(regs))
+ return -1;
+
+ /* Avoid trace and audit when syscall is invalid. */
+ if (regs->gpr[0] >= NR_syscalls)
+ goto skip;
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_enter(regs, regs->gpr[0]);
+
+ /* 32-bit tasks have their audit arguments masked to the low 32 bits */
+ if (!is_32bit_task())
+ audit_syscall_entry(regs->gpr[0], regs->gpr[3], regs->gpr[4],
+ regs->gpr[5], regs->gpr[6]);
+ else
+ audit_syscall_entry(regs->gpr[0],
+ regs->gpr[3] & 0xffffffff,
+ regs->gpr[4] & 0xffffffff,
+ regs->gpr[5] & 0xffffffff,
+ regs->gpr[6] & 0xffffffff);
+
+ /* Return the possibly modified but valid syscall number */
+ return regs->gpr[0];
+
+skip:
+ /*
+ * If we are aborting explicitly, or if the syscall number is
+ * now invalid, set the return value to -ENOSYS.
+ */
+ regs->gpr[3] = -ENOSYS;
+ return -1;
+}
+
+/*
+ * Syscall-exit tracing for the task described by @regs: always calls
+ * audit_syscall_exit(), fires the sys_exit tracepoint when
+ * TIF_SYSCALL_TRACEPOINT is set, and reports the exit to ptrace when either
+ * TIF_SINGLESTEP or TIF_SYSCALL_TRACE is set.
+ */
+void do_syscall_trace_leave(struct pt_regs *regs)
+{
+ int step;
+
+ audit_syscall_exit(regs);
+
+ if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
+ trace_sys_exit(regs, regs->result);
+
+ /* the single-step state is also passed on to the ptrace report */
+ step = test_thread_flag(TIF_SINGLESTEP);
+ if (step || test_thread_flag(TIF_SYSCALL_TRACE))
+ ptrace_report_syscall_exit(regs, step);
+}
+
+/* NOTE(review): prototype presumably silences a missing-prototype warning - confirm */
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function, its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ *
+ * It contains only BUILD_BUG_ON() checks: field offsets of the two structs,
+ * the PT_* index #defines against user_pt_regs offsets, PT_REGS_COUNT, the
+ * position of PT_DSCR, and the PPC32/VSX exclusion.
+ */
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ /* pt_regs.dear must sit at the same offset as user_pt_regs.dar */
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dear) !=
+ offsetof(struct user_pt_regs, dar));
+ /* likewise pt_regs.esr and user_pt_regs.dsisr */
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, esr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ /* user_pt_regs must not be larger than pt_regs */
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+ // Now check that the pt_regs offsets match the uapi #defines
+ #define CHECK_REG(_pt, _reg) \
+ BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+ sizeof(unsigned long)));
+
+ CHECK_REG(PT_R0, gpr[0]);
+ CHECK_REG(PT_R1, gpr[1]);
+ CHECK_REG(PT_R2, gpr[2]);
+ CHECK_REG(PT_R3, gpr[3]);
+ CHECK_REG(PT_R4, gpr[4]);
+ CHECK_REG(PT_R5, gpr[5]);
+ CHECK_REG(PT_R6, gpr[6]);
+ CHECK_REG(PT_R7, gpr[7]);
+ CHECK_REG(PT_R8, gpr[8]);
+ CHECK_REG(PT_R9, gpr[9]);
+ CHECK_REG(PT_R10, gpr[10]);
+ CHECK_REG(PT_R11, gpr[11]);
+ CHECK_REG(PT_R12, gpr[12]);
+ CHECK_REG(PT_R13, gpr[13]);
+ CHECK_REG(PT_R14, gpr[14]);
+ CHECK_REG(PT_R15, gpr[15]);
+ CHECK_REG(PT_R16, gpr[16]);
+ CHECK_REG(PT_R17, gpr[17]);
+ CHECK_REG(PT_R18, gpr[18]);
+ CHECK_REG(PT_R19, gpr[19]);
+ CHECK_REG(PT_R20, gpr[20]);
+ CHECK_REG(PT_R21, gpr[21]);
+ CHECK_REG(PT_R22, gpr[22]);
+ CHECK_REG(PT_R23, gpr[23]);
+ CHECK_REG(PT_R24, gpr[24]);
+ CHECK_REG(PT_R25, gpr[25]);
+ CHECK_REG(PT_R26, gpr[26]);
+ CHECK_REG(PT_R27, gpr[27]);
+ CHECK_REG(PT_R28, gpr[28]);
+ CHECK_REG(PT_R29, gpr[29]);
+ CHECK_REG(PT_R30, gpr[30]);
+ CHECK_REG(PT_R31, gpr[31]);
+ CHECK_REG(PT_NIP, nip);
+ CHECK_REG(PT_MSR, msr);
+ CHECK_REG(PT_ORIG_R3, orig_gpr3);
+ CHECK_REG(PT_CTR, ctr);
+ CHECK_REG(PT_LNK, link);
+ CHECK_REG(PT_XER, xer);
+ CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+ CHECK_REG(PT_SOFTE, softe);
+#else
+ CHECK_REG(PT_MQ, mq);
+#endif
+ CHECK_REG(PT_TRAP, trap);
+ CHECK_REG(PT_DAR, dar);
+ CHECK_REG(PT_DSISR, dsisr);
+ CHECK_REG(PT_RESULT, result);
+ #undef CHECK_REG
+
+ BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ /*
+ * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+ * real registers.
+ */
+ BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
+}
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace/ptrace32.c
index f52b7db327c8..19c224808982 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace/ptrace32.c
@@ -17,23 +17,14 @@
* this archive for more details.
*/
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
#include <linux/ptrace.h>
#include <linux/regset.h>
-#include <linux/user.h>
-#include <linux/security.h>
-#include <linux/signal.h>
#include <linux/compat.h>
-#include <asm/uaccess.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/switch_to.h>
+#include "ptrace-decl.h"
+
/*
* does not yet catch signals sent when the child dies.
* in exit.c or in signal.c.
@@ -73,8 +64,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
break;
- copied = access_process_vm(child, (u64)addrOthers, &tmp,
- sizeof(tmp), 0);
+ copied = ptrace_access_vm(child, (u64)addrOthers, &tmp,
+ sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
ret = put_user(tmp, (u32 __user *)data);
@@ -92,7 +83,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if ((addr & 3) || (index > PT_FPSCR32))
break;
- CHECK_FULL_REGS(child->thread.regs);
if (index < PT_FPR0) {
ret = ptrace_get_reg(child, index, &tmp);
if (ret)
@@ -142,7 +132,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if ((addr & 3) || numReg > PT_FPSCR)
break;
- CHECK_FULL_REGS(child->thread.regs);
if (numReg >= PT_FPR0) {
flush_fp_to_thread(child);
/* get 64 bit FPR */
@@ -178,8 +167,9 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if (get_user(addrOthers, (u32 __user * __user *)addr) != 0)
break;
ret = 0;
- if (access_process_vm(child, (u64)addrOthers, &tmp,
- sizeof(tmp), 1) == sizeof(tmp))
+ if (ptrace_access_vm(child, (u64)addrOthers, &tmp,
+ sizeof(tmp),
+ FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
break;
ret = -EIO;
break;
@@ -195,7 +185,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
if ((addr & 3) || (index > PT_FPSCR32))
break;
- CHECK_FULL_REGS(child->thread.regs);
if (index < PT_FPR0) {
ret = ptrace_put_reg(child, index, data);
} else {
@@ -234,7 +223,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
*/
if ((addr & 3) || (numReg > PT_FPSCR))
break;
- CHECK_FULL_REGS(child->thread.regs);
if (numReg < PT_FPR0) {
unsigned long freg;
ret = ptrace_get_reg(child, numReg, &freg);
@@ -269,8 +257,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
ret = put_user(child->thread.debug.dac1, (u32 __user *)data);
#else
dabr_fake = (
- (child->thread.hw_brk.address & (~HW_BRK_TYPE_DABR)) |
- (child->thread.hw_brk.type & HW_BRK_TYPE_DABR));
+ (child->thread.hw_brk[0].address & (~HW_BRK_TYPE_DABR)) |
+ (child->thread.hw_brk[0].type & HW_BRK_TYPE_DABR));
ret = put_user(dabr_fake, (u32 __user *)data);
#endif
break;
diff --git a/arch/powerpc/kernel/reloc_32.S b/arch/powerpc/kernel/reloc_32.S
index f366fedb0872..0508c14b4c28 100644
--- a/arch/powerpc/kernel/reloc_32.S
+++ b/arch/powerpc/kernel/reloc_32.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Code to process dynamic relocations for PPC32.
*
@@ -5,11 +6,6 @@
* Author: Suzuki Poulose <suzuki@in.ibm.com>
*
* - Based on ppc64 code - reloc_64.S
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
@@ -34,7 +30,7 @@ R_PPC_RELATIVE = 22
_GLOBAL(relocate)
mflr r0 /* Save our LR */
- bl 0f /* Find our current runtime address */
+ bcl 20,31,$+4 /* Find our current runtime address */
0: mflr r12 /* Make it accessible */
mtlr r0
diff --git a/arch/powerpc/kernel/reloc_64.S b/arch/powerpc/kernel/reloc_64.S
index d88736fbece6..efd52f2e7033 100644
--- a/arch/powerpc/kernel/reloc_64.S
+++ b/arch/powerpc/kernel/reloc_64.S
@@ -1,19 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Code to process dynamic relocations in the kernel.
*
* Copyright 2008 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
RELA = 7
-RELACOUNT = 0x6ffffff9
+RELASZ = 8
+RELAENT = 9
R_PPC64_RELATIVE = 22
+R_PPC64_UADDR64 = 43
/*
* r3 = desired final address of kernel
@@ -29,29 +27,38 @@ _GLOBAL(relocate)
add r9,r9,r12 /* r9 has runtime addr of .rela.dyn section */
ld r10,(p_st - 0b)(r12)
add r10,r10,r12 /* r10 has runtime addr of _stext */
+ ld r4,(p_sym - 0b)(r12)
+ add r4,r4,r12 /* r4 has runtime addr of .dynsym */
/*
- * Scan the dynamic section for the RELA and RELACOUNT entries.
+ * Scan the dynamic section for the RELA, RELASZ and RELAENT entries.
*/
li r7,0
li r8,0
-1: ld r6,0(r11) /* get tag */
+.Ltags:
+ ld r6,0(r11) /* get tag */
cmpdi r6,0
- beq 4f /* end of list */
+ beq .Lend_of_list /* end of list */
cmpdi r6,RELA
bne 2f
ld r7,8(r11) /* get RELA pointer in r7 */
- b 3f
-2: addis r6,r6,(-RELACOUNT)@ha
- cmpdi r6,RELACOUNT@l
+ b 4f
+2: cmpdi r6,RELASZ
bne 3f
- ld r8,8(r11) /* get RELACOUNT value in r8 */
-3: addi r11,r11,16
- b 1b
-4: cmpdi r7,0 /* check we have both RELA and RELACOUNT */
+ ld r8,8(r11) /* get RELASZ value in r8 */
+ b 4f
+3: cmpdi r6,RELAENT
+ bne 4f
+ ld r12,8(r11) /* get RELAENT value in r12 */
+4: addi r11,r11,16
+ b .Ltags
+.Lend_of_list:
+ cmpdi r7,0 /* check we have RELA, RELASZ, RELAENT */
cmpdi cr1,r8,0
- beq 6f
- beq cr1,6f
+ beq .Lout
+ beq cr1,.Lout
+ cmpdi r12,0
+ beq .Lout
/*
* Work out linktime address of _stext and hence the
@@ -66,23 +73,39 @@ _GLOBAL(relocate)
/*
* Run through the list of relocations and process the
- * R_PPC64_RELATIVE ones.
+ * R_PPC64_RELATIVE and R_PPC64_UADDR64 ones.
*/
+ divd r8,r8,r12 /* RELASZ / RELAENT */
mtctr r8
-5: ld r0,8(9) /* ELF64_R_TYPE(reloc->r_info) */
+.Lrels: ld r0,8(r9) /* ELF64_R_TYPE(reloc->r_info) */
cmpdi r0,R_PPC64_RELATIVE
- bne 6f
+ bne .Luaddr64
ld r6,0(r9) /* reloc->r_offset */
ld r0,16(r9) /* reloc->r_addend */
+ b .Lstore
+.Luaddr64:
+ srdi r5,r0,32 /* ELF64_R_SYM(reloc->r_info) */
+ clrldi r0,r0,32
+ cmpdi r0,R_PPC64_UADDR64
+ bne .Lnext
+ ld r6,0(r9)
+ ld r0,16(r9)
+ mulli r5,r5,24 /* 24 == sizeof(elf64_sym) */
+ add r5,r5,r4 /* elf64_sym[ELF64_R_SYM] */
+ ld r5,8(r5)
+ add r0,r0,r5
+.Lstore:
add r0,r0,r3
stdx r0,r7,r6
- addi r9,r9,24
- bdnz 5b
-
-6: blr
+.Lnext:
+ add r9,r9,r12
+ bdnz .Lrels
+.Lout:
+ blr
.balign 8
-p_dyn: .llong __dynamic_start - 0b
-p_rela: .llong __rela_dyn_start - 0b
-p_st: .llong _stext - 0b
+p_dyn: .8byte __dynamic_start - 0b
+p_rela: .8byte __rela_dyn_start - 0b
+p_sym: .8byte __dynamic_symtab - 0b
+p_st: .8byte _stext - 0b
diff --git a/arch/powerpc/kernel/rethook.c b/arch/powerpc/kernel/rethook.c
new file mode 100644
index 000000000000..5f5f47ae82cf
--- /dev/null
+++ b/arch/powerpc/kernel/rethook.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PowerPC implementation of rethook. This depends on kprobes.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/rethook.h>
+
+/*
+ * Function return trampoline:
+ * - arch_init_kprobes() registers a kprobe (trampoline_p) at this address
+ * - arch_rethook_prepare() stores this address in the saved link register,
+ * so when the probed function returns, this probe
+ * causes the handlers to fire
+ *
+ * The body is just "nop; blr", emitted as a global symbol of type @function.
+ */
+asm(".global arch_rethook_trampoline\n"
+ ".type arch_rethook_trampoline, @function\n"
+ "arch_rethook_trampoline:\n"
+ "nop\n"
+ "blr\n"
+ ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n");
+
+/*
+ * Called when the probe at kretprobe trampoline is hit; installed as the
+ * .pre_handler of trampoline_p.
+ *
+ * Hands the trapped registers and regs->gpr[1] (the same register that
+ * arch_rethook_prepare() records as the frame) to
+ * rethook_trampoline_handler() and returns the logical NOT of its result.
+ * @p is not used.
+ */
+static int trampoline_rethook_handler(struct kprobe *p, struct pt_regs *regs)
+{
+ return !rethook_trampoline_handler(regs, regs->gpr[1]);
+}
+NOKPROBE_SYMBOL(trampoline_rethook_handler);
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+ rh->ret_addr = regs->link; /* remember the original return address */
+ rh->frame = regs->gpr[1]; /* remember gpr[1]; the trampoline handler passes gpr[1] again */
+
+ /*
+ * Replace the return addr with trampoline addr, i.e. the address
+ * that arch_init_kprobes() puts a kprobe on. The original values were
+ * saved in @rh above. @mcount is not used here.
+ */
+ regs->link = (unsigned long)arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
+
+/*
+ * This is called from rethook_trampoline_handler().
+ *
+ * Points @regs back at @orig_ret_address by setting both the return IP
+ * (to @orig_ret_address - 4) and the link register (to @orig_ret_address);
+ * the comment in the body explains why each one is needed.
+ */
+void arch_rethook_fixup_return(struct pt_regs *regs, unsigned long orig_ret_address)
+{
+ /*
+ * We get here through one of two paths:
+ * 1. by taking a trap -> kprobe_handler() -> here
+ * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here
+ *
+ * When going back through (1), we need regs->nip to be set up properly
+ * as it is used to determine the return address from the trap.
+ * For (2), since nip is not honoured with optprobes, we instead set up
+ * the link register properly so that the subsequent 'blr' in
+ * arch_rethook_trampoline jumps back to the right instruction.
+ *
+ * For nip, we should set the address to the previous instruction
+ * (hence the "- 4" below) since we end up emulating it in
+ * kprobe_handler(), which increments the nip again.
+ */
+ regs_set_return_ip(regs, orig_ret_address - 4);
+ regs->link = orig_ret_address;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+static struct kprobe trampoline_p = { /* kprobe on the return trampoline; registered by arch_init_kprobes() */
+ .addr = (kprobe_opcode_t *) &arch_rethook_trampoline, /* probe sits at the trampoline symbol itself */
+ .pre_handler = trampoline_rethook_handler /* invoked when that probe is hit */
+};
+
+/*
+ * rethook initializer: registers trampoline_p, the kprobe placed on
+ * arch_rethook_trampoline, and returns register_kprobe()'s result unchanged.
+ */
+int __init arch_init_kprobes(void)
+{
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c
index 8777fb02349f..f38df72e64b8 100644
--- a/arch/powerpc/kernel/rtas-proc.c
+++ b/arch/powerpc/kernel/rtas-proc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2000 Tilmann Bitterberg
* (tilmann@bitterberg.de)
@@ -23,11 +24,11 @@
#include <linux/seq_file.h>
#include <linux/bitops.h>
#include <linux/rtc.h>
+#include <linux/of.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/processor.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/machdep.h> /* for ppc_md */
#include <asm/time.h>
@@ -113,17 +114,6 @@
#define SENSOR_PREFIX "ibm,sensor-"
#define cel_to_fahr(x) ((x*9/5)+32)
-
-/* Globals */
-static struct rtas_sensors sensors;
-static struct device_node *rtas_node = NULL;
-static unsigned long power_on_time = 0; /* Save the time the user set */
-static char progress_led[MAX_LINELENGTH];
-
-static unsigned long rtas_tone_frequency = 1000;
-static unsigned long rtas_tone_volume = 0;
-
-/* ****************STRUCTS******************************************* */
struct individual_sensor {
unsigned int token;
unsigned int quant;
@@ -134,6 +124,15 @@ struct rtas_sensors {
unsigned int quant;
};
+/* Globals */
+static struct rtas_sensors sensors;
+static struct device_node *rtas_node = NULL;
+static unsigned long power_on_time = 0; /* Save the time the user set */
+static char progress_led[MAX_LINELENGTH];
+
+static unsigned long rtas_tone_frequency = 1000;
+static unsigned long rtas_tone_volume = 0;
+
/* ****************************************************************** */
/* Declarations */
static int ppc_rtas_sensors_show(struct seq_file *m, void *v);
@@ -155,29 +154,17 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file,
static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v);
static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v);
-static int sensors_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ppc_rtas_sensors_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_sensors_operations = {
- .open = sensors_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
static int poweron_open(struct inode *inode, struct file *file)
{
return single_open(file, ppc_rtas_poweron_show, NULL);
}
-static const struct file_operations ppc_rtas_poweron_operations = {
- .open = poweron_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_poweron_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_poweron_proc_ops = {
+ .proc_open = poweron_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_poweron_write,
+ .proc_release = single_release,
};
static int progress_open(struct inode *inode, struct file *file)
@@ -185,12 +172,12 @@ static int progress_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_progress_show, NULL);
}
-static const struct file_operations ppc_rtas_progress_operations = {
- .open = progress_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_progress_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_progress_proc_ops = {
+ .proc_open = progress_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_progress_write,
+ .proc_release = single_release,
};
static int clock_open(struct inode *inode, struct file *file)
@@ -198,12 +185,12 @@ static int clock_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_clock_show, NULL);
}
-static const struct file_operations ppc_rtas_clock_operations = {
- .open = clock_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_clock_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_clock_proc_ops = {
+ .proc_open = clock_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_clock_write,
+ .proc_release = single_release,
};
static int tone_freq_open(struct inode *inode, struct file *file)
@@ -211,12 +198,12 @@ static int tone_freq_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_freq_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_freq_operations = {
- .open = tone_freq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_freq_write,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_freq_proc_ops = {
+ .proc_open = tone_freq_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_freq_write,
+ .proc_release = single_release,
};
static int tone_volume_open(struct inode *inode, struct file *file)
@@ -224,24 +211,12 @@ static int tone_volume_open(struct inode *inode, struct file *file)
return single_open(file, ppc_rtas_tone_volume_show, NULL);
}
-static const struct file_operations ppc_rtas_tone_volume_operations = {
- .open = tone_volume_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .write = ppc_rtas_tone_volume_write,
- .release = single_release,
-};
-
-static int rmo_buf_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ppc_rtas_rmo_buf_show, NULL);
-}
-
-static const struct file_operations ppc_rtas_rmo_buf_ops = {
- .open = rmo_buf_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
+static const struct proc_ops ppc_rtas_tone_volume_proc_ops = {
+ .proc_open = tone_volume_open,
+ .proc_read = seq_read,
+ .proc_lseek = seq_lseek,
+ .proc_write = ppc_rtas_tone_volume_write,
+ .proc_release = single_release,
};
static int ppc_rtas_find_all_sensors(void);
@@ -262,29 +237,28 @@ static int __init proc_rtas_init(void)
if (rtas_node == NULL)
return -ENODEV;
- proc_create("powerpc/rtas/progress", S_IRUGO|S_IWUSR, NULL,
- &ppc_rtas_progress_operations);
- proc_create("powerpc/rtas/clock", S_IRUGO|S_IWUSR, NULL,
- &ppc_rtas_clock_operations);
- proc_create("powerpc/rtas/poweron", S_IWUSR|S_IRUGO, NULL,
- &ppc_rtas_poweron_operations);
- proc_create("powerpc/rtas/sensors", S_IRUGO, NULL,
- &ppc_rtas_sensors_operations);
- proc_create("powerpc/rtas/frequency", S_IWUSR|S_IRUGO, NULL,
- &ppc_rtas_tone_freq_operations);
- proc_create("powerpc/rtas/volume", S_IWUSR|S_IRUGO, NULL,
- &ppc_rtas_tone_volume_operations);
- proc_create("powerpc/rtas/rmo_buffer", S_IRUSR, NULL,
- &ppc_rtas_rmo_buf_ops);
+ proc_create("powerpc/rtas/progress", 0644, NULL,
+ &ppc_rtas_progress_proc_ops);
+ proc_create("powerpc/rtas/clock", 0644, NULL,
+ &ppc_rtas_clock_proc_ops);
+ proc_create("powerpc/rtas/poweron", 0644, NULL,
+ &ppc_rtas_poweron_proc_ops);
+ proc_create_single("powerpc/rtas/sensors", 0444, NULL,
+ ppc_rtas_sensors_show);
+ proc_create("powerpc/rtas/frequency", 0644, NULL,
+ &ppc_rtas_tone_freq_proc_ops);
+ proc_create("powerpc/rtas/volume", 0644, NULL,
+ &ppc_rtas_tone_volume_proc_ops);
+ proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL,
+ ppc_rtas_rmo_buf_show);
return 0;
}
__initcall(proc_rtas_init);
-static int parse_number(const char __user *p, size_t count, unsigned long *val)
+static int parse_number(const char __user *p, size_t count, u64 *val)
{
char buf[40];
- char *end;
if (count > 39)
return -EINVAL;
@@ -294,11 +268,7 @@ static int parse_number(const char __user *p, size_t count, unsigned long *val)
buf[count] = 0;
- *val = simple_strtoul(buf, &end, 10);
- if (*end && *end != '\n')
- return -EINVAL;
-
- return 0;
+ return kstrtoull(buf, 10, val);
}
/* ****************************************************************** */
@@ -308,18 +278,18 @@ static ssize_t ppc_rtas_poweron_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
struct rtc_time tm;
- unsigned long nowtime;
+ time64_t nowtime;
int error = parse_number(buf, count, &nowtime);
if (error)
return error;
power_on_time = nowtime; /* save the time */
- to_tm(nowtime, &tm);
+ rtc_time64_to_tm(nowtime, &tm);
- error = rtas_call(rtas_token("set-time-for-power-on"), 7, 1, NULL,
- tm.tm_year, tm.tm_mon, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_FOR_POWER_ON), 7, 1, NULL,
+ tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, 0 /* nano */);
if (error)
printk(KERN_WARNING "error: setting poweron time returned: %s\n",
ppc_rtas_process_error(error));
@@ -374,15 +344,15 @@ static ssize_t ppc_rtas_clock_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
struct rtc_time tm;
- unsigned long nowtime;
+ time64_t nowtime;
int error = parse_number(buf, count, &nowtime);
if (error)
return error;
- to_tm(nowtime, &tm);
- error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
- tm.tm_year, tm.tm_mon, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
+ rtc_time64_to_tm(nowtime, &tm);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL,
+ tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+ tm.tm_hour, tm.tm_min, tm.tm_sec, 0);
if (error)
printk(KERN_WARNING "error: setting the clock returned: %s\n",
ppc_rtas_process_error(error));
@@ -392,7 +362,7 @@ static ssize_t ppc_rtas_clock_write(struct file *file,
static int ppc_rtas_clock_show(struct seq_file *m, void *v)
{
int ret[8];
- int error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ int error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
if (error) {
printk(KERN_WARNING "error: reading the clock returned: %s\n",
@@ -402,8 +372,8 @@ static int ppc_rtas_clock_show(struct seq_file *m, void *v)
unsigned int year, mon, day, hour, min, sec;
year = ret[0]; mon = ret[1]; day = ret[2];
hour = ret[3]; min = ret[4]; sec = ret[5];
- seq_printf(m, "%lu\n",
- mktime(year, mon, day, hour, min, sec));
+ seq_printf(m, "%lld\n",
+ mktime64(year, mon, day, hour, min, sec));
}
return 0;
}
@@ -415,7 +385,7 @@ static int ppc_rtas_sensors_show(struct seq_file *m, void *v)
{
int i,j;
int state, error;
- int get_sensor_state = rtas_token("get-sensor-state");
+ int get_sensor_state = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
seq_printf(m, "RTAS (RunTime Abstraction Services) Sensor Information\n");
seq_printf(m, "Sensor\t\tValue\t\tCondition\tLocation\n");
@@ -529,7 +499,7 @@ static void ppc_rtas_process_sensor(struct seq_file *m,
"EPOW power off" };
const char * battery_cyclestate[] = { "None", "In progress",
"Requested" };
- const char * battery_charging[] = { "Charging", "Discharching",
+ const char * battery_charging[] = { "Charging", "Discharging",
"No current flow" };
const char * ibm_drconnector[] = { "Empty", "Present", "Unusable",
"Exchange" };
@@ -700,7 +670,7 @@ static void check_location(struct seq_file *m, const char *c)
/*
* Format:
* ${LETTER}${NUMBER}[[-/]${LETTER}${NUMBER} [ ... ] ]
- * the '.' may be an abbrevation
+ * the '.' may be an abbreviation
*/
static void check_location_string(struct seq_file *m, const char *c)
{
@@ -732,14 +702,14 @@ static void get_location_code(struct seq_file *m, struct individual_sensor *s,
static ssize_t ppc_rtas_tone_freq_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
- unsigned long freq;
+ u64 freq;
int error = parse_number(buf, count, &freq);
if (error)
return error;
rtas_tone_frequency = freq; /* save it for later */
- error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
- TONE_FREQUENCY, 0, freq);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL,
+ TONE_FREQUENCY, 0, freq);
if (error)
printk(KERN_WARNING "error: setting tone frequency returned: %s\n",
ppc_rtas_process_error(error));
@@ -757,7 +727,7 @@ static int ppc_rtas_tone_freq_show(struct seq_file *m, void *v)
static ssize_t ppc_rtas_tone_volume_write(struct file *file,
const char __user *buf, size_t count, loff_t *ppos)
{
- unsigned long volume;
+ u64 volume;
int error = parse_number(buf, count, &volume);
if (error)
return error;
@@ -766,8 +736,8 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file,
volume = 100;
rtas_tone_volume = volume; /* save it for later */
- error = rtas_call(rtas_token("set-indicator"), 3, 1, NULL,
- TONE_VOLUME, 0, volume);
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_INDICATOR), 3, 1, NULL,
+ TONE_VOLUME, 0, volume);
if (error)
printk(KERN_WARNING "error: setting tone volume returned: %s\n",
ppc_rtas_process_error(error));
@@ -780,11 +750,20 @@ static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v)
return 0;
}
-#define RMO_READ_BUF_MAX 30
-
-/* RTAS Userspace access */
+/**
+ * ppc_rtas_rmo_buf_show() - Describe RTAS-addressable region for user space.
+ * @m: seq_file output target.
+ * @v: Unused.
+ *
+ * Base + size description of a range of RTAS-addressable memory set
+ * aside for user space to use as work area(s) for certain RTAS
+ * functions. User space accesses this region via /dev/mem. Apart from
+ * security policies, the kernel does not arbitrate or serialize
+ * access to this region, and user space must ensure that concurrent
+ * users do not interfere with each other.
+ */
static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v)
{
- seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_RMOBUF_MAX);
+ seq_printf(m, "%016lx %x\n", rtas_rmo_buf, RTAS_USER_REGION_SIZE);
return 0;
}
diff --git a/arch/powerpc/kernel/rtas-rtc.c b/arch/powerpc/kernel/rtas-rtc.c
index c57c19358a26..6996214532bd 100644
--- a/arch/powerpc/kernel/rtas-rtc.c
+++ b/arch/powerpc/kernel/rtas-rtc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/time.h>
#include <linux/timer.h>
@@ -5,14 +6,13 @@
#include <linux/rtc.h>
#include <linux/delay.h>
#include <linux/ratelimit.h>
-#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/time.h>
#define MAX_RTC_WAIT 5000 /* 5 sec */
-#define RTAS_CLOCK_BUSY (-2)
-unsigned long __init rtas_get_boot_time(void)
+
+time64_t __init rtas_get_boot_time(void)
{
int ret[8];
int error;
@@ -21,7 +21,7 @@ unsigned long __init rtas_get_boot_time(void)
max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
do {
- error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
wait_time = rtas_busy_delay_time(error);
if (wait_time) {
@@ -37,7 +37,7 @@ unsigned long __init rtas_get_boot_time(void)
return 0;
}
- return mktime(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
+ return mktime64(ret[0], ret[1], ret[2], ret[3], ret[4], ret[5]);
}
/* NOTE: get_rtc_time will get an error if executed in interrupt context
@@ -53,7 +53,7 @@ void rtas_get_rtc_time(struct rtc_time *rtc_tm)
max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
do {
- error = rtas_call(rtas_token("get-time-of-day"), 0, 8, ret);
+ error = rtas_call(rtas_function_token(RTAS_FN_GET_TIME_OF_DAY), 0, 8, ret);
wait_time = rtas_busy_delay_time(error);
if (wait_time) {
@@ -90,7 +90,7 @@ int rtas_set_rtc_time(struct rtc_time *tm)
max_wait_tb = get_tb() + tb_ticks_per_usec * 1000 * MAX_RTC_WAIT;
do {
- error = rtas_call(rtas_token("set-time-of-day"), 7, 1, NULL,
+ error = rtas_call(rtas_function_token(RTAS_FN_SET_TIME_OF_DAY), 7, 1, NULL,
tm->tm_year + 1900, tm->tm_mon + 1,
tm->tm_mday, tm->tm_hour, tm->tm_min,
tm->tm_sec, 0);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8b4c857c1421..8d81c1e7a8db 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1,204 +1,826 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
*
* Procedures for interfacing to the RTAS on CHRP machines.
*
* Peter Bergner, IBM March 2001.
* Copyright (C) 2001 IBM.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#include <stdarg.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/init.h>
+#define pr_fmt(fmt) "rtas: " fmt
+
+#include <linux/bsearch.h>
#include <linux/capability.h>
#include <linux/delay.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/memblock.h>
-#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/nospec.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stdarg.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <linux/xarray.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
-#include <asm/machdep.h>
+#include <asm/delay.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
#include <asm/page.h>
-#include <asm/param.h>
-#include <asm/delay.h>
-#include <asm/uaccess.h>
-#include <asm/udbg.h>
-#include <asm/syscalls.h>
-#include <asm/smp.h>
-#include <linux/atomic.h>
+#include <asm/rtas-work-area.h>
+#include <asm/rtas.h>
#include <asm/time.h>
-#include <asm/mmu.h>
-#include <asm/topology.h>
+#include <asm/trace.h>
+#include <asm/udbg.h>
-struct rtas_t rtas = {
- .lock = __ARCH_SPIN_LOCK_UNLOCKED
+struct rtas_filter {
+ /*
+ * Indexes into the args buffer, -1 if not used. There is room for two
+ * buffer/size pairs; presumably buf_idxN is the position of a buffer
+ * argument and size_idxN the position of the argument carrying that
+ * buffer's size (going by the names and the fixed_size note below).
+ */
+ const int buf_idx1;
+ const int size_idx1;
+ const int buf_idx2;
+ const int size_idx2;
+ /*
+ * Assumed buffer size per the spec if the function does not
+ * have a size parameter, e.g. ibm,errinjct. 0 if unused.
+ */
+ const int fixed_size;
};
-EXPORT_SYMBOL(rtas);
-DEFINE_SPINLOCK(rtas_data_buf_lock);
-EXPORT_SYMBOL(rtas_data_buf_lock);
+/**
+ * struct rtas_function - Descriptor for RTAS functions.
+ *
+ * @token: Value of @name if it exists under the /rtas node.
+ * @name: Function name.
+ * @filter: If non-NULL, invoking this function via the rtas syscall is
+ * generally allowed, and @filter describes constraints on the
+ * arguments. See also @banned_for_syscall_on_le.
+ * @banned_for_syscall_on_le: Set when call via sys_rtas is generally allowed
+ * but specifically restricted on ppc64le. Such
+ * functions are believed to have no users on
+ * ppc64le, and we want to keep it that way. It does
+ * not make sense for this to be set when @filter
+ * is NULL.
+ * @lock: Pointer to an optional dedicated per-function mutex. This
+ * should be set for functions that require multiple calls in
+ * sequence to complete a single operation, and such sequences
+ * will disrupt each other if allowed to interleave. Users of
+ * this function are required to hold the associated lock for
+ * the duration of the call sequence. Add an explanatory
+ * comment to the function table entry if setting this member.
+ *
+ * Instances live in rtas_function_table[], which is indexed by the
+ * RTAS_FNIDX__* constants and marked __ro_after_init.
+ */
+struct rtas_function {
+ s32 token; /* not const, unlike @name; presumably filled in during init - TODO confirm */
+ const bool banned_for_syscall_on_le:1;
+ const char * const name;
+ const struct rtas_filter *filter;
+ struct mutex *lock;
+};
-char rtas_data_buf[RTAS_DATA_BUF_SIZE] __cacheline_aligned;
-EXPORT_SYMBOL(rtas_data_buf);
+/*
+ * Per-function locks for sequence-based RTAS functions.
+ *
+ * Each mutex is referenced by the .lock member of the matching
+ * rtas_function_table[] entry. Only the activate-firmware and
+ * lpar-perftools locks are static; the others are defined with external
+ * linkage.
+ */
+static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
+static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
+DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
+DEFINE_MUTEX(rtas_ibm_get_vpd_lock);
+DEFINE_MUTEX(rtas_ibm_get_indices_lock);
+DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
+DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
+DEFINE_MUTEX(rtas_ibm_receive_hvpipe_msg_lock);
+DEFINE_MUTEX(rtas_ibm_send_hvpipe_msg_lock);
+
+static struct rtas_function rtas_function_table[] __ro_after_init = {
+ [RTAS_FNIDX__CHECK_EXCEPTION] = {
+ .name = "check-exception",
+ },
+ [RTAS_FNIDX__DISPLAY_CHARACTER] = {
+ .name = "display-character",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__EVENT_SCAN] = {
+ .name = "event-scan",
+ },
+ [RTAS_FNIDX__FREEZE_TIME_BASE] = {
+ .name = "freeze-time-base",
+ },
+ [RTAS_FNIDX__GET_POWER_LEVEL] = {
+ .name = "get-power-level",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__GET_SENSOR_STATE] = {
+ .name = "get-sensor-state",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__GET_TERM_CHAR] = {
+ .name = "get-term-char",
+ },
+ [RTAS_FNIDX__GET_TIME_OF_DAY] = {
+ .name = "get-time-of-day",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = {
+ .name = "ibm,activate-firmware",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ as of v2.13 doesn't explicitly impose any
+ * restriction, but this typically requires multiple
+ * calls before success, and there's no reason to
+ * allow sequences to interleave.
+ */
+ .lock = &rtas_ibm_activate_firmware_lock,
+ },
+ [RTAS_FNIDX__IBM_CBE_START_PTCAL] = {
+ .name = "ibm,cbe-start-ptcal",
+ },
+ [RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = {
+ .name = "ibm,cbe-stop-ptcal",
+ },
+ [RTAS_FNIDX__IBM_CHANGE_MSI] = {
+ .name = "ibm,change-msi",
+ },
+ [RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = {
+ .name = "ibm,close-errinjct",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = {
+ .name = "ibm,configure-bridge",
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = {
+ .name = "ibm,configure-connector",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = 1, .size_idx2 = -1,
+ .fixed_size = 4096,
+ },
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = {
+ .name = "ibm,configure-kernel-dump",
+ },
+ [RTAS_FNIDX__IBM_CONFIGURE_PE] = {
+ .name = "ibm,configure-pe",
+ },
+ [RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = {
+ .name = "ibm,create-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = {
+ .name = "ibm,display-message",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_ERRINJCT] = {
+ .name = "ibm,errinjct",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ .fixed_size = 1024,
+ },
+ },
+ [RTAS_FNIDX__IBM_EXTI2C] = {
+ .name = "ibm,exti2c",
+ },
+ [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = {
+ .name = "ibm,get-config-addr-info",
+ },
+ [RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = {
+ .name = "ibm,get-config-addr-info2",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = {
+ .name = "ibm,get-dynamic-sensor-state",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS
+ * must not call ibm,get-dynamic-sensor-state with
+ * different inputs until a non-retry status has been
+ * returned.
+ */
+ .lock = &rtas_ibm_get_dynamic_sensor_state_lock,
+ },
+ [RTAS_FNIDX__IBM_GET_INDICES] = {
+ .name = "ibm,get-indices",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = 3,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not
+ * interleave ibm,get-indices call sequences with
+ * different inputs.
+ */
+ .lock = &rtas_ibm_get_indices_lock,
+ },
+ [RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = {
+ .name = "ibm,get-rio-topology",
+ },
+ [RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = {
+ .name = "ibm,get-system-parameter",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = 2,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_GET_VPD] = {
+ .name = "ibm,get-vpd",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = 1, .size_idx2 = 2,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences
+ * should not be allowed to interleave.
+ */
+ .lock = &rtas_ibm_get_vpd_lock,
+ },
+ [RTAS_FNIDX__IBM_GET_XIVE] = {
+ .name = "ibm,get-xive",
+ },
+ [RTAS_FNIDX__IBM_INT_OFF] = {
+ .name = "ibm,int-off",
+ },
+ [RTAS_FNIDX__IBM_INT_ON] = {
+ .name = "ibm,int-on",
+ },
+ [RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = {
+ .name = "ibm,io-quiesce-ack",
+ },
+ [RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = {
+ .name = "ibm,lpar-perftools",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = 3,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow
+ * only one call sequence in progress at a time.
+ */
+ .lock = &rtas_ibm_lpar_perftools_lock,
+ },
+ [RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = {
+ .name = "ibm,manage-flash-image",
+ },
+ [RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = {
+ .name = "ibm,manage-storage-preservation",
+ },
+ [RTAS_FNIDX__IBM_NMI_INTERLOCK] = {
+ .name = "ibm,nmi-interlock",
+ },
+ [RTAS_FNIDX__IBM_NMI_REGISTER] = {
+ .name = "ibm,nmi-register",
+ },
+ [RTAS_FNIDX__IBM_OPEN_ERRINJCT] = {
+ .name = "ibm,open-errinjct",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = {
+ .name = "ibm,open-sriov-allow-unfreeze",
+ },
+ [RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = {
+ .name = "ibm,open-sriov-map-pe-number",
+ },
+ [RTAS_FNIDX__IBM_OS_TERM] = {
+ .name = "ibm,os-term",
+ },
+ [RTAS_FNIDX__IBM_PARTNER_CONTROL] = {
+ .name = "ibm,partner-control",
+ },
+ [RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = {
+ .name = "ibm,physical-attestation",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = 1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * This follows a sequence-based pattern similar to
+ * ibm,get-vpd et al. Since PAPR+ restricts
+ * interleaving call sequences for other functions of
+ * this style, assume the restriction applies here,
+ * even though it's not explicit in the spec.
+ */
+ .lock = &rtas_ibm_physical_attestation_lock,
+ },
+ [RTAS_FNIDX__IBM_PLATFORM_DUMP] = {
+ .name = "ibm,platform-dump",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 4, .size_idx1 = 5,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent
+ * sequences of ibm,platform-dump are allowed if they
+ * are operating on different dump tags. So leave the
+ * lock pointer unset for now. This may need
+ * reconsideration if kernel-internal users appear.
+ */
+ },
+ [RTAS_FNIDX__IBM_POWER_OFF_UPS] = {
+ .name = "ibm,power-off-ups",
+ },
+ [RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = {
+ .name = "ibm,query-interrupt-source-number",
+ },
+ [RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = {
+ .name = "ibm,query-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_READ_PCI_CONFIG] = {
+ .name = "ibm,read-pci-config",
+ },
+ [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = {
+ .name = "ibm,read-slot-reset-state",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = {
+ .name = "ibm,read-slot-reset-state2",
+ },
+ [RTAS_FNIDX__IBM_RECEIVE_HVPIPE_MSG] = {
+ .name = "ibm,receive-hvpipe-msg",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = 1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.32.1
+ *
+ * A dedicated lock is set, so per the @lock contract in
+ * struct rtas_function users must hold it for the
+ * duration of a call sequence.
+ */
+ .lock = &rtas_ibm_receive_hvpipe_msg_lock,
+ },
+ [RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
+ .name = "ibm,remove-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOW] = {
+ /*
+ * Note: PAPR+ v2.13 7.3.31.4.1 spells this as
+ * "ibm,reset-pe-dma-windows" (plural), but RTAS
+ * implementations use the singular form in practice.
+ */
+ .name = "ibm,reset-pe-dma-window",
+ },
+ [RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
+ .name = "ibm,scan-log-dump",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = 1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_SEND_HVPIPE_MSG] = {
+ .name = "ibm,send-hvpipe-msg",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.32.2
+ *
+ * A dedicated lock is set, so per the @lock contract in
+ * struct rtas_function users must hold it for the
+ * duration of a call sequence.
+ */
+ .lock = &rtas_ibm_send_hvpipe_msg_lock,
+ },
+ [RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = {
+ .name = "ibm,set-dynamic-indicator",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 2, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ /*
+ * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call
+ * this function with different inputs until a
+ * non-retry status has been returned.
+ */
+ .lock = &rtas_ibm_set_dynamic_indicator_lock,
+ },
+ [RTAS_FNIDX__IBM_SET_EEH_OPTION] = {
+ .name = "ibm,set-eeh-option",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_SET_SLOT_RESET] = {
+ .name = "ibm,set-slot-reset",
+ },
+ [RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = {
+ .name = "ibm,set-system-parameter",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_SET_XIVE] = {
+ .name = "ibm,set-xive",
+ },
+ [RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = {
+ .name = "ibm,slot-error-detail",
+ },
+ [RTAS_FNIDX__IBM_SUSPEND_ME] = {
+ .name = "ibm,suspend-me",
+ .banned_for_syscall_on_le = true,
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = {
+ .name = "ibm,tune-dma-parms",
+ },
+ [RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = {
+ .name = "ibm,update-flash-64-and-reboot",
+ },
+ [RTAS_FNIDX__IBM_UPDATE_NODES] = {
+ .name = "ibm,update-nodes",
+ .banned_for_syscall_on_le = true,
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ .fixed_size = 4096,
+ },
+ },
+ [RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = {
+ .name = "ibm,update-properties",
+ .banned_for_syscall_on_le = true,
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = 0, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ .fixed_size = 4096,
+ },
+ },
+ [RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = {
+ .name = "ibm,validate-flash-image",
+ },
+ [RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = {
+ .name = "ibm,write-pci-config",
+ },
+ [RTAS_FNIDX__NVRAM_FETCH] = {
+ .name = "nvram-fetch",
+ },
+ [RTAS_FNIDX__NVRAM_STORE] = {
+ .name = "nvram-store",
+ },
+ [RTAS_FNIDX__POWER_OFF] = {
+ .name = "power-off",
+ },
+ [RTAS_FNIDX__PUT_TERM_CHAR] = {
+ .name = "put-term-char",
+ },
+ [RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = {
+ .name = "query-cpu-stopped-state",
+ },
+ [RTAS_FNIDX__READ_PCI_CONFIG] = {
+ .name = "read-pci-config",
+ },
+ [RTAS_FNIDX__RTAS_LAST_ERROR] = {
+ .name = "rtas-last-error",
+ },
+ [RTAS_FNIDX__SET_INDICATOR] = {
+ .name = "set-indicator",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__SET_POWER_LEVEL] = {
+ .name = "set-power-level",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = {
+ .name = "set-time-for-power-on",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__SET_TIME_OF_DAY] = {
+ .name = "set-time-of-day",
+ .filter = &(const struct rtas_filter) {
+ .buf_idx1 = -1, .size_idx1 = -1,
+ .buf_idx2 = -1, .size_idx2 = -1,
+ },
+ },
+ [RTAS_FNIDX__START_CPU] = {
+ .name = "start-cpu",
+ },
+ [RTAS_FNIDX__STOP_SELF] = {
+ .name = "stop-self",
+ },
+ [RTAS_FNIDX__SYSTEM_REBOOT] = {
+ .name = "system-reboot",
+ },
+ [RTAS_FNIDX__THAW_TIME_BASE] = {
+ .name = "thaw-time-base",
+ },
+ [RTAS_FNIDX__WRITE_PCI_CONFIG] = {
+ .name = "write-pci-config",
+ },
+};
-unsigned long rtas_rmo_buf;
+#define for_each_rtas_function(funcp) \
+ for (funcp = &rtas_function_table[0]; \
+ funcp < &rtas_function_table[ARRAY_SIZE(rtas_function_table)]; \
+ ++funcp) /* visits every rtas_function_table entry, first to last */
/*
- * If non-NULL, this gets called when the kernel terminates.
- * This is done like this so rtas_flash can be a module.
+ * Nearly all RTAS calls need to be serialized. All uses of the
+ * default rtas_args block must hold rtas_lock.
+ *
+ * Exceptions to the RTAS serialization requirement (e.g. stop-self)
+ * must use a separate rtas_args structure.
*/
-void (*rtas_flash_term_hook)(int);
-EXPORT_SYMBOL(rtas_flash_term_hook);
+static DEFINE_RAW_SPINLOCK(rtas_lock);
+static struct rtas_args rtas_args;
-/* RTAS use home made raw locking instead of spin_lock_irqsave
- * because those can be called from within really nasty contexts
- * such as having the timebase stopped which would lockup with
- * normal locks and spinlock debugging enabled
+/**
+ * rtas_function_token() - RTAS function token lookup.
+ * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
+ *
+ * Context: Any context.
+ * Return: the token value for the function if implemented by this platform,
+ * otherwise RTAS_UNKNOWN_SERVICE.
*/
-static unsigned long lock_rtas(void)
+s32 rtas_function_token(const rtas_fn_handle_t handle)
{
- unsigned long flags;
+ const size_t index = handle.index;
+ const bool out_of_bounds = index >= ARRAY_SIZE(rtas_function_table);
- local_irq_save(flags);
- preempt_disable();
- arch_spin_lock_flags(&rtas.lock, flags);
- return flags;
+ if (WARN_ONCE(out_of_bounds, "invalid function index %zu", index))
+ return RTAS_UNKNOWN_SERVICE;
+ /*
+ * Various drivers attempt token lookups on non-RTAS
+ * platforms.
+ */
+ if (!rtas.dev)
+ return RTAS_UNKNOWN_SERVICE;
+
+ return rtas_function_table[index].token;
}
+EXPORT_SYMBOL_GPL(rtas_function_token);
-static void unlock_rtas(unsigned long flags)
+static int rtas_function_cmp(const void *a, const void *b)
{
- arch_spin_unlock(&rtas.lock);
- local_irq_restore(flags);
- preempt_enable();
+ const struct rtas_function *f1 = a;
+ const struct rtas_function *f2 = b;
+
+ return strcmp(f1->name, f2->name);
}
/*
- * call_rtas_display_status and call_rtas_display_status_delay
- * are designed only for very early low-level debugging, which
- * is why the token is hard-coded to 10.
+ * Boot-time initialization of the function table needs the lookup to
+ * return a non-const-qualified object. Use rtas_name_to_function()
+ * in all other contexts.
*/
-static void call_rtas_display_status(unsigned char c)
+static struct rtas_function *__rtas_name_to_function(const char *name)
{
- struct rtas_args *args = &rtas.args;
- unsigned long s;
+ const struct rtas_function key = {
+ .name = name,
+ };
+ struct rtas_function *found;
- if (!rtas.base)
- return;
- s = lock_rtas();
+ found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table),
+ sizeof(rtas_function_table[0]), rtas_function_cmp);
- args->token = cpu_to_be32(10);
- args->nargs = cpu_to_be32(1);
- args->nret = cpu_to_be32(1);
- args->rets = &(args->args[1]);
- args->args[0] = cpu_to_be32(c);
+ return found;
+}
- enter_rtas(__pa(args));
+static const struct rtas_function *rtas_name_to_function(const char *name)
+{
+ return __rtas_name_to_function(name); /* same lookup; result handed out as const */
+}
+
+static DEFINE_XARRAY(rtas_token_to_function_xarray);
+
+static int __init rtas_token_to_function_xarray_init(void)
+{
+ const struct rtas_function *func;
+ int err = 0;
- unlock_rtas(s);
+ for_each_rtas_function(func) {
+ const s32 token = func->token;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ continue;
+
+ err = xa_err(xa_store(&rtas_token_to_function_xarray,
+ token, (void *)func, GFP_KERNEL));
+ if (err)
+ break;
+ }
+
+ return err;
}
+arch_initcall(rtas_token_to_function_xarray_init);
-static void call_rtas_display_status_delay(char c)
+/*
+ * For use by sys_rtas(), where the token value is provided by user
+ * space and we don't want to warn on failed lookups.
+ */
+static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
{
- static int pending_newline = 0; /* did last write end with unprinted newline? */
- static int width = 16;
+ return xa_load(&rtas_token_to_function_xarray, token);
+}
- if (c == '\n') {
- while (width-- > 0)
- call_rtas_display_status(' ');
- width = 16;
- mdelay(500);
- pending_newline = 1;
- } else {
- if (pending_newline) {
- call_rtas_display_status('\r');
- call_rtas_display_status('\n');
- }
- pending_newline = 0;
- if (width--) {
- call_rtas_display_status(c);
- udelay(10000);
+/*
+ * Reverse lookup for deriving the function descriptor from a
+ * known-good token value in contexts where the former is not already
+ * available. @token must be valid, e.g. derived from the result of a
+ * prior lookup against the function table.
+ */
+static const struct rtas_function *rtas_token_to_function(s32 token)
+{
+ const struct rtas_function *func;
+
+ if (WARN_ONCE(token < 0, "invalid token %d", token))
+ return NULL; /* negative tokens are rejected before any lookup */
+
+ func = rtas_token_to_function_untrusted(token); /* xarray lookup, no warning on miss */
+ if (func)
+ return func;
+ /*
+ * Fall back to linear scan in case the reverse mapping hasn't
+ * been initialized yet.
+ */
+ if (xa_empty(&rtas_token_to_function_xarray)) {
+ for_each_rtas_function(func) {
+ if (func->token == token)
+ return func;
}
}
+
+ WARN_ONCE(true, "unexpected failed lookup for token %d", token);
+ return NULL; /* NOTE(review): __do_enter_rtas_trace() dereferences this result unchecked - confirm NULL cannot reach it */
}
-void __init udbg_init_rtas_panel(void)
+/* This is here deliberately so it's only used in this file */
+void enter_rtas(unsigned long);
+
+static void __do_enter_rtas(struct rtas_args *args)
{
- udbg_putc = call_rtas_display_status_delay;
+ enter_rtas(__pa(args));
+ srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
}
-#ifdef CONFIG_UDBG_RTAS_CONSOLE
+static void __do_enter_rtas_trace(struct rtas_args *args)
+{
+ const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token));
-/* If you think you're dying before early_init_dt_scan_rtas() does its
- * work, you can hard code the token values for your firmware here and
- * hardcode rtas.base/entry etc.
- */
-static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE;
-static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE;
+ /*
+ * If there is a per-function lock, it must be held by the
+ * caller.
+ */
+ if (func->lock)
+ lockdep_assert_held(func->lock);
-static void udbg_rtascon_putc(char c)
-{
- int tries;
+ if (args == &rtas_args)
+ lockdep_assert_held(&rtas_lock);
- if (!rtas.base)
- return;
+ trace_rtas_input(args, func->name);
+ trace_rtas_ll_entry(args);
- /* Add CRs before LFs */
- if (c == '\n')
- udbg_rtascon_putc('\r');
+ __do_enter_rtas(args);
- /* if there is more than one character to be displayed, wait a bit */
- for (tries = 0; tries < 16; tries++) {
- if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0)
- break;
- udelay(1000);
- }
+ trace_rtas_ll_exit(args);
+ trace_rtas_output(args, func->name);
}
-static int udbg_rtascon_getc_poll(void)
+static void do_enter_rtas(struct rtas_args *args)
{
- int c;
+ const unsigned long msr = mfmsr();
+ /*
+ * Situations where we want to skip any active tracepoints for
+ * safety reasons:
+ *
+ * 1. The last code executed on an offline CPU as it stops,
+ * i.e. we're about to call stop-self. The tracepoints'
+ * function name lookup uses xarray, which uses RCU, which
+ * isn't valid to call on an offline CPU. Any events
+ * emitted on an offline CPU will be discarded anyway.
+ *
+ * 2. In real mode, as when invoking ibm,nmi-interlock from
+ * the pseries MCE handler. We cannot count on trace
+ * buffers or the entries in rtas_token_to_function_xarray
+ * to be contained in the RMO.
+ */
+ const unsigned long mask = MSR_IR | MSR_DR;
+ const bool can_trace = likely(cpu_online(raw_smp_processor_id()) &&
+ (msr & mask) == mask);
+ /*
+ * Make sure MSR[RI] is currently enabled as it will be forced later
+ * in enter_rtas.
+ */
+ BUG_ON(!(msr & MSR_RI));
- if (!rtas.base)
- return -1;
+ BUG_ON(!irqs_disabled());
- if (rtas_call(rtas_getchar_token, 0, 2, &c))
- return -1;
+ hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */
- return c;
+ if (can_trace)
+ __do_enter_rtas_trace(args);
+ else
+ __do_enter_rtas(args);
}
-static int udbg_rtascon_getc(void)
+struct rtas_t rtas;
+
+DEFINE_SPINLOCK(rtas_data_buf_lock);
+EXPORT_SYMBOL_GPL(rtas_data_buf_lock);
+
+char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K);
+EXPORT_SYMBOL_GPL(rtas_data_buf);
+
+unsigned long rtas_rmo_buf;
+
+/*
+ * If non-NULL, this gets called when the kernel terminates.
+ * This is done like this so rtas_flash can be a module.
+ */
+void (*rtas_flash_term_hook)(int);
+EXPORT_SYMBOL_GPL(rtas_flash_term_hook);
+
+/*
+ * call_rtas_display_status and call_rtas_display_status_delay
+ * are designed only for very early low-level debugging, which
+ * is why the token is hard-coded to 10.
+ */
+static void call_rtas_display_status(unsigned char c)
{
- int c;
+ unsigned long flags;
- while ((c = udbg_rtascon_getc_poll()) == -1)
- ;
+ if (!rtas.base)
+ return;
- return c;
+ raw_spin_lock_irqsave(&rtas_lock, flags);
+ rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c);
+ raw_spin_unlock_irqrestore(&rtas_lock, flags);
}
+static void call_rtas_display_status_delay(char c)
+{
+ static int pending_newline = 0; /* did last write end with unprinted newline? */
+ static int width = 16; /* countdown of characters left on the line; reset to 16 at each '\n' */
+
+ if (c == '\n') {
+ while (width-- > 0) /* pad the remainder of the line with spaces */
+ call_rtas_display_status(' ');
+ width = 16;
+ mdelay(500); /* busy-wait 500 ms after finishing a line */
+ pending_newline = 1; /* the CR/LF itself is emitted lazily, before the next character */
+ } else {
+ if (pending_newline) {
+ call_rtas_display_status('\r');
+ call_rtas_display_status('\n');
+ }
+ pending_newline = 0;
+ if (width--) { /* NOTE(review): non-zero test is true again once width is negative (cf. "width-- > 0" above) - confirm intended */
+ call_rtas_display_status(c);
+ udelay(10000); /* 10 ms pause after each character */
+ }
+ }
+}
-void __init udbg_init_rtas_console(void)
+void __init udbg_init_rtas_panel(void)
{
- udbg_putc = udbg_rtascon_putc;
- udbg_getc = udbg_rtascon_getc;
- udbg_getc_poll = udbg_rtascon_getc_poll;
+ udbg_putc = call_rtas_display_status_delay;
}
-#endif /* CONFIG_UDBG_RTAS_CONSOLE */
void rtas_progress(char *s, unsigned short hex)
{
@@ -232,8 +854,8 @@ void rtas_progress(char *s, unsigned short hex)
"ibm,display-truncation-length", NULL);
of_node_put(root);
}
- display_character = rtas_token("display-character");
- set_indicator = rtas_token("set-indicator");
+ display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER);
+ set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR);
}
if (display_character == RTAS_UNKNOWN_SERVICE) {
@@ -266,7 +888,7 @@ void rtas_progress(char *s, unsigned short hex)
else
rtas_call(display_character, 1, 1, NULL, '\r');
}
-
+
if (row_width)
width = row_width[current_line];
else
@@ -286,9 +908,9 @@ void rtas_progress(char *s, unsigned short hex)
spin_unlock(&progress_lock);
return;
}
-
+
/* RTAS wants CR-LF, not just LF */
-
+
if (*os == '\n') {
rtas_call(display_character, 1, 1, NULL, '\r');
rtas_call(display_character, 1, 1, NULL, '\n');
@@ -298,7 +920,7 @@ void rtas_progress(char *s, unsigned short hex)
*/
rtas_call(display_character, 1, 1, NULL, *os);
}
-
+
if (row_width)
width = row_width[current_line];
else
@@ -307,36 +929,49 @@ void rtas_progress(char *s, unsigned short hex)
width--;
rtas_call(display_character, 1, 1, NULL, *os);
}
-
+
os++;
-
+
/* if we overwrite the screen length */
if (width <= 0)
while ((*os != 0) && (*os != '\n') && (*os != '\r'))
os++;
}
-
+
spin_unlock(&progress_lock);
}
-EXPORT_SYMBOL(rtas_progress); /* needed by rtas_flash module */
+EXPORT_SYMBOL_GPL(rtas_progress); /* needed by rtas_flash module */
int rtas_token(const char *service)
{
+ const struct rtas_function *func; /* table entry matching @service, if any */
const __be32 *tokp;
+
if (rtas.dev == NULL)
return RTAS_UNKNOWN_SERVICE;
+
+ func = rtas_name_to_function(service); /* NULL when @service is not in rtas_function_table */
+ if (func)
+ return func->token; /* known function: token comes from the table entry */
+ /*
+ * The caller is looking up a name that is not known to be an
+ * RTAS function. Either it's a function that needs to be
+ * added to the table, or they're misusing rtas_token() to
+ * access non-function properties of the /rtas node. Warn and
+ * fall back to the legacy behavior.
+ */
+ WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n",
+ service);
+
tokp = of_get_property(rtas.dev, service, NULL);
return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE;
}
-EXPORT_SYMBOL(rtas_token);
-
-int rtas_service_present(const char *service)
-{
- return rtas_token(service) != RTAS_UNKNOWN_SERVICE;
-}
-EXPORT_SYMBOL(rtas_service_present);
+EXPORT_SYMBOL_GPL(rtas_token);
#ifdef CONFIG_RTAS_ERROR_LOGGING
+
+static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
+
/*
* Return the firmware-specified size of the error log buffer
* for all rtas calls that require an error buffer argument.
@@ -344,56 +979,66 @@ EXPORT_SYMBOL(rtas_service_present);
*/
int rtas_get_error_log_max(void)
{
- static int rtas_error_log_max;
- if (rtas_error_log_max)
- return rtas_error_log_max;
-
- rtas_error_log_max = rtas_token ("rtas-error-log-max");
- if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) ||
- (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) {
- printk (KERN_WARNING "RTAS: bad log buffer size %d\n",
- rtas_error_log_max);
- rtas_error_log_max = RTAS_ERROR_LOG_MAX;
- }
return rtas_error_log_max;
}
-EXPORT_SYMBOL(rtas_get_error_log_max);
+
+static void __init init_error_log_max(void)
+{
+ static const char propname[] __initconst = "rtas-error-log-max"; /* property read from rtas.dev below */
+ u32 max;
+
+ if (of_property_read_u32(rtas.dev, propname, &max)) { /* non-zero return: fall back to the default */
+ pr_warn("%s not found, using default of %u\n",
+ propname, RTAS_ERROR_LOG_MAX);
+ max = RTAS_ERROR_LOG_MAX;
+ }
+
+ if (max > RTAS_ERROR_LOG_MAX) { /* never exceed the compile-time maximum */
+ pr_warn("%s = %u, clamping max error log size to %u\n",
+ propname, max, RTAS_ERROR_LOG_MAX);
+ max = RTAS_ERROR_LOG_MAX;
+ }
+
+ rtas_error_log_max = max; /* value later returned by rtas_get_error_log_max() */
+}
static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
-static int rtas_last_error_token;
/** Return a copy of the detailed error text associated with the
* most recent failed call to rtas. Because the error text
* might go stale if there are any other intervening rtas calls,
* this routine must be called atomically with whatever produced
- * the error (i.e. with rtas.lock still held from the previous call).
+ * the error (i.e. with rtas_lock still held from the previous call).
*/
static char *__fetch_rtas_last_error(char *altbuf)
{
+ const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR);
struct rtas_args err_args, save_args;
u32 bufsz;
char *buf = NULL;
- if (rtas_last_error_token == -1)
+ lockdep_assert_held(&rtas_lock);
+
+ if (token == -1)
return NULL;
bufsz = rtas_get_error_log_max();
- err_args.token = cpu_to_be32(rtas_last_error_token);
+ err_args.token = cpu_to_be32(token);
err_args.nargs = cpu_to_be32(2);
err_args.nret = cpu_to_be32(1);
err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf));
err_args.args[1] = cpu_to_be32(bufsz);
err_args.args[2] = 0;
- save_args = rtas.args;
- rtas.args = err_args;
+ save_args = rtas_args;
+ rtas_args = err_args;
- enter_rtas(__pa(&rtas.args));
+ do_enter_rtas(&rtas_args);
- err_args = rtas.args;
- rtas.args = save_args;
+ err_args = rtas_args;
+ rtas_args = save_args;
/* Log the error in the unlikely case that there was one. */
if (unlikely(err_args.args[2] == 0)) {
@@ -401,11 +1046,11 @@ static char *__fetch_rtas_last_error(char *altbuf)
buf = altbuf;
} else {
buf = rtas_err_buf;
- if (mem_init_done)
+ if (slab_is_available())
buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
}
if (buf)
- memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+ memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
}
return buf;
@@ -416,60 +1061,199 @@ static char *__fetch_rtas_last_error(char *altbuf)
#else /* CONFIG_RTAS_ERROR_LOGGING */
#define __fetch_rtas_last_error(x) NULL
#define get_errorlog_buffer() NULL
+static void __init init_error_log_max(void) {}
#endif
+
+static void
+va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
+ va_list list)
+{
+ int i;
+
+ args->token = cpu_to_be32(token); /* header fields are stored via cpu_to_be32() */
+ args->nargs = cpu_to_be32(nargs);
+ args->nret = cpu_to_be32(nret);
+ args->rets = &(args->args[nargs]); /* outputs start directly after the @nargs inputs */
+
+ for (i = 0; i < nargs; ++i) /* NOTE(review): nargs + nret is not range-checked against args->args here - confirm callers guarantee it */
+ args->args[i] = cpu_to_be32(va_arg(list, __u32)); /* each input is read from @list as a __u32 */
+
+ for (i = 0; i < nret; ++i)
+ args->rets[i] = 0; /* clear every output slot before the call */
+
+ do_enter_rtas(args); /* no lock is taken in this function */
+}
+
+/**
+ * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
+ * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
+ * constraints.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @...: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * This function is similar to rtas_call(), but must be used with a
+ * limited set of RTAS calls specifically exempted from the general
+ * requirement that only one RTAS call may be in progress at any
+ * time. Examples include stop-self and ibm,nmi-interlock.
+ */
+void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
+{
+ va_list list;
+
+ va_start(list, nret); /* the variadic inputs follow @nret */
+ va_rtas_call_unlocked(args, token, nargs, nret, list); /* fills @args and enters RTAS */
+ va_end(list);
+}
+
+static bool token_is_restricted_errinjct(s32 token)
+{
+ return token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT) || /* true for either error-injection token */
+ token == rtas_function_token(RTAS_FN_IBM_ERRINJCT); /* rtas_call() checks security_locked_down() for these */
+}
+
+/**
+ * rtas_call() - Invoke an RTAS firmware function.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @outputs: Array of @nret output words.
+ * @...: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * The @nargs and @nret arguments must match the number of input and
+ * output parameters specified for the RTAS function.
+ *
+ * rtas_call() returns RTAS status codes, not conventional Linux errno
+ * values. Callers must translate any failure to an appropriate errno
+ * in syscall context. Most callers of RTAS functions that can return
+ * -2 or 990x should use rtas_busy_delay() to correctly handle those
+ * statuses before calling again.
+ *
+ * The return value descriptions are adapted from 7.2.8 [RTAS] Return
+ * Codes of the PAPR and CHRP specifications.
+ *
+ * Context: Process context preferably, interrupt context if
+ * necessary. Acquires an internal spinlock and may perform
+ * GFP_ATOMIC slab allocation in error path. Unsafe for NMI
+ * context.
+ * Return:
+ * * 0 - RTAS function call succeeded.
+ * * -1 - RTAS function encountered a hardware or
+ * platform error, or the token is invalid,
+ * or the function is restricted by kernel policy.
+ * * -2 - Specs say "A necessary hardware device was busy,
+ * and the requested function could not be
+ * performed. The operation should be retried at
+ * a later time." This is misleading, at least with
+ * respect to current RTAS implementations. What it
+ * usually means in practice is that the function
+ * could not be completed while meeting RTAS's
+ * deadline for returning control to the OS (250us
+ * for PAPR/PowerVM, typically), but the call may be
+ * immediately reattempted to resume work on it.
+ * * -3 - Parameter error.
+ * * -7 - Unexpected state change.
+ * * 9000...9899 - Vendor-specific success codes.
+ * * 9900...9905 - Advisory extended delay. Caller should try
+ * again after ~10^x ms has elapsed, where x is
+ * the last digit of the status [0-5]. Again going
+ * beyond the PAPR text, 990x on PowerVM indicates
+ * contention for RTAS-internal resources. Other
+ * RTAS call sequences in progress should be
+ * allowed to complete before reattempting the
+ * call.
+ * * -9000 - Multi-level isolation error.
+ * * -9999...-9004 - Vendor-specific error codes.
+ * * Additional negative values - Function-specific error.
+ * * Additional positive values - Function-specific success.
+ */
int rtas_call(int token, int nargs, int nret, int *outputs, ...)
{
+ struct pin_cookie cookie;
va_list list;
int i;
- unsigned long s;
- struct rtas_args *rtas_args;
+ unsigned long flags;
+ struct rtas_args *args;
char *buff_copy = NULL;
int ret;
if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
return -1;
- s = lock_rtas();
- rtas_args = &rtas.args;
+ if (token_is_restricted_errinjct(token)) {
+ /*
+ * It would be nicer to not discard the error value
+ * from security_locked_down(), but callers expect an
+ * RTAS status, not an errno.
+ */
+ if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION))
+ return -1;
+ }
- rtas_args->token = cpu_to_be32(token);
- rtas_args->nargs = cpu_to_be32(nargs);
- rtas_args->nret = cpu_to_be32(nret);
- rtas_args->rets = &(rtas_args->args[nargs]);
- va_start(list, outputs);
- for (i = 0; i < nargs; ++i)
- rtas_args->args[i] = cpu_to_be32(va_arg(list, __u32));
- va_end(list);
+ if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) {
+ WARN_ON_ONCE(1);
+ return -1;
+ }
- for (i = 0; i < nret; ++i)
- rtas_args->rets[i] = 0;
+ raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
- enter_rtas(__pa(rtas_args));
+ /* We use the global rtas args buffer */
+ args = &rtas_args;
+
+ va_start(list, outputs);
+ va_rtas_call_unlocked(args, token, nargs, nret, list);
+ va_end(list);
/* A -1 return code indicates that the last command couldn't
be completed due to a hardware error. */
- if (be32_to_cpu(rtas_args->rets[0]) == -1)
+ if (be32_to_cpu(args->rets[0]) == -1)
buff_copy = __fetch_rtas_last_error(NULL);
if (nret > 1 && outputs != NULL)
for (i = 0; i < nret-1; ++i)
- outputs[i] = be32_to_cpu(rtas_args->rets[i+1]);
- ret = (nret > 0)? be32_to_cpu(rtas_args->rets[0]): 0;
+ outputs[i] = be32_to_cpu(args->rets[i + 1]);
+ ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
- unlock_rtas(s);
+ lockdep_unpin_lock(&rtas_lock, cookie);
+ raw_spin_unlock_irqrestore(&rtas_lock, flags);
if (buff_copy) {
log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
- if (mem_init_done)
+ if (slab_is_available())
kfree(buff_copy);
}
return ret;
}
-EXPORT_SYMBOL(rtas_call);
+EXPORT_SYMBOL_GPL(rtas_call);
-/* For RTAS_BUSY (-2), delay for 1 millisecond. For an extended busy status
- * code of 990n, perform the hinted delay of 10^n (last digit) milliseconds.
+/**
+ * rtas_busy_delay_time() - From an RTAS status value, calculate the
+ * suggested delay time in milliseconds.
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Any context.
+ *
+ * Return:
+ * * 100000 - If @status is 9905.
+ * * 10000 - If @status is 9904.
+ * * 1000 - If @status is 9903.
+ * * 100 - If @status is 9902.
+ * * 10 - If @status is 9901.
+ * * 1 - If @status is either 9900 or -2. This is "wrong" for -2, but
+ * some callers depend on this behavior, and the worst outcome
+ * is that they will delay for longer than necessary.
+ * * 0 - If @status is not a busy or extended delay value.
*/
unsigned int rtas_busy_delay_time(int status)
{
@@ -478,62 +1262,162 @@ unsigned int rtas_busy_delay_time(int status)
if (status == RTAS_BUSY) {
ms = 1;
- } else if (status >= 9900 && status <= 9905) {
- order = status - 9900;
+ } else if (status >= RTAS_EXTENDED_DELAY_MIN &&
+ status <= RTAS_EXTENDED_DELAY_MAX) {
+ order = status - RTAS_EXTENDED_DELAY_MIN;
for (ms = 1; order > 0; order--)
ms *= 10;
}
return ms;
}
-EXPORT_SYMBOL(rtas_busy_delay_time);
-/* For an RTAS busy status code, perform the hinted delay. */
-unsigned int rtas_busy_delay(int status)
+/*
+ * Early boot fallback for rtas_busy_delay().
+ */
+static bool __init rtas_busy_delay_early(int status)
+{
+ static size_t successive_ext_delays __initdata; /* extended-delay statuses seen in a row */
+ bool retry; /* returned: true means the caller should call again */
+
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ /*
+ * In the unlikely case that we receive an extended
+ * delay status in early boot, the OS is probably not
+ * the cause, and there's nothing we can do to clear
+ * the condition. Best we can do is delay for a bit
+ * and hope it's transient. Lie to the caller if it
+ * seems like we're stuck in a retry loop.
+ */
+ mdelay(1); /* fixed 1 ms busy-wait; the hinted delay length is not consulted */
+ retry = true;
+ successive_ext_delays += 1;
+ if (successive_ext_delays > 1000) { /* more than 1000 in a row: stop retrying */
+ pr_err("too many extended delays, giving up\n");
+ dump_stack();
+ retry = false; /* the "lie": report no-retry although @status asked for one */
+ successive_ext_delays = 0;
+ }
+ break;
+ case RTAS_BUSY:
+ retry = true; /* retry at once, no delay */
+ successive_ext_delays = 0; /* any other status ends the streak */
+ break;
+ default:
+ retry = false; /* not a busy/delay status */
+ successive_ext_delays = 0;
+ break;
+ }
+
+ return retry;
+}
+
+/**
+ * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
+ *
+ * @status: a value returned from rtas_call() or similar APIs which return
+ * the status of a RTAS function call.
+ *
+ * Context: Process context. May sleep or schedule.
+ *
+ * Return:
+ * * true - @status is RTAS_BUSY or an extended delay hint. The
+ * caller may assume that the CPU has been yielded if necessary,
+ * and that an appropriate delay for @status has elapsed.
+ * Generally the caller should reattempt the RTAS call which
+ * yielded @status.
+ *
+ * * false - @status is neither %RTAS_BUSY nor an extended delay hint. The
+ * caller is responsible for handling @status.
+ */
+bool __ref rtas_busy_delay(int status)
{
unsigned int ms;
+ bool ret;
- might_sleep();
- ms = rtas_busy_delay_time(status);
- if (ms && need_resched())
- msleep(ms);
+ /*
+ * Can't do timed sleeps before timekeeping is up.
+ */
+ if (system_state < SYSTEM_SCHEDULING)
+ return rtas_busy_delay_early(status);
+
+ switch (status) {
+ case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
+ ret = true;
+ ms = rtas_busy_delay_time(status);
+ /*
+ * The extended delay hint can be as high as 100 seconds.
+ * Surely any function returning such a status is either
+ * buggy or isn't going to be significantly slowed by us
+ * polling at 1HZ. Clamp the sleep time to one second.
+ */
+ ms = clamp(ms, 1U, 1000U);
+ /*
+ * The delay hint is an order-of-magnitude suggestion, not a
+ * minimum. It is fine, possibly even advantageous, for us to
+ * pause for less time than hinted. To make sure pause time will
+ * not be way longer than requested independent of HZ
+ * configuration, use fsleep(). See fsleep() for details of
+ * used sleeping functions.
+ */
+ fsleep(ms * 1000);
+ break;
+ case RTAS_BUSY:
+ ret = true;
+ /*
+ * We should call again immediately if there's no other
+ * work to do.
+ */
+ cond_resched();
+ break;
+ default:
+ ret = false;
+ /*
+ * Not a busy or extended delay status; the caller should
+ * handle @status itself. Ensure we warn on misuses in
+ * atomic context regardless.
+ */
+ might_sleep();
+ break;
+ }
- return ms;
+ return ret;
}
-EXPORT_SYMBOL(rtas_busy_delay);
+EXPORT_SYMBOL_GPL(rtas_busy_delay);
-static int rtas_error_rc(int rtas_rc)
+int rtas_error_rc(int rtas_rc)
{
int rc;
switch (rtas_rc) {
- case -1: /* Hardware Error */
- rc = -EIO;
- break;
- case -3: /* Bad indicator/domain/etc */
- rc = -EINVAL;
- break;
- case -9000: /* Isolation error */
- rc = -EFAULT;
- break;
- case -9001: /* Outstanding TCE/PTE */
- rc = -EEXIST;
- break;
- case -9002: /* No usable slot */
- rc = -ENODEV;
- break;
- default:
- printk(KERN_ERR "%s: unexpected RTAS error %d\n",
- __func__, rtas_rc);
- rc = -ERANGE;
- break;
+ case RTAS_HARDWARE_ERROR: /* Hardware Error */
+ rc = -EIO;
+ break;
+ case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */
+ rc = -EINVAL;
+ break;
+ case -9000: /* Isolation error */
+ rc = -EFAULT;
+ break;
+ case -9001: /* Outstanding TCE/PTE */
+ rc = -EEXIST;
+ break;
+ case -9002: /* No usable slot */
+ rc = -ENODEV;
+ break;
+ default:
+ pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
+ rc = -ERANGE;
+ break;
}
return rc;
}
+EXPORT_SYMBOL_GPL(rtas_error_rc);
int rtas_get_power_level(int powerdomain, int *level)
{
- int token = rtas_token("get-power-level");
+ int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -546,11 +1430,11 @@ int rtas_get_power_level(int powerdomain, int *level)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_get_power_level);
+EXPORT_SYMBOL_GPL(rtas_get_power_level);
int rtas_set_power_level(int powerdomain, int level, int *setlevel)
{
- int token = rtas_token("set-power-level");
+ int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -564,11 +1448,11 @@ int rtas_set_power_level(int powerdomain, int level, int *setlevel)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_set_power_level);
+EXPORT_SYMBOL_GPL(rtas_set_power_level);
int rtas_get_sensor(int sensor, int index, int *state)
{
- int token = rtas_token("get-sensor-state");
+ int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -582,7 +1466,24 @@ int rtas_get_sensor(int sensor, int index, int *state)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_get_sensor);
+EXPORT_SYMBOL_GPL(rtas_get_sensor);
+
+int rtas_get_sensor_fast(int sensor, int index, int *state)
+{
+ int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
+ int rc;
+
+ if (token == RTAS_UNKNOWN_SERVICE)
+ return -ENOENT; /* function not available: errno, not an RTAS status */
+
+ rc = rtas_call(token, 2, 2, state, sensor, index); /* single attempt; 2 inputs, 2 outputs (status, then *state) */
+ WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
+ rc <= RTAS_EXTENDED_DELAY_MAX)); /* busy/delay statuses are only warned about, never retried */
+
+ if (rc < 0)
+ return rtas_error_rc(rc); /* negative RTAS status mapped to an errno */
+ return rc; /* non-negative status passed through unchanged */
+}
bool rtas_indicator_present(int token, int *maxindex)
{
@@ -608,11 +1509,10 @@ bool rtas_indicator_present(int token, int *maxindex)
return false;
}
-EXPORT_SYMBOL(rtas_indicator_present);
int rtas_set_indicator(int indicator, int index, int new_value)
{
- int token = rtas_token("set-indicator");
+ int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
int rc;
if (token == RTAS_UNKNOWN_SERVICE)
@@ -626,22 +1526,23 @@ int rtas_set_indicator(int indicator, int index, int new_value)
return rtas_error_rc(rc);
return rc;
}
-EXPORT_SYMBOL(rtas_set_indicator);
+EXPORT_SYMBOL_GPL(rtas_set_indicator);
/*
* Ignoring RTAS extended delay
*/
int rtas_set_indicator_fast(int indicator, int index, int new_value)
{
+ int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
int rc;
- int token = rtas_token("set-indicator");
if (token == RTAS_UNKNOWN_SERVICE)
return -ENOENT;
rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
- WARN_ON(rc == -2 || (rc >= 9900 && rc <= 9905));
+ WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
+ rc <= RTAS_EXTENDED_DELAY_MAX));
if (rc < 0)
return rtas_error_rc(rc);
@@ -649,12 +1550,70 @@ int rtas_set_indicator_fast(int indicator, int index, int new_value)
return rc;
}
-void rtas_restart(char *cmd)
+/**
+ * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR.
+ *
+ * @fw_status: RTAS call status will be placed here if not NULL.
+ *
+ * rtas_ibm_suspend_me() should be called only on a CPU which has
+ * received H_CONTINUE from the H_JOIN hcall. All other active CPUs
+ * should be waiting to return from H_JOIN.
+ *
+ * rtas_ibm_suspend_me() may suspend execution of the OS
+ * indefinitely. Callers should take appropriate measures upon return, such as
+ * resetting watchdog facilities.
+ *
+ * Callers may choose to retry this call if @fw_status is
+ * %RTAS_THREADS_ACTIVE.
+ *
+ * Return:
+ * 0 - The partition has resumed from suspend, possibly after
+ * migration to a different host.
+ * -ECANCELED - The operation was aborted.
+ * -EAGAIN - There were other CPUs not in H_JOIN at the time of the call.
+ * -EBUSY - Some other condition prevented the suspend from succeeding.
+ * -EIO - Hardware/platform error.
+ */
+int rtas_ibm_suspend_me(int *fw_status)
+{
+ int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME);
+ int fwrc;
+ int ret;
+
+ fwrc = rtas_call(token, 0, 1, NULL);
+
+ switch (fwrc) {
+ case 0:
+ ret = 0;
+ break;
+ case RTAS_SUSPEND_ABORTED:
+ ret = -ECANCELED;
+ break;
+ case RTAS_THREADS_ACTIVE:
+ ret = -EAGAIN;
+ break;
+ case RTAS_NOT_SUSPENDABLE:
+ case RTAS_OUTSTANDING_COPROC:
+ ret = -EBUSY;
+ break;
+ case -1:
+ default:
+ ret = -EIO;
+ break;
+ }
+
+ if (fw_status)
+ *fw_status = fwrc;
+
+ return ret;
+}
+
+void __noreturn rtas_restart(char *cmd)
{
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_RESTART);
- printk("RTAS system-reboot returned %d\n",
- rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
+ pr_emerg("system-reboot returned %d\n",
+ rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL));
for (;;);
}
@@ -663,26 +1622,29 @@ void rtas_power_off(void)
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_POWER_OFF);
/* allow power on only with power button press */
- printk("RTAS power-off returned %d\n",
- rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ pr_emerg("power-off returned %d\n",
+ rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1));
for (;;);
}
-void rtas_halt(void)
+void __noreturn rtas_halt(void)
{
if (rtas_flash_term_hook)
rtas_flash_term_hook(SYS_HALT);
/* allow power on only with power button press */
- printk("RTAS power-off returned %d\n",
- rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+ pr_emerg("power-off returned %d\n",
+ rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1));
for (;;);
}
/* Must be in the RMO region, so we place it here */
static char rtas_os_term_buf[2048];
+static bool ibm_extended_os_term;
void rtas_os_term(char *str)
{
+ s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM);
+ static struct rtas_args args;
int status;
/*
@@ -691,303 +1653,69 @@ void rtas_os_term(char *str)
* this property may terminate the partition which we want to avoid
* since it interferes with panic_timeout.
*/
- if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
- RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
+
+ if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term)
return;
snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
+ /*
+ * Keep calling as long as RTAS returns a "try again" status,
+ * but don't use rtas_busy_delay(), which potentially
+ * schedules.
+ */
do {
- status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
- __pa(rtas_os_term_buf));
- } while (rtas_busy_delay(status));
+ rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf));
+ status = be32_to_cpu(args.rets[0]);
+ } while (rtas_busy_delay_time(status));
if (status != 0)
- printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
-}
-
-static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
-#ifdef CONFIG_PPC_PSERIES
-static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
-{
- u16 slb_size = mmu_slb_size;
- int rc = H_MULTI_THREADS_ACTIVE;
- int cpu;
-
- slb_set_size(SLB_MIN_SIZE);
- printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", smp_processor_id());
-
- while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
- !atomic_read(&data->error))
- rc = rtas_call(data->token, 0, 1, NULL);
-
- if (rc || atomic_read(&data->error)) {
- printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc);
- slb_set_size(slb_size);
- }
-
- if (atomic_read(&data->error))
- rc = atomic_read(&data->error);
-
- atomic_set(&data->error, rc);
- pSeries_coalesce_init();
-
- if (wake_when_done) {
- atomic_set(&data->done, 1);
-
- for_each_online_cpu(cpu)
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
- }
-
- if (atomic_dec_return(&data->working) == 0)
- complete(data->complete);
-
- return rc;
-}
-
-int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
-{
- atomic_inc(&data->working);
- return __rtas_suspend_last_cpu(data, 0);
-}
-
-static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int wake_when_done)
-{
- long rc = H_SUCCESS;
- unsigned long msr_save;
- int cpu;
-
- atomic_inc(&data->working);
-
- /* really need to ensure MSR.EE is off for H_JOIN */
- msr_save = mfmsr();
- mtmsr(msr_save & ~(MSR_EE));
-
- while (rc == H_SUCCESS && !atomic_read(&data->done) && !atomic_read(&data->error))
- rc = plpar_hcall_norets(H_JOIN);
-
- mtmsr(msr_save);
-
- if (rc == H_SUCCESS) {
- /* This cpu was prodded and the suspend is complete. */
- goto out;
- } else if (rc == H_CONTINUE) {
- /* All other cpus are in H_JOIN, this cpu does
- * the suspend.
- */
- return __rtas_suspend_last_cpu(data, wake_when_done);
- } else {
- printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
- smp_processor_id(), rc);
- atomic_set(&data->error, rc);
- }
-
- if (wake_when_done) {
- atomic_set(&data->done, 1);
-
- /* This cpu did the suspend or got an error; in either case,
- * we need to prod all other other cpus out of join state.
- * Extra prods are harmless.
- */
- for_each_online_cpu(cpu)
- plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
- }
-out:
- if (atomic_dec_return(&data->working) == 0)
- complete(data->complete);
- return rc;
-}
-
-int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
-{
- return __rtas_suspend_cpu(data, 0);
-}
-
-static void rtas_percpu_suspend_me(void *info)
-{
- __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
-}
-
-enum rtas_cpu_state {
- DOWN,
- UP,
-};
-
-#ifndef CONFIG_SMP
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
- cpumask_var_t cpus)
-{
- if (!cpumask_empty(cpus)) {
- cpumask_clear(cpus);
- return -EINVAL;
- } else
- return 0;
-}
-#else
-/* On return cpumask will be altered to indicate CPUs changed.
- * CPUs with states changed will be set in the mask,
- * CPUs with status unchanged will be unset in the mask. */
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
- cpumask_var_t cpus)
-{
- int cpu;
- int cpuret = 0;
- int ret = 0;
-
- if (cpumask_empty(cpus))
- return 0;
-
- for_each_cpu(cpu, cpus) {
- switch (state) {
- case DOWN:
- cpuret = cpu_down(cpu);
- break;
- case UP:
- cpuret = cpu_up(cpu);
- break;
- }
- if (cpuret) {
- pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
- __func__,
- ((state == UP) ? "up" : "down"),
- cpu, cpuret);
- if (!ret)
- ret = cpuret;
- if (state == UP) {
- /* clear bits for unchanged cpus, return */
- cpumask_shift_right(cpus, cpus, cpu);
- cpumask_shift_left(cpus, cpus, cpu);
- break;
- } else {
- /* clear bit for unchanged cpu, continue */
- cpumask_clear_cpu(cpu, cpus);
- }
- }
- }
-
- return ret;
-}
-#endif
-
-int rtas_online_cpus_mask(cpumask_var_t cpus)
-{
- int ret;
-
- ret = rtas_cpu_state_change_mask(UP, cpus);
-
- if (ret) {
- cpumask_var_t tmp_mask;
-
- if (!alloc_cpumask_var(&tmp_mask, GFP_TEMPORARY))
- return ret;
-
- /* Use tmp_mask to preserve cpus mask from first failure */
- cpumask_copy(tmp_mask, cpus);
- rtas_offline_cpus_mask(tmp_mask);
- free_cpumask_var(tmp_mask);
- }
-
- return ret;
-}
-EXPORT_SYMBOL(rtas_online_cpus_mask);
-
-int rtas_offline_cpus_mask(cpumask_var_t cpus)
-{
- return rtas_cpu_state_change_mask(DOWN, cpus);
+ pr_emerg("ibm,os-term call failed %d\n", status);
}
-EXPORT_SYMBOL(rtas_offline_cpus_mask);
-int rtas_ibm_suspend_me(struct rtas_args *args)
+/**
+ * rtas_activate_firmware() - Activate a new version of firmware.
+ *
+ * Context: This function may sleep.
+ *
+ * Activate a new version of partition firmware. The OS must call this
+ * after resuming from a partition hibernation or migration in order
+ * to maintain the ability to perform live firmware updates. It's not
+ * catastrophic for this method to be absent or to fail; just log the
+ * condition in that case.
+ */
+void rtas_activate_firmware(void)
{
- long state;
- long rc;
- unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
- struct rtas_suspend_me_data data;
- DECLARE_COMPLETION_ONSTACK(done);
- cpumask_var_t offline_mask;
- int cpuret;
-
- if (!rtas_service_present("ibm,suspend-me"))
- return -ENOSYS;
-
- /* Make sure the state is valid */
- rc = plpar_hcall(H_VASI_STATE, retbuf,
- ((u64)args->args[0] << 32) | args->args[1]);
-
- state = retbuf[0];
-
- if (rc) {
- printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned %ld\n",rc);
- return rc;
- } else if (state == H_VASI_ENABLED) {
- args->args[args->nargs] = RTAS_NOT_SUSPENDABLE;
- return 0;
- } else if (state != H_VASI_SUSPENDING) {
- printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state %ld\n",
- state);
- args->args[args->nargs] = -1;
- return 0;
- }
+ int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
+ int fwrc;
- if (!alloc_cpumask_var(&offline_mask, GFP_TEMPORARY))
- return -ENOMEM;
-
- atomic_set(&data.working, 0);
- atomic_set(&data.done, 0);
- atomic_set(&data.error, 0);
- data.token = rtas_token("ibm,suspend-me");
- data.complete = &done;
-
- /* All present CPUs must be online */
- cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
- cpuret = rtas_online_cpus_mask(offline_mask);
- if (cpuret) {
- pr_err("%s: Could not bring present CPUs online.\n", __func__);
- atomic_set(&data.error, cpuret);
- goto out;
+ if (token == RTAS_UNKNOWN_SERVICE) {
+ pr_notice("ibm,activate-firmware method unavailable\n");
+ return;
}
- stop_topology_update();
+ mutex_lock(&rtas_ibm_activate_firmware_lock);
- /* Call function on all CPUs. One of us will make the
- * rtas call
- */
- if (on_each_cpu(rtas_percpu_suspend_me, &data, 0))
- atomic_set(&data.error, -EINVAL);
-
- wait_for_completion(&done);
-
- if (atomic_read(&data.error) != 0)
- printk(KERN_ERR "Error doing global join\n");
-
- start_topology_update();
+ do {
+ fwrc = rtas_call(token, 0, 1, NULL);
+ } while (rtas_busy_delay(fwrc));
- /* Take down CPUs not online prior to suspend */
- cpuret = rtas_offline_cpus_mask(offline_mask);
- if (cpuret)
- pr_warn("%s: Could not restore CPUs to offline state.\n",
- __func__);
+ mutex_unlock(&rtas_ibm_activate_firmware_lock);
-out:
- free_cpumask_var(offline_mask);
- return atomic_read(&data.error);
-}
-#else /* CONFIG_PPC_PSERIES */
-int rtas_ibm_suspend_me(struct rtas_args *args)
-{
- return -ENOSYS;
+ if (fwrc)
+ pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
}
-#endif
/**
- * Find a specific pseries error log in an RTAS extended event log.
+ * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
+ * extended event log.
* @log: RTAS error/event log
* @section_id: two character section identifier
*
- * Returns a pointer to the specified errorlog or NULL if not found.
+ * Return: A pointer to the specified errorlog or NULL if not found.
*/
-struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
- uint16_t section_id)
+noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+ uint16_t section_id)
{
struct rtas_ext_event_log_v6 *ext_log =
(struct rtas_ext_event_log_v6 *)log->buffer;
@@ -1016,18 +1744,124 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
return NULL;
}
+/*
+ * The sys_rtas syscall, as originally designed, allows root to pass
+ * arbitrary physical addresses to RTAS calls. A number of RTAS calls
+ * can be abused to write to arbitrary memory and do other things that
+ * are potentially harmful to system integrity, and thus should only
+ * be used inside the kernel and not exposed to userspace.
+ *
+ * All known legitimate users of the sys_rtas syscall will only ever
+ * pass addresses that fall within the RMO buffer, and use a known
+ * subset of RTAS calls.
+ *
+ * Accordingly, we filter RTAS requests to check that the call is
+ * permitted, and that provided pointers fall within the RMO buffer.
+ * If a function is allowed to be invoked via the syscall, then its
+ * entry in the rtas_functions table points to a rtas_filter that
+ * describes its constraints, with the indexes of the parameters which
+ * are expected to contain addresses and sizes of buffers allocated
+ * inside the RMO buffer.
+ */
+
+static bool in_rmo_buf(u32 base, u32 end)
+{
+ return base >= rtas_rmo_buf &&
+ base < (rtas_rmo_buf + RTAS_USER_REGION_SIZE) &&
+ base <= end &&
+ end >= rtas_rmo_buf &&
+ end < (rtas_rmo_buf + RTAS_USER_REGION_SIZE);
+}
+
+static bool block_rtas_call(const struct rtas_function *func, int nargs,
+ struct rtas_args *args)
+{
+ const struct rtas_filter *f;
+ const bool is_platform_dump =
+ func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP];
+ const bool is_config_conn =
+ func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR];
+ u32 base, size, end;
+
+ /*
+ * Only functions with filters attached are allowed.
+ */
+ f = func->filter;
+ if (!f)
+ goto err;
+ /*
+ * And some functions aren't allowed on LE.
+ */
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le)
+ goto err;
+
+ if (f->buf_idx1 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx1]);
+ if (f->size_idx1 != -1)
+ size = be32_to_cpu(args->args[f->size_idx1]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,platform-dump - NULL buffer
+ * address is used to indicate end of dump processing
+ */
+ if (is_platform_dump && base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ if (f->buf_idx2 != -1) {
+ base = be32_to_cpu(args->args[f->buf_idx2]);
+ if (f->size_idx2 != -1)
+ size = be32_to_cpu(args->args[f->size_idx2]);
+ else if (f->fixed_size)
+ size = f->fixed_size;
+ else
+ size = 1;
+ end = base + size - 1;
+
+ /*
+ * Special case for ibm,configure-connector where the
+ * address can be 0
+ */
+ if (is_config_conn && base == 0)
+ return false;
+
+ if (!in_rmo_buf(base, end))
+ goto err;
+ }
+
+ return false;
+err:
+ pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
+ pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
+ func->name, nargs, current->comm);
+ return true;
+}
+
/* We assume to be passed big endian arguments */
-asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
+SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
+ const struct rtas_function *func;
+ struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
char *buff_copy, *errbuf = NULL;
int nargs, nret, token;
- int rc;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!rtas.entry)
+ return -EINVAL;
+
if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
return -EFAULT;
@@ -1035,44 +1869,89 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
nret = be32_to_cpu(args.nret);
token = be32_to_cpu(args.token);
- if (nargs > ARRAY_SIZE(args.args)
+ if (nargs >= ARRAY_SIZE(args.args)
|| nret > ARRAY_SIZE(args.args)
|| nargs + nret > ARRAY_SIZE(args.args))
return -EINVAL;
+ nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args));
+ nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs);
+
/* Copy in args. */
if (copy_from_user(args.args, uargs->args,
nargs * sizeof(rtas_arg_t)) != 0)
return -EFAULT;
- if (token == RTAS_UNKNOWN_SERVICE)
+ /*
+ * If this token doesn't correspond to a function the kernel
+ * understands, you're not allowed to call it.
+ */
+ func = rtas_token_to_function_untrusted(token);
+ if (!func)
return -EINVAL;
args.rets = &args.args[nargs];
memset(args.rets, 0, nret * sizeof(rtas_arg_t));
+ if (block_rtas_call(func, nargs, &args))
+ return -EINVAL;
+
+ if (token_is_restricted_errinjct(token)) {
+ int err;
+
+ err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION);
+ if (err)
+ return err;
+ }
+
/* Need to handle ibm,suspend_me call specially */
- if (token == ibm_suspend_me_token) {
- rc = rtas_ibm_suspend_me(&args);
- if (rc)
+ if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) {
+
+ /*
+ * rtas_ibm_suspend_me assumes the streamid handle is in cpu
+ * endian, or at least the hcall within it requires it.
+ */
+ int rc = 0;
+ u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
+ | be32_to_cpu(args.args[1]);
+ rc = rtas_syscall_dispatch_ibm_suspend_me(handle);
+ if (rc == -EAGAIN)
+ args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
+ else if (rc == -EIO)
+ args.rets[0] = cpu_to_be32(-1);
+ else if (rc)
return rc;
goto copy_return;
}
buff_copy = get_errorlog_buffer();
- flags = lock_rtas();
+ /*
+ * If this function has a mutex assigned to it, we must
+ * acquire it to avoid interleaving with any kernel-based uses
+ * of the same function. Kernel-based sequences acquire the
+ * appropriate mutex explicitly.
+ */
+ if (func->lock)
+ mutex_lock(func->lock);
- rtas.args = args;
- enter_rtas(__pa(&rtas.args));
- args = rtas.args;
+ raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
+
+ rtas_args = args;
+ do_enter_rtas(&rtas_args);
+ args = rtas_args;
/* A -1 return code indicates that the last command couldn't
be completed due to a hardware error. */
if (be32_to_cpu(args.rets[0]) == -1)
errbuf = __fetch_rtas_last_error(buff_copy);
- unlock_rtas(flags);
+ lockdep_unpin_lock(&rtas_lock, cookie);
+ raw_spin_unlock_irqrestore(&rtas_lock, flags);
+
+ if (func->lock)
+ mutex_unlock(func->lock);
if (buff_copy) {
if (errbuf)
@@ -1090,53 +1969,110 @@ asmlinkage int ppc_rtas(struct rtas_args __user *uargs)
return 0;
}
+static void __init rtas_function_table_init(void)
+{
+ struct property *prop;
+
+ for (size_t i = 0; i < ARRAY_SIZE(rtas_function_table); ++i) {
+ struct rtas_function *curr = &rtas_function_table[i];
+ struct rtas_function *prior;
+ int cmp;
+
+ curr->token = RTAS_UNKNOWN_SERVICE;
+
+ if (i == 0)
+ continue;
+ /*
+ * Ensure table is sorted correctly for binary search
+ * on function names.
+ */
+ prior = &rtas_function_table[i - 1];
+
+ cmp = strcmp(prior->name, curr->name);
+ if (cmp < 0)
+ continue;
+
+ if (cmp == 0) {
+ pr_err("'%s' has duplicate function table entries\n",
+ curr->name);
+ } else {
+ pr_err("function table unsorted: '%s' wrongly precedes '%s'\n",
+ prior->name, curr->name);
+ }
+ }
+
+ for_each_property_of_node(rtas.dev, prop) {
+ struct rtas_function *func;
+
+ if (prop->length != sizeof(u32))
+ continue;
+
+ func = __rtas_name_to_function(prop->name);
+ if (!func)
+ continue;
+
+ func->token = be32_to_cpup((__be32 *)prop->value);
+
+ pr_debug("function %s has token %u\n", func->name, func->token);
+ }
+}
+
/*
- * Call early during boot, before mem init or bootmem, to retrieve the RTAS
- * informations from the device-tree and allocate the RMO buffer for userland
+ * Call early during boot, before mem init, to retrieve the RTAS
+ * information from the device-tree and allocate the RMO buffer for userland
* accesses.
*/
void __init rtas_initialize(void)
{
unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
+ u32 base, size, entry;
+ int no_base, no_size, no_entry;
/* Get RTAS dev node and fill up our "rtas" structure with infos
* about it.
*/
rtas.dev = of_find_node_by_name(NULL, "rtas");
- if (rtas.dev) {
- const __be32 *basep, *entryp, *sizep;
-
- basep = of_get_property(rtas.dev, "linux,rtas-base", NULL);
- sizep = of_get_property(rtas.dev, "rtas-size", NULL);
- if (basep != NULL && sizep != NULL) {
- rtas.base = __be32_to_cpu(*basep);
- rtas.size = __be32_to_cpu(*sizep);
- entryp = of_get_property(rtas.dev,
- "linux,rtas-entry", NULL);
- if (entryp == NULL) /* Ugh */
- rtas.entry = rtas.base;
- else
- rtas.entry = __be32_to_cpu(*entryp);
- } else
- rtas.dev = NULL;
- }
if (!rtas.dev)
return;
+ no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base);
+ no_size = of_property_read_u32(rtas.dev, "rtas-size", &size);
+ if (no_base || no_size) {
+ of_node_put(rtas.dev);
+ rtas.dev = NULL;
+ return;
+ }
+
+ rtas.base = base;
+ rtas.size = size;
+ no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
+ rtas.entry = no_entry ? rtas.base : entry;
+
+ init_error_log_max();
+
+ /* Must be called before any function token lookups */
+ rtas_function_table_init();
+
+ /*
+ * Discover this now to avoid a device tree lookup in the
+ * panic path.
+ */
+ ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term");
+
/* If RTAS was found, allocate the RMO buffer for it and look for
* the stop-self token if any
*/
#ifdef CONFIG_PPC64
- if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) {
+ if (firmware_has_feature(FW_FEATURE_LPAR))
rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
- ibm_suspend_me_token = rtas_token("ibm,suspend-me");
- }
#endif
- rtas_rmo_buf = memblock_alloc_base(RTAS_RMOBUF_MAX, PAGE_SIZE, rtas_region);
+ rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
+ 0, rtas_region);
+ if (!rtas_rmo_buf)
+ panic("ERROR: RTAS: Failed to allocate %lx bytes below %pa\n",
+ PAGE_SIZE, &rtas_region);
-#ifdef CONFIG_RTAS_ERROR_LOGGING
- rtas_last_error_token = rtas_token("rtas-last-error");
-#endif
+ rtas_work_area_reserve_arena(rtas_region);
}
int __init early_init_dt_scan_rtas(unsigned long node,
@@ -1151,48 +2087,38 @@ int __init early_init_dt_scan_rtas(unsigned long node,
entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
sizep = of_get_flat_dt_prop(node, "rtas-size", NULL);
+#ifdef CONFIG_PPC64
+ /* need this feature to decide the crashkernel offset */
+ if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
+ powerpc_firmware_features |= FW_FEATURE_LPAR;
+#endif
+
if (basep && entryp && sizep) {
rtas.base = *basep;
rtas.entry = *entryp;
rtas.size = *sizep;
}
-#ifdef CONFIG_UDBG_RTAS_CONSOLE
- basep = of_get_flat_dt_prop(node, "put-term-char", NULL);
- if (basep)
- rtas_putchar_token = *basep;
-
- basep = of_get_flat_dt_prop(node, "get-term-char", NULL);
- if (basep)
- rtas_getchar_token = *basep;
-
- if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
- rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
- udbg_init_rtas_console();
-
-#endif
-
/* break now */
return 1;
}
-static arch_spinlock_t timebase_lock;
+static DEFINE_RAW_SPINLOCK(timebase_lock);
static u64 timebase = 0;
void rtas_give_timebase(void)
{
unsigned long flags;
- local_irq_save(flags);
+ raw_spin_lock_irqsave(&timebase_lock, flags);
hard_irq_disable();
- arch_spin_lock(&timebase_lock);
- rtas_call(rtas_token("freeze-time-base"), 0, 1, NULL);
+ rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL);
timebase = get_tb();
- arch_spin_unlock(&timebase_lock);
+ raw_spin_unlock(&timebase_lock);
while (timebase)
barrier();
- rtas_call(rtas_token("thaw-time-base"), 0, 1, NULL);
+ rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL);
local_irq_restore(flags);
}
@@ -1200,8 +2126,8 @@ void rtas_take_timebase(void)
{
while (!timebase)
barrier();
- arch_spin_lock(&timebase_lock);
+ raw_spin_lock(&timebase_lock);
set_tb(timebase >> 32, timebase & 0xffffffff);
timebase = 0;
- arch_spin_unlock(&timebase_lock);
+ raw_spin_unlock(&timebase_lock);
}
diff --git a/arch/powerpc/kernel/rtas_entry.S b/arch/powerpc/kernel/rtas_entry.S
new file mode 100644
index 000000000000..6ce95ddadbcd
--- /dev/null
+++ b/arch/powerpc/kernel/rtas_entry.S
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <asm/asm-offsets.h>
+#include <asm/bug.h>
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+
+/*
+ * RTAS is called with MSR IR, DR, EE disabled, and LR in the return address.
+ *
+ * Note: r3 is an input parameter to rtas, so don't trash it...
+ */
+
+#ifdef CONFIG_PPC32
+_GLOBAL(enter_rtas)
+ stwu r1,-INT_FRAME_SIZE(r1)
+ mflr r0
+ stw r0,INT_FRAME_SIZE+4(r1)
+ LOAD_REG_ADDR(r4, rtas)
+ lis r6,1f@ha /* physical return address for rtas */
+ addi r6,r6,1f@l
+ tophys(r6,r6)
+ lwz r8,RTASENTRY(r4)
+ lwz r4,RTASBASE(r4)
+ mfmsr r9
+ stw r9,8(r1)
+ li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
+ mtlr r6
+ stw r1, THREAD + RTAS_SP(r2)
+ mtspr SPRN_SRR0,r8
+ mtspr SPRN_SRR1,r9
+ rfi
+1:
+ lis r8, 1f@h
+ ori r8, r8, 1f@l
+ LOAD_REG_IMMEDIATE(r9,MSR_KERNEL)
+ mtspr SPRN_SRR0,r8
+ mtspr SPRN_SRR1,r9
+ rfi /* Reactivate MMU translation */
+1:
+ lwz r8,INT_FRAME_SIZE+4(r1) /* get return address */
+ lwz r9,8(r1) /* original msr value */
+ addi r1,r1,INT_FRAME_SIZE
+ li r0,0
+ stw r0, THREAD + RTAS_SP(r2)
+ mtlr r8
+ mtmsr r9
+ blr /* return to caller */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
+
+#else /* CONFIG_PPC32 */
+#include <asm/exception-64s.h>
+
+/*
+ * 32-bit rtas on 64-bit machines has the additional problem that RTAS may
+ * not preserve the upper parts of registers it uses.
+ */
+_GLOBAL(enter_rtas)
+ mflr r0
+ std r0,16(r1)
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
+
+ /* Because RTAS is running in 32b mode, it clobbers the high order half
+ * of all registers that it saves. We therefore save those registers
+ * RTAS might touch to the stack. (r0, r3-r12 are caller saved)
+ */
+ SAVE_GPR(2, r1) /* Save the TOC */
+ SAVE_NVGPRS(r1) /* Save the non-volatiles */
+
+ mfcr r4
+ std r4,_CCR(r1)
+ mfctr r5
+ std r5,_CTR(r1)
+ mfspr r6,SPRN_XER
+ std r6,_XER(r1)
+ mfdar r7
+ std r7,_DAR(r1)
+ mfdsisr r8
+ std r8,_DSISR(r1)
+
+ /* Temporary workaround to clear CR until RTAS can be modified to
+ * ignore all bits.
+ */
+ li r0,0
+ mtcr r0
+
+ mfmsr r6
+
+ /* Unfortunately, the stack pointer and the MSR are also clobbered,
+ * so they are saved in the PACA which allows us to restore
+ * our original state after RTAS returns.
+ */
+ std r1,PACAR1(r13)
+ std r6,PACASAVEDMSR(r13)
+
+ /* Setup our real return addr */
+ LOAD_REG_ADDR(r4,rtas_return_loc)
+ clrldi r4,r4,2 /* convert to realmode address */
+ mtlr r4
+
+__enter_rtas:
+ LOAD_REG_ADDR(r4, rtas)
+ ld r5,RTASENTRY(r4) /* get the rtas->entry value */
+ ld r4,RTASBASE(r4) /* get the rtas->base value */
+
+ /*
+ * RTAS runs in 32-bit big endian real mode, but leave MSR[RI] on as we
+ * may hit NMI (SRESET or MCE) while in RTAS. RTAS should disable RI in
+ * its critical regions (as specified in PAPR+ section 7.2.1). MSR[S]
+ * is not impacted by RFI_TO_KERNEL (only urfid can unset it). So if
+ * MSR[S] is set, it will remain when entering RTAS.
+ * If we're in HV mode, RTAS must also run in HV mode, so extract MSR_HV
+ * from the saved MSR value and insert into the value RTAS will use.
+ */
+ extrdi r0, r6, 1, 63 - MSR_HV_LG
+ LOAD_REG_IMMEDIATE(r6, MSR_ME | MSR_RI)
+ insrdi r6, r0, 1, 63 - MSR_HV_LG
+
+ li r0,0
+ mtmsrd r0,1 /* disable RI before using SRR0/1 */
+
+ mtspr SPRN_SRR0,r5
+ mtspr SPRN_SRR1,r6
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+rtas_return_loc:
+ FIXUP_ENDIAN
+
+ /* Set SF before anything. */
+ LOAD_REG_IMMEDIATE(r6, MSR_KERNEL & ~(MSR_IR|MSR_DR))
+ mtmsrd r6
+
+ /* relocation is off at this point */
+ GET_PACA(r13)
+
+ bcl 20,31,$+4
+0: mflr r3
+ ld r3,(1f-0b)(r3) /* get &rtas_restore_regs */
+
+ ld r1,PACAR1(r13) /* Restore our SP */
+ ld r4,PACASAVEDMSR(r13) /* Restore our MSR */
+
+ mtspr SPRN_SRR0,r3
+ mtspr SPRN_SRR1,r4
+ RFI_TO_KERNEL
+ b . /* prevent speculative execution */
+_ASM_NOKPROBE_SYMBOL(enter_rtas)
+_ASM_NOKPROBE_SYMBOL(__enter_rtas)
+_ASM_NOKPROBE_SYMBOL(rtas_return_loc)
+
+ .align 3
+1: .8byte rtas_restore_regs
+
+rtas_restore_regs:
+ /* relocation is on at this point */
+ REST_GPR(2, r1) /* Restore the TOC */
+ REST_NVGPRS(r1) /* Restore the non-volatiles */
+
+ ld r4,_CCR(r1)
+ mtcr r4
+ ld r5,_CTR(r1)
+ mtctr r5
+ ld r6,_XER(r1)
+ mtspr SPRN_XER,r6
+ ld r7,_DAR(r1)
+ mtdar r7
+ ld r8,_DSISR(r1)
+ mtdsisr r8
+
+ addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
+ ld r0,16(r1) /* get return address */
+
+ mtlr r0
+ blr /* return to caller */
+
+#endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index db2b482af658..583dc16e9d3c 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* c 2001 PPC 64 Team, IBM Corp
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* /proc/powerpc/rtas/firmware_flash interface
*
* This file implements a firmware_flash interface to pump a firmware
@@ -19,7 +15,7 @@
#include <linux/proc_fs.h>
#include <linux/reboot.h>
#include <asm/delay.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/rtas.h>
#define MODULE_VERS "1.0"
@@ -124,7 +120,7 @@ static struct kmem_cache *flash_block_cache = NULL;
/*
* Local copy of the flash block list.
*
- * The rtas_firmware_flash_list varable will be
+ * The rtas_firmware_flash_list variable will be
* set once the data is fully read.
*
* For convenience as we build the list we use virtual addrs,
@@ -316,13 +312,13 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
{
struct rtas_update_flash_t *const uf = &rtas_update_flash_data;
char *p;
- int next_free, rc;
+ int next_free;
struct flash_block_list *fl;
- mutex_lock(&rtas_update_flash_mutex);
+ guard(mutex)(&rtas_update_flash_mutex);
if (uf->status == FLASH_AUTH || count == 0)
- goto out; /* discard data */
+ return count; /* discard data */
/* In the case that the image is not ready for flashing, the memory
* allocated for the block list will be freed upon the release of the
@@ -331,7 +327,7 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
if (uf->flist == NULL) {
uf->flist = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL);
if (!uf->flist)
- goto nomem;
+ return -ENOMEM;
}
fl = uf->flist;
@@ -342,7 +338,7 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
/* Need to allocate another block_list */
fl->next = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL);
if (!fl->next)
- goto nomem;
+ return -ENOMEM;
fl = fl->next;
next_free = 0;
}
@@ -351,25 +347,17 @@ static ssize_t rtas_flash_write(struct file *file, const char __user *buffer,
count = RTAS_BLK_SIZE;
p = kmem_cache_zalloc(flash_block_cache, GFP_KERNEL);
if (!p)
- goto nomem;
+ return -ENOMEM;
if(copy_from_user(p, buffer, count)) {
kmem_cache_free(flash_block_cache, p);
- rc = -EFAULT;
- goto error;
+ return -EFAULT;
}
fl->blocks[next_free].data = p;
fl->blocks[next_free].length = count;
fl->num_blocks++;
-out:
- mutex_unlock(&rtas_update_flash_mutex);
- return count;
-nomem:
- rc = -ENOMEM;
-error:
- mutex_unlock(&rtas_update_flash_mutex);
- return rc;
+ return count;
}
/*
@@ -380,7 +368,7 @@ static void manage_flash(struct rtas_manage_flash_t *args_buf, unsigned int op)
s32 rc;
do {
- rc = rtas_call(rtas_token("ibm,manage-flash-image"), 1, 1,
+ rc = rtas_call(rtas_function_token(RTAS_FN_IBM_MANAGE_FLASH_IMAGE), 1, 1,
NULL, op);
} while (rtas_busy_delay(rc));
@@ -409,19 +397,18 @@ static ssize_t manage_flash_write(struct file *file, const char __user *buf,
static const char reject_str[] = "0";
static const char commit_str[] = "1";
char stkbuf[10];
- int op, rc;
+ int op;
- mutex_lock(&rtas_manage_flash_mutex);
+ guard(mutex)(&rtas_manage_flash_mutex);
if ((args_buf->status == MANAGE_AUTH) || (count == 0))
- goto out;
+ return count;
op = -1;
if (buf) {
if (count > 9) count = 9;
- rc = -EFAULT;
if (copy_from_user (stkbuf, buf, count))
- goto error;
+ return -EFAULT;
if (strncmp(stkbuf, reject_str, strlen(reject_str)) == 0)
op = RTAS_REJECT_TMP_IMG;
else if (strncmp(stkbuf, commit_str, strlen(commit_str)) == 0)
@@ -429,18 +416,11 @@ static ssize_t manage_flash_write(struct file *file, const char __user *buf,
}
if (op == -1) { /* buf is empty, or contains invalid string */
- rc = -EINVAL;
- goto error;
+ return -EINVAL;
}
manage_flash(args_buf, op);
-out:
- mutex_unlock(&rtas_manage_flash_mutex);
return count;
-
-error:
- mutex_unlock(&rtas_manage_flash_mutex);
- return rc;
}
/*
@@ -448,7 +428,7 @@ error:
*/
static void validate_flash(struct rtas_validate_flash_t *args_buf)
{
- int token = rtas_token("ibm,validate-flash-image");
+ int token = rtas_function_token(RTAS_FN_IBM_VALIDATE_FLASH_IMAGE);
int update_results;
s32 rc;
@@ -503,16 +483,14 @@ static ssize_t validate_flash_write(struct file *file, const char __user *buf,
{
struct rtas_validate_flash_t *const args_buf =
&rtas_validate_flash_data;
- int rc;
- mutex_lock(&rtas_validate_flash_mutex);
+ guard(mutex)(&rtas_validate_flash_mutex);
/* We are only interested in the first 4K of the
* candidate image */
if ((*off >= VALIDATE_BUF_SIZE) ||
(args_buf->status == VALIDATE_AUTH)) {
*off += count;
- mutex_unlock(&rtas_validate_flash_mutex);
return count;
}
@@ -523,20 +501,14 @@ static ssize_t validate_flash_write(struct file *file, const char __user *buf,
args_buf->status = VALIDATE_INCOMPLETE;
}
- if (!access_ok(VERIFY_READ, buf, count)) {
- rc = -EFAULT;
- goto done;
- }
- if (copy_from_user(args_buf->buf + *off, buf, count)) {
- rc = -EFAULT;
- goto done;
- }
+ if (!access_ok(buf, count))
+ return -EFAULT;
+
+ if (copy_from_user(args_buf->buf + *off, buf, count))
+ return -EFAULT;
*off += count;
- rc = count;
-done:
- mutex_unlock(&rtas_validate_flash_mutex);
- return rc;
+ return count;
}
static int validate_flash_release(struct inode *inode, struct file *file)
@@ -574,7 +546,7 @@ static void rtas_flash_firmware(int reboot_type)
return;
}
- update_token = rtas_token("ibm,update-flash-64-and-reboot");
+ update_token = rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT);
if (update_token == RTAS_UNKNOWN_SERVICE) {
printk(KERN_ALERT "FLASH: ibm,update-flash-64-and-reboot "
"is not available -- not a service partition?\n");
@@ -657,46 +629,46 @@ static void rtas_flash_firmware(int reboot_type)
*/
struct rtas_flash_file {
const char *filename;
- const char *rtas_call_name;
+ const rtas_fn_handle_t handle;
int *status;
- const struct file_operations fops;
+ const struct proc_ops ops;
};
static const struct rtas_flash_file rtas_flash_files[] = {
{
.filename = "powerpc/rtas/" FIRMWARE_FLASH_NAME,
- .rtas_call_name = "ibm,update-flash-64-and-reboot",
+ .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT,
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_msg,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_msg,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" FIRMWARE_UPDATE_NAME,
- .rtas_call_name = "ibm,update-flash-64-and-reboot",
+ .handle = RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT,
.status = &rtas_update_flash_data.status,
- .fops.read = rtas_flash_read_num,
- .fops.write = rtas_flash_write,
- .fops.release = rtas_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = rtas_flash_read_num,
+ .ops.proc_write = rtas_flash_write,
+ .ops.proc_release = rtas_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" VALIDATE_FLASH_NAME,
- .rtas_call_name = "ibm,validate-flash-image",
+ .handle = RTAS_FN_IBM_VALIDATE_FLASH_IMAGE,
.status = &rtas_validate_flash_data.status,
- .fops.read = validate_flash_read,
- .fops.write = validate_flash_write,
- .fops.release = validate_flash_release,
- .fops.llseek = default_llseek,
+ .ops.proc_read = validate_flash_read,
+ .ops.proc_write = validate_flash_write,
+ .ops.proc_release = validate_flash_release,
+ .ops.proc_lseek = default_llseek,
},
{
.filename = "powerpc/rtas/" MANAGE_FLASH_NAME,
- .rtas_call_name = "ibm,manage-flash-image",
+ .handle = RTAS_FN_IBM_MANAGE_FLASH_IMAGE,
.status = &rtas_manage_flash_data.status,
- .fops.read = manage_flash_read,
- .fops.write = manage_flash_write,
- .fops.llseek = default_llseek,
+ .ops.proc_read = manage_flash_read,
+ .ops.proc_write = manage_flash_write,
+ .ops.proc_lseek = default_llseek,
}
};
@@ -704,8 +676,7 @@ static int __init rtas_flash_init(void)
{
int i;
- if (rtas_token("ibm,update-flash-64-and-reboot") ==
- RTAS_UNKNOWN_SERVICE) {
+ if (rtas_function_token(RTAS_FN_IBM_UPDATE_FLASH_64_AND_REBOOT) == RTAS_UNKNOWN_SERVICE) {
pr_info("rtas_flash: no firmware flash support\n");
return -EINVAL;
}
@@ -714,9 +685,9 @@ static int __init rtas_flash_init(void)
if (!rtas_validate_flash_data.buf)
return -ENOMEM;
- flash_block_cache = kmem_cache_create("rtas_flash_cache",
- RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
- NULL);
+ flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+ RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+ 0, 0, RTAS_BLK_SIZE, NULL);
if (!flash_block_cache) {
printk(KERN_ERR "%s: failed to create block cache\n",
__func__);
@@ -727,14 +698,14 @@ static int __init rtas_flash_init(void)
const struct rtas_flash_file *f = &rtas_flash_files[i];
int token;
- if (!proc_create(f->filename, S_IRUSR | S_IWUSR, NULL, &f->fops))
+ if (!proc_create(f->filename, 0600, NULL, &f->ops))
goto enomem;
/*
* This code assumes that the status int is the first member of the
* struct
*/
- token = rtas_token(f->rtas_call_name);
+ token = rtas_function_token(f->handle);
if (token == RTAS_UNKNOWN_SERVICE)
*f->status = FLASH_AUTH;
else
@@ -778,4 +749,5 @@ static void __exit rtas_flash_cleanup(void)
module_init(rtas_flash_init);
module_exit(rtas_flash_cleanup);
+MODULE_DESCRIPTION("PPC procfs firmware flash interface");
MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kernel/rtas_pci.c b/arch/powerpc/kernel/rtas_pci.c
index c168337aef9d..fccf96e897f6 100644
--- a/arch/powerpc/kernel/rtas_pci.c
+++ b/arch/powerpc/kernel/rtas_pci.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Dave Engebretsen, IBM Corporation
* Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
@@ -5,20 +6,6 @@
* RTAS specific routines for PCI.
*
* Based on code from pci.c, chrp_pci.c and pSeries_pci.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
@@ -26,12 +13,12 @@
#include <linux/pci.h>
#include <linux/string.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/pgtable.h>
+#include <linux/of_address.h>
+#include <linux/of_fdt.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/irq.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/iommu.h>
@@ -56,7 +43,7 @@ static inline int config_access_valid(struct pci_dn *dn, int where)
return 0;
}
-int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+int rtas_pci_dn_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
{
int returnval = -1;
unsigned long buid, addr;
@@ -66,6 +53,11 @@ int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
return PCIBIOS_DEVICE_NOT_FOUND;
if (!config_access_valid(pdn, where))
return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+ if (pdn->edev && pdn->edev->pe &&
+ (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+ return PCIBIOS_SET_FAILED;
+#endif
addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
buid = pdn->phb->buid;
@@ -87,43 +79,23 @@ static int rtas_pci_read_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 *val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
-#ifdef CONFIG_EEH
- struct eeh_dev *edev;
-#endif
int ret;
- /* Search only direct children of the bus */
*val = 0xFFFFFFFF;
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
-#ifdef CONFIG_EEH
- edev = of_node_to_eeh_dev(dn);
- if (edev && edev->pe && edev->pe->state & EEH_PE_RESET)
- return PCIBIOS_DEVICE_NOT_FOUND;
-#endif
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
- ret = rtas_read_config(pdn, where, size, val);
+ /* Validity of pdn is checked in here */
+ ret = rtas_pci_dn_read_config(pdn, where, size, val);
if (*val == EEH_IO_ERROR_VALUE(size) &&
- eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
+ eeh_dev_check_failure(pdn_to_eeh_dev(pdn)))
return PCIBIOS_DEVICE_NOT_FOUND;
return ret;
}
-int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+int rtas_pci_dn_write_config(struct pci_dn *pdn, int where, int size, u32 val)
{
unsigned long buid, addr;
int ret;
@@ -132,6 +104,11 @@ int rtas_write_config(struct pci_dn *pdn, int where, int size, u32 val)
return PCIBIOS_DEVICE_NOT_FOUND;
if (!config_access_valid(pdn, where))
return PCIBIOS_BAD_REGISTER_NUMBER;
+#ifdef CONFIG_EEH
+ if (pdn->edev && pdn->edev->pe &&
+ (pdn->edev->pe->state & EEH_PE_CFG_BLOCKED))
+ return PCIBIOS_SET_FAILED;
+#endif
addr = rtas_config_addr(pdn->busno, pdn->devfn, where);
buid = pdn->phb->buid;
@@ -152,35 +129,12 @@ static int rtas_pci_write_config(struct pci_bus *bus,
unsigned int devfn,
int where, int size, u32 val)
{
- struct device_node *busdn, *dn;
struct pci_dn *pdn;
- bool found = false;
-#ifdef CONFIG_EEH
- struct eeh_dev *edev;
-#endif
- int ret;
- /* Search only direct children of the bus */
- busdn = pci_bus_to_OF_node(bus);
- for (dn = busdn->child; dn; dn = dn->sibling) {
- pdn = PCI_DN(dn);
- if (pdn && pdn->devfn == devfn
- && of_device_is_available(dn)) {
- found = true;
- break;
- }
- }
-
- if (!found)
- return PCIBIOS_DEVICE_NOT_FOUND;
-#ifdef CONFIG_EEH
- edev = of_node_to_eeh_dev(dn);
- if (edev && edev->pe && (edev->pe->state & EEH_PE_RESET))
- return PCIBIOS_DEVICE_NOT_FOUND;
-#endif
- ret = rtas_write_config(pdn, where, size, val);
+ pdn = pci_get_pdn_by_devfn(bus, devfn);
- return ret;
+ /* Validity of pdn is checked in here. */
+ return rtas_pci_dn_write_config(pdn, where, size, val);
}
static struct pci_ops rtas_pci_ops = {
@@ -237,10 +191,10 @@ static void python_countermeasures(struct device_node *dev)
void __init init_pci_config_tokens(void)
{
- read_pci_config = rtas_token("read-pci-config");
- write_pci_config = rtas_token("write-pci-config");
- ibm_read_pci_config = rtas_token("ibm,read-pci-config");
- ibm_write_pci_config = rtas_token("ibm,write-pci-config");
+ read_pci_config = rtas_function_token(RTAS_FN_READ_PCI_CONFIG);
+ write_pci_config = rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG);
+ ibm_read_pci_config = rtas_function_token(RTAS_FN_IBM_READ_PCI_CONFIG);
+ ibm_write_pci_config = rtas_function_token(RTAS_FN_IBM_WRITE_PCI_CONFIG);
}
unsigned long get_phb_buid(struct device_node *phb)
@@ -286,50 +240,3 @@ int rtas_setup_phb(struct pci_controller *phb)
return 0;
}
-
-void __init find_and_init_phbs(void)
-{
- struct device_node *node;
- struct pci_controller *phb;
- struct device_node *root = of_find_node_by_path("/");
-
- for_each_child_of_node(root, node) {
- if (node->type == NULL || (strcmp(node->type, "pci") != 0 &&
- strcmp(node->type, "pciex") != 0))
- continue;
-
- phb = pcibios_alloc_controller(node);
- if (!phb)
- continue;
- rtas_setup_phb(phb);
- pci_process_bridge_OF_ranges(phb, node, 0);
- isa_bridge_find_early(phb);
- }
-
- of_node_put(root);
- pci_devs_phb_init();
-
- /*
- * PCI_PROBE_ONLY and PCI_REASSIGN_ALL_BUS can be set via properties
- * in chosen.
- */
- if (of_chosen) {
- const int *prop;
-
- prop = of_get_property(of_chosen,
- "linux,pci-probe-only", NULL);
- if (prop) {
- if (*prop)
- pci_add_flags(PCI_PROBE_ONLY);
- else
- pci_clear_flags(PCI_PROBE_ONLY);
- }
-
-#ifdef CONFIG_PPC32 /* Will be made generic soon */
- prop = of_get_property(of_chosen,
- "linux,pci-assign-all-buses", NULL);
- if (prop && *prop)
- pci_add_flags(PCI_REASSIGN_ALL_BUS);
-#endif /* CONFIG_PPC32 */
- }
-}
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index e736387fee6a..6336ec9aedd0 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -1,11 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Communication to userspace based on kernel/printk.c
*/
@@ -13,6 +9,7 @@
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
@@ -21,11 +18,11 @@
#include <linux/cpu.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
+#include <linux/topology.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/rtas.h>
-#include <asm/prom.h>
#include <asm/nvram.h>
#include <linux/atomic.h>
#include <asm/machdep.h>
@@ -49,7 +46,7 @@ static unsigned int rtas_error_log_buffer_max;
static unsigned int event_scan;
static unsigned int rtas_event_scan_rate;
-static int full_rtas_msgs = 0;
+static bool full_rtas_msgs;
/* Stop logging to nvram after first fatal error */
static int logging_enabled; /* Until we initialize everything,
@@ -90,6 +87,10 @@ static char *rtas_event_type(int type)
return "Dump Notification Event";
case RTAS_TYPE_PRRN:
return "Platform Resource Reassignment Event";
+ case RTAS_TYPE_HOTPLUG:
+ return "Hotplug Event";
+ case RTAS_TYPE_HVPIPE:
+ return "Hypervisor Pipe Notification event";
}
return rtas_type[0];
@@ -149,8 +150,10 @@ static void printk_log_rtas(char *buf, int len)
} else {
struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
- printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
- error_log_cnt, rtas_event_type(rtas_error_type(errlog)),
+ printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
+ error_log_cnt,
+ rtas_event_type(rtas_error_type(errlog)),
+ rtas_error_type(errlog),
rtas_error_severity(errlog));
}
}
@@ -272,47 +275,15 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
}
}
-#ifdef CONFIG_PPC_PSERIES
-static s32 prrn_update_scope;
-
-static void prrn_work_fn(struct work_struct *work)
-{
- /*
- * For PRRN, we must pass the negative of the scope value in
- * the RTAS event.
- */
- pseries_devicetree_update(-prrn_update_scope);
-}
-
-static DECLARE_WORK(prrn_work, prrn_work_fn);
-
-void prrn_schedule_update(u32 scope)
-{
- flush_work(&prrn_work);
- prrn_update_scope = scope;
- schedule_work(&prrn_work);
-}
-
static void handle_rtas_event(const struct rtas_error_log *log)
{
- if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
+ if (!machine_is(pseries))
return;
- /* For PRRN Events the extended log length is used to denote
- * the scope for calling rtas update-nodes.
- */
- prrn_schedule_update(rtas_error_extended_log_length(log));
-}
-
-#else
-
-static void handle_rtas_event(const struct rtas_error_log *log)
-{
- return;
+ if (rtas_error_type(log) == RTAS_TYPE_PRRN)
+ pr_info_ratelimited("Platform resource reassignment ignored.\n");
}
-#endif
-
static int rtas_log_open(struct inode * inode, struct file * file)
{
return 0;
@@ -340,7 +311,7 @@ static ssize_t rtas_log_read(struct file * file, char __user * buf,
count = rtas_error_log_buffer_max;
- if (!access_ok(VERIFY_WRITE, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
tmp = kmalloc(count, GFP_KERNEL);
@@ -386,20 +357,20 @@ out:
return error;
}
-static unsigned int rtas_log_poll(struct file *file, poll_table * wait)
+static __poll_t rtas_log_poll(struct file *file, poll_table * wait)
{
poll_wait(file, &rtas_log_wait, wait);
if (rtas_log_size)
- return POLLIN | POLLRDNORM;
+ return EPOLLIN | EPOLLRDNORM;
return 0;
}
-static const struct file_operations proc_rtas_log_operations = {
- .read = rtas_log_read,
- .poll = rtas_log_poll,
- .open = rtas_log_open,
- .release = rtas_log_release,
- .llseek = noop_llseek,
+static const struct proc_ops rtas_log_proc_ops = {
+ .proc_read = rtas_log_read,
+ .proc_poll = rtas_log_poll,
+ .proc_open = rtas_log_open,
+ .proc_release = rtas_log_release,
+ .proc_lseek = noop_llseek,
};
static int enable_surveillance(int timeout)
@@ -434,7 +405,10 @@ static void do_event_scan(void)
}
if (error == 0) {
- pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
+ if (rtas_error_type((struct rtas_error_log *)logdata) !=
+ RTAS_TYPE_PRRN)
+ pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG,
+ 0);
handle_rtas_event((struct rtas_error_log *)logdata);
}
@@ -442,7 +416,7 @@ static void do_event_scan(void)
}
static void rtas_event_scan(struct work_struct *w);
-DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
+static DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
/*
* Delay should be at least one second since some machines have problems if
@@ -457,7 +431,7 @@ static void rtas_event_scan(struct work_struct *w)
do_event_scan();
- get_online_cpus();
+ cpus_read_lock();
/* raw_ OK because just using CPU as starting point. */
cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
@@ -479,11 +453,11 @@ static void rtas_event_scan(struct work_struct *w)
schedule_delayed_work_on(cpu, &event_scan_work,
__round_jiffies_relative(event_scan_delay, cpu));
- put_online_cpus();
+ cpus_read_unlock();
}
#ifdef CONFIG_PPC64
-static void retreive_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
unsigned int err_type ;
int rc ;
@@ -501,19 +475,19 @@ static void retreive_nvram_error_log(void)
}
}
#else /* CONFIG_PPC64 */
-static void retreive_nvram_error_log(void)
+static void __init retrieve_nvram_error_log(void)
{
}
#endif /* CONFIG_PPC64 */
-static void start_event_scan(void)
+static void __init start_event_scan(void)
{
printk(KERN_DEBUG "RTAS daemon started\n");
pr_debug("rtasd: will sleep for %d milliseconds\n",
(30000 / rtas_event_scan_rate));
/* Retrieve errors from nvram if any */
- retreive_nvram_error_log();
+ retrieve_nvram_error_log();
schedule_delayed_work_on(cpumask_first(cpu_online_mask),
&event_scan_work, event_scan_delay);
@@ -526,22 +500,22 @@ void rtas_cancel_event_scan(void)
}
EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
-static int __init rtas_init(void)
+static int __init rtas_event_scan_init(void)
{
- struct proc_dir_entry *entry;
+ int err;
if (!machine_is(pseries) && !machine_is(chrp))
return 0;
/* No RTAS */
- event_scan = rtas_token("event-scan");
+ event_scan = rtas_function_token(RTAS_FN_EVENT_SCAN);
if (event_scan == RTAS_UNKNOWN_SERVICE) {
printk(KERN_INFO "rtasd: No event-scan on system\n");
return -ENODEV;
}
- rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
- if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
+ err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate);
+ if (err) {
printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
return -ENODEV;
}
@@ -556,19 +530,34 @@ static int __init rtas_init(void)
rtas_error_log_max = rtas_get_error_log_max();
rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
- rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER);
+ rtas_log_buf = vmalloc(array_size(LOG_NUMBER,
+ rtas_error_log_buffer_max));
if (!rtas_log_buf) {
printk(KERN_ERR "rtasd: no memory\n");
return -ENOMEM;
}
- entry = proc_create("powerpc/rtas/error_log", S_IRUSR, NULL,
- &proc_rtas_log_operations);
+ start_event_scan();
+
+ return 0;
+}
+arch_initcall(rtas_event_scan_init);
+
+static int __init rtas_init(void)
+{
+ struct proc_dir_entry *entry;
+
+ if (!machine_is(pseries) && !machine_is(chrp))
+ return 0;
+
+ if (!rtas_log_buf)
+ return -ENODEV;
+
+ entry = proc_create("powerpc/rtas/error_log", 0400, NULL,
+ &rtas_log_proc_ops);
if (!entry)
printk(KERN_ERR "Failed to create error_log proc entry\n");
- start_event_scan();
-
return 0;
}
__initcall(rtas_init);
@@ -592,11 +581,6 @@ __setup("surveillance=", surveillance_setup);
static int __init rtasmsgs_setup(char *str)
{
- if (strcmp(str, "on") == 0)
- full_rtas_msgs = 1;
- else if (strcmp(str, "off") == 0)
- full_rtas_msgs = 0;
-
- return 1;
+ return (kstrtobool(str, &full_rtas_msgs) == 0);
}
__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
new file mode 100644
index 000000000000..3a28795b4ed8
--- /dev/null
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <linux/string_choices.h>
+#include <asm/secure_boot.h>
+
+static struct device_node *get_ppc_fw_sb_node(void)
+{
+ static const struct of_device_id ids[] = {
+ { .compatible = "ibm,secureboot", },
+ { .compatible = "ibm,secureboot-v1", },
+ { .compatible = "ibm,secureboot-v2", },
+ {},
+ };
+
+ return of_find_matching_node(NULL, ids);
+}
+
+bool is_ppc_secureboot_enabled(void)
+{
+ struct device_node *node;
+ bool enabled = false;
+ u32 secureboot;
+
+ node = get_ppc_fw_sb_node();
+ enabled = of_property_read_bool(node, "os-secureboot-enforcing");
+ of_node_put(node);
+
+ if (enabled)
+ goto out;
+
+ node = of_find_node_by_path("/");
+ if (!of_property_read_u32(node, "ibm,secure-boot", &secureboot))
+ enabled = (secureboot > 1);
+ of_node_put(node);
+
+out:
+ pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled));
+
+ return enabled;
+}
+
+bool is_ppc_trustedboot_enabled(void)
+{
+ struct device_node *node;
+ bool enabled = false;
+ u32 trustedboot;
+
+ node = get_ppc_fw_sb_node();
+ enabled = of_property_read_bool(node, "trusted-enabled");
+ of_node_put(node);
+
+ if (enabled)
+ goto out;
+
+ node = of_find_node_by_path("/");
+ if (!of_property_read_u32(node, "ibm,trusted-boot", &trustedboot))
+ enabled = (trustedboot > 0);
+ of_node_put(node);
+
+out:
+ pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled));
+
+ return enabled;
+}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
new file mode 100644
index 000000000000..fbb7ebd8aa08
--- /dev/null
+++ b/arch/powerpc/kernel/security.c
@@ -0,0 +1,866 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Security related flags and so on.
+//
+// Copyright 2018, Michael Ellerman, IBM Corporation.
+
+#include <linux/cpu.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/memblock.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
+#include <linux/seq_buf.h>
+#include <linux/debugfs.h>
+
+#include <asm/asm-prototypes.h>
+#include <asm/text-patching.h>
+#include <asm/security_features.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/inst.h>
+
+#include "setup.h"
+
+u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+
+enum branch_cache_flush_type {
+ BRANCH_CACHE_FLUSH_NONE = 0x1,
+ BRANCH_CACHE_FLUSH_SW = 0x2,
+ BRANCH_CACHE_FLUSH_HW = 0x4,
+};
+static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
+
+bool barrier_nospec_enabled;
+static bool no_nospec;
+static bool btb_flush_enabled;
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
+static bool no_spectrev2;
+#endif
+
+static void enable_barrier_nospec(bool enable)
+{
+ barrier_nospec_enabled = enable;
+ do_barrier_nospec_fixups(enable);
+}
+
+void __init setup_barrier_nospec(void)
+{
+ bool enable;
+
+ /*
+ * It would make sense to check SEC_FTR_SPEC_BAR_ORI31 below as well.
+ * But there's a good reason not to. The two flags we check below are
+ * both are enabled by default in the kernel, so if the hcall is not
+ * functional they will be enabled.
+ * On a system where the host firmware has been updated (so the ori
+ * functions as a barrier), but on which the hypervisor (KVM/Qemu) has
+ * not been updated, we would like to enable the barrier. Dropping the
+ * check for SEC_FTR_SPEC_BAR_ORI31 achieves that. The only downside is
+ * we potentially enable the barrier on systems where the host firmware
+ * is not updated, but that's harmless as it's a no-op.
+ */
+ enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+ security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
+
+ if (!no_nospec && !cpu_mitigations_off())
+ enable_barrier_nospec(enable);
+}
+
+static int __init handle_nospectre_v1(char *p)
+{
+ no_nospec = true;
+
+ return 0;
+}
+early_param("nospectre_v1", handle_nospectre_v1);
+
+#ifdef CONFIG_DEBUG_FS
+static int barrier_nospec_set(void *data, u64 val)
+{
+ switch (val) {
+ case 0:
+ case 1:
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!!val == !!barrier_nospec_enabled)
+ return 0;
+
+ enable_barrier_nospec(!!val);
+
+ return 0;
+}
+
+static int barrier_nospec_get(void *data, u64 *val)
+{
+ *val = barrier_nospec_enabled ? 1 : 0;
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
+ barrier_nospec_set, "%llu\n");
+
+static __init int barrier_nospec_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("barrier_nospec", 0600,
+ arch_debugfs_dir, NULL,
+ &fops_barrier_nospec);
+ return 0;
+}
+device_initcall(barrier_nospec_debugfs_init);
+
+static __init int security_feature_debugfs_init(void)
+{
+ debugfs_create_x64("security_features", 0400, arch_debugfs_dir,
+ &powerpc_security_features);
+ return 0;
+}
+device_initcall(security_feature_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
+#if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
+static int __init handle_nospectre_v2(char *p)
+{
+ no_spectrev2 = true;
+
+ return 0;
+}
+early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_E500 || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_E500
+void __init setup_spectre_v2(void)
+{
+ if (no_spectrev2 || cpu_mitigations_off())
+ do_btb_flush_fixups();
+ else
+ btb_flush_enabled = true;
+}
+#endif /* CONFIG_PPC_E500 */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ bool thread_priv;
+
+ thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
+
+ if (rfi_flush) {
+ struct seq_buf s;
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ seq_buf_printf(&s, "Mitigation: RFI Flush");
+ if (thread_priv)
+ seq_buf_printf(&s, ", L1D private per thread");
+
+ seq_buf_printf(&s, "\n");
+
+ return s.len;
+ }
+
+ if (thread_priv)
+ return sprintf(buf, "Vulnerable: L1D private per thread\n");
+
+ if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
+ !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+ return sprintf(buf, "Not affected\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_meltdown(dev, attr, buf);
+}
+#endif
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct seq_buf s;
+
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ if (security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) {
+ if (barrier_nospec_enabled)
+ seq_buf_printf(&s, "Mitigation: __user pointer sanitization");
+ else
+ seq_buf_printf(&s, "Vulnerable");
+
+ if (security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31))
+ seq_buf_printf(&s, ", ori31 speculation barrier enabled");
+
+ seq_buf_printf(&s, "\n");
+ } else
+ seq_buf_printf(&s, "Not affected\n");
+
+ return s.len;
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct seq_buf s;
+ bool bcs, ccd;
+
+ seq_buf_init(&s, buf, PAGE_SIZE - 1);
+
+ bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED);
+ ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED);
+
+ if (bcs || ccd) {
+ seq_buf_printf(&s, "Mitigation: ");
+
+ if (bcs)
+ seq_buf_printf(&s, "Indirect branch serialisation (kernel only)");
+
+ if (bcs && ccd)
+ seq_buf_printf(&s, ", ");
+
+ if (ccd)
+ seq_buf_printf(&s, "Indirect branch cache disabled");
+
+ } else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
+ seq_buf_printf(&s, "Mitigation: Software count cache flush");
+
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW)
+ seq_buf_printf(&s, " (hardware accelerated)");
+
+ } else if (btb_flush_enabled) {
+ seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
+ } else {
+ seq_buf_printf(&s, "Vulnerable");
+ }
+
+ if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
+ if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE)
+ seq_buf_printf(&s, ", Software link stack flush");
+ if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW)
+ seq_buf_printf(&s, " (hardware accelerated)");
+ }
+
+ seq_buf_printf(&s, "\n");
+
+ return s.len;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Store-forwarding barrier support.
+ */
+
+static enum stf_barrier_type stf_enabled_flush_types;
+static bool no_stf_barrier;
+static bool stf_barrier;
+
+static int __init handle_no_stf_barrier(char *p)
+{
+ pr_info("stf-barrier: disabled on command line.");
+ no_stf_barrier = true;
+ return 0;
+}
+
+early_param("no_stf_barrier", handle_no_stf_barrier);
+
+enum stf_barrier_type stf_barrier_type_get(void)
+{
+ return stf_enabled_flush_types;
+}
+
+/* This is the generic flag used by other architectures */
+static int __init handle_ssbd(char *p)
+{
+ if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) {
+ /* Until firmware tells us, we have the barrier with auto */
+ return 0;
+ } else if (strncmp(p, "off", 3) == 0) {
+ handle_no_stf_barrier(NULL);
+ return 0;
+ } else
+ return 1;
+
+ return 0;
+}
+early_param("spec_store_bypass_disable", handle_ssbd);
+
+/*
+ * This is the generic flag used by other architectures.
+ *
+ * Early-param handler for "nospec_store_bypass_disable": delegates to
+ * handle_no_stf_barrier(). The value @p is ignored; always returns 0.
+ */
+static int __init handle_no_ssbd(char *p)
+{
+ handle_no_stf_barrier(NULL);
+ return 0;
+}
+early_param("nospec_store_bypass_disable", handle_no_ssbd);
+
+/*
+ * Apply the store-forwarding barrier fixups for the requested state and
+ * remember that state in stf_barrier.
+ *
+ * @enable: true to patch in the recorded barrier type, false to patch in
+ *          STF_BARRIER_NONE.
+ */
+static void stf_barrier_enable(bool enable)
+{
+	enum stf_barrier_type wanted;
+
+	wanted = enable ? stf_enabled_flush_types : STF_BARRIER_NONE;
+	do_stf_barrier_fixups(wanted);
+
+	stf_barrier = enable;
+}
+
+/*
+ * Choose a store-forwarding barrier type from the CPU feature bits, record it
+ * in stf_enabled_flush_types and, unless the barrier was disabled on the
+ * command line or mitigations are off, apply the state requested by the
+ * security feature flags.
+ */
+void setup_stf_barrier(void)
+{
+	enum stf_barrier_type barrier = STF_BARRIER_NONE;
+	bool wanted;
+
+	/* Default to fallback in case fw-features are not available */
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		barrier = STF_BARRIER_EIEIO;
+	else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		barrier = STF_BARRIER_SYNC_ORI;
+	else if (cpu_has_feature(CPU_FTR_ARCH_206))
+		barrier = STF_BARRIER_FALLBACK;
+
+	wanted = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
+		 security_ftr_enabled(SEC_FTR_STF_BARRIER);
+
+	switch (barrier) {
+	case STF_BARRIER_FALLBACK:
+		pr_info("stf-barrier: fallback barrier available\n");
+		break;
+	case STF_BARRIER_SYNC_ORI:
+		pr_info("stf-barrier: hwsync barrier available\n");
+		break;
+	case STF_BARRIER_EIEIO:
+		pr_info("stf-barrier: eieio barrier available\n");
+		break;
+	default:
+		break;
+	}
+
+	stf_enabled_flush_types = barrier;
+
+	if (no_stf_barrier || cpu_mitigations_off())
+		return;
+
+	stf_barrier_enable(wanted);
+}
+
+/*
+ * sysfs show routine reporting the speculative store bypass status.
+ *
+ * Writes one of three lines into @buf: the active kernel entry/exit barrier
+ * type, "Not affected" when neither L1D flush security feature is set, or
+ * "Vulnerable". Returns the sprintf() result.
+ */
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	const char *name = "unknown";
+
+	if (!stf_barrier || stf_enabled_flush_types == STF_BARRIER_NONE) {
+		if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) ||
+		    security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
+			return sprintf(buf, "Vulnerable\n");
+
+		return sprintf(buf, "Not affected\n");
+	}
+
+	switch (stf_enabled_flush_types) {
+	case STF_BARRIER_EIEIO:
+		name = "eieio";
+		break;
+	case STF_BARRIER_SYNC_ORI:
+		name = "hwsync";
+		break;
+	case STF_BARRIER_FALLBACK:
+		name = "fallback";
+		break;
+	default:
+		break;
+	}
+
+	return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", name);
+}
+
+/*
+ * Report the speculative store bypass state for prctl().
+ *
+ * @task: not used.
+ *
+ * Returns PR_SPEC_NOT_AFFECTED or PR_SPEC_ENABLE.
+ */
+static int ssb_prctl_get(struct task_struct *task)
+{
+	/*
+	 * The STF_BARRIER feature is on by default, so if it's off that means
+	 * firmware has explicitly said the CPU is not vulnerable via either
+	 * the hypercall or device tree.
+	 *
+	 * Likewise, if the system's CPU has no known barrier (see
+	 * setup_stf_barrier()) then assume that the CPU is not vulnerable.
+	 */
+	if (!security_ftr_enabled(SEC_FTR_STF_BARRIER) ||
+	    stf_enabled_flush_types == STF_BARRIER_NONE)
+		return PR_SPEC_NOT_AFFECTED;
+
+	/*
+	 * Otherwise the CPU is vulnerable. The barrier is not a global or
+	 * per-process mitigation, so the only value that can be reported here
+	 * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc.
+	 */
+	return PR_SPEC_ENABLE;
+}
+
+/*
+ * prctl() speculation-control query. Only PR_SPEC_STORE_BYPASS is handled;
+ * any other @which yields -ENODEV.
+ */
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+	if (which == PR_SPEC_STORE_BYPASS)
+		return ssb_prctl_get(task);
+
+	return -ENODEV;
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs setter for "stf_barrier": accepts 0 or 1, anything else is -EINVAL.
+ */
+static int stf_barrier_set(void *data, u64 val)
+{
+	bool want;
+
+	if (val > 1)
+		return -EINVAL;
+
+	want = (val == 1);
+
+	/* Only do anything if we're changing state */
+	if (want == stf_barrier)
+		return 0;
+
+	stf_barrier_enable(want);
+	return 0;
+}
+
+/* debugfs getter for "stf_barrier": stores 1 in *val when enabled, else 0. */
+static int stf_barrier_get(void *data, u64 *val)
+{
+	if (stf_barrier)
+		*val = 1;
+	else
+		*val = 0;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
+ "%llu\n");
+
+/*
+ * Create the "stf_barrier" debugfs file (mode 0600) under arch_debugfs_dir,
+ * backed by fops_stf_barrier. Always returns 0.
+ */
+static __init int stf_barrier_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("stf_barrier", 0600, arch_debugfs_dir,
+ NULL, &fops_stf_barrier);
+ return 0;
+}
+device_initcall(stf_barrier_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+
+/*
+ * Re-patch the branch cache flush call sites so they match the current
+ * count_cache_flush_type and link_stack_flush_type.
+ *
+ * The three patch__call_flush_branch_caches sites are first NOPed out and
+ * then, depending on the two flush types, either left as NOPs, filled with
+ * the li/mtctr/bcctr flush sequence, or turned into a branch to
+ * flush_branch_caches.
+ */
+static void update_branch_cache_flush(void)
+{
+ u32 *site, __maybe_unused *site2;
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ site = &patch__call_kvm_flush_link_stack;
+ site2 = &patch__call_kvm_flush_link_stack_p9;
+ // This controls the branch from guest_exit_cont to kvm_flush_link_stack
+ if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
+ } else {
+ // Could use HW flush, but that could also flush count cache
+ patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ }
+#endif
+
+ // Patch out the bcctr first, then nop the rest
+ site = &patch__call_flush_branch_caches3;
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ site = &patch__call_flush_branch_caches2;
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+ site = &patch__call_flush_branch_caches1;
+ patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
+
+ // This controls the branch from _switch to flush_branch_caches
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
+ link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+ // Nothing to be done
+
+ } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW &&
+ link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) {
+ // Patch in the bcctr last
+ site = &patch__call_flush_branch_caches1;
+ patch_instruction_site(site, ppc_inst(0x39207fff)); // li r9,0x7fff
+ site = &patch__call_flush_branch_caches2;
+ patch_instruction_site(site, ppc_inst(0x7d2903a6)); // mtctr r9
+ site = &patch__call_flush_branch_caches3;
+ patch_instruction_site(site, ppc_inst(PPC_INST_BCCTR_FLUSH));
+
+ } else {
+ // site still points at patch__call_flush_branch_caches1 (set above)
+ patch_branch_site(site, (u64)&flush_branch_caches, BRANCH_SET_LINK);
+
+ // If we just need to flush the link stack, early return
+ if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+ patch_instruction_site(&patch__flush_link_stack_return,
+ ppc_inst(PPC_RAW_BLR()));
+
+ // If we have flush instruction, early return
+ } else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) {
+ patch_instruction_site(&patch__flush_count_cache_return,
+ ppc_inst(PPC_RAW_BLR()));
+ }
+ }
+}
+
+/*
+ * Select the count cache and link stack flush types from @enable and the
+ * security feature flags, log the outcome, then re-patch the call sites.
+ *
+ * @enable: false forces both flush types to BRANCH_CACHE_FLUSH_NONE.
+ */
+static void toggle_branch_cache_flush(bool enable)
+{
+	if (enable && security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+		if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_HW;
+			pr_info("count-cache-flush: hardware flush enabled.\n");
+		} else {
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_SW;
+			pr_info("count-cache-flush: software flush enabled.\n");
+		}
+	} else {
+		count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+		pr_info("count-cache-flush: flush disabled.\n");
+	}
+
+	if (enable && security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) {
+		if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) {
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_HW;
+			pr_info("link-stack-flush: hardware flush enabled.\n");
+		} else {
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_SW;
+			pr_info("link-stack-flush: software flush enabled.\n");
+		}
+	} else {
+		link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
+		pr_info("link-stack-flush: flush disabled.\n");
+	}
+
+	update_branch_cache_flush();
+}
+
+/*
+ * Boot-time setup of the branch cache flushes: honour no_spectrev2 and
+ * cpu_mitigations_off(), derive the link stack flush requirement, then hand
+ * over to toggle_branch_cache_flush().
+ */
+void setup_count_cache_flush(void)
+{
+	bool enable = !(no_spectrev2 || cpu_mitigations_off());
+
+	if (!enable &&
+	    (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+	     security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)))
+		pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
+
+	/*
+	 * There's no firmware feature flag/hypervisor bit to tell us we need to
+	 * flush the link stack on context switch. So we set it here if we see
+	 * either of the Spectre v2 mitigations that aim to protect userspace.
+	 */
+	if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+	    security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
+	toggle_branch_cache_flush(enable);
+}
+
+static enum l1d_flush_type enabled_flush_types;
+static void *l1d_flush_fallback_area;
+static bool no_rfi_flush;
+static bool no_entry_flush;
+static bool no_uaccess_flush;
+bool rfi_flush;
+static bool entry_flush;
+static bool uaccess_flush;
+DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
+EXPORT_SYMBOL(uaccess_flush_key);
+
+/*
+ * Early-param handler for "no_rfi_flush": record that the RFI flush was
+ * disabled on the kernel command line.
+ *
+ * @p: option value; not used.
+ *
+ * Always returns 0.
+ */
+static int __init handle_no_rfi_flush(char *p)
+{
+	/* Newline-terminate the message like the other pr_info() calls here. */
+	pr_info("rfi-flush: disabled on command line.\n");
+	no_rfi_flush = true;
+	return 0;
+}
+early_param("no_rfi_flush", handle_no_rfi_flush);
+
+/*
+ * Early-param handler for "no_entry_flush": record that the entry flush was
+ * disabled on the kernel command line.
+ *
+ * @p: option value; not used.
+ *
+ * Always returns 0.
+ */
+static int __init handle_no_entry_flush(char *p)
+{
+	/* Newline-terminate the message like the other pr_info() calls here. */
+	pr_info("entry-flush: disabled on command line.\n");
+	no_entry_flush = true;
+	return 0;
+}
+early_param("no_entry_flush", handle_no_entry_flush);
+
+/*
+ * Early-param handler for "no_uaccess_flush": record that the uaccess flush
+ * was disabled on the kernel command line.
+ *
+ * @p: option value; not used.
+ *
+ * Always returns 0.
+ */
+static int __init handle_no_uaccess_flush(char *p)
+{
+	/* Newline-terminate the message like the other pr_info() calls here. */
+	pr_info("uaccess-flush: disabled on command line.\n");
+	no_uaccess_flush = true;
+	return 0;
+}
+early_param("no_uaccess_flush", handle_no_uaccess_flush);
+
+/*
+ * The RFI flush is not KPTI, but because users will see documentation that
+ * says to use nopti, we hijack that option here to also disable the RFI flush.
+ *
+ * Early-param handler for "nopti": logs the reason and delegates to
+ * handle_no_rfi_flush(). The value @p is ignored; always returns 0.
+ */
+static int __init handle_no_pti(char *p)
+{
+ pr_info("rfi-flush: disabling due to 'nopti' on command line.\n");
+ handle_no_rfi_flush(NULL);
+ return 0;
+}
+early_param("nopti", handle_no_pti);
+
+/*
+ * Empty callback handed to on_each_cpu() by the *_flush_enable() helpers in
+ * this file. @unused is ignored.
+ */
+static void do_nothing(void *unused)
+{
+ /*
+ * We don't need to do the flush explicitly, just enter+exit kernel is
+ * sufficient, the RFI exit handlers will do the right thing.
+ */
+}
+
+/*
+ * Patch the RFI flush in or out and record the new state in rfi_flush.
+ *
+ * @enable: true applies the fixups for enabled_flush_types and then runs
+ *          do_nothing() on every CPU; false applies L1D_FLUSH_NONE.
+ */
+void rfi_flush_enable(bool enable)
+{
+	if (!enable) {
+		do_rfi_flush_fixups(L1D_FLUSH_NONE);
+	} else {
+		do_rfi_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	}
+
+	rfi_flush = enable;
+}
+
+/*
+ * Patch the entry flush in or out and record the new state in entry_flush.
+ *
+ * @enable: true applies the fixups for enabled_flush_types and then runs
+ *          do_nothing() on every CPU; false applies L1D_FLUSH_NONE.
+ */
+static void entry_flush_enable(bool enable)
+{
+	if (!enable) {
+		do_entry_flush_fixups(L1D_FLUSH_NONE);
+	} else {
+		do_entry_flush_fixups(enabled_flush_types);
+		on_each_cpu(do_nothing, NULL, 1);
+	}
+
+	entry_flush = enable;
+}
+
+/*
+ * Patch the uaccess flush in or out, flip uaccess_flush_key to match, and
+ * record the new state in uaccess_flush.
+ *
+ * @enable: true applies the fixups before enabling the static key; false
+ *          disables the static key before removing the fixups.
+ */
+static void uaccess_flush_enable(bool enable)
+{
+	if (!enable) {
+		static_branch_disable(&uaccess_flush_key);
+		do_uaccess_flush_fixups(L1D_FLUSH_NONE);
+	} else {
+		do_uaccess_flush_fixups(enabled_flush_types);
+		static_branch_enable(&uaccess_flush_key);
+		on_each_cpu(do_nothing, NULL, 1);
+	}
+
+	uaccess_flush = enable;
+}
+
+/*
+ * Allocate the L1D fallback flush area (once) and publish its address and
+ * the L1D size in every possible CPU's paca. Panics if the allocation fails.
+ */
+static void __ref init_fallback_flush(void)
+{
+	u64 cache_bytes, max_addr;
+	int i;
+
+	/* Only allocate the fallback flush area once (at boot time). */
+	if (l1d_flush_fallback_area)
+		return;
+
+	/*
+	 * If there is no d-cache-size property in the device tree, the L1D
+	 * size could be zero. That leads to the loop in the asm wrapping
+	 * around to 2^64-1, and then walking off the end of the fallback area
+	 * and eventually causing a page fault which is fatal. Just default to
+	 * something vaguely sane.
+	 */
+	cache_bytes = ppc64_caches.l1d.size;
+	if (cache_bytes == 0)
+		cache_bytes = (64 * 1024);
+
+	max_addr = min(ppc64_bolted_size(), ppc64_rma_size);
+
+	/*
+	 * Align to L1d size, and size it at 2x L1d size, to catch possible
+	 * hardware prefetch runoff. We don't have a recipe for load patterns to
+	 * reliably avoid the prefetcher.
+	 */
+	l1d_flush_fallback_area = memblock_alloc_try_nid(cache_bytes * 2,
+							 cache_bytes,
+							 MEMBLOCK_LOW_LIMIT,
+							 max_addr, NUMA_NO_NODE);
+	if (!l1d_flush_fallback_area)
+		panic("%s: Failed to allocate %llu bytes align=0x%llx max_addr=%pa\n",
+		      __func__, cache_bytes * 2, cache_bytes, &max_addr);
+
+	for_each_possible_cpu(i) {
+		struct paca_struct *p = paca_ptrs[i];
+
+		p->rfi_flush_fallback_area = l1d_flush_fallback_area;
+		p->l1d_flush_size = cache_bytes;
+	}
+}
+
+/*
+ * Record the available L1D flush types, log each one, and apply the RFI
+ * flush state unless mitigations are off or no_rfi_flush was given.
+ *
+ * @types:  bitmask of L1D_FLUSH_* types that are available.
+ * @enable: state passed on to rfi_flush_enable().
+ */
+void setup_rfi_flush(enum l1d_flush_type types, bool enable)
+{
+	if (types & L1D_FLUSH_FALLBACK) {
+		pr_info("rfi-flush: fallback displacement flush available\n");
+		init_fallback_flush();
+	}
+
+	if (types & L1D_FLUSH_ORI)
+		pr_info("rfi-flush: ori type flush available\n");
+
+	if (types & L1D_FLUSH_MTTRIG)
+		pr_info("rfi-flush: mttrig type flush available\n");
+
+	enabled_flush_types = types;
+
+	if (cpu_mitigations_off() || no_rfi_flush)
+		return;
+
+	rfi_flush_enable(enable);
+}
+
+/*
+ * Apply the entry flush state @enable unless mitigations are off or
+ * no_entry_flush was given on the command line.
+ */
+void setup_entry_flush(bool enable)
+{
+	if (cpu_mitigations_off() || no_entry_flush)
+		return;
+
+	entry_flush_enable(enable);
+}
+
+/*
+ * Apply the uaccess flush state @enable unless mitigations are off or
+ * no_uaccess_flush was given on the command line.
+ */
+void setup_uaccess_flush(bool enable)
+{
+	if (cpu_mitigations_off() || no_uaccess_flush)
+		return;
+
+	uaccess_flush_enable(enable);
+}
+
+#ifdef CONFIG_DEBUG_FS
+/*
+ * debugfs setter: accepts 0 or 1 and passes it to
+ * toggle_branch_cache_flush(); anything else is -EINVAL.
+ */
+static int count_cache_flush_set(void *data, u64 val)
+{
+	if (val > 1)
+		return -EINVAL;
+
+	toggle_branch_cache_flush(val == 1);
+
+	return 0;
+}
+
+/* debugfs getter: *val is 0 when no count cache flush is active, else 1. */
+static int count_cache_flush_get(void *data, u64 *val)
+{
+	*val = (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) ? 0 : 1;
+
+	return 0;
+}
+
+/* debugfs getter: *val is 0 when no link stack flush is active, else 1. */
+static int link_stack_flush_get(void *data, u64 *val)
+{
+	*val = (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) ? 0 : 1;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
+ count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_link_stack_flush, link_stack_flush_get,
+ count_cache_flush_set, "%llu\n");
+
+/*
+ * Create the "count_cache_flush" and "link_stack_flush" debugfs files (mode
+ * 0600) under arch_debugfs_dir. Both attribute sets use
+ * count_cache_flush_set() as their setter. Always returns 0.
+ */
+static __init int count_cache_flush_debugfs_init(void)
+{
+ debugfs_create_file_unsafe("count_cache_flush", 0600,
+ arch_debugfs_dir, NULL,
+ &fops_count_cache_flush);
+ debugfs_create_file_unsafe("link_stack_flush", 0600,
+ arch_debugfs_dir, NULL,
+ &fops_link_stack_flush);
+ return 0;
+}
+device_initcall(count_cache_flush_debugfs_init);
+
+/*
+ * debugfs setter for "rfi_flush": accepts 0 or 1, anything else is -EINVAL.
+ */
+static int rfi_flush_set(void *data, u64 val)
+{
+	bool want;
+
+	if (val > 1)
+		return -EINVAL;
+
+	want = (val == 1);
+
+	/* Only do anything if we're changing state */
+	if (want == rfi_flush)
+		return 0;
+
+	rfi_flush_enable(want);
+	return 0;
+}
+
+/* debugfs getter for "rfi_flush": stores 1 in *val when enabled, else 0. */
+static int rfi_flush_get(void *data, u64 *val)
+{
+	if (rfi_flush)
+		*val = 1;
+	else
+		*val = 0;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
+
+/*
+ * debugfs setter for "entry_flush": accepts 0 or 1, anything else is -EINVAL.
+ */
+static int entry_flush_set(void *data, u64 val)
+{
+	bool want;
+
+	if (val > 1)
+		return -EINVAL;
+
+	want = (val == 1);
+
+	/* Only do anything if we're changing state */
+	if (want == entry_flush)
+		return 0;
+
+	entry_flush_enable(want);
+	return 0;
+}
+
+/* debugfs getter for "entry_flush": stores 1 in *val when enabled, else 0. */
+static int entry_flush_get(void *data, u64 *val)
+{
+	if (entry_flush)
+		*val = 1;
+	else
+		*val = 0;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
+
+/*
+ * debugfs setter for "uaccess_flush": accepts 0 or 1, anything else is
+ * -EINVAL.
+ */
+static int uaccess_flush_set(void *data, u64 val)
+{
+	bool want;
+
+	if (val > 1)
+		return -EINVAL;
+
+	want = (val == 1);
+
+	/* Only do anything if we're changing state */
+	if (want == uaccess_flush)
+		return 0;
+
+	uaccess_flush_enable(want);
+	return 0;
+}
+
+/* debugfs getter for "uaccess_flush": stores 1 in *val when enabled, else 0. */
+static int uaccess_flush_get(void *data, u64 *val)
+{
+	if (uaccess_flush)
+		*val = 1;
+	else
+		*val = 0;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
+
+/*
+ * Create the "rfi_flush", "entry_flush" and "uaccess_flush" debugfs files
+ * (mode 0600) under arch_debugfs_dir. Always returns 0.
+ */
+static __init int rfi_flush_debugfs_init(void)
+{
+ debugfs_create_file("rfi_flush", 0600, arch_debugfs_dir, NULL, &fops_rfi_flush);
+ debugfs_create_file("entry_flush", 0600, arch_debugfs_dir, NULL, &fops_entry_flush);
+ debugfs_create_file("uaccess_flush", 0600, arch_debugfs_dir, NULL, &fops_uaccess_flush);
+ return 0;
+}
+device_initcall(rfi_flush_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
new file mode 100644
index 000000000000..19172a2804f0
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-ops.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * This file initializes secvar operations for PowerPC Secureboot
+ */
+
+#include <linux/cache.h>
+#include <asm/secvar.h>
+#include <asm/bug.h>
+
+const struct secvar_operations *secvar_ops __ro_after_init = NULL;
+
+/*
+ * Install the secvar backend operations.
+ *
+ * @ops: operations table to publish through secvar_ops.
+ *
+ * Returns 0 on success, or -EBUSY (with a one-time warning) when a table has
+ * already been installed.
+ */
+int set_secvar_ops(const struct secvar_operations *ops)
+{
+	if (!WARN_ON_ONCE(secvar_ops)) {
+		secvar_ops = ops;
+		return 0;
+	}
+
+	return -EBUSY;
+}
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
new file mode 100644
index 000000000000..ec900bce0257
--- /dev/null
+++ b/arch/powerpc/kernel/secvar-sysfs.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com>
+ *
+ * This code exposes secure variables to user via sysfs
+ */
+
+#define pr_fmt(fmt) "secvar-sysfs: "fmt
+
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <asm/secvar.h>
+
+#define NAME_MAX_SIZE 1024
+
+static struct kobject *secvar_kobj;
+static struct kset *secvar_kset;
+
+/*
+ * sysfs show routine for "format": emit the backend's format string followed
+ * by a newline. Returns -EIO when the backend reports an error or an empty
+ * string.
+ */
+static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+			   char *buf)
+{
+	char fmt[32];
+	ssize_t n;
+
+	n = secvar_ops->format(fmt, sizeof(fmt));
+	if (n > 0)
+		return sysfs_emit(buf, "%s\n", fmt);
+
+	if (n == 0)
+		pr_err("Got empty format string from backend\n");
+	else
+		pr_err("Error %zd reading format string\n", n);
+
+	return -EIO;
+}
+
+
+/*
+ * sysfs show routine for "size": query the backend for the data size of the
+ * variable named after @kobj and emit it as a decimal number. Backend errors
+ * are returned as-is; all but -ENOENT are also logged.
+ */
+static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
+			 char *buf)
+{
+	u64 len;
+	int err;
+
+	err = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &len);
+	if (!err)
+		return sysfs_emit(buf, "%llu\n", len);
+
+	if (err != -ENOENT)
+		pr_err("Error retrieving %s variable size %d\n", kobj->name, err);
+
+	return err;
+}
+
+/*
+ * Binary sysfs read routine for "data".
+ *
+ * Asks the backend for the variable's size, fetches the contents into a
+ * temporary buffer, and copies the window described by @off and @count into
+ * @buf. Returns the memory_read_from_buffer() result or a negative error.
+ */
+static ssize_t data_read(struct file *filep, struct kobject *kobj,
+			 const struct bin_attribute *attr, char *buf, loff_t off,
+			 size_t count)
+{
+	char *payload;
+	u64 len;
+	int rc;
+
+	/* First call with a NULL buffer only reports the size */
+	rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &len);
+	if (rc) {
+		if (rc != -ENOENT)
+			pr_err("Error getting %s variable size %d\n", kobj->name, rc);
+		return rc;
+	}
+	pr_debug("dsize is %llu\n", len);
+
+	payload = kzalloc(len, GFP_KERNEL);
+	if (!payload)
+		return -ENOMEM;
+
+	rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, payload, &len);
+	if (!rc)
+		rc = memory_read_from_buffer(buf, count, &off, payload, len);
+	else
+		pr_err("Error getting %s variable %d\n", kobj->name, rc);
+
+	kfree(payload);
+	return rc;
+}
+
+/*
+ * Binary sysfs write routine for "update": hand @count bytes from @buf to
+ * the backend as an update for the variable named after @kobj.
+ *
+ * Returns @count on success or the backend's error code.
+ */
+static ssize_t update_write(struct file *filep, struct kobject *kobj,
+			    const struct bin_attribute *attr, char *buf, loff_t off,
+			    size_t count)
+{
+	int rc;
+
+	/* count is a size_t, so it must be printed with %zu */
+	pr_debug("count is %zu\n", count);
+	rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count);
+	if (rc) {
+		pr_err("Error setting the %s variable %d\n", kobj->name, rc);
+		return rc;
+	}
+
+	return count;
+}
+
+static struct kobj_attribute format_attr = __ATTR_RO(format);
+
+static struct kobj_attribute size_attr = __ATTR_RO(size);
+
+static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0);
+
+static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0);
+
+static const struct bin_attribute *const secvar_bin_attrs[] = {
+ &data_attr,
+ &update_attr,
+ NULL,
+};
+
+static struct attribute *secvar_attrs[] = {
+ &size_attr.attr,
+ NULL,
+};
+
+static const struct attribute_group secvar_attr_group = {
+ .attrs = secvar_attrs,
+ .bin_attrs = secvar_bin_attrs,
+};
+__ATTRIBUTE_GROUPS(secvar_attr);
+
+/*
+ * Free a variable kobject once its last reference is dropped. Every kobject
+ * using secvar_ktype is allocated with kzalloc() in add_var(), so kfree() is
+ * the matching release.
+ */
+static void secvar_release(struct kobject *kobj)
+{
+	kfree(kobj);
+}
+
+/*
+ * kobject type for each secure variable directory. A release() method is
+ * needed so that kobject_put() in add_var()'s error path frees the object
+ * instead of leaking it.
+ */
+static const struct kobj_type secvar_ktype = {
+	.release	= secvar_release,
+	.sysfs_ops	= &kobj_sysfs_ops,
+	.default_groups = secvar_attr_groups,
+};
+
+/*
+ * Set the size of the "data" and "update" binary attributes to the backend's
+ * maximum variable size. Returns 0, or the error from max_size().
+ */
+static __init int update_kobj_size(void)
+{
+	u64 limit;
+	int err;
+
+	err = secvar_ops->max_size(&limit);
+	if (err)
+		return err;
+
+	data_attr.size = limit;
+	update_attr.size = limit;
+
+	return 0;
+}
+
+/*
+ * Create the "config" attribute group under @kobj when the backend supplies
+ * config attributes. Returns 0 when there are none, otherwise the
+ * sysfs_create_group() result.
+ */
+static __init int secvar_sysfs_config(struct kobject *kobj)
+{
+	struct attribute_group grp = {
+		.name = "config",
+		.attrs = (struct attribute **)secvar_ops->config_attrs,
+	};
+
+	if (!secvar_ops->config_attrs)
+		return 0;
+
+	return sysfs_create_group(kobj, &grp);
+}
+
+/*
+ * Allocate a kobject for the variable @name, add it to secvar_kset and send
+ * a KOBJ_ADD uevent.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the kobject_add()
+ * error.
+ */
+static __init int add_var(const char *name)
+{
+	struct kobject *var;
+	int err;
+
+	var = kzalloc(sizeof(*var), GFP_KERNEL);
+	if (!var)
+		return -ENOMEM;
+
+	kobject_init(var, &secvar_ktype);
+
+	err = kobject_add(var, &secvar_kset->kobj, "%s", name);
+	if (!err) {
+		kobject_uevent(var, KOBJ_ADD);
+		return 0;
+	}
+
+	pr_warn("kobject_add error %d for attribute: %s\n", err,
+		name);
+	kobject_put(var);
+	return err;
+}
+
+/*
+ * Walk the backend's variable list with get_next() and register each name
+ * with add_var().
+ *
+ * -ENOENT from get_next() ends the walk successfully. Any other get_next()
+ * error is logged and returned, as is the first add_var() failure.
+ */
+static __init int secvar_sysfs_load(void)
+{
+	u64 len = 0;
+	char *key;
+	int rc;
+
+	key = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
+	if (!key)
+		return -ENOMEM;
+
+	for (;;) {
+		rc = secvar_ops->get_next(key, &len, NAME_MAX_SIZE);
+		if (rc == -ENOENT) {
+			/* End of list */
+			rc = 0;
+			break;
+		}
+		if (rc) {
+			pr_err("error getting secvar from firmware %d\n", rc);
+			break;
+		}
+
+		rc = add_var(key);
+		if (rc)
+			break;
+	}
+
+	kfree(key);
+	return rc;
+}
+
+/*
+ * Register every name in the backend's NULL-terminated var_names array with
+ * add_var(). Stops at, and returns, the first error.
+ */
+static __init int secvar_sysfs_load_static(void)
+{
+	const char * const *cur;
+
+	for (cur = secvar_ops->var_names; *cur; cur++) {
+		int err = add_var(*cur);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Late initcall that builds the secvar sysfs tree: the "secvar" kobject under
+ * firmware_kobj, its "format" file, the "vars" kset, the optional "config"
+ * group, and one kobject per variable.
+ *
+ * Returns 0 on success, -ENODEV when no backend registered its operations,
+ * or the error of the step that failed.
+ */
+static __init int secvar_sysfs_init(void)
+{
+	u64 max_size;
+	int rc;
+
+	if (!secvar_ops) {
+		pr_warn("Failed to retrieve secvar operations\n");
+		return -ENODEV;
+	}
+
+	secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
+	if (!secvar_kobj) {
+		pr_err("Failed to create firmware kobj\n");
+		return -ENOMEM;
+	}
+
+	rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
+	if (rc) {
+		/* Report the real error instead of masking it as -ENOMEM */
+		pr_err("Failed to create format object\n");
+		goto err;
+	}
+
+	secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
+	if (!secvar_kset) {
+		pr_err("sysfs kobject registration failed\n");
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	rc = update_kobj_size();
+	if (rc) {
+		pr_err("Cannot read the size of the attribute\n");
+		goto err;
+	}
+
+	rc = secvar_sysfs_config(secvar_kobj);
+	if (rc) {
+		pr_err("Failed to create config directory\n");
+		goto err;
+	}
+
+	if (secvar_ops->get_next)
+		rc = secvar_sysfs_load();
+	else
+		rc = secvar_sysfs_load_static();
+
+	if (rc) {
+		pr_err("Failed to create variable attributes\n");
+		goto err;
+	}
+
+	// Due to sysfs limitations, we will only ever get a write buffer of
+	// up to 1 page in size. Print a warning if this is potentially going
+	// to cause problems, so that the user is aware.
+	// Only trust max_size when the backend call succeeded; otherwise it
+	// would be read uninitialised.
+	if (!secvar_ops->max_size(&max_size) && max_size > PAGE_SIZE)
+		pr_warn_ratelimited("PAGE_SIZE (%lu) is smaller than maximum object size (%llu), writes are limited to PAGE_SIZE\n",
+				    PAGE_SIZE, max_size);
+
+	return 0;
+err:
+	kobject_put(secvar_kobj);
+	return rc;
+}
+
+late_initcall(secvar_sysfs_init);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1b0e26013a62..68d47c53876c 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -1,18 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common boot and setup code for both 32-bit and 64-bit.
* Extracted from arch/powerpc/kernel/setup_64.c.
*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
#include <linux/export.h>
+#include <linux/panic_notifier.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/init.h>
@@ -21,26 +18,27 @@
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/platform_device.h>
+#include <linux/printk.h>
#include <linux/seq_file.h>
#include <linux/ioport.h>
#include <linux/console.h>
-#include <linux/screen_info.h>
#include <linux/root_dev.h>
-#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/unistd.h>
+#include <linux/seq_buf.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/debugfs.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_irq.h>
+#include <linux/hugetlb.h>
+#include <linux/pgtable.h>
#include <asm/io.h>
#include <asm/paca.h>
-#include <asm/prom.h>
#include <asm/processor.h>
#include <asm/vdso_datapage.h>
-#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
@@ -60,10 +58,20 @@
#include <asm/xmon.h>
#include <asm/cputhreads.h>
#include <mm/mmu_decl.h>
+#include <asm/archrandom.h>
#include <asm/fadump.h>
+#include <asm/udbg.h>
+#include <asm/hugetlb.h>
+#include <asm/livepatch.h>
+#include <asm/mmu_context.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/kasan.h>
+#include <asm/mce.h>
+#include <asm/systemcfg.h>
+
+#include "setup.h"
#ifdef DEBUG
-#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
#else
#define DBG(fmt...)
@@ -78,22 +86,18 @@ EXPORT_SYMBOL(machine_id);
int boot_cpuid = -1;
EXPORT_SYMBOL_GPL(boot_cpuid);
+int __initdata boot_core_hwid = -1;
-unsigned long klimit = (unsigned long) _end;
-
-char cmd_line[COMMAND_LINE_SIZE];
+#ifdef CONFIG_PPC64
+int boot_cpu_hwid = -1;
+#endif
/*
- * This still seems to be needed... -- paulus
- */
-struct screen_info screen_info = {
- .orig_x = 0,
- .orig_y = 25,
- .orig_video_cols = 80,
- .orig_video_lines = 25,
- .orig_video_isVGA = 1,
- .orig_video_points = 16
-};
+ * These are used in binfmt_elf.c to put aux entries on the stack
+ * for each elf executable being started.
+ */
+int dcache_bsize;
+int icache_bsize;
/* Variables required to store legacy IO irq routing */
int of_i8042_kbd_irq;
@@ -107,71 +111,77 @@ int ppc_do_canonicalize_irqs;
EXPORT_SYMBOL(ppc_do_canonicalize_irqs);
#endif
+#ifdef CONFIG_CRASH_DUMP
+/* This keeps a track of which one is the crashing cpu. */
+int crashing_cpu = -1;
+#endif
+
/* also used by kexec */
void machine_shutdown(void)
{
-#ifdef CONFIG_FA_DUMP
/*
* if fadump is active, cleanup the fadump registration before we
* shutdown.
*/
fadump_cleanup();
-#endif
if (ppc_md.machine_shutdown)
ppc_md.machine_shutdown();
}
+/*
+ * Log that the system is halted, disable local interrupts and spin forever.
+ * Does not return.
+ */
+static void machine_hang(void)
+{
+ pr_emerg("System Halted, OK to turn off power\n");
+ local_irq_disable();
+ while (1)
+ ;
+}
+
void machine_restart(char *cmd)
{
machine_shutdown();
if (ppc_md.restart)
ppc_md.restart(cmd);
-#ifdef CONFIG_SMP
+
smp_send_stop();
-#endif
- printk(KERN_EMERG "System Halted, OK to turn off power\n");
- local_irq_disable();
- while (1) ;
+
+ do_kernel_restart(cmd);
+ mdelay(1000);
+
+ machine_hang();
}
void machine_power_off(void)
{
machine_shutdown();
- if (ppc_md.power_off)
- ppc_md.power_off();
-#ifdef CONFIG_SMP
+ do_kernel_power_off();
smp_send_stop();
-#endif
- printk(KERN_EMERG "System Halted, OK to turn off power\n");
- local_irq_disable();
- while (1) ;
+ machine_hang();
}
/* Used by the G5 thermal driver */
EXPORT_SYMBOL_GPL(machine_power_off);
-void (*pm_power_off)(void) = machine_power_off;
+void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
+/*
+ * Fetch at most one seed value through the platform's get_random_seed hook.
+ *
+ * @v:         destination handed to ppc_md.get_random_seed().
+ * @max_longs: number of longs the caller can accept; 0 means nothing is read.
+ *
+ * Returns 1 when the hook exists and reports success, otherwise 0.
+ */
+size_t __must_check arch_get_random_seed_longs(unsigned long *v, size_t max_longs)
+{
+	if (!max_longs || !ppc_md.get_random_seed)
+		return 0;
+
+	return ppc_md.get_random_seed(v) ? 1 : 0;
+}
+EXPORT_SYMBOL(arch_get_random_seed_longs);
+
void machine_halt(void)
{
machine_shutdown();
if (ppc_md.halt)
ppc_md.halt();
-#ifdef CONFIG_SMP
+
smp_send_stop();
-#endif
- printk(KERN_EMERG "System Halted, OK to turn off power\n");
- local_irq_disable();
- while (1) ;
+ machine_hang();
}
-
-#ifdef CONFIG_TAU
-extern u32 cpu_temp(unsigned long cpu);
-extern u32 cpu_temp_both(unsigned long cpu);
-#endif /* CONFIG_TAU */
-
#ifdef CONFIG_SMP
DEFINE_PER_CPU(unsigned int, cpu_pvr);
#endif
@@ -180,14 +190,15 @@ static void show_cpuinfo_summary(struct seq_file *m)
{
struct device_node *root;
const char *model = NULL;
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
unsigned long bogosum = 0;
int i;
- for_each_online_cpu(i)
- bogosum += loops_per_jiffy;
- seq_printf(m, "total bogomips\t: %lu.%02lu\n",
- bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
-#endif /* CONFIG_SMP && CONFIG_PPC32 */
+
+ if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
+ for_each_online_cpu(i)
+ bogosum += loops_per_jiffy;
+ seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+ bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
+ }
seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
if (ppc_md.name)
seq_printf(m, "platform\t: %s\n", ppc_md.name);
@@ -201,11 +212,10 @@ static void show_cpuinfo_summary(struct seq_file *m)
if (ppc_md.show_cpuinfo != NULL)
ppc_md.show_cpuinfo(m);
-#ifdef CONFIG_PPC32
/* Display the amount of memory */
- seq_printf(m, "Memory\t\t: %d MB\n",
- (unsigned int)(total_memory / (1024 * 1024)));
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "Memory\t\t: %d MB\n",
+ (unsigned int)(total_memory / (1024 * 1024)));
}
static int show_cpuinfo(struct seq_file *m, void *v)
@@ -216,14 +226,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
unsigned short maj;
unsigned short min;
- /* We only show online cpus: disable preempt (overzealous, I
- * knew) to prevent cpu going down. */
- preempt_disable();
- if (!cpu_online(cpu_id)) {
- preempt_enable();
- return 0;
- }
-
#ifdef CONFIG_SMP
pvr = per_cpu(cpu_pvr, cpu_id);
#else
@@ -232,34 +234,31 @@ static int show_cpuinfo(struct seq_file *m, void *v)
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
- seq_printf(m, "processor\t: %lu\n", cpu_id);
- seq_printf(m, "cpu\t\t: ");
+ seq_printf(m, "processor\t: %lu\ncpu\t\t: ", cpu_id);
- if (cur_cpu_spec->pvr_mask)
- seq_printf(m, "%s", cur_cpu_spec->cpu_name);
+ if (cur_cpu_spec->pvr_mask && cur_cpu_spec->cpu_name)
+ seq_puts(m, cur_cpu_spec->cpu_name);
else
seq_printf(m, "unknown (%08x)", pvr);
-#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- seq_printf(m, ", altivec supported");
-#endif /* CONFIG_ALTIVEC */
+ seq_puts(m, ", altivec supported");
- seq_printf(m, "\n");
+ seq_putc(m, '\n');
#ifdef CONFIG_TAU
- if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) {
-#ifdef CONFIG_TAU_AVERAGE
- /* more straightforward, but potentially misleading */
- seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
- cpu_temp(cpu_id));
-#else
- /* show the actual temp sensor range */
- u32 temp;
- temp = cpu_temp_both(cpu_id);
- seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
- temp & 0xff, temp >> 16);
-#endif
+ if (cpu_has_feature(CPU_FTR_TAU)) {
+ if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
+ /* more straightforward, but potentially misleading */
+ seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
+ cpu_temp(cpu_id));
+ } else {
+ /* show the actual temp sensor range */
+ u32 temp;
+ temp = cpu_temp_both(cpu_id);
+ seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+ temp & 0xff, temp >> 16);
+ }
}
#endif /* CONFIG_TAU */
@@ -278,11 +277,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "clock\t\t: %lu.%06luMHz\n",
proc_freq / 1000000, proc_freq % 1000000);
- if (ppc_md.show_percpuinfo != NULL)
- ppc_md.show_percpuinfo(m, cpu_id);
-
/* If we are a Freescale core do a simple check so
- * we dont have to keep adding cases in the future */
+ * we don't have to keep adding cases in the future */
if (PVR_VER(pvr) & 0x8000) {
switch (PVR_VER(pvr)) {
case 0x8000: /* 7441/7450/7451, Voyager */
@@ -301,14 +297,15 @@ static int show_cpuinfo(struct seq_file *m, void *v)
}
} else {
switch (PVR_VER(pvr)) {
- case 0x0020: /* 403 family */
- maj = PVR_MAJ(pvr) + 1;
- min = PVR_MIN(pvr);
- break;
case 0x1008: /* 740P/750P ?? */
maj = ((pvr >> 8) & 0xFF) - 1;
min = pvr & 0xFF;
break;
+ case 0x004e: /* POWER9 bits 12-15 give chip type */
+ case 0x0080: /* POWER10 bit 12 gives SMT8/4 */
+ maj = (pvr >> 8) & 0x0F;
+ min = pvr & 0xFF;
+ break;
default:
maj = (pvr >> 8) & 0xFF;
min = pvr & 0xFF;
@@ -319,17 +316,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
maj, min, PVR_VER(pvr), PVR_REV(pvr));
-#ifdef CONFIG_PPC32
- seq_printf(m, "bogomips\t: %lu.%02lu\n",
- loops_per_jiffy / (500000/HZ),
- (loops_per_jiffy / (5000/HZ)) % 100);
-#endif
-
-#ifdef CONFIG_SMP
- seq_printf(m, "\n");
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
+ (loops_per_jiffy / (5000 / HZ)) % 100);
- preempt_enable();
+ seq_putc(m, '\n');
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
@@ -360,10 +351,10 @@ static void c_stop(struct seq_file *m, void *v)
}
const struct seq_operations cpuinfo_op = {
- .start =c_start,
- .next = c_next,
- .stop = c_stop,
- .show = show_cpuinfo,
+ .start = c_start,
+ .next = c_next,
+ .stop = c_stop,
+ .show = show_cpuinfo,
};
void __init check_for_initrd(void)
@@ -382,7 +373,7 @@ void __init check_for_initrd(void)
initrd_start = initrd_end = 0;
if (initrd_start)
- printk("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
+ pr_info("Found initrd at 0x%lx:0x%lx\n", initrd_start, initrd_end);
DBG(" <- check_for_initrd()\n");
#endif /* CONFIG_BLK_DEV_INITRD */
@@ -390,8 +381,8 @@ void __init check_for_initrd(void)
#ifdef CONFIG_SMP
-int threads_per_core, threads_per_subcore, threads_shift;
-cpumask_t threads_core_mask;
+int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
+cpumask_t threads_core_mask __read_mostly;
EXPORT_SYMBOL_GPL(threads_per_core);
EXPORT_SYMBOL_GPL(threads_per_subcore);
EXPORT_SYMBOL_GPL(threads_shift);
@@ -415,11 +406,32 @@ static void __init cpu_init_thread_core_maps(int tpc)
cpumask_set_cpu(i, &threads_core_mask);
printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n",
- tpc, tpc > 1 ? "s" : "");
+ tpc, str_plural(tpc));
printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift);
}
+u32 *cpu_to_phys_id = NULL;
+
+static int assign_threads(unsigned int cpu, unsigned int nthreads, bool present,
+ const __be32 *hw_ids)
+{
+ for (int i = 0; i < nthreads && cpu < nr_cpu_ids; i++) {
+ __be32 hwid;
+
+ hwid = be32_to_cpu(hw_ids[i]);
+
+ DBG(" thread %d -> cpu %d (hard id %d)\n", i, cpu, hwid);
+
+ set_cpu_present(cpu, present);
+ set_cpu_possible(cpu, true);
+ cpu_to_phys_id[cpu] = hwid;
+ cpu++;
+ }
+
+ return cpu;
+}
+
/**
* setup_cpu_maps - initialize the following cpu maps:
* cpu_possible_mask
@@ -440,29 +452,33 @@ static void __init cpu_init_thread_core_maps(int tpc)
*/
void __init smp_setup_cpu_maps(void)
{
- struct device_node *dn = NULL;
+ struct device_node *dn;
int cpu = 0;
int nthreads = 1;
DBG("smp_setup_cpu_maps()\n");
- while ((dn = of_find_node_by_type(dn, "cpu")) && cpu < nr_cpu_ids) {
+ cpu_to_phys_id = memblock_alloc_or_panic(nr_cpu_ids * sizeof(u32),
+ __alignof__(u32));
+
+ for_each_node_by_type(dn, "cpu") {
const __be32 *intserv;
__be32 cpu_be;
- int j, len;
+ int len;
- DBG(" * %s...\n", dn->full_name);
+ DBG(" * %pOF...\n", dn);
intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
&len);
if (intserv) {
- DBG(" ibm,ppc-interrupt-server#s -> %d threads\n",
- nthreads);
+ DBG(" ibm,ppc-interrupt-server#s -> %lu threads\n",
+ (len / sizeof(int)));
} else {
DBG(" no ibm,ppc-interrupt-server#s -> 1 thread\n");
intserv = of_get_property(dn, "reg", &len);
if (!intserv) {
cpu_be = cpu_to_be32(cpu);
+ /* XXX: what is this? uninitialized?? */
intserv = &cpu_be; /* assume logical == phys */
len = 4;
}
@@ -470,22 +486,31 @@ void __init smp_setup_cpu_maps(void)
nthreads = len / sizeof(int);
- for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) {
- bool avail;
-
- DBG(" thread %d -> cpu %d (hard id %d)\n",
- j, cpu, be32_to_cpu(intserv[j]));
+ bool avail = of_device_is_available(dn);
+ if (!avail)
+ avail = !of_property_match_string(dn,
+ "enable-method", "spin-table");
- avail = of_device_is_available(dn);
- if (!avail)
- avail = !of_property_match_string(dn,
- "enable-method", "spin-table");
+ if (boot_core_hwid >= 0) {
+ if (cpu == 0) {
+ pr_info("Skipping CPU node %pOF to allow for boot core.\n", dn);
+ cpu = nthreads;
+ continue;
+ }
- set_cpu_present(cpu, avail);
- set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j]));
- set_cpu_possible(cpu, true);
- cpu++;
+ if (be32_to_cpu(intserv[0]) == boot_core_hwid) {
+ pr_info("Renumbered boot core %pOF to logical 0\n", dn);
+ assign_threads(0, nthreads, avail, intserv);
+ of_node_put(dn);
+ break;
+ }
+ } else if (cpu >= nr_cpu_ids) {
+ of_node_put(dn);
+ break;
}
+
+ if (cpu < nr_cpu_ids)
+ cpu = assign_threads(cpu, nthreads, avail, intserv);
}
/* If no SMT supported, nthreads is forced to 1 */
@@ -499,7 +524,7 @@ void __init smp_setup_cpu_maps(void)
* On pSeries LPAR, we need to know how many cpus
* could possibly be added to this partition.
*/
- if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR) &&
+ if (firmware_has_feature(FW_FEATURE_LPAR) &&
(dn = of_find_node_by_path("/rtas"))) {
int num_addr_cell, num_size_cell, maxcpus;
const __be32 *ireg;
@@ -521,7 +546,7 @@ void __init smp_setup_cpu_maps(void)
if (maxcpus > nr_cpu_ids) {
printk(KERN_WARNING
"Partition configured for %d cpus, "
- "operating system maximum is %d.\n",
+ "operating system maximum is %u.\n",
maxcpus, nr_cpu_ids);
maxcpus = nr_cpu_ids;
} else
@@ -533,7 +558,9 @@ void __init smp_setup_cpu_maps(void)
out:
of_node_put(dn);
}
- vdso_data->processorCount = num_present_cpus();
+#endif
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+ systemcfg->processorCount = num_present_cpus();
#endif /* CONFIG_PPC64 */
/* Initialize CPU <=> thread mapping/
@@ -576,10 +603,19 @@ static __init int add_pcspkr(void)
device_initcall(add_pcspkr);
#endif /* CONFIG_PCSPKR_PLATFORM */
-void probe_machine(void)
+static char ppc_hw_desc_buf[128] __initdata;
+
+struct seq_buf ppc_hw_desc __initdata = {
+ .buffer = ppc_hw_desc_buf,
+ .size = sizeof(ppc_hw_desc_buf),
+ .len = 0,
+};
+
+static __init void probe_machine(void)
{
extern struct machdep_calls __machine_desc_start;
extern struct machdep_calls __machine_desc_end;
+ unsigned int i;
/*
* Iterate all ppc_md structures until we find the proper
@@ -587,24 +623,44 @@ void probe_machine(void)
*/
DBG("Probing machine type ...\n");
+ /*
+ * Check ppc_md is empty, if not we have a bug, ie, we setup an
+ * entry before probe_machine() which will be overwritten
+ */
+ for (i = 0; i < (sizeof(ppc_md) / sizeof(void *)); i++) {
+ if (((void **)&ppc_md)[i]) {
+ printk(KERN_ERR "Entry %d in ppc_md non empty before"
+ " machine probe !\n", i);
+ }
+ }
+
for (machine_id = &__machine_desc_start;
machine_id < &__machine_desc_end;
machine_id++) {
- DBG(" %s ...", machine_id->name);
+ DBG(" %s ...\n", machine_id->name);
+ if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
+ continue;
+ if (machine_id->compatibles && !of_machine_compatible_match(machine_id->compatibles))
+ continue;
memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
- if (ppc_md.probe()) {
- DBG(" match !\n");
- break;
- }
- DBG("\n");
+ if (ppc_md.probe && !ppc_md.probe())
+ continue;
+ DBG(" %s match !\n", machine_id->name);
+ break;
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
- DBG("No suitable machine found !\n");
+ pr_err("No suitable machine description found !\n");
for (;;);
}
- printk(KERN_INFO "Using %s machine description\n", ppc_md.name);
+ // Append the machine name to other info we've gathered
+ seq_buf_puts(&ppc_hw_desc, ppc_md.name);
+
+ // Set the generic hardware description shown in oopses
+ dump_stack_set_arch_desc(ppc_hw_desc.buffer);
+
+ pr_info("Hardware name: %s\n", ppc_hw_desc.buffer);
}
/* Match a class of boards, not a specific device configuration. */
@@ -653,7 +709,7 @@ int check_legacy_ioport(unsigned long base_port)
return ret;
parent = of_get_parent(np);
if (parent) {
- if (strcmp(parent->type, "isa") == 0)
+ if (of_node_is_type(parent, "isa"))
ret = 0;
of_node_put(parent);
}
@@ -662,26 +718,91 @@ int check_legacy_ioport(unsigned long base_port)
}
EXPORT_SYMBOL(check_legacy_ioport);
-static int ppc_panic_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+/*
+ * Panic notifiers setup
+ *
+ * We have 3 notifiers for powerpc, each one from a different "nature":
+ *
+ * - ppc_panic_fadump_handler() is a hypervisor notifier, which hard-disables
+ * IRQs and deal with the Firmware-Assisted dump, when it is configured;
+ * should run early in the panic path.
+ *
+ * - dump_kernel_offset() is an informative notifier, just showing the KASLR
+ * offset if we have RANDOMIZE_BASE set.
+ *
+ * - ppc_panic_platform_handler() is a low-level handler that's registered
+ * only if the platform wishes to perform final actions in the panic path,
+ * hence it should run late and might not even return. Currently, only
+ * pseries and ps3 platforms register callbacks.
+ */
+static int ppc_panic_fadump_handler(struct notifier_block *this,
+ unsigned long event, void *ptr)
{
/*
+ * panic does a local_irq_disable, but we really
+ * want interrupts to be hard disabled.
+ */
+ hard_irq_disable();
+
+ /*
* If firmware-assisted dump has been registered then trigger
- * firmware-assisted dump and let firmware handle everything else.
+ * its callback and let the firmware handles everything else.
*/
crash_fadump(NULL, ptr);
- ppc_md.panic(ptr); /* May not return */
+
+ return NOTIFY_DONE;
+}
+
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+ void *p)
+{
+ pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+ kaslr_offset(), KERNELBASE);
+
return NOTIFY_DONE;
}
+static int ppc_panic_platform_handler(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ /*
+ * This handler is only registered if we have a panic callback
+ * on ppc_md, hence NULL check is not needed.
+ * Also, it may not return, so it runs really late on panic path.
+ */
+ ppc_md.panic(ptr);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block ppc_fadump_block = {
+ .notifier_call = ppc_panic_fadump_handler,
+ .priority = INT_MAX, /* run early, to notify the firmware ASAP */
+};
+
+static struct notifier_block kernel_offset_notifier = {
+ .notifier_call = dump_kernel_offset,
+};
+
static struct notifier_block ppc_panic_block = {
- .notifier_call = ppc_panic_event,
- .priority = INT_MIN /* may not return; must be done last */
+ .notifier_call = ppc_panic_platform_handler,
+ .priority = INT_MIN, /* may not return; must be done last */
};
void __init setup_panic(void)
{
- atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
+ /* Hard-disables IRQs + deal with FW-assisted dump (fadump) */
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &ppc_fadump_block);
+
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &kernel_offset_notifier);
+
+ /* Low-level platform-specific routines that should run on panic */
+ if (ppc_md.panic)
+ atomic_notifier_chain_register(&panic_notifier_list,
+ &ppc_panic_block);
}
#ifdef CONFIG_CHECK_CACHE_COHERENCY
@@ -693,29 +814,25 @@ void __init setup_panic(void)
* BUG() in that case.
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define KERNEL_COHERENCY 0
-#else
-#define KERNEL_COHERENCY 1
-#endif
+#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
static int __init check_cache_coherency(void)
{
struct device_node *np;
const void *prop;
- int devtree_coherency;
+ bool devtree_coherency;
np = of_find_node_by_path("/");
prop = of_get_property(np, "coherency-off", NULL);
of_node_put(np);
- devtree_coherency = prop ? 0 : 1;
+ devtree_coherency = prop ? false : true;
if (devtree_coherency != KERNEL_COHERENCY) {
printk(KERN_ERR
"kernel coherency:%s != device tree_coherency:%s\n",
- KERNEL_COHERENCY ? "on" : "off",
- devtree_coherency ? "on" : "off");
+ str_on_off(KERNEL_COHERENCY),
+ str_on_off(devtree_coherency));
BUG();
}
@@ -725,27 +842,182 @@ static int __init check_cache_coherency(void)
late_initcall(check_cache_coherency);
#endif /* CONFIG_CHECK_CACHE_COHERENCY */
-#ifdef CONFIG_DEBUG_FS
-struct dentry *powerpc_debugfs_root;
-EXPORT_SYMBOL(powerpc_debugfs_root);
+void ppc_printk_progress(char *s, unsigned short hex)
+{
+ pr_info("%s\n", s);
+}
-static int powerpc_debugfs_init(void)
+static __init void print_system_info(void)
{
- powerpc_debugfs_root = debugfs_create_dir("powerpc", NULL);
+ pr_info("-----------------------------------------------------\n");
+ pr_info("phys_mem_size = 0x%llx\n",
+ (unsigned long long)memblock_phys_mem_size());
+
+ pr_info("dcache_bsize = 0x%x\n", dcache_bsize);
+ pr_info("icache_bsize = 0x%x\n", icache_bsize);
+
+ pr_info("cpu_features = 0x%016lx\n", cur_cpu_spec->cpu_features);
+ pr_info(" possible = 0x%016lx\n",
+ (unsigned long)CPU_FTRS_POSSIBLE);
+ pr_info(" always = 0x%016lx\n",
+ (unsigned long)CPU_FTRS_ALWAYS);
+ pr_info("cpu_user_features = 0x%08x 0x%08x\n",
+ cur_cpu_spec->cpu_user_features,
+ cur_cpu_spec->cpu_user_features2);
+ pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
+#ifdef CONFIG_PPC64
+ pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+ pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+#endif
+#endif
+
+ if (!early_radix_enabled())
+ print_system_hash_info();
- return powerpc_debugfs_root == NULL;
+ if (PHYSICAL_START > 0)
+ pr_info("physical_start = 0x%llx\n",
+ (unsigned long long)PHYSICAL_START);
+ pr_info("-----------------------------------------------------\n");
}
-arch_initcall(powerpc_debugfs_init);
-#endif
-void ppc_printk_progress(char *s, unsigned short hex)
+#ifdef CONFIG_SMP
+static void __init smp_setup_pacas(void)
{
- pr_info("%s\n", s);
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+ allocate_paca(cpu);
+ set_hard_smp_processor_id(cpu, cpu_to_phys_id[cpu]);
+ }
+
+ memblock_free(cpu_to_phys_id, nr_cpu_ids * sizeof(u32));
+ cpu_to_phys_id = NULL;
}
+#endif
-void arch_setup_pdev_archdata(struct platform_device *pdev)
+/*
+ * Called into from start_kernel this initializes memblock, which is used
+ * to manage page allocation until mem_init is called.
+ */
+void __init setup_arch(char **cmdline_p)
{
- pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- pdev->dev.dma_mask = &pdev->archdata.dma_mask;
- set_dma_ops(&pdev->dev, &dma_direct_ops);
+ kasan_init();
+
+ *cmdline_p = boot_command_line;
+
+ /* Set a half-reasonable default so udelay does something sensible */
+ loops_per_jiffy = 500000000 / HZ;
+
+ /* Unflatten the device-tree passed by prom_init or kexec */
+ unflatten_device_tree();
+
+ /*
+ * Initialize cache line/block info from device-tree (on ppc64) or
+ * just cputable (on ppc32).
+ */
+ initialize_cache_info();
+
+ /* Initialize RTAS if available. */
+ rtas_initialize();
+
+ /* Check if we have an initrd provided via the device-tree. */
+ check_for_initrd();
+
+ /* Probe the machine type, establish ppc_md. */
+ probe_machine();
+
+ /* Setup panic notifier if requested by the platform. */
+ setup_panic();
+
+ /*
+ * Configure ppc_md.power_save (ppc32 only, 64-bit machines do
+ * it from their respective probe() function.
+ */
+ setup_power_save();
+
+ /* Discover standard serial ports. */
+ find_legacy_serial_ports();
+
+ /* Register early console with the printk subsystem. */
+ register_early_udbg_console();
+
+ /* Setup the various CPU maps based on the device-tree. */
+ smp_setup_cpu_maps();
+
+ /* Initialize xmon. */
+ xmon_setup();
+
+ /* Check the SMT related command line arguments (ppc64). */
+ check_smt_enabled();
+
+ /* Parse memory topology */
+ mem_topology_setup();
+ high_memory = (void *)__va(max_low_pfn * PAGE_SIZE);
+
+ /*
+ * Release secondary cpus out of their spinloops at 0x60 now that
+ * we can map physical -> logical CPU ids.
+ *
+ * Freescale Book3e parts spin in a loop provided by firmware,
+ * so smp_release_cpus() does nothing for them.
+ */
+#ifdef CONFIG_SMP
+ smp_setup_pacas();
+
+ /* On BookE, setup per-core TLB data structures. */
+ setup_tlb_core_data();
+#endif
+
+ /* Print various info about the machine that has been gathered so far. */
+ print_system_info();
+
+ klp_init_thread_info(&init_task);
+
+ setup_initial_init_mm(_stext, _etext, _edata, _end);
+ /* sched_init() does the mmgrab(&init_mm) for the primary CPU */
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
+ mm_iommu_init(&init_mm);
+
+ irqstack_early_init();
+ exc_lvl_early_init();
+ emergency_stack_init();
+
+ mce_init();
+ smp_release_cpus();
+
+ initmem_init();
+
+ /*
+ * Reserve large chunks of memory for use by CMA for fadump, KVM and
+ * hugetlb. These must be called after initmem_init(), so that
+ * pageblock_order is initialised.
+ */
+ fadump_cma_init();
+ kvm_cma_reserve();
+ gigantic_hugetlb_cma_reserve();
+
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+
+ if (ppc_md.setup_arch)
+ ppc_md.setup_arch();
+
+ setup_barrier_nospec();
+ setup_spectre_v2();
+
+ paging_init();
+
+ /* Initialize the MMU context management stuff. */
+ mmu_context_init();
+
+ /* Interrupt code needs to be 64K-aligned. */
+ if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
+ panic("Kernelbase not 64K-aligned (0x%lx)!\n",
+ (unsigned long)_stext);
}
diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h
new file mode 100644
index 000000000000..385a00a2e2ca
--- /dev/null
+++ b/arch/powerpc/kernel/setup.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Prototypes for functions that are shared between setup_(32|64|common).c
+ *
+ * Copyright 2016 Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef __ARCH_POWERPC_KERNEL_SETUP_H
+#define __ARCH_POWERPC_KERNEL_SETUP_H
+
+void initialize_cache_info(void);
+void irqstack_early_init(void);
+
+#ifdef CONFIG_PPC32
+void setup_power_save(void);
+#else
+static inline void setup_power_save(void) { }
+#endif
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
+void check_smt_enabled(void);
+#else
+static inline void check_smt_enabled(void) { }
+#endif
+
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
+void setup_tlb_core_data(void);
+#else
+static inline void setup_tlb_core_data(void) { }
+#endif
+
+#ifdef CONFIG_BOOKE
+void exc_lvl_early_init(void);
+#else
+static inline void exc_lvl_early_init(void) { }
+#endif
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_VMAP_STACK)
+void emergency_stack_init(void);
+#else
+static inline void emergency_stack_init(void) { }
+#endif
+
+#ifdef CONFIG_PPC64
+u64 ppc64_bolted_size(void);
+
+/* Default SPR values from firmware/kexec */
+extern unsigned long spr_default_dscr;
+#endif
+
+/*
+ * Having this in kvm_ppc.h makes include dependencies too
+ * tricky to solve for setup-common.c so have it here.
+ */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void kvm_cma_reserve(void);
+#else
+static inline void kvm_cma_reserve(void) { }
+#endif
+
+#ifdef CONFIG_TAU
+u32 cpu_temp(unsigned long cpu);
+u32 cpu_temp_both(unsigned long cpu);
+u32 tau_interrupts(unsigned long cpu);
+#endif /* CONFIG_TAU */
+
+#endif /* __ARCH_POWERPC_KERNEL_SETUP_H */
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index ea4fda60e57b..5a1bf501fbe1 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Common prep/pmac/chrp boot and setup code.
*/
@@ -11,17 +12,19 @@
#include <linux/delay.h>
#include <linux/initrd.h>
#include <linux/tty.h>
-#include <linux/bootmem.h>
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/cpu.h>
#include <linux/console.h>
#include <linux/memblock.h>
+#include <linux/export.h>
+#include <linux/nvram.h>
+#include <linux/pgtable.h>
+#include <linux/of_fdt.h>
+#include <linux/irq.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/elf.h>
@@ -29,7 +32,7 @@
#include <asm/bootx.h>
#include <asm/btext.h>
#include <asm/machdep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/pmac_feature.h>
#include <asm/sections.h>
#include <asm/nvram.h>
@@ -37,8 +40,14 @@
#include <asm/time.h>
#include <asm/serial.h>
#include <asm/udbg.h>
-#include <asm/mmu_context.h>
-#include <asm/epapr_hcalls.h>
+#include <asm/text-patching.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/asm-prototypes.h>
+#include <asm/kdump.h>
+#include <asm/feature-fixups.h>
+#include <asm/early_ioremap.h>
+
+#include "setup.h"
#define DBG(fmt...)
@@ -48,108 +57,50 @@ int boot_cpuid_phys;
EXPORT_SYMBOL_GPL(boot_cpuid_phys);
int smp_hw_index[NR_CPUS];
+EXPORT_SYMBOL(smp_hw_index);
-unsigned long ISA_DMA_THRESHOLD;
unsigned int DMA_MODE_READ;
unsigned int DMA_MODE_WRITE;
-#ifdef CONFIG_VGA_CONSOLE
-unsigned long vgacon_remap_base;
-EXPORT_SYMBOL(vgacon_remap_base);
-#endif
-
-/*
- * These are used in binfmt_elf.c to put aux entries on the stack
- * for each elf executable being started.
- */
-int dcache_bsize;
-int icache_bsize;
-int ucache_bsize;
+EXPORT_SYMBOL(DMA_MODE_READ);
+EXPORT_SYMBOL(DMA_MODE_WRITE);
/*
- * We're called here very early in the boot. We determine the machine
- * type and call the appropriate low-level setup functions.
- * -- Cort <cort@fsmlabs.com>
+ * This is run before start_kernel(), the kernel has been relocated
+ * and we are running with enough of the MMU enabled to have our
+ * proper kernel virtual addresses
*
- * Note that the kernel may be running at an address which is different
- * from the address that it was linked at, so we must use RELOC/PTRRELOC
- * to access static data (including strings). -- paulus
+ * We do the initial parsing of the flat device-tree and prepares
+ * for the MMU to be fully initialized.
*/
-notrace unsigned long __init early_init(unsigned long dt_ptr)
+notrace void __init machine_init(u64 dt_ptr)
{
- unsigned long offset = reloc_offset();
- struct cpu_spec *spec;
-
- /* First zero the BSS -- use memset_io, some platforms don't have
- * caches on yet */
- memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
- __bss_stop - __bss_start);
-
- /*
- * Identify the CPU type and fix up code sections
- * that depend on which cpu we have.
- */
- spec = identify_cpu(offset, mfspr(SPRN_PVR));
-
- do_feature_fixups(spec->cpu_features,
- PTRRELOC(&__start___ftr_fixup),
- PTRRELOC(&__stop___ftr_fixup));
-
- do_feature_fixups(spec->mmu_features,
- PTRRELOC(&__start___mmu_ftr_fixup),
- PTRRELOC(&__stop___mmu_ftr_fixup));
+ u32 *addr = (u32 *)patch_site_addr(&patch__memset_nocache);
+ ppc_inst_t insn;
- do_lwsync_fixups(spec->cpu_features,
- PTRRELOC(&__start___lwsync_fixup),
- PTRRELOC(&__stop___lwsync_fixup));
+ /* Configure static keys first, now that we're relocated. */
+ setup_feature_keys();
- do_final_fixups();
-
- return KERNELBASE + offset;
-}
-
-
-/*
- * Find out what kind of machine we're on and save any data we need
- * from the early boot process (devtree is copied on pmac by prom_init()).
- * This is called very early on the boot process, after a minimal
- * MMU environment has been set up but before MMU_init is called.
- */
-notrace void __init machine_init(u64 dt_ptr)
-{
- lockdep_init();
+ early_ioremap_init();
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
+ patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_RAW_NOP()));
+
+ create_cond_branch(&insn, addr, branch_target(addr), 0x820000);
+ patch_instruction(addr, insn); /* replace b by bne cr0 */
+
/* Do some early initialization based on the flat device tree */
early_init_devtree(__va(dt_ptr));
- epapr_paravirt_early_init();
-
early_init_mmu();
- probe_machine();
-
setup_kdump_trampoline();
-
-#ifdef CONFIG_6xx
- if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
- cpu_has_feature(CPU_FTR_CAN_NAP))
- ppc_md.power_save = ppc6xx_idle;
-#endif
-
-#ifdef CONFIG_E500
- if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
- cpu_has_feature(CPU_FTR_CAN_NAP))
- ppc_md.power_save = e500_idle;
-#endif
- if (ppc_md.progress)
- ppc_md.progress("id mach(): done", 0x200);
}
/* Checks "l2cr=xxxx" command-line option */
-int __init ppc_setup_l2cr(char *str)
+static int __init ppc_setup_l2cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L2CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -162,7 +113,7 @@ int __init ppc_setup_l2cr(char *str)
__setup("l2cr=", ppc_setup_l2cr);
/* Checks "l3cr=xxxx" command-line option */
-int __init ppc_setup_l3cr(char *str)
+static int __init ppc_setup_l3cr(char *str)
{
if (cpu_has_feature(CPU_FTR_L3CR)) {
unsigned long val = simple_strtoul(str, NULL, 0);
@@ -173,42 +124,7 @@ int __init ppc_setup_l3cr(char *str)
}
__setup("l3cr=", ppc_setup_l3cr);
-#ifdef CONFIG_GENERIC_NVRAM
-
-/* Generic nvram hooks used by drivers/char/gen_nvram.c */
-unsigned char nvram_read_byte(int addr)
-{
- if (ppc_md.nvram_read_val)
- return ppc_md.nvram_read_val(addr);
- return 0xff;
-}
-EXPORT_SYMBOL(nvram_read_byte);
-
-void nvram_write_byte(unsigned char val, int addr)
-{
- if (ppc_md.nvram_write_val)
- ppc_md.nvram_write_val(addr, val);
-}
-EXPORT_SYMBOL(nvram_write_byte);
-
-ssize_t nvram_get_size(void)
-{
- if (ppc_md.nvram_size)
- return ppc_md.nvram_size();
- return -1;
-}
-EXPORT_SYMBOL(nvram_get_size);
-
-void nvram_sync(void)
-{
- if (ppc_md.nvram_sync)
- ppc_md.nvram_sync();
-}
-EXPORT_SYMBOL(nvram_sync);
-
-#endif /* CONFIG_NVRAM */
-
-int __init ppc_init(void)
+static int __init ppc_init(void)
{
/* clear the progress line */
if (ppc_md.progress)
@@ -220,25 +136,42 @@ int __init ppc_init(void)
}
return 0;
}
-
arch_initcall(ppc_init);
-static void __init irqstack_early_init(void)
+static void *__init alloc_stack(void)
+{
+ return memblock_alloc_or_panic(THREAD_SIZE, THREAD_ALIGN);
+}
+
+void __init irqstack_early_init(void)
{
unsigned int i;
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ return;
+
/* interrupt stacks must be in lowmem, we get that for free on ppc32
* as the memblock is limited to lowmem by default */
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ softirq_ctx[i] = alloc_stack();
+ hardirq_ctx[i] = alloc_stack();
}
}
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
-static void __init exc_lvl_early_init(void)
+#ifdef CONFIG_VMAP_STACK
+void *emergency_ctx[NR_CPUS] __ro_after_init = {[0] = &init_stack};
+
+void __init emergency_stack_init(void)
+{
+ unsigned int i;
+
+ for_each_possible_cpu(i)
+ emergency_ctx[i] = alloc_stack();
+}
+#endif
+
+#ifdef CONFIG_BOOKE
+void __init exc_lvl_early_init(void)
{
unsigned int i, hw_cpu;
@@ -251,43 +184,32 @@ static void __init exc_lvl_early_init(void)
hw_cpu = 0;
#endif
- critirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ critirq_ctx[hw_cpu] = alloc_stack();
#ifdef CONFIG_BOOKE
- dbgirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
- mcheckirq_ctx[hw_cpu] = (struct thread_info *)
- __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+ dbgirq_ctx[hw_cpu] = alloc_stack();
+ mcheckirq_ctx[hw_cpu] = alloc_stack();
#endif
}
}
-#else
-#define exc_lvl_early_init()
#endif
-/* Warning, IO base is not yet inited */
-void __init setup_arch(char **cmdline_p)
+void __init setup_power_save(void)
{
- *cmdline_p = cmd_line;
-
- /* so udelay does something sensible, assume <= 1000 bogomips */
- loops_per_jiffy = 500000000 / HZ;
-
- unflatten_device_tree();
- check_for_initrd();
-
- if (ppc_md.init_early)
- ppc_md.init_early();
-
- find_legacy_serial_ports();
-
- smp_setup_cpu_maps();
-
- /* Register early console */
- register_early_udbg_console();
+#ifdef CONFIG_PPC_BOOK3S_32
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = ppc6xx_idle;
+#endif
- xmon_setup();
+#ifdef CONFIG_PPC_E500
+ if (cpu_has_feature(CPU_FTR_CAN_DOZE) ||
+ cpu_has_feature(CPU_FTR_CAN_NAP))
+ ppc_md.power_save = e500_idle;
+#endif
+}
+__init void initialize_cache_info(void)
+{
/*
* Set cache line size based on type of cpu as a default.
* Systems with OF can look in the properties on the cpu node(s)
@@ -295,36 +217,4 @@ void __init setup_arch(char **cmdline_p)
*/
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
- ucache_bsize = 0;
- if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
- ucache_bsize = icache_bsize = dcache_bsize;
-
- if (ppc_md.panic)
- setup_panic();
-
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = klimit;
-
- exc_lvl_early_init();
-
- irqstack_early_init();
-
- /* set up the bootmem stuff with available memory */
- do_init_bootmem();
- if ( ppc_md.progress ) ppc_md.progress("setup_arch: bootmem", 0x3eab);
-
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
-
- if (ppc_md.setup_arch)
- ppc_md.setup_arch();
- if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab);
-
- paging_init();
-
- /* Initialize the MMU context management stuff */
- mmu_context_init();
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 75d62d63fe68..8fd7cbf3bd04 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -1,17 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
*
* Common boot and setup code.
*
* Copyright (C) 2001 PPC64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#define DEBUG
-
#include <linux/export.h>
#include <linux/string.h>
#include <linux/sched.h>
@@ -31,24 +25,27 @@
#include <linux/unistd.h>
#include <linux/serial.h>
#include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
#include <linux/lockdep.h>
-#include <linux/memblock.h>
-#include <linux/hugetlb.h>
#include <linux/memory.h>
+#include <linux/nmi.h>
+#include <linux/pgtable.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/asm-prototypes.h>
+#include <asm/kvm_guest.h>
#include <asm/io.h>
#include <asm/kdump.h>
-#include <asm/prom.h>
#include <asm/processor.h>
-#include <asm/pgtable.h>
#include <asm/smp.h>
#include <asm/elf.h>
#include <asm/machdep.h>
#include <asm/paca.h>
#include <asm/time.h>
#include <asm/cputable.h>
+#include <asm/dt_cpu_ftrs.h>
#include <asm/sections.h>
#include <asm/btext.h>
#include <asm/nvram.h>
@@ -63,42 +60,35 @@
#include <asm/xmon.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
-#include <asm/mmu_context.h>
-#include <asm/code-patching.h>
-#include <asm/kvm_ppc.h>
-#include <asm/hugetlb.h>
-#include <asm/epapr_hcalls.h>
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
+#include <asm/text-patching.h>
+#include <asm/ftrace.h>
+#include <asm/opal.h>
+#include <asm/cputhreads.h>
+#include <asm/hw_irq.h>
+#include <asm/feature-fixups.h>
+#include <asm/kup.h>
+#include <asm/early_ioremap.h>
+#include <asm/pgalloc.h>
+
+#include "setup.h"
int spinning_secondaries;
u64 ppc64_pft_size;
-/* Pick defaults since we might want to patch instructions
- * before we've read this from the device tree.
- */
struct ppc64_caches ppc64_caches = {
- .dline_size = 0x40,
- .log_dline_size = 6,
- .iline_size = 0x40,
- .log_iline_size = 6
+ .l1d = {
+ .block_size = 0x40,
+ .log_block_size = 6,
+ },
+ .l1i = {
+ .block_size = 0x40,
+ .log_block_size = 6
+ },
};
EXPORT_SYMBOL_GPL(ppc64_caches);
-/*
- * These are used in binfmt_elf.c to put aux entries on the stack
- * for each elf executable being started.
- */
-int dcache_bsize;
-int icache_bsize;
-int ucache_bsize;
-
-#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
-static void setup_tlb_core_data(void)
+#if defined(CONFIG_PPC_BOOK3E_64) && defined(CONFIG_SMP)
+void __init setup_tlb_core_data(void)
{
int cpu;
@@ -107,26 +97,27 @@ static void setup_tlb_core_data(void)
for_each_possible_cpu(cpu) {
int first = cpu_first_thread_sibling(cpu);
- paca[cpu].tcd_ptr = &paca[first].tcd;
+ /*
+ * If we boot via kdump on a non-primary thread,
+ * make sure we point at the thread that actually
+ * set up this TLB.
+ */
+ if (cpu_first_thread_sibling(boot_cpuid) == first)
+ first = boot_cpuid;
+
+ paca_ptrs[cpu]->tcd_ptr = &paca_ptrs[first]->tcd;
/*
* If we have threads, we need either tlbsrx.
* or e6500 tablewalk mode, or else TLB handlers
* will be racy and could produce duplicate entries.
+ * Should we panic instead?
*/
- if (smt_enabled_at_boot >= 2 &&
- !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
- book3e_htw_mode != PPC_HTW_E6500) {
- /* Should we panic instead? */
- WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n",
- __func__);
- }
+ WARN_ONCE(smt_enabled_at_boot >= 2 &&
+ book3e_htw_mode != PPC_HTW_E6500,
+ "%s: unsupported MMU configuration\n", __func__);
}
}
-#else
-static void setup_tlb_core_data(void)
-{
-}
#endif
#ifdef CONFIG_SMP
@@ -134,7 +125,7 @@ static void setup_tlb_core_data(void)
static char *smt_enabled_cmdline;
/* Look for ibm,smt-enabled OF option */
-static void check_smt_enabled(void)
+void __init check_smt_enabled(void)
{
struct device_node *dn;
const char *smt_option;
@@ -150,10 +141,7 @@ static void check_smt_enabled(void)
smt_enabled_at_boot = 0;
else {
int smt;
- int rc;
-
- rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
- if (!rc)
+ if (!kstrtoint(smt_enabled_cmdline, 10, &smt))
smt_enabled_at_boot =
min(threads_per_core, smt);
}
@@ -183,34 +171,148 @@ static int __init early_smt_enabled(char *p)
}
early_param("smt-enabled", early_smt_enabled);
-#else
-#define check_smt_enabled()
#endif /* CONFIG_SMP */
/** Fix up paca fields required for the boot cpu */
-static void fixup_boot_paca(void)
+static void __init fixup_boot_paca(struct paca_struct *boot_paca)
{
/* The boot cpu is started */
- get_paca()->cpu_start = 1;
+ boot_paca->cpu_start = 1;
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * Give the early boot machine check stack somewhere to use, use
+ * half of the init stack. This is a bit hacky but there should not be
+ * deep stack usage in early init so shouldn't overflow it or overwrite
+ * things.
+ */
+ boot_paca->mc_emergency_sp = (void *)&init_thread_union +
+ (THREAD_SIZE/2);
+#endif
/* Allow percpu accesses to work until we setup percpu data */
- get_paca()->data_offset = 0;
+ boot_paca->data_offset = 0;
+ /* Mark interrupts soft and hard disabled in PACA */
+ boot_paca->irq_soft_mask = IRQS_DISABLED;
+ boot_paca->irq_happened = PACA_IRQ_HARD_DIS;
+ WARN_ON(mfmsr() & MSR_EE);
}
-static void cpu_ready_for_interrupts(void)
+static void __init configure_exceptions(void)
{
- /* Set IR and DR in PACA MSR */
- get_paca()->kernel_msr = MSR_KERNEL;
+ /*
+ * Setup the trampolines from the lowmem exception vectors
+ * to the kdump kernel when not using a relocatable kernel.
+ */
+ setup_kdump_trampoline();
+
+ /* Under a PAPR hypervisor, we need hypercalls */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
+ /*
+ * - PR KVM does not support AIL mode interrupts in the host
+ * while a PR guest is running.
+ *
+ * - SCV system call interrupt vectors are only implemented for
+ * AIL mode interrupts.
+ *
+ * - On pseries, AIL mode can only be enabled and disabled
+ * system-wide so when a PR VM is created on a pseries host,
+ * all CPUs of the host are set to AIL=0 mode.
+ *
+ * - Therefore host CPUs must not execute scv while a PR VM
+ * exists.
+ *
+ * - SCV support can not be disabled dynamically because the
+ * feature is advertised to host userspace. Disabling the
+ * facility and emulating it would be possible but is not
+ * implemented.
+ *
+ * - So SCV support is blanket disabled if PR KVM could possibly
+ * run. That is, PR support compiled in, booting on pseries
+ * with hash MMU.
+ */
+ if (IS_ENABLED(CONFIG_KVM_BOOK3S_PR_POSSIBLE) && !radix_enabled()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
+
+ /* Enable AIL if possible */
+ if (!pseries_enable_reloc_on_exc()) {
+ init_task.thread.fscr &= ~FSCR_SCV;
+ cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+ }
+
+ /*
+ * Tell the hypervisor that we want our exceptions to
+ * be taken in little endian mode.
+ *
+ * We don't call this for big endian as our calling convention
+ * makes us always enter in BE, and the call may fail under
+ * some circumstances with kdump.
+ */
+#ifdef __LITTLE_ENDIAN__
+ pseries_little_endian_exceptions();
+#endif
+ } else {
+ /* Set endian mode using OPAL */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ opal_configure_cores();
+ /* AIL on native is done in cpu_ready_for_interrupts() */
+ }
+}
+
+static void cpu_ready_for_interrupts(void)
+{
/*
- * Enable AIL if supported, and we are in hypervisor mode. If we are
- * not in hypervisor mode, we enable relocation-on interrupts later
- * in pSeries_setup_arch() using the H_SET_MODE hcall.
+ * Enable AIL if supported, and we are in hypervisor mode. This
+ * is called once for every processor.
+ *
+ * If we are not in hypervisor mode the job is done once for
+ * the whole partition in configure_exceptions().
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ unsigned long new_lpcr = lpcr;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ /* P10 DD1 does not have HAIL */
+ if (pvr_version_is(PVR_POWER10) &&
+ (mfspr(SPRN_PVR) & 0xf00) == 0x100)
+ new_lpcr |= LPCR_AIL_3;
+ else
+ new_lpcr |= LPCR_HAIL;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ new_lpcr |= LPCR_AIL_3;
+ }
+
+ if (new_lpcr != lpcr)
+ mtspr(SPRN_LPCR, new_lpcr);
+ }
+
+ /*
+ * Set HFSCR:TM based on CPU features:
+ * In the special case of TM no suspend (P9N DD2.1), Linux is
+ * told TM is off via the dt-ftrs but told to (partially) use
+ * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
+ * will be off from dt-ftrs but we need to turn it on for the
+ * no suspend case.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_TM_COMP))
+ mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
+ else
+ mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
}
+
+ /* Set IR and DR in PACA MSR */
+ get_paca()->kernel_msr = MSR_KERNEL;
+}
+
+unsigned long spr_default_dscr = 0;
+
+static void __init record_spr_defaults(void)
+{
+ if (early_cpu_has_feature(CPU_FTR_DSCR))
+ spr_default_dscr = mfspr(SPRN_DSCR);
}
/*
@@ -238,47 +340,92 @@ void __init early_setup(unsigned long dt_ptr)
/* -------- printk is _NOT_ safe to use here ! ------- */
- /* Identify CPU type */
- identify_cpu(0, mfspr(SPRN_PVR));
-
- /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
+ /*
+ * Assume we're on cpu 0 for now.
+ *
+ * We need to load a PACA very early for a few reasons.
+ *
+ * The stack protector canary is stored in the paca, so as soon as we
+ * call any stack protected code we need r13 pointing somewhere valid.
+ *
+ * If we are using kcov it will call in_task() in its instrumentation,
+ * which relies on the current task from the PACA.
+ *
+ * dt_cpu_ftrs_init() calls into generic OF/fdt code, as well as
+ * printk(), which can trigger both stack protector and kcov.
+ *
+ * percpu variables and spin locks also use the paca.
+ *
+ * So set up a temporary paca. It will be replaced below once we know
+ * what CPU we are on.
+ */
initialise_paca(&boot_paca, 0);
- setup_paca(&boot_paca);
- fixup_boot_paca();
-
- /* Initialize lockdep early or else spinlocks will blow */
- lockdep_init();
+ fixup_boot_paca(&boot_paca);
+ WARN_ON(local_paca);
+ setup_paca(&boot_paca); /* install the paca into registers */
/* -------- printk is now safe to use ------- */
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && (mfmsr() & MSR_HV))
+ enable_machine_check();
+
+ /* Try new device tree based feature discovery ... */
+ if (!dt_cpu_ftrs_init(__va(dt_ptr)))
+ /* Otherwise use the old style CPU table */
+ identify_cpu(0, mfspr(SPRN_PVR));
+
/* Enable early debugging if any specified (see udbg.h) */
udbg_early_init();
- DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+ udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
/*
* Do early initialization using the flattened device
* tree, such as retrieving the physical memory map or
- * calculating/retrieving the hash table size.
+ * calculating/retrieving the hash table size, discover
+ * boot_cpuid and boot_cpu_hwid.
*/
early_init_devtree(__va(dt_ptr));
- epapr_paravirt_early_init();
+ allocate_paca_ptrs();
+ allocate_paca(boot_cpuid);
+ set_hard_smp_processor_id(boot_cpuid, boot_cpu_hwid);
+ fixup_boot_paca(paca_ptrs[boot_cpuid]);
+ setup_paca(paca_ptrs[boot_cpuid]); /* install the paca into registers */
+ // smp_processor_id() now reports boot_cpuid
- /* Now we know the logical id of our boot cpu, setup the paca. */
- setup_paca(&paca[boot_cpuid]);
- fixup_boot_paca();
+#ifdef CONFIG_SMP
+ task_thread_info(current)->cpu = boot_cpuid; // fix task_cpu(current)
+#endif
- /* Probe the machine type */
- probe_machine();
+ /*
+ * Configure exception handlers. This includes setting up trampolines
+ * if needed, setting exception endian mode, etc...
+ */
+ configure_exceptions();
- setup_kdump_trampoline();
+ /*
+ * Configure Kernel Userspace Protection. This needs to happen before
+ * feature fixups for platforms that implement this using features.
+ */
+ setup_kup();
- DBG("Found, Initializing memory management...\n");
+ /* Apply all the dynamic patching */
+ apply_feature_fixups();
+ setup_feature_keys();
/* Initialize the hash table or TLB handling */
early_init_mmu();
+ early_ioremap_setup();
+
+ /*
+ * After firmware and early platform setup code has set things up,
+ * we note the SPR values for configurable control/performance
+ * registers, and use those as initial defaults.
+ */
+ record_spr_defaults();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
@@ -286,21 +433,18 @@ void __init early_setup(unsigned long dt_ptr)
*/
cpu_ready_for_interrupts();
- /* Reserve large chunks of memory for use by CMA for KVM */
- kvm_cma_reserve();
-
/*
- * Reserve any gigantic pages requested on the command line.
- * memblock needs to have been initialized by the time this is
- * called since this will reserve memory.
+ * We enable ftrace here, but since we only support DYNAMIC_FTRACE, it
+ * will only actually get enabled on the boot cpu much later once
+ * ftrace itself has been initialized.
*/
- reserve_hugetlb_gpages();
+ this_cpu_enable_ftrace();
- DBG(" <- early_setup()\n");
+ udbg_printf(" <- %s()\n", __func__);
#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
/*
- * This needs to be done *last* (after the above DBG() even)
+ * This needs to be done *last* (after the above udbg_printf() even)
*
* Right after we return from this function, we turn on the MMU
* which means the real-mode access trick that btext does will
@@ -314,12 +458,15 @@ void __init early_setup(unsigned long dt_ptr)
#ifdef CONFIG_SMP
void early_setup_secondary(void)
{
- /* Mark interrupts enabled in PACA */
- get_paca()->soft_enabled = 0;
+ /* Mark interrupts disabled in PACA */
+ irq_soft_mask_set(IRQS_DISABLED);
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+ /* Perform any KUP setup that is per-cpu */
+ setup_kup();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
@@ -330,13 +477,42 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+void __noreturn panic_smp_self_stop(void)
+{
+ hard_irq_disable();
+ spin_begin();
+ while (1)
+ spin_cpu_relax();
+}
+
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
+static bool use_spinloop(void)
+{
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S)) {
+ /*
+ * See comments in head_64.S -- not all platforms insert
+ * secondaries at __secondary_hold and wait at the spin
+ * loop.
+ */
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ return false;
+ return true;
+ }
+
+ /*
+ * When book3e boots from kexec, the ePAPR spin table does
+ * not get used.
+ */
+ return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
+}
+
void smp_release_cpus(void)
{
unsigned long *ptr;
int i;
- DBG(" -> smp_release_cpus()\n");
+ if (!use_spinloop())
+ return;
/* All secondary cpus are spinning on a common spinloop, release them
* all now so they can start to spin on their individual paca
@@ -356,11 +532,9 @@ void smp_release_cpus(void)
break;
udelay(1);
}
- DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
- DBG(" <- smp_release_cpus()\n");
+ pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
}
-#endif /* CONFIG_SMP || CONFIG_KEXEC */
+#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
/*
* Initialize some remaining members of the ppc64_caches and systemcfg
@@ -369,248 +543,224 @@ void smp_release_cpus(void)
* cache informations about the CPU that will be used by cache flush
* routines and/or provided to userland
*/
-static void __init initialize_cache_info(void)
-{
- struct device_node *np;
- unsigned long num_cpus = 0;
-
- DBG(" -> initialize_cache_info()\n");
- for_each_node_by_type(np, "cpu") {
- num_cpus += 1;
-
- /*
- * We're assuming *all* of the CPUs have the same
- * d-cache and i-cache sizes... -Peter
- */
- if (num_cpus == 1) {
- const __be32 *sizep, *lsizep;
- u32 size, lsize;
-
- size = 0;
- lsize = cur_cpu_spec->dcache_bsize;
- sizep = of_get_property(np, "d-cache-size", NULL);
- if (sizep != NULL)
- size = be32_to_cpu(*sizep);
- lsizep = of_get_property(np, "d-cache-block-size",
- NULL);
- /* fallback if block size missing */
- if (lsizep == NULL)
- lsizep = of_get_property(np,
- "d-cache-line-size",
- NULL);
- if (lsizep != NULL)
- lsize = be32_to_cpu(*lsizep);
- if (sizep == NULL || lsizep == NULL)
- DBG("Argh, can't find dcache properties ! "
- "sizep: %p, lsizep: %p\n", sizep, lsizep);
-
- ppc64_caches.dsize = size;
- ppc64_caches.dline_size = lsize;
- ppc64_caches.log_dline_size = __ilog2(lsize);
- ppc64_caches.dlines_per_page = PAGE_SIZE / lsize;
-
- size = 0;
- lsize = cur_cpu_spec->icache_bsize;
- sizep = of_get_property(np, "i-cache-size", NULL);
- if (sizep != NULL)
- size = be32_to_cpu(*sizep);
- lsizep = of_get_property(np, "i-cache-block-size",
- NULL);
- if (lsizep == NULL)
- lsizep = of_get_property(np,
- "i-cache-line-size",
- NULL);
- if (lsizep != NULL)
- lsize = be32_to_cpu(*lsizep);
- if (sizep == NULL || lsizep == NULL)
- DBG("Argh, can't find icache properties ! "
- "sizep: %p, lsizep: %p\n", sizep, lsizep);
-
- ppc64_caches.isize = size;
- ppc64_caches.iline_size = lsize;
- ppc64_caches.log_iline_size = __ilog2(lsize);
- ppc64_caches.ilines_per_page = PAGE_SIZE / lsize;
- }
- }
+static void __init init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
+ u32 bsize, u32 sets)
+{
+ info->size = size;
+ info->sets = sets;
+ info->line_size = lsize;
+ info->block_size = bsize;
+ info->log_block_size = __ilog2(bsize);
+ if (bsize)
+ info->blocks_per_page = PAGE_SIZE / bsize;
+ else
+ info->blocks_per_page = 0;
- DBG(" <- initialize_cache_info()\n");
+ if (sets == 0)
+ info->assoc = 0xffff;
+ else
+ info->assoc = size / (sets * lsize);
}
-
-/*
- * Do some initial setup of the system. The parameters are those which
- * were passed in from the bootloader.
- */
-void __init setup_system(void)
+static bool __init parse_cache_info(struct device_node *np,
+ bool icache,
+ struct ppc_cache_info *info)
{
- DBG(" -> setup_system()\n");
-
- /* Apply the CPUs-specific and firmware specific fixups to kernel
- * text (nop out sections not relevant to this CPU or this firmware)
- */
- do_feature_fixups(cur_cpu_spec->cpu_features,
- &__start___ftr_fixup, &__stop___ftr_fixup);
- do_feature_fixups(cur_cpu_spec->mmu_features,
- &__start___mmu_ftr_fixup, &__stop___mmu_ftr_fixup);
- do_feature_fixups(powerpc_firmware_features,
- &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
- do_lwsync_fixups(cur_cpu_spec->cpu_features,
- &__start___lwsync_fixup, &__stop___lwsync_fixup);
- do_final_fixups();
+ static const char *ipropnames[] __initdata = {
+ "i-cache-size",
+ "i-cache-sets",
+ "i-cache-block-size",
+ "i-cache-line-size",
+ };
+ static const char *dpropnames[] __initdata = {
+ "d-cache-size",
+ "d-cache-sets",
+ "d-cache-block-size",
+ "d-cache-line-size",
+ };
+ const char **propnames = icache ? ipropnames : dpropnames;
+ const __be32 *sizep, *lsizep, *bsizep, *setsp;
+ u32 size, lsize, bsize, sets;
+ bool success = true;
+
+ size = 0;
+ sets = -1u;
+ lsize = bsize = cur_cpu_spec->dcache_bsize;
+ sizep = of_get_property(np, propnames[0], NULL);
+ if (sizep != NULL)
+ size = be32_to_cpu(*sizep);
+ setsp = of_get_property(np, propnames[1], NULL);
+ if (setsp != NULL)
+ sets = be32_to_cpu(*setsp);
+ bsizep = of_get_property(np, propnames[2], NULL);
+ lsizep = of_get_property(np, propnames[3], NULL);
+ if (bsizep == NULL)
+ bsizep = lsizep;
+ if (lsizep == NULL)
+ lsizep = bsizep;
+ if (lsizep != NULL)
+ lsize = be32_to_cpu(*lsizep);
+ if (bsizep != NULL)
+ bsize = be32_to_cpu(*bsizep);
+ if (sizep == NULL || bsizep == NULL || lsizep == NULL)
+ success = false;
/*
- * Unflatten the device-tree passed by prom_init or kexec
+ * OF is weird .. it represents fully associative caches
+ * as "1 way" which doesn't make much sense and doesn't
+ * leave room for direct mapped. We'll assume that 0
+ * in OF means direct mapped for that reason.
*/
- unflatten_device_tree();
+ if (sets == 1)
+ sets = 0;
+ else if (sets == 0)
+ sets = 1;
- /*
- * Fill the ppc64_caches & systemcfg structures with informations
- * retrieved from the device-tree.
- */
- initialize_cache_info();
+ init_cache_info(info, size, lsize, bsize, sets);
-#ifdef CONFIG_PPC_RTAS
- /*
- * Initialize RTAS if available
- */
- rtas_initialize();
-#endif /* CONFIG_PPC_RTAS */
+ return success;
+}
- /*
- * Check if we have an initrd provided via the device-tree
- */
- check_for_initrd();
+void __init initialize_cache_info(void)
+{
+ struct device_node *cpu = NULL, *l2, *l3 = NULL;
+ u32 pvr;
/*
- * Do some platform specific early initializations, that includes
- * setting up the hash table pointers. It also sets up some interrupt-mapping
- * related options that will be used by finish_device_tree()
- */
- if (ppc_md.init_early)
- ppc_md.init_early();
-
- /*
- * We can discover serial ports now since the above did setup the
- * hash table management for us, thus ioremap works. We do that early
- * so that further code can be debugged
+ * All shipping POWER8 machines have a firmware bug that
+ * puts incorrect information in the device-tree. This will
+ * be (hopefully) fixed for future chips but for now hard
+ * code the values if we are running on one of these
*/
- find_legacy_serial_ports();
+ pvr = PVR_VER(mfspr(SPRN_PVR));
+ if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
+ pvr == PVR_POWER8NVL) {
+ /* size lsize blk sets */
+ init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32);
+ init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64);
+ init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512);
+ init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192);
+ } else
+ cpu = of_find_node_by_type(NULL, "cpu");
/*
- * Register early console
+ * We're assuming *all* of the CPUs have the same
+ * d-cache and i-cache sizes... -Peter
*/
- register_early_udbg_console();
+ if (cpu) {
+ if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
+ pr_warn("Argh, can't find dcache properties !\n");
- /*
- * Initialize xmon
- */
- xmon_setup();
+ if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
+ pr_warn("Argh, can't find icache properties !\n");
- smp_setup_cpu_maps();
- check_smt_enabled();
- setup_tlb_core_data();
+ /*
+ * Try to find the L2 and L3 if any. Assume they are
+ * unified and use the D-side properties.
+ */
+ l2 = of_find_next_cache_node(cpu);
+ of_node_put(cpu);
+ if (l2) {
+ parse_cache_info(l2, false, &ppc64_caches.l2);
+ l3 = of_find_next_cache_node(l2);
+ of_node_put(l2);
+ }
+ if (l3) {
+ parse_cache_info(l3, false, &ppc64_caches.l3);
+ of_node_put(l3);
+ }
+ }
- /*
- * Freescale Book3e parts spin in a loop provided by firmware,
- * so smp_release_cpus() does nothing for them
- */
-#if defined(CONFIG_SMP) && !defined(CONFIG_PPC_FSL_BOOK3E)
- /* Release secondary cpus out of their spinloops at 0x60 now that
- * we can map physical -> logical CPU ids
- */
- smp_release_cpus();
-#endif
+ /* For use by binfmt_elf */
+ dcache_bsize = ppc64_caches.l1d.block_size;
+ icache_bsize = ppc64_caches.l1i.block_size;
- printk("Starting Linux PPC64 %s\n", init_utsname()->version);
-
- printk("-----------------------------------------------------\n");
- printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
- printk("physicalMemorySize = 0x%llx\n", memblock_phys_mem_size());
- if (ppc64_caches.dline_size != 0x80)
- printk("ppc64_caches.dcache_line_size = 0x%x\n",
- ppc64_caches.dline_size);
- if (ppc64_caches.iline_size != 0x80)
- printk("ppc64_caches.icache_line_size = 0x%x\n",
- ppc64_caches.iline_size);
-#ifdef CONFIG_PPC_STD_MMU_64
- if (htab_address)
- printk("htab_address = 0x%p\n", htab_address);
- printk("htab_hash_mask = 0x%lx\n", htab_hash_mask);
-#endif /* CONFIG_PPC_STD_MMU_64 */
- if (PHYSICAL_START > 0)
- printk("physical_start = 0x%llx\n",
- (unsigned long long)PHYSICAL_START);
- printk("-----------------------------------------------------\n");
-
- DBG(" <- setup_system()\n");
+ cur_cpu_spec->dcache_bsize = dcache_bsize;
+ cur_cpu_spec->icache_bsize = icache_bsize;
}
-/* This returns the limit below which memory accesses to the linear
- * mapping are guarnateed not to cause a TLB or SLB miss. This is
- * used to allocate interrupt or emergency stacks for which our
- * exception entry path doesn't deal with being interrupted.
+/*
+ * This returns the limit below which memory accesses to the linear
+ * mapping are guaranteed not to cause an architectural exception (e.g.,
+ * TLB or SLB miss fault).
+ *
+ * This is used to allocate PACAs and various interrupt stacks that
+ * are accessed early in interrupt handlers that must not cause
+ * re-entrant interrupts.
*/
-static u64 safe_stack_limit(void)
+__init u64 ppc64_bolted_size(void)
{
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
/* Freescale BookE bolts the entire linear mapping */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
- return linear_map_top;
- /* Other BookE, we assume the first GB is bolted */
- return 1ul << 30;
+ return linear_map_top;
#else
- /* BookS, the first segment is bolted */
- if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ /* BookS radix, does not take faults on linear mapping */
+ if (early_radix_enabled())
+ return ULONG_MAX;
+
+ /* BookS hash, the first segment is bolted */
+ if (early_mmu_has_feature(MMU_FTR_1T_SEGMENT))
return 1UL << SID_SHIFT_1T;
return 1UL << SID_SHIFT;
#endif
}
-static void __init irqstack_early_init(void)
+static void *__init alloc_stack(unsigned long limit, int cpu)
{
- u64 limit = safe_stack_limit();
+ void *ptr;
+
+ BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16);
+
+ ptr = memblock_alloc_try_nid(THREAD_SIZE, THREAD_ALIGN,
+ MEMBLOCK_LOW_LIMIT, limit,
+ early_cpu_to_node(cpu));
+ if (!ptr)
+ panic("cannot allocate stacks");
+
+ return ptr;
+}
+
+void __init irqstack_early_init(void)
+{
+ u64 limit = ppc64_bolted_size();
unsigned int i;
/*
* Interrupt stacks must be in the first segment since we
- * cannot afford to take SLB misses on them.
+ * cannot afford to take SLB misses on them. They are not
+ * accessed in realmode.
*/
for_each_possible_cpu(i) {
- softirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
- hardirq_ctx[i] = (struct thread_info *)
- __va(memblock_alloc_base(THREAD_SIZE,
- THREAD_SIZE, limit));
+ softirq_ctx[i] = alloc_stack(limit, i);
+ hardirq_ctx[i] = alloc_stack(limit, i);
}
}
-#ifdef CONFIG_PPC_BOOK3E
-static void __init exc_lvl_early_init(void)
+#ifdef CONFIG_PPC_BOOK3E_64
+void __init exc_lvl_early_init(void)
{
unsigned int i;
- unsigned long sp;
for_each_possible_cpu(i) {
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- critirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].crit_kstack = __va(sp + THREAD_SIZE);
+ void *sp;
+
+ sp = alloc_stack(ULONG_MAX, i);
+ critirq_ctx[i] = sp;
+ paca_ptrs[i]->crit_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- dbgirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ dbgirq_ctx[i] = sp;
+ paca_ptrs[i]->dbg_kstack = sp + THREAD_SIZE;
- sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
- mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
- paca[i].mc_kstack = __va(sp + THREAD_SIZE);
+ sp = alloc_stack(ULONG_MAX, i);
+ mcheckirq_ctx[i] = sp;
+ paca_ptrs[i]->mc_kstack = sp + THREAD_SIZE;
}
if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
patch_exception(0x040, exc_debug_debug_book3e);
}
-#else
-#define exc_lvl_early_init()
#endif
/*
@@ -618,9 +768,9 @@ static void __init exc_lvl_early_init(void)
* early in SMP boots before relocation is enabled. Exclusive emergency
* stack for machine checks.
*/
-static void __init emergency_stack_init(void)
+void __init emergency_stack_init(void)
{
- u64 limit;
+ u64 limit, mce_limit;
unsigned int i;
/*
@@ -629,131 +779,55 @@ static void __init emergency_stack_init(void)
* aligned.
*
* Since we use these as temporary stacks during secondary CPU
- * bringup, we need to get at them in real mode. This means they
- * must also be within the RMO region.
+ * bringup, machine check, system reset, and HMI, we need to get
+ * at them in real mode. This means they must also be within the RMO
+ * region.
+ *
+ * The IRQ stacks allocated elsewhere in this file are zeroed and
+ * initialized in kernel/irq.c. These are initialized here in order
+ * to have emergency stacks available as early as possible.
*/
- limit = min(safe_stack_limit(), ppc64_rma_size);
-
- for_each_possible_cpu(i) {
- unsigned long sp;
- sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
- sp += THREAD_SIZE;
- paca[i].emergency_sp = __va(sp);
-
-#ifdef CONFIG_PPC_BOOK3S_64
- /* emergency stack for machine check exception handling. */
- sp = memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit);
- sp += THREAD_SIZE;
- paca[i].mc_emergency_sp = __va(sp);
-#endif
- }
-}
-
-/*
- * Called into from start_kernel this initializes bootmem, which is used
- * to manage page allocation until mem_init is called.
- */
-void __init setup_arch(char **cmdline_p)
-{
- ppc64_boot_msg(0x12, "Setup Arch");
-
- *cmdline_p = cmd_line;
+ limit = mce_limit = min(ppc64_bolted_size(), ppc64_rma_size);
/*
- * Set cache line size based on type of cpu as a default.
- * Systems with OF can look in the properties on the cpu node(s)
- * for a possibly more accurate value.
+ * Machine check on pseries calls rtas, but can't use the static
+ * rtas_args due to a machine check hitting while the lock is held.
+ * rtas args have to be under 4GB, so the machine check stack is
+ * limited to 4GB so args can be put on stack.
*/
- dcache_bsize = ppc64_caches.dline_size;
- icache_bsize = ppc64_caches.iline_size;
-
- if (ppc_md.panic)
- setup_panic();
-
- init_mm.start_code = (unsigned long)_stext;
- init_mm.end_code = (unsigned long) _etext;
- init_mm.end_data = (unsigned long) _edata;
- init_mm.brk = klimit;
-#ifdef CONFIG_PPC_64K_PAGES
- init_mm.context.pte_frag = NULL;
-#endif
- irqstack_early_init();
- exc_lvl_early_init();
- emergency_stack_init();
-
- /* set up the bootmem stuff with available memory */
- do_init_bootmem();
- sparse_init();
-
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
-
- if (ppc_md.setup_arch)
- ppc_md.setup_arch();
-
- paging_init();
+ if (firmware_has_feature(FW_FEATURE_LPAR) && mce_limit > SZ_4G)
+ mce_limit = SZ_4G;
- /* Initialize the MMU context management stuff */
- mmu_context_init();
-
- /* Interrupt code needs to be 64K-aligned */
- if ((unsigned long)_stext & 0xffff)
- panic("Kernelbase not 64K-aligned (0x%lx)!\n",
- (unsigned long)_stext);
-
- ppc64_boot_msg(0x15, "Setup Done");
-}
-
-
-/* ToDo: do something useful if ppc_md is not yet setup. */
-#define PPC64_LINUX_FUNCTION 0x0f000000
-#define PPC64_IPL_MESSAGE 0xc0000000
-#define PPC64_TERM_MESSAGE 0xb0000000
+ for_each_possible_cpu(i) {
+ paca_ptrs[i]->emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
-static void ppc64_do_msg(unsigned int src, const char *msg)
-{
- if (ppc_md.progress) {
- char buf[128];
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* emergency stack for NMI exception handling. */
+ paca_ptrs[i]->nmi_emergency_sp = alloc_stack(limit, i) + THREAD_SIZE;
- sprintf(buf, "%08X\n", src);
- ppc_md.progress(buf, 0);
- snprintf(buf, 128, "%s", msg);
- ppc_md.progress(buf, 0);
+ /* emergency stack for machine check exception handling. */
+ paca_ptrs[i]->mc_emergency_sp = alloc_stack(mce_limit, i) + THREAD_SIZE;
+#endif
}
}
-/* Print a boot progress message. */
-void ppc64_boot_msg(unsigned int src, const char *msg)
-{
- ppc64_do_msg(PPC64_LINUX_FUNCTION|PPC64_IPL_MESSAGE|src, msg);
- printk("[boot]%04x %s\n", src, msg);
-}
-
#ifdef CONFIG_SMP
-#define PCPU_DYN_SIZE ()
-
-static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
-{
- return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
- __pa(MAX_DMA_ADDRESS));
-}
-
-static void __init pcpu_fc_free(void *ptr, size_t size)
-{
- free_bootmem(__pa(ptr), size);
-}
-
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
- if (cpu_to_node(from) == cpu_to_node(to))
+ if (early_cpu_to_node(from) == early_cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;
}
+static __init int pcpu_cpu_to_node(int cpu)
+{
+ return early_cpu_to_node(cpu);
+}
+
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
+DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged);
void __init setup_per_cpu_areas(void)
{
@@ -761,32 +835,51 @@ void __init setup_per_cpu_areas(void)
size_t atom_size;
unsigned long delta;
unsigned int cpu;
- int rc;
+ int rc = -EINVAL;
/*
- * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
- * to group units. For larger mappings, use 1M atom which
- * should be large enough to contain a number of units.
+ * BookE and BookS radix are historical values and should be revisited.
*/
- if (mmu_linear_psize == MMU_PAGE_4K)
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64)) {
+ atom_size = SZ_1M;
+ } else if (radix_enabled()) {
atom_size = PAGE_SIZE;
- else
- atom_size = 1 << 20;
+ } else if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU)) {
+ /*
+ * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
+ * to group units. For larger mappings, use 1M atom which
+ * should be large enough to contain a number of units.
+ */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ atom_size = PAGE_SIZE;
+ else
+ atom_size = SZ_1M;
+ }
+
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
+ pcpu_cpu_to_node);
+ if (rc)
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
+ }
- rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
- pcpu_fc_alloc, pcpu_fc_free);
+ if (rc < 0)
+ rc = pcpu_page_first_chunk(0, pcpu_cpu_to_node);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);
+ static_key_enable(&__percpu_first_chunk_is_paged.key);
delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
for_each_possible_cpu(cpu) {
__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
- paca[cpu].data_offset = __per_cpu_offset[cpu];
+ paca_ptrs[cpu]->data_offset = __per_cpu_offset[cpu];
}
}
#endif
-#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+#ifdef CONFIG_MEMORY_HOTPLUG
unsigned long memory_block_size_bytes(void)
{
if (ppc_md.memory_block_size)
@@ -796,7 +889,40 @@ unsigned long memory_block_size_bytes(void)
}
#endif
-#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
+#ifdef CONFIG_PPC_INDIRECT_PIO
struct ppc_pci_io ppc_pci_io;
EXPORT_SYMBOL(ppc_pci_io);
#endif
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+/*
+ * Return the hardlockup sample period: ppc_proc_freq multiplied by the
+ * watchdog threshold.
+ * NOTE(review): presumably ppc_proc_freq is in Hz and watchdog_thresh in
+ * seconds, which would make the result a cycle count - confirm.
+ */
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+ return ppc_proc_freq * watchdog_thresh;
+}
+#endif
+
+/*
+ * The perf based hardlockup detector breaks PMU event based branches, so
+ * disable it by default. Book3S has a soft-nmi hardlockup detector based
+ * on the decrementer interrupt, so it does not suffer from this problem.
+ *
+ * It is likely to get false positives in KVM guests, so disable it there
+ * by default too. PowerVM will not stop or arbitrarily oversubscribe
+ * CPUs, but give a minimum regular allotment even with SPLPAR, so enable
+ * the detector for non-KVM guests, which are assumed to be PowerVM.
+ */
+static int __init disable_hardlockup_detector(void)
+{
+#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
+ /* perf based detector: unconditionally off by default */
+ hardlockup_detector_disable();
+#else
+ /* otherwise only disable it when running as a KVM guest under an LPAR */
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ /* run the KVM guest check before querying its result */
+ check_kvm_guest();
+ if (is_kvm_guest())
+ hardlockup_detector_disable();
+ }
+#endif
+
+ /* never fails */
+ return 0;
+}
+early_initcall(disable_hardlockup_detector);
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index cf8c7e4e0b21..aa17e62f3754 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -1,50 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common signal handling code for both 32 and 64 bits
*
- * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
* Extracted from signal_32.c and signal_64.c
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
*/
-#include <linux/tracehook.h>
+#include <linux/resume_user_mode.h>
#include <linux/signal.h>
#include <linux/uprobes.h>
#include <linux/key.h>
#include <linux/context_tracking.h>
+#include <linux/livepatch.h>
+#include <linux/syscalls.h>
#include <asm/hw_breakpoint.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/switch_to.h>
#include <asm/unistd.h>
#include <asm/debug.h>
#include <asm/tm.h>
#include "signal.h"
+#ifdef CONFIG_VSX
+/*
+ * Copy @task's FP registers and FPSCR to the user buffer @to.
+ *
+ * Slots 0 .. ELF_NFPREG - 2 come from TS_FPR(i); the final slot holds
+ * fp_state.fpscr. Returns the result of __copy_to_user().
+ */
+unsigned long copy_fpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* gather the FPRs and FPSCR in a local buffer, then copy it to userspace */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_FPR(i);
+ buf[i] = task->thread.fp_state.fpscr;
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+/*
+ * Load @task's FP registers and FPSCR from the user buffer @from.
+ *
+ * Returns 1 if the user copy fails (the thread state is then left
+ * untouched), 0 on success.
+ */
+unsigned long copy_fpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* read everything into a local buffer before touching the thread state */
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_FPR(i) = buf[i];
+ /* i == ELF_NFPREG - 1 here: the last slot carries the FPSCR */
+ task->thread.fp_state.fpscr = buf[i];
+
+ return 0;
+}
+
+/*
+ * Copy the TS_VSRLOWOFFSET element of each of @task's first
+ * ELF_NVSRHALFREG fp_state.fpr[] entries to the user buffer @to.
+ * NOTE(review): presumably these are the low halves of the VSX registers
+ * - confirm.
+ *
+ * Returns the result of __copy_to_user().
+ */
+unsigned long copy_vsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* gather the values in a local buffer, then copy it to userspace */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+/*
+ * Load the TS_VSRLOWOFFSET element of each of @task's first
+ * ELF_NVSRHALFREG fp_state.fpr[] entries from the user buffer @from.
+ *
+ * Returns 1 if the user copy fails (the thread state is then left
+ * untouched), 0 on success.
+ */
+unsigned long copy_vsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* read everything into a local buffer before touching the thread state */
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Same as copy_fpr_to_user() but for the ckfp_state copy of the FP state:
+ * slots 0 .. ELF_NFPREG - 2 come from TS_CKFPR(i) and the final slot holds
+ * ckfp_state.fpscr.
+ * NOTE(review): presumably ckfp_state is the transactional-memory
+ * checkpointed state - confirm.
+ *
+ * Returns the result of __copy_to_user().
+ */
+unsigned long copy_ckfpr_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* gather the FPRs and FPSCR in a local buffer, then copy it to userspace */
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ buf[i] = task->thread.TS_CKFPR(i);
+ buf[i] = task->thread.ckfp_state.fpscr;
+ return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+/*
+ * Load @task's ckfp_state FP registers and FPSCR from the user buffer
+ * @from.
+ *
+ * Returns 1 if the user copy fails (the thread state is then left
+ * untouched), 0 on success.
+ */
+unsigned long copy_ckfpr_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NFPREG];
+ int i;
+
+ /* read everything into a local buffer before touching the thread state */
+ if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+ task->thread.TS_CKFPR(i) = buf[i];
+ /* i == ELF_NFPREG - 1 here: the last slot carries the FPSCR */
+ task->thread.ckfp_state.fpscr = buf[i];
+
+ return 0;
+}
+
+/*
+ * Copy the TS_VSRLOWOFFSET element of each of @task's first
+ * ELF_NVSRHALFREG ckfp_state.fpr[] entries to the user buffer @to.
+ *
+ * Returns the result of __copy_to_user().
+ */
+unsigned long copy_ckvsx_to_user(void __user *to,
+ struct task_struct *task)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* gather the values in a local buffer, then copy it to userspace */
+ for (i = 0; i < ELF_NVSRHALFREG; i++)
+ buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET];
+ return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+/*
+ * Load the TS_VSRLOWOFFSET element of each of @task's first
+ * ELF_NVSRHALFREG ckfp_state.fpr[] entries from the user buffer @from.
+ *
+ * Returns 1 if the user copy fails (the thread state is then left
+ * untouched), 0 on success.
+ */
+unsigned long copy_ckvsx_from_user(struct task_struct *task,
+ void __user *from)
+{
+ u64 buf[ELF_NVSRHALFREG];
+ int i;
+
+ /* read everything into a local buffer before touching the thread state */
+ if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+ return 1;
+ for (i = 0; i < ELF_NVSRHALFREG ; i++)
+ task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
+ return 0;
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#endif
+
/* Log an error when sending an unhandled signal to a process. Controlled
* through debug.exception-trace sysctl.
*/
int show_unhandled_signals = 1;
+/*
+ * Return the minimum signal frame size for the kernel's native word size:
+ * the 64-bit value when CONFIG_PPC64 is enabled, the 32-bit value otherwise.
+ */
+unsigned long get_min_sigframe_size(void)
+{
+ if (IS_ENABLED(CONFIG_PPC64))
+ return get_min_sigframe_size_64();
+ else
+ return get_min_sigframe_size_32();
+}
+
+#ifdef CONFIG_COMPAT
+/* Compat variant: always reports the 32-bit minimum signal frame size. */
+unsigned long get_min_sigframe_size_compat(void)
+{
+ return get_min_sigframe_size_32();
+}
+#endif
+
/*
* Allocate space for the signal frame
*/
-void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
- size_t frame_size, int is_32)
+static unsigned long get_tm_stackpointer(struct task_struct *tsk);
+
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+ size_t frame_size, int is_32)
{
unsigned long oldsp, newsp;
+ unsigned long sp = get_tm_stackpointer(tsk);
/* Default to using normal stack */
- oldsp = get_clean_sp(sp, is_32);
+ if (is_32)
+ oldsp = sp & 0x0ffffffffUL;
+ else
+ oldsp = sp;
oldsp = sigsp(oldsp, ksig);
newsp = (oldsp - frame_size) & ~0xFUL;
- /* Check access */
- if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp))
- return NULL;
-
return (void __user *)newsp;
}
@@ -55,12 +182,21 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
int restart = 1;
/* syscall ? */
- if (TRAP(regs) != 0x0C00)
+ if (!trap_is_syscall(regs))
+ return;
+
+ if (trap_norestart(regs))
return;
/* error signalled ? */
- if (!(regs->ccr & 0x10000000))
+ if (trap_is_scv(regs)) {
+ /* 32-bit compat mode sign extend? */
+ if (!IS_ERR_VALUE(ret))
+ return;
+ ret = -ret;
+ } else if (!(regs->ccr & 0x10000000)) {
return;
+ }
switch (ret) {
case ERESTART_RESTARTBLOCK:
@@ -90,79 +226,90 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
regs->gpr[0] = __NR_restart_syscall;
else
regs->gpr[3] = regs->orig_gpr3;
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
regs->result = 0;
} else {
- regs->result = -EINTR;
- regs->gpr[3] = EINTR;
- regs->ccr |= 0x10000000;
+ if (trap_is_scv(regs)) {
+ regs->result = -EINTR;
+ regs->gpr[3] = -EINTR;
+ } else {
+ regs->result = -EINTR;
+ regs->gpr[3] = EINTR;
+ regs->ccr |= 0x10000000;
+ }
}
}
-static void do_signal(struct pt_regs *regs)
+static void do_signal(struct task_struct *tsk)
{
sigset_t *oldset = sigmask_to_save();
- struct ksignal ksig;
+ struct ksignal ksig = { .sig = 0 };
int ret;
- int is32 = is_32bit_task();
+
+ BUG_ON(tsk != current);
get_signal(&ksig);
/* Is there any syscall restart business here ? */
- check_syscall_restart(regs, &ksig.ka, ksig.sig > 0);
+ check_syscall_restart(tsk->thread.regs, &ksig.ka, ksig.sig > 0);
if (ksig.sig <= 0) {
/* No signal to deliver -- put the saved sigmask back */
restore_saved_sigmask();
- regs->trap = 0;
+ set_trap_norestart(tsk->thread.regs);
return; /* no signals delivered */
}
-#ifndef CONFIG_PPC_ADV_DEBUG_REGS
/*
* Reenable the DABR before delivering the signal to
* user space. The DABR will have been cleared if it
* triggered inside the kernel.
*/
- if (current->thread.hw_brk.address &&
- current->thread.hw_brk.type)
- __set_breakpoint(&current->thread.hw_brk);
-#endif
+ if (!IS_ENABLED(CONFIG_PPC_ADV_DEBUG_REGS)) {
+ int i;
+
+ for (i = 0; i < nr_wp_slots(); i++) {
+ if (tsk->thread.hw_brk[i].address && tsk->thread.hw_brk[i].type)
+ __set_breakpoint(i, &tsk->thread.hw_brk[i]);
+ }
+ }
+
/* Re-enable the breakpoints for the signal stack */
- thread_change_pc(current, regs);
+ thread_change_pc(tsk, tsk->thread.regs);
- if (is32) {
+ rseq_signal_deliver(&ksig, tsk->thread.regs);
+
+ if (is_32bit_task()) {
if (ksig.ka.sa.sa_flags & SA_SIGINFO)
- ret = handle_rt_signal32(&ksig, oldset, regs);
+ ret = handle_rt_signal32(&ksig, oldset, tsk);
else
- ret = handle_signal32(&ksig, oldset, regs);
+ ret = handle_signal32(&ksig, oldset, tsk);
} else {
- ret = handle_rt_signal64(&ksig, oldset, regs);
+ ret = handle_rt_signal64(&ksig, oldset, tsk);
}
- regs->trap = 0;
+ set_trap_norestart(tsk->thread.regs);
signal_setup_done(ret, &ksig, test_thread_flag(TIF_SINGLESTEP));
}
void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
{
- user_exit();
-
if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- if (thread_info_flags & _TIF_SIGPENDING)
- do_signal(regs);
+ if (thread_info_flags & _TIF_PATCH_PENDING)
+ klp_update_patch_state(current);
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
+ if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
+ BUG_ON(regs != current->thread.regs);
+ do_signal(current);
}
- user_enter();
+ if (thread_info_flags & _TIF_NOTIFY_RESUME)
+ resume_user_mode_work(regs);
}
-unsigned long get_tm_stackpointer(struct pt_regs *regs)
+static unsigned long get_tm_stackpointer(struct task_struct *tsk)
{
/* When in an active transaction that takes a signal, we need to be
* careful with the stack. It's possible that the stack has moved back
@@ -178,20 +325,46 @@ unsigned long get_tm_stackpointer(struct pt_regs *regs)
* need to use the stack pointer from the checkpointed state, rather
* than the speculated state. This ensures that the signal context
* (written tm suspended) will be written below the stack required for
- * the rollback. The transaction is aborted becuase of the treclaim,
+ * the rollback. The transaction is aborted because of the treclaim,
* so any memory written between the tbegin and the signal will be
* rolled back anyway.
*
* For signals taken in non-TM or suspended mode, we use the
* normal/non-checkpointed stack pointer.
*/
+ struct pt_regs *regs = tsk->thread.regs;
+ unsigned long ret = regs->gpr[1];
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BUG_ON(tsk != current);
+
if (MSR_TM_ACTIVE(regs->msr)) {
+ preempt_disable();
tm_reclaim_current(TM_CAUSE_SIGNAL);
if (MSR_TM_TRANSACTIONAL(regs->msr))
- return current->thread.ckpt_regs.gpr[1];
+ ret = tsk->thread.ckpt_regs.gpr[1];
+
+ /*
+ * If we treclaim, we must clear the current thread's TM bits
+ * before re-enabling preemption. Otherwise we might be
+ * preempted and have the live MSR[TS] changed behind our back
+ * (tm_recheckpoint_new_task() would recheckpoint). Besides, we
+ * enter the signal handler in non-transactional state.
+ */
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+ preempt_enable();
}
#endif
- return regs->gpr[1];
+ return ret;
+}
+
+/* Message formats for signal_fault(): 8-digit vs 16-digit nip/lr fields. */
+static const char fm32[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %08lx lr %08lx\n";
+static const char fm64[] = KERN_INFO "%s[%d]: bad frame in %s: %p nip %016lx lr %016lx\n";
+
+/*
+ * Log a rate-limited "bad frame" message for @tsk, naming the caller
+ * (@where), the offending user pointer (@ptr) and the nip/lr from @regs.
+ * Nothing is printed unless show_unhandled_signals is set. The wide format
+ * is used when MSR_64BIT is set in regs->msr.
+ */
+void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
+ const char *where, void __user *ptr)
+{
+ if (show_unhandled_signals)
+ printk_ratelimited(regs->msr & MSR_64BIT ? fm64 : fm32, tsk->comm,
+ task_pid_nr(tsk), where, ptr, regs->nip, regs->link);
}
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 51b274199dd9..58ecea1cdc27 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -1,58 +1,207 @@
-/*
- * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Coproration
- * Extracted from signal_32.c and signal_64.c
+/* SPDX-License-Identifier: GPL-2.0-or-later
*
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file README.legal in the main directory of
- * this archive for more details.
+ * Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
+ * Extracted from signal_32.c and signal_64.c
*/
#ifndef _POWERPC_ARCH_SIGNAL_H
#define _POWERPC_ARCH_SIGNAL_H
-extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
-
-extern void __user *get_sigframe(struct ksignal *ksig, unsigned long sp,
- size_t frame_size, int is_32);
+void __user *get_sigframe(struct ksignal *ksig, struct task_struct *tsk,
+ size_t frame_size, int is_32);
extern int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs);
+ struct task_struct *tsk);
extern int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs);
+ struct task_struct *tsk);
+
+/*
+ * Fetch a sigset_t from user space with a single 64-bit __get_user().
+ * The BUILD_BUG_ON guarantees sigset_t is exactly one u64, so reading
+ * sig[0] as a u64 covers the whole set. Returns the __get_user() result.
+ */
+static inline int __get_user_sigset(sigset_t *dst, const sigset_t __user *src)
+{
+ BUILD_BUG_ON(sizeof(sigset_t) != sizeof(u64));
+
+ return __get_user(dst->sig[0], (u64 __user *)&src->sig[0]);
+}
+/*
+ * Fetch a sigset_t from user space one word at a time (_NSIG_WORDS words)
+ * with unsafe_get_user(), passing @label through as the fault target.
+ * NOTE(review): like other unsafe_* accessors this presumably has to run
+ * inside an open user-access window - confirm against callers.
+ */
+#define unsafe_get_user_sigset(dst, src, label) do { \
+ sigset_t *__dst = dst; \
+ const sigset_t __user *__src = src; \
+ int i; \
+ \
+ for (i = 0; i < _NSIG_WORDS; i++) \
+ unsafe_get_user(__dst->sig[i], &__src->sig[i], label); \
+} while (0)
-extern unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task);
-extern unsigned long copy_transact_fpr_to_user(void __user *to,
- struct task_struct *task);
-extern unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from);
-extern unsigned long copy_transact_fpr_from_user(struct task_struct *task,
- void __user *from);
#ifdef CONFIG_VSX
extern unsigned long copy_vsx_to_user(void __user *to,
struct task_struct *task);
-extern unsigned long copy_transact_vsx_to_user(void __user *to,
+extern unsigned long copy_ckvsx_to_user(void __user *to,
struct task_struct *task);
extern unsigned long copy_vsx_from_user(struct task_struct *task,
void __user *from);
-extern unsigned long copy_transact_vsx_from_user(struct task_struct *task,
+extern unsigned long copy_ckvsx_from_user(struct task_struct *task,
void __user *from);
+unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_ckfpr_to_user(void __user *to, struct task_struct *task);
+unsigned long copy_fpr_from_user(struct task_struct *task, void __user *from);
+unsigned long copy_ckfpr_from_user(struct task_struct *task, void __user *from);
+
+/*
+ * Write @task's FPRs (TS_FPR(0) .. TS_FPR(ELF_NFPREG - 2)) followed by
+ * fp_state.fpscr to the user buffer @to as u64 slots, passing @label to
+ * unsafe_put_user() as the fault target.
+ */
+#define unsafe_copy_fpr_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1 ; i++) \
+ unsafe_put_user(__t->thread.TS_FPR(i), &buf[i], label); \
+ unsafe_put_user(__t->thread.fp_state.fpscr, &buf[i], label); \
+} while (0)
+
+/*
+ * Write the TS_VSRLOWOFFSET element of @task's first ELF_NVSRHALFREG
+ * fp_state.fpr[] entries to the user buffer @to, passing @label to
+ * unsafe_put_user() as the fault target.
+ */
+#define unsafe_copy_vsx_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_put_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label);\
+} while (0)
+
+/*
+ * Read @task's FPRs (TS_FPR(0) .. TS_FPR(ELF_NFPREG - 2)) followed by
+ * fp_state.fpscr from the user buffer @from, passing @label to
+ * unsafe_get_user() as the fault target. Values are stored straight into
+ * the thread state, without an intermediate buffer.
+ */
+#define unsafe_copy_fpr_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1; i++) \
+ unsafe_get_user(__t->thread.TS_FPR(i), &buf[i], label); \
+ unsafe_get_user(__t->thread.fp_state.fpscr, &buf[i], label); \
+} while (0)
+
+/*
+ * Read the TS_VSRLOWOFFSET element of @task's first ELF_NVSRHALFREG
+ * fp_state.fpr[] entries from the user buffer @from, passing @label to
+ * unsafe_get_user() as the fault target.
+ */
+#define unsafe_copy_vsx_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_get_user(__t->thread.fp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label); \
+} while (0)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * ckfp_state counterpart of unsafe_copy_fpr_to_user(): writes
+ * TS_CKFPR(0) .. TS_CKFPR(ELF_NFPREG - 2) followed by ckfp_state.fpscr to
+ * the user buffer @to, passing @label to unsafe_put_user().
+ */
+#define unsafe_copy_ckfpr_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1 ; i++) \
+ unsafe_put_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+ unsafe_put_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \
+} while (0)
+
+/*
+ * ckfp_state counterpart of unsafe_copy_vsx_to_user(): writes the
+ * TS_VSRLOWOFFSET element of the first ELF_NVSRHALFREG ckfp_state.fpr[]
+ * entries to the user buffer @to, passing @label to unsafe_put_user().
+ */
+#define unsafe_copy_ckvsx_to_user(to, task, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)to; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_put_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label);\
+} while (0)
+
+/*
+ * ckfp_state counterpart of unsafe_copy_fpr_from_user(): reads
+ * TS_CKFPR(0) .. TS_CKFPR(ELF_NFPREG - 2) followed by ckfp_state.fpscr from
+ * the user buffer @from. Every access, including the final FPSCR read,
+ * uses the caller-supplied @label as its fault target.
+ */
+#define unsafe_copy_ckfpr_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NFPREG - 1 ; i++) \
+ unsafe_get_user(__t->thread.TS_CKFPR(i), &buf[i], label);\
+ unsafe_get_user(__t->thread.ckfp_state.fpscr, &buf[i], label); \
+} while (0)
+
+/*
+ * ckfp_state counterpart of unsafe_copy_vsx_from_user(): reads the
+ * TS_VSRLOWOFFSET element of the first ELF_NVSRHALFREG ckfp_state.fpr[]
+ * entries from the user buffer @from, passing @label to unsafe_get_user().
+ */
+#define unsafe_copy_ckvsx_from_user(task, from, label) do { \
+ struct task_struct *__t = task; \
+ u64 __user *buf = (u64 __user *)from; \
+ int i; \
+ \
+ for (i = 0; i < ELF_NVSRHALFREG ; i++) \
+ unsafe_get_user(__t->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET], \
+ &buf[i], label); \
+} while (0)
+#endif
+#elif defined(CONFIG_PPC_FPU_REGS)
+
+/*
+ * Non-VSX build with FPU registers: fp_state.fpr is copied to the user
+ * buffer as one ELF_NFPREG * sizeof(double) block.
+ */
+#define unsafe_copy_fpr_to_user(to, task, label) \
+ unsafe_copy_to_user(to, (task)->thread.fp_state.fpr, \
+ ELF_NFPREG * sizeof(double), label)
+
+/* Reverse direction: fill fp_state.fpr from the user buffer in one block. */
+#define unsafe_copy_fpr_from_user(task, from, label) \
+ unsafe_copy_from_user((task)->thread.fp_state.fpr, from, \
+ ELF_NFPREG * sizeof(double), label)
+
+/*
+ * Copy @task's fp_state.fpr (ELF_NFPREG doubles) to the user buffer @to.
+ * Returns the result of __copy_to_user().
+ */
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.fp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+/*
+ * Fill @task's fp_state.fpr (ELF_NFPREG doubles) from the user buffer @from.
+ * Returns the result of __copy_from_user().
+ */
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+ return __copy_from_user(task->thread.fp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/* Copy ckfp_state.fpr to the user buffer as one ELF_NFPREG-double block. */
+#define unsafe_copy_ckfpr_to_user(to, task, label) \
+ unsafe_copy_to_user(to, (task)->thread.ckfp_state.fpr, \
+ ELF_NFPREG * sizeof(double), label)
+
+/*
+ * Copy @task's ckfp_state.fpr (ELF_NFPREG doubles) to the user buffer @to.
+ * Returns the result of __copy_to_user().
+ *
+ * Must be static inline like the sibling helpers: this is a header, and a
+ * plain "inline" definition here either emits one external definition per
+ * including file or none at all, depending on the inline semantics in use.
+ */
+static inline unsigned long
+copy_ckfpr_to_user(void __user *to, struct task_struct *task)
+{
+ return __copy_to_user(to, task->thread.ckfp_state.fpr,
+ ELF_NFPREG * sizeof(double));
+}
+
+/*
+ * Fill @task's ckfp_state.fpr (ELF_NFPREG doubles) from the user buffer
+ * @from. Returns the result of __copy_from_user().
+ */
+static inline unsigned long
+copy_ckfpr_from_user(struct task_struct *task, void __user *from)
+{
+ return __copy_from_user(task->thread.ckfp_state.fpr, from,
+ ELF_NFPREG * sizeof(double));
+}
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
+#else
+/*
+ * Neither CONFIG_VSX nor CONFIG_PPC_FPU_REGS: the FPR copies do nothing.
+ * The never-taken "goto label" still references @label.
+ * NOTE(review): presumably this avoids unused-label warnings in callers
+ * - confirm.
+ */
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
+
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
+
+/* No FP register state in this configuration: nothing to copy, returns 0. */
+static inline unsigned long
+copy_fpr_to_user(void __user *to, struct task_struct *task)
+{
+ return 0;
+}
+
+/* No FP register state in this configuration: nothing to copy, returns 0. */
+static inline unsigned long
+copy_fpr_from_user(struct task_struct *task, void __user *from)
+{
+ return 0;
+}
#endif
#ifdef CONFIG_PPC64
extern int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs);
+ struct task_struct *tsk);
#else /* CONFIG_PPC64 */
static inline int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
- struct pt_regs *regs)
+ struct task_struct *tsk)
{
return -EFAULT;
}
#endif /* !defined(CONFIG_PPC64) */
+void signal_fault(struct task_struct *tsk, struct pt_regs *regs,
+ const char *where, void __user *ptr);
+
#endif /* _POWERPC_ARCH_SIGNAL_H */
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index b171001698ff..7a718ed32b27 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Signal handling for 32bit PPC and 32bit tasks on 64bit PPC
*
@@ -10,11 +11,6 @@
* Derived from "arch/i386/kernel/signal.c"
* Copyright (C) 1991, 1992 Linus Torvalds
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
@@ -25,9 +21,10 @@
#include <linux/errno.h>
#include <linux/elf.h>
#include <linux/ptrace.h>
+#include <linux/pagemap.h>
#include <linux/ratelimit.h>
-#ifdef CONFIG_PPC64
#include <linux/syscalls.h>
+#ifdef CONFIG_PPC64
#include <linux/compat.h>
#else
#include <linux/wait.h>
@@ -37,36 +34,30 @@
#include <linux/binfmts.h>
#endif
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/syscalls.h>
#include <asm/sigcontext.h>
#include <asm/vdso.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
#ifdef CONFIG_PPC64
-#include "ppc32.h"
+#include <asm/syscalls_32.h>
#include <asm/unistd.h>
#else
#include <asm/ucontext.h>
-#include <asm/pgtable.h>
#endif
#include "signal.h"
#ifdef CONFIG_PPC64
-#define sys_rt_sigreturn compat_sys_rt_sigreturn
-#define sys_swapcontext compat_sys_swapcontext
-#define sys_sigreturn compat_sys_sigreturn
-
#define old_sigaction old_sigaction32
#define sigcontext sigcontext32
#define mcontext mcontext32
#define ucontext ucontext32
-#define __save_altstack __compat_save_altstack
-
/*
* Userspace code may pass a ucontext which doesn't include VSX added
* at the end. We need to check for this case.
@@ -91,66 +82,35 @@
* Functions for flipping sigsets (thanks to brain dead generic
* implementation that makes things simple for little endian only)
*/
-static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set)
-{
- compat_sigset_t cset;
-
- switch (_NSIG_WORDS) {
- case 4: cset.sig[6] = set->sig[3] & 0xffffffffull;
- cset.sig[7] = set->sig[3] >> 32;
- case 3: cset.sig[4] = set->sig[2] & 0xffffffffull;
- cset.sig[5] = set->sig[2] >> 32;
- case 2: cset.sig[2] = set->sig[1] & 0xffffffffull;
- cset.sig[3] = set->sig[1] >> 32;
- case 1: cset.sig[0] = set->sig[0] & 0xffffffffull;
- cset.sig[1] = set->sig[0] >> 32;
- }
- return copy_to_user(uset, &cset, sizeof(*uset));
-}
-
-static inline int get_sigset_t(sigset_t *set,
- const compat_sigset_t __user *uset)
-{
- compat_sigset_t s32;
-
- if (copy_from_user(&s32, uset, sizeof(*uset)))
- return -EFAULT;
-
- /*
- * Swap the 2 words of the 64-bit sigset_t (they are stored
- * in the "wrong" endian in 32-bit user storage).
- */
- switch (_NSIG_WORDS) {
- case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32);
- case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32);
- case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32);
- case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32);
- }
- return 0;
-}
+#define unsafe_put_sigset_t unsafe_put_compat_sigset
+#define unsafe_get_sigset_t unsafe_get_compat_sigset
#define to_user_ptr(p) ptr_to_compat(p)
#define from_user_ptr(p) compat_ptr(p)
-static inline int save_general_regs(struct pt_regs *regs,
- struct mcontext __user *frame)
+static __always_inline int
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
- int i;
-
- WARN_ON(!FULL_REGS(regs));
+ int val, i;
for (i = 0; i <= PT_RESULT; i ++) {
- if (i == 14 && !FULL_REGS(regs))
- i = 32;
- if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
- return -EFAULT;
+ /* Force userspace to always see softe as 1 (interrupts enabled) */
+ if (i == PT_SOFTE)
+ val = 1;
+ else
+ val = gregs[i];
+
+ unsafe_put_user(val, &frame->mc_gregs[i], failed);
}
return 0;
+
+failed:
+ return 1;
}
-static inline int restore_general_regs(struct pt_regs *regs,
- struct mcontext __user *sr)
+static __always_inline int
+__unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
{
elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
int i;
@@ -158,51 +118,67 @@ static inline int restore_general_regs(struct pt_regs *regs,
for (i = 0; i <= PT_RESULT; i++) {
if ((i == PT_MSR) || (i == PT_SOFTE))
continue;
- if (__get_user(gregs[i], &sr->mc_gregs[i]))
- return -EFAULT;
+ unsafe_get_user(gregs[i], &sr->mc_gregs[i], failed);
}
return 0;
+
+failed:
+ return 1;
}
#else /* CONFIG_PPC64 */
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
-static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set)
-{
- return copy_to_user(uset, set, sizeof(*uset));
-}
+/*
+ * Write the kernel sigset @set to the user sigset @uset with
+ * unsafe_copy_to_user(), sized by the user-side object, passing @label as
+ * the fault target. The typed locals make sure both arguments really are
+ * sigset_t pointers.
+ */
+#define unsafe_put_sigset_t(uset, set, label) do { \
+ sigset_t __user *__us = uset ; \
+ const sigset_t *__s = set; \
+ \
+ unsafe_copy_to_user(__us, __s, sizeof(*__us), label); \
+} while (0)
-static inline int get_sigset_t(sigset_t *set, const sigset_t __user *uset)
-{
- return copy_from_user(set, uset, sizeof(*uset));
-}
+#define unsafe_get_sigset_t unsafe_get_user_sigset
#define to_user_ptr(p) ((unsigned long)(p))
#define from_user_ptr(p) ((void __user *)(p))
-static inline int save_general_regs(struct pt_regs *regs,
- struct mcontext __user *frame)
+static __always_inline int
+__unsafe_save_general_regs(struct pt_regs *regs, struct mcontext __user *frame)
{
- WARN_ON(!FULL_REGS(regs));
- return __copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE);
+ unsafe_copy_to_user(&frame->mc_gregs, regs, GP_REGS_SIZE, failed);
+ return 0;
+
+failed:
+ return 1;
}
-static inline int restore_general_regs(struct pt_regs *regs,
- struct mcontext __user *sr)
+static __always_inline
+int __unsafe_restore_general_regs(struct pt_regs *regs, struct mcontext __user *sr)
{
/* copy up to but not including MSR */
- if (__copy_from_user(regs, &sr->mc_gregs,
- PT_MSR * sizeof(elf_greg_t)))
- return -EFAULT;
+ unsafe_copy_from_user(regs, &sr->mc_gregs, PT_MSR * sizeof(elf_greg_t), failed);
+
/* copy from orig_r3 (the word after the MSR) up to the end */
- if (__copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
- GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t)))
- return -EFAULT;
+ unsafe_copy_from_user(&regs->orig_gpr3, &sr->mc_gregs[PT_ORIG_R3],
+ GP_REGS_SIZE - PT_ORIG_R3 * sizeof(elf_greg_t), failed);
+
return 0;
+
+failed:
+ return 1;
}
#endif
+/*
+ * Label-style wrapper: jump to @label when __unsafe_save_general_regs()
+ * returns non-zero.
+ */
+#define unsafe_save_general_regs(regs, frame, label) do { \
+ if (__unsafe_save_general_regs(regs, frame)) \
+ goto label; \
+} while (0)
+
+/*
+ * Label-style wrapper: jump to @label when __unsafe_restore_general_regs()
+ * returns non-zero.
+ */
+#define unsafe_restore_general_regs(regs, frame, label) do { \
+ if (__unsafe_restore_general_regs(regs, frame)) \
+ goto label; \
+} while (0)
+
/*
* When we have signals to deliver, we set up on the
* user stack, going down from the original stack pointer:
@@ -229,9 +205,6 @@ struct sigframe {
int abigap[56];
};
-/* We use the mc_pad field for the signal return trampoline. */
-#define tramp mc_pad
-
/*
* When we have rt signals to deliver, we set up on the
* user stack, going down from the original stack pointer:
@@ -260,171 +233,51 @@ struct rt_sigframe {
int abigap[56];
};
-#ifdef CONFIG_VSX
-unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_FPR(i);
- buf[i] = task->thread.fp_state.fpscr;
- return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
-}
-
-unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
- return 1;
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_FPR(i) = buf[i];
- task->thread.fp_state.fpscr = buf[i];
-
- return 0;
-}
-
-unsigned long copy_vsx_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET];
- return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
-}
-
-unsigned long copy_vsx_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
- return 1;
- for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i];
- return 0;
-}
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-unsigned long copy_transact_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- buf[i] = task->thread.TS_TRANS_FPR(i);
- buf[i] = task->thread.transact_fp.fpscr;
- return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
-}
-
-unsigned long copy_transact_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NFPREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
- return 1;
- for (i = 0; i < (ELF_NFPREG - 1) ; i++)
- task->thread.TS_TRANS_FPR(i) = buf[i];
- task->thread.transact_fp.fpscr = buf[i];
-
- return 0;
-}
-
-unsigned long copy_transact_vsx_to_user(void __user *to,
- struct task_struct *task)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- /* save FPR copy to local buffer then write to the thread_struct */
- for (i = 0; i < ELF_NVSRHALFREG; i++)
- buf[i] = task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET];
- return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
-}
-
-unsigned long copy_transact_vsx_from_user(struct task_struct *task,
- void __user *from)
-{
- u64 buf[ELF_NVSRHALFREG];
- int i;
-
- if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
- return 1;
- for (i = 0; i < ELF_NVSRHALFREG ; i++)
- task->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = buf[i];
- return 0;
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#else
-inline unsigned long copy_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- return __copy_to_user(to, task->thread.fp_state.fpr,
- ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_fpr_from_user(struct task_struct *task,
- void __user *from)
+unsigned long get_min_sigframe_size_32(void)
{
- return __copy_from_user(task->thread.fp_state.fpr, from,
- ELF_NFPREG * sizeof(double));
+ return max(sizeof(struct rt_sigframe) + __SIGNAL_FRAMESIZE + 16,
+ sizeof(struct sigframe) + __SIGNAL_FRAMESIZE);
}
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-inline unsigned long copy_transact_fpr_to_user(void __user *to,
- struct task_struct *task)
-{
- return __copy_to_user(to, task->thread.transact_fp.fpr,
- ELF_NFPREG * sizeof(double));
-}
-
-inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
- void __user *from)
-{
- return __copy_from_user(task->thread.transact_fp.fpr, from,
- ELF_NFPREG * sizeof(double));
-}
-#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-#endif
-
/*
* Save the current user registers on the user stack.
* We only save the altivec/spe registers if the process has used
* altivec/spe instructions at some point.
*/
-static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
- struct mcontext __user *tm_frame, int sigret,
- int ctx_has_vsx_region)
+static void prepare_save_user_regs(int ctx_has_vsx_region)
{
- unsigned long msr = regs->msr;
-
/* Make sure floating point registers are stored in regs */
flush_fp_to_thread(current);
+#ifdef CONFIG_ALTIVEC
+ if (current->thread.used_vr)
+ flush_altivec_to_thread(current);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+#ifdef CONFIG_VSX
+ if (current->thread.used_vsr && ctx_has_vsx_region)
+ flush_vsx_to_thread(current);
+#endif
+#ifdef CONFIG_SPE
+ if (current->thread.used_spe)
+ flush_spe_to_thread(current);
+#endif
+}
+
+static __always_inline int
+__unsafe_save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
+ struct mcontext __user *tm_frame, int ctx_has_vsx_region)
+{
+ unsigned long msr = regs->msr;
/* save general registers */
- if (save_general_regs(regs, frame))
- return 1;
+ unsafe_save_general_regs(regs, frame, failed);
#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
+ unsafe_copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
/* set MSR_VEC in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
msr |= MSR_VEC;
@@ -437,13 +290,10 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* most significant bits of that same vector. --BenH
* Note that the current VRSAVE value is in the SPR at this point.
*/
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- if (__put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32]))
- return 1;
+ unsafe_put_user(current->thread.vrsave, (u32 __user *)&frame->mc_vregs[32],
+ failed);
#endif /* CONFIG_ALTIVEC */
- if (copy_fpr_to_user(&frame->mc_fregs, current))
- return 1;
+ unsafe_copy_fpr_to_user(&frame->mc_fregs, current, failed);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -458,19 +308,15 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* contains valid data
*/
if (current->thread.used_vsr && ctx_has_vsx_region) {
- __giveup_vsx(current);
- if (copy_vsx_to_user(&frame->mc_vsregs, current))
- return 1;
+ unsafe_copy_vsx_to_user(&frame->mc_vsregs, current, failed);
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
/* save spe registers */
if (current->thread.used_spe) {
- flush_spe_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
+ unsafe_copy_to_user(&frame->mc_vregs, current->thread.evr,
+ ELF_NEVRREG * sizeof(u32), failed);
/* set MSR_SPE in the saved MSR value to indicate that
frame->mc_vregs contains valid data */
msr |= MSR_SPE;
@@ -478,30 +324,29 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
/* else assert((regs->msr & MSR_SPE) == 0) */
/* We always copy to/from spefscr */
- if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
- return 1;
+ unsafe_put_user(current->thread.spefscr,
+ (u32 __user *)&frame->mc_vregs + ELF_NEVRREG, failed);
#endif /* CONFIG_SPE */
- if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
- return 1;
+ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
+
/* We need to write 0 the MSR top 32 bits in the tm frame so that we
* can check it on the restore to see if TM is active
*/
- if (tm_frame && __put_user(0, &tm_frame->mc_gregs[PT_MSR]))
- return 1;
-
- if (sigret) {
- /* Set up the sigreturn trampoline: li r0,sigret; sc */
- if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
- || __put_user(0x44000002UL, &frame->tramp[1]))
- return 1;
- flush_icache_range((unsigned long) &frame->tramp[0],
- (unsigned long) &frame->tramp[2]);
- }
+ if (tm_frame)
+ unsafe_put_user(0, &tm_frame->mc_gregs[PT_MSR], failed);
return 0;
+
+failed:
+ return 1;
}
+#define unsafe_save_user_regs(regs, frame, tm_frame, has_vsx, label) do { \
+ if (__unsafe_save_user_regs(regs, frame, tm_frame, has_vsx)) \
+ goto label; \
+} while (0)
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Save the current user registers on the user stack.
@@ -510,28 +355,23 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
* We also save the transactional registers to a second ucontext in the
* frame.
*
- * See save_user_regs() and signal_64.c:setup_tm_sigcontexts().
+ * See __unsafe_save_user_regs() and signal_64.c:setup_tm_sigcontexts().
*/
-static int save_tm_user_regs(struct pt_regs *regs,
- struct mcontext __user *frame,
- struct mcontext __user *tm_frame, int sigret)
+static void prepare_save_tm_user_regs(void)
{
- unsigned long msr = regs->msr;
-
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
+ WARN_ON(tm_suspend_disabled);
- /* Make sure floating point registers are stored in regs */
- flush_fp_to_thread(current);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ current->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+}
+static __always_inline int
+save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+ struct mcontext __user *tm_frame, unsigned long msr)
+{
/* Save both sets of general registers */
- if (save_general_regs(&current->thread.ckpt_regs, frame)
- || save_general_regs(regs, tm_frame))
- return 1;
+ unsafe_save_general_regs(&current->thread.ckpt_regs, frame, failed);
+ unsafe_save_general_regs(regs, tm_frame, failed);
/* Stash the top half of the 64bit MSR into the 32bit MSR word
* of the transactional mcontext. This way we have a backward-compatible
@@ -539,27 +379,20 @@ static int save_tm_user_regs(struct pt_regs *regs,
* also look at what type of transaction (T or S) was active at the
* time of the signal.
*/
- if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR]))
- return 1;
+ unsafe_put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR], failed);
-#ifdef CONFIG_ALTIVEC
/* save altivec registers */
if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, &current->thread.vr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
- if (msr & MSR_VEC) {
- if (__copy_to_user(&tm_frame->mc_vregs,
- &current->thread.transact_vr,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
- } else {
- if (__copy_to_user(&tm_frame->mc_vregs,
- &current->thread.vr_state,
- ELF_NVRREG * sizeof(vector128)))
- return 1;
- }
+ unsafe_copy_to_user(&frame->mc_vregs, &current->thread.ckvr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
+ if (msr & MSR_VEC)
+ unsafe_copy_to_user(&tm_frame->mc_vregs,
+ &current->thread.vr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
+ else
+ unsafe_copy_to_user(&tm_frame->mc_vregs,
+ &current->thread.ckvr_state,
+ ELF_NVRREG * sizeof(vector128), failed);
/* set MSR_VEC in the saved MSR value to indicate that
* frame->mc_vregs contains valid data
@@ -572,33 +405,21 @@ static int save_tm_user_regs(struct pt_regs *regs,
* significant bits of a vector, we "cheat" and stuff VRSAVE in the
* most significant bits of that same vector. --BenH
*/
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- if (__put_user(current->thread.vrsave,
- (u32 __user *)&frame->mc_vregs[32]))
- return 1;
- if (msr & MSR_VEC) {
- if (__put_user(current->thread.transact_vrsave,
- (u32 __user *)&tm_frame->mc_vregs[32]))
- return 1;
- } else {
- if (__put_user(current->thread.vrsave,
- (u32 __user *)&tm_frame->mc_vregs[32]))
- return 1;
- }
-#endif /* CONFIG_ALTIVEC */
+ unsafe_put_user(current->thread.ckvrsave,
+ (u32 __user *)&frame->mc_vregs[32], failed);
+ if (msr & MSR_VEC)
+ unsafe_put_user(current->thread.vrsave,
+ (u32 __user *)&tm_frame->mc_vregs[32], failed);
+ else
+ unsafe_put_user(current->thread.ckvrsave,
+ (u32 __user *)&tm_frame->mc_vregs[32], failed);
- if (copy_fpr_to_user(&frame->mc_fregs, current))
- return 1;
- if (msr & MSR_FP) {
- if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current))
- return 1;
- } else {
- if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
- return 1;
- }
+ unsafe_copy_ckfpr_to_user(&frame->mc_fregs, current, failed);
+ if (msr & MSR_FP)
+ unsafe_copy_fpr_to_user(&tm_frame->mc_fregs, current, failed);
+ else
+ unsafe_copy_ckfpr_to_user(&tm_frame->mc_fregs, current, failed);
-#ifdef CONFIG_VSX
/*
* Copy VSR 0-31 upper half from thread_struct to local
* buffer, then write that to userspace. Also set MSR_VSX in
@@ -606,55 +427,38 @@ static int save_tm_user_regs(struct pt_regs *regs,
* contains valid data
*/
if (current->thread.used_vsr) {
- __giveup_vsx(current);
- if (copy_vsx_to_user(&frame->mc_vsregs, current))
- return 1;
- if (msr & MSR_VSX) {
- if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs,
- current))
- return 1;
- } else {
- if (copy_vsx_to_user(&tm_frame->mc_vsregs, current))
- return 1;
- }
+ unsafe_copy_ckvsx_to_user(&frame->mc_vsregs, current, failed);
+ if (msr & MSR_VSX)
+ unsafe_copy_vsx_to_user(&tm_frame->mc_vsregs, current, failed);
+ else
+ unsafe_copy_ckvsx_to_user(&tm_frame->mc_vsregs, current, failed);
msr |= MSR_VSX;
}
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /* SPE regs are not checkpointed with TM, so this section is
- * simply the same as in save_user_regs().
- */
- if (current->thread.used_spe) {
- flush_spe_to_thread(current);
- if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
- /* set MSR_SPE in the saved MSR value to indicate that
- * frame->mc_vregs contains valid data */
- msr |= MSR_SPE;
- }
- /* We always copy to/from spefscr */
- if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
- return 1;
-#endif /* CONFIG_SPE */
+ unsafe_put_user(msr, &frame->mc_gregs[PT_MSR], failed);
- if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
- return 1;
- if (sigret) {
- /* Set up the sigreturn trampoline: li r0,sigret; sc */
- if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
- || __put_user(0x44000002UL, &frame->tramp[1]))
- return 1;
- flush_icache_range((unsigned long) &frame->tramp[0],
- (unsigned long) &frame->tramp[2]);
- }
+ return 0;
+
+failed:
+ return 1;
+}
+#else
+static void prepare_save_tm_user_regs(void) { }
+static __always_inline int
+save_tm_user_regs_unsafe(struct pt_regs *regs, struct mcontext __user *frame,
+ struct mcontext __user *tm_frame, unsigned long msr)
+{
return 0;
}
#endif
+#define unsafe_save_tm_user_regs(regs, frame, tm_frame, msr, label) do { \
+ if (save_tm_user_regs_unsafe(regs, frame, tm_frame, msr)) \
+ goto label; \
+} while (0)
+
/*
* Restore the current user register values from the user stack,
* (except for MSR).
@@ -662,77 +466,65 @@ static int save_tm_user_regs(struct pt_regs *regs,
static long restore_user_regs(struct pt_regs *regs,
struct mcontext __user *sr, int sig)
{
- long err;
unsigned int save_r2 = 0;
unsigned long msr;
#ifdef CONFIG_VSX
int i;
#endif
+ if (!user_read_access_begin(sr, sizeof(*sr)))
+ return 1;
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal
*/
if (!sig)
save_r2 = (unsigned int)regs->gpr[2];
- err = restore_general_regs(regs, sr);
- regs->trap = 0;
- err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
+ unsafe_restore_general_regs(regs, sr, failed);
+ set_trap_norestart(regs);
+ unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
if (!sig)
regs->gpr[2] = (unsigned long) save_r2;
- if (err)
- return 1;
/* if doing signal return, restore the previous little-endian mode */
if (sig)
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
-
- /*
- * Do this before updating the thread state in
- * current->thread.fpr/vr/evr. That way, if we get preempted
- * and another task grabs the FPU/Altivec/SPE, it won't be
- * tempted to save the current CPU state into the thread_struct
- * and corrupt what we are writing there.
- */
- discard_lazy_cpu_state();
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
#ifdef CONFIG_ALTIVEC
/*
* Force the process to reload the altivec registers from
* current->thread when it next does altivec instructions
*/
- regs->msr &= ~MSR_VEC;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
- if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
- sizeof(sr->mc_vregs)))
- return 1;
+ unsafe_copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
+ sizeof(sr->mc_vregs), failed);
+ current->thread.used_vr = true;
} else if (current->thread.used_vr)
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
/* Always get VRSAVE back */
- if (__get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32]))
- return 1;
+ unsafe_get_user(current->thread.vrsave, (u32 __user *)&sr->mc_vregs[32], failed);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
mtspr(SPRN_VRSAVE, current->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
- if (copy_fpr_from_user(current, &sr->mc_fregs))
- return 1;
+ unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
#ifdef CONFIG_VSX
/*
* Force the process to reload the VSX registers from
* current->thread when it next does VSX instruction.
*/
- regs->msr &= ~MSR_VSX;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
- if (copy_vsx_from_user(current, &sr->mc_vsregs))
- return 1;
+ unsafe_copy_vsx_from_user(current, &sr->mc_vsregs, failed);
+ current->thread.used_vsr = true;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++)
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
@@ -741,26 +533,34 @@ static long restore_user_regs(struct pt_regs *regs,
* force the process to reload the FP registers from
* current->thread when it next does FP instructions
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
#ifdef CONFIG_SPE
- /* force the process to reload the spe registers from
- current->thread when it next does spe instructions */
- regs->msr &= ~MSR_SPE;
+ /*
+ * Force the process to reload the spe registers from
+ * current->thread when it next does spe instructions.
+ * Since this is user ABI, we must enforce the sizing.
+ */
+ BUILD_BUG_ON(sizeof(current->thread.spe) != ELF_NEVRREG * sizeof(u32));
+ regs_set_return_msr(regs, regs->msr & ~MSR_SPE);
if (msr & MSR_SPE) {
/* restore spe registers from the stack */
- if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
+ unsafe_copy_from_user(&current->thread.spe, &sr->mc_vregs,
+ sizeof(current->thread.spe), failed);
+ current->thread.used_spe = true;
} else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ memset(&current->thread.spe, 0, sizeof(current->thread.spe));
/* Always get SPEFSCR back */
- if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG))
- return 1;
+ unsafe_get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs + ELF_NEVRREG, failed);
#endif /* CONFIG_SPE */
+ user_read_access_end();
return 0;
+
+failed:
+ user_read_access_end();
+ return 1;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -773,12 +573,11 @@ static long restore_tm_user_regs(struct pt_regs *regs,
struct mcontext __user *sr,
struct mcontext __user *tm_sr)
{
- long err;
unsigned long msr, msr_hi;
-#ifdef CONFIG_VSX
int i;
-#endif
+ if (tm_suspend_disabled)
+ return 1;
/*
* restore general registers but not including MSR or SOFTE. Also
* take care of keeping r2 (TLS) intact if not a signal.
@@ -786,95 +585,107 @@ static long restore_tm_user_regs(struct pt_regs *regs,
* TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
* were set by the signal delivery.
*/
- err = restore_general_regs(regs, tm_sr);
- err |= restore_general_regs(&current->thread.ckpt_regs, sr);
-
- err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);
-
- err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
- if (err)
+ if (!user_read_access_begin(sr, sizeof(*sr)))
return 1;
- /* Restore the previous little-endian mode */
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ unsafe_restore_general_regs(&current->thread.ckpt_regs, sr, failed);
+ unsafe_get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP], failed);
+ unsafe_get_user(msr, &sr->mc_gregs[PT_MSR], failed);
- /*
- * Do this before updating the thread state in
- * current->thread.fpr/vr/evr. That way, if we get preempted
- * and another task grabs the FPU/Altivec/SPE, it won't be
- * tempted to save the current CPU state into the thread_struct
- * and corrupt what we are writing there.
- */
- discard_lazy_cpu_state();
+ /* Restore the previous little-endian mode */
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
-#ifdef CONFIG_ALTIVEC
- regs->msr &= ~MSR_VEC;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VEC);
if (msr & MSR_VEC) {
/* restore altivec registers from the stack */
- if (__copy_from_user(&current->thread.vr_state, &sr->mc_vregs,
- sizeof(sr->mc_vregs)) ||
- __copy_from_user(&current->thread.transact_vr,
- &tm_sr->mc_vregs,
- sizeof(sr->mc_vregs)))
- return 1;
+ unsafe_copy_from_user(&current->thread.ckvr_state, &sr->mc_vregs,
+ sizeof(sr->mc_vregs), failed);
+ current->thread.used_vr = true;
} else if (current->thread.used_vr) {
memset(&current->thread.vr_state, 0,
ELF_NVRREG * sizeof(vector128));
- memset(&current->thread.transact_vr, 0,
+ memset(&current->thread.ckvr_state, 0,
ELF_NVRREG * sizeof(vector128));
}
/* Always get VRSAVE back */
- if (__get_user(current->thread.vrsave,
- (u32 __user *)&sr->mc_vregs[32]) ||
- __get_user(current->thread.transact_vrsave,
- (u32 __user *)&tm_sr->mc_vregs[32]))
- return 1;
+ unsafe_get_user(current->thread.ckvrsave,
+ (u32 __user *)&sr->mc_vregs[32], failed);
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mtspr(SPRN_VRSAVE, current->thread.vrsave);
-#endif /* CONFIG_ALTIVEC */
+ mtspr(SPRN_VRSAVE, current->thread.ckvrsave);
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1));
- if (copy_fpr_from_user(current, &sr->mc_fregs) ||
- copy_transact_fpr_from_user(current, &tm_sr->mc_fregs))
- return 1;
+ unsafe_copy_fpr_from_user(current, &sr->mc_fregs, failed);
-#ifdef CONFIG_VSX
- regs->msr &= ~MSR_VSX;
+ regs_set_return_msr(regs, regs->msr & ~MSR_VSX);
if (msr & MSR_VSX) {
/*
* Restore altivec registers from the stack to a local
* buffer, then write this out to the thread_struct
*/
- if (copy_vsx_from_user(current, &sr->mc_vsregs) ||
- copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs))
- return 1;
+ unsafe_copy_ckvsx_from_user(current, &sr->mc_vsregs, failed);
+ current->thread.used_vsr = true;
} else if (current->thread.used_vsr)
for (i = 0; i < 32 ; i++) {
current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
- current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
+ current->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
- /* SPE regs are not checkpointed with TM, so this section is
- * simply the same as in restore_user_regs().
- */
- regs->msr &= ~MSR_SPE;
- if (msr & MSR_SPE) {
- if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
- ELF_NEVRREG * sizeof(u32)))
- return 1;
- } else if (current->thread.used_spe)
- memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+ user_read_access_end();
- /* Always get SPEFSCR back */
- if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
- + ELF_NEVRREG))
+ if (!user_read_access_begin(tm_sr, sizeof(*tm_sr)))
return 1;
-#endif /* CONFIG_SPE */
+ unsafe_restore_general_regs(regs, tm_sr, failed);
+
+ /* restore altivec registers from the stack */
+ if (msr & MSR_VEC)
+ unsafe_copy_from_user(&current->thread.vr_state, &tm_sr->mc_vregs,
+ sizeof(sr->mc_vregs), failed);
+
+ /* Always get VRSAVE back */
+ unsafe_get_user(current->thread.vrsave,
+ (u32 __user *)&tm_sr->mc_vregs[32], failed);
+
+ unsafe_copy_ckfpr_from_user(current, &tm_sr->mc_fregs, failed);
+
+ if (msr & MSR_VSX) {
+ /*
+ * Restore altivec registers from the stack to a local
+ * buffer, then write this out to the thread_struct
+ */
+ unsafe_copy_vsx_from_user(current, &tm_sr->mc_vsregs, failed);
+ current->thread.used_vsr = true;
+ }
+
+ /* Get the top half of the MSR from the user context */
+ unsafe_get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR], failed);
+ msr_hi <<= 32;
+
+ user_read_access_end();
+
+ /* If TM bits are set to the reserved value, it's an invalid context */
+ if (MSR_TM_RESV(msr_hi))
+ return 1;
+
+ /*
+ * Disabling preemption, since it is unsafe to be preempted
+ * with MSR[TS] set without recheckpointing.
+ */
+ preempt_disable();
+
+ /*
+ * CAUTION:
+ * After regs->MSR[TS] being updated, make sure that get_user(),
+ * put_user() or similar functions are *not* called. These
+ * functions can generate page faults which will cause the process
+ * to be de-scheduled with MSR[TS] set but without calling
+ * tm_recheckpoint(). This can cause a bug.
+ *
+ * Pull in the MSR TM bits from the user context
+ */
+ regs_set_return_msr(regs, (regs->msr & ~MSR_TS_MASK) | (msr_hi & MSR_TS_MASK));
/* Now, recheckpoint. This loads up all of the checkpointed (older)
* registers, including FP and V[S]Rs. After recheckpointing, the
* transactional versions should be loaded.
@@ -883,98 +694,39 @@ static long restore_tm_user_regs(struct pt_regs *regs,
/* Make sure the transaction is marked as failed */
current->thread.tm_texasr |= TEXASR_FS;
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&current->thread, msr);
- /* Get the top half of the MSR */
- if (__get_user(msr_hi, &tm_sr->mc_gregs[PT_MSR]))
- return 1;
- /* Pull in MSR TM from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | ((msr_hi<<32) & MSR_TS_MASK);
+ tm_recheckpoint(&current->thread);
/* This loads the speculative FP/VEC state, if used */
+ msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
- do_load_up_transact_fpu(&current->thread);
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ load_fp_state(&current->thread.fp_state);
+ regs_set_return_msr(regs, regs->msr | (MSR_FP | current->thread.fpexc_mode));
}
-#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
- do_load_up_transact_altivec(&current->thread);
- regs->msr |= MSR_VEC;
+ load_vr_state(&current->thread.vr_state);
+ regs_set_return_msr(regs, regs->msr | MSR_VEC);
}
-#endif
+ preempt_enable();
+
+ return 0;
+
+failed:
+ user_read_access_end();
+ return 1;
+}
+#else
+static long restore_tm_user_regs(struct pt_regs *regs, struct mcontext __user *sr,
+ struct mcontext __user *tm_sr)
+{
return 0;
}
#endif
#ifdef CONFIG_PPC64
-int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
-{
- int err;
-
- if (!access_ok (VERIFY_WRITE, d, sizeof(*d)))
- return -EFAULT;
-
- /* If you change siginfo_t structure, please be sure
- * this code is fixed accordingly.
- * It should never copy any pad contained in the structure
- * to avoid security leaks, but must copy the generic
- * 3 ints plus the relevant union member.
- * This routine must convert siginfo from 64bit to 32bit as well
- * at the same time.
- */
- err = __put_user(s->si_signo, &d->si_signo);
- err |= __put_user(s->si_errno, &d->si_errno);
- err |= __put_user((short)s->si_code, &d->si_code);
- if (s->si_code < 0)
- err |= __copy_to_user(&d->_sifields._pad, &s->_sifields._pad,
- SI_PAD_SIZE32);
- else switch(s->si_code >> 16) {
- case __SI_CHLD >> 16:
- err |= __put_user(s->si_pid, &d->si_pid);
- err |= __put_user(s->si_uid, &d->si_uid);
- err |= __put_user(s->si_utime, &d->si_utime);
- err |= __put_user(s->si_stime, &d->si_stime);
- err |= __put_user(s->si_status, &d->si_status);
- break;
- case __SI_FAULT >> 16:
- err |= __put_user((unsigned int)(unsigned long)s->si_addr,
- &d->si_addr);
- break;
- case __SI_POLL >> 16:
- err |= __put_user(s->si_band, &d->si_band);
- err |= __put_user(s->si_fd, &d->si_fd);
- break;
- case __SI_TIMER >> 16:
- err |= __put_user(s->si_tid, &d->si_tid);
- err |= __put_user(s->si_overrun, &d->si_overrun);
- err |= __put_user(s->si_int, &d->si_int);
- break;
- case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
- case __SI_MESGQ >> 16:
- err |= __put_user(s->si_int, &d->si_int);
- /* fallthrough */
- case __SI_KILL >> 16:
- default:
- err |= __put_user(s->si_pid, &d->si_pid);
- err |= __put_user(s->si_uid, &d->si_uid);
- break;
- }
- return err;
-}
#define copy_siginfo_to_user copy_siginfo_to_user32
-int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
-{
- memset(to, 0, sizeof *to);
-
- if (copy_from_user(to, from, 3*sizeof(int)) ||
- copy_from_user(to->_sifields._pad,
- from->_sifields._pad, SI_PAD_SIZE32))
- return -EFAULT;
-
- return 0;
-}
#endif /* CONFIG_PPC64 */
/*
@@ -982,91 +734,185 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
* (one which gets siginfo).
*/
int handle_rt_signal32(struct ksignal *ksig, sigset_t *oldset,
- struct pt_regs *regs)
+ struct task_struct *tsk)
{
- struct rt_sigframe __user *rt_sf;
- struct mcontext __user *frame;
- struct mcontext __user *tm_frame = NULL;
- void __user *addr;
+ struct rt_sigframe __user *frame;
+ struct mcontext __user *mctx;
+ struct mcontext __user *tm_mctx = NULL;
unsigned long newsp = 0;
- int sigret;
unsigned long tramp;
+ struct pt_regs *regs = tsk->thread.regs;
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
/* Set up Signal Frame */
- /* Put a Real Time Context onto stack */
- rt_sf = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*rt_sf), 1);
- addr = rt_sf;
- if (unlikely(rt_sf == NULL))
+ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+ mctx = &frame->uc.uc_mcontext;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ tm_mctx = &frame->uc_transact.uc_mcontext;
+#endif
+ if (MSR_TM_ACTIVE(msr))
+ prepare_save_tm_user_regs();
+ else
+ prepare_save_user_regs(1);
+
+ if (!user_access_begin(frame, sizeof(*frame)))
goto badframe;
/* Put the siginfo & fill in most of the ucontext */
- if (copy_siginfo_to_user(&rt_sf->info, &ksig->info)
- || __put_user(0, &rt_sf->uc.uc_flags)
- || __save_altstack(&rt_sf->uc.uc_stack, regs->gpr[1])
- || __put_user(to_user_ptr(&rt_sf->uc.uc_mcontext),
- &rt_sf->uc.uc_regs)
- || put_sigset_t(&rt_sf->uc.uc_sigmask, oldset))
- goto badframe;
+ unsafe_put_user(0, &frame->uc.uc_flags, failed);
+#ifdef CONFIG_PPC64
+ unsafe_compat_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+#else
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], failed);
+#endif
+ unsafe_put_user(to_user_ptr(&frame->uc.uc_mcontext), &frame->uc.uc_regs, failed);
+
+ if (MSR_TM_ACTIVE(msr)) {
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ unsafe_put_user((unsigned long)&frame->uc_transact,
+ &frame->uc.uc_link, failed);
+ unsafe_put_user((unsigned long)tm_mctx,
+ &frame->uc_transact.uc_regs, failed);
+#endif
+ unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
+ } else {
+ unsafe_put_user(0, &frame->uc.uc_link, failed);
+ unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
+ }
/* Save user registers on the stack */
- frame = &rt_sf->uc.uc_mcontext;
- addr = frame;
- if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
- sigret = 0;
- tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp;
+ if (tsk->mm->context.vdso) {
+ tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp_rt32);
} else {
- sigret = __NR_rt_sigreturn;
- tramp = (unsigned long) frame->tramp;
+ tramp = (unsigned long)mctx->mc_pad;
+ unsafe_put_user(PPC_RAW_LI(_R0, __NR_rt_sigreturn), &mctx->mc_pad[0], failed);
+ unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
+ asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
}
+ unsafe_put_sigset_t(&frame->uc.uc_sigmask, oldset, failed);
+
+ user_access_end();
+
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ goto badframe;
+
+ regs->link = tramp;
+
+#ifdef CONFIG_PPC_FPU_REGS
+ tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+#endif
+
+ /* create a stack frame for the caller of the handler */
+ newsp = ((unsigned long)frame) - (__SIGNAL_FRAMESIZE + 16);
+ if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ goto badframe;
+
+ /* Fill registers for signal handler */
+ regs->gpr[1] = newsp;
+ regs->gpr[3] = ksig->sig;
+ regs->gpr[4] = (unsigned long)&frame->info;
+ regs->gpr[5] = (unsigned long)&frame->uc;
+ regs->gpr[6] = (unsigned long)frame;
+ regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
+ /* enter the signal handler in native-endian mode */
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
+ return 0;
+
+failed:
+ user_access_end();
+badframe:
+ signal_fault(tsk, regs, "handle_rt_signal32", frame);
+
+ return 1;
+}
+
+/*
+ * OK, we're invoking a handler
+ */
+int handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct task_struct *tsk)
+{
+ struct sigcontext __user *sc;
+ struct sigframe __user *frame;
+ struct mcontext __user *mctx;
+ struct mcontext __user *tm_mctx = NULL;
+ unsigned long newsp = 0;
+ unsigned long tramp;
+ struct pt_regs *regs = tsk->thread.regs;
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
+
+ /* Set up Signal Frame */
+ frame = get_sigframe(ksig, tsk, sizeof(*frame), 1);
+ mctx = &frame->mctx;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- tm_frame = &rt_sf->uc_transact.uc_mcontext;
- if (MSR_TM_ACTIVE(regs->msr)) {
- if (__put_user((unsigned long)&rt_sf->uc_transact,
- &rt_sf->uc.uc_link) ||
- __put_user((unsigned long)tm_frame,
- &rt_sf->uc_transact.uc_regs))
- goto badframe;
- if (save_tm_user_regs(regs, frame, tm_frame, sigret))
- goto badframe;
- }
+ tm_mctx = &frame->mctx_transact;
+#endif
+ if (MSR_TM_ACTIVE(msr))
+ prepare_save_tm_user_regs();
else
+ prepare_save_user_regs(1);
+
+ if (!user_access_begin(frame, sizeof(*frame)))
+ goto badframe;
+ sc = (struct sigcontext __user *) &frame->sctx;
+
+#if _NSIG != 64
+#error "Please adjust handle_signal()"
#endif
- {
- if (__put_user(0, &rt_sf->uc.uc_link))
- goto badframe;
- if (save_user_regs(regs, frame, tm_frame, sigret, 1))
- goto badframe;
+ unsafe_put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler, failed);
+ unsafe_put_user(oldset->sig[0], &sc->oldmask, failed);
+#ifdef CONFIG_PPC64
+ unsafe_put_user((oldset->sig[0] >> 32), &sc->_unused[3], failed);
+#else
+ unsafe_put_user(oldset->sig[1], &sc->_unused[3], failed);
+#endif
+ unsafe_put_user(to_user_ptr(mctx), &sc->regs, failed);
+ unsafe_put_user(ksig->sig, &sc->signal, failed);
+
+ if (MSR_TM_ACTIVE(msr))
+ unsafe_save_tm_user_regs(regs, mctx, tm_mctx, msr, failed);
+ else
+ unsafe_save_user_regs(regs, mctx, tm_mctx, 1, failed);
+
+ if (tsk->mm->context.vdso) {
+ tramp = VDSO32_SYMBOL(tsk->mm->context.vdso, sigtramp32);
+ } else {
+ tramp = (unsigned long)mctx->mc_pad;
+ unsafe_put_user(PPC_RAW_LI(_R0, __NR_sigreturn), &mctx->mc_pad[0], failed);
+ unsafe_put_user(PPC_RAW_SC(), &mctx->mc_pad[1], failed);
+ asm("dcbst %y0; sync; icbi %y0; sync" :: "Z" (mctx->mc_pad[0]));
}
+ user_access_end();
+
regs->link = tramp;
- current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+#ifdef CONFIG_PPC_FPU_REGS
+ tsk->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
+#endif
/* create a stack frame for the caller of the handler */
- newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
- addr = (void __user *)regs->gpr[1];
+ newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
if (put_user(regs->gpr[1], (u32 __user *)newsp))
goto badframe;
- /* Fill registers for signal handler */
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
- regs->gpr[4] = (unsigned long) &rt_sf->info;
- regs->gpr[5] = (unsigned long) &rt_sf->uc;
- regs->gpr[6] = (unsigned long) rt_sf;
- regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
+ regs->gpr[4] = (unsigned long) sc;
+ regs_set_return_ip(regs, (unsigned long) ksig->ka.sa.sa_handler);
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
+
return 0;
+failed:
+ user_access_end();
+
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in handle_rt_signal32: "
- "%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
- addr, regs->nip, regs->link);
+ signal_fault(tsk, regs, "handle_signal32", frame);
return 1;
}
@@ -1076,28 +922,31 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
sigset_t set;
struct mcontext __user *mcp;
- if (get_sigset_t(&set, &ucp->uc_sigmask))
+ if (!user_read_access_begin(ucp, sizeof(*ucp)))
return -EFAULT;
+
+ unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
#ifdef CONFIG_PPC64
{
u32 cmcp;
- if (__get_user(cmcp, &ucp->uc_regs))
- return -EFAULT;
+ unsafe_get_user(cmcp, &ucp->uc_regs, failed);
mcp = (struct mcontext __user *)(u64)cmcp;
- /* no need to check access_ok(mcp), since mcp < 4GB */
}
#else
- if (__get_user(mcp, &ucp->uc_regs))
- return -EFAULT;
- if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
- return -EFAULT;
+ unsafe_get_user(mcp, &ucp->uc_regs, failed);
#endif
+ user_read_access_end();
+
set_current_blocked(&set);
if (restore_user_regs(regs, mcp, sig))
return -EFAULT;
return 0;
+
+failed:
+ user_read_access_end();
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -1111,11 +960,15 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
u32 cmcp;
u32 tm_cmcp;
- if (get_sigset_t(&set, &ucp->uc_sigmask))
+ if (!user_read_access_begin(ucp, sizeof(*ucp)))
return -EFAULT;
- if (__get_user(cmcp, &ucp->uc_regs) ||
- __get_user(tm_cmcp, &tm_ucp->uc_regs))
+ unsafe_get_sigset_t(&set, &ucp->uc_sigmask, failed);
+ unsafe_get_user(cmcp, &ucp->uc_regs, failed);
+
+ user_read_access_end();
+
+ if (__get_user(tm_cmcp, &tm_ucp->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
@@ -1126,14 +979,22 @@ static int do_setcontext_tm(struct ucontext __user *ucp,
return -EFAULT;
return 0;
+
+failed:
+ user_read_access_end();
+ return -EFAULT;
}
#endif
-long sys_swapcontext(struct ucontext __user *old_ctx,
- struct ucontext __user *new_ctx,
- int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ struct ucontext __user *, new_ctx, int, ctx_size)
+#else
+SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ struct ucontext __user *, new_ctx, long, ctx_size)
+#endif
{
- unsigned char tmp;
+ struct pt_regs *regs = current_pt_regs();
int ctx_has_vsx_region = 0;
#ifdef CONFIG_PPC64
@@ -1189,17 +1050,18 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
*/
mctx = (struct mcontext __user *)
((unsigned long) &old_ctx->uc_mcontext & ~0xfUL);
- if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
- || save_user_regs(regs, mctx, NULL, 0, ctx_has_vsx_region)
- || put_sigset_t(&old_ctx->uc_sigmask, &current->blocked)
- || __put_user(to_user_ptr(mctx), &old_ctx->uc_regs))
+ prepare_save_user_regs(ctx_has_vsx_region);
+ if (!user_write_access_begin(old_ctx, ctx_size))
return -EFAULT;
+ unsafe_save_user_regs(regs, mctx, NULL, ctx_has_vsx_region, failed);
+ unsafe_put_sigset_t(&old_ctx->uc_sigmask, &current->blocked, failed);
+ unsafe_put_user(to_user_ptr(mctx), &old_ctx->uc_regs, failed);
+ user_write_access_end();
}
if (new_ctx == NULL)
return 0;
- if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
- || __get_user(tmp, (u8 __user *) new_ctx)
- || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+ if (!access_ok(new_ctx, ctx_size) ||
+ fault_in_readable((char __user *)new_ctx, ctx_size))
return -EFAULT;
/*
@@ -1213,31 +1075,55 @@ long sys_swapcontext(struct ucontext __user *old_ctx,
* or if another thread unmaps the region containing the context.
* We kill the task with a SIGSEGV in this situation.
*/
- if (do_setcontext(new_ctx, regs, 0))
- do_exit(SIGSEGV);
+ if (do_setcontext(new_ctx, regs, 0)) {
+ force_exit_sig(SIGSEGV);
+ return -EFAULT;
+ }
set_thread_flag(TIF_RESTOREALL);
return 0;
+
+failed:
+ user_write_access_end();
+ return -EFAULT;
}
-long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
- struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
+#else
+SYSCALL_DEFINE0(rt_sigreturn)
+#endif
{
struct rt_sigframe __user *rt_sf;
+ struct pt_regs *regs = current_pt_regs();
+ int tm_restore = 0;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
struct ucontext __user *uc_transact;
unsigned long msr_hi;
unsigned long tmp;
- int tm_restore = 0;
#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
rt_sf = (struct rt_sigframe __user *)
(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
- if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
+ if (!access_ok(rt_sf, sizeof(*rt_sf)))
goto bad;
+
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * If there is a transactional state then throw it away.
+ * The purpose of a sigreturn is to destroy all traces of the
+ * signal frame, this includes any transactional state created
+ * within in. We only check for suspended as we can never be
+ * active in the kernel, we are active, there is nothing better to
+ * do than go ahead and Bad Thing later.
+ * The cause is not important as there will never be a
+ * recheckpoint so it's not user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+
if (__get_user(tmp, &rt_sf->uc.uc_link))
goto bad;
uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
@@ -1254,6 +1140,9 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
goto bad;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto bad;
/* We only recheckpoint on return if we're
* transaction.
*/
@@ -1262,11 +1151,19 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
goto bad;
}
}
- if (!tm_restore)
- /* Fall through, for non-TM restore */
+ if (!tm_restore) {
+ /*
+ * Unset regs->msr because ucontext MSR TS is not
+ * set, and recheckpoint was not called. This avoid
+ * hitting a TM Bad thing at RFID
+ */
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+ }
+ /* Fall through, for non-TM restore */
#endif
- if (do_setcontext(&rt_sf->uc, regs, 1))
- goto bad;
+ if (!tm_restore)
+ if (do_setcontext(&rt_sf->uc, regs, 1))
+ goto bad;
/*
* It's not clear whether or why it is desirable to save the
@@ -1286,26 +1183,19 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
return 0;
bad:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in sys_rt_sigreturn: "
- "%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
- rt_sf, regs->nip, regs->link);
-
- force_sig(SIGSEGV, current);
+ signal_fault(current, regs, "sys_rt_sigreturn", rt_sf);
+
+ force_sig(SIGSEGV);
return 0;
}
#ifdef CONFIG_PPC32
-int sys_debug_setcontext(struct ucontext __user *ctx,
- int ndbg, struct sig_dbg_op __user *dbg,
- int r6, int r7, int r8,
- struct pt_regs *regs)
+SYSCALL_DEFINE3(debug_setcontext, struct ucontext __user *, ctx,
+ int, ndbg, struct sig_dbg_op __user *, dbg)
{
+ struct pt_regs *regs = current_pt_regs();
struct sig_dbg_op op;
int i;
- unsigned char tmp;
unsigned long new_msr = regs->msr;
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
unsigned long new_dbcr0 = current->thread.debug.dbcr0;
@@ -1356,14 +1246,13 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
affect the contents of these registers. After this point,
failure is a problem, anyway, and it's very unlikely unless
the user is really doing something wrong. */
- regs->msr = new_msr;
+ regs_set_return_msr(regs, new_msr);
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
current->thread.debug.dbcr0 = new_dbcr0;
#endif
- if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
- || __get_user(tmp, (u8 __user *) ctx)
- || __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
+ if (!access_ok(ctx, sizeof(*ctx)) ||
+ fault_in_readable((char __user *)ctx, sizeof(*ctx)))
return -EFAULT;
/*
@@ -1378,14 +1267,9 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
* We kill the task with a SIGSEGV in this situation.
*/
if (do_setcontext(ctx, regs, 1)) {
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO "%s[%d]: bad frame in "
- "sys_debug_setcontext: %p nip %08lx "
- "lr %08lx\n",
- current->comm, current->pid,
- ctx, regs->nip, regs->link);
-
- force_sig(SIGSEGV, current);
+ signal_fault(current, regs, "sys_debug_setcontext", ctx);
+
+ force_sig(SIGSEGV);
goto out;
}
@@ -1405,110 +1289,29 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
#endif
/*
- * OK, we're invoking a handler
+ * Do a signal return; undo the signal stack.
*/
-int handle_signal32(struct ksignal *ksig, sigset_t *oldset, struct pt_regs *regs)
-{
- struct sigcontext __user *sc;
- struct sigframe __user *frame;
- struct mcontext __user *tm_mctx = NULL;
- unsigned long newsp = 0;
- int sigret;
- unsigned long tramp;
-
- /* Set up Signal Frame */
- frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 1);
- if (unlikely(frame == NULL))
- goto badframe;
- sc = (struct sigcontext __user *) &frame->sctx;
-
-#if _NSIG != 64
-#error "Please adjust handle_signal()"
-#endif
- if (__put_user(to_user_ptr(ksig->ka.sa.sa_handler), &sc->handler)
- || __put_user(oldset->sig[0], &sc->oldmask)
#ifdef CONFIG_PPC64
- || __put_user((oldset->sig[0] >> 32), &sc->_unused[3])
+COMPAT_SYSCALL_DEFINE0(sigreturn)
#else
- || __put_user(oldset->sig[1], &sc->_unused[3])
-#endif
- || __put_user(to_user_ptr(&frame->mctx), &sc->regs)
- || __put_user(ksig->sig, &sc->signal))
- goto badframe;
-
- if (vdso32_sigtramp && current->mm->context.vdso_base) {
- sigret = 0;
- tramp = current->mm->context.vdso_base + vdso32_sigtramp;
- } else {
- sigret = __NR_sigreturn;
- tramp = (unsigned long) frame->mctx.tramp;
- }
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- tm_mctx = &frame->mctx_transact;
- if (MSR_TM_ACTIVE(regs->msr)) {
- if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
- sigret))
- goto badframe;
- }
- else
+SYSCALL_DEFINE0(sigreturn)
#endif
- {
- if (save_user_regs(regs, &frame->mctx, tm_mctx, sigret, 1))
- goto badframe;
- }
-
- regs->link = tramp;
-
- current->thread.fp_state.fpscr = 0; /* turn off all fp exceptions */
-
- /* create a stack frame for the caller of the handler */
- newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
- if (put_user(regs->gpr[1], (u32 __user *)newsp))
- goto badframe;
-
- regs->gpr[1] = newsp;
- regs->gpr[3] = ksig->sig;
- regs->gpr[4] = (unsigned long) sc;
- regs->nip = (unsigned long) (unsigned long)ksig->ka.sa.sa_handler;
- /* enter the signal handler in big-endian mode */
- regs->msr &= ~MSR_LE;
- return 0;
-
-badframe:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in handle_signal32: "
- "%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
- frame, regs->nip, regs->link);
-
- return 1;
-}
-
-/*
- * Do a signal return; undo the signal stack.
- */
-long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
- struct pt_regs *regs)
{
+ struct pt_regs *regs = current_pt_regs();
struct sigframe __user *sf;
struct sigcontext __user *sc;
struct sigcontext sigctx;
struct mcontext __user *sr;
- void __user *addr;
sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- struct mcontext __user *mcp, *tm_mcp;
- unsigned long msr_hi;
-#endif
+ struct mcontext __user *mcp;
+ struct mcontext __user *tm_mcp = NULL;
+ unsigned long long msr_hi = 0;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
sf = (struct sigframe __user *)(regs->gpr[1] + __SIGNAL_FRAMESIZE);
sc = &sf->sctx;
- addr = sc;
if (copy_from_user(&sigctx, sc, sizeof(sigctx)))
goto badframe;
@@ -1524,37 +1327,33 @@ long sys_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
#endif
set_current_blocked(&set);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
mcp = (struct mcontext __user *)&sf->mctx;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
tm_mcp = (struct mcontext __user *)&sf->mctx_transact;
if (__get_user(msr_hi, &tm_mcp->mc_gregs[PT_MSR]))
goto badframe;
+#endif
if (MSR_TM_ACTIVE(msr_hi<<32)) {
if (!cpu_has_feature(CPU_FTR_TM))
goto badframe;
if (restore_tm_user_regs(regs, mcp, tm_mcp))
goto badframe;
- } else
-#endif
- {
+ } else {
sr = (struct mcontext __user *)from_user_ptr(sigctx.regs);
- addr = sr;
- if (!access_ok(VERIFY_READ, sr, sizeof(*sr))
- || restore_user_regs(regs, sr, 1))
- goto badframe;
+ if (restore_user_regs(regs, sr, 1)) {
+ signal_fault(current, regs, "sys_sigreturn", sr);
+
+ force_sig(SIGSEGV);
+ return 0;
+ }
}
set_thread_flag(TIF_RESTOREALL);
return 0;
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(KERN_INFO
- "%s[%d]: bad frame in sys_sigreturn: "
- "%p nip %08lx lr %08lx\n",
- current->comm, current->pid,
- addr, regs->nip, regs->link);
-
- force_sig(SIGSEGV, current);
+ signal_fault(current, regs, "sys_sigreturn", sc);
+
+ force_sig(SIGSEGV);
return 0;
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 2cb0c94cafa5..86bb5bb4c143 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -5,11 +6,6 @@
* Derived from "arch/i386/kernel/signal.c"
* Copyright (C) 1991, 1992 Linus Torvalds
* 1997-11-28 Modified for POSIX.1b signals by Richard Henderson
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
@@ -24,17 +20,19 @@
#include <linux/elf.h>
#include <linux/ptrace.h>
#include <linux/ratelimit.h>
+#include <linux/syscalls.h>
+#include <linux/pagemap.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
-#include <asm/uaccess.h>
-#include <asm/pgtable.h>
+#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/cacheflush.h>
#include <asm/syscalls.h>
#include <asm/vdso.h>
#include <asm/switch_to.h>
#include <asm/tm.h>
+#include <asm/asm-prototypes.h>
#include "signal.h"
@@ -42,8 +40,8 @@
#define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
#define FP_REGS_SIZE sizeof(elf_fpregset_t)
-#define TRAMP_TRACEBACK 3
-#define TRAMP_SIZE 6
+#define TRAMP_TRACEBACK 4
+#define TRAMP_SIZE 7
/*
* When we have signals to deliver, we set up on the user stack,
@@ -68,18 +66,54 @@ struct rt_sigframe {
char abigap[USER_REDZONE_SIZE];
} __attribute__ ((aligned (16)));
-static const char fmt32[] = KERN_INFO \
- "%s[%d]: bad frame in %s: %08lx nip %08lx lr %08lx\n";
-static const char fmt64[] = KERN_INFO \
- "%s[%d]: bad frame in %s: %016lx nip %016lx lr %016lx\n";
+unsigned long get_min_sigframe_size_64(void)
+{
+ return sizeof(struct rt_sigframe) + __SIGNAL_FRAMESIZE;
+}
+
+/*
+ * This computes a quad word aligned pointer inside the vmx_reserve array
+ * element. For historical reasons sigcontext might not be quad word aligned,
+ * but the location we write the VMX regs to must be. See the comment in
+ * sigcontext for more detail.
+ */
+#ifdef CONFIG_ALTIVEC
+static elf_vrreg_t __user *sigcontext_vmx_regs(struct sigcontext __user *sc)
+{
+ return (elf_vrreg_t __user *) (((unsigned long)sc->vmx_reserve + 15) & ~0xful);
+}
+#endif
+
+static void prepare_setup_sigcontext(struct task_struct *tsk)
+{
+#ifdef CONFIG_ALTIVEC
+ /* save altivec registers */
+ if (tsk->thread.used_vr)
+ flush_altivec_to_thread(tsk);
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ tsk->thread.vrsave = mfspr(SPRN_VRSAVE);
+#endif /* CONFIG_ALTIVEC */
+
+ flush_fp_to_thread(tsk);
+
+#ifdef CONFIG_VSX
+ if (tsk->thread.used_vsr)
+ flush_vsx_to_thread(tsk);
+#endif /* CONFIG_VSX */
+}
/*
* Set up the sigcontext for the signal frame.
*/
-static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
- int signr, sigset_t *set, unsigned long handler,
- int ctx_has_vsx_region)
+#define unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region, label)\
+do { \
+ if (__unsafe_setup_sigcontext(sc, tsk, signr, set, handler, ctx_has_vsx_region))\
+ goto label; \
+} while (0)
+static long notrace __unsafe_setup_sigcontext(struct sigcontext __user *sc,
+ struct task_struct *tsk, int signr, sigset_t *set,
+ unsigned long handler, int ctx_has_vsx_region)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -90,20 +124,23 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
* v_regs pointer or not
*/
#ifdef CONFIG_ALTIVEC
- elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)(((unsigned long)sc->vmx_reserve + 15) & ~0xful);
+ elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
#endif
+ struct pt_regs *regs = tsk->thread.regs;
unsigned long msr = regs->msr;
- long err = 0;
+ /* Force usr to always see softe as 1 (interrupts enabled) */
+ unsigned long softe = 0x1;
+
+ BUG_ON(tsk != current);
#ifdef CONFIG_ALTIVEC
- err |= __put_user(v_regs, &sc->v_regs);
+ unsafe_put_user(v_regs, &sc->v_regs, efault_out);
/* save altivec registers */
- if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
+ if (tsk->thread.used_vr) {
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &current->thread.vr_state,
- 33 * sizeof(vector128));
+ unsafe_copy_to_user(v_regs, &tsk->thread.vr_state,
+ 33 * sizeof(vector128), efault_out);
/* set MSR_VEC in the MSR value in the frame to indicate that sc->v_reg)
* contains valid data.
*/
@@ -112,15 +149,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
/* We always copy to/from vrsave, it's 0 if we don't have or don't
* use altivec.
*/
- if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_put_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
#else /* CONFIG_ALTIVEC */
- err |= __put_user(0, &sc->v_regs);
+ unsafe_put_user(0, &sc->v_regs, efault_out);
#endif /* CONFIG_ALTIVEC */
- flush_fp_to_thread(current);
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, current);
+ unsafe_copy_fpr_to_user(&sc->fp_regs, tsk, efault_out);
/*
* Clear the MSR VSX bit to indicate there is no valid state attached
@@ -133,26 +167,28 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
* then out to userspace. Update v_regs to point after the
* VMX data.
*/
- if (current->thread.used_vsr && ctx_has_vsx_region) {
- __giveup_vsx(current);
+ if (tsk->thread.used_vsr && ctx_has_vsx_region) {
v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, current);
+ unsafe_copy_vsx_to_user(v_regs, tsk, efault_out);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_reg) contains valid data.
*/
msr |= MSR_VSX;
}
#endif /* CONFIG_VSX */
- err |= __put_user(&sc->gp_regs, &sc->regs);
- WARN_ON(!FULL_REGS(regs));
- err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
- err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
- err |= __put_user(signr, &sc->signal);
- err |= __put_user(handler, &sc->handler);
+ unsafe_put_user(&sc->gp_regs, &sc->regs, efault_out);
+ unsafe_copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE, efault_out);
+ unsafe_put_user(msr, &sc->gp_regs[PT_MSR], efault_out);
+ unsafe_put_user(softe, &sc->gp_regs[PT_SOFTE], efault_out);
+ unsafe_put_user(signr, &sc->signal, efault_out);
+ unsafe_put_user(handler, &sc->handler, efault_out);
if (set != NULL)
- err |= __put_user(set->sig[0], &sc->oldmask);
+ unsafe_put_user(set->sig[0], &sc->oldmask, efault_out);
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -169,8 +205,9 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
*/
static long setup_tm_sigcontexts(struct sigcontext __user *sc,
struct sigcontext __user *tm_sc,
- struct pt_regs *regs,
- int signr, sigset_t *set, unsigned long handler)
+ struct task_struct *tsk,
+ int signr, sigset_t *set, unsigned long handler,
+ unsigned long msr)
{
/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
* process never used altivec yet (MSR_VEC is zero in pt_regs of
@@ -181,45 +218,43 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
* v_regs pointer or not.
*/
#ifdef CONFIG_ALTIVEC
- elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)
- (((unsigned long)sc->vmx_reserve + 15) & ~0xful);
- elf_vrreg_t __user *tm_v_regs = (elf_vrreg_t __user *)
- (((unsigned long)tm_sc->vmx_reserve + 15) & ~0xful);
+ elf_vrreg_t __user *v_regs = sigcontext_vmx_regs(sc);
+ elf_vrreg_t __user *tm_v_regs = sigcontext_vmx_regs(tm_sc);
#endif
- unsigned long msr = regs->msr;
+ struct pt_regs *regs = tsk->thread.regs;
long err = 0;
- BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+ BUG_ON(tsk != current);
- /* Remove TM bits from thread's MSR. The MSR in the sigcontext
- * just indicates to userland that we were doing a transaction, but we
- * don't want to return in transactional state. This also ensures
- * that flush_fp_to_thread won't set TIF_RESTORE_TM again.
- */
- regs->msr &= ~MSR_TS_MASK;
+ BUG_ON(!MSR_TM_ACTIVE(msr));
- flush_fp_to_thread(current);
+ WARN_ON(tm_suspend_disabled);
+
+ /* Restore checkpointed FP, VEC, and VSX bits from ckpt_regs as
+ * it contains the correct FP, VEC, VSX state after we treclaimed
+ * the transaction and giveup_all() was called on reclaiming.
+ */
+ msr |= tsk->thread.ckpt_regs.msr & (MSR_FP | MSR_VEC | MSR_VSX);
#ifdef CONFIG_ALTIVEC
err |= __put_user(v_regs, &sc->v_regs);
err |= __put_user(tm_v_regs, &tm_sc->v_regs);
/* save altivec registers */
- if (current->thread.used_vr) {
- flush_altivec_to_thread(current);
+ if (tsk->thread.used_vr) {
/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
- err |= __copy_to_user(v_regs, &current->thread.vr_state,
+ err |= __copy_to_user(v_regs, &tsk->thread.ckvr_state,
33 * sizeof(vector128));
/* If VEC was enabled there are transactional VRs valid too,
* else they're a copy of the checkpointed VRs.
*/
if (msr & MSR_VEC)
err |= __copy_to_user(tm_v_regs,
- &current->thread.transact_vr,
+ &tsk->thread.vr_state,
33 * sizeof(vector128));
else
err |= __copy_to_user(tm_v_regs,
- &current->thread.vr_state,
+ &tsk->thread.ckvr_state,
33 * sizeof(vector128));
/* set MSR_VEC in the MSR value in the frame to indicate
@@ -231,13 +266,13 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
* use altivec.
*/
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- current->thread.vrsave = mfspr(SPRN_VRSAVE);
- err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ tsk->thread.ckvrsave = mfspr(SPRN_VRSAVE);
+ err |= __put_user(tsk->thread.ckvrsave, (u32 __user *)&v_regs[33]);
if (msr & MSR_VEC)
- err |= __put_user(current->thread.transact_vrsave,
+ err |= __put_user(tsk->thread.vrsave,
(u32 __user *)&tm_v_regs[33]);
else
- err |= __put_user(current->thread.vrsave,
+ err |= __put_user(tsk->thread.ckvrsave,
(u32 __user *)&tm_v_regs[33]);
#else /* CONFIG_ALTIVEC */
@@ -246,11 +281,11 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
#endif /* CONFIG_ALTIVEC */
/* copy fpr regs and fpscr */
- err |= copy_fpr_to_user(&sc->fp_regs, current);
+ err |= copy_ckfpr_to_user(&sc->fp_regs, tsk);
if (msr & MSR_FP)
- err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current);
+ err |= copy_fpr_to_user(&tm_sc->fp_regs, tsk);
else
- err |= copy_fpr_to_user(&tm_sc->fp_regs, current);
+ err |= copy_ckfpr_to_user(&tm_sc->fp_regs, tsk);
#ifdef CONFIG_VSX
/*
@@ -258,17 +293,16 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
* then out to userspace. Update v_regs to point after the
* VMX data.
*/
- if (current->thread.used_vsr) {
- __giveup_vsx(current);
+ if (tsk->thread.used_vsr) {
v_regs += ELF_NVRREG;
tm_v_regs += ELF_NVRREG;
- err |= copy_vsx_to_user(v_regs, current);
+ err |= copy_ckvsx_to_user(v_regs, tsk);
if (msr & MSR_VSX)
- err |= copy_transact_vsx_to_user(tm_v_regs, current);
+ err |= copy_vsx_to_user(tm_v_regs, tsk);
else
- err |= copy_vsx_to_user(tm_v_regs, current);
+ err |= copy_ckvsx_to_user(tm_v_regs, tsk);
/* set MSR_VSX in the MSR value in the frame to
* indicate that sc->vs_reg) contains valid data.
@@ -279,10 +313,9 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
err |= __put_user(&sc->gp_regs, &sc->regs);
err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs);
- WARN_ON(!FULL_REGS(regs));
err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
err |= __copy_to_user(&sc->gp_regs,
- &current->thread.ckpt_regs, GP_REGS_SIZE);
+ &tsk->thread.ckpt_regs, GP_REGS_SIZE);
err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]);
err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
err |= __put_user(signr, &sc->signal);
@@ -297,85 +330,84 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
/*
* Restore the sigcontext from the signal frame.
*/
-
-static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
- struct sigcontext __user *sc)
+#define unsafe_restore_sigcontext(tsk, set, sig, sc, label) do { \
+ if (__unsafe_restore_sigcontext(tsk, set, sig, sc)) \
+ goto label; \
+} while (0)
+static long notrace __unsafe_restore_sigcontext(struct task_struct *tsk, sigset_t *set,
+ int sig, struct sigcontext __user *sc)
{
#ifdef CONFIG_ALTIVEC
elf_vrreg_t __user *v_regs;
#endif
- unsigned long err = 0;
unsigned long save_r13 = 0;
unsigned long msr;
+ struct pt_regs *regs = tsk->thread.regs;
#ifdef CONFIG_VSX
int i;
#endif
+ BUG_ON(tsk != current);
+
/* If this is not a signal return, we preserve the TLS in r13 */
if (!sig)
save_r13 = regs->gpr[13];
/* copy the GPRs */
- err |= __copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr));
- err |= __get_user(regs->nip, &sc->gp_regs[PT_NIP]);
+ unsafe_copy_from_user(regs->gpr, sc->gp_regs, sizeof(regs->gpr), efault_out);
+ unsafe_get_user(regs->nip, &sc->gp_regs[PT_NIP], efault_out);
/* get MSR separately, transfer the LE bit if doing signal return */
- err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+ unsafe_get_user(msr, &sc->gp_regs[PT_MSR], efault_out);
if (sig)
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
- err |= __get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3]);
- err |= __get_user(regs->ctr, &sc->gp_regs[PT_CTR]);
- err |= __get_user(regs->link, &sc->gp_regs[PT_LNK]);
- err |= __get_user(regs->xer, &sc->gp_regs[PT_XER]);
- err |= __get_user(regs->ccr, &sc->gp_regs[PT_CCR]);
- /* skip SOFTE */
- regs->trap = 0;
- err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
- err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
- err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
+ unsafe_get_user(regs->orig_gpr3, &sc->gp_regs[PT_ORIG_R3], efault_out);
+ unsafe_get_user(regs->ctr, &sc->gp_regs[PT_CTR], efault_out);
+ unsafe_get_user(regs->link, &sc->gp_regs[PT_LNK], efault_out);
+ unsafe_get_user(regs->xer, &sc->gp_regs[PT_XER], efault_out);
+ unsafe_get_user(regs->ccr, &sc->gp_regs[PT_CCR], efault_out);
+ /* Don't allow userspace to set SOFTE */
+ set_trap_norestart(regs);
+ unsafe_get_user(regs->dar, &sc->gp_regs[PT_DAR], efault_out);
+ unsafe_get_user(regs->dsisr, &sc->gp_regs[PT_DSISR], efault_out);
+ unsafe_get_user(regs->result, &sc->gp_regs[PT_RESULT], efault_out);
if (!sig)
regs->gpr[13] = save_r13;
if (set != NULL)
- err |= __get_user(set->sig[0], &sc->oldmask);
-
- /*
- * Do this before updating the thread state in
- * current->thread.fpr/vr. That way, if we get preempted
- * and another task grabs the FPU/Altivec, it won't be
- * tempted to save the current CPU state into the thread_struct
- * and corrupt what we are writing there.
- */
- discard_lazy_cpu_state();
+ unsafe_get_user(set->sig[0], &sc->oldmask, efault_out);
/*
- * Force reload of FP/VEC.
- * This has to be done before copying stuff into current->thread.fpr/vr
- * for the reasons explained in the previous comment.
+ * Force reload of FP/VEC/VSX so userspace sees any changes.
+ * Clear these bits from the user process' MSR before copying into the
+ * thread struct. If we are rescheduled or preempted and another task
+ * uses FP/VEC/VSX, and this process has the MSR bits set, then the
+ * context switch code will save the current CPU state into the
+ * thread_struct - possibly overwriting the data we are updating here.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
#ifdef CONFIG_ALTIVEC
- err |= __get_user(v_regs, &sc->v_regs);
- if (err)
- return err;
- if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+ unsafe_get_user(v_regs, &sc->v_regs, efault_out);
+ if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
- if (v_regs != NULL && (msr & MSR_VEC) != 0)
- err |= __copy_from_user(&current->thread.vr_state, v_regs,
- 33 * sizeof(vector128));
- else if (current->thread.used_vr)
- memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
+ if (v_regs != NULL && (msr & MSR_VEC) != 0) {
+ unsafe_copy_from_user(&tsk->thread.vr_state, v_regs,
+ 33 * sizeof(vector128), efault_out);
+ tsk->thread.used_vr = true;
+ } else if (tsk->thread.used_vr) {
+ memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
+ }
/* Always get VRSAVE back */
if (v_regs != NULL)
- err |= __get_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+ unsafe_get_user(tsk->thread.vrsave, (u32 __user *)&v_regs[33], efault_out);
else
- current->thread.vrsave = 0;
+ tsk->thread.vrsave = 0;
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mtspr(SPRN_VRSAVE, current->thread.vrsave);
+ mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(current, &sc->fp_regs);
+ unsafe_copy_fpr_from_user(tsk, &sc->fp_regs, efault_out);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
@@ -383,13 +415,18 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
* buffer for formatting, then into the taskstruct.
*/
v_regs += ELF_NVRREG;
- if ((msr & MSR_VSX) != 0)
- err |= copy_vsx_from_user(current, v_regs);
- else
+ if ((msr & MSR_VSX) != 0) {
+ unsafe_copy_vsx_from_user(tsk, v_regs, efault_out);
+ tsk->thread.used_vsr = true;
+ } else {
for (i = 0; i < 32 ; i++)
- current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+ tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+ }
#endif
- return err;
+ return 0;
+
+efault_out:
+ return -EFAULT;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -397,7 +434,7 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
* Restore the two sigcontexts from the frame of a transactional processes.
*/
-static long restore_tm_sigcontexts(struct pt_regs *regs,
+static long restore_tm_sigcontexts(struct task_struct *tsk,
struct sigcontext __user *sc,
struct sigcontext __user *tm_sc)
{
@@ -406,12 +443,19 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
#endif
unsigned long err = 0;
unsigned long msr;
+ struct pt_regs *regs = tsk->thread.regs;
#ifdef CONFIG_VSX
int i;
#endif
+
+ BUG_ON(tsk != current);
+
+ if (tm_suspend_disabled)
+ return -EINVAL;
+
/* copy the GPRs */
err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
- err |= __copy_from_user(&current->thread.ckpt_regs, sc->gp_regs,
+ err |= __copy_from_user(&tsk->thread.ckpt_regs, sc->gp_regs,
sizeof(regs->gpr));
/*
@@ -423,90 +467,82 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
* we don't need to re-copy them here.
*/
err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]);
- err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]);
+ err |= __get_user(tsk->thread.tm_tfhar, &sc->gp_regs[PT_NIP]);
/* get MSR separately, transfer the LE bit if doing signal return */
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
- /* pull in MSR TM from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+ /* Don't allow reserved mode. */
+ if (MSR_TM_RESV(msr))
+ return -EINVAL;
/* pull in MSR LE from user context */
- regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (msr & MSR_LE));
/* The following non-GPR non-FPR non-VR state is also checkpointed: */
err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]);
err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]);
err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]);
err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]);
- err |= __get_user(current->thread.ckpt_regs.ctr,
+ err |= __get_user(tsk->thread.ckpt_regs.ctr,
&sc->gp_regs[PT_CTR]);
- err |= __get_user(current->thread.ckpt_regs.link,
+ err |= __get_user(tsk->thread.ckpt_regs.link,
&sc->gp_regs[PT_LNK]);
- err |= __get_user(current->thread.ckpt_regs.xer,
+ err |= __get_user(tsk->thread.ckpt_regs.xer,
&sc->gp_regs[PT_XER]);
- err |= __get_user(current->thread.ckpt_regs.ccr,
+ err |= __get_user(tsk->thread.ckpt_regs.ccr,
&sc->gp_regs[PT_CCR]);
-
+ /* Don't allow userspace to set SOFTE */
+ set_trap_norestart(regs);
/* These regs are not checkpointed; they can go in 'regs'. */
- err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]);
err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
/*
- * Do this before updating the thread state in
- * current->thread.fpr/vr. That way, if we get preempted
- * and another task grabs the FPU/Altivec, it won't be
- * tempted to save the current CPU state into the thread_struct
- * and corrupt what we are writing there.
- */
- discard_lazy_cpu_state();
-
- /*
* Force reload of FP/VEC.
- * This has to be done before copying stuff into current->thread.fpr/vr
+ * This has to be done before copying stuff into tsk->thread.fpr/vr
* for the reasons explained in the previous comment.
*/
- regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+ regs_set_return_msr(regs, regs->msr & ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX));
#ifdef CONFIG_ALTIVEC
err |= __get_user(v_regs, &sc->v_regs);
err |= __get_user(tm_v_regs, &tm_sc->v_regs);
if (err)
return err;
- if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+ if (v_regs && !access_ok(v_regs, 34 * sizeof(vector128)))
return -EFAULT;
- if (tm_v_regs && !access_ok(VERIFY_READ,
- tm_v_regs, 34 * sizeof(vector128)))
+ if (tm_v_regs && !access_ok(tm_v_regs, 34 * sizeof(vector128)))
return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != NULL && tm_v_regs != NULL && (msr & MSR_VEC) != 0) {
- err |= __copy_from_user(&current->thread.vr_state, v_regs,
+ err |= __copy_from_user(&tsk->thread.ckvr_state, v_regs,
33 * sizeof(vector128));
- err |= __copy_from_user(&current->thread.transact_vr, tm_v_regs,
+ err |= __copy_from_user(&tsk->thread.vr_state, tm_v_regs,
33 * sizeof(vector128));
+ current->thread.used_vr = true;
}
- else if (current->thread.used_vr) {
- memset(&current->thread.vr_state, 0, 33 * sizeof(vector128));
- memset(&current->thread.transact_vr, 0, 33 * sizeof(vector128));
+ else if (tsk->thread.used_vr) {
+ memset(&tsk->thread.vr_state, 0, 33 * sizeof(vector128));
+ memset(&tsk->thread.ckvr_state, 0, 33 * sizeof(vector128));
}
/* Always get VRSAVE back */
if (v_regs != NULL && tm_v_regs != NULL) {
- err |= __get_user(current->thread.vrsave,
+ err |= __get_user(tsk->thread.ckvrsave,
(u32 __user *)&v_regs[33]);
- err |= __get_user(current->thread.transact_vrsave,
+ err |= __get_user(tsk->thread.vrsave,
(u32 __user *)&tm_v_regs[33]);
}
else {
- current->thread.vrsave = 0;
- current->thread.transact_vrsave = 0;
+ tsk->thread.vrsave = 0;
+ tsk->thread.ckvrsave = 0;
}
if (cpu_has_feature(CPU_FTR_ALTIVEC))
- mtspr(SPRN_VRSAVE, current->thread.vrsave);
+ mtspr(SPRN_VRSAVE, tsk->thread.vrsave);
#endif /* CONFIG_ALTIVEC */
/* restore floating point */
- err |= copy_fpr_from_user(current, &sc->fp_regs);
- err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs);
+ err |= copy_fpr_from_user(tsk, &tm_sc->fp_regs);
+ err |= copy_ckfpr_from_user(tsk, &sc->fp_regs);
#ifdef CONFIG_VSX
/*
* Get additional VSX data. Update v_regs to point after the
@@ -516,35 +552,70 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
if (v_regs && ((msr & MSR_VSX) != 0)) {
v_regs += ELF_NVRREG;
tm_v_regs += ELF_NVRREG;
- err |= copy_vsx_from_user(current, v_regs);
- err |= copy_transact_vsx_from_user(current, tm_v_regs);
+ err |= copy_vsx_from_user(tsk, tm_v_regs);
+ err |= copy_ckvsx_from_user(tsk, v_regs);
+ tsk->thread.used_vsr = true;
} else {
for (i = 0; i < 32 ; i++) {
- current->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
- current->thread.transact_fp.fpr[i][TS_VSRLOWOFFSET] = 0;
+ tsk->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
+ tsk->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = 0;
}
}
#endif
tm_enable();
/* Make sure the transaction is marked as failed */
- current->thread.tm_texasr |= TEXASR_FS;
+ tsk->thread.tm_texasr |= TEXASR_FS;
+
+ /*
+ * Disabling preemption, since it is unsafe to be preempted
+ * with MSR[TS] set without recheckpointing.
+ */
+ preempt_disable();
+
+ /* pull in MSR TS bits from user context */
+ regs_set_return_msr(regs, regs->msr | (msr & MSR_TS_MASK));
+
+ /*
+ * Ensure that TM is enabled in regs->msr before we leave the signal
+ * handler. It could be the case that (a) user disabled the TM bit
+ * through the manipulation of the MSR bits in uc_mcontext or (b) the
+ * TM bit was disabled because a sufficient number of context switches
+ * happened whilst in the signal handler and load_tm overflowed,
+ * disabling the TM bit. In either case we can end up with an illegal
+ * TM state leading to a TM Bad Thing when we return to userspace.
+ *
+ * CAUTION:
+ * After regs->MSR[TS] has been updated, make sure that get_user(),
+ * put_user() or similar functions are *not* called. These
+ * functions can generate page faults which will cause the process
+ * to be de-scheduled with MSR[TS] set but without calling
+ * tm_recheckpoint(). This can cause a bug.
+ */
+ regs_set_return_msr(regs, regs->msr | MSR_TM);
+
/* This loads the checkpointed FP/VEC state, if used */
- tm_recheckpoint(&current->thread, msr);
+ tm_recheckpoint(&tsk->thread);
- /* This loads the speculative FP/VEC state, if used */
+ msr_check_and_set(msr & (MSR_FP | MSR_VEC));
if (msr & MSR_FP) {
- do_load_up_transact_fpu(&current->thread);
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ load_fp_state(&tsk->thread.fp_state);
+ regs_set_return_msr(regs, regs->msr | (MSR_FP | tsk->thread.fpexc_mode));
}
-#ifdef CONFIG_ALTIVEC
if (msr & MSR_VEC) {
- do_load_up_transact_altivec(&current->thread);
- regs->msr |= MSR_VEC;
+ load_vr_state(&tsk->thread.vr_state);
+ regs_set_return_msr(regs, regs->msr | MSR_VEC);
}
-#endif
+
+ preempt_enable();
return err;
}
+#else /* !CONFIG_PPC_TRANSACTIONAL_MEM */
+static long restore_tm_sigcontexts(struct task_struct *tsk, struct sigcontext __user *sc,
+ struct sigcontext __user *tm_sc)
+{
+ return -EINVAL;
+}
#endif
/*
@@ -555,12 +626,12 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
int i;
long err = 0;
- /* addi r1, r1, __SIGNAL_FRAMESIZE # Pop the dummy stackframe */
- err |= __put_user(0x38210000UL | (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
- /* li r0, __NR_[rt_]sigreturn| */
- err |= __put_user(0x38000000UL | (syscall & 0xffff), &tramp[1]);
- /* sc */
- err |= __put_user(0x44000002UL, &tramp[2]);
+ /* Call the handler and pop the dummy stackframe */
+ err |= __put_user(PPC_RAW_BCTRL(), &tramp[0]);
+ err |= __put_user(PPC_RAW_ADDI(_R1, _R1, __SIGNAL_FRAMESIZE), &tramp[1]);
+
+ err |= __put_user(PPC_RAW_LI(_R0, syscall), &tramp[2]);
+ err |= __put_user(PPC_RAW_SC(), &tramp[3]);
/* Minimal traceback info */
for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
@@ -583,11 +654,9 @@ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
/*
* Handle {get,set,swap}_context operations
*/
-int sys_swapcontext(struct ucontext __user *old_ctx,
- struct ucontext __user *new_ctx,
- long ctx_size, long r6, long r7, long r8, struct pt_regs *regs)
+SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
+ struct ucontext __user *, new_ctx, long, ctx_size)
{
- unsigned char tmp;
sigset_t set;
unsigned long new_msr = 0;
int ctx_has_vsx_region = 0;
@@ -613,18 +682,21 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
ctx_has_vsx_region = 1;
if (old_ctx != NULL) {
- if (!access_ok(VERIFY_WRITE, old_ctx, ctx_size)
- || setup_sigcontext(&old_ctx->uc_mcontext, regs, 0, NULL, 0,
- ctx_has_vsx_region)
- || __copy_to_user(&old_ctx->uc_sigmask,
- &current->blocked, sizeof(sigset_t)))
+ prepare_setup_sigcontext(current);
+ if (!user_write_access_begin(old_ctx, ctx_size))
return -EFAULT;
+
+ unsafe_setup_sigcontext(&old_ctx->uc_mcontext, current, 0, NULL,
+ 0, ctx_has_vsx_region, efault_out);
+ unsafe_copy_to_user(&old_ctx->uc_sigmask, &current->blocked,
+ sizeof(sigset_t), efault_out);
+
+ user_write_access_end();
}
if (new_ctx == NULL)
return 0;
- if (!access_ok(VERIFY_READ, new_ctx, ctx_size)
- || __get_user(tmp, (u8 __user *) new_ctx)
- || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+ if (!access_ok(new_ctx, ctx_size) ||
+ fault_in_readable((char __user *)new_ctx, ctx_size))
return -EFAULT;
/*
@@ -639,15 +711,29 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
* We kill the task with a SIGSEGV in this situation.
*/
- if (__copy_from_user(&set, &new_ctx->uc_sigmask, sizeof(set)))
- do_exit(SIGSEGV);
+ if (__get_user_sigset(&set, &new_ctx->uc_sigmask)) {
+ force_exit_sig(SIGSEGV);
+ return -EFAULT;
+ }
set_current_blocked(&set);
- if (restore_sigcontext(regs, NULL, 0, &new_ctx->uc_mcontext))
- do_exit(SIGSEGV);
+
+ if (!user_read_access_begin(new_ctx, ctx_size))
+ return -EFAULT;
+ if (__unsafe_restore_sigcontext(current, NULL, 0, &new_ctx->uc_mcontext)) {
+ user_read_access_end();
+ force_exit_sig(SIGSEGV);
+ return -EFAULT;
+ }
+ user_read_access_end();
/* This returns like rt_sigreturn */
set_thread_flag(TIF_RESTOREALL);
+
return 0;
+
+efault_out:
+ user_write_access_end();
+ return -EFAULT;
}
@@ -655,112 +741,193 @@ int sys_swapcontext(struct ucontext __user *old_ctx,
* Do a signal return; undo the signal stack.
*/
-int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7, unsigned long r8,
- struct pt_regs *regs)
+SYSCALL_DEFINE0(rt_sigreturn)
{
+ struct pt_regs *regs = current_pt_regs();
struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
sigset_t set;
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
unsigned long msr;
-#endif
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
- if (!access_ok(VERIFY_READ, uc, sizeof(*uc)))
+ if (!access_ok(uc, sizeof(*uc)))
goto badframe;
- if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
+ if (__get_user_sigset(&set, &uc->uc_sigmask))
goto badframe;
set_current_blocked(&set);
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
- goto badframe;
- if (MSR_TM_ACTIVE(msr)) {
+
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM)) {
+ /*
+ * If there is a transactional state then throw it away.
+ * The purpose of a sigreturn is to destroy all traces of the
+ * signal frame, this includes any transactional state created
+ * within it. We only check for suspended as we can never be
+ * active in the kernel; if we are active, there is nothing better to
+ * do than go ahead and Bad Thing later.
+ * The cause is not important as there will never be a
+ * recheckpoint so it's not user visible.
+ */
+ if (MSR_TM_SUSPENDED(mfmsr()))
+ tm_reclaim_current(0);
+
+ /*
+ * Disable MSR[TS] bit also, so, if there is an exception in the
+ * code below (as a page fault in copy_ckvsx_to_user()), it does
+ * not recheckpoint this task if there was a context switch inside
+ * the exception.
+ *
+ * A major page fault can indirectly call schedule(). Rescheduling
+ * the process in the middle of an exception can have a side effect
+ * (changing the CPU MSR[TS] state), since schedule() is called
+ * with the CPU MSR[TS] disabled and returns with MSR[TS]=Suspended
+ * (switch_to() calls tm_recheckpoint() for the 'new' process). In
+ * this case, the process continues to be the same in the CPU, but
+ * the CPU state just changed.
+ *
+ * This can cause a TM Bad Thing, since the MSR in the stack will
+ * have the MSR[TS]=0, and this is what will be used to RFID.
+ *
+ * Clearing MSR[TS] state here will avoid a recheckpoint if there
+ * is any process reschedule in kernel space. The MSR[TS] state
+ * does not need to be saved either, since it will be replaced with
+ * the MSR[TS] that came from user context later, at
+ * restore_tm_sigcontexts.
+ */
+ regs_set_return_msr(regs, regs->msr & ~MSR_TS_MASK);
+
+ if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+ goto badframe;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
+
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto badframe;
+
if (__get_user(uc_transact, &uc->uc_link))
goto badframe;
- if (restore_tm_sigcontexts(regs, &uc->uc_mcontext,
+ if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
&uc_transact->uc_mcontext))
goto badframe;
+ } else {
+ /*
+ * Fall through, for non-TM restore
+ *
+ * Unset MSR[TS] on the thread regs since the MSR from user
+ * context does not have TM active, and recheckpoint was
+ * not called since restore_tm_sigcontexts() was not called
+ * either.
+ *
+ * If not unsetting it, the code can RFID to userspace with
+ * MSR[TS] set, but without CPU in the proper state,
+ * causing a TM bad thing.
+ */
+ regs_set_return_msr(current->thread.regs,
+ current->thread.regs->msr & ~MSR_TS_MASK);
+ if (!user_read_access_begin(&uc->uc_mcontext, sizeof(uc->uc_mcontext)))
+ goto badframe;
+
+ unsafe_restore_sigcontext(current, NULL, 1, &uc->uc_mcontext,
+ badframe_block);
+
+ user_read_access_end();
}
- else
- /* Fall through, for non-TM restore */
-#endif
- if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
- goto badframe;
if (restore_altstack(&uc->uc_stack))
goto badframe;
set_thread_flag(TIF_RESTOREALL);
+
return 0;
+badframe_block:
+ user_read_access_end();
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
- current->comm, current->pid, "rt_sigreturn",
- (long)uc, regs->nip, regs->link);
+ signal_fault(current, regs, "rt_sigreturn", uc);
- force_sig(SIGSEGV, current);
+ force_sig(SIGSEGV);
return 0;
}
-int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
+int handle_rt_signal64(struct ksignal *ksig, sigset_t *set,
+ struct task_struct *tsk)
{
struct rt_sigframe __user *frame;
unsigned long newsp = 0;
long err = 0;
+ struct pt_regs *regs = tsk->thread.regs;
+ /* Save the thread's msr before get_tm_stackpointer() changes it */
+ unsigned long msr = regs->msr;
- frame = get_sigframe(ksig, get_tm_stackpointer(regs), sizeof(*frame), 0);
- if (unlikely(frame == NULL))
- goto badframe;
+ frame = get_sigframe(ksig, tsk, sizeof(*frame), 0);
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
- err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- if (err)
+ /*
+ * This only applies when calling unsafe_setup_sigcontext() and must be
+ * called before opening the uaccess window.
+ */
+ if (!MSR_TM_ACTIVE(msr))
+ prepare_setup_sigcontext(tsk);
+
+ if (!user_write_access_begin(frame, sizeof(*frame)))
goto badframe;
+ unsafe_put_user(&frame->info, &frame->pinfo, badframe_block);
+ unsafe_put_user(&frame->uc, &frame->puc, badframe_block);
+
/* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gpr[1]);
+ unsafe_put_user(0, &frame->uc.uc_flags, badframe_block);
+ unsafe_save_altstack(&frame->uc.uc_stack, regs->gpr[1], badframe_block);
+
+ if (MSR_TM_ACTIVE(msr)) {
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- if (MSR_TM_ACTIVE(regs->msr)) {
/* The ucontext_t passed to userland points to the second
* ucontext_t (for transactional state) with its uc_link ptr.
*/
- err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+ unsafe_put_user(&frame->uc_transact, &frame->uc.uc_link, badframe_block);
+
+ user_write_access_end();
+
err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
&frame->uc_transact.uc_mcontext,
- regs, ksig->sig,
- NULL,
- (unsigned long)ksig->ka.sa.sa_handler);
- } else
+ tsk, ksig->sig, NULL,
+ (unsigned long)ksig->ka.sa.sa_handler,
+ msr);
+
+ if (!user_write_access_begin(&frame->uc.uc_sigmask,
+ sizeof(frame->uc.uc_sigmask)))
+ goto badframe;
+
#endif
- {
- err |= __put_user(0, &frame->uc.uc_link);
- err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, ksig->sig,
+ } else {
+ unsafe_put_user(0, &frame->uc.uc_link, badframe_block);
+ unsafe_setup_sigcontext(&frame->uc.uc_mcontext, tsk, ksig->sig,
NULL, (unsigned long)ksig->ka.sa.sa_handler,
- 1);
+ 1, badframe_block);
}
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
+
+ unsafe_copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set), badframe_block);
+ user_write_access_end();
+
+ /* Save the siginfo outside of the unsafe block. */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
goto badframe;
/* Make sure signal handler doesn't get spurious FP exceptions */
- current->thread.fp_state.fpscr = 0;
+ tsk->thread.fp_state.fpscr = 0;
/* Set up to return from userspace. */
- if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
- regs->link = current->mm->context.vdso_base + vdso64_rt_sigtramp;
+ if (tsk->mm->context.vdso) {
+ regs_set_return_ip(regs, VDSO64_SYMBOL(tsk->mm->context.vdso, sigtramp_rt64));
} else {
err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
if (err)
goto badframe;
- regs->link = (unsigned long) &frame->tramp[0];
+ regs_set_return_ip(regs, (unsigned long) &frame->tramp[0]);
}
/* Allocate a dummy caller frame for the signal handler. */
@@ -769,30 +936,29 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
/* Set up "regs" so we "return" to the signal handler. */
if (is_elf2_task()) {
- regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
- regs->gpr[12] = regs->nip;
+ regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
+ regs->gpr[12] = regs->ctr;
} else {
/* Handler is *really* a pointer to the function descriptor for
* the signal routine. The first entry in the function
* descriptor is the entry address of signal and the second
* entry is the TOC value we need to use.
*/
- func_descr_t __user *funct_desc_ptr =
- (func_descr_t __user *) ksig->ka.sa.sa_handler;
+ struct func_desc __user *ptr =
+ (struct func_desc __user *)ksig->ka.sa.sa_handler;
- err |= get_user(regs->nip, &funct_desc_ptr->entry);
- err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
+ err |= get_user(regs->ctr, &ptr->addr);
+ err |= get_user(regs->gpr[2], &ptr->toc);
}
/* enter the signal handler in native-endian mode */
- regs->msr &= ~MSR_LE;
- regs->msr |= (MSR_KERNEL & MSR_LE);
+ regs_set_return_msr(regs, (regs->msr & ~MSR_LE) | (MSR_KERNEL & MSR_LE));
regs->gpr[1] = newsp;
regs->gpr[3] = ksig->sig;
regs->result = 0;
if (ksig->ka.sa.sa_flags & SA_SIGINFO) {
- err |= get_user(regs->gpr[4], (unsigned long __user *)&frame->pinfo);
- err |= get_user(regs->gpr[5], (unsigned long __user *)&frame->puc);
+ regs->gpr[4] = (unsigned long)&frame->info;
+ regs->gpr[5] = (unsigned long)&frame->uc;
regs->gpr[6] = (unsigned long) frame;
} else {
regs->gpr[4] = (unsigned long)&frame->uc.uc_mcontext;
@@ -802,11 +968,10 @@ int handle_rt_signal64(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs
return 0;
+badframe_block:
+ user_write_access_end();
badframe:
- if (show_unhandled_signals)
- printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
- current->comm, current->pid, "setup_rt_frame",
- (long)frame, regs->nip, regs->link);
+ signal_fault(current, regs, "handle_rt_signal64", frame);
return 1;
}
diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c
index 7a37ecd3afa3..21c39355b25e 100644
--- a/arch/powerpc/kernel/smp-tbsync.c
+++ b/arch/powerpc/kernel/smp-tbsync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Smp timebase synchronization for ppc.
*
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index a0738af4aba6..68edb66c2964 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* SMP support for ppc.
*
@@ -8,18 +9,15 @@
*
* PowerPC-64 Support added by Dave Engebretsen, Peter Bergner, and
* Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG
#include <linux/kernel.h>
#include <linux/export.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task_stack.h>
+#include <linux/sched/topology.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
@@ -31,18 +29,25 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/topology.h>
+#include <linux/profile.h>
+#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
+#include <linux/pgtable.h>
+#include <linux/clockchips.h>
+#include <linux/kexec.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
#include <asm/kvm_ppc.h>
+#include <asm/dbell.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/time.h>
#include <asm/machdep.h>
+#include <asm/mmu_context.h>
#include <asm/cputhreads.h>
#include <asm/cputable.h>
#include <asm/mpic.h>
@@ -52,6 +57,13 @@
#endif
#include <asm/vdso.h>
#include <asm/debug.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/ftrace.h>
+#include <asm/kup.h>
+#include <asm/fadump.h>
+#include <asm/systemcfg.h>
+
+#include <trace/events/ipi.h>
#ifdef DEBUG
#include <asm/udbg.h>
@@ -65,13 +77,60 @@
static DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif
-struct thread_info *secondary_ti;
+struct task_struct *secondary_current;
+bool has_big_cores __ro_after_init;
+bool coregroup_enabled __ro_after_init;
+bool thread_group_shares_l2 __ro_after_init;
+bool thread_group_shares_l3 __ro_after_init;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
+static DEFINE_PER_CPU(cpumask_var_t, cpu_coregroup_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
+EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE 8
+#define THREAD_GROUP_SHARE_L1 1
+#define THREAD_GROUP_SHARE_L2_L3 2
+struct thread_groups {
+ unsigned int property;
+ unsigned int nr_groups;
+ unsigned int threads_per_group;
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/* Maximum number of properties that groups of threads within a core can share */
+#define MAX_THREAD_GROUP_PROPERTIES 2
+
+struct thread_groups_list {
+ unsigned int nr_properties;
+ struct thread_groups property_tgs[MAX_THREAD_GROUP_PROPERTIES];
+};
+
+static struct thread_groups_list tgl[NR_CPUS] __initdata;
+/*
+ * On a big-cores system, thread_group_l1_cache_map for each CPU corresponds to
+ * the set of its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l1_cache_map);
+
+/*
+ * On some big-cores systems, thread_group_l2_cache_map for each CPU
+ * corresponds to the set of its siblings within the core that share the
+ * L2-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
+
+/*
+ * On P10, thread_group_l3_cache_map for each CPU is equal to the
+ * thread_group_l2_cache_map
+ */
+DEFINE_PER_CPU(cpumask_var_t, thread_group_l3_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
@@ -81,8 +140,6 @@ volatile unsigned int cpu_callin_map[NR_CPUS];
int smt_enabled_at_boot = 1;
-static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
-
/*
* Returns 1 if the specified cpu should be brought up during boot.
* Used to inhibit booting threads if they've been disabled or
@@ -93,7 +150,7 @@ int smp_generic_cpu_bootable(unsigned int nr)
/* Special case - we inhibit secondary thread startup
* during boot if the user requests it.
*/
- if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+ if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
return 0;
if (smt_enabled_at_boot
@@ -108,15 +165,16 @@ int smp_generic_cpu_bootable(unsigned int nr)
#ifdef CONFIG_PPC64
int smp_generic_kick_cpu(int nr)
{
- BUG_ON(nr < 0 || nr >= NR_CPUS);
+ if (nr < 0 || nr >= nr_cpu_ids)
+ return -EINVAL;
/*
* The processor is currently spinning, waiting for the
* cpu_start field to become non-zero After we set cpu_start,
* the processor will continue on to secondary_start
*/
- if (!paca[nr].cpu_start) {
- paca[nr].cpu_start = 1;
+ if (!paca_ptrs[nr]->cpu_start) {
+ paca_ptrs[nr]->cpu_start = 1;
smp_mb();
return 0;
}
@@ -147,38 +205,47 @@ static irqreturn_t reschedule_action(int irq, void *data)
return IRQ_HANDLED;
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
{
- tick_broadcast_ipi_handler();
+ timer_broadcast_interrupt();
return IRQ_HANDLED;
}
+#endif
-static irqreturn_t debug_ipi_action(int irq, void *data)
+#ifdef CONFIG_NMI_IPI
+static irqreturn_t nmi_ipi_action(int irq, void *data)
{
- if (crash_ipi_function_ptr) {
- crash_ipi_function_ptr(get_irq_regs());
- return IRQ_HANDLED;
- }
-
-#ifdef CONFIG_DEBUGGER
- debugger_ipi(get_irq_regs());
-#endif /* CONFIG_DEBUGGER */
-
+ smp_handle_nmi_ipi(get_irq_regs());
return IRQ_HANDLED;
}
+#endif
static irq_handler_t smp_ipi_action[] = {
[PPC_MSG_CALL_FUNCTION] = call_function_action,
[PPC_MSG_RESCHEDULE] = reschedule_action,
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
- [PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
+#endif
+#ifdef CONFIG_NMI_IPI
+ [PPC_MSG_NMI_IPI] = nmi_ipi_action,
+#endif
};
+/*
+ * The NMI IPI is a fallback and not truly non-maskable. It is simpler
+ * than going through the call function infrastructure, and strongly
+ * serialized, so it is more appropriate for debugging.
+ */
const char *smp_ipi_name[] = {
[PPC_MSG_CALL_FUNCTION] = "ipi call function",
[PPC_MSG_RESCHEDULE] = "ipi reschedule",
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
- [PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
+#endif
+#ifdef CONFIG_NMI_IPI
+ [PPC_MSG_NMI_IPI] = "nmi ipi",
+#endif
};
/* optional function to request ipi, for controllers with >= 4 ipis */
@@ -186,14 +253,13 @@ int smp_request_message_ipi(int virq, int msg)
{
int err;
- if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
+ if (msg < 0 || msg > PPC_MSG_NMI_IPI)
return -EINVAL;
- }
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
- if (msg == PPC_MSG_DEBUGGER_BREAK) {
+#ifndef CONFIG_NMI_IPI
+ if (msg == PPC_MSG_NMI_IPI)
return 1;
- }
#endif
+
err = request_irq(virq, smp_ipi_action[msg],
IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
smp_ipi_name[msg], NULL);
@@ -205,19 +271,11 @@ int smp_request_message_ipi(int virq, int msg)
#ifdef CONFIG_PPC_SMP_MUXED_IPI
struct cpu_messages {
- int messages; /* current messages */
- unsigned long data; /* data for cause ipi */
+ long messages; /* current messages */
};
static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_messages, ipi_message);
-void smp_muxed_ipi_set_data(int cpu, unsigned long data)
-{
- struct cpu_messages *info = &per_cpu(ipi_message, cpu);
-
- info->data = data;
-}
-
-void smp_muxed_ipi_message_pass(int cpu, int msg)
+void smp_muxed_ipi_set_message(int cpu, int msg)
{
struct cpu_messages *info = &per_cpu(ipi_message, cpu);
char *message = (char *)&info->messages;
@@ -226,38 +284,66 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
* Order previous accesses before accesses in the IPI handler.
*/
smp_mb();
- message[msg] = 1;
+ WRITE_ONCE(message[msg], 1);
+}
+
+void smp_muxed_ipi_message_pass(int cpu, int msg)
+{
+ smp_muxed_ipi_set_message(cpu, msg);
+
/*
* cause_ipi functions are required to include a full barrier
* before doing whatever causes the IPI.
*/
- smp_ops->cause_ipi(cpu, info->data);
+ smp_ops->cause_ipi(cpu);
}
#ifdef __BIG_ENDIAN__
-#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#define IPI_MESSAGE(A) (1uL << ((BITS_PER_LONG - 8) - 8 * (A)))
#else
-#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#define IPI_MESSAGE(A) (1uL << (8 * (A)))
#endif
irqreturn_t smp_ipi_demux(void)
{
- struct cpu_messages *info = &__get_cpu_var(ipi_message);
- unsigned int all;
-
mb(); /* order any irq clear */
+ return smp_ipi_demux_relaxed();
+}
+
+/* sync-free variant. Callers should ensure synchronization */
+irqreturn_t smp_ipi_demux_relaxed(void)
+{
+ struct cpu_messages *info;
+ unsigned long all;
+
+ info = this_cpu_ptr(&ipi_message);
do {
all = xchg(&info->messages, 0);
+#if defined(CONFIG_KVM_XICS) && defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ /*
+ * Must check for PPC_MSG_RM_HOST_ACTION messages
+ * before PPC_MSG_CALL_FUNCTION messages because when
+ * a VM is destroyed, we call kick_all_cpus_sync()
+ * to ensure that any pending PPC_MSG_RM_HOST_ACTION
+ * messages have completed before we free any VCPUs.
+ */
+ if (all & IPI_MESSAGE(PPC_MSG_RM_HOST_ACTION))
+ kvmppc_xics_ipi_action();
+#endif
if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
generic_smp_call_function_interrupt();
if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
scheduler_ipi();
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
- tick_broadcast_ipi_handler();
- if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
- debug_ipi_action(0, NULL);
- } while (info->messages);
+ timer_broadcast_interrupt();
+#endif
+#ifdef CONFIG_NMI_IPI
+ if (all & IPI_MESSAGE(PPC_MSG_NMI_IPI))
+ nmi_ipi_action(0, NULL);
+#endif
+ } while (READ_ONCE(info->messages));
return IRQ_HANDLED;
}
@@ -273,12 +359,12 @@ static inline void do_message_pass(int cpu, int msg)
#endif
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
-EXPORT_SYMBOL_GPL(smp_send_reschedule);
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
@@ -293,6 +379,194 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
}
+#ifdef CONFIG_NMI_IPI
+
+/*
+ * "NMI IPI" system.
+ *
+ * NMI IPIs may not be recoverable, so should not be used as an ongoing part of
+ * a running system. They can be used for crash, debug, halt/reboot, etc.
+ *
+ * The IPI call waits with interrupts disabled until all targets enter the
+ * NMI handler, then returns. Subsequent IPIs can be issued before targets
+ * have returned from their handlers, so there is no guarantee about
+ * concurrency or re-entrancy.
+ *
+ * A new NMI can be issued before all targets exit the handler.
+ *
+ * The IPI call may time out without all targets entering the NMI handler.
+ * In that case, there is some logic to recover (and ignore subsequent
+ * NMI interrupts that may eventually be raised), but the platform interrupt
+ * handler may not be able to distinguish this from other exception causes,
+ * which may cause a crash.
+ */
+
+static atomic_t __nmi_ipi_lock = ATOMIC_INIT(0);
+static struct cpumask nmi_ipi_pending_mask;
+static bool nmi_ipi_busy = false;
+static void (*nmi_ipi_function)(struct pt_regs *) = NULL;
+
+noinstr static void nmi_ipi_lock_start(unsigned long *flags)
+{
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1) {
+ raw_local_irq_restore(*flags);
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
+}
+
+noinstr static void nmi_ipi_lock(void)
+{
+ while (raw_atomic_cmpxchg(&__nmi_ipi_lock, 0, 1) == 1)
+ spin_until_cond(raw_atomic_read(&__nmi_ipi_lock) == 0);
+}
+
+noinstr static void nmi_ipi_unlock(void)
+{
+ smp_mb();
+ WARN_ON(raw_atomic_read(&__nmi_ipi_lock) != 1);
+ raw_atomic_set(&__nmi_ipi_lock, 0);
+}
+
+noinstr static void nmi_ipi_unlock_end(unsigned long *flags)
+{
+ nmi_ipi_unlock();
+ raw_local_irq_restore(*flags);
+}
+
+/*
+ * Platform NMI handler calls this to ack
+ */
+noinstr int smp_handle_nmi_ipi(struct pt_regs *regs)
+{
+ void (*fn)(struct pt_regs *) = NULL;
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 0;
+
+ /*
+ * Unexpected NMIs are possible here because the interrupt may not
+ * be able to distinguish NMI IPIs from other types of NMIs, or
+ * because the caller may have timed out.
+ */
+ nmi_ipi_lock_start(&flags);
+ if (cpumask_test_cpu(me, &nmi_ipi_pending_mask)) {
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ fn = READ_ONCE(nmi_ipi_function);
+ WARN_ON_ONCE(!fn);
+ ret = 1;
+ }
+ nmi_ipi_unlock_end(&flags);
+
+ if (fn)
+ fn(regs);
+
+ return ret;
+}
+
+static void do_smp_send_nmi_ipi(int cpu, bool safe)
+{
+ if (!safe && smp_ops->cause_nmi_ipi && smp_ops->cause_nmi_ipi(cpu))
+ return;
+
+ if (cpu >= 0) {
+ do_message_pass(cpu, PPC_MSG_NMI_IPI);
+ } else {
+ int c;
+
+ for_each_online_cpu(c) {
+ if (c == raw_smp_processor_id())
+ continue;
+ do_message_pass(c, PPC_MSG_NMI_IPI);
+ }
+ }
+}
+
+/*
+ * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS.
+ * - fn is the target callback function.
+ * - delay_us > 0 is the delay before giving up waiting for targets to
+ * begin executing the handler, == 0 specifies indefinite delay.
+ */
+static int __smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *),
+ u64 delay_us, bool safe)
+{
+ unsigned long flags;
+ int me = raw_smp_processor_id();
+ int ret = 1;
+
+ BUG_ON(cpu == me);
+ BUG_ON(cpu < 0 && cpu != NMI_IPI_ALL_OTHERS);
+
+ if (unlikely(!smp_ops))
+ return 0;
+
+ nmi_ipi_lock_start(&flags);
+ while (nmi_ipi_busy) {
+ nmi_ipi_unlock_end(&flags);
+ spin_until_cond(!nmi_ipi_busy);
+ nmi_ipi_lock_start(&flags);
+ }
+ nmi_ipi_busy = true;
+ nmi_ipi_function = fn;
+
+ WARN_ON_ONCE(!cpumask_empty(&nmi_ipi_pending_mask));
+
+ if (cpu < 0) {
+ /* ALL_OTHERS */
+ cpumask_copy(&nmi_ipi_pending_mask, cpu_online_mask);
+ cpumask_clear_cpu(me, &nmi_ipi_pending_mask);
+ } else {
+ cpumask_set_cpu(cpu, &nmi_ipi_pending_mask);
+ }
+
+ nmi_ipi_unlock();
+
+ /* Interrupts remain hard disabled */
+
+ do_smp_send_nmi_ipi(cpu, safe);
+
+ nmi_ipi_lock();
+ /* nmi_ipi_busy is set here, so unlock/lock is okay */
+ while (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ nmi_ipi_unlock();
+ udelay(1);
+ nmi_ipi_lock();
+ if (delay_us) {
+ delay_us--;
+ if (!delay_us)
+ break;
+ }
+ }
+
+ if (!cpumask_empty(&nmi_ipi_pending_mask)) {
+ /* Timeout waiting for CPUs to call smp_handle_nmi_ipi */
+ ret = 0;
+ cpumask_clear(&nmi_ipi_pending_mask);
+ }
+
+ nmi_ipi_function = NULL;
+ nmi_ipi_busy = false;
+
+ nmi_ipi_unlock_end(&flags);
+
+ return ret;
+}
+
+int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+ return __smp_send_nmi_ipi(cpu, fn, delay_us, false);
+}
+
+int smp_send_safe_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us)
+{
+ return __smp_send_nmi_ipi(cpu, fn, delay_us, true);
+}
+#endif /* CONFIG_NMI_IPI */
+
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
void tick_broadcast(const struct cpumask *mask)
{
@@ -303,66 +577,522 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
-void smp_send_debugger_break(void)
+#ifdef CONFIG_DEBUGGER
+static void debugger_ipi_callback(struct pt_regs *regs)
{
- int cpu;
- int me = raw_smp_processor_id();
-
- if (unlikely(!smp_ops))
- return;
+ debugger_ipi(regs);
+}
- for_each_online_cpu(cpu)
- if (cpu != me)
- do_message_pass(cpu, PPC_MSG_DEBUGGER_BREAK);
+void smp_send_debugger_break(void)
+{
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, debugger_ipi_callback, 1000000);
}
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_CRASH_DUMP
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
- crash_ipi_function_ptr = crash_ipi_callback;
- if (crash_ipi_callback) {
- mb();
- smp_send_debugger_break();
+ int cpu;
+
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_ipi_callback, 1000000);
+ if (kdump_in_progress() && crash_wake_offline) {
+ for_each_present_cpu(cpu) {
+ if (cpu_online(cpu))
+ continue;
+ /*
+ * crash_ipi_callback will wait for
+ * all cpus, including offline CPUs.
+ * We don't care about nmi_ipi_function.
+ * Offline cpus will jump straight into
+ * crash_ipi_callback, we can skip the
+ * entire NMI dance and waiting for
+ * cpus to clear pending mask, etc.
+ */
+ do_smp_send_nmi_ipi(cpu, false);
+ }
}
}
#endif
+void crash_smp_send_stop(void)
+{
+ static bool stopped = false;
+
+ /*
+ * In case of fadump, register data for all CPUs is captured by f/w
+ * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before
+ * this rtas call to avoid tricky post processing of those CPUs'
+ * backtraces.
+ */
+ if (should_fadump_crash())
+ return;
+
+ if (stopped)
+ return;
+
+ stopped = true;
+
+#ifdef CONFIG_CRASH_DUMP
+ if (kexec_crash_image) {
+ crash_kexec_prepare();
+ return;
+ }
+#endif
+
+ smp_send_stop();
+}
+
+#ifdef CONFIG_NMI_IPI
+static void nmi_stop_this_cpu(struct pt_regs *regs)
+{
+ /*
+ * IRQs are already hard disabled by the smp_handle_nmi_ipi.
+ */
+ set_cpu_online(smp_processor_id(), false);
+
+ spin_begin();
+ while (1)
+ spin_cpu_relax();
+}
+
+void smp_send_stop(void)
+{
+ smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, nmi_stop_this_cpu, 1000000);
+}
+
+#else /* CONFIG_NMI_IPI */
+
static void stop_this_cpu(void *dummy)
{
- /* Remove this CPU */
+ hard_irq_disable();
+
+ /*
+ * Offlining CPUs in stop_this_cpu can result in scheduler warnings,
+ * (see commit de6e5d38417e), but printk_safe_flush_on_panic() wants
+ * to know other CPUs are offline before it breaks locks to flush
+ * printk buffers, in case we panic()ed while holding the lock.
+ */
set_cpu_online(smp_processor_id(), false);
- local_irq_disable();
+ spin_begin();
while (1)
- ;
+ spin_cpu_relax();
}
void smp_send_stop(void)
{
+ static bool stopped = false;
+
+ /*
+ * Prevent waiting on csd lock from a previous smp_send_stop.
+ * This is racy, but in general callers try to do the right
+ * thing and only fire off one smp_send_stop (e.g., see
+ * kernel/panic.c)
+ */
+ if (stopped)
+ return;
+
+ stopped = true;
+
smp_call_function(stop_this_cpu, NULL, 0);
}
+#endif /* CONFIG_NMI_IPI */
-struct thread_info *current_set[NR_CPUS];
+static struct task_struct *current_set[NR_CPUS];
static void smp_store_cpu_info(int id)
{
per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
per_cpu(next_tlbcam_idx, id)
= (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
}
+/*
+ * Relationships between CPUs are maintained in a set of per-cpu cpumasks so
+ * rather than just passing around the cpumask we pass around a function that
+ * returns that cpumask for the given CPU.
+ */
+static void set_cpus_related(int i, int j, struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_set_cpu(i, get_cpumask(j));
+ cpumask_set_cpu(j, get_cpumask(i));
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void set_cpus_unrelated(int i, int j,
+ struct cpumask *(*get_cpumask)(int))
+{
+ cpumask_clear_cpu(i, get_cpumask(j));
+ cpumask_clear_cpu(j, get_cpumask(i));
+}
+#endif
+
+/*
+ * Extends set_cpus_related. Instead of setting one CPU at a time in
+ * dstmask, set srcmask in one shot. dstmask should be a superset of srcmask.
+ */
+static void or_cpumasks_related(int i, int j, struct cpumask *(*srcmask)(int),
+ struct cpumask *(*dstmask)(int))
+{
+ struct cpumask *mask;
+ int k;
+
+ mask = srcmask(j);
+ for_each_cpu(k, srcmask(i))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+
+ if (i == j)
+ return;
+
+ mask = srcmask(i);
+ for_each_cpu(k, srcmask(j))
+ cpumask_or(dstmask(k), dstmask(k), mask);
+}
+
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ * property for the CPU device node @dn and stores
+ * the parsed output in the thread_groups_list
+ * structure @tglp.
+ *
+ * @dn: The device node of the CPU device.
+ * @tglp: Pointer to a thread group list structure into which the parsed
+ * output of "ibm,thread-groups" is stored.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * This array can represent thread groupings for multiple properties.
+ *
+ * ibm,thread-groups[i + 0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache. If
+ * the value is 2, it implies that the threads in the same group share
+ * the same L2 cache.
+ *
+ * ibm,thread-groups[i+1] tells us how many such thread groups exist for the
+ * property ibm,thread-groups[i]
+ *
+ * ibm,thread-groups[i+2] tells us the number of threads in each such
+ * group.
+ * Suppose k = (ibm,thread-groups[i+1] * ibm,thread-groups[i+2]), then,
+ *
+ * ibm,thread-groups[i+3..i+k+2] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example:
+ * If "ibm,thread-groups" = [1,2,4,8,10,12,14,9,11,13,15,2,2,4,8,10,12,14,9,11,13,15]
+ * This can be decomposed into two consecutive arrays:
+ * a) [1,2,4,8,10,12,14,9,11,13,15]
+ * b) [2,2,4,8,10,12,14,9,11,13,15]
+ *
+ * wherein,
+ *
+ * a) provides information of Property "1" being shared by "2" groups,
+ * each with "4" threads. The "ibm,ppc-interrupt-server#s" of
+ * the first group is {8,10,12,14} and the
+ * "ibm,ppc-interrupt-server#s" of the second group is
+ * {9,11,13,15}. Property "1" is indicative of the threads in the
+ * group sharing L1 cache, translation cache and Instruction Data
+ * flow.
+ *
+ * b) provides information of Property "2" being shared by "2" groups,
+ * each group with "4" threads. The "ibm,ppc-interrupt-server#s" of
+ * the first group is {8,10,12,14} and the
+ * "ibm,ppc-interrupt-server#s" of the second group is
+ * {9,11,13,15}. Property "2" indicates that the threads in each
+ * group share the L2-cache.
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+ struct thread_groups_list *tglp)
+{
+ unsigned int property_idx = 0;
+ u32 *thread_group_array;
+ size_t total_threads;
+ int ret = 0, count;
+ u32 *thread_list;
+ int i = 0;
+
+ count = of_property_count_u32_elems(dn, "ibm,thread-groups");
+ thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array, count);
+ if (ret)
+ goto out_free;
+
+ while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
+ int j;
+ struct thread_groups *tg = &tglp->property_tgs[property_idx++];
+
+ tg->property = thread_group_array[i];
+ tg->nr_groups = thread_group_array[i + 1];
+ tg->threads_per_group = thread_group_array[i + 2];
+ total_threads = tg->nr_groups * tg->threads_per_group;
+
+ thread_list = &thread_group_array[i + 3];
+
+ for (j = 0; j < total_threads; j++)
+ tg->thread_list[j] = thread_list[j];
+ i = i + 3 + total_threads;
+ }
+
+ tglp->nr_properties = property_idx;
+
+out_free:
+ kfree(thread_group_array);
+ return ret;
+}
+
+/*
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
+ * that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ * to.
+ *
+ * Returns the index to tg->thread_list that points to the start
+ * of the thread_group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
+ int i, j;
+
+ for (i = 0; i < tg->nr_groups; i++) {
+ int group_start = i * tg->threads_per_group;
+
+ for (j = 0; j < tg->threads_per_group; j++) {
+ int idx = group_start + j;
+
+ if (tg->thread_list[idx] == hw_cpu_id)
+ return group_start;
+ }
+ }
+
+ return -1;
+}
+
+static struct thread_groups *__init get_thread_groups(int cpu,
+ int group_property,
+ int *err)
+{
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ struct thread_groups_list *cpu_tgl = &tgl[cpu];
+ struct thread_groups *tg = NULL;
+ int i;
+ *err = 0;
+
+ if (!dn) {
+ *err = -ENODATA;
+ return NULL;
+ }
+
+ if (!cpu_tgl->nr_properties) {
+ *err = parse_thread_groups(dn, cpu_tgl);
+ if (*err)
+ goto out;
+ }
+
+ for (i = 0; i < cpu_tgl->nr_properties; i++) {
+ if (cpu_tgl->property_tgs[i].property == group_property) {
+ tg = &cpu_tgl->property_tgs[i];
+ break;
+ }
+ }
+
+ if (!tg)
+ *err = -EINVAL;
+out:
+ of_node_put(dn);
+ return tg;
+}
+
+static int __init update_mask_from_threadgroup(cpumask_var_t *mask, struct thread_groups *tg,
+ int cpu, int cpu_group_start)
+{
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i;
+
+ zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ int i_group_start = get_cpu_thread_group_start(i, tg);
+
+ if (unlikely(i_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ return -ENODATA;
+ }
+
+ if (i_group_start == cpu_group_start)
+ cpumask_set_cpu(i, *mask);
+ }
+
+ return 0;
+}
+
+static int __init init_thread_group_cache_map(int cpu, int cache_property)
+
+{
+ int cpu_group_start = -1, err = 0;
+ struct thread_groups *tg = NULL;
+ cpumask_var_t *mask = NULL;
+
+ if (cache_property != THREAD_GROUP_SHARE_L1 &&
+ cache_property != THREAD_GROUP_SHARE_L2_L3)
+ return -EINVAL;
+
+ tg = get_thread_groups(cpu, cache_property, &err);
+
+ if (!tg)
+ return err;
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ return -ENODATA;
+ }
+
+ if (cache_property == THREAD_GROUP_SHARE_L1) {
+ mask = &per_cpu(thread_group_l1_cache_map, cpu);
+ update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+ }
+ else if (cache_property == THREAD_GROUP_SHARE_L2_L3) {
+ mask = &per_cpu(thread_group_l2_cache_map, cpu);
+ update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+ mask = &per_cpu(thread_group_l3_cache_map, cpu);
+ update_mask_from_threadgroup(mask, tg, cpu, cpu_group_start);
+ }
+
+
+ return 0;
+}
+
+static bool shared_caches __ro_after_init;
+
+#ifdef CONFIG_SCHED_SMT
+/* Sched-domain flags for the SMT level; adds SD_ASYM_PACKING on CPUs with asymmetric SMT */
+static int powerpc_smt_flags(void)
+{
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
+
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ flags |= SD_ASYM_PACKING;
+ }
+ return flags;
+}
+#endif
+
+/*
+ * On shared processor LPARs scheduled on a big core (which has two or more
+ * independent thread groups per core), prefer lower numbered CPUs, so
+ * that the workload consolidates onto a smaller number of cores.
+ */
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(splpar_asym_pack);
+
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_SHARE_LLC | SD_ASYM_PACKING;
+
+ return SD_SHARE_LLC;
+}
+
+static int powerpc_shared_proc_flags(void)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return SD_ASYM_PACKING;
+
+ return 0;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * it returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *tl_cache_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+ return per_cpu(cpu_l2_cache_map, cpu);
+}
+
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *tl_smallcore_smt_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
+struct cpumask *cpu_coregroup_mask(int cpu)
+{
+ return per_cpu(cpu_coregroup_map, cpu);
+}
+
+static bool has_coregroup_support(void)
+{
+ /* Coregroup identification not available on shared systems */
+ if (is_shared_processor())
+ return 0;
+
+ return coregroup_enabled;
+}
+
+static int __init init_big_cores(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L1);
+
+ if (err)
+ return err;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+
+ has_big_cores = true;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_thread_group_cache_map(cpu, THREAD_GROUP_SHARE_L2_L3);
+
+ if (err)
+ return err;
+ }
+
+ thread_group_shares_l2 = true;
+ thread_group_shares_l3 = true;
+ pr_debug("L2/L3 cache only shared by the threads in the small core\n");
+
+ return 0;
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- unsigned int cpu;
+ unsigned int cpu, num_threads;
DBG("smp_prepare_cpus\n");
/*
- * setup_cpu may need to be called on the boot cpu. We havent
+ * setup_cpu may need to be called on the boot cpu. We haven't
* spun any cpus up but lets be paranoid.
*/
BUG_ON(boot_cpuid != smp_processor_id());
@@ -374,30 +1104,72 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
for_each_possible_cpu(cpu) {
zalloc_cpumask_var_node(&per_cpu(cpu_sibling_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ zalloc_cpumask_var_node(&per_cpu(cpu_l2_cache_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ if (has_coregroup_support())
+ zalloc_cpumask_var_node(&per_cpu(cpu_coregroup_map, cpu),
+ GFP_KERNEL, cpu_to_node(cpu));
+
+#ifdef CONFIG_NUMA
/*
* numa_node_id() works after this.
*/
- set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
- set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
+ if (cpu_present(cpu)) {
+ set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+ set_cpu_numa_mem(cpu,
+ local_memory_node(numa_cpu_lookup_table[cpu]));
+ }
+#endif
}
+ /* Init the cpumasks so the boot CPU is related to itself */
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
+ cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+ if (has_coregroup_support())
+ cpumask_set_cpu(boot_cpuid, cpu_coregroup_mask(boot_cpuid));
+
+ init_big_cores();
+ if (has_big_cores) {
+ cpumask_set_cpu(boot_cpuid,
+ cpu_smallcore_mask(boot_cpuid));
+ }
+
+ if (cpu_to_chip_id(boot_cpuid) != -1) {
+ int idx = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+
+ /*
+ * All threads of a core will all belong to the same core,
+ * chip_id_lookup_table will have one entry per core.
+ * Assumption: if boot_cpuid doesn't have a chip-id, then no
+ * other CPU will have a chip-id either.
+ */
+ chip_id_lookup_table = kcalloc(idx, sizeof(int), GFP_KERNEL);
+ if (chip_id_lookup_table)
+ memset(chip_id_lookup_table, -1, sizeof(int) * idx);
+ }
+
if (smp_ops && smp_ops->probe)
smp_ops->probe();
+
+ // Initialise the generic SMT topology support
+ num_threads = 1;
+ if (smt_enabled_at_boot)
+ num_threads = smt_enabled_at_boot;
+ cpu_smt_set_num_threads(num_threads, threads_per_core);
}
-void smp_prepare_boot_cpu(void)
+void __init smp_prepare_boot_cpu(void)
{
BUG_ON(smp_processor_id() != boot_cpuid);
#ifdef CONFIG_PPC64
- paca[boot_cpuid].__current = current;
+ paca_ptrs[boot_cpuid]->__current = current;
#endif
set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
- current_set[boot_cpuid] = task_thread_info(current);
+ current_set[boot_cpuid] = current;
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -410,10 +1182,24 @@ int generic_cpu_disable(void)
return -EBUSY;
set_cpu_online(cpu, false);
-#ifdef CONFIG_PPC64
- vdso_data->processorCount--;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+ systemcfg->processorCount--;
#endif
- migrate_irqs();
+ /* Update affinity of all IRQs previously aimed at this CPU */
+ irq_migrate_all_off_this_cpu();
+
+ /*
+ * Depending on the details of the interrupt controller, it's possible
+ * that one of the interrupts we just migrated away from this CPU is
+ * actually already pending on this CPU. If we leave it in that state
+ * the interrupt will never be EOI'ed, and will never fire again. So
+ * temporarily enable interrupts here, to allow any pending interrupt to
+ * be received (and EOI'ed), before we take this CPU offline.
+ */
+ local_irq_enable();
+ mdelay(1);
+ local_irq_disable();
+
return 0;
}
@@ -423,27 +1209,13 @@ void generic_cpu_die(unsigned int cpu)
for (i = 0; i < 100; i++) {
smp_rmb();
- if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+ if (is_cpu_dead(cpu))
return;
msleep(100);
}
printk(KERN_ERR "CPU%d didn't die...\n", cpu);
}
-void generic_mach_cpu_die(void)
-{
- unsigned int cpu;
-
- local_irq_disable();
- idle_task_exit();
- cpu = smp_processor_id();
- printk(KERN_DEBUG "CPU%d offline\n", cpu);
- __get_cpu_var(cpu_state) = CPU_DEAD;
- smp_wmb();
- while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
- cpu_relax();
-}
-
void generic_set_cpu_dead(unsigned int cpu)
{
per_cpu(cpu_state, cpu) = CPU_DEAD;
@@ -464,6 +1236,11 @@ int generic_check_cpu_restart(unsigned int cpu)
return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
}
+int is_cpu_dead(unsigned int cpu)
+{
+ return per_cpu(cpu_state, cpu) == CPU_DEAD;
+}
+
static bool secondaries_inhibited(void)
{
return kvm_hv_mode_active();
@@ -477,19 +1254,23 @@ static bool secondaries_inhibited(void)
static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
{
- struct thread_info *ti = task_thread_info(idle);
-
#ifdef CONFIG_PPC64
- paca[cpu].__current = idle;
- paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
+ paca_ptrs[cpu]->__current = idle;
+ paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
+ THREAD_SIZE - STACK_FRAME_MIN_SIZE;
#endif
- ti->cpu = cpu;
- secondary_ti = current_set[cpu] = ti;
+ task_thread_info(idle)->cpu = cpu;
+ secondary_current = current_set[cpu] = idle;
}
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
- int rc, c;
+ const unsigned long boot_spin_ms = 5 * MSEC_PER_SEC;
+ const bool booting = system_state < SYSTEM_RUNNING;
+ const unsigned long hp_spin_ms = 1;
+ unsigned long deadline;
+ int rc;
+ const unsigned long spin_wait_ms = booting ? boot_spin_ms : hp_spin_ms;
/*
* Don't allow secondary threads to come online if inhibited
@@ -504,6 +1285,16 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
cpu_idle_thread_init(cpu, tidle);
+ /*
+ * The platform might need to allocate resources prior to bringing
+ * up the CPU
+ */
+ if (smp_ops->prepare_cpu) {
+ rc = smp_ops->prepare_cpu(cpu);
+ if (rc)
+ return rc;
+ }
+
/* Make sure callin-map entry is 0 (can be left over from a CPU
* hotplug)
*/
@@ -524,22 +1315,23 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
}
/*
- * wait to see if the cpu made a callin (is actually up).
- * use this value that I found through experimentation.
- * -- Cort
+ * At boot time, simply spin on the callin word until the
+ * deadline passes.
+ *
+ * At run time, spin for an optimistic amount of time to avoid
+ * sleeping in the common case.
*/
- if (system_state < SYSTEM_RUNNING)
- for (c = 50000; c && !cpu_callin_map[cpu]; c--)
- udelay(100);
-#ifdef CONFIG_HOTPLUG_CPU
- else
- /*
- * CPUs can take much longer to come up in the
- * hotplug case. Wait five seconds.
- */
- for (c = 5000; c && !cpu_callin_map[cpu]; c--)
- msleep(1);
-#endif
+ deadline = jiffies + msecs_to_jiffies(spin_wait_ms);
+ spin_until_cond(cpu_callin_map[cpu] || time_is_before_jiffies(deadline));
+
+ if (!cpu_callin_map[cpu] && system_state >= SYSTEM_RUNNING) {
+ const unsigned long sleep_interval_us = 10 * USEC_PER_MSEC;
+ const unsigned long sleep_wait_ms = 100 * MSEC_PER_SEC;
+
+ deadline = jiffies + msecs_to_jiffies(sleep_wait_ms);
+ while (!cpu_callin_map[cpu] && time_is_after_jiffies(deadline))
+ fsleep(sleep_interval_us);
+ }
if (!cpu_callin_map[cpu]) {
printk(KERN_ERR "Processor %u is stuck.\n", cpu);
@@ -551,9 +1343,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
if (smp_ops->give_timebase)
smp_ops->give_timebase();
- /* Wait until cpu puts itself in the online map */
- while (!cpu_online(cpu))
- cpu_relax();
+ /* Wait until cpu puts itself in the online & active maps */
+ spin_until_cond(cpu_online(cpu));
return 0;
}
@@ -564,22 +1355,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
int cpu_to_core_id(int cpu)
{
struct device_node *np;
- const __be32 *reg;
int id = -1;
np = of_get_cpu_node(cpu, NULL);
if (!np)
goto out;
- reg = of_get_property(np, "reg", NULL);
- if (!reg)
- goto out;
-
- id = be32_to_cpup(reg);
+ id = of_get_cpu_hwid(np, 0);
out:
of_node_put(np);
return id;
}
+EXPORT_SYMBOL_GPL(cpu_to_core_id);
/* Helper routines for cpu to core mapping */
int cpu_core_index_of_thread(int cpu)
@@ -594,33 +1381,6 @@ int cpu_first_thread_of_core(int core)
}
EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
-static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
-{
- const struct cpumask *mask;
- struct device_node *np;
- int i, plen;
- const __be32 *prop;
-
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
- np = of_get_cpu_node(i, NULL);
- if (!np)
- continue;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int) &&
- of_read_number(prop, 1) == chipid) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
- }
- of_node_put(np);
- }
-}
-
/* Must be called when no change can occur to cpu_present_mask,
* i.e. during cpu online or offline.
*/
@@ -643,59 +1403,232 @@ static struct device_node *cpu_to_l2cache(int cpu)
return cache;
}
-static void traverse_core_siblings(int cpu, bool add)
+static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
struct device_node *l2_cache, *np;
- const struct cpumask *mask;
- int i, chip, plen;
- const __be32 *prop;
+ int i;
- /* First see if we have ibm,chip-id properties in cpu nodes */
- np = of_get_cpu_node(cpu, NULL);
- if (np) {
- chip = -1;
- prop = of_get_property(np, "ibm,chip-id", &plen);
- if (prop && plen == sizeof(int))
- chip = of_read_number(prop, 1);
- of_node_put(np);
- if (chip >= 0) {
- traverse_siblings_chip_id(cpu, add, chip);
- return;
+ if (has_big_cores)
+ submask_fn = cpu_smallcore_mask;
+
+ /*
+ * If the threads in a thread-group share L2 cache, then the
+ * L2-mask can be obtained from thread_group_l2_cache_map.
+ */
+ if (thread_group_shares_l2) {
+ cpumask_set_cpu(cpu, cpu_l2_cache_mask(cpu));
+
+ for_each_cpu(i, per_cpu(thread_group_l2_cache_map, cpu)) {
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_l2_cache_mask);
+ }
+
+ /* Verify that L1-cache siblings are a subset of L2 cache-siblings */
+ if (!cpumask_equal(submask_fn(cpu), cpu_l2_cache_mask(cpu)) &&
+ !cpumask_subset(submask_fn(cpu), cpu_l2_cache_mask(cpu))) {
+ pr_warn_once("CPU %d : Inconsistent L1 and L2 cache siblings\n",
+ cpu);
}
+
+ return true;
}
l2_cache = cpu_to_l2cache(cpu);
- mask = add ? cpu_online_mask : cpu_present_mask;
- for_each_cpu(i, mask) {
+ if (!l2_cache || !*mask) {
+ /* Assume only core siblings share cache with this CPU */
+ for_each_cpu(i, cpu_sibling_mask(cpu))
+ set_cpus_related(cpu, i, cpu_l2_cache_mask);
+
+ return false;
+ }
+
+ cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
+
+ /* Update l2-cache mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
+
+ /* Skip all CPUs already part of current CPU l2-cache mask */
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(cpu));
+
+ for_each_cpu(i, *mask) {
+ /*
+ * When updating the masks, the current CPU has not yet been marked
+ * online, but we still need to update the cache masks.
+ */
np = cpu_to_l2cache(i);
- if (!np)
- continue;
+
+ /* CPUs on the same L2 cache node are merged in; all others are dropped from the scan mask */
if (np == l2_cache) {
- if (add) {
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- } else {
- cpumask_clear_cpu(cpu, cpu_core_mask(i));
- cpumask_clear_cpu(i, cpu_core_mask(cpu));
- }
+ or_cpumasks_related(cpu, i, submask_fn, cpu_l2_cache_mask);
+ cpumask_andnot(*mask, *mask, submask_fn(i));
+ } else {
+ cpumask_andnot(*mask, *mask, cpu_l2_cache_mask(i));
}
+
of_node_put(np);
}
of_node_put(l2_cache);
+
+ return true;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void remove_cpu_from_masks(int cpu)
+{
+ struct cpumask *(*mask_fn)(int) = cpu_sibling_mask;
+ int i;
+
+ unmap_cpu_from_node(cpu);
+
+ if (shared_caches)
+ mask_fn = cpu_l2_cache_mask;
+
+ for_each_cpu(i, mask_fn(cpu)) {
+ set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
+ set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ if (has_big_cores)
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
+ }
+
+ for_each_cpu(i, cpu_core_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_core_mask);
+
+ if (has_coregroup_support()) {
+ for_each_cpu(i, cpu_coregroup_mask(cpu))
+ set_cpus_unrelated(cpu, i, cpu_coregroup_mask);
+ }
+}
+#endif
+
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+ int i;
+
+ if (!has_big_cores)
+ return;
+
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+ for_each_cpu(i, per_cpu(thread_group_l1_cache_map, cpu)) {
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
+ }
+}
+
+static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
+{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+ int coregroup_id = cpu_to_coregroup_id(cpu);
+ int i;
+
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
+
+ if (!*mask) {
+ /* Assume only siblings are part of this CPU's coregroup */
+ for_each_cpu(i, submask_fn(cpu))
+ set_cpus_related(cpu, i, cpu_coregroup_mask);
+
+ return;
+ }
+
+ cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
+
+ /* Update coregroup mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
+
+ /* Skip all CPUs already part of coregroup mask */
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(cpu));
+
+ for_each_cpu(i, *mask) {
+ /* Skip all CPUs not part of this coregroup */
+ if (coregroup_id == cpu_to_coregroup_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_coregroup_mask);
+ cpumask_andnot(*mask, *mask, submask_fn(i));
+ } else {
+ cpumask_andnot(*mask, *mask, cpu_coregroup_mask(i));
+ }
+ }
+}
+
+static void add_cpu_to_masks(int cpu)
+{
+ struct cpumask *(*submask_fn)(int) = cpu_sibling_mask;
+ int first_thread = cpu_first_thread_sibling(cpu);
+ cpumask_var_t mask;
+ int chip_id = -1;
+ bool ret;
+ int i;
+
+ /*
+ * This CPU will not be in the online mask yet so we need to manually
+ * add it to its own thread sibling mask.
+ */
+ map_cpu_to_node(cpu, cpu_to_node(cpu));
+ cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
+ cpumask_set_cpu(cpu, cpu_core_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++)
+ if (cpu_online(i))
+ set_cpus_related(i, cpu, cpu_sibling_mask);
+
+ add_cpu_to_smallcore_masks(cpu);
+
+ /* In CPU-hotplug path, hence use GFP_ATOMIC */
+ ret = alloc_cpumask_var_node(&mask, GFP_ATOMIC, cpu_to_node(cpu));
+ update_mask_by_l2(cpu, &mask);
+
+ if (has_coregroup_support())
+ update_coregroup_mask(cpu, &mask);
+
+ if (chip_id_lookup_table && ret)
+ chip_id = cpu_to_chip_id(cpu);
+
+ if (shared_caches)
+ submask_fn = cpu_l2_cache_mask;
+
+ /* Update core_mask with all the CPUs that are part of submask */
+ or_cpumasks_related(cpu, cpu, submask_fn, cpu_core_mask);
+
+ /* Skip all CPUs already part of current CPU core mask */
+ cpumask_andnot(mask, cpu_online_mask, cpu_core_mask(cpu));
+
+ /* If chip_id is -1; limit the cpu_core_mask to within PKG */
+ if (chip_id == -1)
+ cpumask_and(mask, mask, cpu_node_mask(cpu));
+
+ for_each_cpu(i, mask) {
+ if (chip_id == cpu_to_chip_id(i)) {
+ or_cpumasks_related(cpu, i, submask_fn, cpu_core_mask);
+ cpumask_andnot(mask, mask, submask_fn(i));
+ } else {
+ cpumask_andnot(mask, mask, cpu_core_mask(i));
+ }
+ }
+
+ free_cpumask_var(mask);
}
/* Activate a secondary processor. */
+__no_stack_protector
void start_secondary(void *unused)
{
- unsigned int cpu = smp_processor_id();
- int i, base;
+ unsigned int cpu = raw_smp_processor_id();
- atomic_inc(&init_mm.mm_count);
+ /* PPC64 calls setup_kup() in early_setup_secondary() */
+ if (IS_ENABLED(CONFIG_PPC32))
+ setup_kup();
+
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
+ VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm)));
+ cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
+ inc_mm_active_cpus(&init_mm);
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
- preempt_disable();
+ rcutree_report_cpu_starting(cpu);
cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
@@ -705,132 +1638,160 @@ void start_secondary(void *unused)
secondary_cpu_time_init();
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
if (system_state == SYSTEM_RUNNING)
- vdso_data->processorCount++;
+ systemcfg->processorCount++;
+#endif
+#ifdef CONFIG_PPC64
vdso_getcpu_init();
#endif
- /* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core; i++) {
- if (cpu_is_offline(base + i) && (cpu != base + i))
- continue;
- cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
-
- /* cpu_core_map should be a superset of
- * cpu_sibling_map even if we don't have cache
- * information, so update the former here, too.
- */
- cpumask_set_cpu(cpu, cpu_core_mask(base + i));
- cpumask_set_cpu(base + i, cpu_core_mask(cpu));
+ set_numa_node(numa_cpu_lookup_table[cpu]);
+ set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
+ /* Update topology CPU masks */
+ add_cpu_to_masks(cpu);
+
+ /*
+ * Check for any shared caches. Note that this must be done on a
+ * per-core basis because one core in the pair might be disabled.
+ */
+ if (!shared_caches) {
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
+ struct cpumask *mask = cpu_l2_cache_mask(cpu);
+
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
+
+ if (cpumask_weight(mask) > cpumask_weight(sibling_mask(cpu)))
+ shared_caches = true;
}
- traverse_core_siblings(cpu, true);
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ boot_init_stack_canary();
+
local_irq_enable();
- cpu_startup_entry(CPUHP_ONLINE);
+ /* We can enable ftrace for secondary cpus now */
+ this_cpu_enable_ftrace();
+
+ cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
BUG();
}
-int setup_profiling_timer(unsigned int multiplier)
-{
- return 0;
-}
+static struct sched_domain_topology_level powerpc_topology[6];
-#ifdef CONFIG_SCHED_SMT
-/* cpumask of CPUs with asymetric SMT dependancy */
-static int powerpc_smt_flags(void)
+static void __init build_sched_topology(void)
{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+ int i = 0;
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
- }
- return flags;
-}
-#endif
+ if (is_shared_processor() && has_big_cores)
+ static_branch_enable(&splpar_asym_pack);
-static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+ if (has_big_cores) {
+ pr_info("Big cores detected but using small core scheduling\n");
+ powerpc_topology[i++] =
+ SDTL_INIT(tl_smallcore_smt_mask, powerpc_smt_flags, SMT);
+ } else {
+ powerpc_topology[i++] = SDTL_INIT(tl_smt_mask, powerpc_smt_flags, SMT);
+ }
#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
+ if (shared_caches) {
+ powerpc_topology[i++] =
+ SDTL_INIT(tl_cache_mask, powerpc_shared_cache_flags, CACHE);
+ }
+
+ if (has_coregroup_support()) {
+ powerpc_topology[i++] =
+ SDTL_INIT(tl_mc_mask, powerpc_shared_proc_flags, MC);
+ }
+
+ powerpc_topology[i++] = SDTL_INIT(tl_pkg_mask, powerpc_shared_proc_flags, PKG);
+
+ /* There must be one trailing NULL entry left. */
+ BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
+
+ set_sched_topology(powerpc_topology);
+}
void __init smp_cpus_done(unsigned int max_cpus)
{
- cpumask_var_t old_mask;
-
- /* We want the setup_cpu() here to be called from CPU 0, but our
- * init thread may have been "borrowed" by another CPU in the meantime
- * se we pin us down to CPU 0 for a short while
+ /*
+ * We are running pinned to the boot CPU, see rest_init().
*/
- alloc_cpumask_var(&old_mask, GFP_NOWAIT);
- cpumask_copy(old_mask, tsk_cpus_allowed(current));
- set_cpus_allowed_ptr(current, cpumask_of(boot_cpuid));
-
if (smp_ops && smp_ops->setup_cpu)
smp_ops->setup_cpu(boot_cpuid);
- set_cpus_allowed_ptr(current, old_mask);
-
- free_cpumask_var(old_mask);
-
if (smp_ops && smp_ops->bringup_done)
smp_ops->bringup_done();
dump_numa_cpu_topology();
+ build_sched_topology();
+}
- set_sched_topology(powerpc_topology);
+/*
+ * For asym packing, by default lower numbered CPU has higher priority.
+ * On shared processors, pack to lower numbered core. However avoid moving
+ * between thread_groups within the same core.
+ */
+int arch_asym_cpu_priority(int cpu)
+{
+ if (static_branch_unlikely(&splpar_asym_pack))
+ return -cpu / threads_per_core;
+ return -cpu;
}
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
int cpu = smp_processor_id();
- int base, i;
int err;
if (!smp_ops->cpu_disable)
return -ENOSYS;
+ this_cpu_disable_ftrace();
+
err = smp_ops->cpu_disable();
if (err)
return err;
/* Update sibling maps */
- base = cpu_first_thread_sibling(cpu);
- for (i = 0; i < threads_per_core; i++) {
- cpumask_clear_cpu(cpu, cpu_sibling_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_sibling_mask(cpu));
- cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
- cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
- }
- traverse_core_siblings(cpu, false);
+ remove_cpu_from_masks(cpu);
return 0;
}
void __cpu_die(unsigned int cpu)
{
+ /*
+ * This could perhaps be a generic call in idle_task_dead(), but
+ * that requires testing from all archs, so first put it here.
+ */
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm)));
+ dec_mm_active_cpus(&init_mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
+
if (smp_ops->cpu_die)
smp_ops->cpu_die(cpu);
}
-void cpu_die(void)
+void __noreturn arch_cpu_idle_dead(void)
{
- if (ppc_md.cpu_die)
- ppc_md.cpu_die();
+ /*
+ * Disable on the down path. This will be re-enabled by
+ * start_secondary() via start_secondary_resume() below
+ */
+ this_cpu_disable_ftrace();
+
+ if (smp_ops->cpu_offline_self)
+ smp_ops->cpu_offline_self();
/* If we return, we re-enter start_secondary */
start_secondary_resume();
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 3d30ef1038e5..90882b5175cd 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -1,63 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+
/*
- * Stack trace utility
+ * Stack trace utility functions etc.
*
* Copyright 2008 Christoph Hellwig, IBM Corp.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * Copyright 2018 SUSE Linux GmbH
+ * Copyright 2018 Nick Piggin, Michael Ellerman, IBM Corp.
*/
+#include <linux/delay.h>
#include <linux/export.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/nmi.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/sched/task_stack.h>
#include <linux/stacktrace.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
+#include <linux/ftrace.h>
+#include <asm/kprobes.h>
+#include <linux/rethook.h>
-/*
- * Save stack-backtrace addresses into a stack_trace buffer.
- */
-static void save_context_stack(struct stack_trace *trace, unsigned long sp,
- struct task_struct *tsk, int savesched)
+#include <asm/paca.h>
+
+void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
+ unsigned long sp;
+
+ if (regs && !consume_entry(cookie, regs->nip))
+ return;
+
+ if (regs)
+ sp = regs->gpr[1];
+ else if (task == current)
+ sp = current_stack_frame();
+ else
+ sp = task->thread.ksp;
+
for (;;) {
unsigned long *stack = (unsigned long *) sp;
unsigned long newsp, ip;
- if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, task))
return;
newsp = stack[0];
ip = stack[STACK_FRAME_LR_SAVE];
- if (savesched || !in_sched_functions(ip)) {
- if (!trace->skip)
- trace->entries[trace->nr_entries++] = ip;
- else
- trace->skip--;
- }
-
- if (trace->nr_entries >= trace->max_entries)
+ if (!consume_entry(cookie, ip))
return;
sp = newsp;
}
}
-void save_stack_trace(struct stack_trace *trace)
+/*
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
+ */
+int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
unsigned long sp;
+ unsigned long newsp;
+ unsigned long stack_page = (unsigned long)task_stack_page(task);
+ unsigned long stack_end;
+ int graph_idx = 0;
+ bool firstframe;
+
+ stack_end = stack_page + THREAD_SIZE;
+
+ // See copy_thread() for details.
+ if (task->flags & PF_KTHREAD)
+ stack_end -= STACK_FRAME_MIN_SIZE;
+ else
+ stack_end -= STACK_USER_INT_FRAME_SIZE;
+
+ if (task == current)
+ sp = current_stack_frame();
+ else
+ sp = task->thread.ksp;
+
+ if (sp < stack_page + sizeof(struct thread_struct) ||
+ sp > stack_end - STACK_FRAME_MIN_SIZE) {
+ return -EINVAL;
+ }
+
+ for (firstframe = true; sp != stack_end;
+ firstframe = false, sp = newsp) {
+ unsigned long *stack = (unsigned long *) sp;
+ unsigned long ip;
+
+ /* sanity check: ABI requires SP to be aligned 16 bytes. */
+ if (sp & 0xF)
+ return -EINVAL;
+
+ newsp = stack[0];
+ /* Stack grows downwards; unwinder may only go up. */
+ if (newsp <= sp)
+ return -EINVAL;
+
+ if (newsp != stack_end &&
+ newsp > stack_end - STACK_FRAME_MIN_SIZE) {
+ return -EINVAL; /* invalid backlink, too far up. */
+ }
- asm("mr %0,1" : "=r" (sp));
+ /*
+ * We can only trust the bottom frame's backlink, the
+ * rest of the frame may be uninitialized, continue to
+ * the next.
+ */
+ if (firstframe)
+ continue;
- save_context_stack(trace, sp, current, 1);
+ /* Mark stacktraces with exception frames as unreliable. */
+ if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
+ stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
+ return -EINVAL;
+ }
+
+ /* Examine the saved LR: it must point into kernel code. */
+ ip = stack[STACK_FRAME_LR_SAVE];
+ if (!__kernel_text_address(ip))
+ return -EINVAL;
+
+ /*
+ * FIXME: IMHO these tests do not belong in
+ * arch-dependent code, they are generic.
+ */
+ ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack);
+
+ /*
+ * Mark stacktraces with kretprobed functions on them
+ * as unreliable.
+ */
+#ifdef CONFIG_RETHOOK
+ if (ip == (unsigned long)arch_rethook_trampoline)
+ return -EINVAL;
+#endif
+
+ if (!consume_entry(cookie, ip))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
+static void handle_backtrace_ipi(struct pt_regs *regs)
+{
+ nmi_cpu_backtrace(regs);
+}
+
+static void raise_backtrace_ipi(cpumask_t *mask)
+{
+ struct paca_struct *p;
+ unsigned int cpu;
+ u64 delay_us;
+
+ for_each_cpu(cpu, mask) {
+ if (cpu == smp_processor_id()) {
+ handle_backtrace_ipi(NULL);
+ continue;
+ }
+
+ delay_us = 5 * USEC_PER_SEC;
+
+ if (smp_send_safe_nmi_ipi(cpu, handle_backtrace_ipi, delay_us)) {
+ // Now wait up to 5s for the other CPU to do its backtrace
+ while (cpumask_test_cpu(cpu, mask) && delay_us) {
+ udelay(1);
+ delay_us--;
+ }
+
+ // Other CPU cleared itself from the mask
+ if (delay_us)
+ continue;
+ }
+
+ p = paca_ptrs[cpu];
+
+ cpumask_clear_cpu(cpu, mask);
+
+ pr_warn("CPU %d didn't respond to backtrace IPI, inspecting paca.\n", cpu);
+ if (!virt_addr_valid(p)) {
+ pr_warn("paca pointer appears corrupt? (%px)\n", p);
+ continue;
+ }
+
+ pr_warn("irq_soft_mask: 0x%02x in_mce: %d in_nmi: %d",
+ p->irq_soft_mask, p->in_mce, p->in_nmi);
+
+ if (virt_addr_valid(p->__current))
+ pr_cont(" current: %d (%s)\n", p->__current->pid,
+ p->__current->comm);
+ else
+ pr_cont(" current pointer corrupt? (%px)\n", p->__current);
+
+ pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1);
+ show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING);
+ }
}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu)
{
- save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+ nmi_trigger_cpumask_backtrace(mask, exclude_cpu, raise_backtrace_ipi);
}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c
new file mode 100644
index 000000000000..ec3101f95e53
--- /dev/null
+++ b/arch/powerpc/kernel/static_call.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/memory.h>
+#include <linux/static_call.h>
+
+#include <asm/text-patching.h>
+
+void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
+{
+ int err;
+ bool is_ret0 = (func == __static_call_return0);
+ unsigned long _tramp = (unsigned long)tramp;
+ unsigned long _func = (unsigned long)func;
+ unsigned long _ret0 = _tramp + PPC_SCT_RET0;
+ bool is_short = is_offset_in_branch_range((long)func - (long)(site ? : tramp));
+
+ mutex_lock(&text_mutex);
+
+ if (site && tail) {
+ if (!func)
+ err = patch_instruction(site, ppc_inst(PPC_RAW_BLR()));
+ else if (is_ret0)
+ err = patch_branch(site, _ret0, 0);
+ else if (is_short)
+ err = patch_branch(site, _func, 0);
+ else if (tramp)
+ err = patch_branch(site, _tramp, 0);
+ else
+ err = 0;
+ } else if (site) {
+ if (!func)
+ err = patch_instruction(site, ppc_inst(PPC_RAW_NOP()));
+ else if (is_ret0)
+ err = patch_instruction(site, ppc_inst(PPC_RAW_LI(_R3, 0)));
+ else if (is_short)
+ err = patch_branch(site, _func, BRANCH_SET_LINK);
+ else if (tramp)
+ err = patch_branch(site, _tramp, BRANCH_SET_LINK);
+ else
+ err = 0;
+ } else if (tramp) {
+ if (func && !is_short) {
+ err = patch_ulong(tramp + PPC_SCT_DATA, _func);
+ if (err)
+ goto out;
+ }
+
+ if (!func)
+ err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR()));
+ else if (is_ret0)
+ err = patch_branch(tramp, _ret0, 0);
+ else if (is_short)
+ err = patch_branch(tramp, _func, 0);
+ else
+ err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP()));
+ } else {
+ err = 0;
+ }
+
+out:
+ mutex_unlock(&text_mutex);
+
+ if (err)
+ panic("%s: patching failed %pS at %pS\n", __func__, func, tramp);
+}
+EXPORT_SYMBOL_GPL(arch_static_call_transform);
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c
index 0167d53da30c..b84992c10854 100644
--- a/arch/powerpc/kernel/suspend.c
+++ b/arch/powerpc/kernel/suspend.c
@@ -1,17 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Suspend support specific for power.
*
- * Distribute under GPLv2
- *
* Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
#include <linux/mm.h>
+#include <linux/suspend.h>
#include <asm/page.h>
-
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
+#include <asm/sections.h>
/*
* pfn_is_nosave - check if given pfn is in the 'nosave' section
diff --git a/arch/powerpc/kernel/switch.S b/arch/powerpc/kernel/switch.S
new file mode 100644
index 000000000000..59e3ee99db0e
--- /dev/null
+++ b/arch/powerpc/kernel/switch.S
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <linux/objtool.h>
+#include <asm/asm-offsets.h>
+#include <asm/code-patching-asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kup.h>
+#include <asm/thread_info.h>
+
+.section ".text","ax",@progbits
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * Cancel all explicit user streams as they will have no use after context
+ * switch and will stop the HW from creating streams itself
+ */
+#define STOP_STREAMS \
+ DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r6)
+
+#define FLUSH_COUNT_CACHE \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches1; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches2; \
+1: nop; \
+ patch_site 1b, patch__call_flush_branch_caches3
+
+.macro nops number
+ .rept \number
+ nop
+ .endr
+.endm
+
+.balign 32
+.global flush_branch_caches
+flush_branch_caches:
+ /* Save LR into r9 */
+ mflr r9
+
+ // Flush the link stack
+ .rept 64
+ bl .+4
+ .endr
+ b 1f
+ nops 6
+
+ .balign 32
+ /* Restore LR */
+1: mtlr r9
+
+ // If we're just flushing the link stack, return here
+3: nop
+ patch_site 3b patch__flush_link_stack_return
+
+ li r9,0x7fff
+ mtctr r9
+
+ PPC_BCCTR_FLUSH
+
+2: nop
+ patch_site 2b patch__flush_count_cache_return
+
+ nops 3
+
+ .rept 278
+ .balign 32
+ PPC_BCCTR_FLUSH
+ nops 7
+ .endr
+
+ blr
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+.balign 32
+/*
+ * New stack pointer in r8, old stack pointer in r1, must not clobber r3
+ */
+pin_stack_slb:
+BEGIN_FTR_SECTION
+ clrrdi r6,r8,28 /* get its ESID */
+ clrrdi r9,r1,28 /* get current sp ESID */
+FTR_SECTION_ELSE
+ clrrdi r6,r8,40 /* get its 1T ESID */
+ clrrdi r9,r1,40 /* get current sp 1T ESID */
+ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_1T_SEGMENT)
+ clrldi. r0,r6,2 /* is new ESID c00000000? */
+ cmpd cr1,r6,r9 /* or is new ESID the same as current ESID? */
+ cror eq,4*cr1+eq,eq
+ beq 2f /* if yes, don't slbie it */
+
+ /* Bolt in the new stack SLB entry */
+ ld r7,KSP_VSID(r4) /* Get new stack's VSID */
+ oris r0,r6,(SLB_ESID_V)@h
+ ori r0,r0,(SLB_NUM_BOLTED-1)@l
+BEGIN_FTR_SECTION
+ li r9,MMU_SEGSIZE_1T /* insert B field */
+ oris r6,r6,(MMU_SEGSIZE_1T << SLBIE_SSIZE_SHIFT)@h
+ rldimi r7,r9,SLB_VSID_SSIZE_SHIFT,0
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
+
+ /* Update the last bolted SLB. No write barriers are needed
+ * here, provided we only update the current CPU's SLB shadow
+ * buffer.
+ */
+ ld r9,PACA_SLBSHADOWPTR(r13)
+ li r12,0
+ std r12,SLBSHADOW_STACKESID(r9) /* Clear ESID */
+ li r12,SLBSHADOW_STACKVSID
+ STDX_BE r7,r12,r9 /* Save VSID */
+ li r12,SLBSHADOW_STACKESID
+ STDX_BE r0,r12,r9 /* Save ESID */
+
+ /* No need to check for MMU_FTR_NO_SLBIE_B here, since when
+ * we have 1TB segments, the only CPUs known to have the errata
+ * only support less than 1TB of system memory and we'll never
+ * actually hit this code path.
+ */
+
+ isync
+ slbie r6
+BEGIN_FTR_SECTION
+ slbie r6 /* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ slbmte r7,r0
+ isync
+2: blr
+ .size pin_stack_slb,.-pin_stack_slb
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#else
+#define STOP_STREAMS
+#define FLUSH_COUNT_CACHE
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+/*
+ * do_switch_32/64 have the same calling convention as _switch, i.e., r3,r4
+ * are prev and next thread_struct *, and they return prev task_struct * in r3.
+ *
+ * This switches the stack, current, and does other task switch housekeeping.
+ */
+.macro do_switch_32
+ tophys(r0,r4)
+ mtspr SPRN_SPRG_THREAD,r0 /* Update current THREAD phys addr */
+ lwz r1,KSP(r4) /* Load new stack pointer */
+
+ /* save the old current 'last' for return value */
+ mr r3,r2
+ addi r2,r4,-THREAD /* Update current */
+.endm
+
+.macro do_switch_64
+ ld r8,KSP(r4) /* Load new stack pointer */
+
+ kuap_check_amr r9, r10
+
+ FLUSH_COUNT_CACHE /* Clobbers r9, ctr */
+
+ STOP_STREAMS /* Clobbers r6 */
+
+ addi r3,r3,-THREAD /* old thread -> task_struct for return value */
+ addi r6,r4,-THREAD /* new thread -> task_struct */
+ std r6,PACACURRENT(r13) /* Set new task_struct to 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+ ld r6, TASK_CANARY(r6)
+ std r6, PACA_CANARY(r13)
+#endif
+ /* Set new PACAKSAVE */
+ clrrdi r7,r8,THREAD_SHIFT /* base of new stack */
+ addi r7,r7,THREAD_SIZE-SWITCH_FRAME_SIZE
+ std r7,PACAKSAVE(r13)
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+BEGIN_MMU_FTR_SECTION
+ bl pin_stack_slb
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+#endif
+ /*
+ * PMU interrupts in radix may come in here. They will use r1, not
+ * PACAKSAVE, so this stack switch will not cause a problem. They
+ * will store to the process stack, which may then be migrated to
+ * another CPU. However the rq lock release on this CPU paired with
+ * the rq lock acquire on the new CPU before the stack becomes
+ * active on the new CPU, will order those stores.
+ */
+ mr r1,r8 /* start using new stack pointer */
+.endm
+
+/*
+ * This routine switches between two different tasks. The process
+ * state of one is saved on its kernel stack. Then the state
+ * of the other is restored from its kernel stack. The memory
+ * management hardware is updated to the second process's state.
+ * Finally, we can return to the second process.
+ * On entry, r3 points to the THREAD for the current task, r4
+ * points to the THREAD for the new task.
+ *
+ * This routine is always called with interrupts disabled.
+ *
+ * Note: there are two ways to get to the "going out" portion
+ * of this code; either by coming in via the entry (_switch)
+ * or via "fork" which must set up an environment equivalent
+ * to the "_switch" path. If you change this, you'll have to
+ * change the fork code also.
+ *
+ * The code which creates the new task context is in 'copy_thread'
+ * in arch/powerpc/kernel/process.c
+ *
+ * Note: this uses SWITCH_FRAME_SIZE rather than USER_INT_FRAME_SIZE
+ * because we don't need to leave the redzone ABI gap at the top of
+ * the kernel stack.
+ */
+_GLOBAL(_switch)
+ PPC_CREATE_STACK_FRAME(SWITCH_FRAME_SIZE)
+ PPC_STL r1,KSP(r3) /* Set old stack pointer */
+ SAVE_NVGPRS(r1) /* volatiles are caller-saved -- Cort */
+ PPC_STL r0,_NIP(r1) /* Return to switch caller */
+ mfcr r0
+ stw r0,_CCR(r1)
+
+ /*
+ * On SMP kernels, care must be taken because a task may be
+ * scheduled off CPUx and on to CPUy. Memory ordering must be
+ * considered.
+ *
+ * Cacheable stores on CPUx will be visible when the task is
+ * scheduled on CPUy by virtue of the core scheduler barriers
+ * (see "Notes on Program-Order guarantees on SMP systems." in
+ * kernel/sched/core.c).
+ *
+ * Uncacheable stores in the case of involuntary preemption must
+ * be taken care of. The smp_mb__after_spinlock() in __schedule()
+ * is implemented as hwsync on powerpc, which orders MMIO too. So
+ * long as there is an hwsync in the context switch path, it will
+ * be executed on the source CPU after the task has performed
+ * all MMIO ops on that CPU, and on the destination CPU before the
+ * task performs any MMIO ops there.
+ */
+
+ /*
+ * The kernel context switch path must contain a spin_lock,
+ * which contains larx/stcx, which will clear any reservation
+ * of the task being switched.
+ */
+
+#ifdef CONFIG_PPC32
+ do_switch_32
+#else
+ do_switch_64
+#endif
+
+ lwz r0,_CCR(r1)
+ mtcrf 0xFF,r0
+ REST_NVGPRS(r1) /* volatiles are destroyed -- Cort */
+ PPC_LL r0,_NIP(r1) /* Return to _switch caller in new task */
+ mtlr r0
+ addi r1,r1,SWITCH_FRAME_SIZE
+ blr
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index eae33e10b65f..41dcb2175299 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -1,15 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common powerpc suspend code for 32 and 64 bits
*
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/sched.h>
+#include <linux/suspend.h>
#include <asm/current.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
@@ -20,9 +17,7 @@ void save_processor_state(void)
* flush out all the special registers so we don't need
* to save them in the snapshot
*/
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_spe_to_thread(current);
+ flush_all_to_thread(current);
#ifdef CONFIG_PPC64
hard_irq_disable();
@@ -33,6 +28,6 @@ void save_processor_state(void)
void restore_processor_state(void)
{
#ifdef CONFIG_PPC32
- switch_mmu_context(current->active_mm, current->active_mm);
+ switch_mmu_context(current->active_mm, current->active_mm, NULL);
#endif
}
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index ba4dee3d233f..ffb79326483c 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -1,4 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/threads.h>
+#include <linux/linkage.h>
+
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/cputable.h>
@@ -6,6 +9,7 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/mmu.h>
+#include <asm/feature-fixups.h>
/*
* Structure for storing CPU registers on the save area.
@@ -23,11 +27,19 @@
#define SL_IBAT2 0x48
#define SL_DBAT3 0x50
#define SL_IBAT3 0x58
-#define SL_TB 0x60
-#define SL_R2 0x68
-#define SL_CR 0x6c
-#define SL_LR 0x70
-#define SL_R12 0x74 /* r12 to r31 */
+#define SL_DBAT4 0x60
+#define SL_IBAT4 0x68
+#define SL_DBAT5 0x70
+#define SL_IBAT5 0x78
+#define SL_DBAT6 0x80
+#define SL_IBAT6 0x88
+#define SL_DBAT7 0x90
+#define SL_IBAT7 0x98
+#define SL_TB 0xa0
+#define SL_R2 0xa8
+#define SL_CR 0xac
+#define SL_LR 0xb0
+#define SL_R12 0xb4 /* r12 to r31 */
#define SL_SIZE (SL_R12 + 80)
.section .data
@@ -112,6 +124,41 @@ _GLOBAL(swsusp_arch_suspend)
mfibatl r4,3
stw r4,SL_IBAT3+4(r11)
+BEGIN_MMU_FTR_SECTION
+ mfspr r4,SPRN_DBAT4U
+ stw r4,SL_DBAT4(r11)
+ mfspr r4,SPRN_DBAT4L
+ stw r4,SL_DBAT4+4(r11)
+ mfspr r4,SPRN_DBAT5U
+ stw r4,SL_DBAT5(r11)
+ mfspr r4,SPRN_DBAT5L
+ stw r4,SL_DBAT5+4(r11)
+ mfspr r4,SPRN_DBAT6U
+ stw r4,SL_DBAT6(r11)
+ mfspr r4,SPRN_DBAT6L
+ stw r4,SL_DBAT6+4(r11)
+ mfspr r4,SPRN_DBAT7U
+ stw r4,SL_DBAT7(r11)
+ mfspr r4,SPRN_DBAT7L
+ stw r4,SL_DBAT7+4(r11)
+ mfspr r4,SPRN_IBAT4U
+ stw r4,SL_IBAT4(r11)
+ mfspr r4,SPRN_IBAT4L
+ stw r4,SL_IBAT4+4(r11)
+ mfspr r4,SPRN_IBAT5U
+ stw r4,SL_IBAT5(r11)
+ mfspr r4,SPRN_IBAT5L
+ stw r4,SL_IBAT5+4(r11)
+ mfspr r4,SPRN_IBAT6U
+ stw r4,SL_IBAT6(r11)
+ mfspr r4,SPRN_IBAT6L
+ stw r4,SL_IBAT6+4(r11)
+ mfspr r4,SPRN_IBAT7U
+ stw r4,SL_IBAT7(r11)
+ mfspr r4,SPRN_IBAT7L
+ stw r4,SL_IBAT7+4(r11)
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+
#if 0
/* Backup various CPU config stuffs */
bl __save_cpu_setup
@@ -136,7 +183,7 @@ _GLOBAL(swsusp_arch_resume)
#ifdef CONFIG_ALTIVEC
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
sync
@@ -277,27 +324,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
mtibatu 3,r4
lwz r4,SL_IBAT3+4(r11)
mtibatl 3,r4
-#endif
-
BEGIN_MMU_FTR_SECTION
- li r4,0
+ lwz r4,SL_DBAT4(r11)
mtspr SPRN_DBAT4U,r4
+ lwz r4,SL_DBAT4+4(r11)
mtspr SPRN_DBAT4L,r4
+ lwz r4,SL_DBAT5(r11)
mtspr SPRN_DBAT5U,r4
+ lwz r4,SL_DBAT5+4(r11)
mtspr SPRN_DBAT5L,r4
+ lwz r4,SL_DBAT6(r11)
mtspr SPRN_DBAT6U,r4
+ lwz r4,SL_DBAT6+4(r11)
mtspr SPRN_DBAT6L,r4
+ lwz r4,SL_DBAT7(r11)
mtspr SPRN_DBAT7U,r4
+ lwz r4,SL_DBAT7+4(r11)
mtspr SPRN_DBAT7L,r4
+ lwz r4,SL_IBAT4(r11)
mtspr SPRN_IBAT4U,r4
+ lwz r4,SL_IBAT4+4(r11)
mtspr SPRN_IBAT4L,r4
+ lwz r4,SL_IBAT5(r11)
mtspr SPRN_IBAT5U,r4
+ lwz r4,SL_IBAT5+4(r11)
mtspr SPRN_IBAT5L,r4
+ lwz r4,SL_IBAT6(r11)
mtspr SPRN_IBAT6U,r4
+ lwz r4,SL_IBAT6+4(r11)
mtspr SPRN_IBAT6L,r4
+ lwz r4,SL_IBAT7(r11)
mtspr SPRN_IBAT7U,r4
+ lwz r4,SL_IBAT7+4(r11)
mtspr SPRN_IBAT7L,r4
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
+#endif
/* Flush all TLBs */
lis r4,0x1000
@@ -336,15 +397,18 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
li r3,0
blr
+_ASM_NOKPROBE_SYMBOL(swsusp_arch_resume)
/* FIXME:This construct is actually not useful since we don't shut
* down the instruction MMU, we could just flip back MSR-DR on.
*/
-turn_on_mmu:
+SYM_FUNC_START_LOCAL(turn_on_mmu)
mflr r4
mtsrr0 r4
mtsrr1 r3
sync
isync
rfi
+_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+SYM_FUNC_END(turn_on_mmu)
diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c
index 0e899e47c325..50fa8fc9ef95 100644
--- a/arch/powerpc/kernel/swsusp_64.c
+++ b/arch/powerpc/kernel/swsusp_64.c
@@ -1,15 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PowerPC 64-bit swsusp implementation
*
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * GPLv2
*/
#include <asm/iommu.h>
#include <linux/irq.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
+#include <linux/nmi.h>
+
+void do_after_copyback(void);
void do_after_copyback(void)
{
@@ -17,8 +19,3 @@ void do_after_copyback(void)
touch_softlockup_watchdog();
mb();
}
-
-void _iommu_save(void)
-{
- iommu_save();
-}
diff --git a/arch/powerpc/kernel/swsusp_booke.S b/arch/powerpc/kernel/swsusp_85xx.S
index 553c1405ee05..88cfdbd530f1 100644
--- a/arch/powerpc/kernel/swsusp_booke.S
+++ b/arch/powerpc/kernel/swsusp_85xx.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Based on swsusp_32.S, modified for FSL BookE by
* Anton Vorontsov <avorontsov@ru.mvista.com>
diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S
index 988f38dced0f..f645652c2654 100644
--- a/arch/powerpc/kernel/swsusp_asm64.S
+++ b/arch/powerpc/kernel/swsusp_asm64.S
@@ -1,9 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* PowerPC 64-bit swsusp implementation
*
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- *
- * GPLv2
*/
#include <linux/threads.h>
@@ -13,6 +12,7 @@
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
/*
* Structure for storing CPU registers on the save area.
@@ -76,16 +76,10 @@
swsusp_save_area:
.space SL_SIZE
- .section ".toc","aw"
-swsusp_save_area_ptr:
- .tc swsusp_save_area[TC],swsusp_save_area
-restore_pblist_ptr:
- .tc restore_pblist[TC],restore_pblist
-
.section .text
.align 5
_GLOBAL(swsusp_arch_suspend)
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
SAVE_SPECIAL(LR)
SAVE_REGISTER(r1)
SAVE_SPECIAL(CR)
@@ -128,11 +122,10 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
* stack pointer on the stack like a real stackframe */
addi r1,r1,-128
- bl _iommu_save
bl swsusp_save
/* restore LR */
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
RESTORE_SPECIAL(LR)
addi r1,r1,128
@@ -142,11 +135,11 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
_GLOBAL(swsusp_arch_resume)
/* Stop pending alitvec streams and memory accesses */
BEGIN_FTR_SECTION
- DSSALL
+ PPC_DSSALL
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
sync
- ld r12,restore_pblist_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, restore_pblist)
ld r12,0(r12)
cmpdi r12,0
@@ -179,7 +172,7 @@ nothing_to_copy:
sld r3, r3, r0
li r0, 0
1:
- dcbf r0,r3
+ dcbf 0,r3
addi r3,r3,0x20
bdnz 1b
@@ -188,7 +181,7 @@ nothing_to_copy:
tlbia
#endif
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
RESTORE_SPECIAL(CR)
@@ -261,12 +254,12 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_LPAR)
addi r1,r1,-128
#ifdef CONFIG_PPC_BOOK3S_64
- bl slb_flush_and_rebolt
+ bl slb_flush_and_restore_bolted
#endif
bl do_after_copyback
addi r1,r1,128
- ld r11,swsusp_save_area_ptr@toc(r2)
+ LOAD_REG_ADDR(r11, swsusp_save_area)
RESTORE_SPECIAL(LR)
li r3, 0
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 8a285876aef8..d451a8229223 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -1,17 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * sys_ppc32.c: Conversion between 32bit and 64bit native syscalls.
+ * sys_ppc32.c: 32-bit system calls with complex calling conventions.
*
* Copyright (C) 2001 IBM
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
*
- * These routines maintain argument size conversion between 32bit and 64bit
- * environment.
+ * 32-bit system calls with 64-bit arguments pass those in register pairs.
+ * This must be specially dealt with on 64-bit kernels. The compat_arg_u64_dual
+ * in generic compat syscalls is not always usable because the register
+ * pairing is constrained depending on preceding arguments.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
+ * An analogous problem exists on 32-bit kernels with ARCH_HAS_SYSCALL_WRAPPER,
+ * the defined system call functions take the pt_regs as an argument, and there
+ * is a mapping macro which maps registers to arguments
+ * (SC_POWERPC_REGS_TO_ARGS) which also does not deal with these 64-bit
+ * arguments.
+ *
+ * This file contains these system calls.
*/
#include <linux/kernel.h>
@@ -29,7 +35,6 @@
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
-#include <linux/mman.h>
#include <linux/in.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
@@ -44,7 +49,7 @@
#include <asm/ptrace.h>
#include <asm/types.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/time.h>
#include <asm/mmu_context.h>
@@ -52,77 +57,79 @@
#include <asm/syscalls.h>
#include <asm/switch_to.h>
+#ifdef CONFIG_PPC32
+#define PPC32_SYSCALL_DEFINE4 SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5 SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6 SYSCALL_DEFINE6
+#else
+#define PPC32_SYSCALL_DEFINE4 COMPAT_SYSCALL_DEFINE4
+#define PPC32_SYSCALL_DEFINE5 COMPAT_SYSCALL_DEFINE5
+#define PPC32_SYSCALL_DEFINE6 COMPAT_SYSCALL_DEFINE6
+#endif
-asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
- compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- compat_uptr_t tvp_x)
+PPC32_SYSCALL_DEFINE6(ppc_pread64,
+ unsigned int, fd,
+ char __user *, ubuf, compat_size_t, count,
+ u32, reg6, u32, pos1, u32, pos2)
{
- /* sign extend n */
- return compat_sys_select((int)n, inp, outp, exp, compat_ptr(tvp_x));
+ return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
}
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+PPC32_SYSCALL_DEFINE6(ppc_pwrite64,
+ unsigned int, fd,
+ const char __user *, ubuf, compat_size_t, count,
+ u32, reg6, u32, pos1, u32, pos2)
{
- /* This should remain 12 even if PAGE_SIZE changes */
- return sys_mmap(addr, len, prot, flags, fd, pgoff << 12);
+ return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
}
-/*
- * long long munging:
- * The 32 bit ABI passes long longs in an odd even register pair.
- */
-
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, compat_size_t count,
- u32 reg6, u32 poshi, u32 poslo)
+PPC32_SYSCALL_DEFINE5(ppc_readahead,
+ int, fd, u32, r4,
+ u32, offset1, u32, offset2, u32, count)
{
- return sys_pread64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+ return ksys_readahead(fd, merge_64(offset1, offset2), count);
}
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, compat_size_t count,
- u32 reg6, u32 poshi, u32 poslo)
+PPC32_SYSCALL_DEFINE4(ppc_truncate64,
+ const char __user *, path, u32, reg4,
+ unsigned long, len1, unsigned long, len2)
{
- return sys_pwrite64(fd, ubuf, count, ((loff_t)poshi << 32) | poslo);
+ return ksys_truncate(path, merge_64(len1, len2));
}
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offhi, u32 offlo, u32 count)
+PPC32_SYSCALL_DEFINE4(ppc_ftruncate64,
+ unsigned int, fd, u32, reg4,
+ unsigned long, len1, unsigned long, len2)
{
- return sys_readahead(fd, ((loff_t)offhi << 32) | offlo, count);
+ return ksys_ftruncate(fd, merge_64(len1, len2));
}
-asmlinkage int compat_sys_truncate64(const char __user * path, u32 reg4,
- unsigned long high, unsigned long low)
+PPC32_SYSCALL_DEFINE6(ppc32_fadvise64,
+ int, fd, u32, unused, u32, offset1, u32, offset2,
+ size_t, len, int, advice)
{
- return sys_truncate(path, (high << 32) | low);
+ return ksys_fadvise64_64(fd, merge_64(offset1, offset2), len,
+ advice);
}
-asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
- u32 lenhi, u32 lenlo)
+PPC32_SYSCALL_DEFINE6(ppc_sync_file_range2,
+ int, fd, unsigned int, flags,
+ unsigned int, offset1, unsigned int, offset2,
+ unsigned int, nbytes1, unsigned int, nbytes2)
{
- return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
- ((loff_t)lenhi << 32) | lenlo);
-}
+ loff_t offset = merge_64(offset1, offset2);
+ loff_t nbytes = merge_64(nbytes1, nbytes2);
-asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
- unsigned long low)
-{
- return sys_ftruncate(fd, (high << 32) | low);
+ return ksys_sync_file_range(fd, offset, nbytes, flags);
}
-long ppc32_fadvise64(int fd, u32 unused, u32 offset_high, u32 offset_low,
- size_t len, int advice)
+#ifdef CONFIG_PPC32
+SYSCALL_DEFINE6(ppc_fallocate,
+ int, fd, int, mode,
+ u32, offset1, u32, offset2, u32, len1, u32, len2)
{
- return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low, len,
- advice);
-}
-
-asmlinkage long compat_sys_sync_file_range2(int fd, unsigned int flags,
- unsigned offset_hi, unsigned offset_lo,
- unsigned nbytes_hi, unsigned nbytes_lo)
-{
- loff_t offset = ((loff_t)offset_hi << 32) | offset_lo;
- loff_t nbytes = ((loff_t)nbytes_hi << 32) | nbytes_lo;
-
- return sys_sync_file_range(fd, offset, nbytes, flags);
+ return ksys_fallocate(fd, mode,
+ merge_64(offset1, offset2),
+ merge_64(len1, len2));
}
+#endif
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
new file mode 100644
index 000000000000..be159ad4b77b
--- /dev/null
+++ b/arch/powerpc/kernel/syscall.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/compat.h>
+#include <linux/context_tracking.h>
+#include <linux/randomize_kstack.h>
+
+#include <asm/interrupt.h>
+#include <asm/kup.h>
+#include <asm/syscall.h>
+#include <asm/time.h>
+#include <asm/tm.h>
+#include <asm/unistd.h>
+
+
+/* Has to run notrace because it is entered not completely "reconciled" */
+notrace long system_call_exception(struct pt_regs *regs, unsigned long r0)
+{
+ long ret;
+ syscall_fn f;
+
+ kuap_lock();
+
+ add_random_kstack_offset();
+
+ if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG))
+ BUG_ON(irq_soft_mask_return() != IRQS_ALL_DISABLED);
+
+ trace_hardirqs_off(); /* finish reconciling */
+
+ CT_WARN_ON(ct_state() == CT_STATE_KERNEL);
+ user_exit_irqoff();
+
+ BUG_ON(regs_is_unrecoverable(regs));
+ BUG_ON(!user_mode(regs));
+ BUG_ON(arch_irq_disabled_regs(regs));
+
+#ifdef CONFIG_PPC_PKEY
+ if (mmu_has_feature(MMU_FTR_PKEY)) {
+ unsigned long amr, iamr;
+ bool flush_needed = false;
+ /*
+ * When entering from userspace we mostly have the AMR/IAMR
+ * different from kernel default values. Hence don't compare.
+ */
+ amr = mfspr(SPRN_AMR);
+ iamr = mfspr(SPRN_IAMR);
+ regs->amr = amr;
+ regs->iamr = iamr;
+ if (mmu_has_feature(MMU_FTR_KUAP)) {
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ flush_needed = true;
+ }
+ if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) {
+ mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+ flush_needed = true;
+ }
+ if (flush_needed)
+ isync();
+ } else
+#endif
+ kuap_assert_locked();
+
+ booke_restore_dbcr0();
+
+ account_cpu_user_entry();
+
+ account_stolen_time();
+
+ /*
+ * This is not required for the syscall exit path, but makes the
+ * stack frame look nicer. If this was initialised in the first stack
+ * frame, or if the unwinder was taught the first stack frame always
+ * returns to user with IRQS_ENABLED, this store could be avoided!
+ */
+ irq_soft_mask_regs_set_state(regs, IRQS_ENABLED);
+
+ /*
+ * If system call is called with TM active, set _TIF_RESTOREALL to
+ * prevent RFSCV being used to return to userspace, because POWER9
+ * TM implementation has problems with this instruction returning to
+ * transactional state. Final register values are not relevant because
+ * the transaction will be aborted upon return anyway. Or in the case
+ * of unsupported_scv SIGILL fault, the return state does not much
+ * matter because it's an edge case.
+ */
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ unlikely(MSR_TM_TRANSACTIONAL(regs->msr)))
+ set_bits(_TIF_RESTOREALL, &current_thread_info()->flags);
+
+ /*
+ * If the system call was made with a transaction active, doom it and
+ * return without performing the system call. Unless it was an
+ * unsupported scv vector, in which case it's treated like an illegal
+ * instruction.
+ */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (unlikely(MSR_TM_TRANSACTIONAL(regs->msr)) &&
+ !trap_is_unsupported_scv(regs)) {
+ /* Enable TM in the kernel, and disable EE (for scv) */
+ hard_irq_disable();
+ mtmsr(mfmsr() | MSR_TM);
+
+ /* tabort, this dooms the transaction, nothing else */
+ asm volatile(".long 0x7c00071d | ((%0) << 16)"
+ :: "r"(TM_CAUSE_SYSCALL|TM_CAUSE_PERSISTENT));
+
+ /*
+ * Userspace will never see the return value. Execution will
+ * resume after the tbegin. of the aborted transaction with the
+ * checkpointed register state. A context switch could occur
+ * or signal delivered to the process before resuming the
+ * doomed transaction context, but that should all be handled
+ * as expected.
+ */
+ return -ENOSYS;
+ }
+#endif // CONFIG_PPC_TRANSACTIONAL_MEM
+
+ local_irq_enable();
+
+ if (unlikely(read_thread_flags() & _TIF_SYSCALL_DOTRACE)) {
+ if (unlikely(trap_is_unsupported_scv(regs))) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
+ /*
+ * We use the return value of do_syscall_trace_enter() as the
+ * syscall number. If the syscall was rejected for any reason
+ * do_syscall_trace_enter() returns an invalid syscall number
+ * and the test against NR_syscalls will fail and the return
+ * value to be used is in regs->gpr[3].
+ */
+ r0 = do_syscall_trace_enter(regs);
+ if (unlikely(r0 >= NR_syscalls))
+ return regs->gpr[3];
+
+ } else if (unlikely(r0 >= NR_syscalls)) {
+ if (unlikely(trap_is_unsupported_scv(regs))) {
+ /* Unsupported scv vector */
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return regs->gpr[3];
+ }
+ return -ENOSYS;
+ }
+
+ /* May be faster to do array_index_nospec? */
+ barrier_nospec();
+
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+ // No COMPAT if we have SYSCALL_WRAPPER, see Kconfig
+ f = (void *)sys_call_table[r0];
+ ret = f(regs);
+#else
+ if (unlikely(is_compat_task())) {
+ unsigned long r3, r4, r5, r6, r7, r8;
+
+ f = (void *)compat_sys_call_table[r0];
+
+ r3 = regs->gpr[3] & 0x00000000ffffffffULL;
+ r4 = regs->gpr[4] & 0x00000000ffffffffULL;
+ r5 = regs->gpr[5] & 0x00000000ffffffffULL;
+ r6 = regs->gpr[6] & 0x00000000ffffffffULL;
+ r7 = regs->gpr[7] & 0x00000000ffffffffULL;
+ r8 = regs->gpr[8] & 0x00000000ffffffffULL;
+
+ ret = f(r3, r4, r5, r6, r7, r8);
+ } else {
+ f = (void *)sys_call_table[r0];
+
+ ret = f(regs->gpr[3], regs->gpr[4], regs->gpr[5],
+ regs->gpr[6], regs->gpr[7], regs->gpr[8]);
+ }
+#endif
+
+ /*
+ * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
+ * so the maximum stack offset is 1k bytes (10 bits).
+ *
+ * The actual entropy will be further reduced by the compiler when
+ * applying stack alignment constraints: the powerpc architecture
+ * may have two kinds of stack alignment (16-bytes and 8-bytes).
+ *
+ * So the resulting 6 or 7 bits of entropy is seen in SP[9:4] or SP[9:3].
+ */
+ choose_random_kstack_offset(mftb());
+
+ return ret;
+}
diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
index cd9be9aa016d..68ebb23a5af4 100644
--- a/arch/powerpc/kernel/syscalls.c
+++ b/arch/powerpc/kernel/syscalls.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Implementation of various system calls for Linux/PowerPC
*
@@ -11,12 +12,6 @@
* This file contains various random system calls that
* have a non-standard calling sequence on the Linux/PPC
* platform.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/errno.h>
@@ -36,102 +31,97 @@
#include <linux/file.h>
#include <linux/personality.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/syscalls.h>
#include <asm/time.h>
#include <asm/unistd.h>
-static inline unsigned long do_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long off, int shift)
+static long do_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long off, int shift)
{
- unsigned long ret = -EINVAL;
-
- if (!arch_validate_prot(prot))
- goto out;
+ if (!arch_validate_prot(prot, addr))
+ return -EINVAL;
- if (shift) {
- if (off & ((1 << shift) - 1))
- goto out;
- off >>= shift;
- }
+ if (!IS_ALIGNED(off, 1 << shift))
+ return -EINVAL;
- ret = sys_mmap_pgoff(addr, len, prot, flags, fd, off);
-out:
- return ret;
+ return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> shift);
}
-unsigned long sys_mmap2(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+SYSCALL_DEFINE6(mmap2, unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
{
return do_mmap2(addr, len, prot, flags, fd, pgoff, PAGE_SHIFT-12);
}
-unsigned long sys_mmap(unsigned long addr, size_t len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, off_t offset)
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE6(mmap2,
+ unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, off_4k)
{
- return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
+ return do_mmap2(addr, len, prot, flags, fd, off_4k, PAGE_SHIFT-12);
}
+#endif
-#ifdef CONFIG_PPC32
-/*
- * Due to some executables calling the wrong select we sometimes
- * get wrong args. This determines how the args are being passed
- * (a single ptr to them all args passed) then calls
- * sys_select() with the appropriate args. -- Cort
- */
-int
-ppc_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp)
+SYSCALL_DEFINE6(mmap, unsigned long, addr, size_t, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, off_t, offset)
{
- if ( (unsigned long)n >= 4096 )
- {
- unsigned long __user *buffer = (unsigned long __user *)n;
- if (!access_ok(VERIFY_READ, buffer, 5*sizeof(unsigned long))
- || __get_user(n, buffer)
- || __get_user(inp, ((fd_set __user * __user *)(buffer+1)))
- || __get_user(outp, ((fd_set __user * __user *)(buffer+2)))
- || __get_user(exp, ((fd_set __user * __user *)(buffer+3)))
- || __get_user(tvp, ((struct timeval __user * __user *)(buffer+4))))
- return -EFAULT;
- }
- return sys_select(n, inp, outp, exp, tvp);
+ return do_mmap2(addr, len, prot, flags, fd, offset, PAGE_SHIFT);
}
-#endif
#ifdef CONFIG_PPC64
-long ppc64_personality(unsigned long personality)
+static long do_ppc64_personality(unsigned long personality)
{
long ret;
if (personality(current->personality) == PER_LINUX32
&& personality(personality) == PER_LINUX)
personality = (personality & ~PER_MASK) | PER_LINUX32;
- ret = sys_personality(personality);
+ ret = ksys_personality(personality);
if (personality(ret) == PER_LINUX32)
ret = (ret & ~PER_MASK) | PER_LINUX;
return ret;
}
-#endif
-long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
- u32 len_high, u32 len_low)
+SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
+{
+ return do_ppc64_personality(personality);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE1(ppc64_personality, unsigned long, personality)
{
- return sys_fadvise64(fd, (u64)offset_high << 32 | offset_low,
- (u64)len_high << 32 | len_low, advice);
+ return do_ppc64_personality(personality);
}
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
-void do_show_syscall(unsigned long r3, unsigned long r4, unsigned long r5,
- unsigned long r6, unsigned long r7, unsigned long r8,
- struct pt_regs *regs)
+SYSCALL_DEFINE6(ppc_fadvise64_64,
+ int, fd, int, advice, u32, offset_high, u32, offset_low,
+ u32, len_high, u32, len_low)
{
- printk("syscall %ld(%lx, %lx, %lx, %lx, %lx, %lx) regs=%p current=%p"
- " cpu=%d\n", regs->gpr[0], r3, r4, r5, r6, r7, r8, regs,
- current, smp_processor_id());
+ return ksys_fadvise64_64(fd, merge_64(offset_high, offset_low),
+ merge_64(len_high, len_low), advice);
}
-void do_show_syscall_exit(unsigned long r3)
+SYSCALL_DEFINE0(switch_endian)
{
- printk(" -> %lx, current=%p cpu=%d\n", r3, current, smp_processor_id());
+ struct thread_info *ti;
+
+ regs_set_return_msr(current->thread.regs,
+ current->thread.regs->msr ^ MSR_LE);
+
+ /*
+ * Set TIF_RESTOREALL so that r3 isn't clobbered on return to
+ * userspace. That also has the effect of restoring the non-volatile
+ * GPRs, so we saved them on the way in here.
+ */
+ ti = current_thread_info();
+ ti->flags |= _TIF_RESTOREALL;
+
+ return 0;
}
diff --git a/arch/powerpc/kernel/syscalls/Makefile b/arch/powerpc/kernel/syscalls/Makefile
new file mode 100644
index 000000000000..9d7bd81510b8
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/Makefile
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0
+kapi := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
+
+$(shell mkdir -p $(uapi) $(kapi))
+
+syscall := $(src)/syscall.tbl
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
+
+quiet_cmd_syshdr = SYSHDR $@
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
+
+quiet_cmd_systbl = SYSTBL $@
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
+
+$(uapi)/unistd_32.h: abis := common,nospu,32
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
+ $(call if_changed,syshdr)
+
+$(uapi)/unistd_64.h: abis := common,nospu,64
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
+ $(call if_changed,syshdr)
+
+$(kapi)/syscall_table_32.h: abis := common,nospu,32
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,systbl)
+
+$(kapi)/syscall_table_64.h: abis := common,nospu,64
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,systbl)
+
+$(kapi)/syscall_table_spu.h: abis := common,spu
+$(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE
+ $(call if_changed,systbl)
+
+uapisyshdr-y += unistd_32.h unistd_64.h
+kapisyshdr-y += syscall_table_32.h \
+ syscall_table_64.h \
+ syscall_table_spu.h
+
+uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
+
+PHONY += all
+all: $(uapisyshdr-y) $(kapisyshdr-y)
+ @:
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
new file mode 100644
index 000000000000..b453e80dfc00
--- /dev/null
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -0,0 +1,562 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# system call numbers and entry vectors for powerpc
+#
+# The format is:
+# <number> <abi> <name> <entry point> <compat entry point>
+#
+# The <abi> can be common, spu, nospu, 64, or 32 for this file.
+#
+0 nospu restart_syscall sys_restart_syscall
+1 nospu exit sys_exit
+2 nospu fork sys_fork
+3 common read sys_read
+4 common write sys_write
+5 common open sys_open compat_sys_open
+6 common close sys_close
+7 common waitpid sys_waitpid
+8 common creat sys_creat
+9 common link sys_link
+10 common unlink sys_unlink
+11 nospu execve sys_execve compat_sys_execve
+12 common chdir sys_chdir
+13 32 time sys_time32
+13 64 time sys_time
+13 spu time sys_time
+14 common mknod sys_mknod
+15 common chmod sys_chmod
+16 common lchown sys_lchown
+17 common break sys_ni_syscall
+18 32 oldstat sys_stat sys_ni_syscall
+18 64 oldstat sys_ni_syscall
+18 spu oldstat sys_ni_syscall
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid
+21 nospu mount sys_mount
+22 32 umount sys_oldumount
+22 64 umount sys_ni_syscall
+22 spu umount sys_ni_syscall
+23 common setuid sys_setuid
+24 common getuid sys_getuid
+25 32 stime sys_stime32
+25 64 stime sys_stime
+25 spu stime sys_stime
+26 nospu ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm
+28 32 oldfstat sys_fstat sys_ni_syscall
+28 64 oldfstat sys_ni_syscall
+28 spu oldfstat sys_ni_syscall
+29 nospu pause sys_pause
+30 32 utime sys_utime32
+30 64 utime sys_utime
+31 common stty sys_ni_syscall
+32 common gtty sys_ni_syscall
+33 common access sys_access
+34 common nice sys_nice
+35 common ftime sys_ni_syscall
+36 common sync sys_sync
+37 common kill sys_kill
+38 common rename sys_rename
+39 common mkdir sys_mkdir
+40 common rmdir sys_rmdir
+41 common dup sys_dup
+42 common pipe sys_pipe
+43 common times sys_times compat_sys_times
+44 common prof sys_ni_syscall
+45 common brk sys_brk
+46 common setgid sys_setgid
+47 common getgid sys_getgid
+48 nospu signal sys_signal
+49 common geteuid sys_geteuid
+50 common getegid sys_getegid
+51 nospu acct sys_acct
+52 nospu umount2 sys_umount
+53 common lock sys_ni_syscall
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+56 common mpx sys_ni_syscall
+57 common setpgid sys_setpgid
+58 common ulimit sys_ni_syscall
+59 32 oldolduname sys_olduname
+59 64 oldolduname sys_ni_syscall
+59 spu oldolduname sys_ni_syscall
+60 common umask sys_umask
+61 common chroot sys_chroot
+62 nospu ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2
+64 common getppid sys_getppid
+65 common getpgrp sys_getpgrp
+66 common setsid sys_setsid
+67 32 sigaction sys_sigaction compat_sys_sigaction
+67 64 sigaction sys_ni_syscall
+67 spu sigaction sys_ni_syscall
+68 common sgetmask sys_sgetmask
+69 common ssetmask sys_ssetmask
+70 common setreuid sys_setreuid
+71 common setregid sys_setregid
+72 32 sigsuspend sys_sigsuspend
+72 64 sigsuspend sys_ni_syscall
+72 spu sigsuspend sys_ni_syscall
+73 32 sigpending sys_sigpending compat_sys_sigpending
+73 64 sigpending sys_ni_syscall
+73 spu sigpending sys_ni_syscall
+74 common sethostname sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit sys_old_getrlimit compat_sys_old_getrlimit
+76 64 getrlimit sys_ni_syscall
+76 spu getrlimit sys_ni_syscall
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 common getgroups sys_getgroups
+81 common setgroups sys_setgroups
+82 32 select sys_old_select compat_sys_old_select
+82 64 select sys_ni_syscall
+82 spu select sys_ni_syscall
+83 common symlink sys_symlink
+84 32 oldlstat sys_lstat sys_ni_syscall
+84 64 oldlstat sys_ni_syscall
+84 spu oldlstat sys_ni_syscall
+85 common readlink sys_readlink
+86 nospu uselib sys_uselib
+87 nospu swapon sys_swapon
+88 nospu reboot sys_reboot
+89 32 readdir sys_old_readdir compat_sys_old_readdir
+89 64 readdir sys_ni_syscall
+89 spu readdir sys_ni_syscall
+90 common mmap sys_mmap
+91 common munmap sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod
+95 common fchown sys_fchown
+96 common getpriority sys_getpriority
+97 common setpriority sys_setpriority
+98 common profil sys_ni_syscall
+99 nospu statfs sys_statfs compat_sys_statfs
+100 nospu fstatfs sys_fstatfs compat_sys_fstatfs
+101 common ioperm sys_ni_syscall
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+109 32 olduname sys_uname
+109 64 olduname sys_ni_syscall
+109 spu olduname sys_ni_syscall
+110 common iopl sys_ni_syscall
+111 common vhangup sys_vhangup
+112 common idle sys_ni_syscall
+113 common vm86 sys_ni_syscall
+114 common wait4 sys_wait4 compat_sys_wait4
+115 nospu swapoff sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 nospu ipc sys_ipc compat_sys_ipc
+118 common fsync sys_fsync
+119 32 sigreturn sys_sigreturn compat_sys_sigreturn
+119 64 sigreturn sys_ni_syscall
+119 spu sigreturn sys_ni_syscall
+120 nospu clone sys_clone
+121 common setdomainname sys_setdomainname
+122 common uname sys_newuname
+123 common modify_ldt sys_ni_syscall
+124 32 adjtimex sys_adjtimex_time32
+124 64 adjtimex sys_adjtimex
+124 spu adjtimex sys_adjtimex
+125 common mprotect sys_mprotect
+126 32 sigprocmask sys_sigprocmask compat_sys_sigprocmask
+126 64 sigprocmask sys_ni_syscall
+126 spu sigprocmask sys_ni_syscall
+127 common create_module sys_ni_syscall
+128 nospu init_module sys_init_module
+129 nospu delete_module sys_delete_module
+130 common get_kernel_syms sys_ni_syscall
+131 nospu quotactl sys_quotactl
+132 common getpgid sys_getpgid
+133 common fchdir sys_fchdir
+134 common bdflush sys_ni_syscall
+135 common sysfs sys_sysfs
+136 32 personality sys_personality compat_sys_ppc64_personality
+136 64 personality sys_ppc64_personality
+136 spu personality sys_ppc64_personality
+137 common afs_syscall sys_ni_syscall
+138 common setfsuid sys_setfsuid
+139 common setfsgid sys_setfsgid
+140 common _llseek sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 common _newselect sys_select compat_sys_select
+143 common flock sys_flock
+144 common msync sys_msync
+145 common readv sys_readv
+146 common writev sys_writev
+147 common getsid sys_getsid
+148 common fdatasync sys_fdatasync
+149 nospu _sysctl sys_ni_syscall
+150 common mlock sys_mlock
+151 common munlock sys_munlock
+152 common mlockall sys_mlockall
+153 common munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam
+155 common sched_getparam sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min
+161 32 sched_rr_get_interval sys_sched_rr_get_interval_time32
+161 64 sched_rr_get_interval sys_sched_rr_get_interval
+161 spu sched_rr_get_interval sys_sched_rr_get_interval
+162 32 nanosleep sys_nanosleep_time32
+162 64 nanosleep sys_nanosleep
+162 spu nanosleep sys_nanosleep
+163 common mremap sys_mremap
+164 common setresuid sys_setresuid
+165 common getresuid sys_getresuid
+166 common query_module sys_ni_syscall
+167 common poll sys_poll
+168 common nfsservctl sys_ni_syscall
+169 common setresgid sys_setresgid
+170 common getresgid sys_getresgid
+171 common prctl sys_prctl
+172 nospu rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+173 nospu rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+174 nospu rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+175 nospu rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+176 32 rt_sigtimedwait sys_rt_sigtimedwait_time32 compat_sys_rt_sigtimedwait_time32
+176 64 rt_sigtimedwait sys_rt_sigtimedwait
+177 nospu rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
+179 64 pread64 sys_pread64
+179 spu pread64 sys_pread64
+180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
+180 64 pwrite64 sys_pwrite64
+180 spu pwrite64 sys_pwrite64
+181 common chown sys_chown
+182 common getcwd sys_getcwd
+183 common capget sys_capget
+184 common capset sys_capset
+185 nospu sigaltstack sys_sigaltstack compat_sys_sigaltstack
+186 32 sendfile sys_sendfile compat_sys_sendfile
+186 64 sendfile sys_sendfile64
+186 spu sendfile sys_sendfile64
+187 common getpmsg sys_ni_syscall
+188 common putpmsg sys_ni_syscall
+189 nospu vfork sys_vfork
+190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
+191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
+191 64 readahead sys_readahead
+191 spu readahead sys_readahead
+192 32 mmap2 sys_mmap2 compat_sys_mmap2
+193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
+194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
+195 32 stat64 sys_stat64
+196 32 lstat64 sys_lstat64
+197 32 fstat64 sys_fstat64
+198 nospu pciconfig_read sys_pciconfig_read
+199 nospu pciconfig_write sys_pciconfig_write
+200 nospu pciconfig_iobase sys_pciconfig_iobase
+201 common multiplexer sys_ni_syscall
+202 common getdents64 sys_getdents64
+203 common pivot_root sys_pivot_root
+204 32 fcntl64 sys_fcntl64 compat_sys_fcntl64
+205 common madvise sys_madvise
+206 common mincore sys_mincore
+207 common gettid sys_gettid
+208 common tkill sys_tkill
+209 common setxattr sys_setxattr
+210 common lsetxattr sys_lsetxattr
+211 common fsetxattr sys_fsetxattr
+212 common getxattr sys_getxattr
+213 common lgetxattr sys_lgetxattr
+214 common fgetxattr sys_fgetxattr
+215 common listxattr sys_listxattr
+216 common llistxattr sys_llistxattr
+217 common flistxattr sys_flistxattr
+218 common removexattr sys_removexattr
+219 common lremovexattr sys_lremovexattr
+220 common fremovexattr sys_fremovexattr
+221 32 futex sys_futex_time32
+221 64 futex sys_futex
+221 spu futex sys_futex
+222 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+223 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+# 224 unused
+225 common tuxcall sys_ni_syscall
+226 32 sendfile64 sys_sendfile64 compat_sys_sendfile64
+227 common io_setup sys_io_setup compat_sys_io_setup
+228 common io_destroy sys_io_destroy
+229 32 io_getevents sys_io_getevents_time32
+229 64 io_getevents sys_io_getevents
+229 spu io_getevents sys_io_getevents
+230 common io_submit sys_io_submit compat_sys_io_submit
+231 common io_cancel sys_io_cancel
+232 nospu set_tid_address sys_set_tid_address
+233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
+233 64 fadvise64 sys_fadvise64
+233 spu fadvise64 sys_fadvise64
+234 nospu exit_group sys_exit_group
+235 nospu lookup_dcookie sys_ni_syscall
+236 common epoll_create sys_epoll_create
+237 common epoll_ctl sys_epoll_ctl
+238 common epoll_wait sys_epoll_wait
+239 common remap_file_pages sys_remap_file_pages
+240 common timer_create sys_timer_create compat_sys_timer_create
+241 32 timer_settime sys_timer_settime32
+241 64 timer_settime sys_timer_settime
+241 spu timer_settime sys_timer_settime
+242 32 timer_gettime sys_timer_gettime32
+242 64 timer_gettime sys_timer_gettime
+242 spu timer_gettime sys_timer_gettime
+243 common timer_getoverrun sys_timer_getoverrun
+244 common timer_delete sys_timer_delete
+245 32 clock_settime sys_clock_settime32
+245 64 clock_settime sys_clock_settime
+245 spu clock_settime sys_clock_settime
+246 32 clock_gettime sys_clock_gettime32
+246 64 clock_gettime sys_clock_gettime
+246 spu clock_gettime sys_clock_gettime
+247 32 clock_getres sys_clock_getres_time32
+247 64 clock_getres sys_clock_getres
+247 spu clock_getres sys_clock_getres
+248 32 clock_nanosleep sys_clock_nanosleep_time32
+248 64 clock_nanosleep sys_clock_nanosleep
+248 spu clock_nanosleep sys_clock_nanosleep
+249 nospu swapcontext sys_swapcontext compat_sys_swapcontext
+250 common tgkill sys_tgkill
+251 32 utimes sys_utimes_time32
+251 64 utimes sys_utimes
+251 spu utimes sys_utimes
+252 common statfs64 sys_statfs64 compat_sys_statfs64
+253 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+254 32 fadvise64_64 sys_ppc_fadvise64_64
+254 spu fadvise64_64 sys_ni_syscall
+255 common rtas sys_rtas
+256 32 sys_debug_setcontext sys_debug_setcontext sys_ni_syscall
+256 64 sys_debug_setcontext sys_ni_syscall
+256 spu sys_debug_setcontext sys_ni_syscall
+# 257 reserved for vserver
+258 nospu migrate_pages sys_migrate_pages
+259 nospu mbind sys_mbind
+260 nospu get_mempolicy sys_get_mempolicy
+261 nospu set_mempolicy sys_set_mempolicy
+262 nospu mq_open sys_mq_open compat_sys_mq_open
+263 nospu mq_unlink sys_mq_unlink
+264 32 mq_timedsend sys_mq_timedsend_time32
+264 64 mq_timedsend sys_mq_timedsend
+265 32 mq_timedreceive sys_mq_timedreceive_time32
+265 64 mq_timedreceive sys_mq_timedreceive
+266 nospu mq_notify sys_mq_notify compat_sys_mq_notify
+267 nospu mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+268 nospu kexec_load sys_kexec_load compat_sys_kexec_load
+269 nospu add_key sys_add_key
+270 nospu request_key sys_request_key
+271 nospu keyctl sys_keyctl compat_sys_keyctl
+272 nospu waitid sys_waitid compat_sys_waitid
+273 nospu ioprio_set sys_ioprio_set
+274 nospu ioprio_get sys_ioprio_get
+275 nospu inotify_init sys_inotify_init
+276 nospu inotify_add_watch sys_inotify_add_watch
+277 nospu inotify_rm_watch sys_inotify_rm_watch
+278 nospu spu_run sys_spu_run
+279 nospu spu_create sys_spu_create
+280 32 pselect6 sys_pselect6_time32 compat_sys_pselect6_time32
+280 64 pselect6 sys_pselect6
+281 32 ppoll sys_ppoll_time32 compat_sys_ppoll_time32
+281 64 ppoll sys_ppoll
+282 common unshare sys_unshare
+283 common splice sys_splice
+284 common tee sys_tee
+285 common vmsplice sys_vmsplice
+286 common openat sys_openat compat_sys_openat
+287 common mkdirat sys_mkdirat
+288 common mknodat sys_mknodat
+289 common fchownat sys_fchownat
+290 32 futimesat sys_futimesat_time32
+290 64 futimesat sys_futimesat
+290 spu utimesat sys_futimesat
+291 32 fstatat64 sys_fstatat64
+291 64 newfstatat sys_newfstatat
+291 spu newfstatat sys_newfstatat
+292 common unlinkat sys_unlinkat
+293 common renameat sys_renameat
+294 common linkat sys_linkat
+295 common symlinkat sys_symlinkat
+296 common readlinkat sys_readlinkat
+297 common fchmodat sys_fchmodat
+298 common faccessat sys_faccessat
+299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+301 common move_pages sys_move_pages
+302 common getcpu sys_getcpu
+303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+304 32 utimensat sys_utimensat_time32
+304 64 utimensat sys_utimensat
+304 spu utimensat sys_utimensat
+305 common signalfd sys_signalfd compat_sys_signalfd
+306 common timerfd_create sys_timerfd_create
+307 common eventfd sys_eventfd
+308 32 sync_file_range2 sys_ppc_sync_file_range2 compat_sys_ppc_sync_file_range2
+308 64 sync_file_range2 sys_sync_file_range2
+308 spu sync_file_range2 sys_sync_file_range2
+309 32 fallocate sys_ppc_fallocate compat_sys_fallocate
+309 64 fallocate sys_fallocate
+310 nospu subpage_prot sys_subpage_prot
+311 32 timerfd_settime sys_timerfd_settime32
+311 64 timerfd_settime sys_timerfd_settime
+311 spu timerfd_settime sys_timerfd_settime
+312 32 timerfd_gettime sys_timerfd_gettime32
+312 64 timerfd_gettime sys_timerfd_gettime
+312 spu timerfd_gettime sys_timerfd_gettime
+313 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+314 common eventfd2 sys_eventfd2
+315 common epoll_create1 sys_epoll_create1
+316 common dup3 sys_dup3
+317 common pipe2 sys_pipe2
+318 nospu inotify_init1 sys_inotify_init1
+319 common perf_event_open sys_perf_event_open
+320 common preadv sys_preadv compat_sys_preadv
+321 common pwritev sys_pwritev compat_sys_pwritev
+322 nospu rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+323 nospu fanotify_init sys_fanotify_init
+324 nospu fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+325 common prlimit64 sys_prlimit64
+326 common socket sys_socket
+327 common bind sys_bind
+328 common connect sys_connect
+329 common listen sys_listen
+330 common accept sys_accept
+331 common getsockname sys_getsockname
+332 common getpeername sys_getpeername
+333 common socketpair sys_socketpair
+334 common send sys_send
+335 common sendto sys_sendto
+336 common recv sys_recv compat_sys_recv
+337 common recvfrom sys_recvfrom compat_sys_recvfrom
+338 common shutdown sys_shutdown
+339 common setsockopt sys_setsockopt sys_setsockopt
+340 common getsockopt sys_getsockopt sys_getsockopt
+341 common sendmsg sys_sendmsg compat_sys_sendmsg
+342 common recvmsg sys_recvmsg compat_sys_recvmsg
+343 32 recvmmsg sys_recvmmsg_time32 compat_sys_recvmmsg_time32
+343 64 recvmmsg sys_recvmmsg
+343 spu recvmmsg sys_recvmmsg
+344 common accept4 sys_accept4
+345 common name_to_handle_at sys_name_to_handle_at
+346 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+347 32 clock_adjtime sys_clock_adjtime32
+347 64 clock_adjtime sys_clock_adjtime
+347 spu clock_adjtime sys_clock_adjtime
+348 common syncfs sys_syncfs
+349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+350 common setns sys_setns
+351 nospu process_vm_readv sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev
+353 nospu finit_module sys_finit_module
+354 nospu kcmp sys_kcmp
+355 common sched_setattr sys_sched_setattr
+356 common sched_getattr sys_sched_getattr
+357 common renameat2 sys_renameat2
+358 common seccomp sys_seccomp
+359 common getrandom sys_getrandom
+360 common memfd_create sys_memfd_create
+361 common bpf sys_bpf
+362 nospu execveat sys_execveat compat_sys_execveat
+363 32 switch_endian sys_ni_syscall
+363 64 switch_endian sys_switch_endian
+363 spu switch_endian sys_ni_syscall
+364 common userfaultfd sys_userfaultfd
+365 common membarrier sys_membarrier
+# 366-377 originally left for IPC, now unused
+378 nospu mlock2 sys_mlock2
+379 nospu copy_file_range sys_copy_file_range
+380 common preadv2 sys_preadv2 compat_sys_preadv2
+381 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+382 nospu kexec_file_load sys_kexec_file_load
+383 nospu statx sys_statx
+384 nospu pkey_alloc sys_pkey_alloc
+385 nospu pkey_free sys_pkey_free
+386 nospu pkey_mprotect sys_pkey_mprotect
+387 nospu rseq sys_rseq
+388 32 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents
+388 64 io_pgetevents sys_io_pgetevents
+# room for arch specific syscalls
+392 64 semtimedop sys_semtimedop
+393 common semget sys_semget
+394 common semctl sys_semctl compat_sys_semctl
+395 common shmget sys_shmget
+396 common shmctl sys_shmctl compat_sys_shmctl
+397 common shmat sys_shmat compat_sys_shmat
+398 common shmdt sys_shmdt
+399 common msgget sys_msgget
+400 common msgsnd sys_msgsnd compat_sys_msgsnd
+401 common msgrcv sys_msgrcv compat_sys_msgrcv
+402 common msgctl sys_msgctl compat_sys_msgctl
+403 32 clock_gettime64 sys_clock_gettime sys_clock_gettime
+404 32 clock_settime64 sys_clock_settime sys_clock_settime
+405 32 clock_adjtime64 sys_clock_adjtime sys_clock_adjtime
+406 32 clock_getres_time64 sys_clock_getres sys_clock_getres
+407 32 clock_nanosleep_time64 sys_clock_nanosleep sys_clock_nanosleep
+408 32 timer_gettime64 sys_timer_gettime sys_timer_gettime
+409 32 timer_settime64 sys_timer_settime sys_timer_settime
+410 32 timerfd_gettime64 sys_timerfd_gettime sys_timerfd_gettime
+411 32 timerfd_settime64 sys_timerfd_settime sys_timerfd_settime
+412 32 utimensat_time64 sys_utimensat sys_utimensat
+413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
+414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
+417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
+418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
+419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
+420 32 semtimedop_time64 sys_semtimedop sys_semtimedop
+421 32 rt_sigtimedwait_time64 sys_rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+422 32 futex_time64 sys_futex sys_futex
+423 32 sched_rr_get_interval_time64 sys_sched_rr_get_interval sys_sched_rr_get_interval
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 nospu clone3 sys_clone3
+436 common close_range sys_close_range
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
+439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
+441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
+443 common quotactl_fd sys_quotactl_fd
+444 common landlock_create_ruleset sys_landlock_create_ruleset
+445 common landlock_add_rule sys_landlock_add_rule
+446 common landlock_restrict_self sys_landlock_restrict_self
+# 447 reserved for memfd_secret
+448 common process_mrelease sys_process_mrelease
+449 common futex_waitv sys_futex_waitv
+450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
+453 common map_shadow_stack sys_ni_syscall
+454 common futex_wake sys_futex_wake
+455 common futex_wait sys_futex_wait
+456 common futex_requeue sys_futex_requeue
+457 common statmount sys_statmount
+458 common listmount sys_listmount
+459 common lsm_get_self_attr sys_lsm_get_self_attr
+460 common lsm_set_self_attr sys_lsm_set_self_attr
+461 common lsm_list_modules sys_lsm_list_modules
+462 common mseal sys_mseal
+463 common setxattrat sys_setxattrat
+464 common getxattrat sys_getxattrat
+465 common listxattrat sys_listxattrat
+466 common removexattrat sys_removexattrat
+467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 67fd2fd2620a..6b3dd6decdf9 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
#include <linux/device.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -8,18 +9,22 @@
#include <linux/nodemask.h>
#include <linux/cpumask.h>
#include <linux/notifier.h>
+#include <linux/of.h>
#include <asm/current.h>
#include <asm/processor.h>
#include <asm/cputable.h>
#include <asm/hvcall.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/smp.h>
+#include <asm/time.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/idle.h>
+#include <asm/svm.h>
#include "cacheinfo.h"
+#include "setup.h"
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -28,29 +33,27 @@
static DEFINE_PER_CPU(struct cpu, cpu_devices);
-/*
- * SMT snooze delay stuff, 64-bit only for now
- */
-
#ifdef CONFIG_PPC64
-/* Time in microseconds we delay before sleeping in the idle loop */
-DEFINE_PER_CPU(long, smt_snooze_delay) = { 100 };
+/*
+ * Snooze delay has not been hooked up since 3fa8cad82b94 ("powerpc/pseries/cpuidle:
+ * smt-snooze-delay cleanup.") and has been broken even longer. As was foretold in
+ * 2014:
+ *
+ * "ppc64_util currently utilises it. Once we fix ppc64_util, propose to clean
+ * up the kernel code."
+ *
+ * powerpc-utils stopped using it as of 1.3.8. At some point in the future this
+ * code should be removed.
+ */
static ssize_t store_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
- ssize_t ret;
- long snooze;
-
- ret = sscanf(buf, "%ld", &snooze);
- if (ret != 1)
- return -EINVAL;
-
- per_cpu(smt_snooze_delay, cpu->dev.id) = snooze;
+ pr_warn_once("%s (%d) stored to unsupported smt_snooze_delay, which has no effect.\n",
+ current->comm, current->pid);
return count;
}
@@ -58,9 +61,9 @@ static ssize_t show_smt_snooze_delay(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct cpu *cpu = container_of(dev, struct cpu, dev);
-
- return sprintf(buf, "%ld\n", per_cpu(smt_snooze_delay, cpu->dev.id));
+ pr_warn_once("%s (%d) read from unsupported smt_snooze_delay\n",
+ current->comm, current->pid);
+ return sprintf(buf, "100\n");
}
static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
@@ -68,23 +71,170 @@ static DEVICE_ATTR(smt_snooze_delay, 0644, show_smt_snooze_delay,
static int __init setup_smt_snooze_delay(char *str)
{
- unsigned int cpu;
- long snooze;
-
if (!cpu_has_feature(CPU_FTR_SMT))
return 1;
- snooze = simple_strtol(str, NULL, 10);
- for_each_possible_cpu(cpu)
- per_cpu(smt_snooze_delay, cpu) = snooze;
-
+ pr_warn("smt-snooze-delay command line option has no effect\n");
return 1;
}
__setup("smt-snooze-delay=", setup_smt_snooze_delay);
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
+static void read_##NAME(void *val) \
+{ \
+ *(unsigned long *)val = mfspr(ADDRESS); \
+} \
+static void write_##NAME(void *val) \
+{ \
+ EXTRA; \
+ mtspr(ADDRESS, *(unsigned long *)val); \
+}
+
+#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+static ssize_t show_##NAME(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
+ return sprintf(buf, "%lx\n", val); \
+} \
+static ssize_t __used \
+ store_##NAME(struct device *dev, struct device_attribute *attr, \
+ const char *buf, size_t count) \
+{ \
+ struct cpu *cpu = container_of(dev, struct cpu, dev); \
+ unsigned long val; \
+ int ret = sscanf(buf, "%lx", &val); \
+ if (ret != 1) \
+ return -EINVAL; \
+ smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
+ return count; \
+}
+
+#define SYSFS_PMCSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+#define SYSFS_SPRSETUP(NAME, ADDRESS) \
+ __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
+ __SYSFS_SPRSETUP_SHOW_STORE(NAME)
+
+#ifdef CONFIG_PPC64
+
+/*
+ * This is the system wide DSCR register default value. Any
+ * change to this default value through the sysfs interface
+ * will update all per cpu DSCR default values across the
+ * system stored in their respective PACA structures.
+ */
+static unsigned long dscr_default;
+
+/**
+ * read_dscr() - Fetch the cpu specific DSCR default
+ * @val: Returned cpu specific DSCR default value
+ *
+ * This function returns the per cpu DSCR default value
+ * for any cpu which is contained in its PACA structure.
+ */
+static void read_dscr(void *val)
+{
+ *(unsigned long *)val = get_paca()->dscr_default;
+}
+
+
+/**
+ * write_dscr() - Update the cpu specific DSCR default
+ * @val: New cpu specific DSCR default value to update
+ *
+ * This function updates the per cpu DSCR default value
+ * for any cpu which is contained in its PACA structure.
+ */
+static void write_dscr(void *val)
+{
+ get_paca()->dscr_default = *(unsigned long *)val;
+ if (!current->thread.dscr_inherit) {
+ current->thread.dscr = *(unsigned long *)val;
+ mtspr(SPRN_DSCR, *(unsigned long *)val);
+ }
+}
+
+SYSFS_SPRSETUP_SHOW_STORE(dscr);
+static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+
+static void add_write_permission_dev_attr(struct device_attribute *attr)
+{
+ attr->attr.mode |= 0200;
+}
+
+/**
+ * show_dscr_default() - Fetch the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ *
+ * This function returns the system wide DSCR default value.
+ */
+static ssize_t show_dscr_default(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%lx\n", dscr_default);
+}
+
+/**
+ * store_dscr_default() - Update the system wide DSCR default
+ * @dev: Device structure
+ * @attr: Device attribute structure
+ * @buf: Interface buffer
+ * @count: Size of the update
+ *
+ * This function updates the system wide DSCR default value.
+ */
+static ssize_t __used store_dscr_default(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned long val;
+ int ret = 0;
+
+ ret = sscanf(buf, "%lx", &val);
+ if (ret != 1)
+ return -EINVAL;
+ dscr_default = val;
+
+ on_each_cpu(write_dscr, &val, 1);
+
+ return count;
+}
+
+static DEVICE_ATTR(dscr_default, 0600,
+ show_dscr_default, store_dscr_default);
+
+static void __init sysfs_create_dscr_default(void)
+{
+ if (cpu_has_feature(CPU_FTR_DSCR)) {
+ struct device *dev_root;
+ int cpu;
+
+ dscr_default = spr_default_dscr;
+ for_each_possible_cpu(cpu)
+ paca_ptrs[cpu]->dscr_default = dscr_default;
+
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ device_create_file(dev_root, &dev_attr_dscr_default);
+ put_device(dev_root);
+ }
+ }
+}
+#endif /* CONFIG_PPC64 */
+
+#ifdef CONFIG_PPC_E500
#define MAX_BIT 63
static u64 pw20_wt;
@@ -394,74 +544,60 @@ void ppc_enable_pmcs(void)
ppc_set_pmu_inuse(1);
/* Only need to enable them once */
- if (__get_cpu_var(pmcs_enabled))
+ if (__this_cpu_read(pmcs_enabled))
return;
- __get_cpu_var(pmcs_enabled) = 1;
+ __this_cpu_write(pmcs_enabled, 1);
if (ppc_md.enable_pmcs)
ppc_md.enable_pmcs();
}
EXPORT_SYMBOL(ppc_enable_pmcs);
-#define __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, EXTRA) \
-static void read_##NAME(void *val) \
-{ \
- *(unsigned long *)val = mfspr(ADDRESS); \
-} \
-static void write_##NAME(void *val) \
-{ \
- EXTRA; \
- mtspr(ADDRESS, *(unsigned long *)val); \
-}
-
-#define __SYSFS_SPRSETUP_SHOW_STORE(NAME) \
-static ssize_t show_##NAME(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- smp_call_function_single(cpu->dev.id, read_##NAME, &val, 1); \
- return sprintf(buf, "%lx\n", val); \
-} \
-static ssize_t __used \
- store_##NAME(struct device *dev, struct device_attribute *attr, \
- const char *buf, size_t count) \
-{ \
- struct cpu *cpu = container_of(dev, struct cpu, dev); \
- unsigned long val; \
- int ret = sscanf(buf, "%lx", &val); \
- if (ret != 1) \
- return -EINVAL; \
- smp_call_function_single(cpu->dev.id, write_##NAME, &val, 1); \
- return count; \
-}
-
-#define SYSFS_PMCSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ppc_enable_pmcs()) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-#define SYSFS_SPRSETUP(NAME, ADDRESS) \
- __SYSFS_SPRSETUP_READ_WRITE(NAME, ADDRESS, ) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
-#define SYSFS_SPRSETUP_SHOW_STORE(NAME) \
- __SYSFS_SPRSETUP_SHOW_STORE(NAME)
/* Let's define all possible registers, we'll only hook up the ones
* that are implemented on the current processor
*/
-#if defined(CONFIG_PPC64)
+#ifdef CONFIG_PMU_SYSFS
+#if defined(CONFIG_PPC64) || defined(CONFIG_PPC_BOOK3S_32)
#define HAS_PPC_PMC_CLASSIC 1
#define HAS_PPC_PMC_IBM 1
+#endif
+
+#ifdef CONFIG_PPC64
#define HAS_PPC_PMC_PA6T 1
-#elif defined(CONFIG_6xx)
-#define HAS_PPC_PMC_CLASSIC 1
-#define HAS_PPC_PMC_IBM 1
+#define HAS_PPC_PMC56 1
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
#define HAS_PPC_PMC_G4 1
#endif
+#endif /* CONFIG_PMU_SYSFS */
+#if defined(CONFIG_PPC64) && defined(CONFIG_DEBUG_MISC)
+#define HAS_PPC_PA6T
+#endif
+/*
+ * SPRs which are not related to PMU.
+ */
+#ifdef CONFIG_PPC64
+SYSFS_SPRSETUP(purr, SPRN_PURR);
+SYSFS_SPRSETUP(spurr, SPRN_SPURR);
+SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_SPRSETUP(tscr, SPRN_TSCR);
+
+/*
+ Lets only enable read for phyp resources and
+ enable write when needed with a separate function.
+ Lets be conservative and default to pseries.
+*/
+static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
+static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
+static DEVICE_ATTR(pir, 0400, show_pir, NULL);
+static DEVICE_ATTR(tscr, 0600, show_tscr, store_tscr);
+#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_CLASSIC
SYSFS_PMCSETUP(mmcr0, SPRN_MMCR0);
@@ -472,87 +608,25 @@ SYSFS_PMCSETUP(pmc3, SPRN_PMC3);
SYSFS_PMCSETUP(pmc4, SPRN_PMC4);
SYSFS_PMCSETUP(pmc5, SPRN_PMC5);
SYSFS_PMCSETUP(pmc6, SPRN_PMC6);
+#endif
#ifdef HAS_PPC_PMC_G4
SYSFS_PMCSETUP(mmcr2, SPRN_MMCR2);
#endif
-#ifdef CONFIG_PPC64
+#ifdef HAS_PPC_PMC56
SYSFS_PMCSETUP(pmc7, SPRN_PMC7);
SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
-SYSFS_SPRSETUP(purr, SPRN_PURR);
-SYSFS_SPRSETUP(spurr, SPRN_SPURR);
-SYSFS_SPRSETUP(pir, SPRN_PIR);
+SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3);
-/*
- Lets only enable read for phyp resources and
- enable write when needed with a separate function.
- Lets be conservative and default to pseries.
-*/
static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
-static DEVICE_ATTR(spurr, 0400, show_spurr, NULL);
-static DEVICE_ATTR(purr, 0400, show_purr, store_purr);
-static DEVICE_ATTR(pir, 0400, show_pir, NULL);
-
-static unsigned long dscr_default;
-
-static void read_dscr(void *val)
-{
- *(unsigned long *)val = get_paca()->dscr_default;
-}
-
-static void write_dscr(void *val)
-{
- get_paca()->dscr_default = *(unsigned long *)val;
- if (!current->thread.dscr_inherit) {
- current->thread.dscr = *(unsigned long *)val;
- mtspr(SPRN_DSCR, *(unsigned long *)val);
- }
-}
-
-SYSFS_SPRSETUP_SHOW_STORE(dscr);
-static DEVICE_ATTR(dscr, 0600, show_dscr, store_dscr);
+static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3);
+#endif /* HAS_PPC_PMC56 */
-static void add_write_permission_dev_attr(struct device_attribute *attr)
-{
- attr->attr.mode |= 0200;
-}
-
-static ssize_t show_dscr_default(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "%lx\n", dscr_default);
-}
-
-static ssize_t __used store_dscr_default(struct device *dev,
- struct device_attribute *attr, const char *buf,
- size_t count)
-{
- unsigned long val;
- int ret = 0;
-
- ret = sscanf(buf, "%lx", &val);
- if (ret != 1)
- return -EINVAL;
- dscr_default = val;
-
- on_each_cpu(write_dscr, &val, 1);
- return count;
-}
-static DEVICE_ATTR(dscr_default, 0600,
- show_dscr_default, store_dscr_default);
-
-static void sysfs_create_dscr_default(void)
-{
- int err = 0;
- if (cpu_has_feature(CPU_FTR_DSCR))
- err = device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
-}
-#endif /* CONFIG_PPC64 */
#ifdef HAS_PPC_PMC_PA6T
SYSFS_PMCSETUP(pa6t_pmc0, SPRN_PA6T_PMC0);
@@ -561,7 +635,9 @@ SYSFS_PMCSETUP(pa6t_pmc2, SPRN_PA6T_PMC2);
SYSFS_PMCSETUP(pa6t_pmc3, SPRN_PA6T_PMC3);
SYSFS_PMCSETUP(pa6t_pmc4, SPRN_PA6T_PMC4);
SYSFS_PMCSETUP(pa6t_pmc5, SPRN_PA6T_PMC5);
-#ifdef CONFIG_DEBUG_KERNEL
+#endif
+
+#ifdef HAS_PPC_PA6T
SYSFS_SPRSETUP(hid0, SPRN_HID0);
SYSFS_SPRSETUP(hid1, SPRN_HID1);
SYSFS_SPRSETUP(hid4, SPRN_HID4);
@@ -590,15 +666,14 @@ SYSFS_SPRSETUP(tsr0, SPRN_PA6T_TSR0);
SYSFS_SPRSETUP(tsr1, SPRN_PA6T_TSR1);
SYSFS_SPRSETUP(tsr2, SPRN_PA6T_TSR2);
SYSFS_SPRSETUP(tsr3, SPRN_PA6T_TSR3);
-#endif /* CONFIG_DEBUG_KERNEL */
-#endif /* HAS_PPC_PMC_PA6T */
+#endif /* HAS_PPC_PA6T */
#ifdef HAS_PPC_PMC_IBM
static struct device_attribute ibm_common_attrs[] = {
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
};
-#endif /* HAS_PPC_PMC_G4 */
+#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
static struct device_attribute g4_common_attrs[] = {
@@ -608,6 +683,7 @@ static struct device_attribute g4_common_attrs[] = {
};
#endif /* HAS_PPC_PMC_G4 */
+#ifdef HAS_PPC_PMC_CLASSIC
static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc1, 0600, show_pmc1, store_pmc1),
__ATTR(pmc2, 0600, show_pmc2, store_pmc2),
@@ -615,14 +691,16 @@ static struct device_attribute classic_pmc_attrs[] = {
__ATTR(pmc4, 0600, show_pmc4, store_pmc4),
__ATTR(pmc5, 0600, show_pmc5, store_pmc5),
__ATTR(pmc6, 0600, show_pmc6, store_pmc6),
-#ifdef CONFIG_PPC64
+#ifdef HAS_PPC_PMC56
__ATTR(pmc7, 0600, show_pmc7, store_pmc7),
__ATTR(pmc8, 0600, show_pmc8, store_pmc8),
#endif
};
+#endif
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
static struct device_attribute pa6t_attrs[] = {
+#ifdef HAS_PPC_PMC_PA6T
__ATTR(mmcr0, 0600, show_mmcr0, store_mmcr0),
__ATTR(mmcr1, 0600, show_mmcr1, store_mmcr1),
__ATTR(pmc0, 0600, show_pa6t_pmc0, store_pa6t_pmc0),
@@ -631,7 +709,8 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(pmc3, 0600, show_pa6t_pmc3, store_pa6t_pmc3),
__ATTR(pmc4, 0600, show_pa6t_pmc4, store_pa6t_pmc4),
__ATTR(pmc5, 0600, show_pa6t_pmc5, store_pa6t_pmc5),
-#ifdef CONFIG_DEBUG_KERNEL
+#endif
+#ifdef HAS_PPC_PA6T
__ATTR(hid0, 0600, show_hid0, store_hid0),
__ATTR(hid1, 0600, show_hid1, store_hid1),
__ATTR(hid4, 0600, show_hid4, store_hid4),
@@ -660,18 +739,111 @@ static struct device_attribute pa6t_attrs[] = {
__ATTR(tsr1, 0600, show_tsr1, store_tsr1),
__ATTR(tsr2, 0600, show_tsr2, store_tsr2),
__ATTR(tsr3, 0600, show_tsr3, store_tsr3),
-#endif /* CONFIG_DEBUG_KERNEL */
+#endif /* HAS_PPC_PA6T */
};
-#endif /* HAS_PPC_PMC_PA6T */
-#endif /* HAS_PPC_PMC_CLASSIC */
+#endif
+
+#ifdef CONFIG_PPC_SVM
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+static void __init create_svm_file(void)
+{
+ struct device *dev_root = bus_get_dev_root(&cpu_subsys);
+
+ if (dev_root) {
+ device_create_file(dev_root, &dev_attr_svm);
+ put_device(dev_root);
+ }
+}
+#else
+static void __init create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
+#ifdef CONFIG_PPC_PSERIES
+static void read_idle_purr(void *val)
+{
+ u64 *ret = val;
+
+ *ret = read_this_idle_purr();
+}
+
+static ssize_t idle_purr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ u64 val;
+
+ smp_call_function_single(cpu->dev.id, read_idle_purr, &val, 1);
+ return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_purr, 0400, idle_purr_show, NULL);
+
+static void create_idle_purr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_idle_purr);
+}
-static void register_cpu_online(unsigned int cpu)
+static void remove_idle_purr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_idle_purr);
+}
+
+static void read_idle_spurr(void *val)
+{
+ u64 *ret = val;
+
+ *ret = read_this_idle_spurr();
+}
+
+static ssize_t idle_spurr_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cpu *cpu = container_of(dev, struct cpu, dev);
+ u64 val;
+
+ smp_call_function_single(cpu->dev.id, read_idle_spurr, &val, 1);
+ return sprintf(buf, "%llx\n", val);
+}
+static DEVICE_ATTR(idle_spurr, 0400, idle_spurr_show, NULL);
+
+static void create_idle_spurr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_idle_spurr);
+}
+
+static void remove_idle_spurr_file(struct device *s)
+{
+ if (firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_idle_spurr);
+}
+
+#else /* CONFIG_PPC_PSERIES */
+#define create_idle_purr_file(s)
+#define remove_idle_purr_file(s)
+#define create_idle_spurr_file(s)
+#define remove_idle_spurr_file(s)
+#endif /* CONFIG_PPC_PSERIES */
+
+static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
struct device_attribute *attrs, *pmc_attrs;
int i, nattrs;
+ /* For cpus present at boot a reference was already grabbed in register_cpu() */
+ if (!s->of_node)
+ s->of_node = of_get_cpu_node(cpu, NULL);
+
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SMT))
device_create_file(s, &dev_attr_smt_snooze_delay);
@@ -682,25 +854,25 @@ static void register_cpu_online(unsigned int cpu)
#ifdef HAS_PPC_PMC_IBM
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
- nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(ibm_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
- nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(g4_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
- nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(pa6t_attrs);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -715,26 +887,38 @@ static void register_cpu_online(unsigned int cpu)
device_create_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_create_file(s, &dev_attr_mmcra);
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ device_create_file(s, &dev_attr_mmcr3);
+#endif /* CONFIG_PMU_SYSFS */
+
if (cpu_has_feature(CPU_FTR_PURR)) {
if (!firmware_has_feature(FW_FEATURE_LPAR))
add_write_permission_dev_attr(&dev_attr_purr);
device_create_file(s, &dev_attr_purr);
+ create_idle_purr_file(s);
}
- if (cpu_has_feature(CPU_FTR_SPURR))
+ if (cpu_has_feature(CPU_FTR_SPURR)) {
device_create_file(s, &dev_attr_spurr);
+ create_idle_spurr_file(s);
+ }
if (cpu_has_feature(CPU_FTR_DSCR))
device_create_file(s, &dev_attr_dscr);
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_create_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
+ device_create_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
device_create_file(s, &dev_attr_pw20_state);
device_create_file(s, &dev_attr_pw20_wait_time);
@@ -744,17 +928,19 @@ static void register_cpu_online(unsigned int cpu)
}
#endif
cacheinfo_cpu_online(cpu);
+ return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
-static void unregister_cpu_online(unsigned int cpu)
+static int unregister_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
struct device_attribute *attrs, *pmc_attrs;
int i, nattrs;
- BUG_ON(!c->hotpluggable);
+ if (WARN_RATELIMIT(!c->hotpluggable, "cpu %d can't be offlined\n", cpu))
+ return -EBUSY;
#ifdef CONFIG_PPC64
if (cpu_has_feature(CPU_FTR_SMT))
@@ -766,25 +952,25 @@ static void unregister_cpu_online(unsigned int cpu)
#ifdef HAS_PPC_PMC_IBM
case PPC_PMC_IBM:
attrs = ibm_common_attrs;
- nattrs = sizeof(ibm_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(ibm_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_IBM */
#ifdef HAS_PPC_PMC_G4
case PPC_PMC_G4:
attrs = g4_common_attrs;
- nattrs = sizeof(g4_common_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(g4_common_attrs);
pmc_attrs = classic_pmc_attrs;
break;
#endif /* HAS_PPC_PMC_G4 */
-#ifdef HAS_PPC_PMC_PA6T
+#if defined(HAS_PPC_PMC_PA6T) || defined(HAS_PPC_PA6T)
case PPC_PMC_PA6T:
/* PA Semi starts counting at PMC0 */
attrs = pa6t_attrs;
- nattrs = sizeof(pa6t_attrs) / sizeof(struct device_attribute);
+ nattrs = ARRAY_SIZE(pa6t_attrs);
pmc_attrs = NULL;
break;
-#endif /* HAS_PPC_PMC_PA6T */
+#endif
default:
attrs = NULL;
nattrs = 0;
@@ -799,23 +985,36 @@ static void unregister_cpu_online(unsigned int cpu)
device_remove_file(s, &pmc_attrs[i]);
#ifdef CONFIG_PPC64
+#ifdef CONFIG_PMU_SYSFS
if (cpu_has_feature(CPU_FTR_MMCRA))
device_remove_file(s, &dev_attr_mmcra);
- if (cpu_has_feature(CPU_FTR_PURR))
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ device_remove_file(s, &dev_attr_mmcr3);
+#endif /* CONFIG_PMU_SYSFS */
+
+ if (cpu_has_feature(CPU_FTR_PURR)) {
device_remove_file(s, &dev_attr_purr);
+ remove_idle_purr_file(s);
+ }
- if (cpu_has_feature(CPU_FTR_SPURR))
+ if (cpu_has_feature(CPU_FTR_SPURR)) {
device_remove_file(s, &dev_attr_spurr);
+ remove_idle_spurr_file(s);
+ }
if (cpu_has_feature(CPU_FTR_DSCR))
device_remove_file(s, &dev_attr_dscr);
if (cpu_has_feature(CPU_FTR_PPCAS_ARCH_V2))
device_remove_file(s, &dev_attr_pir);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ !firmware_has_feature(FW_FEATURE_LPAR))
+ device_remove_file(s, &dev_attr_tscr);
#endif /* CONFIG_PPC64 */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#ifdef CONFIG_PPC_E500
if (PVR_VER(cur_cpu_spec->pvr_value) == PVR_VER_E6500) {
device_remove_file(s, &dev_attr_pw20_state);
device_remove_file(s, &dev_attr_pw20_wait_time);
@@ -825,7 +1024,13 @@ static void unregister_cpu_online(unsigned int cpu)
}
#endif
cacheinfo_cpu_offline(cpu);
+ of_node_put(s->of_node);
+ s->of_node = NULL;
+ return 0;
}
+#else /* !CONFIG_HOTPLUG_CPU */
+#define unregister_cpu_online NULL
+#endif
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ssize_t arch_cpu_probe(const char *buf, size_t count)
@@ -845,32 +1050,6 @@ ssize_t arch_cpu_release(const char *buf, size_t count)
}
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-#endif /* CONFIG_HOTPLUG_CPU */
-
-static int sysfs_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- register_cpu_online(cpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- unregister_cpu_online(cpu);
- break;
-#endif
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block sysfs_cpu_nb = {
- .notifier_call = sysfs_cpu_notify,
-};
-
static DEFINE_MUTEX(cpu_mutex);
int cpu_add_dev_attr(struct device_attribute *attr)
@@ -942,14 +1121,6 @@ EXPORT_SYMBOL_GPL(cpu_remove_dev_attr_group);
/* NUMA stuff */
#ifdef CONFIG_NUMA
-static void register_nodes(void)
-{
- int i;
-
- for (i = 0; i < MAX_NUMNODES; i++)
- register_one_node(i);
-}
-
int sysfs_add_device_to_node(struct device *dev, int nid)
{
struct node *node = node_devices[nid];
@@ -964,13 +1135,6 @@ void sysfs_remove_device_from_node(struct device *dev, int nid)
sysfs_remove_link(&node->dev.kobj, kobject_name(&dev->kobj));
}
EXPORT_SYMBOL_GPL(sysfs_remove_device_from_node);
-
-#else
-static void register_nodes(void)
-{
- return;
-}
-
#endif
/* Only valid if CPU is present. */
@@ -985,15 +1149,12 @@ static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
static int __init topology_init(void)
{
- int cpu;
-
- register_nodes();
-
- cpu_notifier_register_begin();
+ int cpu, r;
for_each_possible_cpu(cpu) {
struct cpu *c = &per_cpu(cpu_devices, cpu);
+#ifdef CONFIG_HOTPLUG_CPU
/*
* For now, we just see if the system supports making
* the RTAS calls for CPU hotplug. But, there may be a
@@ -1001,27 +1162,25 @@ static int __init topology_init(void)
* CPU. For instance, the boot cpu might never be valid
* for hotplugging.
*/
- if (ppc_md.cpu_die)
+ if (smp_ops && smp_ops->cpu_offline_self)
c->hotpluggable = 1;
+#endif
if (cpu_online(cpu) || c->hotpluggable) {
register_cpu(c, cpu);
device_create_file(&c->dev, &dev_attr_physical_id);
}
-
- if (cpu_online(cpu))
- register_cpu_online(cpu);
}
-
- __register_cpu_notifier(&sysfs_cpu_nb);
-
- cpu_notifier_register_done();
-
+ r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online",
+ register_cpu_online, unregister_cpu_online);
+ WARN_ON(r < 0);
#ifdef CONFIG_PPC64
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
+ create_svm_file();
+
return 0;
}
subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
deleted file mode 100644
index 7ab5d434e2ee..000000000000
--- a/arch/powerpc/kernel/systbl.S
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * This file contains the table of syscall-handling functions.
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
- * and Paul Mackerras.
- *
- * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
- * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/ppc_asm.h>
-
-#ifdef CONFIG_PPC64
-#define SYSCALL(func) .llong DOTSYM(sys_##func),DOTSYM(sys_##func)
-#define COMPAT_SYS(func) .llong DOTSYM(sys_##func),DOTSYM(compat_sys_##func)
-#define PPC_SYS(func) .llong DOTSYM(ppc_##func),DOTSYM(ppc_##func)
-#define OLDSYS(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(sys_ni_syscall)
-#define SYS32ONLY(func) .llong DOTSYM(sys_ni_syscall),DOTSYM(compat_sys_##func)
-#define SYSX(f, f3264, f32) .llong DOTSYM(f),DOTSYM(f3264)
-#else
-#define SYSCALL(func) .long sys_##func
-#define COMPAT_SYS(func) .long sys_##func
-#define PPC_SYS(func) .long ppc_##func
-#define OLDSYS(func) .long sys_##func
-#define SYS32ONLY(func) .long sys_##func
-#define SYSX(f, f3264, f32) .long f32
-#endif
-#define SYSCALL_SPU(func) SYSCALL(func)
-#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
-#define PPC_SYS_SPU(func) PPC_SYS(func)
-#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
-
-.section .rodata,"a"
-
-#ifdef CONFIG_PPC64
- .p2align 3
-#endif
-
-.globl sys_call_table
-sys_call_table:
-
-#include <asm/systbl.h>
diff --git a/arch/powerpc/kernel/systbl.c b/arch/powerpc/kernel/systbl.c
new file mode 100644
index 000000000000..4305f2a2162f
--- /dev/null
+++ b/arch/powerpc/kernel/systbl.c
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains the table of syscall-handling functions.
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
+ * and Paul Mackerras.
+ *
+ * Adapted for iSeries by Mike Corrigan (mikejc@us.ibm.com)
+ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
+ */
+
+#include <linux/syscalls.h>
+#include <linux/compat.h>
+#include <asm/unistd.h>
+#include <asm/syscalls.h>
+
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
+
+#undef __SYSCALL
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+#define __SYSCALL(nr, entry) [nr] = entry,
+#else
+/*
+ * Coerce syscall handlers with arbitrary parameters to common type
+ * requires cast to void* to avoid -Wcast-function-type.
+ */
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
+#endif
+
+const syscall_fn sys_call_table[] = {
+#ifdef CONFIG_PPC64
+#include <asm/syscall_table_64.h>
+#else
+#include <asm/syscall_table_32.h>
+#endif
+};
+
+#ifdef CONFIG_COMPAT
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
+const syscall_fn compat_sys_call_table[] = {
+#include <asm/syscall_table_32.h>
+};
+#endif /* CONFIG_COMPAT */
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
deleted file mode 100644
index 238aa63ced8f..000000000000
--- a/arch/powerpc/kernel/systbl_chk.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * This file, when run through CPP produces a list of syscall numbers
- * in the order of systbl.h. That way we can check for gaps and syscalls
- * that are out of order.
- *
- * Unfortunately, we cannot check for the correct ordering of entries
- * using SYSX().
- *
- * Copyright © IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/unistd.h>
-
-#define SYSCALL(func) __NR_##func
-#define COMPAT_SYS(func) __NR_##func
-#define PPC_SYS(func) __NR_##func
-#ifdef CONFIG_PPC64
-#define OLDSYS(func) -1
-#define SYS32ONLY(func) -1
-#else
-#define OLDSYS(func) __NR_old##func
-#define SYS32ONLY(func) __NR_##func
-#endif
-#define SYSX(f, f3264, f32) -1
-
-#define SYSCALL_SPU(func) SYSCALL(func)
-#define COMPAT_SYS_SPU(func) COMPAT_SYS(func)
-#define PPC_SYS_SPU(func) PPC_SYS(func)
-#define SYSX_SPU(f, f3264, f32) SYSX(f, f3264, f32)
-
-/* Just insert a marker for ni_syscalls */
-#define __NR_ni_syscall -1
-
-/*
- * These are the known exceptions.
- * Hopefully, there will be no more.
- */
-#define __NR_llseek __NR__llseek
-#undef __NR_umount
-#define __NR_umount __NR_umount2
-#define __NR_old_getrlimit __NR_getrlimit
-#define __NR_newstat __NR_stat
-#define __NR_newlstat __NR_lstat
-#define __NR_newfstat __NR_fstat
-#define __NR_newuname __NR_uname
-#define __NR_sysctl __NR__sysctl
-#define __NR_olddebug_setcontext __NR_sys_debug_setcontext
-
-/* We call sys_ugetrlimit for syscall number __NR_getrlimit */
-#define getrlimit ugetrlimit
-
-START_TABLE
-#include <asm/systbl.h>
-END_TABLE __NR_syscalls
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
deleted file mode 100644
index 19415e7674a5..000000000000
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-#
-# Just process the CPP output from systbl_chk.c and complain
-# if anything is out of order.
-#
-# Copyright © 2008 IBM Corporation
-#
-# This program is free software; you can redistribute it and/or
-# modify it under the terms of the GNU General Public License
-# as published by the Free Software Foundation; either version
-# 2 of the License, or (at your option) any later version.
-
-awk 'BEGIN { num = -1; } # Ignore the beginning of the file
- /^#/ { next; }
- /^[ \t]*$/ { next; }
- /^START_TABLE/ { num = 0; next; }
- /^END_TABLE/ {
- if (num != $2) {
- printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n",
- $2, num - 1;
- exit(1);
- }
- num = -1; # Ignore the rest of the file
- }
- {
- if (num == -1) next;
- if (($1 != -1) && ($1 != num)) {
- printf "Syscall %s out of order (expected %s)\n",
- $1, num;
- exit(1);
- };
- num++;
- }' "$1"
diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c
index a753b72efbc0..cba6dd15de3b 100644
--- a/arch/powerpc/kernel/tau_6xx.c
+++ b/arch/powerpc/kernel/tau_6xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* temp.c Thermal management for cpu's with Thermal Assist Units
*
@@ -12,14 +13,16 @@
*/
#include <linux/errno.h>
-#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/workqueue.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/reg.h>
#include <asm/nvram.h>
@@ -27,6 +30,8 @@
#include <asm/8xx_immap.h>
#include <asm/machdep.h>
+#include "setup.h"
+
static struct tau_temp
{
int interrupts;
@@ -35,9 +40,7 @@ static struct tau_temp
unsigned char grew;
} tau[NR_CPUS];
-struct timer_list tau_timer;
-
-#undef DEBUG
+static bool tau_int_enable;
/* TODO: put these in a /proc interface, with some sanity checks, and maybe
* dynamic adjustment to minimize # of interrupts */
@@ -46,72 +49,49 @@ struct timer_list tau_timer;
#define step_size 2 /* step size when temp goes out of range */
#define window_expand 1 /* expand the window by this much */
/* configurable values for shrinking the window */
-#define shrink_timer 2*HZ /* period between shrinking the window */
+#define shrink_timer 2000 /* period between shrinking the window */
#define min_window 2 /* minimum window size, degrees C */
-void set_thresholds(unsigned long cpu)
+static void set_thresholds(unsigned long cpu)
{
-#ifdef CONFIG_TAU_INT
- /*
- * setup THRM1,
- * threshold, valid bit, enable interrupts, interrupt when below threshold
- */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TIE | THRM1_TID);
+ u32 maybe_tie = tau_int_enable ? THRM1_TIE : 0;
- /* setup THRM2,
- * threshold, valid bit, enable interrupts, interrupt when above threshold
- */
- mtspr (SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | THRM1_TIE);
-#else
- /* same thing but don't enable interrupts */
- mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | THRM1_TID);
- mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V);
-#endif
+ /* setup THRM1, threshold, valid bit, interrupt when below threshold */
+ mtspr(SPRN_THRM1, THRM1_THRES(tau[cpu].low) | THRM1_V | maybe_tie | THRM1_TID);
+
+ /* setup THRM2, threshold, valid bit, interrupt when above threshold */
+ mtspr(SPRN_THRM2, THRM1_THRES(tau[cpu].high) | THRM1_V | maybe_tie);
}
-void TAUupdate(int cpu)
+static void TAUupdate(int cpu)
{
- unsigned thrm;
-
-#ifdef DEBUG
- printk("TAUupdate ");
-#endif
+ u32 thrm;
+ u32 bits = THRM1_TIV | THRM1_TIN | THRM1_V;
/* if both thresholds are crossed, the step_sizes cancel out
* and the window winds up getting expanded twice. */
- if((thrm = mfspr(SPRN_THRM1)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed low threshold */
- if (tau[cpu].low >= step_size){
- tau[cpu].low -= step_size;
- tau[cpu].high -= (step_size - window_expand);
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("low threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM1);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM1, 0);
+
+ if (tau[cpu].low >= step_size) {
+ tau[cpu].low -= step_size;
+ tau[cpu].high -= (step_size - window_expand);
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: low threshold crossed\n", __func__);
}
- if((thrm = mfspr(SPRN_THRM2)) & THRM1_TIV){ /* is valid? */
- if(thrm & THRM1_TIN){ /* crossed high threshold */
- if (tau[cpu].high <= 127-step_size){
- tau[cpu].low += (step_size - window_expand);
- tau[cpu].high += step_size;
- }
- tau[cpu].grew = 1;
-#ifdef DEBUG
- printk("high threshold crossed ");
-#endif
+ thrm = mfspr(SPRN_THRM2);
+ if ((thrm & bits) == bits) {
+ mtspr(SPRN_THRM2, 0);
+
+ if (tau[cpu].high <= 127 - step_size) {
+ tau[cpu].low += (step_size - window_expand);
+ tau[cpu].high += step_size;
}
+ tau[cpu].grew = 1;
+ pr_debug("%s: high threshold crossed\n", __func__);
}
-
-#ifdef DEBUG
- printk("grew = %d\n", tau[cpu].grew);
-#endif
-
-#ifndef CONFIG_TAU_INT /* tau_timeout will do this if not using interrupts */
- set_thresholds(cpu);
-#endif
-
}
#ifdef CONFIG_TAU_INT
@@ -120,33 +100,29 @@ void TAUupdate(int cpu)
* with interrupts disabled
*/
-void TAUException(struct pt_regs * regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException)
{
int cpu = smp_processor_id();
- irq_enter();
tau[cpu].interrupts++;
TAUupdate(cpu);
-
- irq_exit();
}
#endif /* CONFIG_TAU_INT */
static void tau_timeout(void * info)
{
int cpu;
- unsigned long flags;
int size;
int shrink;
- /* disabling interrupts *should* be okay */
- local_irq_save(flags);
cpu = smp_processor_id();
-#ifndef CONFIG_TAU_INT
- TAUupdate(cpu);
-#endif
+ if (!tau_int_enable)
+ TAUupdate(cpu);
+
+ /* Stop thermal sensor comparisons and interrupts */
+ mtspr(SPRN_THRM3, 0);
size = tau[cpu].high - tau[cpu].low;
if (size > min_window && ! tau[cpu].grew) {
@@ -169,32 +145,26 @@ static void tau_timeout(void * info)
set_thresholds(cpu);
- /*
- * Do the enable every time, since otherwise a bunch of (relatively)
- * complex sleep code needs to be added. One mtspr every time
- * tau_timeout is called is probably not a big deal.
- *
- * Enable thermal sensor and set up sample interval timer
- * need 20 us to do the compare.. until a nice 'cpu_speed' function
- * call is implemented, just assume a 500 mhz clock. It doesn't really
- * matter if we take too long for a compare since it's all interrupt
- * driven anyway.
- *
- * use a extra long time.. (60 us @ 500 mhz)
+ /* Restart thermal sensor comparisons and interrupts.
+ * The "PowerPC 740 and PowerPC 750 Microprocessor Datasheet"
+ * recommends that "the maximum value be set in THRM3 under all
+ * conditions."
*/
- mtspr(SPRN_THRM3, THRM3_SITV(500*60) | THRM3_E);
-
- local_irq_restore(flags);
+ mtspr(SPRN_THRM3, THRM3_SITV(0x1fff) | THRM3_E);
}
-static void tau_timeout_smp(unsigned long unused)
-{
+static struct workqueue_struct *tau_workq;
- /* schedule ourselves to be run again */
- mod_timer(&tau_timer, jiffies + shrink_timer) ;
+static void tau_work_func(struct work_struct *work)
+{
+ msleep(shrink_timer);
on_each_cpu(tau_timeout, NULL, 0);
+ /* schedule ourselves to be run again */
+ queue_work(tau_workq, work);
}
+static DECLARE_WORK(tau_work, tau_work_func);
+
/*
* setup the TAU
*
@@ -204,7 +174,7 @@ static void tau_timeout_smp(unsigned long unused)
int tau_initialized = 0;
-void __init TAU_init_smp(void * info)
+static void __init TAU_init_smp(void *info)
{
unsigned long cpu = smp_processor_id();
@@ -216,7 +186,7 @@ void __init TAU_init_smp(void * info)
set_thresholds(cpu);
}
-int __init TAU_init(void)
+static int __init TAU_init(void)
{
/* We assume in SMP that if one CPU has TAU support, they
* all have it --BenH
@@ -227,22 +197,19 @@ int __init TAU_init(void)
return 1;
}
+ tau_int_enable = IS_ENABLED(CONFIG_TAU_INT) &&
+ !strcmp(cur_cpu_spec->platform, "ppc750");
- /* first, set up the window shrinking timer */
- init_timer(&tau_timer);
- tau_timer.function = tau_timeout_smp;
- tau_timer.expires = jiffies + shrink_timer;
- add_timer(&tau_timer);
+ tau_workq = alloc_ordered_workqueue("tau", 0);
+ if (!tau_workq)
+ return -ENOMEM;
on_each_cpu(TAU_init_smp, NULL, 0);
- printk("Thermal assist unit ");
-#ifdef CONFIG_TAU_INT
- printk("using interrupts, ");
-#else
- printk("using timers, ");
-#endif
- printk("shrink_timer: %d jiffies\n", shrink_timer);
+ queue_work(tau_workq, &tau_work);
+
+ pr_info("Thermal assist unit using %s, shrink_timer: %d ms\n",
+ tau_int_enable ? "interrupts" : "workqueue", shrink_timer);
tau_initialized = 1;
return 0;
@@ -259,12 +226,12 @@ u32 cpu_temp_both(unsigned long cpu)
return ((tau[cpu].high << 16) | tau[cpu].low);
}
-int cpu_temp(unsigned long cpu)
+u32 cpu_temp(unsigned long cpu)
{
return ((tau[cpu].high + tau[cpu].low) / 2);
}
-int tau_interrupts(unsigned long cpu)
+u32 tau_interrupts(unsigned long cpu)
{
return (tau[cpu].interrupts);
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 368ab374d33c..4bbeb8644d3d 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common time routines among all ppc machines.
*
@@ -24,16 +25,13 @@
*
* 1997-09-10 Updated NTP code according to technical memorandum Jan '96
* "A Kernel Model for Precision Timekeeping" by Dave Mills
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/cputime.h>
#include <linux/kernel.h>
#include <linux/param.h>
#include <linux/string.h>
@@ -42,7 +40,6 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
-#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -54,64 +51,70 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
-#include <asm/trace.h>
+#include <linux/of_clk.h>
+#include <linux/suspend.h>
+#include <linux/processor.h>
+#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+#include <asm/trace.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
-#include <asm/processor.h>
#include <asm/nvram.h>
#include <asm/cache.h>
#include <asm/machdep.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/time.h>
-#include <asm/prom.h>
#include <asm/irq.h>
#include <asm/div64.h>
#include <asm/smp.h>
#include <asm/vdso_datapage.h>
#include <asm/firmware.h>
-#include <asm/cputime.h>
+#include <asm/mce.h>
+#include <asm/systemcfg.h>
/* powerpc clocksource/clockevent code */
#include <linux/clockchips.h>
-#include <linux/timekeeper_internal.h>
-static cycle_t rtc_read(struct clocksource *);
-static struct clocksource clocksource_rtc = {
- .name = "rtc",
- .rating = 400,
- .flags = CLOCK_SOURCE_IS_CONTINUOUS,
- .mask = CLOCKSOURCE_MASK(64),
- .read = rtc_read,
-};
-
-static cycle_t timebase_read(struct clocksource *);
+static u64 timebase_read(struct clocksource *);
static struct clocksource clocksource_timebase = {
.name = "timebase",
.rating = 400,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.mask = CLOCKSOURCE_MASK(64),
.read = timebase_read,
+ .vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER,
};
-#define DECREMENTER_MAX 0x7fffffff
+#define DECREMENTER_DEFAULT_MAX 0x7FFFFFFF
+u64 decrementer_max = DECREMENTER_DEFAULT_MAX;
+EXPORT_SYMBOL_GPL(decrementer_max); /* for KVM HDEC */
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev);
-static void decrementer_set_mode(enum clock_event_mode mode,
- struct clock_event_device *dev);
+static int decrementer_shutdown(struct clock_event_device *evt);
struct clock_event_device decrementer_clockevent = {
- .name = "decrementer",
- .rating = 200,
- .irq = 0,
- .set_next_event = decrementer_set_next_event,
- .set_mode = decrementer_set_mode,
- .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP,
+ .name = "decrementer",
+ .rating = 200,
+ .irq = 0,
+ .set_next_event = decrementer_set_next_event,
+ .set_state_oneshot_stopped = decrementer_shutdown,
+ .set_state_shutdown = decrementer_shutdown,
+ .tick_resume = decrementer_shutdown,
+ .features = CLOCK_EVT_FEAT_ONESHOT |
+ CLOCK_EVT_FEAT_C3STOP,
};
EXPORT_SYMBOL(decrementer_clockevent);
-DEFINE_PER_CPU(u64, decrementers_next_tb);
+/*
+ * This always puts next_tb beyond now, so the clock event will never fire
+ * with the usual comparison, no need for a separate test for stopped.
+ */
+#define DEC_CLOCKEVENT_STOPPED ~0ULL
+DEFINE_PER_CPU(u64, decrementers_next_tb) = DEC_CLOCKEVENT_STOPPED;
+EXPORT_SYMBOL_GPL(decrementers_next_tb);
static DEFINE_PER_CPU(struct clock_event_device, decrementers);
#define XSEC_PER_SEC (1024*1024)
@@ -127,14 +130,14 @@ unsigned long tb_ticks_per_jiffy;
unsigned long tb_ticks_per_usec = 100; /* sane default */
EXPORT_SYMBOL(tb_ticks_per_usec);
unsigned long tb_ticks_per_sec;
-EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime_t conversions */
+EXPORT_SYMBOL(tb_ticks_per_sec); /* for cputime conversions */
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);
static u64 tb_to_ns_scale __read_mostly;
static unsigned tb_to_ns_shift __read_mostly;
-static u64 boot_tb __read_mostly;
+static u64 boot_tb __ro_after_init;
extern struct timezone sys_tz;
static long timezone_offset;
@@ -144,46 +147,14 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-/*
- * Factors for converting from cputime_t (timebase ticks) to
- * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds).
- * These are all stored as 0.64 fixed-point binary fractions.
- */
-u64 __cputime_jiffies_factor;
-EXPORT_SYMBOL(__cputime_jiffies_factor);
-u64 __cputime_usec_factor;
-EXPORT_SYMBOL(__cputime_usec_factor);
-u64 __cputime_sec_factor;
-EXPORT_SYMBOL(__cputime_sec_factor);
-u64 __cputime_clockt_factor;
-EXPORT_SYMBOL(__cputime_clockt_factor);
-DEFINE_PER_CPU(unsigned long, cputime_last_delta);
-DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
-
-cputime_t cputime_one_jiffy;
-
-void (*dtl_consumer)(struct dtl_entry *, u64);
-
-static void calc_cputime_factors(void)
-{
- struct div_result res;
-
- div128_by_32(HZ, 0, tb_ticks_per_sec, &res);
- __cputime_jiffies_factor = res.result_low;
- div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
- __cputime_usec_factor = res.result_low;
- div128_by_32(1, 0, tb_ticks_per_sec, &res);
- __cputime_sec_factor = res.result_low;
- div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res);
- __cputime_clockt_factor = res.result_low;
-}
+bool tb_invalid;
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Read the SPURR on systems that have it, otherwise the PURR,
* or if that doesn't exist return the timebase value passed in.
*/
-static u64 read_spurr(u64 tb)
+static inline unsigned long read_spurr(unsigned long tb)
{
if (cpu_has_feature(CPU_FTR_SPURR))
return mfspr(SPRN_SPURR);
@@ -192,220 +163,243 @@ static u64 read_spurr(u64 tb)
return tb;
}
-#ifdef CONFIG_PPC_SPLPAR
-
/*
- * Scan the dispatch trace log and count up the stolen time.
- * Should be called with interrupts disabled.
+ * Account time for a transition between system, hard irq
+ * or soft irq state.
*/
-static u64 scan_dispatch_log(u64 stop_tb)
+static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
+ unsigned long now, unsigned long stime)
{
- u64 i = local_paca->dtl_ridx;
- struct dtl_entry *dtl = local_paca->dtl_curr;
- struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
- struct lppaca *vpa = local_paca->lppaca_ptr;
- u64 tb_delta;
- u64 stolen = 0;
- u64 dtb;
-
- if (!dtl)
- return 0;
-
- if (i == be64_to_cpu(vpa->dtl_idx))
- return 0;
- while (i < be64_to_cpu(vpa->dtl_idx)) {
- dtb = be64_to_cpu(dtl->timebase);
- tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
- be32_to_cpu(dtl->ready_to_enqueue_time);
- barrier();
- if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
- /* buffer has overflowed */
- i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
- dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
- continue;
+ unsigned long stime_scaled = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ unsigned long nowscaled, deltascaled;
+ unsigned long utime, utime_scaled;
+
+ nowscaled = read_spurr(now);
+ deltascaled = nowscaled - acct->startspurr;
+ acct->startspurr = nowscaled;
+ utime = acct->utime - acct->utime_sspurr;
+ acct->utime_sspurr = acct->utime;
+
+ /*
+ * Because we don't read the SPURR on every kernel entry/exit,
+ * deltascaled includes both user and system SPURR ticks.
+ * Apportion these ticks to system SPURR ticks and user
+ * SPURR ticks in the same ratio as the system time (delta)
+ * and user time (udelta) values obtained from the timebase
+ * over the same interval. The system ticks get accounted here;
+ * the user ticks get saved up in paca->user_time_scaled to be
+ * used by account_process_tick.
+ */
+ stime_scaled = stime;
+ utime_scaled = utime;
+ if (deltascaled != stime + utime) {
+ if (utime) {
+ stime_scaled = deltascaled * stime / (stime + utime);
+ utime_scaled = deltascaled - stime_scaled;
+ } else {
+ stime_scaled = deltascaled;
}
- if (dtb > stop_tb)
- break;
- if (dtl_consumer)
- dtl_consumer(dtl, i);
- stolen += tb_delta;
- ++i;
- ++dtl;
- if (dtl == dtl_end)
- dtl = local_paca->dispatch_log;
}
- local_paca->dtl_ridx = i;
- local_paca->dtl_curr = dtl;
- return stolen;
+ acct->utime_scaled += utime_scaled;
+#endif
+
+ return stime_scaled;
}
-/*
- * Accumulate stolen time by scanning the dispatch trace log.
- * Called on entry from user mode.
- */
-void accumulate_stolen_time(void)
+static unsigned long vtime_delta(struct cpu_accounting_data *acct,
+ unsigned long *stime_scaled,
+ unsigned long *steal_time)
{
- u64 sst, ust;
-
- u8 save_soft_enabled = local_paca->soft_enabled;
-
- /* We are called early in the exception entry, before
- * soft/hard_enabled are sync'ed to the expected state
- * for the exception. We are hard disabled but the PACA
- * needs to reflect that so various debug stuff doesn't
- * complain
- */
- local_paca->soft_enabled = 0;
+ unsigned long now, stime;
- sst = scan_dispatch_log(local_paca->starttime_user);
- ust = scan_dispatch_log(local_paca->starttime);
- local_paca->system_time -= sst;
- local_paca->user_time -= ust;
- local_paca->stolen_time += ust + sst;
+ WARN_ON_ONCE(!irqs_disabled());
- local_paca->soft_enabled = save_soft_enabled;
-}
+ now = mftb();
+ stime = now - acct->starttime;
+ acct->starttime = now;
-static inline u64 calculate_stolen_time(u64 stop_tb)
-{
- u64 stolen = 0;
+ *stime_scaled = vtime_delta_scaled(acct, now, stime);
- if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) {
- stolen = scan_dispatch_log(stop_tb);
- get_paca()->system_time -= stolen;
- }
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
+ firmware_has_feature(FW_FEATURE_SPLPAR))
+ *steal_time = pseries_calculate_stolen_time(now);
+ else
+ *steal_time = 0;
- stolen += get_paca()->stolen_time;
- get_paca()->stolen_time = 0;
- return stolen;
+ return stime;
}
-#else /* CONFIG_PPC_SPLPAR */
-static inline u64 calculate_stolen_time(u64 stop_tb)
+static void vtime_delta_kernel(struct cpu_accounting_data *acct,
+ unsigned long *stime, unsigned long *stime_scaled)
{
- return 0;
-}
+ unsigned long steal_time;
-#endif /* CONFIG_PPC_SPLPAR */
+ *stime = vtime_delta(acct, stime_scaled, &steal_time);
+ *stime -= min(*stime, steal_time);
+ acct->steal_time += steal_time;
+}
-/*
- * Account time for a transition between system, hard irq
- * or soft irq state.
- */
-static u64 vtime_delta(struct task_struct *tsk,
- u64 *sys_scaled, u64 *stolen)
+void vtime_account_kernel(struct task_struct *tsk)
{
- u64 now, nowscaled, deltascaled;
- u64 udelta, delta, user_scaled;
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ unsigned long stime, stime_scaled;
- WARN_ON_ONCE(!irqs_disabled());
+ vtime_delta_kernel(acct, &stime, &stime_scaled);
- now = mftb();
- nowscaled = read_spurr(now);
- get_paca()->system_time += now - get_paca()->starttime;
- get_paca()->starttime = now;
- deltascaled = nowscaled - get_paca()->startspurr;
- get_paca()->startspurr = nowscaled;
+ if (tsk->flags & PF_VCPU) {
+ acct->gtime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ acct->utime_scaled += stime_scaled;
+#endif
+ } else {
+ acct->stime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ acct->stime_scaled += stime_scaled;
+#endif
+ }
+}
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
- *stolen = calculate_stolen_time(now);
+void vtime_account_idle(struct task_struct *tsk)
+{
+ unsigned long stime, stime_scaled, steal_time;
+ struct cpu_accounting_data *acct = get_accounting(tsk);
- delta = get_paca()->system_time;
- get_paca()->system_time = 0;
- udelta = get_paca()->user_time - get_paca()->utime_sspurr;
- get_paca()->utime_sspurr = get_paca()->user_time;
+ stime = vtime_delta(acct, &stime_scaled, &steal_time);
+ acct->idle_time += stime + steal_time;
+}
- /*
- * Because we don't read the SPURR on every kernel entry/exit,
- * deltascaled includes both user and system SPURR ticks.
- * Apportion these ticks to system SPURR ticks and user
- * SPURR ticks in the same ratio as the system time (delta)
- * and user time (udelta) values obtained from the timebase
- * over the same interval. The system ticks get accounted here;
- * the user ticks get saved up in paca->user_time_scaled to be
- * used by account_process_tick.
- */
- *sys_scaled = delta;
- user_scaled = udelta;
- if (deltascaled != delta + udelta) {
- if (udelta) {
- *sys_scaled = deltascaled * delta / (delta + udelta);
- user_scaled = deltascaled - *sys_scaled;
- } else {
- *sys_scaled = deltascaled;
- }
- }
- get_paca()->user_time_scaled += user_scaled;
+static void vtime_account_irq_field(struct cpu_accounting_data *acct,
+ unsigned long *field)
+{
+ unsigned long stime, stime_scaled;
- return delta;
+ vtime_delta_kernel(acct, &stime, &stime_scaled);
+ *field += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ acct->stime_scaled += stime_scaled;
+#endif
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_softirq(struct task_struct *tsk)
{
- u64 delta, sys_scaled, stolen;
-
- delta = vtime_delta(tsk, &sys_scaled, &stolen);
- account_system_time(tsk, 0, delta, sys_scaled);
- if (stolen)
- account_steal_time(stolen);
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ vtime_account_irq_field(acct, &acct->softirq_time);
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
-void vtime_account_idle(struct task_struct *tsk)
+void vtime_account_hardirq(struct task_struct *tsk)
{
- u64 delta, sys_scaled, stolen;
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+ vtime_account_irq_field(acct, &acct->hardirq_time);
+}
- delta = vtime_delta(tsk, &sys_scaled, &stolen);
- account_idle_time(delta + stolen);
+static void vtime_flush_scaled(struct task_struct *tsk,
+ struct cpu_accounting_data *acct)
+{
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ if (acct->utime_scaled)
+ tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
+ if (acct->stime_scaled)
+ tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
+
+ acct->utime_scaled = 0;
+ acct->utime_sspurr = 0;
+ acct->stime_scaled = 0;
+#endif
}
/*
- * Transfer the user time accumulated in the paca
- * by the exception entry and exit code to the generic
- * process user time records.
+ * Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
-void vtime_account_user(struct task_struct *tsk)
+void vtime_flush(struct task_struct *tsk)
{
- cputime_t utime, utimescaled;
-
- utime = get_paca()->user_time;
- utimescaled = get_paca()->user_time_scaled;
- get_paca()->user_time = 0;
- get_paca()->user_time_scaled = 0;
- get_paca()->utime_sspurr = 0;
- account_user_time(tsk, utime, utimescaled);
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+
+ if (acct->utime)
+ account_user_time(tsk, cputime_to_nsecs(acct->utime));
+
+ if (acct->gtime)
+ account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
+
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
+ account_steal_time(cputime_to_nsecs(acct->steal_time));
+ acct->steal_time = 0;
+ }
+
+ if (acct->idle_time)
+ account_idle_time(cputime_to_nsecs(acct->idle_time));
+
+ if (acct->stime)
+ account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
+ CPUTIME_SYSTEM);
+
+ if (acct->hardirq_time)
+ account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
+ CPUTIME_IRQ);
+ if (acct->softirq_time)
+ account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
+ CPUTIME_SOFTIRQ);
+
+ vtime_flush_scaled(tsk, acct);
+
+ acct->utime = 0;
+ acct->gtime = 0;
+ acct->idle_time = 0;
+ acct->stime = 0;
+ acct->hardirq_time = 0;
+ acct->softirq_time = 0;
}
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#define calc_cputime_factors()
-#endif
+/*
+ * Called from the context switch with interrupts disabled, to charge all
+ * accumulated times to the current process, and to prepare accounting on
+ * the next process.
+ */
+void vtime_task_switch(struct task_struct *prev)
+{
+ if (is_idle_task(prev))
+ vtime_account_idle(prev);
+ else
+ vtime_account_kernel(prev);
-void __delay(unsigned long loops)
+ vtime_flush(prev);
+
+ if (!IS_ENABLED(CONFIG_PPC64)) {
+ struct cpu_accounting_data *acct = get_accounting(current);
+ struct cpu_accounting_data *acct0 = get_accounting(prev);
+
+ acct->starttime = acct0->starttime;
+ }
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+void __no_kcsan __delay(unsigned long loops)
{
unsigned long start;
- int diff;
-
- if (__USE_RTC()) {
- start = get_rtcl();
- do {
- /* the RTCL register wraps at 1000000000 */
- diff = get_rtcl() - start;
- if (diff < 0)
- diff += 1000000000;
- } while (diff < loops);
+
+ spin_begin();
+ if (tb_invalid) {
+ /*
+ * TB is in error state and isn't ticking anymore.
+ * HMI handler was unable to recover from TB error.
+ * Return immediately, so that kernel won't get stuck here.
+ */
+ spin_cpu_relax();
} else {
- start = get_tbl();
- while (get_tbl() - start < loops)
- HMT_low();
- HMT_medium();
+ start = mftb();
+ while (mftb() - start < loops)
+ spin_cpu_relax();
}
+ spin_end();
}
EXPORT_SYMBOL(__delay);
-void udelay(unsigned long usecs)
+void __no_kcsan udelay(unsigned long usecs)
{
__delay(tb_ticks_per_usec * usecs);
}
@@ -458,155 +452,181 @@ static inline void clear_irq_work_pending(void)
DEFINE_PER_CPU(u8, irq_work_pending);
-#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1
-#define test_irq_work_pending() __get_cpu_var(irq_work_pending)
-#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0
+#define set_irq_work_pending_flag() __this_cpu_write(irq_work_pending, 1)
+#define test_irq_work_pending() __this_cpu_read(irq_work_pending)
+#define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0)
#endif /* 32 vs 64 bit */
void arch_irq_work_raise(void)
{
+ /*
+ * 64-bit code that uses irq soft-mask can just cause an immediate
+ * interrupt here that gets soft masked, if this is called under
+ * local_irq_disable(). It might be possible to prevent that happening
+ * by noticing interrupts are disabled and setting decrementer pending
+ * to be replayed when irqs are enabled. The problem there is that
+ * tracing can call irq_work_raise, including in code that does low
+ * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on)
+ * which could get tangled up if we're messing with the same state
+ * here.
+ */
preempt_disable();
set_irq_work_pending_flag();
set_dec(1);
preempt_enable();
}
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
+
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
#define clear_irq_work_pending()
+static void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
#endif /* CONFIG_IRQ_WORK */
-void __timer_interrupt(void)
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+void timer_rearm_host_dec(u64 now)
{
- struct pt_regs *regs = get_irq_regs();
- u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
- struct clock_event_device *evt = &__get_cpu_var(decrementers);
- u64 now;
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
- trace_timer_interrupt_entry(regs);
-
- if (test_irq_work_pending()) {
- clear_irq_work_pending();
- irq_work_run();
- }
+ WARN_ON_ONCE(!arch_irqs_disabled());
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
- now = get_tb_or_rtc();
if (now >= *next_tb) {
- *next_tb = ~(u64)0;
- if (evt->event_handler)
- evt->event_handler(evt);
- __get_cpu_var(irq_stat).timer_irqs_event++;
+ local_paca->irq_happened |= PACA_IRQ_DEC;
} else {
now = *next_tb - now;
- if (now <= DECREMENTER_MAX)
- set_dec((int)now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
- __get_cpu_var(irq_stat).timer_irqs_others++;
- }
-
-#ifdef CONFIG_PPC64
- /* collect purr register values often, for accurate calculations */
- if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
- struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array);
- cu->current_tb = mfspr(SPRN_PURR);
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
}
-#endif
-
- trace_timer_interrupt_exit(regs);
}
+EXPORT_SYMBOL_GPL(timer_rearm_host_dec);
+#endif
/*
* timer_interrupt - gets called when the decrementer overflows,
* with interrupts disabled.
*/
-void timer_interrupt(struct pt_regs * regs)
+DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
{
+ struct clock_event_device *evt = this_cpu_ptr(&decrementers);
+ u64 *next_tb = this_cpu_ptr(&decrementers_next_tb);
struct pt_regs *old_regs;
- u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
-
- /* Ensure a positive value is written to the decrementer, or else
- * some CPUs will continue to take decrementer exceptions.
- */
- set_dec(DECREMENTER_MAX);
+ u64 now;
- /* Some implementations of hotplug will get timer interrupts while
- * offline, just ignore these and we also need to set
- * decrementers_next_tb as MAX to make sure __check_irq_replay
- * don't replay timer interrupt when return, otherwise we'll trap
- * here infinitely :(
+ /*
+ * Some implementations of hotplug will get timer interrupts while
+ * offline, just ignore these.
*/
- if (!cpu_online(smp_processor_id())) {
- *next_tb = ~(u64)0;
+ if (unlikely(!cpu_online(smp_processor_id()))) {
+ set_dec(decrementer_max);
return;
}
- /* Conditionally hard-enable interrupts now that the DEC has been
- * bumped to its maximum value
- */
- may_hard_irq_enable();
-
+ /* Conditionally hard-enable interrupts. */
+ if (should_hard_irq_enable(regs)) {
+ /*
+ * Ensure a positive value is written to the decrementer, or
+ * else some CPUs will continue to take decrementer exceptions.
+ * When the PPC_WATCHDOG (decrementer based) is configured,
+ * keep this at most 31 bits, which is about 4 seconds on most
+ * systems, which gives the watchdog a chance of catching timer
+ * interrupt hard lockups.
+ */
+ if (IS_ENABLED(CONFIG_PPC_WATCHDOG))
+ set_dec(0x7fffffff);
+ else
+ set_dec(decrementer_max);
+
+ do_hard_irq_enable();
+ }
#if defined(CONFIG_PPC32) && defined(CONFIG_PPC_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
- do_IRQ(regs);
+ __do_IRQ(regs);
#endif
old_regs = set_irq_regs(regs);
- irq_enter();
- __timer_interrupt();
- irq_exit();
- set_irq_regs(old_regs);
-}
+ trace_timer_interrupt_entry(regs);
-/*
- * Hypervisor decrementer interrupts shouldn't occur but are sometimes
- * left pending on exit from a KVM guest. We don't need to do anything
- * to clear them, as they are edge-triggered.
- */
-void hdec_interrupt(struct pt_regs *regs)
-{
-}
+ if (test_irq_work_pending()) {
+ clear_irq_work_pending();
+ mce_run_irq_context_handlers();
+ irq_work_run();
+ }
-#ifdef CONFIG_SUSPEND
-static void generic_suspend_disable_irqs(void)
-{
- /* Disable the decrementer, so that it doesn't interfere
- * with suspending.
- */
+ now = get_tb();
+ if (now >= *next_tb) {
+ evt->event_handler(evt);
+ __this_cpu_inc(irq_stat.timer_irqs_event);
+ } else {
+ now = *next_tb - now;
+ if (now > decrementer_max)
+ now = decrementer_max;
+ set_dec_or_work(now);
+ __this_cpu_inc(irq_stat.timer_irqs_others);
+ }
- set_dec(DECREMENTER_MAX);
- local_irq_disable();
- set_dec(DECREMENTER_MAX);
+ trace_timer_interrupt_exit(regs);
+
+ set_irq_regs(old_regs);
}
+EXPORT_SYMBOL(timer_interrupt);
-static void generic_suspend_enable_irqs(void)
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void timer_broadcast_interrupt(void)
{
- local_irq_enable();
+ tick_receive_broadcast();
+ __this_cpu_inc(irq_stat.broadcast_irqs_event);
}
+#endif
+#ifdef CONFIG_SUSPEND
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_disable_irqs(void)
{
if (ppc_md.suspend_disable_irqs)
ppc_md.suspend_disable_irqs();
- generic_suspend_disable_irqs();
+
+ /* Disable the decrementer, so that it doesn't interfere
+ * with suspending.
+ */
+
+ set_dec(decrementer_max);
+ local_irq_disable();
+ set_dec(decrementer_max);
}
/* Overrides the weak version in kernel/power/main.c */
void arch_suspend_enable_irqs(void)
{
- generic_suspend_enable_irqs();
+ local_irq_enable();
+
if (ppc_md.suspend_enable_irqs)
ppc_md.suspend_enable_irqs();
}
#endif
+unsigned long long tb_to_ns(unsigned long long ticks)
+{
+ return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
+}
+EXPORT_SYMBOL_GPL(tb_to_ns);
+
/*
* Scheduler clock - returns current time in nanosec units.
*
@@ -614,13 +634,49 @@ void arch_suspend_enable_irqs(void)
* the high 64 bits of a * b, i.e. (a * b) >> 64, where a and b
* are 64-bit unsigned numbers.
*/
-unsigned long long sched_clock(void)
+notrace unsigned long long sched_clock(void)
{
- if (__USE_RTC())
- return get_rtc();
return mulhdu(get_tb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
}
+#ifdef CONFIG_PPC_SPLPAR
+u64 get_boot_tb(void)
+{
+ return boot_tb;
+}
+#endif
+
+#ifdef CONFIG_PPC_PSERIES
+
+/*
+ * Running clock - attempts to give a view of time passing for a virtualised
+ * kernels.
+ * Uses the VTB register if available otherwise a next best guess.
+ */
+unsigned long long running_clock(void)
+{
+ /*
+ * Don't read the VTB as a host since KVM does not switch in host
+ * timebase into the VTB when it takes a guest off the CPU, reading the
+ * VTB would result in reading 'last switched out' guest VTB.
+ *
+ * Host kernels are often compiled with CONFIG_PPC_PSERIES checked, it
+ * would be unsafe to rely only on the #ifdef above.
+ */
+ if (firmware_has_feature(FW_FEATURE_LPAR) &&
+ cpu_has_feature(CPU_FTR_ARCH_207S))
+ return mulhdu(get_vtb() - boot_tb, tb_to_ns_scale) << tb_to_ns_shift;
+
+ /*
+ * This is a next best approximation without a VTB.
+ * On a host which is running bare metal there should never be any stolen
+ * time and on a host which doesn't do any virtualisation TB *should* equal
+ * VTB so it makes no difference anyway.
+ */
+ return local_clock() - kcpustat_this_cpu->cpustat[CPUTIME_STEAL];
+}
+#endif
+
static int __init get_freq(char *name, int cells, unsigned long *val)
{
struct device_node *cpu;
@@ -643,15 +699,23 @@ static int __init get_freq(char *name, int cells, unsigned long *val)
return found;
}
-void start_cpu_decrementer(void)
+static void start_cpu_decrementer(void)
{
-#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
+#ifdef CONFIG_BOOKE
+ unsigned int tcr;
+
/* Clear any pending timer interrupts */
mtspr(SPRN_TSR, TSR_ENW | TSR_WIS | TSR_DIS | TSR_FIS);
- /* Enable decrementer interrupt */
- mtspr(SPRN_TCR, TCR_DIE);
-#endif /* defined(CONFIG_BOOKE) || defined(CONFIG_40x) */
+ tcr = mfspr(SPRN_TCR);
+ /*
+ * The watchdog may have already been enabled by u-boot. So leave
+ * TRC[WP] (Watchdog Period) alone.
+ */
+ tcr &= TCR_WP_MASK; /* Clear all bits except for TCR[WP] */
+ tcr |= TCR_DIE; /* Enable decrementer */
+ mtspr(SPRN_TCR, tcr);
+#endif
}
void __init generic_calibrate_decr(void)
@@ -675,27 +739,25 @@ void __init generic_calibrate_decr(void)
}
}
-int update_persistent_clock(struct timespec now)
+int update_persistent_clock64(struct timespec64 now)
{
struct rtc_time tm;
if (!ppc_md.set_rtc_time)
return -ENODEV;
- to_tm(now.tv_sec + 1 + timezone_offset, &tm);
- tm.tm_year -= 1900;
- tm.tm_mon -= 1;
+ rtc_time64_to_tm(now.tv_sec + 1 + timezone_offset, &tm);
return ppc_md.set_rtc_time(&tm);
}
-static void __read_persistent_clock(struct timespec *ts)
+static void __read_persistent_clock(struct timespec64 *ts)
{
struct rtc_time tm;
static int first = 1;
ts->tv_nsec = 0;
- /* XXX this is a litle fragile but will work okay in the short term */
+ /* XXX this is a little fragile but will work okay in the short term */
if (first) {
first = 0;
if (ppc_md.time_init)
@@ -713,11 +775,10 @@ static void __read_persistent_clock(struct timespec *ts)
}
ppc_md.get_rtc_time(&tm);
- ts->tv_sec = mktime(tm.tm_year+1900, tm.tm_mon+1, tm.tm_mday,
- tm.tm_hour, tm.tm_min, tm.tm_sec);
+ ts->tv_sec = rtc_tm_to_time64(&tm);
}
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
__read_persistent_clock(ts);
@@ -730,75 +791,14 @@ void read_persistent_clock(struct timespec *ts)
}
/* clocksource code */
-static cycle_t rtc_read(struct clocksource *cs)
-{
- return (cycle_t)get_rtc();
-}
-
-static cycle_t timebase_read(struct clocksource *cs)
-{
- return (cycle_t)get_tb();
-}
-
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult, cycle_t cycle_last)
-{
- u64 new_tb_to_xs, new_stamp_xsec;
- u32 frac_sec;
-
- if (clock != &clocksource_timebase)
- return;
-
- /* Make userspace gettimeofday spin until we're done. */
- ++vdso_data->tb_update_count;
- smp_mb();
-
- /* 19342813113834067 ~= 2^(20+64) / 1e9 */
- new_tb_to_xs = (u64) mult * (19342813113834067ULL >> clock->shift);
- new_stamp_xsec = (u64) wall_time->tv_nsec * XSEC_PER_SEC;
- do_div(new_stamp_xsec, 1000000000);
- new_stamp_xsec += (u64) wall_time->tv_sec * XSEC_PER_SEC;
-
- BUG_ON(wall_time->tv_nsec >= NSEC_PER_SEC);
- /* this is tv_nsec / 1e9 as a 0.32 fraction */
- frac_sec = ((u64) wall_time->tv_nsec * 18446744073ULL) >> 32;
-
- /*
- * tb_update_count is used to allow the userspace gettimeofday code
- * to assure itself that it sees a consistent view of the tb_to_xs and
- * stamp_xsec variables. It reads the tb_update_count, then reads
- * tb_to_xs and stamp_xsec and then reads tb_update_count again. If
- * the two values of tb_update_count match and are even then the
- * tb_to_xs and stamp_xsec values are consistent. If not, then it
- * loops back and reads them again until this criteria is met.
- * We expect the caller to have done the first increment of
- * vdso_data->tb_update_count already.
- */
- vdso_data->tb_orig_stamp = cycle_last;
- vdso_data->stamp_xsec = new_stamp_xsec;
- vdso_data->tb_to_xs = new_tb_to_xs;
- vdso_data->wtom_clock_sec = wtm->tv_sec;
- vdso_data->wtom_clock_nsec = wtm->tv_nsec;
- vdso_data->stamp_xtime = *wall_time;
- vdso_data->stamp_sec_fraction = frac_sec;
- smp_wmb();
- ++(vdso_data->tb_update_count);
-}
-
-void update_vsyscall_tz(void)
+static notrace u64 timebase_read(struct clocksource *cs)
{
- vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
- vdso_data->tz_dsttime = sys_tz.tz_dsttime;
+ return (u64)get_tb();
}
static void __init clocksource_init(void)
{
- struct clocksource *clock;
-
- if (__USE_RTC())
- clock = &clocksource_rtc;
- else
- clock = &clocksource_timebase;
+ struct clocksource *clock = &clocksource_timebase;
if (clocksource_register_hz(clock, tb_ticks_per_sec)) {
printk(KERN_ERR "clocksource: %s is already registered\n",
@@ -813,30 +813,18 @@ static void __init clocksource_init(void)
static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
- __get_cpu_var(decrementers_next_tb) = get_tb_or_rtc() + evt;
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ __this_cpu_write(decrementers_next_tb, get_tb() + evt);
+ set_dec_or_work(evt);
return 0;
}
-static void decrementer_set_mode(enum clock_event_mode mode,
- struct clock_event_device *dev)
-{
- if (mode != CLOCK_EVT_MODE_ONESHOT)
- decrementer_set_next_event(DECREMENTER_MAX, dev);
-}
-
-/* Interrupt handler for the timer broadcast IPI */
-void tick_broadcast_ipi_handler(void)
+static int decrementer_shutdown(struct clock_event_device *dev)
{
- u64 *next_tb = &__get_cpu_var(decrementers_next_tb);
+ __this_cpu_write(decrementers_next_tb, DEC_CLOCKEVENT_STOPPED);
+ set_dec_or_work(decrementer_max);
- *next_tb = get_tb_or_rtc();
- __timer_interrupt();
+ return 0;
}
static void register_decrementer_clockevent(int cpu)
@@ -846,38 +834,111 @@ static void register_decrementer_clockevent(int cpu)
*dec = decrementer_clockevent;
dec->cpumask = cpumask_of(cpu);
+ clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
+
printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
- clockevents_register_device(dec);
+ /* Set values for KVM, see kvm_emulate_dec() */
+ decrementer_clockevent.mult = dec->mult;
+ decrementer_clockevent.shift = dec->shift;
}
-static void __init init_decrementer_clockevent(void)
+static void enable_large_decrementer(void)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return;
+
+ if (decrementer_max <= DECREMENTER_DEFAULT_MAX)
+ return;
+
+ /*
+ * If we're running as the hypervisor we need to enable the LD manually
+ * otherwise firmware should have done it for us.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_LD);
+}
+
+static void __init set_decrementer_max(void)
{
- int cpu = smp_processor_id();
+ struct device_node *cpu;
+ u32 bits = 32;
- clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
+ /* Prior to ISAv3 the decrementer is always 32 bit */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return;
- decrementer_clockevent.max_delta_ns =
- clockevent_delta2ns(DECREMENTER_MAX, &decrementer_clockevent);
- decrementer_clockevent.min_delta_ns =
- clockevent_delta2ns(2, &decrementer_clockevent);
+ cpu = of_find_node_by_type(NULL, "cpu");
- register_decrementer_clockevent(cpu);
+ if (of_property_read_u32(cpu, "ibm,dec-bits", &bits) == 0) {
+ if (bits > 64 || bits < 32) {
+ pr_warn("time_init: firmware supplied invalid ibm,dec-bits");
+ bits = 32;
+ }
+
+ /* calculate the signed maximum given this many bits */
+ decrementer_max = (1ul << (bits - 1)) - 1;
+ }
+
+ of_node_put(cpu);
+
+ pr_info("time_init: %u bit decrementer (max: %llx)\n",
+ bits, decrementer_max);
+}
+
+static void __init init_decrementer_clockevent(void)
+{
+ register_decrementer_clockevent(smp_processor_id());
}
void secondary_cpu_time_init(void)
{
+ /* Enable and test the large decrementer for this cpu */
+ enable_large_decrementer();
+
/* Start the decrementer on CPUs that have manual control
* such as BookE
*/
start_cpu_decrementer();
- /* FIME: Should make unrelatred change to move snapshot_timebase
+ /* FIME: Should make unrelated change to move snapshot_timebase
* call here ! */
register_decrementer_clockevent(smp_processor_id());
}
+/*
+ * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
+ * result.
+ */
+static __init void div128_by_32(u64 dividend_high, u64 dividend_low,
+ unsigned int divisor, struct div_result *dr)
+{
+ unsigned long a, b, c, d;
+ unsigned long w, x, y, z;
+ u64 ra, rb, rc;
+
+ a = dividend_high >> 32;
+ b = dividend_high & 0xffffffff;
+ c = dividend_low >> 32;
+ d = dividend_low & 0xffffffff;
+
+ w = a / divisor;
+ ra = ((u64)(a - (w * divisor)) << 32) + b;
+
+ rb = ((u64)do_div(ra, divisor) << 32) + c;
+ x = ra;
+
+ rc = ((u64)do_div(rb, divisor) << 32) + d;
+ y = rb;
+
+ do_div(rc, divisor);
+ z = rc;
+
+ dr->result_high = ((u64)w << 32) + x;
+ dr->result_low = ((u64)y << 32) + z;
+}
+
/* This function is only called on the boot processor */
void __init time_init(void)
{
@@ -885,23 +946,20 @@ void __init time_init(void)
u64 scale;
unsigned shift;
- if (__USE_RTC()) {
- /* 601 processor: dec counts down by 128 every 128ns */
- ppc_tb_freq = 1000000000;
- } else {
- /* Normal PowerPC with timebase register */
+ /* Normal PowerPC with timebase register */
+ if (ppc_md.calibrate_decr)
ppc_md.calibrate_decr();
- printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
- ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
- printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
- ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
- }
+ else
+ generic_calibrate_decr();
+
+ printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
+ ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
+ printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
+ ppc_proc_freq / 1000000, ppc_proc_freq % 1000000);
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
- calc_cputime_factors();
- setup_cputime_one_jiffy();
/*
* Compute scale factor for sched_clock.
@@ -922,7 +980,7 @@ void __init time_init(void)
tb_to_ns_scale = scale;
tb_to_ns_shift = shift;
/* Save the current timebase to pretty up CONFIG_PRINTK_TIME */
- boot_tb = get_tb_or_rtc();
+ boot_tb = get_tb();
/* If platform provided a timezone (pmac), we correct the time */
if (timezone_offset) {
@@ -930,8 +988,14 @@ void __init time_init(void)
sys_tz.tz_dsttime = 0;
}
- vdso_data->tb_update_count = 0;
- vdso_data->tb_ticks_per_sec = tb_ticks_per_sec;
+ vdso_k_arch_data->tb_ticks_per_sec = tb_ticks_per_sec;
+#ifdef CONFIG_PPC64_PROC_SYSTEMCFG
+ systemcfg->tb_ticks_per_sec = tb_ticks_per_sec;
+#endif
+
+ /* initialise and enable the large decrementer (if we have one) */
+ set_decrementer_max();
+ enable_large_decrementer();
/* Start the decrementer on CPUs that have manual control
* such as BookE
@@ -943,129 +1007,42 @@ void __init time_init(void)
init_decrementer_clockevent();
tick_setup_hrtimer_broadcast();
-}
+ of_clk_init(NULL);
+ enable_sched_clock_irqtime();
+}
-#define FEBRUARY 2
-#define STARTOFTIME 1970
-#define SECDAY 86400L
-#define SECYR (SECDAY * 365)
-#define leapyear(year) ((year) % 4 == 0 && \
- ((year) % 100 != 0 || (year) % 400 == 0))
-#define days_in_year(a) (leapyear(a) ? 366 : 365)
-#define days_in_month(a) (month_days[(a) - 1])
-
-static int month_days[12] = {
- 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
-};
-
-/*
- * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
- */
-void GregorianDay(struct rtc_time * tm)
+/* We don't need to calibrate delay, we use the CPU timebase for that */
+void calibrate_delay(void)
{
- int leapsToDate;
- int lastYear;
- int day;
- int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
-
- lastYear = tm->tm_year - 1;
-
- /*
- * Number of leap corrections to apply up to end of last year
- */
- leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
-
- /*
- * This year is a leap year if it is divisible by 4 except when it is
- * divisible by 100 unless it is divisible by 400
- *
- * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was
+ /* Some generic code (such as spinlock debug) use loops_per_jiffy
+ * as the number of __delay(1) in a jiffy, so make it so
*/
- day = tm->tm_mon > 2 && leapyear(tm->tm_year);
-
- day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
- tm->tm_mday;
-
- tm->tm_wday = day % 7;
+ loops_per_jiffy = tb_ticks_per_jiffy;
}
-void to_tm(int tim, struct rtc_time * tm)
+#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC)
+static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm)
{
- register int i;
- register long hms, day;
-
- day = tim / SECDAY;
- hms = tim % SECDAY;
-
- /* Hours, minutes, seconds are easy */
- tm->tm_hour = hms / 3600;
- tm->tm_min = (hms % 3600) / 60;
- tm->tm_sec = (hms % 3600) % 60;
-
- /* Number of years in days */
- for (i = STARTOFTIME; day >= days_in_year(i); i++)
- day -= days_in_year(i);
- tm->tm_year = i;
-
- /* Number of months in days left */
- if (leapyear(tm->tm_year))
- days_in_month(FEBRUARY) = 29;
- for (i = 1; day >= days_in_month(i); i++)
- day -= days_in_month(i);
- days_in_month(FEBRUARY) = 28;
- tm->tm_mon = i;
-
- /* Days are what is left over (+1) from all that. */
- tm->tm_mday = day + 1;
-
- /*
- * Determine the day of week
- */
- GregorianDay(tm);
+ ppc_md.get_rtc_time(tm);
+ return 0;
}
-/*
- * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit
- * result.
- */
-void div128_by_32(u64 dividend_high, u64 dividend_low,
- unsigned divisor, struct div_result *dr)
+static int rtc_generic_set_time(struct device *dev, struct rtc_time *tm)
{
- unsigned long a, b, c, d;
- unsigned long w, x, y, z;
- u64 ra, rb, rc;
-
- a = dividend_high >> 32;
- b = dividend_high & 0xffffffff;
- c = dividend_low >> 32;
- d = dividend_low & 0xffffffff;
-
- w = a / divisor;
- ra = ((u64)(a - (w * divisor)) << 32) + b;
-
- rb = ((u64) do_div(ra, divisor) << 32) + c;
- x = ra;
-
- rc = ((u64) do_div(rb, divisor) << 32) + d;
- y = rb;
-
- do_div(rc, divisor);
- z = rc;
+ if (!ppc_md.set_rtc_time)
+ return -EOPNOTSUPP;
- dr->result_high = ((u64)w << 32) + x;
- dr->result_low = ((u64)y << 32) + z;
+ if (ppc_md.set_rtc_time(tm) < 0)
+ return -EOPNOTSUPP;
+ return 0;
}
-/* We don't need to calibrate delay, we use the CPU timebase for that */
-void calibrate_delay(void)
-{
- /* Some generic code (such as spinlock debug) use loops_per_jiffy
- * as the number of __delay(1) in a jiffy, so make it so
- */
- loops_per_jiffy = tb_ticks_per_jiffy;
-}
+static const struct rtc_class_ops rtc_generic_ops = {
+ .read_time = rtc_generic_get_time,
+ .set_time = rtc_generic_set_time,
+};
static int __init rtc_init(void)
{
@@ -1074,9 +1051,12 @@ static int __init rtc_init(void)
if (!ppc_md.get_rtc_time)
return -ENODEV;
- pdev = platform_device_register_simple("rtc-generic", -1, NULL, 0);
+ pdev = platform_device_register_data(NULL, "rtc-generic", -1,
+ &rtc_generic_ops,
+ sizeof(rtc_generic_ops));
return PTR_ERR_OR_ZERO(pdev);
}
-module_init(rtc_init);
+device_initcall(rtc_init);
+#endif
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 2a324f4cb1b9..a9cd6507163a 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Transactional memory support routines to reclaim and recheckpoint
* transactional process state.
@@ -5,12 +6,14 @@
* Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation.
*/
+#include <linux/export.h>
#include <asm/asm-offsets.h>
#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/bug.h>
+#include <asm/feature-fixups.h>
#ifdef CONFIG_VSX
/* See fpu.S, this is borrowed from there */
@@ -54,6 +57,16 @@ _GLOBAL(tm_enable)
or r4, r4, r3
mtmsrd r4
1: blr
+EXPORT_SYMBOL_GPL(tm_enable);
+
+_GLOBAL(tm_disable)
+ mfmsr r4
+ li r3, MSR_TM >> 32
+ sldi r3, r3, 32
+ andc r4, r4, r3
+ mtmsrd r4
+ blr
+EXPORT_SYMBOL_GPL(tm_disable);
_GLOBAL(tm_save_sprs)
mfspr r0, SPRN_TFHAR
@@ -77,17 +90,16 @@ _GLOBAL(tm_restore_sprs)
_GLOBAL(tm_abort)
TABORT(R3)
blr
+EXPORT_SYMBOL_GPL(tm_abort);
-/* void tm_reclaim(struct thread_struct *thread,
- * unsigned long orig_msr,
+/*
+ * void tm_reclaim(struct thread_struct *thread,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
- * transactions and updates thread->regs.tm_ckpt_* with the
- * original checkpointed state. Note that thread->regs is
- * unchanged.
- * - FP regs are written back to thread->transact_fpr before
- * reclaiming. These are the transactional (current) versions.
+ * transactions and updates thread.ckpt_regs, thread.ckfp_state and
+ * thread.ckvr_state with the original checkpointed state. Note that
+ * thread->regs is unchanged.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
@@ -98,29 +110,30 @@ _GLOBAL(tm_abort)
* Call with IRQs off, stacks get all out of sync for some periods in here!
*/
_GLOBAL(tm_reclaim)
- mfcr r6
+ mfcr r5
mflr r0
- stw r6, 8(r1)
+ stw r5, 8(r1)
std r0, 16(r1)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
+ /* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */
std r3, STK_PARAM(R3)(r1)
SAVE_NVGPRS(r1)
- /* We need to setup MSR for VSX register save instructions. Here we
- * also clear the MSR RI since when we do the treclaim, we won't have a
- * valid kernel pointer for a while. We clear RI here as it avoids
- * adding another mtmsr closer to the treclaim. This makes the region
- * maked as non-recoverable wider than it needs to be but it saves on
- * inserting another mtmsrd later.
+ /*
+ * Save kernel live AMR since it will be clobbered by treclaim
+ * but can be used elsewhere later in kernel space.
*/
+ mfspr r3, SPRN_AMR
+ std r3, TM_FRAME_L1(r1)
+
+ /* We need to setup MSR for VSX register save instructions. */
mfmsr r14
mr r15, r14
ori r15, r15, MSR_FP
- li r16, MSR_RI
+ li r16, 0
ori r16, r16, MSR_EE /* IRQs hard off */
andc r15, r15, r16
oris r15, r15, MSR_VEC@h
@@ -132,43 +145,6 @@ _GLOBAL(tm_reclaim)
mtmsrd r15
std r14, TM_FRAME_L0(r1)
- /* Stash the stack pointer away for use after reclaim */
- std r1, PACAR1(r13)
-
- /* ******************** FPR/VR/VSRs ************
- * Before reclaiming, capture the current/transactional FPR/VR
- * versions /if used/.
- *
- * (If VSX used, FP and VMX are implied. Or, we don't need to look
- * at MSR.VSX as copying FP regs if .FP, vector regs if .VMX covers it.)
- *
- * We're passed the thread's MSR as parameter 2.
- *
- * We enabled VEC/FP/VSX in the msr above, so we can execute these
- * instructions!
- */
- andis. r0, r4, MSR_VEC@h
- beq dont_backup_vec
-
- addi r7, r3, THREAD_TRANSACT_VRSTATE
- SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
- mfvscr vr0
- li r6, VRSTATE_VSCR
- stvx vr0, r7, r6
-dont_backup_vec:
- mfspr r0, SPRN_VRSAVE
- std r0, THREAD_TRANSACT_VRSAVE(r3)
-
- andi. r0, r4, MSR_FP
- beq dont_backup_fp
-
- addi r7, r3, THREAD_TRANSACT_FPSTATE
- SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
-
- mffs fr0
- stfd fr0,FPSTATE_FPSCR(r7)
-
-dont_backup_fp:
/* Do sanity check on MSR to make sure we are suspended */
li r7, (MSR_TS_S)@higher
srdi r6, r14, 32
@@ -176,27 +152,60 @@ dont_backup_fp:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* The moment we treclaim, ALL of our GPRs will switch
+ /* Stash the stack pointer away for use after reclaim */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to use SCRATCH0, EE is already off */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ *
+ * The moment we treclaim, ALL of our GPRs will switch
* to user register state. (FPRs, CCR etc. also!)
* Use an sprg and a tm_scratch in the PACA to shuffle.
*/
- TRECLAIM(R5) /* Cause in r5 */
+ TRECLAIM(R4) /* Cause in r4 */
- /* ******************** GPRs ******************** */
- /* Stash the checkpointed r13 away in the scratch SPR and get the real
- * paca
+ /*
+ * ******************** GPRs ********************
+ * Stash the checkpointed r13 in the scratch SPR and get the real paca.
*/
SET_SCRATCH0(r13)
GET_PACA(r13)
- /* Stash the checkpointed r1 away in paca tm_scratch and get the real
- * stack pointer back
+ /*
+ * Stash the checkpointed r1 away in paca->tm_scratch and get the real
+ * stack pointer back into r1.
*/
std r1, PACATMSCRATCH(r13)
ld r1, PACAR1(r13)
- /* Store the PPR in r11 and reset to decent value */
std r11, GPR11(r1) /* Temporary stash */
+
+ /*
+ * Move the saved user r1 to the kernel stack in case PACATMSCRATCH is
+ * clobbered by an exception once we turn on MSR_RI below.
+ */
+ ld r11, PACATMSCRATCH(r13)
+ std r11, GPR1(r1)
+
+ /*
+ * Store r13 away so we can free up the scratch SPR for the SLB fault
+ * handler (needed once we start accessing the thread_struct).
+ */
+ GET_SCRATCH0(r11)
+ std r11, GPR13(r1)
+
+ /* Reset MSR RI so we can take SLB faults again */
+ li r11, MSR_RI
+ mtmsrd r11, 1
+
+ /* Store the PPR in r11 and reset to decent value */
mfspr r11, SPRN_PPR
HMT_MEDIUM
@@ -209,23 +218,21 @@ dont_backup_fp:
addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is NOT a pt_regs ptr!
*/
- subi r7, r7, STACK_FRAME_OVERHEAD
+ subi r7, r7, STACK_INT_FRAME_REGS
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
- SAVE_4GPRS(3, r7) /* user r3-r6 */
- SAVE_GPR(8, r7) /* user r8 */
- SAVE_GPR(9, r7) /* user r9 */
- SAVE_GPR(10, r7) /* user r10 */
- ld r3, PACATMSCRATCH(r13) /* user r1 */
+ SAVE_GPRS(2, 6, r7) /* user r2-r6 */
+ SAVE_GPRS(8, 10, r7) /* user r8-r10 */
+ ld r3, GPR1(r1) /* user r1 */
ld r4, GPR7(r1) /* user r7 */
ld r5, GPR11(r1) /* user r11 */
ld r6, GPR12(r1) /* user r12 */
- GET_SCRATCH0(8) /* user r13 */
+ ld r8, GPR13(r1) /* user r13 */
std r3, GPR1(r7)
std r4, GPR7(r7)
std r5, GPR11(r7)
@@ -237,11 +244,12 @@ dont_backup_fp:
/* ******************** NIP ******************** */
mfspr r3, SPRN_TFHAR
std r3, _NIP(r7) /* Returns to failhandler */
- /* The checkpointed NIP is ignored when rescheduling/rechkpting,
+ /*
+ * The checkpointed NIP is ignored when rescheduling/rechkpting,
* but is used in signal return to 'wind back' to the abort handler.
*/
- /* ******************** CR,LR,CCR,MSR ********** */
+ /* ***************** CTR, LR, CR, XER ********** */
mfctr r3
mflr r4
mfcr r5
@@ -252,7 +260,6 @@ dont_backup_fp:
std r5, _CCR(r7)
std r6, _XER(r7)
-
/* ******************** TAR, DSCR ********** */
mfspr r3, SPRN_TAR
mfspr r4, SPRN_DSCR
@@ -260,11 +267,44 @@ dont_backup_fp:
std r3, THREAD_TM_TAR(r12)
std r4, THREAD_TM_DSCR(r12)
- /* MSR and flags: We don't change CRs, and we don't need to alter
- * MSR.
+ /* ******************** AMR **************** */
+ mfspr r3, SPRN_AMR
+ std r3, THREAD_TM_AMR(r12)
+
+ /*
+ * MSR and flags: We don't change CRs, and we don't need to alter MSR.
*/
- /* TM regs, incl TEXASR -- these live in thread_struct. Note they've
+
+ /*
+ * ******************** FPR/VR/VSRs ************
+ * After reclaiming, capture the checkpointed FPRs/VRs.
+ *
+ * We enabled VEC/FP/VSX in the msr above, so we can execute these
+ * instructions!
+ */
+ mr r3, r12
+
+ /* Altivec (VEC/VMX/VR)*/
+ addi r7, r3, THREAD_CKVRSTATE
+ SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr_state */
+ mfvscr v0
+ li r6, VRSTATE_VSCR
+ stvx v0, r7, r6
+
+ /* VRSAVE */
+ mfspr r0, SPRN_VRSAVE
+ std r0, THREAD_CKVRSAVE(r3)
+
+ /* Floating Point (FP) */
+ addi r7, r3, THREAD_CKFPSTATE
+ SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp_state */
+ mffs fr0
+ stfd fr0,FPSTATE_FPSCR(r7)
+
+
+ /*
+ * TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
* cause (aborted).
*/
@@ -275,10 +315,13 @@ dont_backup_fp:
std r3, THREAD_TM_TFHAR(r12)
std r4, THREAD_TM_TFIAR(r12)
- /* AMR is checkpointed too, but is unsupported by Linux. */
+ /* Restore kernel live AMR */
+ ld r8, TM_FRAME_L1(r1)
+ mtspr SPRN_AMR, r8
/* Restore original MSR/IRQ state & clear TM mode */
ld r14, TM_FRAME_L0(r1) /* Orig MSR */
+
li r15, 0
rldimi r14, r15, MSR_TS_LG, (63-MSR_TS_LG)-1
mtmsrd r14
@@ -293,14 +336,14 @@ dont_backup_fp:
ld r2, STK_GOT(r1)
/* Load CPU's default DSCR */
- ld r0, PACA_DSCR(r13)
+ ld r0, PACA_DSCR_DEFAULT(r13)
mtspr SPRN_DSCR, r0
blr
- /* void tm_recheckpoint(struct thread_struct *thread,
- * unsigned long orig_msr)
+ /*
+ * void __tm_recheckpoint(struct thread_struct *thread)
* - Restore the checkpointed register state saved by tm_reclaim
* when we switch_to a process.
*
@@ -315,72 +358,69 @@ _GLOBAL(__tm_recheckpoint)
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+ /*
+ * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS].
* This is used for backing up the NVGPRs:
*/
SAVE_NVGPRS(r1)
+ /*
+ * Save kernel live AMR since it will be clobbered for trechkpt
+ * but can be used elsewhere later in kernel space.
+ */
+ mfspr r8, SPRN_AMR
+ std r8, TM_FRAME_L0(r1)
+
/* Load complete register state from ts_ckpt* registers */
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
*/
- subi r7, r7, STACK_FRAME_OVERHEAD
-
- SET_SCRATCH0(r1)
+ subi r7, r7, STACK_INT_FRAME_REGS
+ /* We need to setup MSR for FP/VMX/VSX register save instructions. */
mfmsr r6
- /* R4 = original MSR to indicate whether thread used FP/Vector etc. */
-
- /* Enable FP/vec in MSR if necessary! */
- lis r5, MSR_VEC@h
+ mr r5, r6
ori r5, r5, MSR_FP
- and. r5, r4, r5
- beq restore_gprs /* if neither, skip both */
-
+#ifdef CONFIG_ALTIVEC
+ oris r5, r5, MSR_VEC@h
+#endif
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
- oris r5, r5, MSR_VSX@h
+ oris r5,r5, MSR_VSX@h
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
- or r5, r6, r5 /* Set MSR.FP+.VSX/.VEC */
- mtmsr r5
+ mtmsrd r5
#ifdef CONFIG_ALTIVEC
- /* FP and VEC registers: These are recheckpointed from thread.fpr[]
- * and thread.vr[] respectively. The thread.transact_fpr[] version
- * is more modern, and will be loaded subsequently by any FPUnavailable
- * trap.
+ /*
+ * FP and VEC registers: These are recheckpointed from
+ * thread.ckfp_state and thread.ckvr_state respectively. The
+ * thread.fp_state[] version holds the 'live' (transactional) state
+ * and will be loaded subsequently by any FPUnavailable trap.
*/
- andis. r0, r4, MSR_VEC@h
- beq dont_restore_vec
-
- addi r8, r3, THREAD_VRSTATE
+ addi r8, r3, THREAD_CKVRSTATE
li r5, VRSTATE_VSCR
- lvx vr0, r8, r5
- mtvscr vr0
+ lvx v0, r8, r5
+ mtvscr v0
REST_32VRS(0, r5, r8) /* r5 scratch, r8 ptr */
-dont_restore_vec:
- ld r5, THREAD_VRSAVE(r3)
+ ld r5, THREAD_CKVRSAVE(r3)
mtspr SPRN_VRSAVE, r5
#endif
- andi. r0, r4, MSR_FP
- beq dont_restore_fp
-
- addi r8, r3, THREAD_FPSTATE
+ addi r8, r3, THREAD_CKFPSTATE
lfd fr0, FPSTATE_FPSCR(r8)
MTFSF_L(fr0)
REST_32FPRS_VSRS(0, R4, R8)
-dont_restore_fp:
mtmsr r6 /* FP/Vec off again! */
restore_gprs:
- /* ******************** CR,LR,CCR,MSR ********** */
+ /* ****************** CTR, LR, XER ************* */
ld r4, _CTR(r7)
ld r5, _LINK(r7)
ld r8, _XER(r7)
@@ -393,31 +433,28 @@ restore_gprs:
ld r4, THREAD_TM_TAR(r3)
mtspr SPRN_TAR, r4
+ /* ******************** AMR ******************** */
+ ld r4, THREAD_TM_AMR(r3)
+ mtspr SPRN_AMR, r4
+
/* Load up the PPR and DSCR in GPRs only at this stage */
ld r5, THREAD_TM_DSCR(r3)
ld r6, THREAD_TM_PPR(r3)
- /* Clear the MSR RI since we are about to change R1. EE is already off
- */
- li r4, 0
- mtmsrd r4, 1
-
REST_GPR(0, r7) /* GPR0 */
- REST_2GPRS(2, r7) /* GPR2-3 */
- REST_GPR(4, r7) /* GPR4 */
- REST_4GPRS(8, r7) /* GPR8-11 */
- REST_2GPRS(12, r7) /* GPR12-13 */
+ REST_GPRS(2, 4, r7) /* GPR2-4 */
+ REST_GPRS(8, 12, r7) /* GPR8-12 */
+ REST_GPRS(14, 31, r7) /* GPR14-31 */
- REST_NVGPRS(r7) /* GPR14-31 */
-
- /* Load up PPR and DSCR here so we don't run with user values for long
- */
+ /* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
mtspr SPRN_PPR, r6
- /* Do final sanity check on TEXASR to make sure FS is set. Do this
+ /*
+ * Do final sanity check on TEXASR to make sure FS is set. Do this
* here before we load up the userspace r1 so any bugs we hit will get
- * a call chain */
+ * a call chain.
+ */
mfspr r5, SPRN_TEXASR
srdi r5, r5, 16
li r6, (TEXASR_FS)@h
@@ -425,8 +462,9 @@ restore_gprs:
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* Do final sanity check on MSR to make sure we are not transactional
- * or suspended
+ /*
+ * Do final sanity check on MSR to make sure we are not transactional
+ * or suspended.
*/
mfmsr r6
li r5, (MSR_TS_MASK)@higher
@@ -439,17 +477,48 @@ restore_gprs:
ld r6, _CCR(r7)
mtcr r6
- REST_GPR(1, r7) /* GPR1 */
- REST_GPR(5, r7) /* GPR5-7 */
REST_GPR(6, r7)
- ld r7, GPR7(r7)
+
+ /*
+ * Store user r1 and r5 and r13 on the stack (in the unused save
+ * areas / compiler reserved areas), so that we can access them after
+ * we clear MSR RI.
+ */
+
+ REST_GPR(5, r7)
+ std r5, -8(r1)
+ ld r5, GPR13(r7)
+ std r5, -16(r1)
+ ld r5, GPR1(r7)
+ std r5, -24(r1)
+
+ REST_GPR(7, r7)
+
+ /* Stash the stack pointer away for use after recheckpoint */
+ std r1, PACAR1(r13)
+
+ /* Clear MSR RI since we are about to clobber r13. EE is already off */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /*
+ * BE CAREFUL HERE:
+ * At this point we can't take an SLB miss since we have MSR_RI
+ * off. Load only to/from the stack/paca which are in SLB bolted regions
+ * until we turn MSR RI back on.
+ */
+
+ ld r5, -8(r1)
+ ld r13, -16(r1)
+ ld r1, -24(r1)
/* Commit register state as checkpointed state: */
TRECHKPT
HMT_MEDIUM
- /* Our transactional state has now changed.
+ /*
+ * Our transactional state has now changed.
*
* Now just get out of here. Transactional (current) state will be
* updated once restore is called on the return path in the _switch-ed
@@ -457,12 +526,16 @@ restore_gprs:
*/
GET_PACA(r13)
- GET_SCRATCH0(r1)
+ ld r1, PACAR1(r13)
- /* R1 is restored, so we are recoverable again. EE is still off */
+ /* R13 and R1 are restored, so we are recoverable again. EE is still off */
li r4, MSR_RI
mtmsrd r4, 1
+ /* Restore kernel live AMR */
+ ld r8, TM_FRAME_L0(r1)
+ mtspr SPRN_AMR, r8
+
REST_NVGPRS(r1)
addi r1, r1, TM_FRAME_SIZE
@@ -473,7 +546,7 @@ restore_gprs:
ld r2, STK_GOT(r1)
/* Load CPU's default DSCR */
- ld r0, PACA_DSCR(r13)
+ ld r0, PACA_DSCR_DEFAULT(r13)
mtspr SPRN_DSCR, r0
blr
diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile
new file mode 100644
index 000000000000..d6c3885453bd
--- /dev/null
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the powerpc trace subsystem
+#
+
+ifdef CONFIG_FUNCTION_TRACER
+# do not trace tracer code
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE)
+endif
+
+# Pick the ftrace implementation per word size: 32-bit always builds ftrace.o;
+# 64-bit builds the ftrace_64_pg variant only when neither MPROFILE_KERNEL nor
+# ARCH_USING_PATCHABLE_FUNCTION_ENTRY is set, and ftrace.o otherwise.
+ifdef CONFIG_FUNCTION_TRACER
+obj32-y += ftrace.o ftrace_entry.o
+ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),)
+obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o
+else
+obj64-y += ftrace.o ftrace_entry.o
+endif
+endif
+
+obj-$(CONFIG_TRACING) += trace_clock.o
+
+# Fold the per-word-size lists into the objects that actually get built.
+obj-$(CONFIG_PPC64) += $(obj64-y)
+obj-$(CONFIG_PPC32) += $(obj32-y)
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_ftrace.o := n
+KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
+UBSAN_SANITIZE_ftrace.o := n
+GCOV_PROFILE_ftrace_64_pg.o := n
+KCOV_INSTRUMENT_ftrace_64_pg.o := n
+KCSAN_SANITIZE_ftrace_64_pg.o := n
+UBSAN_SANITIZE_ftrace_64_pg.o := n
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
new file mode 100644
index 000000000000..841d077e2825
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -0,0 +1,678 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+#include <asm/sections.h>
+
+#define NUM_FTRACE_TRAMPS 2
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+/*
+ * Translate a recorded patch-site address into the address ftrace works on.
+ *
+ * Addresses inside [__exittext_begin, __exittext_end) yield 0. With patchable
+ * function entry and without out-of-line stubs the address is advanced by
+ * MCOUNT_INSN_SIZE, and by a further MCOUNT_INSN_SIZE when
+ * CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS is enabled. In every other
+ * configuration the address is returned unchanged.
+ */
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	unsigned long exit_lo = (unsigned long)__exittext_begin;
+	unsigned long exit_hi = (unsigned long)__exittext_end;
+	unsigned long skip = 0;
+
+	if (exit_lo <= addr && addr < exit_hi)
+		return 0;
+
+	if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) &&
+	    !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+		skip = IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS) ?
+			2 * MCOUNT_INSN_SIZE : MCOUNT_INSN_SIZE;
+
+	return addr + skip;
+}
+
+/*
+ * Encode a branch located at @ip that targets @addr; a non-zero @link asks
+ * for the link-setting form (BRANCH_SET_LINK). Warns when the displacement is
+ * outside the branch range, but still returns whatever create_branch() wrote.
+ */
+static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link)
+{
+	int flags = link ? BRANCH_SET_LINK : 0;
+	ppc_inst_t branch;
+
+	WARN_ON(!is_offset_in_branch_range(addr - ip));
+	create_branch(&branch, (u32 *)ip, addr, flags);
+
+	return branch;
+}
+
+/*
+ * Fetch the instruction at kernel address @ip into @op without faulting.
+ * Returns 0 on success; logs and returns -EFAULT if the read fails.
+ */
+static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op)
+{
+	if (!copy_inst_from_kernel_nofault(op, (void *)ip))
+		return 0;
+
+	pr_err("0x%lx: fetching instruction failed\n", ip);
+	return -EFAULT;
+}
+
+/*
+ * Check that the instruction currently at @ip equals @inst.
+ * Returns 0 on a match, the read error if the fetch failed, or -EINVAL
+ * (after logging both encodings) on a mismatch.
+ */
+static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst)
+{
+	ppc_inst_t found;
+	int err = ftrace_read_inst(ip, &found);
+
+	if (err)
+		return err;
+
+	if (ppc_inst_equal(found, inst))
+		return 0;
+
+	pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
+	       ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(found));
+	return -EINVAL;
+}
+
+/*
+ * Replace the instruction at @ip with @new, but only after confirming that
+ * @old is what is currently there. Nothing is written when @old and @new
+ * are already identical. Returns 0 or a negative error.
+ */
+static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+	int err = ftrace_validate_inst(ip, old);
+
+	if (err)
+		return err;
+
+	if (ppc_inst_equal(old, new))
+		return 0;
+
+	return patch_instruction((u32 *)ip, new);
+}
+
+/*
+ * Non-zero when @op, with its LI displacement field masked out, matches the
+ * encoding of 'bl' with a zero displacement.
+ */
+static int is_bl_op(ppc_inst_t op)
+{
+	return PPC_RAW_BL(0) == (ppc_inst_val(op) & ~PPC_LI_MASK);
+}
+
+/*
+ * Return the first registered trampoline that a branch at @ip can reach,
+ * or 0 when none of the populated ftrace_tramps[] slots is in range.
+ */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+	unsigned long tramp;
+	int idx;
+
+	for (idx = 0; idx < NUM_FTRACE_TRAMPS; idx++) {
+		tramp = ftrace_tramps[idx];
+		if (tramp && is_offset_in_branch_range(tramp - ip))
+			return tramp;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Find the module trampoline to use for a call site at @ip.
+ *
+ * @ip:   address of the patch site, expected to lie in module text
+ * @addr: intended ftrace target; ftrace_caller selects mod->arch.tramp,
+ *        anything else selects mod->arch.tramp_regs
+ *
+ * Returns the trampoline address, or 0 if no module owns @ip. The caller
+ * treats 0 as "no stub reachable" and fails with -EINVAL.
+ */
+static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr)
+{
+	struct module *mod = NULL;
+
+	scoped_guard(rcu)
+		mod = __module_text_address(ip);
+	if (!mod) {
+		pr_err("No module loaded at addr=%lx\n", ip);
+		/* Bail out instead of dereferencing a NULL module pointer. */
+		return 0;
+	}
+
+	return (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs);
+}
+#else
+/* Without module support there is never a module stub to hand out. */
+static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Return the out-of-line stub address stored in @rec.
+ *
+ * Only meaningful with CONFIG_PPC_FTRACE_OUT_OF_LINE. Without it the body is
+ * just BUILD_BUG(), so callers guard the call with
+ * IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE).
+ */
+static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec)
+{
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ return rec->arch.ool_stub;
+#else
+ BUILD_BUG();
+#endif
+}
+
+/*
+ * Build the branch-and-link instruction that calls @addr from the patch site
+ * of @rec and store it in @call_inst.
+ *
+ * The patch site is the second instruction of the out-of-line stub when
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE is enabled, otherwise rec->ip. When @addr is
+ * not directly reachable, the branch goes through an ftrace trampoline (core
+ * kernel text) or a module stub instead.
+ *
+ * Returns 0 on success, or -EINVAL if the target is unexpected or no stub is
+ * reachable from the patch site.
+ */
+static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst)
+{
+ unsigned long ip;
+ unsigned long stub;
+
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+ ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */
+ else
+ ip = rec->ip;
+
+ /* An out-of-range target other than the two ftrace entry points */
+ if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR &&
+ addr != FTRACE_REGS_ADDR) {
+ /* This can only happen with ftrace direct */
+ if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) {
+ pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n",
+ ip, rec->ip, addr);
+ return -EINVAL;
+ }
+ /* Fall back to FTRACE_ADDR; the range checks below use the new value */
+ addr = FTRACE_ADDR;
+ }
+
+ if (is_offset_in_branch_range(addr - ip))
+ /* Within range */
+ stub = addr;
+ else if (core_kernel_text(ip))
+ /* We would be branching to one of our ftrace stubs */
+ stub = find_ftrace_tramp(ip);
+ else
+ stub = ftrace_lookup_module_stub(ip, addr);
+
+ /* Both lookup helpers return 0 when they have nothing to offer */
+ if (!stub) {
+ pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip);
+ return -EINVAL;
+ }
+
+ *call_inst = ftrace_create_branch_inst(ip, stub, 1);
+ return 0;
+}
+
+/*
+ * Reserve and fill in the out-of-line stub for @rec.
+ *
+ * A stub slot is taken from one of the stub areas (init text, .text, end of
+ * .text, or the module's own area when @mod is set), checked to be within
+ * branch range of rec->ip in both directions, recorded in rec->arch.ool_stub
+ * and then patched with the 4-instruction template below.
+ *
+ * Returns 0 on success or a negative error (-EINVAL when no suitable slot
+ * exists or a branch cannot be encoded). Only buildable with
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE.
+ */
+static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec)
+{
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ /* Next free slot in each core-kernel stub area; persists across calls */
+ static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index;
+ int ret = 0, ool_stub_count, *ool_stub_index;
+ ppc_inst_t inst;
+ /*
+ * See ftrace_entry.S if changing the below instruction sequence, as we rely on
+ * decoding the last branch instruction here to recover the correct function ip.
+ */
+ struct ftrace_ool_stub *ool_stub, ool_stub_template = {
+ .insn = {
+ PPC_RAW_MFLR(_R0),
+ PPC_RAW_NOP(), /* bl ftrace_caller */
+ PPC_RAW_MTLR(_R0),
+ PPC_RAW_NOP() /* b rec->ip + 4 */
+ }
+ };
+
+ /* A record is expected to get its stub exactly once */
+ WARN_ON(rec->arch.ool_stub);
+
+ if (is_kernel_inittext(rec->ip)) {
+ ool_stub = ftrace_ool_stub_inittext;
+ ool_stub_index = &ool_stub_inittext_index;
+ ool_stub_count = ftrace_ool_stub_inittext_count;
+ } else if (is_kernel_text(rec->ip)) {
+ /*
+ * ftrace records are sorted, so we first use up the stub area within .text
+ * (ftrace_ool_stub_text) before using the area at the end of .text
+ * (ftrace_ool_stub_text_end), unless the stub is out of range of the record.
+ */
+ if (ool_stub_text_index >= ftrace_ool_stub_text_count ||
+ !is_offset_in_branch_range((long)rec->ip -
+ (long)&ftrace_ool_stub_text[ool_stub_text_index])) {
+ ool_stub = ftrace_ool_stub_text_end;
+ ool_stub_index = &ool_stub_text_end_index;
+ ool_stub_count = ftrace_ool_stub_text_end_count;
+ } else {
+ ool_stub = ftrace_ool_stub_text;
+ ool_stub_index = &ool_stub_text_index;
+ ool_stub_count = ftrace_ool_stub_text_count;
+ }
+#ifdef CONFIG_MODULES
+ } else if (mod) {
+ ool_stub = mod->arch.ool_stubs;
+ ool_stub_index = &mod->arch.ool_stub_index;
+ ool_stub_count = mod->arch.ool_stub_count;
+#endif
+ } else {
+ return -EINVAL;
+ }
+
+ /* Claim the next slot; the index is advanced before the bounds check */
+ ool_stub += (*ool_stub_index)++;
+
+ if (WARN_ON(*ool_stub_index > ool_stub_count))
+ return -EINVAL;
+
+ /* Both rec->ip -> stub and stub's last insn -> rec->ip + 4 must be encodable */
+ if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) ||
+ !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) -
+ (long)&ool_stub->insn[3])) {
+ pr_err("%s: ftrace ool stub out of range (%p -> %p).\n",
+ __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]);
+ return -EINVAL;
+ }
+
+ rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0];
+
+ /* bl ftrace_caller */
+ if (!mod)
+ ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst);
+#ifdef CONFIG_MODULES
+ else
+ /*
+ * We can't use ftrace_get_call_inst() since that uses
+ * __module_text_address(rec->ip) to look up the module.
+ * But, since the module is not fully formed at this stage,
+ * the lookup fails. We know the target though, so generate
+ * the branch inst directly.
+ */
+ inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE,
+ mod->arch.tramp, 1);
+#endif
+ ool_stub_template.insn[1] = ppc_inst_val(inst);
+
+ /* b rec->ip + 4 */
+ if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0))
+ return -EINVAL;
+ ool_stub_template.insn[3] = ppc_inst_val(inst);
+
+ /* The template is only written to the stub if every step above succeeded */
+ if (!ret)
+ ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template,
+ sizeof(ool_stub_template), false);
+
+ return ret;
+#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */
+ BUILD_BUG();
+#endif
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+/*
+ * Choose the ftrace_ops to associate with @rec: the unique ops found for the
+ * record when FTRACE_FL_CALL_OPS_EN is set (warning once if that lookup comes
+ * back empty), and &ftrace_list_ops in every other case.
+ */
+static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec)
+{
+	const struct ftrace_ops *unique;
+
+	if (!(rec->flags & FTRACE_FL_CALL_OPS_EN))
+		return &ftrace_list_ops;
+
+	unique = ftrace_find_unique_ops(rec);
+	WARN_ON_ONCE(!unique);
+
+	return unique ? unique : &ftrace_list_ops;
+}
+
+/*
+ * Write the @ops pointer into the unsigned-long slot that belongs to @rec.
+ * With out-of-line stubs the slot sits immediately before the stub; otherwise
+ * it sits immediately before the instruction preceding rec->ip.
+ * Returns the result of patch_ulong().
+ */
+static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops)
+{
+	unsigned long slot;
+
+	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+		slot = ftrace_get_ool_stub(rec) - sizeof(unsigned long);
+	else
+		slot = rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long);
+
+	return patch_ulong((void *)slot, (unsigned long)ops);
+}
+
+/* Point the ops slot of @rec at ftrace_nop_ops. */
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, &ftrace_nop_ops);
+}
+
+/* Point the ops slot of @rec at whatever powerpc_rec_get_ops() selects for it. */
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec)
+{
+ return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec));
+}
+#else
+/* Without CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS both helpers do nothing and report success. */
+static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; }
+static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; }
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+/*
+ * Stub for the generic ftrace hook. It is not expected to run here, so it
+ * warns and returns -EINVAL regardless of its arguments.
+ */
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+ /* This should never be called since we override ftrace_replace_code() */
+ WARN_ON(1);
+ return -EINVAL;
+}
+#endif
+
+/*
+ * Turn the ftrace location of @rec into a call to @addr.
+ *
+ * Steps, each attempted only if the previous ones succeeded:
+ *   1. compute the expected current instruction and the new call instruction;
+ *   2. patch the call site (the second stub instruction with out-of-line
+ *      stubs, rec->ip otherwise);
+ *   3. update the ops pointer associated with the record;
+ *   4. with out-of-line stubs, replace the nop at rec->ip by a branch to the
+ *      stub.
+ *
+ * Returns 0 on success or the first negative error encountered. Rejects
+ * core-kernel addresses and !CONFIG_MODULES builds with -EINVAL.
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t old, new;
+	unsigned long ip = rec->ip;
+	int ret = 0;
+
+	/* This can only ever be called during module load */
+	if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip)))
+		return -EINVAL;
+
+	old = ppc_inst(PPC_RAW_NOP());
+	if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
+		ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */
+		ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old);
+	}
+
+	/* Keep the first error intact instead of OR-ing two errnos together */
+	if (!ret)
+		ret = ftrace_get_call_inst(rec, addr, &new);
+
+	if (!ret)
+		ret = ftrace_modify_code(ip, old, new);
+
+	/*
+	 * Only touch the ops pointer once the call site was patched, and never
+	 * let its result overwrite an earlier failure.
+	 */
+	if (!ret)
+		ret = ftrace_rec_update_ops(rec);
+
+	if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+		ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()),
+			 ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip)));
+
+	return ret;
+}
+
+/*
+ * Stub for the generic ftrace hook. It is not expected to run here, so it
+ * warns and returns -EINVAL regardless of its arguments.
+ */
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+ /*
+ * This should never be called since we override ftrace_replace_code(),
+ * as well as ftrace_init_nop()
+ */
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+/*
+ * Walk every ftrace record and bring its call site in line with the state
+ * computed by ftrace_update_record().
+ *
+ * For each record the expected current instruction (old) and the desired one
+ * (new) are derived from the update kind, the ops pointer is refreshed, and
+ * the call site is patched. With out-of-line stubs the call site is the
+ * second instruction of the stub, and MAKE_NOP / MAKE_CALL additionally
+ * switch rec->ip between a nop and a branch to the stub.
+ *
+ * The walk stops at the first failure, which is reported via ftrace_bug().
+ */
+void ftrace_replace_code(int enable)
+{
+ ppc_inst_t old, new, call_inst, new_call_inst;
+ ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP());
+ unsigned long ip, new_addr, addr;
+ struct ftrace_rec_iter *iter;
+ struct dyn_ftrace *rec;
+ int ret = 0, update;
+
+ for_ftrace_rec_iter(iter) {
+ rec = ftrace_rec_iter_record(iter);
+ ip = rec->ip;
+
+ /* Skip records that are disabled and not currently enabled */
+ if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED))
+ continue;
+
+ /* Sample both addresses before ftrace_update_record() is called */
+ addr = ftrace_get_addr_curr(rec);
+ new_addr = ftrace_get_addr_new(rec);
+ update = ftrace_update_record(rec, enable);
+
+ /*
+ * With out-of-line stubs the patched location is the stub's second
+ * instruction, and its "disabled" content is 'bl ftrace_caller', which
+ * is what nop_inst holds for the switch below.
+ */
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) {
+ ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE;
+ ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst);
+ if (ret)
+ goto out;
+ }
+
+ switch (update) {
+ case FTRACE_UPDATE_IGNORE:
+ default:
+ continue;
+ case FTRACE_UPDATE_MODIFY_CALL:
+ ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst);
+ ret |= ftrace_get_call_inst(rec, addr, &call_inst);
+ ret |= ftrace_rec_update_ops(rec);
+ old = call_inst;
+ new = new_call_inst;
+ break;
+ case FTRACE_UPDATE_MAKE_NOP:
+ ret = ftrace_get_call_inst(rec, addr, &call_inst);
+ ret |= ftrace_rec_set_nop_ops(rec);
+ old = call_inst;
+ new = nop_inst;
+ break;
+ case FTRACE_UPDATE_MAKE_CALL:
+ ret = ftrace_get_call_inst(rec, new_addr, &call_inst);
+ ret |= ftrace_rec_update_ops(rec);
+ old = nop_inst;
+ new = call_inst;
+ break;
+ }
+
+ if (!ret)
+ ret = ftrace_modify_code(ip, old, new);
+
+ if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) &&
+ (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) {
+ /* Update the actual ftrace location */
+ call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) -
+ (long)rec->ip));
+ /* nop_inst was repurposed above; make it a real nop again */
+ nop_inst = ppc_inst(PPC_RAW_NOP());
+ ip = rec->ip;
+
+ if (update == FTRACE_UPDATE_MAKE_NOP)
+ ret = ftrace_modify_code(ip, call_inst, nop_inst);
+ else
+ ret = ftrace_modify_code(ip, nop_inst, call_inst);
+
+ if (ret)
+ goto out;
+ }
+
+ if (ret)
+ goto out;
+ }
+
+out:
+ /* rec is the record being processed when the failure occurred */
+ if (ret)
+ ftrace_bug(ret, rec);
+ return;
+}
+
+/*
+ * Prepare the ftrace location of @rec for later patching.
+ *
+ * First the instructions around rec->ip are validated against the sequence
+ * expected for the build configuration (patchable function entry, PPC32, or
+ * -mprofile-kernel). Then either the out-of-line stub is set up, or the
+ * location itself is rewritten. Finally the ops pointer of the record is set
+ * to the nop ops.
+ *
+ * Returns 0 on success or a negative error; -EINVAL when the configuration
+ * or the instruction found at the location is not one of the expected cases.
+ */
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+ unsigned long addr, ip = rec->ip;
+ ppc_inst_t old, new;
+ int ret = 0;
+
+ /* Verify instructions surrounding the ftrace location */
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+ /* Expect nops */
+ if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+ ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP()));
+ if (!ret)
+ ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP()));
+ } else if (IS_ENABLED(CONFIG_PPC32)) {
+ /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */
+ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+ if (ret)
+ return ret;
+ /* The 'stw' is replaced by a nop */
+ ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)),
+ ppc_inst(PPC_RAW_NOP()));
+ } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+ /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */
+ ret = ftrace_read_inst(ip - 4, &old);
+ if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) {
+ /* GCC v5.x emits the additional 'std' instruction, GCC v6.x doesn't */
+ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0)));
+ if (ret)
+ return ret;
+ ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)),
+ ppc_inst(PPC_RAW_NOP()));
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ /* Set up out-of-line stub */
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
+ ret = ftrace_init_ool_stub(mod, rec);
+ goto out;
+ }
+
+ /* Nop-out the ftrace location */
+ new = ppc_inst(PPC_RAW_NOP());
+ addr = MCOUNT_ADDR;
+ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) {
+ /* we instead patch-in the 'mflr r0' */
+ old = ppc_inst(PPC_RAW_NOP());
+ new = ppc_inst(PPC_RAW_MFLR(_R0));
+ ret = ftrace_modify_code(ip - 4, old, new);
+ } else if (is_offset_in_branch_range(addr - ip)) {
+ /* Within range */
+ old = ftrace_create_branch_inst(ip, addr, 1);
+ ret = ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) {
+ /*
+ * We would be branching to a linker-generated stub, or to the module _mcount
+ * stub. Let's just confirm we have a 'bl' here.
+ */
+ ret = ftrace_read_inst(ip, &old);
+ if (ret)
+ return ret;
+ if (!is_bl_op(old)) {
+ pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old));
+ return -EINVAL;
+ }
+ /* The exact old encoding is unknown here, so patch without validating it */
+ ret = patch_instruction((u32 *)ip, new);
+ } else {
+ return -EINVAL;
+ }
+
+out:
+ if (!ret)
+ ret = ftrace_rec_set_nop_ops(rec);
+
+ return ret;
+}
+
+/*
+ * Redirect the global ftrace call sites to @func.
+ *
+ * Nothing is done (and 0 is returned) with CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS,
+ * because the function to call is tied to each call site and there is no
+ * global pointer to update. Otherwise the branch at ftrace_call is rewritten
+ * and, with CONFIG_DYNAMIC_FTRACE_WITH_REGS, the one at ftrace_regs_call too.
+ *
+ * Returns 0 on success or the first patching error.
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long site, target;
+	ppc_inst_t cur, repl;
+	int err;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS))
+		return 0;
+
+	target = ppc_function_entry(func);
+
+	site = (unsigned long)(&ftrace_call);
+	cur = ppc_inst_read((u32 *)&ftrace_call);
+	repl = ftrace_create_branch_inst(site, target, 1);
+	err = ftrace_modify_code(site, cur, repl);
+	if (err || !IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return err;
+
+	/* Also update the regs callback function */
+	site = (unsigned long)(&ftrace_regs_call);
+	cur = ppc_inst_read((u32 *)&ftrace_regs_call);
+	repl = ftrace_create_branch_inst(site, target, 1);
+
+	return ftrace_modify_code(site, cur, repl);
+}
+
+/*
+ * Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ *
+ * @command is passed through to ftrace_modify_all_code() unchanged.
+ */
+void arch_ftrace_update_code(int command)
+{
+ ftrace_modify_all_code(command);
+}
+
+/*
+ * Drop the init-text trampoline from ftrace_tramps[] by zeroing its slot.
+ * The scan stops at the first empty slot or at the end of the table.
+ */
+void ftrace_free_init_tramp(void)
+{
+	int idx;
+
+	for (idx = 0; idx < NUM_FTRACE_TRAMPS; idx++) {
+		if (!ftrace_tramps[idx])
+			break;
+
+		if (ftrace_tramps[idx] != (unsigned long)ftrace_tramp_init)
+			continue;
+
+		ftrace_tramps[idx] = 0;
+		return;
+	}
+}
+
+/*
+ * Record @tramp in the first empty slot of ftrace_tramps[].
+ * If the table is already full the trampoline is silently not recorded.
+ */
+static void __init add_ftrace_tramp(unsigned long tramp)
+{
+	int slot;
+
+	for (slot = 0; slot < NUM_FTRACE_TRAMPS; slot++) {
+		if (ftrace_tramps[slot])
+			continue;
+
+		ftrace_tramps[slot] = tramp;
+		return;
+	}
+}
+
+/*
+ * Populate the two ftrace trampolines (ftrace_tramp_text and
+ * ftrace_tramp_init) with a short stub that loads FTRACE_REGS_ADDR into r12
+ * and jumps to it via CTR, then register both with add_ftrace_tramp().
+ *
+ * The stub template and the way the address is folded into it depend on the
+ * configuration: PC-relative (CONFIG_PPC_KERNEL_PCREL), relative to the
+ * kernel TOC (other PPC64), or absolute (32-bit).
+ *
+ * Returns 0 on success, -1 if the target is out of range for the chosen
+ * addressing form.
+ */
+int __init ftrace_dyn_arch_init(void)
+{
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ unsigned long addr = FTRACE_REGS_ADDR;
+ long reladdr;
+ int i;
+ /* Template with zeroed immediates; the address bits are OR-ed in below */
+ u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* pla r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(1),
+ PPC_INST_PADDI | ___PPC_RT(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+#elif defined(CONFIG_PPC64)
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
+ PPC_RAW_ADDIS(_R12, _R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+#else
+ PPC_RAW_LIS(_R12, 0),
+ PPC_RAW_ADDI(_R12, _R12, 0),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+#endif
+ };
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ for (i = 0; i < 2; i++) {
+ /* The offset is relative to each trampoline, so compute it per slot */
+ reladdr = addr - (unsigned long)tramp[i];
+
+ if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+ pr_err("Address of %ps out of range of pcrel address.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= IMM_H18(reladdr);
+ tramp[i][1] |= IMM_L(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ } else if (IS_ENABLED(CONFIG_PPC64)) {
+ /* The offset is relative to the kernel TOC, so it is the same for both slots */
+ reladdr = addr - kernel_toc_addr();
+
+ if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ } else {
+ /* 32-bit: the absolute address is split across lis/addi */
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= PPC_HA(addr);
+ tramp[i][1] |= PPC_LO(addr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Function-graph entry hook.
+ *
+ * If the graph tracer is alive, tracing is not paused for the current task
+ * and function_graph_enter_regs() returns 0, the saved link register is
+ * redirected to return_to_handler. In every other case the saved link
+ * register is set to the unmodified @parent_ip.
+ */
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	/* r1 of the traced context, handed on as the stack pointer argument */
+	unsigned long *stack = (unsigned long *)arch_ftrace_regs(fregs)->regs.gpr[1];
+
+	if (!unlikely(ftrace_graph_is_dead()) &&
+	    !unlikely(atomic_read(&current->tracing_graph_pause))) {
+		if (!function_graph_enter_regs(parent_ip, ip, 0, stack, fregs))
+			parent_ip = ppc_function_entry(return_to_handler);
+	}
+
+	arch_ftrace_regs(fregs)->regs.link = parent_ip;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c
new file mode 100644
index 000000000000..5c6e545d1708
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -0,0 +1,832 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Code for replacing ftrace calls with jumps.
+ *
+ * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
+ *
+ * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box.
+ *
+ * Added function graph tracer code, taken from x86 that was written
+ * by Frederic Weisbecker, and ported to PPC by Steven Rostedt.
+ *
+ */
+
+#define pr_fmt(fmt) "ftrace-powerpc: " fmt
+
+#include <linux/spinlock.h>
+#include <linux/hardirq.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/ftrace.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/list.h>
+
+#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
+#include <asm/ftrace.h>
+#include <asm/syscall.h>
+#include <asm/inst.h>
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS 8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
+/* Hand @addr back untouched: no adjustment is applied in this file. */
+unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	unsigned long adjusted = addr;
+
+	return adjusted;
+}
+
+/*
+ * Build the branch instruction that, when located at @ip, goes to the
+ * function entry of @addr. A non-zero @link asks for 'bl', zero for 'b'.
+ * The return value of create_branch() is not inspected.
+ */
+static ppc_inst_t
+ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
+{
+	unsigned long target = ppc_function_entry((void *)addr);
+	ppc_inst_t branch;
+	int flags = 0;
+
+	if (link)
+		flags = BRANCH_SET_LINK;
+
+	create_branch(&branch, (u32 *)ip, target, flags);
+
+	return branch;
+}
+
+/*
+ * Replace the instruction at @ip with @new, but only if the instruction
+ * currently found there equals @old.
+ *
+ * Returns -EFAULT when the current instruction cannot be read, -EINVAL when
+ * it differs from @old, and otherwise the result of patch_instruction().
+ */
+static inline int
+ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new)
+{
+	ppc_inst_t replaced;
+
+	/*
+	 * Note:
+	 * We are paranoid about modifying text, as if a bug was to happen, it
+	 * could cause us to read or write to someplace that could cause harm.
+	 * Carefully read the code with copy_inst_from_kernel_nofault(), and
+	 * make sure what we read is what we expected it to be before
+	 * modifying it.
+	 */
+
+	/* read the text we want to modify */
+	if (copy_inst_from_kernel_nofault(&replaced, (void *)ip))
+		return -EFAULT;
+
+	/* Make sure it is what we expect it to be */
+	if (!ppc_inst_equal(replaced, old)) {
+		/* terminate the record with a newline like every other pr_err() here */
+		pr_err("%p: replaced (%08lx) != old (%08lx)\n", (void *)ip,
+		       ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old));
+		return -EINVAL;
+	}
+
+	/* replace the text with the new text */
+	return patch_instruction((u32 *)ip, new);
+}
+
+/*
+ * Helper functions that are the same for both PPC64 and PPC32.
+ */
+/*
+ * Non-zero when the function entry of @addr can be reached from @ip with a
+ * single relative branch (as judged by is_offset_in_branch_range()).
+ */
+static int test_24bit_addr(unsigned long ip, unsigned long addr)
+{
+	unsigned long target = ppc_function_entry((void *)addr);
+
+	return is_offset_in_branch_range(target - ip);
+}
+
+/* Non-zero when @op, with its LI field masked off, is 'bl'. */
+static int is_bl_op(ppc_inst_t op)
+{
+	u32 without_li = ppc_inst_val(op) & ~PPC_LI_MASK;
+
+	return without_li == PPC_RAW_BL(0);
+}
+
+/* Non-zero when @op, with its LI field masked off, is a plain 'b'. */
+static int is_b_op(ppc_inst_t op)
+{
+	u32 without_li = ppc_inst_val(op) & ~PPC_LI_MASK;
+
+	return without_li == PPC_RAW_BRANCH(0);
+}
+
+/*
+ * Compute the destination of the relative branch @op located at @ip:
+ * take the LI field, sign-extend it from bit 0x02000000 and add it to @ip.
+ */
+static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op)
+{
+	u32 li = PPC_LI(ppc_inst_val(op));
+	int disp;
+
+	/* a set 0x02000000 bit marks a negative displacement */
+	if (li & 0x02000000)
+		disp = (int)(li | 0xfe000000);
+	else
+		disp = (int)li;
+
+	return ip + (long)disp;
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Look up the module whose text contains rec->ip. The lookup itself runs
+ * inside an RCU guard. Logs an error and returns NULL when nothing is found.
+ */
+static struct module *ftrace_lookup_module(struct dyn_ftrace *rec)
+{
+	struct module *owner;
+
+	scoped_guard(rcu) {
+		owner = __module_text_address(rec->ip);
+	}
+
+	if (owner)
+		return owner;
+
+	pr_err("No module loaded at addr=%lx\n", rec->ip);
+	return NULL;
+}
+
+/*
+ * Disable the ftrace call at rec->ip when the call goes through a module
+ * trampoline.
+ *
+ * @mod:  module owning rec->ip, or NULL to have it looked up here
+ * @rec:  the ftrace record; rec->ip must currently hold a 'bl'
+ * @addr: function the trampoline is expected to reach
+ *
+ * The 'bl' target is resolved through module_trampoline_target() and must
+ * match the global entry of @addr; the neighbouring instruction is checked
+ * too before anything is patched.
+ *
+ * Returns 0 on success, -EINVAL on any mismatch, -EFAULT when an
+ * instruction or the trampoline target cannot be read, -EPERM when the
+ * final patch_instruction() fails.
+ */
+static int
+__ftrace_make_nop(struct module *mod,
+		  struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long entry, ptr, tramp;
+	unsigned long ip = rec->ip;
+	ppc_inst_t op, pop;
+
+	if (!mod) {
+		mod = ftrace_lookup_module(rec);
+		if (!mod)
+			return -EINVAL;
+	}
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	pr_devel("ip:%lx jumps to %lx\n", ip, tramp);
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx\n", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) {
+		if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) {
+			pr_err("Fetching instruction at %lx failed.\n", ip - 4);
+			return -EFAULT;
+		}
+
+		/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
+		if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) &&
+		    !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
+			pr_err("Unexpected instruction %08lx around bl _mcount\n",
+			       ppc_inst_as_ulong(op));
+			return -EINVAL;
+		}
+	} else if (IS_ENABLED(CONFIG_PPC64)) {
+		/*
+		 * Check what is in the next instruction: only the TOC
+		 * restore (PPC_INST_LD_TOC) is accepted here.
+		 */
+		if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) {
+			pr_err("Fetching op failed.\n");
+			return -EFAULT;
+		}
+
+		if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) {
+			pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC,
+			       ppc_inst_as_ulong(op));
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * When using -mprofile-kernel or PPC32 there is no load to jump over.
+	 *
+	 * Otherwise our original call site looks like:
+	 *
+	 * bl <tramp>
+	 * ld r2,XX(r1)
+	 *
+	 * Milton Miller pointed out that we can not simply nop the branch.
+	 * If a task was preempted when calling a trace function, the nops
+	 * will remove the way to restore the TOC in r2 and the r2 TOC will
+	 * get corrupted.
+	 *
+	 * Use a b +8 to jump over the load.
+	 */
+	if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
+		pop = ppc_inst(PPC_RAW_NOP());
+	else
+		pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */
+
+	if (patch_instruction((u32 *)ip, pop)) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+#else
+/* Build without CONFIG_MODULES: nothing to patch, report success. */
+static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+			     unsigned long addr)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+/*
+ * Return a recorded trampoline that is within branch range of @ip, or 0 if
+ * none is. Slots are scanned from the highest index down.
+ */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+	int slot = NUM_FTRACE_TRAMPS;
+
+	/*
+	 * The compiler generated long_branch tramps sit at the end of the
+	 * table and are preferred, hence the backwards walk.
+	 */
+	while (--slot >= 0) {
+		unsigned long candidate = ftrace_tramps[slot];
+
+		if (candidate && is_offset_in_branch_range(candidate - ip))
+			return candidate;
+	}
+
+	return 0;
+}
+
+/*
+ * Record @tramp in the first empty slot of ftrace_tramps[].
+ * Returns 0 on success, -1 when every slot is already taken.
+ */
+static int add_ftrace_tramp(unsigned long tramp)
+{
+	int slot;
+
+	for (slot = 0; slot < NUM_FTRACE_TRAMPS; slot++) {
+		if (ftrace_tramps[slot])
+			continue;
+
+		ftrace_tramps[slot] = tramp;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+	unsigned long target;
+	ppc_inst_t insn;
+	int slot;
+
+	/* A trampoline we already track needs no further work */
+	for (slot = 0; slot < NUM_FTRACE_TRAMPS; slot++) {
+		if (ftrace_tramps[slot] == tramp)
+			return 0;
+	}
+
+	/* First sighting: fetch the instruction sitting at the trampoline */
+	if (copy_inst_from_kernel_nofault(&insn, (void *)tramp)) {
+		pr_debug("Fetching opcode failed.\n");
+		return -1;
+	}
+
+	/* It has to be a plain 24 bit 'b' ... */
+	if (!is_b_op(insn)) {
+		pr_debug("Trampoline is not a long branch tramp.\n");
+		return -1;
+	}
+
+	/* ... whose destination is _mcount */
+	target = find_bl_target(tramp, insn);
+	if (target != ppc_global_function_entry((void *)_mcount)) {
+		pr_debug("Trampoline target %p is not _mcount\n", (void *)target);
+		return -1;
+	}
+
+	/* Redirect the trampoline to ftrace_[regs_]caller */
+	target = IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) ?
+		 ppc_global_function_entry((void *)ftrace_regs_caller) :
+		 ppc_global_function_entry((void *)ftrace_caller);
+
+	if (patch_branch((u32 *)tramp, target, 0)) {
+		pr_debug("REL24 out of range!\n");
+		return -1;
+	}
+
+	/* Remember it so later call sites can reuse it */
+	if (add_ftrace_tramp(tramp)) {
+		pr_debug("No tramp locations left\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Disable the ftrace call at rec->ip in core kernel text when the call is
+ * out of direct branch range.
+ *
+ * The instruction at rec->ip must be a 'bl'. Its target is offered to
+ * setup_mcount_compiler_tramp(); if that fails, some other recorded
+ * trampoline must be reachable from the site. @addr is not used here.
+ *
+ * Returns 0 on success, -EFAULT when the instruction cannot be read,
+ * -EINVAL when it is not a 'bl' or no trampoline is reachable, -EPERM when
+ * patching the nop fails.
+ */
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long tramp, ip = rec->ip;
+	ppc_inst_t op;
+
+	/* Read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* Let's find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+
+	pr_devel("ip:%lx jumps to %lx\n", ip, tramp);
+
+	if (setup_mcount_compiler_tramp(tramp)) {
+		/* Are other trampolines reachable? */
+		if (!find_ftrace_tramp(ip)) {
+			pr_err("No ftrace trampolines reachable from %ps\n",
+			       (void *)ip);
+			return -EINVAL;
+		}
+	}
+
+	if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Patching NOP failed.\n");
+		return -EPERM;
+	}
+
+	return 0;
+}
+
+/*
+ * Turn the ftrace call at rec->ip into a no-op, dispatching on how the call
+ * site reaches @addr.
+ */
+int ftrace_make_nop(struct module *mod,
+		    struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+
+	/*
+	 * A target within 24-bit branch range was called directly, so the
+	 * call site itself is rewritten from 'bl addr' to a nop.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		ppc_inst_t old = ftrace_call_replace(ip, addr, 1);
+		ppc_inst_t new = ppc_inst(PPC_RAW_NOP());
+
+		return ftrace_modify_code(ip, old, new);
+	}
+
+	/* Further away than that, a trampoline had to be used for the call */
+	if (core_kernel_text(ip))
+		return __ftrace_make_nop_kernel(rec, addr);
+
+	if (!IS_ENABLED(CONFIG_MODULES))
+		return -EINVAL;
+
+	return __ftrace_make_nop(mod, rec, addr);
+}
+
+#ifdef CONFIG_MODULES
+/*
+ * Examine the existing instructions for __ftrace_make_call.
+ * They should effectively be a NOP, and follow formal constraints,
+ * depending on the ABI. Return false if they don't.
+ */
+static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1)
+{
+	/* With regs support a single nop is all that is required */
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP()));
+
+	/* Otherwise: 'b +8' followed by the TOC load it skips */
+	if (!ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))))
+		return false;
+
+	return ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC));
+}
+
+/*
+ * Enable the ftrace call at rec->ip inside a module by branching to one of
+ * the module's ftrace trampolines.
+ *
+ * The site must currently hold the sequence accepted by
+ * expected_nop_sequence(), and the chosen trampoline must resolve to the
+ * global entry of @addr.
+ *
+ * Returns 0 on success, -EINVAL when the module, the trampolines or the
+ * expected instructions are missing or the branch cannot be patched,
+ * -EFAULT when an instruction or the trampoline target cannot be read.
+ */
+static int
+__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	ppc_inst_t op[2];
+	void *ip = (void *)rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = ftrace_lookup_module(rec);
+
+	if (!mod)
+		return -EINVAL;
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(op, ip))
+		return -EFAULT;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) {
+		/*
+		 * The second instruction is neither fetched nor compared in
+		 * this configuration, but op[1] is still passed by value and
+		 * printed on error below: give it a defined value.
+		 */
+		op[1] = ppc_inst(PPC_RAW_NOP());
+	} else if (copy_inst_from_kernel_nofault(op + 1, ip + 4)) {
+		return -EFAULT;
+	}
+
+	if (!expected_nop_sequence(ip, op[0], op[1])) {
+		pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip,
+		       ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1]));
+		return -EINVAL;
+	}
+
+	/* If we never set up ftrace trampoline(s), then bail */
+	if (!mod->arch.tramp ||
+	    (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx\n", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+/* Build without CONFIG_MODULES: nothing to patch, report success. */
+static int __ftrace_make_call(struct dyn_ftrace *rec,
+			      unsigned long addr)
+{
+	return 0;
+}
+#endif /* CONFIG_MODULES */
+
+/*
+ * Enable the ftrace call at rec->ip in core kernel text by branching, with
+ * link, to a recorded trampoline that is within range of the call site.
+ * @addr must be ftrace_caller or, with regs support built in,
+ * ftrace_regs_caller; the call site must currently hold a nop.
+ */
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+	void *site = (void *)rec->ip;
+	unsigned long wanted, known, stub;
+	ppc_inst_t cur;
+
+	/* Only branches to a known ftrace entry point may be installed */
+	known = ppc_global_function_entry((void *)ftrace_caller);
+	wanted = ppc_global_function_entry((void *)addr);
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && wanted != known)
+		known = ppc_global_function_entry((void *)ftrace_regs_caller);
+
+	if (wanted != known) {
+		pr_err("Unknown ftrace addr to patch: %ps\n", (void *)wanted);
+		return -EINVAL;
+	}
+
+	/* The site has to hold a nop right now */
+	if (copy_inst_from_kernel_nofault(&cur, site)) {
+		pr_err("Unable to read ftrace location %p\n", site);
+		return -EFAULT;
+	}
+
+	if (!ppc_inst_equal(cur, ppc_inst(PPC_RAW_NOP()))) {
+		pr_err("Unexpected call sequence at %p: %08lx\n",
+		       site, ppc_inst_as_ulong(cur));
+		return -EINVAL;
+	}
+
+	/* Pick a trampoline the site can branch to */
+	stub = find_ftrace_tramp((unsigned long)site);
+	if (!stub) {
+		pr_err("No ftrace trampolines reachable from %ps\n", site);
+		return -EINVAL;
+	}
+
+	if (patch_branch(site, stub, BRANCH_SET_LINK)) {
+		pr_err("Error patching branch to ftrace tramp!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Turn the nop at rec->ip into a call to @addr, dispatching on how the call
+ * site can reach @addr.
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+
+	/*
+	 * A target within 24-bit branch range is called directly: the nop at
+	 * the call site becomes 'bl addr'.
+	 */
+	if (test_24bit_addr(ip, addr)) {
+		ppc_inst_t old = ppc_inst(PPC_RAW_NOP());
+		ppc_inst_t new = ftrace_call_replace(ip, addr, 1);
+
+		return ftrace_modify_code(ip, old, new);
+	}
+
+	/* Further away than that, the call has to go through a trampoline */
+	if (core_kernel_text(ip))
+		return __ftrace_make_call_kernel(rec, addr);
+
+	/* We should not get here without modules */
+	if (!IS_ENABLED(CONFIG_MODULES))
+		return -EINVAL;
+
+	return __ftrace_make_call(rec, addr);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+#ifdef CONFIG_MODULES
+/*
+ * Re-target the ftrace call at rec->ip inside a module from @old_addr to
+ * @addr.
+ *
+ * The current 'bl' must reach the global entry of @old_addr, either
+ * directly or through a module trampoline. The new branch goes straight to
+ * @addr when that is within range, otherwise to the module trampoline
+ * selected by FTRACE_FL_REGS, which must resolve to @addr.
+ *
+ * Returns 0 on success, -EINVAL on a missing module/trampoline, an
+ * unexpected instruction or target, or a failed patch, -EFAULT when an
+ * instruction or trampoline target cannot be read.
+ */
+static int
+__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		     unsigned long addr)
+{
+	ppc_inst_t op;
+	unsigned long ip = rec->ip;
+	unsigned long entry, ptr, tramp;
+	struct module *mod = ftrace_lookup_module(rec);
+
+	if (!mod)
+		return -EINVAL;
+
+	/* If we never set up ftrace trampolines, then bail */
+	if (!mod->arch.tramp || !mod->arch.tramp_regs) {
+		pr_err("No ftrace trampoline\n");
+		return -EINVAL;
+	}
+
+	/* read where this goes */
+	if (copy_inst_from_kernel_nofault(&op, (void *)ip)) {
+		pr_err("Fetching opcode failed.\n");
+		return -EFAULT;
+	}
+
+	/* Make sure that this is still a 24bit jump */
+	if (!is_bl_op(op)) {
+		pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op));
+		return -EINVAL;
+	}
+
+	/* lets find where the pointer goes */
+	tramp = find_bl_target(ip, op);
+	entry = ppc_global_function_entry((void *)old_addr);
+
+	pr_devel("ip:%lx jumps to %lx\n", ip, tramp);
+
+	if (tramp != entry) {
+		/* old_addr is not within range, so we must have used a trampoline */
+		if (module_trampoline_target(mod, tramp, &ptr)) {
+			pr_err("Failed to get trampoline target\n");
+			return -EFAULT;
+		}
+
+		pr_devel("trampoline target %lx\n", ptr);
+
+		/* This should match what was called */
+		if (ptr != entry) {
+			pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+			return -EINVAL;
+		}
+	}
+
+	/* The new target may be within range */
+	if (test_24bit_addr(ip, addr)) {
+		/* within range */
+		if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) {
+			pr_err("REL24 out of range!\n");
+			return -EINVAL;
+		}
+
+		return 0;
+	}
+
+	if (rec->flags & FTRACE_FL_REGS)
+		tramp = mod->arch.tramp_regs;
+	else
+		tramp = mod->arch.tramp;
+
+	if (module_trampoline_target(mod, tramp, &ptr)) {
+		pr_err("Failed to get trampoline target\n");
+		return -EFAULT;
+	}
+
+	pr_devel("trampoline target %lx\n", ptr);
+
+	entry = ppc_global_function_entry((void *)addr);
+	/* This should match what was called */
+	if (ptr != entry) {
+		pr_err("addr %lx does not match expected %lx\n", ptr, entry);
+		return -EINVAL;
+	}
+
+	if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) {
+		pr_err("REL24 out of range!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+/* Build without CONFIG_MODULES: nothing to patch, report success. */
+static int __ftrace_modify_call(struct dyn_ftrace *rec,
+				unsigned long old_addr,
+				unsigned long addr)
+{
+	return 0;
+}
+#endif
+
+/*
+ * Switch the ftrace call at rec->ip from @old_addr to @addr, dispatching on
+ * whether both targets are directly reachable from the call site.
+ */
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	unsigned long ip = rec->ip;
+
+	/*
+	 * When both the old and the new target are within 24-bit branch
+	 * range, the call site is rewritten from one 'bl' to the other.
+	 */
+	if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) {
+		ppc_inst_t old = ftrace_call_replace(ip, old_addr, 1);
+		ppc_inst_t new = ftrace_call_replace(ip, addr, 1);
+
+		return ftrace_modify_code(ip, old, new);
+	}
+
+	/*
+	 * We always patch out of range locations to go to the regs
+	 * variant, so there is nothing to do for core kernel text.
+	 */
+	if (core_kernel_text(ip))
+		return 0;
+
+	/* We should not get here without modules */
+	if (!IS_ENABLED(CONFIG_MODULES))
+		return -EINVAL;
+
+	return __ftrace_modify_call(rec, old_addr, addr);
+}
+#endif
+
+/*
+ * Point the 'bl' at the ftrace_call patch site at @func and, when regs
+ * support is built in and the first patch succeeded, do the same for the
+ * ftrace_regs_call site. Returns the result of the last patch attempted.
+ */
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long site = (unsigned long)(&ftrace_call);
+	ppc_inst_t cur, want;
+	int err;
+
+	cur = ppc_inst_read((u32 *)&ftrace_call);
+	want = ftrace_call_replace(site, (unsigned long)func, 1);
+	err = ftrace_modify_code(site, cur, want);
+	if (err || !IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return err;
+
+	/* Also update the regs callback function */
+	site = (unsigned long)(&ftrace_regs_call);
+	cur = ppc_inst_read((u32 *)&ftrace_regs_call);
+	want = ftrace_call_replace(site, (unsigned long)func, 1);
+
+	return ftrace_modify_code(site, cur, want);
+}
+
+/*
+ * Use the default ftrace_modify_all_code, but without
+ * stop_machine().
+ */
+void arch_ftrace_update_code(int command)
+{
+	/* Forward @command directly to the generic implementation */
+	ftrace_modify_all_code(command);
+}
+
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+/*
+ * Drop ftrace_tramp_init from ftrace_tramps[] by zeroing its slot. The scan
+ * runs from slot 0 and gives up at the first empty slot.
+ */
+void ftrace_free_init_tramp(void)
+{
+	unsigned long init_tramp = (unsigned long)ftrace_tramp_init;
+	int slot = 0;
+
+	while (slot < NUM_FTRACE_TRAMPS && ftrace_tramps[slot]) {
+		if (ftrace_tramps[slot] == init_tramp) {
+			ftrace_tramps[slot] = 0;
+			break;
+		}
+		slot++;
+	}
+}
+
+/*
+ * Fill the two reserved trampoline areas (ftrace_tramp_text and
+ * ftrace_tramp_init) with a stub that loads the kernel TOC from the paca,
+ * adds the TOC-relative offset of ftrace_[regs_]caller and branches there
+ * through CTR, then record both in ftrace_tramps[].
+ *
+ * Returns 0 on success, -1 when the target is not within +/-2G of the
+ * kernel TOC.
+ */
+int __init ftrace_dyn_arch_init(void)
+{
+	unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+	u32 stub_insns[] = {
+		PPC_RAW_LD(_R12, _R13, PACATOC),
+		PPC_RAW_ADDIS(_R12, _R12, 0),
+		PPC_RAW_ADDI(_R12, _R12, 0),
+		PPC_RAW_MTCTR(_R12),
+		PPC_RAW_BCTR()
+	};
+	unsigned long addr;
+	long reladdr;
+	int i;
+
+	addr = IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) ?
+	       ppc_global_function_entry((void *)ftrace_regs_caller) :
+	       ppc_global_function_entry((void *)ftrace_caller);
+
+	reladdr = addr - kernel_toc_addr();
+
+	if (reladdr < -(long)SZ_2G || reladdr >= SZ_2G) {
+		pr_err("Address of %ps out of range of kernel_toc.\n",
+		       (void *)addr);
+		return -1;
+	}
+
+	for (i = 0; i < 2; i++) {
+		unsigned int *insns = tramp[i];
+
+		memcpy(insns, stub_insns, sizeof(stub_insns));
+		/* fill in the immediates of the addis/addi pair */
+		insns[1] |= PPC_HA(reladdr);
+		insns[2] |= PPC_LO(reladdr);
+		add_ftrace_tramp((unsigned long)insns);
+	}
+
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+extern void ftrace_graph_call(void);
+extern void ftrace_graph_stub(void);
+
+/*
+ * Flip the branch at the ftrace_graph_call patch site between
+ * ftrace_graph_stub and ftrace_graph_caller (@enable selects the latter).
+ * Nothing is patched, and 0 is returned, when
+ * CONFIG_DYNAMIC_FTRACE_WITH_ARGS is enabled.
+ */
+static int ftrace_modify_ftrace_graph_caller(bool enable)
+{
+	unsigned long site = (unsigned long)(&ftrace_graph_call);
+	unsigned long caller = (unsigned long)(&ftrace_graph_caller);
+	unsigned long stub = (unsigned long)(&ftrace_graph_stub);
+	unsigned long from, to;
+	ppc_inst_t old, new;
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
+		return 0;
+
+	if (enable) {
+		from = stub;
+		to = caller;
+	} else {
+		from = caller;
+		to = stub;
+	}
+
+	old = ftrace_call_replace(site, from, 0);
+	new = ftrace_call_replace(site, to, 0);
+
+	return ftrace_modify_code(site, old, new);
+}
+
+/* Patch the graph call site so that it branches to ftrace_graph_caller. */
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	const bool enable = true;
+
+	return ftrace_modify_ftrace_graph_caller(enable);
+}
+
+/* Patch the graph call site so that it branches to ftrace_graph_stub. */
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	const bool enable = false;
+
+	return ftrace_modify_ftrace_graph_caller(enable);
+}
+
+/*
+ * Hook the return address and push it in the stack of return addrs
+ * in current thread info. Return the address we want to divert to.
+ */
+static unsigned long
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp,
+			struct ftrace_regs *fregs)
+{
+	/* Leave the return address alone when graph tracing is dead ... */
+	if (unlikely(ftrace_graph_is_dead()))
+		return parent;
+
+	/* ... or paused for the current task */
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return parent;
+
+	/* Tracer declined (non-zero): keep the original return address */
+	if (function_graph_enter_regs(parent, ip, 0, (unsigned long *)sp, fregs))
+		return parent;
+
+	/* Tracer accepted the entry: divert the return to return_to_handler */
+	return ppc_function_entry(return_to_handler);
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
+/*
+ * Function-graph entry hook: store whatever __prepare_ftrace_return()
+ * decides on into the saved link register, using the saved r1 as stack
+ * pointer.
+ */
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1];
+	unsigned long link = __prepare_ftrace_return(parent_ip, ip, sp, fregs);
+
+	arch_ftrace_regs(fregs)->regs.link = link;
+}
+#else
+/*
+ * Entry point used by the assembly graph caller: same as
+ * __prepare_ftrace_return() with no ftrace_regs available.
+ */
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+				    unsigned long sp)
+{
+	struct ftrace_regs *no_regs = NULL;
+
+	return __prepare_ftrace_return(parent, ip, sp, no_regs);
+}
+#endif
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+/*
+ * Skip a leading '.' in @str unless @search itself starts with '.'.
+ * Returns @str or @str + 1.
+ */
+char *arch_ftrace_match_adjust(char *str, const char *search)
+{
+	if (str[0] != '.' || search[0] == '.')
+		return str;
+
+	return str + 1;
+}
+#endif /* CONFIG_PPC64_ELF_ABI_V1 */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
new file mode 100644
index 000000000000..a8a7f28404c8
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Split from ftrace_64.S
+ */
+
+#include <linux/export.h>
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * ftrace_caller: returns immediately when the byte at
+ * PACA_FTRACE_ENABLED(r13) is zero; otherwise allocates a 112-byte frame,
+ * sets up r3/r4 and calls through the ftrace_call patch site.
+ */
+_GLOBAL_TOC(ftrace_caller)
+ lbz r3, PACA_FTRACE_ENABLED(r13)
+ cmpdi r3, 0
+ beqlr
+
+ /* Taken from output of objdump from lib64/glibc */
+ mflr r3
+ ld r11, 0(r1)
+ stdu r1, -112(r1)
+ /* 128(r1) is 16 bytes above the stack pointer we were entered with */
+ std r3, 128(r1)
+ ld r4, 16(r11)
+ subi r3, r3, MCOUNT_INSN_SIZE
+.globl ftrace_call
+ftrace_call:
+ /* patch site: ftrace_update_ftrace_func() rewrites this branch */
+ bl ftrace_stub
+ nop
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+.globl ftrace_graph_call
+ftrace_graph_call:
+ /* patch site: switched between ftrace_graph_stub and ftrace_graph_caller */
+ b ftrace_graph_stub
+_GLOBAL(ftrace_graph_stub)
+#endif
+ /* reload LR from where it was stored above and drop the frame */
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * ftrace_graph_caller: loads r3, r4 and r5 and calls
+ * prepare_ftrace_return(), then stores the value it returns in r3 into the
+ * LR slot of the caller's stack frame before unwinding the 112-byte frame.
+ */
+_GLOBAL(ftrace_graph_caller)
+ /* r5 = r1 + 112 */
+ addi r5, r1, 112
+ /* load r4 with local address */
+ ld r4, 128(r1)
+ subi r4, r4, MCOUNT_INSN_SIZE
+
+ /* Grab the LR out of the caller stack frame */
+ ld r11, 112(r1)
+ ld r3, 16(r11)
+
+ bl prepare_ftrace_return
+ nop
+
+ /*
+ * prepare_ftrace_return gives us the address we divert to.
+ * Change the LR in the callers stack frame to this.
+ */
+ ld r11, 112(r1)
+ std r3, 16(r11)
+
+ /* reload our own LR, pop the frame and return */
+ ld r0, 128(r1)
+ mtlr r0
+ addi r1, r1, 112
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+/*
+ * Two 32-byte areas reserved for ftrace trampolines; each is placed in its
+ * own section. ftrace_dyn_arch_init() copies its stub instructions into
+ * both at boot.
+ */
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 32
+.popsection
+
+/*
+ * mcount/_mcount: do nothing but return. The address in LR is moved to CTR
+ * and branched to, after LR has been reloaded from r0.
+ */
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * return_to_handler: preserves r3/r4 (plus r2 and r31 on PPC64) around a
+ * call to ftrace_return_to_handler(), loads the value it returns into LR
+ * and returns there with blr.
+ */
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+#ifdef CONFIG_PPC64
+ std r4, -32(r1)
+ std r3, -24(r1)
+ /* save TOC */
+ std r2, -16(r1)
+ std r31, -8(r1)
+ mr r31, r1
+ stdu r1, -112(r1)
+
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ LOAD_PACA_TOC()
+#else
+ stwu r1, -16(r1)
+ stw r3, 8(r1)
+ stw r4, 12(r1)
+#endif
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+#ifdef CONFIG_PPC64
+ /* follow the back chain to the previous r1, then reload what was saved below it */
+ ld r1, 0(r1)
+ ld r4, -32(r1)
+ ld r3, -24(r1)
+ ld r2, -16(r1)
+ ld r31, -8(r1)
+#else
+ lwz r3, 8(r1)
+ lwz r4, 12(r1)
+ addi r1, r1, 16
+#endif
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S
new file mode 100644
index 000000000000..6599fe3c6234
--- /dev/null
+++ b/arch/powerpc/kernel/trace/ftrace_entry.S
@@ -0,0 +1,479 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Split from ftrace_64.S
+ */
+
+#include <linux/export.h>
+#include <linux/magic.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/ftrace.h>
+#include <asm/ppc-opcode.h>
+#include <asm/thread_info.h>
+#include <asm/bug.h>
+#include <asm/ptrace.h>
+
+/*
+ *
+ * ftrace_caller()/ftrace_regs_caller() is the function that replaces _mcount()
+ * when ftrace is active.
+ *
+ * We arrive here after a function A calls function B, and we are the trace
+ * function for B. When we enter r1 points to A's stack frame, B has not yet
+ * had a chance to allocate one.
+ *
+ * Additionally r2 may point either to the TOC for A, or B, depending on
+ * whether B did a TOC setup sequence before calling us.
+ *
+ * On entry the LR points back to the _mcount() call site, and r0 holds the
+ * saved LR as it was on entry to B, ie. the original return address at the
+ * call site in A.
+ *
+ * Our job is to save the register state into a struct pt_regs (on the stack)
+ * and then arrange for the ftrace function to be called.
+ */
+.macro ftrace_regs_entry allregs
+ /*
+ * \allregs == 1 is the expansion used by ftrace_regs_caller (r2 and
+ * r11-r31 are saved as well, along with CR/MSR/CTR/XER);
+ * \allregs == 0 is the reduced expansion used by ftrace_caller.
+ */
+ /* Create a minimal stack frame for representing B */
+ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1)
+
+ /* Create our stack frame + pt_regs */
+ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1)
+
+ .if \allregs == 1
+ SAVE_GPRS(11, 12, r1)
+ .endif
+
+ /* Get the _mcount() call site out of LR */
+ mflr r11
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /* Load the ftrace_op */
+ PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11)
+
+ /* Load direct_call from the ftrace_op */
+ PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12)
+ PPC_LCMPI r12, 0
+ .if \allregs == 1
+ bne .Lftrace_direct_call_regs
+ .else
+ bne .Lftrace_direct_call
+ .endif
+#endif
+
+ /* Save the previous LR in pt_regs->link */
+ PPC_STL r0, _LINK(r1)
+ /* Also save it in A's stack frame */
+ PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1)
+
+ /* Save all gprs to pt_regs */
+ SAVE_GPR(0, r1)
+ SAVE_GPRS(3, 10, r1)
+
+#ifdef CONFIG_PPC64
+ /* Ok to continue? */
+ lbz r3, PACA_FTRACE_ENABLED(r13)
+ cmpdi r3, 0
+ beq ftrace_no_trace
+#endif
+
+ .if \allregs == 1
+ SAVE_GPR(2, r1)
+ SAVE_GPRS(13, 31, r1)
+ .else
+#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE)
+ SAVE_GPR(14, r1)
+#endif
+ .endif
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ PPC_STL r8, GPR1(r1)
+
+ .if \allregs == 1
+ /* Load special regs for save below */
+ mfcr r7
+ mfmsr r8
+ mfctr r9
+ mfxer r10
+ .else
+ /* Clear MSR to flag as ftrace_caller versus ftrace_regs_caller */
+ li r8, 0
+ .endif
+
+#ifdef CONFIG_PPC64
+ /* Save callee's TOC in the ABI compliant location */
+ std r2, STK_GOT(r1)
+ LOAD_PACA_TOC() /* get kernel TOC in r2 */
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ /* r11 points to the instruction following the call to ftrace */
+ PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11)
+ PPC_LL r12, FTRACE_OPS_FUNC(r5)
+ mtctr r12
+#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */
+#ifdef CONFIG_PPC64
+ LOAD_REG_ADDR(r3, function_trace_op)
+ ld r5,0(r3)
+#else
+ lis r3,function_trace_op@ha
+ lwz r5,function_trace_op@l(r3)
+#endif
+#endif
+
+ /* Save special regs */
+ PPC_STL r8, _MSR(r1)
+ .if \allregs == 1
+ PPC_STL r7, _CCR(r1)
+ PPC_STL r9, _CTR(r1)
+ PPC_STL r10, _XER(r1)
+ .endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /* Clear orig_gpr3 to later detect ftrace_direct call */
+ li r7, 0
+ PPC_STL r7, ORIG_GPR3(r1)
+#endif
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ /* Save our real return address in nvr for return */
+ .if \allregs == 0
+ SAVE_GPR(15, r1)
+ .endif
+ mr r15, r11
+ /*
+ * We want the ftrace location in the function, but our lr (in r11)
+ * points at the 'mtlr r0' instruction in the out of line stub. To
+ * recover the ftrace location, we read the branch instruction in the
+ * stub, and adjust our lr by the branch offset.
+ *
+ * See ftrace_init_ool_stub() for the profile sequence.
+ */
+ lwz r8, MCOUNT_INSN_SIZE(r11)
+ /* shift left then arithmetic-shift right by 6: drops the top 6 bits and sign-extends the rest */
+ slwi r8, r8, 6
+ srawi r8, r8, 6
+ add r3, r11, r8
+ /*
+ * Override our nip to point past the branch in the original function.
+ * This allows reliable stack trace and the ftrace stack tracer to work as-is.
+ */
+ addi r11, r3, MCOUNT_INSN_SIZE
+#else
+ /* Calculate ip from nip-4 into r3 for call below */
+ subi r3, r11, MCOUNT_INSN_SIZE
+#endif
+
+ /* Save NIP as pt_regs->nip */
+ PPC_STL r11, _NIP(r1)
+ /* Also save it in B's stackframe header for proper unwind */
+ PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1)
+#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE)
+ mr r14, r11 /* remember old NIP */
+#endif
+
+ /* Put the original return address in r4 as parent_ip */
+ mr r4, r0
+
+ /* Load &pt_regs in r6 for call below */
+ addi r6, r1, STACK_INT_FRAME_REGS
+.endm
+
+.macro ftrace_regs_exit allregs
+ /*
+ * Counterpart of ftrace_regs_entry: reload LR/NIP and the saved GPRs
+ * from pt_regs, pop both stack frames and leave. \allregs must match
+ * the value given to ftrace_regs_entry.
+ */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /* Check orig_gpr3 to detect ftrace_direct call */
+ PPC_LL r3, ORIG_GPR3(r1)
+ PPC_LCMPI cr1, r3, 0
+ mtctr r3
+#endif
+
+ /* Restore possibly modified LR */
+ PPC_LL r0, _LINK(r1)
+
+#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ /* Load ctr with the possibly modified NIP */
+ PPC_LL r3, _NIP(r1)
+#ifdef CONFIG_LIVEPATCH_64
+ cmpd r14, r3 /* has NIP been altered? */
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ /* orig_gpr3 == 0: normal path at 2; otherwise NIP goes to LR, CTR keeps orig_gpr3 */
+ beq cr1,2f
+ mtlr r3
+ b 3f
+#endif
+2: mtctr r3
+ mtlr r0
+3:
+
+#else /* CONFIG_PPC_FTRACE_OUT_OF_LINE */
+ /* Load LR with the possibly modified NIP */
+ PPC_LL r3, _NIP(r1)
+ cmpd r14, r3 /* has NIP been altered? */
+ bne- 1f
+
+ /* NIP untouched: use the return address kept in r15 by ftrace_regs_entry */
+ mr r3, r15
+1: mtlr r3
+ .if \allregs == 0
+ REST_GPR(15, r1)
+ .endif
+#endif
+
+ /* Restore gprs */
+ .if \allregs == 1
+ REST_GPRS(2, 31, r1)
+ .else
+ REST_GPRS(3, 10, r1)
+#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE)
+ REST_GPR(14, r1)
+#endif
+ .endif
+
+#ifdef CONFIG_PPC64
+ /* Restore callee's TOC */
+ ld r2, STK_GOT(r1)
+#endif
+
+ /* Pop our stack frame */
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+
+#ifdef CONFIG_LIVEPATCH_64
+ /* Based on the cmpd above, if the NIP was altered handle livepatch */
+ bne- livepatch_handler
+#endif
+
+ /* jump after _mcount site */
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+ bnectr cr1
+#endif
+ /*
+ * Return with blr to keep the link stack balanced. The function profiling sequence
+ * uses 'mtlr r0' to restore LR.
+ */
+ blr
+#else
+ bctr
+#endif
+.endm
+
+.macro ftrace_regs_func allregs
+ /*
+ * Emit the call to the tracer. With CALL_OPS the target was already
+ * loaded into CTR by ftrace_regs_entry; otherwise a patchable 'bl' is
+ * emitted under the label ftrace_regs_call (\allregs == 1) or
+ * ftrace_call (\allregs == 0).
+ */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS
+ bctrl
+#else
+ .if \allregs == 1
+.globl ftrace_regs_call
+ftrace_regs_call:
+ .else
+.globl ftrace_call
+ftrace_call:
+ .endif
+ /* ftrace_call(r3, r4, r5, r6) */
+ bl ftrace_stub
+#endif
+.endm
+
+/* Full-register variant: every macro is expanded with allregs == 1 */
+_GLOBAL(ftrace_regs_caller)
+ ftrace_regs_entry 1
+ ftrace_regs_func 1
+ ftrace_regs_exit 1
+
+/* Reduced variant: every macro is expanded with allregs == 0 */
+_GLOBAL(ftrace_caller)
+ ftrace_regs_entry 0
+ ftrace_regs_func 0
+ ftrace_regs_exit 0
+
+/* Default target of the patchable 'bl' in ftrace_regs_func: just return */
+_GLOBAL(ftrace_stub)
+ blr
+
+#ifdef CONFIG_PPC64
+/*
+ * Branched to from ftrace_regs_entry when the byte at
+ * PACA_FTRACE_ENABLED(r13) is zero: put r3 back, pop both stack frames and
+ * return without calling any tracer.
+ */
+ftrace_no_trace:
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ REST_GPR(3, r1)
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ blr
+#else
+ /* return through CTR so that LR can be reloaded from r0 first */
+ mflr r3
+ mtctr r3
+ REST_GPR(3, r1)
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ mtlr r0
+ bctr
+#endif
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+/*
+ * Early exits taken from ftrace_regs_entry when the ftrace_op's direct_call
+ * value (in r12) is non-zero: pop both stack frames and branch to it
+ * through CTR. The _regs flavour first restores r11/r12, which only the
+ * allregs == 1 expansion saved.
+ */
+.Lftrace_direct_call_regs:
+ mtctr r12
+ REST_GPRS(11, 12, r1)
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ bctr
+.Lftrace_direct_call:
+ mtctr r12
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ bctr
+/* A direct-call trampoline that does nothing but return */
+SYM_FUNC_START(ftrace_stub_direct_tramp)
+ blr
+SYM_FUNC_END(ftrace_stub_direct_tramp)
+#endif
+
+#ifdef CONFIG_LIVEPATCH_64
+ /*
+ * This function runs in the mcount context, between two functions. As
+ * such it can only clobber registers which are volatile and used in
+ * function linkage.
+ *
+ * We get here when a function A, calls another function B, but B has
+ * been live patched with a new function C.
+ *
+ * On entry, we have no stack frame and can not allocate one.
+ *
+ * With PPC_FTRACE_OUT_OF_LINE=n, on entry:
+ * - LR points back to the original caller (in A)
+ * - CTR holds the new NIP in C
+ * - r0, r11 & r12 are free
+ *
+ * With PPC_FTRACE_OUT_OF_LINE=y, on entry:
+ * - r0 points back to the original caller (in A)
+ * - LR holds the new NIP in C
+ * - r11 & r12 are free
+ */
+livepatch_handler:
+ ld r12, PACA_THREAD_INFO(r13)
+
+ /* Allocate 3 x 8 bytes */
+ ld r11, TI_livepatch_sp(r12)
+ addi r11, r11, 24
+ std r11, TI_livepatch_sp(r12)
+
+ /* Store stack end marker */
+ lis r12, STACK_END_MAGIC@h
+ ori r12, r12, STACK_END_MAGIC@l
+ std r12, -8(r11)
+
+ /* Save toc & real LR on livepatch stack */
+ std r2, -24(r11)
+#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE
+ mflr r12
+ std r12, -16(r11)
+ mfctr r12
+#else
+ std r0, -16(r11)
+ mflr r12
+ /* Put ctr in r12 for global entry and branch there */
+ mtctr r12
+#endif
+ bctrl
+
+ /*
+ * Now we are returning from the patched function to the original
+ * caller A. We are free to use r11, r12 and we can use r2 until we
+ * restore it.
+ */
+
+ ld r12, PACA_THREAD_INFO(r13)
+
+ ld r11, TI_livepatch_sp(r12)
+
+ /* Check stack marker hasn't been trashed */
+ lis r2, STACK_END_MAGIC@h
+ ori r2, r2, STACK_END_MAGIC@l
+ ld r12, -8(r11)
+1: tdne r12, r2
+ EMIT_BUG_ENTRY 1b, __FILE__, __LINE__ - 1, 0
+
+ /* Restore LR & toc from livepatch stack */
+ ld r12, -16(r11)
+ mtlr r12
+ ld r2, -24(r11)
+
+ /* Pop livepatch stack frame */
+ ld r12, PACA_THREAD_INFO(r13)
+ subi r11, r11, 24
+ std r11, TI_livepatch_sp(r12)
+
+ /* Return to original caller of live patched function */
+ blr
+#endif /* CONFIG_LIVEPATCH_64 */
+
+#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY
+_GLOBAL(mcount)
+_GLOBAL(_mcount)
+EXPORT_SYMBOL(_mcount)
+ mflr r12
+ mtctr r12
+ mtlr r0
+ bctr
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+_GLOBAL(return_to_handler)
+ /* need to save return values */
+#ifdef CONFIG_PPC64
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+ std r4, GPR4(r1)
+ std r3, GPR3(r1)
+ /* Save previous stack pointer (r1) */
+ addi r3, r1, SWITCH_FRAME_SIZE
+ std r3, GPR1(r1)
+ /* save TOC */
+ std r2, 24(r1)
+ std r31, 32(r1)
+ mr r31, r1
+ /* pass ftrace_regs/pt_regs to ftrace_return_to_handler */
+ addi r3, r1, STACK_INT_FRAME_REGS
+ /*
+ * We might be called from a module.
+ * Switch to our TOC to run inside the core kernel.
+ */
+ LOAD_PACA_TOC()
+#else
+ stwu r1, -SWITCH_FRAME_SIZE(r1)
+ stw r4, GPR4(r1)
+ stw r3, GPR3(r1)
+ addi r3, r1, SWITCH_FRAME_SIZE
+ stw r3, GPR1(r1)
+ /* pass ftrace_regs/pt_regs to ftrace_return_to_handler */
+ addi r3, r1, STACK_INT_FRAME_REGS
+#endif
+
+ bl ftrace_return_to_handler
+ nop
+
+ /* return value has real return address */
+ mtlr r3
+
+#ifdef CONFIG_PPC64
+ ld r4, GPR4(r1)
+ ld r3, GPR3(r1)
+ ld r2, 24(r1)
+ ld r31, 32(r1)
+ ld r1, 0(r1)
+#else
+ lwz r3, GPR3(r1)
+ lwz r4, GPR4(r1)
+ addi r1, r1, SWITCH_FRAME_SIZE
+#endif
+
+ /* Jump back to real return address */
+ blr
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE)
+
+SYM_START(ftrace_ool_stub_text, SYM_L_GLOBAL, .balign SZL)
+ .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE
+SYM_CODE_END(ftrace_ool_stub_text)
+#endif
+
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 32
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 32
+.popsection
diff --git a/arch/powerpc/kernel/trace/trace_clock.c b/arch/powerpc/kernel/trace/trace_clock.c
new file mode 100644
index 000000000000..b0143a313736
--- /dev/null
+++ b/arch/powerpc/kernel/trace/trace_clock.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include <asm/trace_clock.h>
+#include <asm/time.h>
+
+u64 notrace trace_clock_ppc_tb(void)
+{
+ return get_tb();
+}
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 0dc43f9932cf..cb8e9357383e 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
* Copyright 2007-2010 Freescale Semiconductor, Inc.
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Modified by Cort Dougan (cort@cs.nmt.edu)
* and Paul Mackerras (paulus@samba.org)
*/
@@ -17,15 +13,18 @@
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/pkeys.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/user.h>
#include <linux/interrupt.h>
#include <linux/init.h>
-#include <linux/module.h>
+#include <linux/extable.h>
+#include <linux/module.h> /* print_modules */
#include <linux/prctl.h>
#include <linux/delay.h>
#include <linux/kprobes.h>
@@ -33,13 +32,16 @@
#include <linux/backlight.h>
#include <linux/bug.h>
#include <linux/kdebug.h>
-#include <linux/debugfs.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
+#include <linux/smp.h>
+#include <linux/console.h>
+#include <linux/kmsg_dump.h>
+#include <linux/debugfs.h>
#include <asm/emulated_ops.h>
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -51,7 +53,6 @@
#ifdef CONFIG_PPC64
#include <asm/firmware.h>
#include <asm/processor.h>
-#include <asm/tm.h>
#endif
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
@@ -60,9 +61,16 @@
#include <asm/switch_to.h>
#include <asm/tm.h>
#include <asm/debug.h>
+#include <asm/asm-prototypes.h>
+#include <asm/hmi.h>
#include <sysdev/fsl_pci.h>
+#include <asm/kprobes.h>
+#include <asm/stacktrace.h>
+#include <asm/nmi.h>
+#include <asm/disassemble.h>
+#include <asm/udbg.h>
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
@@ -87,6 +95,19 @@ EXPORT_SYMBOL(__debugger_fault_handler);
#define TM_DEBUG(x...) do { } while(0)
#endif
+static const char *signame(int signr)
+{
+ switch (signr) {
+ case SIGBUS: return "bus error";
+ case SIGFPE: return "floating point exception";
+ case SIGILL: return "illegal instruction";
+ case SIGSEGV: return "segfault";
+ case SIGTRAP: return "unhandled trap";
+ }
+
+ return "unknown signal";
+}
+
/*
* Trap & Exception support
*/
@@ -100,7 +121,7 @@ static void pmac_backlight_unblank(void)
props = &pmac_backlight->props;
props->brightness = props->max_brightness;
- props->power = FB_BLANK_UNBLANK;
+ props->power = BACKLIGHT_POWER_ON;
backlight_update_status(pmac_backlight);
}
mutex_unlock(&pmac_backlight_mutex);
@@ -109,19 +130,59 @@ static void pmac_backlight_unblank(void)
static inline void pmac_backlight_unblank(void) { }
#endif
+/*
+ * If oops/die is expected to crash the machine, return true here.
+ *
+ * This should not be expected to be 100% accurate, there may be
+ * notifiers registered or other unexpected conditions that may bring
+ * down the kernel. Or if the current process in the kernel is holding
+ * locks or has other critical state, the kernel may become effectively
+ * unusable anyway.
+ */
+bool die_will_crash(void)
+{
+ if (should_fadump_crash())
+ return true;
+ if (kexec_should_crash(current))
+ return true;
+ if (in_interrupt() || panic_on_oops ||
+ !current->pid || is_global_init(current))
+ return true;
+
+ return false;
+}
+
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
static int die_owner = -1;
static unsigned int die_nest_count;
static int die_counter;
-static unsigned __kprobes long oops_begin(struct pt_regs *regs)
+void panic_flush_kmsg_start(void)
+{
+ /*
+ * These are mostly taken from kernel/panic.c, but try to do
+ * relatively minimal work. Don't use delay functions (TB may
+ * be broken), don't crash dump (need to set a firmware log),
+ * don't run notifiers. We do want to get some information to
+ * Linux console.
+ */
+ console_verbose();
+ bust_spinlocks(1);
+}
+
+void panic_flush_kmsg_end(void)
+{
+ kmsg_dump(KMSG_DUMP_PANIC);
+ bust_spinlocks(0);
+ debug_locks_off();
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
+}
+
+static unsigned long oops_begin(struct pt_regs *regs)
{
int cpu;
unsigned long flags;
- if (debugger(regs))
- return 1;
-
oops_enter();
/* racy, but better than risking deadlock. */
@@ -141,37 +202,33 @@ static unsigned __kprobes long oops_begin(struct pt_regs *regs)
pmac_backlight_unblank();
return flags;
}
+NOKPROBE_SYMBOL(oops_begin);
-static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
+static void oops_end(unsigned long flags, struct pt_regs *regs,
int signr)
{
bust_spinlocks(0);
- die_owner = -1;
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
die_nest_count--;
oops_exit();
printk("\n");
- if (!die_nest_count)
+ if (!die_nest_count) {
/* Nest count reaches zero, release the lock. */
+ die_owner = -1;
arch_spin_unlock(&die_lock);
+ }
raw_local_irq_restore(flags);
- crash_fadump(regs, "die oops");
-
/*
- * A system reset (0x100) is a request to dump, so we always send
- * it through the crashdump code.
+ * system_reset_exception handles debugger, crash dump, panic, for 0x100
*/
- if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) {
- crash_kexec(regs);
+ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
+ return;
- /*
- * We aren't the primary crash CPU. We need to send it
- * to a holding pattern to avoid it ending up in the panic
- * code.
- */
- crash_kexec_secondary(regs);
- }
+ crash_fadump(regs, "die oops");
+
+ if (kexec_should_crash(current))
+ crash_kexec(regs);
if (!signr)
return;
@@ -187,29 +244,33 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
mdelay(MSEC_PER_SEC);
}
- if (in_interrupt())
- panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
- do_exit(signr);
+ make_task_dead(signr);
+}
+NOKPROBE_SYMBOL(oops_end);
+
+static char *get_mmu_str(void)
+{
+ if (early_radix_enabled())
+ return " MMU=Radix";
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return " MMU=Hash";
+ return "";
}
-static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
+static int __die(const char *str, struct pt_regs *regs, long err)
{
printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
-#ifdef CONFIG_PREEMPT
- printk("PREEMPT ");
-#endif
-#ifdef CONFIG_SMP
- printk("SMP NR_CPUS=%d ", NR_CPUS);
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
- printk("DEBUG_PAGEALLOC ");
-#endif
-#ifdef CONFIG_NUMA
- printk("NUMA ");
-#endif
- printk("%s\n", ppc_md.name ? ppc_md.name : "");
+
+ printk("%s PAGE_SIZE=%luK%s %s%s%s%s %s\n",
+ IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
+ PAGE_SIZE / 1024, get_mmu_str(),
+ IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
+ IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
+ debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "",
+ IS_ENABLED(CONFIG_NUMA) ? " NUMA" : "",
+ ppc_md.name ? ppc_md.name : "");
if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) == NOTIFY_STOP)
return 1;
@@ -219,109 +280,252 @@ static int __kprobes __die(const char *str, struct pt_regs *regs, long err)
return 0;
}
+NOKPROBE_SYMBOL(__die);
void die(const char *str, struct pt_regs *regs, long err)
{
- unsigned long flags = oops_begin(regs);
+ unsigned long flags;
+
+ /*
+ * system_reset_exception handles debugger, crash dump, panic, for 0x100
+ */
+ if (TRAP(regs) != INTERRUPT_SYSTEM_RESET) {
+ if (debugger(regs))
+ return;
+ }
+ flags = oops_begin(regs);
if (__die(str, regs, err))
err = 0;
oops_end(flags, regs, err);
}
+NOKPROBE_SYMBOL(die);
-void user_single_step_siginfo(struct task_struct *tsk,
- struct pt_regs *regs, siginfo_t *info)
+void user_single_step_report(struct pt_regs *regs)
{
- memset(info, 0, sizeof(*info));
- info->si_signo = SIGTRAP;
- info->si_code = TRAP_TRACE;
- info->si_addr = (void __user *)regs->nip;
+ force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip);
}
-void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+static void show_signal_msg(int signr, struct pt_regs *regs, int code,
+ unsigned long addr)
{
- siginfo_t info;
- const char fmt32[] = KERN_INFO "%s[%d]: unhandled signal %d " \
- "at %08lx nip %08lx lr %08lx code %x\n";
- const char fmt64[] = KERN_INFO "%s[%d]: unhandled signal %d " \
- "at %016lx nip %016lx lr %016lx code %x\n";
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ if (!show_unhandled_signals)
+ return;
+
+ if (!unhandled_signal(current, signr))
+ return;
+
+ if (!__ratelimit(&rs))
+ return;
+ pr_info("%s[%d]: %s (%d) at %lx nip %lx lr %lx code %x",
+ current->comm, current->pid, signame(signr), signr,
+ addr, regs->nip, regs->link, code);
+
+ print_vma_addr(KERN_CONT " in ", regs->nip);
+
+ pr_cont("\n");
+
+ show_user_instructions(regs);
+}
+
+static bool exception_common(int signr, struct pt_regs *regs, int code,
+ unsigned long addr)
+{
if (!user_mode(regs)) {
die("Exception in kernel mode", regs, signr);
- return;
+ return false;
}
- if (show_unhandled_signals && unhandled_signal(current, signr)) {
- printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32,
- current->comm, current->pid, signr,
- addr, regs->nip, regs->link, code);
- }
+ /*
+ * Must not enable interrupts even for user-mode exception, because
+ * this can be called from machine check, which may be a NMI or IRQ
+ * which don't like interrupts being enabled. Could check for
+ * in_hardirq || in_nmi perhaps, but there doesn't seem to be a good
+ * reason why _exception() should enable irqs for an exception handler,
+ * the handlers themselves do that directly.
+ */
- if (arch_irqs_disabled() && !arch_irq_disabled_regs(regs))
- local_irq_enable();
+ show_signal_msg(signr, regs, code, addr);
current->thread.trap_nr = code;
- memset(&info, 0, sizeof(info));
- info.si_signo = signr;
- info.si_code = code;
- info.si_addr = (void __user *) addr;
- force_sig_info(signr, &info, current);
+
+ return true;
}
-#ifdef CONFIG_PPC64
-void system_reset_exception(struct pt_regs *regs)
+void _exception_pkey(struct pt_regs *regs, unsigned long addr, int key)
{
- /* See if any machine dependent calls */
- if (ppc_md.system_reset_exception) {
- if (ppc_md.system_reset_exception(regs))
- return;
- }
+ if (!exception_common(SIGSEGV, regs, SEGV_PKUERR, addr))
+ return;
- die("System Reset", regs, SIGABRT);
+ force_sig_pkuerr((void __user *) addr, key);
+}
- /* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- panic("Unrecoverable System Reset");
+void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
+{
+ if (!exception_common(signr, regs, code, addr))
+ return;
- /* What should we do here? We could issue a shutdown or hard reset. */
+ force_sig_fault(signr, code, (void __user *)addr);
}
/*
- * This function is called in real mode. Strictly no printk's please.
+ * The interrupt architecture has a quirk in that the HV interrupts excluding
+ * the NMIs (0x100 and 0x200) do not clear MSR[RI] at entry. The first thing
+ * that an interrupt handler must do is save off a GPR into a scratch register,
+ * and all interrupts on POWERNV (HV=1) use the HSPRG1 register as scratch.
+ * Therefore an NMI can clobber an HV interrupt's live HSPRG1 without noticing
+ * that it is non-reentrant, which leads to random data corruption.
+ *
+ * The solution is for NMI interrupts in HV mode to check if they originated
+ * from these critical HV interrupt regions. If so, then mark them not
+ * recoverable.
*
- * regs->nip and regs->msr contains srr0 and ssr1.
+ * An alternative would be for HV NMIs to use SPRG for scratch to avoid the
+ * HSPRG1 clobber, however this would cause guest SPRG to be clobbered. Linux
+ * guests should always have MSR[RI]=0 when its scratch SPRG is in use, so
+ * that would work. However any other guest OS that may have the SPRG live
+ * and MSR[RI]=1 could encounter silent corruption.
+ *
+ * Builds that do not support KVM could take this second option to increase
+ * the recoverability of NMIs.
*/
-long machine_check_early(struct pt_regs *regs)
+noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
{
- long handled = 0;
+#ifdef CONFIG_PPC_POWERNV
+ unsigned long kbase = (unsigned long)_stext;
+ unsigned long nip = regs->nip;
- __get_cpu_var(irq_stat).mce_exceptions++;
+ if (!(regs->msr & MSR_RI))
+ return;
+ if (!(regs->msr & MSR_HV))
+ return;
+ if (user_mode(regs))
+ return;
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
- return handled;
-}
+ /*
+ * Now test if the interrupt has hit a range that may be using
+ * HSPRG1 without having RI=0 (i.e., an HSRR interrupt). The
+ * problem ranges all run un-relocated. Test real and virt modes
+ * at the same time by dropping the high bit of the nip (virt mode
+ * entry points still have the +0x4000 offset).
+ */
+ nip &= ~0xc000000000000000ULL;
+ if ((nip >= 0x500 && nip < 0x600) || (nip >= 0x4500 && nip < 0x4600))
+ goto nonrecoverable;
+ if ((nip >= 0x980 && nip < 0xa00) || (nip >= 0x4980 && nip < 0x4a00))
+ goto nonrecoverable;
+ if ((nip >= 0xe00 && nip < 0xec0) || (nip >= 0x4e00 && nip < 0x4ec0))
+ goto nonrecoverable;
+ if ((nip >= 0xf80 && nip < 0xfa0) || (nip >= 0x4f80 && nip < 0x4fa0))
+ goto nonrecoverable;
+
+ /* Trampoline code runs un-relocated so subtract kbase. */
+ if (nip >= (unsigned long)(start_real_trampolines - kbase) &&
+ nip < (unsigned long)(end_real_trampolines - kbase))
+ goto nonrecoverable;
+ if (nip >= (unsigned long)(start_virt_trampolines - kbase) &&
+ nip < (unsigned long)(end_virt_trampolines - kbase))
+ goto nonrecoverable;
+ return;
-long hmi_exception_realmode(struct pt_regs *regs)
+nonrecoverable:
+ regs->msr &= ~MSR_RI;
+ local_paca->hsrr_valid = 0;
+ local_paca->srr_valid = 0;
+#endif
+}
+DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
{
- __get_cpu_var(irq_stat).hmi_exceptions++;
+ unsigned long hsrr0, hsrr1;
+ bool saved_hsrrs = false;
+
+ /*
+ * System reset can interrupt code where HSRRs are live and MSR[RI]=1.
+ * The system reset interrupt itself may clobber HSRRs (e.g., to call
+ * OPAL), so save them here and restore them before returning.
+ *
+ * Machine checks don't need to save HSRRs, as the real mode handler
+ * is careful to avoid them, and the regular handler is not delivered
+ * as an NMI.
+ */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ hsrr0 = mfspr(SPRN_HSRR0);
+ hsrr1 = mfspr(SPRN_HSRR1);
+ saved_hsrrs = true;
+ }
- if (ppc_md.hmi_exception_early)
- ppc_md.hmi_exception_early(regs);
+ hv_nmi_check_nonrecoverable(regs);
- return 0;
-}
+ __this_cpu_inc(irq_stat.sreset_irqs);
+
+ /* See if any machine dependent calls */
+ if (ppc_md.system_reset_exception) {
+ if (ppc_md.system_reset_exception(regs))
+ goto out;
+ }
+
+ if (debugger(regs))
+ goto out;
+
+ kmsg_dump(KMSG_DUMP_OOPS);
+ /*
+ * A system reset is a request to dump, so we always send
+ * it through the crashdump code (if fadump or kdump are
+ * registered).
+ */
+ crash_fadump(regs, "System Reset");
+ crash_kexec(regs);
+
+ /*
+ * We aren't the primary crash CPU. We need to send it
+ * to a holding pattern to avoid it ending up in the panic
+ * code.
+ */
+ crash_kexec_secondary(regs);
+
+ /*
+ * No debugger or crash dump registered, print logs then
+ * panic.
+ */
+ die("System Reset", regs, SIGABRT);
+
+ mdelay(2*MSEC_PER_SEC); /* Wait a little while for others to print */
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+ nmi_panic(regs, "System Reset");
+
+out:
+#ifdef CONFIG_PPC_BOOK3S_64
+ BUG_ON(get_paca()->in_nmi == 0);
+ if (get_paca()->in_nmi > 1)
+ die("Unrecoverable nested System Reset", regs, SIGABRT);
#endif
+ /* Must die if the interrupt is not recoverable */
+ if (regs_is_unrecoverable(regs)) {
+ /* For the reason explained in die_mce, nmi_exit before die */
+ nmi_exit();
+ die("Unrecoverable System Reset", regs, SIGABRT);
+ }
+
+ if (saved_hsrrs) {
+ mtspr(SPRN_HSRR0, hsrr0);
+ mtspr(SPRN_HSRR1, hsrr1);
+ }
+
+ /* What should we do here? We could issue a shutdown or hard reset. */
+
+ return 0;
+}
/*
* I/O accesses can cause machine checks on powermacs.
* Check if the NIP corresponds to the address of a sync
* instruction for which there is an entry in the exception
* table.
- * Note that the 601 only takes a machine check on TEA
- * (transfer error ack) signal assertion, and does not
- * set any of the top 16 bits of SRR1.
* -- paulus.
*/
static inline int check_io_access(struct pt_regs *regs)
@@ -341,12 +545,11 @@ static inline int check_io_access(struct pt_regs *regs)
* For the debug message, we look at the preceding
* load or store.
*/
- if (*nip == 0x60000000) /* nop */
+ if (*nip == PPC_RAW_NOP())
nip -= 2;
- else if (*nip == 0x4c00012c) /* isync */
+ else if (*nip == PPC_RAW_ISYNC())
--nip;
- if (*nip == 0x7c0004ac || (*nip >> 26) == 3) {
- /* sync or twi */
+ if (*nip == PPC_RAW_SYNC() || get_op(*nip) == OP_TRAP) {
unsigned int rb;
--nip;
@@ -354,8 +557,8 @@ static inline int check_io_access(struct pt_regs *regs)
printk(KERN_DEBUG "%s bad port %lx at %p\n",
(*nip & 0x100)? "OUT to": "IN from",
regs->gpr[rb] - _IO_BASE, nip);
- regs->msr |= MSR_RI;
- regs->nip = entry->fixup;
+ regs_set_recoverable(regs);
+ regs_set_return_ip(regs, extable_fixup(entry));
return 1;
}
}
@@ -366,130 +569,42 @@ static inline int check_io_access(struct pt_regs *regs)
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
/* On 4xx, the reason for the machine check or program exception
is in the ESR. */
-#define get_reason(regs) ((regs)->dsisr)
-#ifndef CONFIG_FSL_BOOKE
-#define get_mc_reason(regs) ((regs)->dsisr)
-#else
-#define get_mc_reason(regs) (mfspr(SPRN_MCSR))
-#endif
+#define get_reason(regs) ((regs)->esr)
#define REASON_FP ESR_FP
#define REASON_ILLEGAL (ESR_PIL | ESR_PUO)
#define REASON_PRIVILEGED ESR_PPR
#define REASON_TRAP ESR_PTR
+#define REASON_PREFIXED 0
+#define REASON_BOUNDARY 0
/* single-step stuff */
#define single_stepping(regs) (current->thread.debug.dbcr0 & DBCR0_IC)
#define clear_single_step(regs) (current->thread.debug.dbcr0 &= ~DBCR0_IC)
-
+#define clear_br_trace(regs) do {} while(0)
#else
/* On non-4xx, the reason for the machine check or program
exception is in the MSR. */
#define get_reason(regs) ((regs)->msr)
-#define get_mc_reason(regs) ((regs)->msr)
-#define REASON_TM 0x200000
-#define REASON_FP 0x100000
-#define REASON_ILLEGAL 0x80000
-#define REASON_PRIVILEGED 0x40000
-#define REASON_TRAP 0x20000
+#define REASON_TM SRR1_PROGTM
+#define REASON_FP SRR1_PROGFPE
+#define REASON_ILLEGAL SRR1_PROGILL
+#define REASON_PRIVILEGED SRR1_PROGPRIV
+#define REASON_TRAP SRR1_PROGTRAP
+#define REASON_PREFIXED SRR1_PREFIXED
+#define REASON_BOUNDARY SRR1_BOUNDARY
#define single_stepping(regs) ((regs)->msr & MSR_SE)
-#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE)
+#define clear_single_step(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_SE))
+#define clear_br_trace(regs) (regs_set_return_msr((regs), (regs)->msr & ~MSR_BE))
#endif
-#if defined(CONFIG_4xx)
-int machine_check_4xx(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- if (reason & ESR_IMCP) {
- printk("Instruction");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- } else
- printk("Data");
- printk(" machine check in kernel mode.\n");
-
- return 0;
-}
-
-int machine_check_440A(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
+#define inst_length(reason) (((reason) & REASON_PREFIXED) ? 8 : 4)
- printk("Machine check in kernel mode.\n");
- if (reason & ESR_IMCP){
- printk("Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- }
- else {
- u32 mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk("Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk("Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk("Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk("TLB Parity Error\n");
- if (mcsr & MCSR_ICP){
- flush_instruction_cache();
- printk("I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk("D-Cache Search Parity Error\n");
- if (mcsr & MCSR_DCFP)
- printk("D-Cache Flush Parity Error\n");
- if (mcsr & MCSR_IMPE)
- printk("Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
- }
- return 0;
-}
-
-int machine_check_47x(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
- u32 mcsr;
-
- printk(KERN_ERR "Machine check in kernel mode.\n");
- if (reason & ESR_IMCP) {
- printk(KERN_ERR
- "Instruction Synchronous Machine Check exception\n");
- mtspr(SPRN_ESR, reason & ~ESR_IMCP);
- return 0;
- }
- mcsr = mfspr(SPRN_MCSR);
- if (mcsr & MCSR_IB)
- printk(KERN_ERR "Instruction Read PLB Error\n");
- if (mcsr & MCSR_DRB)
- printk(KERN_ERR "Data Read PLB Error\n");
- if (mcsr & MCSR_DWB)
- printk(KERN_ERR "Data Write PLB Error\n");
- if (mcsr & MCSR_TLBP)
- printk(KERN_ERR "TLB Parity Error\n");
- if (mcsr & MCSR_ICP) {
- flush_instruction_cache();
- printk(KERN_ERR "I-Cache Parity Error\n");
- }
- if (mcsr & MCSR_DCSP)
- printk(KERN_ERR "D-Cache Search Parity Error\n");
- if (mcsr & PPC47x_MCSR_GPR)
- printk(KERN_ERR "GPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_FPR)
- printk(KERN_ERR "FPR Parity Error\n");
- if (mcsr & PPC47x_MCSR_IPR)
- printk(KERN_ERR "Machine Check exception is imprecise\n");
-
- /* Clear MCSR */
- mtspr(SPRN_MCSR, mcsr);
-
- return 0;
-}
-#elif defined(CONFIG_E500)
+#if defined(CONFIG_PPC_E500)
int machine_check_e500mc(struct pt_regs *regs)
{
unsigned long mcsr = mfspr(SPRN_MCSR);
+ unsigned long pvr = mfspr(SPRN_PVR);
unsigned long reason = mcsr;
int recoverable = 1;
@@ -503,10 +618,10 @@ int machine_check_e500mc(struct pt_regs *regs)
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR) {
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
/*
* This is recoverable by invalidating the i-cache.
@@ -524,50 +639,57 @@ int machine_check_e500mc(struct pt_regs *regs)
}
if (reason & MCSR_DCPERR_MC) {
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
/*
* In write shadow mode we auto-recover from the error, but it
* may still get logged and cause a machine check. We should
* only treat the non-write shadow case as non-recoverable.
*/
- if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
- recoverable = 0;
+ /* On e6500 core, L1 DCWS (Data cache write shadow mode) bit
+ * is not implemented but L1 data cache always runs in write
+ * shadow mode. Hence on data cache parity errors HW will
+ * automatically invalidate the L1 Data Cache.
+ */
+ if (PVR_VER(pvr) != PVR_VER_E6500) {
+ if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
+ recoverable = 0;
+ }
}
if (reason & MCSR_L2MMU_MHIT) {
- printk("Hit on multiple TLB entries\n");
+ pr_cont("Hit on multiple TLB entries\n");
recoverable = 0;
}
if (reason & MCSR_NMI)
- printk("Non-maskable interrupt\n");
+ pr_cont("Non-maskable interrupt\n");
if (reason & MCSR_IF) {
- printk("Instruction Fetch Error Report\n");
+ pr_cont("Instruction Fetch Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LD) {
- printk("Load Error Report\n");
+ pr_cont("Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_ST) {
- printk("Store Error Report\n");
+ pr_cont("Store Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LDG) {
- printk("Guarded Load Error Report\n");
+ pr_cont("Guarded Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_TLBSYNC)
- printk("Simultaneous tlbsync operations\n");
+ pr_cont("Simultaneous tlbsync operations\n");
if (reason & MCSR_BSL2_ERR) {
- printk("Level 2 Cache Error\n");
+ pr_cont("Level 2 Cache Error\n");
recoverable = 0;
}
@@ -577,7 +699,7 @@ int machine_check_e500mc(struct pt_regs *regs)
addr = mfspr(SPRN_MCAR);
addr |= (u64)mfspr(SPRN_MCARU) << 32;
- printk("Machine Check %s Address: %#llx\n",
+ pr_cont("Machine Check %s Address: %#llx\n",
reason & MCSR_MEA ? "Effective" : "Physical", addr);
}
@@ -588,7 +710,7 @@ silent_out:
int machine_check_e500(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = mfspr(SPRN_MCSR);
if (reason & MCSR_BUS_RBERR) {
if (fsl_rio_mcheck_exception(regs))
@@ -601,29 +723,29 @@ int machine_check_e500(struct pt_regs *regs)
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR)
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
if (reason & MCSR_DCP_PERR)
- printk("Data Cache Push Parity Error\n");
+ pr_cont("Data Cache Push Parity Error\n");
if (reason & MCSR_DCPERR)
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
if (reason & MCSR_BUS_IAERR)
- printk("Bus - Instruction Address Error\n");
+ pr_cont("Bus - Instruction Address Error\n");
if (reason & MCSR_BUS_RAERR)
- printk("Bus - Read Address Error\n");
+ pr_cont("Bus - Read Address Error\n");
if (reason & MCSR_BUS_WAERR)
- printk("Bus - Write Address Error\n");
+ pr_cont("Bus - Write Address Error\n");
if (reason & MCSR_BUS_IBERR)
- printk("Bus - Instruction Data Error\n");
+ pr_cont("Bus - Instruction Data Error\n");
if (reason & MCSR_BUS_RBERR)
- printk("Bus - Read Data Bus Error\n");
+ pr_cont("Bus - Read Data Bus Error\n");
if (reason & MCSR_BUS_WBERR)
- printk("Bus - Write Data Bus Error\n");
+ pr_cont("Bus - Write Data Bus Error\n");
if (reason & MCSR_BUS_IPERR)
- printk("Bus - Instruction Parity Error\n");
+ pr_cont("Bus - Instruction Parity Error\n");
if (reason & MCSR_BUS_RPERR)
- printk("Bus - Read Parity Error\n");
+ pr_cont("Bus - Read Parity Error\n");
return 0;
}
@@ -632,75 +754,72 @@ int machine_check_generic(struct pt_regs *regs)
{
return 0;
}
-#elif defined(CONFIG_E200)
-int machine_check_e200(struct pt_regs *regs)
-{
- unsigned long reason = get_mc_reason(regs);
-
- printk("Machine check in kernel mode.\n");
- printk("Caused by (from MCSR=%lx): ", reason);
-
- if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
- if (reason & MCSR_CP_PERR)
- printk("Cache Push Parity Error\n");
- if (reason & MCSR_CPERR)
- printk("Cache Parity Error\n");
- if (reason & MCSR_EXCP_ERR)
- printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
- if (reason & MCSR_BUS_IRERR)
- printk("Bus - Read Bus Error on instruction fetch\n");
- if (reason & MCSR_BUS_DRERR)
- printk("Bus - Read Bus Error on data load\n");
- if (reason & MCSR_BUS_WRERR)
- printk("Bus - Write Bus Error on buffered store or cache line push\n");
-
- return 0;
-}
-#else
+#elif defined(CONFIG_PPC32)
int machine_check_generic(struct pt_regs *regs)
{
- unsigned long reason = get_mc_reason(regs);
+ unsigned long reason = regs->msr;
printk("Machine check in kernel mode.\n");
printk("Caused by (from SRR1=%lx): ", reason);
switch (reason & 0x601F0000) {
case 0x80000:
- printk("Machine check signal\n");
+ pr_cont("Machine check signal\n");
break;
- case 0: /* for 601 */
case 0x40000:
case 0x140000: /* 7450 MSS error and TEA */
- printk("Transfer error ack signal\n");
+ pr_cont("Transfer error ack signal\n");
break;
case 0x20000:
- printk("Data parity error signal\n");
+ pr_cont("Data parity error signal\n");
break;
case 0x10000:
- printk("Address parity error signal\n");
+ pr_cont("Address parity error signal\n");
break;
case 0x20000000:
- printk("L1 Data Cache error\n");
+ pr_cont("L1 Data Cache error\n");
break;
case 0x40000000:
- printk("L1 Instruction Cache error\n");
+ pr_cont("L1 Instruction Cache error\n");
break;
case 0x00100000:
- printk("L2 data cache parity error\n");
+ pr_cont("L2 data cache parity error\n");
break;
default:
- printk("Unknown values in msr\n");
+ pr_cont("Unknown values in msr\n");
}
return 0;
}
#endif /* everything else */
-void machine_check_exception(struct pt_regs *regs)
+void die_mce(const char *str, struct pt_regs *regs, long err)
+{
+ /*
+ * The machine check wants to kill the interrupted context,
+ * but make_task_dead() checks for in_interrupt() and panics
+ * in that case, so exit the irq/nmi before calling die.
+ */
+ if (in_nmi())
+ nmi_exit();
+ else
+ irq_exit();
+ die(str, regs, err);
+}
+
+/*
+ * BOOK3S_64 does not usually call this handler as a non-maskable interrupt
+ * (it uses its own early real-mode handler to handle the MCE proper
+ * and then raises irq_work to call this handler when interrupts are
+ * enabled). The only time when this is not true is if the early handler
+ * is unrecoverable, then it does call this directly to try to get a
+ * message out.
+ */
+static void __machine_check_exception(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
int recover = 0;
- __get_cpu_var(irq_stat).mce_exceptions++;
+ __this_cpu_inc(irq_stat.mce_exceptions);
+
+ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
/* See if any machine dependent calls. In theory, we would want
* to call the CPU first, and call the ppc_md. one if the CPU
@@ -716,100 +835,320 @@ void machine_check_exception(struct pt_regs *regs)
if (recover > 0)
goto bail;
-#if defined(CONFIG_8xx) && defined(CONFIG_PCI)
- /* the qspan pci read routines can cause machine checks -- Cort
- *
- * yuck !!! that totally needs to go away ! There are better ways
- * to deal with that than having a wart in the mcheck handler.
- * -- BenH
- */
- bad_page_fault(regs, regs->dar, SIGBUS);
- goto bail;
-#endif
-
if (debugger_fault_handler(regs))
goto bail;
if (check_io_access(regs))
goto bail;
- die("Machine check", regs, SIGBUS);
+ die_mce("Machine check", regs, SIGBUS);
+bail:
/* Must die if the interrupt is not recoverable */
- if (!(regs->msr & MSR_RI))
- panic("Unrecoverable Machine check");
+ if (regs_is_unrecoverable(regs))
+ die_mce("Unrecoverable Machine check", regs, SIGBUS);
+}
-bail:
- exception_exit(prev_state);
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER_RAW(machine_check_early_boot)
+{
+ udbg_printf("Machine check (early boot)\n");
+ udbg_printf("SRR0=0x%016lx SRR1=0x%016lx\n", regs->nip, regs->msr);
+ udbg_printf(" DAR=0x%016lx DSISR=0x%08lx\n", regs->dar, regs->dsisr);
+ udbg_printf(" LR=0x%016lx R1=0x%08lx\n", regs->link, regs->gpr[1]);
+ udbg_printf("------\n");
+ die("Machine check (early boot)", regs, SIGBUS);
+ for (;;)
+ ;
+ return 0;
+}
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async)
+{
+ __machine_check_exception(regs);
+}
+#endif
+DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
+{
+ __machine_check_exception(regs);
+
+ return 0;
}
-void SMIException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */
{
die("System Management Interrupt", regs, SIGABRT);
}
-void handle_hmi_exception(struct pt_regs *regs)
+#ifdef CONFIG_VSX
+static void p9_hmi_special_emu(struct pt_regs *regs)
+{
+ unsigned int ra, rb, t, i, sel, instr, rc;
+ const void __user *addr;
+ u8 vbuf[16] __aligned(16), *vdst;
+ unsigned long ea, msr, msr_mask;
+ bool swap;
+
+ if (__get_user(instr, (unsigned int __user *)regs->nip))
+ return;
+
+ /*
+ * lxvb16x opcode: 0x7c0006d8
+ * lxvd2x opcode: 0x7c000698
+ * lxvh8x opcode: 0x7c000658
+ * lxvw4x opcode: 0x7c000618
+ */
+ if ((instr & 0xfc00073e) != 0x7c000618) {
+ pr_devel("HMI vec emu: not vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr);
+ return;
+ }
+
+ /* Grab vector registers into the task struct */
+ msr = regs->msr; /* Grab msr before we flush the bits */
+ flush_vsx_to_thread(current);
+ enable_kernel_altivec();
+
+ /*
+ * Is userspace running with a different endian (this is rare but
+ * not impossible)
+ */
+ swap = (msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+
+ /* Decode the instruction */
+ ra = (instr >> 16) & 0x1f;
+ rb = (instr >> 11) & 0x1f;
+ t = (instr >> 21) & 0x1f;
+ if (instr & 1)
+ vdst = (u8 *)&current->thread.vr_state.vr[t];
+ else
+ vdst = (u8 *)&current->thread.fp_state.fpr[t][0];
+
+ /* Grab the vector address */
+ ea = regs->gpr[rb] + (ra ? regs->gpr[ra] : 0);
+ if (is_32bit_task())
+ ea &= 0xfffffffful;
+ addr = (__force const void __user *)ea;
+
+ /* Check it */
+ if (!access_ok(addr, 16)) {
+ pr_devel("HMI vec emu: bad access %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ /* Read the vector */
+ rc = 0;
+ if ((unsigned long)addr & 0xfUL)
+ /* unaligned case */
+ rc = __copy_from_user_inatomic(vbuf, addr, 16);
+ else
+ __get_user_atomic_128_aligned(vbuf, addr, rc);
+ if (rc) {
+ pr_devel("HMI vec emu: page fault %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, (unsigned long)addr);
+ return;
+ }
+
+ pr_devel("HMI vec emu: emulated vector CI %i:%s[%d] nip=%016lx"
+ " instr=%08x addr=%016lx\n",
+ smp_processor_id(), current->comm, current->pid, regs->nip,
+ instr, (unsigned long) addr);
+
+ /* Grab instruction "selector" */
+ sel = (instr >> 6) & 3;
+
+ /*
+ * Check to make sure the facility is actually enabled. This
+ * could happen if we get a false positive hit.
+ *
+ * lxvd2x/lxvw4x always check MSR VSX sel = 0,2
+ * lxvh8x/lxvb16x check MSR VSX or VEC depending on VSR used sel = 1,3
+ */
+ msr_mask = MSR_VSX;
+ if ((sel & 1) && (instr & 1)) /* lxvh8x & lxvb16x + VSR >= 32 */
+ msr_mask = MSR_VEC;
+ if (!(msr & msr_mask)) {
+ pr_devel("HMI vec emu: MSR fac clear %i:%s[%d] nip=%016lx"
+ " instr=%08x msr:%016lx\n",
+ smp_processor_id(), current->comm, current->pid,
+ regs->nip, instr, msr);
+ return;
+ }
+
+ /* Do logging here before we modify sel based on endian */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ PPC_WARN_EMULATED(lxvw4x, regs);
+ break;
+ case 1: /* lxvh8x */
+ PPC_WARN_EMULATED(lxvh8x, regs);
+ break;
+ case 2: /* lxvd2x */
+ PPC_WARN_EMULATED(lxvd2x, regs);
+ break;
+ case 3: /* lxvb16x */
+ PPC_WARN_EMULATED(lxvb16x, regs);
+ break;
+ }
+
+#ifdef __LITTLE_ENDIAN__
+ /*
+ * An LE kernel stores the vector in the task struct as an LE
+ * byte array (effectively swapping both the components and
+ * the content of the components). Those instructions expect
+ * the components to remain in ascending address order, so we
+ * swap them back.
+ *
+ * If we are running a BE user space, the expectation is that
+ * of a simple memcpy, so forcing the emulation to look like
+ * a lxvb16x should do the trick.
+ */
+ if (swap)
+ sel = 3;
+
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = ((u32 *)vbuf)[3-i];
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = ((u16 *)vbuf)[7-i];
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = ((u64 *)vbuf)[1-i];
+ break;
+ case 3: /* lxvb16x */
+ for (i = 0; i < 16; i++)
+ vdst[i] = vbuf[15-i];
+ break;
+ }
+#else /* __LITTLE_ENDIAN__ */
+ /* On a big endian kernel, a BE userspace only needs a memcpy */
+ if (!swap)
+ sel = 3;
+
+ /* Otherwise, we need to swap the content of the components */
+ switch (sel) {
+ case 0: /* lxvw4x */
+ for (i = 0; i < 4; i++)
+ ((u32 *)vdst)[i] = cpu_to_le32(((u32 *)vbuf)[i]);
+ break;
+ case 1: /* lxvh8x */
+ for (i = 0; i < 8; i++)
+ ((u16 *)vdst)[i] = cpu_to_le16(((u16 *)vbuf)[i]);
+ break;
+ case 2: /* lxvd2x */
+ for (i = 0; i < 2; i++)
+ ((u64 *)vdst)[i] = cpu_to_le64(((u64 *)vbuf)[i]);
+ break;
+ case 3: /* lxvb16x */
+ memcpy(vdst, vbuf, 16);
+ break;
+ }
+#endif /* !__LITTLE_ENDIAN__ */
+
+ /* Go to next instruction */
+ regs_add_return_ip(regs, 4);
+}
+#endif /* CONFIG_VSX */
+
+DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception)
{
struct pt_regs *old_regs;
old_regs = set_irq_regs(regs);
- irq_enter();
+
+#ifdef CONFIG_VSX
+ /* Real mode flagged P9 special emu is needed */
+ if (local_paca->hmi_p9_special_emu) {
+ local_paca->hmi_p9_special_emu = 0;
+
+ /*
+ * We don't want to take page faults while doing the
+ * emulation, we just replay the instruction if necessary.
+ */
+ pagefault_disable();
+ p9_hmi_special_emu(regs);
+ pagefault_enable();
+ }
+#endif /* CONFIG_VSX */
if (ppc_md.handle_hmi_exception)
ppc_md.handle_hmi_exception(regs);
- irq_exit();
set_irq_regs(old_regs);
}
-void unknown_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(unknown_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
regs->nip, regs->msr, regs->trap);
- _exception(SIGTRAP, regs, 0, 0);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
+}
- exception_exit(prev_state);
+DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception)
+{
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
+
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-void instruction_breakpoint_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER_NMI(unknown_nmi_exception)
{
- enum ctx_state prev_state = exception_enter();
+ printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
+ regs->nip, regs->msr, regs->trap);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
+
+ return 0;
+}
+
+DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception)
+{
if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_iabr_match(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
-
-bail:
- exception_exit(prev_state);
}
-void RunModeException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(RunModeException)
{
- _exception(SIGTRAP, regs, 0, 0);
+ _exception(SIGTRAP, regs, TRAP_UNK, 0);
}
-void __kprobes single_step_exception(struct pt_regs *regs)
+static void __single_step_exception(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
-
clear_single_step(regs);
+ clear_br_trace(regs);
+
+ if (kprobe_post_handler(regs))
+ return;
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
- goto bail;
+ return;
if (debugger_sstep(regs))
- goto bail;
+ return;
_exception(SIGTRAP, regs, TRAP_TRACE, regs->nip);
+}
-bail:
- exception_exit(prev_state);
+DEFINE_INTERRUPT_HANDLER(single_step_exception)
+{
+ __single_step_exception(regs);
}
/*
@@ -818,15 +1157,16 @@ bail:
* pretend we got a single-step exception. This was pointed out
* by Kumar Gala. -- paulus
*/
-static void emulate_single_step(struct pt_regs *regs)
+void emulate_single_step(struct pt_regs *regs)
{
if (single_stepping(regs))
- single_step_exception(regs);
+ __single_step_exception(regs);
}
+#ifdef CONFIG_PPC_FPU_REGS
static inline int __parse_fpscr(unsigned long fpscr)
{
- int ret = 0;
+ int ret = FPE_FLTUNK;
/* Invalid operation */
if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
@@ -850,6 +1190,7 @@ static inline int __parse_fpscr(unsigned long fpscr)
return ret;
}
+#endif
static void parse_fpe(struct pt_regs *regs)
{
@@ -857,7 +1198,9 @@ static void parse_fpe(struct pt_regs *regs)
flush_fp_to_thread(current);
+#ifdef CONFIG_PPC_FPU_REGS
code = __parse_fpscr(current->thread.fp_state.fpscr);
+#endif
_exception(SIGFPE, regs, code, regs->nip);
}
@@ -1008,7 +1351,6 @@ static int emulate_instruction(struct pt_regs *regs)
if (!user_mode(regs))
return -EINVAL;
- CHECK_FULL_REGS(regs);
if (get_user(instword, (u32 __user *)(regs->nip)))
return -EFAULT;
@@ -1096,16 +1438,17 @@ static int emulate_instruction(struct pt_regs *regs)
return -EINVAL;
}
+#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long addr)
{
return is_kernel_addr(addr);
}
+#endif
#ifdef CONFIG_MATH_EMULATION
static int emulate_math(struct pt_regs *regs)
{
int ret;
- extern int do_mathemu(struct pt_regs *regs);
ret = do_mathemu(regs);
if (ret >= 0)
@@ -1132,9 +1475,8 @@ static int emulate_math(struct pt_regs *regs)
static inline int emulate_math(struct pt_regs *regs) { return -1; }
#endif
-void __kprobes program_check_exception(struct pt_regs *regs)
+static void do_program_check(struct pt_regs *regs)
{
- enum ctx_state prev_state = exception_enter();
unsigned int reason = get_reason(regs);
/* We can now get here via a FP Unavailable exception if the core
@@ -1143,26 +1485,41 @@ void __kprobes program_check_exception(struct pt_regs *regs)
if (reason & REASON_FP) {
/* IEEE FP exception */
parse_fpe(regs);
- goto bail;
+ return;
}
if (reason & REASON_TRAP) {
+ unsigned long bugaddr;
/* Debugger is first in line to stop recursive faults in
* rcu_lock, notify_die, or atomic_notifier_call_chain */
if (debugger_bpt(regs))
- goto bail;
+ return;
+
+ if (kprobe_handler(regs))
+ return;
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
- goto bail;
+ return;
+
+ bugaddr = regs->nip;
+ /*
+ * Fixup bugaddr for BUG_ON() in real mode
+ */
+ if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR))
+ bugaddr += PAGE_OFFSET;
- if (!(regs->msr & MSR_PR) && /* not user-mode */
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
- goto bail;
+ if (!user_mode(regs) &&
+ report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
+ regs_add_return_ip(regs, 4);
+ return;
+ }
+
+ /* User mode considers other cases after enabling IRQs */
+ if (!user_mode(regs)) {
+ _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+ return;
}
- _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
- goto bail;
}
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (reason & REASON_TM) {
@@ -1174,13 +1531,8 @@ void __kprobes program_check_exception(struct pt_regs *regs)
* - A treclaim is attempted when non transactional.
* - A tend is illegally attempted.
* - writing a TM SPR when transactional.
- */
- if (!user_mode(regs) &&
- report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
- regs->nip += 4;
- goto bail;
- }
- /* If usermode caused this, it's done something illegal and
+ *
+ * If usermode caused this, it's done something illegal and
* gets a SIGILL slap on the wrist. We call it an illegal
* operand to distinguish from the instruction just being bad
* (e.g. executing a 'tend' on a CPU without TM!); it's an
@@ -1188,10 +1540,11 @@ void __kprobes program_check_exception(struct pt_regs *regs)
*/
if (user_mode(regs)) {
_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
- goto bail;
+ return;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
- "at %lx (msr 0x%x)\n", regs->nip, reason);
+ "at %lx (msr 0x%lx) tm_scratch=%llx\n",
+ regs->nip, regs->msr, get_paca()->tm_scratch);
die("Unrecoverable exception", regs, SIGABRT);
}
}
@@ -1199,17 +1552,43 @@ void __kprobes program_check_exception(struct pt_regs *regs)
/*
* If we took the program check in the kernel skip down to sending a
- * SIGILL. The subsequent cases all relate to emulating instructions
- * which we should only do for userspace. We also do not want to enable
- * interrupts for kernel faults because that might lead to further
- * faults, and loose the context of the original exception.
+ * SIGILL. The subsequent cases all relate to user space, such as
+ * emulating instructions which we should only do for user space. We
+ * also do not want to enable interrupts for kernel faults because that
+ * might lead to further faults, and loose the context of the original
+ * exception.
*/
if (!user_mode(regs))
goto sigill;
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ interrupt_cond_local_irq_enable(regs);
+
+ /*
+ * (reason & REASON_TRAP) is mostly handled before enabling IRQs,
+ * except get_user_instr() can sleep so we cannot reliably inspect the
+ * current instruction in that context. Now that we know we are
+ * handling a user space trap and can sleep, we can check if the trap
+ * was a hashchk failure.
+ */
+ if (reason & REASON_TRAP) {
+ if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) {
+ ppc_inst_t insn;
+
+ if (get_user_instr(insn, (void __user *)regs->nip)) {
+ _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
+ return;
+ }
+
+ if (ppc_inst_primary_opcode(insn) == 31 &&
+ get_xop(ppc_inst_val(insn)) == OP_31_XOP_HASHCHK) {
+ _exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
+ return;
+ }
+ }
+
+ _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
+ return;
+ }
/* (reason & REASON_ILLEGAL) would be the obvious thing here,
* but there seems to be a hardware bug on the 405GP (RevD)
@@ -1220,18 +1599,18 @@ void __kprobes program_check_exception(struct pt_regs *regs)
* pattern to occurrences etc. -dgibson 31/Mar/2003
*/
if (!emulate_math(regs))
- goto bail;
+ return;
/* Try to emulate it if we should. */
if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) {
switch (emulate_instruction(regs)) {
case 0:
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
emulate_single_step(regs);
- goto bail;
+ return;
case -EFAULT:
_exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip);
- goto bail;
+ return;
}
}
@@ -1241,40 +1620,49 @@ sigill:
else
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-bail:
- exception_exit(prev_state);
+}
+
+DEFINE_INTERRUPT_HANDLER(program_check_exception)
+{
+ do_program_check(regs);
}
/*
* This occurs when running in hypervisor mode on POWER6 or later
* and an illegal instruction is encountered.
*/
-void __kprobes emulation_assist_interrupt(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt)
{
- regs->msr |= REASON_ILLEGAL;
- program_check_exception(regs);
+ regs_set_return_msr(regs, regs->msr | REASON_ILLEGAL);
+ do_program_check(regs);
}
-void alignment_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(alignment_exception)
{
- enum ctx_state prev_state = exception_enter();
int sig, code, fixed = 0;
+ unsigned long reason;
+
+ interrupt_cond_local_irq_enable(regs);
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ reason = get_reason(regs);
+ if (reason & REASON_BOUNDARY) {
+ sig = SIGBUS;
+ code = BUS_ADRALN;
+ goto bad;
+ }
if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT))
- goto bail;
+ return;
/* we don't implement logging of alignment exceptions */
if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS))
fixed = fix_alignment(regs);
if (fixed == 1) {
- regs->nip += 4; /* skip over emulated instruction */
+ /* skip over emulated instruction */
+ regs_add_return_ip(regs, inst_length(reason));
emulate_single_step(regs);
- goto bail;
+ return;
}
/* Operand address was bad */
@@ -1285,70 +1673,40 @@ void alignment_exception(struct pt_regs *regs)
sig = SIGBUS;
code = BUS_ADRALN;
}
+bad:
if (user_mode(regs))
_exception(sig, regs, code, regs->dar);
else
- bad_page_fault(regs, regs->dar, sig);
-
-bail:
- exception_exit(prev_state);
-}
-
-void StackOverflow(struct pt_regs *regs)
-{
- printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n",
- current, regs->gpr[1]);
- debugger(regs);
- show_regs(regs);
- panic("kernel stack overflow");
-}
-
-void nonrecoverable_exception(struct pt_regs *regs)
-{
- printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
- regs->nip, regs->msr);
- debugger(regs);
- die("nonrecoverable exception", regs, SIGKILL);
+ bad_page_fault(regs, sig);
}
-void trace_syscall(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(stack_overflow_exception)
{
- printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n",
- current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
- regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
+ die("Kernel stack overflow", regs, SIGSEGV);
}
-void kernel_fp_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
printk(KERN_EMERG "Unrecoverable FP Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable FP Unavailable Exception", regs, SIGABRT);
-
- exception_exit(prev_state);
}
-void altivec_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception)
{
- enum ctx_state prev_state = exception_enter();
-
if (user_mode(regs)) {
/* A user program has executed an altivec instruction,
but this kernel doesn't support altivec. */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
- goto bail;
+ return;
}
printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception "
"%lx at %lx\n", regs->trap, regs->nip);
die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT);
-
-bail:
- exception_exit(prev_state);
}
-void vsx_unavailable_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception)
{
if (user_mode(regs)) {
/* A user program has executed an vsx instruction,
@@ -1362,8 +1720,24 @@ void vsx_unavailable_exception(struct pt_regs *regs)
die("Unrecoverable VSX Unavailable Exception", regs, SIGABRT);
}
-#ifdef CONFIG_PPC64
-void facility_unavailable_exception(struct pt_regs *regs)
+#ifdef CONFIG_PPC_BOOK3S_64
+static void tm_unavailable(struct pt_regs *regs)
+{
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (user_mode(regs)) {
+ current->thread.load_tm++;
+ regs_set_return_msr(regs, regs->msr | MSR_TM);
+ tm_enable();
+ tm_restore_sprs(&current->thread);
+ return;
+ }
+#endif
+ pr_emerg("Unrecoverable TM Unavailable Exception "
+ "%lx at %lx\n", regs->trap, regs->nip);
+ die("Unrecoverable TM Unavailable Exception", regs, SIGABRT);
+}
+
+DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception)
{
static char *facility_strings[] = {
[FSCR_FP_LG] = "FPU",
@@ -1374,53 +1748,114 @@ void facility_unavailable_exception(struct pt_regs *regs)
[FSCR_TM_LG] = "TM",
[FSCR_EBB_LG] = "EBB",
[FSCR_TAR_LG] = "TAR",
+ [FSCR_MSGP_LG] = "MSGP",
+ [FSCR_SCV_LG] = "SCV",
+ [FSCR_PREFIX_LG] = "PREFIX",
};
char *facility = "unknown";
u64 value;
+ u32 instword, rd;
u8 status;
bool hv;
- hv = (regs->trap == 0xf80);
+ hv = (TRAP(regs) == INTERRUPT_H_FAC_UNAVAIL);
if (hv)
value = mfspr(SPRN_HFSCR);
else
value = mfspr(SPRN_FSCR);
status = value >> 56;
+ if ((hv || status >= 2) &&
+ (status < ARRAY_SIZE(facility_strings)) &&
+ facility_strings[status])
+ facility = facility_strings[status];
+
+ /* We should not have taken this interrupt in kernel */
+ if (!user_mode(regs)) {
+ pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n",
+ facility, status, regs->nip);
+ die("Unexpected facility unavailable exception", regs, SIGABRT);
+ }
+
+ interrupt_cond_local_irq_enable(regs);
+
if (status == FSCR_DSCR_LG) {
- /* User is acessing the DSCR. Set the inherit bit and allow
- * the user to set it directly in future by setting via the
- * FSCR DSCR bit. We always leave HFSCR DSCR set.
+ /*
+ * User is accessing the DSCR register using the problem
+ * state only SPR number (0x03) either through a mfspr or
+ * a mtspr instruction. If it is a write attempt through
+ * a mtspr, then we set the inherit bit. This also allows
+ * the user to write or read the register directly in the
+ * future by setting via the FSCR DSCR bit. But in case it
+ * is a read DSCR attempt through a mfspr instruction, we
+ * just emulate the instruction instead. This code path will
+ * always emulate all the mfspr instructions till the user
+ * has attempted at least one mtspr instruction. This way it
+ * preserves the same behaviour when the user is accessing
+ * the DSCR through privilege level only SPR number (0x11)
+ * which is emulated through illegal instruction exception.
+ * We always leave HFSCR DSCR set.
*/
- current->thread.dscr_inherit = 1;
- mtspr(SPRN_FSCR, value | FSCR_DSCR);
- return;
- }
+ if (get_user(instword, (u32 __user *)(regs->nip))) {
+ pr_err("Failed to fetch the user instruction\n");
+ return;
+ }
- if ((status < ARRAY_SIZE(facility_strings)) &&
- facility_strings[status])
- facility = facility_strings[status];
+ /* Write into DSCR (mtspr 0x03, RS) */
+ if ((instword & PPC_INST_MTSPR_DSCR_USER_MASK)
+ == PPC_INST_MTSPR_DSCR_USER) {
+ rd = (instword >> 21) & 0x1f;
+ current->thread.dscr = regs->gpr[rd];
+ current->thread.dscr_inherit = 1;
+ current->thread.fscr |= FSCR_DSCR;
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ }
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ /* Read from DSCR (mfspr RT, 0x03) */
+ if ((instword & PPC_INST_MFSPR_DSCR_USER_MASK)
+ == PPC_INST_MFSPR_DSCR_USER) {
+ if (emulate_instruction(regs)) {
+ pr_err("DSCR based mfspr emulation failed\n");
+ return;
+ }
+ regs_add_return_ip(regs, 4);
+ emulate_single_step(regs);
+ }
+ return;
+ }
- pr_err_ratelimited(
- "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
- hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
+ if (status == FSCR_TM_LG) {
+ /*
+ * If we're here then the hardware is TM aware because it
+ * generated an exception with FSRM_TM set.
+ *
+ * If cpu_has_feature(CPU_FTR_TM) is false, then either firmware
+ * told us not to do TM, or the kernel is not built with TM
+ * support.
+ *
+ * If both of those things are true, then userspace can spam the
+ * console by triggering the printk() below just by continually
+ * doing tbegin (or any TM instruction). So in that case just
+ * send the process a SIGILL immediately.
+ */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto out;
- if (user_mode(regs)) {
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ tm_unavailable(regs);
return;
}
- die("Unexpected facility unavailable exception", regs, SIGABRT);
+ pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
+ hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
+
+out:
+ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
}
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void fp_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm)
{
/* Note: This does not handle any kind of FP laziness. */
@@ -1435,28 +1870,25 @@ void fp_unavailable_tm(struct pt_regs *regs)
* checkpointed FP registers need to be loaded.
*/
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- /* Reclaim didn't save out any FPRs to transact_fprs. */
+
+ /*
+ * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and
+ * then it was overwrite by the thr->fp_state by tm_reclaim_thread().
+ *
+ * At this point, ck{fp,vr}_state contains the exact values we want to
+ * recheckpoint.
+ */
/* Enable FP for the task: */
- regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+ current->thread.load_fp = 1;
- /* This loads and recheckpoints the FP registers from
- * thread.fpr[]. They will remain in registers after the
- * checkpoint so we don't need to reload them after.
- * If VMX is in use, the VRs now hold checkpointed values,
- * so we don't want to load the VRs from the thread_struct.
+ /*
+ * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
*/
- tm_recheckpoint(&current->thread, MSR_FP);
-
- /* If VMX is in use, get the transactional values back */
- if (regs->msr & MSR_VEC) {
- do_load_up_transact_altivec(&current->thread);
- /* At this point all the VSX state is loaded, so enable it */
- regs->msr |= MSR_VSX;
- }
+ tm_recheckpoint(&current->thread);
}
-void altivec_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm)
{
/* See the comments in fp_unavailable_tm(). This function operates
* the same way.
@@ -1466,20 +1898,13 @@ void altivec_unavailable_tm(struct pt_regs *regs)
"MSR=%lx\n",
regs->nip, regs->msr);
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC;
- tm_recheckpoint(&current->thread, MSR_VEC);
+ current->thread.load_vec = 1;
+ tm_recheckpoint(&current->thread);
current->thread.used_vr = 1;
-
- if (regs->msr & MSR_FP) {
- do_load_up_transact_fpu(&current->thread);
- regs->msr |= MSR_VSX;
- }
}
-void vsx_unavailable_tm(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm)
{
- unsigned long orig_msr = regs->msr;
-
/* See the comments in fp_unavailable_tm(). This works similarly,
* though we're loading both FP and VEC registers in here.
*
@@ -1493,54 +1918,51 @@ void vsx_unavailable_tm(struct pt_regs *regs)
current->thread.used_vsr = 1;
- /* If FP and VMX are already loaded, we have all the state we need */
- if ((orig_msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC)) {
- regs->msr |= MSR_VSX;
- return;
- }
-
/* This reclaims FP and/or VR regs if they're already enabled */
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
- MSR_VSX;
-
- /* This loads & recheckpoints FP and VRs; but we have
- * to be sure not to overwrite previously-valid state.
- */
- tm_recheckpoint(&current->thread, regs->msr & ~orig_msr);
+ current->thread.load_vec = 1;
+ current->thread.load_fp = 1;
- if (orig_msr & MSR_FP)
- do_load_up_transact_fpu(&current->thread);
- if (orig_msr & MSR_VEC)
- do_load_up_transact_altivec(&current->thread);
+ tm_recheckpoint(&current->thread);
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
-void performance_monitor_exception(struct pt_regs *regs)
+#ifdef CONFIG_PPC64
+DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi);
+DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi)
{
- __get_cpu_var(irq_stat).pmu_irqs++;
+ __this_cpu_inc(irq_stat.pmu_irqs);
perf_irq(regs);
+
+ return 0;
}
+#endif
-#ifdef CONFIG_8xx
-void SoftwareEmulation(struct pt_regs *regs)
+DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async);
+DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async)
{
- CHECK_FULL_REGS(regs);
+ __this_cpu_inc(irq_stat.pmu_irqs);
- if (!user_mode(regs)) {
- debugger(regs);
- die("Kernel Mode Unimplemented Instruction or SW FPU Emulation",
- regs, SIGFPE);
- }
+ perf_irq(regs);
+}
- if (!emulate_math(regs))
- return;
+DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception)
+{
+ /*
+ * On 64-bit, if perf interrupts hit in a local_irq_disable
+ * (soft-masked) region, we consider them as NMIs. This is required to
+ * prevent hash faults on user addresses when reading callchains (and
+ * looks better from an irq tracing perspective).
+ */
+ if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+ performance_monitor_exception_nmi(regs);
+ else
+ performance_monitor_exception_async(regs);
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+ return 0;
}
-#endif /* CONFIG_8xx */
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
@@ -1555,34 +1977,34 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
#ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE
current->thread.debug.dbcr2 &= ~DBCR2_DAC12MODE;
#endif
- do_send_trap(regs, mfspr(SPRN_DAC1), debug_status, TRAP_HWBKPT,
+ do_send_trap(regs, mfspr(SPRN_DAC1), debug_status,
5);
changed |= 0x01;
} else if (debug_status & (DBSR_DAC2R | DBSR_DAC2W)) {
dbcr_dac(current) &= ~(DBCR_DAC2R | DBCR_DAC2W);
- do_send_trap(regs, mfspr(SPRN_DAC2), debug_status, TRAP_HWBKPT,
+ do_send_trap(regs, mfspr(SPRN_DAC2), debug_status,
6);
changed |= 0x01;
} else if (debug_status & DBSR_IAC1) {
current->thread.debug.dbcr0 &= ~DBCR0_IAC1;
dbcr_iac_range(current) &= ~DBCR_IAC12MODE;
- do_send_trap(regs, mfspr(SPRN_IAC1), debug_status, TRAP_HWBKPT,
+ do_send_trap(regs, mfspr(SPRN_IAC1), debug_status,
1);
changed |= 0x01;
} else if (debug_status & DBSR_IAC2) {
current->thread.debug.dbcr0 &= ~DBCR0_IAC2;
- do_send_trap(regs, mfspr(SPRN_IAC2), debug_status, TRAP_HWBKPT,
+ do_send_trap(regs, mfspr(SPRN_IAC2), debug_status,
2);
changed |= 0x01;
} else if (debug_status & DBSR_IAC3) {
current->thread.debug.dbcr0 &= ~DBCR0_IAC3;
dbcr_iac_range(current) &= ~DBCR_IAC34MODE;
- do_send_trap(regs, mfspr(SPRN_IAC3), debug_status, TRAP_HWBKPT,
+ do_send_trap(regs, mfspr(SPRN_IAC3), debug_status,
3);
changed |= 0x01;
} else if (debug_status & DBSR_IAC4) {
current->thread.debug.dbcr0 &= ~DBCR0_IAC4;
- do_send_trap(regs, mfspr(SPRN_IAC4), debug_status, TRAP_HWBKPT,
+ do_send_trap(regs, mfspr(SPRN_IAC4), debug_status,
4);
changed |= 0x01;
}
@@ -1593,7 +2015,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
*/
if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
current->thread.debug.dbcr1))
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
else
/* Make sure the IDM flag is off */
current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -1602,8 +2024,10 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status)
mtspr(SPRN_DBCR0, current->thread.debug.dbcr0);
}
-void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
+DEFINE_INTERRUPT_HANDLER(DebugException)
{
+ unsigned long debug_status = regs->dsisr;
+
current->thread.debug.dbsr = debug_status;
/* Hack alert: On BookE, Branch Taken stops on the branch itself, while
@@ -1612,7 +2036,7 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
* instead of stopping here when hitting a BT
*/
if (debug_status & DBSR_BT) {
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
/* Disable BT */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_BT);
@@ -1623,10 +2047,13 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
if (user_mode(regs)) {
current->thread.debug.dbcr0 &= ~DBCR0_BT;
current->thread.debug.dbcr0 |= DBCR0_IDM | DBCR0_IC;
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
return;
}
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "block_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP) {
return;
@@ -1634,13 +2061,16 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
if (debugger_sstep(regs))
return;
} else if (debug_status & DBSR_IC) { /* Instruction complete */
- regs->msr &= ~MSR_DE;
+ regs_set_return_msr(regs, regs->msr & ~MSR_DE);
/* Disable instruction completion */
mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC);
/* Clear the instruction completion event */
mtspr(SPRN_DBSR, DBSR_IC);
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP) {
return;
@@ -1653,7 +2083,7 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
current->thread.debug.dbcr0 &= ~DBCR0_IC;
if (DBCR_ACTIVE_EVENTS(current->thread.debug.dbcr0,
current->thread.debug.dbcr1))
- regs->msr |= MSR_DE;
+ regs_set_return_msr(regs, regs->msr | MSR_DE);
else
/* Make sure the IDM bit is off */
current->thread.debug.dbcr0 &= ~DBCR0_IDM;
@@ -1665,16 +2095,8 @@ void __kprobes DebugException(struct pt_regs *regs, unsigned long debug_status)
}
#endif /* CONFIG_PPC_ADV_DEBUG_REGS */
-#if !defined(CONFIG_TAU_INT)
-void TAUException(struct pt_regs *regs)
-{
- printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n",
- regs->nip, regs->msr, regs->trap, print_tainted());
-}
-#endif /* CONFIG_INT_TAU */
-
#ifdef CONFIG_ALTIVEC
-void altivec_assist_exception(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(altivec_assist_exception)
{
int err;
@@ -1689,7 +2111,7 @@ void altivec_assist_exception(struct pt_regs *regs)
PPC_WARN_EMULATED(altivec, regs);
err = emulate_altivec(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -1707,25 +2129,11 @@ void altivec_assist_exception(struct pt_regs *regs)
}
#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-void vsx_assist_exception(struct pt_regs *regs)
+#ifdef CONFIG_PPC_85xx
+DEFINE_INTERRUPT_HANDLER(CacheLockingException)
{
- if (!user_mode(regs)) {
- printk(KERN_EMERG "VSX assist exception in kernel mode"
- " at %lx\n", regs->nip);
- die("Kernel VSX assist exception", regs, SIGILL);
- }
+ unsigned long error_code = regs->dsisr;
- flush_vsx_to_thread(current);
- printk(KERN_INFO "VSX assist not supported at %lx\n", regs->nip);
- _exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
-}
-#endif /* CONFIG_VSX */
-
-#ifdef CONFIG_FSL_BOOKE
-void CacheLockingException(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
-{
/* We treat cache locking instructions from the user
* as priv ops, in the future we could try to do
* something smarter
@@ -1734,17 +2142,18 @@ void CacheLockingException(struct pt_regs *regs, unsigned long address,
_exception(SIGILL, regs, ILL_PRVOPC, regs->nip);
return;
}
-#endif /* CONFIG_FSL_BOOKE */
+#endif /* CONFIG_PPC_85xx */
#ifdef CONFIG_SPE
-void SPEFloatingPointException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException)
{
- extern int do_spe_mathemu(struct pt_regs *regs);
unsigned long spefscr;
int fpexc_mode;
- int code = 0;
+ int code = FPE_FLTUNK;
int err;
+ interrupt_cond_local_irq_enable(regs);
+
flush_spe_to_thread(current);
spefscr = current->thread.spefscr;
@@ -1766,7 +2175,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
err = do_spe_mathemu(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -1785,20 +2194,21 @@ void SPEFloatingPointException(struct pt_regs *regs)
return;
}
-void SPEFloatingPointRoundException(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
{
- extern int speround_handler(struct pt_regs *regs);
int err;
+ interrupt_cond_local_irq_enable(regs);
+
preempt_disable();
if (regs->msr & MSR_SPE)
giveup_spe(current);
preempt_enable();
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
err = speround_handler(regs);
if (err == 0) {
- regs->nip += 4; /* skip emulated instruction */
+ regs_add_return_ip(regs, 4); /* skip emulated instruction */
emulate_single_step(regs);
return;
}
@@ -1811,7 +2221,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
printk(KERN_ERR "unrecognized spe instruction "
"in %s at %lx\n", current->comm, regs->nip);
} else {
- _exception(SIGFPE, regs, 0, regs->nip);
+ _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip);
return;
}
}
@@ -1823,29 +2233,22 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
* in the MSR is 0. This indicates that SRR0/1 are live, and that
* we therefore lost state by taking this exception.
*/
-void unrecoverable_exception(struct pt_regs *regs)
+void __noreturn unrecoverable_exception(struct pt_regs *regs)
{
- printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
- regs->trap, regs->nip);
+ pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
+ regs->trap, regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
+ /* die() should not return */
+ for (;;)
+ ;
}
-#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x)
-/*
- * Default handler for a Watchdog exception,
- * spins until a reboot occurs
- */
-void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs)
-{
- /* Generic WatchdogHandler, implement your own */
- mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE));
- return;
-}
-
-void WatchdogException(struct pt_regs *regs)
+#ifdef CONFIG_BOOKE_WDT
+DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException)
{
printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n");
- WatchdogHandler(regs);
+ mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE);
+ return 0;
}
#endif
@@ -1853,18 +2256,13 @@ void WatchdogException(struct pt_regs *regs)
* We enter here if we discover during exception entry that we are
* running in supervisor mode with a userspace value in the stack pointer.
*/
-void kernel_bad_stack(struct pt_regs *regs)
+DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
{
printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n",
regs->gpr[1], regs->nip);
die("Bad kernel stack pointer", regs, SIGABRT);
}
-void __init trap_init(void)
-{
-}
-
-
#ifdef CONFIG_PPC_EMULATED_STATS
#define WARN_EMULATED_SETUP(type) .type = { .name = #type }
@@ -1895,6 +2293,10 @@ struct ppc_emulated ppc_emulated = {
WARN_EMULATED_SETUP(mfdscr),
WARN_EMULATED_SETUP(mtdscr),
WARN_EMULATED_SETUP(lq_stq),
+ WARN_EMULATED_SETUP(lxvw4x),
+ WARN_EMULATED_SETUP(lxvh8x),
+ WARN_EMULATED_SETUP(lxvd2x),
+ WARN_EMULATED_SETUP(lxvb16x),
#endif
};
@@ -1908,35 +2310,20 @@ void ppc_warn_emulated_print(const char *type)
static int __init ppc_warn_emulated_init(void)
{
- struct dentry *dir, *d;
+ struct dentry *dir;
unsigned int i;
struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
- if (!powerpc_debugfs_root)
- return -ENODEV;
-
dir = debugfs_create_dir("emulated_instructions",
- powerpc_debugfs_root);
- if (!dir)
- return -ENOMEM;
+ arch_debugfs_dir);
- d = debugfs_create_u32("do_warn", S_IRUGO | S_IWUSR, dir,
- &ppc_warn_emulated);
- if (!d)
- goto fail;
+ debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
- for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++) {
- d = debugfs_create_u32(entries[i].name, S_IRUGO | S_IWUSR, dir,
- (u32 *)&entries[i].val.counter);
- if (!d)
- goto fail;
- }
+ for (i = 0; i < sizeof(ppc_emulated)/sizeof(*entries); i++)
+ debugfs_create_u32(entries[i].name, 0644, dir,
+ (u32 *)&entries[i].val.counter);
return 0;
-
-fail:
- debugfs_remove_recursive(dir);
- return -ENOMEM;
}
device_initcall(ppc_warn_emulated_init);
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 000000000000..80a1f9a4300a
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+ sc 2 /* Invoke the ultravisor */
+ blr /* Return r3 = status */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index b7aa07279a63..862b22b2b616 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* polling mode stateless debugging stuff, originally for NS16550 Serial Ports
*
* c 2001 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#include <stdarg.h>
+#include <linux/stdarg.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/console.h>
@@ -40,14 +36,6 @@ void __init udbg_early_init(void)
#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL)
/* RTAS panel debug */
udbg_init_rtas_panel();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE)
- /* RTAS console debug */
- udbg_init_rtas_console();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE)
- /* Maple real mode debug */
- udbg_init_maple_realmode();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_BEAT)
- udbg_init_debug_beat();
#elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE)
udbg_init_pas_realmode();
#elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX)
@@ -55,9 +43,6 @@ void __init udbg_early_init(void)
#elif defined(CONFIG_PPC_EARLY_DEBUG_44x)
/* PPC44x debug */
udbg_init_44x_as1();
-#elif defined(CONFIG_PPC_EARLY_DEBUG_40x)
- /* PPC40x debug */
- udbg_init_40x_realmode();
#elif defined(CONFIG_PPC_EARLY_DEBUG_CPM)
udbg_init_cpm();
#elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
@@ -73,10 +58,12 @@ void __init udbg_early_init(void)
udbg_init_debug_opal_raw();
#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI)
udbg_init_debug_opal_hvsi();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_16550)
+ udbg_init_debug_16550();
#endif
#ifdef CONFIG_PPC_EARLY_DEBUG
- console_loglevel = 10;
+ console_loglevel = CONSOLE_LOGLEVEL_DEBUG;
register_early_udbg_console();
#endif
@@ -126,13 +113,15 @@ int udbg_write(const char *s, int n)
#define UDBG_BUFSIZE 256
void udbg_printf(const char *fmt, ...)
{
- char buf[UDBG_BUFSIZE];
- va_list args;
+ if (udbg_putc) {
+ char buf[UDBG_BUFSIZE];
+ va_list args;
- va_start(args, fmt);
- vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
- udbg_puts(buf);
- va_end(args);
+ va_start(args, fmt);
+ vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+ udbg_puts(buf);
+ va_end(args);
+ }
}
void __init udbg_progress(char *s, unsigned short hex)
diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c
index 6e7c4923b5ea..dfe8ed2192e8 100644
--- a/arch/powerpc/kernel/udbg_16550.c
+++ b/arch/powerpc/kernel/udbg_16550.c
@@ -1,17 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* udbg for NS16550 compatible serial ports
*
* Copyright (C) 2001-2005 PPC 64 Team, IBM Corp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <asm/udbg.h>
#include <asm/io.h>
-#include <asm/reg_a2.h>
+#include <asm/early_ioremap.h>
extern u8 real_readb(volatile u8 __iomem *addr);
extern void real_writeb(u8 data, volatile u8 __iomem *addr);
@@ -69,8 +65,12 @@ static void udbg_uart_putc(char c)
static int udbg_uart_getc_poll(void)
{
- if (!udbg_uart_in || !(udbg_uart_in(UART_LSR) & LSR_DR))
+ if (!udbg_uart_in)
+ return -1;
+
+ if (!(udbg_uart_in(UART_LSR) & LSR_DR))
return udbg_uart_in(UART_RBR);
+
return -1;
}
@@ -84,7 +84,7 @@ static int udbg_uart_getc(void)
return udbg_uart_in(UART_RBR);
}
-static void udbg_use_uart(void)
+static void __init udbg_use_uart(void)
{
udbg_putc = udbg_uart_putc;
udbg_flush = udbg_uart_flush;
@@ -92,7 +92,7 @@ static void udbg_use_uart(void)
udbg_getc_poll = udbg_uart_getc_poll;
}
-void udbg_uart_setup(unsigned int speed, unsigned int clock)
+void __init udbg_uart_setup(unsigned int speed, unsigned int clock)
{
unsigned int dll, base_bauds;
@@ -121,7 +121,7 @@ void udbg_uart_setup(unsigned int speed, unsigned int clock)
udbg_uart_out(UART_FCR, 0x7);
}
-unsigned int udbg_probe_uart_speed(unsigned int clock)
+unsigned int __init udbg_probe_uart_speed(unsigned int clock)
{
unsigned int dll, dlm, divisor, prescaler, speed;
u8 old_lcr;
@@ -172,7 +172,7 @@ static void udbg_uart_out_pio(unsigned int reg, u8 data)
outb(data, udbg_uart.pio_base + (reg * udbg_uart_stride));
}
-void udbg_uart_init_pio(unsigned long port, unsigned int stride)
+void __init udbg_uart_init_pio(unsigned long port, unsigned int stride)
{
if (!port)
return;
@@ -194,7 +194,7 @@ static void udbg_uart_out_mmio(unsigned int reg, u8 data)
}
-void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
+void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
{
if (!addr)
return;
@@ -205,29 +205,6 @@ void udbg_uart_init_mmio(void __iomem *addr, unsigned int stride)
udbg_use_uart();
}
-#ifdef CONFIG_PPC_MAPLE
-
-#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8)
-
-static u8 udbg_uart_in_maple(unsigned int reg)
-{
- return real_readb(UDBG_UART_MAPLE_ADDR + reg);
-}
-
-static void udbg_uart_out_maple(unsigned int reg, u8 val)
-{
- real_writeb(val, UDBG_UART_MAPLE_ADDR + reg);
-}
-
-void __init udbg_init_maple_realmode(void)
-{
- udbg_uart_in = udbg_uart_in_maple;
- udbg_uart_out = udbg_uart_out_maple;
- udbg_use_uart();
-}
-
-#endif /* CONFIG_PPC_MAPLE */
-
#ifdef CONFIG_PPC_PASEMI
#define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL)
@@ -274,25 +251,34 @@ void __init udbg_init_44x_as1(void)
#endif /* CONFIG_PPC_EARLY_DEBUG_44x */
-#ifdef CONFIG_PPC_EARLY_DEBUG_40x
+#ifdef CONFIG_PPC_EARLY_DEBUG_16550
-static u8 udbg_uart_in_40x(unsigned int reg)
-{
- return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR
- + reg);
-}
+static void __iomem *udbg_uart_early_addr;
-static void udbg_uart_out_40x(unsigned int reg, u8 val)
+void __init udbg_init_debug_16550(void)
{
- real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR
- + reg);
+ udbg_uart_early_addr = early_ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+ udbg_uart_init_mmio(udbg_uart_early_addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);
}
-void __init udbg_init_40x_realmode(void)
+static int __init udbg_init_debug_16550_ioremap(void)
{
- udbg_uart_in = udbg_uart_in_40x;
- udbg_uart_out = udbg_uart_out_40x;
- udbg_use_uart();
+ void __iomem *addr;
+
+ if (!udbg_uart_early_addr)
+ return 0;
+
+ addr = ioremap(CONFIG_PPC_EARLY_DEBUG_16550_PHYSADDR, 0x1000);
+ if (WARN_ON(!addr))
+ return -ENOMEM;
+
+ udbg_uart_init_mmio(addr, CONFIG_PPC_EARLY_DEBUG_16550_STRIDE);
+ early_iounmap(udbg_uart_early_addr, 0x1000);
+ udbg_uart_early_addr = NULL;
+
+ return 0;
}
-#endif /* CONFIG_PPC_EARLY_DEBUG_40x */
+early_initcall(udbg_init_debug_16550_ioremap);
+
+#endif /* CONFIG_PPC_EARLY_DEBUG_16550 */
diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c
index 003b20964ea0..95a41ae9dfa7 100644
--- a/arch/powerpc/kernel/uprobes.c
+++ b/arch/powerpc/kernel/uprobes.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* User-space Probes (UProbes) for powerpc
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
* Copyright IBM Corporation, 2007-2012
*
* Adapted from the x86 port by Ananth N Mavinakayanahalli <ananth@in.ibm.com>
@@ -27,6 +14,7 @@
#include <linux/kdebug.h>
#include <asm/sstep.h>
+#include <asm/inst.h>
#define UPROBE_TRAP_NR UINT_MAX
@@ -53,6 +41,18 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe,
if (addr & 0x03)
return -EINVAL;
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ ppc_inst_prefixed(ppc_inst_read(auprobe->insn)) &&
+ (addr & 0x3f) == 60) {
+ pr_info_ratelimited("Cannot register a uprobe on 64 byte unaligned prefixed instruction\n");
+ return -EINVAL;
+ }
+
+ if (!can_single_step(ppc_inst_val(ppc_inst_read(auprobe->insn)))) {
+ pr_info_ratelimited("Cannot register a uprobe on instructions that can't be single stepped\n");
+ return -ENOTSUPP;
+ }
+
return 0;
}
@@ -67,7 +67,7 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
autask->saved_trap_nr = current->thread.trap_nr;
current->thread.trap_nr = UPROBE_TRAP_NR;
- regs->nip = current->utask->xol_vaddr;
+ regs_set_return_ip(regs, current->utask->xol_vaddr);
user_enable_single_step(current);
return 0;
@@ -124,7 +124,7 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
* support doesn't exist and have to fix-up the next instruction
* to be executed.
*/
- regs->nip = utask->vaddr + MAX_UINSN_BYTES;
+ regs_set_return_ip(regs, (unsigned long)ppc_inst_next((void *)utask->vaddr, auprobe->insn));
user_disable_single_step(current);
return 0;
@@ -153,6 +153,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
case DIE_SSTEP:
if (uprobe_post_sstep_notifier(regs))
return NOTIFY_STOP;
+ break;
default:
break;
}
@@ -186,7 +187,7 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
* emulate_step() returns 1 if the insn was successfully emulated.
* For all other cases, we need to single-step in hardware.
*/
- ret = emulate_step(regs, auprobe->insn);
+ ret = emulate_step(regs, ppc_inst_read(auprobe->insn));
if (ret > 0)
return true;
@@ -205,3 +206,12 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs
return orig_ret_vaddr;
}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->gpr[1] <= ret->stack;
+ else
+ return regs->gpr[1] < ret->stack;
+}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index f174351842cf..ab7c4cc80943 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
@@ -20,14 +16,15 @@
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/security.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
+#include <linux/syscalls.h>
+#include <linux/vdso_datastore.h>
+#include <vdso/datapage.h>
-#include <asm/pgtable.h>
+#include <asm/syscall.h>
+#include <asm/syscalls.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/sections.h>
@@ -36,226 +33,101 @@
#include <asm/vdso_datapage.h>
#include <asm/setup.h>
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/* Max supported size for symbol names */
-#define MAX_SYMNAME 64
+static_assert(__VDSO_PAGES == VDSO_NR_PAGES);
/* The alignment of the vDSO */
#define VDSO_ALIGNMENT (1 << 16)
extern char vdso32_start, vdso32_end;
-static void *vdso32_kbase = &vdso32_start;
-static unsigned int vdso32_pages;
-static struct page **vdso32_pagelist;
-unsigned long vdso32_sigtramp;
-unsigned long vdso32_rt_sigtramp;
-
-#ifdef CONFIG_PPC64
extern char vdso64_start, vdso64_end;
-static void *vdso64_kbase = &vdso64_start;
-static unsigned int vdso64_pages;
-static struct page **vdso64_pagelist;
-unsigned long vdso64_rt_sigtramp;
-#endif /* CONFIG_PPC64 */
-
-static int vdso_ready;
-/*
- * The vdso data page (aka. systemcfg for old ppc64 fans) is here.
- * Once the early boot kernel code no longer needs to muck around
- * with it, it will become dynamically allocated
- */
-static union {
- struct vdso_data data;
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = &vdso_data_store.data;
-
-/* Format of the patch table */
-struct vdso_patch_def
+static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma,
+ unsigned long text_size)
{
- unsigned long ftr_mask, ftr_value;
- const char *gen_name;
- const char *fix_name;
-};
+ unsigned long new_size = new_vma->vm_end - new_vma->vm_start;
-/* Table of functions to patch based on the CPU type/revision
- *
- * Currently, we only change sync_dicache to do nothing on processors
- * with a coherent icache
- */
-static struct vdso_patch_def vdso_patches[] = {
- {
- CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
- "__kernel_sync_dicache", "__kernel_sync_dicache_p5"
- },
- {
- CPU_FTR_USE_TB, 0,
- "__kernel_gettimeofday", NULL
- },
- {
- CPU_FTR_USE_TB, 0,
- "__kernel_clock_gettime", NULL
- },
- {
- CPU_FTR_USE_TB, 0,
- "__kernel_clock_getres", NULL
- },
- {
- CPU_FTR_USE_TB, 0,
- "__kernel_get_tbfreq", NULL
- },
- {
- CPU_FTR_USE_TB, 0,
- "__kernel_time", NULL
- },
-};
+ if (new_size != text_size)
+ return -EINVAL;
-/*
- * Some infos carried around for each of them during parsing at
- * boot time.
- */
-struct lib32_elfinfo
-{
- Elf32_Ehdr *hdr; /* ptr to ELF */
- Elf32_Sym *dynsym; /* ptr to .dynsym section */
- unsigned long dynsymsize; /* size of .dynsym section */
- char *dynstr; /* ptr to .dynstr section */
- unsigned long text; /* offset of .text section in .so */
-};
+ current->mm->context.vdso = (void __user *)new_vma->vm_start;
-struct lib64_elfinfo
-{
- Elf64_Ehdr *hdr;
- Elf64_Sym *dynsym;
- unsigned long dynsymsize;
- char *dynstr;
- unsigned long text;
-};
+ return 0;
+}
+static int vdso32_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
+{
+ return vdso_mremap(sm, new_vma, &vdso32_end - &vdso32_start);
+}
-#ifdef __DEBUG
-static void dump_one_vdso_page(struct page *pg, struct page *upg)
+static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma)
{
- printk("kpg: %p (c:%d,f:%08lx)", __va(page_to_pfn(pg) << PAGE_SHIFT),
- page_count(pg),
- pg->flags);
- if (upg && !IS_ERR(upg) /* && pg != upg*/) {
- printk(" upg: %p (c:%d,f:%08lx)", __va(page_to_pfn(upg)
- << PAGE_SHIFT),
- page_count(upg),
- upg->flags);
- }
- printk("\n");
+ return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
}
-static void dump_vdso_pages(struct vm_area_struct * vma)
+static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
{
- int i;
+ struct mm_struct *mm = vma->vm_mm;
- if (!vma || is_32bit_task()) {
- printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase);
- for (i=0; i<vdso32_pages; i++) {
- struct page *pg = virt_to_page(vdso32_kbase +
- i*PAGE_SIZE);
- struct page *upg = (vma && vma->vm_mm) ?
- follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
- : NULL;
- dump_one_vdso_page(pg, upg);
- }
- }
- if (!vma || !is_32bit_task()) {
- printk("vDSO64 @ %016lx:\n", (unsigned long)vdso64_kbase);
- for (i=0; i<vdso64_pages; i++) {
- struct page *pg = virt_to_page(vdso64_kbase +
- i*PAGE_SIZE);
- struct page *upg = (vma && vma->vm_mm) ?
- follow_page(vma, vma->vm_start + i*PAGE_SIZE, 0)
- : NULL;
- dump_one_vdso_page(pg, upg);
- }
- }
+ /*
+ * close() is called for munmap() but also for mremap(). In the mremap()
+ * case the vdso pointer has already been updated by the mremap() hook
+ * above, so it must not be set to NULL here.
+ */
+ if (vma->vm_start != (unsigned long)mm->context.vdso)
+ return;
+
+ mm->context.vdso = NULL;
}
-#endif /* DEBUG */
+
+static struct vm_special_mapping vdso32_spec __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso32_mremap,
+ .close = vdso_close,
+};
+
+static struct vm_special_mapping vdso64_spec __ro_after_init = {
+ .name = "[vdso]",
+ .mremap = vdso64_mremap,
+ .close = vdso_close,
+};
/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
+ unsigned long vdso_size, vdso_base, mappings_size;
+ struct vm_special_mapping *vdso_spec;
+ unsigned long vvar_size = VDSO_NR_PAGES * PAGE_SIZE;
struct mm_struct *mm = current->mm;
- struct page **vdso_pagelist;
- unsigned long vdso_pages;
- unsigned long vdso_base;
- int rc;
+ struct vm_area_struct *vma;
- if (!vdso_ready)
- return 0;
-
-#ifdef CONFIG_PPC64
if (is_32bit_task()) {
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- vdso_base = VDSO32_MBASE;
+ vdso_spec = &vdso32_spec;
+ vdso_size = &vdso32_end - &vdso32_start;
} else {
- vdso_pagelist = vdso64_pagelist;
- vdso_pages = vdso64_pages;
- /*
- * On 64bit we don't have a preferred map address. This
- * allows get_unmapped_area to find an area near other mmaps
- * and most likely share a SLB entry.
- */
- vdso_base = 0;
+ vdso_spec = &vdso64_spec;
+ vdso_size = &vdso64_end - &vdso64_start;
}
-#else
- vdso_pagelist = vdso32_pagelist;
- vdso_pages = vdso32_pages;
- vdso_base = VDSO32_MBASE;
-#endif
- current->mm->context.vdso_base = 0;
-
- /* vDSO has a problem and was disabled, just don't "enable" it for the
- * process
- */
- if (vdso_pages == 0)
- return 0;
- /* Add a page to the vdso size for the data page */
- vdso_pages ++;
+ mappings_size = vdso_size + vvar_size;
+ mappings_size += (VDSO_ALIGNMENT - 1) & PAGE_MASK;
/*
- * pick a base address for the vDSO in process space. We try to put it
- * at vdso_base which is the "natural" base for it, but we might fail
- * and end up putting it elsewhere.
+ * Pick a base address for the vDSO in process space.
* Add enough to the size so that the result can be aligned.
*/
- down_write(&mm->mmap_sem);
- vdso_base = get_unmapped_area(NULL, vdso_base,
- (vdso_pages << PAGE_SHIFT) +
- ((VDSO_ALIGNMENT - 1) & PAGE_MASK),
- 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- rc = vdso_base;
- goto fail_mmapsem;
- }
+ vdso_base = get_unmapped_area(NULL, 0, mappings_size, 0, 0);
+ if (IS_ERR_VALUE(vdso_base))
+ return vdso_base;
/* Add required alignment. */
vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO (since arch_vma_name fails).
- */
- current->mm->context.vdso_base = vdso_base;
+ vma = vdso_install_vvar_mapping(mm, vdso_base);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
/*
* our vma flags don't have VM_WRITE so by default, the process isn't
@@ -267,419 +139,60 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
* It's fine to use that for setting breakpoints in the vDSO code
* pages though.
*/
- rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
- VM_READ|VM_EXEC|
- VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- vdso_pagelist);
- if (rc) {
- current->mm->context.vdso_base = 0;
- goto fail_mmapsem;
- }
-
- up_write(&mm->mmap_sem);
- return 0;
-
- fail_mmapsem:
- up_write(&mm->mmap_sem);
- return rc;
-}
-
-const char *arch_vma_name(struct vm_area_struct *vma)
-{
- if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
- return "[vdso]";
- return NULL;
-}
-
-
-
-static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
- unsigned long *size)
-{
- Elf32_Shdr *sechdrs;
- unsigned int i;
- char *secnames;
-
- /* Grab section headers and strings so we can tell who is who */
- sechdrs = (void *)ehdr + ehdr->e_shoff;
- secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
-
- /* Find the section they want */
- for (i = 1; i < ehdr->e_shnum; i++) {
- if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
- if (size)
- *size = sechdrs[i].sh_size;
- return (void *)ehdr + sechdrs[i].sh_offset;
- }
- }
- *size = 0;
- return NULL;
-}
-
-static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
- const char *symname)
-{
- unsigned int i;
- char name[MAX_SYMNAME], *c;
-
- for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
- if (lib->dynsym[i].st_name == 0)
- continue;
- strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
- MAX_SYMNAME);
- c = strchr(name, '@');
- if (c)
- *c = 0;
- if (strcmp(symname, name) == 0)
- return &lib->dynsym[i];
- }
- return NULL;
-}
-
-/* Note that we assume the section is .text and the symbol is relative to
- * the library base
- */
-static unsigned long __init find_function32(struct lib32_elfinfo *lib,
- const char *symname)
-{
- Elf32_Sym *sym = find_symbol32(lib, symname);
-
- if (sym == NULL) {
- printk(KERN_WARNING "vDSO32: function %s not found !\n",
- symname);
- return 0;
- }
- return sym->st_value - VDSO32_LBASE;
-}
-
-static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64,
- const char *orig, const char *fix)
-{
- Elf32_Sym *sym32_gen, *sym32_fix;
-
- sym32_gen = find_symbol32(v32, orig);
- if (sym32_gen == NULL) {
- printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
- return -1;
- }
- if (fix == NULL) {
- sym32_gen->st_name = 0;
- return 0;
- }
- sym32_fix = find_symbol32(v32, fix);
- if (sym32_fix == NULL) {
- printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
- return -1;
- }
- sym32_gen->st_value = sym32_fix->st_value;
- sym32_gen->st_size = sym32_fix->st_size;
- sym32_gen->st_info = sym32_fix->st_info;
- sym32_gen->st_other = sym32_fix->st_other;
- sym32_gen->st_shndx = sym32_fix->st_shndx;
-
- return 0;
-}
-
-
-#ifdef CONFIG_PPC64
-
-static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
- unsigned long *size)
-{
- Elf64_Shdr *sechdrs;
- unsigned int i;
- char *secnames;
-
- /* Grab section headers and strings so we can tell who is who */
- sechdrs = (void *)ehdr + ehdr->e_shoff;
- secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
-
- /* Find the section they want */
- for (i = 1; i < ehdr->e_shnum; i++) {
- if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
- if (size)
- *size = sechdrs[i].sh_size;
- return (void *)ehdr + sechdrs[i].sh_offset;
- }
- }
- if (size)
- *size = 0;
- return NULL;
-}
-
-static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
- const char *symname)
-{
- unsigned int i;
- char name[MAX_SYMNAME], *c;
-
- for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
- if (lib->dynsym[i].st_name == 0)
- continue;
- strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
- MAX_SYMNAME);
- c = strchr(name, '@');
- if (c)
- *c = 0;
- if (strcmp(symname, name) == 0)
- return &lib->dynsym[i];
- }
- return NULL;
-}
-
-/* Note that we assume the section is .text and the symbol is relative to
- * the library base
- */
-static unsigned long __init find_function64(struct lib64_elfinfo *lib,
- const char *symname)
-{
- Elf64_Sym *sym = find_symbol64(lib, symname);
-
- if (sym == NULL) {
- printk(KERN_WARNING "vDSO64: function %s not found !\n",
- symname);
- return 0;
- }
-#ifdef VDS64_HAS_DESCRIPTORS
- return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
- VDSO64_LBASE;
-#else
- return sym->st_value - VDSO64_LBASE;
-#endif
-}
-
-static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64,
- const char *orig, const char *fix)
-{
- Elf64_Sym *sym64_gen, *sym64_fix;
-
- sym64_gen = find_symbol64(v64, orig);
- if (sym64_gen == NULL) {
- printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
- return -1;
- }
- if (fix == NULL) {
- sym64_gen->st_name = 0;
- return 0;
- }
- sym64_fix = find_symbol64(v64, fix);
- if (sym64_fix == NULL) {
- printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
- return -1;
- }
- sym64_gen->st_value = sym64_fix->st_value;
- sym64_gen->st_size = sym64_fix->st_size;
- sym64_gen->st_info = sym64_fix->st_info;
- sym64_gen->st_other = sym64_fix->st_other;
- sym64_gen->st_shndx = sym64_fix->st_shndx;
-
- return 0;
-}
-
-#endif /* CONFIG_PPC64 */
-
-
-static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
-{
- void *sect;
-
- /*
- * Locate symbol tables & text section
- */
-
- v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
- v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
- if (v32->dynsym == NULL || v32->dynstr == NULL) {
- printk(KERN_ERR "vDSO32: required symbol section not found\n");
- return -1;
- }
- sect = find_section32(v32->hdr, ".text", NULL);
- if (sect == NULL) {
- printk(KERN_ERR "vDSO32: the .text section was not found\n");
- return -1;
+ vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
+ VM_READ | VM_EXEC | VM_MAYREAD |
+ VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
+ if (IS_ERR(vma)) {
+ do_munmap(mm, vdso_base, vvar_size, NULL);
+ return PTR_ERR(vma);
}
- v32->text = sect - vdso32_kbase;
-#ifdef CONFIG_PPC64
- v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
- v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
- if (v64->dynsym == NULL || v64->dynstr == NULL) {
- printk(KERN_ERR "vDSO64: required symbol section not found\n");
- return -1;
- }
- sect = find_section64(v64->hdr, ".text", NULL);
- if (sect == NULL) {
- printk(KERN_ERR "vDSO64: the .text section was not found\n");
- return -1;
- }
- v64->text = sect - vdso64_kbase;
-#endif /* CONFIG_PPC64 */
+ // Now that the mappings are in place, set the mm VDSO pointer
+ mm->context.vdso = (void __user *)vdso_base + vvar_size;
return 0;
}
-static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
- /*
- * Find signal trampolines
- */
+ struct mm_struct *mm = current->mm;
+ int rc;
-#ifdef CONFIG_PPC64
- vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
-#endif
- vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
- vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
-}
+ mm->context.vdso = NULL;
-static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
-{
- Elf32_Sym *sym32;
-#ifdef CONFIG_PPC64
- Elf64_Sym *sym64;
-
- sym64 = find_symbol64(v64, "__kernel_datapage_offset");
- if (sym64 == NULL) {
- printk(KERN_ERR "vDSO64: Can't find symbol "
- "__kernel_datapage_offset !\n");
- return -1;
- }
- *((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
- (vdso64_pages << PAGE_SHIFT) -
- (sym64->st_value - VDSO64_LBASE);
-#endif /* CONFIG_PPC64 */
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
- sym32 = find_symbol32(v32, "__kernel_datapage_offset");
- if (sym32 == NULL) {
- printk(KERN_ERR "vDSO32: Can't find symbol "
- "__kernel_datapage_offset !\n");
- return -1;
- }
- *((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
- (vdso32_pages << PAGE_SHIFT) -
- (sym32->st_value - VDSO32_LBASE);
+ rc = __arch_setup_additional_pages(bprm, uses_interp);
- return 0;
+ mmap_write_unlock(mm);
+ return rc;
}
+#define VDSO_DO_FIXUPS(type, value, bits, sec) do { \
+ void *__start = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_start); \
+ void *__end = (void *)VDSO##bits##_SYMBOL(&vdso##bits##_start, sec##_end); \
+ \
+ do_##type##_fixups((value), __start, __end); \
+} while (0)
-static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
+static void __init vdso_fixup_features(void)
{
- void *start32;
- unsigned long size32;
-
#ifdef CONFIG_PPC64
- void *start64;
- unsigned long size64;
-
- start64 = find_section64(v64->hdr, "__ftr_fixup", &size64);
- if (start64)
- do_feature_fixups(cur_cpu_spec->cpu_features,
- start64, start64 + size64);
-
- start64 = find_section64(v64->hdr, "__mmu_ftr_fixup", &size64);
- if (start64)
- do_feature_fixups(cur_cpu_spec->mmu_features,
- start64, start64 + size64);
-
- start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64);
- if (start64)
- do_feature_fixups(powerpc_firmware_features,
- start64, start64 + size64);
-
- start64 = find_section64(v64->hdr, "__lwsync_fixup", &size64);
- if (start64)
- do_lwsync_fixups(cur_cpu_spec->cpu_features,
- start64, start64 + size64);
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 64, ftr_fixup);
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 64, mmu_ftr_fixup);
+ VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 64, fw_ftr_fixup);
+ VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 64, lwsync_fixup);
#endif /* CONFIG_PPC64 */
- start32 = find_section32(v32->hdr, "__ftr_fixup", &size32);
- if (start32)
- do_feature_fixups(cur_cpu_spec->cpu_features,
- start32, start32 + size32);
-
- start32 = find_section32(v32->hdr, "__mmu_ftr_fixup", &size32);
- if (start32)
- do_feature_fixups(cur_cpu_spec->mmu_features,
- start32, start32 + size32);
-
+#ifdef CONFIG_VDSO32
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->cpu_features, 32, ftr_fixup);
+ VDSO_DO_FIXUPS(feature, cur_cpu_spec->mmu_features, 32, mmu_ftr_fixup);
#ifdef CONFIG_PPC64
- start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32);
- if (start32)
- do_feature_fixups(powerpc_firmware_features,
- start32, start32 + size32);
+ VDSO_DO_FIXUPS(feature, powerpc_firmware_features, 32, fw_ftr_fixup);
#endif /* CONFIG_PPC64 */
-
- start32 = find_section32(v32->hdr, "__lwsync_fixup", &size32);
- if (start32)
- do_lwsync_fixups(cur_cpu_spec->cpu_features,
- start32, start32 + size32);
-
- return 0;
-}
-
-static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
- struct lib64_elfinfo *v64)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
- struct vdso_patch_def *patch = &vdso_patches[i];
- int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
- == patch->ftr_value;
- if (!match)
- continue;
-
- DBG("replacing %s with %s...\n", patch->gen_name,
- patch->fix_name ? "NONE" : patch->fix_name);
-
- /*
- * Patch the 32 bits and 64 bits symbols. Note that we do not
- * patch the "." symbol on 64 bits.
- * It would be easy to do, but doesn't seem to be necessary,
- * patching the OPD symbol is enough.
- */
- vdso_do_func_patch32(v32, v64, patch->gen_name,
- patch->fix_name);
-#ifdef CONFIG_PPC64
- vdso_do_func_patch64(v32, v64, patch->gen_name,
- patch->fix_name);
-#endif /* CONFIG_PPC64 */
- }
-
- return 0;
-}
-
-
-static __init int vdso_setup(void)
-{
- struct lib32_elfinfo v32;
- struct lib64_elfinfo v64;
-
- v32.hdr = vdso32_kbase;
-#ifdef CONFIG_PPC64
- v64.hdr = vdso64_kbase;
+ VDSO_DO_FIXUPS(lwsync, cur_cpu_spec->cpu_features, 32, lwsync_fixup);
#endif
- if (vdso_do_find_sections(&v32, &v64))
- return -1;
-
- if (vdso_fixup_datapage(&v32, &v64))
- return -1;
-
- if (vdso_fixup_features(&v32, &v64))
- return -1;
-
- if (vdso_fixup_alt_funcs(&v32, &v64))
- return -1;
-
- vdso_setup_trampolines(&v32, &v64);
-
- return 0;
}
/*
@@ -689,23 +202,13 @@ static __init int vdso_setup(void)
static void __init vdso_setup_syscall_map(void)
{
unsigned int i;
- extern unsigned long *sys_call_table;
- extern unsigned long sys_ni_syscall;
-
- for (i = 0; i < __NR_syscalls; i++) {
-#ifdef CONFIG_PPC64
- if (sys_call_table[i*2] != sys_ni_syscall)
- vdso_data->syscall_map_64[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
- if (sys_call_table[i*2+1] != sys_ni_syscall)
- vdso_data->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
-#else /* CONFIG_PPC64 */
- if (sys_call_table[i] != sys_ni_syscall)
- vdso_data->syscall_map_32[i >> 5] |=
- 0x80000000UL >> (i & 0x1f);
-#endif /* CONFIG_PPC64 */
+ for (i = 0; i < NR_syscalls; i++) {
+ if (sys_call_table[i] != (void *)&sys_ni_syscall)
+ vdso_k_arch_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
+ if (IS_ENABLED(CONFIG_COMPAT) &&
+ compat_sys_call_table[i] != (void *)&sys_ni_syscall)
+ vdso_k_arch_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f);
}
}
@@ -724,7 +227,7 @@ int vdso_getcpu_init(void)
node = cpu_to_node(cpu);
WARN_ON_ONCE(node > 0xffff);
- val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+ val = (cpu & 0xffff) | ((node & 0xffff) << 16);
mtspr(SPRN_SPRG_VDSO_WRITE, val);
get_paca()->sprg_vdso = val;
@@ -736,106 +239,42 @@ int vdso_getcpu_init(void)
early_initcall(vdso_getcpu_init);
#endif
-static int __init vdso_init(void)
+static struct page ** __init vdso_setup_pages(void *start, void *end)
{
int i;
+ struct page **pagelist;
+ int pages = (end - start) >> PAGE_SHIFT;
-#ifdef CONFIG_PPC64
- /*
- * Fill up the "systemcfg" stuff for backward compatibility
- */
- strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
- vdso_data->version.major = SYSTEMCFG_MAJOR;
- vdso_data->version.minor = SYSTEMCFG_MINOR;
- vdso_data->processor = mfspr(SPRN_PVR);
- /*
- * Fake the old platform number for pSeries and add
- * in LPAR bit if necessary
- */
- vdso_data->platform = 0x100;
- if (firmware_has_feature(FW_FEATURE_LPAR))
- vdso_data->platform |= 1;
- vdso_data->physicalMemorySize = memblock_phys_mem_size();
- vdso_data->dcache_size = ppc64_caches.dsize;
- vdso_data->dcache_line_size = ppc64_caches.dline_size;
- vdso_data->icache_size = ppc64_caches.isize;
- vdso_data->icache_line_size = ppc64_caches.iline_size;
-
- /* XXXOJN: Blocks should be added to ppc64_caches and used instead */
- vdso_data->dcache_block_size = ppc64_caches.dline_size;
- vdso_data->icache_block_size = ppc64_caches.iline_size;
- vdso_data->dcache_log_block_size = ppc64_caches.log_dline_size;
- vdso_data->icache_log_block_size = ppc64_caches.log_iline_size;
-
- /*
- * Calculate the size of the 64 bits vDSO
- */
- vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
- DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
-#else
- vdso_data->dcache_block_size = L1_CACHE_BYTES;
- vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
- vdso_data->icache_block_size = L1_CACHE_BYTES;
- vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
-#endif /* CONFIG_PPC64 */
+ pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!pagelist)
+ panic("%s: Cannot allocate page list for VDSO", __func__);
+ for (i = 0; i < pages; i++)
+ pagelist[i] = virt_to_page(start + i * PAGE_SIZE);
- /*
- * Calculate the size of the 32 bits vDSO
- */
- vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
- DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
+ return pagelist;
+}
+static int __init vdso_init(void)
+{
+#ifdef CONFIG_PPC64
+ vdso_k_arch_data->dcache_block_size = ppc64_caches.l1d.block_size;
+ vdso_k_arch_data->icache_block_size = ppc64_caches.l1i.block_size;
+ vdso_k_arch_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
+ vdso_k_arch_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
+#endif /* CONFIG_PPC64 */
- /*
- * Setup the syscall map in the vDOS
- */
vdso_setup_syscall_map();
- /*
- * Initialize the vDSO images in memory, that is do necessary
- * fixups of vDSO symbols, locate trampolines, etc...
- */
- if (vdso_setup()) {
- printk(KERN_ERR "vDSO setup failure, not enabled !\n");
- vdso32_pages = 0;
-#ifdef CONFIG_PPC64
- vdso64_pages = 0;
-#endif
- return 0;
- }
-
- /* Make sure pages are in the correct state */
- vdso32_pagelist = kzalloc(sizeof(struct page *) * (vdso32_pages + 2),
- GFP_KERNEL);
- BUG_ON(vdso32_pagelist == NULL);
- for (i = 0; i < vdso32_pages; i++) {
- struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
- get_page(pg);
- vdso32_pagelist[i] = pg;
- }
- vdso32_pagelist[i++] = virt_to_page(vdso_data);
- vdso32_pagelist[i] = NULL;
+ vdso_fixup_features();
-#ifdef CONFIG_PPC64
- vdso64_pagelist = kzalloc(sizeof(struct page *) * (vdso64_pages + 2),
- GFP_KERNEL);
- BUG_ON(vdso64_pagelist == NULL);
- for (i = 0; i < vdso64_pages; i++) {
- struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- ClearPageReserved(pg);
- get_page(pg);
- vdso64_pagelist[i] = pg;
- }
- vdso64_pagelist[i++] = virt_to_page(vdso_data);
- vdso64_pagelist[i] = NULL;
-#endif /* CONFIG_PPC64 */
+ if (IS_ENABLED(CONFIG_VDSO32))
+ vdso32_spec.pages = vdso_setup_pages(&vdso32_start, &vdso32_end);
- get_page(virt_to_page(vdso_data));
+ if (IS_ENABLED(CONFIG_PPC64))
+ vdso64_spec.pages = vdso_setup_pages(&vdso64_start, &vdso64_end);
smp_wmb();
- vdso_ready = 1;
return 0;
}
diff --git a/arch/powerpc/kernel/vdso/.gitignore b/arch/powerpc/kernel/vdso/.gitignore
new file mode 100644
index 000000000000..dd9bdd67758b
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso32.lds
+vdso32.so.dbg
+vdso64.lds
+vdso64.so.dbg
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
new file mode 100644
index 000000000000..8834dfe9d727
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -0,0 +1,123 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# List of files in the vdso, has to be asm only for now
+
+# Include the generic Makefile to check the built vdso.
+include $(srctree)/lib/vdso/Makefile.include
+
+obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o
+obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o
+
+obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o
+obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o
+
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y)
+# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true
+# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is
+# compiler generated. To avoid breaking Go, tell GCC not to use r30. Impact on code
+# generation is minimal, it will just use r29 instead.
+ CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) $(call cc-option, -ffixed-r30)
+endif
+
+ifneq ($(c-getrandom-y),)
+ CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y)
+ CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y)
+endif
+
+# Build rules
+
+ifdef CROSS32_COMPILE
+ VDSOCC := $(CROSS32_COMPILE)gcc
+else
+ VDSOCC := $(CC)
+endif
+
+targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o
+targets += crtsavres-32.o
+obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
+targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o
+obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
+
+ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO
+ccflags-y += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+ccflags-y += $(call cc-option, -fno-stack-protector)
+ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -ffreestanding -fasynchronous-unwind-tables
+ccflags-remove-y := $(CC_FLAGS_FTRACE)
+ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS)
+ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
+ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
+
+# Filter flags that clang will warn are unused for linking
+ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS))
+
+CC32FLAGS := -m32
+CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc -mpcrel
+ifdef CONFIG_CC_IS_CLANG
+# This flag is supported by clang for 64-bit but not 32-bit so it will cause
+# an unused command line flag warning for this file.
+CC32FLAGSREMOVE += -fno-stack-clash-protection
+# -mstack-protector-guard values from the 64-bit build are not valid for the
+# 32-bit one. clang validates the values passed to these arguments during
+# parsing, even when -fno-stack-protector is passed afterwards.
+CC32FLAGSREMOVE += -mstack-protector-guard%
+endif
+LD32FLAGS := -Wl,-soname=linux-vdso32.so.1
+AS32FLAGS := -D__VDSO32__
+
+LD64FLAGS := -Wl,-soname=linux-vdso64.so.1
+AS64FLAGS := -D__VDSO64__
+
+targets += vdso32.lds
+CPPFLAGS_vdso32.lds += -P -C -Upowerpc
+targets += vdso64.lds
+CPPFLAGS_vdso64.lds += -P -C
+
+# link rule for the .so file, .lds has to be first
+$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE
+ $(call if_changed,vdso32ld_and_check)
+$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE
+ $(call if_changed,vdso64ld_and_check)
+
+# assembly rules for the .S files
+$(obj-vdso32): %-32.o: %.S FORCE
+ $(call if_changed_dep,vdso32as)
+$(obj)/crtsavres-32.o: %-32.o: $(srctree)/arch/powerpc/lib/crtsavres.S FORCE
+ $(call if_changed_dep,vdso32as)
+$(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
+$(obj)/vgetrandom-32.o: %-32.o: %.c FORCE
+ $(call if_changed_dep,vdso32cc)
+$(obj-vdso64): %-64.o: %.S FORCE
+ $(call if_changed_dep,vdso64as)
+$(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE
+ $(call if_changed_dep,cc_o_c)
+$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE
+ $(call if_changed_dep,cc_o_c)
+
+# Generate VDSO offsets using helper script
+gen-vdso32sym := $(src)/gen_vdso32_offsets.sh
+quiet_cmd_vdso32sym = VDSO32SYM $@
+ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@
+gen-vdso64sym := $(src)/gen_vdso64_offsets.sh
+quiet_cmd_vdso64sym = VDSO64SYM $@
+ cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@
+
+include/generated/vdso32-offsets.h: $(obj)/vdso32.so.dbg FORCE
+ $(call if_changed,vdso32sym)
+include/generated/vdso64-offsets.h: $(obj)/vdso64.so.dbg FORCE
+ $(call if_changed,vdso64sym)
+
+# actual build commands
+quiet_cmd_vdso32ld_and_check = VDSO32L $@
+ cmd_vdso32ld_and_check = $(VDSOCC) $(ldflags-y) $(CC32FLAGS) $(LD32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+quiet_cmd_vdso32as = VDSO32A $@
+ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $<
+quiet_cmd_vdso32cc = VDSO32C $@
+ cmd_vdso32cc = $(VDSOCC) $(filter-out $(CC32FLAGSREMOVE), $(c_flags)) $(CC32FLAGS) -c -o $@ $<
+
+quiet_cmd_vdso64ld_and_check = VDSO64L $@
+ cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check)
+quiet_cmd_vdso64as = VDSO64A $@
+ cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $<
diff --git a/arch/powerpc/kernel/vdso32/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S
index 1ba6feb71b31..488d3ade11e6 100644
--- a/arch/powerpc/kernel/vdso32/cacheflush.S
+++ b/arch/powerpc/kernel/vdso/cacheflush.S
@@ -1,18 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* vDSO provided cache flush routines
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
* IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
#include <asm/asm-offsets.h>
+#include <asm/cache.h>
.text
@@ -26,60 +24,76 @@
*/
V_FUNCTION_BEGIN(__kernel_sync_dicache)
.cfi_startproc
+BEGIN_FTR_SECTION
+ b 3f
+END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+#ifdef CONFIG_PPC64
mflr r12
.cfi_register lr,r12
- mr r11,r3
- bl __get_datapage@local
+ get_datapage r10 vdso_u_arch_data
mtlr r12
- mr r10,r3
+ .cfi_restore lr
+#endif
+#ifdef CONFIG_PPC64
lwz r7,CFG_DCACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+#else
+ li r5, L1_CACHE_BYTES - 1
+#endif
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5 /* ensure we get enough */
+#ifdef CONFIG_PPC64
lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
+ PPC_SRL. r8,r8,r9 /* compute line count */
+#else
+ srwi. r8, r8, L1_CACHE_SHIFT
+ mr r7, r6
+#endif
crclr cr0*4+so
beqlr /* nothing to do? */
mtctr r8
1: dcbst 0,r6
+#ifdef CONFIG_PPC64
add r6,r6,r7
+#else
+ addi r6, r6, L1_CACHE_BYTES
+#endif
bdnz 1b
sync
/* Now invalidate the instruction cache */
+#ifdef CONFIG_PPC64
lwz r7,CFG_ICACHE_BLOCKSZ(r10)
addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
+ andc r6,r3,r5 /* round low to line bdy */
subf r8,r6,r4 /* compute length */
add r8,r8,r5
lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
+ PPC_SRL. r8,r8,r9 /* compute line count */
crclr cr0*4+so
beqlr /* nothing to do? */
+#endif
mtctr r8
+#ifdef CONFIG_PPC64
2: icbi 0,r6
add r6,r6,r7
+#else
+2: icbi 0, r7
+ addi r7, r7, L1_CACHE_BYTES
+#endif
bdnz 2b
isync
li r3,0
blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache)
-
-
-/*
- * POWER5 version of __kernel_sync_dicache
- */
-V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
- .cfi_startproc
+3:
crclr cr0*4+so
sync
+ icbi 0,r1
isync
li r3,0
blr
.cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache_p5)
-
+V_FUNCTION_END(__kernel_sync_dicache)
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso/datapage.S
index dc21e891d2e7..d23b2e8e2a34 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso/datapage.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Access to the shared data page by the vDSO & syscall map
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
@@ -14,31 +10,9 @@
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
.text
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,1f
- .global __kernel_datapage_offset;
-__kernel_datapage_offset:
- .long 0
-1:
- mflr r3
- mtlr r0
- lwz r0,0(r3)
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
/*
* void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
@@ -53,15 +27,18 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- mr r4,r3
- bl __get_datapage@local
+ mr. r4,r3
+ get_datapage r3 vdso_u_arch_data
mtlr r12
+#ifdef __powerpc64__
+ addi r3,r3,CFG_SYSCALL_MAP64
+#else
addi r3,r3,CFG_SYSCALL_MAP32
- cmpli cr0,r4,0
+#endif
+ crclr cr0*4+so
beqlr
- li r0,__NR_syscalls
+ li r0,NR_syscalls
stw r0,0(r4)
- crclr cr0*4+so
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_syscall_map)
@@ -75,9 +52,11 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
.cfi_register lr,r12
- bl __get_datapage@local
+ get_datapage r3 vdso_u_arch_data
+#ifndef __powerpc64__
lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3)
- lwz r3,CFG_TB_TICKS_PER_SEC(r3)
+#endif
+ PPC_LL r3,CFG_TB_TICKS_PER_SEC(r3)
mtlr r12
crclr cr0*4+so
blr
diff --git a/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh
new file mode 100755
index 000000000000..c7b54a5dcd3e
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gen_vdso32_offsets.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com>
+#
+
+LC_ALL=C
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso32_offset_\2\t0x\1/p'
diff --git a/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh
new file mode 100755
index 000000000000..4bf15ffd5933
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gen_vdso64_offsets.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+#
+# Match symbols in the DSO that look like VDSO_*; produce a header file
+# of constant offsets into the shared object.
+#
+# Doing this inside the Makefile will break the $(filter-out) function,
+# causing Kbuild to rebuild the vdso-offsets header file every time.
+#
+# Author: Will Deacon <will.deacon@arm.com>
+#
+
+LC_ALL=C
+sed -n -e 's/^00*/0/' -e \
+'s/^\([0-9a-fA-F]*\) . VDSO_\([a-zA-Z0-9_]*\)$/\#define vdso64_offset_\2\t0x\1/p'
diff --git a/arch/powerpc/kernel/vdso/getcpu.S b/arch/powerpc/kernel/vdso/getcpu.S
new file mode 100644
index 000000000000..8e08ccf19062
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/getcpu.S
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ *
+ * Copyright (C) IBM Corporation, 2012
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+
+ .text
+/*
+ * Exact prototype of getcpu
+ *
+ * int __kernel_getcpu(unsigned *cpu, unsigned *node);
+ *
+ */
+#if defined(CONFIG_PPC64)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ mfspr r5,SPRN_SPRG_VDSO_READ
+ PPC_LCMPI cr0,r3,0
+ PPC_LCMPI cr1,r4,0
+ clrlwi r6,r5,16
+ rlwinm r7,r5,16,31-15,31-0
+ beq cr0,1f
+ stw r6,0(r3)
+1: crclr cr0*4+so
+ li r3,0 /* always success */
+ beqlr cr1
+ stw r7,0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#elif !defined(CONFIG_SMP)
+V_FUNCTION_BEGIN(__kernel_getcpu)
+ .cfi_startproc
+ cmpwi cr0, r3, 0
+ cmpwi cr1, r4, 0
+ li r5, 0
+ beq cr0, 1f
+ stw r5, 0(r3)
+1: li r3, 0 /* always success */
+ crclr cr0*4+so
+ beqlr cr1
+ stw r5, 0(r4)
+ blr
+ .cfi_endproc
+V_FUNCTION_END(__kernel_getcpu)
+#endif
diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S
new file mode 100644
index 000000000000..a80d9fb436f7
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/getrandom.S
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Userland implementation of getrandom() for processes
+ * for use in the vDSO
+ *
+ * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+/*
+ * The macro sets two stack frames, one for the caller and one for the callee
+ * because there is no requirement for the caller to set a stack frame when
+ * calling VDSO so it may have omitted to set one, especially on PPC64
+ */
+
+/*
+ * cvdso_call funct
+ *
+ * Emit the body of a VDSO entry point that calls the C function \funct.
+ * Two PPC_MIN_STKFRM-sized frames are pushed, LR (and r2 on 64-bit) are
+ * saved around the call, and the C return value in r3 is converted to the
+ * convention used on exit here: r3 >= 0 is returned as-is with cr0.SO
+ * clear; r3 < 0 is returned negated with cr0.SO set.
+ */
+.macro cvdso_call funct
+ .cfi_startproc
+ /* first frame */
+ PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+ mflr r0
+ /* second frame */
+ PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+ /* LR goes into the first frame (offset is relative to the new r1) */
+ PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+ .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+ /* preserve r2 across the call */
+ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
+ bl CFUNC(DOTSYM(\funct))
+ PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+#ifdef __powerpc64__
+ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_restore r2
+#endif
+ /* cr0 <- sign of the C return value, tested by bgelr below */
+ cmpwi r3, 0
+ mtlr r0
+ /* pop both frames */
+ addi r1, r1, 2 * PPC_MIN_STKFRM
+ .cfi_restore lr
+ .cfi_def_cfa_offset 0
+ crclr so
+ /* success: return r3 unchanged with SO clear */
+ bgelr+
+ /* failure: flag it in SO and return the negated (positive) value */
+ crset so
+ neg r3, r3
+ blr
+ .cfi_endproc
+.endm
+
+ .text
+V_FUNCTION_BEGIN(__kernel_getrandom)
+ cvdso_call __c_kernel_getrandom
+V_FUNCTION_END(__kernel_getrandom)
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
new file mode 100644
index 000000000000..79c967212444
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Userland implementation of gettimeofday() for processes
+ * for use in the vDSO
+ *
+ * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
+ * IBM Corp.
+ */
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
+#include <asm/asm-offsets.h>
+#include <asm/unistd.h>
+
+/*
+ * The macro sets two stack frames, one for the caller and one for the callee
+ * because there is no requirement for the caller to set a stack frame when
+ * calling VDSO so it may have omitted to set one, especially on PPC64
+ */
+
+/*
+ * cvdso_call funct call_time=0
+ *
+ * Emit the body of a VDSO time entry point that calls the C function
+ * \funct with an extra trailing argument obtained via get_datapage for
+ * vdso_u_time_data.
+ *
+ * call_time == 0: the extra argument goes in r5; on return, r3 == 0 is
+ *                 returned with cr0.SO clear, any other value is returned
+ *                 negated with cr0.SO set.
+ * call_time != 0: the extra argument goes in r4; r3 is returned unchanged
+ *                 with cr0.SO clear (no error conversion).
+ */
+.macro cvdso_call funct call_time=0
+ .cfi_startproc
+ /* first frame */
+ PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+ mflr r0
+ /* second frame */
+ PPC_STLU r1, -PPC_MIN_STKFRM(r1)
+ .cfi_adjust_cfa_offset PPC_MIN_STKFRM
+ /* LR goes into the first frame (offset is relative to the new r1) */
+ PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+ .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF
+#ifdef __powerpc64__
+ /* preserve r2 across the call */
+ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT
+#endif
+ /* .ifeq assembles its body when \call_time is zero */
+ .ifeq \call_time
+ get_datapage r5 vdso_u_time_data
+ .else
+ get_datapage r4 vdso_u_time_data
+ .endif
+ bl CFUNC(DOTSYM(\funct))
+ PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
+#ifdef __powerpc64__
+ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
+ .cfi_restore r2
+#endif
+ .ifeq \call_time
+ /* cr0 <- (r3 vs 0), tested by beqlr below */
+ cmpwi r3, 0
+ .endif
+ mtlr r0
+ /* pop both frames */
+ addi r1, r1, 2 * PPC_MIN_STKFRM
+ .cfi_restore lr
+ .cfi_def_cfa_offset 0
+ crclr so
+ .ifeq \call_time
+ /* r3 == 0: return with SO clear */
+ beqlr+
+ /* otherwise flag the failure in SO and return the negated value */
+ crset so
+ neg r3, r3
+ .endif
+ blr
+ .cfi_endproc
+.endm
+
+ .text
+/*
+ * Exact prototype of gettimeofday
+ *
+ * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_gettimeofday)
+ cvdso_call __c_kernel_gettimeofday
+V_FUNCTION_END(__kernel_gettimeofday)
+
+/*
+ * Exact prototype of clock_gettime()
+ *
+ * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_gettime)
+ cvdso_call __c_kernel_clock_gettime
+V_FUNCTION_END(__kernel_clock_gettime)
+
+/*
+ * Exact prototype of clock_gettime64()
+ *
+ * int __kernel_clock_gettime64(clockid_t clock_id, struct __timespec64 *ts);
+ *
+ */
+#ifndef __powerpc64__
+V_FUNCTION_BEGIN(__kernel_clock_gettime64)
+ cvdso_call __c_kernel_clock_gettime64
+V_FUNCTION_END(__kernel_clock_gettime64)
+#endif
+
+/*
+ * Exact prototype of clock_getres()
+ *
+ * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_clock_getres)
+ cvdso_call __c_kernel_clock_getres
+V_FUNCTION_END(__kernel_clock_getres)
+
+
+/*
+ * Exact prototype of time()
+ *
+ * time_t time(time_t *t);
+ *
+ */
+V_FUNCTION_BEGIN(__kernel_time)
+ cvdso_call __c_kernel_time call_time=1
+V_FUNCTION_END(__kernel_time)
diff --git a/arch/powerpc/kernel/vdso32/note.S b/arch/powerpc/kernel/vdso/note.S
index d4b5be4f3d5f..227a7327399e 100644
--- a/arch/powerpc/kernel/vdso32/note.S
+++ b/arch/powerpc/kernel/vdso/note.S
@@ -5,6 +5,7 @@
#include <linux/uts.h>
#include <linux/version.h>
+#include <linux/build-salt.h>
#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \
.section name, flags; \
@@ -23,3 +24,5 @@
ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0)
.long LINUX_VERSION_CODE
ASM_ELF_NOTE_END
+
+BUILD_SALT
diff --git a/arch/powerpc/kernel/vdso32/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp32.S
index cf0c9c9c24f9..0bcc5e5fe789 100644
--- a/arch/powerpc/kernel/vdso32/sigtramp.S
+++ b/arch/powerpc/kernel/vdso/sigtramp32.S
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Signal trampolines for 32 bits processes in a ppc64 kernel for
* use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
#include <asm/ppc_asm.h>
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso/sigtramp64.S
index 542c6f422e4d..2d4067561293 100644
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso/sigtramp64.S
@@ -1,15 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Signal trampoline for 64 bits processes in a ppc64 kernel for
* use in the vDSO
*
* Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
* Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <asm/cache.h> /* IFETCH_ALIGN_BYTES */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/unistd.h>
@@ -18,21 +15,26 @@
.text
-/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from
- the return address to get an address in the middle of the presumed
- call instruction. Since we don't have a call here, we artificially
- extend the range covered by the unwind info by padding before the
- real start. */
- nop
+/*
+ * __kernel_start_sigtramp_rt64 and __kernel_sigtramp_rt64 together
+ * are one function split in two parts. The kernel jumps to the former
+ * and the signal handler indirectly (by blr) returns to the latter.
+ * __kernel_sigtramp_rt64 needs to point to the return address so
+ * glibc can correctly identify the trampoline stack frame.
+ */
.balign 8
+ .balign IFETCH_ALIGN_BYTES
+V_FUNCTION_BEGIN(__kernel_start_sigtramp_rt64)
+.Lsigrt_start:
+ bctrl /* call the handler */
+V_FUNCTION_END(__kernel_start_sigtramp_rt64)
V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
-.Lsigrt_start = . - 4
addi r1, r1, __SIGNAL_FRAMESIZE
li r0,__NR_rt_sigreturn
sc
.Lsigrt_end:
V_FUNCTION_END(__kernel_sigtramp_rt64)
-/* The ".balign 8" above and the following zeros mimic the old stack
+/* The .balign 8 above and the following zeros mimic the old stack
trampoline layout. The last magic value is the ucontext pointer,
chosen in such a way that older libgcc unwind code returns a zero
for a sigcontext pointer. */
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S
index e58ee10fa5c0..72a1012b8a20 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso32.lds.S
@@ -1,8 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This is the infamous ld script for the 32 bits vdso
* library
*/
#include <asm/vdso.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
#ifdef __LITTLE_ENDIAN__
OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
@@ -10,11 +14,12 @@ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle")
OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
#endif
OUTPUT_ARCH(powerpc:common)
-ENTRY(_start)
SECTIONS
{
- . = VDSO32_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -35,17 +40,25 @@ SECTIONS
PROVIDE(etext = .);
. = ALIGN(8);
+ VDSO_ftr_fixup_start = .;
__ftr_fixup : { *(__ftr_fixup) }
+ VDSO_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_mmu_ftr_fixup_start = .;
__mmu_ftr_fixup : { *(__mmu_ftr_fixup) }
+ VDSO_mmu_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_lwsync_fixup_start = .;
__lwsync_fixup : { *(__lwsync_fixup) }
+ VDSO_lwsync_fixup_end = .;
#ifdef CONFIG_PPC64
. = ALIGN(8);
+ VDSO_fw_ftr_fixup_start = .;
__fw_ftr_fixup : { *(__fw_ftr_fixup) }
+ VDSO_fw_ftr_fixup_end = .;
#endif
/*
@@ -63,53 +76,22 @@ SECTIONS
.got : { *(.got) } :text
.plt : { *(.plt) }
+ .rela.dyn : { *(.rela .rela*) }
+
_end = .;
__end = .;
PROVIDE(end = .);
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the beginning
- * of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
+ DWARF_DEBUG
+ ELF_DETAILS
/DISCARD/ : {
*(.note.GNU-stack)
+ *(*.EMB.apuinfo)
+ *(.branch_lt)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
+ *(.got1 .glink .iplt)
}
}
@@ -137,25 +119,27 @@ VERSION
{
VDSO_VERSION_STRING {
global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_datapage_offset;
-
__kernel_get_syscall_map;
__kernel_gettimeofday;
__kernel_clock_gettime;
+ __kernel_clock_gettime64;
__kernel_clock_getres;
+ __kernel_time;
__kernel_get_tbfreq;
__kernel_sync_dicache;
- __kernel_sync_dicache_p5;
__kernel_sigtramp32;
__kernel_sigtramp_rt32;
-#ifdef CONFIG_PPC64
+#if defined(CONFIG_PPC64) || !defined(CONFIG_SMP)
__kernel_getcpu;
#endif
- __kernel_time;
+ __kernel_getrandom;
local: *;
};
}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp32 = __kernel_sigtramp32;
+VDSO_sigtramp_rt32 = __kernel_sigtramp_rt32;
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S
index 64fb183a47c2..32102a05eaa7 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso/vdso64.lds.S
@@ -1,8 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* This is the infamous ld script for the 64 bits vdso
* library
*/
#include <asm/vdso.h>
+#include <asm/page.h>
+#include <asm-generic/vmlinux.lds.h>
+#include <vdso/datapage.h>
#ifdef __LITTLE_ENDIAN__
OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
@@ -10,11 +14,12 @@ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle")
OUTPUT_FORMAT("elf64-powerpc", "elf64-powerpc", "elf64-powerpc")
#endif
OUTPUT_ARCH(powerpc:common64)
-ENTRY(_start)
SECTIONS
{
- . = VDSO64_LBASE + SIZEOF_HEADERS;
+ VDSO_VVAR_SYMS
+
+ . = SIZEOF_HEADERS;
.hash : { *(.hash) } :text
.gnu.hash : { *(.gnu.hash) }
@@ -29,23 +34,31 @@ SECTIONS
. = ALIGN(16);
.text : {
*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
- *(.sfpr .glink)
+ *(.sfpr)
} :text
PROVIDE(__etext = .);
PROVIDE(_etext = .);
PROVIDE(etext = .);
. = ALIGN(8);
+ VDSO_ftr_fixup_start = .;
__ftr_fixup : { *(__ftr_fixup) }
+ VDSO_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_mmu_ftr_fixup_start = .;
__mmu_ftr_fixup : { *(__mmu_ftr_fixup) }
+ VDSO_mmu_ftr_fixup_end = .;
. = ALIGN(8);
+ VDSO_lwsync_fixup_start = .;
__lwsync_fixup : { *(__lwsync_fixup) }
+ VDSO_lwsync_fixup_end = .;
. = ALIGN(8);
+ VDSO_fw_ftr_fixup_start = .;
__fw_ftr_fixup : { *(__fw_ftr_fixup) }
+ VDSO_fw_ftr_fixup_end = .;
/*
* Other stuff is appended to the text segment:
@@ -58,58 +71,24 @@ SECTIONS
.eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
.eh_frame : { KEEP (*(.eh_frame)) } :text
.gcc_except_table : { *(.gcc_except_table) }
- .rela.dyn ALIGN(8) : { *(.rela.dyn) }
+ .rela.dyn ALIGN(8) : { *(.rela .rela*) }
- .opd ALIGN(8) : { KEEP (*(.opd)) }
.got ALIGN(8) : { *(.got .toc) }
_end = .;
PROVIDE(end = .);
- /*
- * Stabs debugging sections are here too.
- */
- .stab 0 : { *(.stab) }
- .stabstr 0 : { *(.stabstr) }
- .stab.excl 0 : { *(.stab.excl) }
- .stab.exclstr 0 : { *(.stab.exclstr) }
- .stab.index 0 : { *(.stab.index) }
- .stab.indexstr 0 : { *(.stab.indexstr) }
- .comment 0 : { *(.comment) }
-
- /*
- * DWARF debug sections.
- * Symbols in the DWARF debugging sections are relative to the beginning
- * of the section so we begin them at 0.
- */
- /* DWARF 1 */
- .debug 0 : { *(.debug) }
- .line 0 : { *(.line) }
- /* GNU DWARF 1 extensions */
- .debug_srcinfo 0 : { *(.debug_srcinfo) }
- .debug_sfnames 0 : { *(.debug_sfnames) }
- /* DWARF 1.1 and DWARF 2 */
- .debug_aranges 0 : { *(.debug_aranges) }
- .debug_pubnames 0 : { *(.debug_pubnames) }
- /* DWARF 2 */
- .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
- .debug_abbrev 0 : { *(.debug_abbrev) }
- .debug_line 0 : { *(.debug_line) }
- .debug_frame 0 : { *(.debug_frame) }
- .debug_str 0 : { *(.debug_str) }
- .debug_loc 0 : { *(.debug_loc) }
- .debug_macinfo 0 : { *(.debug_macinfo) }
- /* SGI/MIPS DWARF 2 extensions */
- .debug_weaknames 0 : { *(.debug_weaknames) }
- .debug_funcnames 0 : { *(.debug_funcnames) }
- .debug_typenames 0 : { *(.debug_typenames) }
- .debug_varnames 0 : { *(.debug_varnames) }
+ DWARF_DEBUG
+ ELF_DETAILS
/DISCARD/ : {
*(.note.GNU-stack)
+ *(*.EMB.apuinfo)
*(.branch_lt)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
+ *(.opd)
+ *(.glink .iplt .plt)
}
}
@@ -137,22 +116,22 @@ VERSION
{
VDSO_VERSION_STRING {
global:
- /*
- * Has to be there for the kernel to find
- */
- __kernel_datapage_offset;
-
__kernel_get_syscall_map;
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
__kernel_get_tbfreq;
__kernel_sync_dicache;
- __kernel_sync_dicache_p5;
__kernel_sigtramp_rt64;
__kernel_getcpu;
__kernel_time;
+ __kernel_getrandom;
local: *;
};
}
+
+/*
+ * Make the sigreturn code visible to the kernel.
+ */
+VDSO_sigtramp_rt64 = __kernel_start_sigtramp_rt64;
diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
new file mode 100644
index 000000000000..7f9061a9e8b4
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S
@@ -0,0 +1,365 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/ppc_asm.h>
+
+#define dst_bytes r3
+#define key r4
+#define counter r5
+#define nblocks r6
+
+#define idx_r0 r0
+#define val4 r4
+
+#define const0 0x61707865
+#define const1 0x3320646e
+#define const2 0x79622d32
+#define const3 0x6b206574
+
+#define key0 r5
+#define key1 r6
+#define key2 r7
+#define key3 r8
+#define key4 r9
+#define key5 r10
+#define key6 r11
+#define key7 r12
+
+#define counter0 r14
+#define counter1 r15
+
+#define state0 r16
+#define state1 r17
+#define state2 r18
+#define state3 r19
+#define state4 r20
+#define state5 r21
+#define state6 r22
+#define state7 r23
+#define state8 r24
+#define state9 r25
+#define state10 r26
+#define state11 r27
+#define state12 r28
+#define state13 r29
+#define state14 r30
+#define state15 r31
+
+/*
+ * quarterround4: four independent ChaCha quarter-rounds, one per
+ * (aN, bN, cN, dN) register quadruple, interleaved instruction by
+ * instruction.  Each quarter-round performs, in order:
+ *   a += b; d ^= a; d = rotl32(d, 16)
+ *   c += d; b ^= c; b = rotl32(b, 12)
+ *   a += b; d ^= a; d = rotl32(d, 8)
+ *   c += d; b ^= c; b = rotl32(b, 7)
+ */
+.macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4
+ add \a1, \a1, \b1
+ add \a2, \a2, \b2
+ add \a3, \a3, \b3
+ add \a4, \a4, \b4
+ xor \d1, \d1, \a1
+ xor \d2, \d2, \a2
+ xor \d3, \d3, \a3
+ xor \d4, \d4, \a4
+ rotlwi \d1, \d1, 16
+ rotlwi \d2, \d2, 16
+ rotlwi \d3, \d3, 16
+ rotlwi \d4, \d4, 16
+ add \c1, \c1, \d1
+ add \c2, \c2, \d2
+ add \c3, \c3, \d3
+ add \c4, \c4, \d4
+ xor \b1, \b1, \c1
+ xor \b2, \b2, \c2
+ xor \b3, \b3, \c3
+ xor \b4, \b4, \c4
+ rotlwi \b1, \b1, 12
+ rotlwi \b2, \b2, 12
+ rotlwi \b3, \b3, 12
+ rotlwi \b4, \b4, 12
+ add \a1, \a1, \b1
+ add \a2, \a2, \b2
+ add \a3, \a3, \b3
+ add \a4, \a4, \b4
+ xor \d1, \d1, \a1
+ xor \d2, \d2, \a2
+ xor \d3, \d3, \a3
+ xor \d4, \d4, \a4
+ rotlwi \d1, \d1, 8
+ rotlwi \d2, \d2, 8
+ rotlwi \d3, \d3, 8
+ rotlwi \d4, \d4, 8
+ add \c1, \c1, \d1
+ add \c2, \c2, \d2
+ add \c3, \c3, \d3
+ add \c4, \c4, \d4
+ xor \b1, \b1, \c1
+ xor \b2, \b2, \c2
+ xor \b3, \b3, \c3
+ xor \b4, \b4, \c4
+ rotlwi \b1, \b1, 7
+ rotlwi \b2, \b2, 7
+ rotlwi \b3, \b3, 7
+ rotlwi \b4, \b4, 7
+.endm
+
+#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4) \
+ quarterround4 state##a1 state##b1 state##c1 state##d1 \
+ state##a2 state##b2 state##c2 state##d2 \
+ state##a3 state##b3 state##c3 state##d3 \
+ state##a4 state##b4 state##c4 state##d4
+
+/*
+ * Very basic 32-bit implementation of ChaCha20. Produces a given positive number
+ * of blocks of output with a nonce of 0, taking an input key and 8-byte
+ * counter. Importantly does not spill the key or the cipher state to the stack;
+ * only the counter pointer and the caller's r14-r31 are saved there.
+ * Its arguments are:
+ *
+ * r3: output bytes
+ * r4: 32-byte key input
+ * r5: 8-byte counter input/output (pointer saved on stack)
+ * r6: number of 64-byte blocks to write to output
+ *
+ * r0: counter of blocks (initialised with r6)
+ * r4: Value '4' after key has been read.
+ * r5-r12: key
+ * r14-r15: counter
+ * r16-r31: state
+ */
+SYM_FUNC_START(__arch_chacha20_blocks_nostack)
+ /*
+  * Save the counter pointer (r5 is reused as key0 below) and r14-r31,
+  * which are used for the counter and the 16 state words.
+  */
+#ifdef __powerpc64__
+ /* 64-bit: stored at negative offsets from r1, r1 itself is not moved */
+ std counter, -216(r1)
+
+ std r14, -144(r1)
+ std r15, -136(r1)
+ std r16, -128(r1)
+ std r17, -120(r1)
+ std r18, -112(r1)
+ std r19, -104(r1)
+ std r20, -96(r1)
+ std r21, -88(r1)
+ std r22, -80(r1)
+ std r23, -72(r1)
+ std r24, -64(r1)
+ std r25, -56(r1)
+ std r26, -48(r1)
+ std r27, -40(r1)
+ std r28, -32(r1)
+ std r29, -24(r1)
+ std r30, -16(r1)
+ std r31, -8(r1)
+#else
+ /* 32-bit: allocate a 96-byte frame and save into it */
+ stwu r1, -96(r1)
+ stw counter, 20(r1)
+#ifdef __BIG_ENDIAN__
+ /* store-multiple saves r14..r31 in one instruction */
+ stmw r14, 24(r1)
+#else
+ /* NOTE(review): presumably stmw is not usable in little-endian mode - confirm */
+ stw r14, 24(r1)
+ stw r15, 28(r1)
+ stw r16, 32(r1)
+ stw r17, 36(r1)
+ stw r18, 40(r1)
+ stw r19, 44(r1)
+ stw r20, 48(r1)
+ stw r21, 52(r1)
+ stw r22, 56(r1)
+ stw r23, 60(r1)
+ stw r24, 64(r1)
+ stw r25, 68(r1)
+ stw r26, 72(r1)
+ stw r27, 76(r1)
+ stw r28, 80(r1)
+ stw r29, 84(r1)
+ stw r30, 88(r1)
+ stw r31, 92(r1)
+#endif
+#endif /* __powerpc64__ */
+
+ /* Load the two 32-bit counter words */
+ lwz counter0, 0(counter)
+ lwz counter1, 4(counter)
+#ifdef __powerpc64__
+ /* 64-bit: fold counter1 into the upper half of counter0 so one add increments both */
+ rldimi counter0, counter1, 32, 0
+#endif
+ mr idx_r0, nblocks
+ /* Bias dst by -4: output stores below start at offset 4 */
+ subi dst_bytes, dst_bytes, 4
+
+ /* Load the eight key words; key0 overwrites r5 (the counter pointer) */
+ lwz key0, 0(key)
+ lwz key1, 4(key)
+ lwz key2, 8(key)
+ lwz key3, 12(key)
+ lwz key4, 16(key)
+ lwz key5, 20(key)
+ lwz key6, 24(key)
+ lwz key7, 28(key)
+
+ /* The key pointer (r4) is no longer needed; reuse r4 as the constant 4 */
+ li val4, 4
+.Lblock:
+ /* r31 temporarily holds the loop count (10) before becoming state15 */
+ li r31, 10
+
+ /* state0-3 = the four ChaCha constants */
+ lis state0, const0@ha
+ lis state1, const1@ha
+ lis state2, const2@ha
+ lis state3, const3@ha
+ addi state0, state0, const0@l
+ addi state1, state1, const1@l
+ addi state2, state2, const2@l
+ addi state3, state3, const3@l
+
+ mtctr r31
+
+ /* state4-11 = key */
+ mr state4, key0
+ mr state5, key1
+ mr state6, key2
+ mr state7, key3
+ mr state8, key4
+ mr state9, key5
+ mr state10, key6
+ mr state11, key7
+
+ /* state12-13 = block counter */
+ mr state12, counter0
+ mr state13, counter1
+
+ /* state14-15 = 0 */
+ li state14, 0
+ li state15, 0
+
+ /* 10 iterations, each a column round followed by a diagonal round */
+.Lpermute:
+ QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15)
+ QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14)
+
+ bdnz .Lpermute
+
+ /* Add the initial input words back into the permuted state */
+ addis state0, state0, const0@ha
+ addis state1, state1, const1@ha
+ addis state2, state2, const2@ha
+ addis state3, state3, const3@ha
+ addi state0, state0, const0@l
+ addi state1, state1, const1@l
+ addi state2, state2, const2@l
+ addi state3, state3, const3@l
+
+ add state4, state4, key0
+ add state5, state5, key1
+ add state6, state6, key2
+ add state7, state7, key3
+ add state8, state8, key4
+ add state9, state9, key5
+ add state10, state10, key6
+ add state11, state11, key7
+
+ add state12, state12, counter0
+ add state13, state13, counter1
+
+ /* Write the 16 state words (64 bytes) to the output and advance dst by 64 */
+#ifdef __BIG_ENDIAN__
+ /*
+  * Byte-reversed stores; each pair writes at dst+4 (index val4) and,
+  * after dst += 8, at dst+0.
+  */
+ stwbrx state0, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state1, 0, dst_bytes
+ stwbrx state2, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state3, 0, dst_bytes
+ stwbrx state4, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state5, 0, dst_bytes
+ stwbrx state6, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state7, 0, dst_bytes
+ stwbrx state8, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state9, 0, dst_bytes
+ stwbrx state10, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state11, 0, dst_bytes
+ stwbrx state12, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state13, 0, dst_bytes
+ stwbrx state14, val4, dst_bytes
+ addi dst_bytes, dst_bytes, 8
+ stwbrx state15, 0, dst_bytes
+#else
+ /* Plain stores at offsets 4..60; the final stwu also advances dst by 64 */
+ stw state0, 4(dst_bytes)
+ stw state1, 8(dst_bytes)
+ stw state2, 12(dst_bytes)
+ stw state3, 16(dst_bytes)
+ stw state4, 20(dst_bytes)
+ stw state5, 24(dst_bytes)
+ stw state6, 28(dst_bytes)
+ stw state7, 32(dst_bytes)
+ stw state8, 36(dst_bytes)
+ stw state9, 40(dst_bytes)
+ stw state10, 44(dst_bytes)
+ stw state11, 48(dst_bytes)
+ stw state12, 52(dst_bytes)
+ stw state13, 56(dst_bytes)
+ stw state14, 60(dst_bytes)
+ stwu state15, 64(dst_bytes)
+#endif
+
+ /* One block done; cr0 from this decrement is consumed by bne below */
+ subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */
+
+ /* Increment the 64-bit block counter */
+#ifdef __powerpc64__
+ addi counter0, counter0, 1
+ srdi counter1, counter0, 32
+#else
+ /* add with carry-out, then propagate the carry into the high word */
+ addic counter0, counter0, 1
+ addze counter1, counter1
+#endif
+
+ bne .Lblock
+
+ /* Reload the counter pointer (this also overwrites key0 in r5) */
+#ifdef __powerpc64__
+ ld counter, -216(r1)
+#else
+ lwz counter, 20(r1)
+#endif
+ /* Write the updated counter back to the caller */
+ stw counter0, 0(counter)
+ stw counter1, 4(counter)
+
+ /* Zero key1-key7 (r6-r12) so the key is not left in registers */
+ li r6, 0
+ li r7, 0
+ li r8, 0
+ li r9, 0
+ li r10, 0
+ li r11, 0
+ li r12, 0
+
+ /* Restore the caller's r14-r31 (this also overwrites counter and state) */
+#ifdef __powerpc64__
+ ld r14, -144(r1)
+ ld r15, -136(r1)
+ ld r16, -128(r1)
+ ld r17, -120(r1)
+ ld r18, -112(r1)
+ ld r19, -104(r1)
+ ld r20, -96(r1)
+ ld r21, -88(r1)
+ ld r22, -80(r1)
+ ld r23, -72(r1)
+ ld r24, -64(r1)
+ ld r25, -56(r1)
+ ld r26, -48(r1)
+ ld r27, -40(r1)
+ ld r28, -32(r1)
+ ld r29, -24(r1)
+ ld r30, -16(r1)
+ ld r31, -8(r1)
+#else
+#ifdef __BIG_ENDIAN__
+ lmw r14, 24(r1)
+#else
+ lwz r14, 24(r1)
+ lwz r15, 28(r1)
+ lwz r16, 32(r1)
+ lwz r17, 36(r1)
+ lwz r18, 40(r1)
+ lwz r19, 44(r1)
+ lwz r20, 48(r1)
+ lwz r21, 52(r1)
+ lwz r22, 56(r1)
+ lwz r23, 60(r1)
+ lwz r24, 64(r1)
+ lwz r25, 68(r1)
+ lwz r26, 72(r1)
+ lwz r27, 76(r1)
+ lwz r28, 80(r1)
+ lwz r29, 84(r1)
+ lwz r30, 88(r1)
+ lwz r31, 92(r1)
+#endif
+ /* pop the 96-byte frame allocated on entry */
+ addi r1, r1, 96
+#endif /* __powerpc64__ */
+ blr
+SYM_FUNC_END(__arch_chacha20_blocks_nostack)
diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c
new file mode 100644
index 000000000000..cc79b960a541
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vgetrandom.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Powerpc userspace implementation of getrandom()
+ *
+ * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+/*
+ * C entry point for the getrandom() vDSO call: forwards all five arguments
+ * unchanged to __cvdso_getrandom() and returns its result.
+ */
+ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state,
+ size_t opaque_len)
+{
+ return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
+}
diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..6f5167d81af5
--- /dev/null
+++ b/arch/powerpc/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Powerpc userspace implementations of gettimeofday() and similar.
+ */
+#include <linux/time.h>
+#include <linux/types.h>
+
+/*
+ * C entry points for clock_gettime()/clock_getres().  Each one takes the
+ * vdso_time_data pointer as an extra last argument and forwards to the
+ * matching __cvdso_*_data() helper, which takes that pointer first.
+ *
+ * 64-bit builds use struct __kernel_timespec.  32-bit builds use
+ * struct old_timespec32 for the classic calls and additionally provide
+ * __c_kernel_clock_gettime64() operating on struct __kernel_timespec.
+ */
+#ifdef __powerpc64__
+int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_time_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res,
+ const struct vdso_time_data *vd)
+{
+ return __cvdso_clock_getres_data(vd, clock_id, res);
+}
+#else
+/* 32-bit timespec variant */
+int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
+ const struct vdso_time_data *vd)
+{
+ return __cvdso_clock_gettime32_data(vd, clock, ts);
+}
+
+/* __kernel_timespec variant for 32-bit builds */
+int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts,
+ const struct vdso_time_data *vd)
+{
+ return __cvdso_clock_gettime_data(vd, clock, ts);
+}
+
+/* 32-bit timespec variant */
+int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res,
+ const struct vdso_time_data *vd)
+{
+ return __cvdso_clock_getres_time32_data(vd, clock_id, res);
+}
+#endif
+
+/*
+ * C entry point for gettimeofday(): forwards to __cvdso_gettimeofday_data()
+ * with the vdso_time_data pointer moved to the first argument.
+ */
+int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz,
+ const struct vdso_time_data *vd)
+{
+ return __cvdso_gettimeofday_data(vd, tv, tz);
+}
+
+/*
+ * C entry point for time(): forwards to __cvdso_time_data() with the
+ * vdso_time_data pointer moved to the first argument and returns its result.
+ */
+__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_time_data *vd)
+{
+ return __cvdso_time_data(vd, time);
+}
diff --git a/arch/powerpc/kernel/vdso32/.gitignore b/arch/powerpc/kernel/vdso32/.gitignore
deleted file mode 100644
index fea5809857a5..000000000000
--- a/arch/powerpc/kernel/vdso32/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-vdso32.lds
-vdso32.so.dbg
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
deleted file mode 100644
index 53e6c9b979ec..000000000000
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ /dev/null
@@ -1,58 +0,0 @@
-
-# List of files in the vdso, has to be asm only for now
-
-obj-vdso32-$(CONFIG_PPC64) = getcpu.o
-obj-vdso32 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o \
- $(obj-vdso32-y)
-
-# Build rules
-
-ifeq ($(CONFIG_PPC32),y)
-CROSS32CC := $(CC)
-endif
-
-targets := $(obj-vdso32) vdso32.so vdso32.so.dbg
-obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32))
-
-GCOV_PROFILE := n
-
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-asflags-y := -D__VDSO32__ -s
-
-obj-y += vdso32_wrapper.o
-extra-y += vdso32.lds
-CPPFLAGS_vdso32.lds += -P -C -Upowerpc
-
-# Force dependency (incbin is bad)
-$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32)
- $(call if_changed,vdso32ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso32): %.o: %.S
- $(call if_changed_dep,vdso32as)
-
-# actual build commands
-quiet_cmd_vdso32ld = VDSO32L $@
- cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
-quiet_cmd_vdso32as = VDSO32A $@
- cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso32.so: $(obj)/vdso32.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso32.so
diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S
deleted file mode 100644
index 23eb9a9441bd..000000000000
--- a/arch/powerpc/kernel/vdso32/getcpu.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2012
- *
- * Author: Anton Blanchard <anton@au.ibm.com>
- */
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-
- .text
-/*
- * Exact prototype of getcpu
- *
- * int __kernel_getcpu(unsigned *cpu, unsigned *node);
- *
- */
-V_FUNCTION_BEGIN(__kernel_getcpu)
- .cfi_startproc
- mfspr r5,SPRN_SPRG_VDSO_READ
- cmpdi cr0,r3,0
- cmpdi cr1,r4,0
- clrlwi r6,r5,16
- rlwinm r7,r5,16,31-15,31-0
- beq cr0,1f
- stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
deleted file mode 100644
index 6b2b69616e77..000000000000
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * Userland implementation of gettimeofday() for 32 bits processes in a
- * ppc64 kernel for use in the vDSO
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org,
- * IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
-/* Offset for the low 32-bit part of a field of long type */
-#ifdef CONFIG_PPC64
-#define LOPART 4
-#define TSPEC_TV_SEC TSPC64_TV_SEC+LOPART
-#else
-#define LOPART 0
-#define TSPEC_TV_SEC TSPC32_TV_SEC
-#endif
-
- .text
-/*
- * Exact prototype of gettimeofday
- *
- * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
- *
- */
-V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r10,r3 /* r10 saves tv */
- mr r11,r4 /* r11 saves tz */
- bl __get_datapage@local /* get data page */
- mr r9, r3 /* datapage ptr in r9 */
- cmplwi r10,0 /* check if tv is NULL */
- beq 3f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l /* so we get microseconds in r4 */
- bl __do_get_tspec@local /* get sec/usec from tb & kernel */
- stw r3,TVAL32_TV_SEC(r10)
- stw r4,TVAL32_TV_USEC(r10)
-
-3: cmplwi r11,0 /* check if tz is NULL */
- beq 1f
- lwz r4,CFG_TZ_MINUTEWEST(r9)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r9)
- stw r4,TZONE_TZ_MINWEST(r11)
- stw r5,TZONE_TZ_DSTTIME(r11)
-
-1: mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_gettimeofday)
-
-/*
- * Exact prototype of clock_gettime()
- *
- * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpli cr0,r3,CLOCK_REALTIME
- cmpli cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl __get_datapage@local /* get data page */
- mr r9,r3 /* datapage ptr in r9 */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl __do_get_tspec@local /* get sec/nsec from tb & kernel */
- bne cr1,80f /* not monotonic -> all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_xsec.
- * At this point, r3,r4 contain our sec/nsec values, r5 and r6
- * can be used, r7 contains NSEC_PER_SEC.
- */
-
- lwz r5,WTOM_CLOCK_SEC(r9)
- lwz r6,WTOM_CLOCK_NSEC(r9)
-
- /* We now have our offset in r5,r6. We create a fake dependency
- * on that value and re-check the counter
- */
- or r0,r6,r5
- xor r0,r0,r0
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmpl cr0,r8,r0 /* check if updated */
- bne- 50b
-
- /* Calculate and store result. Note that this mimics the C code,
- * which may cause funny results if nsec goes negative... is that
- * possible at all ?
- */
- add r3,r3,r5
- add r4,r4,r6
- cmpw cr0,r4,r7
- cmpwi cr1,r4,0
- blt 1f
- subf r4,r7,r4
- addi r3,r3,1
-1: bge cr1,80f
- addi r3,r3,-1
- add r4,r4,r7
-
-80: stw r3,TSPC32_TV_SEC(r11)
- stw r4,TSPC32_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_gettime)
-
-
-/*
- * Exact prototype of clock_getres()
- *
- * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- li r3,0
- cmpli cr0,r4,0
- crclr cr0*4+so
- beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
- stw r3,TSPC32_TV_SEC(r4)
- stw r5,TSPC32_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_getres)
-
-
-/*
- * Exact prototype of time()
- *
- * time_t time(time *t);
- *
- */
-V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- bl __get_datapage@local
- mr r9, r3 /* datapage ptr in r9 */
-
- lwz r3,STAMP_XTIME+TSPEC_TV_SEC(r9)
-
- cmplwi r11,0 /* check if t is NULL */
- beq 2f
- stw r3,0(r11) /* store result at *t */
-2: mtlr r12
- crclr cr0*4+so
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_time)
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r3 (seconds) and r4.
- * On entry, r7 gives the resolution of r4, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r4 in microseconds or nanoseconds.
- * It expects the datapage ptr in r9 and doesn't clobber it.
- * It clobbers r0, r5 and r6.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-__do_get_tspec:
- .cfi_startproc
- /* Check for update count & load values. We use the low
- * order 32 bits of the update count
- */
-1: lwz r8,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r9,r9,r0
-
- /* Load orig stamp (offset to TB) */
- lwz r5,CFG_TB_ORIG_STAMP(r9)
- lwz r6,(CFG_TB_ORIG_STAMP+4)(r9)
-
- /* Get a stable TB value */
-#ifdef CONFIG_8xx
-2: mftbu r3
- mftbl r4
- mftbu r0
-#else
-2: mfspr r3, SPRN_TBRU
- mfspr r4, SPRN_TBRL
- mfspr r0, SPRN_TBRU
-#endif
- cmplw cr0,r3,r0
- bne- 2b
-
- /* Subtract tb orig stamp and shift left 12 bits.
- */
- subfc r4,r6,r4
- subfe r0,r5,r3
- slwi r0,r0,12
- rlwimi. r0,r4,12,20,31
- slwi r4,r4,12
-
- /*
- * Load scale factor & do multiplication.
- * We only use the high 32 bits of the tb_to_xs value.
- * Even with a 1GHz timebase clock, the high 32 bits of
- * tb_to_xs will be at least 4 million, so the error from
- * ignoring the low 32 bits will be no more than 0.25ppm.
- * The error will just make the clock run very very slightly
- * slow until the next time the kernel updates the VDSO data,
- * at which point the clock will catch up to the kernel's value,
- * so there is no long-term error accumulation.
- */
- lwz r5,CFG_TB_TO_XS(r9) /* load values */
- mulhwu r4,r4,r5
- li r3,0
-
- beq+ 4f /* skip high part computation if 0 */
- mulhwu r3,r0,r5
- mullw r5,r0,r5
- addc r4,r4,r5
- addze r3,r3
-4:
- /* At this point, we have seconds since the xtime stamp
- * as a 32.32 fixed-point number in r3 and r4.
- * Load & add the xtime stamp.
- */
- lwz r5,STAMP_XTIME+TSPEC_TV_SEC(r9)
- lwz r6,STAMP_SEC_FRAC(r9)
- addc r4,r4,r6
- adde r3,r3,r5
-
- /* We create a fake dependency on the result in r3/r4
- * and re-check the counter
- */
- or r6,r4,r3
- xor r0,r6,r6
- add r9,r9,r0
- lwz r0,(CFG_TB_UPDATE_COUNT+LOPART)(r9)
- cmplw cr0,r8,r0 /* check if updated */
- bne- 1b
-
- mulhwu r4,r4,r7 /* convert to micro or nanoseconds */
-
- blr
- .cfi_endproc
diff --git a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S
index 6ac107ac402a..20bca3548b44 100644
--- a/arch/powerpc/kernel/vdso32/vdso32_wrapper.S
+++ b/arch/powerpc/kernel/vdso32_wrapper.S
@@ -1,12 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/page.h>
- __PAGE_ALIGNED_DATA
+ .section ".data..ro_after_init", "aw"
.globl vdso32_start, vdso32_end
.balign PAGE_SIZE
vdso32_start:
- .incbin "arch/powerpc/kernel/vdso32/vdso32.so.dbg"
+ .incbin "arch/powerpc/kernel/vdso/vdso32.so.dbg"
.balign PAGE_SIZE
vdso32_end:
diff --git a/arch/powerpc/kernel/vdso64/.gitignore b/arch/powerpc/kernel/vdso64/.gitignore
deleted file mode 100644
index 77a0b423642c..000000000000
--- a/arch/powerpc/kernel/vdso64/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-vdso64.lds
-vdso64.so.dbg
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
deleted file mode 100644
index effca9404b17..000000000000
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ /dev/null
@@ -1,51 +0,0 @@
-# List of files in the vdso, has to be asm only for now
-
-obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
-
-# Build rules
-
-targets := $(obj-vdso64) vdso64.so vdso64.so.dbg
-obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
-
-GCOV_PROFILE := n
-
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
-asflags-y := -D__VDSO64__ -s
-
-obj-y += vdso64_wrapper.o
-extra-y += vdso64.lds
-CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
-
-# Force dependency (incbin is bad)
-$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
-
-# link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64)
- $(call if_changed,vdso64ld)
-
-# strip rule for the .so file
-$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg FORCE
- $(call if_changed,objcopy)
-
-# assembly rules for the .S files
-$(obj-vdso64): %.o: %.S
- $(call if_changed_dep,vdso64as)
-
-# actual build commands
-quiet_cmd_vdso64ld = VDSO64L $@
- cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
-quiet_cmd_vdso64as = VDSO64A $@
- cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
-
-# install commands for the unstripped file
-quiet_cmd_vdso_install = INSTALL $@
- cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
-
-vdso64.so: $(obj)/vdso64.so.dbg
- @mkdir -p $(MODLIB)/vdso
- $(call cmd,vdso_install)
-
-vdso_install: vdso64.so
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
deleted file mode 100644
index 69c5af2b3c96..000000000000
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * vDSO provided cache flush routines
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
- * IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-
- .text
-
-/*
- * Default "generic" version of __kernel_sync_dicache.
- *
- * void __kernel_sync_dicache(unsigned long start, unsigned long end)
- *
- * Flushes the data cache & invalidate the instruction cache for the
- * provided range [start, end[
- */
-V_FUNCTION_BEGIN(__kernel_sync_dicache)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
- mr r11,r3
- bl V_LOCAL_FUNC(__get_datapage)
- mtlr r12
- mr r10,r3
-
- lwz r7,CFG_DCACHE_BLOCKSZ(r10)
- addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5 /* ensure we get enough */
- lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
- crclr cr0*4+so
- beqlr /* nothing to do? */
- mtctr r8
-1: dcbst 0,r6
- add r6,r6,r7
- bdnz 1b
- sync
-
-/* Now invalidate the instruction cache */
-
- lwz r7,CFG_ICACHE_BLOCKSZ(r10)
- addi r5,r7,-1
- andc r6,r11,r5 /* round low to line bdy */
- subf r8,r6,r4 /* compute length */
- add r8,r8,r5
- lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10)
- srw. r8,r8,r9 /* compute line count */
- crclr cr0*4+so
- beqlr /* nothing to do? */
- mtctr r8
-2: icbi 0,r6
- add r6,r6,r7
- bdnz 2b
- isync
- li r3,0
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache)
-
-
-/*
- * POWER5 version of __kernel_sync_dicache
- */
-V_FUNCTION_BEGIN(__kernel_sync_dicache_p5)
- .cfi_startproc
- crclr cr0*4+so
- sync
- isync
- li r3,0
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_sync_dicache_p5)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
deleted file mode 100644
index 79796de11737..000000000000
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Access to the shared data page by the vDSO & syscall map
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-#include <asm/vdso.h>
-
- .text
-V_FUNCTION_BEGIN(__get_datapage)
- .cfi_startproc
- /* We don't want that exposed or overridable as we want other objects
- * to be able to bl directly to here
- */
- .protected __get_datapage
- .hidden __get_datapage
-
- mflr r0
- .cfi_register lr,r0
-
- bcl 20,31,1f
- .global __kernel_datapage_offset;
-__kernel_datapage_offset:
- .long 0
-1:
- mflr r3
- mtlr r0
- lwz r0,0(r3)
- add r3,r0,r3
- blr
- .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
-/*
- * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
- *
- * returns a pointer to the syscall map. the map is agnostic to the
- * size of "long", unlike kernel bitops, it stores bits from top to
- * bottom so that memory actually contains a linear bitmap
- * check for syscall N by testing bit (0x80000000 >> (N & 0x1f)) of
- * 32 bits int at N >> 5.
- */
-V_FUNCTION_BEGIN(__kernel_get_syscall_map)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
- mr r4,r3
- bl V_LOCAL_FUNC(__get_datapage)
- mtlr r12
- addi r3,r3,CFG_SYSCALL_MAP64
- cmpli cr0,r4,0
- crclr cr0*4+so
- beqlr
- li r0,__NR_syscalls
- stw r0,0(r4)
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_get_syscall_map)
-
-
-/*
- * void unsigned long __kernel_get_tbfreq(void);
- *
- * returns the timebase frequency in HZ
- */
-V_FUNCTION_BEGIN(__kernel_get_tbfreq)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
- bl V_LOCAL_FUNC(__get_datapage)
- ld r3,CFG_TB_TICKS_PER_SEC(r3)
- mtlr r12
- crclr cr0*4+so
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_get_tbfreq)
diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S
deleted file mode 100644
index 23eb9a9441bd..000000000000
--- a/arch/powerpc/kernel/vdso64/getcpu.S
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2012
- *
- * Author: Anton Blanchard <anton@au.ibm.com>
- */
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-
- .text
-/*
- * Exact prototype of getcpu
- *
- * int __kernel_getcpu(unsigned *cpu, unsigned *node);
- *
- */
-V_FUNCTION_BEGIN(__kernel_getcpu)
- .cfi_startproc
- mfspr r5,SPRN_SPRG_VDSO_READ
- cmpdi cr0,r3,0
- cmpdi cr1,r4,0
- clrlwi r6,r5,16
- rlwinm r7,r5,16,31-15,31-0
- beq cr0,1f
- stw r6,0(r3)
-1: beq cr1,2f
- stw r7,0(r4)
-2: crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_getcpu)
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
deleted file mode 100644
index a76b4af37ef2..000000000000
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Userland implementation of gettimeofday() for 64 bits processes in a
- * ppc64 kernel for use in the vDSO
- *
- * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org),
- * IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/vdso.h>
-#include <asm/asm-offsets.h>
-#include <asm/unistd.h>
-
- .text
-/*
- * Exact prototype of gettimeofday
- *
- * int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz);
- *
- */
-V_FUNCTION_BEGIN(__kernel_gettimeofday)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds tv */
- mr r10,r4 /* r10 holds tz */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- cmpldi r11,0 /* check if tv is NULL */
- beq 2f
- lis r7,1000000@ha /* load up USEC_PER_SEC */
- addi r7,r7,1000000@l
- bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */
- std r4,TVAL64_TV_SEC(r11) /* store sec in tv */
- std r5,TVAL64_TV_USEC(r11) /* store usec in tv */
-2: cmpldi r10,0 /* check if tz is NULL */
- beq 1f
- lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */
- lwz r5,CFG_TZ_DSTTIME(r3)
- stw r4,TZONE_TZ_MINWEST(r10)
- stw r5,TZONE_TZ_DSTTIME(r10)
-1: mtlr r12
- crclr cr0*4+so
- li r3,0 /* always success */
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_gettimeofday)
-
-
-/*
- * Exact prototype of clock_gettime()
- *
- * int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_gettime)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- mflr r12 /* r12 saves lr */
- .cfi_register lr,r12
- mr r11,r4 /* r11 saves tp */
- bl V_LOCAL_FUNC(__get_datapage) /* get data page */
- lis r7,NSEC_PER_SEC@h /* want nanoseconds */
- ori r7,r7,NSEC_PER_SEC@l
-50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */
- bne cr1,80f /* if not monotonic, all done */
-
- /*
- * CLOCK_MONOTONIC
- */
-
- /* now we must fixup using wall to monotonic. We need to snapshot
- * that value and do the counter trick again. Fortunately, we still
- * have the counter value in r8 that was returned by __do_get_tspec.
- * At this point, r4,r5 contain our sec/nsec values.
- */
-
- lwa r6,WTOM_CLOCK_SEC(r3)
- lwa r9,WTOM_CLOCK_NSEC(r3)
-
- /* We now have our result in r6,r9. We create a fake dependency
- * on that result and re-check the counter
- */
- or r0,r6,r9
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld cr0,r0,r8 /* check if updated */
- bne- 50b
-
- /* Add wall->monotonic offset and check for overflow or underflow.
- */
- add r4,r4,r6
- add r5,r5,r9
- cmpd cr0,r5,r7
- cmpdi cr1,r5,0
- blt 1f
- subf r5,r7,r5
- addi r4,r4,1
-1: bge cr1,80f
- addi r4,r4,-1
- add r5,r5,r7
-
-80: std r4,TSPC64_TV_SEC(r11)
- std r5,TSPC64_TV_NSEC(r11)
-
- mtlr r12
- crclr cr0*4+so
- li r3,0
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_gettime
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_gettime)
-
-
-/*
- * Exact prototype of clock_getres()
- *
- * int __kernel_clock_getres(clockid_t clock_id, struct timespec *res);
- *
- */
-V_FUNCTION_BEGIN(__kernel_clock_getres)
- .cfi_startproc
- /* Check for supported clock IDs */
- cmpwi cr0,r3,CLOCK_REALTIME
- cmpwi cr1,r3,CLOCK_MONOTONIC
- cror cr0*4+eq,cr0*4+eq,cr1*4+eq
- bne cr0,99f
-
- li r3,0
- cmpli cr0,r4,0
- crclr cr0*4+so
- beqlr
- lis r5,CLOCK_REALTIME_RES@h
- ori r5,r5,CLOCK_REALTIME_RES@l
- std r3,TSPC64_TV_SEC(r4)
- std r5,TSPC64_TV_NSEC(r4)
- blr
-
- /*
- * syscall fallback
- */
-99:
- li r0,__NR_clock_getres
- sc
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_clock_getres)
-
-/*
- * Exact prototype of time()
- *
- * time_t time(time *t);
- *
- */
-V_FUNCTION_BEGIN(__kernel_time)
- .cfi_startproc
- mflr r12
- .cfi_register lr,r12
-
- mr r11,r3 /* r11 holds t */
- bl V_LOCAL_FUNC(__get_datapage)
-
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
-
- cmpldi r11,0 /* check if t is NULL */
- beq 2f
- std r4,0(r11) /* store result at *t */
-2: mtlr r12
- crclr cr0*4+so
- mr r3,r4
- blr
- .cfi_endproc
-V_FUNCTION_END(__kernel_time)
-
-
-/*
- * This is the core of clock_gettime() and gettimeofday(),
- * it returns the current time in r4 (seconds) and r5.
- * On entry, r7 gives the resolution of r5, either USEC_PER_SEC
- * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds.
- * It expects the datapage ptr in r3 and doesn't clobber it.
- * It clobbers r0, r6 and r9.
- * On return, r8 contains the counter value that can be reused.
- * This clobbers cr0 but not any other cr field.
- */
-V_FUNCTION_BEGIN(__do_get_tspec)
- .cfi_startproc
- /* check for update count & load values */
-1: ld r8,CFG_TB_UPDATE_COUNT(r3)
- andi. r0,r8,1 /* pending update ? loop */
- bne- 1b
- xor r0,r8,r8 /* create dependency */
- add r3,r3,r0
-
- /* Get TB & offset it. We use the MFTB macro which will generate
- * workaround code for Cell.
- */
- MFTB(r6)
- ld r9,CFG_TB_ORIG_STAMP(r3)
- subf r6,r9,r6
-
- /* Scale result */
- ld r5,CFG_TB_TO_XS(r3)
- sldi r6,r6,12 /* compute time since stamp_xtime */
- mulhdu r6,r6,r5 /* in units of 2^-32 seconds */
-
- /* Add stamp since epoch */
- ld r4,STAMP_XTIME+TSPC64_TV_SEC(r3)
- lwz r5,STAMP_SEC_FRAC(r3)
- or r0,r4,r5
- or r0,r0,r6
- xor r0,r0,r0
- add r3,r3,r0
- ld r0,CFG_TB_UPDATE_COUNT(r3)
- cmpld r0,r8 /* check if updated */
- bne- 1b /* reload if so */
-
- /* convert to seconds & nanoseconds and add to stamp */
- add r6,r6,r5 /* add on fractional seconds of xtime */
- mulhwu r5,r6,r7 /* compute micro or nanoseconds and */
- srdi r6,r6,32 /* seconds since stamp_xtime */
- clrldi r5,r5,32
- add r4,r4,r6
- blr
- .cfi_endproc
-V_FUNCTION_END(__do_get_tspec)
diff --git a/arch/powerpc/kernel/vdso64/note.S b/arch/powerpc/kernel/vdso64/note.S
deleted file mode 100644
index dc2a509f7e8a..000000000000
--- a/arch/powerpc/kernel/vdso64/note.S
+++ /dev/null
@@ -1 +0,0 @@
-#include "../vdso32/note.S"
diff --git a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S
index df60fca6a13d..1912936fa227 100644
--- a/arch/powerpc/kernel/vdso64/vdso64_wrapper.S
+++ b/arch/powerpc/kernel/vdso64_wrapper.S
@@ -1,12 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/page.h>
- __PAGE_ALIGNED_DATA
+ .section ".data..ro_after_init", "aw"
.globl vdso64_start, vdso64_end
.balign PAGE_SIZE
vdso64_start:
- .incbin "arch/powerpc/kernel/vdso64/vdso64.so.dbg"
+ .incbin "arch/powerpc/kernel/vdso/vdso64.so.dbg"
.balign PAGE_SIZE
vdso64_end:
diff --git a/arch/powerpc/kernel/vecemu.c b/arch/powerpc/kernel/vecemu.c
index c4bfadb2606b..fd9432875ebc 100644
--- a/arch/powerpc/kernel/vecemu.c
+++ b/arch/powerpc/kernel/vecemu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Routines to emulate some Altivec/VMX instructions, specifically
* those that can trap when given denormalized operands in Java mode.
@@ -7,7 +8,9 @@
#include <linux/sched.h>
#include <asm/ptrace.h>
#include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <asm/switch_to.h>
+#include <linux/uaccess.h>
+#include <asm/inst.h>
/* Functions in vector.S */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
@@ -258,21 +261,24 @@ static unsigned int rfin(unsigned int x)
int emulate_altivec(struct pt_regs *regs)
{
- unsigned int instr, i;
+ ppc_inst_t instr;
+ unsigned int i, word;
unsigned int va, vb, vc, vd;
vector128 *vrs;
- if (get_user(instr, (unsigned int __user *) regs->nip))
+ if (get_user_instr(instr, (void __user *)regs->nip))
return -EFAULT;
- if ((instr >> 26) != 4)
+
+ word = ppc_inst_val(instr);
+ if (ppc_inst_primary_opcode(instr) != 4)
return -EINVAL; /* not an altivec instruction */
- vd = (instr >> 21) & 0x1f;
- va = (instr >> 16) & 0x1f;
- vb = (instr >> 11) & 0x1f;
- vc = (instr >> 6) & 0x1f;
+ vd = (word >> 21) & 0x1f;
+ va = (word >> 16) & 0x1f;
+ vb = (word >> 11) & 0x1f;
+ vc = (word >> 6) & 0x1f;
vrs = current->thread.vr_state.vr;
- switch (instr & 0x3f) {
+ switch (word & 0x3f) {
case 10:
switch (vc) {
case 0: /* vaddfp */
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 74f8050518d6..80b3f6e476b6 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,3 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
+#include <linux/linkage.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
@@ -6,45 +9,7 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/* void do_load_up_transact_altivec(struct thread_struct *thread)
- *
- * This is similar to load_up_altivec but for the transactional version of the
- * vector regs. It doesn't mess with the task MSR or valid flags.
- * Furthermore, VEC laziness is not supported with TM currently.
- */
-_GLOBAL(do_load_up_transact_altivec)
- mfmsr r6
- oris r5,r6,MSR_VEC@h
- MTMSRD(r5)
- isync
-
- li r4,1
- stw r4,THREAD_USED_VR(r3)
-
- li r10,THREAD_TRANSACT_VRSTATE+VRSTATE_VSCR
- lvx vr0,r10,r3
- mtvscr vr0
- addi r10,r3,THREAD_TRANSACT_VRSTATE
- REST_32VRS(0,r4,r10)
-
- /* Disable VEC again. */
- MTMSRD(r6)
- isync
-
- blr
-#endif
-
-/*
- * Enable use of VMX/Altivec for the caller.
- */
-_GLOBAL(vec_enable)
- mfmsr r3
- oris r3,r3,MSR_VEC@h
- MTMSRD(r3)
- isync
- blr
+#include <asm/asm-compat.h>
/*
* Load state from memory into VMX registers including VSCR.
@@ -52,10 +17,12 @@ _GLOBAL(vec_enable)
*/
_GLOBAL(load_vr_state)
li r4,VRSTATE_VSCR
- lvx vr0,r4,r3
- mtvscr vr0
+ lvx v0,r4,r3
+ mtvscr v0
REST_32VRS(0,r4,r3)
blr
+EXPORT_SYMBOL(load_vr_state)
+_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
/*
* Store VMX state into memory, including VSCR.
@@ -63,10 +30,12 @@ _GLOBAL(load_vr_state)
*/
_GLOBAL(store_vr_state)
SAVE_32VRS(0, r4, r3)
- mfvscr vr0
+ mfvscr v0
li r4, VRSTATE_VSCR
- stvx vr0, r4, r3
+ stvx v0, r4, r3
+ lvx v0, 0, r3
blr
+EXPORT_SYMBOL(store_vr_state)
/*
* Disable VMX for the task which had it previously,
@@ -80,47 +49,19 @@ _GLOBAL(store_vr_state)
*/
_GLOBAL(load_up_altivec)
mfmsr r5 /* grab the current MSR */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ ori r5,r5,MSR_RI
+#endif
oris r5,r5,MSR_VEC@h
MTMSRD(r5) /* enable use of AltiVec now */
isync
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another. Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
- LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
- toreal(r3)
- PPC_LL r4,ADDROFF(last_task_used_altivec)(r3)
- PPC_LCMPI 0,r4,0
- beq 1f
-
- /* Save VMX state to last_task_used_altivec's THREAD struct */
- toreal(r4)
- addi r4,r4,THREAD
- addi r6,r4,THREAD_VRSTATE
- SAVE_32VRS(0,r5,r6)
- mfvscr vr0
- li r10,VRSTATE_VSCR
- stvx vr0,r10,r6
- /* Disable VMX for last_task_used_altivec */
- PPC_LL r5,PT_REGS(r4)
- toreal(r5)
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r10,MSR_VEC@h
- andc r4,r4,r10
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-
- /* Hack: if we get an altivec unavailable trap with VRSAVE
- * set to all zeros, we assume this is a broken application
- * that fails to set it properly, and thus we switch it to
- * all 1's
+ /*
+ * While userspace in general ignores VRSAVE, glibc uses it as a boolean
+ * to optimise userspace context save/restore. Whenever we take an
+ * altivec unavailable exception we must set VRSAVE to something non
+ * zero. Set it to all 1s. See also the programming note in the ISA.
*/
mfspr r4,SPRN_VRSAVE
cmpwi 0,r4,0
@@ -130,84 +71,46 @@ _GLOBAL(load_up_altivec)
1:
/* enable use of VMX after return */
#ifdef CONFIG_PPC32
- mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
+ addi r5,r2,THREAD
oris r9,r9,MSR_VEC@h
#else
ld r4,PACACURRENT(r13)
addi r5,r4,THREAD /* Get THREAD */
oris r12,r12,MSR_VEC@h
std r12,_MSR(r1)
+#ifdef CONFIG_PPC_BOOK3S_64
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+#endif
#endif
- addi r6,r5,THREAD_VRSTATE
li r4,1
+ stb r4,THREAD_LOAD_VEC(r5)
+ addi r6,r5,THREAD_VRSTATE
li r10,VRSTATE_VSCR
stw r4,THREAD_USED_VR(r5)
- lvx vr0,r10,r6
- mtvscr vr0
+ lvx v0,r10,r6
+ mtvscr v0
REST_32VRS(0,r4,r6)
-#ifndef CONFIG_SMP
- /* Update last_task_used_altivec to 'current' */
- subi r4,r5,THREAD /* Back to 'current' */
- fromreal(r4)
- PPC_STL r4,ADDROFF(last_task_used_altivec)(r3)
-#endif /* CONFIG_SMP */
/* restore registers and return */
blr
-
-_GLOBAL(giveup_altivec_notask)
- mfmsr r3
- andis. r4,r3,MSR_VEC@h
- bnelr /* Already enabled? */
- oris r3,r3,MSR_VEC@h
- SYNC
- MTMSRD(r3) /* enable use of VMX now */
- isync
- blr
+_ASM_NOKPROBE_SYMBOL(load_up_altivec)
/*
- * giveup_altivec(tsk)
- * Disable VMX for the task given as the argument,
- * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
+ * save_altivec(tsk)
+ * Save the vector registers to its thread_struct
*/
-_GLOBAL(giveup_altivec)
- mfmsr r5
- oris r5,r5,MSR_VEC@h
- SYNC
- MTMSRD(r5) /* enable use of VMX now */
- isync
- PPC_LCMPI 0,r3,0
- beqlr /* if no previous owner, done */
+_GLOBAL(save_altivec)
addi r3,r3,THREAD /* want THREAD of task */
PPC_LL r7,THREAD_VRSAVEAREA(r3)
PPC_LL r5,PT_REGS(r3)
PPC_LCMPI 0,r7,0
bne 2f
addi r7,r3,THREAD_VRSTATE
-2: PPC_LCMPI 0,r5,0
- SAVE_32VRS(0,r4,r7)
- mfvscr vr0
+2: SAVE_32VRS(0,r4,r7)
+ mfvscr v0
li r4,VRSTATE_VSCR
- stvx vr0,r4,r7
- beq 1f
- PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
- lis r3,(MSR_VEC|MSR_VSX)@h
-FTR_SECTION_ELSE
- lis r3,MSR_VEC@h
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
-#else
- lis r3,MSR_VEC@h
-#endif
- andc r4,r4,r3 /* disable FP for previous task */
- PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
- li r5,0
- LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
- PPC_STL r5,ADDROFF(last_task_used_altivec)(r4)
-#endif /* CONFIG_SMP */
+ stvx v0,r4,r7
+ lvx v0,0,r7
blr
#ifdef CONFIG_VSX
@@ -230,20 +133,12 @@ _GLOBAL(load_up_vsx)
andis. r5,r12,MSR_VEC@h
beql+ load_up_altivec /* skip if already loaded */
-#ifndef CONFIG_SMP
- ld r3,last_task_used_vsx@got(r2)
- ld r4,0(r3)
- cmpdi 0,r4,0
- beq 1f
- /* Disable VSX for last_task_used_vsx */
- addi r4,r4,THREAD
- ld r5,PT_REGS(r4)
- ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r6,MSR_VSX@h
- andc r6,r4,r6
- std r6,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* interrupt doesn't set MSR[RI] and HPT can fault on current access */
+ li r5,MSR_RI
+ mtmsrd r5,1
+#endif
+
ld r4,PACACURRENT(r13)
addi r4,r4,THREAD /* Get THREAD */
li r6,1
@@ -251,42 +146,9 @@ _GLOBAL(load_up_vsx)
/* enable use of VSX after return */
oris r12,r12,MSR_VSX@h
std r12,_MSR(r1)
-#ifndef CONFIG_SMP
- /* Update last_task_used_vsx to 'current' */
- ld r4,PACACURRENT(r13)
- std r4,0(r3)
-#endif /* CONFIG_SMP */
- b fast_exception_return
-
-/*
- * __giveup_vsx(tsk)
- * Disable VSX for the task given as the argument.
- * Does NOT save vsx registers.
- * Enables the VSX for use in the kernel on return.
- */
-_GLOBAL(__giveup_vsx)
- mfmsr r5
- oris r5,r5,MSR_VSX@h
- mtmsrd r5 /* enable use of VSX now */
- isync
-
- cmpdi 0,r3,0
- beqlr- /* if no previous owner, done */
- addi r3,r3,THREAD /* want THREAD of task */
- ld r5,PT_REGS(r3)
- cmpdi 0,r5,0
- beq 1f
- ld r4,_MSR-STACK_FRAME_OVERHEAD(r5)
- lis r3,MSR_VSX@h
- andc r4,r4,r3 /* disable VSX for previous task */
- std r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#ifndef CONFIG_SMP
- li r5,0
- ld r4,last_task_used_vsx@got(r2)
- std r5,0(r4)
-#endif /* CONFIG_SMP */
- blr
+ li r4,0
+ stb r4,PACASRR_VALID(r13)
+ b fast_interrupt_return_srr
#endif /* CONFIG_VSX */
@@ -296,8 +158,8 @@ _GLOBAL(__giveup_vsx)
* usage of floating-point registers. These routines must be called
* with preempt disabled.
*/
-#ifdef CONFIG_PPC32
.data
+#ifdef CONFIG_PPC32
fpzero:
.long 0
fpone:
@@ -310,24 +172,29 @@ fphalf:
lfs fr,name@l(r11)
#else
- .section ".toc","aw"
fpzero:
- .tc FD_0_0[TC],0
+ .quad 0
fpone:
- .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
+ .quad 0x3ff0000000000000 /* 1.0 */
fphalf:
- .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
+ .quad 0x3fe0000000000000 /* 0.5 */
-#define LDCONST(fr, name) \
- lfd fr,name@toc(r2)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LDCONST(fr, name) \
+ pla r11,name@pcrel; \
+ lfd fr,0(r11)
+#else
+#define LDCONST(fr, name) \
+ addis r11,r2,name@toc@ha; \
+ lfd fr,name@toc@l(r11)
+#endif
#endif
-
.text
/*
* Internal routine to enable floating point and set FPSCR to 0.
* Don't call it from C; it doesn't use the normal calling convention.
*/
-fpenable:
+SYM_FUNC_START_LOCAL(fpenable)
#ifdef CONFIG_PPC32
stwu r1,-64(r1)
#else
@@ -344,6 +211,7 @@ fpenable:
mffs fr31
MTFSF_L(fr1)
blr
+SYM_FUNC_END(fpenable)
fpdisable:
mtlr r12
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index f096e72262f4..de6ee7d35cff 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -1,32 +1,41 @@
-#ifdef CONFIG_PPC64
-#define PROVIDE32(x) PROVIDE(__unused__##x)
-#else
-#define PROVIDE32(x) PROVIDE(x)
-#endif
+/* SPDX-License-Identifier: GPL-2.0 */
+#define BSS_FIRST_SECTIONS *(.bss.prominit)
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 0
+#define RUNTIME_DISCARD_EXIT
+
+#define SOFT_MASK_TABLE(align) \
+ . = ALIGN(align); \
+ __soft_mask_table : AT(ADDR(__soft_mask_table) - LOAD_OFFSET) { \
+ __start___soft_mask_table = .; \
+ KEEP(*(__soft_mask_table)) \
+ __stop___soft_mask_table = .; \
+ }
+
+#define RESTART_TABLE(align) \
+ . = ALIGN(align); \
+ __restart_table : AT(ADDR(__restart_table) - LOAD_OFFSET) { \
+ __start___restart_table = .; \
+ KEEP(*(__restart_table)) \
+ __stop___restart_table = .; \
+ }
+
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
+#define STRICT_ALIGN_SIZE (1 << CONFIG_DATA_SHIFT)
+
+#if STRICT_ALIGN_SIZE < PAGE_SIZE
+#error "CONFIG_DATA_SHIFT must be >= PAGE_SHIFT"
+#endif
+
ENTRY(_stext)
PHDRS {
- kernel PT_LOAD FLAGS(7); /* RWX */
- notes PT_NOTE FLAGS(0);
- dummy PT_NOTE FLAGS(0);
-
- /* binutils < 2.18 has a bug that makes it misbehave when taking an
- ELF file with all segments at load address 0 as input. This
- happens when running "strip" on vmlinux, because of the AT() magic
- in this linker script. People using GCC >= 4.2 won't run into
- this problem, because the "build-id" support will put some data
- into the "notes" segment (at a non-zero load address).
-
- To work around this, we force some data into both the "dummy"
- segment and the kernel segment, so the dummy segment will get a
- non-zero load address. It's not enough to always create the
- "notes" segment, since if nothing gets assigned to it, its load
- address will be zero. */
+ text PT_LOAD FLAGS(7); /* RWX */
+ note PT_NOTE FLAGS(0);
}
#ifdef CONFIG_PPC64
@@ -44,129 +53,273 @@ SECTIONS
* Text, read only data and other permanent read-only sections
*/
- /* Text and gots */
+ _text = .;
+ _stext = .;
+
+ /*
+ * Head text.
+ * This needs to be in its own output section to avoid ld placing
+ * branch trampoline stubs randomly throughout the fixed sections,
+ * which it will do (even if the branch comes from another section)
+ * in order to optimize stub generation.
+ */
+ .head.text : AT(ADDR(.head.text) - LOAD_OFFSET) {
+#ifdef CONFIG_PPC64
+ KEEP(*(.head.text.first_256B));
+#ifdef CONFIG_PPC_BOOK3E_64
+#else
+ KEEP(*(.head.text.real_vectors));
+ *(.head.text.real_trampolines);
+ KEEP(*(.head.text.virt_vectors));
+ *(.head.text.virt_trampolines);
+# if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+ KEEP(*(.head.data.fwnmi_page));
+# endif
+#endif
+#else /* !CONFIG_PPC64 */
+ HEAD_TEXT
+#endif
+ } :text
+
+ __head_end = .;
+
+#ifdef CONFIG_PPC64
+ /*
+ * ALIGN(0) overrides the default output section alignment because
+ * this needs to start right after .head.text in order for fixed
+ * section placement to work.
+ */
+ .text ALIGN(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_LD_HEAD_STUB_CATCH
+ KEEP(*(.linker_stub_catch));
+ . = . ;
+#endif
+
+#else
.text : AT(ADDR(.text) - LOAD_OFFSET) {
ALIGN_FUNCTION();
- HEAD_TEXT
- _text = .;
+#endif
/* careful! __ftr_alt_* sections need to be close to .text */
- *(.text .fixup __ftr_alt_* .ref.text)
+ *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text);
+ *(.tramp.ftrace.text);
+ NOINSTR_TEXT
SCHED_TEXT
LOCK_TEXT
KPROBES_TEXT
IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
+ /*
+ * -Os builds call FP save/restore functions. The powerpc64
+ * linker generates those on demand in the .sfpr section.
+ * .sfpr gets placed at the beginning of a group of input
+ * sections, which can break start-of-text offset if it is
+ * included with the main text sections, so put it by itself.
+ */
+ *(.sfpr);
+ *(.text.asan.* .text.tsan.*)
+ } :text
+
+ . = ALIGN(PAGE_SIZE);
+ _etext = .;
+
+ /* Read-only data */
+ RO_DATA(PAGE_SIZE)
#ifdef CONFIG_PPC32
+ .sdata2 : AT(ADDR(.sdata2) - LOAD_OFFSET) {
+ *(.sdata2)
+ }
+#endif
+
+ .data.rel.ro : AT(ADDR(.data.rel.ro) - LOAD_OFFSET) {
+ *(.data.rel.ro .data.rel.ro.*)
+ }
+
+ .branch_lt : AT(ADDR(.branch_lt) - LOAD_OFFSET) {
+ *(.branch_lt)
+ }
+
+#ifdef CONFIG_PPC32
+ .got1 : AT(ADDR(.got1) - LOAD_OFFSET) {
*(.got1)
+ }
+ .got2 : AT(ADDR(.got2) - LOAD_OFFSET) {
__got2_start = .;
*(.got2)
__got2_end = .;
-#endif /* CONFIG_PPC32 */
+ }
+ .got : AT(ADDR(.got) - LOAD_OFFSET) {
+ *(.got)
+ *(.got.plt)
+ }
+ .plt : AT(ADDR(.plt) - LOAD_OFFSET) {
+ /* XXX: is .plt (and .got.plt) required? */
+ *(.plt)
+ }
- } :kernel
+#else /* CONFIG_PPC32 */
+#ifndef CONFIG_PPC_KERNEL_PCREL
+ .toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
+ *(.toc1)
+ }
+#endif
- . = ALIGN(PAGE_SIZE);
- _etext = .;
- PROVIDE32 (etext = .);
+ .got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ *(.got)
+#else
+ *(.got .toc)
+#endif
+ }
- /* Read-only data */
- RODATA
+ SOFT_MASK_TABLE(8)
+ RESTART_TABLE(8)
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+ .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
+ __start_opd = .;
+ KEEP(*(.opd))
+ __end_opd = .;
+ }
+#endif
+
+ . = ALIGN(8);
+ __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_entry_barrier_fixup = .;
+ *(__stf_entry_barrier_fixup)
+ __stop___stf_entry_barrier_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
+ __start___uaccess_flush_fixup = .;
+ *(__uaccess_flush_fixup)
+ __stop___uaccess_flush_fixup = .;
+ }
+
+ . = ALIGN(8);
+ __entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
+ __start___entry_flush_fixup = .;
+ *(__entry_flush_fixup)
+ __stop___entry_flush_fixup = .;
+ }
- EXCEPTION_TABLE(0)
+ . = ALIGN(8);
+ __scv_entry_flush_fixup : AT(ADDR(__scv_entry_flush_fixup) - LOAD_OFFSET) {
+ __start___scv_entry_flush_fixup = .;
+ *(__scv_entry_flush_fixup)
+ __stop___scv_entry_flush_fixup = .;
+ }
- NOTES :kernel :notes
+ . = ALIGN(8);
+ __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
+ __start___stf_exit_barrier_fixup = .;
+ *(__stf_exit_barrier_fixup)
+ __stop___stf_exit_barrier_fixup = .;
+ }
- /* The dummy segment contents for the bug workaround mentioned above
- near PHDRS. */
- .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
- LONG(0)
- LONG(0)
- LONG(0)
- } :kernel :dummy
+ . = ALIGN(8);
+ __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) {
+ __start___rfi_flush_fixup = .;
+ *(__rfi_flush_fixup)
+ __stop___rfi_flush_fixup = .;
+ }
+#endif /* CONFIG_PPC32 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+ . = ALIGN(8);
+ __spec_barrier_fixup : AT(ADDR(__spec_barrier_fixup) - LOAD_OFFSET) {
+ __start___barrier_nospec_fixup = .;
+ *(__barrier_nospec_fixup)
+ __stop___barrier_nospec_fixup = .;
+ }
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+#ifdef CONFIG_PPC_E500
+ . = ALIGN(8);
+ __spec_btb_flush_fixup : AT(ADDR(__spec_btb_flush_fixup) - LOAD_OFFSET) {
+ __start__btb_flush_fixup = .;
+ *(__btb_flush_fixup)
+ __stop__btb_flush_fixup = .;
+ }
+#endif
+
+ /*
+ * Various code relies on __init_begin being at the strict RWX boundary.
+ */
+ . = ALIGN(STRICT_ALIGN_SIZE);
+ __srwx_boundary = .;
+ __end_rodata = .;
+ __init_begin = .;
/*
* Init sections discarded at runtime
*/
- . = ALIGN(PAGE_SIZE);
- __init_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE) :kernel
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+ *(.tramp.ftrace.init);
+ /*
+ * .init.text might be RO so we must ensure this section ends on
+ * a page boundary.
+ */
+ . = ALIGN(PAGE_SIZE);
+ _einittext = .;
+ } :text
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
*/
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+ __exittext_begin = .;
EXIT_TEXT
+ __exittext_end = .;
}
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
- INIT_DATA
- __vtop_table_begin = .;
- *(.vtop_fixup);
- __vtop_table_end = .;
- __ptov_table_begin = .;
- *(.ptov_fixup);
- __ptov_table_end = .;
- }
-
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
- INIT_SETUP(16)
- }
-
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- INIT_CALLS
- }
-
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- CON_INITCALL
- }
+ . = ALIGN(PAGE_SIZE);
- SECURITY_INIT
+ INIT_DATA_SECTION(16)
. = ALIGN(8);
__ftr_fixup : AT(ADDR(__ftr_fixup) - LOAD_OFFSET) {
__start___ftr_fixup = .;
- *(__ftr_fixup)
+ KEEP(*(__ftr_fixup))
__stop___ftr_fixup = .;
}
. = ALIGN(8);
__mmu_ftr_fixup : AT(ADDR(__mmu_ftr_fixup) - LOAD_OFFSET) {
__start___mmu_ftr_fixup = .;
- *(__mmu_ftr_fixup)
+ KEEP(*(__mmu_ftr_fixup))
__stop___mmu_ftr_fixup = .;
}
. = ALIGN(8);
__lwsync_fixup : AT(ADDR(__lwsync_fixup) - LOAD_OFFSET) {
__start___lwsync_fixup = .;
- *(__lwsync_fixup)
+ KEEP(*(__lwsync_fixup))
__stop___lwsync_fixup = .;
}
#ifdef CONFIG_PPC64
. = ALIGN(8);
__fw_ftr_fixup : AT(ADDR(__fw_ftr_fixup) - LOAD_OFFSET) {
__start___fw_ftr_fixup = .;
- *(__fw_ftr_fixup)
+ KEEP(*(__fw_ftr_fixup))
__stop___fw_ftr_fixup = .;
}
#endif
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
- INIT_RAM_FS
- }
PERCPU_SECTION(L1_CACHE_BYTES)
. = ALIGN(8);
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
__machine_desc_start = . ;
- *(.machine.desc)
+ KEEP(*(.machine.desc))
__machine_desc_end = . ;
}
#ifdef CONFIG_RELOCATABLE
. = ALIGN(8);
.dynsym : AT(ADDR(.dynsym) - LOAD_OFFSET)
{
-#ifdef CONFIG_RELOCATABLE_PPC32
__dynamic_symtab = .;
-#endif
*(.dynsym)
}
.dynstr : AT(ADDR(.dynstr) - LOAD_OFFSET) { *(.dynstr) }
@@ -176,6 +329,7 @@ SECTIONS
*(.dynamic)
}
.hash : AT(ADDR(.hash) - LOAD_OFFSET) { *(.hash) }
+ .gnu.hash : AT(ADDR(.gnu.hash) - LOAD_OFFSET) { *(.gnu.hash) }
.interp : AT(ADDR(.interp) - LOAD_OFFSET) { *(.interp) }
.rela.dyn : AT(ADDR(.rela.dyn) - LOAD_OFFSET)
{
@@ -183,6 +337,12 @@ SECTIONS
*(.rela*)
}
#endif
+ /* .exit.data is discarded at runtime, not link time,
+ * to deal with references from .exit.text
+ */
+ .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+ EXIT_DATA
+ }
/* freed after init ends here */
. = ALIGN(PAGE_SIZE);
@@ -195,38 +355,16 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_sdata = .;
-#ifdef CONFIG_PPC32
- .data : AT(ADDR(.data) - LOAD_OFFSET) {
- DATA_DATA
- *(.sdata)
- *(.got.plt) *(.got)
- }
-#else
.data : AT(ADDR(.data) - LOAD_OFFSET) {
DATA_DATA
*(.data.rel*)
- *(.toc1)
- *(.branch_lt)
- }
-
- .opd : AT(ADDR(.opd) - LOAD_OFFSET) {
- *(.opd)
- }
-
- .got : AT(ADDR(.got) - LOAD_OFFSET) {
- __toc_start = .;
-#ifndef CONFIG_RELOCATABLE
- __prom_init_toc_start = .;
- arch/powerpc/kernel/prom_init.o*(.toc .got)
- __prom_init_toc_end = .;
+#ifdef CONFIG_PPC32
+ *(SDATA_MAIN)
#endif
- *(.got)
- *(.toc)
}
-#endif
/* The initial task and kernel stack */
- INIT_TASK_DATA_SECTION(THREAD_SIZE)
+ INIT_TASK_DATA_SECTION(THREAD_ALIGN)
.data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) {
PAGE_ALIGNED_DATA(PAGE_SIZE)
@@ -245,9 +383,10 @@ SECTIONS
NOSAVE_DATA
}
+ BUG_TABLE
+
. = ALIGN(PAGE_SIZE);
_edata = .;
- PROVIDE32 (edata = .);
/*
* And finally the bss
@@ -257,8 +396,19 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
_end = . ;
- PROVIDE32 (end = .);
- /* Sections to be discarded. */
+ DWARF_DEBUG
+ ELF_DETAILS
+
DISCARDS
+ /DISCARD/ : {
+ *(*.EMB.apuinfo)
+ *(.glink .iplt .plt)
+ *(.gnu.version*)
+ *(.gnu.attributes)
+ *(.eh_frame)
+#ifndef CONFIG_RELOCATABLE
+ *(.rela*)
+#endif
+ }
}
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
new file mode 100644
index 000000000000..2429cb1c7baa
--- /dev/null
+++ b/arch/powerpc/kernel/watchdog.c
@@ -0,0 +1,590 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Watchdog support on powerpc systems.
+ *
+ * Copyright 2017, IBM Corporation.
+ *
+ * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c
+ */
+
+#define pr_fmt(fmt) "watchdog: " fmt
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/init.h>
+#include <linux/percpu.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/export.h>
+#include <linux/kprobes.h>
+#include <linux/hardirq.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
+#include <linux/kdebug.h>
+#include <linux/sched/debug.h>
+#include <linux/delay.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+
+#include <asm/interrupt.h>
+#include <asm/paca.h>
+#include <asm/nmi.h>
+
+/*
+ * The powerpc watchdog ensures that each CPU is able to service timers.
+ * The watchdog sets up a simple timer on each CPU to run once per timer
+ * period, and updates a per-cpu timestamp and a "pending" cpumask. This is
+ * the heartbeat.
+ *
+ * Then there are two systems to check that the heartbeat is still running.
+ * The local soft-NMI, and the SMP checker.
+ *
+ * The soft-NMI checker can detect lockups on the local CPU. When interrupts
+ * are disabled with local_irq_disable(), platforms that use soft-masking
+ * can leave hardware interrupts enabled and handle them with a masked
+ * interrupt handler. The masked handler can send the timer interrupt to the
+ * watchdog's soft_nmi_interrupt(), which appears to Linux as an NMI
+ * interrupt, and can be used to detect CPUs stuck with IRQs disabled.
+ *
+ * The soft-NMI checker will compare the heartbeat timestamp for this CPU
+ * with the current time, and take action if the difference exceeds the
+ * watchdog threshold.
+ *
+ * The limitation of the soft-NMI watchdog is that it does not work when
+ * interrupts are hard disabled or otherwise not being serviced. This is
+ * solved by also having an SMP watchdog where all CPUs check all other
+ * CPUs' heartbeats.
+ *
+ * The SMP checker can detect lockups on other CPUs. A global "pending"
+ * cpumask is kept, containing all CPUs which enable the watchdog. Each
+ * CPU clears their pending bit in their heartbeat timer. When the bitmask
+ * becomes empty, the last CPU to clear its pending bit updates a global
+ * timestamp and refills the pending bitmask.
+ *
+ * In the heartbeat timer, if any CPU notices that the global timestamp has
+ * not been updated for a period exceeding the watchdog threshold, then it
+ * means the CPU(s) with their bit still set in the pending mask have had
+ * their heartbeat stop, and action is taken.
+ *
+ * Some platforms implement true NMI IPIs, which can be used by the SMP
+ * watchdog to detect an unresponsive CPU and pull it out of its stuck
+ * state with the NMI IPI, to get crash/debug data from it. This way the
+ * SMP watchdog can detect hardware interrupts off lockups.
+ */
+
+static cpumask_t wd_cpus_enabled __read_mostly;
+
+static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */
+static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
+
+static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
+
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
+static DEFINE_PER_CPU(u64, wd_timer_tb);
+
+/* SMP checker bits */
+static unsigned long __wd_smp_lock;
+static unsigned long __wd_reporting;
+static unsigned long __wd_nmi_output;
+static cpumask_t wd_smp_cpus_pending;
+static cpumask_t wd_smp_cpus_stuck;
+static u64 wd_smp_last_reset_tb;
+
+#ifdef CONFIG_PPC_PSERIES
+static u64 wd_timeout_pct;
+#endif
+
+/*
+ * Try to take the exclusive watchdog action / NMI IPI / printing lock.
+ * wd_smp_lock must be held. If this fails, we should return and wait
+ * for the watchdog to kick in again (or another CPU to trigger it).
+ *
+ * Importantly, if hardlockup_panic is set, wd_try_report failure should
+ * not delay the panic, because whichever other CPU is reporting will
+ * call panic.
+ */
+static bool wd_try_report(void)
+{
+ if (__wd_reporting)
+ return false;
+ __wd_reporting = 1;
+ return true;
+}
+
+/* End printing after successful wd_try_report. wd_smp_lock not required. */
+static void wd_end_reporting(void)
+{
+ smp_mb(); /* End printing "critical section" */
+ WARN_ON_ONCE(__wd_reporting == 0);
+ WRITE_ONCE(__wd_reporting, 0);
+}
+
+static inline void wd_smp_lock(unsigned long *flags)
+{
+ /*
+ * Avoid locking layers if possible.
+ * This may be called from low level interrupt handlers at some
+ * point in future.
+ */
+ raw_local_irq_save(*flags);
+ hard_irq_disable(); /* Make it soft-NMI safe */
+ while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) {
+ raw_local_irq_restore(*flags);
+ spin_until_cond(!test_bit(0, &__wd_smp_lock));
+ raw_local_irq_save(*flags);
+ hard_irq_disable();
+ }
+}
+
+static inline void wd_smp_unlock(unsigned long *flags)
+{
+ clear_bit_unlock(0, &__wd_smp_lock);
+ raw_local_irq_restore(*flags);
+}
+
+static void wd_lockup_ipi(struct pt_regs *regs)
+{
+ int cpu = raw_smp_processor_id();
+ u64 tb = get_tb();
+
+ pr_emerg("CPU %d Hard LOCKUP\n", cpu);
+ pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
+ cpu, tb, per_cpu(wd_timer_tb, cpu),
+ tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
+ print_modules();
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ /*
+ * __wd_nmi_output must be set after we printk from NMI context.
+ *
+ * printk from NMI context defers printing to the console to irq_work.
+ * If that NMI was taken in some code that is hard-locked, then irqs
+ * are disabled so irq_work will never fire. That can result in the
+ * hard lockup messages being delayed (indefinitely, until something
+ * else kicks the console drivers).
+ *
+ * Setting __wd_nmi_output will cause another CPU to notice and kick
+ * the console drivers for us.
+ *
+ * xchg is not needed here (it could be a smp_mb and store), but xchg
+ * gives the memory ordering and atomicity required.
+ */
+ xchg(&__wd_nmi_output, 1);
+
+ /* Do not panic from here because that can recurse into NMI IPI layer */
+}
+
+static bool set_cpu_stuck(int cpu)
+{
+ cpumask_set_cpu(cpu, &wd_smp_cpus_stuck);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+ /*
+ * See wd_smp_clear_cpu_pending()
+ */
+ smp_mb();
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = get_tb();
+ cpumask_andnot(&wd_smp_cpus_pending,
+ &wd_cpus_enabled,
+ &wd_smp_cpus_stuck);
+ return true;
+ }
+ return false;
+}
+
+static void watchdog_smp_panic(int cpu)
+{
+ static cpumask_t wd_smp_cpus_ipi; // protected by reporting
+ unsigned long flags;
+ u64 tb, last_reset;
+ int c;
+
+ wd_smp_lock(&flags);
+ /* Double check some things under lock */
+ tb = get_tb();
+ last_reset = wd_smp_last_reset_tb;
+ if ((s64)(tb - last_reset) < (s64)wd_smp_panic_timeout_tb)
+ goto out;
+ if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending))
+ goto out;
+ if (!wd_try_report())
+ goto out;
+ for_each_online_cpu(c) {
+ if (!cpumask_test_cpu(c, &wd_smp_cpus_pending))
+ continue;
+ if (c == cpu)
+ continue; // should not happen
+
+ __cpumask_set_cpu(c, &wd_smp_cpus_ipi);
+ if (set_cpu_stuck(c))
+ break;
+ }
+ if (cpumask_empty(&wd_smp_cpus_ipi)) {
+ wd_end_reporting();
+ goto out;
+ }
+ wd_smp_unlock(&flags);
+
+ pr_emerg("CPU %d detected hard LOCKUP on other CPUs %*pbl\n",
+ cpu, cpumask_pr_args(&wd_smp_cpus_ipi));
+ pr_emerg("CPU %d TB:%lld, last SMP heartbeat TB:%lld (%lldms ago)\n",
+ cpu, tb, last_reset, tb_to_ns(tb - last_reset) / 1000000);
+
+ if (!sysctl_hardlockup_all_cpu_backtrace) {
+ /*
+ * Try to trigger the stuck CPUs, unless we are going to
+ * get a backtrace on all of them anyway.
+ */
+ for_each_cpu(c, &wd_smp_cpus_ipi) {
+ smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000);
+ __cpumask_clear_cpu(c, &wd_smp_cpus_ipi);
+ }
+ } else {
+ trigger_allbutcpu_cpu_backtrace(cpu);
+ cpumask_clear(&wd_smp_cpus_ipi);
+ }
+
+ if (hardlockup_panic)
+ nmi_panic(NULL, "Hard LOCKUP");
+
+ wd_end_reporting();
+
+ return;
+
+out:
+ wd_smp_unlock(&flags);
+}
+
+static void wd_smp_clear_cpu_pending(int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) {
+ if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) {
+ struct pt_regs *regs = get_irq_regs();
+ unsigned long flags;
+
+ pr_emerg("CPU %d became unstuck TB:%lld\n",
+ cpu, get_tb());
+ print_irqtrace_events(current);
+ if (regs)
+ show_regs(regs);
+ else
+ dump_stack();
+
+ wd_smp_lock(&flags);
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck);
+ wd_smp_unlock(&flags);
+ } else {
+ /*
+ * The last CPU to clear pending should have reset the
+ * watchdog so we generally should not find it empty
+ * here if our CPU was clear. However it could happen
+ * due to a rare race with another CPU taking the
+ * last CPU out of the mask concurrently.
+ *
+ * We can't add a warning for it. But just in case
+ * there is a problem with the watchdog that is causing
+ * the mask to not be reset, try to kick it along here.
+ */
+ if (unlikely(cpumask_empty(&wd_smp_cpus_pending)))
+ goto none_pending;
+ }
+ return;
+ }
+
+ /*
+ * All other updates to wd_smp_cpus_pending are performed under
+ * wd_smp_lock. All of them are atomic except the case where the
+ * mask becomes empty and is reset. This will not happen here because
+ * cpu was tested to be in the bitmap (above), and a CPU only clears
+ * its own bit. _Except_ in the case where another CPU has detected a
+ * hard lockup on our CPU and takes us out of the pending mask. So in
+ * normal operation there will be no race here, no problem.
+ *
+ * In the lockup case, this atomic clear-bit vs a store that refills
+ * other bits in the accessed word will not be a problem. The bit clear
+ * is atomic so it will not cause the store to get lost, and the store
+ * will never set this bit so it will not overwrite the bit clear. The
+ * only way for a stuck CPU to return to the pending bitmap is to
+ * become unstuck itself.
+ */
+ cpumask_clear_cpu(cpu, &wd_smp_cpus_pending);
+
+ /*
+ * Order the store to clear pending with the load(s) to check all
+ * words in the pending mask to check they are all empty. This orders
+ * with the same barrier on another CPU. This prevents two CPUs
+ * clearing the last 2 pending bits, but neither seeing the other's
+ * store when checking if the mask is empty, and missing an empty
+ * mask, which ends with a false positive.
+ */
+ smp_mb();
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ unsigned long flags;
+
+none_pending:
+ /*
+ * Double check under lock because more than one CPU could see
+ * a clear mask with the lockless check after clearing their
+ * pending bits.
+ */
+ wd_smp_lock(&flags);
+ if (cpumask_empty(&wd_smp_cpus_pending)) {
+ wd_smp_last_reset_tb = get_tb();
+ cpumask_andnot(&wd_smp_cpus_pending,
+ &wd_cpus_enabled,
+ &wd_smp_cpus_stuck);
+ }
+ wd_smp_unlock(&flags);
+ }
+}
+
+static void watchdog_timer_interrupt(int cpu)
+{
+ u64 tb = get_tb();
+
+ per_cpu(wd_timer_tb, cpu) = tb;
+
+ wd_smp_clear_cpu_pending(cpu);
+
+ if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb)
+ watchdog_smp_panic(cpu);
+
+ if (__wd_nmi_output && xchg(&__wd_nmi_output, 0)) {
+ /*
+ * Something has called printk from NMI context. It might be
+ * stuck, so this triggers a flush that will get that
+ * printk output to the console.
+ *
+ * See wd_lockup_ipi.
+ */
+ printk_trigger_flush();
+ }
+}
+
+DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt)
+{
+ unsigned long flags;
+ int cpu = raw_smp_processor_id();
+ u64 tb;
+
+ /* should only arrive from kernel, with irqs disabled */
+ WARN_ON_ONCE(!arch_irq_disabled_regs(regs));
+
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return 0;
+
+ __this_cpu_inc(irq_stat.soft_nmi_irqs);
+
+ tb = get_tb();
+ if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) {
+ /*
+ * Taking wd_smp_lock here means it is a soft-NMI lock, which
+ * means we can't take any regular or irqsafe spin locks while
+ * holding this lock. This is why timers can't printk while
+ * holding the lock.
+ */
+ wd_smp_lock(&flags);
+ if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) {
+ wd_smp_unlock(&flags);
+ return 0;
+ }
+ if (!wd_try_report()) {
+ wd_smp_unlock(&flags);
+ /* Couldn't report, try again in 100ms */
+ mtspr(SPRN_DEC, 100 * tb_ticks_per_usec * 1000);
+ return 0;
+ }
+
+ set_cpu_stuck(cpu);
+
+ wd_smp_unlock(&flags);
+
+ pr_emerg("CPU %d self-detected hard LOCKUP @ %pS\n",
+ cpu, (void *)regs->nip);
+ pr_emerg("CPU %d TB:%lld, last heartbeat TB:%lld (%lldms ago)\n",
+ cpu, tb, per_cpu(wd_timer_tb, cpu),
+ tb_to_ns(tb - per_cpu(wd_timer_tb, cpu)) / 1000000);
+ print_modules();
+ print_irqtrace_events(current);
+ show_regs(regs);
+
+ xchg(&__wd_nmi_output, 1); // see wd_lockup_ipi
+
+ if (sysctl_hardlockup_all_cpu_backtrace)
+ trigger_allbutcpu_cpu_backtrace(cpu);
+
+ if (hardlockup_panic)
+ nmi_panic(regs, "Hard LOCKUP");
+
+ wd_end_reporting();
+ }
+ /*
+ * We are okay to change DEC in soft_nmi_interrupt because the masked
+ * handler has marked a DEC as pending, so the timer interrupt will be
+ * replayed as soon as local irqs are enabled again.
+ */
+ if (wd_panic_timeout_tb < 0x7fffffff)
+ mtspr(SPRN_DEC, wd_panic_timeout_tb);
+
+ return 0;
+}
+
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
+{
+ int cpu = smp_processor_id();
+
+ if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
+ return HRTIMER_NORESTART;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return HRTIMER_NORESTART;
+
+ watchdog_timer_interrupt(cpu);
+
+ hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+ return HRTIMER_RESTART;
+}
+
+void arch_touch_nmi_watchdog(void)
+{
+ unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
+ int cpu = smp_processor_id();
+ u64 tb;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ tb = get_tb();
+ if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
+ per_cpu(wd_timer_tb, cpu) = tb;
+ wd_smp_clear_cpu_pending(cpu);
+ }
+}
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
+
+static void start_watchdog(void *arg)
+{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
+ WARN_ON(1);
+ return;
+ }
+
+ if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
+ return;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return;
+
+ wd_smp_lock(&flags);
+ cpumask_set_cpu(cpu, &wd_cpus_enabled);
+ if (cpumask_weight(&wd_cpus_enabled) == 1) {
+ cpumask_set_cpu(cpu, &wd_smp_cpus_pending);
+ wd_smp_last_reset_tb = get_tb();
+ }
+ wd_smp_unlock(&flags);
+
+ *this_cpu_ptr(&wd_timer_tb) = get_tb();
+
+ hrtimer_setup(hrtimer, watchdog_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
+ HRTIMER_MODE_REL_PINNED);
+}
+
+static int start_watchdog_on_cpu(unsigned int cpu)
+{
+ return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
+ unsigned long flags;
+
+ if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
+ return; /* Can happen in CPU unplug case */
+
+ hrtimer_cancel(hrtimer);
+
+ wd_smp_lock(&flags);
+ cpumask_clear_cpu(cpu, &wd_cpus_enabled);
+ wd_smp_unlock(&flags);
+
+ wd_smp_clear_cpu_pending(cpu);
+}
+
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+ return smp_call_function_single(cpu, stop_watchdog, NULL, true);
+}
+
+static void watchdog_calc_timeouts(void)
+{
+ u64 threshold = watchdog_thresh;
+
+#ifdef CONFIG_PPC_PSERIES
+ threshold += (READ_ONCE(wd_timeout_pct) * threshold) / 100;
+#endif
+
+ wd_panic_timeout_tb = threshold * ppc_tb_freq;
+
+ /* Have the SMP detector trigger a bit later */
+ wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2;
+
+ /* 2/5 is the factor that the perf based detector uses */
+ wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5;
+}
+
+void watchdog_hardlockup_stop(void)
+{
+ int cpu;
+
+ for_each_cpu(cpu, &wd_cpus_enabled)
+ stop_watchdog_on_cpu(cpu);
+}
+
+void watchdog_hardlockup_start(void)
+{
+ int cpu;
+
+ watchdog_calc_timeouts();
+ for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
+ start_watchdog_on_cpu(cpu);
+}
+
+/*
+ * Invoked from core watchdog init; registers the CPU hotplug start/stop hooks.
+ */
+int __init watchdog_hardlockup_probe(void)
+{
+ int err;
+
+ err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "powerpc/watchdog:online",
+ start_watchdog_on_cpu,
+ stop_watchdog_on_cpu);
+ if (err < 0) {
+ pr_warn("could not be initialized\n");
+ return err;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+void watchdog_hardlockup_set_timeout_pct(u64 pct)
+{
+ pr_info("Set the NMI watchdog timeout factor to %llu%%\n", pct);
+ WRITE_ONCE(wd_timeout_pct, pct);
+ lockup_detector_reconfigure();
+}
+#endif
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
new file mode 100644
index 000000000000..470eb0453e17
--- /dev/null
+++ b/arch/powerpc/kexec/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+obj-y += core.o core_$(BITS).o ranges.o
+
+obj-$(CONFIG_PPC32) += relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE) += file_load.o file_load_$(BITS).o elf_$(BITS).o
+obj-$(CONFIG_VMCORE_INFO) += vmcore_info.o
+obj-$(CONFIG_CRASH_DUMP) += crash.o
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
+KASAN_SANITIZE_core.o := n
+KASAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
new file mode 100644
index 000000000000..d1a2d755381c
--- /dev/null
+++ b/arch/powerpc/kexec/core.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/kdump.h>
+#include <asm/machdep.h>
+#include <asm/pgalloc.h>
+#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/firmware.h>
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * Crash-shutdown entry point: passes @regs straight through to
+ * default_machine_crash_shutdown().
+ */
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ default_machine_crash_shutdown(regs);
+}
+#endif
+
+/* Empty: no cleanup is performed for @image here. */
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+ int save_ftrace_enabled;
+
+ /* Save the ftrace-enabled state and turn ftrace off on this CPU. */
+ save_ftrace_enabled = __ftrace_enabled_save();
+ this_cpu_disable_ftrace();
+
+ /* Prefer the platform hook in ppc_md; otherwise use the default path. */
+ if (ppc_md.machine_kexec)
+ ppc_md.machine_kexec(image);
+ else
+ default_machine_kexec(image);
+
+ /* Only reached if the kexec call above returned. */
+ this_cpu_enable_ftrace();
+ __ftrace_enabled_restore(save_ftrace_enabled);
+
+ /* Fall back to normal restart if we're still alive. */
+ machine_restart(NULL);
+ /* Spin forever should machine_restart() return as well. */
+ for(;;);
+}
+
+#ifdef CONFIG_CRASH_RESERVE
+
+/*
+ * Choose the physical base address of the crash kernel region.
+ *
+ * @crash_base: base requested on the command line, or 0 when none was given.
+ *
+ * Without CONFIG_NONSTATIC_KERNEL the base is always KDUMP_KERNELBASE; any
+ * other request is reported and overridden.  Otherwise an unspecified base is
+ * replaced by a default and the result is rounded up to a page boundary.
+ *
+ * Returns the base address to use.
+ */
+static unsigned long long __init get_crash_base(unsigned long long crash_base)
+{
+#ifndef CONFIG_NONSTATIC_KERNEL
+	/* Use pr_warn() like the alignment warning below, not a bare printk(). */
+	if (crash_base != KDUMP_KERNELBASE)
+		pr_warn("Crash kernel location must be 0x%x\n",
+			KDUMP_KERNELBASE);
+
+	return KDUMP_KERNELBASE;
+#else
+	unsigned long long crash_base_align;
+
+	if (!crash_base) {
+#ifdef CONFIG_PPC64
+		/*
+		 * On the LPAR platform place the crash kernel to mid of
+		 * RMA size (max. of 512MB) to ensure the crash kernel
+		 * gets enough space to place itself and some stack to be
+		 * in the first segment. At the same time normal kernel
+		 * also get enough space to allocate memory for essential
+		 * system resource in the first segment. Keep the crash
+		 * kernel starts at 128MB offset on other platforms.
+		 */
+		if (firmware_has_feature(FW_FEATURE_LPAR))
+			crash_base = min_t(u64, ppc64_rma_size / 2, SZ_512M);
+		else
+			crash_base = min_t(u64, ppc64_rma_size / 2, SZ_128M);
+#else
+		crash_base = KDUMP_KERNELBASE;
+#endif
+	}
+
+	/* An unaligned base is reported and then rounded up, not rejected. */
+	crash_base_align = PAGE_ALIGN(crash_base);
+	if (crash_base != crash_base_align)
+		pr_warn("Crash kernel base must be aligned to 0x%lx\n", PAGE_SIZE);
+
+	return crash_base_align;
+#endif
+}
+
+/*
+ * Parse the crashkernel= request from the boot command line and reserve the
+ * region, unless parsing fails or the region would overlap the running kernel.
+ */
+void __init arch_reserve_crashkernel(void)
+{
+	unsigned long long size, base, last;
+	unsigned long long kstart, kend;
+	unsigned long long mem_total;
+
+	/* An explicit memory limit takes precedence over the memblock total. */
+	if (memory_limit)
+		mem_total = memory_limit;
+	else
+		mem_total = memblock_phys_mem_size();
+
+	/* use common parsing */
+	if (parse_crashkernel(boot_command_line, mem_total, &size, &base,
+			      NULL, NULL, NULL))
+		return;
+
+	base = get_crash_base(base);
+	last = base + size - 1;
+
+	kstart = __pa(_stext);
+	kend = kstart + (_end - _stext);
+
+	/* The crash region must not overlap the current kernel */
+	if (!(kend <= base || kstart > last)) {
+		pr_warn("Crash kernel can not overlap current kernel\n");
+		return;
+	}
+
+	reserve_crashkernel_generic(size, base, 0, false);
+}
+
+/*
+ * Report whether the range [start, start + size) intersects the reserved
+ * crash kernel resource.  Returns 1 on overlap, 0 otherwise.
+ */
+int __init overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+	/* Range ends at or before the reservation begins: no overlap. */
+	if (start + size <= crashk_res.start)
+		return 0;
+
+	return start <= crashk_res.end;
+}
+
+/*
+ * Values we need to export to the second kernel via the device tree.
+ * Each variable backs the .value of one property below and is written with
+ * cpu_to_be_ulong() before the property is added to the node.
+ */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
+static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
+
+/* "linux,kernel-end": backed by kernel_end. */
+static struct property kernel_end_prop = {
+ .name = "linux,kernel-end",
+ .length = sizeof(phys_addr_t),
+ .value = &kernel_end,
+};
+
+/* "linux,crashkernel-base": backed by crashk_base. */
+static struct property crashk_base_prop = {
+ .name = "linux,crashkernel-base",
+ .length = sizeof(phys_addr_t),
+ .value = &crashk_base
+};
+
+/* "linux,crashkernel-size": backed by crashk_size. */
+static struct property crashk_size_prop = {
+ .name = "linux,crashkernel-size",
+ .length = sizeof(phys_addr_t),
+ .value = &crashk_size,
+};
+
+/* "linux,memory-limit": backed by mem_limit. */
+static struct property memory_limit_prop = {
+ .name = "linux,memory-limit",
+ .length = sizeof(unsigned long long),
+ .value = &mem_limit,
+};
+
+/*
+ * Pastes BITS_PER_LONG onto "cpu_to_be", selecting the conversion helper that
+ * matches the width of unsigned long (e.g. cpu_to_be64 when BITS_PER_LONG
+ * is 64).
+ */
+#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG)
+
+/*
+ * Publish the crash kernel reservation and the memory limit as properties
+ * of @node.
+ *
+ * @node: device tree node that receives the properties.
+ */
+static void __init export_crashk_values(struct device_node *node)
+{
+	/* There might be existing crash kernel properties, but we can't
+	 * be sure what's in them, so remove them. */
+	of_remove_property(node, of_find_property(node,
+				"linux,crashkernel-base", NULL));
+	of_remove_property(node, of_find_property(node,
+				"linux,crashkernel-size", NULL));
+
+	/* Base and size are only exported when a region has been reserved. */
+	if (crashk_res.start != 0) {
+		/* Each assignment is its own statement; no comma operator. */
+		crashk_base = cpu_to_be_ulong(crashk_res.start);
+		of_add_property(node, &crashk_base_prop);
+		crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
+		of_add_property(node, &crashk_size_prop);
+	}
+
+	/*
+	 * memory_limit is required by the kexec-tools to limit the
+	 * crash regions to the actual memory used.
+	 */
+	mem_limit = cpu_to_be_ulong(memory_limit);
+	of_update_property(node, &memory_limit_prop);
+}
+
+/*
+ * Late initcall: publish "linux,kernel-end" and the crash kernel values as
+ * properties of the /chosen node.
+ *
+ * Returns 0 on success, or -ENOENT when /chosen cannot be found.
+ */
+static int __init kexec_setup(void)
+{
+ struct device_node *node;
+
+ node = of_find_node_by_path("/chosen");
+ if (!node)
+ return -ENOENT;
+
+ /* remove any stale properties so ours can be found */
+ of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
+
+ /* information needed by userspace when using default_machine_kexec */
+ kernel_end = cpu_to_be_ulong(__pa(_end));
+ of_add_property(node, &kernel_end_prop);
+
+ export_crashk_values(node);
+
+ /* Drop the node reference taken by of_find_node_by_path() above. */
+ of_node_put(node);
+ return 0;
+}
+late_initcall(kexec_setup);
+#endif /* CONFIG_CRASH_RESERVE */
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kexec/core_32.c
index affe5dcce7f4..deb28eb44f30 100644
--- a/arch/powerpc/kernel/machine_kexec_32.c
+++ b/arch/powerpc/kexec/core_32.c
@@ -1,14 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PPC32 code to handle Linux booting another kernel.
*
* Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
* GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
* Copyright (C) 2005 IBM Corporation.
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
+#include <linux/irq.h>
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/string.h>
@@ -30,7 +29,6 @@ typedef void (*relocate_new_kernel_t)(
*/
void default_machine_kexec(struct kimage *image)
{
- extern const unsigned char relocate_new_kernel[];
extern const unsigned int relocate_new_kernel_size;
unsigned long page_list;
unsigned long reboot_code_buffer, reboot_code_buffer_phys;
@@ -58,12 +56,15 @@ void default_machine_kexec(struct kimage *image)
reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
printk(KERN_INFO "Bye!\n");
+ if (!IS_ENABLED(CONFIG_PPC_85xx) && !IS_ENABLED(CONFIG_44x))
+ relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
/* now call it */
rnk = (relocate_new_kernel_t) reboot_code_buffer;
(*rnk)(page_list, reboot_code_buffer_phys, image->start);
}
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
{
return 0;
}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kexec/core_64.c
index 879b3aacac32..222aa326dace 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -1,12 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PPC64 code to handle Linux booting another kernel.
*
* Copyright (C) 2004-2005, IBM Corp.
*
* Created by: Milton D Miller II
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
*/
@@ -18,19 +16,25 @@
#include <linux/kernel.h>
#include <linux/cpu.h>
#include <linux/hardirq.h>
+#include <linux/of.h>
+#include <linux/libfdt.h>
#include <asm/page.h>
#include <asm/current.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
+#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/mmu.h>
#include <asm/sections.h> /* _end */
-#include <asm/prom.h>
+#include <asm/setup.h>
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+#include <asm/crashdump-ppc64.h>
-int default_machine_kexec_prepare(struct kimage *image)
+int machine_kexec_prepare(struct kimage *image)
{
int i;
unsigned long begin, end; /* limits of segment */
@@ -39,9 +43,6 @@ int default_machine_kexec_prepare(struct kimage *image)
const unsigned long *basep;
const unsigned int *sizep;
- if (!ppc_md.hpte_clear_all)
- return -ENOENT;
-
/*
* Since we use the kernel fault handlers and paging code to
* handle the virtual mode, we must make sure no destination
@@ -51,29 +52,6 @@ int default_machine_kexec_prepare(struct kimage *image)
if (image->segment[i].mem < __pa(_end))
return -ETXTBSY;
- /*
- * For non-LPAR, we absolutely can not overwrite the mmu hash
- * table, since we are still using the bolted entries in it to
- * do the copy. Check that here.
- *
- * It is safe if the end is below the start of the blocked
- * region (end <= low), or if the beginning is after the
- * end of the blocked region (begin >= high). Use the
- * boolean identity !(a || b) === (!a && !b).
- */
- if (htab_address) {
- low = __pa(htab_address);
- high = low + htab_size_bytes;
-
- for (i = 0; i < image->nr_segments; i++) {
- begin = image->segment[i].mem;
- end = begin + image->segment[i].memsz;
-
- if ((begin < high) && (end > low))
- return -ETXTBSY;
- }
- }
-
/* We also should not overwrite the tce tables */
for_each_node_by_type(node, "pci") {
basep = of_get_property(node, "linux,tce-base", NULL);
@@ -88,17 +66,18 @@ int default_machine_kexec_prepare(struct kimage *image)
begin = image->segment[i].mem;
end = begin + image->segment[i].memsz;
- if ((begin < high) && (end > low))
+ if ((begin < high) && (end > low)) {
+ of_node_put(node);
return -ETXTBSY;
+ }
}
}
return 0;
}
-#define IND_FLAGS (IND_DESTINATION | IND_INDIRECTION | IND_DONE | IND_SOURCE)
-
-static void copy_segments(unsigned long ind)
+/* Called during kexec sequence with MMU off */
+static notrace void copy_segments(unsigned long ind)
{
unsigned long entry;
unsigned long *ptr;
@@ -131,7 +110,8 @@ static void copy_segments(unsigned long ind)
}
}
-void kexec_copy_flush(struct kimage *image)
+/* Called during kexec sequence with MMU off */
+notrace void kexec_copy_flush(struct kimage *image)
{
long i, nr_segments = image->nr_segments;
struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
@@ -178,6 +158,8 @@ static void kexec_smp_down(void *arg)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0, 1);
+ reset_sprs();
+
kexec_smp_wait();
/* NOTREACHED */
}
@@ -194,24 +176,25 @@ static void kexec_prepare_cpus_wait(int wait_state)
* are correctly onlined. If somehow we start a CPU on boot with RTAS
* start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
* time, the boot CPU will timeout. If it does eventually execute
- * stuff, the secondary will start up (paca[].cpu_start was written) and
- * get into a peculiar state. If the platform supports
- * smp_ops->take_timebase(), the secondary CPU will probably be spinning
- * in there. If not (i.e. pseries), the secondary will continue on and
- * try to online itself/idle/etc. If it survives that, we need to find
- * these possible-but-not-online-but-should-be CPUs and chaperone them
- * into kexec_smp_wait().
+ * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+ * written) and get into a peculiar state.
+ * If the platform supports smp_ops->take_timebase(), the secondary CPU
+ * will probably be spinning in there. If not (i.e. pseries), the
+ * secondary will continue on and try to online itself/idle/etc. If it
+ * survives that, we need to find these
+ * possible-but-not-online-but-should-be CPUs and chaperone them into
+ * kexec_smp_wait().
*/
for_each_online_cpu(i) {
if (i == my_cpu)
continue;
- while (paca[i].kexec_state < wait_state) {
+ while (paca_ptrs[i]->kexec_state < wait_state) {
barrier();
if (i != notified) {
printk(KERN_INFO "kexec: waiting for cpu %d "
"(physical %d) to enter %i state\n",
- i, paca[i].hw_cpu_id, wait_state);
+ i, paca_ptrs[i]->hw_cpu_id, wait_state);
notified = i;
}
}
@@ -237,7 +220,7 @@ static void wake_offline_cpus(void)
if (!cpu_online(cpu)) {
printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
cpu);
- WARN_ON(cpu_up(cpu));
+ WARN_ON(add_cpu(cpu));
}
}
}
@@ -256,16 +239,16 @@ static void kexec_prepare_cpus(void)
/* we are sure every CPU has IRQs off at this point */
kexec_all_irq_disabled = 1;
- /* after we tell the others to go down */
- if (ppc_md.kexec_cpu_down)
- ppc_md.kexec_cpu_down(0, 0);
-
/*
* Before removing MMU mappings make sure all CPUs have entered real
* mode:
*/
kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+ /* after we tell the others to go down */
+ if (ppc_md.kexec_cpu_down)
+ ppc_md.kexec_cpu_down(0, 0);
+
put_cpu();
}
@@ -303,23 +286,25 @@ static void kexec_prepare_cpus(void)
* We could use a smaller stack if we don't care about anything using
* current, but that audit has not been performed.
*/
-static union thread_union kexec_stack __init_task_data =
- { };
+static union thread_union kexec_stack = { };
/*
* For similar reasons to the stack above, the kexecing CPU needs to be on a
* static PACA; we switch to kexec_paca.
*/
-struct paca_struct kexec_paca;
+static struct paca_struct kexec_paca;
/* Our assembly helper, in misc_64.S */
extern void kexec_sequence(void *newstack, unsigned long start,
void *image, void *control,
- void (*clear_all)(void)) __noreturn;
+ void (*clear_all)(void),
+ bool copy_with_mmu_off) __noreturn;
/* too late to fail here */
void default_machine_kexec(struct kimage *image)
{
+ bool copy_with_mmu_off;
+
/* prepare control code if any */
/*
@@ -330,46 +315,84 @@ void default_machine_kexec(struct kimage *image)
* using debugger IPI.
*/
- if (crashing_cpu == -1)
+ if (!kdump_in_progress())
kexec_prepare_cpus();
- pr_debug("kexec: Starting switchover sequence.\n");
+#ifdef CONFIG_PPC_PSERIES
+ /*
+ * This must be done after other CPUs have shut down, otherwise they
+ * could execute the 'scv' instruction, which is not supported with
+ * reloc disabled (see configure_exceptions()).
+ */
+ if (firmware_has_feature(FW_FEATURE_SET_MODE))
+ pseries_disable_reloc_on_exc();
+#endif
+
+ printk("kexec: Starting switchover sequence.\n");
/* switch to a staticly allocated stack. Based on irq stack code.
* We setup preempt_count to avoid using VMX in memcpy.
* XXX: the task struct will likely be invalid once we do the copy!
*/
- kexec_stack.thread_info.task = current_thread_info()->task;
- kexec_stack.thread_info.flags = 0;
- kexec_stack.thread_info.preempt_count = HARDIRQ_OFFSET;
- kexec_stack.thread_info.cpu = current_thread_info()->cpu;
+ current_thread_info()->flags = 0;
+ current_thread_info()->preempt_count = HARDIRQ_OFFSET;
/* We need a static PACA, too; copy this CPU's PACA over and switch to
- * it. Also poison per_cpu_offset to catch anyone using non-static
- * data.
+ * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+ * non-static data.
*/
memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
- paca = (struct paca_struct *)RELOC_HIDE(&kexec_paca, 0) -
- kexec_paca.paca_index;
+#ifdef CONFIG_PPC_PSERIES
+ kexec_paca.lppaca_ptr = NULL;
+#endif
+
+ if (is_secure_guest() && !(image->preserve_context ||
+ image->type == KEXEC_TYPE_CRASH)) {
+ uv_unshare_all_pages();
+ printk("kexec: Unshared all shared pages.\n");
+ }
+
+ paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
setup_paca(&kexec_paca);
- /* XXX: If anyone does 'dynamic lppacas' this will also need to be
- * switched to a static version!
+ /*
+ * The lppaca should be unregistered at this point so the HV won't
+ * touch it. In the case of a crash, none of the lppacas are
+ * unregistered so there is not much we can do about it here.
*/
+ /*
+ * On Book3S, the copy must happen with the MMU off if we are either
+ * using Radix page tables or we are not in an LPAR since we can
+ * overwrite the page tables while copying.
+ *
+ * In an LPAR, we keep the MMU on otherwise we can't access beyond
+ * the RMA. On BookE there is no real MMU off mode, so we have to
+ * keep it enabled as well (but then we have bolted TLB entries).
+ */
+#ifdef CONFIG_PPC_BOOK3E_64
+ copy_with_mmu_off = false;
+#else
+ copy_with_mmu_off = radix_enabled() ||
+ !(firmware_has_feature(FW_FEATURE_LPAR) ||
+ firmware_has_feature(FW_FEATURE_PS3_LV1));
+#endif
+
/* Some things are best done in assembly. Finding globals with
* a toc is easier in C, so pass in what we can.
*/
kexec_sequence(&kexec_stack, image->start, image,
- page_address(image->control_code_page),
- ppc_md.hpte_clear_all);
+ page_address(image->control_code_page),
+ mmu_cleanup_all, copy_with_mmu_off);
/* NOTREACHED */
}
+#ifdef CONFIG_PPC_64S_HASH_MMU
/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base;
-static unsigned long htab_size;
+static __be64 htab_base;
+static __be64 htab_size;
static struct property htab_base_prop = {
.name = "linux,htab-base",
@@ -386,7 +409,6 @@ static struct property htab_size_prop = {
static int __init export_htab_values(void)
{
struct device_node *node;
- struct property *prop;
/* On machines with no htab htab_address is NULL */
if (!htab_address)
@@ -396,13 +418,9 @@ static int __init export_htab_values(void)
if (!node)
return -ENODEV;
- /* remove any stale propertys so ours can be found */
- prop = of_find_property(node, htab_base_prop.name, NULL);
- if (prop)
- of_remove_property(node, prop);
- prop = of_find_property(node, htab_size_prop.name, NULL);
- if (prop)
- of_remove_property(node, prop);
+ /* remove any stale properties so ours can be found */
+ of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
+ of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
htab_base = cpu_to_be64(__pa(htab_address));
of_add_property(node, &htab_base_prop);
@@ -413,3 +431,114 @@ static int __init export_htab_values(void)
return 0;
}
late_initcall(export_htab_values);
+#endif /* CONFIG_PPC_64S_HASH_MMU */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ * them to fdt.
+ * @fdt: Flattened device tree of the kernel
+ * @node_offset: offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, -EINVAL if @dn is NULL, or the negative libfdt error
+ * code returned by fdt_setprop() for the first property that fails.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+ int ret = 0;
+ struct property *pp;
+
+ if (!dn)
+ return -EINVAL;
+
+ /* Copy every property of @dn; stop at the first failure. */
+ for_each_property_of_node(dn, pp) {
+ ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+ if (ret < 0) {
+ pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ * device node.
+ * @fdt: Flattened device tree of the kernel.
+ *
+ * Returns 0 on success. On failure returns a negative value: either a libfdt
+ * error code from one of the fdt_* calls, or -EINVAL when the live device
+ * tree has no /cpus node.
+ *
+ * Note: expecting no subnodes under /cpus/<node> with device_type == "cpu".
+ * If this changes, update this function to include them.
+ */
+int update_cpus_node(void *fdt)
+{
+ int prev_node_offset;
+ const char *device_type;
+ const struct fdt_property *prop;
+ struct device_node *cpus_node, *dn;
+ int cpus_offset, cpus_subnode_offset, ret = 0;
+
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ /*
+ * NOTE(review): -FDT_ERR_NOTFOUND is deliberately let through here, so
+ * the code below runs with a negative cpus_offset when the FDT has no
+ * /cpus node - confirm that this is intended.
+ */
+ if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+ pr_err("Malformed device tree: error reading /cpus node: %s\n",
+ fdt_strerror(cpus_offset));
+ return cpus_offset;
+ }
+
+ /*
+ * prev_node_offset is the last sub-node that was kept; it stays equal to
+ * cpus_offset for as long as no sub-node has been kept.
+ */
+ prev_node_offset = cpus_offset;
+ /* Delete sub-nodes of /cpus node with device_type == "cpu" */
+ for (cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset); cpus_subnode_offset >= 0;) {
+ /* Ignore nodes that do not have a device_type property or device_type != "cpu" */
+ prop = fdt_get_property(fdt, cpus_subnode_offset, "device_type", NULL);
+ if (!prop || strcmp(prop->data, "cpu")) {
+ prev_node_offset = cpus_subnode_offset;
+ goto next_node;
+ }
+
+ ret = fdt_del_node(fdt, cpus_subnode_offset);
+ if (ret < 0) {
+ pr_err("Failed to delete a cpus sub-node: %s\n", fdt_strerror(ret));
+ return ret;
+ }
+next_node:
+ /*
+ * Resume from the last kept node rather than from the offset that was
+ * just examined; restart at the first sub-node when nothing has been
+ * kept yet.
+ */
+ if (prev_node_offset == cpus_offset)
+ cpus_subnode_offset = fdt_first_subnode(fdt, cpus_offset);
+ else
+ cpus_subnode_offset = fdt_next_subnode(fdt, prev_node_offset);
+ }
+
+ cpus_node = of_find_node_by_path("/cpus");
+ /* Fail here to avoid kexec/kdump kernel boot hung */
+ if (!cpus_node) {
+ pr_err("No /cpus node found\n");
+ return -EINVAL;
+ }
+
+ /* Add all /cpus sub-nodes of device_type == "cpu" to FDT */
+ for_each_child_of_node(cpus_node, dn) {
+ /* Ignore device nodes that do not have a device_type property
+ * or device_type != "cpu".
+ */
+ device_type = of_get_property(dn, "device_type", NULL);
+ if (!device_type || strcmp(device_type, "cpu"))
+ continue;
+
+ cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+ if (cpus_subnode_offset < 0) {
+ pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+ fdt_strerror(cpus_subnode_offset));
+ ret = cpus_subnode_offset;
+ goto out;
+ }
+
+ ret = add_node_props(fdt, cpus_subnode_offset, dn);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ of_node_put(cpus_node);
+ /*
+ * Presumably releases the child still referenced when the loop above was
+ * left early via goto out - TODO confirm that dn is NULL after normal
+ * loop completion.
+ */
+ of_node_put(dn);
+ return ret;
+}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kexec/crash.c
index 51dbace3269b..a325c1c02f96 100644
--- a/arch/powerpc/kernel/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Architecture specific (PPC64) functions for kexec based crash dumps.
*
* Copyright (C) 2005, IBM Corp.
*
* Created by: Haren Myneni
- *
- * This source code is licensed under the GNU General Public License,
- * Version 2. See the file COPYING for more details.
- *
*/
#include <linux/kernel.h>
@@ -19,15 +16,17 @@
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/types.h>
+#include <linux/libfdt.h>
+#include <linux/memory.h>
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/kexec.h>
-#include <asm/kdump.h>
-#include <asm/prom.h>
#include <asm/smp.h>
#include <asm/setjmp.h>
#include <asm/debug.h>
+#include <asm/interrupt.h>
+#include <asm/kexec_ranges.h>
/*
* The primary CPU waits a while for all secondary CPUs to enter. This is to
@@ -43,13 +42,27 @@
#define IPI_TIMEOUT 10000
#define REAL_MODE_TIMEOUT 10000
-/* This keeps a track of which one is the crashing cpu. */
-int crashing_cpu = -1;
static int time_to_dump;
+/*
+ * In case of system reset, secondary CPUs enter crash_kexec_secondary with out
+ * having to send an IPI explicitly. So, indicate if the crash is via
+ * system reset to avoid sending another IPI.
+ */
+static int is_via_system_reset;
+
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid things from happening when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
+
#define CRASH_HANDLER_MAX 3
-/* NULL terminated list of shutdown handles */
-static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX+1];
+/* List of shutdown handles */
+static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
static DEFINE_SPINLOCK(crash_handlers_lock);
static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
@@ -71,9 +84,6 @@ void crash_ipi_callback(struct pt_regs *regs)
int cpu = smp_processor_id();
- if (!cpu_online(cpu))
- return;
-
hard_irq_disable();
if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
crash_save_cpu(regs, cpu);
@@ -102,16 +112,27 @@ void crash_ipi_callback(struct pt_regs *regs)
/* NOTREACHED */
}
-static void crash_kexec_prepare_cpus(int cpu)
+static void crash_kexec_prepare_cpus(void)
{
unsigned int msecs;
- unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
- int tries = 0;
+ volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+ volatile int tries = 0;
int (*old_handler)(struct pt_regs *regs);
printk(KERN_EMERG "Sending IPI to other CPUs\n");
- crash_send_ipi(crash_ipi_callback);
+ if (crash_wake_offline)
+ ncpus = num_present_cpus() - 1;
+
+ /*
+ * If we came in via system reset, secondaries enter via crash_kexec_secondary().
+ * So, wait a while for the secondary CPUs to enter for that case.
+ * Else, send IPI to all other CPUs.
+ */
+ if (is_via_system_reset)
+ mdelay(PRIMARY_TIMEOUT);
+ else
+ crash_send_ipi(crash_ipi_callback);
smp_wmb();
again:
@@ -200,7 +221,7 @@ void crash_kexec_secondary(struct pt_regs *regs)
#else /* ! CONFIG_SMP */
-static void crash_kexec_prepare_cpus(int cpu)
+static void crash_kexec_prepare_cpus(void)
{
/*
* move the secondaries to us so that we can copy
@@ -221,8 +242,8 @@ void crash_kexec_secondary(struct pt_regs *regs)
#endif /* CONFIG_SMP */
/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC_STD_MMU_64)
-static void crash_kexec_wait_realmode(int cpu)
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
unsigned int msecs;
int i;
@@ -232,7 +253,7 @@ static void crash_kexec_wait_realmode(int cpu)
if (i == cpu)
continue;
- while (paca[i].kexec_state < KEXEC_STATE_REAL_MODE) {
+ while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
barrier();
if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
break;
@@ -244,7 +265,33 @@ static void crash_kexec_wait_realmode(int cpu)
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif /* CONFIG_SMP && CONFIG_PPC_STD_MMU_64 */
+#endif /* CONFIG_SMP && CONFIG_PPC64 */
+
+/*
+ * Prepare this CPU and the other CPUs for a crash kexec: defer printk output,
+ * hard-disable interrupts, record the crashing CPU and then run
+ * crash_kexec_prepare_cpus().
+ */
+void crash_kexec_prepare(void)
+{
+ /* Avoid hardlocking with an unresponsive CPU holding logbuf_lock */
+ printk_deferred_enter();
+
+ /*
+ * This function is only called after the system
+ * has panicked or is otherwise in a critical state.
+ * The minimum amount of code to allow a kexec'd kernel
+ * to run successfully needs to happen here.
+ *
+ * In practice this means stopping other cpus in
+ * an SMP system.
+ * The kernel is broken so disable interrupts.
+ */
+ hard_irq_disable();
+
+ /*
+ * Make a note of crashing cpu. Will be used in machine_kexec
+ * such that another IPI will not be sent.
+ */
+ crashing_cpu = smp_processor_id();
+
+ crash_kexec_prepare_cpus();
+}
/*
* Register a function to be called on shutdown. Only use this if you
@@ -288,9 +335,14 @@ int crash_shutdown_unregister(crash_shutdown_t handler)
rc = 1;
} else {
/* Shift handles down */
- for (; crash_shutdown_handles[i]; i++)
+ for (; i < (CRASH_HANDLER_MAX - 1); i++)
crash_shutdown_handles[i] =
crash_shutdown_handles[i+1];
+ /*
+ * Reset last entry to NULL now that it has been shifted down,
+ * this will allow new handles to be added here.
+ */
+ crash_shutdown_handles[i] = NULL;
rc = 0;
}
@@ -301,35 +353,16 @@ EXPORT_SYMBOL(crash_shutdown_unregister);
void default_machine_crash_shutdown(struct pt_regs *regs)
{
- unsigned int i;
+ volatile unsigned int i;
int (*old_handler)(struct pt_regs *regs);
- /*
- * This function is only called after the system
- * has panicked or is otherwise in a critical state.
- * The minimum amount of code to allow a kexec'd kernel
- * to run successfully needs to happen here.
- *
- * In practice this means stopping other cpus in
- * an SMP system.
- * The kernel is broken so disable interrupts.
- */
- hard_irq_disable();
+ if (TRAP(regs) == INTERRUPT_SYSTEM_RESET)
+ is_via_system_reset = 1;
- /*
- * Make a note of crashing cpu. Will be used in machine_kexec
- * such that another IPI will not be sent.
- */
- crashing_cpu = smp_processor_id();
-
- /*
- * If we came in via system reset, wait a while for the secondary
- * CPUs to enter.
- */
- if (TRAP(regs) == 0x100)
- mdelay(PRIMARY_TIMEOUT);
-
- crash_kexec_prepare_cpus(crashing_cpu);
+ if (IS_ENABLED(CONFIG_SMP))
+ crash_smp_send_stop();
+ else
+ crash_kexec_prepare();
crash_save_cpu(regs, crashing_cpu);
@@ -346,7 +379,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
old_handler = __debugger_fault_handler;
__debugger_fault_handler = handle_fault;
crash_shutdown_cpu = smp_processor_id();
- for (i = 0; crash_shutdown_handles[i]; i++) {
+ for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
if (setjmp(crash_shutdown_buf) == 0) {
/*
* Insert syncs and delay to ensure
@@ -365,3 +398,195 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(1, 0);
}
+
+#ifdef CONFIG_CRASH_HOTPLUG
+#undef pr_fmt
+#define pr_fmt(fmt) "crash hp: " fmt
+
+/*
+ * Report the preferred elfcorehdr size to userspace; it is exposed through
+ * the /sys/kernel/crash_elfcorehdr_size sysfs interface.
+ *
+ * The size covers the ELF header plus one program header per possible CPU,
+ * one for vmcoreinfo and, with memory hotplug enabled, one per
+ * CONFIG_CRASH_MAX_MEMORY_RANGES entry.
+ */
+unsigned int arch_crash_get_elfcorehdr_size(void)
+{
+	/* A program header for possible CPUs + vmcoreinfo */
+	unsigned long nr_phdrs = num_possible_cpus() + 1;
+
+	if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG))
+		nr_phdrs += CONFIG_CRASH_MAX_MEMORY_RANGES;
+
+	return sizeof(struct elfhdr) + nr_phdrs * sizeof(Elf64_Phdr);
+}
+
+/**
+ * update_crash_elfcorehdr() - Recreate the elfcorehdr and replace it with old
+ *			       elfcorehdr in the kexec segment array.
+ * @image: the active struct kimage
+ * @mn: struct memory_notify data handler
+ *
+ * Every failure is logged and leaves the existing elfcorehdr untouched.
+ */
+static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *mn)
+{
+	int ret;
+	struct crash_mem *cmem = NULL;
+	struct kexec_segment *ksegment;
+	void *ptr, *mem, *elfbuf = NULL;
+	unsigned long elfsz, memsz, base_addr, size;
+
+	ksegment = &image->segment[image->elfcorehdr_index];
+	mem = (void *) ksegment->mem;
+	memsz = ksegment->memsz;
+
+	ret = get_crash_memory_ranges(&cmem);
+	if (ret) {
+		pr_err("Failed to get crash mem range\n");
+		return;
+	}
+
+	/*
+	 * The hot unplugged memory is part of crash memory ranges,
+	 * remove it here.
+	 */
+	if (image->hp_action == KEXEC_CRASH_HP_REMOVE_MEMORY) {
+		base_addr = PFN_PHYS(mn->start_pfn);
+		size = mn->nr_pages * PAGE_SIZE;
+		ret = remove_mem_range(&cmem, base_addr, size);
+		if (ret) {
+			pr_err("Failed to remove hot-unplugged memory from crash memory ranges\n");
+			goto out;
+		}
+	}
+
+	ret = crash_prepare_elf64_headers(cmem, false, &elfbuf, &elfsz);
+	if (ret) {
+		pr_err("Failed to prepare elf header\n");
+		goto out;
+	}
+
+	/*
+	 * It is unlikely that the kernel hits this, because the elfcorehdr
+	 * kexec segment (memsz) is built with additional space to accommodate
+	 * a growing number of crash memory ranges while loading the kdump
+	 * kernel. The check only guards against an unforeseen case.
+	 */
+	if (elfsz > memsz) {
+		/* Terminate the message so it is logged as a complete line. */
+		pr_err("Updated crash elfcorehdr elfsz %lu > memsz %lu\n", elfsz, memsz);
+		goto out;
+	}
+
+	ptr = __va(mem);
+	if (ptr) {
+		/* Temporarily invalidate the crash image while it is replaced */
+		xchg(&kexec_crash_image, NULL);
+
+		/* Replace the old elfcorehdr with newly prepared elfcorehdr */
+		memcpy((void *)ptr, elfbuf, elfsz);
+
+		/* The crash image is now valid once again */
+		xchg(&kexec_crash_image, image);
+	}
+out:
+	kvfree(cmem);
+	kvfree(elfbuf);
+}
+
+/**
+ * get_fdt_index - Scan the kexec segment array for the FDT segment.
+ * @image: a pointer to kexec_crash_image
+ *
+ * A segment is taken to be the FDT when its first bytes carry FDT_MAGIC.
+ *
+ * Returns the index of the first such segment, or -1 when there is none.
+ */
+static int get_fdt_index(struct kimage *image)
+{
+	int idx;
+
+	for (idx = 0; idx < image->nr_segments; idx++) {
+		void *seg = __va(image->segment[idx].mem);
+
+		if (seg && fdt_magic(seg) == FDT_MAGIC)
+			return idx;
+	}
+
+	return -1;
+}
+
+/**
+ * update_crash_fdt - updates the cpus node of the crash FDT.
+ *
+ * @image: a pointer to kexec_crash_image
+ *
+ * Does nothing beyond logging an error when no FDT segment can be located.
+ */
+static void update_crash_fdt(struct kimage *image)
+{
+	void *fdt;
+	int fdt_index;
+
+	fdt_index = get_fdt_index(image);
+	if (fdt_index < 0) {
+		pr_err("Unable to locate FDT segment.\n");
+		return;
+	}
+
+	fdt = __va((void *)image->segment[fdt_index].mem);
+
+	/* Temporarily invalidate the crash image while it is replaced */
+	xchg(&kexec_crash_image, NULL);
+
+	/* update FDT to reflect changes in CPU resources */
+	if (update_cpus_node(fdt))
+		pr_err("Failed to update crash FDT\n");
+
+	/* The crash image is now valid once again */
+	xchg(&kexec_crash_image, image);
+}
+
+/*
+ * Report whether crash hotplug updates are supported for @image.
+ *
+ * With CONFIG_KEXEC_FILE, an image that has file_mode set is always reported
+ * as supported (returns 1). Otherwise the result is the
+ * KEXEC_CRASH_HOTPLUG_SUPPORT bit of @kexec_flags, i.e. non-zero when set.
+ */
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
+{
+#ifdef CONFIG_KEXEC_FILE
+ if (image->file_mode)
+ return 1;
+#endif
+ return kexec_flags & KEXEC_CRASH_HOTPLUG_SUPPORT;
+}
+
+/**
+ * arch_crash_handle_hotplug_event - Handle crash CPU/Memory hotplug events to update the
+ * necessary kexec segments based on the hotplug event.
+ * @image: a pointer to kexec_crash_image
+ * @arg: struct memory_notify handler for memory hotplug case and NULL for CPU hotplug case.
+ *
+ * Update the kdump image based on the type of hotplug event, represented by image->hp_action.
+ * CPU add: Update the FDT segment to include the newly added CPU.
+ * CPU remove: No action is needed, with the assumption that it's okay to have offline CPUs
+ * part of the FDT.
+ * Memory add/remove: Rebuild the elfcorehdr segment via update_crash_elfcorehdr(), passing
+ * @arg as the struct memory_notify describing the event.
+ * Any other action: Warn once and do nothing.
+ */
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
+{
+ struct memory_notify *mn;
+
+ switch (image->hp_action) {
+ case KEXEC_CRASH_HP_REMOVE_CPU:
+ return;
+
+ case KEXEC_CRASH_HP_ADD_CPU:
+ update_crash_fdt(image);
+ break;
+
+ case KEXEC_CRASH_HP_REMOVE_MEMORY:
+ case KEXEC_CRASH_HP_ADD_MEMORY:
+ mn = (struct memory_notify *)arg;
+ update_crash_elfcorehdr(image, mn);
+ return;
+ default:
+ pr_warn_once("Unknown hotplug action\n");
+ }
+}
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
new file mode 100644
index 000000000000..5d6d616404cf
--- /dev/null
+++ b/arch/powerpc/kexec/elf_64.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#define pr_fmt(fmt) "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <asm/kexec_ranges.h>
+
+/**
+ * elf64_load - Load an ELF kernel image and its companion segments.
+ * @image: kexec image being loaded.
+ * @kernel_buf: Buffer holding the ELF kernel file.
+ * @kernel_len: Length of @kernel_buf.
+ * @initrd: Initrd buffer, or NULL when there is none.
+ * @initrd_len: Length of @initrd.
+ * @cmdline: Kernel command line.
+ * @cmdline_len: Length of @cmdline.
+ *
+ * Loads, in order: the kernel, the purgatory, the crash dump segments (crash
+ * images only), the initrd (if any) and a freshly built device tree, then
+ * fills in the purgatory variables.
+ *
+ * Returns NULL on success, ERR_PTR(negative errno) on error.
+ */
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned long kernel_load_addr;
+ unsigned long initrd_load_addr = 0, fdt_load_addr;
+ void *fdt;
+ const void *slave_code;
+ struct elfhdr ehdr;
+ char *modified_cmdline = NULL;
+ struct crash_mem *rmem = NULL;
+ struct kexec_elf_info elf_info;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size };
+ struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size, .top_down = true,
+ .mem = KEXEC_BUF_MEM_UNKNOWN };
+
+ /* Nothing has been allocated yet, so a parse failure can return directly */
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
+ /* min & max buffer values for kdump case */
+ kbuf.buf_min = pbuf.buf_min = crashk_res.start;
+ kbuf.buf_max = pbuf.buf_max =
+ ((crashk_res.end < ppc64_rma_size) ?
+ crashk_res.end : (ppc64_rma_size - 1));
+ }
+
+ ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
+ if (ret)
+ goto out;
+
+ kexec_dprintk("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+ ret = kexec_load_purgatory(image, &pbuf);
+ if (ret) {
+ pr_err("Loading purgatory failed.\n");
+ goto out;
+ }
+
+ kexec_dprintk("Loaded purgatory at 0x%lx\n", pbuf.mem);
+
+ /* Load additional segments needed for panic kernel */
+ if (IS_ENABLED(CONFIG_CRASH_DUMP) && image->type == KEXEC_TYPE_CRASH) {
+ ret = load_crashdump_segments_ppc64(image, &kbuf);
+ if (ret) {
+ pr_err("Failed to load kdump kernel segments\n");
+ goto out;
+ }
+
+ /* Setup cmdline for kdump kernel case */
+ modified_cmdline = setup_kdump_cmdline(image, cmdline,
+ cmdline_len);
+ if (!modified_cmdline) {
+ pr_err("Setting up cmdline for kdump kernel failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ /* From here on the FDT is built from the kdump command line */
+ cmdline = modified_cmdline;
+ }
+
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_load_addr = kbuf.mem;
+
+ kexec_dprintk("Loaded initrd at 0x%lx\n", initrd_load_addr);
+ }
+
+ ret = get_reserved_memory_ranges(&rmem);
+ if (ret)
+ goto out;
+
+ fdt = of_kexec_alloc_and_setup_fdt(image, initrd_load_addr,
+ initrd_len, cmdline,
+ kexec_extra_fdt_size_ppc64(image, rmem));
+ if (!fdt) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = setup_new_fdt_ppc64(image, fdt, rmem);
+ if (ret)
+ goto out_free_fdt;
+
+ /*
+ * The FDT is left unpacked for crash images when CONFIG_CRASH_HOTPLUG
+ * is enabled - presumably to keep the slack reserved for CPU nodes
+ * that get added later (TODO confirm against update_crash_fdt()).
+ */
+ if (!IS_ENABLED(CONFIG_CRASH_HOTPLUG) || image->type != KEXEC_TYPE_CRASH)
+ fdt_pack(fdt);
+
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt);
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out_free_fdt;
+
+ /* FDT will be freed in arch_kimage_file_post_load_cleanup */
+ image->arch.fdt = fdt;
+
+ fdt_load_addr = kbuf.mem;
+
+ kexec_dprintk("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+ slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+ ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
+ if (ret)
+ pr_err("Error setting up the purgatory.\n");
+
+ /* Skip out_free_fdt: the FDT is now referenced by image->arch.fdt */
+ goto out;
+
+out_free_fdt:
+ kvfree(fdt);
+out:
+ kfree(rmem);
+ kfree(modified_cmdline);
+ kexec_free_elf_info(&elf_info);
+
+ return ret ? ERR_PTR(ret) : NULL;
+}
+
+/* File-loader operations for ELF64 kernel images: probe and load hooks. */
+const struct kexec_file_ops kexec_elf64_ops = {
+ .probe = kexec_elf_probe,
+ .load = elf64_load,
+};
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
new file mode 100644
index 000000000000..4284f76cbef5
--- /dev/null
+++ b/arch/powerpc/kexec/file_load.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <asm/setup.h>
+
+#define SLAVE_CODE_SIZE 256 /* First 0x100 bytes */
+
+/**
+ * setup_kdump_cmdline - Build the kdump kernel command line by placing
+ *                       "elfcorehdr=<addr> " in front of the given one.
+ * @image:       Kexec image; image->elf_load_addr supplies <addr>.
+ * @cmdline:     Command line parameters to copy after the prefix.
+ * @cmdline_len: Number of bytes to copy from @cmdline.
+ *
+ * The kdump segment must already be set up, since image->elf_load_addr is
+ * read here.
+ *
+ * Returns a newly allocated COMMAND_LINE_SIZE buffer on success (the caller
+ * frees it), NULL if the allocation fails or the result would not fit.
+ */
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+			  unsigned long cmdline_len)
+{
+	char *buf;
+	int prefix_len;
+
+	buf = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+	if (buf == NULL)
+		return NULL;
+
+	prefix_len = sprintf(buf, "elfcorehdr=0x%lx ", image->elf_load_addr);
+
+	if (prefix_len + cmdline_len <= COMMAND_LINE_SIZE) {
+		memcpy(&buf[prefix_len], cmdline, cmdline_len);
+		/* Force termination even when the copy filled the buffer */
+		buf[COMMAND_LINE_SIZE - 1] = '\0';
+		return buf;
+	}
+
+	pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+	kfree(buf);
+	return NULL;
+}
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image:            kexec image.
+ * @slave_code:       Slave code for the purgatory; SLAVE_CODE_SIZE bytes are
+ *                    read from it.
+ * @fdt:              Flattened device tree for the next kernel (not
+ *                    referenced by this function).
+ * @kernel_load_addr: Address where the kernel is loaded.
+ * @fdt_load_addr:    Address where the flattened device tree is loaded.
+ *
+ * Overwrites the first SLAVE_CODE_SIZE bytes at "purgatory_start" with
+ * @slave_code while keeping the purgatory's own first word, then stores the
+ * kernel and device tree addresses in the "kernel" and "dt_offset" symbols.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+		    const void *fdt, unsigned long kernel_load_addr,
+		    unsigned long fdt_load_addr)
+{
+	unsigned int *slave_code_buf, master_entry;
+	int ret;
+
+	slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+	if (!slave_code_buf)
+		return -ENOMEM;
+
+	/* Read what the purgatory currently has at purgatory_start. */
+	ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+					     slave_code_buf, SLAVE_CODE_SIZE,
+					     true);
+	if (ret)
+		goto out_free;
+
+	/* Take the new kernel's slave code but keep the purgatory's first word. */
+	master_entry = slave_code_buf[0];
+	memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+	slave_code_buf[0] = master_entry;
+	ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+					     slave_code_buf, SLAVE_CODE_SIZE,
+					     false);
+out_free:
+	kfree(slave_code_buf);
+	/*
+	 * Do not let a failed write-back go unnoticed: the result used to be
+	 * overwritten by the next call without being checked.
+	 */
+	if (ret)
+		return ret;
+
+	ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+					     sizeof(kernel_load_addr), false);
+	if (ret)
+		return ret;
+
+	ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
+					     sizeof(fdt_load_addr), false);
+	if (ret)
+		return ret;
+
+	return 0;
+}
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
new file mode 100644
index 000000000000..e7ef8b2a2554
--- /dev/null
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -0,0 +1,871 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/setup.h>
+#include <asm/drmem.h>
+#include <asm/firmware.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+#include <asm/mmzone.h>
+#include <asm/iommu.h>
+#include <asm/prom.h>
+#include <asm/plpks.h>
+#include <asm/cputhreads.h>
+
+/*
+ * Working state used while building the usable-memory properties: a growable
+ * buffer of big-endian 64-bit cells plus the ranges the entries are taken
+ * from.
+ */
+struct umem_info {
+ __be64 *buf; /* data buffer for usable-memory property */
+ u32 size; /* size allocated for the data buffer */
+ u32 max_entries; /* maximum no. of entries */
+ u32 idx; /* index of current entry */
+
+ /* usable memory ranges to look up */
+ unsigned int nr_ranges;
+ const struct range *ranges;
+};
+
+/* NULL-terminated list of image loaders; only the ELF64 loader is listed. */
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+ &kexec_elf64_ops,
+ NULL
+};
+
+/*
+ * Check a candidate range against the image's exclude ranges.
+ *
+ * Returns 1 as soon as one entry of image->arch.exclude_ranges satisfies
+ * "start < entry.end && end > entry.start", 0 when none does.
+ */
+int arch_check_excluded_range(struct kimage *image, unsigned long start,
+			      unsigned long end)
+{
+	struct crash_mem *excl = image->arch.exclude_ranges;
+	int idx;
+
+	for (idx = 0; idx < excl->nr_ranges; idx++) {
+		/* Disjoint from this entry: keep looking */
+		if (start >= excl->ranges[idx].end ||
+		    end <= excl->ranges[idx].start)
+			continue;
+
+		return 1;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * check_realloc_usable_mem - Grow the buffer if it cannot take more entries
+ * @um_info: Usable memory buffer and ranges info.
+ * @cnt:     No. of entries about to be appended at @um_info->idx.
+ *
+ * When @cnt more entries do not fit, the buffer is enlarged by
+ * MEM_RANGE_CHUNK_SZ bytes with krealloc() and the size bookkeeping is
+ * updated. If that allocation fails, @um_info is left unchanged, so the
+ * existing buffer is still referenced by @um_info->buf and remains the
+ * caller's to free.
+ *
+ * Returns the (possibly new) buffer on success, NULL on error.
+ */
+static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
+{
+	__be64 *grown;
+	u32 grown_size;
+
+	if ((um_info->idx + cnt) > um_info->max_entries) {
+		grown_size = um_info->size + MEM_RANGE_CHUNK_SZ;
+		grown = krealloc(um_info->buf, grown_size, GFP_KERNEL);
+		if (!grown)
+			return NULL;
+
+		um_info->buf = grown;
+		um_info->size = grown_size;
+		um_info->max_entries = (um_info->size / sizeof(u64));
+	}
+
+	return um_info->buf;
+}
+
+/**
+ * add_usable_mem - Append the parts of the usable memory ranges that fall
+ *                  inside the given memory range to the buffer
+ * @um_info: Usable memory buffer and ranges info.
+ * @base:    Base address of memory range to look for.
+ * @end:     End address of memory range to look for.
+ *
+ * Each match is stored as two big-endian cells: start address, then
+ * (end - start + 1).
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
+{
+	u64 lo, hi;
+	bool contained, overlaps;
+	int i;
+
+	for (i = 0; i < um_info->nr_ranges; i++) {
+		lo = um_info->ranges[i].start;
+		hi = um_info->ranges[i].end;
+
+		contained = (lo >= base && hi <= end);
+		overlaps = (base < hi && end > lo);
+		if (!contained && !overlaps)
+			continue;
+
+		/* Clamp to [base, end]; a no-op for a fully contained range */
+		if (lo < base)
+			lo = base;
+		if (hi > end)
+			hi = end;
+
+		if (!check_realloc_usable_mem(um_info, 2))
+			return -ENOMEM;
+
+		um_info->buf[um_info->idx++] = cpu_to_be64(lo);
+		um_info->buf[um_info->idx++] = cpu_to_be64(hi - lo + 1);
+	}
+
+	return 0;
+}
+
+/**
+ * kdump_setup_usable_lmb - Callback handed to walk_drmem_lmbs; records the
+ *                          usable memory ranges of one LMB.
+ * @lmb:  LMB info.
+ * @usm:  linux,drconf-usable-memory property value.
+ * @data: Pointer to usable memory buffer and ranges info.
+ *
+ * One cell is reserved for the range count, the ranges inside the LMB are
+ * appended after it, and the count cell is filled in last.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
+				  void *data)
+{
+	struct umem_info *um_info = data;
+	u64 lmb_start, lmb_last;
+	int count_slot, rc;
+
+	/*
+	 * A kernel that was itself booted with linux,drconf-usable-memory
+	 * cannot load a kdump kernel.
+	 */
+	if (*usm) {
+		pr_err("linux,drconf-usable-memory property already exists!");
+		return -EINVAL;
+	}
+
+	/* Remember where the count goes, then step past that cell */
+	count_slot = um_info->idx;
+	if (!check_realloc_usable_mem(um_info, 1))
+		return -ENOMEM;
+	um_info->idx++;
+
+	lmb_start = lmb->base_addr;
+	lmb_last = lmb_start + drmem_lmb_size() - 1;
+	rc = add_usable_mem(um_info, lmb_start, lmb_last);
+	if (rc)
+		return rc;
+
+	/* Every range added took two cells (base & size) */
+	um_info->buf[count_slot] =
+		cpu_to_be64((um_info->idx - count_slot - 1) / 2);
+
+	return 0;
+}
+
+#define NODE_PATH_LEN 256
+/**
+ * add_usable_mem_property - Add usable memory property for the given
+ *                           memory node.
+ * @fdt:     Flattened device tree for the kdump kernel.
+ * @dn:      Memory node.
+ * @um_info: Usable memory buffer and ranges info.
+ *
+ * A node without any "reg" entry is skipped and reported as success.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem_property(void *fdt, struct device_node *dn,
+				   struct umem_info *um_info)
+{
+	int node;
+	char path[NODE_PATH_LEN];
+	int i, ret;
+	u64 base, size;
+
+	of_node_get(dn);
+
+	if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
+		pr_err("Buffer (%d) too small for memory node: %pOF\n",
+		       NODE_PATH_LEN, dn);
+		/* Leave through "out" so the reference taken above is dropped */
+		ret = -EOVERFLOW;
+		goto out;
+	}
+	kexec_dprintk("Memory node path: %s\n", path);
+
+	/* Now that we know the path, find its offset in kdump kernel's fdt */
+	node = fdt_path_offset(fdt, path);
+	if (node < 0) {
+		pr_err("Malformed device tree: error reading %s\n", path);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	um_info->idx = 0;
+	if (!check_realloc_usable_mem(um_info, 2)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/*
+	 * "reg" property represents sequence of (addr,size) tuples
+	 * each representing a memory range.
+	 */
+	for (i = 0; ; i++) {
+		/* A non-zero return here ends the walk over the tuples */
+		ret = of_property_read_reg(dn, i, &base, &size);
+		if (ret)
+			break;
+
+		ret = add_usable_mem(um_info, base, base + size - 1);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * No reg or empty reg? Skip this node. ret still holds the error that
+	 * ended the loop; clear it so that skipping is not reported as a
+	 * failure to the caller.
+	 */
+	if (i == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * No kdump kernel usable memory found in this memory node.
+	 * Write (0,0) tuple in linux,usable-memory property for
+	 * this region to be ignored.
+	 */
+	if (um_info->idx == 0) {
+		um_info->buf[0] = 0;
+		um_info->buf[1] = 0;
+		um_info->idx = 2;
+	}
+
+	ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
+			  (um_info->idx * sizeof(u64)));
+
+out:
+	of_node_put(dn);
+	return ret;
+}
+
+
+/**
+ * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
+ * and linux,drconf-usable-memory DT properties as
+ * appropriate to restrict its memory usage.
+ * @fdt: Flattened device tree for the kdump kernel.
+ * @usable_mem: Usable memory ranges for kdump kernel.
+ *
+ * The scratch buffer used to build the property values is allocated on
+ * demand and freed before returning.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
+{
+ struct umem_info um_info;
+ struct device_node *dn;
+ int node, ret = 0;
+
+ if (!usable_mem) {
+ pr_err("Usable memory ranges for kdump kernel not found\n");
+ return -ENOENT;
+ }
+
+ /* A missing node is fine; any other lookup error is treated as fatal */
+ node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+ if (node == -FDT_ERR_NOTFOUND)
+ kexec_dprintk("No dynamic reconfiguration memory found\n");
+ else if (node < 0) {
+ pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
+ return -EINVAL;
+ }
+
+ /* Start with an empty buffer; it grows as entries are added */
+ um_info.buf = NULL;
+ um_info.size = 0;
+ um_info.max_entries = 0;
+ um_info.idx = 0;
+ /* Memory ranges to look up */
+ um_info.ranges = &(usable_mem->ranges[0]);
+ um_info.nr_ranges = usable_mem->nr_ranges;
+
+ /*
+ * NOTE(review): this looks the node up in the live device tree, while
+ * "node" above came from the new fdt - presumably both always agree on
+ * whether the node exists; confirm, since node may be negative here.
+ */
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (dn) {
+ ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
+ of_node_put(dn);
+
+ if (ret) {
+ pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
+ goto out;
+ }
+
+ ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
+ um_info.buf, (um_info.idx * sizeof(u64)));
+ if (ret) {
+ pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s",
+ fdt_strerror(ret));
+ goto out;
+ }
+ }
+
+ /*
+ * Walk through each memory node and set linux,usable-memory property
+ * for the corresponding node in kdump kernel's fdt.
+ */
+ for_each_node_by_type(dn, "memory") {
+ ret = add_usable_mem_property(fdt, dn, &um_info);
+ if (ret) {
+ pr_err("Failed to set linux,usable-memory property for %s node",
+ dn->full_name);
+ /* Leaving the iterator early: drop the node it holds */
+ of_node_put(dn);
+ goto out;
+ }
+ }
+
+out:
+ kfree(um_info.buf);
+ return ret;
+}
+
+/**
+ * load_backup_segment - Reserve a place in the image for the backup region.
+ * @image: Kexec image.
+ * @kbuf:  Buffer contents and memory parameters.
+ *
+ * On success the dummy source buffer is kept in image->arch.backup_buf and
+ * the chosen address in image->arch.backup_start.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+	void *dummy;
+	int rc;
+
+	/*
+	 * The backup region's real contents are copied from the backup
+	 * source in the purgatory, after a crash, so a source buffer means
+	 * nothing here. The segment loading code still expects one, hence
+	 * this zeroed placeholder.
+	 */
+	dummy = vzalloc(BACKUP_SRC_SIZE);
+	if (dummy == NULL)
+		return -ENOMEM;
+
+	kbuf->buffer = dummy;
+	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+	kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
+	kbuf->top_down = false;
+
+	rc = kexec_add_buffer(kbuf);
+	if (rc == 0) {
+		image->arch.backup_buf = dummy;
+		image->arch.backup_start = kbuf->mem;
+		return 0;
+	}
+
+	vfree(dummy);
+	return rc;
+}
+
+/**
+ * update_backup_region_phdr - Update backup region's offset for the core to
+ *                             export the region appropriately.
+ * @image: Kexec image.
+ * @ehdr:  ELF core header; the program headers are expected to follow it
+ *         directly in memory.
+ *
+ * Assumes an exclusive program header is setup for the backup region
+ * in the ELF headers. Only the first header whose p_paddr equals
+ * BACKUP_SRC_START is updated.
+ *
+ * Returns nothing.
+ */
+static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
+{
+	Elf64_Phdr *phdr;
+	unsigned int i;
+
+	phdr = (Elf64_Phdr *)(ehdr + 1);
+	/* Step phdr along with i so every program header gets examined */
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (phdr->p_paddr == BACKUP_SRC_START) {
+			phdr->p_offset = image->arch.backup_start;
+			kexec_dprintk("Backup region offset updated to 0x%lx\n",
+				      image->arch.backup_start);
+			return;
+		}
+	}
+}
+
+/*
+ * Extra bytes to reserve after the ELF core headers.
+ *
+ * With both CONFIG_CRASH_HOTPLUG and CONFIG_MEMORY_HOTPLUG this is one
+ * Elf64_Phdr for every range between cmem->nr_ranges and
+ * CONFIG_CRASH_MAX_MEMORY_RANGES; 0 (with a warning) when the configured
+ * maximum exceeds PN_XNUM or is already reached. Without those options it is
+ * always 0.
+ */
+static unsigned int kdump_extra_elfcorehdr_size(struct crash_mem *cmem)
+{
+#if defined(CONFIG_CRASH_HOTPLUG) && defined(CONFIG_MEMORY_HOTPLUG)
+	if (CONFIG_CRASH_MAX_MEMORY_RANGES > (unsigned int)PN_XNUM) {
+		pr_warn("Number of Phdrs %u exceeds max\n", CONFIG_CRASH_MAX_MEMORY_RANGES);
+		return 0;
+	}
+
+	if (cmem->nr_ranges >= CONFIG_CRASH_MAX_MEMORY_RANGES) {
+		pr_warn("Configured crash mem ranges may not be enough\n");
+		return 0;
+	}
+
+	return (CONFIG_CRASH_MAX_MEMORY_RANGES - cmem->nr_ranges) * sizeof(Elf64_Phdr);
+#else
+	return 0;
+#endif
+}
+
+/**
+ * load_elfcorehdr_segment - Build the ELF core headers from the crash memory
+ *                           ranges and add them to the image as a segment.
+ * @image: Kexec image.
+ * @kbuf:  Buffer contents and memory parameters.
+ *
+ * On success the headers, their size and their load address are recorded in
+ * @image. The segment's memsz may exceed its bufsz by
+ * kdump_extra_elfcorehdr_size().
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+	struct crash_mem *ranges = NULL;
+	void *hdrs = NULL;
+	unsigned long hdrs_sz;
+	int rc;
+
+	rc = get_crash_memory_ranges(&ranges);
+	if (rc)
+		goto free_ranges;
+
+	/* Setup elfcorehdr segment */
+	rc = crash_prepare_elf64_headers(ranges, false, &hdrs, &hdrs_sz);
+	if (rc) {
+		pr_err("Failed to prepare elf headers for the core\n");
+		goto free_ranges;
+	}
+
+	/* Point the backup region's program header at its new location */
+	update_backup_region_phdr(image, hdrs);
+
+	kbuf->buffer = hdrs;
+	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+	kbuf->bufsz = hdrs_sz;
+	kbuf->memsz = hdrs_sz + kdump_extra_elfcorehdr_size(ranges);
+	kbuf->top_down = false;
+
+	rc = kexec_add_buffer(kbuf);
+	if (rc) {
+		vfree(hdrs);
+	} else {
+		image->elf_load_addr = kbuf->mem;
+		image->elf_headers_sz = hdrs_sz;
+		image->elf_headers = hdrs;
+	}
+
+free_ranges:
+	kfree(ranges);
+	return rc;
+}
+
+/**
+ * load_crashdump_segments_ppc64 - Initialize the additional segments needed
+ *                                 to load kdump kernel.
+ * @image: Kexec image.
+ * @kbuf:  Buffer contents and memory parameters.
+ *
+ * Loads the backup segment first, then the elfcorehdr segment.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int load_crashdump_segments_ppc64(struct kimage *image,
+				  struct kexec_buf *kbuf)
+{
+	int rc;
+
+	/* Backup segment: first 64K bytes of the crashing kernel */
+	rc = load_backup_segment(image, kbuf);
+	if (rc != 0) {
+		pr_err("Failed to load backup segment\n");
+		return rc;
+	}
+	kexec_dprintk("Loaded the backup region at 0x%lx\n", kbuf->mem);
+
+	/* elfcorehdr segment: exports the crashing kernel's vmcore */
+	rc = load_elfcorehdr_segment(image, kbuf);
+	if (rc != 0) {
+		pr_err("Failed to load elfcorehdr segment\n");
+		return rc;
+	}
+	kexec_dprintk("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
+		      image->elf_load_addr, kbuf->bufsz, kbuf->memsz);
+
+	return 0;
+}
+#endif
+
+/**
+ * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
+ * variables and call setup_purgatory() to initialize
+ * common global variable.
+ * @image: kexec image.
+ * @slave_code: Slave code for the purgatory.
+ * @fdt: Flattened device tree for the next kernel.
+ * @kernel_load_addr: Address where the kernel is loaded.
+ * @fdt_load_addr: Address where the flattened device tree is loaded.
+ *
+ * Besides the common variables, this sets "run_at_load" (crash images only),
+ * "backup_start", and - when an /ibm,opal node exists - "opal_base" and
+ * "opal_entry".
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr)
+{
+ struct device_node *dn = NULL;
+ int ret;
+
+ ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
+ if (ret)
+ goto out;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ u32 my_run_at_load = 1;
+
+ /*
+ * Tell relocatable kernel to run at load address
+ * via the word meant for that at 0x5c.
+ */
+ ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
+ &my_run_at_load,
+ sizeof(my_run_at_load),
+ false);
+ if (ret)
+ goto out;
+ }
+
+ /* Tell purgatory where to look for backup region */
+ ret = kexec_purgatory_get_set_symbol(image, "backup_start",
+ &image->arch.backup_start,
+ sizeof(image->arch.backup_start),
+ false);
+ if (ret)
+ goto out;
+
+ /* Setup OPAL base & entry values */
+ dn = of_find_node_by_path("/ibm,opal");
+ if (dn) {
+ u64 val;
+
+ ret = of_property_read_u64(dn, "opal-base-address", &val);
+ if (ret)
+ goto out;
+
+ ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
+ sizeof(val), false);
+ if (ret)
+ goto out;
+
+ ret = of_property_read_u64(dn, "opal-entry-address", &val);
+ if (ret)
+ goto out;
+ ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
+ sizeof(val), false);
+ }
+out:
+ /* Shared exit: reached on success too, with ret == 0 */
+ if (ret)
+ pr_err("Failed to setup purgatory symbols");
+ /* dn is still NULL if we got here before the /ibm,opal lookup */
+ of_node_put(dn);
+ return ret;
+}
+
+/**
+ * cpu_node_size - Estimate the size of one CPU node in the FDT.
+ *
+ * The estimate is taken from the first node of type "cpu": its name length
+ * plus 5, plus name length and value length of each of its properties. It is
+ * computed on the first successful call and cached in a static variable, on
+ * the assumption that all CPU nodes are alike and do not change.
+ *
+ * Returns the estimated size, or 0 if no CPU node could be found.
+ */
+static unsigned int cpu_node_size(void)
+{
+	static unsigned int size;
+	struct device_node *cpu;
+	struct property *prop;
+	unsigned int total;
+
+	/* Already computed on an earlier call */
+	if (size != 0)
+		return size;
+
+	cpu = of_find_node_by_type(NULL, "cpu");
+	if (WARN_ON_ONCE(cpu == NULL)) {
+		// Unlikely to happen
+		return 0;
+	}
+
+	total = strlen(cpu->name) + 5;
+	for_each_property_of_node(cpu, prop)
+		total += strlen(prop->name) + prop->length;
+
+	of_node_put(cpu);
+
+	size = total;
+	return size;
+}
+
+/*
+ * Extra FDT space needed only by a kdump image: room for the usable-memory
+ * properties and, with CONFIG_CRASH_HOTPLUG, for CPU nodes that are possible
+ * but not among the @cpu_nodes currently present. Returns 0 for anything
+ * that is not a crash image.
+ */
+static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image, unsigned int cpu_nodes)
+{
+	unsigned int total = 0;
+	u64 nr_usm_cells;
+
+	if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH)
+		return 0;
+
+	/*
+	 * linux,usable-memory and linux,drconf-usable-memory: the number of
+	 * entries is only approximated here, which is enough for sizing.
+	 */
+	if (drmem_lmb_size()) {
+		nr_usm_cells = memory_hotplug_max() / drmem_lmb_size();
+		nr_usm_cells += 2 * (resource_size(&crashk_res) / drmem_lmb_size());
+		total += (unsigned int)(nr_usm_cells * sizeof(u64));
+	}
+
+#ifdef CONFIG_CRASH_HOTPLUG
+	/*
+	 * Keep room for every possible CPU node, so that nodes for CPUs that
+	 * show up later fit without regenerating the entire FDT.
+	 */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		unsigned int max_cpu_nodes = num_possible_cpus() / threads_per_core;
+
+		if (max_cpu_nodes > cpu_nodes)
+			total += (max_cpu_nodes - cpu_nodes) * cpu_node_size();
+	}
+#endif
+
+	return total;
+}
+
+/**
+ * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
+ *                              setup FDT for kexec/kdump kernel.
+ * @image: kexec image being loaded.
+ * @rmem:  Reserved memory ranges; may be NULL when there are none.
+ *
+ * Returns the estimated extra size needed for kexec/kdump kernel FDT.
+ */
+unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image, struct crash_mem *rmem)
+{
+	struct device_node *dn;
+	unsigned int cpu_nodes = 0, extra_size = 0;
+
+	// Budget some space for the password blob. There's already extra space
+	// for the key name
+	if (plpks_is_available())
+		extra_size += (unsigned int)plpks_get_passwordlen();
+
+	/* Get the number of CPU nodes in the current device tree */
+	for_each_node_by_type(dn, "cpu") {
+		cpu_nodes++;
+	}
+
+	/* Consider extra space for CPU nodes added since the boot time */
+	if (cpu_nodes > boot_cpu_node_count)
+		extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+
+	/*
+	 * Consider extra space for reserved memory ranges if any. A NULL
+	 * @rmem is tolerated, matching setup_new_fdt_ppc64().
+	 */
+	if (rmem && rmem->nr_ranges > 0)
+		extra_size += sizeof(struct fdt_reserve_entry) * rmem->nr_ranges;
+
+	return extra_size + kdump_extra_fdt_size_ppc64(image, cpu_nodes);
+}
+
+/*
+ * Make property @propname of FDT node @node_offset match device node @dn:
+ * set it from @dn when @dn has it, delete it when only the FDT has it.
+ *
+ * Returns the libfdt result of the set/delete, or -FDT_ERR_NOTFOUND when
+ * neither side has the property.
+ */
+static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
+			 const char *propname)
+{
+	int live_len = 0, fdt_len = 0;
+	const void *live_val, *fdt_val;
+
+	live_val = of_get_property(dn, propname, &live_len);
+	fdt_val = fdt_getprop(fdt, node_offset, propname, &fdt_len);
+
+	if (live_val)
+		return fdt_setprop(fdt, node_offset, propname, live_val, live_len);
+
+	if (fdt_val)
+		return fdt_delprop(fdt, node_offset, propname);
+
+	return -FDT_ERR_NOTFOUND;
+}
+
+/*
+ * For every device node carrying @dmapropname that also exists as a subnode
+ * of the FDT root, copy "ibm,dma-window" and @dmapropname into the FDT.
+ * Does nothing unless the FW_FEATURE_LPAR firmware feature is present.
+ *
+ * Returns the result of the last copy_property() call (negative on the
+ * first failure, which stops the walk), or 0 when nothing was copied.
+ */
+static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
+{
+	struct device_node *dn;
+	int root, child, rc = 0;
+
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		return 0;
+
+	root = fdt_path_offset(fdt, "/");
+	for_each_node_with_property(dn, dmapropname) {
+		child = fdt_subnode_offset(fdt, root, of_node_full_name(dn));
+		if (child < 0)
+			continue;
+
+		rc = copy_property(fdt, child, dn, "ibm,dma-window");
+		if (rc >= 0)
+			rc = copy_property(fdt, child, dn, dmapropname);
+
+		if (rc < 0) {
+			/* Leaving the iterator early: drop the node it holds */
+			of_node_put(dn);
+			break;
+		}
+	}
+
+	return rc;
+}
+
+/**
+ * setup_new_fdt_ppc64 - Update the flattened device-tree of the kernel
+ * being loaded.
+ * @image: kexec image being loaded.
+ * @fdt: Flattened device tree for the next kernel.
+ * @rmem: Reserved memory ranges; NULL is treated as "no ranges".
+ *
+ * Returns 0 on success, negative errno on error.
+ * NOTE(review): several failure paths pass on the return value of libfdt
+ * calls (e.g. fdt_add_mem_rsv) unchanged - confirm callers only test for
+ * non-zero.
+ */
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, struct crash_mem *rmem)
+{
+ struct crash_mem *umem = NULL;
+ int i, nr_ranges, ret;
+
+#ifdef CONFIG_CRASH_DUMP
+ /*
+ * Restrict memory usage for kdump kernel by setting up
+ * usable memory ranges and memory reserve map.
+ */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = get_usable_memory_ranges(&umem);
+ if (ret)
+ goto out;
+
+ ret = update_usable_mem_fdt(fdt, umem);
+ if (ret) {
+ pr_err("Error setting up usable-memory property for kdump kernel\n");
+ goto out;
+ }
+
+ /*
+ * Ensure we don't touch crashed kernel's memory except the
+ * first 64K of RAM, which will be backed up.
+ */
+ ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
+ crashk_res.start - BACKUP_SRC_SIZE);
+ if (ret) {
+ pr_err("Error reserving crash memory: %s\n",
+ fdt_strerror(ret));
+ goto out;
+ }
+
+ /* Ensure backup region is not used by kdump/capture kernel */
+ ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
+ BACKUP_SRC_SIZE);
+ if (ret) {
+ pr_err("Error reserving memory for backup: %s\n",
+ fdt_strerror(ret));
+ goto out;
+ }
+ }
+#endif
+
+ /* Update cpus nodes information to account hotplug CPUs. */
+ ret = update_cpus_node(fdt);
+ if (ret < 0)
+ goto out;
+
+ /* Refresh the DMA window properties, once per property name */
+ ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
+ if (ret < 0)
+ goto out;
+
+ ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
+ if (ret < 0)
+ goto out;
+
+ /* Update memory reserve map */
+ nr_ranges = rmem ? rmem->nr_ranges : 0;
+ for (i = 0; i < nr_ranges; i++) {
+ u64 base, size;
+
+ /* Range ends are inclusive, hence the + 1 */
+ base = rmem->ranges[i].start;
+ size = rmem->ranges[i].end - base + 1;
+ ret = fdt_add_mem_rsv(fdt, base, size);
+ if (ret) {
+ pr_err("Error updating memory reserve map: %s\n",
+ fdt_strerror(ret));
+ goto out;
+ }
+ }
+
+ // If we have PLPKS active, we need to provide the password to the new kernel
+ if (plpks_is_available())
+ ret = plpks_populate_fdt(fdt);
+
+out:
+ /* umem is only allocated on the crash path; it is NULL otherwise */
+ kfree(umem);
+ return ret;
+}
+
+/**
+ * arch_kexec_kernel_image_probe - Prepare the exclude memory ranges, then
+ *                                 run the default image probe.
+ * @image:   kexec image being loaded.
+ * @buf:     Buffer pointing to elf data.
+ * @buf_len: Length of the buffer.
+ *
+ * The exclude ranges are stored in image->arch.exclude_ranges for the later
+ * buffer placement lookups.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len)
+{
+	int err;
+
+	err = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+	if (!err)
+		return kexec_image_probe_default(image, buf, buf_len);
+
+	pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
+	return err;
+}
+
+/**
+ * arch_kimage_file_post_load_cleanup - Release everything that was allocated
+ *                                      for @image while it was being loaded.
+ * @image: kexec image being loaded.
+ *
+ * Frees the exclude ranges, the backup buffer, the ELF headers and the FDT,
+ * clears the corresponding fields, then runs the default cleanup.
+ *
+ * Returns the result of kexec_image_post_load_cleanup_default().
+ */
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	/* Release the buffers first ... */
+	kfree(image->arch.exclude_ranges);
+	vfree(image->arch.backup_buf);
+	vfree(image->elf_headers);
+	kvfree(image->arch.fdt);
+
+	/* ... then make sure nothing keeps pointing at them */
+	image->arch.exclude_ranges = NULL;
+	image->arch.backup_buf = NULL;
+	image->elf_headers = NULL;
+	image->elf_headers_sz = 0;
+	image->arch.fdt = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
new file mode 100644
index 000000000000..3702b0bdab14
--- /dev/null
+++ b/arch/powerpc/kexec/ranges.c
@@ -0,0 +1,708 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec ranges: " fmt
+
+#include <linux/sort.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/memblock.h>
+#include <linux/crash_core.h>
+#include <asm/sections.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * get_max_nr_ranges - Number of range slots that fit in an allocation.
+ * @size: Allocation size of crash_mem structure.
+ *
+ * Whatever is left of @size after the crash_mem header is divided by the
+ * size of one struct range.
+ *
+ * Returns the maximum no. of ranges.
+ */
+static inline unsigned int get_max_nr_ranges(size_t size)
+{
+	size_t payload = size - sizeof(struct crash_mem);
+
+	return payload / sizeof(struct range);
+}
+
+/**
+ * get_mem_rngs_size - Allocated size of a range list, derived from its
+ *                     max_nr_ranges and the allocation chunk size.
+ * @mem_rngs: Memory ranges.
+ *
+ * Returns the maximum size of @mem_rngs, or 0 when @mem_rngs is NULL.
+ */
+static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
+{
+	size_t used;
+
+	if (mem_rngs == NULL)
+		return 0;
+
+	used = sizeof(struct crash_mem);
+	used += mem_rngs->max_nr_ranges * sizeof(struct range);
+
+	/* Allocations are MEM_RANGE_CHUNK_SZ multiples, so round up to one */
+	return ALIGN(used, MEM_RANGE_CHUNK_SZ);
+}
+
+/**
+ * __add_mem_range - append a memory range to the end of a range list.
+ * @mem_ranges: Range list to add the memory range to.
+ * @base:       Base address of the range to add.
+ * @size:       Size of the memory range to add.
+ *
+ * The list is (re)allocated first when it does not exist or has no free
+ * slot left.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *list = *mem_ranges;
+	struct range *slot;
+	u64 last = base + size - 1;
+
+	if (!list || list->nr_ranges == list->max_nr_ranges) {
+		list = realloc_mem_ranges(mem_ranges);
+		if (!list)
+			return -ENOMEM;
+	}
+
+	/* Ranges are stored as inclusive [start, end] pairs */
+	slot = &list->ranges[list->nr_ranges];
+	slot->start = base;
+	slot->end = last;
+	pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
+		 base, last, list->nr_ranges);
+	list->nr_ranges++;
+	return 0;
+}
+
+/**
+ * __merge_memory_ranges - Merges the given memory ranges list.
+ * @mem_rngs: Range list to merge.
+ *
+ * Assumes a range list sorted by start address. Overlapping and directly
+ * adjacent ranges are collapsed in place and nr_ranges is updated to the
+ * number of entries that remain. A NULL or empty list is left untouched.
+ *
+ * Returns nothing.
+ */
+static void __merge_memory_ranges(struct crash_mem *mem_rngs)
+{
+	struct range *ranges;
+	unsigned int i, idx;
+
+	/* An empty list must stay empty rather than end up with one entry */
+	if (!mem_rngs || !mem_rngs->nr_ranges)
+		return;
+
+	idx = 0;
+	ranges = &(mem_rngs->ranges[0]);
+	for (i = 1; i < mem_rngs->nr_ranges; i++) {
+		/*
+		 * Compare with the entry being built up (idx), not with the
+		 * raw predecessor, which may be a small range already
+		 * swallowed by a bigger one. The list is sorted, so
+		 * start[i] >= start[idx]; "start - 1 == end" detects
+		 * adjacency without overflowing when end is the max value.
+		 */
+		if (ranges[i].start <= ranges[idx].end ||
+		    ranges[i].start - 1 == ranges[idx].end) {
+			/* Only ever grow the merged entry, never shrink it */
+			if (ranges[i].end > ranges[idx].end)
+				ranges[idx].end = ranges[i].end;
+			continue;
+		}
+
+		/* Disjoint: start a new output entry */
+		idx++;
+		if (i != idx)
+			ranges[idx] = ranges[i];
+	}
+	mem_rngs->nr_ranges = idx + 1;
+}
+
+/*
+ * cmp_func_t callback for sort(): orders two struct range entries by their
+ * start address (negative, zero or positive like any comparator).
+ */
+static int rngcmp(const void *_x, const void *_y)
+{
+	const struct range *lhs = _x;
+	const struct range *rhs = _y;
+
+	return (lhs->start > rhs->start) - (lhs->start < rhs->start);
+}
+
+/**
+ * sort_memory_ranges - Sorts the given memory ranges list by start address.
+ * @mem_rngs: Range list to sort.
+ * @merge:    If true, merge the list after sorting.
+ *
+ * A NULL list is ignored. The resulting list is dumped with pr_debug().
+ *
+ * Returns nothing.
+ */
+void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
+{
+	struct range *rng;
+	int idx;
+
+	if (!mem_rngs)
+		return;
+
+	/* In-place sort of the range array */
+	sort(mem_rngs->ranges, mem_rngs->nr_ranges,
+	     sizeof(*mem_rngs->ranges), rngcmp, NULL);
+
+	if (merge)
+		__merge_memory_ranges(mem_rngs);
+
+	/* Debug dump of the final list */
+	pr_debug("Memory ranges:\n");
+	for (idx = 0; idx < mem_rngs->nr_ranges; idx++) {
+		rng = &mem_rngs->ranges[idx];
+		pr_debug("\t[%03d][%#016llx - %#016llx]\n", idx,
+			 rng->start, rng->end);
+	}
+}
+
+/**
+ * realloc_mem_ranges - grow a range list by one MEM_RANGE_CHUNK_SZ.
+ * @mem_ranges: Memory ranges to reallocate.
+ *
+ * On allocation failure the old list is freed and *@mem_ranges is set to
+ * NULL. On success *@mem_ranges points to the grown list, nr_ranges is
+ * carried over and max_nr_ranges reflects the new size.
+ *
+ * Returns pointer to reallocated memory on success, NULL otherwise.
+ */
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct crash_mem *old = *mem_ranges;
+	struct crash_mem *grown;
+	unsigned int in_use = old ? old->nr_ranges : 0;
+	size_t new_size = get_mem_rngs_size(old) + MEM_RANGE_CHUNK_SZ;
+
+	grown = krealloc(old, new_size, GFP_KERNEL);
+	if (!grown) {
+		kfree(old);
+		*mem_ranges = NULL;
+		return NULL;
+	}
+
+	/* Written explicitly: a first allocation has no header to inherit */
+	grown->nr_ranges = in_use;
+	grown->max_nr_ranges = get_max_nr_ranges(new_size);
+	*mem_ranges = grown;
+
+	return grown;
+}
+
+/**
+ * add_mem_range - Widens the first existing range that overlaps the new
+ *                 one, or appends a new range when none overlaps.
+ * @mem_ranges: Range list to add the memory range to.
+ * @base:       Base address of the range to add.
+ * @size:       Size of the memory range to add.
+ *
+ * A zero @size is a no-op. (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *list = *mem_ranges;
+	struct range *cur;
+	unsigned int n;
+	u64 last;
+
+	if (size == 0)
+		return 0;
+
+	last = base + size - 1;
+
+	/* A missing or empty list simply skips the search */
+	for (n = 0; list && n < list->nr_ranges; n++) {
+		cur = &list->ranges[n];
+
+		/* No overlap with this entry, keep looking */
+		if (base >= cur->end || last <= cur->start)
+			continue;
+
+		/* Stretch the entry so that it also covers [base, last] */
+		if (base < cur->start)
+			cur->start = base;
+		if (last > cur->end)
+			cur->end = last;
+		return 0;
+	}
+
+	return __add_mem_range(mem_ranges, base, size);
+}
+
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+
+#ifdef CONFIG_KEXEC_FILE
+/**
+ * add_tce_mem_ranges - Adds the tce-table range of every "pci" type node
+ *                      to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range(s) to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_tce_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn = NULL;
+	int ret = 0;
+
+	for_each_node_by_type(dn, "pci") {
+		u64 base;
+		u32 size;
+
+		ret = of_property_read_u64(dn, "linux,tce-base", &base);
+		ret |= of_property_read_u32(dn, "linux,tce-size", &size);
+
+		/*
+		 * A pci node without tce properties is fine: treat
+		 * "property does not exist" as nothing to add.
+		 */
+		if (ret == -EINVAL) {
+			ret = 0;
+			continue;
+		}
+
+		if (!ret)
+			ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			break;
+	}
+
+	/* Put the node still held in dn when the loop was left early */
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_initrd_mem_range - Adds the initrd range to the given memory ranges
+ *                        list, but only if the initrd was retained.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_initrd_mem_range(struct crash_mem **mem_ranges)
+{
+	u64 start, stop;
+	int err;
+
+	/* Without "retain_initrd" on the command line there is nothing to add */
+	if (strstr(saved_command_line, "retain_initrd") == NULL)
+		return 0;
+
+	err = of_property_read_u64(of_chosen, "linux,initrd-start", &start);
+	err |= of_property_read_u64(of_chosen, "linux,initrd-end", &stop);
+	if (err)
+		return err;
+
+	return add_mem_range(mem_ranges, start, stop - start + 1);
+}
+
+/**
+ * add_htab_mem_range - Adds the htab range to the given memory ranges list,
+ *                      if one exists.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Does nothing unless CONFIG_PPC_64S_HASH_MMU is set and htab_address is
+ * non-zero.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+#ifdef CONFIG_PPC_64S_HASH_MMU
+	if (htab_address)
+		return add_mem_range(mem_ranges, __pa(htab_address),
+				     htab_size_bytes);
+#endif
+	return 0;
+}
+
+/**
+ * add_kernel_mem_range - Adds the region from address 0 up to the physical
+ *                        address of _end to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_kernel_mem_range(struct crash_mem **mem_ranges)
+{
+	u64 kernel_end = __pa(_end);
+
+	return add_mem_range(mem_ranges, 0, kernel_end);
+}
+#endif /* CONFIG_KEXEC_FILE */
+
+#if defined(CONFIG_KEXEC_FILE) || defined(CONFIG_CRASH_DUMP)
+/**
+ * add_rtas_mem_range - Adds the RTAS region, as described by the "/rtas"
+ *                      node, to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * A missing "/rtas" node is not an error.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_rtas_mem_range(struct crash_mem **mem_ranges)
+{
+	struct device_node *rtas = of_find_node_by_path("/rtas");
+	u32 base, size;
+	int ret;
+
+	if (!rtas)
+		return 0;
+
+	ret = of_property_read_u32(rtas, "linux,rtas-base", &base);
+	ret |= of_property_read_u32(rtas, "rtas-size", &size);
+	if (ret == 0)
+		ret = add_mem_range(mem_ranges, base, size);
+
+	of_node_put(rtas);
+	return ret;
+}
+
+/**
+ * add_opal_mem_range - Adds the OPAL region, as described by the
+ *                      "/ibm,opal" node, to the given memory ranges list.
+ * @mem_ranges: Range list to add the memory range to.
+ *
+ * A missing "/ibm,opal" node is not an error.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_opal_mem_range(struct crash_mem **mem_ranges)
+{
+	struct device_node *opal = of_find_node_by_path("/ibm,opal");
+	u64 base, size;
+	int ret;
+
+	if (!opal)
+		return 0;
+
+	ret = of_property_read_u64(opal, "opal-base-address", &base);
+	ret |= of_property_read_u64(opal, "opal-runtime-size", &size);
+	if (ret == 0)
+		ret = add_mem_range(mem_ranges, base, size);
+
+	of_node_put(opal);
+	return ret;
+}
+#endif /* CONFIG_KEXEC_FILE || CONFIG_CRASH_DUMP */
+
+#ifdef CONFIG_KEXEC_FILE
+/**
+ * add_reserved_mem_ranges - Adds the regions listed in the root node's
+ *                           "reserved-ranges" property to the given memory
+ *                           ranges list.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * A missing property is not an error.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct device_node *root = of_find_node_by_path("/");
+	int addr_cells, size_cells, stride, len, idx, ret = 0;
+	const __be32 *prop, *cell;
+	size_t nr;
+
+	prop = of_get_property(root, "reserved-ranges", &len);
+	addr_cells = of_n_addr_cells(root);
+	size_cells = of_n_size_cells(root);
+	of_node_put(root);
+	if (!prop)
+		return 0;
+
+	/* One entry is an (address,size) pair of stride 32-bit cells */
+	stride = addr_cells + size_cells;
+	nr = len / (sizeof(u32) * stride);
+
+	for (idx = 0, cell = prop; idx < nr; idx++, cell += stride) {
+		u64 base = of_read_number(cell, addr_cells);
+		u64 size = of_read_number(cell + addr_cells, size_cells);
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/**
+ * get_reserved_memory_ranges - Get reserve memory ranges: the RTAS region,
+ *                              the tce-tables and the f/w exported
+ *                              "reserved-ranges", i.e. regions that should
+ *                              go into the memory reserve map.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/* Each step only runs while everything before it succeeded */
+	ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_tce_mem_ranges(mem_ranges);
+	if (!ret)
+		ret = add_reserved_mem_ranges(mem_ranges);
+
+	if (ret)
+		pr_err("Failed to setup reserved memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_exclude_memory_ranges - Get exclude memory ranges: tce-table, initrd,
+ *                             htab, kernel, rtas/opal and reserved ranges,
+ *                             which should be avoided while setting up
+ *                             kexec load segments.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * On success the list is sorted and merged.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/* Each step only runs while everything before it succeeded */
+	ret = add_tce_mem_ranges(mem_ranges);
+	if (!ret)
+		ret = add_initrd_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_htab_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_kernel_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_opal_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_reserved_mem_ranges(mem_ranges);
+
+	if (ret) {
+		pr_err("Failed to setup exclude memory ranges\n");
+		return ret;
+	}
+
+	/* exclude memory ranges should be sorted for easy lookup */
+	sort_memory_ranges(*mem_ranges, true);
+	return 0;
+}
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_usable_memory_ranges - Get usable memory ranges: [0, crashk_res.end],
+ *                            opal/rtas and tce-table regions, that the
+ *                            kdump kernel could use.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/*
+	 * Guests were seen to fail early in boot when low memory (first
+	 * memory block?) is missing from usable memory. As a workaround the
+	 * range starts at 0 rather than at crashk_res.start. The crashed
+	 * kernel's memory must then be put in the reserve map so that the
+	 * kdump kernel does not use it.
+	 */
+	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+	if (!ret)
+		ret = add_rtas_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_opal_mem_range(mem_ranges);
+	if (!ret)
+		ret = add_tce_mem_ranges(mem_ranges);
+
+	if (ret)
+		pr_err("Failed to setup usable memory ranges\n");
+	return ret;
+}
+#endif /* CONFIG_CRASH_DUMP */
+#endif /* CONFIG_KEXEC_FILE */
+
+#ifdef CONFIG_CRASH_DUMP
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ *                           first/crashing kernel's memory regions that
+ *                           would be exported via an elfcore.
+ * @mem_ranges: Range list to add the memory ranges to.
+ *
+ * Walks all memory ranges, carves out the backup source region (it gets an
+ * entry of its own), excludes the crashkernel region, then adds the
+ * RTAS/OPAL regions and the backup region before sorting the list.
+ *
+ * Returns 0 on success, negative errno on error (-ENOMEM when the list
+ * could not be grown).
+ */
+int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+	phys_addr_t base, end;
+	struct crash_mem *tmem;
+	u64 i;
+	int ret = 0;	/* stays 0 if there is no memory range to walk */
+
+	for_each_mem_range(i, &base, &end) {
+		u64 size = end - base;
+
+		/* Skip backup memory region, which needs a separate entry */
+		if (base == BACKUP_SRC_START) {
+			if (size > BACKUP_SRC_SIZE) {
+				base = BACKUP_SRC_END + 1;
+				size -= BACKUP_SRC_SIZE;
+			} else
+				continue;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			goto out;
+
+		/*
+		 * Try merging adjacent ranges before reallocation attempt.
+		 * The list may still be NULL here: add_mem_range() does
+		 * nothing for a zero sized range.
+		 */
+		tmem = *mem_ranges;
+		if (tmem && tmem->nr_ranges == tmem->max_nr_ranges)
+			sort_memory_ranges(tmem, true);
+	}
+
+	/* Reallocate memory ranges if there is no space to split ranges */
+	tmem = *mem_ranges;
+	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+		tmem = realloc_mem_ranges(mem_ranges);
+		if (!tmem) {
+			/* ret is still 0 from the loop; report the failure */
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/* Exclude crashkernel region (nothing to exclude from a NULL list) */
+	if (tmem) {
+		ret = crash_exclude_mem_range(tmem, crashk_res.start,
+					      crashk_res.end);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+	 *        regions are exported to save their context at the time of
+	 *        crash, they should actually be backed up just like the
+	 *        first 64K bytes of memory.
+	 */
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* create a separate program header for the backup region */
+	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+	if (ret)
+		goto out;
+
+	sort_memory_ranges(*mem_ranges, false);
+out:
+	if (ret)
+		pr_err("Failed to setup crash memory ranges\n");
+	return ret;
+}
+
+/**
+ * remove_mem_range - Removes the given memory range from the range list.
+ * @mem_ranges: Range list to remove the memory range from.
+ * @base:       Base address of the range to remove.
+ * @size:       Size of the memory range to remove.
+ *
+ * Only a range that lies completely inside a single list entry is removed;
+ * anything else leaves the list unchanged. Removing from the middle of an
+ * entry splits it in two, which (re)allocates memory if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int remove_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *mem_rngs = *mem_ranges;
+	u64 mstart, mend, end;
+	unsigned int i;
+
+	/* Nothing to remove, or nothing to remove it from */
+	if (!size || !mem_rngs)
+		return 0;
+
+	/*
+	 * Memory range are stored as start and end address, use
+	 * the same format to do remove operation.
+	 */
+	end = base + size - 1;
+
+	for (i = 0; i < mem_rngs->nr_ranges; i++) {
+		mstart = mem_rngs->ranges[i].start;
+		mend = mem_rngs->ranges[i].end;
+
+		/* Range to remove is not contained in this entry */
+		if (base < mstart || end > mend)
+			continue;
+
+		/* Exact match: drop the entry by shifting the tail down */
+		if (base == mstart && end == mend) {
+			for (; i < mem_rngs->nr_ranges - 1; i++)
+				mem_rngs->ranges[i] = mem_rngs->ranges[i + 1];
+			mem_rngs->nr_ranges--;
+			return 0;
+		}
+
+		/* Removal at the front: move the entry's start past it */
+		if (base == mstart) {
+			mem_rngs->ranges[i].start = end + 1;
+			return 0;
+		}
+
+		/* Removal at the back: pull the entry's end in front of it */
+		if (end == mend) {
+			mem_rngs->ranges[i].end = base - 1;
+			return 0;
+		}
+
+		/*
+		 * Removal from the middle: keep [mstart, base - 1] in place
+		 * and add [end + 1, mend] as a new entry. The size of the
+		 * upper part comes from the saved mend, because the entry's
+		 * end field has just been overwritten. Return right away:
+		 * add_mem_range() may reallocate the list, after which
+		 * mem_rngs must not be used any more.
+		 */
+		mem_rngs->ranges[i].end = base - 1;
+		return add_mem_range(mem_ranges, end + 1, mend - end);
+	}
+
+	return 0;
+}
+#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
new file mode 100644
index 000000000000..dd86e338307d
--- /dev/null
+++ b/arch/powerpc/kexec/relocate_32.S
@@ -0,0 +1,499 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains kexec low-level functions.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011, IBM Corporation
+ * Author: Suzuki Poulose <suzuki@in.ibm.com>
+ */
+
+#include <linux/objtool.h>
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kexec.h>
+
+ .text
+
+ /*
+ * Must be relocatable PIC code callable as a C function.
+ */
+ .globl relocate_new_kernel
+relocate_new_kernel:
+ /* r3 = page_list */
+ /* r4 = reboot_code_buffer */
+ /* r5 = start_address */
+
+#ifdef CONFIG_PPC_85xx
+
+ mr r29, r3 /* keep the three arguments in r29-r31 ... */
+ mr r30, r4
+ mr r31, r5 /* ... across the included entry mapping code */
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include <kernel/85xx_entry_mapping.S>
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+ mr r3, r29 /* put the arguments back in r3-r5 */
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0 /* r0 = 0 is what the common code stores at the new stack top */
+#elif defined(CONFIG_44x)
+
+ /* Save our parameters */
+ mr r29, r3 /* restored at ppc44x_map_done */
+ mr r30, r4
+ mr r31, r5
+
+#ifdef CONFIG_PPC_47x
+ /* Check for 47x cores */
+ mfspr r3,SPRN_PVR
+ srwi r3,r3,16 /* compare only the upper half of the PVR */
+ cmplwi cr0,r3,PVR_476FPE@h
+ beq setup_map_47x
+ cmplwi cr0,r3,PVR_476@h
+ beq setup_map_47x
+ cmplwi cr0,r3,PVR_476_ISS@h
+ beq setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ * jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ */
+
+ /*
+ * Load the PID with kernel PID (0).
+ * Also load our MSR_IS and TID to MMUCR for TLB search.
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3
+ mfmsr r4
+ andi. r4,r4,MSR_IS@l /* is MSR[IS] set? */
+ beq wmmucr /* no: leave STS clear */
+ oris r3,r3,PPC44x_MMUCR_STS@h /* yes: set STS as well */
+wmmucr:
+ mtspr SPRN_MMUCR,r3
+ sync
+
+ /*
+ * Invalidate all the TLB entries except the current entry
+ * where we are running from
+ */
+ bcl 20,31,$+4 /* Find our address */
+0: mflr r5 /* Make it accessible */
+ tlbsx r23,0,r5 /* Find entry we are in */
+ li r4,0 /* Start at TLB entry 0 */
+ li r3,0 /* Set PAGEID inval value */
+1: cmpw r23,r4 /* Is this our entry? */
+ beq skip /* If so, skip the inval */
+ tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */
+skip:
+ addi r4,r4,1 /* Increment */
+ cmpwi r4,64 /* Are we done? */
+ bne 1b /* If not, repeat */
+ isync
+
+ /* Create a temp mapping and jump to it */
+ andi. r6, r23, 1 /* Find the index to use */
+ addi r24, r6, 1 /* r24 will contain 1 or 2 */
+
+ mfmsr r9 /* get the MSR */
+ rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */
+ xori r7, r5, 1 /* Use the other address space */
+
+ /* Read the current mapping entries */
+ tlbre r3, r23, PPC44x_TLB_PAGEID
+ tlbre r4, r23, PPC44x_TLB_XLAT
+ tlbre r5, r23, PPC44x_TLB_ATTRIB
+
+ /* Save our current XLAT entry */
+ mr r25, r4
+
+ /* Extract the TLB PageSize */
+ li r10, 1 /* r10 will hold PageSize */
+ rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */
+
+ /* XXX: As of now we use 256M, 4K pages */
+ cmpwi r11, PPC44x_TLB_256M
+ bne tlb_4k
+ rotlwi r10, r10, 28 /* r10 = 256M */
+ b write_out
+tlb_4k:
+ cmpwi r11, PPC44x_TLB_4K
+ bne default
+ rotlwi r10, r10, 12 /* r10 = 4K */
+ b write_out
+default:
+ rotlwi r10, r10, 10 /* r10 = 1K */
+
+write_out:
+ /*
+ * Write out the tmp 1:1 mapping for this code in other address space
+ * Fixup EPN = RPN , TS=other address space
+ */
+ insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */
+
+ /* Write out the tmp mapping entries */
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ tlbwe r4, r24, PPC44x_TLB_XLAT
+ tlbwe r5, r24, PPC44x_TLB_ATTRIB
+
+ subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */
+ not r10, r11 /* Mask for PageNum */
+
+ /* Switch to other address space in MSR */
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bcl 20,31,$+4
+1: mflr r8
+ addi r8, r8, (2f-1b) /* Find the target offset */
+
+ /* Jump to the tmp mapping */
+ mtspr SPRN_SRR0, r8 /* rfi continues at 2: below */
+ mtspr SPRN_SRR1, r9 /* with the MSR prepared in r9 */
+ rfi
+
+2:
+ /* Invalidate the entry we were executing from */
+ li r3, 0
+ tlbwe r3, r23, PPC44x_TLB_PAGEID
+
+ /* attribute fields. rwx for SUPERVISOR mode */
+ li r5, 0
+ ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+ /* Create 1:1 mapping in 256M pages */
+ xori r7, r7, 1 /* Revert back to Original TS */
+
+ li r8, 0 /* PageNumber */
+ li r6, 3 /* TLB Index, start at 3 */
+
+next_tlb:
+ rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */
+ mr r4, r3 /* RPN = EPN */
+ ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+ insrwi r3, r7, 1, 23 /* Set TS from r7 */
+
+ tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */
+ tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */
+ tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */
+
+ addi r8, r8, 1 /* Increment PN */
+ addi r6, r6, 1 /* Increment TLB Index */
+ cmpwi r8, 8 /* Are we done ? */
+ bne next_tlb
+ isync
+
+ /* Jump to the new mapping 1:1 */
+ li r9,0
+ insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */
+
+ bcl 20,31,$+4
+1: mflr r8
+ and r8, r8, r11 /* Get our offset within page */
+ addi r8, r8, (2f-1b)
+
+ and r5, r25, r10 /* Get our target PageNum */
+ or r8, r8, r5 /* Target jump address */
+
+ mtspr SPRN_SRR0, r8
+ mtspr SPRN_SRR1, r9
+ rfi
+2:
+ /* Invalidate the tmp entry we used */
+ li r3, 0
+ tlbwe r3, r24, PPC44x_TLB_PAGEID
+ sync
+ b ppc44x_map_done /* skip the 47x variant below */
+
+#ifdef CONFIG_PPC_47x
+
+ /* 1:1 mapping for 47x */
+
+setup_map_47x:
+
+ /*
+ * Load the kernel pid (0) to PID and also to MMUCR[TID].
+ * Also set the MSR IS->MMUCR STS
+ */
+ li r3, 0
+ mtspr SPRN_PID, r3 /* Set PID */
+ mfmsr r4 /* Get MSR */
+ andi. r4, r4, MSR_IS@l /* TS=1? */
+ beq 1f /* If not, leave STS=0 */
+ oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */
+1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */
+ sync
+
+ /* Find the entry we are running from */
+ bcl 20,31,$+4
+2: mflr r23
+ tlbsx r23, 0, r23 /* r23 = TLB entry found for this code address */
+ tlbre r24, r23, 0 /* TLB Word 0 */
+ tlbre r25, r23, 1 /* TLB Word 1 */
+ tlbre r26, r23, 2 /* TLB Word 2 */
+
+
+ /*
+ * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+ * of 4k page size in all 4 ways (0-3 in r3).
+ * This would invalidate the entire UTLB including the one we are
+ * running from. However the shadow TLB entries would help us
+ * to continue the execution, until we flush them (rfi/isync).
+ */
+ addis r3, 0, 0x8000 /* specify the way */
+ addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */
+ addi r5, 0, 0
+ b clear_utlb_entry
+
+ /* Align the loop to speed things up. from head_44x.S */
+ .align 6
+
+clear_utlb_entry:
+
+ tlbwe r4, r3, 0
+ tlbwe r5, r3, 1
+ tlbwe r5, r3, 2
+ addis r3, r3, 0x2000 /* Increment the way */
+ cmpwi r3, 0 /* way field wrapped to 0: all ways written */
+ bne clear_utlb_entry
+ addis r3, 0, 0x8000 /* back to the first way */
+ addis r4, r4, 0x100 /* Increment the EPN */
+ cmpwi r4, 0 /* r4 wrapped to 0: all EPNs written */
+ bne clear_utlb_entry
+
+ /* Create the entries in the other address space */
+ mfmsr r5
+ rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */
+ xori r7, r7, 1 /* r7 = !TS */
+
+ insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */
+
+ /*
+ * write out the TLB entries for the tmp mapping
+ * Use way '0' so that we could easily invalidate it later.
+ */
+ lis r3, 0x8000 /* Way '0' */
+
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Update the msr to the new TS */
+ insrwi r5, r7, 1, 26
+
+ bcl 20,31,$+4
+1: mflr r6
+ addi r6, r6, (2f-1b) /* address of 2: below */
+
+ mtspr SPRN_SRR0, r6
+ mtspr SPRN_SRR1, r5
+ rfi
+
+ /*
+ * Now we are in the tmp address space.
+ * Create a 1:1 mapping for 0-2GiB in the original TS.
+ */
+2:
+ li r3, 0
+ li r4, 0 /* TLB Word 0 */
+ li r5, 0 /* TLB Word 1 */
+ li r6, 0
+ ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */
+
+ li r8, 0 /* PageIndex */
+
+ xori r7, r7, 1 /* revert back to original TS */
+
+write_utlb:
+ rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */
+ /* ERPN = 0 as we don't use memory above 2G */
+
+ mr r4, r5 /* EPN = RPN */
+ ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+ insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */
+
+ tlbwe r4, r3, 0 /* Write out the entries */
+ tlbwe r5, r3, 1
+ tlbwe r6, r3, 2
+ addi r8, r8, 1
+ cmpwi r8, 8 /* Have we completed ? */
+ bne write_utlb
+
+ /* make sure we complete the TLB write up */
+ isync
+
+ /*
+ * Prepare to jump to the 1:1 mapping.
+ * 1) Extract page size of the tmp mapping
+ * DSIZ = TLB_Word0[22:27]
+ * 2) Calculate the physical address of the address
+ * to jump to.
+ */
+ rlwinm r10, r24, 0, 22, 27
+
+ cmpwi r10, PPC47x_TLB0_4K
+ li r10, 0x1000 /* r10 = 4k */
+ beq 0f /* branches on the cmpwi above; li leaves CR alone */
+
+ /* Defaults to 256M */
+ lis r10, 0x1000
+
+0: bcl 20,31,$+4
+1: mflr r4
+ addi r4, r4, (2f-1b) /* virtual address of 2f */
+
+ subi r11, r10, 1 /* offsetmask = Pagesize - 1 */
+ not r10, r11 /* Pagemask = ~(offsetmask) */
+
+ and r5, r25, r10 /* Physical page */
+ and r6, r4, r11 /* offset within the current page */
+
+ or r5, r5, r6 /* Physical address for 2f */
+
+ /* Switch the TS in MSR to the original one */
+ mfmsr r8
+ insrwi r8, r7, 1, 26
+
+ mtspr SPRN_SRR1, r8
+ mtspr SPRN_SRR0, r5
+ rfi
+
+2:
+ /* Invalidate the tmp mapping */
+ lis r3, 0x8000 /* Way '0' */
+
+ clrrwi r24, r24, 12 /* Clear the valid bit */
+ tlbwe r24, r3, 0
+ tlbwe r25, r3, 1
+ tlbwe r26, r3, 2
+
+ /* Make sure we complete the TLB write and flush the shadow TLB */
+ isync
+
+#endif
+
+ppc44x_map_done:
+
+
+ /* Restore the parameters */
+ mr r3, r29
+ mr r4, r30
+ mr r5, r31
+
+ li r0, 0 /* r0 = 0 is what the common code stores at the new stack top */
+#else
+ li r0, 0
+
+ /*
+ * Set Machine Status Register to a known status,
+ * switch the MMU off and jump to 1: in a single step.
+ */
+
+ mr r8, r0
+ ori r8, r8, MSR_RI|MSR_ME /* r8 = only RI and ME set */
+ mtspr SPRN_SRR1, r8 /* becomes the MSR on rfi */
+ addi r8, r4, 1f - relocate_new_kernel /* 1: as an offset into reboot_code_buffer (r4) */
+ mtspr SPRN_SRR0, r8 /* rfi continues there */
+ sync
+ rfi
+
+1:
+#endif
+ /* from this point address translation is turned off */
+ /* and interrupts are disabled */
+
+ /* set a new stack at the bottom of our page... */
+ /* (not really needed now) */
+ addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+ stw r0, 0(r1) /* r0 is 0 here (li r0, 0 in every variant above) */
+
+ /* Do the copies */
+ li r6, 0 /* checksum */
+ mr r0, r3 /* first entry to decode is the page_list value itself */
+ b 1f
+
+0: /* top, read another word for the indirection page */
+ lwzu r0, 4(r3)
+
+1:
+ /* is it a destination page? (r8) */
+ rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+ beq 2f
+
+ rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+ b 0b
+
+2: /* is it an indirection page? (r3) */
+ rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+ beq 2f
+
+ rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+ subi r3, r3, 4 /* the lwzu at 0: adds 4 before it loads */
+ b 0b
+
+2: /* are we done? */
+ rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+ beq 2f
+ b 3f
+
+2: /* is it a source page? (r9) */
+ rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+ beq 0b
+
+ rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+ li r7, PAGE_SIZE / 4 /* number of 32-bit words in a page */
+ mtctr r7 /* loop count for the bdnz below */
+ subi r9, r9, 4 /* lwzu/stwu below add 4 before each access */
+ subi r8, r8, 4
+9:
+ lwzu r0, 4(r9) /* do the copy */
+ xor r6, r6, r0 /* fold the word into the checksum in r6 */
+ stwu r0, 4(r8)
+ dcbst 0, r8
+ sync
+ icbi 0, r8
+ bdnz 9b
+
+ addi r9, r9, 4 /* step past the last word copied */
+ addi r8, r8, 4 /* r8 now addresses the next destination page */
+ b 0b
+
+3:
+
+ /* To be certain of avoiding problems with self-modifying code
+ * execute a serializing instruction here.
+ */
+ isync
+ sync
+
+ mfspr r3, SPRN_PIR /* current core we are running on */
+ mr r4, r5 /* load physical address of chunk called */
+
+ /* jump to the entry point, usually the setup routine */
+ mtlr r5
+ blrl
+
+1: b 1b /* spin here if the call above ever returns */
+
+relocate_new_kernel_end:
+
+ .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+ .long relocate_new_kernel_end - relocate_new_kernel
diff --git a/arch/powerpc/kexec/vmcore_info.c b/arch/powerpc/kexec/vmcore_info.c
new file mode 100644
index 000000000000..2b65d2adca5e
--- /dev/null
+++ b/arch/powerpc/kexec/vmcore_info.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmcore_info.h>
+#include <asm/pgalloc.h>
+
+/*
+ * arch_crash_save_vmcoreinfo - append the powerpc specific symbols, sizes,
+ * offsets and the RADIX_MMU / KERNELOFFSET strings to vmcoreinfo.
+ */
+void arch_crash_save_vmcoreinfo(void)
+{
+	/* Memory node data: node_data[] with NUMA, contig_page_data without */
+#ifdef CONFIG_NUMA
+	VMCOREINFO_SYMBOL(node_data);
+	VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#else
+	VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+
+	/* vmemmap backing list and page size definitions */
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+	VMCOREINFO_SYMBOL(vmemmap_list);
+	VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+	VMCOREINFO_SYMBOL(mmu_psize_defs);
+	VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+	VMCOREINFO_OFFSET(vmemmap_backing, list);
+	VMCOREINFO_OFFSET(vmemmap_backing, phys);
+	VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+	VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+	VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+
+	/* CPU spec and its feature words */
+	VMCOREINFO_SYMBOL(cur_cpu_spec);
+	VMCOREINFO_OFFSET(cpu_spec, cpu_features);
+	VMCOREINFO_OFFSET(cpu_spec, mmu_features);
+
+	vmcoreinfo_append_str("NUMBER(RADIX_MMU)=%d\n", early_radix_enabled());
+	vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 602eb51d20bc..2f2702c867f7 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# KVM configuration
#
@@ -6,7 +7,7 @@ source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
- ---help---
+ help
Say Y here to get to see options for using your Linux host to run
other operating systems inside virtual machines (guests).
This option alone does not add any kernel code.
@@ -18,9 +19,10 @@ if VIRTUALIZATION
config KVM
bool
- select PREEMPT_NOTIFIERS
- select ANON_INODES
- select HAVE_KVM_EVENTFD
+ select KVM_COMMON
+ select HAVE_KVM_VCPU_ASYNC_IOCTL
+ select KVM_VFIO
+ select HAVE_KVM_IRQ_BYPASS
config KVM_BOOK3S_HANDLER
bool
@@ -37,7 +39,7 @@ config KVM_BOOK3S_64_HANDLER
config KVM_BOOK3S_PR_POSSIBLE
bool
select KVM_MMIO
- select MMU_NOTIFIER
+ select KVM_GENERIC_MMU_NOTIFIER
config KVM_BOOK3S_HV_POSSIBLE
bool
@@ -45,10 +47,12 @@ config KVM_BOOK3S_HV_POSSIBLE
config KVM_BOOK3S_32
tristate "KVM support for PowerPC book3s_32 processors"
depends on PPC_BOOK3S_32 && !SMP && !PTE_64BIT
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_BOOK3S_32_HANDLER
select KVM_BOOK3S_PR_POSSIBLE
- ---help---
+ select PPC_FPU
+ help
Support running unmodified book3s_32 guest kernels
in virtual machines on book3s_32 host processors.
@@ -63,7 +67,9 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
- ---help---
+ select PPC_64S_HASH_MMU
+ select SPAPR_TCE_IOMMU if IOMMU_SUPPORT && (PPC_PSERIES || PPC_POWERNV)
+ help
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
@@ -73,14 +79,15 @@ config KVM_BOOK3S_64
If unsure, say N.
config KVM_BOOK3S_64_HV
- tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host"
- depends on KVM_BOOK3S_64
+ tristate "KVM for POWER7 and later using hypervisor mode in host"
+ depends on KVM_BOOK3S_64 && PPC_POWERNV
select KVM_BOOK3S_HV_POSSIBLE
- select MMU_NOTIFIER
+ select KVM_GENERIC_MMU_NOTIFIER
+ select KVM_BOOK3S_HV_PMU
select CMA
- ---help---
+ help
Support running unmodified book3s_64 guest kernels in
- virtual machines on POWER7 and PPC970 processors that have
+ virtual machines on POWER7 and newer processors that have
hypervisor mode available to the host.
If you say Y here, KVM will use the hardware virtualization
@@ -88,34 +95,102 @@ config KVM_BOOK3S_64_HV
guest operating systems will run at full hardware speed
using supervisor and user modes. However, this also means
that KVM is not usable under PowerVM (pHyp), is only usable
- on POWER7 (or later) processors and PPC970-family processors,
- and cannot emulate a different processor from the host processor.
+ on POWER7 or later processors, and cannot emulate a
+ different processor from the host processor.
If unsure, say N.
config KVM_BOOK3S_64_PR
tristate "KVM support without using hypervisor mode in host"
depends on KVM_BOOK3S_64
+ depends on !CONTEXT_TRACKING_USER
select KVM_BOOK3S_PR_POSSIBLE
- ---help---
+ help
Support running guest kernels in virtual machines on processors
without using hypervisor mode in the host, by running the
guest in user mode (problem state) and emulating all
privileged instructions and registers.
+ This is only available for hash MMU mode and only supports
+ guests that use hash MMU mode.
+
This is not as fast as using hypervisor mode, but works on
machines where hypervisor mode is not available or not usable,
and can emulate processors that are different from the host
processor, including emulating 32-bit processors on a 64-bit
host.
+ Selecting this option will cause the SCV facility to be
+ disabled when the kernel is booted on the pseries platform in
+ hash MMU mode (regardless of PR VMs running). When any PR VMs
+ are running, "AIL" mode is disabled which may slow interrupts
+ and system calls on the host.
+
+config KVM_BOOK3S_HV_EXIT_TIMING
+ bool
+
+config KVM_BOOK3S_HV_P9_TIMING
+ bool "Detailed timing for the P9 entry point"
+ select KVM_BOOK3S_HV_EXIT_TIMING
+ depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
+ help
+ Calculate time taken for each vcpu during vcpu entry and
+ exit, time spent inside the guest and time spent handling
+ hypercalls and page faults. The total, minimum and maximum
+ times in nanoseconds together with the number of executions
+ are reported in debugfs in kvm/vm#/vcpu#/timings.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_HV_P8_TIMING
+ bool "Detailed timing for hypervisor real-mode code (for POWER8)"
+ select KVM_BOOK3S_HV_EXIT_TIMING
+ depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS && !KVM_BOOK3S_HV_P9_TIMING
+ help
+ Calculate time taken for each vcpu in the real-mode guest entry,
+ exit, and interrupt handling code, plus time spent in the guest
+ and in nap mode due to idle (cede) while other threads are still
+ in the guest. The total, minimum and maximum times in nanoseconds
+ together with the number of executions are reported in debugfs in
+ kvm/vm#/vcpu#/timings. The overhead is of the order of 30 - 40
+ ns per exit on POWER8.
+
+ If unsure, say N.
+
+config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND
+ bool "Nested L0 host workaround for L1 KVM host PMU handling bug" if EXPERT
+ depends on KVM_BOOK3S_HV_POSSIBLE
+ default !EXPERT
+ help
+ Old nested HV capable Linux guests have a bug where they don't
+ reflect the PMU in-use status of their L2 guest to the L0 host
+ while the L2 PMU registers are live. This can result in loss
+ of L2 PMU register state, causing perf to not work correctly in
+ L2 guests.
+
+ Selecting this option for the L0 host implements a workaround for
+ those buggy L1s which saves the L2 state, at the cost of performance
+ in all nested-capable guest entry/exit.
+
+config KVM_BOOK3S_HV_PMU
+ tristate "Hypervisor Perf events for KVM Book3s-HV"
+ depends on KVM_BOOK3S_64_HV
+ help
+ Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These
+ Perf events give an overview of hypervisor performance overall
+ instead of specific guests. Currently the PMU reports
+ L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like:
+ * Total/Used Guest-Heap
+ * Total/Used Guest Page-table Memory
+ * Total amount of Guest Page-table Memory reclaimed
+
config KVM_BOOKE_HV
bool
config KVM_EXIT_TIMING
bool "Detailed exit timing"
depends on KVM_E500V2 || KVM_E500MC
- ---help---
+ help
Calculate elapsed time for every exit/enter cycle. A per-vcpu
report is available in debugfs kvm/vm#_vcpu#_timing.
The overhead is relatively small, however it is not recommended for
@@ -125,11 +200,12 @@ config KVM_EXIT_TIMING
config KVM_E500V2
bool "KVM support for PowerPC E500v2 processors"
- depends on E500 && !PPC_E500MC
+ depends on PPC_E500 && !PPC_E500MC
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
- select MMU_NOTIFIER
- ---help---
+ select KVM_GENERIC_MMU_NOTIFIER
+ help
Support running unmodified E500 guest kernels in virtual machines on
E500v2 host processors.
@@ -141,11 +217,12 @@ config KVM_E500V2
config KVM_E500MC
bool "KVM support for PowerPC E500MC/E5500/E6500 processors"
depends on PPC_E500MC
+ depends on !CONTEXT_TRACKING_USER
select KVM
select KVM_MMIO
select KVM_BOOKE_HV
- select MMU_NOTIFIER
- ---help---
+ select KVM_GENERIC_MMU_NOTIFIER
+ help
Support running unmodified E500MC/E5500/E6500 guest kernels in
virtual machines on E500MC/E5500/E6500 host processors.
@@ -156,27 +233,29 @@ config KVM_E500MC
config KVM_MPIC
bool "KVM in-kernel MPIC emulation"
- depends on KVM && E500
+ depends on KVM && PPC_E500
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
help
Enable support for emulating MPIC devices inside the
- host kernel, rather than relying on userspace to emulate.
- Currently, support is limited to certain versions of
- Freescale's MPIC implementation.
+ host kernel, rather than relying on userspace to emulate.
+ Currently, support is limited to certain versions of
+ Freescale's MPIC implementation.
config KVM_XICS
bool "KVM in-kernel XICS emulation"
depends on KVM_BOOK3S_64 && !KVM_MPIC
select HAVE_KVM_IRQCHIP
- select HAVE_KVM_IRQFD
- ---help---
+ default y
+ help
Include support for the XICS (eXternal Interrupt Controller
Specification) interrupt controller architecture used on
IBM POWER (pSeries) servers.
-source drivers/vhost/Kconfig
+config KVM_XIVE
+ bool
+ default y
+ depends on KVM_XICS && PPC_XIVE_NATIVE && KVM_BOOK3S_HV_POSSIBLE
endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 0570eef83fba..4bd9d1230869 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -1,28 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Kernel-based Virtual Machine module
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
-KVM := ../../../virt/kvm
-
-common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
- $(KVM)/eventfd.o
-CFLAGS_e500_mmu.o := -I.
-CFLAGS_e500_mmu_host.o := -I.
-CFLAGS_emulate.o := -I.
-CFLAGS_emulate_loadstore.o := -I.
+include $(srctree)/virt/kvm/Makefile.kvm
-common-objs-y += powerpc.o emulate.o emulate_loadstore.o
+common-objs-y += powerpc.o emulate_loadstore.o
obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o
obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o
-AFLAGS_booke_interrupts.o := -I$(obj)
+AFLAGS_booke_interrupts.o := -I$(objtree)/$(obj)
kvm-e500-objs := \
$(common-objs-y) \
+ emulate.o \
booke.o \
booke_emulate.o \
booke_interrupts.o \
@@ -34,6 +27,7 @@ kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs)
kvm-e500mc-objs := \
$(common-objs-y) \
+ emulate.o \
booke.o \
booke_emulate.o \
bookehv_interrupts.o \
@@ -43,9 +37,6 @@ kvm-e500mc-objs := \
e500_emulate.o
kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
-kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \
- book3s_64_vio_hv.o
-
kvm-pr-y := \
fpu.o \
emulate.o \
@@ -59,10 +50,11 @@ kvm-pr-y := \
book3s_64_mmu.o \
book3s_32_mmu.o
-ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-kvm-book3s_64-module-objs := \
- $(KVM)/coalesced_mmio.o
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_64_entry.o \
+ tm.o
+ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_rmhandlers.o
endif
@@ -70,30 +62,48 @@ endif
kvm-hv-y += \
book3s_hv.o \
book3s_hv_interrupts.o \
- book3s_64_mmu_hv.o
+ book3s_64_mmu_hv.o \
+ book3s_64_mmu_radix.o \
+ book3s_hv_nested.o
+
+kvm-hv-$(CONFIG_PPC_UV) += \
+ book3s_hv_uvmem.o
+
+kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+ book3s_hv_tm.o
kvm-book3s_64-builtin-xics-objs-$(CONFIG_KVM_XICS) := \
book3s_hv_rm_xics.o
+kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
+ book3s_hv_tm_builtin.o
+
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+ book3s_hv_hmi.o \
+ book3s_hv_p9_entry.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
book3s_hv_builtin.o \
+ book3s_hv_p9_perf.o \
+ book3s_hv_nestedv2.o \
+ guest-state-buffer.o \
+ $(kvm-book3s_64-builtin-tm-objs-y) \
$(kvm-book3s_64-builtin-xics-objs-y)
+
+obj-$(CONFIG_GUEST_STATE_BUFFER_TEST) += test-guest-state-buffer.o
endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
-kvm-book3s_64-module-objs += \
- $(KVM)/kvm_main.o \
- $(KVM)/eventfd.o \
- powerpc.o \
- emulate_loadstore.o \
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
+kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
+
+kvm-book3s_64-module-objs := \
+ $(common-objs-y) \
book3s.o \
- book3s_64_vio.o \
book3s_rtas.o \
$(kvm-book3s_64-objs-y)
@@ -101,6 +111,7 @@ kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
kvm-book3s_32-objs := \
$(common-objs-y) \
+ emulate.o \
fpu.o \
book3s_paired_singles.o \
book3s.o \
@@ -113,9 +124,8 @@ kvm-book3s_32-objs := \
kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs)
kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o
-kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
-kvm-objs := $(kvm-objs-m) $(kvm-objs-y)
+kvm-y += $(kvm-objs-m) $(kvm-objs-y)
obj-$(CONFIG_KVM_E500V2) += kvm.o
obj-$(CONFIG_KVM_E500MC) += kvm.o
@@ -126,3 +136,8 @@ obj-$(CONFIG_KVM_BOOK3S_64_PR) += kvm-pr.o
obj-$(CONFIG_KVM_BOOK3S_64_HV) += kvm-hv.o
obj-y += $(kvm-book3s_64-builtin-objs-y)
+
+# KVM does a lot in real-mode, and 64-bit Book3S KASAN doesn't support that
+ifdef CONFIG_PPC_BOOK3S_64
+KASAN_SANITIZE := n
+endif
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index dd03f6b299ba..d79c5d1098c0 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
*
@@ -8,10 +9,6 @@
* Description:
* This file is derived from arch/powerpc/kvm/44x.c,
* by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kvm_host.h>
@@ -20,75 +17,83 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
-#include <linux/gfp.h>
-#include <linux/sched.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
+#include <asm/xive.h>
#include "book3s.h"
#include "trace.h"
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-
/* #define EXIT_DEBUG */
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "exits", VCPU_STAT(sum_exits) },
- { "mmio", VCPU_STAT(mmio_exits) },
- { "sig", VCPU_STAT(signal_exits) },
- { "sysc", VCPU_STAT(syscall_exits) },
- { "inst_emu", VCPU_STAT(emulated_inst_exits) },
- { "dec", VCPU_STAT(dec_exits) },
- { "ext_intr", VCPU_STAT(ext_intr_exits) },
- { "queue_intr", VCPU_STAT(queue_intr) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "pf_storage", VCPU_STAT(pf_storage) },
- { "sp_storage", VCPU_STAT(sp_storage) },
- { "pf_instruc", VCPU_STAT(pf_instruc) },
- { "sp_instruc", VCPU_STAT(sp_instruc) },
- { "ld", VCPU_STAT(ld) },
- { "ld_slow", VCPU_STAT(ld_slow) },
- { "st", VCPU_STAT(st) },
- { "st_slow", VCPU_STAT(st_slow) },
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
+ KVM_GENERIC_VM_STATS(),
+ STATS_DESC_ICOUNTER(VM, num_2M_pages),
+ STATS_DESC_ICOUNTER(VM, num_1G_pages)
};
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
+const struct kvm_stats_header kvm_vm_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vm_stats_desc),
+};
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
-{
- if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
- ulong pc = kvmppc_get_pc(vcpu);
- if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
- kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
- vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
- }
-}
-EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real);
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
+ KVM_GENERIC_VCPU_STATS(),
+ STATS_DESC_COUNTER(VCPU, sum_exits),
+ STATS_DESC_COUNTER(VCPU, mmio_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, light_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, syscall_exits),
+ STATS_DESC_COUNTER(VCPU, isi_exits),
+ STATS_DESC_COUNTER(VCPU, dsi_exits),
+ STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+ STATS_DESC_COUNTER(VCPU, dec_exits),
+ STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+ STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+ STATS_DESC_COUNTER(VCPU, dbell_exits),
+ STATS_DESC_COUNTER(VCPU, gdbell_exits),
+ STATS_DESC_COUNTER(VCPU, ld),
+ STATS_DESC_COUNTER(VCPU, st),
+ STATS_DESC_COUNTER(VCPU, pf_storage),
+ STATS_DESC_COUNTER(VCPU, pf_instruc),
+ STATS_DESC_COUNTER(VCPU, sp_storage),
+ STATS_DESC_COUNTER(VCPU, sp_instruc),
+ STATS_DESC_COUNTER(VCPU, queue_intr),
+ STATS_DESC_COUNTER(VCPU, ld_slow),
+ STATS_DESC_COUNTER(VCPU, st_slow),
+ STATS_DESC_COUNTER(VCPU, pthru_all),
+ STATS_DESC_COUNTER(VCPU, pthru_host),
+ STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
-static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu)
-{
- if (!is_kvmppc_hv_enabled(vcpu->kvm))
- return to_book3s(vcpu)->hior;
- return 0;
-}
+const struct kvm_stats_header kvm_vcpu_stats_header = {
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header),
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE,
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE +
+ sizeof(kvm_vcpu_stats_desc),
+};
static inline void kvmppc_update_int_pending(struct kvm_vcpu *vcpu,
unsigned long pending_now, unsigned long old_pending)
@@ -129,11 +134,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu)
void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
{
- kvmppc_unfixup_split_real(vcpu);
- kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu));
- kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags);
- kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec);
- vcpu->arch.mmu.reset_msr(vcpu);
+ vcpu->kvm->arch.kvm_ops->inject_interrupt(vcpu, vec, flags);
}
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
@@ -148,7 +149,6 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec)
case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break;
case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break;
case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break;
- case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break;
case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break;
case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break;
case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break;
@@ -188,13 +188,44 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
}
EXPORT_SYMBOL_GPL(kvmppc_book3s_queue_irqprio);
-void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
+void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK, srr1_flags);
+}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
+
+void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
+{
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
+}
+EXPORT_SYMBOL(kvmppc_core_queue_syscall);
+
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong srr1_flags)
{
/* might as well deliver this straight away */
- kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, flags);
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_PROGRAM, srr1_flags);
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, srr1_flags);
+}
+
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, srr1_flags);
+}
+
+void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu, ulong srr1_flags)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, srr1_flags);
+}
+
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
@@ -216,38 +247,54 @@ EXPORT_SYMBOL_GPL(kvmppc_core_dequeue_dec);
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
- unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL;
-
- if (irq->irq == KVM_INTERRUPT_SET_LEVEL)
- vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+ /*
+ * This case (KVM_INTERRUPT_SET) should never actually arise for
+ * a pseries guest (because pseries guests expect their interrupt
+ * controllers to continue asserting an external interrupt request
+ * until it is acknowledged at the interrupt controller), but is
+ * included to avoid ABI breakage and potentially for other
+ * sorts of guest.
+ *
+ * There is a subtlety here: HV KVM does not test the
+ * external_oneshot flag in the code that synthesizes
+ * external interrupts for the guest just before entering
+ * the guest. That is OK even if userspace did do a
+ * KVM_INTERRUPT_SET on a pseries guest vcpu, because the
+ * caller (kvm_vcpu_ioctl_interrupt) does a kvm_vcpu_kick()
+ * which ends up doing a smp_send_reschedule(), which will
+ * pull the guest all the way out to the host, meaning that
+ * we will call kvmppc_core_prepare_to_enter() before entering
+ * the guest again, and that will handle the external_oneshot
+ * flag correctly.
+ */
+ if (irq->irq == KVM_INTERRUPT_SET)
+ vcpu->arch.external_oneshot = 1;
- kvmppc_book3s_queue_irqprio(vcpu, vec);
+ kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
}
void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
- kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
}
-void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar,
- ulong flags)
+void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong srr1_flags,
+ ulong dar, ulong dsisr)
{
kvmppc_set_dar(vcpu, dar);
- kvmppc_set_dsisr(vcpu, flags);
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
+ kvmppc_set_dsisr(vcpu, dsisr);
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE, srr1_flags);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_data_storage);
-void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags)
+void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong srr1_flags)
{
- u64 msr = kvmppc_get_msr(vcpu);
- msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT);
- msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT);
- kvmppc_set_msr_fast(vcpu, msr);
- kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE);
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_INST_STORAGE, srr1_flags);
}
+EXPORT_SYMBOL_GPL(kvmppc_core_queue_inst_storage);
-int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+static int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu,
+ unsigned int priority)
{
int deliver = 1;
int vec = 0;
@@ -255,12 +302,11 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
- deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
+ deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_DECREMENTER;
break;
case BOOK3S_IRQPRIO_EXTERNAL:
- case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
- deliver = (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
+ deliver = !kvmhv_is_nestedv2() && (kvmppc_get_msr(vcpu) & MSR_EE) && !crit;
vec = BOOK3S_INTERRUPT_EXTERNAL;
break;
case BOOK3S_IRQPRIO_SYSTEM_RESET:
@@ -314,10 +360,6 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
break;
}
-#if 0
- printk(KERN_INFO "Deliver interrupt 0x%x? %x\n", vec, deliver);
-#endif
-
if (deliver)
kvmppc_inject_interrupt(vcpu, vec, 0);
@@ -333,8 +375,16 @@ static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority)
case BOOK3S_IRQPRIO_DECREMENTER:
/* DEC interrupts get cleared by mtdec */
return false;
- case BOOK3S_IRQPRIO_EXTERNAL_LEVEL:
- /* External interrupts get cleared by userspace */
+ case BOOK3S_IRQPRIO_EXTERNAL:
+ /*
+ * External interrupts get cleared by userspace
+ * except when set by the KVM_INTERRUPT ioctl with
+ * KVM_INTERRUPT_SET (not KVM_INTERRUPT_SET_LEVEL).
+ */
+ if (vcpu->arch.external_oneshot) {
+ vcpu->arch.external_oneshot = 0;
+ return true;
+ }
return false;
}
@@ -371,8 +421,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter);
-pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
- bool *writable)
+kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
+ bool *writable, struct page **page)
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM;
gfn_t gfn = gpa >> PAGE_SHIFT;
@@ -384,16 +434,17 @@ pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing,
gpa &= ~0xFFFULL;
if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) {
ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
- pfn_t pfn;
+ kvm_pfn_t pfn;
- pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
- get_page(pfn_to_page(pfn));
+ pfn = (kvm_pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT;
+ *page = pfn_to_page(pfn);
+ get_page(*page);
if (writable)
*writable = true;
return pfn;
}
- return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable);
+ return kvm_faultin_pfn(vcpu, gfn, writing, writable, page);
}
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn);
@@ -427,28 +478,45 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid,
return r;
}
-int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
- u32 *inst)
+/*
+ * Returns prefixed instructions with the prefix in the high 32 bits
+ * of *inst and suffix in the low 32 bits. This is the same convention
+ * as used in HEIR, vcpu->arch.last_inst and vcpu->arch.emul_inst.
+ * Like vcpu->arch.last_inst but unlike vcpu->arch.emul_inst, each
+ * half of the value needs byte-swapping if the guest endianness is
+ * different from the host endianness.
+ */
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+ enum instruction_fetch_type type, unsigned long *inst)
{
ulong pc = kvmppc_get_pc(vcpu);
int r;
+ u32 iw;
if (type == INST_SC)
pc -= 4;
- r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false);
- if (r == EMULATE_DONE)
- return r;
- else
+ r = kvmppc_ld(vcpu, &pc, sizeof(u32), &iw, false);
+ if (r != EMULATE_DONE)
return EMULATE_AGAIN;
+ /*
+ * If [H]SRR1 indicates that the instruction that caused the
+ * current interrupt is a prefixed instruction, get the suffix.
+ */
+ if (kvmppc_get_msr(vcpu) & SRR1_PREFIXED) {
+ u32 suffix;
+ pc += 4;
+ r = kvmppc_ld(vcpu, &pc, sizeof(u32), &suffix, false);
+ if (r != EMULATE_DONE)
+ return EMULATE_AGAIN;
+ *inst = ((u64)iw << 32) | suffix;
+ } else {
+ *inst = iw;
+ }
+ return r;
}
EXPORT_SYMBOL_GPL(kvmppc_load_last_inst);
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- return 0;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
return 0;
@@ -461,13 +529,25 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ int ret;
+
+ vcpu_load(vcpu);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ vcpu_put(vcpu);
+
+ return ret;
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
@@ -482,7 +562,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->msr = kvmppc_get_msr(vcpu);
regs->srr0 = kvmppc_get_srr0(vcpu);
regs->srr1 = kvmppc_get_srr1(vcpu);
- regs->pid = vcpu->arch.pid;
+ regs->pid = kvmppc_get_pid(vcpu);
regs->sprg0 = kvmppc_get_sprg0(vcpu);
regs->sprg1 = kvmppc_get_sprg1(vcpu);
regs->sprg2 = kvmppc_get_sprg2(vcpu);
@@ -527,182 +607,131 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
- int r;
- union kvmppc_one_reg val;
- int size;
+ int r = 0;
long int i;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_DAR:
- val = get_reg_val(reg->id, kvmppc_get_dar(vcpu));
+ *val = get_reg_val(id, kvmppc_get_dar(vcpu));
break;
case KVM_REG_PPC_DSISR:
- val = get_reg_val(reg->id, kvmppc_get_dsisr(vcpu));
+ *val = get_reg_val(id, kvmppc_get_dsisr(vcpu));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- i = reg->id - KVM_REG_PPC_FPR0;
- val = get_reg_val(reg->id, VCPU_FPR(vcpu, i));
+ i = id - KVM_REG_PPC_FPR0;
+ *val = get_reg_val(id, kvmppc_get_fpr(vcpu, i));
break;
case KVM_REG_PPC_FPSCR:
- val = get_reg_val(reg->id, vcpu->arch.fp.fpscr);
+ *val = get_reg_val(id, kvmppc_get_fpscr(vcpu));
break;
-#ifdef CONFIG_ALTIVEC
- case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- val.vval = vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0];
- break;
- case KVM_REG_PPC_VSCR:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- val = get_reg_val(reg->id, vcpu->arch.vr.vscr.u[3]);
- break;
- case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
- break;
-#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = reg->id - KVM_REG_PPC_VSR0;
- val.vsxval[0] = vcpu->arch.fp.fpr[i][0];
- val.vsxval[1] = vcpu->arch.fp.fpr[i][1];
+ i = id - KVM_REG_PPC_VSR0;
+ val->vsxval[0] = kvmppc_get_vsx_fpr(vcpu, i, 0);
+ val->vsxval[1] = kvmppc_get_vsx_fpr(vcpu, i, 1);
} else {
r = -ENXIO;
}
break;
#endif /* CONFIG_VSX */
- case KVM_REG_PPC_DEBUG_INST: {
- u32 opcode = INS_TW;
- r = copy_to_user((u32 __user *)(long)reg->addr,
- &opcode, sizeof(u32));
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, INS_TW);
break;
- }
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
- if (!vcpu->arch.icp) {
+ if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
r = -ENXIO;
break;
}
- val = get_reg_val(reg->id, kvmppc_xics_get_icp(vcpu));
+ if (xics_on_xive())
+ *val = get_reg_val(id, kvmppc_xive_get_icp(vcpu));
+ else
+ *val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_get_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
- val = get_reg_val(reg->id, vcpu->arch.fscr);
+ *val = get_reg_val(id, vcpu->arch.fscr);
break;
case KVM_REG_PPC_TAR:
- val = get_reg_val(reg->id, vcpu->arch.tar);
+ *val = get_reg_val(id, kvmppc_get_tar(vcpu));
break;
case KVM_REG_PPC_EBBHR:
- val = get_reg_val(reg->id, vcpu->arch.ebbhr);
+ *val = get_reg_val(id, kvmppc_get_ebbhr(vcpu));
break;
case KVM_REG_PPC_EBBRR:
- val = get_reg_val(reg->id, vcpu->arch.ebbrr);
+ *val = get_reg_val(id, kvmppc_get_ebbrr(vcpu));
break;
case KVM_REG_PPC_BESCR:
- val = get_reg_val(reg->id, vcpu->arch.bescr);
- break;
- case KVM_REG_PPC_VTB:
- val = get_reg_val(reg->id, vcpu->arch.vtb);
+ *val = get_reg_val(id, kvmppc_get_bescr(vcpu));
break;
case KVM_REG_PPC_IC:
- val = get_reg_val(reg->id, vcpu->arch.ic);
+ *val = get_reg_val(id, kvmppc_get_ic(vcpu));
break;
default:
r = -EINVAL;
break;
}
}
- if (r)
- return r;
-
- if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
- r = -EFAULT;
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
- int r;
- union kvmppc_one_reg val;
- int size;
+ int r = 0;
long int i;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
- return -EFAULT;
-
- r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
if (r == -EINVAL) {
r = 0;
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_DAR:
- kvmppc_set_dar(vcpu, set_reg_val(reg->id, val));
+ kvmppc_set_dar(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DSISR:
- kvmppc_set_dsisr(vcpu, set_reg_val(reg->id, val));
+ kvmppc_set_dsisr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
- i = reg->id - KVM_REG_PPC_FPR0;
- VCPU_FPR(vcpu, i) = set_reg_val(reg->id, val);
+ i = id - KVM_REG_PPC_FPR0;
+ kvmppc_set_fpr(vcpu, i, set_reg_val(id, *val));
break;
case KVM_REG_PPC_FPSCR:
- vcpu->arch.fp.fpscr = set_reg_val(reg->id, val);
+ kvmppc_set_fpscr(vcpu, set_reg_val(id, *val));
break;
-#ifdef CONFIG_ALTIVEC
- case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vr.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
- break;
- case KVM_REG_PPC_VSCR:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vr.vscr.u[3] = set_reg_val(reg->id, val);
- break;
- case KVM_REG_PPC_VRSAVE:
- if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
- r = -ENXIO;
- break;
- }
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
- break;
-#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
if (cpu_has_feature(CPU_FTR_VSX)) {
- long int i = reg->id - KVM_REG_PPC_VSR0;
- vcpu->arch.fp.fpr[i][0] = val.vsxval[0];
- vcpu->arch.fp.fpr[i][1] = val.vsxval[1];
+ i = id - KVM_REG_PPC_VSR0;
+ kvmppc_set_vsx_fpr(vcpu, i, 0, val->vsxval[0]);
+ kvmppc_set_vsx_fpr(vcpu, i, 1, val->vsxval[1]);
} else {
r = -ENXIO;
}
@@ -710,34 +739,45 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
#endif /* CONFIG_VSX */
#ifdef CONFIG_KVM_XICS
case KVM_REG_PPC_ICP_STATE:
- if (!vcpu->arch.icp) {
+ if (!vcpu->arch.icp && !vcpu->arch.xive_vcpu) {
r = -ENXIO;
break;
}
- r = kvmppc_xics_set_icp(vcpu,
- set_reg_val(reg->id, val));
+ if (xics_on_xive())
+ r = kvmppc_xive_set_icp(vcpu, set_reg_val(id, *val));
+ else
+ r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_set_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
- vcpu->arch.fscr = set_reg_val(reg->id, val);
+ kvmppc_set_fpscr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_TAR:
- vcpu->arch.tar = set_reg_val(reg->id, val);
+ kvmppc_set_tar(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_EBBHR:
- vcpu->arch.ebbhr = set_reg_val(reg->id, val);
+ kvmppc_set_ebbhr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_EBBRR:
- vcpu->arch.ebbrr = set_reg_val(reg->id, val);
+ kvmppc_set_ebbrr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_BESCR:
- vcpu->arch.bescr = set_reg_val(reg->id, val);
- break;
- case KVM_REG_PPC_VTB:
- vcpu->arch.vtb = set_reg_val(reg->id, val);
+ kvmppc_set_bescr(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_IC:
- vcpu->arch.ic = set_reg_val(reg->id, val);
+ kvmppc_set_ic(vcpu, set_reg_val(id, *val));
break;
default:
r = -EINVAL;
@@ -764,9 +804,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
}
EXPORT_SYMBOL_GPL(kvmppc_set_msr);
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
{
- return vcpu->kvm->arch.kvm_ops->vcpu_run(kvm_run, vcpu);
+ return vcpu->kvm->arch.kvm_ops->vcpu_run(vcpu);
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -778,20 +818,21 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
- return -EINVAL;
+ vcpu_load(vcpu);
+ vcpu->guest_debug = dbg->control;
+ vcpu_put(vcpu);
+ return 0;
}
-void kvmppc_decrementer_func(unsigned long data)
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
kvmppc_core_queue_dec(vcpu);
kvm_vcpu_kick(vcpu);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ return vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
@@ -804,21 +845,19 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
return vcpu->kvm->arch.kvm_ops->check_requests(vcpu);
}
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
+
}
-void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
- kvm->arch.kvm_ops->free_memslot(free, dont);
+ return kvm->arch.kvm_ops->get_dirty_log(kvm, log);
}
-int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- return kvm->arch.kvm_ops->create_memslot(slot, npages);
+ kvm->arch.kvm_ops->free_memslot(slot);
}
void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
@@ -827,56 +866,43 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- return kvm->arch.kvm_ops->prepare_memory_region(kvm, memslot, mem);
+ return kvm->arch.kvm_ops->prepare_memory_region(kvm, old, new, change);
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
-{
- kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old);
-}
-
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- return kvm->arch.kvm_ops->unmap_hva(kvm, hva);
+ kvm->arch.kvm_ops->commit_memory_region(kvm, old, new, change);
}
-EXPORT_SYMBOL_GPL(kvm_unmap_hva);
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
- return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
+ return kvm->arch.kvm_ops->unmap_gfn_range(kvm, range);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- return kvm->arch.kvm_ops->age_hva(kvm, hva);
+ return kvm->arch.kvm_ops->age_gfn(kvm, range);
}
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- return kvm->arch.kvm_ops->test_age_hva(kvm, hva);
-}
-
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
-{
- kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte);
-}
-
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
+ return kvm->arch.kvm_ops->test_age_gfn(kvm, range);
}
int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
- INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD_RCU(&kvm->arch.spapr_tce_tables);
INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
+ mutex_init(&kvm->arch.rtas_token_lock);
#endif
return kvm->arch.kvm_ops->init_vm(kvm);
@@ -890,35 +916,175 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvmppc_rtas_tokens_free(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif
-}
-int kvmppc_core_check_processor_compat(void)
-{
+#ifdef CONFIG_KVM_XICS
/*
- * We always return 0 for book3s. We check
- * for compatability while loading the HV
- * or PR module
+ * Free the XIVE and XICS devices which are not directly freed by the
+ * device 'release' method
*/
- return 0;
+ kfree(kvm->arch.xive_devices.native);
+ kvm->arch.xive_devices.native = NULL;
+ kfree(kvm->arch.xive_devices.xics_on_xive);
+ kvm->arch.xive_devices.xics_on_xive = NULL;
+ kfree(kvm->arch.xics_device);
+ kvm->arch.xics_device = NULL;
+#endif /* CONFIG_KVM_XICS */
+}
+
+int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+ unsigned long size = kvmppc_get_gpr(vcpu, 4);
+ unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+ u64 buf;
+ int srcu_idx;
+ int ret;
+
+ if (!is_power_of_2(size) || (size > sizeof(buf)))
+ return H_TOO_HARD;
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ if (ret != 0)
+ return H_TOO_HARD;
+
+ switch (size) {
+ case 1:
+ kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
+ break;
+
+ case 2:
+ kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(__be16 *)&buf));
+ break;
+
+ case 4:
+ kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(__be32 *)&buf));
+ break;
+
+ case 8:
+ kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(__be64 *)&buf));
+ break;
+
+ default:
+ BUG();
+ }
+
+ return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load);
+
+int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+ unsigned long size = kvmppc_get_gpr(vcpu, 4);
+ unsigned long addr = kvmppc_get_gpr(vcpu, 5);
+ unsigned long val = kvmppc_get_gpr(vcpu, 6);
+ u64 buf;
+ int srcu_idx;
+ int ret;
+
+ switch (size) {
+ case 1:
+ *(u8 *)&buf = val;
+ break;
+
+ case 2:
+ *(__be16 *)&buf = cpu_to_be16(val);
+ break;
+
+ case 4:
+ *(__be32 *)&buf = cpu_to_be32(val);
+ break;
+
+ case 8:
+ *(__be64 *)&buf = cpu_to_be64(val);
+ break;
+
+ default:
+ return H_TOO_HARD;
+ }
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ if (ret != 0)
+ return H_TOO_HARD;
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store);
int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall)
{
return kvm->arch.kvm_ops->hcall_implemented(hcall);
}
+#ifdef CONFIG_KVM_XICS
+int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+ bool line_status)
+{
+ if (xics_on_xive())
+ return kvmppc_xive_set_irq(kvm, irq_source_id, irq, level,
+ line_status);
+ else
+ return kvmppc_xics_set_irq(kvm, irq_source_id, irq, level,
+ line_status);
+}
+
+int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *irq_entry,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
+ level, line_status);
+}
+static int kvmppc_book3s_set_irq(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id, int level,
+ bool line_status)
+{
+ return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
+}
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries, int gsi)
+{
+ entries->gsi = gsi;
+ entries->type = KVM_IRQ_ROUTING_IRQCHIP;
+ entries->set = kvmppc_book3s_set_irq;
+ entries->irqchip.irqchip = 0;
+ entries->irqchip.pin = gsi;
+ return 1;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+ return pin;
+}
+
+#endif /* CONFIG_KVM_XICS */
+
static int kvmppc_book3s_init(void)
{
int r;
- r = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
+ r = kvm_init(sizeof(struct kvm_vcpu), 0, THIS_MODULE);
if (r)
return r;
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
r = kvmppc_book3s_init_pr();
#endif
- return r;
+#ifdef CONFIG_KVM_XICS
+#ifdef CONFIG_KVM_XIVE
+ if (xics_on_xive()) {
+ kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+ if (kvmppc_xive_native_supported())
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
+ } else
+#endif
+ kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
+#endif
+ return r;
}
static void kvmppc_book3s_exit(void)
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index 4bf956cf94d6..4aa2ab89afbc 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright IBM Corporation, 2013
* Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License or (at your optional) any later version of the license.
- *
*/
#ifndef __POWERPC_KVM_BOOK3S_H__
@@ -14,21 +9,29 @@
extern void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
struct kvm_memory_slot *memslot);
-extern int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva);
-extern int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start,
- unsigned long end);
-extern int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva);
-extern int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva);
-extern void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte);
+extern bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range);
+extern int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+extern int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
extern int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu,
int sprn, ulong spr_val);
extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
int sprn, ulong *spr_val);
extern int kvmppc_book3s_init_pr(void);
-extern void kvmppc_book3s_exit_pr(void);
+void kvmppc_book3s_exit_pr(void);
+extern int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
+#else
+static inline void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val) {}
+#endif
+
+extern void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr);
+extern void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags);
#endif
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index cd0b0730e29e..0215f32932a9 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -23,7 +12,6 @@
#include <linux/kvm_host.h>
#include <linux/highmem.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -52,7 +40,7 @@
static inline bool check_debug_ip(struct kvm_vcpu *vcpu)
{
#ifdef DEBUG_MMU_PTE_IP
- return vcpu->arch.pc == DEBUG_MMU_PTE_IP;
+ return vcpu->arch.regs.nip == DEBUG_MMU_PTE_IP;
#else
return true;
#endif
@@ -78,11 +66,6 @@ static inline bool sr_kp(u32 sr_raw)
return (sr_raw & 0x20000000) ? true: false;
}
-static inline bool sr_nx(u32 sr_raw)
-{
- return (sr_raw & 0x10000000) ? true: false;
-}
-
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data,
bool iswrite);
@@ -107,11 +90,6 @@ static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16);
}
-static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu)
-{
- kvmppc_set_msr(vcpu, 0);
-}
-
static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvm_vcpu *vcpu,
u32 sre, gva_t eaddr,
bool primary)
@@ -229,7 +207,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
ptem = kvmppc_mmu_book3s_32_get_ptem(sre, eaddr, primary);
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
- printk(KERN_ERR "KVM: Can't copy data from 0x%lx!\n", ptegp);
+ printk_ratelimited(KERN_ERR
+ "KVM: Can't copy data from 0x%lx!\n", ptegp);
goto no_page_found;
}
@@ -255,6 +234,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr,
case 2:
case 6:
pte->may_write = true;
+ fallthrough;
case 3:
case 5:
case 7:
@@ -357,7 +337,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum,
static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large)
{
- int i;
+ unsigned long i;
struct kvm_vcpu *v;
/* flush this VA on all cpus */
@@ -421,7 +401,6 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
mmu->mtsrin = kvmppc_mmu_book3s_32_mtsrin;
mmu->mfsrin = kvmppc_mmu_book3s_32_mfsrin;
mmu->xlate = kvmppc_mmu_book3s_32_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_32_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_32_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_32_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_32_ea_to_vp;
@@ -430,6 +409,7 @@ void kvmppc_mmu_book3s_32_init(struct kvm_vcpu *vcpu)
mmu->slbmte = NULL;
mmu->slbmfee = NULL;
mmu->slbmfev = NULL;
+ mmu->slbfee = NULL;
mmu->slbie = NULL;
mmu->slbia = NULL;
}
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 2035d16a9262..c7e4b62642ea 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -1,31 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
*
* Authors:
* Alexander Graf <agraf@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash32.h>
+#include <asm/book3s/32/mmu-hash.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
+#include "book3s.h"
/* #define DEBUG_MMU */
/* #define DEBUG_SR */
@@ -136,12 +125,11 @@ static u32 *kvmppc_mmu_get_pteg(struct kvm_vcpu *vcpu, u32 vsid, u32 eaddr,
return (u32*)pteg;
}
-extern char etext[];
-
int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
bool iswrite)
{
- pfn_t hpaddr;
+ struct page *page;
+ kvm_pfn_t hpaddr;
u64 vpn;
u64 vsid;
struct kvmppc_sid_map *map;
@@ -156,7 +144,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
bool writable;
/* Get host physical address for gpa */
- hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
+ hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable, &page);
if (is_error_noslot_pfn(hpaddr)) {
printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
orig_pte->raddr);
@@ -243,7 +231,7 @@ next_pteg:
pte = kvmppc_mmu_hpte_cache_next(vcpu);
if (!pte) {
- kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+ kvm_release_page_unused(page);
r = -EAGAIN;
goto out;
}
@@ -261,7 +249,7 @@ next_pteg:
kvmppc_mmu_hpte_cache_map(vcpu, pte);
- kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+ kvm_release_page_clean(page);
out:
return r;
}
@@ -364,10 +352,7 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
preempt_enable();
}
-/* From mm/mmu_context_hash32.c */
-#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff)
-
-int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
diff --git a/arch/powerpc/kvm/book3s_32_sr.S b/arch/powerpc/kvm/book3s_32_sr.S
index 7e06a6fc8d07..6cfcd20d4668 100644
--- a/arch/powerpc/kvm/book3s_32_sr.S
+++ b/arch/powerpc/kvm/book3s_32_sr.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -133,11 +122,27 @@
/* 0x0 - 0xb */
- /* 'current->mm' needs to be in r4 */
- tophys(r4, r2)
- lwz r4, MM(r4)
- tophys(r4, r4)
- /* This only clobbers r0, r3, r4 and r5 */
+ /* switch_mmu_context() needs paging, let's enable it */
+ mfmsr r9
+ ori r11, r9, MSR_DR
+ mtmsr r11
+ sync
+
+ /* switch_mmu_context() clobbers r12, rescue it */
+ SAVE_GPR(12, r1)
+
+ /* Calling switch_mmu_context(<inv>, current->mm, <inv>); */
+ lwz r4, MM(r2)
bl switch_mmu_context
+ /* restore r12 */
+ REST_GPR(12, r1)
+
+ /* Disable paging again */
+ mfmsr r9
+ li r6, MSR_DR
+ andc r9, r9, r6
+ mtmsr r9
+ sync
+
.endm
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
new file mode 100644
index 000000000000..a9ab92abffe8
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -0,0 +1,429 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#include <linux/export.h>
+#include <asm/asm-offsets.h>
+#include <asm/cache.h>
+#include <asm/code-patching-asm.h>
+#include <asm/exception-64s.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+#include <asm/ultravisor-api.h>
+
+/*
+ * These are branched to from interrupt handlers in exception-64s.S which set
+ * IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
+ */
+
+/*
+ * This is a hcall, so register convention is as
+ * Documentation/arch/powerpc/papr_hcalls.rst.
+ *
+ * This may also be a syscall from PR-KVM userspace that is to be
+ * reflected to the PR guest kernel, so registers may be set up for
+ * a system call rather than hcall. We don't currently clobber
+ * anything here, but the 0xc00 handler has already clobbered CTR
+ * and CR0, so PR-KVM can not support a guest kernel that preserves
+ * those registers across its system calls.
+ *
+ * The state of registers is as kvmppc_interrupt, except CFAR is not
+ * saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
+ */
+.global kvmppc_hcall
+.balign IFETCH_ALIGN_BYTES
+kvmppc_hcall:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_hcall
+#endif
+ ld r10,PACA_EXGEN+EX_R13(r13)
+ SET_SCRATCH0(r10)
+ li r10,0xc00
+ /* Now we look like kvmppc_interrupt */
+ li r11,PACA_EXGEN
+ b .Lgot_save_area
+
+/*
+ * KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
+ * call convention:
+ *
+ * guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
+ * guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
+ * guest R13 also saved in SCRATCH0
+ * R13 = PACA
+ * R11 = (H)SRR0
+ * R12 = (H)SRR1
+ * R9 = guest CR
+ * PPR is set to medium
+ *
+ * With the addition for KVM:
+ * R10 = trap vector
+ */
+.global kvmppc_interrupt
+.balign IFETCH_ALIGN_BYTES
+kvmppc_interrupt:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ std r10,HSTATE_SCRATCH0(r13)
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_HV_P9
+ beq kvmppc_p9_exit_interrupt
+ ld r10,HSTATE_SCRATCH0(r13)
+#endif
+ li r11,PACA_EXGEN
+ cmpdi r10,0x200
+ bgt+ .Lgot_save_area
+ li r11,PACA_EXMC
+ beq .Lgot_save_area
+ li r11,PACA_EXNMI
+.Lgot_save_area:
+ add r11,r11,r13
+BEGIN_FTR_SECTION
+ ld r12,EX_CFAR(r11)
+ std r12,HSTATE_CFAR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ ld r12,EX_CTR(r11)
+ mtctr r12
+BEGIN_FTR_SECTION
+ ld r12,EX_PPR(r11)
+ std r12,HSTATE_PPR(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+ ld r12,EX_R12(r11)
+ std r12,HSTATE_SCRATCH0(r13)
+ sldi r12,r9,32
+ or r12,r12,r10
+ ld r9,EX_R9(r11)
+ ld r10,EX_R10(r11)
+ ld r11,EX_R11(r11)
+
+ /*
+ * Hcalls and other interrupts come here after normalising register
+ * contents and save locations:
+ *
+ * R12 = (guest CR << 32) | interrupt vector
+ * R13 = PACA
+ * guest R12 saved in shadow HSTATE_SCRATCH0
+ * guest R13 saved in SPRN_SCRATCH0
+ */
+ std r9,HSTATE_SCRATCH2(r13)
+ lbz r9,HSTATE_IN_GUEST(r13)
+ cmpwi r9,KVM_GUEST_MODE_SKIP
+ beq- .Lmaybe_skip
+.Lno_skip:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
+ cmpwi r9,KVM_GUEST_MODE_GUEST
+ beq kvmppc_interrupt_pr
+#endif
+ b kvmppc_interrupt_hv
+#else
+ b kvmppc_interrupt_pr
+#endif
+
+/*
+ * "Skip" interrupts are part of a trick KVM uses with hash guests to load
+ * the faulting instruction in guest memory from the hypervisor without
+ * walking page tables.
+ *
+ * When the guest takes a fault that requires the hypervisor to load the
+ * instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
+ * and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
+ * MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
+ * but loads and stores will access the guest context. This is used to load
+ * the faulting instruction using the faulting guest effective address.
+ *
+ * However the guest context may not be able to translate, or it may cause a
+ * machine check or other issue, which results in a fault in the host
+ * (even with KVM-HV).
+ *
+ * These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
+ * are (or are likely) caused by that load, the instruction is skipped by
+ * just returning with the PC advanced +4, where it is noticed the load did
+ * not execute and it goes to the slow path which walks the page tables to
+ * read guest memory.
+ */
+.Lmaybe_skip:
+ cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE
+ beq 1f
+ cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT
+ beq 1f
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* HSRR interrupts get 2 added to interrupt number */
+ cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
+ beq 2f
+#endif
+ b .Lno_skip
+1: mfspr r9,SPRN_SRR0
+ addi r9,r9,4
+ mtspr SPRN_SRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ RFI_TO_KERNEL
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+2: mfspr r9,SPRN_HSRR0
+ addi r9,r9,4
+ mtspr SPRN_HSRR0,r9
+ ld r12,HSTATE_SCRATCH0(r13)
+ ld r9,HSTATE_SCRATCH2(r13)
+ GET_SCRATCH0(r13)
+ HRFI_TO_KERNEL
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS (144 + STACK_FRAME_MIN_SIZE)
+#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct vcpu *vcpu);
+ *
+ * Enter the guest on a ISAv3.0 or later system.
+ */
+.balign IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ stdu r1,-SFS(r1)
+
+ std r1,HSTATE_HOST_R1(r13)
+
+ mfcr r4
+ stw r4,SFS+8(r1)
+
+ reg = 14
+ .rept 18
+ std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_LR(r3)
+ mtlr r4
+ ld r4,VCPU_CTR(r3)
+ mtctr r4
+ ld r4,VCPU_XER(r3)
+ mtspr SPRN_XER,r4
+
+ ld r1,VCPU_CR(r3)
+
+BEGIN_FTR_SECTION
+ ld r4,VCPU_CFAR(r3)
+ mtspr SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4,VCPU_PPR(r3)
+ mtspr SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ reg = 4
+ .rept 28
+ ld reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_KVM(r3)
+ lbz r4,KVM_SECURE_GUEST(r4)
+ cmpdi r4,0
+ ld r4,VCPU_GPR(R4)(r3)
+ bne .Lret_to_ultra
+
+ mtcr r1
+
+ ld r0,VCPU_GPR(R0)(r3)
+ ld r1,VCPU_GPR(R1)(r3)
+ ld r2,VCPU_GPR(R2)(r3)
+ ld r3,VCPU_GPR(R3)(r3)
+
+ HRFI_TO_GUEST
+ b .
+
+ /*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor
+ * after processing a hypercall or interrupt that was forwarded
+ * (a.k.a. reflected) to the Hypervisor.
+ *
+ * All registers have already been reloaded except the ucall requires:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+.Lret_to_ultra:
+ mtcr r1
+ ld r1,VCPU_GPR(R1)(r3)
+
+ ld r0,VCPU_GPR(R3)(r3)
+ mfspr r2,SPRN_SRR1
+ LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+ sc 2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
+ * and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
+ * (SPRs and FP, VEC, etc).
+ */
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ li r10,0xc00
+ std r10,HSTATE_SCRATCH0(r13)
+
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+ /*
+ * If set to KVM_GUEST_MODE_HV_P9 but we're still in the
+ * hypervisor, that means we can't return from the entry stack.
+ */
+ rldicl. r10,r12,64-MSR_HV_LG,63
+ bne- kvmppc_p9_bad_interrupt
+
+ std r1,HSTATE_SCRATCH1(r13)
+ std r3,HSTATE_SCRATCH2(r13)
+ ld r1,HSTATE_HOST_R1(r13)
+ ld r3,HSTATE_KVM_VCPU(r13)
+
+ std r9,VCPU_CR(r3)
+
+1:
+ std r11,VCPU_PC(r3)
+ std r12,VCPU_MSR(r3)
+
+ reg = 14
+ .rept 18
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ /* r1, r3, r9-r13 are saved to vcpu by C code */
+ std r0,VCPU_GPR(R0)(r3)
+ std r2,VCPU_GPR(R2)(r3)
+ reg = 4
+ .rept 5
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ LOAD_PACA_TOC()
+
+ mflr r4
+ std r4,VCPU_LR(r3)
+ mfspr r4,SPRN_XER
+ std r4,VCPU_XER(r3)
+
+ reg = 14
+ .rept 18
+ ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ lwz r4,SFS+8(r1)
+ mtcr r4
+
+ /*
+ * Flush the link stack here, before executing the first blr on the
+ * way out of the guest.
+ *
+ * The link stack won't match coming out of the guest anyway so the
+ * only cost is the flush itself. The call clobbers r0.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack_p9
+
+ addi r1,r1,SFS
+ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+/*
+ * Took an interrupt somewhere right before HRFID to guest, so registers are
+ * in a bad way. Return things hopefully enough to run host virtual code and
+ * run the Linux interrupt handler (SRESET or MCE) to print something useful.
+ *
+ * We could be really clever and save all host registers in known locations
+ * before setting HSTATE_IN_GUEST, then restoring them all here, and setting
+ * return address to a fixup that sets them up again. But that's a lot of
+ * effort for a small bit of code. Lots of other things to do first.
+ */
+kvmppc_p9_bad_interrupt:
+BEGIN_MMU_FTR_SECTION
+ /*
+ * Hash host doesn't try to recover MMU (requires host SLB reload)
+ */
+ b .
+END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
+ /*
+ * Clean up guest registers to give host a chance to run.
+ */
+ li r10,0
+ mtspr SPRN_AMR,r10
+ mtspr SPRN_IAMR,r10
+ mtspr SPRN_CIABR,r10
+ mtspr SPRN_DAWRX0,r10
+BEGIN_FTR_SECTION
+ mtspr SPRN_DAWRX1,r10
+END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
+
+ /*
+ * Switch to host MMU mode (don't have the real host PID but we aren't
+ * going back to userspace).
+ */
+ hwsync
+ isync
+
+ mtspr SPRN_PID,r10
+
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ lwz r10, KVM_HOST_LPID(r10)
+ mtspr SPRN_LPID,r10
+
+ ld r10, HSTATE_KVM_VCPU(r13)
+ ld r10, VCPU_KVM(r10)
+ ld r10, KVM_HOST_LPCR(r10)
+ mtspr SPRN_LPCR,r10
+
+ isync
+
+ /*
+ * Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
+ * MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
+ */
+ li r10,KVM_GUEST_MODE_NONE
+ stb r10,HSTATE_IN_GUEST(r13)
+ li r10,MSR_RI
+ andc r12,r12,r10
+
+ /*
+ * Go back to interrupt handler. MCE and SRESET have their specific
+ * PACA save area so they should be used directly. They set up their
+ * own stack. The other handlers all use EXGEN. They will use the
+ * guest r1 if it looks like a kernel stack, so just load the
+ * emergency stack and go to program check for all other interrupts.
+ */
+ ld r10,HSTATE_SCRATCH0(r13)
+ cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq .Lcall_machine_check_common
+
+ cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
+ beq .Lcall_system_reset_common
+
+ b .
+
+.Lcall_machine_check_common:
+ b machine_check_common
+
+.Lcall_system_reset_common:
+ b system_reset_common
+#endif
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 774a253ca4e1..61290282fd9e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -23,10 +12,9 @@
#include <linux/kvm_host.h>
#include <linux/highmem.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
/* #define DEBUG_MMU */
@@ -36,11 +24,6 @@
#define dprintk(X...) do { } while(0)
#endif
-static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
-{
- kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
struct kvm_vcpu *vcpu,
gva_t eaddr)
@@ -213,7 +196,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
hva_t ptegp;
u64 pteg[16];
u64 avpn = 0;
- u64 v, r;
+ u64 r;
u64 v_val, v_mask;
u64 eaddr_mask;
int i;
@@ -235,6 +218,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_read = true;
gpte->may_write = true;
gpte->page_size = MMU_PAGE_4K;
+ gpte->wimg = HPTE_R_M;
return 0;
}
@@ -265,7 +249,8 @@ do_second:
goto no_page_found;
if(copy_from_user(pteg, (void __user *)ptegp, sizeof(pteg))) {
- printk(KERN_ERR "KVM can't copy data from 0x%lx!\n", ptegp);
+ printk_ratelimited(KERN_ERR
+ "KVM: Can't copy data from 0x%lx!\n", ptegp);
goto no_page_found;
}
@@ -300,7 +285,6 @@ do_second:
goto do_second;
}
- v = be64_to_cpu(pteg[i]);
r = be64_to_cpu(pteg[i+1]);
pp = (r & HPTE_R_PP) | key;
if (r & HPTE_R_PP0)
@@ -318,6 +302,7 @@ do_second:
gpte->may_execute = true;
gpte->may_read = false;
gpte->may_write = false;
+ gpte->wimg = r & HPTE_R_WIMG;
switch (pp) {
case 0:
@@ -325,7 +310,7 @@ do_second:
case 2:
case 6:
gpte->may_write = true;
- /* fall through */
+ fallthrough;
case 3:
case 5:
case 7:
@@ -377,15 +362,12 @@ no_seg_found:
static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
{
- struct kvmppc_vcpu_book3s *vcpu_book3s;
u64 esid, esid_1t;
int slb_nr;
struct kvmppc_slb *slbe;
dprintk("KVM MMU: slbmte(0x%llx, 0x%llx)\n", rs, rb);
- vcpu_book3s = to_book3s(vcpu);
-
esid = GET_ESID(rb);
esid_1t = GET_ESID_1T(rb);
slb_nr = rb & 0xfff;
@@ -427,6 +409,19 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
kvmppc_mmu_map_segment(vcpu, esid << SID_SHIFT);
}
+static int kvmppc_mmu_book3s_64_slbfee(struct kvm_vcpu *vcpu, gva_t eaddr,
+ ulong *ret_slb)
+{
+ struct kvmppc_slb *slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu, eaddr);
+
+ if (slbe) {
+ *ret_slb = slbe->origv;
+ return 0;
+ }
+ *ret_slb = 0;
+ return -ENOENT;
+}
+
static u64 kvmppc_mmu_book3s_64_slbmfee(struct kvm_vcpu *vcpu, u64 slb_nr)
{
struct kvmppc_slb *slbe;
@@ -535,7 +530,7 @@ static void kvmppc_mmu_book3s_64_tlbie(struct kvm_vcpu *vcpu, ulong va,
bool large)
{
u64 mask = 0xFFFFFFFFFULL;
- long i;
+ unsigned long i;
struct kvm_vcpu *v;
dprintk("KVM MMU: tlbie(0x%lx)\n", va);
@@ -662,10 +657,10 @@ void kvmppc_mmu_book3s_64_init(struct kvm_vcpu *vcpu)
mmu->slbmte = kvmppc_mmu_book3s_64_slbmte;
mmu->slbmfee = kvmppc_mmu_book3s_64_slbmfee;
mmu->slbmfev = kvmppc_mmu_book3s_64_slbmfev;
+ mmu->slbfee = kvmppc_mmu_book3s_64_slbfee;
mmu->slbie = kvmppc_mmu_book3s_64_slbie;
mmu->slbia = kvmppc_mmu_book3s_64_slbia;
mmu->xlate = kvmppc_mmu_book3s_64_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_reset_msr;
mmu->tlbie = kvmppc_mmu_book3s_64_tlbie;
mmu->esid_to_vsid = kvmppc_mmu_book3s_64_esid_to_vsid;
mmu->ea_to_vp = kvmppc_mmu_book3s_64_ea_to_vp;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index b982d925c710..be20aee6fd7d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -1,41 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved.
*
* Authors:
* Alexander Graf <agraf@suse.de>
* Kevin Wolf <mail@kevin-wolf.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
+#include <linux/pkeys.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/machdep.h>
#include <asm/mmu_context.h>
#include <asm/hw_irq.h>
#include "trace_pr.h"
+#include "book3s.h"
#define PTE_SIZE 12
void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
- ppc_md.hpte_invalidate(pte->slot, pte->host_vpn,
- pte->pagesize, pte->pagesize, MMU_SEGSIZE_256M,
- false);
+ mmu_hash_ops.hpte_invalidate(pte->slot, pte->host_vpn,
+ pte->pagesize, pte->pagesize,
+ MMU_SEGSIZE_256M, false);
}
/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
@@ -82,7 +72,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
bool iswrite)
{
unsigned long vpn;
- pfn_t hpaddr;
+ kvm_pfn_t hpaddr;
ulong hash, hpteg;
u64 vsid;
int ret;
@@ -98,13 +88,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
struct hpte_cache *cpte;
unsigned long gfn = orig_pte->raddr >> PAGE_SHIFT;
unsigned long pfn;
+ struct page *page;
/* used to check for invalidations in progress */
- mmu_seq = kvm->mmu_notifier_seq;
+ mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/* Get host physical address for gpa */
- pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable);
+ pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable, &page);
if (is_error_noslot_pfn(pfn)) {
printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n",
orig_pte->raddr);
@@ -131,19 +122,19 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
vpn = hpt_vpn(orig_pte->eaddr, map->host_vsid, MMU_SEGSIZE_256M);
- kvm_set_pfn_accessed(pfn);
if (!orig_pte->may_write || !writable)
rflags |= PP_RXRX;
- else {
+ else
mark_page_dirty(vcpu->kvm, gfn);
- kvm_set_pfn_dirty(pfn);
- }
if (!orig_pte->may_execute)
rflags |= HPTE_R_N;
else
kvmppc_mmu_flush_icache(pfn);
+ rflags |= pte_to_hpte_pkey_bits(0, HPTE_USE_KERNEL_KEY);
+ rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
+
/*
* Use 64K pages if possible; otherwise, on 64K page kernels,
* we need to transfer 4 more bits from guest real to host real addr.
@@ -158,7 +149,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
cpte = kvmppc_mmu_hpte_cache_next(vcpu);
spin_lock(&kvm->mmu_lock);
- if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+ if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) {
r = -EAGAIN;
goto out_unlock;
}
@@ -168,26 +159,31 @@ map_again:
/* In case we tried normal mapping already, let's nuke old entries */
if (attempt > 1)
- if (ppc_md.hpte_remove(hpteg) < 0) {
+ if (mmu_hash_ops.hpte_remove(hpteg) < 0) {
r = -1;
goto out_unlock;
}
- ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
- hpsize, hpsize, MMU_SEGSIZE_256M);
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
+ hpsize, hpsize, MMU_SEGSIZE_256M);
- if (ret < 0) {
+ if (ret == -1) {
/* If we couldn't map a primary PTE, try a secondary */
hash = ~hash;
vflags ^= HPTE_V_SECONDARY;
attempt++;
goto map_again;
+ } else if (ret < 0) {
+ r = -EIO;
+ goto out_unlock;
} else {
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
- /* The ppc_md code may give us a secondary entry even though we
- asked for a primary. Fix up. */
+ /*
+ * The mmu_hash_ops code may give us a secondary entry even
+ * though we asked for a primary. Fix up.
+ */
if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) {
hash = ~hash;
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -204,8 +200,10 @@ map_again:
}
out_unlock:
+ /* FIXME: Don't unconditionally pass unused=false. */
+ kvm_release_faultin_page(kvm, page, false,
+ orig_pte->may_write && writable);
spin_unlock(&kvm->mmu_lock);
- kvm_release_pfn_clean(pfn);
if (cpte)
kvmppc_mmu_hpte_cache_free(cpte);
@@ -226,10 +224,11 @@ void kvmppc_mmu_unmap_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
{
+ unsigned long vsid_bits = VSID_BITS_65_256M;
struct kvmppc_sid_map *map;
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
u16 sid_map_mask;
- static int backwards_map = 0;
+ static int backwards_map;
if (kvmppc_get_msr(vcpu) & MSR_PR)
gvsid |= VSID_PR;
@@ -254,7 +253,12 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
kvmppc_mmu_pte_flush(vcpu, 0, 0);
kvmppc_mmu_flush_segments(vcpu);
}
- map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++, 256M);
+
+ if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+ vsid_bits = VSID_BITS_256M;
+
+ map->host_vsid = vsid_scramble(vcpu_book3s->proto_vsid_next++,
+ VSID_MULTIPLIER_256M, vsid_bits);
map->guest_vsid = gvsid;
map->valid = true;
@@ -382,12 +386,12 @@ void kvmppc_mmu_destroy_pr(struct kvm_vcpu *vcpu)
__destroy_context(to_book3s(vcpu)->context_id[0]);
}
-int kvmppc_mmu_init(struct kvm_vcpu *vcpu)
+int kvmppc_mmu_init_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
int err;
- err = __init_new_context();
+ err = hash__alloc_context_id();
if (err < 0)
return -1;
vcpu3s->context_id[0] = err;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 72c20bb16d26..f305395cf26e 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
@@ -27,136 +16,179 @@
#include <linux/srcu.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
+#include <linux/debugfs.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
+#include <asm/pte-walk.h>
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970 63
+#include "book3s.h"
+#include "book3s_hv.h"
+#include "trace_hv.h"
-/* Power architecture requires HPT is at least 256kB */
-#define PPC_MIN_HPT_ORDER 18
+//#define DEBUG_RESIZE_HPT 1
+
+#ifdef DEBUG_RESIZE_HPT
+#define resize_hpt_debug(resize, ...) \
+ do { \
+ printk(KERN_DEBUG "RESIZE HPT %p: ", resize); \
+ printk(__VA_ARGS__); \
+ } while (0)
+#else
+#define resize_hpt_debug(resize, ...) \
+ do { } while (0)
+#endif
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
-static void kvmppc_rmap_reset(struct kvm *kvm);
-long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
+struct kvm_resize_hpt {
+ /* These fields read-only after init */
+ struct kvm *kvm;
+ struct work_struct work;
+ u32 order;
+
+ /* These fields protected by kvm->arch.mmu_setup_lock */
+
+ /* Possible values and their usage:
+ * <0 an error occurred during allocation,
+ * -EBUSY allocation is in progress,
+ * 0 allocation made successfully.
+ */
+ int error;
+
+ /* Private to the work thread, until error != -EBUSY,
+ * then protected by kvm->arch.mmu_setup_lock.
+ */
+ struct kvm_hpt_info hpt;
+};
+
+int kvmppc_allocate_hpt(struct kvm_hpt_info *info, u32 order)
{
unsigned long hpt = 0;
- struct revmap_entry *rev;
+ int cma = 0;
struct page *page = NULL;
- long order = KVM_DEFAULT_HPT_ORDER;
+ struct revmap_entry *rev;
+ unsigned long npte;
- if (htab_orderp) {
- order = *htab_orderp;
- if (order < PPC_MIN_HPT_ORDER)
- order = PPC_MIN_HPT_ORDER;
- }
+ if ((order < PPC_MIN_HPT_ORDER) || (order > PPC_MAX_HPT_ORDER))
+ return -EINVAL;
- kvm->arch.hpt_cma_alloc = 0;
- page = kvm_alloc_hpt(1 << (order - PAGE_SHIFT));
+ page = kvm_alloc_hpt_cma(1ul << (order - PAGE_SHIFT));
if (page) {
hpt = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
- memset((void *)hpt, 0, (1 << order));
- kvm->arch.hpt_cma_alloc = 1;
+ memset((void *)hpt, 0, (1ul << order));
+ cma = 1;
}
- /* Lastly try successively smaller sizes from the page allocator */
- while (!hpt && order > PPC_MIN_HPT_ORDER) {
- hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
- __GFP_NOWARN, order - PAGE_SHIFT);
- if (!hpt)
- --order;
- }
+ if (!hpt)
+ hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_RETRY_MAYFAIL
+ |__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
return -ENOMEM;
- kvm->arch.hpt_virt = hpt;
- kvm->arch.hpt_order = order;
/* HPTEs are 2**4 bytes long */
- kvm->arch.hpt_npte = 1ul << (order - 4);
- /* 128 (2**7) bytes in each HPTEG */
- kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
+ npte = 1ul << (order - 4);
/* Allocate reverse map array */
- rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
+ rev = vmalloc(array_size(npte, sizeof(struct revmap_entry)));
if (!rev) {
- pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
- goto out_freehpt;
+ if (cma)
+ kvm_free_hpt_cma(page, 1 << (order - PAGE_SHIFT));
+ else
+ free_pages(hpt, order - PAGE_SHIFT);
+ return -ENOMEM;
}
- kvm->arch.revmap = rev;
- kvm->arch.sdr1 = __pa(hpt) | (order - 18);
- pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
- hpt, order, kvm->arch.lpid);
+ info->order = order;
+ info->virt = hpt;
+ info->cma = cma;
+ info->rev = rev;
- if (htab_orderp)
- *htab_orderp = order;
return 0;
+}
- out_freehpt:
- if (kvm->arch.hpt_cma_alloc)
- kvm_release_hpt(page, 1 << (order - PAGE_SHIFT));
- else
- free_pages(hpt, order - PAGE_SHIFT);
- return -ENOMEM;
+void kvmppc_set_hpt(struct kvm *kvm, struct kvm_hpt_info *info)
+{
+ atomic64_set(&kvm->arch.mmio_update, 0);
+ kvm->arch.hpt = *info;
+ kvm->arch.sdr1 = __pa(info->virt) | (info->order - 18);
+
+ pr_debug("KVM guest htab at %lx (order %ld), LPID %llx\n",
+ info->virt, (long)info->order, kvm->arch.lpid);
}
-long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
+int kvmppc_alloc_reset_hpt(struct kvm *kvm, int order)
{
- long err = -EBUSY;
- long order;
+ int err = -EBUSY;
+ struct kvm_hpt_info info;
- mutex_lock(&kvm->lock);
- if (kvm->arch.rma_setup_done) {
- kvm->arch.rma_setup_done = 0;
- /* order rma_setup_done vs. vcpus_running */
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ if (kvm->arch.mmu_ready) {
+ kvm->arch.mmu_ready = 0;
+ /* order mmu_ready vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
- kvm->arch.rma_setup_done = 1;
+ kvm->arch.mmu_ready = 1;
goto out;
}
}
- if (kvm->arch.hpt_virt) {
- order = kvm->arch.hpt_order;
+ if (kvm_is_radix(kvm)) {
+ err = kvmppc_switch_mmu_to_hpt(kvm);
+ if (err)
+ goto out;
+ }
+
+ if (kvm->arch.hpt.order == order) {
+ /* We already have a suitable HPT */
+
/* Set the entire HPT to 0, i.e. invalid HPTEs */
- memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+ memset((void *)kvm->arch.hpt.virt, 0, 1ul << order);
/*
* Reset all the reverse-mapping chains for all memslots
*/
kvmppc_rmap_reset(kvm);
- /* Ensure that each vcpu will flush its TLB on next entry. */
- cpumask_setall(&kvm->arch.need_tlb_flush);
- *htab_orderp = order;
err = 0;
- } else {
- err = kvmppc_alloc_hpt(kvm, htab_orderp);
- order = *htab_orderp;
+ goto out;
}
- out:
- mutex_unlock(&kvm->lock);
+
+ if (kvm->arch.hpt.virt) {
+ kvmppc_free_hpt(&kvm->arch.hpt);
+ kvmppc_rmap_reset(kvm);
+ }
+
+ err = kvmppc_allocate_hpt(&info, order);
+ if (err < 0)
+ goto out;
+ kvmppc_set_hpt(kvm, &info);
+
+out:
+ if (err == 0)
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
return err;
}
-void kvmppc_free_hpt(struct kvm *kvm)
+void kvmppc_free_hpt(struct kvm_hpt_info *info)
{
- kvmppc_free_lpid(kvm->arch.lpid);
- vfree(kvm->arch.revmap);
- if (kvm->arch.hpt_cma_alloc)
- kvm_release_hpt(virt_to_page(kvm->arch.hpt_virt),
- 1 << (kvm->arch.hpt_order - PAGE_SHIFT));
- else
- free_pages(kvm->arch.hpt_virt,
- kvm->arch.hpt_order - PAGE_SHIFT);
+ vfree(info->rev);
+ info->rev = NULL;
+ if (info->cma)
+ kvm_free_hpt_cma(virt_to_page((void *)info->virt),
+ 1 << (info->order - PAGE_SHIFT));
+ else if (info->virt)
+ free_pages(info->virt, info->order - PAGE_SHIFT);
+ info->virt = 0;
+ info->order = 0;
}
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
@@ -191,8 +223,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
- if (npages > kvm->arch.hpt_mask + 1)
- npages = kvm->arch.hpt_mask + 1;
+ if (npages > kvmppc_hpt_mask(&kvm->arch.hpt) + 1)
+ npages = kvmppc_hpt_mask(&kvm->arch.hpt) + 1;
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@@ -202,7 +234,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
- hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
+ hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25)))
+ & kvmppc_hpt_mask(&kvm->arch.hpt);
/*
* We assume that the hash table is empty and no
* vcpus are using it at this stage. Since we create
@@ -224,170 +257,48 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
int kvmppc_mmu_hv_init(void)
{
- unsigned long host_lpid, rsvd_lpid;
+ unsigned long nr_lpids;
- if (!cpu_has_feature(CPU_FTR_HVMODE))
+ if (!mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE))
return -EINVAL;
- /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
- if (cpu_has_feature(CPU_FTR_ARCH_206)) {
- host_lpid = mfspr(SPRN_LPID); /* POWER7 */
- rsvd_lpid = LPID_RSVD;
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (WARN_ON(mfspr(SPRN_LPID) != 0))
+ return -EINVAL;
+ nr_lpids = 1UL << mmu_lpid_bits;
} else {
- host_lpid = 0; /* PPC970 */
- rsvd_lpid = MAX_LPID_970;
+ nr_lpids = 1UL << KVM_MAX_NESTED_GUESTS_SHIFT;
}
- kvmppc_init_lpid(rsvd_lpid + 1);
-
- kvmppc_claim_lpid(host_lpid);
- /* rsvd_lpid is reserved for use in partition switching */
- kvmppc_claim_lpid(rsvd_lpid);
-
- return 0;
-}
-
-static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
-{
- unsigned long msr = vcpu->arch.intr_msr;
-
- /* If transactional, change to suspend mode on IRQ delivery */
- if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
- msr |= MSR_TS_S;
- else
- msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
- kvmppc_set_msr(vcpu, msr);
-}
-
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
- struct kvm_memory_slot *memslot,
- unsigned long psize)
-{
- unsigned long start;
- long np, err;
- struct page *page, *hpage, *pages[1];
- unsigned long s, pgsize;
- unsigned long *physp;
- unsigned int is_io, got, pgorder;
- struct vm_area_struct *vma;
- unsigned long pfn, i, npages;
-
- physp = memslot->arch.slot_phys;
- if (!physp)
- return -EINVAL;
- if (physp[gfn - memslot->base_gfn])
- return 0;
-
- is_io = 0;
- got = 0;
- page = NULL;
- pgsize = psize;
- err = -EINVAL;
- start = gfn_to_hva_memslot(memslot, gfn);
-
- /* Instantiate and get the page we want access to */
- np = get_user_pages_fast(start, 1, 1, pages);
- if (np != 1) {
- /* Look up the vma for the page */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, start);
- if (!vma || vma->vm_start > start ||
- start + psize > vma->vm_end ||
- !(vma->vm_flags & VM_PFNMAP))
- goto up_err;
- is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
- pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- /* check alignment of pfn vs. requested page size */
- if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
- goto up_err;
- up_read(&current->mm->mmap_sem);
-
- } else {
- page = pages[0];
- got = KVMPPC_GOT_PAGE;
-
- /* See if this is a large page */
- s = PAGE_SIZE;
- if (PageHuge(page)) {
- hpage = compound_head(page);
- s <<= compound_order(hpage);
- /* Get the whole large page if slot alignment is ok */
- if (s > psize && slot_is_aligned(memslot, s) &&
- !(memslot->userspace_addr & (s - 1))) {
- start &= ~(s - 1);
- pgsize = s;
- get_page(hpage);
- put_page(page);
- page = hpage;
- }
- }
- if (s < psize)
- goto out;
- pfn = page_to_pfn(page);
- }
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* POWER7 has 10-bit LPIDs, POWER8 has 12-bit LPIDs */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ WARN_ON(nr_lpids != 1UL << 12);
+ else
+ WARN_ON(nr_lpids != 1UL << 10);
- npages = pgsize >> PAGE_SHIFT;
- pgorder = __ilog2(npages);
- physp += (gfn - memslot->base_gfn) & ~(npages - 1);
- spin_lock(&kvm->arch.slot_phys_lock);
- for (i = 0; i < npages; ++i) {
- if (!physp[i]) {
- physp[i] = ((pfn + i) << PAGE_SHIFT) +
- got + is_io + pgorder;
- got = 0;
- }
+ /*
+ * Reserve the last implemented LPID for use in partition
+ * switching for POWER7 and POWER8.
+ */
+ nr_lpids -= 1;
}
- spin_unlock(&kvm->arch.slot_phys_lock);
- err = 0;
- out:
- if (got)
- put_page(page);
- return err;
+ kvmppc_init_lpid(nr_lpids);
- up_err:
- up_read(&current->mm->mmap_sem);
- return err;
+ return 0;
}
-long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
{
- unsigned long psize, gpa, gfn;
- struct kvm_memory_slot *memslot;
long ret;
- if (kvm->arch.using_mmu_notifiers)
- goto do_insert;
-
- psize = hpte_page_size(pteh, ptel);
- if (!psize)
- return H_PARAMETER;
-
- pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
- /* Find the memslot (if any) for this address */
- gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
- gfn = gpa >> PAGE_SHIFT;
- memslot = gfn_to_memslot(kvm, gfn);
- if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
- if (!slot_is_aligned(memslot, psize))
- return H_PARAMETER;
- if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
- return H_PARAMETER;
- }
-
- do_insert:
- /* Protect linux PTE lookup from page table destruction */
- rcu_read_lock_sched(); /* this disables preemption too */
+ preempt_disable();
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
- current->mm->pgd, false, pte_idx_ret);
- rcu_read_unlock_sched();
+ kvm->mm->pgd, false, pte_idx_ret);
+ preempt_enable();
if (ret == H_TOO_HARD) {
/* this can't happen */
pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n");
@@ -397,19 +308,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
}
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh,
- unsigned long ptel)
-{
- return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
- pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
gva_t eaddr)
{
@@ -436,7 +334,7 @@ static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r,
{
unsigned long ra_mask;
- ra_mask = hpte_page_size(v, r) - 1;
+ ra_mask = kvmppc_actual_pgsz(v, r) - 1;
return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask);
}
@@ -447,10 +345,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_slb *slbe;
unsigned long slb_v;
unsigned long pp, key;
- unsigned long v, gr;
+ unsigned long v, orig_v, gr;
__be64 *hptep;
- int index;
- int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
+ long int index;
+ int virtmode = __kvmppc_get_msr_hv(vcpu) & (data ? MSR_DR : MSR_IR);
+
+ if (kvm_is_radix(vcpu->kvm))
+ return kvmppc_mmu_radix_xlate(vcpu, eaddr, gpte, data, iswrite);
/* Get SLB entry */
if (virtmode) {
@@ -471,13 +372,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
preempt_enable();
return -ENOENT;
}
- hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
- gr = kvm->arch.revmap[index].guest_rpte;
+ hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+ v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
+ gr = kvm->arch.hpt.rev[index].guest_rpte;
- /* Unlock the HPTE */
- asm volatile("lwsync" : : : "memory");
- hptep[0] = cpu_to_be64(v);
+ unlock_hpte(hptep, orig_v);
preempt_enable();
gpte->eaddr = eaddr;
@@ -485,7 +386,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
/* Get PP bits and key for permission check */
pp = gr & (HPTE_R_PP0 | HPTE_R_PP);
- key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
+ key = (__kvmppc_get_msr_hv(vcpu) & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS;
key &= slb_v;
/* Calculate permissions */
@@ -494,7 +395,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
/* Storage key permission check for POWER7 */
- if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+ if (data && virtmode) {
int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
if (amrfield & 1)
gpte->may_read = 0;
@@ -515,20 +416,43 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
* embodied here.) If the instruction isn't a load or store, then
* this doesn't return anything useful.
*/
-static int instruction_is_store(unsigned int instr)
+static int instruction_is_store(ppc_inst_t instr)
{
unsigned int mask;
+ unsigned int suffix;
mask = 0x10000000;
- if ((instr & 0xfc000000) == 0x7c000000)
+ suffix = ppc_inst_val(instr);
+ if (ppc_inst_prefixed(instr))
+ suffix = ppc_inst_suffix(instr);
+ else if ((suffix & 0xfc000000) == 0x7c000000)
mask = 0x100; /* major opcode 31 */
- return (instr & mask) != 0;
+ return (suffix & mask) != 0;
}
-static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned long gpa, gva_t ea, int is_store)
+int kvmppc_hv_emulate_mmio(struct kvm_vcpu *vcpu,
+ unsigned long gpa, gva_t ea, int is_store)
{
- u32 last_inst;
+ ppc_inst_t last_inst;
+ bool is_prefixed = !!(kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+
+ /*
+ * Fast path - check if the guest physical address corresponds to a
+ * device on the FAST_MMIO_BUS, if so we can avoid loading the
+ * instruction altogether, then we can just handle it and return.
+ */
+ if (is_store) {
+ int idx, ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, (gpa_t) gpa, 0,
+ NULL);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ if (!ret) {
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + (is_prefixed ? 8 : 4));
+ return RESUME_GUEST;
+ }
+ }
/*
* If we fail, we just return to the guest and try executing it again.
@@ -540,7 +464,16 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
/*
* WARNING: We do not know for sure whether the instruction we just
* read from memory is the same that caused the fault in the first
- * place. If the instruction we read is neither an load or a store,
+ * place.
+ *
+ * If the fault is prefixed but the instruction is not or vice
+ * versa, try again so that we don't advance pc the wrong amount.
+ */
+ if (ppc_inst_prefixed(last_inst) != is_prefixed)
+ return RESUME_GUEST;
+
+ /*
+ * If the instruction we read is neither a load nor a store,
* then it can't access memory, so we don't need to worry about
* enforcing access permissions. So, assuming it is a load or
* store, we just check that its direction (load or store) is
@@ -567,27 +500,33 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.paddr_accessed = gpa;
vcpu->arch.vaddr_accessed = ea;
- return kvmppc_emulate_mmio(run, vcpu);
+ return kvmppc_emulate_mmio(vcpu);
}
-int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
unsigned long ea, unsigned long dsisr)
{
struct kvm *kvm = vcpu->kvm;
unsigned long hpte[3], r;
+ unsigned long hnow_v, hnow_r;
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
- unsigned long gpa, gfn, hva, pfn;
+ unsigned long gpa, gfn, hva, pfn, hpa;
struct kvm_memory_slot *memslot;
unsigned long *rmap;
struct revmap_entry *rev;
- struct page *page, *pages[1];
- long index, ret, npages;
- unsigned long is_io;
- unsigned int writing, write_ok;
- struct vm_area_struct *vma;
+ struct page *page;
+ long index, ret;
+ bool is_ci;
+ bool writing, write_ok;
+ unsigned int shift;
unsigned long rcbits;
+ long mmio_update;
+ pte_t pte, *ptep;
+
+ if (kvm_is_radix(kvm))
+ return kvmppc_book3s_radix_page_fault(vcpu, ea, dsisr);
/*
* Real-mode code has already searched the HPT and found the
@@ -597,39 +536,55 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (ea != vcpu->arch.pgfault_addr)
return RESUME_GUEST;
+
+ if (vcpu->arch.pgfault_cache) {
+ mmio_update = atomic64_read(&kvm->arch.mmio_update);
+ if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
+ r = vcpu->arch.pgfault_cache->rpte;
+ psize = kvmppc_actual_pgsz(vcpu->arch.pgfault_hpte[0],
+ r);
+ gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+ gfn_base = gpa_base >> PAGE_SHIFT;
+ gpa = gpa_base | (ea & (psize - 1));
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
+ dsisr & DSISR_ISSTORE);
+ }
+ }
index = vcpu->arch.pgfault_index;
- hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- rev = &kvm->arch.revmap[index];
+ hptep = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+ rev = &kvm->arch.hpt.rev[index];
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax();
hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
hpte[1] = be64_to_cpu(hptep[1]);
hpte[2] = r = rev->guest_rpte;
- asm volatile("lwsync" : : : "memory");
- hptep[0] = cpu_to_be64(hpte[0]);
+ unlock_hpte(hptep, hpte[0]);
preempt_enable();
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
+ hpte[1] = hpte_new_to_old_r(hpte[1]);
+ }
if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
hpte[1] != vcpu->arch.pgfault_hpte[1])
return RESUME_GUEST;
/* Translate the logical address and get the page */
- psize = hpte_page_size(hpte[0], r);
+ psize = kvmppc_actual_pgsz(hpte[0], r);
gpa_base = r & HPTE_R_RPN & ~(psize - 1);
gfn_base = gpa_base >> PAGE_SHIFT;
gpa = gpa_base | (ea & (psize - 1));
gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
+ trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
- return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
- if (!kvm->arch.using_mmu_notifiers)
- return -EFAULT; /* should never get here */
-
/*
* This should never happen, because of the slot_is_aligned()
* check in kvmppc_do_h_enter().
@@ -638,70 +593,55 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return -EFAULT;
/* used to check for invalidations in progress */
- mmu_seq = kvm->mmu_notifier_seq;
+ mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
- is_io = 0;
- pfn = 0;
+ ret = -EFAULT;
page = NULL;
- pte_size = PAGE_SIZE;
writing = (dsisr & DSISR_ISSTORE) != 0;
/* If writing != 0, then the HPTE must allow writing, if we get here */
write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, writing, pages);
- if (npages < 1) {
- /* Check if it's an I/O mapping */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
- if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end &&
- (vma->vm_flags & VM_PFNMAP)) {
- pfn = vma->vm_pgoff +
- ((hva - vma->vm_start) >> PAGE_SHIFT);
- pte_size = psize;
- is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
- write_ok = vma->vm_flags & VM_WRITE;
- }
- up_read(&current->mm->mmap_sem);
- if (!pfn)
- return -EFAULT;
- } else {
- page = pages[0];
- pfn = page_to_pfn(page);
- if (PageHuge(page)) {
- page = compound_head(page);
- pte_size <<= compound_order(page);
- }
- /* if the guest wants write access, see if that is OK */
- if (!writing && hpte_is_writable(r)) {
- unsigned int hugepage_shift;
- pte_t *ptep, pte;
-
- /*
- * We need to protect against page table destruction
- * while looking up and updating the pte.
- */
- rcu_read_lock_sched();
- ptep = find_linux_pte_or_hugepte(current->mm->pgd,
- hva, &hugepage_shift);
- if (ptep) {
- pte = kvmppc_read_update_linux_pte(ptep, 1,
- hugepage_shift);
- if (pte_write(pte))
- write_ok = 1;
- }
- rcu_read_unlock_sched();
- }
+
+ pfn = __kvm_faultin_pfn(memslot, gfn, writing ? FOLL_WRITE : 0,
+ &write_ok, &page);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+
+ /*
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
+ */
+ spin_lock(&kvm->mmu_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+ pte = __pte(0);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ spin_unlock(&kvm->mmu_lock);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!pte_present(pte)) {
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
}
+ hpa = pte_pfn(pte) << PAGE_SHIFT;
+ pte_size = PAGE_SIZE;
+ if (shift)
+ pte_size = 1ul << shift;
+ is_ci = pte_ci(pte);
- ret = -EFAULT;
if (psize > pte_size)
goto out_put;
+ if (pte_size > psize)
+ hpa |= hva & (pte_size - psize);
/* Check WIMG vs. the actual page we're accessing */
- if (!hpte_cache_flags_ok(r, is_io)) {
- if (is_io)
- return -EFAULT;
+ if (!hpte_cache_flags_ok(r, is_ci)) {
+ if (is_ci)
+ goto out_put;
/*
* Allow guest to map emulated device memory as
* uncacheable, but actually make it cacheable.
@@ -710,22 +650,37 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
/*
- * Set the HPTE to point to pfn.
- * Since the pfn is at PAGE_SIZE granularity, make sure we
+ * Set the HPTE to point to hpa.
+ * Since the hpa is at PAGE_SIZE granularity, make sure we
* don't mask out lower-order bits if psize < PAGE_SIZE.
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) | hpa;
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax();
- if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] ||
- be64_to_cpu(hptep[1]) != hpte[1] ||
- rev->guest_rpte != hpte[2])
+ hnow_v = be64_to_cpu(hptep[0]);
+ hnow_r = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
+ hnow_r = hpte_new_to_old_r(hnow_r);
+ }
+
+ /*
+ * If the HPT is being resized, don't update the HPTE,
+ * instead let the guest retry after the resize operation is complete.
+ * The synchronization for mmu_ready test vs. set is provided
+ * by the HPTE lock.
+ */
+ if (!kvm->arch.mmu_ready)
+ goto out_unlock;
+
+ if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
+ rev->guest_rpte != hpte[2])
/* HPTE has been changed under us; let the guest retry */
goto out_unlock;
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
@@ -736,7 +691,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Check if we might have been invalidated; let the guest retry if so */
ret = RESUME_GUEST;
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+ if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) {
unlock_rmap(rmap);
goto out_unlock;
}
@@ -756,106 +711,104 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ r = hpte_old_to_new_r(hpte[0], r);
+ hpte[0] = hpte_old_to_new_v(hpte[0]);
+ }
hptep[1] = cpu_to_be64(r);
eieio();
- hptep[0] = cpu_to_be64(hpte[0]);
+ __unlock_hpte(hptep, hpte[0]);
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
- SetPageDirty(page);
+ set_page_dirty_lock(page);
out_put:
- if (page) {
- /*
- * We drop pages[0] here, not page because page might
- * have been set to the head page of a compound, but
- * we have to drop the reference on the correct tail
- * page to match the get inside gup()
- */
- put_page(pages[0]);
- }
+ trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
+ if (page)
+ put_page(page);
return ret;
out_unlock:
- hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
preempt_enable();
goto out_put;
}
-static void kvmppc_rmap_reset(struct kvm *kvm)
+void kvmppc_rmap_reset(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
- int srcu_idx;
+ int srcu_idx, bkt;
srcu_idx = srcu_read_lock(&kvm->srcu);
- slots = kvm->memslots;
- kvm_for_each_memslot(memslot, slots) {
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, bkt, slots) {
+ /* Mutual exclusion with kvm_unmap_hva_range etc. */
+ spin_lock(&kvm->mmu_lock);
/*
* This assumes it is acceptable to lose reference and
* change bits across a reset.
*/
memset(memslot->arch.rmap, 0,
memslot->npages * sizeof(*memslot->arch.rmap));
+ spin_unlock(&kvm->mmu_lock);
}
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
-static int kvm_handle_hva_range(struct kvm *kvm,
- unsigned long start,
- unsigned long end,
- int (*handler)(struct kvm *kvm,
- unsigned long *rmapp,
- unsigned long gfn))
+/* Must be called with both HPTE and rmap locked */
+static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i,
+ struct kvm_memory_slot *memslot,
+ unsigned long *rmapp, unsigned long gfn)
{
- int ret;
- int retval = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
- unsigned long hva_start, hva_end;
- gfn_t gfn, gfn_end;
-
- hva_start = max(start, memslot->userspace_addr);
- hva_end = min(end, memslot->userspace_addr +
- (memslot->npages << PAGE_SHIFT));
- if (hva_start >= hva_end)
- continue;
- /*
- * {gfn(page) | page intersects with [hva_start, hva_end)} =
- * {gfn, gfn+1, ..., gfn_end-1}.
- */
- gfn = hva_to_gfn_memslot(hva_start, memslot);
- gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
+ __be64 *hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
+ unsigned long j, h;
+ unsigned long ptel, psize, rcbits;
- for (; gfn < gfn_end; ++gfn) {
- gfn_t gfn_offset = gfn - memslot->base_gfn;
+ j = rev[i].forw;
+ if (j == i) {
+ /* chain is now empty */
+ *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+ } else {
+ /* remove i from chain */
+ h = rev[i].back;
+ rev[h].forw = j;
+ rev[j].back = h;
+ rev[i].forw = rev[i].back = i;
+ *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
+ }
- ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
- retval |= ret;
+ /* Now check and modify the HPTE */
+ ptel = rev[i].guest_rpte;
+ psize = kvmppc_actual_pgsz(be64_to_cpu(hptep[0]), ptel);
+ if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
+ hpte_rpn(ptel, psize) == gfn) {
+ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+ kvmppc_invalidate_hpte(kvm, hptep, i);
+ hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+ /* Harvest R and C */
+ rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
+ *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if ((rcbits & HPTE_R_C) && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot, gfn, psize);
+ if (rcbits & ~rev[i].guest_rpte) {
+ rev[i].guest_rpte = ptel | rcbits;
+ note_hpte_modification(kvm, &rev[i]);
}
}
-
- return retval;
}
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp,
- unsigned long gfn))
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn)
{
- return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
-}
-
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long gfn)
-{
- struct revmap_entry *rev = kvm->arch.revmap;
- unsigned long h, i, j;
+ unsigned long i;
__be64 *hptep;
- unsigned long ptel, psize, rcbits;
+ unsigned long *rmapp;
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
for (;;) {
lock_rmap(rmapp);
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
@@ -869,7 +822,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
* rmap chain lock.
*/
i = *rmapp & KVMPPC_RMAP_INDEX;
- hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
+ hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) {
/* unlock rmap before spinning on the HPTE lock */
unlock_rmap(rmapp);
@@ -877,65 +830,43 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
cpu_relax();
continue;
}
- j = rev[i].forw;
- if (j == i) {
- /* chain is now empty */
- *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
- } else {
- /* remove i from chain */
- h = rev[i].back;
- rev[h].forw = j;
- rev[j].back = h;
- rev[i].forw = rev[i].back = i;
- *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j;
- }
- /* Now check and modify the HPTE */
- ptel = rev[i].guest_rpte;
- psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
- if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
- hpte_rpn(ptel, psize) == gfn) {
- if (kvm->arch.using_mmu_notifiers)
- hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
- kvmppc_invalidate_hpte(kvm, hptep, i);
- /* Harvest R and C */
- rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
- *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
- if (rcbits & ~rev[i].guest_rpte) {
- rev[i].guest_rpte = ptel | rcbits;
- note_hpte_modification(kvm, &rev[i]);
- }
- }
+ kvmppc_unmap_hpte(kvm, i, memslot, rmapp, gfn);
unlock_rmap(rmapp);
- hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
- return 0;
}
-int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
+bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm->arch.using_mmu_notifiers)
- kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
- return 0;
-}
+ gfn_t gfn;
-int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
-{
- if (kvm->arch.using_mmu_notifiers)
- kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
- return 0;
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_rmapp(kvm, range->slot, gfn);
+ }
+
+ return false;
}
void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
- unsigned long *rmapp;
unsigned long gfn;
unsigned long n;
+ unsigned long *rmapp;
- rmapp = memslot->arch.rmap;
gfn = memslot->base_gfn;
- for (n = memslot->npages; n; --n) {
+ rmapp = memslot->arch.rmap;
+ if (kvm_is_radix(kvm)) {
+ kvmppc_radix_flush_memslot(kvm, memslot);
+ return;
+ }
+
+ for (n = memslot->npages; n; --n, ++gfn) {
/*
* Testing the present bit without locking is OK because
* the memslot has been marked invalid already, and hence
@@ -943,25 +874,26 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
* thus the present bit can't go from 0 to 1.
*/
if (*rmapp & KVMPPC_RMAP_PRESENT)
- kvm_unmap_rmapp(kvm, rmapp, gfn);
+ kvm_unmap_rmapp(kvm, memslot, gfn);
++rmapp;
- ++gfn;
}
}
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long gfn)
+static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn)
{
- struct revmap_entry *rev = kvm->arch.revmap;
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
__be64 *hptep;
- int ret = 0;
+ bool ret = false;
+ unsigned long *rmapp;
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
retry:
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_REFERENCED) {
*rmapp &= ~KVMPPC_RMAP_REFERENCED;
- ret = 1;
+ ret = true;
}
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
@@ -970,7 +902,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
i = head = *rmapp & KVMPPC_RMAP_INDEX;
do {
- hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
+ hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
j = rev[i].forw;
/* If this HPTE isn't referenced, ignore it */
@@ -993,32 +925,43 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
rev[i].guest_rpte |= HPTE_R_R;
note_hpte_modification(kvm, &rev[i]);
}
- ret = 1;
+ ret = true;
}
- hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
} while ((i = j) != head);
unlock_rmap(rmapp);
return ret;
}
-int kvm_age_hva_hv(struct kvm *kvm, unsigned long hva)
+bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (!kvm->arch.using_mmu_notifiers)
- return 0;
- return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+ gfn_t gfn;
+ bool ret = false;
+
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+ }
+
+ return ret;
}
-static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
- unsigned long gfn)
+static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+ unsigned long gfn)
{
- struct revmap_entry *rev = kvm->arch.revmap;
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
unsigned long *hp;
- int ret = 1;
+ bool ret = true;
+ unsigned long *rmapp;
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
if (*rmapp & KVMPPC_RMAP_REFERENCED)
- return 1;
+ return true;
lock_rmap(rmapp);
if (*rmapp & KVMPPC_RMAP_REFERENCED)
@@ -1027,31 +970,27 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
if (*rmapp & KVMPPC_RMAP_PRESENT) {
i = head = *rmapp & KVMPPC_RMAP_INDEX;
do {
- hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4));
+ hp = (unsigned long *)(kvm->arch.hpt.virt + (i << 4));
j = rev[i].forw;
if (be64_to_cpu(hp[1]) & HPTE_R_R)
goto out;
} while ((i = j) != head);
}
- ret = 0;
+ ret = false;
out:
unlock_rmap(rmapp);
return ret;
}
-int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
+bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (!kvm->arch.using_mmu_notifiers)
- return 0;
- return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
-}
+ WARN_ON(range->start + 1 != range->end);
-void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
-{
- if (!kvm->arch.using_mmu_notifiers)
- return;
- kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ if (kvm_is_radix(kvm))
+ return kvm_test_age_radix(kvm, range->slot, range->start);
+ else
+ return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
static int vcpus_running(struct kvm *kvm)
@@ -1065,7 +1004,7 @@ static int vcpus_running(struct kvm *kvm)
*/
static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
{
- struct revmap_entry *rev = kvm->arch.revmap;
+ struct revmap_entry *rev = kvm->arch.hpt.rev;
unsigned long head, i, j;
unsigned long n;
unsigned long v, r;
@@ -1074,10 +1013,6 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
retry:
lock_rmap(rmapp);
- if (*rmapp & KVMPPC_RMAP_CHANGED) {
- *rmapp &= ~KVMPPC_RMAP_CHANGED;
- npages_dirty = 1;
- }
if (!(*rmapp & KVMPPC_RMAP_PRESENT)) {
unlock_rmap(rmapp);
return npages_dirty;
@@ -1086,7 +1021,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
i = head = *rmapp & KVMPPC_RMAP_INDEX;
do {
unsigned long hptep1;
- hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4));
+ hptep = (__be64 *) (kvm->arch.hpt.virt + (i << 4));
j = rev[i].forw;
/*
@@ -1117,8 +1052,10 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
}
/* Now check and modify the HPTE */
- if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+ if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+ __unlock_hpte(hptep, be64_to_cpu(hptep[0]));
continue;
+ }
/* need to make it temporarily absent so C is stable */
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1131,22 +1068,22 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
rev[i].guest_rpte |= HPTE_R_C;
note_hpte_modification(kvm, &rev[i]);
}
- n = hpte_page_size(v, r);
+ n = kvmppc_actual_pgsz(v, r);
n = (n + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (n > npages_dirty)
npages_dirty = n;
eieio();
}
- v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
+ v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
- hptep[0] = cpu_to_be64(v);
+ __unlock_hpte(hptep, v);
} while ((i = j) != head);
unlock_rmap(rmapp);
return npages_dirty;
}
-static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
+void kvmppc_harvest_vpa_dirty(struct kvmppc_vpa *vpa,
struct kvm_memory_slot *memslot,
unsigned long *map)
{
@@ -1164,12 +1101,11 @@ static void harvest_vpa_dirty(struct kvmppc_vpa *vpa,
__set_bit_le(gfn - memslot->base_gfn, map);
}
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
- unsigned long *map)
+long kvmppc_hv_get_dirty_log_hpt(struct kvm *kvm,
+ struct kvm_memory_slot *memslot, unsigned long *map)
{
- unsigned long i, j;
+ unsigned long i;
unsigned long *rmapp;
- struct kvm_vcpu *vcpu;
preempt_disable();
rmapp = memslot->arch.rmap;
@@ -1180,20 +1116,10 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
* since we always put huge-page HPTEs in the rmap chain
* corresponding to their page base address.
*/
- if (npages && map)
- for (j = i; npages; ++j, --npages)
- __set_bit_le(j, map);
+ if (npages)
+ set_dirty_bits(map, i, npages);
++rmapp;
}
-
- /* Harvest dirty bits from VPA and DTL updates */
- /* Note: we never modify the SLB shadow buffer areas */
- kvm_for_each_vcpu(i, vcpu, kvm) {
- spin_lock(&vcpu->arch.vpa_update_lock);
- harvest_vpa_dirty(&vcpu->arch.vpa, memslot, map);
- harvest_vpa_dirty(&vcpu->arch.dtl, memslot, map);
- spin_unlock(&vcpu->arch.vpa_update_lock);
- }
preempt_enable();
return 0;
}
@@ -1206,35 +1132,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
struct page *page, *pages[1];
int npages;
unsigned long hva, offset;
- unsigned long pa;
- unsigned long *physp;
int srcu_idx;
srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
goto err;
- if (!kvm->arch.using_mmu_notifiers) {
- physp = memslot->arch.slot_phys;
- if (!physp)
- goto err;
- physp += gfn - memslot->base_gfn;
- pa = *physp;
- if (!pa) {
- if (kvmppc_get_guest_page(kvm, gfn, memslot,
- PAGE_SIZE) < 0)
- goto err;
- pa = *physp;
- }
- page = pfn_to_page(pa >> PAGE_SHIFT);
- get_page(page);
- } else {
- hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, 1, pages);
- if (npages < 1)
- goto err;
- page = pages[0];
- }
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, FOLL_WRITE, pages);
+ if (npages < 1)
+ goto err;
+ page = pages[0];
srcu_read_unlock(&kvm->srcu, srcu_idx);
offset = gpa & (PAGE_SIZE - 1);
@@ -1253,28 +1161,421 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
struct page *page = virt_to_page(va);
struct kvm_memory_slot *memslot;
unsigned long gfn;
- unsigned long *rmap;
int srcu_idx;
put_page(page);
- if (!dirty || !kvm->arch.using_mmu_notifiers)
+ if (!dirty)
return;
- /* We need to mark this page dirty in the rmap chain */
+ /* We need to mark this page dirty in the memslot dirty_bitmap, if any */
gfn = gpa >> PAGE_SHIFT;
srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
- if (memslot) {
- rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
- lock_rmap(rmap);
- *rmap |= KVMPPC_RMAP_CHANGED;
- unlock_rmap(rmap);
- }
+ if (memslot && memslot->dirty_bitmap)
+ set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);
srcu_read_unlock(&kvm->srcu, srcu_idx);
}
/*
+ * HPT resizing
+ */
+static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
+{
+ int rc;
+
+ rc = kvmppc_allocate_hpt(&resize->hpt, resize->order);
+ if (rc < 0)
+ return rc;
+
+ resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__,
+ resize->hpt.virt);
+
+ return 0;
+}
+
+static unsigned long resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
+ unsigned long idx)
+{
+ struct kvm *kvm = resize->kvm;
+ struct kvm_hpt_info *old = &kvm->arch.hpt;
+ struct kvm_hpt_info *new = &resize->hpt;
+ unsigned long old_hash_mask = (1ULL << (old->order - 7)) - 1;
+ unsigned long new_hash_mask = (1ULL << (new->order - 7)) - 1;
+ __be64 *hptep, *new_hptep;
+ unsigned long vpte, rpte, guest_rpte;
+ int ret;
+ struct revmap_entry *rev;
+ unsigned long apsize, avpn, pteg, hash;
+ unsigned long new_idx, new_pteg, replace_vpte;
+ int pshift;
+
+ hptep = (__be64 *)(old->virt + (idx << 4));
+
+ /* Guest is stopped, so new HPTEs can't be added or faulted
+ * in, only unmapped or altered by host actions. So, it's
+ * safe to check this before we take the HPTE lock */
+ vpte = be64_to_cpu(hptep[0]);
+ if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+ return 0; /* nothing to do */
+
+ while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
+ cpu_relax();
+
+ vpte = be64_to_cpu(hptep[0]);
+
+ ret = 0;
+ if (!(vpte & HPTE_V_VALID) && !(vpte & HPTE_V_ABSENT))
+ /* Nothing to do */
+ goto out;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = be64_to_cpu(hptep[1]);
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ }
+
+ /* Unmap */
+ rev = &old->rev[idx];
+ guest_rpte = rev->guest_rpte;
+
+ ret = -EIO;
+ apsize = kvmppc_actual_pgsz(vpte, guest_rpte);
+ if (!apsize)
+ goto out;
+
+ if (vpte & HPTE_V_VALID) {
+ unsigned long gfn = hpte_rpn(guest_rpte, apsize);
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ struct kvm_memory_slot *memslot =
+ __gfn_to_memslot(kvm_memslots(kvm), gfn);
+
+ if (memslot) {
+ unsigned long *rmapp;
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+
+ lock_rmap(rmapp);
+ kvmppc_unmap_hpte(kvm, idx, memslot, rmapp, gfn);
+ unlock_rmap(rmapp);
+ }
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ }
+
+ /* Reload PTE after unmap */
+ vpte = be64_to_cpu(hptep[0]);
+ BUG_ON(vpte & HPTE_V_VALID);
+ BUG_ON(!(vpte & HPTE_V_ABSENT));
+
+ ret = 0;
+ if (!(vpte & HPTE_V_BOLTED))
+ goto out;
+
+ rpte = be64_to_cpu(hptep[1]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ vpte = hpte_new_to_old_v(vpte, rpte);
+ rpte = hpte_new_to_old_r(rpte);
+ }
+
+ pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
+ avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >> 23);
+ pteg = idx / HPTES_PER_GROUP;
+ if (vpte & HPTE_V_SECONDARY)
+ pteg = ~pteg;
+
+ if (!(vpte & HPTE_V_1TB_SEG)) {
+ unsigned long offset, vsid;
+
+ /* We only have 28 - 23 bits of offset in avpn */
+ offset = (avpn & 0x1f) << 23;
+ vsid = avpn >> 5;
+ /* We can find more bits from the pteg value */
+ if (pshift < 23)
+ offset |= ((vsid ^ pteg) & old_hash_mask) << pshift;
+
+ hash = vsid ^ (offset >> pshift);
+ } else {
+ unsigned long offset, vsid;
+
+ /* We only have 40 - 23 bits of seg_off in avpn */
+ offset = (avpn & 0x1ffff) << 23;
+ vsid = avpn >> 17;
+ if (pshift < 23)
+ offset |= ((vsid ^ (vsid << 25) ^ pteg) & old_hash_mask) << pshift;
+
+ hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
+ }
+
+ new_pteg = hash & new_hash_mask;
+ if (vpte & HPTE_V_SECONDARY)
+ new_pteg = ~hash & new_hash_mask;
+
+ new_idx = new_pteg * HPTES_PER_GROUP + (idx % HPTES_PER_GROUP);
+ new_hptep = (__be64 *)(new->virt + (new_idx << 4));
+
+ replace_vpte = be64_to_cpu(new_hptep[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ unsigned long replace_rpte = be64_to_cpu(new_hptep[1]);
+ replace_vpte = hpte_new_to_old_v(replace_vpte, replace_rpte);
+ }
+
+ if (replace_vpte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ BUG_ON(new->order >= old->order);
+
+ if (replace_vpte & HPTE_V_BOLTED) {
+ if (vpte & HPTE_V_BOLTED)
+ /* Bolted collision, nothing we can do */
+ ret = -ENOSPC;
+ /* Discard the new HPTE */
+ goto out;
+ }
+
+ /* Discard the previous HPTE */
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ rpte = hpte_old_to_new_r(vpte, rpte);
+ vpte = hpte_old_to_new_v(vpte);
+ }
+
+ new_hptep[1] = cpu_to_be64(rpte);
+ new->rev[new_idx].guest_rpte = guest_rpte;
+ /* No need for a barrier, since new HPT isn't active */
+ new_hptep[0] = cpu_to_be64(vpte);
+ unlock_hpte(new_hptep, vpte);
+
+out:
+ unlock_hpte(hptep, vpte);
+ return ret;
+}
+
+static int resize_hpt_rehash(struct kvm_resize_hpt *resize)
+{
+ struct kvm *kvm = resize->kvm;
+ unsigned long i;
+ int rc;
+
+ for (i = 0; i < kvmppc_hpt_npte(&kvm->arch.hpt); i++) {
+ rc = resize_hpt_rehash_hpte(resize, i);
+ if (rc != 0)
+ return rc;
+ }
+
+ return 0;
+}
+
+static void resize_hpt_pivot(struct kvm_resize_hpt *resize)
+{
+ struct kvm *kvm = resize->kvm;
+ struct kvm_hpt_info hpt_tmp;
+
+ /* Exchange the pending tables in the resize structure with
+ * the active tables */
+
+ resize_hpt_debug(resize, "resize_hpt_pivot()\n");
+
+ spin_lock(&kvm->mmu_lock);
+ asm volatile("ptesync" : : : "memory");
+
+ hpt_tmp = kvm->arch.hpt;
+ kvmppc_set_hpt(kvm, &resize->hpt);
+ resize->hpt = hpt_tmp;
+
+ spin_unlock(&kvm->mmu_lock);
+
+ synchronize_srcu_expedited(&kvm->srcu);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvmppc_setup_partition_table(kvm);
+
+ resize_hpt_debug(resize, "resize_hpt_pivot() done\n");
+}
+
+static void resize_hpt_release(struct kvm *kvm, struct kvm_resize_hpt *resize)
+{
+ if (WARN_ON(!mutex_is_locked(&kvm->arch.mmu_setup_lock)))
+ return;
+
+ if (!resize)
+ return;
+
+ if (resize->error != -EBUSY) {
+ if (resize->hpt.virt)
+ kvmppc_free_hpt(&resize->hpt);
+ kfree(resize);
+ }
+
+ if (kvm->arch.resize_hpt == resize)
+ kvm->arch.resize_hpt = NULL;
+}
+
+static void resize_hpt_prepare_work(struct work_struct *work)
+{
+ struct kvm_resize_hpt *resize = container_of(work,
+ struct kvm_resize_hpt,
+ work);
+ struct kvm *kvm = resize->kvm;
+ int err = 0;
+
+ if (WARN_ON(resize->error != -EBUSY))
+ return;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+
+ /* Request is still current? */
+ if (kvm->arch.resize_hpt == resize) {
+ /* We may request large allocations here:
+ * do not sleep with kvm->arch.mmu_setup_lock held for a while.
+ */
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+
+ resize_hpt_debug(resize, "%s(): order = %d\n", __func__,
+ resize->order);
+
+ err = resize_hpt_allocate(resize);
+
+ /* We have strict assumption about -EBUSY
+ * when preparing for HPT resize.
+ */
+ if (WARN_ON(err == -EBUSY))
+ err = -EINPROGRESS;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ /* It is possible that kvm->arch.resize_hpt != resize
+ * after we grab kvm->arch.mmu_setup_lock again.
+ */
+ }
+
+ resize->error = err;
+
+ if (kvm->arch.resize_hpt != resize)
+ resize_hpt_release(kvm, resize);
+
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+}
+
+int kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
+{
+ unsigned long flags = rhpt->flags;
+ unsigned long shift = rhpt->shift;
+ struct kvm_resize_hpt *resize;
+ int ret;
+
+ if (flags != 0 || kvm_is_radix(kvm))
+ return -EINVAL;
+
+ if (shift && ((shift < 18) || (shift > 46)))
+ return -EINVAL;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+
+ resize = kvm->arch.resize_hpt;
+
+ if (resize) {
+ if (resize->order == shift) {
+ /* Suitable resize in progress? */
+ ret = resize->error;
+ if (ret == -EBUSY)
+ ret = 100; /* estimated time in ms */
+ else if (ret)
+ resize_hpt_release(kvm, resize);
+
+ goto out;
+ }
+
+ /* not suitable, cancel it */
+ resize_hpt_release(kvm, resize);
+ }
+
+ ret = 0;
+ if (!shift)
+ goto out; /* nothing to do */
+
+ /* start new resize */
+
+ resize = kzalloc(sizeof(*resize), GFP_KERNEL);
+ if (!resize) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ resize->error = -EBUSY;
+ resize->order = shift;
+ resize->kvm = kvm;
+ INIT_WORK(&resize->work, resize_hpt_prepare_work);
+ kvm->arch.resize_hpt = resize;
+
+ schedule_work(&resize->work);
+
+ ret = 100; /* estimated time in ms */
+
+out:
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return ret;
+}
+
+static void resize_hpt_boot_vcpu(void *opaque)
+{
+ /* Nothing to do, just force a KVM exit */
+}
+
+int kvm_vm_ioctl_resize_hpt_commit(struct kvm *kvm,
+ struct kvm_ppc_resize_hpt *rhpt)
+{
+ unsigned long flags = rhpt->flags;
+ unsigned long shift = rhpt->shift;
+ struct kvm_resize_hpt *resize;
+ int ret;
+
+ if (flags != 0 || kvm_is_radix(kvm))
+ return -EINVAL;
+
+ if (shift && ((shift < 18) || (shift > 46)))
+ return -EINVAL;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+
+ resize = kvm->arch.resize_hpt;
+
+ /* This shouldn't be possible */
+ ret = -EIO;
+ if (WARN_ON(!kvm->arch.mmu_ready))
+ goto out_no_hpt;
+
+ /* Stop VCPUs from running while we mess with the HPT */
+ kvm->arch.mmu_ready = 0;
+ smp_mb();
+
+ /* Boot all CPUs out of the guest so they re-read
+ * mmu_ready */
+ on_each_cpu(resize_hpt_boot_vcpu, NULL, 1);
+
+ ret = -ENXIO;
+ if (!resize || (resize->order != shift))
+ goto out;
+
+ ret = resize->error;
+ if (ret)
+ goto out;
+
+ ret = resize_hpt_rehash(resize);
+ if (ret)
+ goto out;
+
+ resize_hpt_pivot(resize);
+
+out:
+ /* Let VCPUs run again */
+ kvm->arch.mmu_ready = 1;
+ smp_mb();
+out_no_hpt:
+ resize_hpt_release(kvm, resize);
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return ret;
+}
+
+/*
* Functions for reading and writing the hash table via reads and
* writes on a file descriptor.
*
@@ -1323,7 +1624,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass)
{
- unsigned long v, r;
+ unsigned long v, r, hr;
unsigned long rcbits_unset;
int ok = 1;
int valid, dirty;
@@ -1350,6 +1651,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hptp[0]);
+ hr = be64_to_cpu(hptp[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, hr);
+ hr = hpte_new_to_old_r(hr);
+ }
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
@@ -1357,8 +1663,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
/* Harvest R and C into guest view if necessary */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
- if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) {
- revp->guest_rpte |= (be64_to_cpu(hptp[1]) &
+ if (valid && (rcbits_unset & hr)) {
+ revp->guest_rpte |= (hr &
(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
dirty = 1;
}
@@ -1377,8 +1683,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
r &= ~HPTE_GR_MODIFIED;
revp->guest_rpte = r;
}
- asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
- hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ unlock_hpte(hptp, be64_to_cpu(hptp[0]));
preempt_enable();
if (!(valid == want_valid && (first_pass || dirty)))
ok = 0;
@@ -1403,15 +1708,17 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
int first_pass;
unsigned long hpte[2];
- if (!access_ok(VERIFY_WRITE, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
+ if (kvm_is_radix(kvm))
+ return 0;
first_pass = ctx->first_pass;
flags = ctx->flags;
i = ctx->index;
- hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
- revp = kvm->arch.revmap + i;
+ hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+ revp = kvm->arch.hpt.rev + i;
lbuf = (unsigned long __user *)buf;
nb = 0;
@@ -1426,7 +1733,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
/* Skip uninteresting entries, i.e. clean on not-first pass */
if (!first_pass) {
- while (i < kvm->arch.hpt_npte &&
+ while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
!hpte_dirty(revp, hptp)) {
++i;
hptp += 2;
@@ -1436,7 +1743,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
hdr.index = i;
/* Grab a series of valid entries */
- while (i < kvm->arch.hpt_npte &&
+ while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
hdr.n_valid < 0xffff &&
nb + HPTE_SIZE < count &&
record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
@@ -1452,7 +1759,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
++revp;
}
/* Now skip invalid entries while we can */
- while (i < kvm->arch.hpt_npte &&
+ while (i < kvmppc_hpt_npte(&kvm->arch.hpt) &&
hdr.n_invalid < 0xffff &&
record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
/* found an invalid entry */
@@ -1473,7 +1780,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
}
/* Check if we've wrapped around the hash table */
- if (i >= kvm->arch.hpt_npte) {
+ if (i >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
i = 0;
ctx->first_pass = 0;
break;
@@ -1498,21 +1805,24 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
unsigned long tmp[2];
ssize_t nb;
long int err, ret;
- int rma_setup;
+ int mmu_ready;
+ int pshift;
- if (!access_ok(VERIFY_READ, buf, count))
+ if (!access_ok(buf, count))
return -EFAULT;
+ if (kvm_is_radix(kvm))
+ return -EINVAL;
/* lock out vcpus from running while we're doing this */
- mutex_lock(&kvm->lock);
- rma_setup = kvm->arch.rma_setup_done;
- if (rma_setup) {
- kvm->arch.rma_setup_done = 0; /* temporarily */
- /* order rma_setup_done vs. vcpus_running */
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ mmu_ready = kvm->arch.mmu_ready;
+ if (mmu_ready) {
+ kvm->arch.mmu_ready = 0; /* temporarily */
+ /* order mmu_ready vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
- kvm->arch.rma_setup_done = 1;
- mutex_unlock(&kvm->lock);
+ kvm->arch.mmu_ready = 1;
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
return -EBUSY;
}
}
@@ -1532,19 +1842,28 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
err = -EINVAL;
i = hdr.index;
- if (i >= kvm->arch.hpt_npte ||
- i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
+ if (i >= kvmppc_hpt_npte(&kvm->arch.hpt) ||
+ i + hdr.n_valid + hdr.n_invalid > kvmppc_hpt_npte(&kvm->arch.hpt))
break;
- hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
lbuf = (unsigned long __user *)buf;
for (j = 0; j < hdr.n_valid; ++j) {
+ __be64 hpte_v;
+ __be64 hpte_r;
+
err = -EFAULT;
- if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+ if (__get_user(hpte_v, lbuf) ||
+ __get_user(hpte_r, lbuf + 1))
goto out;
+ v = be64_to_cpu(hpte_v);
+ r = be64_to_cpu(hpte_r);
err = -EINVAL;
if (!(v & HPTE_V_VALID))
goto out;
+ pshift = kvmppc_hpte_base_page_shift(v, r);
+ if (pshift <= 0)
+ goto out;
lbuf += 2;
nb += HPTE_SIZE;
@@ -1554,20 +1873,23 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
tmp);
if (ret != H_SUCCESS) {
- pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
- "r=%lx\n", ret, i, v, r);
+ pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r);
goto out;
}
- if (!rma_setup && is_vrma_hpte(v)) {
- unsigned long psize = hpte_base_page_size(v, r);
- unsigned long senc = slb_pgsize_encoding(psize);
- unsigned long lpcr;
+ if (!mmu_ready && is_vrma_hpte(v)) {
+ unsigned long senc, lpcr;
+ senc = slb_pgsize_encoding(1ul << pshift);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr = senc << (LPCR_VRMASD_SH - 4);
- kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
- rma_setup = 1;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr,
+ LPCR_VRMASD);
+ } else {
+ kvmppc_setup_partition_table(kvm);
+ }
+ mmu_ready = 1;
}
++i;
hptp += 2;
@@ -1583,10 +1905,10 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
}
out:
- /* Order HPTE updates vs. rma_setup_done */
+ /* Order HPTE updates vs. mmu_ready */
smp_wmb();
- kvm->arch.rma_setup_done = rma_setup;
- mutex_unlock(&kvm->lock);
+ kvm->arch.mmu_ready = mmu_ready;
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
if (err)
return err;
@@ -1633,7 +1955,8 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag | O_CLOEXEC);
if (ret < 0) {
- kvm_put_kvm(kvm);
+ kfree(ctx);
+ kvm_put_kvm_no_destroy(kvm);
return ret;
}
@@ -1648,17 +1971,151 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
return ret;
}
+/* Per-open-file state for the "htab" debugfs file. */
+struct debugfs_htab_state {
+ struct kvm *kvm; /* VM being dumped; referenced in open, released in release */
+ struct mutex mutex; /* held across the body of debugfs_htab_read() */
+ unsigned long hpt_index; /* next HPT entry index the read handler will examine */
+ int chars_left; /* bytes in buf[] formatted but not yet copied to userspace */
+ int buf_index; /* offset in buf[] of the first byte still to be copied */
+ char buf[64]; /* holds one formatted HPTE line */
+};
+
+/*
+ * Open handler for the "htab" debugfs file.
+ * Allocates zeroed per-file state, takes a reference on the VM stored in
+ * inode->i_private, and marks the file as non-seekable.
+ * Returns -ENOMEM if the state cannot be allocated, otherwise the result
+ * of nonseekable_open().
+ */
+static int debugfs_htab_open(struct inode *inode, struct file *file)
+{
+ struct kvm *kvm = inode->i_private;
+ struct debugfs_htab_state *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ /* Reference is dropped again in debugfs_htab_release() */
+ kvm_get_kvm(kvm);
+ p->kvm = kvm;
+ mutex_init(&p->mutex);
+ file->private_data = p;
+
+ return nonseekable_open(inode, file);
+}
+
+/*
+ * Release handler for the "htab" debugfs file: drops the VM reference
+ * taken at open time and frees the per-file state. Always returns 0.
+ */
+static int debugfs_htab_release(struct inode *inode, struct file *file)
+{
+ struct debugfs_htab_state *p = file->private_data;
+
+ kvm_put_kvm(p->kvm);
+ kfree(p);
+ return 0;
+}
+
+/*
+ * Read handler for the "htab" debugfs file.
+ *
+ * Emits one text line ("index v hr gr") for every HPT entry that has
+ * HPTE_V_VALID or HPTE_V_ABSENT set.  Each line is formatted into p->buf;
+ * whatever does not fit in the caller's buffer is remembered through
+ * p->chars_left / p->buf_index and flushed first on the next call.
+ * p->hpt_index always names the first entry that has not been formatted yet.
+ *
+ * @file: open file; file->private_data is the struct debugfs_htab_state
+ * @buf:  userspace destination buffer
+ * @len:  size of @buf in bytes
+ * @ppos: unused (the file is opened non-seekable)
+ *
+ * Returns the number of bytes copied, 0 for a radix guest or when the end
+ * of the table has been reached, -EFAULT if nothing could be copied, or
+ * the error from mutex_lock_interruptible().
+ */
+static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
+				 size_t len, loff_t *ppos)
+{
+	struct debugfs_htab_state *p = file->private_data;
+	ssize_t ret, r;
+	unsigned long i, n;
+	unsigned long v, hr, gr;
+	struct kvm *kvm;
+	__be64 *hptp;
+
+	kvm = p->kvm;
+	if (kvm_is_radix(kvm))
+		return 0;
+
+	ret = mutex_lock_interruptible(&p->mutex);
+	if (ret)
+		return ret;
+
+	/* First flush what is left over from the previous call, if anything */
+	if (p->chars_left) {
+		n = p->chars_left;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf + p->buf_index, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index += n;
+		buf += n;
+		len -= n;
+		ret = n;
+		if (r) {
+			if (!n)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+
+	i = p->hpt_index;
+	hptp = (__be64 *)(kvm->arch.hpt.virt + (i * HPTE_SIZE));
+	for (; len != 0 && i < kvmppc_hpt_npte(&kvm->arch.hpt);
+	     ++i, hptp += 2) {
+		/* Cheap unlocked pre-check; re-checked below under the lock */
+		if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
+			continue;
+
+		/* lock the HPTE so it's stable and read it */
+		preempt_disable();
+		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+			cpu_relax();
+		v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
+		hr = be64_to_cpu(hptp[1]);
+		gr = kvm->arch.hpt.rev[i].guest_rpte;
+		unlock_hpte(hptp, v);
+		preempt_enable();
+
+		if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+			continue;
+
+		n = scnprintf(p->buf, sizeof(p->buf),
+			      "%6lx %.16lx %.16lx %.16lx\n",
+			      i, v, hr, gr);
+		p->chars_left = n;
+		if (n > len)
+			n = len;
+		r = copy_to_user(buf, p->buf, n);
+		n -= r;
+		p->chars_left -= n;
+		p->buf_index = n;
+		buf += n;
+		len -= n;
+		ret += n;
+		if (r) {
+			/*
+			 * Entry i is already formatted and its uncopied tail
+			 * is tracked by chars_left, so the next call must
+			 * resume after it.  Without this, the next call would
+			 * rescan from the stale index and emit entries twice.
+			 */
+			p->hpt_index = i + 1;
+			if (!ret)
+				ret = -EFAULT;
+			goto out;
+		}
+	}
+	p->hpt_index = i;
+
+ out:
+	mutex_unlock(&p->mutex);
+	return ret;
+}
+
+/* Write handler for the "htab" debugfs file: always refuses with -EACCES. */
+static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return -EACCES;
+}
+
+/*
+ * File operations for the "htab" debugfs file.
+ * NOTE(review): .open calls nonseekable_open() while .llseek is set to
+ * generic_file_llseek -- confirm this combination is intended.
+ */
+static const struct file_operations debugfs_htab_fops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_htab_open,
+ .release = debugfs_htab_release,
+ .read = debugfs_htab_read,
+ .write = debugfs_htab_write,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Create the "htab" file (mode 0400) under the VM's debugfs directory.
+ * The kvm pointer is passed as the file's private data and is picked up
+ * from inode->i_private in debugfs_htab_open().
+ */
+void kvmppc_mmu_debugfs_init(struct kvm *kvm)
+{
+ debugfs_create_file("htab", 0400, kvm->debugfs_dentry, kvm,
+ &debugfs_htab_fops);
+}
+
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- vcpu->arch.slb_nr = 32; /* POWER7 */
- else
- vcpu->arch.slb_nr = 64;
+ vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
- mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
vcpu->arch.hflags |= BOOK3S_HFLAG_SLB;
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
new file mode 100644
index 000000000000..b3e6e73d6a08
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -0,0 +1,1476 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright 2016 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
+#include <linux/debugfs.h>
+#include <linux/pgtable.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include "book3s_hv.h"
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/ultravisor.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+/*
+ * Supported radix tree geometry.
+ * Like p9, we support either 5 or 9 bits at the first (lowest) level,
+ * for a page size of 64k or 4k.
+ *
+ * The array is indexed by tree level as used by
+ * kvmppc_mmu_walk_radix_tree(): index 3 is the level walked first and
+ * index 0 the lowest level. The walker only consults entries 1..3; for
+ * level 0 it accepts either 5 or 9 bits directly.
+ */
+static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 };
+
+/*
+ * Copy n bytes between a kernel buffer and guest effective address eaddr,
+ * for the guest identified by lpid/pid.
+ *
+ * The direction is chosen by @to: if it is non-NULL the copy is a load
+ * from the guest into @to, otherwise a store from @from into the guest.
+ * The guest side is reached by temporarily switching SPRN_LPID (and
+ * SPRN_PID for quadrant 1) and OR-ing the quadrant number into the top
+ * two bits of eaddr. Quadrant 1 is used when pid is non-zero, quadrant 2
+ * when pid is zero.
+ *
+ * Returns:
+ *  - H_UNSUPPORTED when kvmhv_is_nestedv2() is true;
+ *  - the H_COPY_TOFROM_GUEST hcall result when running on pseries;
+ *  - n unchanged if any of the top 12 bits of eaddr are set;
+ *  - otherwise the return value of __copy_{from,to}_user_inatomic()
+ *    (presumably the number of bytes NOT copied -- confirm).
+ */
+unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid,
+ gva_t eaddr, void *to, void *from,
+ unsigned long n)
+{
+ int old_pid, old_lpid;
+ unsigned long quadrant, ret = n;
+ bool is_load = !!to;
+
+ if (kvmhv_is_nestedv2())
+ return H_UNSUPPORTED;
+
+ /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */
+ if (kvmhv_on_pseries())
+ return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr,
+ (to != NULL) ? __pa(to): 0,
+ (from != NULL) ? __pa(from): 0, n);
+
+ /* Top 12 address bits must be clear; report everything as uncopied */
+ if (eaddr & (0xFFFUL << 52))
+ return ret;
+
+ quadrant = 1;
+ if (!pid)
+ quadrant = 2;
+ /* Replace the guest-side pointer with the quadrant-tagged address */
+ if (is_load)
+ from = (void *) (eaddr | (quadrant << 62));
+ else
+ to = (void *) (eaddr | (quadrant << 62));
+
+ preempt_disable();
+
+ asm volatile("hwsync" ::: "memory");
+ isync();
+ /* switch the lpid first to avoid running host with unallocated pid */
+ old_lpid = mfspr(SPRN_LPID);
+ if (old_lpid != lpid)
+ mtspr(SPRN_LPID, lpid);
+ /* old_pid is only read and restored when quadrant == 1 */
+ if (quadrant == 1) {
+ old_pid = mfspr(SPRN_PID);
+ if (old_pid != pid)
+ mtspr(SPRN_PID, pid);
+ }
+ isync();
+
+ /* Faults must not be serviced while the guest context is installed */
+ pagefault_disable();
+ if (is_load)
+ ret = __copy_from_user_inatomic(to, (const void __user *)from, n);
+ else
+ ret = __copy_to_user_inatomic((void __user *)to, from, n);
+ pagefault_enable();
+
+ asm volatile("hwsync" ::: "memory");
+ isync();
+ /* switch the pid first to avoid running host with unallocated pid */
+ if (quadrant == 1 && pid != old_pid)
+ mtspr(SPRN_PID, old_pid);
+ if (lpid != old_lpid)
+ mtspr(SPRN_LPID, old_lpid);
+ isync();
+
+ preempt_enable();
+
+ return ret;
+}
+
+/*
+ * Per-vcpu wrapper around __kvmhv_copy_tofrom_guest_radix().
+ * Picks the LPID (the nested guest's shadow_lpid when vcpu->arch.nested
+ * is set, otherwise the VM's own lpid) and the PID (0 for quadrant 3
+ * addresses, otherwise the vcpu's current PID), then strips the top 12
+ * bits of eaddr before delegating.
+ * Returns -EINVAL if any of bits 52..61 of eaddr are set, otherwise the
+ * value returned by __kvmhv_copy_tofrom_guest_radix().
+ */
+static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr,
+ void *to, void *from, unsigned long n)
+{
+ int lpid = vcpu->kvm->arch.lpid;
+ int pid;
+
+ /* This would cause a data segment intr so don't allow the access */
+ if (eaddr & (0x3FFUL << 52))
+ return -EINVAL;
+
+ /* Should we be using the nested lpid */
+ if (vcpu->arch.nested)
+ lpid = vcpu->arch.nested->shadow_lpid;
+
+ /* If accessing quadrant 3 then pid is expected to be 0 */
+ if (((eaddr >> 62) & 0x3) == 0x3)
+ pid = 0;
+ else
+ pid = kvmppc_get_pid(vcpu);
+
+ /* Drop the quadrant bits; the callee re-inserts its own quadrant */
+ eaddr &= ~(0xFFFUL << 52);
+
+ return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n);
+}
+
+/*
+ * Load n bytes from guest effective address eaddr into the kernel buffer
+ * @to.  When the underlying copy returns a positive value, that many bytes
+ * at the end of @to are zero-filled.  The value returned by the underlying
+ * copy is passed back to the caller unchanged.
+ */
+long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to,
+				 unsigned long n)
+{
+	long uncopied = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n);
+
+	/* Zero or negative: nothing to pad */
+	if (uncopied <= 0)
+		return uncopied;
+
+	/* Clear the tail of the destination that was not filled in */
+	memset(to + (n - uncopied), 0, uncopied);
+	return uncopied;
+}
+
+/*
+ * Store n bytes from the kernel buffer @from to guest effective address
+ * eaddr. Returns whatever kvmhv_copy_tofrom_guest_radix() returns.
+ */
+long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from,
+ unsigned long n)
+{
+ return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n);
+}
+
+/*
+ * Walk a radix page table tree held in guest memory and translate eaddr.
+ *
+ * @vcpu:      vcpu whose VM memory is read via kvm_read_guest()
+ * @eaddr:     address to translate
+ * @gpte:      filled in on success (page size/shift, eaddr, raddr,
+ *             read/write/execute permissions, R/C bits)
+ * @root:      root descriptor; RTS, RPDS and RPDB fields are extracted
+ * @pte_ret_p: optional; receives the leaf PTE on success, or the guest
+ *             address of the entry that could not be read when
+ *             kvm_read_guest() fails
+ *
+ * Returns 0 on success, -EINVAL for an unsupported or malformed tree,
+ * -ENOENT if a non-present entry is encountered, or the error from
+ * kvm_read_guest().
+ */
+int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 root,
+ u64 *pte_ret_p)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int ret, level, ps;
+ unsigned long rts, bits, offset, index;
+ u64 pte, base, gpa;
+ __be64 rpte;
+
+ rts = ((root & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
+ ((root & RTS2_MASK) >> RTS2_SHIFT);
+ bits = root & RPDS_MASK;
+ base = root & RPDB_MASK;
+
+ offset = rts + 31;
+
+ /* Current implementations only support 52-bit space */
+ if (offset != 52)
+ return -EINVAL;
+
+ /* Walk each level of the radix tree */
+ for (level = 3; level >= 0; --level) {
+ u64 addr;
+ /* Check a valid size */
+ if (level && bits != p9_supported_radix_bits[level])
+ return -EINVAL;
+ if (level == 0 && !(bits == 5 || bits == 9))
+ return -EINVAL;
+ /* offset becomes the bit position of this level's index field */
+ offset -= bits;
+ index = (eaddr >> offset) & ((1UL << bits) - 1);
+ /* Check that low bits of page table base are zero */
+ if (base & ((1UL << (bits + 3)) - 1))
+ return -EINVAL;
+ /* Read the entry from guest memory */
+ addr = base + (index * sizeof(rpte));
+
+ kvm_vcpu_srcu_read_lock(vcpu);
+ ret = kvm_read_guest(kvm, addr, &rpte, sizeof(rpte));
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (ret) {
+ /* Report which guest address could not be read */
+ if (pte_ret_p)
+ *pte_ret_p = addr;
+ return ret;
+ }
+ pte = __be64_to_cpu(rpte);
+ if (!(pte & _PAGE_PRESENT))
+ return -ENOENT;
+ /* Check if a leaf entry */
+ if (pte & _PAGE_PTE)
+ break;
+ /* Get ready to walk the next level */
+ base = pte & RPDB_MASK;
+ bits = pte & RPDS_MASK;
+ }
+
+ /* Need a leaf at lowest level; 512GB pages not supported */
+ if (level < 0 || level == 3)
+ return -EINVAL;
+
+ /* We found a valid leaf PTE */
+ /* Offset is now log base 2 of the page size */
+ gpa = pte & 0x01fffffffffff000ul;
+ if (gpa & ((1ul << offset) - 1))
+ return -EINVAL;
+ gpa |= eaddr & ((1ul << offset) - 1);
+ /* Find the MMU page size index whose shift matches; ps is MMU_PAGE_COUNT if none does */
+ for (ps = MMU_PAGE_4K; ps < MMU_PAGE_COUNT; ++ps)
+ if (offset == mmu_psize_defs[ps].shift)
+ break;
+ gpte->page_size = ps;
+ gpte->page_shift = offset;
+
+ gpte->eaddr = eaddr;
+ gpte->raddr = gpa;
+
+ /* Work out permissions */
+ gpte->may_read = !!(pte & _PAGE_READ);
+ gpte->may_write = !!(pte & _PAGE_WRITE);
+ gpte->may_execute = !!(pte & _PAGE_EXEC);
+
+ gpte->rc = pte & (_PAGE_ACCESSED | _PAGE_DIRTY);
+
+ if (pte_ret_p)
+ *pte_ret_p = pte;
+
+ return 0;
+}
+
+/*
+ * Used to walk a partition or process table radix tree in guest memory
+ * Note: We exploit the fact that a partition table and a process
+ * table have the same layout, a partition-scoped page table and a
+ * process-scoped page table have the same layout, and the 2nd
+ * doubleword of a partition table entry has the same layout as
+ * the PTCR register.
+ *
+ * @table is the table descriptor (base in PRTB_MASK, size field in
+ * PRTS_MASK) and @table_index selects the entry whose first doubleword
+ * supplies the radix tree root. The remaining arguments are handed to
+ * kvmppc_mmu_walk_radix_tree() unchanged.
+ *
+ * Returns -EINVAL if the size field exceeds 24 or the index lies outside
+ * the table, the kvm_read_guest() error if the entry cannot be read, or
+ * otherwise the result of kvmppc_mmu_walk_radix_tree().
+ */
+int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, u64 table,
+ int table_index, u64 *pte_ret_p)
+{
+ struct kvm *kvm = vcpu->kvm;
+ int ret;
+ unsigned long size, ptbl, root;
+ struct prtb_entry entry;
+
+ if ((table & PRTS_MASK) > 24)
+ return -EINVAL;
+ /* Table size in bytes is 2^(size field + 12) */
+ size = 1ul << ((table & PRTS_MASK) + 12);
+
+ /* Is the table big enough to contain this entry? */
+ if ((table_index * sizeof(entry)) >= size)
+ return -EINVAL;
+
+ /* Read the table to find the root of the radix tree */
+ ptbl = (table & PRTB_MASK) + (table_index * sizeof(entry));
+ kvm_vcpu_srcu_read_lock(vcpu);
+ ret = kvm_read_guest(kvm, ptbl, &entry, sizeof(entry));
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (ret)
+ return ret;
+
+ /* Root is stored in the first double word */
+ root = be64_to_cpu(entry.prtb0);
+
+ return kvmppc_mmu_walk_radix_tree(vcpu, eaddr, gpte, root, pte_ret_p);
+}
+
+/*
+ * Translate guest effective address eaddr through the guest's process
+ * table (vcpu->kvm->arch.process_table) and fill in *gpte.
+ *
+ * The top two bits of eaddr select the PID: 0 uses the vcpu's current
+ * PID, 3 uses PID 0, anything else is rejected with -EINVAL.
+ * After the walk, the permissions in *gpte are narrowed according to
+ * MSR_PR, _PAGE_PRIVILEGED and the AMR/IAMR bits tested below.
+ *
+ * The @data and @iswrite arguments are not used by this function.
+ *
+ * Returns 0 on success or the error from
+ * kvmppc_mmu_radix_translate_table().
+ */
+int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
+ struct kvmppc_pte *gpte, bool data, bool iswrite)
+{
+ u32 pid;
+ u64 pte;
+ int ret;
+
+ /* Work out effective PID */
+ switch (eaddr >> 62) {
+ case 0:
+ pid = kvmppc_get_pid(vcpu);
+ break;
+ case 3:
+ pid = 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = kvmppc_mmu_radix_translate_table(vcpu, eaddr, gpte,
+ vcpu->kvm->arch.process_table, pid, &pte);
+ if (ret)
+ return ret;
+
+ /* Check privilege (applies only to process scoped translations) */
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ /* Problem state may not touch privileged pages at all */
+ if (pte & _PAGE_PRIVILEGED) {
+ gpte->may_read = 0;
+ gpte->may_write = 0;
+ gpte->may_execute = 0;
+ }
+ } else {
+ if (!(pte & _PAGE_PRIVILEGED)) {
+ /* Check AMR/IAMR to see if strict mode is in force */
+ if (kvmppc_get_amr_hv(vcpu) & (1ul << 62))
+ gpte->may_read = 0;
+ if (kvmppc_get_amr_hv(vcpu) & (1ul << 63))
+ gpte->may_write = 0;
+ if (vcpu->arch.iamr & (1ul << 62))
+ gpte->may_execute = 0;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Invalidate the TLB entry for one page of the given LPID.
+ *
+ * @addr is rounded down to a page boundary. A @pshift of 0 means the
+ * base page size (PAGE_SHIFT / PAGE_SIZE).
+ * When not running on pseries the flush is done directly; otherwise it
+ * is requested via hcall (H_TLB_INVALIDATE, or pseries_rpt_invalidate()
+ * when FW_FEATURE_RPT_INVALIDATE is available). A failing hcall is
+ * only logged.
+ */
+void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr,
+ unsigned int pshift, u64 lpid)
+{
+ unsigned long psize = PAGE_SIZE;
+ int psi;
+ long rc;
+ unsigned long rb;
+
+ if (pshift)
+ psize = 1UL << pshift;
+ else
+ pshift = PAGE_SHIFT;
+
+ addr &= ~(psize - 1);
+
+ if (!kvmhv_on_pseries()) {
+ radix__flush_tlb_lpid_page(lpid, addr, psize);
+ return;
+ }
+
+ psi = shift_to_mmu_psize(pshift);
+
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+ rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
+ lpid, rb);
+ } else {
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_TLB,
+ psize_to_rpti_pgsize(psi),
+ addr, addr + psize);
+ }
+
+ if (rc)
+ pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
+}
+
+/*
+ * Flush the page walk cache (PWC) for the given LPID.
+ * Done directly when not running on pseries, otherwise via hcall
+ * (H_TLB_INVALIDATE, or pseries_rpt_invalidate() when
+ * FW_FEATURE_RPT_INVALIDATE is available). A failing hcall is only
+ * logged.
+ */
+static void kvmppc_radix_flush_pwc(struct kvm *kvm, u64 lpid)
+{
+ long rc;
+
+ if (!kvmhv_on_pseries()) {
+ radix__flush_pwc_lpid(lpid);
+ return;
+ }
+
+ if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+ rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
+ lpid, TLBIEL_INVAL_SET_LPID);
+ else
+ rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+ H_RPTI_TYPE_NESTED |
+ H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+ 0, -1UL);
+ if (rc)
+ pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
+}
+
+/*
+ * Clear the @clr bits and set the @set bits in *ptep via
+ * __radix_pte_update() and return its result (used by callers as the
+ * previous PTE value). @kvm, @addr and @shift are not used here.
+ */
+static unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep,
+ unsigned long clr, unsigned long set,
+ unsigned long addr, unsigned int shift)
+{
+ return __radix_pte_update(ptep, clr, set);
+}
+
+/* Install @pte at *ptep for guest address @addr, using kvm->mm as the mm. */
+static void kvmppc_radix_set_pte_at(struct kvm *kvm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ radix__set_pte_at(kvm->mm, addr, ptep, pte, 0);
+}
+
+static struct kmem_cache *kvm_pte_cache;
+static struct kmem_cache *kvm_pmd_cache;
+
+/*
+ * Allocate a PTE page from kvm_pte_cache with GFP_KERNEL.
+ * Returns the kmem_cache_alloc() result unchecked, so callers must cope
+ * with NULL.
+ */
+static pte_t *kvmppc_pte_alloc(void)
+{
+ pte_t *pte;
+
+ pte = kmem_cache_alloc(kvm_pte_cache, GFP_KERNEL);
+ /* pmd_populate() will only reference _pa(pte). */
+ kmemleak_ignore(pte);
+
+ return pte;
+}
+
+/* Return a PTE page to kvm_pte_cache. */
+static void kvmppc_pte_free(pte_t *ptep)
+{
+ kmem_cache_free(kvm_pte_cache, ptep);
+}
+
+/*
+ * Allocate a PMD page from kvm_pmd_cache with GFP_KERNEL.
+ * Returns the kmem_cache_alloc() result unchecked, so callers must cope
+ * with NULL.
+ */
+static pmd_t *kvmppc_pmd_alloc(void)
+{
+ pmd_t *pmd;
+
+ pmd = kmem_cache_alloc(kvm_pmd_cache, GFP_KERNEL);
+ /* pud_populate() will only reference _pa(pmd). */
+ kmemleak_ignore(pmd);
+
+ return pmd;
+}
+
+/* Return a PMD page to kvm_pmd_cache. */
+static void kvmppc_pmd_free(pmd_t *pmdp)
+{
+ kmem_cache_free(kvm_pmd_cache, pmdp);
+}
+
+/*
+ * Called with kvm->mmu_lock held
+ *
+ * Clear the PTE at @pte, invalidate its TLB entry, and, for entries that
+ * belong to this VM's own LPID, do the follow-up bookkeeping: adjust the
+ * large-page statistics, remove nested rmap entries for the range, and
+ * propagate a set dirty bit into the memslot's dirty bitmap.
+ *
+ * @shift is the page shift of the mapping; 0 is treated as PAGE_SIZE.
+ * @memslot may be NULL, in which case it is looked up from @gpa.
+ */
+void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa,
+ unsigned int shift,
+ const struct kvm_memory_slot *memslot,
+ u64 lpid)
+
+{
+ unsigned long old;
+ unsigned long gfn = gpa >> PAGE_SHIFT;
+ unsigned long page_size = PAGE_SIZE;
+ unsigned long hpa;
+
+ /* Clear every bit of the PTE, keeping the old value for later */
+ old = kvmppc_radix_update_pte(kvm, pte, ~0UL, 0, gpa, shift);
+ kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid);
+
+ /* The following only applies to L1 entries */
+ if (lpid != kvm->arch.lpid)
+ return;
+
+ if (!memslot) {
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot)
+ return;
+ }
+ if (shift) { /* 1GB or 2MB page */
+ page_size = 1ul << shift;
+ if (shift == PMD_SHIFT)
+ kvm->stat.num_2M_pages--;
+ else if (shift == PUD_SHIFT)
+ kvm->stat.num_1G_pages--;
+ }
+
+ gpa &= ~(page_size - 1);
+ hpa = old & PTE_RPN_MASK;
+ kvmhv_remove_nest_rmap_range(kvm, memslot, gpa, hpa, page_size);
+
+ if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot, gfn, page_size);
+}
+
+/*
+ * kvmppc_free_p?d are used to free existing page tables, and recursively
+ * descend and clear and free children.
+ * Callers are responsible for flushing the PWC.
+ *
+ * When page tables are being unmapped/freed as part of page fault path
+ * (full == false), valid ptes are generally not expected; however, there
+ * is one situation where they arise, which is when dirty page logging is
+ * turned off for a memslot while the VM is running. The new memslot
+ * becomes visible to page faults before the memslot commit function
+ * gets to flush the memslot, which can lead to a 2MB page mapping being
+ * installed for a guest physical address where there are already 64kB
+ * (or 4kB) mappings (of sub-pages of the same 2MB page).
+ */
+/*
+ * Tear down and free one PTE page.
+ * With @full set the whole page is simply zeroed; otherwise every
+ * non-zero entry is unmapped individually through kvmppc_unmap_pte()
+ * (which also flushes the TLB entry) before the page is freed.
+ */
+static void kvmppc_unmap_free_pte(struct kvm *kvm, pte_t *pte, bool full,
+ u64 lpid)
+{
+ if (full) {
+ memset(pte, 0, sizeof(long) << RADIX_PTE_INDEX_SIZE);
+ } else {
+ pte_t *p = pte;
+ unsigned long it;
+
+ for (it = 0; it < PTRS_PER_PTE; ++it, ++p) {
+ if (pte_val(*p) == 0)
+ continue;
+ kvmppc_unmap_pte(kvm, p,
+ pte_pfn(*p) << PAGE_SHIFT,
+ PAGE_SHIFT, NULL, lpid);
+ }
+ }
+
+ kvmppc_pte_free(pte);
+}
+
+/*
+ * Tear down and free one PMD page together with any PTE pages below it.
+ * Leaf (large page) entries are just cleared when @full is set; when it
+ * is not, a leaf is unexpected (hence the one-time warning) and is
+ * unmapped through kvmppc_unmap_pte(). Non-leaf entries have their PTE
+ * page freed via kvmppc_unmap_free_pte() and are then cleared.
+ */
+static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
+ u64 lpid)
+{
+ unsigned long im;
+ pmd_t *p = pmd;
+
+ for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
+ if (!pmd_present(*p))
+ continue;
+ if (pmd_leaf(*p)) {
+ if (full) {
+ pmd_clear(p);
+ } else {
+ WARN_ON_ONCE(1);
+ kvmppc_unmap_pte(kvm, (pte_t *)p,
+ pte_pfn(*(pte_t *)p) << PAGE_SHIFT,
+ PMD_SHIFT, NULL, lpid);
+ }
+ } else {
+ pte_t *pte;
+
+ pte = pte_offset_kernel(p, 0);
+ kvmppc_unmap_free_pte(kvm, pte, full, lpid);
+ pmd_clear(p);
+ }
+ }
+ kvmppc_pmd_free(pmd);
+}
+
+/*
+ * Tear down and free one PUD page together with everything below it.
+ * Leaf entries are cleared; non-leaf entries have their PMD page freed
+ * via kvmppc_unmap_free_pmd() in "full" mode and are then cleared.
+ */
+static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
+ u64 lpid)
+{
+ unsigned long iu;
+ pud_t *p = pud;
+
+ for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
+ if (!pud_present(*p))
+ continue;
+ if (pud_leaf(*p)) {
+ pud_clear(p);
+ } else {
+ pmd_t *pmd;
+
+ pmd = pmd_offset(p, 0);
+ kvmppc_unmap_free_pmd(kvm, pmd, true, lpid);
+ pud_clear(p);
+ }
+ }
+ pud_free(kvm->mm, pud);
+}
+
+/*
+ * Free every page table page hanging off the PGD page @pgd and clear
+ * the corresponding top-level entries. The PGD page itself is not
+ * freed here.
+ */
+void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, u64 lpid)
+{
+ unsigned long ig;
+
+ for (ig = 0; ig < PTRS_PER_PGD; ++ig, ++pgd) {
+ p4d_t *p4d = p4d_offset(pgd, 0);
+ pud_t *pud;
+
+ if (!p4d_present(*p4d))
+ continue;
+ pud = pud_offset(p4d, 0);
+ kvmppc_unmap_free_pud(kvm, pud, lpid);
+ p4d_clear(p4d);
+ }
+}
+
+/*
+ * Release the VM's partition-scoped radix tree, if it has one: free all
+ * lower-level tables, free the PGD page, and forget the pointer.
+ */
+void kvmppc_free_radix(struct kvm *kvm)
+{
+	/* Nothing was ever allocated (or it is already gone) */
+	if (!kvm->arch.pgtable)
+		return;
+
+	kvmppc_free_pgtable_radix(kvm, kvm->arch.pgtable, kvm->arch.lpid);
+	pgd_free(kvm->mm, kvm->arch.pgtable);
+	kvm->arch.pgtable = NULL;
+}
+
+/*
+ * Detach and free the PTE page referenced by the PMD entry @pmd.
+ * Valid PTEs found in the page are unmapped one by one (non-"full"
+ * mode). @gpa is not used by this function.
+ */
+static void kvmppc_unmap_free_pmd_entry_table(struct kvm *kvm, pmd_t *pmd,
+ unsigned long gpa, u64 lpid)
+{
+ pte_t *pte = pte_offset_kernel(pmd, 0);
+
+ /*
+ * Clearing the pmd entry then flushing the PWC ensures that the pte
+ * page will no longer be cached by the MMU, so can be freed without
+ * flushing the PWC again.
+ */
+ pmd_clear(pmd);
+ kvmppc_radix_flush_pwc(kvm, lpid);
+
+ kvmppc_unmap_free_pte(kvm, pte, false, lpid);
+}
+
+/*
+ * Detach and free the PMD page referenced by the PUD entry @pud,
+ * including any PTE pages below it (non-"full" mode, so valid entries
+ * are unmapped individually). @gpa is not used by this function.
+ */
+static void kvmppc_unmap_free_pud_entry_table(struct kvm *kvm, pud_t *pud,
+ unsigned long gpa, u64 lpid)
+{
+ pmd_t *pmd = pmd_offset(pud, 0);
+
+ /*
+ * Clearing the pud entry then flushing the PWC ensures that the pmd
+ * page and any children pte pages will no longer be cached by the MMU,
+ * so can be freed without flushing the PWC again.
+ */
+ pud_clear(pud);
+ kvmppc_radix_flush_pwc(kvm, lpid);
+
+ kvmppc_unmap_free_pmd(kvm, pmd, false, lpid);
+}
+
+/*
+ * There are a number of bits which may differ between different faults to
+ * the same partition scope entry. RC bits, in the course of cleaning and
+ * aging. And the write bit can change, either the access could have been
+ * upgraded, or a read fault could happen concurrently with a write fault
+ * that sets those bits first.
+ */
+#define PTE_BITS_MUST_MATCH (~(_PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED))
+
+/*
+ * Insert @pte for guest physical address @gpa into the radix tree rooted
+ * at @pgtable.
+ *
+ * @level selects where the entry goes: 0 = normal PTE in a PTE page,
+ * 1 = leaf at PMD level (2MB page), 2 = leaf at PUD level (1GB page).
+ * @mmu_seq is the invalidation sequence number sampled by the caller;
+ * if an invalidation raced with us the insert is abandoned.
+ * @rmapp / @n_rmap, when both non-NULL, are passed to
+ * kvmhv_insert_nest_rmap() after a new entry has been installed.
+ *
+ * Any page table pages that might be needed are allocated up front,
+ * before kvm->mmu_lock is taken; the tree is then re-walked under the
+ * lock and unused allocations are freed on the way out.
+ *
+ * Returns 0 on success (including when an equivalent entry was already
+ * present), -EAGAIN if the caller should let the guest retry, or -ENOMEM
+ * if a needed page table page could not be allocated.
+ */
+int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
+ unsigned long gpa, unsigned int level,
+ unsigned long mmu_seq, u64 lpid,
+ unsigned long *rmapp, struct rmap_nested **n_rmap)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud, *new_pud = NULL;
+ pmd_t *pmd, *new_pmd = NULL;
+ pte_t *ptep, *new_ptep = NULL;
+ int ret;
+
+ /* Traverse the guest's 2nd-level tree, allocate new levels needed */
+ pgd = pgtable + pgd_index(gpa);
+ p4d = p4d_offset(pgd, gpa);
+
+ pud = NULL;
+ if (p4d_present(*p4d))
+ pud = pud_offset(p4d, gpa);
+ else
+ new_pud = pud_alloc_one(kvm->mm, gpa);
+
+ pmd = NULL;
+ if (pud && pud_present(*pud) && !pud_leaf(*pud))
+ pmd = pmd_offset(pud, gpa);
+ else if (level <= 1)
+ new_pmd = kvmppc_pmd_alloc();
+
+ if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_leaf(*pmd)))
+ new_ptep = kvmppc_pte_alloc();
+
+ /* Check if we might have been invalidated; let the guest retry if so */
+ spin_lock(&kvm->mmu_lock);
+ ret = -EAGAIN;
+ if (mmu_invalidate_retry(kvm, mmu_seq))
+ goto out_unlock;
+
+ /* Now traverse again under the lock and change the tree */
+ ret = -ENOMEM;
+ if (p4d_none(*p4d)) {
+ if (!new_pud)
+ goto out_unlock;
+ p4d_populate(kvm->mm, p4d, new_pud);
+ new_pud = NULL;
+ }
+ pud = pud_offset(p4d, gpa);
+ if (pud_leaf(*pud)) {
+ unsigned long hgpa = gpa & PUD_MASK;
+
+ /* Check if we raced and someone else has set the same thing */
+ if (level == 2) {
+ if (pud_raw(*pud) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
+ /* Valid 1GB page here already, add our extra bits */
+ WARN_ON_ONCE((pud_val(*pud) ^ pte_val(pte)) &
+ PTE_BITS_MUST_MATCH);
+ kvmppc_radix_update_pte(kvm, (pte_t *)pud,
+ 0, pte_val(pte), hgpa, PUD_SHIFT);
+ ret = 0;
+ goto out_unlock;
+ }
+ /*
+ * If we raced with another CPU which has just put
+ * a 1GB pte in after we saw a pmd page, try again.
+ */
+ if (!new_pmd) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ /* Valid 1GB page here already, remove it */
+ kvmppc_unmap_pte(kvm, (pte_t *)pud, hgpa, PUD_SHIFT, NULL,
+ lpid);
+ }
+ if (level == 2) {
+ if (!pud_none(*pud)) {
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+ * table page.
+ */
+ kvmppc_unmap_free_pud_entry_table(kvm, pud, gpa, lpid);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, (pte_t *)pud, pte);
+ if (rmapp && n_rmap)
+ kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
+ ret = 0;
+ goto out_unlock;
+ }
+ if (pud_none(*pud)) {
+ if (!new_pmd)
+ goto out_unlock;
+ pud_populate(kvm->mm, pud, new_pmd);
+ new_pmd = NULL;
+ }
+ pmd = pmd_offset(pud, gpa);
+ if (pmd_leaf(*pmd)) {
+ unsigned long lgpa = gpa & PMD_MASK;
+
+ /* Check if we raced and someone else has set the same thing */
+ if (level == 1) {
+ if (pmd_raw(*pmd) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
+ /* Valid 2MB page here already, add our extra bits */
+ WARN_ON_ONCE((pmd_val(*pmd) ^ pte_val(pte)) &
+ PTE_BITS_MUST_MATCH);
+ kvmppc_radix_update_pte(kvm, pmdp_ptep(pmd),
+ 0, pte_val(pte), lgpa, PMD_SHIFT);
+ ret = 0;
+ goto out_unlock;
+ }
+
+ /*
+ * If we raced with another CPU which has just put
+ * a 2MB pte in after we saw a pte page, try again.
+ */
+ if (!new_ptep) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ /* Valid 2MB page here already, remove it */
+ kvmppc_unmap_pte(kvm, pmdp_ptep(pmd), lgpa, PMD_SHIFT, NULL,
+ lpid);
+ }
+ if (level == 1) {
+ if (!pmd_none(*pmd)) {
+ /*
+ * There's a page table page here, but we wanted to
+ * install a large page, so remove and free the page
+ * table page.
+ */
+ kvmppc_unmap_free_pmd_entry_table(kvm, pmd, gpa, lpid);
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, pmdp_ptep(pmd), pte);
+ if (rmapp && n_rmap)
+ kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
+ ret = 0;
+ goto out_unlock;
+ }
+ if (pmd_none(*pmd)) {
+ if (!new_ptep)
+ goto out_unlock;
+ pmd_populate(kvm->mm, pmd, new_ptep);
+ new_ptep = NULL;
+ }
+ ptep = pte_offset_kernel(pmd, gpa);
+ if (pte_present(*ptep)) {
+ /* Check if someone else set the same thing */
+ if (pte_raw(*ptep) == pte_raw(pte)) {
+ ret = 0;
+ goto out_unlock;
+ }
+ /* Valid page here already, add our extra bits */
+ WARN_ON_ONCE((pte_val(*ptep) ^ pte_val(pte)) &
+ PTE_BITS_MUST_MATCH);
+ kvmppc_radix_update_pte(kvm, ptep, 0, pte_val(pte), gpa, 0);
+ ret = 0;
+ goto out_unlock;
+ }
+ kvmppc_radix_set_pte_at(kvm, gpa, ptep, pte);
+ if (rmapp && n_rmap)
+ kvmhv_insert_nest_rmap(kvm, rmapp, n_rmap);
+ ret = 0;
+
+ out_unlock:
+ spin_unlock(&kvm->mmu_lock);
+ /* Free whichever pre-allocated pages were not consumed above */
+ if (new_pud)
+ pud_free(kvm->mm, new_pud);
+ if (new_pmd)
+ kvmppc_pmd_free(new_pmd);
+ if (new_ptep)
+ kvmppc_pte_free(new_ptep);
+ return ret;
+}
+
+/*
+ * Set the accessed bit (and, for a write, the dirty bit) in the
+ * 2nd-level PTE that maps @gpa, mimicking what the hardware would do.
+ *
+ * The PTE is looked up in the nested guest's table for @lpid when
+ * @nested is true, and in the VM's own secondary table otherwise.
+ *
+ * Returns true if the bits were set; false if there is no present PTE,
+ * or if this is a write and the PTE is not writable.
+ */
+bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing,
+			     unsigned long gpa, u64 lpid)
+{
+	unsigned long rc_bits = writing ? (_PAGE_ACCESSED | _PAGE_DIRTY)
+					: _PAGE_ACCESSED;
+	unsigned int shift;
+	pte_t *ptep;
+
+	ptep = nested ? find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift)
+		      : find_kvm_secondary_pte(kvm, gpa, &shift);
+
+	/* No usable mapping: the caller has to take the slow path */
+	if (!ptep || !pte_present(*ptep))
+		return false;
+
+	/* A store needs a writable mapping before C may be set */
+	if (writing && !pte_write(*ptep))
+		return false;
+
+	kvmppc_radix_update_pte(kvm, ptep, 0, rc_bits, gpa, shift);
+	return true;
+}
+
+/*
+ * Fault in the host page backing guest physical address @gpa in
+ * @memslot and install a matching entry in the VM's partition-scoped
+ * radix tree.
+ *
+ * @writing:      true if the faulting access was a store
+ * @inserted_pte: optional; receives the PTE value that was passed to
+ *                kvmppc_create_pte()
+ * @levelp:       optional; receives the chosen level (0 = base page,
+ *                1 = PMD-level leaf, 2 = PUD-level leaf)
+ *
+ * Returns -EFAULT if the pfn lookup fails, RESUME_GUEST if the host PTE
+ * is not present at the moment, and otherwise the result of
+ * kvmppc_create_pte().
+ */
+int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
+ unsigned long gpa,
+ struct kvm_memory_slot *memslot,
+ bool writing,
+ pte_t *inserted_pte, unsigned int *levelp)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct page *page = NULL;
+ unsigned long mmu_seq;
+ unsigned long hva, gfn = gpa >> PAGE_SHIFT;
+ bool upgrade_write = false;
+ pte_t pte, *ptep;
+ unsigned int shift, level;
+ int ret;
+ bool large_enable;
+ kvm_pfn_t pfn;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_invalidate_seq;
+ smp_rmb();
+
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ pfn = __kvm_faultin_pfn(memslot, gfn, writing ? FOLL_WRITE : 0,
+ &upgrade_write, &page);
+ if (is_error_noslot_pfn(pfn))
+ return -EFAULT;
+
+ /*
+ * Read the PTE from the process' radix tree and use that
+ * so we get the shift and attribute bits.
+ */
+ spin_lock(&kvm->mmu_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+ pte = __pte(0);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ spin_unlock(&kvm->mmu_lock);
+ /*
+ * If the PTE disappeared temporarily due to a THP
+ * collapse, just return and let the guest try again.
+ */
+ if (!pte_present(pte)) {
+ if (page)
+ put_page(page);
+ return RESUME_GUEST;
+ }
+
+ /* If we're logging dirty pages, always map single pages */
+ large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES);
+
+ /* Get pte level from shift/size */
+ /* A large mapping needs gpa and hva to share the offset within the large page */
+ if (large_enable && shift == PUD_SHIFT &&
+ (gpa & (PUD_SIZE - PAGE_SIZE)) ==
+ (hva & (PUD_SIZE - PAGE_SIZE))) {
+ level = 2;
+ } else if (large_enable && shift == PMD_SHIFT &&
+ (gpa & (PMD_SIZE - PAGE_SIZE)) ==
+ (hva & (PMD_SIZE - PAGE_SIZE))) {
+ level = 1;
+ } else {
+ level = 0;
+ if (shift > PAGE_SHIFT) {
+ /*
+ * If the pte maps more than one page, bring over
+ * bits from the virtual address to get the real
+ * address of the specific single page we want.
+ */
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pte = __pte(pte_val(pte) | (hva & rpnmask));
+ }
+ }
+
+ pte = __pte(pte_val(pte) | _PAGE_EXEC | _PAGE_ACCESSED);
+ if (writing || upgrade_write) {
+ if (pte_val(pte) & _PAGE_WRITE)
+ pte = __pte(pte_val(pte) | _PAGE_DIRTY);
+ } else {
+ /* Map read-only so a later store faults and can be upgraded */
+ pte = __pte(pte_val(pte) & ~(_PAGE_WRITE | _PAGE_DIRTY));
+ }
+
+ /* Allocate space in the tree and write the PTE */
+ ret = kvmppc_create_pte(kvm, kvm->arch.pgtable, pte, gpa, level,
+ mmu_seq, kvm->arch.lpid, NULL, NULL);
+ /* The outputs are written even when the insert failed */
+ if (inserted_pte)
+ *inserted_pte = pte;
+ if (levelp)
+ *levelp = level;
+
+ if (page) {
+ if (!ret && (pte_val(pte) & _PAGE_WRITE))
+ set_page_dirty_lock(page);
+ put_page(page);
+ }
+
+ /* Increment number of large pages if we (successfully) inserted one */
+ if (!ret) {
+ if (level == 1)
+ kvm->stat.num_2M_pages++;
+ else if (level == 2)
+ kvm->stat.num_1G_pages++;
+ }
+
+ return ret;
+}
+
+/*
+ * Handle a page fault taken by a radix guest.
+ *
+ * @vcpu:  vcpu that faulted; vcpu->arch.fault_gpa supplies the guest
+ *         physical address
+ * @ea:    faulting effective address; its low 12 bits are merged into gpa
+ *         (unless DSISR_PRTABLE_FAULT is set) and it is passed along when
+ *         a DSI is queued for the guest
+ * @dsisr: fault status bits; DSISR_ISSTORE marks the access as a write
+ *
+ * Returns RESUME_GUEST when the guest should be re-entered (a DSI was
+ * queued, updating the R/C bits was sufficient, or the PTE insertion
+ * returned 0 or -EAGAIN), -EFAULT for DSISR_UNSUPP_MMU, and otherwise
+ * whatever kvmppc_send_page_to_uv(), kvmppc_hv_emulate_mmio() or
+ * kvmppc_book3s_instantiate_page() returned.
+ */
+int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu,
+ unsigned long ea, unsigned long dsisr)
+{
+ struct kvm *kvm = vcpu->kvm;
+ unsigned long gpa, gfn;
+ struct kvm_memory_slot *memslot;
+ long ret;
+ bool writing = !!(dsisr & DSISR_ISSTORE);
+
+ /* Check for unusual errors */
+ if (dsisr & DSISR_UNSUPP_MMU) {
+ pr_err("KVM: Got unsupported MMU fault\n");
+ return -EFAULT;
+ }
+ if (dsisr & DSISR_BADACCESS) {
+ /* Reflect to the guest as DSI */
+ pr_err("KVM: Got radix HV page fault with DSISR=%lx\n", dsisr);
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ ea, dsisr);
+ return RESUME_GUEST;
+ }
+
+ /* Translate the logical address */
+ /* Clear the low 12 bits and the top 4 bits of the faulting address */
+ gpa = vcpu->arch.fault_gpa & ~0xfffUL;
+ gpa &= ~0xF000000000000000ul;
+ gfn = gpa >> PAGE_SHIFT;
+ if (!(dsisr & DSISR_PRTABLE_FAULT))
+ gpa |= ea & 0xfff;
+
+ /* Secure guest with init done: hand the gfn to kvmppc_send_page_to_uv() */
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return kvmppc_send_page_to_uv(kvm, gfn);
+
+ /* Get the corresponding memslot */
+ memslot = gfn_to_memslot(kvm, gfn);
+
+ /* No memslot means it's an emulated MMIO region */
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+ if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS |
+ DSISR_SET_RC)) {
+ /*
+ * Bad address in guest page table tree, or other
+ * unusual error - reflect it to the guest as DSI.
+ */
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ ea, dsisr);
+ return RESUME_GUEST;
+ }
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
+ }
+
+ /* A store to a read-only memslot is reflected as a protection fault */
+ if (memslot->flags & KVM_MEM_READONLY) {
+ if (writing) {
+ /* give the guest a DSI */
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ ea, DSISR_ISSTORE | DSISR_PROTFAULT);
+ return RESUME_GUEST;
+ }
+ }
+
+ /* Failed to set the reference/change bits */
+ if (dsisr & DSISR_SET_RC) {
+ spin_lock(&kvm->mmu_lock);
+ if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+ gpa, kvm->arch.lpid))
+ dsisr &= ~DSISR_SET_RC;
+ spin_unlock(&kvm->mmu_lock);
+
+ /* Nothing else is wrong with the access: just retry it */
+ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+ DSISR_PROTFAULT | DSISR_SET_RC)))
+ return RESUME_GUEST;
+ }
+
+ /* Try to insert a pte */
+ ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot, writing,
+ NULL, NULL);
+
+ /* 0 and -EAGAIN both result in re-entering the guest */
+ if (ret == 0 || ret == -EAGAIN)
+ ret = RESUME_GUEST;
+ return ret;
+}
+
+/*
+ * Remove the partition-scoped mapping of @gfn, if one is present.
+ * For a secure guest (KVMPPC_SECURE_INIT_DONE) only uv_page_inval() is
+ * called for the page.
+ *
+ * Called with kvm->mmu_lock held.
+ */
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		     unsigned long gfn)
+{
+	unsigned long gpa = gfn << PAGE_SHIFT;
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
+		uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
+		return;
+	}
+
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+	if (!ptep || !pte_present(*ptep))
+		return;
+
+	kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid);
+}
+
+/*
+ * Clear _PAGE_ACCESSED in the PTE mapping @gfn and in the matching shadow
+ * PTEs of nested guests. Returns true if the PTE was present and young,
+ * false otherwise (always false for a secure guest with init done).
+ *
+ * Called with kvm->mmu_lock held.
+ */
+bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+		   unsigned long gfn)
+{
+	unsigned long gpa = gfn << PAGE_SHIFT;
+	unsigned long old, *rmapp;
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return false;
+
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+	if (!ptep || !pte_present(*ptep) || !pte_young(*ptep))
+		return false;
+
+	old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift);
+	/* XXX need to flush tlb here? */
+	/* Also clear bit in ptes in shadow pgtable for nested guests */
+	rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+	kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0,
+				       old & PTE_RPN_MASK, 1UL << shift);
+	return true;
+}
+
+/*
+ * Report whether the PTE mapping @gfn is present and young, without
+ * modifying it. Always false for a secure guest with init done.
+ *
+ * Called with kvm->mmu_lock held.
+ */
+bool kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			unsigned long gfn)
+
+{
+	unsigned long gpa = gfn << PAGE_SHIFT;
+	unsigned int shift;
+	pte_t *ptep;
+
+	if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+		return false;
+
+	ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+	return ptep && pte_present(*ptep) && pte_young(*ptep);
+}
+
+/*
+ * Test and clear the dirty bit of the partition-scoped PTE that maps page
+ * @pagenum of @memslot.
+ *
+ * The PTE is looked up and sampled without kvm->mmu_lock. Only when the
+ * sample is present and dirty is the lock taken and the PTE re-read; if it
+ * changed and is no longer present and dirty, nothing is done. Otherwise
+ * _PAGE_DIRTY is cleared, kvmppc_radix_tlbie_page() is called for the page
+ * and the nested-guest rmap list is updated.
+ *
+ * Returns the number of PAGE_SIZE pages that are dirty: 1 if a dirty PTE
+ * was cleared, 0 otherwise (including for a secure guest once
+ * KVMPPC_SECURE_INIT_DONE is set).
+ */
+static int kvm_radix_test_clear_dirty(struct kvm *kvm,
+ struct kvm_memory_slot *memslot, int pagenum)
+{
+ unsigned long gfn = memslot->base_gfn + pagenum;
+ unsigned long gpa = gfn << PAGE_SHIFT;
+ pte_t *ptep, pte;
+ unsigned int shift;
+ int ret = 0;
+ unsigned long old, *rmapp;
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return ret;
+
+ /*
+ * For performance reasons we don't hold kvm->mmu_lock while walking the
+ * partition scoped table.
+ */
+ ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift);
+ if (!ptep)
+ return 0;
+
+ /* Unlocked snapshot; it is re-validated under mmu_lock below */
+ pte = READ_ONCE(*ptep);
+ if (pte_present(pte) && pte_dirty(pte)) {
+ spin_lock(&kvm->mmu_lock);
+ /*
+ * Recheck the pte again
+ */
+ if (pte_val(pte) != pte_val(*ptep)) {
+ /*
+ * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can
+ * only find PAGE_SIZE pte entries here. We can continue
+ * to use the pte addr returned by above page table
+ * walk.
+ */
+ if (!pte_present(*ptep) || !pte_dirty(*ptep)) {
+ spin_unlock(&kvm->mmu_lock);
+ return 0;
+ }
+ }
+
+ ret = 1;
+ /* A non-zero shift (large page) is not expected here */
+ VM_BUG_ON(shift);
+ old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0,
+ gpa, shift);
+ kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid);
+ /* Also clear bit in ptes in shadow pgtable for nested guests */
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0,
+ old & PTE_RPN_MASK,
+ 1UL << shift);
+ spin_unlock(&kvm->mmu_lock);
+ }
+ return ret;
+}
+
+/*
+ * Collect the dirty state of @memslot into the bitmap @map, clearing the
+ * dirty bits in the page table as it goes. Always returns 0.
+ */
+long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm,
+			struct kvm_memory_slot *memslot, unsigned long *map)
+{
+	unsigned long pagenum = 0;
+
+	while (pagenum < memslot->npages) {
+		int npages = kvm_radix_test_clear_dirty(kvm, memslot, pagenum);
+
+		/* Clean page: just step to the next one */
+		if (!npages) {
+			pagenum++;
+			continue;
+		}
+
+		/*
+		 * Note that if npages > 0 then pagenum must be a multiple of
+		 * npages, since huge pages are only used to back the guest at
+		 * guest real addresses that are a multiple of their size.
+		 * Since we have at most one PTE covering any given guest
+		 * real address, if npages > 1 we can skip to pagenum + npages.
+		 */
+		set_dirty_bits(map, pagenum, npages);
+		pagenum += npages;
+	}
+	return 0;
+}
+
+/*
+ * Unmap every page of @memslot from the guest's partition-scoped table.
+ *
+ * If KVMPPC_SECURE_INIT_START is set, kvmppc_uvmem_drop_pages() is called
+ * for the memslot first; if KVMPPC_SECURE_INIT_DONE is set nothing further
+ * is done. Otherwise, under kvm->mmu_lock, the slot is walked in PAGE_SIZE
+ * steps, every present PTE is unmapped, and kvm->mmu_invalidate_seq is
+ * incremented before the lock is dropped.
+ */
+void kvmppc_radix_flush_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long n;
+ pte_t *ptep;
+ unsigned long gpa;
+ unsigned int shift;
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
+
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return;
+
+ gpa = memslot->base_gfn << PAGE_SHIFT;
+ spin_lock(&kvm->mmu_lock);
+ /* n only counts pages; gpa tracks the address being unmapped */
+ for (n = memslot->npages; n; --n) {
+ ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
+ if (ptep && pte_present(*ptep))
+ kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
+ kvm->arch.lpid);
+ gpa += PAGE_SIZE;
+ }
+ /*
+ * Increase the mmu notifier sequence number to prevent any page
+ * fault that read the memslot earlier from writing a PTE.
+ */
+ kvm->mmu_invalidate_seq++;
+ spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * Append the AP encoding for page size @psize to info->ap_encodings at
+ * position *@indexp and advance *@indexp. Page sizes whose
+ * mmu_psize_defs[] entry has a zero shift are skipped.
+ */
+static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info,
+				 int psize, int *indexp)
+{
+	if (mmu_psize_defs[psize].shift) {
+		/* Low bits carry the page shift, the AP value sits at bit 29 up */
+		info->ap_encodings[*indexp] = mmu_psize_defs[psize].shift |
+			(mmu_psize_defs[psize].ap << 29);
+		*indexp += 1;
+	}
+}
+
+/*
+ * Fill @info with the radix geometries and AP encodings reported for this
+ * host: geometries[0] describes 4k pages, geometries[1] 64k pages, and
+ * ap_encodings lists the 4K, 64K, 2M and 1G sizes that are defined.
+ *
+ * Returns 0 on success, -EINVAL if radix is not enabled.
+ */
+int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info)
+{
+	int level, nr_encodings = 0;
+
+	if (!radix_enabled())
+		return -EINVAL;
+	memset(info, 0, sizeof(*info));
+
+	info->geometries[0].page_shift = 12;	/* 4k page size */
+	info->geometries[1].page_shift = 16;	/* 64k page size */
+	for (level = 0; level < 4; ++level) {
+		/* The 4k geometry uses 9 bits at level 0, the table otherwise */
+		info->geometries[0].level_bits[level] =
+			level ? p9_supported_radix_bits[level] : 9;
+		info->geometries[1].level_bits[level] =
+			p9_supported_radix_bits[level];
+	}
+
+	add_rmmu_ap_encoding(info, MMU_PAGE_4K, &nr_encodings);
+	add_rmmu_ap_encoding(info, MMU_PAGE_64K, &nr_encodings);
+	add_rmmu_ap_encoding(info, MMU_PAGE_2M, &nr_encodings);
+	add_rmmu_ap_encoding(info, MMU_PAGE_1G, &nr_encodings);
+
+	return 0;
+}
+
+/*
+ * Allocate the top-level page directory for @kvm and store it in
+ * kvm->arch.pgtable. Returns 0 on success, -ENOMEM if pgd_alloc() fails.
+ */
+int kvmppc_init_vm_radix(struct kvm *kvm)
+{
+	kvm->arch.pgtable = pgd_alloc(kvm->mm);
+
+	return kvm->arch.pgtable ? 0 : -ENOMEM;
+}
+
+/*
+ * Object constructor passed to kmem_cache_create() for the "kvm-pte"
+ * cache: zeroes one RADIX_PTE_TABLE_SIZE object.
+ */
+static void pte_ctor(void *addr)
+{
+ memset(addr, 0, RADIX_PTE_TABLE_SIZE);
+}
+
+/*
+ * Object constructor passed to kmem_cache_create() for the "kvm-pmd"
+ * cache: zeroes one RADIX_PMD_TABLE_SIZE object.
+ */
+static void pmd_ctor(void *addr)
+{
+ memset(addr, 0, RADIX_PMD_TABLE_SIZE);
+}
+
+/*
+ * Per-open-file state of the "radix" debugfs file. It is allocated in
+ * debugfs_radix_open(), freed in debugfs_radix_release(), and carries the
+ * position of the page-table dump between successive read() calls.
+ */
+struct debugfs_radix_state {
+ struct kvm *kvm; /* VM being dumped; a reference is held while open */
+ struct mutex mutex; /* serializes debugfs_radix_read() on this file */
+ unsigned long gpa; /* guest physical address at which the walk resumes */
+ int lpid; /* 0: the VM's own table, > 0: nested guest, < 0: dump finished */
+ int chars_left; /* bytes of buf not yet copied to userspace */
+ int buf_index; /* offset in buf of the first byte not yet copied */
+ char buf[128]; /* the most recently formatted output line */
+ u8 hdr; /* non-zero once the header line for the current lpid was emitted */
+};
+
+/*
+ * open() handler for the "radix" debugfs file: allocates the zeroed
+ * per-file dump state, takes a reference on the VM stored in
+ * inode->i_private, and marks the file non-seekable.
+ *
+ * Returns -ENOMEM if the state cannot be allocated, otherwise the result
+ * of nonseekable_open().
+ */
+static int debugfs_radix_open(struct inode *inode, struct file *file)
+{
+	struct debugfs_radix_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->kvm = inode->i_private;
+	kvm_get_kvm(state->kvm);
+	mutex_init(&state->mutex);
+	file->private_data = state;
+
+	return nonseekable_open(inode, file);
+}
+
+/*
+ * release() handler for the "radix" debugfs file: drops the VM reference
+ * taken at open time and frees the per-file dump state. Always returns 0.
+ */
+static int debugfs_radix_release(struct inode *inode, struct file *file)
+{
+	struct debugfs_radix_state *state = file->private_data;
+
+	kvm_put_kvm(state->kvm);
+	kfree(state);
+
+	return 0;
+}
+
+/*
+ * read() handler for the "radix" debugfs file.
+ *
+ * Produces a text dump of the VM's own radix page table (lpid 0) followed
+ * by the shadow page table of each nested guest returned by
+ * kvmhv_nested_next_lpid(). For every lpid a "pgdir:" header line is
+ * emitted first, then one line " <gpa>: <pte> <shift>" per present leaf
+ * entry.
+ *
+ * Output is formatted one line at a time into p->buf. The part of a line
+ * that does not fit into the caller's buffer is remembered through
+ * chars_left/buf_index and delivered first on the next call. The walk
+ * position (gpa, lpid, hdr) is kept in the per-file state as well; *ppos
+ * is not used.
+ *
+ * Returns the number of bytes copied, 0 if the VM is not radix or nothing
+ * is left to dump, -EFAULT if copy_to_user() failed before any byte was
+ * copied, or the error from mutex_lock_interruptible().
+ */
+static ssize_t debugfs_radix_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct debugfs_radix_state *p = file->private_data;
+ ssize_t ret, r;
+ unsigned long n;
+ struct kvm *kvm;
+ unsigned long gpa;
+ pgd_t *pgt;
+ struct kvm_nested_guest *nested;
+ pgd_t *pgdp;
+ p4d_t p4d, *p4dp;
+ pud_t pud, *pudp;
+ pmd_t pmd, *pmdp;
+ pte_t *ptep;
+ int shift;
+ unsigned long pte;
+
+ kvm = p->kvm;
+ if (!kvm_is_radix(kvm))
+ return 0;
+
+ ret = mutex_lock_interruptible(&p->mutex);
+ if (ret)
+ return ret;
+
+ /* First deliver the tail of a line left over from the previous read() */
+ if (p->chars_left) {
+ n = p->chars_left;
+ if (n > len)
+ n = len;
+ /* copy_to_user() returns the number of bytes NOT copied */
+ r = copy_to_user(buf, p->buf + p->buf_index, n);
+ n -= r;
+ p->chars_left -= n;
+ p->buf_index += n;
+ buf += n;
+ len -= n;
+ ret = n;
+ if (r) {
+ if (!n)
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ gpa = p->gpa;
+ nested = NULL;
+ pgt = NULL;
+ while (len != 0 && p->lpid >= 0) {
+ /* Finished the current table: move on to the next lpid */
+ if (gpa >= RADIX_PGTABLE_RANGE) {
+ gpa = 0;
+ pgt = NULL;
+ if (nested) {
+ kvmhv_put_nested(nested);
+ nested = NULL;
+ }
+ p->lpid = kvmhv_nested_next_lpid(kvm, p->lpid);
+ p->hdr = 0;
+ if (p->lpid < 0)
+ break;
+ }
+ /* Pick the table to walk: the VM's own, or a nested shadow table */
+ if (!pgt) {
+ if (p->lpid == 0) {
+ pgt = kvm->arch.pgtable;
+ } else {
+ nested = kvmhv_get_nested(kvm, p->lpid, false);
+ if (!nested) {
+ /* No such nested guest: skip to the next lpid */
+ gpa = RADIX_PGTABLE_RANGE;
+ continue;
+ }
+ pgt = nested->shadow_pgtable;
+ }
+ }
+ n = 0;
+ /* Emit the per-table header line once */
+ if (!p->hdr) {
+ if (p->lpid > 0)
+ n = scnprintf(p->buf, sizeof(p->buf),
+ "\nNested LPID %d: ", p->lpid);
+ n += scnprintf(p->buf + n, sizeof(p->buf) - n,
+ "pgdir: %lx\n", (unsigned long)pgt);
+ p->hdr = 1;
+ goto copy;
+ }
+
+ /*
+ * Walk down one level at a time. A non-present entry skips the
+ * whole range that entry covers; an entry with _PAGE_PTE set is
+ * a leaf at that level.
+ */
+ pgdp = pgt + pgd_index(gpa);
+ p4dp = p4d_offset(pgdp, gpa);
+ p4d = READ_ONCE(*p4dp);
+ if (!(p4d_val(p4d) & _PAGE_PRESENT)) {
+ gpa = (gpa & P4D_MASK) + P4D_SIZE;
+ continue;
+ }
+
+ pudp = pud_offset(&p4d, gpa);
+ pud = READ_ONCE(*pudp);
+ if (!(pud_val(pud) & _PAGE_PRESENT)) {
+ gpa = (gpa & PUD_MASK) + PUD_SIZE;
+ continue;
+ }
+ if (pud_val(pud) & _PAGE_PTE) {
+ pte = pud_val(pud);
+ shift = PUD_SHIFT;
+ goto leaf;
+ }
+
+ pmdp = pmd_offset(&pud, gpa);
+ pmd = READ_ONCE(*pmdp);
+ if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
+ gpa = (gpa & PMD_MASK) + PMD_SIZE;
+ continue;
+ }
+ if (pmd_val(pmd) & _PAGE_PTE) {
+ pte = pmd_val(pmd);
+ shift = PMD_SHIFT;
+ goto leaf;
+ }
+
+ ptep = pte_offset_kernel(&pmd, gpa);
+ pte = pte_val(READ_ONCE(*ptep));
+ if (!(pte & _PAGE_PRESENT)) {
+ gpa += PAGE_SIZE;
+ continue;
+ }
+ shift = PAGE_SHIFT;
+ leaf:
+ n = scnprintf(p->buf, sizeof(p->buf),
+ " %lx: %lx %d\n", gpa, pte, shift);
+ gpa += 1ul << shift;
+ copy:
+ /* Copy as much of the line as fits; the rest stays in p->buf */
+ p->chars_left = n;
+ if (n > len)
+ n = len;
+ r = copy_to_user(buf, p->buf, n);
+ n -= r;
+ p->chars_left -= n;
+ p->buf_index = n;
+ buf += n;
+ len -= n;
+ ret += n;
+ if (r) {
+ if (!ret)
+ ret = -EFAULT;
+ break;
+ }
+ }
+ /* Remember where to resume on the next read() */
+ p->gpa = gpa;
+ if (nested)
+ kvmhv_put_nested(nested);
+
+ out:
+ mutex_unlock(&p->mutex);
+ return ret;
+}
+
+/* write() handler for the "radix" debugfs file: always fails with -EACCES. */
+static ssize_t debugfs_radix_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return -EACCES;
+}
+
+/*
+ * File operations of the "radix" debugfs file.
+ * NOTE(review): debugfs_radix_open() calls nonseekable_open() while
+ * .llseek is set to generic_file_llseek - confirm which is intended.
+ */
+static const struct file_operations debugfs_radix_fops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_radix_open,
+ .release = debugfs_radix_release,
+ .read = debugfs_radix_read,
+ .write = debugfs_radix_write,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * Create the "radix" file (mode 0400) in the VM's debugfs directory, with
+ * @kvm as the file's private data and debugfs_radix_fops as its operations.
+ */
+void kvmhv_radix_debugfs_init(struct kvm *kvm)
+{
+ debugfs_create_file("radix", 0400, kvm->debugfs_dentry, kvm,
+ &debugfs_radix_fops);
+}
+
+/*
+ * Create the "kvm-pte" and "kvm-pmd" slab caches. Each cache's object size
+ * and alignment are both sizeof(void *) shifted by the matching index
+ * size.
+ *
+ * Returns 0 on success, -ENOMEM if either cache cannot be created (the
+ * PTE cache is destroyed again if only the PMD cache fails).
+ */
+int kvmppc_radix_init(void)
+{
+	unsigned long pte_size = sizeof(void *) << RADIX_PTE_INDEX_SIZE;
+	unsigned long pmd_size = sizeof(void *) << RADIX_PMD_INDEX_SIZE;
+
+	kvm_pte_cache = kmem_cache_create("kvm-pte", pte_size, pte_size, 0,
+					  pte_ctor);
+	if (!kvm_pte_cache)
+		return -ENOMEM;
+
+	kvm_pmd_cache = kmem_cache_create("kvm-pmd", pmd_size, pmd_size, 0,
+					  pmd_ctor);
+	if (kvm_pmd_cache)
+		return 0;
+
+	/* Second cache failed: undo the first one */
+	kmem_cache_destroy(kvm_pte_cache);
+	return -ENOMEM;
+}
+
+/* Destroy the two slab caches created by kvmppc_radix_init(). */
+void kvmppc_radix_exit(void)
+{
+ kmem_cache_destroy(kvm_pte_cache);
+ kmem_cache_destroy(kvm_pmd_cache);
+}
diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S
index 3589c4e3d49b..4d958dd21e59 100644
--- a/arch/powerpc/kvm/book3s_64_slb.S
+++ b/arch/powerpc/kvm/book3s_64_slb.S
@@ -1,22 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
* Authors: Alexander Graf <agraf@suse.de>
*/
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
#define SHADOW_SLB_ENTRY_LEN 0x10
#define OFFSET_ESID(x) (SHADOW_SLB_ENTRY_LEN * x)
#define OFFSET_VSID(x) ((SHADOW_SLB_ENTRY_LEN * x) + 8)
@@ -113,7 +105,7 @@ slb_do_enter:
/* Remove all SLB entries that are in use. */
- li r0, r0
+ li r0, 0
slbmte r0, r0
slbia
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 54cf9bc94dad..742aa58a7c7e 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -1,19 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
+ * Copyright 2016 Alexey Kardashevskiy, IBM Corporation <aik@au1.ibm.com>
*/
#include <linux/types.h>
@@ -23,52 +13,231 @@
#include <linux/highmem.h>
#include <linux/gfp.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
+#include <linux/iommu.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/rcupdate_wait.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
#include <asm/udbg.h>
+#include <asm/iommu.h>
+#include <asm/tce.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Find the TCE table of @kvm whose LIOBN equals @liobn.
+ * The list is walked with list_for_each_entry_lockless(), so no lock is
+ * taken here. Returns the table, or NULL if none matches.
+ */
+static struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
+ unsigned long liobn)
+{
+ struct kvmppc_spapr_tce_table *stt;
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+ list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
+ if (stt->liobn == liobn)
+ return stt;
-static long kvmppc_stt_npages(unsigned long window_size)
+ return NULL;
+}
+
+/*
+ * Number of PAGE_SIZE pages needed to hold @iommu_pages TCE entries of
+ * sizeof(u64) bytes each, rounded up.
+ */
+static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
- return ALIGN((window_size >> SPAPR_TCE_SHIFT)
- * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
+ return ALIGN(iommu_pages * sizeof(u64), PAGE_SIZE) / PAGE_SIZE;
}
-static void release_spapr_tce_table(struct kvmppc_spapr_tce_table *stt)
+/*
+ * Total number of pages charged for a TCE table with @tce_pages data
+ * pages: the data pages themselves plus the pages (rounded up) taken by
+ * struct kvmppc_spapr_tce_table and its array of @tce_pages page pointers.
+ */
+static unsigned long kvmppc_stt_pages(unsigned long tce_pages)
+{
+	unsigned long meta_bytes = sizeof(struct kvmppc_spapr_tce_table);
+
+	meta_bytes += tce_pages * sizeof(struct page *);
+
+	return tce_pages + ALIGN(meta_bytes, PAGE_SIZE) / PAGE_SIZE;
+}
+
+/*
+ * RCU callback queued by kvm_spapr_tce_liobn_put(): drops the reference
+ * on the hardware IOMMU table and frees the attachment object that embeds
+ * @head.
+ */
+static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
+{
+	struct kvmppc_spapr_tce_iommu_table *attachment;
+
+	attachment = container_of(head, struct kvmppc_spapr_tce_iommu_table,
+				  rcu);
+	iommu_tce_table_put(attachment->tbl);
+	kfree(attachment);
+}
+
+/*
+ * kref release callback for an attached hardware table: unlinks the
+ * attachment from its list and defers the actual free to
+ * kvm_spapr_tce_iommu_table_free() through call_rcu().
+ */
+static void kvm_spapr_tce_liobn_put(struct kref *kref)
+{
+	struct kvmppc_spapr_tce_iommu_table *attachment;
+
+	attachment = container_of(kref, struct kvmppc_spapr_tce_iommu_table,
+				  kref);
+	list_del_rcu(&attachment->next);
+	call_rcu(&attachment->rcu, kvm_spapr_tce_iommu_table_free);
+}
+
+/*
+ * Drop this VM's references to the hardware tables of IOMMU group @grp.
+ *
+ * For every TCE table of @kvm, each attached kvmppc_spapr_tce_iommu_table
+ * whose ->tbl is one of the group's tables receives one kref_put(), with
+ * kvm_spapr_tce_liobn_put() as the release callback. The outer list walk
+ * runs under rcu_read_lock().
+ */
+void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+ struct iommu_group *grp)
{
- struct kvm *kvm = stt->kvm;
int i;
+ struct kvmppc_spapr_tce_table *stt;
+ struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+ struct iommu_table_group *table_group = NULL;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+
+ table_group = iommu_group_get_iommudata(grp);
+ if (WARN_ON(!table_group))
+ continue;
+
+ /* _safe variant: kref_put() may unlink stit from this list */
+ list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ if (table_group->tables[i] != stit->tbl)
+ continue;
+
+ kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
+ }
+ }
+ cond_resched_rcu();
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Attach a hardware IOMMU table of group @grp to the TCE table behind the
+ * file descriptor @tablefd.
+ *
+ * The first table of the group whose page shift, window offset and window
+ * size are compatible with the TCE table is used. If that table is
+ * already attached, only its reference count is raised; otherwise a new
+ * attachment is allocated and added to the TCE table's list.
+ *
+ * Returns 0 on success, -EBADF if @tablefd is not an open file, -EINVAL
+ * if the file is not one of @kvm's TCE tables or no compatible hardware
+ * table exists, -EFAULT if the group has no iommudata, -ENOTTY if the
+ * existing attachment is being destroyed, -ENOMEM on allocation failure.
+ */
+long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+ struct iommu_group *grp)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ bool found = false;
+ struct iommu_table *tbl = NULL;
+ struct iommu_table_group *table_group;
+ long i;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ CLASS(fd, f)(tablefd);
+
+ if (fd_empty(f))
+ return -EBADF;
+
+ /* The file's private data must be one of this VM's TCE tables */
+ rcu_read_lock();
+ list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt == fd_file(f)->private_data) {
+ found = true;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (!found)
+ return -EINVAL;
+
+ table_group = iommu_group_get_iommudata(grp);
+ if (WARN_ON(!table_group))
+ return -EFAULT;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbltmp = table_group->tables[i];
+
+ if (!tbltmp)
+ continue;
+ /* Make sure hardware table parameters are compatible */
+ if ((tbltmp->it_page_shift <= stt->page_shift) &&
+ (tbltmp->it_offset << tbltmp->it_page_shift ==
+ stt->offset << stt->page_shift) &&
+ (tbltmp->it_size << tbltmp->it_page_shift >=
+ stt->size << stt->page_shift)) {
+ /*
+ * Reference the table to avoid races with
+ * add/remove DMA windows.
+ */
+ tbl = iommu_tce_table_get(tbltmp);
+ break;
+ }
+ }
+ if (!tbl)
+ return -EINVAL;
+
+ /* Is this hardware table already attached to the TCE table? */
+ rcu_read_lock();
+ list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+ if (tbl != stit->tbl)
+ continue;
+
+ if (!kref_get_unless_zero(&stit->kref)) {
+ /* stit is being destroyed */
+ iommu_tce_table_put(tbl);
+ rcu_read_unlock();
+ return -ENOTTY;
+ }
+ /*
+ * The table is already known to this KVM, we just increased
+ * its KVM reference counter and can return.
+ */
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ stit = kzalloc(sizeof(*stit), GFP_KERNEL);
+ if (!stit) {
+ iommu_tce_table_put(tbl);
+ return -ENOMEM;
+ }
+
+ /* The new attachment takes over the table reference obtained above */
+ stit->tbl = tbl;
+ kref_init(&stit->kref);
+
+ list_add_rcu(&stit->next, &stt->iommu_tables);
+
+ return 0;
+}
+
+/*
+ * RCU callback queued by kvm_spapr_tce_release(): frees every backing page
+ * that was actually allocated for the TCE table embedding @head, then the
+ * table itself.
+ */
+static void release_spapr_tce_table(struct rcu_head *head)
+{
+ struct kvmppc_spapr_tce_table *stt = container_of(head,
+ struct kvmppc_spapr_tce_table, rcu);
+ unsigned long i, npages = kvmppc_tce_pages(stt->size);
+
+ /* Slots whose page was never allocated are still NULL */
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
- mutex_lock(&kvm->lock);
- list_del(&stt->list);
- for (i = 0; i < kvmppc_stt_npages(stt->window_size); i++)
- __free_page(stt->pages[i]);
kfree(stt);
- mutex_unlock(&kvm->lock);
+}
+
+/*
+ * Return the backing page number @sttpage of TCE table @stt, allocating a
+ * zeroed page on first use.
+ *
+ * The slot is first read without a lock. If it is empty, stt->alloc_lock
+ * is taken and the slot is read again before a page is allocated.
+ *
+ * Returns the page, or NULL if alloc_page() failed (a one-time warning is
+ * raised in that case).
+ */
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+ unsigned long sttpage)
+{
+ struct page *page = stt->pages[sttpage];
+
+ if (page)
+ return page;
- kvm_put_kvm(kvm);
+ mutex_lock(&stt->alloc_lock);
+ page = stt->pages[sttpage];
+ if (!page) {
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ WARN_ON_ONCE(!page);
+ if (page)
+ stt->pages[sttpage] = page;
+ }
+ mutex_unlock(&stt->alloc_lock);
+
+ return page;
}
-static int kvm_spapr_tce_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
- struct kvmppc_spapr_tce_table *stt = vma->vm_file->private_data;
+ struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
struct page *page;
- if (vmf->pgoff >= kvmppc_stt_npages(stt->window_size))
+ if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
- page = stt->pages[vmf->pgoff];
+ page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+ if (!page)
+ return VM_FAULT_OOM;
+
get_page(page);
vmf->page = page;
return 0;
@@ -87,8 +256,28 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
+/*
+ * release() handler of a TCE table file descriptor.
+ *
+ * Unlinks the table from the VM's list under kvm->lock, drops every
+ * reference held on each attached hardware table, un-accounts the locked
+ * memory charged at creation, drops the VM reference and finally queues
+ * release_spapr_tce_table() through call_rcu() to free the table.
+ * Always returns 0.
+ */
static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
+ struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+ struct kvm *kvm = stt->kvm;
+
+ mutex_lock(&kvm->lock);
+ list_del_rcu(&stt->list);
+ mutex_unlock(&kvm->lock);
+
+ list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+ WARN_ON(!kref_read(&stit->kref));
+ /* Keep putting until kref_put() reports the release callback ran */
+ while (1) {
+ if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
+ break;
+ }
+ }
+
+ account_locked_vm(kvm->mm,
+ kvmppc_stt_pages(kvmppc_tce_pages(stt->size)), false);
+
+ kvm_put_kvm(stt->kvm);
+
+ call_rcu(&stt->rcu, release_spapr_tce_table);
- release_spapr_tce_table(stt);
return 0;
}
@@ -97,54 +286,511 @@ static const struct file_operations kvm_spapr_tce_fops = {
.release = kvm_spapr_tce_release,
};
-long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
- struct kvm_create_spapr_tce *args)
+/*
+ * Create a TCE table for @kvm as described by @args and return a file
+ * descriptor for it.
+ *
+ * @args is rejected with -EINVAL when size is 0, page_shift is outside
+ * 12..34, or offset + size exceeds ULLONG_MAX >> page_shift. The pages
+ * the table may use are charged with account_locked_vm() up front and
+ * un-charged again on failure. Backing pages themselves are not
+ * allocated here.
+ *
+ * Returns the new file descriptor (>= 0), -EBUSY if the LIOBN is already
+ * in use, -ENOMEM if the table cannot be allocated, or the error from
+ * account_locked_vm() / anon_inode_getfd().
+ */
+int kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
+ struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
- long npages;
- int ret = -ENOMEM;
- int i;
+ struct kvmppc_spapr_tce_table *siter;
+ struct mm_struct *mm = kvm->mm;
+ unsigned long npages;
+ int ret;
- /* Check this LIOBN hasn't been previously allocated */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == args->liobn)
- return -EBUSY;
- }
+ if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
+ (args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
+ return -EINVAL;
- npages = kvmppc_stt_npages(args->window_size);
+ npages = kvmppc_tce_pages(args->size);
+ ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
+ if (ret)
+ return ret;
- stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
- GFP_KERNEL);
+ ret = -ENOMEM;
+ stt = kzalloc(struct_size(stt, pages, npages), GFP_KERNEL | __GFP_NOWARN);
if (!stt)
- goto fail;
+ goto fail_acct;
stt->liobn = args->liobn;
- stt->window_size = args->window_size;
+ stt->page_shift = args->page_shift;
+ stt->offset = args->offset;
+ stt->size = args->size;
stt->kvm = kvm;
+ mutex_init(&stt->alloc_lock);
+ INIT_LIST_HEAD_RCU(&stt->iommu_tables);
+
+ mutex_lock(&kvm->lock);
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
+ /* Check this LIOBN hasn't been previously allocated */
+ ret = 0;
+ list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+ if (siter->liobn == args->liobn) {
+ ret = -EBUSY;
+ break;
+ }
}
kvm_get_kvm(kvm);
+ if (!ret)
+ ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR | O_CLOEXEC);
- mutex_lock(&kvm->lock);
- list_add(&stt->list, &kvm->arch.spapr_tce_tables);
+ /* Publish the table only once it has an fd; otherwise undo the get */
+ if (ret >= 0)
+ list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+ else
+ kvm_put_kvm_no_destroy(kvm);
mutex_unlock(&kvm->lock);
- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR | O_CLOEXEC);
+ if (ret >= 0)
+ return ret;
+
+ kfree(stt);
+ fail_acct:
+ account_locked_vm(mm, kvmppc_stt_pages(npages), false);
+ return ret;
+}
+
+/*
+ * Translate the guest physical address carried in @tce into a host
+ * userspace address, stored in *@ua.
+ *
+ * The bits of @tce outside PAGE_MASK, minus the TCE_PCI_READ and
+ * TCE_PCI_WRITE permission bits, are carried over as the offset within
+ * the page.
+ *
+ * Returns 0 on success, -EINVAL if no memslot covers the address.
+ */
+static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
+		unsigned long *ua)
+{
+	const unsigned long gfn = tce >> PAGE_SHIFT;
+	const unsigned long offset =
+		tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE);
+	struct kvm_memory_slot *slot;
+
+	slot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+	if (!slot)
+		return -EINVAL;
+
+	*ua = __gfn_to_hva_memslot(slot, gfn) | offset;
+	return 0;
+}
+
+/*
+ * Check that @tce may be stored in @stt.
+ *
+ * A TCE whose direction is DMA_NONE is always accepted. Otherwise the
+ * address must pass iommu_tce_check_gpa(), must translate to a userspace
+ * address, and for every hardware table attached to @stt that address
+ * must be found by mm_iommu_lookup() and convertible by
+ * mm_iommu_ua_to_hpa().
+ *
+ * Returns H_SUCCESS if the TCE is acceptable, H_TOO_HARD otherwise.
+ */
+static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long tce)
+{
+ unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ enum dma_data_direction dir = iommu_tce_direction(tce);
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long ua = 0;
+
+ /* Allow userspace to poison TCE table */
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ if (iommu_tce_check_gpa(stt->page_shift, gpa))
+ return H_TOO_HARD;
+
+ if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
+ return H_TOO_HARD;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+ unsigned long hpa = 0;
+ struct mm_iommu_table_group_mem_t *mem;
+ long shift = stit->tbl->it_page_shift;
+
+ /* hpa is only computed as a check; its value is not used */
+ mem = mm_iommu_lookup(stt->kvm->mm, ua, 1ULL << shift);
+ if (!mem || mm_iommu_ua_to_hpa(mem, ua, shift, &hpa)) {
+ rcu_read_unlock();
+ return H_TOO_HARD;
+ }
+ }
+ rcu_read_unlock();
+
+ return H_SUCCESS;
+}
+
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ *
+ * @stt: table to update
+ * @idx: TCE index; stt->offset is subtracted before it is used
+ * @tce: value to store
+ *
+ * The backing page is allocated on demand. A zero @tce aimed at a page
+ * that was never allocated is dropped, because the page would read as
+ * zero anyway.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+ unsigned long sttpage;
+
+ idx -= stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ page = stt->pages[sttpage];
+
+ if (!page) {
+ /* We allow any TCE, not just with read|write permissions */
+ if (!tce)
+ return;
+
+ page = kvm_spapr_get_tce_page(stt, sttpage);
+ /* NOTE(review): on allocation failure the TCE is silently dropped */
+ if (!page)
+ return;
+ }
+ tbl = page_to_virt(page);
-fail:
- if (stt) {
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+
+/*
+ * Reset guest TCE @entry in hardware table @tbl.
+ *
+ * One guest entry covers 2^(stt->page_shift - tbl->it_page_shift)
+ * hardware entries. Each of them is exchanged for hpa 0 with direction
+ * DMA_NONE; the return value of the exchange is ignored.
+ */
+static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long i;
+ unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+ unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
+
+ for (i = 0; i < subpages; ++i) {
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
- kfree(stt);
+ iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
}
+}
+
+/*
+ * Drop the "mapped" count held for hardware entry @entry of @tbl.
+ *
+ * The userspace address recorded for the entry is used to find the memory
+ * region via mm_iommu_lookup(); the region's mapped counter is decremented
+ * and the recorded address is reset to 0.
+ *
+ * Returns H_SUCCESS (also when the table has no userspace-address slot
+ * for the entry), or H_TOO_HARD if the region cannot be found.
+ */
+static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY_RO(tbl, entry);
+
+ if (!pua)
+ return H_SUCCESS;
+
+ mem = mm_iommu_lookup(kvm->mm, be64_to_cpu(*pua), pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+ *pua = cpu_to_be64(0);
+
+ return H_SUCCESS;
+}
+
+/*
+ * Unmap a single hardware entry @entry of @tbl.
+ *
+ * The entry is exchanged for an empty one (hpa 0, DMA_NONE). If the old
+ * entry was in use, its mapped count is dropped; should that fail, the
+ * old contents are exchanged back into the table.
+ *
+ * Returns H_SUCCESS, or H_TOO_HARD if the exchange or the count drop
+ * failed.
+ */
+static long kvmppc_tce_iommu_do_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ enum dma_data_direction dir = DMA_NONE;
+ unsigned long hpa = 0;
+ long ret;
+
+ if (WARN_ON_ONCE(iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa,
+ &dir)))
+ return H_TOO_HARD;
+
+ /* After the exchange, hpa/dir hold the previous contents of the entry */
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret != H_SUCCESS)
+ iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
+
+ return ret;
+}
+
+/*
+ * Unmap guest TCE @entry from hardware table @tbl.
+ *
+ * One guest entry spans 2^(stt->page_shift - tbl->it_page_shift) hardware
+ * entries. They are unmapped one by one, stopping at the first failure,
+ * and iommu_tce_kill() is then called for the whole range either way.
+ *
+ * Returns H_SUCCESS or the first error from kvmppc_tce_iommu_do_unmap().
+ */
+static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry)
+{
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry * subpages;
+	unsigned long n, ret = H_SUCCESS;
+
+	for (n = 0; n < subpages && ret == H_SUCCESS; ++n)
+		ret = kvmppc_tce_iommu_do_unmap(kvm, tbl, io_entry + n);
+
+	iommu_tce_kill(tbl, io_entry, subpages);
+
+	return ret;
+}
+
+/*
+ * Map userspace address @ua at hardware entry @entry of @tbl with
+ * direction @dir.
+ *
+ * The memory region containing @ua is looked up and its mapped counter is
+ * raised before the new host physical address is exchanged into the
+ * table. If the entry was previously in use, the count held for the old
+ * mapping is dropped afterwards. The userspace address is then recorded
+ * for the entry.
+ *
+ * Returns 0 on success, H_TOO_HARD on any failure.
+ */
+static long kvmppc_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+{
+ long ret;
+ unsigned long hpa;
+ __be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+ if (!mem)
+ /* This only handles v2 IOMMU type, v1 is handled via ioctl() */
+ return H_TOO_HARD;
+
+ if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa)))
+ return H_TOO_HARD;
+
+ if (mm_iommu_mapped_inc(mem))
+ return H_TOO_HARD;
+
+ ret = iommu_tce_xchg_no_kill(kvm->mm, tbl, entry, &hpa, &dir);
+ if (WARN_ON_ONCE(ret)) {
+ mm_iommu_mapped_dec(mem);
+ return H_TOO_HARD;
+ }
+
+ /* dir now holds the direction of the entry that was replaced */
+ if (dir != DMA_NONE)
+ kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+ *pua = cpu_to_be64(ua);
+
+ return 0;
+}
+
+/*
+ * Map guest TCE @entry, backed by userspace address @ua, into hardware
+ * table @tbl with direction @dir.
+ *
+ * One guest entry spans 2^(stt->page_shift - tbl->it_page_shift) hardware
+ * entries; consecutive hardware entries map consecutive
+ * IOMMU_PAGE_SIZE(tbl) pieces of @ua. Mapping stops at the first failure
+ * and iommu_tce_kill() is then called for the whole range either way.
+ *
+ * Returns H_SUCCESS or the first error from kvmppc_tce_iommu_do_map().
+ */
+static long kvmppc_tce_iommu_map(struct kvm *kvm,
+		struct kvmppc_spapr_tce_table *stt, struct iommu_table *tbl,
+		unsigned long entry, unsigned long ua,
+		enum dma_data_direction dir)
+{
+	unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
+	unsigned long io_entry = entry * subpages;
+	unsigned long n, pgoff = 0, ret = H_SUCCESS;
+
+	for (n = 0; n < subpages; ++n) {
+		ret = kvmppc_tce_iommu_do_map(kvm, tbl, io_entry + n,
+					      ua + pgoff, dir);
+		if (ret != H_SUCCESS)
+			break;
+		pgoff += IOMMU_PAGE_SIZE(tbl);
+	}
+
+	iommu_tce_kill(tbl, io_entry, subpages);
+
+	return ret;
+}
+
+/*
+ * Handle the H_PUT_TCE hypercall: store @tce at I/O bus address @ioba of
+ * the TCE table identified by @liobn.
+ *
+ * After validating @ioba and @tce, the TCE is applied to every hardware
+ * table attached to the TCE table (unmap for DMA_NONE, map otherwise).
+ * If that fails for a table, the entry is cleared in that table and the
+ * error is returned. On success the TCE is stored in the table's backing
+ * pages. Validation, mapping and the store run inside an SRCU read
+ * section on kvm->srcu.
+ *
+ * Returns H_SUCCESS, H_TOO_HARD if the table is not found, H_PARAMETER if
+ * @tce cannot be translated to a userspace address, or the error returned
+ * by kvmppc_ioba_validate(), kvmppc_tce_validate() or the map/unmap
+ * helpers.
+ */
+long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long ret, idx;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long entry, ua = 0;
+ enum dma_data_direction dir;
+
+ /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
+ /* liobn, ioba, tce); */
+
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
+
+ dir = iommu_tce_direction(tce);
+
+ /* A userspace address is only needed when something gets mapped */
+ if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+ ret = H_PARAMETER;
+ goto unlock_exit;
+ }
+
+ entry = ioba >> stt->page_shift;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE)
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
+ stit->tbl, entry);
+ else
+ ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
+ entry, ua, dir);
+
+
+ /* On failure, clear the entry in this table and give up */
+ if (ret != H_SUCCESS) {
+ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
+ goto unlock_exit;
+ }
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
+
+unlock_exit:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
+
+long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret = H_SUCCESS, idx;
+ unsigned long entry, ua = 0;
+ u64 __user *tces;
+ u64 tce;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ entry = ioba >> stt->page_shift;
+ /*
+ * SPAPR spec says that the maximum size of the list is 512 TCEs
+ * so the whole table fits in 4K page
+ */
+ if (npages > 512)
+ return H_PARAMETER;
+
+ if (tce_list & (SZ_4K - 1))
+ return H_PARAMETER;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tces = (u64 __user *) ua;
+
+ for (i = 0; i < npages; ++i) {
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
+
+ ret = kvmppc_tce_validate(stt, tce);
+ if (ret != H_SUCCESS)
+ goto unlock_exit;
+ }
+
+ for (i = 0; i < npages; ++i) {
+ /*
+ * This looks unsafe, because we validate, then regrab
+ * the TCE from userspace which could have been changed by
+ * another thread.
+ *
+ * But it actually is safe, because the relevant checks will be
+ * re-executed in the following code. If userspace tries to
+ * change this dodgily it will result in a messier failure mode
+ * but won't threaten the host.
+ */
+ if (get_user(tce, tces + i)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
+ tce = be64_to_cpu(tce);
+
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
+ ret = H_PARAMETER;
+ goto unlock_exit;
+ }
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ ret = kvmppc_tce_iommu_map(vcpu->kvm, stt,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+ if (ret != H_SUCCESS) {
+ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
+ entry + i);
+ goto unlock_exit;
+ }
+ }
+
+ kvmppc_tce_put(stt, entry + i, tce);
+ }
+
+unlock_exit:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_put_tce_indirect);
+
+long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long i, ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, npages);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ /* Check permission bits only to allow userspace poison TCE for debug */
+ if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long entry = ioba >> stt->page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm, stt,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
+ }
+ }
+
+ for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
+ kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);
+
+long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba)
+{
+ struct kvmppc_spapr_tce_table *stt;
+ long ret;
+ unsigned long idx;
+ struct page *page;
+ u64 *tbl;
+
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
+ if (!stt)
+ return H_TOO_HARD;
+
+ ret = kvmppc_ioba_validate(stt, ioba, 1);
+ if (ret != H_SUCCESS)
+ return ret;
+
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ page = stt->pages[idx / TCES_PER_PAGE];
+ if (!page) {
+ kvmppc_set_gpr(vcpu, 4, 0);
+ return H_SUCCESS;
+ }
+ tbl = (u64 *)page_address(page);
+
+ kvmppc_set_gpr(vcpu, 4, tbl[idx % TCES_PER_PAGE]);
+
+ return H_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
deleted file mode 100644
index 89e96b3e0039..000000000000
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
- * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- * Copyright 2011 David Gibson, IBM Corporation <dwg@au1.ibm.com>
- */
-
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/kvm.h>
-#include <linux/kvm_host.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/slab.h>
-#include <linux/hugetlb.h>
-#include <linux/list.h>
-
-#include <asm/tlbflush.h>
-#include <asm/kvm_ppc.h>
-#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
-#include <asm/hvcall.h>
-#include <asm/synch.h>
-#include <asm/ppc-opcode.h>
-#include <asm/kvm_host.h>
-#include <asm/udbg.h>
-
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-
-/* WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
- */
-long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba, unsigned long tce)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvmppc_spapr_tce_table *stt;
-
- /* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
- /* liobn, ioba, tce); */
-
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
-
- /* udbg_printf("H_PUT_TCE: liobn 0x%lx => stt=%p window_size=0x%x\n", */
- /* liobn, stt, stt->window_size); */
- if (ioba >= stt->window_size)
- return H_PARAMETER;
-
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
-
- /* FIXME: Need to validate the TCE itself */
- /* udbg_printf("tce @ %p\n", &tbl[idx % TCES_PER_PAGE]); */
- tbl[idx % TCES_PER_PAGE] = tce;
- return H_SUCCESS;
- }
- }
-
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
-}
-EXPORT_SYMBOL_GPL(kvmppc_h_put_tce);
-
-long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
- unsigned long ioba)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvmppc_spapr_tce_table *stt;
-
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == liobn) {
- unsigned long idx = ioba >> SPAPR_TCE_SHIFT;
- struct page *page;
- u64 *tbl;
-
- if (ioba >= stt->window_size)
- return H_PARAMETER;
-
- page = stt->pages[idx / TCES_PER_PAGE];
- tbl = (u64 *)page_address(page);
-
- vcpu->arch.gpr[4] = tbl[idx % TCES_PER_PAGE];
- return H_SUCCESS;
- }
- }
-
- /* Didn't find the liobn, punt it to userspace */
- return H_TOO_HARD;
-}
-EXPORT_SYMBOL_GPL(kvmppc_h_get_tce);
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 5a2bc4b0dfe5..de126d153328 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -23,6 +12,9 @@
#include <asm/reg.h>
#include <asm/switch_to.h>
#include <asm/time.h>
+#include <asm/tm.h>
+#include "book3s.h"
+#include <asm/asm-prototypes.h>
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
@@ -33,7 +25,6 @@
#define OP_31_XOP_MTSR 210
#define OP_31_XOP_MTSRIN 242
#define OP_31_XOP_TLBIEL 274
-#define OP_31_XOP_TLBIE 306
/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */
#define OP_31_XOP_FAKE_SC1 308
#define OP_31_XOP_SLBMTE 402
@@ -45,6 +36,13 @@
#define OP_31_XOP_SLBMFEV 851
#define OP_31_XOP_EIOIO 854
#define OP_31_XOP_SLBMFEE 915
+#define OP_31_XOP_SLBFEE 979
+
+#define OP_31_XOP_TBEGIN 654
+#define OP_31_XOP_TABORT 910
+
+#define OP_31_XOP_TRECLAIM 942
+#define OP_31_XOP_TRCHKPT 1006
/* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
#define OP_31_XOP_DCBZ 1010
@@ -63,10 +61,6 @@
#define SPRN_GQR6 918
#define SPRN_GQR7 919
-/* Book3S_32 defines mfsrin(v) - but that messes up our abstract
- * function pointers, so let's just disable the define. */
-#undef mfsrin
-
enum priv_level {
PRIV_PROBLEM = 0,
PRIV_SUPER = 1,
@@ -86,7 +80,158 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum priv_level level)
return true;
}
-int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+ memcpy(&vcpu->arch.gpr_tm[0], &vcpu->arch.regs.gpr[0],
+ sizeof(vcpu->arch.gpr_tm));
+ memcpy(&vcpu->arch.fp_tm, &vcpu->arch.fp,
+ sizeof(struct thread_fp_state));
+ memcpy(&vcpu->arch.vr_tm, &vcpu->arch.vr,
+ sizeof(struct thread_vr_state));
+ vcpu->arch.ppr_tm = vcpu->arch.ppr;
+ vcpu->arch.dscr_tm = vcpu->arch.dscr;
+ vcpu->arch.amr_tm = vcpu->arch.amr;
+ vcpu->arch.ctr_tm = vcpu->arch.regs.ctr;
+ vcpu->arch.tar_tm = vcpu->arch.tar;
+ vcpu->arch.lr_tm = vcpu->arch.regs.link;
+ vcpu->arch.cr_tm = vcpu->arch.regs.ccr;
+ vcpu->arch.xer_tm = vcpu->arch.regs.xer;
+ vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+
+static inline void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+ memcpy(&vcpu->arch.regs.gpr[0], &vcpu->arch.gpr_tm[0],
+ sizeof(vcpu->arch.regs.gpr));
+ memcpy(&vcpu->arch.fp, &vcpu->arch.fp_tm,
+ sizeof(struct thread_fp_state));
+ memcpy(&vcpu->arch.vr, &vcpu->arch.vr_tm,
+ sizeof(struct thread_vr_state));
+ vcpu->arch.ppr = vcpu->arch.ppr_tm;
+ vcpu->arch.dscr = vcpu->arch.dscr_tm;
+ vcpu->arch.amr = vcpu->arch.amr_tm;
+ vcpu->arch.regs.ctr = vcpu->arch.ctr_tm;
+ vcpu->arch.tar = vcpu->arch.tar_tm;
+ vcpu->arch.regs.link = vcpu->arch.lr_tm;
+ vcpu->arch.regs.ccr = vcpu->arch.cr_tm;
+ vcpu->arch.regs.xer = vcpu->arch.xer_tm;
+ vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
+{
+ unsigned long guest_msr = kvmppc_get_msr(vcpu);
+ int fc_val = ra_val ? ra_val : 1;
+ uint64_t texasr;
+
+ /* CR0 = 0 | MSR[TS] | 0 */
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
+ (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
+ << CR0_SHIFT);
+
+ preempt_disable();
+ tm_enable();
+ texasr = mfspr(SPRN_TEXASR);
+ kvmppc_save_tm_pr(vcpu);
+ kvmppc_copyfrom_vcpu_tm(vcpu);
+
+ /* failure recording depends on Failure Summary bit */
+ if (!(texasr & TEXASR_FS)) {
+ texasr &= ~TEXASR_FC;
+ texasr |= ((u64)fc_val << TEXASR_FC_LG) | TEXASR_FS;
+
+ texasr &= ~(TEXASR_PR | TEXASR_HV);
+ if (kvmppc_get_msr(vcpu) & MSR_PR)
+ texasr |= TEXASR_PR;
+
+ if (kvmppc_get_msr(vcpu) & MSR_HV)
+ texasr |= TEXASR_HV;
+
+ vcpu->arch.texasr = texasr;
+ vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+ mtspr(SPRN_TEXASR, texasr);
+ mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+ }
+ tm_disable();
+ /*
+ * treclaim need quit to non-transactional state.
+ */
+ guest_msr &= ~(MSR_TS_MASK);
+ kvmppc_set_msr(vcpu, guest_msr);
+ preempt_enable();
+
+ if (vcpu->arch.shadow_fscr & FSCR_TAR)
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+}
+
+static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
+{
+ unsigned long guest_msr = kvmppc_get_msr(vcpu);
+
+ preempt_disable();
+ /*
+ * need flush FP/VEC/VSX to vcpu save area before
+ * copy.
+ */
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+ kvmppc_copyto_vcpu_tm(vcpu);
+ kvmppc_save_tm_sprs(vcpu);
+
+ /*
+ * as a result of trecheckpoint. set TS to suspended.
+ */
+ guest_msr &= ~(MSR_TS_MASK);
+ guest_msr |= MSR_TS_S;
+ kvmppc_set_msr(vcpu, guest_msr);
+ kvmppc_restore_tm_pr(vcpu);
+ preempt_enable();
+}
+
+/* emulate tabort. at guest privilege state */
+void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
+{
+ /* currently we only emulate tabort. but no emulation of other
+ * tabort variants since there is no kernel usage of them at
+ * present.
+ */
+ unsigned long guest_msr = kvmppc_get_msr(vcpu);
+ uint64_t org_texasr;
+
+ preempt_disable();
+ tm_enable();
+ org_texasr = mfspr(SPRN_TEXASR);
+ tm_abort(ra_val);
+
+ /* CR0 = 0 | MSR[TS] | 0 */
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)) |
+ (((guest_msr & MSR_TS_MASK) >> (MSR_TS_S_LG - 1))
+ << CR0_SHIFT);
+
+ vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+ /* failure recording depends on Failure Summary bit,
+ * and tabort will be treated as nops in non-transactional
+ * state.
+ */
+ if (!(org_texasr & TEXASR_FS) &&
+ MSR_TM_ACTIVE(guest_msr)) {
+ vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
+ if (guest_msr & MSR_PR)
+ vcpu->arch.texasr |= TEXASR_PR;
+
+ if (guest_msr & MSR_HV)
+ vcpu->arch.texasr |= TEXASR_HV;
+
+ vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+ }
+ tm_disable();
+ preempt_enable();
+}
+
+#endif
+
+int kvmppc_core_emulate_op_pr(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
@@ -116,11 +261,28 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 19:
switch (get_xop(inst)) {
case OP_19_XOP_RFID:
- case OP_19_XOP_RFI:
+ case OP_19_XOP_RFI: {
+ unsigned long srr1 = kvmppc_get_srr1(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ unsigned long cur_msr = kvmppc_get_msr(vcpu);
+
+ /*
+ * add rules to fit in ISA specification regarding TM
+ * state transition in TM disable/Suspended state,
+ * and target TM state is TM inactive(00) state. (the
+ * change should be suppressed).
+ */
+ if (((cur_msr & MSR_TM) == 0) &&
+ ((srr1 & MSR_TM) == 0) &&
+ MSR_TM_SUSPENDED(cur_msr) &&
+ !MSR_TM_ACTIVE(srr1))
+ srr1 |= MSR_TS_S;
+#endif
kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
- kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
+ kvmppc_set_msr(vcpu, srr1);
*advance = 0;
break;
+ }
default:
emulated = EMULATE_FAIL;
@@ -205,13 +367,13 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE)
break;
- run->papr_hcall.nr = cmd;
+ vcpu->run->papr_hcall.nr = cmd;
for (i = 0; i < 9; ++i) {
ulong gpr = kvmppc_get_gpr(vcpu, 4 + i);
- run->papr_hcall.args[i] = gpr;
+ vcpu->run->papr_hcall.args[i] = gpr;
}
- run->exit_reason = KVM_EXIT_PAPR_HCALL;
+ vcpu->run->exit_reason = KVM_EXIT_PAPR_HCALL;
vcpu->arch.hcall_needed = 1;
emulated = EMULATE_EXIT_USER;
break;
@@ -240,6 +402,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->arch.mmu.slbia(vcpu);
break;
+ case OP_31_XOP_SLBFEE:
+ if (!(inst & 1) || !vcpu->arch.mmu.slbfee) {
+ return EMULATE_FAIL;
+ } else {
+ ulong b, t;
+ ulong cr = kvmppc_get_cr(vcpu) & ~CR0_MASK;
+
+ b = kvmppc_get_gpr(vcpu, rb);
+ if (!vcpu->arch.mmu.slbfee(vcpu, b, &t))
+ cr |= 2 << CR0_SHIFT;
+ kvmppc_set_gpr(vcpu, rt, t);
+ /* copy XER[SO] bit to CR0[SO] */
+ cr |= (vcpu->arch.regs.xer & 0x80000000) >>
+ (31 - CR0_SHIFT);
+ kvmppc_set_cr(vcpu, cr);
+ }
+ break;
case OP_31_XOP_SLBMFEE:
if (!vcpu->arch.mmu.slbmfee) {
emulated = EMULATE_FAIL;
@@ -303,6 +482,140 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case OP_31_XOP_TBEGIN:
+ {
+ if (!cpu_has_feature(CPU_FTR_TM))
+ break;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+ kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
+ preempt_disable();
+ vcpu->arch.regs.ccr = (CR0_TBEGIN_FAILURE |
+ (vcpu->arch.regs.ccr & ~(CR0_MASK << CR0_SHIFT)));
+
+ vcpu->arch.texasr = (TEXASR_FS | TEXASR_EXACT |
+ (((u64)(TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT))
+ << TEXASR_FC_LG));
+
+ if ((inst >> 21) & 0x1)
+ vcpu->arch.texasr |= TEXASR_ROT;
+
+ if (kvmppc_get_msr(vcpu) & MSR_HV)
+ vcpu->arch.texasr |= TEXASR_HV;
+
+ vcpu->arch.tfhar = kvmppc_get_pc(vcpu) + 4;
+ vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+
+ kvmppc_restore_tm_sprs(vcpu);
+ preempt_enable();
+ } else
+ emulated = EMULATE_FAIL;
+ break;
+ }
+ case OP_31_XOP_TABORT:
+ {
+ ulong guest_msr = kvmppc_get_msr(vcpu);
+ unsigned long ra_val = 0;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ break;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+ kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ /* only emulate for privilege guest, since problem state
+ * guest can run with TM enabled and we don't expect to
+ * trap at here for that case.
+ */
+ WARN_ON(guest_msr & MSR_PR);
+
+ if (ra)
+ ra_val = kvmppc_get_gpr(vcpu, ra);
+
+ kvmppc_emulate_tabort(vcpu, ra_val);
+ break;
+ }
+ case OP_31_XOP_TRECLAIM:
+ {
+ ulong guest_msr = kvmppc_get_msr(vcpu);
+ unsigned long ra_val = 0;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ break;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+ kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ /* generate interrupts based on priorities */
+ if (guest_msr & MSR_PR) {
+ /* Privileged Instruction type Program Interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ if (!MSR_TM_ACTIVE(guest_msr)) {
+ /* TM bad thing interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ if (ra)
+ ra_val = kvmppc_get_gpr(vcpu, ra);
+ kvmppc_emulate_treclaim(vcpu, ra_val);
+ break;
+ }
+ case OP_31_XOP_TRCHKPT:
+ {
+ ulong guest_msr = kvmppc_get_msr(vcpu);
+ unsigned long texasr;
+
+ if (!cpu_has_feature(CPU_FTR_TM))
+ break;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+ kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ /* generate interrupt based on priorities */
+ if (guest_msr & MSR_PR) {
+ /* Privileged Instruction type Program Intr */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ tm_enable();
+ texasr = mfspr(SPRN_TEXASR);
+ tm_disable();
+
+ if (MSR_TM_ACTIVE(guest_msr) ||
+ !(texasr & (TEXASR_FS))) {
+ /* TM bad thing interrupt */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ kvmppc_emulate_trchkpt(vcpu);
+ break;
+ }
+#endif
default:
emulated = EMULATE_FAIL;
}
@@ -312,7 +625,7 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
if (emulated == EMULATE_FAIL)
- emulated = kvmppc_emulate_paired_single(run, vcpu);
+ emulated = kvmppc_emulate_paired_single(vcpu);
return emulated;
}
@@ -401,7 +714,7 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_HID1:
to_book3s(vcpu)->hid[1] = spr_val;
break;
- case SPRN_HID2:
+ case SPRN_HID2_750FX:
to_book3s(vcpu)->hid[2] = spr_val;
break;
case SPRN_HID2_GEKKO:
@@ -464,13 +777,38 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case SPRN_TFHAR:
- vcpu->arch.tfhar = spr_val;
- break;
case SPRN_TEXASR:
- vcpu->arch.texasr = spr_val;
- break;
case SPRN_TFIAR:
- vcpu->arch.tfiar = spr_val;
+ if (!cpu_has_feature(CPU_FTR_TM))
+ break;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+ kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)) &&
+ !((MSR_TM_SUSPENDED(kvmppc_get_msr(vcpu))) &&
+ (sprn == SPRN_TFHAR))) {
+ /* it is illegal to mtspr() TM regs in
+ * other than non-transactional state, with
+ * the exception of TFHAR in suspend state.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ tm_enable();
+ if (sprn == SPRN_TFHAR)
+ mtspr(SPRN_TFHAR, spr_val);
+ else if (sprn == SPRN_TEXASR)
+ mtspr(SPRN_TEXASR, spr_val);
+ else
+ mtspr(SPRN_TFIAR, spr_val);
+ tm_disable();
+
break;
#endif
#endif
@@ -497,14 +835,26 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_MMCR0:
case SPRN_MMCR1:
case SPRN_MMCR2:
+ case SPRN_UMMCR2:
+ case SPRN_UAMOR:
+ case SPRN_IAMR:
+ case SPRN_AMR:
#endif
break;
unprivileged:
default:
- printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
-#ifndef DEBUG_SPR
- emulated = EMULATE_FAIL;
-#endif
+ pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn);
+ if (sprn & 0x10) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ }
+ } else {
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ emulated = EMULATE_AGAIN;
+ }
+ }
break;
}
@@ -550,7 +900,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
case SPRN_HID1:
*spr_val = to_book3s(vcpu)->hid[1];
break;
- case SPRN_HID2:
+ case SPRN_HID2_750FX:
case SPRN_HID2_GEKKO:
*spr_val = to_book3s(vcpu)->hid[2];
break;
@@ -578,7 +928,7 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
*spr_val = vcpu->arch.spurr;
break;
case SPRN_VTB:
- *spr_val = vcpu->arch.vtb;
+ *spr_val = to_book3s(vcpu)->vtb;
break;
case SPRN_IC:
*spr_val = vcpu->arch.ic;
@@ -608,13 +958,25 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case SPRN_TFHAR:
- *spr_val = vcpu->arch.tfhar;
- break;
case SPRN_TEXASR:
- *spr_val = vcpu->arch.texasr;
- break;
case SPRN_TFIAR:
- *spr_val = vcpu->arch.tfiar;
+ if (!cpu_has_feature(CPU_FTR_TM))
+ break;
+
+ if (!(kvmppc_get_msr(vcpu) & MSR_TM)) {
+ kvmppc_trigger_fac_interrupt(vcpu, FSCR_TM_LG);
+ emulated = EMULATE_AGAIN;
+ break;
+ }
+
+ tm_enable();
+ if (sprn == SPRN_TFHAR)
+ *spr_val = mfspr(SPRN_TFHAR);
+ else if (sprn == SPRN_TEXASR)
+ *spr_val = mfspr(SPRN_TEXASR);
+ else if (sprn == SPRN_TFIAR)
+ *spr_val = mfspr(SPRN_TFIAR);
+ tm_disable();
break;
#endif
#endif
@@ -639,16 +1001,30 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
case SPRN_MMCR0:
case SPRN_MMCR1:
case SPRN_MMCR2:
+ case SPRN_UMMCR2:
case SPRN_TIR:
+ case SPRN_UAMOR:
+ case SPRN_IAMR:
+ case SPRN_AMR:
#endif
*spr_val = 0;
break;
default:
unprivileged:
- printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
-#ifndef DEBUG_SPR
- emulated = EMULATE_FAIL;
-#endif
+ pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn);
+ if (sprn & 0x10) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ }
+ } else {
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 ||
+ sprn == 4 || sprn == 5 || sprn == 6) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ emulated = EMULATE_AGAIN;
+ }
+ }
+
break;
}
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index 0d013fbc2e13..f08565885ddf 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 27cced9c7249..7667563fb9ff 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
* Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
@@ -12,51 +13,81 @@
*
* This file is derived from arch/powerpc/kvm/book3s.c,
* by Alexander Graf <agraf@suse.de>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kvm_host.h>
+#include <linux/kernel.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/preempt.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/stat.h>
#include <linux/delay.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
+#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/srcu.h>
#include <linux/miscdevice.h>
+#include <linux/debugfs.h>
+#include <linux/gfp.h>
+#include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/hugetlb.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/of.h>
+#include <linux/irqdomain.h>
+#include <linux/smp.h>
+#include <asm/ftrace.h>
#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+#include <asm/asm-prototypes.h>
+#include <asm/archrandom.h>
+#include <asm/debug.h>
+#include <asm/disassemble.h>
#include <asm/cputable.h>
-#include <asm/cache.h>
#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/lppaca.h>
+#include <asm/pmc.h>
#include <asm/processor.h>
#include <asm/cputhreads.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/switch_to.h>
#include <asm/smp.h>
-#include <linux/gfp.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#include <linux/hugetlb.h>
-#include <linux/module.h>
+#include <asm/dbell.h>
+#include <asm/hmi.h>
+#include <asm/pnv-pci.h>
+#include <asm/mmu.h>
+#include <asm/opal.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/kvm_book3s_uvmem.h>
+#include <asm/ultravisor.h>
+#include <asm/dtl.h>
+#include <asm/plpar_wrappers.h>
+
+#include <trace/events/ipi.h>
#include "book3s.h"
+#include "book3s_hv.h"
+
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
@@ -64,52 +95,164 @@
/* Used to indicate that a guest page fault needs to be handled */
#define RESUME_PAGE_FAULT (RESUME_GUEST | RESUME_FLAG_ARCH1)
+/* Used to indicate that a guest passthrough interrupt needs to be handled */
+#define RESUME_PASSTHROUGH (RESUME_GUEST | RESUME_FLAG_ARCH2)
/* Used as a "null" value for timebase values */
#define TB_NIL (~(u64)0)
static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1);
-#if defined(CONFIG_PPC_64K_PAGES)
-#define MPP_BUFFER_ORDER 0
-#elif defined(CONFIG_PPC_4K_PAGES)
-#define MPP_BUFFER_ORDER 3
+static int dynamic_mt_modes = 6;
+module_param(dynamic_mt_modes, int, 0644);
+MODULE_PARM_DESC(dynamic_mt_modes, "Set of allowed dynamic micro-threading modes: 0 (= none), 2, 4, or 6 (= 2 or 4)");
+static int target_smt_mode;
+module_param(target_smt_mode, int, 0644);
+MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
+
+static bool one_vm_per_core;
+module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
+
+#ifdef CONFIG_KVM_XICS
+static const struct kernel_param_ops module_param_ops = {
+ .set = param_set_int,
+ .get = param_get_int,
+};
+
+module_param_cb(kvm_irq_bypass, &module_param_ops, &kvm_irq_bypass, 0644);
+MODULE_PARM_DESC(kvm_irq_bypass, "Bypass passthrough interrupt optimization");
+
+module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect, 0644);
+MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
+/* If set, guests are allowed to create and control nested guests */
+static bool nested = true;
+module_param(nested, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(nested, "Enable nested virtualization (only on POWER9)");
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
-static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
+/*
+ * RWMR values for POWER8. These control the rate at which PURR
+ * and SPURR count and should be set according to the number of
+ * online threads in the vcore being run.
+ */
+#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
+#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
+#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
+
+static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
+ RWMR_RPA_P8_1THREAD,
+ RWMR_RPA_P8_1THREAD,
+ RWMR_RPA_P8_2THREAD,
+ RWMR_RPA_P8_3THREAD,
+ RWMR_RPA_P8_4THREAD,
+ RWMR_RPA_P8_5THREAD,
+ RWMR_RPA_P8_6THREAD,
+ RWMR_RPA_P8_7THREAD,
+ RWMR_RPA_P8_8THREAD,
+};
+
+static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
+ int *ip)
{
- int me;
- int cpu = vcpu->cpu;
- wait_queue_head_t *wqp;
+ int i = *ip;
+ struct kvm_vcpu *vcpu;
- wqp = kvm_arch_vcpu_wq(vcpu);
- if (waitqueue_active(wqp)) {
- wake_up_interruptible(wqp);
- ++vcpu->stat.halt_wakeup;
+ while (++i < MAX_SMT_THREADS) {
+ vcpu = READ_ONCE(vc->runnable_threads[i]);
+ if (vcpu) {
+ *ip = i;
+ return vcpu;
+ }
}
+ return NULL;
+}
- me = get_cpu();
+/* Used to traverse the list of runnable threads for a given vcore */
+#define for_each_runnable_thread(i, vcpu, vc) \
+ for (i = -1; (vcpu = next_runnable_thread(vc, &i)); )
- /* CPU points to the first thread of the core */
- if (cpu != me && cpu >= 0 && cpu < nr_cpu_ids) {
-#ifdef CONFIG_PPC_ICP_NATIVE
- int real_cpu = cpu + vcpu->arch.ptid;
- if (paca[real_cpu].kvm_hstate.xics_phys)
- xics_wake_cpu(real_cpu);
- else
-#endif
- if (cpu_online(cpu))
- smp_send_reschedule(cpu);
+static bool kvmppc_ipi_thread(int cpu)
+{
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+
+ /* If we're a nested hypervisor, fall back to ordinary IPIs for now */
+ if (kvmhv_on_pseries())
+ return false;
+
+ /* On POWER9 we can use msgsnd to IPI any cpu */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ msg |= get_hard_smp_processor_id(cpu);
+ smp_mb();
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return true;
+ }
+
+ /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ preempt_disable();
+ if (cpu_first_thread_sibling(cpu) ==
+ cpu_first_thread_sibling(smp_processor_id())) {
+ msg |= cpu_thread_in_core(cpu);
+ smp_mb();
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ preempt_enable();
+ return true;
+ }
+ preempt_enable();
}
- put_cpu();
+
+#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
+ if (cpu >= 0 && cpu < nr_cpu_ids) {
+ if (paca_ptrs[cpu]->kvm_hstate.xics_phys) {
+ xics_wake_cpu(cpu);
+ return true;
+ }
+ opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+ struct rcuwait *waitp;
+
+ /*
+ * rcuwait_wake_up contains smp_mb() which orders prior stores that
+ * create pending work vs below loads of cpu fields. The other side
+ * is the barrier in vcpu run that orders setting the cpu fields vs
+ * testing for pending work.
+ */
+
+ waitp = kvm_arch_vcpu_get_wait(vcpu);
+ if (rcuwait_wake_up(waitp))
+ ++vcpu->stat.generic.halt_wakeup;
+
+ cpu = READ_ONCE(vcpu->arch.thread_cpu);
+ if (cpu >= 0 && kvmppc_ipi_thread(cpu))
+ return;
+
+ /* CPU points to the first thread of the core */
+ cpu = vcpu->cpu;
+ if (cpu >= 0 && cpu < nr_cpu_ids && cpu_online(cpu))
+ smp_send_reschedule(cpu);
}
/*
* We use the vcpu_load/put functions to measure stolen time.
+ *
* Stolen time is counted as time when either the vcpu is able to
* run as part of a virtual core, but the task running the vcore
* is preempted or sleeping, or when the vcpu needs something done
@@ -135,27 +278,73 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* stolen.
*
* Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner). The stolen times are measured in units of
- * timebase ticks. (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock. The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
+ *
+ * The POWER9 path is simpler, one vcpu per virtual core so the
+ * former case does not exist. If a vcpu is preempted when it is
+ * BUSY_IN_HOST and not ceded or otherwise blocked, then accumulate
+ * the stolen cycles in busy_stolen. RUNNING is not a preemptible
+ * state in the P9 path.
*/
+static void kvmppc_core_start_stolen(struct kvmppc_vcore *vc, u64 tb)
+{
+ unsigned long flags;
+
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ vc->preempt_tb = tb;
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
+static void kvmppc_core_end_stolen(struct kvmppc_vcore *vc, u64 tb)
+{
+ unsigned long flags;
+
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ if (vc->preempt_tb != TB_NIL) {
+ vc->stolen_tb += tb - vc->preempt_tb;
+ vc->preempt_tb = TB_NIL;
+ }
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+}
+
static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
+ u64 now;
- spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
- vc->preempt_tb != TB_NIL) {
- vc->stolen_tb += mftb() - vc->preempt_tb;
- vc->preempt_tb = TB_NIL;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (vcpu->arch.busy_preempt != TB_NIL) {
+ WARN_ON_ONCE(vcpu->arch.state != KVMPPC_VCPU_BUSY_IN_HOST);
+ vc->stolen_tb += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_preempt = TB_NIL;
+ }
+ return;
}
+
+ now = mftb();
+
+ /*
+ * We can test vc->runner without taking the vcore lock,
+ * because only this task ever sets vc->runner to this
+ * vcpu, and once it is set to this vcpu, only this task
+ * ever sets it to NULL.
+ */
+ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+ kvmppc_core_end_stolen(vc, now);
+
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
- vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+ vcpu->arch.busy_stolen += now - vcpu->arch.busy_preempt;
vcpu->arch.busy_preempt = TB_NIL;
}
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -165,90 +354,156 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
+ u64 now;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * In the P9 path, RUNNABLE is not preemptible
+ * (nor takes host interrupts)
+ */
+ WARN_ON_ONCE(vcpu->arch.state == KVMPPC_VCPU_RUNNABLE);
+ /*
+ * Account stolen time when preempted while the vcpu task is
+ * running in the kernel (but not in qemu, which is INACTIVE).
+ */
+ if (task_is_running(current) &&
+ vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+ vcpu->arch.busy_preempt = mftb();
+ return;
+ }
+
+ now = mftb();
+
+ if (vc->runner == vcpu && vc->vcore_state >= VCORE_SLEEPING)
+ kvmppc_core_start_stolen(vc, now);
spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
- vc->preempt_tb = mftb();
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
- vcpu->arch.busy_preempt = mftb();
+ vcpu->arch.busy_preempt = now;
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
}
-static void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
{
- vcpu->arch.shregs.msr = msr;
- kvmppc_end_cede(vcpu);
+ vcpu->arch.pvr = pvr;
}
-void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
+/* Dummy value used in computing PCR value below */
+#define PCR_ARCH_31 (PCR_ARCH_300 << 1)
+
+static inline unsigned long map_pcr_to_cap(unsigned long pcr)
{
- vcpu->arch.pvr = pvr;
+ unsigned long cap = 0;
+
+ switch (pcr) {
+ case PCR_ARCH_300:
+ cap = H_GUEST_CAP_POWER9;
+ break;
+ case PCR_ARCH_31:
+ if (cpu_has_feature(CPU_FTR_P11_PVR))
+ cap = H_GUEST_CAP_POWER11;
+ else
+ cap = H_GUEST_CAP_POWER10;
+ break;
+ default:
+ break;
+ }
+
+ return cap;
}
-int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
+static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
- unsigned long pcr = 0;
+ unsigned long host_pcr_bit = 0, guest_pcr_bit = 0, cap = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ /* We can (emulate) our own architecture version and anything older */
+ if (cpu_has_feature(CPU_FTR_P11_PVR) || cpu_has_feature(CPU_FTR_ARCH_31))
+ host_pcr_bit = PCR_ARCH_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
+ host_pcr_bit = PCR_ARCH_300;
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ host_pcr_bit = PCR_ARCH_207;
+ else if (cpu_has_feature(CPU_FTR_ARCH_206))
+ host_pcr_bit = PCR_ARCH_206;
+ else
+ host_pcr_bit = PCR_ARCH_205;
+
+ /* Determine lowest PCR bit needed to run guest in given PVR level */
+ guest_pcr_bit = host_pcr_bit;
if (arch_compat) {
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- return -EINVAL; /* 970 has no compat mode support */
-
switch (arch_compat) {
case PVR_ARCH_205:
- /*
- * If an arch bit is set in PCR, all the defined
- * higher-order arch bits also have to be set.
- */
- pcr = PCR_ARCH_206 | PCR_ARCH_205;
+ guest_pcr_bit = PCR_ARCH_205;
break;
case PVR_ARCH_206:
case PVR_ARCH_206p:
- pcr = PCR_ARCH_206;
+ guest_pcr_bit = PCR_ARCH_206;
break;
case PVR_ARCH_207:
+ guest_pcr_bit = PCR_ARCH_207;
+ break;
+ case PVR_ARCH_300:
+ guest_pcr_bit = PCR_ARCH_300;
+ break;
+ case PVR_ARCH_31:
+ case PVR_ARCH_31_P11:
+ guest_pcr_bit = PCR_ARCH_31;
break;
default:
return -EINVAL;
}
+ }
- if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
- /* POWER7 can't emulate POWER8 */
- if (!(pcr & PCR_ARCH_206))
- return -EINVAL;
- pcr &= ~PCR_ARCH_206;
- }
+ /* Check requested PCR bits don't exceed our capabilities */
+ if (guest_pcr_bit > host_pcr_bit)
+ return -EINVAL;
+
+ if (kvmhv_on_pseries() && kvmhv_is_nestedv2()) {
+ /*
+ * 'arch_compat == 0' would mean the guest should default to
+ * L1's compatibility. In this case, the guest would pick
+ * host's PCR and evaluate the corresponding capabilities.
+ */
+ cap = map_pcr_to_cap(guest_pcr_bit);
+ if (!(cap & nested_capabilities))
+ return -EINVAL;
}
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
- vc->pcr = pcr;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LOGICAL_PVR);
+ /*
+ * Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit
+ * Also set all reserved PCR bits
+ */
+ vc->pcr = (host_pcr_bit - guest_pcr_bit) | PCR_MASK;
spin_unlock(&vc->lock);
return 0;
}
-void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
+static void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
{
int r;
pr_err("vcpu %p (%d):\n", vcpu, vcpu->vcpu_id);
pr_err("pc = %.16lx msr = %.16llx trap = %x\n",
- vcpu->arch.pc, vcpu->arch.shregs.msr, vcpu->arch.trap);
+ vcpu->arch.regs.nip, vcpu->arch.shregs.msr, vcpu->arch.trap);
for (r = 0; r < 16; ++r)
pr_err("r%2d = %.16lx r%d = %.16lx\n",
r, kvmppc_get_gpr(vcpu, r),
r+16, kvmppc_get_gpr(vcpu, r+16));
pr_err("ctr = %.16lx lr = %.16lx\n",
- vcpu->arch.ctr, vcpu->arch.lr);
+ vcpu->arch.regs.ctr, vcpu->arch.regs.link);
pr_err("srr0 = %.16llx srr1 = %.16llx\n",
vcpu->arch.shregs.srr0, vcpu->arch.shregs.srr1);
pr_err("sprg0 = %.16llx sprg1 = %.16llx\n",
vcpu->arch.shregs.sprg0, vcpu->arch.shregs.sprg1);
pr_err("sprg2 = %.16llx sprg3 = %.16llx\n",
vcpu->arch.shregs.sprg2, vcpu->arch.shregs.sprg3);
- pr_err("cr = %.8x xer = %.16lx dsisr = %.8x\n",
- vcpu->arch.cr, vcpu->arch.xer, vcpu->arch.shregs.dsisr);
+ pr_err("cr = %.8lx xer = %.16lx dsisr = %.8x\n",
+ vcpu->arch.regs.ccr, vcpu->arch.regs.xer, vcpu->arch.shregs.dsisr);
pr_err("dar = %.16llx\n", vcpu->arch.shregs.dar);
pr_err("fault dar = %.16lx dsisr = %.8x\n",
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
@@ -256,25 +511,14 @@ void kvmppc_dump_regs(struct kvm_vcpu *vcpu)
for (r = 0; r < vcpu->arch.slb_max; ++r)
pr_err(" ESID = %.16llx VSID = %.16llx\n",
vcpu->arch.slb[r].orige, vcpu->arch.slb[r].origv);
- pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.8x\n",
+ pr_err("lpcr = %.16lx sdr1 = %.16lx last_inst = %.16lx\n",
vcpu->arch.vcore->lpcr, vcpu->kvm->arch.sdr1,
vcpu->arch.last_inst);
}
-struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
+static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id)
{
- int r;
- struct kvm_vcpu *v, *ret = NULL;
-
- mutex_lock(&kvm->lock);
- kvm_for_each_vcpu(r, v, kvm) {
- if (v->vcpu_id == id) {
- ret = v;
- break;
- }
- }
- mutex_unlock(&kvm->lock);
- return ret;
+ return kvm_get_vcpu_by_id(kvm, id);
}
static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
@@ -362,6 +606,13 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
switch (subfunc) {
case H_VPA_REG_VPA: /* register VPA */
+ /*
+ * The size of our lppaca is 1kB because of the way we align
+ * it for the guest to avoid crossing a 4kB boundary. We only
+ * use 640 bytes of the structure though, so we should accept
+ * clients that set a size of 640.
+ */
+ BUILD_BUG_ON(sizeof(struct lppaca) != 640);
if (len < sizeof(struct lppaca))
break;
vpap = &tvcpu->arch.vpa;
@@ -425,7 +676,8 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu,
return err;
}
-static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
+static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap,
+ struct kvmppc_vpa *old_vpap)
{
struct kvm *kvm = vcpu->kvm;
void *va;
@@ -465,9 +717,8 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
kvmppc_unpin_guest_page(kvm, va, gpa, false);
va = NULL;
}
- if (vpap->pinned_addr)
- kvmppc_unpin_guest_page(kvm, vpap->pinned_addr, vpap->gpa,
- vpap->dirty);
+ *old_vpap = *vpap;
+
vpap->gpa = gpa;
vpap->pinned_addr = va;
vpap->dirty = false;
@@ -477,6 +728,9 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_vpa old_vpa = { 0 };
+
if (!(vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending))
@@ -484,17 +738,34 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->arch.vpa_update_lock);
if (vcpu->arch.vpa.update_pending) {
- kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
- if (vcpu->arch.vpa.pinned_addr)
+ kvmppc_update_vpa(vcpu, &vcpu->arch.vpa, &old_vpa);
+ if (old_vpa.pinned_addr) {
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_set_vpa(vcpu, ~0ull);
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
+ }
+ if (vcpu->arch.vpa.pinned_addr) {
init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_set_vpa(vcpu, __pa(vcpu->arch.vpa.pinned_addr));
+ }
}
if (vcpu->arch.dtl.update_pending) {
- kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
+ kvmppc_update_vpa(vcpu, &vcpu->arch.dtl, &old_vpa);
+ if (old_vpa.pinned_addr)
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
vcpu->arch.dtl_ptr = vcpu->arch.dtl.pinned_addr;
vcpu->arch.dtl_index = 0;
}
- if (vcpu->arch.slb_shadow.update_pending)
- kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow);
+ if (vcpu->arch.slb_shadow.update_pending) {
+ kvmppc_update_vpa(vcpu, &vcpu->arch.slb_shadow, &old_vpa);
+ if (old_vpa.pinned_addr)
+ kvmppc_unpin_guest_page(kvm, old_vpa.pinned_addr, old_vpa.gpa,
+ old_vpa.dirty);
+ }
+
spin_unlock(&vcpu->arch.vpa_update_lock);
}
@@ -505,56 +776,42 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
u64 p;
+ unsigned long flags;
- /*
- * If we are the task running the vcore, then since we hold
- * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
- * can't be updated, so we don't need the tbacct_lock.
- * If the vcore is inactive, it can't become active (since we
- * hold the vcore lock), so the vcpu load/put functions won't
- * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
- */
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ p = vc->stolen_tb;
if (vc->vcore_state != VCORE_INACTIVE &&
- vc->runner->arch.run_task != current) {
- spin_lock_irq(&vc->runner->arch.tbacct_lock);
- p = vc->stolen_tb;
- if (vc->preempt_tb != TB_NIL)
- p += now - vc->preempt_tb;
- spin_unlock_irq(&vc->runner->arch.tbacct_lock);
- } else {
- p = vc->stolen_tb;
- }
+ vc->preempt_tb != TB_NIL)
+ p += now - vc->preempt_tb;
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
return p;
}
-static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
- struct kvmppc_vcore *vc)
+static void __kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
+ struct lppaca *vpa,
+ unsigned int pcpu, u64 now,
+ unsigned long stolen)
{
struct dtl_entry *dt;
- struct lppaca *vpa;
- unsigned long stolen;
- unsigned long core_stolen;
- u64 now;
dt = vcpu->arch.dtl_ptr;
- vpa = vcpu->arch.vpa.pinned_addr;
- now = mftb();
- core_stolen = vcore_stolen_time(vc, now);
- stolen = core_stolen - vcpu->arch.stolen_logged;
- vcpu->arch.stolen_logged = core_stolen;
- spin_lock_irq(&vcpu->arch.tbacct_lock);
- stolen += vcpu->arch.busy_stolen;
- vcpu->arch.busy_stolen = 0;
- spin_unlock_irq(&vcpu->arch.tbacct_lock);
- if (!dt || !vpa)
+
+ if (!dt)
return;
- memset(dt, 0, sizeof(struct dtl_entry));
+
dt->dispatch_reason = 7;
- dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid);
- dt->timebase = cpu_to_be64(now + vc->tb_offset);
+ dt->preempt_reason = 0;
+ dt->processor_id = cpu_to_be16(pcpu + vcpu->arch.ptid);
dt->enqueue_to_dispatch_time = cpu_to_be32(stolen);
+ dt->ready_to_enqueue_time = 0;
+ dt->waiting_to_ready_time = 0;
+ dt->timebase = cpu_to_be64(now);
+ dt->fault_addr = 0;
dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu));
dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr);
+
++dt;
if (dt == vcpu->arch.dtl.pinned_end)
dt = vcpu->arch.dtl.pinned_addr;
@@ -562,14 +819,89 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
/* order writing *dt vs. writing vpa->dtl_idx */
smp_wmb();
vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index);
- vcpu->arch.dtl.dirty = true;
+
+ /* vcpu->arch.dtl.dirty is set by the caller */
+}
+
+static void kvmppc_update_vpa_dispatch(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc)
+{
+ struct lppaca *vpa;
+ unsigned long stolen;
+ unsigned long core_stolen;
+ u64 now;
+ unsigned long flags;
+
+ vpa = vcpu->arch.vpa.pinned_addr;
+ if (!vpa)
+ return;
+
+ now = mftb();
+
+ core_stolen = vcore_stolen_time(vc, now);
+ stolen = core_stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = core_stolen;
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
+ stolen += vcpu->arch.busy_stolen;
+ vcpu->arch.busy_stolen = 0;
+ spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
+
+ vpa->enqueue_dispatch_tb = cpu_to_be64(be64_to_cpu(vpa->enqueue_dispatch_tb) + stolen);
+
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now + kvmppc_get_tb_offset(vcpu), stolen);
+
+ vcpu->arch.vpa.dirty = true;
+}
+
+static void kvmppc_update_vpa_dispatch_p9(struct kvm_vcpu *vcpu,
+ struct kvmppc_vcore *vc,
+ u64 now)
+{
+ struct lppaca *vpa;
+ unsigned long stolen;
+ unsigned long stolen_delta;
+
+ vpa = vcpu->arch.vpa.pinned_addr;
+ if (!vpa)
+ return;
+
+ stolen = vc->stolen_tb;
+ stolen_delta = stolen - vcpu->arch.stolen_logged;
+ vcpu->arch.stolen_logged = stolen;
+
+ vpa->enqueue_dispatch_tb = cpu_to_be64(stolen);
+
+ __kvmppc_create_dtl_entry(vcpu, vpa, vc->pcpu, now, stolen_delta);
+
+ vcpu->arch.vpa.dirty = true;
+}
+
+/* See if there is a doorbell interrupt pending for a vcpu */
+static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
+{
+ int thr;
+ struct kvmppc_vcore *vc;
+
+ if (vcpu->arch.doorbell_request)
+ return true;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return false;
+ /*
+ * Ensure that the read of vcore->dpdes comes after the read
+ * of vcpu->doorbell_request. This barrier matches the
+ * smp_wmb() in kvmppc_guest_entry_inject().
+ */
+ smp_rmb();
+ vc = vcpu->arch.vcore;
+ thr = vcpu->vcpu_id - vc->first_vcpuid;
+ return !!(vc->dpdes & (1 << thr));
}
static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207)
+ if (kvmppc_get_arch_compat(vcpu) >= PVR_ARCH_207)
return true;
- if ((!vcpu->arch.vcore->arch_compat) &&
+ if ((!kvmppc_get_arch_compat(vcpu)) &&
cpu_has_feature(CPU_FTR_ARCH_207S))
return true;
return false;
@@ -590,27 +922,232 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
/* Guests can't breakpoint the hypervisor */
if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER)
return H_P3;
- vcpu->arch.ciabr = value1;
+ kvmppc_set_ciabr_hv(vcpu, value1);
+ return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_SET_DAWR0:
+ if (!kvmppc_power8_compatible(vcpu))
+ return H_P2;
+ if (!ppc_breakpoint_available())
+ return H_P2;
+ if (mflags)
+ return H_UNSUPPORTED_FLAG_START;
+ if (value2 & DABRX_HYP)
+ return H_P4;
+ kvmppc_set_dawr0_hv(vcpu, value1);
+ kvmppc_set_dawrx0_hv(vcpu, value2);
return H_SUCCESS;
- case H_SET_MODE_RESOURCE_SET_DAWR:
+ case H_SET_MODE_RESOURCE_SET_DAWR1:
if (!kvmppc_power8_compatible(vcpu))
return H_P2;
+ if (!ppc_breakpoint_available())
+ return H_P2;
+ if (!cpu_has_feature(CPU_FTR_DAWR1))
+ return H_P2;
+ if (!vcpu->kvm->arch.dawr1_enabled)
+ return H_FUNCTION;
if (mflags)
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
return H_P4;
- vcpu->arch.dawr = value1;
- vcpu->arch.dawrx = value2;
+ kvmppc_set_dawr1_hv(vcpu, value1);
+ kvmppc_set_dawrx1_hv(vcpu, value2);
return H_SUCCESS;
+ case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
+ /*
+ * KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
+ * Keep this in synch with kvmppc_filter_guest_lpcr_hv.
+ */
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
+ return H_UNSUPPORTED_FLAG_START;
+ return H_TOO_HARD;
default:
return H_TOO_HARD;
}
}
+/* Copy guest memory in place - must reside within a single memslot */
+static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
+ unsigned long len)
+{
+ struct kvm_memory_slot *to_memslot = NULL;
+ struct kvm_memory_slot *from_memslot = NULL;
+ unsigned long to_addr, from_addr;
+ int r;
+
+ /* Get HVA for from address */
+ from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
+ if (!from_memslot)
+ return -EFAULT;
+ if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
+ if (kvm_is_error_hva(from_addr))
+ return -EFAULT;
+ from_addr |= (from & (PAGE_SIZE - 1));
+
+ /* Get HVA for to address */
+ to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
+ if (!to_memslot)
+ return -EFAULT;
+ if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
+ if (kvm_is_error_hva(to_addr))
+ return -EFAULT;
+ to_addr |= (to & (PAGE_SIZE - 1));
+
+ /* Perform copy */
+ r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
+ len);
+ if (r)
+ return -EFAULT;
+ mark_page_dirty(kvm, to >> PAGE_SHIFT);
+ return 0;
+}
+
+static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src)
+{
+ u64 pg_sz = SZ_4K; /* 4K page size */
+ u64 pg_mask = SZ_4K - 1;
+ int ret;
+
+ /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
+ if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+ H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+ return H_PARAMETER;
+
+ /* dest (and src if copy_page flag set) must be page aligned */
+ if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
+ return H_PARAMETER;
+
+ /* zero and/or copy the page as determined by the flags */
+ if (flags & H_COPY_PAGE) {
+ ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ } else if (flags & H_ZERO_PAGE) {
+ ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ }
+
+ /* We can ignore the remaining flags */
+
+ return H_SUCCESS;
+}
+
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+ struct kvmppc_vcore *vcore = target->arch.vcore;
+
+ /*
+ * We expect to have been called by the real mode handler
+ * (kvmppc_rm_h_confer()) which would have directly returned
+ * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+ * have useful work to do and should not confer) so we don't
+ * recheck that here.
+ *
+ * In the P9 single vcpu per vcore case, the real
+ * mode handler is not called but no other threads are in the
+ * source vcore.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ spin_lock(&vcore->lock);
+ if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ vcore->vcore_state != VCORE_INACTIVE &&
+ vcore->runner)
+ target = vcore->runner;
+ spin_unlock(&vcore->lock);
+ }
+
+ return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+ int yield_count = 0;
+ struct lppaca *lppaca;
+
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+ if (lppaca)
+ yield_count = be32_to_cpu(lppaca->yield_count);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ return yield_count;
+}
+
+/*
+ * H_RPT_INVALIDATE hcall handler for nested guests.
+ *
+ * Handles only nested process-scoped invalidation requests in L0.
+ */
+static int kvmppc_nested_h_rpt_invalidate(struct kvm_vcpu *vcpu)
+{
+ unsigned long type = kvmppc_get_gpr(vcpu, 6);
+ unsigned long pid, pg_sizes, start, end;
+
+ /*
+ * The partition-scoped invalidations aren't handled here in L0.
+ */
+ if (type & H_RPTI_TYPE_NESTED)
+ return RESUME_HOST;
+
+ pid = kvmppc_get_gpr(vcpu, 4);
+ pg_sizes = kvmppc_get_gpr(vcpu, 7);
+ start = kvmppc_get_gpr(vcpu, 8);
+ end = kvmppc_get_gpr(vcpu, 9);
+
+ do_h_rpt_invalidate_prt(pid, vcpu->arch.nested->shadow_lpid,
+ type, pg_sizes, start, end);
+
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ return RESUME_GUEST;
+}
+
+static long kvmppc_h_rpt_invalidate(struct kvm_vcpu *vcpu,
+ unsigned long id, unsigned long target,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ if (!kvm_is_radix(vcpu->kvm))
+ return H_UNSUPPORTED;
+
+ if (end < start)
+ return H_P5;
+
+ /*
+ * Partition-scoped invalidation for nested guests.
+ */
+ if (type & H_RPTI_TYPE_NESTED) {
+ if (!nesting_enabled(vcpu->kvm))
+ return H_FUNCTION;
+
+ /* Support only cores as target */
+ if (target != H_RPTI_TARGET_CMMU)
+ return H_P2;
+
+ return do_h_rpt_invalidate_pat(vcpu, id, type, pg_sizes,
+ start, end);
+ }
+
+ /*
+ * Process-scoped invalidation for L1 guests.
+ */
+ do_h_rpt_invalidate_prt(id, vcpu->kvm->arch.lpid,
+ type, pg_sizes, start, end);
+ return H_SUCCESS;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
+ int yield_count;
struct kvm_vcpu *tvcpu;
int idx, rc;
@@ -619,42 +1156,79 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
+ case H_REMOVE:
+ ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
case H_ENTER:
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
- kvmppc_get_gpr(vcpu, 5),
- kvmppc_get_gpr(vcpu, 6),
- kvmppc_get_gpr(vcpu, 7));
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_READ:
+ ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_MOD:
+ ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_REF:
+ ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PROTECT:
+ ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_BULK_REMOVE:
+ ret = kvmppc_h_bulk_remove(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
break;
+
case H_CEDE:
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
}
tvcpu->arch.prodded = 1;
- smp_mb();
- if (vcpu->arch.ceded) {
- if (waitqueue_active(&vcpu->wq)) {
- wake_up_interruptible(&vcpu->wq);
- vcpu->stat.halt_wakeup++;
- }
- }
+ smp_mb(); /* This orders prodded store vs ceded load */
+ if (tvcpu->arch.ceded)
+ kvmppc_fast_vcpu_kick_hv(tvcpu);
break;
case H_CONFER:
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
}
- kvm_vcpu_yield_to(tvcpu);
+ yield_count = kvmppc_get_gpr(vcpu, 5);
+ if (kvmppc_get_yield_count(tvcpu) != yield_count)
+ break;
+ kvm_arch_vcpu_yield_to(tvcpu);
break;
case H_REGISTER_VPA:
ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -662,12 +1236,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
- if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
+ idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -676,6 +1250,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
/* Send the error out to userspace via KVM_RUN */
return rc;
+ case H_LOGICAL_CI_LOAD:
+ ret = kvmppc_h_logical_ci_load(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_LOGICAL_CI_STORE:
+ ret = kvmppc_h_logical_ci_store(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
case H_SET_MODE:
ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
@@ -691,17 +1275,169 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_IPOLL:
case H_XIRR_X:
if (kvmppc_xics_enabled(vcpu)) {
+ if (xics_on_xive()) {
+ ret = H_NOT_AVAILABLE;
+ return RESUME_GUEST;
+ }
ret = kvmppc_xics_hcall(vcpu, req);
break;
- } /* fallthrough */
+ }
+ return RESUME_HOST;
+ case H_SET_DABR:
+ ret = kvmppc_h_set_dabr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_SET_XDABR:
+ ret = kvmppc_h_set_xdabr(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ break;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ case H_GET_TCE:
+ ret = kvmppc_h_get_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PUT_TCE:
+ ret = kvmppc_h_put_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PUT_TCE_INDIRECT:
+ ret = kvmppc_h_put_tce_indirect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_STUFF_TCE:
+ ret = kvmppc_h_stuff_tce(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+#endif
+ case H_RANDOM: {
+ unsigned long rand;
+
+ if (!arch_get_random_seed_longs(&rand, 1))
+ ret = H_HARDWARE;
+ kvmppc_set_gpr(vcpu, 4, rand);
+ break;
+ }
+ case H_RPT_INVALIDATE:
+ ret = kvmppc_h_rpt_invalidate(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7),
+ kvmppc_get_gpr(vcpu, 8),
+ kvmppc_get_gpr(vcpu, 9));
+ break;
+
+ case H_SET_PARTITION_TABLE:
+ ret = H_FUNCTION;
+ if (nesting_enabled(kvm))
+ ret = kvmhv_set_partition_table(vcpu);
+ break;
+ case H_ENTER_NESTED:
+ ret = H_FUNCTION;
+ if (!nesting_enabled(kvm))
+ break;
+ ret = kvmhv_enter_nested_guest(vcpu);
+ if (ret == H_INTERRUPT) {
+ kvmppc_set_gpr(vcpu, 3, 0);
+ vcpu->arch.hcall_needed = 0;
+ return -EINTR;
+ } else if (ret == H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, 0);
+ vcpu->arch.hcall_needed = 0;
+ return RESUME_HOST;
+ }
+ break;
+ case H_TLB_INVALIDATE:
+ ret = H_FUNCTION;
+ if (nesting_enabled(kvm))
+ ret = kvmhv_do_nested_tlbie(vcpu);
+ break;
+ case H_COPY_TOFROM_GUEST:
+ ret = H_FUNCTION;
+ if (nesting_enabled(kvm))
+ ret = kvmhv_copy_tofrom_guest_nested(vcpu);
+ break;
+ case H_PAGE_INIT:
+ ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_PAGE_IN:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_page_in(kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_PAGE_OUT:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_page_out(kvm,
+ kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
+ case H_SVM_INIT_START:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_start(kvm);
+ break;
+ case H_SVM_INIT_DONE:
+ ret = H_UNSUPPORTED;
+ if (kvmppc_get_srr1(vcpu) & MSR_S)
+ ret = kvmppc_h_svm_init_done(kvm);
+ break;
+ case H_SVM_INIT_ABORT:
+ /*
+ * Even if that call is made by the Ultravisor, the SRR1 value
+ * is the guest context one, with the secure bit clear as it has
+ * not yet been secured. So we can't check it here.
+ * Instead the kvm->arch.secure_guest flag is checked inside
+ * kvmppc_h_svm_init_abort().
+ */
+ ret = kvmppc_h_svm_init_abort(kvm);
+ break;
+
default:
return RESUME_HOST;
}
+ WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
}
+/*
+ * Handle H_CEDE in the P9 path where we don't call the real-mode hcall
+ * handlers in book3s_hv_rmhandlers.S.
+ *
+ * This has to be done early, not in kvmppc_pseries_do_hcall(), so
+ * that the cede logic in kvmppc_run_single_vcpu() works properly.
+ */
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
+{
+ __kvmppc_set_msr_hv(vcpu, __kvmppc_get_msr_hv(vcpu) | MSR_EE);
+ vcpu->arch.ceded = 1;
+ smp_mb();
+ if (vcpu->arch.prodded) {
+ vcpu->arch.prodded = 0;
+ smp_mb();
+ vcpu->arch.ceded = 0;
+ }
+}
+
static int kvmppc_hcall_impl_hv(unsigned long cmd)
{
switch (cmd) {
@@ -710,6 +1446,14 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_CONFER:
case H_REGISTER_VPA:
case H_SET_MODE:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ case H_GET_TCE:
+ case H_PUT_TCE:
+ case H_PUT_TCE_INDIRECT:
+ case H_STUFF_TCE:
+#endif
+ case H_LOGICAL_CI_LOAD:
+ case H_LOGICAL_CI_STORE:
#ifdef CONFIG_KVM_XICS
case H_XIRR:
case H_CPPR:
@@ -718,6 +1462,8 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_IPOLL:
case H_XIRR_X:
#endif
+ case H_PAGE_INIT:
+ case H_RPT_INVALIDATE:
return 1;
}
@@ -725,40 +1471,250 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
return kvmppc_hcall_impl_hv_realmode(cmd);
}
-static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_debug_inst(struct kvm_vcpu *vcpu)
+{
+ ppc_inst_t last_inst;
+
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) !=
+ EMULATE_DONE) {
+ /*
+ * Fetch failed, so return to guest and
+ * try executing it again.
+ */
+ return RESUME_GUEST;
+ }
+
+ if (ppc_inst_val(last_inst) == KVMPPC_INST_SW_BREAKPOINT) {
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
+ return RESUME_HOST;
+ } else {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+ (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+ return RESUME_GUEST;
+ }
+}
+
+static void do_nothing(void *x)
+{
+}
+
+static unsigned long kvmppc_read_dpdes(struct kvm_vcpu *vcpu)
+{
+ int thr, cpu, pcpu, nthreads;
+ struct kvm_vcpu *v;
+ unsigned long dpdes;
+
+ nthreads = vcpu->kvm->arch.emul_smt_mode;
+ dpdes = 0;
+ cpu = vcpu->vcpu_id & ~(nthreads - 1);
+ for (thr = 0; thr < nthreads; ++thr, ++cpu) {
+ v = kvmppc_find_vcpu(vcpu->kvm, cpu);
+ if (!v)
+ continue;
+ /*
+ * If the vcpu is currently running on a physical cpu thread,
+ * interrupt it in order to pull it out of the guest briefly,
+ * which will update its vcore->dpdes value.
+ */
+ pcpu = READ_ONCE(v->cpu);
+ if (pcpu >= 0)
+ smp_call_function_single(pcpu, do_nothing, NULL, 1);
+ if (kvmppc_doorbell_pending(v))
+ dpdes |= 1 << thr;
+ }
+ return dpdes;
+}
+
+/*
+ * On POWER9, emulate doorbell-related instructions in order to
+ * give the guest the illusion of running on a multi-threaded core.
+ * The instructions emulated are msgsndp, msgclrp, mfspr TIR,
+ * and mfspr DPDES.
+ */
+static int kvmppc_emulate_doorbell_instr(struct kvm_vcpu *vcpu)
+{
+ u32 inst, rb, thr;
+ unsigned long arg;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *tvcpu;
+ ppc_inst_t pinst;
+
+ if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst) != EMULATE_DONE)
+ return RESUME_GUEST;
+ inst = ppc_inst_val(pinst);
+ if (get_op(inst) != 31)
+ return EMULATE_FAIL;
+ rb = get_rb(inst);
+ thr = vcpu->vcpu_id & (kvm->arch.emul_smt_mode - 1);
+ switch (get_xop(inst)) {
+ case OP_31_XOP_MSGSNDP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER)
+ break;
+ arg &= 0x7f;
+ if (arg >= kvm->arch.emul_smt_mode)
+ break;
+ tvcpu = kvmppc_find_vcpu(kvm, vcpu->vcpu_id - thr + arg);
+ if (!tvcpu)
+ break;
+ if (!tvcpu->arch.doorbell_request) {
+ tvcpu->arch.doorbell_request = 1;
+ kvmppc_fast_vcpu_kick_hv(tvcpu);
+ }
+ break;
+ case OP_31_XOP_MSGCLRP:
+ arg = kvmppc_get_gpr(vcpu, rb);
+ if (((arg >> 27) & 0x1f) != PPC_DBELL_SERVER)
+ break;
+ vcpu->arch.vcore->dpdes = 0;
+ vcpu->arch.doorbell_request = 0;
+ break;
+ case OP_31_XOP_MFSPR:
+ switch (get_sprn(inst)) {
+ case SPRN_TIR:
+ arg = thr;
+ break;
+ case SPRN_DPDES:
+ arg = kvmppc_read_dpdes(vcpu);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_gpr(vcpu, get_rt(inst), arg);
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ return RESUME_GUEST;
+}
+
+/*
+ * If the lppaca had pmcregs_in_use clear when we exited the guest, then
+ * HFSCR_PM is cleared for next entry. If the guest then tries to access
+ * the PMU SPRs, we get this facility unavailable interrupt. Putting HFSCR_PM
+ * back in the guest HFSCR will cause the next entry to load the PMU SPRs and
+ * allow the guest access to continue.
+ */
+static int kvmppc_pmu_unavailable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hfscr_permitted & HFSCR_PM))
+ return EMULATE_FAIL;
+
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PM);
+
+ return RESUME_GUEST;
+}
+
+static int kvmppc_ebb_unavailable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hfscr_permitted & HFSCR_EBB))
+ return EMULATE_FAIL;
+
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_EBB);
+
+ return RESUME_GUEST;
+}
+
+static int kvmppc_tm_unavailable(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hfscr_permitted & HFSCR_TM))
+ return EMULATE_FAIL;
+
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM);
+
+ return RESUME_GUEST;
+}
+
+static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
struct task_struct *tsk)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
vcpu->stat.sum_exits++;
+ /*
+ * This can happen if an interrupt occurs in the last stages
+ * of guest entry or the first stages of guest exit (i.e. after
+ * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+ * and before setting it to KVM_GUEST_MODE_HOST_HV).
+ * That can happen due to a bug, or due to a machine check
+ * occurring at just the wrong time.
+ */
+ if (!kvmhv_is_nestedv2() && (__kvmppc_get_msr_hv(vcpu) & MSR_HV)) {
+ printk(KERN_EMERG "KVM trap in HV mode!\n");
+ printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ vcpu->arch.shregs.msr);
+ kvmppc_dump_regs(vcpu);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ return RESUME_HOST;
+ }
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
switch (vcpu->arch.trap) {
/* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+ WARN_ON_ONCE(1); /* Should never happen */
+ vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+ fallthrough;
case BOOK3S_INTERRUPT_HV_DECREMENTER:
vcpu->stat.dec_exits++;
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
case BOOK3S_INTERRUPT_H_DOORBELL:
+ case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
+ /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
+ case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
r = RESUME_GUEST;
break;
- case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ case BOOK3S_INTERRUPT_MACHINE_CHECK: {
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
/*
- * Deliver a machine check interrupt to the guest.
- * We have to do this, even if the host has handled the
- * machine check, because machine checks use SRR0/1 and
- * the interrupt might have trashed guest state in them.
+ * Print the MCE event to host console. Ratelimit so the guest
+ * can't flood the host log.
*/
- kvmppc_book3s_queue_irqprio(vcpu,
- BOOK3S_INTERRUPT_MACHINE_CHECK);
- r = RESUME_GUEST;
+ if (__ratelimit(&rs))
+ machine_check_print_event_info(&vcpu->arch.mce_evt,false, true);
+
+ /*
+ * If the guest can do FWNMI, exit to userspace so it can
+ * deliver a FWNMI to the guest.
+ * Otherwise we synthesize a machine check for the guest
+ * so that it knows that the machine check occurred.
+ */
+ if (!vcpu->kvm->arch.fwnmi_enabled) {
+ ulong flags = (__kvmppc_get_msr_hv(vcpu) & 0x083c0000) |
+ (kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+ kvmppc_core_queue_machine_check(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ /* Exit to userspace with KVM_EXIT_NMI as exit reason */
+ run->exit_reason = KVM_EXIT_NMI;
+ run->hw.hardware_exit_reason = vcpu->arch.trap;
+ /* Clear out the old NMI status from run->flags */
+ run->flags &= ~KVM_RUN_PPC_NMI_DISP_MASK;
+ /* Now set the NMI status */
+ if (vcpu->arch.mce_evt.disposition == MCE_DISPOSITION_RECOVERED)
+ run->flags |= KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+ else
+ run->flags |= KVM_RUN_PPC_NMI_DISP_NOT_RECOV;
+
+ r = RESUME_HOST;
break;
+ }
case BOOK3S_INTERRUPT_PROGRAM:
{
ulong flags;
@@ -768,20 +1724,47 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
* as a result of a hypervisor emulation interrupt
* (e40) getting turned into a 700 by BML RTAS.
*/
- flags = vcpu->arch.shregs.msr & 0x1f0000ull;
+ flags = (__kvmppc_get_msr_hv(vcpu) & 0x1f0000ull) |
+ (kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
break;
}
case BOOK3S_INTERRUPT_SYSCALL:
{
- /* hcall - punt to userspace */
int i;
- /* hypercall with MSR_PR has already been handled in rmode,
- * and never reaches here.
- */
+ if (!kvmhv_is_nestedv2() && unlikely(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1. This can only be
+ * reached by the P9 path because the old path
+ * handles this case in realmode hcall handlers.
+ */
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ /*
+ * A guest could be running PR KVM, so this
+ * may be a PR KVM hcall. It must be reflected
+ * to the guest kernel as a sc interrupt.
+ */
+ kvmppc_core_queue_syscall(vcpu);
+ } else {
+ /*
+ * Radix guests can not run PR KVM or nested HV
+ * hash guests which might run PR KVM, so this
+ * is always a privilege fault. Send a program
+ * check to guest kernel.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ }
+ r = RESUME_GUEST;
+ break;
+ }
+ /*
+ * hcall - gather args and set exit_reason. This will next be
+ * handled by kvmppc_pseries_do_hcall which may be able to deal
+ * with it and resume guest, or may punt to userspace.
+ */
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
@@ -794,40 +1777,187 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
* it is for an emulated I/O device or because the corresonding
- * host page has been paged out. Any other HDSI/HISI interrupts
- * have been handled already.
+ * host page has been paged out.
+ *
+ * Any other HDSI/HISI interrupts have been handled already for P7/8
+ * guests. For POWER9 hash guests not using rmhandlers, basic hash
+ * fault handling is done here.
*/
- case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = RESUME_PAGE_FAULT;
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+ unsigned long vsid;
+ long err;
+
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ unlikely(vcpu->arch.fault_dsisr == HDSISR_CANARY)) {
+ r = RESUME_GUEST; /* Just retry if it's the canary */
+ break;
+ }
+
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(__kvmppc_get_msr_hv(vcpu) & MSR_DR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, true);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1 || err == -2) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ vcpu->arch.fault_dar, err);
+ r = RESUME_GUEST;
+ }
break;
- case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ }
+ case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+ unsigned long vsid;
+ long err;
+
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
- vcpu->arch.fault_dsisr = 0;
- r = RESUME_PAGE_FAULT;
+ vcpu->arch.fault_dsisr = __kvmppc_get_msr_hv(vcpu) &
+ DSISR_SRR1_MATCH_64S;
+ if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * Radix doesn't require anything, and pre-ISAv3.0 hash
+ * already attempted to handle this in rmhandlers. The
+ * hash fault handling below is v3 only (it uses ASDR
+ * via fault_gpa).
+ */
+ if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+ kvmppc_core_queue_inst_storage(vcpu,
+ vcpu->arch.fault_dsisr |
+ (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+ r = RESUME_GUEST;
+ break;
+ }
+
+ if (!(__kvmppc_get_msr_hv(vcpu) & MSR_IR))
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ else
+ vsid = vcpu->arch.fault_gpa;
+
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, false);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu,
+ err | (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+ r = RESUME_GUEST;
+ }
break;
+ }
+
/*
* This occurs if the guest executes an illegal instruction.
- * We just generate a program interrupt to the guest, since
- * we don't emulate any guest instructions at this stage.
+ * If the guest debug is disabled, generate a program interrupt
+ * to the guest. If guest debug is enabled, we need to check
+ * whether the instruction is a software breakpoint instruction.
+ * Accordingly return to Guest or Host.
*/
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+ vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+ swab32(vcpu->arch.emul_inst) :
+ vcpu->arch.emul_inst;
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ r = kvmppc_emulate_debug_inst(vcpu);
+ } else {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+ (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+ r = RESUME_GUEST;
+ }
break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+ /*
+ * This occurs for various TM-related instructions that
+ * we need to emulate on POWER9 DD2.2. We have already
+ * handled the cases where the guest was in real-suspend
+ * mode and was transitioning to transactional state.
+ */
+ r = kvmhv_p9_tm_emulation(vcpu);
+ if (r != -1)
+ break;
+ fallthrough; /* go to facility unavailable handler */
+#endif
+
/*
* This occurs if the guest (kernel or userspace), does something that
- * is prohibited by HFSCR. We just generate a program interrupt to
- * the guest.
+ * is prohibited by HFSCR.
+ * On POWER9, this could be a doorbell instruction that we need
+ * to emulate.
+ * Otherwise, we just generate a program interrupt to the guest.
*/
- case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
- kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
- r = RESUME_GUEST;
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
+ u64 cause = kvmppc_get_hfscr_hv(vcpu) >> 56;
+
+ r = EMULATE_FAIL;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ switch (cause) {
+ case FSCR_MSGP_LG:
+ r = kvmppc_emulate_doorbell_instr(vcpu);
+ break;
+ case FSCR_PM_LG:
+ r = kvmppc_pmu_unavailable(vcpu);
+ break;
+ case FSCR_EBB_LG:
+ r = kvmppc_ebb_unavailable(vcpu);
+ break;
+ case FSCR_TM_LG:
+ r = kvmppc_tm_unavailable(vcpu);
+ break;
+ default:
+ break;
+ }
+ }
+ if (r == EMULATE_FAIL) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL |
+ (kvmppc_get_msr(vcpu) & SRR1_PREFIXED));
+ r = RESUME_GUEST;
+ }
+ break;
+ }
+
+ case BOOK3S_INTERRUPT_HV_RM_HARD:
+ r = RESUME_PASSTHROUGH;
break;
default:
kvmppc_dump_regs(vcpu);
printk(KERN_EMERG "trap=0x%x | pc=0x%lx | msr=0x%llx\n",
vcpu->arch.trap, kvmppc_get_pc(vcpu),
- vcpu->arch.shregs.msr);
+ __kvmppc_get_msr_hv(vcpu));
run->hw.hardware_exit_reason = vcpu->arch.trap;
r = RESUME_HOST;
break;
@@ -836,6 +1966,138 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
+static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
+{
+ int r;
+ int srcu_idx;
+
+ vcpu->stat.sum_exits++;
+
+ /*
+ * This can happen if an interrupt occurs in the last stages
+ * of guest entry or the first stages of guest exit (i.e. after
+ * setting paca->kvm_hstate.in_guest to KVM_GUEST_MODE_GUEST_HV
+ * and before setting it to KVM_GUEST_MODE_HOST_HV).
+ * That can happen due to a bug, or due to a machine check
+ * occurring at just the wrong time.
+ */
+ if (__kvmppc_get_msr_hv(vcpu) & MSR_HV) {
+ pr_emerg("KVM trap in HV mode while nested!\n");
+ pr_emerg("trap=0x%x | pc=0x%lx | msr=0x%llx\n",
+ vcpu->arch.trap, kvmppc_get_pc(vcpu),
+ __kvmppc_get_msr_hv(vcpu));
+ kvmppc_dump_regs(vcpu);
+ return RESUME_HOST;
+ }
+ switch (vcpu->arch.trap) {
+ /* We're good on these - the host merely wanted to get our attention */
+ case BOOK3S_INTERRUPT_HV_DECREMENTER:
+ vcpu->stat.dec_exits++;
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_HOST;
+ break;
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ case BOOK3S_INTERRUPT_H_VIRT:
+ vcpu->stat.ext_intr_exits++;
+ r = RESUME_GUEST;
+ break;
+ /* These need to go to the nested HV */
+ case BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER:
+ vcpu->arch.trap = BOOK3S_INTERRUPT_HV_DECREMENTER;
+ vcpu->stat.dec_exits++;
+ r = RESUME_HOST;
+ break;
+ /* SR/HMI/PMI are HV interrupts that host has handled. Resume guest.*/
+ case BOOK3S_INTERRUPT_HMI:
+ case BOOK3S_INTERRUPT_PERFMON:
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
+ r = RESUME_GUEST;
+ break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ {
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+ /* Pass the machine check to the L1 guest */
+ r = RESUME_HOST;
+ /* Print the MCE event to host console. */
+ if (__ratelimit(&rs))
+ machine_check_print_event_info(&vcpu->arch.mce_evt, false, true);
+ break;
+ }
+ /*
+ * We get these next two if the guest accesses a page which it thinks
+ * it has mapped but which is not actually present, either because
+ * it is for an emulated I/O device or because the corresponding
+ * host page has been paged out.
+ */
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmhv_nested_page_fault(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ break;
+ case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+ vcpu->arch.fault_dsisr = kvmppc_get_msr(vcpu) &
+ DSISR_SRR1_MATCH_64S;
+ if (__kvmppc_get_msr_hv(vcpu) & HSRR1_HISI_WRITE)
+ vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+ r = kvmhv_nested_page_fault(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+ break;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case BOOK3S_INTERRUPT_HV_SOFTPATCH:
+ /*
+ * This occurs for various TM-related instructions that
+ * we need to emulate on POWER9 DD2.2. We have already
+ * handled the cases where the guest was in real-suspend
+ * mode and was transitioning to transactional state.
+ */
+ r = kvmhv_p9_tm_emulation(vcpu);
+ if (r != -1)
+ break;
+ fallthrough; /* go to facility unavailable handler */
+#endif
+
+ case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+ r = RESUME_HOST;
+ break;
+
+ case BOOK3S_INTERRUPT_HV_RM_HARD:
+ vcpu->arch.trap = 0;
+ r = RESUME_GUEST;
+ if (!xics_on_xive())
+ kvmppc_xics_rm_complete(vcpu, 0);
+ break;
+ case BOOK3S_INTERRUPT_SYSCALL:
+ {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /*
+ * The H_RPT_INVALIDATE hcalls issued by nested
+ * guests for process-scoped invalidations when
+ * GTSE=0, are handled here in L0.
+ */
+ if (req == H_RPT_INVALIDATE) {
+ r = kvmppc_nested_h_rpt_invalidate(vcpu);
+ break;
+ }
+
+ r = RESUME_HOST;
+ break;
+ }
+ default:
+ r = RESUME_HOST;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_arch_vcpu_ioctl_get_sregs_hv(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
@@ -856,7 +2118,9 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
{
int i, j;
- kvmppc_set_pvr_hv(vcpu, sregs->pvr);
+ /* Only accept the same PVR as the host's, since we can't spoof it */
+ if (sregs->pvr != vcpu->arch.pvr)
+ return -EINVAL;
j = 0;
for (i = 0; i < vcpu->arch.slb_nr; i++) {
@@ -871,23 +2135,82 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
return 0;
}
+/*
+ * Enforce limits on guest LPCR values based on hardware availability,
+ * guest configuration, and possibly hypervisor support and security
+ * concerns.
+ */
+unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
+{
+ /* LPCR_TC only applies to HPT guests */
+ if (kvm_is_radix(kvm))
+ lpcr &= ~LPCR_TC;
+
+ /* On POWER8 and above, userspace can modify AIL */
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ lpcr &= ~LPCR_AIL;
+ if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
+ lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
+ /*
+ * On some POWER9s we force AIL off for radix guests to prevent
+ * executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
+ * guest, which can result in Q0 translations with LPID=0 PID=PIDR to
+ * be cached, which the host TLB management does not expect.
+ */
+ if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ lpcr &= ~LPCR_AIL;
+
+ /*
+ * On POWER9, allow userspace to enable large decrementer for the
+ * guest, whether or not the host has it enabled.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ lpcr &= ~LPCR_LD;
+
+ return lpcr;
+}
+
+static void verify_lpcr(struct kvm *kvm, unsigned long lpcr)
+{
+ if (lpcr != kvmppc_filter_lpcr_hv(kvm, lpcr)) {
+ WARN_ONCE(1, "lpcr 0x%lx differs from filtered 0x%lx\n",
+ lpcr, kvmppc_filter_lpcr_hv(kvm, lpcr));
+ }
+}
+
static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
bool preserve_top32)
{
+ struct kvm *kvm = vcpu->kvm;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
u64 mask;
spin_lock(&vc->lock);
+
+ /*
+ * Userspace can only modify
+ * DPFD (default prefetch depth), ILE (interrupt little-endian),
+ * TC (translation control), AIL (alternate interrupt location),
+ * LD (large decrementer).
+ * These are subject to restrictions from kvmppc_filter_lpcr_hv().
+ */
+ mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD;
+
+ /* Broken 32-bit version of LPCR must not clear top bits */
+ if (preserve_top32)
+ mask &= 0xFFFFFFFF;
+
+ new_lpcr = kvmppc_filter_lpcr_hv(kvm,
+ (vc->lpcr & ~mask) | (new_lpcr & mask));
+
/*
* If ILE (interrupt little-endian) has changed, update the
* MSR_LE bit in the intr_msr for each vcpu in this vcore.
*/
if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
- struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *vcpu;
- int i;
+ unsigned long i;
- mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.vcore != vc)
continue;
@@ -896,22 +2219,11 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
else
vcpu->arch.intr_msr &= ~MSR_LE;
}
- mutex_unlock(&kvm->lock);
}
- /*
- * Userspace can only modify DPFD (default prefetch depth),
- * ILE (interrupt little-endian) and TC (translation control).
- * On POWER8 userspace can also modify AIL (alt. interrupt loc.)
- */
- mask = LPCR_DPFD | LPCR_ILE | LPCR_TC;
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- mask |= LPCR_AIL;
+ vc->lpcr = new_lpcr;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR);
- /* Broken 32-bit version of LPCR must not clear top bits */
- if (preserve_top32)
- mask &= 0xFFFFFFFF;
- vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
spin_unlock(&vc->lock);
}
@@ -922,6 +2234,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
long int i;
switch (id) {
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+ break;
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, 0);
break;
@@ -932,58 +2247,103 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.dabrx);
break;
case KVM_REG_PPC_DSCR:
- *val = get_reg_val(id, vcpu->arch.dscr);
+ *val = get_reg_val(id, kvmppc_get_dscr_hv(vcpu));
break;
case KVM_REG_PPC_PURR:
- *val = get_reg_val(id, vcpu->arch.purr);
+ *val = get_reg_val(id, kvmppc_get_purr_hv(vcpu));
break;
case KVM_REG_PPC_SPURR:
- *val = get_reg_val(id, vcpu->arch.spurr);
+ *val = get_reg_val(id, kvmppc_get_spurr_hv(vcpu));
break;
case KVM_REG_PPC_AMR:
- *val = get_reg_val(id, vcpu->arch.amr);
+ *val = get_reg_val(id, kvmppc_get_amr_hv(vcpu));
break;
case KVM_REG_PPC_UAMOR:
- *val = get_reg_val(id, vcpu->arch.uamor);
+ *val = get_reg_val(id, kvmppc_get_uamor_hv(vcpu));
break;
- case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
i = id - KVM_REG_PPC_MMCR0;
- *val = get_reg_val(id, vcpu->arch.mmcr[i]);
+ *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, i));
+ break;
+ case KVM_REG_PPC_MMCR2:
+ *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 2));
+ break;
+ case KVM_REG_PPC_MMCRA:
+ *val = get_reg_val(id, kvmppc_get_mmcra_hv(vcpu));
+ break;
+ case KVM_REG_PPC_MMCRS:
+ *val = get_reg_val(id, vcpu->arch.mmcrs);
+ break;
+ case KVM_REG_PPC_MMCR3:
+ *val = get_reg_val(id, kvmppc_get_mmcr_hv(vcpu, 3));
break;
case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
i = id - KVM_REG_PPC_PMC1;
- *val = get_reg_val(id, vcpu->arch.pmc[i]);
+ *val = get_reg_val(id, kvmppc_get_pmc_hv(vcpu, i));
break;
case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
i = id - KVM_REG_PPC_SPMC1;
*val = get_reg_val(id, vcpu->arch.spmc[i]);
break;
case KVM_REG_PPC_SIAR:
- *val = get_reg_val(id, vcpu->arch.siar);
+ *val = get_reg_val(id, kvmppc_get_siar_hv(vcpu));
break;
case KVM_REG_PPC_SDAR:
- *val = get_reg_val(id, vcpu->arch.sdar);
+ *val = get_reg_val(id, kvmppc_get_sdar_hv(vcpu));
break;
case KVM_REG_PPC_SIER:
- *val = get_reg_val(id, vcpu->arch.sier);
+ *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 0));
+ break;
+ case KVM_REG_PPC_SIER2:
+ *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 1));
+ break;
+ case KVM_REG_PPC_SIER3:
+ *val = get_reg_val(id, kvmppc_get_sier_hv(vcpu, 2));
break;
case KVM_REG_PPC_IAMR:
- *val = get_reg_val(id, vcpu->arch.iamr);
+ *val = get_reg_val(id, kvmppc_get_iamr_hv(vcpu));
break;
case KVM_REG_PPC_PSPB:
- *val = get_reg_val(id, vcpu->arch.pspb);
+ *val = get_reg_val(id, kvmppc_get_pspb_hv(vcpu));
break;
case KVM_REG_PPC_DPDES:
- *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ /*
+ * On POWER9, where we are emulating msgsndp etc.,
+ * we return 1 bit for each vcpu, which can come from
+ * either vcore->dpdes or doorbell_request.
+ * On POWER8, doorbell_request is 0.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ *val = get_reg_val(id, vcpu->arch.doorbell_request);
+ else
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, kvmppc_get_vtb(vcpu));
break;
case KVM_REG_PPC_DAWR:
- *val = get_reg_val(id, vcpu->arch.dawr);
+ *val = get_reg_val(id, kvmppc_get_dawr0_hv(vcpu));
break;
case KVM_REG_PPC_DAWRX:
- *val = get_reg_val(id, vcpu->arch.dawrx);
+ *val = get_reg_val(id, kvmppc_get_dawrx0_hv(vcpu));
+ break;
+ case KVM_REG_PPC_DAWR1:
+ *val = get_reg_val(id, kvmppc_get_dawr1_hv(vcpu));
+ break;
+ case KVM_REG_PPC_DAWRX1:
+ *val = get_reg_val(id, kvmppc_get_dawrx1_hv(vcpu));
+ break;
+ case KVM_REG_PPC_DEXCR:
+ *val = get_reg_val(id, kvmppc_get_dexcr_hv(vcpu));
+ break;
+ case KVM_REG_PPC_HASHKEYR:
+ *val = get_reg_val(id, kvmppc_get_hashkeyr_hv(vcpu));
+ break;
+ case KVM_REG_PPC_HASHPKEYR:
+ *val = get_reg_val(id, kvmppc_get_hashpkeyr_hv(vcpu));
break;
case KVM_REG_PPC_CIABR:
- *val = get_reg_val(id, vcpu->arch.ciabr);
+ *val = get_reg_val(id, kvmppc_get_ciabr_hv(vcpu));
break;
case KVM_REG_PPC_CSIGR:
*val = get_reg_val(id, vcpu->arch.csigr);
@@ -995,13 +2355,19 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, vcpu->arch.tcscr);
break;
case KVM_REG_PPC_PID:
- *val = get_reg_val(id, vcpu->arch.pid);
+ *val = get_reg_val(id, kvmppc_get_pid(vcpu));
break;
case KVM_REG_PPC_ACOP:
*val = get_reg_val(id, vcpu->arch.acop);
break;
case KVM_REG_PPC_WORT:
- *val = get_reg_val(id, vcpu->arch.wort);
+ *val = get_reg_val(id, kvmppc_get_wort_hv(vcpu));
+ break;
+ case KVM_REG_PPC_TIDR:
+ *val = get_reg_val(id, vcpu->arch.tid);
+ break;
+ case KVM_REG_PPC_PSSCR:
+ *val = get_reg_val(id, vcpu->arch.psscr);
break;
case KVM_REG_PPC_VPA_ADDR:
spin_lock(&vcpu->arch.vpa_update_lock);
@@ -1021,14 +2387,14 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
spin_unlock(&vcpu->arch.vpa_update_lock);
break;
case KVM_REG_PPC_TB_OFFSET:
- *val = get_reg_val(id, vcpu->arch.vcore->tb_offset);
+ *val = get_reg_val(id, kvmppc_get_tb_offset(vcpu));
break;
case KVM_REG_PPC_LPCR:
case KVM_REG_PPC_LPCR_64:
- *val = get_reg_val(id, vcpu->arch.vcore->lpcr);
+ *val = get_reg_val(id, kvmppc_get_lpcr(vcpu));
break;
case KVM_REG_PPC_PPR:
- *val = get_reg_val(id, vcpu->arch.ppr);
+ *val = get_reg_val(id, kvmppc_get_ppr_hv(vcpu));
break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case KVM_REG_PPC_TFHAR:
@@ -1062,6 +2428,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_TM_CR:
*val = get_reg_val(id, vcpu->arch.cr_tm);
break;
+ case KVM_REG_PPC_TM_XER:
+ *val = get_reg_val(id, vcpu->arch.xer_tm);
+ break;
case KVM_REG_PPC_TM_LR:
*val = get_reg_val(id, vcpu->arch.lr_tm);
break;
@@ -1094,7 +2463,19 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
break;
#endif
case KVM_REG_PPC_ARCH_COMPAT:
- *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
+ *val = get_reg_val(id, kvmppc_get_arch_compat(vcpu));
+ break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ *val = get_reg_val(id, kvmppc_get_dec_expires(vcpu));
+ break;
+ case KVM_REG_PPC_ONLINE:
+ *val = get_reg_val(id, vcpu->arch.online);
+ break;
+ case KVM_REG_PPC_PTCR:
+ *val = get_reg_val(id, vcpu->kvm->arch.l1_ptcr);
+ break;
+ case KVM_REG_PPC_FSCR:
+ *val = get_reg_val(id, kvmppc_get_fscr_hv(vcpu));
break;
default:
r = -EINVAL;
@@ -1124,61 +2505,100 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.dabrx = set_reg_val(id, *val) & ~DABRX_HYP;
break;
case KVM_REG_PPC_DSCR:
- vcpu->arch.dscr = set_reg_val(id, *val);
+ kvmppc_set_dscr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_PURR:
- vcpu->arch.purr = set_reg_val(id, *val);
+ kvmppc_set_purr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SPURR:
- vcpu->arch.spurr = set_reg_val(id, *val);
+ kvmppc_set_spurr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_AMR:
- vcpu->arch.amr = set_reg_val(id, *val);
+ kvmppc_set_amr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_UAMOR:
- vcpu->arch.uamor = set_reg_val(id, *val);
+ kvmppc_set_uamor_hv(vcpu, set_reg_val(id, *val));
break;
- case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+ case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
i = id - KVM_REG_PPC_MMCR0;
- vcpu->arch.mmcr[i] = set_reg_val(id, *val);
+ kvmppc_set_mmcr_hv(vcpu, i, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_MMCR2:
+ kvmppc_set_mmcr_hv(vcpu, 2, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_MMCRA:
+ kvmppc_set_mmcra_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_MMCRS:
+ vcpu->arch.mmcrs = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_MMCR3:
+ kvmppc_set_mmcr_hv(vcpu, 3, set_reg_val(id, *val));
break;
case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
i = id - KVM_REG_PPC_PMC1;
- vcpu->arch.pmc[i] = set_reg_val(id, *val);
+ kvmppc_set_pmc_hv(vcpu, i, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SPMC1 ... KVM_REG_PPC_SPMC2:
i = id - KVM_REG_PPC_SPMC1;
vcpu->arch.spmc[i] = set_reg_val(id, *val);
break;
case KVM_REG_PPC_SIAR:
- vcpu->arch.siar = set_reg_val(id, *val);
+ kvmppc_set_siar_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SDAR:
- vcpu->arch.sdar = set_reg_val(id, *val);
+ kvmppc_set_sdar_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_SIER:
- vcpu->arch.sier = set_reg_val(id, *val);
+ kvmppc_set_sier_hv(vcpu, 0, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_SIER2:
+ kvmppc_set_sier_hv(vcpu, 1, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_SIER3:
+ kvmppc_set_sier_hv(vcpu, 2, set_reg_val(id, *val));
break;
case KVM_REG_PPC_IAMR:
- vcpu->arch.iamr = set_reg_val(id, *val);
+ kvmppc_set_iamr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_PSPB:
- vcpu->arch.pspb = set_reg_val(id, *val);
+ kvmppc_set_pspb_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DPDES:
- vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.doorbell_request = set_reg_val(id, *val) & 1;
+ else
+ vcpu->arch.vcore->dpdes = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_VTB:
+ kvmppc_set_vtb(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DAWR:
- vcpu->arch.dawr = set_reg_val(id, *val);
+ kvmppc_set_dawr0_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_DAWRX:
- vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+ kvmppc_set_dawrx0_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP);
+ break;
+ case KVM_REG_PPC_DAWR1:
+ kvmppc_set_dawr1_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_DAWRX1:
+ kvmppc_set_dawrx1_hv(vcpu, set_reg_val(id, *val) & ~DAWRX_HYP);
+ break;
+ case KVM_REG_PPC_DEXCR:
+ kvmppc_set_dexcr_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_HASHKEYR:
+ kvmppc_set_hashkeyr_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_HASHPKEYR:
+ kvmppc_set_hashpkeyr_hv(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_CIABR:
- vcpu->arch.ciabr = set_reg_val(id, *val);
+ kvmppc_set_ciabr_hv(vcpu, set_reg_val(id, *val));
/* Don't allow setting breakpoints in hypervisor code */
- if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
- vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */
+ if ((kvmppc_get_ciabr_hv(vcpu) & CIABR_PRIV) == CIABR_PRIV_HYPER)
+ kvmppc_set_ciabr_hv(vcpu, kvmppc_get_ciabr_hv(vcpu) & ~CIABR_PRIV);
break;
case KVM_REG_PPC_CSIGR:
vcpu->arch.csigr = set_reg_val(id, *val);
@@ -1190,13 +2610,19 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
vcpu->arch.tcscr = set_reg_val(id, *val);
break;
case KVM_REG_PPC_PID:
- vcpu->arch.pid = set_reg_val(id, *val);
+ kvmppc_set_pid(vcpu, set_reg_val(id, *val));
break;
case KVM_REG_PPC_ACOP:
vcpu->arch.acop = set_reg_val(id, *val);
break;
case KVM_REG_PPC_WORT:
- vcpu->arch.wort = set_reg_val(id, *val);
+ kvmppc_set_wort_hv(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_TIDR:
+ vcpu->arch.tid = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PSSCR:
+ vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
break;
case KVM_REG_PPC_VPA_ADDR:
addr = set_reg_val(id, *val);
@@ -1225,10 +2651,25 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
+ {
/* round up to multiple of 2^24 */
- vcpu->arch.vcore->tb_offset =
- ALIGN(set_reg_val(id, *val), 1UL << 24);
+ u64 tb_offset = ALIGN(set_reg_val(id, *val), 1UL << 24);
+
+ /*
+ * Now that we know the timebase offset, update the
+ * decrementer expiry with a guest timebase value. If
+ * the userspace does not set DEC_EXPIRY, this ensures
+ * a migrated vcpu at least starts with an expired
+ * decrementer, which is better than a large one that
+ * causes a hang.
+ */
+ kvmppc_set_tb_offset(vcpu, tb_offset);
+ if (!kvmppc_get_dec_expires(vcpu) && tb_offset)
+ kvmppc_set_dec_expires(vcpu, get_tb() + tb_offset);
+
+ kvmppc_set_tb_offset(vcpu, tb_offset);
break;
+ }
case KVM_REG_PPC_LPCR:
kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true);
break;
@@ -1236,7 +2677,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false);
break;
case KVM_REG_PPC_PPR:
- vcpu->arch.ppr = set_reg_val(id, *val);
+ kvmppc_set_ppr_hv(vcpu, set_reg_val(id, *val));
break;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case KVM_REG_PPC_TFHAR:
@@ -1269,6 +2710,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_TM_CR:
vcpu->arch.cr_tm = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_TM_XER:
+ vcpu->arch.xer_tm = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_TM_LR:
vcpu->arch.lr_tm = set_reg_val(id, *val);
break;
@@ -1303,6 +2747,23 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_ARCH_COMPAT:
r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
break;
+ case KVM_REG_PPC_DEC_EXPIRY:
+ kvmppc_set_dec_expires(vcpu, set_reg_val(id, *val));
+ break;
+ case KVM_REG_PPC_ONLINE:
+ i = set_reg_val(id, *val);
+ if (i && !vcpu->arch.online)
+ atomic_inc(&vcpu->arch.vcore->online_count);
+ else if (!i && vcpu->arch.online)
+ atomic_dec(&vcpu->arch.vcore->online_count);
+ vcpu->arch.online = i;
+ break;
+ case KVM_REG_PPC_PTCR:
+ vcpu->kvm->arch.l1_ptcr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_FSCR:
+ kvmppc_set_fscr_hv(vcpu, set_reg_val(id, *val));
+ break;
default:
r = -EINVAL;
break;
@@ -1311,7 +2772,21 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+/*
+ * On POWER9, threads are independent and can be in different partitions.
+ * Therefore we consider each thread to be a subcore.
+ * There is a restriction that all threads have to be in the same
+ * MMU mode (radix or HPT), unfortunately, but since we only support
+ * HPT guests on a HPT host so far, that isn't an impediment yet.
+ */
+static int threads_per_vcore(struct kvm *kvm)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return 1;
+ return threads_per_subcore;
+}
+
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
{
struct kvmppc_vcore *vcore;
@@ -1320,44 +2795,179 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
if (vcore == NULL)
return NULL;
- INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
- init_waitqueue_head(&vcore->wq);
+ spin_lock_init(&vcore->stoltb_lock);
+ rcuwait_init(&vcore->wait);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * threads_per_subcore;
+ vcore->first_vcpuid = id;
vcore->kvm = kvm;
+ INIT_LIST_HEAD(&vcore->preempt_list);
- vcore->mpp_buffer_is_valid = false;
+ return vcore;
+}
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- vcore->mpp_buffer = (void *)__get_free_pages(
- GFP_KERNEL|__GFP_ZERO,
- MPP_BUFFER_ORDER);
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static struct debugfs_timings_element {
+ const char *name;
+ size_t offset;
+} timings[] = {
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+ {"vcpu_entry", offsetof(struct kvm_vcpu, arch.vcpu_entry)},
+ {"guest_entry", offsetof(struct kvm_vcpu, arch.guest_entry)},
+ {"in_guest", offsetof(struct kvm_vcpu, arch.in_guest)},
+ {"guest_exit", offsetof(struct kvm_vcpu, arch.guest_exit)},
+ {"vcpu_exit", offsetof(struct kvm_vcpu, arch.vcpu_exit)},
+ {"hypercall", offsetof(struct kvm_vcpu, arch.hcall)},
+ {"page_fault", offsetof(struct kvm_vcpu, arch.pg_fault)},
+#else
+ {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
+ {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
+ {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
+ {"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
+ {"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
+#endif
+};
- return vcore;
+#define N_TIMINGS (ARRAY_SIZE(timings))
+
+struct debugfs_timings_state {
+ struct kvm_vcpu *vcpu;
+ unsigned int buflen;
+ char buf[N_TIMINGS * 100];
+};
+
+static int debugfs_timings_open(struct inode *inode, struct file *file)
+{
+ struct kvm_vcpu *vcpu = inode->i_private;
+ struct debugfs_timings_state *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ kvm_get_kvm(vcpu->kvm);
+ p->vcpu = vcpu;
+ file->private_data = p;
+
+ return nonseekable_open(inode, file);
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
- unsigned int id)
+static int debugfs_timings_release(struct inode *inode, struct file *file)
{
- struct kvm_vcpu *vcpu;
- int err = -EINVAL;
- int core;
- struct kvmppc_vcore *vcore;
+ struct debugfs_timings_state *p = file->private_data;
- core = id / threads_per_subcore;
- if (core >= KVM_MAX_VCORES)
- goto out;
+ kvm_put_kvm(p->vcpu->kvm);
+ kfree(p);
+ return 0;
+}
- err = -ENOMEM;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ struct debugfs_timings_state *p = file->private_data;
+ struct kvm_vcpu *vcpu = p->vcpu;
+ char *s, *buf_end;
+ struct kvmhv_tb_accumulator tb;
+ u64 count;
+ loff_t pos;
+ ssize_t n;
+ int i, loops;
+ bool ok;
+
+ if (!p->buflen) {
+ s = p->buf;
+ buf_end = s + sizeof(p->buf);
+ for (i = 0; i < N_TIMINGS; ++i) {
+ struct kvmhv_tb_accumulator *acc;
+
+ acc = (struct kvmhv_tb_accumulator *)
+ ((unsigned long)vcpu + timings[i].offset);
+ ok = false;
+ for (loops = 0; loops < 1000; ++loops) {
+ count = acc->seqcount;
+ if (!(count & 1)) {
+ smp_rmb();
+ tb = *acc;
+ smp_rmb();
+ if (count == acc->seqcount) {
+ ok = true;
+ break;
+ }
+ }
+ udelay(1);
+ }
+ if (!ok)
+ snprintf(s, buf_end - s, "%s: stuck\n",
+ timings[i].name);
+ else
+ snprintf(s, buf_end - s,
+ "%s: %llu %llu %llu %llu\n",
+ timings[i].name, count / 2,
+ tb_to_ns(tb.tb_total),
+ tb_to_ns(tb.tb_min),
+ tb_to_ns(tb.tb_max));
+ s += strlen(s);
+ }
+ p->buflen = s - p->buf;
+ }
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ pos = *ppos;
+ if (pos >= p->buflen)
+ return 0;
+ if (len > p->buflen - pos)
+ len = p->buflen - pos;
+ n = copy_to_user(buf, p->buf + pos, len);
+ if (n) {
+ if (n == len)
+ return -EFAULT;
+ len -= n;
+ }
+ *ppos = pos + len;
+ return len;
+}
+
+static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ return -EACCES;
+}
+
+static const struct file_operations debugfs_timings_ops = {
+ .owner = THIS_MODULE,
+ .open = debugfs_timings_open,
+ .release = debugfs_timings_release,
+ .read = debugfs_timings_read,
+ .write = debugfs_timings_write,
+ .llseek = generic_file_llseek,
+};
+
+/* Create a debugfs directory for the vcpu */
+static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300) == IS_ENABLED(CONFIG_KVM_BOOK3S_HV_P9_TIMING))
+ debugfs_create_file("timings", 0444, debugfs_dentry, vcpu,
+ &debugfs_timings_ops);
+ return 0;
+}
+
+#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+static int kvmppc_arch_create_vcpu_debugfs_hv(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
+{
+ return 0;
+}
+#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
+
+static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu)
+{
+ int err;
+ int core;
+ struct kvmppc_vcore *vcore;
+ struct kvm *kvm;
+ unsigned int id;
+
+ kvm = vcpu->kvm;
+ id = vcpu->vcpu_id;
vcpu->arch.shared = &vcpu->arch.shregs;
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
@@ -1371,15 +2981,60 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.shared_big_endian = false;
#endif
#endif
- vcpu->arch.mmcr[0] = MMCR0_FC;
- vcpu->arch.ctrl = CTRL_RUNLATCH;
+
+ if (kvmhv_is_nestedv2()) {
+ err = kvmhv_nestedv2_vcpu_create(vcpu, &vcpu->arch.nestedv2_io);
+ if (err < 0)
+ return err;
+ }
+
+ kvmppc_set_mmcr_hv(vcpu, 0, MMCR0_FC);
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ kvmppc_set_mmcr_hv(vcpu, 0, kvmppc_get_mmcr_hv(vcpu, 0) | MMCR0_PMCCEXT);
+ kvmppc_set_mmcra_hv(vcpu, MMCRA_BHRB_DISABLE);
+ }
+
+ kvmppc_set_ctrl_hv(vcpu, CTRL_RUNLATCH);
/* default to host PVR, since we can't spoof it */
kvmppc_set_pvr_hv(vcpu, mfspr(SPRN_PVR));
spin_lock_init(&vcpu->arch.vpa_update_lock);
spin_lock_init(&vcpu->arch.tbacct_lock);
vcpu->arch.busy_preempt = TB_NIL;
+ __kvmppc_set_msr_hv(vcpu, MSR_ME);
vcpu->arch.intr_msr = MSR_SF | MSR_ME;
+ /*
+ * Set the default HFSCR for the guest from the host value.
+ * This value is only used on POWER9 and later.
+ * On >= POWER9, we want to virtualize the doorbell facility, so we
+ * don't set the HFSCR_MSGP bit, and that causes those instructions
+ * to trap and then we emulate them.
+ */
+ kvmppc_set_hfscr_hv(vcpu, HFSCR_TAR | HFSCR_EBB | HFSCR_PM | HFSCR_BHRB |
+ HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP);
+
+ /* On POWER10 and later, allow prefixed instructions */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_PREFIX);
+
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & mfspr(SPRN_HFSCR));
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) | HFSCR_TM);
+#endif
+ }
+ if (cpu_has_feature(CPU_FTR_TM_COMP))
+ vcpu->arch.hfscr |= HFSCR_TM;
+
+ vcpu->arch.hfscr_permitted = kvmppc_get_hfscr_hv(vcpu);
+
+ /*
+ * PM, EBB, TM are demand-faulted so start with it clear.
+ */
+ kvmppc_set_hfscr_hv(vcpu, kvmppc_get_hfscr_hv(vcpu) & ~(HFSCR_PM | HFSCR_EBB | HFSCR_TM));
+
kvmppc_mmu_book3s_hv_init(vcpu);
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@ -1387,32 +3042,92 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
init_waitqueue_head(&vcpu->arch.cpu_run);
mutex_lock(&kvm->lock);
- vcore = kvm->arch.vcores[core];
- if (!vcore) {
- vcore = kvmppc_vcore_create(kvm, core);
- kvm->arch.vcores[core] = vcore;
- kvm->arch.online_vcores++;
+ vcore = NULL;
+ err = -EINVAL;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
+ pr_devel("KVM: VCPU ID too high\n");
+ core = KVM_MAX_VCORES;
+ } else {
+ BUG_ON(kvm->arch.smt_mode != 1);
+ core = kvmppc_pack_vcpu_id(kvm, id);
+ }
+ } else {
+ core = id / kvm->arch.smt_mode;
+ }
+ if (core < KVM_MAX_VCORES) {
+ vcore = kvm->arch.vcores[core];
+ if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pr_devel("KVM: collision on id %u", id);
+ vcore = NULL;
+ } else if (!vcore) {
+ /*
+ * Take mmu_setup_lock for mutual exclusion
+ * with kvmppc_update_lpcr().
+ */
+ err = -ENOMEM;
+ vcore = kvmppc_vcore_create(kvm,
+ id & ~(kvm->arch.smt_mode - 1));
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ }
}
mutex_unlock(&kvm->lock);
if (!vcore)
- goto free_vcpu;
+ return err;
spin_lock(&vcore->lock);
++vcore->num_threads;
spin_unlock(&vcore->lock);
vcpu->arch.vcore = vcore;
vcpu->arch.ptid = vcpu->vcpu_id - vcore->first_vcpuid;
+ vcpu->arch.thread_cpu = -1;
+ vcpu->arch.prev_cpu = -1;
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);
- return vcpu;
+ return 0;
+}
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
-out:
- return ERR_PTR(err);
+static int kvmhv_set_smt_mode(struct kvm *kvm, unsigned long smt_mode,
+ unsigned long flags)
+{
+ int err;
+ int esmt = 0;
+
+ if (flags)
+ return -EINVAL;
+ if (smt_mode > MAX_SMT_THREADS || !is_power_of_2(smt_mode))
+ return -EINVAL;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * On POWER8 (or POWER7), the threading mode is "strict",
+ * so we pack smt_mode vcpus per vcore.
+ */
+ if (smt_mode > threads_per_subcore)
+ return -EINVAL;
+ } else {
+ /*
+ * On POWER9, the threading mode is "loose",
+ * so each vcpu gets its own vcore.
+ */
+ esmt = smt_mode;
+ smt_mode = 1;
+ }
+ mutex_lock(&kvm->lock);
+ err = -EBUSY;
+ if (!kvm->arch.online_vcores) {
+ kvm->arch.smt_mode = smt_mode;
+ kvm->arch.emul_smt_mode = esmt;
+ err = 0;
+ }
+ mutex_unlock(&kvm->lock);
+
+ return err;
}
static void unpin_vpa(struct kvm *kvm, struct kvmppc_vpa *vpa)
@@ -1429,8 +3144,8 @@ static void kvmppc_core_vcpu_free_hv(struct kvm_vcpu *vcpu)
unpin_vpa(vcpu->kvm, &vcpu->arch.slb_shadow);
unpin_vpa(vcpu->kvm, &vcpu->arch.vpa);
spin_unlock(&vcpu->arch.vpa_update_lock);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
+ if (kvmhv_is_nestedv2())
+ kvmhv_nestedv2_vcpu_free(vcpu, &vcpu->arch.nestedv2_io);
}
static int kvmppc_core_check_requests_hv(struct kvm_vcpu *vcpu)
@@ -1444,58 +3159,50 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
unsigned long dec_nsec, now;
now = get_tb();
- if (now > vcpu->arch.dec_expires) {
+ if (now > kvmppc_dec_expires_host_tb(vcpu)) {
/* decrementer has already gone negative */
kvmppc_core_queue_dec(vcpu);
kvmppc_core_prepare_to_enter(vcpu);
return;
}
- dec_nsec = (vcpu->arch.dec_expires - now) * NSEC_PER_SEC
- / tb_ticks_per_sec;
- hrtimer_start(&vcpu->arch.dec_timer, ktime_set(0, dec_nsec),
- HRTIMER_MODE_REL);
+ dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
+ hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
vcpu->arch.timer_running = 1;
}
-static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.ceded = 0;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
- }
-}
-
-extern void __kvmppc_vcore_entry(void);
+extern int __kvmppc_vcore_entry(void);
static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
- struct kvm_vcpu *vcpu)
+ struct kvm_vcpu *vcpu, u64 tb)
{
u64 now;
if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
return;
spin_lock_irq(&vcpu->arch.tbacct_lock);
- now = mftb();
+ now = tb;
vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
vcpu->arch.stolen_logged;
vcpu->arch.busy_preempt = now;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
spin_unlock_irq(&vcpu->arch.tbacct_lock);
--vc->n_runnable;
- list_del(&vcpu->arch.run_list);
+ WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], NULL);
}
static int kvmppc_grab_hwthread(int cpu)
{
struct paca_struct *tpaca;
- long timeout = 1000;
+ long timeout = 10000;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
/* Ensure the thread won't go into the kernel if it wakes */
- tpaca->kvm_hstate.hwthread_req = 1;
tpaca->kvm_hstate.kvm_vcpu = NULL;
+ tpaca->kvm_hstate.kvm_vcore = NULL;
+ tpaca->kvm_hstate.napping = 0;
+ smp_wmb();
+ tpaca->kvm_hstate.hwthread_req = 1;
/*
* If the thread is already executing in the kernel (e.g. handling
@@ -1521,52 +3228,162 @@ static void kvmppc_release_hwthread(int cpu)
{
struct paca_struct *tpaca;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.hwthread_req = 0;
tpaca->kvm_hstate.kvm_vcpu = NULL;
+ tpaca->kvm_hstate.kvm_vcore = NULL;
+ tpaca->kvm_hstate.kvm_split_mode = NULL;
}
-static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
+static DEFINE_PER_CPU(struct kvm *, cpu_in_guest);
+
+static void radix_flush_cpu(struct kvm *kvm, int cpu, struct kvm_vcpu *vcpu)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ cpumask_t *need_tlb_flush;
+ int i;
+
+ if (nested)
+ need_tlb_flush = &nested->need_tlb_flush;
+ else
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+ cpu = cpu_first_tlb_thread_sibling(cpu);
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step())
+ cpumask_set_cpu(i, need_tlb_flush);
+
+ /*
+ * Make sure setting of bit in need_tlb_flush precedes testing of
+ * cpu_in_guest. The matching barrier on the other side is hwsync
+ * when switching to guest MMU mode, which happens between
+ * cpu_in_guest being set to the guest kvm, and need_tlb_flush bit
+ * being tested.
+ */
+ smp_mb();
+
+ for (i = cpu; i <= cpu_last_tlb_thread_sibling(cpu);
+ i += cpu_tlb_thread_sibling_step()) {
+ struct kvm *running = *per_cpu_ptr(&cpu_in_guest, i);
+
+ if (running == kvm)
+ smp_call_function_single(i, do_nothing, NULL, 1);
+ }
+}
+
+static void do_migrate_away_vcpu(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct kvm *kvm = vcpu->kvm;
+
+ /*
+ * If the guest has GTSE, it may execute tlbie, so do a eieio; tlbsync;
+ * ptesync sequence on the old CPU before migrating to a new one, in
+ * case we interrupted the guest between a tlbie ; eieio ;
+ * tlbsync; ptesync sequence.
+ *
+ * Otherwise, ptesync is sufficient for ordering tlbiel sequences.
+ */
+ if (kvm->arch.lpcr & LPCR_GTSE)
+ asm volatile("eieio; tlbsync; ptesync");
+ else
+ asm volatile("ptesync");
+}
+
+static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ struct kvm *kvm = vcpu->kvm;
+ int prev_cpu;
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return;
+
+ if (nested)
+ prev_cpu = nested->prev_cpu[vcpu->arch.nested_vcpu_id];
+ else
+ prev_cpu = vcpu->arch.prev_cpu;
+
+ /*
+ * With radix, the guest can do TLB invalidations itself,
+ * and it could choose to use the local form (tlbiel) if
+ * it is invalidating a translation that has only ever been
+ * used on one vcpu. However, that doesn't mean it has
+ * only ever been used on one physical cpu, since vcpus
+ * can move around between pcpus. To cope with this, when
+ * a vcpu moves from one pcpu to another, we need to tell
+ * any vcpus running on the same core as this vcpu previously
+ * ran to flush the TLB.
+ */
+ if (prev_cpu != pcpu) {
+ if (prev_cpu >= 0) {
+ if (cpu_first_tlb_thread_sibling(prev_cpu) !=
+ cpu_first_tlb_thread_sibling(pcpu))
+ radix_flush_cpu(kvm, prev_cpu, vcpu);
+
+ smp_call_function_single(prev_cpu,
+ do_migrate_away_vcpu, vcpu, 1);
+ }
+ if (nested)
+ nested->prev_cpu[vcpu->arch.nested_vcpu_id] = pcpu;
+ else
+ vcpu->arch.prev_cpu = pcpu;
+ }
+}
+
+static void kvmppc_start_thread(struct kvm_vcpu *vcpu, struct kvmppc_vcore *vc)
{
int cpu;
struct paca_struct *tpaca;
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- if (vcpu->arch.timer_running) {
- hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
- vcpu->arch.timer_running = 0;
+ cpu = vc->pcpu;
+ if (vcpu) {
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+ cpu += vcpu->arch.ptid;
+ vcpu->cpu = vc->pcpu;
+ vcpu->arch.thread_cpu = cpu;
}
- cpu = vc->pcpu + vcpu->arch.ptid;
- tpaca = &paca[cpu];
+ tpaca = paca_ptrs[cpu];
tpaca->kvm_hstate.kvm_vcpu = vcpu;
- tpaca->kvm_hstate.kvm_vcore = vc;
- tpaca->kvm_hstate.ptid = vcpu->arch.ptid;
- vcpu->cpu = vc->pcpu;
+ tpaca->kvm_hstate.ptid = cpu - vc->pcpu;
+ tpaca->kvm_hstate.fake_suspend = 0;
+ /* Order stores to hstate.kvm_vcpu etc. before store to kvm_vcore */
smp_wmb();
-#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
- if (cpu != smp_processor_id()) {
- xics_wake_cpu(cpu);
- if (vcpu->arch.ptid)
- ++vc->n_woken;
- }
-#endif
+ tpaca->kvm_hstate.kvm_vcore = vc;
+ if (cpu != smp_processor_id())
+ kvmppc_ipi_thread(cpu);
}
-static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
+static void kvmppc_wait_for_nap(int n_threads)
{
- int i;
+ int cpu = smp_processor_id();
+ int i, loops;
- HMT_low();
- i = 0;
- while (vc->nap_count < vc->n_woken) {
- if (++i >= 1000000) {
- pr_err("kvmppc_wait_for_nap timeout %d %d\n",
- vc->nap_count, vc->n_woken);
- break;
+ if (n_threads <= 1)
+ return;
+ for (loops = 0; loops < 1000000; ++loops) {
+ /*
+ * Check if all threads are finished.
+ * We set the vcore pointer when starting a thread
+ * and the thread clears it when finished, so we look
+ * for any threads that still have a non-NULL vcore ptr.
+ */
+ for (i = 1; i < n_threads; ++i)
+ if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
+ break;
+ if (i == n_threads) {
+ HMT_medium();
+ return;
}
- cpu_relax();
+ HMT_low();
}
HMT_medium();
+ for (i = 1; i < n_threads; ++i)
+ if (paca_ptrs[cpu + i]->kvm_hstate.kvm_vcore)
+ pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
/*
@@ -1601,214 +3418,1333 @@ static int on_primary_thread(void)
return 1;
}
-static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc)
+/*
+ * A list of virtual cores for each physical CPU.
+ * These are vcores that could run but their runner VCPU tasks are
+ * (or may be) preempted.
+ */
+struct preempted_vcore_list {
+ struct list_head list;
+ spinlock_t lock;
+};
+
+static DEFINE_PER_CPU(struct preempted_vcore_list, preempted_vcores);
+
+static void init_vcore_lists(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct preempted_vcore_list *lp = &per_cpu(preempted_vcores, cpu);
+ spin_lock_init(&lp->lock);
+ INIT_LIST_HEAD(&lp->list);
+ }
+}
+
+static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
{
- phys_addr_t phy_addr, mpp_addr;
+ struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
- phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer);
- mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
- mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT);
- logmpp(mpp_addr | PPC_LOGMPP_LOG_L2);
+ vc->vcore_state = VCORE_PREEMPT;
+ vc->pcpu = smp_processor_id();
+ if (vc->num_threads < threads_per_vcore(vc->kvm)) {
+ spin_lock(&lp->lock);
+ list_add_tail(&vc->preempt_list, &lp->list);
+ spin_unlock(&lp->lock);
+ }
- vc->mpp_buffer_is_valid = true;
+ /* Start accumulating stolen time */
+ kvmppc_core_start_stolen(vc, mftb());
}
-static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc)
+static void kvmppc_vcore_end_preempt(struct kvmppc_vcore *vc)
{
- phys_addr_t phy_addr, mpp_addr;
+ struct preempted_vcore_list *lp;
- phy_addr = virt_to_phys(vc->mpp_buffer);
- mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK;
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
- /* We must abort any in-progress save operations to ensure
- * the table is valid so that prefetch engine knows when to
- * stop prefetching. */
- logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT);
- mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE);
+ kvmppc_core_end_stolen(vc, mftb());
+ if (!list_empty(&vc->preempt_list)) {
+ lp = &per_cpu(preempted_vcores, vc->pcpu);
+ spin_lock(&lp->lock);
+ list_del_init(&vc->preempt_list);
+ spin_unlock(&lp->lock);
+ }
+ vc->vcore_state = VCORE_INACTIVE;
}
/*
- * Run a set of guest threads on a physical core.
- * Called with vc->lock held.
+ * This stores information about the virtual cores currently
+ * assigned to a physical core.
+ */
+struct core_info {
+ int n_subcores;
+ int max_subcore_threads;
+ int total_threads;
+ int subcore_threads[MAX_SUBCORES];
+ struct kvmppc_vcore *vc[MAX_SUBCORES];
+};
+
+/*
+ * This mapping means subcores 0 and 1 can use threads 0-3 and 4-7
+ * respectively in 2-way micro-threading (split-core) mode on POWER8.
*/
-static void kvmppc_run_core(struct kvmppc_vcore *vc)
+static int subcore_thread_map[MAX_SUBCORES] = { 0, 4, 2, 6 };
+
+static void init_core_info(struct core_info *cip, struct kvmppc_vcore *vc)
{
- struct kvm_vcpu *vcpu, *vnext;
- long ret;
- u64 now;
- int i, need_vpa_update;
- int srcu_idx;
- struct kvm_vcpu *vcpus_to_update[threads_per_core];
+ memset(cip, 0, sizeof(*cip));
+ cip->n_subcores = 1;
+ cip->max_subcore_threads = vc->num_threads;
+ cip->total_threads = vc->num_threads;
+ cip->subcore_threads[0] = vc->num_threads;
+ cip->vc[0] = vc;
+}
- /* don't start if any threads have a signal pending */
- need_vpa_update = 0;
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+static bool subcore_config_ok(int n_subcores, int n_threads)
+{
+ /*
+ * POWER9 "SMT4" cores are permanently in what is effectively a 4-way
+ * split-core mode, with one thread per subcore.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return n_subcores <= 4 && n_threads == 1;
+
+ /* On POWER8, can only dynamically split if unsplit to begin with */
+ if (n_subcores > 1 && threads_per_subcore < MAX_SMT_THREADS)
+ return false;
+ if (n_subcores > MAX_SUBCORES)
+ return false;
+ if (n_subcores > 1) {
+ if (!(dynamic_mt_modes & 2))
+ n_subcores = 4;
+ if (n_subcores > 2 && !(dynamic_mt_modes & 4))
+ return false;
+ }
+
+ return n_subcores * roundup_pow_of_two(n_threads) <= MAX_SMT_THREADS;
+}
+
+static void init_vcore_to_run(struct kvmppc_vcore *vc)
+{
+ vc->entry_exit_map = 0;
+ vc->in_guest = 0;
+ vc->napping_threads = 0;
+ vc->conferring_threads = 0;
+ vc->tb_offset_applied = 0;
+}
+
+static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
+{
+ int n_threads = vc->num_threads;
+ int sub;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+ return false;
+
+ /* In one_vm_per_core mode, require all vcores to be from the same vm */
+ if (one_vm_per_core && vc->kvm != cip->vc[0]->kvm)
+ return false;
+
+ if (n_threads < cip->max_subcore_threads)
+ n_threads = cip->max_subcore_threads;
+ if (!subcore_config_ok(cip->n_subcores + 1, n_threads))
+ return false;
+ cip->max_subcore_threads = n_threads;
+
+ sub = cip->n_subcores;
+ ++cip->n_subcores;
+ cip->total_threads += vc->num_threads;
+ cip->subcore_threads[sub] = vc->num_threads;
+ cip->vc[sub] = vc;
+ init_vcore_to_run(vc);
+ list_del_init(&vc->preempt_list);
+
+ return true;
+}
+
+/*
+ * Work out whether it is possible to piggyback the execution of
+ * vcore *pvc onto the execution of the other vcores described in *cip.
+ */
+static bool can_piggyback(struct kvmppc_vcore *pvc, struct core_info *cip,
+ int target_threads)
+{
+ if (cip->total_threads + pvc->num_threads > target_threads)
+ return false;
+
+ return can_dynamic_split(pvc, cip);
+}
+
+static void prepare_threads(struct kvmppc_vcore *vc)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+
+ for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
- return;
- if (vcpu->arch.vpa.update_pending ||
- vcpu->arch.slb_shadow.update_pending ||
- vcpu->arch.dtl.update_pending)
- vcpus_to_update[need_vpa_update++] = vcpu;
+ vcpu->arch.ret = -EINTR;
+ else if (vcpu->arch.vpa.update_pending ||
+ vcpu->arch.slb_shadow.update_pending ||
+ vcpu->arch.dtl.update_pending)
+ vcpu->arch.ret = RESUME_GUEST;
+ else
+ continue;
+ kvmppc_remove_runnable(vc, vcpu, mftb());
+ wake_up(&vcpu->arch.cpu_run);
}
+}
+static void collect_piggybacks(struct core_info *cip, int target_threads)
+{
+ struct preempted_vcore_list *lp = this_cpu_ptr(&preempted_vcores);
+ struct kvmppc_vcore *pvc, *vcnext;
+
+ spin_lock(&lp->lock);
+ list_for_each_entry_safe(pvc, vcnext, &lp->list, preempt_list) {
+ if (!spin_trylock(&pvc->lock))
+ continue;
+ prepare_threads(pvc);
+ if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
+ list_del_init(&pvc->preempt_list);
+ if (pvc->runner == NULL) {
+ pvc->vcore_state = VCORE_INACTIVE;
+ kvmppc_core_end_stolen(pvc, mftb());
+ }
+ spin_unlock(&pvc->lock);
+ continue;
+ }
+ if (!can_piggyback(pvc, cip, target_threads)) {
+ spin_unlock(&pvc->lock);
+ continue;
+ }
+ kvmppc_core_end_stolen(pvc, mftb());
+ pvc->vcore_state = VCORE_PIGGYBACK;
+ if (cip->total_threads >= target_threads)
+ break;
+ }
+ spin_unlock(&lp->lock);
+}
+
+static bool recheck_signals_and_mmu(struct core_info *cip)
+{
+ int sub, i;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_vcore *vc;
+
+ for (sub = 0; sub < cip->n_subcores; ++sub) {
+ vc = cip->vc[sub];
+ if (!vc->kvm->arch.mmu_ready)
+ return true;
+ for_each_runnable_thread(i, vcpu, vc)
+ if (signal_pending(vcpu->arch.run_task))
+ return true;
+ }
+ return false;
+}
+
+static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
+{
+ int still_running = 0, i;
+ u64 now;
+ long ret;
+ struct kvm_vcpu *vcpu;
+
+ spin_lock(&vc->lock);
+ now = get_tb();
+ for_each_runnable_thread(i, vcpu, vc) {
+ /*
+ * It's safe to unlock the vcore in the loop here, because
+ * for_each_runnable_thread() is safe against removal of
+ * the vcpu, and the vcore state is VCORE_EXITING here,
+ * so any vcpus becoming runnable will have their arch.trap
+ * set to zero and can't actually run in the guest.
+ */
+ spin_unlock(&vc->lock);
+ /* cancel pending dec exception if dec is positive */
+ if (now < kvmppc_dec_expires_host_tb(vcpu) &&
+ kvmppc_core_pending_dec(vcpu))
+ kvmppc_core_dequeue_dec(vcpu);
+
+ trace_kvm_guest_exit(vcpu);
+
+ ret = RESUME_GUEST;
+ if (vcpu->arch.trap)
+ ret = kvmppc_handle_exit_hv(vcpu,
+ vcpu->arch.run_task);
+
+ vcpu->arch.ret = ret;
+ vcpu->arch.trap = 0;
+
+ spin_lock(&vc->lock);
+ if (is_kvmppc_resume_guest(vcpu->arch.ret)) {
+ if (vcpu->arch.pending_exceptions)
+ kvmppc_core_prepare_to_enter(vcpu);
+ if (vcpu->arch.ceded)
+ kvmppc_set_timer(vcpu);
+ else
+ ++still_running;
+ } else {
+ kvmppc_remove_runnable(vc, vcpu, mftb());
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+ if (!is_master) {
+ if (still_running > 0) {
+ kvmppc_vcore_preempt(vc);
+ } else if (vc->runner) {
+ vc->vcore_state = VCORE_PREEMPT;
+ kvmppc_core_start_stolen(vc, mftb());
+ } else {
+ vc->vcore_state = VCORE_INACTIVE;
+ }
+ if (vc->n_runnable > 0 && vc->runner == NULL) {
+ /* make sure there's a candidate runner awake */
+ i = -1;
+ vcpu = next_runnable_thread(vc, &i);
+ wake_up(&vcpu->arch.cpu_run);
+ }
+ }
+ spin_unlock(&vc->lock);
+}
+
+/*
+ * Clear core from the list of active host cores as we are about to
+ * enter the guest. Only do this if it is the primary thread of the
+ * core (not if a subcore) that is entering the guest.
+ */
+static inline int kvmppc_clear_host_core(unsigned int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return 0;
/*
- * Initialize *vc, in particular vc->vcore_state, so we can
- * drop the vcore lock if necessary.
+ * Memory barrier can be omitted here as we will do a smp_wmb()
+ * later in kvmppc_start_thread and we need ensure that state is
+ * visible to other CPUs only after we enter guest.
*/
- vc->n_woken = 0;
- vc->nap_count = 0;
- vc->entry_exit_count = 0;
- vc->vcore_state = VCORE_STARTING;
- vc->in_guest = 0;
- vc->napping_threads = 0;
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+ return 0;
+}
+
+/*
+ * Advertise this core as an active host core since we exited the guest
+ * Only need to do this if it is the primary thread of the core that is
+ * exiting.
+ */
+static inline int kvmppc_set_host_core(unsigned int cpu)
+{
+ int core;
+
+ if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
+ return 0;
/*
- * Updating any of the vpas requires calling kvmppc_pin_guest_page,
- * which can't be called with any spinlocks held.
+ * Memory barrier can be omitted here because we do a spin_unlock
+ * immediately after this which provides the memory barrier.
*/
- if (need_vpa_update) {
- spin_unlock(&vc->lock);
- for (i = 0; i < need_vpa_update; ++i)
- kvmppc_update_vpas(vcpus_to_update[i]);
- spin_lock(&vc->lock);
+ core = cpu >> threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+ return 0;
+}
+
+static void set_irq_happened(int trap)
+{
+ switch (trap) {
+ case BOOK3S_INTERRUPT_EXTERNAL:
+ local_paca->irq_happened |= PACA_IRQ_EE;
+ break;
+ case BOOK3S_INTERRUPT_H_DOORBELL:
+ local_paca->irq_happened |= PACA_IRQ_DBELL;
+ break;
+ case BOOK3S_INTERRUPT_HMI:
+ local_paca->irq_happened |= PACA_IRQ_HMI;
+ break;
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
+ replay_system_reset();
+ break;
}
+}
+
+/*
+ * Run a set of guest threads on a physical core.
+ * Called with vc->lock held.
+ */
+static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+ int srcu_idx;
+ struct core_info core_info;
+ struct kvmppc_vcore *pvc;
+ struct kvm_split_mode split_info, *sip;
+ int split, subcore_size, active;
+ int sub;
+ bool thr0_done;
+ unsigned long cmd_bit, stat_bit;
+ int pcpu, thr;
+ int target_threads;
+ int controlled_threads;
+ int trap;
+ bool is_power8;
+
+ if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+ return;
+
+ /*
+ * Remove from the list any threads that have a signal pending
+ * or need a VPA update done
+ */
+ prepare_threads(vc);
+
+ /* if the runner is no longer runnable, let the caller pick a new one */
+ if (vc->runner->arch.state != KVMPPC_VCPU_RUNNABLE)
+ return;
+
+ /*
+ * Initialize *vc.
+ */
+ init_vcore_to_run(vc);
+ vc->preempt_tb = TB_NIL;
+
+ /*
+ * Number of threads that we will be controlling: the same as
+ * the number of threads per subcore, except on POWER9,
+ * where it's 1 because the threads are (mostly) independent.
+ */
+ controlled_threads = threads_per_vcore(vc->kvm);
/*
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
*/
- if ((threads_per_core > 1) &&
+ if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+ for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
+ kvmppc_remove_runnable(vc, vcpu, mftb());
+ wake_up(&vcpu->arch.cpu_run);
+ }
goto out;
}
+ /*
+ * See if we could run any other vcores on the physical core
+ * along with this one.
+ */
+ init_core_info(&core_info, vc);
+ pcpu = smp_processor_id();
+ target_threads = controlled_threads;
+ if (target_smt_mode && target_smt_mode < target_threads)
+ target_threads = target_smt_mode;
+ if (vc->num_threads < target_threads)
+ collect_piggybacks(&core_info, target_threads);
- vc->pcpu = smp_processor_id();
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- kvmppc_start_thread(vcpu);
- kvmppc_create_dtl_entry(vcpu, vc);
+ /*
+ * Hard-disable interrupts, and check resched flag and signals.
+ * If we need to reschedule or deliver a signal, clean up
+ * and return without going into the guest(s).
+ * If the mmu_ready flag has been cleared, don't go into the
+ * guest because that means a HPT resize operation is in progress.
+ */
+ local_irq_disable();
+ hard_irq_disable();
+ if (lazy_irq_pending() || need_resched() ||
+ recheck_signals_and_mmu(&core_info)) {
+ local_irq_enable();
+ vc->vcore_state = VCORE_INACTIVE;
+ /* Unlock all except the primary vcore */
+ for (sub = 1; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ /* Put back on to the preempted vcores list */
+ kvmppc_vcore_preempt(pvc);
+ spin_unlock(&pvc->lock);
+ }
+ for (i = 0; i < controlled_threads; ++i)
+ kvmppc_release_hwthread(pcpu + i);
+ return;
+ }
+
+ kvmppc_clear_host_core(pcpu);
+
+ /* Decide on micro-threading (split-core) mode */
+ subcore_size = threads_per_subcore;
+ cmd_bit = stat_bit = 0;
+ split = core_info.n_subcores;
+ sip = NULL;
+ is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
+
+ if (split > 1) {
+ sip = &split_info;
+ memset(&split_info, 0, sizeof(split_info));
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ split_info.vc[sub] = core_info.vc[sub];
+
+ if (is_power8) {
+ if (split == 2 && (dynamic_mt_modes & 2)) {
+ cmd_bit = HID0_POWER8_1TO2LPAR;
+ stat_bit = HID0_POWER8_2LPARMODE;
+ } else {
+ split = 4;
+ cmd_bit = HID0_POWER8_1TO4LPAR;
+ stat_bit = HID0_POWER8_4LPARMODE;
+ }
+ subcore_size = MAX_SMT_THREADS / split;
+ split_info.rpr = mfspr(SPRN_RPR);
+ split_info.pmmar = mfspr(SPRN_PMMAR);
+ split_info.ldbar = mfspr(SPRN_LDBAR);
+ split_info.subcore_size = subcore_size;
+ } else {
+ split_info.subcore_size = 1;
+ }
+
+ /* order writes to split_info before kvm_split_mode pointer */
+ smp_wmb();
+ }
+
+ for (thr = 0; thr < controlled_threads; ++thr) {
+ struct paca_struct *paca = paca_ptrs[pcpu + thr];
+
+ paca->kvm_hstate.napping = 0;
+ paca->kvm_hstate.kvm_split_mode = sip;
+ }
+
+ /* Initiate micro-threading (split-core) on POWER8 if required */
+ if (cmd_bit) {
+ unsigned long hid0 = mfspr(SPRN_HID0);
+
+ hid0 |= cmd_bit | HID0_POWER8_DYNLPARDIS;
+ mb();
+ mtspr(SPRN_HID0, hid0);
+ isync();
+ for (;;) {
+ hid0 = mfspr(SPRN_HID0);
+ if (hid0 & stat_bit)
+ break;
+ cpu_relax();
+ }
}
- /* Set this explicitly in case thread 0 doesn't have a vcpu */
- get_paca()->kvm_hstate.kvm_vcore = vc;
- get_paca()->kvm_hstate.ptid = 0;
+ /*
+ * On POWER8, set RWMR register.
+ * Since it only affects PURR and SPURR, it doesn't affect
+ * the host, so we don't save/restore the host value.
+ */
+ if (is_power8) {
+ unsigned long rwmr_val = RWMR_RPA_P8_8THREAD;
+ int n_online = atomic_read(&vc->online_count);
+
+ /*
+ * Use the 8-thread value if we're doing split-core
+ * or if the vcore's online count looks bogus.
+ */
+ if (split == 1 && threads_per_subcore == MAX_SMT_THREADS &&
+ n_online >= 1 && n_online <= MAX_SMT_THREADS)
+ rwmr_val = p8_rwmr_values[n_online];
+ mtspr(SPRN_RWMR, rwmr_val);
+ }
+
+ /* Start all the threads */
+ active = 0;
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ thr = is_power8 ? subcore_thread_map[sub] : sub;
+ thr0_done = false;
+ active |= 1 << thr;
+ pvc = core_info.vc[sub];
+ pvc->pcpu = pcpu + thr;
+ for_each_runnable_thread(i, vcpu, pvc) {
+ /*
+ * XXX: is kvmppc_start_thread called too late here?
+ * It updates vcpu->cpu and vcpu->arch.thread_cpu
+ * which are used by kvmppc_fast_vcpu_kick_hv(), but
+ * kick is called after new exceptions become available
+ * and exceptions are checked earlier than here, by
+ * kvmppc_core_prepare_to_enter.
+ */
+ kvmppc_start_thread(vcpu, pvc);
+ kvmppc_update_vpa_dispatch(vcpu, pvc);
+ trace_kvm_guest_enter(vcpu);
+ if (!vcpu->arch.ptid)
+ thr0_done = true;
+ active |= 1 << (thr + vcpu->arch.ptid);
+ }
+ /*
+ * We need to start the first thread of each subcore
+ * even if it doesn't have a vcpu.
+ */
+ if (!thr0_done)
+ kvmppc_start_thread(NULL, pvc);
+ }
+
+ /*
+ * Ensure that split_info.do_nap is set after setting
+ * the vcore pointer in the PACA of the secondaries.
+ */
+ smp_mb();
+
+ /*
+ * When doing micro-threading, poke the inactive threads as well.
+ * This gets them to the nap instruction after kvm_do_nap,
+ * which reduces the time taken to unsplit later.
+ */
+ if (cmd_bit) {
+ split_info.do_nap = 1; /* ask secondaries to nap when done */
+ for (thr = 1; thr < threads_per_subcore; ++thr)
+ if (!(active & (1 << thr)))
+ kvmppc_ipi_thread(pcpu + thr);
+ }
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
- spin_unlock(&vc->lock);
- kvm_guest_enter();
+ trace_kvmppc_run_core(vc, 0);
+
+ for (sub = 0; sub < core_info.n_subcores; ++sub)
+ spin_unlock(&core_info.vc[sub]->lock);
+
+ guest_timing_enter_irqoff();
srcu_idx = srcu_read_lock(&vc->kvm->srcu);
- if (vc->mpp_buffer_is_valid)
- kvmppc_start_restoring_l2_cache(vc);
+ guest_state_enter_irqoff();
+ this_cpu_disable_ftrace();
- __kvmppc_vcore_entry();
+ trap = __kvmppc_vcore_entry();
- spin_lock(&vc->lock);
+ this_cpu_enable_ftrace();
+ guest_state_exit_irqoff();
- if (vc->mpp_buffer)
- kvmppc_start_saving_l2_cache(vc);
+ srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
- /* disable sending of IPIs on virtual external irqs */
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
- vcpu->cpu = -1;
- /* wait for secondary threads to finish writing their state to memory */
- if (vc->nap_count < vc->n_woken)
- kvmppc_wait_for_nap(vc);
- for (i = 0; i < threads_per_subcore; ++i)
- kvmppc_release_hwthread(vc->pcpu + i);
+ set_irq_happened(trap);
+
+ spin_lock(&vc->lock);
/* prevent other vcpu threads from doing kvmppc_start_thread() now */
vc->vcore_state = VCORE_EXITING;
- spin_unlock(&vc->lock);
- srcu_read_unlock(&vc->kvm->srcu, srcu_idx);
+ /* wait for secondary threads to finish writing their state to memory */
+ kvmppc_wait_for_nap(controlled_threads);
+
+ /* Return to whole-core mode if we split the core earlier */
+ if (cmd_bit) {
+ unsigned long hid0 = mfspr(SPRN_HID0);
+
+ hid0 &= ~HID0_POWER8_DYNLPARDIS;
+ stat_bit = HID0_POWER8_2LPARMODE | HID0_POWER8_4LPARMODE;
+ mb();
+ mtspr(SPRN_HID0, hid0);
+ isync();
+ for (;;) {
+ hid0 = mfspr(SPRN_HID0);
+ if (!(hid0 & stat_bit))
+ break;
+ cpu_relax();
+ }
+ split_info.do_nap = 0;
+ }
+
+ kvmppc_set_host_core(pcpu);
+
+ if (!vtime_accounting_enabled_this_cpu()) {
+ local_irq_enable();
+ /*
+ * Service IRQs here before guest_timing_exit_irqoff() so any
+ * ticks that occurred while running the guest are accounted to
+ * the guest. If vtime accounting is enabled, accounting uses
+ * TB rather than ticks, so it can be done without enabling
+ * interrupts here, which has the problem that it accounts
+ * interrupt processing overhead to the host.
+ */
+ local_irq_disable();
+ }
+ guest_timing_exit_irqoff();
+
+ local_irq_enable();
+
+ /* Let secondaries go back to the offline loop */
+ for (i = 0; i < controlled_threads; ++i) {
+ kvmppc_release_hwthread(pcpu + i);
+ if (sip && sip->napped[i])
+ kvmppc_ipi_thread(pcpu + i);
+ }
+
+ spin_unlock(&vc->lock);
/* make sure updates to secondary vcpu structs are visible now */
smp_mb();
- kvm_guest_exit();
preempt_enable();
- cond_resched();
+
+ for (sub = 0; sub < core_info.n_subcores; ++sub) {
+ pvc = core_info.vc[sub];
+ post_guest_process(pvc, pvc == vc);
+ }
spin_lock(&vc->lock);
- now = get_tb();
- list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
- /* cancel pending dec exception if dec is positive */
- if (now < vcpu->arch.dec_expires &&
- kvmppc_core_pending_dec(vcpu))
- kvmppc_core_dequeue_dec(vcpu);
- ret = RESUME_GUEST;
- if (vcpu->arch.trap)
- ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
- vcpu->arch.run_task);
+ out:
+ vc->vcore_state = VCORE_INACTIVE;
+ trace_kvmppc_run_core(vc, 1);
+}
- vcpu->arch.ret = ret;
- vcpu->arch.trap = 0;
+static inline bool hcall_is_xics(unsigned long req)
+{
+ return req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
+}
- if (vcpu->arch.ceded) {
- if (!is_kvmppc_resume_guest(ret))
- kvmppc_end_cede(vcpu);
- else
- kvmppc_set_timer(vcpu);
- }
+static void vcpu_vpa_increment_dispatch(struct kvm_vcpu *vcpu)
+{
+ struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
+ if (lp) {
+ u32 yield_count = be32_to_cpu(lp->yield_count) + 1;
+ lp->yield_count = cpu_to_be32(yield_count);
+ vcpu->arch.vpa.dirty = 1;
}
+}
- out:
- vc->vcore_state = VCORE_INACTIVE;
- list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
- arch.run_list) {
- if (!is_kvmppc_resume_guest(vcpu->arch.ret)) {
- kvmppc_remove_runnable(vc, vcpu);
- wake_up(&vcpu->arch.cpu_run);
+/* Helper functions for reading L2's stats from L1's VPA */
+#ifdef CONFIG_PPC_PSERIES
+static DEFINE_PER_CPU(u64, l1_to_l2_cs);
+static DEFINE_PER_CPU(u64, l2_to_l1_cs);
+static DEFINE_PER_CPU(u64, l2_runtime_agg);
+
+int kvmhv_get_l2_counters_status(void)
+{
+ return firmware_has_feature(FW_FEATURE_LPAR) &&
+ get_lppaca()->l2_counters_enable;
+}
+
+void kvmhv_set_l2_counters_status(int cpu, bool status)
+{
+ if (!firmware_has_feature(FW_FEATURE_LPAR))
+ return;
+ if (status)
+ lppaca_of(cpu).l2_counters_enable = 1;
+ else
+ lppaca_of(cpu).l2_counters_enable = 0;
+}
+EXPORT_SYMBOL(kvmhv_set_l2_counters_status);
+
+int kvmhv_counters_tracepoint_regfunc(void)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ kvmhv_set_l2_counters_status(cpu, true);
+ }
+ return 0;
+}
+
+void kvmhv_counters_tracepoint_unregfunc(void)
+{
+ int cpu;
+
+ for_each_present_cpu(cpu) {
+ kvmhv_set_l2_counters_status(cpu, false);
+ }
+}
+
+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+ struct lppaca *lp = get_lppaca();
+ u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns;
+ u64 *l1_to_l2_cs_ptr = this_cpu_ptr(&l1_to_l2_cs);
+ u64 *l2_to_l1_cs_ptr = this_cpu_ptr(&l2_to_l1_cs);
+ u64 *l2_runtime_agg_ptr = this_cpu_ptr(&l2_runtime_agg);
+
+ l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb));
+ l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb));
+ l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb));
+ trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - *l1_to_l2_cs_ptr,
+ l2_to_l1_ns - *l2_to_l1_cs_ptr,
+ l2_runtime_ns - *l2_runtime_agg_ptr);
+ *l1_to_l2_cs_ptr = l1_to_l2_ns;
+ *l2_to_l1_cs_ptr = l2_to_l1_ns;
+ *l2_runtime_agg_ptr = l2_runtime_ns;
+ vcpu->arch.l1_to_l2_cs = l1_to_l2_ns;
+ vcpu->arch.l2_to_l1_cs = l2_to_l1_ns;
+ vcpu->arch.l2_runtime_agg = l2_runtime_ns;
+}
+
+u64 kvmhv_get_l1_to_l2_cs_time(void)
+{
+ return tb_to_ns(be64_to_cpu(get_lppaca()->l1_to_l2_cs_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time);
+
+u64 kvmhv_get_l2_to_l1_cs_time(void)
+{
+ return tb_to_ns(be64_to_cpu(get_lppaca()->l2_to_l1_cs_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time);
+
+u64 kvmhv_get_l2_runtime_agg(void)
+{
+ return tb_to_ns(be64_to_cpu(get_lppaca()->l2_runtime_tb));
+}
+EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg);
+
+u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_arch *arch;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (vcpu) {
+ arch = &vcpu->arch;
+ return arch->l1_to_l2_cs;
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time_vcpu);
+
+u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_arch *arch;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (vcpu) {
+ arch = &vcpu->arch;
+ return arch->l2_to_l1_cs;
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time_vcpu);
+
+u64 kvmhv_get_l2_runtime_agg_vcpu(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_arch *arch;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (vcpu) {
+ arch = &vcpu->arch;
+ return arch->l2_runtime_agg;
+ } else {
+ return 0;
+ }
+}
+EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg_vcpu);
+
+#else
+int kvmhv_get_l2_counters_status(void)
+{
+ return 0;
+}
+
+static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu)
+{
+}
+#endif
+
+static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr, u64 *tb)
+{
+ struct kvmhv_nestedv2_io *io;
+ unsigned long msr, i;
+ int trap;
+ long rc;
+
+ if (vcpu->arch.doorbell_request) {
+ vcpu->arch.doorbell_request = 0;
+ kvmppc_set_dpdes(vcpu, 1);
+ }
+
+ io = &vcpu->arch.nestedv2_io;
+
+ msr = mfmsr();
+ kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending())
+ return 0;
+
+ rc = kvmhv_nestedv2_flush_vcpu(vcpu, time_limit);
+ if (rc < 0)
+ return -EINVAL;
+
+ kvmppc_gse_put_u64(io->vcpu_run_input, KVMPPC_GSID_LPCR, lpcr);
+
+ accumulate_time(vcpu, &vcpu->arch.in_guest);
+ rc = plpar_guest_run_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id,
+ &trap, &i);
+
+ if (rc != H_SUCCESS) {
+ pr_err("KVM Guest Run VCPU hcall failed\n");
+ if (rc == H_INVALID_ELEMENT_ID)
+ pr_err("KVM: Guest Run VCPU invalid element id at %ld\n", i);
+ else if (rc == H_INVALID_ELEMENT_SIZE)
+ pr_err("KVM: Guest Run VCPU invalid element size at %ld\n", i);
+ else if (rc == H_INVALID_ELEMENT_VALUE)
+ pr_err("KVM: Guest Run VCPU invalid element value at %ld\n", i);
+ return -EINVAL;
+ }
+ accumulate_time(vcpu, &vcpu->arch.guest_exit);
+
+ *tb = mftb();
+ kvmppc_gsm_reset(io->vcpu_message);
+ kvmppc_gsm_reset(io->vcore_message);
+ kvmppc_gsbm_zero(&io->valids);
+
+ rc = kvmhv_nestedv2_parse_output(vcpu);
+ if (rc < 0)
+ return -EINVAL;
+
+ timer_rearm_host_dec(*tb);
+
+ /* Record context switch and guest_run_time data */
+ if (kvmhv_get_l2_counters_status())
+ do_trace_nested_cs_time(vcpu);
+
+ return trap;
+}
+
+/* call our hypervisor to load up HV regs and go */
+static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
+{
+ unsigned long host_psscr;
+ unsigned long msr;
+ struct hv_guest_state hvregs;
+ struct p9_host_os_sprs host_os_sprs;
+ s64 dec;
+ int trap;
+
+ msr = mfmsr();
+
+ save_p9_host_os_sprs(&host_os_sprs);
+
+ /*
+ * We need to save and restore the guest visible part of the
+ * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+ * doesn't do this for us. Note only required if pseries since
+ * this is done in kvmhv_vcpu_entry_p9() below otherwise.
+ */
+ host_psscr = mfspr(SPRN_PSSCR_PR);
+
+ kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending())
+ return 0;
+
+ if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+ msr = mfmsr(); /* TM restore can update msr */
+
+ if (vcpu->arch.psscr != host_psscr)
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+
+ kvmhv_save_hv_regs(vcpu, &hvregs);
+ hvregs.lpcr = lpcr;
+ hvregs.amor = ~0;
+ vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+ hvregs.version = HV_GUEST_STATE_VERSION;
+ if (vcpu->arch.nested) {
+ hvregs.lpid = vcpu->arch.nested->shadow_lpid;
+ hvregs.vcpu_token = vcpu->arch.nested_vcpu_id;
+ } else {
+ hvregs.lpid = vcpu->kvm->arch.lpid;
+ hvregs.vcpu_token = vcpu->vcpu_id;
+ }
+ hvregs.hdec_expiry = time_limit;
+
+ /*
+ * hvregs has the doorbell status, so zero it here which
+ * enables us to receive doorbells when H_ENTER_NESTED is
+ * in progress for this vCPU
+ */
+
+ if (vcpu->arch.doorbell_request)
+ vcpu->arch.doorbell_request = 0;
+
+ /*
+ * When setting DEC, we must always deal with irq_work_raise
+ * via NMI vs setting DEC. The problem occurs right as we
+ * switch into guest mode if a NMI hits and sets pending work
+ * and sets DEC, then that will apply to the guest and not
+ * bring us back to the host.
+ *
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE]
+ * for example) and set HDEC to 1? That wouldn't solve the
+ * nested hv case which needs to abort the hcall or zero the
+ * time limit.
+ *
+ * XXX: Another day's problem.
+ */
+ mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - *tb);
+
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ switch_pmu_to_guest(vcpu, &host_os_sprs);
+ accumulate_time(vcpu, &vcpu->arch.in_guest);
+ trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
+ __pa(&vcpu->arch.regs));
+ accumulate_time(vcpu, &vcpu->arch.guest_exit);
+ kvmhv_restore_hv_return_state(vcpu, &hvregs);
+ switch_pmu_to_host(vcpu, &host_os_sprs);
+ vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+
+ store_vcpu_state(vcpu);
+
+ dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+ dec = (s32) dec;
+ *tb = mftb();
+ vcpu->arch.dec_expires = dec + (*tb + kvmppc_get_tb_offset(vcpu));
+
+ timer_rearm_host_dec(*tb);
+
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+ if (vcpu->arch.psscr != host_psscr)
+ mtspr(SPRN_PSSCR_PR, host_psscr);
+
+ return trap;
+}
+
+/*
+ * Guest entry for POWER9 and later CPUs.
+ */
+static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr, u64 *tb)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u64 next_timer;
+ int trap;
+
+ next_timer = timer_get_next_tb();
+ if (*tb >= next_timer)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ if (next_timer < time_limit)
+ time_limit = next_timer;
+ else if (*tb >= time_limit) /* nested time limit */
+ return BOOK3S_INTERRUPT_NESTED_HV_DECREMENTER;
+
+ vcpu->arch.ceded = 0;
+
+ vcpu_vpa_increment_dispatch(vcpu);
+
+ if (kvmhv_on_pseries()) {
+ if (kvmhv_is_nestedv1())
+ trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb);
+ else
+ trap = kvmhv_vcpu_entry_nestedv2(vcpu, time_limit, lpcr, tb);
+
+ /* H_CEDE has to be handled now, not later */
+ if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested &&
+ kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
}
+
+ } else if (nested) {
+ __this_cpu_write(cpu_in_guest, kvm);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+ __this_cpu_write(cpu_in_guest, NULL);
+
+ } else {
+ kvmppc_xive_push_vcpu(vcpu);
+
+ __this_cpu_write(cpu_in_guest, kvm);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr, tb);
+ __this_cpu_write(cpu_in_guest, NULL);
+
+ if (trap == BOOK3S_INTERRUPT_SYSCALL &&
+ !(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+
+ /*
+ * XIVE rearm and XICS hcalls must be handled
+ * before xive context is pulled (is this
+ * true?)
+ */
+ if (req == H_CEDE) {
+ /* H_CEDE has to be handled now */
+ kvmppc_cede(vcpu);
+ if (!kvmppc_xive_rearm_escalation(vcpu)) {
+ /*
+ * Pending escalation so abort
+ * the cede.
+ */
+ vcpu->arch.ceded = 0;
+ }
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+
+ } else if (req == H_ENTER_NESTED) {
+ /*
+ * L2 should not run with the L1
+ * context so rearm and pull it.
+ */
+ if (!kvmppc_xive_rearm_escalation(vcpu)) {
+ /*
+ * Pending escalation so abort
+ * H_ENTER_NESTED.
+ */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+ }
+
+ } else if (hcall_is_xics(req)) {
+ int ret;
+
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ if (ret != H_TOO_HARD) {
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ }
+ kvmppc_xive_pull_vcpu(vcpu);
+
+ if (kvm_is_radix(kvm))
+ vcpu->arch.slb_max = 0;
}
+
+ vcpu_vpa_increment_dispatch(vcpu);
+
+ return trap;
}
/*
* Wait for some other vcpu thread to execute us, and
* wake us up when we need to handle something in the host.
*/
-static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
+static void kvmppc_wait_for_exec(struct kvmppc_vcore *vc,
+ struct kvm_vcpu *vcpu, int wait_state)
{
DEFINE_WAIT(wait);
prepare_to_wait(&vcpu->arch.cpu_run, &wait, wait_state);
- if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE)
+ if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+ spin_unlock(&vc->lock);
schedule();
+ spin_lock(&vc->lock);
+ }
finish_wait(&vcpu->arch.cpu_run, &wait);
}
+static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+ if (!halt_poll_ns_grow)
+ return;
+
+ vc->halt_poll_ns *= halt_poll_ns_grow;
+ if (vc->halt_poll_ns < halt_poll_ns_grow_start)
+ vc->halt_poll_ns = halt_poll_ns_grow_start;
+}
+
+static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
+{
+ if (halt_poll_ns_shrink == 0)
+ vc->halt_poll_ns = 0;
+ else
+ vc->halt_poll_ns /= halt_poll_ns_shrink;
+}
+
+#ifdef CONFIG_KVM_XICS
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ if (!xics_on_xive())
+ return false;
+ return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
+ vcpu->arch.xive_saved_state.cppr;
+}
+#else
+static inline bool xive_interrupt_pending(struct kvm_vcpu *vcpu)
+{
+ return false;
+}
+#endif /* CONFIG_KVM_XICS */
+
+static bool kvmppc_vcpu_woken(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.pending_exceptions || vcpu->arch.prodded ||
+ kvmppc_doorbell_pending(vcpu) || xive_interrupt_pending(vcpu))
+ return true;
+
+ return false;
+}
+
+static bool kvmppc_vcpu_check_block(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.ceded || kvmppc_vcpu_woken(vcpu))
+ return true;
+ return false;
+}
+
+/*
+ * Check to see if any of the runnable vcpus on the vcore have pending
+ * exceptions or are no longer ceded
+ */
+static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ for_each_runnable_thread(i, vcpu, vc) {
+ if (kvmppc_vcpu_check_block(vcpu))
+ return 1;
+ }
+
+ return 0;
+}
+
/*
* All the vcpus in this vcore are idle, so wait for a decrementer
* or external interrupt to one of the vcpus. vc->lock is held.
*/
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
- DEFINE_WAIT(wait);
+ ktime_t cur, start_poll, start_wait;
+ int do_sleep = 1;
+ u64 block_ns;
+
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+ /* Poll for pending exceptions and ceded state */
+ cur = start_poll = ktime_get();
+ if (vc->halt_poll_ns) {
+ ktime_t stop = ktime_add_ns(start_poll, vc->halt_poll_ns);
+ ++vc->runner->stat.generic.halt_attempted_poll;
+
+ vc->vcore_state = VCORE_POLLING;
+ spin_unlock(&vc->lock);
+
+ do {
+ if (kvmppc_vcore_check_block(vc)) {
+ do_sleep = 0;
+ break;
+ }
+ cur = ktime_get();
+ } while (kvm_vcpu_can_poll(cur, stop));
+
+ spin_lock(&vc->lock);
+ vc->vcore_state = VCORE_INACTIVE;
+
+ if (!do_sleep) {
+ ++vc->runner->stat.generic.halt_successful_poll;
+ goto out;
+ }
+ }
+
+ prepare_to_rcuwait(&vc->wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (kvmppc_vcore_check_block(vc)) {
+ finish_rcuwait(&vc->wait);
+ do_sleep = 0;
+ /* If we polled, count this as a successful poll */
+ if (vc->halt_poll_ns)
+ ++vc->runner->stat.generic.halt_successful_poll;
+ goto out;
+ }
+
+ start_wait = ktime_get();
- prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
vc->vcore_state = VCORE_SLEEPING;
+ trace_kvmppc_vcore_blocked(vc->runner, 0);
spin_unlock(&vc->lock);
schedule();
- finish_wait(&vc->wq, &wait);
+ finish_rcuwait(&vc->wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
+ trace_kvmppc_vcore_blocked(vc->runner, 1);
+ ++vc->runner->stat.halt_successful_wait;
+
+ cur = ktime_get();
+
+out:
+ block_ns = ktime_to_ns(cur) - ktime_to_ns(start_poll);
+
+ /* Attribute wait time */
+ if (do_sleep) {
+ vc->runner->stat.generic.halt_wait_ns +=
+ ktime_to_ns(cur) - ktime_to_ns(start_wait);
+ KVM_STATS_LOG_HIST_UPDATE(
+ vc->runner->stat.generic.halt_wait_hist,
+ ktime_to_ns(cur) - ktime_to_ns(start_wait));
+ /* Attribute failed poll time */
+ if (vc->halt_poll_ns) {
+ vc->runner->stat.generic.halt_poll_fail_ns +=
+ ktime_to_ns(start_wait) -
+ ktime_to_ns(start_poll);
+ KVM_STATS_LOG_HIST_UPDATE(
+ vc->runner->stat.generic.halt_poll_fail_hist,
+ ktime_to_ns(start_wait) -
+ ktime_to_ns(start_poll));
+ }
+ } else {
+ /* Attribute successful poll time */
+ if (vc->halt_poll_ns) {
+ vc->runner->stat.generic.halt_poll_success_ns +=
+ ktime_to_ns(cur) -
+ ktime_to_ns(start_poll);
+ KVM_STATS_LOG_HIST_UPDATE(
+ vc->runner->stat.generic.halt_poll_success_hist,
+ ktime_to_ns(cur) - ktime_to_ns(start_poll));
+ }
+ }
+
+ /* Adjust poll time */
+ if (halt_poll_ns) {
+ if (block_ns <= vc->halt_poll_ns)
+ ;
+ /* We slept and blocked for longer than the max halt time */
+ else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vc);
+ /* We slept and our poll time is too small */
+ else if (vc->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vc);
+ if (vc->halt_poll_ns > halt_poll_ns)
+ vc->halt_poll_ns = halt_poll_ns;
+ } else
+ vc->halt_poll_ns = 0;
+
+ trace_kvmppc_vcore_wakeup(do_sleep, block_ns);
}
-static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+/*
+ * This never fails for a radix guest, as none of the operations it does
+ * for a radix guest can fail or have a way to report failure.
+ */
+static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
- int n_ceded;
+ int r = 0;
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ if (!kvm->arch.mmu_ready) {
+ if (!kvm_is_radix(kvm))
+ r = kvmppc_hv_setup_htab_rma(vcpu);
+ if (!r) {
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvmppc_setup_partition_table(kvm);
+ kvm->arch.mmu_ready = 1;
+ }
+ }
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return r;
+}
+
+static int kvmppc_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int n_ceded, i, r;
struct kvmppc_vcore *vc;
- struct kvm_vcpu *v, *vn;
+ struct kvm_vcpu *v;
+
+ trace_kvmppc_run_vcpu_enter(vcpu);
- kvm_run->exit_reason = 0;
+ run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
kvmppc_update_vpas(vcpu);
@@ -1820,11 +4756,10 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
spin_lock(&vc->lock);
vcpu->arch.ceded = 0;
vcpu->arch.run_task = current;
- vcpu->arch.kvm_run = kvm_run;
vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
vcpu->arch.busy_preempt = TB_NIL;
- list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
+ WRITE_ONCE(vc->runnable_threads[vcpu->arch.ptid], vcpu);
++vc->n_runnable;
/*
@@ -1833,244 +4768,460 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
* this thread straight away and have it join in.
*/
if (!signal_pending(current)) {
- if (vc->vcore_state == VCORE_RUNNING &&
- VCORE_EXIT_COUNT(vc) == 0) {
- kvmppc_create_dtl_entry(vcpu, vc);
- kvmppc_start_thread(vcpu);
+ if ((vc->vcore_state == VCORE_PIGGYBACK ||
+ vc->vcore_state == VCORE_RUNNING) &&
+ !VCORE_IS_EXITING(vc)) {
+ kvmppc_update_vpa_dispatch(vcpu, vc);
+ kvmppc_start_thread(vcpu, vc);
+ trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
- wake_up(&vc->wq);
+ rcuwait_wake_up(&vc->wait);
}
}
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
!signal_pending(current)) {
- if (vc->vcore_state != VCORE_INACTIVE) {
+ /* See if the MMU is ready to go */
+ if (!vcpu->kvm->arch.mmu_ready) {
spin_unlock(&vc->lock);
- kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
+ r = kvmhv_setup_mmu(vcpu);
spin_lock(&vc->lock);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.
+ hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ break;
+ }
+ }
+
+ if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+ kvmppc_vcore_end_preempt(vc);
+
+ if (vc->vcore_state != VCORE_INACTIVE) {
+ kvmppc_wait_for_exec(vc, vcpu, TASK_INTERRUPTIBLE);
continue;
}
- list_for_each_entry_safe(v, vn, &vc->runnable_threads,
- arch.run_list) {
+ for_each_runnable_thread(i, v, vc) {
kvmppc_core_prepare_to_enter(v);
if (signal_pending(v->arch.run_task)) {
- kvmppc_remove_runnable(vc, v);
+ kvmppc_remove_runnable(vc, v, mftb());
v->stat.signal_exits++;
- v->arch.kvm_run->exit_reason = KVM_EXIT_INTR;
+ v->run->exit_reason = KVM_EXIT_INTR;
v->arch.ret = -EINTR;
wake_up(&v->arch.cpu_run);
}
}
if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
break;
- vc->runner = vcpu;
n_ceded = 0;
- list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
- if (!v->arch.pending_exceptions)
+ for_each_runnable_thread(i, v, vc) {
+ if (!kvmppc_vcpu_woken(v))
n_ceded += v->arch.ceded;
else
v->arch.ceded = 0;
}
- if (n_ceded == vc->n_runnable)
+ vc->runner = vcpu;
+ if (n_ceded == vc->n_runnable) {
kvmppc_vcore_blocked(vc);
- else
+ } else if (need_resched()) {
+ kvmppc_vcore_preempt(vc);
+ /* Let something else run */
+ cond_resched_lock(&vc->lock);
+ if (vc->vcore_state == VCORE_PREEMPT)
+ kvmppc_vcore_end_preempt(vc);
+ } else {
kvmppc_run_core(vc);
+ }
vc->runner = NULL;
}
while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
(vc->vcore_state == VCORE_RUNNING ||
- vc->vcore_state == VCORE_EXITING)) {
- spin_unlock(&vc->lock);
- kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
- spin_lock(&vc->lock);
- }
+ vc->vcore_state == VCORE_EXITING ||
+ vc->vcore_state == VCORE_PIGGYBACK))
+ kvmppc_wait_for_exec(vc, vcpu, TASK_UNINTERRUPTIBLE);
+
+ if (vc->vcore_state == VCORE_PREEMPT && vc->runner == NULL)
+ kvmppc_vcore_end_preempt(vc);
if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
- kvmppc_remove_runnable(vc, vcpu);
+ kvmppc_remove_runnable(vc, vcpu, mftb());
vcpu->stat.signal_exits++;
- kvm_run->exit_reason = KVM_EXIT_INTR;
+ run->exit_reason = KVM_EXIT_INTR;
vcpu->arch.ret = -EINTR;
}
if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
/* Wake up some vcpu to run the core */
- v = list_first_entry(&vc->runnable_threads,
- struct kvm_vcpu, arch.run_list);
+ i = -1;
+ v = next_runnable_thread(vc, &i);
wake_up(&v->arch.cpu_run);
}
+ trace_kvmppc_run_vcpu_exit(vcpu);
spin_unlock(&vc->lock);
return vcpu->arch.ret;
}
-static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
+ unsigned long lpcr)
{
- int r;
+ struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
+ struct kvm_run *run = vcpu->run;
+ int trap, r, pcpu;
int srcu_idx;
+ struct kvmppc_vcore *vc;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ unsigned long flags;
+ u64 tb;
- if (!vcpu->arch.sane) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- return -EINVAL;
- }
+ trace_kvmppc_run_vcpu_enter(vcpu);
- kvmppc_core_prepare_to_enter(vcpu);
+ run->exit_reason = 0;
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
- /* No need to go into the guest when all we'll do is come back out */
- if (signal_pending(current)) {
- run->exit_reason = KVM_EXIT_INTR;
- return -EINTR;
+ vc = vcpu->arch.vcore;
+ vcpu->arch.ceded = 0;
+ vcpu->arch.run_task = current;
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+ /* See if the MMU is ready to go */
+ if (unlikely(!kvm->arch.mmu_ready)) {
+ r = kvmhv_setup_mmu(vcpu);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ return r;
+ }
}
- atomic_inc(&vcpu->kvm->arch.vcpus_running);
- /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
+ if (need_resched())
+ cond_resched();
+
+ kvmppc_update_vpas(vcpu);
+
+ preempt_disable();
+ pcpu = smp_processor_id();
+ if (kvm_is_radix(kvm))
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+
+ /* flags save not required, but irq_pmu has no disable/enable API */
+ powerpc_local_irq_pmu_save(flags);
+
+ vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+
+ if (signal_pending(current))
+ goto sigpend;
+ if (need_resched() || !kvm->arch.mmu_ready)
+ goto out;
+
+ vcpu->cpu = pcpu;
+ vcpu->arch.thread_cpu = pcpu;
+ vc->pcpu = pcpu;
+ local_paca->kvm_hstate.kvm_vcpu = vcpu;
+ local_paca->kvm_hstate.ptid = 0;
+ local_paca->kvm_hstate.fake_suspend = 0;
+
+ /*
+ * Orders set cpu/thread_cpu vs testing for pending interrupts and
+ * doorbells below. The other side is when these fields are set vs
+ * kvmppc_fast_vcpu_kick_hv reading the cpu/thread_cpu fields to
+ * kick a vCPU to notice the pending interrupt.
+ */
smp_mb();
- /* On the first time here, set up HTAB and VRMA or RMA */
- if (!vcpu->kvm->arch.rma_setup_done) {
- r = kvmppc_hv_setup_htab_rma(vcpu);
- if (r)
- goto out;
+ if (!nested) {
+ kvmppc_core_prepare_to_enter(vcpu);
+ if (test_bit(BOOK3S_IRQPRIO_EXTERNAL,
+ &vcpu->arch.pending_exceptions) ||
+ xive_interrupt_pending(vcpu)) {
+ /*
+ * For nested HV, don't synthesize but always pass MER,
+ * the L0 will be able to optimise that more
+ * effectively than manipulating registers directly.
+ */
+ if (!kvmhv_on_pseries() && (__kvmppc_get_msr_hv(vcpu) & MSR_EE))
+ kvmppc_inject_interrupt_hv(vcpu,
+ BOOK3S_INTERRUPT_EXTERNAL, 0);
+ else
+ lpcr |= LPCR_MER;
+ } else {
+ /*
+ * L1's copy of L2's LPCR (vcpu->arch.vcore->lpcr) can get its MER bit
+ * unexpectedly set - for e.g. during NMI handling when all register
+ * states are synchronized from L0 to L1. L1 needs to inform L0 about
+ * MER=1 only when there are pending external interrupts.
+ * In the above if check, MER bit is set if there are pending
+ * external interrupts. Hence, explicitly mask off MER bit
+ * here as otherwise it may generate spurious interrupts in L2 KVM
+ * causing an endless loop, which results in L2 guest getting hung.
+ */
+ lpcr &= ~LPCR_MER;
+ }
+ } else if (vcpu->arch.pending_exceptions ||
+ xive_interrupt_pending(vcpu)) {
+ vcpu->arch.ret = RESUME_HOST;
+ goto out;
}
- flush_fp_to_thread(current);
- flush_altivec_to_thread(current);
- flush_vsx_to_thread(current);
- vcpu->arch.wqp = &vcpu->arch.vcore->wq;
- vcpu->arch.pgdir = current->mm->pgd;
- vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
- do {
- r = kvmppc_run_vcpu(run, vcpu);
+ tb = mftb();
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
- r = kvmppc_pseries_do_hcall(vcpu);
- kvmppc_core_prepare_to_enter(vcpu);
- } else if (r == RESUME_PAGE_FAULT) {
- srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmppc_book3s_hv_page_fault(run, vcpu,
- vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
- srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
- }
- } while (is_kvmppc_resume_guest(r));
+ kvmppc_update_vpa_dispatch_p9(vcpu, vc, tb + kvmppc_get_tb_offset(vcpu));
- out:
- vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
- atomic_dec(&vcpu->kvm->arch.vcpus_running);
- return r;
-}
+ trace_kvm_guest_enter(vcpu);
+ guest_timing_enter_irqoff();
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
- Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
- switch (rma_size) {
- case 32ul << 20: /* 32 MB */
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- return 8; /* only supported on POWER7 */
- return -1;
- case 64ul << 20: /* 64 MB */
- return 3;
- case 128ul << 20: /* 128 MB */
- return 7;
- case 256ul << 20: /* 256 MB */
- return 4;
- case 1ul << 30: /* 1 GB */
- return 2;
- case 16ul << 30: /* 16 GB */
- return 1;
- case 256ul << 30: /* 256 GB */
- return 0;
- default:
- return -1;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ guest_state_enter_irqoff();
+ this_cpu_disable_ftrace();
+
+ trap = kvmhv_p9_guest_entry(vcpu, time_limit, lpcr, &tb);
+ vcpu->arch.trap = trap;
+
+ this_cpu_enable_ftrace();
+ guest_state_exit_irqoff();
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ set_irq_happened(trap);
+
+ vcpu->cpu = -1;
+ vcpu->arch.thread_cpu = -1;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
+ if (!vtime_accounting_enabled_this_cpu()) {
+ powerpc_local_irq_pmu_restore(flags);
+ /*
+ * Service IRQs here before guest_timing_exit_irqoff() so any
+ * ticks that occurred while running the guest are accounted to
+ * the guest. If vtime accounting is enabled, accounting uses
+ * TB rather than ticks, so it can be done without enabling
+ * interrupts here, which has the problem that it accounts
+ * interrupt processing overhead to the host.
+ */
+ powerpc_local_irq_pmu_save(flags);
}
-}
+ guest_timing_exit_irqoff();
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page;
- struct kvm_rma_info *ri = vma->vm_file->private_data;
+ powerpc_local_irq_pmu_restore(flags);
- if (vmf->pgoff >= kvm_rma_pages)
- return VM_FAULT_SIGBUS;
+ preempt_enable();
- page = pfn_to_page(ri->base_pfn + vmf->pgoff);
- get_page(page);
- vmf->page = page;
- return 0;
-}
+ /*
+ * cancel pending decrementer exception if DEC is now positive, or if
+ * entering a nested guest in which case the decrementer is now owned
+ * by L2 and the L1 decrementer is provided in hdec_expires
+ */
+ if (kvmppc_core_pending_dec(vcpu) &&
+ ((tb < kvmppc_dec_expires_host_tb(vcpu)) ||
+ (trap == BOOK3S_INTERRUPT_SYSCALL &&
+ kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
+ kvmppc_core_dequeue_dec(vcpu);
+
+ trace_kvm_guest_exit(vcpu);
+ r = RESUME_GUEST;
+ if (trap) {
+ if (!nested)
+ r = kvmppc_handle_exit_hv(vcpu, current);
+ else
+ r = kvmppc_handle_nested_exit(vcpu);
+ }
+ vcpu->arch.ret = r;
+
+ if (is_kvmppc_resume_guest(r) && !kvmppc_vcpu_check_block(vcpu)) {
+ kvmppc_set_timer(vcpu);
+
+ prepare_to_rcuwait(wait);
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (signal_pending(current)) {
+ vcpu->stat.signal_exits++;
+ run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ break;
+ }
-static const struct vm_operations_struct kvm_rma_vm_ops = {
- .fault = kvm_rma_fault,
-};
+ if (kvmppc_vcpu_check_block(vcpu))
+ break;
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
- vma->vm_ops = &kvm_rma_vm_ops;
- return 0;
+ trace_kvmppc_vcore_blocked(vcpu, 0);
+ schedule();
+ trace_kvmppc_vcore_blocked(vcpu, 1);
+ }
+ finish_rcuwait(wait);
+ }
+ vcpu->arch.ceded = 0;
+
+ done:
+ trace_kvmppc_run_vcpu_exit(vcpu);
+
+ return vcpu->arch.ret;
+
+ sigpend:
+ vcpu->stat.signal_exits++;
+ run->exit_reason = KVM_EXIT_INTR;
+ vcpu->arch.ret = -EINTR;
+ out:
+ vcpu->cpu = -1;
+ vcpu->arch.thread_cpu = -1;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+ powerpc_local_irq_pmu_restore(flags);
+ preempt_enable();
+ goto done;
}
-static int kvm_rma_release(struct inode *inode, struct file *filp)
+static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
{
- struct kvm_rma_info *ri = filp->private_data;
+ struct kvm_run *run = vcpu->run;
+ int r;
+ int srcu_idx;
+ struct kvm *kvm;
+ unsigned long msr;
- kvm_release_rma(ri);
- return 0;
-}
+ start_timing(vcpu, &vcpu->arch.vcpu_entry);
-static const struct file_operations kvm_rma_fops = {
- .mmap = kvm_rma_mmap,
- .release = kvm_rma_release,
-};
+ if (!vcpu->arch.sane) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return -EINVAL;
+ }
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
- struct kvm_allocate_rma *ret)
-{
- long fd;
- struct kvm_rma_info *ri;
+ /* No need to go into the guest when all we'll do is come back out */
+ if (signal_pending(current)) {
+ run->exit_reason = KVM_EXIT_INTR;
+ return -EINTR;
+ }
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
- * Only do this on PPC970 in HV mode
+ * Don't allow entry with a suspended transaction, because
+ * the guest entry/exit code will lose it.
*/
- if (!cpu_has_feature(CPU_FTR_HVMODE) ||
- !cpu_has_feature(CPU_FTR_ARCH_201))
- return -EINVAL;
+ if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
+ (current->thread.regs->msr & MSR_TM)) {
+ if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ return -EINVAL;
+ }
+ }
+#endif
- if (!kvm_rma_pages)
- return -EINVAL;
+ /*
+ * Force online to 1 for the sake of old userspace which doesn't
+ * set it.
+ */
+ if (!vcpu->arch.online) {
+ atomic_inc(&vcpu->arch.vcore->online_count);
+ vcpu->arch.online = 1;
+ }
- ri = kvm_alloc_rma();
- if (!ri)
- return -ENOMEM;
+ kvmppc_core_prepare_to_enter(vcpu);
- fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
- if (fd < 0)
- kvm_release_rma(ri);
+ kvm = vcpu->kvm;
+ atomic_inc(&kvm->arch.vcpus_running);
+ /* Order vcpus_running vs. mmu_ready, see kvmppc_alloc_reset_hpt */
+ smp_mb();
- ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
- return fd;
+ msr = 0;
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr |= MSR_FP;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ msr |= MSR_VEC;
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr |= MSR_VSX;
+ if ((cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+ (kvmppc_get_hfscr_hv(vcpu) & HFSCR_TM))
+ msr |= MSR_TM;
+ msr = msr_check_and_set(msr);
+
+ kvmppc_save_user_regs();
+
+ kvmppc_save_current_sprs();
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ vcpu->arch.waitp = &vcpu->arch.vcore->wait;
+ vcpu->arch.pgdir = kvm->mm->pgd;
+ vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+
+ do {
+ accumulate_time(vcpu, &vcpu->arch.guest_entry);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
+ vcpu->arch.vcore->lpcr);
+ else
+ r = kvmppc_run_vcpu(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ accumulate_time(vcpu, &vcpu->arch.hcall);
+
+ if (!kvmhv_is_nestedv2() && WARN_ON_ONCE(__kvmppc_get_msr_hv(vcpu) & MSR_PR)) {
+ /*
+ * These should have been caught reflected
+ * into the guest by now. Final sanity check:
+ * don't allow userspace to execute hcalls in
+ * the hypervisor.
+ */
+ r = RESUME_GUEST;
+ continue;
+ }
+ trace_kvm_hcall_enter(vcpu);
+ r = kvmppc_pseries_do_hcall(vcpu);
+ trace_kvm_hcall_exit(vcpu, r);
+ kvmppc_core_prepare_to_enter(vcpu);
+ } else if (r == RESUME_PAGE_FAULT) {
+ accumulate_time(vcpu, &vcpu->arch.pg_fault);
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ r = kvmppc_book3s_hv_page_fault(vcpu,
+ vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ } else if (r == RESUME_PASSTHROUGH) {
+ if (WARN_ON(xics_on_xive()))
+ r = H_SUCCESS;
+ else
+ r = kvmppc_xics_rm_complete(vcpu, 0);
+ }
+ } while (is_kvmppc_resume_guest(r));
+ accumulate_time(vcpu, &vcpu->arch.vcpu_exit);
+
+ vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
+ atomic_dec(&kvm->arch.vcpus_running);
+
+ srr_regs_clobbered();
+
+ end_timing(vcpu);
+
+ return r;
}
static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
- int linux_psize)
+ int shift, int sllp)
{
- struct mmu_psize_def *def = &mmu_psize_defs[linux_psize];
-
- if (!def->shift)
- return;
- (*sps)->page_shift = def->shift;
- (*sps)->slb_enc = def->sllp;
- (*sps)->enc[0].page_shift = def->shift;
- (*sps)->enc[0].pte_enc = def->penc[linux_psize];
+ (*sps)->page_shift = shift;
+ (*sps)->slb_enc = sllp;
+ (*sps)->enc[0].page_shift = shift;
+ (*sps)->enc[0].pte_enc = kvmppc_pgsize_lp_encoding(shift, shift);
/*
- * Add 16MB MPSS support if host supports it
+ * Add 16MB MPSS support (may get filtered out by userspace)
*/
- if (linux_psize != MMU_PAGE_16M && def->penc[MMU_PAGE_16M] != -1) {
- (*sps)->enc[1].page_shift = 24;
- (*sps)->enc[1].pte_enc = def->penc[MMU_PAGE_16M];
+ if (shift != 24) {
+ int penc = kvmppc_pgsize_lp_encoding(shift, 24);
+ if (penc != -1) {
+ (*sps)->enc[1].page_shift = 24;
+ (*sps)->enc[1].pte_enc = penc;
+ }
}
(*sps)++;
}
@@ -2080,16 +5231,27 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
{
struct kvm_ppc_one_seg_page_size *sps;
- info->flags = KVM_PPC_PAGE_SIZES_REAL;
- if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
- info->flags |= KVM_PPC_1T_SEGMENTS;
- info->slb_size = mmu_slb_size;
+ /*
+ * POWER7, POWER8 and POWER9 all support 32 storage keys for data.
+ * POWER7 doesn't support keys for instruction accesses,
+ * POWER8 and POWER9 do.
+ */
+ info->data_keys = 32;
+ info->instr_keys = cpu_has_feature(CPU_FTR_ARCH_207S) ? 32 : 0;
+
+ /* POWER7, 8 and 9 all have 1T segments and 32-entry SLB */
+ info->flags = KVM_PPC_PAGE_SIZES_REAL | KVM_PPC_1T_SEGMENTS;
+ info->slb_size = 32;
/* We only support these sizes for now, and no muti-size segments */
sps = &info->sps[0];
- kvmppc_add_seg_page_size(&sps, MMU_PAGE_4K);
- kvmppc_add_seg_page_size(&sps, MMU_PAGE_64K);
- kvmppc_add_seg_page_size(&sps, MMU_PAGE_16M);
+ kvmppc_add_seg_page_size(&sps, 12, 0);
+ kvmppc_add_seg_page_size(&sps, 16, SLB_VSID_L | SLB_VSID_LP_01);
+ kvmppc_add_seg_page_size(&sps, 24, SLB_VSID_L);
+
+ /* If running as a nested hypervisor, we don't support HPT guests */
+ if (kvmhv_on_pseries())
+ info->flags |= KVM_PPC_NO_HASH;
return 0;
}
@@ -2100,9 +5262,12 @@ static int kvm_vm_ioctl_get_smmu_info_hv(struct kvm *kvm,
static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
struct kvm_dirty_log *log)
{
+ struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int r;
- unsigned long n;
+ unsigned long n, i;
+ unsigned long *buf, *p;
+ struct kvm_vcpu *vcpu;
mutex_lock(&kvm->slots_lock);
@@ -2110,20 +5275,48 @@ static int kvm_vm_ioctl_get_dirty_log_hv(struct kvm *kvm,
if (log->slot >= KVM_USER_MEM_SLOTS)
goto out;
- memslot = id_to_memslot(kvm->memslots, log->slot);
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
r = -ENOENT;
- if (!memslot->dirty_bitmap)
+ if (!memslot || !memslot->dirty_bitmap)
goto out;
+ /*
+ * Use second half of bitmap area because both HPT and radix
+ * accumulate bits in the first half.
+ */
n = kvm_dirty_bitmap_bytes(memslot);
- memset(memslot->dirty_bitmap, 0, n);
+ buf = memslot->dirty_bitmap + n / sizeof(long);
+ memset(buf, 0, n);
- r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
+ if (kvm_is_radix(kvm))
+ r = kvmppc_hv_get_dirty_log_radix(kvm, memslot, buf);
+ else
+ r = kvmppc_hv_get_dirty_log_hpt(kvm, memslot, buf);
if (r)
goto out;
+ /*
+ * We accumulate dirty bits in the first half of the
+ * memslot's dirty_bitmap area, for when pages are paged
+ * out or modified by the host directly. Pick up these
+ * bits and add them to the map.
+ */
+ p = memslot->dirty_bitmap;
+ for (i = 0; i < n / sizeof(long); ++i)
+ buf[i] |= xchg(&p[i], 0);
+
+ /* Harvest dirty bits from VPA and DTL updates */
+ /* Note: we never modify the SLB shadow buffer areas */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ kvmppc_harvest_vpa_dirty(&vcpu->arch.vpa, memslot, buf);
+ kvmppc_harvest_vpa_dirty(&vcpu->arch.dtl, memslot, buf);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+
r = -EFAULT;
- if (copy_to_user(log->dirty_bitmap, memslot->dirty_bitmap, n))
+ if (copy_to_user(log->dirty_bitmap, buf, n))
goto out;
r = 0;
@@ -2132,91 +5325,90 @@ out:
return r;
}
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
- unsigned long *physp;
- unsigned long j, npages, pfn;
- struct page *page;
-
- physp = memslot->arch.slot_phys;
- npages = memslot->npages;
- if (!physp)
- return;
- for (j = 0; j < npages; j++) {
- if (!(physp[j] & KVMPPC_GOT_PAGE))
- continue;
- pfn = physp[j] >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
- SetPageDirty(page);
- put_page(page);
- }
-}
-
-static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
- if (!dont || free->arch.rmap != dont->arch.rmap) {
- vfree(free->arch.rmap);
- free->arch.rmap = NULL;
- }
- if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
- unpin_slot(free);
- vfree(free->arch.slot_phys);
- free->arch.slot_phys = NULL;
- }
-}
-
-static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
- unsigned long npages)
+static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *slot)
{
- slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
- if (!slot->arch.rmap)
- return -ENOMEM;
- slot->arch.slot_phys = NULL;
-
- return 0;
+ vfree(slot->arch.rmap);
+ slot->arch.rmap = NULL;
}
static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- unsigned long *phys;
+ if (change == KVM_MR_CREATE) {
+ unsigned long size = array_size(new->npages, sizeof(*new->arch.rmap));
- /* Allocate a slot_phys array if needed */
- phys = memslot->arch.slot_phys;
- if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
- phys = vzalloc(memslot->npages * sizeof(unsigned long));
- if (!phys)
+ if ((size >> PAGE_SHIFT) > totalram_pages())
return -ENOMEM;
- memslot->arch.slot_phys = phys;
+
+ new->arch.rmap = vzalloc(size);
+ if (!new->arch.rmap)
+ return -ENOMEM;
+ } else if (change != KVM_MR_DELETE) {
+ new->arch.rmap = old->arch.rmap;
}
return 0;
}
static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
- unsigned long npages = mem->memory_size >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot;
+ /*
+ * If we are creating or modifying a memslot, it might make
+ * some address that was previously cached as emulated
+ * MMIO be no longer emulated MMIO, so invalidate
+ * all the caches of emulated MMIO translations.
+ */
+ if (change != KVM_MR_DELETE)
+ atomic64_inc(&kvm->arch.mmio_update);
+
+ /*
+ * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels
+ * have already called kvm_arch_flush_shadow_memslot() to
+ * flush shadow mappings. For KVM_MR_CREATE we have no
+ * previous mappings. So the only case to handle is
+ * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit
+ * has been changed.
+ * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES
+ * to get rid of any THP PTEs in the partition-scoped page tables
+ * so we can track dirtiness at the page level; we flush when
+ * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to
+ * using THP PTEs.
+ */
+ if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
+ ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
+ kvmppc_radix_flush_memslot(kvm, old);
+ /*
+ * If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
+ */
+ if (!kvm->arch.secure_guest)
+ return;
- if (npages && old->npages) {
+ switch (change) {
+ case KVM_MR_CREATE:
/*
- * If modifying a memslot, reset all the rmap dirty bits.
- * If this is a new memslot, we don't need to do anything
- * since the rmap array starts out as all zeroes,
- * i.e. no pages are dirty.
+ * @TODO kvmppc_uvmem_memslot_create() can fail and
+ * return error. Fix this.
*/
- memslot = id_to_memslot(kvm->memslots, mem->slot);
- kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
+ kvmppc_uvmem_memslot_create(kvm, new);
+ break;
+ case KVM_MR_DELETE:
+ kvmppc_uvmem_memslot_delete(kvm, old);
+ break;
+ default:
+ /* TODO: Handle KVM_MR_MOVE */
+ break;
}
}
/*
* Update LPCR values in kvm->arch and in vcores.
- * Caller must hold kvm->lock.
+ * Caller must hold kvm->arch.mmu_setup_lock (for mutual exclusion
+ * of kvm->arch.lpcr update).
*/
void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
{
@@ -2232,47 +5424,78 @@ void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask)
struct kvmppc_vcore *vc = kvm->arch.vcores[i];
if (!vc)
continue;
+
spin_lock(&vc->lock);
vc->lpcr = (vc->lpcr & ~mask) | lpcr;
+ verify_lpcr(kvm, vc->lpcr);
spin_unlock(&vc->lock);
if (++cores_done >= kvm->arch.online_vcores)
break;
}
+
+ if (kvmhv_is_nestedv2()) {
+ struct kvm_vcpu *vcpu;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_LPCR);
+ }
+ }
}
-static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
+void kvmppc_setup_partition_table(struct kvm *kvm)
{
- return;
+ unsigned long dw0, dw1;
+
+ if (!kvm_is_radix(kvm)) {
+ /* PS field - page size for VRMA */
+ dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
+ ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
+ /* HTABSIZE and HTABORG fields */
+ dw0 |= kvm->arch.sdr1;
+
+ /* Second dword as set by userspace */
+ dw1 = kvm->arch.process_table;
+ } else {
+ dw0 = PATB_HR | radix__get_tree_size() |
+ __pa(kvm->arch.pgtable) | RADIX_PGD_INDEX_SIZE;
+ dw1 = PATB_GR | kvm->arch.process_table;
+ }
+ kvmhv_set_ptbl_entry(kvm->arch.lpid, dw0, dw1);
}
+/*
+ * Set up HPT (hashed page table) and RMA (real-mode area).
+ * Must be called with kvm->arch.mmu_setup_lock held.
+ */
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
- struct kvm_rma_info *ri = NULL;
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
unsigned long lpcr = 0, senc;
- unsigned long lpcr_mask = 0;
unsigned long psize, porder;
- unsigned long rma_size;
- unsigned long rmls;
- unsigned long *physp;
- unsigned long i, npages;
int srcu_idx;
- mutex_lock(&kvm->lock);
- if (kvm->arch.rma_setup_done)
- goto out; /* another vcpu beat us to it */
-
/* Allocate hashed page table (if not done already) and reset it */
- if (!kvm->arch.hpt_virt) {
- err = kvmppc_alloc_hpt(kvm, NULL);
- if (err) {
+ if (!kvm->arch.hpt.virt) {
+ int order = KVM_DEFAULT_HPT_ORDER;
+ struct kvm_hpt_info info;
+
+ err = kvmppc_allocate_hpt(&info, order);
+ /* If we get here, it means userspace didn't specify a
+ * size explicitly. So, try successively smaller
+ * sizes if the default failed. */
+ while ((err == -ENOMEM) && --order >= PPC_MIN_HPT_ORDER)
+ err = kvmppc_allocate_hpt(&info, order);
+
+ if (err < 0) {
pr_err("KVM: Couldn't alloc HPT\n");
goto out;
}
+
+ kvmppc_set_hpt(kvm, &info);
}
/* Look up the memslot for guest physical address 0 */
@@ -2286,202 +5509,418 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
/* Look up the VMA for the start of this memory slot */
hva = memslot->userspace_addr;
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, hva);
- if (!vma || vma->vm_start > hva || (vma->vm_flags & VM_IO))
+ mmap_read_lock(kvm->mm);
+ vma = vma_lookup(kvm->mm, hva);
+ if (!vma || (vma->vm_flags & VM_IO))
goto up_out;
psize = vma_kernel_pagesize(vma);
- porder = __ilog2(psize);
-
- /* Is this one of our preallocated RMAs? */
- if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
- hva == vma->vm_start)
- ri = vma->vm_file->private_data;
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
- if (!ri) {
- /* On POWER7, use VRMA; on PPC970, give up */
- err = -EPERM;
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- pr_err("KVM: CPU requires an RMO\n");
- goto out_srcu;
- }
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ if (psize >= 0x1000000)
+ psize = 0x1000000;
+ else if (psize >= 0x10000)
+ psize = 0x10000;
+ else
+ psize = 0x1000;
+ porder = __ilog2(psize);
- /* We can handle 4k, 64k or 16M pages in the VRMA */
- err = -EINVAL;
- if (!(psize == 0x1000 || psize == 0x10000 ||
- psize == 0x1000000))
- goto out_srcu;
+ senc = slb_pgsize_encoding(psize);
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ /* Create HPTEs in the hash page table for the VRMA */
+ kvmppc_map_vrma(vcpu, memslot, porder);
- /* Update VRMASD field in the LPCR */
- senc = slb_pgsize_encoding(psize);
- kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
- (VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr_mask = LPCR_VRMASD;
+ /* Update VRMASD field in the LPCR */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
/* the -4 is to account for senc values starting at 0x10 */
lpcr = senc << (LPCR_VRMASD_SH - 4);
-
- /* Create HPTEs in the hash page table for the VRMA */
- kvmppc_map_vrma(vcpu, memslot, porder);
-
- } else {
- /* Set up to use an RMO region */
- rma_size = kvm_rma_pages;
- if (rma_size > memslot->npages)
- rma_size = memslot->npages;
- rma_size <<= PAGE_SHIFT;
- rmls = lpcr_rmls(rma_size);
- err = -EINVAL;
- if ((long)rmls < 0) {
- pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
- goto out_srcu;
- }
- atomic_inc(&ri->use_count);
- kvm->arch.rma = ri;
-
- /* Update LPCR and RMOR */
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- /* PPC970; insert RMLS value (split field) in HID4 */
- lpcr_mask = (1ul << HID4_RMLS0_SH) |
- (3ul << HID4_RMLS2_SH) | HID4_RMOR;
- lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
- ((rmls & 3) << HID4_RMLS2_SH);
- /* RMOR is also in HID4 */
- lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
- << HID4_RMOR_SH;
- } else {
- /* POWER7 */
- lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
- lpcr = rmls << LPCR_RMLS_SH;
- kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
- }
- pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
- ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
- /* Initialize phys addrs of pages in RMO */
- npages = kvm_rma_pages;
- porder = __ilog2(npages);
- physp = memslot->arch.slot_phys;
- if (physp) {
- if (npages > memslot->npages)
- npages = memslot->npages;
- spin_lock(&kvm->arch.slot_phys_lock);
- for (i = 0; i < npages; ++i)
- physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
- porder;
- spin_unlock(&kvm->arch.slot_phys_lock);
- }
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
}
- kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
-
- /* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
+ /* Order updates to kvm->arch.lpcr etc. vs. mmu_ready */
smp_wmb();
- kvm->arch.rma_setup_done = 1;
err = 0;
out_srcu:
srcu_read_unlock(&kvm->srcu, srcu_idx);
out:
- mutex_unlock(&kvm->lock);
return err;
up_out:
- up_read(&current->mm->mmap_sem);
+ mmap_read_unlock(kvm->mm);
goto out_srcu;
}
+/*
+ * Must be called with kvm->arch.mmu_setup_lock held and
+ * mmu_ready = 0 and no vcpus running.
+ */
+int kvmppc_switch_mmu_to_hpt(struct kvm *kvm)
+{
+ unsigned long lpcr, lpcr_mask;
+
+ if (nesting_enabled(kvm))
+ kvmhv_release_all_nested(kvm);
+ kvmppc_rmap_reset(kvm);
+ kvm->arch.process_table = 0;
+ /* Mutual exclusion with kvm_unmap_gfn_range etc. */
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.radix = 0;
+ spin_unlock(&kvm->mmu_lock);
+ kvmppc_free_radix(kvm);
+
+ lpcr = LPCR_VPM1;
+ lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ lpcr_mask |= LPCR_HAIL;
+ kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
+ return 0;
+}
+
+/*
+ * Must be called with kvm->arch.mmu_setup_lock held and
+ * mmu_ready = 0 and no vcpus running.
+ */
+int kvmppc_switch_mmu_to_radix(struct kvm *kvm)
+{
+ unsigned long lpcr, lpcr_mask;
+ int err;
+
+ err = kvmppc_init_vm_radix(kvm);
+ if (err)
+ return err;
+ kvmppc_rmap_reset(kvm);
+ /* Mutual exclusion with kvm_unmap_gfn_range etc. */
+ spin_lock(&kvm->mmu_lock);
+ kvm->arch.radix = 1;
+ spin_unlock(&kvm->mmu_lock);
+ kvmppc_free_hpt(&kvm->arch.hpt);
+
+ lpcr = LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ lpcr_mask = LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ lpcr_mask |= LPCR_HAIL;
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ (kvm->arch.host_lpcr & LPCR_HAIL))
+ lpcr |= LPCR_HAIL;
+ }
+ kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+
+ return 0;
+}
+
+#ifdef CONFIG_KVM_XICS
+/*
+ * Allocate a per-core structure for managing state about which cores are
+ * running in the host versus the guest and for exchanging data between
+ * real mode KVM and CPU running in the host.
+ * This is only done for the first VM.
+ * The allocated structure stays even if all VMs have stopped.
+ * It is only freed when the kvm-hv module is unloaded.
+ * It's OK for this routine to fail, we just don't support host
+ * core operations like redirecting H_IPI wakeups.
+ */
+void kvmppc_alloc_host_rm_ops(void)
+{
+ struct kvmppc_host_rm_ops *ops;
+ unsigned long l_ops;
+ int cpu, core;
+ int size;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return;
+
+ /* Not the first time here ? */
+ if (kvmppc_host_rm_ops_hv != NULL)
+ return;
+
+ ops = kzalloc(sizeof(struct kvmppc_host_rm_ops), GFP_KERNEL);
+ if (!ops)
+ return;
+
+ size = cpu_nr_cores() * sizeof(struct kvmppc_host_rm_core);
+ ops->rm_core = kzalloc(size, GFP_KERNEL);
+
+ if (!ops->rm_core) {
+ kfree(ops);
+ return;
+ }
+
+ cpus_read_lock();
+
+ for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
+ if (!cpu_online(cpu))
+ continue;
+
+ core = cpu >> threads_shift;
+ ops->rm_core[core].rm_state.in_host = 1;
+ }
+
+ ops->vcpu_kick = kvmppc_fast_vcpu_kick_hv;
+
+ /*
+ * Make the contents of the kvmppc_host_rm_ops structure visible
+ * to other CPUs before we assign it to the global variable.
+ * Do an atomic assignment (no locks used here), but if someone
+ * beats us to it, just free our copy and return.
+ */
+ smp_wmb();
+ l_ops = (unsigned long) ops;
+
+ if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
+ cpus_read_unlock();
+ kfree(ops->rm_core);
+ kfree(ops);
+ return;
+ }
+
+ cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
+ cpus_read_unlock();
+}
+
+void kvmppc_free_host_rm_ops(void)
+{
+ if (kvmppc_host_rm_ops_hv) {
+ cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
+ kfree(kvmppc_host_rm_ops_hv->rm_core);
+ kfree(kvmppc_host_rm_ops_hv);
+ kvmppc_host_rm_ops_hv = NULL;
+ }
+}
+#endif
+
static int kvmppc_core_init_vm_hv(struct kvm *kvm)
{
unsigned long lpcr, lpid;
+ int ret;
+
+ mutex_init(&kvm->arch.uvmem_lock);
+ INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
+ mutex_init(&kvm->arch.mmu_setup_lock);
/* Allocate the guest's logical partition ID */
- lpid = kvmppc_alloc_lpid();
- if ((long)lpid < 0)
- return -ENOMEM;
- kvm->arch.lpid = lpid;
+ if (!kvmhv_is_nestedv2()) {
+ lpid = kvmppc_alloc_lpid();
+ if ((long)lpid < 0)
+ return -ENOMEM;
+ kvm->arch.lpid = lpid;
+ }
+
+ kvmppc_alloc_host_rm_ops();
+
+ kvmhv_vm_nested_init(kvm);
+
+ if (kvmhv_is_nestedv2()) {
+ long rc;
+ unsigned long guest_id;
+
+ rc = plpar_guest_create(0, &guest_id);
+
+ if (rc != H_SUCCESS)
+ pr_err("KVM: Create Guest hcall failed, rc=%ld\n", rc);
+
+ switch (rc) {
+ case H_PARAMETER:
+ case H_FUNCTION:
+ case H_STATE:
+ return -EINVAL;
+ case H_NOT_ENOUGH_RESOURCES:
+ case H_ABORTED:
+ return -ENOMEM;
+ case H_AUTHORITY:
+ return -EPERM;
+ case H_NOT_AVAILABLE:
+ return -EBUSY;
+ }
+ kvm->arch.lpid = guest_id;
+ }
+
/*
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
* make sure we flush on each core before running the new VM.
+ * On POWER9, the tlbie in mmu_partition_table_set_entry()
+ * does this flush for us.
*/
- cpumask_setall(&kvm->arch.need_tlb_flush);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ cpumask_setall(&kvm->arch.need_tlb_flush);
/* Start out with the default set of hcalls enabled */
memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
sizeof(kvm->arch.enabled_hcalls));
- kvm->arch.rma = NULL;
-
- kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- /* PPC970; HID4 is effectively the LPCR */
- kvm->arch.host_lpid = 0;
- kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
- lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
- lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
- ((lpid & 0xf) << HID4_LPID5_SH);
- } else {
- /* POWER7; init LPCR for virtual RMA mode */
+ /* Init LPCR for virtual RMA mode */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
kvm->arch.host_lpid = mfspr(SPRN_LPID);
kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
lpcr &= LPCR_PECE | LPCR_LPES;
- lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
- LPCR_VPM0 | LPCR_VPM1;
- kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
- (VRMA_VSID << SLB_VSID_SHIFT_1T);
- /* On POWER8 turn on online bit to enable PURR/SPURR */
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- lpcr |= LPCR_ONL;
+ } else {
+ /*
+ * The L2 LPES mode will be set by the L0 according to whether
+ * or not it needs to take external interrupts in HV mode.
+ */
+ lpcr = 0;
}
+ lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+ LPCR_VPM0 | LPCR_VPM1;
+ kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ /* On POWER8 turn on online bit to enable PURR/SPURR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ lpcr |= LPCR_ONL;
+ /*
+ * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
+ * Set HVICE bit to enable hypervisor virtualization interrupts.
+ * Set HEIC to prevent OS interrupts to go to hypervisor (should
+ * be unnecessary but better safe than sorry in case we re-enable
+ * EE in HV mode with this LPCR still set)
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ lpcr &= ~LPCR_VPM0;
+ lpcr |= LPCR_HVICE | LPCR_HEIC;
+
+ /*
+ * If xive is enabled, we route 0x500 interrupts directly
+ * to the guest.
+ */
+ if (xics_on_xive())
+ lpcr |= LPCR_LPES;
+ }
+
+ /*
+ * If the host uses radix, the guest starts out as radix.
+ */
+ if (radix_enabled()) {
+ kvm->arch.radix = 1;
+ kvm->arch.mmu_ready = 1;
+ lpcr &= ~LPCR_VPM1;
+ lpcr |= LPCR_UPRT | LPCR_GTSE | LPCR_HR;
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (kvm->arch.host_lpcr & LPCR_HAIL))
+ lpcr |= LPCR_HAIL;
+ ret = kvmppc_init_vm_radix(kvm);
+ if (ret) {
+ if (kvmhv_is_nestedv2())
+ plpar_guest_delete(0, kvm->arch.lpid);
+ else
+ kvmppc_free_lpid(kvm->arch.lpid);
+ return ret;
+ }
+ kvmppc_setup_partition_table(kvm);
+ }
+
+ verify_lpcr(kvm, lpcr);
kvm->arch.lpcr = lpcr;
- kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
- spin_lock_init(&kvm->arch.slot_phys_lock);
+ /* Initialization for future HPT resizes */
+ kvm->arch.resize_hpt = NULL;
+
+ /*
+ * Work out how many sets the TLB has, for the use of
+ * the TLB invalidation loop in book3s_hv_rmhandlers.S.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ /*
+ * P10 will flush all the congruence class with a single tlbiel
+ */
+ kvm->arch.tlb_sets = 1;
+ } else if (radix_enabled())
+ kvm->arch.tlb_sets = POWER9_TLB_SETS_RADIX; /* 128 */
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
+ else
+ kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
*/
- kvm_hv_vm_activated();
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm_hv_vm_activated();
+
+ /*
+ * Initialize smt_mode depending on processor.
+ * POWER8 and earlier have to use "strict" threading, where
+ * all vCPUs in a vcore have to run on the same (sub)core,
+ * whereas on POWER9 the threads can each run a different
+ * guest.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.smt_mode = threads_per_subcore;
+ else
+ kvm->arch.smt_mode = 1;
+ kvm->arch.emul_smt_mode = 1;
return 0;
}
+/*
+ * Create the per-VM debugfs entries: the MMU entries always, the radix
+ * entries only when radix_enabled() is true. Always returns 0.
+ */
+static int kvmppc_arch_create_vm_debugfs_hv(struct kvm *kvm)
+{
+ kvmppc_mmu_debugfs_init(kvm);
+ if (radix_enabled())
+ kvmhv_radix_debugfs_init(kvm);
+ return 0;
+}
+
static void kvmppc_free_vcores(struct kvm *kvm)
{
long int i;
- for (i = 0; i < KVM_MAX_VCORES; ++i) {
- if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) {
- struct kvmppc_vcore *vc = kvm->arch.vcores[i];
- free_pages((unsigned long)vc->mpp_buffer,
- MPP_BUFFER_ORDER);
- }
+ for (i = 0; i < KVM_MAX_VCORES; ++i)
kfree(kvm->arch.vcores[i]);
- }
kvm->arch.online_vcores = 0;
}
static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
- kvm_hv_vm_deactivated();
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
- if (kvm->arch.rma) {
- kvm_release_rma(kvm->arch.rma);
- kvm->arch.rma = NULL;
+
+
+ if (kvm_is_radix(kvm))
+ kvmppc_free_radix(kvm);
+ else
+ kvmppc_free_hpt(&kvm->arch.hpt);
+
+ /* Perform global invalidation and return lpid to the pool */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (nesting_enabled(kvm))
+ kvmhv_release_all_nested(kvm);
+ kvm->arch.process_table = 0;
+ if (kvm->arch.secure_guest)
+ uv_svm_terminate(kvm->arch.lpid);
+ if (!kvmhv_is_nestedv2())
+ kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
- kvmppc_free_hpt(kvm);
+ if (kvmhv_is_nestedv2()) {
+ kvmhv_flush_lpid(kvm->arch.lpid);
+ plpar_guest_delete(0, kvm->arch.lpid);
+ } else {
+ kvmppc_free_lpid(kvm->arch.lpid);
+ }
+
+ kvmppc_free_pimap(kvm);
}
/* We don't need to emulate any privileged instructions or dcbz */
-static int kvmppc_core_emulate_op_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_core_emulate_op_hv(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
return EMULATE_FAIL;
@@ -2501,42 +5940,233 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
static int kvmppc_core_check_processor_compat_hv(void)
{
- if (!cpu_has_feature(CPU_FTR_HVMODE))
+ if (cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_ARCH_206))
+ return 0;
+
+ /* POWER9 in radix mode is capable of being a nested hypervisor. */
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
+ return 0;
+
+ return -EIO;
+}
+
+#ifdef CONFIG_KVM_XICS
+
+/* Free the VM's passthrough IRQ map; kfree() accepts a NULL kvm->arch.pimap. */
+void kvmppc_free_pimap(struct kvm *kvm)
+{
+ kfree(kvm->arch.pimap);
+}
+
+/* Return a zero-filled passthrough IRQ map, or NULL if the allocation fails. */
+static struct kvmppc_passthru_irqmap *kvmppc_alloc_pimap(void)
+{
+ struct kvmppc_passthru_irqmap *map;
+
+ map = kzalloc(sizeof(*map), GFP_KERNEL);
+ return map;
+}
+
+/*
+ * Record a host IRQ -> guest GSI passthrough mapping in kvm->arch.pimap,
+ * allocating the map on first use, and tell the XIVE or XICS code about it.
+ *
+ * Returns 1 when kvm_irq_bypass is off, -EIO when host_irq has no
+ * descriptor, -ENOMEM when the map cannot be allocated, -ENOENT when the
+ * IRQ chip is not an OPAL MSI chip, -EINVAL when guest_gsi is already
+ * mapped to a hardware IRQ, -EAGAIN when the table is full, and 0 otherwise.
+ * NOTE(review): a failure from kvmppc_xive_set_mapped() only clears
+ * r_hwirq; the function still returns 0 in that case.
+ */
+static int kvmppc_set_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
+{
+ struct irq_desc *desc;
+ struct kvmppc_irq_map *irq_map;
+ struct kvmppc_passthru_irqmap *pimap;
+ struct irq_chip *chip;
+ int i, rc = 0;
+ struct irq_data *host_data;
+
+ if (!kvm_irq_bypass)
+ return 1;
+
+ desc = irq_to_desc(host_irq);
+ if (!desc)
return -EIO;
+
+ /* kvm->lock covers the map lookup, allocation and update below. */
+ mutex_lock(&kvm->lock);
+
+ pimap = kvm->arch.pimap;
+ if (pimap == NULL) {
+ /* First call, allocate structure to hold IRQ map */
+ pimap = kvmppc_alloc_pimap();
+ if (pimap == NULL) {
+ mutex_unlock(&kvm->lock);
+ return -ENOMEM;
+ }
+ kvm->arch.pimap = pimap;
+ }
+
+ /*
+ * For now, we only support interrupts for which the EOI operation
+ * is an OPAL call followed by a write to XIRR, since that's
+ * what our real-mode EOI code does, or a XIVE interrupt
+ */
+ chip = irq_data_get_irq_chip(&desc->irq_data);
+ if (!chip || !is_pnv_opal_msi(chip)) {
+ pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
+ host_irq, guest_gsi);
+ mutex_unlock(&kvm->lock);
+ return -ENOENT;
+ }
+
+ /*
+ * See if we already have an entry for this guest IRQ number.
+ * If it's mapped to a hardware IRQ number, that's an error,
+ * otherwise re-use this entry.
+ */
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (guest_gsi == pimap->mapped[i].v_hwirq) {
+ if (pimap->mapped[i].r_hwirq) {
+ mutex_unlock(&kvm->lock);
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
+ /* No reusable entry and no free slot left after the used ones. */
+ if (i == KVMPPC_PIRQ_MAPPED) {
+ mutex_unlock(&kvm->lock);
+ return -EAGAIN; /* table is full */
+ }
+
+ irq_map = &pimap->mapped[i];
+
+ irq_map->v_hwirq = guest_gsi;
+ irq_map->desc = desc;
+
+ /*
+ * Order the above two stores before the next to serialize with
+ * the KVM real mode handler.
+ */
+ smp_wmb();
+
+ /*
+ * The 'host_irq' number is mapped in the PCI-MSI domain but
+ * the underlying calls, which will EOI the interrupt in real
+ * mode, need an HW IRQ number mapped in the XICS IRQ domain.
+ */
+ host_data = irq_domain_get_irq_data(irq_get_default_domain(), host_irq);
+ irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data);
+
+ /* A brand-new entry (not a reused one) grows the used count. */
+ if (i == pimap->n_mapped)
+ pimap->n_mapped++;
+
+ if (xics_on_xive())
+ rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq);
+ else
+ kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq);
+ /* On failure, invalidate the entry again by clearing r_hwirq. */
+ if (rc)
+ irq_map->r_hwirq = 0;
+
+ mutex_unlock(&kvm->lock);
+
return 0;
}
-static long kvm_arch_vm_ioctl_hv(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
{
- struct kvm *kvm __maybe_unused = filp->private_data;
- void __user *argp = (void __user *)arg;
- long r;
+ struct irq_desc *desc;
+ struct kvmppc_passthru_irqmap *pimap;
+ int i, rc = 0;
- switch (ioctl) {
+ if (!kvm_irq_bypass)
+ return 0;
- case KVM_ALLOCATE_RMA: {
- struct kvm_allocate_rma rma;
- struct kvm *kvm = filp->private_data;
+ desc = irq_to_desc(host_irq);
+ if (!desc)
+ return -EIO;
- r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
- if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
- r = -EFAULT;
- break;
+ mutex_lock(&kvm->lock);
+ if (!kvm->arch.pimap)
+ goto unlock;
+
+ pimap = kvm->arch.pimap;
+
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (guest_gsi == pimap->mapped[i].v_hwirq)
+ break;
}
+ if (i == pimap->n_mapped) {
+ mutex_unlock(&kvm->lock);
+ return -ENODEV;
+ }
+
+ if (xics_on_xive())
+ rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq);
+ else
+ kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
+
+ /* invalidate the entry (what to do on error from the above ?) */
+ pimap->mapped[i].r_hwirq = 0;
+
+ /*
+ * We don't free this structure even when the count goes to
+ * zero. The structure is freed when we destroy the VM.
+ */
+ unlock:
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
+/*
+ * IRQ bypass "add producer" callback: remember the producer in the irqfd
+ * that embeds @cons and set up the passthrough mapping for the producer's
+ * IRQ and the irqfd's GSI. Returns whatever kvmppc_set_passthru_irq()
+ * returns, logging any non-zero result.
+ */
+static int kvmppc_irq_bypass_add_producer_hv(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ int ret = 0;
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ irqfd->producer = prod;
+
+ ret = kvmppc_set_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+ if (ret)
+ pr_info("kvmppc_set_passthru_irq (irq %d, gsi %d) fails: %d\n",
+ prod->irq, irqfd->gsi, ret);
+
+ return ret;
+}
+
+/*
+ * IRQ bypass "del producer" callback: forget the producer in the irqfd
+ * that embeds @cons and clear the passthrough mapping for the producer's
+ * IRQ and the irqfd's GSI. A failure is only logged; nothing is returned.
+ */
+static void kvmppc_irq_bypass_del_producer_hv(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ int ret;
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ irqfd->producer = NULL;
+
+ /*
+ * When producer of consumer is unregistered, we change back to
+ * default external interrupt handling mode - KVM real mode
+ * will switch back to host.
+ */
+ ret = kvmppc_clr_passthru_irq(irqfd->kvm, prod->irq, irqfd->gsi);
+ if (ret)
+ pr_warn("kvmppc_clr_passthru_irq (irq %d, gsi %d) fails: %d\n",
+ prod->irq, irqfd->gsi, ret);
+}
+#endif
+
+static int kvm_arch_vm_ioctl_hv(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm *kvm __maybe_unused = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ int r;
+
+ switch (ioctl) {
+
case KVM_PPC_ALLOCATE_HTAB: {
u32 htab_order;
+ /* If we're a nested hypervisor, we currently only support radix */
+ if (kvmhv_on_pseries()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+
r = -EFAULT;
if (get_user(htab_order, (u32 __user *)argp))
break;
- r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
+ r = kvmppc_alloc_reset_hpt(kvm, htab_order);
if (r)
break;
- r = -EFAULT;
- if (put_user(htab_order, (u32 __user *)argp))
- break;
r = 0;
break;
}
@@ -2551,6 +6181,28 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
break;
}
+ case KVM_PPC_RESIZE_HPT_PREPARE: {
+ struct kvm_ppc_resize_hpt rhpt;
+
+ r = -EFAULT;
+ if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+ break;
+
+ r = kvm_vm_ioctl_resize_hpt_prepare(kvm, &rhpt);
+ break;
+ }
+
+ case KVM_PPC_RESIZE_HPT_COMMIT: {
+ struct kvm_ppc_resize_hpt rhpt;
+
+ r = -EFAULT;
+ if (copy_from_user(&rhpt, argp, sizeof(rhpt)))
+ break;
+
+ r = kvm_vm_ioctl_resize_hpt_commit(kvm, &rhpt);
+ break;
+ }
+
default:
r = -ENOTTY;
}
@@ -2570,8 +6222,10 @@ static unsigned int default_hcall_list[] = {
H_READ,
H_PROTECT,
H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
H_GET_TCE,
H_PUT_TCE,
+#endif
H_SET_DABR,
H_SET_XDABR,
H_CEDE,
@@ -2601,6 +6255,262 @@ static void init_default_hcalls(void)
}
}
+/*
+ * Apply an MMU v3 configuration (radix/HPT selection, GTSE, process table)
+ * to a VM.
+ *
+ * Returns -ENODEV without CPU_FTR_ARCH_300, -EINVAL when @cfg is rejected
+ * by the validation checks below, -EBUSY when the MMU mode has to change
+ * while vcpus are running, the error from the radix/HPT switch helper if
+ * it fails, and 0 on success.
+ */
+static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
+{
+ unsigned long lpcr;
+ int radix;
+ int err;
+
+ /* If not on a POWER9, reject it */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+
+ /* If any unknown flags set, reject it */
+ if (cfg->flags & ~(KVM_PPC_MMUV3_RADIX | KVM_PPC_MMUV3_GTSE))
+ return -EINVAL;
+
+ /* GR (guest radix) bit in process_table field must match */
+ radix = !!(cfg->flags & KVM_PPC_MMUV3_RADIX);
+ if (!!(cfg->process_table & PATB_GR) != radix)
+ return -EINVAL;
+
+ /* Process table size field must be reasonable, i.e. <= 24 */
+ if ((cfg->process_table & PRTS_MASK) > 24)
+ return -EINVAL;
+
+ /* We can change a guest to/from radix now, if the host is radix */
+ if (radix && !radix_enabled())
+ return -EINVAL;
+
+ /* If we're a nested hypervisor, we currently only support radix */
+ if (kvmhv_on_pseries() && !radix)
+ return -EINVAL;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ /* Only switch MMU mode when the request differs from the current one. */
+ if (radix != kvm_is_radix(kvm)) {
+ if (kvm->arch.mmu_ready) {
+ kvm->arch.mmu_ready = 0;
+ /* order mmu_ready vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ /* Vcpus are running: restore mmu_ready and give up. */
+ kvm->arch.mmu_ready = 1;
+ err = -EBUSY;
+ goto out_unlock;
+ }
+ }
+ if (radix)
+ err = kvmppc_switch_mmu_to_radix(kvm);
+ else
+ err = kvmppc_switch_mmu_to_hpt(kvm);
+ if (err)
+ goto out_unlock;
+ }
+
+ kvm->arch.process_table = cfg->process_table;
+ kvmppc_setup_partition_table(kvm);
+
+ /* Set or clear only the GTSE bit of the LPCR, per the request. */
+ lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
+ err = 0;
+
+ out_unlock:
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return err;
+}
+
+/*
+ * Enable nested virtualization for a VM, or (kvm == NULL) only report
+ * whether it could be enabled. Returns -EPERM when the `nested` flag is
+ * off, -ENODEV when a platform requirement is not met, 0 otherwise.
+ */
+static int kvmhv_enable_nested(struct kvm *kvm)
+{
+ if (!nested)
+ return -EPERM;
+
+ /* Same checks, same order as before; the first failing one decides. */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300) || !radix_enabled() ||
+ kvmhv_is_nestedv2())
+ return -ENODEV;
+
+ /* kvm == NULL means the caller is testing if the capability exists */
+ if (kvm != NULL)
+ kvm->arch.nested_enable = true;
+
+ return 0;
+}
+
+/*
+ * Copy @size bytes from guest effective address *@eaddr into @ptr.
+ * Only radix vcpus are handled. Returns 0 on success, -EAGAIN on any
+ * failure when vcpu->arch.nested is set, otherwise -EINVAL or the negative
+ * error from the radix copy helper.
+ */
+static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size)
+{
+ int ret;
+
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ ret = -EINVAL;
+ } else {
+ ret = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size);
+ /* A positive result from the helper is reported as -EINVAL. */
+ if (ret > 0)
+ ret = -EINVAL;
+ }
+
+ if (ret == 0)
+ return 0;
+
+ /* For now quadrants are the only way to access nested guest memory */
+ return vcpu->arch.nested ? -EAGAIN : ret;
+}
+
+/*
+ * Copy @size bytes from @ptr to guest effective address *@eaddr.
+ * Only radix vcpus are handled. Returns 0 on success, -EAGAIN on any
+ * failure when vcpu->arch.nested is set, otherwise -EINVAL or the negative
+ * error from the radix copy helper.
+ */
+static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
+ int size)
+{
+ int ret;
+
+ if (!kvmhv_vcpu_is_radix(vcpu)) {
+ ret = -EINVAL;
+ } else {
+ ret = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size);
+ /* A positive result from the helper is reported as -EINVAL. */
+ if (ret > 0)
+ ret = -EINVAL;
+ }
+
+ if (ret == 0)
+ return 0;
+
+ /* For now quadrants are the only way to access nested guest memory */
+ return vcpu->arch.nested ? -EAGAIN : ret;
+}
+
+/*
+ * Unpin @vpa and return its bookkeeping fields (guest address, pinned
+ * address, dirty flag, pending-update flag) to their empty values.
+ */
+static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
+{
+ unpin_vpa(kvm, vpa);
+ vpa->gpa = 0;
+ vpa->pinned_addr = NULL;
+ vpa->dirty = false;
+ vpa->update_pending = 0;
+}
+
+/*
+ * Enable a guest to become a secure VM, or test whether
+ * that could be enabled.
+ * Called when the KVM_CAP_PPC_SECURE_GUEST capability is
+ * tested (kvm == NULL) or enabled (kvm != NULL).
+ */
+static int kvmhv_enable_svm(struct kvm *kvm)
+{
+ /* Without ultravisor memory support the capability is refused. */
+ if (kvmppc_uvmem_available()) {
+ /* A NULL kvm is a pure capability probe: nothing to mark. */
+ if (kvm != NULL)
+ kvm->arch.svm_enabled = 1;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * IOCTL handler to turn off secure mode of guest
+ *
+ * - Release all device pages
+ * - Issue ucall to terminate the guest on the UV side
+ * - Unpin the VPA pages.
+ * - Reinit the partition scoped page tables
+ */
+static int kvmhv_svm_off(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int mmu_was_ready;
+ int srcu_idx;
+ int ret = 0;
+ unsigned long i;
+
+ /* Nothing to undo unless the secure-guest transition has been started. */
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return ret;
+
+ mutex_lock(&kvm->arch.mmu_setup_lock);
+ mmu_was_ready = kvm->arch.mmu_ready;
+ if (kvm->arch.mmu_ready) {
+ kvm->arch.mmu_ready = 0;
+ /* order mmu_ready vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ /* Vcpus are running: restore mmu_ready and report -EBUSY. */
+ kvm->arch.mmu_ready = 1;
+ ret = -EBUSY;
+ goto out;
+ }
+ }
+
+ /* Drop device pages and unregister every memslot in every address space. */
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
+ struct kvm_memory_slot *memslot;
+ struct kvm_memslots *slots = __kvm_memslots(kvm, i);
+ int bkt;
+
+ if (!slots)
+ continue;
+
+ kvm_for_each_memslot(memslot, bkt, slots) {
+ kvmppc_uvmem_drop_pages(memslot, kvm, true);
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ }
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ /*
+ * NOTE(review): on failure here the function leaves via "out" without
+ * writing mmu_was_ready back to mmu_ready - confirm this is intended.
+ */
+ ret = uv_svm_terminate(kvm->arch.lpid);
+ if (ret != U_SUCCESS) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * When secure guest is reset, all the guest pages are sent
+ * to UV via UV_PAGE_IN before the non-boot vcpus get a
+ * chance to run and unpin their VPA pages. Unpinning of all
+ * VPA pages is done here explicitly so that VPA pages
+ * can be migrated to the secure side.
+ *
+ * This is required for the secure SMP guest to reboot
+ * correctly.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ unpin_vpa_reset(kvm, &vcpu->arch.dtl);
+ unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
+ unpin_vpa_reset(kvm, &vcpu->arch.vpa);
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ }
+
+ kvmppc_setup_partition_table(kvm);
+ kvm->arch.secure_guest = 0;
+ kvm->arch.mmu_ready = mmu_was_ready;
+out:
+ mutex_unlock(&kvm->arch.mmu_setup_lock);
+ return ret;
+}
+
+/*
+ * Enable DAWR1 for a VM, or (kvm == NULL) only report whether it could be
+ * enabled. Returns -ENODEV when the CPU lacks CPU_FTR_DAWR1, 0 otherwise.
+ */
+static int kvmhv_enable_dawr1(struct kvm *kvm)
+{
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ /* kvm == NULL means the caller is testing if the capability exists */
+ if (kvm != NULL)
+ kvm->arch.dawr1_enabled = true;
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+/*
+ * Report whether this host can run a hash (HPT) MMU v3 guest: it needs
+ * CPU_FTR_ARCH_300 and CPU_FTR_HVMODE, and on a radix host the POWER9
+ * revision must be recent enough.
+ */
+static bool kvmppc_hash_v3_possible(void)
+{
+ unsigned int pvr, variant, revision;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300) ||
+ !cpu_has_feature(CPU_FTR_HVMODE))
+ return false;
+
+ /* The revision restriction below only concerns radix hosts. */
+ if (!radix_enabled())
+ return true;
+
+ pvr = mfspr(SPRN_PVR);
+ if ((pvr >> 16) != PVR_POWER9)
+ return true;
+
+ /*
+ * POWER9 chips before version 2.02 can't have some threads in
+ * HPT mode and some in radix mode on the same core.
+ */
+ variant = pvr & 0xe000;
+ revision = pvr & 0xfff;
+ if (variant == 0 && revision < 0x202)
+ return false;
+ if (variant == 0x2000 && revision < 0x101)
+ return false;
+
+ return true;
+}
+
static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -2608,6 +6518,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.set_one_reg = kvmppc_set_one_reg_hv,
.vcpu_load = kvmppc_core_vcpu_load_hv,
.vcpu_put = kvmppc_core_vcpu_put_hv,
+ .inject_interrupt = kvmppc_inject_interrupt_hv,
.set_msr = kvmppc_set_msr_hv,
.vcpu_run = kvmppc_vcpu_run_hv,
.vcpu_create = kvmppc_core_vcpu_create_hv,
@@ -2617,14 +6528,10 @@ static struct kvmppc_ops kvm_ops_hv = {
.flush_memslot = kvmppc_core_flush_memslot_hv,
.prepare_memory_region = kvmppc_core_prepare_memory_region_hv,
.commit_memory_region = kvmppc_core_commit_memory_region_hv,
- .unmap_hva = kvm_unmap_hva_hv,
- .unmap_hva_range = kvm_unmap_hva_range_hv,
- .age_hva = kvm_age_hva_hv,
- .test_age_hva = kvm_test_age_hva_hv,
- .set_spte_hva = kvm_set_spte_hva_hv,
- .mmu_destroy = kvmppc_mmu_destroy_hv,
+ .unmap_gfn_range = kvm_unmap_gfn_range_hv,
+ .age_gfn = kvm_age_gfn_hv,
+ .test_age_gfn = kvm_test_age_gfn_hv,
.free_memslot = kvmppc_core_free_memslot_hv,
- .create_memslot = kvmppc_core_create_memslot_hv,
.init_vm = kvmppc_core_init_vm_hv,
.destroy_vm = kvmppc_core_destroy_vm_hv,
.get_smmu_info = kvm_vm_ioctl_get_smmu_info_hv,
@@ -2634,11 +6541,65 @@ static struct kvmppc_ops kvm_ops_hv = {
.fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv,
.arch_vm_ioctl = kvm_arch_vm_ioctl_hv,
.hcall_implemented = kvmppc_hcall_impl_hv,
+ .configure_mmu = kvmhv_configure_mmu,
+ .get_rmmu_info = kvmhv_get_rmmu_info,
+ .set_smt_mode = kvmhv_set_smt_mode,
+ .enable_nested = kvmhv_enable_nested,
+ .load_from_eaddr = kvmhv_load_from_eaddr,
+ .store_to_eaddr = kvmhv_store_to_eaddr,
+ .enable_svm = kvmhv_enable_svm,
+ .svm_off = kvmhv_svm_off,
+ .enable_dawr1 = kvmhv_enable_dawr1,
+ .hash_v3_possible = kvmppc_hash_v3_possible,
+ .create_vcpu_debugfs = kvmppc_arch_create_vcpu_debugfs_hv,
+ .create_vm_debugfs = kvmppc_arch_create_vm_debugfs_hv,
};
+/*
+ * Make sure every core has a sibling_subcore_state structure: for each core
+ * whose first thread's paca does not have one yet, allocate a zeroed
+ * structure on that thread's NUMA node and point the pacas of all
+ * threads_per_core threads of the core at it.
+ *
+ * Returns 0 on success or -ENOMEM; structures installed for earlier cores
+ * stay in place when a later allocation fails.
+ */
+static int kvm_init_subcore_bitmap(void)
+{
+ int i, j;
+ int nr_cores = cpu_nr_cores();
+ struct sibling_subcore_state *sibling_subcore_state;
+
+ for (i = 0; i < nr_cores; i++) {
+ int first_cpu = i * threads_per_core;
+ int node = cpu_to_node(first_cpu);
+
+ /* Ignore if it is already allocated. */
+ if (paca_ptrs[first_cpu]->sibling_subcore_state)
+ continue;
+
+ sibling_subcore_state =
+ kzalloc_node(sizeof(struct sibling_subcore_state),
+ GFP_KERNEL, node);
+ if (!sibling_subcore_state)
+ return -ENOMEM;
+
+
+ /* All threads of the core share the one structure. */
+ for (j = 0; j < threads_per_core; j++) {
+ int cpu = first_cpu + j;
+
+ paca_ptrs[cpu]->sibling_subcore_state =
+ sibling_subcore_state;
+ }
+ }
+ return 0;
+}
+
+/* Return 1 when the CPU has CPU_FTR_ARCH_300 and the host runs radix, else 0. */
+static int kvmppc_radix_possible(void)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return 0;
+
+ return radix_enabled() ? 1 : 0;
+}
+
static int kvmppc_book3s_init_hv(void)
{
int r;
+
+ if (!tlbie_capable) {
+ pr_err("KVM-HV: Host does not support TLBIE\n");
+ return -ENODEV;
+ }
+
/*
* FIXME!! Do we need to check on all cpus ?
*/
@@ -2646,22 +6607,98 @@ static int kvmppc_book3s_init_hv(void)
if (r < 0)
return -ENODEV;
- kvm_ops_hv.owner = THIS_MODULE;
- kvmppc_hv_ops = &kvm_ops_hv;
+ r = kvmhv_nested_init();
+ if (r)
+ return r;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ r = kvm_init_subcore_bitmap();
+ if (r)
+ goto err;
+ }
+
+ /*
+ * We need a way of accessing the XICS interrupt controller,
+ * either directly, via paca_ptrs[cpu]->kvm_hstate.xics_phys, or
+ * indirectly, via OPAL.
+ */
+#ifdef CONFIG_SMP
+ if (!xics_on_xive() && !kvmhv_on_pseries() &&
+ !local_paca->kvm_hstate.xics_phys) {
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+ if (!np) {
+ pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
+ r = -ENODEV;
+ goto err;
+ }
+ /* presence of intc confirmed - node can be dropped again */
+ of_node_put(np);
+ }
+#endif
init_default_hcalls();
+ init_vcore_lists();
+
r = kvmppc_mmu_hv_init();
+ if (r)
+ goto err;
+
+ if (kvmppc_radix_possible()) {
+ r = kvmppc_radix_init();
+ if (r)
+ goto err;
+ }
+
+ r = kvmppc_uvmem_init();
+ if (r < 0) {
+ pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
+ return r;
+ }
+
+#if defined(CONFIG_KVM_XICS)
+ /*
+ * IRQ bypass is supported only for interrupts whose EOI operations are
+ * handled via OPAL calls. Therefore, register IRQ bypass handlers
+ * exclusively for PowerNV KVM when booted with 'xive=off', indicating
+ * the use of the emulated XICS interrupt controller.
+ */
+ if (!kvmhv_on_pseries()) {
+ pr_info("KVM-HV: Enabling IRQ bypass\n");
+ kvm_ops_hv.irq_bypass_add_producer =
+ kvmppc_irq_bypass_add_producer_hv;
+ kvm_ops_hv.irq_bypass_del_producer =
+ kvmppc_irq_bypass_del_producer_hv;
+ }
+#endif
+
+ kvm_ops_hv.owner = THIS_MODULE;
+ kvmppc_hv_ops = &kvm_ops_hv;
+
+ return 0;
+
+err:
+ kvmhv_nested_exit();
+ kvmppc_radix_exit();
+
return r;
}
+/*
+ * Module exit: free the ultravisor memory state and the host real-mode ops,
+ * tear down radix support when it was possible on this host, unpublish the
+ * HV ops pointer and finally shut down nested support.
+ */
static void kvmppc_book3s_exit_hv(void)
{
+ kvmppc_uvmem_free();
+ kvmppc_free_host_rm_ops();
+ if (kvmppc_radix_possible())
+ kvmppc_radix_exit();
kvmppc_hv_ops = NULL;
+ kvmhv_nested_exit();
}
module_init(kvmppc_book3s_init_hv);
module_exit(kvmppc_book3s_exit_hv);
+MODULE_DESCRIPTION("KVM on Book3S (POWER8 and later) in hypervisor mode");
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/book3s_hv.h b/arch/powerpc/kvm/book3s_hv.h
new file mode 100644
index 000000000000..a404c9b221c1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv.h
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+#include "asm/guest-state-buffer.h"
+
+/*
+ * Saved host values of OS-level registers, passed to the load/save/switch
+ * helpers declared below. NOTE(review): each field presumably holds the
+ * SPR of the same name (AMR/IAMR plus the PMU registers) - confirm against
+ * save_p9_host_os_sprs().
+ */
+struct p9_host_os_sprs {
+ unsigned long iamr;
+ unsigned long amr;
+
+ unsigned int pmc1;
+ unsigned int pmc2;
+ unsigned int pmc3;
+ unsigned int pmc4;
+ unsigned int pmc5;
+ unsigned int pmc6;
+ unsigned long mmcr0;
+ unsigned long mmcr1;
+ unsigned long mmcr2;
+ unsigned long mmcr3;
+ unsigned long mmcra;
+ unsigned long siar;
+ unsigned long sier1;
+ unsigned long sier2;
+ unsigned long sier3;
+ unsigned long sdar;
+};
+
+/* True only when nesting has been enabled for the VM and the VM is radix. */
+static inline bool nesting_enabled(struct kvm *kvm)
+{
+ if (!kvm->arch.nested_enable)
+ return false;
+
+ return kvm_is_radix(kvm);
+}
+
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+void store_vcpu_state(struct kvm_vcpu *vcpu);
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs);
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next);
+#define start_timing(vcpu, next) accumulate_time(vcpu, next)
+#define end_timing(vcpu) accumulate_time(vcpu, NULL)
+#else
+#define accumulate_time(vcpu, next) do {} while (0)
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#endif
+
+/* Store @val as the vcpu's MSR and mark the MSR guest-state element dirty. */
+static inline void __kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 val)
+{
+ vcpu->arch.shregs.msr = val;
+ kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_MSR);
+}
+
+/*
+ * Return the vcpu's MSR after asking for a cached reload of the MSR
+ * guest-state element; a negative reload result triggers WARN_ON() but the
+ * stored value is returned regardless.
+ */
+static inline u64 __kvmppc_get_msr_hv(struct kvm_vcpu *vcpu)
+{
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, KVMPPC_GSID_MSR) < 0);
+ return vcpu->arch.shregs.msr;
+}
+
+/*
+ * Generate kvmppc_set_<reg>_hv(): store @val in vcpu->arch.<reg> and mark
+ * guest-state element @iden dirty. @size is the bit width used to build
+ * the u<size> value type.
+ */
+#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_SET(reg, size, iden) \
+static inline void kvmppc_set_##reg ##_hv(struct kvm_vcpu *vcpu, u##size val) \
+{ \
+ vcpu->arch.reg = val; \
+ kvmhv_nestedv2_mark_dirty(vcpu, iden); \
+}
+
+/*
+ * Generate kvmppc_get_<reg>_hv(): request a cached reload of @iden, then
+ * return vcpu->arch.<reg>. Unlike the array getter below, the reload
+ * result is not checked here.
+ */
+#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_GET(reg, size, iden) \
+static inline u##size kvmppc_get_##reg ##_hv(struct kvm_vcpu *vcpu) \
+{ \
+ kvmhv_nestedv2_cached_reload(vcpu, iden); \
+ return vcpu->arch.reg; \
+}
+
+/* Generate both the setter and the getter for a scalar register field. */
+#define KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(reg, size, iden) \
+ KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_SET(reg, size, iden) \
+ KVMPPC_BOOK3S_HV_VCPU_ACCESSOR_GET(reg, size, iden) \
+
+/*
+ * Array variant of the setter: store @val in vcpu->arch.<reg>[i] and mark
+ * element iden(i) dirty; @iden must therefore accept an index argument.
+ */
+#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_SET(reg, size, iden) \
+static inline void kvmppc_set_##reg ##_hv(struct kvm_vcpu *vcpu, int i, u##size val) \
+{ \
+ vcpu->arch.reg[i] = val; \
+ kvmhv_nestedv2_mark_dirty(vcpu, iden(i)); \
+}
+
+/*
+ * Array variant of the getter: WARN_ON() a negative result from the cached
+ * reload of iden(i), then return vcpu->arch.<reg>[i].
+ */
+#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_GET(reg, size, iden) \
+static inline u##size kvmppc_get_##reg ##_hv(struct kvm_vcpu *vcpu, int i) \
+{ \
+ WARN_ON(kvmhv_nestedv2_cached_reload(vcpu, iden(i)) < 0); \
+ return vcpu->arch.reg[i]; \
+}
+
+/* Generate both the setter and the getter for an array register field. */
+#define KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(reg, size, iden) \
+ KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_SET(reg, size, iden) \
+ KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR_GET(reg, size, iden) \
+
+/*
+ * Instantiate kvmppc_set_<reg>_hv()/kvmppc_get_<reg>_hv() for each vcpu
+ * register field listed here. Each line expands to complete function
+ * definitions, so no trailing semicolon is used.
+ */
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(mmcra, 64, KVMPPC_GSID_MMCRA)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(hfscr, 64, KVMPPC_GSID_HFSCR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(fscr, 64, KVMPPC_GSID_FSCR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dscr, 64, KVMPPC_GSID_DSCR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(purr, 64, KVMPPC_GSID_PURR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(spurr, 64, KVMPPC_GSID_SPURR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(amr, 64, KVMPPC_GSID_AMR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(uamor, 64, KVMPPC_GSID_UAMOR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(siar, 64, KVMPPC_GSID_SIAR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(sdar, 64, KVMPPC_GSID_SDAR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(iamr, 64, KVMPPC_GSID_IAMR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawr0, 64, KVMPPC_GSID_DAWR0)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawr1, 64, KVMPPC_GSID_DAWR1)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawrx0, 64, KVMPPC_GSID_DAWRX0)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dawrx1, 64, KVMPPC_GSID_DAWRX1)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(dexcr, 64, KVMPPC_GSID_DEXCR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(hashkeyr, 64, KVMPPC_GSID_HASHKEYR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(hashpkeyr, 64, KVMPPC_GSID_HASHPKEYR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ciabr, 64, KVMPPC_GSID_CIABR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(wort, 64, KVMPPC_GSID_WORT)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ppr, 64, KVMPPC_GSID_PPR)
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(ctrl, 64, KVMPPC_GSID_CTRL)
+
+/* Indexed register files: the identifier macro takes the array index. */
+KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(mmcr, 64, KVMPPC_GSID_MMCR)
+KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(sier, 64, KVMPPC_GSID_SIER)
+KVMPPC_BOOK3S_HV_VCPU_ARRAY_ACCESSOR(pmc, 32, KVMPPC_GSID_PMC)
+
+KVMPPC_BOOK3S_HV_VCPU_ACCESSOR(pspb, 32, KVMPPC_GSID_PSPB)
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 329d7fdd0a6a..fa0e3a22cac0 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/cpu.h>
@@ -12,18 +9,31 @@
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/spinlock.h>
-#include <linux/bootmem.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <linux/cma.h>
+#include <linux/bitops.h>
#include <asm/cputable.h>
+#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
+#include <asm/machdep.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/dbell.h>
+#include <asm/cputhreads.h>
+#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
#define KVM_CMA_CHUNK_ORDER 18
+#include "book3s_xics.h"
+#include "book3s_xive.h"
+#include "book3s_hv.h"
+
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
@@ -33,95 +43,9 @@
* By default we reserve 5% of memory for hash pagetable allocation.
*/
static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes. Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
static struct cma *kvm_cma;
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
- Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
- switch (rma_size) {
- case 32ul << 20: /* 32 MB */
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- return 8; /* only supported on POWER7 */
- return -1;
- case 64ul << 20: /* 64 MB */
- return 3;
- case 128ul << 20: /* 128 MB */
- return 7;
- case 256ul << 20: /* 256 MB */
- return 4;
- case 1ul << 30: /* 1 GB */
- return 2;
- case 16ul << 30: /* 16 GB */
- return 1;
- case 256ul << 30: /* 256 GB */
- return 0;
- default:
- return -1;
- }
-}
-
-static int __init early_parse_rma_size(char *p)
-{
- unsigned long kvm_rma_size;
-
- pr_debug("%s(%s)\n", __func__, p);
- if (!p)
- return -EINVAL;
- kvm_rma_size = memparse(p, &p);
- /*
- * Check that the requested size is one supported in hardware
- */
- if (lpcr_rmls(kvm_rma_size) < 0) {
- pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
- return -EINVAL;
- }
- kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
- return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
- struct page *page;
- struct kvm_rma_info *ri;
-
- ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
- if (!ri)
- return NULL;
- page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
- if (!page)
- goto err_out;
- atomic_set(&ri->use_count, 1);
- ri->base_pfn = page_to_pfn(page);
- return ri;
-err_out:
- kfree(ri);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
- if (atomic_dec_and_test(&ri->use_count)) {
- cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
- kfree(ri);
- }
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
static int __init early_parse_kvm_cma_resv(char *p)
{
pr_debug("%s(%s)\n", __func__, p);
@@ -131,63 +55,81 @@ static int __init early_parse_kvm_cma_resv(char *p)
}
early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
-struct page *kvm_alloc_hpt(unsigned long nr_pages)
+struct page *kvm_alloc_hpt_cma(unsigned long nr_pages)
{
- unsigned long align_pages = HPT_ALIGN_PAGES;
-
- VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+ VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- /* Old CPUs require HPT aligned on a multiple of its size */
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- align_pages = nr_pages;
- return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
+ return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES),
+ false);
}
-EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
+EXPORT_SYMBOL_GPL(kvm_alloc_hpt_cma);
-void kvm_release_hpt(struct page *page, unsigned long nr_pages)
+void kvm_free_hpt_cma(struct page *page, unsigned long nr_pages)
{
cma_release(kvm_cma, page, nr_pages);
}
-EXPORT_SYMBOL_GPL(kvm_release_hpt);
+EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
/**
* kvm_cma_reserve() - reserve area for kvm hash pagetable
*
* This function reserves memory from early allocator. It should be
- * called by arch specific code once the early allocator (memblock or bootmem)
+ * called by arch specific code once the memblock allocator
* has been activated and all other subsystems have already allocated/reserved
* memory.
*/
void __init kvm_cma_reserve(void)
{
unsigned long align_size;
- struct memblock_region *reg;
- phys_addr_t selected_size = 0;
+ phys_addr_t selected_size;
+
/*
- * We cannot use memblock_phys_mem_size() here, because
- * memblock_analyze() has not been called yet.
+ * We need CMA reservation only when we are in HV mode
*/
- for_each_memblock(memory, reg)
- selected_size += memblock_region_memory_end_pfn(reg) -
- memblock_region_memory_base_pfn(reg);
+ if (!cpu_has_feature(CPU_FTR_HVMODE))
+ return;
- selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
+ selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
if (selected_size) {
- pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+ pr_info("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
- /*
- * Old CPUs require HPT aligned on a multiple of its size. So for them
- * make the alignment as max size we could request.
- */
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- align_size = __rounddown_pow_of_two(selected_size);
- else
- align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
- align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+ align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
cma_declare_contiguous(0, selected_size, 0, align_size,
- KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
+ KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, "kvm_cma",
+ &kvm_cma);
+ }
+}
+
+/*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded. If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+ unsigned int yield_count)
+{
+ struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+ int ptid = local_paca->kvm_hstate.ptid;
+ int threads_running;
+ int threads_ceded;
+ int threads_conferring;
+ u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+ int rv = H_SUCCESS; /* => don't yield */
+
+ set_bit(ptid, &vc->conferring_threads);
+ while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
+ threads_running = VCORE_ENTRY_MAP(vc);
+ threads_ceded = vc->napping_threads;
+ threads_conferring = vc->conferring_threads;
+ if ((threads_ceded | threads_conferring) == threads_running) {
+ rv = H_TOO_HARD; /* => do yield */
+ break;
+ }
}
+ clear_bit(ptid, &vc->conferring_threads);
+ return rv;
}
/*
@@ -195,23 +137,23 @@ void __init kvm_cma_reserve(void)
* exist in the system. We use a counter of VMs to track this.
*
* One of the operations we need to block is onlining of secondaries, so we
- * protect hv_vm_count with get/put_online_cpus().
+ * protect hv_vm_count with cpus_read_lock/unlock().
*/
static atomic_t hv_vm_count;
void kvm_hv_vm_activated(void)
{
- get_online_cpus();
+ cpus_read_lock();
atomic_inc(&hv_vm_count);
- put_online_cpus();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_activated);
void kvm_hv_vm_deactivated(void)
{
- get_online_cpus();
+ cpus_read_lock();
atomic_dec(&hv_vm_count);
- put_online_cpus();
+ cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(kvm_hv_vm_deactivated);
@@ -232,3 +174,456 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+int kvmppc_hwrng_present(void)
+{
+ return ppc_md.get_random_seed != NULL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
+{
+ unsigned long rand;
+
+ if (ppc_md.get_random_seed &&
+ ppc_md.get_random_seed(&rand)) {
+ kvmppc_set_gpr(vcpu, 4, rand);
+ return H_SUCCESS;
+ }
+
+ return H_HARDWARE;
+}
+
+/*
+ * Send an interrupt or message to another CPU.
+ * The caller needs to include any barrier needed to order writes
+ * to memory vs. the IPI/message.
+ */
+void kvmhv_rm_send_ipi(int cpu)
+{
+ void __iomem *xics_phys;
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+
+ /* On POWER9 we can use msgsnd for any destination cpu. */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ msg |= get_hard_smp_processor_id(cpu);
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return;
+ }
+
+ /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ cpu_first_thread_sibling(cpu) ==
+ cpu_first_thread_sibling(raw_smp_processor_id())) {
+ msg |= cpu_thread_in_core(cpu);
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return;
+ }
+
+ /* We should never reach this */
+ if (WARN_ON_ONCE(xics_on_xive()))
+ return;
+
+ /* Else poke the target with an IPI */
+ xics_phys = paca_ptrs[cpu]->kvm_hstate.xics_phys;
+ if (xics_phys)
+ __raw_rm_writeb(IPI_PRIORITY, xics_phys + XICS_MFRR);
+ else
+ opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
+}
+
+/*
+ * The following functions are called from the assembly code
+ * in book3s_hv_rmhandlers.S.
+ */
+static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
+{
+ int cpu = vc->pcpu;
+
+ /* Order setting of exit map vs. msgsnd/IPI */
+ smp_mb();
+ for (; active; active >>= 1, ++cpu)
+ if (active & 1)
+ kvmhv_rm_send_ipi(cpu);
+}
+
+void kvmhv_commence_exit(int trap)
+{
+ struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
+ int ptid = local_paca->kvm_hstate.ptid;
+ struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
+ int me, ee, i;
+
+ /* Set our bit in the threads-exiting-guest map in the 0xff00
+ bits of vcore->entry_exit_map */
+ me = 0x100 << ptid;
+ do {
+ ee = vc->entry_exit_map;
+ } while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
+
+ /* Are we the first here? */
+ if ((ee >> 8) != 0)
+ return;
+
+ /*
+ * Trigger the other threads in this vcore to exit the guest.
+ * If this is a hypervisor decrementer interrupt then they
+ * will be already on their way out of the guest.
+ */
+ if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
+ kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
+
+ /*
+ * If we are doing dynamic micro-threading, interrupt the other
+ * subcores to pull them out of their guests too.
+ */
+ if (!sip)
+ return;
+
+ for (i = 0; i < MAX_SUBCORES; ++i) {
+ vc = sip->vc[i];
+ if (!vc)
+ break;
+ do {
+ ee = vc->entry_exit_map;
+ /* Already asked to exit? */
+ if ((ee >> 8) != 0)
+ break;
+ } while (cmpxchg(&vc->entry_exit_map, ee,
+ ee | VCORE_EXIT_REQ) != ee);
+ if ((ee >> 8) == 0)
+ kvmhv_interrupt_vcore(vc, ee);
+ }
+}
+
+struct kvmppc_host_rm_ops *kvmppc_host_rm_ops_hv;
+EXPORT_SYMBOL_GPL(kvmppc_host_rm_ops_hv);
+
+#ifdef CONFIG_KVM_XICS
+static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
+ u32 xisr)
+{
+ int i;
+
+ /*
+ * We access the mapped array here without a lock. That
+ * is safe because we never reduce the number of entries
+ * in the array and we never change the v_hwirq field of
+ * an entry once it is set.
+ *
+ * We have also carefully ordered the stores in the writer
+ * and the loads here in the reader, so that if we find a matching
+ * hwirq here, the associated GSI and irq_desc fields are valid.
+ */
+ for (i = 0; i < pimap->n_mapped; i++) {
+ if (xisr == pimap->mapped[i].r_hwirq) {
+ /*
+ * Order subsequent reads in the caller to serialize
+ * with the writer.
+ */
+ smp_rmb();
+ return &pimap->mapped[i];
+ }
+ }
+ return NULL;
+}
+
+/*
+ * If we have an interrupt that's not an IPI, check if we have a
+ * passthrough adapter and if so, check if this external interrupt
+ * is for the adapter.
+ * We will attempt to deliver the IRQ directly to the target VCPU's
+ * ICP, the virtual ICP (based on affinity - the xive value in ICS).
+ *
+ * If the delivery fails or if this is not for a passthrough adapter,
+ * return to the host to handle this interrupt. We earlier
+ * saved a copy of the XIRR in the PACA, it will be picked up by
+ * the host ICP driver.
+ */
+static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
+{
+ struct kvmppc_passthru_irqmap *pimap;
+ struct kvmppc_irq_map *irq_map;
+ struct kvm_vcpu *vcpu;
+
+ vcpu = local_paca->kvm_hstate.kvm_vcpu;
+ if (!vcpu)
+ return 1;
+ pimap = kvmppc_get_passthru_irqmap(vcpu->kvm);
+ if (!pimap)
+ return 1;
+ irq_map = get_irqmap(pimap, xisr);
+ if (!irq_map)
+ return 1;
+
+ /* We're handling this interrupt, generic code doesn't need to */
+ local_paca->kvm_hstate.saved_xirr = 0;
+
+ return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
+}
+
+#else
+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
+{
+ return 1;
+}
+#endif
+
+/*
+ * Determine what sort of external interrupt is pending (if any).
+ * Returns:
+ * 0 if no interrupt is pending
+ * 1 if an interrupt is pending that needs to be handled by the host
+ * 2 Passthrough that needs completion in the host
+ * -1 if there was a guest wakeup IPI (which has now been cleared)
+ * -2 if there is a PCI passthrough external interrupt that was handled
+ */
+static long kvmppc_read_one_intr(bool *again);
+
+long kvmppc_read_intr(void)
+{
+ long ret = 0;
+ long rc;
+ bool again;
+
+ if (xive_enabled())
+ return 1;
+
+ do {
+ again = false;
+ rc = kvmppc_read_one_intr(&again);
+ if (rc && (ret == 0 || rc > ret))
+ ret = rc;
+ } while (again);
+ return ret;
+}
+
+static long kvmppc_read_one_intr(bool *again)
+{
+ void __iomem *xics_phys;
+ u32 h_xirr;
+ __be32 xirr;
+ u32 xisr;
+ u8 host_ipi;
+ int64_t rc;
+
+ if (xive_enabled())
+ return 1;
+
+ /* see if a host IPI is pending */
+ host_ipi = READ_ONCE(local_paca->kvm_hstate.host_ipi);
+ if (host_ipi)
+ return 1;
+
+ /* Now read the interrupt from the ICP */
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ rc = 0;
+ if (!xics_phys)
+ rc = opal_int_get_xirr(&xirr, false);
+ else
+ xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
+ if (rc < 0)
+ return 1;
+
+ /*
+ * Save XIRR for later. Since we get control in reverse endian
+ * on LE systems, save it byte reversed and fetch it back in
+ * host endian. Note that xirr is the value read from the
+ * XIRR register, while h_xirr is the host endian version.
+ */
+ h_xirr = be32_to_cpu(xirr);
+ local_paca->kvm_hstate.saved_xirr = h_xirr;
+ xisr = h_xirr & 0xffffff;
+ /*
+ * Ensure that the store/load complete to guarantee all side
+ * effects of loading from XIRR have completed
+ */
+ smp_mb();
+
+ /* if nothing pending in the ICP */
+ if (!xisr)
+ return 0;
+
+ /* We found something in the ICP...
+ *
+ * If it is an IPI, clear the MFRR and EOI it.
+ */
+ if (xisr == XICS_IPI) {
+ rc = 0;
+ if (xics_phys) {
+ __raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
+ __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
+ } else {
+ opal_int_set_mfrr(hard_smp_processor_id(), 0xff);
+ rc = opal_int_eoi(h_xirr);
+ }
+ /* If rc > 0, there is another interrupt pending */
+ *again = rc > 0;
+
+ /*
+ * Need to ensure side effects of above stores
+ * complete before proceeding.
+ */
+ smp_mb();
+
+ /*
+ * We need to re-check host IPI now in case it got set in the
+ * meantime. If it's clear, we bounce the interrupt to the
+ * guest
+ */
+ host_ipi = READ_ONCE(local_paca->kvm_hstate.host_ipi);
+ if (unlikely(host_ipi != 0)) {
+ /* We raced with the host,
+ * we need to resend that IPI, bummer
+ */
+ if (xics_phys)
+ __raw_rm_writeb(IPI_PRIORITY,
+ xics_phys + XICS_MFRR);
+ else
+ opal_int_set_mfrr(hard_smp_processor_id(),
+ IPI_PRIORITY);
+ /* Let side effects complete */
+ smp_mb();
+ return 1;
+ }
+
+ /* OK, it's an IPI for us */
+ local_paca->kvm_hstate.saved_xirr = 0;
+ return -1;
+ }
+
+ return kvmppc_check_passthru(xisr, xirr, again);
+}
+
+static void kvmppc_end_cede(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ceded = 0;
+ if (vcpu->arch.timer_running) {
+ hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
+ vcpu->arch.timer_running = 0;
+ }
+}
+
+void kvmppc_set_msr_hv(struct kvm_vcpu *vcpu, u64 msr)
+{
+ /* Guest must always run with ME enabled, HV disabled. */
+ msr = (msr | MSR_ME) & ~MSR_HV;
+
+ /*
+ * Check for illegal transactional state bit combination
+ * and if we find it, force the TS field to a safe state.
+ */
+ if ((msr & MSR_TS_MASK) == MSR_TS_MASK)
+ msr &= ~MSR_TS_MASK;
+ __kvmppc_set_msr_hv(vcpu, msr);
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_set_msr_hv);
+
+static void inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = vec;
+
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+
+ /*
+ * Perform MSR and PC adjustment for LPCR[AIL]=3 if it is set and
+ * applicable. AIL=2 is not supported.
+ *
+ * AIL does not apply to SRESET, MCE, or HMI (which is never
+ * delivered to the guest), and does not apply if IR=0 or DR=0.
+ */
+ if (vec != BOOK3S_INTERRUPT_SYSTEM_RESET &&
+ vec != BOOK3S_INTERRUPT_MACHINE_CHECK &&
+ (vcpu->arch.vcore->lpcr & LPCR_AIL) == LPCR_AIL_3 &&
+ (msr & (MSR_IR|MSR_DR)) == (MSR_IR|MSR_DR) ) {
+ new_msr |= MSR_IR | MSR_DR;
+ new_pc += 0xC000000000004000ULL;
+ }
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ __kvmppc_set_msr_hv(vcpu, new_msr);
+}
+
+void kvmppc_inject_interrupt_hv(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ inject_interrupt(vcpu, vec, srr1_flags);
+ kvmppc_end_cede(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvmppc_inject_interrupt_hv);
+
+/*
+ * Is there a PRIV_DOORBELL pending for the guest (on POWER9)?
+ * Can we inject a Decrementer or an External interrupt?
+ */
+void kvmppc_guest_entry_inject_int(struct kvm_vcpu *vcpu)
+{
+ int ext;
+ unsigned long lpcr;
+
+ WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300));
+
+ /* Insert EXTERNAL bit into LPCR at the MER bit position */
+ ext = (vcpu->arch.pending_exceptions >> BOOK3S_IRQPRIO_EXTERNAL) & 1;
+ lpcr = mfspr(SPRN_LPCR);
+ lpcr |= ext << LPCR_MER_SH;
+ mtspr(SPRN_LPCR, lpcr);
+ isync();
+
+ if (vcpu->arch.shregs.msr & MSR_EE) {
+ if (ext) {
+ inject_interrupt(vcpu, BOOK3S_INTERRUPT_EXTERNAL, 0);
+ } else {
+ long int dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD))
+ dec = (int) dec;
+ if (dec < 0)
+ inject_interrupt(vcpu,
+ BOOK3S_INTERRUPT_DECREMENTER, 0);
+ }
+ }
+
+ if (vcpu->arch.doorbell_request) {
+ mtspr(SPRN_DPDES, 1);
+ vcpu->arch.vcore->dpdes = 1;
+ smp_wmb();
+ vcpu->arch.doorbell_request = 0;
+ }
+}
+
+static void flush_guest_tlb(struct kvm *kvm)
+{
+ unsigned long rb, set;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+ /* R=0 PRS=0 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (0), "i" (0), "i" (0),
+ "r" (0) : "memory");
+ rb += PPC_BIT(51); /* increment set number */
+ }
+ asm volatile("ptesync": : :"memory");
+}
+
+void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu)
+{
+ if (cpumask_test_cpu(pcpu, &kvm->arch.need_tlb_flush)) {
+ flush_guest_tlb(kvm);
+
+ /* Clear the bit after the TLB flush */
+ cpumask_clear_cpu(pcpu, &kvm->arch.need_tlb_flush);
+ }
+}
+EXPORT_SYMBOL_GPL(kvmppc_check_need_tlb_flush);
diff --git a/arch/powerpc/kvm/book3s_hv_hmi.c b/arch/powerpc/kvm/book3s_hv_hmi.c
new file mode 100644
index 000000000000..1ec50c69678b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_hmi.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Hypervisor Maintenance Interrupt (HMI) handling.
+ *
+ * Copyright 2015 IBM Corporation
+ * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+ */
+
+#undef DEBUG
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <asm/paca.h>
+#include <asm/hmi.h>
+#include <asm/processor.h>
+
+void wait_for_subcore_guest_exit(void)
+{
+ int i;
+
+ /*
+ * NULL bitmap pointer indicates that KVM module hasn't
+ * been loaded yet and hence no guests are running, or running
+ * on POWER9 or newer CPU.
+ *
+ * If no KVM is in use, no need to co-ordinate among threads
+ * as all of them will always be in host and no one is going
+ * to modify TB other than the opal hmi handler.
+ *
+ * POWER9 and newer don't need this synchronisation.
+ *
+ * Hence, just return from here.
+ */
+ if (!local_paca->sibling_subcore_state)
+ return;
+
+ for (i = 0; i < MAX_SUBCORE_PER_CORE; i++)
+ while (local_paca->sibling_subcore_state->in_guest[i])
+ cpu_relax();
+}
+
+void wait_for_tb_resync(void)
+{
+ if (!local_paca->sibling_subcore_state)
+ return;
+
+ while (test_bit(CORE_TB_RESYNC_REQ_BIT,
+ &local_paca->sibling_subcore_state->flags))
+ cpu_relax();
+}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 731be7478b27..c0deeea7eef3 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
@@ -20,6 +9,7 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
+#include <linux/linkage.h>
#include <asm/ppc_asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
@@ -27,6 +17,8 @@
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/ppc-opcode.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
/*****************************************************************************
* *
@@ -52,10 +44,8 @@ _GLOBAL(__kvmppc_vcore_entry)
std r3, _CCR(r1)
/* Save host DSCR */
-BEGIN_FTR_SECTION
mfspr r3, SPRN_DSCR
std r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
BEGIN_FTR_SECTION
/* Save host DABR */
@@ -63,108 +53,28 @@ BEGIN_FTR_SECTION
std r3, HSTATE_DABR(r13)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
- /* Hard-disable interrupts */
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
- rldicl r10,r10,48,1
- rotldi r10,r10,16
- mtmsrd r10,1
-
/* Save host PMU registers */
-BEGIN_FTR_SECTION
- /* Work around P8 PMAE bug */
- li r3, -1
- clrrdi r3, r3, 10
- mfspr r8, SPRN_MMCR2
- mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
- isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mfspr r7, SPRN_MMCR0 /* save MMCR0 */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
- mfspr r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
- /* On P7, clear MMCRA in order to disable SDAR updates */
- li r5, 0
- mtspr SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- isync
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r5, LPPACA_PMCINUSE(r3)
- cmpwi r5, 0
- beq 31f /* skip if not */
- mfspr r5, SPRN_MMCR1
- mfspr r9, SPRN_SIAR
- mfspr r10, SPRN_SDAR
- std r7, HSTATE_MMCR(r13)
- std r5, HSTATE_MMCR + 8(r13)
- std r6, HSTATE_MMCR + 16(r13)
- std r9, HSTATE_MMCR + 24(r13)
- std r10, HSTATE_MMCR + 32(r13)
-BEGIN_FTR_SECTION
- mfspr r9, SPRN_SIER
- std r8, HSTATE_MMCR + 40(r13)
- std r9, HSTATE_MMCR + 48(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mfspr r3, SPRN_PMC1
- mfspr r5, SPRN_PMC2
- mfspr r6, SPRN_PMC3
- mfspr r7, SPRN_PMC4
- mfspr r8, SPRN_PMC5
- mfspr r9, SPRN_PMC6
-BEGIN_FTR_SECTION
- mfspr r10, SPRN_PMC7
- mfspr r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- stw r3, HSTATE_PMC(r13)
- stw r5, HSTATE_PMC + 4(r13)
- stw r6, HSTATE_PMC + 8(r13)
- stw r7, HSTATE_PMC + 12(r13)
- stw r8, HSTATE_PMC + 16(r13)
- stw r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
- stw r10, HSTATE_PMC + 24(r13)
- stw r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-31:
+ bl kvmhv_save_host_pmu
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
+ * Because of a hardware deviation in P8,
+ * we need to set LPCR[HDICE] before writing HDEC.
*/
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_KVM(r5)
+ ld r9, KVM_HOST_LPCR(r6)
+ ori r8, r9, LPCR_HDICE
+ mtspr SPRN_LPCR, r8
+ isync
mfspr r8,SPRN_DEC
mftb r7
- mtspr SPRN_HDEC,r8
extsw r8,r8
+ mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
-#ifdef CONFIG_SMP
- /*
- * On PPC970, if the guest vcpu has an external interrupt pending,
- * send ourselves an IPI so as to interrupt the guest once it
- * enables interrupts. (It must have interrupts disabled,
- * otherwise we would already have delivered the interrupt.)
- *
- * XXX If this is a UP build, smp_send_reschedule is not available,
- * so the interrupt will be delayed until the next time the vcpu
- * enters the guest with interrupts enabled.
- */
-BEGIN_FTR_SECTION
- ld r4, HSTATE_KVM_VCPU(r13)
- ld r0, VCPU_PENDING_EXC(r4)
- li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
- oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
- and. r0, r0, r7
- beq 32f
- lhz r3, PACAPACAINDEX(r13)
- bl smp_send_reschedule
- nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
/* Jump to partition switch code */
bl kvmppc_hv_entry_trampoline
nop
@@ -172,7 +82,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
* We return here in virtual mode after the guest exits
* with something that we can't handle in real mode.
- * Interrupts are enabled again at this point.
+ * Interrupts are still hard-disabled.
*/
/*
@@ -180,6 +90,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
*
* R1 = host R1
* R2 = host R2
+ * R3 = trap number on this thread
* R12 = exit handler id
* R13 = PACA
*/
@@ -193,3 +104,55 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
blr
+
+/*
+ * void kvmhv_save_host_pmu(void)
+ */
+SYM_FUNC_START_LOCAL(kvmhv_save_host_pmu)
+BEGIN_FTR_SECTION
+ /* Work around P8 PMAE bug */
+ li r3, -1
+ clrrdi r3, r3, 10
+ mfspr r8, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3 /* freeze all counters using MMCR2 */
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r7, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
+ mfspr r6, SPRN_MMCRA
+ /* Clear MMCRA in order to disable SDAR updates */
+ li r5, 0
+ mtspr SPRN_MMCRA, r5
+ isync
+ lbz r5, PACA_PMCINUSE(r13) /* is the host using the PMU? */
+ cmpwi r5, 0
+ beq 31f /* skip if not */
+ mfspr r5, SPRN_MMCR1
+ mfspr r9, SPRN_SIAR
+ mfspr r10, SPRN_SDAR
+ std r7, HSTATE_MMCR0(r13)
+ std r5, HSTATE_MMCR1(r13)
+ std r6, HSTATE_MMCRA(r13)
+ std r9, HSTATE_SIAR(r13)
+ std r10, HSTATE_SDAR(r13)
+BEGIN_FTR_SECTION
+ mfspr r9, SPRN_SIER
+ std r8, HSTATE_MMCR2(r13)
+ std r9, HSTATE_SIER(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mfspr r3, SPRN_PMC1
+ mfspr r5, SPRN_PMC2
+ mfspr r6, SPRN_PMC3
+ mfspr r7, SPRN_PMC4
+ mfspr r8, SPRN_PMC5
+ mfspr r9, SPRN_PMC6
+ stw r3, HSTATE_PMC1(r13)
+ stw r5, HSTATE_PMC2(r13)
+ stw r6, HSTATE_PMC3(r13)
+ stw r7, HSTATE_PMC4(r13)
+ stw r8, HSTATE_PMC5(r13)
+ stw r9, HSTATE_PMC6(r13)
+31: blr
+SYM_FUNC_END(kvmhv_save_host_pmu)
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
new file mode 100644
index 000000000000..5f8c2321cfb5
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -0,0 +1,1714 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2018
+ * Authors Suraj Jitindar Singh <sjitindarsingh@gmail.com>
+ * Paul Mackerras <paulus@ozlabs.org>
+ *
+ * Description: KVM functions specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/llist.h>
+#include <linux/pgtable.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+static struct patb_entry *pseries_partition_tb;
+
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp);
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free);
+
+/*
+ * Capture the hypervisor register state currently held for @vcpu (and
+ * for the virtual core it belongs to) into @hr.
+ *
+ * PCR_MASK is always OR-ed into the saved pcr value.
+ */
+void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* Values kept in the virtual core */
+	hr->pcr = vcore->pcr | PCR_MASK;
+	hr->tb_offset = vcore->tb_offset;
+	hr->vtb = vcore->vtb;
+
+	/* Values kept in the vcpu */
+	hr->dpdes = vcpu->arch.doorbell_request;
+	hr->hfscr = vcpu->arch.hfscr;
+	hr->ciabr = vcpu->arch.ciabr;
+	hr->purr = vcpu->arch.purr;
+	hr->spurr = vcpu->arch.spurr;
+	hr->ic = vcpu->arch.ic;
+	hr->pidr = vcpu->arch.pid;
+	hr->cfar = vcpu->arch.cfar;
+	hr->ppr = vcpu->arch.ppr;
+
+	/* Both data address watchpoint register pairs */
+	hr->dawr0 = vcpu->arch.dawr0;
+	hr->dawrx0 = vcpu->arch.dawrx0;
+	hr->dawr1 = vcpu->arch.dawr1;
+	hr->dawrx1 = vcpu->arch.dawrx1;
+
+	/* Values kept in the vcpu's shregs area */
+	hr->srr0 = vcpu->arch.shregs.srr0;
+	hr->srr1 = vcpu->arch.shregs.srr1;
+	hr->sprg[0] = vcpu->arch.shregs.sprg0;
+	hr->sprg[1] = vcpu->arch.shregs.sprg1;
+	hr->sprg[2] = vcpu->arch.shregs.sprg2;
+	hr->sprg[3] = vcpu->arch.shregs.sprg3;
+}
+
+/*
+ * Byte-swap @regs in place, one unsigned long at a time.
+ *
+ * Use noinline_for_stack due to https://llvm.org/pr49610
+ */
+static noinline_for_stack void byteswap_pt_regs(struct pt_regs *regs)
+{
+	unsigned long *word = (unsigned long *) regs;
+	unsigned long *end = (unsigned long *) (regs + 1);
+
+	while (word < end) {
+		*word = swab64(*word);
+		word++;
+	}
+}
+
+/*
+ * Byte-swap every field of @hr in place.
+ *
+ * lpid and vcpu_token are swapped as 32-bit values, everything else as
+ * 64-bit values. PCR_MASK is OR-ed into pcr after swapping.
+ */
+static void byteswap_hv_regs(struct hv_guest_state *hr)
+{
+	int i;
+
+	/* Header: version and the two 32-bit identifiers */
+	hr->version = swab64(hr->version);
+	hr->lpid = swab32(hr->lpid);
+	hr->vcpu_token = swab32(hr->vcpu_token);
+
+	hr->lpcr = swab64(hr->lpcr);
+	hr->pcr = swab64(hr->pcr) | PCR_MASK;
+	hr->amor = swab64(hr->amor);
+	hr->dpdes = swab64(hr->dpdes);
+	hr->hfscr = swab64(hr->hfscr);
+	hr->tb_offset = swab64(hr->tb_offset);
+	hr->dawr0 = swab64(hr->dawr0);
+	hr->dawrx0 = swab64(hr->dawrx0);
+	hr->ciabr = swab64(hr->ciabr);
+	hr->hdec_expiry = swab64(hr->hdec_expiry);
+	hr->purr = swab64(hr->purr);
+	hr->spurr = swab64(hr->spurr);
+	hr->ic = swab64(hr->ic);
+	hr->vtb = swab64(hr->vtb);
+
+	/* Fault reporting fields */
+	hr->hdar = swab64(hr->hdar);
+	hr->hdsisr = swab64(hr->hdsisr);
+	hr->heir = swab64(hr->heir);
+	hr->asdr = swab64(hr->asdr);
+
+	hr->srr0 = swab64(hr->srr0);
+	hr->srr1 = swab64(hr->srr1);
+	for (i = 0; i < 4; i++)
+		hr->sprg[i] = swab64(hr->sprg[i]);
+
+	hr->pidr = swab64(hr->pidr);
+	hr->cfar = swab64(hr->cfar);
+	hr->ppr = swab64(hr->ppr);
+	hr->dawr1 = swab64(hr->dawr1);
+	hr->dawrx1 = swab64(hr->dawrx1);
+}
+
+/*
+ * Fill @hr with the state that is reported back once the nested guest
+ * has stopped running: the always-returned registers plus the fault
+ * details that belong to the trap recorded in vcpu->arch.trap.
+ */
+static void save_hv_return_state(struct kvm_vcpu *vcpu,
+				 struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* Registers returned regardless of the exit reason */
+	hr->vtb = vcore->vtb;
+	hr->dpdes = vcpu->arch.doorbell_request;
+	hr->purr = vcpu->arch.purr;
+	hr->spurr = vcpu->arch.spurr;
+	hr->ic = vcpu->arch.ic;
+	hr->pidr = vcpu->arch.pid;
+	hr->cfar = vcpu->arch.cfar;
+	hr->ppr = vcpu->arch.ppr;
+	hr->srr0 = vcpu->arch.shregs.srr0;
+	hr->srr1 = vcpu->arch.shregs.srr1;
+	hr->sprg[0] = vcpu->arch.shregs.sprg0;
+	hr->sprg[1] = vcpu->arch.shregs.sprg1;
+	hr->sprg[2] = vcpu->arch.shregs.sprg2;
+	hr->sprg[3] = vcpu->arch.shregs.sprg3;
+
+	/* Registers that are only meaningful for particular traps */
+	switch (vcpu->arch.trap) {
+	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		hr->asdr = vcpu->arch.fault_gpa;
+		break;
+	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		hr->asdr = vcpu->arch.fault_gpa;
+		hr->hdar = vcpu->arch.fault_dar;
+		hr->hdsisr = vcpu->arch.fault_dsisr;
+		break;
+	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		hr->heir = vcpu->arch.emul_inst;
+		break;
+	case BOOK3S_INTERRUPT_H_FAC_UNAVAIL:
+		/* Replace only the interrupt cause field of hr->hfscr */
+		hr->hfscr &= ~HFSCR_INTR_CAUSE;
+		hr->hfscr |= vcpu->arch.hfscr & HFSCR_INTR_CAUSE;
+		break;
+	}
+}
+
+/*
+ * Load the hypervisor register state described by @hr into @vcpu and
+ * its virtual core. tb_offset is not touched here. PCR_MASK is always
+ * OR-ed into the pcr value that gets installed.
+ */
+static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/* Values kept in the virtual core */
+	vcore->pcr = hr->pcr | PCR_MASK;
+	vcore->vtb = hr->vtb;
+
+	/* Values kept in the vcpu */
+	vcpu->arch.doorbell_request = hr->dpdes;
+	vcpu->arch.hfscr = hr->hfscr;
+	vcpu->arch.ciabr = hr->ciabr;
+	vcpu->arch.purr = hr->purr;
+	vcpu->arch.spurr = hr->spurr;
+	vcpu->arch.ic = hr->ic;
+	vcpu->arch.pid = hr->pidr;
+	vcpu->arch.cfar = hr->cfar;
+	vcpu->arch.ppr = hr->ppr;
+
+	/* Both data address watchpoint register pairs */
+	vcpu->arch.dawr0 = hr->dawr0;
+	vcpu->arch.dawrx0 = hr->dawrx0;
+	vcpu->arch.dawr1 = hr->dawr1;
+	vcpu->arch.dawrx1 = hr->dawrx1;
+
+	/* Values kept in the vcpu's shregs area */
+	vcpu->arch.shregs.srr0 = hr->srr0;
+	vcpu->arch.shregs.srr1 = hr->srr1;
+	vcpu->arch.shregs.sprg0 = hr->sprg[0];
+	vcpu->arch.shregs.sprg1 = hr->sprg[1];
+	vcpu->arch.shregs.sprg2 = hr->sprg[2];
+	vcpu->arch.shregs.sprg3 = hr->sprg[3];
+}
+
+/*
+ * Copy the state returned in @hr back into @vcpu and its virtual core,
+ * including the fault details (hdar, hdsisr, asdr, heir).
+ */
+void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu,
+				   struct hv_guest_state *hr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+
+	/*
+	 * A doorbell may have been sent to this L2 vCPU while
+	 * H_ENTER_NESTED was being handled, so the request is merged
+	 * rather than overwritten. It must survive if either:
+	 *  a) it was sent after H_ENTER_NESTED was called on this vCPU
+	 *     (arch.doorbell_request would be 1), or
+	 *  b) it was not handled and L2 exited for some other reason
+	 *     (hr->dpdes would be 1).
+	 */
+	vcpu->arch.doorbell_request |= hr->dpdes;
+
+	vcore->vtb = hr->vtb;
+	vcpu->arch.hfscr = hr->hfscr;
+	vcpu->arch.purr = hr->purr;
+	vcpu->arch.spurr = hr->spurr;
+	vcpu->arch.ic = hr->ic;
+	vcpu->arch.pid = hr->pidr;
+	vcpu->arch.cfar = hr->cfar;
+	vcpu->arch.ppr = hr->ppr;
+
+	/* Fault details */
+	vcpu->arch.fault_dar = hr->hdar;
+	vcpu->arch.fault_dsisr = hr->hdsisr;
+	vcpu->arch.fault_gpa = hr->asdr;
+	vcpu->arch.emul_inst = hr->heir;
+
+	/* Values kept in the vcpu's shregs area */
+	vcpu->arch.shregs.srr0 = hr->srr0;
+	vcpu->arch.shregs.srr1 = hr->srr1;
+	vcpu->arch.shregs.sprg0 = hr->sprg[0];
+	vcpu->arch.shregs.sprg1 = hr->sprg[1];
+	vcpu->arch.shregs.sprg2 = hr->sprg[2];
+	vcpu->arch.shregs.sprg3 = hr->sprg[3];
+}
+
+/*
+ * Prepare @vcpu for completing an MMIO access made by the nested guest.
+ * @regs_ptr is the L1 address of the L2 register block (struct pt_regs).
+ */
+static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr)
+{
+	bool targets_gpr, is_load;
+
+	/* The page fault was handled here, so it is not reflected to L1 */
+	vcpu->arch.trap = 0;
+
+	targets_gpr = (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) ==
+		      KVM_MMIO_REG_GPR;
+	is_load = (vcpu->mmio_is_write == 0);
+	if (!targets_gpr || !is_load)
+		return;
+
+	/*
+	 * By the time the mmio completes, the L2 gprs have already been
+	 * written back into L1 memory. Remember where in L1 memory the
+	 * destination gpr lives so that kvmppc_complete_mmio_load() can
+	 * write the loaded value there.
+	 */
+	vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr +
+				   offsetof(struct pt_regs,
+					    gpr[vcpu->arch.io_gpr]);
+	vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR;
+}
+
+/*
+ * Read the hv_guest_state at guest address @hv_ptr into @l2_hv and the
+ * pt_regs at guest address @regs_ptr into @l2_regs.
+ *
+ * The version field is read first (and byte-swapped if needed) because
+ * it determines how many bytes of hv_guest_state are read.
+ *
+ * Returns 0 on success, -1 if the version cannot be read or has no
+ * known size, and 1 if either bulk read fails.
+ */
+static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
+					   struct hv_guest_state *l2_hv,
+					   struct pt_regs *l2_regs,
+					   u64 hv_ptr, u64 regs_ptr)
+{
+	int hv_size;
+
+	if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
+				sizeof(l2_hv->version)) != 0)
+		return -1;
+
+	if (kvmppc_need_byteswap(vcpu))
+		l2_hv->version = swab64(l2_hv->version);
+
+	hv_size = hv_guest_state_size(l2_hv->version);
+	if (hv_size < 0)
+		return -1;
+
+	if (kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, hv_size))
+		return 1;
+
+	return !!kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
+				     sizeof(struct pt_regs));
+}
+
+/*
+ * Write @l2_hv to guest address @hv_ptr and @l2_regs to guest address
+ * @regs_ptr. The number of hv_guest_state bytes written depends on
+ * l2_hv->version.
+ *
+ * Returns 0 on success, -1 if the version has no known size, and 1 if
+ * either write fails.
+ */
+static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
+					    struct hv_guest_state *l2_hv,
+					    struct pt_regs *l2_regs,
+					    u64 hv_ptr, u64 regs_ptr)
+{
+	int hv_size = hv_guest_state_size(l2_hv->version);
+
+	if (hv_size < 0)
+		return -1;
+
+	if (kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, hv_size))
+		return 1;
+
+	return !!kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
+				      sizeof(struct pt_regs));
+}
+
+/*
+ * Install the L2 hypervisor state from @l2_hv into @vcpu, then filter
+ * the values L1 is not allowed to control freely.
+ *
+ * @lpcr is in/out: on entry the LPCR requested by L1, on return the
+ * sanitised LPCR to run the L2 with. @l1_hv is not used.
+ */
+static void load_l2_hv_regs(struct kvm_vcpu *vcpu,
+			    const struct hv_guest_state *l2_hv,
+			    const struct hv_guest_state *l1_hv, u64 *lpcr)
+{
+	struct kvmppc_vcore *vcore = vcpu->arch.vcore;
+	u64 l1_settable, merged_lpcr, hfscr_allowed;
+
+	restore_hv_regs(vcpu, l2_hv);
+
+	/* The only LPCR bits that L1 may change for the L2 */
+	l1_settable = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
+		      LPCR_MER;
+	merged_lpcr = (vcore->lpcr & ~l1_settable) | (*lpcr & l1_settable);
+
+	/* Further filtering depends on hardware and configuration */
+	*lpcr = kvmppc_filter_lpcr_hv(vcpu->kvm, merged_lpcr);
+
+	/*
+	 * L1 cannot enable features for L2 that are not allowed for L1
+	 * itself; the interrupt cause field is preserved.
+	 */
+	hfscr_allowed = HFSCR_INTR_CAUSE | vcpu->arch.hfscr_permitted;
+	vcpu->arch.hfscr = l2_hv->hfscr & hfscr_allowed;
+
+	/* Data address watchpoints must not match in hypervisor state */
+	vcpu->arch.dawrx0 = l2_hv->dawrx0 & ~DAWRX_HYP;
+	vcpu->arch.dawrx1 = l2_hv->dawrx1 & ~DAWRX_HYP;
+
+	/* Nor may the completed instruction address breakpoint */
+	if ((l2_hv->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER)
+		vcpu->arch.ciabr = l2_hv->ciabr & ~CIABR_PRIV;
+}
+
+/*
+ * Run a nested (L2) guest on behalf of the L1 guest running on @vcpu.
+ * NOTE(review): presumably the handler for the H_ENTER_NESTED hcall.
+ *
+ * r4 = L1 address of the struct hv_guest_state for the L2
+ * r5 = L1 address of the struct pt_regs for the L2
+ *
+ * The L1 state is saved, the L2 state is loaded, the vcpu is run until
+ * it stops resuming the guest, and then the L2 state is written back to
+ * L1 memory and the L1 state is restored.
+ *
+ * Returns H_NOT_AVAILABLE if L1 has not set a partition table,
+ * H_BAD_MODE for unacceptable transactional-memory MSR state,
+ * H_PARAMETER if the arguments cannot be read, the vcpu token is out of
+ * range or the nested guest cannot be found/created, H_P2 for an
+ * unsupported hv_guest_state version, H_AUTHORITY if the state cannot
+ * be written back, H_INTERRUPT if the run was interrupted (-EINTR),
+ * H_TOO_HARD if an MMIO has to be completed, and otherwise the trap
+ * number recorded in vcpu->arch.trap.
+ */
+long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
+{
+ long int err, r;
+ struct kvm_nested_guest *l2;
+ struct pt_regs l2_regs, saved_l1_regs;
+ struct hv_guest_state l2_hv = {0}, saved_l1_hv;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 hv_ptr, regs_ptr;
+ u64 hdec_exp, lpcr;
+ s64 delta_purr, delta_spurr, delta_ic, delta_vtb;
+
+ if (vcpu->kvm->arch.l1_ptcr == 0)
+ return H_NOT_AVAILABLE;
+
+ if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr))
+ return H_BAD_MODE;
+
+ /* copy parameters in */
+ hv_ptr = kvmppc_get_gpr(vcpu, 4);
+ regs_ptr = kvmppc_get_gpr(vcpu, 5);
+ kvm_vcpu_srcu_read_lock(vcpu);
+ err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+ hv_ptr, regs_ptr);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (err)
+ return H_PARAMETER;
+
+ if (kvmppc_need_byteswap(vcpu))
+ byteswap_hv_regs(&l2_hv);
+ if (l2_hv.version > HV_GUEST_STATE_VERSION)
+ return H_P2;
+
+ if (kvmppc_need_byteswap(vcpu))
+ byteswap_pt_regs(&l2_regs);
+ if (l2_hv.vcpu_token >= NR_CPUS)
+ return H_PARAMETER;
+
+ /*
+ * L1 must have set up a suspended state to enter the L2 in a
+ * transactional state, and only in that case. These have to be
+ * filtered out here to prevent causing a TM Bad Thing in the
+ * host HRFID. We could synthesize a TM Bad Thing back to the L1
+ * here but there doesn't seem like much point.
+ */
+ if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) {
+ if (!MSR_TM_ACTIVE(l2_regs.msr))
+ return H_BAD_MODE;
+ } else {
+ if (l2_regs.msr & MSR_TS_MASK)
+ return H_BAD_MODE;
+ if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK))
+ return H_BAD_MODE;
+ }
+
+ /* translate lpid */
+ /* Takes a reference on the nested guest; dropped by kvmhv_put_nested() below */
+ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true);
+ if (!l2)
+ return H_PARAMETER;
+ /* No cached partition table entry yet: (re)load it from L1 memory */
+ if (!l2->l1_gr_to_hr) {
+ mutex_lock(&l2->tlb_lock);
+ kvmhv_update_ptbl_cache(l2);
+ mutex_unlock(&l2->tlb_lock);
+ }
+
+ /* save l1 values of things */
+ vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
+ saved_l1_regs = vcpu->arch.regs;
+ kvmhv_save_hv_regs(vcpu, &saved_l1_hv);
+
+ /* convert TB values/offsets to host (L0) values */
+ hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
+ vc->tb_offset += l2_hv.tb_offset;
+ vcpu->arch.dec_expires += l2_hv.tb_offset;
+
+ /* set L1 state to L2 state */
+ vcpu->arch.nested = l2;
+ vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token;
+ vcpu->arch.nested_hfscr = l2_hv.hfscr;
+ vcpu->arch.regs = l2_regs;
+
+ /* Guest must always run with ME enabled, HV disabled. */
+ vcpu->arch.shregs.msr = (vcpu->arch.regs.msr | MSR_ME) & ~MSR_HV;
+
+ lpcr = l2_hv.lpcr;
+ load_l2_hv_regs(vcpu, &l2_hv, &saved_l1_hv, &lpcr);
+
+ vcpu->arch.ret = RESUME_GUEST;
+ vcpu->arch.trap = 0;
+ /* Keep running the L2 for as long as the exit asks to resume the guest */
+ do {
+ r = kvmhv_run_single_vcpu(vcpu, hdec_exp, lpcr);
+ } while (is_kvmppc_resume_guest(r));
+
+ /* save L2 state for return */
+ l2_regs = vcpu->arch.regs;
+ l2_regs.msr = vcpu->arch.shregs.msr;
+ /* How far these counters advanced while the L2 ran (l2_hv still holds the entry values) */
+ delta_purr = vcpu->arch.purr - l2_hv.purr;
+ delta_spurr = vcpu->arch.spurr - l2_hv.spurr;
+ delta_ic = vcpu->arch.ic - l2_hv.ic;
+ delta_vtb = vc->vtb - l2_hv.vtb;
+ save_hv_return_state(vcpu, &l2_hv);
+
+ /* restore L1 state */
+ vcpu->arch.nested = NULL;
+ vcpu->arch.regs = saved_l1_regs;
+ vcpu->arch.shregs.msr = saved_l1_regs.msr & ~MSR_TS_MASK;
+ /* set L1 MSR TS field according to L2 transaction state */
+ if (l2_regs.msr & MSR_TS_MASK)
+ vcpu->arch.shregs.msr |= MSR_TS_S;
+ vc->tb_offset = saved_l1_hv.tb_offset;
+ /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
+ vcpu->arch.dec_expires -= l2_hv.tb_offset;
+ restore_hv_regs(vcpu, &saved_l1_hv);
+ /* Add the advance observed while the L2 ran to the restored L1 counters */
+ vcpu->arch.purr += delta_purr;
+ vcpu->arch.spurr += delta_spurr;
+ vcpu->arch.ic += delta_ic;
+ vc->vtb += delta_vtb;
+
+ kvmhv_put_nested(l2);
+
+ /* copy l2_hv_state and regs back to guest */
+ if (kvmppc_need_byteswap(vcpu)) {
+ byteswap_hv_regs(&l2_hv);
+ byteswap_pt_regs(&l2_regs);
+ }
+ kvm_vcpu_srcu_read_lock(vcpu);
+ err = kvmhv_write_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+ hv_ptr, regs_ptr);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (err)
+ return H_AUTHORITY;
+
+ if (r == -EINTR)
+ return H_INTERRUPT;
+
+ if (vcpu->mmio_needed) {
+ kvmhv_nested_mmio_needed(vcpu, regs_ptr);
+ return H_TOO_HARD;
+ }
+
+ return vcpu->arch.trap;
+}
+
+/*
+ * Capability bits agreed with the parent hypervisor: the intersection
+ * of what this CPU offers and what the parent reported. Set by
+ * kvmhv_nested_init() when the get-capabilities hcall succeeds.
+ */
+unsigned long nested_capabilities;
+
+/*
+ * Set up nested virtualisation support when running on pseries.
+ *
+ * If the parent hypervisor answers the get-capabilities hcall, the
+ * common capabilities are configured and nestedv2 is enabled.
+ * Otherwise a partition table is allocated and registered with
+ * H_SET_PARTITION_TABLE (nestedv1).
+ *
+ * Returns 0 on success or when not running on pseries, -ENODEV if radix
+ * is not enabled or the parent hypervisor rejects the setup, and
+ * -ENOMEM if the partition table cannot be allocated.
+ */
+long kvmhv_nested_init(void)
+{
+	long int ptb_order;
+	unsigned long ptcr, host_capabilities;
+	long rc;
+
+	if (!kvmhv_on_pseries())
+		return 0;
+	if (!radix_enabled())
+		return -ENODEV;
+
+	rc = plpar_guest_get_capabilities(0, &host_capabilities);
+	if (rc == H_SUCCESS) {
+		unsigned long capabilities = 0;
+
+		if (cpu_has_feature(CPU_FTR_P11_PVR))
+			capabilities |= H_GUEST_CAP_POWER11;
+		if (cpu_has_feature(CPU_FTR_ARCH_31))
+			capabilities |= H_GUEST_CAP_POWER10;
+		if (cpu_has_feature(CPU_FTR_ARCH_300))
+			capabilities |= H_GUEST_CAP_POWER9;
+
+		/* Only ask for what both this CPU and the parent support */
+		nested_capabilities = capabilities & host_capabilities;
+		rc = plpar_guest_set_capabilities(0, nested_capabilities);
+		if (rc != H_SUCCESS) {
+			/* Terminate the message so it is not merged with the next log line */
+			pr_err("kvm-hv: Could not configure parent hypervisor capabilities (rc=%ld)\n",
+			       rc);
+			return -ENODEV;
+		}
+
+		static_branch_enable(&__kvmhv_is_nestedv2);
+		return 0;
+	}
+
+	pr_info("kvm-hv: nestedv2 get capabilities hcall failed, falling back to nestedv1 (rc=%ld)\n",
+		rc);
+	/* Partition table entry is 1<<4 bytes in size, hence the 4. */
+	ptb_order = KVM_MAX_NESTED_GUESTS_SHIFT + 4;
+	/* Minimum partition table size is 1<<12 bytes */
+	if (ptb_order < 12)
+		ptb_order = 12;
+	/*
+	 * Same size as before (sizeof(entry) << ptb_order), but computed
+	 * with overflow checking by kmalloc_array().
+	 * NOTE(review): ptb_order already counts bytes, so this allocates
+	 * sizeof(struct patb_entry) times more than 1 << ptb_order bytes;
+	 * confirm whether that is intentional before shrinking it.
+	 */
+	pseries_partition_tb = kmalloc_array(1UL << ptb_order,
+					     sizeof(struct patb_entry),
+					     GFP_KERNEL);
+	if (!pseries_partition_tb) {
+		pr_err("kvm-hv: failed to allocate nested partition table\n");
+		return -ENOMEM;
+	}
+
+	/* PTCR format: table address combined with log2(size) - 12 */
+	ptcr = __pa(pseries_partition_tb) | (ptb_order - 12);
+	rc = plpar_hcall_norets(H_SET_PARTITION_TABLE, ptcr);
+	if (rc != H_SUCCESS) {
+		pr_err("kvm-hv: Parent hypervisor does not support nesting (rc=%ld)\n",
+		       rc);
+		kfree(pseries_partition_tb);
+		pseries_partition_tb = NULL;
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * Undo kvmhv_nested_init(): unregister and free the partition table,
+ * if one was set up.
+ */
+void kvmhv_nested_exit(void)
+{
+	/*
+	 * N.B. kvmhv_on_pseries() is tested so that the compiler can
+	 * remove the call to plpar_hcall_norets() when
+	 * CONFIG_PPC_PSERIES=n.
+	 */
+	if (!kvmhv_on_pseries() || !pseries_partition_tb)
+		return;
+
+	plpar_hcall_norets(H_SET_PARTITION_TABLE, 0);
+	kfree(pseries_partition_tb);
+	pseries_partition_tb = NULL;
+}
+
+/*
+ * Invalidate the translations belonging to @lpid.
+ *
+ * When not on pseries this is done directly. On pseries it is requested
+ * from the parent hypervisor, using H_RPT_INVALIDATE when the firmware
+ * has that feature and H_TLB_INVALIDATE otherwise. A failing hcall is
+ * only logged.
+ */
+void kvmhv_flush_lpid(u64 lpid)
+{
+	long hrc;
+
+	if (!kvmhv_on_pseries()) {
+		radix__flush_all_lpid(lpid);
+		return;
+	}
+
+	if (firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+		hrc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+					     H_RPTI_TYPE_NESTED |
+					     H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					     H_RPTI_TYPE_PAT,
+					     H_RPTI_PAGE_ALL, 0, -1UL);
+	} else {
+		hrc = plpar_hcall_norets(H_TLB_INVALIDATE,
+					 H_TLBIE_P1_ENC(2, 0, 1),
+					 lpid, TLBIEL_INVAL_SET_LPID);
+	}
+
+	if (hrc)
+		pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", hrc);
+}
+
+/*
+ * Set the partition table entry for @lpid to the doublewords @dw0/@dw1,
+ * using whichever mechanism applies: the host partition table when not
+ * on pseries, the in-memory pseries table for nestedv1, or the nestedv2
+ * helper.
+ */
+void kvmhv_set_ptbl_entry(u64 lpid, u64 dw0, u64 dw1)
+{
+	struct patb_entry *entry;
+
+	if (!kvmhv_on_pseries()) {
+		mmu_partition_table_set_entry(lpid, dw0, dw1, true);
+		return;
+	}
+
+	if (kvmhv_is_nestedv1()) {
+		/* Entries are stored big-endian */
+		entry = &pseries_partition_tb[lpid];
+		entry->patb0 = cpu_to_be64(dw0);
+		entry->patb1 = cpu_to_be64(dw1);
+		/* L0 will do the necessary barriers */
+		kvmhv_flush_lpid(lpid);
+	}
+
+	if (kvmhv_is_nestedv2())
+		kvmhv_nestedv2_set_ptbl_entry(lpid, dw0, dw1);
+}
+
+/*
+ * Install the partition table entry for nested guest @gp under its
+ * shadow lpid: the first doubleword describes the shadow page table,
+ * the second is the guest's process table.
+ */
+static void kvmhv_set_nested_ptbl(struct kvm_nested_guest *gp)
+{
+	unsigned long patb0 = PATB_HR;
+
+	patb0 |= radix__get_tree_size();
+	patb0 |= __pa(gp->shadow_pgtable);
+	patb0 |= RADIX_PGD_INDEX_SIZE;
+
+	kvmhv_set_ptbl_entry(gp->shadow_lpid, patb0, gp->process_table);
+}
+
+/*
+ * Handle the H_SET_PARTITION_TABLE hcall.
+ * r4 = guest real address of partition table + log_2(size) - 12
+ * (formatted as for the PTCR).
+ *
+ * Returns H_PARAMETER if the table is larger than supported or its base
+ * address is not in a visible guest page; otherwise records the value
+ * in kvm->arch.l1_ptcr and returns H_SUCCESS.
+ */
+long kvmhv_set_partition_table(struct kvm_vcpu *vcpu)
+{
+	struct kvm *kvm = vcpu->kvm;
+	unsigned long ptcr = kvmppc_get_gpr(vcpu, 4);
+	bool acceptable;
+	int idx;
+
+	/* Validate the partition table size and base address */
+	idx = srcu_read_lock(&kvm->srcu);
+	acceptable = (ptcr & PRTS_MASK) + 12 - 4 <= KVM_MAX_NESTED_GUESTS_SHIFT &&
+		     kvm_is_visible_gfn(vcpu->kvm,
+					(ptcr & PRTB_MASK) >> PAGE_SHIFT);
+	srcu_read_unlock(&kvm->srcu, idx);
+
+	if (!acceptable)
+		return H_PARAMETER;
+
+	kvm->arch.l1_ptcr = ptcr;
+	return H_SUCCESS;
+}
+
+/*
+ * Handle the H_COPY_TOFROM_GUEST hcall.
+ * r4 = L1 lpid of nested guest
+ * r5 = pid
+ * r6 = eaddr to access
+ * r7 = to buffer (L1 gpa)
+ * r8 = from buffer (L1 gpa)
+ * r9 = n bytes to copy
+ *
+ * Exactly one of r7/r8 is expected to be non-zero: a non-zero "to"
+ * buffer selects a load from the nested guest into L1 memory, otherwise
+ * data is stored from the L1 "from" buffer into the nested guest. The
+ * data is staged through a temporary kernel buffer of n bytes.
+ *
+ * Returns H_PARAMETER if both buffers are given, if any of the top 12
+ * bits of eaddr are set, or if the nested guest does not exist;
+ * H_NO_MEM if the staging buffer cannot be allocated; H_NOT_FOUND if
+ * any of the copies fails; and 0 on success.
+ */
+long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu)
+{
+ struct kvm_nested_guest *gp;
+ int l1_lpid = kvmppc_get_gpr(vcpu, 4);
+ int pid = kvmppc_get_gpr(vcpu, 5);
+ gva_t eaddr = kvmppc_get_gpr(vcpu, 6);
+ gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7);
+ gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8);
+ void *buf;
+ unsigned long n = kvmppc_get_gpr(vcpu, 9);
+ bool is_load = !!gp_to;
+ long rc;
+
+ if (gp_to && gp_from) /* One must be NULL to determine the direction */
+ return H_PARAMETER;
+
+ if (eaddr & (0xFFFUL << 52))
+ return H_PARAMETER;
+
+ /* n comes straight from the guest, so a failed allocation must not warn */
+ buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN);
+ if (!buf)
+ return H_NO_MEM;
+
+ /* Look up only (create == false); takes a reference that is dropped at out_unlock */
+ gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false);
+ if (!gp) {
+ rc = H_PARAMETER;
+ goto out_free;
+ }
+
+ mutex_lock(&gp->tlb_lock);
+
+ if (is_load) {
+ /* Load from the nested guest into our buffer */
+ rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
+ eaddr, buf, NULL, n);
+ if (rc)
+ goto not_found;
+
+ /* Write what was loaded into our buffer back to the L1 guest */
+ kvm_vcpu_srcu_read_lock(vcpu);
+ rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (rc)
+ goto not_found;
+ } else {
+ /* Load the data to be stored from the L1 guest into our buf */
+ kvm_vcpu_srcu_read_lock(vcpu);
+ rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (rc)
+ goto not_found;
+
+ /* Store from our buffer into the nested guest */
+ rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid,
+ eaddr, NULL, buf, n);
+ if (rc)
+ goto not_found;
+ }
+
+ /* Common exit: rc is 0 here on success, or H_NOT_FOUND via not_found */
+out_unlock:
+ mutex_unlock(&gp->tlb_lock);
+ kvmhv_put_nested(gp);
+out_free:
+ kfree(buf);
+ return rc;
+ /* Every copy failure is reported to the caller as H_NOT_FOUND */
+not_found:
+ rc = H_NOT_FOUND;
+ goto out_unlock;
+}
+
+/*
+ * Reload the partition table entry for a guest.
+ * Caller must hold gp->tlb_lock.
+ *
+ * The entry is read from the partition table that L1 registered. If the
+ * lpid lies outside that table or the read fails, the cached values are
+ * cleared. Either way the shadow partition table entry is rewritten.
+ */
+static void kvmhv_update_ptbl_cache(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	struct patb_entry entry;
+	unsigned long entry_gpa, nr_entries;
+	int err = -EFAULT;
+
+	entry_gpa = (kvm->arch.l1_ptcr & PRTB_MASK) + (gp->l1_lpid << 4);
+	nr_entries = 1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4);
+
+	if (gp->l1_lpid < nr_entries) {
+		int idx = srcu_read_lock(&kvm->srcu);
+
+		err = kvm_read_guest(kvm, entry_gpa, &entry, sizeof(entry));
+		srcu_read_unlock(&kvm->srcu, idx);
+	}
+
+	if (!err) {
+		/* Entries are stored big-endian in guest memory */
+		gp->l1_gr_to_hr = be64_to_cpu(entry.patb0);
+		gp->process_table = be64_to_cpu(entry.patb1);
+	} else {
+		gp->l1_gr_to_hr = 0;
+		gp->process_table = 0;
+	}
+
+	kvmhv_set_nested_ptbl(gp);
+}
+
+/* Initialise the per-VM idr that maps L1 lpids to nested guests. */
+void kvmhv_vm_nested_init(struct kvm *kvm)
+{
+	struct idr *nested_idr = &kvm->arch.kvm_nested_guest_idr;
+
+	idr_init(nested_idr);
+}
+
+/* Return the idr entry stored for @lpid in @kvm (may be NULL). */
+static struct kvm_nested_guest *__find_nested(struct kvm *kvm, int lpid)
+{
+	struct idr *nested_idr = &kvm->arch.kvm_nested_guest_idr;
+
+	return idr_find(nested_idr, lpid);
+}
+
+/*
+ * Reserve the idr slot for exactly @lpid, storing NULL in it for now.
+ * Allocates with GFP_KERNEL. Returns true if the slot was obtained.
+ */
+static bool __prealloc_nested(struct kvm *kvm, int lpid)
+{
+	int id = idr_alloc(&kvm->arch.kvm_nested_guest_idr,
+			   NULL, lpid, lpid + 1, GFP_KERNEL);
+
+	return id == lpid;
+}
+
+/*
+ * Store @gp in the idr slot for @lpid. Warns if idr_replace() returns a
+ * non-NULL value, i.e. the slot was not an empty placeholder.
+ */
+static void __add_nested(struct kvm *kvm, int lpid, struct kvm_nested_guest *gp)
+{
+	void *previous = idr_replace(&kvm->arch.kvm_nested_guest_idr, gp, lpid);
+
+	if (previous)
+		WARN_ON(1);
+}
+
+/* Drop the idr slot for @lpid from @kvm. */
+static void __remove_nested(struct kvm *kvm, int lpid)
+{
+	struct idr *nested_idr = &kvm->arch.kvm_nested_guest_idr;
+
+	idr_remove(nested_idr, lpid);
+}
+
+/*
+ * Allocate and initialise the bookkeeping for a nested guest of @kvm
+ * with L1 lpid @lpid: the structure itself, a shadow page table and a
+ * shadow lpid. Returns NULL if any of the allocations fails.
+ */
+static struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid)
+{
+	struct kvm_nested_guest *guest;
+	long new_lpid;
+
+	guest = kzalloc(sizeof(*guest), GFP_KERNEL);
+	if (!guest)
+		return NULL;
+
+	guest->l1_host = kvm;
+	guest->l1_lpid = lpid;
+	mutex_init(&guest->tlb_lock);
+
+	guest->shadow_pgtable = pgd_alloc(kvm->mm);
+	if (!guest->shadow_pgtable) {
+		kfree(guest);
+		return NULL;
+	}
+
+	new_lpid = kvmppc_alloc_lpid();
+	if (new_lpid < 0) {
+		pgd_free(kvm->mm, guest->shadow_pgtable);
+		kfree(guest);
+		return NULL;
+	}
+
+	guest->shadow_lpid = new_lpid;
+	guest->radix = 1;
+
+	/* Every byte of prev_cpu is set to 0xff */
+	memset(guest->prev_cpu, -1, sizeof(guest->prev_cpu));
+
+	return guest;
+}
+
+/*
+ * Free up any resources allocated for a nested guest.
+ *
+ * Tears down the shadow page table (if any), clears the partition table
+ * entry of the shadow lpid, releases that lpid and finally frees @gp
+ * itself. @gp must not be used after this returns.
+ */
+static void kvmhv_release_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+
+ if (gp->shadow_pgtable) {
+ /*
+ * No vcpu is using this struct and no call to
+ * kvmhv_get_nested can find this struct,
+ * so we don't need to hold kvm->mmu_lock.
+ */
+ kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+ gp->shadow_lpid);
+ pgd_free(kvm->mm, gp->shadow_pgtable);
+ }
+ /* Clear the entry before the lpid is handed back for reuse */
+ kvmhv_set_ptbl_entry(gp->shadow_lpid, 0, 0);
+ kvmppc_free_lpid(gp->shadow_lpid);
+ kfree(gp);
+}
+
+/*
+ * Take @gp out of the idr of its L1 host, dropping the reference that
+ * is accounted to the idr entry, and release @gp if no references
+ * remain.
+ */
+static void kvmhv_remove_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+ int lpid = gp->l1_lpid;
+ long ref;
+
+ spin_lock(&kvm->mmu_lock);
+ /* Only unhook if the idr slot still points at this very struct */
+ if (gp == __find_nested(kvm, lpid)) {
+ __remove_nested(kvm, lpid);
+ --gp->refcnt;
+ }
+ /* Sample the count under the lock; the release itself happens unlocked */
+ ref = gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+ if (ref == 0)
+ kvmhv_release_nested(gp);
+}
+
+/*
+ * Free up all nested resources allocated for this guest.
+ * This is called with no vcpus of the guest running, when
+ * switching the guest to HPT mode or when destroying the
+ * guest.
+ *
+ * Nested guests whose reference count drops to zero are collected on a
+ * private list under kvm->mmu_lock and released after the lock has been
+ * dropped. The nested rmap entries of every memslot are freed as well.
+ */
+void kvmhv_release_all_nested(struct kvm *kvm)
+{
+ int lpid;
+ struct kvm_nested_guest *gp;
+ struct kvm_nested_guest *freelist = NULL;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx, bkt;
+
+ spin_lock(&kvm->mmu_lock);
+ idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+ __remove_nested(kvm, lpid);
+ /* Drop the idr's reference; queue the guest if that was the last one */
+ if (--gp->refcnt == 0) {
+ gp->next = freelist;
+ freelist = gp;
+ }
+ }
+ idr_destroy(&kvm->arch.kvm_nested_guest_idr);
+ /* idr is empty and may be reused at this point */
+ spin_unlock(&kvm->mmu_lock);
+ /* Release the queued guests now that mmu_lock is no longer held */
+ while ((gp = freelist) != NULL) {
+ freelist = gp->next;
+ kvmhv_release_nested(gp);
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ kvm_for_each_memslot(memslot, bkt, kvm_memslots(kvm))
+ kvmhv_free_memslot_nest_rmap(memslot);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+/*
+ * Throw away the shadow page table contents of @gp, flush its shadow
+ * lpid and reload the cached partition table entry. If the reloaded
+ * entry is empty (l1_gr_to_hr == 0) the nested guest is removed.
+ *
+ * caller must hold gp->tlb_lock
+ */
+static void kvmhv_flush_nested(struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = gp->l1_host;
+
+ spin_lock(&kvm->mmu_lock);
+ kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable, gp->shadow_lpid);
+ spin_unlock(&kvm->mmu_lock);
+ kvmhv_flush_lpid(gp->shadow_lpid);
+ kvmhv_update_ptbl_cache(gp);
+ if (gp->l1_gr_to_hr == 0)
+ kvmhv_remove_nested(gp);
+}
+
+/*
+ * Look up the nested guest of @kvm with L1 lpid @l1_lpid and take a
+ * reference on it. If it does not exist and @create is true, a new one
+ * is allocated and inserted.
+ *
+ * Returns NULL if @l1_lpid lies outside the partition table registered
+ * by L1, if the guest does not exist and @create is false, or if
+ * creating it fails. A non-NULL result must be released with
+ * kvmhv_put_nested().
+ */
+struct kvm_nested_guest *kvmhv_get_nested(struct kvm *kvm, int l1_lpid,
+ bool create)
+{
+ struct kvm_nested_guest *gp, *newgp;
+
+ if (l1_lpid >= (1ul << ((kvm->arch.l1_ptcr & PRTS_MASK) + 12 - 4)))
+ return NULL;
+
+ spin_lock(&kvm->mmu_lock);
+ gp = __find_nested(kvm, l1_lpid);
+ if (gp)
+ ++gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+
+ if (gp || !create)
+ return gp;
+
+ /* Allocate outside mmu_lock; both steps below use GFP_KERNEL */
+ newgp = kvmhv_alloc_nested(kvm, l1_lpid);
+ if (!newgp)
+ return NULL;
+
+ if (!__prealloc_nested(kvm, l1_lpid)) {
+ kvmhv_release_nested(newgp);
+ return NULL;
+ }
+
+ spin_lock(&kvm->mmu_lock);
+ /* Re-check: another caller may have inserted a guest in the meantime */
+ gp = __find_nested(kvm, l1_lpid);
+ if (!gp) {
+ __add_nested(kvm, l1_lpid, newgp);
+ /* This reference is accounted to the idr entry */
+ ++newgp->refcnt;
+ gp = newgp;
+ newgp = NULL;
+ }
+ /* This reference is handed to the caller */
+ ++gp->refcnt;
+ spin_unlock(&kvm->mmu_lock);
+
+ /* Lost the race: discard the allocation that was not inserted */
+ if (newgp)
+ kvmhv_release_nested(newgp);
+
+ return gp;
+}
+
+/*
+ * Drop one reference on @gp. The count is changed under the L1 host's
+ * mmu_lock; if it reaches zero the nested guest is released after the
+ * lock has been dropped.
+ */
+void kvmhv_put_nested(struct kvm_nested_guest *gp)
+{
+	struct kvm *kvm = gp->l1_host;
+	bool last_ref;
+
+	spin_lock(&kvm->mmu_lock);
+	last_ref = (--gp->refcnt == 0);
+	spin_unlock(&kvm->mmu_lock);
+
+	if (last_ref)
+		kvmhv_release_nested(gp);
+}
+
+/*
+ * Find the pte for @ea in the shadow page table of the nested guest
+ * registered under @lpid. Returns NULL if no such nested guest exists;
+ * in that case *@hshift is left untouched. Expects kvm->mmu_lock to be
+ * held and warns (VM_WARN) otherwise.
+ */
+pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid,
+				 unsigned long ea, unsigned *hshift)
+{
+	struct kvm_nested_guest *guest = __find_nested(kvm, lpid);
+
+	if (!guest)
+		return NULL;
+
+	VM_WARN(!spin_is_locked(&kvm->mmu_lock),
+		"%s called with kvm mmu_lock not held \n", __func__);
+
+	return __find_linux_pte(guest->shadow_pgtable, ea, NULL, hshift);
+}
+
+/* Two nested rmap values are equal when their lpid and gpa fields match. */
+static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2)
+{
+	u64 differing_bits = rmap_1 ^ rmap_2;
+
+	differing_bits &= RMAP_NESTED_LPID_MASK | RMAP_NESTED_GPA_MASK;
+
+	return differing_bits == 0;
+}
+
+/*
+ * Add the nested rmap entry *@n_rmap to the rmap slot @rmapp.
+ *
+ * An empty slot stores the rmap value itself, tagged with
+ * RMAP_NESTED_IS_SINGLE_ENTRY. Otherwise the slot is used as an llist
+ * head and the new entry is linked in at the front.
+ *
+ * *@n_rmap is set to NULL only when the entry has been linked into the
+ * list. When it is left non-NULL (value stored inline, or an equal
+ * entry already present) the allocation was not consumed.
+ */
+void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp,
+ struct rmap_nested **n_rmap)
+{
+ struct llist_node *entry = ((struct llist_head *) rmapp)->first;
+ struct rmap_nested *cursor;
+ u64 rmap, new_rmap = (*n_rmap)->rmap;
+
+ /* Are there any existing entries? */
+ if (!(*rmapp)) {
+ /* No -> use the rmap as a single entry */
+ *rmapp = new_rmap | RMAP_NESTED_IS_SINGLE_ENTRY;
+ return;
+ }
+
+ /* Do any entries match what we're trying to insert? */
+ for_each_nest_rmap_safe(cursor, entry, &rmap) {
+ if (kvmhv_n_rmap_is_equal(rmap, new_rmap))
+ return;
+ }
+
+ /* Do we need to create a list or just add the new entry? */
+ rmap = *rmapp;
+ /* Empty the head first so that llist_add() does not chain to the tagged value */
+ if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+ *rmapp = 0UL;
+ llist_add(&((*n_rmap)->list), (struct llist_head *) rmapp);
+ /* The old single entry becomes the tail, kept as a tagged value in ->next */
+ if (rmap & RMAP_NESTED_IS_SINGLE_ENTRY) /* Not previously a list */
+ (*n_rmap)->list.next = (struct llist_node *) rmap;
+
+ /* Set NULL so not freed by caller */
+ *n_rmap = NULL;
+}
+
+/*
+ * Apply @clr/@set to the shadow pte referenced by the nested rmap value
+ * @n_rmap, provided that pte is present and still maps @hpa (compared
+ * under @mask), then invalidate the page.
+ */
+static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap,
+				      unsigned long clr, unsigned long set,
+				      unsigned long hpa, unsigned long mask)
+{
+	unsigned long nested_gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+	unsigned int nested_lpid =
+		(n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+	unsigned int pte_shift;
+	pte_t *pte;
+
+	pte = find_kvm_nested_guest_pte(kvm, nested_lpid, nested_gpa,
+					&pte_shift);
+	if (!pte || !pte_present(*pte))
+		return;
+
+	/*
+	 * A different pfn means this is a stale rmap entry: the nested
+	 * gpa now points somewhere else and there is nothing to do.
+	 * XXX A future optimisation would be to remove the rmap entry here.
+	 */
+	if ((pte_val(*pte) & mask) != hpa)
+		return;
+
+	__radix_pte_update(pte, clr, set);
+	kvmppc_radix_tlbie_page(kvm, nested_gpa, pte_shift, nested_lpid);
+}
+
+/*
+ * Walk the rmap entries in @rmapp and update the rc bits of every
+ * shadow pte of a nested guest that they reference.
+ *
+ * Nothing is done if @clr or @set contain any bit other than
+ * _PAGE_DIRTY and _PAGE_ACCESSED. @hpa is aligned down to @nbytes
+ * before being compared against the ptes.
+ */
+void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp,
+				    unsigned long clr, unsigned long set,
+				    unsigned long hpa, unsigned long nbytes)
+{
+	struct llist_node *node = ((struct llist_head *) rmapp)->first;
+	struct rmap_nested *pos;
+	unsigned long n_rmap, rpn_mask;
+
+	if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED))
+		return;
+
+	rpn_mask = PTE_RPN_MASK & ~(nbytes - 1);
+	hpa &= rpn_mask;
+
+	for_each_nest_rmap_safe(pos, node, &n_rmap)
+		kvmhv_update_nest_rmap_rc(kvm, n_rmap, clr, set, hpa, rpn_mask);
+}
+
+/*
+ * Unmap the shadow pte referenced by the nested rmap value @n_rmap,
+ * provided the nested guest still exists and the pte is present and
+ * still maps @hpa (compared under @mask).
+ */
+static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap,
+				   unsigned long hpa, unsigned long mask)
+{
+	unsigned long nested_gpa = n_rmap & RMAP_NESTED_GPA_MASK;
+	unsigned int nested_lpid =
+		(n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT;
+	struct kvm_nested_guest *guest;
+	unsigned int pte_shift;
+	pte_t *pte;
+
+	guest = __find_nested(kvm, nested_lpid);
+	if (!guest)
+		return;
+
+	pte = find_kvm_nested_guest_pte(kvm, nested_lpid, nested_gpa,
+					&pte_shift);
+	if (!pte || !pte_present(*pte))
+		return;
+
+	/* A pte whose pfn has changed must not be invalidated spuriously */
+	if ((pte_val(*pte) & mask) != hpa)
+		return;
+
+	kvmppc_unmap_pte(kvm, pte, nested_gpa, pte_shift, NULL,
+			 guest->shadow_lpid);
+}
+
+/*
+ * Detach every nested rmap entry from @rmapp, unmap the shadow ptes
+ * they reference (see kvmhv_remove_nest_rmap()) and free the entries.
+ */
+static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp,
+					unsigned long hpa, unsigned long mask)
+{
+	struct llist_head *head = (struct llist_head *) rmapp;
+	struct llist_node *node = llist_del_all(head);
+	struct rmap_nested *pos;
+	unsigned long n_rmap;
+
+	for_each_nest_rmap_safe(pos, node, &n_rmap) {
+		kvmhv_remove_nest_rmap(kvm, n_rmap, hpa, mask);
+		kfree(pos);
+	}
+}
+
+/*
+ * Remove the nested rmap entries for every page of @memslot in the
+ * range of @nbytes starting at @gpa. @hpa is aligned down to @nbytes
+ * before being compared against the shadow ptes. Does nothing if
+ * @memslot is NULL.
+ *
+ * called with kvm->mmu_lock held
+ */
+void kvmhv_remove_nest_rmap_range(struct kvm *kvm,
+				  const struct kvm_memory_slot *memslot,
+				  unsigned long gpa, unsigned long hpa,
+				  unsigned long nbytes)
+{
+	unsigned long idx, last, rpn_mask;
+
+	if (!memslot)
+		return;
+
+	rpn_mask = PTE_RPN_MASK & ~(nbytes - 1);
+	hpa &= rpn_mask;
+
+	/* Index into the memslot's rmap array, relative to its base gfn */
+	idx = (gpa >> PAGE_SHIFT) - memslot->base_gfn;
+	last = idx + (nbytes >> PAGE_SHIFT);
+
+	while (idx < last) {
+		kvmhv_remove_nest_rmap_list(kvm, &memslot->arch.rmap[idx],
+					    hpa, rpn_mask);
+		idx++;
+	}
+}
+
+/*
+ * Detach and free the nested rmap entries of every page in memslot
+ * @free. The shadow ptes themselves are not touched here.
+ */
+static void kvmhv_free_memslot_nest_rmap(struct kvm_memory_slot *free)
+{
+	unsigned long i;
+
+	for (i = 0; i < free->npages; i++) {
+		struct llist_head *head =
+			(struct llist_head *) &free->arch.rmap[i];
+		struct llist_node *node = llist_del_all(head);
+		struct rmap_nested *pos;
+		unsigned long n_rmap;
+
+		for_each_nest_rmap_safe(pos, node, &n_rmap)
+			kfree(pos);
+	}
+}
+
+/*
+ * Unmap the shadow pte that maps @gpa for nested guest @gp, if one is
+ * present.
+ *
+ * @shift_ret, when non-NULL, receives the shift of the mapping that was
+ * looked at, or PAGE_SHIFT when the lookup reported none.
+ *
+ * Returns true if a present pte was unmapped.
+ */
+static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu,
+					struct kvm_nested_guest *gp,
+					long gpa, int *shift_ret)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool ret = false;
+	pte_t *ptep;
+	/*
+	 * Must start at 0: find_kvm_nested_guest_pte() returns NULL
+	 * without writing the shift when the guest is no longer in the
+	 * idr, and the value is read unconditionally below.
+	 */
+	int shift = 0;
+
+	spin_lock(&kvm->mmu_lock);
+	ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift);
+	if (!shift)
+		shift = PAGE_SHIFT;
+	if (ptep && pte_present(*ptep)) {
+		kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid);
+		ret = true;
+	}
+	spin_unlock(&kvm->mmu_lock);
+
+	if (shift_ret)
+		*shift_ret = shift;
+	return ret;
+}
+
+/* Extract the 2-bit RIC field (bits 19:18) from a tlbie instruction word. */
+static inline int get_ric(unsigned int instr)
+{
+	unsigned int field = instr >> 18;
+
+	return field & 0x3;
+}
+
+/* Extract the PRS bit (bit 17) from a tlbie instruction word. */
+static inline int get_prs(unsigned int instr)
+{
+	unsigned int field = instr >> 17;
+
+	return field & 0x1;
+}
+
+/* Extract the R bit (bit 16) from a tlbie instruction word. */
+static inline int get_r(unsigned int instr)
+{
+	unsigned int field = instr >> 16;
+
+	return field & 0x1;
+}
+
+/* Extract the lpid, i.e. the low 32 bits, from a register value. */
+static inline int get_lpid(unsigned long r_val)
+{
+	unsigned long low_word = r_val & 0xffffffff;
+
+	return low_word;
+}
+
+/* Extract the 2-bit IS field (bits 11:10) from a register value. */
+static inline int get_is(unsigned long r_val)
+{
+	unsigned long field = r_val >> 10;
+
+	return field & 0x3;
+}
+
+/* Extract the 3-bit AP field (bits 7:5) from a register value. */
+static inline int get_ap(unsigned long r_val)
+{
+	unsigned long field = r_val >> 5;
+
+	return field & 0x7;
+}
+
+/* Extract the EPN, i.e. everything above the low 12 bits, from a register value. */
+static inline long get_epn(unsigned long r_val)
+{
+	unsigned long page_number = r_val >> 12;
+
+	return page_number;
+}
+
+/*
+ * Emulate a tlbie that targets a single address: invalidate the shadow
+ * ptes of the nested guest with L1 lpid @lpid that cover the page of
+ * size encoding @ap at page number @epn.
+ *
+ * Returns -EINVAL for an invalid @ap encoding and 0 otherwise,
+ * including when no such nested guest exists.
+ */
+static int kvmhv_emulate_tlbie_tlb_addr(struct kvm_vcpu *vcpu, int lpid,
+					int ap, long epn)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_nested_guest *gp;
+	long npages;
+	int shift, shadow_shift;
+	unsigned long addr;
+
+	shift = ap_to_shift(ap);
+	addr = epn << 12;
+	if (shift < 0)
+		/* Invalid ap encoding */
+		return -EINVAL;
+
+	addr &= ~((1UL << shift) - 1);
+	/*
+	 * Number of host pages covered by the invalidation. A size below
+	 * the host page size still needs one pass through the loop, and
+	 * must not reach the shift with a negative count (undefined).
+	 */
+	if (shift > PAGE_SHIFT)
+		npages = 1UL << (shift - PAGE_SHIFT);
+	else
+		npages = 1;
+
+	gp = kvmhv_get_nested(kvm, lpid, false);
+	if (!gp) /* No such guest -> nothing to do */
+		return 0;
+	mutex_lock(&gp->tlb_lock);
+
+	/* There may be more than one host page backing this single guest pte */
+	do {
+		kvmhv_invalidate_shadow_pte(vcpu, gp, addr, &shadow_shift);
+
+		/* Advance by the size of the shadow mapping just handled */
+		npages -= 1UL << (shadow_shift - PAGE_SHIFT);
+		addr += 1UL << shadow_shift;
+	} while (npages > 0);
+
+	mutex_unlock(&gp->tlb_lock);
+	kvmhv_put_nested(gp);
+	return 0;
+}
+
+/*
+ * Emulate a tlbie that targets a whole nested guest @gp. @ric selects
+ * what is invalidated; values other than 0, 1 and 2 are ignored.
+ * Takes gp->tlb_lock for the duration.
+ */
+static void kvmhv_emulate_tlbie_lpid(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp, int ric)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ mutex_lock(&gp->tlb_lock);
+ switch (ric) {
+ case 0:
+ /* Invalidate TLB */
+ spin_lock(&kvm->mmu_lock);
+ kvmppc_free_pgtable_radix(kvm, gp->shadow_pgtable,
+ gp->shadow_lpid);
+ kvmhv_flush_lpid(gp->shadow_lpid);
+ spin_unlock(&kvm->mmu_lock);
+ break;
+ case 1:
+ /*
+ * Invalidate PWC
+ * We don't cache this -> nothing to do
+ */
+ break;
+ case 2:
+ /* Invalidate TLB, PWC and caching of partition table entries */
+ kvmhv_flush_nested(gp);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&gp->tlb_lock);
+}
+
+static void kvmhv_emulate_tlbie_all_lpid(struct kvm_vcpu *vcpu, int ric)
+{ /* Apply kvmhv_emulate_tlbie_lpid() with @ric to every entry of kvm->arch.kvm_nested_guest_idr. */
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *gp;
+ int lpid;
+
+ spin_lock(&kvm->mmu_lock); /* held while stepping through the idr */
+ idr_for_each_entry(&kvm->arch.kvm_nested_guest_idr, gp, lpid) {
+ spin_unlock(&kvm->mmu_lock); /* dropped: the callee takes gp->tlb_lock (a mutex) and, for ric 0, mmu_lock itself */
+ kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+ spin_lock(&kvm->mmu_lock); /* retaken before the iterator advances */
+ }
+ spin_unlock(&kvm->mmu_lock);
+}
+
+/*
+ * Decode a privileged tlbie (@instr with operands @rsval / @rbval) and
+ * dispatch it to the matching emulation helper.
+ *
+ * Returns 0 on success or -EINVAL for an encoding that is not handled.
+ */
+static int kvmhv_emulate_priv_tlbie(struct kvm_vcpu *vcpu, unsigned int instr,
+				    unsigned long rsval, unsigned long rbval)
+{
+	struct kvm_nested_guest *gp;
+	const int ric = get_ric(instr);
+	const int prs = get_prs(instr);
+	const int r = get_r(instr);
+	const int lpid = get_lpid(rsval);
+	const int is = get_is(rbval);
+
+	/* Only radix (r == 1), HV privileged (prs == 0) forms are supported */
+	if (!r || prs)
+		return -EINVAL;
+	/*
+	 * ric == 3: no cluster bombs for radix.
+	 * is == 1: partition scoped translations are not associated with pid.
+	 */
+	if (ric == 3 || is == 1)
+		return -EINVAL;
+	/* is == 0 with ric == 1 or ric == 2 is not supported by the ISA */
+	if (is == 0 && ric != 0)
+		return -EINVAL;
+
+	if (is == 0) {
+		/* ric is known to be 0: invalidate TLB for a target address */
+		return kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, get_ap(rbval),
+						    get_epn(rbval));
+	}
+
+	if (is == 2) {
+		/* Invalidate matching LPID */
+		gp = kvmhv_get_nested(vcpu->kvm, lpid, false);
+		if (gp) {
+			kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+			kvmhv_put_nested(gp);
+		}
+		return 0;
+	}
+
+	if (is == 3) {
+		/* Invalidate ALL LPIDs */
+		kvmhv_emulate_tlbie_all_lpid(vcpu, ric);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * H_TLB_INVALIDATE hcall handler.
+ * The guest passes the tlbie instruction code in r4, the rS contents in r5
+ * and the rB contents in r6.  Any emulation failure is reported to the
+ * caller as H_PARAMETER.
+ */
+long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu)
+{
+	if (kvmhv_emulate_priv_tlbie(vcpu, kvmppc_get_gpr(vcpu, 4),
+				     kvmppc_get_gpr(vcpu, 5),
+				     kvmppc_get_gpr(vcpu, 6)))
+		return H_PARAMETER;
+
+	return H_SUCCESS;
+}
+
+/*
+ * Run an LPID-wide invalidation of kind @ric on nested guest @lpid.
+ * An unknown @lpid is silently ignored; the result is always H_SUCCESS.
+ */
+static long do_tlb_invalidate_nested_all(struct kvm_vcpu *vcpu,
+					 unsigned long lpid, unsigned long ric)
+{
+	struct kvm_nested_guest *gp;
+
+	gp = kvmhv_get_nested(vcpu->kvm, lpid, false);
+	if (!gp)
+		return H_SUCCESS;
+
+	kvmhv_emulate_tlbie_lpid(vcpu, gp, ric);
+	kvmhv_put_nested(gp);
+	return H_SUCCESS;
+}
+
+/*
+ * Number of pages above which we invalidate the entire LPID rather than
+ * flush individual pages.
+ *
+ * do_tlb_invalidate_nested_tlb() compares the page count of a requested
+ * range against this value with a strict "greater than".
+ */
+static unsigned long tlb_range_flush_page_ceiling __read_mostly = 33;
+
+/*
+ * Invalidate the range [@start, @end) of nested guest @lpid for every page
+ * size selected in @pg_sizes.
+ *
+ * If the range spans more pages of a selected size than
+ * tlb_range_flush_page_ceiling, the whole LPID is flushed instead and that
+ * result is returned.  Returns H_P4 when no selected page size is known or
+ * a per-page invalidation fails, 0 otherwise.
+ */
+static long do_tlb_invalidate_nested_tlb(struct kvm_vcpu *vcpu,
+					 unsigned long lpid,
+					 unsigned long pg_sizes,
+					 unsigned long start,
+					 unsigned long end)
+{
+	struct mmu_psize_def *def;
+	unsigned long psize, ap, step, addr;
+	long rc = H_P4;
+
+	for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+		def = &mmu_psize_defs[psize];
+		if (!(pg_sizes & def->h_rpt_pgsize))
+			continue;
+
+		/* Too many pages: one LPID-wide flush is used instead */
+		if (((end - start) >> def->shift) >
+		    tlb_range_flush_page_ceiling)
+			return do_tlb_invalidate_nested_all(vcpu, lpid,
+							    RIC_FLUSH_TLB);
+
+		ap = mmu_get_ap(psize);
+		step = 1UL << def->shift;
+		addr = start;
+		do {
+			if (kvmhv_emulate_tlbie_tlb_addr(vcpu, lpid, ap,
+							 get_epn(addr)))
+				return H_P4;
+			addr += step;
+		} while (addr < end);
+
+		/* Every page of this size was handled without error */
+		rc = 0;
+	}
+	return rc;
+}
+
+/*
+ * Partition-scoped invalidation for nested guests, performed on behalf of
+ * the H_RPT_INVALIDATE hcall.
+ */
+long do_h_rpt_invalidate_pat(struct kvm_vcpu *vcpu, unsigned long lpid,
+			     unsigned long type, unsigned long pg_sizes,
+			     unsigned long start, unsigned long end)
+{
+	/*
+	 * An invalid L2 lpid ought to yield H_PARAMETER.
+	 *
+	 * Nested KVM, however, issues an L2 lpid flush while it creates the
+	 * partition table entries for L2, i.e. before the shadow lpid exists
+	 * in HV (that only happens in H_ENTER_NESTED).  That case cannot be
+	 * told apart from a genuinely invalid lpid, so such requests are
+	 * ignored and reported as success.
+	 */
+	if (!__find_nested(vcpu->kvm, lpid))
+		return H_SUCCESS;
+
+	/* A flush-all request can only be served by a full lpid flush. */
+	if ((type & H_RPTI_TYPE_NESTED_ALL) == H_RPTI_TYPE_NESTED_ALL)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_ALL);
+
+	/*
+	 * A PWC flush (as done for the process table) is not needed here:
+	 * the intermediate partition scoped table of a nested guest has no
+	 * PWC of its own.  The only level with a PWC is L0, and a nested
+	 * invalidate at L0 always does kvm_flush_lpid(), which does
+	 * radix__flush_all_lpid().  A range invalidate at any level leaves
+	 * the higher level page tables in place, so it needs no PWC
+	 * invalidate either.
+	 *
+	 * if (type & H_RPTI_TYPE_PWC) {
+	 *	ret = do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_PWC);
+	 *	if (ret)
+	 *		return H_P4;
+	 * }
+	 */
+
+	/* The full address range is treated as a TLB flush of the lpid */
+	if (start == 0 && end == -1)
+		return do_tlb_invalidate_nested_all(vcpu, lpid, RIC_FLUSH_TLB);
+
+	if (!(type & H_RPTI_TYPE_TLB))
+		return H_SUCCESS;
+
+	return do_tlb_invalidate_nested_tlb(vcpu, lpid, pg_sizes, start, end);
+}
+
+/*
+ * Convert the nested guest real address @n_gpa into an L1 guest real
+ * address by walking L1's partition-scoped tree, filling in @gpte_p.
+ *
+ * Returns 0 when a pte granting the access described by @dsisr was found,
+ * RESUME_HOST after recording fault state in @vcpu for forwarding to L1,
+ * or an unrecognised error from the tree walk unchanged.
+ */
+static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu,
+				       struct kvm_nested_guest *gp,
+				       unsigned long n_gpa, unsigned long dsisr,
+				       struct kvmppc_pte *gpte_p)
+{
+	u64 fault_addr, flags = dsisr & DSISR_ISSTORE;
+	int ret;
+
+	ret = kvmppc_mmu_walk_radix_tree(vcpu, n_gpa, gpte_p, gp->l1_gr_to_hr,
+					 &fault_addr);
+
+	switch (ret) {
+	case 0:
+		/* We found a pte -> permissions are checked below */
+		break;
+	case -EINVAL:
+		/* Unsupported mmu config */
+		flags |= DSISR_UNSUPP_MMU;
+		goto forward_to_l1;
+	case -ENOENT:
+		/* No translation found */
+		flags |= DSISR_NOHPTE;
+		goto forward_to_l1;
+	case -EFAULT:
+		/* Couldn't access L1 real address */
+		flags |= DSISR_PRTABLE_FAULT;
+		vcpu->arch.fault_gpa = fault_addr;
+		goto forward_to_l1;
+	default:
+		/* Unknown error */
+		return ret;
+	}
+
+	if (dsisr & DSISR_ISSTORE) {
+		/* Can we write? */
+		if (gpte_p->may_write)
+			return 0;
+		flags |= DSISR_PROTFAULT;
+	} else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		/* Can we execute? */
+		if (gpte_p->may_execute)
+			return 0;
+		flags |= SRR1_ISI_N_G_OR_CIP;
+	} else {
+		/* Can we read? */
+		if (gpte_p->may_read || gpte_p->may_write)
+			return 0;
+		flags |= DSISR_PROTFAULT;
+	}
+
+forward_to_l1:
+	vcpu->arch.fault_dsisr = flags;
+	if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+		/* Instruction faults carry the flags in the MSR image too */
+		vcpu->arch.shregs.msr &= SRR1_MSR_BITS;
+		vcpu->arch.shregs.msr |= flags;
+	}
+	return RESUME_HOST;
+}
+
+/*
+ * Set the reference/change bits needed for the faulting access, first in the
+ * L0 page table for the L1 guest and then in the nested guest's shadow page
+ * table.
+ *
+ * Returns RESUME_HOST when L1's own pte (@gpte) lacks the required bits,
+ * -EINVAL when either update fails, 0 when both succeed.
+ */
+static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu,
+				       struct kvm_nested_guest *gp,
+				       unsigned long n_gpa,
+				       struct kvmppc_pte gpte,
+				       unsigned long dsisr)
+{
+	struct kvm *kvm = vcpu->kvm;
+	bool writing = !!(dsisr & DSISR_ISSTORE);
+	u64 wanted = _PAGE_ACCESSED;
+	long ret = -EINVAL;
+
+	/* Are the rc bits set in the L1 partition scoped pte? */
+	if (writing)
+		wanted |= _PAGE_DIRTY;
+	if (wanted & ~gpte.rc)
+		return RESUME_HOST;
+
+	spin_lock(&kvm->mmu_lock);
+	/*
+	 * First the pte of our (L0) pgtable for the L1 guest, then - only if
+	 * that worked - the pte of the shadow_pgtable for the nested guest.
+	 */
+	if (kvmppc_hv_handle_set_rc(kvm, false, writing,
+				    gpte.raddr, kvm->arch.lpid) &&
+	    kvmppc_hv_handle_set_rc(kvm, true, writing,
+				    n_gpa, gp->l1_lpid))
+		ret = 0;
+	spin_unlock(&kvm->mmu_lock);
+
+	return ret;
+}
+
+/* Map a radix level to its shift: 2 -> PUD_SHIFT, 1 -> PMD_SHIFT, else PAGE_SHIFT. */
+static inline int kvmppc_radix_level_to_shift(int level)
+{
+	if (level == 2)
+		return PUD_SHIFT;
+	if (level == 1)
+		return PMD_SHIFT;
+	return PAGE_SHIFT;
+}
+
+/*
+ * Inverse of kvmppc_radix_level_to_shift().  A shift matching none of
+ * PUD_SHIFT, PMD_SHIFT or PAGE_SHIFT triggers a one-time warning and is
+ * reported as level 0.
+ */
+static inline int kvmppc_radix_shift_to_level(int shift)
+{
+	int level = 0;
+
+	if (shift == PUD_SHIFT)
+		level = 2;
+	else if (shift == PMD_SHIFT)
+		level = 1;
+	else if (shift != PAGE_SHIFT)
+		WARN_ON_ONCE(1);
+
+	return level;
+}
+
+/*
+ * Handle a partition-scoped page fault taken while running nested guest @gp.
+ *
+ * The faulting nested guest real address is translated through L1's
+ * partition-scoped tree, the backing host pte is looked up (or instantiated),
+ * and a pte combining both sets of permissions is inserted into
+ * gp->shadow_pgtable.
+ *
+ * Returns RESUME_GUEST to let the guest retry, RESUME_HOST when
+ * kvmhv_translate_addr_nested() or kvmhv_handle_nested_set_rc() asked for
+ * that, -EINVAL for an L1 page size below PAGE_SHIFT, or whatever the
+ * helpers called along the way return.
+ *
+ * called with gp->tlb_lock held
+ */
+static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
+ struct kvm_nested_guest *gp)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memory_slot *memslot;
+ struct rmap_nested *n_rmap;
+ struct kvmppc_pte gpte;
+ pte_t pte, *pte_p;
+ unsigned long mmu_seq;
+ unsigned long dsisr = vcpu->arch.fault_dsisr;
+ unsigned long ea = vcpu->arch.fault_dar;
+ unsigned long *rmapp;
+ unsigned long n_gpa, gpa, gfn, perm = 0UL; /* perm collects the pte bits to clear */
+ unsigned int shift, l1_shift, level;
+ bool writing = !!(dsisr & DSISR_ISSTORE);
+ long int ret;
+
+ if (!gp->l1_gr_to_hr) { /* no cached partition table root: refresh the cache and re-check */
+ kvmhv_update_ptbl_cache(gp);
+ if (!gp->l1_gr_to_hr)
+ return RESUME_HOST;
+ }
+
+ /* Convert the nested guest real address into a L1 guest real address */
+
+ n_gpa = vcpu->arch.fault_gpa & ~0xF000000000000FFFULL; /* drop the top 4 and low 12 bits */
+ if (!(dsisr & DSISR_PRTABLE_FAULT))
+ n_gpa |= ea & 0xFFF; /* low 12 bits come from the faulting effective address */
+ ret = kvmhv_translate_addr_nested(vcpu, gp, n_gpa, dsisr, &gpte);
+
+ /*
+ * If the hardware found a translation but we don't now have a usable
+ * translation in the l1 partition-scoped tree, remove the shadow pte
+ * and let the guest retry.
+ */
+ if (ret == RESUME_HOST &&
+ (dsisr & (DSISR_PROTFAULT | DSISR_BADACCESS | DSISR_NOEXEC_OR_G |
+ DSISR_BAD_COPYPASTE)))
+ goto inval;
+ if (ret)
+ return ret;
+
+ /* Failed to set the reference/change bits */
+ if (dsisr & DSISR_SET_RC) {
+ ret = kvmhv_handle_nested_set_rc(vcpu, gp, n_gpa, gpte, dsisr);
+ if (ret == RESUME_HOST)
+ return ret;
+ if (ret)
+ goto inval;
+ dsisr &= ~DSISR_SET_RC;
+ if (!(dsisr & (DSISR_BAD_FAULT_64S | DSISR_NOHPTE |
+ DSISR_PROTFAULT)))
+ return RESUME_GUEST; /* no other fault cause remains in dsisr */
+ }
+
+ /*
+ * We took an HISI or HDSI while we were running a nested guest which
+ * means we have no partition scoped translation for that. This means
+ * we need to insert a pte for the mapping into our shadow_pgtable.
+ */
+
+ l1_shift = gpte.page_shift;
+ if (l1_shift < PAGE_SHIFT) {
+ /* We don't support l1 using a page size smaller than our own */
+ pr_err("KVM: L1 guest page shift (%d) less than our own (%d)\n",
+ l1_shift, PAGE_SHIFT);
+ return -EINVAL;
+ }
+ gpa = gpte.raddr;
+ gfn = gpa >> PAGE_SHIFT;
+
+ /* 1. Get the corresponding host memslot */
+
+ memslot = gfn_to_memslot(kvm, gfn);
+ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
+ if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS)) {
+ /* unusual error -> reflect to the guest as a DSI */
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ ea, dsisr);
+ return RESUME_GUEST;
+ }
+
+ /* passthrough of emulated MMIO case */
+ return kvmppc_hv_emulate_mmio(vcpu, gpa, ea, writing);
+ }
+ if (memslot->flags & KVM_MEM_READONLY) {
+ if (writing) {
+ /* Give the guest a DSI */
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ ea, DSISR_ISSTORE | DSISR_PROTFAULT);
+ return RESUME_GUEST;
+ }
+ }
+
+ /* 2. Find the host pte for this L1 guest real address */
+
+ /* Used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_invalidate_seq;
+ smp_rmb();
+
+ /* See if we can find a translation in our partition scoped tables for L1 */
+ pte = __pte(0);
+ spin_lock(&kvm->mmu_lock);
+ pte_p = find_kvm_secondary_pte(kvm, gpa, &shift);
+ if (!shift)
+ shift = PAGE_SHIFT;
+ if (pte_p)
+ pte = *pte_p; /* snapshot taken under mmu_lock */
+ spin_unlock(&kvm->mmu_lock);
+
+ if (!pte_present(pte) || (writing && !(pte_val(pte) & _PAGE_WRITE))) {
+ /* No suitable pte found -> try to insert a mapping */
+ ret = kvmppc_book3s_instantiate_page(vcpu, gpa, memslot,
+ writing, &pte, &level);
+ if (ret == -EAGAIN)
+ return RESUME_GUEST;
+ else if (ret)
+ return ret;
+ shift = kvmppc_radix_level_to_shift(level);
+ }
+ /* Align gfn to the start of the page */
+ gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT;
+
+ /* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */
+
+ /* The permissions are the combination of the host and l1 guest ptes */
+ perm |= gpte.may_read ? 0UL : _PAGE_READ;
+ perm |= gpte.may_write ? 0UL : _PAGE_WRITE;
+ perm |= gpte.may_execute ? 0UL : _PAGE_EXEC;
+ /* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */
+ perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED;
+ perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY;
+ pte = __pte(pte_val(pte) & ~perm); /* strip every bit gathered in perm */
+
+ /* What size pte can we insert? */
+ if (shift > l1_shift) { /* host page is larger than L1's page: map a smaller piece */
+ u64 mask;
+ unsigned int actual_shift = PAGE_SHIFT;
+ if (PMD_SHIFT < l1_shift)
+ actual_shift = PMD_SHIFT;
+ mask = (1UL << shift) - (1UL << actual_shift);
+ pte = __pte(pte_val(pte) | (gpa & mask)); /* add gpa's offset bits within the larger host page */
+ shift = actual_shift;
+ }
+ level = kvmppc_radix_shift_to_level(shift);
+ n_gpa &= ~((1UL << shift) - 1);
+
+ /* 4. Insert the pte into our shadow_pgtable */
+
+ n_rmap = kzalloc(sizeof(*n_rmap), GFP_KERNEL);
+ if (!n_rmap)
+ return RESUME_GUEST; /* Let the guest try again */
+ n_rmap->rmap = (n_gpa & RMAP_NESTED_GPA_MASK) |
+ (((unsigned long) gp->l1_lpid) << RMAP_NESTED_LPID_SHIFT);
+ rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn];
+ ret = kvmppc_create_pte(kvm, gp->shadow_pgtable, pte, n_gpa, level,
+ mmu_seq, gp->shadow_lpid, rmapp, &n_rmap);
+ kfree(n_rmap); /* NOTE(review): presumably kvmppc_create_pte() NULLs n_rmap when it keeps the entry - confirm */
+ if (ret == -EAGAIN)
+ ret = RESUME_GUEST; /* Let the guest try again */
+
+ return ret;
+
+ inval:
+ kvmhv_invalidate_shadow_pte(vcpu, gp, n_gpa, NULL); /* drop the stale shadow pte so the retry faults afresh */
+ return RESUME_GUEST;
+}
+
+/*
+ * Entry point for nested guest page faults: runs the real handler for the
+ * vcpu's current nested guest with that guest's tlb_lock held.
+ */
+long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu)
+{
+	struct kvm_nested_guest *gp;
+	long int rc;
+
+	gp = vcpu->arch.nested;
+
+	mutex_lock(&gp->tlb_lock);
+	rc = __kvmhv_nested_page_fault(vcpu, gp);
+	mutex_unlock(&gp->tlb_lock);
+
+	return rc;
+}
+
+/*
+ * Return the id of the first nested guest registered at or after @lpid + 1,
+ * or -1 when the idr holds no such entry.  The lookup is done under
+ * kvm->mmu_lock.
+ */
+int kvmhv_nested_next_lpid(struct kvm *kvm, int lpid)
+{
+	int next = lpid + 1;
+	void *entry;
+
+	spin_lock(&kvm->mmu_lock);
+	entry = idr_get_next(&kvm->arch.kvm_nested_guest_idr, &next);
+	spin_unlock(&kvm->mmu_lock);
+
+	if (!entry)
+		return -1;
+	return next;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c
new file mode 100644
index 000000000000..87691cf86cae
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c
@@ -0,0 +1,1072 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2023 Jordan Niethe, IBM Corp. <jniethe5@gmail.com>
+ *
+ * Authors:
+ * Jordan Niethe <jniethe5@gmail.com>
+ *
+ * Description: KVM functions specific to running on Book 3S
+ * processors as a NESTEDv2 guest.
+ *
+ */
+
+#include "linux/blk-mq.h"
+#include "linux/console.h"
+#include "linux/gfp_types.h"
+#include "linux/signal.h"
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/hvcall.h>
+#include <asm/pgalloc.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/guest-state-buffer.h>
+#include "trace_hv.h"
+
+struct static_key_false __kvmhv_is_nestedv2 __read_mostly; /* static-branch key; NOTE(review): presumably enabled when running as a NESTEDv2 guest - confirm at the enable site */
+EXPORT_SYMBOL_GPL(__kvmhv_is_nestedv2); /* visible to GPL modules */
+
+
+/*
+ * get_size callback of the config message: the summed element size of the
+ * three guest state ids the config message can carry.
+ */
+static size_t
+gs_msg_ops_kvmhv_nestedv2_config_get_size(struct kvmppc_gs_msg *gsm)
+{
+	u16 ids[] = {
+		KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE,
+		KVMPPC_GSID_RUN_INPUT,
+		KVMPPC_GSID_RUN_OUTPUT,
+	};
+	size_t total = 0;
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(ids)) {
+		total += kvmppc_gse_total_size(kvmppc_gsid_size(ids[idx]));
+		idx++;
+	}
+
+	return total;
+}
+
+/*
+ * fill_info callback of the config message: write into @gsb each config
+ * element that @gsm includes, taking the values from the
+ * kvmhv_nestedv2_config behind gsm->data.
+ *
+ * Returns 0 on success or the first negative error from a put helper.
+ */
+static int
+gs_msg_ops_kvmhv_nestedv2_config_fill_info(struct kvmppc_gs_buff *gsb,
+					    struct kvmppc_gs_msg *gsm)
+{
+	struct kvmhv_nestedv2_config *cfg = gsm->data;
+	int rc = 0;
+
+	if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE))
+		rc = kvmppc_gse_put_u64(gsb, KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE,
+					cfg->vcpu_run_output_size);
+
+	/* Each later element is attempted only while nothing has failed */
+	if (rc >= 0 && kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_INPUT))
+		rc = kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_INPUT,
+					      cfg->vcpu_run_input_cfg);
+
+	if (rc >= 0 && kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_OUTPUT))
+		rc = kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_OUTPUT,
+					      cfg->vcpu_run_output_cfg);
+
+	return rc < 0 ? rc : 0;
+}
+
+/*
+ * refresh_info callback of the config message: parse @gsb and, when it
+ * carries KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE, store that value in the
+ * kvmhv_nestedv2_config behind gsm->data.
+ *
+ * Returns 0 on success or the negative error from the parser.
+ */
+static int
+gs_msg_ops_kvmhv_nestedv2_config_refresh_info(struct kvmppc_gs_msg *gsm,
+					       struct kvmppc_gs_buff *gsb)
+{
+	struct kvmhv_nestedv2_config *cfg = gsm->data;
+	struct kvmppc_gs_parser gsp = { 0 };
+	struct kvmppc_gs_elem *min_size;
+	int rc;
+
+	rc = kvmppc_gse_parse(&gsp, gsb);
+	if (rc < 0)
+		return rc;
+
+	min_size = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE);
+	if (!min_size)
+		return 0;
+
+	cfg->vcpu_run_output_size = kvmppc_gse_get_u64(min_size);
+	return 0;
+}
+
+static struct kvmppc_gs_msg_ops config_msg_ops = { /* callbacks for the nestedv2 config message (run buffer setup) */
+ .get_size = gs_msg_ops_kvmhv_nestedv2_config_get_size,
+ .fill_info = gs_msg_ops_kvmhv_nestedv2_config_fill_info,
+ .refresh_info = gs_msg_ops_kvmhv_nestedv2_config_refresh_info,
+};
+
+/*
+ * get_size callback of the vcpu message: the summed element size of every
+ * guest state id in a fully populated bitmap, leaving out the configuration
+ * ids and the host wide counters listed below.
+ */
+static size_t gs_msg_ops_vcpu_get_size(struct kvmppc_gs_msg *gsm)
+{
+	struct kvmppc_gs_bitmap gsbm = { 0 };
+	size_t total = 0;
+	u16 iden;
+
+	kvmppc_gsbm_fill(&gsbm);
+	kvmppc_gsbm_for_each(&gsbm, iden)
+	{
+		bool counted = true;
+
+		switch (iden) {
+		case KVMPPC_GSID_HOST_STATE_SIZE:
+		case KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE:
+		case KVMPPC_GSID_PARTITION_TABLE:
+		case KVMPPC_GSID_PROCESS_TABLE:
+		case KVMPPC_GSID_RUN_INPUT:
+		case KVMPPC_GSID_RUN_OUTPUT:
+		/* Host wide counters */
+		case KVMPPC_GSID_L0_GUEST_HEAP:
+		case KVMPPC_GSID_L0_GUEST_HEAP_MAX:
+		case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE:
+		case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX:
+		case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM:
+			counted = false;
+			break;
+		default:
+			break;
+		}
+
+		if (counted)
+			total += kvmppc_gse_total_size(kvmppc_gsid_size(iden));
+	}
+	return total;
+}
+
+static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb,
+ struct kvmppc_gs_msg *gsm)
+{ /*
+ * fill_info callback of the vcpu message: for each guest state id included
+ * in @gsm, write the matching field of the kvm_vcpu behind gsm->data into
+ * @gsb.  Returns 0 on success or the first negative error from a put helper.
+ */
+ struct kvm_vcpu *vcpu;
+ vector128 v;
+ int rc, i;
+ u16 iden;
+ u32 arch_compat = 0;
+
+ vcpu = gsm->data;
+
+ kvmppc_gsm_for_each(gsm, iden)
+ {
+ rc = 0;
+
+ if ((gsm->flags & KVMPPC_GS_FLAGS_WIDE) !=
+ (kvmppc_gsid_flags(iden) & KVMPPC_GS_FLAGS_WIDE))
+ continue; /* skip ids whose WIDE flag differs from this message's */
+
+ switch (iden) {
+ case KVMPPC_GSID_DSCR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dscr);
+ break;
+ case KVMPPC_GSID_MMCRA:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.mmcra);
+ break;
+ case KVMPPC_GSID_HFSCR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.hfscr);
+ break;
+ case KVMPPC_GSID_PURR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.purr);
+ break;
+ case KVMPPC_GSID_SPURR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.spurr);
+ break;
+ case KVMPPC_GSID_AMR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.amr);
+ break;
+ case KVMPPC_GSID_UAMOR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.uamor);
+ break;
+ case KVMPPC_GSID_SIAR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.siar);
+ break;
+ case KVMPPC_GSID_SDAR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.sdar);
+ break;
+ case KVMPPC_GSID_IAMR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.iamr);
+ break;
+ case KVMPPC_GSID_DAWR0:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dawr0);
+ break;
+ case KVMPPC_GSID_DAWR1:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dawr1);
+ break;
+ case KVMPPC_GSID_DAWRX0:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.dawrx0);
+ break;
+ case KVMPPC_GSID_DAWRX1:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.dawrx1);
+ break;
+ case KVMPPC_GSID_DEXCR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.dexcr);
+ break;
+ case KVMPPC_GSID_HASHKEYR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.hashkeyr);
+ break;
+ case KVMPPC_GSID_HASHPKEYR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.hashpkeyr);
+ break;
+ case KVMPPC_GSID_CIABR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ciabr);
+ break;
+ case KVMPPC_GSID_WORT:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.wort);
+ break;
+ case KVMPPC_GSID_PPR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ppr);
+ break;
+ case KVMPPC_GSID_PSPB:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.pspb);
+ break;
+ case KVMPPC_GSID_TAR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.tar);
+ break;
+ case KVMPPC_GSID_FSCR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.fscr);
+ break;
+ case KVMPPC_GSID_EBBHR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ebbhr);
+ break;
+ case KVMPPC_GSID_EBBRR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ebbrr);
+ break;
+ case KVMPPC_GSID_BESCR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.bescr);
+ break;
+ case KVMPPC_GSID_IC:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ic);
+ break;
+ case KVMPPC_GSID_CTRL:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.ctrl);
+ break;
+ case KVMPPC_GSID_PIDR:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.pid);
+ break;
+ case KVMPPC_GSID_AMOR: {
+ u64 amor = ~0; /* always sent as all ones, not read from vcpu state */
+
+ rc = kvmppc_gse_put_u64(gsb, iden, amor);
+ break;
+ }
+ case KVMPPC_GSID_VRSAVE:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.vrsave);
+ break;
+ case KVMPPC_GSID_MMCR(0)... KVMPPC_GSID_MMCR(3):
+ i = iden - KVMPPC_GSID_MMCR(0);
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.mmcr[i]);
+ break;
+ case KVMPPC_GSID_SIER(0)... KVMPPC_GSID_SIER(2):
+ i = iden - KVMPPC_GSID_SIER(0);
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.sier[i]);
+ break;
+ case KVMPPC_GSID_PMC(0)... KVMPPC_GSID_PMC(5):
+ i = iden - KVMPPC_GSID_PMC(0);
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.pmc[i]);
+ break;
+ case KVMPPC_GSID_GPR(0)... KVMPPC_GSID_GPR(31):
+ i = iden - KVMPPC_GSID_GPR(0);
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.regs.gpr[i]);
+ break;
+ case KVMPPC_GSID_CR:
+ rc = kvmppc_gse_put_u32(gsb, iden, vcpu->arch.regs.ccr);
+ break;
+ case KVMPPC_GSID_XER:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.regs.xer);
+ break;
+ case KVMPPC_GSID_CTR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.regs.ctr);
+ break;
+ case KVMPPC_GSID_LR:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.regs.link);
+ break;
+ case KVMPPC_GSID_NIA:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.regs.nip);
+ break;
+ case KVMPPC_GSID_SRR0:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.srr0);
+ break;
+ case KVMPPC_GSID_SRR1:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.srr1);
+ break;
+ case KVMPPC_GSID_SPRG0:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.sprg0);
+ break;
+ case KVMPPC_GSID_SPRG1:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.sprg1);
+ break;
+ case KVMPPC_GSID_SPRG2:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.sprg2);
+ break;
+ case KVMPPC_GSID_SPRG3:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.sprg3);
+ break;
+ case KVMPPC_GSID_DAR:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.dar);
+ break;
+ case KVMPPC_GSID_DSISR:
+ rc = kvmppc_gse_put_u32(gsb, iden,
+ vcpu->arch.shregs.dsisr);
+ break;
+ case KVMPPC_GSID_MSR:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.shregs.msr);
+ break;
+ case KVMPPC_GSID_VTB:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.vcore->vtb);
+ break;
+ case KVMPPC_GSID_DPDES:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.vcore->dpdes);
+ break;
+ case KVMPPC_GSID_LPCR:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.vcore->lpcr);
+ break;
+ case KVMPPC_GSID_TB_OFFSET:
+ rc = kvmppc_gse_put_u64(gsb, iden,
+ vcpu->arch.vcore->tb_offset);
+ break;
+ case KVMPPC_GSID_FPSCR:
+ rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.fp.fpscr);
+ break;
+ case KVMPPC_GSID_VSRS(0)... KVMPPC_GSID_VSRS(31):
+ i = iden - KVMPPC_GSID_VSRS(0);
+ memcpy(&v, &vcpu->arch.fp.fpr[i],
+ sizeof(vcpu->arch.fp.fpr[i])); /* staged through a vector128 temporary */
+ rc = kvmppc_gse_put_vector128(gsb, iden, &v);
+ break;
+#ifdef CONFIG_VSX
+ case KVMPPC_GSID_VSCR:
+ rc = kvmppc_gse_put_u32(gsb, iden,
+ vcpu->arch.vr.vscr.u[3]);
+ break;
+ case KVMPPC_GSID_VSRS(32)... KVMPPC_GSID_VSRS(63):
+ i = iden - KVMPPC_GSID_VSRS(32);
+ rc = kvmppc_gse_put_vector128(gsb, iden,
+ &vcpu->arch.vr.vr[i]);
+ break;
+#endif
+ case KVMPPC_GSID_DEC_EXPIRY_TB: {
+ u64 dw;
+
+ dw = vcpu->arch.dec_expires -
+ vcpu->arch.vcore->tb_offset; /* sent with the vcore's tb_offset subtracted */
+ rc = kvmppc_gse_put_u64(gsb, iden, dw);
+ break;
+ }
+ case KVMPPC_GSID_LOGICAL_PVR:
+ /*
+ * Though 'arch_compat == 0' would mean the default
+ * compatibility, arch_compat, being a Guest Wide
+ * Element, cannot be filled with a value of 0 in GSB
+ * as this would result in a kernel trap.
+ * Hence, when `arch_compat == 0`, arch_compat should
+ * default to L1's PVR.
+ */
+ if (!vcpu->arch.vcore->arch_compat) {
+ if (cpu_has_feature(CPU_FTR_P11_PVR))
+ arch_compat = PVR_ARCH_31_P11;
+ else if (cpu_has_feature(CPU_FTR_ARCH_31))
+ arch_compat = PVR_ARCH_31;
+ else if (cpu_has_feature(CPU_FTR_ARCH_300))
+ arch_compat = PVR_ARCH_300;
+ } else {
+ arch_compat = vcpu->arch.vcore->arch_compat;
+ }
+ rc = kvmppc_gse_put_u32(gsb, iden, arch_compat);
+ break;
+ }
+
+ if (rc < 0)
+ return rc; /* stop at the first element that could not be written */
+ }
+
+ return 0;
+}
+
+static int gs_msg_ops_vcpu_refresh_info(struct kvmppc_gs_msg *gsm,
+ struct kvmppc_gs_buff *gsb)
+{ /*
+ * refresh_info callback of the vcpu message: parse @gsb and copy every
+ * recognised element into the matching field of the kvm_vcpu behind
+ * gsm->data, marking each copied id in vcpu->arch.nestedv2_io.valids.
+ * Returns 0 on success or the negative error from the parser.
+ */
+ struct kvmppc_gs_parser gsp = { 0 };
+ struct kvmhv_nestedv2_io *io;
+ struct kvmppc_gs_bitmap *valids;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_gs_elem *gse;
+ vector128 v;
+ int rc, i;
+ u16 iden;
+
+ vcpu = gsm->data;
+
+ rc = kvmppc_gse_parse(&gsp, gsb);
+ if (rc < 0)
+ return rc;
+
+ io = &vcpu->arch.nestedv2_io;
+ valids = &io->valids;
+
+ kvmppc_gsp_for_each(&gsp, iden, gse)
+ {
+ switch (iden) {
+ case KVMPPC_GSID_DSCR:
+ vcpu->arch.dscr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_MMCRA:
+ vcpu->arch.mmcra = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_HFSCR:
+ vcpu->arch.hfscr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_PURR:
+ vcpu->arch.purr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SPURR:
+ vcpu->arch.spurr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_AMR:
+ vcpu->arch.amr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_UAMOR:
+ vcpu->arch.uamor = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SIAR:
+ vcpu->arch.siar = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SDAR:
+ vcpu->arch.sdar = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_IAMR:
+ vcpu->arch.iamr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DAWR0:
+ vcpu->arch.dawr0 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DAWR1:
+ vcpu->arch.dawr1 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DAWRX0:
+ vcpu->arch.dawrx0 = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_DAWRX1:
+ vcpu->arch.dawrx1 = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_DEXCR:
+ vcpu->arch.dexcr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_HASHKEYR:
+ vcpu->arch.hashkeyr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_HASHPKEYR:
+ vcpu->arch.hashpkeyr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_CIABR:
+ vcpu->arch.ciabr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_WORT:
+ vcpu->arch.wort = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_PPR:
+ vcpu->arch.ppr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_PSPB:
+ vcpu->arch.pspb = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_TAR:
+ vcpu->arch.tar = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_FSCR:
+ vcpu->arch.fscr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_EBBHR:
+ vcpu->arch.ebbhr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_EBBRR:
+ vcpu->arch.ebbrr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_BESCR:
+ vcpu->arch.bescr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_IC:
+ vcpu->arch.ic = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_CTRL:
+ vcpu->arch.ctrl = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_PIDR:
+ vcpu->arch.pid = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_AMOR: /* value is not stored; the id is still marked valid below */
+ break;
+ case KVMPPC_GSID_VRSAVE:
+ vcpu->arch.vrsave = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_MMCR(0)... KVMPPC_GSID_MMCR(3):
+ i = iden - KVMPPC_GSID_MMCR(0);
+ vcpu->arch.mmcr[i] = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SIER(0)... KVMPPC_GSID_SIER(2):
+ i = iden - KVMPPC_GSID_SIER(0);
+ vcpu->arch.sier[i] = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_PMC(0)... KVMPPC_GSID_PMC(5):
+ i = iden - KVMPPC_GSID_PMC(0);
+ vcpu->arch.pmc[i] = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_GPR(0)... KVMPPC_GSID_GPR(31):
+ i = iden - KVMPPC_GSID_GPR(0);
+ vcpu->arch.regs.gpr[i] = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_CR:
+ vcpu->arch.regs.ccr = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_XER:
+ vcpu->arch.regs.xer = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_CTR:
+ vcpu->arch.regs.ctr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_LR:
+ vcpu->arch.regs.link = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_NIA:
+ vcpu->arch.regs.nip = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SRR0:
+ vcpu->arch.shregs.srr0 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SRR1:
+ vcpu->arch.shregs.srr1 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SPRG0:
+ vcpu->arch.shregs.sprg0 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SPRG1:
+ vcpu->arch.shregs.sprg1 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SPRG2:
+ vcpu->arch.shregs.sprg2 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_SPRG3:
+ vcpu->arch.shregs.sprg3 = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DAR:
+ vcpu->arch.shregs.dar = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DSISR:
+ vcpu->arch.shregs.dsisr = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_MSR:
+ vcpu->arch.shregs.msr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_VTB:
+ vcpu->arch.vcore->vtb = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DPDES:
+ vcpu->arch.vcore->dpdes = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_LPCR:
+ vcpu->arch.vcore->lpcr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_TB_OFFSET:
+ vcpu->arch.vcore->tb_offset = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_FPSCR:
+ vcpu->arch.fp.fpscr = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_VSRS(0)... KVMPPC_GSID_VSRS(31):
+ kvmppc_gse_get_vector128(gse, &v);
+ i = iden - KVMPPC_GSID_VSRS(0);
+ memcpy(&vcpu->arch.fp.fpr[i], &v,
+ sizeof(vcpu->arch.fp.fpr[i])); /* staged through a vector128 temporary */
+ break;
+#ifdef CONFIG_VSX
+ case KVMPPC_GSID_VSCR:
+ vcpu->arch.vr.vscr.u[3] = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_VSRS(32)... KVMPPC_GSID_VSRS(63):
+ i = iden - KVMPPC_GSID_VSRS(32);
+ kvmppc_gse_get_vector128(gse, &vcpu->arch.vr.vr[i]);
+ break;
+#endif
+ case KVMPPC_GSID_HDAR:
+ vcpu->arch.fault_dar = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_HDSISR:
+ vcpu->arch.fault_dsisr = kvmppc_gse_get_u32(gse);
+ break;
+ case KVMPPC_GSID_ASDR:
+ vcpu->arch.fault_gpa = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_HEIR:
+ vcpu->arch.emul_inst = kvmppc_gse_get_u64(gse);
+ break;
+ case KVMPPC_GSID_DEC_EXPIRY_TB: {
+ u64 dw;
+
+ dw = kvmppc_gse_get_u64(gse);
+ vcpu->arch.dec_expires =
+ dw + vcpu->arch.vcore->tb_offset; /* the vcore's tb_offset is added back on receipt */
+ break;
+ }
+ case KVMPPC_GSID_LOGICAL_PVR:
+ vcpu->arch.vcore->arch_compat = kvmppc_gse_get_u32(gse);
+ break;
+ default:
+ continue; /* unrecognised id: skipped without being marked valid */
+ }
+ kvmppc_gsbm_set(valids, iden); /* record this id in io->valids */
+ }
+
+ return 0;
+}
+
+static struct kvmppc_gs_msg_ops vcpu_message_ops = { /* callbacks for messages carrying vcpu guest state */
+ .get_size = gs_msg_ops_vcpu_get_size,
+ .fill_info = gs_msg_ops_vcpu_fill_info,
+ .refresh_info = gs_msg_ops_vcpu_refresh_info,
+};
+
+/*
+ * Allocate and register the per-vcpu guest state buffers and messages.
+ *
+ * Queries the minimum run output buffer size, allocates the run output and
+ * run input buffers, registers both with kvmppc_gsb_send_datum(), and
+ * creates the vcpu and vcore messages that are stored in @io. The temporary
+ * config message and buffer are freed before returning.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the negative value
+ * of a failed send/receive. On failure everything allocated here is freed,
+ * but the pointers already stored in @io are not cleared.
+ */
+static int kvmhv_nestedv2_host_create(struct kvm_vcpu *vcpu,
+ struct kvmhv_nestedv2_io *io)
+{
+ struct kvmhv_nestedv2_config *cfg;
+ struct kvmppc_gs_buff *gsb, *vcpu_run_output, *vcpu_run_input;
+ unsigned long guest_id, vcpu_id;
+ struct kvmppc_gs_msg *gsm, *vcpu_message, *vcore_message;
+ int rc;
+
+ cfg = &io->cfg;
+ guest_id = vcpu->kvm->arch.lpid;
+ vcpu_id = vcpu->vcpu_id;
+
+ /* Temporary message/buffer pair used only for the config exchange */
+ gsm = kvmppc_gsm_new(&config_msg_ops, cfg, KVMPPC_GS_FLAGS_WIDE,
+ GFP_KERNEL);
+ if (!gsm) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), guest_id, vcpu_id,
+ GFP_KERNEL);
+ if (!gsb) {
+ rc = -ENOMEM;
+ goto free_gsm;
+ }
+
+ rc = kvmppc_gsb_receive_datum(gsb, gsm,
+ KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE);
+ if (rc < 0) {
+ pr_err("KVM-NESTEDv2: couldn't get vcpu run output buffer minimum size\n");
+ goto free_gsb;
+ }
+
+ /* NOTE(review): presumably cfg->vcpu_run_output_size was filled by the receive above - confirm */
+ vcpu_run_output = kvmppc_gsb_new(cfg->vcpu_run_output_size, guest_id,
+ vcpu_id, GFP_KERNEL);
+ if (!vcpu_run_output) {
+ rc = -ENOMEM;
+ goto free_gsb;
+ }
+
+ cfg->vcpu_run_output_cfg.address = kvmppc_gsb_paddress(vcpu_run_output);
+ cfg->vcpu_run_output_cfg.size = kvmppc_gsb_capacity(vcpu_run_output);
+ io->vcpu_run_output = vcpu_run_output;
+
+ /* Drop KVMPPC_GS_FLAGS_WIDE, which the message was created with */
+ gsm->flags = 0;
+ rc = kvmppc_gsb_send_datum(gsb, gsm, KVMPPC_GSID_RUN_OUTPUT);
+ if (rc < 0) {
+ pr_err("KVM-NESTEDv2: couldn't set vcpu run output buffer\n");
+ goto free_gs_out;
+ }
+
+ vcpu_message = kvmppc_gsm_new(&vcpu_message_ops, vcpu, 0, GFP_KERNEL);
+ if (!vcpu_message) {
+ rc = -ENOMEM;
+ goto free_gs_out;
+ }
+ kvmppc_gsm_include_all(vcpu_message);
+
+ io->vcpu_message = vcpu_message;
+
+ /* The run input buffer is sized from the vcpu message with all IDs included */
+ vcpu_run_input = kvmppc_gsb_new(kvmppc_gsm_size(vcpu_message), guest_id,
+ vcpu_id, GFP_KERNEL);
+ if (!vcpu_run_input) {
+ rc = -ENOMEM;
+ goto free_vcpu_message;
+ }
+
+ io->vcpu_run_input = vcpu_run_input;
+ cfg->vcpu_run_input_cfg.address = kvmppc_gsb_paddress(vcpu_run_input);
+ cfg->vcpu_run_input_cfg.size = kvmppc_gsb_capacity(vcpu_run_input);
+ rc = kvmppc_gsb_send_datum(gsb, gsm, KVMPPC_GSID_RUN_INPUT);
+ if (rc < 0) {
+ pr_err("KVM-NESTEDv2: couldn't set vcpu run input buffer\n");
+ goto free_vcpu_run_input;
+ }
+
+ vcore_message = kvmppc_gsm_new(&vcpu_message_ops, vcpu,
+ KVMPPC_GS_FLAGS_WIDE, GFP_KERNEL);
+ if (!vcore_message) {
+ rc = -ENOMEM;
+ goto free_vcpu_run_input;
+ }
+
+ /* The vcore message includes every ID except the logical PVR */
+ kvmppc_gsm_include_all(vcore_message);
+ kvmppc_gsbm_clear(&vcore_message->bitmap, KVMPPC_GSID_LOGICAL_PVR);
+ io->vcore_message = vcore_message;
+
+ /* NOTE(review): presumably marks every guest state ID as valid - confirm */
+ kvmppc_gsbm_fill(&io->valids);
+ kvmppc_gsm_free(gsm);
+ kvmppc_gsb_free(gsb);
+ return 0;
+
+ /* Unwind ladder: each label frees what was allocated before the failing step */
+free_vcpu_run_input:
+ kvmppc_gsb_free(vcpu_run_input);
+free_vcpu_message:
+ kvmppc_gsm_free(vcpu_message);
+free_gs_out:
+ kvmppc_gsb_free(vcpu_run_output);
+free_gsb:
+ kvmppc_gsb_free(gsb);
+free_gsm:
+ kvmppc_gsm_free(gsm);
+err:
+ return rc;
+}
+
+/**
+ * __kvmhv_nestedv2_mark_dirty() - flag a Guest State ID as changed by L1
+ * @vcpu: vcpu
+ * @iden: guest state ID
+ *
+ * The ID is added to both the vcpu message and the vcore message so that
+ * its new value is sent to the L0 hypervisor, and it is recorded as valid
+ * in the cache bitmap. An @iden of 0 is ignored.
+ * See kvmhv_nestedv2_flush_vcpu().
+ *
+ * Return: always 0.
+ */
+int __kvmhv_nestedv2_mark_dirty(struct kvm_vcpu *vcpu, u16 iden)
+{
+	struct kvmhv_nestedv2_io *nio = &vcpu->arch.nestedv2_io;
+
+	if (iden) {
+		kvmppc_gsm_include(nio->vcpu_message, iden);
+		kvmppc_gsm_include(nio->vcore_message, iden);
+		kvmppc_gsbm_set(&nio->valids, iden);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_mark_dirty);
+
+/**
+ * __kvmhv_nestedv2_cached_reload() - fetch a Guest State ID from L0 if stale
+ * @vcpu: vcpu
+ * @iden: guest state ID
+ *
+ * Pull the value for @iden from the L0 host into the L1 host. Nothing is
+ * requested when @iden is 0 or when the ID is already marked valid, so the
+ * trip to the L0 host only happens when it is needed.
+ *
+ * Return: 0 on success or when no reload was needed, otherwise the negative
+ * value returned by kvmppc_gsb_receive_datum().
+ */
+int __kvmhv_nestedv2_cached_reload(struct kvm_vcpu *vcpu, u16 iden)
+{
+	struct kvmhv_nestedv2_io *nio = &vcpu->arch.nestedv2_io;
+	struct kvmppc_gs_msg msg;
+	int err;
+
+	if (!iden || kvmppc_gsbm_test(&nio->valids, iden))
+		return 0;
+
+	kvmppc_gsm_init(&msg, &vcpu_message_ops, vcpu, kvmppc_gsid_flags(iden));
+	err = kvmppc_gsb_receive_datum(nio->vcpu_run_input, &msg, iden);
+	if (err >= 0)
+		return 0;
+
+	pr_err("KVM-NESTEDv2: couldn't get GSID: 0x%x\n", iden);
+	return err;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_cached_reload);
+
+/**
+ * kvmhv_nestedv2_flush_vcpu() - push modified Guest State IDs to the host
+ * @vcpu: vcpu
+ * @time_limit: hdec expiry tb
+ *
+ * Values marked by __kvmhv_nestedv2_mark_dirty() are sent to the L0 host.
+ * Guest wide values (the vcore message) have to be sent with H_GUEST_SET
+ * first; thread wide values (the vcpu message) are then copied into the
+ * H_GUEST_RUN_VCPU input buffer.
+ *
+ * The hdec tb offset is always appended to the input buffer.
+ *
+ * Return: 0 on success, otherwise the negative value of the failing step.
+ */
+int kvmhv_nestedv2_flush_vcpu(struct kvm_vcpu *vcpu, u64 time_limit)
+{
+	struct kvmhv_nestedv2_io *nio = &vcpu->arch.nestedv2_io;
+	struct kvmppc_gs_buff *run_input = nio->vcpu_run_input;
+	int err;
+
+	err = kvmppc_gsb_send_data(run_input, nio->vcore_message);
+	if (err < 0) {
+		pr_err("KVM-NESTEDv2: couldn't set guest wide elements\n");
+		return err;
+	}
+
+	kvmppc_gsb_reset(run_input);
+	err = kvmppc_gsm_fill_info(nio->vcpu_message, run_input);
+	if (err < 0) {
+		pr_err("KVM-NESTEDv2: couldn't fill vcpu run input buffer\n");
+		return err;
+	}
+
+	err = kvmppc_gse_put_u64(run_input, KVMPPC_GSID_HDEC_EXPIRY_TB,
+				 time_limit);
+	return err < 0 ? err : 0;
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_flush_vcpu);
+
+/**
+ * kvmhv_nestedv2_set_ptbl_entry() - send partition and process table state to
+ * L0 host
+ * @lpid: guest id
+ * @dw0: partition table double word
+ * @dw1: process table double word
+ *
+ * A temporary buffer large enough for both table elements is allocated,
+ * filled from @dw0 and @dw1, sent guest wide and then freed again.
+ *
+ * Return: 0 on success, -ENOMEM if the buffer cannot be allocated, otherwise
+ * the negative value of the failing put or send.
+ */
+int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1)
+{
+	struct kvmppc_gs_part_table part;
+	struct kvmppc_gs_proc_table proc;
+	struct kvmppc_gs_buff *buf;
+	size_t len;
+	int err;
+
+	len = kvmppc_gse_total_size(kvmppc_gsid_size(KVMPPC_GSID_PARTITION_TABLE));
+	len += kvmppc_gse_total_size(kvmppc_gsid_size(KVMPPC_GSID_PROCESS_TABLE));
+	len += sizeof(struct kvmppc_gs_header);
+
+	buf = kvmppc_gsb_new(len, lpid, 0, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	part.address = dw0 & RPDB_MASK;
+	part.ea_bits = (((dw0 & RTS1_MASK) >> (RTS1_SHIFT - 3)) |
+			((dw0 & RTS2_MASK) >> RTS2_SHIFT)) + 31;
+	part.gpd_size = 1ul << ((dw0 & RPDS_MASK) + 3);
+	err = kvmppc_gse_put_part_table(buf, KVMPPC_GSID_PARTITION_TABLE, part);
+	if (err < 0)
+		goto out_free;
+
+	proc.address = dw1 & PRTB_MASK;
+	proc.gpd_size = 1ul << ((dw1 & PRTS_MASK) + 12);
+	err = kvmppc_gse_put_proc_table(buf, KVMPPC_GSID_PROCESS_TABLE, proc);
+	if (err < 0)
+		goto out_free;
+
+	err = kvmppc_gsb_send(buf, KVMPPC_GS_FLAGS_WIDE);
+	if (err < 0)
+		pr_err("KVM-NESTEDv2: couldn't set the PATE\n");
+	else
+		err = 0;
+
+	/* The buffer is only needed for this one exchange */
+out_free:
+	kvmppc_gsb_free(buf);
+	return err;
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_ptbl_entry);
+
+/**
+ * kvmhv_nestedv2_set_vpa() - register L2 VPA with L0
+ * @vcpu: vcpu
+ * @vpa: L1 logical real address
+ *
+ * The vcpu run input buffer is borrowed for the exchange: it is reset,
+ * loaded with the VPA element, sent, and reset again before returning
+ * whether or not the send succeeded.
+ *
+ * Return: the result of the last step attempted; negative on failure.
+ */
+int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa)
+{
+	struct kvmppc_gs_buff *run_input = vcpu->arch.nestedv2_io.vcpu_run_input;
+	int err;
+
+	kvmppc_gsb_reset(run_input);
+	err = kvmppc_gse_put_u64(run_input, KVMPPC_GSID_VPA, vpa);
+	if (err >= 0) {
+		err = kvmppc_gsb_send(run_input, 0);
+		if (err < 0)
+			pr_err("KVM-NESTEDv2: couldn't register the L2 VPA (rc=%d)\n", err);
+	}
+
+	kvmppc_gsb_reset(run_input);
+	return err;
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_set_vpa);
+
+/**
+ * kvmhv_nestedv2_parse_output() - receive values from H_GUEST_RUN_VCPU output
+ * @vcpu: vcpu
+ *
+ * The fault related fields are first put back to their defaults, then the
+ * output buffer from H_GUEST_RUN_VCPU is parsed to update the vcpu.
+ *
+ * Return: the result of kvmppc_gsm_refresh_info().
+ */
+int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_gs_msg msg;
+
+	/* Defaults that stay in place unless the output buffer carries them */
+	vcpu->arch.emul_inst = KVM_INST_FETCH_FAILED;
+	vcpu->arch.fault_gpa = 0;
+	vcpu->arch.fault_dsisr = 0;
+	vcpu->arch.fault_dar = 0;
+
+	kvmppc_gsm_init(&msg, &vcpu_message_ops, vcpu, 0);
+	return kvmppc_gsm_refresh_info(&msg,
+				       vcpu->arch.nestedv2_io.vcpu_run_output);
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_parse_output);
+
+/*
+ * Free the two messages and the two run buffers referenced by @io.
+ * @vcpu is not used. The pointers in @io are left as they are.
+ */
+static void kvmhv_nestedv2_host_free(struct kvm_vcpu *vcpu,
+ struct kvmhv_nestedv2_io *io)
+{
+ kvmppc_gsm_free(io->vcpu_message);
+ kvmppc_gsm_free(io->vcore_message);
+ kvmppc_gsb_free(io->vcpu_run_input);
+ kvmppc_gsb_free(io->vcpu_run_output);
+}
+
+/*
+ * Reload the pt_regs related Guest State IDs (GPR0-31, CR, XER, CTR, LR and
+ * NIA) that are not currently marked valid, using a single receive request.
+ * @regs is not used. Returns the result of kvmppc_gsb_receive_data().
+ */
+int __kvmhv_nestedv2_reload_ptregs(struct kvm_vcpu *vcpu, struct pt_regs *regs)
+{
+	const u16 spr_idens[] = {
+		KVMPPC_GSID_CR,
+		KVMPPC_GSID_XER,
+		KVMPPC_GSID_CTR,
+		KVMPPC_GSID_LR,
+		KVMPPC_GSID_NIA,
+	};
+	struct kvmhv_nestedv2_io *nio = &vcpu->arch.nestedv2_io;
+	struct kvmppc_gs_msg msg;
+	int err;
+
+	kvmppc_gsm_init(&msg, &vcpu_message_ops, vcpu, 0);
+
+	/* Only IDs whose cached value is stale are requested */
+	for (int n = 0; n < 32; n++) {
+		if (kvmppc_gsbm_test(&nio->valids, KVMPPC_GSID_GPR(n)))
+			continue;
+		kvmppc_gsm_include(&msg, KVMPPC_GSID_GPR(n));
+	}
+
+	for (int n = 0; n < (int)(sizeof(spr_idens) / sizeof(spr_idens[0])); n++) {
+		if (kvmppc_gsbm_test(&nio->valids, spr_idens[n]))
+			continue;
+		kvmppc_gsm_include(&msg, spr_idens[n]);
+	}
+
+	err = kvmppc_gsb_receive_data(nio->vcpu_run_input, &msg);
+	if (err < 0)
+		pr_err("KVM-NESTEDv2: couldn't reload ptregs\n");
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_reload_ptregs);
+
+/*
+ * Mark every pt_regs related Guest State ID (GPR0-31, CR, XER, CTR, LR and
+ * NIA) as dirty. @regs is not used. Always returns 0.
+ */
+int __kvmhv_nestedv2_mark_dirty_ptregs(struct kvm_vcpu *vcpu,
+				       struct pt_regs *regs)
+{
+	const u16 spr_idens[] = {
+		KVMPPC_GSID_CR,
+		KVMPPC_GSID_XER,
+		KVMPPC_GSID_CTR,
+		KVMPPC_GSID_LR,
+		KVMPPC_GSID_NIA,
+	};
+	int n;
+
+	for (n = 0; n < 32; n++)
+		kvmhv_nestedv2_mark_dirty(vcpu, KVMPPC_GSID_GPR(n));
+
+	for (n = 0; n < (int)(sizeof(spr_idens) / sizeof(spr_idens[0])); n++)
+		kvmhv_nestedv2_mark_dirty(vcpu, spr_idens[n]);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_nestedv2_mark_dirty_ptregs);
+
+/**
+ * kvmhv_nestedv2_vcpu_create() - create nested vcpu for the NESTEDv2 API
+ * @vcpu: vcpu
+ * @io: NESTEDv2 nested io state
+ *
+ * Issue the create-vcpu hcall through plpar_guest_create_vcpu() for this
+ * guest id and vcpu id, then set up the buffers and messages in @io with
+ * kvmhv_nestedv2_host_create().
+ *
+ * Return: 0 on success. If the hcall fails: -ENOMEM for
+ * H_NOT_ENOUGH_RESOURCES or H_ABORTED, -EPERM for H_AUTHORITY and -EINVAL
+ * for anything else. Otherwise the result of kvmhv_nestedv2_host_create().
+ */
+int kvmhv_nestedv2_vcpu_create(struct kvm_vcpu *vcpu,
+			       struct kvmhv_nestedv2_io *io)
+{
+	long rc;
+
+	rc = plpar_guest_create_vcpu(0, vcpu->kvm->arch.lpid, vcpu->vcpu_id);
+	if (rc == H_SUCCESS)
+		return kvmhv_nestedv2_host_create(vcpu, io);
+
+	/* Translate the hcall status into an errno for the caller */
+	pr_err("KVM: Create Guest vcpu hcall failed, rc=%ld\n", rc);
+	switch (rc) {
+	case H_NOT_ENOUGH_RESOURCES:
+	case H_ABORTED:
+		return -ENOMEM;
+	case H_AUTHORITY:
+		return -EPERM;
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_vcpu_create);
+
+/**
+ * kvmhv_nestedv2_vcpu_free() - free the NESTEDv2 state
+ * @vcpu: vcpu
+ * @io: NESTEDv2 nested io state
+ *
+ * Exported wrapper around kvmhv_nestedv2_host_free(), which releases the
+ * messages and buffers referenced by @io.
+ */
+void kvmhv_nestedv2_vcpu_free(struct kvm_vcpu *vcpu,
+ struct kvmhv_nestedv2_io *io)
+{
+ kvmhv_nestedv2_host_free(vcpu, io);
+}
+EXPORT_SYMBOL_GPL(kvmhv_nestedv2_vcpu_free);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c
new file mode 100644
index 000000000000..34bc0a8a1288
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c
@@ -0,0 +1,930 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_hv.h"
+
+/*
+ * Write the guest values of a set of SPRs from vcpu->arch into the hardware.
+ *
+ * Most registers are only written when the guest value differs from the
+ * host value (taken from current->thread or @host_os_sprs). UAMOR and PSPB
+ * are only written when the guest value is non-zero. TAR is always written.
+ */
+static void load_spr_state(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ /* TAR is very fast */
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+ current->thread.vrsave != vcpu->arch.vrsave)
+ mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
+#endif
+
+ /* EBB registers are only touched while HFSCR[EBB] is set for the guest */
+ if (vcpu->arch.hfscr & HFSCR_EBB) {
+ if (current->thread.ebbhr != vcpu->arch.ebbhr)
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ if (current->thread.ebbrr != vcpu->arch.ebbrr)
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ if (current->thread.bescr != vcpu->arch.bescr)
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+ current->thread.tidr != vcpu->arch.tid)
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ if (host_os_sprs->iamr != vcpu->arch.iamr)
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ if (vcpu->arch.uamor != 0)
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+ if (current->thread.fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ if (current->thread.dscr != vcpu->arch.dscr)
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ if (vcpu->arch.pspb != 0)
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+
+ /*
+ * DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
+ * clear (or hstate set appropriately to catch those registers
+ * being clobbered if we take a MCE or SRESET), so those are done
+ * later.
+ */
+
+ /* Guest CTRL has bit 0 clear: write 0 to CTRLT to match */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 0);
+}
+
+/*
+ * Read the current hardware values of the SPRs handled by load_spr_state()
+ * back into vcpu->arch. VRSAVE, the EBB registers and TIDR are only read
+ * when the corresponding feature / HFSCR bit is present.
+ */
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
+#endif
+
+ if (vcpu->arch.hfscr & HFSCR_EBB) {
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR))
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+
+ /* CTRL is read through CTRLF here and written through CTRLT elsewhere */
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+}
+
+/*
+ * Load guest TM, SPR, FP and (with CONFIG_ALTIVEC) vector state into the
+ * hardware.
+ *
+ * Returns true if current MSR and/or guest MSR may have changed. In this
+ * function that is only the case when kvmppc_restore_tm_hv() was called
+ * because the guest MSR has a transaction active.
+ */
+bool load_vcpu_state(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ bool ret = false;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ unsigned long guest_msr = vcpu->arch.shregs.msr;
+ if (MSR_TM_ACTIVE(guest_msr)) {
+ kvmppc_restore_tm_hv(vcpu, guest_msr, true);
+ ret = true;
+ } else if (vcpu->arch.hfscr & HFSCR_TM) {
+ /* No active transaction: only the three TM SPRs need loading */
+ mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+ mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+ mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+ }
+ }
+#endif
+
+ load_spr_state(vcpu, host_os_sprs);
+
+ load_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+ load_vr_state(&vcpu->arch.vr);
+#endif
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(load_vcpu_state);
+
+/*
+ * Save guest SPR, FP, (with CONFIG_ALTIVEC) vector and TM state from the
+ * hardware into the vcpu. Counterpart of load_vcpu_state().
+ */
+void store_vcpu_state(struct kvm_vcpu *vcpu)
+{
+ store_spr_state(vcpu);
+
+ store_fp_state(&vcpu->arch.fp);
+#ifdef CONFIG_ALTIVEC
+ store_vr_state(&vcpu->arch.vr);
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ unsigned long guest_msr = vcpu->arch.shregs.msr;
+ if (MSR_TM_ACTIVE(guest_msr)) {
+ kvmppc_save_tm_hv(vcpu, guest_msr, true);
+ } else if (vcpu->arch.hfscr & HFSCR_TM) {
+ vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+ vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+ vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+
+ /*
+ * For non-nested guests, HFSCR[TM] is dropped once the load_tm
+ * counter wraps around to zero.
+ */
+ if (!vcpu->arch.nested) {
+ vcpu->arch.load_tm++; /* see load_ebb comment */
+ if (!vcpu->arch.load_tm)
+ vcpu->arch.hfscr &= ~HFSCR_TM;
+ }
+ }
+ }
+#endif
+}
+EXPORT_SYMBOL_GPL(store_vcpu_state);
+
+/*
+ * Record the current IAMR and AMR values in @host_os_sprs. These are the
+ * host values that load_spr_state() and restore_p9_host_os_sprs() compare
+ * against.
+ */
+void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+}
+EXPORT_SYMBOL_GPL(save_p9_host_os_sprs);
+
+/*
+ * Put the host values of the SPRs changed by load_spr_state() back into the
+ * hardware. Registers are only rewritten where the saved guest value in
+ * vcpu->arch differs from the host value (or was non-zero, for UAMOR and
+ * PSPB), so vcpu guest regs must already be saved.
+ */
+void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ /*
+ * current->thread.xxx registers must all be restored to host
+ * values before a potential context switch, otherwise the context
+ * switch itself will overwrite current->thread.xxx with the values
+ * from the guest SPRs.
+ */
+
+ mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
+
+ if (cpu_has_feature(CPU_FTR_P9_TIDR) &&
+ current->thread.tidr != vcpu->arch.tid)
+ mtspr(SPRN_TIDR, current->thread.tidr);
+ if (host_os_sprs->iamr != vcpu->arch.iamr)
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
+ if (vcpu->arch.uamor != 0)
+ mtspr(SPRN_UAMOR, 0);
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
+ if (current->thread.fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, current->thread.fscr);
+ if (current->thread.dscr != vcpu->arch.dscr)
+ mtspr(SPRN_DSCR, current->thread.dscr);
+ if (vcpu->arch.pspb != 0)
+ mtspr(SPRN_PSPB, 0);
+
+ /* Guest CTRL was saved by store_spr_state(); set runlatch back to 1 */
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, 1);
+
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+ vcpu->arch.vrsave != current->thread.vrsave)
+ mtspr(SPRN_VRSAVE, current->thread.vrsave);
+#endif
+ if (vcpu->arch.hfscr & HFSCR_EBB) {
+ if (vcpu->arch.bescr != current->thread.bescr)
+ mtspr(SPRN_BESCR, current->thread.bescr);
+ if (vcpu->arch.ebbhr != current->thread.ebbhr)
+ mtspr(SPRN_EBBHR, current->thread.ebbhr);
+ if (vcpu->arch.ebbrr != current->thread.ebbrr)
+ mtspr(SPRN_EBBRR, current->thread.ebbrr);
+
+ if (!vcpu->arch.nested) {
+ /*
+ * This is like load_fp in context switching, turn off
+ * the facility after it wraps the u8 to try avoiding
+ * saving and restoring the registers each partition
+ * switch.
+ */
+ vcpu->arch.load_ebb++;
+ if (!vcpu->arch.load_ebb)
+ vcpu->arch.hfscr &= ~HFSCR_EBB;
+ }
+ }
+
+ if (vcpu->arch.tar != current->thread.tar)
+ mtspr(SPRN_TAR, current->thread.tar);
+}
+EXPORT_SYMBOL_GPL(restore_p9_host_os_sprs);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P9_TIMING
+/*
+ * Close the timing interval of the vcpu's current activity and make @next
+ * the current one.
+ *
+ * The timebase used is mftb() minus the vcore's applied tb offset. If there
+ * was a previous activity, the elapsed delta is added to its tb_total and
+ * folded into tb_min / tb_max. The update is bracketed by two seqcount
+ * increments separated by smp_wmb(), so seqcount is odd while the fields
+ * are being changed.
+ */
+void accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmhv_tb_accumulator *curr;
+ u64 tb = mftb() - vc->tb_offset_applied;
+ u64 prev_tb;
+ u64 delta;
+ u64 seq;
+
+ curr = vcpu->arch.cur_activity;
+ vcpu->arch.cur_activity = next;
+ prev_tb = vcpu->arch.cur_tb_start;
+ vcpu->arch.cur_tb_start = tb;
+
+ /* Nothing was being timed, so there is nothing to account */
+ if (!curr)
+ return;
+
+ delta = tb - prev_tb;
+
+ seq = curr->seqcount;
+ curr->seqcount = seq + 1;
+ smp_wmb();
+ curr->tb_total += delta;
+ /* seq == 0 means this is the first sample, so it seeds tb_min */
+ if (seq == 0 || delta < curr->tb_min)
+ curr->tb_min = delta;
+ if (delta > curr->tb_max)
+ curr->tb_max = delta;
+ smp_wmb();
+ curr->seqcount = seq + 2;
+}
+EXPORT_SYMBOL_GPL(accumulate_time);
+#endif
+
+/* Execute slbmfev for SLB index @idx and return the value it produces. */
+static inline u64 mfslbv(unsigned int idx)
+{
+ u64 slbev;
+
+ asm volatile("slbmfev %0,%1" : "=r" (slbev) : "r" (idx));
+
+ return slbev;
+}
+
+/* Execute slbmfee for SLB index @idx and return the value it produces. */
+static inline u64 mfslbe(unsigned int idx)
+{
+ u64 slbee;
+
+ asm volatile("slbmfee %0,%1" : "=r" (slbee) : "r" (idx));
+
+ return slbee;
+}
+
+/*
+ * Execute slbmte with @slbev as the first operand and @slbee as the second.
+ * Note the C parameter order (slbee, slbev) is the reverse of the operand
+ * order of the instruction.
+ */
+static inline void mtslb(u64 slbee, u64 slbev)
+{
+ asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+/*
+ * Overwrite SLB entry @idx using slbmte with slbee = @idx and slbev = 0.
+ * NOTE(review): presumably this leaves the entry invalid because no valid
+ * bit is set in slbee - confirm against the ISA.
+ */
+static inline void clear_slb_entry(unsigned int idx)
+{
+ mtslb(idx, 0);
+}
+
+/*
+ * Clear SLB entry 0 explicitly, then issue slbia with IH=6.
+ * NOTE(review): entry 0 is presumably handled separately because slbia does
+ * not invalidate it - confirm against the ISA.
+ */
+static inline void slb_clear_invalidate_partition(void)
+{
+ clear_slb_entry(0);
+ asm volatile(PPC_SLBIA(6));
+}
+
+/*
+ * A malicious or buggy radix guest may have inserted SLB entries. Only
+ * entries 0..3 are reachable because radix always runs with UPRT=1, so
+ * those four are cleared here to avoid side-channels. slbmte is used
+ * rather than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+	int idx = 0;
+
+	while (idx < 4) {
+		clear_slb_entry(idx);
+		idx++;
+	}
+}
+
+/*
+ * Switch the MMU context registers (LPID, LPCR, PID) to a radix guest.
+ * For a nested guest the shadow LPID is used instead of kvm->arch.lpid.
+ * @lpcr is the LPCR value to install for the guest.
+ */
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+ u32 pid;
+
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+ pid = kvmppc_get_pid(vcpu);
+
+ /*
+ * Prior memory accesses to host PID Q3 must be completed before we
+ * start switching, and stores must be drained to avoid not-my-LPAR
+ * logic (see switch_mmu_to_host).
+ */
+ asm volatile("hwsync" ::: "memory");
+ isync();
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, pid);
+ /*
+ * isync not required here because we are HRFID'ing to guest before
+ * any guest context access, which is context synchronising.
+ */
+}
+
+/*
+ * Switch the MMU context registers (LPID, LPCR, PID) to a hash (HPT) guest
+ * and reload the guest SLB entries saved in vcpu->arch.slb[0..slb_max).
+ * @lpcr is the LPCR value to install for the guest.
+ */
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ u32 lpid;
+ u32 pid;
+ int i;
+
+ lpid = kvm->arch.lpid;
+ pid = kvmppc_get_pid(vcpu);
+
+ /*
+ * See switch_mmu_to_guest_radix. ptesync should not be required here
+ * even if the host is in HPT mode because speculative accesses would
+ * not cause RC updates (we are in real mode).
+ */
+ asm volatile("hwsync" ::: "memory");
+ isync();
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, pid);
+
+ /* Entries were recorded by save_clear_guest_mmu() on the previous exit */
+ for (i = 0; i < vcpu->arch.slb_max; i++)
+ mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+ /*
+ * isync not required here, see switch_mmu_to_guest_radix.
+ */
+}
+
+/*
+ * Switch the MMU context registers back to the host: PID is set to @pid,
+ * LPID and LPCR to the host values stored in @kvm. On a hash host the bolted
+ * SLB entries are restored afterwards.
+ */
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
+{
+ u32 lpid = kvm->arch.host_lpid;
+ u64 lpcr = kvm->arch.host_lpcr;
+
+ /*
+ * The guest has exited, so guest MMU context is no longer being
+ * non-speculatively accessed, but a hwsync is needed before the
+ * mtLPIDR / mtPIDR switch, in order to ensure all stores are drained,
+ * so the not-my-LPAR tlbie logic does not overlook them.
+ */
+ asm volatile("hwsync" ::: "memory");
+ isync();
+ mtspr(SPRN_PID, pid);
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ /*
+ * isync is not required after the switch, because mtmsrd with L=0
+ * is performed after this switch, which is context synchronising.
+ */
+
+ if (!radix_enabled())
+ slb_restore_bolted_realmode();
+}
+
+/*
+ * Prepare the host MMU state for a switch to the guest. Nothing is done on
+ * a radix host; on a hash host the SLB is cleared and invalidated. @kvm is
+ * not used.
+ */
+static void save_clear_host_mmu(struct kvm *kvm)
+{
+	if (radix_enabled())
+		return;
+
+	/*
+	 * Hash host could save and restore host SLB entries to
+	 * reduce SLB fault overheads of VM exits, but for now the
+	 * existing code clears all entries and restores just the
+	 * bolted ones when switching back to host.
+	 */
+	slb_clear_invalidate_partition();
+}
+
+/*
+ * Remove guest SLB state from the hardware after a guest exit. A radix
+ * guest only has entries 0..3 wiped (see radix_clear_slb()). For a hash
+ * guest every valid entry among the first vcpu->arch.slb_nr is recorded in
+ * vcpu->arch.slb[] first, with slb_max set to the number recorded, and the
+ * SLB is then cleared.
+ */
+static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+	int idx, saved;
+
+	if (kvm_is_radix(kvm)) {
+		radix_clear_slb();
+		return;
+	}
+
+	/*
+	 * This must run before switching to host (radix host can't
+	 * access all SLBs).
+	 */
+	saved = 0;
+	for (idx = 0; idx < vcpu->arch.slb_nr; idx++) {
+		u64 ee, ev;
+
+		ee = mfslbe(idx);
+		if (!(ee & SLB_ESID_V))
+			continue;
+
+		ev = mfslbv(idx);
+		/* The entry index is merged into the saved slbee value */
+		vcpu->arch.slb[saved].orige = ee | idx;
+		vcpu->arch.slb[saved].origv = ev;
+		saved++;
+	}
+	vcpu->arch.slb_max = saved;
+	slb_clear_invalidate_partition();
+}
+
+/*
+ * Invalidate the local TLB with tlbiel, one call per set for
+ * kvm->arch.tlb_sets sets, followed by a ptesync and an ERAT invalidate.
+ * The radix and hash paths differ in the R/PRS/RIC operands and in the ERAT
+ * invalidate used; the radix path uses RIC=2 for set 0 and RIC=0 for the
+ * remaining sets.
+ */
+static void flush_guest_tlb(struct kvm *kvm)
+{
+ unsigned long rb, set;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ if (kvm_is_radix(kvm)) {
+ /* R=1 PRS=1 RIC=2 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (2),
+ "r" (0) : "memory");
+ for (set = 1; set < kvm->arch.tlb_sets; ++set) {
+ rb += PPC_BIT(51); /* increment set number */
+ /* R=1 PRS=1 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (1), "i" (1), "i" (0),
+ "r" (0) : "memory");
+ }
+ asm volatile("ptesync": : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory");
+ } else {
+ for (set = 0; set < kvm->arch.tlb_sets; ++set) {
+ /* R=0 PRS=0 RIC=0 */
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r" (rb), "i" (0), "i" (0), "i" (0),
+ "r" (0) : "memory");
+ rb += PPC_BIT(51); /* increment set number */
+ }
+ asm volatile("ptesync": : :"memory");
+ // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now.
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory");
+ }
+}
+
+/*
+ * Flush the guest TLB on @pcpu if its bit is set in the need_tlb_flush mask
+ * (the nested guest's mask when @nested is given, otherwise the one in
+ * @kvm), then clear that bit. Returns immediately when the bit is not set.
+ */
+static void check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested)
+{
+ cpumask_t *need_tlb_flush;
+ bool all_set = true;
+ int i;
+
+ if (nested)
+ need_tlb_flush = &nested->need_tlb_flush;
+ else
+ need_tlb_flush = &kvm->arch.need_tlb_flush;
+
+ if (likely(!cpumask_test_cpu(pcpu, need_tlb_flush)))
+ return;
+
+ /*
+ * Individual threads can come in here, but the TLB is shared between
+ * the 4 threads in a core, hence invalidating on one thread
+ * invalidates for all, so only invalidate the first time (if all bits
+ * were set). The others must still execute a ptesync.
+ *
+ * If a race occurs and two threads do the TLB flush, that is not a
+ * problem, just sub-optimal.
+ */
+ for (i = cpu_first_tlb_thread_sibling(pcpu);
+ i <= cpu_last_tlb_thread_sibling(pcpu);
+ i += cpu_tlb_thread_sibling_step()) {
+ if (!cpumask_test_cpu(i, need_tlb_flush)) {
+ all_set = false;
+ break;
+ }
+ }
+ if (all_set)
+ flush_guest_tlb(kvm);
+ else
+ asm volatile("ptesync" ::: "memory");
+
+ /* Clear the bit after the TLB flush */
+ cpumask_clear_cpu(pcpu, need_tlb_flush);
+}
+
+/*
+ * Hard-disable interrupts and make sure the MSR facility bits needed for
+ * switching guest state are enabled.
+ *
+ * @msr is the caller's view of the current MSR. The facility bits wanted are
+ * FP (if CONFIG_PPC_FPU), VEC, VSX and TM, each depending on CPU features;
+ * TM additionally requires HFSCR[TM] to be set for the vcpu. If any are
+ * missing from @msr they are added and the MSR is written with EE clear;
+ * otherwise only interrupts are hard-disabled.
+ *
+ * Returns the resulting MSR value (EE clear, required facility bits set).
+ */
+unsigned long kvmppc_msr_hard_disable_set_facilities(struct kvm_vcpu *vcpu, unsigned long msr)
+{
+ unsigned long msr_needed = 0;
+
+ msr &= ~MSR_EE;
+
+ /* MSR bits may have been cleared by context switch so must recheck */
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ msr_needed |= MSR_FP;
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ msr_needed |= MSR_VEC;
+ if (cpu_has_feature(CPU_FTR_VSX))
+ msr_needed |= MSR_VSX;
+ if ((cpu_has_feature(CPU_FTR_TM) ||
+ cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) &&
+ (vcpu->arch.hfscr & HFSCR_TM))
+ msr_needed |= MSR_TM;
+
+ /*
+ * This could be combined with MSR[RI] clearing, but that expands
+ * the unrecoverable window. It would be better to cover unrecoverable
+ * with KVM bad interrupt handling rather than use MSR[RI] at all.
+ *
+ * Much more difficult and less worthwhile to combine with IR/DR
+ * disable.
+ */
+ if ((msr & msr_needed) != msr_needed) {
+ msr |= msr_needed;
+ __mtmsrd(msr, 0);
+ } else {
+ __hard_irq_disable();
+ }
+ /* Record the hard-disable in the paca's irq_happened flags */
+ local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
+
+ return msr;
+}
+EXPORT_SYMBOL_GPL(kvmppc_msr_hard_disable_set_facilities);
+
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr, u64 *tb)
+{
+ struct p9_host_os_sprs host_os_sprs;
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ s64 hdec, dec;
+ u64 purr, spurr;
+ u64 *exsave;
+ int trap;
+ unsigned long msr;
+ unsigned long host_hfscr;
+ unsigned long host_ciabr;
+ unsigned long host_dawr0;
+ unsigned long host_dawrx0;
+ unsigned long host_psscr;
+ unsigned long host_hpsscr;
+ unsigned long host_pidr;
+ unsigned long host_dawr1;
+ unsigned long host_dawrx1;
+ unsigned long dpdes;
+
+ hdec = time_limit - *tb;
+ if (hdec < 0)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+ WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+ WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+ vcpu->arch.ceded = 0;
+
+ /* Save MSR for restore, with EE clear. */
+ msr = mfmsr() & ~MSR_EE;
+
+ host_hfscr = mfspr(SPRN_HFSCR);
+ host_ciabr = mfspr(SPRN_CIABR);
+ host_psscr = mfspr(SPRN_PSSCR_PR);
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
+ host_hpsscr = mfspr(SPRN_PSSCR);
+ host_pidr = mfspr(SPRN_PID);
+
+ if (dawr_enabled()) {
+ host_dawr0 = mfspr(SPRN_DAWR0);
+ host_dawrx0 = mfspr(SPRN_DAWRX0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+ }
+
+ local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+ local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+
+ save_p9_host_os_sprs(&host_os_sprs);
+
+ msr = kvmppc_msr_hard_disable_set_facilities(vcpu, msr);
+ if (lazy_irq_pending()) {
+ trap = 0;
+ goto out;
+ }
+
+ if (unlikely(load_vcpu_state(vcpu, &host_os_sprs)))
+ msr = mfmsr(); /* MSR may have been updated */
+
+ if (vc->tb_offset) {
+ u64 new_tb = *tb + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ *tb = new_tb;
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ mtspr(SPRN_VTB, vc->vtb);
+ mtspr(SPRN_PURR, vcpu->arch.purr);
+ mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+ if (vcpu->arch.doorbell_request) {
+ vcpu->arch.doorbell_request = 0;
+ mtspr(SPRN_DPDES, 1);
+ }
+
+ if (dawr_enabled()) {
+ if (vcpu->arch.dawr0 != host_dawr0)
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ if (vcpu->arch.dawrx0 != host_dawrx0)
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ if (vcpu->arch.dawr1 != host_dawr1)
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ if (vcpu->arch.dawrx1 != host_dawrx1)
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ }
+ }
+ if (vcpu->arch.ciabr != host_ciabr)
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+
+
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ } else {
+ if (vcpu->arch.psscr != host_psscr)
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
+ }
+
+ mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+ /*
+ * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+ * Interrupt (HDSI) the HDSISR is not updated at all.
+ *
+ * To work around this we put a canary value into the HDSISR before
+ * returning to a guest and then check for this canary when we take a
+ * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+ * update the HDSISR. In this case we return to the guest to retake the
+ * HDSI which should correctly update the HDSISR the second time HDSI
+ * entry.
+ *
+ * The "radix prefetch bug" test can be used to test for this bug, as
+ * it also exists for DD2.1 and below.
+ */
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ /*
+ * It might be preferable to load_vcpu_state here, in order to get the
+ * GPR/FP register loads executing in parallel with the previous mtSPR
+ * instructions, but for now that can't be done because the TM handling
+ * in load_vcpu_state can change some SPRs and vcpu state (nip, msr).
+ * But TM could be split out if this would be a significant benefit.
+ */
+
+ /*
+ * MSR[RI] does not need to be cleared (and is not, for radix guests
+ * with no prefetch bug), because in_guest is set. If we take a SRESET
+ * or MCE with in_guest set but still in HV mode, then
+ * kvmppc_p9_bad_interrupt handles the interrupt, which effectively
+ * clears MSR[RI] and doesn't return.
+ */
+ WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_HV_P9);
+ barrier(); /* Open in_guest critical section */
+
+ /*
+ * Hash host, hash guest, or radix guest with prefetch bug, all have
+ * to disable the MMU before switching to guest MMU state.
+ */
+ if (!radix_enabled() || !kvm_is_radix(kvm) ||
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+ save_clear_host_mmu(kvm);
+
+ if (kvm_is_radix(kvm))
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ else
+ switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+
+ /* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
+ check_need_tlb_flush(kvm, vc->pcpu, nested);
+
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
+ mtspr(SPRN_DEC, vcpu->arch.dec_expires - *tb);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+tm_return_to_guest:
+#endif
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
+ switch_pmu_to_guest(vcpu, &host_os_sprs);
+ accumulate_time(vcpu, &vcpu->arch.in_guest);
+
+ kvmppc_p9_enter_guest(vcpu);
+
+ accumulate_time(vcpu, &vcpu->arch.guest_exit);
+ switch_pmu_to_host(vcpu, &host_os_sprs);
+
+ /* XXX: Could get these from r11/12 and paca exsave instead */
+ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+ vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+ /* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
+ trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+
+ if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK))
+ exsave = local_paca->exgen;
+ else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET)
+ exsave = local_paca->exnmi;
+ else /* trap == 0x200 */
+ exsave = local_paca->exmc;
+
+ vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+ vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+ /*
+ * After reading machine check regs (DAR, DSISR, SRR0/1) and hstate
+ * scratch (which we need to move into exsave to make re-entrant vs
+ * SRESET/MCE), register state is protected from reentrancy. However
+ * timebase, MMU, among other state is still set to guest, so don't
+ * enable MSR[RI] here. It gets enabled at the end, after in_guest
+ * is cleared.
+ *
+ * It is possible an NMI could come in here, which is why it is
+ * important to save the above state early so it can be debugged.
+ */
+
+ vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+ vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+ vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+ vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+ vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+ vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+ vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+ vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+ if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ kvmppc_realmode_machine_check(vcpu);
+
+ } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+ kvmppc_p9_realmode_hmi_handler(vcpu);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+ } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ if (!local_paca->kvm_hstate.fake_suspend &&
+ (vcpu->arch.shregs.msr & MSR_TS_S)) {
+ if (kvmhv_p9_tm_emulation_early(vcpu)) {
+ /*
+ * Go straight back into the guest with the
+ * new NIP/MSR as set by TM emulation.
+ */
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr);
+ goto tm_return_to_guest;
+ }
+ }
+#endif
+ }
+
+ /* Advance host PURR/SPURR by the amount used by guest */
+ purr = mfspr(SPRN_PURR);
+ spurr = mfspr(SPRN_SPURR);
+ local_paca->kvm_hstate.host_purr += purr - vcpu->arch.purr;
+ local_paca->kvm_hstate.host_spurr += spurr - vcpu->arch.spurr;
+ vcpu->arch.purr = purr;
+ vcpu->arch.spurr = spurr;
+
+ vcpu->arch.ic = mfspr(SPRN_IC);
+ vcpu->arch.pid = mfspr(SPRN_PID);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+
+ vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+ vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+ vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+ vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+ dpdes = mfspr(SPRN_DPDES);
+ if (dpdes)
+ vcpu->arch.doorbell_request = 1;
+
+ vc->vtb = mfspr(SPRN_VTB);
+
+ dec = mfspr(SPRN_DEC);
+ if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+ dec = (s32) dec;
+ *tb = mftb();
+ vcpu->arch.dec_expires = dec + *tb;
+
+ if (vc->tb_offset_applied) {
+ u64 new_tb = *tb - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ *tb = new_tb;
+ vc->tb_offset_applied = 0;
+ }
+
+ save_clear_guest_mmu(kvm, vcpu);
+ switch_mmu_to_host(kvm, host_pidr);
+
+ /*
+ * Enable MSR here in order to have facilities enabled to save
+ * guest registers. This enables MMU (if we were in realmode), so
+ * only switch MMU on after the MMU is switched to host, to avoid
+ * the P9_RADIX_PREFETCH_BUG or hash guest context.
+ */
+ if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) &&
+ vcpu->arch.shregs.msr & MSR_TS_MASK)
+ msr |= MSR_TS_S;
+ __mtmsrd(msr, 0);
+
+ store_vcpu_state(vcpu);
+
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr);
+
+ if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) {
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_hpsscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ }
+
+ mtspr(SPRN_HFSCR, host_hfscr);
+ if (vcpu->arch.ciabr != host_ciabr)
+ mtspr(SPRN_CIABR, host_ciabr);
+
+ if (dawr_enabled()) {
+ if (vcpu->arch.dawr0 != host_dawr0)
+ mtspr(SPRN_DAWR0, host_dawr0);
+ if (vcpu->arch.dawrx0 != host_dawrx0)
+ mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ if (vcpu->arch.dawr1 != host_dawr1)
+ mtspr(SPRN_DAWR1, host_dawr1);
+ if (vcpu->arch.dawrx1 != host_dawrx1)
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
+ }
+
+ if (dpdes)
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ /* HDEC must be at least as large as DEC, so decrementer_max fits */
+ mtspr(SPRN_HDEC, decrementer_max);
+
+ timer_rearm_host_dec(*tb);
+
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
+
+ barrier(); /* Close in_guest critical section */
+ WRITE_ONCE(local_paca->kvm_hstate.in_guest, KVM_GUEST_MODE_NONE);
+ /* Interrupts are recoverable at this point */
+
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
+out:
+ return trap;
+}
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_p9_perf.c b/arch/powerpc/kvm/book3s_hv_p9_perf.c
new file mode 100644
index 000000000000..44d24cca3df1
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_p9_perf.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <asm/kvm_ppc.h>
+#include <asm/pmc.h>
+
+#include "book3s_hv.h"
+
+static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
+{
+ if (!(mmcr0 & MMCR0_FC))
+ goto do_freeze;
+ if (mmcra & MMCRA_SAMPLE_ENABLE)
+ goto do_freeze;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (!(mmcr0 & MMCR0_PMCCEXT))
+ goto do_freeze;
+ if (!(mmcra & MMCRA_BHRB_DISABLE))
+ goto do_freeze;
+ }
+ return;
+
+do_freeze:
+ mmcr0 = MMCR0_FC;
+ mmcra = 0;
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mmcr0 |= MMCR0_PMCCEXT;
+ mmcra = MMCRA_BHRB_DISABLE;
+ }
+
+ mtspr(SPRN_MMCR0, mmcr0);
+ mtspr(SPRN_MMCRA, mmcra);
+ isync();
+}
+
+void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ struct lppaca *lp;
+ int load_pmu = 1;
+
+ lp = vcpu->arch.vpa.pinned_addr;
+ if (lp)
+ load_pmu = lp->pmcregs_in_use;
+
+ /* Save host */
+ if (ppc_get_pmu_inuse()) {
+ /* POWER9, POWER10 do not implement HPMC or SPMC */
+
+ host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
+ host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
+
+ freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
+
+ host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
+ host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
+ host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
+ host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
+ host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
+ host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
+ host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
+ host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
+ host_os_sprs->sdar = mfspr(SPRN_SDAR);
+ host_os_sprs->siar = mfspr(SPRN_SIAR);
+ host_os_sprs->sier1 = mfspr(SPRN_SIER);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
+ host_os_sprs->sier2 = mfspr(SPRN_SIER2);
+ host_os_sprs->sier3 = mfspr(SPRN_SIER3);
+ }
+ }
+
+#ifdef CONFIG_PPC_PSERIES
+ /* After saving PMU, before loading guest PMU, flip pmcregs_in_use */
+ if (kvmhv_on_pseries()) {
+ barrier();
+ get_lppaca()->pmcregs_in_use = load_pmu;
+ barrier();
+ }
+#endif
+
+ /*
+ * Load guest. If the VPA said the PMCs are not in use but the guest
+ * tried to access them anyway, HFSCR[PM] will be set by the HFAC
+ * fault so we can make forward progress.
+ */
+ if (load_pmu || (vcpu->arch.hfscr & HFSCR_PM)) {
+ mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
+ mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
+ mtspr(SPRN_PMC3, vcpu->arch.pmc[2]);
+ mtspr(SPRN_PMC4, vcpu->arch.pmc[3]);
+ mtspr(SPRN_PMC5, vcpu->arch.pmc[4]);
+ mtspr(SPRN_PMC6, vcpu->arch.pmc[5]);
+ mtspr(SPRN_MMCR1, vcpu->arch.mmcr[1]);
+ mtspr(SPRN_MMCR2, vcpu->arch.mmcr[2]);
+ mtspr(SPRN_SDAR, vcpu->arch.sdar);
+ mtspr(SPRN_SIAR, vcpu->arch.siar);
+ mtspr(SPRN_SIER, vcpu->arch.sier[0]);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_MMCR3, vcpu->arch.mmcr[3]);
+ mtspr(SPRN_SIER2, vcpu->arch.sier[1]);
+ mtspr(SPRN_SIER3, vcpu->arch.sier[2]);
+ }
+
+ /* Set MMCRA then MMCR0 last */
+ mtspr(SPRN_MMCRA, vcpu->arch.mmcra);
+ mtspr(SPRN_MMCR0, vcpu->arch.mmcr[0]);
+ /* No isync necessary because we're starting counters */
+
+ if (!vcpu->arch.nested &&
+ (vcpu->arch.hfscr_permitted & HFSCR_PM))
+ vcpu->arch.hfscr |= HFSCR_PM;
+ }
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_guest);
+
+void switch_pmu_to_host(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ struct lppaca *lp;
+ int save_pmu = 1;
+
+ lp = vcpu->arch.vpa.pinned_addr;
+ if (lp)
+ save_pmu = lp->pmcregs_in_use;
+ if (IS_ENABLED(CONFIG_KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND)) {
+ /*
+ * Save pmu if this guest is capable of running nested guests.
+ * This option is for old L1s that do not set their
+ * lppaca->pmcregs_in_use properly when entering their L2.
+ */
+ save_pmu |= nesting_enabled(vcpu->kvm);
+ }
+
+ if (save_pmu) {
+ vcpu->arch.mmcr[0] = mfspr(SPRN_MMCR0);
+ vcpu->arch.mmcra = mfspr(SPRN_MMCRA);
+
+ freeze_pmu(vcpu->arch.mmcr[0], vcpu->arch.mmcra);
+
+ vcpu->arch.pmc[0] = mfspr(SPRN_PMC1);
+ vcpu->arch.pmc[1] = mfspr(SPRN_PMC2);
+ vcpu->arch.pmc[2] = mfspr(SPRN_PMC3);
+ vcpu->arch.pmc[3] = mfspr(SPRN_PMC4);
+ vcpu->arch.pmc[4] = mfspr(SPRN_PMC5);
+ vcpu->arch.pmc[5] = mfspr(SPRN_PMC6);
+ vcpu->arch.mmcr[1] = mfspr(SPRN_MMCR1);
+ vcpu->arch.mmcr[2] = mfspr(SPRN_MMCR2);
+ vcpu->arch.sdar = mfspr(SPRN_SDAR);
+ vcpu->arch.siar = mfspr(SPRN_SIAR);
+ vcpu->arch.sier[0] = mfspr(SPRN_SIER);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ vcpu->arch.mmcr[3] = mfspr(SPRN_MMCR3);
+ vcpu->arch.sier[1] = mfspr(SPRN_SIER2);
+ vcpu->arch.sier[2] = mfspr(SPRN_SIER3);
+ }
+
+ } else if (vcpu->arch.hfscr & HFSCR_PM) {
+ /*
+ * The guest accessed PMC SPRs without specifying they should
+ * be preserved, or it cleared pmcregs_in_use after the last
+ * access. Just ensure they are frozen.
+ */
+ freeze_pmu(mfspr(SPRN_MMCR0), mfspr(SPRN_MMCRA));
+
+ /*
+ * Demand-fault PMU register access in the guest.
+ *
+ * This is used to grab the guest's VPA pmcregs_in_use value
+ * and reflect it into the host's VPA in the case of a nested
+ * hypervisor.
+ *
+ * It also avoids having to zero-out SPRs after each guest
+ * exit to avoid side-channels.
+ *
+ * This is cleared here when we exit the guest, so later HFSCR
+ * interrupt handling can add it back to run the guest with
+ * PM enabled next time.
+ */
+ if (!vcpu->arch.nested)
+ vcpu->arch.hfscr &= ~HFSCR_PM;
+ } /* otherwise the PMU should still be frozen */
+
+#ifdef CONFIG_PPC_PSERIES
+ if (kvmhv_on_pseries()) {
+ barrier();
+ get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
+ barrier();
+ }
+#endif
+
+ if (ppc_get_pmu_inuse()) {
+ mtspr(SPRN_PMC1, host_os_sprs->pmc1);
+ mtspr(SPRN_PMC2, host_os_sprs->pmc2);
+ mtspr(SPRN_PMC3, host_os_sprs->pmc3);
+ mtspr(SPRN_PMC4, host_os_sprs->pmc4);
+ mtspr(SPRN_PMC5, host_os_sprs->pmc5);
+ mtspr(SPRN_PMC6, host_os_sprs->pmc6);
+ mtspr(SPRN_MMCR1, host_os_sprs->mmcr1);
+ mtspr(SPRN_MMCR2, host_os_sprs->mmcr2);
+ mtspr(SPRN_SDAR, host_os_sprs->sdar);
+ mtspr(SPRN_SIAR, host_os_sprs->siar);
+ mtspr(SPRN_SIER, host_os_sprs->sier1);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mtspr(SPRN_MMCR3, host_os_sprs->mmcr3);
+ mtspr(SPRN_SIER2, host_os_sprs->sier2);
+ mtspr(SPRN_SIER3, host_os_sprs->sier3);
+ }
+
+ /* Set MMCRA then MMCR0 last */
+ mtspr(SPRN_MMCRA, host_os_sprs->mmcra);
+ mtspr(SPRN_MMCR0, host_os_sprs->mmcr0);
+ isync();
+ }
+}
+EXPORT_SYMBOL_GPL(switch_pmu_to_host);
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d562c8e2bc30..9012acadbca8 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -1,7 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*
* Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
@@ -11,8 +9,13 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
+#include <asm/lppaca.h>
#include <asm/opal.h>
#include <asm/mce.h>
+#include <asm/machdep.h>
+#include <asm/cputhreads.h>
+#include <asm/hmi.h>
+#include <asm/kvm_ppc.h>
/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
@@ -62,13 +65,10 @@ static void reload_slb(struct kvm_vcpu *vcpu)
/*
* On POWER7, see if we can handle a machine check that occurred inside
* the guest in real mode, without switching to the host partition.
- *
- * Returns: 0 => exit guest, 1 => deliver machine check to guest
*/
static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
{
unsigned long srr1 = vcpu->arch.shregs.msr;
- struct machine_check_event mce_evt;
long handled = 1;
if (srr1 & SRR1_MC_LDSTERR) {
@@ -83,8 +83,7 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
}
if (dsisr & DSISR_MC_TLB_MULTI) {
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
+ tlbiel_all_lpid(vcpu->kvm->arch.radix);
dsisr &= ~DSISR_MC_TLB_MULTI;
}
/* Any other errors we don't understand? */
@@ -101,45 +100,278 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
reload_slb(vcpu);
break;
case SRR1_MC_IFETCH_TLBMULTI:
- if (cur_cpu_spec && cur_cpu_spec->flush_tlb)
- cur_cpu_spec->flush_tlb(TLBIEL_INVAL_SET_LPID);
+ tlbiel_all_lpid(vcpu->kvm->arch.radix);
break;
default:
handled = 0;
}
+ return handled;
+}
+
+void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+{
+ struct machine_check_event mce_evt;
+ long handled;
+
+ if (vcpu->kvm->arch.fwnmi_enabled) {
+ /* FWNMI guests handle their own recovery */
+ handled = 0;
+ } else {
+ handled = kvmppc_realmode_mc_power7(vcpu);
+ }
+
/*
- * See if we have already handled the condition in the linux host.
- * We assume that if the condition is recovered then linux host
- * will have generated an error log event that we will pick
- * up and log later.
- * Don't release mce event now. We will queue up the event so that
- * we can log the MCE event info on host console.
+ * Now get the event and stash it in the vcpu struct so it can
+ * be handled by the primary thread in virtual mode. We can't
+ * call machine_check_queue_event() here if we are running on
+ * an offline secondary thread.
*/
- if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
- goto out;
+ if (get_mce_event(&mce_evt, MCE_EVENT_RELEASE)) {
+ if (handled && mce_evt.version == MCE_V1)
+ mce_evt.disposition = MCE_DISPOSITION_RECOVERED;
+ } else {
+ memset(&mce_evt, 0, sizeof(mce_evt));
+ }
- if (mce_evt.version == MCE_V1 &&
- (mce_evt.severity == MCE_SEV_NO_ERROR ||
- mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
- handled = 1;
+ vcpu->arch.mce_evt = mce_evt;
+}
+
+
+long kvmppc_p9_realmode_hmi_handler(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ long ret = 0;
-out:
/*
- * We are now going enter guest either through machine check
- * interrupt (for unhandled errors) or will continue from
- * current HSRR0 (for handled errors) in guest. Hence
- * queue up the event so that we can log it from host console later.
+ * Unapply and clear the offset first. That way, if the TB was not
+ * resynced then it will remain in host-offset, and if it was resynced
+ * then it is brought into host-offset. Then the tb offset is
+ * re-applied before continuing with the KVM exit.
+ *
+ * This way, we don't need to actually know whether or not OPAL resynced
+ * the timebase or do any of the complicated dance that the P7/8
+ * path requires.
*/
- machine_check_queue_event();
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ vc->tb_offset_applied = 0;
+ }
- return handled;
+ local_paca->hmi_irqs++;
+
+ if (hmi_handle_debugtrig(NULL) >= 0) {
+ ret = 1;
+ goto out;
+ }
+
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(NULL);
+
+out:
+ if (kvmppc_get_tb_offset(vcpu)) {
+ u64 new_tb = mftb() + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ if ((mftb() & 0xffffff) < (new_tb & 0xffffff)) {
+ new_tb += 0x1000000;
+ mtspr(SPRN_TBU40, new_tb);
+ }
+ vc->tb_offset_applied = kvmppc_get_tb_offset(vcpu);
+ }
+
+ return ret;
+}
+
+/*
+ * The following subcore HMI handling is all only for pre-POWER9 CPUs.
+ */
+
+/* Check if dynamic split is in force and return subcore size accordingly. */
+static inline int kvmppc_cur_subcore_size(void)
+{
+ if (local_paca->kvm_hstate.kvm_split_mode)
+ return local_paca->kvm_hstate.kvm_split_mode->subcore_size;
+
+ return threads_per_subcore;
+}
+
+void kvmppc_subcore_enter_guest(void)
+{
+ int thread_id, subcore_id;
+
+ thread_id = cpu_thread_in_core(local_paca->paca_index);
+ subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+ local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
}
+EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
-long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+void kvmppc_subcore_exit_guest(void)
{
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- return kvmppc_realmode_mc_power7(vcpu);
+ int thread_id, subcore_id;
+
+ thread_id = cpu_thread_in_core(local_paca->paca_index);
+ subcore_id = thread_id / kvmppc_cur_subcore_size();
+
+ local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
+
+static bool kvmppc_tb_resync_required(void)
+{
+ if (test_and_set_bit(CORE_TB_RESYNC_REQ_BIT,
+ &local_paca->sibling_subcore_state->flags))
+ return false;
+
+ return true;
+}
+
+static void kvmppc_tb_resync_done(void)
+{
+ clear_bit(CORE_TB_RESYNC_REQ_BIT,
+ &local_paca->sibling_subcore_state->flags);
+}
+
+/*
+ * kvmppc_realmode_hmi_handler() is called only by primary thread during
+ * guest exit path.
+ *
+ * There are multiple reasons why HMI could occur, one of them is
+ * Timebase (TB) error. If this HMI is due to TB error, then TB would
+ * have been in stopped state. The opal hmi handler will fix it and
+ * restore the TB value with host timebase value. For HMI caused due
+ * to non-TB errors, opal hmi handler will not touch/restore TB register
+ * and hence there won't be any change in TB value.
+ *
+ * Since we are not sure about the cause of this HMI, we can't be sure
+ * about the content of TB register whether it holds guest or host timebase
+ * value. Hence the idea is to resync the TB on every HMI, so that we
+ * know about the exact state of the TB value. Resync TB call will
+ * restore TB to host timebase.
+ *
+ * Things to consider:
+ * - On TB error, HMI interrupt is reported on all the threads of the core
+ * that has encountered TB error irrespective of split-core mode.
+ * - The very first thread on the core that gets a chance to fix the TB error
+ * would resync the TB with the local chipTOD value.
+ * - The resync TB is a core level action i.e. it will sync all the TBs
+ * in that core independent of split-core mode. This means if we trigger
+ * TB sync from a thread from one subcore, it would affect TB values of
+ * sibling subcores of the same core.
+ *
+ * All threads need to co-ordinate before making the opal hmi handler call.
+ * All threads will use sibling_subcore_state->in_guest[] (shared by all
+ * threads in the core) in paca which holds information about whether
+ * sibling subcores are in Guest mode or host mode. The in_guest[] array
+ * is of size MAX_SUBCORE_PER_CORE=4, indexed using subcore id to set/unset
+ * subcore status. Only the primary thread from each subcore is responsible
+ * for setting/unsetting its designated array element while entering/exiting
+ * the guest.
+ *
+ * After invoking opal hmi handler call, one of the thread (of entire core)
+ * will need to resync the TB. Bit 63 from subcore state bitmap flags
+ * (sibling_subcore_state->flags) will be used to co-ordinate between
+ * primary threads to decide who takes up the responsibility.
+ *
+ * This is what we do:
+ * - Primary thread from each subcore tries to set resync required bit[63]
+ * of paca->sibling_subcore_state->flags.
+ * - The first primary thread that is able to set the flag takes the
+ * responsibility of TB resync. (Let us call it as thread leader)
+ * - All other threads which are in host will call
+ * wait_for_subcore_guest_exit() and wait for in_guest[0-3] from
+ * paca->sibling_subcore_state to get cleared.
+ * - Each primary thread will clear its subcore status in the subcore
+ * state in_guest[] array.
+ * - Once all primary threads clear in_guest[0-3], all of them will invoke
+ * opal hmi handler.
+ * - Now all threads will wait for TB resync to complete by invoking
+ * wait_for_tb_resync() except the thread leader.
+ * - Thread leader will do a TB resync by invoking opal_resync_timebase()
+ * call and then it will clear the resync required bit.
+ * - All other threads will now come out of resync wait loop and proceed
+ * with individual execution.
+ * - On return of this function, primary thread will signal all
+ * secondary threads to proceed.
+ * - All secondary threads will eventually call opal hmi handler on
+ * their exit path.
+ *
+ * Returns 1 if the timebase offset should be applied, 0 if not.
+ */
+
+long kvmppc_realmode_hmi_handler(void)
+{
+ bool resync_req;
+
+ local_paca->hmi_irqs++;
+
+ if (hmi_handle_debugtrig(NULL) >= 0)
+ return 1;
+
+ /*
+ * By now the primary thread has already completed the guest->host
+ * partition switch but hasn't signaled the secondaries yet.
+ * All the secondary threads on this subcore are waiting
+ * for the primary thread to signal them to go ahead.
+ *
+ * For threads from subcore which isn't in guest, they all will
+ * wait until all other subcores on this core exit the guest.
+ *
+ * Now set the resync required bit. If you are the first to
+ * set this bit then kvmppc_tb_resync_required() function will
+ * return true. For all other subcores
+ * kvmppc_tb_resync_required() will return false.
+ *
+ * If resync_req == true, then this thread is responsible to
+ * initiate TB resync after hmi handler has completed.
+ * All other threads on this core will wait until this thread
+ * clears the resync required bit flag.
+ */
+ resync_req = kvmppc_tb_resync_required();
+
+ /* Reset the subcore status to indicate it has exited guest */
+ kvmppc_subcore_exit_guest();
+
+ /*
+ * Wait for other subcores on this core to exit the guest.
+ * All the primary threads and threads from subcore that are
+ * not in guest will wait here until all subcores are out
+ * of guest context.
+ */
+ wait_for_subcore_guest_exit();
+
+ /*
+ * At this point we are sure that primary threads from each
+ * subcore on this core have completed guest->host partition
+ * switch. Now it is safe to call HMI handler.
+ */
+ if (ppc_md.hmi_exception_early)
+ ppc_md.hmi_exception_early(NULL);
+
+ /*
+ * Check if this thread is responsible to resync TB.
+ * All other threads will wait until this thread completes the
+ * TB resync.
+ */
+ if (resync_req) {
+ opal_resync_timebase();
+ /* Reset TB resync req bit */
+ kvmppc_tb_resync_done();
+ } else {
+ wait_for_tb_resync();
+ }
+
+ /*
+ * Reset tb_offset_applied so the guest exit code won't try
+ * to subtract the previous timebase offset from the timebase.
+ */
+ if (local_paca->kvm_hstate.kvm_vcore)
+ local_paca->kvm_hstate.kvm_vcore->tb_offset_applied = 0;
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54c73cd..17cb75a127b0 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -1,7 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*
* Copyright 2010-2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*/
@@ -12,56 +10,52 @@
#include <linux/kvm_host.h>
#include <linux/hugetlb.h>
#include <linux/module.h>
+#include <linux/log2.h>
+#include <linux/sizes.h>
-#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/hvcall.h>
#include <asm/synch.h>
#include <asm/ppc-opcode.h>
+#include <asm/pte-walk.h>
/* Translate address of a vmalloc'd thing to a linear map address */
-static void *real_vmalloc_addr(void *x)
+static void *real_vmalloc_addr(void *addr)
{
- unsigned long addr = (unsigned long) x;
- pte_t *p;
-
- p = find_linux_pte_or_hugepte(swapper_pg_dir, addr, NULL);
- if (!p || !pte_present(*p))
- return NULL;
- /* assume we don't have huge pages in vmalloc space... */
- addr = (pte_pfn(*p) << PAGE_SHIFT) | (addr & ~PAGE_MASK);
- return __va(addr);
+ return __va(ppc_find_vmap_phys((unsigned long)addr));
}
/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
-static int global_invalidates(struct kvm *kvm, unsigned long flags)
+static int global_invalidates(struct kvm *kvm)
{
int global;
+ int cpu;
/*
* If there is only one vcore, and it's currently running,
* as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
* we can use tlbiel as long as we mark all other physical
* cores as potentially having stale TLB entries for this lpid.
- * If we're not using MMU notifiers, we never take pages away
- * from the guest, so we can use tlbiel if requested.
* Otherwise, don't use tlbiel.
*/
if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
global = 0;
- else if (kvm->arch.using_mmu_notifiers)
- global = 1;
else
- global = !(flags & H_LOCAL);
+ global = 1;
+
+ /* LPID has been switched to host if in virt mode so can't do local */
+ if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+ global = 1;
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();
cpumask_setall(&kvm->arch.need_tlb_flush);
- cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
- &kvm->arch.need_tlb_flush);
+ cpu = local_paca->kvm_hstate.kvm_vcore->pcpu;
+ cpumask_clear_cpu(cpu, &kvm->arch.need_tlb_flush);
}
return global;
@@ -79,10 +73,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
if (*rmap & KVMPPC_RMAP_PRESENT) {
i = *rmap & KVMPPC_RMAP_INDEX;
- head = &kvm->arch.revmap[i];
+ head = &kvm->arch.hpt.rev[i];
if (realmode)
head = real_vmalloc_addr(head);
- tail = &kvm->arch.revmap[head->back];
+ tail = &kvm->arch.hpt.rev[head->back];
if (realmode)
tail = real_vmalloc_addr(tail);
rev->forw = i;
@@ -92,36 +86,85 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
} else {
rev->forw = rev->back = pte_index;
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
- pte_index | KVMPPC_RMAP_PRESENT;
+ pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
}
unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+/* Update the dirty bitmap of a memslot */
+void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot,
+ unsigned long gfn, unsigned long psize)
+{
+ unsigned long npages;
+
+ if (!psize || !memslot->dirty_bitmap)
+ return;
+ npages = (psize + PAGE_SIZE - 1) / PAGE_SIZE;
+ gfn -= memslot->base_gfn;
+ set_dirty_bits_atomic(memslot->dirty_bitmap, gfn, npages);
+}
+EXPORT_SYMBOL_GPL(kvmppc_update_dirty_map);
+
+static void kvmppc_set_dirty_from_hpte(struct kvm *kvm,
+ unsigned long hpte_v, unsigned long hpte_gr)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn;
+ unsigned long psize;
+
+ psize = kvmppc_actual_pgsz(hpte_v, hpte_gr);
+ gfn = hpte_rpn(hpte_gr, psize);
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (memslot && memslot->dirty_bitmap)
+ kvmppc_update_dirty_map(memslot, gfn, psize);
+}
+
+/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
+static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
+ unsigned long hpte_gr,
+ struct kvm_memory_slot **memslotp,
+ unsigned long *gfnp)
+{
+ struct kvm_memory_slot *memslot;
+ unsigned long *rmap;
+ unsigned long gfn;
+
+ gfn = hpte_rpn(hpte_gr, kvmppc_actual_pgsz(hpte_v, hpte_gr));
+ memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+ if (memslotp)
+ *memslotp = memslot;
+ if (gfnp)
+ *gfnp = gfn;
+ if (!memslot)
+ return NULL;
+
+ rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
+ return rmap;
+}
+
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
unsigned long hpte_v, unsigned long hpte_r)
{
struct revmap_entry *next, *prev;
- unsigned long gfn, ptel, head;
- struct kvm_memory_slot *memslot;
+ unsigned long ptel, head;
unsigned long *rmap;
unsigned long rcbits;
+ struct kvm_memory_slot *memslot;
+ unsigned long gfn;
rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
ptel = rev->guest_rpte |= rcbits;
- gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
- memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
- if (!memslot)
+ rmap = revmap_for_hpte(kvm, hpte_v, ptel, &memslot, &gfn);
+ if (!rmap)
return;
-
- rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
lock_rmap(rmap);
head = *rmap & KVMPPC_RMAP_INDEX;
- next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
- prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+ next = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->forw]);
+ prev = real_vmalloc_addr(&kvm->arch.hpt.rev[rev->back]);
next->back = rev->back;
prev->forw = rev->forw;
if (head == pte_index) {
@@ -132,34 +175,12 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
}
*rmap |= rcbits << KVMPPC_RMAP_RC_SHIFT;
+ if (rcbits & HPTE_R_C)
+ kvmppc_update_dirty_map(memslot, gfn,
+ kvmppc_actual_pgsz(hpte_v, hpte_r));
unlock_rmap(rmap);
}
-static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
- int writing, unsigned long *pte_sizep)
-{
- pte_t *ptep;
- unsigned long ps = *pte_sizep;
- unsigned int hugepage_shift;
-
- ptep = find_linux_pte_or_hugepte(pgdir, hva, &hugepage_shift);
- if (!ptep)
- return __pte(0);
- if (hugepage_shift)
- *pte_sizep = 1ul << hugepage_shift;
- else
- *pte_sizep = PAGE_SIZE;
- if (ps > *pte_sizep)
- return __pte(0);
- return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
-}
-
-static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
-{
- asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
- hpte[0] = cpu_to_be64(hpte_v);
-}
-
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel,
pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
@@ -170,15 +191,26 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
struct revmap_entry *rev;
unsigned long g_ptel;
struct kvm_memory_slot *memslot;
- unsigned long *physp, pte_size;
- unsigned long is_io;
+ unsigned hpage_shift;
+ bool is_ci;
unsigned long *rmap;
- pte_t pte;
+ pte_t *ptep;
unsigned int writing;
unsigned long mmu_seq;
unsigned long rcbits;
- psize = hpte_page_size(pteh, ptel);
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ /*
+ * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so
+ * these functions should work together -- must ensure a guest can not
+ * cause problems with the TLBIE that KVM executes.
+ */
+ if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) {
+ /* B=0b1x is a reserved value, disallow it. */
+ return H_PARAMETER;
+ }
+ psize = kvmppc_actual_pgsz(pteh, ptel);
if (!psize)
return H_PARAMETER;
writing = hpte_is_writable(ptel);
@@ -187,7 +219,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
g_ptel = ptel;
/* used later to detect if we might have been invalidated */
- mmu_seq = kvm->mmu_notifier_seq;
+ mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/* Find the memslot (if any) for this address */
@@ -195,12 +227,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
gfn = gpa >> PAGE_SHIFT;
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
pa = 0;
- is_io = ~0ul;
+ is_ci = false;
rmap = NULL;
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
- /* PPC970 can't do emulated MMIO */
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- return H_PARAMETER;
/* Emulated MMIO - mark this with key=31 */
pteh |= HPTE_V_ABSENT;
ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,53 +242,53 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
slot_fn = gfn - memslot->base_gfn;
rmap = &memslot->arch.rmap[slot_fn];
- if (!kvm->arch.using_mmu_notifiers) {
- physp = memslot->arch.slot_phys;
- if (!physp)
+ /* Translate to host virtual address */
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+
+ arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+ ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift);
+ if (ptep) {
+ pte_t pte;
+ unsigned int host_pte_size;
+
+ if (hpage_shift)
+ host_pte_size = 1ul << hpage_shift;
+ else
+ host_pte_size = PAGE_SIZE;
+ /*
+ * We should always find the guest page size to be
+ * <= the host page size, if the host is using a hugepage
+ */
+ if (host_pte_size < psize) {
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
return H_PARAMETER;
- physp += slot_fn;
- if (realmode)
- physp = real_vmalloc_addr(physp);
- pa = *physp;
- if (!pa)
- return H_TOO_HARD;
- is_io = pa & (HPTE_R_I | HPTE_R_W);
- pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
- pa &= PAGE_MASK;
- pa |= gpa & ~PAGE_MASK;
- } else {
- /* Translate to host virtual address */
- hva = __gfn_to_hva_memslot(memslot, gfn);
-
- /* Look up the Linux PTE for the backing page */
- pte_size = psize;
- pte = lookup_linux_pte_and_update(pgdir, hva, writing,
- &pte_size);
- if (pte_present(pte) && !pte_numa(pte)) {
+ }
+ pte = kvmppc_read_update_linux_pte(ptep, writing);
+ if (pte_present(pte) && !pte_protnone(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
ptel = hpte_make_readonly(ptel);
- is_io = hpte_cache_bits(pte_val(pte));
+ is_ci = pte_ci(pte);
pa = pte_pfn(pte) << PAGE_SHIFT;
- pa |= hva & (pte_size - 1);
+ pa |= hva & (host_pte_size - 1);
pa |= gpa & ~PAGE_MASK;
}
}
+ arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
- if (pte_size < psize)
- return H_PARAMETER;
-
- ptel &= ~(HPTE_R_PP0 - psize);
+ ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1);
ptel |= pa;
if (pa)
pteh |= HPTE_V_VALID;
- else
+ else {
pteh |= HPTE_V_ABSENT;
+ ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+ }
- /* Check WIMG */
- if (is_io != ~0ul && !hpte_cache_flags_ok(ptel, is_io)) {
- if (is_io)
+ /* If we had a host PTE mapping then check WIMG */
+ if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
+ if (is_ci)
return H_PARAMETER;
/*
* Allow guest to map emulated device memory as
@@ -271,11 +300,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Find and lock the HPTEG slot to use */
do_insert:
- if (pte_index >= kvm->arch.hpt_npte)
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
for (i = 0; i < 8; ++i) {
if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 &&
try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
@@ -295,10 +324,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
u64 pte;
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(*hpte);
+ pte = be64_to_cpu(hpte[0]);
if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
break;
- *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ __unlock_hpte(hpte, pte);
hpte += 2;
}
if (i == 8)
@@ -306,7 +335,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
}
pte_index += i;
} else {
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID |
HPTE_V_ABSENT)) {
/* Lock the slot and check again */
@@ -314,16 +343,16 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(*hpte);
+ pte = be64_to_cpu(hpte[0]);
if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
- *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ __unlock_hpte(hpte, pte);
return H_PTEG_FULL;
}
}
}
/* Save away the guest's idea of the second HPTE dword */
- rev = &kvm->arch.revmap[pte_index];
+ rev = &kvm->arch.hpt.rev[pte_index];
if (realmode)
rev = real_vmalloc_addr(rev);
if (rev) {
@@ -337,11 +366,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
rmap = real_vmalloc_addr(rmap);
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
- if (kvm->arch.using_mmu_notifiers &&
- mmu_notifier_retry(kvm, mmu_seq)) {
+ if (mmu_invalidate_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
+ ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
unlock_rmap(rmap);
} else {
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
@@ -352,11 +381,16 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
}
}
+ /* Convert to new format on P9 */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ ptel = hpte_old_to_new_r(pteh, ptel);
+ pteh = hpte_old_to_new_v(pteh);
+ }
hpte[1] = cpu_to_be64(ptel);
/* Write the first HPTE dword, unlocking the HPTE and making it valid */
eieio();
- hpte[0] = cpu_to_be64(pteh);
+ __unlock_hpte(hpte, pteh);
asm volatile("ptesync" : : : "memory");
*pte_idx_ret = pte_index;
@@ -368,8 +402,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel)
{
return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
- vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+ vcpu->arch.pgdir, true,
+ &vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_enter);
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
@@ -377,66 +413,41 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
#endif
-static inline int try_lock_tlbie(unsigned int *lock)
+/*
+ * Return non-zero when the HPTE described by first dword @v and second
+ * dword @r has HPTE_V_ABSENT set and both HPTE_R_KEY_HI and HPTE_R_KEY_LO
+ * set. This is the marking kvmppc_do_h_enter() applies to emulated MMIO
+ * entries ("mark this with key=31").
+ */
+static inline int is_mmio_hpte(unsigned long v, unsigned long r)
{
- unsigned int tmp, old;
- unsigned int token = LOCK_TOKEN;
-
- asm volatile("1:lwarx %1,0,%2\n"
- " cmpwi cr0,%1,0\n"
- " bne 2f\n"
- " stwcx. %3,0,%2\n"
- " bne- 1b\n"
- " isync\n"
- "2:"
- : "=&r" (tmp), "=&r" (old)
- : "r" (lock), "r" (token)
- : "cc", "memory");
- return old == 0;
+ return ((v & HPTE_V_ABSENT) &&
+ (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
}
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
- long npages, int global, bool need_sync)
+/*
+ * Issue the extra tlbie sequences needed on CPUs that report
+ * CPU_FTR_P9_TLBIE_ERAT_BUG and/or CPU_FTR_P9_TLBIE_STQ_BUG.
+ * do_tlbies() calls this once after its global tlbie loop, passing the
+ * last RB value it used and the guest's lpid. On CPUs with neither
+ * feature bit set this does nothing.
+ */
+static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid)
{
- long i;
- if (global) {
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- if (need_sync)
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i) {
- unsigned long rb = rbvalues[i];
-
- if (rb & 1) /* large page */
- asm volatile("tlbie %0,1" : :
- "r" (rb & 0x0000fffffffff000ul));
- else
- asm volatile("tlbie %0,0" : :
- "r" (rb & 0x0000fffffffff000ul));
- }
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- if (need_sync)
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i) {
- unsigned long rb = rbvalues[i];
-
- if (rb & 1) /* large page */
- asm volatile("tlbiel %0,1" : :
- "r" (rb & 0x0000fffffffff000ul));
- else
- asm volatile("tlbiel %0,0" : :
- "r" (rb & 0x0000fffffffff000ul));
- }
- asm volatile("ptesync" : : : "memory");
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ /* Radix flush for a hash guest */
+
+ unsigned long rb,rs,prs,r,ric;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = 0; /* lpid = 0 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ ric = 0; /* RIC_FLUSH_TLB */
+
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+ */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs),
+ "i"(ric), "r"(rs) : "memory");
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ /* ptesync, then one more tlbie for the caller's RB value and lpid */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
+ "r" (rb_value), "r" (lpid));
}
}
@@ -445,26 +456,28 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
{
long i;
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- /* PPC970 tlbie instruction is a bit different */
- do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
- return;
- }
+ /*
+ * We use the POWER9 5-operand versions of tlbie and tlbiel here.
+ * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
+ * the RS field, this is backwards-compatible with P7 and P8.
+ */
if (global) {
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i)
- asm volatile(PPC_TLBIE(%1,%0) : :
+ for (i = 0; i < npages; ++i) {
+ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
+ }
+
+ fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid);
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i)
- asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+ for (i = 0; i < npages; ++i) {
+ asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
+ "r" (rbvalues[i]), "r" (0));
+ }
asm volatile("ptesync" : : : "memory");
}
}
@@ -476,37 +489,55 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
__be64 *hpte;
unsigned long v, r, rb;
struct revmap_entry *rev;
- u64 pte;
+ u64 pte, orig_pte, pte_r;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(hpte[0]);
+ pte = orig_pte = be64_to_cpu(hpte[0]);
+ pte_r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pte = hpte_new_to_old_v(pte, pte_r);
+ pte_r = hpte_new_to_old_r(pte_r);
+ }
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (pte & avpn) != 0)) {
- hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ __unlock_hpte(hpte, orig_pte);
return H_NOT_FOUND;
}
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
v = pte & ~HPTE_V_HVLOCK;
if (v & HPTE_V_VALID) {
- u64 pte1;
-
- pte1 = be64_to_cpu(hpte[1]);
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(v, pte1, pte_index);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
- /* Read PTE low word after tlbie to get final R/C values */
- remove_revmap_chain(kvm, pte_index, rev, v, pte1);
+ rb = compute_tlbie_rb(v, pte_r, pte_index);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
+ /*
+ * The reference (R) and change (C) bits in a HPT
+ * entry can be set by hardware at any time up until
+ * the HPTE is invalidated and the TLB invalidation
+ * sequence has completed. This means that when
+ * removing a HPTE, we need to re-read the HPTE after
+ * the invalidation sequence has completed in order to
+ * obtain reliable values of R and C.
+ */
+ remove_revmap_chain(kvm, pte_index, rev, v,
+ be64_to_cpu(hpte[1]));
}
r = rev->guest_rpte & ~HPTE_GR_RESERVED;
note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
+ if (is_mmio_hpte(v, pte_r))
+ atomic64_inc(&kvm->arch.mmio_update);
+
+ if (v & HPTE_V_ABSENT)
+ v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
hpret[0] = v;
hpret[1] = r;
return H_SUCCESS;
@@ -517,13 +548,14 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn)
{
return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
- &vcpu->arch.gpr[4]);
+ &vcpu->arch.regs.gpr[4]);
}
+EXPORT_SYMBOL_GPL(kvmppc_h_remove);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- unsigned long *args = &vcpu->arch.gpr[4];
+ unsigned long *args = &vcpu->arch.regs.gpr[4];
__be64 *hp, *hptes[4];
unsigned long tlbrb[4];
long int i, j, k, n, found, indexes[4];
@@ -531,9 +563,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
int global;
long int ret = H_SUCCESS;
struct revmap_entry *rev, *revs[4];
- u64 hp0;
+ u64 hp0, hp1;
- global = global_invalidates(kvm, 0);
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ global = global_invalidates(kvm);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
n = 0;
for (; i < 4; ++i) {
@@ -548,13 +582,13 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
break;
}
if (req != 1 || flags == 3 ||
- pte_index >= kvm->arch.hpt_npte) {
+ pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt)) {
/* parameter error */
args[j] = ((0xa0 | flags) << 56) + pte_index;
ret = H_PARAMETER;
break;
}
- hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4));
+ hp = (__be64 *) (kvm->arch.hpt.virt + (pte_index << 4));
/* to avoid deadlock, don't spin except for first */
if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
if (n)
@@ -564,6 +598,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
found = 0;
hp0 = be64_to_cpu(hp[0]);
+ hp1 = be64_to_cpu(hp[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
switch (flags & 3) {
case 0: /* absolute */
@@ -586,7 +625,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
args[j] = ((0x80 | flags) << 56) + pte_index;
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
note_hpte_modification(kvm, rev);
if (!(hp0 & HPTE_V_VALID)) {
@@ -594,13 +633,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
hp[0] = 0;
+ if (is_mmio_hpte(hp0, hp1))
+ atomic64_inc(&kvm->arch.mmio_update);
continue;
}
/* leave it locked */
hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
- tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
- be64_to_cpu(hp[1]), pte_index);
+ tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
indexes[n] = j;
hptes[n] = hp;
revs[n] = rev;
@@ -623,37 +663,41 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
- hp[0] = 0;
+ __unlock_hpte(hp, 0);
}
}
return ret;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
- unsigned long pte_index, unsigned long avpn,
- unsigned long va)
+ unsigned long pte_index, unsigned long avpn)
{
struct kvm *kvm = vcpu->kvm;
__be64 *hpte;
struct revmap_entry *rev;
unsigned long v, r, rb, mask, bits;
- u64 pte;
+ u64 pte_v, pte_r;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(hpte[0]);
- if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
- ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
- hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
+ v = pte_v = be64_to_cpu(hpte[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
+ if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+ ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
+ __unlock_hpte(hpte, pte_v);
return H_NOT_FOUND;
}
- v = pte;
+ pte_r = be64_to_cpu(hpte[1]);
bits = (flags << 55) & HPTE_R_PP0;
bits |= (flags << 48) & HPTE_R_KEY_HI;
bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
@@ -661,49 +705,42 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
/* Update guest view of 2nd HPTE dword */
mask = HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
HPTE_R_KEY_HI | HPTE_R_KEY_LO;
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
if (rev) {
r = (rev->guest_rpte & ~mask) | bits;
rev->guest_rpte = r;
note_hpte_modification(kvm, rev);
}
- r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
/* Update HPTE */
if (v & HPTE_V_VALID) {
- rb = compute_tlbie_rb(v, r, pte_index);
- hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
- * If the host has this page as readonly but the guest
- * wants to make it read/write, reduce the permissions.
- * Checking the host permissions involves finding the
- * memslot and then the Linux PTE for the page.
+ * If the page is valid, don't let it transition from
+ * readonly to writable. If it should be writable, we'll
+ * take a trap and let the page fault code sort it out.
*/
- if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
- unsigned long psize, gfn, hva;
- struct kvm_memory_slot *memslot;
- pgd_t *pgdir = vcpu->arch.pgdir;
- pte_t pte;
-
- psize = hpte_page_size(v, r);
- gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
- memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
- if (memslot) {
- hva = __gfn_to_hva_memslot(memslot, gfn);
- pte = lookup_linux_pte_and_update(pgdir, hva,
- 1, &psize);
- if (pte_present(pte) && !pte_write(pte))
- r = hpte_make_readonly(r);
- }
+ r = (pte_r & ~mask) | bits;
+ if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
+ r = hpte_make_readonly(r);
+ /* If the PTE is changing, invalidate it first */
+ if (r != pte_r) {
+ rb = compute_tlbie_rb(v, r, pte_index);
+ hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
+ HPTE_V_ABSENT);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm), true);
+ /* Don't lose R/C bit updates done by hardware */
+ r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
+ hpte[1] = cpu_to_be64(r);
}
}
- hpte[1] = cpu_to_be64(r);
- eieio();
- hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+ unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
asm volatile("ptesync" : : : "memory");
+ if (is_mmio_hpte(v, pte_r))
+ atomic64_inc(&kvm->arch.mmio_update);
+
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_protect);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
@@ -714,17 +751,23 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
int i, n = 1;
struct revmap_entry *rev = NULL;
- if (pte_index >= kvm->arch.hpt_npte)
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
return H_PARAMETER;
if (flags & H_READ_4) {
pte_index &= ~3;
n = 4;
}
- rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
for (i = 0; i < n; ++i, ++pte_index) {
- hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
if (v & HPTE_V_ABSENT) {
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
@@ -733,20 +776,257 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
r &= ~HPTE_GR_RESERVED;
}
- vcpu->arch.gpr[4 + i * 2] = v;
- vcpu->arch.gpr[5 + i * 2] = r;
+ kvmppc_set_gpr(vcpu, 4 + i * 2, v);
+ kvmppc_set_gpr(vcpu, 5 + i * 2, r);
}
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_h_read);
+
+/*
+ * Clear the reference (R) bit of the HPTE at @pte_index and return the
+ * guest's view of the second HPTE dword (including any R/C bits found in
+ * the real HPTE) in GPR4.
+ *
+ * Returns H_FUNCTION for a radix guest, H_PARAMETER if @pte_index is
+ * beyond the HPT, H_NOT_FOUND if the entry is neither valid nor absent,
+ * and H_SUCCESS otherwise. @flags is not used here.
+ */
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index)
+{
+ struct kvm *kvm = vcpu->kvm;
+ __be64 *hpte;
+ unsigned long v, r, gr;
+ struct revmap_entry *rev;
+ unsigned long *rmap;
+ long ret = H_NOT_FOUND;
+
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+ return H_PARAMETER;
+
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+ /* Spin until we own the HPTE lock bit; it is dropped at "out" */
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = be64_to_cpu(hpte[0]);
+ r = be64_to_cpu(hpte[1]);
+ if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ goto out;
+
+ /* gr keeps the value from before R is cleared in the guest view */
+ gr = rev->guest_rpte;
+ if (rev->guest_rpte & HPTE_R_R) {
+ rev->guest_rpte &= ~HPTE_R_R;
+ note_hpte_modification(kvm, rev);
+ }
+ if (v & HPTE_V_VALID) {
+ /* Fold the R/C bits from the real HPTE into the returned value */
+ gr |= r & (HPTE_R_R | HPTE_R_C);
+ if (r & HPTE_R_R) {
+ kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
+ /*
+ * NOTE(review): v is passed on without the
+ * hpte_new_to_old_v() conversion that other handlers in
+ * this file apply on CPU_FTR_ARCH_300 - confirm intended.
+ */
+ rmap = revmap_for_hpte(kvm, v, gr, NULL, NULL);
+ if (rmap) {
+ /* Record the reference in the rmap entry instead */
+ lock_rmap(rmap);
+ *rmap |= KVMPPC_RMAP_REFERENCED;
+ unlock_rmap(rmap);
+ }
+ }
+ }
+ kvmppc_set_gpr(vcpu, 4, gr);
+ ret = H_SUCCESS;
+ out:
+ /* Write back the first dword without the lock bit to release the HPTE */
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
+
+/*
+ * Clear the change (C) bit of the HPTE at @pte_index and return the
+ * guest's view of the second HPTE dword (including any R/C bits found in
+ * the real HPTE) in GPR4.
+ *
+ * Returns H_FUNCTION for a radix guest, H_PARAMETER if @pte_index is
+ * beyond the HPT, H_NOT_FOUND if the entry is neither valid nor absent,
+ * and H_SUCCESS otherwise. @flags is not used here.
+ */
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index)
+{
+ struct kvm *kvm = vcpu->kvm;
+ __be64 *hpte;
+ unsigned long v, r, gr;
+ struct revmap_entry *rev;
+ long ret = H_NOT_FOUND;
+
+ if (kvm_is_radix(kvm))
+ return H_FUNCTION;
+ if (pte_index >= kvmppc_hpt_npte(&kvm->arch.hpt))
+ return H_PARAMETER;
+
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[pte_index]);
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (pte_index << 4));
+ /* Spin until we own the HPTE lock bit; it is dropped at "out" */
+ while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = be64_to_cpu(hpte[0]);
+ r = be64_to_cpu(hpte[1]);
+ if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
+ goto out;
+
+ /* gr keeps the value from before C is cleared in the guest view */
+ gr = rev->guest_rpte;
+ if (gr & HPTE_R_C) {
+ rev->guest_rpte &= ~HPTE_R_C;
+ note_hpte_modification(kvm, rev);
+ }
+ if (v & HPTE_V_VALID) {
+ /* need to make it temporarily absent so C is stable */
+ hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
+ kvmppc_invalidate_hpte(kvm, hpte, pte_index);
+ /* Re-read the second dword now that the entry is invalidated */
+ r = be64_to_cpu(hpte[1]);
+ gr |= r & (HPTE_R_R | HPTE_R_C);
+ if (r & HPTE_R_C) {
+ hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
+ eieio();
+ /*
+ * NOTE(review): v is passed on without the
+ * hpte_new_to_old_v() conversion that other handlers in
+ * this file apply on CPU_FTR_ARCH_300 - confirm intended.
+ */
+ kvmppc_set_dirty_from_hpte(kvm, v, gr);
+ }
+ }
+ kvmppc_set_gpr(vcpu, 4, gr);
+ ret = H_SUCCESS;
+ out:
+ /* Restore the original first dword (valid again) minus the lock bit */
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
+
+/*
+ * Translate guest physical address @gpa into a host physical address.
+ *
+ * @vcpu:      vcpu whose VM owns the memslots to search
+ * @mmu_seq:   invalidation sequence sampled by the caller, handed to
+ *             find_kvm_host_pte()
+ * @gpa:       guest physical address to translate
+ * @writing:   non-zero if the caller will write to the page; passed to
+ *             kvmppc_read_update_linux_pte()
+ * @hpa:       if non-NULL, receives the host physical address
+ * @memslot_p: if non-NULL, receives the memslot containing @gpa
+ *
+ * Both callers in this file hold kvm->mmu_lock around the call.
+ *
+ * Returns H_PARAMETER if no valid memslot covers @gpa, H_TOO_HARD if no
+ * present host PTE is found, and H_SUCCESS otherwise.
+ */
+static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
+			  unsigned long gpa, int writing, unsigned long *hpa,
+			  struct kvm_memory_slot **memslot_p)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *memslot;
+	/*
+	 * psize is a size in bytes: it is used as a (psize - 1) offset
+	 * mask below and overwritten with 1UL << shift for huge mappings,
+	 * so the base-page default must be PAGE_SIZE, not PAGE_SHIFT.
+	 */
+	unsigned long gfn, hva, pa, psize = PAGE_SIZE;
+	unsigned int shift;
+	pte_t *ptep, pte;
+
+	/* Find the memslot for this address */
+	gfn = gpa >> PAGE_SHIFT;
+	memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return H_PARAMETER;
+
+	/* Translate to host virtual address */
+	hva = __gfn_to_hva_memslot(memslot, gfn);
+
+	/* Try to find the host pte for that virtual address */
+	ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift);
+	if (!ptep)
+		return H_TOO_HARD;
+	pte = kvmppc_read_update_linux_pte(ptep, writing);
+	if (!pte_present(pte))
+		return H_TOO_HARD;
+
+	/* Convert to a physical address */
+	if (shift)
+		psize = 1UL << shift;
+	pa = pte_pfn(pte) << PAGE_SHIFT;
+	/* offset of the base page inside the (possibly huge) host mapping */
+	pa |= hva & (psize - 1);
+	/* offset of the byte inside the base page */
+	pa |= gpa & ~PAGE_MASK;
+
+	if (hpa)
+		*hpa = pa;
+	if (memslot_p)
+		*memslot_p = memslot;
+
+	return H_SUCCESS;
+}
+
+/*
+ * Zero 4K of guest memory starting at guest physical address @dest and
+ * mark the containing page dirty in its memslot.
+ *
+ * The address lookup, the zeroing and the dirty-map update all happen
+ * with kvm->mmu_lock held. Returns H_SUCCESS, or the error code from
+ * kvmppc_get_hpa() if @dest cannot be translated.
+ */
+static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
+				       unsigned long dest)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *slot;
+	unsigned long host_pa, seq, off;
+	long rc;
+
+	/* Sample the sequence first so a later invalidation can be detected */
+	seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+
+	rc = kvmppc_get_hpa(vcpu, seq, dest, 1, &host_pa, &slot);
+	if (rc == H_SUCCESS) {
+		/* Walk the 4K range one L1 cache line at a time */
+		for (off = 0; off < SZ_4K; off += L1_CACHE_BYTES)
+			dcbz((void *)(host_pa + off));
+		kvmppc_update_dirty_map(slot, dest >> PAGE_SHIFT, PAGE_SIZE);
+	}
+
+	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+	return rc;
+}
+
+/*
+ * Copy 4K of guest memory from guest physical address @src to guest
+ * physical address @dest and mark the destination page dirty in its
+ * memslot.
+ *
+ * Both address lookups, the copy and the dirty-map update happen with
+ * kvm->mmu_lock held. The destination is translated first (for writing),
+ * then the source (for reading). Returns H_SUCCESS, or the error code of
+ * whichever kvmppc_get_hpa() call failed first.
+ */
+static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
+				       unsigned long dest, unsigned long src)
+{
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_memory_slot *to_slot;
+	unsigned long to_pa, from_pa, seq;
+	long rc;
+
+	/* Sample the sequence first so a later invalidation can be detected */
+	seq = kvm->mmu_invalidate_seq;
+	smp_rmb();
+
+	arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
+
+	rc = kvmppc_get_hpa(vcpu, seq, dest, 1, &to_pa, &to_slot);
+	if (rc == H_SUCCESS)
+		rc = kvmppc_get_hpa(vcpu, seq, src, 0, &from_pa, NULL);
+
+	if (rc == H_SUCCESS) {
+		/* Both translations succeeded: copy, then record the write */
+		memcpy((void *)to_pa, (void *)from_pa, SZ_4K);
+		kvmppc_update_dirty_map(to_slot, dest >> PAGE_SHIFT,
+					PAGE_SIZE);
+	}
+
+	arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
+	return rc;
+}
+
+/*
+ * Validate a page-init request and dispatch it to the copy or zero helper.
+ *
+ * Returns H_TOO_HARD for a radix guest (left to the virtual mode
+ * handler), H_PARAMETER for unknown flags or a misaligned address, the
+ * helper's result when H_COPY_PAGE or H_ZERO_PAGE is set (copy takes
+ * precedence), and H_SUCCESS when neither is requested.
+ */
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+			   unsigned long dest, unsigned long src)
+{
+	const u64 low_bits = SZ_4K - 1;	/* offset bits within a 4K page */
+
+	/* Radix guests are not handled here; go up to virtual mode */
+	if (kvm_is_radix(vcpu->kvm))
+		return H_TOO_HARD;
+
+	/* Reject unknown flags (H_PAGE_SET_LOANED covers all CMO flags) */
+	if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+		      H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+		return H_PARAMETER;
+
+	/* The destination must always be 4K aligned ... */
+	if (dest & low_bits)
+		return H_PARAMETER;
+
+	if (flags & H_COPY_PAGE) {
+		/* ... and so must the source when a copy is requested */
+		if (src & low_bits)
+			return H_PARAMETER;
+		return kvmppc_do_h_page_init_copy(vcpu, dest, src);
+	}
+
+	if (flags & H_ZERO_PAGE)
+		return kvmppc_do_h_page_init_zero(vcpu, dest);
+
+	/* The remaining flags can be ignored */
+	return H_SUCCESS;
+}
+/*
+ * Clear HPTE_V_VALID in the HPTE at @hptep and flush it with a global
+ * tlbie (do_tlbies() with global = 1 and need_sync = true). @pte_index is
+ * only used to compute the tlbie RB value.
+ */
void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
unsigned long rb;
+ u64 hp0, hp1;
hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
- pte_index);
+ hp0 = be64_to_cpu(hptep[0]);
+ hp1 = be64_to_cpu(hptep[1]);
+ /* On CPU_FTR_ARCH_300, convert both dwords to the old HPTE format first */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
+ rb = compute_tlbie_rb(hp0, hp1, pte_index);
do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
@@ -756,9 +1036,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
{
unsigned long rb;
unsigned char rbyte;
+ u64 hp0, hp1;
- rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
- pte_index);
+ hp0 = be64_to_cpu(hptep[0]);
+ hp1 = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
+ rb = compute_tlbie_rb(hp0, hp1, pte_index);
rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
/* modify only the second-last byte, which contains the ref bit */
*((char *)hptep + 14) = rbyte;
@@ -773,6 +1059,37 @@ static int slb_base_page_shift[4] = {
20, /* 1M, unsupported */
};
+/*
+ * Look through the vcpu's MMIO HPTE cache for an entry that was recorded
+ * at the same @mmio_update count, covers @eaddr (compared at the entry's
+ * own slb_base_pshift granularity) and has the same @slb_v.
+ *
+ * Returns the first matching entry, or NULL if there is none.
+ */
+static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
+	unsigned long eaddr, unsigned long slb_v, long mmio_update)
+{
+	unsigned int slot;
+
+	for (slot = 0; slot < MMIO_HPTE_CACHE_SIZE; slot++) {
+		struct mmio_hpte_cache_entry *cand;
+		unsigned int shift;
+
+		cand = &vcpu->arch.mmio_cache.entry[slot];
+
+		/* Entries from a different mmio_update count never match */
+		if (cand->mmio_update != mmio_update)
+			continue;
+
+		shift = cand->slb_base_pshift;
+		if ((cand->eaddr >> shift) != (eaddr >> shift))
+			continue;
+
+		if (cand->slb_v == slb_v)
+			return cand;
+	}
+
+	return NULL;
+}
+
+/*
+ * Hand out the MMIO HPTE cache entry at the vcpu's current cache index
+ * and advance that index, wrapping to 0 once it reaches
+ * MMIO_HPTE_CACHE_SIZE.
+ */
+static struct mmio_hpte_cache_entry *
+	next_mmio_cache_entry(struct kvm_vcpu *vcpu)
+{
+	unsigned int slot = vcpu->arch.mmio_cache.index;
+
+	/* Step the stored index, then wrap it if it ran off the end */
+	if (++vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
+		vcpu->arch.mmio_cache.index = 0;
+
+	return &vcpu->arch.mmio_cache.entry[slot];
+}
+
/* When called from virtmode, this func should be protected by
* preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
* can trigger deadlock issue.
@@ -787,7 +1104,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
unsigned long avpn;
__be64 *hpte;
unsigned long mask, val;
- unsigned long v, r;
+ unsigned long v, r, orig_v;
/* Get page shift, work out hash and AVPN etc. */
mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
@@ -806,7 +1123,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
somask = (1UL << 28) - 1;
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
}
- hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
+ hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvmppc_hpt_mask(&kvm->arch.hpt);
avpn = slb_v & ~(somask >> 16); /* also includes B */
avpn |= (eaddr & somask) >> 16;
@@ -817,11 +1134,13 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
val |= avpn;
for (;;) {
- hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7));
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (hash << 7));
for (i = 0; i < 16; i += 2) {
/* Read the PTE racily */
v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
/* Check valid/absent, hash, segment size and AVPN */
if (!(v & valid) || (v & mask) != val)
@@ -830,25 +1149,28 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
/* Lock the PTE and read it under the lock */
while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
cpu_relax();
- v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+ v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[i+1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
/*
* Check the HPTE again, including base page size
*/
if ((v & valid) && (v & mask) == val &&
- hpte_base_page_size(v, r) == (1ul << pshift))
+ kvmppc_hpte_base_page_shift(v, r) == pshift)
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
- /* Unlock and move on */
- hpte[i] = cpu_to_be64(v);
+ __unlock_hpte(&hpte[i], orig_v);
}
if (val & HPTE_V_SECONDARY)
break;
val |= HPTE_V_SECONDARY;
- hash = hash ^ kvm->arch.hpt_mask;
+ hash = hash ^ kvmppc_hpt_mask(&kvm->arch.hpt);
}
return -1;
}
@@ -870,30 +1192,45 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
struct kvm *kvm = vcpu->kvm;
long int index;
- unsigned long v, r, gr;
+ unsigned long v, r, gr, orig_v;
__be64 *hpte;
unsigned long valid;
struct revmap_entry *rev;
unsigned long pp, key;
+ struct mmio_hpte_cache_entry *cache_entry = NULL;
+ long mmio_update = 0;
/* For protection fault, expect to find a valid HPTE */
valid = HPTE_V_VALID;
- if (status & DSISR_NOHPTE)
+ if (status & DSISR_NOHPTE) {
valid |= HPTE_V_ABSENT;
-
- index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
- if (index < 0) {
- if (status & DSISR_NOHPTE)
- return status; /* there really was no HPTE */
- return 0; /* for prot fault, HPTE disappeared */
+ mmio_update = atomic64_read(&kvm->arch.mmio_update);
+ cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
}
- hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
- r = be64_to_cpu(hpte[1]);
- rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
- gr = rev->guest_rpte;
+ if (cache_entry) {
+ index = cache_entry->pte_index;
+ v = cache_entry->hpte_v;
+ r = cache_entry->hpte_r;
+ gr = cache_entry->rpte;
+ } else {
+ index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
+ hpte = (__be64 *)(kvm->arch.hpt.virt + (index << 4));
+ v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+ r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
+ rev = real_vmalloc_addr(&kvm->arch.hpt.rev[index]);
+ gr = rev->guest_rpte;
- unlock_hpte(hpte, v);
+ unlock_hpte(hpte, orig_v);
+ }
/* For not found, if the HPTE is valid by now, retry the instruction */
if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
@@ -905,7 +1242,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */
if (!data) {
if (gr & (HPTE_R_N | HPTE_R_G))
- return status | SRR1_ISI_N_OR_G;
+ return status | SRR1_ISI_N_G_OR_CIP;
if (!hpte_read_permission(pp, slb_v & key))
return status | SRR1_ISI_PROT;
} else if (status & DSISR_ISSTORE) {
@@ -931,12 +1268,33 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
vcpu->arch.pgfault_index = index;
vcpu->arch.pgfault_hpte[0] = v;
vcpu->arch.pgfault_hpte[1] = r;
+ vcpu->arch.pgfault_cache = cache_entry;
/* Check the storage key to see if it is possibly emulated MMIO */
- if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
- (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
- (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
- return -2; /* MMIO emulation - load instr word */
+ if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
+ if (!cache_entry) {
+ unsigned int pshift = 12;
+ unsigned int pshift_index;
+
+ if (slb_v & SLB_VSID_L) {
+ pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
+ pshift = slb_base_page_shift[pshift_index];
+ }
+ cache_entry = next_mmio_cache_entry(vcpu);
+ cache_entry->eaddr = addr;
+ cache_entry->slb_base_pshift = pshift;
+ cache_entry->pte_index = index;
+ cache_entry->hpte_v = v;
+ cache_entry->hpte_r = r;
+ cache_entry->rpte = gr;
+ cache_entry->slb_v = slb_v;
+ cache_entry->mmio_update = mmio_update;
+ }
+ if (data && (vcpu->arch.shregs.msr & MSR_IR))
+ return -2; /* MMIO emulation - load instr word */
+ }
return -1; /* send fault up to host kernel mode */
}
+EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3ee38e6e884f..f2636414d82a 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -1,44 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
+#include <linux/kernel_stat.h>
+#include <linux/pgtable.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
-#include <asm/debug.h>
#include <asm/synch.h>
+#include <asm/cputhreads.h>
#include <asm/ppc-opcode.h>
+#include <asm/pnv-pci.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
#include "book3s_xics.h"
#define DEBUG_PASSUP
-static inline void rm_writeb(unsigned long paddr, u8 val)
+int h_ipi_redirect = 1;
+EXPORT_SYMBOL(h_ipi_redirect);
+int kvm_irq_bypass = 1;
+EXPORT_SYMBOL(kvm_irq_bypass);
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq, bool check_resend);
+static int xics_opal_set_server(unsigned int hw_irq, int server_cpu);
+
+/* -- ICS routines -- */
+static void ics_rm_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_ics *ics, struct kvmppc_icp *icp)
+{
+ int i;
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct ics_irq_state *state = &ics->irq_state[i];
+ if (state->resend)
+ icp_rm_deliver_irq(xics, icp, state->number, true);
+ }
+
+}
+
+/* -- ICP routines -- */
+
+#ifdef CONFIG_SMP
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
+{
+ int hcpu;
+
+ hcpu = hcore << threads_shift;
+ kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
+ smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
+ kvmppc_set_host_ipi(hcpu);
+ smp_mb();
+ kvmhv_rm_send_ipi(hcpu);
+}
+#else
+static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
+#endif
+
+/*
+ * We start the search from our current CPU Id in the core map
+ * and go in a circle until we get back to our ID looking for a
+ * core that is running in host context and that hasn't already
+ * been targeted for another rm_host_ops.
+ *
+ * In the future, could consider using a fairer algorithm (one
+ * that distributes the IPIs better)
+ *
+ * Returns -1, if no CPU could be found in the host
+ * Else, returns a CPU Id which has been reserved for use
+ */
+static inline int grab_next_hostcore(int start,
+ struct kvmppc_host_rm_core *rm_core, int max, int action)
{
- __asm__ __volatile__("sync; stbcix %0,0,%1"
- : : "r" (val), "r" (paddr) : "memory");
+ bool success;
+ int core;
+ union kvmppc_rm_state old, new;
+
+ for (core = start + 1; core < max; core++) {
+ old = new = READ_ONCE(rm_core[core].rm_state);
+
+ if (!old.in_host || old.rm_action)
+ continue;
+
+ /* Try to grab this host core if not taken already. */
+ new.rm_action = action;
+
+ success = cmpxchg64(&rm_core[core].rm_state.raw,
+ old.raw, new.raw) == old.raw;
+ if (success) {
+ /*
+ * Make sure that the store to the rm_action is made
+ * visible before we return to caller (and the
+ * subsequent store to rm_data) to synchronize with
+ * the IPI handler.
+ */
+ smp_wmb();
+ return core;
+ }
+ }
+
+ return -1;
+}
+
+static inline int find_available_hostcore(int action)
+{
+ int core;
+ int my_core = smp_processor_id() >> threads_shift;
+ struct kvmppc_host_rm_core *rm_core = kvmppc_host_rm_ops_hv->rm_core;
+
+ core = grab_next_hostcore(my_core, rm_core, cpu_nr_cores(), action);
+ if (core == -1)
+ core = grab_next_hostcore(core, rm_core, my_core, action);
+
+ return core;
}
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
- unsigned long xics_phys;
int cpu;
+ int hcore;
/* Mark the target VCPU as having an interrupt pending */
vcpu->stat.queue_intr++;
- set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+ set_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
/* Kick self ? Just set MER and return */
if (vcpu == this_vcpu) {
@@ -46,26 +141,33 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
- /* Check if the core is loaded, if not, too hard */
- cpu = vcpu->cpu;
+ /*
+ * Check if the core is loaded,
+ * if not, find an available host core to post to wake the VCPU,
+ * if we can't find one, set up state to eventually return too hard.
+ */
+ cpu = vcpu->arch.thread_cpu;
if (cpu < 0 || cpu >= nr_cpu_ids) {
- this_icp->rm_action |= XICS_RM_KICK_VCPU;
- this_icp->rm_kick_target = vcpu;
+ hcore = -1;
+ if (kvmppc_host_rm_ops_hv && h_ipi_redirect)
+ hcore = find_available_hostcore(XICS_RM_KICK_VCPU);
+ if (hcore != -1) {
+ icp_send_hcore_msg(hcore, vcpu);
+ } else {
+ this_icp->rm_action |= XICS_RM_KICK_VCPU;
+ this_icp->rm_kick_target = vcpu;
+ }
return;
}
- /* In SMT cpu will always point to thread 0, we adjust it */
- cpu += vcpu->arch.ptid;
- /* Not too hard, then poke the target */
- xics_phys = paca[cpu].kvm_hstate.xics_phys;
- rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ smp_mb();
+ kvmhv_rm_send_ipi(cpu);
}
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
{
/* Note: Only called on self ! */
- clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
- &vcpu->arch.pending_exceptions);
+ clear_bit(BOOK3S_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_MER);
}
@@ -116,6 +218,194 @@ static inline int check_too_hard(struct kvmppc_xics *xics,
return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
}
+static void icp_rm_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ u32 icsid;
+
+ /* Order this load with the test for need_resend in the caller */
+ smp_rmb();
+ for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
+ struct kvmppc_ics *ics = xics->ics[icsid];
+
+ if (!test_and_clear_bit(icsid, icp->resend_map))
+ continue;
+ if (!ics)
+ continue;
+ ics_rm_check_resend(xics, ics, icp);
+ }
+}
+
+static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
+ u32 *reject)
+{
+ union kvmppc_icp_state old_state, new_state;
+ bool success;
+
+ do {
+ old_state = new_state = READ_ONCE(icp->state);
+
+ *reject = 0;
+
+ /* See if we can deliver */
+ success = new_state.cppr > priority &&
+ new_state.mfrr > priority &&
+ new_state.pending_pri > priority;
+
+ /*
+ * If we can, check for a rejection and perform the
+ * delivery
+ */
+ if (success) {
+ *reject = new_state.xisr;
+ new_state.xisr = irq;
+ new_state.pending_pri = priority;
+ } else {
+ /*
+ * If we failed to deliver we set need_resend
+ * so a subsequent CPPR state change causes us
+ * to try a new delivery.
+ */
+ new_state.need_resend = true;
+ }
+
+ } while (!icp_rm_try_update(icp, old_state, new_state));
+
+ return success;
+}
+
+static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 new_irq, bool check_resend)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u32 reject;
+ u16 src;
+
+ /*
+ * This is used both for initial delivery of an interrupt and
+ * for subsequent rejection.
+ *
+ * Rejection can be racy vs. resends. We have evaluated the
+ * rejection in an atomic ICP transaction which is now complete,
+ * so potentially the ICP can already accept the interrupt again.
+ *
+ * So we need to retry the delivery. Essentially the reject path
+ * boils down to a failed delivery. Always.
+ *
+ * Now the interrupt could also have moved to a different target,
+ * thus we may need to re-do the ICP lookup as well
+ */
+
+ again:
+ /* Get the ICS state and lock it */
+ ics = kvmppc_xics_find_ics(xics, new_irq, &src);
+ if (!ics) {
+ /* Unsafe increment, but this does not need to be accurate */
+ xics->err_noics++;
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ /* Get a lock on the ICS */
+ arch_spin_lock(&ics->lock);
+
+ /* Get our server */
+ if (!icp || state->server != icp->server_num) {
+ icp = kvmppc_xics_find_server(xics->kvm, state->server);
+ if (!icp) {
+ /* Unsafe increment again*/
+ xics->err_noicp++;
+ goto out;
+ }
+ }
+
+ if (check_resend)
+ if (!state->resend)
+ goto out;
+
+ /* Clear the resend bit of that interrupt */
+ state->resend = 0;
+
+ /*
+ * If masked, bail out
+ *
+ * Note: PAPR doesn't mention anything about masked pending
+ * when doing a resend, only when doing a delivery.
+ *
+ * However that would have the effect of losing a masked
+ * interrupt that was rejected and isn't consistent with
+ * the whole masked_pending business which is about not
+ * losing interrupts that occur while masked.
+ *
+ * I don't differentiate normal deliveries and resends, this
+ * implementation will differ from PAPR and not lose such
+ * interrupts.
+ */
+ if (state->priority == MASKED) {
+ state->masked_pending = 1;
+ goto out;
+ }
+
+ /*
+ * Try the delivery, this will set the need_resend flag
+ * in the ICP as part of the atomic transaction if the
+ * delivery is not possible.
+ *
+ * Note that if successful, the new delivery might have itself
+ * rejected an interrupt that was "delivered" before we took the
+ * ics spin lock.
+ *
+ * In this case we do the whole sequence all over again for the
+ * new guy. We cannot assume that the rejected interrupt is less
+ * favored than the new one, and thus doesn't need to be delivered,
+ * because by the time we exit icp_rm_try_to_deliver() the target
+ * processor may well have already consumed & completed it, and thus
+ * the rejected interrupt might actually be already acceptable.
+ */
+ if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
+ /*
+ * Delivery was successful, did we reject somebody else ?
+ */
+ if (reject && reject != XICS_IPI) {
+ arch_spin_unlock(&ics->lock);
+ icp->n_reject++;
+ new_irq = reject;
+ check_resend = 0;
+ goto again;
+ }
+ } else {
+ /*
+ * We failed to deliver the interrupt we need to set the
+ * resend map bit and mark the ICS state as needing a resend
+ */
+ state->resend = 1;
+
+ /*
+ * Make sure when checking resend, we don't miss the resend
+ * if resend_map bit is seen and cleared.
+ */
+ smp_wmb();
+ set_bit(ics->icsid, icp->resend_map);
+
+ /*
+ * If the need_resend flag got cleared in the ICP some time
+ * between icp_rm_try_to_deliver() atomic update and now, then
+ * we know it might have missed the resend_map bit. So we
+ * retry
+ */
+ smp_mb();
+ if (!icp->state.need_resend) {
+ state->resend = 0;
+ arch_spin_unlock(&ics->lock);
+ check_resend = 0;
+ goto again;
+ }
+ }
+ out:
+ arch_spin_unlock(&ics->lock);
+}
+
static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u8 new_cppr)
{
@@ -152,7 +442,7 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* in virtual mode.
*/
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
/* Down_CPPR */
new_state.cppr = new_cppr;
@@ -183,12 +473,19 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* state update in HW (ie bus transactions) so we can handle them
* separately here as well.
*/
- if (resend)
- icp->rm_action |= XICS_RM_CHECK_RESEND;
+ if (resend) {
+ icp->n_check_resend++;
+ icp_rm_check_resend(xics, icp);
+ }
}
+unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu)
+{
+ kvmppc_set_gpr(vcpu, 5, get_tb());
+ return xics_rm_h_xirr(vcpu);
+}
-unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -209,7 +506,7 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
* pending priority
*/
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
if (!old_state.xisr)
@@ -221,13 +518,13 @@ unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Return the result in GPR4 */
- vcpu->arch.gpr[4] = xirr;
+ kvmppc_set_gpr(vcpu, 4, xirr);
return check_too_hard(xics, icp);
}
-int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
- unsigned long mfrr)
+int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -254,13 +551,28 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
* nothing needs to be done as there can be no XISR to
* reject.
*
+ * ICP state: Check_IPI
+ *
* If the CPPR is less favored, then we might be replacing
- * an interrupt, and thus need to possibly reject it as in
+ * an interrupt, and thus need to possibly reject it.
*
- * ICP state: Check_IPI
+ * ICP State: IPI
+ *
+ * Besides rejecting any pending interrupts, we also
+ * update XISR and pending_pri to mark IPI as pending.
+ *
+ * PAPR does not describe this state, but if the MFRR is being
+ * made less favored than its earlier value, there might be
+ * a previously-rejected interrupt needing to be resent.
+ * Ideally, we would want to resend only if
+ * prio(pending_interrupt) < mfrr &&
+ * prio(pending_interrupt) < cppr
+ * where pending interrupt is the one that was rejected. But
+ * we don't have that state, so we simply trigger a resend
+ * whenever the MFRR is made less favored.
*/
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
/* Set_MFRR */
new_state.mfrr = mfrr;
@@ -270,32 +582,35 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
- if (mfrr <= new_state.pending_pri)
+ if (mfrr <= new_state.pending_pri) {
reject = new_state.xisr;
- new_state.pending_pri = mfrr;
- new_state.xisr = XICS_IPI;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
}
- if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ if (mfrr > old_state.mfrr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
} while (!icp_rm_try_update(icp, old_state, new_state));
- /* Pass rejects to virtual mode */
+ /* Handle reject in real mode */
if (reject && reject != XICS_IPI) {
- this_icp->rm_action |= XICS_RM_REJECT;
- this_icp->rm_reject = reject;
+ this_icp->n_reject++;
+ icp_rm_deliver_irq(xics, icp, reject, false);
}
- /* Pass resends to virtual mode */
- if (resend)
- this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+ /* Handle resends in real mode */
+ if (resend) {
+ this_icp->n_check_resend++;
+ icp_rm_check_resend(xics, icp);
+ }
return check_too_hard(xics, this_icp);
}
-int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
union kvmppc_icp_state old_state, new_state;
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -332,7 +647,7 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
icp_rm_clr_vcpu_irq(icp->vcpu);
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
reject = 0;
new_state.cppr = cppr;
@@ -345,23 +660,82 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
} while (!icp_rm_try_update(icp, old_state, new_state));
- /* Pass rejects to virtual mode */
+ /*
+ * Check for rejects. They are handled by doing a new delivery
+ * attempt (see comments in icp_rm_deliver_irq).
+ */
if (reject && reject != XICS_IPI) {
- icp->rm_action |= XICS_RM_REJECT;
- icp->rm_reject = reject;
+ icp->n_reject++;
+ icp_rm_deliver_irq(xics, icp, reject, false);
}
bail:
return check_too_hard(xics, icp);
}
-int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+static int ics_rm_eoi(struct kvm_vcpu *vcpu, u32 irq)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
- u32 irq = xirr & 0x00ffffff;
u16 src;
+ u32 pq_old, pq_new;
+
+ /*
+ * ICS EOI handling: For LSI, if P bit is still set, we need to
+ * resend it.
+ *
+ * For MSI, we move Q bit into P (and clear Q). If it is set,
+ * resend it.
+ */
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ goto bail;
+
+ state = &ics->irq_state[src];
+
+ if (state->lsi)
+ pq_new = state->pq_state;
+ else
+ do {
+ pq_old = state->pq_state;
+ pq_new = pq_old >> 1;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ if (pq_new & PQ_PRESENTED)
+ icp_rm_deliver_irq(xics, NULL, irq, false);
+
+ if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
+ icp->rm_action |= XICS_RM_NOTIFY_EOI;
+ icp->rm_eoied_irq = irq;
+ }
+
+ /* Handle passthrough interrupts */
+ if (state->host_irq) {
+ ++vcpu->stat.pthru_all;
+ if (state->intr_cpu != -1) {
+ int pcpu = raw_smp_processor_id();
+
+ pcpu = cpu_first_thread_sibling(pcpu);
+ ++vcpu->stat.pthru_host;
+ if (state->intr_cpu != pcpu) {
+ ++vcpu->stat.pthru_bad_aff;
+ xics_opal_set_server(state->host_irq, pcpu);
+ }
+ state->intr_cpu = -1;
+ }
+ }
+
+ bail:
+ return check_too_hard(xics, icp);
+}
+
+int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 irq = xirr & 0x00ffffff;
if (!xics || !xics->real_mode)
return H_TOO_HARD;
@@ -371,7 +745,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
- * a pending interrupt, this is a SW error and PAPR sepcifies
+ * a pending interrupt, this is a SW error and PAPR specifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
@@ -384,28 +758,167 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
/* IPIs have no EOI */
if (irq == XICS_IPI)
- goto bail;
- /*
- * EOI handling: If the interrupt is still asserted, we need to
- * resend it. We can take a lockless "peek" at the ICS state here.
- *
- * "Message" interrupts will never have "asserted" set
- */
+ return check_too_hard(xics, icp);
+
+ return ics_rm_eoi(vcpu, irq);
+}
+
+static unsigned long eoi_rc;
+
+static void icp_eoi(struct irq_data *d, u32 hwirq, __be32 xirr, bool *again)
+{
+ void __iomem *xics_phys;
+ int64_t rc;
+
+ rc = pnv_opal_pci_msi_eoi(d);
+
+ if (rc)
+ eoi_rc = rc;
+
+ iosync();
+
+ /* EOI it */
+ xics_phys = local_paca->kvm_hstate.xics_phys;
+ if (xics_phys) {
+ __raw_rm_writel(xirr, xics_phys + XICS_XIRR);
+ } else {
+ rc = opal_int_eoi(be32_to_cpu(xirr));
+ *again = rc > 0;
+ }
+}
+
+static int xics_opal_set_server(unsigned int hw_irq, int server_cpu)
+{
+ unsigned int mangle_cpu = get_hard_smp_processor_id(server_cpu) << 2;
+
+ return opal_set_xive(hw_irq, mangle_cpu, DEFAULT_PRIORITY);
+}
+
+/*
+ * Increment a per-CPU 32-bit unsigned integer variable.
+ * Safe to call in real-mode. Handles vmalloc'ed addresses
+ *
+ * ToDo: Make this work for any integral type
+ */
+
+static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
+{
+ unsigned long l;
+ unsigned int *raddr;
+ int cpu = smp_processor_id();
+
+ raddr = per_cpu_ptr(addr, cpu);
+ l = (unsigned long)raddr;
+
+ if (get_region_id(l) == VMALLOC_REGION_ID) {
+ l = vmalloc_to_phys(raddr);
+ raddr = (unsigned int *)l;
+ }
+ ++*raddr;
+}
+
+/*
+ * We don't try to update the flags in the irq_desc 'istate' field in
+ * here as would happen in the normal IRQ handling path for several reasons:
+ * - state flags represent internal IRQ state and are not expected to be
+ * updated outside the IRQ subsystem
+ * - more importantly, these are useful for edge triggered interrupts,
+ * IRQ probing, etc., but we are only handling MSI/MSIx interrupts here
+ * and these states shouldn't apply to us.
+ *
+ * However, we do update irq_stats - we somewhat duplicate the code in
+ * kstat_incr_irqs_this_cpu() for this since this function is defined
+ * in irq/internal.h which we don't want to include here.
+ * The only difference is that desc->kstat_irqs is an allocated per CPU
+ * variable and could have been vmalloc'ed, so we can't directly
+ * call __this_cpu_inc() on it. The kstat structure is a static
+ * per CPU variable and it should be accessible by real-mode KVM.
+ *
+ */
+static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
+{
+ this_cpu_inc_rm(&desc->kstat_irqs->cnt);
+ __this_cpu_inc(kstat.irqs_sum);
+}
+
+long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
+ __be32 xirr,
+ struct kvmppc_irq_map *irq_map,
+ struct kvmppc_passthru_irqmap *pimap,
+ bool *again)
+{
+ struct kvmppc_xics *xics;
+ struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u32 irq;
+ u16 src;
+ u32 pq_old, pq_new;
+
+ irq = irq_map->v_hwirq;
+ xics = vcpu->kvm->arch.xics;
+ icp = vcpu->arch.icp;
+
+ kvmppc_rm_handle_irq_desc(irq_map->desc);
+
ics = kvmppc_xics_find_ics(xics, irq, &src);
if (!ics)
- goto bail;
+ return 2;
+
state = &ics->irq_state[src];
- /* Still asserted, resend it, we make it look like a reject */
- if (state->asserted) {
- icp->rm_action |= XICS_RM_REJECT;
- icp->rm_reject = irq;
+ /* only MSIs register bypass producers, so it must be MSI here */
+ do {
+ pq_old = state->pq_state;
+ pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ /* Test P=1, Q=0, this is the only case where we present */
+ if (pq_new == PQ_PRESENTED)
+ icp_rm_deliver_irq(xics, icp, irq, false);
+
+ /* EOI the interrupt */
+ icp_eoi(irq_desc_get_irq_data(irq_map->desc), irq_map->r_hwirq, xirr, again);
+
+ if (check_too_hard(xics, icp) == H_TOO_HARD)
+ return 2;
+ else
+ return -2;
+}
+
+/* --- Non-real mode XICS-related built-in routines --- */
+
+/*
+ * Host Operations poked by RM KVM
+ */
+static void rm_host_ipi_action(int action, void *data)
+{
+ switch (action) {
+ case XICS_RM_KICK_VCPU:
+ kvmppc_host_rm_ops_hv->vcpu_kick(data);
+ break;
+ default:
+ WARN(1, "Unexpected rm_action=%d data=%p\n", action, data);
+ break;
}
- if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
- icp->rm_action |= XICS_RM_NOTIFY_EOI;
- icp->rm_eoied_irq = irq;
+}
+
+void kvmppc_xics_ipi_action(void)
+{
+ int core;
+ unsigned int cpu = smp_processor_id();
+ struct kvmppc_host_rm_core *rm_corep;
+
+ core = cpu >> threads_shift;
+ rm_corep = &kvmppc_host_rm_ops_hv->rm_core[core];
+
+ if (rm_corep->rm_data) {
+ rm_host_ipi_action(rm_corep->rm_state.rm_action,
+ rm_corep->rm_data);
+ /* Order these stores against the real mode KVM */
+ rm_corep->rm_data = NULL;
+ smp_wmb();
+ rm_corep->rm_state.rm_action = 0;
}
- bail:
- return check_too_hard(xics, icp);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index f0c4db7704c3..83f7504349d2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1,12 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*
* Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
*
@@ -17,7 +10,11 @@
* Authors: Alexander Graf <agraf@suse.de>
*/
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <linux/objtool.h>
#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
#include <asm/mmu.h>
@@ -27,14 +24,41 @@
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
-#include <asm/mmu-hash64.h>
+#include <asm/book3s/64/mmu-hash.h>
#include <asm/tm.h>
-
-#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
+#include <asm/opal.h>
+#include <asm/thread_info.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+#define NAPPING_UNSPLIT 3
+
+/* Stack frame offsets for kvmppc_hv_entry */
+#define SFS 160
+#define STACK_SLOT_TRAP (SFS-4)
+#define STACK_SLOT_TID (SFS-16)
+#define STACK_SLOT_PSSCR (SFS-24)
+#define STACK_SLOT_PID (SFS-32)
+#define STACK_SLOT_IAMR (SFS-40)
+#define STACK_SLOT_CIABR (SFS-48)
+#define STACK_SLOT_DAWR0 (SFS-56)
+#define STACK_SLOT_DAWRX0 (SFS-64)
+#define STACK_SLOT_HFSCR (SFS-72)
+#define STACK_SLOT_AMR (SFS-80)
+#define STACK_SLOT_UAMOR (SFS-88)
+#define STACK_SLOT_FSCR (SFS-96)
+
+/*
+ * Use the last LPID (all implemented LPID bits = 1) for partition switching.
+ * This is reserved in the LPID allocator. POWER7 only implements 0x3ff, but
+ * we write 0xfff into the LPID SPR anyway, which seems to work and just
+ * ignores the top bits.
+ */
+#define LPID_RSVD 0xfff
/*
* Call kvmppc_hv_entry in real mode.
@@ -49,6 +73,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
mfmsr r10
+ std r10, HSTATE_HOST_MSR(r13)
LOAD_REG_ADDR(r5, kvmppc_call_hv_entry)
li r0,MSR_RI
andc r0,r10,r0
@@ -57,7 +82,7 @@ _GLOBAL_TOC(kvmppc_hv_entry_trampoline)
mtmsrd r0,1 /* clear RI in MSR */
mtsrr0 r5
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
kvmppc_call_hv_entry:
ld r4, HSTATE_KVM_VCPU(r13)
@@ -78,54 +103,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_SPRG_VDSO_WRITE,r3
/* Reload the host's PMU registers */
- ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
- lbz r4, LPPACA_PMCINUSE(r3)
- cmpwi r4, 0
- beq 23f /* skip if not */
-BEGIN_FTR_SECTION
- ld r3, HSTATE_MMCR(r13)
- andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
- cmpwi r4, MMCR0_PMAO
- beql kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
- lwz r3, HSTATE_PMC(r13)
- lwz r4, HSTATE_PMC + 4(r13)
- lwz r5, HSTATE_PMC + 8(r13)
- lwz r6, HSTATE_PMC + 12(r13)
- lwz r8, HSTATE_PMC + 16(r13)
- lwz r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
- lwz r10, HSTATE_PMC + 24(r13)
- lwz r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r4
- mtspr SPRN_PMC3, r5
- mtspr SPRN_PMC4, r6
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
-BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- ld r3, HSTATE_MMCR(r13)
- ld r4, HSTATE_MMCR + 8(r13)
- ld r5, HSTATE_MMCR + 16(r13)
- ld r6, HSTATE_MMCR + 24(r13)
- ld r7, HSTATE_MMCR + 32(r13)
- mtspr SPRN_MMCR1, r4
- mtspr SPRN_MMCRA, r5
- mtspr SPRN_SIAR, r6
- mtspr SPRN_SDAR, r7
-BEGIN_FTR_SECTION
- ld r8, HSTATE_MMCR + 40(r13)
- ld r9, HSTATE_MMCR + 48(r13)
- mtspr SPRN_MMCR2, r8
- mtspr SPRN_SIER, r9
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mtspr SPRN_MMCR0, r3
- isync
-23:
+ bl kvmhv_load_host_pmu
/*
* Reload DEC. HDEC interrupts were disabled when
@@ -136,53 +114,55 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3
mtspr SPRN_DEC, r4
+ /* hwthread_req may have got set by cede or no vcpu, so clear it */
+ li r0, 0
+ stb r0, HSTATE_HWTHREAD_REQ(r13)
+
/*
- * For external and machine check interrupts, we need
- * to call the Linux handler to process the interrupt.
- * We do that by jumping to absolute address 0x500 for
- * external interrupts, or the machine_check_fwnmi label
- * for machine checks (since firmware might have patched
- * the vector area at 0x200). The [h]rfid at the end of the
- * handler will return to the book3s_hv_interrupts.S code.
- * For other interrupts we do the rfid to get back
- * to the book3s_hv_interrupts.S code here.
+ * For external interrupts we need to call the Linux
+ * handler to process the interrupt. We do that by jumping
+ * to absolute address 0x500 for external interrupts.
+ * The [h]rfid at the end of the handler will return to
+ * the book3s_hv_interrupts.S code. For other interrupts
+ * we do the rfid to get back to the book3s_hv_interrupts.S
+ * code here.
*/
ld r8, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r7, HSTATE_HOST_MSR(r13)
- cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
- beq 11f
- cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
- beq cr2, 14f /* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ /* Return the trap number on this thread as the return value */
+ mr r3, r12
- /* RFI into the highmem handler, or branch to interrupt handler */
+ /* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beqa 0x500 /* external interrupt (PPC970) */
- beq cr1, 13f /* machine check */
- RFI
-
- /* On POWER7, we have external interrupts set to use HSRR0/1 */
-11: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- ba 0x500
-
-13: b machine_check_fwnmi
-
-14: mtspr SPRN_HSRR0, r8
- mtspr SPRN_HSRR1, r7
- b hmi_exception_after_realmode
+ RFI_TO_KERNEL
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
+ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */
+ /* HDEC may be larger than DEC for arch >= v3.00, but since the */
+ /* HDEC value came from DEC in the first place, it will fit */
+ mfspr r3, SPRN_HDEC
+ mtspr SPRN_DEC, r3
+ /*
+ * Make sure the primary has finished the MMU switch.
+ * We should never get here on a secondary thread, but
+ * check it for robustness' sake.
+ */
+ ld r5, HSTATE_KVM_VCORE(r13)
+65: lbz r0, VCORE_IN_GUEST(r5)
+ cmpwi r0, 0
+ beq 65b
+ /* Set LPCR. */
+ ld r8,VCORE_LPCR(r5)
+ mtspr SPRN_LPCR,r8
+ isync
/* set our bit in napping_threads */
ld r5, HSTATE_KVM_VCORE(r13)
lbz r7, HSTATE_PTID(r13)
@@ -193,7 +173,7 @@ kvmppc_primary_no_guest:
or r3, r3, r0
stwcx. r3, 0, r6
bne 1b
- /* order napping_threads update vs testing entry_exit_count */
+ /* order napping_threads update vs testing entry_exit_map */
isync
li r12, 0
lwz r7, VCORE_ENTRY_EXIT(r5)
@@ -201,21 +181,33 @@ kvmppc_primary_no_guest:
bge kvm_novcpu_exit /* another thread already exiting */
li r3, NAPPING_NOVCPU
stb r3, HSTATE_NAPPING(r13)
- li r3, 1
- stb r3, HSTATE_HWTHREAD_REQ(r13)
+ li r3, 0 /* Don't wake on privileged (OS) doorbell */
b kvm_do_nap
+/*
+ * kvm_novcpu_wakeup
+ * Entered from kvm_start_guest if kvm_hstate.napping is set
+ * to NAPPING_NOVCPU
+ * r2 = kernel TOC
+ * r13 = paca
+ */
kvm_novcpu_wakeup:
ld r1, HSTATE_HOST_R1(r13)
ld r5, HSTATE_KVM_VCORE(r13)
li r0, 0
stb r0, HSTATE_NAPPING(r13)
- stb r0, HSTATE_HWTHREAD_REQ(r13)
/* check the wake reason */
bl kvmppc_check_wake_reason
-
+
+ /*
+ * Restore volatile registers since we could have called
+ * a C routine in kvmppc_check_wake_reason.
+ * r5 = VCORE
+ */
+ ld r5, HSTATE_KVM_VCORE(r13)
+
/* see if any other thread is already exiting */
lwz r0, VCORE_ENTRY_EXIT(r5)
cmpwi r0, 0x100
@@ -235,45 +227,79 @@ kvm_novcpu_wakeup:
cmpdi r3, 0
bge kvm_novcpu_exit
+ /* See if our timeslice has expired (HDEC is negative) */
+ mfspr r0, SPRN_HDEC
+ extsw r0, r0
+ li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+ cmpdi r0, 0
+ blt kvm_novcpu_exit
+
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
ld r4, HSTATE_KVM_VCPU(r13)
cmpdi r4, 0
- bne kvmppc_got_guest
+ beq kvmppc_primary_no_guest
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ addi r3, r4, VCPU_TB_RMENTRY
+ bl kvmhv_start_timing
+#endif
+ b kvmppc_got_guest
kvm_novcpu_exit:
- b hdec_soon
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ ld r4, HSTATE_KVM_VCPU(r13)
+ cmpdi r4, 0
+ beq 13f
+ addi r3, r4, VCPU_TB_RMEXIT
+ bl kvmhv_accumulate_time
+#endif
+13: mr r3, r12
+ stw r12, STACK_SLOT_TRAP(r1)
+ bl kvmhv_commence_exit
+ nop
+ b kvmhv_switch_to_host
/*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when wakened from Linux offline idle code.
+ * Relocation is off
+ * r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
- .globl kvm_start_guest
-kvm_start_guest:
+_GLOBAL(idle_kvm_start_guest)
+ mfcr r5
+ mflr r0
+ std r5, 8(r1) // Save CR in caller's frame
+ std r0, 16(r1) // Save LR in caller's frame
+ // Create frame on emergency stack
+ ld r4, PACAEMERGSP(r13)
+ stdu r1, -SWITCH_FRAME_SIZE(r4)
+ // Switch to new frame on emergency stack
+ mr r1, r4
+ std r3, 32(r1) // Save SRR1 wakeup value
+ SAVE_NVGPRS(r1)
- /* Set runlatch bit the minute you wake up from nap */
- mfspr r1, SPRN_CTRLF
- ori r1, r1, 1
- mtspr SPRN_CTRLT, r1
+ /*
+ * Could avoid this and pass it through in r3. For now,
+ * code expects it to be in SRR1.
+ */
+ mtspr SPRN_SRR1,r3
- ld r2,PACATOC(r13)
+ li r0,0
+ stb r0,PACA_FTRACE_ENABLED(r13)
li r0,KVM_HWTHREAD_IN_KVM
stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* NV GPR values from power7_idle() will no longer be valid */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* were we napping due to cede? */
+ /* kvm cede / napping does not come through here */
lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,NAPPING_CEDE
- beq kvm_end_cede
- cmpwi r0,NAPPING_NOVCPU
- beq kvm_novcpu_wakeup
+ twnei r0,0
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ b 1f
+
+kvm_unsplit_wakeup:
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+
+1:
/*
* We weren't napping due to cede, so this must be a secondary
@@ -284,61 +310,191 @@ kvm_start_guest:
/* Check the wake reason in SRR1 to see why we got here */
bl kvmppc_check_wake_reason
+ /*
+ * kvmppc_check_wake_reason could invoke a C routine, but we
+ * have no volatile registers to restore when we return.
+ */
+
cmpdi r3, 0
bge kvm_no_guest
- /* get vcpu pointer, NULL if we have no vcpu to run */
- ld r4,HSTATE_KVM_VCPU(r13)
- cmpdi r4,0
- /* if we have no vcpu to run, go back to sleep */
+ /* get vcore pointer, NULL if we have nothing to run */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ cmpdi r5,0
+ /* if we have no vcore to run, go back to sleep */
beq kvm_no_guest
+kvm_secondary_got_guest:
+
+ // About to go to guest, clear saved SRR1
+ li r0, 0
+ std r0, 32(r1)
+
/* Set HSTATE_DSCR(r13) to something sensible */
- ld r6, PACA_DSCR(r13)
+ ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13)
+ /* On thread 0 of a subcore, set HDEC to max */
+ lbz r4, HSTATE_PTID(r13)
+ cmpwi r4, 0
+ bne 63f
+ lis r6,0x7fff /* MAX_INT@h */
+ mtspr SPRN_HDEC, r6
+ /* and set per-LPAR registers, if doing dynamic micro-threading */
+ ld r6, HSTATE_SPLIT_MODE(r13)
+ cmpdi r6, 0
+ beq 63f
+ ld r0, KVM_SPLIT_RPR(r6)
+ mtspr SPRN_RPR, r0
+ ld r0, KVM_SPLIT_PMMAR(r6)
+ mtspr SPRN_PMMAR, r0
+ ld r0, KVM_SPLIT_LDBAR(r6)
+ mtspr SPRN_LDBAR, r0
+ isync
+63:
+ /* Order load of vcpu after load of vcore */
+ lwsync
+ ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_hv_entry
/* Back from the guest, go back to nap */
- /* Clear our vcpu pointer so we don't come back in early */
+ /* Clear our vcpu and vcore pointers so we don't come back in early */
li r0, 0
std r0, HSTATE_KVM_VCPU(r13)
/*
- * Make sure we clear HSTATE_KVM_VCPU(r13) before incrementing
- * the nap_count, because once the increment to nap_count is
- * visible we could be given another vcpu.
+ * Once we clear HSTATE_KVM_VCORE(r13), the code in
+ * kvmppc_run_core() is going to assume that all our vcpu
+ * state is visible in memory. This lwsync makes sure
+ * that that is true.
*/
lwsync
+ std r0, HSTATE_KVM_VCORE(r13)
- /* increment the nap count and then go to nap mode */
- ld r4, HSTATE_KVM_VCORE(r13)
- addi r4, r4, VCORE_NAP_COUNT
-51: lwarx r3, 0, r4
- addi r3, r3, 1
- stwcx. r3, 0, r4
- bne 51b
+ /*
+ * All secondaries exiting guest will fall through this path.
+ * Before proceeding, just check for HMI interrupt and
+ * invoke opal hmi handler. By now we are sure that the
+ * primary thread on this core/subcore has already made partition
+ * switch/TB resync and we are good to call opal hmi handler.
+ */
+ cmpwi r12, BOOK3S_INTERRUPT_HMI
+ bne kvm_no_guest
+ li r3,0 /* NULL argument */
+ bl CFUNC(hmi_exception_realmode)
+/*
+ * At this point we have finished executing in the guest.
+ * We need to wait for hwthread_req to become zero, since
+ * we may not turn on the MMU while hwthread_req is non-zero.
+ * While waiting we also need to check if we get given a vcpu to run.
+ */
kvm_no_guest:
- li r0, KVM_HWTHREAD_IN_NAP
+ lbz r3, HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r3, 0
+ bne 53f
+ HMT_MEDIUM
+ li r0, KVM_HWTHREAD_IN_KERNEL
stb r0, HSTATE_HWTHREAD_STATE(r13)
-kvm_do_nap:
- /* Clear the runlatch bit before napping */
- mfspr r2, SPRN_CTRLF
- clrrdi r2, r2, 1
- mtspr SPRN_CTRLT, r2
+ /* need to recheck hwthread_req after a barrier, to avoid race */
+ sync
+ lbz r3, HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r3, 0
+ bne 54f
+ /*
+ * Jump to idle_return_gpr_loss, which returns to the
+ * idle_kvm_start_guest caller.
+ */
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
- isync
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
- nap
- b .
+ // Return SRR1 wakeup value, or 0 if we went into the guest
+ ld r3, 32(r1)
+ REST_NVGPRS(r1)
+ ld r1, 0(r1) // Switch back to caller stack
+ ld r0, 16(r1) // Reload LR
+ ld r5, 8(r1) // Reload CR
+ mtlr r0
+ mtcr r5
+ blr
+
+53:
+ HMT_LOW
+ ld r5, HSTATE_KVM_VCORE(r13)
+ cmpdi r5, 0
+ bne 60f
+ ld r3, HSTATE_SPLIT_MODE(r13)
+ cmpdi r3, 0
+ beq kvm_no_guest
+ lbz r0, KVM_SPLIT_DO_NAP(r3)
+ cmpwi r0, 0
+ beq kvm_no_guest
+ HMT_MEDIUM
+ b kvm_unsplit_nap
+60: HMT_MEDIUM
+ b kvm_secondary_got_guest
+
+54: li r0, KVM_HWTHREAD_IN_KVM
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+ b kvm_no_guest
+
+/*
+ * Here the primary thread is trying to return the core to
+ * whole-core mode, so we need to nap.
+ */
+kvm_unsplit_nap:
+ /*
+ * When secondaries are napping in kvm_unsplit_nap() with
+ * hwthread_req = 1, HMI goes ignored even though subcores are
+ * already exited the guest. Hence HMI keeps waking up secondaries
+ * from nap in a loop and secondaries always go back to nap since
+ * no vcore is assigned to them. This makes impossible for primary
+ * thread to get hold of secondary threads resulting into a soft
+ * lockup in KVM path.
+ *
+ * Let us check if HMI is pending and handle it before we go to nap.
+ */
+ cmpwi r12, BOOK3S_INTERRUPT_HMI
+ bne 55f
+ li r3, 0 /* NULL argument */
+ bl CFUNC(hmi_exception_realmode)
+55:
+ /*
+ * Ensure that secondary doesn't nap when it has
+ * its vcore pointer set.
+ */
+ sync /* matches smp_mb() before setting split_info.do_nap */
+ ld r0, HSTATE_KVM_VCORE(r13)
+ cmpdi r0, 0
+ bne kvm_no_guest
+ /* clear any pending message */
+BEGIN_FTR_SECTION
+ lis r6, (PPC_DBELL_SERVER << (63-36))@h
+ PPC_MSGCLR(6)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ /* Set kvm_split_mode.napped[tid] = 1 */
+ ld r3, HSTATE_SPLIT_MODE(r13)
+ li r0, 1
+ lhz r4, PACAPACAINDEX(r13)
+ clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
+ addi r4, r4, KVM_SPLIT_NAPPED
+ stbx r0, r3, r4
+ /* Check the do_nap flag again after setting napped[] */
+ sync
+ lbz r0, KVM_SPLIT_DO_NAP(r3)
+ cmpwi r0, 0
+ beq 57f
+ li r3, NAPPING_UNSPLIT
+ stb r3, HSTATE_NAPPING(r13)
+ li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
+ mfspr r5, SPRN_LPCR
+ rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+ b kvm_nap_sequence
+
+57: li r0, 0
+ stbx r0, r3, r4
+ b kvm_no_guest
/******************************************************************************
* *
@@ -346,8 +502,7 @@ kvm_do_nap:
* *
*****************************************************************************/
-.global kvmppc_hv_entry
-kvmppc_hv_entry:
+SYM_CODE_START_LOCAL(kvmppc_hv_entry)
/* Required state:
*
@@ -355,11 +510,13 @@ kvmppc_hv_entry:
* MSR = ~IR|DR
* R13 = PACA
* R1 = host R1
+ * R2 = TOC
* all other volatile GPRS = free
+ * Does not preserve non-volatile GPRs or CR fields
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
- stdu r1, -112(r1)
+ stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@@ -367,37 +524,41 @@ kvmppc_hv_entry:
li r6, KVM_GUEST_MODE_HOST_HV
stb r6, HSTATE_IN_GUEST(r13)
- /* Clear out SLB */
- li r6,0
- slbmte r6,r6
- slbia
- ptesync
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ /* Store initial timestamp */
+ cmpdi r4, 0
+ beq 1f
+ addi r3, r4, VCPU_TB_RMENTRY
+ bl kvmhv_start_timing
+1:
+#endif
+
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r9, VCORE_KVM(r5) /* pointer to struct kvm */
-BEGIN_FTR_SECTION
- b 30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
- * POWER7 host -> guest partition switch code.
+ * POWER7/POWER8 host -> guest partition switch code.
* We don't have to lock against concurrent tlbies,
* but we do have to coordinate across hardware threads.
*/
- /* Increment entry count iff exit count is zero. */
- ld r5,HSTATE_KVM_VCORE(r13)
- addi r9,r5,VCORE_ENTRY_EXIT
-21: lwarx r3,0,r9
- cmpwi r3,0x100 /* any threads starting to exit? */
+ /* Set bit in entry map iff exit map is zero. */
+ li r7, 1
+ lbz r6, HSTATE_PTID(r13)
+ sld r7, r7, r6
+ addi r8, r5, VCORE_ENTRY_EXIT
+21: lwarx r3, 0, r8
+ cmpwi r3, 0x100 /* any threads starting to exit? */
bge secondary_too_late /* if so we're too late to the party */
- addi r3,r3,1
- stwcx. r3,0,r9
+ or r3, r3, r7
+ stwcx. r3, 0, r8
bne 21b
/* Primary thread switches to guest partition. */
- ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
- lbz r6,HSTATE_PTID(r13)
cmpwi r6,0
- bne 20f
- ld r6,KVM_SDR1(r9)
+ bne 10f
+
lwz r7,KVM_LPID(r9)
+ ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
@@ -405,41 +566,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_LPID,r7
isync
- /* See if we need to flush the TLB */
- lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
- clrldi r7,r6,64-6 /* extract bit number (6 bits) */
- srdi r6,r6,6 /* doubleword number */
- sldi r6,r6,3 /* address offset */
- add r6,r6,r9
- addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
- li r0,1
- sld r0,r0,r7
- ld r7,0(r6)
- and. r7,r7,r0
- beq 22f
-23: ldarx r7,0,r6 /* if set, clear the bit */
- andc r7,r7,r0
- stdcx. r7,0,r6
- bne 23b
- /* Flush the TLB of any entries for this LPID */
- /* use arch 2.07S as a proxy for POWER8 */
-BEGIN_FTR_SECTION
- li r6,512 /* POWER8 has 512 sets */
-FTR_SECTION_ELSE
- li r6,128 /* POWER7 has 128 sets */
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
- mtctr r6
- li r7,0x800 /* IS field = 0b10 */
- ptesync
-28: tlbiel r7
- addi r7,r7,0x1000
- bdnz 28b
- ptesync
+ /* See if we need to flush the TLB. */
+ mr r3, r9 /* kvm pointer */
+ lhz r4, PACAPACAINDEX(r13) /* physical cpu number */
+ li r5, 0 /* nested vcpu pointer */
+ bl kvmppc_check_need_tlb_flush
+ nop
+ ld r5, HSTATE_KVM_VCORE(r13)
/* Add timebase offset onto timebase */
22: ld r8,VCORE_TB_OFFSET(r5)
cmpdi r8,0
beq 37f
+ std r8, VCORE_TB_OFFSET_APPL(r5)
mftb r6 /* current host timebase */
add r8,r8,r6
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
@@ -453,146 +592,33 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
/* Load guest PCR value to select appropriate compat mode */
37: ld r7, VCORE_PCR(r5)
- cmpdi r7, 0
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+ cmpld r7, r6
beq 38f
+ or r7, r7, r6
mtspr SPRN_PCR, r7
38:
BEGIN_FTR_SECTION
- /* DPDES is shared between threads */
+ /* DPDES and VTB are shared between threads */
ld r8, VCORE_DPDES(r5)
+ ld r7, VCORE_VTB(r5)
mtspr SPRN_DPDES, r8
+ mtspr SPRN_VTB, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ /* Mark the subcore state as inside guest */
+ bl kvmppc_subcore_enter_guest
+ nop
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r4, HSTATE_KVM_VCPU(r13)
li r0,1
stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
- b 10f
-
- /* Secondary threads wait for primary to have done partition switch */
-20: lbz r0,VCORE_IN_GUEST(r5)
- cmpwi r0,0
- beq 20b
-
- /* Set LPCR and RMOR. */
-10: ld r8,VCORE_LPCR(r5)
- mtspr SPRN_LPCR,r8
- ld r8,KVM_RMOR(r9)
- mtspr SPRN_RMOR,r8
- isync
-
- /* Check if HDEC expires soon */
- mfspr r3,SPRN_HDEC
- cmpwi r3,512 /* 1 microsecond */
- li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
- blt hdec_soon
- b 31f
-
- /*
- * PPC970 host -> guest partition switch code.
- * We have to lock against concurrent tlbies,
- * using native_tlbie_lock to lock against host tlbies
- * and kvm->arch.tlbie_lock to lock against guest tlbies.
- * We also have to invalidate the TLB since its
- * entries aren't tagged with the LPID.
- */
-30: ld r5,HSTATE_KVM_VCORE(r13)
- ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
-
- /* first take native_tlbie_lock */
- .section ".toc","aw"
-toc_tlbie_lock:
- .tc native_tlbie_lock[TC],native_tlbie_lock
- .previous
- ld r3,toc_tlbie_lock@toc(2)
-#ifdef __BIG_ENDIAN__
- lwz r8,PACA_LOCK_TOKEN(r13)
-#else
- lwz r8,PACAPACAINDEX(r13)
-#endif
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
-
- ld r5,HSTATE_KVM_VCORE(r13)
- ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
- li r0,0x18f
- rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
- or r0,r7,r0
- ptesync
- sync
- mtspr SPRN_HID4,r0 /* switch to reserved LPID */
- isync
- li r0,0
- stw r0,0(r3) /* drop native_tlbie_lock */
-
- /* invalidate the whole TLB */
- li r0,256
- mtctr r0
- li r6,0
-25: tlbiel r6
- addi r6,r6,0x1000
- bdnz 25b
- ptesync
-
- /* Take the guest's tlbie_lock */
- addi r3,r9,KVM_TLBIE_LOCK
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
- ld r6,KVM_SDR1(r9)
- mtspr SPRN_SDR1,r6 /* switch to partition page table */
-
- /* Set up HID4 with the guest's LPID etc. */
- sync
- mtspr SPRN_HID4,r7
- isync
-
- /* drop the guest's tlbie_lock */
- li r0,0
- stw r0,0(r3)
-
- /* Check if HDEC expires soon */
- mfspr r3,SPRN_HDEC
- cmpwi r3,10
- li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
- blt hdec_soon
- /* Enable HDEC interrupts */
- mfspr r0,SPRN_HID0
- li r3,1
- rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
- sync
- mtspr SPRN_HID0,r0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
-31:
/* Do we have a guest vcpu to run? */
- cmpdi r4, 0
+10: cmpdi r4, 0
beq kvmppc_primary_no_guest
kvmppc_got_guest:
-
- /* Load up guest SLB entries */
- lwz r5,VCPU_SLB_MAX(r4)
- cmpwi r5,0
- beq 9f
- mtctr r5
- addi r6,r4,VCPU_SLB
-1: ld r8,VCPU_SLB_E(r6)
- ld r9,VCPU_SLB_V(r6)
- slbmte r9,r8
- addi r6,r6,VCPU_SLB_SIZE
- bdnz 1b
-9:
/* Increment yield count if they have a VPA */
ld r3, VCPU_VPA(r4)
cmpdi r3, 0
@@ -605,7 +631,6 @@ kvmppc_got_guest:
stb r6, VCPU_VPA_DIRTY(r4)
25:
-BEGIN_FTR_SECTION
/* Save purr/spurr */
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
@@ -615,7 +640,25 @@ BEGIN_FTR_SECTION
ld r8,VCPU_SPURR(r4)
mtspr SPRN_PURR,r7
mtspr SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+
+ /* Save host values of some registers */
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_CIABR
+ mfspr r6, SPRN_DAWR0
+ mfspr r7, SPRN_DAWRX0
+ mfspr r8, SPRN_IAMR
+ std r5, STACK_SLOT_CIABR(r1)
+ std r6, STACK_SLOT_DAWR0(r1)
+ std r7, STACK_SLOT_DAWRX0(r1)
+ std r8, STACK_SLOT_IAMR(r1)
+ mfspr r5, SPRN_FSCR
+ std r5, STACK_SLOT_FSCR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+ mfspr r5, SPRN_AMR
+ std r5, STACK_SLOT_AMR(r1)
+ mfspr r6, SPRN_UAMOR
+ std r6, STACK_SLOT_UAMOR(r1)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -624,178 +667,31 @@ BEGIN_FTR_SECTION
ld r6,VCPU_DABR(r4)
mtspr SPRN_DABRX,r5
mtspr SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
- b skip_tm
+ b 91f
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
-
- /* Turn on TM/FP/VSX/VMX so we can restore them. */
- mfmsr r5
- li r6, MSR_TM >> 32
- sldi r6, r6, 32
- or r5, r5, r6
- ori r5, r5, MSR_FP
- oris r5, r5, (MSR_VEC | MSR_VSX)@h
- mtmsrd r5
-
- /*
- * The user may change these outside of a transaction, so they must
- * always be context switched.
- */
- ld r5, VCPU_TFHAR(r4)
- ld r6, VCPU_TFIAR(r4)
- ld r7, VCPU_TEXASR(r4)
- mtspr SPRN_TFHAR, r5
- mtspr SPRN_TFIAR, r6
- mtspr SPRN_TEXASR, r7
-
- ld r5, VCPU_MSR(r4)
- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
- beq skip_tm /* TM not active in guest */
-
- /* Make sure the failure summary is set, otherwise we'll program check
- * when we trechkpt. It's possible that this might have been not set
- * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
- * host.
- */
- oris r7, r7, (TEXASR_FS)@h
- mtspr SPRN_TEXASR, r7
-
- /*
- * We need to load up the checkpointed state for the guest.
- * We need to do this early as it will blow away any GPRs, VSRs and
- * some SPRs.
- */
-
- mr r31, r4
- addi r3, r31, VCPU_FPRS_TM
- bl load_fp_state
- addi r3, r31, VCPU_VRS_TM
- bl load_vr_state
- mr r4, r31
- lwz r7, VCPU_VRSAVE_TM(r4)
- mtspr SPRN_VRSAVE, r7
-
- ld r5, VCPU_LR_TM(r4)
- lwz r6, VCPU_CR_TM(r4)
- ld r7, VCPU_CTR_TM(r4)
- ld r8, VCPU_AMR_TM(r4)
- ld r9, VCPU_TAR_TM(r4)
- mtlr r5
- mtcr r6
- mtctr r7
- mtspr SPRN_AMR, r8
- mtspr SPRN_TAR, r9
-
/*
- * Load up PPR and DSCR values but don't put them in the actual SPRs
- * till the last moment to avoid running with userspace PPR and DSCR for
- * too long.
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
- ld r29, VCPU_DSCR_TM(r4)
- ld r30, VCPU_PPR_TM(r4)
-
- std r2, PACATMSCRATCH(r13) /* Save TOC */
-
- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
- li r5, 0
- mtmsrd r5, 1
-
- /* Load GPRs r0-r28 */
- reg = 0
- .rept 29
- ld reg, VCPU_GPRS_TM(reg)(r31)
- reg = reg + 1
- .endr
-
- mtspr SPRN_DSCR, r29
- mtspr SPRN_PPR, r30
-
- /* Load final GPRs */
- ld 29, VCPU_GPRS_TM(29)(r31)
- ld 30, VCPU_GPRS_TM(30)(r31)
- ld 31, VCPU_GPRS_TM(31)(r31)
-
- /* TM checkpointed state is now setup. All GPRs are now volatile. */
- TRECHKPT
-
- /* Now let's get back the state we need. */
- HMT_MEDIUM
- GET_PACA(r13)
- ld r29, HSTATE_DSCR(r13)
- mtspr SPRN_DSCR, r29
+ mr r3, r4
+ ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
+ bl kvmppc_restore_tm_hv
+ nop
ld r4, HSTATE_KVM_VCPU(r13)
- ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATMSCRATCH(r13)
-
- /* Set the MSR RI since we have our registers back. */
- li r5, MSR_RI
- mtmsrd r5, 1
-skip_tm:
+91:
#endif
- /* Load guest PMU registers */
- /* R4 is live here (vcpu pointer) */
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
- isync
-BEGIN_FTR_SECTION
- ld r3, VCPU_MMCR(r4)
- andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
- cmpwi r5, MMCR0_PMAO
- beql kvmppc_fix_pmao
-END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
- lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
- lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
- lwz r6, VCPU_PMC + 8(r4)
- lwz r7, VCPU_PMC + 12(r4)
- lwz r8, VCPU_PMC + 16(r4)
- lwz r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
- lwz r10, VCPU_PMC + 24(r4)
- lwz r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_PMC1, r3
- mtspr SPRN_PMC2, r5
- mtspr SPRN_PMC3, r6
- mtspr SPRN_PMC4, r7
- mtspr SPRN_PMC5, r8
- mtspr SPRN_PMC6, r9
-BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- ld r3, VCPU_MMCR(r4)
- ld r5, VCPU_MMCR + 8(r4)
- ld r6, VCPU_MMCR + 16(r4)
- ld r7, VCPU_SIAR(r4)
- ld r8, VCPU_SDAR(r4)
- mtspr SPRN_MMCR1, r5
- mtspr SPRN_MMCRA, r6
- mtspr SPRN_SIAR, r7
- mtspr SPRN_SDAR, r8
-BEGIN_FTR_SECTION
- ld r5, VCPU_MMCR + 24(r4)
- ld r6, VCPU_SIER(r4)
- lwz r7, VCPU_PMC + 24(r4)
- lwz r8, VCPU_PMC + 28(r4)
- ld r9, VCPU_MMCR + 32(r4)
- mtspr SPRN_MMCR2, r5
- mtspr SPRN_SIER, r6
- mtspr SPRN_SPMC1, r7
- mtspr SPRN_SPMC2, r8
- mtspr SPRN_MMCRS, r9
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- mtspr SPRN_MMCR0, r3
- isync
+ /* Load guest PMU registers; r4 = vcpu pointer here */
+ mr r3, r4
+ bl kvmhv_load_guest_pmu
/* Load up FP, VMX and VSX registers */
+ ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_load_fp
ld r14, VCPU_GPR(R14)(r4)
@@ -817,22 +713,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ld r30, VCPU_GPR(R30)(r4)
ld r31, VCPU_GPR(R31)(r4)
-BEGIN_FTR_SECTION
/* Switch DSCR to guest value */
ld r5, VCPU_DSCR(r4)
mtspr SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
BEGIN_FTR_SECTION
- /* Skip next section on POWER7 or PPC970 */
+ /* Skip next section on POWER7 */
b 8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
- /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
- mfmsr r8
- li r0, 1
- rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
- mtmsrd r8
-
/* Load up POWER8-specific registers */
ld r5, VCPU_IAMR(r4)
lwz r6, VCPU_PSPB(r4)
@@ -840,51 +728,47 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r5
mtspr SPRN_PSPB, r6
mtspr SPRN_FSCR, r7
- ld r5, VCPU_DAWR(r4)
- ld r6, VCPU_DAWRX(r4)
+ /*
+ * Handle broken DAWR case by not writing it. This means we
+ * can still store the DAWR register for migration.
+ */
+ LOAD_REG_ADDR(r5, dawr_force_enable)
+ lbz r5, 0(r5)
+ cmpdi r5, 0
+ beq 1f
+ ld r5, VCPU_DAWR0(r4)
+ ld r6, VCPU_DAWRX0(r4)
+ mtspr SPRN_DAWR0, r5
+ mtspr SPRN_DAWRX0, r6
+1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
- mtspr SPRN_DAWR, r5
- mtspr SPRN_DAWRX, r6
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
- ld r6, VCPU_VTB(r4)
- mtspr SPRN_IC, r5
- mtspr SPRN_VTB, r6
ld r8, VCPU_EBBHR(r4)
+ mtspr SPRN_IC, r5
mtspr SPRN_EBBHR, r8
ld r5, VCPU_EBBRR(r4)
ld r6, VCPU_BESCR(r4)
- ld r7, VCPU_CSIGR(r4)
- ld r8, VCPU_TACR(r4)
+ lwz r7, VCPU_GUEST_PID(r4)
+ ld r8, VCPU_WORT(r4)
mtspr SPRN_EBBRR, r5
mtspr SPRN_BESCR, r6
- mtspr SPRN_CSIGR, r7
- mtspr SPRN_TACR, r8
+ mtspr SPRN_PID, r7
+ mtspr SPRN_WORT, r8
+ /* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
- lwz r7, VCPU_GUEST_PID(r4)
- ld r8, VCPU_WORT(r4)
+ ld r7, VCPU_CSIGR(r4)
+ ld r8, VCPU_TACR(r4)
mtspr SPRN_TCSCR, r5
mtspr SPRN_ACOP, r6
- mtspr SPRN_PID, r7
- mtspr SPRN_WORT, r8
+ mtspr SPRN_CSIGR, r7
+ mtspr SPRN_TACR, r8
+ nop
8:
- /*
- * Set the decrementer to the guest decrementer.
- */
- ld r8,VCPU_DEC_EXPIRES(r4)
- /* r8 is a host timebase value here, convert to guest TB */
- ld r5,HSTATE_KVM_VCORE(r13)
- ld r6,VCORE_TB_OFFSET(r5)
- add r8,r8,r6
- mftb r7
- subf r3,r7,r8
- mtspr SPRN_DEC,r3
- stw r3,VCPU_DEC(r4)
-
ld r5, VCPU_SPRG0(r4)
ld r6, VCPU_SPRG1(r4)
ld r7, VCPU_SPRG2(r4)
@@ -900,70 +784,99 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_DAR, r5
mtspr SPRN_DSISR, r6
-BEGIN_FTR_SECTION
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
ld r6,VCPU_UAMOR(r4)
- li r7,-1
mtspr SPRN_AMR,r5
mtspr SPRN_UAMOR,r6
- mtspr SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- /* Restore state of CTRL run bit; assume 1 on entry */
+ /* Restore state of CTRL run bit; the host currently has it set to 1 */
lwz r5,VCPU_CTRL(r4)
andi. r5,r5,1
bne 4f
- mfspr r6,SPRN_CTRLF
- clrrdi r6,r6,1
+ li r6,0
mtspr SPRN_CTRLT,r6
4:
- ld r6, VCPU_CTR(r4)
- lwz r7, VCPU_XER(r4)
+ /* Secondary threads wait for primary to have done partition switch */
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lbz r6, HSTATE_PTID(r13)
+ cmpwi r6, 0
+ beq 21f
+ lbz r0, VCORE_IN_GUEST(r5)
+ cmpwi r0, 0
+ bne 21f
+ HMT_LOW
+20: lwz r3, VCORE_ENTRY_EXIT(r5)
+ cmpwi r3, 0x100
+ bge no_switch_exit
+ lbz r0, VCORE_IN_GUEST(r5)
+ cmpwi r0, 0
+ beq 20b
+ HMT_MEDIUM
+21:
+ /* Set LPCR. */
+ ld r8,VCORE_LPCR(r5)
+ mtspr SPRN_LPCR,r8
+ isync
- mtctr r6
- mtxer r7
+ /*
+ * Set the decrementer to the guest decrementer.
+ */
+ ld r8,VCPU_DEC_EXPIRES(r4)
+ mftb r7
+ subf r3,r7,r8
+ mtspr SPRN_DEC,r3
-kvmppc_cede_reentry: /* r4 = vcpu, r13 = paca */
- ld r10, VCPU_PC(r4)
- ld r11, VCPU_MSR(r4)
+ /* Check if HDEC expires soon */
+ mfspr r3, SPRN_HDEC
+ extsw r3, r3
+ cmpdi r3, 512 /* 1 microsecond */
+ blt hdec_soon
+
+ /* Clear out and reload the SLB */
+ li r6, 0
+ slbmte r6, r6
+ PPC_SLBIA(6)
+ ptesync
+
+ /* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
+ lwz r5,VCPU_SLB_MAX(r4)
+ cmpwi r5,0
+ beq 9f
+ mtctr r5
+ addi r6,r4,VCPU_SLB
+1: ld r8,VCPU_SLB_E(r6)
+ ld r9,VCPU_SLB_V(r6)
+ slbmte r9,r8
+ addi r6,r6,VCPU_SLB_SIZE
+ bdnz 1b
+9:
+
+deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
+ /* Check if we can deliver an external or decrementer interrupt now */
+ ld r0, VCPU_PENDING_EXC(r4)
+ cmpdi r0, 0
+ beq 71f
+ mr r3, r4
+ bl CFUNC(kvmppc_guest_entry_inject_int)
+ ld r4, HSTATE_KVM_VCPU(r13)
+71:
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
-deliver_guest_interrupt:
+ ld r10, VCPU_PC(r4)
+ ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
ori r11, r11, MSR_ME
- /* Check if we can deliver an external or decrementer interrupt now */
- ld r0, VCPU_PENDING_EXC(r4)
- rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
- cmpdi cr1, r0, 0
- andi. r8, r11, MSR_EE
-BEGIN_FTR_SECTION
- mfspr r8, SPRN_LPCR
- /* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
- rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
- mtspr SPRN_LPCR, r8
- isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- beq 5f
- li r0, BOOK3S_INTERRUPT_EXTERNAL
- bne cr1, 12f
- mfspr r0, SPRN_DEC
- cmpwi r0, 0
- li r0, BOOK3S_INTERRUPT_DECREMENTER
- bge 5f
-
-12: mtspr SPRN_SRR0, r10
- mr r10,r0
- mtspr SPRN_SRR1, r11
- mr r9, r4
- bl kvmppc_msr_interrupt
-5:
+ ld r6, VCPU_CTR(r4)
+ ld r7, VCPU_XER(r4)
+ mtctr r6
+ mtxer r7
/*
* Required state:
@@ -982,6 +895,12 @@ fast_guest_return:
li r9, KVM_GUEST_MODE_GUEST_HV
stb r9, HSTATE_IN_GUEST(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ /* Accumulate timing */
+ addi r3, r4, VCPU_TB_GUEST
+ bl kvmhv_accumulate_time
+#endif
+
/* Enter guest */
BEGIN_FTR_SECTION
@@ -993,16 +912,10 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r5, VCPU_LR(r4)
- lwz r6, VCPU_CR(r4)
mtlr r5
- mtcr r6
ld r1, VCPU_GPR(R1)(r4)
- ld r2, VCPU_GPR(R2)(r4)
- ld r3, VCPU_GPR(R3)(r4)
ld r5, VCPU_GPR(R5)(r4)
- ld r6, VCPU_GPR(R6)(r4)
- ld r7, VCPU_GPR(R7)(r4)
ld r8, VCPU_GPR(R8)(r4)
ld r9, VCPU_GPR(R9)(r4)
ld r10, VCPU_GPR(R10)(r4)
@@ -1013,11 +926,46 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ ld r6, VCPU_GPR(R6)(r4)
+ ld r7, VCPU_GPR(R7)(r4)
+
+ ld r0, VCPU_CR(r4)
+ mtcr r0
+
ld r0, VCPU_GPR(R0)(r4)
+ ld r2, VCPU_GPR(R2)(r4)
+ ld r3, VCPU_GPR(R3)(r4)
ld r4, VCPU_GPR(R4)(r4)
-
- hrfid
+ HRFI_TO_GUEST
b .
+SYM_CODE_END(kvmppc_hv_entry)
+
+secondary_too_late:
+ li r12, 0
+ stw r12, STACK_SLOT_TRAP(r1)
+ cmpdi r4, 0
+ beq 11f
+ stw r12, VCPU_TRAP(r4)
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ addi r3, r4, VCPU_TB_RMEXIT
+ bl kvmhv_accumulate_time
+#endif
+11: b kvmhv_switch_to_host
+
+no_switch_exit:
+ HMT_MEDIUM
+ li r12, 0
+ b 12f
+hdec_soon:
+ li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
+12: stw r12, VCPU_TRAP(r4)
+ mr r9, r4
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ addi r3, r4, VCPU_TB_RMEXIT
+ bl kvmhv_accumulate_time
+#endif
+ b guest_bypass
/******************************************************************************
* *
@@ -1032,22 +980,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
kvmppc_interrupt_hv:
/*
* Register contents:
- * R12 = interrupt vector
+ * R9 = HSTATE_IN_GUEST
+ * R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
- * guest CR, R12 saved in shadow VCPU SCRATCH1/0
+ * guest R12 saved in shadow VCPU SCRATCH0
* guest R13 saved in SPRN_SCRATCH0
+ * guest R9 saved in HSTATE_SCRATCH2
*/
- std r9, HSTATE_SCRATCH2(r13)
-
- lbz r9, HSTATE_IN_GUEST(r13)
- cmpwi r9, KVM_GUEST_MODE_HOST_HV
- beq kvmppc_bad_host_intr
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
- cmpwi r9, KVM_GUEST_MODE_GUEST
- ld r9, HSTATE_SCRATCH2(r13)
- beq kvmppc_interrupt_pr
-#endif
/* We're now back in the host but in guest MMU context */
+ cmpwi r9,KVM_GUEST_MODE_HOST_HV
+ beq kvmppc_bad_host_intr
li r9, KVM_GUEST_MODE_HOST_HV
stb r9, HSTATE_IN_GUEST(r13)
@@ -1069,9 +1011,10 @@ kvmppc_interrupt_hv:
std r10, VCPU_GPR(R10)(r9)
std r11, VCPU_GPR(R11)(r9)
ld r3, HSTATE_SCRATCH0(r13)
- lwz r4, HSTATE_SCRATCH1(r13)
std r3, VCPU_GPR(R12)(r9)
- stw r4, VCPU_CR(r9)
+ /* CR is in the high half of r12 */
+ srdi r4, r12, 32
+ std r4, VCPU_CR(r9)
BEGIN_FTR_SECTION
ld r3, HSTATE_CFAR(r13)
std r3, VCPU_CFAR(r9)
@@ -1083,12 +1026,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Restore R1/R2 so we can handle faults */
ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATOC(r13)
+ LOAD_PACA_TOC()
mfspr r10, SPRN_SRR0
mfspr r11, SPRN_SRR1
std r10, VCPU_SRR0(r9)
std r11, VCPU_SRR1(r9)
+ /* trap is in the low half of r12, clear CR from the high half */
+ clrldi r12, r12, 32
andi. r0, r12, 2 /* need to read HSRR0/1? */
beq 1f
mfspr r10, SPRN_HSRR0
@@ -1104,102 +1049,113 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
- /* Save HEIR (HV emulation assist reg) in last_inst
+ /*
+ * Now that we have saved away SRR0/1 and HSRR0/1,
+ * interrupts are recoverable in principle, so set MSR_RI.
+ * This becomes important for relocation-on interrupts from
+ * the guest, which we can get in radix mode on POWER9.
+ */
+ li r0, MSR_RI
+ mtmsrd r0, 1
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ addi r3, r9, VCPU_TB_RMINTR
+ mr r4, r9
+ bl kvmhv_accumulate_time
+ ld r5, VCPU_GPR(R5)(r9)
+ ld r6, VCPU_GPR(R6)(r9)
+ ld r7, VCPU_GPR(R7)(r9)
+ ld r8, VCPU_GPR(R8)(r9)
+#endif
+
+ /* Save HEIR (HV emulation assist reg) in emul_inst
if this is an HEI (HV emulation interrupt, e40) */
li r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
+ std r3,VCPU_LAST_INST(r9)
cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
bne 11f
mfspr r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11: stw r3,VCPU_LAST_INST(r9)
+11: std r3,VCPU_HEIR(r9)
/* these are volatile across C function calls */
mfctr r3
mfxer r4
std r3, VCPU_CTR(r9)
- stw r4, VCPU_XER(r9)
+ std r4, VCPU_XER(r9)
+
+ /* Save more register state */
+ mfdar r3
+ mfdsisr r4
+ std r3, VCPU_DAR(r9)
+ stw r4, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
beq kvmppc_hdsi
+ std r3, VCPU_FAULT_DAR(r9)
+ stw r4, VCPU_FAULT_DSISR(r9)
cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
beq kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* See if this is a leftover HDEC interrupt */
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
mfspr r3,SPRN_HDEC
- cmpwi r3,0
- bge ignore_hdec
+ extsw r3, r3
+ cmpdi r3,0
+ mr r4,r9
+ bge fast_guest_return
2:
/* See if this is an hcall we can handle in real mode */
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
- /* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
- b ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
+ /* Hypervisor doorbell - exit only if host IPI flag set */
+ cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
+ bne 3f
+ lbz r0, HSTATE_HOST_IPI(r13)
+ cmpwi r0, 0
+ beq maybe_reenter_guest
+ b guest_exit_cont
+3:
+ /* If it's a hypervisor facility unavailable interrupt, save HFSCR */
+ cmpwi r12, BOOK3S_INTERRUPT_H_FAC_UNAVAIL
+ bne 14f
+ mfspr r3, SPRN_HFSCR
+ std r3, VCPU_HFSCR(r9)
+ b guest_exit_cont
+14:
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
- bne+ ext_interrupt_to_host
-
- /* External interrupt, first check for host_ipi. If this is
- * set, we know the host wants us out so let's do it now
- */
- bl kvmppc_read_intr
- cmpdi r3, 0
- bgt ext_interrupt_to_host
+ beq kvmppc_guest_external
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+ /* Or a hypervisor maintenance interrupt */
+ cmpwi r12, BOOK3S_INTERRUPT_HMI
+ beq hmi_realmode
- /* Check if any CPU is heading out to the host, if so head out too */
- ld r5, HSTATE_KVM_VCORE(r13)
- lwz r0, VCORE_ENTRY_EXIT(r5)
- cmpwi r0, 0x100
- bge ext_interrupt_to_host
+guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
- /* Return to guest after delivering any pending interrupt */
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ addi r3, r9, VCPU_TB_RMEXIT
mr r4, r9
- b deliver_guest_interrupt
-
-ext_interrupt_to_host:
-
-guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
- /* Save more register state */
- mfdar r6
- mfdsisr r7
- std r6, VCPU_DAR(r9)
- stw r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
- /* don't overwrite fault_dar/fault_dsisr if HDSI */
- cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
- beq 6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- std r6, VCPU_FAULT_DAR(r9)
- stw r7, VCPU_FAULT_DSISR(r9)
+ bl kvmhv_accumulate_time
+#endif
- /* See if it is a machine check */
- cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- beq machine_check_realmode
-mc_cont:
+ /*
+ * Possibly flush the link stack here, before we do a blr in
+ * kvmhv_switch_to_host.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack
- /* Save guest CTRL register, set runlatch to 1 */
-6: mfspr r6,SPRN_CTRLF
- stw r6,VCPU_CTRL(r9)
- andi. r0,r6,1
- bne 4f
- ori r6,r6,1
- mtspr SPRN_CTRLT,r6
-4:
- /* Read the guest SLB and save it away */
+ /* For hash guest, read the guest SLB and save it away */
+ li r5, 0
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
addi r7,r9,VCPU_SLB
- li r5,0
1: slbmfee r8,r6
andis. r0,r8,SLB_ESID_V@h
beq 2f
@@ -1211,12 +1167,61 @@ mc_cont:
addi r5,r5,1
2: addi r6,r6,1
bdnz 1b
+ /* Finally clear out the SLB */
+ li r0,0
+ slbmte r0,r0
+ PPC_SLBIA(6)
+ ptesync
stw r5,VCPU_SLB_MAX(r9)
+ /* load host SLB entries */
+ ld r8,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ li r3, SLBSHADOW_SAVEAREA
+ LDX_BE r5, r8, r3
+ addi r3, r3, 8
+ LDX_BE r6, r8, r3
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r8,r8,16
+ .endr
+
+guest_bypass:
+ stw r12, STACK_SLOT_TRAP(r1)
+
+ /* Save DEC */
+ /* Do this before kvmhv_commence_exit so we know TB is guest TB */
+ ld r3, HSTATE_KVM_VCORE(r13)
+ mfspr r5,SPRN_DEC
+ mftb r6
+ extsw r5,r5
+16: add r5,r5,r6
+ std r5,VCPU_DEC_EXPIRES(r9)
+
+ /* Increment exit count, poke other threads to exit */
+ mr r3, r12
+ bl kvmhv_commence_exit
+ nop
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Stop others sending VCPU interrupts to this physical CPU */
+ li r0, -1
+ stw r0, VCPU_CPU(r9)
+ stw r0, VCPU_THREAD_CPU(r9)
+
+ /* Save guest CTRL register, set runlatch to 1 if it was clear */
+ mfspr r6,SPRN_CTRLF
+ stw r6,VCPU_CTRL(r9)
+ andi. r0,r6,1
+ bne 4f
+ li r6,1
+ mtspr SPRN_CTRLT,r6
+4:
/*
* Save the guest PURR/SPURR
*/
-BEGIN_FTR_SECTION
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
ld r7,VCPU_PURR(r9)
@@ -1236,18 +1241,6 @@ BEGIN_FTR_SECTION
add r4,r4,r6
mtspr SPRN_PURR,r3
mtspr SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
-
- /* Save DEC */
- mfspr r5,SPRN_DEC
- mftb r6
- extsw r5,r5
- add r5,r5,r6
- /* r5 is a guest timebase value here, convert to host TB */
- ld r3,HSTATE_KVM_VCORE(r13)
- ld r4,VCORE_TB_OFFSET(r3)
- subf r5,r4,r5
- std r5,VCPU_DEC_EXPIRES(r9)
BEGIN_FTR_SECTION
b 8f
@@ -1260,48 +1253,63 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
stw r6, VCPU_PSPB(r9)
std r7, VCPU_FSCR(r9)
mfspr r5, SPRN_IC
- mfspr r6, SPRN_VTB
mfspr r7, SPRN_TAR
std r5, VCPU_IC(r9)
- std r6, VCPU_VTB(r9)
std r7, VCPU_TAR(r9)
mfspr r8, SPRN_EBBHR
std r8, VCPU_EBBHR(r9)
mfspr r5, SPRN_EBBRR
mfspr r6, SPRN_BESCR
- mfspr r7, SPRN_CSIGR
- mfspr r8, SPRN_TACR
+ mfspr r7, SPRN_PID
+ mfspr r8, SPRN_WORT
std r5, VCPU_EBBRR(r9)
std r6, VCPU_BESCR(r9)
- std r7, VCPU_CSIGR(r9)
- std r8, VCPU_TACR(r9)
+ stw r7, VCPU_GUEST_PID(r9)
+ std r8, VCPU_WORT(r9)
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
- mfspr r7, SPRN_PID
- mfspr r8, SPRN_WORT
+ mfspr r7, SPRN_CSIGR
+ mfspr r8, SPRN_TACR
std r5, VCPU_TCSCR(r9)
std r6, VCPU_ACOP(r9)
- stw r7, VCPU_GUEST_PID(r9)
- std r8, VCPU_WORT(r9)
-8:
-
- /* Save and reset AMR and UAMOR before turning on the MMU */
+ std r7, VCPU_CSIGR(r9)
+ std r8, VCPU_TACR(r9)
BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_FSCR(r1)
+ mtspr SPRN_FSCR, r5
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ /*
+ * Restore various registers to 0, where non-zero values
+ * set by the guest could disrupt the host.
+ */
+ li r0, 0
+ mtspr SPRN_PSPB, r0
+ mtspr SPRN_WORT, r0
+ mtspr SPRN_TCSCR, r0
+ /* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
+ li r0, 1
+ sldi r0, r0, 31
+ mtspr SPRN_MMCRS, r0
+
+ /* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
+ ld r8, STACK_SLOT_IAMR(r1)
+ mtspr SPRN_IAMR, r8
+
+8: /* Power7 jumps back in here */
mfspr r5,SPRN_AMR
mfspr r6,SPRN_UAMOR
std r5,VCPU_AMR(r9)
std r6,VCPU_UAMOR(r9)
- li r6,0
- mtspr SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
+ ld r5,STACK_SLOT_AMR(r1)
+ ld r6,STACK_SLOT_UAMOR(r1)
+ mtspr SPRN_AMR, r5
+ mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
mfspr r8, SPRN_DSCR
ld r7, HSTATE_DSCR(r13)
std r8, VCPU_DSCR(r9)
mtspr SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Save non-volatile GPRs */
std r14, VCPU_GPR(R14)(r9)
@@ -1339,106 +1347,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
- b 2f
+ b 91f
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
- /* Turn on TM. */
- mfmsr r8
- li r0, 1
- rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
- mtmsrd r8
-
- ld r5, VCPU_MSR(r9)
- rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
- beq 1f /* TM not active in guest. */
-
- li r3, TM_CAUSE_KVM_RESCHED
-
- /* Clear the MSR RI since r1, r13 are all going to be foobar. */
- li r5, 0
- mtmsrd r5, 1
-
- /* All GPRs are volatile at this point. */
- TRECLAIM(R3)
-
- /* Temporarily store r13 and r9 so we have some regs to play with */
- SET_SCRATCH0(r13)
- GET_PACA(r13)
- std r9, PACATMSCRATCH(r13)
- ld r9, HSTATE_KVM_VCPU(r13)
-
- /* Get a few more GPRs free. */
- std r29, VCPU_GPRS_TM(29)(r9)
- std r30, VCPU_GPRS_TM(30)(r9)
- std r31, VCPU_GPRS_TM(31)(r9)
-
- /* Save away PPR and DSCR soon so don't run with user values. */
- mfspr r31, SPRN_PPR
- HMT_MEDIUM
- mfspr r30, SPRN_DSCR
- ld r29, HSTATE_DSCR(r13)
- mtspr SPRN_DSCR, r29
-
- /* Save all but r9, r13 & r29-r31 */
- reg = 0
- .rept 29
- .if (reg != 9) && (reg != 13)
- std reg, VCPU_GPRS_TM(reg)(r9)
- .endif
- reg = reg + 1
- .endr
- /* ... now save r13 */
- GET_SCRATCH0(r4)
- std r4, VCPU_GPRS_TM(13)(r9)
- /* ... and save r9 */
- ld r4, PACATMSCRATCH(r13)
- std r4, VCPU_GPRS_TM(9)(r9)
-
- /* Reload stack pointer and TOC. */
- ld r1, HSTATE_HOST_R1(r13)
- ld r2, PACATOC(r13)
-
- /* Set MSR RI now we have r1 and r13 back. */
- li r5, MSR_RI
- mtmsrd r5, 1
-
- /* Save away checkpinted SPRs. */
- std r31, VCPU_PPR_TM(r9)
- std r30, VCPU_DSCR_TM(r9)
- mflr r5
- mfcr r6
- mfctr r7
- mfspr r8, SPRN_AMR
- mfspr r10, SPRN_TAR
- std r5, VCPU_LR_TM(r9)
- stw r6, VCPU_CR_TM(r9)
- std r7, VCPU_CTR_TM(r9)
- std r8, VCPU_AMR_TM(r9)
- std r10, VCPU_TAR_TM(r9)
-
- /* Restore r12 as trap number. */
- lwz r12, VCPU_TRAP(r9)
-
- /* Save FP/VSX. */
- addi r3, r9, VCPU_FPRS_TM
- bl store_fp_state
- addi r3, r9, VCPU_VRS_TM
- bl store_vr_state
- mfspr r6, SPRN_VRSAVE
- stw r6, VCPU_VRSAVE_TM(r9)
-1:
/*
- * We need to save these SPRs after the treclaim so that the software
- * error code is recorded correctly in the TEXASR. Also the user may
- * change these outside of a transaction, so they must always be
- * context switched.
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
- mfspr r5, SPRN_TFHAR
- mfspr r6, SPRN_TFIAR
- mfspr r7, SPRN_TEXASR
- std r5, VCPU_TFHAR(r9)
- std r6, VCPU_TFIAR(r9)
- std r7, VCPU_TEXASR(r9)
-2:
+ mr r3, r9
+ ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
+ bl kvmppc_save_tm_hv
+ nop
+ ld r9, HSTATE_KVM_VCPU(r13)
+91:
#endif
/* Increment yield count if they have a VPA */
@@ -1454,162 +1374,36 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
25:
/* Save PMU registers if requested */
/* r8 and cr0.eq are live here */
-BEGIN_FTR_SECTION
- /*
- * POWER8 seems to have a hardware bug where setting
- * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
- * when some counters are already negative doesn't seem
- * to cause a performance monitor alert (and hence interrupt).
- * The effect of this is that when saving the PMU state,
- * if there is no PMU alert pending when we read MMCR0
- * before freezing the counters, but one becomes pending
- * before we read the counters, we lose it.
- * To work around this, we need a way to freeze the counters
- * before reading MMCR0. Normally, freezing the counters
- * is done by writing MMCR0 (to set MMCR0[FC]) which
- * unavoidably writes MMCR0[PMA0] as well. On POWER8,
- * we can also freeze the counters using MMCR2, by writing
- * 1s to all the counter freeze condition bits (there are
- * 9 bits each for 6 counters).
- */
- li r3, -1 /* set all freeze bits */
- clrrdi r3, r3, 10
- mfspr r10, SPRN_MMCR2
- mtspr SPRN_MMCR2, r3
- isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- li r3, 1
- sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
- mfspr r4, SPRN_MMCR0 /* save MMCR0 */
- mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
- mfspr r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
- /* On P7, clear MMCRA in order to disable SDAR updates */
- li r7, 0
- mtspr SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
- isync
+ mr r3, r9
+ li r4, 1
beq 21f /* if no VPA, save PMU stuff anyway */
- lbz r7, LPPACA_PMCINUSE(r8)
- cmpwi r7, 0 /* did they ask for PMU stuff to be saved? */
- bne 21f
- std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
- b 22f
-21: mfspr r5, SPRN_MMCR1
- mfspr r7, SPRN_SIAR
- mfspr r8, SPRN_SDAR
- std r4, VCPU_MMCR(r9)
- std r5, VCPU_MMCR + 8(r9)
- std r6, VCPU_MMCR + 16(r9)
-BEGIN_FTR_SECTION
- std r10, VCPU_MMCR + 24(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
- std r7, VCPU_SIAR(r9)
- std r8, VCPU_SDAR(r9)
- mfspr r3, SPRN_PMC1
- mfspr r4, SPRN_PMC2
- mfspr r5, SPRN_PMC3
- mfspr r6, SPRN_PMC4
- mfspr r7, SPRN_PMC5
- mfspr r8, SPRN_PMC6
-BEGIN_FTR_SECTION
- mfspr r10, SPRN_PMC7
- mfspr r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- stw r3, VCPU_PMC(r9)
- stw r4, VCPU_PMC + 4(r9)
- stw r5, VCPU_PMC + 8(r9)
- stw r6, VCPU_PMC + 12(r9)
- stw r7, VCPU_PMC + 16(r9)
- stw r8, VCPU_PMC + 20(r9)
-BEGIN_FTR_SECTION
- stw r10, VCPU_PMC + 24(r9)
- stw r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
+ lbz r4, LPPACA_PMCINUSE(r8)
+21: bl kvmhv_save_guest_pmu
+ ld r9, HSTATE_KVM_VCPU(r13)
+
+ /* Restore host values of some registers */
BEGIN_FTR_SECTION
- mfspr r5, SPRN_SIER
- mfspr r6, SPRN_SPMC1
- mfspr r7, SPRN_SPMC2
- mfspr r8, SPRN_MMCRS
- std r5, VCPU_SIER(r9)
- stw r6, VCPU_PMC + 24(r9)
- stw r7, VCPU_PMC + 28(r9)
- std r8, VCPU_MMCR + 32(r9)
- lis r4, 0x8000
- mtspr SPRN_MMCRS, r4
+ ld r5, STACK_SLOT_CIABR(r1)
+ ld r6, STACK_SLOT_DAWR0(r1)
+ ld r7, STACK_SLOT_DAWRX0(r1)
+ mtspr SPRN_CIABR, r5
+ /*
+ * If the DAWR doesn't work, it's ok to write these here as
+ * this value should always be zero
+ */
+ mtspr SPRN_DAWR0, r6
+ mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-22:
- /* Clear out SLB */
- li r5,0
- slbmte r5,r5
- slbia
- ptesync
-hdec_soon: /* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
- b 32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
- * POWER7 guest -> host partition switch code.
+ * POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
* have to coordinate the hardware threads.
+ * Here STACK_SLOT_TRAP(r1) contains the trap number.
*/
- /* Increment the threads-exiting-guest count in the 0xff00
- bits of vcore->entry_exit_count */
- ld r5,HSTATE_KVM_VCORE(r13)
- addi r6,r5,VCORE_ENTRY_EXIT
-41: lwarx r3,0,r6
- addi r0,r3,0x100
- stwcx. r0,0,r6
- bne 41b
- isync /* order stwcx. vs. reading napping_threads */
-
- /*
- * At this point we have an interrupt that we have to pass
- * up to the kernel or qemu; we can't handle it in real mode.
- * Thus we have to do a partition switch, so we have to
- * collect the other threads, if we are the first thread
- * to take an interrupt. To do this, we set the HDEC to 0,
- * which causes an HDEC interrupt in all threads within 2ns
- * because the HDEC register is shared between all 4 threads.
- * However, we don't need to bother if this is an HDEC
- * interrupt, since the other threads will already be on their
- * way here in that case.
- */
- cmpwi r3,0x100 /* Are we the first here? */
- bge 43f
- cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
- beq 40f
- li r0,0
- mtspr SPRN_HDEC,r0
-40:
- /*
- * Send an IPI to any napping threads, since an HDEC interrupt
- * doesn't wake CPUs up from nap.
- */
- lwz r3,VCORE_NAPPING_THREADS(r5)
- lbz r4,HSTATE_PTID(r13)
- li r0,1
- sld r0,r0,r4
- andc. r3,r3,r0 /* no sense IPI'ing ourselves */
- beq 43f
- /* Order entry/exit update vs. IPIs */
- sync
- mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
- subf r6,r4,r13
-42: andi. r0,r3,1
- beq 44f
- ld r8,HSTATE_XICS_PHYS(r6) /* get thread's XICS reg addr */
- li r0,IPI_PRIORITY
- li r7,XICS_MFRR
- stbcix r0,r7,r8 /* trigger the IPI */
-44: srdi. r3,r3,1
- addi r6,r6,PACA_SIZE
- bne 42b
-
-secondary_too_late:
+kvmhv_switch_to_host:
/* Secondary threads wait for primary to do partition switch */
-43: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r5,HSTATE_KVM_VCORE(r13)
ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
lbz r3,HSTATE_PTID(r13)
cmpwi r3,0
@@ -1623,35 +1417,44 @@ secondary_too_late:
/* Primary thread waits for all the secondaries to exit guest */
15: lwz r3,VCORE_ENTRY_EXIT(r5)
- srwi r0,r3,8
+ rlwinm r0,r3,32-8,0xff
clrldi r3,r3,56
cmpw r3,r0
bne 15b
isync
+ /* Did we actually switch to the guest at all? */
+ lbz r6, VCORE_IN_GUEST(r5)
+ cmpwi r6, 0
+ beq 19f
+
/* Primary thread switches back to host partition */
- ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4)
+ ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
- mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_SDR1,r6 /* switch to host page table */
mtspr SPRN_LPID,r7
isync
BEGIN_FTR_SECTION
- /* DPDES is shared between threads */
+ /* DPDES and VTB are shared between threads */
mfspr r7, SPRN_DPDES
+ mfspr r8, SPRN_VTB
std r7, VCORE_DPDES(r5)
+ std r8, VCORE_VTB(r5)
/* clear DPDES so we don't get guest doorbells in the host */
li r8, 0
mtspr SPRN_DPDES, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Subtract timebase offset from timebase */
- ld r8,VCORE_TB_OFFSET(r5)
+ ld r8, VCORE_TB_OFFSET_APPL(r5)
cmpdi r8,0
beq 17f
+ li r0, 0
+ std r0, VCORE_TB_OFFSET_APPL(r5)
mftb r6 /* current guest timebase */
subf r8,r8,r6
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
@@ -1663,122 +1466,138 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
addis r8,r8,0x100 /* if so, increment upper 40 bits */
mtspr SPRN_TBU40,r8
+17:
+ /*
+ * If this is an HMI, we called kvmppc_realmode_hmi_handler
+ * above, which may or may not have already called
+ * kvmppc_subcore_exit_guest. Fortunately, all that
+ * kvmppc_subcore_exit_guest does is clear a flag, so calling
+ * it again here is benign even if kvmppc_realmode_hmi_handler
+ * has already called it.
+ */
+ bl kvmppc_subcore_exit_guest
+ nop
+30: ld r5,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
+
/* Reset PCR */
-17: ld r0, VCORE_PCR(r5)
- cmpdi r0, 0
+ ld r0, VCORE_PCR(r5)
+ LOAD_REG_IMMEDIATE(r6, PCR_MASK)
+ cmpld r0, r6
beq 18f
- li r0, 0
- mtspr SPRN_PCR, r0
+ mtspr SPRN_PCR, r6
18:
/* Signal secondary CPUs to continue */
+ li r0, 0
stb r0,VCORE_IN_GUEST(r5)
- lis r8,0x7fff /* MAX_INT@h */
+19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8
16: ld r8,KVM_HOST_LPCR(r4)
mtspr SPRN_LPCR,r8
isync
- b 33f
- /*
- * PPC970 guest -> host partition switch code.
- * We have to lock against concurrent tlbies, and
- * we have to flush the whole TLB.
- */
-32: ld r5,HSTATE_KVM_VCORE(r13)
- ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
-
- /* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
- lwz r8,PACA_LOCK_TOKEN(r13)
-#else
- lwz r8,PACAPACAINDEX(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ /* Finish timing, if we have a vcpu */
+ ld r4, HSTATE_KVM_VCPU(r13)
+ cmpdi r4, 0
+ li r3, 0
+ beq 2f
+ bl kvmhv_accumulate_time
+2:
#endif
- addi r3,r4,KVM_TLBIE_LOCK
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
+ /* Unset guest mode */
+ li r0, KVM_GUEST_MODE_NONE
+ stb r0, HSTATE_IN_GUEST(r13)
- ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
- li r0,0x18f
- rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
- or r0,r7,r0
- ptesync
- sync
- mtspr SPRN_HID4,r0 /* switch to reserved LPID */
- isync
- li r0,0
- stw r0,0(r3) /* drop guest tlbie_lock */
+ lwz r12, STACK_SLOT_TRAP(r1) /* return trap # in r12 */
+ ld r0, SFS+PPC_LR_STKOFF(r1)
+ addi r1, r1, SFS
+ mtlr r0
+ blr
- /* invalidate the whole TLB */
- li r0,256
- mtctr r0
- li r6,0
-25: tlbiel r6
- addi r6,r6,0x1000
- bdnz 25b
- ptesync
+.balign 32
+.global kvm_flush_link_stack
+kvm_flush_link_stack:
+ /* Save LR into r0 */
+ mflr r0
- /* take native_tlbie_lock */
- ld r3,toc_tlbie_lock@toc(2)
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
+ /* Flush the link stack. On Power8 it's up to 32 entries in size. */
+ .rept 32
+ bl .+4
+ .endr
- ld r6,KVM_HOST_SDR1(r4)
- mtspr SPRN_SDR1,r6 /* switch to host page table */
+ /* And on Power9 it's up to 64. */
+BEGIN_FTR_SECTION
+ .rept 32
+ bl .+4
+ .endr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- /* Set up host HID4 value */
- sync
- mtspr SPRN_HID4,r7
- isync
- li r0,0
- stw r0,0(r3) /* drop native_tlbie_lock */
+ /* Restore LR */
+ mtlr r0
+ blr
- lis r8,0x7fff /* MAX_INT@h */
- mtspr SPRN_HDEC,r8
+kvmppc_guest_external:
+ /* External interrupt, first check for host_ipi. If this is
+ * set, we know the host wants us out so let's do it now
+ */
+ bl CFUNC(kvmppc_read_intr)
- /* Disable HDEC interrupts */
- mfspr r0,SPRN_HID0
- li r3,0
- rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
- sync
- mtspr SPRN_HID0,r0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
+ /*
+ * Restore the active volatile registers after returning from
+ * a C function.
+ */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
- /* load host SLB entries */
-33: ld r8,PACA_SLBSHADOWPTR(r13)
+ /*
+ * kvmppc_read_intr return codes:
+ *
+ * Exit to host (r3 > 0)
+ * 1 An interrupt is pending that needs to be handled by the host
+ * Exit guest and return to host by branching to guest_exit_cont
+ *
+ * 2 Passthrough that needs completion in the host
+ * Exit guest and return to host by branching to guest_exit_cont
+ * However, we also set r12 to BOOK3S_INTERRUPT_HV_RM_HARD
+ * to indicate to the host to complete handling the interrupt
+ *
+ * Before returning to guest, we check if any CPU is heading out
+ * to the host and if so, we head out also. If no CPUs are heading
+ * check return values <= 0.
+ *
+ * Return to guest (r3 <= 0)
+ * 0 No external interrupt is pending
+ * -1 A guest wakeup IPI (which has now been cleared)
+ * In either case, we return to guest to deliver any pending
+ * guest interrupts.
+ *
+ * -2 A PCI passthrough external interrupt was handled
+ * (interrupt was delivered directly to guest)
+ * Return to guest to deliver any pending guest interrupts.
+ */
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7,r5,SLB_ESID_V@h
- beq 1f
- slbmte r6,r5
-1: addi r8,r8,16
- .endr
+ cmpdi r3, 1
+ ble 1f
- /* Unset guest mode */
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
+ /* Return code = 2 */
+ li r12, BOOK3S_INTERRUPT_HV_RM_HARD
+ stw r12, VCPU_TRAP(r9)
+ b guest_exit_cont
- ld r0, 112+PPC_LR_STKOFF(r1)
- addi r1, r1, 112
- mtlr r0
- blr
+1: /* Return code <= 1 */
+ cmpdi r3, 0
+ bgt guest_exit_cont
+
+ /* Return code <= 0 */
+maybe_reenter_guest:
+ ld r5, HSTATE_KVM_VCORE(r13)
+ lwz r0, VCORE_ENTRY_EXIT(r5)
+ cmpwi r0, 0x100
+ mr r4, r9
+ blt deliver_guest_interrupt
+ b guest_exit_cont
/*
* Check whether an HDSI is an HPTE not found fault or something else.
@@ -1797,14 +1616,15 @@ kvmppc_hdsi:
beq 3f
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
- bne 1f /* if no SLB entry found */
+ li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
+ bne 7f /* if no SLB entry found */
4: std r4, VCPU_FAULT_DAR(r9)
stw r6, VCPU_FAULT_DSISR(r9)
/* Search the hash table. */
mr r3, r9 /* vcpu pointer */
li r7, 1 /* data fault */
- bl kvmppc_hpte_hv_fault
+ bl CFUNC(kvmppc_hpte_hv_fault)
ld r9, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_PC(r9)
ld r11, VCPU_MSR(r9)
@@ -1816,18 +1636,19 @@ kvmppc_hdsi:
cmpdi r3, -2 /* MMIO emulation; need instr word */
beq 2f
- /* Synthesize a DSI for the guest */
+ /* Synthesize a DSI (or DSegI) for the guest */
ld r4, VCPU_FAULT_DAR(r9)
mr r6, r3
-1: mtspr SPRN_DAR, r4
+1: li r0, BOOK3S_INTERRUPT_DATA_STORAGE
mtspr SPRN_DSISR, r6
+7: mtspr SPRN_DAR, r4
mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
- li r10, BOOK3S_INTERRUPT_DATA_STORAGE
+ mr r10, r0
bl kvmppc_msr_interrupt
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
- lwz r8, VCPU_XER(r9)
+ ld r8, VCPU_XER(r9)
mtctr r7
mtxer r8
mr r4, r9
@@ -1853,7 +1674,7 @@ fast_interrupt_c_return:
mtmsrd r3
/* Store the result */
- stw r8, VCPU_LAST_INST(r9)
+ std r8, VCPU_LAST_INST(r9)
/* Unset guest mode. */
li r0, KVM_GUEST_MODE_HOST_HV
@@ -1871,14 +1692,15 @@ kvmppc_hisi:
beq 3f
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
- bne 1f /* if no SLB entry found */
+ li r0, BOOK3S_INTERRUPT_INST_SEGMENT
+ bne 7f /* if no SLB entry found */
4:
/* Search the hash table. */
mr r3, r9 /* vcpu pointer */
mr r4, r10
mr r6, r11
li r7, 0 /* instruction fault */
- bl kvmppc_hpte_hv_fault
+ bl CFUNC(kvmppc_hpte_hv_fault)
ld r9, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_PC(r9)
ld r11, VCPU_MSR(r9)
@@ -1888,11 +1710,12 @@ kvmppc_hisi:
cmpdi r3, -1 /* handle in kernel mode */
beq guest_exit_cont
- /* Synthesize an ISI for the guest */
+ /* Synthesize an ISI (or ISegI) for the guest */
mr r11, r3
-1: mtspr SPRN_SRR0, r10
+1: li r0, BOOK3S_INTERRUPT_INST_STORAGE
+7: mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
- li r10, BOOK3S_INTERRUPT_INST_STORAGE
+ mr r10, r0
bl kvmppc_msr_interrupt
b fast_interrupt_c_return
@@ -1905,8 +1728,10 @@ kvmppc_hisi:
* Returns to the guest if we handle it, or continues on up to
* the kernel if we can't (i.e. if we don't have a handler for
* it, or if the handler returns H_TOO_HARD).
+ *
+ * r5 - r8 contain hcall args,
+ * r9 = vcpu, r10 = pc, r11 = msr, r12 = trap, r13 = paca
*/
- .globl hcall_try_real_mode
hcall_try_real_mode:
ld r3,VCPU_GPR(R3)(r9)
andi. r0,r11,MSR_PR
@@ -1966,14 +1791,14 @@ hcall_real_table:
.long DOTSYM(kvmppc_h_remove) - hcall_real_table
.long DOTSYM(kvmppc_h_enter) - hcall_real_table
.long DOTSYM(kvmppc_h_read) - hcall_real_table
- .long 0 /* 0x10 - H_CLEAR_MOD */
- .long 0 /* 0x14 - H_CLEAR_REF */
+ .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
+ .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
.long DOTSYM(kvmppc_h_protect) - hcall_real_table
- .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
- .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
+ .long 0 /* 0x1c */
+ .long 0 /* 0x20 */
.long 0 /* 0x24 - H_SET_SPRG0 */
.long DOTSYM(kvmppc_h_set_dabr) - hcall_real_table
- .long 0 /* 0x2c */
+ .long DOTSYM(kvmppc_rm_h_page_init) - hcall_real_table
.long 0 /* 0x30 */
.long 0 /* 0x34 */
.long 0 /* 0x38 */
@@ -1988,11 +1813,11 @@ hcall_real_table:
.long 0 /* 0x5c */
.long 0 /* 0x60 */
#ifdef CONFIG_KVM_XICS
- .long DOTSYM(kvmppc_rm_h_eoi) - hcall_real_table
- .long DOTSYM(kvmppc_rm_h_cppr) - hcall_real_table
- .long DOTSYM(kvmppc_rm_h_ipi) - hcall_real_table
+ .long DOTSYM(xics_rm_h_eoi) - hcall_real_table
+ .long DOTSYM(xics_rm_h_cppr) - hcall_real_table
+ .long DOTSYM(xics_rm_h_ipi) - hcall_real_table
.long 0 /* 0x70 - H_IPOLL */
- .long DOTSYM(kvmppc_rm_h_xirr) - hcall_real_table
+ .long DOTSYM(xics_rm_h_xirr) - hcall_real_table
#else
.long 0 /* 0x64 - H_EOI */
.long 0 /* 0x68 - H_CPPR */
@@ -2027,7 +1852,7 @@ hcall_real_table:
.long 0 /* 0xd8 */
.long 0 /* 0xdc */
.long DOTSYM(kvmppc_h_cede) - hcall_real_table
- .long 0 /* 0xe4 */
+ .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
.long 0 /* 0xe8 */
.long 0 /* 0xec */
.long 0 /* 0xf0 */
@@ -2048,14 +1873,130 @@ hcall_real_table:
.long 0 /* 0x12c */
.long 0 /* 0x130 */
.long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+ .long 0 /* 0x138 */
+ .long 0 /* 0x13c */
+ .long 0 /* 0x140 */
+ .long 0 /* 0x144 */
+ .long 0 /* 0x148 */
+ .long 0 /* 0x14c */
+ .long 0 /* 0x150 */
+ .long 0 /* 0x154 */
+ .long 0 /* 0x158 */
+ .long 0 /* 0x15c */
+ .long 0 /* 0x160 */
+ .long 0 /* 0x164 */
+ .long 0 /* 0x168 */
+ .long 0 /* 0x16c */
+ .long 0 /* 0x170 */
+ .long 0 /* 0x174 */
+ .long 0 /* 0x178 */
+ .long 0 /* 0x17c */
+ .long 0 /* 0x180 */
+ .long 0 /* 0x184 */
+ .long 0 /* 0x188 */
+ .long 0 /* 0x18c */
+ .long 0 /* 0x190 */
+ .long 0 /* 0x194 */
+ .long 0 /* 0x198 */
+ .long 0 /* 0x19c */
+ .long 0 /* 0x1a0 */
+ .long 0 /* 0x1a4 */
+ .long 0 /* 0x1a8 */
+ .long 0 /* 0x1ac */
+ .long 0 /* 0x1b0 */
+ .long 0 /* 0x1b4 */
+ .long 0 /* 0x1b8 */
+ .long 0 /* 0x1bc */
+ .long 0 /* 0x1c0 */
+ .long 0 /* 0x1c4 */
+ .long 0 /* 0x1c8 */
+ .long 0 /* 0x1cc */
+ .long 0 /* 0x1d0 */
+ .long 0 /* 0x1d4 */
+ .long 0 /* 0x1d8 */
+ .long 0 /* 0x1dc */
+ .long 0 /* 0x1e0 */
+ .long 0 /* 0x1e4 */
+ .long 0 /* 0x1e8 */
+ .long 0 /* 0x1ec */
+ .long 0 /* 0x1f0 */
+ .long 0 /* 0x1f4 */
+ .long 0 /* 0x1f8 */
+ .long 0 /* 0x1fc */
+ .long 0 /* 0x200 */
+ .long 0 /* 0x204 */
+ .long 0 /* 0x208 */
+ .long 0 /* 0x20c */
+ .long 0 /* 0x210 */
+ .long 0 /* 0x214 */
+ .long 0 /* 0x218 */
+ .long 0 /* 0x21c */
+ .long 0 /* 0x220 */
+ .long 0 /* 0x224 */
+ .long 0 /* 0x228 */
+ .long 0 /* 0x22c */
+ .long 0 /* 0x230 */
+ .long 0 /* 0x234 */
+ .long 0 /* 0x238 */
+ .long 0 /* 0x23c */
+ .long 0 /* 0x240 */
+ .long 0 /* 0x244 */
+ .long 0 /* 0x248 */
+ .long 0 /* 0x24c */
+ .long 0 /* 0x250 */
+ .long 0 /* 0x254 */
+ .long 0 /* 0x258 */
+ .long 0 /* 0x25c */
+ .long 0 /* 0x260 */
+ .long 0 /* 0x264 */
+ .long 0 /* 0x268 */
+ .long 0 /* 0x26c */
+ .long 0 /* 0x270 */
+ .long 0 /* 0x274 */
+ .long 0 /* 0x278 */
+ .long 0 /* 0x27c */
+ .long 0 /* 0x280 */
+ .long 0 /* 0x284 */
+ .long 0 /* 0x288 */
+ .long 0 /* 0x28c */
+ .long 0 /* 0x290 */
+ .long 0 /* 0x294 */
+ .long 0 /* 0x298 */
+ .long 0 /* 0x29c */
+ .long 0 /* 0x2a0 */
+ .long 0 /* 0x2a4 */
+ .long 0 /* 0x2a8 */
+ .long 0 /* 0x2ac */
+ .long 0 /* 0x2b0 */
+ .long 0 /* 0x2b4 */
+ .long 0 /* 0x2b8 */
+ .long 0 /* 0x2bc */
+ .long 0 /* 0x2c0 */
+ .long 0 /* 0x2c4 */
+ .long 0 /* 0x2c8 */
+ .long 0 /* 0x2cc */
+ .long 0 /* 0x2d0 */
+ .long 0 /* 0x2d4 */
+ .long 0 /* 0x2d8 */
+ .long 0 /* 0x2dc */
+ .long 0 /* 0x2e0 */
+ .long 0 /* 0x2e4 */
+ .long 0 /* 0x2e8 */
+ .long 0 /* 0x2ec */
+ .long 0 /* 0x2f0 */
+ .long 0 /* 0x2f4 */
+ .long 0 /* 0x2f8 */
+#ifdef CONFIG_KVM_XICS
+ .long DOTSYM(xics_rm_h_xirr_x) - hcall_real_table
+#else
+ .long 0 /* 0x2fc - H_XIRR_X*/
+#endif
+ .long DOTSYM(kvmppc_rm_h_random) - hcall_real_table
.globl hcall_real_table_end
hcall_real_table_end:
-ignore_hdec:
- mr r4,r9
- b fast_guest_return
-
-_GLOBAL(kvmppc_h_set_xdabr)
+_GLOBAL_TOC(kvmppc_h_set_xdabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr)
andi. r0, r5, DABRX_USER | DABRX_KERNEL
beq 6f
li r0, DABRX_USER | DABRX_KERNEL | DABRX_BTI
@@ -2064,7 +2005,8 @@ _GLOBAL(kvmppc_h_set_xdabr)
6: li r3, H_PARAMETER
blr
-_GLOBAL(kvmppc_h_set_dabr)
+_GLOBAL_TOC(kvmppc_h_set_dabr)
+EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr)
li r5, DABRX_USER | DABRX_KERNEL
3:
BEGIN_FTR_SECTION
@@ -2082,18 +2024,35 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r3,0
blr
+2:
+ LOAD_REG_ADDR(r11, dawr_force_enable)
+ lbz r11, 0(r11)
+ cmpdi r11, 0
+ bne 3f
+ li r3, H_HARDWARE
+ blr
+3:
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
-2: rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
- rlwimi r5, r4, 1, DAWRX_WT
+ rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
+ rlwimi r5, r4, 2, DAWRX_WT
clrrdi r4, r4, 3
- std r4, VCPU_DAWR(r3)
- std r5, VCPU_DAWRX(r3)
- mtspr SPRN_DAWR, r4
- mtspr SPRN_DAWRX, r5
- li r3, 0
+ std r4, VCPU_DAWR0(r3)
+ std r5, VCPU_DAWRX0(r3)
+ /*
+ * If we came in through the real mode hcall handler then it is
+ * necessary to write the registers since the return path won't.
+ * Otherwise it is sufficient to store them in the vcpu struct as they
+ * will be loaded next time the vcpu is run.
+ */
+ mfmsr r6
+ andi. r6, r6, MSR_DR /* in real mode? */
+ bne 4f
+ mtspr SPRN_DAWR0, r4
+ mtspr SPRN_DAWRX0, r5
+4: li r3, 0
blr
-_GLOBAL(kvmppc_h_cede)
+_GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
ori r11,r11,MSR_EE
std r11,VCPU_MSR(r3)
li r0,1
@@ -2102,13 +2061,10 @@ _GLOBAL(kvmppc_h_cede)
lbz r5,VCPU_PRODDED(r3)
cmpwi r5,0
bne kvm_cede_prodded
- li r0,0 /* set trap to 0 to say hcall is handled */
- stw r0,VCPU_TRAP(r3)
+ li r12,0 /* set trap to 0 to say hcall is handled */
+ stw r12,VCPU_TRAP(r3)
li r0,H_SUCCESS
std r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
- b kvm_cede_exit /* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/*
* Set our bit in the bitmask of napping threads unless all the
@@ -2124,12 +2080,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
addi r6,r5,VCORE_NAPPING_THREADS
31: lwarx r4,0,r6
or r4,r4,r0
- PPC_POPCNTW(R7,R4)
- cmpw r7,r8
- bge kvm_cede_exit
+ cmpw r4,r8
+ beq kvm_cede_exit
stwcx. r4,0,r6
bne 31b
- /* order napping_threads update vs testing entry_exit_count */
+ /* order napping_threads update vs testing entry_exit_map */
isync
li r0,NAPPING_CEDE
stb r0,HSTATE_NAPPING(r13)
@@ -2166,32 +2121,96 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/* save FP state */
bl kvmppc_save_fp
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
+ */
+ ld r3, HSTATE_KVM_VCPU(r13)
+ ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
+ bl kvmppc_save_tm_hv
+ nop
+91:
+#endif
+
+ /*
+ * Set DEC to the smaller of DEC and HDEC, so that we wake
+ * no later than the end of our timeslice (HDEC interrupts
+ * don't wake us from nap).
+ */
+ mfspr r3, SPRN_DEC
+ mfspr r4, SPRN_HDEC
+ mftb r5
+ extsw r3, r3
+ extsw r4, r4
+ cmpd r3, r4
+ ble 67f
+ mtspr SPRN_DEC, r4
+67:
+ /* save expiry time of guest decrementer */
+ add r3, r3, r5
+ ld r4, HSTATE_KVM_VCPU(r13)
+ std r3, VCPU_DEC_EXPIRES(r4)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ ld r4, HSTATE_KVM_VCPU(r13)
+ addi r3, r4, VCPU_TB_CEDE
+ bl kvmhv_accumulate_time
+#endif
+
+ lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */
+
+ /* Go back to host stack */
+ ld r1, HSTATE_HOST_R1(r13)
+
/*
* Take a nap until a decrementer or external or doobell interrupt
- * occurs, with PECE1, PECE0 and PECEDP set in LPCR. Also clear the
- * runlatch bit before napping.
+ * occurs, with PECE1 and PECE0 set in LPCR.
+ * On POWER8, set PECEDH, and if we are ceding, also set PECEDP.
+ * Also clear the runlatch bit before napping.
*/
- mfspr r2, SPRN_CTRLF
- clrrdi r2, r2, 1
- mtspr SPRN_CTRLT, r2
+kvm_do_nap:
+ li r0,0
+ mtspr SPRN_CTRLT, r0
li r0,1
stb r0,HSTATE_HWTHREAD_REQ(r13)
mfspr r5,SPRN_LPCR
ori r5,r5,LPCR_PECE0 | LPCR_PECE1
BEGIN_FTR_SECTION
- oris r5,r5,LPCR_PECEDP@h
+ ori r5, r5, LPCR_PECEDH
+ rlwimi r5, r3, 0, LPCR_PECEDP
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+kvm_nap_sequence: /* desired LPCR value in r5 */
+ li r3, PNV_THREAD_NAP
mtspr SPRN_LPCR,r5
isync
+
+ bl isa206_idle_insn_mayloss
+
+ li r0,1
+ mtspr SPRN_CTRLT, r0
+
+ mtspr SPRN_SRR1, r3
+
li r0, 0
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
- nap
- b .
+ stb r0, PACA_FTRACE_ENABLED(r13)
+
+ li r0, KVM_HWTHREAD_IN_KVM
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+
+ lbz r0, HSTATE_NAPPING(r13)
+ cmpwi r0, NAPPING_CEDE
+ beq kvm_end_cede
+ cmpwi r0, NAPPING_NOVCPU
+ beq kvm_novcpu_wakeup
+ cmpwi r0, NAPPING_UNSPLIT
+ beq kvm_unsplit_wakeup
+ twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
li r3, 0
@@ -2199,15 +2218,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
b 34f
kvm_end_cede:
+ /* Woken by external or decrementer interrupt */
+
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
- /* Woken by external or decrementer interrupt */
- ld r1, HSTATE_HOST_R1(r13)
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+ addi r3, r4, VCPU_TB_RMINTR
+ bl kvmhv_accumulate_time
+#endif
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+ b 91f
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+ /*
+ * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
+ */
+ mr r3, r4
+ ld r4, VCPU_MSR(r3)
+ li r5, 0 /* don't preserve non-vol regs */
+ bl kvmppc_restore_tm_hv
+ nop
+ ld r4, HSTATE_KVM_VCPU(r13)
+91:
+#endif
/* load up FP state */
bl kvmppc_load_fp
+ /* Restore guest decrementer */
+ ld r3, VCPU_DEC_EXPIRES(r4)
+ mftb r7
+ subf r3, r7, r3
+ mtspr SPRN_DEC, r3
+
/* Load NV GPRS */
ld r14, VCPU_GPR(R14)(r4)
ld r15, VCPU_GPR(R15)(r4)
@@ -2227,10 +2272,20 @@ kvm_end_cede:
ld r29, VCPU_GPR(R29)(r4)
ld r30, VCPU_GPR(R30)(r4)
ld r31, VCPU_GPR(R31)(r4)
-
+
/* Check the wake reason in SRR1 to see why we got here */
bl kvmppc_check_wake_reason
+ /*
+ * Restore volatile registers since we could have called a
+ * C routine in kvmppc_check_wake_reason
+ * r4 = VCPU
+ * r3 tells us whether we need to return to host or not
+ * WARNING: it gets checked further down:
+ * should not modify r3 until this check is done.
+ */
+ ld r4, HSTATE_KVM_VCPU(r13)
+
/* clear our bit in vcore->napping_threads */
34: ld r5,HSTATE_KVM_VCORE(r13)
lbz r7,HSTATE_PTID(r13)
@@ -2244,18 +2299,12 @@ kvm_end_cede:
li r0,0
stb r0,HSTATE_NAPPING(r13)
- /* See if the wake reason means we need to exit */
+ /* See if the wake reason saved in r3 means we need to exit */
stw r12, VCPU_TRAP(r4)
mr r9, r4
cmpdi r3, 0
bgt guest_exit_cont
-
- /* see if any other thread is already exiting */
- lwz r0,VCORE_ENTRY_EXIT(r5)
- cmpwi r0,0x100
- bge guest_exit_cont
-
- b kvmppc_cede_reentry /* if not go back to guest */
+ b maybe_reenter_guest
/* cede when already previously prodded case */
kvm_cede_prodded:
@@ -2268,47 +2317,50 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
- b hcall_real_fallback
+ ld r9, HSTATE_KVM_VCPU(r13)
+ b guest_exit_cont
- /* Try to handle a machine check in real mode */
+ /* Try to do machine check recovery in real mode */
machine_check_realmode:
mr r3, r9 /* get vcpu pointer */
bl kvmppc_realmode_machine_check
nop
- cmpdi r3, 0 /* Did we handle MCE ? */
+ /* all machine checks go to virtual mode for further handling */
ld r9, HSTATE_KVM_VCPU(r13)
li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
- /*
- * Deliver unhandled/fatal (e.g. UE) MCE errors to guest through
- * machine check interrupt (set HSRR0 to 0x200). And for handled
- * errors (no-fatal), just go back to guest execution with current
- * HSRR0 instead of exiting guest. This new approach will inject
- * machine check to guest for fatal error causing guest to crash.
- *
- * The old code used to return to host for unhandled errors which
- * was causing guest to hang with soft lockups inside guest and
- * makes it difficult to recover guest instance.
- */
- ld r10, VCPU_PC(r9)
- ld r11, VCPU_MSR(r9)
- bne 2f /* Continue guest execution. */
- /* If not, deliver a machine check. SRR0/1 are already set */
- li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
- ld r11, VCPU_MSR(r9)
- bl kvmppc_msr_interrupt
-2: b fast_interrupt_c_return
+ b guest_exit_cont
+
+/*
+ * Call C code to handle a HMI in real mode.
+ * Only the primary thread does the call, secondary threads are handled
+ * by calling hmi_exception_realmode() after kvmppc_hv_entry returns.
+ * r9 points to the vcpu on entry
+ */
+hmi_realmode:
+ lbz r0, HSTATE_PTID(r13)
+ cmpwi r0, 0
+ bne guest_exit_cont
+ bl CFUNC(kvmppc_realmode_hmi_handler)
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_HMI
+ b guest_exit_cont
/*
* Check the reason we woke from nap, and take appropriate action.
- * Returns:
+ * Returns (in r3):
* 0 if nothing needs to be done
* 1 if something happened that needs to be handled by the host
- * -1 if there was a guest wakeup (IPI)
+ * -1 if there was a guest wakeup (IPI or msgsnd)
+ * -2 if we handled a PCI passthrough interrupt (returned by
+ * kvmppc_read_intr only)
*
* Also sets r12 to the interrupt vector for any interrupt that needs
* to be handled now by the host (0x500 for external interrupt), or zero.
+ * Modifies all volatile registers (since it may call a C function).
+ * This routine calls kvmppc_read_intr, a C function, if an external
+ * interrupt is pending.
*/
-kvmppc_check_wake_reason:
+SYM_FUNC_START_LOCAL(kvmppc_check_wake_reason)
mfspr r6, SPRN_SRR1
BEGIN_FTR_SECTION
rlwinm r6, r6, 45-31, 0xf /* extract wake reason field (P8) */
@@ -2316,8 +2368,7 @@ FTR_SECTION_ELSE
rlwinm r6, r6, 45-31, 0xe /* P7 wake reason field is 3 bits */
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
cmpwi r6, 8 /* was it an external interrupt? */
- li r12, BOOK3S_INTERRUPT_EXTERNAL
- beq kvmppc_read_intr /* if so, see what it was */
+ beq 7f /* if so, see what it was */
li r3, 0
li r12, 0
cmpwi r6, 6 /* was it the decrementer? */
@@ -2328,89 +2379,57 @@ BEGIN_FTR_SECTION
cmpwi r6, 3 /* hypervisor doorbell? */
beq 3f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ cmpwi r6, 0xa /* Hypervisor maintenance ? */
+ beq 4f
li r3, 1 /* anything else, return 1 */
0: blr
/* hypervisor doorbell */
3: li r12, BOOK3S_INTERRUPT_H_DOORBELL
- li r3, 1
- blr
-/*
- * Determine what sort of external interrupt is pending (if any).
- * Returns:
- * 0 if no interrupt is pending
- * 1 if an interrupt is pending that needs to be handled by the host
- * -1 if there was a guest wakeup IPI (which has now been cleared)
- */
-kvmppc_read_intr:
- /* see if a host IPI is pending */
- li r3, 1
- lbz r0, HSTATE_HOST_IPI(r13)
- cmpwi r0, 0
- bne 1f
-
- /* Now read the interrupt from the ICP */
- ld r6, HSTATE_XICS_PHYS(r13)
- li r7, XICS_XIRR
- cmpdi r6, 0
- beq- 1f
- lwzcix r0, r6, r7
/*
- * Save XIRR for later. Since we get in in reverse endian on LE
- * systems, save it byte reversed and fetch it back in host endian.
- */
- li r3, HSTATE_SAVED_XIRR
- STWX_BE r0, r3, r13
-#ifdef __LITTLE_ENDIAN__
- lwz r3, HSTATE_SAVED_XIRR(r13)
-#else
- mr r3, r0
-#endif
- rlwinm. r3, r3, 0, 0xffffff
- sync
- beq 1f /* if nothing pending in the ICP */
-
- /* We found something in the ICP...
- *
- * If it's not an IPI, stash it in the PACA and return to
- * the host, we don't (yet) handle directing real external
- * interrupts directly to the guest
- */
- cmpwi r3, XICS_IPI /* if there is, is it an IPI? */
- bne 42f
-
- /* It's an IPI, clear the MFRR and EOI it */
- li r3, 0xff
- li r8, XICS_MFRR
- stbcix r3, r6, r8 /* clear the IPI */
- stwcix r0, r6, r7 /* EOI it */
- sync
-
- /* We need to re-check host IPI now in case it got set in the
- * meantime. If it's clear, we bounce the interrupt to the
- * guest
+ * Clear the doorbell as we will invoke the handler
+ * explicitly in the guest exit path.
*/
+ lis r6, (PPC_DBELL_SERVER << (63-36))@h
+ PPC_MSGCLR(6)
+ /* see if it's a host IPI */
+ li r3, 1
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
- bne- 43f
-
- /* OK, it's an IPI for us */
+ bnelr
+ /* if not, return -1 */
li r3, -1
-1: blr
+ blr
-42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in
- * the PACA earlier, it will be picked up by the host ICP driver
- */
+ /* Woken up due to Hypervisor maintenance interrupt */
+4: li r12, BOOK3S_INTERRUPT_HMI
li r3, 1
- b 1b
+ blr
-43: /* We raced with the host, we need to resend that IPI, bummer */
- li r0, IPI_PRIORITY
- stbcix r0, r6, r8 /* set the IPI */
- sync
- li r3, 1
- b 1b
+ /* external interrupt - create a stack frame so we can call C */
+7: mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
+ bl CFUNC(kvmppc_read_intr)
+ nop
+ li r12, BOOK3S_INTERRUPT_EXTERNAL
+ cmpdi r3, 1
+ ble 1f
+
+ /*
+ * Return code of 2 means PCI passthrough interrupt, but
+ * we need to return back to host to complete handling the
+ * interrupt. Trap reason is expected in r12 by guest
+ * exit code.
+ */
+ li r12, BOOK3S_INTERRUPT_HV_RM_HARD
+1:
+ ld r0, PPC_MIN_STKFRM+PPC_LR_STKOFF(r1)
+ addi r1, r1, PPC_MIN_STKFRM
+ mtlr r0
+ blr
+SYM_FUNC_END(kvmppc_check_wake_reason)
/*
* Save away FP, VMX and VSX registers.
@@ -2418,7 +2437,7 @@ kvmppc_read_intr:
* N.B. r30 and r31 are volatile across this function,
* thus it is not callable from C.
*/
-kvmppc_save_fp:
+SYM_FUNC_START_LOCAL(kvmppc_save_fp)
mflr r30
mr r31,r3
mfmsr r5
@@ -2434,7 +2453,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
mtmsrd r8
- isync
addi r3,r3,VCPU_FPRS
bl store_fp_state
#ifdef CONFIG_ALTIVEC
@@ -2447,6 +2465,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
stw r6,VCPU_VRSAVE(r31)
mtlr r30
blr
+SYM_FUNC_END(kvmppc_save_fp)
/*
* Load up FP, VMX and VSX registers
@@ -2454,7 +2473,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
* N.B. r30 and r31 are volatile across this function,
* thus it is not callable from C.
*/
-kvmppc_load_fp:
+SYM_FUNC_START_LOCAL(kvmppc_load_fp)
mflr r30
mr r31,r4
mfmsr r9
@@ -2470,7 +2489,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
mtmsrd r8
- isync
addi r3,r4,VCPU_FPRS
bl load_fp_state
#ifdef CONFIG_ALTIVEC
@@ -2484,13 +2502,246 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
mtlr r30
mr r4,r31
blr
+SYM_FUNC_END(kvmppc_load_fp)
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/*
+ * Save transactional state and TM-related registers.
+ * Called with r3 pointing to the vcpu struct and r4 containing
+ * the guest MSR value.
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * If r5 == 0, this can modify all checkpointed registers, but
+ * restores r1 and r2 before exit.
+ */
+_GLOBAL_TOC(kvmppc_save_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_save_tm_hv)
+ /* See if we need to handle fake suspend mode */
+BEGIN_FTR_SECTION
+ b __kvmppc_save_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+
+ lbz r0, HSTATE_FAKE_SUSPEND(r13) /* Were we fake suspended? */
+ cmpwi r0, 0
+ beq __kvmppc_save_tm
+
+ /* The following code handles the fake_suspend = 1 case */
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -TM_FRAME_SIZE(r1)
+
+ /* Turn on TM. */
+ mfmsr r8
+ li r0, 1
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r8
+
+ rldicl. r8, r8, 64 - MSR_TS_S_LG, 62 /* Did we actually hrfid? */
+ beq 4f
+BEGIN_FTR_SECTION
+ bl pnv_power9_force_smt4_catch
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+ nop
+
+ /*
+ * It's possible that treclaim. may modify registers, if we have lost
+ * track of fake-suspend state in the guest due to it using rfscv.
+ * Save and restore registers in case this occurs.
+ */
+ mfspr r3, SPRN_DSCR
+ mfspr r4, SPRN_XER
+ mfspr r5, SPRN_AMR
+ /* SPRN_TAR would need to be saved here if the kernel ever used it */
+ mfcr r12
+ SAVE_NVGPRS(r1)
+ SAVE_GPR(2, r1)
+ SAVE_GPR(3, r1)
+ SAVE_GPR(4, r1)
+ SAVE_GPR(5, r1)
+ stw r12, 8(r1)
+ std r1, HSTATE_HOST_R1(r13)
+
+ /* We have to treclaim here because that's the only way to do S->N */
+ li r3, TM_CAUSE_KVM_RESCHED
+ TRECLAIM(R3)
+
+ GET_PACA(r13)
+ ld r1, HSTATE_HOST_R1(r13)
+ REST_GPR(2, r1)
+ REST_GPR(3, r1)
+ REST_GPR(4, r1)
+ REST_GPR(5, r1)
+ lwz r12, 8(r1)
+ REST_NVGPRS(r1)
+ mtspr SPRN_DSCR, r3
+ mtspr SPRN_XER, r4
+ mtspr SPRN_AMR, r5
+ mtcr r12
+ HMT_MEDIUM
+
+ /*
+ * We were in fake suspend, so we are not going to save the
+ * register state as the guest checkpointed state (since
+ * we already have it), therefore we can now use any volatile GPR.
+ * In fact treclaim in fake suspend state doesn't modify
+ * any registers.
+ */
+
+BEGIN_FTR_SECTION
+ bl pnv_power9_force_smt4_release
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
+ nop
+
+4:
+ mfspr r3, SPRN_PSSCR
+ /* PSSCR_FAKE_SUSPEND is a write-only bit, but clear it anyway */
+ li r0, PSSCR_FAKE_SUSPEND
+ andc r3, r3, r0
+ mtspr SPRN_PSSCR, r3
+
+ /* Don't save TEXASR, use value from last exit in real suspend state */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ mfspr r5, SPRN_TFHAR
+ mfspr r6, SPRN_TFIAR
+ std r5, VCPU_TFHAR(r9)
+ std r6, VCPU_TFIAR(r9)
+
+ addi r1, r1, TM_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with r3 pointing to the vcpu struct
+ * and r4 containing the guest MSR value.
+ * r5 is non-zero iff non-volatile register state needs to be maintained.
+ * This potentially modifies all checkpointed registers.
+ * It restores r1 and r2 from the PACA.
+ */
+_GLOBAL_TOC(kvmppc_restore_tm_hv)
+EXPORT_SYMBOL_GPL(kvmppc_restore_tm_hv)
+ /*
+ * If we are doing TM emulation for the guest on a POWER9 DD2,
+ * then we don't actually do a trechkpt -- we either set up
+ * fake-suspend mode, or emulate a TM rollback.
+ */
+BEGIN_FTR_SECTION
+ b __kvmppc_restore_tm
+END_FTR_SECTION_IFCLR(CPU_FTR_P9_TM_HV_ASSIST)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+
+ li r0, 0
+ stb r0, HSTATE_FAKE_SUSPEND(r13)
+
+ /* Turn on TM so we can restore TM SPRs */
+ mfmsr r5
+ li r0, 1
+ rldimi r5, r0, MSR_TM_LG, 63-MSR_TM_LG
+ mtmsrd r5
+
+ /*
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+ ld r5, VCPU_TFHAR(r3)
+ ld r6, VCPU_TFIAR(r3)
+ ld r7, VCPU_TEXASR(r3)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+ rldicl. r5, r4, 64 - MSR_TS_S_LG, 62
+ beqlr /* TM not active in guest */
+
+ /* Make sure the failure summary is set */
+ oris r7, r7, (TEXASR_FS)@h
+ mtspr SPRN_TEXASR, r7
+
+ cmpwi r5, 1 /* check for suspended state */
+ bgt 10f
+ stb r5, HSTATE_FAKE_SUSPEND(r13)
+ b 9f /* and return */
+10: stdu r1, -PPC_MIN_STKFRM(r1)
+ /* guest is in transactional state, so simulate rollback */
+ bl kvmhv_emulate_tm_rollback
+ nop
+ addi r1, r1, PPC_MIN_STKFRM
+9: ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
/*
* We come here if we get any exception or interrupt while we are
* executing host real mode code while in guest MMU context.
- * For now just spin, but we should do something better.
+ * r12 is (CR << 32) | vector
+ * r13 points to our PACA
+ * r12 is saved in HSTATE_SCRATCH0(r13)
+ * r9 is saved in HSTATE_SCRATCH2(r13)
+ * r13 is saved in HSPRG1
+ * cfar is saved in HSTATE_CFAR(r13)
+ * ppr is saved in HSTATE_PPR(r13)
*/
kvmppc_bad_host_intr:
+ /*
+ * Switch to the emergency stack, but start half-way down in
+ * case we were already on it.
+ */
+ mr r9, r1
+ std r1, PACAR1(r13)
+ ld r1, PACAEMERGSP(r13)
+ subi r1, r1, THREAD_SIZE/2 + INT_FRAME_SIZE
+ std r9, 0(r1)
+ std r0, GPR0(r1)
+ std r9, GPR1(r1)
+ std r2, GPR2(r1)
+ SAVE_GPRS(3, 8, r1)
+ srdi r0, r12, 32
+ clrldi r12, r12, 32
+ std r0, _CCR(r1)
+ std r12, _TRAP(r1)
+ andi. r0, r12, 2
+ beq 1f
+ mfspr r3, SPRN_HSRR0
+ mfspr r4, SPRN_HSRR1
+ mfspr r5, SPRN_HDAR
+ mfspr r6, SPRN_HDSISR
+ b 2f
+1: mfspr r3, SPRN_SRR0
+ mfspr r4, SPRN_SRR1
+ mfspr r5, SPRN_DAR
+ mfspr r6, SPRN_DSISR
+2: std r3, _NIP(r1)
+ std r4, _MSR(r1)
+ std r5, _DAR(r1)
+ std r6, _DSISR(r1)
+ ld r9, HSTATE_SCRATCH2(r13)
+ ld r12, HSTATE_SCRATCH0(r13)
+ GET_SCRATCH0(r0)
+ SAVE_GPRS(9, 12, r1)
+ std r0, GPR13(r1)
+ SAVE_NVGPRS(r1)
+ ld r5, HSTATE_CFAR(r13)
+ std r5, ORIG_GPR3(r1)
+ mflr r3
+ mfctr r4
+ mfxer r5
+ lbz r6, PACAIRQSOFTMASK(r13)
+ std r3, _LINK(r1)
+ std r4, _CTR(r1)
+ std r5, _XER(r1)
+ std r6, SOFTE(r1)
+ LOAD_PACA_TOC()
+ LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER)
+ std r3, STACK_INT_FRAME_MARKER(r1)
+
+ /*
+ * XXX On POWER7 and POWER8, we just spin here since we don't
+ * know what the other threads are doing (and we don't want to
+ * coordinate with them) - but at least we now have register state
+ * in memory that we might be able to look at from another CPU.
+ */
b .
/*
@@ -2500,7 +2751,7 @@ kvmppc_bad_host_intr:
* r9 has a vcpu pointer (in)
* r0 is used as a scratch register
*/
-kvmppc_msr_interrupt:
+SYM_FUNC_START_LOCAL(kvmppc_msr_interrupt)
rldicl r0, r11, 64 - MSR_TS_S_LG, 62
cmpwi r0, 2 /* Check if we are in transactional state.. */
ld r11, VCPU_INTR_MSR(r9)
@@ -2509,6 +2760,197 @@ kvmppc_msr_interrupt:
li r0, 1
1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
blr
+SYM_FUNC_END(kvmppc_msr_interrupt)
+
+/*
+ * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
+ *
+ * Load up guest PMU state. R3 points to the vcpu struct.
+ */
+SYM_FUNC_START_LOCAL(kvmhv_load_guest_pmu)
+ mr r4, r3
+ mflr r0
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ isync
+BEGIN_FTR_SECTION
+ ld r3, VCPU_MMCR(r4)
+ andi. r5, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r5, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, VCPU_PMC(r4) /* always load up guest PMU registers */
+ lwz r5, VCPU_PMC + 4(r4) /* to prevent information leak */
+ lwz r6, VCPU_PMC + 8(r4)
+ lwz r7, VCPU_PMC + 12(r4)
+ lwz r8, VCPU_PMC + 16(r4)
+ lwz r9, VCPU_PMC + 20(r4)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r5
+ mtspr SPRN_PMC3, r6
+ mtspr SPRN_PMC4, r7
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+ ld r3, VCPU_MMCR(r4)
+ ld r5, VCPU_MMCR + 8(r4)
+ ld r6, VCPU_MMCRA(r4)
+ ld r7, VCPU_SIAR(r4)
+ ld r8, VCPU_SDAR(r4)
+ mtspr SPRN_MMCR1, r5
+ mtspr SPRN_MMCRA, r6
+ mtspr SPRN_SIAR, r7
+ mtspr SPRN_SDAR, r8
+BEGIN_FTR_SECTION
+ ld r5, VCPU_MMCR + 16(r4)
+ ld r6, VCPU_SIER(r4)
+ mtspr SPRN_MMCR2, r5
+ mtspr SPRN_SIER, r6
+ lwz r7, VCPU_PMC + 24(r4)
+ lwz r8, VCPU_PMC + 28(r4)
+ ld r9, VCPU_MMCRS(r4)
+ mtspr SPRN_SPMC1, r7
+ mtspr SPRN_SPMC2, r8
+ mtspr SPRN_MMCRS, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+ mtlr r0
+ blr
+SYM_FUNC_END(kvmhv_load_guest_pmu)
+
+/*
+ * void kvmhv_load_host_pmu(void)
+ *
+ * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
+ */
+SYM_FUNC_START_LOCAL(kvmhv_load_host_pmu)
+ mflr r0
+ lbz r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
+ cmpwi r4, 0
+ beq 23f /* skip if not */
+BEGIN_FTR_SECTION
+ ld r3, HSTATE_MMCR0(r13)
+ andi. r4, r3, MMCR0_PMAO_SYNC | MMCR0_PMAO
+ cmpwi r4, MMCR0_PMAO
+ beql kvmppc_fix_pmao
+END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
+ lwz r3, HSTATE_PMC1(r13)
+ lwz r4, HSTATE_PMC2(r13)
+ lwz r5, HSTATE_PMC3(r13)
+ lwz r6, HSTATE_PMC4(r13)
+ lwz r8, HSTATE_PMC5(r13)
+ lwz r9, HSTATE_PMC6(r13)
+ mtspr SPRN_PMC1, r3
+ mtspr SPRN_PMC2, r4
+ mtspr SPRN_PMC3, r5
+ mtspr SPRN_PMC4, r6
+ mtspr SPRN_PMC5, r8
+ mtspr SPRN_PMC6, r9
+ ld r3, HSTATE_MMCR0(r13)
+ ld r4, HSTATE_MMCR1(r13)
+ ld r5, HSTATE_MMCRA(r13)
+ ld r6, HSTATE_SIAR(r13)
+ ld r7, HSTATE_SDAR(r13)
+ mtspr SPRN_MMCR1, r4
+ mtspr SPRN_MMCRA, r5
+ mtspr SPRN_SIAR, r6
+ mtspr SPRN_SDAR, r7
+BEGIN_FTR_SECTION
+ ld r8, HSTATE_MMCR2(r13)
+ ld r9, HSTATE_SIER(r13)
+ mtspr SPRN_MMCR2, r8
+ mtspr SPRN_SIER, r9
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ mtspr SPRN_MMCR0, r3
+ isync
+ mtlr r0
+23: blr
+SYM_FUNC_END(kvmhv_load_host_pmu)
+
+/*
+ * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
+ *
+ * Save guest PMU state into the vcpu struct.
+ * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
+ */
+SYM_FUNC_START_LOCAL(kvmhv_save_guest_pmu)
+ mr r9, r3
+ mr r8, r4
+BEGIN_FTR_SECTION
+ /*
+ * POWER8 seems to have a hardware bug where setting
+ * MMCR0[PMAE] along with MMCR0[PMC1CE] and/or MMCR0[PMCjCE]
+ * when some counters are already negative doesn't seem
+ * to cause a performance monitor alert (and hence interrupt).
+ * The effect of this is that when saving the PMU state,
+ * if there is no PMU alert pending when we read MMCR0
+ * before freezing the counters, but one becomes pending
+ * before we read the counters, we lose it.
+ * To work around this, we need a way to freeze the counters
+ * before reading MMCR0. Normally, freezing the counters
+ * is done by writing MMCR0 (to set MMCR0[FC]) which
+ * unavoidably writes MMCR0[PMAO] as well. On POWER8,
+ * we can also freeze the counters using MMCR2, by writing
+ * 1s to all the counter freeze condition bits (there are
+ * 9 bits each for 6 counters).
+ */
+ li r3, -1 /* set all freeze bits */
+ clrrdi r3, r3, 10
+ mfspr r10, SPRN_MMCR2
+ mtspr SPRN_MMCR2, r3
+ isync
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ li r3, 1
+ sldi r3, r3, 31 /* MMCR0_FC (freeze counters) bit */
+ mfspr r4, SPRN_MMCR0 /* save MMCR0 */
+ mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
+ mfspr r6, SPRN_MMCRA
+ /* Clear MMCRA in order to disable SDAR updates */
+ li r7, 0
+ mtspr SPRN_MMCRA, r7
+ isync
+ cmpwi r8, 0 /* did they ask for PMU stuff to be saved? */
+ bne 21f
+ std r3, VCPU_MMCR(r9) /* if not, set saved MMCR0 to FC */
+ b 22f
+21: mfspr r5, SPRN_MMCR1
+ mfspr r7, SPRN_SIAR
+ mfspr r8, SPRN_SDAR
+ std r4, VCPU_MMCR(r9)
+ std r5, VCPU_MMCR + 8(r9)
+ std r6, VCPU_MMCRA(r9)
+BEGIN_FTR_SECTION
+ std r10, VCPU_MMCR + 16(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+ std r7, VCPU_SIAR(r9)
+ std r8, VCPU_SDAR(r9)
+ mfspr r3, SPRN_PMC1
+ mfspr r4, SPRN_PMC2
+ mfspr r5, SPRN_PMC3
+ mfspr r6, SPRN_PMC4
+ mfspr r7, SPRN_PMC5
+ mfspr r8, SPRN_PMC6
+ stw r3, VCPU_PMC(r9)
+ stw r4, VCPU_PMC + 4(r9)
+ stw r5, VCPU_PMC + 8(r9)
+ stw r6, VCPU_PMC + 12(r9)
+ stw r7, VCPU_PMC + 16(r9)
+ stw r8, VCPU_PMC + 20(r9)
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_SIER
+ std r5, VCPU_SIER(r9)
+ mfspr r6, SPRN_SPMC1
+ mfspr r7, SPRN_SPMC2
+ mfspr r8, SPRN_MMCRS
+ stw r6, VCPU_PMC + 24(r9)
+ stw r7, VCPU_PMC + 28(r9)
+ std r8, VCPU_MMCRS(r9)
+ lis r4, 0x8000
+ mtspr SPRN_MMCRS, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+22: blr
+SYM_FUNC_END(kvmhv_save_guest_pmu)
/*
* This works around a hardware bug on POWER8E processors, where
@@ -2527,3 +2969,56 @@ kvmppc_fix_pmao:
mtspr SPRN_PMC6, r3
isync
blr
+
+#ifdef CONFIG_KVM_BOOK3S_HV_P8_TIMING
+/*
+ * Start timing an activity
+ * r3 = pointer to time accumulation struct, r4 = vcpu
+ */
+kvmhv_start_timing:
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r6, VCORE_TB_OFFSET_APPL(r5)
+ mftb r5
+ subf r5, r6, r5 /* subtract current timebase offset */
+ std r3, VCPU_CUR_ACTIVITY(r4)
+ std r5, VCPU_ACTIVITY_START(r4)
+ blr
+
+/*
+ * Accumulate time to one activity and start another.
+ * r3 = pointer to new time accumulation struct, r4 = vcpu
+ */
+kvmhv_accumulate_time:
+ ld r5, HSTATE_KVM_VCORE(r13)
+ ld r8, VCORE_TB_OFFSET_APPL(r5)
+ ld r5, VCPU_CUR_ACTIVITY(r4)
+ ld r6, VCPU_ACTIVITY_START(r4)
+ std r3, VCPU_CUR_ACTIVITY(r4)
+ mftb r7
+ subf r7, r8, r7 /* subtract current timebase offset */
+ std r7, VCPU_ACTIVITY_START(r4)
+ cmpdi r5, 0
+ beqlr
+ subf r3, r6, r7
+ ld r8, TAS_SEQCOUNT(r5)
+ cmpdi r8, 0
+ addi r8, r8, 1
+ std r8, TAS_SEQCOUNT(r5)
+ lwsync
+ ld r7, TAS_TOTAL(r5)
+ add r7, r7, r3
+ std r7, TAS_TOTAL(r5)
+ ld r6, TAS_MIN(r5)
+ ld r7, TAS_MAX(r5)
+ beq 3f
+ cmpd r3, r6
+ bge 1f
+3: std r3, TAS_MIN(r5)
+1: cmpd r3, r7
+ ble 2f
+ std r3, TAS_MAX(r5)
+2: lwsync
+ addi r8, r8, 1
+ std r8, TAS_SEQCOUNT(r5)
+ blr
+#endif
diff --git a/arch/powerpc/kvm/book3s_hv_tm.c b/arch/powerpc/kvm/book3s_hv_tm.c
new file mode 100644
index 000000000000..866cadd70094
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Record an emulated transaction failure in the vcpu's TFIAR and TEXASR.
+ *
+ * @failure_cause is placed in the top byte of TEXASR together with the
+ * ABORT, FS and EXACT bits.  SUSP is added when the guest MSR is in
+ * suspended state, and PR (plus bit 0 of TFIAR) when the guest MSR has
+ * PR set.  TFIAR holds the word-aligned guest NIP.
+ */
+static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
+{
+	u64 guest_msr = vcpu->arch.shregs.msr;
+	u64 new_tfiar = vcpu->arch.regs.nip & ~0x3ull;
+	u64 new_texasr = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;
+
+	if (MSR_TM_SUSPENDED(guest_msr))
+		new_texasr |= TEXASR_SUSP;
+
+	if (guest_msr & MSR_PR) {
+		new_texasr |= TEXASR_PR;
+		new_tfiar |= 1;
+	}
+
+	/* Preserve ROT and TL fields of existing TEXASR */
+	vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | new_texasr;
+	vcpu->arch.tfiar = new_tfiar;
+}
+
+/*
+ * This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
+ * We expect to find a TM-related instruction to be emulated.  The
+ * instruction image is in vcpu->arch.emul_inst.  If the guest was in
+ * TM suspended or transactional state, the checkpointed state has been
+ * reclaimed and is in the vcpu struct.  The CPU is in virtual mode in
+ * host context.
+ *
+ * Returns RESUME_GUEST once the instruction has been emulated or an
+ * interrupt has been queued for the guest, or -1 after setting
+ * vcpu->arch.trap to BOOK3S_INTERRUPT_H_FAC_UNAVAIL so that the host
+ * interrupt handler is rerun.
+ */
+int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
+{
+	u32 instr = vcpu->arch.emul_inst;
+	u64 msr = vcpu->arch.shregs.msr;
+	u64 newmsr, bescr;
+	int ra, rs;
+
+	/*
+	 * The TM softpatch interrupt sets NIP to the instruction following
+	 * the faulting instruction, which is not executed. Rewind nip to the
+	 * faulting instruction so it looks like a normal synchronous
+	 * interrupt, then update nip in the places where the instruction is
+	 * emulated.
+	 */
+	vcpu->arch.regs.nip -= 4;
+
+	/*
+	 * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+	 * in these instructions, so masking bit 31 out doesn't change these
+	 * instructions. For treclaim., tsr., and trechkpt. instructions if bit
+	 * 31 = 0 then they are per ISA invalid forms, however P9 UM, in section
+	 * 4.6.10 Book II Invalid Forms, informs specifically that ignoring bit
+	 * 31 is an acceptable way to handle these invalid forms that have
+	 * bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/
+	 * bit 31 set) can generate a softpatch interrupt. Hence both forms
+	 * are handled below for these instructions so they behave the same way.
+	 */
+	switch (instr & PO_XOP_OPCODE_MASK) {
+	case PPC_INST_RFID:
+		/* XXX do we need to check for PR=0 here? */
+		newmsr = vcpu->arch.shregs.srr1;
+		/* should only get here for Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
+		vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
+		return RESUME_GUEST;
+
+	case PPC_INST_RFEBB:
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check EBB facility is available */
+		if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_EBB_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_EBB_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		bescr = vcpu->arch.bescr;
+		/* expect to see a S->T transition requested */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       ((bescr >> 30) & 3) == 2));
+		bescr &= ~BESCR_GE;
+		if (instr & (1 << 11))
+			bescr |= BESCR_GE;
+		vcpu->arch.bescr = bescr;
+		msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.cfar = vcpu->arch.regs.nip;
+		vcpu->arch.regs.nip = vcpu->arch.ebbrr;
+		return RESUME_GUEST;
+
+	case PPC_INST_MTMSRD:
+		/* XXX do we need to check for PR=0 here? */
+		rs = (instr >> 21) & 0x1f;
+		newmsr = kvmppc_get_gpr(vcpu, rs);
+		/* check this is a Sx -> T1 transition */
+		WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
+			       MSR_TM_TRANSACTIONAL(newmsr) &&
+			       (newmsr & MSR_TM)));
+		/* mtmsrd doesn't change LE */
+		newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+		newmsr = sanitize_msr(newmsr);
+		vcpu->arch.shregs.msr = newmsr;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
+		/* check for PR=1 and arch 2.06 bit set in PCR */
+		if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
+			/* generate an illegal instruction interrupt */
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu,
+						    BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* Set CR0 to indicate previous transactional state */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		/* L=1 => tresume, L=0 => tsuspend */
+		if (instr & (1 << 21)) {
+			if (MSR_TM_SUSPENDED(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+		} else {
+			if (MSR_TM_TRANSACTIONAL(msr))
+				msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
+		}
+		vcpu->arch.shregs.msr = msr;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu,
+						    BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If no transaction active, generate TM bad thing */
+		if (!MSR_TM_ACTIVE(msr)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+		/* If failure was not previously recorded, recompute TEXASR */
+		if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
+			ra = (instr >> 16) & 0x1f;
+			if (ra)
+				ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
+			emulate_tx_failure(vcpu, ra);
+		}
+
+		copy_from_checkpoint(vcpu);
+
+		/* Set CR0 to indicate previous transactional state */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+
+	/* ignore bit 31, see comment above */
+	case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK):
+		/* XXX do we need to check for PR=0 here? */
+		/* check for TM disabled in the HFSCR or MSR */
+		if (!(vcpu->arch.hfscr & HFSCR_TM)) {
+			vcpu->arch.hfscr &= ~HFSCR_INTR_CAUSE;
+			vcpu->arch.hfscr |= (u64)FSCR_TM_LG << 56;
+			vcpu->arch.trap = BOOK3S_INTERRUPT_H_FAC_UNAVAIL;
+			return -1; /* rerun host interrupt handler */
+		}
+		if (!(msr & MSR_TM)) {
+			/* generate a facility unavailable interrupt */
+			vcpu->arch.fscr &= ~FSCR_INTR_CAUSE;
+			vcpu->arch.fscr |= (u64)FSCR_TM_LG << 56;
+			kvmppc_book3s_queue_irqprio(vcpu,
+						    BOOK3S_INTERRUPT_FAC_UNAVAIL);
+			return RESUME_GUEST;
+		}
+		/* If transaction active or TEXASR[FS] = 0, bad thing */
+		if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
+			kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+			return RESUME_GUEST;
+		}
+
+		copy_to_checkpoint(vcpu);
+
+		/* Set CR0 to indicate previous transactional state */
+		vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+			(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
+		vcpu->arch.shregs.msr = msr | MSR_TS_S;
+		vcpu->arch.regs.nip += 4;
+		return RESUME_GUEST;
+	}
+
+	/* What should we do here? We didn't recognize the instruction */
+	kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+	/* Terminate the message with a newline so the log record is complete. */
+	pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation\n", instr);
+
+	return RESUME_GUEST;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_tm_builtin.c b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
new file mode 100644
index 000000000000..fad931f224ef
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_tm_builtin.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_book3s_64.h>
+#include <asm/reg.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * This handles the cases where the guest is in real suspend mode
+ * and we want to get back to the guest without dooming the transaction.
+ * The caller has checked that the guest is in real-suspend mode
+ * (MSR[TS] = S and the fake-suspend flag is not set).
+ *
+ * Returns 1 if the instruction in vcpu->arch.emul_inst was emulated here,
+ * or 0 if it was not (unrecognized instruction, or one of the
+ * preconditions checked below did not hold); on a 0 return no vcpu or
+ * SPR state has been modified by this function.
+ */
+int kvmhv_p9_tm_emulation_early(struct kvm_vcpu *vcpu)
+{
+ u32 instr = vcpu->arch.emul_inst;
+ u64 newmsr, msr, bescr;
+ int rs;
+
+ /*
+ * rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
+ * in these instructions, so masking bit 31 out doesn't change these
+ * instructions. For the tsr. instruction if bit 31 = 0 then it is per
+ * ISA an invalid form, however P9 UM, in section 4.6.10 Book II Invalid
+ * Forms, informs specifically that ignoring bit 31 is an acceptable way
+ * to handle TM-related invalid forms that have bit 31 = 0. Moreover,
+ * for emulation purposes both forms (w/ and wo/ bit 31 set) can
+ * generate a softpatch interrupt. Hence both forms are handled below
+ * for tsr. to make them behave the same way.
+ */
+ switch (instr & PO_XOP_OPCODE_MASK) {
+ case PPC_INST_RFID:
+ /* XXX do we need to check for PR=0 here? */
+ newmsr = vcpu->arch.shregs.srr1;
+ /* should only get here for Sx -> T1 transition */
+ if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+ return 0;
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ /* nip is not rewound in this function, so the rfid itself is at nip - 4 */
+ vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+ vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
+ return 1;
+
+ case PPC_INST_RFEBB:
+ /* check for PR=1 and arch 2.06 bit set in PCR */
+ msr = vcpu->arch.shregs.msr;
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+ return 0;
+ /* check EBB facility is available */
+ if (!(vcpu->arch.hfscr & HFSCR_EBB) ||
+ ((msr & MSR_PR) && !(mfspr(SPRN_FSCR) & FSCR_EBB)))
+ return 0;
+ /* BESCR, FSCR and EBBRR are read from the SPRs here, not from the vcpu struct */
+ bescr = mfspr(SPRN_BESCR);
+ /* expect to see a S->T transition requested */
+ if (((bescr >> 30) & 3) != 2)
+ return 0;
+ /* BESCR[GE] is set from bit (1 << 11) of the instruction image */
+ bescr &= ~BESCR_GE;
+ if (instr & (1 << 11))
+ bescr |= BESCR_GE;
+ mtspr(SPRN_BESCR, bescr);
+ msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ vcpu->arch.shregs.msr = msr;
+ vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
+ vcpu->arch.regs.nip = mfspr(SPRN_EBBRR);
+ return 1;
+
+ case PPC_INST_MTMSRD:
+ /* XXX do we need to check for PR=0 here? */
+ rs = (instr >> 21) & 0x1f;
+ newmsr = kvmppc_get_gpr(vcpu, rs);
+ msr = vcpu->arch.shregs.msr;
+ /* check this is a Sx -> T1 transition */
+ if (!(MSR_TM_TRANSACTIONAL(newmsr) && (newmsr & MSR_TM)))
+ return 0;
+ /* mtmsrd doesn't change LE */
+ newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
+ newmsr = sanitize_msr(newmsr);
+ vcpu->arch.shregs.msr = newmsr;
+ return 1;
+
+ /* ignore bit 31, see comment above */
+ case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
+ /* we know the MSR has the TS field = S (0b01) here */
+ msr = vcpu->arch.shregs.msr;
+ /* check for PR=1 and arch 2.06 bit set in PCR */
+ if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206))
+ return 0;
+ /* check for TM disabled in the HFSCR or MSR */
+ if (!(vcpu->arch.hfscr & HFSCR_TM) || !(msr & MSR_TM))
+ return 0;
+ /* L=1 => tresume => set TS to T (0b10) */
+ if (instr & (1 << 21))
+ vcpu->arch.shregs.msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
+ /* Set CR0 to 0b0010 */
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
+ 0x20000000;
+ return 1;
+ }
+
+ /* not an instruction handled by this early path */
+ return 0;
+}
+
+/*
+ * This is called when we are returning to a guest in TM transactional
+ * state. We roll the guest state back to the checkpointed state.
+ *
+ * The guest resumes at the address held in vcpu->arch.tfhar with MSR[TS]
+ * cleared and the top nibble of the CR image (CR0) set to 0b1010.
+ */
+void kvmhv_emulate_tm_rollback(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.shregs.msr &= ~MSR_TS_MASK; /* go to N state */
+ vcpu->arch.regs.nip = vcpu->arch.tfhar;
+ copy_from_checkpoint(vcpu);
+ /* CR0 is written after the checkpoint copy, so it applies to the restored ccr */
+ vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) | 0xa0000000;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
new file mode 100644
index 000000000000..03f8c34fa0a2
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -0,0 +1,1222 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Secure pages management: Migration of pages between normal and secure
+ * memory of KVM guests.
+ *
+ * Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
+ */
+
+/*
+ * A pseries guest can be run as secure guest on Ultravisor-enabled
+ * POWER platforms. On such platforms, this driver will be used to manage
+ * the movement of guest pages between the normal memory managed by
+ * hypervisor (HV) and secure memory managed by Ultravisor (UV).
+ *
+ * The page-in or page-out requests from UV will come to HV as hcalls and
+ * HV will call back into UV via ultracalls to satisfy these page requests.
+ *
+ * Private ZONE_DEVICE memory equal to the amount of secure memory
+ * available in the platform for running secure guests is hotplugged.
+ * Whenever a page belonging to the guest becomes secure, a page from this
+ * private device memory is used to represent and track that secure page
+ * on the HV side. Some pages (like virtio buffers, VPA pages etc) are
+ * shared between UV and HV. However such pages aren't represented by
+ * device private memory and mappings to shared memory exist in both
+ * UV and HV page tables.
+ */
+
+/*
+ * Notes on locking
+ *
+ * kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
+ * page-in and page-out requests for the same GPA. Concurrent accesses
+ * can either come via UV (guest vCPUs requesting for same page)
+ * or when HV and guest simultaneously access the same page.
+ * This mutex serializes the migration of page from HV(normal) to
+ * UV(secure) and vice versa. So the serialization points are around
+ * migrate_vma routines and page-in/out routines.
+ *
+ * Per-guest mutex comes with a cost though. Mainly it serializes the
+ * fault path as page-out can occur when HV faults on accessing secure
+ * guest pages. Currently UV issues page-in requests for all the guest
+ * PFNs one at a time during early boot (UV_ESM uvcall), so this is
+ * not a cause for concern. Also currently the number of page-outs caused
+ * by HV touching secure pages is very very low. If and when UV supports
+ * overcommitting, then we might see concurrent guest driven page-outs.
+ *
+ * Locking order
+ *
+ * 1. kvm->srcu - Protects KVM memslots
+ * 2. kvm->mm->mmap_lock - find_vma, migrate_vma_pages and helpers, ksm_madvise
+ * 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
+ * as sync-points for page-in/out
+ */
+
+/*
+ * Notes on page size
+ *
+ * Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
+ * and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
+ * secure GPAs at 64K page size and maintains one device PFN for each
+ * 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
+ * for 64K page at a time.
+ *
+ * HV faulting on secure pages: When HV touches any secure page, it
+ * faults and issues a UV_PAGE_OUT request with 64K page size. Currently
+ * UV splits and remaps the 2MB page if necessary and copies out the
+ * required 64K page contents.
+ *
+ * Shared pages: Whenever guest shares a secure page, UV will split and
+ * remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
+ *
+ * HV invalidating a page: When a regular page belonging to secure
+ * guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
+ * page size. Using 64K page size is correct here because any non-secure
+ * page will essentially be of 64K page size. Splitting by UV during sharing
+ * and page-out ensures this.
+ *
+ * Page fault handling: When HV handles page fault of a page belonging
+ * to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
+ * Using 64K size is correct here too as UV would have split the 2MB page
+ * into 64k mappings and would have done page-outs earlier.
+ *
+ * In summary, the current secure pages handling code in HV assumes
+ * 64K page size and in fact fails any page-in/page-out requests of
+ * non-64K size upfront. If and when UV starts supporting multiple
+ * page-sizes, we need to break this assumption.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/migrate.h>
+#include <linux/kvm_host.h>
+#include <linux/ksm.h>
+#include <linux/of.h>
+#include <linux/memremap.h>
+#include <asm/ultravisor.h>
+#include <asm/mman.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_uvmem.h>
+
+static struct dev_pagemap kvmppc_uvmem_pgmap;
+static unsigned long *kvmppc_uvmem_bitmap;
+static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
+
+/*
+ * States of a GFN
+ * ---------------
+ * The GFN can be in one of the following states.
+ *
+ * (a) Secure - The GFN is secure. The GFN is associated with
+ * a Secure VM, the contents of the GFN is not accessible
+ * to the Hypervisor. This GFN can be backed by a secure-PFN,
+ * or can be backed by a normal-PFN with contents encrypted.
+ * The former is true when the GFN is paged-in into the
+ * ultravisor. The latter is true when the GFN is paged-out
+ * of the ultravisor.
+ *
+ * (b) Shared - The GFN is shared. The GFN is associated with
+ * a secure VM. The contents of the GFN is accessible to
+ * Hypervisor. This GFN is backed by a normal-PFN and its
+ * content is un-encrypted.
+ *
+ * (c) Normal - The GFN is normal. The GFN is associated with
+ * a normal VM. The contents of the GFN is accessible to
+ * the Hypervisor. Its content is never encrypted.
+ *
+ * States of a VM.
+ * ---------------
+ *
+ * Normal VM: A VM whose contents are always accessible to
+ * the hypervisor. All its GFNs are normal-GFNs.
+ *
+ * Secure VM: A VM whose contents are not accessible to the
+ * hypervisor without the VM's consent. Its GFNs are
+ * either Shared-GFN or Secure-GFNs.
+ *
+ * Transient VM: A Normal VM that is transitioning to secure VM.
+ * The transition starts on successful return of
+ * H_SVM_INIT_START, and ends on successful return
+ * of H_SVM_INIT_DONE. This transient VM, can have GFNs
+ * in any of the three states; i.e Secure-GFN, Shared-GFN,
+ * and Normal-GFN. The VM never executes in this state
+ * in supervisor-mode.
+ *
+ * Memory slot State.
+ * -----------------------------
+ * The state of a memory slot mirrors the state of the
+ * VM the memory slot is associated with.
+ *
+ * VM State transition.
+ * --------------------
+ *
+ * A VM always starts in Normal Mode.
+ *
+ * H_SVM_INIT_START moves the VM into transient state. During this
+ * time the Ultravisor may request some of its GFNs to be shared or
+ * secured. So its GFNs can be in one of the three GFN states.
+ *
+ * H_SVM_INIT_DONE moves the VM entirely from transient state to
+ * secure-state. At this point any left-over normal-GFNs are
+ * transitioned to Secure-GFN.
+ *
+ * H_SVM_INIT_ABORT moves the transient VM back to normal VM.
+ * All its GFNs are moved to Normal-GFNs.
+ *
+ * UV_TERMINATE transitions the secure-VM back to normal-VM. All
+ * the secure-GFNs and shared-GFNs are transitioned to normal-GFNs.
+ * Note: The contents of the normal-GFN is undefined at this point.
+ *
+ * GFN state implementation:
+ * -------------------------
+ *
+ * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
+ * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
+ * set, and contains the value of the secure-PFN.
+ * It is associated with a normal-PFN; also called mem_pfn, when
+ * the GFN is paged-out. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
+ * The value of the normal-PFN is not tracked.
+ *
+ * Shared GFN is associated with a normal-PFN. Its pfn[] has
+ * KVMPPC_GFN_SHARED flag set. The value of the normal-PFN
+ * is not tracked.
+ *
+ * Normal GFN is associated with normal-PFN. Its pfn[] has
+ * no flag set. The value of the normal-PFN is not tracked.
+ *
+ * Life cycle of a GFN
+ * --------------------
+ *
+ * --------------------------------------------------------------
+ * | | Share | Unshare | SVM |H_SVM_INIT_DONE|
+ * | |operation |operation | abort/ | |
+ * | | | | terminate | |
+ * -------------------------------------------------------------
+ * | | | | | |
+ * | Secure | Shared | Secure |Normal |Secure |
+ * | | | | | |
+ * | Shared | Shared | Secure |Normal |Shared |
+ * | | | | | |
+ * | Normal | Shared | Secure |Normal |Secure |
+ * --------------------------------------------------------------
+ *
+ * Life cycle of a VM
+ * --------------------
+ *
+ * --------------------------------------------------------------------
+ * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
+ * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
+ * | | | | | | |
+ * --------- ----------------------------------------------------------
+ * | | | | | | |
+ * | Normal | Normal | Transient|Error |Error |Normal |
+ * | | | | | | |
+ * | Secure | Error | Error |Error |Error |Normal |
+ * | | | | | | |
+ * |Transient| N/A | Error |Secure |Normal |Normal |
+ * --------------------------------------------------------------------
+ */
+
+#define KVMPPC_GFN_UVMEM_PFN (1UL << 63)
+#define KVMPPC_GFN_MEM_PFN (1UL << 62)
+#define KVMPPC_GFN_SHARED (1UL << 61)
+#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
+#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
+#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK)
+
+/*
+ * GFN-state tracking for one memslot.  Allocated by
+ * kvmppc_uvmem_slot_init() and linked on kvm->arch.uvmem_pfns.
+ */
+struct kvmppc_uvmem_slot {
+ struct list_head list; /* entry in kvm->arch.uvmem_pfns */
+ unsigned long nr_pfns; /* number of entries in pfns[] (the memslot's npages) */
+ unsigned long base_pfn; /* first GFN covered (the memslot's base_gfn) */
+ unsigned long *pfns; /* one word per GFN: KVMPPC_GFN_* flag bits, plus the device PFN when KVMPPC_GFN_UVMEM_PFN is set */
+};
+/*
+ * Per-device-page private data, reached through page->zone_device_data.
+ */
+struct kvmppc_uvmem_page_pvt {
+ struct kvm *kvm; /* presumably the VM that owns the secure page - confirm at the allocation site */
+ unsigned long gpa; /* guest physical address handed to the page-out path */
+ bool skip_page_out; /* when set, __kvmppc_svm_page_out() does not issue uv_page_out() */
+ bool remove_gfn; /* set by kvmppc_uvmem_drop_pages(); NOTE(review): consumer is outside this view */
+};
+
+/*
+ * Report whether secure-memory support is usable.
+ *
+ * A non-NULL kvmppc_uvmem_bitmap means there is an ultravisor and our
+ * data structures have been initialized successfully.
+ */
+bool kvmppc_uvmem_available(void)
+{
+	return kvmppc_uvmem_bitmap != NULL;
+}
+
+/*
+ * Allocate the GFN-state tracking structure for @slot and add it to
+ * kvm->arch.uvmem_pfns under kvm->arch.uvmem_lock.
+ *
+ * Returns 0 on success, or -ENOMEM if either allocation fails.
+ */
+int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	struct kvmppc_uvmem_slot *uvslot;
+	unsigned long *states;
+
+	uvslot = kzalloc(sizeof(*uvslot), GFP_KERNEL);
+	if (!uvslot)
+		return -ENOMEM;
+
+	/* One zero-initialised state word per page of the memslot. */
+	states = vcalloc(slot->npages, sizeof(*states));
+	if (!states) {
+		kfree(uvslot);
+		return -ENOMEM;
+	}
+
+	uvslot->pfns = states;
+	uvslot->base_pfn = slot->base_gfn;
+	uvslot->nr_pfns = slot->npages;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+	list_add(&uvslot->list, &kvm->arch.uvmem_pfns);
+	mutex_unlock(&kvm->arch.uvmem_lock);
+
+	return 0;
+}
+
+/*
+ * Unlink and free the tracking structure whose base GFN matches @slot,
+ * if there is one.  At most one entry is released.
+ *
+ * All device PFNs are already released by the time we come here.
+ */
+void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
+{
+	struct kvmppc_uvmem_slot *iter, *victim = NULL;
+
+	mutex_lock(&kvm->arch.uvmem_lock);
+
+	list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) {
+		if (iter->base_pfn == slot->base_gfn) {
+			victim = iter;
+			break;
+		}
+	}
+
+	if (victim) {
+		list_del(&victim->list);
+		vfree(victim->pfns);
+		kfree(victim);
+	}
+
+	mutex_unlock(&kvm->arch.uvmem_lock);
+}
+
+/*
+ * Overwrite the state word of @gfn in whichever tracked slot covers it.
+ *
+ * For KVMPPC_GFN_UVMEM_PFN the word becomes @uvmem_pfn combined with the
+ * flag; for any other @flag value (including 0) the word becomes just
+ * @flag and @uvmem_pfn is ignored.  Nothing happens if no slot covers
+ * @gfn.
+ */
+static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
+			    unsigned long flag, unsigned long uvmem_pfn)
+{
+	struct kvmppc_uvmem_slot *uvslot;
+
+	list_for_each_entry(uvslot, &kvm->arch.uvmem_pfns, list) {
+		if (gfn < uvslot->base_pfn ||
+		    gfn >= uvslot->base_pfn + uvslot->nr_pfns)
+			continue;
+
+		uvslot->pfns[gfn - uvslot->base_pfn] =
+			(flag == KVMPPC_GFN_UVMEM_PFN) ? (uvmem_pfn | flag) : flag;
+		return;
+	}
+}
+
+/*
+ * Mark the GFN as a secure-GFN backed by the device-PFN @uvmem_pfn.
+ * The state word stores @uvmem_pfn together with KVMPPC_GFN_UVMEM_PFN.
+ */
+static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
+ unsigned long uvmem_pfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
+}
+
+/*
+ * Mark the GFN as a secure-GFN associated with a memory-PFN.
+ * Only the KVMPPC_GFN_MEM_PFN flag is stored; no PFN value is recorded.
+ */
+static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
+}
+
+/*
+ * Mark the GFN as a shared GFN.
+ * Only the KVMPPC_GFN_SHARED flag is stored; no PFN value is recorded.
+ */
+static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
+}
+
+/*
+ * Mark the GFN as a non-existent GFN.
+ * The state word is reset to 0, i.e. no KVMPPC_GFN_* flag is set.
+ */
+static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
+{
+ kvmppc_mark_gfn(gfn, kvm, 0, 0);
+}
+
+/*
+ * Return true if @gfn is a secure-GFN backed by a secure (device) PFN.
+ *
+ * When that is the case and @uvmem_pfn is non-NULL, the device PFN
+ * (the state word with the flag bits masked off) is stored through it.
+ * Returns false for a GFN in any other state or one that no tracked
+ * slot covers.
+ */
+static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
+				    unsigned long *uvmem_pfn)
+{
+	struct kvmppc_uvmem_slot *uvslot;
+
+	list_for_each_entry(uvslot, &kvm->arch.uvmem_pfns, list) {
+		unsigned long state;
+
+		if (gfn < uvslot->base_pfn ||
+		    gfn >= uvslot->base_pfn + uvslot->nr_pfns)
+			continue;
+
+		state = uvslot->pfns[gfn - uvslot->base_pfn];
+		if (!(state & KVMPPC_GFN_UVMEM_PFN))
+			return false;
+
+		if (uvmem_pfn)
+			*uvmem_pfn = state & KVMPPC_GFN_PFN_MASK;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Starting from *gfn, look for the next GFN that has none of the
+ * KVMPPC_GFN_* state flags set, i.e. one that has not yet been
+ * transitioned.  On success the GFN is written back to *gfn and true is
+ * returned; otherwise *gfn is left untouched and false is returned.
+ * The search stays within the tracked slot that contains *gfn.
+ *
+ * Must be called with kvm->arch.uvmem_lock held.
+ */
+static bool kvmppc_next_nontransitioned_gfn(const struct kvm_memory_slot *memslot,
+		struct kvm *kvm, unsigned long *gfn)
+{
+	struct kvmppc_uvmem_slot *uvslot = NULL, *iter;
+	unsigned long cur, limit;
+
+	list_for_each_entry(iter, &kvm->arch.uvmem_pfns, list) {
+		if (*gfn >= iter->base_pfn &&
+		    *gfn < iter->base_pfn + iter->nr_pfns) {
+			uvslot = iter;
+			break;
+		}
+	}
+	if (!uvslot)
+		return false;
+
+	/*
+	 * The code below assumes, one to one correspondence between
+	 * kvmppc_uvmem_slot and memslot.
+	 */
+	limit = uvslot->base_pfn + uvslot->nr_pfns;
+	for (cur = *gfn; cur < limit; cur++) {
+		if (uvslot->pfns[cur - uvslot->base_pfn] & KVMPPC_GFN_FLAG_MASK)
+			continue;
+
+		*gfn = cur;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Apply MADV_MERGEABLE (@merge true) or MADV_UNMERGEABLE (@merge false)
+ * through ksm_madvise() to every VMA overlapping the userspace range that
+ * backs @memslot.  Runs with kvm->mm's mmap lock held for write.
+ *
+ * Returns 0 on success, or H_STATE if the memslot has no valid HVA, a
+ * VMA is missing from the range, or ksm_madvise() fails.
+ */
+static int kvmppc_memslot_page_merge(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot, bool merge)
+{
+ unsigned long gfn = memslot->base_gfn;
+ unsigned long end, start = gfn_to_hva(kvm, gfn);
+ vm_flags_t vm_flags;
+ int ret = 0;
+ struct vm_area_struct *vma;
+ int merge_flag = (merge) ? MADV_MERGEABLE : MADV_UNMERGEABLE;
+
+ if (kvm_is_error_hva(start))
+ return H_STATE;
+
+ end = start + (memslot->npages << PAGE_SHIFT);
+
+ mmap_write_lock(kvm->mm);
+ do {
+ /* next VMA overlapping the not-yet-processed part of [start, end) */
+ vma = find_vma_intersection(kvm->mm, start, end);
+ if (!vma) {
+ ret = H_STATE;
+ break;
+ }
+ vma_start_write(vma);
+ /* Copy vm_flags to avoid partial modifications in ksm_madvise */
+ vm_flags = vma->vm_flags;
+ ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
+ merge_flag, &vm_flags);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+ /* commit the updated flags only after ksm_madvise() succeeded */
+ vm_flags_reset(vma, vm_flags);
+ start = vma->vm_end;
+ } while (end > vma->vm_end);
+
+ mmap_write_unlock(kvm->mm);
+ return ret;
+}
+
+/*
+ * Undo __kvmppc_uvmem_memslot_create() for @memslot: unregister it via
+ * uv_unregister_mem_slot(), free its GFN tracking structure and mark its
+ * pages mergeable again.  Return values of the callees are ignored.
+ */
+static void __kvmppc_uvmem_memslot_delete(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
+ kvmppc_uvmem_slot_free(kvm, memslot);
+ kvmppc_memslot_page_merge(kvm, memslot, true);
+}
+
+/*
+ * Set up @memslot for a secure guest: mark its pages unmergeable,
+ * allocate the GFN tracking structure and register the slot through
+ * uv_register_mem_slot().
+ *
+ * Returns 0 on success.  On any failure the steps already taken are
+ * undone and H_PARAMETER is returned.
+ */
+static int __kvmppc_uvmem_memslot_create(struct kvm *kvm,
+		const struct kvm_memory_slot *memslot)
+{
+	int rc;
+
+	if (kvmppc_memslot_page_merge(kvm, memslot, false))
+		return H_PARAMETER;
+
+	if (kvmppc_uvmem_slot_init(kvm, memslot))
+		goto undo_merge;
+
+	rc = uv_register_mem_slot(kvm->arch.lpid,
+				  memslot->base_gfn << PAGE_SHIFT,
+				  memslot->npages * PAGE_SIZE,
+				  0, memslot->id);
+	if (rc < 0)
+		goto undo_slot;
+
+	return 0;
+
+undo_slot:
+	kvmppc_uvmem_slot_free(kvm, memslot);
+undo_merge:
+	kvmppc_memslot_page_merge(kvm, memslot, true);
+	return H_PARAMETER;
+}
+
+/*
+ * Start the transition of @kvm to a secure guest: flag the VM with
+ * KVMPPC_SECURE_INIT_START and set up every memslot with
+ * __kvmppc_uvmem_memslot_create().
+ *
+ * Returns H_SUCCESS when all memslots were set up, H_UNSUPPORTED when
+ * the uvmem bitmap is absent or the guest is not radix, H_AUTHORITY when
+ * kvm->arch.svm_enabled is clear, or the error returned for the first
+ * memslot that could not be set up.  In the last case the memslots that
+ * had already been set up are torn down again.
+ */
+unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot, *m;
+	int ret = H_SUCCESS;
+	int srcu_idx, bkt;
+
+	kvm->arch.secure_guest = KVMPPC_SECURE_INIT_START;
+
+	if (!kvmppc_uvmem_bitmap)
+		return H_UNSUPPORTED;
+
+	/* Only radix guests can be secure guests */
+	if (!kvm_is_radix(kvm))
+		return H_UNSUPPORTED;
+
+	/* NAK the transition to secure if not enabled */
+	if (!kvm->arch.svm_enabled)
+		return H_AUTHORITY;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	/* register the memslot */
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, bkt, slots) {
+		ret = __kvmppc_uvmem_memslot_create(kvm, memslot);
+		if (ret)
+			break;
+	}
+
+	if (ret) {
+		/*
+		 * Unwind: delete each slot that was created before the one
+		 * that failed.  The failing slot (memslot) has already been
+		 * cleaned up by __kvmppc_uvmem_memslot_create() itself, so
+		 * it is the iterator m that must be deleted here.
+		 */
+		slots = kvm_memslots(kvm);
+		kvm_for_each_memslot(m, bkt, slots) {
+			if (m == memslot)
+				break;
+			__kvmppc_uvmem_memslot_delete(kvm, m);
+		}
+	}
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return ret;
+}
+
+/*
+ * Provision a new page on HV side and copy over the contents
+ * from secure memory using UV_PAGE_OUT uvcall.
+ * Caller must hold kvm->arch.uvmem_lock.
+ *
+ * Returns U_SUCCESS if the GFN is not backed by a device PFN (nothing to
+ * do), -1 if migrate_vma_setup() or the page allocation fails, and
+ * otherwise the value left in ret by the migration steps (the result of
+ * uv_page_out() when it was issued).
+ */
+static int __kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa, struct page *fault_page)
+{
+ unsigned long src_pfn, dst_pfn = 0;
+ struct migrate_vma mig = { 0 };
+ struct page *dpage, *spage;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn;
+ int ret = U_SUCCESS;
+
+ /* NOTE(review): mig is already zero-initialised above; this memset looks redundant */
+ memset(&mig, 0, sizeof(mig));
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.pgmap_owner = &kvmppc_uvmem_pgmap;
+ mig.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+ mig.fault_page = fault_page;
+
+ /* The requested page is already paged-out, nothing to do */
+ if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
+ return ret;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return -1;
+
+ /* nothing selected for migration: finalize and return ret (0 here) */
+ spage = migrate_pfn_to_page(*mig.src);
+ if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
+ goto out_finalize;
+
+ /* only device pages are handled here */
+ if (!is_zone_device_page(spage))
+ goto out_finalize;
+
+ /* destination: a normal page allocated for this VMA address */
+ dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ lock_page(dpage);
+ pvt = spage->zone_device_data;
+ pfn = page_to_pfn(dpage);
+
+ /*
+ * This function is used in two cases:
+ * - When HV touches a secure page, for which we do UV_PAGE_OUT
+ * - When a secure page is converted to shared page, we *get*
+ * the page to essentially unmap the device page. In this
+ * case we skip page-out.
+ */
+ if (!pvt->skip_page_out)
+ ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+
+ if (ret == U_SUCCESS)
+ *mig.dst = migrate_pfn(pfn);
+ else {
+ /* page-out failed: release the destination page, leave *mig.dst at 0 */
+ unlock_page(dpage);
+ __free_page(dpage);
+ goto out_finalize;
+ }
+
+ migrate_vma_pages(&mig);
+
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+/*
+ * Locked wrapper around __kvmppc_svm_page_out(): takes
+ * kvm->arch.uvmem_lock for the duration of the call and returns its
+ * result unchanged.
+ */
+static inline int kvmppc_svm_page_out(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end,
+ unsigned long page_shift,
+ struct kvm *kvm, unsigned long gpa,
+ struct page *fault_page)
+{
+ int ret;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ ret = __kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa,
+ fault_page);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ return ret;
+}
+
+/*
+ * Drop device pages that we maintain for the secure guest
+ *
+ * We first mark the pages to be skipped from UV_PAGE_OUT when there
+ * is HV side fault on these pages. Next we *get* these pages, forcing
+ * fault on them, do fault time migration to replace the device PTEs in
+ * QEMU page table with normal PTEs from newly allocated pages.
+ *
+ * Walks every page of @slot with kvm->mm's mmap lock held for read,
+ * taking kvm->arch.uvmem_lock around each page.  @skip_page_out is
+ * copied into the private data of each device page before it is paged
+ * out.  The walk stops early if no VMA covers an address.
+ */
+void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *slot,
+ struct kvm *kvm, bool skip_page_out)
+{
+ int i;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ struct page *uvmem_page;
+ struct vm_area_struct *vma = NULL;
+ unsigned long uvmem_pfn, gfn;
+ unsigned long addr;
+
+ mmap_read_lock(kvm->mm);
+
+ addr = slot->userspace_addr;
+
+ gfn = slot->base_gfn;
+ /* i counts down the remaining pages; gfn and addr advance together */
+ for (i = slot->npages; i; --i, ++gfn, addr += PAGE_SIZE) {
+
+ /* Fetch the VMA if addr is not in the latest fetched one */
+ if (!vma || addr >= vma->vm_end) {
+ vma = vma_lookup(kvm->mm, addr);
+ if (!vma) {
+ pr_err("Can't find VMA for gfn:0x%lx\n", gfn);
+ break;
+ }
+ }
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ /* GFN is backed by a device page: flag it, then migrate it out */
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = skip_page_out;
+ pvt->remove_gfn = true;
+
+ if (__kvmppc_svm_page_out(vma, addr, addr + PAGE_SIZE,
+ PAGE_SHIFT, kvm, pvt->gpa, NULL))
+ pr_err("Can't page out gpa:0x%lx addr:0x%lx\n",
+ pvt->gpa, addr);
+ } else {
+ /* Remove the shared flag if any */
+ kvmppc_gfn_remove(gfn, kvm);
+ }
+
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ }
+
+ mmap_read_unlock(kvm->mm);
+}
+
+/*
+ * H_SVM_INIT_ABORT handler.
+ *
+ * Only meaningful after INIT_START and before INIT_DONE. Drops the device
+ * pages of every memslot (with skip_page_out == false), clears
+ * kvm->arch.secure_guest and calls uv_svm_terminate() for the LPID.
+ * Returns H_UNSUPPORTED or H_STATE when called in the wrong state and
+ * H_PARAMETER once the cleanup has run.
+ */
+unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
+{
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ int bucket, idx;
+
+ /* Nothing to abort unless the secure transition has been started. */
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ /* If INIT_DONE was completed, use normal VM termination sequence. */
+ if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
+ return H_STATE;
+
+ idx = srcu_read_lock(&kvm->srcu);
+
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(slot, bucket, slots) {
+ kvmppc_uvmem_drop_pages(slot, kvm, false);
+ }
+
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ kvm->arch.secure_guest = 0;
+ uv_svm_terminate(kvm->arch.lpid);
+
+ return H_PARAMETER;
+}
+
+/*
+ * Get a free device PFN from the pool
+ *
+ * Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
+ * PFN will be used to keep track of the secure page on HV side.
+ *
+ * Called with kvm->arch.uvmem_lock held
+ *
+ * @gpa: guest physical address recorded in the page's private data and
+ * used (shifted by PAGE_SHIFT) to mark the gfn as device-backed.
+ * @kvm: owning VM, also recorded in the private data.
+ *
+ * Returns the initialised device page, or NULL when the bitmap has no
+ * free bit or the private-data allocation fails.
+ */
+static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
+{
+ struct page *dpage = NULL;
+ unsigned long bit, uvmem_pfn;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long pfn_last, pfn_first;
+
+ /* PFN window covered by the device pagemap range */
+ pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
+ pfn_last = pfn_first +
+ (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
+
+ /* Reserve the first free bit while holding the bitmap spinlock */
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
+ pfn_last - pfn_first);
+ if (bit >= (pfn_last - pfn_first))
+ goto out;
+ bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+ /* GFP_KERNEL allocation is done after the spinlock was dropped */
+ pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
+ if (!pvt)
+ goto out_clear;
+
+ uvmem_pfn = bit + pfn_first;
+ kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
+
+ pvt->gpa = gpa;
+ pvt->kvm = kvm;
+
+ dpage = pfn_to_page(uvmem_pfn);
+ dpage->zone_device_data = pvt;
+ zone_device_page_init(dpage);
+ return dpage;
+out_clear:
+ /* Allocation failed: give the reserved bit back */
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+out:
+ /* Reached with the bitmap spinlock held on both paths */
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+ return NULL;
+}
+
+/*
+ * Alloc a PFN from private device memory pool. If @pagein is true,
+ * copy page from normal memory to secure memory using UV_PAGE_IN uvcall.
+ *
+ * @vma, @start, @end: the single page range handed to migrate_vma_setup().
+ * @gpa: guest physical address the device page is bound to.
+ * @kvm: owning VM.
+ * @page_shift: shift used to build the addresses passed to uv_page_in().
+ * @pagein: when false, only the device page is set up and no uvcall
+ * is issued.
+ *
+ * Both callers visible in this file hold kvm->arch.uvmem_lock.
+ *
+ * Returns 0 on success, the migrate_vma_setup() error, -1 when the source
+ * page cannot be migrated or no device page is available, or the non-zero
+ * uv_page_in() result.
+ */
+static int kvmppc_svm_page_in(struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end, unsigned long gpa, struct kvm *kvm,
+ unsigned long page_shift,
+ bool pagein)
+{
+ unsigned long src_pfn, dst_pfn = 0;
+ /* The initializer zeroes every member; no separate memset() needed */
+ struct migrate_vma mig = { 0 };
+ struct page *spage;
+ unsigned long pfn;
+ struct page *dpage;
+ int ret = 0;
+
+ mig.vma = vma;
+ mig.start = start;
+ mig.end = end;
+ mig.src = &src_pfn;
+ mig.dst = &dst_pfn;
+ mig.flags = MIGRATE_VMA_SELECT_SYSTEM;
+
+ ret = migrate_vma_setup(&mig);
+ if (ret)
+ return ret;
+
+ /* The source page was not collected for migration */
+ if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ dpage = kvmppc_uvmem_get_page(gpa, kvm);
+ if (!dpage) {
+ ret = -1;
+ goto out_finalize;
+ }
+
+ if (pagein) {
+ pfn = *mig.src >> MIGRATE_PFN_SHIFT;
+ spage = migrate_pfn_to_page(*mig.src);
+ if (spage) {
+ ret = uv_page_in(kvm->arch.lpid, pfn << page_shift,
+ gpa, 0, page_shift);
+ /*
+ * NOTE(review): dpage obtained above is not released
+ * on this failure path - confirm whether it has to be
+ * handed back to the pool here.
+ */
+ if (ret)
+ goto out_finalize;
+ }
+ }
+
+ *mig.dst = migrate_pfn(page_to_pfn(dpage));
+ migrate_vma_pages(&mig);
+out_finalize:
+ migrate_vma_finalize(&mig);
+ return ret;
+}
+
+/*
+ * Move every not-yet-transitioned gfn of @memslot to a device page.
+ *
+ * Runs with kvm->mm read-locked and kvm->arch.uvmem_lock held for the
+ * whole loop. Each gfn reported by kvmppc_next_nontransitioned_gfn() is
+ * passed to kvmppc_svm_page_in() with pagein == false.
+ *
+ * Returns 0 when the loop runs to completion (including when there was
+ * nothing to do) and H_STATE as soon as an hva lookup, the VMA check or
+ * the page-in fails.
+ */
+static int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ unsigned long gfn = memslot->base_gfn;
+ struct vm_area_struct *vma;
+ unsigned long start, end;
+ int ret = 0;
+
+ mmap_read_lock(kvm->mm);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
+ /* Preset the error so the early breaks below report H_STATE */
+ ret = H_STATE;
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ break;
+
+ end = start + (1UL << PAGE_SHIFT);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ /* The VMA must cover the whole page */
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ break;
+
+ ret = kvmppc_svm_page_in(vma, start, end,
+ (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
+ if (ret) {
+ ret = H_STATE;
+ break;
+ }
+
+ /* relinquish the cpu if needed */
+ cond_resched();
+ }
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ mmap_read_unlock(kvm->mm);
+ return ret;
+}
+
+/*
+ * H_SVM_INIT_DONE handler.
+ *
+ * Requires KVMPPC_SECURE_INIT_START to be set, otherwise returns
+ * H_UNSUPPORTED. Migrates the remaining normal pages of every memslot
+ * and, if all of them succeed, sets KVMPPC_SECURE_INIT_DONE and returns
+ * H_SUCCESS. The first failing memslot ends the walk with H_STATE.
+ */
+unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx, bkt;
+ long ret = H_SUCCESS;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ /* migrate any unmoved normal pfn to device pfns*/
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm_memslots(kvm);
+ kvm_for_each_memslot(memslot, bkt, slots) {
+ ret = kvmppc_uv_migrate_mem_slot(kvm, memslot);
+ if (ret) {
+ /*
+ * The pages will remain transitioned.
+ * Its the callers responsibility to
+ * terminate the VM, which will undo
+ * all state of the VM. Till then
+ * this VM is in a erroneous state.
+ * Its KVMPPC_SECURE_INIT_DONE will
+ * remain unset.
+ */
+ ret = H_STATE;
+ goto out;
+ }
+ }
+
+ kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
+ pr_info("LPID %lld went secure\n", kvm->arch.lpid);
+
+out:
+ /* The srcu read lock is dropped on both the success and error path */
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * Shares the page with HV, thus making it a normal page.
+ *
+ * - If the page is already secure, then provision a new page and share
+ * - If the page is a normal page, share the existing page
+ *
+ * In the former case, uses dev_pagemap_ops.migrate_to_ram handler
+ * to unmap the device page from QEMU's page tables.
+ *
+ * Returns H_SUCCESS when uv_page_in() reports success for the page, and
+ * H_PARAMETER when gfn_to_page() yields no page or the uvcall fails.
+ */
+static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
+ unsigned long page_shift)
+{
+
+ int ret = H_PARAMETER;
+ struct page *page, *uvmem_page;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long gfn = gpa >> page_shift;
+ int srcu_idx;
+ unsigned long uvmem_pfn;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = true;
+ /*
+ * do not drop the GFN. It is a valid GFN
+ * that is transitioned to a shared GFN.
+ */
+ pvt->remove_gfn = false;
+ }
+
+retry:
+ /* uvmem_lock is dropped before gfn_to_page() is called */
+ mutex_unlock(&kvm->arch.uvmem_lock);
+ page = gfn_to_page(kvm, gfn);
+ /* uvmem_lock is not held here, so "out" only ends the srcu section */
+ if (!page)
+ goto out;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ /*
+ * Re-check under the lock: if the gfn is device-backed again, flag
+ * the device page as before, give the page back and start over.
+ */
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
+ uvmem_page = pfn_to_page(uvmem_pfn);
+ pvt = uvmem_page->zone_device_data;
+ pvt->skip_page_out = true;
+ pvt->remove_gfn = false; /* it continues to be a valid GFN */
+ kvm_release_page_unused(page);
+ goto retry;
+ }
+
+ /* uv_page_in() returning zero is treated as success */
+ if (!uv_page_in(kvm->arch.lpid, page_to_pfn(page) << page_shift, gpa, 0,
+ page_shift)) {
+ kvmppc_gfn_shared(gfn, kvm);
+ ret = H_SUCCESS;
+ }
+ kvm_release_page_clean(page);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * H_SVM_PAGE_IN: Move page from normal memory to secure memory.
+ *
+ * H_PAGE_IN_SHARED flag makes the page shared which means that the same
+ * memory is visible from both UV and HV.
+ *
+ * Returns H_UNSUPPORTED when the secure transition has not been started,
+ * H_P3 for a @page_shift other than PAGE_SHIFT, H_P2 for any flag besides
+ * H_PAGE_IN_SHARED, H_PARAMETER when the lookup, the already-paged-in
+ * check or the page-in fails, and H_SUCCESS otherwise. With
+ * H_PAGE_IN_SHARED set the result of kvmppc_share_page() is returned.
+ */
+unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags,
+ unsigned long page_shift)
+{
+ unsigned long start, end;
+ struct vm_area_struct *vma;
+ int srcu_idx;
+ unsigned long gfn = gpa >> page_shift;
+ int ret;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (page_shift != PAGE_SHIFT)
+ return H_P3;
+
+ if (flags & ~H_PAGE_IN_SHARED)
+ return H_P2;
+
+ if (flags & H_PAGE_IN_SHARED)
+ return kvmppc_share_page(kvm, gpa, page_shift);
+
+ /* Every failure below is reported as H_PARAMETER */
+ ret = H_PARAMETER;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mmap_read_lock(kvm->mm);
+
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ goto out;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ /* Fail the page-in request of an already paged-in page */
+ if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+ goto out_unlock;
+
+ end = start + (1UL << page_shift);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ /* The VMA must cover the whole page */
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ goto out_unlock;
+
+ if (kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
+ true))
+ goto out_unlock;
+
+ ret = H_SUCCESS;
+
+out_unlock:
+ mutex_unlock(&kvm->arch.uvmem_lock);
+out:
+ mmap_read_unlock(kvm->mm);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+
+/*
+ * migrate_to_ram callback, invoked when HV touches a page that has been
+ * moved to secure memory. The page is requested back from UV through
+ * the UV_PAGE_OUT path of kvmppc_svm_page_out().
+ *
+ * As a result the device PFN is dropped and the newly provisioned
+ * page/PFN gets populated in QEMU page tables.
+ *
+ * Returns 0 on success and VM_FAULT_SIGBUS when the page-out fails.
+ */
+static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
+{
+ struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;
+ unsigned long hva = vmf->address;
+ int rc;
+
+ rc = kvmppc_svm_page_out(vmf->vma, hva, hva + PAGE_SIZE, PAGE_SHIFT,
+ pvt->kvm, pvt->gpa, vmf->page);
+
+ return rc ? VM_FAULT_SIGBUS : 0;
+}
+
+/*
+ * Release the device PFN back to the pool
+ *
+ * Gets called when a secure GFN transitions from a secure-PFN
+ * to a normal PFN during H_SVM_PAGE_OUT.
+ * Gets called with kvm->arch.uvmem_lock held.
+ */
+static void kvmppc_uvmem_page_free(struct page *page)
+{
+ unsigned long first_pfn = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
+ unsigned long bit = page_to_pfn(page) - first_pfn;
+ struct kvmppc_uvmem_page_pvt *pvt;
+ unsigned long gfn;
+
+ /* Hand the slot back to the allocator bitmap. */
+ spin_lock(&kvmppc_uvmem_bitmap_lock);
+ bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
+ spin_unlock(&kvmppc_uvmem_bitmap_lock);
+
+ /* Detach the private data before the GFN state is updated. */
+ pvt = page->zone_device_data;
+ page->zone_device_data = NULL;
+
+ gfn = pvt->gpa >> PAGE_SHIFT;
+ if (!pvt->remove_gfn)
+ kvmppc_gfn_secure_mem_pfn(gfn, pvt->kvm);
+ else
+ kvmppc_gfn_remove(gfn, pvt->kvm);
+
+ kfree(pvt);
+}
+
+/*
+ * Pagemap callbacks for the device-private range; installed as
+ * kvmppc_uvmem_pgmap.ops in kvmppc_uvmem_init().
+ */
+static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
+ .page_free = kvmppc_uvmem_page_free,
+ .migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
+};
+
+/*
+ * H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
+ *
+ * Returns H_UNSUPPORTED when the secure transition has not been started,
+ * H_P3 for a @page_shift other than PAGE_SHIFT, H_P2 when any flag is
+ * set, H_PARAMETER when the hva/VMA lookup or the page-out fails, and
+ * H_SUCCESS otherwise.
+ */
+unsigned long
+kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
+ unsigned long flags, unsigned long page_shift)
+{
+ unsigned long gfn = gpa >> page_shift;
+ unsigned long start, end;
+ struct vm_area_struct *vma;
+ int srcu_idx;
+ int ret;
+
+ if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
+ return H_UNSUPPORTED;
+
+ if (page_shift != PAGE_SHIFT)
+ return H_P3;
+
+ if (flags)
+ return H_P2;
+
+ /* Every failure below is reported as H_PARAMETER */
+ ret = H_PARAMETER;
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ mmap_read_lock(kvm->mm);
+ start = gfn_to_hva(kvm, gfn);
+ if (kvm_is_error_hva(start))
+ goto out;
+
+ end = start + (1UL << page_shift);
+ vma = find_vma_intersection(kvm->mm, start, end);
+ /* The VMA must cover the whole page */
+ if (!vma || vma->vm_start > start || vma->vm_end < end)
+ goto out;
+
+ /* Locked variant: takes kvm->arch.uvmem_lock itself */
+ if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa, NULL))
+ ret = H_SUCCESS;
+out:
+ mmap_read_unlock(kvm->mm);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return ret;
+}
+
+/*
+ * Issue uv_page_in() for @gfn unless the gfn is already backed by a
+ * device PFN, in which case nothing is sent.
+ *
+ * Returns RESUME_GUEST when the uvcall succeeded or was not needed, and
+ * -EFAULT when the page cannot be looked up or the uvcall fails.
+ */
+int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
+{
+ struct page *page = gfn_to_page(kvm, gfn);
+ int rc = U_SUCCESS;
+
+ if (!page)
+ return -EFAULT;
+
+ mutex_lock(&kvm->arch.uvmem_lock);
+ if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
+ rc = uv_page_in(kvm->arch.lpid,
+ page_to_pfn(page) << PAGE_SHIFT,
+ gfn << PAGE_SHIFT, 0, PAGE_SHIFT);
+ kvm_release_page_clean(page);
+ mutex_unlock(&kvm->arch.uvmem_lock);
+
+ if (rc != U_SUCCESS)
+ return -EFAULT;
+
+ return RESUME_GUEST;
+}
+
+/*
+ * Set up uvmem state for the memslot @new and, if that succeeds, migrate
+ * its pages with kvmppc_uv_migrate_mem_slot(). Returns the first
+ * non-zero result, or 0 when both steps succeed.
+ */
+int kvmppc_uvmem_memslot_create(struct kvm *kvm, const struct kvm_memory_slot *new)
+{
+ int rc;
+
+ rc = __kvmppc_uvmem_memslot_create(kvm, new);
+ if (rc)
+ return rc;
+
+ return kvmppc_uv_migrate_mem_slot(kvm, new);
+}
+
+/* Public wrapper that forwards to __kvmppc_uvmem_memslot_delete(). */
+void kvmppc_uvmem_memslot_delete(struct kvm *kvm, const struct kvm_memory_slot *old)
+{
+ __kvmppc_uvmem_memslot_delete(kvm, old);
+}
+
+/*
+ * Sum up the secure memory size advertised in the device tree.
+ *
+ * The "reg" properties of all "ibm,secure-memory" nodes are preferred.
+ * Only when they add up to zero is the deprecated "secure-memory-ranges"
+ * property of the "ibm,uv-firmware" node consulted. Returns 0 when
+ * neither source yields a size.
+ */
+static u64 kvmppc_get_secmem_size(void)
+{
+ struct device_node *np;
+ const __be32 *prop;
+ u64 size = 0;
+ int i, len;
+
+ /*
+ * The new ibm,secure-memory nodes supersede the
+ * secure-memory-ranges property, so look at them first.
+ */
+ for_each_compatible_node(np, NULL, "ibm,secure-memory") {
+ prop = of_get_property(np, "reg", &len);
+ if (prop)
+ size += of_read_number(prop + 2, 2);
+ }
+ /* Found some: no need to read the deprecated property. */
+ if (size)
+ return size;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
+ if (!np)
+ return size;
+
+ prop = of_get_property(np, "secure-memory-ranges", &len);
+ if (prop) {
+ /* Each range is four cells; the size sits in the last two. */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++)
+ size += of_read_number(prop + (i * 4) + 2, 2);
+ }
+
+ of_node_put(np);
+ return size;
+}
+
+/*
+ * Set up the device-private pagemap and the PFN allocation bitmap.
+ *
+ * Returns 0 on success and also when no secure memory is reported (the
+ * bitmap is then left unallocated). Otherwise returns the error from
+ * request_free_mem_region() or memremap_pages(), or -ENOMEM when the
+ * bitmap cannot be allocated.
+ */
+int kvmppc_uvmem_init(void)
+{
+ int ret = 0;
+ unsigned long size;
+ struct resource *res;
+ void *addr;
+ unsigned long pfn_last, pfn_first;
+
+ size = kvmppc_get_secmem_size();
+ if (!size) {
+ /*
+ * Don't fail the initialization of kvm-hv module if
+ * the platform doesn't export ibm,uv-firmware node.
+ * Let normal guests run on such PEF-disabled platform.
+ */
+ pr_info("KVMPPC-UVMEM: No support for secure guests\n");
+ goto out;
+ }
+
+ res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
+ if (IS_ERR(res)) {
+ ret = PTR_ERR(res);
+ goto out;
+ }
+
+ kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
+ kvmppc_uvmem_pgmap.range.start = res->start;
+ kvmppc_uvmem_pgmap.range.end = res->end;
+ kvmppc_uvmem_pgmap.nr_range = 1;
+ kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
+ /* just one global instance: */
+ kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
+ addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
+ if (IS_ERR(addr)) {
+ ret = PTR_ERR(addr);
+ goto out_free_region;
+ }
+
+ /* One bitmap bit per page of the reserved region */
+ pfn_first = res->start >> PAGE_SHIFT;
+ pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
+ kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL);
+ if (!kvmppc_uvmem_bitmap) {
+ ret = -ENOMEM;
+ goto out_unmap;
+ }
+
+ pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
+ return ret;
+ /* Error unwinding: undo the steps above in reverse order */
+out_unmap:
+ memunmap_pages(&kvmppc_uvmem_pgmap);
+out_free_region:
+ release_mem_region(res->start, size);
+out:
+ return ret;
+}
+
+/*
+ * Tear down what kvmppc_uvmem_init() set up: unmap the device pages,
+ * release the reserved memory region and free the PFN bitmap. Does
+ * nothing when the bitmap was never allocated.
+ */
+void kvmppc_uvmem_free(void)
+{
+ if (kvmppc_uvmem_bitmap) {
+ memunmap_pages(&kvmppc_uvmem_pgmap);
+ release_mem_region(kvmppc_uvmem_pgmap.range.start,
+ range_len(&kvmppc_uvmem_pgmap.range));
+ bitmap_free(kvmppc_uvmem_bitmap);
+ }
+}
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index d044b8b7c69d..f4bec2fc51aa 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -23,9 +12,10 @@
#include <asm/page.h>
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
+#include <asm/asm-compat.h>
#if defined(CONFIG_PPC_BOOK3S_64)
-#if defined(_CALL_ELF) && _CALL_ELF == 2
+#ifdef CONFIG_PPC64_ELF_ABI_V2
#define FUNC(name) name
#else
#define FUNC(name) GLUE(.,name)
@@ -36,7 +26,7 @@
#define FUNC(name) name
#define GET_SHADOW_VCPU(reg) lwz reg, (THREAD + THREAD_KVM_SVCPU)(r2)
-#endif /* CONFIG_PPC_BOOK3S_XX */
+#endif /* CONFIG_PPC_BOOK3S_64 */
#define VCPU_LOAD_NVGPRS(vcpu) \
PPC_LL r14, VCPU_GPR(R14)(vcpu); \
@@ -65,8 +55,7 @@
****************************************************************************/
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
@@ -78,8 +67,8 @@ kvm_start_entry:
/* Save host state to the stack */
PPC_STLU r1, -SWITCH_FRAME_SIZE(r1)
- /* Save r3 (kvm_run) and r4 (vcpu) */
- SAVE_2GPRS(3, r1)
+ /* Save r3 (vcpu) */
+ SAVE_GPR(3, r1)
/* Save non-volatile registers (r14 - r31) */
SAVE_NVGPRS(r1)
@@ -92,47 +81,46 @@ kvm_start_entry:
PPC_STL r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
kvm_start_lightweight:
/* Copy registers into shadow vcpu so we can access them in real mode */
- GET_SHADOW_VCPU(r3)
bl FUNC(kvmppc_copy_to_svcpu)
nop
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
#ifdef CONFIG_PPC_BOOK3S_64
/* Get the dcbz32 flag */
- PPC_LL r3, VCPU_HFLAGS(r4)
- rldicl r3, r3, 0, 63 /* r3 &= 1 */
- stb r3, HSTATE_RESTORE_HID5(r13)
+ PPC_LL r0, VCPU_HFLAGS(r3)
+ rldicl r0, r0, 0, 63 /* r3 &= 1 */
+ stb r0, HSTATE_RESTORE_HID5(r13)
/* Load up guest SPRG3 value, since it's user readable */
- lwz r3, VCPU_SHAREDBE(r4)
- cmpwi r3, 0
- ld r5, VCPU_SHARED(r4)
+ lbz r4, VCPU_SHAREDBE(r3)
+ cmpwi r4, 0
+ ld r5, VCPU_SHARED(r3)
beq sprg3_little_endian
sprg3_big_endian:
#ifdef __BIG_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
b after_sprg3_load
sprg3_little_endian:
#ifdef __LITTLE_ENDIAN__
- ld r3, VCPU_SHARED_SPRG3(r5)
+ ld r4, VCPU_SHARED_SPRG3(r5)
#else
addi r5, r5, VCPU_SHARED_SPRG3
- ldbrx r3, 0, r5
+ ldbrx r4, 0, r5
#endif
after_sprg3_load:
- mtspr SPRN_SPRG3, r3
+ mtspr SPRN_SPRG3, r4
#endif /* CONFIG_PPC_BOOK3S_64 */
- PPC_LL r4, VCPU_SHADOW_MSR(r4) /* get shadow_msr */
+ PPC_LL r4, VCPU_SHADOW_MSR(r3) /* get shadow_msr */
/* Jump to segment patching handler and into our guest */
bl FUNC(kvmppc_entry_trampoline)
@@ -156,7 +144,7 @@ after_sprg3_load:
*
*/
- PPC_LL r3, GPR4(r1) /* vcpu pointer */
+ PPC_LL r3, GPR3(r1) /* vcpu pointer */
/*
* kvmppc_copy_from_svcpu can clobber volatile registers, save
@@ -165,9 +153,7 @@ after_sprg3_load:
stw r12, VCPU_TRAP(r3)
/* Transfer reg values from shadow vcpu back to vcpu struct */
- /* On 64-bit, interrupts are still off at this point */
- GET_SHADOW_VCPU(r4)
bl FUNC(kvmppc_copy_from_svcpu)
nop
@@ -181,7 +167,7 @@ after_sprg3_load:
#endif /* CONFIG_PPC_BOOK3S_64 */
/* R7 = vcpu */
- PPC_LL r7, GPR4(r1)
+ PPC_LL r7, GPR3(r1)
PPC_STL r14, VCPU_GPR(R14)(r7)
PPC_STL r15, VCPU_GPR(R15)(r7)
@@ -202,11 +188,11 @@ after_sprg3_load:
PPC_STL r30, VCPU_GPR(R30)(r7)
PPC_STL r31, VCPU_GPR(R31)(r7)
- /* Pass the exit number as 3rd argument to kvmppc_handle_exit */
- lwz r5, VCPU_TRAP(r7)
+ /* Pass the exit number as 2nd argument to kvmppc_handle_exit */
+ lwz r4, VCPU_TRAP(r7)
- /* Restore r3 (kvm_run) and r4 (vcpu) */
- REST_2GPRS(3, r1)
+ /* Restore r3 (vcpu) */
+ REST_GPR(3, r1)
bl FUNC(kvmppc_handle_exit_pr)
/* If RESUME_GUEST, get back in the loop */
@@ -235,11 +221,11 @@ kvm_loop_heavyweight:
PPC_LL r4, _LINK(r1)
PPC_STL r4, (PPC_LR_STKOFF + SWITCH_FRAME_SIZE)(r1)
- /* Load vcpu and cpu_run */
- REST_2GPRS(3, r1)
+ /* Load vcpu */
+ REST_GPR(3, r1)
/* Load non-volatile guest state from the vcpu */
- VCPU_LOAD_NVGPRS(r4)
+ VCPU_LOAD_NVGPRS(r3)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
@@ -247,7 +233,7 @@ kvm_loop_heavyweight:
kvm_loop_lightweight:
/* We'll need the vcpu pointer */
- REST_GPR(4, r1)
+ REST_GPR(3, r1)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 5a1ab1250a05..d904e13e069b 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -1,26 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved.
*
* Authors:
* Alexander Graf <agraf@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include <linux/kvm_host.h>
#include <linux/hash.h>
#include <linux/slab.h>
+#include <linux/rculist.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -103,12 +92,6 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
spin_unlock(&vcpu3s->mmu_lock);
}
-static void free_pte_rcu(struct rcu_head *head)
-{
- struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head);
- kmem_cache_free(hpte_cache, pte);
-}
-
static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
{
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
@@ -137,7 +120,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
spin_unlock(&vcpu3s->mmu_lock);
- call_rcu(&pte->rcu_head, free_pte_rcu);
+ kfree_rcu(pte, rcu_head);
}
static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bfb8035314e3..bc39c76c9d9f 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright Novell Inc 2010
*
@@ -180,7 +169,7 @@ static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store)
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE);
}
-static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_fpr_load(struct kvm_vcpu *vcpu,
int rs, ulong addr, int ls_type)
{
int emulated = EMULATE_FAIL;
@@ -199,7 +188,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
len, 1);
goto done_load;
}
@@ -224,7 +213,7 @@ done_load:
return emulated;
}
-static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_fpr_store(struct kvm_vcpu *vcpu,
int rs, ulong addr, int ls_type)
{
int emulated = EMULATE_FAIL;
@@ -259,7 +248,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (r < 0) {
kvmppc_inject_pf(vcpu, addr, true);
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_store(run, vcpu, val, len, 1);
+ emulated = kvmppc_handle_store(vcpu, val, len, 1);
} else {
emulated = EMULATE_DONE;
}
@@ -270,7 +259,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
return emulated;
}
-static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_psq_load(struct kvm_vcpu *vcpu,
int rs, ulong addr, bool w, int i)
{
int emulated = EMULATE_FAIL;
@@ -290,12 +279,12 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_inject_pf(vcpu, addr, false);
goto done_load;
} else if ((r == EMULATE_DO_MMIO) && w) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FPR | rs,
+ emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FPR | rs,
4, 1);
vcpu->arch.qpr[rs] = tmp[1];
goto done_load;
} else if (r == EMULATE_DO_MMIO) {
- emulated = kvmppc_handle_load(run, vcpu, KVM_MMIO_REG_FQPR | rs,
+ emulated = kvmppc_handle_load(vcpu, KVM_MMIO_REG_FQPR | rs,
8, 1);
goto done_load;
}
@@ -313,7 +302,7 @@ done_load:
return emulated;
}
-static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_emulate_psq_store(struct kvm_vcpu *vcpu,
int rs, ulong addr, bool w, int i)
{
int emulated = EMULATE_FAIL;
@@ -329,10 +318,10 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (r < 0) {
kvmppc_inject_pf(vcpu, addr, true);
} else if ((r == EMULATE_DO_MMIO) && w) {
- emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1);
+ emulated = kvmppc_handle_store(vcpu, tmp[0], 4, 1);
} else if (r == EMULATE_DO_MMIO) {
u64 val = ((u64)tmp[0] << 32) | tmp[1];
- emulated = kvmppc_handle_store(run, vcpu, val, 8, 1);
+ emulated = kvmppc_handle_store(vcpu, val, 8, 1);
} else {
emulated = EMULATE_DONE;
}
@@ -352,15 +341,7 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
return kvmppc_get_field(inst, msb + 32, lsb + 32);
}
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
- return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
-bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
+static bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
{
if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
return false;
@@ -637,9 +618,10 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc,
return EMULATE_DONE;
}
-int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_paired_single(struct kvm_vcpu *vcpu)
{
u32 inst;
+ ppc_inst_t pinst;
enum emulation_result emulated = EMULATE_DONE;
int ax_rd, ax_ra, ax_rb, ax_rc;
short full_d;
@@ -651,7 +633,8 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i;
#endif
- emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+ inst = ppc_inst_val(pinst);
if (emulated != EMULATE_DONE)
return emulated;
@@ -699,7 +682,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
break;
}
case OP_PSQ_LU:
@@ -709,7 +692,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -722,7 +705,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
break;
}
case OP_PSQ_STU:
@@ -732,7 +715,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 17, 19);
addr += get_d_signext(inst);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -752,7 +735,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
break;
}
case OP_4X_PS_CMPO0:
@@ -766,7 +749,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_load(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -843,7 +826,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
break;
}
case OP_4XW_PSQ_STUX:
@@ -853,7 +836,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
int i = inst_get_field(inst, 22, 24);
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i);
+ emulated = kvmppc_emulate_psq_store(vcpu, ax_rd, addr, w, i);
if (emulated == EMULATE_DONE)
kvmppc_set_gpr(vcpu, ax_ra, addr);
@@ -941,7 +924,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
break;
}
@@ -949,7 +932,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -960,7 +943,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
break;
}
@@ -968,7 +951,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -979,7 +962,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
break;
}
@@ -987,7 +970,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -998,7 +981,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
break;
}
@@ -1006,7 +989,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d;
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd, addr,
FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -1020,7 +1003,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0;
addr += kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
break;
}
@@ -1029,7 +1012,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -1041,7 +1024,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
break;
}
@@ -1050,7 +1033,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_load(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -1062,7 +1045,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
break;
}
@@ -1071,7 +1054,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_SINGLE);
if (emulated == EMULATE_DONE)
@@ -1083,7 +1066,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
break;
}
@@ -1092,7 +1075,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = kvmppc_get_gpr(vcpu, ax_ra) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr, FPU_LS_DOUBLE);
if (emulated == EMULATE_DONE)
@@ -1104,7 +1087,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) +
kvmppc_get_gpr(vcpu, ax_rb);
- emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd,
+ emulated = kvmppc_emulate_fpr_store(vcpu, ax_rd,
addr,
FPU_LS_SINGLE_LOW);
break;
@@ -1273,6 +1256,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
if (rcomp)
kvmppc_set_cr(vcpu, cr);
+ disable_kernel_fp();
preempt_enable();
return emulated;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index faffb27badd9..83bcdc80ce51 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
*
@@ -13,10 +14,6 @@
*
* This file is derived from arch/powerpc/kvm/44x.c,
* by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kvm_host.h>
@@ -27,21 +24,23 @@
#include <asm/reg.h>
#include <asm/cputable.h>
#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/io.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/mmu_context.h>
#include <asm/switch_to.h>
#include <asm/firmware.h>
-#include <asm/hvcall.h>
+#include <asm/setup.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
+#include <asm/asm-prototypes.h>
+#include <asm/tm.h>
#include "book3s.h"
@@ -53,13 +52,16 @@
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
ulong msr);
-static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
+#ifdef CONFIG_PPC_BOOK3S_64
+static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac);
+#endif
/* Some compatibility defines */
#ifdef CONFIG_PPC_BOOK3S_32
#define MSR_USER32 MSR_USER
#define MSR_USER64 MSR_USER
#define HW_PAGE_SIZE PAGE_SIZE
+#define HPTE_R_M _PAGE_COHERENT
#endif
static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu)
@@ -89,7 +91,43 @@ static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu)
kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS);
}
-void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu);
+static void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
+ ulong pc = kvmppc_get_pc(vcpu);
+ ulong lr = kvmppc_get_lr(vcpu);
+ if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK);
+ if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)
+ kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK);
+ vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK;
+ }
+}
+
+static void kvmppc_inject_interrupt_pr(struct kvm_vcpu *vcpu, int vec, u64 srr1_flags)
+{
+ unsigned long msr, pc, new_msr, new_pc;
+
+ kvmppc_unfixup_split_real(vcpu);
+
+ msr = kvmppc_get_msr(vcpu);
+ pc = kvmppc_get_pc(vcpu);
+ new_msr = vcpu->arch.intr_msr;
+ new_pc = to_book3s(vcpu)->hior + vec;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* If transactional, change to suspend mode on IRQ delivery */
+ if (MSR_TM_TRANSACTIONAL(msr))
+ new_msr |= MSR_TS_S;
+ else
+ new_msr |= msr & MSR_TS_MASK;
+#endif
+
+ kvmppc_set_srr0(vcpu, pc);
+ kvmppc_set_srr1(vcpu, (msr & SRR1_MSR_BITS) | srr1_flags);
+ kvmppc_set_pc(vcpu, new_pc);
+ kvmppc_set_msr(vcpu, new_msr);
+}
static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
{
@@ -99,12 +137,15 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
svcpu->in_use = 0;
svcpu_put(svcpu);
-#endif
/* Disable AIL if supported */
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S))
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL);
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+ mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_SCV);
+ }
+#endif
vcpu->cpu = smp_processor_id();
#ifdef CONFIG_PPC_BOOK3S_32
@@ -113,6 +154,8 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu)
if (kvmppc_is_split_real(vcpu))
kvmppc_fixup_split_real(vcpu);
+
+ kvmppc_restore_tm_pr(vcpu);
}
static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
@@ -120,11 +163,19 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S_64
struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
if (svcpu->in_use) {
- kvmppc_copy_from_svcpu(vcpu, svcpu);
+ kvmppc_copy_from_svcpu(vcpu);
}
memcpy(to_book3s(vcpu)->slb_shadow, svcpu->slb, sizeof(svcpu->slb));
to_book3s(vcpu)->slb_shadow_max = svcpu->slb_max;
svcpu_put(svcpu);
+
+ /* Enable AIL if supported */
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && (current->thread.fscr & FSCR_SCV))
+ mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) | FSCR_SCV);
+ }
#endif
if (kvmppc_is_split_real(vcpu))
@@ -132,38 +183,35 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
-
- /* Enable AIL if supported */
- if (cpu_has_feature(CPU_FTR_HVMODE) &&
- cpu_has_feature(CPU_FTR_ARCH_207S))
- mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3);
+ kvmppc_save_tm_pr(vcpu);
vcpu->cpu = -1;
}
/* Copy data needed by real-mode code from vcpu to shadow vcpu */
-void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
- struct kvm_vcpu *vcpu)
+void kvmppc_copy_to_svcpu(struct kvm_vcpu *vcpu)
{
- svcpu->gpr[0] = vcpu->arch.gpr[0];
- svcpu->gpr[1] = vcpu->arch.gpr[1];
- svcpu->gpr[2] = vcpu->arch.gpr[2];
- svcpu->gpr[3] = vcpu->arch.gpr[3];
- svcpu->gpr[4] = vcpu->arch.gpr[4];
- svcpu->gpr[5] = vcpu->arch.gpr[5];
- svcpu->gpr[6] = vcpu->arch.gpr[6];
- svcpu->gpr[7] = vcpu->arch.gpr[7];
- svcpu->gpr[8] = vcpu->arch.gpr[8];
- svcpu->gpr[9] = vcpu->arch.gpr[9];
- svcpu->gpr[10] = vcpu->arch.gpr[10];
- svcpu->gpr[11] = vcpu->arch.gpr[11];
- svcpu->gpr[12] = vcpu->arch.gpr[12];
- svcpu->gpr[13] = vcpu->arch.gpr[13];
- svcpu->cr = vcpu->arch.cr;
- svcpu->xer = vcpu->arch.xer;
- svcpu->ctr = vcpu->arch.ctr;
- svcpu->lr = vcpu->arch.lr;
- svcpu->pc = vcpu->arch.pc;
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+
+ svcpu->gpr[0] = vcpu->arch.regs.gpr[0];
+ svcpu->gpr[1] = vcpu->arch.regs.gpr[1];
+ svcpu->gpr[2] = vcpu->arch.regs.gpr[2];
+ svcpu->gpr[3] = vcpu->arch.regs.gpr[3];
+ svcpu->gpr[4] = vcpu->arch.regs.gpr[4];
+ svcpu->gpr[5] = vcpu->arch.regs.gpr[5];
+ svcpu->gpr[6] = vcpu->arch.regs.gpr[6];
+ svcpu->gpr[7] = vcpu->arch.regs.gpr[7];
+ svcpu->gpr[8] = vcpu->arch.regs.gpr[8];
+ svcpu->gpr[9] = vcpu->arch.regs.gpr[9];
+ svcpu->gpr[10] = vcpu->arch.regs.gpr[10];
+ svcpu->gpr[11] = vcpu->arch.regs.gpr[11];
+ svcpu->gpr[12] = vcpu->arch.regs.gpr[12];
+ svcpu->gpr[13] = vcpu->arch.regs.gpr[13];
+ svcpu->cr = vcpu->arch.regs.ccr;
+ svcpu->xer = vcpu->arch.regs.xer;
+ svcpu->ctr = vcpu->arch.regs.ctr;
+ svcpu->lr = vcpu->arch.regs.link;
+ svcpu->pc = vcpu->arch.regs.nip;
#ifdef CONFIG_PPC_BOOK3S_64
svcpu->shadow_fscr = vcpu->arch.shadow_fscr;
#endif
@@ -176,17 +224,49 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu,
if (cpu_has_feature(CPU_FTR_ARCH_207S))
vcpu->arch.entry_ic = mfspr(SPRN_IC);
svcpu->in_use = true;
+
+ svcpu_put(svcpu);
}
-/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
-void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
- struct kvmppc_book3s_shadow_vcpu *svcpu)
+static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
+ ulong guest_msr = kvmppc_get_msr(vcpu);
+ ulong smsr = guest_msr;
+
+ /* Guest MSR values */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
+ MSR_TM | MSR_TS_MASK;
+#else
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
+#endif
+ /* Process MSR values */
+ smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
+ /* External providers the guest reserved */
+ smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
+ /* 64-bit Process MSR values */
+#ifdef CONFIG_PPC_BOOK3S_64
+ smsr |= MSR_HV;
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
- * vcpu_put would just call us again because in_use hasn't
- * been updated yet.
+ * in guest privileged state, we want to fail all TM transactions.
+ * So disable MSR TM bit so that all tbegin. will be able to be
+ * trapped into host.
*/
- preempt_disable();
+ if (!(guest_msr & MSR_PR))
+ smsr &= ~MSR_TM;
+#endif
+ vcpu->arch.shadow_msr = smsr;
+}
+
+/* Copy data touched by real-mode code from shadow vcpu back to vcpu */
+void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_book3s_shadow_vcpu *svcpu = svcpu_get(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ ulong old_msr;
+#endif
/*
* Maybe we were already preempted and synced the svcpu from
@@ -195,25 +275,25 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
if (!svcpu->in_use)
goto out;
- vcpu->arch.gpr[0] = svcpu->gpr[0];
- vcpu->arch.gpr[1] = svcpu->gpr[1];
- vcpu->arch.gpr[2] = svcpu->gpr[2];
- vcpu->arch.gpr[3] = svcpu->gpr[3];
- vcpu->arch.gpr[4] = svcpu->gpr[4];
- vcpu->arch.gpr[5] = svcpu->gpr[5];
- vcpu->arch.gpr[6] = svcpu->gpr[6];
- vcpu->arch.gpr[7] = svcpu->gpr[7];
- vcpu->arch.gpr[8] = svcpu->gpr[8];
- vcpu->arch.gpr[9] = svcpu->gpr[9];
- vcpu->arch.gpr[10] = svcpu->gpr[10];
- vcpu->arch.gpr[11] = svcpu->gpr[11];
- vcpu->arch.gpr[12] = svcpu->gpr[12];
- vcpu->arch.gpr[13] = svcpu->gpr[13];
- vcpu->arch.cr = svcpu->cr;
- vcpu->arch.xer = svcpu->xer;
- vcpu->arch.ctr = svcpu->ctr;
- vcpu->arch.lr = svcpu->lr;
- vcpu->arch.pc = svcpu->pc;
+ vcpu->arch.regs.gpr[0] = svcpu->gpr[0];
+ vcpu->arch.regs.gpr[1] = svcpu->gpr[1];
+ vcpu->arch.regs.gpr[2] = svcpu->gpr[2];
+ vcpu->arch.regs.gpr[3] = svcpu->gpr[3];
+ vcpu->arch.regs.gpr[4] = svcpu->gpr[4];
+ vcpu->arch.regs.gpr[5] = svcpu->gpr[5];
+ vcpu->arch.regs.gpr[6] = svcpu->gpr[6];
+ vcpu->arch.regs.gpr[7] = svcpu->gpr[7];
+ vcpu->arch.regs.gpr[8] = svcpu->gpr[8];
+ vcpu->arch.regs.gpr[9] = svcpu->gpr[9];
+ vcpu->arch.regs.gpr[10] = svcpu->gpr[10];
+ vcpu->arch.regs.gpr[11] = svcpu->gpr[11];
+ vcpu->arch.regs.gpr[12] = svcpu->gpr[12];
+ vcpu->arch.regs.gpr[13] = svcpu->gpr[13];
+ vcpu->arch.regs.ccr = svcpu->cr;
+ vcpu->arch.regs.xer = svcpu->xer;
+ vcpu->arch.regs.ctr = svcpu->ctr;
+ vcpu->arch.regs.link = svcpu->lr;
+ vcpu->arch.regs.nip = svcpu->pc;
vcpu->arch.shadow_srr1 = svcpu->shadow_srr1;
vcpu->arch.fault_dar = svcpu->fault_dar;
vcpu->arch.fault_dsisr = svcpu->fault_dsisr;
@@ -226,15 +306,119 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
*/
vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb;
vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb;
- vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb;
+ to_book3s(vcpu)->vtb += get_vtb() - vcpu->arch.entry_vtb;
if (cpu_has_feature(CPU_FTR_ARCH_207S))
vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic;
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Unlike other MSR bits, MSR[TS]bits can be changed at guest without
+ * notifying host:
+ * modified by unprivileged instructions like "tbegin"/"tend"/
+ * "tresume"/"tsuspend" in PR KVM guest.
+ *
+ * It is necessary to sync here to calculate a correct shadow_msr.
+ *
+ * privileged guest's tbegin will be failed at present. So we
+ * only take care of problem state guest.
+ */
+ old_msr = kvmppc_get_msr(vcpu);
+ if (unlikely((old_msr & MSR_PR) &&
+ (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) !=
+ (old_msr & (MSR_TS_MASK)))) {
+ old_msr &= ~(MSR_TS_MASK);
+ old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK));
+ kvmppc_set_msr_fast(vcpu, old_msr);
+ kvmppc_recalc_shadow_msr(vcpu);
+ }
+#endif
+
svcpu->in_use = false;
out:
+ svcpu_put(svcpu);
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
+{
+ tm_enable();
+ vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+ vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+ vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+ tm_disable();
+}
+
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
+{
+ tm_enable();
+ mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+ mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+ mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+ tm_disable();
+}
+
+/* loadup math bits which is enabled at kvmppc_get_msr() but not enabled at
+ * hardware.
+ */
+static void kvmppc_handle_lost_math_exts(struct kvm_vcpu *vcpu)
+{
+ ulong exit_nr;
+ ulong ext_diff = (kvmppc_get_msr(vcpu) & ~vcpu->arch.guest_owned_ext) &
+ (MSR_FP | MSR_VEC | MSR_VSX);
+
+ if (!ext_diff)
+ return;
+
+ if (ext_diff == MSR_FP)
+ exit_nr = BOOK3S_INTERRUPT_FP_UNAVAIL;
+ else if (ext_diff == MSR_VEC)
+ exit_nr = BOOK3S_INTERRUPT_ALTIVEC;
+ else
+ exit_nr = BOOK3S_INTERRUPT_VSX;
+
+ kvmppc_handle_ext(vcpu, exit_nr, ext_diff);
+}
+
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
+{
+ if (!(MSR_TM_ACTIVE(kvmppc_get_msr(vcpu)))) {
+ kvmppc_save_tm_sprs(vcpu);
+ return;
+ }
+
+ kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+ kvmppc_giveup_ext(vcpu, MSR_VSX);
+
+ preempt_disable();
+ _kvmppc_save_tm_pr(vcpu, mfmsr());
preempt_enable();
}
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
+{
+ if (!MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
+ kvmppc_restore_tm_sprs(vcpu);
+ if (kvmppc_get_msr(vcpu) & MSR_TM) {
+ kvmppc_handle_lost_math_exts(vcpu);
+ if (vcpu->arch.fscr & FSCR_TAR)
+ kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+ }
+ return;
+ }
+
+ preempt_disable();
+ _kvmppc_restore_tm_pr(vcpu, kvmppc_get_msr(vcpu));
+ preempt_enable();
+
+ if (kvmppc_get_msr(vcpu) & MSR_TM) {
+ kvmppc_handle_lost_math_exts(vcpu);
+ if (vcpu->arch.fscr & FSCR_TAR)
+ kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+ }
+}
+#endif
+
static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
{
int r = 1; /* Indicate we want to get back into the guest */
@@ -248,108 +432,68 @@ static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
}
/************* MMU Notifiers *************/
-static void do_kvm_unmap_hva(struct kvm *kvm, unsigned long start,
- unsigned long end)
+static bool do_kvm_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- long i;
+ unsigned long i;
struct kvm_vcpu *vcpu;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
- slots = kvm_memslots(kvm);
- kvm_for_each_memslot(memslot, slots) {
- unsigned long hva_start, hva_end;
- gfn_t gfn, gfn_end;
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvmppc_mmu_pte_pflush(vcpu, range->start << PAGE_SHIFT,
+ range->end << PAGE_SHIFT);
- hva_start = max(start, memslot->userspace_addr);
- hva_end = min(end, memslot->userspace_addr +
- (memslot->npages << PAGE_SHIFT));
- if (hva_start >= hva_end)
- continue;
- /*
- * {gfn(page) | page intersects with [hva_start, hva_end)} =
- * {gfn, gfn+1, ..., gfn_end-1}.
- */
- gfn = hva_to_gfn_memslot(hva_start, memslot);
- gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvmppc_mmu_pte_pflush(vcpu, gfn << PAGE_SHIFT,
- gfn_end << PAGE_SHIFT);
- }
+ return false;
}
-static int kvm_unmap_hva_pr(struct kvm *kvm, unsigned long hva)
+static bool kvm_unmap_gfn_range_pr(struct kvm *kvm, struct kvm_gfn_range *range)
{
- trace_kvm_unmap_hva(hva);
-
- do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
-
- return 0;
+ return do_kvm_unmap_gfn(kvm, range);
}
-static int kvm_unmap_hva_range_pr(struct kvm *kvm, unsigned long start,
- unsigned long end)
-{
- do_kvm_unmap_hva(kvm, start, end);
-
- return 0;
-}
-
-static int kvm_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static bool kvm_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* XXX could be more clever ;) */
- return 0;
+ return false;
}
-static int kvm_test_age_hva_pr(struct kvm *kvm, unsigned long hva)
+static bool kvm_test_age_gfn_pr(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* XXX could be more clever ;) */
- return 0;
-}
-
-static void kvm_set_spte_hva_pr(struct kvm *kvm, unsigned long hva, pte_t pte)
-{
- /* The page will get remapped properly on its next fault */
- do_kvm_unmap_hva(kvm, hva, hva + PAGE_SIZE);
+ return false;
}
/*****************************************/
-static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
-{
- ulong guest_msr = kvmppc_get_msr(vcpu);
- ulong smsr = guest_msr;
-
- /* Guest MSR values */
- smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
- /* Process MSR values */
- smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
- /* External providers the guest reserved */
- smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
- /* 64-bit Process MSR values */
-#ifdef CONFIG_PPC_BOOK3S_64
- smsr |= MSR_ISF | MSR_HV;
-#endif
- vcpu->arch.shadow_msr = smsr;
-}
-
static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
{
- ulong old_msr = kvmppc_get_msr(vcpu);
+ ulong old_msr;
+
+ /* For PAPR guest, make sure MSR reflects guest mode */
+ if (vcpu->arch.papr_enabled)
+ msr = (msr & ~MSR_HV) | MSR_ME;
#ifdef EXIT_DEBUG
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* We should never target guest MSR to TS=10 && PR=0,
+ * since we always fail transaction for guest privilege
+ * state.
+ */
+ if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr))
+ kvmppc_emulate_tabort(vcpu,
+ TM_CAUSE_KVM_FAC_UNAV | TM_CAUSE_PERSISTENT);
+#endif
+
+ old_msr = kvmppc_get_msr(vcpu);
msr &= to_book3s(vcpu)->msr_mask;
kvmppc_set_msr_fast(vcpu, msr);
kvmppc_recalc_shadow_msr(vcpu);
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
- kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
- vcpu->stat.halt_wakeup++;
+ kvm_vcpu_halt(vcpu);
+ vcpu->stat.generic.halt_wakeup++;
/* Unset POW bit after we woke up */
msr &= ~MSR_POW;
@@ -381,7 +525,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
/*
* When switching from 32 to 64-bit, we may have a stale 32-bit
* magic page around, we need to flush it. Typically 32-bit magic
- * page will be instanciated when calling into RTAS. Note: We
+ * page will be instantiated when calling into RTAS. Note: We
* assume that such transition only happens while in kernel mode,
* ie, we never transition from user 32-bit to kernel 64-bit with
* a 32-bit magic page around.
@@ -396,9 +540,14 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
/* Preload FPU if it's enabled */
if (kvmppc_get_msr(vcpu) & MSR_FP)
kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ if (kvmppc_get_msr(vcpu) & MSR_TM)
+ kvmppc_handle_lost_math_exts(vcpu);
+#endif
}
-void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
+static void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
{
u32 host_pvr;
@@ -447,6 +596,10 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
case PVR_POWER7:
case PVR_POWER7p:
case PVR_POWER8:
+ case PVR_POWER8E:
+ case PVR_POWER8NVL:
+ case PVR_HX_C2000:
+ case PVR_POWER9:
vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE |
BOOK3S_HFLAG_NEW_TLBIE;
break;
@@ -486,32 +639,30 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr)
*/
static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
{
- struct page *hpage;
+ struct kvm_host_map map;
u64 hpage_offset;
u32 *page;
- int i;
+ int i, r;
- hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
- if (is_error_page(hpage))
+ r = kvm_vcpu_map(vcpu, pte->raddr >> PAGE_SHIFT, &map);
+ if (r)
return;
hpage_offset = pte->raddr & ~PAGE_MASK;
hpage_offset &= ~0xFFFULL;
hpage_offset /= 4;
- get_page(hpage);
- page = kmap_atomic(hpage);
+ page = map.hva;
/* patch dcbz into reserved instruction, so we trap */
for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++)
if ((be32_to_cpu(page[i]) & 0xff0007ff) == INS_DCBZ)
page[i] &= cpu_to_be32(0xfffffff7);
- kunmap_atomic(page);
- put_page(hpage);
+ kvm_vcpu_unmap(vcpu, &map);
}
-static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
ulong mp_pa = vcpu->arch.magic_page_pa;
@@ -520,13 +671,13 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
gpa &= ~0xFFFULL;
if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) {
- return 1;
+ return true;
}
return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT);
}
-int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_handle_pagefault(struct kvm_vcpu *vcpu,
ulong eaddr, int vec)
{
bool data = (vec == BOOK3S_INTERRUPT_DATA_STORAGE);
@@ -534,8 +685,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
int r = RESUME_GUEST;
int relocated;
int page_found = 0;
- struct kvmppc_pte pte;
- bool is_mmio = false;
+ struct kvmppc_pte pte = { 0 };
bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
u64 vsid;
@@ -555,6 +705,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte.eaddr = eaddr;
pte.vpage = eaddr >> 12;
pte.page_size = MMU_PAGE_64K;
+ pte.wimg = HPTE_R_M;
}
switch (kvmppc_get_msr(vcpu) & (MSR_DR|MSR_IR)) {
@@ -566,7 +717,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
(vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) &&
((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS))
pte.raddr &= ~SPLIT_HACK_MASK;
- /* fall through */
+ fallthrough;
case MSR_IR:
vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid);
@@ -591,30 +742,25 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pte.may_execute = !data;
}
- if (page_found == -ENOENT) {
- /* Page not found in guest PTE entries */
- u64 ssrr1 = vcpu->arch.shadow_srr1;
- u64 msr = kvmppc_get_msr(vcpu);
- kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
- kvmppc_set_dsisr(vcpu, vcpu->arch.fault_dsisr);
- kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
- kvmppc_book3s_queue_irqprio(vcpu, vec);
- } else if (page_found == -EPERM) {
- /* Storage protection */
- u32 dsisr = vcpu->arch.fault_dsisr;
- u64 ssrr1 = vcpu->arch.shadow_srr1;
- u64 msr = kvmppc_get_msr(vcpu);
- kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
- dsisr = (dsisr & ~DSISR_NOHPTE) | DSISR_PROTFAULT;
- kvmppc_set_dsisr(vcpu, dsisr);
- kvmppc_set_msr_fast(vcpu, msr | (ssrr1 & 0xf8000000ULL));
- kvmppc_book3s_queue_irqprio(vcpu, vec);
+ if (page_found == -ENOENT || page_found == -EPERM) {
+ /* Page not found in guest PTE entries, or protection fault */
+ u64 flags;
+
+ if (page_found == -EPERM)
+ flags = DSISR_PROTFAULT;
+ else
+ flags = DSISR_NOHPTE;
+ if (data) {
+ flags |= vcpu->arch.fault_dsisr & DSISR_ISSTORE;
+ kvmppc_core_queue_data_storage(vcpu, 0, eaddr, flags);
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu, flags);
+ }
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
- } else if (!is_mmio &&
- kvmppc_visible_gpa(vcpu, pte.raddr)) {
+ } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
/*
* There is already a host HPTE there, presumably
@@ -624,7 +770,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_mmu_unmap_page(vcpu, &pte);
}
/* The guest's PTE is not mapped yet. Map on the host */
- kvmppc_mmu_map_page(vcpu, &pte, iswrite);
+ if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
+ /* Exit KVM if mapping failed */
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
if (data)
vcpu->stat.sp_storage++;
else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -635,7 +785,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->stat.mmio_exits++;
vcpu->arch.paddr_accessed = pte.raddr;
vcpu->arch.vaddr_accessed = pte.eaddr;
- r = kvmppc_emulate_mmio(run, vcpu);
+ r = kvmppc_emulate_mmio(vcpu);
if ( r == RESUME_HOST_NV )
r = RESUME_HOST;
}
@@ -643,11 +793,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
-static inline int get_fpr_index(int i)
-{
- return i * TS_FPRWIDTH;
-}
-
/* Give up external provider (FPU, Altivec, VSX) */
void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
@@ -692,7 +837,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
}
/* Give up facility (TAR / EBB / DSCR) */
-static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac)
{
#ifdef CONFIG_PPC_BOOK3S_64
if (!(vcpu->arch.shadow_fscr & (1ULL << fac))) {
@@ -755,6 +900,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
preempt_disable();
enable_kernel_fp();
load_fp_state(&vcpu->arch.fp);
+ disable_kernel_fp();
t->fp_save_area = &vcpu->arch.fp;
preempt_enable();
}
@@ -764,6 +910,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
preempt_disable();
enable_kernel_altivec();
load_vr_state(&vcpu->arch.vr);
+ disable_kernel_altivec();
t->vr_save_area = &vcpu->arch.vr;
preempt_enable();
#endif
@@ -792,6 +939,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
preempt_disable();
enable_kernel_fp();
load_fp_state(&vcpu->arch.fp);
+ disable_kernel_fp();
preempt_enable();
}
#ifdef CONFIG_ALTIVEC
@@ -799,6 +947,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
preempt_disable();
enable_kernel_altivec();
load_vr_state(&vcpu->arch.vr);
+ disable_kernel_altivec();
preempt_enable();
}
#endif
@@ -807,7 +956,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_BOOK3S_64
-static void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
+void kvmppc_trigger_fac_interrupt(struct kvm_vcpu *vcpu, ulong fac)
{
/* Inject the Interrupt Cause field and trigger a guest interrupt */
vcpu->arch.fscr &= ~(0xffULL << 56);
@@ -820,7 +969,7 @@ static void kvmppc_emulate_fac(struct kvm_vcpu *vcpu, ulong fac)
enum emulation_result er = EMULATE_FAIL;
if (!(kvmppc_get_msr(vcpu) & MSR_PR))
- er = kvmppc_emulate_instruction(vcpu->run, vcpu);
+ er = kvmppc_emulate_instruction(vcpu);
if ((er != EMULATE_DONE) && (er != EMULATE_AGAIN)) {
/* Couldn't emulate, trigger interrupt in guest */
@@ -869,22 +1018,123 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac)
break;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /* Since we disabled MSR_TM at privilege state, the mfspr instruction
+ * for TM spr can trigger TM fac unavailable. In this case, the
+ * emulation is handled by kvmppc_emulate_fac(), which invokes
+ * kvmppc_emulate_mfspr() finally. But note the mfspr can include
+ * RT for NV registers. So it need to restore those NV reg to reflect
+ * the update.
+ */
+ if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR))
+ return RESUME_GUEST_NV;
+#endif
+
return RESUME_GUEST;
}
void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr)
{
+ if (fscr & FSCR_SCV)
+ fscr &= ~FSCR_SCV; /* SCV must not be enabled */
+ /* Prohibit prefixed instructions for now */
+ fscr &= ~FSCR_PREFIX;
if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) {
/* TAR got dropped, drop it in shadow too */
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+ } else if (!(vcpu->arch.fscr & FSCR_TAR) && (fscr & FSCR_TAR)) {
+ vcpu->arch.fscr = fscr;
+ kvmppc_handle_fac(vcpu, FSCR_TAR_LG);
+ return;
}
+
vcpu->arch.fscr = fscr;
}
#endif
-int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+static void kvmppc_setup_debug(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ u64 msr = kvmppc_get_msr(vcpu);
+
+ kvmppc_set_msr(vcpu, msr | MSR_SE);
+ }
+}
+
+static void kvmppc_clear_debug(struct kvm_vcpu *vcpu)
{
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ u64 msr = kvmppc_get_msr(vcpu);
+
+ kvmppc_set_msr(vcpu, msr & ~MSR_SE);
+ }
+}
+
+static int kvmppc_exit_pr_progint(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+ enum emulation_result er;
+ ulong flags;
+ ppc_inst_t last_inst;
+ int emul, r;
+
+ /*
+ * shadow_srr1 only contains valid flags if we came here via a program
+ * exception. The other exceptions (emulation assist, FP unavailable,
+ * etc.) do not provide flags in SRR1, so use an illegal-instruction
+ * exception when injecting a program interrupt into the guest.
+ */
+ if (exit_nr == BOOK3S_INTERRUPT_PROGRAM)
+ flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
+ else
+ flags = SRR1_PROGILL;
+
+ emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+ if (emul != EMULATE_DONE)
+ return RESUME_GUEST;
+
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+#ifdef EXIT_DEBUG
+ pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
+ kvmppc_get_pc(vcpu), ppc_inst_val(last_inst));
+#endif
+ if ((ppc_inst_val(last_inst) & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) {
+ kvmppc_core_queue_program(vcpu, flags);
+ return RESUME_GUEST;
+ }
+ }
+
+ vcpu->stat.emulated_inst_exits++;
+ er = kvmppc_emulate_instruction(vcpu);
+ switch (er) {
+ case EMULATE_DONE:
+ r = RESUME_GUEST_NV;
+ break;
+ case EMULATE_AGAIN:
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_FAIL:
+ pr_crit("%s: emulation at %lx failed (%08x)\n",
+ __func__, kvmppc_get_pc(vcpu), ppc_inst_val(last_inst));
+ kvmppc_core_queue_program(vcpu, flags);
+ r = RESUME_GUEST;
+ break;
+ case EMULATE_DO_MMIO:
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST_NV;
+ break;
+ case EMULATE_EXIT_USER:
+ r = RESUME_HOST_NV;
+ break;
+ default:
+ BUG();
+ }
+
+ return r;
+}
+
+int kvmppc_handle_exit_pr(struct kvm_vcpu *vcpu, unsigned int exit_nr)
+{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
@@ -896,7 +1146,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* We get here with MSR.EE=1 */
trace_kvm_exit(exit_nr, vcpu);
- kvm_guest_exit();
+ guest_exit();
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -928,7 +1178,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* only care about PTEG not found errors, but leave NX alone */
if (shadow_srr1 & 0x40000000) {
int idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmppc_handle_pagefault(run, vcpu, kvmppc_get_pc(vcpu), exit_nr);
+ r = kvmppc_handle_pagefault(vcpu, kvmppc_get_pc(vcpu), exit_nr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@@ -941,10 +1191,8 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL);
r = RESUME_GUEST;
} else {
- u64 msr = kvmppc_get_msr(vcpu);
- msr |= shadow_srr1 & 0x58000000;
- kvmppc_set_msr_fast(vcpu, msr);
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ kvmppc_core_queue_inst_storage(vcpu,
+ shadow_srr1 & 0x58000000);
r = RESUME_GUEST;
}
break;
@@ -980,12 +1228,10 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT)) {
int idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr);
+ r = kvmppc_handle_pagefault(vcpu, dar, exit_nr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
} else {
- kvmppc_set_dar(vcpu, dar);
- kvmppc_set_dsisr(vcpu, fault_dsisr);
- kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ kvmppc_core_queue_data_storage(vcpu, 0, dar, fault_dsisr);
r = RESUME_GUEST;
}
break;
@@ -1014,79 +1260,28 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_EXTERNAL:
- case BOOK3S_INTERRUPT_EXTERNAL_LEVEL:
case BOOK3S_INTERRUPT_EXTERNAL_HV:
+ case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
+ case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
+ case BOOK3S_INTERRUPT_SYSTEM_RESET:
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_PROGRAM:
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
- {
- enum emulation_result er;
- ulong flags;
- u32 last_inst;
- int emul;
-
-program_interrupt:
- flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
-
- emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
- if (emul != EMULATE_DONE) {
- r = RESUME_GUEST;
- break;
- }
-
- if (kvmppc_get_msr(vcpu) & MSR_PR) {
-#ifdef EXIT_DEBUG
- pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n",
- kvmppc_get_pc(vcpu), last_inst);
-#endif
- if ((last_inst & 0xff0007ff) !=
- (INS_DCBZ & 0xfffffff7)) {
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- }
- }
-
- vcpu->stat.emulated_inst_exits++;
- er = kvmppc_emulate_instruction(run, vcpu);
- switch (er) {
- case EMULATE_DONE:
- r = RESUME_GUEST_NV;
- break;
- case EMULATE_AGAIN:
- r = RESUME_GUEST;
- break;
- case EMULATE_FAIL:
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, kvmppc_get_pc(vcpu), last_inst);
- kvmppc_core_queue_program(vcpu, flags);
- r = RESUME_GUEST;
- break;
- case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
- r = RESUME_HOST_NV;
- break;
- case EMULATE_EXIT_USER:
- r = RESUME_HOST_NV;
- break;
- default:
- BUG();
- }
+ r = kvmppc_exit_pr_progint(vcpu, exit_nr);
break;
- }
case BOOK3S_INTERRUPT_SYSCALL:
{
- u32 last_sc;
+ ppc_inst_t last_sc;
int emul;
/* Get last sc for papr */
if (vcpu->arch.papr_enabled) {
- /* The sc instuction points SRR0 to the next inst */
+ /* The sc instruction points SRR0 to the next inst */
emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc);
if (emul != EMULATE_DONE) {
kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4);
@@ -1096,7 +1291,7 @@ program_interrupt:
}
if (vcpu->arch.papr_enabled &&
- (last_sc == 0x44000022) &&
+ (ppc_inst_val(last_sc) == 0x44000022) &&
!(kvmppc_get_msr(vcpu) & MSR_PR)) {
/* SC 1 papr hypercalls */
ulong cmd = kvmppc_get_gpr(vcpu, 3);
@@ -1148,14 +1343,14 @@ program_interrupt:
{
int ext_msr = 0;
int emul;
- u32 last_inst;
+ ppc_inst_t last_inst;
if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) {
/* Do paired single instruction emulation */
emul = kvmppc_get_last_inst(vcpu, INST_GENERIC,
&last_inst);
if (emul == EMULATE_DONE)
- goto program_interrupt;
+ r = kvmppc_exit_pr_progint(vcpu, exit_nr);
else
r = RESUME_GUEST;
@@ -1182,15 +1377,15 @@ program_interrupt:
}
case BOOK3S_INTERRUPT_ALIGNMENT:
{
- u32 last_inst;
+ ppc_inst_t last_inst;
int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
if (emul == EMULATE_DONE) {
u32 dsisr;
u64 dar;
- dsisr = kvmppc_alignment_dsisr(vcpu, last_inst);
- dar = kvmppc_alignment_dar(vcpu, last_inst);
+ dsisr = kvmppc_alignment_dsisr(vcpu, ppc_inst_val(last_inst));
+ dar = kvmppc_alignment_dar(vcpu, ppc_inst_val(last_inst));
kvmppc_set_dsisr(vcpu, dsisr);
kvmppc_set_dar(vcpu, dar);
@@ -1202,15 +1397,22 @@ program_interrupt:
}
#ifdef CONFIG_PPC_BOOK3S_64
case BOOK3S_INTERRUPT_FAC_UNAVAIL:
- kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
- r = RESUME_GUEST;
+ r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
break;
#endif
case BOOK3S_INTERRUPT_MACHINE_CHECK:
- case BOOK3S_INTERRUPT_TRACE:
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
break;
+ case BOOK3S_INTERRUPT_TRACE:
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ run->exit_reason = KVM_EXIT_DEBUG;
+ r = RESUME_HOST;
+ } else {
+ kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
+ r = RESUME_GUEST;
+ }
+ break;
default:
{
ulong shadow_srr1 = vcpu->arch.shadow_srr1;
@@ -1286,12 +1488,22 @@ static int kvm_arch_vcpu_ioctl_set_sregs_pr(struct kvm_vcpu *vcpu,
kvmppc_set_pvr_pr(vcpu, sregs->pvr);
vcpu3s->sdr1 = sregs->u.s.sdr1;
+#ifdef CONFIG_PPC_BOOK3S_64
if (vcpu->arch.hflags & BOOK3S_HFLAG_SLB) {
+ /* Flush all SLB entries */
+ vcpu->arch.mmu.slbmte(vcpu, 0, 0);
+ vcpu->arch.mmu.slbia(vcpu);
+
for (i = 0; i < 64; i++) {
- vcpu->arch.mmu.slbmte(vcpu, sregs->u.s.ppc64.slb[i].slbv,
- sregs->u.s.ppc64.slb[i].slbe);
+ u64 rb = sregs->u.s.ppc64.slb[i].slbe;
+ u64 rs = sregs->u.s.ppc64.slb[i].slbv;
+
+ if (rb & SLB_ESID_V)
+ vcpu->arch.mmu.slbmte(vcpu, rs, rb);
}
- } else {
+ } else
+#endif
+ {
for (i = 0; i < 16; i++) {
vcpu->arch.mmu.mtsrin(vcpu, i, sregs->u.s.ppc32.sr[i]);
}
@@ -1319,9 +1531,15 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
int r = 0;
switch (id) {
+ case KVM_REG_PPC_DEBUG_INST:
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
+ break;
case KVM_REG_PPC_HIOR:
*val = get_reg_val(id, to_book3s(vcpu)->hior);
break;
+ case KVM_REG_PPC_VTB:
+ *val = get_reg_val(id, to_book3s(vcpu)->vtb);
+ break;
case KVM_REG_PPC_LPCR:
case KVM_REG_PPC_LPCR_64:
/*
@@ -1332,6 +1550,73 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
else
*val = get_reg_val(id, 0);
break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_REG_PPC_TFHAR:
+ *val = get_reg_val(id, vcpu->arch.tfhar);
+ break;
+ case KVM_REG_PPC_TFIAR:
+ *val = get_reg_val(id, vcpu->arch.tfiar);
+ break;
+ case KVM_REG_PPC_TEXASR:
+ *val = get_reg_val(id, vcpu->arch.texasr);
+ break;
+ case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+ *val = get_reg_val(id,
+ vcpu->arch.gpr_tm[id-KVM_REG_PPC_TM_GPR0]);
+ break;
+ case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+ {
+ int i, j;
+
+ i = id - KVM_REG_PPC_TM_VSR0;
+ if (i < 32)
+ for (j = 0; j < TS_FPRWIDTH; j++)
+ val->vsxval[j] = vcpu->arch.fp_tm.fpr[i][j];
+ else {
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ val->vval = vcpu->arch.vr_tm.vr[i-32];
+ else
+ r = -ENXIO;
+ }
+ break;
+ }
+ case KVM_REG_PPC_TM_CR:
+ *val = get_reg_val(id, vcpu->arch.cr_tm);
+ break;
+ case KVM_REG_PPC_TM_XER:
+ *val = get_reg_val(id, vcpu->arch.xer_tm);
+ break;
+ case KVM_REG_PPC_TM_LR:
+ *val = get_reg_val(id, vcpu->arch.lr_tm);
+ break;
+ case KVM_REG_PPC_TM_CTR:
+ *val = get_reg_val(id, vcpu->arch.ctr_tm);
+ break;
+ case KVM_REG_PPC_TM_FPSCR:
+ *val = get_reg_val(id, vcpu->arch.fp_tm.fpscr);
+ break;
+ case KVM_REG_PPC_TM_AMR:
+ *val = get_reg_val(id, vcpu->arch.amr_tm);
+ break;
+ case KVM_REG_PPC_TM_PPR:
+ *val = get_reg_val(id, vcpu->arch.ppr_tm);
+ break;
+ case KVM_REG_PPC_TM_VRSAVE:
+ *val = get_reg_val(id, vcpu->arch.vrsave_tm);
+ break;
+ case KVM_REG_PPC_TM_VSCR:
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ *val = get_reg_val(id, vcpu->arch.vr_tm.vscr.u[3]);
+ else
+ r = -ENXIO;
+ break;
+ case KVM_REG_PPC_TM_DSCR:
+ *val = get_reg_val(id, vcpu->arch.dscr_tm);
+ break;
+ case KVM_REG_PPC_TM_TAR:
+ *val = get_reg_val(id, vcpu->arch.tar_tm);
+ break;
+#endif
default:
r = -EINVAL;
break;
@@ -1358,10 +1643,79 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
to_book3s(vcpu)->hior = set_reg_val(id, *val);
to_book3s(vcpu)->hior_explicit = true;
break;
+ case KVM_REG_PPC_VTB:
+ to_book3s(vcpu)->vtb = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_LPCR:
case KVM_REG_PPC_LPCR_64:
kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val));
break;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_REG_PPC_TFHAR:
+ vcpu->arch.tfhar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TFIAR:
+ vcpu->arch.tfiar = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TEXASR:
+ vcpu->arch.texasr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_GPR0 ... KVM_REG_PPC_TM_GPR31:
+ vcpu->arch.gpr_tm[id - KVM_REG_PPC_TM_GPR0] =
+ set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VSR0 ... KVM_REG_PPC_TM_VSR63:
+ {
+ int i, j;
+
+ i = id - KVM_REG_PPC_TM_VSR0;
+ if (i < 32)
+ for (j = 0; j < TS_FPRWIDTH; j++)
+ vcpu->arch.fp_tm.fpr[i][j] = val->vsxval[j];
+ else
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vr_tm.vr[i-32] = val->vval;
+ else
+ r = -ENXIO;
+ break;
+ }
+ case KVM_REG_PPC_TM_CR:
+ vcpu->arch.cr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_XER:
+ vcpu->arch.xer_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_LR:
+ vcpu->arch.lr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_CTR:
+ vcpu->arch.ctr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_FPSCR:
+ vcpu->arch.fp_tm.fpscr = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_AMR:
+ vcpu->arch.amr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_PPR:
+ vcpu->arch.ppr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VRSAVE:
+ vcpu->arch.vrsave_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_VSCR:
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ vcpu->arch.vr.vscr.u[3] = set_reg_val(id, *val);
+ else
+ r = -ENXIO;
+ break;
+ case KVM_REG_PPC_TM_DSCR:
+ vcpu->arch.dscr_tm = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_TM_TAR:
+ vcpu->arch.tar_tm = set_reg_val(id, *val);
+ break;
+#endif
default:
r = -EINVAL;
break;
@@ -1370,21 +1724,17 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s;
- struct kvm_vcpu *vcpu;
- int err = -ENOMEM;
unsigned long p;
+ int err;
- vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu)
- goto out;
+ err = -ENOMEM;
vcpu_book3s = vzalloc(sizeof(struct kvmppc_vcpu_book3s));
if (!vcpu_book3s)
- goto free_vcpu;
+ goto out;
vcpu->arch.book3s = vcpu_book3s;
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
@@ -1394,14 +1744,9 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
goto free_vcpu3s;
#endif
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_shadow_vcpu;
-
- err = -ENOMEM;
p = __get_free_page(GFP_KERNEL|__GFP_ZERO);
if (!p)
- goto uninit_vcpu;
+ goto free_shadow_vcpu;
vcpu->arch.shared = (void *)p;
#ifdef CONFIG_PPC_BOOK3S_64
/* Always start the shared struct in native endian mode */
@@ -1423,59 +1768,56 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm,
#else
/* default to book3s_32 (750) */
vcpu->arch.pvr = 0x84202;
+ vcpu->arch.intr_msr = 0;
#endif
kvmppc_set_pvr_pr(vcpu, vcpu->arch.pvr);
vcpu->arch.slb_nr = 64;
vcpu->arch.shadow_msr = MSR_USER64 & ~MSR_LE;
- err = kvmppc_mmu_init(vcpu);
+ err = kvmppc_mmu_init_pr(vcpu);
if (err < 0)
- goto uninit_vcpu;
+ goto free_shared_page;
- return vcpu;
+ return 0;
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
+free_shared_page:
+ free_page((unsigned long)vcpu->arch.shared);
free_shadow_vcpu:
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
free_vcpu3s:
#endif
vfree(vcpu_book3s);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_pr(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ kvmppc_mmu_destroy_pr(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
- kvm_vcpu_uninit(vcpu);
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kfree(vcpu->arch.shadow_vcpu);
#endif
vfree(vcpu_book3s);
- kmem_cache_free(kvm_vcpu_cache, vcpu);
}
-static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+static int kvmppc_vcpu_run_pr(struct kvm_vcpu *vcpu)
{
int ret;
-#ifdef CONFIG_ALTIVEC
- unsigned long uninitialized_var(vrsave);
-#endif
/* Check if we can run the vcpu at all */
if (!vcpu->arch.sane) {
- kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
ret = -EINVAL;
goto out;
}
+ kvmppc_setup_debug(vcpu);
+
/*
* Interrupts could be timers for the guest which we have to inject
* again, so let's postpone them until we're in the guest and if we
@@ -1487,21 +1829,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
goto out;
/* interrupts now hard-disabled */
- /* Save FPU state in thread_struct */
- if (current->thread.regs->msr & MSR_FP)
- giveup_fpu(current);
-
-#ifdef CONFIG_ALTIVEC
- /* Save Altivec state in thread_struct */
- if (current->thread.regs->msr & MSR_VEC)
- giveup_altivec(current);
-#endif
-
-#ifdef CONFIG_VSX
- /* Save VSX state in thread_struct */
- if (current->thread.regs->msr & MSR_VSX)
- __giveup_vsx(current);
-#endif
+ /* Save FPU, Altivec and VSX state */
+ giveup_all(current);
/* Preload FPU if it's enabled */
if (kvmppc_get_msr(vcpu) & MSR_FP)
@@ -1509,9 +1838,11 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
- /* No need for kvm_guest_exit. It's done in handle_exit.
+ kvmppc_clear_debug(vcpu);
+
+ /* No need for guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
/* Make sure we save the guest FPU/Altivec/VSX state */
@@ -1520,6 +1851,7 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* Make sure we save the guest TAR/EBB/DSCR state */
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
+ srr_regs_clobbered();
out:
vcpu->mode = OUTSIDE_GUEST_MODE;
return ret;
@@ -1540,14 +1872,12 @@ static int kvm_vm_ioctl_get_dirty_log_pr(struct kvm *kvm,
mutex_lock(&kvm->slots_lock);
- r = kvm_get_dirty_log(kvm, log, &is_dirty);
+ r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
if (r)
goto out;
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
- memslot = id_to_memslot(kvm->memslots, log->slot);
-
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
@@ -1571,32 +1901,26 @@ static void kvmppc_core_flush_memslot_pr(struct kvm *kvm,
}
static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
return 0;
}
static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
return;
}
-static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+static void kvmppc_core_free_memslot_pr(struct kvm_memory_slot *slot)
{
return;
}
-static int kvmppc_core_create_memslot_pr(struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return 0;
-}
-
-
#ifdef CONFIG_PPC64
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
struct kvm_ppc_smmu_info *info)
@@ -1643,12 +1967,24 @@ static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
return 0;
}
+
+static int kvm_configure_mmu_pr(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
+{
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+ /* Require flags and process table base and size to all be zero. */
+ if (cfg->flags || cfg->process_table)
+ return -EINVAL;
+ return 0;
+}
+
#else
static int kvm_vm_ioctl_get_smmu_info_pr(struct kvm *kvm,
struct kvm_ppc_smmu_info *info)
{
/* We should not get called */
BUG();
+ return 0;
}
#endif /* CONFIG_PPC64 */
@@ -1667,7 +2003,7 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm)
if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
spin_lock(&kvm_global_user_count_lock);
if (++kvm_global_user_count == 1)
- pSeries_disable_reloc_on_exc();
+ pseries_disable_reloc_on_exc();
spin_unlock(&kvm_global_user_count_lock);
}
return 0;
@@ -1683,19 +2019,26 @@ static void kvmppc_core_destroy_vm_pr(struct kvm *kvm)
spin_lock(&kvm_global_user_count_lock);
BUG_ON(kvm_global_user_count == 0);
if (--kvm_global_user_count == 0)
- pSeries_enable_reloc_on_exc();
+ pseries_enable_reloc_on_exc();
spin_unlock(&kvm_global_user_count_lock);
}
}
static int kvmppc_core_check_processor_compat_pr(void)
{
- /* we are always compatible */
+ /*
+ * PR KVM can work on POWER9 inside a guest partition
+ * running in HPT mode. It can't work if we are using
+ * radix translation (because radix provides no way for
+ * a process to have unique translations in quadrant 3).
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
+ return -EIO;
return 0;
}
-static long kvm_arch_vm_ioctl_pr(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+static int kvm_arch_vm_ioctl_pr(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
return -ENOTTY;
}
@@ -1707,6 +2050,7 @@ static struct kvmppc_ops kvm_ops_pr = {
.set_one_reg = kvmppc_set_one_reg_pr,
.vcpu_load = kvmppc_core_vcpu_load_pr,
.vcpu_put = kvmppc_core_vcpu_put_pr,
+ .inject_interrupt = kvmppc_inject_interrupt_pr,
.set_msr = kvmppc_set_msr_pr,
.vcpu_run = kvmppc_vcpu_run_pr,
.vcpu_create = kvmppc_core_vcpu_create_pr,
@@ -1716,14 +2060,10 @@ static struct kvmppc_ops kvm_ops_pr = {
.flush_memslot = kvmppc_core_flush_memslot_pr,
.prepare_memory_region = kvmppc_core_prepare_memory_region_pr,
.commit_memory_region = kvmppc_core_commit_memory_region_pr,
- .unmap_hva = kvm_unmap_hva_pr,
- .unmap_hva_range = kvm_unmap_hva_range_pr,
- .age_hva = kvm_age_hva_pr,
- .test_age_hva = kvm_test_age_hva_pr,
- .set_spte_hva = kvm_set_spte_hva_pr,
- .mmu_destroy = kvmppc_mmu_destroy_pr,
+ .unmap_gfn_range = kvm_unmap_gfn_range_pr,
+ .age_gfn = kvm_age_gfn_pr,
+ .test_age_gfn = kvm_test_age_gfn_pr,
.free_memslot = kvmppc_core_free_memslot_pr,
- .create_memslot = kvmppc_core_create_memslot_pr,
.init_vm = kvmppc_core_init_vm_pr,
.destroy_vm = kvmppc_core_destroy_vm_pr,
.get_smmu_info = kvm_vm_ioctl_get_smmu_info_pr,
@@ -1734,7 +2074,9 @@ static struct kvmppc_ops kvm_ops_pr = {
.arch_vm_ioctl = kvm_arch_vm_ioctl_pr,
#ifdef CONFIG_PPC_BOOK3S_64
.hcall_implemented = kvmppc_hcall_impl_pr,
+ .configure_mmu = kvm_configure_mmu_pr,
#endif
+ .giveup_ext = kvmppc_giveup_ext,
};
@@ -1767,6 +2109,7 @@ void kvmppc_book3s_exit_pr(void)
module_init(kvmppc_book3s_init_pr);
module_exit(kvmppc_book3s_exit_pr);
+MODULE_DESCRIPTION("KVM on Book3S without using hypervisor mode");
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index ce3c893d509b..b2c89e850d7a 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2011. Freescale Inc. All rights reserved.
*
@@ -9,15 +10,11 @@
*
* Hypercall handling for running PAPR guests in PR KVM on Book 3S
* processors.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/anon_inodes.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
@@ -50,7 +47,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
pteg_addr = get_pteg_addr(vcpu, pte_index);
mutex_lock(&vcpu->kvm->arch.hpt_mutex);
- copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg));
+ ret = H_FUNCTION;
+ if (copy_from_user(pteg, (void __user *)pteg_addr, sizeof(pteg)))
+ goto done;
hpte = pteg;
ret = H_PTEG_FULL;
@@ -71,7 +70,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu)
hpte[0] = cpu_to_be64(kvmppc_get_gpr(vcpu, 6));
hpte[1] = cpu_to_be64(kvmppc_get_gpr(vcpu, 7));
pteg_addr += i * HPTE_SIZE;
- copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE);
+ ret = H_FUNCTION;
+ if (copy_to_user((void __user *)pteg_addr, hpte, HPTE_SIZE))
+ goto done;
kvmppc_set_gpr(vcpu, 4, pte_index | i);
ret = H_SUCCESS;
@@ -93,7 +94,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
pteg = get_pteg_addr(vcpu, pte_index);
mutex_lock(&vcpu->kvm->arch.hpt_mutex);
- copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ ret = H_FUNCTION;
+ if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+ goto done;
pte[0] = be64_to_cpu((__force __be64)pte[0]);
pte[1] = be64_to_cpu((__force __be64)pte[1]);
@@ -103,7 +106,9 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu)
((flags & H_ANDCOND) && (pte[0] & avpn) != 0))
goto done;
- copy_to_user((void __user *)pteg, &v, sizeof(v));
+ ret = H_FUNCTION;
+ if (copy_to_user((void __user *)pteg, &v, sizeof(v)))
+ goto done;
rb = compute_tlbie_rb(pte[0], pte[1], pte_index);
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
@@ -171,7 +176,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
}
pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX);
- copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ if (copy_from_user(pte, (void __user *)pteg, sizeof(pte))) {
+ ret = H_FUNCTION;
+ break;
+ }
pte[0] = be64_to_cpu((__force __be64)pte[0]);
pte[1] = be64_to_cpu((__force __be64)pte[1]);
@@ -184,7 +192,10 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu)
tsh |= H_BULK_REMOVE_NOT_FOUND;
} else {
/* Splat the pteg in (userland) hpt */
- copy_to_user((void __user *)pteg, &v, sizeof(v));
+ if (copy_to_user((void __user *)pteg, &v, sizeof(v))) {
+ ret = H_FUNCTION;
+ break;
+ }
rb = compute_tlbie_rb(pte[0], pte[1],
tsh & H_BULK_REMOVE_PTEX);
@@ -211,7 +222,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
pteg = get_pteg_addr(vcpu, pte_index);
mutex_lock(&vcpu->kvm->arch.hpt_mutex);
- copy_from_user(pte, (void __user *)pteg, sizeof(pte));
+ ret = H_FUNCTION;
+ if (copy_from_user(pte, (void __user *)pteg, sizeof(pte)))
+ goto done;
pte[0] = be64_to_cpu((__force __be64)pte[0]);
pte[1] = be64_to_cpu((__force __be64)pte[1]);
@@ -234,7 +247,9 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false);
pte[0] = (__force u64)cpu_to_be64(pte[0]);
pte[1] = (__force u64)cpu_to_be64(pte[1]);
- copy_to_user((void __user *)pteg, pte, sizeof(pte));
+ ret = H_FUNCTION;
+ if (copy_to_user((void __user *)pteg, pte, sizeof(pte)))
+ goto done;
ret = H_SUCCESS;
done:
@@ -244,6 +259,45 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
+{
+ long rc;
+
+ rc = kvmppc_h_logical_ci_load(vcpu);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
+{
+ long rc;
+
+ rc = kvmppc_h_logical_ci_store(vcpu);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_set_mode(struct kvm_vcpu *vcpu)
+{
+ unsigned long mflags = kvmppc_get_gpr(vcpu, 4);
+ unsigned long resource = kvmppc_get_gpr(vcpu, 5);
+
+ if (resource == H_SET_MODE_RESOURCE_ADDR_TRANS_MODE) {
+ /* KVM PR does not provide AIL!=0 to guests */
+ if (mflags == 0)
+ kvmppc_set_gpr(vcpu, 3, H_SUCCESS);
+ else
+ kvmppc_set_gpr(vcpu, 3, H_UNSUPPORTED_FLAG_START - 63);
+ return EMULATE_DONE;
+ }
+ return EMULATE_FAIL;
+}
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
{
unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
@@ -258,6 +312,54 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_put_tce_indirect(vcpu, liobn, ioba,
+ tce, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+ unsigned long liobn = kvmppc_get_gpr(vcpu, 4);
+ unsigned long ioba = kvmppc_get_gpr(vcpu, 5);
+ unsigned long tce_value = kvmppc_get_gpr(vcpu, 6);
+ unsigned long npages = kvmppc_get_gpr(vcpu, 7);
+ long rc;
+
+ rc = kvmppc_h_stuff_tce(vcpu, liobn, ioba, tce_value, npages);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
+#else /* CONFIG_SPAPR_TCE_IOMMU */
+static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
+{
+ return EMULATE_FAIL;
+}
+
+static int kvmppc_h_pr_put_tce_indirect(struct kvm_vcpu *vcpu)
+{
+ return EMULATE_FAIL;
+}
+
+static int kvmppc_h_pr_stuff_tce(struct kvm_vcpu *vcpu)
+{
+ return EMULATE_FAIL;
+}
+#endif /* CONFIG_SPAPR_TCE_IOMMU */
+
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
@@ -284,12 +386,21 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
return kvmppc_h_pr_bulk_remove(vcpu);
case H_PUT_TCE:
return kvmppc_h_pr_put_tce(vcpu);
+ case H_PUT_TCE_INDIRECT:
+ return kvmppc_h_pr_put_tce_indirect(vcpu);
+ case H_STUFF_TCE:
+ return kvmppc_h_pr_stuff_tce(vcpu);
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
- kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
- vcpu->stat.halt_wakeup++;
+ kvm_vcpu_halt(vcpu);
+ vcpu->stat.generic.halt_wakeup++;
return EMULATE_DONE;
+ case H_LOGICAL_CI_LOAD:
+ return kvmppc_h_pr_logical_ci_load(vcpu);
+ case H_LOGICAL_CI_STORE:
+ return kvmppc_h_pr_logical_ci_store(vcpu);
+ case H_SET_MODE:
+ return kvmppc_h_pr_set_mode(vcpu);
case H_XIRR:
case H_CPPR:
case H_EOI:
@@ -321,8 +432,16 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
case H_REMOVE:
case H_PROTECT:
case H_BULK_REMOVE:
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ case H_GET_TCE:
case H_PUT_TCE:
+ case H_PUT_TCE_INDIRECT:
+ case H_STUFF_TCE:
+#endif
case H_CEDE:
+ case H_LOGICAL_CI_LOAD:
+ case H_LOGICAL_CI_STORE:
+ case H_SET_MODE:
#ifdef CONFIG_KVM_XICS
case H_XIRR:
case H_CPPR:
@@ -347,8 +466,12 @@ static unsigned int default_hcall_list[] = {
H_REMOVE,
H_PROTECT,
H_BULK_REMOVE,
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ H_GET_TCE,
H_PUT_TCE,
+#endif
H_CEDE,
+ H_SET_MODE,
#ifdef CONFIG_KVM_XICS
H_XIRR,
H_CPPR,
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 16c4d88ba27d..0a557ffca9fe 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2009
*
@@ -23,6 +12,7 @@
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/exception-64s.h>
@@ -36,7 +26,7 @@
#if defined(CONFIG_PPC_BOOK3S_64)
-#if defined(_CALL_ELF) && _CALL_ELF == 2
+#ifdef CONFIG_PPC64_ELF_ABI_V2
#define FUNC(name) name
#else
#define FUNC(name) GLUE(.,name)
@@ -46,6 +36,9 @@
#define FUNC(name) name
+#define RFI_TO_KERNEL rfi
+#define RFI_TO_GUEST rfi
+
.macro INTERRUPT_TRAMPOLINE intno
.global kvmppc_trampoline_\intno
@@ -130,6 +123,7 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
kvmppc_handler_skip_ins:
/* Patch the IP to the next instruction */
+ /* Note that prefixed instructions are disabled in PR KVM for now */
mfsrr0 r12
addi r12, r12, 4
mtsrr0 r12
@@ -141,7 +135,7 @@ kvmppc_handler_skip_ins:
GET_SCRATCH0(r13)
/* And get back into the code */
- RFI
+ RFI_TO_KERNEL
#endif
/*
@@ -164,6 +158,6 @@ _GLOBAL_TOC(kvmppc_entry_trampoline)
ori r5, r5, MSR_EE
mtsrr0 r7
mtsrr1 r6
- RFI
+ RFI_TO_KERNEL
#include "book3s_segment.S"
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index ef27fbd5d9c5..6808bda0dbc1 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -11,11 +8,12 @@
#include <linux/kvm.h>
#include <linux/err.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/rtas.h>
+#include <asm/xive.h>
#ifdef CONFIG_KVM_XICS
static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
@@ -32,7 +30,10 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
server = be32_to_cpu(args->args[1]);
priority = be32_to_cpu(args->args[2]);
- rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+ if (xics_on_xive())
+ rc = kvmppc_xive_set_xive(vcpu->kvm, irq, server, priority);
+ else
+ rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
if (rc)
rc = -3;
out:
@@ -52,7 +53,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]);
server = priority = 0;
- rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+ if (xics_on_xive())
+ rc = kvmppc_xive_get_xive(vcpu->kvm, irq, &server, &priority);
+ else
+ rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
if (rc) {
rc = -3;
goto out;
@@ -76,7 +80,10 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]);
- rc = kvmppc_xics_int_off(vcpu->kvm, irq);
+ if (xics_on_xive())
+ rc = kvmppc_xive_int_off(vcpu->kvm, irq);
+ else
+ rc = kvmppc_xics_int_off(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
@@ -95,7 +102,10 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args)
irq = be32_to_cpu(args->args[0]);
- rc = kvmppc_xics_int_on(vcpu->kvm, irq);
+ if (xics_on_xive())
+ rc = kvmppc_xive_int_on(vcpu->kvm, irq);
+ else
+ rc = kvmppc_xics_int_on(vcpu->kvm, irq);
if (rc)
rc = -3;
out:
@@ -133,7 +143,7 @@ static int rtas_token_undefine(struct kvm *kvm, char *name)
{
struct rtas_token_definition *d, *tmp;
- lockdep_assert_held(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.rtas_token_lock);
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
if (rtas_name_matches(d->handler->name, name)) {
@@ -154,7 +164,7 @@ static int rtas_token_define(struct kvm *kvm, char *name, u64 token)
bool found;
int i;
- lockdep_assert_held(&kvm->lock);
+ lockdep_assert_held(&kvm->arch.rtas_token_lock);
list_for_each_entry(d, &kvm->arch.rtas_tokens, list) {
if (d->token == token)
@@ -193,14 +203,14 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp)
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
- mutex_lock(&kvm->lock);
+ mutex_lock(&kvm->arch.rtas_token_lock);
if (args.token)
rc = rtas_token_define(kvm, args.name, args.token);
else
rc = rtas_token_undefine(kvm, args.name);
- mutex_unlock(&kvm->lock);
+ mutex_unlock(&kvm->arch.rtas_token_lock);
return rc;
}
@@ -219,7 +229,9 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
*/
args_phys = kvmppc_get_gpr(vcpu, 4) & KVM_PAM;
+ kvm_vcpu_srcu_read_lock(vcpu);
rc = kvm_read_guest(vcpu->kvm, args_phys, &args, sizeof(args));
+ kvm_vcpu_srcu_read_unlock(vcpu);
if (rc)
goto fail;
@@ -230,9 +242,20 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
* value so we can restore it on the way out.
*/
orig_rets = args.rets;
+ if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) {
+ /*
+ * Don't overflow our args array: ensure there is room for
+ * at least rets[0] (even if the call specifies 0 nret).
+ *
+ * Each handler must then check for the correct nargs and nret
+ * values, but they may always return failure in rets[0].
+ */
+ rc = -EINVAL;
+ goto fail;
+ }
args.rets = &args.args[be32_to_cpu(args.nargs)];
- mutex_lock(&vcpu->kvm->lock);
+ mutex_lock(&vcpu->kvm->arch.rtas_token_lock);
rc = -ENOENT;
list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) {
@@ -243,7 +266,7 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
}
}
- mutex_unlock(&vcpu->kvm->lock);
+ mutex_unlock(&vcpu->kvm->arch.rtas_token_lock);
if (rc == 0) {
args.rets = orig_rets;
@@ -257,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu)
fail:
/*
* We only get here if the guest has called RTAS with a bogus
- * args pointer. That means we can't get to the args, and so we
- * can't fail the RTAS call. So fail right out to userspace,
- * which should kill the guest.
+ * args pointer or nargs/nret values that would overflow the
+ * array. That means we can't get to the args, and so we can't
+ * fail the RTAS call. So fail right out to userspace, which
+ * should kill the guest.
+ *
+ * SLOF should actually pass the hcall return value from the
+ * rtas handler call in r3, so enter_rtas could be modified to
+ * return a failure indication in r3 and we could return such
+ * errors to the guest rather than failing to host userspace.
+ * However old guests that don't test for failure could then
+ * continue silently after errors, so for now we won't do this.
*/
return rc;
}
@@ -269,8 +300,6 @@ void kvmppc_rtas_tokens_free(struct kvm *kvm)
{
struct rtas_token_definition *d, *tmp;
- lockdep_assert_held(&kvm->lock);
-
list_for_each_entry_safe(d, tmp, &kvm->arch.rtas_tokens, list) {
list_del(&d->list);
kfree(d);
diff --git a/arch/powerpc/kvm/book3s_segment.S b/arch/powerpc/kvm/book3s_segment.S
index acee37cde840..202046a83fc1 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright SUSE Linux Products GmbH 2010
*
@@ -19,6 +8,9 @@
/* Real mode helpers */
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
#if defined(CONFIG_PPC_BOOK3S_64)
#define GET_SHADOW_VCPU(reg) \
@@ -123,7 +115,7 @@ no_dcbz32_on:
PPC_LL r8, SVCPU_CTR(r3)
PPC_LL r9, SVCPU_LR(r3)
lwz r10, SVCPU_CR(r3)
- lwz r11, SVCPU_XER(r3)
+ PPC_LL r11, SVCPU_XER(r3)
mtctr r8
mtlr r9
@@ -156,7 +148,7 @@ no_dcbz32_on:
PPC_LL r9, SVCPU_R9(r3)
PPC_LL r3, (SVCPU_R3)(r3)
- RFI
+ RFI_TO_GUEST
kvmppc_handler_trampoline_enter_end:
@@ -167,20 +159,34 @@ kvmppc_handler_trampoline_enter_end:
* *
*****************************************************************************/
-.global kvmppc_handler_trampoline_exit
-kvmppc_handler_trampoline_exit:
-
.global kvmppc_interrupt_pr
kvmppc_interrupt_pr:
+ /* 64-bit entry. Register usage at this point:
+ *
+ * SPRG_SCRATCH0 = guest R13
+ * R9 = HSTATE_IN_GUEST
+ * R12 = (guest CR << 32) | exit handler id
+ * R13 = PACA
+ * HSTATE.SCRATCH0 = guest R12
+ * HSTATE.SCRATCH2 = guest R9
+ */
+#ifdef CONFIG_PPC64
+ /* Match 32-bit entry */
+ ld r9,HSTATE_SCRATCH2(r13)
+ rotldi r12, r12, 32 /* Flip R12 halves for stw */
+ stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
+ srdi r12, r12, 32 /* shift trap into low half */
+#endif
+.global kvmppc_handler_trampoline_exit
+kvmppc_handler_trampoline_exit:
/* Register usage at this point:
*
- * SPRG_SCRATCH0 = guest R13
- * R12 = exit handler id
- * R13 = shadow vcpu (32-bit) or PACA (64-bit)
+ * SPRG_SCRATCH0 = guest R13
+ * R12 = exit handler id
+ * R13 = shadow vcpu (32-bit) or PACA (64-bit)
* HSTATE.SCRATCH0 = guest R12
* HSTATE.SCRATCH1 = guest CR
- *
*/
/* Save registers */
@@ -237,7 +243,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mfctr r8
mflr r9
- stw r5, SVCPU_XER(r13)
+ PPC_STL r5, SVCPU_XER(r13)
PPC_STL r6, SVCPU_FAULT_DAR(r13)
stw r7, SVCPU_FAULT_DSISR(r13)
PPC_STL r8, SVCPU_CTR(r13)
@@ -365,6 +371,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
*/
PPC_LL r6, HSTATE_HOST_MSR(r13)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * We don't want to change MSR[TS] bits via rfi here.
+ * The actual TM handling logic will be in host with
+ * recovered DR/IR bits after HSTATE_VMHANDLER.
+ * And MSR_TM can be enabled in HOST_MSR so rfid may
+ * not suppress this change and can lead to exception.
+ * Manually set MSR to prevent TS state change here.
+ */
+ mfmsr r7
+ rldicl r7, r7, 64 - MSR_TS_S_LG, 62
+ rldimi r6, r7, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+#endif
PPC_LL r8, HSTATE_VMHANDLER(r13)
#ifdef CONFIG_PPC64
@@ -389,5 +408,5 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
cmpwi r12, BOOK3S_INTERRUPT_DOORBELL
beqa BOOK3S_INTERRUPT_DOORBELL
- RFI
+ RFI_TO_KERNEL
kvmppc_handler_trampoline_exit_end:
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index eaeb78047fb8..589a8f257120 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -12,16 +9,16 @@
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/anon_inodes.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
-#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
-#include <asm/debug.h>
#include <asm/time.h>
-#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include "book3s_xics.h"
@@ -39,8 +36,8 @@
* LOCKING
* =======
*
- * Each ICS has a mutex protecting the information about the IRQ
- * sources and avoiding simultaneous deliveries if the same interrupt.
+ * Each ICS has a spin lock protecting the information about the IRQ
+ * sources and avoiding simultaneous deliveries of the same interrupt.
*
* ICP operations are done via a single compare & swap transaction
* (most ICP state fits in the union kvmppc_icp_state)
@@ -62,7 +59,7 @@
/* -- ICS routines -- */
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
- u32 new_irq);
+ u32 new_irq, bool check_resend);
/*
* Return value ideally indicates how the interrupt was handled, but no
@@ -74,6 +71,7 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
struct ics_irq_state *state;
struct kvmppc_ics *ics;
u16 src;
+ u32 pq_old, pq_new;
XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);
@@ -86,20 +84,40 @@ static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
if (!state->exists)
return -EINVAL;
+ if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
+ level = 1;
+ else if (level == KVM_INTERRUPT_UNSET)
+ level = 0;
/*
- * We set state->asserted locklessly. This should be fine as
- * we are the only setter, thus concurrent access is undefined
- * to begin with.
+ * Take other values the same as 1, consistent with original code.
+ * maybe WARN here?
*/
- if (level == 1 || level == KVM_INTERRUPT_SET_LEVEL)
- state->asserted = 1;
- else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
- state->asserted = 0;
+
+ if (!state->lsi && level == 0) /* noop for MSI */
return 0;
- }
- /* Attempt delivery */
- icp_deliver_irq(xics, NULL, irq);
+ do {
+ pq_old = state->pq_state;
+ if (state->lsi) {
+ if (level) {
+ if (pq_old & PQ_PRESENTED)
+ /* Setting already set LSI ... */
+ return 0;
+
+ pq_new = PQ_PRESENTED;
+ } else
+ pq_new = 0;
+ } else
+ pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ /* Test P=1, Q=0, this is the only case where we present */
+ if (pq_new == PQ_PRESENTED)
+ icp_deliver_irq(xics, NULL, irq, false);
+
+ /* Record which CPU this arrived on for passed-through interrupts */
+ if (state->host_irq)
+ state->intr_cpu = raw_smp_processor_id();
return 0;
}
@@ -109,23 +127,14 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
{
int i;
- mutex_lock(&ics->lock);
-
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *state = &ics->irq_state[i];
-
- if (!state->resend)
- continue;
-
- XICS_DBG("resend %#x prio %#x\n", state->number,
- state->priority);
-
- mutex_unlock(&ics->lock);
- icp_deliver_irq(xics, icp, state->number);
- mutex_lock(&ics->lock);
+ if (state->resend) {
+ XICS_DBG("resend %#x prio %#x\n", state->number,
+ state->priority);
+ icp_deliver_irq(xics, icp, state->number, true);
+ }
}
-
- mutex_unlock(&ics->lock);
}
static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
@@ -133,8 +142,10 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
u32 server, u32 priority, u32 saved_priority)
{
bool deliver;
+ unsigned long flags;
- mutex_lock(&ics->lock);
+ local_irq_save(flags);
+ arch_spin_lock(&ics->lock);
state->server = server;
state->priority = priority;
@@ -142,10 +153,12 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
deliver = false;
if ((state->masked_pending || state->resend) && priority != MASKED) {
state->masked_pending = 0;
+ state->resend = 0;
deliver = true;
}
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
return deliver;
}
@@ -175,7 +188,7 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
state->masked_pending, state->resend);
if (write_xive(xics, ics, state, server, priority, priority))
- icp_deliver_irq(xics, icp, irq);
+ icp_deliver_irq(xics, icp, irq, false);
return 0;
}
@@ -186,6 +199,7 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u16 src;
+ unsigned long flags;
if (!xics)
return -ENODEV;
@@ -195,10 +209,12 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
return -EINVAL;
state = &ics->irq_state[src];
- mutex_lock(&ics->lock);
+ local_irq_save(flags);
+ arch_spin_lock(&ics->lock);
*server = state->server;
*priority = state->priority;
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
return 0;
}
@@ -225,7 +241,7 @@ int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
if (write_xive(xics, ics, state, state->server, state->saved_priority,
state->saved_priority))
- icp_deliver_irq(xics, icp, irq);
+ icp_deliver_irq(xics, icp, irq, false);
return 0;
}
@@ -267,7 +283,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
if (!success)
goto bail;
- XICS_DBG("UPD [%04x] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
+ XICS_DBG("UPD [%04lx] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
icp->server_num,
old.cppr, old.mfrr, old.pending_pri, old.xisr,
old.need_resend, old.out_ee);
@@ -291,7 +307,7 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
*/
if (new.out_ee) {
kvmppc_book3s_queue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ BOOK3S_INTERRUPT_EXTERNAL);
if (!change_self)
kvmppc_fast_vcpu_kick(icp->vcpu);
}
@@ -323,11 +339,11 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
union kvmppc_icp_state old_state, new_state;
bool success;
- XICS_DBG("try deliver %#x(P:%#x) to server %#x\n", irq, priority,
+ XICS_DBG("try deliver %#x(P:%#x) to server %#lx\n", irq, priority,
icp->server_num);
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
*reject = 0;
@@ -359,12 +375,13 @@ static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
}
static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
- u32 new_irq)
+ u32 new_irq, bool check_resend)
{
struct ics_irq_state *state;
struct kvmppc_ics *ics;
u32 reject;
u16 src;
+ unsigned long flags;
/*
* This is used both for initial delivery of an interrupt and
@@ -391,7 +408,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
state = &ics->irq_state[src];
/* Get a lock on the ICS */
- mutex_lock(&ics->lock);
+ local_irq_save(flags);
+ arch_spin_lock(&ics->lock);
/* Get our server */
if (!icp || state->server != icp->server_num) {
@@ -403,6 +421,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
}
}
+ if (check_resend)
+ if (!state->resend)
+ goto out;
+
/* Clear the resend bit of that interrupt */
state->resend = 0;
@@ -417,7 +439,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* the whole masked_pending business which is about not
* losing interrupts that occur while masked.
*
- * I don't differenciate normal deliveries and resends, this
+ * I don't differentiate normal deliveries and resends, this
* implementation will differ from PAPR and not lose such
* interrupts.
*/
@@ -434,13 +456,13 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*
* Note that if successful, the new delivery might have itself
* rejected an interrupt that was "delivered" before we took the
- * icp mutex.
+ * ics spin lock.
*
* In this case we do the whole sequence all over again for the
* new guy. We cannot assume that the rejected interrupt is less
* favored than the new one, and thus doesn't need to be delivered,
* because by the time we exit icp_try_to_deliver() the target
- * processor may well have alrady consumed & completed it, and thus
+ * processor may well have already consumed & completed it, and thus
* the rejected interrupt might actually be already acceptable.
*/
if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
@@ -448,8 +470,10 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* Delivery was successful, did we reject somebody else ?
*/
if (reject && reject != XICS_IPI) {
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
new_irq = reject;
+ check_resend = false;
goto again;
}
} else {
@@ -457,10 +481,16 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* We failed to deliver the interrupt we need to set the
* resend map bit and mark the ICS state as needing a resend
*/
- set_bit(ics->icsid, icp->resend_map);
state->resend = 1;
/*
+ * Make sure when checking resend, we don't miss the resend
+ * if resend_map bit is seen and cleared.
+ */
+ smp_wmb();
+ set_bit(ics->icsid, icp->resend_map);
+
+ /*
* If the need_resend flag got cleared in the ICP some time
* between icp_try_to_deliver() atomic update and now, then
* we know it might have missed the resend_map bit. So we
@@ -468,12 +498,16 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*/
smp_mb();
if (!icp->state.need_resend) {
- mutex_unlock(&ics->lock);
+ state->resend = 0;
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
+ check_resend = false;
goto again;
}
}
out:
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
}
static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -512,7 +546,7 @@ static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* in virtual mode.
*/
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
/* Down_CPPR */
new_state.cppr = new_cppr;
@@ -556,8 +590,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
u32 xirr;
/* First, remove EE from the processor */
- kvmppc_book3s_dequeue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
/*
* ICP State: Accept_Interrupt
@@ -567,7 +600,7 @@ static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
* pending priority
*/
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
if (!old_state.xisr)
@@ -613,13 +646,28 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
* there might be a previously-rejected interrupt needing
* to be resent.
*
+ * ICP state: Check_IPI
+ *
* If the CPPR is less favored, then we might be replacing
- * an interrupt, and thus need to possibly reject it as in
+ * an interrupt, and thus need to possibly reject it.
*
- * ICP state: Check_IPI
+ * ICP State: IPI
+ *
+ * Besides rejecting any pending interrupts, we also
+ * update XISR and pending_pri to mark IPI as pending.
+ *
+ * PAPR does not describe this state, but if the MFRR is being
+ * made less favored than its earlier value, there might be
+ * a previously-rejected interrupt needing to be resent.
+ * Ideally, we would want to resend only if
+ * prio(pending_interrupt) < mfrr &&
+ * prio(pending_interrupt) < cppr
+ * where pending interrupt is the one that was rejected. But
+ * we don't have that state, so we simply trigger a resend
+ * whenever the MFRR is made less favored.
*/
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
/* Set_MFRR */
new_state.mfrr = mfrr;
@@ -629,13 +677,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
- if (mfrr <= new_state.pending_pri)
+ if (mfrr <= new_state.pending_pri) {
reject = new_state.xisr;
- new_state.pending_pri = mfrr;
- new_state.xisr = XICS_IPI;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
}
- if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ if (mfrr > old_state.mfrr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
@@ -643,7 +692,7 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
/* Handle reject */
if (reject && reject != XICS_IPI)
- icp_deliver_irq(xics, icp, reject);
+ icp_deliver_irq(xics, icp, reject, false);
/* Handle resend */
if (resend)
@@ -663,7 +712,7 @@ static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
if (!icp)
return H_PARAMETER;
}
- state = ACCESS_ONCE(icp->state);
+ state = READ_ONCE(icp->state);
kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr);
kvmppc_set_gpr(vcpu, 5, state.mfrr);
return H_SUCCESS;
@@ -701,11 +750,10 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* We can remove EE from the current processor, the update
* transaction will set it again if needed
*/
- kvmppc_book3s_dequeue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
do {
- old_state = new_state = ACCESS_ONCE(icp->state);
+ old_state = new_state = READ_ONCE(icp->state);
reject = 0;
new_state.cppr = cppr;
@@ -723,17 +771,54 @@ static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
* attempt (see comments in icp_deliver_irq).
*/
if (reject && reject != XICS_IPI)
- icp_deliver_irq(xics, icp, reject);
+ icp_deliver_irq(xics, icp, reject, false);
}
-static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
struct kvmppc_ics *ics;
struct ics_irq_state *state;
- u32 irq = xirr & 0x00ffffff;
u16 src;
+ u32 pq_old, pq_new;
+
+ /*
+ * ICS EOI handling: For LSI, if P bit is still set, we need to
+ * resend it.
+ *
+ * For MSI, we move Q bit into P (and clear Q). If it is set,
+ * resend it.
+ */
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n", irq);
+ return H_PARAMETER;
+ }
+ state = &ics->irq_state[src];
+
+ if (state->lsi)
+ pq_new = state->pq_state;
+ else
+ do {
+ pq_old = state->pq_state;
+ pq_new = pq_old >> 1;
+ } while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);
+
+ if (pq_new & PQ_PRESENTED)
+ icp_deliver_irq(xics, icp, irq, false);
+
+ kvm_notify_acked_irq(vcpu->kvm, 0, irq);
+
+ return H_SUCCESS;
+}
+
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 irq = xirr & 0x00ffffff;
XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
@@ -742,7 +827,7 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
*
* Note: If EOI is incorrectly used by SW to lower the CPPR
* value (ie more favored), we do not check for rejection of
- * a pending interrupt, this is a SW error and PAPR sepcifies
+ * a pending interrupt, this is a SW error and PAPR specifies
* that we don't have to deal with it.
*
* The sending of an EOI to the ICS is handled after the
@@ -756,29 +841,11 @@ static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
/* IPIs have no EOI */
if (irq == XICS_IPI)
return H_SUCCESS;
- /*
- * EOI handling: If the interrupt is still asserted, we need to
- * resend it. We can take a lockless "peek" at the ICS state here.
- *
- * "Message" interrupts will never have "asserted" set
- */
- ics = kvmppc_xics_find_ics(xics, irq, &src);
- if (!ics) {
- XICS_DBG("h_eoi: IRQ 0x%06x not found !\n", irq);
- return H_PARAMETER;
- }
- state = &ics->irq_state[src];
-
- /* Still asserted, resend it */
- if (state->asserted)
- icp_deliver_irq(xics, icp, irq);
-
- kvm_notify_acked_irq(vcpu->kvm, 0, irq);
- return H_SUCCESS;
+ return ics_eoi(vcpu, irq);
}
-static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{
struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -786,19 +853,24 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
- if (icp->rm_action & XICS_RM_KICK_VCPU)
+ if (icp->rm_action & XICS_RM_KICK_VCPU) {
+ icp->n_rm_kick_vcpu++;
kvmppc_fast_vcpu_kick(icp->rm_kick_target);
- if (icp->rm_action & XICS_RM_CHECK_RESEND)
- icp_check_resend(xics, icp);
- if (icp->rm_action & XICS_RM_REJECT)
- icp_deliver_irq(xics, icp, icp->rm_reject);
- if (icp->rm_action & XICS_RM_NOTIFY_EOI)
+ }
+ if (icp->rm_action & XICS_RM_CHECK_RESEND) {
+ icp->n_rm_check_resend++;
+ icp_check_resend(xics, icp->rm_resend_icp);
+ }
+ if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
+ icp->n_rm_notify_eoi++;
kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
+ }
icp->rm_action = 0;
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);
int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
@@ -850,16 +922,43 @@ EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);
/* -- Initialisation code etc. -- */
+static void xics_debugfs_irqmap(struct seq_file *m,
+ struct kvmppc_passthru_irqmap *pimap)
+{
+ int i;
+
+ if (!pimap)
+ return;
+ seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
+ pimap->n_mapped);
+ for (i = 0; i < pimap->n_mapped; i++) {
+ seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
+ pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
+ }
+}
+
static int xics_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xics *xics = m->private;
struct kvm *kvm = xics->kvm;
struct kvm_vcpu *vcpu;
- int icsid, i;
+ int icsid;
+ unsigned long flags, i;
+ unsigned long t_rm_kick_vcpu, t_rm_check_resend;
+ unsigned long t_rm_notify_eoi;
+ unsigned long t_reject, t_check_resend;
if (!kvm)
return 0;
+ t_rm_kick_vcpu = 0;
+ t_rm_notify_eoi = 0;
+ t_rm_check_resend = 0;
+ t_check_resend = 0;
+ t_reject = 0;
+
+ xics_debugfs_irqmap(m, kvm->arch.pimap);
+
seq_printf(m, "=========\nICP state\n=========\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
@@ -869,13 +968,23 @@ static int xics_debug_show(struct seq_file *m, void *private)
if (!icp)
continue;
- state.raw = ACCESS_ONCE(icp->state.raw);
+ state.raw = READ_ONCE(icp->state.raw);
seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
icp->server_num, state.xisr,
state.pending_pri, state.cppr, state.mfrr,
state.out_ee, state.need_resend);
+ t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
+ t_rm_notify_eoi += icp->n_rm_notify_eoi;
+ t_rm_check_resend += icp->n_rm_check_resend;
+ t_check_resend += icp->n_check_resend;
+ t_reject += icp->n_reject;
}
+ seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n",
+ t_rm_kick_vcpu, t_rm_check_resend,
+ t_rm_notify_eoi);
+ seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
+ t_check_resend, t_reject);
for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
struct kvmppc_ics *ics = xics->ics[icsid];
@@ -885,49 +994,32 @@ static int xics_debug_show(struct seq_file *m, void *private)
seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
icsid);
- mutex_lock(&ics->lock);
+ local_irq_save(flags);
+ arch_spin_lock(&ics->lock);
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *irq = &ics->irq_state[i];
- seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x asserted %d resend %d masked pending %d\n",
+ seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
irq->number, irq->server, irq->priority,
- irq->saved_priority, irq->asserted,
+ irq->saved_priority, irq->pq_state,
irq->resend, irq->masked_pending);
}
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
}
return 0;
}
-static int xics_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xics_debug_show, inode->i_private);
-}
-
-static const struct file_operations xics_debug_fops = {
- .open = xics_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(xics_debug);
static void xics_debugfs_init(struct kvmppc_xics *xics)
{
- char *name;
-
- name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
- if (!name) {
- pr_err("%s: no memory for name\n", __func__);
- return;
- }
-
- xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry,
xics, &xics_debug_fops);
- pr_debug("%s: created %s\n", __func__, name);
- kfree(name);
+ pr_debug("%s: created\n", __func__);
}
static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
@@ -949,7 +1041,6 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
if (!ics)
goto out;
- mutex_init(&ics->lock);
ics->icsid = icsid;
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
@@ -968,7 +1059,7 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
return xics->ics[icsid];
}
-int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
+static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
{
struct kvmppc_icp *icp;
@@ -1052,8 +1143,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
* Deassert the CPU interrupt request.
* icp_try_update will reassert it if necessary.
*/
- kvmppc_book3s_dequeue_irqprio(icp->vcpu,
- BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+ kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);
/*
* Note that if we displace an interrupt from old_state.xisr,
@@ -1066,7 +1156,7 @@ int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
* the ICS states before the ICP states.
*/
do {
- old_state = ACCESS_ONCE(icp->state);
+ old_state = READ_ONCE(icp->state);
if (new_state.mfrr <= old_state.mfrr) {
resend = false;
@@ -1091,13 +1181,15 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
u64 __user *ubufp = (u64 __user *) addr;
u16 idx;
u64 val, prio;
+ unsigned long flags;
ics = kvmppc_xics_find_ics(xics, irq, &idx);
if (!ics)
return -ENOENT;
irqp = &ics->irq_state[idx];
- mutex_lock(&ics->lock);
+ local_irq_save(flags);
+ arch_spin_lock(&ics->lock);
ret = -ENOENT;
if (irqp->exists) {
val = irqp->server;
@@ -1107,13 +1199,23 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
prio = irqp->saved_priority;
}
val |= prio << KVM_XICS_PRIORITY_SHIFT;
- if (irqp->asserted)
- val |= KVM_XICS_LEVEL_SENSITIVE | KVM_XICS_PENDING;
- else if (irqp->masked_pending || irqp->resend)
+ if (irqp->lsi) {
+ val |= KVM_XICS_LEVEL_SENSITIVE;
+ if (irqp->pq_state & PQ_PRESENTED)
+ val |= KVM_XICS_PENDING;
+ } else if (irqp->masked_pending || irqp->resend)
val |= KVM_XICS_PENDING;
+
+ if (irqp->pq_state & PQ_PRESENTED)
+ val |= KVM_XICS_PRESENTED;
+
+ if (irqp->pq_state & PQ_QUEUED)
+ val |= KVM_XICS_QUEUED;
+
ret = 0;
}
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
if (!ret && put_user(val, ubufp))
ret = -EFAULT;
@@ -1130,6 +1232,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
u64 val;
u8 prio;
u32 server;
+ unsigned long flags;
if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
return -ENOENT;
@@ -1150,7 +1253,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
kvmppc_xics_find_server(xics->kvm, server) == NULL)
return -EINVAL;
- mutex_lock(&ics->lock);
+ local_irq_save(flags);
+ arch_spin_lock(&ics->lock);
irqp->server = server;
irqp->saved_priority = prio;
if (val & KVM_XICS_MASKED)
@@ -1158,35 +1262,35 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
irqp->priority = prio;
irqp->resend = 0;
irqp->masked_pending = 0;
- irqp->asserted = 0;
- if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
- irqp->asserted = 1;
+ irqp->lsi = 0;
+ irqp->pq_state = 0;
+ if (val & KVM_XICS_LEVEL_SENSITIVE)
+ irqp->lsi = 1;
+ /* If PENDING, set P in case P is not saved because of old code */
+ if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
+ irqp->pq_state |= PQ_PRESENTED;
+ if (val & KVM_XICS_QUEUED)
+ irqp->pq_state |= PQ_QUEUED;
irqp->exists = 1;
- mutex_unlock(&ics->lock);
+ arch_spin_unlock(&ics->lock);
+ local_irq_restore(flags);
if (val & KVM_XICS_PENDING)
- icp_deliver_irq(xics, NULL, irqp->number);
+ icp_deliver_irq(xics, NULL, irqp->number, false);
return 0;
}
-int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
- bool line_status)
+int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+ bool line_status)
{
struct kvmppc_xics *xics = kvm->arch.xics;
+ if (!xics)
+ return -ENODEV;
return ics_deliver_irq(xics, irq, level);
}
-int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
- int irq_source_id, int level, bool line_status)
-{
- if (!level)
- return -1;
- return kvm_set_irq(kvm, irq_source_id, irq_entry->gsi,
- level, line_status);
-}
-
static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
struct kvmppc_xics *xics = dev->private;
@@ -1221,54 +1325,101 @@ static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
return -ENXIO;
}
-static void kvmppc_xics_free(struct kvm_device *dev)
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xics_release(struct kvm_device *dev)
{
struct kvmppc_xics *xics = dev->private;
- int i;
+ unsigned long i;
struct kvm *kvm = xics->kvm;
+ struct kvm_vcpu *vcpu;
+
+ pr_devel("Releasing xics device\n");
+
+ /*
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd referring to the
+ * device. Therefore there can not be any of the device
+ * attribute set/get functions being executed concurrently,
+ * and similarly, the connect_vcpu and set/clr_mapped
+ * functions also cannot be being executed.
+ */
debugfs_remove(xics->dentry);
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
+ * Holding the vcpu->mutex also means that execution is
+ * excluded for the vcpu until the ICP is freed. When the vcpu
+ * can execute again, vcpu->arch.icp and vcpu->arch.irq_type
+ * have been cleared and the vcpu will not be going into the
+ * XICS code anymore.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xics_free_icp(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
if (kvm)
kvm->arch.xics = NULL;
- for (i = 0; i <= xics->max_icsid; i++)
+ for (i = 0; i <= xics->max_icsid; i++) {
kfree(xics->ics[i]);
- kfree(xics);
+ xics->ics[i] = NULL;
+ }
+ /*
+ * A reference of the kvmppc_xics pointer is now kept under
+ * the xics_device pointer of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
kfree(dev);
}
+static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
+{
+ struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device;
+ struct kvmppc_xics *xics = *kvm_xics_device;
+
+ if (!xics) {
+ xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ *kvm_xics_device = xics;
+ } else {
+ memset(xics, 0, sizeof(*xics));
+ }
+
+ return xics;
+}
+
static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{
struct kvmppc_xics *xics;
struct kvm *kvm = dev->kvm;
- int ret = 0;
- xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ pr_devel("Creating xics for partition\n");
+
+ /* Already there ? */
+ if (kvm->arch.xics)
+ return -EEXIST;
+
+ xics = kvmppc_xics_get_device(kvm);
if (!xics)
return -ENOMEM;
dev->private = xics;
xics->dev = dev;
xics->kvm = kvm;
-
- /* Already there ? */
- mutex_lock(&kvm->lock);
- if (kvm->arch.xics)
- ret = -EEXIST;
- else
- kvm->arch.xics = xics;
- mutex_unlock(&kvm->lock);
-
- if (ret) {
- kfree(xics);
- return ret;
- }
-
- xics_debugfs_init(xics);
+ kvm->arch.xics = xics;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ cpu_has_feature(CPU_FTR_HVMODE)) {
/* Enable real mode support */
xics->real_mode = ENABLE_REALMODE;
xics->real_mode_dbg = DEBUG_REALMODE;
@@ -1278,10 +1429,18 @@ static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
return 0;
}
+static void kvmppc_xics_init(struct kvm_device *dev)
+{
+ struct kvmppc_xics *xics = dev->private;
+
+ xics_debugfs_init(xics);
+}
+
struct kvm_device_ops kvm_xics_ops = {
.name = "kvm-xics",
.create = kvmppc_xics_create,
- .destroy = kvmppc_xics_free,
+ .init = kvmppc_xics_init,
+ .release = kvmppc_xics_release,
.set_attr = xics_set_attr,
.get_attr = xics_get_attr,
.has_attr = xics_has_attr,
@@ -1297,7 +1456,7 @@ int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
return -EPERM;
if (xics->kvm != vcpu->kvm)
return -EPERM;
- if (vcpu->arch.irq_type)
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
return -EBUSY;
r = kvmppc_xics_create_icp(vcpu, xcpu);
@@ -1316,25 +1475,33 @@ void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
}
-static int xics_set_irq(struct kvm_kernel_irq_routing_entry *e,
- struct kvm *kvm, int irq_source_id, int level,
- bool line_status)
+void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
+ unsigned long host_irq)
{
- return kvm_set_irq(kvm, irq_source_id, e->gsi, level, line_status);
-}
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ u16 idx;
-int kvm_irq_map_gsi(struct kvm *kvm,
- struct kvm_kernel_irq_routing_entry *entries, int gsi)
-{
- entries->gsi = gsi;
- entries->type = KVM_IRQ_ROUTING_IRQCHIP;
- entries->set = xics_set_irq;
- entries->irqchip.irqchip = 0;
- entries->irqchip.pin = gsi;
- return 1;
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics)
+ return;
+
+ ics->irq_state[idx].host_irq = host_irq;
+ ics->irq_state[idx].intr_cpu = -1;
}
+EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);
-int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
+ unsigned long host_irq)
{
- return pin;
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ u16 idx;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &idx);
+ if (!ics)
+ return;
+
+ ics->irq_state[idx].host_irq = 0;
}
+EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e8aaa7a3f209..08fb0843faf5 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -1,15 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright 2012 Michael Ellerman, IBM Corporation.
* Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#ifndef _KVM_PPC_BOOK3S_XICS_H
#define _KVM_PPC_BOOK3S_XICS_H
+#ifdef CONFIG_KVM_XICS
/*
* We use a two-level tree to store interrupt source information.
* There are up to 1024 ICS nodes, each of which can represent
@@ -31,16 +29,22 @@
/* Priority value to use for disabling an interrupt */
#define MASKED 0xff
+#define PQ_PRESENTED 1
+#define PQ_QUEUED 2
+
/* State for one irq source */
struct ics_irq_state {
u32 number;
u32 server;
+ u32 pq_state;
u8 priority;
u8 saved_priority;
u8 resend;
u8 masked_pending;
- u8 asserted; /* Only for LSI */
+ u8 lsi; /* level-sensitive interrupt */
u8 exists;
+ int intr_cpu;
+ u32 host_irq;
};
/* Atomic ICP state, updated with a single compare & swap */
@@ -70,20 +74,28 @@ struct kvmppc_icp {
*/
#define XICS_RM_KICK_VCPU 0x1
#define XICS_RM_CHECK_RESEND 0x2
-#define XICS_RM_REJECT 0x4
#define XICS_RM_NOTIFY_EOI 0x8
u32 rm_action;
struct kvm_vcpu *rm_kick_target;
+ struct kvmppc_icp *rm_resend_icp;
u32 rm_reject;
u32 rm_eoied_irq;
+ /* Counters for each reason we exited real mode */
+ unsigned long n_rm_kick_vcpu;
+ unsigned long n_rm_check_resend;
+ unsigned long n_rm_notify_eoi;
+ /* Counters for handling ICP processing in real mode */
+ unsigned long n_check_resend;
+ unsigned long n_reject;
+
/* Debug stuff for real mode */
union kvmppc_icp_state rm_dbgstate;
struct kvm_vcpu *rm_dbgtgt;
};
struct kvmppc_ics {
- struct mutex lock;
+ arch_spinlock_t lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
@@ -95,6 +107,8 @@ struct kvmppc_xics {
u32 max_icsid;
bool real_mode;
bool real_mode_dbg;
+ u32 err_noics;
+ u32 err_noicp;
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
};
@@ -102,7 +116,7 @@ static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
u32 nr)
{
struct kvm_vcpu *vcpu = NULL;
- int i;
+ unsigned long i;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
@@ -128,5 +142,12 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
return ics;
}
+extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu);
+extern unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu);
+extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr);
+extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+extern int xics_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
new file mode 100644
index 000000000000..1302b5ac5672
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -0,0 +1,2980 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/uaccess.h>
+#include <linux/irqdomain.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/time.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
+#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
+
+/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */
+#define XICS_DUMMY 1
+
+static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc)
+{
+ u8 cppr;
+ u16 ack;
+
+ /*
+ * Ensure any previous store to CPPR is ordered vs.
+ * the subsequent loads from PIPR or ACK.
+ */
+ eieio();
+
+ /* Perform the acknowledge OS to register cycle. */
+ ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG));
+
+ /* Synchronize subsequent queue accesses */
+ mb();
+
+ /* XXX Check grouping level */
+
+ /* Anything ? */
+ if (!((ack >> 8) & TM_QW1_NSR_EO))
+ return;
+
+ /* Grab CPPR of the most favored pending interrupt */
+ cppr = ack & 0xff;
+ if (cppr < 8)
+ xc->pending |= 1 << cppr;
+
+ /* Check consistency */
+ if (cppr >= xc->hw_cppr)
+ pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
+ smp_processor_id(), cppr, xc->hw_cppr);
+
+ /*
+ * Update our image of the HW CPPR. We don't yet modify
+ * xc->cppr, this will be done as we scan for interrupts
+ * in the queues.
+ */
+ xc->hw_cppr = cppr;
+}
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+ u64 val;
+
+ if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ offset |= XIVE_ESB_LD_ST_MO;
+
+ val = __raw_readq(__x_eoi_page(xd) + offset);
+#ifdef __LITTLE_ENDIAN__
+ val >>= 64-8;
+#endif
+ return (u8)val;
+}
+
+
+static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
+{
+ /* If the XIVE supports the new "store EOI" facility, use it */
+ if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
+ __raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
+ else if (xd->flags & XIVE_IRQ_FLAG_LSI) {
+ /*
+ * For LSIs the HW EOI cycle is used rather than PQ bits,
+ * as they are automatically re-triggered in HW when still
+ * pending.
+ */
+ __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
+ } else {
+ uint64_t eoi_val;
+
+ /*
+ * Otherwise for EOI, we use the special MMIO that does
+ * a clear of both P and Q and returns the old Q,
+ * except for LSIs where we use the "EOI cycle" special
+ * load.
+ *
+ * This allows us to then do a re-trigger if Q was set
+ * rather than synthesizing an interrupt in software
+ */
+ eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
+
+ /* Re-trigger if needed */
+ if ((eoi_val & 1) && __x_trig_page(xd))
+ __raw_writeq(0, __x_trig_page(xd));
+ }
+}
+
+enum {
+ scan_fetch,
+ scan_poll,
+ scan_eoi,
+};
+
+static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc,
+ u8 pending, int scan_type)
+{
+ u32 hirq = 0;
+ u8 prio = 0xff;
+
+ /* Find highest pending priority */
+ while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) {
+ struct xive_q *q;
+ u32 idx, toggle;
+ __be32 *qpage;
+
+ /*
+ * If pending is 0 this will return 0xff which is what
+ * we want
+ */
+ prio = ffs(pending) - 1;
+
+ /* Don't scan past the guest cppr */
+ if (prio >= xc->cppr || prio > 7) {
+ if (xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ }
+ break;
+ }
+
+ /* Grab queue and pointers */
+ q = &xc->queues[prio];
+ idx = q->idx;
+ toggle = q->toggle;
+
+ /*
+ * Snapshot the queue page. The test further down for EOI
+ * must use the same "copy" that was used by __xive_read_eq
+ * since qpage can be set concurrently and we don't want
+ * to miss an EOI.
+ */
+ qpage = READ_ONCE(q->qpage);
+
+skip_ipi:
+ /*
+ * Try to fetch from the queue. Will return 0 for a
+ * non-queueing priority (ie, qpage = 0).
+ */
+ hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle);
+
+ /*
+ * If this was a signal for an MFRR change done by
+ * H_IPI we skip it. Additionally, if we were fetching
+ * we EOI it now, thus re-enabling reception of a new
+ * such signal.
+ *
+ * We also need to do that if prio is 0 and we had no
+ * page for the queue. In this case, we have non-queued
+ * IPI that needs to be EOId.
+ *
+ * This is safe because if we have another pending MFRR
+ * change that wasn't observed above, the Q bit will have
+ * been set and another occurrence of the IPI will trigger.
+ */
+ if (hirq == XICS_IPI || (prio == 0 && !qpage)) {
+ if (scan_type == scan_fetch) {
+ xive_vm_source_eoi(xc->vp_ipi,
+ &xc->vp_ipi_data);
+ q->idx = idx;
+ q->toggle = toggle;
+ }
+ /* Loop back on same queue with updated idx/toggle */
+ WARN_ON(hirq && hirq != XICS_IPI);
+ if (hirq)
+ goto skip_ipi;
+ }
+
+ /* If it's the dummy interrupt, continue searching */
+ if (hirq == XICS_DUMMY)
+ goto skip_ipi;
+
+ /* Clear the pending bit if the queue is now empty */
+ if (!hirq) {
+ pending &= ~(1 << prio);
+
+ /*
+ * Check if the queue count needs adjusting due to
+ * interrupts being moved away.
+ */
+ if (atomic_read(&q->pending_count)) {
+ int p = atomic_xchg(&q->pending_count, 0);
+
+ if (p) {
+ WARN_ON(p > atomic_read(&q->count));
+ atomic_sub(p, &q->count);
+ }
+ }
+ }
+
+ /*
+ * If the most favoured prio we found pending is less
+ * favored (or equal) than a pending IPI, we return
+ * the IPI instead.
+ */
+ if (prio >= xc->mfrr && xc->mfrr < xc->cppr) {
+ prio = xc->mfrr;
+ hirq = XICS_IPI;
+ break;
+ }
+
+ /* If fetching, update queue pointers */
+ if (scan_type == scan_fetch) {
+ q->idx = idx;
+ q->toggle = toggle;
+ }
+ }
+
+ /* If we are just taking a "peek", do nothing else */
+ if (scan_type == scan_poll)
+ return hirq;
+
+ /* Update the pending bits */
+ xc->pending = pending;
+
+ /*
+ * If this is an EOI that's it, no CPPR adjustment done here,
+ * all we needed was cleanup the stale pending bits and check
+ * if there's anything left.
+ */
+ if (scan_type == scan_eoi)
+ return hirq;
+
+ /*
+ * If we found an interrupt, adjust what the guest CPPR should
+ * be as if we had just fetched that interrupt from HW.
+ *
+ * Note: This can only make xc->cppr smaller as the previous
+ * loop will only exit with hirq != 0 if prio is lower than
+ * the current xc->cppr. Thus we don't need to re-check xc->mfrr
+ * for pending IPIs.
+ */
+ if (hirq)
+ xc->cppr = prio;
+ /*
+ * If it was an IPI the HW CPPR might have been lowered too much
+ * as the HW interrupt we use for IPIs is routed to priority 0.
+ *
+ * We re-sync it here.
+ */
+ if (xc->cppr != xc->hw_cppr) {
+ xc->hw_cppr = xc->cppr;
+ __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+ }
+
+ return hirq;
+}
+
+static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u8 old_cppr;
+ u32 hirq;
+
+ pr_devel("H_XIRR\n");
+
+ xc->stat_vm_h_xirr++;
+
+ /* First collect pending bits from HW */
+ xive_vm_ack_pending(xc);
+
+ pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
+ xc->pending, xc->hw_cppr, xc->cppr);
+
+ /* Grab previous CPPR and reverse map it */
+ old_cppr = xive_prio_to_guest(xc->cppr);
+
+ /* Scan for actual interrupts */
+ hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);
+
+ pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
+ hirq, xc->hw_cppr, xc->cppr);
+
+ /* That should never hit */
+ if (hirq & 0xff000000)
+ pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
+
+ /*
+ * XXX We could check if the interrupt is masked here and
+ * filter it. If we chose to do so, we would need to do:
+ *
+ * if (masked) {
+ * lock();
+ * if (masked) {
+ * old_Q = true;
+ * hirq = 0;
+ * }
+ * unlock();
+ * }
+ */
+
+ /* Return interrupt and old CPPR in GPR4 */
+ kvmppc_set_gpr(vcpu, 4, hirq | (old_cppr << 24));
+
+ return H_SUCCESS;
+}
+
+static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u8 pending = xc->pending;
+ u32 hirq;
+
+ pr_devel("H_IPOLL(server=%ld)\n", server);
+
+ xc->stat_vm_h_ipoll++;
+
+ /* Grab the target VCPU if not the current one */
+ if (xc->server_num != server) {
+ vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+ if (!vcpu)
+ return H_PARAMETER;
+ xc = vcpu->arch.xive_vcpu;
+
+ /* Scan all priorities */
+ pending = 0xff;
+ } else {
+ /* Grab pending interrupt if any */
+ __be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
+ u8 pipr = be64_to_cpu(qw1) & 0xff;
+
+ if (pipr < 8)
+ pending |= 1 << pipr;
+ }
+
+ hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
+
+ /* Return interrupt and old CPPR in GPR4 */
+ kvmppc_set_gpr(vcpu, 4, hirq | (xc->cppr << 24));
+
+ return H_SUCCESS;
+}
+
+static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc)
+{
+ u8 pending, prio;
+
+ pending = xc->pending;
+ if (xc->mfrr != 0xff) {
+ if (xc->mfrr < 8)
+ pending |= 1 << xc->mfrr;
+ else
+ pending |= 0x80;
+ }
+ if (!pending)
+ return;
+ prio = ffs(pending) - 1;
+
+ __raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
+}
+
+static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive,
+ struct kvmppc_xive_vcpu *xc)
+{
+ unsigned int prio;
+
+ /* For each priority that is now masked */
+ for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ struct xive_q *q = &xc->queues[prio];
+ struct kvmppc_xive_irq_state *state;
+ struct kvmppc_xive_src_block *sb;
+ u32 idx, toggle, entry, irq, hw_num;
+ struct xive_irq_data *xd;
+ __be32 *qpage;
+ u16 src;
+
+ idx = q->idx;
+ toggle = q->toggle;
+ qpage = READ_ONCE(q->qpage);
+ if (!qpage)
+ continue;
+
+ /* For each interrupt in the queue */
+ for (;;) {
+ entry = be32_to_cpup(qpage + idx);
+
+ /* No more ? */
+ if ((entry >> 31) == toggle)
+ break;
+ irq = entry & 0x7fffffff;
+
+ /* Skip dummies and IPIs */
+ if (irq == XICS_DUMMY || irq == XICS_IPI)
+ goto next;
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ goto next;
+ state = &sb->irq_state[src];
+
+ /* Has it been rerouted ? */
+ if (xc->server_num == state->act_server)
+ goto next;
+
+ /*
+ * All right, it *has* been re-routed, kill it from
+ * the queue.
+ */
+ qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY);
+
+ /* Find the HW interrupt */
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ /* If it's not an LSI, set PQ to 11; the EOI will then force a resend */
+ if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
+ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+ /* EOI the source */
+ xive_vm_source_eoi(hw_num, xd);
+
+next:
+ idx = (idx + 1) & q->msk;
+ if (idx == 0)
+ toggle ^= 1;
+ }
+ }
+}
+
+static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+ u8 old_cppr;
+
+ pr_devel("H_CPPR(cppr=%ld)\n", cppr);
+
+ xc->stat_vm_h_cppr++;
+
+ /* Map CPPR */
+ cppr = xive_prio_from_guest(cppr);
+
+ /* Remember old and update SW state */
+ old_cppr = xc->cppr;
+ xc->cppr = cppr;
+
+ /*
+ * Order the above update of xc->cppr with the subsequent
+ * read of xc->mfrr inside push_pending_to_hw()
+ */
+ smp_mb();
+
+ if (cppr > old_cppr) {
+ /*
+ * We are masking less, we need to look for pending things
+ * to deliver and set VP pending bits accordingly to trigger
+ * a new interrupt otherwise we might miss MFRR changes for
+ * which we have optimized out sending an IPI signal.
+ */
+ xive_vm_push_pending_to_hw(xc);
+ } else {
+ /*
+ * We are masking more, we need to check the queue for any
+ * interrupt that has been routed to another CPU, take
+ * it out (replace it with the dummy) and retrigger it.
+ *
+ * This is necessary since those interrupts may otherwise
+ * never be processed, at least not until this CPU restores
+ * its CPPR.
+ *
+ * This is in theory racy vs. HW adding new interrupts to
+ * the queue. In practice this works because the interesting
+ * cases are when the guest has done a set_xive() to move the
+ * interrupt away, which flushes the xive, followed by the
+ * target CPU doing a H_CPPR. So any new interrupt coming into
+ * the queue must still be routed to us and isn't a source
+ * of concern.
+ */
+ xive_vm_scan_for_rerouted_irqs(xive, xc);
+ }
+
+ /* Apply new CPPR */
+ xc->hw_cppr = cppr;
+ __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+ return H_SUCCESS;
+}
+
+static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct xive_irq_data *xd;
+ u8 new_cppr = xirr >> 24;
+ u32 irq = xirr & 0x00ffffff, hw_num;
+ u16 src;
+ int rc = 0;
+
+ pr_devel("H_EOI(xirr=%08lx)\n", xirr);
+
+ xc->stat_vm_h_eoi++;
+
+ xc->cppr = xive_prio_from_guest(new_cppr);
+
+ /*
+ * IPIs are synthesized from MFRR and thus don't need
+ * any special EOI handling. The underlying interrupt
+ * used to signal MFRR changes is EOId when fetched from
+ * the queue.
+ */
+ if (irq == XICS_IPI || irq == 0) {
+ /*
+ * This barrier orders the setting of xc->cppr vs.
+ * subsequent test of xc->mfrr done inside
+ * scan_interrupts and push_pending_to_hw
+ */
+ smp_mb();
+ goto bail;
+ }
+
+ /* Find interrupt source */
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb) {
+ pr_devel(" source not found !\n");
+ rc = H_PARAMETER;
+ /* Same as above */
+ smp_mb();
+ goto bail;
+ }
+ state = &sb->irq_state[src];
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ state->in_eoi = true;
+
+ /*
+ * This barrier orders both setting of in_eoi above vs.
+ * subsequent test of guest_priority, and the setting
+ * of xc->cppr vs. subsequent test of xc->mfrr done inside
+ * scan_interrupts and push_pending_to_hw
+ */
+ smp_mb();
+
+again:
+ if (state->guest_priority == MASKED) {
+ arch_spin_lock(&sb->lock);
+ if (state->guest_priority != MASKED) {
+ arch_spin_unlock(&sb->lock);
+ goto again;
+ }
+ pr_devel(" EOI on saved P...\n");
+
+ /* Clear old_p, that will cause unmask to perform an EOI */
+ state->old_p = false;
+
+ arch_spin_unlock(&sb->lock);
+ } else {
+ pr_devel(" EOI on source...\n");
+
+ /* Perform EOI on the source */
+ xive_vm_source_eoi(hw_num, xd);
+
+ /* If it's an emulated LSI, check level and resend */
+ if (state->lsi && state->asserted)
+ __raw_writeq(0, __x_trig_page(xd));
+
+ }
+
+ /*
+ * This barrier orders the above guest_priority check
+ * and spin_lock/unlock with clearing in_eoi below.
+ *
+ * It also has to be a full mb() as it must ensure
+ * the MMIOs done in source_eoi() are completed before
+ * state->in_eoi is visible.
+ */
+ mb();
+ state->in_eoi = false;
+bail:
+
+ /* Re-evaluate pending IRQs and update HW */
+ xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
+ xive_vm_push_pending_to_hw(xc);
+ pr_devel(" after scan pending=%02x\n", xc->pending);
+
+ /* Apply new CPPR */
+ xc->hw_cppr = xc->cppr;
+ __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
+
+ return rc;
+}
+
+static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
+
+ xc->stat_vm_h_ipi++;
+
+ /* Find target */
+ vcpu = kvmppc_xive_find_server(vcpu->kvm, server);
+ if (!vcpu)
+ return H_PARAMETER;
+ xc = vcpu->arch.xive_vcpu;
+
+ /* Locklessly write over MFRR */
+ xc->mfrr = mfrr;
+
+ /*
+ * The load of xc->cppr below and the subsequent MMIO store
+ * to the IPI must happen after the above mfrr update is
+ * globally visible so that:
+ *
+ * - Synchronize with another CPU doing an H_EOI or a H_CPPR
+ * updating xc->cppr then reading xc->mfrr.
+ *
+ * - The target of the IPI sees the xc->mfrr update
+ */
+ mb();
+
+ /* Shoot the IPI if it is more favored than the target cppr */
+ if (mfrr < xc->cppr)
+ __raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
+
+ return H_SUCCESS;
+}
+
+/*
+ * We leave a gap of a couple of interrupts in the queue to
+ * account for the IPI and additional safety guard.
+ */
+#define XIVE_Q_GAP 2
+
+static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ /* Check enablement at VP level */
+ return xc->vp_cam & TM_QW1W2_HO;
+}
+
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = xc->xive;
+
+ if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE)
+ return kvmppc_xive_vcpu_has_save_restore(vcpu);
+
+ return true;
+}
+
+/*
+ * Push a vcpu's context to the XIVE on guest entry.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+ u64 pq;
+
+ /*
+ * Nothing to do if the platform doesn't have a XIVE
+ * or this vCPU doesn't have its own XIVE context
+ * (e.g. because it's not using an in-kernel interrupt controller).
+ */
+ if (!tima || !vcpu->arch.xive_cam_word)
+ return;
+
+ eieio();
+ if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+ __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
+ __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
+ vcpu->arch.xive_pushed = 1;
+ eieio();
+
+ /*
+ * We clear the irq_pending flag. There is a small chance of a
+ * race vs. the escalation interrupt happening on another
+ * processor setting it again, but the only consequence is to
+ * cause a spurious wakeup on the next H_CEDE, which is not an
+ * issue.
+ */
+ vcpu->arch.irq_pending = 0;
+
+ /*
+ * In single escalation mode, if the escalation interrupt is
+ * on, we mask it.
+ */
+ if (vcpu->arch.xive_esc_on) {
+ pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+ XIVE_ESB_SET_PQ_01));
+ mb();
+
+ /*
+ * We have a possible subtle race here: The escalation
+ * interrupt might have fired and be on its way to the
+ * host queue while we mask it, and if we unmask it
+ * early enough (re-cede right away), there is a
+ * theoretical possibility that it fires again, thus
+ * landing in the target queue more than once which is
+ * a big no-no.
+ *
+ * Fortunately, solving this is rather easy. If the
+ * above load setting PQ to 01 returns a previous
+ * value where P is set, then we know the escalation
+ * interrupt is somewhere on its way to the host. In
+ * that case we simply don't clear the xive_esc_on
+ * flag below. It will be eventually cleared by the
+ * handler for the escalation interrupt.
+ *
+ * Then, when doing a cede, we check that flag again
+ * before re-enabling the escalation interrupt, and if
+ * set, we abort the cede.
+ */
+ if (!(pq & XIVE_ESB_VAL_P))
+ /* Now P is 0, we can clear the flag */
+ vcpu->arch.xive_esc_on = 0;
+ }
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
+
+/*
+ * Pull a vcpu's context from the XIVE on guest exit.
+ * This assumes we are in virtual mode (MMU on)
+ */
+void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
+
+ if (!vcpu->arch.xive_pushed)
+ return;
+
+ /*
+ * Should not have been pushed if there is no tima
+ */
+ if (WARN_ON(!tima))
+ return;
+
+ eieio();
+ /* First load to pull the context, we ignore the value */
+ __raw_readl(tima + TM_SPC_PULL_OS_CTX);
+ /* Second load to recover the context state (Words 0 and 1) */
+ if (!kvmppc_xive_vcpu_has_save_restore(vcpu))
+ vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
+
+ /* Fixup some of the state for the next load */
+ vcpu->arch.xive_saved_state.lsmfb = 0;
+ vcpu->arch.xive_saved_state.ack = 0xff;
+ vcpu->arch.xive_pushed = 0;
+ eieio();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+
+/*
+ * Re-arm the single escalation interrupt of a vcpu on the cede path.
+ *
+ * Returns false when an escalation is still pending, in which case the
+ * cede must be aborted, and true otherwise (including when the vcpu has
+ * no escalation ESB address recorded).
+ */
+bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
+{
+	void __iomem *esb = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+	/* No ESB address recorded: nothing to re-arm */
+	if (!esb)
+		return true;
+
+	/* From here on we are using XIVE with single escalation */
+
+	if (vcpu->arch.xive_esc_on) {
+		/*
+		 * An escalation is still pending, so the cede is aborted.
+		 * PQ is set to 10 rather than 00 so that the escalation
+		 * cannot end up queued twice in the XIVE interrupt queue,
+		 * and xive_esc_on is left untouched in case we race with
+		 * xive_esc_irq().
+		 *
+		 * Escalation interrupts are never EOI'ed and StoreEOI is
+		 * not used for them, so the load-after-store ordering
+		 * offset is not needed for this load.
+		 */
+		__raw_readq(esb + XIVE_ESB_SET_PQ_10);
+		mb();
+		return false;
+	}
+
+	/* Nothing pending: flag the escalation as armed, then enable it */
+	vcpu->arch.xive_esc_on = true;
+	mb();
+	__raw_readq(esb + XIVE_ESB_SET_PQ_00);
+	mb();
+
+	return true;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
+
+/*
+ * Fire a generic XIVE interrupt by storing to its trigger page.
+ * Only valid for interrupts that actually have a trigger page.
+ *
+ * Returns true when the trigger store was issued, false (after a
+ * warning) when the interrupt is an LSI or has no trigger page.
+ */
+static bool xive_irq_trigger(struct xive_irq_data *xd)
+{
+	/* MSIs only, and those always come with a trigger page */
+	if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI) ||
+	    WARN_ON(!xd->trig_mmio))
+		return false;
+
+	out_be64(xd->trig_mmio, 0);
+	return true;
+}
+
+static irqreturn_t xive_esc_irq(int irq, void *data)
+{
+ struct kvm_vcpu *vcpu = data; /* dev_id registered with request_irq() */
+
+ vcpu->arch.irq_pending = 1;
+ smp_mb(); /* irq_pending store before the ceded/nested loads below */
+ if (vcpu->arch.ceded || vcpu->arch.nested)
+ kvmppc_fast_vcpu_kick(vcpu);
+
+ /* Since we have the no-EOI flag, the interrupt is effectively
+ * disabled now. Clearing xive_esc_on means we won't bother
+ * doing so on the next entry.
+ *
+ * This also allows the entry code to know that if a PQ combination
+ * of 10 is observed while xive_esc_on is true, it means the queue
+ * contains an unprocessed escalation interrupt. We don't make use of
+ * that knowledge today but might (see comment in book3s_hv_rmhandler.S)
+ */
+ vcpu->arch.xive_esc_on = false;
+
+ /* This orders xive_esc_on = false vs. subsequent stale_p = true */
+ smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Map and request the escalation interrupt of queue @prio for @vcpu.
+ *
+ * Does nothing (and returns 0) when an escalation virq is already
+ * recorded for that queue. With @single_escalation set, the interrupt
+ * is additionally masked, its ESB page addresses are stored in the
+ * vcpu and it is flagged so that the core code never EOIs it.
+ *
+ * Returns 0 on success, -EIO when the mapping fails, -ENOMEM when the
+ * name cannot be allocated, or the error returned by request_irq().
+ */
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+				  bool single_escalation)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q = &xc->queues[prio];
+	struct xive_irq_data *xd;
+	struct irq_data *d;
+	char *name;
+	int rc;
+
+	/* Escalation already hooked up for this queue */
+	if (xc->esc_virq[prio])
+		return 0;
+
+	/* Create the Linux mapping for the escalation interrupt */
+	xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
+	if (!xc->esc_virq[prio]) {
+		pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		return -EIO;
+	}
+
+	/* The name only carries the priority when there is one irq per queue */
+	name = single_escalation ?
+		kasprintf(GFP_KERNEL, "kvm-%lld-%d",
+			  vcpu->kvm->arch.lpid, xc->server_num) :
+		kasprintf(GFP_KERNEL, "kvm-%lld-%d-%d",
+			  vcpu->kvm->arch.lpid, xc->server_num, prio);
+	if (!name) {
+		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		rc = -ENOMEM;
+		goto error;
+	}
+
+	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
+
+	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
+			 IRQF_NO_THREAD, name, vcpu);
+	if (rc) {
+		pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
+		       prio, xc->server_num);
+		goto error;
+	}
+	xc->esc_virq_names[prio] = name;
+
+	if (!single_escalation)
+		return 0;
+
+	/*
+	 * Single escalation mode: grab the ESB MMIO of the interrupt and
+	 * mask it, publish the virtual/real address of its ESB page in the
+	 * vcpu for the asm entry/exit code, and set XIVE_IRQ_FLAG_NO_EOI so
+	 * that the core code never EOIs the escalation, which leaves it
+	 * effectively masked once it has fired.
+	 */
+	d = irq_get_irq_data(xc->esc_virq[prio]);
+	xd = irq_data_get_irq_handler_data(d);
+
+	xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
+	vcpu->arch.xive_esc_raddr = xd->eoi_page;
+	vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
+	xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
+
+	return 0;
+
+error:
+	/* Undo the mapping and forget about this escalation */
+	irq_dispose_mapping(xc->esc_virq[prio]);
+	xc->esc_virq[prio] = 0;
+	kfree(name);
+	return rc;
+}
+
+/*
+ * Allocate a zeroed queue page for queue @prio of @vcpu and configure
+ * the queue with it.
+ *
+ * Returns 0 on success (or, after a warning, when the queue already has
+ * a page), -ENOMEM when the page allocation fails, or the error from
+ * xive_native_configure_queue().
+ */
+static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = xc->xive;
+	struct xive_q *q = &xc->queues[prio];
+	void *page;
+	int rc;
+
+	if (WARN_ON(q->qpage))
+		return 0;
+
+	/* Allocate the queue and retrieve infos on current node for now */
+	page = (void *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
+	if (!page) {
+		pr_err("Failed to allocate queue %d for VCPU %d\n",
+		       prio, xc->server_num);
+		return -ENOMEM;
+	}
+	memset(page, 0, 1 << xive->q_order);
+
+	/*
+	 * Reconfigure the queue. q->qpage only becomes visible once the
+	 * queue is fully configured. Prio 0 depends on that: as soon as
+	 * qpage is observed non-NULL we stop EOI'ing every IPI and only
+	 * EOI when corresponding queue 0 entries are received.
+	 *
+	 * NOTE(review): when the configuration fails the page allocated
+	 * above is not released here -- confirm whether it is tracked
+	 * elsewhere or leaked.
+	 */
+	rc = xive_native_configure_queue(xc->vp_id, q, prio, page,
+					 xive->q_order, true);
+	if (!rc)
+		return 0;
+
+	pr_err("Failed to configure queue %d for VCPU %d\n",
+	       prio, xc->server_num);
+	return rc;
+}
+
+/*
+ * Make sure the queue for @prio is provisioned on every vcpu that has
+ * a XIVE vcpu structure. Called with xive->lock held.
+ *
+ * Returns 0 on success or when the priority is already marked in
+ * xive->qmap, otherwise the error from xive_provision_queue(). The
+ * return value of kvmppc_xive_attach_escalation() is not checked.
+ */
+static int xive_check_provisioning(struct kvm *kvm, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvm_vcpu *vcpu;
+	unsigned long idx;
+	int rc;
+
+	lockdep_assert_held(&xive->lock);
+
+	/* Nothing to do when this priority is already provisioned */
+	if (xive->qmap & (1 << prio))
+		return 0;
+
+	pr_devel("Provisioning prio... %d\n", prio);
+
+	/* Provision each VCPU and enable escalations if needed */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+
+		rc = xive_provision_queue(vcpu, prio);
+		if (rc)
+			return rc;
+
+		/* Per-queue escalations are only attached outside single escalation mode */
+		if (!kvmppc_xive_has_single_escalation(xive))
+			kvmppc_xive_attach_escalation(vcpu, prio,
+				kvmppc_xive_has_single_escalation(xive));
+	}
+
+	/* Order previous stores and mark it as provisioned */
+	mb();
+	xive->qmap |= (1 << prio);
+	return 0;
+}
+
+/*
+ * Bump the "pending" counter of queue @prio on the vcpu acting as
+ * @server. Warns and does nothing when the server cannot be found or
+ * has no XIVE vcpu structure.
+ */
+static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
+{
+	struct kvm_vcpu *vcpu = kvmppc_xive_find_server(kvm, server);
+	struct kvmppc_xive_vcpu *xc;
+
+	if (!vcpu) {
+		pr_warn("%s: Can't find server %d\n", __func__, server);
+		return;
+	}
+
+	xc = vcpu->arch.xive_vcpu;
+	if (WARN_ON(!xc))
+		return;
+
+	atomic_inc(&xc->queues[prio].pending_count);
+}
+
+/*
+ * Try to account one more interrupt on queue @prio of @vcpu.
+ *
+ * Returns 0 when the queue count was incremented, -EBUSY when the
+ * queue already holds its maximum number of interrupts, and -ENXIO
+ * when the vcpu has no valid XIVE state or the queue has no page.
+ */
+static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *q;
+	u32 limit;
+
+	if (WARN_ON(!xc) || !xc->valid)
+		return -ENXIO;
+
+	q = &xc->queues[prio];
+	if (WARN_ON(!q->qpage))
+		return -ENXIO;
+
+	/* Max number of interrupts in that queue: its size minus the gap */
+	limit = (q->msk + 1) - XIVE_Q_GAP;
+	if (atomic_add_unless(&q->count, 1, limit))
+		return 0;
+
+	return -EBUSY;
+}
+
+/*
+ * Pick a vcpu queue of priority @prio to receive an interrupt.
+ *
+ * The vcpu designated by *@server is tried first; when its queue cannot
+ * take the interrupt, every other vcpu with XIVE state is tried in turn
+ * and *@server is updated to the one that accepted it.
+ *
+ * Returns 0 on success, -EINVAL when *@server does not exist, -EBUSY
+ * when no vcpu could take the interrupt.
+ */
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
+{
+	struct kvm_vcpu *vcpu = kvmppc_xive_find_server(kvm, *server);
+	unsigned long idx;
+
+	/* The requested server must exist */
+	if (!vcpu) {
+		pr_devel("Can't find server %d\n", *server);
+		return -EINVAL;
+	}
+
+	pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
+
+	/* First choice: the requested server itself */
+	if (xive_try_pick_queue(vcpu, prio) == 0)
+		return 0;
+
+	pr_devel(" .. failed, looking up candidate...\n");
+
+	/* Fall back to any other VCPU able to take it */
+	kvm_for_each_vcpu(idx, vcpu, kvm) {
+		if (!vcpu->arch.xive_vcpu)
+			continue;
+		if (xive_try_pick_queue(vcpu, prio) != 0)
+			continue;
+
+		*server = vcpu->arch.xive_vcpu->server_num;
+		pr_devel(" found on 0x%x/%d\n", *server, prio);
+		return 0;
+	}
+	pr_devel(" no available target !\n");
+
+	/* No available target ! */
+	return -EBUSY;
+}
+
+static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
+ struct kvmppc_xive_src_block *sb,
+ struct kvmppc_xive_irq_state *state)
+{
+ struct xive_irq_data *xd;
+ u32 hw_num;
+ u8 old_prio;
+ u64 val;
+
+ /*
+ * Take the lock, set masked, try again if racing
+ * with H_EOI
+ *
+ * The loop is only left with sb->lock held, so every return
+ * path below hands the lock over to the caller. The value
+ * returned is the guest priority found before masking.
+ */
+ for (;;) {
+ arch_spin_lock(&sb->lock);
+ old_prio = state->guest_priority;
+ state->guest_priority = MASKED;
+ mb();
+ if (!state->in_eoi)
+ break;
+ state->guest_priority = old_prio; /* undo before retrying */
+ arch_spin_unlock(&sb->lock);
+ }
+
+ /* No change ? Bail */
+ if (old_prio == MASKED)
+ return old_prio;
+
+ /* Get the right irq */
+ kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+ /* Set PQ to 10, return old P and old Q and remember them */
+ val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
+ state->old_p = !!(val & 2); /* bit 1 of the ESB value: old P */
+ state->old_q = !!(val & 1); /* bit 0 of the ESB value: old Q */
+
+ /*
+ * Synchronize hardware to ensure the queues are updated when
+ * masking
+ */
+ xive_native_sync_source(hw_num);
+
+ return old_prio;
+}
+
+/*
+ * Acquire sb->lock for an unmask operation. The lock is dropped and
+ * taken again for as long as an H_EOI is in flight on @state
+ * (state->in_eoi set). Returns with sb->lock held.
+ */
+static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
+				 struct kvmppc_xive_irq_state *state)
+{
+	arch_spin_lock(&sb->lock);
+
+	/* Racing with H_EOI: release the lock and try again */
+	while (state->in_eoi) {
+		arch_spin_unlock(&sb->lock);
+		arch_spin_lock(&sb->lock);
+	}
+}
+
+/*
+ * Complete the unmasking of @state and record @prio as its guest
+ * priority. The hardware is only touched when the interrupt is
+ * currently masked; the saved P/Q bits decide what is replayed.
+ */
+static void xive_finish_unmask(struct kvmppc_xive *xive,
+			       struct kvmppc_xive_src_block *sb,
+			       struct kvmppc_xive_irq_state *state,
+			       u8 prio)
+{
+	struct xive_irq_data *xd;
+	u32 hw_num;
+
+	/* Only a currently masked interrupt needs any hardware work */
+	if (state->guest_priority == MASKED) {
+		/* Get the right irq */
+		kvmppc_xive_select_irq(state, &hw_num, &xd);
+
+		/* Saved Q was set: put it back by setting PQ to 11 */
+		if (state->old_q)
+			xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
+
+		/*
+		 * Saved P clear: perform an "effective" EOI on the
+		 * source. This also covers the cases where a FW EOI is
+		 * needed.
+		 */
+		if (!state->old_p)
+			xive_vm_source_eoi(hw_num, xd);
+
+		/* Synchronize ordering before marking unmasked */
+		mb();
+	}
+
+	state->guest_priority = prio;
+}
+
+/*
+ * Target an interrupt to a given server/prio. This falls back to
+ * another server if necessary and performs the HW targetting
+ * updates as needed.
+ *
+ * Returns the error from kvmppc_xive_select_target() when no target
+ * can be found, otherwise the result of xive_native_configure_irq().
+ *
+ * NOTE: Must be called with the state lock held
+ */
+static int xive_target_interrupt(struct kvm *kvm,
+				 struct kvmppc_xive_irq_state *state,
+				 u32 server, u8 prio)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	u32 hw_num;
+	int rc;
+
+	/*
+	 * On success @server holds the server actually selected (it may
+	 * differ from the one requested) and the count of that new
+	 * target has already been incremented. On failure there is not
+	 * much we can do, at least until we support the GIQ.
+	 */
+	rc = kvmppc_xive_select_target(kvm, &server, prio);
+	if (rc)
+		return rc;
+
+	/*
+	 * If the interrupt had a previous target, bump that queue's
+	 * pending count so that its count gets adjusted later, when the
+	 * queue is observed to be empty.
+	 */
+	if (state->act_priority != MASKED)
+		xive_inc_q_pending(kvm, state->act_server,
+				   state->act_priority);
+
+	/* Record the new target in the state ... */
+	state->act_priority = prio;
+	state->act_server = server;
+
+	/* ... then program it into the right HW interrupt */
+	kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+	return xive_native_configure_irq(hw_num,
+					 kvmppc_xive_vp(xive, server),
+					 prio, state->number);
+}
+
+/*
+ * Targetting rules: In order to avoid losing track of
+ * pending interrupts across mask and unmask, which would
+ * allow queue overflows, we implement the following rules:
+ *
+ * - Unless it was never enabled (or we run out of capacity)
+ * an interrupt is always targetted at a valid server/queue
+ * pair even when "masked" by the guest. This pair tends to
+ * be the last one used but it can be changed under some
+ * circumstances. That allows us to separate targetting
+ * from masking, we only handle accounting during (re)targetting,
+ * this also allows us to let an interrupt drain into its target
+ * queue after masking, avoiding complex schemes to remove
+ * interrupts out of remote processor queues.
+ *
+ * - When masking, we set PQ to 10 and save the previous value
+ * of P and Q.
+ *
+ * - When unmasking, if saved Q was set, we set PQ to 11
+ * otherwise we leave PQ to the HW state which will be either
+ * 10 if nothing happened or 11 if the interrupt fired while
+ * masked. Effectively we are OR'ing the previous Q into the
+ * HW Q.
+ *
+ * Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
+ * which will unmask the interrupt and shoot a new one if Q was
+ * set.
+ *
+ * Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
+ * effectively meaning an H_EOI from the guest is still expected
+ * for that interrupt).
+ *
+ * - If H_EOI occurs while masked, we clear the saved P.
+ *
+ * - When changing target, we account on the new target and
+ * increment a separate "pending" counter on the old one.
+ * This pending counter will be used to decrement the old
+ * target's count when its queue has been observed empty.
+ */
+
+int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
+ u32 priority)
+{
+ struct kvmppc_xive *xive = kvm->arch.xive;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u8 new_act_prio;
+ int rc = 0; /* stays 0 when priority is MASKED: no provisioning is done */
+ u16 idx;
+
+ if (!xive)
+ return -ENODEV;
+
+ pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
+ irq, server, priority);
+
+ /* First, check provisioning of queues */
+ if (priority != MASKED) {
+ mutex_lock(&xive->lock);
+ rc = xive_check_provisioning(xive->kvm,
+ xive_prio_from_guest(priority));
+ mutex_unlock(&xive->lock);
+ }
+ if (rc) {
+ pr_devel(" provisioning failure %d !\n", rc);
+ return rc;
+ }
+
+ sb = kvmppc_xive_find_source(xive, irq, &idx);
+ if (!sb)
+ return -EINVAL;
+ state = &sb->irq_state[idx];
+
+ /*
+ * We first handle masking/unmasking since the locking
+ * might need to be retried due to EOIs, we'll handle
+ * targetting changes later. These functions will return
+ * with the SB lock held.
+ *
+ * xive_lock_and_mask() will also set state->guest_priority
+ * but won't otherwise change other fields of the state.
+ *
+ * xive_lock_for_unmask will not actually unmask, this will
+ * be done later by xive_finish_unmask() once the targetting
+ * has been done, so we don't try to unmask an interrupt
+ * that hasn't yet been targetted.
+ */
+ if (priority == MASKED)
+ xive_lock_and_mask(xive, sb, state);
+ else
+ xive_lock_for_unmask(sb, state);
+
+
+ /*
+ * Then we handle targetting.
+ *
+ * First calculate a new "actual priority"
+ */
+ new_act_prio = state->act_priority;
+ if (priority != MASKED)
+ new_act_prio = xive_prio_from_guest(priority);
+
+ pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
+ new_act_prio, state->act_server, state->act_priority);
+
+ /*
+ * Then check if we actually need to change anything,
+ *
+ * The condition for re-targetting the interrupt is that
+ * we have a valid new priority (new_act_prio is not 0xff)
+ * and either the server or the priority changed.
+ *
+ * Note: If act_priority was ff and the new priority is
+ * also ff, we don't do anything and leave the interrupt
+ * untargetted. An attempt of doing an int_on on an
+ * untargetted interrupt will fail. If that is a problem
+ * we could initialize interrupts with valid default
+ */
+
+ if (new_act_prio != MASKED &&
+ (state->act_server != server ||
+ state->act_priority != new_act_prio))
+ rc = xive_target_interrupt(kvm, state, server, new_act_prio);
+
+ /*
+ * Perform the final unmasking of the interrupt source
+ * if necessary
+ *
+ * Note that this is done even when the retargetting above
+ * returned an error; that error is still reported through rc.
+ */
+ if (priority != MASKED)
+ xive_finish_unmask(xive, sb, state, priority);
+
+ /*
+ * Finally Update saved_priority to match. Only int_on/off
+ * set this field to a different value.
+ */
+ state->saved_priority = priority;
+
+ arch_spin_unlock(&sb->lock); /* taken by the lock helper above */
+ return rc;
+}
+
+/*
+ * Report the current server and guest priority of guest interrupt @irq.
+ *
+ * Returns 0 with *@server and *@priority filled in, -ENODEV when the VM
+ * has no XIVE device, -EINVAL when @irq has no source block.
+ */
+int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
+			 u32 *priority)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+
+	/* Sample both fields under the source block lock */
+	state = &sb->irq_state[idx];
+	arch_spin_lock(&sb->lock);
+	*server = state->act_server;
+	*priority = state->guest_priority;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+/*
+ * Re-enable guest interrupt @irq at its saved priority.
+ *
+ * Returns 0 on success or when the saved priority is MASKED (nothing
+ * to do), -ENODEV when the VM has no XIVE device, -EINVAL when @irq
+ * has no source block or was never targetted.
+ */
+int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_on(irq=0x%x)\n", irq);
+
+	/* An interrupt that was never targetted cannot be turned on */
+	if (state->act_priority == MASKED) {
+		pr_devel("int_on on untargetted interrupt\n");
+		return -EINVAL;
+	}
+
+	/* A saved priority of 0xff means there is nothing to restore */
+	if (state->saved_priority == MASKED)
+		return 0;
+
+	/* Lock, unmask at the saved priority, unlock */
+	xive_lock_for_unmask(sb, state);
+	xive_finish_unmask(xive, sb, state, state->saved_priority);
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+/*
+ * Mask guest interrupt @irq, remembering the priority it had in
+ * saved_priority.
+ *
+ * Returns 0 on success, -ENODEV when the VM has no XIVE device,
+ * -EINVAL when @irq has no source block.
+ */
+int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	if (!xive)
+		return -ENODEV;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	pr_devel("int_off(irq=0x%x)\n", irq);
+
+	/* Masking returns the previous priority with the SB lock held */
+	state->saved_priority = xive_lock_and_mask(xive, sb, state);
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+
+/*
+ * Re-trigger guest interrupt @irq through its IPI.
+ *
+ * Returns false when the source block does not exist or the source is
+ * not valid, true once the trigger has been attempted.
+ */
+static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
+{
+	struct kvmppc_xive_irq_state *state;
+	struct kvmppc_xive_src_block *sb;
+	u16 idx;
+
+	sb = kvmppc_xive_find_source(xive, irq, &idx);
+	if (!sb)
+		return false;
+
+	state = &sb->irq_state[idx];
+	if (!state->valid)
+		return false;
+
+	/*
+	 * Fire the IPI. We assume a pass-through interrupt is never
+	 * restored this way, which should be safe enough.
+	 */
+	xive_irq_trigger(&state->ipi_data);
+
+	return true;
+}
+
+/*
+ * Build the ICP register value used for state saving/migration from
+ * the per-vcpu CPPR and MFRR, with the pending priority field set to
+ * 0xff. Returns 0 when the vcpu has no XIVE state.
+ */
+u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	u64 icp;
+
+	if (!xc)
+		return 0;
+
+	/* Assemble the per-cpu state field by field */
+	icp = (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT;
+	icp |= (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT;
+	icp |= (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
+
+	return icp;
+}
+
+int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
+{
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+ u8 cppr, mfrr;
+ u32 xisr;
+
+ if (!xc || !xive)
+ return -ENOENT; /* vcpu or VM has no XIVE state */
+
+ /* Grab individual state fields. We don't use pending_pri */
+ cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
+ xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
+ KVM_REG_PPC_ICP_XISR_MASK;
+ mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
+
+ pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
+ xc->server_num, cppr, mfrr, xisr);
+
+ /*
+ * We can't update the state of a "pushed" VCPU, but that
+ * shouldn't happen because the vcpu->mutex makes running a
+ * vcpu mutually exclusive with doing one_reg get/set on it.
+ */
+ if (WARN_ON(vcpu->arch.xive_pushed))
+ return -EIO;
+
+ /* Update VCPU HW saved state */
+ vcpu->arch.xive_saved_state.cppr = cppr;
+ xc->hw_cppr = xc->cppr = cppr;
+
+ /*
+ * Update MFRR state. If it is more favored than the CPPR
+ * (mfrr < cppr), we trigger the VP IPI, which will re-evaluate
+ * the target. The VCPU will thus potentially get a spurious
+ * interrupt but that's not a big deal.
+ */
+ xc->mfrr = mfrr;
+ if (mfrr < cppr)
+ xive_irq_trigger(&xc->vp_ipi_data);
+
+ /*
+ * Now saved XIRR is "interesting". It means there's something in
+ * the legacy "1 element" queue... for an IPI we simply ignore it,
+ * as the MFRR restore will handle that. For anything else we need
+ * to force a resend of the source.
+ * However the source may not have been setup yet. If that's the
+ * case, we keep that info and increment a counter in the xive to
+ * tell subsequent xive_set_source() to go look.
+ */
+ if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
+ xc->delayed_irq = xisr;
+ xive->delayed_irqs++;
+ pr_devel(" xisr restore delayed\n");
+ }
+
+ return 0;
+}
+
+int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
+ unsigned long host_irq)
+{
+ struct kvmppc_xive *xive = kvm->arch.xive;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct irq_data *host_data =
+ irq_domain_get_irq_data(irq_get_default_domain(), host_irq);
+ unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data); /* NOTE(review): host_data is used without a NULL check -- confirm the lookup cannot fail here */
+ u16 idx;
+ u8 prio;
+ int rc;
+
+ if (!xive)
+ return -ENODEV;
+
+ pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
+ __func__, guest_irq, host_irq, hw_irq);
+
+ sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+ if (!sb)
+ return -EINVAL;
+ state = &sb->irq_state[idx];
+
+ /*
+ * Mark the passed-through interrupt as going to a VCPU,
+ * this will prevent further EOIs and similar operations
+ * from the XIVE code. It will also mask the interrupt
+ * to either PQ=10 or 11 state, the latter if the interrupt
+ * is pending. This will allow us to unmask or retrigger it
+ * after routing it to the guest with a simple EOI.
+ *
+ * The "state" argument is a "token", all it needs is to be
+ * non-NULL to switch to passed-through or NULL for the
+ * other way around. We may not yet have an actual VCPU
+ * target here and we don't really care.
+ *
+ * This is done before the SB lock is taken, so the error
+ * return below does not leave any lock held.
+ */
+ rc = irq_set_vcpu_affinity(host_irq, state);
+ if (rc) {
+ pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
+ return rc;
+ }
+
+ /*
+ * Mask and read state of IPI. We need to know if its P bit
+ * is set as that means it's potentially already using a
+ * queue entry in the target
+ *
+ * From here on sb->lock is held, until the unlock at the end.
+ */
+ prio = xive_lock_and_mask(xive, sb, state);
+ pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
+ state->old_p, state->old_q);
+
+ /* Turn the IPI hard off */
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+
+ /*
+ * Reset ESB guest mapping. Needed when ESB pages are exposed
+ * to the guest in XIVE native mode
+ */
+ if (xive->ops && xive->ops->reset_mapped)
+ xive->ops->reset_mapped(kvm, guest_irq);
+
+ /* Grab info about irq */
+ state->pt_number = hw_irq;
+ state->pt_data = irq_data_get_irq_handler_data(host_data);
+
+ /*
+ * Configure the IRQ to match the existing configuration of
+ * the IPI if it was already targetted. Otherwise this will
+ * mask the interrupt in a lossy way (act_priority is 0xff)
+ * which is fine for a never started interrupt.
+ */
+ xive_native_configure_irq(hw_irq,
+ kvmppc_xive_vp(xive, state->act_server),
+ state->act_priority, state->number);
+
+ /*
+ * We do an EOI to enable the interrupt (and retrigger if needed)
+ * if the guest has the interrupt unmasked and the P bit was *not*
+ * set in the IPI. If it was set, we know a slot may still be in
+ * use in the target queue thus we have to wait for a guest
+ * originated EOI
+ */
+ if (prio != MASKED && !state->old_p)
+ xive_vm_source_eoi(hw_irq, state->pt_data);
+
+ /* Clear old_p/old_q as they are no longer relevant */
+ state->old_p = state->old_q = false;
+
+ /* Restore guest prio (unlocks EOI) */
+ mb();
+ state->guest_priority = prio;
+ arch_spin_unlock(&sb->lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
+
+/*
+ * Hand a passed-through interrupt back to the host and switch the
+ * guest interrupt @guest_irq back to its IPI.
+ *
+ * Returns 0 on success, -ENODEV when the VM has no XIVE device,
+ * -EINVAL when @guest_irq has no source block, or the error returned
+ * by irq_set_vcpu_affinity(). The source block lock is never held on
+ * return.
+ */
+int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
+			   unsigned long host_irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u16 idx;
+	u8 prio;
+	int rc;
+
+	if (!xive)
+		return -ENODEV;
+
+	pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
+
+	sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
+	if (!sb)
+		return -EINVAL;
+	state = &sb->irq_state[idx];
+
+	/*
+	 * Mask and read state of IRQ. We need to know if its P bit
+	 * is set as that means it's potentially already using a
+	 * queue entry in the target.
+	 *
+	 * This returns with sb->lock held: every exit path below must
+	 * release it.
+	 */
+	prio = xive_lock_and_mask(xive, sb, state);
+	pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
+		 state->old_p, state->old_q);
+
+	/*
+	 * If old_p is set, the interrupt is pending, we switch it to
+	 * PQ=11. This will force a resend in the host so the interrupt
+	 * isn't lost to whatever host driver may pick it up
+	 */
+	if (state->old_p)
+		xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
+
+	/* Release the passed-through interrupt to the host */
+	rc = irq_set_vcpu_affinity(host_irq, NULL);
+	if (rc) {
+		pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
+		/*
+		 * Do not return with sb->lock held. The source is left
+		 * masked (guest_priority is MASKED and old_p/old_q were
+		 * saved by xive_lock_and_mask()), which is the state
+		 * xive_finish_unmask() starts from on a later unmask.
+		 */
+		arch_spin_unlock(&sb->lock);
+		return rc;
+	}
+
+	/* Forget about the IRQ */
+	state->pt_number = 0;
+	state->pt_data = NULL;
+
+	/*
+	 * Reset ESB guest mapping. Needed when ESB pages are exposed
+	 * to the guest in XIVE native mode
+	 */
+	if (xive->ops && xive->ops->reset_mapped)
+		xive->ops->reset_mapped(kvm, guest_irq);
+
+	/* Reconfigure the IPI */
+	xive_native_configure_irq(state->ipi_number,
+				  kvmppc_xive_vp(xive, state->act_server),
+				  state->act_priority, state->number);
+
+	/*
+	 * If old_p is set (we have a queue entry potentially
+	 * occupied) or the interrupt is masked, we set the IPI
+	 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
+	 */
+	if (prio == MASKED || state->old_p)
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
+	else
+		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
+
+	/* Restore guest prio (unlocks EOI) */
+	mb();
+	state->guest_priority = prio;
+	arch_spin_unlock(&sb->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
+
+/*
+ * Mask every valid, targetted source currently routed to @vcpu (both
+ * its IPI and, when present, its pass-through interrupt), then turn
+ * off the vcpu's escalation interrupt and forget its ESB addresses.
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	int blk, src;
+
+	for (blk = 0; blk <= xive->max_sbid; blk++) {
+		struct kvmppc_xive_src_block *sb = xive->src_blocks[blk];
+
+		if (!sb)
+			continue;
+
+		for (src = 0; src < KVMPPC_XICS_IRQ_PER_ICS; src++) {
+			struct kvmppc_xive_irq_state *state = &sb->irq_state[src];
+
+			/* Only valid sources targetted at this server matter */
+			if (!state->valid ||
+			    state->act_priority == MASKED ||
+			    state->act_server != xc->server_num)
+				continue;
+
+			/* Untarget and mask it under the source block lock */
+			arch_spin_lock(&sb->lock);
+			state->act_priority = MASKED;
+			xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+			xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+			if (state->pt_number) {
+				xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+				xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
+			}
+			arch_spin_unlock(&sb->lock);
+		}
+	}
+
+	/* Turn off the vcpu's escalation interrupt if it is armed */
+	if (vcpu->arch.xive_esc_on) {
+		__raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
+					     XIVE_ESB_SET_PQ_01));
+		vcpu->arch.xive_esc_on = false;
+	}
+
+	/*
+	 * Drop the pointers to the escalation interrupt ESB. The
+	 * vcpu->mutex is held here, so no other CPU can be executing a
+	 * KVM_RUN ioctl on this vcpu concurrently, which makes this safe.
+	 */
+	vcpu->arch.xive_esc_vaddr = 0;
+	vcpu->arch.xive_esc_raddr = 0;
+}
+
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ *
+ * @irq is the Linux virq of the escalation interrupt; its handler data
+ * is the xive_irq_data whose stale_p flag gets adjusted.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
+{
+ struct irq_data *d = irq_get_irq_data(irq);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ /*
+ * This slightly odd sequence gives the right result
+ * (i.e. stale_p set if xive_esc_on is false) even if
+ * we race with xive_esc_irq() and xive_irq_eoi().
+ */
+ xd->stale_p = false;
+ smp_mb(); /* paired with smp_wmb in xive_esc_irq */
+ if (!vcpu->arch.xive_esc_on)
+ xd->stale_p = true;
+}
+
+/*
+ * Tear down the XIVE state of @vcpu: mask its interrupts and VP IPI,
+ * release its escalation interrupts, disable the VP, free its queues
+ * and IPI, then free the XIVE vcpu structure itself.
+ *
+ * Does nothing when kvmppc_xics_enabled() is false for the vcpu or
+ * when it has no XIVE vcpu structure.
+ */
+void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+	int prio;
+
+	if (!kvmppc_xics_enabled(vcpu) || !xc)
+		return;
+
+	pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+	/* Make sure nothing is routed to that VP any more */
+	xc->valid = false;
+	kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+	/* Mask the VP IPI */
+	xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
+
+	/* Release every escalation interrupt that was attached */
+	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		if (!xc->esc_virq[prio])
+			continue;
+
+		if (kvmppc_xive_has_single_escalation(xc->xive))
+			xive_cleanup_single_escalation(vcpu, xc->esc_virq[prio]);
+		free_irq(xc->esc_virq[prio], vcpu);
+		irq_dispose_mapping(xc->esc_virq[prio]);
+		kfree(xc->esc_virq_names[prio]);
+	}
+
+	/* Disable the VP */
+	xive_native_disable_vp(xc->vp_id);
+
+	/* With a zero cam word, guest entry won't try to push context */
+	vcpu->arch.xive_cam_word = 0;
+
+	/* Disable each queue and give back its page, if any */
+	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+		struct xive_q *q = &xc->queues[prio];
+
+		xive_native_disable_queue(xc->vp_id, q, prio);
+		if (!q->qpage)
+			continue;
+
+		free_pages((unsigned long)q->qpage, xive->q_page_order);
+		q->qpage = NULL;
+	}
+
+	/* Free the IPI */
+	if (xc->vp_ipi) {
+		xive_cleanup_irq_data(&xc->vp_ipi_data);
+		xive_native_free_irq(xc->vp_ipi);
+	}
+
+	/* Free the VP */
+	kfree(xc);
+
+	/* Finally detach the vcpu from the XIVE */
+	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+	vcpu->arch.xive_vcpu = NULL;
+}
+
+/*
+ * Tell whether vCPU id @cpu fits in the block of xive->nr_servers VPs,
+ * i.e. whether its packed id is below that number.
+ */
+static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
+{
+	u32 packed_id = kvmppc_pack_vcpu_id(xive->kvm, cpu);
+
+	return packed_id < xive->nr_servers;
+}
+
+/*
+ * Compute the VP id for vCPU id @cpu, allocating the VP block of the
+ * device on first use.
+ *
+ * Returns 0 with *@vp set, -EINVAL when @cpu is out of bounds, -ENOSPC
+ * when no VP block can be allocated, -EEXIST when the VP id is already
+ * in use.
+ */
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
+{
+	u32 id;
+
+	if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
+		pr_devel("Out of bounds !\n");
+		return -EINVAL;
+	}
+
+	/* The VP block is allocated the first time it is needed */
+	if (xive->vp_base == XIVE_INVALID_VP) {
+		xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
+		pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
+
+		if (xive->vp_base == XIVE_INVALID_VP)
+			return -ENOSPC;
+	}
+
+	id = kvmppc_xive_vp(xive, cpu);
+	if (kvmppc_xive_vp_in_use(xive->kvm, id)) {
+		pr_devel("Duplicate !\n");
+		return -EEXIST;
+	}
+
+	*vp = id;
+	return 0;
+}
+
+int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_vcpu *xc;
+ int i, r = -EBUSY;
+ u32 vp_id;
+
+ pr_devel("connect_vcpu(cpu=%d)\n", cpu);
+
+ if (dev->ops != &kvm_xive_ops) {
+ pr_devel("Wrong ops !\n");
+ return -EPERM;
+ }
+ if (xive->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+ return -EBUSY; /* vcpu already has an interrupt controller type set */
+
+ /*
+ * We need to synchronize with queue provisioning
+ *
+ * NOTE(review): every failure below goes through "bail", which
+ * calls kvmppc_xive_cleanup_vcpu() while irq_type is still
+ * KVMPPC_IRQ_DEFAULT. That function returns early when
+ * kvmppc_xics_enabled() is false, so the xc allocated below may
+ * not be freed on error -- confirm against the definition of
+ * kvmppc_xics_enabled().
+ */
+ mutex_lock(&xive->lock);
+
+ r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
+ if (r)
+ goto bail;
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ r = -ENOMEM;
+ goto bail;
+ }
+
+ vcpu->arch.xive_vcpu = xc;
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = cpu;
+ xc->vp_id = vp_id;
+ xc->mfrr = 0xff;
+ xc->valid = true;
+
+ r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+ if (r)
+ goto bail;
+
+ if (!kvmppc_xive_check_save_restore(vcpu)) {
+ pr_err("inconsistent save-restore setup for VCPU %d\n", cpu);
+ r = -EIO;
+ goto bail;
+ }
+
+ /* Configure VCPU fields for use by assembly push/pull */
+ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+ vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+ /* Allocate IPI */
+ xc->vp_ipi = xive_native_alloc_irq();
+ if (!xc->vp_ipi) {
+ pr_err("Failed to allocate xive irq for VCPU IPI\n");
+ r = -EIO;
+ goto bail;
+ }
+ pr_devel(" IPI=0x%x\n", xc->vp_ipi);
+
+ r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
+ if (r)
+ goto bail;
+
+ /*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
+ if (r) {
+ pr_err("Failed to enable VP in OPAL, err %d\n", r);
+ goto bail;
+ }
+
+ /*
+ * Initialize queues. Initially we set them all for no queueing
+ * and we enable escalation for queue 0 only which we'll use for
+ * our mfrr change notifications. If the VCPU is hot-plugged, we
+ * do handle provisioning however based on the existing "map"
+ * of enabled queues.
+ */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+
+ /* Single escalation, no queue 7 */
+ if (i == 7 && kvmppc_xive_has_single_escalation(xive))
+ break;
+
+ /* Is queue already enabled ? Provision it */
+ if (xive->qmap & (1 << i)) {
+ r = xive_provision_queue(vcpu, i);
+ if (r == 0 && !kvmppc_xive_has_single_escalation(xive))
+ kvmppc_xive_attach_escalation(
+ vcpu, i, kvmppc_xive_has_single_escalation(xive));
+ if (r)
+ goto bail;
+ } else {
+ r = xive_native_configure_queue(xc->vp_id,
+ q, i, NULL, 0, true);
+ if (r) {
+ pr_err("Failed to configure queue %d for VCPU %d\n",
+ i, cpu);
+ goto bail;
+ }
+ }
+ }
+
+ /* If not done above, attach priority 0 escalation */
+ r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
+ if (r)
+ goto bail;
+
+ /* Route the IPI */
+ r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
+ if (!r)
+ xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00); /* unmask the VP IPI once routed */
+
+bail:
+ mutex_unlock(&xive->lock);
+ if (r) {
+ kvmppc_xive_cleanup_vcpu(vcpu);
+ return r;
+ }
+
+ vcpu->arch.irq_type = KVMPPC_IRQ_XICS; /* only set once everything succeeded */
+ return 0;
+}
+
+/*
+ * Scanning of queues before/after migration save
+ */
+ /*
+  * Flag the source behind @irq as being present in a vCPU event queue.
+  * Unknown source blocks are ignored; an invalid source is reported and
+  * left untouched.
+  */
+ static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
+ {
+ 	struct kvmppc_xive_irq_state *entry;
+ 	struct kvmppc_xive_src_block *block;
+ 	u16 src;
+
+ 	block = kvmppc_xive_find_source(xive, irq, &src);
+ 	if (block == NULL)
+ 		return;
+
+ 	entry = &block->irq_state[src];
+
+ 	/* A queue entry must refer to a source that was set up */
+ 	if (!entry->valid) {
+ 		pr_err("invalid irq 0x%x in cpu queue!\n", irq);
+ 		return;
+ 	}
+
+ 	/*
+ 	 * A queued interrupt is expected to have P set. Only report the
+ 	 * inconsistency: a backtrace would add nothing, so no WARN_ON.
+ 	 */
+ 	if (!entry->saved_p)
+ 		pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
+
+ 	entry->in_queue = true;
+ }
+
+ /*
+  * Mask source @irq of block @sb and snapshot its priority and P/Q bits
+  * into the saved_* fields. Invalid sources are skipped.
+  */
+ static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
+ 				   struct kvmppc_xive_src_block *sb,
+ 				   u32 irq)
+ {
+ 	struct kvmppc_xive_irq_state *entry = sb->irq_state + irq;
+
+ 	if (entry->valid) {
+ 		/* Masking takes sb->lock and also syncs the HW queues */
+ 		entry->saved_scan_prio = xive_lock_and_mask(xive, sb, entry);
+
+ 		/* Carry over the P and Q bits recorded while masking */
+ 		entry->saved_p = entry->old_p;
+ 		entry->saved_q = entry->old_q;
+
+ 		arch_spin_unlock(&sb->lock);
+ 	}
+ }
+
+ /*
+  * Undo xive_pre_save_mask_irq() for source @irq of block @sb: restore
+  * the priority saved during the scan unless the source was masked to
+  * begin with. Invalid sources are skipped.
+  */
+ static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
+ 				     struct kvmppc_xive_src_block *sb,
+ 				     u32 irq)
+ {
+ 	struct kvmppc_xive_irq_state *entry = sb->irq_state + irq;
+
+ 	if (entry->valid) {
+ 		/*
+ 		 * Take the lock to exclude EOI. This is not strictly needed
+ 		 * when the guest is not running concurrently, so the lock can
+ 		 * probably go if it ever becomes a performance problem.
+ 		 */
+ 		xive_lock_for_unmask(sb, entry);
+
+ 		if (entry->saved_scan_prio != MASKED)
+ 			xive_finish_unmask(xive, sb, entry, entry->saved_scan_prio);
+
+ 		arch_spin_unlock(&sb->lock);
+ 	}
+ }
+
+ /*
+  * Walk event queue @q with a private copy of its index and toggle (the
+  * queue itself is not advanced) and flag every entry above XICS_IPI as
+  * queued.
+  */
+ static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
+ {
+ 	u32 pos = q->idx;
+ 	u32 tgl = q->toggle;
+ 	u32 hw_irq;
+
+ 	for (;;) {
+ 		hw_irq = __xive_read_eq(q->qpage, q->msk, &pos, &tgl);
+ 		if (hw_irq > XICS_IPI)
+ 			xive_pre_save_set_queued(xive, hw_irq);
+ 		/* A zero entry means the queue is drained */
+ 		if (!hw_irq)
+ 			break;
+ 	}
+ }
+
+ /*
+  * Collect a stable snapshot of all interrupt sources before a save:
+  * mask everything, scan the vCPU queues, then restore the masking.
+  * See the comment in xive_get_source() for how this works.
+  */
+ static void xive_pre_save_scan(struct kvmppc_xive *xive)
+ {
+ 	struct kvmppc_xive_src_block *blk;
+ 	struct kvm_vcpu *vcpu = NULL;
+ 	unsigned long i;
+ 	int n;
+
+ 	/* Pass 1: mask every source and record its state */
+ 	for (i = 0; i <= xive->max_sbid; i++) {
+ 		blk = xive->src_blocks[i];
+ 		if (blk) {
+ 			for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++)
+ 				xive_pre_save_mask_irq(xive, blk, n);
+ 		}
+ 	}
+
+ 	/* Pass 2: scan the provisioned queues to update the "in_queue" flags */
+ 	kvm_for_each_vcpu(i, vcpu, xive->kvm) {
+ 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ 		if (!xc)
+ 			continue;
+ 		for (n = 0; n < KVMPPC_XIVE_Q_COUNT; n++) {
+ 			struct xive_q *q = &xc->queues[n];
+
+ 			if (q->qpage)
+ 				xive_pre_save_queue(xive, q);
+ 		}
+ 	}
+
+ 	/* Pass 3: put the interrupt states back */
+ 	for (i = 0; i <= xive->max_sbid; i++) {
+ 		blk = xive->src_blocks[i];
+ 		if (blk) {
+ 			for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++)
+ 				xive_pre_save_unmask_irq(xive, blk, n);
+ 		}
+ 	}
+ }
+
+ /*
+  * Reset the state gathered by xive_pre_save_scan(): clear every
+  * in_queue flag and rearm the scan for the next get_source().
+  */
+ static void xive_post_save_scan(struct kvmppc_xive *xive)
+ {
+ 	u32 bid, src;
+
+ 	for (bid = 0; bid <= xive->max_sbid; bid++) {
+ 		struct kvmppc_xive_src_block *blk = xive->src_blocks[bid];
+
+ 		if (blk) {
+ 			for (src = 0; src < KVMPPC_XICS_IRQ_PER_ICS; src++)
+ 				blk->irq_state[src].in_queue = false;
+ 		}
+ 	}
+
+ 	/* A zero count makes the next get_source() run a new scan */
+ 	xive->saved_src_count = 0;
+ }
+
+/*
+ * This returns the source configuration and state to user space.
+ */
+ static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
+ {
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u64 val, prio;
+ u16 idx;
+ /*
+ * @irq is the guest source number and @addr the user address that
+ * receives one u64 in the KVM_XICS_* layout built below. Returns 0,
+ * -ENOENT when the source does not exist or is not valid, or
+ * -EFAULT when the copy to user space fails.
+ */
+ sb = kvmppc_xive_find_source(xive, irq, &idx);
+ if (!sb)
+ return -ENOENT;
+
+ state = &sb->irq_state[idx];
+
+ if (!state->valid)
+ return -ENOENT;
+
+ pr_devel("get_source(%ld)...\n", irq);
+
+ /*
+ * So to properly save the state into something that looks like a
+ * XICS migration stream we cannot treat interrupts individually.
+ *
+ * We need, instead, mask them all (& save their previous PQ state)
+ * to get a stable state in the HW, then sync them to ensure that
+ * any interrupt that had already fired hits its queue, and finally
+ * scan all the queues to collect which interrupts are still present
+ * in the queues, so we can set the "pending" flag on them and
+ * they can be resent on restore.
+ *
+ * So we do it all when the "first" interrupt gets saved, all the
+ * state is collected at that point, the rest of xive_get_source()
+ * will merely collect and convert that state to the expected
+ * userspace bit mask.
+ */
+ if (xive->saved_src_count == 0)
+ xive_pre_save_scan(xive);
+ xive->saved_src_count++;
+
+ /* Convert saved state into something compatible with xics */
+ val = state->act_server;
+ prio = state->saved_scan_prio;
+
+ if (prio == MASKED) {
+ val |= KVM_XICS_MASKED;
+ prio = state->saved_priority; /* report the priority saved when masking */
+ }
+ val |= prio << KVM_XICS_PRIORITY_SHIFT;
+ if (state->lsi) {
+ val |= KVM_XICS_LEVEL_SENSITIVE;
+ if (state->saved_p)
+ val |= KVM_XICS_PENDING;
+ } else {
+ if (state->saved_p)
+ val |= KVM_XICS_PRESENTED;
+
+ if (state->saved_q)
+ val |= KVM_XICS_QUEUED;
+
+ /*
+ * We mark it pending (which will attempt a re-delivery)
+ * if we are in a queue *or* we were masked and had
+ * Q set which is equivalent to the XICS "masked pending"
+ * state
+ *
+ * NOTE(review): when the source was masked, prio has
+ * already been replaced by state->saved_priority above,
+ * so the test below compares saved_priority with MASKED
+ * rather than saved_scan_prio. Confirm this is intended.
+ */
+ if (state->in_queue || (prio == MASKED && state->saved_q))
+ val |= KVM_XICS_PENDING;
+ }
+
+ /*
+ * If that was the last interrupt saved, reset the
+ * in_queue flags
+ */
+ if (xive->saved_src_count == xive->src_count)
+ xive_post_save_scan(xive);
+
+ /* Copy the result to userspace */
+ if (put_user(val, ubufp))
+ return -EFAULT;
+
+ return 0;
+ }
+
+ /*
+  * Create the source block covering @irq if it does not exist yet.
+  *
+  * Returns whatever block is registered for @irq once xive->lock has
+  * been dropped: the new block, one installed concurrently by another
+  * caller, or NULL when the allocation failed.
+  */
+ struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+ 	struct kvmppc_xive *xive, int irq)
+ {
+ 	struct kvmppc_xive_src_block *blk;
+ 	int bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+ 	int n;
+
+ 	mutex_lock(&xive->lock);
+
+ 	/* Only create the ICS when nobody else got here first */
+ 	if (!xive->src_blocks[bid]) {
+ 		blk = kzalloc(sizeof(*blk), GFP_KERNEL);
+ 		if (blk) {
+ 			blk->id = bid;
+
+ 			for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++) {
+ 				struct kvmppc_xive_irq_state *st = &blk->irq_state[n];
+
+ 				st->number = (bid << KVMPPC_XICS_ICS_SHIFT) | n;
+ 				st->eisn = 0;
+ 				st->guest_priority = MASKED;
+ 				st->saved_priority = MASKED;
+ 				st->act_priority = MASKED;
+ 			}
+
+ 			/* Finish initialising the block before publishing it */
+ 			smp_wmb();
+ 			xive->src_blocks[bid] = blk;
+
+ 			if (bid > xive->max_sbid)
+ 				xive->max_sbid = bid;
+ 		}
+ 	}
+
+ 	mutex_unlock(&xive->lock);
+ 	return xive->src_blocks[bid];
+ }
+
+ /*
+  * Look for a vCPU that has @irq stashed as its delayed irq. The first
+  * match is consumed (stash cleared, device counter decremented) and
+  * true is returned; false when no vCPU holds it.
+  */
+ static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
+ {
+ 	struct kvm_vcpu *vcpu = NULL;
+ 	unsigned long i;
+
+ 	kvm_for_each_vcpu(i, vcpu, xive->kvm) {
+ 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ 		if (xc && xc->delayed_irq == irq) {
+ 			xc->delayed_irq = 0;
+ 			xive->delayed_irqs--;
+ 			return true;
+ 		}
+ 	}
+
+ 	return false;
+ }
+
+ /*
+  * Restore the configuration and state of guest source @irq from the
+  * u64 found at user address @addr (KVM_XICS_* layout).
+  *
+  * The source block and the backing IPI are created on demand. The
+  * source is first masked, then retargeted, then has its P/Q and LSI
+  * state rebuilt, and is finally unmasked if the saved state says so.
+  *
+  * Returns 0 on success, -ENOENT when @irq is out of range, -ENOMEM
+  * when the source block or the IPI cannot be allocated, -EFAULT when
+  * the user value cannot be read, or the error reported by
+  * xive_native_populate_irq_data(). A provisioning or targeting
+  * failure is not reported: the source is left masked instead.
+  */
+ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
+ {
+ 	struct kvmppc_xive_src_block *sb;
+ 	struct kvmppc_xive_irq_state *state;
+ 	u64 __user *ubufp = (u64 __user *) addr;
+ 	u16 idx;
+ 	u64 val;
+ 	u8 act_prio, guest_prio;
+ 	u32 server;
+ 	int rc = 0;
+
+ 	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
+ 		return -ENOENT;
+
+ 	pr_devel("set_source(irq=0x%lx)\n", irq);
+
+ 	/* Find the source */
+ 	sb = kvmppc_xive_find_source(xive, irq, &idx);
+ 	if (!sb) {
+ 		pr_devel("No source, creating source block...\n");
+ 		sb = kvmppc_xive_create_src_block(xive, irq);
+ 		if (!sb) {
+ 			pr_devel("Failed to create block...\n");
+ 			return -ENOMEM;
+ 		}
+ 	}
+ 	state = &sb->irq_state[idx];
+
+ 	/* Read user passed data */
+ 	if (get_user(val, ubufp)) {
+ 		pr_devel("fault getting user info !\n");
+ 		return -EFAULT;
+ 	}
+
+ 	server = val & KVM_XICS_DESTINATION_MASK;
+ 	guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
+
+ 	/* Print val as a zero-padded 16 digit hex value */
+ 	pr_devel(" val=0x%016llx (server=0x%x, guest_prio=%d)\n",
+ 		 val, server, guest_prio);
+
+ 	/*
+ 	 * If the source doesn't already have an IPI, allocate
+ 	 * one and get the corresponding data
+ 	 */
+ 	if (!state->ipi_number) {
+ 		state->ipi_number = xive_native_alloc_irq();
+ 		if (state->ipi_number == 0) {
+ 			pr_devel("Failed to allocate IPI !\n");
+ 			return -ENOMEM;
+ 		}
+ 		/*
+ 		 * Without valid irq data the IPI cannot be used, so give
+ 		 * it back and report the failure rather than carrying on
+ 		 * with an unpopulated ipi_data.
+ 		 */
+ 		rc = xive_native_populate_irq_data(state->ipi_number,
+ 						   &state->ipi_data);
+ 		if (rc) {
+ 			pr_devel("Failed to populate IPI data !\n");
+ 			xive_native_free_irq(state->ipi_number);
+ 			state->ipi_number = 0;
+ 			return rc;
+ 		}
+ 		pr_devel(" src_ipi=0x%x\n", state->ipi_number);
+ 	}
+
+ 	/*
+ 	 * We use lock_and_mask() to set us in the right masked
+ 	 * state. We will override that state from the saved state
+ 	 * further down, but this will handle the cases of interrupts
+ 	 * that need FW masking. We set the initial guest_priority to
+ 	 * 0 before calling it to ensure it actually performs the masking.
+ 	 */
+ 	state->guest_priority = 0;
+ 	xive_lock_and_mask(xive, sb, state);
+
+ 	/*
+ 	 * Now, we select a target if we have one. If we don't we
+ 	 * leave the interrupt untargetted. It means that an interrupt
+ 	 * can become "untargetted" across migration if it was masked
+ 	 * by set_xive() but there is little we can do about it.
+ 	 */
+
+ 	/* First convert prio and mark interrupt as untargetted */
+ 	act_prio = xive_prio_from_guest(guest_prio);
+ 	state->act_priority = MASKED;
+
+ 	/*
+ 	 * We need to drop the lock due to the mutex below. Hopefully
+ 	 * nothing is touching that interrupt yet since it hasn't been
+ 	 * advertized to a running guest yet
+ 	 */
+ 	arch_spin_unlock(&sb->lock);
+
+ 	/* If we have a priority target the interrupt */
+ 	if (act_prio != MASKED) {
+ 		/* First, check provisioning of queues */
+ 		mutex_lock(&xive->lock);
+ 		rc = xive_check_provisioning(xive->kvm, act_prio);
+ 		mutex_unlock(&xive->lock);
+
+ 		/* Target interrupt */
+ 		if (rc == 0)
+ 			rc = xive_target_interrupt(xive->kvm, state,
+ 						   server, act_prio);
+ 		/*
+ 		 * If provisioning or targetting failed, leave it
+ 		 * alone and masked. It will remain disabled until
+ 		 * the guest re-targets it.
+ 		 */
+ 	}
+
+ 	/*
+ 	 * Find out if this was a delayed irq stashed in an ICP,
+ 	 * in which case, treat it as pending
+ 	 */
+ 	if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
+ 		val |= KVM_XICS_PENDING;
+ 		pr_devel(" Found delayed ! forcing PENDING !\n");
+ 	}
+
+ 	/* Cleanup the SW state */
+ 	state->old_p = false;
+ 	state->old_q = false;
+ 	state->lsi = false;
+ 	state->asserted = false;
+
+ 	/* Restore LSI state */
+ 	if (val & KVM_XICS_LEVEL_SENSITIVE) {
+ 		state->lsi = true;
+ 		if (val & KVM_XICS_PENDING)
+ 			state->asserted = true;
+ 		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
+ 	}
+
+ 	/*
+ 	 * Restore P and Q. If the interrupt was pending, we
+ 	 * force Q and !P, which will trigger a resend.
+ 	 *
+ 	 * That means that a guest that had both an interrupt
+ 	 * pending (queued) and Q set will restore with only
+ 	 * one instance of that interrupt instead of 2, but that
+ 	 * is perfectly fine as coalescing interrupts that haven't
+ 	 * been presented yet is always allowed.
+ 	 */
+ 	if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
+ 		state->old_p = true;
+ 	if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
+ 		state->old_q = true;
+
+ 	pr_devel(" P=%d, Q=%d\n", state->old_p, state->old_q);
+
+ 	/*
+ 	 * If the interrupt was unmasked, update guest priority and
+ 	 * perform the appropriate state transition and do a
+ 	 * re-trigger if necessary.
+ 	 */
+ 	if (val & KVM_XICS_MASKED) {
+ 		pr_devel(" masked, saving prio\n");
+ 		state->guest_priority = MASKED;
+ 		state->saved_priority = guest_prio;
+ 	} else {
+ 		pr_devel(" unmasked, restoring to prio %d\n", guest_prio);
+ 		xive_finish_unmask(xive, sb, state, guest_prio);
+ 		state->saved_priority = guest_prio;
+ 	}
+
+ 	/* Increment the number of valid sources and mark this one valid */
+ 	if (!state->valid)
+ 		xive->src_count++;
+ 	state->valid = true;
+
+ 	return 0;
+ }
+
+ /*
+  * Inject guest interrupt @irq at @level on behalf of user space.
+  *
+  * Returns -ENODEV when the VM has no XIVE device, -EINVAL when the
+  * source is unknown, not valid or passed through, and 0 otherwise.
+  * A request to lower the line only updates the asserted state and
+  * does not trigger.
+  */
+ int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
+ 			bool line_status)
+ {
+ 	struct kvmppc_xive *xive = kvm->arch.xive;
+ 	struct kvmppc_xive_src_block *blk;
+ 	struct kvmppc_xive_irq_state *src;
+ 	u16 idx;
+
+ 	if (!xive)
+ 		return -ENODEV;
+
+ 	blk = kvmppc_xive_find_source(xive, irq, &idx);
+ 	if (!blk)
+ 		return -EINVAL;
+
+ 	/* Perform locklessly .... (we need to do some RCUisms here...) */
+ 	src = &blk->irq_state[idx];
+
+ 	/* Triggers are refused on invalid and on passed-through sources */
+ 	if (!src->valid || src->pt_number)
+ 		return -EINVAL;
+
+ 	if ((level == 1 && src->lsi) || level == KVM_INTERRUPT_SET_LEVEL) {
+ 		src->asserted = true;
+ 	} else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
+ 		src->asserted = false;
+ 		return 0;
+ 	}
+
+ 	/* Fire the IPI backing this source */
+ 	xive_irq_trigger(&src->ipi_data);
+
+ 	return 0;
+ }
+
+ /*
+  * Set the number of servers (VP block size) from the u32 found at
+  * user address @addr.
+  *
+  * Returns -EFAULT when the value cannot be read, -EINVAL when it is
+  * zero or above KVM_MAX_VCPU_IDS, -EBUSY once the VP block has been
+  * allocated, and 0 otherwise.
+  */
+ int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
+ {
+ 	u32 __user *uptr = (u32 __user *) addr;
+ 	u32 nr_servers;
+ 	int rc = 0;
+
+ 	if (get_user(nr_servers, uptr))
+ 		return -EFAULT;
+
+ 	pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
+
+ 	if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS)
+ 		return -EINVAL;
+
+ 	mutex_lock(&xive->lock);
+ 	if (xive->vp_base != XIVE_INVALID_VP) {
+ 		/*
+ 		 * The VP block is allocated once and freed when the device
+ 		 * is released. Its size must not change afterwards because
+ 		 * connect_vcpu uses it to validate vCPU ids: raising it
+ 		 * again could yield a VP id beyond the VP block, which is
+ 		 * likely to crash OPAL.
+ 		 */
+ 		rc = -EBUSY;
+ 	} else {
+ 		/*
+ 		 * More servers than KVM_MAX_VCPUS are never needed: higher
+ 		 * vCPU ids get packed down below KVM_MAX_VCPUS by
+ 		 * kvmppc_pack_vcpu_id().
+ 		 */
+ 		xive->nr_servers = nr_servers > KVM_MAX_VCPUS ?
+ 			KVM_MAX_VCPUS : nr_servers;
+ 	}
+ 	mutex_unlock(&xive->lock);
+
+ 	return rc;
+ }
+
+ /*
+  * set_attr handler. The existing XICS ioctl layout is honored: source
+  * state and the NR_SERVERS control are accepted, anything else gets
+  * -ENXIO.
+  */
+ static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+ {
+ 	struct kvmppc_xive *xive = dev->private;
+
+ 	if (attr->group == KVM_DEV_XICS_GRP_SOURCES)
+ 		return xive_set_source(xive, attr->attr, attr->addr);
+
+ 	if (attr->group == KVM_DEV_XICS_GRP_CTRL &&
+ 	    attr->attr == KVM_DEV_XICS_NR_SERVERS)
+ 		return kvmppc_xive_set_nr_servers(xive, attr->addr);
+
+ 	return -ENXIO;
+ }
+
+ /*
+  * get_attr handler. Only the XICS source group can be read back;
+  * every other group gets -ENXIO.
+  */
+ static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+ {
+ 	struct kvmppc_xive *xive = dev->private;
+
+ 	if (attr->group != KVM_DEV_XICS_GRP_SOURCES)
+ 		return -ENXIO;
+
+ 	return xive_get_source(xive, attr->attr, attr->addr);
+ }
+
+ /*
+  * has_attr handler. The limits are the XICS ones, at least for now:
+  * source numbers within the XICS range and the NR_SERVERS control are
+  * supported (0), everything else is not (-ENXIO).
+  */
+ static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+ {
+ 	bool supported = false;
+
+ 	if (attr->group == KVM_DEV_XICS_GRP_SOURCES)
+ 		supported = attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
+ 			    attr->attr < KVMPPC_XICS_NR_IRQS;
+ 	else if (attr->group == KVM_DEV_XICS_GRP_CTRL)
+ 		supported = attr->attr == KVM_DEV_XICS_NR_SERVERS;
+
+ 	return supported ? 0 : -ENXIO;
+ }
+
+ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
+ { /* Quiesce HW interrupt hw_num (with irq data xd) before it is released */
+ xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); /* presumably sets the ESB PQ bits to 01 (masked) - confirm against the XIVE ESB docs */
+ xive_native_configure_irq(hw_num, 0, MASKED, 0); /* retarget to VP 0 at priority MASKED; the return value is ignored */
+ }
+
+ /*
+  * Quiesce and release the HW resources behind every valid source of
+  * @sb, then mark each source invalid. The block itself is not freed.
+  */
+ void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
+ {
+ 	struct kvmppc_xive_irq_state *src;
+ 	int n;
+
+ 	for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++) {
+ 		src = &sb->irq_state[n];
+
+ 		if (src->valid) {
+ 			/* The IPI is owned by the source: mask it and free it */
+ 			kvmppc_xive_cleanup_irq(src->ipi_number, &src->ipi_data);
+ 			xive_cleanup_irq_data(&src->ipi_data);
+ 			xive_native_free_irq(src->ipi_number);
+
+ 			/* A pass-through irq is masked too but keeps its HW data */
+ 			if (src->pt_number)
+ 				kvmppc_xive_cleanup_irq(src->pt_number, src->pt_data);
+
+ 			src->valid = false;
+ 		}
+ 	}
+ }
+
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xive_release(struct kvm_device *dev)
+{
+ struct kvmppc_xive *xive = dev->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ pr_devel("Releasing xive device\n");
+
+ /*
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd referring to the
+ * device. Therefore there can not be any of the device
+ * attribute set/get functions being executed concurrently,
+ * and similarly, the connect_vcpu and set/clr_mapped
+ * functions also cannot be being executed.
+ */
+
+ debugfs_remove(xive->dentry);
+
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
+ * Holding the vcpu->mutex also means that the vcpu cannot
+ * be executing the KVM_RUN ioctl, and therefore it cannot
+ * be executing the XIVE push or pull code or accessing
+ * the XIVE MMIO regions.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xive_cleanup_vcpu(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+ /*
+ * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+ * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+ * against xive code getting called during vcpu execution or
+ * set/get one_reg operations.
+ */
+ kvm->arch.xive = NULL;
+
+ /* Mask and free interrupts */
+ for (i = 0; i <= xive->max_sbid; i++) {
+ if (xive->src_blocks[i])
+ kvmppc_xive_free_sources(xive->src_blocks[i]);
+ kfree(xive->src_blocks[i]);
+ xive->src_blocks[i] = NULL;
+ }
+
+ if (xive->vp_base != XIVE_INVALID_VP)
+ xive_native_free_vp_block(xive->vp_base);
+
+ /*
+ * A reference of the kvmppc_xive pointer is now kept under
+ * the xive_devices struct of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
+
+ kfree(dev);
+}
+
+/*
+ * When the guest chooses the interrupt mode (XICS legacy or XIVE
+ * native), the VM switches to the matching KVM device. The previous
+ * device is "released" before the new one is created.
+ *
+ * Until we are sure all execution paths are well protected, provide a
+ * fail safe (transitional) method for device destruction, in which
+ * the XIVE device pointer is recycled and not directly freed.
+ */
+ /*
+  * Return the kvmppc_xive structure to use for a device of @type in
+  * @kvm. A structure left over from a previously released device is
+  * zeroed and reused, otherwise a new one is allocated and remembered.
+  * Returns NULL when the allocation fails.
+  */
+ struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
+ {
+ 	struct kvmppc_xive **slot;
+ 	struct kvmppc_xive *xive;
+
+ 	/* One recycling slot per device flavour */
+ 	if (type == KVM_DEV_TYPE_XIVE)
+ 		slot = &kvm->arch.xive_devices.native;
+ 	else
+ 		slot = &kvm->arch.xive_devices.xics_on_xive;
+
+ 	xive = *slot;
+ 	if (xive) {
+ 		memset(xive, 0, sizeof(*xive));
+ 		return xive;
+ 	}
+
+ 	xive = kzalloc(sizeof(*xive), GFP_KERNEL);
+ 	*slot = xive;
+ 	return xive;
+ }
+
+/*
+ * Create a XICS device with XIVE backend. kvm->lock is held.
+ */
+ /*
+  * Device create callback. Returns -EEXIST when the VM already has a
+  * XIVE device, -ENOMEM when no kvmppc_xive structure can be obtained,
+  * and 0 once the device is initialised and published in
+  * kvm->arch.xive.
+  */
+ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
+ {
+ 	struct kvm *kvm = dev->kvm;
+ 	struct kvmppc_xive *xive;
+
+ 	pr_devel("Creating xive for partition\n");
+
+ 	/* Only one XIVE device per VM */
+ 	if (kvm->arch.xive)
+ 		return -EEXIST;
+
+ 	xive = kvmppc_xive_get_device(kvm, type);
+ 	if (!xive)
+ 		return -ENOMEM;
+
+ 	dev->private = xive;
+ 	xive->dev = dev;
+ 	xive->kvm = kvm;
+ 	mutex_init(&xive->lock);
+
+ 	/* The queue size is the default one set by the host */
+ 	xive->q_order = xive_native_default_eq_shift();
+ 	xive->q_page_order = xive->q_order < PAGE_SHIFT ?
+ 		0 : xive->q_order - PAGE_SHIFT;
+
+ 	/* The VP block is only allocated on the first connect_vcpu call */
+ 	xive->vp_base = XIVE_INVALID_VP;
+
+ 	/*
+ 	 * KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
+ 	 * on a POWER9 system.
+ 	 */
+ 	xive->nr_servers = KVM_MAX_VCPUS;
+
+ 	if (xive_native_has_single_escalation())
+ 		xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+ 	if (xive_native_has_save_restore())
+ 		xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
+
+ 	kvm->arch.xive = xive;
+ 	return 0;
+ }
+
+ /*
+  * Dispatch XICS hcall @req for @vcpu to the XIVE-backed handlers.
+  *
+  * Returns H_TOO_HARD when the VM has not configured XICS mode,
+  * H_UNSUPPORTED for an unknown request, and the handler's result
+  * otherwise. H_XIRR_X always reports H_SUCCESS and also stores the
+  * guest timebase in GPR5.
+  */
+ int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ {
+ 	int rc = H_UNSUPPORTED;
+
+ 	/* The VM should have configured XICS mode before doing XICS hcalls. */
+ 	if (!kvmppc_xics_enabled(vcpu))
+ 		return H_TOO_HARD;
+
+ 	switch (req) {
+ 	case H_XIRR:
+ 		rc = xive_vm_h_xirr(vcpu);
+ 		break;
+ 	case H_CPPR:
+ 		rc = xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ 		break;
+ 	case H_EOI:
+ 		rc = xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ 		break;
+ 	case H_IPI:
+ 		rc = xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ 				   kvmppc_get_gpr(vcpu, 5));
+ 		break;
+ 	case H_IPOLL:
+ 		rc = xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ 		break;
+ 	case H_XIRR_X:
+ 		xive_vm_h_xirr(vcpu);
+ 		kvmppc_set_gpr(vcpu, 5, get_tb() + kvmppc_get_tb_offset(vcpu));
+ 		rc = H_SUCCESS;
+ 		break;
+ 	}
+
+ 	return rc;
+ }
+EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
+
+ /*
+  * Dump, for each priority of @vcpu, the next two queue entries and
+  * the state of the escalation interrupt, skipping whatever is not
+  * set up. The vCPU must have XIVE state attached. Always returns 0.
+  */
+ int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
+ {
+ 	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ 	unsigned int prio;
+
+ 	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+ 		struct xive_q *q = &xc->queues[prio];
+
+ 		if (q->qpage) {
+ 			u32 pos = q->idx;
+ 			u32 first, second;
+
+ 			seq_printf(m, " q[%d]: ", prio);
+ 			first = be32_to_cpup(q->qpage + pos);
+ 			pos = (pos + 1) & q->msk;
+ 			second = be32_to_cpup(q->qpage + pos);
+ 			seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
+ 				   first, second);
+ 		}
+
+ 		if (xc->esc_virq[prio]) {
+ 			struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
+ 			struct xive_irq_data *xd =
+ 				irq_data_get_irq_handler_data(d);
+ 			u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+ 			seq_printf(m, " ESC %d %c%c EOI @%llx",
+ 				   xc->esc_virq[prio],
+ 				   (pq & XIVE_ESB_VAL_P) ? 'P' : '-',
+ 				   (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-',
+ 				   xd->eoi_page);
+ 			seq_puts(m, "\n");
+ 		}
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Print a header line followed by one line per valid source of @sb,
+  * showing the guest number, the HW number in use, the source type,
+  * the live PQ bits and the current targeting.
+  */
+ void kvmppc_xive_debug_show_sources(struct seq_file *m,
+ 				    struct kvmppc_xive_src_block *sb)
+ {
+ 	int n;
+
+ 	seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n");
+
+ 	for (n = 0; n < KVMPPC_XICS_IRQ_PER_ICS; n++) {
+ 		struct kvmppc_xive_irq_state *src = &sb->irq_state[n];
+ 		struct xive_irq_data *xd;
+ 		u32 hw_num;
+ 		u64 pq;
+
+ 		if (!src->valid)
+ 			continue;
+
+ 		/* Report on whichever of the IPI or pass-through irq is in use */
+ 		kvmppc_xive_select_irq(src, &hw_num, &xd);
+ 		pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+
+ 		seq_printf(m, "%08x %08x/%02x", src->number, hw_num,
+ 			   xd->src_chip);
+
+ 		if (!src->lsi)
+ 			seq_puts(m, " MSI");
+ 		else
+ 			seq_printf(m, " %cLSI", src->asserted ? '^' : ' ');
+
+ 		seq_printf(m, " %s %c%c %08x % 4d/%d",
+ 			   src->ipi_number == hw_num ? "IPI" : " PT",
+ 			   pq & XIVE_ESB_VAL_P ? 'P' : '-',
+ 			   pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
+ 			   src->eisn, src->act_server,
+ 			   src->act_priority);
+
+ 		seq_puts(m, "\n");
+ 	}
+ }
+
+ /*
+  * debugfs show callback: per-vCPU XIVE state and queues, the hcall
+  * counters summed over all vCPUs, then every source block. Prints
+  * nothing when the device is not bound to a VM. Always returns 0.
+  */
+ static int xive_debug_show(struct seq_file *m, void *private)
+ {
+ 	struct kvmppc_xive *xive = m->private;
+ 	struct kvm *kvm = xive->kvm;
+ 	struct kvm_vcpu *vcpu;
+ 	u64 rm_xirr = 0, rm_ipoll = 0, rm_cppr = 0, rm_eoi = 0, rm_ipi = 0;
+ 	u64 vm_xirr = 0, vm_ipoll = 0, vm_cppr = 0, vm_eoi = 0, vm_ipi = 0;
+ 	unsigned long i;
+
+ 	if (!kvm)
+ 		return 0;
+
+ 	seq_puts(m, "=========\nVCPU state\n=========\n");
+
+ 	kvm_for_each_vcpu(i, vcpu, kvm) {
+ 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ 		/* vCPUs not connected to the device have nothing to show */
+ 		if (!xc)
+ 			continue;
+
+ 		seq_printf(m, "VCPU %d: VP:%#x/%02x\n"
+ 			   " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
+ 			   xc->server_num, xc->vp_id, xc->vp_chip_id,
+ 			   xc->cppr, xc->hw_cppr,
+ 			   xc->mfrr, xc->pending,
+ 			   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+
+ 		kvmppc_xive_debug_show_queues(m, vcpu);
+
+ 		rm_xirr += xc->stat_rm_h_xirr;
+ 		rm_ipoll += xc->stat_rm_h_ipoll;
+ 		rm_cppr += xc->stat_rm_h_cppr;
+ 		rm_eoi += xc->stat_rm_h_eoi;
+ 		rm_ipi += xc->stat_rm_h_ipi;
+ 		vm_xirr += xc->stat_vm_h_xirr;
+ 		vm_ipoll += xc->stat_vm_h_ipoll;
+ 		vm_cppr += xc->stat_vm_h_cppr;
+ 		vm_eoi += xc->stat_vm_h_eoi;
+ 		vm_ipi += xc->stat_vm_h_ipi;
+ 	}
+
+ 	seq_puts(m, "Hcalls totals\n");
+ 	seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", rm_xirr, vm_xirr);
+ 	seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", rm_ipoll, vm_ipoll);
+ 	seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", rm_cppr, vm_cppr);
+ 	seq_printf(m, " H_EOI R=%10lld V=%10lld\n", rm_eoi, vm_eoi);
+ 	seq_printf(m, " H_IPI R=%10lld V=%10lld\n", rm_ipi, vm_ipi);
+
+ 	seq_puts(m, "=========\nSources\n=========\n");
+
+ 	for (i = 0; i <= xive->max_sbid; i++) {
+ 		struct kvmppc_xive_src_block *blk = xive->src_blocks[i];
+
+ 		if (!blk)
+ 			continue;
+
+ 		/* Hold the block lock while its sources are dumped */
+ 		arch_spin_lock(&blk->lock);
+ 		kvmppc_xive_debug_show_sources(m, blk);
+ 		arch_spin_unlock(&blk->lock);
+ 	}
+
+ 	return 0;
+ }
+
+DEFINE_SHOW_ATTRIBUTE(xive_debug);
+
+ /*
+  * Create the read-only "xive" debugfs file under the VM's debugfs
+  * directory and remember its dentry for removal at release time.
+  */
+ static void xive_debugfs_init(struct kvmppc_xive *xive)
+ {
+ 	struct dentry *parent = xive->kvm->debugfs_dentry;
+
+ 	xive->dentry = debugfs_create_file("xive", S_IRUGO, parent,
+ 					   xive, &xive_debug_fops);
+
+ 	pr_debug("%s: created\n", __func__);
+ }
+
+ /*
+  * Device init callback: registers the debug interfaces of the device.
+  */
+ static void kvmppc_xive_init(struct kvm_device *dev)
+ {
+ 	xive_debugfs_init(dev->private);
+ }
+
+ struct kvm_device_ops kvm_xive_ops = { /* callbacks of the XICS-on-XIVE KVM device; connect_vcpu checks dev->ops against this table */
+ .name = "kvm-xive",
+ .create = kvmppc_xive_create, /* obtains and initialises the kvmppc_xive */
+ .init = kvmppc_xive_init, /* registers the debugfs file */
+ .release = kvmppc_xive_release, /* runs when the device fd is closed */
+ .set_attr = xive_set_attr, /* sources and NR_SERVERS */
+ .get_attr = xive_get_attr, /* sources only */
+ .has_attr = xive_has_attr,
+ };
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
new file mode 100644
index 000000000000..62bf39f53783
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XIVE_H
+#define _KVM_PPC_BOOK3S_XIVE_H
+
+#ifdef CONFIG_KVM_XICS
+#include "book3s_xics.h"
+
+/*
+ * The XIVE Interrupt source numbers are within the range 0 to
+ * KVMPPC_XICS_NR_IRQS.
+ */
+#define KVMPPC_XIVE_FIRST_IRQ 0
+#define KVMPPC_XIVE_NR_IRQS KVMPPC_XICS_NR_IRQS
+
+/*
+ * State for one guest irq source.
+ *
+ * For each guest source we allocate a HW interrupt in the XIVE
+ * which we use for all SW triggers. It will be unused for
+ * pass-through but it's easier to keep around as the same
+ * guest interrupt can alternatively be emulated or pass-through
+ * if a physical device is hot unplugged and replaced with an
+ * emulated one.
+ *
+ * This state structure is very similar to the XICS one with
+ * additional XIVE specific tracking.
+ */
+ struct kvmppc_xive_irq_state {
+ bool valid; /* Interrupt entry is valid */
+
+ u32 number; /* Guest IRQ number */
+ u32 ipi_number; /* XIVE IPI HW number */
+ struct xive_irq_data ipi_data; /* XIVE IPI associated data */
+ u32 pt_number; /* XIVE Pass-through number if any */
+ struct xive_irq_data *pt_data; /* XIVE Pass-through associated data */
+
+ /* Targeting as set by guest */
+ u8 guest_priority; /* Guest set priority */
+ u8 saved_priority; /* Saved priority when masking */
+
+ /* Actual targeting */
+ u32 act_server; /* Actual server */
+ u8 act_priority; /* Actual priority */
+
+ /* Various state bits */
+ bool in_eoi; /* Synchronize with H_EOI */
+ bool old_p; /* P bit state when masking */
+ bool old_q; /* Q bit state when masking */
+ bool lsi; /* level-sensitive interrupt */
+ bool asserted; /* Only for emulated LSI: current state */
+
+ /* Saved for migration state */
+ bool in_queue; /* Set by the pre-save queue scan, cleared by the post-save scan */
+ bool saved_p; /* Copy of old_p taken by the pre-save mask */
+ bool saved_q; /* Copy of old_q taken by the pre-save mask */
+ u8 saved_scan_prio; /* Value returned by xive_lock_and_mask() during the pre-save scan */
+
+ /* Xive native */
+ u32 eisn; /* Guest Effective IRQ number */
+ };
+
+/* Select the "right" interrupt (IPI vs. passthrough) */
+ /*
+  * Report the HW interrupt currently backing @state: the pass-through
+  * interrupt when one is set, the IPI otherwise. Either output pointer
+  * may be NULL when the caller does not need that value.
+  */
+ static inline void kvmppc_xive_select_irq(struct kvmppc_xive_irq_state *state,
+ 					  u32 *out_hw_irq,
+ 					  struct xive_irq_data **out_xd)
+ {
+ 	bool passthrough = state->pt_number != 0;
+
+ 	if (out_hw_irq)
+ 		*out_hw_irq = passthrough ? state->pt_number : state->ipi_number;
+ 	if (out_xd)
+ 		*out_xd = passthrough ? state->pt_data : &state->ipi_data;
+ }
+
+/*
+ * This corresponds to an "ICS" in XICS terminology, we use it
+ * as a means to break up source information into multiple structures.
+ */
+ struct kvmppc_xive_src_block {
+ arch_spinlock_t lock; /* Taken around mask/unmask of the sources and around the debugfs dump */
+ u16 id; /* Block number: guest irq >> KVMPPC_XICS_ICS_SHIFT */
+ struct kvmppc_xive_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS]; /* Indexed by guest irq & KVMPPC_XICS_SRC_MASK */
+ };
+
+struct kvmppc_xive;
+
+ struct kvmppc_xive_ops { /* Optional per-device hooks; no user of this table is visible in this file section */
+ int (*reset_mapped)(struct kvm *kvm, unsigned long guest_irq); /* presumably called when the mapping of guest_irq changes - TODO confirm against callers */
+ };
+
+#define KVMPPC_XIVE_FLAG_SINGLE_ESCALATION 0x1
+#define KVMPPC_XIVE_FLAG_SAVE_RESTORE 0x2
+
+ struct kvmppc_xive {
+ struct kvm *kvm; /* Owning VM */
+ struct kvm_device *dev; /* KVM device this state belongs to */
+ struct dentry *dentry; /* debugfs "xive" file */
+
+ /* VP block associated with the VM (XIVE_INVALID_VP until allocated) */
+ u32 vp_base;
+
+ /* Blocks of sources */
+ struct kvmppc_xive_src_block *src_blocks[KVMPPC_XICS_MAX_ICS_ID + 1];
+ u32 max_sbid; /* Highest block id created so far */
+
+ /*
+ * For state save, we lazily scan the queues on the first interrupt
+ * being migrated. We don't have a clean way to reset those flags
+ * so we keep track of the number of valid sources and how many of
+ * them were migrated so we can reset when all of them have been
+ * processed.
+ */
+ u32 src_count;
+ u32 saved_src_count;
+
+ /*
+ * Some irqs are delayed on restore until the source is created,
+ * keep track here of how many of them
+ */
+ u32 delayed_irqs;
+
+ /* Which queues (priorities) are in use by the guest: bit N set for priority N */
+ u8 qmap;
+
+ /* Queue orders */
+ u32 q_order; /* Host default EQ shift */
+ u32 q_page_order; /* q_order - PAGE_SHIFT, or 0 when q_order is smaller */
+
+ /* Flags (KVMPPC_XIVE_FLAG_*) */
+ u8 flags;
+
+ /* Number of entries in the VP block */
+ u32 nr_servers;
+
+ struct kvmppc_xive_ops *ops;
+ struct address_space *mapping;
+ struct mutex mapping_lock;
+ struct mutex lock; /* Held for vCPU connection, source block creation and nr_servers updates */
+ };
+
+#define KVMPPC_XIVE_Q_COUNT 8
+
+ struct kvmppc_xive_vcpu {
+ struct kvmppc_xive *xive; /* Device this vCPU is connected to */
+ struct kvm_vcpu *vcpu; /* Back pointer to the owning vCPU */
+ bool valid; /* Set when the vCPU is connected, cleared first thing on cleanup */
+
+ /* Server number. This is the HW CPU ID from a guest perspective */
+ u32 server_num;
+
+ /*
+ * HW VP corresponding to this VCPU. This is the base of the VP
+ * block plus the packed server number (see kvmppc_xive_vp()).
+ */
+ u32 vp_id;
+ u32 vp_chip_id; /* Filled by xive_native_get_vp_info() */
+ u32 vp_cam; /* Filled by xive_native_get_vp_info() */
+
+ /* IPI used for sending ... IPIs */
+ u32 vp_ipi;
+ struct xive_irq_data vp_ipi_data;
+
+ /* Local emulation state */
+ uint8_t cppr; /* guest CPPR */
+ uint8_t hw_cppr;/* Hardware CPPR */
+ uint8_t mfrr; /* Initialised to 0xff when the vCPU is connected */
+ uint8_t pending;
+
+ /* Each VP has 8 queues though we only provision some */
+ struct xive_q queues[KVMPPC_XIVE_Q_COUNT];
+ u32 esc_virq[KVMPPC_XIVE_Q_COUNT]; /* Escalation irq per priority, 0 when none */
+ char *esc_virq_names[KVMPPC_XIVE_Q_COUNT];
+
+ /* Stash a delayed irq on restore from migration (see set_icp) */
+ u32 delayed_irq;
+
+ /* Stats: hcall counters, reported as R= (rm) and V= (vm) by the debugfs file */
+ u64 stat_rm_h_xirr;
+ u64 stat_rm_h_ipoll;
+ u64 stat_rm_h_cppr;
+ u64 stat_rm_h_eoi;
+ u64 stat_rm_h_ipi;
+ u64 stat_vm_h_xirr;
+ u64 stat_vm_h_ipoll;
+ u64 stat_vm_h_cppr;
+ u64 stat_vm_h_eoi;
+ u64 stat_vm_h_ipi;
+ };
+
+ /*
+  * Return the vCPU of @kvm connected as server number @nr, or NULL
+  * when no connected vCPU uses that number.
+  */
+ static inline struct kvm_vcpu *kvmppc_xive_find_server(struct kvm *kvm, u32 nr)
+ {
+ 	struct kvm_vcpu *vcpu = NULL;
+ 	unsigned long i;
+
+ 	kvm_for_each_vcpu(i, vcpu, kvm) {
+ 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ 		if (!xc)
+ 			continue;
+ 		if (xc->server_num == nr)
+ 			return vcpu;
+ 	}
+
+ 	return NULL;
+ }
+
+ /*
+  * Split guest interrupt number @irq into a block id and an index in
+  * that block. The index is stored in @source when it is not NULL,
+  * even if the lookup fails. Returns the source block, or NULL when
+  * the block id is out of range or the block was never created.
+  */
+ static inline struct kvmppc_xive_src_block *kvmppc_xive_find_source(struct kvmppc_xive *xive,
+ 		u32 irq, u16 *source)
+ {
+ 	u32 bid;
+
+ 	if (source)
+ 		*source = irq & KVMPPC_XICS_SRC_MASK;
+
+ 	bid = irq >> KVMPPC_XICS_ICS_SHIFT;
+ 	return bid > KVMPPC_XICS_MAX_ICS_ID ? NULL : xive->src_blocks[bid];
+ }
+
+/*
+ * When the XIVE resources are allocated at the HW level, the VP
+ * structures describing the vCPUs of a guest are distributed among
+ * the chips to optimize the PowerBUS usage. For best performance, the
+ * guest vCPUs can be pinned to match the VP structure distribution.
+ *
+ * Currently, the VP identifiers are deduced from the vCPU id using
+ * the kvmppc_pack_vcpu_id() routine which is not incorrect but not
+ * optimal either. If VSMT is used, the result is not contiguous and
+ * the constraints on HW resources described above cannot be met.
+ */
+ /*
+  * Compute the HW VP id for guest server number @server: the packed
+  * vCPU id added to the base of the VM's VP block.
+  */
+ static inline u32 kvmppc_xive_vp(struct kvmppc_xive *xive, u32 server)
+ {
+ 	u32 packed_id = kvmppc_pack_vcpu_id(xive->kvm, server);
+
+ 	return xive->vp_base + packed_id;
+ }
+
+ /*
+  * Tell whether a connected vCPU of @kvm already uses HW VP @vp_id.
+  */
+ static inline bool kvmppc_xive_vp_in_use(struct kvm *kvm, u32 vp_id)
+ {
+ 	struct kvm_vcpu *vcpu = NULL;
+ 	unsigned long i;
+
+ 	kvm_for_each_vcpu(i, vcpu, kvm) {
+ 		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ 		if (!xc)
+ 			continue;
+ 		if (xc->vp_id == vp_id)
+ 			return true;
+ 	}
+
+ 	return false;
+ }
+
+/*
+ * Mapping between guest priorities and host priorities
+ * is as follows.
+ *
+ * Guest requests for 0...6 are honored. Guest requests for anything
+ * higher result in a priority of 6 being applied.
+ *
+ * Similar mapping is done for CPPR values
+ */
+ /*
+  * Convert a guest priority to a host priority: 0xff (masked) and
+  * values below 6 pass through, anything else is clamped to 6.
+  */
+ static inline u8 xive_prio_from_guest(u8 prio)
+ {
+ 	return (prio == 0xff || prio < 6) ? prio : 6;
+ }
+
+ static inline u8 xive_prio_to_guest(u8 prio)
+ {
+ return prio; /* identity: a host priority is reported to the guest unchanged */
+ }
+
+ /*
+  * Read the next entry of an event queue.
+  *
+  * @qpage is the queue page, @msk the index mask, @idx and @toggle the
+  * caller's read position and generation bit. Returns 0 when there is
+  * no queue page or when the top bit of the entry at *idx equals
+  * *toggle. Otherwise *idx is advanced, *toggle flips when the index
+  * wraps to 0, and the low 31 bits of the entry are returned.
+  */
+ static inline u32 __xive_read_eq(__be32 *qpage, u32 msk, u32 *idx, u32 *toggle)
+ {
+ 	u32 entry, next;
+
+ 	if (!qpage)
+ 		return 0;
+
+ 	entry = be32_to_cpup(qpage + *idx);
+ 	if ((entry >> 31) == *toggle)
+ 		return 0;
+
+ 	next = (*idx + 1) & msk;
+ 	*idx = next;
+ 	if (!next)
+ 		*toggle ^= 1;
+
+ 	return entry & 0x7fffffff;
+ }
+
+/*
+ * Common Xive routines for XICS-over-XIVE and XIVE native
+ */
+void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu);
+int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu);
+void kvmppc_xive_debug_show_sources(struct seq_file *m,
+ struct kvmppc_xive_src_block *sb);
+struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
+ struct kvmppc_xive *xive, int irq);
+void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb);
+int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
+int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
+ bool single_escalation);
+struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq);
+int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
+int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
+bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu);
+
+ /*
+  * Tell whether the device was created with the single escalation
+  * flag set.
+  */
+ static inline bool kvmppc_xive_has_single_escalation(struct kvmppc_xive *xive)
+ {
+ 	return (xive->flags & KVMPPC_XIVE_FLAG_SINGLE_ESCALATION) != 0;
+ }
+
+#endif /* CONFIG_KVM_XICS */
+ #endif /* _KVM_PPC_BOOK3S_XIVE_H */
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
new file mode 100644
index 000000000000..d9bf1bc3ff61
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -0,0 +1,1284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2017-2019, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "xive-kvm: " fmt
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/file.h>
+#include <linux/irqdomain.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xive.h>
+#include <asm/xive-regs.h>
+#include <asm/debug.h>
+#include <asm/opal.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "book3s_xive.h"
+
+static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
+{
+	/*
+	 * The KVM XIVE native device never issues the XIVE_ESB_SET_PQ_10
+	 * load operation, so load-after-store ordering does not have to
+	 * be enforced here.
+	 */
+	u64 raw = in_be64(xd->eoi_mmio + offset);
+
+	/* Only the low byte of the 64-bit MMIO load is returned. */
+	return (u8)raw;
+}
+
+static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct xive_q *queue = &xc->queues[prio];
+
+	/* The queue is always disabled, whether or not a page is attached. */
+	xive_native_disable_queue(xc->vp_id, queue, prio);
+
+	if (!queue->qpage)
+		return;
+
+	/* Drop the reference held on the queue page and forget it. */
+	put_page(virt_to_page(queue->qpage));
+	queue->qpage = NULL;
+}
+
+static int kvmppc_xive_native_configure_queue(u32 vp_id, struct xive_q *q,
+					      u8 prio, __be32 *qpage,
+					      u32 order, bool can_escalate)
+{
+	/* Remember the page currently attached before it gets replaced. */
+	__be32 *old_qpage = q->qpage;
+	int rc;
+
+	rc = xive_native_configure_queue(vp_id, q, prio, qpage, order,
+					 can_escalate);
+
+	/* Release the previous page only once the new setup succeeded. */
+	if (!rc && old_qpage)
+		put_page(virt_to_page(old_qpage));
+
+	return rc;
+}
+
+void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
+{ /*
+ * Tear down the XIVE native state attached to @vcpu: the escalation
+ * interrupts, the VP, the event queues and the kvmppc_xive_vcpu
+ * structure itself. Returns without doing anything when XIVE is not
+ * enabled on the vCPU or when no xive_vcpu is attached.
+ */
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ int i;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return;
+
+ if (!xc)
+ return;
+
+ pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);
+
+ /* Ensure no interrupt is still routed to that VP */
+ xc->valid = false;
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ /* Free escalations: one slot is examined per queue */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ /* Free the escalation irq; a zero virq means the slot is unused */
+ if (xc->esc_virq[i]) {
+ if (kvmppc_xive_has_single_escalation(xc->xive))
+ xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
+ free_irq(xc->esc_virq[i], vcpu);
+ irq_dispose_mapping(xc->esc_virq[i]);
+ kfree(xc->esc_virq_names[i]);
+ xc->esc_virq[i] = 0; /* mark the slot as released */
+ }
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues (disables each queue and drops its page reference) */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ kvmppc_xive_native_cleanup_queue(vcpu, i);
+ }
+
+ /* Free the per-vCPU XIVE structure */
+ kfree(xc);
+
+ /* Cleanup the vcpu: back to the default irq type, no dangling pointer */
+ vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
+ vcpu->arch.xive_vcpu = NULL;
+}
+
+int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 server_num)
+{ /*
+ * Attach @vcpu to the XIVE native device @dev as server @server_num.
+ *
+ * Returns 0 on success, -EPERM when @dev is not a XIVE native device
+ * or belongs to another VM, -EBUSY when the vCPU irq_type is not
+ * KVMPPC_IRQ_DEFAULT, -ENOMEM when the per-vCPU structure cannot be
+ * allocated, -EIO when the save-restore check fails, or the error
+ * reported by the VP id computation or the xive_native_*() calls.
+ * Every failure reached after the lock is taken goes through
+ * kvmppc_xive_native_cleanup_vcpu().
+ */
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_vcpu *xc = NULL;
+ int rc;
+ u32 vp_id;
+
+ pr_devel("native_connect_vcpu(server=%d)\n", server_num);
+
+ if (dev->ops != &kvm_xive_native_ops) {
+ pr_devel("Wrong ops !\n");
+ return -EPERM;
+ }
+ if (xive->kvm != vcpu->kvm)
+ return -EPERM;
+ if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
+ return -EBUSY;
+
+ mutex_lock(&xive->lock);
+
+ rc = kvmppc_xive_compute_vp_id(xive, server_num, &vp_id);
+ if (rc)
+ goto bail;
+
+ xc = kzalloc(sizeof(*xc), GFP_KERNEL);
+ if (!xc) {
+ rc = -ENOMEM;
+ goto bail;
+ }
+
+ vcpu->arch.xive_vcpu = xc; /* published first so the bail path can free it */
+ xc->xive = xive;
+ xc->vcpu = vcpu;
+ xc->server_num = server_num;
+
+ xc->vp_id = vp_id;
+ xc->valid = true;
+ vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;
+
+ rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
+ if (rc) {
+ pr_err("Failed to get VP info from OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ if (!kvmppc_xive_check_save_restore(vcpu)) {
+ pr_err("inconsistent save-restore setup for VCPU %d\n", server_num);
+ rc = -EIO;
+ goto bail;
+ }
+
+ /*
+ * Enable the VP first as the single escalation mode will
+ * affect escalation interrupts numbering
+ */
+ rc = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive));
+ if (rc) {
+ pr_err("Failed to enable VP in OPAL: %d\n", rc);
+ goto bail;
+ }
+
+ /* Configure VCPU fields for use by assembly push/pull */
+ vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
+ vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
+
+ /* TODO: reset all queues to a clean state ? */
+bail:
+ mutex_unlock(&xive->lock); /* released before the cleanup below runs */
+ if (rc)
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+
+ return rc;
+}
+
+/*
+ * Device passthrough support
+ */
+static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
+{
+	struct kvmppc_xive *xive = kvm->arch.xive;
+	pgoff_t first_esb_page;
+
+	if (irq >= KVMPPC_XIVE_NR_IRQS)
+		return -EINVAL;
+
+	/* Every interrupt owns two consecutive ESB pages. */
+	first_esb_page = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;
+
+	/*
+	 * Drop the ESB pages of the interrupt being mapped into (or
+	 * unmapped from) the guest; the VM fault handler repopulates
+	 * them with the appropriate ESB pages (device or IC).
+	 */
+	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
+
+	mutex_lock(&xive->mapping_lock);
+	if (xive->mapping) {
+		unmap_mapping_range(xive->mapping,
+				    first_esb_page << PAGE_SHIFT,
+				    2ull << PAGE_SHIFT, 1);
+	}
+	mutex_unlock(&xive->mapping_lock);
+
+	return 0;
+}
+
+static struct kvmppc_xive_ops kvmppc_xive_native_ops = { /* installed as xive->ops by kvmppc_xive_native_create() */
+ .reset_mapped = kvmppc_xive_native_reset_mapped, /* unmaps the two ESB pages of an interrupt */
+};
+
+static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
+{ /*
+ * Page fault handler of the ESB mapping. The faulting page offset is
+ * turned into an interrupt number, the matching source is looked up
+ * and the PFN of its trigger or EOI page is inserted in the VMA.
+ *
+ * Returns VM_FAULT_SIGBUS when the source is not found, is not valid
+ * or has no page for the requested slot, VM_FAULT_NOPAGE otherwise.
+ */
+ struct vm_area_struct *vma = vmf->vma;
+ struct kvm_device *dev = vma->vm_file->private_data;
+ struct kvmppc_xive *xive = dev->private;
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ struct xive_irq_data *xd;
+ u32 hw_num;
+ u16 src;
+ u64 page;
+ unsigned long irq;
+ u64 page_offset;
+
+ /*
+ * Linux/KVM uses a two pages ESB setting, one for trigger and
+ * one for EOI
+ */
+ page_offset = vmf->pgoff - vma->vm_pgoff;
+ irq = page_offset / 2; /* two pages per interrupt */
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb) {
+ pr_devel("%s: source %lx not found !\n", __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ state = &sb->irq_state[src];
+
+ /* Some sanity checking */
+ if (!state->valid) {
+ pr_devel("%s: source %lx invalid !\n", __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ kvmppc_xive_select_irq(state, &hw_num, &xd); /* hw_num is not used afterwards, only xd */
+
+ arch_spin_lock(&sb->lock);
+
+ /*
+ * first/even page is for trigger
+ * second/odd page is for EOI and management.
+ */
+ page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
+ arch_spin_unlock(&sb->lock);
+
+ if (WARN_ON(!page)) {
+ pr_err("%s: accessing invalid ESB page for source %lx !\n",
+ __func__, irq);
+ return VM_FAULT_SIGBUS;
+ }
+
+ vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT); /* NOTE(review): the return value is not checked */
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct xive_native_esb_vmops = { /* set on VMAs mapped at KVM_XIVE_ESB_PAGE_OFFSET */
+ .fault = xive_native_esb_fault, /* inserts the trigger/EOI page of the faulting source */
+};
+
+static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	pgoff_t tima_page = vmf->pgoff - vma->vm_pgoff;
+
+	/*
+	 * Page 2 (OS) is the only one that gets mapped. Pages 0 (HW) and
+	 * 1 (HV) are forbidden, page 3 (USER) is still a TODO, and any
+	 * other offset is rejected as well.
+	 */
+	if (tima_page != 2)
+		return VM_FAULT_SIGBUS;
+
+	vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
+	return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct xive_native_tima_vmops = { /* set on VMAs mapped at KVM_XIVE_TIMA_PAGE_OFFSET */
+ .fault = xive_native_tima_fault, /* only maps the OS page, everything else gets SIGBUS */
+};
+
+static int kvmppc_xive_native_mmap(struct kvm_device *dev,
+				   struct vm_area_struct *vma)
+{
+	struct kvmppc_xive *xive = dev->private;
+	const struct vm_operations_struct *fault_ops;
+	unsigned long page_limit;
+
+	/* Only mappings at the two fixed offsets are accepted for now. */
+	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
+		fault_ops = &xive_native_tima_vmops;
+		page_limit = 4;
+	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
+		fault_ops = &xive_native_esb_vmops;
+		page_limit = KVMPPC_XIVE_NR_IRQS * 2;
+	} else {
+		return -EINVAL;
+	}
+
+	/* Reject a mapping larger than the selected region. */
+	if (vma_pages(vma) > page_limit)
+		return -EINVAL;
+	vma->vm_ops = fault_ops;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP);
+	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);
+
+	/*
+	 * Keep the address_space of the KVM device file so that the ESB
+	 * page mappings can be cleared when a device is passed through
+	 * into the guest.
+	 */
+	xive->mapping = vma->vm_file->f_mapping;
+	return 0;
+}
+
+static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
+ u64 addr)
+{ /*
+ * Create or re-initialise interrupt source @irq from the 64-bit value
+ * read at user address @addr. The source block is created on demand,
+ * an IPI is allocated if the source has none, the LSI state is
+ * restored from the user value and the source is left masked.
+ *
+ * Returns 0 on success, -E2BIG when @irq is out of range, -ENOMEM
+ * when the source block cannot be created, -EFAULT when the user
+ * value cannot be read, -ENXIO when no IPI can be allocated.
+ */
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u64 val;
+ u16 idx;
+ int rc;
+
+ pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+ if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
+ return -E2BIG;
+
+ sb = kvmppc_xive_find_source(xive, irq, &idx);
+ if (!sb) {
+ pr_debug("No source, creating source block...\n");
+ sb = kvmppc_xive_create_src_block(xive, irq);
+ if (!sb) {
+ pr_err("Failed to create block...\n");
+ return -ENOMEM;
+ }
+ }
+ state = &sb->irq_state[idx]; /* NOTE(review): relies on find_source filling idx even when it returns NULL - confirm */
+
+ if (get_user(val, ubufp)) {
+ pr_err("fault getting user info !\n");
+ return -EFAULT;
+ }
+
+ arch_spin_lock(&sb->lock);
+
+ /*
+ * If the source doesn't already have an IPI, allocate
+ * one and get the corresponding data
+ */
+ if (!state->ipi_number) {
+ state->ipi_number = xive_native_alloc_irq();
+ if (state->ipi_number == 0) {
+ pr_err("Failed to allocate IRQ !\n");
+ rc = -ENXIO;
+ goto unlock;
+ }
+ xive_native_populate_irq_data(state->ipi_number,
+ &state->ipi_data);
+ pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
+ state->ipi_number, irq);
+ }
+
+ /* Restore LSI state; the flags are only ever set here, never cleared */
+ if (val & KVM_XIVE_LEVEL_SENSITIVE) {
+ state->lsi = true;
+ if (val & KVM_XIVE_LEVEL_ASSERTED)
+ state->asserted = true;
+ pr_devel(" LSI ! Asserted=%d\n", state->asserted);
+ }
+
+ /* Mask IRQ to start with */
+ state->act_server = 0;
+ state->act_priority = MASKED;
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+
+ /* Increment the number of valid sources and mark this one valid */
+ if (!state->valid)
+ xive->src_count++;
+ state->valid = true;
+
+ rc = 0;
+
+unlock:
+ arch_spin_unlock(&sb->lock);
+
+ return rc;
+}
+
+static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
+ struct kvmppc_xive_src_block *sb,
+ struct kvmppc_xive_irq_state *state,
+ u32 server, u8 priority, bool masked,
+ u32 eisn)
+{ /*
+ * Apply a new (server, priority, eisn) routing to the source @state,
+ * under the source block lock. Nothing is done when the three values
+ * already match the recorded ones. A MASKED priority or a set
+ * @masked flag records and programs a masked configuration instead.
+ *
+ * Returns 0, or the error from the target selection or from
+ * xive_native_configure_irq().
+ */
+ struct kvm *kvm = xive->kvm;
+ u32 hw_num;
+ int rc = 0;
+
+ arch_spin_lock(&sb->lock);
+
+ if (state->act_server == server && state->act_priority == priority &&
+ state->eisn == eisn)
+ goto unlock; /* unchanged: rc is still 0 */
+
+ pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
+ priority, server, masked, state->act_server,
+ state->act_priority);
+
+ kvmppc_xive_select_irq(state, &hw_num, NULL);
+
+ if (priority != MASKED && !masked) {
+ rc = kvmppc_xive_select_target(kvm, &server, priority); /* may update server */
+ if (rc)
+ goto unlock;
+
+ state->act_priority = priority;
+ state->act_server = server;
+ state->eisn = eisn;
+
+ rc = xive_native_configure_irq(hw_num,
+ kvmppc_xive_vp(xive, server),
+ priority, eisn);
+ } else {
+ state->act_priority = MASKED;
+ state->act_server = 0;
+ state->eisn = 0;
+
+ rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
+ }
+
+unlock:
+ arch_spin_unlock(&sb->lock);
+ return rc;
+}
+
+static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
+ long irq, u64 addr)
+{ /*
+ * Decode the 64-bit source configuration stored at user address
+ * @addr (priority, server, masked flag, EISN) and apply it to
+ * source @irq.
+ *
+ * Returns -ENOENT when the source block does not exist, -EINVAL when
+ * the source is not valid or the priority is not accepted by
+ * xive_prio_from_guest(), -EFAULT when the user value cannot be read,
+ * otherwise the result of the configuration update.
+ */
+ struct kvmppc_xive_src_block *sb;
+ struct kvmppc_xive_irq_state *state;
+ u64 __user *ubufp = (u64 __user *) addr;
+ u16 src;
+ u64 kvm_cfg;
+ u32 server;
+ u8 priority;
+ bool masked;
+ u32 eisn;
+
+ sb = kvmppc_xive_find_source(xive, irq, &src);
+ if (!sb)
+ return -ENOENT;
+
+ state = &sb->irq_state[src];
+
+ if (!state->valid)
+ return -EINVAL;
+
+ if (get_user(kvm_cfg, ubufp))
+ return -EFAULT;
+
+ pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);
+
+ priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
+ KVM_XIVE_SOURCE_PRIORITY_SHIFT;
+ server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
+ KVM_XIVE_SOURCE_SERVER_SHIFT;
+ masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
+ KVM_XIVE_SOURCE_MASKED_SHIFT;
+ eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
+ KVM_XIVE_SOURCE_EISN_SHIFT;
+
+ if (priority != xive_prio_from_guest(priority)) { /* the mapping would alter it: reject */
+ pr_err("invalid priority for queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+
+ return kvmppc_xive_native_update_source_config(xive, sb, state, server,
+ priority, masked, eisn);
+}
+
+/*
+ * Synchronize interrupt source @irq through xive_native_sync_source().
+ *
+ * @addr is not used. Returns 0 when the source has been synced,
+ * -ENOENT when no source block covers @irq and -EINVAL when the source
+ * is not marked valid.
+ */
+static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
+					  long irq, u64 addr)
+{
+	struct kvmppc_xive_src_block *sb;
+	struct kvmppc_xive_irq_state *state;
+	u32 hw_num;
+	u16 src;
+	int rc = -EINVAL;
+
+	/* Newline-terminated like every other trace message in this file */
+	pr_devel("%s irq=0x%lx\n", __func__, irq);
+
+	sb = kvmppc_xive_find_source(xive, irq, &src);
+	if (!sb)
+		return -ENOENT;
+
+	state = &sb->irq_state[src];
+
+	/* The validity check and the sync run under the source block lock */
+	arch_spin_lock(&sb->lock);
+
+	if (state->valid) {
+		/* Only the HW interrupt number is needed, not the irq data */
+		kvmppc_xive_select_irq(state, &hw_num, NULL);
+		xive_native_sync_source(hw_num);
+		rc = 0;
+	}
+
+	arch_spin_unlock(&sb->lock);
+	return rc;
+}
+
+static int xive_native_validate_queue_size(u32 qshift)
+{
+	/*
+	 * Only 64K pages (shift 16) are supported for the moment, which
+	 * is also what the DT property "ibm,xive-eq-sizes" advertises.
+	 * A shift of 0 is accepted too: it stands for an EQ reset.
+	 * Every other size, including 12, 21 and 24, is refused.
+	 */
+	bool accepted = (qshift == 0) || (qshift == 16);
+
+	return accepted ? 0 : -EINVAL;
+}
+
+static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
+ long eq_idx, u64 addr)
+{ /*
+ * Configure (or reset) the event queue identified by @eq_idx, a
+ * packed priority/server tuple, from the struct kvm_ppc_xive_eq
+ * stored at user address @addr.
+ *
+ * A zero qshift resets the queue and disables queueing. Otherwise the
+ * flags, size and alignment are validated, the guest page is looked
+ * up and handed to the queue, the queue state is optionally restored
+ * and the escalation interrupt is attached.
+ *
+ * Returns 0 on success, -EFAULT, -ENOENT or -EINVAL on invalid input,
+ * or the error reported by the underlying configuration calls.
+ */
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_xive_vcpu *xc;
+ void __user *ubufp = (void __user *) addr;
+ u32 server;
+ u8 priority;
+ struct kvm_ppc_xive_eq kvm_eq;
+ int rc;
+ __be32 *qaddr = NULL;
+ struct page *page;
+ struct xive_q *q;
+ gfn_t gfn;
+ unsigned long page_size;
+ int srcu_idx;
+
+ /*
+ * Demangle priority/server tuple from the EQ identifier
+ */
+ priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+ KVM_XIVE_EQ_PRIORITY_SHIFT;
+ server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+ KVM_XIVE_EQ_SERVER_SHIFT;
+
+ if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
+ return -EFAULT;
+
+ vcpu = kvmppc_xive_find_server(kvm, server);
+ if (!vcpu) {
+ pr_err("Can't find server %d\n", server);
+ return -ENOENT;
+ }
+ xc = vcpu->arch.xive_vcpu;
+
+ if (priority != xive_prio_from_guest(priority)) { /* the mapping would alter it: reject */
+ pr_err("Trying to restore invalid queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+ q = &xc->queues[priority];
+
+ pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+ __func__, server, priority, kvm_eq.flags,
+ kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+ /* reset queue and disable queueing */
+ if (!kvm_eq.qshift) {
+ q->guest_qaddr = 0;
+ q->guest_qshift = 0;
+
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ NULL, 0, true);
+ if (rc) {
+ pr_err("Failed to reset queue %d for VCPU %d: %d\n",
+ priority, xc->server_num, rc);
+ return rc;
+ }
+
+ return 0;
+ }
+
+ /*
+ * sPAPR specifies a "Unconditional Notify (n) flag" for the
+ * H_INT_SET_QUEUE_CONFIG hcall which forces notification
+ * without using the coalescing mechanisms provided by the
+ * XIVE END ESBs. This is required on KVM as notification
+ * using the END ESBs is not supported.
+ */
+ if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
+ pr_err("invalid flags %d\n", kvm_eq.flags);
+ return -EINVAL;
+ }
+
+ rc = xive_native_validate_queue_size(kvm_eq.qshift);
+ if (rc) {
+ pr_err("invalid queue size %d\n", kvm_eq.qshift);
+ return rc;
+ }
+
+ if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) { /* must be aligned on the queue size */
+ pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
+ 1ull << kvm_eq.qshift);
+ return -EINVAL;
+ }
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ gfn = gpa_to_gfn(kvm_eq.qaddr);
+
+ page_size = kvm_host_page_size(vcpu, gfn);
+ if (1ull << kvm_eq.qshift > page_size) { /* the queue must fit in one host page */
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ pr_warn("Incompatible host page size %lx!\n", page_size);
+ return -EINVAL;
+ }
+
+ page = gfn_to_page(kvm, gfn);
+ if (!page) {
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
+ return -EINVAL;
+ }
+
+ qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ /*
+ * Back up the guest address of the queue page so that the EQ
+ * page can be marked dirty for migration.
+ */
+ q->guest_qaddr = kvm_eq.qaddr;
+ q->guest_qshift = kvm_eq.qshift;
+
+ /*
+ * Unconditional Notification is forced by default at the
+ * OPAL level because the use of END ESBs is not supported by
+ * Linux.
+ */
+ rc = kvmppc_xive_native_configure_queue(xc->vp_id, q, priority,
+ (__be32 *) qaddr, kvm_eq.qshift, true);
+ if (rc) {
+ pr_err("Failed to configure queue %d for VCPU %d: %d\n",
+ priority, xc->server_num, rc);
+ put_page(page); /* the new page was not installed: drop its reference */
+ return rc;
+ }
+
+ /*
+ * Only restore the queue state when needed. When doing the
+ * H_INT_SET_SOURCE_CONFIG hcall, it should not.
+ */
+ if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
+ rc = xive_native_set_queue_state(xc->vp_id, priority,
+ kvm_eq.qtoggle,
+ kvm_eq.qindex);
+ if (rc)
+ goto error;
+ }
+
+ rc = kvmppc_xive_attach_escalation(vcpu, priority,
+ kvmppc_xive_has_single_escalation(xive));
+error:
+ if (rc)
+ kvmppc_xive_native_cleanup_queue(vcpu, priority); /* disables the queue and releases its page */
+ return rc;
+}
+
+static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
+ long eq_idx, u64 addr)
+{ /*
+ * Report the configuration of the event queue identified by @eq_idx
+ * (packed priority/server tuple) into the struct kvm_ppc_xive_eq at
+ * user address @addr.
+ *
+ * Returns 0 on success, and also 0 without writing anything to user
+ * space when the queue has no page. Returns -ENOENT when the server
+ * is unknown, -EINVAL on an invalid priority, -EFAULT when the copy
+ * to user space fails, or the error from the xive_native_*() calls.
+ */
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ struct kvmppc_xive_vcpu *xc;
+ struct xive_q *q;
+ void __user *ubufp = (u64 __user *) addr;
+ u32 server;
+ u8 priority;
+ struct kvm_ppc_xive_eq kvm_eq;
+ u64 qaddr;
+ u64 qshift;
+ u64 qeoi_page;
+ u32 escalate_irq;
+ u64 qflags;
+ int rc;
+
+ /*
+ * Demangle priority/server tuple from the EQ identifier
+ */
+ priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
+ KVM_XIVE_EQ_PRIORITY_SHIFT;
+ server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
+ KVM_XIVE_EQ_SERVER_SHIFT;
+
+ vcpu = kvmppc_xive_find_server(kvm, server);
+ if (!vcpu) {
+ pr_err("Can't find server %d\n", server);
+ return -ENOENT;
+ }
+ xc = vcpu->arch.xive_vcpu;
+
+ if (priority != xive_prio_from_guest(priority)) {
+ pr_err("invalid priority for queue %d for VCPU %d\n",
+ priority, server);
+ return -EINVAL;
+ }
+ q = &xc->queues[priority];
+
+ memset(&kvm_eq, 0, sizeof(kvm_eq));
+
+ if (!q->qpage)
+ return 0; /* NOTE(review): the zeroed kvm_eq is not copied to user space on this path */
+
+ rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
+ &qeoi_page, &escalate_irq, &qflags); /* only qflags is used below */
+ if (rc)
+ return rc;
+
+ kvm_eq.flags = 0;
+ if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
+ kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;
+
+ kvm_eq.qshift = q->guest_qshift; /* guest view saved when the queue was configured */
+ kvm_eq.qaddr = q->guest_qaddr;
+
+ rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
+ &kvm_eq.qindex);
+ if (rc)
+ return rc;
+
+ pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
+ __func__, server, priority, kvm_eq.flags,
+ kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);
+
+ if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
+{ /*
+ * Mask every valid source of block @sb that is not masked already:
+ * the recorded routing is cleared and both the IPI and, when one is
+ * set, the pt_number interrupt are reprogrammed as MASKED. The only
+ * caller in this file, kvmppc_xive_reset(), holds sb->lock.
+ */
+ int i;
+
+ for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
+
+ if (!state->valid)
+ continue;
+
+ if (state->act_priority == MASKED)
+ continue;
+
+ state->eisn = 0;
+ state->act_server = 0;
+ state->act_priority = MASKED;
+ xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
+ if (state->pt_number) { /* same treatment for the pt_number interrupt */
+ xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
+ xive_native_configure_irq(state->pt_number,
+ 0, MASKED, 0);
+ }
+ }
+}
+
+static int kvmppc_xive_reset(struct kvmppc_xive *xive)
+{ /*
+ * Handler of the KVM_DEV_XIVE_RESET control. Under xive->lock, the
+ * interrupts of every connected vCPU are disabled, its escalation
+ * interrupts and queues are released, then every source block is
+ * reset to the masked state. Always returns 0.
+ */
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ pr_devel("%s\n", __func__);
+
+ mutex_lock(&xive->lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ unsigned int prio;
+
+ if (!xc)
+ continue; /* vCPU not connected to the device */
+
+ kvmppc_xive_disable_vcpu_interrupts(vcpu);
+
+ for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
+
+ /* Single escalation, no queue 7 */
+ if (prio == 7 && kvmppc_xive_has_single_escalation(xive))
+ break;
+
+ if (xc->esc_virq[prio]) {
+ free_irq(xc->esc_virq[prio], vcpu);
+ irq_dispose_mapping(xc->esc_virq[prio]);
+ kfree(xc->esc_virq_names[prio]);
+ xc->esc_virq[prio] = 0;
+ }
+
+ kvmppc_xive_native_cleanup_queue(vcpu, prio);
+ }
+ }
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_reset_sources(sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ mutex_unlock(&xive->lock);
+
+ return 0;
+}
+
+static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
+{ /*
+ * Sync every valid, non-masked source of block @sb: both
+ * xive_native_sync_source() and xive_native_sync_queue() are called
+ * with the HW interrupt number of the source. The only caller in
+ * this file, kvmppc_xive_native_eq_sync(), holds sb->lock.
+ */
+ int j;
+
+ for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
+ struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
+ struct xive_irq_data *xd;
+ u32 hw_num;
+
+ if (!state->valid)
+ continue;
+
+ /*
+ * The struct kvmppc_xive_irq_state reflects the state
+ * of the EAS configuration and not the state of the
+ * source. The source is masked setting the PQ bits to
+ * '-Q', which is what is being done before calling
+ * the KVM_DEV_XIVE_EQ_SYNC control.
+ *
+ * If a source EAS is configured, OPAL syncs the XIVE
+ * IC of the source and the XIVE IC of the previous
+ * target if any.
+ *
+ * So it should be fine ignoring MASKED sources as
+ * they have been synced already.
+ */
+ if (state->act_priority == MASKED)
+ continue;
+
+ kvmppc_xive_select_irq(state, &hw_num, &xd); /* xd is not used afterwards */
+ xive_native_sync_source(hw_num);
+ xive_native_sync_queue(hw_num);
+ }
+}
+
+static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+	struct kvm *kvm = vcpu->kvm;
+	unsigned int q_idx;
+
+	if (!xc)
+		return -ENOENT;
+
+	for (q_idx = 0; q_idx < KVMPPC_XIVE_Q_COUNT; q_idx++) {
+		struct xive_q *queue = &xc->queues[q_idx];
+		int srcu_token;
+
+		/* Queues without a page have nothing to report. */
+		if (queue->qpage) {
+			/* Mark the EQ page dirty for migration */
+			srcu_token = srcu_read_lock(&kvm->srcu);
+			mark_page_dirty(kvm, gpa_to_gfn(queue->guest_qaddr));
+			srcu_read_unlock(&kvm->srcu, srcu_token);
+		}
+	}
+
+	return 0;
+}
+
+static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
+{ /*
+ * Handler of the KVM_DEV_XIVE_EQ_SYNC control. Under xive->lock,
+ * every source block is synced, then the EQ pages of every vCPU are
+ * marked dirty. Always returns 0; the per-vCPU result is ignored.
+ */
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ pr_devel("%s\n", __func__);
+
+ mutex_lock(&xive->lock);
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock);
+ kvmppc_xive_native_sync_sources(sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ kvmppc_xive_native_vcpu_eq_sync(vcpu); /* -ENOENT for unconnected vCPUs is dropped */
+ }
+ mutex_unlock(&xive->lock);
+
+ return 0;
+}
+
+static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/*
+	 * Per-source and per-queue groups are dispatched right away, with
+	 * attr->attr as the identifier and attr->addr as the user buffer.
+	 */
+	switch (attr->group) {
+	case KVM_DEV_XIVE_GRP_SOURCE:
+		return kvmppc_xive_native_set_source(xive, attr->attr,
+						     attr->addr);
+	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+		return kvmppc_xive_native_set_source_config(xive, attr->attr,
+							    attr->addr);
+	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
+							   attr->addr);
+	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+		return kvmppc_xive_native_sync_source(xive, attr->attr,
+						      attr->addr);
+	case KVM_DEV_XIVE_GRP_CTRL:
+		break;
+	default:
+		return -ENXIO;
+	}
+
+	/* Device-wide controls of the CTRL group. */
+	if (attr->attr == KVM_DEV_XIVE_RESET)
+		return kvmppc_xive_reset(xive);
+	if (attr->attr == KVM_DEV_XIVE_EQ_SYNC)
+		return kvmppc_xive_native_eq_sync(xive);
+	if (attr->attr == KVM_DEV_XIVE_NR_SERVERS)
+		return kvmppc_xive_set_nr_servers(xive, attr->addr);
+
+	/* Unknown control. */
+	return -ENXIO;
+}
+
+static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
+				       struct kvm_device_attr *attr)
+{
+	struct kvmppc_xive *xive = dev->private;
+
+	/* The EQ configuration is the only group that can be read back. */
+	if (attr->group != KVM_DEV_XIVE_GRP_EQ_CONFIG)
+		return -ENXIO;
+
+	return kvmppc_xive_native_get_queue_config(xive, attr->attr,
+						   attr->addr);
+}
+
+static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{ /*
+ * Tell whether an attribute is supported: returns 0 when it is and
+ * -ENXIO otherwise. @dev is not used.
+ */
+ switch (attr->group) {
+ case KVM_DEV_XIVE_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_XIVE_RESET:
+ case KVM_DEV_XIVE_EQ_SYNC:
+ case KVM_DEV_XIVE_NR_SERVERS:
+ return 0;
+ }
+ break;
+ case KVM_DEV_XIVE_GRP_SOURCE:
+ case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
+ case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
+ if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
+ attr->attr < KVMPPC_XIVE_NR_IRQS) /* attr is an interrupt number: range check only */
+ return 0;
+ break;
+ case KVM_DEV_XIVE_GRP_EQ_CONFIG:
+ return 0; /* the EQ identifier is not validated here */
+ }
+ return -ENXIO;
+}
+
+/*
+ * Called when device fd is closed. kvm->lock is held.
+ */
+static void kvmppc_xive_native_release(struct kvm_device *dev)
+{ /*
+ * Release the XIVE native device: the saved file mapping is dropped,
+ * the debugfs entry is removed, every vCPU is cleaned up under its
+ * mutex, kvm->arch.xive is cleared, the source blocks and the VP
+ * block are freed and finally @dev itself is freed. The kvmppc_xive
+ * structure is not freed here.
+ */
+ struct kvmppc_xive *xive = dev->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ pr_devel("Releasing xive native device\n");
+
+ /*
+ * Clear the KVM device file address_space which is used to
+ * unmap the ESB pages when a device is passed-through.
+ */
+ mutex_lock(&xive->mapping_lock);
+ xive->mapping = NULL;
+ mutex_unlock(&xive->mapping_lock);
+
+ /*
+ * Since this is the device release function, we know that
+ * userspace does not have any open fd or mmap referring to
+ * the device. Therefore there can not be any of the
+ * device attribute set/get, mmap, or page fault functions
+ * being executed concurrently, and similarly, the
+ * connect_vcpu and set/clr_mapped functions also cannot
+ * be being executed.
+ */
+
+ debugfs_remove(xive->dentry);
+
+ /*
+ * We should clean up the vCPU interrupt presenters first.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ /*
+ * Take vcpu->mutex to ensure that no one_reg get/set ioctl
+ * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
+ * Holding the vcpu->mutex also means that the vcpu cannot
+ * be executing the KVM_RUN ioctl, and therefore it cannot
+ * be executing the XIVE push or pull code or accessing
+ * the XIVE MMIO regions.
+ */
+ mutex_lock(&vcpu->mutex);
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+ /*
+ * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
+ * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
+ * against xive code getting called during vcpu execution or
+ * set/get one_reg operations.
+ */
+ kvm->arch.xive = NULL;
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ if (xive->src_blocks[i])
+ kvmppc_xive_free_sources(xive->src_blocks[i]);
+ kfree(xive->src_blocks[i]); /* kfree() accepts NULL, so no check is needed */
+ xive->src_blocks[i] = NULL;
+ }
+
+ if (xive->vp_base != XIVE_INVALID_VP) /* a VP block was allocated */
+ xive_native_free_vp_block(xive->vp_base);
+
+ /*
+ * A reference of the kvmppc_xive pointer is now kept under
+ * the xive_devices struct of the machine for reuse. It is
+ * freed when the VM is destroyed for now until we fix all the
+ * execution paths.
+ */
+
+ kfree(dev);
+}
+
+/*
+ * Create a XIVE device. kvm->lock is held.
+ */
+static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
+{ /*
+ * Bind a kvmppc_xive structure to @dev and to its VM, initialise its
+ * locks, defaults and capability flags, and publish it as
+ * kvm->arch.xive.
+ *
+ * Returns 0 on success, -EEXIST when the VM already has a XIVE
+ * device, -ENOMEM when no kvmppc_xive structure can be obtained.
+ */
+ struct kvmppc_xive *xive;
+ struct kvm *kvm = dev->kvm;
+
+ pr_devel("Creating xive native device\n");
+
+ if (kvm->arch.xive)
+ return -EEXIST;
+
+ xive = kvmppc_xive_get_device(kvm, type);
+ if (!xive)
+ return -ENOMEM;
+
+ dev->private = xive;
+ xive->dev = dev;
+ xive->kvm = kvm;
+ mutex_init(&xive->mapping_lock);
+ mutex_init(&xive->lock);
+
+ /* VP allocation is delayed to the first call to connect_vcpu */
+ xive->vp_base = XIVE_INVALID_VP;
+ /* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
+ * on a POWER9 system.
+ */
+ xive->nr_servers = KVM_MAX_VCPUS;
+
+ if (xive_native_has_single_escalation())
+ xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION;
+
+ if (xive_native_has_save_restore())
+ xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE;
+
+ xive->ops = &kvmppc_xive_native_ops;
+
+ kvm->arch.xive = xive; /* published last, once fully initialised */
+ return 0;
+}
+
+/*
+ * Interrupt Pending Buffer (IPB) offset
+ *
+ * TM_IPB_MASK selects the 8 bits starting at bit TM_IPB_SHIFT of a
+ * 64-bit value. kvmppc_xive_native_get_vp() applies it to the VP state
+ * returned by OPAL.
+ */
+#define TM_IPB_SHIFT 40
+#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)
+
+int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{ /*
+ * Fill val->xive_timaval[0] with the saved thread context word w01
+ * of @vcpu, merged with the IPB bits of the VP state read from OPAL.
+ *
+ * Returns 0 on success, -EPERM when XIVE is not enabled on the vCPU,
+ * -ENOENT when no xive_vcpu is attached, or the error from
+ * xive_native_get_vp_state().
+ */
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ u64 opal_state;
+ int rc;
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return -EPERM;
+
+ if (!xc)
+ return -ENOENT;
+
+ /* Thread context registers. We only care about IPB and CPPR */
+ val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;
+
+ /* Get the VP state from OPAL */
+ rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
+ if (rc)
+ return rc; /* NOTE(review): xive_timaval[0] has already been written at this point */
+
+ /*
+ * Capture the backup of IPB register in the NVT structure and
+ * merge it in our KVM VP state.
+ */
+ val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);
+
+ pr_devel("%s NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
+ __func__,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ vcpu->arch.xive_saved_state.w01,
+ (u32) vcpu->arch.xive_cam_word, opal_state);
+
+ return 0;
+}
+
+int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
+{ /*
+ * Restore the saved thread context word w01 of @vcpu from
+ * val->xive_timaval[0]. val->xive_timaval[1] is only traced.
+ *
+ * Returns 0 on success, -EPERM when XIVE is not enabled on the vCPU,
+ * -ENOENT when the vCPU or the VM has no XIVE state, -EBUSY (with a
+ * warning) when the vCPU context is currently pushed.
+ */
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+ struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
+
+ pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
+ val->xive_timaval[0], val->xive_timaval[1]);
+
+ if (!kvmppc_xive_enabled(vcpu))
+ return -EPERM;
+
+ if (!xc || !xive)
+ return -ENOENT;
+
+ /* We can't update the state of a "pushed" VCPU */
+ if (WARN_ON(vcpu->arch.xive_pushed))
+ return -EBUSY;
+
+ /*
+ * Restore the thread context registers. IPB and CPPR should
+ * be the only ones that matter.
+ */
+ vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];
+
+ /*
+ * There is no need to restore the XIVE internal state (IPB
+ * stored in the NVT) as the IPB register was merged in KVM VP
+ * state when captured.
+ */
+ return 0;
+}
+
+bool kvmppc_xive_native_supported(void)
+{
+	/* Support is reported exactly when queue state support is reported. */
+	bool has_queue_state = xive_native_has_queue_state_support();
+
+	return has_queue_state;
+}
+
+static int xive_native_debug_show(struct seq_file *m, void *private)
+{ /*
+ * seq_file show callback: prints the VP identifiers, the saved
+ * thread context and the queues of every connected vCPU, then the
+ * sources of every source block. Prints nothing when the device has
+ * no VM attached. Always returns 0. @private is not used.
+ */
+ struct kvmppc_xive *xive = m->private;
+ struct kvm *kvm = xive->kvm;
+ struct kvm_vcpu *vcpu;
+ unsigned long i;
+
+ if (!kvm)
+ return 0;
+
+ seq_puts(m, "=========\nVCPU state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+
+ if (!xc)
+ continue; /* vCPU not connected to the device */
+
+ seq_printf(m, "VCPU %d: VP=%#x/%02x\n"
+ " NSR=%02x CPPR=%02x IBP=%02x PIPR=%02x w01=%016llx w2=%08x\n",
+ xc->server_num, xc->vp_id, xc->vp_chip_id,
+ vcpu->arch.xive_saved_state.nsr,
+ vcpu->arch.xive_saved_state.cppr,
+ vcpu->arch.xive_saved_state.ipb,
+ vcpu->arch.xive_saved_state.pipr,
+ be64_to_cpu(vcpu->arch.xive_saved_state.w01),
+ be32_to_cpu(vcpu->arch.xive_cam_word));
+
+ kvmppc_xive_debug_show_queues(m, vcpu);
+ }
+
+ seq_puts(m, "=========\nSources\n=========\n");
+
+ for (i = 0; i <= xive->max_sbid; i++) {
+ struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
+
+ if (sb) {
+ arch_spin_lock(&sb->lock); /* the block is dumped under its lock */
+ kvmppc_xive_debug_show_sources(m, sb);
+ arch_spin_unlock(&sb->lock);
+ }
+ }
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(xive_native_debug); /* presumably what defines xive_native_debug_fops around xive_native_debug_show() - confirm */
+
+static void xive_native_debugfs_init(struct kvmppc_xive *xive)
+{ /*
+ * Create the read-only (0444) "xive" debugfs file under the debugfs
+ * directory of the VM and keep its dentry in xive->dentry, which
+ * kvmppc_xive_native_release() later removes.
+ */
+ xive->dentry = debugfs_create_file("xive", 0444, xive->kvm->debugfs_dentry,
+ xive, &xive_native_debug_fops);
+
+ pr_debug("%s: created\n", __func__);
+}
+
+static void kvmppc_xive_native_init(struct kvm_device *dev)
+{
+	/* The only work at init time is registering the debug interfaces. */
+	xive_native_debugfs_init(dev->private);
+}
+
+struct kvm_device_ops kvm_xive_native_ops = { /* KVM device callbacks of the XIVE native device */
+ .name = "kvm-xive-native",
+ .create = kvmppc_xive_native_create, /* binds the kvmppc_xive structure to the VM */
+ .init = kvmppc_xive_native_init, /* registers the debugfs file */
+ .release = kvmppc_xive_native_release, /* tears down vCPUs, sources and the device */
+ .set_attr = kvmppc_xive_native_set_attr, /* controls, sources, source and EQ configuration */
+ .get_attr = kvmppc_xive_native_get_attr, /* EQ configuration only */
+ .has_attr = kvmppc_xive_native_has_attr, /* 0 when supported, -ENXIO otherwise */
+ .mmap = kvmppc_xive_native_mmap, /* TIMA and ESB page mappings */
+};
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index b4c89fa6f109..3401b96be475 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
* Copyright 2010-2011 Freescale Semiconductor, Inc.
@@ -30,7 +19,8 @@
#include <linux/fs.h>
#include <asm/cputable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/interrupt.h>
#include <asm/kvm_ppc.h>
#include <asm/cacheflush.h>
#include <asm/dbell.h>
@@ -46,27 +36,54 @@
unsigned long kvmppc_booke_handlers;
-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
- { "mmio", VCPU_STAT(mmio_exits) },
- { "sig", VCPU_STAT(signal_exits) },
- { "itlb_r", VCPU_STAT(itlb_real_miss_exits) },
- { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) },
- { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) },
- { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) },
- { "sysc", VCPU_STAT(syscall_exits) },
- { "isi", VCPU_STAT(isi_exits) },
- { "dsi", VCPU_STAT(dsi_exits) },
- { "inst_emu", VCPU_STAT(emulated_inst_exits) },
- { "dec", VCPU_STAT(dec_exits) },
- { "ext_intr", VCPU_STAT(ext_intr_exits) },
- { "halt_wakeup", VCPU_STAT(halt_wakeup) },
- { "doorbell", VCPU_STAT(dbell_exits) },
- { "guest doorbell", VCPU_STAT(gdbell_exits) },
- { "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
- { NULL }
+const struct _kvm_stats_desc kvm_vm_stats_desc[] = { /* VM-scope stats descriptors */
+ KVM_GENERIC_VM_STATS(), /* generic entries come first */
+ STATS_DESC_ICOUNTER(VM, num_2M_pages),
+ STATS_DESC_ICOUNTER(VM, num_1G_pages)
+};
+
+const struct kvm_stats_header kvm_vm_stats_header = { /* header describing kvm_vm_stats_desc[] */
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vm_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header), /* immediately after the header */
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, /* KVM_STATS_NAME_SIZE bytes after id_offset */
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + /* desc_offset plus ... */
+ sizeof(kvm_vm_stats_desc), /* ... the size of the whole descriptor array */
+};
+
+const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { /* VCPU-scope stats descriptors */
+ KVM_GENERIC_VCPU_STATS(), /* generic entries come first */
+ STATS_DESC_COUNTER(VCPU, sum_exits),
+ STATS_DESC_COUNTER(VCPU, mmio_exits),
+ STATS_DESC_COUNTER(VCPU, signal_exits),
+ STATS_DESC_COUNTER(VCPU, light_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, itlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_real_miss_exits),
+ STATS_DESC_COUNTER(VCPU, dtlb_virt_miss_exits),
+ STATS_DESC_COUNTER(VCPU, syscall_exits),
+ STATS_DESC_COUNTER(VCPU, isi_exits),
+ STATS_DESC_COUNTER(VCPU, dsi_exits),
+ STATS_DESC_COUNTER(VCPU, emulated_inst_exits),
+ STATS_DESC_COUNTER(VCPU, dec_exits),
+ STATS_DESC_COUNTER(VCPU, ext_intr_exits),
+ STATS_DESC_COUNTER(VCPU, halt_successful_wait),
+ STATS_DESC_COUNTER(VCPU, dbell_exits),
+ STATS_DESC_COUNTER(VCPU, gdbell_exits),
+ STATS_DESC_COUNTER(VCPU, ld),
+ STATS_DESC_COUNTER(VCPU, st),
+ STATS_DESC_COUNTER(VCPU, pthru_all),
+ STATS_DESC_COUNTER(VCPU, pthru_host),
+ STATS_DESC_COUNTER(VCPU, pthru_bad_aff)
+};
+
+const struct kvm_stats_header kvm_vcpu_stats_header = { /* header describing kvm_vcpu_stats_desc[] */
+ .name_size = KVM_STATS_NAME_SIZE,
+ .num_desc = ARRAY_SIZE(kvm_vcpu_stats_desc),
+ .id_offset = sizeof(struct kvm_stats_header), /* immediately after the header */
+ .desc_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE, /* KVM_STATS_NAME_SIZE bytes after id_offset */
+ .data_offset = sizeof(struct kvm_stats_header) + KVM_STATS_NAME_SIZE + /* desc_offset plus ... */
+ sizeof(kvm_vcpu_stats_desc), /* ... the size of the whole descriptor array */
};
/* TODO: use vcpu_printf() */
@@ -74,8 +91,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
{
int i;
- printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr);
- printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr);
+ printk("pc: %08lx msr: %08llx\n", vcpu->arch.regs.nip,
+ vcpu->arch.shared->msr);
+ printk("lr: %08lx ctr: %08lx\n", vcpu->arch.regs.link,
+ vcpu->arch.regs.ctr);
printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0,
vcpu->arch.shared->srr1);
@@ -96,6 +115,7 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
preempt_disable();
enable_kernel_spe();
kvmppc_save_guest_spe(vcpu);
+ disable_kernel_spe();
vcpu->arch.shadow_msr &= ~MSR_SPE;
preempt_enable();
}
@@ -105,6 +125,7 @@ static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
preempt_disable();
enable_kernel_spe();
kvmppc_load_guest_spe(vcpu);
+ disable_kernel_spe();
vcpu->arch.shadow_msr |= MSR_SPE;
preempt_enable();
}
@@ -124,6 +145,41 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
}
#endif
+/*
+ * Load the guest vcpu FP state unless the thread's MSR_FP is already set.
+ * MSR_FP is then set in the thread's regs and fp_save_area is pointed at
+ * vcpu->arch.fp, so that the host knows we are holding the FPU and can
+ * save the guest vcpu FP state if another thread needs to use the FPU.
+ * This simulates an FP unavailable fault.
+ *
+ * Must be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (!(current->thread.regs->msr & MSR_FP)) {
+ enable_kernel_fp();
+ load_fp_state(&vcpu->arch.fp);
+ disable_kernel_fp();
+ current->thread.fp_save_area = &vcpu->arch.fp;
+ current->thread.regs->msr |= MSR_FP;
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu FP state via giveup_fpu() if MSR_FP is set in the thread,
+ * then clear fp_save_area. Must be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_PPC_FPU
+ if (current->thread.regs->msr & MSR_FP)
+ giveup_fpu(current);
+ current->thread.fp_save_area = NULL; /* cleared whether or not MSR_FP was set */
+#endif
+}
+
static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
{
#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
@@ -134,6 +190,41 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * Simulate an AltiVec unavailable fault: if the CPU has AltiVec and the
+ * thread's MSR_VEC is clear, load vcpu->arch.vr into the AltiVec unit, point
+ * vr_save_area at it and set MSR_VEC. Must be called with preemption disabled.
+ */
+static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ if (!(current->thread.regs->msr & MSR_VEC)) {
+ enable_kernel_altivec();
+ load_vr_state(&vcpu->arch.vr);
+ disable_kernel_altivec();
+ current->thread.vr_save_area = &vcpu->arch.vr;
+ current->thread.regs->msr |= MSR_VEC;
+ }
+ }
+#endif
+}
+
+/*
+ * Save guest vcpu AltiVec state via giveup_altivec() if MSR_VEC is set in the
+ * thread, then clear vr_save_area. Must be called with preemption disabled.
+ */
+static inline void kvmppc_save_guest_altivec(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_ALTIVEC
+ if (cpu_has_feature(CPU_FTR_ALTIVEC)) { /* nothing is done on CPUs without AltiVec */
+ if (current->thread.regs->msr & MSR_VEC)
+ giveup_altivec(current);
+ current->thread.vr_save_area = NULL; /* cleared whether or not MSR_VEC was set */
+ }
+#endif
+}
+
static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
{
/* Synchronize guest's desire to get debug interrupts into shadow MSR */
@@ -192,9 +283,10 @@ void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
}
-void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
+void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong srr1_flags,
ulong dear_flags, ulong esr_flags)
{
+ WARN_ON_ONCE(srr1_flags);
vcpu->arch.queued_dear = dear_flags;
vcpu->arch.queued_esr = esr_flags;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
@@ -225,6 +317,20 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
}
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu, ulong srr1_flags) /* queue BOOKE_IRQPRIO_FP_UNAVAIL for the vcpu */
+{
+ WARN_ON_ONCE(srr1_flags); /* srr1_flags is otherwise unused here; warn once if it is non-zero */
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+}
+
+#ifdef CONFIG_ALTIVEC
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu, ulong srr1_flags) /* queue BOOKE_IRQPRIO_ALTIVEC_UNAVAIL for the vcpu */
+{
+ WARN_ON_ONCE(srr1_flags); /* srr1_flags is otherwise unused here; warn once if it is non-zero */
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+}
+#endif
+
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
@@ -267,6 +373,16 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
}
+void kvmppc_core_queue_debug(struct kvm_vcpu *vcpu) /* queue BOOKE_IRQPRIO_DEBUG for the vcpu */
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DEBUG); /* NOTE(review): presumably sets the bit that kvmppc_core_dequeue_debug() clears - helper not shown, confirm */
+}
+
+void kvmppc_core_dequeue_debug(struct kvm_vcpu *vcpu) /* drop a pending BOOKE_IRQPRIO_DEBUG */
+{
+ clear_bit(BOOKE_IRQPRIO_DEBUG, &vcpu->arch.pending_exceptions); /* clears the bit in the vcpu's pending_exceptions bitmap */
+}
+
static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
{
kvmppc_set_srr0(vcpu, srr0);
@@ -333,17 +449,23 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_ALIGNMENT:
update_dear = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_PROGRAM:
update_esr = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_FP_UNAVAIL:
+#ifdef CONFIG_SPE_POSSIBLE
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
case BOOKE_IRQPRIO_SPE_FP_ROUND:
+#endif
+#ifdef CONFIG_ALTIVEC
+ case BOOKE_IRQPRIO_ALTIVEC_UNAVAIL:
+ case BOOKE_IRQPRIO_ALTIVEC_ASSIST:
+#endif
case BOOKE_IRQPRIO_AP_UNAVAIL:
allowed = 1;
msr_mask = MSR_CE | MSR_ME | MSR_DE;
@@ -365,7 +487,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
case BOOKE_IRQPRIO_DECREMENTER:
case BOOKE_IRQPRIO_FIT:
keep_irq = true;
- /* fall through */
+ fallthrough;
case BOOKE_IRQPRIO_EXTERNAL:
case BOOKE_IRQPRIO_DBELL:
allowed = vcpu->arch.shared->msr & MSR_EE;
@@ -377,36 +499,41 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
allowed = vcpu->arch.shared->msr & MSR_DE;
allowed = allowed && !crit;
msr_mask = MSR_ME;
- int_class = INT_CLASS_CRIT;
+ if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+ int_class = INT_CLASS_DBG;
+ else
+ int_class = INT_CLASS_CRIT;
+
break;
}
if (allowed) {
switch (int_class) {
case INT_CLASS_NONCRIT:
- set_guest_srr(vcpu, vcpu->arch.pc,
+ set_guest_srr(vcpu, vcpu->arch.regs.nip,
vcpu->arch.shared->msr);
break;
case INT_CLASS_CRIT:
- set_guest_csrr(vcpu, vcpu->arch.pc,
+ set_guest_csrr(vcpu, vcpu->arch.regs.nip,
vcpu->arch.shared->msr);
break;
case INT_CLASS_DBG:
- set_guest_dsrr(vcpu, vcpu->arch.pc,
+ set_guest_dsrr(vcpu, vcpu->arch.regs.nip,
vcpu->arch.shared->msr);
break;
case INT_CLASS_MC:
- set_guest_mcsrr(vcpu, vcpu->arch.pc,
+ set_guest_mcsrr(vcpu, vcpu->arch.regs.nip,
vcpu->arch.shared->msr);
break;
}
- vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
- if (update_esr == true)
+ vcpu->arch.regs.nip = vcpu->arch.ivpr |
+ vcpu->arch.ivor[priority];
+ if (update_esr)
kvmppc_set_esr(vcpu, vcpu->arch.queued_esr);
- if (update_dear == true)
+ if (update_dear)
kvmppc_set_dar(vcpu, vcpu->arch.queued_dear);
- if (update_epr == true) {
+ if (update_epr) {
if (vcpu->arch.epr_flags & KVMPPC_EPR_USER)
kvm_make_request(KVM_REQ_EPR_EXIT, vcpu);
else if (vcpu->arch.epr_flags & KVMPPC_EPR_KERNEL) {
@@ -445,7 +572,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
/*
* Return the number of jiffies until the next timeout. If the timeout is
- * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * longer than the TIMER_NEXT_MAX_DELTA, then return TIMER_NEXT_MAX_DELTA
* because the larger value can break the timer APIs.
*/
static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
@@ -471,7 +598,7 @@ static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
if (do_div(nr_jiffies, tb_ticks_per_jiffy))
nr_jiffies++;
- return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+ return min_t(unsigned long long, nr_jiffies, TIMER_NEXT_MAX_DELTA);
}
static void arm_next_watchdog(struct kvm_vcpu *vcpu)
@@ -484,24 +611,24 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu)
* userspace, so clear the KVM_REQ_WATCHDOG request.
*/
if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
- clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_WATCHDOG, vcpu);
spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
nr_jiffies = watchdog_next_timeout(vcpu);
/*
- * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+ * If the number of jiffies of watchdog timer >= TIMER_NEXT_MAX_DELTA
* then do not run the watchdog timer as this can break timer APIs.
*/
- if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+ if (nr_jiffies < TIMER_NEXT_MAX_DELTA)
mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
else
- del_timer(&vcpu->arch.wdt_timer);
+ timer_delete(&vcpu->arch.wdt_timer);
spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
}
-void kvmppc_watchdog_func(unsigned long data)
+static void kvmppc_watchdog_func(struct timer_list *t)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ struct kvm_vcpu *vcpu = timer_container_of(vcpu, t, arch.wdt_timer);
u32 tsr, new_tsr;
int final;
@@ -587,20 +714,19 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
kvmppc_core_check_exceptions(vcpu);
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Exception delivery raised request; start over */
return 1;
}
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
- kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_vcpu_halt(vcpu);
hard_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
r = 1;
- };
+ }
return r;
}
@@ -631,13 +757,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
return r;
}
-int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+int kvmppc_vcpu_run(struct kvm_vcpu *vcpu)
{
int ret, s;
struct debug_reg debug;
if (!vcpu->arch.sane) {
- kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
@@ -654,27 +780,34 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/*
* Since we can't trap on MSR_FP in GS-mode, we consider the guest
- * as always using the FPU. Kernel usage of FP (via
- * enable_kernel_fp()) in this thread must not occur while
- * vcpu->fpu_active is set.
+ * as always using the FPU.
*/
- vcpu->fpu_active = 1;
-
kvmppc_load_guest_fp(vcpu);
#endif
+#ifdef CONFIG_ALTIVEC
+ /* Save userspace AltiVec state in stack */
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ enable_kernel_altivec();
+ /*
+ * Since we can't trap on MSR_VEC in GS-mode, we consider the guest
+ * as always using the AltiVec.
+ */
+ kvmppc_load_guest_altivec(vcpu);
+#endif
+
/* Switch to guest debug context */
- debug = vcpu->arch.shadow_dbg_reg;
+ debug = vcpu->arch.dbg_reg;
switch_booke_debug_regs(&debug);
debug = current->thread.debug;
- current->thread.debug = vcpu->arch.shadow_dbg_reg;
+ current->thread.debug = vcpu->arch.dbg_reg;
- vcpu->arch.pgdir = current->mm->pgd;
+ vcpu->arch.pgdir = vcpu->kvm->mm->pgd;
kvmppc_fix_ee_before_entry();
- ret = __kvmppc_vcpu_run(kvm_run, vcpu);
+ ret = __kvmppc_vcpu_run(vcpu);
- /* No need for kvm_guest_exit. It's done in handle_exit.
+ /* No need for guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
/* Switch back to user space debug context */
@@ -683,8 +816,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
#ifdef CONFIG_PPC_FPU
kvmppc_save_guest_fp(vcpu);
+#endif
- vcpu->fpu_active = 0;
+#ifdef CONFIG_ALTIVEC
+ kvmppc_save_guest_altivec(vcpu);
#endif
out:
@@ -692,11 +827,11 @@ out:
return ret;
}
-static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int emulation_exit(struct kvm_vcpu *vcpu)
{
enum emulation_result er;
- er = kvmppc_emulate_instruction(run, vcpu);
+ er = kvmppc_emulate_instruction(vcpu);
switch (er) {
case EMULATE_DONE:
/* don't overwrite subtypes, just account kvm_stats */
@@ -709,12 +844,12 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
return RESUME_GUEST;
case EMULATE_FAIL:
- printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
- __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+ printk(KERN_CRIT "%s: emulation at %lx failed (%08lx)\n",
+ __func__, vcpu->arch.regs.nip, vcpu->arch.last_inst);
/* For debugging, encode the failing instruction and
* report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+ vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;
+ vcpu->run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
@@ -726,13 +861,41 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
}
-static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int kvmppc_handle_debug(struct kvm_vcpu *vcpu)
{
- struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ struct kvm_run *run = vcpu->run;
+ struct debug_reg *dbg_reg = &(vcpu->arch.dbg_reg);
u32 dbsr = vcpu->arch.dbsr;
+ if (vcpu->guest_debug == 0) {
+ /*
+ * Debug resources belong to Guest.
+ * Imprecise debug event is not injected
+ */
+ if (dbsr & DBSR_IDE) {
+ dbsr &= ~DBSR_IDE;
+ if (!dbsr)
+ return RESUME_GUEST;
+ }
+
+ if (dbsr && (vcpu->arch.shared->msr & MSR_DE) &&
+ (vcpu->arch.dbg_reg.dbcr0 & DBCR0_IDM))
+ kvmppc_core_queue_debug(vcpu);
+
+ /* Inject a program interrupt if trap debug is not allowed */
+ if ((dbsr & DBSR_TIE) && !(vcpu->arch.shared->msr & MSR_DE))
+ kvmppc_core_queue_program(vcpu, ESR_PTR);
+
+ return RESUME_GUEST;
+ }
+
+ /*
+ * Debug resource owned by userspace.
+ * Clear guest dbsr (vcpu->arch.dbsr)
+ */
+ vcpu->arch.dbsr = 0;
run->debug.arch.status = 0;
- run->debug.arch.address = vcpu->arch.pc;
+ run->debug.arch.address = vcpu->arch.regs.nip;
if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
@@ -752,16 +915,15 @@ static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
static void kvmppc_fill_pt_regs(struct pt_regs *regs)
{
- ulong r1, ip, msr, lr;
+ ulong r1, msr, lr;
asm("mr %0, 1" : "=r"(r1));
asm("mflr %0" : "=r"(lr));
asm("mfmsr %0" : "=r"(msr));
- asm("bl 1f; 1: mflr %0" : "=r"(ip));
memset(regs, 0, sizeof(*regs));
regs->gpr[1] = r1;
- regs->nip = ip;
+ regs->nip = _THIS_IP_;
regs->msr = msr;
regs->link = lr;
}
@@ -808,6 +970,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
#endif
break;
case BOOKE_INTERRUPT_CRITICAL:
+ kvmppc_fill_pt_regs(&regs);
unknown_exception(&regs);
break;
case BOOKE_INTERRUPT_DEBUG:
@@ -818,7 +981,7 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
}
}
-static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_resume_inst_load(struct kvm_vcpu *vcpu,
enum emulation_result emulated, u32 last_inst)
{
switch (emulated) {
@@ -827,11 +990,11 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
case EMULATE_FAIL:
pr_debug("%s: load instruction from guest address %lx failed\n",
- __func__, vcpu->arch.pc);
+ __func__, vcpu->arch.regs.nip);
/* For debugging, encode the failing instruction and
* report it to userspace. */
- run->hw.hardware_exit_reason = ~0ULL << 32;
- run->hw.hardware_exit_reason |= last_inst;
+ vcpu->run->hw.hardware_exit_reason = ~0ULL << 32;
+ vcpu->run->hw.hardware_exit_reason |= last_inst;
kvmppc_core_queue_program(vcpu, ESR_PIL);
return RESUME_HOST;
@@ -840,20 +1003,24 @@ static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
-/**
+/*
* kvmppc_handle_exit
*
* Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
*/
-int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int exit_nr)
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
{
+ struct kvm_run *run = vcpu->run;
int r = RESUME_HOST;
int s;
int idx;
u32 last_inst = KVM_INST_FETCH_FAILED;
+ ppc_inst_t pinst;
enum emulation_result emulated = EMULATE_DONE;
+ /* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */
+ kvmppc_fix_ee_after_exit();
+
/* update before a new last_exit_type is rewritten */
kvmppc_update_timing_stats(vcpu);
@@ -861,29 +1028,51 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_restart_interrupt(vcpu, exit_nr);
/*
- * get last instruction before beeing preempted
+ * get last instruction before being preempted
* TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA
*/
switch (exit_nr) {
case BOOKE_INTERRUPT_DATA_STORAGE:
case BOOKE_INTERRUPT_DTLB_MISS:
case BOOKE_INTERRUPT_HV_PRIV:
- emulated = kvmppc_get_last_inst(vcpu, false, &last_inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+ last_inst = ppc_inst_val(pinst);
+ break;
+ case BOOKE_INTERRUPT_PROGRAM:
+ /* SW breakpoints arrive as illegal instructions on HV */
+ if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+ last_inst = ppc_inst_val(pinst);
+ }
break;
default:
break;
}
- local_irq_enable();
-
trace_kvm_exit(exit_nr, vcpu);
- kvm_guest_exit();
+
+ context_tracking_guest_exit();
+ if (!vtime_accounting_enabled_this_cpu()) {
+ local_irq_enable();
+ /*
+ * Service IRQs here before vtime_account_guest_exit() so any
+ * ticks that occurred while running the guest are accounted to
+ * the guest. If vtime accounting is enabled, accounting uses
+ * TB rather than ticks, so it can be done without enabling
+ * interrupts here, which has the problem that it accounts
+ * interrupt processing overhead to the host.
+ */
+ local_irq_disable();
+ }
+ vtime_account_guest_exit();
+
+ local_irq_enable();
run->exit_reason = KVM_EXIT_UNKNOWN;
run->ready_for_interrupt_injection = 1;
if (emulated != EMULATE_DONE) {
- r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst);
+ r = kvmppc_resume_inst_load(vcpu, emulated, last_inst);
goto out;
}
@@ -943,10 +1132,22 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_HV_PRIV:
- r = emulation_exit(run, vcpu);
+ r = emulation_exit(vcpu);
break;
case BOOKE_INTERRUPT_PROGRAM:
+ if ((vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
+ (last_inst == KVMPPC_INST_SW_BREAKPOINT)) {
+ /*
+ * We are here because of an SW breakpoint instr,
+ * so lets return to host to handle.
+ */
+ r = kvmppc_handle_debug(vcpu);
+ run->exit_reason = KVM_EXIT_DEBUG;
+ kvmppc_account_exit(vcpu, DEBUG_EXITS);
+ break;
+ }
+
if (vcpu->arch.shared->msr & (MSR_PR | MSR_GS)) {
/*
* Program traps generated by user-level software must
@@ -962,7 +1163,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
- r = emulation_exit(run, vcpu);
+ r = emulation_exit(vcpu);
break;
case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -991,7 +1192,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SPE_FP_ROUND);
r = RESUME_GUEST;
break;
-#else
+#elif defined(CONFIG_SPE_POSSIBLE)
case BOOKE_INTERRUPT_SPE_UNAVAIL:
/*
* Guest wants SPE, but host kernel doesn't support it. Send
@@ -1008,14 +1209,30 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOKE_INTERRUPT_SPE_FP_DATA:
case BOOKE_INTERRUPT_SPE_FP_ROUND:
printk(KERN_CRIT "%s: unexpected SPE interrupt %u at %08lx\n",
- __func__, exit_nr, vcpu->arch.pc);
+ __func__, exit_nr, vcpu->arch.regs.nip);
run->hw.hardware_exit_reason = exit_nr;
r = RESUME_HOST;
break;
+#endif /* CONFIG_SPE_POSSIBLE */
+
+/*
+ * On cores with Vector category, KVM is loaded only if CONFIG_ALTIVEC,
+ * see kvmppc_e500mc_check_processor_compat().
+ */
+#ifdef CONFIG_ALTIVEC
+ case BOOKE_INTERRUPT_ALTIVEC_UNAVAIL:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_UNAVAIL);
+ r = RESUME_GUEST;
+ break;
+
+ case BOOKE_INTERRUPT_ALTIVEC_ASSIST:
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALTIVEC_ASSIST);
+ r = RESUME_GUEST;
+ break;
#endif
case BOOKE_INTERRUPT_DATA_STORAGE:
- kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
+ kvmppc_core_queue_data_storage(vcpu, 0, vcpu->arch.fault_dear,
vcpu->arch.fault_esr);
kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
@@ -1113,7 +1330,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
* actually RAM. */
vcpu->arch.paddr_accessed = gpaddr;
vcpu->arch.vaddr_accessed = eaddr;
- r = kvmppc_emulate_mmio(run, vcpu);
+ r = kvmppc_emulate_mmio(vcpu);
kvmppc_account_exit(vcpu, MMIO_EXITS);
}
@@ -1122,7 +1339,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
case BOOKE_INTERRUPT_ITLB_MISS: {
- unsigned long eaddr = vcpu->arch.pc;
+ unsigned long eaddr = vcpu->arch.regs.nip;
gpa_t gpaddr;
gfn_t gfn;
int gtlb_index;
@@ -1164,7 +1381,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
case BOOKE_INTERRUPT_DEBUG: {
- r = kvmppc_handle_debug(run, vcpu);
+ r = kvmppc_handle_debug(vcpu);
if (r == RESUME_HOST)
run->exit_reason = KVM_EXIT_DEBUG;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
@@ -1188,6 +1405,8 @@ out:
else {
/* interrupts now hard-disabled */
kvmppc_fix_ee_before_entry();
+ kvmppc_load_guest_fp(vcpu);
+ kvmppc_load_guest_altivec(vcpu);
}
}
@@ -1206,59 +1425,35 @@ static void kvmppc_set_tsr(struct kvm_vcpu *vcpu, u32 new_tsr)
update_timer_ints(vcpu);
}
-/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
- int i;
- int r;
-
- vcpu->arch.pc = 0;
- vcpu->arch.shared->pir = vcpu->vcpu_id;
- kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
- kvmppc_set_msr(vcpu, 0);
-
-#ifndef CONFIG_KVM_BOOKE_HV
- vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
- vcpu->arch.shadow_pid = 1;
- vcpu->arch.shared->msr = 0;
-#endif
-
- /* Eye-catching numbers so we know if the guest takes an interrupt
- * before it's programmed its own IVPR/IVORs. */
- vcpu->arch.ivpr = 0x55550000;
- for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
- vcpu->arch.ivor[i] = 0x7700 | i * 4;
-
- kvmppc_init_timing_stats(vcpu);
-
- r = kvmppc_core_vcpu_setup(vcpu);
- kvmppc_sanity_check(vcpu);
- return r;
-}
-
int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
{
/* setup watchdog timer once */
spin_lock_init(&vcpu->arch.wdt_lock);
- setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
- (unsigned long)vcpu);
+ timer_setup(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, 0);
+ /*
+ * Clear DBSR.MRR to avoid guest debug interrupt as
+ * this is of host interest
+ */
+ mtspr(SPRN_DBSR, DBSR_MRR);
return 0;
}
void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
- del_timer_sync(&vcpu->arch.wdt_timer);
+ timer_delete_sync(&vcpu->arch.wdt_timer);
}
int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
- regs->pc = vcpu->arch.pc;
+ vcpu_load(vcpu);
+
+ regs->pc = vcpu->arch.regs.nip;
regs->cr = kvmppc_get_cr(vcpu);
- regs->ctr = vcpu->arch.ctr;
- regs->lr = vcpu->arch.lr;
+ regs->ctr = vcpu->arch.regs.ctr;
+ regs->lr = vcpu->arch.regs.link;
regs->xer = kvmppc_get_xer(vcpu);
regs->msr = vcpu->arch.shared->msr;
regs->srr0 = kvmppc_get_srr0(vcpu);
@@ -1276,6 +1471,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
+ vcpu_put(vcpu);
return 0;
}
@@ -1283,10 +1479,12 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
int i;
- vcpu->arch.pc = regs->pc;
+ vcpu_load(vcpu);
+
+ vcpu->arch.regs.nip = regs->pc;
kvmppc_set_cr(vcpu, regs->cr);
- vcpu->arch.ctr = regs->ctr;
- vcpu->arch.lr = regs->lr;
+ vcpu->arch.regs.ctr = regs->ctr;
+ vcpu->arch.regs.link = regs->lr;
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
kvmppc_set_srr0(vcpu, regs->srr0);
@@ -1304,6 +1502,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
+ vcpu_put(vcpu);
return 0;
}
@@ -1431,170 +1630,163 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
+ int ret;
+
+ vcpu_load(vcpu);
+
sregs->pvr = vcpu->arch.pvr;
get_sregs_base(vcpu, sregs);
get_sregs_arch206(vcpu, sregs);
- return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
+
+ vcpu_put(vcpu);
+ return ret;
}
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
struct kvm_sregs *sregs)
{
- int ret;
+ int ret = -EINVAL;
+ vcpu_load(vcpu);
if (vcpu->arch.pvr != sregs->pvr)
- return -EINVAL;
+ goto out;
ret = set_sregs_base(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
ret = set_sregs_arch206(vcpu, sregs);
if (ret < 0)
- return ret;
+ goto out;
- return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+ ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
+
+out:
+ vcpu_put(vcpu);
+ return ret;
}
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
- union kvmppc_one_reg val;
- int size;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_IAC1:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac1);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac1);
break;
case KVM_REG_PPC_IAC2:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac2);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac2);
break;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac3);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac3);
break;
case KVM_REG_PPC_IAC4:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.iac4);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.iac4);
break;
#endif
case KVM_REG_PPC_DAC1:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac1);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.dac1);
break;
case KVM_REG_PPC_DAC2:
- val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2);
+ *val = get_reg_val(id, vcpu->arch.dbg_reg.dac2);
break;
case KVM_REG_PPC_EPR: {
u32 epr = kvmppc_get_epr(vcpu);
- val = get_reg_val(reg->id, epr);
+ *val = get_reg_val(id, epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR:
- val = get_reg_val(reg->id, vcpu->arch.epcr);
+ *val = get_reg_val(id, vcpu->arch.epcr);
break;
#endif
case KVM_REG_PPC_TCR:
- val = get_reg_val(reg->id, vcpu->arch.tcr);
+ *val = get_reg_val(id, vcpu->arch.tcr);
break;
case KVM_REG_PPC_TSR:
- val = get_reg_val(reg->id, vcpu->arch.tsr);
+ *val = get_reg_val(id, vcpu->arch.tsr);
break;
case KVM_REG_PPC_DEBUG_INST:
- val = get_reg_val(reg->id, KVMPPC_INST_EHPRIV_DEBUG);
+ *val = get_reg_val(id, KVMPPC_INST_SW_BREAKPOINT);
break;
case KVM_REG_PPC_VRSAVE:
- val = get_reg_val(reg->id, vcpu->arch.vrsave);
+ *val = get_reg_val(id, vcpu->arch.vrsave);
break;
default:
- r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->get_one_reg(vcpu, id, val);
break;
}
- if (r)
- return r;
-
- if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
- r = -EFAULT;
-
return r;
}
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
+ union kvmppc_one_reg *val)
{
int r = 0;
- union kvmppc_one_reg val;
- int size;
- size = one_reg_size(reg->id);
- if (size > sizeof(val))
- return -EINVAL;
-
- if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
- return -EFAULT;
-
- switch (reg->id) {
+ switch (id) {
case KVM_REG_PPC_IAC1:
- vcpu->arch.dbg_reg.iac1 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAC2:
- vcpu->arch.dbg_reg.iac2 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac2 = set_reg_val(id, *val);
break;
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
case KVM_REG_PPC_IAC3:
- vcpu->arch.dbg_reg.iac3 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac3 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_IAC4:
- vcpu->arch.dbg_reg.iac4 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.iac4 = set_reg_val(id, *val);
break;
#endif
case KVM_REG_PPC_DAC1:
- vcpu->arch.dbg_reg.dac1 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac1 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAC2:
- vcpu->arch.dbg_reg.dac2 = set_reg_val(reg->id, val);
+ vcpu->arch.dbg_reg.dac2 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_EPR: {
- u32 new_epr = set_reg_val(reg->id, val);
+ u32 new_epr = set_reg_val(id, *val);
kvmppc_set_epr(vcpu, new_epr);
break;
}
#if defined(CONFIG_64BIT)
case KVM_REG_PPC_EPCR: {
- u32 new_epcr = set_reg_val(reg->id, val);
+ u32 new_epcr = set_reg_val(id, *val);
kvmppc_set_epcr(vcpu, new_epcr);
break;
}
#endif
case KVM_REG_PPC_OR_TSR: {
- u32 tsr_bits = set_reg_val(reg->id, val);
+ u32 tsr_bits = set_reg_val(id, *val);
kvmppc_set_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_CLEAR_TSR: {
- u32 tsr_bits = set_reg_val(reg->id, val);
+ u32 tsr_bits = set_reg_val(id, *val);
kvmppc_clr_tsr_bits(vcpu, tsr_bits);
break;
}
case KVM_REG_PPC_TSR: {
- u32 tsr = set_reg_val(reg->id, val);
+ u32 tsr = set_reg_val(id, *val);
kvmppc_set_tsr(vcpu, tsr);
break;
}
case KVM_REG_PPC_TCR: {
- u32 tcr = set_reg_val(reg->id, val);
+ u32 tcr = set_reg_val(id, *val);
kvmppc_set_tcr(vcpu, tcr);
break;
}
case KVM_REG_PPC_VRSAVE:
- vcpu->arch.vrsave = set_reg_val(reg->id, val);
+ vcpu->arch.vrsave = set_reg_val(id, *val);
break;
default:
- r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, reg->id, &val);
+ r = vcpu->kvm->arch.kvm_ops->set_one_reg(vcpu, id, val);
break;
}
@@ -1603,12 +1795,12 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
@@ -1616,36 +1808,38 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
{
int r;
+ vcpu_load(vcpu);
r = kvmppc_core_vcpu_translate(vcpu, tr);
+ vcpu_put(vcpu);
return r;
}
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
- return -ENOTSUPP;
+
}
-void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
{
+ return -EOPNOTSUPP;
}
-int kvmppc_core_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
+void kvmppc_core_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
- return 0;
}
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem)
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
return 0;
}
void kvmppc_core_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old)
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
+ enum kvm_mr_change change)
{
}
@@ -1694,10 +1888,8 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
update_timer_ints(vcpu);
}
-void kvmppc_decrementer_func(unsigned long data)
+void kvmppc_decrementer_func(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
-
if (vcpu->arch.tcr & TCR_ARE) {
vcpu->arch.dec = vcpu->arch.decar;
kvmppc_emulate_dec(vcpu);
@@ -1761,7 +1953,8 @@ static int kvmppc_booke_add_watchpoint(struct debug_reg *dbg_reg, uint64_t addr,
dbg_reg->dbcr0 |= DBCR0_IDM;
return 0;
}
-void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set)
+static void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap,
+ bool set)
{
/* XXX: Add similar MSR protection for BookE-PR */
#ifdef CONFIG_KVM_BOOKE_HV
@@ -1840,25 +2033,26 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
{
struct debug_reg *dbg_reg;
int n, b = 0, w = 0;
+ int ret = 0;
+
+ vcpu_load(vcpu);
if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
- vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+ vcpu->arch.dbg_reg.dbcr0 = 0;
vcpu->guest_debug = 0;
kvm_guest_protect_msr(vcpu, MSR_DE, false);
- return 0;
+ goto out;
}
kvm_guest_protect_msr(vcpu, MSR_DE, true);
vcpu->guest_debug = dbg->control;
- vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
- /* Set DBCR0_EDM in guest visible DBCR0 register. */
- vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+ vcpu->arch.dbg_reg.dbcr0 = 0;
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
- vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+ vcpu->arch.dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
/* Code below handles only HW breakpoints */
- dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+ dbg_reg = &(vcpu->arch.dbg_reg);
#ifdef CONFIG_KVM_BOOKE_HV
/*
@@ -1879,8 +2073,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
#endif
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
- return 0;
+ goto out;
+ ret = -EINVAL;
for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
uint64_t addr = dbg->arch.bp[n].addr;
uint32_t type = dbg->arch.bp[n].type;
@@ -1888,24 +2083,27 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (type == KVMPPC_DEBUG_NONE)
continue;
- if (type & !(KVMPPC_DEBUG_WATCH_READ |
+ if (type & ~(KVMPPC_DEBUG_WATCH_READ |
KVMPPC_DEBUG_WATCH_WRITE |
KVMPPC_DEBUG_BREAKPOINT))
- return -EINVAL;
+ goto out;
if (type & KVMPPC_DEBUG_BREAKPOINT) {
/* Setting H/W breakpoint */
if (kvmppc_booke_add_breakpoint(dbg_reg, addr, b++))
- return -EINVAL;
+ goto out;
} else {
/* Setting H/W watchpoint */
if (kvmppc_booke_add_watchpoint(dbg_reg, addr,
type, w++))
- return -EINVAL;
+ goto out;
}
}
- return 0;
+ ret = 0;
+out:
+ vcpu_put(vcpu);
+ return ret;
}
void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -1923,19 +2121,45 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
kvmppc_clear_dbsr();
}
-void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
-{
- vcpu->kvm->arch.kvm_ops->mmu_destroy(vcpu);
-}
-
int kvmppc_core_init_vm(struct kvm *kvm)
{
return kvm->arch.kvm_ops->init_vm(kvm);
}
-struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvmppc_core_vcpu_create(struct kvm_vcpu *vcpu)
{
- return kvm->arch.kvm_ops->vcpu_create(kvm, id);
+ int i;
+ int r;
+
+ r = vcpu->kvm->arch.kvm_ops->vcpu_create(vcpu);
+ if (r)
+ return r;
+
+ /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+ vcpu->arch.regs.nip = 0;
+ vcpu->arch.shared->pir = vcpu->vcpu_id;
+ kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
+ kvmppc_set_msr(vcpu, 0);
+
+#ifndef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
+ vcpu->arch.shadow_pid = 1;
+ vcpu->arch.shared->msr = 0;
+#endif
+
+ /* Eye-catching numbers so we know if the guest takes an interrupt
+ * before it's programmed its own IVPR/IVORs. */
+ vcpu->arch.ivpr = 0x55550000;
+ for (i = 0; i < BOOKE_IRQPRIO_MAX; i++)
+ vcpu->arch.ivor[i] = 0x7700 | i * 4;
+
+ kvmppc_init_timing_stats(vcpu);
+
+ r = kvmppc_core_vcpu_setup(vcpu);
+ if (r)
+ vcpu->kvm->arch.kvm_ops->vcpu_free(vcpu);
+ kvmppc_sanity_check(vcpu);
+ return r;
}
void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index f753543c56fa..9c5b8e76014f 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -32,9 +21,15 @@
#define BOOKE_IRQPRIO_ALIGNMENT 2
#define BOOKE_IRQPRIO_PROGRAM 3
#define BOOKE_IRQPRIO_FP_UNAVAIL 4
+#ifdef CONFIG_SPE_POSSIBLE
#define BOOKE_IRQPRIO_SPE_UNAVAIL 5
#define BOOKE_IRQPRIO_SPE_FP_DATA 6
#define BOOKE_IRQPRIO_SPE_FP_ROUND 7
+#endif
+#ifdef CONFIG_PPC_E500MC
+#define BOOKE_IRQPRIO_ALTIVEC_UNAVAIL 5
+#define BOOKE_IRQPRIO_ALTIVEC_ASSIST 6
+#endif
#define BOOKE_IRQPRIO_SYSCALL 8
#define BOOKE_IRQPRIO_AP_UNAVAIL 9
#define BOOKE_IRQPRIO_DTLB_MISS 10
@@ -75,7 +70,7 @@ void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
-int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val);
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val);
@@ -99,59 +94,22 @@ enum int_class {
void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type);
-extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
- struct kvm_vcpu *vcpu,
+extern int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance);
extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong spr_val);
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val);
-extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu);
-extern int kvmppc_core_emulate_op_e500(struct kvm_run *run,
- struct kvm_vcpu *vcpu,
- unsigned int inst, int *advance);
extern int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong spr_val);
extern int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn,
ulong *spr_val);
-/*
- * Load up guest vcpu FP state if it's needed.
- * It also set the MSR_FP in thread so that host know
- * we're holding FPU, and then host can help to save
- * guest vcpu FP state if other threads require to use FPU.
- * This simulates an FP unavailable fault.
- *
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
- if (vcpu->fpu_active && !(current->thread.regs->msr & MSR_FP)) {
- enable_kernel_fp();
- load_fp_state(&vcpu->arch.fp);
- current->thread.fp_save_area = &vcpu->arch.fp;
- current->thread.regs->msr |= MSR_FP;
- }
-#endif
-}
-
-/*
- * Save guest vcpu FP state into thread.
- * It requires to be called with preemption disabled.
- */
-static inline void kvmppc_save_guest_fp(struct kvm_vcpu *vcpu)
-{
-#ifdef CONFIG_PPC_FPU
- if (vcpu->fpu_active && (current->thread.regs->msr & MSR_FP))
- giveup_fpu(current);
- current->thread.fp_save_area = NULL;
-#endif
-}
-
static inline void kvmppc_clear_dbsr(void)
{
mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
}
+
+int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr);
+
#endif /* __KVM_BOOKE_H__ */
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 28c158881d23..d8d38aca71bd 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
* Copyright 2011 Freescale Semiconductor, Inc.
@@ -25,6 +14,7 @@
#define OP_19_XOP_RFI 50
#define OP_19_XOP_RFCI 51
+#define OP_19_XOP_RFDI 39
#define OP_31_XOP_MFMSR 83
#define OP_31_XOP_WRTEE 131
@@ -33,17 +23,23 @@
static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
{
- vcpu->arch.pc = vcpu->arch.shared->srr0;
+ vcpu->arch.regs.nip = vcpu->arch.shared->srr0;
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
}
+static void kvmppc_emul_rfdi(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.regs.nip = vcpu->arch.dsrr0;
+ kvmppc_set_msr(vcpu, vcpu->arch.dsrr1);
+}
+
static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
{
- vcpu->arch.pc = vcpu->arch.csrr0;
+ vcpu->arch.regs.nip = vcpu->arch.csrr0;
kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
}
-int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_booke_emulate_op(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
@@ -65,6 +61,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
*advance = 0;
break;
+ case OP_19_XOP_RFDI:
+ kvmppc_emul_rfdi(vcpu);
+ kvmppc_set_exit_type(vcpu, EMULATED_RFDI_EXITS);
+ *advance = 0;
+ break;
+
default:
emulated = EMULATE_FAIL;
break;
@@ -118,6 +120,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
{
int emulated = EMULATE_DONE;
+ bool debug_inst = false;
switch (sprn) {
case SPRN_DEAR:
@@ -132,14 +135,128 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_CSRR1:
vcpu->arch.csrr1 = spr_val;
break;
+ case SPRN_DSRR0:
+ vcpu->arch.dsrr0 = spr_val;
+ break;
+ case SPRN_DSRR1:
+ vcpu->arch.dsrr1 = spr_val;
+ break;
+ case SPRN_IAC1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac1 = spr_val;
+ break;
+ case SPRN_IAC2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac2 = spr_val;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case SPRN_IAC3:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac3 = spr_val;
+ break;
+ case SPRN_IAC4:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.iac4 = spr_val;
+ break;
+#endif
+ case SPRN_DAC1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dac1 = spr_val;
+ break;
+ case SPRN_DAC2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dac2 = spr_val;
+ break;
case SPRN_DBCR0:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ spr_val &= (DBCR0_IDM | DBCR0_IC | DBCR0_BT | DBCR0_TIE |
+ DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 |
+ DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W);
+
vcpu->arch.dbg_reg.dbcr0 = spr_val;
break;
case SPRN_DBCR1:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
vcpu->arch.dbg_reg.dbcr1 = spr_val;
break;
+ case SPRN_DBCR2:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
+ debug_inst = true;
+ vcpu->arch.dbg_reg.dbcr2 = spr_val;
+ break;
case SPRN_DBSR:
+ /*
+ * If userspace is debugging guest then guest
+ * can not access debug registers.
+ */
+ if (vcpu->guest_debug)
+ break;
+
vcpu->arch.dbsr &= ~spr_val;
+ if (!(vcpu->arch.dbsr & ~DBSR_IDE))
+ kvmppc_core_dequeue_debug(vcpu);
break;
case SPRN_TSR:
kvmppc_clr_tsr_bits(vcpu, spr_val);
@@ -252,6 +369,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
emulated = EMULATE_FAIL;
}
+ if (debug_inst) {
+ current->thread.debug = vcpu->arch.dbg_reg;
+ switch_booke_debug_regs(&vcpu->arch.dbg_reg);
+ }
return emulated;
}
@@ -278,12 +399,43 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_CSRR1:
*spr_val = vcpu->arch.csrr1;
break;
+ case SPRN_DSRR0:
+ *spr_val = vcpu->arch.dsrr0;
+ break;
+ case SPRN_DSRR1:
+ *spr_val = vcpu->arch.dsrr1;
+ break;
+ case SPRN_IAC1:
+ *spr_val = vcpu->arch.dbg_reg.iac1;
+ break;
+ case SPRN_IAC2:
+ *spr_val = vcpu->arch.dbg_reg.iac2;
+ break;
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+ case SPRN_IAC3:
+ *spr_val = vcpu->arch.dbg_reg.iac3;
+ break;
+ case SPRN_IAC4:
+ *spr_val = vcpu->arch.dbg_reg.iac4;
+ break;
+#endif
+ case SPRN_DAC1:
+ *spr_val = vcpu->arch.dbg_reg.dac1;
+ break;
+ case SPRN_DAC2:
+ *spr_val = vcpu->arch.dbg_reg.dac2;
+ break;
case SPRN_DBCR0:
*spr_val = vcpu->arch.dbg_reg.dbcr0;
+ if (vcpu->guest_debug)
+ *spr_val = *spr_val | DBCR0_EDM;
break;
case SPRN_DBCR1:
*spr_val = vcpu->arch.dbg_reg.dbcr1;
break;
+ case SPRN_DBCR2:
+ *spr_val = vcpu->arch.dbg_reg.dbcr2;
+ break;
case SPRN_DBSR:
*spr_val = vcpu->arch.dbsr;
break;
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index 84c308a9a371..205545d820a1 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
* Copyright 2011 Freescale Semiconductor, Inc.
@@ -234,7 +223,7 @@ _GLOBAL(kvmppc_resume_host)
lwz r3, VCPU_HOST_PID(r4)
mtspr SPRN_PID, r3
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
/* we cheat and know that Linux doesn't use PID1 which is always 0 */
lis r3, 0
mtspr SPRN_PID1, r3
@@ -248,7 +237,7 @@ _GLOBAL(kvmppc_resume_host)
/* Switch to kernel stack and jump to handler. */
LOAD_REG_ADDR(r3, kvmppc_handle_exit)
mtctr r3
- lwz r3, HOST_RUN(r1)
+ mr r3, r4
lwz r2, HOST_R2(r1)
mr r14, r4 /* Save vcpu pointer. */
@@ -348,15 +337,14 @@ heavyweight_exit:
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- stw r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ stw r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- stw r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
stw r3, HOST_STACK_LR(r1)
mfcr r5
@@ -418,7 +406,7 @@ lightweight_exit:
lwz r3, VCPU_SHADOW_PID(r4)
mtspr SPRN_PID, r3
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
lwz r3, VCPU_SHADOW_PID1(r4)
mtspr SPRN_PID1, r3
#endif
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index e9fa56a911fd..8b4a402217ba 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2010-2011 Freescale Semiconductor, Inc.
*
@@ -75,6 +64,10 @@
PPC_LL r1, VCPU_HOST_STACK(r4)
PPC_LL r2, HOST_R2(r1)
+START_BTB_FLUSH_SECTION
+ BTB_FLUSH(r10)
+END_BTB_FLUSH_SECTION
+
mfspr r10, SPRN_PID
lwz r8, VCPU_HOST_PID(r4)
PPC_LL r11, VCPU_SHARED(r4)
@@ -146,7 +139,7 @@
* kvmppc_get_last_inst().
*/
li r9, KVM_INST_FETCH_FAILED
- stw r9, VCPU_LAST_INST(r4)
+ PPC_STL r9, VCPU_LAST_INST(r4)
.endif
.if \flags & NEED_ESR
@@ -182,7 +175,7 @@
*/
PPC_LL r4, PACACURRENT(r13)
PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
- stw r10, VCPU_CR(r4)
+ PPC_STL r10, VCPU_CR(r4)
PPC_STL r11, VCPU_GPR(R4)(r4)
PPC_STL r5, VCPU_GPR(R5)(r4)
PPC_STL r6, VCPU_GPR(R6)(r4)
@@ -238,7 +231,7 @@ kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1,NEED_ESR
+ SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
@@ -256,11 +249,9 @@ kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_UNAVAIL, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
- SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+kvm_handler BOOKE_INTERRUPT_ALTIVEC_ASSIST, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
SPRN_SRR0, SPRN_SRR1, 0
@@ -294,7 +285,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, THREAD_NORMSAVE(0)(r10)
PPC_STL r5, VCPU_GPR(R5)(r11)
- stw r13, VCPU_CR(r11)
+ PPC_STL r13, VCPU_CR(r11)
mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R10)(r11)
PPC_LL r3, THREAD_NORMSAVE(2)(r10)
@@ -321,7 +312,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
PPC_STL r4, VCPU_GPR(R4)(r11)
PPC_LL r4, GPR9(r8)
PPC_STL r5, VCPU_GPR(R5)(r11)
- stw r9, VCPU_CR(r11)
+ PPC_STL r9, VCPU_CR(r11)
mfspr r5, \srr0
PPC_STL r3, VCPU_GPR(R8)(r11)
PPC_LL r3, GPR10(r8)
@@ -350,7 +341,7 @@ kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \
SPRN_SRR0, SPRN_SRR1, (NEED_DEAR | NEED_ESR)
-kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_PROGRAM, SPRN_SRR0, SPRN_SRR1, (NEED_ESR | NEED_EMU)
kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_SYSCALL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
@@ -361,9 +352,6 @@ kvm_lvl_handler BOOKE_INTERRUPT_WATCHDOG, \
kvm_handler BOOKE_INTERRUPT_DTLB_MISS, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_ITLB_MISS, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, SPRN_SRR0, SPRN_SRR1, 0
-kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_DOORBELL, SPRN_SRR0, SPRN_SRR1, 0
kvm_lvl_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, \
@@ -436,19 +424,11 @@ _GLOBAL(kvmppc_resume_host)
mtspr SPRN_EPCR, r3
isync
-#ifdef CONFIG_64BIT
- /*
- * We enter with interrupts disabled in hardware, but
- * we need to call RECONCILE_IRQ_STATE to ensure
- * that the software state is kept in sync.
- */
- RECONCILE_IRQ_STATE(r3,r5)
-#endif
-
/* Switch to kernel stack and jump to handler. */
- PPC_LL r3, HOST_RUN(r1)
+ mr r3, r4
mr r5, r14 /* intno */
mr r14, r4 /* Save vcpu pointer. */
+ mr r4, r5
bl kvmppc_handle_exit
/* Restore vcpu pointer and the nonvolatiles we used. */
@@ -537,15 +517,14 @@ heavyweight_exit:
blr
/* Registers:
- * r3: kvm_run pointer
- * r4: vcpu pointer
+ * r3: vcpu pointer
*/
_GLOBAL(__kvmppc_vcpu_run)
stwu r1, -HOST_STACK_SIZE(r1)
- PPC_STL r1, VCPU_HOST_STACK(r4) /* Save stack pointer to vcpu. */
+ PPC_STL r1, VCPU_HOST_STACK(r3) /* Save stack pointer to vcpu. */
/* Save host state to stack. */
- PPC_STL r3, HOST_RUN(r1)
+ mr r4, r3
mflr r3
mfcr r5
PPC_STL r3, HOST_STACK_LR(r1)
@@ -648,7 +627,7 @@ lightweight_exit:
PPC_LL r3, VCPU_LR(r4)
PPC_LL r5, VCPU_XER(r4)
PPC_LL r6, VCPU_CTR(r4)
- lwz r7, VCPU_CR(r4)
+ PPC_LL r7, VCPU_CR(r4)
PPC_LL r8, VCPU_PC(r4)
PPC_LD(r9, VCPU_SHARED_MSR, r11)
PPC_LL r0, VCPU_GPR(R0)(r4)
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 2e02ed849f36..b0f695428733 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -6,10 +7,6 @@
* Description:
* This file is derived from arch/powerpc/kvm/44x.c,
* by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kvm_host.h>
@@ -21,7 +18,6 @@
#include <asm/reg.h>
#include <asm/cputable.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include "../mm/mmu_decl.h"
@@ -76,11 +72,11 @@ static inline int local_sid_setup_one(struct id *entry)
unsigned long sid;
int ret = -1;
- sid = ++(__get_cpu_var(pcpu_last_used_sid));
+ sid = __this_cpu_inc_return(pcpu_last_used_sid);
if (sid < NUM_TIDS) {
- __get_cpu_var(pcpu_sids).entry[sid] = entry;
+ __this_cpu_write(pcpu_sids.entry[sid], entry);
entry->val = sid;
- entry->pentry = &__get_cpu_var(pcpu_sids).entry[sid];
+ entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
ret = sid;
}
@@ -108,8 +104,8 @@ static inline int local_sid_setup_one(struct id *entry)
static inline int local_sid_lookup(struct id *entry)
{
if (entry && entry->val != 0 &&
- __get_cpu_var(pcpu_sids).entry[entry->val] == entry &&
- entry->pentry == &__get_cpu_var(pcpu_sids).entry[entry->val])
+ __this_cpu_read(pcpu_sids.entry[entry->val]) == entry &&
+ entry->pentry == this_cpu_ptr(&pcpu_sids.entry[entry->val]))
return entry->val;
return -1;
}
@@ -117,8 +113,8 @@ static inline int local_sid_lookup(struct id *entry)
/* Invalidate all id mappings on local core -- call with preempt disabled */
static inline void local_sid_destroy_all(void)
{
- __get_cpu_var(pcpu_last_used_sid) = 0;
- memset(&__get_cpu_var(pcpu_sids), 0, sizeof(__get_cpu_var(pcpu_sids)));
+ __this_cpu_write(pcpu_last_used_sid, 0);
+ memset(this_cpu_ptr(&pcpu_sids), 0, sizeof(pcpu_sids));
}
static void *kvmppc_e500_id_table_alloc(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -237,7 +233,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
struct kvm_book3e_206_tlb_entry *gtlbe)
{
struct vcpu_id_table *idt = vcpu_e500->idt;
- unsigned int pr, tid, ts, pid;
+ unsigned int pr, tid, ts;
+ int pid;
u32 val, eaddr;
unsigned long flags;
@@ -299,14 +296,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_booke_vcpu_load(vcpu, cpu);
@@ -325,7 +314,7 @@ static void kvmppc_core_vcpu_put_e500(struct kvm_vcpu *vcpu)
kvmppc_booke_vcpu_put(vcpu);
}
-int kvmppc_core_check_processor_compat(void)
+static int kvmppc_e500_check_processor_compat(void)
{
int r;
@@ -444,47 +433,34 @@ static int kvmppc_set_one_reg_e500(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
-
- vcpu = &vcpu_e500->vcpu;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
if (kvmppc_e500_id_table_alloc(vcpu_e500) == NULL)
- goto uninit_vcpu;
+ return -ENOMEM;
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
goto uninit_id;
vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO);
- if (!vcpu->arch.shared)
+ if (!vcpu->arch.shared) {
+ err = -ENOMEM;
goto uninit_tlb;
+ }
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
uninit_id:
kvmppc_e500_id_table_free(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
@@ -494,8 +470,6 @@ static void kvmppc_core_vcpu_free_e500(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
kvmppc_e500_id_table_free(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500(struct kvm *kvm)
@@ -516,12 +490,12 @@ static struct kvmppc_ops kvm_ops_e500 = {
.vcpu_put = kvmppc_core_vcpu_put_e500,
.vcpu_create = kvmppc_core_vcpu_create_e500,
.vcpu_free = kvmppc_core_vcpu_free_e500,
- .mmu_destroy = kvmppc_mmu_destroy_e500,
.init_vm = kvmppc_core_init_vm_e500,
.destroy_vm = kvmppc_core_destroy_vm_e500,
.emulate_op = kvmppc_core_emulate_op_e500,
.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+ .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500,
};
static int __init kvmppc_e500_init(void)
@@ -533,7 +507,7 @@ static int __init kvmppc_e500_init(void)
unsigned long handler_len;
unsigned long max_ivor = 0;
- r = kvmppc_core_check_processor_compat();
+ r = kvmppc_e500_check_processor_compat();
if (r)
goto err_out;
@@ -557,7 +531,7 @@ static int __init kvmppc_e500_init(void)
flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers +
ivor[max_ivor] + handler_len);
- r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ r = kvm_init(sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
if (r)
goto err_out;
kvm_ops_e500.owner = THIS_MODULE;
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index a326178bdea5..f9acf866c709 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -10,18 +11,15 @@
* This file is based on arch/powerpc/kvm/44x_tlb.h and
* arch/powerpc/include/asm/kvm_44x.h by Hollis Blanchard <hollisb@us.ibm.com>,
* Copyright IBM Corp. 2007-2008
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#ifndef KVM_E500_H
#define KVM_E500_H
#include <linux/kvm_host.h>
-#include <asm/mmu-book3e.h>
+#include <asm/nohash/mmu-e500.h>
#include <asm/tlb.h>
+#include <asm/cputhreads.h>
enum vcpu_ftr {
VCPU_FTR_MMU_V2
@@ -36,11 +34,13 @@ enum vcpu_ftr {
#define E500_TLB_BITMAP (1 << 30)
/* TLB1 entry is mapped by host TLB0 */
#define E500_TLB_TLB0 (1 << 29)
+/* entry is writable on the host */
+#define E500_TLB_WRITABLE (1 << 28)
/* bits [6-5] MAS2_X1 and MAS2_X0 and [4-0] bits for WIMGE */
#define E500_TLB_MAS2_ATTR (0x7f)
struct tlbe_ref {
- pfn_t pfn; /* valid only for TLB0, except briefly */
+ kvm_pfn_t pfn; /* valid only for TLB0, except briefly */
unsigned int flags; /* E500_TLB_* */
};
@@ -289,6 +289,25 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500);
#define kvmppc_e500_get_tlb_stid(vcpu, gtlbe) get_tlb_tid(gtlbe)
#define get_tlbmiss_tid(vcpu) get_cur_pid(vcpu)
#define get_tlb_sts(gtlbe) (gtlbe->mas1 & MAS1_TS)
+
+/*
+ * These functions should be called with preemption disabled
+ * and the returned value is valid only in that context
+ */
+static inline int get_thread_specific_lpid(int vm_lpid)
+{
+ int vcpu_lpid = vm_lpid;
+
+ if (threads_per_core == 2)
+ vcpu_lpid |= smp_processor_id() & 1;
+
+ return vcpu_lpid;
+}
+
+static inline int get_lpid(struct kvm_vcpu *vcpu)
+{
+ return get_thread_specific_lpid(vcpu->kvm->arch.lpid);
+}
#else
unsigned int kvmppc_e500_get_tlb_stid(struct kvm_vcpu *vcpu,
struct kvm_book3e_206_tlb_entry *gtlbe);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index c99c40e9182a..051102d50c31 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -6,15 +7,12 @@
* Description:
* This file is derived from arch/powerpc/kvm/44x_emulate.c,
* by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
#include <asm/dbell.h>
+#include <asm/reg_booke.h>
#include "booke.h"
#include "e500.h"
@@ -22,6 +20,7 @@
#define XOP_DCBTLS 166
#define XOP_MSGSND 206
#define XOP_MSGCLR 238
+#define XOP_MFTMR 366
#define XOP_TLBIVAX 786
#define XOP_TLBSX 914
#define XOP_TLBRE 946
@@ -51,7 +50,7 @@ static int dbell2prio(ulong param)
static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
{
- ulong param = vcpu->arch.gpr[rb];
+ ulong param = vcpu->arch.regs.gpr[rb];
int prio = dbell2prio(param);
if (prio < 0)
@@ -63,10 +62,10 @@ static int kvmppc_e500_emul_msgclr(struct kvm_vcpu *vcpu, int rb)
static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
{
- ulong param = vcpu->arch.gpr[rb];
+ ulong param = vcpu->arch.regs.gpr[rb];
int prio = dbell2prio(rb);
int pir = param & PPC_DBELL_PIR_MASK;
- int i;
+ unsigned long i;
struct kvm_vcpu *cvcpu;
if (prio < 0)
@@ -84,16 +83,16 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb)
}
#endif
-static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_e500_emul_ehpriv(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
switch (get_oc(inst)) {
case EHPRIV_OC_DEBUG:
- run->exit_reason = KVM_EXIT_DEBUG;
- run->debug.arch.address = vcpu->arch.pc;
- run->debug.arch.status = 0;
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.address = vcpu->arch.regs.nip;
+ vcpu->run->debug.arch.status = 0;
kvmppc_account_exit(vcpu, DEBUG_EXITS);
emulated = EMULATE_EXIT_USER;
*advance = 0;
@@ -113,7 +112,20 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
-int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
+static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst,
+ int rt)
+{
+ /* Expose one thread per vcpu */
+ if (get_tmrn(inst) == TMRN_TMCFG0) {
+ kvmppc_set_gpr(vcpu, rt,
+ 1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT));
+ return EMULATE_DONE;
+ }
+
+ return EMULATE_FAIL;
+}
+
+int kvmppc_core_emulate_op_e500(struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
int emulated = EMULATE_DONE;
@@ -165,9 +177,12 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
break;
+ case XOP_MFTMR:
+ emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt);
+ break;
+
case XOP_EHPRIV:
- emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst,
- advance);
+ emulated = kvmppc_e500_emul_ehpriv(vcpu, inst, advance);
break;
default:
@@ -181,7 +196,7 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
if (emulated == EMULATE_FAIL)
- emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance);
+ emulated = kvmppc_booke_emulate_op(vcpu, inst, advance);
return emulated;
}
@@ -258,7 +273,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
vcpu->arch.pwrmgtcr0 = spr_val;
break;
+ case SPRN_BUCSR:
+ /*
+ * If we are here, it means that we have already flushed the
+ * branch predictor, so just return to guest.
+ */
+ break;
+
/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
break;
@@ -268,6 +291,15 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va
case SPRN_IVOR34:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case SPRN_IVOR32:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL] = spr_val;
+ break;
+ case SPRN_IVOR33:
+ vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST] = spr_val;
+ break;
+#endif
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
@@ -381,6 +413,7 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
break;
/* extra exceptions */
+#ifdef CONFIG_SPE_POSSIBLE
case SPRN_IVOR32:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
break;
@@ -390,6 +423,15 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v
case SPRN_IVOR34:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case SPRN_IVOR32:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_UNAVAIL];
+ break;
+ case SPRN_IVOR33:
+ *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_ALTIVEC_ASSIST];
+ break;
+#endif
case SPRN_IVOR35:
*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
break;
diff --git a/arch/powerpc/kvm/e500_mmu.c b/arch/powerpc/kvm/e500_mmu.c
index 50860e919cb8..e131fbecdcc4 100644
--- a/arch/powerpc/kvm/e500_mmu.c
+++ b/arch/powerpc/kvm/e500_mmu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -10,10 +11,6 @@
* Description:
* This file is based on arch/powerpc/kvm/44x_tlb.c,
* by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -377,7 +374,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
| MAS0_NV(vcpu_e500->gtlb_nv[tlbsel]);
vcpu->arch.shared->mas1 =
(vcpu->arch.shared->mas6 & MAS6_SPID0)
- | (vcpu->arch.shared->mas6 & (MAS6_SAS ? MAS1_TS : 0))
+ | ((vcpu->arch.shared->mas6 & MAS6_SAS) ? MAS1_TS : 0)
| (vcpu->arch.shared->mas4 & MAS4_TSIZED(~0));
vcpu->arch.shared->mas2 &= MAS2_EPN;
vcpu->arch.shared->mas2 |= vcpu->arch.shared->mas4 &
@@ -513,7 +510,7 @@ void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu)
{
unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS);
- kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as);
+ kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.regs.nip, as);
}
void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu)
@@ -536,10 +533,6 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int index,
return get_tlb_raddr(gtlbe) | (eaddr & pgmask);
}
-void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu)
-{
-}
-
/*****************************************/
static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
@@ -743,7 +736,7 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
char *virt;
struct page **pages;
struct tlbe_priv *privs[2] = {};
- u64 *g2h_bitmap = NULL;
+ u64 *g2h_bitmap;
size_t array_len;
u32 sets;
int num_pages, ret, i;
@@ -779,41 +772,44 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
num_pages = DIV_ROUND_UP(cfg->array + array_len - 1, PAGE_SIZE) -
cfg->array / PAGE_SIZE;
- pages = kmalloc(sizeof(struct page *) * num_pages, GFP_KERNEL);
+ pages = kmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
if (!pages)
return -ENOMEM;
- ret = get_user_pages_fast(cfg->array, num_pages, 1, pages);
+ ret = get_user_pages_fast(cfg->array, num_pages, FOLL_WRITE, pages);
if (ret < 0)
- goto err_pages;
+ goto free_pages;
if (ret != num_pages) {
num_pages = ret;
ret = -EFAULT;
- goto err_put_page;
+ goto put_pages;
}
virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
if (!virt) {
ret = -ENOMEM;
- goto err_put_page;
+ goto put_pages;
}
- privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
- GFP_KERNEL);
- privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
- GFP_KERNEL);
+ privs[0] = kcalloc(params.tlb_sizes[0], sizeof(*privs[0]), GFP_KERNEL);
+ if (!privs[0]) {
+ ret = -ENOMEM;
+ goto put_pages;
+ }
- if (!privs[0] || !privs[1]) {
+ privs[1] = kcalloc(params.tlb_sizes[1], sizeof(*privs[1]), GFP_KERNEL);
+ if (!privs[1]) {
ret = -ENOMEM;
- goto err_privs;
+ goto free_privs_first;
}
- g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
- GFP_KERNEL);
+ g2h_bitmap = kcalloc(params.tlb_sizes[1],
+ sizeof(*g2h_bitmap),
+ GFP_KERNEL);
if (!g2h_bitmap) {
ret = -ENOMEM;
- goto err_privs;
+ goto free_privs_second;
}
free_gtlb(vcpu_e500);
@@ -845,16 +841,14 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-
-err_privs:
- kfree(privs[0]);
+ free_privs_second:
kfree(privs[1]);
-
-err_put_page:
+ free_privs_first:
+ kfree(privs[0]);
+ put_pages:
for (i = 0; i < num_pages; i++)
put_page(pages[i]);
-
-err_pages:
+ free_pages:
kfree(pages);
return ret;
}
@@ -904,11 +898,9 @@ static int vcpu_mmu_init(struct kvm_vcpu *vcpu,
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
{
struct kvm_vcpu *vcpu = &vcpu_e500->vcpu;
- int entry_size = sizeof(struct kvm_book3e_206_tlb_entry);
- int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE;
if (e500_mmu_host_init(vcpu_e500))
- goto err;
+ goto free_vcpu;
vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE;
vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE;
@@ -920,37 +912,39 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
vcpu_e500->gtlb_params[1].ways = KVM_E500_TLB1_SIZE;
vcpu_e500->gtlb_params[1].sets = 1;
- vcpu_e500->gtlb_arch = kmalloc(entries * entry_size, GFP_KERNEL);
+ vcpu_e500->gtlb_arch = kmalloc_array(KVM_E500_TLB0_SIZE +
+ KVM_E500_TLB1_SIZE,
+ sizeof(*vcpu_e500->gtlb_arch),
+ GFP_KERNEL);
if (!vcpu_e500->gtlb_arch)
return -ENOMEM;
vcpu_e500->gtlb_offset[0] = 0;
vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE;
- vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) *
- vcpu_e500->gtlb_params[0].entries,
+ vcpu_e500->gtlb_priv[0] = kcalloc(vcpu_e500->gtlb_params[0].entries,
+ sizeof(struct tlbe_ref),
GFP_KERNEL);
if (!vcpu_e500->gtlb_priv[0])
- goto err;
+ goto free_vcpu;
- vcpu_e500->gtlb_priv[1] = kzalloc(sizeof(struct tlbe_ref) *
- vcpu_e500->gtlb_params[1].entries,
+ vcpu_e500->gtlb_priv[1] = kcalloc(vcpu_e500->gtlb_params[1].entries,
+ sizeof(struct tlbe_ref),
GFP_KERNEL);
if (!vcpu_e500->gtlb_priv[1])
- goto err;
+ goto free_vcpu;
- vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
- vcpu_e500->gtlb_params[1].entries,
+ vcpu_e500->g2h_tlb1_map = kcalloc(vcpu_e500->gtlb_params[1].entries,
+ sizeof(*vcpu_e500->g2h_tlb1_map),
GFP_KERNEL);
if (!vcpu_e500->g2h_tlb1_map)
- goto err;
+ goto free_vcpu;
vcpu_mmu_init(vcpu, vcpu_e500->gtlb_params);
kvmppc_recalc_tlb1map_range(vcpu_e500);
return 0;
-
-err:
+ free_vcpu:
free_gtlb(vcpu_e500);
return -1;
}
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 08f14bb57897..06caf8bbbe2b 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -10,10 +11,6 @@
* Description:
* This file is based on arch/powerpc/kvm/44x_tlb.c,
* by Hollis Blanchard <hollisb@us.ibm.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kernel.h>
@@ -25,11 +22,12 @@
#include <linux/highmem.h>
#include <linux/log2.h>
#include <linux/uaccess.h>
-#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/rwsem.h>
#include <linux/vmalloc.h>
#include <linux/hugetlb.h>
#include <asm/kvm_ppc.h>
+#include <asm/pte-walk.h>
#include "e500.h"
#include "timing.h"
@@ -47,11 +45,14 @@ static inline unsigned int tlb1_max_shadow_size(void)
return host_tlb_params[1].entries - tlbcam_index - 1;
}
-static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
+static inline u32 e500_shadow_mas3_attrib(u32 mas3, bool writable, int usermode)
{
/* Mask off reserved bits. */
mas3 &= MAS3_ATTRIB_MASK;
+ if (!writable)
+ mas3 &= ~(MAS3_UW|MAS3_SW);
+
#ifndef CONFIG_KVM_BOOKE_HV
if (!usermode) {
/* Guest is in supervisor mode,
@@ -69,7 +70,8 @@ static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode)
* writing shadow tlb entry to host TLB
*/
static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
- uint32_t mas0)
+ uint32_t mas0,
+ uint32_t lpid)
{
unsigned long flags;
@@ -80,7 +82,7 @@ static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe,
mtspr(SPRN_MAS3, (u32)stlbe->mas7_3);
mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32));
#ifdef CONFIG_KVM_BOOKE_HV
- mtspr(SPRN_MAS8, stlbe->mas8);
+ mtspr(SPRN_MAS8, MAS8_TGS | get_thread_specific_lpid(lpid));
#endif
asm volatile("isync; tlbwe" : : : "memory");
@@ -129,11 +131,12 @@ static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500,
if (tlbsel == 0) {
mas0 = get_host_mas0(stlbe->mas2);
- __write_host_tlbe(stlbe, mas0);
+ __write_host_tlbe(stlbe, mas0, vcpu_e500->vcpu.kvm->arch.lpid);
} else {
__write_host_tlbe(stlbe,
MAS0_TLBSEL(1) |
- MAS0_ESEL(to_htlb1_esel(sesel)));
+ MAS0_ESEL(to_htlb1_esel(sesel)),
+ vcpu_e500->vcpu.kvm->arch.lpid);
}
}
@@ -161,9 +164,9 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
struct kvm_book3e_206_tlb_entry magic;
ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK;
unsigned int stid;
- pfn_t pfn;
+ kvm_pfn_t pfn;
- pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
+ pfn = (kvm_pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT;
get_page(pfn_to_page(pfn));
preempt_disable();
@@ -176,7 +179,7 @@ void kvmppc_map_magic(struct kvm_vcpu *vcpu)
MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR;
magic.mas8 = 0;
- __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index));
+ __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index), 0);
preempt_enable();
}
#endif
@@ -244,19 +247,16 @@ static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe)
static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
struct kvm_book3e_206_tlb_entry *gtlbe,
- pfn_t pfn, unsigned int wimg)
+ kvm_pfn_t pfn, unsigned int wimg,
+ bool writable)
{
ref->pfn = pfn;
ref->flags = E500_TLB_VALID;
+ if (writable)
+ ref->flags |= E500_TLB_WRITABLE;
/* Use guest supplied MAS2_G and MAS2_E */
ref->flags |= (gtlbe->mas2 & MAS2_ATTRIB_MASK) | wimg;
-
- /* Mark the page accessed */
- kvm_set_pfn_accessed(pfn);
-
- if (tlbe_is_writable(gtlbe))
- kvm_set_pfn_dirty(pfn);
}
static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
@@ -307,8 +307,9 @@ static void kvmppc_e500_setup_stlbe(
int tsize, struct tlbe_ref *ref, u64 gvaddr,
struct kvm_book3e_206_tlb_entry *stlbe)
{
- pfn_t pfn = ref->pfn;
+ kvm_pfn_t pfn = ref->pfn;
u32 pr = vcpu->arch.shared->msr & MSR_PR;
+ bool writable = !!(ref->flags & E500_TLB_WRITABLE);
BUG_ON(!(ref->flags & E500_TLB_VALID));
@@ -316,11 +317,7 @@ static void kvmppc_e500_setup_stlbe(
stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID;
stlbe->mas2 = (gvaddr & MAS2_EPN) | (ref->flags & E500_TLB_MAS2_ATTR);
stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) |
- e500_shadow_mas3_attrib(gtlbe->mas7_3, pr);
-
-#ifdef CONFIG_KVM_BOOKE_HV
- stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid;
-#endif
+ e500_shadow_mas3_attrib(gtlbe->mas7_3, writable, pr);
}
static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
@@ -329,20 +326,22 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
struct tlbe_ref *ref)
{
struct kvm_memory_slot *slot;
- unsigned long pfn = 0; /* silence GCC warning */
+ unsigned int psize;
+ unsigned long pfn;
+ struct page *page = NULL;
unsigned long hva;
- int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
int ret = 0;
unsigned long mmu_seq;
struct kvm *kvm = vcpu_e500->vcpu.kvm;
- unsigned long tsize_pages = 0;
pte_t *ptep;
unsigned int wimg = 0;
pgd_t *pgdir;
+ unsigned long flags;
+ bool writable = false;
/* used to check for invalidations in progress */
- mmu_seq = kvm->mmu_notifier_seq;
+ mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/*
@@ -356,144 +355,117 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn);
hva = gfn_to_hva_memslot(slot, gfn);
- if (tlbsel == 1) {
- struct vm_area_struct *vma;
- down_read(&current->mm->mmap_sem);
-
- vma = find_vma(current->mm, hva);
- if (vma && hva >= vma->vm_start &&
- (vma->vm_flags & VM_PFNMAP)) {
- /*
- * This VMA is a physically contiguous region (e.g.
- * /dev/mem) that bypasses normal Linux page
- * management. Find the overlap between the
- * vma and the memslot.
- */
-
- unsigned long start, end;
- unsigned long slot_start, slot_end;
-
- pfnmap = 1;
-
- start = vma->vm_pgoff;
- end = start +
- ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT);
-
- pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT);
-
- slot_start = pfn - (gfn - slot->base_gfn);
- slot_end = slot_start + slot->npages;
-
- if (start < slot_start)
- start = slot_start;
- if (end > slot_end)
- end = slot_end;
-
- tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
- MAS1_TSIZE_SHIFT;
-
- /*
- * e500 doesn't implement the lowest tsize bit,
- * or 1K pages.
- */
- tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
-
- /*
- * Now find the largest tsize (up to what the guest
- * requested) that will cover gfn, stay within the
- * range, and for which gfn and pfn are mutually
- * aligned.
- */
-
- for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
- unsigned long gfn_start, gfn_end;
- tsize_pages = 1 << (tsize - 2);
-
- gfn_start = gfn & ~(tsize_pages - 1);
- gfn_end = gfn_start + tsize_pages;
-
- if (gfn_start + pfn - gfn < start)
- continue;
- if (gfn_end + pfn - gfn > end)
- continue;
- if ((gfn & (tsize_pages - 1)) !=
- (pfn & (tsize_pages - 1)))
- continue;
-
- gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
- pfn &= ~(tsize_pages - 1);
- break;
- }
- } else if (vma && hva >= vma->vm_start &&
- (vma->vm_flags & VM_HUGETLB)) {
- unsigned long psize = vma_kernel_pagesize(vma);
-
- tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
- MAS1_TSIZE_SHIFT;
-
- /*
- * Take the largest page size that satisfies both host
- * and guest mapping
- */
- tsize = min(__ilog2(psize) - 10, tsize);
-
- /*
- * e500 doesn't implement the lowest tsize bit,
- * or 1K pages.
- */
- tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
- }
-
- up_read(&current->mm->mmap_sem);
- }
-
- if (likely(!pfnmap)) {
- tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
- pfn = gfn_to_pfn_memslot(slot, gfn);
- if (is_error_noslot_pfn(pfn)) {
- if (printk_ratelimit())
- pr_err("%s: real page not found for gfn %lx\n",
- __func__, (long)gfn);
- return -EINVAL;
- }
-
- /* Align guest and physical address to page map boundaries */
- pfn &= ~(tsize_pages - 1);
- gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ pfn = __kvm_faultin_pfn(slot, gfn, FOLL_WRITE, &writable, &page);
+ if (is_error_noslot_pfn(pfn)) {
+ if (printk_ratelimit())
+ pr_err("%s: real page not found for gfn %lx\n",
+ __func__, (long)gfn);
+ return -EINVAL;
}
spin_lock(&kvm->mmu_lock);
- if (mmu_notifier_retry(kvm, mmu_seq)) {
+ if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
goto out;
}
pgdir = vcpu_e500->vcpu.arch.pgdir;
- ptep = lookup_linux_ptep(pgdir, hva, &tsize_pages);
- if (pte_present(*ptep))
- wimg = (*ptep >> PTE_WIMGE_SHIFT) & MAS2_WIMGE_MASK;
- else {
- if (printk_ratelimit())
- pr_err("%s: pte not present: gfn %lx, pfn %lx\n",
- __func__, (long)gfn, pfn);
- ret = -EINVAL;
- goto out;
+ /*
+ * We are just looking at the wimg bits, so we don't
+ * care much about the trans splitting bit.
+ * We are holding kvm->mmu_lock so a notifier invalidate
+ * can't run hence pfn won't change.
+ */
+ local_irq_save(flags);
+ ptep = find_linux_pte(pgdir, hva, NULL, &psize);
+ if (ptep) {
+ pte_t pte = READ_ONCE(*ptep);
+
+ if (pte_present(pte)) {
+ wimg = (pte_val(pte) >> PTE_WIMGE_SHIFT) &
+ MAS2_WIMGE_MASK;
+ } else {
+ local_irq_restore(flags);
+ pr_err_ratelimited("%s: pte not present: gfn %lx,pfn %lx\n",
+ __func__, (long)gfn, pfn);
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ local_irq_restore(flags);
+
+ if (psize && tlbsel == 1) {
+ unsigned long psize_pages, tsize_pages;
+ unsigned long start, end;
+ unsigned long slot_start, slot_end;
+
+ psize_pages = 1UL << (psize - PAGE_SHIFT);
+ start = pfn & ~(psize_pages - 1);
+ end = start + psize_pages;
+
+ slot_start = pfn - (gfn - slot->base_gfn);
+ slot_end = slot_start + slot->npages;
+
+ if (start < slot_start)
+ start = slot_start;
+ if (end > slot_end)
+ end = slot_end;
+
+ tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >>
+ MAS1_TSIZE_SHIFT;
+
+ /*
+ * Any page size that doesn't satisfy the host mapping
+ * will fail the start and end tests.
+ */
+ tsize = min(psize - PAGE_SHIFT + BOOK3E_PAGESZ_4K, tsize);
+
+ /*
+ * e500 doesn't implement the lowest tsize bit,
+ * or 1K pages.
+ */
+ tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1);
+
+ /*
+ * Now find the largest tsize (up to what the guest
+ * requested) that will cover gfn, stay within the
+ * range, and for which gfn and pfn are mutually
+ * aligned.
+ */
+
+ for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) {
+ unsigned long gfn_start, gfn_end;
+ tsize_pages = 1UL << (tsize - 2);
+
+ gfn_start = gfn & ~(tsize_pages - 1);
+ gfn_end = gfn_start + tsize_pages;
+
+ if (gfn_start + pfn - gfn < start)
+ continue;
+ if (gfn_end + pfn - gfn > end)
+ continue;
+ if ((gfn & (tsize_pages - 1)) !=
+ (pfn & (tsize_pages - 1)))
+ continue;
+
+ gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1);
+ pfn &= ~(tsize_pages - 1);
+ break;
+ }
}
- kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg);
+ kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg, writable);
kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize,
ref, gvaddr, stlbe);
+ writable = tlbe_is_writable(stlbe);
/* Clear i-cache for new pages */
kvmppc_mmu_flush_icache(pfn);
out:
+ kvm_release_faultin_page(kvm, page, !!ret, writable);
spin_unlock(&kvm->mmu_lock);
-
- /* Drop refcount on page, so that mmu notifiers can clear it */
- kvm_release_pfn_clean(pfn);
-
return ret;
}
@@ -612,8 +584,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
}
#ifdef CONFIG_KVM_BOOKE_HV
-int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
- u32 *instr)
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+ enum instruction_fetch_type type, unsigned long *instr)
{
gva_t geaddr;
hpa_t addr;
@@ -633,7 +605,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
local_irq_save(flags);
mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space);
- mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(vcpu));
asm volatile("tlbsx 0, %[geaddr]\n" : :
[geaddr] "r" (geaddr));
mtspr(SPRN_MAS5, 0);
@@ -663,7 +635,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
if (unlikely((pr && !(mas3 & MAS3_UX)) ||
(!pr && !(mas3 & MAS3_SX)))) {
pr_err_ratelimited(
- "%s: Instuction emulation from guest addres %08lx without execute permission\n",
+ "%s: Instruction emulation from guest address %08lx without execute permission\n",
__func__, geaddr);
return EMULATE_AGAIN;
}
@@ -675,7 +647,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
if (has_feature(vcpu, VCPU_FTR_MMU_V2) &&
unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) {
pr_err_ratelimited(
- "%s: Instuction emulation from guest addres %08lx mismatches storage attributes\n",
+ "%s: Instruction emulation from guest address %08lx mismatches storage attributes\n",
__func__, geaddr);
return EMULATE_AGAIN;
}
@@ -688,7 +660,7 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
/* Guard against emulation from devices area */
if (unlikely(!page_is_ram(pfn))) {
- pr_err_ratelimited("%s: Instruction emulation from non-RAM host addres %08llx is not supported\n",
+ pr_err_ratelimited("%s: Instruction emulation from non-RAM host address %08llx is not supported\n",
__func__, addr);
return EMULATE_AGAIN;
}
@@ -702,8 +674,8 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
return EMULATE_DONE;
}
#else
-int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
- u32 *instr)
+int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
+ enum instruction_fetch_type type, unsigned long *instr)
{
return EMULATE_AGAIN;
}
@@ -711,43 +683,30 @@ int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type,
/************* MMU Notifiers *************/
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+static bool kvm_e500_mmu_unmap_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- trace_kvm_unmap_hva(hva);
-
/*
* Flush all shadow tlb entries everywhere. This is slow, but
* we are 100% sure that we catch the to be unmapped page
*/
- kvm_flush_remote_tlbs(kvm);
-
- return 0;
+ return true;
}
-int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
{
- /* kvm_unmap_hva flushes everything anyways */
- kvm_unmap_hva(kvm, start);
-
- return 0;
+ return kvm_e500_mmu_unmap_gfn(kvm, range);
}
-int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* XXX could be more clever ;) */
- return 0;
+ return false;
}
-int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
/* XXX could be more clever ;) */
- return 0;
-}
-
-void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
-{
- /* The page will get remapped properly on its next fault */
- kvm_unmap_hva(kvm, hva);
+ return false;
}
/*****************************************/
@@ -785,9 +744,8 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
host_tlb_params[0].sets =
host_tlb_params[0].entries / host_tlb_params[0].ways;
host_tlb_params[1].sets = 1;
-
- vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
- host_tlb_params[1].entries,
+ vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries,
+ sizeof(*vcpu_e500->h2g_tlb1_rmap),
GFP_KERNEL);
if (!vcpu_e500->h2g_tlb1_rmap)
return -EINVAL;
diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h
index 7624835b76c7..d8178cc86b30 100644
--- a/arch/powerpc/kvm/e500_mmu_host.h
+++ b/arch/powerpc/kvm/e500_mmu_host.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#ifndef KVM_E500_MMU_HOST_H
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 164bad2a19bf..e476e107a932 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -6,10 +7,6 @@
* Description:
* This file is derived from arch/powerpc/kvm/e500.c,
* by Yu Liu <yu.liu@freescale.com>.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/kvm_host.h>
@@ -21,9 +18,9 @@
#include <asm/reg.h>
#include <asm/cputable.h>
-#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
#include <asm/dbell.h>
+#include <asm/ppc-opcode.h>
#include "booke.h"
#include "e500.h"
@@ -48,10 +45,11 @@ void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type)
return;
}
-
- tag = PPC_DBELL_LPID(vcpu->kvm->arch.lpid) | vcpu->vcpu_id;
+ preempt_disable();
+ tag = PPC_DBELL_LPID(get_lpid(vcpu)) | vcpu->vcpu_id;
mb();
ppc_msgsnd(dbell_type, 0, tag);
+ preempt_enable();
}
/* gtlbe must not be mapped by more than one host tlb entry */
@@ -60,12 +58,11 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
{
unsigned int tid, ts;
gva_t eaddr;
- u32 val, lpid;
+ u32 val;
unsigned long flags;
ts = get_tlb_ts(gtlbe);
tid = get_tlb_tid(gtlbe);
- lpid = vcpu_e500->vcpu.kvm->arch.lpid;
/* We search the host TLB to invalidate its shadow TLB entry */
val = (tid << 16) | ts;
@@ -74,7 +71,7 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
local_irq_save(flags);
mtspr(SPRN_MAS6, val);
- mtspr(SPRN_MAS5, MAS5_SGS | lpid);
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
asm volatile("tlbsx 0, %[eaddr]\n" : : [eaddr] "r" (eaddr));
val = mfspr(SPRN_MAS1);
@@ -95,8 +92,12 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500)
unsigned long flags;
local_irq_save(flags);
- mtspr(SPRN_MAS5, MAS5_SGS | vcpu_e500->vcpu.kvm->arch.lpid);
- asm volatile("tlbilxlpid");
+ mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu));
+ /*
+ * clang-17 and older could not assemble tlbilxlpid.
+ * https://github.com/ClangBuiltLinux/linux/issues/1891
+ */
+ asm volatile (PPC_TLBILX_LPID);
mtspr(SPRN_MAS5, 0);
local_irq_restore(flags);
}
@@ -110,6 +111,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
{
}
+/* We use two lpids per VM */
static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid);
static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
@@ -118,10 +120,12 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
kvmppc_booke_vcpu_load(vcpu, cpu);
- mtspr(SPRN_LPID, vcpu->kvm->arch.lpid);
+ mtspr(SPRN_LPID, get_lpid(vcpu));
mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
mtspr(SPRN_GPIR, vcpu->vcpu_id);
mtspr(SPRN_MSRP, vcpu->arch.shadow_msrp);
+ vcpu->arch.eplc = EPC_EGS | (get_lpid(vcpu) << EPC_ELPID_SHIFT);
+ vcpu->arch.epsc = vcpu->arch.eplc;
mtspr(SPRN_EPLC, vcpu->arch.eplc);
mtspr(SPRN_EPSC, vcpu->arch.epsc);
@@ -141,12 +145,10 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu)
mtspr(SPRN_GESR, vcpu->arch.shared->esr);
if (vcpu->arch.oldpir != mfspr(SPRN_PIR) ||
- __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) {
+ __this_cpu_read(last_vcpu_of_lpid[get_lpid(vcpu)]) != vcpu) {
kvmppc_e500_tlbil_all(vcpu_e500);
- __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu;
+ __this_cpu_write(last_vcpu_of_lpid[get_lpid(vcpu)], vcpu);
}
-
- kvmppc_load_guest_fp(vcpu);
}
static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
@@ -171,7 +173,7 @@ static void kvmppc_core_vcpu_put_e500mc(struct kvm_vcpu *vcpu)
kvmppc_booke_vcpu_put(vcpu);
}
-int kvmppc_core_check_processor_compat(void)
+static int kvmppc_e500mc_check_processor_compat(void)
{
int r;
@@ -179,6 +181,16 @@ int kvmppc_core_check_processor_compat(void)
r = 0;
else if (strcmp(cur_cpu_spec->cpu_name, "e5500") == 0)
r = 0;
+#ifdef CONFIG_ALTIVEC
+ /*
+ * Since guests have the privilege to enable AltiVec, we need AltiVec
+ * support in the host to save/restore their context.
+ * Don't use CPU_FTR_ALTIVEC to identify cores with AltiVec unit
+ * because it's cleared in the absence of CONFIG_ALTIVEC!
+ */
+ else if (strcmp(cur_cpu_spec->cpu_name, "e6500") == 0)
+ r = 0;
+#endif
else
r = -ENOTSUPP;
@@ -194,9 +206,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
#ifdef CONFIG_64BIT
vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
#endif
- vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
- vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
- vcpu->arch.epsc = vcpu->arch.eplc;
+ vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_PMMP;
vcpu->arch.pvr = mfspr(SPRN_PVR);
vcpu_e500->svr = mfspr(SPRN_SVR);
@@ -296,46 +306,32 @@ static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id,
return r;
}
-static struct kvm_vcpu *kvmppc_core_vcpu_create_e500mc(struct kvm *kvm,
- unsigned int id)
+static int kvmppc_core_vcpu_create_e500mc(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_e500 *vcpu_e500;
- struct kvm_vcpu *vcpu;
int err;
- vcpu_e500 = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
- if (!vcpu_e500) {
- err = -ENOMEM;
- goto out;
- }
- vcpu = &vcpu_e500->vcpu;
+ BUILD_BUG_ON(offsetof(struct kvmppc_vcpu_e500, vcpu) != 0);
+ vcpu_e500 = to_e500(vcpu);
- /* Invalid PIR value -- this LPID dosn't have valid state on any cpu */
+ /* Invalid PIR value -- this LPID doesn't have valid state on any cpu */
vcpu->arch.oldpir = 0xffffffff;
- err = kvm_vcpu_init(vcpu, kvm, id);
- if (err)
- goto free_vcpu;
-
err = kvmppc_e500_tlb_init(vcpu_e500);
if (err)
- goto uninit_vcpu;
+ return err;
vcpu->arch.shared = (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- if (!vcpu->arch.shared)
+ if (!vcpu->arch.shared) {
+ err = -ENOMEM;
goto uninit_tlb;
+ }
- return vcpu;
+ return 0;
uninit_tlb:
kvmppc_e500_tlb_uninit(vcpu_e500);
-uninit_vcpu:
- kvm_vcpu_uninit(vcpu);
-
-free_vcpu:
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
-out:
- return ERR_PTR(err);
+ return err;
}
static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
@@ -344,8 +340,6 @@ static void kvmppc_core_vcpu_free_e500mc(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.shared);
kvmppc_e500_tlb_uninit(vcpu_e500);
- kvm_vcpu_uninit(vcpu);
- kmem_cache_free(kvm_vcpu_cache, vcpu_e500);
}
static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
@@ -356,13 +350,26 @@ static int kvmppc_core_init_vm_e500mc(struct kvm *kvm)
if (lpid < 0)
return lpid;
+ /*
+ * Use two lpids per VM on cores with two threads like e6500. Use
+ * even numbers to speed up vcpu lpid computation with consecutive lpids
+ * per VM. vm1 will use lpids 2 and 3, vm2 lpids 4 and 5, and so on.
+ */
+ if (threads_per_core == 2)
+ lpid <<= 1;
+
kvm->arch.lpid = lpid;
return 0;
}
static void kvmppc_core_destroy_vm_e500mc(struct kvm *kvm)
{
- kvmppc_free_lpid(kvm->arch.lpid);
+ int lpid = kvm->arch.lpid;
+
+ if (threads_per_core == 2)
+ lpid >>= 1;
+
+ kvmppc_free_lpid(lpid);
}
static struct kvmppc_ops kvm_ops_e500mc = {
@@ -374,26 +381,35 @@ static struct kvmppc_ops kvm_ops_e500mc = {
.vcpu_put = kvmppc_core_vcpu_put_e500mc,
.vcpu_create = kvmppc_core_vcpu_create_e500mc,
.vcpu_free = kvmppc_core_vcpu_free_e500mc,
- .mmu_destroy = kvmppc_mmu_destroy_e500,
.init_vm = kvmppc_core_init_vm_e500mc,
.destroy_vm = kvmppc_core_destroy_vm_e500mc,
.emulate_op = kvmppc_core_emulate_op_e500,
.emulate_mtspr = kvmppc_core_emulate_mtspr_e500,
.emulate_mfspr = kvmppc_core_emulate_mfspr_e500,
+ .create_vcpu_debugfs = kvmppc_create_vcpu_debugfs_e500,
};
static int __init kvmppc_e500mc_init(void)
{
int r;
+ r = kvmppc_e500mc_check_processor_compat();
+ if (r)
+ goto err_out;
+
r = kvmppc_booke_init();
if (r)
goto err_out;
- kvmppc_init_lpid(64);
- kvmppc_claim_lpid(0); /* host */
+ /*
+ * Use two lpids per VM on dual threaded processors like e6500
+ * to work around the lack of a tlb write conditional instruction.
+ * Expose half the number of available hardware lpids to the lpid
+ * allocator.
+ */
+ kvmppc_init_lpid(KVMPPC_NR_LPIDS/threads_per_core);
- r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
+ r = kvm_init(sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
if (r)
goto err_out;
kvm_ops_e500mc.owner = THIS_MODULE;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index e96b50d0bdab..355d5206e8aa 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
* Copyright 2011 Freescale Semiconductor, Inc.
@@ -39,18 +28,12 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
unsigned long dec_nsec;
unsigned long long dec_time;
- pr_debug("mtDEC: %x\n", vcpu->arch.dec);
+ pr_debug("mtDEC: %lx\n", vcpu->arch.dec);
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
#ifdef CONFIG_PPC_BOOK3S
/* mtdec lowers the interrupt line when positive. */
kvmppc_core_dequeue_dec(vcpu);
-
- /* POWER4+ triggers a dec interrupt if the value is < 0 */
- if (vcpu->arch.dec & 0x80000000) {
- kvmppc_core_queue_dec(vcpu);
- return;
- }
#endif
#ifdef CONFIG_BOOKE
@@ -67,11 +50,10 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
dec_time = vcpu->arch.dec;
/*
- * Guest timebase ticks at the same frequency as host decrementer.
- * So use the host decrementer calculations for decrementer emulation.
+ * Guest timebase ticks at the same frequency as host timebase.
+ * So use the host timebase calculations for decrementer emulation.
*/
- dec_time = dec_time << decrementer_clockevent.shift;
- do_div(dec_time, decrementer_clockevent.mult);
+ dec_time = tb_to_ns(dec_time);
dec_nsec = do_div(dec_time, NSEC_PER_SEC);
hrtimer_start(&vcpu->arch.dec_timer,
ktime_set(dec_time, dec_nsec), HRTIMER_MODE_REL);
@@ -109,7 +91,7 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
case SPRN_TBWU: break;
case SPRN_DEC:
- vcpu->arch.dec = spr_val;
+ vcpu->arch.dec = (u32) spr_val;
kvmppc_emulate_dec(vcpu);
break;
@@ -209,9 +191,10 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
/* XXX Should probably auto-generate instruction decoding for a particular core
* from opcode tables in the future. */
-int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_instruction(struct kvm_vcpu *vcpu)
{
u32 inst;
+ ppc_inst_t pinst;
int rs, rt, sprn;
enum emulation_result emulated;
int advance = 1;
@@ -219,7 +202,8 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
- emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst);
+ inst = ppc_inst_val(pinst);
if (emulated != EMULATE_DONE)
return emulated;
@@ -259,10 +243,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_31_XOP_MFSPR:
emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
+ if (emulated == EMULATE_AGAIN) {
+ emulated = EMULATE_DONE;
+ advance = 0;
+ }
break;
case OP_31_XOP_MTSPR:
emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
+ if (emulated == EMULATE_AGAIN) {
+ emulated = EMULATE_DONE;
+ advance = 0;
+ }
break;
case OP_31_XOP_TLBSYNC:
@@ -274,12 +266,28 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
break;
+ case 0:
+ /*
+ * Instruction with primary opcode 0. Based on PowerISA
+ * these are illegal instructions.
+ */
+ if (inst == KVMPPC_INST_SW_BREAKPOINT) {
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
+ vcpu->run->debug.arch.status = 0;
+ vcpu->run->debug.arch.address = kvmppc_get_pc(vcpu);
+ emulated = EMULATE_EXIT_USER;
+ advance = 0;
+ } else
+ emulated = EMULATE_FAIL;
+
+ break;
+
default:
emulated = EMULATE_FAIL;
}
if (emulated == EMULATE_FAIL) {
- emulated = vcpu->kvm->arch.kvm_ops->emulate_op(run, vcpu, inst,
+ emulated = vcpu->kvm->arch.kvm_ops->emulate_op(vcpu, inst,
&advance);
if (emulated == EMULATE_AGAIN) {
advance = 0;
@@ -287,13 +295,16 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
advance = 0;
printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
"(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
- kvmppc_core_queue_program(vcpu, 0);
}
}
trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated);
/* Advance past emulated instruction. */
+ /*
+ * If this ever handles prefixed instructions, the 4
+ * will need to become ppc_inst_len(pinst) instead.
+ */
if (advance)
kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 0de4ffa175a9..ec60c7979718 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
* Copyright 2011 Freescale Semiconductor, Inc.
@@ -31,242 +20,348 @@
#include <asm/kvm_ppc.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
+#include <asm/sstep.h>
#include "timing.h"
#include "trace.h"
-/* XXX to do:
- * lhax
- * lhaux
- * lswx
- * lswi
- * stswx
- * stswi
- * lha
- * lhau
- * lmw
- * stmw
+#ifdef CONFIG_PPC_FPU
+static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
+ kvmppc_core_queue_fpunavail(vcpu, kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_VSX
+static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_VSX)) {
+ kvmppc_core_queue_vsx_unavail(vcpu, kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+static bool kvmppc_check_altivec_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_VEC)) {
+ kvmppc_core_queue_vec_unavail(vcpu, kvmppc_get_msr(vcpu) & SRR1_PREFIXED);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_ALTIVEC */
+
+/*
+ * XXX to do:
+ * lfiwax, lfiwzx
+ * vector loads and stores
*
+ * Instructions that trap when used on cache-inhibited mappings
+ * are not emulated here: multiple and string instructions,
+ * lq/stq, and the load-reserve/store-conditional instructions.
*/
int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu->run;
- u32 inst;
- int ra, rs, rt;
- enum emulation_result emulated;
- int advance = 1;
+ ppc_inst_t inst;
+ enum emulation_result emulated = EMULATE_FAIL;
+ struct instruction_op op;
/* this default type might be overwritten by subcategories */
kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
- emulated = kvmppc_get_last_inst(vcpu, false, &inst);
+ emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst);
if (emulated != EMULATE_DONE)
return emulated;
- ra = get_ra(inst);
- rs = get_rs(inst);
- rt = get_rt(inst);
+ vcpu->arch.mmio_vsx_copy_nums = 0;
+ vcpu->arch.mmio_vsx_offset = 0;
+ vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
+ vcpu->arch.mmio_sp64_extend = 0;
+ vcpu->arch.mmio_sign_extend = 0;
+ vcpu->arch.mmio_vmx_copy_nums = 0;
+ vcpu->arch.mmio_vmx_offset = 0;
+ vcpu->arch.mmio_host_swabbed = 0;
+
+ emulated = EMULATE_FAIL;
+ vcpu->arch.regs.msr = kvmppc_get_msr(vcpu);
+ if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) {
+ int type = op.type & INSTR_TYPE_MASK;
+ int size = GETSIZE(op.type);
+
+ vcpu->mmio_is_write = OP_IS_STORE(type);
+
+ switch (type) {
+ case LOAD: {
+ int instr_byte_swap = op.type & BYTEREV;
+
+ if (op.type & SIGNEXT)
+ emulated = kvmppc_handle_loads(vcpu,
+ op.reg, size, !instr_byte_swap);
+ else
+ emulated = kvmppc_handle_load(vcpu,
+ op.reg, size, !instr_byte_swap);
+
+ if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
- switch (get_op(inst)) {
- case 31:
- switch (get_xop(inst)) {
- case OP_31_XOP_LWZX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
+ }
+#ifdef CONFIG_PPC_FPU
+ case LOAD_FP:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
- case OP_31_XOP_LBZX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- break;
+ if (op.type & FPCONV)
+ vcpu->arch.mmio_sp64_extend = 1;
- case OP_31_XOP_LBZUX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
+ if (op.type & SIGNEXT)
+ emulated = kvmppc_handle_loads(vcpu,
+ KVM_MMIO_REG_FPR|op.reg, size, 1);
+ else
+ emulated = kvmppc_handle_load(vcpu,
+ KVM_MMIO_REG_FPR|op.reg, size, 1);
- case OP_31_XOP_STWX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 1);
- break;
+ if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
- case OP_31_XOP_STBX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
break;
-
- case OP_31_XOP_STBUX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+#endif
+#ifdef CONFIG_ALTIVEC
+ case LOAD_VMX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+
+ /* Hardware enforces alignment of VMX accesses */
+ vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1);
+ vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1);
+
+ if (size == 16) { /* lvx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_DWORD;
+ } else if (size == 4) { /* lvewx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_WORD;
+ } else if (size == 2) { /* lvehx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_HWORD;
+ } else if (size == 1) { /* lvebx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_BYTE;
+ } else
+ break;
+
+ vcpu->arch.mmio_vmx_offset =
+ (vcpu->arch.vaddr_accessed & 0xf)/size;
+
+ if (size == 16) {
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_vmx_load(vcpu,
+ KVM_MMIO_REG_VMX|op.reg,
+ 8, 1);
+ } else {
+ vcpu->arch.mmio_vmx_copy_nums = 1;
+ emulated = kvmppc_handle_vmx_load(vcpu,
+ KVM_MMIO_REG_VMX|op.reg,
+ size, 1);
+ }
break;
-
- case OP_31_XOP_LHAX:
- emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+#endif
+#ifdef CONFIG_VSX
+ case LOAD_VSX: {
+ int io_size_each;
+
+ if (op.vsx_flags & VSX_CHECK_VEC) {
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ } else {
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ }
+
+ if (op.vsx_flags & VSX_FPCONV)
+ vcpu->arch.mmio_sp64_extend = 1;
+
+ if (op.element_size == 8) {
+ if (op.vsx_flags & VSX_SPLAT)
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
+ else
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VSX_COPY_DWORD;
+ } else if (op.element_size == 4) {
+ if (op.vsx_flags & VSX_SPLAT)
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VSX_COPY_WORD_LOAD_DUMP;
+ else
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VSX_COPY_WORD;
+ } else
+ break;
+
+ if (size < op.element_size) {
+ /* precision convert case: lxsspx, etc */
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ io_size_each = size;
+ } else { /* lxvw4x, lxvd2x, etc */
+ vcpu->arch.mmio_vsx_copy_nums =
+ size/op.element_size;
+ io_size_each = op.element_size;
+ }
+
+ emulated = kvmppc_handle_vsx_load(vcpu,
+ KVM_MMIO_REG_VSX|op.reg, io_size_each,
+ 1, op.type & SIGNEXT);
break;
+ }
+#endif
+ case STORE: {
+ int instr_byte_swap = op.type & BYTEREV;
- case OP_31_XOP_LHZX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- break;
+ emulated = kvmppc_handle_store(vcpu, kvmppc_get_gpr(vcpu, op.reg),
+ size, !instr_byte_swap);
- case OP_31_XOP_LHZUX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
+ if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
- case OP_31_XOP_STHX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
break;
+ }
+#ifdef CONFIG_PPC_FPU
+ case STORE_FP:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
- case OP_31_XOP_STHUX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
+ /* The FP registers need to be flushed so that
+ * kvmppc_handle_store() can read actual FP vals
+ * from vcpu->arch.
+ */
+ if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+ vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+ MSR_FP);
- case OP_31_XOP_DCBST:
- case OP_31_XOP_DCBF:
- case OP_31_XOP_DCBI:
- /* Do nothing. The guest is performing dcbi because
- * hardware DMA is not snooped by the dcache, but
- * emulated DMA either goes through the dcache as
- * normal writes, or the host kernel has handled dcache
- * coherence. */
- break;
+ if (op.type & FPCONV)
+ vcpu->arch.mmio_sp64_extend = 1;
- case OP_31_XOP_LWBRX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
- break;
+ emulated = kvmppc_handle_store(vcpu,
+ kvmppc_get_fpr(vcpu, op.reg), size, 1);
- case OP_31_XOP_STWBRX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 0);
- break;
+ if ((op.type & UPDATE) && (emulated != EMULATE_FAIL))
+ kvmppc_set_gpr(vcpu, op.update_reg, vcpu->arch.vaddr_accessed);
- case OP_31_XOP_LHBRX:
- emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case STORE_VMX:
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+
+ /* Hardware enforces alignment of VMX accesses. */
+ vcpu->arch.vaddr_accessed &= ~((unsigned long)size - 1);
+ vcpu->arch.paddr_accessed &= ~((unsigned long)size - 1);
+
+ if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+ vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+ MSR_VEC);
+ if (size == 16) { /* stvx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_DWORD;
+ } else if (size == 4) { /* stvewx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_WORD;
+ } else if (size == 2) { /* stvehx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_HWORD;
+ } else if (size == 1) { /* stvebx */
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VMX_COPY_BYTE;
+ } else
+ break;
+
+ vcpu->arch.mmio_vmx_offset =
+ (vcpu->arch.vaddr_accessed & 0xf)/size;
+
+ if (size == 16) {
+ vcpu->arch.mmio_vmx_copy_nums = 2;
+ emulated = kvmppc_handle_vmx_store(vcpu,
+ op.reg, 8, 1);
+ } else {
+ vcpu->arch.mmio_vmx_copy_nums = 1;
+ emulated = kvmppc_handle_vmx_store(vcpu,
+ op.reg, size, 1);
+ }
- case OP_31_XOP_STHBRX:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 0);
break;
-
+#endif
+#ifdef CONFIG_VSX
+ case STORE_VSX: {
+ int io_size_each;
+
+ if (op.vsx_flags & VSX_CHECK_VEC) {
+ if (kvmppc_check_altivec_disabled(vcpu))
+ return EMULATE_DONE;
+ } else {
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ }
+
+ if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+ vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu,
+ MSR_VSX);
+
+ if (op.vsx_flags & VSX_FPCONV)
+ vcpu->arch.mmio_sp64_extend = 1;
+
+ if (op.element_size == 8)
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VSX_COPY_DWORD;
+ else if (op.element_size == 4)
+ vcpu->arch.mmio_copy_type =
+ KVMPPC_VSX_COPY_WORD;
+ else
+ break;
+
+ if (size < op.element_size) {
+ /* precise conversion case, like stxsspx */
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ io_size_each = size;
+ } else { /* stxvw4x, stxvd2x, etc */
+ vcpu->arch.mmio_vsx_copy_nums =
+ size/op.element_size;
+ io_size_each = op.element_size;
+ }
+
+ emulated = kvmppc_handle_vsx_store(vcpu,
+ op.reg, io_size_each, 1);
+ break;
+ }
+#endif
+ case CACHEOP:
+ /* Do nothing. The guest is performing dcbi because
+ * hardware DMA is not snooped by the dcache, but
+ * emulated DMA either goes through the dcache as
+ * normal writes, or the host kernel has handled dcache
+ * coherence.
+ */
+ emulated = EMULATE_DONE;
+ break;
default:
- emulated = EMULATE_FAIL;
break;
}
- break;
-
- case OP_LWZ:
- emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
- break;
-
- /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
- case OP_LD:
- rt = get_rt(inst);
- emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
- break;
-
- case OP_LWZU:
- emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- case OP_LBZ:
- emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- break;
-
- case OP_LBZU:
- emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- case OP_STW:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 1);
- break;
-
- /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
- case OP_STD:
- rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 8, 1);
- break;
-
- case OP_STWU:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- case OP_STB:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
- break;
-
- case OP_STBU:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- case OP_LHZ:
- emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- break;
-
- case OP_LHZU:
- emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- case OP_LHA:
- emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
- break;
-
- case OP_LHAU:
- emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- case OP_STH:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
- break;
-
- case OP_STHU:
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
- kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
- break;
-
- default:
- emulated = EMULATE_FAIL;
- break;
- }
-
- if (emulated == EMULATE_FAIL) {
- advance = 0;
- kvmppc_core_queue_program(vcpu, 0);
}
- trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated);
+ trace_kvm_ppc_instr(ppc_inst_val(inst), kvmppc_get_pc(vcpu), emulated);
/* Advance past emulated instruction. */
- if (advance)
- kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4);
+ if (emulated != EMULATE_FAIL)
+ kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + ppc_inst_len(inst));
return emulated;
}
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
index bf68d597549e..b68e7f26a81f 100644
--- a/arch/powerpc/kvm/fpu.S
+++ b/arch/powerpc/kvm/fpu.S
@@ -1,19 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* FPU helper code to use FPU operations from inside the kernel
*
* Copyright (C) 2010 Alexander Graf (agraf@suse.de)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/pgtable.h>
+#include <linux/linkage.h>
+
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/mmu.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
@@ -115,18 +112,22 @@ FPS_THREE_IN(fsel)
* R8 = (double*)&param3 [load_three]
* LR = instruction call function
*/
-fpd_load_three:
+SYM_FUNC_START_LOCAL(fpd_load_three)
lfd 2,0(r8) /* load param3 */
-fpd_load_two:
+SYM_FUNC_START_LOCAL(fpd_load_two)
lfd 1,0(r7) /* load param2 */
-fpd_load_one:
+SYM_FUNC_START_LOCAL(fpd_load_one)
lfd 0,0(r6) /* load param1 */
-fpd_load_none:
+SYM_FUNC_START_LOCAL(fpd_load_none)
lfd 3,0(r3) /* load up fpscr value */
MTFSF_L(3)
lwz r6, 0(r4) /* load cr */
mtcr r6
blr
+SYM_FUNC_END(fpd_load_none)
+SYM_FUNC_END(fpd_load_one)
+SYM_FUNC_END(fpd_load_two)
+SYM_FUNC_END(fpd_load_three)
/*
* End of double instruction processing
@@ -136,13 +137,14 @@ fpd_load_none:
* R5 = (double*)&result
* LR = caller of instruction call function
*/
-fpd_return:
+SYM_FUNC_START_LOCAL(fpd_return)
mfcr r6
stfd 0,0(r5) /* save result */
mffs 0
stfd 0,0(r3) /* save new fpscr value */
stw r6,0(r4) /* save new cr value */
blr
+SYM_FUNC_END(fpd_return)
/*
* Double operation with no input operand
diff --git a/arch/powerpc/kvm/guest-state-buffer.c b/arch/powerpc/kvm/guest-state-buffer.c
new file mode 100644
index 000000000000..871cf60ddeb6
--- /dev/null
+++ b/arch/powerpc/kvm/guest-state-buffer.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "asm/hvcall.h"
+#include <linux/log2.h>
+#include <asm/pgalloc.h>
+#include <asm/guest-state-buffer.h>
+
+/*
+ * Payload size, in bytes, of each guest state element type. The table is
+ * indexed by the KVMPPC_GSE_* type value; any type without an explicit
+ * entry below is left at zero.
+ */
+static const u16 kvmppc_gse_iden_len[__KVMPPC_GSE_TYPE_MAX] = {
+ [KVMPPC_GSE_BE32] = sizeof(__be32),
+ [KVMPPC_GSE_BE64] = sizeof(__be64),
+ [KVMPPC_GSE_VEC128] = sizeof(vector128),
+ [KVMPPC_GSE_PARTITION_TABLE] = sizeof(struct kvmppc_gs_part_table),
+ [KVMPPC_GSE_PROCESS_TABLE] = sizeof(struct kvmppc_gs_proc_table),
+ [KVMPPC_GSE_BUFFER] = sizeof(struct kvmppc_gs_buff_info),
+};
+
+/**
+ * kvmppc_gsb_new() - create a new guest state buffer
+ * @size: total size of the guest state buffer (includes header)
+ * @guest_id: guest_id
+ * @vcpu_id: vcpu_id
+ * @flags: GFP flags, applied to both the descriptor and the backing buffer
+ *
+ * @size is rounded up to a power of two before the backing buffer is
+ * allocated; the rounded value is recorded as the buffer's capacity. The
+ * new buffer starts out holding only a header with an element count of zero.
+ *
+ * Returns a guest state buffer, or NULL if either allocation fails.
+ */
+struct kvmppc_gs_buff *kvmppc_gsb_new(size_t size, unsigned long guest_id,
+				      unsigned long vcpu_id, gfp_t flags)
+{
+	struct kvmppc_gs_buff *gsb;
+
+	gsb = kzalloc(sizeof(*gsb), flags);
+	if (!gsb)
+		return NULL;
+
+	size = roundup_pow_of_two(size);
+	/*
+	 * Use the caller's GFP flags here as well: a caller that asked for a
+	 * non-sleeping allocation must not get a GFP_KERNEL one behind its
+	 * back.
+	 */
+	gsb->hdr = kzalloc(size, flags);
+	if (!gsb->hdr)
+		goto free;
+
+	gsb->capacity = size;
+	/* An empty buffer still carries its header. */
+	gsb->len = sizeof(struct kvmppc_gs_header);
+	gsb->vcpu_id = vcpu_id;
+	gsb->guest_id = guest_id;
+
+	gsb->hdr->nelems = cpu_to_be32(0);
+
+	return gsb;
+
+free:
+	kfree(gsb);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsb_new);
+
+/**
+ * kvmppc_gsb_free() - free a guest state buffer
+ * @gsb: guest state buffer, may be NULL
+ *
+ * Releases the backing buffer and then the descriptor itself. Passing NULL
+ * is a no-op, mirroring kfree().
+ */
+void kvmppc_gsb_free(struct kvmppc_gs_buff *gsb)
+{
+	/* Without this guard the gsb->hdr load below would fault on NULL. */
+	if (!gsb)
+		return;
+
+	kfree(gsb->hdr);
+	kfree(gsb);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsb_free);
+
+/**
+ * kvmppc_gsb_put() - reserve room for one element in a guest state buffer
+ * @gsb: buffer to reserve space in
+ * @size: number of bytes to reserve
+ *
+ * Returns a pointer located at the header address plus the buffer's current
+ * length, then grows the length by @size and raises the header's element
+ * count by one.
+ *
+ * No capacity check is made; the caller must ensure the space exists.
+ */
+void *kvmppc_gsb_put(struct kvmppc_gs_buff *gsb, size_t size)
+{
+	/* Sample the current end and element count before touching either. */
+	void *slot = (void *)kvmppc_gsb_header(gsb) + kvmppc_gsb_len(gsb);
+	u32 count = kvmppc_gsb_nelems(gsb);
+
+	gsb->len += size;
+	kvmppc_gsb_header(gsb)->nelems = cpu_to_be32(count + 1);
+
+	return slot;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsb_put);
+
+/*
+ * Map a guest state ID onto its class by testing it against each class's
+ * inclusive [START, END] range in turn. Yields -1 when the ID falls in none
+ * of the ranges.
+ */
+static int kvmppc_gsid_class(u16 iden)
+{
+	int class = -1;
+
+	if (iden >= KVMPPC_GSE_GUESTWIDE_START &&
+	    iden <= KVMPPC_GSE_GUESTWIDE_END)
+		class = KVMPPC_GS_CLASS_GUESTWIDE;
+	else if (iden >= KVMPPC_GSE_HOSTWIDE_START &&
+		 iden <= KVMPPC_GSE_HOSTWIDE_END)
+		class = KVMPPC_GS_CLASS_HOSTWIDE;
+	else if (iden >= KVMPPC_GSE_META_START &&
+		 iden <= KVMPPC_GSE_META_END)
+		class = KVMPPC_GS_CLASS_META;
+	else if (iden >= KVMPPC_GSE_DW_REGS_START &&
+		 iden <= KVMPPC_GSE_DW_REGS_END)
+		class = KVMPPC_GS_CLASS_DWORD_REG;
+	else if (iden >= KVMPPC_GSE_W_REGS_START &&
+		 iden <= KVMPPC_GSE_W_REGS_END)
+		class = KVMPPC_GS_CLASS_WORD_REG;
+	else if (iden >= KVMPPC_GSE_VSRS_START &&
+		 iden <= KVMPPC_GSE_VSRS_END)
+		class = KVMPPC_GS_CLASS_VECTOR;
+	else if (iden >= KVMPPC_GSE_INTR_REGS_START &&
+		 iden <= KVMPPC_GSE_INTR_REGS_END)
+		class = KVMPPC_GS_CLASS_INTR;
+
+	return class;
+}
+
+/*
+ * Work out the element type (a KVMPPC_GSE_* value) used to carry a guest
+ * state ID. Register classes have one type for the whole class; the other
+ * classes are resolved per ID. Yields -1 for an ID with no known type.
+ */
+static int kvmppc_gsid_type(u16 iden)
+{
+	switch (kvmppc_gsid_class(iden)) {
+	/* Classes whose members all share a single type. */
+	case KVMPPC_GS_CLASS_DWORD_REG:
+		return KVMPPC_GSE_BE64;
+	case KVMPPC_GS_CLASS_WORD_REG:
+		return KVMPPC_GSE_BE32;
+	case KVMPPC_GS_CLASS_VECTOR:
+		return KVMPPC_GSE_VEC128;
+
+	/* Classes that need a per-ID decision. */
+	case KVMPPC_GS_CLASS_HOSTWIDE:
+		switch (iden) {
+		case KVMPPC_GSID_L0_GUEST_HEAP:
+		case KVMPPC_GSID_L0_GUEST_HEAP_MAX:
+		case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE:
+		case KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX:
+		case KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM:
+			return KVMPPC_GSE_BE64;
+		}
+		break;
+	case KVMPPC_GS_CLASS_GUESTWIDE:
+		switch (iden) {
+		case KVMPPC_GSID_HOST_STATE_SIZE:
+		case KVMPPC_GSID_RUN_OUTPUT_MIN_SIZE:
+		case KVMPPC_GSID_TB_OFFSET:
+			return KVMPPC_GSE_BE64;
+		case KVMPPC_GSID_PARTITION_TABLE:
+			return KVMPPC_GSE_PARTITION_TABLE;
+		case KVMPPC_GSID_PROCESS_TABLE:
+			return KVMPPC_GSE_PROCESS_TABLE;
+		case KVMPPC_GSID_LOGICAL_PVR:
+			return KVMPPC_GSE_BE32;
+		}
+		break;
+	case KVMPPC_GS_CLASS_META:
+		switch (iden) {
+		case KVMPPC_GSID_RUN_INPUT:
+		case KVMPPC_GSID_RUN_OUTPUT:
+			return KVMPPC_GSE_BUFFER;
+		case KVMPPC_GSID_VPA:
+			return KVMPPC_GSE_BE64;
+		}
+		break;
+	case KVMPPC_GS_CLASS_INTR:
+		switch (iden) {
+		case KVMPPC_GSID_HDAR:
+		case KVMPPC_GSID_ASDR:
+		case KVMPPC_GSID_HEIR:
+			return KVMPPC_GSE_BE64;
+		case KVMPPC_GSID_HDSISR:
+			return KVMPPC_GSE_BE32;
+		}
+		break;
+	}
+
+	/* Unknown class, or an ID the per-class lists above do not name. */
+	return -1;
+}
+
+/**
+ * kvmppc_gsid_flags() - the flags for a guest state ID
+ * @iden: guest state ID
+ *
+ * Returns KVMPPC_GS_FLAGS_WIDE for guest wide IDs, KVMPPC_GS_FLAGS_HOST_WIDE
+ * for host wide IDs and 0 for every other ID.
+ */
+unsigned long kvmppc_gsid_flags(u16 iden)
+{
+	switch (kvmppc_gsid_class(iden)) {
+	case KVMPPC_GS_CLASS_GUESTWIDE:
+		return KVMPPC_GS_FLAGS_WIDE;
+	case KVMPPC_GS_CLASS_HOSTWIDE:
+		return KVMPPC_GS_FLAGS_HOST_WIDE;
+	default:
+		/* Meta, register, vector and interrupt IDs carry no flag. */
+		return 0;
+	}
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsid_flags);
+
+/**
+ * kvmppc_gsid_size() - the size of a guest state ID
+ * @iden: guest state ID
+ *
+ * Returns the payload size recorded for the ID's element type, or 0 when
+ * the ID has no known type or the type lies outside the size table.
+ */
+u16 kvmppc_gsid_size(u16 iden)
+{
+	int type = kvmppc_gsid_type(iden);
+
+	if (type == -1 || type >= __KVMPPC_GSE_TYPE_MAX)
+		return 0;
+
+	return kvmppc_gse_iden_len[type];
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsid_size);
+
+/**
+ * kvmppc_gsid_mask() - the settable bits of a guest state ID
+ * @iden: guest state ID
+ *
+ * Returns a mask of settable bits for a guest state ID. Only the LPCR and
+ * MSR IDs are restricted; every other ID gets an all-ones mask.
+ */
+u64 kvmppc_gsid_mask(u16 iden)
+{
+	if (iden == KVMPPC_GSID_LPCR)
+		return LPCR_DPFD | LPCR_ILE | LPCR_AIL | LPCR_LD | LPCR_MER |
+		       LPCR_GTSE;
+
+	if (iden == KVMPPC_GSID_MSR)
+		return ~(MSR_HV | MSR_S | MSR_ME);
+
+	return ~0ull;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsid_mask);
+
+/**
+ * __kvmppc_gse_put() - add a guest state element to a buffer
+ * @gsb: buffer to add the element to
+ * @iden: guest state ID
+ * @size: length of data
+ * @data: pointer to data
+ *
+ * Returns 0 on success, -ENOMEM when the element does not fit in the
+ * buffer's remaining capacity, or -EINVAL when @size is not the size
+ * defined for @iden.
+ */
+int __kvmppc_gse_put(struct kvmppc_gs_buff *gsb, u16 iden, u16 size,
+		     const void *data)
+{
+	struct kvmppc_gs_elem *elem;
+	u16 need;
+
+	/* Element structure plus payload must fit in what is left. */
+	need = sizeof(*elem) + size;
+	if (kvmppc_gsb_len(gsb) + need > kvmppc_gsb_capacity(gsb))
+		return -ENOMEM;
+
+	/* The payload length has to be the one defined for this ID. */
+	if (size != kvmppc_gsid_size(iden))
+		return -EINVAL;
+
+	elem = kvmppc_gsb_put(gsb, need);
+	elem->len = cpu_to_be16(size);
+	elem->iden = cpu_to_be16(iden);
+	memcpy(elem->data, data, size);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__kvmppc_gse_put);
+
+/**
+ * kvmppc_gse_parse() - create a parse map from a guest state buffer
+ * @gsp: guest state parser
+ * @gsb: guest state buffer
+ *
+ * Walks the elements of @gsb and records each one in @gsp under its guest
+ * state ID.
+ *
+ * Returns 0 on success, or -EINVAL if an element's length differs from the
+ * size defined for its ID or if the number of elements walked differs from
+ * the element count reported by the buffer.
+ */
+int kvmppc_gse_parse(struct kvmppc_gs_parser *gsp, struct kvmppc_gs_buff *gsb)
+{
+ struct kvmppc_gs_elem *curr;
+ int rem, i;
+
+ kvmppc_gsb_for_each_elem(i, curr, gsb, rem) {
+ /* Reject an element whose length is not the one defined for its ID. */
+ if (kvmppc_gse_len(curr) !=
+ kvmppc_gsid_size(kvmppc_gse_iden(curr)))
+ return -EINVAL;
+ kvmppc_gsp_insert(gsp, kvmppc_gse_iden(curr), curr);
+ }
+
+ /* The walk must have covered exactly the advertised number of elements. */
+ if (kvmppc_gsb_nelems(gsb) != i)
+ return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gse_parse);
+
+/*
+ * Convert a guest state ID into a dense index. The classes are laid out back
+ * to back in the order guest wide, host wide, meta, doubleword registers,
+ * word registers, vector registers, interrupt registers; an ID's index is
+ * its class's base plus its offset from the class's first ID.
+ *
+ * An ID that belongs to no class yields 0, which is also the index of the
+ * first guest wide ID.
+ */
+static inline int kvmppc_gse_flatten_iden(u16 iden)
+{
+	/* First dense index of each class after the guest wide one. */
+	const int hostwide_base = KVMPPC_GSE_GUESTWIDE_COUNT;
+	const int meta_base = hostwide_base + KVMPPC_GSE_HOSTWIDE_COUNT;
+	const int dw_regs_base = meta_base + KVMPPC_GSE_META_COUNT;
+	const int w_regs_base = dw_regs_base + KVMPPC_GSE_DW_REGS_COUNT;
+	const int vsrs_base = w_regs_base + KVMPPC_GSE_W_REGS_COUNT;
+	const int intr_base = vsrs_base + KVMPPC_GSE_VSRS_COUNT;
+
+	switch (kvmppc_gsid_class(iden)) {
+	case KVMPPC_GS_CLASS_GUESTWIDE:
+		return iden - KVMPPC_GSE_GUESTWIDE_START;
+	case KVMPPC_GS_CLASS_HOSTWIDE:
+		return hostwide_base + (iden - KVMPPC_GSE_HOSTWIDE_START);
+	case KVMPPC_GS_CLASS_META:
+		return meta_base + (iden - KVMPPC_GSE_META_START);
+	case KVMPPC_GS_CLASS_DWORD_REG:
+		return dw_regs_base + (iden - KVMPPC_GSE_DW_REGS_START);
+	case KVMPPC_GS_CLASS_WORD_REG:
+		return w_regs_base + (iden - KVMPPC_GSE_W_REGS_START);
+	case KVMPPC_GS_CLASS_VECTOR:
+		return vsrs_base + (iden - KVMPPC_GSE_VSRS_START);
+	case KVMPPC_GS_CLASS_INTR:
+		return intr_base + (iden - KVMPPC_GSE_INTR_REGS_START);
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Convert a dense index back into a guest state ID by peeling off one class
+ * worth of indices at a time, in the same class order used when flattening.
+ * Yields 0 when the index is past the last range tested.
+ */
+static inline u16 kvmppc_gse_unflatten_iden(int bit)
+{
+	if (bit < KVMPPC_GSE_GUESTWIDE_COUNT)
+		return KVMPPC_GSE_GUESTWIDE_START + bit;
+	bit -= KVMPPC_GSE_GUESTWIDE_COUNT;
+
+	if (bit < KVMPPC_GSE_HOSTWIDE_COUNT)
+		return KVMPPC_GSE_HOSTWIDE_START + bit;
+	bit -= KVMPPC_GSE_HOSTWIDE_COUNT;
+
+	if (bit < KVMPPC_GSE_META_COUNT)
+		return KVMPPC_GSE_META_START + bit;
+	bit -= KVMPPC_GSE_META_COUNT;
+
+	if (bit < KVMPPC_GSE_DW_REGS_COUNT)
+		return KVMPPC_GSE_DW_REGS_START + bit;
+	bit -= KVMPPC_GSE_DW_REGS_COUNT;
+
+	if (bit < KVMPPC_GSE_W_REGS_COUNT)
+		return KVMPPC_GSE_W_REGS_START + bit;
+	bit -= KVMPPC_GSE_W_REGS_COUNT;
+
+	if (bit < KVMPPC_GSE_VSRS_COUNT)
+		return KVMPPC_GSE_VSRS_START + bit;
+	bit -= KVMPPC_GSE_VSRS_COUNT;
+
+	/*
+	 * NOTE(review): this last bound is the total ID count rather than a
+	 * count specific to the interrupt registers - confirm it is intended.
+	 */
+	if (bit < KVMPPC_GSE_IDEN_COUNT)
+		return KVMPPC_GSE_INTR_REGS_START + bit;
+
+	return 0;
+}
+
+/**
+ * kvmppc_gsp_insert() - add a mapping from a guest state ID to an element
+ * @gsp: guest state parser
+ * @iden: guest state id (key)
+ * @gse: guest state element (value)
+ *
+ * Marks @iden in the parser's iterator bitmap and stores @gse in the slot
+ * that corresponds to @iden.
+ */
+void kvmppc_gsp_insert(struct kvmppc_gs_parser *gsp, u16 iden,
+		       struct kvmppc_gs_elem *gse)
+{
+	kvmppc_gsbm_set(&gsp->iterator, iden);
+	gsp->gses[kvmppc_gse_flatten_iden(iden)] = gse;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsp_insert);
+
+/**
+ * kvmppc_gsp_lookup() - lookup an element from a guest state ID
+ * @gsp: guest state parser
+ * @iden: guest state ID (key)
+ *
+ * Returns whatever the parser holds in the slot that corresponds to @iden.
+ */
+struct kvmppc_gs_elem *kvmppc_gsp_lookup(struct kvmppc_gs_parser *gsp, u16 iden)
+{
+	return gsp->gses[kvmppc_gse_flatten_iden(iden)];
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsp_lookup);
+
+/**
+ * kvmppc_gsbm_set() - set the guest state ID
+ * @gsbm: guest state bitmap
+ * @iden: guest state ID
+ *
+ * Sets the bitmap bit at the dense index of @iden.
+ */
+void kvmppc_gsbm_set(struct kvmppc_gs_bitmap *gsbm, u16 iden)
+{
+	int bit = kvmppc_gse_flatten_iden(iden);
+
+	set_bit(bit, gsbm->bitmap);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsbm_set);
+
+/**
+ * kvmppc_gsbm_clear() - clear the guest state ID
+ * @gsbm: guest state bitmap
+ * @iden: guest state ID
+ *
+ * Clears the bitmap bit at the dense index of @iden.
+ */
+void kvmppc_gsbm_clear(struct kvmppc_gs_bitmap *gsbm, u16 iden)
+{
+	int bit = kvmppc_gse_flatten_iden(iden);
+
+	clear_bit(bit, gsbm->bitmap);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsbm_clear);
+
+/**
+ * kvmppc_gsbm_test() - test the guest state ID
+ * @gsbm: guest state bitmap
+ * @iden: guest state ID
+ *
+ * Returns true when the bitmap bit at the dense index of @iden is set.
+ */
+bool kvmppc_gsbm_test(struct kvmppc_gs_bitmap *gsbm, u16 iden)
+{
+	int bit = kvmppc_gse_flatten_iden(iden);
+
+	return test_bit(bit, gsbm->bitmap);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsbm_test);
+
+/**
+ * kvmppc_gsbm_next() - return the next set guest state ID
+ * @gsbm: guest state bitmap
+ * @prev: last guest state ID, or 0 to start from the first bit
+ *
+ * Returns the ID of the next set bit after @prev, or 0 when no further bit
+ * is set.
+ */
+u16 kvmppc_gsbm_next(struct kvmppc_gs_bitmap *gsbm, u16 prev)
+{
+	int start = 0;
+	int found;
+
+	/* Resume just past the previous ID's bit when one was given. */
+	if (prev)
+		start = kvmppc_gse_flatten_iden(prev) + 1;
+
+	found = find_next_bit(gsbm->bitmap, KVMPPC_GSE_IDEN_COUNT, start);
+	if (found >= KVMPPC_GSE_IDEN_COUNT)
+		return 0;
+
+	return kvmppc_gse_unflatten_iden(found);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsbm_next);
+
+/**
+ * kvmppc_gsm_init() - initialize a guest state message
+ * @gsm: guest state message
+ * @ops: callbacks
+ * @data: private data
+ * @flags: guest wide or thread wide
+ *
+ * Zeroes the whole message and then records @ops, @data and @flags in it.
+ *
+ * Always returns 0.
+ */
+int kvmppc_gsm_init(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_msg_ops *ops,
+		    void *data, unsigned long flags)
+{
+	/* Start from a clean slate so unlisted fields are zero. */
+	memset(gsm, 0, sizeof(*gsm));
+
+	gsm->flags = flags;
+	gsm->data = data;
+	gsm->ops = ops;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsm_init);
+
+/**
+ * kvmppc_gsm_new() - creates a new guest state message
+ * @ops: callbacks
+ * @data: private data
+ * @flags: guest wide or thread wide
+ * @gfp_flags: GFP allocation flags
+ *
+ * Returns an initialized guest state message, or NULL if the allocation
+ * fails.
+ */
+struct kvmppc_gs_msg *kvmppc_gsm_new(struct kvmppc_gs_msg_ops *ops, void *data,
+				     unsigned long flags, gfp_t gfp_flags)
+{
+	struct kvmppc_gs_msg *msg = kzalloc(sizeof(*msg), gfp_flags);
+
+	if (msg)
+		kvmppc_gsm_init(msg, ops, data, flags);
+
+	return msg;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsm_new);
+
+/**
+ * kvmppc_gsm_size() - query the size needed by a guest state message
+ * @gsm: self
+ *
+ * Returns the size reported by the message's get_size callback, or 0 when
+ * no such callback is provided.
+ */
+size_t kvmppc_gsm_size(struct kvmppc_gs_msg *gsm)
+{
+	if (!gsm->ops->get_size)
+		return 0;
+
+	return gsm->ops->get_size(gsm);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsm_size);
+
+/**
+ * kvmppc_gsm_free() - free guest state message
+ * @gsm: guest state message
+ *
+ * Releases the message with kfree(). The ops and private data it points to
+ * are not touched.
+ */
+void kvmppc_gsm_free(struct kvmppc_gs_msg *gsm)
+{
+ kfree(gsm);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsm_free);
+
+/**
+ * kvmppc_gsm_fill_info() - serialises message to guest state buffer format
+ * @gsm: self
+ * @gsb: buffer to serialise into
+ *
+ * Returns the result of the message's fill_info callback, or -EINVAL when
+ * the message has no such callback.
+ */
+int kvmppc_gsm_fill_info(struct kvmppc_gs_msg *gsm, struct kvmppc_gs_buff *gsb)
+{
+	if (gsm->ops->fill_info)
+		return gsm->ops->fill_info(gsb, gsm);
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsm_fill_info);
+
+/**
+ * kvmppc_gsm_refresh_info() - deserialises from guest state buffer
+ * @gsm: self
+ * @gsb: buffer to deserialise from
+ *
+ * Returns the result of the message's refresh_info callback, or -EINVAL
+ * when the message has no such callback.
+ */
+int kvmppc_gsm_refresh_info(struct kvmppc_gs_msg *gsm,
+			    struct kvmppc_gs_buff *gsb)
+{
+	/*
+	 * Guard the callback that is actually invoked below; testing
+	 * fill_info here would let a NULL refresh_info be called.
+	 */
+	if (!gsm->ops->refresh_info)
+		return -EINVAL;
+
+	return gsm->ops->refresh_info(gsm, gsb);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsm_refresh_info);
+
+/**
+ * kvmppc_gsb_send - send all elements in the buffer to the hypervisor.
+ * @gsb: guest state buffer
+ * @flags: guest wide or thread wide
+ *
+ * Performs the H_GUEST_SET_STATE hcall for the guest state buffer. A buffer
+ * holding no elements is not sent and 0 is returned straight away.
+ *
+ * Returns the result of the hcall wrapper otherwise.
+ */
+int kvmppc_gsb_send(struct kvmppc_gs_buff *gsb, unsigned long flags)
+{
+	unsigned long hcall_flags = 0;
+	unsigned long out;	/* filled in by the hcall wrapper, not used here */
+
+	if (!kvmppc_gsb_nelems(gsb))
+		return 0;
+
+	/* Translate the buffer-level flags into their hcall counterparts. */
+	if (flags & KVMPPC_GS_FLAGS_WIDE)
+		hcall_flags |= H_GUEST_FLAGS_WIDE;
+	if (flags & KVMPPC_GS_FLAGS_HOST_WIDE)
+		hcall_flags |= H_GUEST_FLAGS_HOST_WIDE;
+
+	return plpar_guest_set_state(hcall_flags, gsb->guest_id, gsb->vcpu_id,
+				     __pa(gsb->hdr), gsb->capacity, &out);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsb_send);
+
+/**
+ * kvmppc_gsb_recv - request all elements in the buffer have their value
+ * updated.
+ * @gsb: guest state buffer
+ * @flags: guest wide or thread wide
+ *
+ * Performs the H_GUEST_GET_STATE hcall for the guest state buffer.
+ * After returning from the hcall the guest state elements that were
+ * present in the buffer will have updated values from the hypervisor.
+ *
+ * Returns the result of the hcall wrapper.
+ */
+int kvmppc_gsb_recv(struct kvmppc_gs_buff *gsb, unsigned long flags)
+{
+	unsigned long hcall_flags = 0;
+	unsigned long out;	/* filled in by the hcall wrapper, not used here */
+
+	/* Translate the buffer-level flags into their hcall counterparts. */
+	if (flags & KVMPPC_GS_FLAGS_WIDE)
+		hcall_flags |= H_GUEST_FLAGS_WIDE;
+	if (flags & KVMPPC_GS_FLAGS_HOST_WIDE)
+		hcall_flags |= H_GUEST_FLAGS_HOST_WIDE;
+
+	return plpar_guest_get_state(hcall_flags, gsb->guest_id, gsb->vcpu_id,
+				     __pa(gsb->hdr), gsb->capacity, &out);
+}
+EXPORT_SYMBOL_GPL(kvmppc_gsb_recv);
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
deleted file mode 100644
index 5a9a10b90762..000000000000
--- a/arch/powerpc/kvm/irq.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include <linux/kvm_host.h>
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
- int ret = 0;
-
-#ifdef CONFIG_KVM_MPIC
- ret = ret || (kvm->arch.mpic != NULL);
-#endif
-#ifdef CONFIG_KVM_XICS
- ret = ret || (kvm->arch.xics != NULL);
-#endif
- smp_rmb();
- return ret;
-}
-
-#endif
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 39b3a8f816f2..23e9c2bd9f27 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -29,12 +29,11 @@
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/anon_inodes.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/mpic.h>
#include <asm/kvm_para.h>
-#include <asm/kvm_host.h>
#include <asm/kvm_ppc.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
#define MAX_CPU 32
#define MAX_SRC 256
@@ -289,11 +288,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
clear_bit(n_IRQ, q->queue);
}
-static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
-{
- return test_bit(n_IRQ, q->queue);
-}
-
static void IRQ_check(struct openpic *opp, struct irq_queue *q)
{
int irq = -1;
@@ -1374,8 +1368,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
return -ENXIO;
}
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
- int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, void *ptr)
{
struct openpic *opp = container_of(this, struct openpic, mmio);
int ret;
@@ -1415,8 +1410,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
return ret;
}
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
- int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, const void *ptr)
{
struct openpic *opp = container_of(this, struct openpic, mmio);
int ret;
@@ -1826,7 +1822,8 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
return 0;
}
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int r = -EINVAL;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4c79284b58be..2ba057171ebe 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2007
*
@@ -23,18 +12,30 @@
#include <linux/kvm_host.h>
#include <linux/vmalloc.h>
#include <linux/hrtimer.h>
+#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/module.h>
+#include <linux/irqbypass.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/of.h>
#include <asm/cputable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/kvm_ppc.h>
-#include <asm/tlbflush.h>
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
+#include <asm/iommu.h>
+#include <asm/switch_to.h>
+#include <asm/xive.h>
+#ifdef CONFIG_PPC_PSERIES
+#include <asm/hvcall.h>
+#include <asm/plpar_wrappers.h>
+#endif
+#include <asm/ultravisor.h>
+#include <asm/setup.h>
+
#include "timing.h"
-#include "irq.h"
#include "../mm/mmu_decl.h"
#define CREATE_TRACE_POINTS
@@ -48,8 +49,17 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return !!(v->arch.pending_exceptions) ||
- v->requests;
+ return !!(v->arch.pending_exceptions) || kvm_request_pending(v);
+}
+
+bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_vcpu_runnable(vcpu);
+}
+
+bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
+{
+ return false;
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
@@ -59,7 +69,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
/*
* Common checks before entering the guest world. Call with interrupts
- * disabled.
+ * enabled.
*
* returns:
*
@@ -95,10 +105,13 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
* so we don't miss a request because the requester sees
* OUTSIDE_GUEST_MODE and assumes we'll be checking requests
* before next entering the guest (and thus doesn't IPI).
+ * This also orders the write to mode from any reads
+ * to the page tables done while the VCPU is running.
+ * Please see the comment in kvm_flush_remote_tlbs.
*/
smp_mb();
- if (vcpu->requests) {
+ if (kvm_request_pending(vcpu)) {
/* Make sure we process requests preemptable */
local_irq_enable();
trace_kvm_check_requests(vcpu);
@@ -115,7 +128,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
continue;
}
- kvm_guest_enter();
+ guest_enter_irqoff();
return 1;
}
@@ -224,8 +237,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
break;
case EV_HCALL_TOKEN(EV_IDLE):
r = EV_SUCCESS;
- kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_vcpu_halt(vcpu);
break;
default:
r = EV_UNIMPLEMENTED;
@@ -267,7 +279,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvmppc_sanity_check);
-int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+int kvmppc_emulate_mmio(struct kvm_vcpu *vcpu)
{
enum emulation_result er;
int r;
@@ -283,7 +295,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
r = RESUME_GUEST;
break;
case EMULATE_DO_MMIO:
- run->exit_reason = KVM_EXIT_MMIO;
+ vcpu->run->exit_reason = KVM_EXIT_MMIO;
/* We must reload nonvolatiles because "update" load/store
* instructions modify register state. */
/* Future optimization: only reload non-volatiles if they were
@@ -292,12 +304,36 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case EMULATE_FAIL:
{
- u32 last_inst;
+ ppc_inst_t last_inst;
- kvmppc_get_last_inst(vcpu, false, &last_inst);
- /* XXX Deliver Program interrupt to guest. */
- pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst);
- r = RESUME_HOST;
+ kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
+ kvm_debug_ratelimited("Guest access to device memory using unsupported instruction (opcode: %#08x)\n",
+ ppc_inst_val(last_inst));
+
+ /*
+ * Injecting a Data Storage here is a bit more
+ * accurate since the instruction that caused the
+ * access could still be a valid one.
+ */
+ if (!IS_ENABLED(CONFIG_BOOKE)) {
+ ulong dsisr = DSISR_BADACCESS;
+
+ if (vcpu->mmio_is_write)
+ dsisr |= DSISR_ISSTORE;
+
+ kvmppc_core_queue_data_storage(vcpu,
+ kvmppc_get_msr(vcpu) & SRR1_PREFIXED,
+ vcpu->arch.vaddr_accessed, dsisr);
+ } else {
+ /*
+ * BookE does not send a SIGBUS on a bad
+ * fault, so use a Program interrupt instead
+ * to avoid a fault loop.
+ */
+ kvmppc_core_queue_program(vcpu, 0);
+ }
+
+ r = RESUME_GUEST;
break;
}
default:
@@ -314,10 +350,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
- int r;
+ int r = -EINVAL;
vcpu->stat.st++;
+ if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr)
+ r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr,
+ size);
+
+ if ((!r) || (r == -EAGAIN))
+ return r;
+
r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
XLATE_WRITE, &pte);
if (r < 0)
@@ -350,10 +393,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
{
ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;
struct kvmppc_pte pte;
- int rc;
+ int rc = -EINVAL;
vcpu->stat.ld++;
+ if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr)
+ rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr,
+ size);
+
+ if ((!rc) || (rc == -EAGAIN))
+ return rc;
+
rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST,
XLATE_READ, &pte);
if (rc)
@@ -377,39 +427,21 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr,
return EMULATE_DONE;
}
- if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size))
+ kvm_vcpu_srcu_read_lock(vcpu);
+ rc = kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+ if (rc)
return EMULATE_DO_MMIO;
return EMULATE_DONE;
}
EXPORT_SYMBOL_GPL(kvmppc_ld);
-int kvm_arch_hardware_enable(void *garbage)
-{
- return 0;
-}
-
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
-int kvm_arch_hardware_setup(void)
-{
- return 0;
-}
-
-void kvm_arch_hardware_unsetup(void)
-{
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
- *(int *)rtn = kvmppc_core_check_processor_compat();
-}
-
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
struct kvmppc_ops *kvm_ops = NULL;
+ int r;
+
/*
* if we have both HV and PR enabled, default is HV
*/
@@ -431,28 +463,33 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
} else
goto err_out;
- if (kvm_ops->owner && !try_module_get(kvm_ops->owner))
+ if (!try_module_get(kvm_ops->owner))
return -ENOENT;
kvm->arch.kvm_ops = kvm_ops;
- return kvmppc_core_init_vm(kvm);
+ r = kvmppc_core_init_vm(kvm);
+ if (r)
+ module_put(kvm_ops->owner);
+ return r;
err_out:
return -EINVAL;
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- unsigned int i;
- struct kvm_vcpu *vcpu;
+#ifdef CONFIG_KVM_XICS
+ /*
+ * We call kick_all_cpus_sync() to ensure that all
+ * CPUs have executed any pending IPIs before we
+ * continue and free VCPUs structures below.
+ */
+ if (is_kvmppc_hv_enabled(kvm))
+ kick_all_cpus_sync();
+#endif
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arch_vcpu_free(vcpu);
+ kvm_destroy_vcpus(kvm);
mutex_lock(&kvm->lock);
- for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
- kvm->vcpus[i] = NULL;
-
- atomic_set(&kvm->online_vcpus, 0);
kvmppc_core_destroy_vm(kvm);
@@ -462,10 +499,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
module_put(kvm->arch.kvm_ops->owner);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -493,12 +526,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_UNSET_IRQ:
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
- case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_ONE_REG:
case KVM_CAP_IOEVENTFD:
- case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_IMMEDIATE_EXIT:
+ case KVM_CAP_SET_GUEST_DEBUG:
r = 1;
break;
+ case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
@@ -508,11 +542,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
/* We support this only for PR */
r = !hv_enabled;
break;
-#ifdef CONFIG_KVM_MMIO
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
-#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC:
r = 1;
@@ -521,41 +550,83 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
- case KVM_CAP_PPC_ALLOC_HTAB:
+ fallthrough;
+ case KVM_CAP_SPAPR_TCE_64:
+ case KVM_CAP_SPAPR_TCE_VFIO:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS:
#endif
+ case KVM_CAP_PPC_GET_CPU_CHAR:
r = 1;
break;
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE:
+ /*
+ * We need XIVE to be enabled on the platform (implies
+ * a POWER9 processor) and the PowerNV platform, as
+ * nested is not yet supported.
+ */
+ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
+ kvmppc_xive_native_supported();
+ break;
+#endif
+
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
+ case KVM_CAP_IRQFD_RESAMPLE:
+ r = !xive_enabled();
+ break;
+#endif
+
+ case KVM_CAP_PPC_ALLOC_HTAB:
+ r = hv_enabled;
+ break;
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
- if (hv_enabled)
- r = threads_per_subcore;
- else
- r = 0;
+ r = 0;
+ if (kvm) {
+ if (kvm->arch.emul_smt_mode > 1)
+ r = kvm->arch.emul_smt_mode;
+ else
+ r = kvm->arch.smt_mode;
+ } else if (hv_enabled) {
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ r = 1;
+ else
+ r = threads_per_subcore;
+ }
break;
- case KVM_CAP_PPC_RMA:
- r = hv_enabled;
- /* PPC970 requires an RMA */
- if (r && cpu_has_feature(CPU_FTR_ARCH_201))
- r = 2;
+ case KVM_CAP_PPC_SMT_POSSIBLE:
+ r = 1;
+ if (hv_enabled) {
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ r = ((threads_per_subcore << 1) - 1);
+ else
+ /* P9 can emulate dbells, so allow any mode */
+ r = 8 | 4 | 2 | 1;
+ }
+ break;
+ case KVM_CAP_PPC_HWRNG:
+ r = kvmppc_hwrng_present();
+ break;
+ case KVM_CAP_PPC_MMU_RADIX:
+ r = !!(hv_enabled && radix_enabled());
+ break;
+ case KVM_CAP_PPC_MMU_HASH_V3:
+ r = !!(hv_enabled && kvmppc_hv_ops->hash_v3_possible &&
+ kvmppc_hv_ops->hash_v3_possible());
+ break;
+ case KVM_CAP_PPC_NESTED_HV:
+ r = !!(hv_enabled && kvmppc_hv_ops->enable_nested &&
+ !kvmppc_hv_ops->enable_nested(NULL));
break;
#endif
case KVM_CAP_SYNC_MMU:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (hv_enabled)
- r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
- else
- r = 0;
-#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_KVM_GENERIC_MMU_NOTIFIER));
r = 1;
-#else
- r = 0;
-#endif
break;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_HTAB_FD:
@@ -570,18 +641,68 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* implementations just count online CPUs.
*/
if (hv_enabled)
- r = num_present_cpus();
+ r = min_t(unsigned int, num_present_cpus(), KVM_MAX_VCPUS);
else
- r = num_online_cpus();
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
break;
+ case KVM_CAP_MAX_VCPU_ID:
+ r = KVM_MAX_VCPU_IDS;
+ break;
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_PPC_GET_SMMU_INFO:
r = 1;
break;
+ case KVM_CAP_SPAPR_MULTITCE:
+ r = 1;
+ break;
+ case KVM_CAP_SPAPR_RESIZE_HPT:
+ r = !!hv_enabled;
+ break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_FWNMI:
+ r = hv_enabled;
+ break;
+#endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ case KVM_CAP_PPC_HTM:
+ r = !!(cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM) ||
+ (hv_enabled && cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST));
+ break;
+#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ case KVM_CAP_PPC_SECURE_GUEST:
+ r = hv_enabled && kvmppc_hv_ops->enable_svm &&
+ !kvmppc_hv_ops->enable_svm(NULL);
+ break;
+ case KVM_CAP_PPC_DAWR1:
+ r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
+ !kvmppc_hv_ops->enable_dawr1(NULL));
+ break;
+ case KVM_CAP_PPC_RPT_INVALIDATE:
+ r = 1;
+ break;
#endif
+ case KVM_CAP_PPC_AIL_MODE_3:
+ r = 0;
+ /*
+ * KVM PR, POWER7, and some POWER9s don't support AIL=3 mode.
+ * The POWER9s can support it if the guest runs in hash mode,
+ * but QEMU doesn't necessarily query the capability in time.
+ */
+ if (hv_enabled) {
+ if (kvmhv_on_pseries()) {
+ if (pseries_reloc_on_exception())
+ r = 1;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+ r = 1;
+ }
+ }
+ break;
default:
r = 0;
break;
@@ -596,40 +717,25 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
- kvmppc_core_free_memslot(kvm, free, dont);
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
- unsigned long npages)
-{
- return kvmppc_core_create_memslot(kvm, slot, npages);
-}
-
-void kvm_arch_memslots_updated(struct kvm *kvm)
+void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
+ kvmppc_core_free_memslot(kvm, slot);
}
int kvm_arch_prepare_memory_region(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- struct kvm_userspace_memory_region *mem,
+ const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
+ return kvmppc_core_prepare_memory_region(kvm, old, new, change);
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- const struct kvm_memory_slot *old,
+ struct kvm_memory_slot *old,
+ const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
- kvmppc_core_commit_memory_region(kvm, mem, old);
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
+ kvmppc_core_commit_memory_region(kvm, old, new, change);
}
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
@@ -638,45 +744,75 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvmppc_core_flush_memslot(kvm, slot);
}
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id)
+{
+ return 0;
+}
+
+static enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
- vcpu = kvmppc_core_vcpu_create(kvm, id);
- if (!IS_ERR(vcpu)) {
- vcpu->arch.wqp = &vcpu->wq;
- kvmppc_create_vcpu_debugfs(vcpu, id);
- }
- return vcpu;
+
+ vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
+ kvmppc_decrementer_func(vcpu);
+
+ return HRTIMER_NORESTART;
}
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
{
+ int err;
+
+ hrtimer_setup(&vcpu->arch.dec_timer, kvmppc_decrementer_wakeup, CLOCK_REALTIME,
+ HRTIMER_MODE_ABS);
+
+#ifdef CONFIG_KVM_EXIT_TIMING
+ mutex_init(&vcpu->arch.exit_timing_lock);
+#endif
+ err = kvmppc_subarch_vcpu_init(vcpu);
+ if (err)
+ return err;
+
+ err = kvmppc_core_vcpu_create(vcpu);
+ if (err)
+ goto out_vcpu_uninit;
+
+ rcuwait_init(&vcpu->arch.wait);
+ vcpu->arch.waitp = &vcpu->arch.wait;
return 0;
+
+out_vcpu_uninit:
+ kvmppc_subarch_vcpu_uninit(vcpu);
+ return err;
+}
+
+void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
}
-void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
/* Make sure we're not using the vcpu anymore */
hrtimer_cancel(&vcpu->arch.dec_timer);
- tasklet_kill(&vcpu->arch.tasklet);
-
- kvmppc_remove_vcpu_debugfs(vcpu);
switch (vcpu->arch.irq_type) {
case KVMPPC_IRQ_MPIC:
kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu);
break;
case KVMPPC_IRQ_XICS:
- kvmppc_xics_free_icp(vcpu);
+ if (xics_on_xive())
+ kvmppc_xive_cleanup_vcpu(vcpu);
+ else
+ kvmppc_xics_free_icp(vcpu);
+ break;
+ case KVMPPC_IRQ_XIVE:
+ kvmppc_xive_native_cleanup_vcpu(vcpu);
break;
}
kvmppc_core_vcpu_free(vcpu);
-}
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
- kvm_arch_vcpu_free(vcpu);
+ kvmppc_subarch_vcpu_uninit(vcpu);
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
@@ -684,42 +820,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvmppc_core_pending_dec(vcpu);
}
-/*
- * low level hrtimer wake routine. Because this runs in hardirq context
- * we schedule a tasklet to do the real work.
- */
-enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
-{
- struct kvm_vcpu *vcpu;
-
- vcpu = container_of(timer, struct kvm_vcpu, arch.dec_timer);
- tasklet_schedule(&vcpu->arch.tasklet);
-
- return HRTIMER_NORESTART;
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
- int ret;
-
- hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
- vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
- vcpu->arch.dec_expires = ~(u64)0;
-
-#ifdef CONFIG_KVM_EXIT_TIMING
- mutex_init(&vcpu->arch.exit_timing_lock);
-#endif
- ret = kvmppc_subarch_vcpu_init(vcpu);
- return ret;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- kvmppc_mmu_destroy(vcpu);
- kvmppc_subarch_vcpu_uninit(vcpu);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
#ifdef CONFIG_BOOKE
@@ -743,17 +843,302 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
#endif
}
-static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
- struct kvm_run *run)
+/*
+ * irq_bypass_add_producer and irq_bypass_del_producer are only
+ * useful if the architecture supports PCI passthrough.
+ * irq_bypass_stop and irq_bypass_start are not needed and so
+ * kvm_ops are not defined for them.
+ */
+bool kvm_arch_has_irq_bypass(void)
+{
+ return ((kvmppc_hv_ops && kvmppc_hv_ops->irq_bypass_add_producer) ||
+ (kvmppc_pr_ops && kvmppc_pr_ops->irq_bypass_add_producer));
+}
+
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
{
- u64 uninitialized_var(gpr);
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
+
+ if (kvm->arch.kvm_ops->irq_bypass_add_producer)
+ return kvm->arch.kvm_ops->irq_bypass_add_producer(cons, prod);
+
+ return 0;
+}
- if (run->mmio.len > sizeof(gpr)) {
- printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+ struct kvm *kvm = irqfd->kvm;
+
+ if (kvm->arch.kvm_ops->irq_bypass_del_producer)
+ kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
+}
+
+#ifdef CONFIG_VSX
+static inline int kvmppc_get_vsr_dword_offset(int index)
+{
+ int offset;
+
+ if ((index != 0) && (index != 1))
+ return -1;
+
+#ifdef __BIG_ENDIAN
+ offset = index;
+#else
+ offset = 1 - index;
+#endif
+
+ return offset;
+}
+
+static inline int kvmppc_get_vsr_word_offset(int index)
+{
+ int offset;
+
+ if ((index > 3) || (index < 0))
+ return -1;
+
+#ifdef __BIG_ENDIAN
+ offset = index;
+#else
+ offset = 3 - index;
+#endif
+ return offset;
+}
+
+static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (offset == -1)
+ return;
+
+ if (index >= 32) {
+ kvmppc_get_vsx_vr(vcpu, index - 32, &val.vval);
+ val.vsxval[offset] = gpr;
+ kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval);
+ } else {
+ kvmppc_set_vsx_fpr(vcpu, index, offset, gpr);
+ }
+}
+
+static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ union kvmppc_one_reg val;
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (index >= 32) {
+ kvmppc_get_vsx_vr(vcpu, index - 32, &val.vval);
+ val.vsxval[0] = gpr;
+ val.vsxval[1] = gpr;
+ kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval);
+ } else {
+ kvmppc_set_vsx_fpr(vcpu, index, 0, gpr);
+ kvmppc_set_vsx_fpr(vcpu, index, 1, gpr);
+ }
+}
+
+static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
+ u32 gpr)
+{
+ union kvmppc_one_reg val;
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (index >= 32) {
+ val.vsx32val[0] = gpr;
+ val.vsx32val[1] = gpr;
+ val.vsx32val[2] = gpr;
+ val.vsx32val[3] = gpr;
+ kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval);
+ } else {
+ val.vsx32val[0] = gpr;
+ val.vsx32val[1] = gpr;
+ kvmppc_set_vsx_fpr(vcpu, index, 0, val.vsxval[0]);
+ kvmppc_set_vsx_fpr(vcpu, index, 1, val.vsxval[0]);
+ }
+}
+
+static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
+ u32 gpr32)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+ int dword_offset, word_offset;
+
+ if (offset == -1)
return;
+
+ if (index >= 32) {
+ kvmppc_get_vsx_vr(vcpu, index - 32, &val.vval);
+ val.vsx32val[offset] = gpr32;
+ kvmppc_set_vsx_vr(vcpu, index - 32, &val.vval);
+ } else {
+ dword_offset = offset / 2;
+ word_offset = offset % 2;
+ val.vsxval[0] = kvmppc_get_vsx_fpr(vcpu, index, dword_offset);
+ val.vsx32val[word_offset] = gpr32;
+ kvmppc_set_vsx_fpr(vcpu, index, dword_offset, val.vsxval[0]);
}
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+static inline int kvmppc_get_vmx_offset_generic(struct kvm_vcpu *vcpu,
+ int index, int element_size)
+{
+ int offset;
+ int elts = sizeof(vector128)/element_size;
+
+ if ((index < 0) || (index >= elts))
+ return -1;
+
+ if (kvmppc_need_byteswap(vcpu))
+ offset = elts - index - 1;
+ else
+ offset = index;
+
+ return offset;
+}
+
+static inline int kvmppc_get_vmx_dword_offset(struct kvm_vcpu *vcpu,
+ int index)
+{
+ return kvmppc_get_vmx_offset_generic(vcpu, index, 8);
+}
+
+static inline int kvmppc_get_vmx_word_offset(struct kvm_vcpu *vcpu,
+ int index)
+{
+ return kvmppc_get_vmx_offset_generic(vcpu, index, 4);
+}
+
+static inline int kvmppc_get_vmx_hword_offset(struct kvm_vcpu *vcpu,
+ int index)
+{
+ return kvmppc_get_vmx_offset_generic(vcpu, index, 2);
+}
+
+static inline int kvmppc_get_vmx_byte_offset(struct kvm_vcpu *vcpu,
+ int index)
+{
+ return kvmppc_get_vmx_offset_generic(vcpu, index, 1);
+}
- if (vcpu->arch.mmio_is_bigendian) {
+
+static inline void kvmppc_set_vmx_dword(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vmx_dword_offset(vcpu,
+ vcpu->arch.mmio_vmx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (offset == -1)
+ return;
+
+ kvmppc_get_vsx_vr(vcpu, index, &val.vval);
+ val.vsxval[offset] = gpr;
+ kvmppc_set_vsx_vr(vcpu, index, &val.vval);
+}
+
+static inline void kvmppc_set_vmx_word(struct kvm_vcpu *vcpu,
+ u32 gpr32)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vmx_word_offset(vcpu,
+ vcpu->arch.mmio_vmx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (offset == -1)
+ return;
+
+ kvmppc_get_vsx_vr(vcpu, index, &val.vval);
+ val.vsx32val[offset] = gpr32;
+ kvmppc_set_vsx_vr(vcpu, index, &val.vval);
+}
+
+static inline void kvmppc_set_vmx_hword(struct kvm_vcpu *vcpu,
+ u16 gpr16)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vmx_hword_offset(vcpu,
+ vcpu->arch.mmio_vmx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (offset == -1)
+ return;
+
+ kvmppc_get_vsx_vr(vcpu, index, &val.vval);
+ val.vsx16val[offset] = gpr16;
+ kvmppc_set_vsx_vr(vcpu, index, &val.vval);
+}
+
+static inline void kvmppc_set_vmx_byte(struct kvm_vcpu *vcpu,
+ u8 gpr8)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vmx_byte_offset(vcpu,
+ vcpu->arch.mmio_vmx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (offset == -1)
+ return;
+
+ kvmppc_get_vsx_vr(vcpu, index, &val.vval);
+ val.vsx8val[offset] = gpr8;
+ kvmppc_set_vsx_vr(vcpu, index, &val.vval);
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_PPC_FPU
+static inline u64 sp_to_dp(u32 fprs)
+{
+ u64 fprd;
+
+ preempt_disable();
+ enable_kernel_fp();
+ asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m<>" (fprd) : "m<>" (fprs)
+ : "fr0");
+ preempt_enable();
+ return fprd;
+}
+
+static inline u32 dp_to_sp(u64 fprd)
+{
+ u32 fprs;
+
+ preempt_disable();
+ enable_kernel_fp();
+ asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m<>" (fprs) : "m<>" (fprd)
+ : "fr0");
+ preempt_enable();
+ return fprs;
+}
+
+#else
+#define sp_to_dp(x) (x)
+#define dp_to_sp(x) (x)
+#endif /* CONFIG_PPC_FPU */
+
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ u64 gpr;
+
+ if (run->mmio.len > sizeof(gpr))
+ return;
+
+ if (!vcpu->arch.mmio_host_swabbed) {
switch (run->mmio.len) {
case 8: gpr = *(u64 *)run->mmio.data; break;
case 4: gpr = *(u32 *)run->mmio.data; break;
@@ -761,14 +1146,18 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
case 1: gpr = *(u8 *)run->mmio.data; break;
}
} else {
- /* Convert BE data from userland back to LE. */
switch (run->mmio.len) {
- case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
- case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
+ case 8: gpr = swab64(*(u64 *)run->mmio.data); break;
+ case 4: gpr = swab32(*(u32 *)run->mmio.data); break;
+ case 2: gpr = swab16(*(u16 *)run->mmio.data); break;
case 1: gpr = *(u8 *)run->mmio.data; break;
}
}
+ /* conversion between single and double precision */
+ if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4))
+ gpr = sp_to_dp(gpr);
+
if (vcpu->arch.mmio_sign_extend) {
switch (run->mmio.len) {
#ifdef CONFIG_PPC64
@@ -785,108 +1174,176 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
}
}
- kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
-
switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
case KVM_MMIO_REG_GPR:
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
break;
case KVM_MMIO_REG_FPR:
- VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
+ if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+ vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_FP);
+
+ kvmppc_set_fpr(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK, gpr);
break;
#ifdef CONFIG_PPC_BOOK3S
case KVM_MMIO_REG_QPR:
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
case KVM_MMIO_REG_FQPR:
- VCPU_FPR(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK) = gpr;
+ kvmppc_set_fpr(vcpu, vcpu->arch.io_gpr & KVM_MMIO_REG_MASK, gpr);
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
#endif
+#ifdef CONFIG_VSX
+ case KVM_MMIO_REG_VSX:
+ if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+ vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VSX);
+
+ if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_DWORD)
+ kvmppc_set_vsr_dword(vcpu, gpr);
+ else if (vcpu->arch.mmio_copy_type == KVMPPC_VSX_COPY_WORD)
+ kvmppc_set_vsr_word(vcpu, gpr);
+ else if (vcpu->arch.mmio_copy_type ==
+ KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
+ kvmppc_set_vsr_dword_dump(vcpu, gpr);
+ else if (vcpu->arch.mmio_copy_type ==
+ KVMPPC_VSX_COPY_WORD_LOAD_DUMP)
+ kvmppc_set_vsr_word_dump(vcpu, gpr);
+ break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case KVM_MMIO_REG_VMX:
+ if (vcpu->kvm->arch.kvm_ops->giveup_ext)
+ vcpu->kvm->arch.kvm_ops->giveup_ext(vcpu, MSR_VEC);
+
+ if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_DWORD)
+ kvmppc_set_vmx_dword(vcpu, gpr);
+ else if (vcpu->arch.mmio_copy_type == KVMPPC_VMX_COPY_WORD)
+ kvmppc_set_vmx_word(vcpu, gpr);
+ else if (vcpu->arch.mmio_copy_type ==
+ KVMPPC_VMX_COPY_HWORD)
+ kvmppc_set_vmx_hword(vcpu, gpr);
+ else if (vcpu->arch.mmio_copy_type ==
+ KVMPPC_VMX_COPY_BYTE)
+ kvmppc_set_vmx_byte(vcpu, gpr);
+ break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_MMIO_REG_NESTED_GPR:
+ if (kvmppc_need_byteswap(vcpu))
+ gpr = swab64(gpr);
+ kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr,
+ sizeof(gpr));
+ break;
+#endif
default:
BUG();
}
}
-int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
- unsigned int rt, unsigned int bytes,
- int is_default_endian)
+static int __kvmppc_handle_load(struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian, int sign_extend)
{
+ struct kvm_run *run = vcpu->run;
int idx, ret;
- int is_bigendian;
+ bool host_swabbed;
+ /* Pity C doesn't have a logical XOR operator */
if (kvmppc_need_byteswap(vcpu)) {
- /* Default endianness is "little endian". */
- is_bigendian = !is_default_endian;
+ host_swabbed = is_default_endian;
} else {
- /* Default endianness is "big endian". */
- is_bigendian = is_default_endian;
+ host_swabbed = !is_default_endian;
}
- if (bytes > sizeof(run->mmio.data)) {
- printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
+ if (bytes > sizeof(run->mmio.data))
+ return EMULATE_FAIL;
run->mmio.phys_addr = vcpu->arch.paddr_accessed;
run->mmio.len = bytes;
run->mmio.is_write = 0;
vcpu->arch.io_gpr = rt;
- vcpu->arch.mmio_is_bigendian = is_bigendian;
+ vcpu->arch.mmio_host_swabbed = host_swabbed;
vcpu->mmio_needed = 1;
vcpu->mmio_is_write = 0;
- vcpu->arch.mmio_sign_extend = 0;
+ vcpu->arch.mmio_sign_extend = sign_extend;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (!ret) {
- kvmppc_complete_mmio_load(vcpu, run);
+ kvmppc_complete_mmio_load(vcpu);
vcpu->mmio_needed = 0;
return EMULATE_DONE;
}
return EMULATE_DO_MMIO;
}
+
+int kvmppc_handle_load(struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian)
+{
+ return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 0);
+}
EXPORT_SYMBOL_GPL(kvmppc_handle_load);
/* Same as above, but sign extends */
-int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_loads(struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian)
{
- int r;
+ return __kvmppc_handle_load(vcpu, rt, bytes, is_default_endian, 1);
+}
- vcpu->arch.mmio_sign_extend = 1;
- r = kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian);
+#ifdef CONFIG_VSX
+int kvmppc_handle_vsx_load(struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian, int mmio_sign_extend)
+{
+ enum emulation_result emulated = EMULATE_DONE;
- return r;
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
+ return EMULATE_FAIL;
+
+ while (vcpu->arch.mmio_vsx_copy_nums) {
+ emulated = __kvmppc_handle_load(vcpu, rt, bytes,
+ is_default_endian, mmio_sign_extend);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+
+ vcpu->arch.mmio_vsx_copy_nums--;
+ vcpu->arch.mmio_vsx_offset++;
+ }
+ return emulated;
}
+#endif /* CONFIG_VSX */
-int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+int kvmppc_handle_store(struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes, int is_default_endian)
{
+ struct kvm_run *run = vcpu->run;
void *data = run->mmio.data;
int idx, ret;
- int is_bigendian;
+ bool host_swabbed;
+ /* Pity C doesn't have a logical XOR operator */
if (kvmppc_need_byteswap(vcpu)) {
- /* Default endianness is "little endian". */
- is_bigendian = !is_default_endian;
+ host_swabbed = is_default_endian;
} else {
- /* Default endianness is "big endian". */
- is_bigendian = is_default_endian;
+ host_swabbed = !is_default_endian;
}
- if (bytes > sizeof(run->mmio.data)) {
- printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
+ if (bytes > sizeof(run->mmio.data))
+ return EMULATE_FAIL;
run->mmio.phys_addr = vcpu->arch.paddr_accessed;
run->mmio.len = bytes;
@@ -894,8 +1351,11 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_needed = 1;
vcpu->mmio_is_write = 1;
+ if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4))
+ val = dp_to_sp(val);
+
/* Store the value at the lowest bytes in 'data'. */
- if (is_bigendian) {
+ if (!host_swabbed) {
switch (bytes) {
case 8: *(u64 *)data = val; break;
case 4: *(u32 *)data = val; break;
@@ -903,17 +1363,17 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 1: *(u8 *)data = val; break;
}
} else {
- /* Store LE value into 'data'. */
switch (bytes) {
- case 4: st_le32(data, val); break;
- case 2: st_le16(data, val); break;
- case 1: *(u8 *)data = val; break;
+ case 8: *(u64 *)data = swab64(val); break;
+ case 4: *(u32 *)data = swab32(val); break;
+ case 2: *(u16 *)data = swab16(val); break;
+ case 1: *(u8 *)data = val; break;
}
}
idx = srcu_read_lock(&vcpu->kvm->srcu);
- ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -927,18 +1387,442 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvmppc_handle_store);
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+#ifdef CONFIG_VSX
+static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+ u32 dword_offset, word_offset;
+ union kvmppc_one_reg reg;
+ int vsx_offset = 0;
+ int copy_type = vcpu->arch.mmio_copy_type;
+ int result = 0;
+
+ switch (copy_type) {
+ case KVMPPC_VSX_COPY_DWORD:
+ vsx_offset =
+ kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+
+ if (vsx_offset == -1) {
+ result = -1;
+ break;
+ }
+
+ if (rs < 32) {
+ *val = kvmppc_get_vsx_fpr(vcpu, rs, vsx_offset);
+ } else {
+ kvmppc_get_vsx_vr(vcpu, rs - 32, &reg.vval);
+ *val = reg.vsxval[vsx_offset];
+ }
+ break;
+
+ case KVMPPC_VSX_COPY_WORD:
+ vsx_offset =
+ kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+
+ if (vsx_offset == -1) {
+ result = -1;
+ break;
+ }
+
+ if (rs < 32) {
+ dword_offset = vsx_offset / 2;
+ word_offset = vsx_offset % 2;
+ reg.vsxval[0] = kvmppc_get_vsx_fpr(vcpu, rs, dword_offset);
+ *val = reg.vsx32val[word_offset];
+ } else {
+ kvmppc_get_vsx_vr(vcpu, rs - 32, &reg.vval);
+ *val = reg.vsx32val[vsx_offset];
+ }
+ break;
+
+ default:
+ result = -1;
+ break;
+ }
+
+ return result;
+}
+
+int kvmppc_handle_vsx_store(struct kvm_vcpu *vcpu,
+ int rs, unsigned int bytes, int is_default_endian)
{
+ u64 val;
+ enum emulation_result emulated = EMULATE_DONE;
+
+ vcpu->arch.io_gpr = rs;
+
+ /* Currently, mmio_vsx_copy_nums only allowed to be 4 or less */
+ if (vcpu->arch.mmio_vsx_copy_nums > 4)
+ return EMULATE_FAIL;
+
+ while (vcpu->arch.mmio_vsx_copy_nums) {
+ if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
+ return EMULATE_FAIL;
+
+ emulated = kvmppc_handle_store(vcpu,
+ val, bytes, is_default_endian);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+
+ vcpu->arch.mmio_vsx_copy_nums--;
+ vcpu->arch.mmio_vsx_offset++;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ enum emulation_result emulated = EMULATE_FAIL;
int r;
- sigset_t sigsaved;
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ if (!vcpu->mmio_is_write) {
+ emulated = kvmppc_handle_vsx_load(vcpu, vcpu->arch.io_gpr,
+ run->mmio.len, 1, vcpu->arch.mmio_sign_extend);
+ } else {
+ emulated = kvmppc_handle_vsx_store(vcpu,
+ vcpu->arch.io_gpr, run->mmio.len, 1);
+ }
+
+ switch (emulated) {
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ pr_info("KVM: MMIO emulation failed (VSX repeat)\n");
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ r = RESUME_HOST;
+ break;
+ default:
+ r = RESUME_GUEST;
+ break;
+ }
+ return r;
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_ALTIVEC
+int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes, int is_default_endian)
+{
+ enum emulation_result emulated = EMULATE_DONE;
+
+ if (vcpu->arch.mmio_vmx_copy_nums > 2)
+ return EMULATE_FAIL;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ emulated = __kvmppc_handle_load(vcpu, rt, bytes,
+ is_default_endian, 0);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ vcpu->arch.mmio_vmx_offset++;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+ union kvmppc_one_reg reg;
+ int vmx_offset = 0;
+ int result = 0;
+
+ vmx_offset =
+ kvmppc_get_vmx_dword_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+ if (vmx_offset == -1)
+ return -1;
+
+ kvmppc_get_vsx_vr(vcpu, index, &reg.vval);
+ *val = reg.vsxval[vmx_offset];
+
+ return result;
+}
+
+static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+ union kvmppc_one_reg reg;
+ int vmx_offset = 0;
+ int result = 0;
+
+ vmx_offset =
+ kvmppc_get_vmx_word_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+ if (vmx_offset == -1)
+ return -1;
+
+ kvmppc_get_vsx_vr(vcpu, index, &reg.vval);
+ *val = reg.vsx32val[vmx_offset];
+
+ return result;
+}
+
+static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+ union kvmppc_one_reg reg;
+ int vmx_offset = 0;
+ int result = 0;
+
+ vmx_offset =
+ kvmppc_get_vmx_hword_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+ if (vmx_offset == -1)
+ return -1;
+
+ kvmppc_get_vsx_vr(vcpu, index, &reg.vval);
+ *val = reg.vsx16val[vmx_offset];
+
+ return result;
+}
+
+static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val)
+{
+ union kvmppc_one_reg reg;
+ int vmx_offset = 0;
+ int result = 0;
+
+ vmx_offset =
+ kvmppc_get_vmx_byte_offset(vcpu, vcpu->arch.mmio_vmx_offset);
+
+ if (vmx_offset == -1)
+ return -1;
+
+ kvmppc_get_vsx_vr(vcpu, index, &reg.vval);
+ *val = reg.vsx8val[vmx_offset];
+
+ return result;
+}
+
+int kvmppc_handle_vmx_store(struct kvm_vcpu *vcpu,
+ unsigned int rs, unsigned int bytes, int is_default_endian)
+{
+ u64 val = 0;
+ unsigned int index = rs & KVM_MMIO_REG_MASK;
+ enum emulation_result emulated = EMULATE_DONE;
+
+ if (vcpu->arch.mmio_vmx_copy_nums > 2)
+ return EMULATE_FAIL;
+
+ vcpu->arch.io_gpr = rs;
+
+ while (vcpu->arch.mmio_vmx_copy_nums) {
+ switch (vcpu->arch.mmio_copy_type) {
+ case KVMPPC_VMX_COPY_DWORD:
+ if (kvmppc_get_vmx_dword(vcpu, index, &val) == -1)
+ return EMULATE_FAIL;
+
+ break;
+ case KVMPPC_VMX_COPY_WORD:
+ if (kvmppc_get_vmx_word(vcpu, index, &val) == -1)
+ return EMULATE_FAIL;
+ break;
+ case KVMPPC_VMX_COPY_HWORD:
+ if (kvmppc_get_vmx_hword(vcpu, index, &val) == -1)
+ return EMULATE_FAIL;
+ break;
+ case KVMPPC_VMX_COPY_BYTE:
+ if (kvmppc_get_vmx_byte(vcpu, index, &val) == -1)
+ return EMULATE_FAIL;
+ break;
+ default:
+ return EMULATE_FAIL;
+ }
+
+ emulated = kvmppc_handle_store(vcpu, val, bytes,
+ is_default_endian);
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += vcpu->run->mmio.len;
+ vcpu->arch.mmio_vmx_copy_nums--;
+ vcpu->arch.mmio_vmx_offset++;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mmio_vmx_loadstore(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ enum emulation_result emulated = EMULATE_FAIL;
+ int r;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ if (!vcpu->mmio_is_write) {
+ emulated = kvmppc_handle_vmx_load(vcpu,
+ vcpu->arch.io_gpr, run->mmio.len, 1);
+ } else {
+ emulated = kvmppc_handle_vmx_store(vcpu,
+ vcpu->arch.io_gpr, run->mmio.len, 1);
+ }
+
+ switch (emulated) {
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ pr_info("KVM: MMIO emulation failed (VMX repeat)\n");
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ r = RESUME_HOST;
+ break;
+ default:
+ r = RESUME_GUEST;
+ break;
+ }
+ return r;
+}
+#endif /* CONFIG_ALTIVEC */
+
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r = 0;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ kvmppc_get_vsx_vr(vcpu, reg->id - KVM_REG_PPC_VR0, &val.vval);
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ val = get_reg_val(reg->id, kvmppc_get_vscr(vcpu));
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ val = get_reg_val(reg->id, kvmppc_get_vrsave(vcpu));
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ if (r)
+ return r;
+
+ if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+ r = -EFAULT;
+
+ return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+ int r;
+ union kvmppc_one_reg val;
+ int size;
+
+ size = one_reg_size(reg->id);
+ if (size > sizeof(val))
+ return -EINVAL;
+
+ if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+ return -EFAULT;
+
+ r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+ if (r == -EINVAL) {
+ r = 0;
+ switch (reg->id) {
+#ifdef CONFIG_ALTIVEC
+ case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ kvmppc_set_vsx_vr(vcpu, reg->id - KVM_REG_PPC_VR0, &val.vval);
+ break;
+ case KVM_REG_PPC_VSCR:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ kvmppc_set_vscr(vcpu, set_reg_val(reg->id, val));
+ break;
+ case KVM_REG_PPC_VRSAVE:
+ if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+ r = -ENXIO;
+ break;
+ }
+ kvmppc_set_vrsave(vcpu, set_reg_val(reg->id, val));
+ break;
+#endif /* CONFIG_ALTIVEC */
+ default:
+ r = -EINVAL;
+ break;
+ }
+ }
+
+ return r;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int r;
+
+ vcpu_load(vcpu);
if (vcpu->mmio_needed) {
- if (!vcpu->mmio_is_write)
- kvmppc_complete_mmio_load(vcpu, run);
vcpu->mmio_needed = 0;
+ if (!vcpu->mmio_is_write)
+ kvmppc_complete_mmio_load(vcpu);
+#ifdef CONFIG_VSX
+ if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+ vcpu->arch.mmio_vsx_copy_nums--;
+ vcpu->arch.mmio_vsx_offset++;
+ }
+
+ if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+ r = kvmppc_emulate_mmio_vsx_loadstore(vcpu);
+ if (r == RESUME_HOST) {
+ vcpu->mmio_needed = 1;
+ goto out;
+ }
+ }
+#endif
+#ifdef CONFIG_ALTIVEC
+ if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+ vcpu->arch.mmio_vmx_copy_nums--;
+ vcpu->arch.mmio_vmx_offset++;
+ }
+
+ if (vcpu->arch.mmio_vmx_copy_nums > 0) {
+ r = kvmppc_emulate_mmio_vmx_loadstore(vcpu);
+ if (r == RESUME_HOST) {
+ vcpu->mmio_needed = 1;
+ goto out;
+ }
+ }
+#endif
} else if (vcpu->arch.osi_needed) {
u64 *gprs = run->osi.gprs;
int i;
@@ -960,11 +1844,27 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
#endif
}
- r = kvmppc_vcpu_run(run, vcpu);
+ kvm_sigset_activate(vcpu);
+
+ if (!vcpu->wants_to_run)
+ r = -EINTR;
+ else
+ r = kvmppc_vcpu_run(vcpu);
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ kvm_sigset_deactivate(vcpu);
+#ifdef CONFIG_ALTIVEC
+out:
+#endif
+
+ /*
+ * We're already returning to userspace, don't pass the
+ * RESUME_HOST flags along.
+ */
+ if (r > 0)
+ r = 0;
+
+ vcpu_put(vcpu);
return r;
}
@@ -1027,42 +1927,71 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC: {
- struct fd f;
+ CLASS(fd, f)(cap->args[0]);
struct kvm_device *dev;
r = -EBADF;
- f = fdget(cap->args[0]);
- if (!f.file)
+ if (fd_empty(f))
break;
r = -EPERM;
- dev = kvm_device_from_filp(f.file);
+ dev = kvm_device_from_filp(fd_file(f));
if (dev)
r = kvmppc_mpic_connect_vcpu(dev, vcpu, cap->args[1]);
- fdput(f);
break;
}
#endif
#ifdef CONFIG_KVM_XICS
case KVM_CAP_IRQ_XICS: {
- struct fd f;
+ CLASS(fd, f)(cap->args[0]);
struct kvm_device *dev;
r = -EBADF;
- f = fdget(cap->args[0]);
- if (!f.file)
+ if (fd_empty(f))
break;
r = -EPERM;
- dev = kvm_device_from_filp(f.file);
- if (dev)
- r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
-
- fdput(f);
+ dev = kvm_device_from_filp(fd_file(f));
+ if (dev) {
+ if (xics_on_xive())
+ r = kvmppc_xive_connect_vcpu(dev, vcpu, cap->args[1]);
+ else
+ r = kvmppc_xics_connect_vcpu(dev, vcpu, cap->args[1]);
+ }
break;
}
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_CAP_PPC_IRQ_XIVE: {
+ CLASS(fd, f)(cap->args[0]);
+ struct kvm_device *dev;
+
+ r = -EBADF;
+ if (fd_empty(f))
+ break;
+
+ r = -ENXIO;
+ if (!xive_enabled())
+ break;
+
+ r = -EPERM;
+ dev = kvm_device_from_filp(fd_file(f));
+ if (dev)
+ r = kvmppc_xive_native_connect_vcpu(dev, vcpu,
+ cap->args[1]);
+ break;
+ }
+#endif /* CONFIG_KVM_XIVE */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ case KVM_CAP_PPC_FWNMI:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(vcpu->kvm))
+ break;
+ r = 0;
+ vcpu->kvm->arch.fwnmi_enabled = true;
+ break;
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
default:
r = -EINVAL;
break;
@@ -1074,6 +2003,19 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+#ifdef CONFIG_KVM_MPIC
+ if (kvm->arch.mpic)
+ return true;
+#endif
+#ifdef CONFIG_KVM_XICS
+ if (kvm->arch.xics || kvm->arch.xive)
+ return true;
+#endif
+ return false;
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
@@ -1086,30 +2028,38 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
return -EINVAL;
}
-long kvm_arch_vcpu_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+long kvm_arch_vcpu_async_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
- switch (ioctl) {
- case KVM_INTERRUPT: {
+ if (ioctl == KVM_INTERRUPT) {
struct kvm_interrupt irq;
- r = -EFAULT;
if (copy_from_user(&irq, argp, sizeof(irq)))
- goto out;
- r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- goto out;
+ return -EFAULT;
+ return kvm_vcpu_ioctl_interrupt(vcpu, &irq);
}
+ return -ENOIOCTLCMD;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+ unsigned int ioctl, unsigned long arg)
+{
+ struct kvm_vcpu *vcpu = filp->private_data;
+ void __user *argp = (void __user *)arg;
+ long r;
+ switch (ioctl) {
case KVM_ENABLE_CAP:
{
struct kvm_enable_cap cap;
r = -EFAULT;
if (copy_from_user(&cap, argp, sizeof(cap)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ vcpu_put(vcpu);
break;
}
@@ -1133,7 +2083,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&dirty, argp, sizeof(dirty)))
goto out;
+ vcpu_load(vcpu);
r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty);
+ vcpu_put(vcpu);
break;
}
#endif
@@ -1145,7 +2097,7 @@ out:
return r;
}
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
+vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
return VM_FAULT_SIGBUS;
}
@@ -1185,10 +2137,25 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
return 0;
}
+/*
+ * Return true when any configured in-kernel interrupt controller pointer
+ * (MPIC, XICS or XIVE) is set in kvm->arch.
+ */
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	bool present = false;
+
+#ifdef CONFIG_KVM_MPIC
+	if (kvm->arch.mpic != NULL)
+		present = true;
+#endif
+#ifdef CONFIG_KVM_XICS
+	if (!present && kvm->arch.xics != NULL)
+		present = true;
+	if (!present && kvm->arch.xive != NULL)
+		present = true;
+#endif
+	/* Read barrier is issued after the pointer checks, before returning */
+	smp_rmb();
+	return present;
+}
+
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{
- if (!irqchip_in_kernel(kvm))
+ if (!kvm_arch_irqchip_in_kernel(kvm))
return -ENXIO;
irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
@@ -1198,8 +2165,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
}
-static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
- struct kvm_enable_cap *cap)
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
{
int r;
@@ -1224,6 +2191,37 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
break;
}
+ case KVM_CAP_PPC_SMT: {
+ unsigned long mode = cap->args[0];
+ unsigned long flags = cap->args[1];
+
+ r = -EINVAL;
+ if (kvm->arch.kvm_ops->set_smt_mode)
+ r = kvm->arch.kvm_ops->set_smt_mode(kvm, mode, flags);
+ break;
+ }
+
+ case KVM_CAP_PPC_NESTED_HV:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(kvm) ||
+ !kvm->arch.kvm_ops->enable_nested)
+ break;
+ r = kvm->arch.kvm_ops->enable_nested(kvm);
+ break;
+#endif
+#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE)
+ case KVM_CAP_PPC_SECURE_GUEST:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_svm)
+ break;
+ r = kvm->arch.kvm_ops->enable_svm(kvm);
+ break;
+ case KVM_CAP_PPC_DAWR1:
+ r = -EINVAL;
+ if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1)
+ break;
+ r = kvm->arch.kvm_ops->enable_dawr1(kvm);
+ break;
#endif
default:
r = -EINVAL;
@@ -1233,12 +2231,139 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}
-long kvm_arch_vm_ioctl(struct file *filp,
- unsigned int ioctl, unsigned long arg)
+#ifdef CONFIG_PPC_BOOK3S_64
+/*
+ * These functions check whether the underlying hardware is safe
+ * against attacks based on observing the effects of speculatively
+ * executed instructions, and whether it supplies instructions for
+ * use in workarounds. The information comes from firmware, either
+ * via the device tree on powernv platforms or from an hcall on
+ * pseries platforms.
+ */
+#ifdef CONFIG_PPC_PSERIES
+/*
+ * Fill *cp from plpar_get_cpu_characteristics().
+ *
+ * Returns -ENOTTY when not running on a pseries machine, 0 otherwise.
+ * A failing hcall still returns 0 and leaves *cp untouched.
+ */
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+	struct h_cpu_char_result res;
+
+	if (!machine_is(pseries))
+		return -ENOTTY;
+
+	/* Not treated as an error: the caller keeps whatever *cp held */
+	if (plpar_get_cpu_characteristics(&res) != H_SUCCESS)
+		return 0;
+
+	cp->character = res.character;
+	cp->behaviour = res.behaviour;
+
+	/* Bits of the two words above that are reported as valid */
+	cp->character_mask = KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 |
+			     KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED |
+			     KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 |
+			     KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 |
+			     KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV |
+			     KVM_PPC_CPU_CHAR_BR_HINT_HONOURED |
+			     KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF |
+			     KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS |
+			     KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST;
+	cp->behaviour_mask = KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY |
+			     KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR |
+			     KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR |
+			     KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE;
+	return 0;
+}
+#else
+/* Built without CONFIG_PPC_PSERIES: always report "not handled here" */
+static int pseries_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+	return -ENOTTY;
+}
+#endif
+
+/*
+ * Return the boolean property @state of the child node @name under
+ * @fw_features, or false when that child does not exist. The reference
+ * taken on the child node is dropped before returning.
+ */
+static inline bool have_fw_feat(struct device_node *fw_features,
+				const char *state, const char *name)
+{
+	struct device_node *child = of_get_child_by_name(fw_features, name);
+	bool present;
+
+	if (!child)
+		return false;
+
+	present = of_property_read_bool(child, state);
+	of_node_put(child);
+	return present;
+}
+
+/*
+ * Fill *cp with the CPU characteristics and behaviours reported to
+ * userspace.
+ *
+ * *cp is zeroed first. The pseries path is tried next and any result other
+ * than -ENOTTY is returned unchanged. Otherwise the "fw-features" child of
+ * the "ibm,opal" device tree node is consulted; when either node is missing
+ * *cp stays zeroed. The device tree path always returns 0.
+ */
+static int kvmppc_get_cpu_char(struct kvm_ppc_cpu_char *cp)
+{
+	/* Maps one fw-features child node to the flag bit it controls */
+	struct fw_feat_flag {
+		const char *name;	/* child node name */
+		const char *state;	/* boolean property read from it */
+		bool expect;		/* have_fw_feat() result that sets the flag */
+		u64 flag;
+	};
+	static const struct fw_feat_flag characters[] = {
+		{ "inst-spec-barrier-ori31,31,0", "enabled", true,
+		  KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 },
+		{ "fw-bcctrl-serialized", "enabled", true,
+		  KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED },
+		{ "inst-l1d-flush-ori30,30,0", "enabled", true,
+		  KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 },
+		{ "inst-l1d-flush-trig2", "enabled", true,
+		  KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 },
+		{ "fw-l1d-thread-split", "enabled", true,
+		  KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV },
+		{ "fw-count-cache-disabled", "enabled", true,
+		  KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS },
+		{ "fw-count-cache-flush-bcctr2,0,0", "enabled", true,
+		  KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST },
+	};
+	/* Two behaviours are set unless firmware marks them "disabled" */
+	static const struct fw_feat_flag behaviours[] = {
+		{ "speculation-policy-favor-security", "enabled", true,
+		  KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY },
+		{ "needs-l1d-flush-msr-pr-0-to-1", "disabled", false,
+		  KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR },
+		{ "needs-spec-barrier-for-bound-checks", "disabled", false,
+		  KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR },
+		{ "needs-count-cache-flush-on-context-switch", "enabled", true,
+		  KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE },
+	};
+	struct device_node *opal, *fw_features;
+	const struct fw_feat_flag *f;
+	u64 mask;
+	size_t i;
+	int rc;
+
+	memset(cp, 0, sizeof(*cp));
+	rc = pseries_get_cpu_char(cp);
+	if (rc != -ENOTTY)
+		return rc;
+
+	opal = of_find_node_by_name(NULL, "ibm,opal");
+	if (!opal)
+		return 0;
+
+	fw_features = of_get_child_by_name(opal, "fw-features");
+	of_node_put(opal);
+	if (!fw_features)
+		return 0;
+
+	/* The mask is the union of every flag this table can report */
+	mask = 0;
+	for (i = 0; i < ARRAY_SIZE(characters); i++) {
+		f = &characters[i];
+		if (have_fw_feat(fw_features, f->state, f->name) == f->expect)
+			cp->character |= f->flag;
+		mask |= f->flag;
+	}
+	cp->character_mask = mask;
+
+	mask = 0;
+	for (i = 0; i < ARRAY_SIZE(behaviours); i++) {
+		f = &behaviours[i];
+		if (have_fw_feat(fw_features, f->state, f->name) == f->expect)
+			cp->behaviour |= f->flag;
+		mask |= f->flag;
+	}
+	cp->behaviour_mask = mask;
+
+	of_node_put(fw_features);
+	return 0;
+}
+#endif
+
+int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
struct kvm *kvm __maybe_unused = filp->private_data;
void __user *argp = (void __user *)arg;
- long r;
+ int r;
switch (ioctl) {
case KVM_PPC_GET_PVINFO: {
@@ -1252,25 +2377,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
- case KVM_ENABLE_CAP:
- {
- struct kvm_enable_cap cap;
+#ifdef CONFIG_SPAPR_TCE_IOMMU
+ case KVM_CREATE_SPAPR_TCE_64: {
+ struct kvm_create_spapr_tce_64 create_tce_64;
+
r = -EFAULT;
- if (copy_from_user(&cap, argp, sizeof(cap)))
+ if (copy_from_user(&create_tce_64, argp, sizeof(create_tce_64)))
goto out;
- r = kvm_vm_ioctl_enable_cap(kvm, &cap);
- break;
+ if (create_tce_64.flags) {
+ r = -EINVAL;
+ goto out;
+ }
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
+ goto out;
}
-#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CREATE_SPAPR_TCE: {
struct kvm_create_spapr_tce create_tce;
+ struct kvm_create_spapr_tce_64 create_tce_64;
r = -EFAULT;
if (copy_from_user(&create_tce, argp, sizeof(create_tce)))
goto out;
- r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce);
+
+ create_tce_64.liobn = create_tce.liobn;
+ create_tce_64.page_shift = IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.offset = 0;
+ create_tce_64.size = create_tce.window_size >>
+ IOMMU_PAGE_SHIFT_4K;
+ create_tce_64.flags = 0;
+ r = kvm_vm_ioctl_create_spapr_tce(kvm, &create_tce_64);
goto out;
}
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
case KVM_PPC_GET_SMMU_INFO: {
struct kvm_ppc_smmu_info info;
struct kvm *kvm = filp->private_data;
@@ -1287,6 +2426,49 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
break;
}
+ case KVM_PPC_CONFIGURE_V3_MMU: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_ppc_mmuv3_cfg cfg;
+
+ r = -EINVAL;
+ if (!kvm->arch.kvm_ops->configure_mmu)
+ goto out;
+ r = -EFAULT;
+ if (copy_from_user(&cfg, argp, sizeof(cfg)))
+ goto out;
+ r = kvm->arch.kvm_ops->configure_mmu(kvm, &cfg);
+ break;
+ }
+ case KVM_PPC_GET_RMMU_INFO: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_ppc_rmmu_info info;
+
+ r = -EINVAL;
+ if (!kvm->arch.kvm_ops->get_rmmu_info)
+ goto out;
+ r = kvm->arch.kvm_ops->get_rmmu_info(kvm, &info);
+ if (r >= 0 && copy_to_user(argp, &info, sizeof(info)))
+ r = -EFAULT;
+ break;
+ }
+ case KVM_PPC_GET_CPU_CHAR: {
+ struct kvm_ppc_cpu_char cpuchar;
+
+ r = kvmppc_get_cpu_char(&cpuchar);
+ if (r >= 0 && copy_to_user(argp, &cpuchar, sizeof(cpuchar)))
+ r = -EFAULT;
+ break;
+ }
+ case KVM_PPC_SVM_OFF: {
+ struct kvm *kvm = filp->private_data;
+
+ r = 0;
+ if (!kvm->arch.kvm_ops->svm_off)
+ goto out;
+
+ r = kvm->arch.kvm_ops->svm_off(kvm);
+ break;
+ }
default: {
struct kvm *kvm = filp->private_data;
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
@@ -1300,52 +2482,50 @@ out:
return r;
}
-static unsigned long lpid_inuse[BITS_TO_LONGS(KVMPPC_NR_LPIDS)];
+static DEFINE_IDA(lpid_inuse);
static unsigned long nr_lpids;
long kvmppc_alloc_lpid(void)
{
- long lpid;
+ int lpid;
- do {
- lpid = find_first_zero_bit(lpid_inuse, KVMPPC_NR_LPIDS);
- if (lpid >= nr_lpids) {
+ /* The host LPID must always be 0 (allocation starts at 1) */
+ lpid = ida_alloc_range(&lpid_inuse, 1, nr_lpids - 1, GFP_KERNEL);
+ if (lpid < 0) {
+ if (lpid == -ENOMEM)
+ pr_err("%s: Out of memory\n", __func__);
+ else
pr_err("%s: No LPIDs free\n", __func__);
- return -ENOMEM;
- }
- } while (test_and_set_bit(lpid, lpid_inuse));
+ return -ENOMEM;
+ }
return lpid;
}
EXPORT_SYMBOL_GPL(kvmppc_alloc_lpid);
-void kvmppc_claim_lpid(long lpid)
-{
- set_bit(lpid, lpid_inuse);
-}
-EXPORT_SYMBOL_GPL(kvmppc_claim_lpid);
-
void kvmppc_free_lpid(long lpid)
{
- clear_bit(lpid, lpid_inuse);
+ ida_free(&lpid_inuse, lpid);
}
EXPORT_SYMBOL_GPL(kvmppc_free_lpid);
+/* nr_lpids_param includes the host LPID */
void kvmppc_init_lpid(unsigned long nr_lpids_param)
{
- nr_lpids = min_t(unsigned long, KVMPPC_NR_LPIDS, nr_lpids_param);
- memset(lpid_inuse, 0, sizeof(lpid_inuse));
+ nr_lpids = nr_lpids_param;
}
EXPORT_SYMBOL_GPL(kvmppc_init_lpid);
-int kvm_arch_init(void *opaque)
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
+
+void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
{
- return 0;
+ if (vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs)
+ vcpu->kvm->arch.kvm_ops->create_vcpu_debugfs(vcpu, debugfs_dentry);
}
-void kvm_arch_exit(void)
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
-
+ if (kvm->arch.kvm_ops->create_vm_debugfs)
+ kvm->arch.kvm_ops->create_vm_debugfs(kvm);
}
-
-EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_ppc_instr);
diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c
new file mode 100644
index 000000000000..5ccca306997a
--- /dev/null
+++ b/arch/powerpc/kvm/test-guest-state-buffer.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/init.h>
+#include <linux/log2.h>
+#include <kunit/test.h>
+
+#include <asm/guest-state-buffer.h>
+#include <asm/kvm_ppc.h>
+
+/* Check the header, capacity and initial length of a freshly created buffer. */
+static void test_creating_buffer(struct kunit *test)
+{
+	size_t requested = 0x100;
+	struct kvmppc_gs_buff *buf;
+
+	buf = kvmppc_gsb_new(requested, 0, 0, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf->hdr);
+
+	/* Capacity is expected to be the request rounded up to a power of two */
+	KUNIT_EXPECT_EQ(test, buf->capacity, roundup_pow_of_two(requested));
+	/* A buffer without elements is expected to be one __be32 long */
+	KUNIT_EXPECT_EQ(test, buf->len, sizeof(__be32));
+
+	kvmppc_gsb_free(buf);
+}
+
+/*
+ * Add elements through the raw and the typed put helpers, walk the buffer
+ * checking each element's ID, length and value, then check that a reset
+ * leaves the buffer with no elements and only its header.
+ */
+static void test_adding_element(struct kunit *test)
+{
+	union {
+		__vector128 v;
+		u64 dw[2];
+	} vec;
+	const struct kvmppc_gs_elem *first, *elem;
+	struct kvmppc_gs_buff *buf;
+	size_t capacity = 0x1000;
+	int remaining;
+	int idx, ret;
+	u64 val;
+
+	buf = kvmppc_gsb_new(capacity, 0, 0, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
+	/* First element: written with the low-level __kvmppc_gse_put() */
+	val = 0xdeadbeef;
+	ret = __kvmppc_gse_put(buf, KVMPPC_GSID_GPR(0), 8, &val);
+	KUNIT_EXPECT_GE(test, ret, 0);
+
+	first = kvmppc_gsb_data(buf);
+	KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(first), KVMPPC_GSID_GPR(0));
+	KUNIT_EXPECT_EQ(test, kvmppc_gse_len(first), 8);
+	/* Clear the local copy so the memcpy below has to restore the value */
+	val = 0;
+	memcpy(&val, kvmppc_gse_data(first), 8);
+	KUNIT_EXPECT_EQ(test, val, 0xdeadbeef);
+
+	/* Further elements: written with the typed wrappers */
+	ret = kvmppc_gse_put_u64(buf, KVMPPC_GSID_GPR(1), 0xcafef00d);
+	KUNIT_EXPECT_GE(test, ret, 0);
+
+	vec.dw[0] = 0x1;
+	vec.dw[1] = 0x2;
+	ret = kvmppc_gse_put_vector128(buf, KVMPPC_GSID_VSRS(0), &vec.v);
+	KUNIT_EXPECT_GE(test, ret, 0);
+	/* Zero the local vector so the read-back below is meaningful */
+	vec.dw[0] = 0x0;
+	vec.dw[1] = 0x0;
+
+	kvmppc_gsb_for_each_elem(idx, elem, buf, remaining) {
+		if (idx == 0) {
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(elem),
+					KVMPPC_GSID_GPR(0));
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_len(elem), 8);
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_get_be64(elem),
+					0xdeadbeef);
+		} else if (idx == 1) {
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(elem),
+					KVMPPC_GSID_GPR(1));
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_len(elem), 8);
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_get_u64(elem),
+					0xcafef00d);
+		} else if (idx == 2) {
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_iden(elem),
+					KVMPPC_GSID_VSRS(0));
+			KUNIT_EXPECT_EQ(test, kvmppc_gse_len(elem), 16);
+			kvmppc_gse_get_vector128(elem, &vec.v);
+			KUNIT_EXPECT_EQ(test, vec.dw[0], 0x1);
+			KUNIT_EXPECT_EQ(test, vec.dw[1], 0x2);
+		}
+	}
+	/* Exactly the three elements added above must have been visited */
+	KUNIT_EXPECT_EQ(test, idx, 3);
+
+	kvmppc_gsb_reset(buf);
+	KUNIT_EXPECT_EQ(test, kvmppc_gsb_nelems(buf), 0);
+	KUNIT_EXPECT_EQ(test, kvmppc_gsb_len(buf),
+			sizeof(struct kvmppc_gs_header));
+
+	kvmppc_gsb_free(buf);
+}
+
+/* Put one u64 element into a buffer, parse it and look it up again by ID. */
+static void test_gs_parsing(struct kunit *test)
+{
+	struct kvmppc_gs_parser parser = { 0 };
+	struct kvmppc_gs_buff *buf;
+	struct kvmppc_gs_elem *elem;
+	size_t capacity = 0x1000;
+	u64 written, read_back;
+
+	buf = kvmppc_gsb_new(capacity, 0, 0, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, buf);
+
+	written = 0xdeadbeefull;
+	kvmppc_gse_put_u64(buf, KVMPPC_GSID_GPR(0), written);
+
+	KUNIT_EXPECT_GE(test, kvmppc_gse_parse(&parser, buf), 0);
+
+	elem = kvmppc_gsp_lookup(&parser, KVMPPC_GSID_GPR(0));
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, elem);
+
+	read_back = kvmppc_gse_get_u64(elem);
+	KUNIT_EXPECT_EQ(test, read_back, 0xdeadbeefull);
+
+	kvmppc_gsb_free(buf);
+}
+
+/*
+ * For every ID in [first, last]: set it in both bitmaps, check that the set
+ * and a following clear are visible through kvmppc_gsbm_test() on @scratch,
+ * and leave it set in @record.
+ *
+ * Returns the number of IDs visited.
+ */
+static int test_gs_bitmap_range(struct kunit *test,
+				struct kvmppc_gs_bitmap *scratch,
+				struct kvmppc_gs_bitmap *record,
+				u16 first, u16 last)
+{
+	int count = 0;
+	u16 iden;
+
+	for (iden = first; iden <= last; iden++) {
+		kvmppc_gsbm_set(scratch, iden);
+		kvmppc_gsbm_set(record, iden);
+		KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(scratch, iden));
+		kvmppc_gsbm_clear(scratch, iden);
+		KUNIT_EXPECT_FALSE(test, kvmppc_gsbm_test(scratch, iden));
+		count++;
+	}
+	return count;
+}
+
+/*
+ * Exercise set/clear/test over every guest state ID range, then check that
+ * iterating the recorded bitmap visits exactly the IDs that were set.
+ */
+static void test_gs_bitmap(struct kunit *test)
+{
+	/* Inclusive ID ranges, in the order they are exercised */
+	const struct {
+		u16 first;
+		u16 last;
+	} ranges[] = {
+		{ KVMPPC_GSID_HOST_STATE_SIZE, KVMPPC_GSID_PROCESS_TABLE },
+		{ KVMPPC_GSID_L0_GUEST_HEAP,
+		  KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM },
+		{ KVMPPC_GSID_RUN_INPUT, KVMPPC_GSID_VPA },
+		{ KVMPPC_GSID_GPR(0), KVMPPC_GSE_DW_REGS_END },
+		{ KVMPPC_GSID_CR, KVMPPC_GSID_PSPB },
+		{ KVMPPC_GSID_VSRS(0), KVMPPC_GSID_VSRS(63) },
+		{ KVMPPC_GSID_HDAR, KVMPPC_GSID_ASDR },
+	};
+	struct kvmppc_gs_bitmap gsbm = { 0 };
+	struct kvmppc_gs_bitmap gsbm1 = { 0 };
+	struct kvmppc_gs_bitmap gsbm2 = { 0 };
+	size_t r;
+	u16 iden;
+	int i, j;
+
+	/* i counts the IDs set in gsbm1 */
+	i = 0;
+	for (r = 0; r < ARRAY_SIZE(ranges); r++)
+		i += test_gs_bitmap_range(test, &gsbm, &gsbm1,
+					  ranges[r].first, ranges[r].last);
+
+	/* j counts the IDs the iterator reports; gsbm2 mirrors them */
+	j = 0;
+	kvmppc_gsbm_for_each(&gsbm1, iden)
+	{
+		kvmppc_gsbm_set(&gsbm2, iden);
+		j++;
+	}
+	KUNIT_EXPECT_EQ(test, i, j);
+	KUNIT_EXPECT_MEMEQ(test, &gsbm1, &gsbm2, sizeof(gsbm1));
+}
+
+struct kvmppc_gs_msg_test1_data { /* payload used by the test1 message ops */
+ u64 a; /* put as KVMPPC_GSID_GPR(0) by test1_fill_info() */
+ u32 b; /* put as KVMPPC_GSID_CR */
+ struct kvmppc_gs_part_table c; /* put as KVMPPC_GSID_PARTITION_TABLE */
+ struct kvmppc_gs_proc_table d; /* put when KVMPPC_GSID_PROCESS_TABLE is included */
+ struct kvmppc_gs_buff_info e; /* put as KVMPPC_GSID_RUN_INPUT */
+};
+
+/*
+ * get_size callback for the test1 message: total element size of every ID
+ * the message can carry. @gsm is not consulted.
+ */
+static size_t test1_get_size(struct kvmppc_gs_msg *gsm)
+{
+	u16 carried[] = {
+		KVMPPC_GSID_PARTITION_TABLE,
+		KVMPPC_GSID_PROCESS_TABLE,
+		KVMPPC_GSID_RUN_INPUT,
+		KVMPPC_GSID_GPR(0),
+		KVMPPC_GSID_CR,
+	};
+	size_t total = 0;
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(carried)) {
+		total += kvmppc_gse_total_size(kvmppc_gsid_size(carried[idx]));
+		idx++;
+	}
+	return total;
+}
+
+/*
+ * fill_info callback for the test1 message: for every guest state ID that
+ * is included in @gsm, put the matching field of the test data into @gsb.
+ *
+ * Always returns 0; the results of the individual puts are not checked.
+ */
+static int test1_fill_info(struct kvmppc_gs_buff *gsb,
+			   struct kvmppc_gs_msg *gsm)
+{
+	struct kvmppc_gs_msg_test1_data *data = gsm->data;
+
+	if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_GPR(0)))
+		kvmppc_gse_put_u64(gsb, KVMPPC_GSID_GPR(0), data->a);
+
+	if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_CR))
+		kvmppc_gse_put_u32(gsb, KVMPPC_GSID_CR, data->b);
+
+	if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_PARTITION_TABLE))
+		kvmppc_gse_put_part_table(gsb, KVMPPC_GSID_PARTITION_TABLE,
+					  data->c);
+
+	/* The process table must go out under its own ID, not the partition table's */
+	if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_PROCESS_TABLE))
+		kvmppc_gse_put_proc_table(gsb, KVMPPC_GSID_PROCESS_TABLE,
+					  data->d);
+
+	if (kvmppc_gsm_includes(gsm, KVMPPC_GSID_RUN_INPUT))
+		kvmppc_gse_put_buff_info(gsb, KVMPPC_GSID_RUN_INPUT, data->e);
+
+	return 0;
+}
+
+/*
+ * refresh_info callback for the test1 message: parse @gsb and copy the
+ * GPR(0) and CR elements, when present, back into the test data.
+ *
+ * Returns the parser's negative result on failure, 0 otherwise.
+ */
+static int test1_refresh_info(struct kvmppc_gs_msg *gsm,
+			      struct kvmppc_gs_buff *gsb)
+{
+	struct kvmppc_gs_msg_test1_data *out = gsm->data;
+	struct kvmppc_gs_parser parser = { 0 };
+	struct kvmppc_gs_elem *elem;
+	int ret = kvmppc_gse_parse(&parser, gsb);
+
+	if (ret < 0)
+		return ret;
+
+	elem = kvmppc_gsp_lookup(&parser, KVMPPC_GSID_GPR(0));
+	if (elem != NULL)
+		out->a = kvmppc_gse_get_u64(elem);
+
+	elem = kvmppc_gsp_lookup(&parser, KVMPPC_GSID_CR);
+	if (elem != NULL)
+		out->b = kvmppc_gse_get_u32(elem);
+
+	return 0;
+}
+
+static struct kvmppc_gs_msg_ops gs_msg_test1_ops = { /* callbacks passed to kvmppc_gsm_new() in test_gs_msg() */
+ .get_size = test1_get_size, /* total element size of the IDs test1 carries */
+ .fill_info = test1_fill_info, /* test data -> buffer */
+ .refresh_info = test1_refresh_info, /* buffer -> test data (fields a and b only) */
+};
+
+/*
+ * Round-trip the test1 data through a message: fill a buffer from the data,
+ * wipe the data, refresh it from the buffer and check that the fields
+ * handled by test1_refresh_info() come back.
+ */
+static void test_gs_msg(struct kunit *test)
+{
+	struct kvmppc_gs_msg_test1_data test1_data = {
+		.a = 0xdeadbeef,
+		.b = 0x1,
+	};
+	struct kvmppc_gs_msg *gsm;
+	struct kvmppc_gs_buff *gsb;
+
+	gsm = kvmppc_gsm_new(&gs_msg_test1_ops, &test1_data, GSM_SEND,
+			     GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+	gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_PARTITION_TABLE);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_PROCESS_TABLE);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_RUN_INPUT);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_GPR(0));
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_CR);
+
+	kvmppc_gsm_fill_info(gsm, gsb);
+
+	/* Wipe the data so the checks below prove it came from the buffer */
+	memset(&test1_data, 0, sizeof(test1_data));
+
+	kvmppc_gsm_refresh_info(gsm, gsb);
+	KUNIT_EXPECT_EQ(test, test1_data.a, 0xdeadbeef);
+	KUNIT_EXPECT_EQ(test, test1_data.b, 0x1);
+
+	kvmppc_gsm_free(gsm);
+	/* Release the buffer as well; it was previously leaked */
+	kvmppc_gsb_free(gsb);
+}
+
+/* Test data struct for hostwide/L0 counters */
+struct kvmppc_gs_msg_test_hostwide_data {
+ u64 guest_heap; /* KVMPPC_GSID_L0_GUEST_HEAP */
+ u64 guest_heap_max; /* KVMPPC_GSID_L0_GUEST_HEAP_MAX */
+ u64 guest_pgtable_size; /* KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE */
+ u64 guest_pgtable_size_max; /* KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX */
+ u64 guest_pgtable_reclaim; /* KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM */
+};
+
+/*
+ * get_size callback for the hostwide message: total element size of the
+ * five L0 counter IDs. @gsm is not consulted.
+ */
+static size_t test_hostwide_get_size(struct kvmppc_gs_msg *gsm)
+{
+	u16 counters[] = {
+		KVMPPC_GSID_L0_GUEST_HEAP,
+		KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+		KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+		KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+		KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+	};
+	size_t total = 0;
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(counters)) {
+		total += kvmppc_gse_total_size(kvmppc_gsid_size(counters[idx]));
+		idx++;
+	}
+	return total;
+}
+
+/*
+ * fill_info callback for the hostwide message: put each counter whose ID is
+ * included in @gsm into @gsb as a u64 element.
+ *
+ * Always returns 0; the results of the individual puts are not checked.
+ */
+static int test_hostwide_fill_info(struct kvmppc_gs_buff *gsb,
+				   struct kvmppc_gs_msg *gsm)
+{
+	struct kvmppc_gs_msg_test_hostwide_data *src = gsm->data;
+	/* Counter IDs paired with the value to send, in emission order */
+	const struct {
+		u16 iden;
+		u64 value;
+	} counters[] = {
+		{ KVMPPC_GSID_L0_GUEST_HEAP, src->guest_heap },
+		{ KVMPPC_GSID_L0_GUEST_HEAP_MAX, src->guest_heap_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, src->guest_pgtable_size },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+		  src->guest_pgtable_size_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+		  src->guest_pgtable_reclaim },
+	};
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(counters); idx++) {
+		if (!kvmppc_gsm_includes(gsm, counters[idx].iden))
+			continue;
+		kvmppc_gse_put_u64(gsb, counters[idx].iden,
+				   counters[idx].value);
+	}
+
+	return 0;
+}
+
+/*
+ * refresh_info callback for the hostwide message: parse @gsb and copy every
+ * counter element that is present back into the test data.
+ *
+ * Returns the parser's negative result on failure, 0 otherwise.
+ */
+static int test_hostwide_refresh_info(struct kvmppc_gs_msg *gsm,
+				      struct kvmppc_gs_buff *gsb)
+{
+	struct kvmppc_gs_msg_test_hostwide_data *out = gsm->data;
+	/* Counter IDs paired with the field that receives the value */
+	const struct {
+		u16 iden;
+		u64 *field;
+	} counters[] = {
+		{ KVMPPC_GSID_L0_GUEST_HEAP, &out->guest_heap },
+		{ KVMPPC_GSID_L0_GUEST_HEAP_MAX, &out->guest_heap_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, &out->guest_pgtable_size },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+		  &out->guest_pgtable_size_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+		  &out->guest_pgtable_reclaim },
+	};
+	struct kvmppc_gs_parser parser = { 0 };
+	struct kvmppc_gs_elem *elem;
+	int idx, ret;
+
+	ret = kvmppc_gse_parse(&parser, gsb);
+	if (ret < 0)
+		return ret;
+
+	for (idx = 0; idx < ARRAY_SIZE(counters); idx++) {
+		elem = kvmppc_gsp_lookup(&parser, counters[idx].iden);
+		if (elem != NULL)
+			*counters[idx].field = kvmppc_gse_get_u64(elem);
+	}
+
+	return 0;
+}
+
+static struct kvmppc_gs_msg_ops gs_msg_test_hostwide_ops = { /* callbacks passed to kvmppc_gsm_new() by the hostwide tests */
+ .get_size = test_hostwide_get_size, /* total element size of the five L0 counter IDs */
+ .fill_info = test_hostwide_fill_info, /* test data -> buffer */
+ .refresh_info = test_hostwide_refresh_info, /* buffer -> test data */
+};
+
+/*
+ * Round-trip the hostwide counter data through a message: fill a buffer
+ * from the data, wipe the data, refresh it from the buffer and check that
+ * all five counters come back unchanged.
+ */
+static void test_gs_hostwide_msg(struct kunit *test)
+{
+	struct kvmppc_gs_msg_test_hostwide_data test_data = {
+		.guest_heap = 0xdeadbeef,
+		.guest_heap_max = ~0ULL,
+		.guest_pgtable_size = 0xff,
+		.guest_pgtable_size_max = 0xffffff,
+		.guest_pgtable_reclaim = 0xdeadbeef,
+	};
+	struct kvmppc_gs_msg *gsm;
+	struct kvmppc_gs_buff *gsb;
+
+	gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND,
+			     GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+	gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+	kvmppc_gsm_fill_info(gsm, gsb);
+
+	/* Wipe the data so the checks below prove it came from the buffer */
+	memset(&test_data, 0, sizeof(test_data));
+
+	kvmppc_gsm_refresh_info(gsm, gsb);
+	KUNIT_EXPECT_EQ(test, test_data.guest_heap, 0xdeadbeef);
+	KUNIT_EXPECT_EQ(test, test_data.guest_heap_max, ~0ULL);
+	KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size, 0xff);
+	KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_size_max, 0xffffff);
+	KUNIT_EXPECT_EQ(test, test_data.guest_pgtable_reclaim, 0xdeadbeef);
+
+	kvmppc_gsm_free(gsm);
+	/* Release the buffer as well; it was previously leaked */
+	kvmppc_gsb_free(gsb);
+}
+
+/*
+ * Test if the H_GUEST_GET_STATE for hostwide counters works.
+ *
+ * Skipped unless kvmhv_on_pseries() is true. Requests all five L0 counters
+ * with the HOST_WIDE flag, then checks that each one is present in the
+ * received buffer and logs its value.
+ */
+static void test_gs_hostwide_counters(struct kunit *test)
+{
+	/* Zeroed: fill_info reads every field when building the request */
+	struct kvmppc_gs_msg_test_hostwide_data test_data = { 0 };
+	struct kvmppc_gs_parser gsp = { 0 };
+
+	struct kvmppc_gs_msg *gsm;
+	struct kvmppc_gs_buff *gsb;
+	struct kvmppc_gs_elem *gse;
+	int rc;
+
+	if (!kvmhv_on_pseries())
+		kunit_skip(test, "This test needs a kvm-hv guest");
+
+	gsm = kvmppc_gsm_new(&gs_msg_test_hostwide_ops, &test_data, GSM_SEND,
+			     GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsm);
+
+	gsb = kvmppc_gsb_new(kvmppc_gsm_size(gsm), 0, 0, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gsb);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+
+	kvmppc_gsm_include(gsm, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+	kvmppc_gsm_fill_info(gsm, gsb);
+
+	/* With HOST_WIDE flags guestid and vcpuid will be ignored */
+	rc = kvmppc_gsb_recv(gsb, KVMPPC_GS_FLAGS_HOST_WIDE);
+	KUNIT_ASSERT_EQ(test, rc, 0);
+
+	/* Parsing the received guest state buffer must succeed */
+	rc = kvmppc_gse_parse(&gsp, gsb);
+	KUNIT_ASSERT_EQ(test, rc, 0);
+
+	/* Look up each counter in the parsed buffer and log it */
+	gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP);
+	KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter missing");
+	kunit_info(test, "Guest Heap Size=%llu bytes",
+		   kvmppc_gse_get_u64(gse));
+
+	gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+	KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 Heap counter max missing");
+	kunit_info(test, "Guest Heap Size Max=%llu bytes",
+		   kvmppc_gse_get_u64(gse));
+
+	gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+	KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size missing");
+	kunit_info(test, "Guest Page-table Size=%llu bytes",
+		   kvmppc_gse_get_u64(gse));
+
+	gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+	KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table size-max missing");
+	kunit_info(test, "Guest Page-table Size Max=%llu bytes",
+		   kvmppc_gse_get_u64(gse));
+
+	gse = kvmppc_gsp_lookup(&gsp, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+	KUNIT_ASSERT_NOT_NULL_MSG(test, gse, "L0 page-table reclaim size missing");
+	kunit_info(test, "Guest Page-table Reclaim Size=%llu bytes",
+		   kvmppc_gse_get_u64(gse));
+
+	kvmppc_gsm_free(gsm);
+	kvmppc_gsb_free(gsb);
+}
+
+static struct kunit_case guest_state_buffer_testcases[] = { /* cases referenced by guest_state_buffer_test_suite */
+ KUNIT_CASE(test_creating_buffer),
+ KUNIT_CASE(test_adding_element),
+ KUNIT_CASE(test_gs_bitmap),
+ KUNIT_CASE(test_gs_parsing),
+ KUNIT_CASE(test_gs_msg),
+ KUNIT_CASE(test_gs_hostwide_msg),
+ KUNIT_CASE(test_gs_hostwide_counters), /* skips itself unless kvmhv_on_pseries() */
+ {} /* empty entry closing the table */
+};
+
+static struct kunit_suite guest_state_buffer_test_suite = { /* suite registered via kunit_test_suites() below */
+ .name = "guest_state_buffer_test",
+ .test_cases = guest_state_buffer_testcases,
+};
+
+kunit_test_suites(&guest_state_buffer_test_suite);
+
+MODULE_DESCRIPTION("KUnit tests for Guest State Buffer APIs");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c
index e44d2b2ea97e..25071331f8c1 100644
--- a/arch/powerpc/kvm/timing.c
+++ b/arch/powerpc/kvm/timing.c
@@ -1,16 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -143,8 +132,7 @@ static int kvmppc_exit_timing_show(struct seq_file *m, void *private)
int i;
u64 min, max, sum, sum_quad;
- seq_printf(m, "%s", "type count min max sum sum_squared\n");
-
+ seq_puts(m, "type count min max sum sum_squared\n");
for (i = 0; i < __NUMBER_OF_KVM_EXIT_TYPES; i++) {
@@ -216,30 +204,10 @@ static const struct file_operations kvmppc_exit_timing_fops = {
.release = single_release,
};
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id)
-{
- static char dbg_fname[50];
- struct dentry *debugfs_file;
-
- snprintf(dbg_fname, sizeof(dbg_fname), "vm%u_vcpu%u_timing",
- current->pid, id);
- debugfs_file = debugfs_create_file(dbg_fname, 0666,
- kvm_debugfs_dir, vcpu,
- &kvmppc_exit_timing_fops);
-
- if (!debugfs_file) {
- printk(KERN_ERR"%s: error creating debugfs file %s\n",
- __func__, dbg_fname);
- return;
- }
-
- vcpu->arch.debugfs_exit_timing = debugfs_file;
-}
-
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu)
+int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu,
+ struct dentry *debugfs_dentry)
{
- if (vcpu->arch.debugfs_exit_timing) {
- debugfs_remove(vcpu->arch.debugfs_exit_timing);
- vcpu->arch.debugfs_exit_timing = NULL;
- }
+ debugfs_create_file("timing", 0666, debugfs_dentry,
+ vcpu, &kvmppc_exit_timing_fops);
+ return 0;
}
diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h
index 3123690c82dc..14b0e23f601f 100644
--- a/arch/powerpc/kvm/timing.h
+++ b/arch/powerpc/kvm/timing.h
@@ -1,16 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright IBM Corp. 2008
*
@@ -21,13 +10,12 @@
#define __POWERPC_KVM_EXITTIMING_H__
#include <linux/kvm_host.h>
-#include <asm/kvm_host.h>
#ifdef CONFIG_KVM_EXIT_TIMING
void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu);
void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu);
-void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu, unsigned int id);
-void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu);
+int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu,
+ struct dentry *debugfs_dentry);
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
{
@@ -38,9 +26,11 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type)
/* if exit timing is not configured there is no need to build the c file */
static inline void kvmppc_init_timing_stats(struct kvm_vcpu *vcpu) {}
static inline void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) {}
-static inline void kvmppc_create_vcpu_debugfs(struct kvm_vcpu *vcpu,
- unsigned int id) {}
-static inline void kvmppc_remove_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
+static inline int kvmppc_create_vcpu_debugfs_e500(struct kvm_vcpu *vcpu,
+ struct dentry *debugfs_dentry)
+{
+ return 0;
+}
static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
#endif /* CONFIG_KVM_EXIT_TIMING */
@@ -48,11 +38,7 @@ static inline void kvmppc_set_exit_type(struct kvm_vcpu *vcpu, int type) {}
static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type)
{
/* type has to be known at build time for optimization */
-
- /* The BUILD_BUG_ON below breaks in funny ways, commented out
- * for now ... -BenH
BUILD_BUG_ON(!__builtin_constant_p(type));
- */
switch (type) {
case EXT_INTR_EXITS:
vcpu->stat.ext_intr_exits++;
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
new file mode 100644
index 000000000000..b506c4d9a8d9
--- /dev/null
+++ b/arch/powerpc/kvm/tm.S
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *
+ * Derived from book3s_hv_rmhandlers.S, which is:
+ *
+ * Copyright 2011 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <asm/reg.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/tm.h>
+#include <asm/cputable.h>
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
+
+/*
+ * Save transactional state and TM-related registers.
+ * Called with:
+ * - r3 pointing to the vcpu struct
+ * - r4 containing the MSR with current TS bits:
+ * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
+ * - r5 containing a flag indicating that non-volatile registers
+ * must be preserved.
+ * If r5 == 0, this can modify all checkpointed registers, but
+ * restores r1, r2 before exit. If r5 != 0, this restores the
+ * MSR TM/FP/VEC/VSX bits to their state on entry.
+ */
+_GLOBAL(__kvmppc_save_tm)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+
+ mr r9, r3
+ cmpdi cr7, r5, 0
+
+ /* Turn on TM. */
+ mfmsr r8
+ mr r10, r8
+ li r0, 1
+ rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+ ori r8, r8, MSR_FP
+ oris r8, r8, (MSR_VEC | MSR_VSX)@h
+ mtmsrd r8
+
+ rldicl. r4, r4, 64 - MSR_TS_S_LG, 62
+ beq 1f /* TM not active in guest. */
+
+ std r1, HSTATE_SCRATCH2(r13)
+ std r3, HSTATE_SCRATCH1(r13)
+
+ /* Save CR on the stack - even if r5 == 0 we need to get cr7 back. */
+ mfcr r6
+ SAVE_GPR(6, r1)
+
+ /* Save DSCR so we can restore it to avoid running with user value */
+ mfspr r7, SPRN_DSCR
+ SAVE_GPR(7, r1)
+
+ /*
+ * We are going to do treclaim., which will modify all checkpointed
+ * registers. Save the non-volatile registers on the stack if
+ * preservation of non-volatile state has been requested.
+ */
+ beq cr7, 3f
+ SAVE_NVGPRS(r1)
+
+ /* MSR[TS] will be 0 (non-transactional) once we do treclaim. */
+ li r0, 0
+ rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ SAVE_GPR(10, r1) /* final MSR value */
+3:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+BEGIN_FTR_SECTION
+ /* Emulation of the treclaim instruction needs TEXASR before treclaim */
+ mfspr r6, SPRN_TEXASR
+ std r6, VCPU_ORIG_TEXASR(r3)
+END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_HV_ASSIST)
+#endif
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ li r3, TM_CAUSE_KVM_RESCHED
+
+ /* All GPRs are volatile at this point. */
+ TRECLAIM(R3)
+
+ /* Temporarily store r13 and r9 so we have some regs to play with */
+ SET_SCRATCH0(r13)
+ GET_PACA(r13)
+ std r9, PACATMSCRATCH(r13)
+ ld r9, HSTATE_SCRATCH1(r13)
+
+ /* Save away PPR soon so we don't run with user value. */
+ std r0, VCPU_GPRS_TM(0)(r9)
+ mfspr r0, SPRN_PPR
+ HMT_MEDIUM
+
+ /* Reload stack pointer. */
+ std r1, VCPU_GPRS_TM(1)(r9)
+ ld r1, HSTATE_SCRATCH2(r13)
+
+ /* Set MSR RI now we have r1 and r13 back. */
+ std r2, VCPU_GPRS_TM(2)(r9)
+ li r2, MSR_RI
+ mtmsrd r2, 1
+
+ /* Reload TOC pointer. */
+ LOAD_PACA_TOC()
+
+ /* Save all but r0-r2, r9 & r13 */
+ reg = 3
+ .rept 29
+ .if (reg != 9) && (reg != 13)
+ std reg, VCPU_GPRS_TM(reg)(r9)
+ .endif
+ reg = reg + 1
+ .endr
+ /* ... now save r13 */
+ GET_SCRATCH0(r4)
+ std r4, VCPU_GPRS_TM(13)(r9)
+ /* ... and save r9 */
+ ld r4, PACATMSCRATCH(r13)
+ std r4, VCPU_GPRS_TM(9)(r9)
+
+ /* Restore host DSCR and CR values, after saving guest values */
+ mfcr r6
+ mfspr r7, SPRN_DSCR
+ stw r6, VCPU_CR_TM(r9)
+ std r7, VCPU_DSCR_TM(r9)
+ REST_GPR(6, r1)
+ REST_GPR(7, r1)
+ mtcr r6
+ mtspr SPRN_DSCR, r7
+
+ /* Save away checkpointed SPRs. */
+ std r0, VCPU_PPR_TM(r9)
+ mflr r5
+ mfctr r7
+ mfspr r8, SPRN_AMR
+ mfspr r10, SPRN_TAR
+ mfxer r11
+ std r5, VCPU_LR_TM(r9)
+ std r7, VCPU_CTR_TM(r9)
+ std r8, VCPU_AMR_TM(r9)
+ std r10, VCPU_TAR_TM(r9)
+ std r11, VCPU_XER_TM(r9)
+
+ /* Save FP/VSX. */
+ addi r3, r9, VCPU_FPRS_TM
+ bl store_fp_state
+ addi r3, r9, VCPU_VRS_TM
+ bl store_vr_state
+ mfspr r6, SPRN_VRSAVE
+ stw r6, VCPU_VRSAVE_TM(r9)
+
+ /* Restore non-volatile registers if requested to */
+ beq cr7, 1f
+ REST_NVGPRS(r1)
+ REST_GPR(10, r1)
+1:
+ /*
+ * We need to save these SPRs after the treclaim so that the software
+ * error code is recorded correctly in the TEXASR. Also the user may
+ * change these outside of a transaction, so they must always be
+ * context switched.
+ */
+ mfspr r7, SPRN_TEXASR
+ std r7, VCPU_TEXASR(r9)
+ mfspr r5, SPRN_TFHAR
+ mfspr r6, SPRN_TFIAR
+ std r5, VCPU_TFHAR(r9)
+ std r6, VCPU_TFIAR(r9)
+
+ /* Restore MSR state if requested */
+ beq cr7, 2f
+ mtmsrd r10, 0
+2:
+ addi r1, r1, SWITCH_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+/*
+ * _kvmppc_save_tm_pr() is a wrapper around __kvmppc_save_tm(), so that it can
+ * be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_save_tm_pr)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
+
+ mfspr r8, SPRN_TAR
+ std r8, PPC_MIN_STKFRM-8(r1)
+
+ li r5, 1 /* preserve non-volatile registers */
+ bl __kvmppc_save_tm
+
+ ld r8, PPC_MIN_STKFRM-8(r1)
+ mtspr SPRN_TAR, r8
+
+ addi r1, r1, PPC_MIN_STKFRM
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
+
+/*
+ * Restore transactional state and TM-related registers.
+ * Called with:
+ * - r3 pointing to the vcpu struct.
+ * - r4 is the guest MSR with desired TS bits:
+ * For HV KVM, it is VCPU_MSR
+ * For PR KVM, it is provided by caller
+ * - r5 containing a flag indicating that non-volatile registers
+ * must be preserved.
+ * If r5 == 0, this potentially modifies all checkpointed registers, but
+ * restores r1, r2 from the PACA before exit.
+ * If r5 != 0, this restores the MSR TM/FP/VEC/VSX bits to their state on entry.
+ */
+_GLOBAL(__kvmppc_restore_tm)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+
+ cmpdi cr7, r5, 0
+
+ /* Turn on TM/FP/VSX/VMX so we can restore them. */
+ mfmsr r5
+ mr r10, r5
+ li r6, MSR_TM >> 32
+ sldi r6, r6, 32
+ or r5, r5, r6
+ ori r5, r5, MSR_FP
+ oris r5, r5, (MSR_VEC | MSR_VSX)@h
+ mtmsrd r5
+
+ /*
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+ ld r5, VCPU_TFHAR(r3)
+ ld r6, VCPU_TFIAR(r3)
+ ld r7, VCPU_TEXASR(r3)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+ mr r5, r4
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beq 9f /* TM not active in guest */
+
+ /* Make sure the failure summary is set, otherwise we'll program check
+ * when we trechkpt. It's possible that this might have been not set
+ * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+ * host.
+ */
+ oris r7, r7, (TEXASR_FS)@h
+ mtspr SPRN_TEXASR, r7
+
+ /*
+ * Make a stack frame and save non-volatile registers if requested.
+ */
+ stdu r1, -SWITCH_FRAME_SIZE(r1)
+ std r1, HSTATE_SCRATCH2(r13)
+
+ mfcr r6
+ mfspr r7, SPRN_DSCR
+ SAVE_GPR(2, r1)
+ SAVE_GPR(6, r1)
+ SAVE_GPR(7, r1)
+
+ beq cr7, 4f
+ SAVE_NVGPRS(r1)
+
+ /* MSR[TS] will be 1 (suspended) once we do trechkpt */
+ li r0, 1
+ rldimi r10, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ SAVE_GPR(10, r1) /* final MSR value */
+4:
+ /*
+ * We need to load up the checkpointed state for the guest.
+ * We need to do this early as it will blow away any GPRs, VSRs and
+ * some SPRs.
+ */
+
+ mr r31, r3
+ addi r3, r31, VCPU_FPRS_TM
+ bl load_fp_state
+ addi r3, r31, VCPU_VRS_TM
+ bl load_vr_state
+ mr r3, r31
+ lwz r7, VCPU_VRSAVE_TM(r3)
+ mtspr SPRN_VRSAVE, r7
+
+ ld r5, VCPU_LR_TM(r3)
+ lwz r6, VCPU_CR_TM(r3)
+ ld r7, VCPU_CTR_TM(r3)
+ ld r8, VCPU_AMR_TM(r3)
+ ld r9, VCPU_TAR_TM(r3)
+ ld r10, VCPU_XER_TM(r3)
+ mtlr r5
+ mtcr r6
+ mtctr r7
+ mtspr SPRN_AMR, r8
+ mtspr SPRN_TAR, r9
+ mtxer r10
+
+ /*
+ * Load up PPR and DSCR values but don't put them in the actual SPRs
+ * till the last moment to avoid running with userspace PPR and DSCR for
+ * too long.
+ */
+ ld r29, VCPU_DSCR_TM(r3)
+ ld r30, VCPU_PPR_TM(r3)
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /* Load GPRs r0-r28 */
+ reg = 0
+ .rept 29
+ ld reg, VCPU_GPRS_TM(reg)(r31)
+ reg = reg + 1
+ .endr
+
+ mtspr SPRN_DSCR, r29
+ mtspr SPRN_PPR, r30
+
+ /* Load final GPRs */
+ ld 29, VCPU_GPRS_TM(29)(r31)
+ ld 30, VCPU_GPRS_TM(30)(r31)
+ ld 31, VCPU_GPRS_TM(31)(r31)
+
+ /* TM checkpointed state is now setup. All GPRs are now volatile. */
+ TRECHKPT
+
+ /* Now let's get back the state we need. */
+ HMT_MEDIUM
+ GET_PACA(r13)
+ ld r1, HSTATE_SCRATCH2(r13)
+ REST_GPR(7, r1)
+ mtspr SPRN_DSCR, r7
+
+ /* Set the MSR RI since we have our registers back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+
+ /* Restore TOC pointer and CR */
+ REST_GPR(2, r1)
+ REST_GPR(6, r1)
+ mtcr r6
+
+ /* Restore non-volatile registers if requested to. */
+ beq cr7, 5f
+ REST_GPR(10, r1)
+ REST_NVGPRS(r1)
+
+5: addi r1, r1, SWITCH_FRAME_SIZE
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+
+9: /* Restore MSR bits if requested */
+ beqlr cr7
+ mtmsrd r10, 0
+ blr
+
+/*
+ * _kvmppc_restore_tm_pr() is a wrapper around __kvmppc_restore_tm(), so that it
+ * can be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_restore_tm_pr)
+ mflr r0
+ std r0, PPC_LR_STKOFF(r1)
+ stdu r1, -PPC_MIN_STKFRM(r1)
+
+ /* save TAR so that it can be recovered later */
+ mfspr r8, SPRN_TAR
+ std r8, PPC_MIN_STKFRM-8(r1)
+
+ li r5, 1
+ bl __kvmppc_restore_tm
+
+ ld r8, PPC_MIN_STKFRM-8(r1)
+ mtspr SPRN_TAR, r8
+
+ addi r1, r1, PPC_MIN_STKFRM
+ ld r0, PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_restore_tm_pr);
+#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 2e0e67ef3544..ea1d7c808319 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_H
@@ -5,8 +6,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace
/*
* Tracepoint for guest mode entry.
@@ -119,4 +118,10 @@ TRACE_EVENT(kvm_check_requests,
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+
#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 000000000000..9260ddbd557f
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xea0, "H_VIRT"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index f7537cf26ce7..eff6e82dbcd4 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_KVM_BOOKE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_BOOKE_H
@@ -5,8 +6,6 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_booke
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_booke
#define kvm_trace_symbol_exit \
{0, "CRITICAL"}, \
@@ -70,21 +69,6 @@ TRACE_EVENT(kvm_exit,
)
);
-TRACE_EVENT(kvm_unmap_hva,
- TP_PROTO(unsigned long hva),
- TP_ARGS(hva),
-
- TP_STRUCT__entry(
- __field( unsigned long, hva )
- ),
-
- TP_fast_assign(
- __entry->hva = hva;
- ),
-
- TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
-
TRACE_EVENT(kvm_booke206_stlb_write,
TP_PROTO(__u32 mas0, __u32 mas8, __u32 mas1, __u64 mas2, __u64 mas7_3),
TP_ARGS(mas0, mas8, mas1, mas2, mas7_3),
@@ -151,6 +135,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
__entry->pfn, __entry->flags)
);
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+ {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+ {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+ {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio \
+ kvm_trace_symbol_irqprio_spe \
+ kvm_trace_symbol_irqprio_e500mc \
+ {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+ {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+ {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+ {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+ {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+ {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+ {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+ {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+ {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+ {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+ {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+ {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+ {BOOKE_IRQPRIO_FIT, "FIT"}, \
+ {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+ {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+ {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+ {BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+ {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
+
TRACE_EVENT(kvm_booke_queue_irqprio,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
TP_ARGS(vcpu, priority),
@@ -167,11 +192,20 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
__entry->pending = vcpu->arch.pending_exceptions;
),
- TP_printk("vcpu=%x prio=%x pending=%lx",
- __entry->cpu_nr, __entry->priority, __entry->pending)
+ TP_printk("vcpu=%x prio=%s pending=%lx",
+ __entry->cpu_nr,
+ __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+ __entry->pending)
);
#endif
/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_booke
+
#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 000000000000..35fccaa575cc
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,554 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+
+#define kvm_trace_symbol_hcall \
+ {H_REMOVE, "H_REMOVE"}, \
+ {H_ENTER, "H_ENTER"}, \
+ {H_READ, "H_READ"}, \
+ {H_CLEAR_MOD, "H_CLEAR_MOD"}, \
+ {H_CLEAR_REF, "H_CLEAR_REF"}, \
+ {H_PROTECT, "H_PROTECT"}, \
+ {H_GET_TCE, "H_GET_TCE"}, \
+ {H_PUT_TCE, "H_PUT_TCE"}, \
+ {H_SET_SPRG0, "H_SET_SPRG0"}, \
+ {H_SET_DABR, "H_SET_DABR"}, \
+ {H_PAGE_INIT, "H_PAGE_INIT"}, \
+ {H_SET_ASR, "H_SET_ASR"}, \
+ {H_ASR_ON, "H_ASR_ON"}, \
+ {H_ASR_OFF, "H_ASR_OFF"}, \
+ {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \
+ {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \
+ {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \
+ {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \
+ {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \
+ {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \
+ {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \
+ {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \
+ {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \
+ {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \
+ {H_EOI, "H_EOI"}, \
+ {H_CPPR, "H_CPPR"}, \
+ {H_IPI, "H_IPI"}, \
+ {H_IPOLL, "H_IPOLL"}, \
+ {H_XIRR, "H_XIRR"}, \
+ {H_PERFMON, "H_PERFMON"}, \
+ {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \
+ {H_REGISTER_VPA, "H_REGISTER_VPA"}, \
+ {H_CEDE, "H_CEDE"}, \
+ {H_CONFER, "H_CONFER"}, \
+ {H_PROD, "H_PROD"}, \
+ {H_GET_PPP, "H_GET_PPP"}, \
+ {H_SET_PPP, "H_SET_PPP"}, \
+ {H_PURR, "H_PURR"}, \
+ {H_PIC, "H_PIC"}, \
+ {H_REG_CRQ, "H_REG_CRQ"}, \
+ {H_FREE_CRQ, "H_FREE_CRQ"}, \
+ {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \
+ {H_SEND_CRQ, "H_SEND_CRQ"}, \
+ {H_COPY_RDMA, "H_COPY_RDMA"}, \
+ {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \
+ {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \
+ {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \
+ {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \
+ {H_BULK_REMOVE, "H_BULK_REMOVE"}, \
+ {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \
+ {H_SET_XDABR, "H_SET_XDABR"}, \
+ {H_STUFF_TCE, "H_STUFF_TCE"}, \
+ {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \
+ {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \
+ {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \
+ {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \
+ {H_FREE_VTERM, "H_FREE_VTERM"}, \
+ {H_RESET_EVENTS, "H_RESET_EVENTS"}, \
+ {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \
+ {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \
+ {H_MODIFY_QP, "H_MODIFY_QP"}, \
+ {H_QUERY_QP, "H_QUERY_QP"}, \
+ {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \
+ {H_REGISTER_SMR, "H_REGISTER_SMR"}, \
+ {H_QUERY_MR, "H_QUERY_MR"}, \
+ {H_QUERY_MW, "H_QUERY_MW"}, \
+ {H_QUERY_HCA, "H_QUERY_HCA"}, \
+ {H_QUERY_PORT, "H_QUERY_PORT"}, \
+ {H_MODIFY_PORT, "H_MODIFY_PORT"}, \
+ {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \
+ {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \
+ {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \
+ {H_RESIZE_MR, "H_RESIZE_MR"}, \
+ {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \
+ {H_DETACH_MCQP, "H_DETACH_MCQP"}, \
+ {H_CREATE_RPT, "H_CREATE_RPT"}, \
+ {H_REMOVE_RPT, "H_REMOVE_RPT"}, \
+ {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \
+ {H_DISABLE_AND_GET, "H_DISABLE_AND_GET"}, \
+ {H_ERROR_DATA, "H_ERROR_DATA"}, \
+ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \
+ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \
+ {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \
+ {H_GET_CPU_CHARACTERISTICS, "H_GET_CPU_CHARACTERISTICS"}, \
+ {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \
+ {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \
+ {H_POLL_PENDING, "H_POLL_PENDING"}, \
+ {H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \
+ {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \
+ {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \
+ {H_QUERY_HEA, "H_QUERY_HEA"}, \
+ {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \
+ {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \
+ {H_REG_BCMC, "H_REG_BCMC"}, \
+ {H_DEREG_BCMC, "H_DEREG_BCMC"}, \
+ {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \
+ {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \
+ {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \
+ {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \
+ {H_ADD_CONN, "H_ADD_CONN"}, \
+ {H_DEL_CONN, "H_DEL_CONN"}, \
+ {H_JOIN, "H_JOIN"}, \
+ {H_VASI_STATE, "H_VASI_STATE"}, \
+ {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \
+ {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \
+ {H_GET_ENERGY_SCALE_INFO, "H_GET_ENERGY_SCALE_INFO"}, \
+ {H_SET_MPP, "H_SET_MPP"}, \
+ {H_GET_MPP, "H_GET_MPP"}, \
+ {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \
+ {H_BEST_ENERGY, "H_BEST_ENERGY"}, \
+ {H_XIRR_X, "H_XIRR_X"}, \
+ {H_RANDOM, "H_RANDOM"}, \
+ {H_COP, "H_COP"}, \
+ {H_GET_MPP_X, "H_GET_MPP_X"}, \
+ {H_SET_MODE, "H_SET_MODE"}, \
+ {H_REGISTER_PROC_TBL, "H_REGISTER_PROC_TBL"}, \
+ {H_QUERY_VAS_CAPABILITIES, "H_QUERY_VAS_CAPABILITIES"}, \
+ {H_INT_GET_SOURCE_INFO, "H_INT_GET_SOURCE_INFO"}, \
+ {H_INT_SET_SOURCE_CONFIG, "H_INT_SET_SOURCE_CONFIG"}, \
+ {H_INT_GET_QUEUE_INFO, "H_INT_GET_QUEUE_INFO"}, \
+ {H_INT_SET_QUEUE_CONFIG, "H_INT_SET_QUEUE_CONFIG"}, \
+ {H_INT_ESB, "H_INT_ESB"}, \
+ {H_INT_RESET, "H_INT_RESET"}, \
+ {H_RPT_INVALIDATE, "H_RPT_INVALIDATE"}, \
+ {H_RTAS, "H_RTAS"}, \
+ {H_LOGICAL_MEMOP, "H_LOGICAL_MEMOP"}, \
+ {H_CAS, "H_CAS"}, \
+ {H_UPDATE_DT, "H_UPDATE_DT"}, \
+ {H_GET_PERF_COUNTER_INFO, "H_GET_PERF_COUNTER_INFO"}, \
+ {H_SET_PARTITION_TABLE, "H_SET_PARTITION_TABLE"}, \
+ {H_ENTER_NESTED, "H_ENTER_NESTED"}, \
+ {H_TLB_INVALIDATE, "H_TLB_INVALIDATE"}, \
+ {H_COPY_TOFROM_GUEST, "H_COPY_TOFROM_GUEST"}
+
+
+#define kvm_trace_symbol_kvmret \
+ {RESUME_GUEST, "RESUME_GUEST"}, \
+ {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \
+ {RESUME_HOST, "RESUME_HOST"}, \
+ {RESUME_HOST_NV, "RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+ {H_SUCCESS, "H_SUCCESS"}, \
+ {H_BUSY, "H_BUSY"}, \
+ {H_CLOSED, "H_CLOSED"}, \
+ {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \
+ {H_CONSTRAINED, "H_CONSTRAINED"}, \
+ {H_PARTIAL, "H_PARTIAL"}, \
+ {H_IN_PROGRESS, "H_IN_PROGRESS"}, \
+ {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \
+ {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \
+ {H_PENDING, "H_PENDING"}, \
+ {H_CONTINUE, "H_CONTINUE"}, \
+ {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \
+ {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \
+ {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \
+ {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \
+ {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \
+ {H_LONG_BUSY_ORDER_10_SEC, "H_LONG_BUSY_ORDER_10_SEC"}, \
+ {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \
+ {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \
+ {H_TOO_HARD, "H_TOO_HARD"}, \
+ {H_HARDWARE, "H_HARDWARE"}, \
+ {H_FUNCTION, "H_FUNCTION"}, \
+ {H_PRIVILEGE, "H_PRIVILEGE"}, \
+ {H_PARAMETER, "H_PARAMETER"}, \
+ {H_BAD_MODE, "H_BAD_MODE"}, \
+ {H_PTEG_FULL, "H_PTEG_FULL"}, \
+ {H_NOT_FOUND, "H_NOT_FOUND"}, \
+ {H_RESERVED_DABR, "H_RESERVED_DABR"}, \
+ {H_NO_MEM, "H_NO_MEM"}, \
+ {H_AUTHORITY, "H_AUTHORITY"}, \
+ {H_PERMISSION, "H_PERMISSION"}, \
+ {H_DROPPED, "H_DROPPED"}, \
+ {H_SOURCE_PARM, "H_SOURCE_PARM"}, \
+ {H_DEST_PARM, "H_DEST_PARM"}, \
+ {H_REMOTE_PARM, "H_REMOTE_PARM"}, \
+ {H_RESOURCE, "H_RESOURCE"}, \
+ {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \
+ {H_RH_PARM, "H_RH_PARM"}, \
+ {H_RCQ_PARM, "H_RCQ_PARM"}, \
+ {H_SCQ_PARM, "H_SCQ_PARM"}, \
+ {H_EQ_PARM, "H_EQ_PARM"}, \
+ {H_RT_PARM, "H_RT_PARM"}, \
+ {H_ST_PARM, "H_ST_PARM"}, \
+ {H_SIGT_PARM, "H_SIGT_PARM"}, \
+ {H_TOKEN_PARM, "H_TOKEN_PARM"}, \
+ {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \
+ {H_MEM_PARM, "H_MEM_PARM"}, \
+ {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \
+ {H_ATTR_PARM, "H_ATTR_PARM"}, \
+ {H_PORT_PARM, "H_PORT_PARM"}, \
+ {H_MCG_PARM, "H_MCG_PARM"}, \
+ {H_VL_PARM, "H_VL_PARM"}, \
+ {H_TSIZE_PARM, "H_TSIZE_PARM"}, \
+ {H_TRACE_PARM, "H_TRACE_PARM"}, \
+ {H_MASK_PARM, "H_MASK_PARM"}, \
+ {H_MCG_FULL, "H_MCG_FULL"}, \
+ {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \
+ {H_P_COUNTER, "H_P_COUNTER"}, \
+ {H_TABLE_FULL, "H_TABLE_FULL"}, \
+ {H_ALT_TABLE, "H_ALT_TABLE"}, \
+ {H_MR_CONDITION, "H_MR_CONDITION"}, \
+ {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \
+ {H_R_STATE, "H_R_STATE"}, \
+ {H_RESCINDED, "H_RESCINDED"}, \
+ {H_P2, "H_P2"}, \
+ {H_P3, "H_P3"}, \
+ {H_P4, "H_P4"}, \
+ {H_P5, "H_P5"}, \
+ {H_P6, "H_P6"}, \
+ {H_P7, "H_P7"}, \
+ {H_P8, "H_P8"}, \
+ {H_P9, "H_P9"}, \
+ {H_TOO_BIG, "H_TOO_BIG"}, \
+ {H_OVERLAP, "H_OVERLAP"}, \
+ {H_INTERRUPT, "H_INTERRUPT"}, \
+ {H_BAD_DATA, "H_BAD_DATA"}, \
+ {H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \
+ {H_SG_LIST, "H_SG_LIST"}, \
+ {H_OP_MODE, "H_OP_MODE"}, \
+ {H_COP_HW, "H_COP_HW"}, \
+ {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \
+ {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \
+ {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \
+ {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, pc)
+ __field(unsigned long, pending_exceptions)
+ __field(u8, ceded)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->ceded = vcpu->arch.ceded;
+ __entry->pending_exceptions = vcpu->arch.pending_exceptions;
+ ),
+
+ TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+ __entry->vcpu_id,
+ __entry->pc,
+ __entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(int, trap)
+ __field(unsigned long, pc)
+ __field(unsigned long, msr)
+ __field(u8, ceded)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->trap = vcpu->arch.trap;
+ __entry->ceded = vcpu->arch.ceded;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->msr = vcpu->arch.shregs.msr;
+ ),
+
+ TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+ __entry->pc, __entry->msr, __entry->ceded
+ )
+);
+
+TRACE_EVENT(kvm_page_fault_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+ struct kvm_memory_slot *memslot, unsigned long ea,
+ unsigned long dsisr),
+
+ TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, hpte_v)
+ __field(unsigned long, hpte_r)
+ __field(unsigned long, gpte_r)
+ __field(unsigned long, ea)
+ __field(u64, base_gfn)
+ __field(u32, slot_flags)
+ __field(u32, dsisr)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->hpte_v = hptep[0];
+ __entry->hpte_r = hptep[1];
+ __entry->gpte_r = hptep[2];
+ __entry->ea = ea;
+ __entry->dsisr = dsisr;
+ __entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
+ __entry->slot_flags = memslot ? memslot->flags : 0;
+ ),
+
+ TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+ __entry->vcpu_id,
+ __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+ __entry->ea, __entry->dsisr,
+ __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+ TP_ARGS(vcpu, hptep, ret),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, hpte_v)
+ __field(unsigned long, hpte_r)
+ __field(long, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->hpte_v = hptep[0];
+ __entry->hpte_r = hptep[1];
+ __entry->ret = ret;
+ ),
+
+ TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+ __entry->vcpu_id,
+ __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, req)
+ __field(unsigned long, gpr4)
+ __field(unsigned long, gpr5)
+ __field(unsigned long, gpr6)
+ __field(unsigned long, gpr7)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->req = kvmppc_get_gpr(vcpu, 3);
+ __entry->gpr4 = kvmppc_get_gpr(vcpu, 4);
+ __entry->gpr5 = kvmppc_get_gpr(vcpu, 5);
+ __entry->gpr6 = kvmppc_get_gpr(vcpu, 6);
+ __entry->gpr7 = kvmppc_get_gpr(vcpu, 7);
+ ),
+
+ TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+ __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+ TP_ARGS(vcpu, ret),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, ret)
+ __field(unsigned long, hcall_rc)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->ret = ret;
+ __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+ ),
+
+ TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+ __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+ H_TOO_HARD : __entry->hcall_rc,
+ kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core,
+ TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+ TP_ARGS(vc, where),
+
+ TP_STRUCT__entry(
+ __field(int, n_runnable)
+ __field(int, runner_vcpu)
+ __field(int, where)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->runner_vcpu = vc->runner->vcpu_id;
+ __entry->n_runnable = vc->n_runnable;
+ __entry->where = where;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d",
+ __entry->where ? "Exit" : "Enter",
+ __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,
+ TP_PROTO(struct kvm_vcpu *vcpu, int where),
+
+ TP_ARGS(vcpu, where),
+
+ TP_STRUCT__entry(
+ __field(int, n_runnable)
+ __field(int, runner_vcpu)
+ __field(int, where)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->runner_vcpu = vcpu->vcpu_id;
+ __entry->n_runnable = vcpu->arch.vcore->n_runnable;
+ __entry->where = where;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+ __entry->where ? "Exit" : "Enter",
+ __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_wakeup,
+ TP_PROTO(int do_sleep, __u64 ns),
+
+ TP_ARGS(do_sleep, ns),
+
+ TP_STRUCT__entry(
+ __field(__u64, ns)
+ __field(int, waited)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->ns = ns;
+ __entry->waited = do_sleep;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("%s time %llu ns, tgid=%d",
+ __entry->waited ? "wait" : "poll",
+ __entry->ns, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(int, exit)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->exit = vcpu->run->exit_reason;
+ __entry->ret = vcpu->arch.ret;
+ ),
+
+ TP_printk("VCPU %d: exit=%d, ret=%d",
+ __entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#ifdef CONFIG_PPC_PSERIES
+
+TRACE_EVENT_FN_COND(kvmppc_vcpu_stats,
+ TP_PROTO(struct kvm_vcpu *vcpu, u64 l1_to_l2_cs, u64 l2_to_l1_cs, u64 l2_runtime),
+
+ TP_ARGS(vcpu, l1_to_l2_cs, l2_to_l1_cs, l2_runtime),
+
+ TP_CONDITION(l1_to_l2_cs || l2_to_l1_cs || l2_runtime),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(u64, l1_to_l2_cs)
+ __field(u64, l2_to_l1_cs)
+ __field(u64, l2_runtime)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->l1_to_l2_cs = l1_to_l2_cs;
+ __entry->l2_to_l1_cs = l2_to_l1_cs;
+ __entry->l2_runtime = l2_runtime;
+ ),
+
+ TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns",
+ __entry->vcpu_id, __entry->l1_to_l2_cs,
+ __entry->l2_to_l1_cs, __entry->l2_runtime),
+ kvmhv_counters_tracepoint_regfunc, kvmhv_counters_tracepoint_unregfunc
+);
+#endif
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index e1357cd8dc1f..46a46d328fbf 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -1,37 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#if !defined(_TRACE_KVM_PR_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KVM_PR_H
#include <linux/tracepoint.h>
+#include "trace_book3s.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_pr
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE trace_pr
-
-#define kvm_trace_symbol_exit \
- {0x100, "SYSTEM_RESET"}, \
- {0x200, "MACHINE_CHECK"}, \
- {0x300, "DATA_STORAGE"}, \
- {0x380, "DATA_SEGMENT"}, \
- {0x400, "INST_STORAGE"}, \
- {0x480, "INST_SEGMENT"}, \
- {0x500, "EXTERNAL"}, \
- {0x501, "EXTERNAL_LEVEL"}, \
- {0x502, "EXTERNAL_HV"}, \
- {0x600, "ALIGNMENT"}, \
- {0x700, "PROGRAM"}, \
- {0x800, "FP_UNAVAIL"}, \
- {0x900, "DECREMENTER"}, \
- {0x980, "HV_DECREMENTER"}, \
- {0xc00, "SYSCALL"}, \
- {0xd00, "TRACE"}, \
- {0xe00, "H_DATA_STORAGE"}, \
- {0xe20, "H_INST_STORAGE"}, \
- {0xe40, "H_EMUL_ASSIST"}, \
- {0xf00, "PERFMON"}, \
- {0xf20, "ALTIVEC"}, \
- {0xf40, "VSX"}
TRACE_EVENT(kvm_book3s_reenter,
TP_PROTO(int r, struct kvm_vcpu *vcpu),
@@ -53,7 +29,7 @@ TRACE_EVENT(kvm_book3s_reenter,
#ifdef CONFIG_PPC_BOOK3S_64
TRACE_EVENT(kvm_book3s_64_mmu_map,
- TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr,
+ TP_PROTO(int rflags, ulong hpteg, ulong va, kvm_pfn_t hpaddr,
struct kvmppc_pte *orig_pte),
TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte),
@@ -276,22 +252,14 @@ TRACE_EVENT(kvm_exit,
)
);
-TRACE_EVENT(kvm_unmap_hva,
- TP_PROTO(unsigned long hva),
- TP_ARGS(hva),
-
- TP_STRUCT__entry(
- __field( unsigned long, hva )
- ),
+#endif /* _TRACE_KVM_PR_H */
- TP_fast_assign(
- __entry->hva = hva;
- ),
+/* This part must be outside protection */
- TP_printk("unmap hva 0x%lx\n", __entry->hva)
-);
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
-#endif /* _TRACE_KVM_H */
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_pr
-/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 59fa2de9546d..f14ecab674a3 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -1,42 +1,81 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for ppc-specific library files..
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_code-patching.o = -pg
-CFLAGS_REMOVE_feature-fixups.o = -pg
+KASAN_SANITIZE_code-patching.o := n
+KASAN_SANITIZE_feature-fixups.o := n
+# restart_table.o contains functions called in the NMI interrupt path
+# which can be in real mode. Disable KASAN.
+KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
-obj-y := string.o alloc.o \
- crtsavres.o
-obj-$(CONFIG_PPC32) += div64.o copy_32.o
-obj-$(CONFIG_HAS_IOMEM) += devres.o
+ifdef CONFIG_KASAN
+CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+
+obj-y += code-patching.o feature-fixups.o pmem.o
+
+obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o
+
+ifndef CONFIG_KASAN
+obj-y += string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32) += strlen_32.o
+endif
-obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \
- usercopy_64.o mem_64.o string.o \
- hweight_64.o \
- copyuser_power7.o string_64.o copypage_power7.o
-ifeq ($(CONFIG_GENERIC_CSUM),)
-obj-y += checksum_$(CONFIG_WORD_SIZE).o
-obj-$(CONFIG_PPC64) += checksum_wrappers_64.o
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+# See corresponding test in arch/powerpc/Makefile
+# The 64-bit linker creates .sfpr on demand for the final link (vmlinux),
+# so crtsavres.o is only needed for modules, and only for older linkers
+# which do not support --save-restore-funcs
+ifndef CONFIG_LD_IS_BFD
+always-$(CONFIG_PPC64) += crtsavres.o
endif
-obj-$(CONFIG_PPC64) += memcpy_power7.o memcpy_64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
+ memcpy_power7.o restart_table.o
-obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
+obj64-y += copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
+ memcpy_64.o copy_mc_64.o
-ifeq ($(CONFIG_PPC64),y)
-obj-$(CONFIG_SMP) += locks.o
-obj-$(CONFIG_ALTIVEC) += vmx-helper.o
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP) += qspinlock.o
+else
+obj64-$(CONFIG_SMP) += locks.o
endif
+obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
+ test_emulate_step_exec_instr.o
+
+obj-y += checksum_$(BITS).o checksum_wrappers.o \
+ string_$(BITS).o
+
+obj-y += sstep.o
+obj-$(CONFIG_PPC_FPU) += ldstfp.o
+obj64-y += quad.o
+
obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
-obj-y += code-patching.o
-obj-y += feature-fixups.o
obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
-obj-$(CONFIG_ALTIVEC) += xor_vmx.o
-CFLAGS_xor_vmx.o += -maltivec -mabi=altivec
+obj-$(CONFIG_ALTIVEC) += xor_vmx.o xor_vmx_glue.o
+CFLAGS_xor_vmx.o += -mhard-float -maltivec $(call cc-option,-mabi=altivec)
+# Enable <altivec.h>
+CFLAGS_xor_vmx.o += -isystem $(shell $(CC) -print-file-name=include)
+
+obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
deleted file mode 100644
index da22c84a8fed..000000000000
--- a/arch/powerpc/lib/alloc.c
+++ /dev/null
@@ -1,21 +0,0 @@
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
-#include <asm/setup.h>
-
-
-void * __init_refok zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- void *p;
-
- if (mem_init_done)
- p = kzalloc(size, mask);
- else {
- p = alloc_bootmem(size);
- if (p)
- memset(p, 0, size);
- }
- return p;
-}
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 7874e8a80455..cd00b9bdd772 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -1,225 +1,309 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains assembly-language implementations
* of IP-style 1's complement checksum routines.
*
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
+#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
.text
/*
- * ip_fast_csum(buf, len) -- Optimized for IP header
- * len is in words and is always >= 5.
- */
-_GLOBAL(ip_fast_csum)
- lwz r0,0(r3)
- lwzu r5,4(r3)
- addic. r4,r4,-2
- addc r0,r0,r5
- mtctr r4
- blelr-
-1: lwzu r4,4(r3)
- adde r0,r0,r4
- bdnz 1b
- addze r0,r0 /* add in final carry */
- rlwinm r3,r0,16,0,31 /* fold two halves together */
- add r3,r0,r3
- not r3,r3
- srwi r3,r3,16
- blr
-
-/*
- * Compute checksum of TCP or UDP pseudo-header:
- * csum_tcpudp_magic(saddr, daddr, len, proto, sum)
- */
-_GLOBAL(csum_tcpudp_magic)
- rlwimi r5,r6,16,0,15 /* put proto in upper half of len */
- addc r0,r3,r4 /* add 4 32-bit words together */
- adde r0,r0,r5
- adde r0,r0,r7
- addze r0,r0 /* add in final carry */
- rlwinm r3,r0,16,0,31 /* fold two halves together */
- add r3,r0,r3
- not r3,r3
- srwi r3,r3,16
- blr
-
-/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
*
- * csum_partial(buff, len, sum)
+ * __csum_partial(buff, len, sum)
*/
-_GLOBAL(csum_partial)
- addic r0,r5,0
+_GLOBAL(__csum_partial)
subi r3,r3,4
- srwi. r6,r4,2
+ srawi. r6,r4,2 /* Divide len by 4 and also clear carry */
beq 3f /* if we're doing < 4 bytes */
- andi. r5,r3,2 /* Align buffer to longword boundary */
+ andi. r0,r3,2 /* Align buffer to longword boundary */
beq+ 1f
- lhz r5,4(r3) /* do 2 bytes to get aligned */
- addi r3,r3,2
+ lhz r0,4(r3) /* do 2 bytes to get aligned */
subi r4,r4,2
- addc r0,r0,r5
+ addi r3,r3,2
srwi. r6,r4,2 /* # words to do */
+ adde r5,r5,r0
beq 3f
-1: mtctr r6
-2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */
- adde r0,r0,r5 /* be unnecessary to unroll this loop */
+1: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */
+ beq 21f
+ mtctr r6
+2: lwzu r0,4(r3)
+ adde r5,r5,r0
bdnz 2b
- andi. r4,r4,3
-3: cmpwi 0,r4,2
- blt+ 4f
- lhz r5,4(r3)
+21: srwi. r6,r4,4 /* # blocks of 4 words to do */
+ beq 3f
+ lwz r0,4(r3)
+ mtctr r6
+ lwz r6,8(r3)
+ adde r5,r5,r0
+ lwz r7,12(r3)
+ adde r5,r5,r6
+ lwzu r8,16(r3)
+ adde r5,r5,r7
+ bdz 23f
+22: lwz r0,4(r3)
+ adde r5,r5,r8
+ lwz r6,8(r3)
+ adde r5,r5,r0
+ lwz r7,12(r3)
+ adde r5,r5,r6
+ lwzu r8,16(r3)
+ adde r5,r5,r7
+ bdnz 22b
+23: adde r5,r5,r8
+3: andi. r0,r4,2
+ beq+ 4f
+ lhz r0,4(r3)
addi r3,r3,2
- subi r4,r4,2
- adde r0,r0,r5
-4: cmpwi 0,r4,1
- bne+ 5f
- lbz r5,4(r3)
- slwi r5,r5,8 /* Upper byte of word */
- adde r0,r0,r5
-5: addze r3,r0 /* add in final carry */
+ adde r5,r5,r0
+4: andi. r0,r4,1
+ beq+ 5f
+ lbz r0,4(r3)
+ slwi r0,r0,8 /* Upper byte of word */
+ adde r5,r5,r0
+5: addze r3,r5 /* add in final carry */
blr
+EXPORT_SYMBOL(__csum_partial)
/*
* Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively, and (for an error on
- * src) zeroes the rest of dst.
+ * and adds in 0xffffffff, while copying the block to dst.
+ * If an access exception occurs it returns zero.
*
- * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
+ * csum_partial_copy_generic(src, dst, len)
*/
+#define CSUM_COPY_16_BYTES_WITHEX(n) \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+ adde r12,r12,r7; \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+ adde r12,r12,r8; \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+ adde r12,r12,r9; \
+8 ## n ## 7: \
+ stwu r10,16(r6); \
+ adde r12,r12,r10
+
+#define CSUM_COPY_16_BYTES_EXCODE(n) \
+ EX_TABLE(8 ## n ## 0b, fault); \
+ EX_TABLE(8 ## n ## 1b, fault); \
+ EX_TABLE(8 ## n ## 2b, fault); \
+ EX_TABLE(8 ## n ## 3b, fault); \
+ EX_TABLE(8 ## n ## 4b, fault); \
+ EX_TABLE(8 ## n ## 5b, fault); \
+ EX_TABLE(8 ## n ## 6b, fault); \
+ EX_TABLE(8 ## n ## 7b, fault);
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
_GLOBAL(csum_partial_copy_generic)
- addic r0,r6,0
- subi r3,r3,4
- subi r4,r4,4
- srwi. r6,r5,2
- beq 3f /* if we're doing < 4 bytes */
- andi. r9,r4,2 /* Align dst to longword boundary */
- beq+ 1f
-81: lhz r6,4(r3) /* do 2 bytes to get aligned */
- addi r3,r3,2
- subi r5,r5,2
-91: sth r6,4(r4)
- addi r4,r4,2
- addc r0,r0,r6
- srwi. r6,r5,2 /* # words to do */
- beq 3f
-1: srwi. r6,r5,4 /* # groups of 4 words to do */
- beq 10f
- mtctr r6
-71: lwz r6,4(r3)
-72: lwz r9,8(r3)
-73: lwz r10,12(r3)
-74: lwzu r11,16(r3)
- adde r0,r0,r6
-75: stw r6,4(r4)
- adde r0,r0,r9
-76: stw r9,8(r4)
- adde r0,r0,r10
-77: stw r10,12(r4)
- adde r0,r0,r11
-78: stwu r11,16(r4)
- bdnz 71b
-10: rlwinm. r6,r5,30,30,31 /* # words left to do */
- beq 13f
- mtctr r6
-82: lwzu r9,4(r3)
-92: stwu r9,4(r4)
- adde r0,r0,r9
- bdnz 82b
-13: andi. r5,r5,3
-3: cmpwi 0,r5,2
- blt+ 4f
-83: lhz r6,4(r3)
- addi r3,r3,2
- subi r5,r5,2
-93: sth r6,4(r4)
- addi r4,r4,2
- adde r0,r0,r6
-4: cmpwi 0,r5,1
- bne+ 5f
-84: lbz r6,4(r3)
-94: stb r6,4(r4)
- slwi r6,r6,8 /* Upper byte of word */
- adde r0,r0,r6
-5: addze r3,r0 /* add in final carry */
- blr
+ li r12,-1
+ addic r0,r0,0 /* clear carry */
+ addi r6,r4,-4
+ neg r0,r4
+ addi r4,r3,-4
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ crset 4*cr7+eq
+ beq 58f
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ rlwinm r7,r6,3,0x8
+ rlwnm r12,r12,r7,0,31 /* odd destination address: rotate one byte */
+ cmplwi cr7,r7,0 /* is destination address even ? */
+ andi. r8,r0,3 /* get it word-aligned first */
+ mtctr r8
+ beq+ 61f
+ li r3,0
+70: lbz r9,4(r4) /* do some bytes */
+ addi r4,r4,1
+ slwi r3,r3,8
+ rlwimi r3,r9,0,24,31
+71: stb r9,4(r6)
+ addi r6,r6,1
+ bdnz 70b
+ adde r12,r12,r3
+61: subf r5,r0,r5
+ srwi. r0,r0,2
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+ adde r12,r12,r9
+73: stwu r9,4(r6)
+ bdnz 72b
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES
+ li r11,4
+ beq 63f
+
+ /* Here we decide how far ahead to prefetch the source */
+ li r3,4
+ cmpwi r0,1
+ li r7,0
+ ble 114f
+ li r7,1
+#if MAX_COPY_PREFETCH > 1
+ /* Heuristically, for large transfers we prefetch
+ MAX_COPY_PREFETCH cachelines ahead. For small transfers
+ we prefetch 1 cacheline ahead. */
+ cmpwi r0,MAX_COPY_PREFETCH
+ ble 112f
+ li r7,MAX_COPY_PREFETCH
+112: mtctr r7
+111: dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+ bdnz 111b
+#else
+ dcbt r3,r4
+ addi r3,r3,CACHELINE_BYTES
+#endif /* MAX_COPY_PREFETCH > 1 */
+
+114: subf r8,r7,r0
+ mr r0,r7
+ mtctr r8
+
+53: dcbt r3,r4
+54: dcbz r11,r6
+/* the main body of the cacheline loop */
+ CSUM_COPY_16_BYTES_WITHEX(0)
+#if L1_CACHE_BYTES >= 32
+ CSUM_COPY_16_BYTES_WITHEX(1)
+#if L1_CACHE_BYTES >= 64
+ CSUM_COPY_16_BYTES_WITHEX(2)
+ CSUM_COPY_16_BYTES_WITHEX(3)
+#if L1_CACHE_BYTES >= 128
+ CSUM_COPY_16_BYTES_WITHEX(4)
+ CSUM_COPY_16_BYTES_WITHEX(5)
+ CSUM_COPY_16_BYTES_WITHEX(6)
+ CSUM_COPY_16_BYTES_WITHEX(7)
+#endif
+#endif
+#endif
+ bdnz 53b
+ cmpwi r0,0
+ li r3,4
+ li r7,0
+ bne 114b
+
+63: srwi. r0,r5,2
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4)
+ adde r12,r12,r0
+31: stwu r0,4(r6)
+ bdnz 30b
-/* These shouldn't go in the fixup section, since that would
- cause the ex_table addresses to get out of order. */
-
-src_error_4:
- mfctr r6 /* update # bytes remaining from ctr */
- rlwimi r5,r6,4,0,27
- b 79f
-src_error_1:
- li r6,0
- subi r5,r5,2
-95: sth r6,4(r4)
+64: andi. r0,r5,2
+ beq+ 65f
+40: lhz r0,4(r4)
addi r4,r4,2
-79: srwi. r6,r5,2
- beq 3f
- mtctr r6
-src_error_2:
- li r6,0
-96: stwu r6,4(r4)
- bdnz 96b
-3: andi. r5,r5,3
- beq src_error
-src_error_3:
- li r6,0
- mtctr r5
- addi r4,r4,3
-97: stbu r6,1(r4)
- bdnz 97b
-src_error:
- cmpwi 0,r7,0
- beq 1f
- li r6,-EFAULT
- stw r6,0(r7)
-1: addze r3,r0
+41: sth r0,4(r6)
+ adde r12,r12,r0
+ addi r6,r6,2
+65: andi. r0,r5,1
+ beq+ 66f
+50: lbz r0,4(r4)
+51: stb r0,4(r6)
+ slwi r0,r0,8
+ adde r12,r12,r0
+66: addze r3,r12
+ beqlr+ cr7
+ rlwinm r3,r3,8,0,31 /* odd destination address: rotate one byte */
blr
-dst_error:
- cmpwi 0,r8,0
- beq 1f
- li r6,-EFAULT
- stw r6,0(r8)
-1: addze r3,r0
+fault:
+ li r3,0
blr
-.section __ex_table,"a"
- .long 81b,src_error_1
- .long 91b,dst_error
- .long 71b,src_error_4
- .long 72b,src_error_4
- .long 73b,src_error_4
- .long 74b,src_error_4
- .long 75b,dst_error
- .long 76b,dst_error
- .long 77b,dst_error
- .long 78b,dst_error
- .long 82b,src_error_2
- .long 92b,dst_error
- .long 83b,src_error_3
- .long 93b,dst_error
- .long 84b,src_error_3
- .long 94b,dst_error
- .long 95b,dst_error
- .long 96b,dst_error
- .long 97b,dst_error
+ EX_TABLE(70b, fault);
+ EX_TABLE(71b, fault);
+ EX_TABLE(72b, fault);
+ EX_TABLE(73b, fault);
+ EX_TABLE(54b, fault);
+
+/*
+ * this stuff handles faults in the cacheline loop: faults in both the read
+ * part and the write part branch to the common fault label, which returns 0
+ */
+ CSUM_COPY_16_BYTES_EXCODE(0)
+#if L1_CACHE_BYTES >= 32
+ CSUM_COPY_16_BYTES_EXCODE(1)
+#if L1_CACHE_BYTES >= 64
+ CSUM_COPY_16_BYTES_EXCODE(2)
+ CSUM_COPY_16_BYTES_EXCODE(3)
+#if L1_CACHE_BYTES >= 128
+ CSUM_COPY_16_BYTES_EXCODE(4)
+ CSUM_COPY_16_BYTES_EXCODE(5)
+ CSUM_COPY_16_BYTES_EXCODE(6)
+ CSUM_COPY_16_BYTES_EXCODE(7)
+#endif
+#endif
+#endif
+
+ EX_TABLE(30b, fault);
+ EX_TABLE(31b, fault);
+ EX_TABLE(40b, fault);
+ EX_TABLE(41b, fault);
+ EX_TABLE(50b, fault);
+ EX_TABLE(51b, fault);
+
+EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ * const struct in6_addr *daddr,
+ * __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+ lwz r8, 0(r3)
+ lwz r9, 4(r3)
+ addc r0, r7, r8
+ lwz r10, 8(r3)
+ adde r0, r0, r9
+ lwz r11, 12(r3)
+ adde r0, r0, r10
+ lwz r8, 0(r4)
+ adde r0, r0, r11
+ lwz r9, 4(r4)
+ adde r0, r0, r8
+ lwz r10, 8(r4)
+ adde r0, r0, r9
+ lwz r11, 12(r4)
+ adde r0, r0, r10
+ add r5, r5, r6 /* assumption: len + proto doesn't carry */
+ adde r0, r0, r11
+ adde r0, r0, r5
+ addze r0, r0
+ rotlwi r3, r0, 16
+ add r3, r0, r3
+ not r3, r3
+ rlwinm r3, r3, 16, 16, 31
+ blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 57a072065057..d53d8f09a2c2 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -1,77 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains assembly-language implementations
* of IP-style 1's complement checksum routines.
*
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
*/
+#include <linux/export.h>
#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
/*
- * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
- * len is in words and is always >= 5.
- *
- * In practice len == 5, but this is not guaranteed. So this code does not
- * attempt to use doubleword instructions.
- */
-_GLOBAL(ip_fast_csum)
- lwz r0,0(r3)
- lwzu r5,4(r3)
- addic. r4,r4,-2
- addc r0,r0,r5
- mtctr r4
- blelr-
-1: lwzu r4,4(r3)
- adde r0,r0,r4
- bdnz 1b
- addze r0,r0 /* add in final carry */
- rldicl r4,r0,32,0 /* fold two 32-bit halves together */
- add r0,r0,r4
- srdi r0,r0,32
- rlwinm r3,r0,16,0,31 /* fold two halves together */
- add r3,r0,r3
- not r3,r3
- srwi r3,r3,16
- blr
-
-/*
- * Compute checksum of TCP or UDP pseudo-header:
- * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
- * No real gain trying to do this specially for 64 bit, but
- * the 32 bit addition may spill into the upper bits of
- * the doubleword so we still must fold it down from 64.
- */
-_GLOBAL(csum_tcpudp_magic)
- rlwimi r5,r6,16,0,15 /* put proto in upper half of len */
- addc r0,r3,r4 /* add 4 32-bit words together */
- adde r0,r0,r5
- adde r0,r0,r7
- rldicl r4,r0,32,0 /* fold 64 bit value */
- add r0,r4,r0
- srdi r0,r0,32
- rlwinm r3,r0,16,0,31 /* fold two halves together */
- add r3,r0,r3
- not r3,r3
- srwi r3,r3,16
- blr
-
-/*
* Computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit).
*
- * csum_partial(r3=buff, r4=len, r5=sum)
+ * __csum_partial(r3=buff, r4=len, r5=sum)
*/
-_GLOBAL(csum_partial)
+_GLOBAL(__csum_partial)
addic r0,r5,0 /* clear carry */
srdi. r6,r4,3 /* less than 8 bytes? */
@@ -83,7 +32,7 @@ _GLOBAL(csum_partial)
* work to calculate the correct checksum, we ignore that case
* and take the potential slowdown of unaligned loads.
*/
- rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcsum_aligned
li r7,4
@@ -122,9 +71,9 @@ _GLOBAL(csum_partial)
ld r11,24(r3)
/*
- * On POWER6 and POWER7 back to back addes take 2 cycles because of
- * the XER dependency. This means the fastest this loop can go is
- * 16 cycles per iteration. The scheduling of the loop below has
+ * On POWER6 and POWER7 back to back adde instructions take 2 cycles
+ * because of the XER dependency. This means the fastest this loop can
+ * go is 16 cycles per iteration. The scheduling of the loop below has
* been shown to hit this on both POWER6 and POWER7.
*/
.align 5
@@ -215,8 +164,12 @@ _GLOBAL(csum_partial)
beq .Lcsum_finish
lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
.Lcsum_finish:
addze r0,r0 /* add in final carry */
@@ -224,50 +177,38 @@ _GLOBAL(csum_partial)
add r3,r4,r0
srdi r3,r3,32
blr
+EXPORT_SYMBOL(__csum_partial)
.macro srcnr
100:
- .section __ex_table,"a"
- .align 3
- .llong 100b,.Lsrc_error_nr
- .previous
+ EX_TABLE(100b,.Lerror_nr)
.endm
.macro source
150:
- .section __ex_table,"a"
- .align 3
- .llong 150b,.Lsrc_error
- .previous
+ EX_TABLE(150b,.Lerror)
.endm
.macro dstnr
200:
- .section __ex_table,"a"
- .align 3
- .llong 200b,.Ldest_error_nr
- .previous
+ EX_TABLE(200b,.Lerror_nr)
.endm
.macro dest
250:
- .section __ex_table,"a"
- .align 3
- .llong 250b,.Ldest_error
- .previous
+ EX_TABLE(250b,.Lerror)
.endm
/*
* Computes the checksum of a memory block at src, length len,
- * and adds in "sum" (32-bit), while copying the block to dst.
- * If an access exception occurs on src or dst, it stores -EFAULT
- * to *src_err or *dst_err respectively. The caller must take any action
- * required in this case (zeroing memory, recalculating partial checksum etc).
+ * and adds in 0xffffffff (32-bit), while copying the block to dst.
+ * If an access exception occurs, it returns 0.
*
- * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
+ * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
*/
_GLOBAL(csum_partial_copy_generic)
+ li r6,-1
addic r0,r6,0 /* clear carry */
srdi. r6,r5,3 /* less than 8 bytes? */
@@ -282,7 +223,7 @@ _GLOBAL(csum_partial_copy_generic)
* If the source and destination are relatively unaligned we only
* align the source. This keeps things simple.
*/
- rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcopy_aligned
li r9,4
@@ -323,9 +264,9 @@ source; ld r10,16(r3)
source; ld r11,24(r3)
/*
- * On POWER6 and POWER7 back to back addes take 2 cycles because of
- * the XER dependency. This means the fastest this loop can go is
- * 16 cycles per iteration. The scheduling of the loop below has
+ * On POWER6 and POWER7 back to back adde instructions take 2 cycles
+ * because of the XER dependency. This means the fastest this loop can
+ * go is 16 cycles per iteration. The scheduling of the loop below has
* been shown to hit this on both POWER6 and POWER7.
*/
.align 5
@@ -444,8 +385,12 @@ dstnr; sth r6,0(r4)
beq .Lcopy_finish
srcnr; lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
dstnr; stb r6,0(r4)
.Lcopy_finish:
@@ -455,26 +400,44 @@ dstnr; stb r6,0(r4)
srdi r3,r3,32
blr
-.Lsrc_error:
+.Lerror:
ld r14,STK_REG(R14)(r1)
ld r15,STK_REG(R15)(r1)
ld r16,STK_REG(R16)(r1)
addi r1,r1,STACKFRAMESIZE
-.Lsrc_error_nr:
- cmpdi 0,r7,0
- beqlr
- li r6,-EFAULT
- stw r6,0(r7)
+.Lerror_nr:
+ li r3,0
blr
-.Ldest_error:
- ld r14,STK_REG(R14)(r1)
- ld r15,STK_REG(R15)(r1)
- ld r16,STK_REG(R16)(r1)
- addi r1,r1,STACKFRAMESIZE
-.Ldest_error_nr:
- cmpdi 0,r8,0
- beqlr
- li r6,-EFAULT
- stw r6,0(r8)
+EXPORT_SYMBOL(csum_partial_copy_generic)
+
+/*
+ * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ * const struct in6_addr *daddr,
+ * __u32 len, __u8 proto, __wsum sum)
+ */
+
+_GLOBAL(csum_ipv6_magic)
+ ld r8, 0(r3)
+ ld r9, 8(r3)
+ add r5, r5, r6
+ addc r0, r8, r9
+ ld r10, 0(r4)
+ ld r11, 8(r4)
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ rotldi r5, r5, 8
+#endif
+ adde r0, r0, r10
+ add r5, r5, r7
+ adde r0, r0, r11
+ adde r0, r0, r5
+ addze r0, r0
+ rotldi r3, r0, 32 /* fold two 32 bit halves together */
+ add r3, r0, r3
+ srdi r0, r3, 32
+ rotlwi r3, r0, 16 /* fold two 16 bit halves together */
+ add r3, r0, r3
+ not r3, r3
+ rlwinm r3, r3, 16, 16, 31
blr
+EXPORT_SYMBOL(csum_ipv6_magic)
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
new file mode 100644
index 000000000000..1a14c8780278
--- /dev/null
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *
+ * Copyright (C) IBM Corporation, 2010
+ *
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ */
+#include <linux/export.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <asm/checksum.h>
+#include <linux/uaccess.h>
+
+__wsum csum_and_copy_from_user(const void __user *src, void *dst,
+ int len)
+{
+ __wsum csum;
+
+ if (unlikely(!user_read_access_begin(src, len)))
+ return 0;
+
+ csum = csum_partial_copy_generic((void __force *)src, dst, len);
+
+ user_read_access_end();
+ return csum;
+}
+
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
+{
+ __wsum csum;
+
+ if (unlikely(!user_write_access_begin(dst, len)))
+ return 0;
+
+ csum = csum_partial_copy_generic(src, (void __force *)dst, len);
+
+ user_write_access_end();
+ return csum;
+}
diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c
deleted file mode 100644
index 08e3a3356c40..000000000000
--- a/arch/powerpc/lib/checksum_wrappers_64.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- * Copyright (C) IBM Corporation, 2010
- *
- * Author: Anton Blanchard <anton@au.ibm.com>
- */
-#include <linux/export.h>
-#include <linux/compiler.h>
-#include <linux/types.h>
-#include <asm/checksum.h>
-#include <asm/uaccess.h>
-
-__wsum csum_and_copy_from_user(const void __user *src, void *dst,
- int len, __wsum sum, int *err_ptr)
-{
- unsigned int csum;
-
- might_sleep();
-
- *err_ptr = 0;
-
- if (!len) {
- csum = 0;
- goto out;
- }
-
- if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) {
- *err_ptr = -EFAULT;
- csum = (__force unsigned int)sum;
- goto out;
- }
-
- csum = csum_partial_copy_generic((void __force *)src, dst,
- len, sum, err_ptr, NULL);
-
- if (unlikely(*err_ptr)) {
- int missing = __copy_from_user(dst, src, len);
-
- if (missing) {
- memset(dst + len - missing, 0, missing);
- *err_ptr = -EFAULT;
- } else {
- *err_ptr = 0;
- }
-
- csum = csum_partial(dst, len, sum);
- }
-
-out:
- return (__force __wsum)csum;
-}
-EXPORT_SYMBOL(csum_and_copy_from_user);
-
-__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
- __wsum sum, int *err_ptr)
-{
- unsigned int csum;
-
- might_sleep();
-
- *err_ptr = 0;
-
- if (!len) {
- csum = 0;
- goto out;
- }
-
- if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) {
- *err_ptr = -EFAULT;
- csum = -1; /* invalid checksum */
- goto out;
- }
-
- csum = csum_partial_copy_generic(src, (void __force *)dst,
- len, sum, NULL, err_ptr);
-
- if (unlikely(*err_ptr)) {
- csum = csum_partial(src, len, sum);
-
- if (copy_to_user(dst, src, len)) {
- *err_ptr = -EFAULT;
- csum = -1; /* invalid checksum */
- }
- }
-
-out:
- return (__force __wsum)csum;
-}
-EXPORT_SYMBOL(csum_and_copy_to_user);
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb8eb82..f84e0337cc02 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -1,470 +1,697 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2008 Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
-#include <linux/mm.h>
+#include <linux/cpuhotplug.h>
+#include <linux/uaccess.h>
+#include <linux/jump_label.h>
+
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
#include <asm/page.h>
-#include <asm/code-patching.h>
-#include <asm/uaccess.h>
+#include <asm/text-patching.h>
+#include <asm/inst.h>
+
+static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, bool is_dword)
+{
+ if (!IS_ENABLED(CONFIG_PPC64) || likely(!is_dword)) {
+ /* For big endian correctness: plain address would use the wrong half */
+ u32 val32 = val;
+
+ __put_kernel_nofault(patch_addr, &val32, u32, failed);
+ } else {
+ __put_kernel_nofault(patch_addr, &val, u64, failed);
+ }
+
+ asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
+ "r" (exec_addr));
+
+ return 0;
+
+failed:
+ mb(); /* sync */
+ return -EPERM;
+}
+
+int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ if (ppc_inst_prefixed(instr))
+ return __patch_mem(addr, ppc_inst_as_ulong(instr), addr, true);
+ else
+ return __patch_mem(addr, ppc_inst_val(instr), addr, false);
+}
+
+struct patch_context {
+ union {
+ struct vm_struct *area;
+ struct mm_struct *mm;
+ };
+ unsigned long addr;
+ pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
+static int map_patch_area(void *addr, unsigned long text_poke_addr);
+static void unmap_patch_area(unsigned long addr);
-int patch_instruction(unsigned int *addr, unsigned int instr)
+static bool mm_patch_enabled(void)
{
+ return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context and the context switch itself.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+ struct mm_struct *orig_mm = current->active_mm;
+
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+ WARN_ON(!mm_is_thread_local(temp_mm));
+
+ suspend_breakpoints();
+ return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+ struct mm_struct *orig_mm)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(temp_mm, orig_mm, current);
+ restore_breakpoints();
+}
+
+static int text_area_cpu_up(unsigned int cpu)
+{
+ struct vm_struct *area;
+ unsigned long addr;
int err;
- __put_user_size(instr, addr, 4, err);
+ area = get_vm_area(PAGE_SIZE, 0);
+ if (!area) {
+ WARN_ONCE(1, "Failed to create text area for cpu %d\n",
+ cpu);
+ return -1;
+ }
+
+ // Map/unmap the area to ensure all page tables are pre-allocated
+ addr = (unsigned long)area->addr;
+ err = map_patch_area(empty_zero_page, addr);
if (err)
return err;
- asm ("dcbst 0, %0; sync; icbi 0,%0; sync; isync" : : "r" (addr));
+
+ unmap_patch_area(addr);
+
+ this_cpu_write(cpu_patching_context.area, area);
+ this_cpu_write(cpu_patching_context.addr, addr);
+ this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
+
return 0;
}
-int patch_branch(unsigned int *addr, unsigned long target, int flags)
+static int text_area_cpu_down(unsigned int cpu)
{
- return patch_instruction(addr, create_branch(addr, target, flags));
+ free_vm_area(this_cpu_read(cpu_patching_context.area));
+ this_cpu_write(cpu_patching_context.area, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
+ this_cpu_write(cpu_patching_context.pte, NULL);
+ return 0;
}
-unsigned int create_branch(const unsigned int *addr,
- unsigned long target, int flags)
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
{
- unsigned int instruction;
- long offset;
+ struct mmu_gather tlb;
- offset = target;
- if (! (flags & BRANCH_ABSOLUTE))
- offset = offset - (unsigned long)addr;
+ tlb_gather_mmu(&tlb, mm);
+ free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+ mmput(mm);
+}
- /* Check we can represent the target in the instruction format */
- if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
- return 0;
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+ struct mm_struct *mm;
+ unsigned long addr;
+ pte_t *pte;
+ spinlock_t *ptl;
- /* Mask out the flags and target, so they don't step on each other. */
- instruction = 0x48000000 | (flags & 0x3) | (offset & 0x03FFFFFC);
+ mm = mm_alloc();
+ if (WARN_ON(!mm))
+ goto fail_no_mm;
+
+ /*
+ * Choose a random page-aligned address from the interval
+ * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+ * The lower address bound is PAGE_SIZE to avoid the zero-page.
+ */
+ addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+ /*
+ * PTE allocation uses GFP_KERNEL which means we need to
+ * pre-allocate the PTE here because we cannot do the
+ * allocation during patching when IRQs are disabled.
+ *
+ * Using get_locked_pte() to avoid open coding, the lock
+ * is unnecessary.
+ */
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto fail_no_pte;
+ pte_unmap_unlock(pte, ptl);
- return instruction;
+ this_cpu_write(cpu_patching_context.mm, mm);
+ this_cpu_write(cpu_patching_context.addr, addr);
+
+ return 0;
+
+fail_no_pte:
+ put_patching_mm(mm, addr);
+fail_no_mm:
+ return -ENOMEM;
}
-unsigned int create_cond_branch(const unsigned int *addr,
- unsigned long target, int flags)
+static int text_area_cpu_down_mm(unsigned int cpu)
{
- unsigned int instruction;
- long offset;
+ put_patching_mm(this_cpu_read(cpu_patching_context.mm),
+ this_cpu_read(cpu_patching_context.addr));
- offset = target;
- if (! (flags & BRANCH_ABSOLUTE))
- offset = offset - (unsigned long)addr;
+ this_cpu_write(cpu_patching_context.mm, NULL);
+ this_cpu_write(cpu_patching_context.addr, 0);
- /* Check we can represent the target in the instruction format */
- if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3)
- return 0;
+ return 0;
+}
- /* Mask out the flags and target, so they don't step on each other. */
- instruction = 0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
+
+void __init poking_init(void)
+{
+ int ret;
+
+ if (mm_patch_enabled())
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke_mm:online",
+ text_area_cpu_up_mm,
+ text_area_cpu_down_mm);
+ else
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online",
+ text_area_cpu_up,
+ text_area_cpu_down);
+
+ /* cpuhp_setup_state returns >= 0 on success */
+ if (WARN_ON(ret < 0))
+ return;
- return instruction;
+ static_branch_enable(&poking_init_done);
}
-static unsigned int branch_opcode(unsigned int instr)
+static unsigned long get_patch_pfn(void *addr)
{
- return (instr >> 26) & 0x3F;
+ if (IS_ENABLED(CONFIG_EXECMEM) && is_vmalloc_or_module_addr(addr))
+ return vmalloc_to_pfn(addr);
+ else
+ return __pa_symbol(addr) >> PAGE_SHIFT;
}
-static int instr_is_branch_iform(unsigned int instr)
+/*
+ * This can be called for kernel text or a module.
+ */
+static int map_patch_area(void *addr, unsigned long text_poke_addr)
{
- return branch_opcode(instr) == 18;
+ unsigned long pfn = get_patch_pfn(addr);
+
+ return map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
}
-static int instr_is_branch_bform(unsigned int instr)
+static void unmap_patch_area(unsigned long addr)
{
- return branch_opcode(instr) == 16;
+ pte_t *ptep;
+ pmd_t *pmdp;
+ pud_t *pudp;
+ p4d_t *p4dp;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+ if (WARN_ON(pgd_none(*pgdp)))
+ return;
+
+ p4dp = p4d_offset(pgdp, addr);
+ if (WARN_ON(p4d_none(*p4dp)))
+ return;
+
+ pudp = pud_offset(p4dp, addr);
+ if (WARN_ON(pud_none(*pudp)))
+ return;
+
+ pmdp = pmd_offset(pudp, addr);
+ if (WARN_ON(pmd_none(*pmdp)))
+ return;
+
+ ptep = pte_offset_kernel(pmdp, addr);
+ if (WARN_ON(pte_none(*ptep)))
+ return;
+
+ /*
+ * In hash, pte_clear flushes the tlb; in radix, we have to do it ourselves.
+ */
+ pte_clear(&init_mm, addr, ptep);
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}
-int instr_is_relative_branch(unsigned int instr)
+static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword)
{
- if (instr & BRANCH_ABSOLUTE)
- return 0;
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
+ struct mm_struct *patching_mm;
+ struct mm_struct *orig_mm;
+ spinlock_t *ptl;
- return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync": : :"memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_mem(addr, val, patch_addr, is_dword);
+
+ /* context synchronisation performed by __patch_mem (isync or exception) */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(pte, ptl);
+
+ return err;
}
-static unsigned long branch_iform_target(const unsigned int *instr)
+static int __do_patch_mem(void *addr, unsigned long val, bool is_dword)
{
- signed long imm;
+ int err;
+ u32 *patch_addr;
+ unsigned long text_poke_addr;
+ pte_t *pte;
+ unsigned long pfn = get_patch_pfn(addr);
- imm = *instr & 0x3FFFFFC;
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
- /* If the top bit of the immediate value is set this is negative */
- if (imm & 0x2000000)
- imm -= 0x4000000;
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
- if ((*instr & BRANCH_ABSOLUTE) == 0)
- imm += (unsigned long)instr;
+ err = __patch_mem(addr, val, patch_addr, is_dword);
- return (unsigned long)imm;
+ pte_clear(&init_mm, text_poke_addr, pte);
+ flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+ return err;
}
-static unsigned long branch_bform_target(const unsigned int *instr)
+static int patch_mem(void *addr, unsigned long val, bool is_dword)
{
- signed long imm;
+ int err;
+ unsigned long flags;
- imm = *instr & 0xFFFC;
+ /*
+ * During early boot, patch_instruction is called
+ * when text_poke_area is not ready, but we still need
+ * to allow patching. We just do the plain old patching.
+ */
+ if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+ !static_branch_likely(&poking_init_done))
+ return __patch_mem(addr, val, addr, is_dword);
+
+ local_irq_save(flags);
+ if (mm_patch_enabled())
+ err = __do_patch_mem_mm(addr, val, is_dword);
+ else
+ err = __do_patch_mem(addr, val, is_dword);
+ local_irq_restore(flags);
+
+ return err;
+}
- /* If the top bit of the immediate value is set this is negative */
- if (imm & 0x8000)
- imm -= 0x10000;
+#ifdef CONFIG_PPC64
- if ((*instr & BRANCH_ABSOLUTE) == 0)
- imm += (unsigned long)instr;
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ if (ppc_inst_prefixed(instr))
+ return patch_mem(addr, ppc_inst_as_ulong(instr), true);
+ else
+ return patch_mem(addr, ppc_inst_val(instr), false);
+}
+NOKPROBE_SYMBOL(patch_instruction);
- return (unsigned long)imm;
+int patch_uint(void *addr, unsigned int val)
+{
+ if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int)))
+ return -EINVAL;
+
+ return patch_mem(addr, val, false);
}
+NOKPROBE_SYMBOL(patch_uint);
-unsigned long branch_target(const unsigned int *instr)
+int patch_ulong(void *addr, unsigned long val)
{
- if (instr_is_branch_iform(*instr))
- return branch_iform_target(instr);
- else if (instr_is_branch_bform(*instr))
- return branch_bform_target(instr);
+ if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long)))
+ return -EINVAL;
+
+ return patch_mem(addr, val, true);
+}
+NOKPROBE_SYMBOL(patch_ulong);
+
+#else
+
+int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+ return patch_mem(addr, ppc_inst_val(instr), false);
+}
+NOKPROBE_SYMBOL(patch_instruction)
+
+#endif
+
+static int patch_memset64(u64 *addr, u64 val, size_t count)
+{
+ for (u64 *end = addr + count; addr < end; addr++)
+ __put_kernel_nofault(addr, &val, u64, failed);
return 0;
+
+failed:
+ return -EPERM;
}
-int instr_is_branch_to_addr(const unsigned int *instr, unsigned long addr)
+static int patch_memset32(u32 *addr, u32 val, size_t count)
{
- if (instr_is_branch_iform(*instr) || instr_is_branch_bform(*instr))
- return branch_target(instr) == addr;
+ for (u32 *end = addr + count; addr < end; addr++)
+ __put_kernel_nofault(addr, &val, u32, failed);
return 0;
+
+failed:
+ return -EPERM;
}
-unsigned int translate_branch(const unsigned int *dest, const unsigned int *src)
+static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
{
- unsigned long target;
+ unsigned long start = (unsigned long)patch_addr;
+ int err;
- target = branch_target(src);
+ /* Repeat instruction */
+ if (repeat_instr) {
+ ppc_inst_t instr = ppc_inst_read(code);
- if (instr_is_branch_iform(*src))
- return create_branch(dest, target, *src);
- else if (instr_is_branch_bform(*src))
- return create_cond_branch(dest, target, *src);
+ if (ppc_inst_prefixed(instr)) {
+ u64 val = ppc_inst_as_ulong(instr);
- return 0;
+ err = patch_memset64((u64 *)patch_addr, val, len / 8);
+ } else {
+ u32 val = ppc_inst_val(instr);
+
+ err = patch_memset32(patch_addr, val, len / 4);
+ }
+ } else {
+ err = copy_to_kernel_nofault(patch_addr, code, len);
+ }
+
+ smp_wmb(); /* smp write barrier */
+ flush_icache_range(start, start + len);
+ return err;
}
-#ifdef CONFIG_PPC_BOOK3E_64
-void __patch_exception(int exc, unsigned long addr)
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
- extern unsigned int interrupt_base_book3e;
- unsigned int *ibase = &interrupt_base_book3e;
+ struct mm_struct *patching_mm, *orig_mm;
+ unsigned long pfn = get_patch_pfn(addr);
+ unsigned long text_poke_addr;
+ spinlock_t *ptl;
+ u32 *patch_addr;
+ pte_t *pte;
+ int err;
+
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync" ::: "memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
- /* Our exceptions vectors start with a NOP and -then- a branch
- * to deal with single stepping from userspace which stops on
- * the second instruction. Thus we need to patch the second
- * instruction of the exception, not the first one
+ kasan_disable_current();
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
+ kasan_enable_current();
+
+ /* context synchronisation performed by __patch_instructions */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
*/
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
- patch_branch(ibase + (exc / 4) + 1, addr, 0);
+ pte_unmap_unlock(pte, ptl);
+
+ return err;
}
-#endif
-#ifdef CONFIG_CODE_PATCHING_SELFTEST
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+ unsigned long pfn = get_patch_pfn(addr);
+ unsigned long text_poke_addr;
+ u32 *patch_addr;
+ pte_t *pte;
+ int err;
+
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync" ::: "memory");
+
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+ pte_clear(&init_mm, text_poke_addr, pte);
+ flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+ return err;
+}
-static void __init test_trampoline(void)
+/*
+ * Patch 'addr' with 'len' bytes of instructions from 'code'.
+ *
+ * If repeat_instr is true, the same instruction is filled for
+ * 'len' bytes.
+ */
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
{
- asm ("nop;\n");
+ while (len > 0) {
+ unsigned long flags;
+ size_t plen;
+ int err;
+
+ plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
+
+ local_irq_save(flags);
+ if (mm_patch_enabled())
+ err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
+ else
+ err = __do_patch_instructions(addr, code, plen, repeat_instr);
+ local_irq_restore(flags);
+ if (err)
+ return err;
+
+ len -= plen;
+ addr = (u32 *)((unsigned long)addr + plen);
+ if (!repeat_instr)
+ code = (u32 *)((unsigned long)code + plen);
+ }
+
+ return 0;
}
+NOKPROBE_SYMBOL(patch_instructions);
+
+int patch_branch(u32 *addr, unsigned long target, int flags)
+{
+ ppc_inst_t instr;
-#define check(x) \
- if (!(x)) printk("code-patching: test failed at line %d\n", __LINE__);
+ if (create_branch(&instr, addr, target, flags))
+ return -ERANGE;
-static void __init test_branch_iform(void)
+ return patch_instruction(addr, instr);
+}
+
+/*
+ * Helper to check if a given instruction is a conditional branch
+ * Derived from the conditional checks in analyse_instr()
+ */
+bool is_conditional_branch(ppc_inst_t instr)
{
- unsigned int instr;
- unsigned long addr;
+ unsigned int opcode = ppc_inst_primary_opcode(instr);
+
+ if (opcode == 16) /* bc, bca, bcl, bcla */
+ return true;
+ if (opcode == 19) {
+ switch ((ppc_inst_val(instr) >> 1) & 0x3ff) {
+ case 16: /* bclr, bclrl */
+ case 528: /* bcctr, bcctrl */
+ case 560: /* bctar, bctarl */
+ return true;
+ }
+ }
+ return false;
+}
+NOKPROBE_SYMBOL(is_conditional_branch);
- addr = (unsigned long)&instr;
-
- /* The simplest case, branch to self, no flags */
- check(instr_is_branch_iform(0x48000000));
- /* All bits of target set, and flags */
- check(instr_is_branch_iform(0x4bffffff));
- /* High bit of opcode set, which is wrong */
- check(!instr_is_branch_iform(0xcbffffff));
- /* Middle bits of opcode set, which is wrong */
- check(!instr_is_branch_iform(0x7bffffff));
-
- /* Simplest case, branch to self with link */
- check(instr_is_branch_iform(0x48000001));
- /* All bits of targets set */
- check(instr_is_branch_iform(0x4bfffffd));
- /* Some bits of targets set */
- check(instr_is_branch_iform(0x4bff00fd));
- /* Must be a valid branch to start with */
- check(!instr_is_branch_iform(0x7bfffffd));
-
- /* Absolute branch to 0x100 */
- instr = 0x48000103;
- check(instr_is_branch_to_addr(&instr, 0x100));
- /* Absolute branch to 0x420fc */
- instr = 0x480420ff;
- check(instr_is_branch_to_addr(&instr, 0x420fc));
- /* Maximum positive relative branch, + 20MB - 4B */
- instr = 0x49fffffc;
- check(instr_is_branch_to_addr(&instr, addr + 0x1FFFFFC));
- /* Smallest negative relative branch, - 4B */
- instr = 0x4bfffffc;
- check(instr_is_branch_to_addr(&instr, addr - 4));
- /* Largest negative relative branch, - 32 MB */
- instr = 0x4a000000;
- check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
-
- /* Branch to self, with link */
- instr = create_branch(&instr, addr, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr));
-
- /* Branch to self - 0x100, with link */
- instr = create_branch(&instr, addr - 0x100, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr - 0x100));
-
- /* Branch to self + 0x100, no link */
- instr = create_branch(&instr, addr + 0x100, 0);
- check(instr_is_branch_to_addr(&instr, addr + 0x100));
-
- /* Maximum relative negative offset, - 32 MB */
- instr = create_branch(&instr, addr - 0x2000000, BRANCH_SET_LINK);
- check(instr_is_branch_to_addr(&instr, addr - 0x2000000));
-
- /* Out of range relative negative offset, - 32 MB + 4*/
- instr = create_branch(&instr, addr - 0x2000004, BRANCH_SET_LINK);
- check(instr == 0);
-
- /* Out of range relative positive offset, + 32 MB */
- instr = create_branch(&instr, addr + 0x2000000, BRANCH_SET_LINK);
- check(instr == 0);
-
- /* Unaligned target */
- instr = create_branch(&instr, addr + 3, BRANCH_SET_LINK);
- check(instr == 0);
-
- /* Check flags are masked correctly */
- instr = create_branch(&instr, addr, 0xFFFFFFFC);
- check(instr_is_branch_to_addr(&instr, addr));
- check(instr == 0x48000000);
-}
-
-static void __init test_create_function_call(void)
-{
- unsigned int *iptr;
- unsigned long dest;
-
- /* Check we can create a function call */
- iptr = (unsigned int *)ppc_function_entry(test_trampoline);
- dest = ppc_function_entry(test_create_function_call);
- patch_instruction(iptr, create_branch(iptr, dest, BRANCH_SET_LINK));
- check(instr_is_branch_to_addr(iptr, dest));
-}
-
-static void __init test_branch_bform(void)
+int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
+ unsigned long target, int flags)
{
- unsigned long addr;
- unsigned int *iptr, instr, flags;
-
- iptr = &instr;
- addr = (unsigned long)iptr;
-
- /* The simplest case, branch to self, no flags */
- check(instr_is_branch_bform(0x40000000));
- /* All bits of target set, and flags */
- check(instr_is_branch_bform(0x43ffffff));
- /* High bit of opcode set, which is wrong */
- check(!instr_is_branch_bform(0xc3ffffff));
- /* Middle bits of opcode set, which is wrong */
- check(!instr_is_branch_bform(0x7bffffff));
-
- /* Absolute conditional branch to 0x100 */
- instr = 0x43ff0103;
- check(instr_is_branch_to_addr(&instr, 0x100));
- /* Absolute conditional branch to 0x20fc */
- instr = 0x43ff20ff;
- check(instr_is_branch_to_addr(&instr, 0x20fc));
- /* Maximum positive relative conditional branch, + 32 KB - 4B */
- instr = 0x43ff7ffc;
- check(instr_is_branch_to_addr(&instr, addr + 0x7FFC));
- /* Smallest negative relative conditional branch, - 4B */
- instr = 0x43fffffc;
- check(instr_is_branch_to_addr(&instr, addr - 4));
- /* Largest negative relative conditional branch, - 32 KB */
- instr = 0x43ff8000;
- check(instr_is_branch_to_addr(&instr, addr - 0x8000));
-
- /* All condition code bits set & link */
- flags = 0x3ff000 | BRANCH_SET_LINK;
-
- /* Branch to self */
- instr = create_cond_branch(iptr, addr, flags);
- check(instr_is_branch_to_addr(&instr, addr));
-
- /* Branch to self - 0x100 */
- instr = create_cond_branch(iptr, addr - 0x100, flags);
- check(instr_is_branch_to_addr(&instr, addr - 0x100));
-
- /* Branch to self + 0x100 */
- instr = create_cond_branch(iptr, addr + 0x100, flags);
- check(instr_is_branch_to_addr(&instr, addr + 0x100));
-
- /* Maximum relative negative offset, - 32 KB */
- instr = create_cond_branch(iptr, addr - 0x8000, flags);
- check(instr_is_branch_to_addr(&instr, addr - 0x8000));
-
- /* Out of range relative negative offset, - 32 KB + 4*/
- instr = create_cond_branch(iptr, addr - 0x8004, flags);
- check(instr == 0);
-
- /* Out of range relative positive offset, + 32 KB */
- instr = create_cond_branch(iptr, addr + 0x8000, flags);
- check(instr == 0);
-
- /* Unaligned target */
- instr = create_cond_branch(iptr, addr + 3, flags);
- check(instr == 0);
-
- /* Check flags are masked correctly */
- instr = create_cond_branch(iptr, addr, 0xFFFFFFFC);
- check(instr_is_branch_to_addr(&instr, addr));
- check(instr == 0x43FF0000);
-}
-
-static void __init test_translate_branch(void)
+ long offset;
+
+ offset = target;
+ if (! (flags & BRANCH_ABSOLUTE))
+ offset = offset - (unsigned long)addr;
+
+ /* Check we can represent the target in the instruction format */
+ if (!is_offset_in_cond_branch_range(offset))
+ return 1;
+
+ /* Mask out the flags and target, so they don't step on each other. */
+ *instr = ppc_inst(0x40000000 | (flags & 0x3FF0003) | (offset & 0xFFFC));
+
+ return 0;
+}
+
+int instr_is_relative_branch(ppc_inst_t instr)
{
- unsigned long addr;
- unsigned int *p, *q;
- void *buf;
+ if (ppc_inst_val(instr) & BRANCH_ABSOLUTE)
+ return 0;
- buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
- check(buf);
- if (!buf)
- return;
+ return instr_is_branch_iform(instr) || instr_is_branch_bform(instr);
+}
+
+int instr_is_relative_link_branch(ppc_inst_t instr)
+{
+ return instr_is_relative_branch(instr) && (ppc_inst_val(instr) & BRANCH_SET_LINK);
+}
+
+static unsigned long branch_iform_target(const u32 *instr)
+{
+ signed long imm;
+
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0x3FFFFFC;
+
+ /* If the top bit of the immediate value is set this is negative */
+ if (imm & 0x2000000)
+ imm -= 0x4000000;
+
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
+ imm += (unsigned long)instr;
+
+ return (unsigned long)imm;
+}
+
+static unsigned long branch_bform_target(const u32 *instr)
+{
+ signed long imm;
+
+ imm = ppc_inst_val(ppc_inst_read(instr)) & 0xFFFC;
+
+ /* If the top bit of the immediate value is set this is negative */
+ if (imm & 0x8000)
+ imm -= 0x10000;
- /* Simple case, branch to self moved a little */
- p = buf;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- check(instr_is_branch_to_addr(p, addr));
- q = p + 1;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Maximum negative case, move b . to addr + 32 MB */
- p = buf;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- q = buf + 0x2000000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x4a000000);
-
- /* Maximum positive case, move x to x - 32 MB + 4 */
- p = buf + 0x2000000;
- addr = (unsigned long)p;
- patch_branch(p, addr, 0);
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x49fffffc);
-
- /* Jump to x + 16 MB moved to x + 20 MB */
- p = buf;
- addr = 0x1000000 + (unsigned long)buf;
- patch_branch(p, addr, BRANCH_SET_LINK);
- q = buf + 0x1400000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Jump to x + 16 MB moved to x - 16 MB + 4 */
- p = buf + 0x1000000;
- addr = 0x2000000 + (unsigned long)buf;
- patch_branch(p, addr, 0);
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
-
- /* Conditional branch tests */
-
- /* Simple case, branch to self moved a little */
- p = buf;
- addr = (unsigned long)p;
- patch_instruction(p, create_cond_branch(p, addr, 0));
- check(instr_is_branch_to_addr(p, addr));
- q = p + 1;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Maximum negative case, move b . to addr + 32 KB */
- p = buf;
- addr = (unsigned long)p;
- patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
- q = buf + 0x8000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x43ff8000);
-
- /* Maximum positive case, move x to x - 32 KB + 4 */
- p = buf + 0x8000;
- addr = (unsigned long)p;
- patch_instruction(p, create_cond_branch(p, addr, 0xFFFFFFFC));
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
- check(*q == 0x43ff7ffc);
-
- /* Jump to x + 12 KB moved to x + 20 KB */
- p = buf;
- addr = 0x3000 + (unsigned long)buf;
- patch_instruction(p, create_cond_branch(p, addr, BRANCH_SET_LINK));
- q = buf + 0x5000;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Jump to x + 8 KB moved to x - 8 KB + 4 */
- p = buf + 0x2000;
- addr = 0x4000 + (unsigned long)buf;
- patch_instruction(p, create_cond_branch(p, addr, 0));
- q = buf + 4;
- patch_instruction(q, translate_branch(q, p));
- check(instr_is_branch_to_addr(p, addr));
- check(instr_is_branch_to_addr(q, addr));
-
- /* Free the buffer we were using */
- vfree(buf);
-}
-
-static int __init test_code_patching(void)
-{
- printk(KERN_DEBUG "Running code patching self-tests ...\n");
-
- test_branch_iform();
- test_branch_bform();
- test_create_function_call();
- test_translate_branch();
+ if ((ppc_inst_val(ppc_inst_read(instr)) & BRANCH_ABSOLUTE) == 0)
+ imm += (unsigned long)instr;
+
+ return (unsigned long)imm;
+}
+
+unsigned long branch_target(const u32 *instr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)))
+ return branch_iform_target(instr);
+ else if (instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_bform_target(instr);
return 0;
}
-late_initcall(test_code_patching);
-#endif /* CONFIG_CODE_PATCHING_SELFTEST */
+int translate_branch(ppc_inst_t *instr, const u32 *dest, const u32 *src)
+{
+ unsigned long target;
+ target = branch_target(src);
+
+ if (instr_is_branch_iform(ppc_inst_read(src)))
+ return create_branch(instr, dest, target,
+ ppc_inst_val(ppc_inst_read(src)));
+ else if (instr_is_branch_bform(ppc_inst_read(src)))
+ return create_cond_branch(instr, dest, target,
+ ppc_inst_val(ppc_inst_read(src)));
+
+ return 1;
+}
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 55f19f9fd708..933b685e7ab6 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -1,17 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Memory copy functions for 32-bit PowerPC.
*
* Copyright (C) 1996-2005 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
#define COPY_16_BYTES \
lwz r7,4(r4); \
@@ -48,43 +47,66 @@
9 ## n ## 1: \
addi r5,r5,-(16 * n); \
b 105f; \
-.section __ex_table,"a"; \
- .align 2; \
- .long 8 ## n ## 0b,9 ## n ## 0b; \
- .long 8 ## n ## 1b,9 ## n ## 0b; \
- .long 8 ## n ## 2b,9 ## n ## 0b; \
- .long 8 ## n ## 3b,9 ## n ## 0b; \
- .long 8 ## n ## 4b,9 ## n ## 1b; \
- .long 8 ## n ## 5b,9 ## n ## 1b; \
- .long 8 ## n ## 6b,9 ## n ## 1b; \
- .long 8 ## n ## 7b,9 ## n ## 1b; \
- .text
+ EX_TABLE(8 ## n ## 0b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 1b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 2b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 3b,9 ## n ## 0b); \
+ EX_TABLE(8 ## n ## 4b,9 ## n ## 1b); \
+ EX_TABLE(8 ## n ## 5b,9 ## n ## 1b); \
+ EX_TABLE(8 ## n ## 6b,9 ## n ## 1b); \
+ EX_TABLE(8 ## n ## 7b,9 ## n ## 1b)
.text
- .stabs "arch/powerpc/lib/",N_SO,0,0,0f
- .stabs "copy_32.S",N_SO,0,0,0f
-0:
CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
+#ifndef CONFIG_KASAN
+_GLOBAL(memset16)
+ rlwinm. r0 ,r5, 31, 1, 31
+ addi r6, r3, -4
+ beq- 2f
+ rlwimi r4 ,r4 ,16 ,0 ,15
+ mtctr r0
+1: stwu r4, 4(r6)
+ bdnz 1b
+2: andi. r0, r5, 1
+ beqlr
+ sth r4, 4(r6)
+ blr
+EXPORT_SYMBOL(memset16)
+#endif
+
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
* area is cacheable. -- paulus
+ *
+ * During early init, cache might not be active yet, so dcbz cannot be used.
+ * We therefore skip the optimised block that uses dcbz. This jump is
+ * replaced by a nop once cache is active. This is done in machine_init().
*/
-_GLOBAL(cacheable_memzero)
- mr r5,r4
- li r4,0
- addi r6,r3,-4
+_GLOBAL_KASAN(memset)
cmplwi 0,r5,4
blt 7f
- stwu r4,4(r6)
+
+ rlwimi r4,r4,8,16,23
+ rlwimi r4,r4,16,0,15
+
+ stw r4,0(r3)
beqlr
- andi. r0,r6,3
+ andi. r0,r3,3
add r5,r0,r5
- subf r6,r0,r6
+ subf r6,r0,r3
+ cmplwi 0,r4,0
+ /*
+ * Skip optimised block until cache is enabled. Will be replaced
+ * by 'bne' during boot to use normal procedure if r4 is not zero.
+ */
+5: b 2f
+ patch_site 5b, patch__memset_nocache
+
clrlwi r7,r6,32-LG_CACHELINE_BYTES
add r8,r7,r5
srwi r9,r8,LG_CACHELINE_BYTES
@@ -103,13 +125,13 @@ _GLOBAL(cacheable_memzero)
bdnz 10b
clrlwi r5,r8,32-LG_CACHELINE_BYTES
addi r5,r5,4
+
2: srwi r0,r5,2
mtctr r0
bdz 6f
1: stwu r4,4(r6)
bdnz 1b
6: andi. r5,r5,3
-7: cmpwi 0,r5,0
beqlr
mtctr r5
addi r6,r6,3
@@ -117,30 +139,15 @@ _GLOBAL(cacheable_memzero)
bdnz 8b
blr
-_GLOBAL(memset)
- rlwimi r4,r4,8,16,23
- rlwimi r4,r4,16,0,15
- addi r6,r3,-4
- cmplwi 0,r5,4
- blt 7f
- stwu r4,4(r6)
- beqlr
- andi. r0,r6,3
- add r5,r0,r5
- subf r6,r0,r6
- srwi r0,r5,2
- mtctr r0
- bdz 6f
-1: stwu r4,4(r6)
- bdnz 1b
-6: andi. r5,r5,3
7: cmpwi 0,r5,0
beqlr
mtctr r5
- addi r6,r6,3
-8: stbu r4,1(r6)
- bdnz 8b
+ addi r6,r3,-1
+9: stbu r4,1(r6)
+ bdnz 9b
blr
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
/*
* This version uses dcbz on the complete cache lines in the
@@ -148,14 +155,26 @@ _GLOBAL(memset)
* the destination area is cacheable.
* We only use this version if the source and dest don't overlap.
* -- paulus.
+ *
+ * During early init, cache might not be active yet, so dcbz cannot be used.
+ * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
+ * replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(cacheable_memcpy)
+_GLOBAL_KASAN(memmove)
+ cmplw 0,r3,r4
+ bgt backwards_memcpy
+ /* fall through */
+
+_GLOBAL_KASAN(memcpy)
+1: b generic_memcpy
+ patch_site 1b, patch__memcpy_nocache
+
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt memcpy /* if regions overlap */
+ blt generic_memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
@@ -170,9 +189,9 @@ _GLOBAL(cacheable_memcpy)
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
- stb r9,4(r6)
addi r4,r4,1
addi r6,r6,1
+ stb r9,3(r6)
bdnz 70b
61: srwi. r0,r0,2
mtctr r0
@@ -214,19 +233,18 @@ _GLOBAL(cacheable_memcpy)
64: andi. r0,r5,3
mtctr r0
beq+ 65f
-40: lbz r0,4(r4)
- stb r0,4(r6)
- addi r4,r4,1
- addi r6,r6,1
+ addi r4,r4,3
+ addi r6,r6,3
+40: lbzu r0,1(r4)
+ stbu r0,1(r6)
bdnz 40b
65: blr
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
-_GLOBAL(memmove)
- cmplw 0,r3,r4
- bgt backwards_memcpy
- /* fall through */
-
-_GLOBAL(memcpy)
+generic_memcpy:
srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
@@ -328,13 +346,10 @@ _GLOBAL(__copy_tofrom_user)
73: stwu r9,4(r6)
bdnz 72b
- .section __ex_table,"a"
- .align 2
- .long 70b,100f
- .long 71b,101f
- .long 72b,102f
- .long 73b,103f
- .text
+ EX_TABLE(70b,100f)
+ EX_TABLE(71b,101f)
+ EX_TABLE(72b,102f)
+ EX_TABLE(73b,103f)
58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
clrlwi r5,r5,32-LG_CACHELINE_BYTES
@@ -369,10 +384,7 @@ _GLOBAL(__copy_tofrom_user)
53: dcbt r3,r4
54: dcbz r11,r6
- .section __ex_table,"a"
- .align 2
- .long 54b,105f
- .text
+ EX_TABLE(54b,105f)
/* the main body of the cacheline loop */
COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
@@ -491,28 +503,13 @@ _GLOBAL(__copy_tofrom_user)
bdnz 130b
/* then clear out the destination: r3 bytes starting at 4(r6) */
132: mfctr r3
- srwi. r0,r3,2
- li r9,0
- mtctr r0
- beq 113f
-112: stwu r9,4(r6)
- bdnz 112b
-113: andi. r0,r3,3
- mtctr r0
- beq 120f
-114: stb r9,4(r6)
- addi r6,r6,1
- bdnz 114b
120: blr
- .section __ex_table,"a"
- .align 2
- .long 30b,108b
- .long 31b,109b
- .long 40b,110b
- .long 41b,111b
- .long 130b,132b
- .long 131b,120b
- .long 112b,120b
- .long 114b,120b
- .text
+ EX_TABLE(30b,108b)
+ EX_TABLE(31b,109b)
+ EX_TABLE(40b,110b)
+ EX_TABLE(41b,111b)
+ EX_TABLE(130b,132b)
+ EX_TABLE(131b,120b)
+
+EXPORT_SYMBOL(__copy_tofrom_user)
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
new file mode 100644
index 000000000000..bf1014b28fe8
--- /dev/null
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+
+ .macro err1
+100:
+ EX_TABLE(100b,.Ldo_err1)
+ .endm
+
+ .macro err2
+200:
+ EX_TABLE(200b,.Ldo_err2)
+ .endm
+
+ .macro err3
+300: EX_TABLE(300b,.Ldone)
+ .endm
+
+.Ldo_err2:
+ ld r22,STK_REG(R22)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r14,STK_REG(R14)(r1)
+ addi r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+ /* Do a byte by byte copy to get the exact remaining size */
+ mtctr r7
+46:
+err3; lbz r0,0(r4)
+ addi r4,r4,1
+err3; stb r0,0(r3)
+ addi r3,r3,1
+ bdnz 46b
+ li r3,0
+ blr
+
+.Ldone:
+ mfctr r3
+ blr
+
+
+_GLOBAL(copy_mc_generic)
+ mr r7,r5
+ cmpldi r5,16
+ blt .Lshort_copy
+
+.Lcopy:
+ /* Get the source 8B aligned */
+ neg r6,r4
+ mtocrf 0x01,r6
+ clrldi r6,r6,(64-3)
+
+ bf cr7*4+3,1f
+err1; lbz r0,0(r4)
+ addi r4,r4,1
+err1; stb r0,0(r3)
+ addi r3,r3,1
+ subi r7,r7,1
+
+1: bf cr7*4+2,2f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+2: bf cr7*4+1,3f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+3: sub r5,r5,r6
+ cmpldi r5,128
+
+ mflr r0
+ stdu r1,-STACKFRAMESIZE(r1)
+ std r14,STK_REG(R14)(r1)
+ std r15,STK_REG(R15)(r1)
+ std r16,STK_REG(R16)(r1)
+ std r17,STK_REG(R17)(r1)
+ std r18,STK_REG(R18)(r1)
+ std r19,STK_REG(R19)(r1)
+ std r20,STK_REG(R20)(r1)
+ std r21,STK_REG(R21)(r1)
+ std r22,STK_REG(R22)(r1)
+ std r0,STACKFRAMESIZE+16(r1)
+
+ blt 5f
+ srdi r6,r5,7
+ mtctr r6
+
+ /* Now do cacheline (128B) sized loads and stores. */
+ .align 5
+4:
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+err2; ld r15,64(r4)
+err2; ld r16,72(r4)
+err2; ld r17,80(r4)
+err2; ld r18,88(r4)
+err2; ld r19,96(r4)
+err2; ld r20,104(r4)
+err2; ld r21,112(r4)
+err2; ld r22,120(r4)
+ addi r4,r4,128
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+err2; std r15,64(r3)
+err2; std r16,72(r3)
+err2; std r17,80(r3)
+err2; std r18,88(r3)
+err2; std r19,96(r3)
+err2; std r20,104(r3)
+err2; std r21,112(r3)
+err2; std r22,120(r3)
+ addi r3,r3,128
+ subi r7,r7,128
+ bdnz 4b
+
+ clrldi r5,r5,(64-7)
+
+ /* Up to 127B to go */
+5: srdi r6,r5,4
+ mtocrf 0x01,r6
+
+6: bf cr7*4+1,7f
+err2; ld r0,0(r4)
+err2; ld r6,8(r4)
+err2; ld r8,16(r4)
+err2; ld r9,24(r4)
+err2; ld r10,32(r4)
+err2; ld r11,40(r4)
+err2; ld r12,48(r4)
+err2; ld r14,56(r4)
+ addi r4,r4,64
+err2; std r0,0(r3)
+err2; std r6,8(r3)
+err2; std r8,16(r3)
+err2; std r9,24(r3)
+err2; std r10,32(r3)
+err2; std r11,40(r3)
+err2; std r12,48(r3)
+err2; std r14,56(r3)
+ addi r3,r3,64
+ subi r7,r7,64
+
+7: ld r14,STK_REG(R14)(r1)
+ ld r15,STK_REG(R15)(r1)
+ ld r16,STK_REG(R16)(r1)
+ ld r17,STK_REG(R17)(r1)
+ ld r18,STK_REG(R18)(r1)
+ ld r19,STK_REG(R19)(r1)
+ ld r20,STK_REG(R20)(r1)
+ ld r21,STK_REG(R21)(r1)
+ ld r22,STK_REG(R22)(r1)
+ addi r1,r1,STACKFRAMESIZE
+
+ /* Up to 63B to go */
+ bf cr7*4+2,8f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+err1; ld r8,16(r4)
+err1; ld r9,24(r4)
+ addi r4,r4,32
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+err1; std r8,16(r3)
+err1; std r9,24(r3)
+ addi r3,r3,32
+ subi r7,r7,32
+
+ /* Up to 31B to go */
+8: bf cr7*4+3,9f
+err1; ld r0,0(r4)
+err1; ld r6,8(r4)
+ addi r4,r4,16
+err1; std r0,0(r3)
+err1; std r6,8(r3)
+ addi r3,r3,16
+ subi r7,r7,16
+
+9: clrldi r5,r5,(64-4)
+
+ /* Up to 15B to go */
+.Lshort_copy:
+ mtocrf 0x01,r5
+ bf cr7*4+0,12f
+err1; lwz r0,0(r4) /* Less chance of a reject with word ops */
+err1; lwz r6,4(r4)
+ addi r4,r4,8
+err1; stw r0,0(r3)
+err1; stw r6,4(r3)
+ addi r3,r3,8
+ subi r7,r7,8
+
+12: bf cr7*4+1,13f
+err1; lwz r0,0(r4)
+ addi r4,r4,4
+err1; stw r0,0(r3)
+ addi r3,r3,4
+ subi r7,r7,4
+
+13: bf cr7*4+2,14f
+err1; lhz r0,0(r4)
+ addi r4,r4,2
+err1; sth r0,0(r3)
+ addi r3,r3,2
+ subi r7,r7,2
+
+14: bf cr7*4+3,15f
+err1; lbz r0,0(r4)
+err1; stb r0,0(r3)
+
+15: li r3,0
+ blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);
diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S
index a3c4dc4defdd..f33a2e6088e5 100644
--- a/arch/powerpc/lib/copypage_64.S
+++ b/arch/powerpc/lib/copypage_64.S
@@ -1,32 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2008 Mark Nelson, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
-
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
+#include <asm/feature-fixups.h>
_GLOBAL_TOC(copy_page)
BEGIN_FTR_SECTION
lis r5,PAGE_SIZE@h
FTR_SECTION_ELSE
+#ifdef CONFIG_PPC_BOOK3S_64
b copypage_power7
+#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
ori r5,r5,PAGE_SIZE@l
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /*
+ * Hack for toolchain - prefixed instructions cause label difference to
+ * be non-constant even if 8 byte alignment is known, so they can not
+ * be put in FTR sections.
+ */
+ LOAD_REG_ADDR(r10, ppc64_caches)
+BEGIN_FTR_SECTION
+#else
BEGIN_FTR_SECTION
- ld r10,PPC64_CACHES@toc(r2)
- lwz r11,DCACHEL1LOGLINESIZE(r10) /* log2 of cache line size */
- lwz r12,DCACHEL1LINESIZE(r10) /* get cache line size */
+ LOAD_REG_ADDR(r10, ppc64_caches)
+#endif
+ lwz r11,DCACHEL1LOGBLOCKSIZE(r10) /* log2 of cache block size */
+ lwz r12,DCACHEL1BLOCKSIZE(r10) /* get cache block size */
li r9,0
srd r8,r5,r11
@@ -110,3 +115,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_CP_USE_DCBTZ)
std r11,120(r3)
std r12,128(r3)
blr
+EXPORT_SYMBOL(copy_page)
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index d7dafb3777ac..07e7cec4d135 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
@@ -39,20 +27,7 @@ _GLOBAL(copypage_power7)
#endif
ori r10,r7,1 /* stream=1 */
- lis r8,0x8000 /* GO=1 */
- clrldi r8,r8,32
-
-.machine push
-.machine "power4"
- /* setup read stream 0 */
- dcbt r0,r4,0b01000 /* addr from */
- dcbt r0,r7,0b01010 /* length and depth from */
- /* setup write stream 1 */
- dcbtst r0,r9,0b01000 /* addr to */
- dcbtst r0,r10,0b01010 /* length and depth to */
- eieio
- dcbt r0,r8,0b01010 /* all streams GO */
-.machine pop
+ DCBT_SETUP_STREAMS(r4, r7, r9, r10, r8)
#ifdef CONFIG_ALTIVEC
mflr r0
@@ -60,7 +35,7 @@ _GLOBAL(copypage_power7)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl enter_vmx_copy
+ bl CFUNC(enter_vmx_ops)
cmpwi r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
@@ -83,27 +58,27 @@ _GLOBAL(copypage_power7)
li r12,112
.align 5
-1: lvx vr7,r0,r4
- lvx vr6,r4,r6
- lvx vr5,r4,r7
- lvx vr4,r4,r8
- lvx vr3,r4,r9
- lvx vr2,r4,r10
- lvx vr1,r4,r11
- lvx vr0,r4,r12
+1: lvx v7,0,r4
+ lvx v6,r4,r6
+ lvx v5,r4,r7
+ lvx v4,r4,r8
+ lvx v3,r4,r9
+ lvx v2,r4,r10
+ lvx v1,r4,r11
+ lvx v0,r4,r12
addi r4,r4,128
- stvx vr7,r0,r3
- stvx vr6,r3,r6
- stvx vr5,r3,r7
- stvx vr4,r3,r8
- stvx vr3,r3,r9
- stvx vr2,r3,r10
- stvx vr1,r3,r11
- stvx vr0,r3,r12
+ stvx v7,0,r3
+ stvx v6,r3,r6
+ stvx v5,r3,r7
+ stvx v4,r3,r8
+ stvx v3,r3,r9
+ stvx v2,r3,r10
+ stvx v1,r3,r11
+ stvx v0,r3,r12
addi r3,r3,128
bdnz 1b
- b exit_vmx_copy /* tail call optimise */
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
#else
li r0,(PAGE_SIZE/128)
diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S
index f09899e35991..9af969d2cc0c 100644
--- a/arch/powerpc/lib/copyuser_64.S
+++ b/arch/powerpc/lib/copyuser_64.S
@@ -1,13 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+
+#ifndef SELFTEST_CASE
+/* 0 == most CPUs, 1 == POWER6, 2 == Cell */
+#define SELFTEST_CASE 0
+#endif
#ifdef __BIG_ENDIAN__
#define sLd sld /* Shift towards low-numbered address. */
@@ -17,15 +21,39 @@
#define sHd sld /* Shift towards high-numbered address. */
#endif
+/*
+ * These macros are used to generate exception table entries.
+ * The exception handlers below use the original arguments
+ * (stored on the stack) and the point where we're up to in
+ * the destination buffer, i.e. the address of the first
+ * unmodified byte. Generally r3 points into the destination
+ * buffer, but the first unmodified byte is at a variable
+ * offset from r3. In the code below, the symbol r3_offset
+ * is set to indicate the current offset at each point in
+ * the code. This offset is then used as a negative offset
+ * from the exception handler code, and those instructions
+ * before the exception handlers are addi instructions that
+ * adjust r3 to point to the correct place.
+ */
+ .macro lex /* exception handler for load */
+100: EX_TABLE(100b, .Lld_exc - r3_offset)
+ .endm
+
+ .macro stex /* exception handler for store */
+100: EX_TABLE(100b, .Lst_exc - r3_offset)
+ .endm
+
.align 7
_GLOBAL_TOC(__copy_tofrom_user)
+#ifdef CONFIG_PPC_BOOK3S_64
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
b __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
+#endif
_GLOBAL(__copy_tofrom_user_base)
- /* first check for a whole page copy on a page boundary */
+ /* first check for a 4kB copy on a 4kB boundary */
cmpldi cr1,r5,16
cmpdi cr6,r5,4096
or r0,r3,r4
@@ -46,6 +74,7 @@ _GLOBAL(__copy_tofrom_user_base)
* At the time of writing the only CPU that has this combination of bits
* set is Power6.
*/
+test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
@@ -54,6 +83,8 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
addi r3,r3,-16
+r3_offset = 16
+test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
andi. r0,r4,7
bne .Lsrc_unaligned
@@ -61,57 +92,69 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blt cr1,.Ldo_tail /* if < 16 bytes to copy */
srdi r0,r5,5
cmpdi cr1,r0,0
-20: ld r7,0(r4)
-220: ld r6,8(r4)
+lex; ld r7,0(r4)
+lex; ld r6,8(r4)
addi r4,r4,16
mtctr r0
andi. r0,r5,0x10
beq 22f
addi r3,r3,16
+r3_offset = 0
addi r4,r4,-16
mr r9,r7
mr r8,r6
beq cr1,72f
-21: ld r7,16(r4)
-221: ld r6,24(r4)
+21:
+lex; ld r7,16(r4)
+lex; ld r6,24(r4)
addi r4,r4,32
-70: std r9,0(r3)
-270: std r8,8(r3)
-22: ld r9,0(r4)
-222: ld r8,8(r4)
-71: std r7,16(r3)
-271: std r6,24(r3)
+stex; std r9,0(r3)
+r3_offset = 8
+stex; std r8,8(r3)
+r3_offset = 16
+22:
+lex; ld r9,0(r4)
+lex; ld r8,8(r4)
+stex; std r7,16(r3)
+r3_offset = 24
+stex; std r6,24(r3)
addi r3,r3,32
+r3_offset = 0
bdnz 21b
-72: std r9,0(r3)
-272: std r8,8(r3)
+72:
+stex; std r9,0(r3)
+r3_offset = 8
+stex; std r8,8(r3)
+r3_offset = 16
andi. r5,r5,0xf
beq+ 3f
addi r4,r4,16
.Ldo_tail:
addi r3,r3,16
+r3_offset = 0
bf cr7*4+0,246f
-244: ld r9,0(r4)
+lex; ld r9,0(r4)
addi r4,r4,8
-245: std r9,0(r3)
+stex; std r9,0(r3)
addi r3,r3,8
246: bf cr7*4+1,1f
-23: lwz r9,0(r4)
+lex; lwz r9,0(r4)
addi r4,r4,4
-73: stw r9,0(r3)
+stex; stw r9,0(r3)
addi r3,r3,4
1: bf cr7*4+2,2f
-44: lhz r9,0(r4)
+lex; lhz r9,0(r4)
addi r4,r4,2
-74: sth r9,0(r3)
+stex; sth r9,0(r3)
addi r3,r3,2
2: bf cr7*4+3,3f
-45: lbz r9,0(r4)
-75: stb r9,0(r3)
+lex; lbz r9,0(r4)
+stex; stb r9,0(r3)
3: li r3,0
blr
.Lsrc_unaligned:
+r3_offset = 16
srdi r6,r5,3
addi r5,r5,-16
subf r4,r0,r4
@@ -124,58 +167,69 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
add r5,r5,r0
bt cr7*4+0,28f
-24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */
-25: ld r0,8(r4)
+lex; ld r9,0(r4) /* 3+2n loads, 2+2n stores */
+lex; ld r0,8(r4)
sLd r6,r9,r10
-26: ldu r9,16(r4)
+lex; ldu r9,16(r4)
sHd r7,r0,r11
sLd r8,r0,r10
or r7,r7,r6
blt cr6,79f
-27: ld r0,8(r4)
+lex; ld r0,8(r4)
b 2f
-28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */
-29: ldu r9,8(r4)
+28:
+lex; ld r0,0(r4) /* 4+2n loads, 3+2n stores */
+lex; ldu r9,8(r4)
sLd r8,r0,r10
addi r3,r3,-8
+r3_offset = 24
blt cr6,5f
-30: ld r0,8(r4)
+lex; ld r0,8(r4)
sHd r12,r9,r11
sLd r6,r9,r10
-31: ldu r9,16(r4)
+lex; ldu r9,16(r4)
or r12,r8,r12
sHd r7,r0,r11
sLd r8,r0,r10
addi r3,r3,16
+r3_offset = 8
beq cr6,78f
1: or r7,r7,r6
-32: ld r0,8(r4)
-76: std r12,8(r3)
+lex; ld r0,8(r4)
+stex; std r12,8(r3)
+r3_offset = 16
2: sHd r12,r9,r11
sLd r6,r9,r10
-33: ldu r9,16(r4)
+lex; ldu r9,16(r4)
or r12,r8,r12
-77: stdu r7,16(r3)
+stex; stdu r7,16(r3)
+r3_offset = 8
sHd r7,r0,r11
sLd r8,r0,r10
bdnz 1b
-78: std r12,8(r3)
+78:
+stex; std r12,8(r3)
+r3_offset = 16
or r7,r7,r6
-79: std r7,16(r3)
+79:
+stex; std r7,16(r3)
+r3_offset = 24
5: sHd r12,r9,r11
or r12,r8,r12
-80: std r12,24(r3)
+stex; std r12,24(r3)
+r3_offset = 32
bne 6f
li r3,0
blr
6: cmpwi cr1,r5,8
addi r3,r3,32
+r3_offset = 0
sLd r9,r9,r10
ble cr1,7f
-34: ld r0,8(r4)
+lex; ld r0,8(r4)
sHd r7,r0,r11
or r9,r7,r9
7:
@@ -183,7 +237,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
#ifdef __BIG_ENDIAN__
rotldi r9,r9,32
#endif
-94: stw r9,0(r3)
+stex; stw r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,32
#endif
@@ -192,7 +246,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
#ifdef __BIG_ENDIAN__
rotldi r9,r9,16
#endif
-95: sth r9,0(r3)
+stex; sth r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,16
#endif
@@ -201,7 +255,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
#ifdef __BIG_ENDIAN__
rotldi r9,r9,8
#endif
-96: stb r9,0(r3)
+stex; stb r9,0(r3)
#ifdef __LITTLE_ENDIAN__
rotrdi r9,r9,8
#endif
@@ -209,47 +263,55 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
.Ldst_unaligned:
+r3_offset = 0
PPC_MTOCRF(0x01,r6) /* put #bytes to 8B bdry into cr7 */
subf r5,r6,r5
li r7,0
cmpldi cr1,r5,16
bf cr7*4+3,1f
-35: lbz r0,0(r4)
-81: stb r0,0(r3)
+100: EX_TABLE(100b, .Lld_exc_r7)
+ lbz r0,0(r4)
+100: EX_TABLE(100b, .Lst_exc_r7)
+ stb r0,0(r3)
addi r7,r7,1
1: bf cr7*4+2,2f
-36: lhzx r0,r7,r4
-82: sthx r0,r7,r3
+100: EX_TABLE(100b, .Lld_exc_r7)
+ lhzx r0,r7,r4
+100: EX_TABLE(100b, .Lst_exc_r7)
+ sthx r0,r7,r3
addi r7,r7,2
2: bf cr7*4+1,3f
-37: lwzx r0,r7,r4
-83: stwx r0,r7,r3
+100: EX_TABLE(100b, .Lld_exc_r7)
+ lwzx r0,r7,r4
+100: EX_TABLE(100b, .Lst_exc_r7)
+ stwx r0,r7,r3
3: PPC_MTOCRF(0x01,r5)
add r4,r6,r4
add r3,r6,r3
b .Ldst_aligned
.Lshort_copy:
+r3_offset = 0
bf cr7*4+0,1f
-38: lwz r0,0(r4)
-39: lwz r9,4(r4)
+lex; lwz r0,0(r4)
+lex; lwz r9,4(r4)
addi r4,r4,8
-84: stw r0,0(r3)
-85: stw r9,4(r3)
+stex; stw r0,0(r3)
+stex; stw r9,4(r3)
addi r3,r3,8
1: bf cr7*4+1,2f
-40: lwz r0,0(r4)
+lex; lwz r0,0(r4)
addi r4,r4,4
-86: stw r0,0(r3)
+stex; stw r0,0(r3)
addi r3,r3,4
2: bf cr7*4+2,3f
-41: lhz r0,0(r4)
+lex; lhz r0,0(r4)
addi r4,r4,2
-87: sth r0,0(r3)
+stex; sth r0,0(r3)
addi r3,r3,2
3: bf cr7*4+3,4f
-42: lbz r0,0(r4)
-88: stb r0,0(r3)
+lex; lbz r0,0(r4)
+stex; stb r0,0(r3)
4: li r3,0
blr
@@ -257,48 +319,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
* exception handlers follow
* we have to return the number of bytes not copied
* for an exception on a load, we set the rest of the destination to 0
+ * Note that the number of bytes of instructions for adjusting r3 needs
+ * to equal the amount of the adjustment, due to the trick of using
+ * .Lld_exc - r3_offset as the handler address.
*/
-136:
-137:
+.Lld_exc_r7:
add r3,r3,r7
- b 1f
-130:
-131:
+ b .Lld_exc
+
+ /* adjust by 24 */
addi r3,r3,8
-120:
-320:
-122:
-322:
-124:
-125:
-126:
-127:
-128:
-129:
-133:
+ nop
+ /* adjust by 16 */
addi r3,r3,8
-132:
+ nop
+ /* adjust by 8 */
addi r3,r3,8
-121:
-321:
-344:
-134:
-135:
-138:
-139:
-140:
-141:
-142:
-123:
-144:
-145:
+ nop
/*
- * here we have had a fault on a load and r3 points to the first
- * unmodified byte of the destination
+ * Here we have had a fault on a load and r3 points to the first
+ * unmodified byte of the destination. We use the original arguments
+ * and r3 to work out how much wasn't copied. Since we load some
+ * distance ahead of the stores, we continue copying byte-by-byte until
+ * we hit the load fault again in order to copy as much as possible.
*/
-1: ld r6,-24(r1)
+.Lld_exc:
+ ld r6,-24(r1)
ld r4,-16(r1)
ld r5,-8(r1)
subf r6,r6,r3
@@ -309,160 +357,76 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
* first see if we can copy any more bytes before hitting another exception
*/
mtctr r5
+r3_offset = 0
+100: EX_TABLE(100b, .Ldone)
43: lbz r0,0(r4)
addi r4,r4,1
-89: stb r0,0(r3)
+stex; stb r0,0(r3)
addi r3,r3,1
bdnz 43b
li r3,0 /* huh? all copied successfully this time? */
blr
/*
- * here we have trapped again, need to clear ctr bytes starting at r3
+ * here we have trapped again, amount remaining is in ctr.
*/
-143: mfctr r5
- li r0,0
- mr r4,r3
- mr r3,r5 /* return the number of bytes not copied */
-1: andi. r9,r4,7
- beq 3f
-90: stb r0,0(r4)
- addic. r5,r5,-1
- addi r4,r4,1
- bne 1b
- blr
-3: cmpldi cr1,r5,8
- srdi r9,r5,3
- andi. r5,r5,7
- blt cr1,93f
- mtctr r9
-91: std r0,0(r4)
- addi r4,r4,8
- bdnz 91b
-93: beqlr
- mtctr r5
-92: stb r0,0(r4)
- addi r4,r4,1
- bdnz 92b
+.Ldone:
+ mfctr r3
blr
/*
- * exception handlers for stores: we just need to work
- * out how many bytes weren't copied
+ * exception handlers for stores: we need to work out how many bytes
+ * weren't copied, and we may need to copy some more.
+ * Note that the number of bytes of instructions for adjusting r3 needs
+ * to equal the amount of the adjustment, due to the trick of using
+ * .Lst_exc - r3_offset as the handler address.
*/
-182:
-183:
+.Lst_exc_r7:
add r3,r3,r7
- b 1f
-371:
-180:
+ b .Lst_exc
+
+ /* adjust by 24 */
addi r3,r3,8
-171:
-177:
+ nop
+ /* adjust by 16 */
addi r3,r3,8
-370:
-372:
-176:
-178:
+ nop
+ /* adjust by 8 */
addi r3,r3,4
-185:
+ /* adjust by 4 */
addi r3,r3,4
-170:
-172:
-345:
-173:
-174:
-175:
-179:
-181:
-184:
-186:
-187:
-188:
-189:
-194:
-195:
-196:
-1:
- ld r6,-24(r1)
- ld r5,-8(r1)
- add r6,r6,r5
- subf r3,r3,r6 /* #bytes not copied */
-190:
-191:
-192:
- blr /* #bytes not copied in r3 */
-
- .section __ex_table,"a"
- .align 3
- .llong 20b,120b
- .llong 220b,320b
- .llong 21b,121b
- .llong 221b,321b
- .llong 70b,170b
- .llong 270b,370b
- .llong 22b,122b
- .llong 222b,322b
- .llong 71b,171b
- .llong 271b,371b
- .llong 72b,172b
- .llong 272b,372b
- .llong 244b,344b
- .llong 245b,345b
- .llong 23b,123b
- .llong 73b,173b
- .llong 44b,144b
- .llong 74b,174b
- .llong 45b,145b
- .llong 75b,175b
- .llong 24b,124b
- .llong 25b,125b
- .llong 26b,126b
- .llong 27b,127b
- .llong 28b,128b
- .llong 29b,129b
- .llong 30b,130b
- .llong 31b,131b
- .llong 32b,132b
- .llong 76b,176b
- .llong 33b,133b
- .llong 77b,177b
- .llong 78b,178b
- .llong 79b,179b
- .llong 80b,180b
- .llong 34b,134b
- .llong 94b,194b
- .llong 95b,195b
- .llong 96b,196b
- .llong 35b,135b
- .llong 81b,181b
- .llong 36b,136b
- .llong 82b,182b
- .llong 37b,137b
- .llong 83b,183b
- .llong 38b,138b
- .llong 39b,139b
- .llong 84b,184b
- .llong 85b,185b
- .llong 40b,140b
- .llong 86b,186b
- .llong 41b,141b
- .llong 87b,187b
- .llong 42b,142b
- .llong 88b,188b
- .llong 43b,143b
- .llong 89b,189b
- .llong 90b,190b
- .llong 91b,191b
- .llong 92b,192b
-
- .text
+.Lst_exc:
+ ld r6,-24(r1) /* original destination pointer */
+ ld r4,-16(r1) /* original source pointer */
+ ld r5,-8(r1) /* original number of bytes */
+ add r7,r6,r5
+ /*
+ * If the destination pointer isn't 8-byte aligned,
+ * we may have got the exception as a result of a
+ * store that overlapped a page boundary, so we may be
+ * able to copy a few more bytes.
+ */
+17: andi. r0,r3,7
+ beq 19f
+ subf r8,r6,r3 /* #bytes copied */
+100: EX_TABLE(100b,19f)
+ lbzx r0,r8,r4
+100: EX_TABLE(100b,19f)
+ stb r0,0(r3)
+ addi r3,r3,1
+ cmpld r3,r7
+ blt 17b
+19: subf r3,r3,r7 /* #bytes not copied in r3 */
+ blr
/*
* Routine to copy a whole page of data, optimized for POWER4.
* On POWER4 it is more than 50% faster than the simple loop
* above (following the .Ldst_aligned label).
*/
+ .macro exc
+100: EX_TABLE(100b, .Labort)
+ .endm
.Lcopy_page_4K:
std r31,-32(1)
std r30,-40(1)
@@ -481,86 +445,86 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
li r0,5
0: addi r5,r5,-24
mtctr r0
-20: ld r22,640(4)
-21: ld r21,512(4)
-22: ld r20,384(4)
-23: ld r11,256(4)
-24: ld r9,128(4)
-25: ld r7,0(4)
-26: ld r25,648(4)
-27: ld r24,520(4)
-28: ld r23,392(4)
-29: ld r10,264(4)
-30: ld r8,136(4)
-31: ldu r6,8(4)
+exc; ld r22,640(4)
+exc; ld r21,512(4)
+exc; ld r20,384(4)
+exc; ld r11,256(4)
+exc; ld r9,128(4)
+exc; ld r7,0(4)
+exc; ld r25,648(4)
+exc; ld r24,520(4)
+exc; ld r23,392(4)
+exc; ld r10,264(4)
+exc; ld r8,136(4)
+exc; ldu r6,8(4)
cmpwi r5,24
1:
-32: std r22,648(3)
-33: std r21,520(3)
-34: std r20,392(3)
-35: std r11,264(3)
-36: std r9,136(3)
-37: std r7,8(3)
-38: ld r28,648(4)
-39: ld r27,520(4)
-40: ld r26,392(4)
-41: ld r31,264(4)
-42: ld r30,136(4)
-43: ld r29,8(4)
-44: std r25,656(3)
-45: std r24,528(3)
-46: std r23,400(3)
-47: std r10,272(3)
-48: std r8,144(3)
-49: std r6,16(3)
-50: ld r22,656(4)
-51: ld r21,528(4)
-52: ld r20,400(4)
-53: ld r11,272(4)
-54: ld r9,144(4)
-55: ld r7,16(4)
-56: std r28,664(3)
-57: std r27,536(3)
-58: std r26,408(3)
-59: std r31,280(3)
-60: std r30,152(3)
-61: stdu r29,24(3)
-62: ld r25,664(4)
-63: ld r24,536(4)
-64: ld r23,408(4)
-65: ld r10,280(4)
-66: ld r8,152(4)
-67: ldu r6,24(4)
+exc; std r22,648(3)
+exc; std r21,520(3)
+exc; std r20,392(3)
+exc; std r11,264(3)
+exc; std r9,136(3)
+exc; std r7,8(3)
+exc; ld r28,648(4)
+exc; ld r27,520(4)
+exc; ld r26,392(4)
+exc; ld r31,264(4)
+exc; ld r30,136(4)
+exc; ld r29,8(4)
+exc; std r25,656(3)
+exc; std r24,528(3)
+exc; std r23,400(3)
+exc; std r10,272(3)
+exc; std r8,144(3)
+exc; std r6,16(3)
+exc; ld r22,656(4)
+exc; ld r21,528(4)
+exc; ld r20,400(4)
+exc; ld r11,272(4)
+exc; ld r9,144(4)
+exc; ld r7,16(4)
+exc; std r28,664(3)
+exc; std r27,536(3)
+exc; std r26,408(3)
+exc; std r31,280(3)
+exc; std r30,152(3)
+exc; stdu r29,24(3)
+exc; ld r25,664(4)
+exc; ld r24,536(4)
+exc; ld r23,408(4)
+exc; ld r10,280(4)
+exc; ld r8,152(4)
+exc; ldu r6,24(4)
bdnz 1b
-68: std r22,648(3)
-69: std r21,520(3)
-70: std r20,392(3)
-71: std r11,264(3)
-72: std r9,136(3)
-73: std r7,8(3)
-74: addi r4,r4,640
-75: addi r3,r3,648
+exc; std r22,648(3)
+exc; std r21,520(3)
+exc; std r20,392(3)
+exc; std r11,264(3)
+exc; std r9,136(3)
+exc; std r7,8(3)
+ addi r4,r4,640
+ addi r3,r3,648
bge 0b
mtctr r5
-76: ld r7,0(4)
-77: ld r8,8(4)
-78: ldu r9,16(4)
+exc; ld r7,0(4)
+exc; ld r8,8(4)
+exc; ldu r9,16(4)
3:
-79: ld r10,8(4)
-80: std r7,8(3)
-81: ld r7,16(4)
-82: std r8,16(3)
-83: ld r8,24(4)
-84: std r9,24(3)
-85: ldu r9,32(4)
-86: stdu r10,32(3)
+exc; ld r10,8(4)
+exc; std r7,8(3)
+exc; ld r7,16(4)
+exc; std r8,16(3)
+exc; ld r8,24(4)
+exc; std r9,24(3)
+exc; ldu r9,32(4)
+exc; stdu r10,32(3)
bdnz 3b
4:
-87: ld r10,8(4)
-88: std r7,8(3)
-89: std r8,16(3)
-90: std r9,24(3)
-91: std r10,32(3)
+exc; ld r10,8(4)
+exc; std r7,8(3)
+exc; std r8,16(3)
+exc; std r9,24(3)
+exc; std r10,32(3)
9: ld r20,-120(1)
ld r21,-112(1)
ld r22,-104(1)
@@ -580,7 +544,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
* on an exception, reset to the beginning and jump back into the
* standard __copy_tofrom_user
*/
-100: ld r20,-120(1)
+.Labort:
+ ld r20,-120(1)
ld r21,-112(1)
ld r22,-104(1)
ld r23,-96(1)
@@ -596,78 +561,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
ld r4,-16(r1)
li r5,4096
b .Ldst_aligned
-
- .section __ex_table,"a"
- .align 3
- .llong 20b,100b
- .llong 21b,100b
- .llong 22b,100b
- .llong 23b,100b
- .llong 24b,100b
- .llong 25b,100b
- .llong 26b,100b
- .llong 27b,100b
- .llong 28b,100b
- .llong 29b,100b
- .llong 30b,100b
- .llong 31b,100b
- .llong 32b,100b
- .llong 33b,100b
- .llong 34b,100b
- .llong 35b,100b
- .llong 36b,100b
- .llong 37b,100b
- .llong 38b,100b
- .llong 39b,100b
- .llong 40b,100b
- .llong 41b,100b
- .llong 42b,100b
- .llong 43b,100b
- .llong 44b,100b
- .llong 45b,100b
- .llong 46b,100b
- .llong 47b,100b
- .llong 48b,100b
- .llong 49b,100b
- .llong 50b,100b
- .llong 51b,100b
- .llong 52b,100b
- .llong 53b,100b
- .llong 54b,100b
- .llong 55b,100b
- .llong 56b,100b
- .llong 57b,100b
- .llong 58b,100b
- .llong 59b,100b
- .llong 60b,100b
- .llong 61b,100b
- .llong 62b,100b
- .llong 63b,100b
- .llong 64b,100b
- .llong 65b,100b
- .llong 66b,100b
- .llong 67b,100b
- .llong 68b,100b
- .llong 69b,100b
- .llong 70b,100b
- .llong 71b,100b
- .llong 72b,100b
- .llong 73b,100b
- .llong 74b,100b
- .llong 75b,100b
- .llong 76b,100b
- .llong 77b,100b
- .llong 78b,100b
- .llong 79b,100b
- .llong 80b,100b
- .llong 81b,100b
- .llong 82b,100b
- .llong 83b,100b
- .llong 84b,100b
- .llong 85b,100b
- .llong 86b,100b
- .llong 87b,100b
- .llong 88b,100b
- .llong 89b,100b
- .llong 90b,100b
- .llong 91b,100b
+EXPORT_SYMBOL(__copy_tofrom_user)
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index c46c876ac96a..8474c682a178 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2011
*
@@ -19,6 +7,11 @@
*/
#include <asm/ppc_asm.h>
+#ifndef SELFTEST_CASE
+/* 0 == don't use VMX, 1 == use VMX */
+#define SELFTEST_CASE 0
+#endif
+
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
@@ -29,35 +22,23 @@
.macro err1
100:
- .section __ex_table,"a"
- .align 3
- .llong 100b,.Ldo_err1
- .previous
+ EX_TABLE(100b,.Ldo_err1)
.endm
.macro err2
200:
- .section __ex_table,"a"
- .align 3
- .llong 200b,.Ldo_err2
- .previous
+ EX_TABLE(200b,.Ldo_err2)
.endm
#ifdef CONFIG_ALTIVEC
.macro err3
300:
- .section __ex_table,"a"
- .align 3
- .llong 300b,.Ldo_err3
- .previous
+ EX_TABLE(300b,.Ldo_err3)
.endm
.macro err4
400:
- .section __ex_table,"a"
- .align 3
- .llong 400b,.Ldo_err4
- .previous
+ EX_TABLE(400b,.Ldo_err4)
.endm
@@ -66,7 +47,7 @@
ld r15,STK_REG(R15)(r1)
ld r14,STK_REG(R14)(r1)
.Ldo_err3:
- bl exit_vmx_usercopy
+ bl CFUNC(exit_vmx_usercopy)
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
@@ -92,24 +73,20 @@
_GLOBAL(__copy_tofrom_user_power7)
-#ifdef CONFIG_ALTIVEC
cmpldi r5,16
- cmpldi cr1,r5,4096
+ cmpldi cr1,r5,3328
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
blt .Lshort_copy
- bgt cr1,.Lvmx_copy
-#else
- cmpldi r5,16
-
- std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
- std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
- std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
- blt .Lshort_copy
+#ifdef CONFIG_ALTIVEC
+test_feature = SELFTEST_CASE
+BEGIN_FTR_SECTION
+ bgt cr1,.Lvmx_copy
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
.Lnonvmx_copy:
@@ -290,12 +267,12 @@ err1; stb r0,0(r3)
addi r1,r1,STACKFRAMESIZE
b .Lnonvmx_copy
-#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
+#ifdef CONFIG_ALTIVEC
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl enter_vmx_usercopy
+ bl CFUNC(enter_vmx_usercopy)
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
@@ -321,20 +298,7 @@ err1; stb r0,0(r3)
or r7,r7,r0
ori r10,r7,1 /* stream=1 */
- lis r8,0x8000 /* GO=1 */
- clrldi r8,r8,32
-
-.machine push
-.machine "power4"
- /* setup read stream 0 */
- dcbt r0,r6,0b01000 /* addr from */
- dcbt r0,r7,0b01010 /* length and depth from */
- /* setup write stream 1 */
- dcbtst r0,r9,0b01000 /* addr to */
- dcbtst r0,r10,0b01010 /* length and depth to */
- eieio
- dcbt r0,r8,0b01010 /* all streams GO */
-.machine pop
+ DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
beq cr1,.Lunwind_stack_nonvmx_copy
@@ -388,29 +352,29 @@ err3; std r0,0(r3)
li r11,48
bf cr7*4+3,5f
-err3; lvx vr1,r0,r4
+err3; lvx v1,0,r4
addi r4,r4,16
-err3; stvx vr1,r0,r3
+err3; stvx v1,0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
-err3; lvx vr1,r0,r4
-err3; lvx vr0,r4,r9
+err3; lvx v1,0,r4
+err3; lvx v0,r4,r9
addi r4,r4,32
-err3; stvx vr1,r0,r3
-err3; stvx vr0,r3,r9
+err3; stvx v1,0,r3
+err3; stvx v0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
-err3; lvx vr3,r0,r4
-err3; lvx vr2,r4,r9
-err3; lvx vr1,r4,r10
-err3; lvx vr0,r4,r11
+err3; lvx v3,0,r4
+err3; lvx v2,r4,r9
+err3; lvx v1,r4,r10
+err3; lvx v0,r4,r11
addi r4,r4,64
-err3; stvx vr3,r0,r3
-err3; stvx vr2,r3,r9
-err3; stvx vr1,r3,r10
-err3; stvx vr0,r3,r11
+err3; stvx v3,0,r3
+err3; stvx v2,r3,r9
+err3; stvx v1,r3,r10
+err3; stvx v0,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
@@ -433,23 +397,23 @@ err3; stvx vr0,r3,r11
*/
.align 5
8:
-err4; lvx vr7,r0,r4
-err4; lvx vr6,r4,r9
-err4; lvx vr5,r4,r10
-err4; lvx vr4,r4,r11
-err4; lvx vr3,r4,r12
-err4; lvx vr2,r4,r14
-err4; lvx vr1,r4,r15
-err4; lvx vr0,r4,r16
+err4; lvx v7,0,r4
+err4; lvx v6,r4,r9
+err4; lvx v5,r4,r10
+err4; lvx v4,r4,r11
+err4; lvx v3,r4,r12
+err4; lvx v2,r4,r14
+err4; lvx v1,r4,r15
+err4; lvx v0,r4,r16
addi r4,r4,128
-err4; stvx vr7,r0,r3
-err4; stvx vr6,r3,r9
-err4; stvx vr5,r3,r10
-err4; stvx vr4,r3,r11
-err4; stvx vr3,r3,r12
-err4; stvx vr2,r3,r14
-err4; stvx vr1,r3,r15
-err4; stvx vr0,r3,r16
+err4; stvx v7,0,r3
+err4; stvx v6,r3,r9
+err4; stvx v5,r3,r10
+err4; stvx v4,r3,r11
+err4; stvx v3,r3,r12
+err4; stvx v2,r3,r14
+err4; stvx v1,r3,r15
+err4; stvx v0,r3,r16
addi r3,r3,128
bdnz 8b
@@ -463,29 +427,29 @@ err4; stvx vr0,r3,r16
mtocrf 0x01,r6
bf cr7*4+1,9f
-err3; lvx vr3,r0,r4
-err3; lvx vr2,r4,r9
-err3; lvx vr1,r4,r10
-err3; lvx vr0,r4,r11
+err3; lvx v3,0,r4
+err3; lvx v2,r4,r9
+err3; lvx v1,r4,r10
+err3; lvx v0,r4,r11
addi r4,r4,64
-err3; stvx vr3,r0,r3
-err3; stvx vr2,r3,r9
-err3; stvx vr1,r3,r10
-err3; stvx vr0,r3,r11
+err3; stvx v3,0,r3
+err3; stvx v2,r3,r9
+err3; stvx v1,r3,r10
+err3; stvx v0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
-err3; lvx vr1,r0,r4
-err3; lvx vr0,r4,r9
+err3; lvx v1,0,r4
+err3; lvx v0,r4,r9
addi r4,r4,32
-err3; stvx vr1,r0,r3
-err3; stvx vr0,r3,r9
+err3; stvx v1,0,r3
+err3; stvx v0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
-err3; lvx vr1,r0,r4
+err3; lvx v1,0,r4
addi r4,r4,16
-err3; stvx vr1,r0,r3
+err3; stvx v1,0,r3
addi r3,r3,16
/* Up to 15B to go */
@@ -514,7 +478,7 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b exit_vmx_usercopy /* tail call optimise */
+ b CFUNC(exit_vmx_usercopy) /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -560,42 +524,42 @@ err3; stw r7,4(r3)
li r10,32
li r11,48
- LVS(vr16,0,r4) /* Setup permute control vector */
-err3; lvx vr0,0,r4
+ LVS(v16,0,r4) /* Setup permute control vector */
+err3; lvx v0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
-err3; stvx vr8,r0,r3
+err3; stvx v8,0,r3
addi r3,r3,16
- vor vr0,vr1,vr1
+ vor v0,v1,v1
5: bf cr7*4+2,6f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
-err3; lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+err3; lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
-err3; lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
-err3; lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
-err3; lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
-err3; lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+err3; lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+err3; lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+err3; lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+err3; lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
-err3; stvx vr10,r3,r10
-err3; stvx vr11,r3,r11
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
+err3; stvx v10,r3,r10
+err3; stvx v11,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
@@ -618,31 +582,31 @@ err3; stvx vr11,r3,r11
*/
.align 5
8:
-err4; lvx vr7,r0,r4
- VPERM(vr8,vr0,vr7,vr16)
-err4; lvx vr6,r4,r9
- VPERM(vr9,vr7,vr6,vr16)
-err4; lvx vr5,r4,r10
- VPERM(vr10,vr6,vr5,vr16)
-err4; lvx vr4,r4,r11
- VPERM(vr11,vr5,vr4,vr16)
-err4; lvx vr3,r4,r12
- VPERM(vr12,vr4,vr3,vr16)
-err4; lvx vr2,r4,r14
- VPERM(vr13,vr3,vr2,vr16)
-err4; lvx vr1,r4,r15
- VPERM(vr14,vr2,vr1,vr16)
-err4; lvx vr0,r4,r16
- VPERM(vr15,vr1,vr0,vr16)
+err4; lvx v7,0,r4
+ VPERM(v8,v0,v7,v16)
+err4; lvx v6,r4,r9
+ VPERM(v9,v7,v6,v16)
+err4; lvx v5,r4,r10
+ VPERM(v10,v6,v5,v16)
+err4; lvx v4,r4,r11
+ VPERM(v11,v5,v4,v16)
+err4; lvx v3,r4,r12
+ VPERM(v12,v4,v3,v16)
+err4; lvx v2,r4,r14
+ VPERM(v13,v3,v2,v16)
+err4; lvx v1,r4,r15
+ VPERM(v14,v2,v1,v16)
+err4; lvx v0,r4,r16
+ VPERM(v15,v1,v0,v16)
addi r4,r4,128
-err4; stvx vr8,r0,r3
-err4; stvx vr9,r3,r9
-err4; stvx vr10,r3,r10
-err4; stvx vr11,r3,r11
-err4; stvx vr12,r3,r12
-err4; stvx vr13,r3,r14
-err4; stvx vr14,r3,r15
-err4; stvx vr15,r3,r16
+err4; stvx v8,0,r3
+err4; stvx v9,r3,r9
+err4; stvx v10,r3,r10
+err4; stvx v11,r3,r11
+err4; stvx v12,r3,r12
+err4; stvx v13,r3,r14
+err4; stvx v14,r3,r15
+err4; stvx v15,r3,r16
addi r3,r3,128
bdnz 8b
@@ -656,36 +620,36 @@ err4; stvx vr15,r3,r16
mtocrf 0x01,r6
bf cr7*4+1,9f
-err3; lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
-err3; lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
-err3; lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
-err3; lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+err3; lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+err3; lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+err3; lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+err3; lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
-err3; stvx vr10,r3,r10
-err3; stvx vr11,r3,r11
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
+err3; stvx v10,r3,r10
+err3; stvx v11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
-err3; lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+err3; lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
-err3; stvx vr8,r0,r3
-err3; stvx vr9,r3,r9
+err3; stvx v8,0,r3
+err3; stvx v9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
-err3; lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+err3; lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
-err3; stvx vr8,r0,r3
+err3; stvx v8,0,r3
addi r3,r3,16
/* Up to 15B to go */
@@ -717,5 +681,5 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
- b exit_vmx_usercopy /* tail call optimise */
-#endif /* CONFiG_ALTIVEC */
+ b CFUNC(exit_vmx_usercopy) /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/crtsavres.S b/arch/powerpc/lib/crtsavres.S
index a5b30c71a8d3..8967903c15e9 100644
--- a/arch/powerpc/lib/crtsavres.S
+++ b/arch/powerpc/lib/crtsavres.S
@@ -44,10 +44,10 @@
#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-#ifndef CONFIG_PPC64
-
.section ".text"
+#ifndef __powerpc64__
+
/* Routines for saving integer registers, called by the compiler. */
/* Called with r11 pointing to the stack header word of the caller of the */
/* function, just beyond the end of the integer save area. */
@@ -236,86 +236,84 @@ _GLOBAL(_rest32gpr_31_x)
_GLOBAL(_savevr_20)
li r11,-192
- stvx vr20,r11,r0
+ stvx v20,r11,r0
_GLOBAL(_savevr_21)
li r11,-176
- stvx vr21,r11,r0
+ stvx v21,r11,r0
_GLOBAL(_savevr_22)
li r11,-160
- stvx vr22,r11,r0
+ stvx v22,r11,r0
_GLOBAL(_savevr_23)
li r11,-144
- stvx vr23,r11,r0
+ stvx v23,r11,r0
_GLOBAL(_savevr_24)
li r11,-128
- stvx vr24,r11,r0
+ stvx v24,r11,r0
_GLOBAL(_savevr_25)
li r11,-112
- stvx vr25,r11,r0
+ stvx v25,r11,r0
_GLOBAL(_savevr_26)
li r11,-96
- stvx vr26,r11,r0
+ stvx v26,r11,r0
_GLOBAL(_savevr_27)
li r11,-80
- stvx vr27,r11,r0
+ stvx v27,r11,r0
_GLOBAL(_savevr_28)
li r11,-64
- stvx vr28,r11,r0
+ stvx v28,r11,r0
_GLOBAL(_savevr_29)
li r11,-48
- stvx vr29,r11,r0
+ stvx v29,r11,r0
_GLOBAL(_savevr_30)
li r11,-32
- stvx vr30,r11,r0
+ stvx v30,r11,r0
_GLOBAL(_savevr_31)
li r11,-16
- stvx vr31,r11,r0
+ stvx v31,r11,r0
blr
_GLOBAL(_restvr_20)
li r11,-192
- lvx vr20,r11,r0
+ lvx v20,r11,r0
_GLOBAL(_restvr_21)
li r11,-176
- lvx vr21,r11,r0
+ lvx v21,r11,r0
_GLOBAL(_restvr_22)
li r11,-160
- lvx vr22,r11,r0
+ lvx v22,r11,r0
_GLOBAL(_restvr_23)
li r11,-144
- lvx vr23,r11,r0
+ lvx v23,r11,r0
_GLOBAL(_restvr_24)
li r11,-128
- lvx vr24,r11,r0
+ lvx v24,r11,r0
_GLOBAL(_restvr_25)
li r11,-112
- lvx vr25,r11,r0
+ lvx v25,r11,r0
_GLOBAL(_restvr_26)
li r11,-96
- lvx vr26,r11,r0
+ lvx v26,r11,r0
_GLOBAL(_restvr_27)
li r11,-80
- lvx vr27,r11,r0
+ lvx v27,r11,r0
_GLOBAL(_restvr_28)
li r11,-64
- lvx vr28,r11,r0
+ lvx v28,r11,r0
_GLOBAL(_restvr_29)
li r11,-48
- lvx vr29,r11,r0
+ lvx v29,r11,r0
_GLOBAL(_restvr_30)
li r11,-32
- lvx vr30,r11,r0
+ lvx v30,r11,r0
_GLOBAL(_restvr_31)
li r11,-16
- lvx vr31,r11,r0
+ lvx v31,r11,r0
blr
#endif /* CONFIG_ALTIVEC */
#else /* CONFIG_PPC64 */
- .section ".text.save.restore","ax",@progbits
-
.globl _savegpr0_14
_savegpr0_14:
std r14,-144(r1)
@@ -443,101 +441,101 @@ _restgpr0_31:
.globl _savevr_20
_savevr_20:
li r12,-192
- stvx vr20,r12,r0
+ stvx v20,r12,r0
.globl _savevr_21
_savevr_21:
li r12,-176
- stvx vr21,r12,r0
+ stvx v21,r12,r0
.globl _savevr_22
_savevr_22:
li r12,-160
- stvx vr22,r12,r0
+ stvx v22,r12,r0
.globl _savevr_23
_savevr_23:
li r12,-144
- stvx vr23,r12,r0
+ stvx v23,r12,r0
.globl _savevr_24
_savevr_24:
li r12,-128
- stvx vr24,r12,r0
+ stvx v24,r12,r0
.globl _savevr_25
_savevr_25:
li r12,-112
- stvx vr25,r12,r0
+ stvx v25,r12,r0
.globl _savevr_26
_savevr_26:
li r12,-96
- stvx vr26,r12,r0
+ stvx v26,r12,r0
.globl _savevr_27
_savevr_27:
li r12,-80
- stvx vr27,r12,r0
+ stvx v27,r12,r0
.globl _savevr_28
_savevr_28:
li r12,-64
- stvx vr28,r12,r0
+ stvx v28,r12,r0
.globl _savevr_29
_savevr_29:
li r12,-48
- stvx vr29,r12,r0
+ stvx v29,r12,r0
.globl _savevr_30
_savevr_30:
li r12,-32
- stvx vr30,r12,r0
+ stvx v30,r12,r0
.globl _savevr_31
_savevr_31:
li r12,-16
- stvx vr31,r12,r0
+ stvx v31,r12,r0
blr
.globl _restvr_20
_restvr_20:
li r12,-192
- lvx vr20,r12,r0
+ lvx v20,r12,r0
.globl _restvr_21
_restvr_21:
li r12,-176
- lvx vr21,r12,r0
+ lvx v21,r12,r0
.globl _restvr_22
_restvr_22:
li r12,-160
- lvx vr22,r12,r0
+ lvx v22,r12,r0
.globl _restvr_23
_restvr_23:
li r12,-144
- lvx vr23,r12,r0
+ lvx v23,r12,r0
.globl _restvr_24
_restvr_24:
li r12,-128
- lvx vr24,r12,r0
+ lvx v24,r12,r0
.globl _restvr_25
_restvr_25:
li r12,-112
- lvx vr25,r12,r0
+ lvx v25,r12,r0
.globl _restvr_26
_restvr_26:
li r12,-96
- lvx vr26,r12,r0
+ lvx v26,r12,r0
.globl _restvr_27
_restvr_27:
li r12,-80
- lvx vr27,r12,r0
+ lvx v27,r12,r0
.globl _restvr_28
_restvr_28:
li r12,-64
- lvx vr28,r12,r0
+ lvx v28,r12,r0
.globl _restvr_29
_restvr_29:
li r12,-48
- lvx vr29,r12,r0
+ lvx v29,r12,r0
.globl _restvr_30
_restvr_30:
li r12,-32
- lvx vr30,r12,r0
+ lvx v30,r12,r0
.globl _restvr_31
_restvr_31:
li r12,-16
- lvx vr31,r12,r0
+ lvx v31,r12,r0
blr
#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
deleted file mode 100644
index 8df55fc3aad6..000000000000
--- a/arch/powerpc/lib/devres.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2008 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/device.h> /* devres_*(), devm_ioremap_release() */
-#include <linux/gfp.h>
-#include <linux/io.h> /* ioremap_prot() */
-#include <linux/export.h> /* EXPORT_SYMBOL() */
-
-/**
- * devm_ioremap_prot - Managed ioremap_prot()
- * @dev: Generic device to remap IO address for
- * @offset: BUS offset to map
- * @size: Size of map
- * @flags: Page flags
- *
- * Managed ioremap_prot(). Map is automatically unmapped on driver
- * detach.
- */
-void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
- size_t size, unsigned long flags)
-{
- void __iomem **ptr, *addr;
-
- ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
- if (!ptr)
- return NULL;
-
- addr = ioremap_prot(offset, size, flags);
- if (addr) {
- *ptr = addr;
- devres_add(dev, ptr);
- } else
- devres_free(ptr);
-
- return addr;
-}
-EXPORT_SYMBOL(devm_ioremap_prot);
diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S
index 83d9832fd919..3d5426e7dcc4 100644
--- a/arch/powerpc/lib/div64.S
+++ b/arch/powerpc/lib/div64.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Divide a 64-bit unsigned number by a 32-bit unsigned number.
* This routine assumes that the top 32 bits of the dividend are
@@ -7,11 +8,6 @@
* On exit, r3 contains the remainder.
*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
#include <asm/processor.h>
diff --git a/arch/powerpc/lib/error-inject.c b/arch/powerpc/lib/error-inject.c
new file mode 100644
index 000000000000..e834079d2b5c
--- /dev/null
+++ b/arch/powerpc/lib/error-inject.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ /*
+ * Emulate 'blr'. 'regs' represents the state on entry of a predefined
+ * function in the kernel/module, captured on a kprobe. We don't need
+ * to worry about 32-bit userspace on a 64-bit kernel.
+ */
+ regs_set_return_ip(regs, regs->link);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/powerpc/lib/feature-fixups-test.S b/arch/powerpc/lib/feature-fixups-test.S
index f4613118132e..480172fbd024 100644
--- a/arch/powerpc/lib/feature-fixups-test.S
+++ b/arch/powerpc/lib/feature-fixups-test.S
@@ -1,16 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2008 Michael Ellerman, IBM Corporation.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/feature-fixups.h>
#include <asm/ppc_asm.h>
#include <asm/synch.h>
+#include <asm/asm-compat.h>
+#include <asm/ppc-opcode.h>
.text
@@ -167,16 +164,52 @@ globl(ftr_fixup_test6_expected)
blt 2b
b 3f
b 1b
-2: or 1,1,1
+3: or 1,1,1
or 2,2,2
-3: or 3,3,3
+ or 3,3,3
+globl(ftr_fixup_test7)
+ or 1,1,1
+BEGIN_FTR_SECTION
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+ or 2,2,2
+FTR_SECTION_ELSE
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+ bdnz 3b
+1:
+ALT_FTR_SECTION_END(0, 1)
+ or 1,1,1
+ or 1,1,1
+
+globl(end_ftr_fixup_test7)
+ nop
+
+globl(ftr_fixup_test7_expected)
+ or 1,1,1
+2: b 3f
+3: or 5,5,5
+ beq 3b
+ b 1f
+ or 6,6,6
+ b 2b
+ bdnz 3b
+1: or 1,1,1
#if 0
/* Test that if we have a larger else case the assembler spots it and
* reports an error. #if 0'ed so as not to break the build normally.
*/
-ftr_fixup_test7:
+ftr_fixup_test_too_big:
or 1,1,1
BEGIN_FTR_SECTION
or 2,2,2
@@ -759,3 +792,71 @@ globl(lwsync_fixup_test_expected_SYNC)
1: or 1,1,1
sync
+globl(ftr_fixup_prefix1)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+globl(end_ftr_fixup_prefix1)
+
+globl(ftr_fixup_prefix1_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+
+globl(ftr_fixup_prefix1_expected)
+ or 1,1,1
+ nop
+ nop
+ or 2,2,2
+
+globl(ftr_fixup_prefix2)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+globl(end_ftr_fixup_prefix2)
+
+globl(ftr_fixup_prefix2_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+
+globl(ftr_fixup_prefix2_alt)
+ .long OP_PREFIX << 26
+ .long 0x0000001
+
+globl(ftr_fixup_prefix2_expected)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ or 2,2,2
+
+globl(ftr_fixup_prefix3)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+ or 3,3,3
+globl(end_ftr_fixup_prefix3)
+
+globl(ftr_fixup_prefix3_orig)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000000
+ or 2,2,2
+ or 3,3,3
+
+globl(ftr_fixup_prefix3_alt)
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ nop
+
+globl(ftr_fixup_prefix3_expected)
+ or 1,1,1
+ .long OP_PREFIX << 26
+ .long 0x0000001
+ nop
+ or 3,3,3
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 7a8a7487cee8..587c8cf1230f 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org)
*
@@ -5,22 +6,31 @@
* Copyright (C) 2003 Dave Engebretsen <engebret@us.ibm.com>
*
* Copyright 2008 Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/init.h>
+#include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
#include <asm/cputable.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
+#include <asm/interrupt.h>
#include <asm/page.h>
#include <asm/sections.h>
+#include <asm/setup.h>
+#include <asm/security_features.h>
+#include <asm/firmware.h>
+#include <asm/inst.h>
+/*
+ * Used to generate warnings if mmu or cpu feature check functions that
+ * use static keys before they are initialized.
+ */
+bool static_key_feature_checks_initialized __read_mostly;
+EXPORT_SYMBOL_GPL(static_key_feature_checks_initialized);
struct fixup_entry {
unsigned long mask;
@@ -31,42 +41,43 @@ struct fixup_entry {
long alt_end_off;
};
-static unsigned int *calc_addr(struct fixup_entry *fcur, long offset)
+static u32 *calc_addr(struct fixup_entry *fcur, long offset)
{
/*
* We store the offset to the code as a negative offset from
* the start of the alt_entry, to support the VDSO. This
* routine converts that back into an actual address.
*/
- return (unsigned int *)((unsigned long)fcur + offset);
+ return (u32 *)((unsigned long)fcur + offset);
}
-static int patch_alt_instruction(unsigned int *src, unsigned int *dest,
- unsigned int *alt_start, unsigned int *alt_end)
+static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_end)
{
- unsigned int instr;
+ int err;
+ ppc_inst_t instr;
- instr = *src;
+ instr = ppc_inst_read(src);
- if (instr_is_relative_branch(*src)) {
- unsigned int *target = (unsigned int *)branch_target(src);
+ if (instr_is_relative_branch(ppc_inst_read(src))) {
+ u32 *target = (u32 *)branch_target(src);
/* Branch within the section doesn't need translating */
- if (target < alt_start || target >= alt_end) {
- instr = translate_branch(dest, src);
- if (!instr)
+ if (target < alt_start || target > alt_end) {
+ err = translate_branch(&instr, dest, src);
+ if (err)
return 1;
}
}
- patch_instruction(dest, instr);
+ raw_patch_instruction(dest, instr);
return 0;
}
-static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+static int patch_feature_section_mask(unsigned long value, unsigned long mask,
+ struct fixup_entry *fcur)
{
- unsigned int *start, *end, *alt_start, *alt_end, *src, *dest;
+ u32 *start, *end, *alt_start, *alt_end, *src, *dest;
start = calc_addr(fcur, fcur->start_off);
end = calc_addr(fcur, fcur->end_off);
@@ -76,24 +87,26 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
if ((alt_end - alt_start) > (end - start))
return 1;
- if ((value & fcur->mask) == fcur->value)
+ if ((value & fcur->mask & mask) == (fcur->value & mask))
return 0;
src = alt_start;
dest = start;
- for (; src < alt_end; src++, dest++) {
+ for (; src < alt_end; src = ppc_inst_next(src, src),
+ dest = ppc_inst_next(dest, dest)) {
if (patch_alt_instruction(src, dest, alt_start, alt_end))
return 1;
}
for (; dest < end; dest++)
- patch_instruction(dest, PPC_INST_NOP);
+ raw_patch_instruction(dest, ppc_inst(PPC_RAW_NOP()));
return 0;
}
-void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+static void do_feature_fixups_mask(unsigned long value, unsigned long mask,
+ void *fixup_start, void *fixup_end)
{
struct fixup_entry *fcur, *fend;
@@ -101,7 +114,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
fend = fixup_end;
for (; fcur < fend; fcur++) {
- if (patch_feature_section(value, fcur)) {
+ if (patch_feature_section_mask(value, mask, fcur)) {
WARN_ON(1);
printk("Unable to patch feature section at %p - %p" \
" with %p - %p\n",
@@ -113,10 +126,473 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
}
}
+void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
+{
+ do_feature_fixups_mask(value, ~0, fixup_start, fixup_end);
+}
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+ return system_state < SYSTEM_FREEING_INITMEM ||
+ !init_section_contains(dest, size);
+}
+
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ int j;
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ for (j = 0; j < num; j++)
+ patch_instruction(dest + j, ppc_inst(instrs[j]));
+ }
+ return i;
+}
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+ bool do_fallback, void *fallback)
+{
+ int i;
+
+ for (i = 0; start < end; start++, i++) {
+ unsigned int *dest = (void *)start + *start;
+
+ if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+ continue;
+
+ pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+ // See comment in do_entry_flush_fixups() RE order of patching
+ if (do_fallback) {
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);
+ } else {
+ patch_instruction(dest + 1, ppc_inst(instrs[1]));
+ patch_instruction(dest + 2, ppc_inst(instrs[2]));
+ patch_instruction(dest, ppc_inst(instrs[0]));
+ }
+ }
+ return i;
+}
+
+static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_entry_barrier_fixup);
+ end = PTRRELOC(&__stop___stf_entry_barrier_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK) {
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
+ } else if (types & STF_BARRIER_SYNC_ORI) {
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R10, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ }
+
+ i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+ &stf_barrier_fallback);
+
+ printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
+{
+ unsigned int instrs[6];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___stf_exit_barrier_fixup);
+ end = PTRRELOC(&__stop___stf_exit_barrier_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_NOP();
+ instrs[4] = PPC_RAW_NOP();
+ instrs[5] = PPC_RAW_NOP();
+
+ i = 0;
+ if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) {
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_HSPRG1, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG0);
+ } else {
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_SPRG2, _R13);
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG1);
+ }
+ instrs[i++] = PPC_RAW_SYNC();
+ instrs[i++] = PPC_RAW_LD(_R13, _R13, 0);
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_HSPRG1);
+ else
+ instrs[i++] = PPC_RAW_MFSPR(_R13, SPRN_SPRG2);
+ } else if (types & STF_BARRIER_EIEIO) {
+ instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
+ }
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
+ (types == STF_BARRIER_NONE) ? "no" :
+ (types == STF_BARRIER_FALLBACK) ? "fallback" :
+ (types == STF_BARRIER_EIEIO) ? "eieio" :
+ (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync"
+ : "unknown");
+}
+
+static bool stf_exit_reentrant = false;
+static bool rfi_exit_reentrant = false;
+static DEFINE_MUTEX(exit_flush_lock);
+
+static int __do_stf_barrier_fixups(void *data)
+{
+ enum stf_barrier_type *types = data;
+
+ do_stf_entry_barrier_fixups(*types);
+ do_stf_exit_barrier_fixups(*types);
+
+ return 0;
+}
+
+void do_stf_barrier_fixups(enum stf_barrier_type types)
+{
+ /*
+ * The call to the fallback entry flush, and the fallback/sync-ori exit
+ * flush can not be safely patched in/out while other CPUs are
+ * executing them. So call __do_stf_barrier_fixups() on one CPU while
+ * all other CPUs spin in the stop machine core with interrupts hard
+ * disabled.
+ *
+ * The branch to mark interrupt exits non-reentrant is enabled first,
+ * then stop_machine runs which will ensure all CPUs are out of the
+ * low level interrupt exit code before patching. After the patching,
+ * if allowed, then flip the branch to allow fast exits.
+ */
+
+ // Prevent static key update races with do_rfi_flush_fixups()
+ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_stf_barrier_fixups, &types, NULL);
+
+ if ((types & STF_BARRIER_FALLBACK) || (types & STF_BARRIER_SYNC_ORI))
+ stf_exit_reentrant = false;
+ else
+ stf_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
+
+ mutex_unlock(&exit_flush_lock);
+}
+
+void do_uaccess_flush_fixups(enum l1d_flush_type types)
+{
+ unsigned int instrs[4];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___uaccess_flush_fixup);
+ end = PTRRELOC(&__stop___uaccess_flush_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+ instrs[3] = PPC_RAW_BLR();
+
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[3] = PPC_RAW_NOP();
+ /* fallthrough to fallback flush */
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+}
+
+static int __do_entry_flush_fixups(void *data)
+{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ i = 0;
+ if (types == L1D_FLUSH_FALLBACK) {
+ instrs[i++] = PPC_RAW_MFLR(_R10);
+ instrs[i++] = PPC_RAW_NOP(); /* branch patched below */
+ instrs[i++] = PPC_RAW_MTLR(_R10);
+ }
+
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ /*
+ * If we're patching in or out the fallback flush we need to be careful about the
+ * order in which we patch instructions. That's because it's possible we could
+ * take a page fault after patching one instruction, so the sequence of
+ * instructions must be safe even in a half patched state.
+ *
+ * To make that work, when patching in the fallback flush we patch in this order:
+ * - the mflr (dest)
+ * - the mtlr (dest + 2)
+ * - the branch (dest + 1)
+ *
+ * That ensures the sequence is safe to execute at any point. In contrast if we
+ * patch the mtlr last, it's possible we could return from the branch and not
+ * restore LR, leading to a crash later.
+ *
+ * When patching out the fallback flush (either with nops or another flush type),
+ * we patch in this order:
+ * - the branch (dest + 1)
+ * - the mtlr (dest + 2)
+ * - the mflr (dest)
+ *
+ * Note we are protected by stop_machine() from other CPUs executing the code in a
+ * semi-patched state.
+ */
+
+ start = PTRRELOC(&__start___entry_flush_fixup);
+ end = PTRRELOC(&__stop___entry_flush_fixup);
+ i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &entry_flush_fallback);
+
+ start = PTRRELOC(&__start___scv_entry_flush_fixup);
+ end = PTRRELOC(&__stop___scv_entry_flush_fixup);
+ i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+ &scv_entry_flush_fallback);
+
+ printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+
+ return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * The call to the fallback flush can not be safely patched in/out while
+ * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+ * CPU while all other CPUs spin in the stop machine core with interrupts
+ * hard disabled.
+ */
+ stop_machine(__do_entry_flush_fixups, &types, NULL);
+}
+
+static int __do_rfi_flush_fixups(void *data)
+{
+ enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+ unsigned int instrs[3];
+ long *start, *end;
+ int i;
+
+ start = PTRRELOC(&__start___rfi_flush_fixup);
+ end = PTRRELOC(&__stop___rfi_flush_fixup);
+
+ instrs[0] = PPC_RAW_NOP();
+ instrs[1] = PPC_RAW_NOP();
+ instrs[2] = PPC_RAW_NOP();
+
+ if (types & L1D_FLUSH_FALLBACK)
+ /* b .+16 to fallback flush */
+ instrs[0] = PPC_RAW_BRANCH(16);
+
+ i = 0;
+ if (types & L1D_FLUSH_ORI) {
+ instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ instrs[i++] = PPC_RAW_ORI(_R30, _R30, 0); /* L1d flush */
+ }
+
+ if (types & L1D_FLUSH_MTTRIG)
+ instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
+
+ i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
+
+ printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
+ (types == L1D_FLUSH_NONE) ? "no" :
+ (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
+ (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
+ ? "ori+mttrig type"
+ : "ori type" :
+ (types & L1D_FLUSH_MTTRIG) ? "mttrig type"
+ : "unknown");
+
+ return 0;
+}
+
+void do_rfi_flush_fixups(enum l1d_flush_type types)
+{
+ /*
+ * stop_machine gets all CPUs out of the interrupt exit handler same
+ * as do_stf_barrier_fixups. do_rfi_flush_fixups patching can run
+ * without stop_machine, so this could be achieved with a broadcast
+ * IPI instead, but this matches the stf sequence.
+ */
+
+ // Prevent static key update races with do_stf_barrier_fixups()
+ mutex_lock(&exit_flush_lock);
+ static_branch_enable(&interrupt_exit_not_reentrant);
+
+ stop_machine(__do_rfi_flush_fixups, &types, NULL);
+
+ if (types & L1D_FLUSH_FALLBACK)
+ rfi_exit_reentrant = false;
+ else
+ rfi_exit_reentrant = true;
+
+ if (stf_exit_reentrant && rfi_exit_reentrant)
+ static_branch_disable(&interrupt_exit_not_reentrant);
+
+ mutex_unlock(&exit_flush_lock);
+}
+
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+ unsigned int instr;
+ long *start, *end;
+ int i;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ instr = PPC_RAW_NOP();
+
+ if (enable) {
+ pr_info("barrier-nospec: using ORI speculation barrier\n");
+ instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
+ }
+
+ i = do_patch_fixups(start, end, &instr, 1);
+
+ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+void do_barrier_nospec_fixups(bool enable)
+{
+ void *start, *end;
+
+ start = PTRRELOC(&__start___barrier_nospec_fixup);
+ end = PTRRELOC(&__stop___barrier_nospec_fixup);
+
+ do_barrier_nospec_fixups_range(enable, start, end);
+}
+#endif /* CONFIG_PPC_BARRIER_NOSPEC */
+
+#ifdef CONFIG_PPC_E500
+void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
+{
+ unsigned int instr[2];
+ long *start, *end;
+ int i;
+
+ start = fixup_start;
+ end = fixup_end;
+
+ instr[0] = PPC_RAW_NOP();
+ instr[1] = PPC_RAW_NOP();
+
+ if (enable) {
+ pr_info("barrier-nospec: using isync; sync as speculation barrier\n");
+ instr[0] = PPC_RAW_ISYNC();
+ instr[1] = PPC_RAW_SYNC();
+ }
+
+ i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
+
+ printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
+}
+
+static void __init patch_btb_flush_section(long *curr)
+{
+ unsigned int *start, *end;
+
+ start = (void *)curr + *curr;
+ end = (void *)curr + *(curr + 1);
+ for (; start < end; start++) {
+ pr_devel("patching dest %lx\n", (unsigned long)start);
+ patch_instruction(start, ppc_inst(PPC_RAW_NOP()));
+ }
+}
+
+void __init do_btb_flush_fixups(void)
+{
+ long *start, *end;
+
+ start = PTRRELOC(&__start__btb_flush_fixup);
+ end = PTRRELOC(&__stop__btb_flush_fixup);
+
+ for (; start < end; start += 2)
+ patch_btb_flush_section(start);
+}
+#endif /* CONFIG_PPC_E500 */
+
void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
{
long *start, *end;
- unsigned int *dest;
+ u32 *dest;
if (!(value & CPU_FTR_LWSYNC))
return ;
@@ -126,235 +602,399 @@ void do_lwsync_fixups(unsigned long value, void *fixup_start, void *fixup_end)
for (; start < end; start++) {
dest = (void *)start + *start;
- patch_instruction(dest, PPC_INST_LWSYNC);
+ raw_patch_instruction(dest, ppc_inst(PPC_INST_LWSYNC));
}
}
-void do_final_fixups(void)
+static void __init do_final_fixups(void)
{
#if defined(CONFIG_PPC64) && defined(CONFIG_RELOCATABLE)
- int *src, *dest;
- unsigned long length;
+ ppc_inst_t inst;
+ u32 *src, *dest, *end;
if (PHYSICAL_START == 0)
return;
- src = (int *)(KERNELBASE + PHYSICAL_START);
- dest = (int *)KERNELBASE;
- length = (__end_interrupts - _stext) / sizeof(int);
+ src = (u32 *)(KERNELBASE + PHYSICAL_START);
+ dest = (u32 *)KERNELBASE;
+ end = (void *)src + (__end_interrupts - _stext);
- while (length--) {
- patch_instruction(dest, *src);
- src++;
- dest++;
+ while (src < end) {
+ inst = ppc_inst_read(src);
+ raw_patch_instruction(dest, inst);
+ src = ppc_inst_next(src, src);
+ dest = ppc_inst_next(dest, dest);
}
#endif
}
+static unsigned long __initdata saved_cpu_features;
+static unsigned int __initdata saved_mmu_features;
+#ifdef CONFIG_PPC64
+static unsigned long __initdata saved_firmware_features;
+#endif
+
+void __init apply_feature_fixups(void)
+{
+ struct cpu_spec *spec = PTRRELOC(*PTRRELOC(&cur_cpu_spec));
+
+ *PTRRELOC(&saved_cpu_features) = spec->cpu_features;
+ *PTRRELOC(&saved_mmu_features) = spec->mmu_features;
+
+ /*
+ * Apply the CPU-specific and firmware specific fixups to kernel text
+ * (nop out sections not relevant to this CPU or this firmware).
+ */
+ do_feature_fixups(spec->cpu_features,
+ PTRRELOC(&__start___ftr_fixup),
+ PTRRELOC(&__stop___ftr_fixup));
+
+ do_feature_fixups(spec->mmu_features,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+
+ do_lwsync_fixups(spec->cpu_features,
+ PTRRELOC(&__start___lwsync_fixup),
+ PTRRELOC(&__stop___lwsync_fixup));
+
+#ifdef CONFIG_PPC64
+ saved_firmware_features = powerpc_firmware_features;
+ do_feature_fixups(powerpc_firmware_features,
+ &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup);
+#endif
+ do_final_fixups();
+}
+
+void __init update_mmu_feature_fixups(unsigned long mask)
+{
+ saved_mmu_features &= ~mask;
+ saved_mmu_features |= cur_cpu_spec->mmu_features & mask;
+
+ do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask,
+ PTRRELOC(&__start___mmu_ftr_fixup),
+ PTRRELOC(&__stop___mmu_ftr_fixup));
+ mmu_feature_keys_init();
+}
+
+void __init setup_feature_keys(void)
+{
+ /*
+ * Initialise jump label. This causes all the cpu/mmu_has_feature()
+ * checks to take on their correct polarity based on the current set of
+ * CPU/MMU features.
+ */
+ jump_label_init();
+ cpu_feature_keys_init();
+ mmu_feature_keys_init();
+ static_key_feature_checks_initialized = true;
+}
+
+static int __init check_features(void)
+{
+ WARN(saved_cpu_features != cur_cpu_spec->cpu_features,
+ "CPU features changed after feature patching!\n");
+ WARN(saved_mmu_features != cur_cpu_spec->mmu_features,
+ "MMU features changed after feature patching!\n");
+#ifdef CONFIG_PPC64
+ WARN(saved_firmware_features != powerpc_firmware_features,
+ "Firmware features changed after feature patching!\n");
+#endif
+
+ return 0;
+}
+late_initcall(check_features);
+
#ifdef CONFIG_FTR_FIXUP_SELFTEST
#define check(x) \
if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__);
+static int patch_feature_section(unsigned long value, struct fixup_entry *fcur)
+{
+ return patch_feature_section_mask(value, ~0, fcur);
+}
+
/* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */
static struct fixup_entry fixup;
-static long calc_offset(struct fixup_entry *entry, unsigned int *p)
+static long __init calc_offset(struct fixup_entry *entry, unsigned int *p)
{
return (unsigned long)p - (unsigned long)entry;
}
-void test_basic_patching(void)
+static void __init test_basic_patching(void)
{
- extern unsigned int ftr_fixup_test1;
- extern unsigned int end_ftr_fixup_test1;
- extern unsigned int ftr_fixup_test1_orig;
- extern unsigned int ftr_fixup_test1_expected;
- int size = &end_ftr_fixup_test1 - &ftr_fixup_test1;
+ extern unsigned int ftr_fixup_test1[];
+ extern unsigned int end_ftr_fixup_test1[];
+ extern unsigned int ftr_fixup_test1_orig[];
+ extern unsigned int ftr_fixup_test1_expected[];
+ int size = 4 * (end_ftr_fixup_test1 - ftr_fixup_test1);
fixup.value = fixup.mask = 8;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test1 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test1 + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test1 + 2);
fixup.alt_start_off = fixup.alt_end_off = 0;
/* Sanity check */
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(8, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test1, &ftr_fixup_test1_orig, size);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_orig, size) == 0);
+ memcpy(ftr_fixup_test1, ftr_fixup_test1_orig, size);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_orig, size) == 0);
patch_feature_section(~8, &fixup);
- check(memcmp(&ftr_fixup_test1, &ftr_fixup_test1_expected, size) == 0);
+ check(memcmp(ftr_fixup_test1, ftr_fixup_test1_expected, size) == 0);
}
-static void test_alternative_patching(void)
+static void __init test_alternative_patching(void)
{
- extern unsigned int ftr_fixup_test2;
- extern unsigned int end_ftr_fixup_test2;
- extern unsigned int ftr_fixup_test2_orig;
- extern unsigned int ftr_fixup_test2_alt;
- extern unsigned int ftr_fixup_test2_expected;
- int size = &end_ftr_fixup_test2 - &ftr_fixup_test2;
+ extern unsigned int ftr_fixup_test2[];
+ extern unsigned int end_ftr_fixup_test2[];
+ extern unsigned int ftr_fixup_test2_orig[];
+ extern unsigned int ftr_fixup_test2_alt[];
+ extern unsigned int ftr_fixup_test2_expected[];
+ int size = 4 * (end_ftr_fixup_test2 - ftr_fixup_test2);
fixup.value = fixup.mask = 0xF;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test2 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test2 + 2);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test2_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test2_alt + 1);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test2 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test2_alt + 1);
/* Sanity check */
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(0xF, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test2, &ftr_fixup_test2_orig, size);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_orig, size) == 0);
+ memcpy(ftr_fixup_test2, ftr_fixup_test2_orig, size);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_orig, size) == 0);
patch_feature_section(~0xF, &fixup);
- check(memcmp(&ftr_fixup_test2, &ftr_fixup_test2_expected, size) == 0);
+ check(memcmp(ftr_fixup_test2, ftr_fixup_test2_expected, size) == 0);
}
-static void test_alternative_case_too_big(void)
+static void __init test_alternative_case_too_big(void)
{
- extern unsigned int ftr_fixup_test3;
- extern unsigned int end_ftr_fixup_test3;
- extern unsigned int ftr_fixup_test3_orig;
- extern unsigned int ftr_fixup_test3_alt;
- int size = &end_ftr_fixup_test3 - &ftr_fixup_test3;
+ extern unsigned int ftr_fixup_test3[];
+ extern unsigned int end_ftr_fixup_test3[];
+ extern unsigned int ftr_fixup_test3_orig[];
+ extern unsigned int ftr_fixup_test3_alt[];
+ int size = 4 * (end_ftr_fixup_test3 - ftr_fixup_test3);
fixup.value = fixup.mask = 0xC;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test3 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test3 + 2);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test3_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test3_alt + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test3 + 2);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test3_alt + 2);
/* Sanity check */
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
/* Expect nothing to be patched, and the error returned to us */
check(patch_feature_section(0xF, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
check(patch_feature_section(0, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
check(patch_feature_section(~0xF, &fixup) == 1);
- check(memcmp(&ftr_fixup_test3, &ftr_fixup_test3_orig, size) == 0);
+ check(memcmp(ftr_fixup_test3, ftr_fixup_test3_orig, size) == 0);
}
-static void test_alternative_case_too_small(void)
+static void __init test_alternative_case_too_small(void)
{
- extern unsigned int ftr_fixup_test4;
- extern unsigned int end_ftr_fixup_test4;
- extern unsigned int ftr_fixup_test4_orig;
- extern unsigned int ftr_fixup_test4_alt;
- extern unsigned int ftr_fixup_test4_expected;
- int size = &end_ftr_fixup_test4 - &ftr_fixup_test4;
+ extern unsigned int ftr_fixup_test4[];
+ extern unsigned int end_ftr_fixup_test4[];
+ extern unsigned int ftr_fixup_test4_orig[];
+ extern unsigned int ftr_fixup_test4_alt[];
+ extern unsigned int ftr_fixup_test4_expected[];
+ int size = 4 * (end_ftr_fixup_test4 - ftr_fixup_test4);
unsigned long flag;
/* Check a high-bit flag */
flag = 1UL << ((sizeof(unsigned long) - 1) * 8);
fixup.value = fixup.mask = flag;
- fixup.start_off = calc_offset(&fixup, &ftr_fixup_test4 + 1);
- fixup.end_off = calc_offset(&fixup, &ftr_fixup_test4 + 5);
- fixup.alt_start_off = calc_offset(&fixup, &ftr_fixup_test4_alt);
- fixup.alt_end_off = calc_offset(&fixup, &ftr_fixup_test4_alt + 2);
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_test4 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_test4 + 5);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_test4_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_test4_alt + 2);
/* Sanity check */
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
/* Check we don't patch if the value matches */
patch_feature_section(flag, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
/* Check we do patch if the value doesn't match */
patch_feature_section(0, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
/* Check we do patch if the mask doesn't match */
- memcpy(&ftr_fixup_test4, &ftr_fixup_test4_orig, size);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_orig, size) == 0);
+ memcpy(ftr_fixup_test4, ftr_fixup_test4_orig, size);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_orig, size) == 0);
patch_feature_section(~flag, &fixup);
- check(memcmp(&ftr_fixup_test4, &ftr_fixup_test4_expected, size) == 0);
+ check(memcmp(ftr_fixup_test4, ftr_fixup_test4_expected, size) == 0);
}
static void test_alternative_case_with_branch(void)
{
- extern unsigned int ftr_fixup_test5;
- extern unsigned int end_ftr_fixup_test5;
- extern unsigned int ftr_fixup_test5_expected;
- int size = &end_ftr_fixup_test5 - &ftr_fixup_test5;
+ extern unsigned int ftr_fixup_test5[];
+ extern unsigned int end_ftr_fixup_test5[];
+ extern unsigned int ftr_fixup_test5_expected[];
+ int size = 4 * (end_ftr_fixup_test5 - ftr_fixup_test5);
+
+ check(memcmp(ftr_fixup_test5, ftr_fixup_test5_expected, size) == 0);
+}
+
+static void __init test_alternative_case_with_external_branch(void)
+{
+ extern unsigned int ftr_fixup_test6[];
+ extern unsigned int end_ftr_fixup_test6[];
+ extern unsigned int ftr_fixup_test6_expected[];
+ int size = 4 * (end_ftr_fixup_test6 - ftr_fixup_test6);
- check(memcmp(&ftr_fixup_test5, &ftr_fixup_test5_expected, size) == 0);
+ check(memcmp(ftr_fixup_test6, ftr_fixup_test6_expected, size) == 0);
}
-static void test_alternative_case_with_external_branch(void)
+static void __init test_alternative_case_with_branch_to_end(void)
{
- extern unsigned int ftr_fixup_test6;
- extern unsigned int end_ftr_fixup_test6;
- extern unsigned int ftr_fixup_test6_expected;
- int size = &end_ftr_fixup_test6 - &ftr_fixup_test6;
+ extern unsigned int ftr_fixup_test7[];
+ extern unsigned int end_ftr_fixup_test7[];
+ extern unsigned int ftr_fixup_test7_expected[];
+ int size = 4 * (end_ftr_fixup_test7 - ftr_fixup_test7);
- check(memcmp(&ftr_fixup_test6, &ftr_fixup_test6_expected, size) == 0);
+ check(memcmp(ftr_fixup_test7, ftr_fixup_test7_expected, size) == 0);
}
-static void test_cpu_macros(void)
+static void __init test_cpu_macros(void)
{
- extern u8 ftr_fixup_test_FTR_macros;
- extern u8 ftr_fixup_test_FTR_macros_expected;
- unsigned long size = &ftr_fixup_test_FTR_macros_expected -
- &ftr_fixup_test_FTR_macros;
+ extern u8 ftr_fixup_test_FTR_macros[];
+ extern u8 ftr_fixup_test_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FTR_macros_expected -
+ ftr_fixup_test_FTR_macros;
/* The fixups have already been done for us during boot */
- check(memcmp(&ftr_fixup_test_FTR_macros,
- &ftr_fixup_test_FTR_macros_expected, size) == 0);
+ check(memcmp(ftr_fixup_test_FTR_macros,
+ ftr_fixup_test_FTR_macros_expected, size) == 0);
}
-static void test_fw_macros(void)
+static void __init test_fw_macros(void)
{
#ifdef CONFIG_PPC64
- extern u8 ftr_fixup_test_FW_FTR_macros;
- extern u8 ftr_fixup_test_FW_FTR_macros_expected;
- unsigned long size = &ftr_fixup_test_FW_FTR_macros_expected -
- &ftr_fixup_test_FW_FTR_macros;
+ extern u8 ftr_fixup_test_FW_FTR_macros[];
+ extern u8 ftr_fixup_test_FW_FTR_macros_expected[];
+ unsigned long size = ftr_fixup_test_FW_FTR_macros_expected -
+ ftr_fixup_test_FW_FTR_macros;
/* The fixups have already been done for us during boot */
- check(memcmp(&ftr_fixup_test_FW_FTR_macros,
- &ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
+ check(memcmp(ftr_fixup_test_FW_FTR_macros,
+ ftr_fixup_test_FW_FTR_macros_expected, size) == 0);
#endif
}
-static void test_lwsync_macros(void)
+static void __init test_lwsync_macros(void)
{
- extern u8 lwsync_fixup_test;
- extern u8 end_lwsync_fixup_test;
- extern u8 lwsync_fixup_test_expected_LWSYNC;
- extern u8 lwsync_fixup_test_expected_SYNC;
- unsigned long size = &end_lwsync_fixup_test -
- &lwsync_fixup_test;
+ extern u8 lwsync_fixup_test[];
+ extern u8 end_lwsync_fixup_test[];
+ extern u8 lwsync_fixup_test_expected_LWSYNC[];
+ extern u8 lwsync_fixup_test_expected_SYNC[];
+ unsigned long size = end_lwsync_fixup_test -
+ lwsync_fixup_test;
/* The fixups have already been done for us during boot */
if (cur_cpu_spec->cpu_features & CPU_FTR_LWSYNC) {
- check(memcmp(&lwsync_fixup_test,
- &lwsync_fixup_test_expected_LWSYNC, size) == 0);
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_LWSYNC, size) == 0);
} else {
- check(memcmp(&lwsync_fixup_test,
- &lwsync_fixup_test_expected_SYNC, size) == 0);
+ check(memcmp(lwsync_fixup_test,
+ lwsync_fixup_test_expected_SYNC, size) == 0);
}
}
+#ifdef CONFIG_PPC64
+static void __init test_prefix_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix1[];
+ extern unsigned int end_ftr_fixup_prefix1[];
+ extern unsigned int ftr_fixup_prefix1_orig[];
+ extern unsigned int ftr_fixup_prefix1_expected[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix1 - ftr_fixup_prefix1);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix1 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix1 + 3);
+ fixup.alt_start_off = fixup.alt_end_off = 0;
+
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_expected, size) == 0);
+ check(memcmp(ftr_fixup_prefix1, ftr_fixup_prefix1_orig, size) != 0);
+}
+
+static void __init test_prefix_alt_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix2[];
+ extern unsigned int end_ftr_fixup_prefix2[];
+ extern unsigned int ftr_fixup_prefix2_orig[];
+ extern unsigned int ftr_fixup_prefix2_expected[];
+ extern unsigned int ftr_fixup_prefix2_alt[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix2 - ftr_fixup_prefix2);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix2 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix2 + 3);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix2_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix2_alt + 2);
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_expected, size) == 0);
+ check(memcmp(ftr_fixup_prefix2, ftr_fixup_prefix2_orig, size) != 0);
+}
+
+static void __init test_prefix_word_alt_patching(void)
+{
+ extern unsigned int ftr_fixup_prefix3[];
+ extern unsigned int end_ftr_fixup_prefix3[];
+ extern unsigned int ftr_fixup_prefix3_orig[];
+ extern unsigned int ftr_fixup_prefix3_expected[];
+ extern unsigned int ftr_fixup_prefix3_alt[];
+ int size = sizeof(unsigned int) * (end_ftr_fixup_prefix3 - ftr_fixup_prefix3);
+
+ fixup.value = fixup.mask = 8;
+ fixup.start_off = calc_offset(&fixup, ftr_fixup_prefix3 + 1);
+ fixup.end_off = calc_offset(&fixup, ftr_fixup_prefix3 + 4);
+ fixup.alt_start_off = calc_offset(&fixup, ftr_fixup_prefix3_alt);
+ fixup.alt_end_off = calc_offset(&fixup, ftr_fixup_prefix3_alt + 3);
+ /* Sanity check */
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) == 0);
+
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_expected, size) == 0);
+ patch_feature_section(0, &fixup);
+ check(memcmp(ftr_fixup_prefix3, ftr_fixup_prefix3_orig, size) != 0);
+}
+#else
+static inline void test_prefix_patching(void) {}
+static inline void test_prefix_alt_patching(void) {}
+static inline void test_prefix_word_alt_patching(void) {}
+#endif /* CONFIG_PPC64 */
+
static int __init test_feature_fixups(void)
{
printk(KERN_DEBUG "Running feature fixup self-tests ...\n");
@@ -365,9 +1005,13 @@ static int __init test_feature_fixups(void)
test_alternative_case_too_small();
test_alternative_case_with_branch();
test_alternative_case_with_external_branch();
+ test_alternative_case_with_branch_to_end();
test_cpu_macros();
test_fw_macros();
test_lwsync_macros();
+ test_prefix_patching();
+ test_prefix_alt_patching();
+ test_prefix_word_alt_patching();
return 0;
}
diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S
index 19e66001a4f9..151875050da9 100644
--- a/arch/powerpc/lib/hweight_64.S
+++ b/arch/powerpc/lib/hweight_64.S
@@ -1,30 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2010
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#include <asm/feature-fixups.h>
/* Note: This code relies on -mminimal-toc */
_GLOBAL(__arch_hweight8)
BEGIN_FTR_SECTION
- b __sw_hweight8
+ b CFUNC(__sw_hweight8)
nop
nop
FTR_SECTION_ELSE
@@ -32,10 +22,11 @@ FTR_SECTION_ELSE
clrldi r3,r3,64-8
blr
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight8)
_GLOBAL(__arch_hweight16)
BEGIN_FTR_SECTION
- b __sw_hweight16
+ b CFUNC(__sw_hweight16)
nop
nop
nop
@@ -54,10 +45,11 @@ FTR_SECTION_ELSE
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 50)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight16)
_GLOBAL(__arch_hweight32)
BEGIN_FTR_SECTION
- b __sw_hweight32
+ b CFUNC(__sw_hweight32)
nop
nop
nop
@@ -79,10 +71,11 @@ FTR_SECTION_ELSE
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 51)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight32)
_GLOBAL(__arch_hweight64)
BEGIN_FTR_SECTION
- b __sw_hweight64
+ b CFUNC(__sw_hweight64)
nop
nop
nop
@@ -108,3 +101,4 @@ FTR_SECTION_ELSE
blr
ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_POPCNTD, 52)
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_POPCNTB)
+EXPORT_SYMBOL(__arch_hweight64)
diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S
index 85aec08ab234..e00abeabc54d 100644
--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
@@ -1,13 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Floating-point, VMX/Altivec and VSX loads and stores
* for use in instruction emulation.
*
* Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/processor.h>
@@ -15,39 +11,24 @@
#include <asm/ppc-opcode.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
+#include <asm/asm-compat.h>
#include <linux/errno.h>
-#ifdef CONFIG_PPC_FPU
-
#define STKFRM (PPC_MIN_STKFRM + 16)
- .macro extab instr,handler
- .section __ex_table,"a"
- PPC_LONG \instr,\handler
- .previous
- .endm
-
- .macro inst32 op
-reg = 0
- .rept 32
-20: \op reg,0,r4
- b 3f
- extab 20b,99f
-reg = reg + 1
- .endr
- .endm
-
-/* Get the contents of frN into fr0; N is in r3. */
+/* Get the contents of frN into *p; N is in r3 and p is in r4. */
_GLOBAL(get_fpr)
mflr r0
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* fr0 is already in fr0 */
- nop
-reg = 1
- .rept 31
- fmr fr0,reg
- blr
+reg = 0
+ .rept 32
+ stfd reg, 0(r4)
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -55,18 +36,23 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
+2: MTMSRD(r6)
+ isync
+ blr
-/* Put the contents of fr0 into frN; N is in r3. */
+/* Put the contents of *p into frN; N is in r3 and p is in r4. */
_GLOBAL(put_fpr)
mflr r0
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* fr0 is already in fr0 */
- nop
-reg = 1
- .rept 31
- fmr reg,fr0
- blr
+reg = 0
+ .rept 32
+ lfd reg, 0(r4)
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -74,127 +60,24 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
-
-/* Load FP reg N from float at *p. N is in r3, p in r4. */
-_GLOBAL(do_lfs)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
- MTMSRD(r7)
- isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
-1: li r9,-EFAULT
-2: lfs fr0,0(r4)
- li r9,0
-3: bl put_fpr
- beq cr7,4f
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
+2: MTMSRD(r6)
isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- extab 2b,3b
-
-/* Load FP reg N from double at *p. N is in r3, p in r4. */
-_GLOBAL(do_lfd)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
- MTMSRD(r7)
- isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
-1: li r9,-EFAULT
-2: lfd fr0,0(r4)
- li r9,0
-3: beq cr7,4f
- bl put_fpr
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
blr
- extab 2b,3b
-/* Store FP reg N to float at *p. N is in r3, p in r4. */
-_GLOBAL(do_stfs)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
- MTMSRD(r7)
- isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
- bl get_fpr
-1: li r9,-EFAULT
-2: stfs fr0,0(r4)
- li r9,0
-3: beq cr7,4f
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- extab 2b,3b
-
-/* Store FP reg N to double at *p. N is in r3, p in r4. */
-_GLOBAL(do_stfd)
- PPC_STLU r1,-STKFRM(r1)
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into *p; N is in r3 and p is in r4. */
+_GLOBAL(get_vr)
mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
- ori r7,r6,MSR_FP
- cmpwi cr7,r3,0
+ oris r7, r6, MSR_VEC@h
MTMSRD(r7)
isync
- beq cr7,1f
- stfd fr0,STKFRM-16(r1)
- bl get_fpr
-1: li r9,-EFAULT
-2: stfd fr0,0(r4)
- li r9,0
-3: beq cr7,4f
- lfd fr0,STKFRM-16(r1)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- extab 2b,3b
-
-#ifdef CONFIG_ALTIVEC
-/* Get the contents of vrN into vr0; N is in r3. */
-_GLOBAL(get_vr)
- mflr r0
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* vr0 is already in vr0 */
- nop
-reg = 1
- .rept 31
- vor vr0,reg,reg /* assembler doesn't know vmr? */
- blr
+reg = 0
+ .rept 32
+ stvx reg, 0, r4
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -202,18 +85,23 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
+2: MTMSRD(r6)
+ isync
+ blr
-/* Put the contents of vr0 into vrN; N is in r3. */
+/* Put the contents of *p into vrN; N is in r3 and p is in r4. */
_GLOBAL(put_vr)
mflr r0
+ mfmsr r6
+ oris r7, r6, MSR_VEC@h
+ MTMSRD(r7)
+ isync
rlwinm r3,r3,3,0xf8
bcl 20,31,1f
- blr /* vr0 is already in vr0 */
- nop
-reg = 1
- .rept 31
- vor reg,vr0,vr0
- blr
+reg = 0
+ .rept 32
+ lvx reg, 0, r4
+ b 2f
reg = reg + 1
.endr
1: mflr r5
@@ -221,71 +109,18 @@ reg = reg + 1
mtctr r5
mtlr r0
bctr
-
-/* Load vector reg N from *p. N is in r3, p in r4. */
-_GLOBAL(do_lvx)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- oris r7,r6,MSR_VEC@h
- cmpwi cr7,r3,0
- li r8,STKFRM-16
- MTMSRD(r7)
- isync
- beq cr7,1f
- stvx vr0,r1,r8
-1: li r9,-EFAULT
-2: lvx vr0,0,r4
- li r9,0
-3: beq cr7,4f
- bl put_vr
- lvx vr0,r1,r8
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
- isync
- mr r3,r9
- addi r1,r1,STKFRM
- blr
- extab 2b,3b
-
-/* Store vector reg N to *p. N is in r3, p in r4. */
-_GLOBAL(do_stvx)
- PPC_STLU r1,-STKFRM(r1)
- mflr r0
- PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
- mfmsr r6
- oris r7,r6,MSR_VEC@h
- cmpwi cr7,r3,0
- li r8,STKFRM-16
- MTMSRD(r7)
- isync
- beq cr7,1f
- stvx vr0,r1,r8
- bl get_vr
-1: li r9,-EFAULT
-2: stvx vr0,0,r4
- li r9,0
-3: beq cr7,4f
- lvx vr0,r1,r8
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
- mtlr r0
- MTMSRD(r6)
+2: MTMSRD(r6)
isync
- mr r3,r9
- addi r1,r1,STKFRM
blr
- extab 2b,3b
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
-/* Get the contents of vsrN into vsr0; N is in r3. */
+/* Get the contents of vsN into vs0; N is in r3. */
_GLOBAL(get_vsr)
mflr r0
rlwinm r3,r3,3,0x1f8
bcl 20,31,1f
- blr /* vsr0 is already in vsr0 */
+ blr /* vs0 is already in vs0 */
nop
reg = 1
.rept 63
@@ -299,12 +134,12 @@ reg = reg + 1
mtlr r0
bctr
-/* Put the contents of vsr0 into vsrN; N is in r3. */
+/* Put the contents of vs0 into vsN; N is in r3. */
_GLOBAL(put_vsr)
mflr r0
rlwinm r3,r3,3,0x1f8
bcl 20,31,1f
- blr /* vr0 is already in vr0 */
+ blr /* vs0 is already in vs0 */
nop
reg = 1
.rept 63
@@ -319,7 +154,7 @@ reg = reg + 1
bctr
/* Load VSX reg N from vector doubleword *p. N is in r3, p in r4. */
-_GLOBAL(do_lxvd2x)
+_GLOBAL(load_vsrn)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
@@ -331,49 +166,72 @@ _GLOBAL(do_lxvd2x)
isync
beq cr7,1f
STXVD2X(0,R1,R8)
-1: li r9,-EFAULT
-2: LXVD2X(0,R0,R4)
- li r9,0
-3: beq cr7,4f
+1: LXVD2X(0,R0,R4)
+#ifdef __LITTLE_ENDIAN__
+ XXSWAPD(0,0)
+#endif
+ beq cr7,4f
bl put_vsr
LXVD2X(0,R1,R8)
4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
- mr r3,r9
addi r1,r1,STKFRM
blr
- extab 2b,3b
/* Store VSX reg N to vector doubleword *p. N is in r3, p in r4. */
-_GLOBAL(do_stxvd2x)
+_GLOBAL(store_vsrn)
PPC_STLU r1,-STKFRM(r1)
mflr r0
PPC_STL r0,STKFRM+PPC_LR_STKOFF(r1)
mfmsr r6
oris r7,r6,MSR_VSX@h
- cmpwi cr7,r3,0
li r8,STKFRM-16
MTMSRD(r7)
isync
- beq cr7,1f
STXVD2X(0,R1,R8)
bl get_vsr
-1: li r9,-EFAULT
-2: STXVD2X(0,R0,R4)
- li r9,0
-3: beq cr7,4f
+#ifdef __LITTLE_ENDIAN__
+ XXSWAPD(0,0)
+#endif
+ STXVD2X(0,R0,R4)
LXVD2X(0,R1,R8)
-4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
+ PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1)
mtlr r0
MTMSRD(r6)
isync
mr r3,r9
addi r1,r1,STKFRM
blr
- extab 2b,3b
-
#endif /* CONFIG_VSX */
-#endif /* CONFIG_PPC_FPU */
+/* Convert single-precision to double, without disturbing FPRs. */
+/* conv_sp_to_dp(float *sp, double *dp) */
+_GLOBAL(conv_sp_to_dp)
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ stfd fr0, -16(r1)
+ lfs fr0, 0(r3)
+ stfd fr0, 0(r4)
+ lfd fr0, -16(r1)
+ MTMSRD(r6)
+ isync
+ blr
+
+/* Convert double-precision to single, without disturbing FPRs. */
+/* conv_dp_to_sp(double *dp, float *sp) */
+_GLOBAL(conv_dp_to_sp)
+ mfmsr r6
+ ori r7, r6, MSR_FP
+ MTMSRD(r7)
+ isync
+ stfd fr0, -16(r1)
+ lfd fr0, 0(r3)
+ stfs fr0, 0(r4)
+ lfd fr0, -16(r1)
+ MTMSRD(r6)
+ isync
+ blr
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 170a0346f756..04165b7a163f 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Spin and read/write lock operations.
*
@@ -5,17 +6,11 @@
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
* Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
* Rework to support virtual processors
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/export.h>
-#include <linux/stringify.h>
#include <linux/smp.h>
/* waiting for a spinlock... */
@@ -23,7 +18,7 @@
#include <asm/hvcall.h>
#include <asm/smp.h>
-void __spin_yield(arch_spinlock_t *lock)
+void splpar_spin_yield(arch_spinlock_t *lock)
{
unsigned int lock_value, holder_cpu, yield_count;
@@ -32,22 +27,23 @@ void __spin_yield(arch_spinlock_t *lock)
return;
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+ yield_count = yield_count_of(holder_cpu);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (lock->slock != lock_value)
return; /* something has changed */
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
+ yield_to_preempted(holder_cpu, yield_count);
}
+EXPORT_SYMBOL_GPL(splpar_spin_yield);
/*
* Waiting for a read lock or a write lock on a rwlock...
* This turns out to be the same for read and write locks, since
* we only know the holder if it is write-locked.
*/
-void __rw_yield(arch_rwlock_t *rw)
+void splpar_rw_yield(arch_rwlock_t *rw)
{
int lock_value;
unsigned int holder_cpu, yield_count;
@@ -57,29 +53,13 @@ void __rw_yield(arch_rwlock_t *rw)
return; /* no write lock at present */
holder_cpu = lock_value & 0xffff;
BUG_ON(holder_cpu >= NR_CPUS);
- yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+ yield_count = yield_count_of(holder_cpu);
if ((yield_count & 1) == 0)
return; /* virtual cpu is currently running */
rmb();
if (rw->lock != lock_value)
return; /* something has changed */
- plpar_hcall_norets(H_CONFER,
- get_hard_smp_processor_id(holder_cpu), yield_count);
+ yield_to_preempted(holder_cpu, yield_count);
}
#endif
-
-void arch_spin_unlock_wait(arch_spinlock_t *lock)
-{
- smp_mb();
-
- while (lock->slock) {
- HMT_low();
- if (SHARED_PROCESSOR)
- __spin_yield(lock);
- }
- HMT_medium();
-
- smp_mb();
-}
-
-EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 43435c6892fb..6fd06cd20faa 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -1,28 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
+#include <asm/kasan.h>
+
+#ifndef CONFIG_KASAN
+_GLOBAL(__memset16)
+ rlwimi r4,r4,16,0,15
+ /* fall through */
+
+_GLOBAL(__memset32)
+ rldimi r4,r4,32,0
+ /* fall through */
-_GLOBAL(memset)
+_GLOBAL(__memset64)
+ neg r0,r3
+ andi. r0,r0,7
+ cmplw cr1,r5,r0
+ b .Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+#endif
+
+_GLOBAL_KASAN(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
andi. r0,r0,7 /* # bytes to be 8-byte aligned */
rlwimi r4,r4,16,0,15
cmplw cr1,r5,r0 /* do we get that far? */
rldimi r4,r4,32,0
- PPC_MTOCRF(1,r0)
+.Lms: PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
- beq+ 3f /* if already 8-byte aligned */
+ beq 3f /* if already 8-byte aligned */
subf r5,r0,r5
bf 31,1f
stb r4,0(r6)
@@ -37,6 +54,7 @@ _GLOBAL(memset)
clrldi r5,r5,58
mtctr r0
beq 5f
+ .balign 16
4: std r4,0(r6)
std r4,8(r6)
std r4,16(r6)
@@ -66,7 +84,7 @@ _GLOBAL(memset)
addi r6,r6,8
8: cmpwi r5,0
PPC_MTOCRF(1,r5)
- beqlr+
+ beqlr
bf 29,9f
stw r4,0(r6)
addi r6,r6,4
@@ -76,8 +94,10 @@ _GLOBAL(memset)
10: bflr 31
stb r4,0(r6)
blr
+EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
-_GLOBAL_TOC(memmove)
+_GLOBAL_TOC_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
b memcpy
@@ -90,6 +110,7 @@ _GLOBAL(backwards_memcpy)
andi. r0,r6,3
mtctr r7
bne 5f
+ .balign 16
1: lwz r7,-4(r4)
lwzu r8,-8(r4)
stw r7,-4(r6)
@@ -117,3 +138,5 @@ _GLOBAL(backwards_memcpy)
beq 2b
mtctr r7
b 1b
+EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S
new file mode 100644
index 000000000000..f6fca5664e91
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_32.S
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * memcmp for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+
+ .text
+
+_GLOBAL(memcmp)
+ srawi. r7, r5, 2 /* Divide len by 4 */
+ mr r6, r3
+ beq- 3f
+ mtctr r7
+ li r7, 0
+1: lwzx r3, r6, r7
+ lwzx r0, r4, r7
+ addi r7, r7, 4
+ cmplw cr0, r3, r0
+ bdnzt eq, 1b
+ bne 5f
+3: andi. r3, r5, 3
+ beqlr
+ cmplwi cr1, r3, 2
+ blt- cr1, 4f
+ lhzx r3, r6, r7
+ lhzx r0, r4, r7
+ addi r7, r7, 2
+ subf. r3, r0, r3
+ beqlr cr1
+ bnelr
+4: lbzx r3, r6, r7
+ lbzx r0, r4, r7
+ subf. r3, r0, r3
+ blr
+5: li r3, 1
+ bgtlr
+ li r3, -1
+ blr
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S
new file mode 100644
index 000000000000..142c666d3897
--- /dev/null
+++ b/arch/powerpc/lib/memcmp_64.S
@@ -0,0 +1,638 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Author: Anton Blanchard <anton@au.ibm.com>
+ * Copyright 2015 IBM Corporation.
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+
+#define off8 r6
+#define off16 r7
+#define off24 r8
+
+#define rA r9
+#define rB r10
+#define rC r11
+#define rD r27
+#define rE r28
+#define rF r29
+#define rG r30
+#define rH r31
+
+#ifdef __LITTLE_ENDIAN__
+#define LH lhbrx
+#define LW lwbrx
+#define LD ldbrx
+#define LVS lvsr
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+ vperm _VRT,_VRB,_VRA,_VRC
+#else
+#define LH lhzx
+#define LW lwzx
+#define LD ldx
+#define LVS lvsl
+#define VPERM(_VRT,_VRA,_VRB,_VRC) \
+ vperm _VRT,_VRA,_VRB,_VRC
+#endif
+
+#define VMX_THRESH 4096
+#define ENTER_VMX_OPS \
+ mflr r0; \
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+ std r0,16(r1); \
+ stdu r1,-STACKFRAMESIZE(r1); \
+ bl CFUNC(enter_vmx_ops); \
+ cmpwi cr1,r3,0; \
+ ld r0,STACKFRAMESIZE+16(r1); \
+ ld r3,STK_REG(R31)(r1); \
+ ld r4,STK_REG(R30)(r1); \
+ ld r5,STK_REG(R29)(r1); \
+ addi r1,r1,STACKFRAMESIZE; \
+ mtlr r0
+
+#define EXIT_VMX_OPS \
+ mflr r0; \
+ std r3,-STACKFRAMESIZE+STK_REG(R31)(r1); \
+ std r4,-STACKFRAMESIZE+STK_REG(R30)(r1); \
+ std r5,-STACKFRAMESIZE+STK_REG(R29)(r1); \
+ std r0,16(r1); \
+ stdu r1,-STACKFRAMESIZE(r1); \
+ bl CFUNC(exit_vmx_ops); \
+ ld r0,STACKFRAMESIZE+16(r1); \
+ ld r3,STK_REG(R31)(r1); \
+ ld r4,STK_REG(R30)(r1); \
+ ld r5,STK_REG(R29)(r1); \
+ addi r1,r1,STACKFRAMESIZE; \
+ mtlr r0
+
+/*
+ * LD_VSR_CROSS16B loads the 2nd 16 bytes for _vaddr, which is not aligned to
+ * a 16-byte boundary, and permutes the result with the 1st 16 bytes.
+
+ * | y y y y y y y y y y y y y 0 1 2 | 3 4 5 6 7 8 9 a b c d e f z z z |
+ * ^ ^ ^
+ * 0xbbbb10 0xbbbb20 0xbbbb30
+ * ^
+ * _vaddr
+ *
+ *
+ * _vmask is the mask generated by LVS
+ * _v1st_qw is the 1st aligned QW of current addr which is already loaded.
+ * for example: 0xyyyyyyyyyyyyy012 for big endian
+ * _v2nd_qw is the 2nd aligned QW of cur _vaddr to be loaded.
+ * for example: 0x3456789abcdefzzz for big endian
+ * The permute result is saved in _v_res.
+ * for example: 0x0123456789abcdef for big endian.
+ */
+#define LD_VSR_CROSS16B(_vaddr,_vmask,_v1st_qw,_v2nd_qw,_v_res) \
+ lvx _v2nd_qw,_vaddr,off16; \
+ VPERM(_v_res,_v1st_qw,_v2nd_qw,_vmask)
+
+/*
+ * There are 2 categories for memcmp:
+ * 1) src/dst has the same offset to the 8 bytes boundary. The handlers
+ * are named like .Lsameoffset_xxxx
+ * 2) src/dst has different offset to the 8 bytes boundary. The handlers
+ * are named like .Ldiffoffset_xxxx
+ */
+_GLOBAL_TOC(memcmp)
+ cmpdi cr1,r5,0
+
+ /* Check whether the src/dst addresses share the same offset from
+ * an 8-byte boundary; cr0 is tested by the bne at .Lno_short.
+ */
+ xor r6,r3,r4
+ andi. r6,r6,7
+
+ /* Fall back to short loop if compare at aligned addrs
+ * with less than 8 bytes.
+ */
+ cmpdi cr6,r5,7
+
+ beq cr1,.Lzero
+ bgt cr6,.Lno_short
+
+.Lshort:
+ mtctr r5
+1: lbz rA,0(r3)
+ lbz rB,0(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,1(r3)
+ lbz rB,1(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,2(r3)
+ lbz rB,2(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+ bdz .Lzero
+
+ lbz rA,3(r3)
+ lbz rB,3(r4)
+ subf. rC,rB,rA
+ bne .Lnon_zero
+
+ addi r3,r3,4
+ addi r4,r4,4
+
+ bdnz 1b
+
+.Lzero:
+ li r3,0
+ blr
+
+.Lno_short:
+ dcbt 0,r3
+ dcbt 0,r4
+ bne .Ldiffoffset_8bytes_make_align_start
+
+
+.Lsameoffset_8bytes_make_align_start:
+ /* Compare the leading bytes that are not 8-byte aligned so that
+ * the rest of the comparison can run on 8-byte-aligned addresses.
+ */
+ andi. r6,r3,7
+
+ /* Try to compare the first double word which is not 8 bytes aligned:
+ * load the first double word at (src & ~7UL) and shift left appropriate
+ * bits before comparison.
+ */
+ rlwinm r6,r3,3,26,28
+ beq .Lsameoffset_8bytes_aligned
+ clrrdi r3,r3,3
+ clrrdi r4,r4,3
+ LD rA,0,r3
+ LD rB,0,r4
+ sld rA,rA,r6
+ sld rB,rB,r6
+ cmpld cr0,rA,rB
+ srwi r6,r6,3
+ bne cr0,.LcmpAB_lightweight
+ subfic r6,r6,8
+ subf. r5,r6,r5
+ addi r3,r3,8
+ addi r4,r4,8
+ beq .Lzero
+
+.Lsameoffset_8bytes_aligned:
+ /* Now we are aligned to 8 bytes.
+ * Use the .Llong loop if 32 or more bytes are left to compare.
+ */
+ cmpdi cr6,r5,31
+ bgt cr6,.Llong
+
+.Lcmp_lt32bytes:
+ /* compare 1 ~ 31 bytes, at least r3 addr is 8 bytes aligned now */
+ cmpdi cr5,r5,7
+ srdi r0,r5,3
+ ble cr5,.Lcmp_rest_lt8bytes
+
+ /* handle 8 ~ 31 bytes */
+ clrldi r5,r5,61
+ mtctr r0
+2:
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ bne cr0,.LcmpAB_lightweight
+ bdnz 2b
+
+ cmpwi r5,0
+ beq .Lzero
+
+.Lcmp_rest_lt8bytes:
+ /*
+ * Here we have less than 8 bytes to compare. At least s1 is aligned to
+ * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a
+ * page boundary, otherwise we might read past the end of the buffer and
+ * trigger a page fault. We use 4K as the conservative minimum page
+ * size. If we detect that case we go to the byte-by-byte loop.
+ *
+ * Otherwise the next double word is loaded from s1 and s2, and shifted
+ * right to compare the appropriate bits.
+ */
+ clrldi r6,r4,(64-12) // r6 = r4 & 0xfff
+ cmpdi r6,0xff8
+ bgt .Lshort
+
+ subfic r6,r5,8
+ slwi r6,r6,3
+ LD rA,0,r3
+ LD rB,0,r4
+ srd rA,rA,r6
+ srd rB,rB,r6
+ cmpld cr0,rA,rB
+ bne cr0,.LcmpAB_lightweight
+ b .Lzero
+
+.Lnon_zero:
+ mr r3,rC
+ blr
+
+.Llong:
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ /* Try to use vmx loop if length is equal or greater than 4K */
+ cmpldi cr6,r5,VMX_THRESH
+ bge cr6,.Lsameoffset_vmx_cmp
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+.Llong_novmx_cmp:
+#endif
+ /* At least s1 addr is aligned with 8 bytes */
+ li off8,8
+ li off16,16
+ li off24,24
+
+ std r31,-8(r1)
+ std r30,-16(r1)
+ std r29,-24(r1)
+ std r28,-32(r1)
+ std r27,-40(r1)
+
+ srdi r0,r5,5
+ mtctr r0
+ andi. r5,r5,31
+
+ LD rA,0,r3
+ LD rB,0,r4
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdz .Lfirst32
+
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr1,rC,rD
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+ cmpld cr6,rE,rF
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+ bne cr1,.LcmpCD
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdz .Lsecond32
+
+ .balign 16
+
+1: LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr1,rC,rD
+ bne cr6,.LcmpEF
+
+ LD rC,off8,r3
+ LD rD,off8,r4
+ cmpld cr6,rE,rF
+ bne cr7,.LcmpGH
+
+ LD rE,off16,r3
+ LD rF,off16,r4
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ LD rG,off24,r3
+ LD rH,off24,r4
+ cmpld cr0,rA,rB
+ bne cr1,.LcmpCD
+
+ addi r3,r3,32
+ addi r4,r4,32
+
+ bdnz 1b
+
+.Lsecond32:
+ cmpld cr1,rC,rD
+ bne cr6,.LcmpEF
+
+ cmpld cr6,rE,rF
+ bne cr7,.LcmpGH
+
+ cmpld cr7,rG,rH
+ bne cr0,.LcmpAB
+
+ bne cr1,.LcmpCD
+ bne cr6,.LcmpEF
+ bne cr7,.LcmpGH
+
+.Ltail:
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+
+ cmpdi r5,0
+ beq .Lzero
+ b .Lshort
+
+.Lfirst32:
+ cmpld cr1,rC,rD
+ cmpld cr6,rE,rF
+ cmpld cr7,rG,rH
+
+ bne cr0,.LcmpAB
+ bne cr1,.LcmpCD
+ bne cr6,.LcmpEF
+ bne cr7,.LcmpGH
+
+ b .Ltail
+
+.LcmpAB:
+ li r3,1
+ bgt cr0,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpCD:
+ li r3,1
+ bgt cr1,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpEF:
+ li r3,1
+ bgt cr6,.Lout
+ li r3,-1
+ b .Lout
+
+.LcmpGH:
+ li r3,1
+ bgt cr7,.Lout
+ li r3,-1
+
+.Lout:
+ ld r31,-8(r1)
+ ld r30,-16(r1)
+ ld r29,-24(r1)
+ ld r28,-32(r1)
+ ld r27,-40(r1)
+ blr
+
+.LcmpAB_lightweight: /* skip NV GPRS restore */
+ li r3,1
+ bgtlr
+ li r3,-1
+ blr
+
+#ifdef CONFIG_ALTIVEC
+.Lsameoffset_vmx_cmp:
+ /* Entered when the src/dst addrs have the same offset from an
+ * 8-byte boundary.
+ *
+ * There is an optimization based on the following fact: memcmp()
+ * tends to fail early, within the first 32 bytes.
+ * Before using VMX instructions, which incur a 32x128-bit
+ * VMX regs load/restore penalty, we compare the first 32 bytes
+ * so that we can catch the ~80% of cases that fail early.
+ */
+
+ li r0,4
+ mtctr r0
+.Lsameoffset_prechk_32B_loop:
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ bne cr0,.LcmpAB_lightweight
+ addi r5,r5,-8
+ bdnz .Lsameoffset_prechk_32B_loop
+
+ ENTER_VMX_OPS
+ beq cr1,.Llong_novmx_cmp
+
+3:
+ /* need to check whether r4 has the same offset with r3
+ * for 16 bytes boundary.
+ */
+ xor r0,r3,r4
+ andi. r0,r0,0xf
+ bne .Ldiffoffset_vmx_cmp_start
+
+ /* len is no less than 4KB. Need to align with 16 bytes further.
+ */
+ andi. rA,r3,8
+ LD rA,0,r3
+ beq 4f
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ addi r5,r5,-8
+
+ beq cr0,4f
+ /* save and restore cr0 */
+ mfocrf r5,128
+ EXIT_VMX_OPS
+ mtocrf 128,r5
+ b .LcmpAB_lightweight
+
+4:
+ /* compare 32 bytes for each loop */
+ srdi r0,r5,5
+ mtctr r0
+ clrldi r5,r5,59
+ li off16,16
+
+.balign 16
+5:
+ lvx v0,0,r3
+ lvx v1,0,r4
+ VCMPEQUD_RC(v0,v0,v1)
+ bnl cr6,7f
+ lvx v0,off16,r3
+ lvx v1,off16,r4
+ VCMPEQUD_RC(v0,v0,v1)
+ bnl cr6,6f
+ addi r3,r3,32
+ addi r4,r4,32
+ bdnz 5b
+
+ EXIT_VMX_OPS
+ cmpdi r5,0
+ beq .Lzero
+ b .Lcmp_lt32bytes
+
+6:
+ addi r3,r3,16
+ addi r4,r4,16
+
+7:
+ /* diff the last 16 bytes */
+ EXIT_VMX_OPS
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ li off8,8
+ bne cr0,.LcmpAB_lightweight
+
+ LD rA,off8,r3
+ LD rB,off8,r4
+ cmpld cr0,rA,rB
+ bne cr0,.LcmpAB_lightweight
+ b .Lzero
+#endif
+
+.Ldiffoffset_8bytes_make_align_start:
+ /* now try to align s1 with 8 bytes */
+ rlwinm r6,r3,3,26,28
+ beq .Ldiffoffset_align_s1_8bytes
+
+ clrrdi r3,r3,3
+ LD rA,0,r3
+ LD rB,0,r4 /* unaligned load */
+ sld rA,rA,r6
+ srd rA,rA,r6
+ srd rB,rB,r6
+ cmpld cr0,rA,rB
+ srwi r6,r6,3
+ bne cr0,.LcmpAB_lightweight
+
+ subfic r6,r6,8
+ subf. r5,r6,r5
+ addi r3,r3,8
+ add r4,r4,r6
+
+ beq .Lzero
+
+.Ldiffoffset_align_s1_8bytes:
+ /* now s1 is aligned with 8 bytes. */
+#ifdef CONFIG_ALTIVEC
+BEGIN_FTR_SECTION
+ /* only do vmx ops when the size equal or greater than 4K bytes */
+ cmpdi cr5,r5,VMX_THRESH
+ bge cr5,.Ldiffoffset_vmx_cmp
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+.Ldiffoffset_novmx_cmp:
+#endif
+
+
+ cmpdi cr5,r5,31
+ ble cr5,.Lcmp_lt32bytes
+
+#ifdef CONFIG_ALTIVEC
+ b .Llong_novmx_cmp
+#else
+ b .Llong
+#endif
+
+#ifdef CONFIG_ALTIVEC
+.Ldiffoffset_vmx_cmp:
+ /* Perform a 32-byte pre-check before
+ * enabling VMX operations.
+ */
+ li r0,4
+ mtctr r0
+.Ldiffoffset_prechk_32B_loop:
+ LD rA,0,r3
+ LD rB,0,r4
+ cmpld cr0,rA,rB
+ addi r3,r3,8
+ addi r4,r4,8
+ bne cr0,.LcmpAB_lightweight
+ addi r5,r5,-8
+ bdnz .Ldiffoffset_prechk_32B_loop
+
+ ENTER_VMX_OPS
+ beq cr1,.Ldiffoffset_novmx_cmp
+
+.Ldiffoffset_vmx_cmp_start:
+ /* Firstly try to align r3 with 16 bytes */
+ andi. r6,r3,0xf
+ li off16,16
+ beq .Ldiffoffset_vmx_s1_16bytes_align
+
+ LVS v3,0,r3
+ LVS v4,0,r4
+
+ lvx v5,0,r3
+ lvx v6,0,r4
+ LD_VSR_CROSS16B(r3,v3,v5,v7,v9)
+ LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+
+ VCMPEQUB_RC(v7,v9,v10)
+ bnl cr6,.Ldiffoffset_vmx_diff_found
+
+ subfic r6,r6,16
+ subf r5,r6,r5
+ add r3,r3,r6
+ add r4,r4,r6
+
+.Ldiffoffset_vmx_s1_16bytes_align:
+ /* now s1 is aligned with 16 bytes */
+ lvx v6,0,r4
+ LVS v4,0,r4
+ srdi r6,r5,5 /* loop for 32 bytes each */
+ clrldi r5,r5,59
+ mtctr r6
+
+.balign 16
+.Ldiffoffset_vmx_32bytesloop:
+ /* the first qw of r4 was saved in v6 */
+ lvx v9,0,r3
+ LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+ VCMPEQUB_RC(v7,v9,v10)
+ vor v6,v8,v8
+ bnl cr6,.Ldiffoffset_vmx_diff_found
+
+ addi r3,r3,16
+ addi r4,r4,16
+
+ lvx v9,0,r3
+ LD_VSR_CROSS16B(r4,v4,v6,v8,v10)
+ VCMPEQUB_RC(v7,v9,v10)
+ vor v6,v8,v8
+ bnl cr6,.Ldiffoffset_vmx_diff_found
+
+ addi r3,r3,16
+ addi r4,r4,16
+
+ bdnz .Ldiffoffset_vmx_32bytesloop
+
+ EXIT_VMX_OPS
+
+ cmpdi r5,0
+ beq .Lzero
+ b .Lcmp_lt32bytes
+
+.Ldiffoffset_vmx_diff_found:
+ EXIT_VMX_OPS
+ /* anyway, the diff will appear in next 16 bytes */
+ li r5,16
+ b .Lcmp_lt32bytes
+
+#endif
+EXPORT_SYMBOL(memcmp)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 32a06ec395d2..b5a67e20143f 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -1,16 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2002 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
+#include <asm/kasan.h>
+
+#ifndef SELFTEST_CASE
+/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
+#define SELFTEST_CASE 0
+#endif
.align 7
-_GLOBAL_TOC(memcpy)
+_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
cmpdi cr7,r5,0
@@ -18,7 +23,7 @@ BEGIN_FTR_SECTION
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination pointer for return value */
#endif
FTR_SECTION_ELSE
-#ifndef SELFTEST
+#ifdef CONFIG_PPC_BOOK3S_64
b memcpy_power7
#endif
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
@@ -44,6 +49,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
cleared.
At the time of writing the only CPU that has this combination of bits
set is Power6. */
+test_feature = (SELFTEST_CASE == 1)
BEGIN_FTR_SECTION
nop
FTR_SECTION_ELSE
@@ -52,6 +58,7 @@ ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
addi r3,r3,-16
+test_feature = (SELFTEST_CASE == 0)
BEGIN_FTR_SECTION
andi. r0,r4,7
bne .Lsrc_unaligned
@@ -219,3 +226,5 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
4: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* return dest pointer */
blr
#endif
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S
index 2ff5c142f87b..b7c5e7fca8b9 100644
--- a/arch/powerpc/lib/memcpy_power7.S
+++ b/arch/powerpc/lib/memcpy_power7.S
@@ -1,17 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
@@ -19,7 +7,10 @@
*/
#include <asm/ppc_asm.h>
-_GLOBAL(memcpy_power7)
+#ifndef SELFTEST_CASE
+/* 0 == don't use VMX, 1 == use VMX */
+#define SELFTEST_CASE 0
+#endif
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
@@ -29,20 +20,17 @@ _GLOBAL(memcpy_power7)
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC
#endif
-#ifdef CONFIG_ALTIVEC
+_GLOBAL(memcpy_power7)
cmpldi r5,16
cmpldi cr1,r5,4096
-
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
-
blt .Lshort_copy
- bgt cr1,.Lvmx_copy
-#else
- cmpldi r5,16
-
- std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
- blt .Lshort_copy
+#ifdef CONFIG_ALTIVEC
+test_feature = SELFTEST_CASE
+BEGIN_FTR_SECTION
+ bgt cr1, .Lvmx_copy
+END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
.Lnonvmx_copy:
@@ -223,14 +211,14 @@ _GLOBAL(memcpy_power7)
addi r1,r1,STACKFRAMESIZE
b .Lnonvmx_copy
-#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
+#ifdef CONFIG_ALTIVEC
mflr r0
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
- bl enter_vmx_copy
+ bl CFUNC(enter_vmx_ops)
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
@@ -256,18 +244,7 @@ _GLOBAL(memcpy_power7)
or r7,r7,r0
ori r10,r7,1 /* stream=1 */
- lis r8,0x8000 /* GO=1 */
- clrldi r8,r8,32
-
-.machine push
-.machine "power4"
- dcbt r0,r6,0b01000
- dcbt r0,r7,0b01010
- dcbtst r0,r9,0b01000
- dcbtst r0,r10,0b01010
- eieio
- dcbt r0,r8,0b01010 /* GO */
-.machine pop
+ DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
beq cr1,.Lunwind_stack_nonvmx_copy
@@ -321,29 +298,29 @@ _GLOBAL(memcpy_power7)
li r11,48
bf cr7*4+3,5f
- lvx vr1,r0,r4
+ lvx v1,0,r4
addi r4,r4,16
- stvx vr1,r0,r3
+ stvx v1,0,r3
addi r3,r3,16
5: bf cr7*4+2,6f
- lvx vr1,r0,r4
- lvx vr0,r4,r9
+ lvx v1,0,r4
+ lvx v0,r4,r9
addi r4,r4,32
- stvx vr1,r0,r3
- stvx vr0,r3,r9
+ stvx v1,0,r3
+ stvx v0,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
- lvx vr3,r0,r4
- lvx vr2,r4,r9
- lvx vr1,r4,r10
- lvx vr0,r4,r11
+ lvx v3,0,r4
+ lvx v2,r4,r9
+ lvx v1,r4,r10
+ lvx v0,r4,r11
addi r4,r4,64
- stvx vr3,r0,r3
- stvx vr2,r3,r9
- stvx vr1,r3,r10
- stvx vr0,r3,r11
+ stvx v3,0,r3
+ stvx v2,r3,r9
+ stvx v1,r3,r10
+ stvx v0,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
@@ -366,23 +343,23 @@ _GLOBAL(memcpy_power7)
*/
.align 5
8:
- lvx vr7,r0,r4
- lvx vr6,r4,r9
- lvx vr5,r4,r10
- lvx vr4,r4,r11
- lvx vr3,r4,r12
- lvx vr2,r4,r14
- lvx vr1,r4,r15
- lvx vr0,r4,r16
+ lvx v7,0,r4
+ lvx v6,r4,r9
+ lvx v5,r4,r10
+ lvx v4,r4,r11
+ lvx v3,r4,r12
+ lvx v2,r4,r14
+ lvx v1,r4,r15
+ lvx v0,r4,r16
addi r4,r4,128
- stvx vr7,r0,r3
- stvx vr6,r3,r9
- stvx vr5,r3,r10
- stvx vr4,r3,r11
- stvx vr3,r3,r12
- stvx vr2,r3,r14
- stvx vr1,r3,r15
- stvx vr0,r3,r16
+ stvx v7,0,r3
+ stvx v6,r3,r9
+ stvx v5,r3,r10
+ stvx v4,r3,r11
+ stvx v3,r3,r12
+ stvx v2,r3,r14
+ stvx v1,r3,r15
+ stvx v0,r3,r16
addi r3,r3,128
bdnz 8b
@@ -396,29 +373,29 @@ _GLOBAL(memcpy_power7)
mtocrf 0x01,r6
bf cr7*4+1,9f
- lvx vr3,r0,r4
- lvx vr2,r4,r9
- lvx vr1,r4,r10
- lvx vr0,r4,r11
+ lvx v3,0,r4
+ lvx v2,r4,r9
+ lvx v1,r4,r10
+ lvx v0,r4,r11
addi r4,r4,64
- stvx vr3,r0,r3
- stvx vr2,r3,r9
- stvx vr1,r3,r10
- stvx vr0,r3,r11
+ stvx v3,0,r3
+ stvx v2,r3,r9
+ stvx v1,r3,r10
+ stvx v0,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
- lvx vr1,r0,r4
- lvx vr0,r4,r9
+ lvx v1,0,r4
+ lvx v0,r4,r9
addi r4,r4,32
- stvx vr1,r0,r3
- stvx vr0,r3,r9
+ stvx v1,0,r3
+ stvx v0,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
- lvx vr1,r0,r4
+ lvx v1,0,r4
addi r4,r4,16
- stvx vr1,r0,r3
+ stvx v1,0,r3
addi r3,r3,16
/* Up to 15B to go */
@@ -448,7 +425,7 @@ _GLOBAL(memcpy_power7)
15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
- b exit_vmx_copy /* tail call optimise */
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@@ -494,42 +471,42 @@ _GLOBAL(memcpy_power7)
li r10,32
li r11,48
- LVS(vr16,0,r4) /* Setup permute control vector */
- lvx vr0,0,r4
+ LVS(v16,0,r4) /* Setup permute control vector */
+ lvx v0,0,r4
addi r4,r4,16
bf cr7*4+3,5f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
- stvx vr8,r0,r3
+ stvx v8,0,r3
addi r3,r3,16
- vor vr0,vr1,vr1
+ vor v0,v1,v1
5: bf cr7*4+2,6f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
- lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
- stvx vr8,r0,r3
- stvx vr9,r3,r9
+ stvx v8,0,r3
+ stvx v9,r3,r9
addi r3,r3,32
6: bf cr7*4+1,7f
- lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
- lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
- lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
- lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+ lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+ lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+ lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+ lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
- stvx vr8,r0,r3
- stvx vr9,r3,r9
- stvx vr10,r3,r10
- stvx vr11,r3,r11
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
addi r3,r3,64
7: sub r5,r5,r6
@@ -552,31 +529,31 @@ _GLOBAL(memcpy_power7)
*/
.align 5
8:
- lvx vr7,r0,r4
- VPERM(vr8,vr0,vr7,vr16)
- lvx vr6,r4,r9
- VPERM(vr9,vr7,vr6,vr16)
- lvx vr5,r4,r10
- VPERM(vr10,vr6,vr5,vr16)
- lvx vr4,r4,r11
- VPERM(vr11,vr5,vr4,vr16)
- lvx vr3,r4,r12
- VPERM(vr12,vr4,vr3,vr16)
- lvx vr2,r4,r14
- VPERM(vr13,vr3,vr2,vr16)
- lvx vr1,r4,r15
- VPERM(vr14,vr2,vr1,vr16)
- lvx vr0,r4,r16
- VPERM(vr15,vr1,vr0,vr16)
+ lvx v7,0,r4
+ VPERM(v8,v0,v7,v16)
+ lvx v6,r4,r9
+ VPERM(v9,v7,v6,v16)
+ lvx v5,r4,r10
+ VPERM(v10,v6,v5,v16)
+ lvx v4,r4,r11
+ VPERM(v11,v5,v4,v16)
+ lvx v3,r4,r12
+ VPERM(v12,v4,v3,v16)
+ lvx v2,r4,r14
+ VPERM(v13,v3,v2,v16)
+ lvx v1,r4,r15
+ VPERM(v14,v2,v1,v16)
+ lvx v0,r4,r16
+ VPERM(v15,v1,v0,v16)
addi r4,r4,128
- stvx vr8,r0,r3
- stvx vr9,r3,r9
- stvx vr10,r3,r10
- stvx vr11,r3,r11
- stvx vr12,r3,r12
- stvx vr13,r3,r14
- stvx vr14,r3,r15
- stvx vr15,r3,r16
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
+ stvx v12,r3,r12
+ stvx v13,r3,r14
+ stvx v14,r3,r15
+ stvx v15,r3,r16
addi r3,r3,128
bdnz 8b
@@ -590,36 +567,36 @@ _GLOBAL(memcpy_power7)
mtocrf 0x01,r6
bf cr7*4+1,9f
- lvx vr3,r0,r4
- VPERM(vr8,vr0,vr3,vr16)
- lvx vr2,r4,r9
- VPERM(vr9,vr3,vr2,vr16)
- lvx vr1,r4,r10
- VPERM(vr10,vr2,vr1,vr16)
- lvx vr0,r4,r11
- VPERM(vr11,vr1,vr0,vr16)
+ lvx v3,0,r4
+ VPERM(v8,v0,v3,v16)
+ lvx v2,r4,r9
+ VPERM(v9,v3,v2,v16)
+ lvx v1,r4,r10
+ VPERM(v10,v2,v1,v16)
+ lvx v0,r4,r11
+ VPERM(v11,v1,v0,v16)
addi r4,r4,64
- stvx vr8,r0,r3
- stvx vr9,r3,r9
- stvx vr10,r3,r10
- stvx vr11,r3,r11
+ stvx v8,0,r3
+ stvx v9,r3,r9
+ stvx v10,r3,r10
+ stvx v11,r3,r11
addi r3,r3,64
9: bf cr7*4+2,10f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
- lvx vr0,r4,r9
- VPERM(vr9,vr1,vr0,vr16)
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
+ lvx v0,r4,r9
+ VPERM(v9,v1,v0,v16)
addi r4,r4,32
- stvx vr8,r0,r3
- stvx vr9,r3,r9
+ stvx v8,0,r3
+ stvx v9,r3,r9
addi r3,r3,32
10: bf cr7*4+3,11f
- lvx vr1,r0,r4
- VPERM(vr8,vr0,vr1,vr16)
+ lvx v1,0,r4
+ VPERM(v8,v0,v1,v16)
addi r4,r4,16
- stvx vr8,r0,r3
+ stvx v8,0,r3
addi r3,r3,16
/* Up to 15B to go */
@@ -652,5 +629,5 @@ _GLOBAL(memcpy_power7)
15: addi r1,r1,STACKFRAMESIZE
ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
- b exit_vmx_copy /* tail call optimise */
-#endif /* CONFiG_ALTIVEC */
+ b CFUNC(exit_vmx_ops) /* tail call optimise */
+#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
new file mode 100644
index 000000000000..4e724c4c01ad
--- /dev/null
+++ b/arch/powerpc/lib/pmem.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright(c) 2017 IBM Corporation. All rights reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/export.h>
+#include <linux/uaccess.h>
+#include <linux/libnvdimm.h>
+
+#include <asm/cacheflush.h>
+
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_dcache_shift();
+ unsigned long bytes = l1_dcache_bytes();
+ void *addr = (void *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __clean_pmem_range(start, stop);
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return __flush_pmem_range(start, stop);
+}
+
+/*
+ * CONFIG_ARCH_HAS_PMEM_API symbols
+ */
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+ unsigned long start = (unsigned long) addr;
+ clean_pmem_range(start, start + size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+ unsigned long start = (unsigned long) addr;
+ flush_pmem_range(start, start + size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+/*
+ * CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE symbols
+ */
+long __copy_from_user_flushcache(void *dest, const void __user *src,
+ unsigned size)
+{
+ unsigned long copied, start = (unsigned long) dest;
+
+ copied = __copy_from_user(dest, src, size);
+ clean_pmem_range(start, start + size);
+
+ return copied;
+}
+
+void memcpy_flushcache(void *dest, const void *src, size_t size)
+{
+ unsigned long start = (unsigned long) dest;
+
+ memcpy(dest, src, size);
+ clean_pmem_range(start, start + size);
+}
+EXPORT_SYMBOL(memcpy_flushcache);
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 000000000000..95ab4cdf582e
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,998 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/limits.h>
+#include <linux/percpu.h>
+#include <linux/processor.h>
+#include <linux/smp.h>
+#include <linux/topology.h>
+#include <linux/sched/clock.h>
+#include <asm/qspinlock.h>
+#include <asm/paravirt.h>
+#include <trace/events/lock.h>
+
+#define MAX_NODES 4
+
+/*
+ * One queue entry for a CPU waiting in the MCS queue of a lock. Nodes live in
+ * the per-CPU struct qnodes array and are claimed by
+ * queued_spin_lock_mcs_queue().
+ */
+struct qnode {
+ /* Next waiter in the queue; NULL until that waiter links itself in */
+ struct qnode *next;
+ /* Lock this node is queued on; get_tail_qnode() matches on it, NULL once released */
+ struct qspinlock *lock;
+ /* CPU that queued this node (smp_processor_id() at queue time) */
+ int cpu;
+ u8 sleepy; /* 1 if the previous vCPU was preempted or
+ * if the previous node was sleepy */
+ u8 locked; /* 1 if lock acquired */
+};
+
+/*
+ * Per-CPU pool of queue nodes. count is the number of nodes currently in use
+ * on this CPU; when it reaches MAX_NODES the slow path falls back to spinning
+ * on trylock instead of queueing.
+ */
+struct qnodes {
+ int count;
+ struct qnode nodes[MAX_NODES];
+};
+
+/* Tuning parameters */
+/* Iteration limit for a lock stealer, checked by steal_break(); 0 disables stealing */
+static int steal_spins __read_mostly = (1 << 5);
+/* Lower iteration limit used by steal_break() when the owner is on another NUMA node */
+static int remote_steal_spins __read_mostly = (1 << 2);
+/*
+ * maybe_stealers tells the queue head whether lock stealers may exist. It is
+ * a compile-time constant when _Q_SPIN_TRY_LOCK_STEAL is 1, otherwise it is
+ * updated by steal_spins_set().
+ */
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
+static bool maybe_stealers __read_mostly = true;
+#endif
+/* Iterations the queue head spins before it sets _Q_MUST_Q_VAL on the lock */
+static int head_spins __read_mostly = (1 << 8);
+
+/* Yield to the lock owner's vCPU when its yield count shows it is not running */
+static bool pv_yield_owner __read_mostly = true;
+/* Queue head clears _Q_MUST_Q_VAL around its yield to the owner */
+static bool pv_yield_allow_steal __read_mostly = false;
+/* Do not count spin iterations in which the owner was seen preempted */
+static bool pv_spin_on_preempted_owner __read_mostly = false;
+/* Enable the "sleepy lock" tracking (_Q_SLEEPY_VAL and scaled spin limits) */
+static bool pv_sleepy_lock __read_mostly = true;
+/* Waiters that have seen a preempted owner set _Q_SLEEPY_VAL on the lock word */
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+/* Window used by recently_sleepy(); 0 disables the per-CPU timestamping */
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+/* Multiplier applied to the spin limits when paravirt and sleepy */
+static int pv_sleepy_lock_factor __read_mostly = 256;
+/* Queued waiters yield to the previous waiter's vCPU when it is not running */
+static bool pv_yield_prev __read_mostly = true;
+/* Queued waiters check the lock owner and propagate the sleepy flag down the queue */
+static bool pv_yield_sleepy_owner __read_mostly = true;
+/* After handing over the MCS lock, prod the next waiter's CPU if it is preempted */
+static bool pv_prod_head __read_mostly = false;
+
+/* Per-CPU queue node pool */
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+/* Per-CPU sched_clock() timestamp of the last sleepy lock/owner/node seen; 0 if none */
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
+
+/* spec_barrier() emits "ori 31,31,0" when _Q_SPIN_SPEC_BARRIER is 1, else nothing */
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
+/*
+ * Report whether this CPU recorded a sleepy lock less than
+ * pv_sleepy_lock_interval_ns nanoseconds ago. A timestamp that has expired
+ * is reset to 0 so that later calls skip the clock read.
+ *
+ * pv_sleepy_lock is true whenever this is called.
+ */
+static __always_inline bool recently_sleepy(void)
+{
+	u64 stamp;
+
+	if (!pv_sleepy_lock_interval_ns)
+		return false;
+
+	stamp = this_cpu_read(sleepy_lock_seen_clock);
+	if (!stamp)
+		return false;
+
+	if (sched_clock() - stamp < pv_sleepy_lock_interval_ns)
+		return true;
+
+	this_cpu_write(sleepy_lock_seen_clock, 0);
+	return false;
+}
+
+/* steal_spins, scaled by pv_sleepy_lock_factor when paravirt and sleepy. */
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
+{
+	int spins = steal_spins;
+
+	if (paravirt && sleepy)
+		spins *= pv_sleepy_lock_factor;
+
+	return spins;
+}
+
+/* remote_steal_spins, scaled by pv_sleepy_lock_factor when paravirt and sleepy. */
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
+{
+	int spins = remote_steal_spins;
+
+	if (paravirt && sleepy)
+		spins *= pv_sleepy_lock_factor;
+
+	return spins;
+}
+
+/* head_spins, scaled by pv_sleepy_lock_factor when paravirt and sleepy. */
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+	int spins = head_spins;
+
+	if (paravirt && sleepy)
+		spins *= pv_sleepy_lock_factor;
+
+	return spins;
+}
+
+/*
+ * Build the tail field for @cpu. The CPU number is stored biased by one, so
+ * an all-zero tail field never corresponds to a CPU.
+ */
+static inline u32 encode_tail_cpu(int cpu)
+{
+	int biased = cpu + 1;
+
+	return biased << _Q_TAIL_CPU_OFFSET;
+}
+
+/* Inverse of encode_tail_cpu(): extract the CPU number from a lock value. */
+static inline int decode_tail_cpu(u32 val)
+{
+	u32 biased = val >> _Q_TAIL_CPU_OFFSET;
+
+	return biased - 1;
+}
+
+/* Extract the owner CPU field from a lock value. */
+static inline int get_owner_cpu(u32 val)
+{
+	u32 owner_bits = val & _Q_OWNER_CPU_MASK;
+
+	return owner_bits >> _Q_OWNER_CPU_OFFSET;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchanged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ *
+ * If the returned value has _Q_LOCKED_VAL set, nothing was stored and the
+ * lock was not acquired. The acquire barrier is only executed on the path
+ * where the store succeeded.
+ */
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
+{
+ u32 newval = queued_spin_encode_locked_val();
+ u32 prev, tmp;
+
+ /*
+ * Operands: %0 prev, %1 tmp, %2 &lock->val, %3 tail, %4 newval,
+ * %5 _Q_LOCKED_VAL, %6 _Q_TAIL_CPU_MASK, %7 _Q_SPIN_EH_HINT.
+ */
+ asm volatile(
+"1: lwarx %0,0,%2,%7 # trylock_clean_tail \n"
+ /* This test is necessary if there could be stealers */
+" andi. %1,%0,%5 \n"
+" bne 3f \n"
+ /* Test whether the lock tail == mytail */
+" and %1,%0,%6 \n"
+" cmpw 0,%1,%3 \n"
+ /* Merge the new locked value */
+" or %1,%1,%4 \n"
+" bne 2f \n"
+ /* If the lock tail matched, then clear it, otherwise leave it. */
+" andc %1,%1,%6 \n"
+"2: stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+"\t" PPC_ACQUIRE_BARRIER " \n"
+"3: \n"
+ : "=&r" (prev), "=&r" (tmp)
+ : "r" (&lock->val), "r"(tail), "r" (newval),
+ "i" (_Q_LOCKED_VAL),
+ "r" (_Q_TAIL_CPU_MASK),
+ "i" (_Q_SPIN_EH_HINT)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing node, this pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ *
+ * Only the bits under _Q_TAIL_CPU_MASK are replaced; the other bits of the
+ * lock word are stored back as they were loaded.
+ */
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
+{
+ u32 prev, tmp;
+
+ kcsan_release();
+
+ /* Operands: %0 prev, %1 tmp, %2 &lock->val, %3 tail, %4 _Q_TAIL_CPU_MASK */
+ asm volatile(
+"\t" PPC_RELEASE_BARRIER " \n"
+"1: lwarx %0,0,%2 # publish_tail_cpu \n"
+" andc %1,%0,%4 \n"
+" or %1,%1,%3 \n"
+" stwcx. %1,0,%2 \n"
+" bne- 1b \n"
+ : "=&r" (prev), "=&r"(tmp)
+ : "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Atomically OR _Q_MUST_Q_VAL into the lock word with a lwarx/stwcx. retry
+ * loop. No barrier instruction is issued.
+ *
+ * Note that the returned register holds the value that was stored (the OR
+ * result), because the same register is used for the load and the update.
+ */
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # set_mustq \n"
+" or %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Atomically clear _Q_MUST_Q_VAL in the lock word with a lwarx/stwcx. retry
+ * loop. No barrier instruction is issued.
+ *
+ * Note that the returned register holds the value that was stored (with the
+ * bit cleared), because the same register is used for the load and the
+ * update.
+ */
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+ u32 prev;
+
+ asm volatile(
+"1: lwarx %0,0,%1 # clear_mustq \n"
+" andc %0,%0,%2 \n"
+" stwcx. %0,0,%1 \n"
+" bne- 1b \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+ : "cr0", "memory");
+
+ return prev;
+}
+
+/*
+ * Compare-and-swap the lock word from @old to @old | _Q_SLEEPY_VAL.
+ *
+ * @old must have _Q_LOCKED_VAL set and _Q_SLEEPY_VAL clear (BUG otherwise).
+ * Returns true if the lock word still equalled @old and was updated, false
+ * if it had changed, in which case nothing is stored.
+ */
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+ u32 prev;
+ u32 new = old | _Q_SLEEPY_VAL;
+
+ BUG_ON(!(old & _Q_LOCKED_VAL));
+ BUG_ON(old & _Q_SLEEPY_VAL);
+
+ /* Operands: %0 prev, %1 &lock->val, %2 old, %3 new */
+ asm volatile(
+"1: lwarx %0,0,%1 # try_set_sleepy \n"
+" cmpw 0,%0,%2 \n"
+" bne- 2f \n"
+" stwcx. %3,0,%1 \n"
+" bne- 1b \n"
+"2: \n"
+ : "=&r" (prev)
+ : "r" (&lock->val), "r"(old), "r" (new)
+ : "cr0", "memory");
+
+ return likely(prev == old);
+}
+
+/*
+ * Record that a preempted lock owner was observed: refresh this CPU's
+ * timestamp (when the interval is enabled) and try to mark the lock word
+ * sleepy if @val does not carry _Q_SLEEPY_VAL yet.
+ */
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+	if (!pv_sleepy_lock)
+		return;
+
+	if (pv_sleepy_lock_interval_ns)
+		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+
+	if (val & _Q_SLEEPY_VAL)
+		return;
+
+	try_set_sleepy(lock, val);
+}
+
+/* Refresh this CPU's sleepy timestamp after seeing a lock marked sleepy. */
+static __always_inline void seen_sleepy_lock(void)
+{
+	if (!pv_sleepy_lock || !pv_sleepy_lock_interval_ns)
+		return;
+
+	this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+/*
+ * Refresh this CPU's sleepy timestamp after seeing a preempted queue node.
+ * The lock word is not marked sleepy here because we likely have a stale val.
+ */
+static __always_inline void seen_sleepy_node(void)
+{
+	if (!pv_sleepy_lock)
+		return;
+
+	if (!pv_sleepy_lock_interval_ns)
+		return;
+
+	this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+/*
+ * Find the queue node that @prev_cpu has queued on @lock by scanning that
+ * CPU's node pool. BUGs if no node in the pool refers to @lock.
+ */
+static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
+{
+	struct qnodes *pool = per_cpu_ptr(&qnodes, prev_cpu);
+	struct qnode *cur, *end;
+
+	/*
+	 * The caller published its own tail and found a previous tail in the
+	 * returned value (the control dependency). This barrier orders the
+	 * release barrier in publish_tail_cpu done by that previous CPU
+	 * against our reads of its qnode structures below.
+	 */
+	smp_acquire__after_ctrl_dep();
+
+	end = &pool->nodes[MAX_NODES];
+	for (cur = pool->nodes; cur != end; cur++) {
+		if (cur->lock == lock)
+			return cur;
+	}
+
+	BUG();
+}
+
+/*
+ * Called inside spin_begin(). Returns whether or not the vCPU was preempted.
+ *
+ * @val is a sampled lock word with _Q_LOCKED_VAL set (BUG otherwise). When
+ * not paravirt, or pv_yield_owner is off, or the owner's yield count is even,
+ * this only does spin_cpu_relax(). Otherwise the owner is treated as
+ * preempted and, if the lock word is still @val, we yield to it.
+ *
+ * @mustq: clear _Q_MUST_Q_VAL before the yield and set it again afterwards.
+ */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+{
+ int owner;
+ u32 yield_count;
+ bool preempted = false;
+
+ BUG_ON(!(val & _Q_LOCKED_VAL));
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_owner)
+ goto relax;
+
+ owner = get_owner_cpu(val);
+ yield_count = yield_count_of(owner);
+
+ if ((yield_count & 1) == 0)
+ goto relax; /* owner vcpu is running */
+
+ /* Leave the spin_begin() state for the slower path below */
+ spin_end();
+
+ seen_sleepy_owner(lock, val);
+ preempted = true;
+
+ /*
+ * Read the lock word after sampling the yield count. On the other side
+ * there may a wmb because the yield count update is done by the
+ * hypervisor preemption and the value update by the OS, however this
+ * ordering might reduce the chance of out of order accesses and
+ * improve the heuristic.
+ */
+ smp_rmb();
+
+ /* Only yield if the lock word did not change since it was sampled */
+ if (READ_ONCE(lock->val) == val) {
+ if (mustq)
+ clear_mustq(lock);
+ yield_to_preempted(owner, yield_count);
+ if (mustq)
+ set_mustq(lock);
+ spin_begin();
+
+ /* Don't relax if we yielded. Maybe we should? */
+ return preempted;
+ }
+ spin_begin();
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+/*
+ * Yield to the lock owner without touching _Q_MUST_Q_VAL.
+ *
+ * Must be called inside spin_begin(). Returns true if the owner's vCPU was
+ * found preempted.
+ */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	const bool mustq = false;
+
+	return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
+/*
+ * Queue-head variant of yield_to_locked_owner(): when @val has
+ * _Q_MUST_Q_VAL set and pv_yield_allow_steal is enabled, the must-queue bit
+ * is dropped for the duration of the yield.
+ *
+ * Must be called inside spin_begin(). Returns true if the owner's vCPU was
+ * found preempted.
+ */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	bool mustq = (val & _Q_MUST_Q_VAL) && pv_yield_allow_steal;
+
+	return __yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
+/*
+ * Mark the waiter queued behind @node as sleepy when the lock owner encoded
+ * in @val is preempted. Does nothing unless paravirt and
+ * pv_yield_sleepy_owner are both set, a next waiter exists, and it is not
+ * already marked.
+ */
+static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
+{
+	struct qnode *succ;
+
+	if (!paravirt || !pv_yield_sleepy_owner)
+		return;
+
+	succ = READ_ONCE(node->next);
+	if (!succ || succ->sleepy)
+		return;
+
+	if (vcpu_is_preempted(get_owner_cpu(val)))
+		succ->sleepy = 1;
+}
+
+/*
+ * Called inside spin_begin()
+ *
+ * One wait step for a queued (non-head) waiter. Depending on the pv_*
+ * tunables it may yield to the lock owner (when this node is sleepy or the
+ * previous waiter is preempted), yield to the previous waiter's vCPU, or
+ * just spin_cpu_relax(). Returns true if a preempted vCPU was observed.
+ */
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
+{
+ u32 yield_count;
+ bool preempted = false;
+
+ if (!paravirt)
+ goto relax;
+
+ if (!pv_yield_sleepy_owner)
+ goto yield_prev;
+
+ /*
+ * If the previous waiter was preempted it might not be able to
+ * propagate sleepy to us, so check the lock in that case too.
+ */
+ if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
+ u32 val = READ_ONCE(lock->val);
+
+ if (val & _Q_LOCKED_VAL) {
+ if (node->next && !node->next->sleepy) {
+ /*
+ * Propagate sleepy to next waiter. Only if
+ * owner is preempted, which allows the queue
+ * to become "non-sleepy" if vCPU preemption
+ * ceases to occur, even if the lock remains
+ * highly contended.
+ */
+ if (vcpu_is_preempted(get_owner_cpu(val)))
+ node->next->sleepy = 1;
+ }
+
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ if (preempted)
+ return preempted;
+ }
+ /* Owner was not found preempted: drop our sleepy mark */
+ node->sleepy = false;
+ }
+
+yield_prev:
+ if (!pv_yield_prev)
+ goto relax;
+
+ yield_count = yield_count_of(prev_cpu);
+ if ((yield_count & 1) == 0)
+ goto relax; /* previous waiter's vcpu is running */
+
+ spin_end();
+
+ preempted = true;
+ seen_sleepy_node();
+
+ smp_rmb(); /* See __yield_to_locked_owner comment */
+
+ /* Only yield if we have not been handed the MCS lock in the meantime */
+ if (!READ_ONCE(node->locked)) {
+ yield_to_preempted(prev_cpu, yield_count);
+ spin_begin();
+ return preempted;
+ }
+ spin_begin();
+
+relax:
+ spin_cpu_relax();
+
+ return preempted;
+}
+
+/*
+ * Decide whether a lock stealer should give up after @iters iterations.
+ *
+ * The general limit is get_steal_spins(). With CONFIG_NUMA the lower
+ * get_remote_steal_spins() limit applies when the owner encoded in @val is
+ * on a different node than the current CPU.
+ */
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
+{
+	if (iters >= get_steal_spins(paravirt, sleepy))
+		return true;
+
+	if (!IS_ENABLED(CONFIG_NUMA))
+		return false;
+
+	if (iters < get_remote_steal_spins(paravirt, sleepy))
+		return false;
+
+	return numa_node_id() != cpu_to_node(get_owner_cpu(val));
+}
+
+/*
+ * Spin for a bounded number of iterations trying to take the lock without
+ * queueing. Returns true if the lock was acquired, false if the caller must
+ * queue: stealing is disabled (steal_spins == 0), _Q_MUST_Q_VAL was seen on
+ * the lock word, or steal_break() ended the loop.
+ */
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
+{
+ bool seen_preempted = false;
+ bool sleepy = false;
+ int iters = 0;
+ u32 val;
+
+ if (!steal_spins) {
+ /* XXX: should spin_on_preempted_owner do anything here? */
+ return false;
+ }
+
+ /* Attempt to steal the lock */
+ spin_begin();
+ do {
+ bool preempted = false;
+
+ val = READ_ONCE(lock->val);
+ if (val & _Q_MUST_Q_VAL)
+ break;
+ spec_barrier();
+
+ if (unlikely(!(val & _Q_LOCKED_VAL))) {
+ spin_end();
+ if (__queued_spin_trylock_steal(lock))
+ return true;
+ spin_begin();
+ } else {
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ }
+
+ /* Sleepy bookkeeping: once sleepy, the spin limits are scaled up */
+ if (paravirt && pv_sleepy_lock) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ if (preempted) {
+ seen_preempted = true;
+ sleepy = true;
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ /*
+ * pv_spin_on_preempted_owner don't increase iters
+ * while the owner is preempted -- we won't interfere
+ * with it by definition. This could introduce some
+ * latency issue if we continually observe preempted
+ * owners, but hopefully that's a rare corner case of
+ * a badly oversubscribed system.
+ */
+ } else {
+ iters++;
+ }
+ } while (!steal_break(val, iters, paravirt, sleepy));
+
+ spin_end();
+
+ return false;
+}
+
+/*
+ * Acquire @lock by queueing: claim a per-CPU qnode, publish it as the lock's
+ * tail, wait for the previous waiter (if any) to hand over the MCS lock,
+ * then spin as queue head until the lock word can be taken, and finally
+ * hand the MCS lock to the next waiter (if any) and release the qnode.
+ *
+ * If this CPU already has MAX_NODES nodes in use, it spins on
+ * queued_spin_trylock() instead of queueing.
+ */
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
+{
+ struct qnodes *qnodesp;
+ struct qnode *next, *node;
+ u32 val, old, tail;
+ bool seen_preempted = false;
+ bool sleepy = false;
+ bool mustq = false;
+ int idx;
+ int iters = 0;
+
+ /* encode_tail_cpu() stores cpu + 1, which must fit in the tail field */
+ BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+ qnodesp = this_cpu_ptr(&qnodes);
+ if (unlikely(qnodesp->count >= MAX_NODES)) {
+ /* No free node on this CPU: fall back to spinning on trylock */
+ spec_barrier();
+ while (!queued_spin_trylock(lock))
+ cpu_relax();
+ return;
+ }
+
+ idx = qnodesp->count++;
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+ node = &qnodesp->nodes[idx];
+ node->next = NULL;
+ node->lock = lock;
+ node->cpu = smp_processor_id();
+ node->sleepy = 0;
+ node->locked = 0;
+
+ tail = encode_tail_cpu(node->cpu);
+
+ /*
+ * Assign all attributes of a node before it can be published.
+ * Issues an lwsync, serving as a release barrier, as well as a
+ * compiler barrier.
+ */
+ old = publish_tail_cpu(lock, tail);
+
+ /*
+ * If there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_CPU_MASK) {
+ int prev_cpu = decode_tail_cpu(old);
+ struct qnode *prev = get_tail_qnode(lock, prev_cpu);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ /* Wait for mcs node lock to be released */
+ spin_begin();
+ while (!READ_ONCE(node->locked)) {
+ spec_barrier();
+
+ if (yield_to_prev(lock, node, prev_cpu, paravirt))
+ seen_preempted = true;
+ }
+ spec_barrier();
+ spin_end();
+
+ smp_rmb(); /* acquire barrier for the mcs lock */
+
+ /*
+ * Generic qspinlocks have this prefetch here, but it seems
+ * like it could cause additional line transitions because
+ * the waiter will keep loading from it.
+ */
+ if (_Q_SPIN_PREFETCH_NEXT) {
+ next = READ_ONCE(node->next);
+ if (next)
+ prefetchw(next);
+ }
+ }
+
+ /* We're at the head of the waitqueue, wait for the lock. */
+again:
+ spin_begin();
+ for (;;) {
+ bool preempted;
+
+ val = READ_ONCE(lock->val);
+ if (!(val & _Q_LOCKED_VAL))
+ break;
+ spec_barrier();
+
+ if (paravirt && pv_sleepy_lock && maybe_stealers) {
+ if (!sleepy) {
+ if (val & _Q_SLEEPY_VAL) {
+ seen_sleepy_lock();
+ sleepy = true;
+ } else if (recently_sleepy()) {
+ sleepy = true;
+ }
+ }
+ if (pv_sleepy_lock_sticky && seen_preempted &&
+ !(val & _Q_SLEEPY_VAL)) {
+ if (try_set_sleepy(lock, val))
+ val |= _Q_SLEEPY_VAL;
+ }
+ }
+
+ propagate_sleepy(node, val, paravirt);
+ preempted = yield_head_to_locked_owner(lock, val, paravirt);
+ /* Without stealers there is no need to count spins or set must-queue */
+ if (!maybe_stealers)
+ continue;
+
+ if (preempted)
+ seen_preempted = true;
+
+ if (paravirt && preempted) {
+ sleepy = true;
+
+ if (!pv_spin_on_preempted_owner)
+ iters++;
+ } else {
+ iters++;
+ }
+
+ /* Spun long enough: set _Q_MUST_Q_VAL so try_to_steal_lock() gives up */
+ if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
+ mustq = true;
+ set_mustq(lock);
+ val |= _Q_MUST_Q_VAL;
+ }
+ }
+ spec_barrier();
+ spin_end();
+
+ /* If we're the last queued, must clean up the tail. */
+ old = trylock_clean_tail(lock, tail);
+ if (unlikely(old & _Q_LOCKED_VAL)) {
+ BUG_ON(!maybe_stealers);
+ goto again; /* Can only be true if maybe_stealers. */
+ }
+
+ if ((old & _Q_TAIL_CPU_MASK) == tail)
+ goto release; /* We were the tail, no next. */
+
+ /* There is a next, must wait for node->next != NULL (MCS protocol) */
+ next = READ_ONCE(node->next);
+ if (!next) {
+ spin_begin();
+ while (!(next = READ_ONCE(node->next)))
+ cpu_relax();
+ spin_end();
+ }
+ spec_barrier();
+
+ /*
+ * Unlock the next mcs waiter node. Release barrier is not required
+ * here because the acquirer is only accessing the lock word, and
+ * the acquire barrier we took the lock with orders that update vs
+ * this store to locked. The corresponding barrier is the smp_rmb()
+ * acquire barrier for mcs lock, above.
+ */
+ if (paravirt && pv_prod_head) {
+ /* Read next->cpu before setting next->locked */
+ int next_cpu = next->cpu;
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ if (vcpu_is_preempted(next_cpu))
+ prod_cpu(next_cpu);
+ } else {
+ WRITE_ONCE(next->locked, 1);
+ if (_Q_SPIN_MISO)
+ asm volatile("miso" ::: "memory");
+ }
+
+release:
+ /*
+ * Clear the lock before releasing the node, as another CPU might see stale
+ * values if an interrupt occurs after we increment qnodesp->count
+ * but before node->lock is initialized. The barrier ensures that
+ * there are no further stores to the node after it has been released.
+ */
+ node->lock = NULL;
+ barrier();
+ qnodesp->count--;
+}
+
+/*
+ * Slow path entry point for taking @lock: first try to steal it, and queue
+ * on it if that fails. The paravirt variants are used when
+ * CONFIG_PARAVIRT_SPINLOCKS is enabled and is_shared_processor() is true.
+ * The whole operation is bracketed by the contention tracepoints.
+ */
+void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+ trace_contention_begin(lock, LCB_F_SPIN);
+ /*
+ * This looks funny, but it induces the compiler to inline both
+ * sides of the branch rather than share code as when the condition
+ * is passed as the paravirt argument to the functions.
+ */
+ if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+ if (try_to_steal_lock(lock, true))
+ spec_barrier();
+ else
+ queued_spin_lock_mcs_queue(lock, true);
+ } else {
+ if (try_to_steal_lock(lock, false))
+ spec_barrier();
+ else
+ queued_spin_lock_mcs_queue(lock, false);
+ }
+ trace_contention_end(lock, 0);
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+/* Intentionally empty: this implementation has nothing to set up here. */
+void pv_spinlocks_init(void)
+{
+}
+#endif
+
+#include <linux/debugfs.h>
+/*
+ * debugfs setter for steal_spins.
+ *
+ * steal_spins is an int, so values above INT_MAX are rejected with -EINVAL
+ * instead of being silently truncated (possibly to zero or a negative
+ * number). When maybe_stealers is not a compile-time constant, it is kept
+ * consistent with steal_spins across 0 <-> non-zero transitions.
+ */
+static int steal_spins_set(void *data, u64 val)
+{
+#if _Q_SPIN_TRY_LOCK_STEAL != 1
+	static DEFINE_MUTEX(lock);
+#endif
+
+	if (val > INT_MAX)
+		return -EINVAL;
+
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+	/* MAYBE_STEAL remains true */
+	steal_spins = val;
+#else
+	/*
+	 * The lock slow path has a !maybe_stealers case that can assume
+	 * the head of queue will not see concurrent waiters. That waiter
+	 * is unsafe in the presence of stealers, so must keep them away
+	 * from one another.
+	 */
+
+	mutex_lock(&lock);
+	if (val && !steal_spins) {
+		maybe_stealers = true;
+		/* wait for queue head waiter to go away */
+		synchronize_rcu();
+		steal_spins = val;
+	} else if (!val && steal_spins) {
+		steal_spins = val;
+		/* wait for all possible stealers to go away */
+		synchronize_rcu();
+		maybe_stealers = false;
+	} else {
+		steal_spins = val;
+	}
+	mutex_unlock(&lock);
+#endif
+
+	return 0;
+}
+
+/* debugfs getter for steal_spins. */
+static int steal_spins_get(void *data, u64 *val)
+{
+	*val = steal_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
+
+/*
+ * debugfs setter for remote_steal_spins. The variable is an int, so values
+ * above INT_MAX are rejected with -EINVAL rather than silently truncated.
+ */
+static int remote_steal_spins_set(void *data, u64 val)
+{
+	if (val > INT_MAX)
+		return -EINVAL;
+
+	remote_steal_spins = val;
+
+	return 0;
+}
+
+/* debugfs getter for remote_steal_spins. */
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+	*val = remote_steal_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
+
+/*
+ * debugfs setter for head_spins. The variable is an int, so values above
+ * INT_MAX are rejected with -EINVAL rather than silently truncated.
+ */
+static int head_spins_set(void *data, u64 val)
+{
+	if (val > INT_MAX)
+		return -EINVAL;
+
+	head_spins = val;
+
+	return 0;
+}
+
+/* debugfs getter for head_spins. */
+static int head_spins_get(void *data, u64 *val)
+{
+	*val = head_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
+
+/* debugfs accessors for the pv_yield_owner flag; any non-zero write enables it. */
+static int pv_yield_owner_set(void *data, u64 val)
+{
+	pv_yield_owner = (val != 0);
+	return 0;
+}
+
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+	u64 cur = pv_yield_owner;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
+
+/* debugfs accessors for the pv_yield_allow_steal flag; any non-zero write enables it. */
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+	pv_yield_allow_steal = (val != 0);
+	return 0;
+}
+
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+	u64 cur = pv_yield_allow_steal;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
+
+/* debugfs accessors for the pv_spin_on_preempted_owner flag; any non-zero write enables it. */
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+	pv_spin_on_preempted_owner = (val != 0);
+	return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+	u64 cur = pv_spin_on_preempted_owner;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
+/* debugfs accessors for the pv_sleepy_lock flag; any non-zero write enables it. */
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+	pv_sleepy_lock = (val != 0);
+	return 0;
+}
+
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+	u64 cur = pv_sleepy_lock;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
+
+/* debugfs accessors for the pv_sleepy_lock_sticky flag; any non-zero write enables it. */
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+	pv_sleepy_lock_sticky = (val != 0);
+	return 0;
+}
+
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+	u64 cur = pv_sleepy_lock_sticky;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
+
+/* debugfs accessors for pv_sleepy_lock_interval_ns (a u64, stored as written). */
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+	u64 interval_ns = val;
+
+	pv_sleepy_lock_interval_ns = interval_ns;
+	return 0;
+}
+
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+	u64 interval_ns = pv_sleepy_lock_interval_ns;
+
+	*val = interval_ns;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+/*
+ * debugfs setter for pv_sleepy_lock_factor. The variable is an int, so
+ * values above INT_MAX are rejected with -EINVAL rather than silently
+ * truncated.
+ */
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+	if (val > INT_MAX)
+		return -EINVAL;
+
+	pv_sleepy_lock_factor = val;
+
+	return 0;
+}
+
+/* debugfs getter for pv_sleepy_lock_factor. */
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_factor;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
+
+/* debugfs accessors for the pv_yield_prev flag; any non-zero write enables it. */
+static int pv_yield_prev_set(void *data, u64 val)
+{
+	pv_yield_prev = (val != 0);
+	return 0;
+}
+
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+	u64 cur = pv_yield_prev;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
+/* debugfs accessors for the pv_yield_sleepy_owner flag; any non-zero write enables it. */
+static int pv_yield_sleepy_owner_set(void *data, u64 val)
+{
+	pv_yield_sleepy_owner = (val != 0);
+	return 0;
+}
+
+static int pv_yield_sleepy_owner_get(void *data, u64 *val)
+{
+	u64 cur = pv_yield_sleepy_owner;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
+
+/* debugfs accessors for the pv_prod_head flag; any non-zero write enables it. */
+static int pv_prod_head_set(void *data, u64 val)
+{
+	pv_prod_head = (val != 0);
+	return 0;
+}
+
+static int pv_prod_head_get(void *data, u64 *val)
+{
+	u64 cur = pv_prod_head;
+
+	*val = cur;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
+
+static __init int spinlock_debugfs_init(void)
+{
+ debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+ debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
+ debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
+ if (is_shared_processor()) {
+ debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+ debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+ debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
+ debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
+ debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
+ debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
+ debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
+ debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+ debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
+ debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
+ }
+
+ return 0;
+}
+device_initcall(spinlock_debugfs_init);
diff --git a/arch/powerpc/lib/quad.S b/arch/powerpc/lib/quad.S
new file mode 100644
index 000000000000..da71760e50b5
--- /dev/null
+++ b/arch/powerpc/lib/quad.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Quadword loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+/* do_lq(unsigned long ea, unsigned long *regs) */
+/*
+ * Quadword load from ea (r3). The r6/r7 pair is then stored to regs[0] and
+ * regs[1] (r4). Returns 0 in r3, or -EFAULT through the exception table
+ * entry if the instruction at label 1 faults.
+ */
+_GLOBAL(do_lq)
+1: lq r6, 0(r3)
+ std r6, 0(r4)
+ std r7, 8(r4)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_stq(unsigned long ea, unsigned long val0, unsigned long val1) */
+/*
+ * Quadword store to ea (r3) with r4 as the source operand.
+ * NOTE(review): presumably val0/val1 arrive in r4/r5 and stq stores that
+ * register pair -- confirm against the ABI and ISA.
+ * Returns 0 in r3, or -EFAULT through the exception table entry if the
+ * instruction at label 1 faults.
+ */
+_GLOBAL(do_stq)
+1: stq r4, 0(r3)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_lqarx(unsigned long ea, unsigned long *regs) */
+/*
+ * Same shape as do_lq, but the load is emitted with PPC_LQARX(6, 0, 3, 0).
+ * The r6/r7 pair is stored to regs[0] and regs[1] (r4). Returns 0 in r3, or
+ * -EFAULT through the exception table entry if the instruction at label 1
+ * faults.
+ */
+_GLOBAL(do_lqarx)
+1: PPC_LQARX(6, 0, 3, 0)
+ std r6, 0(r4)
+ std r7, 8(r4)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
+
+/* do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ unsigned int *crp) */
+/*
+ * Emits PPC_STQCX(4, 0, 3), then copies the condition register into *crp
+ * (r6); r5 is reused as scratch for the mfcr result. Returns 0 in r3, or
+ * -EFAULT through the exception table entry if the instruction at label 1
+ * faults (in which case *crp is not written).
+ */
+
+_GLOBAL(do_stqcx)
+1: PPC_STQCX(4, 0, 3)
+ mfcr r5
+ stw r5, 0(r6)
+ li r3, 0
+ blr
+2: li r3, -EFAULT
+ blr
+ EX_TABLE(1b, 2b)
diff --git a/arch/powerpc/lib/restart_table.c b/arch/powerpc/lib/restart_table.c
new file mode 100644
index 000000000000..bccb662c1b7b
--- /dev/null
+++ b/arch/powerpc/lib/restart_table.c
@@ -0,0 +1,56 @@
+#include <asm/interrupt.h>
+#include <asm/kprobes.h>
+
+/* One address range in the soft mask table; matched as start <= addr < end. */
+struct soft_mask_table_entry {
+ unsigned long start;
+ unsigned long end;
+};
+
+/*
+ * One address range in the restart table, matched as start <= addr < end.
+ * fixup is the value search_kernel_restart_table() returns for a match.
+ */
+struct restart_table_entry {
+ unsigned long start;
+ unsigned long end;
+ unsigned long fixup;
+};
+
+/*
+ * First entry and one-past-the-last entry of each table.
+ * NOTE(review): presumably provided by the linker script -- confirm.
+ */
+extern struct soft_mask_table_entry __start___soft_mask_table[];
+extern struct soft_mask_table_entry __stop___soft_mask_table[];
+
+extern struct restart_table_entry __start___restart_table[];
+extern struct restart_table_entry __stop___restart_table[];
+
+/*
+ * Return true if @addr falls inside any [start, end) range of the soft mask
+ * table, false otherwise. The table is scanned linearly.
+ */
+bool search_kernel_soft_mask_table(unsigned long addr)
+{
+	struct soft_mask_table_entry *entry;
+
+	for (entry = __start___soft_mask_table;
+	     entry < __stop___soft_mask_table; entry++) {
+		if (addr < entry->start)
+			continue;
+		if (addr < entry->end)
+			return true;
+	}
+
+	return false;
+}
+NOKPROBE_SYMBOL(search_kernel_soft_mask_table);
+
+/*
+ * Look @addr up in the kernel restart table. Returns the fixup of the first
+ * entry whose [start, end) range contains @addr, or 0 if there is none.
+ */
+unsigned long search_kernel_restart_table(unsigned long addr)
+{
+	struct restart_table_entry *entry;
+
+	for (entry = __start___restart_table;
+	     entry < __stop___restart_table; entry++) {
+		if (addr < entry->start)
+			continue;
+		if (addr < entry->end)
+			return entry->fixup;
+	}
+
+	return 0;
+}
+NOKPROBE_SYMBOL(search_kernel_restart_table);
diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c
index a1060a868e69..6aa774aa5b16 100644
--- a/arch/powerpc/lib/rheap.c
+++ b/arch/powerpc/lib/rheap.c
@@ -54,7 +54,7 @@ static int grow(rh_info_t * info, int max_blocks)
new_blocks = max_blocks - info->max_blocks;
- block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_ATOMIC);
+ block = kmalloc_array(max_blocks, sizeof(rh_block_t), GFP_ATOMIC);
if (block == NULL)
return -ENOMEM;
@@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rh_create);
*/
void rh_destroy(rh_info_t * info)
{
- if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL)
+ if ((info->flags & RHIF_STATIC_BLOCK) == 0)
kfree(info->block);
if ((info->flags & RHIF_STATIC_INFO) == 0)
@@ -325,7 +325,7 @@ void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks,
}
EXPORT_SYMBOL_GPL(rh_init);
-/* Attach a free memory region, coalesces regions if adjuscent */
+/* Attach a free memory region, coalesces regions if adjacent */
int rh_attach_region(rh_info_t * info, unsigned long start, int size)
{
rh_block_t *blk;
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 5c09f365c842..ac3ee19531d8 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Single-step support.
*
* Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/kprobes.h>
@@ -14,10 +10,10 @@
#include <linux/prefetch.h>
#include <asm/sstep.h>
#include <asm/processor.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <asm/cpu_has_feature.h>
#include <asm/cputable.h>
-
-extern char system_call_common[];
+#include <asm/disassemble.h>
#ifdef CONFIG_PPC64
/* Bits in SRR1 that are copied from MSR */
@@ -30,45 +26,71 @@ extern char system_call_common[];
#define XER_SO 0x80000000U
#define XER_OV 0x40000000U
#define XER_CA 0x20000000U
+#define XER_OV32 0x00080000U
+#define XER_CA32 0x00040000U
+
+#ifdef CONFIG_VSX
+#define VSX_REGISTER_XTP(rd) ((((rd) & 1) << 5) | ((rd) & 0xfe))
+#endif
#ifdef CONFIG_PPC_FPU
/*
* Functions in ldstfp.S
*/
-extern int do_lfs(int rn, unsigned long ea);
-extern int do_lfd(int rn, unsigned long ea);
-extern int do_stfs(int rn, unsigned long ea);
-extern int do_stfd(int rn, unsigned long ea);
-extern int do_lvx(int rn, unsigned long ea);
-extern int do_stvx(int rn, unsigned long ea);
-extern int do_lxvd2x(int rn, unsigned long ea);
-extern int do_stxvd2x(int rn, unsigned long ea);
+extern void get_fpr(int rn, double *p);
+extern void put_fpr(int rn, const double *p);
+extern void get_vr(int rn, __vector128 *p);
+extern void put_vr(int rn, __vector128 *p);
+extern void load_vsrn(int vsr, const void *p);
+extern void store_vsrn(int vsr, void *p);
+extern void conv_sp_to_dp(const float *sp, double *dp);
+extern void conv_dp_to_sp(const double *dp, float *sp);
+#endif
+
+#ifdef __powerpc64__
+/*
+ * Functions in quad.S
+ */
+extern int do_lq(unsigned long ea, unsigned long *regs);
+extern int do_stq(unsigned long ea, unsigned long val0, unsigned long val1);
+extern int do_lqarx(unsigned long ea, unsigned long *regs);
+extern int do_stqcx(unsigned long ea, unsigned long val0, unsigned long val1,
+ unsigned int *crp);
+#endif
+
+#ifdef __LITTLE_ENDIAN__
+#define IS_LE 1
+#define IS_BE 0
+#else
+#define IS_LE 0
+#define IS_BE 1
#endif
/*
* Emulate the truncation of 64 bit values in 32-bit mode.
*/
-static unsigned long truncate_if_32bit(unsigned long msr, unsigned long val)
+static nokprobe_inline unsigned long truncate_if_32bit(unsigned long msr,
+ unsigned long val)
{
-#ifdef __powerpc64__
if ((msr & MSR_64BIT) == 0)
val &= 0xffffffffUL;
-#endif
return val;
}
/*
* Determine whether a conditional branch instruction would branch.
*/
-static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline int branch_taken(unsigned int instr,
+ const struct pt_regs *regs,
+ struct instruction_op *op)
{
unsigned int bo = (instr >> 21) & 0x1f;
unsigned int bi;
if ((bo & 4) == 0) {
/* decrement counter */
- --regs->ctr;
- if (((bo >> 1) & 1) ^ (regs->ctr == 0))
+ op->type |= DECCTR;
+ if (((bo >> 1) & 1) ^ (regs->ctr == 1))
return 0;
}
if ((bo & 0x10) == 0) {
@@ -80,61 +102,79 @@ static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
return 1;
}
-
-static long __kprobes address_ok(struct pt_regs *regs, unsigned long ea, int nb)
+static nokprobe_inline long address_ok(struct pt_regs *regs,
+ unsigned long ea, int nb)
{
if (!user_mode(regs))
return 1;
- return __access_ok(ea, nb, USER_DS);
+ if (access_ok((void __user *)ea, nb))
+ return 1;
+ if (access_ok((void __user *)ea, 1))
+ /* Access overlaps the end of the user region */
+ regs->dar = TASK_SIZE_MAX - 1;
+ else
+ regs->dar = ea;
+ return 0;
}
/*
* Calculate effective address for a D-form instruction
*/
-static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline unsigned long dform_ea(unsigned int instr,
+ const struct pt_regs *regs)
{
int ra;
unsigned long ea;
ra = (instr >> 16) & 0x1f;
ea = (signed short) instr; /* sign-extend */
- if (ra) {
+ if (ra)
ea += regs->gpr[ra];
- if (instr & 0x04000000) { /* update forms */
- if ((instr>>26) != 47) /* stmw is not an update form */
- regs->gpr[ra] = ea;
- }
- }
- return truncate_if_32bit(regs->msr, ea);
+ return ea;
}
#ifdef __powerpc64__
/*
* Calculate effective address for a DS-form instruction
*/
-static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *regs)
+static nokprobe_inline unsigned long dsform_ea(unsigned int instr,
+ const struct pt_regs *regs)
{
int ra;
unsigned long ea;
ra = (instr >> 16) & 0x1f;
ea = (signed short) (instr & ~3); /* sign-extend */
- if (ra) {
+ if (ra)
+ ea += regs->gpr[ra];
+
+ return ea;
+}
+
+/*
+ * Calculate effective address for a DQ-form instruction
+ */
+static nokprobe_inline unsigned long dqform_ea(unsigned int instr,
+ const struct pt_regs *regs)
+{
+ int ra;
+ unsigned long ea;
+
+ ra = (instr >> 16) & 0x1f;
+ ea = (signed short) (instr & ~0xf); /* sign-extend */
+ if (ra)
ea += regs->gpr[ra];
- if ((instr & 3) == 1) /* update forms */
- regs->gpr[ra] = ea;
- }
- return truncate_if_32bit(regs->msr, ea);
+ return ea;
}
#endif /* __powerpc64 */
/*
* Calculate effective address for an X-form instruction
*/
-static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs,
- int do_update)
+static nokprobe_inline unsigned long xform_ea(unsigned int instr,
+ const struct pt_regs *regs)
{
int ra, rb;
unsigned long ea;
@@ -142,387 +182,942 @@ static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs
ra = (instr >> 16) & 0x1f;
rb = (instr >> 11) & 0x1f;
ea = regs->gpr[rb];
- if (ra) {
+ if (ra)
ea += regs->gpr[ra];
- if (do_update) /* update forms */
- regs->gpr[ra] = ea;
- }
- return truncate_if_32bit(regs->msr, ea);
+ return ea;
+}
+
+/*
+ * Calculate effective address for a MLS:D-form / 8LS:D-form
+ * prefixed instruction
+ */
+static nokprobe_inline unsigned long mlsd_8lsd_ea(unsigned int instr,
+ unsigned int suffix,
+ const struct pt_regs *regs)
+{
+ int ra, prefix_r;
+ unsigned int dd;
+ unsigned long ea, d0, d1, d;
+
+ prefix_r = GET_PREFIX_R(instr);
+ ra = GET_PREFIX_RA(suffix);
+
+ d0 = instr & 0x3ffff;
+ d1 = suffix & 0xffff;
+ d = (d0 << 16) | d1;
+
+ /*
+ * sign extend a 34 bit number
+ */
+ dd = (unsigned int)(d >> 2);
+ ea = (signed int)dd;
+ ea = (ea << 2) | (d & 0x3);
+
+ if (!prefix_r && ra)
+ ea += regs->gpr[ra];
+ else if (!prefix_r && !ra)
+ ; /* Leave ea as is */
+ else if (prefix_r)
+ ea += regs->nip;
+
+ /*
+ * (prefix_r && ra) is an invalid form. Should already be
+ * checked for by caller!
+ */
+
+ return ea;
}
/*
* Return the largest power of 2, not greater than sizeof(unsigned long),
* such that x is a multiple of it.
*/
-static inline unsigned long max_align(unsigned long x)
+static nokprobe_inline unsigned long max_align(unsigned long x)
{
x |= sizeof(unsigned long);
return x & -x; /* isolates rightmost bit */
}
-
-static inline unsigned long byterev_2(unsigned long x)
+static nokprobe_inline unsigned long byterev_2(unsigned long x)
{
return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
}
-static inline unsigned long byterev_4(unsigned long x)
+static nokprobe_inline unsigned long byterev_4(unsigned long x)
{
return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) |
((x & 0xff00) << 8) | ((x & 0xff) << 24);
}
#ifdef __powerpc64__
-static inline unsigned long byterev_8(unsigned long x)
+static nokprobe_inline unsigned long byterev_8(unsigned long x)
{
return (byterev_4(x) << 32) | byterev_4(x >> 32);
}
#endif
-static int __kprobes read_mem_aligned(unsigned long *dest, unsigned long ea,
- int nb)
+static nokprobe_inline void do_byte_reverse(void *ptr, int nb)
+{
+ switch (nb) {
+ case 2:
+ *(u16 *)ptr = byterev_2(*(u16 *)ptr);
+ break;
+ case 4:
+ *(u32 *)ptr = byterev_4(*(u32 *)ptr);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *(unsigned long *)ptr = byterev_8(*(unsigned long *)ptr);
+ break;
+ case 16: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+ case 32: {
+ unsigned long *up = (unsigned long *)ptr;
+ unsigned long tmp;
+
+ tmp = byterev_8(up[0]);
+ up[0] = byterev_8(up[3]);
+ up[3] = tmp;
+ tmp = byterev_8(up[2]);
+ up[2] = byterev_8(up[1]);
+ up[1] = tmp;
+ break;
+ }
+
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static __always_inline int
+__read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
{
- int err = 0;
unsigned long x = 0;
switch (nb) {
case 1:
- err = __get_user(x, (unsigned char __user *) ea);
+ unsafe_get_user(x, (unsigned char __user *)ea, Efault);
break;
case 2:
- err = __get_user(x, (unsigned short __user *) ea);
+ unsafe_get_user(x, (unsigned short __user *)ea, Efault);
break;
case 4:
- err = __get_user(x, (unsigned int __user *) ea);
+ unsafe_get_user(x, (unsigned int __user *)ea, Efault);
break;
#ifdef __powerpc64__
case 8:
- err = __get_user(x, (unsigned long __user *) ea);
+ unsafe_get_user(x, (unsigned long __user *)ea, Efault);
break;
#endif
}
- if (!err)
- *dest = x;
- return err;
+ *dest = x;
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
}
-static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
- int nb, struct pt_regs *regs)
+static nokprobe_inline int
+read_mem_aligned(unsigned long *dest, unsigned long ea, int nb, struct pt_regs *regs)
{
int err;
- unsigned long x, b, c;
-#ifdef __LITTLE_ENDIAN__
- int len = nb; /* save a copy of the length for byte reversal */
-#endif
- /* unaligned, do this in pieces */
- x = 0;
+ if (is_kernel_addr(ea))
+ return __read_mem_aligned(dest, ea, nb, regs);
+
+ if (user_read_access_begin((void __user *)ea, nb)) {
+ err = __read_mem_aligned(dest, ea, nb, regs);
+ user_read_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+/*
+ * Copy from userspace to a buffer, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static __always_inline int __copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int c;
+
for (; nb > 0; nb -= c) {
-#ifdef __LITTLE_ENDIAN__
- c = 1;
-#endif
-#ifdef __BIG_ENDIAN__
c = max_align(ea);
-#endif
if (c > nb)
c = max_align(nb);
- err = read_mem_aligned(&b, ea, c);
- if (err)
- return err;
- x = (x << (8 * c)) + b;
- ea += c;
- }
-#ifdef __LITTLE_ENDIAN__
- switch (len) {
- case 2:
- *dest = byterev_2(x);
- break;
- case 4:
- *dest = byterev_4(x);
- break;
+ switch (c) {
+ case 1:
+ unsafe_get_user(*dest, (u8 __user *)ea, Efault);
+ break;
+ case 2:
+ unsafe_get_user(*(u16 *)dest, (u16 __user *)ea, Efault);
+ break;
+ case 4:
+ unsafe_get_user(*(u32 *)dest, (u32 __user *)ea, Efault);
+ break;
#ifdef __powerpc64__
- case 8:
- *dest = byterev_8(x);
- break;
+ case 8:
+ unsafe_get_user(*(u64 *)dest, (u64 __user *)ea, Efault);
+ break;
#endif
+ }
+ dest += c;
+ ea += c;
}
-#endif
-#ifdef __BIG_ENDIAN__
- *dest = x;
-#endif
return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int copy_mem_in(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __copy_mem_in(dest, ea, nb, regs);
+
+ if (user_read_access_begin((void __user *)ea, nb)) {
+ err = __copy_mem_in(dest, ea, nb, regs);
+ user_read_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+static nokprobe_inline int read_mem_unaligned(unsigned long *dest,
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ union {
+ unsigned long ul;
+ u8 b[sizeof(unsigned long)];
+ } u;
+ int i;
+ int err;
+
+ u.ul = 0;
+ i = IS_BE ? sizeof(unsigned long) - nb : 0;
+ err = copy_mem_in(&u.b[i], ea, nb, regs);
+ if (!err)
+ *dest = u.ul;
+ return err;
}
/*
* Read memory at address ea for nb bytes, return 0 for success
- * or -EFAULT if an error occurred.
+ * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8.
+ * If nb < sizeof(long), the result is right-justified on BE systems.
*/
-static int __kprobes read_mem(unsigned long *dest, unsigned long ea, int nb,
+static int read_mem(unsigned long *dest, unsigned long ea, int nb,
struct pt_regs *regs)
{
if (!address_ok(regs, ea, nb))
return -EFAULT;
if ((ea & (nb - 1)) == 0)
- return read_mem_aligned(dest, ea, nb);
+ return read_mem_aligned(dest, ea, nb, regs);
return read_mem_unaligned(dest, ea, nb, regs);
}
+NOKPROBE_SYMBOL(read_mem);
-static int __kprobes write_mem_aligned(unsigned long val, unsigned long ea,
- int nb)
+static __always_inline int
+__write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
{
- int err = 0;
-
switch (nb) {
case 1:
- err = __put_user(val, (unsigned char __user *) ea);
+ unsafe_put_user(val, (unsigned char __user *)ea, Efault);
break;
case 2:
- err = __put_user(val, (unsigned short __user *) ea);
+ unsafe_put_user(val, (unsigned short __user *)ea, Efault);
break;
case 4:
- err = __put_user(val, (unsigned int __user *) ea);
+ unsafe_put_user(val, (unsigned int __user *)ea, Efault);
break;
#ifdef __powerpc64__
case 8:
- err = __put_user(val, (unsigned long __user *) ea);
+ unsafe_put_user(val, (unsigned long __user *)ea, Efault);
break;
#endif
}
- return err;
+ return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
}
-static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea,
- int nb, struct pt_regs *regs)
+static nokprobe_inline int
+write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct pt_regs *regs)
{
int err;
- unsigned long c;
-#ifdef __LITTLE_ENDIAN__
- switch (nb) {
- case 2:
- val = byterev_2(val);
- break;
- case 4:
- val = byterev_4(val);
- break;
-#ifdef __powerpc64__
- case 8:
- val = byterev_8(val);
- break;
-#endif
+ if (is_kernel_addr(ea))
+ return __write_mem_aligned(val, ea, nb, regs);
+
+ if (user_write_access_begin((void __user *)ea, nb)) {
+ err = __write_mem_aligned(val, ea, nb, regs);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
}
-#endif
- /* unaligned or little-endian, do this in pieces */
+
+ return err;
+}
+
+/*
+ * Copy from a buffer to userspace, using the largest possible
+ * aligned accesses, up to sizeof(long).
+ */
+static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int c;
+
for (; nb > 0; nb -= c) {
-#ifdef __LITTLE_ENDIAN__
- c = 1;
-#endif
-#ifdef __BIG_ENDIAN__
c = max_align(ea);
-#endif
if (c > nb)
c = max_align(nb);
- err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
- if (err)
- return err;
+ switch (c) {
+ case 1:
+ unsafe_put_user(*dest, (u8 __user *)ea, Efault);
+ break;
+ case 2:
+ unsafe_put_user(*(u16 *)dest, (u16 __user *)ea, Efault);
+ break;
+ case 4:
+ unsafe_put_user(*(u32 *)dest, (u32 __user *)ea, Efault);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ unsafe_put_user(*(u64 *)dest, (u64 __user *)ea, Efault);
+ break;
+#endif
+ }
+ dest += c;
ea += c;
}
return 0;
+
+Efault:
+ regs->dar = ea;
+ return -EFAULT;
+}
+
+static nokprobe_inline int copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs)
+{
+ int err;
+
+ if (is_kernel_addr(ea))
+ return __copy_mem_out(dest, ea, nb, regs);
+
+ if (user_write_access_begin((void __user *)ea, nb)) {
+ err = __copy_mem_out(dest, ea, nb, regs);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
+ regs->dar = ea;
+ }
+
+ return err;
+}
+
+static nokprobe_inline int write_mem_unaligned(unsigned long val,
+ unsigned long ea, int nb,
+ struct pt_regs *regs)
+{
+ union {
+ unsigned long ul;
+ u8 b[sizeof(unsigned long)];
+ } u;
+ int i;
+
+ u.ul = val;
+ i = IS_BE ? sizeof(unsigned long) - nb : 0;
+ return copy_mem_out(&u.b[i], ea, nb, regs);
}
/*
* Write memory at address ea for nb bytes, return 0 for success
- * or -EFAULT if an error occurred.
+ * or -EFAULT if an error occurred. N.B. nb must be 1, 2, 4 or 8.
*/
-static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb,
+static int write_mem(unsigned long val, unsigned long ea, int nb,
struct pt_regs *regs)
{
if (!address_ok(regs, ea, nb))
return -EFAULT;
if ((ea & (nb - 1)) == 0)
- return write_mem_aligned(val, ea, nb);
+ return write_mem_aligned(val, ea, nb, regs);
return write_mem_unaligned(val, ea, nb, regs);
}
+NOKPROBE_SYMBOL(write_mem);
#ifdef CONFIG_PPC_FPU
/*
- * Check the address and alignment, and call func to do the actual
- * load or store.
+ * These access either the real FP register or the image in the
+ * thread_struct, depending on regs->msr & MSR_FP.
*/
-static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
- unsigned long ea, int nb,
- struct pt_regs *regs)
+static int do_fp_load(struct instruction_op *op, unsigned long ea,
+ struct pt_regs *regs, bool cross_endian)
{
- int err;
+ int err, rn, nb;
union {
- double dbl;
- unsigned long ul[2];
- struct {
-#ifdef __BIG_ENDIAN__
- unsigned _pad_;
- unsigned word;
-#endif
-#ifdef __LITTLE_ENDIAN__
- unsigned word;
- unsigned _pad_;
-#endif
- } single;
- } data;
- unsigned long ptr;
-
+ int i;
+ unsigned int u;
+ float f;
+ double d[2];
+ unsigned long l[2];
+ u8 b[2 * sizeof(double)];
+ } u;
+
+ nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- ptr = (unsigned long) &data.ul;
- if (sizeof(unsigned long) == 8 || nb == 4) {
- err = read_mem_unaligned(&data.ul[0], ea, nb, regs);
- if (nb == 4)
- ptr = (unsigned long)&(data.single.word);
- } else {
- /* reading a double on 32-bit */
- err = read_mem_unaligned(&data.ul[0], ea, 4, regs);
- if (!err)
- err = read_mem_unaligned(&data.ul[1], ea + 4, 4, regs);
- }
+ rn = op->reg;
+ err = copy_mem_in(u.b, ea, nb, regs);
if (err)
return err;
- return (*func)(rn, ptr);
+ if (unlikely(cross_endian)) {
+ do_byte_reverse(u.b, min(nb, 8));
+ if (nb == 16)
+ do_byte_reverse(&u.b[8], 8);
+ }
+ preempt_disable();
+ if (nb == 4) {
+ if (op->type & FPCONV)
+ conv_sp_to_dp(&u.f, &u.d[0]);
+ else if (op->type & SIGNEXT)
+ u.l[0] = u.i;
+ else
+ u.l[0] = u.u;
+ }
+ if (regs->msr & MSR_FP)
+ put_fpr(rn, &u.d[0]);
+ else
+ current->thread.TS_FPR(rn) = u.l[0];
+ if (nb == 16) {
+ /* lfdp */
+ rn |= 1;
+ if (regs->msr & MSR_FP)
+ put_fpr(rn, &u.d[1]);
+ else
+ current->thread.TS_FPR(rn) = u.l[1];
+ }
+ preempt_enable();
+ return 0;
}
+NOKPROBE_SYMBOL(do_fp_load);
-static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
- unsigned long ea, int nb,
- struct pt_regs *regs)
+static int do_fp_store(struct instruction_op *op, unsigned long ea,
+ struct pt_regs *regs, bool cross_endian)
{
- int err;
+ int rn, nb;
union {
- double dbl;
- unsigned long ul[2];
- struct {
-#ifdef __BIG_ENDIAN__
- unsigned _pad_;
- unsigned word;
-#endif
-#ifdef __LITTLE_ENDIAN__
- unsigned word;
- unsigned _pad_;
-#endif
- } single;
- } data;
- unsigned long ptr;
-
+ unsigned int u;
+ float f;
+ double d[2];
+ unsigned long l[2];
+ u8 b[2 * sizeof(double)];
+ } u;
+
+ nb = GETSIZE(op->type);
+ if (nb > sizeof(u))
+ return -EINVAL;
if (!address_ok(regs, ea, nb))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- ptr = (unsigned long) &data.ul[0];
- if (sizeof(unsigned long) == 8 || nb == 4) {
- if (nb == 4)
- ptr = (unsigned long)&(data.single.word);
- err = (*func)(rn, ptr);
- if (err)
- return err;
- err = write_mem_unaligned(data.ul[0], ea, nb, regs);
- } else {
- /* writing a double on 32-bit */
- err = (*func)(rn, ptr);
- if (err)
- return err;
- err = write_mem_unaligned(data.ul[0], ea, 4, regs);
- if (!err)
- err = write_mem_unaligned(data.ul[1], ea + 4, 4, regs);
+ rn = op->reg;
+ preempt_disable();
+ if (regs->msr & MSR_FP)
+ get_fpr(rn, &u.d[0]);
+ else
+ u.l[0] = current->thread.TS_FPR(rn);
+ if (nb == 4) {
+ if (op->type & FPCONV)
+ conv_dp_to_sp(&u.d[0], &u.f);
+ else
+ u.u = u.l[0];
}
- return err;
+ if (nb == 16) {
+ rn |= 1;
+ if (regs->msr & MSR_FP)
+ get_fpr(rn, &u.d[1]);
+ else
+ u.l[1] = current->thread.TS_FPR(rn);
+ }
+ preempt_enable();
+ if (unlikely(cross_endian)) {
+ do_byte_reverse(u.b, min(nb, 8));
+ if (nb == 16)
+ do_byte_reverse(&u.b[8], 8);
+ }
+ return copy_mem_out(u.b, ea, nb, regs);
}
+NOKPROBE_SYMBOL(do_fp_store);
#endif
#ifdef CONFIG_ALTIVEC
/* For Altivec/VMX, no need to worry about alignment */
-static int __kprobes do_vec_load(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int do_vec_load(int rn, unsigned long ea,
+ int size, struct pt_regs *regs,
+ bool cross_endian)
{
+ int err;
+ union {
+ __vector128 v;
+ u8 b[sizeof(__vector128)];
+ } u = {};
+
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
- return (*func)(rn, ea);
+ /* align to multiple of size */
+ ea &= ~(size - 1);
+ err = copy_mem_in(&u.b[ea & 0xf], ea, size, regs);
+ if (err)
+ return err;
+ if (unlikely(cross_endian))
+ do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
+ preempt_disable();
+ if (regs->msr & MSR_VEC)
+ put_vr(rn, &u.v);
+ else
+ current->thread.vr_state.vr[rn] = u.v;
+ preempt_enable();
+ return 0;
}
-static int __kprobes do_vec_store(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int do_vec_store(int rn, unsigned long ea,
+ int size, struct pt_regs *regs,
+ bool cross_endian)
{
+ union {
+ __vector128 v;
+ u8 b[sizeof(__vector128)];
+ } u;
+
+ if (size > sizeof(u))
+ return -EINVAL;
+
if (!address_ok(regs, ea & ~0xfUL, 16))
return -EFAULT;
- return (*func)(rn, ea);
+ /* align to multiple of size */
+ ea &= ~(size - 1);
+
+ preempt_disable();
+ if (regs->msr & MSR_VEC)
+ get_vr(rn, &u.v);
+ else
+ u.v = current->thread.vr_state.vr[rn];
+ preempt_enable();
+ if (unlikely(cross_endian))
+ do_byte_reverse(&u.b[ea & 0xf], min_t(size_t, size, sizeof(u)));
+ return copy_mem_out(&u.b[ea & 0xf], ea, size, regs);
}
#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-static int __kprobes do_vsx_load(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+#ifdef __powerpc64__
+static nokprobe_inline int emulate_lq(struct pt_regs *regs, unsigned long ea,
+ int reg, bool cross_endian)
{
int err;
- unsigned long val[2];
if (!address_ok(regs, ea, 16))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- err = read_mem_unaligned(&val[0], ea, 8, regs);
- if (!err)
- err = read_mem_unaligned(&val[1], ea + 8, 8, regs);
- if (!err)
- err = (*func)(rn, (unsigned long) &val[0]);
+ /* if aligned, should be atomic */
+ if ((ea & 0xf) == 0) {
+ err = do_lq(ea, &regs->gpr[reg]);
+ } else {
+ err = read_mem(&regs->gpr[reg + IS_LE], ea, 8, regs);
+ if (!err)
+ err = read_mem(&regs->gpr[reg + IS_BE], ea + 8, 8, regs);
+ }
+ if (!err && unlikely(cross_endian))
+ do_byte_reverse(&regs->gpr[reg], 16);
return err;
}
-static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long),
- unsigned long ea, struct pt_regs *regs)
+static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea,
+ int reg, bool cross_endian)
{
int err;
- unsigned long val[2];
+ unsigned long vals[2];
if (!address_ok(regs, ea, 16))
return -EFAULT;
- if ((ea & 3) == 0)
- return (*func)(rn, ea);
- err = (*func)(rn, (unsigned long) &val[0]);
- if (err)
- return err;
- err = write_mem_unaligned(val[0], ea, 8, regs);
+ vals[0] = regs->gpr[reg];
+ vals[1] = regs->gpr[reg + 1];
+ if (unlikely(cross_endian))
+ do_byte_reverse(vals, 16);
+
+ /* if aligned, should be atomic */
+ if ((ea & 0xf) == 0)
+ return do_stq(ea, vals[0], vals[1]);
+
+ err = write_mem(vals[IS_LE], ea, 8, regs);
if (!err)
- err = write_mem_unaligned(val[1], ea + 8, 8, regs);
+ err = write_mem(vals[IS_BE], ea + 8, 8, regs);
return err;
}
+#endif /* __powerpc64__ */
+
+#ifdef CONFIG_VSX
+static nokprobe_inline void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg,
+ const void *mem, bool rev)
+{
+ int size, read_size;
+ int i, j;
+ const unsigned int *wp;
+ const unsigned short *hp;
+ const unsigned char *bp;
+
+ size = GETSIZE(op->type);
+ reg->d[0] = reg->d[1] = 0;
+
+ switch (op->element_size) {
+ case 32:
+ /* [p]lxvp[x] */
+ case 16:
+ /* whole vector; lxv[x] or lxvl[l] */
+ if (size == 0)
+ break;
+ memcpy(reg, mem, size);
+ if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+ rev = !rev;
+ if (rev)
+ do_byte_reverse(reg, size);
+ break;
+ case 8:
+ /* scalar loads, lxvd2x, lxvdsx */
+ read_size = (size >= 8) ? 8 : size;
+ i = IS_LE ? 8 : 8 - read_size;
+ memcpy(&reg->b[i], mem, read_size);
+ if (rev)
+ do_byte_reverse(&reg->b[i], 8);
+ if (size < 8) {
+ if (op->type & SIGNEXT) {
+ /* size == 4 is the only case here */
+ reg->d[IS_LE] = (signed int) reg->d[IS_LE];
+ } else if (op->vsx_flags & VSX_FPCONV) {
+ preempt_disable();
+ conv_sp_to_dp(&reg->fp[1 + IS_LE],
+ &reg->dp[IS_LE]);
+ preempt_enable();
+ }
+ } else {
+ if (size == 16) {
+ unsigned long v = *(unsigned long *)(mem + 8);
+ reg->d[IS_BE] = !rev ? v : byterev_8(v);
+ } else if (op->vsx_flags & VSX_SPLAT)
+ reg->d[IS_BE] = reg->d[IS_LE];
+ }
+ break;
+ case 4:
+ /* lxvw4x, lxvwsx */
+ wp = mem;
+ for (j = 0; j < size / 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ reg->w[i] = !rev ? *wp++ : byterev_4(*wp++);
+ }
+ if (op->vsx_flags & VSX_SPLAT) {
+ u32 val = reg->w[IS_LE ? 3 : 0];
+ for (; j < 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ reg->w[i] = val;
+ }
+ }
+ break;
+ case 2:
+ /* lxvh8x */
+ hp = mem;
+ for (j = 0; j < size / 2; ++j) {
+ i = IS_LE ? 7 - j : j;
+ reg->h[i] = !rev ? *hp++ : byterev_2(*hp++);
+ }
+ break;
+ case 1:
+ /* lxvb16x */
+ bp = mem;
+ for (j = 0; j < size; ++j) {
+ i = IS_LE ? 15 - j : j;
+ reg->b[i] = *bp++;
+ }
+ break;
+ }
+}
+
+static nokprobe_inline void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg,
+ void *mem, bool rev)
+{
+ int size, write_size;
+ int i, j;
+ union vsx_reg buf;
+ unsigned int *wp;
+ unsigned short *hp;
+ unsigned char *bp;
+
+ size = GETSIZE(op->type);
+
+ switch (op->element_size) {
+ case 32:
+ /* [p]stxvp[x] */
+ if (size == 0)
+ break;
+ if (rev) {
+ /* reverse 32 bytes */
+ union vsx_reg buf32[2];
+ buf32[0].d[0] = byterev_8(reg[1].d[1]);
+ buf32[0].d[1] = byterev_8(reg[1].d[0]);
+ buf32[1].d[0] = byterev_8(reg[0].d[1]);
+ buf32[1].d[1] = byterev_8(reg[0].d[0]);
+ memcpy(mem, buf32, size);
+ } else {
+ memcpy(mem, reg, size);
+ }
+ break;
+ case 16:
+ /* stxv, stxvx, stxvl, stxvll */
+ if (size == 0)
+ break;
+ if (IS_LE && (op->vsx_flags & VSX_LDLEFT))
+ rev = !rev;
+ if (rev) {
+ /* reverse 16 bytes */
+ buf.d[0] = byterev_8(reg->d[1]);
+ buf.d[1] = byterev_8(reg->d[0]);
+ reg = &buf;
+ }
+ memcpy(mem, reg, size);
+ break;
+ case 8:
+ /* scalar stores, stxvd2x */
+ write_size = (size >= 8) ? 8 : size;
+ i = IS_LE ? 8 : 8 - write_size;
+ if (size < 8 && op->vsx_flags & VSX_FPCONV) {
+ buf.d[0] = buf.d[1] = 0;
+ preempt_disable();
+ conv_dp_to_sp(&reg->dp[IS_LE], &buf.fp[1 + IS_LE]);
+ preempt_enable();
+ reg = &buf;
+ }
+ memcpy(mem, &reg->b[i], write_size);
+ if (size == 16)
+ memcpy(mem + 8, &reg->d[IS_BE], 8);
+ if (unlikely(rev)) {
+ do_byte_reverse(mem, write_size);
+ if (size == 16)
+ do_byte_reverse(mem + 8, 8);
+ }
+ break;
+ case 4:
+ /* stxvw4x */
+ wp = mem;
+ for (j = 0; j < size / 4; ++j) {
+ i = IS_LE ? 3 - j : j;
+ *wp++ = !rev ? reg->w[i] : byterev_4(reg->w[i]);
+ }
+ break;
+ case 2:
+ /* stxvh8x */
+ hp = mem;
+ for (j = 0; j < size / 2; ++j) {
+ i = IS_LE ? 7 - j : j;
+ *hp++ = !rev ? reg->h[i] : byterev_2(reg->h[i]);
+ }
+ break;
+ case 1:
+ /* stxvb16x */
+ bp = mem;
+ for (j = 0; j < size; ++j) {
+ i = IS_LE ? 15 - j : j;
+ *bp++ = reg->b[i];
+ }
+ break;
+ }
+}
+
+static nokprobe_inline int do_vsx_load(struct instruction_op *op,
+ unsigned long ea, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int reg = op->reg;
+ int i, j, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
+ int size = GETSIZE(op->type);
+
+ if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
+ return -EFAULT;
+
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
+ emulate_vsx_load(op, buf, mem, cross_endian);
+ preempt_disable();
+ if (reg < 32) {
+ /* FP regs + extensions */
+ if (regs->msr & MSR_FP) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ load_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ current->thread.fp_state.fpr[reg + i][0] = buf[j].d[0];
+ current->thread.fp_state.fpr[reg + i][1] = buf[j].d[1];
+ }
+ }
+ } else {
+ if (regs->msr & MSR_VEC) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ load_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ current->thread.vr_state.vr[reg - 32 + i] = buf[j].v;
+ }
+ }
+ }
+ preempt_enable();
+ return 0;
+}
+
+static nokprobe_inline int do_vsx_store(struct instruction_op *op,
+ unsigned long ea, struct pt_regs *regs,
+ bool cross_endian)
+{
+ int reg = op->reg;
+ int i, j, nr_vsx_regs;
+ u8 mem[32];
+ union vsx_reg buf[2];
+ int size = GETSIZE(op->type);
+
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
+ preempt_disable();
+ if (reg < 32) {
+ /* FP regs + extensions */
+ if (regs->msr & MSR_FP) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ store_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ buf[j].d[0] = current->thread.fp_state.fpr[reg + i][0];
+ buf[j].d[1] = current->thread.fp_state.fpr[reg + i][1];
+ }
+ }
+ } else {
+ if (regs->msr & MSR_VEC) {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ store_vsrn(reg + i, &buf[j].v);
+ }
+ } else {
+ for (i = 0; i < nr_vsx_regs; i++) {
+ j = IS_LE ? nr_vsx_regs - i - 1 : i;
+ buf[j].v = current->thread.vr_state.vr[reg - 32 + i];
+ }
+ }
+ }
+ preempt_enable();
+ emulate_vsx_store(op, buf, mem, cross_endian);
+ return copy_mem_out(mem, ea, size, regs);
+}
#endif /* CONFIG_VSX */
+static __always_inline int __emulate_dcbz(unsigned long ea)
+{
+ unsigned long i;
+ unsigned long size = l1_dcache_bytes();
+
+ for (i = 0; i < size; i += sizeof(long))
+ unsafe_put_user(0, (unsigned long __user *)(ea + i), Efault);
+
+ return 0;
+
+Efault:
+ return -EFAULT;
+}
+
+int emulate_dcbz(unsigned long ea, struct pt_regs *regs)
+{
+ int err;
+ unsigned long size = l1_dcache_bytes();
+
+ ea = truncate_if_32bit(regs->msr, ea);
+ ea &= ~(size - 1);
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+
+ if (is_kernel_addr(ea)) {
+ err = __emulate_dcbz(ea);
+ } else if (user_write_access_begin((void __user *)ea, size)) {
+ err = __emulate_dcbz(ea);
+ user_write_access_end();
+ } else {
+ err = -EFAULT;
+ }
+
+ if (err)
+ regs->dar = ea;
+
+
+ return err;
+}
+NOKPROBE_SYMBOL(emulate_dcbz);
+
#define __put_user_asmx(x, addr, err, op, cr) \
__asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine power8\n" \
"1: " op " %2,0,%3\n" \
+ ".machine pop\n" \
" mfcr %1\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%4\n" \
" b 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,3b\n" \
- ".previous" \
+ EX_TABLE(1b, 3b) \
: "=r" (err), "=r" (cr) \
: "r" (x), "r" (addr), "i" (-EFAULT), "0" (err))
#define __get_user_asmx(x, addr, err, op) \
__asm__ __volatile__( \
+ ".machine push\n" \
+ ".machine power8\n" \
"1: "op" %1,0,%2\n" \
+ ".machine pop\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: li %0,%3\n" \
" b 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,3b\n" \
- ".previous" \
+ EX_TABLE(1b, 3b) \
: "=r" (err), "=r" (x) \
: "r" (addr), "i" (-EFAULT), "0" (err))
@@ -534,31 +1129,39 @@ static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long),
"3: li %0,%3\n" \
" b 2b\n" \
".previous\n" \
- ".section __ex_table,\"a\"\n" \
- PPC_LONG_ALIGN "\n" \
- PPC_LONG "1b,3b\n" \
- ".previous" \
+ EX_TABLE(1b, 3b) \
: "=r" (err) \
: "r" (addr), "i" (-EFAULT), "0" (err))
-static void __kprobes set_cr0(struct pt_regs *regs, int rd)
+static nokprobe_inline void set_cr0(const struct pt_regs *regs,
+ struct instruction_op *op)
{
- long val = regs->gpr[rd];
+ long val = op->val;
- regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
-#ifdef __powerpc64__
+ op->type |= SETCC;
+ op->ccval = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
if (!(regs->msr & MSR_64BIT))
val = (int) val;
-#endif
if (val < 0)
- regs->ccr |= 0x80000000;
+ op->ccval |= 0x80000000;
else if (val > 0)
- regs->ccr |= 0x40000000;
+ op->ccval |= 0x40000000;
else
- regs->ccr |= 0x20000000;
+ op->ccval |= 0x20000000;
}
-static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
+static nokprobe_inline void set_ca32(struct instruction_op *op, bool val)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (val)
+ op->xerval |= XER_CA32;
+ else
+ op->xerval &= ~XER_CA32;
+ }
+}
+
+static nokprobe_inline void add_with_carry(const struct pt_regs *regs,
+ struct instruction_op *op, int rd,
unsigned long val1, unsigned long val2,
unsigned long carry_in)
{
@@ -566,24 +1169,28 @@ static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
if (carry_in)
++val;
- regs->gpr[rd] = val;
-#ifdef __powerpc64__
- if (!(regs->msr & MSR_64BIT)) {
- val = (unsigned int) val;
- val1 = (unsigned int) val1;
- }
-#endif
+ op->type = COMPUTE | SETREG | SETXER;
+ op->reg = rd;
+ op->val = val;
+ val = truncate_if_32bit(regs->msr, val);
+ val1 = truncate_if_32bit(regs->msr, val1);
+ op->xerval = regs->xer;
if (val < val1 || (carry_in && val == val1))
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
+
+ set_ca32(op, (unsigned int)val < (unsigned int)val1 ||
+ (carry_in && (unsigned int)val == (unsigned int)val1));
}
-static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2,
- int crfld)
+static nokprobe_inline void do_cmp_signed(const struct pt_regs *regs,
+ struct instruction_op *op,
+ long v1, long v2, int crfld)
{
unsigned int crval, shift;
+ op->type = COMPUTE | SETCC;
crval = (regs->xer >> 31) & 1; /* get SO bit */
if (v1 < v2)
crval |= 8;
@@ -592,14 +1199,17 @@ static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2,
else
crval |= 2;
shift = (7 - crfld) * 4;
- regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+ op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
}
-static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
- unsigned long v2, int crfld)
+static nokprobe_inline void do_cmp_unsigned(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1,
+ unsigned long v2, int crfld)
{
unsigned int crval, shift;
+ op->type = COMPUTE | SETCC;
crval = (regs->xer >> 31) & 1; /* get SO bit */
if (v1 < v2)
crval |= 8;
@@ -608,7 +1218,108 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
else
crval |= 2;
shift = (7 - crfld) * 4;
- regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+ op->ccval = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static nokprobe_inline void do_cmpb(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, unsigned long v2)
+{
+ unsigned long long out_val, mask;
+ int i;
+
+ out_val = 0;
+ for (i = 0; i < 8; i++) {
+ mask = 0xffUL << (i * 8);
+ if ((v1 & mask) == (v2 & mask))
+ out_val |= mask;
+ }
+ op->val = out_val;
+}
+
+/*
+ * The size parameter is used to adjust the equivalent popcnt instruction.
+ * popcntb = 8, popcntw = 32, popcntd = 64
+ */
+static nokprobe_inline void do_popcnt(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, int size)
+{
+ unsigned long long out = v1;
+
+ out -= (out >> 1) & 0x5555555555555555ULL;
+ out = (0x3333333333333333ULL & out) +
+ (0x3333333333333333ULL & (out >> 2));
+ out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
+
+ if (size == 8) { /* popcntb */
+ op->val = out;
+ return;
+ }
+ out += out >> 8;
+ out += out >> 16;
+ if (size == 32) { /* popcntw */
+ op->val = out & 0x0000003f0000003fULL;
+ return;
+ }
+
+ out = (out + (out >> 32)) & 0x7f;
+ op->val = out; /* popcntd */
+}
+
+#ifdef CONFIG_PPC64
+static nokprobe_inline void do_bpermd(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v1, unsigned long v2)
+{
+ unsigned char perm, idx;
+ unsigned int i;
+
+ perm = 0;
+ for (i = 0; i < 8; i++) {
+ idx = (v1 >> (i * 8)) & 0xff;
+ if (idx < 64)
+ if (v2 & PPC_BIT(idx))
+ perm |= 1 << i;
+ }
+ op->val = perm;
+}
+#endif /* CONFIG_PPC64 */
+/*
+ * The size parameter adjusts the equivalent prty instruction.
+ * prtyw = 32, prtyd = 64
+ */
+static nokprobe_inline void do_prty(const struct pt_regs *regs,
+ struct instruction_op *op,
+ unsigned long v, int size)
+{
+ unsigned long long res = v ^ (v >> 8);
+
+ res ^= res >> 16;
+ if (size == 32) { /* prtyw */
+ op->val = res & 0x0000000100000001ULL;
+ return;
+ }
+
+ res ^= res >> 32;
+ op->val = res & 1; /*prtyd */
+}
+
+static nokprobe_inline int trap_compare(long v1, long v2)
+{
+ int ret = 0;
+
+ if (v1 < v2)
+ ret |= 0x10;
+ else if (v1 > v2)
+ ret |= 0x08;
+ else
+ ret |= 0x04;
+ if ((unsigned long)v1 < (unsigned long)v2)
+ ret |= 0x02;
+ else if ((unsigned long)v1 > (unsigned long)v2)
+ ret |= 0x01;
+ return ret;
}
/*
@@ -627,87 +1338,101 @@ static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
#define ROTATE(x, n) ((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
/*
- * Emulate instructions that cause a transfer of control,
- * loads and stores, and a few other instructions.
- * Returns 1 if the step was emulated, 0 if not,
- * or -1 if the instruction is one that should not be stepped,
- * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ * Decode an instruction, and return information about it in *op
+ * without changing *regs.
+ * Integer arithmetic and logical instructions, branches, and barrier
+ * instructions can be emulated just using the information in *op.
+ *
+ * Return value is 1 if the instruction can be emulated just by
+ * updating *regs with the information in *op, -1 if we need the
+ * GPRs but *regs doesn't contain the full register set, or 0
+ * otherwise.
*/
-int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
+int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
+ ppc_inst_t instr)
{
- unsigned int opcode, ra, rb, rd, spr, u;
+#ifdef CONFIG_PPC64
+ unsigned int suffixopcode, prefixtype, prefix_r;
+#endif
+ unsigned int opcode, ra, rb, rc, rd, spr, u;
unsigned long int imm;
unsigned long int val, val2;
- unsigned long int ea;
- unsigned int cr, mb, me, sh;
- int err;
- unsigned long old_ra, val3;
+ unsigned int mb, me, sh;
+ unsigned int word, suffix;
long ival;
- opcode = instr >> 26;
+ word = ppc_inst_val(instr);
+ suffix = ppc_inst_suffix(instr);
+
+ op->type = COMPUTE;
+
+ opcode = ppc_inst_primary_opcode(instr);
switch (opcode) {
case 16: /* bc */
- imm = (signed short)(instr & 0xfffc);
- if ((instr & 2) == 0)
+ op->type = BRANCH;
+ imm = (signed short)(word & 0xfffc);
+ if ((word & 2) == 0)
imm += regs->nip;
- regs->nip += 4;
- regs->nip = truncate_if_32bit(regs->msr, regs->nip);
- if (instr & 1)
- regs->link = regs->nip;
- if (branch_taken(instr, regs))
- regs->nip = truncate_if_32bit(regs->msr, imm);
+ op->val = truncate_if_32bit(regs->msr, imm);
+ if (word & 1)
+ op->type |= SETLK;
+ if (branch_taken(word, regs, op))
+ op->type |= BRTAKEN;
return 1;
-#ifdef CONFIG_PPC64
case 17: /* sc */
- /*
- * N.B. this uses knowledge about how the syscall
- * entry code works. If that is changed, this will
- * need to be changed also.
- */
- if (regs->gpr[0] == 0x1ebe &&
- cpu_has_feature(CPU_FTR_REAL_LE)) {
- regs->msr ^= MSR_LE;
- goto instr_done;
- }
- regs->gpr[9] = regs->gpr[13];
- regs->gpr[10] = MSR_KERNEL;
- regs->gpr[11] = regs->nip + 4;
- regs->gpr[12] = regs->msr & MSR_MASK;
- regs->gpr[13] = (unsigned long) get_paca();
- regs->nip = (unsigned long) &system_call_common;
- regs->msr = MSR_KERNEL;
- return 1;
-#endif
+ if ((word & 0xfe2) == 2)
+ op->type = SYSCALL;
+ else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ (word & 0xfe3) == 1) { /* scv */
+ op->type = SYSCALL_VECTORED_0;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ } else
+ op->type = UNKNOWN;
+ return 0;
case 18: /* b */
- imm = instr & 0x03fffffc;
+ op->type = BRANCH | BRTAKEN;
+ imm = word & 0x03fffffc;
if (imm & 0x02000000)
imm -= 0x04000000;
- if ((instr & 2) == 0)
+ if ((word & 2) == 0)
imm += regs->nip;
- if (instr & 1)
- regs->link = truncate_if_32bit(regs->msr, regs->nip + 4);
- imm = truncate_if_32bit(regs->msr, imm);
- regs->nip = imm;
+ op->val = truncate_if_32bit(regs->msr, imm);
+ if (word & 1)
+ op->type |= SETLK;
return 1;
case 19:
- switch ((instr >> 1) & 0x3ff) {
+ switch ((word >> 1) & 0x3ff) {
+ case 0: /* mcrf */
+ op->type = COMPUTE + SETCC;
+ rd = 7 - ((word >> 23) & 0x7);
+ ra = 7 - ((word >> 18) & 0x7);
+ rd *= 4;
+ ra *= 4;
+ val = (regs->ccr >> ra) & 0xf;
+ op->ccval = (regs->ccr & ~(0xfUL << rd)) | (val << rd);
+ return 1;
+
case 16: /* bclr */
case 528: /* bcctr */
- imm = (instr & 0x400)? regs->ctr: regs->link;
- regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
- imm = truncate_if_32bit(regs->msr, imm);
- if (instr & 1)
- regs->link = regs->nip;
- if (branch_taken(instr, regs))
- regs->nip = imm;
+ op->type = BRANCH;
+ imm = (word & 0x400)? regs->ctr: regs->link;
+ op->val = truncate_if_32bit(regs->msr, imm);
+ if (word & 1)
+ op->type |= SETLK;
+ if (branch_taken(word, regs, op))
+ op->type |= BRTAKEN;
return 1;
case 18: /* rfid, scary */
- return -1;
+ if (user_mode(regs))
+ goto priv;
+ op->type = RFI;
+ return 0;
case 150: /* isync */
- isync();
- goto instr_done;
+ op->type = BARRIER | BARRIER_ISYNC;
+ return 1;
case 33: /* crnor */
case 129: /* crandc */
@@ -717,272 +1442,394 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
case 289: /* creqv */
case 417: /* crorc */
case 449: /* cror */
- ra = (instr >> 16) & 0x1f;
- rb = (instr >> 11) & 0x1f;
- rd = (instr >> 21) & 0x1f;
+ op->type = COMPUTE + SETCC;
+ ra = (word >> 16) & 0x1f;
+ rb = (word >> 11) & 0x1f;
+ rd = (word >> 21) & 0x1f;
ra = (regs->ccr >> (31 - ra)) & 1;
rb = (regs->ccr >> (31 - rb)) & 1;
- val = (instr >> (6 + ra * 2 + rb)) & 1;
- regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) |
+ val = (word >> (6 + ra * 2 + rb)) & 1;
+ op->ccval = (regs->ccr & ~(1UL << (31 - rd))) |
(val << (31 - rd));
- goto instr_done;
+ return 1;
}
break;
case 31:
- switch ((instr >> 1) & 0x3ff) {
+ switch ((word >> 1) & 0x3ff) {
case 598: /* sync */
+ op->type = BARRIER + BARRIER_SYNC;
#ifdef __powerpc64__
- switch ((instr >> 21) & 3) {
+ switch ((word >> 21) & 3) {
case 1: /* lwsync */
- asm volatile("lwsync" : : : "memory");
- goto instr_done;
+ op->type = BARRIER + BARRIER_LWSYNC;
+ break;
case 2: /* ptesync */
- asm volatile("ptesync" : : : "memory");
- goto instr_done;
+ op->type = BARRIER + BARRIER_PTESYNC;
+ break;
}
#endif
- mb();
- goto instr_done;
+ return 1;
case 854: /* eieio */
- eieio();
- goto instr_done;
+ op->type = BARRIER + BARRIER_EIEIO;
+ return 1;
}
break;
}
- /* Following cases refer to regs->gpr[], so we need all regs */
- if (!FULL_REGS(regs))
- return 0;
-
- rd = (instr >> 21) & 0x1f;
- ra = (instr >> 16) & 0x1f;
- rb = (instr >> 11) & 0x1f;
+ rd = (word >> 21) & 0x1f;
+ ra = (word >> 16) & 0x1f;
+ rb = (word >> 11) & 0x1f;
+ rc = (word >> 6) & 0x1f;
switch (opcode) {
+#ifdef __powerpc64__
+ case 1:
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+
+ prefix_r = GET_PREFIX_R(word);
+ ra = GET_PREFIX_RA(suffix);
+ rd = (suffix >> 21) & 0x1f;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+ suffixopcode = get_op(suffix);
+ prefixtype = (word >> 24) & 0x3;
+ switch (prefixtype) {
+ case 2:
+ if (prefix_r && ra)
+ return 0;
+ switch (suffixopcode) {
+ case 14: /* paddi */
+ op->type = COMPUTE | PREFIXED;
+ op->val = mlsd_8lsd_ea(word, suffix, regs);
+ goto compute_done;
+ }
+ }
+ break;
+ case 2: /* tdi */
+ if (rd & trap_compare(regs->gpr[ra], (short) word))
+ goto trap;
+ return 1;
+#endif
+ case 3: /* twi */
+ if (rd & trap_compare((int)regs->gpr[ra], (short) word))
+ goto trap;
+ return 1;
+
+#ifdef __powerpc64__
+ case 4:
+ /*
+ * There are very many instructions with this primary opcode
+ * introduced in the ISA as early as v2.03. However, the ones
+ * we currently emulate were all introduced with ISA 3.0
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+
+ switch (word & 0x3f) {
+ case 48: /* maddhd */
+ asm volatile(PPC_MADDHD(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+
+ case 49: /* maddhdu */
+ asm volatile(PPC_MADDHDU(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+
+ case 51: /* maddld */
+ asm volatile(PPC_MADDLD(%0, %1, %2, %3) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]), "r" (regs->gpr[rc]));
+ goto compute_done;
+ }
+
+ /*
+ * There are other instructions from ISA 3.0 with the same
+ * primary opcode which do not have emulation support yet.
+ */
+ goto unknown_opcode;
+#endif
+
case 7: /* mulli */
- regs->gpr[rd] = regs->gpr[ra] * (short) instr;
- goto instr_done;
+ op->val = regs->gpr[ra] * (short) word;
+ goto compute_done;
case 8: /* subfic */
- imm = (short) instr;
- add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1);
- goto instr_done;
+ imm = (short) word;
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], imm, 1);
+ return 1;
case 10: /* cmpli */
- imm = (unsigned short) instr;
+ imm = (unsigned short) word;
val = regs->gpr[ra];
#ifdef __powerpc64__
if ((rd & 1) == 0)
val = (unsigned int) val;
#endif
- do_cmp_unsigned(regs, val, imm, rd >> 2);
- goto instr_done;
+ do_cmp_unsigned(regs, op, val, imm, rd >> 2);
+ return 1;
case 11: /* cmpi */
- imm = (short) instr;
+ imm = (short) word;
val = regs->gpr[ra];
#ifdef __powerpc64__
if ((rd & 1) == 0)
val = (int) val;
#endif
- do_cmp_signed(regs, val, imm, rd >> 2);
- goto instr_done;
+ do_cmp_signed(regs, op, val, imm, rd >> 2);
+ return 1;
case 12: /* addic */
- imm = (short) instr;
- add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
- goto instr_done;
+ imm = (short) word;
+ add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+ return 1;
case 13: /* addic. */
- imm = (short) instr;
- add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
- set_cr0(regs, rd);
- goto instr_done;
+ imm = (short) word;
+ add_with_carry(regs, op, rd, regs->gpr[ra], imm, 0);
+ set_cr0(regs, op);
+ return 1;
case 14: /* addi */
- imm = (short) instr;
+ imm = (short) word;
if (ra)
imm += regs->gpr[ra];
- regs->gpr[rd] = imm;
- goto instr_done;
+ op->val = imm;
+ goto compute_done;
case 15: /* addis */
- imm = ((short) instr) << 16;
+ imm = ((short) word) << 16;
if (ra)
imm += regs->gpr[ra];
- regs->gpr[rd] = imm;
- goto instr_done;
+ op->val = imm;
+ goto compute_done;
+
+ case 19:
+ if (((word >> 1) & 0x1f) == 2) {
+ /* addpcis */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ imm = (short) (word & 0xffc1); /* d0 + d2 fields */
+ imm |= (word >> 15) & 0x3e; /* d1 field */
+ op->val = regs->nip + (imm << 16) + 4;
+ goto compute_done;
+ }
+ op->type = UNKNOWN;
+ return 0;
case 20: /* rlwimi */
- mb = (instr >> 6) & 0x1f;
- me = (instr >> 1) & 0x1f;
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
val = DATA32(regs->gpr[rd]);
imm = MASK32(mb, me);
- regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+ op->val = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
goto logical_done;
case 21: /* rlwinm */
- mb = (instr >> 6) & 0x1f;
- me = (instr >> 1) & 0x1f;
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
val = DATA32(regs->gpr[rd]);
- regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+ op->val = ROTATE(val, rb) & MASK32(mb, me);
goto logical_done;
case 23: /* rlwnm */
- mb = (instr >> 6) & 0x1f;
- me = (instr >> 1) & 0x1f;
+ mb = (word >> 6) & 0x1f;
+ me = (word >> 1) & 0x1f;
rb = regs->gpr[rb] & 0x1f;
val = DATA32(regs->gpr[rd]);
- regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+ op->val = ROTATE(val, rb) & MASK32(mb, me);
goto logical_done;
case 24: /* ori */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] | imm;
- goto instr_done;
+ op->val = regs->gpr[rd] | (unsigned short) word;
+ goto logical_done_nocc;
case 25: /* oris */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] | (imm << 16);
- goto instr_done;
+ imm = (unsigned short) word;
+ op->val = regs->gpr[rd] | (imm << 16);
+ goto logical_done_nocc;
case 26: /* xori */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] ^ imm;
- goto instr_done;
+ op->val = regs->gpr[rd] ^ (unsigned short) word;
+ goto logical_done_nocc;
case 27: /* xoris */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16);
- goto instr_done;
+ imm = (unsigned short) word;
+ op->val = regs->gpr[rd] ^ (imm << 16);
+ goto logical_done_nocc;
case 28: /* andi. */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] & imm;
- set_cr0(regs, ra);
- goto instr_done;
+ op->val = regs->gpr[rd] & (unsigned short) word;
+ set_cr0(regs, op);
+ goto logical_done_nocc;
case 29: /* andis. */
- imm = (unsigned short) instr;
- regs->gpr[ra] = regs->gpr[rd] & (imm << 16);
- set_cr0(regs, ra);
- goto instr_done;
+ imm = (unsigned short) word;
+ op->val = regs->gpr[rd] & (imm << 16);
+ set_cr0(regs, op);
+ goto logical_done_nocc;
#ifdef __powerpc64__
case 30: /* rld* */
- mb = ((instr >> 6) & 0x1f) | (instr & 0x20);
+ mb = ((word >> 6) & 0x1f) | (word & 0x20);
val = regs->gpr[rd];
- if ((instr & 0x10) == 0) {
- sh = rb | ((instr & 2) << 4);
+ if ((word & 0x10) == 0) {
+ sh = rb | ((word & 2) << 4);
val = ROTATE(val, sh);
- switch ((instr >> 2) & 3) {
+ switch ((word >> 2) & 3) {
case 0: /* rldicl */
- regs->gpr[ra] = val & MASK64_L(mb);
- goto logical_done;
+ val &= MASK64_L(mb);
+ break;
case 1: /* rldicr */
- regs->gpr[ra] = val & MASK64_R(mb);
- goto logical_done;
+ val &= MASK64_R(mb);
+ break;
case 2: /* rldic */
- regs->gpr[ra] = val & MASK64(mb, 63 - sh);
- goto logical_done;
+ val &= MASK64(mb, 63 - sh);
+ break;
case 3: /* rldimi */
imm = MASK64(mb, 63 - sh);
- regs->gpr[ra] = (regs->gpr[ra] & ~imm) |
+ val = (regs->gpr[ra] & ~imm) |
(val & imm);
- goto logical_done;
}
+ op->val = val;
+ goto logical_done;
} else {
sh = regs->gpr[rb] & 0x3f;
val = ROTATE(val, sh);
- switch ((instr >> 1) & 7) {
+ switch ((word >> 1) & 7) {
case 0: /* rldcl */
- regs->gpr[ra] = val & MASK64_L(mb);
+ op->val = val & MASK64_L(mb);
goto logical_done;
case 1: /* rldcr */
- regs->gpr[ra] = val & MASK64_R(mb);
+ op->val = val & MASK64_R(mb);
goto logical_done;
}
}
#endif
+ op->type = UNKNOWN; /* illegal instruction */
+ return 0;
case 31:
- switch ((instr >> 1) & 0x3ff) {
+ /* isel occupies 32 minor opcodes */
+ if (((word >> 1) & 0x1f) == 15) {
+ mb = (word >> 6) & 0x1f; /* bc field */
+ val = (regs->ccr >> (31 - mb)) & 1;
+ val2 = (ra) ? regs->gpr[ra] : 0;
+
+ op->val = (val) ? val2 : regs->gpr[rb];
+ goto compute_done;
+ }
+
+ switch ((word >> 1) & 0x3ff) {
+ case 4: /* tw */
+ if (rd == 0x1f ||
+ (rd & trap_compare((int)regs->gpr[ra],
+ (int)regs->gpr[rb])))
+ goto trap;
+ return 1;
+#ifdef __powerpc64__
+ case 68: /* td */
+ if (rd & trap_compare(regs->gpr[ra], regs->gpr[rb]))
+ goto trap;
+ return 1;
+#endif
case 83: /* mfmsr */
- if (regs->msr & MSR_PR)
- break;
- regs->gpr[rd] = regs->msr & MSR_MASK;
- goto instr_done;
+ if (user_mode(regs))
+ goto priv;
+ op->type = MFMSR;
+ op->reg = rd;
+ return 0;
case 146: /* mtmsr */
- if (regs->msr & MSR_PR)
- break;
- imm = regs->gpr[rd];
- if ((imm & MSR_RI) == 0)
- /* can't step mtmsr that would clear MSR_RI */
- return -1;
- regs->msr = imm;
- goto instr_done;
+ if (user_mode(regs))
+ goto priv;
+ op->type = MTMSR;
+ op->reg = rd;
+ op->val = 0xffffffff & ~(MSR_ME | MSR_LE);
+ return 0;
#ifdef CONFIG_PPC64
case 178: /* mtmsrd */
+ if (user_mode(regs))
+ goto priv;
+ op->type = MTMSR;
+ op->reg = rd;
/* only MSR_EE and MSR_RI get changed if bit 15 set */
- /* mtmsrd doesn't change MSR_HV and MSR_ME */
- if (regs->msr & MSR_PR)
- break;
- imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL;
- imm = (regs->msr & MSR_MASK & ~imm)
- | (regs->gpr[rd] & imm);
- if ((imm & MSR_RI) == 0)
- /* can't step mtmsrd that would clear MSR_RI */
- return -1;
- regs->msr = imm;
- goto instr_done;
+ /* mtmsrd doesn't change MSR_HV, MSR_ME or MSR_LE */
+ imm = (word & 0x10000)? 0x8002: 0xefffffffffffeffeUL;
+ op->val = imm;
+ return 0;
#endif
+
case 19: /* mfcr */
- regs->gpr[rd] = regs->ccr;
- regs->gpr[rd] &= 0xffffffffUL;
- goto instr_done;
+ imm = 0xffffffffUL;
+ if ((word >> 20) & 1) {
+ imm = 0xf0000000UL;
+ for (sh = 0; sh < 8; ++sh) {
+ if (word & (0x80000 >> sh))
+ break;
+ imm >>= 4;
+ }
+ }
+ op->val = regs->ccr & imm;
+ goto compute_done;
+
+ case 128: /* setb */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ /*
+ * 'ra' encodes the CR field number (bfa) in the top 3 bits.
+ * Since each CR field is 4 bits,
+ * we can simply mask off the bottom two bits (bfa * 4)
+ * to yield the first bit in the CR field.
+ */
+ ra = ra & ~0x3;
+ /* 'val' stores bits of the CR field (bfa) */
+ val = regs->ccr >> (CR0_SHIFT - ra);
+ /* checks if the LT bit of CR field (bfa) is set */
+ if (val & 8)
+ op->val = -1;
+ /* checks if the GT bit of CR field (bfa) is set */
+ else if (val & 4)
+ op->val = 1;
+ else
+ op->val = 0;
+ goto compute_done;
case 144: /* mtcrf */
+ op->type = COMPUTE + SETCC;
imm = 0xf0000000UL;
val = regs->gpr[rd];
+ op->ccval = regs->ccr;
for (sh = 0; sh < 8; ++sh) {
- if (instr & (0x80000 >> sh))
- regs->ccr = (regs->ccr & ~imm) |
+ if (word & (0x80000 >> sh))
+ op->ccval = (op->ccval & ~imm) |
(val & imm);
imm >>= 4;
}
- goto instr_done;
+ return 1;
case 339: /* mfspr */
- spr = (instr >> 11) & 0x3ff;
- switch (spr) {
- case 0x20: /* mfxer */
- regs->gpr[rd] = regs->xer;
- regs->gpr[rd] &= 0xffffffffUL;
- goto instr_done;
- case 0x100: /* mflr */
- regs->gpr[rd] = regs->link;
- goto instr_done;
- case 0x120: /* mfctr */
- regs->gpr[rd] = regs->ctr;
- goto instr_done;
- }
- break;
+ spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
+ op->type = MFSPR;
+ op->reg = rd;
+ op->spr = spr;
+ if (spr == SPRN_XER || spr == SPRN_LR ||
+ spr == SPRN_CTR)
+ return 1;
+ return 0;
case 467: /* mtspr */
- spr = (instr >> 11) & 0x3ff;
- switch (spr) {
- case 0x20: /* mtxer */
- regs->xer = (regs->gpr[rd] & 0xffffffffUL);
- goto instr_done;
- case 0x100: /* mtlr */
- regs->link = regs->gpr[rd];
- goto instr_done;
- case 0x120: /* mtctr */
- regs->ctr = regs->gpr[rd];
- goto instr_done;
- }
- break;
+ spr = ((word >> 16) & 0x1f) | ((word >> 6) & 0x3e0);
+ op->type = MTSPR;
+ op->val = regs->gpr[rd];
+ op->spr = spr;
+ if (spr == SPRN_XER || spr == SPRN_LR ||
+ spr == SPRN_CTR)
+ return 1;
+ return 0;
/*
* Compare instructions
@@ -997,8 +1844,8 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
val2 = (int) val2;
}
#endif
- do_cmp_signed(regs, val, val2, rd >> 2);
- goto instr_done;
+ do_cmp_signed(regs, op, val, val2, rd >> 2);
+ return 1;
case 32: /* cmpl */
val = regs->gpr[ra];
@@ -1010,168 +1857,271 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
val2 = (unsigned int) val2;
}
#endif
- do_cmp_unsigned(regs, val, val2, rd >> 2);
- goto instr_done;
+ do_cmp_unsigned(regs, op, val, val2, rd >> 2);
+ return 1;
+
+ case 508: /* cmpb */
+ do_cmpb(regs, op, regs->gpr[rd], regs->gpr[rb]);
+ goto logical_done_nocc;
/*
* Arithmetic instructions
*/
case 8: /* subfc */
- add_with_carry(regs, rd, ~regs->gpr[ra],
+ add_with_carry(regs, op, rd, ~regs->gpr[ra],
regs->gpr[rb], 1);
goto arith_done;
#ifdef __powerpc64__
case 9: /* mulhdu */
- asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhdu %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
#endif
case 10: /* addc */
- add_with_carry(regs, rd, regs->gpr[ra],
+ add_with_carry(regs, op, rd, regs->gpr[ra],
regs->gpr[rb], 0);
goto arith_done;
case 11: /* mulhwu */
- asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhwu %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
case 40: /* subf */
- regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra];
+ op->val = regs->gpr[rb] - regs->gpr[ra];
goto arith_done;
#ifdef __powerpc64__
case 73: /* mulhd */
- asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhd %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
#endif
case 75: /* mulhw */
- asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) :
+ asm("mulhw %0,%1,%2" : "=r" (op->val) :
"r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
goto arith_done;
case 104: /* neg */
- regs->gpr[rd] = -regs->gpr[ra];
+ op->val = -regs->gpr[ra];
goto arith_done;
case 136: /* subfe */
- add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb],
- regs->xer & XER_CA);
+ add_with_carry(regs, op, rd, ~regs->gpr[ra],
+ regs->gpr[rb], regs->xer & XER_CA);
goto arith_done;
case 138: /* adde */
- add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb],
- regs->xer & XER_CA);
+ add_with_carry(regs, op, rd, regs->gpr[ra],
+ regs->gpr[rb], regs->xer & XER_CA);
goto arith_done;
case 200: /* subfze */
- add_with_carry(regs, rd, ~regs->gpr[ra], 0L,
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], 0L,
regs->xer & XER_CA);
goto arith_done;
case 202: /* addze */
- add_with_carry(regs, rd, regs->gpr[ra], 0L,
+ add_with_carry(regs, op, rd, regs->gpr[ra], 0L,
regs->xer & XER_CA);
goto arith_done;
case 232: /* subfme */
- add_with_carry(regs, rd, ~regs->gpr[ra], -1L,
+ add_with_carry(regs, op, rd, ~regs->gpr[ra], -1L,
regs->xer & XER_CA);
goto arith_done;
#ifdef __powerpc64__
case 233: /* mulld */
- regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb];
+ op->val = regs->gpr[ra] * regs->gpr[rb];
goto arith_done;
#endif
case 234: /* addme */
- add_with_carry(regs, rd, regs->gpr[ra], -1L,
+ add_with_carry(regs, op, rd, regs->gpr[ra], -1L,
regs->xer & XER_CA);
goto arith_done;
case 235: /* mullw */
- regs->gpr[rd] = (unsigned int) regs->gpr[ra] *
- (unsigned int) regs->gpr[rb];
- goto arith_done;
+ op->val = (long)(int) regs->gpr[ra] *
+ (int) regs->gpr[rb];
+ goto arith_done;
+#ifdef __powerpc64__
+ case 265: /* modud */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = regs->gpr[ra] % regs->gpr[rb];
+ goto compute_done;
+#endif
case 266: /* add */
- regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb];
+ op->val = regs->gpr[ra] + regs->gpr[rb];
goto arith_done;
+
+ case 267: /* moduw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = (unsigned int) regs->gpr[ra] %
+ (unsigned int) regs->gpr[rb];
+ goto compute_done;
#ifdef __powerpc64__
case 457: /* divdu */
- regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb];
+ op->val = regs->gpr[ra] / regs->gpr[rb];
goto arith_done;
#endif
case 459: /* divwu */
- regs->gpr[rd] = (unsigned int) regs->gpr[ra] /
+ op->val = (unsigned int) regs->gpr[ra] /
(unsigned int) regs->gpr[rb];
goto arith_done;
#ifdef __powerpc64__
case 489: /* divd */
- regs->gpr[rd] = (long int) regs->gpr[ra] /
+ op->val = (long int) regs->gpr[ra] /
(long int) regs->gpr[rb];
goto arith_done;
#endif
case 491: /* divw */
- regs->gpr[rd] = (int) regs->gpr[ra] /
+ op->val = (int) regs->gpr[ra] /
(int) regs->gpr[rb];
goto arith_done;
+#ifdef __powerpc64__
+ case 425: /* divde[.] */
+ asm volatile(PPC_DIVDE(%0, %1, %2) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]));
+ goto arith_done;
+ case 393: /* divdeu[.] */
+ asm volatile(PPC_DIVDEU(%0, %1, %2) :
+ "=r" (op->val) : "r" (regs->gpr[ra]),
+ "r" (regs->gpr[rb]));
+ goto arith_done;
+#endif
+ case 755: /* darn */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ switch (ra & 0x3) {
+ case 0:
+ /* 32-bit conditioned */
+ asm volatile(PPC_DARN(%0, 0) : "=r" (op->val));
+ goto compute_done;
+
+ case 1:
+ /* 64-bit conditioned */
+ asm volatile(PPC_DARN(%0, 1) : "=r" (op->val));
+ goto compute_done;
+
+ case 2:
+ /* 64-bit raw */
+ asm volatile(PPC_DARN(%0, 2) : "=r" (op->val));
+ goto compute_done;
+ }
+
+ goto unknown_opcode;
+#ifdef __powerpc64__
+ case 777: /* modsd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = (long int) regs->gpr[ra] %
+ (long int) regs->gpr[rb];
+ goto compute_done;
+#endif
+ case 779: /* modsw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->val = (int) regs->gpr[ra] %
+ (int) regs->gpr[rb];
+ goto compute_done;
/*
* Logical instructions
*/
case 26: /* cntlzw */
- asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
- "r" (regs->gpr[rd]));
+ val = (unsigned int) regs->gpr[rd];
+ op->val = ( val ? __builtin_clz(val) : 32 );
goto logical_done;
#ifdef __powerpc64__
case 58: /* cntlzd */
- asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) :
- "r" (regs->gpr[rd]));
+ val = regs->gpr[rd];
+ op->val = ( val ? __builtin_clzl(val) : 64 );
goto logical_done;
#endif
case 28: /* and */
- regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb];
+ op->val = regs->gpr[rd] & regs->gpr[rb];
goto logical_done;
case 60: /* andc */
- regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
+ op->val = regs->gpr[rd] & ~regs->gpr[rb];
goto logical_done;
+ case 122: /* popcntb */
+ do_popcnt(regs, op, regs->gpr[rd], 8);
+ goto logical_done_nocc;
+
case 124: /* nor */
- regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
+ op->val = ~(regs->gpr[rd] | regs->gpr[rb]);
goto logical_done;
+ case 154: /* prtyw */
+ do_prty(regs, op, regs->gpr[rd], 32);
+ goto logical_done_nocc;
+
+ case 186: /* prtyd */
+ do_prty(regs, op, regs->gpr[rd], 64);
+ goto logical_done_nocc;
+#ifdef CONFIG_PPC64
+ case 252: /* bpermd */
+ do_bpermd(regs, op, regs->gpr[rd], regs->gpr[rb]);
+ goto logical_done_nocc;
+#endif
case 284: /* xor */
- regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
+ op->val = ~(regs->gpr[rd] ^ regs->gpr[rb]);
goto logical_done;
case 316: /* xor */
- regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
+ op->val = regs->gpr[rd] ^ regs->gpr[rb];
goto logical_done;
+ case 378: /* popcntw */
+ do_popcnt(regs, op, regs->gpr[rd], 32);
+ goto logical_done_nocc;
+
case 412: /* orc */
- regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
+ op->val = regs->gpr[rd] | ~regs->gpr[rb];
goto logical_done;
case 444: /* or */
- regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb];
+ op->val = regs->gpr[rd] | regs->gpr[rb];
goto logical_done;
case 476: /* nand */
- regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
+ op->val = ~(regs->gpr[rd] & regs->gpr[rb]);
goto logical_done;
-
+#ifdef CONFIG_PPC64
+ case 506: /* popcntd */
+ do_popcnt(regs, op, regs->gpr[rd], 64);
+ goto logical_done_nocc;
+#endif
+ case 538: /* cnttzw */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ val = (unsigned int) regs->gpr[rd];
+ op->val = (val ? __builtin_ctz(val) : 32);
+ goto logical_done;
+#ifdef __powerpc64__
+ case 570: /* cnttzd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ val = regs->gpr[rd];
+ op->val = (val ? __builtin_ctzl(val) : 64);
+ goto logical_done;
+#endif
case 922: /* extsh */
- regs->gpr[ra] = (signed short) regs->gpr[rd];
+ op->val = (signed short) regs->gpr[rd];
goto logical_done;
case 954: /* extsb */
- regs->gpr[ra] = (signed char) regs->gpr[rd];
+ op->val = (signed char) regs->gpr[rd];
goto logical_done;
#ifdef __powerpc64__
case 986: /* extsw */
- regs->gpr[ra] = (signed int) regs->gpr[rd];
+ op->val = (signed int) regs->gpr[rd];
goto logical_done;
#endif
@@ -1181,370 +2131,621 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
case 24: /* slw */
sh = regs->gpr[rb] & 0x3f;
if (sh < 32)
- regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL;
+ op->val = (regs->gpr[rd] << sh) & 0xffffffffUL;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 536: /* srw */
sh = regs->gpr[rb] & 0x3f;
if (sh < 32)
- regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+ op->val = (regs->gpr[rd] & 0xffffffffUL) >> sh;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 792: /* sraw */
+ op->type = COMPUTE + SETREG + SETXER;
sh = regs->gpr[rb] & 0x3f;
ival = (signed int) regs->gpr[rd];
- regs->gpr[ra] = ival >> (sh < 32 ? sh : 31);
+ op->val = ival >> (sh < 32 ? sh : 31);
+ op->xerval = regs->xer;
if (ival < 0 && (sh >= 32 || (ival & ((1ul << sh) - 1)) != 0))
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
case 824: /* srawi */
+ op->type = COMPUTE + SETREG + SETXER;
sh = rb;
ival = (signed int) regs->gpr[rd];
- regs->gpr[ra] = ival >> sh;
+ op->val = ival >> sh;
+ op->xerval = regs->xer;
if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
#ifdef __powerpc64__
case 27: /* sld */
sh = regs->gpr[rb] & 0x7f;
if (sh < 64)
- regs->gpr[ra] = regs->gpr[rd] << sh;
+ op->val = regs->gpr[rd] << sh;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 539: /* srd */
sh = regs->gpr[rb] & 0x7f;
if (sh < 64)
- regs->gpr[ra] = regs->gpr[rd] >> sh;
+ op->val = regs->gpr[rd] >> sh;
else
- regs->gpr[ra] = 0;
+ op->val = 0;
goto logical_done;
case 794: /* srad */
+ op->type = COMPUTE + SETREG + SETXER;
sh = regs->gpr[rb] & 0x7f;
ival = (signed long int) regs->gpr[rd];
- regs->gpr[ra] = ival >> (sh < 64 ? sh : 63);
+ op->val = ival >> (sh < 64 ? sh : 63);
+ op->xerval = regs->xer;
if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
case 826: /* sradi with sh_5 = 0 */
case 827: /* sradi with sh_5 = 1 */
- sh = rb | ((instr & 2) << 4);
+ op->type = COMPUTE + SETREG + SETXER;
+ sh = rb | ((word & 2) << 4);
ival = (signed long int) regs->gpr[rd];
- regs->gpr[ra] = ival >> sh;
+ op->val = ival >> sh;
+ op->xerval = regs->xer;
if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
- regs->xer |= XER_CA;
+ op->xerval |= XER_CA;
else
- regs->xer &= ~XER_CA;
+ op->xerval &= ~XER_CA;
+ set_ca32(op, op->xerval & XER_CA);
goto logical_done;
+
+ case 890: /* extswsli with sh_5 = 0 */
+ case 891: /* extswsli with sh_5 = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->type = COMPUTE + SETREG;
+ sh = rb | ((word & 2) << 4);
+ val = (signed int) regs->gpr[rd];
+ if (sh)
+ op->val = ROTATE(val, sh) & MASK64(0, 63 - sh);
+ else
+ op->val = val;
+ goto logical_done;
+
#endif /* __powerpc64__ */
/*
* Cache instructions
*/
case 54: /* dcbst */
- ea = xform_ea(instr, regs, 0);
- if (!address_ok(regs, ea, 8))
- return 0;
- err = 0;
- __cacheop_user_asmx(ea, err, "dcbst");
- if (err)
- return 0;
- goto instr_done;
+ op->type = MKOP(CACHEOP, DCBST, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
case 86: /* dcbf */
- ea = xform_ea(instr, regs, 0);
- if (!address_ok(regs, ea, 8))
- return 0;
- err = 0;
- __cacheop_user_asmx(ea, err, "dcbf");
- if (err)
- return 0;
- goto instr_done;
+ op->type = MKOP(CACHEOP, DCBF, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
case 246: /* dcbtst */
- if (rd == 0) {
- ea = xform_ea(instr, regs, 0);
- prefetchw((void *) ea);
- }
- goto instr_done;
+ op->type = MKOP(CACHEOP, DCBTST, 0);
+ op->ea = xform_ea(word, regs);
+ op->reg = rd;
+ return 0;
case 278: /* dcbt */
- if (rd == 0) {
- ea = xform_ea(instr, regs, 0);
- prefetch((void *) ea);
- }
- goto instr_done;
+ op->type = MKOP(CACHEOP, DCBTST, 0);
+ op->ea = xform_ea(word, regs);
+ op->reg = rd;
+ return 0;
+ case 982: /* icbi */
+ op->type = MKOP(CACHEOP, ICBI, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
+
+ case 1014: /* dcbz */
+ op->type = MKOP(CACHEOP, DCBZ, 0);
+ op->ea = xform_ea(word, regs);
+ return 0;
}
break;
}
- /*
- * Following cases are for loads and stores, so bail out
- * if we're in little-endian mode.
- */
- if (regs->msr & MSR_LE)
- return 0;
-
- /*
- * Save register RA in case it's an update form load or store
- * and the access faults.
- */
- old_ra = regs->gpr[ra];
+/*
+ * Loads and stores.
+ */
+ op->type = UNKNOWN;
+ op->update_reg = ra;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+ u = (word >> 20) & UPDATE;
+ op->vsx_flags = 0;
switch (opcode) {
case 31:
- u = instr & 0x40;
- switch ((instr >> 1) & 0x3ff) {
+ u = word & UPDATE;
+ op->ea = xform_ea(word, regs);
+ switch ((word >> 1) & 0x3ff) {
case 20: /* lwarx */
- ea = xform_ea(instr, regs, 0);
- if (ea & 3)
- break; /* can't handle misaligned */
- err = -EFAULT;
- if (!address_ok(regs, ea, 4))
- goto ldst_done;
- err = 0;
- __get_user_asmx(val, ea, err, "lwarx");
- if (!err)
- regs->gpr[rd] = val;
- goto ldst_done;
+ op->type = MKOP(LARX, 0, 4);
+ break;
case 150: /* stwcx. */
- ea = xform_ea(instr, regs, 0);
- if (ea & 3)
- break; /* can't handle misaligned */
- err = -EFAULT;
- if (!address_ok(regs, ea, 4))
- goto ldst_done;
- err = 0;
- __put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr);
- if (!err)
- regs->ccr = (regs->ccr & 0x0fffffff) |
- (cr & 0xe0000000) |
- ((regs->xer >> 3) & 0x10000000);
- goto ldst_done;
+ op->type = MKOP(STCX, 0, 4);
+ break;
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
+ case 52: /* lbarx */
+ op->type = MKOP(LARX, 0, 1);
+ break;
+
+ case 694: /* stbcx. */
+ op->type = MKOP(STCX, 0, 1);
+ break;
+
+ case 116: /* lharx */
+ op->type = MKOP(LARX, 0, 2);
+ break;
+
+ case 726: /* sthcx. */
+ op->type = MKOP(STCX, 0, 2);
+ break;
+#endif
#ifdef __powerpc64__
case 84: /* ldarx */
- ea = xform_ea(instr, regs, 0);
- if (ea & 7)
- break; /* can't handle misaligned */
- err = -EFAULT;
- if (!address_ok(regs, ea, 8))
- goto ldst_done;
- err = 0;
- __get_user_asmx(val, ea, err, "ldarx");
- if (!err)
- regs->gpr[rd] = val;
- goto ldst_done;
+ op->type = MKOP(LARX, 0, 8);
+ break;
case 214: /* stdcx. */
- ea = xform_ea(instr, regs, 0);
- if (ea & 7)
- break; /* can't handle misaligned */
- err = -EFAULT;
- if (!address_ok(regs, ea, 8))
- goto ldst_done;
- err = 0;
- __put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr);
- if (!err)
- regs->ccr = (regs->ccr & 0x0fffffff) |
- (cr & 0xe0000000) |
- ((regs->xer >> 3) & 0x10000000);
- goto ldst_done;
+ op->type = MKOP(STCX, 0, 8);
+ break;
- case 21: /* ldx */
- case 53: /* ldux */
- err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
- 8, regs);
- goto ldst_done;
+ case 276: /* lqarx */
+ if (!((rd & 1) || rd == ra || rd == rb))
+ op->type = MKOP(LARX, 0, 16);
+ break;
+
+ case 182: /* stqcx. */
+ if (!(rd & 1))
+ op->type = MKOP(STCX, 0, 16);
+ break;
#endif
case 23: /* lwzx */
case 55: /* lwzux */
- err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
- 4, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, u, 4);
+ break;
case 87: /* lbzx */
case 119: /* lbzux */
- err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
- 1, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, u, 1);
+ break;
#ifdef CONFIG_ALTIVEC
+ /*
+ * Note: for the load/store vector element instructions,
+ * bits of the EA say which field of the VMX register to use.
+ */
+ case 7: /* lvebx */
+ op->type = MKOP(LOAD_VMX, 0, 1);
+ op->element_size = 1;
+ break;
+
+ case 39: /* lvehx */
+ op->type = MKOP(LOAD_VMX, 0, 2);
+ op->element_size = 2;
+ break;
+
+ case 71: /* lvewx */
+ op->type = MKOP(LOAD_VMX, 0, 4);
+ op->element_size = 4;
+ break;
+
case 103: /* lvx */
case 359: /* lvxl */
- if (!(regs->msr & MSR_VEC))
- break;
- ea = xform_ea(instr, regs, 0);
- err = do_vec_load(rd, do_lvx, ea, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD_VMX, 0, 16);
+ op->element_size = 16;
+ break;
+
+ case 135: /* stvebx */
+ op->type = MKOP(STORE_VMX, 0, 1);
+ op->element_size = 1;
+ break;
+
+ case 167: /* stvehx */
+ op->type = MKOP(STORE_VMX, 0, 2);
+ op->element_size = 2;
+ break;
+
+ case 199: /* stvewx */
+ op->type = MKOP(STORE_VMX, 0, 4);
+ op->element_size = 4;
+ break;
case 231: /* stvx */
case 487: /* stvxl */
- if (!(regs->msr & MSR_VEC))
- break;
- ea = xform_ea(instr, regs, 0);
- err = do_vec_store(rd, do_stvx, ea, regs);
- goto ldst_done;
+ op->type = MKOP(STORE_VMX, 0, 16);
+ break;
#endif /* CONFIG_ALTIVEC */
#ifdef __powerpc64__
+ case 21: /* ldx */
+ case 53: /* ldux */
+ op->type = MKOP(LOAD, u, 8);
+ break;
+
case 149: /* stdx */
case 181: /* stdux */
- val = regs->gpr[rd];
- err = write_mem(val, xform_ea(instr, regs, u), 8, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 8);
+ break;
#endif
case 151: /* stwx */
case 183: /* stwux */
- val = regs->gpr[rd];
- err = write_mem(val, xform_ea(instr, regs, u), 4, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 4);
+ break;
case 215: /* stbx */
case 247: /* stbux */
- val = regs->gpr[rd];
- err = write_mem(val, xform_ea(instr, regs, u), 1, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 1);
+ break;
case 279: /* lhzx */
case 311: /* lhzux */
- err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
- 2, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, u, 2);
+ break;
#ifdef __powerpc64__
case 341: /* lwax */
case 373: /* lwaux */
- err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
- 4, regs);
- if (!err)
- regs->gpr[rd] = (signed int) regs->gpr[rd];
- goto ldst_done;
+ op->type = MKOP(LOAD, SIGNEXT | u, 4);
+ break;
#endif
case 343: /* lhax */
case 375: /* lhaux */
- err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
- 2, regs);
- if (!err)
- regs->gpr[rd] = (signed short) regs->gpr[rd];
- goto ldst_done;
+ op->type = MKOP(LOAD, SIGNEXT | u, 2);
+ break;
case 407: /* sthx */
case 439: /* sthux */
- val = regs->gpr[rd];
- err = write_mem(val, xform_ea(instr, regs, u), 2, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 2);
+ break;
#ifdef __powerpc64__
case 532: /* ldbrx */
- err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs);
- if (!err)
- regs->gpr[rd] = byterev_8(val);
- goto ldst_done;
+ op->type = MKOP(LOAD, BYTEREV, 8);
+ break;
#endif
+ case 533: /* lswx */
+ op->type = MKOP(LOAD_MULTI, 0, regs->xer & 0x7f);
+ break;
case 534: /* lwbrx */
- err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs);
- if (!err)
- regs->gpr[rd] = byterev_4(val);
- goto ldst_done;
+ op->type = MKOP(LOAD, BYTEREV, 4);
+ break;
+
+ case 597: /* lswi */
+ if (rb == 0)
+ rb = 32; /* # bytes to load */
+ op->type = MKOP(LOAD_MULTI, 0, rb);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ break;
#ifdef CONFIG_PPC_FPU
case 535: /* lfsx */
case 567: /* lfsux */
- if (!(regs->msr & MSR_FP))
- break;
- ea = xform_ea(instr, regs, u);
- err = do_fp_load(rd, do_lfs, ea, 4, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD_FP, u | FPCONV, 4);
+ break;
case 599: /* lfdx */
case 631: /* lfdux */
- if (!(regs->msr & MSR_FP))
- break;
- ea = xform_ea(instr, regs, u);
- err = do_fp_load(rd, do_lfd, ea, 8, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD_FP, u, 8);
+ break;
case 663: /* stfsx */
case 695: /* stfsux */
- if (!(regs->msr & MSR_FP))
- break;
- ea = xform_ea(instr, regs, u);
- err = do_fp_store(rd, do_stfs, ea, 4, regs);
- goto ldst_done;
+ op->type = MKOP(STORE_FP, u | FPCONV, 4);
+ break;
case 727: /* stfdx */
case 759: /* stfdux */
- if (!(regs->msr & MSR_FP))
- break;
- ea = xform_ea(instr, regs, u);
- err = do_fp_store(rd, do_stfd, ea, 8, regs);
- goto ldst_done;
-#endif
+ op->type = MKOP(STORE_FP, u, 8);
+ break;
+
+#ifdef __powerpc64__
+ case 791: /* lfdpx */
+ op->type = MKOP(LOAD_FP, 0, 16);
+ break;
+
+ case 855: /* lfiwax */
+ op->type = MKOP(LOAD_FP, SIGNEXT, 4);
+ break;
+
+ case 887: /* lfiwzx */
+ op->type = MKOP(LOAD_FP, 0, 4);
+ break;
+
+ case 919: /* stfdpx */
+ op->type = MKOP(STORE_FP, 0, 16);
+ break;
+
+ case 983: /* stfiwx */
+ op->type = MKOP(STORE_FP, 0, 4);
+ break;
+#endif /* __powerpc64 */
+#endif /* CONFIG_PPC_FPU */
#ifdef __powerpc64__
case 660: /* stdbrx */
- val = byterev_8(regs->gpr[rd]);
- err = write_mem(val, xform_ea(instr, regs, 0), 8, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, BYTEREV, 8);
+ op->val = byterev_8(regs->gpr[rd]);
+ break;
#endif
+ case 661: /* stswx */
+ op->type = MKOP(STORE_MULTI, 0, regs->xer & 0x7f);
+ break;
+
case 662: /* stwbrx */
- val = byterev_4(regs->gpr[rd]);
- err = write_mem(val, xform_ea(instr, regs, 0), 4, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, BYTEREV, 4);
+ op->val = byterev_4(regs->gpr[rd]);
+ break;
+
+ case 725: /* stswi */
+ if (rb == 0)
+ rb = 32; /* # bytes to store */
+ op->type = MKOP(STORE_MULTI, 0, rb);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ break;
case 790: /* lhbrx */
- err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs);
- if (!err)
- regs->gpr[rd] = byterev_2(val);
- goto ldst_done;
+ op->type = MKOP(LOAD, BYTEREV, 2);
+ break;
case 918: /* sthbrx */
- val = byterev_2(regs->gpr[rd]);
- err = write_mem(val, xform_ea(instr, regs, 0), 2, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, BYTEREV, 2);
+ op->val = byterev_2(regs->gpr[rd]);
+ break;
#ifdef CONFIG_VSX
+ case 12: /* lxsiwzx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ break;
+
+ case 76: /* lxsiwax */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, SIGNEXT, 4);
+ op->element_size = 8;
+ break;
+
+ case 140: /* stxsiwx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ break;
+
+ case 268: /* lxvx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 269: /* lxvl */
+ case 301: { /* lxvll */
+ int nb;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ nb = regs->gpr[rb] & 0xff;
+ if (nb > 16)
+ nb = 16;
+ op->type = MKOP(LOAD_VSX, 0, nb);
+ op->element_size = 16;
+ op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
+ VSX_CHECK_VEC;
+ break;
+ }
+ case 332: /* lxvdsx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_SPLAT;
+ break;
+
+ case 333: /* lxvpx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(LOAD_VSX, 0, 32);
+ op->element_size = 32;
+ break;
+
+ case 364: /* lxvwsx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 4;
+ op->vsx_flags = VSX_SPLAT | VSX_CHECK_VEC;
+ break;
+
+ case 396: /* stxvx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 397: /* stxvl */
+ case 429: { /* stxvll */
+ int nb;
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->ea = ra ? regs->gpr[ra] : 0;
+ nb = regs->gpr[rb] & 0xff;
+ if (nb > 16)
+ nb = 16;
+ op->type = MKOP(STORE_VSX, 0, nb);
+ op->element_size = 16;
+ op->vsx_flags = ((word & 0x20) ? VSX_LDLEFT : 0) |
+ VSX_CHECK_VEC;
+ break;
+ }
+ case 461: /* stxvpx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(STORE_VSX, 0, 32);
+ op->element_size = 32;
+ break;
+ case 524: /* lxsspx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV;
+ break;
+
+ case 588: /* lxsdx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ break;
+
+ case 652: /* stxsspx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV;
+ break;
+
+ case 716: /* stxsdx */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 8);
+ op->element_size = 8;
+ break;
+
+ case 780: /* lxvw4x */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 4;
+ break;
+
+ case 781: /* lxsibzx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 1);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 812: /* lxvh8x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 2;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 813: /* lxsihzx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 2);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
case 844: /* lxvd2x */
- case 876: /* lxvd2ux */
- if (!(regs->msr & MSR_VSX))
- break;
- rd |= (instr & 1) << 5;
- ea = xform_ea(instr, regs, u);
- err = do_vsx_load(rd, do_lxvd2x, ea, regs);
- goto ldst_done;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 8;
+ break;
+
+ case 876: /* lxvb16x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 1;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 908: /* stxvw4x */
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 4;
+ break;
+
+ case 909: /* stxsibx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 1);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 940: /* stxvh8x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 2;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 941: /* stxsihx */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 2);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
case 972: /* stxvd2x */
- case 1004: /* stxvd2ux */
- if (!(regs->msr & MSR_VSX))
- break;
- rd |= (instr & 1) << 5;
- ea = xform_ea(instr, regs, u);
- err = do_vsx_store(rd, do_stxvd2x, ea, regs);
- goto ldst_done;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 8;
+ break;
+
+ case 1004: /* stxvb16x */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd | ((word & 1) << 5);
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 1;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
#endif /* CONFIG_VSX */
}
@@ -1552,186 +2753,919 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
case 32: /* lwz */
case 33: /* lwzu */
- err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 4, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, u, 4);
+ op->ea = dform_ea(word, regs);
+ break;
case 34: /* lbz */
case 35: /* lbzu */
- err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 1, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, u, 1);
+ op->ea = dform_ea(word, regs);
+ break;
case 36: /* stw */
- val = regs->gpr[rd];
- err = write_mem(val, dform_ea(instr, regs), 4, regs);
- goto ldst_done;
-
case 37: /* stwu */
- val = regs->gpr[rd];
- val3 = dform_ea(instr, regs);
- /*
- * For PPC32 we always use stwu to change stack point with r1. So
- * this emulated store may corrupt the exception frame, now we
- * have to provide the exception frame trampoline, which is pushed
- * below the kprobed function stack. So we only update gpr[1] but
- * don't emulate the real store operation. We will do real store
- * operation safely in exception return code by checking this flag.
- */
- if ((ra == 1) && !(regs->msr & MSR_PR) \
- && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
-#ifdef CONFIG_PPC32
- /*
- * Check if we will touch kernel sack overflow
- */
- if (val3 - STACK_INT_FRAME_SIZE <= current->thread.ksp_limit) {
- printk(KERN_CRIT "Can't kprobe this since Kernel stack overflow.\n");
- err = -EINVAL;
- break;
- }
-#endif /* CONFIG_PPC32 */
- /*
- * Check if we already set since that means we'll
- * lose the previous value.
- */
- WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
- set_thread_flag(TIF_EMULATE_STACK_STORE);
- err = 0;
- } else
- err = write_mem(val, val3, 4, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 4);
+ op->ea = dform_ea(word, regs);
+ break;
case 38: /* stb */
case 39: /* stbu */
- val = regs->gpr[rd];
- err = write_mem(val, dform_ea(instr, regs), 1, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 1);
+ op->ea = dform_ea(word, regs);
+ break;
case 40: /* lhz */
case 41: /* lhzu */
- err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, u, 2);
+ op->ea = dform_ea(word, regs);
+ break;
case 42: /* lha */
case 43: /* lhau */
- err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
- if (!err)
- regs->gpr[rd] = (signed short) regs->gpr[rd];
- goto ldst_done;
+ op->type = MKOP(LOAD, SIGNEXT | u, 2);
+ op->ea = dform_ea(word, regs);
+ break;
case 44: /* sth */
case 45: /* sthu */
- val = regs->gpr[rd];
- err = write_mem(val, dform_ea(instr, regs), 2, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, u, 2);
+ op->ea = dform_ea(word, regs);
+ break;
case 46: /* lmw */
- ra = (instr >> 16) & 0x1f;
if (ra >= rd)
break; /* invalid form, ra in range to load */
- ea = dform_ea(instr, regs);
- do {
- err = read_mem(&regs->gpr[rd], ea, 4, regs);
- if (err)
- return 0;
- ea += 4;
- } while (++rd < 32);
- goto instr_done;
+ op->type = MKOP(LOAD_MULTI, 0, 4 * (32 - rd));
+ op->ea = dform_ea(word, regs);
+ break;
case 47: /* stmw */
- ea = dform_ea(instr, regs);
- do {
- err = write_mem(regs->gpr[rd], ea, 4, regs);
- if (err)
- return 0;
- ea += 4;
- } while (++rd < 32);
- goto instr_done;
+ op->type = MKOP(STORE_MULTI, 0, 4 * (32 - rd));
+ op->ea = dform_ea(word, regs);
+ break;
#ifdef CONFIG_PPC_FPU
case 48: /* lfs */
case 49: /* lfsu */
- if (!(regs->msr & MSR_FP))
- break;
- ea = dform_ea(instr, regs);
- err = do_fp_load(rd, do_lfs, ea, 4, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD_FP, u | FPCONV, 4);
+ op->ea = dform_ea(word, regs);
+ break;
case 50: /* lfd */
case 51: /* lfdu */
- if (!(regs->msr & MSR_FP))
- break;
- ea = dform_ea(instr, regs);
- err = do_fp_load(rd, do_lfd, ea, 8, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD_FP, u, 8);
+ op->ea = dform_ea(word, regs);
+ break;
case 52: /* stfs */
case 53: /* stfsu */
- if (!(regs->msr & MSR_FP))
- break;
- ea = dform_ea(instr, regs);
- err = do_fp_store(rd, do_stfs, ea, 4, regs);
- goto ldst_done;
+ op->type = MKOP(STORE_FP, u | FPCONV, 4);
+ op->ea = dform_ea(word, regs);
+ break;
case 54: /* stfd */
case 55: /* stfdu */
- if (!(regs->msr & MSR_FP))
- break;
- ea = dform_ea(instr, regs);
- err = do_fp_store(rd, do_stfd, ea, 8, regs);
- goto ldst_done;
+ op->type = MKOP(STORE_FP, u, 8);
+ op->ea = dform_ea(word, regs);
+ break;
#endif
#ifdef __powerpc64__
+ case 56: /* lq */
+ if (!((rd & 1) || (rd == ra)))
+ op->type = MKOP(LOAD, 0, 16);
+ op->ea = dqform_ea(word, regs);
+ break;
+#endif
+
+#ifdef CONFIG_VSX
+ case 57: /* lfdp, lxsd, lxssp */
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
+ case 0: /* lfdp */
+ if (rd & 1)
+ break; /* reg must be even */
+ op->type = MKOP(LOAD_FP, 0, 16);
+ break;
+ case 2: /* lxsd */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 3: /* lxssp */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ }
+ break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
case 58: /* ld[u], lwa */
- switch (instr & 3) {
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
case 0: /* ld */
- err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
- 8, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, 0, 8);
+ break;
case 1: /* ldu */
- err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
- 8, regs);
- goto ldst_done;
+ op->type = MKOP(LOAD, UPDATE, 8);
+ break;
case 2: /* lwa */
- err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
- 4, regs);
- if (!err)
- regs->gpr[rd] = (signed int) regs->gpr[rd];
- goto ldst_done;
+ op->type = MKOP(LOAD, SIGNEXT, 4);
+ break;
}
break;
+#endif
+#ifdef CONFIG_VSX
+ case 6:
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->element_size = 32;
+ switch (word & 0xf) {
+ case 0: /* lxvp */
+ op->type = MKOP(LOAD_VSX, 0, 32);
+ break;
+ case 1: /* stxvp */
+ op->type = MKOP(STORE_VSX, 0, 32);
+ break;
+ }
+ break;
+
+ case 61: /* stfdp, lxv, stxsd, stxssp, stxv */
+ switch (word & 7) {
+ case 0: /* stfdp with LSB of DS field = 0 */
+ case 4: /* stfdp with LSB of DS field = 1 */
+ op->ea = dsform_ea(word, regs);
+ op->type = MKOP(STORE_FP, 0, 16);
+ break;
+
+ case 1: /* lxv */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ if (word & 8)
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 2: /* stxsd with LSB of DS field = 0 */
+ case 6: /* stxsd with LSB of DS field = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dsform_ea(word, regs);
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+
+ case 3: /* stxssp with LSB of DS field = 0 */
+ case 7: /* stxssp with LSB of DS field = 1 */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dsform_ea(word, regs);
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+
+ case 5: /* stxv */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ goto unknown_opcode;
+ op->ea = dqform_ea(word, regs);
+ if (word & 8)
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, 0, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ }
+ break;
+#endif /* CONFIG_VSX */
+
+#ifdef __powerpc64__
case 62: /* std[u] */
- val = regs->gpr[rd];
- switch (instr & 3) {
+ op->ea = dsform_ea(word, regs);
+ switch (word & 3) {
case 0: /* std */
- err = write_mem(val, dsform_ea(instr, regs), 8, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, 0, 8);
+ break;
case 1: /* stdu */
- err = write_mem(val, dsform_ea(instr, regs), 8, regs);
- goto ldst_done;
+ op->type = MKOP(STORE, UPDATE, 8);
+ break;
+ case 2: /* stq */
+ if (!(rd & 1))
+ op->type = MKOP(STORE, 0, 16);
+ break;
}
break;
+ case 1: /* Prefixed instructions */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31))
+ goto unknown_opcode;
+
+ prefix_r = GET_PREFIX_R(word);
+ ra = GET_PREFIX_RA(suffix);
+ op->update_reg = ra;
+ rd = (suffix >> 21) & 0x1f;
+ op->reg = rd;
+ op->val = regs->gpr[rd];
+
+ suffixopcode = get_op(suffix);
+ prefixtype = (word >> 24) & 0x3;
+ switch (prefixtype) {
+ case 0: /* Type 00 Eight-Byte Load/Store */
+ if (prefix_r && ra)
+ break;
+ op->ea = mlsd_8lsd_ea(word, suffix, regs);
+ switch (suffixopcode) {
+ case 41: /* plwa */
+ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 4);
+ break;
+#ifdef CONFIG_VSX
+ case 42: /* plxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, PREFIXED, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 43: /* plxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(LOAD_VSX, PREFIXED, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ case 46: /* pstxsd */
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, PREFIXED, 8);
+ op->element_size = 8;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 47: /* pstxssp */
+ op->reg = rd + 32;
+ op->type = MKOP(STORE_VSX, PREFIXED, 4);
+ op->element_size = 8;
+ op->vsx_flags = VSX_FPCONV | VSX_CHECK_VEC;
+ break;
+ case 51: /* plxv1 */
+ op->reg += 32;
+ fallthrough;
+ case 50: /* plxv0 */
+ op->type = MKOP(LOAD_VSX, PREFIXED, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+ case 55: /* pstxv1 */
+ op->reg = rd + 32;
+ fallthrough;
+ case 54: /* pstxv0 */
+ op->type = MKOP(STORE_VSX, PREFIXED, 16);
+ op->element_size = 16;
+ op->vsx_flags = VSX_CHECK_VEC;
+ break;
+#endif /* CONFIG_VSX */
+ case 56: /* plq */
+ op->type = MKOP(LOAD, PREFIXED, 16);
+ break;
+ case 57: /* pld */
+ op->type = MKOP(LOAD, PREFIXED, 8);
+ break;
+#ifdef CONFIG_VSX
+ case 58: /* plxvp */
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(LOAD_VSX, PREFIXED, 32);
+ op->element_size = 32;
+ break;
+#endif /* CONFIG_VSX */
+ case 60: /* pstq */
+ op->type = MKOP(STORE, PREFIXED, 16);
+ break;
+ case 61: /* pstd */
+ op->type = MKOP(STORE, PREFIXED, 8);
+ break;
+#ifdef CONFIG_VSX
+ case 62: /* pstxvp */
+ op->reg = VSX_REGISTER_XTP(rd);
+ op->type = MKOP(STORE_VSX, PREFIXED, 32);
+ op->element_size = 32;
+ break;
+#endif /* CONFIG_VSX */
+ }
+ break;
+ case 1: /* Type 01 Eight-Byte Register-to-Register */
+ break;
+ case 2: /* Type 10 Modified Load/Store */
+ if (prefix_r && ra)
+ break;
+ op->ea = mlsd_8lsd_ea(word, suffix, regs);
+ switch (suffixopcode) {
+ case 32: /* plwz */
+ op->type = MKOP(LOAD, PREFIXED, 4);
+ break;
+ case 34: /* plbz */
+ op->type = MKOP(LOAD, PREFIXED, 1);
+ break;
+ case 36: /* pstw */
+ op->type = MKOP(STORE, PREFIXED, 4);
+ break;
+ case 38: /* pstb */
+ op->type = MKOP(STORE, PREFIXED, 1);
+ break;
+ case 40: /* plhz */
+ op->type = MKOP(LOAD, PREFIXED, 2);
+ break;
+ case 42: /* plha */
+ op->type = MKOP(LOAD, PREFIXED | SIGNEXT, 2);
+ break;
+ case 44: /* psth */
+ op->type = MKOP(STORE, PREFIXED, 2);
+ break;
+ case 48: /* plfs */
+ op->type = MKOP(LOAD_FP, PREFIXED | FPCONV, 4);
+ break;
+ case 50: /* plfd */
+ op->type = MKOP(LOAD_FP, PREFIXED, 8);
+ break;
+ case 52: /* pstfs */
+ op->type = MKOP(STORE_FP, PREFIXED | FPCONV, 4);
+ break;
+ case 54: /* pstfd */
+ op->type = MKOP(STORE_FP, PREFIXED, 8);
+ break;
+ }
+ break;
+ case 3: /* Type 11 Modified Register-to-Register */
+ break;
+ }
#endif /* __powerpc64__ */
}
- err = -EINVAL;
- ldst_done:
- if (err) {
- regs->gpr[ra] = old_ra;
- return 0; /* invoke DSI if -EFAULT? */
+ if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) {
+ switch (GETTYPE(op->type)) {
+ case LOAD:
+ if (ra == rd)
+ goto unknown_opcode;
+ fallthrough;
+ case STORE:
+ case LOAD_FP:
+ case STORE_FP:
+ if (ra == 0)
+ goto unknown_opcode;
+ }
}
- instr_done:
- regs->nip = truncate_if_32bit(regs->msr, regs->nip + 4);
- return 1;
+
+#ifdef CONFIG_VSX
+ if ((GETTYPE(op->type) == LOAD_VSX ||
+ GETTYPE(op->type) == STORE_VSX) &&
+ !cpu_has_feature(CPU_FTR_VSX)) {
+ return -1;
+ }
+#endif /* CONFIG_VSX */
+
+ return 0;
+
+ unknown_opcode:
+ op->type = UNKNOWN;
+ return 0;
logical_done:
- if (instr & 1)
- set_cr0(regs, ra);
- goto instr_done;
+ if (word & 1)
+ set_cr0(regs, op);
+ logical_done_nocc:
+ op->reg = ra;
+ op->type |= SETREG;
+ return 1;
arith_done:
- if (instr & 1)
- set_cr0(regs, rd);
- goto instr_done;
+ if (word & 1)
+ set_cr0(regs, op);
+ compute_done:
+ op->reg = rd;
+ op->type |= SETREG;
+ return 1;
+
+ priv:
+ op->type = INTERRUPT | 0x700;
+ op->val = SRR1_PROGPRIV;
+ return 0;
+
+ trap:
+ op->type = INTERRUPT | 0x700;
+ op->val = SRR1_PROGTRAP;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(analyse_instr);
+NOKPROBE_SYMBOL(analyse_instr);
+
+/*
+ * For PPC32 we always use stwu with r1 to change the stack pointer.
+ * So this emulated store may corrupt the exception frame, now we
+ * have to provide the exception frame trampoline, which is pushed
+ * below the kprobed function stack. So we only update gpr[1] but
+ * don't emulate the real store operation. We will do real store
+ * operation safely in exception return code by checking this flag.
+ */
+static nokprobe_inline int handle_stack_update(unsigned long ea, struct pt_regs *regs)
+{
+ /*
+ * Check whether the flag is already set, since that means
+ * we'll lose the previous value.
+ */
+ WARN_ON(test_thread_flag(TIF_EMULATE_STACK_STORE));
+ set_thread_flag(TIF_EMULATE_STACK_STORE);
+ return 0;
+}
+
+static nokprobe_inline void do_signext(unsigned long *valp, int size)
+{
+ switch (size) {
+ case 2:
+ *valp = (signed short) *valp;
+ break;
+ case 4:
+ *valp = (signed int) *valp;
+ break;
+ }
+}
+
+static nokprobe_inline void do_byterev(unsigned long *valp, int size)
+{
+ switch (size) {
+ case 2:
+ *valp = byterev_2(*valp);
+ break;
+ case 4:
+ *valp = byterev_4(*valp);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ *valp = byterev_8(*valp);
+ break;
+#endif
+ }
+}
+
+/*
+ * Emulate an instruction that can be executed just by updating
+ * fields in *regs.
+ */
+void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op)
+{
+ unsigned long next_pc;
+
+ next_pc = truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op->type));
+ switch (GETTYPE(op->type)) {
+ case COMPUTE:
+ if (op->type & SETREG)
+ regs->gpr[op->reg] = op->val;
+ if (op->type & SETCC)
+ regs->ccr = op->ccval;
+ if (op->type & SETXER)
+ regs->xer = op->xerval;
+ break;
+
+ case BRANCH:
+ if (op->type & SETLK)
+ regs->link = next_pc;
+ if (op->type & BRTAKEN)
+ next_pc = op->val;
+ if (op->type & DECCTR)
+ --regs->ctr;
+ break;
+
+ case BARRIER:
+ switch (op->type & BARRIER_MASK) {
+ case BARRIER_SYNC:
+ mb();
+ break;
+ case BARRIER_ISYNC:
+ isync();
+ break;
+ case BARRIER_EIEIO:
+ eieio();
+ break;
+#ifdef CONFIG_PPC64
+ case BARRIER_LWSYNC:
+ asm volatile("lwsync" : : : "memory");
+ break;
+ case BARRIER_PTESYNC:
+ asm volatile("ptesync" : : : "memory");
+ break;
+#endif
+ }
+ break;
+
+ case MFSPR:
+ switch (op->spr) {
+ case SPRN_XER:
+ regs->gpr[op->reg] = regs->xer & 0xffffffffUL;
+ break;
+ case SPRN_LR:
+ regs->gpr[op->reg] = regs->link;
+ break;
+ case SPRN_CTR:
+ regs->gpr[op->reg] = regs->ctr;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ break;
+
+ case MTSPR:
+ switch (op->spr) {
+ case SPRN_XER:
+ regs->xer = op->val & 0xffffffffUL;
+ break;
+ case SPRN_LR:
+ regs->link = op->val;
+ break;
+ case SPRN_CTR:
+ regs->ctr = op->val;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ break;
+
+ default:
+ WARN_ON_ONCE(1);
+ }
+ regs_set_return_ip(regs, next_pc);
+}
+NOKPROBE_SYMBOL(emulate_update_regs);
+
+/*
+ * Emulate a previously-analysed load or store instruction.
+ * Return values are:
+ * 0 = instruction emulated successfully
+ * -EFAULT = address out of range or access faulted (regs->dar
+ * contains the faulting address)
+ * -EACCES = misaligned access, instruction requires alignment
+ * -EINVAL = unknown operation in *op
+ */
+int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
+{
+ int err, size, type;
+ int i, rd, nb;
+ unsigned int cr;
+ unsigned long val;
+ unsigned long ea;
+ bool cross_endian;
+
+ err = 0;
+ size = GETSIZE(op->type);
+ type = GETTYPE(op->type);
+ cross_endian = (regs->msr & MSR_LE) != (MSR_KERNEL & MSR_LE);
+ ea = truncate_if_32bit(regs->msr, op->ea);
+
+ switch (type) {
+ case LARX:
+ if (ea & (size - 1))
+ return -EACCES; /* can't handle misaligned */
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ err = 0;
+ val = 0;
+ switch (size) {
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
+ case 1:
+ __get_user_asmx(val, ea, err, "lbarx");
+ break;
+ case 2:
+ __get_user_asmx(val, ea, err, "lharx");
+ break;
+#endif
+ case 4:
+ __get_user_asmx(val, ea, err, "lwarx");
+ break;
+#ifdef __powerpc64__
+ case 8:
+ __get_user_asmx(val, ea, err, "ldarx");
+ break;
+ case 16:
+ err = do_lqarx(ea, &regs->gpr[op->reg]);
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ if (err) {
+ regs->dar = ea;
+ break;
+ }
+ if (size < 16)
+ regs->gpr[op->reg] = val;
+ break;
+
+ case STCX:
+ if (ea & (size - 1))
+ return -EACCES; /* can't handle misaligned */
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ err = 0;
+ switch (size) {
+#ifdef __powerpc64__
+ case 1:
+ __put_user_asmx(op->val, ea, err, "stbcx.", cr);
+ break;
+ case 2:
+ __put_user_asmx(op->val, ea, err, "sthcx.", cr);
+ break;
+#endif
+ case 4:
+ __put_user_asmx(op->val, ea, err, "stwcx.", cr);
+ break;
+#ifdef __powerpc64__
+ case 8:
+ __put_user_asmx(op->val, ea, err, "stdcx.", cr);
+ break;
+ case 16:
+ err = do_stqcx(ea, regs->gpr[op->reg],
+ regs->gpr[op->reg + 1], &cr);
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+ if (!err)
+ regs->ccr = (regs->ccr & 0x0fffffff) |
+ (cr & 0xe0000000) |
+ ((regs->xer >> 3) & 0x10000000);
+ else
+ regs->dar = ea;
+ break;
+
+ case LOAD:
+#ifdef __powerpc64__
+ if (size == 16) {
+ err = emulate_lq(regs, ea, op->reg, cross_endian);
+ break;
+ }
+#endif
+ err = read_mem(&regs->gpr[op->reg], ea, size, regs);
+ if (!err) {
+ if (op->type & SIGNEXT)
+ do_signext(&regs->gpr[op->reg], size);
+ if ((op->type & BYTEREV) == (cross_endian ? 0 : BYTEREV))
+ do_byterev(&regs->gpr[op->reg], size);
+ }
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case LOAD_FP:
+ /*
+ * If the instruction is in userspace, we can emulate it even
+ * if the FP state is not live, because we have the state
+ * stored in the thread_struct. If the instruction is in
+ * the kernel, we must not touch the state in the thread_struct.
+ */
+ if (!user_mode(regs) && !(regs->msr & MSR_FP))
+ return 0;
+ err = do_fp_load(op, ea, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case LOAD_VMX:
+ if (!user_mode(regs) && !(regs->msr & MSR_VEC))
+ return 0;
+ err = do_vec_load(op->reg, ea, size, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_VSX
+ case LOAD_VSX: {
+ unsigned long msrbit = MSR_VSX;
+
+ /*
+ * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+ * when the target of the instruction is a vector register.
+ */
+ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+ msrbit = MSR_VEC;
+ if (!user_mode(regs) && !(regs->msr & msrbit))
+ return 0;
+ err = do_vsx_load(op, ea, regs, cross_endian);
+ break;
+ }
+#endif
+ case LOAD_MULTI:
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ rd = op->reg;
+ for (i = 0; i < size; i += 4) {
+ unsigned int v32 = 0;
+
+ nb = size - i;
+ if (nb > 4)
+ nb = 4;
+ err = copy_mem_in((u8 *) &v32, ea, nb, regs);
+ if (err)
+ break;
+ if (unlikely(cross_endian))
+ v32 = byterev_4(v32);
+ regs->gpr[rd] = v32;
+ ea += 4;
+ /* reg number wraps from 31 to 0 for lsw[ix] */
+ rd = (rd + 1) & 0x1f;
+ }
+ break;
+
+ case STORE:
+#ifdef __powerpc64__
+ if (size == 16) {
+ err = emulate_stq(regs, ea, op->reg, cross_endian);
+ break;
+ }
+#endif
+ if ((op->type & UPDATE) && size == sizeof(long) &&
+ op->reg == 1 && op->update_reg == 1 && !user_mode(regs) &&
+ ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) {
+ err = handle_stack_update(ea, regs);
+ break;
+ }
+ if (unlikely(cross_endian))
+ do_byterev(&op->val, size);
+ err = write_mem(op->val, ea, size, regs);
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case STORE_FP:
+ if (!user_mode(regs) && !(regs->msr & MSR_FP))
+ return 0;
+ err = do_fp_store(op, ea, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_ALTIVEC
+ case STORE_VMX:
+ if (!user_mode(regs) && !(regs->msr & MSR_VEC))
+ return 0;
+ err = do_vec_store(op->reg, ea, size, regs, cross_endian);
+ break;
+#endif
+#ifdef CONFIG_VSX
+ case STORE_VSX: {
+ unsigned long msrbit = MSR_VSX;
+
+ /*
+ * Some VSX instructions check the MSR_VEC bit rather than MSR_VSX
+ * when the target of the instruction is a vector register.
+ */
+ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC))
+ msrbit = MSR_VEC;
+ if (!user_mode(regs) && !(regs->msr & msrbit))
+ return 0;
+ err = do_vsx_store(op, ea, regs, cross_endian);
+ break;
+ }
+#endif
+ case STORE_MULTI:
+ if (!address_ok(regs, ea, size))
+ return -EFAULT;
+ rd = op->reg;
+ for (i = 0; i < size; i += 4) {
+ unsigned int v32 = regs->gpr[rd];
+
+ nb = size - i;
+ if (nb > 4)
+ nb = 4;
+ if (unlikely(cross_endian))
+ v32 = byterev_4(v32);
+ err = copy_mem_out((u8 *) &v32, ea, nb, regs);
+ if (err)
+ break;
+ ea += 4;
+ /* reg number wraps from 31 to 0 for stsw[ix] */
+ rd = (rd + 1) & 0x1f;
+ }
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (err)
+ return err;
+
+ if (op->type & UPDATE)
+ regs->gpr[op->update_reg] = op->ea;
+
+ return 0;
+}
+NOKPROBE_SYMBOL(emulate_loadstore);
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
+ * Returns 1 if the step was emulated, 0 if not,
+ * or -1 if the instruction is one that should not be stepped,
+ * such as an rfid, or a mtmsrd that would clear MSR_RI.
+ */
+int emulate_step(struct pt_regs *regs, ppc_inst_t instr)
+{
+ struct instruction_op op;
+ int r, err, type;
+ unsigned long val;
+ unsigned long ea;
+
+ r = analyse_instr(&op, regs, instr);
+ if (r < 0)
+ return r;
+ if (r > 0) {
+ emulate_update_regs(regs, &op);
+ return 1;
+ }
+
+ err = 0;
+ type = GETTYPE(op.type);
+
+ if (OP_IS_LOAD_STORE(type)) {
+ err = emulate_loadstore(regs, &op);
+ if (err)
+ return 0;
+ goto instr_done;
+ }
+
+ switch (type) {
+ case CACHEOP:
+ ea = truncate_if_32bit(regs->msr, op.ea);
+ if (!address_ok(regs, ea, 8))
+ return 0;
+ switch (op.type & CACHEOP_MASK) {
+ case DCBST:
+ __cacheop_user_asmx(ea, err, "dcbst");
+ break;
+ case DCBF:
+ __cacheop_user_asmx(ea, err, "dcbf");
+ break;
+ case DCBTST:
+ if (op.reg == 0)
+ prefetchw((void *) ea);
+ break;
+ case DCBT:
+ if (op.reg == 0)
+ prefetch((void *) ea);
+ break;
+ case ICBI:
+ __cacheop_user_asmx(ea, err, "icbi");
+ break;
+ case DCBZ:
+ err = emulate_dcbz(ea, regs);
+ break;
+ }
+ if (err) {
+ regs->dar = ea;
+ return 0;
+ }
+ goto instr_done;
+
+ case MFMSR:
+ regs->gpr[op.reg] = regs->msr & MSR_MASK;
+ goto instr_done;
+
+ case MTMSR:
+ val = regs->gpr[op.reg];
+ if ((val & MSR_RI) == 0)
+ /* can't step mtmsr[d] that would clear MSR_RI */
+ return -1;
+ /* here op.val is the mask of bits to change */
+ regs_set_return_msr(regs, (regs->msr & ~op.val) | (val & op.val));
+ goto instr_done;
+
+ case SYSCALL: /* sc */
+ /*
+ * Per ISA v3.1, section 7.5.15 'Trace Interrupt', we can't
+ * single step a system call instruction:
+ *
+ * Successful completion for an instruction means that the
+ * instruction caused no other interrupt. Thus a Trace
+ * interrupt never occurs for a System Call or System Call
+ * Vectored instruction, or for a Trap instruction that
+ * traps.
+ */
+ return -1;
+ case SYSCALL_VECTORED_0: /* scv 0 */
+ return -1;
+ case RFI:
+ return -1;
+ }
+ return 0;
+
+ instr_done:
+ regs_set_return_ip(regs,
+ truncate_if_32bit(regs->msr, regs->nip + GETLENGTH(op.type)));
+ return 1;
}
+NOKPROBE_SYMBOL(emulate_step);
diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S
index 1b5a0a09d609..daa72061dc0c 100644
--- a/arch/powerpc/lib/string.S
+++ b/arch/powerpc/lib/string.S
@@ -1,30 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* String handling functions for PowerPC.
*
* Copyright (C) 1996 Paul Mackerras.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
-#include <asm/processor.h>
-#include <asm/errno.h>
+#include <linux/export.h>
#include <asm/ppc_asm.h>
+#include <asm/cache.h>
- .section __ex_table,"a"
- PPC_LONG_ALIGN
.text
-_GLOBAL(strcpy)
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r5)
- bne 1b
- blr
-
/* This clears out any unused part of the destination buffer,
just as the libc version does. -- paulus */
_GLOBAL(strncpy)
@@ -33,6 +18,7 @@ _GLOBAL(strncpy)
mtctr r5
addi r6,r3,-1
addi r4,r4,-1
+ .balign IFETCH_ALIGN_BYTES
1: lbzu r0,1(r4)
cmpwi 0,r0,0
stbu r0,1(r6)
@@ -44,30 +30,7 @@ _GLOBAL(strncpy)
2: stbu r0,1(r6) /* clear it out if so */
bdnz 2b
blr
-
-_GLOBAL(strcat)
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r0,1(r5)
- cmpwi 0,r0,0
- bne 1b
- addi r5,r5,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- stbu r0,1(r5)
- bne 1b
- blr
-
-_GLOBAL(strcmp)
- addi r5,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r5)
- cmpwi 1,r3,0
- lbzu r0,1(r4)
- subf. r3,r0,r3
- beqlr 1
- beq 1b
- blr
+EXPORT_SYMBOL(strncpy)
_GLOBAL(strncmp)
PPC_LCMPI 0,r5,0
@@ -75,6 +38,7 @@ _GLOBAL(strncmp)
mtctr r5
addi r5,r3,-1
addi r4,r4,-1
+ .balign IFETCH_ALIGN_BYTES
1: lbzu r3,1(r5)
cmpwi 1,r3,0
lbzu r0,1(r4)
@@ -84,81 +48,18 @@ _GLOBAL(strncmp)
blr
2: li r3,0
blr
-
-_GLOBAL(strlen)
- addi r4,r3,-1
-1: lbzu r0,1(r4)
- cmpwi 0,r0,0
- bne 1b
- subf r3,r3,r4
- blr
-
-_GLOBAL(memcmp)
- PPC_LCMPI 0,r5,0
- beq- 2f
- mtctr r5
- addi r6,r3,-1
- addi r4,r4,-1
-1: lbzu r3,1(r6)
- lbzu r0,1(r4)
- subf. r3,r0,r3
- bdnzt 2,1b
- blr
-2: li r3,0
- blr
+EXPORT_SYMBOL(strncmp)
_GLOBAL(memchr)
PPC_LCMPI 0,r5,0
beq- 2f
mtctr r5
addi r3,r3,-1
+ .balign IFETCH_ALIGN_BYTES
1: lbzu r0,1(r3)
cmpw 0,r0,r4
bdnzf 2,1b
beqlr
2: li r3,0
blr
-
-#ifdef CONFIG_PPC32
-_GLOBAL(__clear_user)
- addi r6,r3,-4
- li r3,0
- li r5,0
- cmplwi 0,r4,4
- blt 7f
- /* clear a single word */
-11: stwu r5,4(r6)
- beqlr
- /* clear word sized chunks */
- andi. r0,r6,3
- add r4,r0,r4
- subf r6,r0,r6
- srwi r0,r4,2
- andi. r4,r4,3
- mtctr r0
- bdz 7f
-1: stwu r5,4(r6)
- bdnz 1b
- /* clear byte sized chunks */
-7: cmpwi 0,r4,0
- beqlr
- mtctr r4
- addi r6,r6,3
-8: stbu r5,1(r6)
- bdnz 8b
- blr
-90: mr r3,r4
- blr
-91: mfctr r3
- slwi r3,r3,2
- add r3,r3,r4
- blr
-92: mfctr r3
- blr
-
- .section __ex_table,"a"
- PPC_LONG 11b,90b
- PPC_LONG 1b,91b
- PPC_LONG 8b,92b
- .text
-#endif
+EXPORT_SYMBOL(memchr)
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
new file mode 100644
index 000000000000..3ee45619a3f8
--- /dev/null
+++ b/arch/powerpc/lib/string_32.S
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * String handling functions for PowerPC32
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ */
+
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_BYTES
+LG_CACHELINE_BYTES = L1_CACHE_SHIFT
+CACHELINE_MASK = (L1_CACHE_BYTES-1)
+
+_GLOBAL(__arch_clear_user)
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero. This requires that the destination
+ * area is cacheable.
+ */
+ cmplwi cr0, r4, 4
+ mr r10, r3
+ li r3, 0
+ blt 7f
+
+11: stw r3, 0(r10)
+ beqlr
+ andi. r0, r10, 3
+ add r11, r0, r4
+ subf r6, r0, r10
+
+ clrlwi r7, r6, 32 - LG_CACHELINE_BYTES
+ add r8, r7, r11
+ srwi r9, r8, LG_CACHELINE_BYTES
+ addic. r9, r9, -1 /* total number of complete cachelines */
+ ble 2f
+ xori r0, r7, CACHELINE_MASK & ~3
+ srwi. r0, r0, 2
+ beq 3f
+ mtctr r0
+4: stwu r3, 4(r6)
+ bdnz 4b
+3: mtctr r9
+ li r7, 4
+10: dcbz r7, r6
+ addi r6, r6, CACHELINE_BYTES
+ bdnz 10b
+ clrlwi r11, r8, 32 - LG_CACHELINE_BYTES
+ addi r11, r11, 4
+
+2: srwi r0 ,r11 ,2
+ mtctr r0
+ bdz 6f
+1: stwu r3, 4(r6)
+ bdnz 1b
+6: andi. r11, r11, 3
+ beqlr
+ mtctr r11
+ addi r6, r6, 3
+8: stbu r3, 1(r6)
+ bdnz 8b
+ blr
+
+7: cmpwi cr0, r4, 0
+ beqlr
+ mtctr r4
+ addi r6, r10, -1
+9: stbu r3, 1(r6)
+ bdnz 9b
+ blr
+
+90: mr r3, r4
+ blr
+91: add r3, r10, r4
+ subf r3, r6, r3
+ blr
+
+ EX_TABLE(11b, 90b)
+ EX_TABLE(4b, 91b)
+ EX_TABLE(10b, 91b)
+ EX_TABLE(1b, 91b)
+ EX_TABLE(8b, 91b)
+ EX_TABLE(9b, 91b)
+
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 7bd9549a90a2..a25eb8588434 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -1,33 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+#include <linux/export.h>
#include <asm/ppc_asm.h>
+#include <asm/linkage.h>
#include <asm/asm-offsets.h>
- .section ".toc","aw"
-PPC64_CACHES:
- .tc ppc64_caches[TC],ppc64_caches
- .section ".text"
-
/**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
@@ -40,26 +25,17 @@ PPC64_CACHES:
.macro err1
100:
- .section __ex_table,"a"
- .align 3
- .llong 100b,.Ldo_err1
- .previous
+ EX_TABLE(100b,.Ldo_err1)
.endm
.macro err2
200:
- .section __ex_table,"a"
- .align 3
- .llong 200b,.Ldo_err2
- .previous
+ EX_TABLE(200b,.Ldo_err2)
.endm
.macro err3
300:
- .section __ex_table,"a"
- .align 3
- .llong 300b,.Ldo_err3
- .previous
+ EX_TABLE(300b,.Ldo_err3)
.endm
.Ldo_err1:
@@ -77,7 +53,7 @@ err3; stb r0,0(r3)
mr r3,r4
blr
-_GLOBAL_TOC(__clear_user)
+_GLOBAL_TOC(__arch_clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
@@ -152,16 +128,16 @@ err1; stb r0,0(r3)
blr
.Llong_clear:
- ld r5,PPC64_CACHES@toc(r2)
+ LOAD_REG_ADDR(r5, ppc64_caches)
bf cr7*4+0,11f
err2; std r0,0(r3)
addi r3,r3,8
addi r4,r4,-8
- /* Destination is 16 byte aligned, need to get it cacheline aligned */
-11: lwz r7,DCACHEL1LOGLINESIZE(r5)
- lwz r9,DCACHEL1LINESIZE(r5)
+ /* Destination is 16 byte aligned, need to get it cache block aligned */
+11: lwz r7,DCACHEL1LOGBLOCKSIZE(r5)
+ lwz r9,DCACHEL1BLOCKSIZE(r5)
/*
* With worst case alignment the long clear loop takes a minimum
@@ -191,7 +167,7 @@ err1; std r0,8(r3)
mtctr r6
mr r8,r3
14:
-err1; dcbz r0,r3
+err1; dcbz 0,r3
add r3,r3,r9
bdnz 14b
@@ -200,3 +176,4 @@ err1; dcbz r0,r3
cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S
new file mode 100644
index 000000000000..bbd24feb233f
--- /dev/null
+++ b/arch/powerpc/lib/strlen_32.S
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * strlen() for PPC32
+ *
+ * Copyright (C) 2018 Christophe Leroy CS Systemes d'Information.
+ *
+ * Inspired by the glibc implementation
+ */
+#include <linux/export.h>
+#include <asm/ppc_asm.h>
+#include <asm/cache.h>
+
+ .text
+
+/*
+ * Algorithm:
+ *
+ * 1) Given a word 'x', we can test to see if it contains any 0 bytes
+ * by subtracting 0x01010101, and seeing if any of the high bits of each
+ * byte changed from 0 to 1. This works because the least significant
+ * 0 byte must have had no incoming carry (otherwise it's not the least
+ * significant), so it is 0x00 - 0x01 == 0xff. For all other
+ * byte values, either they have the high bit set initially, or when
+ * 1 is subtracted you get a value in the range 0x00-0x7f, none of which
+ * have their high bit set. The expression here is
+ * ((x - 0x01010101) & ~x & 0x80808080), which gives 0x00000000 when
+ * there were no 0x00 bytes in the word. You get 0x80 in bytes that
+ * match, but possibly false 0x80 matches in the next more significant
+ * byte to a true match due to carries. For little-endian this is
+ * of no consequence since the least significant match is the one
+ * we're interested in, but big-endian needs method 2 to find which
+ * byte matches.
+ * 2) Given a word 'x', we can test to see _which_ byte was zero by
+ * calculating ~(((x & ~0x80808080) - 0x80808080 - 1) | x | ~0x80808080).
+ * This produces 0x80 in each byte that was zero, and 0x00 in all
+ * the other bytes. The '| ~0x80808080' clears the low 7 bits in each
+ * byte, and the '| x' part ensures that bytes with the high bit set
+ * produce 0x00. The addition will carry into the high bit of each byte
+ * iff that byte had one of its low 7 bits set. We can then just see
+ * which was the most significant bit set and divide by 8 to find how
+ * many to add to the index.
+ * This is from the book 'The PowerPC Compiler Writer's Guide',
+ * by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren.
+ */
+
+_GLOBAL(strlen)
+ andi. r0, r3, 3
+ lis r7, 0x0101
+ addi r10, r3, -4
+ addic r7, r7, 0x0101 /* r7 = 0x01010101 (lomagic) & clear XER[CA] */
+ rotlwi r6, r7, 31 /* r6 = 0x80808080 (himagic) */
+ bne- 3f
+ .balign IFETCH_ALIGN_BYTES
+1: lwzu r9, 4(r10)
+2: subf r8, r7, r9
+ and. r8, r8, r6
+ beq+ 1b
+ andc. r8, r8, r9
+ beq+ 1b
+ andc r8, r9, r6
+ orc r9, r9, r6
+ subfe r8, r6, r8
+ nor r8, r8, r9
+ cntlzw r8, r8
+ subf r3, r3, r10
+ srwi r8, r8, 3
+ add r3, r3, r8
+ blr
+
+ /* Misaligned string: make sure bytes before the string are not seen as 0 */
+3: xor r10, r10, r0
+ orc r8, r8, r8
+ lwzu r9, 4(r10)
+ slwi r0, r0, 3
+ srw r8, r8, r0
+ orc r9, r9, r8
+ b 2b
+EXPORT_SYMBOL(strlen)
diff --git a/arch/powerpc/lib/test-code-patching.c b/arch/powerpc/lib/test-code-patching.c
new file mode 100644
index 000000000000..1440d99630b3
--- /dev/null
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008 Michael Ellerman, IBM Corporation.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+
+#include <asm/text-patching.h>
+
+static int __init instr_is_branch_to_addr(const u32 *instr, unsigned long addr)
+{
+ if (instr_is_branch_iform(ppc_inst_read(instr)) ||
+ instr_is_branch_bform(ppc_inst_read(instr)))
+ return branch_target(instr) == addr;
+
+ return 0;
+}
+
+static void __init test_trampoline(void)
+{
+ asm ("nop;nop;\n");
+}
+
+#define check(x) do { \
+ if (!(x)) \
+ pr_err("code-patching: test failed at line %d\n", __LINE__); \
+} while (0)
+
+static void __init test_branch_iform(void)
+{
+ int err;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned long addr = (unsigned long)tmp;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_iform(ppc_inst(0x48000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_iform(ppc_inst(0x4bffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0xcbffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_iform(ppc_inst(0x7bffffff)));
+
+ /* Simplest case, branch to self with link */
+ check(instr_is_branch_iform(ppc_inst(0x48000001)));
+ /* All bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bfffffd)));
+ /* Some bits of targets set */
+ check(instr_is_branch_iform(ppc_inst(0x4bff00fd)));
+ /* Must be a valid branch to start with */
+ check(!instr_is_branch_iform(ppc_inst(0x7bfffffd)));
+
+ /* Absolute branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x48000103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute branch to 0x420fc */
+ ppc_inst_write(iptr, ppc_inst(0x480420ff));
+ check(instr_is_branch_to_addr(iptr, 0x420fc));
+ /* Maximum positive relative branch, + 32MB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x49fffffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x1FFFFFC));
+ /* Smallest negative relative branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x4bfffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative branch, - 32 MB */
+ ppc_inst_write(iptr, ppc_inst(0x4a000000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Branch to self, with link */
+ err = create_branch(&instr, iptr, addr, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100, with link */
+ err = create_branch(&instr, iptr, addr - 0x100, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100, no link */
+ err = create_branch(&instr, iptr, addr + 0x100, 0);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 MB */
+ err = create_branch(&instr, iptr, addr - 0x2000000, BRANCH_SET_LINK);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x2000000));
+
+ /* Out of range relative negative offset, - (32 MB + 4 B) */
+ err = create_branch(&instr, iptr, addr - 0x2000004, BRANCH_SET_LINK);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 MB */
+ err = create_branch(&instr, iptr, addr + 0x2000000, BRANCH_SET_LINK);
+ check(err);
+
+ /* Unaligned target */
+ err = create_branch(&instr, iptr, addr + 3, BRANCH_SET_LINK);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x48000000)));
+}
+
+static void __init test_create_function_call(void)
+{
+ u32 *iptr;
+ unsigned long dest;
+ ppc_inst_t instr;
+
+ /* Check we can create a function call */
+ iptr = (u32 *)ppc_function_entry(test_trampoline);
+ dest = ppc_function_entry(test_create_function_call);
+ create_branch(&instr, iptr, dest, BRANCH_SET_LINK);
+ patch_instruction(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, dest));
+}
+
+static void __init test_branch_bform(void)
+{
+ int err;
+ unsigned long addr;
+ ppc_inst_t instr;
+ u32 tmp[2];
+ u32 *iptr = tmp;
+ unsigned int flags;
+
+ addr = (unsigned long)iptr;
+
+ /* The simplest case, branch to self, no flags */
+ check(instr_is_branch_bform(ppc_inst(0x40000000)));
+ /* All bits of target set, and flags */
+ check(instr_is_branch_bform(ppc_inst(0x43ffffff)));
+ /* High bit of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0xc3ffffff)));
+ /* Middle bits of opcode set, which is wrong */
+ check(!instr_is_branch_bform(ppc_inst(0x7bffffff)));
+
+ /* Absolute conditional branch to 0x100 */
+ ppc_inst_write(iptr, ppc_inst(0x43ff0103));
+ check(instr_is_branch_to_addr(iptr, 0x100));
+ /* Absolute conditional branch to 0x20fc */
+ ppc_inst_write(iptr, ppc_inst(0x43ff20ff));
+ check(instr_is_branch_to_addr(iptr, 0x20fc));
+ /* Maximum positive relative conditional branch, + 32 KB - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43ff7ffc));
+ check(instr_is_branch_to_addr(iptr, addr + 0x7FFC));
+ /* Smallest negative relative conditional branch, - 4B */
+ ppc_inst_write(iptr, ppc_inst(0x43fffffc));
+ check(instr_is_branch_to_addr(iptr, addr - 4));
+ /* Largest negative relative conditional branch, - 32 KB */
+ ppc_inst_write(iptr, ppc_inst(0x43ff8000));
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* All condition code bits set & link */
+ flags = 0x3ff000 | BRANCH_SET_LINK;
+
+ /* Branch to self */
+ err = create_cond_branch(&instr, iptr, addr, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+
+ /* Branch to self - 0x100 */
+ err = create_cond_branch(&instr, iptr, addr - 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x100));
+
+ /* Branch to self + 0x100 */
+ err = create_cond_branch(&instr, iptr, addr + 0x100, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr + 0x100));
+
+ /* Maximum relative negative offset, - 32 KB */
+ err = create_cond_branch(&instr, iptr, addr - 0x8000, flags);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr - 0x8000));
+
+ /* Out of range relative negative offset, - (32 KB + 4 B) */
+ err = create_cond_branch(&instr, iptr, addr - 0x8004, flags);
+ check(err);
+
+ /* Out of range relative positive offset, + 32 KB */
+ err = create_cond_branch(&instr, iptr, addr + 0x8000, flags);
+ check(err);
+
+ /* Unaligned target */
+ err = create_cond_branch(&instr, iptr, addr + 3, flags);
+ check(err);
+
+ /* Check flags are masked correctly */
+ err = create_cond_branch(&instr, iptr, addr, 0xFFFFFFFC);
+ ppc_inst_write(iptr, instr);
+ check(instr_is_branch_to_addr(iptr, addr));
+ check(ppc_inst_equal(instr, ppc_inst(0x43FF0000)));
+}
+
+static void __init test_translate_branch(void)
+{
+ unsigned long addr;
+ void *p, *q;
+ ppc_inst_t instr;
+ void *buf;
+
+ buf = vmalloc(PAGE_ALIGN(0x2000000 + 1));
+ check(buf);
+ if (!buf)
+ return;
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = p + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 MB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 0x2000000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x4a000000)));
+
+ /* Maximum positive case, move x to x - 32 MB + 4 */
+ p = buf + 0x2000000;
+ addr = (unsigned long)p;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x49fffffc)));
+
+ /* Jump to x + 16 MB moved to x + 20 MB */
+ p = buf;
+ addr = 0x1000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x1400000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 16 MB moved to x - 16 MB + 4 */
+ p = buf + 0x1000000;
+ addr = 0x2000000 + (unsigned long)buf;
+ create_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+
+ /* Conditional branch tests */
+
+ /* Simple case, branch to self moved a little */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Maximum negative case, move b . to addr + 32 KB */
+ p = buf;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 0x8000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff8000)));
+
+ /* Maximum positive case, move x to x - 32 KB + 4 */
+ p = buf + 0x8000;
+ addr = (unsigned long)p;
+ create_cond_branch(&instr, p, addr, 0xFFFFFFFC);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+ check(ppc_inst_equal(ppc_inst_read(q), ppc_inst(0x43ff7ffc)));
+
+ /* Jump to x + 12 KB moved to x + 20 KB */
+ p = buf;
+ addr = 0x3000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, BRANCH_SET_LINK);
+ ppc_inst_write(p, instr);
+ q = buf + 0x5000;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Jump to x + 8 KB moved to x - 8 KB + 4 */
+ p = buf + 0x2000;
+ addr = 0x4000 + (unsigned long)buf;
+ create_cond_branch(&instr, p, addr, 0);
+ ppc_inst_write(p, instr);
+ q = buf + 4;
+ translate_branch(&instr, q, p);
+ ppc_inst_write(q, instr);
+ check(instr_is_branch_to_addr(p, addr));
+ check(instr_is_branch_to_addr(q, addr));
+
+ /* Free the buffer we were using */
+ vfree(buf);
+}
+
+/*
+ * Patch a prefixed instruction at the entry point of test_trampoline and
+ * verify that both 32-bit words were written.  Does nothing unless
+ * CONFIG_PPC64 is enabled.
+ */
+static void __init test_prefixed_patching(void)
+{
+	u32 *target = (u32 *)ppc_function_entry(test_trampoline);
+	u32 want[2] = { OP_PREFIX << 26, 0 };
+	ppc_inst_t prefixed = ppc_inst_prefix(OP_PREFIX << 26, 0);
+
+	if (IS_ENABLED(CONFIG_PPC64)) {
+		patch_instruction(target, prefixed);
+
+		/* Prefix word followed by the all-zero suffix word */
+		check(memcmp(target, want, sizeof(want)) == 0);
+	}
+}
+
+/*
+ * Exercise patch_instructions() on a zeroed 8-page scratch buffer.
+ *
+ * Each case patches a short run of words and then checks the patched words
+ * as well as the untouched (still zero) word on either side.  The cases are
+ * a repeated 32-bit instruction, a repeated prefixed (64-bit) instruction
+ * when CONFIG_PPC64 is enabled, and a plain copy of a code array; each is
+ * run once inside a single page and once across a page boundary.
+ */
+static void __init test_multi_instruction_patching(void)
+{
+	u32 code[32];
+	void *buf;
+	u32 *addr32;
+	u64 *addr64;
+	ppc_inst_t inst64 = ppc_inst_prefix(OP_PREFIX << 26 | 3UL << 24, PPC_RAW_TRAP());
+	u32 inst32 = PPC_RAW_NOP();
+
+	buf = vzalloc(PAGE_SIZE * 8);
+	check(buf);
+	if (!buf)
+		return;
+
+	/* Test single page 32-bit repeated instruction */
+	addr32 = buf + PAGE_SIZE;
+	check(!patch_instructions(addr32 + 1, &inst32, 12, true));
+
+	/* 12 bytes == three words; the words before and after stay zero */
+	check(addr32[0] == 0);
+	check(addr32[1] == inst32);
+	check(addr32[2] == inst32);
+	check(addr32[3] == inst32);
+	check(addr32[4] == 0);
+
+	/* Test single page 64-bit repeated instruction */
+	if (IS_ENABLED(CONFIG_PPC64)) {
+		check(ppc_inst_prefixed(inst64));
+
+		addr64 = buf + PAGE_SIZE * 2;
+		ppc_inst_write(code, inst64);
+		check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true));
+
+		/* 24 bytes == three prefixed instructions */
+		check(addr64[0] == 0);
+		check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64));
+		check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64));
+		check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64));
+		check(addr64[4] == 0);
+	}
+
+	/* Test single page memcpy */
+	addr32 = buf + PAGE_SIZE * 3;
+
+	for (int i = 0; i < ARRAY_SIZE(code); i++)
+		code[i] = i + 1;
+
+	check(!patch_instructions(addr32 + 1, code, sizeof(code), false));
+
+	check(addr32[0] == 0);
+	check(!memcmp(&addr32[1], code, sizeof(code)));
+	check(addr32[ARRAY_SIZE(code) + 1] == 0);
+
+	/*
+	 * Test multipage 32-bit repeated instruction: the patched range
+	 * starts 4 bytes before the start of page 4 of the buffer.
+	 */
+	addr32 = buf + PAGE_SIZE * 4 - 8;
+	check(!patch_instructions(addr32 + 1, &inst32, 12, true));
+
+	check(addr32[0] == 0);
+	check(addr32[1] == inst32);
+	check(addr32[2] == inst32);
+	check(addr32[3] == inst32);
+	check(addr32[4] == 0);
+
+	/* Test multipage 64-bit repeated instruction */
+	if (IS_ENABLED(CONFIG_PPC64)) {
+		check(ppc_inst_prefixed(inst64));
+
+		/*
+		 * Put the guard word 16 bytes before the start of page 5 so
+		 * that the patched range (addr64[1]..addr64[3]) begins 8
+		 * bytes before it and really spans two pages.  With an
+		 * offset of -8 the patch would start exactly on the page
+		 * boundary (vzalloc() memory is page aligned) and never
+		 * cross it.
+		 */
+		addr64 = buf + PAGE_SIZE * 5 - 16;
+		ppc_inst_write(code, inst64);
+		check(!patch_instructions((u32 *)(addr64 + 1), code, 24, true));
+
+		check(addr64[0] == 0);
+		check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[1]), inst64));
+		check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[2]), inst64));
+		check(ppc_inst_equal(ppc_inst_read((u32 *)&addr64[3]), inst64));
+		check(addr64[4] == 0);
+	}
+
+	/*
+	 * Test multipage memcpy: the copy starts 8 bytes before the start of
+	 * page 6 of the buffer.
+	 */
+	addr32 = buf + PAGE_SIZE * 6 - 12;
+
+	/* code[] was overwritten by ppc_inst_write() above, so refill it */
+	for (int i = 0; i < ARRAY_SIZE(code); i++)
+		code[i] = i + 1;
+
+	check(!patch_instructions(addr32 + 1, code, sizeof(code), false));
+
+	check(addr32[0] == 0);
+	check(!memcmp(&addr32[1], code, sizeof(code)));
+	check(addr32[ARRAY_SIZE(code) + 1] == 0);
+
+	vfree(buf);
+}
+
+/*
+ * Exercise patch_uint() and patch_ulong() on a zeroed scratch page, checking
+ * after each call that only the intended words changed.
+ */
+static void __init test_data_patching(void)
+{
+ void *buf;
+ u32 *addr32;
+
+ buf = vzalloc(PAGE_SIZE);
+ check(buf);
+ if (!buf)
+ return;
+
+ addr32 = buf + 128;
+
+ /* Seed two neighbouring words; addr32[0] and addr32[3] stay zero */
+ addr32[1] = 0xA0A1A2A3;
+ addr32[2] = 0xB0B1B2B3;
+
+ check(!patch_uint(&addr32[1], 0xC0C1C2C3));
+
+ /* Only addr32[1] may have changed */
+ check(addr32[0] == 0);
+ check(addr32[1] == 0xC0C1C2C3);
+ check(addr32[2] == 0xB0B1B2B3);
+ check(addr32[3] == 0);
+
+ /* Unaligned patch_ulong() should fail */
+ if (IS_ENABLED(CONFIG_PPC64))
+ check(patch_ulong(&addr32[1], 0xD0D1D2D3) == -EINVAL);
+
+ check(!patch_ulong(&addr32[2], 0xD0D1D2D3));
+
+ check(addr32[0] == 0);
+ check(addr32[1] == 0xC0C1C2C3);
+ check(*(unsigned long *)(&addr32[2]) == 0xD0D1D2D3);
+
+ /*
+ * On 64-bit the unsigned long just written also covers addr32[3], so
+ * that word is only expected to remain zero on 32-bit.
+ */
+ if (!IS_ENABLED(CONFIG_PPC64))
+ check(addr32[3] == 0);
+
+ check(addr32[4] == 0);
+
+ vfree(buf);
+}
+
+/*
+ * Entry point for the code patching self-tests.  Logs a banner, runs each
+ * test group in turn and always returns 0.  Registered as a late initcall.
+ */
+static int __init test_code_patching(void)
+{
+ pr_info("Running code patching self-tests ...\n");
+
+ test_branch_iform();
+ test_branch_bform();
+ test_create_function_call();
+ test_translate_branch();
+ test_prefixed_patching();
+ test_multi_instruction_patching();
+ test_data_patching();
+
+ return 0;
+}
+late_initcall(test_code_patching);
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000000000000..66b5b4fa1686
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,1741 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Simple sanity tests for instruction emulation infrastructure.
+ *
+ * Copyright IBM Corp. 2016
+ */
+
+#define pr_fmt(fmt) "emulate_step_test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/cpu_has_feature.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+#include <asm/text-patching.h>
+#include <asm/inst.h>
+
+/* Sizes the subtests[] array of struct compute_test (MAX_SUBTESTS + 1 slots) */
+#define MAX_SUBTESTS 16
+
+/*
+ * Bit values for the per-subtest 'flags' field: IGNORE_GPR(n) is bit n,
+ * IGNORE_XER is bit 32, IGNORE_CCR is bit 33 and NEGATIVE_TEST is bit 63.
+ * NOTE(review): presumably an IGNORE_* bit excludes that register from the
+ * result comparison -- confirm against the code that consumes 'flags'.
+ */
+#define IGNORE_GPR(n) (0x1UL << (n))
+#define IGNORE_XER (0x1UL << 32)
+#define IGNORE_CCR (0x1UL << 33)
+#define NEGATIVE_TEST (0x1UL << 63)
+
+/*
+ * Builders for the prefixed instructions used by the tests below.  Each one
+ * expands to ppc_inst_prefix(prefix word, suffix word): the prefix word is
+ * PPC_PREFIX_8LS or PPC_PREFIX_MLS ORed with __PPC_PRFX_R(pr) and IMM_H(i);
+ * the suffix word is either an opcode ORed with the register fields and
+ * IMM_L(i), or a PPC_RAW_*() encoding built from the same arguments.
+ * NOTE(review): IMM_H()/IMM_L() are defined elsewhere; presumably they select
+ * the high and low parts of the immediate -- confirm.
+ */
+#define TEST_PLD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLWZ(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_RAW_LWZ(r, base, i))
+
+#define TEST_PSTD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFS(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFS(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFD(r, base, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PADDI(t, a, i, pr) \
+ ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+ PPC_RAW_ADDI(t, a, i))
+
+/*
+ * Reset @regs to all zeroes and load a usable MSR value into it.
+ *
+ * The MSR is sampled with mfmsr on the first call only, with the FP, VEC and
+ * VSX bits ORed in; later calls reuse that saved value.
+ */
+static void __init init_pt_regs(struct pt_regs *regs)
+{
+	static unsigned long saved_msr;
+	static bool have_saved_msr;
+
+	memset(regs, 0, sizeof(*regs));
+
+	if (unlikely(!have_saved_msr)) {
+		unsigned long cur;
+
+		asm volatile("mfmsr %0" : "=r"(cur));
+
+		saved_msr = cur | MSR_FP | MSR_VEC | MSR_VSX;
+		have_saved_msr = true;
+	}
+
+	regs->msr = saved_msr;
+}
+
+/*
+ * Log one test outcome as "<mnemonic> : <result>", with the mnemonic
+ * left-justified in a 14-character column.
+ */
+static void __init show_result(char *mnemonic, char *result)
+{
+ pr_info("%-14s : %s\n", mnemonic, result);
+}
+
+/*
+ * Like show_result(), but with a subtest description (left-justified in a
+ * 50-character column) printed between the mnemonic and the result.
+ */
+static void __init show_result_with_descr(char *mnemonic, char *descr,
+ char *result)
+{
+ pr_info("%-14s : %-50s %s\n", mnemonic, descr, result);
+}
+
+/* Emulate "ld r5, 0(r3)" on a stack variable and report PASS or FAIL. */
+static void __init test_ld(void)
+{
+	unsigned long a = 0x23;
+	struct pt_regs regs;
+	int stepped;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+
+	/* ld r5, 0(r3) */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LD(5, 3, 0)));
+
+	/* The step must be emulated and r5 must hold the loaded value */
+	show_result("ld", (stepped == 1 && regs.gpr[5] == a) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate the prefixed load "pld r5, 0(r3), 0" on a stack variable and
+ * report PASS or FAIL.  Reported as SKIP when CPU_FTR_ARCH_31 is absent.
+ */
+static void __init test_pld(void)
+{
+	unsigned long a = 0x23;
+	struct pt_regs regs;
+	int stepped;
+	bool ok;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+
+	/* pld r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLD(5, 3, 0, 0));
+
+	ok = stepped == 1 && regs.gpr[5] == a;
+	show_result("pld", ok ? "PASS" : "FAIL");
+}
+
+/* Emulate "lwz r5, 0(r3)" on a stack variable and report PASS or FAIL. */
+static void __init test_lwz(void)
+{
+	unsigned int a = 0x4545;
+	struct pt_regs regs;
+	int stepped;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+
+	/* lwz r5, 0(r3) */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZ(5, 3, 0)));
+
+	/* The step must be emulated and r5 must hold the loaded word */
+	show_result("lwz", (stepped == 1 && regs.gpr[5] == a) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate the prefixed load "plwz r5, 0(r3), 0" on a stack variable and
+ * report PASS or FAIL.  Reported as SKIP when CPU_FTR_ARCH_31 is absent.
+ */
+static void __init test_plwz(void)
+{
+	unsigned int a = 0x4545;
+	struct pt_regs regs;
+	int stepped;
+	bool ok;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+
+	/* plwz r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLWZ(5, 3, 0, 0));
+
+	ok = stepped == 1 && regs.gpr[5] == a;
+	show_result("plwz", ok ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate "lwzx r5, r3, r4" with r3 pointing at a three-word array and
+ * r4 = 8, so the load should fetch the third word.
+ */
+static void __init test_lwzx(void)
+{
+	unsigned int a[3] = {0x0, 0x0, 0x1234};
+	struct pt_regs regs;
+	int stepped;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)a;
+	regs.gpr[4] = 8;
+	/* Start r5 with a different value so a stale register cannot pass */
+	regs.gpr[5] = 0x8765;
+
+	/* lwzx r5, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZX(5, 3, 4)));
+
+	show_result("lwzx", (stepped == 1 && regs.gpr[5] == a[2]) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate "std r5, 0(r3)" with r3 pointing at a stack variable and check
+ * that the variable ends up holding the value of r5.
+ */
+static void __init test_std(void)
+{
+	unsigned long a = 0x1234;
+	struct pt_regs regs;
+	int stepped;
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+	regs.gpr[5] = 0x5678;
+
+	/* std r5, 0(r3) */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STD(5, 3, 0)));
+
+	show_result("std", (stepped == 1 && regs.gpr[5] == a) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate the prefixed store "pstd r5, 0(r3), 0" with r3 pointing at a stack
+ * variable and check that the variable ends up holding the value of r5.
+ * Reported as SKIP when CPU_FTR_ARCH_31 is absent.
+ */
+static void __init test_pstd(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+	regs.gpr[5] = 0x5678;
+
+	/* pstd r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTD(5, 3, 0, 0));
+
+	/*
+	 * Both conditions are required, as in test_std(): the step must be
+	 * emulated AND the store must have reached memory.  With "||" the
+	 * test passed even when nothing was written to 'a'.
+	 */
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("pstd", "PASS");
+	else
+		show_result("pstd", "FAIL");
+}
+
+/*
+ * Emulate an ldarx / stdcx. pair on a stack variable.  The ldarx result is
+ * checked against a hardcoded value; the stdcx. result is accepted if the
+ * store happened with CR0.eq set, or did not happen with CR0.eq clear.
+ */
+static void __init test_ldarx_stdcx(void)
+{
+ struct pt_regs regs;
+ unsigned long a = 0x1234;
+ int stepped = -1;
+ unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */
+
+ init_pt_regs(&regs);
+ /* Start from the current CR contents rather than the zeroed field */
+ asm volatile("mfcr %0" : "=r"(regs.ccr));
+
+
+ /*** ldarx ***/
+
+ regs.gpr[3] = (unsigned long) &a;
+ regs.gpr[4] = 0;
+ regs.gpr[5] = 0x5678;
+
+ /* ldarx r5, r3, r4, 0 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0)));
+
+ /*
+ * Don't touch 'a' here. Touching 'a' can do Load/store
+ * of 'a' which result in failure of subsequent stdcx.
+ * Instead, use hardcoded value for comparison.
+ */
+ if (stepped <= 0 || regs.gpr[5] != 0x1234) {
+ show_result("ldarx / stdcx.", "FAIL (ldarx)");
+ return;
+ }
+
+
+ /*** stdcx. ***/
+
+ regs.gpr[5] = 0x9ABC;
+
+ /* stdcx. r5, r3, r4 */
+ stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STDCX(5, 3, 4)));
+
+ /*
+ * Two possible scenarios that indicates successful emulation
+ * of stdcx. :
+ * 1. Reservation is active and store is performed. In this
+ * case cr0.eq bit will be set to 1.
+ * 2. Reservation is not active and store is not performed.
+ * In this case cr0.eq bit will be set to 0.
+ */
+ if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq))
+ || (regs.gpr[5] != a && !(regs.ccr & cr0_eq))))
+ show_result("ldarx / stdcx.", "PASS");
+ else
+ show_result("ldarx / stdcx.", "FAIL (stdcx.)");
+}
+
+#ifdef CONFIG_PPC_FPU
+/*
+ * Emulate "lfsx" then "stfsx" through FP register 10.  The memory operand
+ * is overwritten between the two steps, so the store passes only if it
+ * writes back the bit pattern that was present when the load ran.
+ */
+static void __init test_lfsx_stfsx(void)
+{
+	union {
+		float a;
+		int b;
+	} c;
+	struct pt_regs regs;
+	int loaded_bits;
+	int stepped;
+
+	init_pt_regs(&regs);
+
+	/*** lfsx ***/
+
+	c.a = 123.45;
+	loaded_bits = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+	regs.gpr[4] = 0;
+
+	/* lfsx frt10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFSX(10, 3, 4)));
+	show_result("lfsx", stepped == 1 ? "PASS" : "FAIL");
+
+	/*** stfsx ***/
+
+	/* Clobber memory so that only a real store can restore the old bits */
+	c.a = 678.91;
+
+	/* stfsx frs10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFSX(10, 3, 4)));
+	show_result("stfsx", (stepped == 1 && c.b == loaded_bits) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate the prefixed "plfs" then "pstfs" through FP register 10.  The
+ * memory operand is overwritten between the two steps, so the store passes
+ * only if it writes back the bit pattern that was present when the load ran.
+ * Both tests are reported as SKIP when CPU_FTR_ARCH_31 is absent.
+ */
+static void __init test_plfs_pstfs(void)
+{
+	struct pt_regs regs;
+	union {
+		float a;
+		int b;
+	} c;
+	int cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/*
+		 * Report under the mnemonics this function actually tests;
+		 * the skip line used to be mislabelled "pld".
+		 */
+		show_result("plfs", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfs", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+
+	/*** plfs ***/
+
+	c.a = 123.45;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfs frt10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLFS(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfs", "PASS");
+	else
+		show_result("plfs", "FAIL");
+
+
+	/*** pstfs ***/
+
+	/* Clobber memory so that only a real store can restore the old bits */
+	c.a = 678.91;
+
+	/* pstfs frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFS(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfs", "PASS");
+	else
+		show_result("pstfs", "FAIL");
+}
+
+/*
+ * Emulate "lfdx" then "stfdx" through FP register 10.  The memory operand
+ * is overwritten between the two steps, so the store passes only if it
+ * writes back the bit pattern that was present when the load ran.
+ */
+static void __init test_lfdx_stfdx(void)
+{
+	union {
+		double a;
+		long b;
+	} c;
+	struct pt_regs regs;
+	long loaded_bits;
+	int stepped;
+
+	init_pt_regs(&regs);
+
+	/*** lfdx ***/
+
+	c.a = 123456.78;
+	loaded_bits = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+	regs.gpr[4] = 0;
+
+	/* lfdx frt10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFDX(10, 3, 4)));
+	show_result("lfdx", stepped == 1 ? "PASS" : "FAIL");
+
+	/*** stfdx ***/
+
+	/* Clobber memory so that only a real store can restore the old bits */
+	c.a = 987654.32;
+
+	/* stfdx frs10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFDX(10, 3, 4)));
+	show_result("stfdx", (stepped == 1 && c.b == loaded_bits) ? "PASS" : "FAIL");
+}
+
+/*
+ * Emulate the prefixed "plfd" then "pstfd" through FP register 10.  The
+ * memory operand is overwritten between the two steps, so the store passes
+ * only if it writes back the bit pattern that was present when the load ran.
+ * Both tests are reported as SKIP when CPU_FTR_ARCH_31 is absent.
+ */
+static void __init test_plfd_pstfd(void)
+{
+	struct pt_regs regs;
+	union {
+		double a;
+		long b;
+	} c;
+	long cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/*
+		 * Report under the mnemonics this function actually tests;
+		 * the skip line used to be mislabelled "pld".
+		 */
+		show_result("plfd", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstfd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+
+	/*** plfd ***/
+
+	c.a = 123456.78;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfd frt10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLFD(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfd", "PASS");
+	else
+		show_result("plfd", "FAIL");
+
+
+	/*** pstfd ***/
+
+	/* Clobber memory so that only a real store can restore the old bits */
+	c.a = 987654.32;
+
+	/* pstfd frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFD(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfd", "PASS");
+	else
+		show_result("pstfd", "FAIL");
+}
+#else
+/*
+ * Stand-ins used when CONFIG_PPC_FPU is not set: each one only reports its
+ * two tests as skipped.
+ */
+static void __init test_lfsx_stfsx(void)
+{
+ show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+/* Stand-in without CONFIG_PPC_FPU: report plfs and pstfs as skipped. */
+static void __init test_plfs_pstfs(void)
+{
+ show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+/* Stand-in without CONFIG_PPC_FPU: report lfdx and stfdx as skipped. */
+static void __init test_lfdx_stfdx(void)
+{
+ show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
+/* Stand-in without CONFIG_PPC_FPU: report plfd and pstfd as skipped. */
+static void __init test_plfd_pstfd(void)
+{
+ show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)");
+ show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_ALTIVEC
+/*
+ * Emulate "lvx" then "stvx" through vector register 10.  The 16-byte memory
+ * operand is overwritten between the two steps, so the store passes only if
+ * it writes back the four words that were present when the load ran.
+ */
+static void __init test_lvx_stvx(void)
+{
+	const u32 load_vals[4] = { 923745, 2139478, 9012, 982134 };
+	const u32 clobber_vals[4] = { 4987513, 84313948, 71, 498532 };
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];
+	struct pt_regs regs;
+	bool restored = true;
+	int stepped;
+	int i;
+
+	init_pt_regs(&regs);
+
+	/*** lvx ***/
+
+	for (i = 0; i < 4; i++)
+		cached_b[i] = c.b[i] = load_vals[i];
+
+	regs.gpr[3] = (unsigned long)&c.a;
+	regs.gpr[4] = 0;
+
+	/* lvx vrt10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LVX(10, 3, 4)));
+	show_result("lvx", stepped == 1 ? "PASS" : "FAIL");
+
+	/*** stvx ***/
+
+	/* Clobber memory so that only a real store can restore the old words */
+	for (i = 0; i < 4; i++)
+		c.b[i] = clobber_vals[i];
+
+	/* stvx vrs10, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STVX(10, 3, 4)));
+
+	for (i = 0; i < 4; i++) {
+		if (cached_b[i] != c.b[i])
+			restored = false;
+	}
+
+	show_result("stvx", (stepped == 1 && restored) ? "PASS" : "FAIL");
+}
+#else
+/* Stand-in without CONFIG_ALTIVEC: report lvx and stvx as skipped. */
+static void __init test_lvx_stvx(void)
+{
+ show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)");
+ show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)");
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate "lxvd2x" then "stxvd2x" through VSX register 39.  The 16-byte
+ * memory operand is overwritten between the two steps, so the store passes
+ * only if it writes back the four words that were present when the load ran.
+ * Without CPU_FTR_VSX each step is reported as "PASS (!CPU_FTR_VSX)"
+ * whatever emulate_step() returned.
+ */
+static void __init test_lxvd2x_stxvd2x(void)
+{
+	const u32 load_vals[4] = { 18233, 34863571, 834, 6138911 };
+	const u32 clobber_vals[4] = { 21379463, 87, 374234, 4 };
+	union {
+		vector128 a;
+		u32 b[4];
+	} c;
+	u32 cached_b[4];
+	struct pt_regs regs;
+	bool restored = true;
+	int stepped;
+	int i;
+
+	init_pt_regs(&regs);
+
+	/*** lxvd2x ***/
+
+	for (i = 0; i < 4; i++)
+		cached_b[i] = c.b[i] = load_vals[i];
+
+	regs.gpr[3] = (unsigned long)&c.a;
+	regs.gpr[4] = 0;
+
+	/* lxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4)));
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("lxvd2x", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("lxvd2x", "PASS");
+	else
+		show_result("lxvd2x", "FAIL");
+
+	/*** stxvd2x ***/
+
+	/* Clobber memory so that only a real store can restore the old words */
+	for (i = 0; i < 4; i++)
+		c.b[i] = clobber_vals[i];
+
+	/* stxvd2x vsr39, r3, r4 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4)));
+
+	for (i = 0; i < 4; i++) {
+		if (cached_b[i] != c.b[i])
+			restored = false;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("stxvd2x", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("stxvd2x", "PASS");
+	else
+		show_result("stxvd2x", "FAIL");
+}
+#else
+/* Stand-in without CONFIG_VSX: report lxvd2x and stxvd2x as skipped. */
+static void __init test_lxvd2x_stxvd2x(void)
+{
+ show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)");
+ show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate "lxvp" then "stxvp" on a 32-byte memory operand.  The operand is
+ * overwritten between the two steps, so the store passes only if it writes
+ * back the eight words that were present when the load ran.
+ * Both tests are skipped without CPU_FTR_ARCH_31; without CPU_FTR_VSX each
+ * step is reported as "PASS (!CPU_FTR_VSX)" whatever emulate_step() returned.
+ */
+static void __init test_lxvp_stxvp(void)
+{
+	const u32 load_vals[8] = {
+		18233, 34863571, 834, 6138911,
+		1234, 5678, 91011, 121314,
+	};
+	const u32 clobber_vals[8] = {
+		21379463, 87, 374234, 4,
+		90, 122, 555, 32144,
+	};
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];
+	struct pt_regs regs;
+	bool restored = true;
+	int stepped;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("lxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("stxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** lxvp ***/
+
+	/* Word i of the operand lives in c[i / 4].b[i % 4] */
+	for (i = 0; i < 8; i++)
+		cached_b[i] = c[i / 4].b[i % 4] = load_vals[i];
+
+	regs.gpr[4] = (unsigned long)&c[0].a;
+
+	/*
+	 * lxvp XTp,DQ(RA)
+	 * XTp = 32xTX + 2xTp
+	 * let TX=1 Tp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVP(34, 4, 0)));
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("lxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("lxvp", "PASS");
+	else
+		show_result("lxvp", "FAIL");
+
+	/*** stxvp ***/
+
+	/* Clobber memory so that only a real store can restore the old words */
+	for (i = 0; i < 8; i++)
+		c[i / 4].b[i % 4] = clobber_vals[i];
+
+	/*
+	 * stxvp XSp,DQ(RA)
+	 * XSp = 32xSX + 2xSp
+	 * let SX=1 Sp=1 RA=4 DQ=0
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVP(34, 4, 0)));
+
+	for (i = 0; i < 8; i++) {
+		if (cached_b[i] != c[i / 4].b[i % 4])
+			restored = false;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("stxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("stxvp", "PASS");
+	else
+		show_result("stxvp", "FAIL");
+}
+#else
+/* Stand-in without CONFIG_VSX: report lxvp and stxvp as skipped. */
+static void __init test_lxvp_stxvp(void)
+{
+ show_result("lxvp", "SKIP (CONFIG_VSX is not set)");
+ show_result("stxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate "lxvpx" then "stxvpx" on a 32-byte memory operand.  The operand is
+ * overwritten between the two steps, so the store passes only if it writes
+ * back the eight words that were present when the load ran.
+ * Both tests are skipped without CPU_FTR_ARCH_31; without CPU_FTR_VSX each
+ * step is reported as "PASS (!CPU_FTR_VSX)" whatever emulate_step() returned.
+ */
+static void __init test_lxvpx_stxvpx(void)
+{
+	const u32 load_vals[8] = {
+		18233, 34863571, 834, 6138911,
+		1234, 5678, 91011, 121314,
+	};
+	const u32 clobber_vals[8] = {
+		21379463, 87, 374234, 4,
+		90, 122, 555, 32144,
+	};
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];
+	struct pt_regs regs;
+	bool restored = true;
+	int stepped;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("lxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("stxvpx", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+	/*** lxvpx ***/
+
+	/* Word i of the operand lives in c[i / 4].b[i % 4] */
+	for (i = 0; i < 8; i++)
+		cached_b[i] = c[i / 4].b[i % 4] = load_vals[i];
+
+	regs.gpr[3] = (unsigned long)&c[0].a;
+	regs.gpr[4] = 0;
+
+	/*
+	 * lxvpx XTp,RA,RB
+	 * XTp = 32xTX + 2xTp
+	 * let TX=1 Tp=1 RA=3 RB=4
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVPX(34, 3, 4)));
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("lxvpx", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("lxvpx", "PASS");
+	else
+		show_result("lxvpx", "FAIL");
+
+	/*** stxvpx ***/
+
+	/* Clobber memory so that only a real store can restore the old words */
+	for (i = 0; i < 8; i++)
+		c[i / 4].b[i % 4] = clobber_vals[i];
+
+	/*
+	 * stxvpx XSp,RA,RB
+	 * XSp = 32xSX + 2xSp
+	 * let SX=1 Sp=1 RA=3 RB=4
+	 */
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVPX(34, 3, 4)));
+
+	for (i = 0; i < 8; i++) {
+		if (cached_b[i] != c[i / 4].b[i % 4])
+			restored = false;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("stxvpx", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("stxvpx", "PASS");
+	else
+		show_result("stxvpx", "FAIL");
+}
+#else
+/* Stand-in without CONFIG_VSX: report lxvpx and stxvpx as skipped. */
+static void __init test_lxvpx_stxvpx(void)
+{
+ show_result("lxvpx", "SKIP (CONFIG_VSX is not set)");
+ show_result("stxvpx", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_VSX
+/*
+ * Emulate the prefixed "plxvp" then "pstxvp" on a 32-byte memory operand.
+ * The operand is overwritten between the two steps, so the store passes only
+ * if it writes back the eight words that were present when the load ran.
+ * Both tests are skipped without CPU_FTR_ARCH_31; without CPU_FTR_VSX each
+ * step is reported as "PASS (!CPU_FTR_VSX)" whatever emulate_step() returned.
+ */
+static void __init test_plxvp_pstxvp(void)
+{
+	const u32 load_vals[8] = {
+		18233, 34863571, 834, 6138911,
+		1234, 5678, 91011, 121314,
+	};
+	const u32 clobber_vals[8] = {
+		21379463, 87, 374234, 4,
+		90, 122, 555, 32144,
+	};
+	union {
+		vector128 a;
+		u32 b[4];
+	} c[2];
+	u32 cached_b[8];
+	struct pt_regs regs;
+	ppc_inst_t instr;
+	bool restored = true;
+	int stepped;
+	int i;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		show_result("pstxvp", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	/*** plxvp ***/
+
+	/* Word i of the operand lives in c[i / 4].b[i % 4] */
+	for (i = 0; i < 8; i++)
+		cached_b[i] = c[i / 4].b[i % 4] = load_vals[i];
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&c[0].a;
+
+	/*
+	 * plxvp XTp,D(RA),R
+	 * XTp = 32xTX + 2xTp
+	 * let RA=3 D=d0||d1=0 R=0 Tp=1 TX=1
+	 */
+	instr = ppc_inst_prefix(PPC_RAW_PLXVP_P(34, 0, 3, 0), PPC_RAW_PLXVP_S(34, 0, 3, 0));
+	stepped = emulate_step(&regs, instr);
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("plxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1)
+		show_result("plxvp", "PASS");
+	else
+		show_result("plxvp", "FAIL");
+
+	/*** pstxvp ***/
+
+	/* Clobber memory so that only a real store can restore the old words */
+	for (i = 0; i < 8; i++)
+		c[i / 4].b[i % 4] = clobber_vals[i];
+
+	/*
+	 * pstxvp XSp,D(RA),R
+	 * XSp = 32xSX + 2xSp
+	 * let RA=3 D=d0||d1=0 R=0 Sp=1 SX=1
+	 */
+	instr = ppc_inst_prefix(PPC_RAW_PSTXVP_P(34, 0, 3, 0), PPC_RAW_PSTXVP_S(34, 0, 3, 0));
+	stepped = emulate_step(&regs, instr);
+
+	for (i = 0; i < 8; i++) {
+		if (cached_b[i] != c[i / 4].b[i % 4])
+			restored = false;
+	}
+
+	if (!cpu_has_feature(CPU_FTR_VSX))
+		show_result("pstxvp", "PASS (!CPU_FTR_VSX)");
+	else if (stepped == 1 && restored)
+		show_result("pstxvp", "PASS");
+	else
+		show_result("pstxvp", "FAIL");
+}
+#else
+/* Stand-in without CONFIG_VSX: report plxvp and pstxvp as skipped. */
+static void __init test_plxvp_pstxvp(void)
+{
+ show_result("plxvp", "SKIP (CONFIG_VSX is not set)");
+ show_result("pstxvp", "SKIP (CONFIG_VSX is not set)");
+}
+#endif /* CONFIG_VSX */
+
+/*
+ * Run every load/store emulation test defined above, in a fixed order.
+ * Each test reports its own outcome through show_result().
+ */
+static void __init run_tests_load_store(void)
+{
+ test_ld();
+ test_pld();
+ test_lwz();
+ test_plwz();
+ test_lwzx();
+ test_std();
+ test_pstd();
+ test_ldarx_stdcx();
+ test_lfsx_stfsx();
+ test_plfs_pstfs();
+ test_lfdx_stfdx();
+ test_plfd_pstfd();
+ test_lvx_stvx();
+ test_lxvd2x_stxvd2x();
+ test_lxvp_stxvp();
+ test_lxvpx_stxvpx();
+ test_plxvp_pstxvp();
+}
+
+/*
+ * One entry of the compute_tests[] table: an instruction mnemonic plus the
+ * list of subtests to run for it.
+ */
+struct compute_test {
+ /* Instruction name, e.g. "add." */
+ char *mnemonic;
+ /*
+ * CPU_FTR_* value, left zero by entries that do not set it.
+ * NOTE(review): presumably the entry is skipped when the CPU lacks
+ * this feature -- confirm against the code that walks the table.
+ */
+ unsigned long cpu_feature;
+ struct {
+ /* Human-readable description of the input values */
+ char *descr;
+ /* IGNORE_* / NEGATIVE_TEST bits, zero when unset */
+ unsigned long flags;
+ /* Instruction under test */
+ ppc_inst_t instr;
+ /* Initial register state; only the listed fields are non-zero */
+ struct pt_regs regs;
+ /*
+ * NOTE(review): the extra slot presumably leaves room for an all-zero
+ * terminating entry -- confirm against the code that walks subtests.
+ */
+ } subtests[MAX_SUBTESTS + 1];
+};
+
+/* Extreme values for si0||si1 (the MLS:D-form 34 bit immediate field) */
+#define SI_MIN BIT(33)
+#define SI_MAX (BIT(33) - 1)
+#define SI_UMAX (BIT(34) - 1)
+
+static struct compute_test compute_tests[] = {
+ {
+ .mnemonic = "nop",
+ .subtests = {
+ {
+ .descr = "R0 = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_NOP()),
+ .regs = {
+ .gpr[0] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "setb",
+ .cpu_feature = CPU_FTR_ARCH_300,
+ .subtests = {
+ {
+ .descr = "BFA = 1, CR = GT",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 1)),
+ .regs = {
+ .ccr = 0x4000000,
+ }
+ },
+ {
+ .descr = "BFA = 4, CR = LT",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 4)),
+ .regs = {
+ .ccr = 0x8000,
+ }
+ },
+ {
+ .descr = "BFA = 5, CR = EQ",
+ .instr = ppc_inst(PPC_RAW_SETB(20, 5)),
+ .regs = {
+ .ccr = 0x200,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "add",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "add.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "addc",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN | (uint)INT_MIN,
+ .gpr[22] = LONG_MIN | (uint)INT_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "addc.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, RB = LONG_MAX",
+ .flags = IGNORE_CCR,
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = ULONG_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = ULONG_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MIN,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, RB = INT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = INT_MAX,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = UINT_MAX",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, RB = 0x1",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = UINT_MAX,
+ .gpr[22] = 0x1,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
+ .instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN | (uint)INT_MIN,
+ .gpr[22] = LONG_MIN | (uint)INT_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divde",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divde.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divdeu",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX - 1,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN + 1,
+ .gpr[22] = LONG_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "divdeu.",
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = 1L, RB = 0",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = 1L,
+ .gpr[22] = 0,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MIN,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .regs = {
+ .gpr[21] = LONG_MAX - 1,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+ .instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+ .flags = IGNORE_GPR(20),
+ .regs = {
+ .gpr[21] = LONG_MIN + 1,
+ .gpr[22] = LONG_MIN,
+ }
+ }
+ }
+ },
+ {
+ .mnemonic = "paddi",
+ .cpu_feature = CPU_FTR_ARCH_31,
+ .subtests = {
+ {
+ .descr = "RA = LONG_MIN, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MIN, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MIN,
+ }
+ },
+ {
+ .descr = "RA = LONG_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = LONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_UMAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = ULONG_MAX, SI = 0x1, R = 0",
+ .instr = TEST_PADDI(21, 22, 0x1, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = ULONG_MAX,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MIN, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MIN,
+ }
+ },
+ {
+ .descr = "RA = INT_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = INT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, SI = 0x1, R = 0",
+ .instr = TEST_PADDI(21, 22, 0x1, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA = UINT_MAX, SI = SI_MAX, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MAX, 0),
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = UINT_MAX,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 0, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0x0,
+ }
+ },
+ {
+ .descr = "RA = 0, SI = SI_MIN, R = 0",
+ .instr = TEST_PADDI(21, 22, SI_MIN, 0),
+ .regs = {
+ .gpr[21] = 0x0,
+ .gpr[22] = 0x0,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = 0, R = 1",
+ .instr = TEST_PADDI(21, 0, 0, 1),
+ .regs = {
+ .gpr[21] = 0,
+ }
+ },
+ {
+ .descr = "RA is r0, SI = SI_MIN, R = 1",
+ .instr = TEST_PADDI(21, 0, SI_MIN, 1),
+ .regs = {
+ .gpr[21] = 0,
+ }
+ },
+ /* Invalid instruction form with R = 1 and RA != 0 */
+ {
+ .descr = "RA = R22(0), SI = 0, R = 1",
+ .instr = TEST_PADDI(21, 22, 0, 1),
+ .flags = NEGATIVE_TEST,
+ .regs = {
+ .gpr[21] = 0,
+ .gpr[22] = 0,
+ }
+ }
+ }
+ }
+};
+
+/*
+ * Emulate one compute instruction on @regs using analyse_instr() and
+ * emulate_update_regs().
+ *
+ * Returns -EINVAL when @regs is NULL or ppc_inst_val(@instr) is zero,
+ * -EFAULT when the instruction is not analysed as a COMPUTE op, and 0
+ * otherwise. For a @negative test the register state is never updated, and
+ * a return of 0 means the instruction was accepted although it should have
+ * been rejected.
+ */
+static int __init emulate_compute_instr(struct pt_regs *regs,
+					ppc_inst_t instr,
+					bool negative)
+{
+	struct instruction_op op;
+	int ret;
+
+	if (!regs || !ppc_inst_val(instr))
+		return -EINVAL;
+
+	/* regs is not a return frame, so point NIP at the patch site by hand */
+	regs->nip = patch_site_addr(&patch__exec_instr);
+
+	ret = analyse_instr(&op, regs, instr);
+	if (ret != 1 || GETTYPE(op.type) != COMPUTE) {
+		/* A rejection is only worth logging when it was not expected */
+		if (!negative)
+			pr_info("emulation failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+		return -EFAULT;
+	}
+
+	if (negative) {
+		/* Analysis succeeded for an instruction that should be invalid */
+		pr_info("negative test failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+		return 0;
+	}
+
+	emulate_update_regs(regs, &op);
+	return 0;
+}
+
+/*
+ * Execute @instr natively: patch it into the placeholder at
+ * patch__exec_instr and run it through exec_instr() with @regs as both the
+ * initial and the resulting register state.
+ *
+ * Returns -EINVAL when @regs is NULL or ppc_inst_val(@instr) is zero,
+ * -EFAULT when the instruction could not be patched in or exec_instr()
+ * reports a failure, and 0 on success.
+ */
+static int __init execute_compute_instr(struct pt_regs *regs,
+					ppc_inst_t instr)
+{
+	extern int exec_instr(struct pt_regs *regs);
+
+	if (!regs || !ppc_inst_val(instr))
+		return -EINVAL;
+
+	/*
+	 * Patch the NOP with the actual instruction. If patching fails the
+	 * placeholder still holds its previous contents, so executing it
+	 * would yield results for the wrong instruction; bail out instead.
+	 */
+	if (patch_instruction_site(&patch__exec_instr, instr)) {
+		pr_info("patching failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+		return -EFAULT;
+	}
+
+	if (exec_instr(regs)) {
+		pr_info("execution failed, instruction = %08lx\n", ppc_inst_as_ulong(instr));
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Log a mismatch for general purpose register number gprn: exp is the
+ * expected value and got the value actually obtained, both printed as
+ * 16-digit hex (unsigned long).
+ */
+#define gpr_mismatch(gprn, exp, got) \
+ pr_info("GPR%u mismatch, exp = 0x%016lx, got = 0x%016lx\n", \
+ gprn, exp, got)
+
+/*
+ * Log a mismatch for a register identified by the string name (e.g. "LR",
+ * "XER", "CR"), with exp and got printed as 16-digit hex (unsigned long).
+ */
+#define reg_mismatch(name, exp, got) \
+ pr_info("%s mismatch, exp = 0x%016lx, got = 0x%016lx\n", \
+ name, exp, got)
+
+/*
+ * Run every entry of compute_tests[]. For each subtest the instruction is
+ * emulated on one copy of the initial pt_regs (got) and executed natively on
+ * another copy (exp); GPRs, LR, XER and CR of the two are then compared,
+ * honouring the IGNORE_* bits in the subtest flags. Tests whose cpu_feature
+ * is not available are reported as skipped. Subtests flagged NEGATIVE_TEST
+ * are never executed natively: they pass only if emulation returns an error.
+ */
+static void __init run_tests_compute(void)
+{
+ unsigned long flags;
+ struct compute_test *test;
+ struct pt_regs *regs, exp, got;
+ unsigned int i, j, k;
+ ppc_inst_t instr;
+ bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
+
+ for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
+ test = &compute_tests[i];
+
+ if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) {
+ show_result(test->mnemonic, "SKIP (!CPU_FTR)");
+ continue;
+ }
+
+ /* Subtests end at MAX_SUBTESTS or at the first entry with no descr */
+ for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) {
+ instr = test->subtests[j].instr;
+ flags = test->subtests[j].flags;
+ regs = &test->subtests[j].regs;
+ negative = flags & NEGATIVE_TEST;
+ ignore_xer = flags & IGNORE_XER;
+ ignore_ccr = flags & IGNORE_CCR;
+ passed = true;
+
+ /* Both paths start from the same initial register state */
+ memcpy(&exp, regs, sizeof(struct pt_regs));
+ memcpy(&got, regs, sizeof(struct pt_regs));
+
+ /*
+ * Set a compatible MSR value explicitly to ensure
+ * that XER and CR bits are updated appropriately
+ */
+ exp.msr = MSR_KERNEL;
+ got.msr = MSR_KERNEL;
+
+ /* rc is true when emulation returned an error */
+ rc = emulate_compute_instr(&got, instr, negative) != 0;
+ if (negative) {
+ /* skip executing instruction */
+ passed = rc;
+ goto print;
+ } else if (rc || execute_compute_instr(&exp, instr)) {
+ passed = false;
+ goto print;
+ }
+
+ /* Verify GPR values */
+ for (k = 0; k < 32; k++) {
+ ignore_gpr = flags & IGNORE_GPR(k);
+ if (!ignore_gpr && exp.gpr[k] != got.gpr[k]) {
+ passed = false;
+ gpr_mismatch(k, exp.gpr[k], got.gpr[k]);
+ }
+ }
+
+ /* Verify LR value */
+ if (exp.link != got.link) {
+ passed = false;
+ reg_mismatch("LR", exp.link, got.link);
+ }
+
+ /* Verify XER value */
+ if (!ignore_xer && exp.xer != got.xer) {
+ passed = false;
+ reg_mismatch("XER", exp.xer, got.xer);
+ }
+
+ /* Verify CR value */
+ if (!ignore_ccr && exp.ccr != got.ccr) {
+ passed = false;
+ reg_mismatch("CR", exp.ccr, got.ccr);
+ }
+
+print:
+ show_result_with_descr(test->mnemonic,
+ test->subtests[j].descr,
+ passed ? "PASS" : "FAIL");
+ }
+ }
+}
+
+/*
+ * Entry point of the self-tests, registered as a late initcall: runs the
+ * load/store tests followed by the compute tests. Always returns 0,
+ * whatever the outcome of the individual tests.
+ */
+static int __init test_emulate_step(void)
+{
+ printk(KERN_INFO "Running instruction emulation self-tests ...\n");
+ run_tests_load_store();
+ run_tests_compute();
+
+ return 0;
+}
+late_initcall(test_emulate_step);
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
new file mode 100644
index 000000000000..e2b646a4f7fa
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Non-emulated single-stepping support (currently limited to basic integer
+ * computations) used to validate the instruction emulation infrastructure.
+ *
+ * Copyright (C) 2019 IBM Corporation
+ */
+
+#include <asm/asm-offsets.h>
+#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
+#include <linux/errno.h>
+
+/*
+ * int exec_instr(struct pt_regs *regs)
+ *
+ * Loads LR, CR, XER and the GPRs (all but GPR1 and GPR13) from *regs, runs
+ * the instruction patched in at patch__exec_instr and stores the resulting
+ * register state back to *regs.
+ *
+ * Returns 0 on success. If the patched instruction faults, the exception
+ * table entry at the end of this function redirects to the failure path,
+ * which returns -EFAULT without writing anything back to *regs.
+ */
+_GLOBAL(exec_instr)
+
+ /*
+ * Stack frame layout (INT_FRAME_SIZE bytes)
+ * In-memory pt_regs (SP + STACK_INT_FRAME_REGS)
+ * Scratch space (SP + 8)
+ * Back chain (SP + 0)
+ */
+
+ /*
+ * Allocate a new stack frame with enough space to hold the register
+ * states in an in-memory pt_regs and also create the back chain to
+ * the caller's stack frame.
+ */
+ stdu r1, -INT_FRAME_SIZE(r1)
+
+ /*
+ * Save non-volatile GPRs on stack. This includes TOC pointer (GPR2)
+ * and local variables (GPR14 to GPR31). The register for the pt_regs
+ * parameter (GPR3) is saved additionally to ensure that the resulting
+ * register state can still be saved even if GPR3 gets overwritten
+ * when loading the initial register state for the test instruction.
+ * The stack pointer (GPR1) and the thread pointer (GPR13) are not
+ * saved as these should not be modified anyway.
+ */
+ SAVE_GPRS(2, 3, r1)
+ SAVE_NVGPRS(r1)
+
+ /*
+ * Save LR on stack to ensure that the return address is available
+ * even if it gets overwritten by the test instruction.
+ */
+ mflr r0
+ std r0, _LINK(r1)
+
+ /*
+ * Save CR on stack. For simplicity, the entire register is saved
+ * even though only fields 2 to 4 are non-volatile.
+ */
+ mfcr r0
+ std r0, _CCR(r1)
+
+ /*
+ * Load register state for the test instruction without touching the
+ * critical non-volatile registers. The register state is passed as a
+ * pointer to a pt_regs instance.
+ *
+ * NOTE(review): GPR0, _LINK, _CCR and _XER are also used relative to
+ * the stack pointer above, so they look like stack-frame-relative
+ * offsets; biasing the pt_regs pointer by GPR0 presumably lets the
+ * same offsets address a bare pt_regs -- confirm against asm-offsets.
+ */
+ subi r31, r3, GPR0
+
+ /* Load LR from pt_regs */
+ ld r0, _LINK(r31)
+ mtlr r0
+
+ /* Load CR from pt_regs */
+ ld r0, _CCR(r31)
+ mtcr r0
+
+ /* Load XER from pt_regs */
+ ld r0, _XER(r31)
+ mtxer r0
+
+ /* Load GPRs from pt_regs */
+ REST_GPR(0, r31)
+ REST_GPRS(2, 12, r31)
+ REST_NVGPRS(r31)
+
+ /*
+ * Placeholder for the test instruction: two NOP words, the first of
+ * which is registered as patch site patch__exec_instr.
+ *
+ * NOTE(review): the second word and the 64-byte alignment presumably
+ * exist so that an 8-byte prefixed instruction (e.g. paddi) fits and
+ * does not cross a 64-byte boundary -- confirm against the Power ISA.
+ */
+ .balign 64
+1: nop
+ nop
+ patch_site 1b patch__exec_instr
+
+ /*
+ * Since GPR3 is overwritten, temporarily restore it back to its
+ * original state, i.e. the pointer to pt_regs, to ensure that the
+ * resulting register state can be saved. Before doing this, a copy
+ * of it is created in the scratch space which is used later on to
+ * save it to pt_regs.
+ */
+ std r3, 8(r1)
+ REST_GPR(3, r1)
+
+ /* Save resulting GPR state to pt_regs */
+ subi r3, r3, GPR0
+ SAVE_GPR(0, r3)
+ SAVE_GPR(2, r3)
+ SAVE_GPRS(4, 12, r3)
+ SAVE_NVGPRS(r3)
+
+ /* Save resulting LR to pt_regs */
+ mflr r0
+ std r0, _LINK(r3)
+
+ /* Save resulting CR to pt_regs */
+ mfcr r0
+ std r0, _CCR(r3)
+
+ /* Save resulting XER to pt_regs */
+ mfxer r0
+ std r0, _XER(r3)
+
+ /* Restore resulting GPR3 from scratch space and save it to pt_regs */
+ ld r0, 8(r1)
+ std r0, GPR3(r3)
+
+ /* Set return value to denote execution success */
+ li r3, 0
+
+ /* Continue */
+ b 3f
+
+ /*
+ * Set return value to denote execution failure. This label is only
+ * reached through the exception table entry for the instruction at 1.
+ */
+2: li r3, -EFAULT
+
+ /* Restore the non-volatile GPRs from stack */
+3: REST_GPR(2, r1)
+ REST_NVGPRS(r1)
+
+ /* Restore LR from stack to be able to return */
+ ld r0, _LINK(r1)
+ mtlr r0
+
+ /* Restore CR from stack */
+ ld r0, _CCR(r1)
+ mtcr r0
+
+ /* Tear down stack frame */
+ addi r1, r1, INT_FRAME_SIZE
+
+ /* Return */
+ blr
+
+ /* Setup exception table: a fault at 1 resumes execution at 2 */
+ EX_TABLE(1b, 2b)
+
+_ASM_NOKPROBE_SYMBOL(exec_instr)
diff --git a/arch/powerpc/lib/usercopy_64.c b/arch/powerpc/lib/usercopy_64.c
deleted file mode 100644
index 5eea6f3c1e03..000000000000
--- a/arch/powerpc/lib/usercopy_64.c
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Functions which are too large to be inlined.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/module.h>
-#include <asm/uaccess.h>
-
-unsigned long copy_from_user(void *to, const void __user *from, unsigned long n)
-{
- if (likely(access_ok(VERIFY_READ, from, n)))
- n = __copy_from_user(to, from, n);
- else
- memset(to, 0, n);
- return n;
-}
-
-unsigned long copy_to_user(void __user *to, const void *from, unsigned long n)
-{
- if (likely(access_ok(VERIFY_WRITE, to, n)))
- n = __copy_to_user(to, from, n);
- return n;
-}
-
-unsigned long copy_in_user(void __user *to, const void __user *from,
- unsigned long n)
-{
- might_sleep();
- if (likely(access_ok(VERIFY_READ, from, n) &&
- access_ok(VERIFY_WRITE, to, n)))
- n =__copy_tofrom_user(to, from, n);
- return n;
-}
-
-EXPORT_SYMBOL(copy_from_user);
-EXPORT_SYMBOL(copy_to_user);
-EXPORT_SYMBOL(copy_in_user);
-
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index 3cf529ceec5b..54340912398f 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -1,17 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2011
*
@@ -27,11 +15,11 @@ int enter_vmx_usercopy(void)
if (in_interrupt())
return 0;
- /* This acts as preempt_disable() as well and will make
- * enable_kernel_altivec(). We need to disable page faults
- * as they can call schedule and thus make us lose the VMX
- * context. So on page faults, we just fail which will cause
- * a fallback to the normal non-vmx copy.
+ preempt_disable();
+ /*
+ * We need to disable page faults as they can call schedule and
+ * thus make us lose the VMX context. So on page faults, we just
+ * fail which will cause a fallback to the normal non-vmx copy.
*/
pagefault_disable();
@@ -46,11 +34,23 @@ int enter_vmx_usercopy(void)
*/
int exit_vmx_usercopy(void)
{
+ disable_kernel_altivec();
pagefault_enable();
+ preempt_enable_no_resched();
+ /*
+ * Must never explicitly call schedule (including preempt_enable())
+ * while in a kuap-unlocked user copy, because the AMR register will
+ * not be saved and restored across context switch. However preempt
+ * kernels need to be preempted as soon as possible if need_resched is
+ * set and we are preemptible. The hack here is to schedule a
+ * decrementer to fire here and reschedule for us if necessary.
+ */
+ if (need_irq_preemption() && need_resched())
+ set_dec(1);
return 0;
}
-int enter_vmx_copy(void)
+int enter_vmx_ops(void)
{
if (in_interrupt())
return 0;
@@ -67,8 +67,9 @@ int enter_vmx_copy(void)
* passed a pointer to the destination which we return as required by a
* memcpy implementation.
*/
-void *exit_vmx_copy(void *dest)
+void *exit_vmx_ops(void *dest)
{
+ disable_kernel_altivec();
preempt_enable();
return dest;
}
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index e905f7c2ea7b..aab49d056d18 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -1,28 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Copyright (C) IBM Corporation, 2012
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
+
+/*
+ * Sparse (as at v0.5.0) gets very, very confused by this file.
+ * Make it a bit simpler for it.
+ */
+#if !defined(__CHECKER__)
#include <altivec.h>
+#else
+#define vec_xor(a, b) a ^ b
+#define vector __attribute__((vector_size(16)))
+#endif
-#include <linux/preempt.h>
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <asm/switch_to.h>
+#include "xor_vmx.h"
typedef vector signed char unative_t;
@@ -54,16 +49,14 @@ typedef vector signed char unative_t;
V1##_3 = vec_xor(V1##_3, V2##_3); \
} while (0)
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in)
{
DEFINE(v1);
DEFINE(v2);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -73,22 +66,18 @@ void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
v1 += 4;
v2 += 4;
} while (--lines > 0);
-
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_2);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in)
{
DEFINE(v1);
DEFINE(v2);
DEFINE(v3);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -101,14 +90,13 @@ void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
v2 += 4;
v3 += 4;
} while (--lines > 0);
-
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_3);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -116,9 +104,6 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
DEFINE(v4);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -134,14 +119,14 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
v3 += 4;
v4 += 4;
} while (--lines > 0);
-
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_4);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
- unsigned long *v2_in, unsigned long *v3_in,
- unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes,
+ unsigned long * __restrict v1_in,
+ const unsigned long * __restrict v2_in,
+ const unsigned long * __restrict v3_in,
+ const unsigned long * __restrict v4_in,
+ const unsigned long * __restrict v5_in)
{
DEFINE(v1);
DEFINE(v2);
@@ -150,9 +135,6 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
DEFINE(v5);
unsigned long lines = bytes / (sizeof(unative_t)) / 4;
- preempt_disable();
- enable_kernel_altivec();
-
do {
LOAD(v1);
LOAD(v2);
@@ -171,7 +153,4 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
v4 += 4;
v5 += 4;
} while (--lines > 0);
-
- preempt_enable();
}
-EXPORT_SYMBOL(xor_altivec_5);
diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h
new file mode 100644
index 000000000000..573c41d90dac
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Simple interface to link xor_vmx.c and xor_vmx_glue.c
+ *
+ * Separating these files ensures that no altivec instructions are run
+ * outside of the enable/disable altivec block.
+ */
+
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5);
diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c
new file mode 100644
index 000000000000..35d917ece4d1
--- /dev/null
+++ b/arch/powerpc/lib/xor_vmx_glue.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Altivec XOR operations
+ *
+ * Copyright 2017 IBM Corp.
+ */
+
+#include <linux/preempt.h>
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/switch_to.h>
+#include <asm/xor_altivec.h>
+#include "xor_vmx.h"
+
+/*
+ * Exported wrapper around __xor_altivec_2(): the call is bracketed by
+ * preempt_disable()/enable_kernel_altivec() and
+ * disable_kernel_altivec()/preempt_enable(). p1 is the only non-const
+ * buffer, so it is presumably the destination that gets XORed with p2 over
+ * bytes bytes -- confirm against the worker in xor_vmx.c.
+ */
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_2(bytes, p1, p2);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_2);
+
+/*
+ * Exported wrapper around __xor_altivec_3(): the call is bracketed by
+ * preempt_disable()/enable_kernel_altivec() and
+ * disable_kernel_altivec()/preempt_enable(). p1 is the only non-const
+ * buffer, so it is presumably the destination that gets XORed with p2 and
+ * p3 over bytes bytes -- confirm against the worker in xor_vmx.c.
+ */
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_3(bytes, p1, p2, p3);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_3);
+
+/*
+ * Exported wrapper around __xor_altivec_4(): the call is bracketed by
+ * preempt_disable()/enable_kernel_altivec() and
+ * disable_kernel_altivec()/preempt_enable(). p1 is the only non-const
+ * buffer, so it is presumably the destination that gets XORed with p2..p4
+ * over bytes bytes -- confirm against the worker in xor_vmx.c.
+ */
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_4(bytes, p1, p2, p3, p4);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_4);
+
+/*
+ * Exported wrapper around __xor_altivec_5(): the call is bracketed by
+ * preempt_disable()/enable_kernel_altivec() and
+ * disable_kernel_altivec()/preempt_enable(). p1 is the only non-const
+ * buffer, so it is presumably the destination that gets XORed with p2..p5
+ * over bytes bytes -- confirm against the worker in xor_vmx.c.
+ */
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2,
+ const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4,
+ const unsigned long * __restrict p5)
+{
+ preempt_disable();
+ enable_kernel_altivec();
+ __xor_altivec_5(bytes, p1, p2, p3, p4, p5);
+ disable_kernel_altivec();
+ preempt_enable();
+}
+EXPORT_SYMBOL(xor_altivec_5);
diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile
index 1b46ab4f6417..603e59c3db10 100644
--- a/arch/powerpc/math-emu/Makefile
+++ b/arch/powerpc/math-emu/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
math-emu-common-objs = math.o fre.o fsqrt.o fsqrts.o frsqrtes.o mtfsf.o mtfsfi.o
obj-$(CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED) += $(math-emu-common-objs)
obj-$(CONFIG_MATH_EMULATION_FULL) += $(math-emu-common-objs) fabs.o fadd.o \
@@ -16,4 +17,9 @@ obj-$(CONFIG_SPE) += math_efp.o
CFLAGS_fabs.o = -fno-builtin-fabs
CFLAGS_math.o = -fno-builtin-fabs
-ccflags-y = -I. -Iinclude/math-emu -w
+ccflags-remove-y = -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+
+ifdef KBUILD_EXTRA_WARN
+CFLAGS_math.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+CFLAGS_math_efp.o += -Wmissing-prototypes -Wmissing-declarations -Wunused-but-set-variable
+endif
diff --git a/arch/powerpc/math-emu/fabs.c b/arch/powerpc/math-emu/fabs.c
index 549baba5948f..3b62fd70b77e 100644
--- a/arch/powerpc/math-emu/fabs.c
+++ b/arch/powerpc/math-emu/fabs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
fabs(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fadd.c b/arch/powerpc/math-emu/fadd.c
index 0158a16e2b82..727e49ad55d1 100644
--- a/arch/powerpc/math-emu/fadd.c
+++ b/arch/powerpc/math-emu/fadd.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fadds.c b/arch/powerpc/math-emu/fadds.c
index 5930f40a8687..45254be05662 100644
--- a/arch/powerpc/math-emu/fadds.c
+++ b/arch/powerpc/math-emu/fadds.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fcmpo.c b/arch/powerpc/math-emu/fcmpo.c
index 5bce011c2aec..f437d0896c59 100644
--- a/arch/powerpc/math-emu/fcmpo.c
+++ b/arch/powerpc/math-emu/fcmpo.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fcmpu.c b/arch/powerpc/math-emu/fcmpu.c
index d4fb1babc6ad..65631fa5dc39 100644
--- a/arch/powerpc/math-emu/fcmpu.c
+++ b/arch/powerpc/math-emu/fcmpu.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fctiw.c b/arch/powerpc/math-emu/fctiw.c
index f694440ddc00..ebb0f11e735e 100644
--- a/arch/powerpc/math-emu/fctiw.c
+++ b/arch/powerpc/math-emu/fctiw.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fctiwz.c b/arch/powerpc/math-emu/fctiwz.c
index 71e782fd4fe3..426271c4f004 100644
--- a/arch/powerpc/math-emu/fctiwz.c
+++ b/arch/powerpc/math-emu/fctiwz.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fdiv.c b/arch/powerpc/math-emu/fdiv.c
index a29239c05e3e..6e64ece2d395 100644
--- a/arch/powerpc/math-emu/fdiv.c
+++ b/arch/powerpc/math-emu/fdiv.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fdivs.c b/arch/powerpc/math-emu/fdivs.c
index 526bc261275f..f9f7adf46262 100644
--- a/arch/powerpc/math-emu/fdivs.c
+++ b/arch/powerpc/math-emu/fdivs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmadd.c b/arch/powerpc/math-emu/fmadd.c
index 8c3f20aa5a95..e8458aed5edb 100644
--- a/arch/powerpc/math-emu/fmadd.c
+++ b/arch/powerpc/math-emu/fmadd.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmadds.c b/arch/powerpc/math-emu/fmadds.c
index 794fb31e59d1..a6d3f9842f19 100644
--- a/arch/powerpc/math-emu/fmadds.c
+++ b/arch/powerpc/math-emu/fmadds.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmr.c b/arch/powerpc/math-emu/fmr.c
index bd55384b8196..48c64374286e 100644
--- a/arch/powerpc/math-emu/fmr.c
+++ b/arch/powerpc/math-emu/fmr.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
fmr(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fmsub.c b/arch/powerpc/math-emu/fmsub.c
index 626f6fed84ac..605cda49e7b2 100644
--- a/arch/powerpc/math-emu/fmsub.c
+++ b/arch/powerpc/math-emu/fmsub.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmsubs.c b/arch/powerpc/math-emu/fmsubs.c
index 3425bc899760..f26ec0acf0a5 100644
--- a/arch/powerpc/math-emu/fmsubs.c
+++ b/arch/powerpc/math-emu/fmsubs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmul.c b/arch/powerpc/math-emu/fmul.c
index 2c1929779892..d114f7acdbb1 100644
--- a/arch/powerpc/math-emu/fmul.c
+++ b/arch/powerpc/math-emu/fmul.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fmuls.c b/arch/powerpc/math-emu/fmuls.c
index f5ad5c9c77d0..aaeba0acb47f 100644
--- a/arch/powerpc/math-emu/fmuls.c
+++ b/arch/powerpc/math-emu/fmuls.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnabs.c b/arch/powerpc/math-emu/fnabs.c
index a7d34f3d9499..6c439e6c2c58 100644
--- a/arch/powerpc/math-emu/fnabs.c
+++ b/arch/powerpc/math-emu/fnabs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
fnabs(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fneg.c b/arch/powerpc/math-emu/fneg.c
index 1e988cd9c6cc..791e724f712f 100644
--- a/arch/powerpc/math-emu/fneg.c
+++ b/arch/powerpc/math-emu/fneg.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
fneg(u32 *frD, u32 *frB)
diff --git a/arch/powerpc/math-emu/fnmadd.c b/arch/powerpc/math-emu/fnmadd.c
index e817bc5453ef..02a7099b26af 100644
--- a/arch/powerpc/math-emu/fnmadd.c
+++ b/arch/powerpc/math-emu/fnmadd.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnmadds.c b/arch/powerpc/math-emu/fnmadds.c
index 4db4b7d9ba8d..ce42a7a44d2e 100644
--- a/arch/powerpc/math-emu/fnmadds.c
+++ b/arch/powerpc/math-emu/fnmadds.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnmsub.c b/arch/powerpc/math-emu/fnmsub.c
index f65979fa770e..eade699c51d5 100644
--- a/arch/powerpc/math-emu/fnmsub.c
+++ b/arch/powerpc/math-emu/fnmsub.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fnmsubs.c b/arch/powerpc/math-emu/fnmsubs.c
index 9021dacc03b8..4e1f6c2b7c40 100644
--- a/arch/powerpc/math-emu/fnmsubs.c
+++ b/arch/powerpc/math-emu/fnmsubs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fre.c b/arch/powerpc/math-emu/fre.c
index 49ccf2cc6a5a..584b16f53304 100644
--- a/arch/powerpc/math-emu/fre.c
+++ b/arch/powerpc/math-emu/fre.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int fre(void *frD, void *frB)
{
diff --git a/arch/powerpc/math-emu/fres.c b/arch/powerpc/math-emu/fres.c
index 10ecbd08b79e..f7d5654ce7d6 100644
--- a/arch/powerpc/math-emu/fres.c
+++ b/arch/powerpc/math-emu/fres.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
fres(void *frD, void *frB)
diff --git a/arch/powerpc/math-emu/frsp.c b/arch/powerpc/math-emu/frsp.c
index ddcc14664b1a..cb33e3d9bbb2 100644
--- a/arch/powerpc/math-emu/frsp.c
+++ b/arch/powerpc/math-emu/frsp.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/frsqrte.c b/arch/powerpc/math-emu/frsqrte.c
index 1d0a3a0fd0e6..72955b27c3ca 100644
--- a/arch/powerpc/math-emu/frsqrte.c
+++ b/arch/powerpc/math-emu/frsqrte.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
frsqrte(void *frD, void *frB)
diff --git a/arch/powerpc/math-emu/frsqrtes.c b/arch/powerpc/math-emu/frsqrtes.c
index 7e838e380314..a036f7b7140c 100644
--- a/arch/powerpc/math-emu/frsqrtes.c
+++ b/arch/powerpc/math-emu/frsqrtes.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int frsqrtes(void *frD, void *frB)
{
diff --git a/arch/powerpc/math-emu/fsel.c b/arch/powerpc/math-emu/fsel.c
index 1b0c14498032..b0d15e15a5d3 100644
--- a/arch/powerpc/math-emu/fsel.c
+++ b/arch/powerpc/math-emu/fsel.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsqrt.c b/arch/powerpc/math-emu/fsqrt.c
index a55fc7d49983..05438590388e 100644
--- a/arch/powerpc/math-emu/fsqrt.c
+++ b/arch/powerpc/math-emu/fsqrt.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsqrts.c b/arch/powerpc/math-emu/fsqrts.c
index 31dccbfc39ff..1624f97c69cc 100644
--- a/arch/powerpc/math-emu/fsqrts.c
+++ b/arch/powerpc/math-emu/fsqrts.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsub.c b/arch/powerpc/math-emu/fsub.c
index 02c5dff458ba..47a8f847b422 100644
--- a/arch/powerpc/math-emu/fsub.c
+++ b/arch/powerpc/math-emu/fsub.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/fsubs.c b/arch/powerpc/math-emu/fsubs.c
index 5d9b18c35e07..fa1b3b18c379 100644
--- a/arch/powerpc/math-emu/fsubs.c
+++ b/arch/powerpc/math-emu/fsubs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/lfd.c b/arch/powerpc/math-emu/lfd.c
index 79ac76d596c3..3a6b03d999ab 100644
--- a/arch/powerpc/math-emu/lfd.c
+++ b/arch/powerpc/math-emu/lfd.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/double.h>
diff --git a/arch/powerpc/math-emu/lfs.c b/arch/powerpc/math-emu/lfs.c
index 434ed27be8db..7fd3d0854cd8 100644
--- a/arch/powerpc/math-emu/lfs.c
+++ b/arch/powerpc/math-emu/lfs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/math.c b/arch/powerpc/math-emu/math.c
index ab151f040502..936a9a149037 100644
--- a/arch/powerpc/math-emu/math.c
+++ b/arch/powerpc/math-emu/math.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1999 Eddie C. Dost (ecd@atecom.com)
*/
@@ -5,7 +6,7 @@
#include <linux/types.h>
#include <linux/sched.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/reg.h>
#include <asm/switch_to.h>
@@ -23,9 +24,9 @@ FLOATFUNC(mtfsf);
FLOATFUNC(mtfsfi);
#ifdef CONFIG_MATH_EMULATION_HW_UNIMPLEMENTED
-#undef FLOATFUNC(x)
+#undef FLOATFUNC
#define FLOATFUNC(x) static inline int x(void *op1, void *op2, void *op3, \
- void *op4) { }
+ void *op4) { return 0; }
#endif
FLOATFUNC(fadd);
@@ -224,7 +225,7 @@ record_exception(struct pt_regs *regs, int eflag)
int
do_mathemu(struct pt_regs *regs)
{
- void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
+ void *op0 = NULL, *op1 = NULL, *op2 = NULL, *op3 = NULL;
unsigned long pc = regs->nip;
signed short sdisp;
u32 insn = 0;
@@ -233,7 +234,7 @@ do_mathemu(struct pt_regs *regs)
int type = 0;
int eflag, trap;
- if (get_user(insn, (u32 *)pc))
+ if (get_user(insn, (u32 __user *)pc))
return -EFAULT;
switch (insn >> 26) {
@@ -395,28 +396,28 @@ do_mathemu(struct pt_regs *regs)
case XCR:
op0 = (void *)&regs->ccr;
- op1 = (void *)((insn >> 23) & 0x7);
+ op1 = (void *)(long)((insn >> 23) & 0x7);
op2 = (void *)&current->thread.TS_FPR((insn >> 16) & 0x1f);
op3 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
break;
case XCRL:
op0 = (void *)&regs->ccr;
- op1 = (void *)((insn >> 23) & 0x7);
- op2 = (void *)((insn >> 18) & 0x7);
+ op1 = (void *)(long)((insn >> 23) & 0x7);
+ op2 = (void *)(long)((insn >> 18) & 0x7);
break;
case XCRB:
- op0 = (void *)((insn >> 21) & 0x1f);
+ op0 = (void *)(long)((insn >> 21) & 0x1f);
break;
case XCRI:
- op0 = (void *)((insn >> 23) & 0x7);
- op1 = (void *)((insn >> 12) & 0xf);
+ op0 = (void *)(long)((insn >> 23) & 0x7);
+ op1 = (void *)(long)((insn >> 12) & 0xf);
break;
case XFLB:
- op0 = (void *)((insn >> 17) & 0xff);
+ op0 = (void *)(long)((insn >> 17) & 0xff);
op1 = (void *)&current->thread.TS_FPR((insn >> 11) & 0x1f);
break;
@@ -452,7 +453,7 @@ do_mathemu(struct pt_regs *regs)
break;
}
- regs->nip += 4;
+ regs_add_return_ip(regs, 4);
return 0;
illegal:
diff --git a/arch/powerpc/math-emu/math_efp.c b/arch/powerpc/math-emu/math_efp.c
index 28337c9709ae..34f62aafe706 100644
--- a/arch/powerpc/math-emu/math_efp.c
+++ b/arch/powerpc/math-emu/math_efp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/math-emu/math_efp.c
*
@@ -12,17 +13,13 @@
* Description:
* This file is the exception handler to make E500 SPE instructions
* fully comply with IEEE-754 floating point standard.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/types.h>
#include <linux/prctl.h>
+#include <linux/module.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/reg.h>
#define FP_EX_BOOKE_E500_SPE
@@ -222,6 +219,7 @@ int do_spe_mathemu(struct pt_regs *regs)
case AB:
case XCR:
FP_UNPACK_SP(SA, va.wp + 1);
+ fallthrough;
case XB:
FP_UNPACK_SP(SB, vb.wp + 1);
break;
@@ -230,8 +228,8 @@ int do_spe_mathemu(struct pt_regs *regs)
break;
}
- pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c);
- pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c);
+ pr_debug("SA: %d %08x %d (%d)\n", SA_s, SA_f, SA_e, SA_c);
+ pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c);
switch (func) {
case EFSABS:
@@ -282,7 +280,7 @@ int do_spe_mathemu(struct pt_regs *regs)
} else {
SB_e += (func == EFSCTSF ? 31 : 32);
FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
- (func == EFSCTSF));
+ (func == EFSCTSF) ? 1 : 0);
}
goto update_regs;
@@ -291,7 +289,7 @@ int do_spe_mathemu(struct pt_regs *regs)
FP_CLEAR_EXCEPTIONS;
FP_UNPACK_DP(DB, vb.dp);
- pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
+ pr_debug("DB: %d %08x %08x %d (%d)\n",
DB_s, DB_f1, DB_f0, DB_e, DB_c);
FP_CONV(S, D, 1, 2, SR, DB);
@@ -305,7 +303,7 @@ int do_spe_mathemu(struct pt_regs *regs)
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_ROUND_S(vc.wp[1], SB, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
goto update_regs;
@@ -316,7 +314,7 @@ int do_spe_mathemu(struct pt_regs *regs)
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_S(vc.wp[1], SB, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
goto update_regs;
@@ -326,7 +324,7 @@ int do_spe_mathemu(struct pt_regs *regs)
break;
pack_s:
- pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c);
+ pr_debug("SR: %d %08x %d (%d)\n", SR_s, SR_f, SR_e, SR_c);
FP_PACK_SP(vc.wp + 1, SR);
goto update_regs;
@@ -350,6 +348,7 @@ cmp_s:
case AB:
case XCR:
FP_UNPACK_DP(DA, va.dp);
+ fallthrough;
case XB:
FP_UNPACK_DP(DB, vb.dp);
break;
@@ -358,9 +357,9 @@ cmp_s:
break;
}
- pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n",
+ pr_debug("DA: %d %08x %08x %d (%d)\n",
DA_s, DA_f1, DA_f0, DA_e, DA_c);
- pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n",
+ pr_debug("DB: %d %08x %08x %d (%d)\n",
DB_s, DB_f1, DB_f0, DB_e, DB_c);
switch (func) {
@@ -412,7 +411,7 @@ cmp_s:
} else {
DB_e += (func == EFDCTSF ? 31 : 32);
FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
- (func == EFDCTSF));
+ (func == EFDCTSF) ? 1 : 0);
}
goto update_regs;
@@ -421,7 +420,7 @@ cmp_s:
FP_CLEAR_EXCEPTIONS;
FP_UNPACK_SP(SB, vb.wp + 1);
- pr_debug("SB: %ld %08lx %ld (%ld)\n",
+ pr_debug("SB: %d %08x %d (%d)\n",
SB_s, SB_f, SB_e, SB_c);
FP_CONV(D, S, 2, 1, DR, SB);
@@ -435,7 +434,7 @@ cmp_s:
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_D(vc.dp[0], DB, 64,
- ((func & 0x1) == 0));
+ ((func & 0x1) == 0) ? 1 : 0);
}
goto update_regs;
@@ -446,7 +445,7 @@ cmp_s:
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_ROUND_D(vc.wp[1], DB, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
goto update_regs;
@@ -457,7 +456,7 @@ cmp_s:
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_D(vc.wp[1], DB, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
goto update_regs;
@@ -467,7 +466,7 @@ cmp_s:
break;
pack_d:
- pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n",
+ pr_debug("DR: %d %08x %08x %d (%d)\n",
DR_s, DR_f1, DR_f0, DR_e, DR_c);
FP_PACK_DP(vc.dp, DR);
@@ -496,6 +495,7 @@ cmp_d:
case XCR:
FP_UNPACK_SP(SA0, va.wp);
FP_UNPACK_SP(SA1, va.wp + 1);
+ fallthrough;
case XB:
FP_UNPACK_SP(SB0, vb.wp);
FP_UNPACK_SP(SB1, vb.wp + 1);
@@ -506,13 +506,13 @@ cmp_d:
break;
}
- pr_debug("SA0: %ld %08lx %ld (%ld)\n",
+ pr_debug("SA0: %d %08x %d (%d)\n",
SA0_s, SA0_f, SA0_e, SA0_c);
- pr_debug("SA1: %ld %08lx %ld (%ld)\n",
+ pr_debug("SA1: %d %08x %d (%d)\n",
SA1_s, SA1_f, SA1_e, SA1_c);
- pr_debug("SB0: %ld %08lx %ld (%ld)\n",
+ pr_debug("SB0: %d %08x %d (%d)\n",
SB0_s, SB0_f, SB0_e, SB0_c);
- pr_debug("SB1: %ld %08lx %ld (%ld)\n",
+ pr_debug("SB1: %d %08x %d (%d)\n",
SB1_s, SB1_f, SB1_e, SB1_c);
switch (func) {
@@ -571,7 +571,7 @@ cmp_d:
} else {
SB0_e += (func == EVFSCTSF ? 31 : 32);
FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
- (func == EVFSCTSF));
+ (func == EVFSCTSF) ? 1 : 0);
}
if (SB1_c == FP_CLS_NAN) {
vc.wp[1] = 0;
@@ -579,7 +579,7 @@ cmp_d:
} else {
SB1_e += (func == EVFSCTSF ? 31 : 32);
FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
- (func == EVFSCTSF));
+ (func == EVFSCTSF) ? 1 : 0);
}
goto update_regs;
@@ -590,14 +590,14 @@ cmp_d:
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
if (SB1_c == FP_CLS_NAN) {
vc.wp[1] = 0;
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
goto update_regs;
@@ -608,14 +608,14 @@ cmp_d:
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_S(vc.wp[0], SB0, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
if (SB1_c == FP_CLS_NAN) {
vc.wp[1] = 0;
FP_SET_EXCEPTION(FP_EX_INVALID);
} else {
FP_TO_INT_S(vc.wp[1], SB1, 32,
- ((func & 0x3) != 0));
+ ((func & 0x3) != 0) ? 1 : 0);
}
goto update_regs;
@@ -625,9 +625,9 @@ cmp_d:
break;
pack_vs:
- pr_debug("SR0: %ld %08lx %ld (%ld)\n",
+ pr_debug("SR0: %d %08x %d (%d)\n",
SR0_s, SR0_f, SR0_e, SR0_c);
- pr_debug("SR1: %ld %08lx %ld (%ld)\n",
+ pr_debug("SR1: %d %08x %d (%d)\n",
SR1_s, SR1_f, SR1_e, SR1_c);
FP_PACK_SP(vc.wp, SR0);
@@ -714,7 +714,7 @@ update_regs:
illegal:
if (have_e500_cpu_a005_erratum) {
/* according to e500 cpu a005 erratum, reissue efp inst */
- regs->nip -= 4;
+ regs_add_return_ip(regs, -4);
pr_debug("re-issue efp inst: %08lx\n", speinsn);
return 0;
}
@@ -890,7 +890,7 @@ int speround_handler(struct pt_regs *regs)
return 0;
}
-int __init spe_mathemu_init(void)
+static int __init spe_mathemu_init(void)
{
u32 pvr, maj, min;
diff --git a/arch/powerpc/math-emu/mcrfs.c b/arch/powerpc/math-emu/mcrfs.c
index e948d5708e2b..9c4fdaace475 100644
--- a/arch/powerpc/math-emu/mcrfs.c
+++ b/arch/powerpc/math-emu/mcrfs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mffs.c b/arch/powerpc/math-emu/mffs.c
index 5526cf96ede5..d42f1278e958 100644
--- a/arch/powerpc/math-emu/mffs.c
+++ b/arch/powerpc/math-emu/mffs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsb0.c b/arch/powerpc/math-emu/mtfsb0.c
index bc985585bca8..5753170b5dfd 100644
--- a/arch/powerpc/math-emu/mtfsb0.c
+++ b/arch/powerpc/math-emu/mtfsb0.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsb1.c b/arch/powerpc/math-emu/mtfsb1.c
index fe6ed5ac85b3..8162c3bfd149 100644
--- a/arch/powerpc/math-emu/mtfsb1.c
+++ b/arch/powerpc/math-emu/mtfsb1.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c
index 44b0fc8214f4..7ae990f6b58b 100644
--- a/arch/powerpc/math-emu/mtfsf.c
+++ b/arch/powerpc/math-emu/mtfsf.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/mtfsfi.c b/arch/powerpc/math-emu/mtfsfi.c
index fd2acc26813b..45f1edbda357 100644
--- a/arch/powerpc/math-emu/mtfsfi.c
+++ b/arch/powerpc/math-emu/mtfsfi.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/stfd.c b/arch/powerpc/math-emu/stfd.c
index 33a165c8df0f..463d2f0832d9 100644
--- a/arch/powerpc/math-emu/stfd.c
+++ b/arch/powerpc/math-emu/stfd.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
stfd(void *frS, void *ea)
diff --git a/arch/powerpc/math-emu/stfiwx.c b/arch/powerpc/math-emu/stfiwx.c
index f15a35f67e2c..24ae9622fed6 100644
--- a/arch/powerpc/math-emu/stfiwx.c
+++ b/arch/powerpc/math-emu/stfiwx.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
int
stfiwx(u32 *frS, void *ea)
diff --git a/arch/powerpc/math-emu/stfs.c b/arch/powerpc/math-emu/stfs.c
index 6122147356d1..ddf9bbdb5b55 100644
--- a/arch/powerpc/math-emu/stfs.c
+++ b/arch/powerpc/math-emu/stfs.c
@@ -1,6 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
#include <linux/errno.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/sfp-machine.h>
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/math-emu/udivmodti4.c b/arch/powerpc/math-emu/udivmodti4.c
index 6172044ab003..1e52633dcbb7 100644
--- a/arch/powerpc/math-emu/udivmodti4.c
+++ b/arch/powerpc/math-emu/udivmodti4.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/* This has so very few changes over libgcc2's __udivmoddi4 it isn't funny. */
#include <math-emu/soft-fp.h>
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
deleted file mode 100644
index 5810967511d4..000000000000
--- a/arch/powerpc/mm/40x_mmu.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * This file contains the routines for initializing the MMU
- * on the 4xx series of chips.
- * -- paulus
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/stddef.h>
-#include <linux/vmalloc.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/uaccess.h>
-#include <asm/smp.h>
-#include <asm/bootx.h>
-#include <asm/machdep.h>
-#include <asm/setup.h>
-
-#include "mmu_decl.h"
-
-extern int __map_without_ltlbs;
-/*
- * MMU_init_hw does the chip-specific initialization of the MMU hardware.
- */
-void __init MMU_init_hw(void)
-{
- /*
- * The Zone Protection Register (ZPR) defines how protection will
- * be applied to every page which is a member of a given zone. At
- * present, we utilize only two of the 4xx's zones.
- * The zone index bits (of ZSEL) in the PTE are used for software
- * indicators, except the LSB. For user access, zone 1 is used,
- * for kernel access, zone 0 is used. We set all but zone 1
- * to zero, allowing only kernel access as indicated in the PTE.
- * For zone 1, we set a 01 binary (a value of 10 will not work)
- * to allow user access as indicated in the PTE. This also allows
- * kernel access as indicated in the PTE.
- */
-
- mtspr(SPRN_ZPR, 0x10000000);
-
- flush_instruction_cache();
-
- /*
- * Set up the real-mode cache parameters for the exception vector
- * handlers (which are run in real-mode).
- */
-
- mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */
-
- /*
- * Cache instruction and data space where the exception
- * vectors and the kernel live in real-mode.
- */
-
- mtspr(SPRN_DCCR, 0xFFFF0000); /* 2GByte of data space at 0x0. */
- mtspr(SPRN_ICCR, 0xFFFF0000); /* 2GByte of instr. space at 0x0. */
-}
-
-#define LARGE_PAGE_SIZE_16M (1<<24)
-#define LARGE_PAGE_SIZE_4M (1<<22)
-
-unsigned long __init mmu_mapin_ram(unsigned long top)
-{
- unsigned long v, s, mapped;
- phys_addr_t p;
-
- v = KERNELBASE;
- p = 0;
- s = total_lowmem;
-
- if (__map_without_ltlbs)
- return 0;
-
- while (s >= LARGE_PAGE_SIZE_16M) {
- pmd_t *pmdp;
- unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE;
-
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- pmd_val(*pmdp++) = val;
- pmd_val(*pmdp++) = val;
- pmd_val(*pmdp++) = val;
- pmd_val(*pmdp++) = val;
-
- v += LARGE_PAGE_SIZE_16M;
- p += LARGE_PAGE_SIZE_16M;
- s -= LARGE_PAGE_SIZE_16M;
- }
-
- while (s >= LARGE_PAGE_SIZE_4M) {
- pmd_t *pmdp;
- unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE;
-
- pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
- pmd_val(*pmdp) = val;
-
- v += LARGE_PAGE_SIZE_4M;
- p += LARGE_PAGE_SIZE_4M;
- s -= LARGE_PAGE_SIZE_4M;
- }
-
- mapped = total_lowmem - s;
-
- /* If the size of RAM is not an exact power of two, we may not
- * have covered RAM in its entirety with 16 and 4 MiB
- * pages. Consequently, restrict the top end of RAM currently
- * allocable so that calls to the MEMBLOCK to allocate PTEs for "tail"
- * coverage with normal-sized pages (or other reasons) do not
- * attempt to allocate outside the allowed range.
- */
- memblock_set_current_limit(mapped);
-
- return mapped;
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* We don't currently support the first MEMBLOCK not mapping 0
- * physical on those processors
- */
- BUG_ON(first_memblock_base != 0);
-
- /* 40x can only access 16MB at the moment (see head_40x.S) */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-}
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index d0130fff20e5..8c1582b2987d 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -1,36 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux ppc-specific parts of the memory manager.
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
-obj-y := fault.o mem.o pgtable.o gup.o mmap.o \
- init_$(CONFIG_WORD_SIZE).o \
- pgtable_$(CONFIG_WORD_SIZE).o
-obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
- tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(CONFIG_WORD_SIZE)e.o
-hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
-obj-$(CONFIG_PPC_STD_MMU_64) += hash_utils_64.o slb_low.o slb.o $(hash64-y)
-obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o
-obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \
- tlb_hash$(CONFIG_WORD_SIZE).o \
- mmu_context_hash$(CONFIG_WORD_SIZE).o
-obj-$(CONFIG_PPC_ICSWX) += icswx.o
-obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o
-obj-$(CONFIG_40x) += 40x_mmu.o
-obj-$(CONFIG_44x) += 44x_mmu.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
-obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
-obj-$(CONFIG_PPC_MM_SLICES) += slice.o
-obj-y += hugetlbpage.o
-ifeq ($(CONFIG_HUGETLB_PAGE),y)
-obj-$(CONFIG_PPC_STD_MMU_64) += hugetlbpage-hash64.o
-obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
-endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
+obj-y := fault.o mem.o pgtable.o maccess.o pageattr.o \
+ init_$(BITS).o pgtable_$(BITS).o \
+ pgtable-frag.o ioremap.o ioremap_$(BITS).o \
+ init-common.o mmu_context.o drmem.o \
+ cacheflush.o
+obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
+obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
+obj-$(CONFIG_NUMA) += numa.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
-obj-$(CONFIG_HIGHMEM) += highmem.o
+obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
+obj-$(CONFIG_PTDUMP) += ptdump/
+obj-$(CONFIG_KASAN) += kasan/
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
new file mode 100644
index 000000000000..50dd8f6bdf46
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE_mmu.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += mmu.o mmu_context.o
+obj-$(CONFIG_PPC_BOOK3S_603) += nohash_low.o
+obj-$(CONFIG_PPC_BOOK3S_604) += hash_low.o tlb.o
+obj-$(CONFIG_PPC_KUAP) += kuap.o
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/book3s32/hash_low.S
index 115347f74ce5..4ed0efd03db5 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,48 +12,46 @@
* This file contains low-level assembler routines for managing
* the PowerPC MMU hash table. (PPC 8xx processors don't use a
* hash table, so this file is not used on them.)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
+#include <linux/export.h>
+#include <linux/pgtable.h>
+#include <linux/init.h>
#include <asm/reg.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/ppc_asm.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/feature-fixups.h>
+#include <asm/code-patching-asm.h>
-#ifdef CONFIG_SMP
- .section .bss
- .align 2
- .globl mmu_hash_lock
-mmu_hash_lock:
- .space 4
-#endif /* CONFIG_SMP */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_T_SIZE 8
+#define PTE_FLAGS_OFFSET 4 /* offset of PTE flags, in bytes */
+#else
+#define PTE_T_SIZE 4
+#define PTE_FLAGS_OFFSET 0
+#endif
/*
* Load a PTE into the hash table, if possible.
- * The address is in r4, and r3 contains an access flag:
- * _PAGE_RW (0x400) if a write.
+ * The address is in r4, and r3 contains required access flags:
+ * - For ISI: _PAGE_PRESENT | _PAGE_EXEC
+ * - For DSI: _PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE if a write.
* r9 contains the SRR1 value, from which we use the MSR_PR bit.
* SPRG_THREAD contains the physical address of the current task's thread.
*
* Returns to the caller if the access is illegal or there is no
* mapping for the address. Otherwise it places an appropriate PTE
* in the hash table and returns from the exception.
- * Uses r0, r3 - r8, r10, ctr, lr.
+ * Uses r0, r3 - r6, r8, r10, ctr, lr.
*/
.text
_GLOBAL(hash_page)
- tophys(r7,0) /* gets -KERNELBASE into r7 */
#ifdef CONFIG_SMP
- addis r8,r7,mmu_hash_lock@h
- ori r8,r8,mmu_hash_lock@l
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@h
+ ori r8, r8, (mmu_hash_lock - PAGE_OFFSET)@l
lis r0,0x0fff
b 10f
11: lwz r6,0(r8)
@@ -66,16 +65,20 @@ _GLOBAL(hash_page)
isync
#endif
/* Get PTE (linux-style) and check access */
- lis r0,KERNELBASE@h /* check if kernel address */
+ lis r0, TASK_SIZE@h /* check if kernel address */
cmplw 0,r4,r0
mfspr r8,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
- ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
lwz r5,PGDIR(r8) /* virt page-table root */
blt+ 112f /* assume user more likely */
lis r5,swapper_pg_dir@ha /* if kernel address, use */
+ andi. r0,r9,MSR_PR /* Check usermode */
addi r5,r5,swapper_pg_dir@l /* kernel page table */
- rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */
-112: add r5,r5,r7 /* convert to phys addr */
+#ifdef CONFIG_SMP
+ bne- .Lhash_page_out /* return if usermode */
+#else
+ bnelr-
+#endif
+112: tophys(r5, r5)
#ifndef CONFIG_PTE_64BIT
rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */
lwz r8,0(r5) /* get pmd entry */
@@ -86,7 +89,7 @@ _GLOBAL(hash_page)
rlwinm. r8,r8,0,0,20 /* extract pt base address */
#endif
#ifdef CONFIG_SMP
- beq- hash_page_out /* return if no mapping */
+ beq- .Lhash_page_out /* return if no mapping */
#else
/* XXX it seems like the 601 will give a machine fault on the
rfi if its alignment is wrong (bottom 4 bits of address are
@@ -98,27 +101,35 @@ _GLOBAL(hash_page)
rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */
#else
rlwimi r8,r4,23,20,28 /* compute pte address */
+ /*
+ * If PTE_64BIT is set, the low word is the flags word; use that
+ * word for locking since it contains all the interesting bits.
+ */
+ addi r8,r8,PTE_FLAGS_OFFSET
#endif
- rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */
- ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
/*
* Update the linux PTE atomically. We do the lwarx up-front
* because almost always, there won't be a permission violation
* and there won't already be an HPTE, and thus we will have
* to update the PTE to set _PAGE_HASHPTE. -- paulus.
- *
- * If PTE_64BIT is set, the low word is the flags word; use that
- * word for locking since it contains all the interesting bits.
*/
-#if (PTE_FLAGS_OFFSET != 0)
- addi r8,r8,PTE_FLAGS_OFFSET
-#endif
-retry:
+.Lretry:
lwarx r6,0,r8 /* get linux-style pte, flag word */
+#ifdef CONFIG_PPC_KUAP
+ mfsrin r5,r4
+ rlwinm r0,r9,28,_PAGE_WRITE /* MSR[PR] => _PAGE_WRITE */
+ rlwinm r5,r5,12,_PAGE_WRITE /* Ks => _PAGE_WRITE */
+ andc r5,r5,r0 /* Ks & ~MSR[PR] */
+ andc r5,r6,r5 /* Clear _PAGE_WRITE when Ks = 1 && MSR[PR] = 0 */
+ andc. r5,r3,r5 /* check access & ~permission */
+#else
andc. r5,r3,r6 /* check access & ~permission */
+#endif
+ rlwinm r0,r3,32-3,24,24 /* _PAGE_WRITE access -> _PAGE_DIRTY */
+ ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE
#ifdef CONFIG_SMP
- bne- hash_page_out /* return if access not permitted */
+ bne- .Lhash_page_out /* return if access not permitted */
#else
bnelr-
#endif
@@ -133,36 +144,28 @@ retry:
#endif /* CONFIG_SMP */
#endif /* CONFIG_PTE_64BIT */
stwcx. r5,0,r8 /* attempt to update PTE */
- bne- retry /* retry if someone got there first */
+ bne- .Lretry /* retry if someone got there first */
mfsrin r3,r4 /* get segment reg for segment */
- mfctr r0
- stw r0,_CTR(r11)
bl create_hpte /* add the hash table entry */
#ifdef CONFIG_SMP
eieio
- addis r8,r7,mmu_hash_lock@ha
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li r0,0
- stw r0,mmu_hash_lock@l(r8)
+ stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
#endif
-
- /* Return from the exception */
- lwz r5,_CTR(r11)
- mtctr r5
- lwz r0,GPR0(r11)
- lwz r7,GPR7(r11)
- lwz r8,GPR8(r11)
- b fast_exception_return
+ b fast_hash_page_return
#ifdef CONFIG_SMP
-hash_page_out:
+.Lhash_page_out:
eieio
- addis r8,r7,mmu_hash_lock@ha
+ lis r8, (mmu_hash_lock - PAGE_OFFSET)@ha
li r0,0
- stw r0,mmu_hash_lock@l(r8)
+ stw r0, (mmu_hash_lock - PAGE_OFFSET)@l(r8)
blr
#endif /* CONFIG_SMP */
+_ASM_NOKPROBE_SYMBOL(hash_page)
/*
* Add an entry for a particular page to the hash table.
@@ -177,15 +180,8 @@ _GLOBAL(add_hash_page)
mflr r0
stw r0,4(r1)
- /* Convert context and va to VSID */
- mulli r3,r3,897*16 /* multiply context by context skew */
- rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
- mulli r0,r0,0x111 /* multiply by ESID skew */
- add r3,r3,r0 /* note create_hpte trims to 24 bits */
-
#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r8, r1) /* use cpu number to make tag */
- lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */
+ lwz r8,TASK_CPU(r2) /* to go in mmu_hash_lock */
oris r8,r8,12
#endif /* CONFIG_SMP */
@@ -199,25 +195,21 @@ _GLOBAL(add_hash_page)
* covered by a BAT). -- paulus
*/
mfmsr r9
- SYNC
rlwinm r0,r9,0,17,15 /* clear bit 16 (MSR_EE) */
rlwinm r0,r0,0,28,26 /* clear MSR_DR */
mtmsr r0
- SYNC_601
isync
- tophys(r7,0)
-
#ifdef CONFIG_SMP
- addis r6,r7,mmu_hash_lock@ha
- addi r6,r6,mmu_hash_lock@l
+ lis r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+ addi r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
10: lwarx r0,0,r6 /* take the mmu_hash_lock */
- cmpi 0,r0,0
+ cmpwi 0,r0,0
bne- 11f
stwcx. r8,0,r6
beq+ 12f
11: lwz r0,0(r6)
- cmpi 0,r0,0
+ cmpwi 0,r0,0
beq 10b
b 11b
12: isync
@@ -251,12 +243,18 @@ _GLOBAL(add_hash_page)
stwcx. r5,0,r8
bne- 1b
+ /* Convert context and va to VSID */
+ mulli r3,r3,897*16 /* multiply context by context skew */
+ rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */
+ mulli r0,r0,0x111 /* multiply by ESID skew */
+ add r3,r3,r0 /* note create_hpte trims to 24 bits */
+
bl create_hpte
9:
#ifdef CONFIG_SMP
- addis r6,r7,mmu_hash_lock@ha
- addi r6,r6,mmu_hash_lock@l
+ lis r6, (mmu_hash_lock - PAGE_OFFSET)@ha
+ addi r6, r6, (mmu_hash_lock - PAGE_OFFSET)@l
eieio
li r0,0
stw r0,0(r6) /* clear mmu_hash_lock */
@@ -264,22 +262,20 @@ _GLOBAL(add_hash_page)
/* reenable interrupts and DR */
mtmsr r9
- SYNC_601
isync
lwz r0,4(r1)
mtlr r0
blr
+_ASM_NOKPROBE_SYMBOL(add_hash_page)
/*
* This routine adds a hardware PTE to the hash table.
* It is designed to be called with the MMU either on or off.
* r3 contains the VSID, r4 contains the virtual address,
* r5 contains the linux PTE, r6 contains the old value of the
- * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the
- * offset to be added to addresses (0 if the MMU is on,
- * -KERNELBASE if it is off). r10 contains the upper half of
- * the PTE if CONFIG_PTE_64BIT.
+ * linux PTE (before setting _PAGE_HASHPTE). r10 contains the
+ * upper half of the PTE if CONFIG_PTE_64BIT.
* On SMP, the caller should have the mmu_hash_lock held.
* We assume that the caller has (or will) set the _PAGE_HASHPTE
* bit in the linux PTE in memory. The value passed in r6 should
@@ -290,9 +286,9 @@ _GLOBAL(add_hash_page)
*
* For speed, 4 of the instructions get patched once the size and
* physical address of the hash table are known. These definitions
- * of Hash_base and Hash_bits below are just an example.
+ * of Hash_base and Hash_bits below are for the early hash table.
*/
-Hash_base = 0xc0180000
+Hash_base = early_hash
Hash_bits = 12 /* e.g. 256kB hash table */
Hash_msk = (((1 << Hash_bits) - 1) * 64)
@@ -313,15 +309,19 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1)
#define HASH_RIGHT 31-LG_PTEG_SIZE
+__REF
_GLOBAL(create_hpte)
/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
- rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ lis r0, TASK_SIZE@h
+ rlwinm r5,r5,0,~3 /* Clear PP bits */
+ cmplw r4,r0
+ rlwinm r8,r5,32-9,30,30 /* _PAGE_WRITE -> PP msb */
+ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */
and r8,r8,r0 /* writable if _RW & _DIRTY */
- rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
- rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
- ori r8,r8,0xe04 /* clear out reserved bits */
- andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
+ bge- 1f /* Kernelspace ? Skip */
+ ori r5,r5,3 /* Userspace ? PP = 3 */
+1: ori r8,r8,0xe04 /* clear out reserved bits */
+ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */
BEGIN_FTR_SECTION
rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -336,11 +336,13 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */
SET_V(r5) /* set V (valid) bit */
+ patch_site 0f, patch__hash_page_A0
+ patch_site 1f, patch__hash_page_A1
+ patch_site 2f, patch__hash_page_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-_GLOBAL(hash_page_patch_A)
- addis r0,r7,Hash_base@h /* base address of hash table */
- rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
- rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+0: lis r0, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */
+1: rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
+2: rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r3,r3,r0 /* make primary hash */
li r0,8 /* PTEs/group */
@@ -352,30 +354,25 @@ _GLOBAL(hash_page_patch_A)
beq+ 10f /* no PTE: go look for an empty slot */
tlbie r4
- addis r4,r7,htab_hash_searches@ha
- lwz r6,htab_hash_searches@l(r4)
- addi r6,r6,1 /* count how many searches we do */
- stw r6,htab_hash_searches@l(r4)
-
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
mtctr r0
addi r4,r3,-HPTE_SIZE
1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */
CMPPTE 0,r6,r5
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
- beq+ found_slot
+ beq+ .Lfound_slot
+ patch_site 0f, patch__hash_page_B
/* Search the secondary PTEG for a matching PTE */
ori r5,r5,PTE_H /* set H (secondary hash) bit */
-_GLOBAL(hash_page_patch_B)
- xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
+0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
xori r4,r4,(-PTEG_SIZE & 0xffff)
addi r4,r4,-HPTE_SIZE
mtctr r0
2: LDPTEu r6,HPTE_SIZE(r4)
CMPPTE 0,r6,r5
bdnzf 2,2b
- beq+ found_slot
+ beq+ .Lfound_slot
xori r5,r5,PTE_H /* clear H bit again */
/* Search the primary PTEG for an empty slot */
@@ -384,25 +381,19 @@ _GLOBAL(hash_page_patch_B)
1: LDPTEu r6,HPTE_SIZE(r4) /* get next PTE */
TST_V(r6) /* test valid bit */
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
- beq+ found_empty
-
- /* update counter of times that the primary PTEG is full */
- addis r4,r7,primary_pteg_full@ha
- lwz r6,primary_pteg_full@l(r4)
- addi r6,r6,1
- stw r6,primary_pteg_full@l(r4)
+ beq+ .Lfound_empty
+ patch_site 0f, patch__hash_page_C
/* Search the secondary PTEG for an empty slot */
ori r5,r5,PTE_H /* set H (secondary hash) bit */
-_GLOBAL(hash_page_patch_C)
- xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
+0: xoris r4,r3,Hash_msk>>16 /* compute secondary hash */
xori r4,r4,(-PTEG_SIZE & 0xffff)
addi r4,r4,-HPTE_SIZE
mtctr r0
2: LDPTEu r6,HPTE_SIZE(r4)
TST_V(r6)
bdnzf 2,2b
- beq+ found_empty
+ beq+ .Lfound_empty
xori r5,r5,PTE_H /* clear H bit again */
/*
@@ -413,36 +404,20 @@ _GLOBAL(hash_page_patch_C)
* and we know there is a definite (although small) speed
* advantage to putting the PTE in the primary PTEG, we always
* put the PTE in the primary PTEG.
- *
- * In addition, we skip any slot that is mapping kernel text in
- * order to avoid a deadlock when not using BAT mappings if
- * trying to hash in the kernel hash code itself after it has
- * already taken the hash table lock. This works in conjunction
- * with pre-faulting of the kernel text.
- *
- * If the hash table bucket is full of kernel text entries, we'll
- * lockup here but that shouldn't happen
*/
-1: addis r4,r7,next_slot@ha /* get next evict slot */
- lwz r6,next_slot@l(r4)
+ lis r4, (next_slot - PAGE_OFFSET)@ha /* get next evict slot */
+ lwz r6, (next_slot - PAGE_OFFSET)@l(r4)
addi r6,r6,HPTE_SIZE /* search for candidate */
andi. r6,r6,7*HPTE_SIZE
stw r6,next_slot@l(r4)
add r4,r3,r6
- LDPTE r0,HPTE_SIZE/2(r4) /* get PTE second word */
- clrrwi r0,r0,12
- lis r6,etext@h
- ori r6,r6,etext@l /* get etext */
- tophys(r6,r6)
- cmpl cr0,r0,r6 /* compare and try again */
- blt 1b
#ifndef CONFIG_SMP
/* Store PTE in PTEG */
-found_empty:
+.Lfound_empty:
STPTE r5,0(r4)
-found_slot:
+.Lfound_slot:
STPTE r8,HPTE_SIZE/2(r4)
#else /* CONFIG_SMP */
@@ -463,8 +438,8 @@ found_slot:
* We do however have to make sure that the PTE is never in an invalid
* state with the V bit set.
*/
-found_empty:
-found_slot:
+.Lfound_empty:
+.Lfound_slot:
CLR_V(r5,r0) /* clear V (valid) bit in PTE */
STPTE r5,0(r4)
sync
@@ -477,15 +452,13 @@ found_slot:
sync /* make sure pte updates get to memory */
blr
+ .previous
+_ASM_NOKPROBE_SYMBOL(create_hpte)
.section .bss
.align 2
next_slot:
.space 4
-primary_pteg_full:
- .space 4
-htab_hash_searches:
- .space 4
.previous
/*
@@ -496,9 +469,8 @@ htab_hash_searches:
*
* We assume that there is a hash table in use (Hash != 0).
*/
+__REF
_GLOBAL(flush_hash_pages)
- tophys(r7,0)
-
/*
* We disable interrupts here, even on UP, because we want
* the _PAGE_HASHPTE bit to be a reliable indication of
@@ -508,11 +480,9 @@ _GLOBAL(flush_hash_pages)
* covered by a BAT). -- paulus
*/
mfmsr r10
- SYNC
rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
rlwinm r0,r0,0,28,26 /* clear MSR_DR */
mtmsr r0
- SYNC_601
isync
/* First find a PTE in the range that has _PAGE_HASHPTE set */
@@ -520,14 +490,15 @@ _GLOBAL(flush_hash_pages)
rlwimi r5,r4,22,20,29
#else
rlwimi r5,r4,23,20,28
+ addi r5,r5,PTE_FLAGS_OFFSET
#endif
-1: lwz r0,PTE_FLAGS_OFFSET(r5)
+1: lwz r0,0(r5)
cmpwi cr1,r6,1
andi. r0,r0,_PAGE_HASHPTE
bne 2f
ble cr1,19f
addi r4,r4,0x1000
- addi r5,r5,PTE_SIZE
+ addi r5,r5,PTE_T_SIZE
addi r6,r6,-1
b 1b
@@ -543,19 +514,18 @@ _GLOBAL(flush_hash_pages)
SET_V(r11) /* set V (valid) bit */
#ifdef CONFIG_SMP
- addis r9,r7,mmu_hash_lock@ha
- addi r9,r9,mmu_hash_lock@l
- CURRENT_THREAD_INFO(r8, r1)
- add r8,r8,r7
- lwz r8,TI_CPU(r8)
+ lis r9, (mmu_hash_lock - PAGE_OFFSET)@ha
+ addi r9, r9, (mmu_hash_lock - PAGE_OFFSET)@l
+ tophys (r8, r2)
+ lwz r8, TASK_CPU(r8)
oris r8,r8,9
10: lwarx r0,0,r9
- cmpi 0,r0,0
+ cmpwi 0,r0,0
bne- 11f
stwcx. r8,0,r9
beq+ 12f
11: lwz r0,0(r9)
- cmpi 0,r0,0
+ cmpwi 0,r0,0
beq 10b
b 11b
12: isync
@@ -566,9 +536,6 @@ _GLOBAL(flush_hash_pages)
* already clear, we're done (for this pte). If not,
* clear it (atomically) and proceed. -- paulus.
*/
-#if (PTE_FLAGS_OFFSET != 0)
- addi r5,r5,PTE_FLAGS_OFFSET
-#endif
33: lwarx r8,0,r5 /* fetch the pte flags word */
andi. r0,r8,_PAGE_HASHPTE
beq 8f /* done if HASHPTE is already clear */
@@ -576,11 +543,13 @@ _GLOBAL(flush_hash_pages)
stwcx. r8,0,r5 /* update the pte */
bne- 33b
+ patch_site 0f, patch__flush_hash_A0
+ patch_site 1f, patch__flush_hash_A1
+ patch_site 2f, patch__flush_hash_A2
/* Get the address of the primary PTE group in the hash table (r3) */
-_GLOBAL(flush_hash_patch_A)
- addis r8,r7,Hash_base@h /* base address of hash table */
- rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
- rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
+0: lis r8, (Hash_base - PAGE_OFFSET)@h /* base address of hash table */
+1: rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */
+2: rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */
xor r8,r0,r8 /* make primary hash */
/* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */
@@ -592,11 +561,11 @@ _GLOBAL(flush_hash_patch_A)
bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */
beq+ 3f
+ patch_site 0f, patch__flush_hash_B
/* Search the secondary PTEG for a matching PTE */
ori r11,r11,PTE_H /* set H (secondary hash) bit */
li r0,8 /* PTEs/group */
-_GLOBAL(flush_hash_patch_B)
- xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
+0: xoris r12,r8,Hash_msk>>16 /* compute secondary hash */
xori r12,r12,(-PTEG_SIZE & 0xffff)
addi r12,r12,-HPTE_SIZE
mtctr r0
@@ -614,7 +583,7 @@ _GLOBAL(flush_hash_patch_B)
8: ble cr1,9f /* if all ptes checked */
81: addi r6,r6,-1
- addi r5,r5,PTE_SIZE
+ addi r5,r5,PTE_T_SIZE
addi r4,r4,0x1000
lwz r0,0(r5) /* check next pte */
cmpwi cr1,r6,1
@@ -630,83 +599,8 @@ _GLOBAL(flush_hash_patch_B)
#endif
19: mtmsr r10
- SYNC_601
- isync
- blr
-
-/*
- * Flush an entry from the TLB
- */
-_GLOBAL(_tlbie)
-#ifdef CONFIG_SMP
- CURRENT_THREAD_INFO(r8, r1)
- lwz r8,TI_CPU(r8)
- oris r8,r8,11
- mfmsr r10
- SYNC
- rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
- rlwinm r0,r0,0,28,26 /* clear DR */
- mtmsr r0
- SYNC_601
- isync
- lis r9,mmu_hash_lock@h
- ori r9,r9,mmu_hash_lock@l
- tophys(r9,r9)
-10: lwarx r7,0,r9
- cmpwi 0,r7,0
- bne- 10b
- stwcx. r8,0,r9
- bne- 10b
- eieio
- tlbie r3
- sync
- TLBSYNC
- li r0,0
- stw r0,0(r9) /* clear mmu_hash_lock */
- mtmsr r10
- SYNC_601
- isync
-#else /* CONFIG_SMP */
- tlbie r3
- sync
-#endif /* CONFIG_SMP */
- blr
-
-/*
- * Flush the entire TLB. 603/603e only
- */
-_GLOBAL(_tlbia)
-#if defined(CONFIG_SMP)
- CURRENT_THREAD_INFO(r8, r1)
- lwz r8,TI_CPU(r8)
- oris r8,r8,10
- mfmsr r10
- SYNC
- rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
- rlwinm r0,r0,0,28,26 /* clear DR */
- mtmsr r0
- SYNC_601
isync
- lis r9,mmu_hash_lock@h
- ori r9,r9,mmu_hash_lock@l
- tophys(r9,r9)
-10: lwarx r7,0,r9
- cmpwi 0,r7,0
- bne- 10b
- stwcx. r8,0,r9
- bne- 10b
- sync
- tlbia
- sync
- TLBSYNC
- li r0,0
- stw r0,0(r9) /* clear mmu_hash_lock */
- mtmsr r10
- SYNC_601
- isync
-#else /* CONFIG_SMP */
- sync
- tlbia
- sync
-#endif /* CONFIG_SMP */
blr
+ .previous
+EXPORT_SYMBOL(flush_hash_pages)
+_ASM_NOKPROBE_SYMBOL(flush_hash_pages)
diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c
new file mode 100644
index 000000000000..3a8815555a48
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/kuap.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+/*
+ * Set up Kernel Userspace Access Protection (KUAP) on the calling CPU.
+ *
+ * @disabled: true when KUAP must not be activated.
+ *
+ * Unless disabled, SR_KS is OR-ed into the value read with mfsr(0) and
+ * handed to update_user_segments(); the same bit is recorded in
+ * init_mm.context.sr0 and in the current thread's sr0.
+ * The MMU feature bit / log message handling at the end is performed
+ * on the boot CPU only.
+ */
+void setup_kuap(bool disabled)
+{
+ if (!disabled) {
+ update_user_segments(mfsr(0) | SR_KS);
+ isync(); /* Context sync required after mtsr() */
+ init_mm.context.sr0 |= SR_KS;
+ current->thread.sr0 |= SR_KS;
+ }
+
+ /* The remaining work touches global state: do it once, on the boot CPU */
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ if (disabled)
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
+ else
+ pr_info("Activating Kernel Userspace Access Protection\n");
+}
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
new file mode 100644
index 000000000000..c42ecdf94e48
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for handling the MMU on those
+ * PowerPC implementations where the MMU substantially follows the
+ * architecture specification. This includes the 6xx, 7xx, 7xxx,
+ * and 8260 implementations but excludes the 8xx and 4xx.
+ * -- paulus
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/memblock.h>
+
+#include <asm/mmu.h>
+#include <asm/machdep.h>
+#include <asm/text-patching.h>
+#include <asm/sections.h>
+
+#include <mm/mmu_decl.h>
+
+u8 __initdata early_hash[SZ_256K] __aligned(SZ_256K) = {0};
+
+static struct hash_pte __initdata *Hash = (struct hash_pte *)early_hash;
+static unsigned long __initdata Hash_size, Hash_mask;
+static unsigned int __initdata hash_mb, hash_mb2;
+unsigned long __initdata _SDR1;
+
+struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
+
+static struct batrange { /* stores address ranges mapped by BATs */
+ unsigned long start;
+ unsigned long limit;
+ phys_addr_t phys;
+} bat_addrs[8];
+
+#ifdef CONFIG_SMP
+unsigned long mmu_hash_lock;
+#endif
+
+/*
+ * Return PA for this VA if it is mapped by a BAT, or 0
+ *
+ * Every bat_addrs[] entry is scanned and va is translated through the
+ * first range that contains it.
+ *
+ * NOTE(review): setbat() stores .limit as the address of the last byte of
+ * the block (virt + size - 1) while the test below is "va < limit", so the
+ * last byte of a block is reported as not mapped. Confirm whether this is
+ * intended before relying on it.
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+ int b;
+ for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+ if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
+ return bat_addrs[b].phys + (va - bat_addrs[b].start);
+ return 0;
+}
+
+/*
+ * Return VA for a given PA or 0 if not mapped
+ *
+ * Every bat_addrs[] entry is scanned; the physical range of an entry is
+ * rebuilt as [phys, phys + (limit - start)) and pa is translated through
+ * the first range that contains it.
+ *
+ * NOTE(review): .limit is written by setbat() as the last byte of the
+ * block, so (limit - start) is size - 1 and the last byte of a block is
+ * reported as not mapped. Confirm whether this is intended.
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+ int b;
+ for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b)
+ if (pa >= bat_addrs[b].phys
+ && pa < (bat_addrs[b].limit-bat_addrs[b].start)
+ +bat_addrs[b].phys)
+ return bat_addrs[b].start+(pa-bat_addrs[b].phys);
+ return 0;
+}
+
+/*
+ * Return the index of the first BAT pair whose DBAT image is unused, or -1
+ * when there is none. 8 pairs are examined when MMU_FTR_USE_HIGH_BATS is
+ * set, 4 otherwise.
+ */
+int __init find_free_bat(void)
+{
+ int b;
+ int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+
+ for (b = 0; b < n; b++) {
+ struct ppc_bat *bat = BATS[b];
+
+ /* bat[1] is the DBAT; its two low batu bits are the Vs/Vp bits set by setbat() */
+ if (!(bat[1].batu & 3))
+ return b;
+ }
+ return -1;
+}
+
+/*
+ * This function calculates the size of the largest block usable to map the
+ * beginning of an area based on the start address and size of that area:
+ * - max block size is 256M on 6xx.
+ * - base address must be aligned to the block size. So the maximum block size
+ * is identified by the lowest bit set to 1 in the base address (for instance
+ * if base is 0x16000000, max size is 0x02000000).
+ * - block size has to be a power of two. This is calculated by finding the
+ * highest bit set to 1 in the size of the area (top - base).
+ *
+ * The result is the smallest of those three limits. The "& 31" turns a
+ * shift of -1 (which is what is computed if ffs() or fls() returns 0) into
+ * 31 instead of an out-of-range shift count.
+ */
+unsigned int bat_block_size(unsigned long base, unsigned long top)
+{
+ unsigned int max_size = SZ_256M;
+ unsigned int base_shift = (ffs(base) - 1) & 31;
+ unsigned int block_shift = (fls(top - base) - 1) & 31;
+
+ return min3(max_size, 1U << base_shift, 1U << block_shift);
+}
+
+/*
+ * Set up one of the IBAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ *
+ * @index: BAT pair to fill in
+ * @virt: virtual start address of the block
+ * @phys: physical start address of the block
+ * @size: block size in bytes
+ * @prot: requested protection; _PAGE_COHERENT and _PAGE_EXEC are honoured
+ *
+ * Only the in-memory image BATS[index][0] is written by this function.
+ */
+static void setibat(int index, unsigned long virt, phys_addr_t phys,
+ unsigned int size, pgprot_t prot)
+{
+ /* block length field: size expressed in 128k units, minus one */
+ unsigned int bl = (size >> 17) - 1;
+ int wimgxpp;
+ struct ppc_bat *bat = BATS[index];
+ unsigned long flags = pgprot_val(prot);
+
+ if (!cpu_has_feature(CPU_FTR_NEED_COHERENT))
+ flags &= ~_PAGE_COHERENT;
+
+ /*
+ * Test the _PAGE_EXEC bit of the requested protection rather than the
+ * _PAGE_EXEC constant itself, otherwise the block is always mapped
+ * BPP_RX whatever the caller asked for.
+ */
+ wimgxpp = (flags & _PAGE_COHERENT) |
+ ((flags & _PAGE_EXEC) ? BPP_RX : BPP_XX);
+ bat[0].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[0].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+ if (!is_kernel_addr(virt))
+ bat[0].batu |= 1; /* Vp = 1 */
+}
+
+/*
+ * Zero both words of the IBAT image of pair @index (BATS[index][0]).
+ * Only the in-memory image is modified here.
+ */
+static void clearibat(int index)
+{
+ struct ppc_bat *bat = BATS[index];
+
+ bat[0].batu = 0;
+ bat[0].batl = 0;
+}
+
+/*
+ * Map the physical range [base, top) at PAGE_OFFSET + base with BATs, one
+ * block at a time, using PAGE_KERNEL_X protection.
+ *
+ * Mapping stops when the range is fully covered, when no BAT is free, or
+ * when the next usable block would be smaller than 128k.
+ *
+ * Returns the first physical address that has not been mapped (top when
+ * the whole range was covered).
+ */
+static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+ int idx;
+
+ while ((idx = find_free_bat()) != -1 && base != top) {
+ unsigned int size = bat_block_size(base, top);
+
+ /* 128k is the smallest block setbat() accepts */
+ if (size < 128 << 10)
+ break;
+ setbat(idx, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+ base += size;
+ }
+
+ return base;
+}
+
+/*
+ * Map RAM in the physical range [base, top) with BATs.
+ *
+ * IBAT 0 is first set up to cover physical 0 up to _einittext, rounded up
+ * to a power of two. The data BATs are then filled in by __mmu_mapin_ram().
+ * With strict kernel RWX enabled and __srwx_boundary strictly inside the
+ * range, the range is mapped in two passes split at that boundary so that
+ * no single block spans it.
+ *
+ * Returns the first physical address that has not been mapped by a BAT.
+ */
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+ unsigned long done;
+ unsigned long border = (unsigned long)__srwx_boundary - PAGE_OFFSET;
+ unsigned long size;
+
+ size = roundup_pow_of_two((unsigned long)_einittext - PAGE_OFFSET);
+ setibat(0, PAGE_OFFSET, 0, size, PAGE_KERNEL_X);
+
+ if (debug_pagealloc_enabled_or_kfence()) {
+ pr_debug_once("Read-Write memory mapped without BATs\n");
+ /* Only memory below the boundary is mapped with BATs in this mode */
+ if (base >= border)
+ return base;
+ if (top >= border)
+ top = border;
+ }
+
+ /* No split needed when the boundary is not strictly inside the range */
+ if (!strict_kernel_rwx_enabled() || base >= border || top <= border)
+ return __mmu_mapin_ram(base, top);
+
+ done = __mmu_mapin_ram(base, border);
+ /* Could not reach the boundary: report how far the mapping went */
+ if (done != border)
+ return done;
+
+ return __mmu_mapin_ram(border, top);
+}
+
+/*
+ * Tell whether @addr lies in the 256M-aligned window that encloses
+ * [MODULES_VADDR, MODULES_END). Always false without CONFIG_EXECMEM.
+ */
+static bool is_module_segment(unsigned long addr)
+{
+ if (!IS_ENABLED(CONFIG_EXECMEM))
+ return false;
+ if (addr < ALIGN_DOWN(MODULES_VADDR, SZ_256M))
+ return false;
+ if (addr > ALIGN(MODULES_END, SZ_256M) - 1)
+ return false;
+ return true;
+}
+
+/*
+ * Restrict the IBATs to the kernel text and mark the other kernel segments
+ * non-executable.
+ *
+ * The range [_stext, _etext rounded up to 128k) is covered with IBATs, the
+ * remaining IBATs are cleared and update_bats() is called. Then bit
+ * 0x10000000 is set in the segment register of every 256M segment from
+ * TASK_SIZE upwards, except the segments used for modules.
+ *
+ * Always returns 0.
+ */
+int mmu_mark_initmem_nx(void)
+{
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+ unsigned long base = (unsigned long)_stext - PAGE_OFFSET;
+ unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K);
+ unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
+ unsigned long size;
+
+ /* Use exactly-fitting blocks for all IBATs but the last one */
+ for (i = 0; i < nb - 1 && base < top;) {
+ size = bat_block_size(base, top);
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+ base += size;
+ }
+ /* Last IBAT: if one block does not reach top, double the block size */
+ if (base < top) {
+ size = bat_block_size(base, top);
+ if ((top - base) > size) {
+ size <<= 1;
+ if (strict_kernel_rwx_enabled() && base + size > border)
+ pr_warn("Some RW data is getting mapped X. "
+ "Adjust CONFIG_DATA_SHIFT to avoid that.\n");
+ }
+ setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_X);
+ base += size;
+ }
+ for (; i < nb; i++)
+ clearibat(i);
+
+ update_bats();
+
+ BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, SZ_256M) < TASK_SIZE);
+
+ /* One segment register per 256M (1 << 28) of address space, 16 in total */
+ for (i = TASK_SIZE >> 28; i < 16; i++) {
+ /* Do not set NX on VM space for modules */
+ if (is_module_segment(i << 28))
+ continue;
+
+ mtsr(mfsr(i << 28) | 0x10000000, i << 28);
+ }
+ return 0;
+}
+
+/*
+ * Switch the DBAT image of every BAT whose recorded start address is below
+ * __end_rodata from BPP_RW to BPP_RX, then call update_bats().
+ *
+ * Always returns 0.
+ */
+int mmu_mark_rodata_ro(void)
+{
+ int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
+ int i;
+
+ for (i = 0; i < nb; i++) {
+ struct ppc_bat *bat = BATS[i];
+
+ /* Only the start of the block is compared, not its end */
+ if (bat_addrs[i].start < (unsigned long)__end_rodata)
+ bat[1].batl = (bat[1].batl & ~BPP_RW) | BPP_RX;
+ }
+
+ update_bats();
+
+ return 0;
+}
+
+/*
+ * Set up one of the D BAT (block address translation) register pairs.
+ * The parameters are not checked; in particular size must be a power
+ * of 2 between 128k and 256M.
+ *
+ * @index: BAT pair to use, or -1 to pick the first free one
+ * @virt: virtual start address of the block
+ * @phys: physical start address of the block
+ * @size: block size in bytes
+ * @prot: requested protection and cache attributes
+ *
+ * The DBAT image BATS[index][1] is filled in and the mapped range is
+ * recorded in bat_addrs[index]. If no BAT is available an error is logged
+ * and nothing is mapped.
+ */
+void __init setbat(int index, unsigned long virt, phys_addr_t phys,
+ unsigned int size, pgprot_t prot)
+{
+ unsigned int bl;
+ int wimgxpp;
+ struct ppc_bat *bat;
+ unsigned long flags = pgprot_val(prot);
+
+ if (index == -1)
+ index = find_free_bat();
+ if (index == -1) {
+ pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+ (unsigned long long)phys);
+ return;
+ }
+ bat = BATS[index];
+
+ if ((flags & _PAGE_NO_CACHE) ||
+ (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
+ flags &= ~_PAGE_COHERENT;
+
+ /* block length field: size expressed in 128k units, minus one */
+ bl = (size >> 17) - 1;
+ /* Do DBAT first */
+ wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
+ | _PAGE_COHERENT | _PAGE_GUARDED);
+ wimgxpp |= (flags & _PAGE_WRITE) ? BPP_RW : BPP_RX;
+ bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
+ bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
+ if (!is_kernel_addr(virt))
+ bat[1].batu |= 1; /* Vp = 1 */
+ /* NOTE(review): flags is not read again below, so this update has no effect here */
+ if (flags & _PAGE_GUARDED) {
+ /* G bit must be zero in IBATs */
+ flags &= ~_PAGE_EXEC;
+ }
+
+ bat_addrs[index].start = virt;
+ /* limit is the address of the last byte of the block (inclusive) */
+ bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
+ bat_addrs[index].phys = phys;
+}
+
+/*
+ * Preload a translation in the hash table
+ *
+ * Does nothing when the MMU has no hash table (MMU_FTR_HPTE_TABLE clear)
+ * or when the pmd entry covering @ea in @mm is empty. Otherwise the
+ * context id, the address and the pmd value are passed to add_hash_page().
+ */
+static void hash_preload(struct mm_struct *mm, unsigned long ea)
+{
+ pmd_t *pmd;
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+ pmd = pmd_off(mm, ea);
+ if (!pmd_none(*pmd))
+ add_hash_page(mm->context.id, ea, pmd_val(*pmd));
+}
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * Nothing is preloaded when the PTE is not young, when the address is at
+ * or above TASK_SIZE, when the current thread has no regs, or when the
+ * trap recorded in those regs is neither 0x300 nor 0x400.
+ *
+ * This must always be called with the pte lock held.
+ */
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *ptep)
+{
+ /*
+ * We don't need to worry about _PAGE_PRESENT here because we are
+ * called with either mm->page_table_lock held or ptl lock held
+ */
+
+ /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ if (!pte_young(*ptep) || address >= TASK_SIZE)
+ return;
+
+ /* We have to test for regs NULL since init will get here first thing at boot */
+ if (!current->thread.regs)
+ return;
+
+ /* We also avoid filling the hash if not coming from a fault */
+ if (TRAP(current->thread.regs) != 0x300 && TRAP(current->thread.regs) != 0x400)
+ return;
+
+ hash_preload(vma->vm_mm, address);
+}
+
+/*
+ * Size and allocate the hash table.
+ *
+ * Does nothing when the MMU has no hash table. Otherwise the number of
+ * HPTE groups is derived from total_memory, the table is allocated and the
+ * values later consumed by MMU_init_hw_patch() (Hash, Hash_mask, hash_mb,
+ * hash_mb2) as well as _SDR1 are computed. The instructions in hashtable.S
+ * are not patched here.
+ */
+void __init MMU_init_hw(void)
+{
+ unsigned int n_hpteg, lg_n_hpteg;
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+
+ if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
+
+#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */
+#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
+#define MIN_N_HPTEG 1024 /* min 64kB hash table */
+
+ /*
+ * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
+ * This is less than the recommended amount, but then
+ * Linux ain't AIX.
+ */
+ n_hpteg = total_memory / (PAGE_SIZE * 8);
+ if (n_hpteg < MIN_N_HPTEG)
+ n_hpteg = MIN_N_HPTEG;
+ lg_n_hpteg = __ilog2(n_hpteg);
+ if (n_hpteg & (n_hpteg - 1)) {
+ ++lg_n_hpteg; /* round up if not power of 2 */
+ n_hpteg = 1 << lg_n_hpteg;
+ }
+ Hash_size = n_hpteg << LG_HPTEG_SIZE;
+
+ /*
+ * Find some memory for the hash table.
+ * The table is aligned on its own size.
+ */
+ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
+ Hash = memblock_alloc_or_panic(Hash_size, Hash_size);
+ _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+
+ pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+ (unsigned long long)(total_memory >> 20), Hash_size >> 10);
+
+
+ Hash_mask = n_hpteg - 1;
+ /* hash_mb2 is capped so that it never drops below 16 - LG_HPTEG_SIZE */
+ hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+ if (lg_n_hpteg > 16)
+ hash_mb2 = 16 - LG_HPTEG_SIZE;
+}
+
+/*
+ * Patch the instructions of hashtable.S with the address and size of the
+ * hash table computed by MMU_init_hw().
+ *
+ * Does nothing when the MMU has no hash table. Both progress messages are
+ * emitted before the first instruction is modified.
+ */
+void __init MMU_init_hw_patch(void)
+{
+ /* upper 16 bits of the hash mask once shifted to a byte offset */
+ unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+ unsigned int hash = (unsigned int)Hash - PAGE_OFFSET;
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+
+ if (ppc_md.progress)
+ ppc_md.progress("hash:patch", 0x345);
+ if (ppc_md.progress)
+ ppc_md.progress("hash:done", 0x205);
+
+ /* WARNING: Make sure nothing can trigger a KASAN check past this point */
+
+ /*
+ * Patch up the instructions in hashtable.S:create_hpte
+ */
+ modify_instruction_site(&patch__hash_page_A0, 0xffff, hash >> 16);
+ modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
+ modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
+ modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
+
+ /*
+ * Patch up the instructions in hashtable.S:flush_hash_pages
+ */
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff, hash >> 16);
+ modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
+ modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
+}
+
+/*
+ * Limit early memblock allocations to the first memory block, and to 256M
+ * at most.
+ *
+ * @first_memblock_base: physical base of the first memory block, must be 0
+ * @first_memblock_size: size of the first memory block
+ */
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ /* We don't currently support the first MEMBLOCK not mapping 0
+ * physical on those processors
+ */
+ BUG_ON(first_memblock_base != 0);
+
+ memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_256M));
+}
+
+/*
+ * Log the hash table size, and the hash mask when it is non-zero.
+ */
+void __init print_system_hash_info(void)
+{
+ pr_info("Hash_size = 0x%lx\n", Hash_size);
+ if (Hash_mask)
+ pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+}
+
+/*
+ * Early MMU initialisation hook; the body is empty in this file.
+ */
+void __init early_init_mmu(void)
+{
+}
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/book3s32/mmu_context.c
index aa5a7fd89461..1922f9a6b058 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file contains the routines for handling the MMU on those
* PowerPC implementations where the MMU substantially follows the
@@ -14,12 +15,6 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/mm.h>
@@ -27,7 +22,12 @@
#include <linux/export.h>
#include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
+
+/*
+ * Room for two PTE pointers, usually the kernel and current user pointers
+ * to their respective root page table.
+ */
+void *abatron_pteptrs[2];
/*
* On 32-bit PowerPC 6xx/7xx/7xxx CPUs, we use a set of 16 VSIDs
@@ -45,19 +45,6 @@
#define LAST_CONTEXT 32767
#define FIRST_CONTEXT 1
-/*
- * This function defines the mapping from contexts to VSIDs (virtual
- * segment IDs). We use a skew on both the context and the high 4 bits
- * of the 32-bit virtual address (the "effective segment ID") in order
- * to spread out the entries in the MMU hash table. Note, if this
- * function is changed then arch/ppc/mm/hashtable.S will have to be
- * changed to correspond.
- *
- *
- * CTX_TO_VSID(ctx, va) (((ctx) * (897 * 16) + ((va) >> 28) * 0x111) \
- * & 0xffffff)
- */
-
static unsigned long next_mmu_context;
static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1];
@@ -82,6 +69,12 @@ EXPORT_SYMBOL_GPL(__init_new_context);
+/*
+ * Give @mm a fresh context id and precompute its segment register 0 value:
+ * the VSID derived from the id for address 0, plus SR_NX when
+ * CONFIG_PPC_KUEP is enabled and SR_KS unless KUAP is disabled.
+ * @t is not used. Always returns 0.
+ */
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
mm->context.id = __init_new_context();
+ mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0);
+
+ if (IS_ENABLED(CONFIG_PPC_KUEP))
+ mm->context.sr0 |= SR_NX;
+ if (!kuap_is_disabled())
+ mm->context.sr0 |= SR_KS;
return 0;
}
@@ -117,3 +110,25 @@ void __init mmu_context_init(void)
context_map[0] = (1 << FIRST_CONTEXT) - 1;
next_mmu_context = FIRST_CONTEXT;
}
+
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk)
+{
+ long id = next->context.id; /* context ID of the mm being switched to */
+
+ if (id < 0) /* a negative ID means no context was allocated for @next */
+ panic("mm_struct %p has no context ID", next);
+
+ isync();
+
+ update_user_segments(next->context.sr0); /* @prev and @tsk are not used here */
+
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = next->pgd; /* publish next's page table root in slot 1 */
+
+ if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) /* no hash table: SDR1 is derived from next's pgd */
+ mtspr(SPRN_SDR1, rol32(__pa(next->pgd), 4) & 0xffff01ff);
+
+ mb(); /* sync */
+ isync();
+}
+EXPORT_SYMBOL(switch_mmu_context);
diff --git a/arch/powerpc/mm/book3s32/nohash_low.S b/arch/powerpc/mm/book3s32/nohash_low.S
new file mode 100644
index 000000000000..19f418b0ed2d
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/nohash_low.S
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains low-level assembler routines for managing
+ * the PowerPC 603 tlb invalidation.
+ */
+
+#include <asm/page.h>
+#include <asm/ppc_asm.h>
+#include <asm/asm-offsets.h>
+
+/*
+ * Flush an entry from the TLB
+ *
+ * This variant is only assembled when CONFIG_SMP is set. The tlbie is
+ * issued on the value in r3 while mmu_hash_lock is held. The caller's
+ * MSR is kept in r10 and written back before returning; in between,
+ * MSR_EE and DR are cleared and the lock word is addressed through
+ * tophys() (presumably because data translation is off at that point).
+ */
+#ifdef CONFIG_SMP
+_GLOBAL(_tlbie)
+ lwz r8,TASK_CPU(r2)
+ oris r8,r8,11 /* r8 is the value stored into the lock word */
+ mfmsr r10 /* keep the original MSR for the restore below */
+ rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
+ rlwinm r0,r0,0,28,26 /* clear DR */
+ mtmsr r0
+ isync
+ lis r9,mmu_hash_lock@h
+ ori r9,r9,mmu_hash_lock@l
+ tophys(r9,r9)
+10: lwarx r7,0,r9 /* spin until the lock word reads 0 ... */
+ cmpwi 0,r7,0
+ bne- 10b
+ stwcx. r8,0,r9 /* ... then try to claim it, retrying if the store fails */
+ bne- 10b
+ eieio
+ tlbie r3
+ sync
+ TLBSYNC
+ li r0,0
+ stw r0,0(r9) /* clear mmu_hash_lock */
+ mtmsr r10 /* restore the MSR saved on entry */
+ isync
+ blr
+_ASM_NOKPROBE_SYMBOL(_tlbie)
+#endif /* CONFIG_SMP */
+
+/*
+ * Flush the entire TLB. 603/603e only
+ *
+ * The flush itself is a loop of 32 tlbie instructions, starting at
+ * KERNELBASE@h and advancing the address by 0x1000 per iteration.
+ * With CONFIG_SMP the loop runs with mmu_hash_lock held and with
+ * MSR_EE and DR cleared; the original MSR (saved in r10) is restored
+ * after the lock is released. Without CONFIG_SMP no lock is taken and
+ * the MSR is left untouched.
+ */
+_GLOBAL(_tlbia)
+#if defined(CONFIG_SMP)
+ lwz r8,TASK_CPU(r2)
+ oris r8,r8,10 /* r8 is the value stored into the lock word */
+ mfmsr r10 /* keep the original MSR for the restore below */
+ rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */
+ rlwinm r0,r0,0,28,26 /* clear DR */
+ mtmsr r0
+ isync
+ lis r9,mmu_hash_lock@h
+ ori r9,r9,mmu_hash_lock@l
+ tophys(r9,r9)
+10: lwarx r7,0,r9 /* spin until the lock word reads 0 ... */
+ cmpwi 0,r7,0
+ bne- 10b
+ stwcx. r8,0,r9 /* ... then try to claim it, retrying if the store fails */
+ bne- 10b
+#endif /* CONFIG_SMP */
+ li r5, 32 /* iteration count, loaded into CTR below */
+ lis r4, KERNELBASE@h
+ mtctr r5
+ sync
+0: tlbie r4
+ addi r4, r4, 0x1000
+ bdnz 0b
+ sync
+#ifdef CONFIG_SMP
+ TLBSYNC
+ li r0,0
+ stw r0,0(r9) /* clear mmu_hash_lock */
+ mtmsr r10 /* restore the MSR saved on entry */
+ isync
+#endif /* CONFIG_SMP */
+ blr
+_ASM_NOKPROBE_SYMBOL(_tlbia)
diff --git a/arch/powerpc/mm/book3s32/tlb.c b/arch/powerpc/mm/book3s32/tlb.c
new file mode 100644
index 000000000000..9ad6b56bfec9
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU uses a hash table to store virtual to
+ * physical translations, these routines flush entries from the
+ * hash table also.
+ * -- paulus
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/export.h>
+
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * TLB flushing:
+ *
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * since the hardware hash table functions as an extension of the
+ * tlb as far as the linux tables are concerned, flush it too.
+ * -- Cort
+ */
+
+/*
+ * For each address in the range, find the pte for the address
+ * and check _PAGE_HASHPTE bit; if it is set, find and destroy
+ * the corresponding HPTE.
+ *
+ * @start is rounded down to a page boundary and @end is treated as
+ * exclusive; nothing is done when the rounded @start is not below @end.
+ * The range is walked one pmd entry at a time: for every pmd that is
+ * not empty, flush_hash_pages() is called with the context ID of @mm,
+ * the first address covered, the pmd value and the number of pages
+ * covered by that pmd within the range.
+ */
+void hash__flush_range(struct mm_struct *mm, unsigned long start, unsigned long end)
+{
+ pmd_t *pmd;
+ unsigned long pmd_end;
+ int count;
+ unsigned int ctx = mm->context.id;
+
+ start &= PAGE_MASK;
+ if (start >= end)
+ return;
+ end = (end - 1) | ~PAGE_MASK; /* from here on, end is the last byte (inclusive) */
+ pmd = pmd_off(mm, start);
+ for (;;) {
+ pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; /* last byte covered by this pmd */
+ if (pmd_end > end)
+ pmd_end = end;
+ if (!pmd_none(*pmd)) {
+ count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
+ flush_hash_pages(ctx, start, pmd_val(*pmd), count);
+ }
+ if (pmd_end == end)
+ break;
+ start = pmd_end + 1;
+ ++pmd;
+ }
+}
+EXPORT_SYMBOL(hash__flush_range);
+
+/*
+ * Flush all the (user) entries for the address space described by mm.
+ *
+ * This is done by iterating over every VMA of @mm and calling
+ * hash__flush_range() on its [vm_start, vm_end) range.
+ */
+void hash__flush_tlb_mm(struct mm_struct *mm)
+{
+ struct vm_area_struct *mp;
+ VMA_ITERATOR(vmi, mm, 0);
+
+ /*
+ * It is safe to iterate the vmas when called from dup_mmap,
+ * holding mmap_lock. It would also be safe from unmap_region
+ * or exit_mmap, but not from vmtruncate on SMP - but it seems
+ * dup_mmap is the only SMP case which gets here.
+ */
+ for_each_vma(vmi, mp)
+ hash__flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
+}
+EXPORT_SYMBOL(hash__flush_tlb_mm);
+
+void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ struct mm_struct *mm;
+ pmd_t *pmd;
+
+ mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; /* addresses at or above TASK_SIZE use init_mm */
+ pmd = pmd_off(mm, vmaddr);
+ if (!pmd_none(*pmd)) /* an empty pmd means there is nothing to flush */
+ flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1); /* a single page */
+}
+EXPORT_SYMBOL(hash__flush_tlb_page);
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
new file mode 100644
index 000000000000..33af5795856a
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += mmu_context.o pgtable.o trace.o
+ifdef CONFIG_PPC_64S_HASH_MMU
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+obj-y += hash_pgtable.o hash_utils.o hash_tlb.o slb.o slice.o
+obj-$(CONFIG_PPC_HASH_MMU_NATIVE) += hash_native.o
+obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
+obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+endif
+
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
+endif
+obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
+obj-$(CONFIG_PPC_PKEY) += pkeys.o
+
+# Instrumenting the SLB fault path can lead to duplicate SLB entries
+KCOV_INSTRUMENT_slb.o := n
+
+# Parts of these can run in real mode and therefore are
+# not safe with the current outline KASAN implementation
+KASAN_SANITIZE_mmu_context.o := n
+KASAN_SANITIZE_pgtable.o := n
+KASAN_SANITIZE_radix_pgtable.o := n
+KASAN_SANITIZE_radix_tlb.o := n
+KASAN_SANITIZE_slb.o := n
+KASAN_SANITIZE_pkeys.o := n
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
new file mode 100644
index 000000000000..02acbfd05b46
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+
+#include "internal.h"
+
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ pte_t *ptep, unsigned long trap, unsigned long flags,
+ int ssize, int subpg_prot)
+{
+ real_pte_t rpte;
+ unsigned long hpte_group;
+ unsigned long rflags, pa;
+ unsigned long old_pte, new_pte;
+ unsigned long vpn, hash, slot;
+ unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+
+ /*
+ * atomically mark the linux PTE busy and, for a write access, dirty
+ *
+ * Return values of this function: 0 when the PTE was found busy
+ * (the access is retried) or once the HPTE has been updated or
+ * inserted, 1 when the PTE permissions do not allow the access,
+ * -1 when hpte_insert() reports a failure (-2).
+ * The subpg_prot argument is not used in this variant.
+ */
+ do {
+ pte_t pte = READ_ONCE(*ptep);
+
+ old_pte = pte_val(pte);
+ /* If PTE busy, retry the access */
+ if (unlikely(old_pte & H_PAGE_BUSY))
+ return 0;
+ /* If PTE permissions don't match, take page fault */
+ if (unlikely(!check_pte_access(access, old_pte)))
+ return 1;
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access. Note that H_PAGE_COMBO is not added
+ * to the PTE here.
+ */
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
+ new_pte |= _PAGE_DIRTY;
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ /*
+ * PP bits. _PAGE_USER is already PP bit 0x2, so we only
+ * need to add in 0x1 if it's a read-only user page
+ *
+ * NOTE(review): the conversion itself is delegated to
+ * htab_convert_pte_flags(); confirm the description above still
+ * matches what that helper does.
+ */
+ rflags = htab_convert_pte_flags(new_pte, flags);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
+
+ if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+ !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+ vpn = hpt_vpn(ea, vsid, ssize);
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+ /*
+ * There MIGHT be an HPTE for this pte
+ */
+ unsigned long gslot = pte_get_hash_gslot(vpn, shift, ssize,
+ rpte, 0);
+
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize, flags) == -1)
+ old_pte &= ~_PAGE_HPTEFLAGS; /* update failed: fall through to the insert path below */
+ }
+
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+ hash = hpt_hash(vpn, shift, ssize);
+
+repeat:
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+ /* Insert into the hash table, primary slot */
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ /*
+ * Primary is full, try the secondary
+ */
+ if (unlikely(slot == -1)) {
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_4K,
+ MMU_PAGE_4K, ssize);
+ if (slot == -1) {
+ if (mftb() & 0x1) /* low timebase bit picks which group loses an entry */
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
+ mmu_hash_ops.hpte_remove(hpte_group);
+ /*
+ * FIXME!! Should we try the group from which we removed ?
+ */
+ goto repeat;
+ }
+ }
+ /*
+ * Hypervisor failure. Restore old pte and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *ptep = __pte(old_pte);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
+ return -1;
+ }
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+ if (stress_hpt())
+ hpt_do_stress(ea, hpte_group);
+ }
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY); /* publish the new PTE and drop the busy bit */
+ return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
new file mode 100644
index 000000000000..954af420f358
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -0,0 +1,343 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+
+#include "internal.h"
+
+/*
+ * Return true, if the entry has a slot value which
+ * the software considers as invalid.
+ *
+ * Only the low four bits of @hidx are examined; the value is treated
+ * as invalid when all four are set (0xf).
+ */
+static inline bool hpte_soft_invalid(unsigned long hidx)
+{
+ return ((hidx & 0xfUL) == 0xfUL);
+}
+
+/*
+ * index from 0 - 15
+ *
+ * Return true when the hidx stored in @rpte for sub-page @index is
+ * not the soft-invalid value checked by hpte_soft_invalid().
+ */
+bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+{
+ return !(hpte_soft_invalid(__rpte_to_hidx(rpte, index)));
+}
+
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+ pte_t *ptep, unsigned long trap, unsigned long flags,
+ int ssize, int subpg_prot)
+{
+ real_pte_t rpte;
+ unsigned long hpte_group;
+ unsigned int subpg_index;
+ unsigned long rflags, pa;
+ unsigned long old_pte, new_pte, subpg_pte;
+ unsigned long vpn, hash, slot, gslot;
+ unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+
+ /*
+ * atomically mark the linux large page PTE busy and dirty
+ *
+ * Return values of this function: 0 when the PTE was found busy
+ * (the access is retried) or once the 4K HPTE has been updated or
+ * inserted, 1 when the PTE permissions do not allow the access,
+ * -1 when hpte_insert() reports a failure (-2).
+ */
+ do {
+ pte_t pte = READ_ONCE(*ptep);
+
+ old_pte = pte_val(pte);
+ /* If PTE busy, retry the access */
+ if (unlikely(old_pte & H_PAGE_BUSY))
+ return 0;
+ /* If PTE permissions don't match, take page fault */
+ if (unlikely(!check_pte_access(access, old_pte)))
+ return 1;
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access. Since this is 4K insert of 64K page size
+ * also add H_PAGE_COMBO
+ */
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED | H_PAGE_COMBO;
+ if (access & _PAGE_WRITE)
+ new_pte |= _PAGE_DIRTY;
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ /*
+ * Handle the subpage protection bits: the bits set in subpg_prot
+ * are masked out of the PTE value before it is converted to rflags.
+ */
+ subpg_pte = new_pte & ~subpg_prot;
+ rflags = htab_convert_pte_flags(subpg_pte, flags);
+
+ if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+ !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+ }
+
+ subpg_index = (ea & (PAGE_SIZE - 1)) >> shift; /* which 4K sub-page of the page ea falls in */
+ vpn = hpt_vpn(ea, vsid, ssize);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
+ /*
+ * None of the sub 4k pages is hashed yet
+ */
+ if (!(old_pte & H_PAGE_HASHPTE))
+ goto htab_insert_hpte;
+ /*
+ * Check if the pte was already inserted into the hash table
+ * as a 64k HW page, and invalidate the 64k HPTE if so.
+ */
+ if (!(old_pte & H_PAGE_COMBO)) {
+ flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
+ /*
+ * clear the old slot details from the old and new pte.
+ * On hash insert failure we use old pte value and we don't
+ * want slot information there if we have an insert failure.
+ */
+ old_pte &= ~H_PAGE_HASHPTE;
+ new_pte &= ~H_PAGE_HASHPTE;
+ goto htab_insert_hpte;
+ }
+ /*
+ * Check for sub page valid and update
+ */
+ if (__rpte_sub_valid(rpte, subpg_index)) {
+ int ret;
+
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte,
+ subpg_index);
+ ret = mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, flags);
+
+ /*
+ * If we failed because typically the HPTE wasn't really here
+ * we try an insertion.
+ */
+ if (ret == -1)
+ goto htab_insert_hpte;
+
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY); /* update succeeded: publish the PTE and drop the busy bit */
+ return 0;
+ }
+
+htab_insert_hpte:
+
+ /*
+ * Initialize all hidx entries to invalid value, the first time
+ * the PTE is about to allocate a 4K HPTE.
+ */
+ if (!(old_pte & H_PAGE_COMBO))
+ rpte.hidx = INVALID_RPTE_HIDX;
+
+ /*
+ * handle H_PAGE_4K_PFN case
+ */
+ if (old_pte & H_PAGE_4K_PFN) {
+ /*
+ * All the sub 4k page have the same
+ * physical address.
+ */
+ pa = pte_pfn(__pte(old_pte)) << HW_PAGE_SHIFT;
+ } else {
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+ pa += (subpg_index << shift); /* offset of the sub-page within the page */
+ }
+ hash = hpt_hash(vpn, shift, ssize);
+repeat:
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+ /* Insert into the hash table, primary slot */
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+ /*
+ * Primary is full, try the secondary
+ */
+ if (unlikely(slot == -1)) {
+ bool soft_invalid;
+
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags, HPTE_V_SECONDARY,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize);
+
+ soft_invalid = hpte_soft_invalid(slot);
+ if (unlikely(soft_invalid)) {
+ /*
+ * We got a valid slot from a hardware point of view.
+ * but we cannot use it, because we use this special
+ * value; as defined by hpte_soft_invalid(), to track
+ * invalid slots. We cannot use it. So invalidate it.
+ */
+ gslot = slot & _PTEIDX_GROUP_IX;
+ mmu_hash_ops.hpte_invalidate(hpte_group + gslot, vpn,
+ MMU_PAGE_4K, MMU_PAGE_4K,
+ ssize, 0);
+ }
+
+ if (unlikely(slot == -1 || soft_invalid)) {
+ /*
+ * For soft invalid slot, let's ensure that we release a
+ * slot from the primary, with the hope that we will
+ * acquire that slot next time we try. This will ensure
+ * that we do not get the same soft-invalid slot.
+ */
+ if (soft_invalid || (mftb() & 0x1))
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+ mmu_hash_ops.hpte_remove(hpte_group);
+ /*
+ * FIXME!! Should we try the group from which we removed ?
+ */
+ goto repeat;
+ }
+ }
+ /*
+ * Hypervisor failure. Restore old pte and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *ptep = __pte(old_pte);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
+ return -1;
+ }
+
+ new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
+ new_pte |= H_PAGE_HASHPTE;
+
+ if (stress_hpt())
+ hpt_do_stress(ea, hpte_group);
+
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY); /* publish the new PTE and drop the busy bit */
+ return 0;
+}
+
+int __hash_page_64K(unsigned long ea, unsigned long access,
+ unsigned long vsid, pte_t *ptep, unsigned long trap,
+ unsigned long flags, int ssize)
+{
+ real_pte_t rpte;
+ unsigned long hpte_group;
+ unsigned long rflags, pa;
+ unsigned long old_pte, new_pte;
+ unsigned long vpn, hash, slot;
+ unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;
+
+ /*
+ * atomically mark the linux large page PTE busy and dirty
+ *
+ * Return values of this function: 0 when the PTE was found busy
+ * (the access is retried), when a cache-inhibited PTE has to be
+ * refaulted as a 4k page, or once the 64K HPTE has been updated or
+ * inserted; 1 when the PTE permissions do not allow the access;
+ * -1 when hpte_insert() reports a failure (-2).
+ */
+ do {
+ pte_t pte = READ_ONCE(*ptep);
+
+ old_pte = pte_val(pte);
+ /* If PTE busy, retry the access */
+ if (unlikely(old_pte & H_PAGE_BUSY))
+ return 0;
+ /* If PTE permissions don't match, take page fault */
+ if (unlikely(!check_pte_access(access, old_pte)))
+ return 1;
+ /*
+ * Check if PTE has the cache-inhibit bit set
+ * If so, bail out and refault as a 4k page
+ */
+ if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
+ unlikely(pte_ci(pte)))
+ return 0;
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access.
+ */
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
+ new_pte |= _PAGE_DIRTY;
+ } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ rflags = htab_convert_pte_flags(new_pte, flags);
+ rpte = __real_pte(__pte(old_pte), ptep, PTRS_PER_PTE);
+
+ if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+ !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+ vpn = hpt_vpn(ea, vsid, ssize);
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+ unsigned long gslot;
+
+ /*
+ * There MIGHT be an HPTE for this pte
+ */
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize,
+ flags) == -1)
+ old_pte &= ~_PAGE_HPTEFLAGS; /* update failed: fall through to the insert path below */
+ }
+
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+ hash = hpt_hash(vpn, shift, ssize);
+
+repeat:
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+ /* Insert into the hash table, primary slot */
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ MMU_PAGE_64K, MMU_PAGE_64K,
+ ssize);
+ /*
+ * Primary is full, try the secondary
+ */
+ if (unlikely(slot == -1)) {
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ MMU_PAGE_64K,
+ MMU_PAGE_64K, ssize);
+ if (slot == -1) {
+ if (mftb() & 0x1) /* low timebase bit picks which group loses an entry */
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
+ mmu_hash_ops.hpte_remove(hpte_group);
+ /*
+ * FIXME!! Should we try the group from which we removed ?
+ */
+ goto repeat;
+ }
+ }
+ /*
+ * Hypervisor failure. Restore old pte and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *ptep = __pte(old_pte);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
+ return -1;
+ }
+
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+ if (stress_hpt())
+ hpt_do_stress(ea, hpte_group);
+ }
+
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY); /* publish the new PTE and drop the busy bit */
+
+ return 0;
+}
diff --git a/arch/powerpc/mm/book3s64/hash_hugepage.c b/arch/powerpc/mm/book3s64/hash_hugepage.c
new file mode 100644
index 000000000000..cdfd4fe75edb
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_hugepage.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright IBM Corporation, 2013
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+/*
+ * PPC64 THP Support for hash based MMUs
+ */
+#include <linux/mm.h>
+#include <asm/machdep.h>
+
+int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
+ pmd_t *pmdp, unsigned long trap, unsigned long flags,
+ int ssize, unsigned int psize)
+{
+ unsigned int index, valid;
+ unsigned char *hpte_slot_array;
+ unsigned long rflags, pa, hidx;
+ unsigned long old_pmd, new_pmd;
+ int ret, lpsize = MMU_PAGE_16M;
+ unsigned long vpn, hash, shift, slot;
+
+ /*
+ * atomically mark the linux large page PMD busy and dirty
+ *
+ * Return values of this function: 0 when the PMD was found busy
+ * (the access is retried), when the entry is not a THP/devmap
+ * entry, or once the HPTE has been updated or inserted; 1 when the
+ * PMD permissions do not allow the access; -1 when hpte_insert()
+ * reports a failure (-2).
+ */
+ do {
+ pmd_t pmd = READ_ONCE(*pmdp);
+
+ old_pmd = pmd_val(pmd);
+ /* If PMD busy, retry the access */
+ if (unlikely(old_pmd & H_PAGE_BUSY))
+ return 0;
+ /* If PMD permissions don't match, take page fault */
+ if (unlikely(!check_pte_access(access, old_pmd)))
+ return 1;
+ /*
+ * Try to lock the PMD, add ACCESSED and DIRTY if it was
+ * a write access
+ */
+ new_pmd = old_pmd | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
+ new_pmd |= _PAGE_DIRTY;
+ } while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
+
+ /*
+ * Make sure this is thp or devmap entry
+ *
+ * NOTE(review): at this point new_pmd, which includes H_PAGE_BUSY,
+ * has already been stored by pmd_xchg() above and this return path
+ * does not write *pmdp again - confirm that leaving the busy bit
+ * set here is intended.
+ */
+ if (!(old_pmd & H_PAGE_THP_HUGE))
+ return 0;
+
+ rflags = htab_convert_pte_flags(new_pmd, flags);
+
+ /*
+ * THPs are only supported on platforms that can do mixed page size
+ * segments (MPSS) and all such platforms have coherent icache. Hence we
+ * don't need to do lazy icache flush (hash_page_do_lazy_icache()) on
+ * noexecute fault.
+ */
+
+ /*
+ * Find the slot index details for this ea, using base page size.
+ */
+ shift = mmu_psize_defs[psize].shift;
+ index = (ea & ~HPAGE_PMD_MASK) >> shift;
+ BUG_ON(index >= PTE_FRAG_SIZE);
+
+ vpn = hpt_vpn(ea, vsid, ssize);
+ hpte_slot_array = get_hpte_slot_array(pmdp);
+ if (psize == MMU_PAGE_4K) {
+ /*
+ * invalidate the old hpte entry if we have that mapped via 64K
+ * base page size. This is because demote_segment won't flush
+ * hash page table entries.
+ */
+ if ((old_pmd & H_PAGE_HASHPTE) && !(old_pmd & H_PAGE_COMBO)) {
+ flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K,
+ ssize, flags);
+ /*
+ * With THP, we also clear the slot information with
+ * respect to all the 64K hash pte mapping the 16MB
+ * page. They are all invalid now. This makes sure we
+ * don't find the slot valid when we fault with 4k
+ * base page size.
+ *
+ */
+ memset(hpte_slot_array, 0, PTE_FRAG_SIZE);
+ }
+ }
+
+ valid = hpte_valid(hpte_slot_array, index);
+ if (valid) {
+ /* update the hpte bits */
+ hash = hpt_hash(vpn, shift, ssize);
+ hidx = hpte_hash_index(hpte_slot_array, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash; /* the entry lives in the secondary group */
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+
+ ret = mmu_hash_ops.hpte_updatepp(slot, rflags, vpn,
+ psize, lpsize, ssize, flags);
+ /*
+ * We failed to update, try to insert a new entry.
+ */
+ if (ret == -1) {
+ /*
+ * large pte is marked busy, so we can be sure
+ * nobody is looking at hpte_slot_array. hence we can
+ * safely update this here.
+ */
+ valid = 0;
+ hpte_slot_array[index] = 0;
+ }
+ }
+
+ if (!valid) {
+ unsigned long hpte_group;
+
+ hash = hpt_hash(vpn, shift, ssize);
+ /* insert new entry */
+ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
+ new_pmd |= H_PAGE_HASHPTE;
+
+repeat:
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+ /* Insert into the hash table, primary slot */
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+ psize, lpsize, ssize);
+ /*
+ * Primary is full, try the secondary
+ */
+ if (unlikely(slot == -1)) {
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa,
+ rflags,
+ HPTE_V_SECONDARY,
+ psize, lpsize, ssize);
+ if (slot == -1) {
+ if (mftb() & 0x1) /* low timebase bit picks which group loses an entry */
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
+
+ mmu_hash_ops.hpte_remove(hpte_group);
+ goto repeat;
+ }
+ }
+ /*
+ * Hypervisor failure. Restore old pmd and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *pmdp = __pmd(old_pmd);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ psize, lpsize, old_pmd);
+ return -1;
+ }
+ /*
+ * large pte is marked busy, so we can be sure
+ * nobody is looking at hpte_slot_array. hence we can
+ * safely update this here.
+ */
+ mark_hpte_slot_valid(hpte_slot_array, index, slot);
+ }
+ /*
+ * Mark the pte with H_PAGE_COMBO, if we are trying to hash it with
+ * base page size 4k.
+ */
+ if (psize == MMU_PAGE_4K)
+ new_pmd |= H_PAGE_COMBO;
+ /*
+ * The hpte valid is stored in the pgtable whose address is in the
+ * second half of the PMD. Order this against clearing of the busy bit in
+ * huge pmd.
+ */
+ smp_wmb();
+ *pmdp = __pmd(new_pmd & ~H_PAGE_BUSY);
+ return 0;
+}
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/book3s64/hash_native.c
index afc0a8295f84..e9e2dd70c060 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* native hashtable management.
*
* SMP scalability work:
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#undef DEBUG_LOW
@@ -15,19 +11,21 @@
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
+#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>
+#include <linux/pgtable.h>
#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/tlbflush.h>
+#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
+#include <asm/feature-fixups.h>
#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
@@ -41,9 +39,33 @@
#define HPTE_LOCK_BIT (56+3)
#endif
-DEFINE_RAW_SPINLOCK(native_tlbie_lock);
+static DEFINE_RAW_SPINLOCK(native_tlbie_lock);
-static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+#ifdef CONFIG_LOCKDEP
+static struct lockdep_map hpte_lock_map =
+ STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);
+
+static void acquire_hpte_lock(void)
+{
+ lock_map_acquire(&hpte_lock_map); /* lockdep bookkeeping only; no lock word is touched here */
+}
+
+static void release_hpte_lock(void)
+{
+ lock_map_release(&hpte_lock_map); /* counterpart of acquire_hpte_lock() */
+}
+#else /* !CONFIG_LOCKDEP: empty stubs */
+static void acquire_hpte_lock(void)
+{
+}
+
+static void release_hpte_lock(void)
+{
+}
+#endif /* CONFIG_LOCKDEP */
+
+static inline unsigned long ___tlbie(unsigned long vpn, int psize,
+ int apsize, int ssize)
{
unsigned long va;
unsigned int penc;
@@ -53,7 +75,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* We need 14 to 65 bits of va for a tlbie of a 4K page
* With vpn we ignore the lower VPN_SHIFT bits already.
* And top two bits are already ignored because we can
- * only accomadate 76 bits in a 64 bit vpn with a VPN_SHIFT
+ * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
* of 12.
*/
va = vpn << VPN_SHIFT;
@@ -62,15 +84,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earlier) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
/* clear out bits after (52) [0....52.....63] */
va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
- sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
: : "r" (va), "r"(0), "i" (CPU_FTR_ARCH_206)
@@ -96,6 +118,47 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
: "memory");
break;
}
+ return va;
+}
+
+static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
+ int apsize, int ssize)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ /* Radix flush for a hash guest */
+
+ unsigned long rb,rs,prs,r,ric;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = 0; /* lpid = 0 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ ric = 0; /* RIC_FLUSH_TLB */
+
+ /*
+ * Need the extra ptesync to make sure we don't
+ * re-order the tlbie
+ */
+ asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs),
+ "i"(ric), "r"(rs) : "memory");
+ }
+
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ /* Need the extra ptesync to ensure we don't reorder tlbie */
+ asm volatile("ptesync": : :"memory");
+ ___tlbie(vpn, psize, apsize, ssize); /* issue the same tlbie a second time; its return value is ignored */
+ }
+}
+
+static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
+{
+ unsigned long rb;
+
+ rb = ___tlbie(vpn, psize, apsize, ssize); /* ___tlbie() hands back the va operand it used */
+ trace_tlbie(0, 0, rb, 0, 0, 0, 0); /* record that operand in the tlbie tracepoint */
}
static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
@@ -111,18 +174,19 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
* Older versions of the architecture (2.02 and earlier) require the
* masking of the top 16 bits.
*/
- va &= ~(0xffffULL << 48);
+ if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
+ va &= ~(0xffffULL << 48);
switch (psize) {
case MMU_PAGE_4K:
/* clear out bits after(52) [0....52.....63] */
va &= ~((1ul << (64 - 52)) - 1);
va |= ssize << 8;
- sllp = ((mmu_psize_defs[apsize].sllp & SLB_VSID_L) >> 6) |
- ((mmu_psize_defs[apsize].sllp & SLB_VSID_LP) >> 4);
+ sllp = get_sllp_encoding(apsize);
va |= sllp << 5;
- asm volatile(".long 0x7c000224 | (%0 << 11) | (0 << 21)"
- : : "r"(va) : "memory");
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1)
+ : : "r" (va), "i" (CPU_FTR_ARCH_206)
+ : "memory");
break;
default:
/* We need 14 to 14 + i bits of va */
@@ -139,10 +203,12 @@ static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
*/
va |= (vpn & 0xfe);
va |= 1; /* L */
- asm volatile(".long 0x7c000224 | (%0 << 11) | (1 << 21)"
- : : "r"(va) : "memory");
+ asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1)
+ : : "r" (va), "i" (CPU_FTR_ARCH_206)
+ : "memory");
break;
}
+ trace_tlbie(0, 1, va, 0, 0, 0, 0);
}
@@ -159,9 +225,10 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize,
asm volatile("ptesync": : :"memory");
if (use_local) {
__tlbiel(vpn, psize, apsize, ssize);
- asm volatile("ptesync": : :"memory");
+ ppc_after_tlbiel_barrier();
} else {
__tlbie(vpn, psize, apsize, ssize);
+ fixup_tlbie_vpn(vpn, psize, apsize, ssize);
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
if (lock_tlbie && !use_local)
@@ -172,11 +239,14 @@ static inline void native_lock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ acquire_hpte_lock();
while (1) {
if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
break;
+ spin_begin();
while(test_bit(HPTE_LOCK_BIT, word))
- cpu_relax();
+ spin_cpu_relax();
+ spin_end();
}
}
@@ -184,6 +254,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
{
unsigned long *word = (unsigned long *)&hptep->v;
+ release_hpte_lock();
clear_bit_unlock(HPTE_LOCK_BIT, word);
}
@@ -193,8 +264,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
{
struct hash_pte *hptep = htab_address + hpte_group;
unsigned long hpte_v, hpte_r;
+ unsigned long flags;
int i;
+ local_irq_save(flags);
+
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
@@ -213,8 +287,10 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
hptep++;
}
- if (i == HPTES_PER_GROUP)
+ if (i == HPTES_PER_GROUP) {
+ local_irq_restore(flags);
return -1;
+ }
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
@@ -224,6 +300,11 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
i, hpte_v, hpte_r);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
+ hpte_v = hpte_old_to_new_v(hpte_v);
+ }
+
hptep->r = cpu_to_be64(hpte_r);
/* Guarantee the second dword is visible before the valid bit */
eieio();
@@ -231,19 +312,24 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
* Now set the first dword including the valid bit
* NOTE: this also unlocks the hpte
*/
+ release_hpte_lock();
hptep->v = cpu_to_be64(hpte_v);
__asm__ __volatile__ ("ptesync" : : : "memory");
+ local_irq_restore(flags);
+
return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}
static long native_hpte_remove(unsigned long hpte_group)
{
+ unsigned long hpte_v, flags;
struct hash_pte *hptep;
int i;
int slot_offset;
- unsigned long hpte_v;
+
+ local_irq_save(flags);
DBG_LOW(" remove(group=%lx)\n", hpte_group);
@@ -268,31 +354,36 @@ static long native_hpte_remove(unsigned long hpte_group)
slot_offset &= 0x7;
}
- if (i == HPTES_PER_GROUP)
- return -1;
+ if (i == HPTES_PER_GROUP) {
+ i = -1;
+ goto out;
+ }
/* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
hptep->v = 0;
-
+out:
+ local_irq_restore(flags);
return i;
}
static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
unsigned long vpn, int bpsize,
- int apsize, int ssize, int local)
+ int apsize, int ssize, unsigned long flags)
{
struct hash_pte *hptep = htab_address + slot;
unsigned long hpte_v, want_v;
- int ret = 0;
+ int ret = 0, local = 0;
+ unsigned long irqflags;
+
+ local_irq_save(irqflags);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
vpn, want_v & HPTE_V_AVPN, slot, newpp);
- native_lock_hpte(hptep);
-
- hpte_v = be64_to_cpu(hptep->v);
+ hpte_v = hpte_get_old_v(hptep);
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -304,36 +395,46 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
DBG_LOW(" -> miss\n");
ret = -1;
} else {
- DBG_LOW(" -> hit\n");
- /* Update the HPTE */
- hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) & ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)));
+ native_lock_hpte(hptep);
+ /* recheck with locks held */
+ hpte_v = hpte_get_old_v(hptep);
+ if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
+ !(hpte_v & HPTE_V_VALID))) {
+ ret = -1;
+ } else {
+ DBG_LOW(" -> hit\n");
+ /* Update the HPTE */
+ hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
+ ~(HPTE_R_PPP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PPP | HPTE_R_N |
+ HPTE_R_C)));
+ }
+ native_unlock_hpte(hptep);
}
- native_unlock_hpte(hptep);
- /* Ensure it is out of the tlb too. */
- tlbie(vpn, bpsize, apsize, ssize, local);
+ if (flags & HPTE_LOCAL_UPDATE)
+ local = 1;
+ /*
+ * Ensure it is out of the tlb too if it is not a nohpte fault
+ */
+ if (!(flags & HPTE_NOHPTE_UPDATE))
+ tlbie(vpn, bpsize, apsize, ssize, local);
+
+ local_irq_restore(irqflags);
return ret;
}
-static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+static long __native_hpte_find(unsigned long want_v, unsigned long slot)
{
struct hash_pte *hptep;
- unsigned long hash;
+ unsigned long hpte_v;
unsigned long i;
- long slot;
- unsigned long want_v, hpte_v;
-
- hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
- want_v = hpte_encode_avpn(vpn, psize, ssize);
- /* Bolted mappings are only ever in the primary group */
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
for (i = 0; i < HPTES_PER_GROUP; i++) {
- hptep = htab_address + slot;
- hpte_v = be64_to_cpu(hptep->v);
+ hptep = htab_address + slot;
+ hpte_v = hpte_get_old_v(hptep);
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */
return slot;
@@ -343,6 +444,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
return -1;
}
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+ unsigned long hpte_group;
+ unsigned long want_v;
+ unsigned long hash;
+ long slot;
+
+ hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+ want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+ /*
+ * We try to keep bolted entries always in primary hash
+ * But in some cases we can find them in secondary too.
+ */
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __native_hpte_find(want_v, hpte_group);
+ if (slot < 0) {
+ /* Try in secondary */
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = __native_hpte_find(want_v, hpte_group);
+ if (slot < 0)
+ return -1;
+ }
+
+ return slot;
+}
+
/*
* Update the page protection bits. Intended to be used to create
* guard pages for kernel data structures on pages which are bolted
@@ -357,6 +485,9 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
unsigned long vsid;
long slot;
struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
vsid = get_kernel_vsid(ea, ssize);
vpn = hpt_vpn(ea, vsid, ssize);
@@ -368,15 +499,55 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
/* Update the HPTE */
hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
- ~(HPTE_R_PP | HPTE_R_N)) |
- (newpp & (HPTE_R_PP | HPTE_R_N)));
+ ~(HPTE_R_PPP | HPTE_R_N)) |
+ (newpp & (HPTE_R_PPP | HPTE_R_N)));
/*
* Ensure it is out of the tlb too. Bolted entries base and
* actual page size will be same.
*/
tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
}
+/*
+ * Remove a bolted kernel entry. Memory hotplug uses this.
+ *
+ * No need to lock here because we should be the only user.
+ */
+static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
+{
+ unsigned long vpn;
+ unsigned long vsid;
+ long slot;
+ struct hash_pte *hptep;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ vsid = get_kernel_vsid(ea, ssize);
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ slot = native_hpte_find(vpn, psize, ssize);
+ if (slot == -1) {
+ /* Nothing to remove: do not return with interrupts still disabled */
+ local_irq_restore(flags);
+ return -ENOENT;
+ }
+
+ hptep = htab_address + slot;
+ VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));
+
+ /* Invalidate the hpte */
+ hptep->v = 0;
+ /* Invalidate the TLB */
+ tlbie(vpn, psize, psize, ssize, 0);
+
+ local_irq_restore(flags);
+ return 0;
+}
+
+
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
int bpsize, int apsize, int ssize, int local)
{
@@ -390,9 +561,20 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
- native_lock_hpte(hptep);
- hpte_v = be64_to_cpu(hptep->v);
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ native_lock_hpte(hptep);
+ /* recheck with locks held */
+ hpte_v = hpte_get_old_v(hptep);
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
+ hptep->v = 0;
+ } else
+ native_unlock_hpte(hptep);
+ }
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -400,22 +582,16 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
* (hpte_remove) because we assume the old translation is still
* technically "valid".
*/
- if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
- native_unlock_hpte(hptep);
- else
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
-
- /* Invalidate the TLB */
tlbie(vpn, bpsize, apsize, ssize, local);
local_irq_restore(flags);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void native_hugepage_invalidate(unsigned long vsid,
unsigned long addr,
unsigned char *hpte_slot_array,
- int psize, int ssize)
+ int psize, int ssize, int local)
{
int i;
struct hash_pte *hptep;
@@ -447,54 +623,39 @@ static void native_hugepage_invalidate(unsigned long vsid,
hptep = htab_address + slot;
want_v = hpte_encode_avpn(vpn, psize, ssize);
- native_lock_hpte(hptep);
- hpte_v = be64_to_cpu(hptep->v);
+ hpte_v = hpte_get_old_v(hptep);
/* Even if we miss, we need to invalidate the TLB */
- if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
- native_unlock_hpte(hptep);
- else
- /* Invalidate the hpte. NOTE: this also unlocks it */
- hptep->v = 0;
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ /* recheck with locks held */
+ native_lock_hpte(hptep);
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
+ /* Invalidate the hpte. NOTE: this also unlocks it */
+ release_hpte_lock();
+ hptep->v = 0;
+ } else
+ native_unlock_hpte(hptep);
+ }
/*
* We need to do tlb invalidate for all the address, tlbie
* instruction compares entry_VA in tlb with the VA specified
* here
*/
- tlbie(vpn, psize, actual_psize, ssize, 0);
+ tlbie(vpn, psize, actual_psize, ssize, local);
}
local_irq_restore(flags);
}
-
-static inline int __hpte_actual_psize(unsigned int lp, int psize)
+#else
+static void native_hugepage_invalidate(unsigned long vsid,
+ unsigned long addr,
+ unsigned char *hpte_slot_array,
+ int psize, int ssize, int local)
{
- int i, shift;
- unsigned int mask;
-
- /* start from 1 ignoring MMU_PAGE_4K */
- for (i = 1; i < MMU_PAGE_COUNT; i++) {
-
- /* invalid penc */
- if (mmu_psize_defs[psize].penc[i] == -1)
- continue;
- /*
- * encoding bits per actual page size
- * PTE LP actual page size
- * rrrr rrrz >=8KB
- * rrrr rrzz >=16KB
- * rrrr rzzz >=32KB
- * rrrr zzzz >=64KB
- * .......
- */
- shift = mmu_psize_defs[i].shift - LP_SHIFT;
- if (shift > LP_BITS)
- shift = LP_BITS;
- mask = (1 << shift) - 1;
- if ((lp & mask) == mmu_psize_defs[psize].penc[i])
- return i;
- }
- return -1;
+ WARN(1, "%s called without THP support\n", __func__);
}
+#endif
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
int *psize, int *apsize, int *ssize, unsigned long *vpn)
@@ -507,20 +668,16 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* Look at the 8 bit LP value */
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
+ hpte_r = hpte_new_to_old_r(hpte_r);
+ }
if (!(hpte_v & HPTE_V_LARGE)) {
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
} else {
- for (size = 0; size < MMU_PAGE_COUNT; size++) {
-
- /* valid entries have a shift value */
- if (!mmu_psize_defs[size].shift)
- continue;
-
- a_size = __hpte_actual_psize(lp, size);
- if (a_size != -1)
- break;
- }
+ size = hpte_page_sizes[lp] & 0xf;
+ a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
@@ -565,13 +722,21 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
* be when they isi), and we are the only one left. We rely on our kernel
* mapping being 0xC0's and the hardware ignoring those two real bits.
*
+ * This must be called with interrupts disabled.
+ *
+ * Taking the native_tlbie_lock is unsafe here due to the possibility of
+ * lockdep being on. On pre POWER5 hardware, not taking the lock could
+ * cause deadlock. On POWER5 and newer, not taking the lock is fine. This only
+ * gets called during boot before secondary CPUs have come up and during
+ * crashdump and all bets are off anyway.
+ *
* TODO: add batching support when enabled. remember, no dynamic memory here,
- * athough there is the control page available...
+ * although there is the control page available...
*/
-static void native_hpte_clear(void)
+static notrace void native_hpte_clear(void)
{
unsigned long vpn = 0;
- unsigned long slot, slots, flags;
+ unsigned long slot, slots;
struct hash_pte *hptep = htab_address;
unsigned long hpte_v;
unsigned long pteg_count;
@@ -579,13 +744,6 @@ static void native_hpte_clear(void)
pteg_count = htab_hash_mask + 1;
- local_irq_save(flags);
-
- /* we take the tlbie lock and hold it. Some hardware will
- * deadlock if we try to tlbie from two processors at once.
- */
- raw_spin_lock(&native_tlbie_lock);
-
slots = pteg_count * HPTES_PER_GROUP;
for (slot = 0; slot < slots; slot++, hptep++) {
@@ -597,19 +755,17 @@ static void native_hpte_clear(void)
hpte_v = be64_to_cpu(hptep->v);
/*
- * Call __tlbie() here rather than tlbie() since we
- * already hold the native_tlbie_lock.
+ * Call __tlbie() here rather than tlbie() since we can't take the
+ * native_tlbie_lock.
*/
if (hpte_v & HPTE_V_VALID) {
hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
hptep->v = 0;
- __tlbie(vpn, psize, apsize, ssize);
+ ___tlbie(vpn, psize, apsize, ssize);
}
}
asm volatile("eieio; tlbsync; ptesync":::"memory");
- raw_spin_unlock(&native_tlbie_lock);
- local_irq_restore(flags);
}
/*
@@ -618,14 +774,14 @@ static void native_hpte_clear(void)
*/
static void native_flush_hash_range(unsigned long number, int local)
{
- unsigned long vpn;
+ unsigned long vpn = 0;
unsigned long hash, index, hidx, shift, slot;
struct hash_pte *hptep;
unsigned long hpte_v;
unsigned long want_v;
unsigned long flags;
real_pte_t pte;
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
unsigned long psize = batch->psize;
int ssize = batch->ssize;
int i;
@@ -645,13 +801,21 @@ static void native_flush_hash_range(unsigned long number, int local)
slot += hidx & _PTEIDX_GROUP_IX;
hptep = htab_address + slot;
want_v = hpte_encode_avpn(vpn, psize, ssize);
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+ continue;
+ /* lock and try again */
native_lock_hpte(hptep);
- hpte_v = be64_to_cpu(hptep->v);
- if (!HPTE_V_COMPARE(hpte_v, want_v) ||
- !(hpte_v & HPTE_V_VALID))
+ hpte_v = hpte_get_old_v(hptep);
+
+ if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
- else
+ else {
+ release_hpte_lock();
hptep->v = 0;
+ }
+
} pte_iterate_hashed_end();
}
@@ -667,7 +831,7 @@ static void native_flush_hash_range(unsigned long number, int local)
__tlbiel(vpn, psize, psize, ssize);
} pte_iterate_hashed_end();
}
- asm volatile("ptesync":::"memory");
+ ppc_after_tlbiel_barrier();
} else {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
@@ -684,6 +848,10 @@ static void native_flush_hash_range(unsigned long number, int local)
__tlbie(vpn, psize, psize, ssize);
} pte_iterate_hashed_end();
}
+ /*
+ * Just do one more with the last used values.
+ */
+ fixup_tlbie_vpn(vpn, psize, psize, ssize);
asm volatile("eieio; tlbsync; ptesync":::"memory");
if (lock_tlbie)
@@ -695,12 +863,13 @@ static void native_flush_hash_range(unsigned long number, int local)
void __init hpte_init_native(void)
{
- ppc_md.hpte_invalidate = native_hpte_invalidate;
- ppc_md.hpte_updatepp = native_hpte_updatepp;
- ppc_md.hpte_updateboltedpp = native_hpte_updateboltedpp;
- ppc_md.hpte_insert = native_hpte_insert;
- ppc_md.hpte_remove = native_hpte_remove;
- ppc_md.hpte_clear_all = native_hpte_clear;
- ppc_md.flush_hash_range = native_flush_hash_range;
- ppc_md.hugepage_invalidate = native_hugepage_invalidate;
+ mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
+ mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
+ mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
+ mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
+ mmu_hash_ops.hpte_insert = native_hpte_insert;
+ mmu_hash_ops.hpte_remove = native_hpte_remove;
+ mmu_hash_ops.hpte_clear_all = native_hpte_clear;
+ mmu_hash_ops.flush_hash_range = native_flush_hash_range;
+ mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
}
diff --git a/arch/powerpc/mm/book3s64/hash_pgtable.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
new file mode 100644
index 000000000000..82d31177630b
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -0,0 +1,563 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/mm.h>
+#include <linux/stop_machine.h>
+
+#include <asm/sections.h>
+#include <asm/mmu.h>
+#include <asm/tlb.h>
+#include <asm/firmware.h>
+
+#include <mm/mmu_decl.h>
+
+#include <trace/events/thp.h>
+
+#if H_PGTABLE_RANGE > (USER_VSID_RANGE * (TASK_SIZE_USER64 / TASK_CONTEXT_SIZE))
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * vmemmap is the starting address of the virtual address space where
+ * struct pages are allocated for all possible PFNs present on the system
+ * including holes and bad memory (hence sparse). These virtual struct
+ * pages are stored in sequence in this virtual address space irrespective
+ * of the fact whether the corresponding PFN is valid or not. This achieves
+ * constant relationship between address of struct page and its PFN.
+ *
+ * During boot or memory hotplug operation when a new memory section is
+ * added, physical memory allocation (including hash table bolting) will
+ * be performed for the set of struct pages which are part of the memory
+ * section. This saves memory by not allocating struct pages for PFNs
+ * which are not valid.
+ *
+ * ----------------------------------------------
+ * | PHYSICAL ALLOCATION OF VIRTUAL STRUCT PAGES|
+ * ----------------------------------------------
+ *
+ * f000000000000000 c000000000000000
+ * vmemmap +--------------+ +--------------+
+ * + | page struct | +--------------> | page struct |
+ * | +--------------+ +--------------+
+ * | | page struct | +--------------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | + +------> | page struct |
+ * | +--------------+ | +--------------+
+ * | | page struct | | +--> | page struct |
+ * | +--------------+ | | +--------------+
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | | |
+ * | +--------------+ | |
+ * | | page struct | +-------+ |
+ * | +--------------+ |
+ * | | page struct | +-----------+
+ * | +--------------+
+ * | | page struct | No mapping
+ * | +--------------+
+ * | | page struct | No mapping
+ * v +--------------+
+ *
+ * -----------------------------------------
+ * | RELATION BETWEEN STRUCT PAGES AND PFNS|
+ * -----------------------------------------
+ *
+ * vmemmap +--------------+ +---------------+
+ * + | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | |
+ * | +--------------+
+ * | | |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * | +--------------+ +---------------+
+ * | | page struct | +-------------> | PFN |
+ * v +--------------+ +---------------+
+ */
+/*
+ * On hash-based CPUs, the vmemmap is bolted in the hash table.
+ *
+ */
+int __meminit hash__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+{
+ int rc;
+
+ if ((start + page_size) >= H_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
+ if (rc < 0) {
+ int rc2 = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON(rc2 && (rc2 != -ENOENT));
+ }
+ return rc;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void hash__vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size)
+{
+ int rc = htab_remove_mapping(start, start + page_size,
+ mmu_vmemmap_psize,
+ mmu_kernel_ssize);
+ BUG_ON((rc < 0) && (rc != -ENOENT));
+ WARN_ON(rc == -ENOENT);
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+/*
+ * map_kernel_page is currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ BUILD_BUG_ON(TASK_SIZE_USER64 > H_PGTABLE_RANGE);
+ if (slab_is_available()) {
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ pudp = pud_alloc(&init_mm, p4dp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
+ } else {
+ /*
+ * If the mm subsystem is not fully up, we cannot create a
+ * linux page table entry for this mapping. Simply bolt an
+ * entry in the hardware page table.
+ *
+ */
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
+ mmu_io_psize, mmu_kernel_ssize)) {
+ printk(KERN_ERR "Failed to do bolted mapping IO "
+ "memory at %016lx !\n", pa);
+ return -ENOMEM;
+ }
+ }
+
+ smp_wmb();
+ return 0;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+unsigned long hash__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set)
+{
+ __be64 old_be, tmp;
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!hash__pmd_trans_huge(*pmdp));
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+#endif
+
+ __asm__ __volatile__(
+ "1: ldarx %0,0,%3\n\
+ and. %1,%0,%6\n\
+ bne- 1b \n\
+ andc %1,%0,%4 \n\
+ or %1,%1,%7\n\
+ stdcx. %1,0,%3 \n\
+ bne- 1b"
+ : "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
+ : "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
+ "r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
+ : "cc" );
+
+ old = be64_to_cpu(old_be);
+
+ trace_hugepage_update_pmd(addr, old, clr, set);
+ if (old & H_PAGE_HASHPTE)
+ hpte_do_hugepage_flush(mm, addr, pmdp, old);
+ return old;
+}
+
+pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ pmd_t pmd;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(pmd_trans_huge(*pmdp));
+
+ pmd = *pmdp;
+ pmd_clear(pmdp);
+ /*
+ * Wait for all pending hash_page to finish. This is needed
+ * in case of subpage collapse. When we collapse normal pages
+ * to hugepage, we first clear the pmd, then invalidate all
+ * the PTE entries. The assumption here is that any low level
+ * page fault will see a none pmd and take the slow path that
+ * will wait on mmap_lock. But we could very well be in a
+ * hash_page with local ptep pointer value. Such a hash page
+ * can result in adding new HPTE entries for normal subpages.
+ * That means we could be modifying the page content as we
+ * copy them to a huge page. So wait for parallel hash_page
+ * to finish before invalidating HPTE entries. We can do this
+ * by sending an IPI to all the cpus and executing a dummy
+ * function there.
+ */
+ serialize_against_pte_lookup(vma->vm_mm);
+ /*
+ * Now invalidate the hpte entries in the range
+ * covered by pmd. This makes sure we take a
+ * fault and will find the pmd as none, which will
+ * result in a major fault which takes mmap_lock and
+ * hence wait for collapse to complete. Without this
+ * the __collapse_huge_page_copy can result in copying
+ * the old content.
+ */
+ flush_hash_table_pmd_range(vma->vm_mm, &pmd, address);
+ return pmd;
+}
+
+/*
+ * We want to put the pgtable in pmd and use pgtable for tracking
+ * the base page size hptes
+ */
+void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ pgtable_t *pgtable_slot;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+ /*
+ * we store the pgtable in the second half of PMD
+ */
+ pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+ *pgtable_slot = pgtable;
+ /*
+ * expose the deposited pgtable to other cpus.
+ * before we set the hugepage PTE at pmd level
+ * hash fault code looks at the deposited pgtable
+ * to store hash index values.
+ */
+ smp_wmb();
+}
+
+pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pgtable_t pgtable;
+ pgtable_t *pgtable_slot;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+ pgtable = *pgtable_slot;
+ /*
+ * Once we withdraw, mark the entry NULL.
+ */
+ *pgtable_slot = NULL;
+ /*
+ * We store HPTE information in the deposited PTE fragment.
+ * zero out the content on withdraw.
+ */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
+ return pgtable;
+}
+
+/*
+ * A linux hugepage PMD was changed and the corresponding hash table entries
+ * need to be flushed.
+ */
+void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long old_pmd)
+{
+ int ssize;
+ unsigned int psize;
+ unsigned long vsid;
+ unsigned long flags = 0;
+
+ /* get the base page size,vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
+ psize = get_slice_psize(mm, addr);
+ BUG_ON(psize == MMU_PAGE_16M);
+#endif
+ if (old_pmd & H_PAGE_COMBO)
+ psize = MMU_PAGE_4K;
+ else
+ psize = MMU_PAGE_64K;
+
+ if (!is_kernel_addr(addr)) {
+ ssize = user_segment_size(addr);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
+
+ if (mm_is_thread_local(mm))
+ flags |= HPTE_LOCAL_UPDATE;
+
+ return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
+}
+
+pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ pmd_t old_pmd;
+ pgtable_t pgtable;
+ unsigned long old;
+ pgtable_t *pgtable_slot;
+
+ old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+ old_pmd = __pmd(old);
+ /*
+ * We have pmd == none and we are holding page_table_lock.
+ * So we can safely go and clear the pgtable hash
+ * index info.
+ */
+ pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
+ pgtable = *pgtable_slot;
+ /*
+ * Let's zero out the old valid and hash index details,
+ * since the hash fault code looks at them.
+ */
+ memset(pgtable, 0, PTE_FRAG_SIZE);
+ return old_pmd;
+}
+
+int hash__has_transparent_hugepage(void)
+{
+
+ if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+ return 0;
+ /*
+ * We support THP only if PMD_SIZE is 16MB.
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
+ return 0;
+ /*
+ * We need to make sure that we support 16MB hugepage in a segment
+ * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
+ * of 64K.
+ */
+ /*
+ * If we have 64K HPTE, we will be using that by default
+ */
+ if (mmu_psize_defs[MMU_PAGE_64K].shift &&
+ (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
+ return 0;
+ /*
+ * Ok we only have 4K HPTE
+ */
+ if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(hash__has_transparent_hugepage);
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+
+struct change_memory_parms {
+ unsigned long start, end, newpp;
+ unsigned int step, nr_cpus;
+ atomic_t master_cpu;
+ atomic_t cpu_counter;
+};
+
+// We'd rather this was on the stack but it has to be in the RMO
+static struct change_memory_parms chmem_parms;
+
+// And therefore we need a lock to protect it from concurrent use
+static DEFINE_MUTEX(chmem_lock);
+
+static void change_memory_range(unsigned long start, unsigned long end,
+ unsigned int step, unsigned long newpp)
+{
+ unsigned long idx;
+
+ pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n",
+ start, end, newpp, step);
+
+ for (idx = start; idx < end; idx += step)
+ /* Not sure if we can do much with the return value */
+ mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize,
+ mmu_kernel_ssize);
+}
+
+static int notrace chmem_secondary_loop(struct change_memory_parms *parms)
+{
+ unsigned long msr, tmp, flags;
+ int *p;
+
+ p = &parms->cpu_counter.counter;
+
+ local_irq_save(flags);
+ hard_irq_disable();
+
+ asm volatile (
+ // Switch to real mode and leave interrupts off
+ "mfmsr %[msr] ;"
+ "li %[tmp], %[MSR_IR_DR] ;"
+ "andc %[tmp], %[msr], %[tmp] ;"
+ "mtmsrd %[tmp] ;"
+
+ // Tell the master we are in real mode
+ "1: "
+ "lwarx %[tmp], 0, %[p] ;"
+ "addic %[tmp], %[tmp], -1 ;"
+ "stwcx. %[tmp], 0, %[p] ;"
+ "bne- 1b ;"
+
+ // Spin until the counter goes to zero
+ "2: ;"
+ "lwz %[tmp], 0(%[p]) ;"
+ "cmpwi %[tmp], 0 ;"
+ "bne- 2b ;"
+
+ // Switch back to virtual mode
+ "mtmsrd %[msr] ;"
+
+ : // outputs
+ [msr] "=&r" (msr), [tmp] "=&b" (tmp), "+m" (*p)
+ : // inputs
+ [p] "b" (p), [MSR_IR_DR] "i" (MSR_IR | MSR_DR)
+ : // clobbers
+ "cc", "xer"
+ );
+
+ local_irq_restore(flags);
+
+ return 0;
+}
+
+static int change_memory_range_fn(void *data)
+{
+ struct change_memory_parms *parms = data;
+
+ // First CPU goes through, all others wait.
+ if (atomic_xchg(&parms->master_cpu, 1) == 1)
+ return chmem_secondary_loop(parms);
+
+ // Wait for all but one CPU (this one) to call-in
+ while (atomic_read(&parms->cpu_counter) > 1)
+ barrier();
+
+ change_memory_range(parms->start, parms->end, parms->step, parms->newpp);
+
+ mb();
+
+ // Signal the other CPUs that we're done
+ atomic_dec(&parms->cpu_counter);
+
+ return 0;
+}
+
+static bool hash__change_memory_range(unsigned long start, unsigned long end,
+ unsigned long newpp)
+{
+ unsigned int step, shift;
+
+ shift = mmu_psize_defs[mmu_linear_psize].shift;
+ step = 1 << shift;
+
+ start = ALIGN_DOWN(start, step);
+ end = ALIGN(end, step); // aligns up
+
+ if (start >= end)
+ return false;
+
+ if (firmware_has_feature(FW_FEATURE_LPAR)) {
+ mutex_lock(&chmem_lock);
+
+ chmem_parms.start = start;
+ chmem_parms.end = end;
+ chmem_parms.step = step;
+ chmem_parms.newpp = newpp;
+ atomic_set(&chmem_parms.master_cpu, 0);
+
+ cpus_read_lock();
+
+ atomic_set(&chmem_parms.cpu_counter, num_online_cpus());
+
+ // Ensure state is consistent before we call the other CPUs
+ mb();
+
+ stop_machine_cpuslocked(change_memory_range_fn, &chmem_parms,
+ cpu_online_mask);
+
+ cpus_read_unlock();
+ mutex_unlock(&chmem_lock);
+ } else
+ change_memory_range(start, end, step, newpp);
+
+ return true;
+}
+
+void hash__mark_rodata_ro(void)
+{
+ unsigned long start, end, pp;
+
+ start = (unsigned long)_stext;
+ end = (unsigned long)__end_rodata;
+
+ pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL_ROX), HPTE_USE_KERNEL_KEY);
+
+ WARN_ON(!hash__change_memory_range(start, end, pp));
+}
+
+void hash__mark_initmem_nx(void)
+{
+ unsigned long start, end, pp;
+
+ start = (unsigned long)__init_begin;
+ end = (unsigned long)__init_end;
+
+ pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
+
+ WARN_ON(!hash__change_memory_range(start, end, pp));
+}
+#endif
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index d2a94b85dbc2..21fcad97ae80 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file contains the routines for flushing entries from the
* TLB and MMU hash table.
@@ -14,21 +15,17 @@
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
+#include <asm/pte-walk.h>
+
#include <trace/events/thp.h>
@@ -49,11 +46,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
unsigned int psize;
int ssize;
real_pte_t rpte;
- int i;
+ int i, offset;
i = batch->index;
- /* Get page size (maybe move back to caller).
+ /*
+ * Get page size (maybe move back to caller).
*
* NOTE: when using special 64K mappings in 4K environment like
* for SPEs, we obtain the page size from the slice, which thus
@@ -65,40 +63,45 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
psize = get_slice_psize(mm, addr);
/* Mask the address for the correct page size */
addr &= ~((1UL << mmu_psize_defs[psize].shift) - 1);
+ if (unlikely(psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
#else
BUG();
psize = pte_pagesize_index(mm, addr, pte); /* shutup gcc */
#endif
} else {
psize = pte_pagesize_index(mm, addr, pte);
- /* Mask the address for the standard page size. If we
+ /*
+ * Mask the address for the standard page size. If we
* have a 64k page kernel, but the hardware does not
* support 64k pages, this might be different from the
- * hardware page size encoded in the slice table. */
+ * hardware page size encoded in the slice table.
+ */
addr &= PAGE_MASK;
+ offset = PTRS_PER_PTE;
}
/* Build full vaddr */
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
+ vsid = get_user_vsid(&mm->context, addr, ssize);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
}
WARN_ON(vsid == 0);
vpn = hpt_vpn(addr, vsid, ssize);
- rpte = __real_pte(__pte(pte), ptep);
+ rpte = __real_pte(__pte(pte), ptep, offset);
/*
* Check if we have an active batch on this CPU. If not, just
- * flush now and return. For now, we don global invalidates
- * in that case, might be worth testing the mm cpu mask though
- * and decide to use local invalidates instead...
+ * flush now and return.
*/
if (!batch->active) {
- flush_hash_page(vpn, rpte, psize, ssize, 0);
+ flush_hash_page(vpn, rpte, psize, ssize, mm_is_thread_local(mm));
put_cpu_var(ppc64_tlb_batch);
return;
}
@@ -140,13 +143,10 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
*/
void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
{
- const struct cpumask *tmp;
- int i, local = 0;
+ int i, local;
i = batch->index;
- tmp = cpumask_of(smp_processor_id());
- if (cpumask_equal(mm_cpumask(batch->mm), tmp))
- local = 1;
+ local = mm_is_thread_local(batch->mm);
if (i == 1)
flush_hash_page(batch->vpn[0], batch->pte[0],
batch->psize, batch->ssize, local);
@@ -155,11 +155,12 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
batch->index = 0;
}
-void tlb_flush(struct mmu_gather *tlb)
+void hash__tlb_flush(struct mmu_gather *tlb)
{
struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
- /* If there's a TLB batch pending, then we must flush it because the
+ /*
+ * If there's a TLB batch pending, then we must flush it because the
* pages are going to be freed and we really don't want to have a CPU
* access a freed page because it has a stale TLB
*/
@@ -174,7 +175,6 @@ void tlb_flush(struct mmu_gather *tlb)
* from the hash table (and the TLB). But keeps
* the linux PTEs intact.
*
- * @mm : mm_struct of the target address space (generally init_mm)
* @start : starting address
* @end : ending address (not included in the flush)
*
@@ -187,18 +187,17 @@ void tlb_flush(struct mmu_gather *tlb)
* Because of that usage pattern, it is implemented for small size rather
* than speed.
*/
-void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
- unsigned long end)
+void __flush_hash_table_range(unsigned long start, unsigned long end)
{
int hugepage_shift;
unsigned long flags;
- start = _ALIGN_DOWN(start, PAGE_SIZE);
- end = _ALIGN_UP(end, PAGE_SIZE);
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = ALIGN(end, PAGE_SIZE);
- BUG_ON(!mm->pgd);
- /* Note: Normally, we should only ever use a batch within a
+ /*
+ * Note: Normally, we should only ever use a batch within a
* PTE locked section. This violates the rule, but will work
* since we don't actually modify the PTEs, we just flush the
* hash while leaving the PTEs intact (including their reference
@@ -208,34 +207,29 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
local_irq_save(flags);
arch_enter_lazy_mmu_mode();
for (; start < end; start += PAGE_SIZE) {
- pte_t *ptep = find_linux_pte_or_hugepte(mm->pgd, start,
- &hugepage_shift);
+ pte_t *ptep = find_init_mm_pte(start, &hugepage_shift);
unsigned long pte;
if (ptep == NULL)
continue;
pte = pte_val(*ptep);
- if (hugepage_shift)
- trace_hugepage_invalidate(start, pte_val(pte));
- if (!(pte & _PAGE_HASHPTE))
+ if (!(pte & H_PAGE_HASHPTE))
continue;
- if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
- hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
- else
- hpte_need_flush(mm, start, ptep, pte, 0);
+ hpte_need_flush(&init_mm, start, ptep, pte, hugepage_shift);
}
arch_leave_lazy_mmu_mode();
local_irq_restore(flags);
}
-void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+void flush_hash_table_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
{
pte_t *pte;
pte_t *start_pte;
unsigned long flags;
- addr = _ALIGN_DOWN(addr, PMD_SIZE);
- /* Note: Normally, we should only ever use a batch within a
+ addr = ALIGN_DOWN(addr, PMD_SIZE);
+ /*
+ * Note: Normally, we should only ever use a batch within a
* PTE locked section. This violates the rule, but will work
* since we don't actually modify the PTEs, we just flush the
* hash while leaving the PTEs intact (including their reference
@@ -245,12 +239,16 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
local_irq_save(flags);
arch_enter_lazy_mmu_mode();
start_pte = pte_offset_map(pmd, addr);
+ if (!start_pte)
+ goto out;
for (pte = start_pte; pte < start_pte + PTRS_PER_PTE; pte++) {
unsigned long pteval = pte_val(*pte);
- if (pteval & _PAGE_HASHPTE)
+ if (pteval & H_PAGE_HASHPTE)
hpte_need_flush(mm, addr, pte, pteval, 0);
addr += PAGE_SIZE;
}
+ pte_unmap(start_pte);
+out:
arch_leave_lazy_mmu_mode();
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
new file mode 100644
index 000000000000..3aee3af614af
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -0,0 +1,2465 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC64 port by Mike Corrigan and Dave Engebretsen
+ * {mikejc|engebret}@us.ibm.com
+ *
+ * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
+ *
+ * SMP scalability work:
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * Module name: htab.c
+ *
+ * Description:
+ * PowerPC Hashed Page Table functions
+ */
+
+#undef DEBUG
+#undef DEBUG_LOW
+
+#define pr_fmt(fmt) "hash-mmu: " fmt
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/sched/mm.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/sysctl.h>
+#include <linux/export.h>
+#include <linux/ctype.h>
+#include <linux/cache.h>
+#include <linux/init.h>
+#include <linux/signal.h>
+#include <linux/memblock.h>
+#include <linux/context_tracking.h>
+#include <linux/libfdt.h>
+#include <linux/pkeys.h>
+#include <linux/hugetlb.h>
+#include <linux/cpu.h>
+#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+#include <linux/random.h>
+#include <linux/elf-randomize.h>
+#include <linux/of_fdt.h>
+#include <linux/kfence.h>
+
+#include <asm/interrupt.h>
+#include <asm/processor.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/page.h>
+#include <asm/types.h>
+#include <linux/uaccess.h>
+#include <asm/machdep.h>
+#include <asm/io.h>
+#include <asm/eeh.h>
+#include <asm/tlb.h>
+#include <asm/cacheflush.h>
+#include <asm/cputable.h>
+#include <asm/sections.h>
+#include <asm/spu.h>
+#include <asm/udbg.h>
+#include <asm/text-patching.h>
+#include <asm/fadump.h>
+#include <asm/firmware.h>
+#include <asm/tm.h>
+#include <asm/trace.h>
+#include <asm/ps3.h>
+#include <asm/pte-walk.h>
+#include <asm/asm-prototypes.h>
+#include <asm/ultravisor.h>
+#include <asm/kfence.h>
+
+#include <mm/mmu_decl.h>
+
+#include "internal.h"
+
+
+#ifdef DEBUG
+#define DBG(fmt...) udbg_printf(fmt)
+#else
+#define DBG(fmt...)
+#endif
+
+#ifdef DEBUG_LOW
+#define DBG_LOW(fmt...) udbg_printf(fmt)
+#else
+#define DBG_LOW(fmt...)
+#endif
+
+#define KB (1024)
+#define MB (1024*KB)
+#define GB (1024L*MB)
+
+/*
+ * Note: pte --> Linux PTE
+ * HPTE --> PowerPC Hashed Page Table Entry
+ *
+ * Execution context:
+ * htab_initialize is called with the MMU off (of course), but
+ * the kernel has been copied down to zero so it can directly
+ * reference global data. At this point it is very difficult
+ * to print debug info.
+ *
+ */
+
+static unsigned long _SDR1;
+
+u8 hpte_page_sizes[1 << LP_BITS];
+EXPORT_SYMBOL_GPL(hpte_page_sizes);
+
+struct hash_pte *htab_address;
+unsigned long htab_size_bytes;
+unsigned long htab_hash_mask;
+EXPORT_SYMBOL_GPL(htab_hash_mask);
+int mmu_linear_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_linear_psize);
+int mmu_virtual_psize = MMU_PAGE_4K;
+int mmu_vmalloc_psize = MMU_PAGE_4K;
+EXPORT_SYMBOL_GPL(mmu_vmalloc_psize);
+int mmu_io_psize = MMU_PAGE_4K;
+int mmu_kernel_ssize = MMU_SEGSIZE_256M;
+EXPORT_SYMBOL_GPL(mmu_kernel_ssize);
+int mmu_highuser_ssize = MMU_SEGSIZE_256M;
+u16 mmu_slb_size = 64;
+EXPORT_SYMBOL_GPL(mmu_slb_size);
+#ifdef CONFIG_PPC_64K_PAGES
+int mmu_ci_restrictions;
+#endif
+struct mmu_hash_ops mmu_hash_ops __ro_after_init;
+EXPORT_SYMBOL(mmu_hash_ops);
+
+/*
+ * These are definitions of page sizes arrays to be used when none
+ * is provided by the firmware.
+ */
+
+/*
+ * Fallback (4k pages only)
+ */
+static struct mmu_psize_def mmu_psize_defaults[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
+ .avpnm = 0,
+ .tlbiel = 0,
+ },
+};
+
+/*
+ * POWER4, GPUL, POWER5
+ *
+ * Support for 16MB large pages
+ */
+static struct mmu_psize_def mmu_psize_defaults_gp[] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ .sllp = 0,
+ .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
+ .avpnm = 0,
+ .tlbiel = 1,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ .sllp = SLB_VSID_L,
+ .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
+ [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
+ .avpnm = 0x1UL,
+ .tlbiel = 0,
+ },
+};
+
+/*
+ * Issue a single-operand tlbiel whose RB value encodes the TLB set @set and
+ * the invalidation selector @is.
+ */
+static inline void tlbiel_hash_set_isa206(unsigned int set, unsigned int is)
+{
+	unsigned long rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+
+	asm volatile("tlbiel %0" : : "r" (rb));
+}
+
+/*
+ * tlbiel instruction for hash, set invalidation
+ * i.e., r=0 (hash format) and is=01 or is=10 or is=11
+ *
+ * @set and @is are encoded into RB, @pid into RS; @ric and @prs are passed to
+ * the instruction as immediates.
+ */
+static __always_inline void tlbiel_hash_set_isa300(unsigned int set, unsigned int is,
+					unsigned int pid,
+					unsigned int ric, unsigned int prs)
+{
+	unsigned int r = 0; /* hash format */
+	unsigned long rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+	unsigned long rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+	asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4)
+		     : : "r"(rb), "r"(rs), "i"(ric), "i"(prs), "i"(r)
+		     : "memory");
+}
+
+
+/*
+ * Walk TLB sets 0 .. @num_sets - 1 and issue a tlbiel with selector @is for
+ * each one, bracketed by a ptesync before and ppc_after_tlbiel_barrier()
+ * after.
+ */
+static void tlbiel_all_isa206(unsigned int num_sets, unsigned int is)
+{
+	unsigned int set = 0;
+
+	asm volatile("ptesync": : :"memory");
+
+	while (set < num_sets) {
+		tlbiel_hash_set_isa206(set, is);
+		set++;
+	}
+
+	ppc_after_tlbiel_barrier();
+}
+
+/*
+ * Issue the full tlbiel flush sequence used by hash__tlbiel_all() on
+ * CPU_FTR_ARCH_300 CPUs.
+ *
+ * @num_sets: number of TLB sets to walk
+ * @is: invalidation selector handed to every tlbiel
+ *
+ * Order: ptesync, partition table cache (HV mode only), process table cache,
+ * each TLB set, the post-tlbiel barrier, and finally an ERAT invalidate
+ * followed by isync.
+ */
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the partition table cache if this is HV mode.
+ */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_hash_set_isa300(0, is, 0, 2, 0);
+
+ /*
+ * Now invalidate the process table cache. UPRT=0 HPT modes (what
+ * current hardware implements) do not use the process table, but
+ * add the flushes anyway.
+ *
+ * From ISA v3.0B p. 1078:
+ * The following forms are invalid.
+ * * PRS=1, R=0, and RIC!=2 (The only process-scoped
+ * HPT caching is of the Process Table.)
+ */
+ tlbiel_hash_set_isa300(0, is, 0, 2, 1);
+
+ /*
+ * Then flush the sets of the TLB proper. Hash mode uses
+ * partition scoped TLB translations, which may be flushed
+ * in !HV mode.
+ */
+ for (set = 0; set < num_sets; set++)
+ tlbiel_hash_set_isa300(set, is, 0, 0, 0);
+
+ ppc_after_tlbiel_barrier();
+
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+
+/*
+ * Flush the TLB with tlbiel, using the flush routine that matches the CPU.
+ *
+ * @action selects the invalidation selector: TLB_INVAL_SCOPE_GLOBAL maps to
+ * 3 and TLB_INVAL_SCOPE_LPID to 2; any other value is a BUG().  CPUs without
+ * CPU_FTR_ARCH_206 or later only trigger a warning.
+ */
+void hash__tlbiel_all(unsigned int action)
+{
+	unsigned int is;
+
+	if (action == TLB_INVAL_SCOPE_GLOBAL)
+		is = 3;
+	else if (action == TLB_INVAL_SCOPE_LPID)
+		is = 2;
+	else
+		BUG();
+
+	if (early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+		tlbiel_all_isa300(POWER9_TLB_SETS_HASH, is);
+		return;
+	}
+
+	if (early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		tlbiel_all_isa206(POWER8_TLB_SETS, is);
+		return;
+	}
+
+	if (early_cpu_has_feature(CPU_FTR_ARCH_206)) {
+		tlbiel_all_isa206(POWER7_TLB_SETS, is);
+		return;
+	}
+
+	WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
+}
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+/*
+ * Insert a bolted HPTE for the linear-map page at @vaddr and record the slot
+ * that hpte_insert_repeating() returned in @slots[@idx].
+ *
+ * Bit 0x80 of a @slots entry marks it as holding a slot; entries that already
+ * have it set are left alone.  @lock protects the update of @slots.
+ */
+static void kernel_map_linear_page(unsigned long vaddr, unsigned long idx,
+				   u8 *slots, raw_spinlock_t *lock)
+{
+	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long mode = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL), HPTE_USE_KERNEL_KEY);
+	unsigned long hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
+	long slot;
+
+	/* Don't create HPTE entries for a bad address or an occupied entry */
+	if (!vsid || (slots[idx] & 0x80))
+		return;
+
+	slot = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
+				     HPTE_V_BOLTED,
+				     mmu_linear_psize, mmu_kernel_ssize);
+	BUG_ON(slot < 0);
+
+	raw_spin_lock(lock);
+	BUG_ON(slots[idx] & 0x80);
+	slots[idx] = slot | 0x80;
+	raw_spin_unlock(lock);
+}
+
+/*
+ * Invalidate the HPTE recorded in @slots[@idx] for the linear-map page at
+ * @vaddr.  Does nothing when the entry does not have bit 0x80 set.
+ *
+ * The entry is read and cleared under @lock; the invalidation itself runs
+ * after the lock has been dropped.
+ */
+static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long idx,
+				     u8 *slots, raw_spinlock_t *lock)
+{
+	unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
+	unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
+	unsigned long hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
+	unsigned long hslot, slot;
+	u8 entry;
+
+	raw_spin_lock(lock);
+	entry = slots[idx];
+	if (entry & 0x80)
+		slots[idx] = 0;
+	raw_spin_unlock(lock);
+
+	if (!(entry & 0x80))
+		return;
+
+	/* Rebuild the global slot number from the saved group-relative one */
+	hslot = entry & 0x7f;
+	if (hslot & _PTEIDX_SECONDARY)
+		hash = ~hash;
+	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP +
+	       (hslot & _PTEIDX_GROUP_IX);
+
+	mmu_hash_ops.hpte_invalidate(slot, vpn, mmu_linear_psize,
+				     mmu_linear_psize,
+				     mmu_kernel_ssize, 0);
+}
+#endif
+
+/*
+ * debug_pagealloc is usable only when it is enabled and the number of
+ * linear-map pages (end of DRAM >> PAGE_SHIFT) does not exceed
+ * ppc64_rma_size / 4.
+ */
+static inline bool hash_supports_debug_pagealloc(void)
+{
+	unsigned long max_hash_count = ppc64_rma_size / 4;
+	unsigned long linear_map_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+
+	return debug_pagealloc_enabled() && linear_map_count <= max_hash_count;
+}
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+static u8 *linear_map_hash_slots;
+static unsigned long linear_map_hash_count;
+static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
+/*
+ * Allocate the debug_pagealloc slot array: one byte per page up to the end of
+ * DRAM, taken from memblock below ppc64_rma_size.  Panics if the allocation
+ * fails; does nothing when debug_pagealloc is not supported.
+ */
+static __init void hash_debug_pagealloc_alloc_slots(void)
+{
+	if (!hash_supports_debug_pagealloc())
+		return;
+
+	linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
+	linear_map_hash_slots = memblock_alloc_try_nid(linear_map_hash_count, 1,
+						       MEMBLOCK_LOW_LIMIT,
+						       ppc64_rma_size,
+						       NUMA_NO_NODE);
+	if (linear_map_hash_slots)
+		return;
+
+	panic("%s: Failed to allocate %lu bytes max_addr=%pa\n",
+	      __func__, linear_map_hash_count, &ppc64_rma_size);
+}
+
+/*
+ * Record @slot (tagged with bit 0x80) for the page containing @paddr, provided
+ * debug_pagealloc is enabled, the slot array exists and the page index is
+ * within it.
+ */
+static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr,
+						 int slot)
+{
+	phys_addr_t lmi = paddr >> PAGE_SHIFT;
+
+	if (debug_pagealloc_enabled() && linear_map_hash_count &&
+	    lmi < linear_map_hash_count)
+		linear_map_hash_slots[lmi] = slot | 0x80;
+}
+
+/*
+ * Map (@enable != 0) or unmap (@enable == 0) @numpages linear-map pages
+ * starting at @page, with interrupts disabled for the duration.  Pages whose
+ * index lies beyond the slot array are skipped.  Always returns 0.
+ */
+static int hash_debug_pagealloc_map_pages(struct page *page, int numpages,
+					  int enable)
+{
+	unsigned long irqflags, addr, idx;
+	int n;
+
+	if (!debug_pagealloc_enabled() || !linear_map_hash_count)
+		return 0;
+
+	local_irq_save(irqflags);
+	for (n = 0; n < numpages; n++) {
+		addr = (unsigned long)page_address(page + n);
+		idx = __pa(addr) >> PAGE_SHIFT;
+		if (idx >= linear_map_hash_count)
+			continue;
+
+		if (!enable)
+			kernel_unmap_linear_page(addr, idx,
+				linear_map_hash_slots, &linear_map_hash_lock);
+		else
+			kernel_map_linear_page(addr, idx,
+				linear_map_hash_slots, &linear_map_hash_lock);
+	}
+	local_irq_restore(irqflags);
+
+	return 0;
+}
+
+#else /* CONFIG_DEBUG_PAGEALLOC */
+/* No-op variants used when CONFIG_DEBUG_PAGEALLOC is not set. */
+static inline void hash_debug_pagealloc_alloc_slots(void) {}
+static inline void hash_debug_pagealloc_add_slot(phys_addr_t paddr, int slot) {}
+static int __maybe_unused
+hash_debug_pagealloc_map_pages(struct page *page, int numpages, int enable)
+{
+ return 0;
+}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
+#ifdef CONFIG_KFENCE
+static u8 *linear_map_kf_hash_slots;
+static unsigned long linear_map_kf_hash_count;
+static DEFINE_RAW_SPINLOCK(linear_map_kf_hash_lock);
+
+static phys_addr_t kfence_pool;
+
+/*
+ * Early allocation for kfence: a one-byte-per-page slot array (below
+ * ppc64_rma_size) and the kfence pool itself, which is marked nomap.
+ *
+ * kfence is disabled if early init is not enabled or if either allocation
+ * fails.
+ */
+static __init void hash_kfence_alloc_pool(void)
+{
+	if (!kfence_early_init_enabled())
+		goto disable;
+
+	/* allocate linear map for kfence within RMA region */
+	linear_map_kf_hash_count = KFENCE_POOL_SIZE >> PAGE_SHIFT;
+	linear_map_kf_hash_slots = memblock_alloc_try_nid(
+					linear_map_kf_hash_count, 1,
+					MEMBLOCK_LOW_LIMIT, ppc64_rma_size,
+					NUMA_NO_NODE);
+	if (!linear_map_kf_hash_slots) {
+		pr_err("%s: memblock for linear map (%lu) failed\n", __func__,
+		       linear_map_kf_hash_count);
+		goto disable;
+	}
+
+	/* allocate kfence pool early */
+	kfence_pool = memblock_phys_alloc_range(KFENCE_POOL_SIZE, PAGE_SIZE,
+				MEMBLOCK_LOW_LIMIT, MEMBLOCK_ALLOC_ANYWHERE);
+	if (kfence_pool) {
+		memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+		return;
+	}
+
+	/* Pool allocation failed: give the slot array back */
+	pr_err("%s: memblock for kfence pool (%lu) failed\n", __func__,
+	       KFENCE_POOL_SIZE);
+	memblock_free(linear_map_kf_hash_slots,
+		      linear_map_kf_hash_count);
+	linear_map_kf_hash_count = 0;
+
+disable:
+	pr_info("Disabling kfence\n");
+	disable_kfence();
+}
+
+/*
+ * Publish the early-allocated kfence pool through __kfence_pool, bolt its
+ * mapping into the hash table with PAGE_KERNEL protection and clear the
+ * nomap mark on it.  Does nothing if no pool was allocated.
+ */
+static __init void hash_kfence_map_pool(void)
+{
+	unsigned long prot = pgprot_val(PAGE_KERNEL);
+	unsigned long pool_start, pool_end;
+
+	if (!kfence_pool)
+		return;
+
+	pool_start = (unsigned long) __va(kfence_pool);
+	pool_end = pool_start + KFENCE_POOL_SIZE;
+	__kfence_pool = (char *) pool_start;
+
+	/* A failure to bolt the pool is fatal */
+	BUG_ON(htab_bolt_mapping(pool_start, pool_end,
+				 kfence_pool, prot, mmu_linear_psize,
+				 mmu_kernel_ssize));
+	memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+}
+
+/*
+ * Record @slot (tagged with bit 0x80) for the kfence page containing @paddr.
+ * The index is the page offset of the address from __kfence_pool.  Does
+ * nothing if no pool was allocated; an address outside the pool is a BUG.
+ */
+static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot)
+{
+	unsigned long vaddr = (unsigned long) __va(paddr);
+	unsigned long lmi;
+
+	if (!kfence_pool)
+		return;
+
+	lmi = (vaddr - (unsigned long)__kfence_pool) >> PAGE_SHIFT;
+
+	BUG_ON(!is_kfence_address((void *)vaddr));
+	BUG_ON(lmi >= linear_map_kf_hash_count);
+	linear_map_kf_hash_slots[lmi] = slot | 0x80;
+}
+
+/*
+ * Map (@enable != 0) or unmap (@enable == 0) @numpages kfence pages starting
+ * at @page, with interrupts disabled for the duration.  Each page is indexed
+ * by its page offset from __kfence_pool.  Always returns 0.
+ */
+static int hash_kfence_map_pages(struct page *page, int numpages, int enable)
+{
+	unsigned long irqflags, addr, idx;
+	int n;
+
+	WARN_ON_ONCE(!linear_map_kf_hash_count);
+	local_irq_save(irqflags);
+	for (n = 0; n < numpages; n++) {
+		addr = (unsigned long)page_address(page + n);
+		idx = (addr - (unsigned long)__kfence_pool) >> PAGE_SHIFT;
+
+		/* Ideally this should never happen */
+		if (idx >= linear_map_kf_hash_count) {
+			WARN_ON_ONCE(1);
+			continue;
+		}
+
+		if (!enable)
+			kernel_unmap_linear_page(addr, idx,
+						 linear_map_kf_hash_slots,
+						 &linear_map_kf_hash_lock);
+		else
+			kernel_map_linear_page(addr, idx,
+					       linear_map_kf_hash_slots,
+					       &linear_map_kf_hash_lock);
+	}
+	local_irq_restore(irqflags);
+
+	return 0;
+}
+#else
+/* No-op variants used when CONFIG_KFENCE is not set. */
+static inline void hash_kfence_alloc_pool(void) {}
+static inline void hash_kfence_map_pool(void) {}
+static inline void hash_kfence_add_slot(phys_addr_t paddr, int slot) {}
+static int __maybe_unused
+hash_kfence_map_pages(struct page *page, int numpages, int enable)
+{
+ return 0;
+}
+#endif
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+/*
+ * Map or unmap @numpages pages starting at @page, dispatching on whether the
+ * first page's address is a kfence address: the kfence handler if so, the
+ * debug_pagealloc handler otherwise.
+ */
+int hash__kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (!is_kfence_address(page_address(page)))
+		return hash_debug_pagealloc_map_pages(page, numpages, enable);
+
+	return hash_kfence_map_pages(page, numpages, enable);
+}
+
+/*
+ * Record @slot for @paddr in the kfence slot array when the address is a
+ * kfence address, and in the debug_pagealloc slot array otherwise.
+ */
+static void hash_linear_map_add_slot(phys_addr_t paddr, int slot)
+{
+	if (!is_kfence_address(__va(paddr))) {
+		hash_debug_pagealloc_add_slot(paddr, slot);
+		return;
+	}
+
+	hash_kfence_add_slot(paddr, slot);
+}
+#else
+/* No-op when neither CONFIG_DEBUG_PAGEALLOC nor CONFIG_KFENCE is set. */
+static void hash_linear_map_add_slot(phys_addr_t paddr, int slot) {}
+#endif
+
+/*
+ * 'R' and 'C' update notes:
+ * - Under pHyp or KVM, the updatepp path will not set C, thus it *will*
+ * create writeable HPTEs without C set, because the hcall H_PROTECT
+ * that we use in that case will not update C
+ * - The above is however not a problem, because we also don't do that
+ * fancy "no flush" variant of eviction and we use H_REMOVE which will
+ * do the right thing and thus we don't have the race I described earlier
+ *
+ * - Under bare metal, we do have the race, so we need R and C set
+ * - We make sure R is always set and never lost
+ * - C is _PAGE_DIRTY, and *should* always be set for a writeable mapping
+ *
+ * htab_convert_pte_flags() builds the HPTE flag bits (the return value) from
+ * the Linux PTE flags in @pteflags. @flags is only passed through to
+ * pte_to_hpte_pkey_bits() when the key bits are added.
+ */
+unsigned long htab_convert_pte_flags(unsigned long pteflags, unsigned long flags)
+{
+ unsigned long rflags = 0;
+
+ /* _PAGE_EXEC -> NOEXEC */
+ if ((pteflags & _PAGE_EXEC) == 0)
+ rflags |= HPTE_R_N;
+ /*
+ * PPP bits:
+ * Linux uses slb key 0 for kernel and 1 for user.
+ * kernel RW areas are mapped with PPP=0b000
+ * User area is mapped with PPP=0b010 for read/write
+ * or PPP=0b011 for read-only (including writeable but clean pages).
+ */
+ if (pteflags & _PAGE_PRIVILEGED) {
+ /*
+ * Kernel read only mapped with ppp bits 0b110
+ * (0b011 is used when MMU_FTR_KERNEL_RO is not available)
+ */
+ if (!(pteflags & _PAGE_WRITE)) {
+ if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+ rflags |= (HPTE_R_PP0 | 0x2);
+ else
+ rflags |= 0x3;
+ }
+ VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request");
+ } else {
+ if (pteflags & _PAGE_RWX)
+ rflags |= 0x2;
+ /*
+ * We should never hit this in normal fault handling because
+ * a permission check (check_pte_access()) will bubble this
+ * to higher level linux handler even for PAGE_NONE.
+ */
+ VM_WARN_ONCE(!(pteflags & _PAGE_RWX), "no-access mapping request");
+ /* Anything that is not both writeable and dirty becomes read-only */
+ if (!((pteflags & _PAGE_WRITE) && (pteflags & _PAGE_DIRTY)))
+ rflags |= 0x1;
+ }
+ /*
+ * We can't allow hardware to update hpte bits. Hence always
+ * set 'R' bit and set 'C' if the Linux PTE is dirty (_PAGE_DIRTY)
+ */
+ rflags |= HPTE_R_R;
+
+ if (pteflags & _PAGE_DIRTY)
+ rflags |= HPTE_R_C;
+ /*
+ * Add in WIG bits
+ */
+
+ if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_TOLERANT)
+ rflags |= HPTE_R_I;
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
+ rflags |= (HPTE_R_I | HPTE_R_G);
+ else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
+ rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
+ else
+ /*
+ * Add memory coherence if cache inhibited is not set
+ */
+ rflags |= HPTE_R_M;
+
+ rflags |= pte_to_hpte_pkey_bits(pteflags, flags);
+ return rflags;
+}
+
+/*
+ * Insert bolted (HPTE_V_BOLTED) hash PTEs mapping the virtual range
+ * [vstart, vend) to physical memory starting at pstart, one entry per page
+ * of size psize in segment size ssize.
+ *
+ * prot is given as Linux PTE flags and is converted with
+ * htab_convert_pte_flags(). The virtual range is shrunk inwards to the page
+ * size, so a partial page at either end is not mapped.
+ *
+ * Returns 0 on success, -1 if no kernel VSID is available for an address,
+ * or a negative value if an entry could not be inserted in either the
+ * primary or the secondary group.
+ */
+int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
+ unsigned long pstart, unsigned long prot,
+ int psize, int ssize)
+{
+ unsigned long vaddr, paddr;
+ unsigned int step, shift;
+ int ret = 0;
+
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
+
+ prot = htab_convert_pte_flags(prot, HPTE_USE_KERNEL_KEY);
+
+ DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
+ vstart, vend, pstart, prot, psize, ssize);
+
+ /* Carefully map only the possible range */
+ vaddr = ALIGN(vstart, step);
+ paddr = ALIGN(pstart, step);
+ vend = ALIGN_DOWN(vend, step);
+
+ for (; vaddr < vend; vaddr += step, paddr += step) {
+ unsigned long hash, hpteg;
+ unsigned long vsid = get_kernel_vsid(vaddr, ssize);
+ unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
+ unsigned long tprot = prot;
+ bool secondary_hash = false;
+
+ /*
+ * If we hit a bad address return error.
+ */
+ if (!vsid)
+ return -1;
+ /* Make kernel text executable */
+ if (overlaps_kernel_text(vaddr, vaddr + step))
+ tprot &= ~HPTE_R_N;
+
+ /*
+ * If relocatable, check if it overlaps interrupt vectors that
+ * are copied down to real 0. For relocatable kernel
+ * (e.g. kdump case) we copy interrupt vectors down to real
+ * address 0. Mark that region as executable. This is
+ * because on p8 system with relocation on exception feature
+ * enabled, exceptions are raised with MMU (IR=DR=1) ON. Hence
+ * in order to execute the interrupt handlers in virtual
+ * mode the vector region need to be marked as executable.
+ */
+ if ((PHYSICAL_START > MEMORY_START) &&
+ overlaps_interrupt_vector_text(vaddr, vaddr + step))
+ tprot &= ~HPTE_R_N;
+
+ hash = hpt_hash(vpn, shift, ssize);
+ hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+ BUG_ON(!mmu_hash_ops.hpte_insert);
+repeat:
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
+ if (ret == -1) {
+ /*
+ * Try to keep bolted entries in primary.
+ * Remove non bolted entries and try insert again
+ */
+ ret = mmu_hash_ops.hpte_remove(hpteg);
+ if (ret != -1)
+ ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+ HPTE_V_BOLTED, psize, psize,
+ ssize);
+ /* Still no room: retry once with the group of the inverted hash */
+ if (ret == -1 && !secondary_hash) {
+ secondary_hash = true;
+ hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
+ goto repeat;
+ }
+ }
+
+ if (ret < 0)
+ break;
+
+ cond_resched();
+ /* add slot info in debug_pagealloc / kfence linear map */
+ hash_linear_map_add_slot(paddr, ret);
+ }
+ return ret < 0 ? ret : 0;
+}
+
+/*
+ * Remove the bolted hash PTEs covering [vstart, vend), stepping by the page
+ * size of @psize from vstart rounded down to that size.
+ *
+ * Returns -ENODEV if no hpte_removebolted hook is installed, the first error
+ * other than -ENOENT reported by the hook, -ENOENT if at least one page had
+ * no entry (the walk continues in that case), and 0 otherwise.
+ */
+int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+			int psize, int ssize)
+{
+	unsigned int shift = mmu_psize_defs[psize].shift;
+	unsigned int step = 1 << shift;
+	unsigned long vaddr, deadline;
+	int err, result = 0;
+
+	if (!mmu_hash_ops.hpte_removebolted)
+		return -ENODEV;
+
+	deadline = jiffies + HZ;
+
+	/* Unmap the full range specified */
+	for (vaddr = ALIGN_DOWN(vstart, step); vaddr < vend; vaddr += step) {
+		err = mmu_hash_ops.hpte_removebolted(vaddr, psize, ssize);
+
+		/*
+		 * For large number of mappings introduce a cond_resched()
+		 * to prevent softlockup warnings.
+		 */
+		if (time_after(jiffies, deadline)) {
+			cond_resched();
+			deadline = jiffies + HZ;
+		}
+
+		if (err == -ENOENT)
+			result = -ENOENT;
+		else if (err < 0)
+			return err;
+	}
+
+	return result;
+}
+
+static bool disable_1tb_segments __ro_after_init;
+
+/*
+ * Handler for the "disable_1tb_segments" early parameter: sets the
+ * disable_1tb_segments flag. The parameter value @p is not used.
+ */
+static int __init parse_disable_1tb_segments(char *p)
+{
+ disable_1tb_segments = true;
+ return 0;
+}
+early_param("disable_1tb_segments", parse_disable_1tb_segments);
+
+bool stress_hpt_enabled __initdata;
+
+/*
+ * Handler for the "stress_hpt" early parameter: sets stress_hpt_enabled.
+ * The parameter value @p is not used.
+ */
+static int __init parse_stress_hpt(char *p)
+{
+ stress_hpt_enabled = true;
+ return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+ unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+/*
+ * Number of stress_hpt groups to use: STRESS_MAX_GROUPS under an LPAR,
+ * 1 otherwise.
+ */
+static inline int stress_nr_groups(void)
+{
+	/*
+	 * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries
+	 * to allow practical forward progress. Bare metal returns 1, which
+	 * seems to help uncover more bugs.
+	 */
+	return firmware_has_feature(FW_FEATURE_LPAR) ? STRESS_MAX_GROUPS : 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
+/*
+ * Flat device-tree scan callback: looks for a value of 40 in the
+ * "ibm,processor-segment-sizes" property of "cpu" nodes.
+ *
+ * When found and not disabled on the command line, MMU_FTR_1T_SEGMENT is set
+ * and 1 is returned.  When the property exists but 1T segments are absent or
+ * disabled, MMU_FTR_NO_SLBIE_B is cleared and 0 is returned.  Nodes that are
+ * not "cpu" nodes or lack the property return 0 untouched.
+ */
+static int __init htab_dt_scan_seg_sizes(unsigned long node,
+					 const char *uname, int depth,
+					 void *data)
+{
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *cell;
+	int len = 0;
+
+	/* We are scanning "cpu" nodes only */
+	if (!type || strcmp(type, "cpu"))
+		return 0;
+
+	cell = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &len);
+	if (!cell)
+		return 0;
+
+	while (len >= 4) {
+		if (be32_to_cpu(*cell) == 40) {
+			DBG("1T segment support detected\n");
+
+			if (disable_1tb_segments) {
+				DBG("1T segments disabled by command line\n");
+				break;
+			}
+
+			cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
+			return 1;
+		}
+		len -= 4;
+		cell++;
+	}
+
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
+	return 0;
+}
+
+/*
+ * Translate a page-size shift (12, 16, 20, 24 or 34) into the matching
+ * MMU_PAGE_* index; returns -1 for any other shift.
+ */
+static int __init get_idx_from_shift(unsigned int shift)
+{
+	switch (shift) {
+	case 0xc:
+		return MMU_PAGE_4K;
+	case 0x10:
+		return MMU_PAGE_64K;
+	case 0x14:
+		return MMU_PAGE_1M;
+	case 0x18:
+		return MMU_PAGE_16M;
+	case 0x22:
+		return MMU_PAGE_16G;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Flat device-tree scan callback: fills mmu_psize_defs[] from the
+ * "ibm,segment-page-sizes" property of "cpu" nodes.
+ *
+ * As parsed here, the property is a sequence of records, each made of three
+ * cells (base page shift, SLB encoding, number of pairs) followed by that
+ * many (actual page shift, penc) pairs. Records and pairs whose shift is not
+ * known to get_idx_from_shift() are skipped.
+ *
+ * MMU_FTR_16M_PAGE is cleared first and set again only if a 16M base page
+ * size is listed.
+ *
+ * Returns 1 once the property has been processed, 0 for nodes that are not
+ * "cpu" nodes or that lack the property.
+ */
+static int __init htab_dt_scan_page_sizes(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int size = 0;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
+ if (!prop)
+ return 0;
+
+ pr_info("Page sizes from device-tree:\n");
+ /* From here on, size counts 32-bit cells rather than bytes */
+ size /= 4;
+ cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
+ /*
+ * NOTE(review): the loops only test size > 0 before reading several
+ * cells; presumably the property is assumed to be well-formed - confirm.
+ */
+ while(size > 0) {
+ unsigned int base_shift = be32_to_cpu(prop[0]);
+ unsigned int slbenc = be32_to_cpu(prop[1]);
+ unsigned int lpnum = be32_to_cpu(prop[2]);
+ struct mmu_psize_def *def;
+ int idx, base_idx;
+
+ size -= 3; prop += 3;
+ base_idx = get_idx_from_shift(base_shift);
+ if (base_idx < 0) {
+ /* skip the pte encoding also */
+ prop += lpnum * 2; size -= lpnum * 2;
+ continue;
+ }
+ def = &mmu_psize_defs[base_idx];
+ if (base_idx == MMU_PAGE_16M)
+ cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
+
+ def->shift = base_shift;
+ if (base_shift <= 23)
+ def->avpnm = 0;
+ else
+ def->avpnm = (1 << (base_shift - 23)) - 1;
+ def->sllp = slbenc;
+ /*
+ * We don't know for sure what's up with tlbiel, so
+ * for now we only set it for 4K and 64K pages
+ */
+ if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
+ def->tlbiel = 1;
+ else
+ def->tlbiel = 0;
+
+ /* Walk the (shift, penc) pairs that belong to this base page size */
+ while (size > 0 && lpnum) {
+ unsigned int shift = be32_to_cpu(prop[0]);
+ int penc = be32_to_cpu(prop[1]);
+
+ prop += 2; size -= 2;
+ lpnum--;
+
+ idx = get_idx_from_shift(shift);
+ if (idx < 0)
+ continue;
+
+ if (penc == -1)
+ pr_err("Invalid penc for base_shift=%d "
+ "shift=%d\n", base_shift, shift);
+
+ def->penc[idx] = penc;
+ pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
+ " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
+ base_shift, shift, def->sllp,
+ def->avpnm, def->tlbiel, def->penc[idx]);
+ }
+ }
+
+ return 1;
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * Scan for 16G memory blocks that have been set aside for huge pages
+ * and reserve those blocks for 16G huge pages.
+ */
+/*
+ * Flat device-tree scan callback for "memory" nodes that carry an
+ * "ibm,expected#pages" property and whose "reg" block size is 16GB.
+ *
+ * Such a block is logged and, if it ends at or below the end of DRAM,
+ * reserved in memblock and registered with pseries_add_gpage().  Always
+ * returns 0.
+ */
+static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
+					const char *uname, int depth,
+					void *data)
+{
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be64 *reg;
+	const __be32 *pages_log2;
+	unsigned int expected_pages;
+	long unsigned int phys_addr;
+	long unsigned int block_size;
+	long unsigned int total_size;
+
+	/* We are scanning "memory" nodes only */
+	if (!type || strcmp(type, "memory"))
+		return 0;
+
+	/*
+	 * This property is the log base 2 of the number of virtual pages that
+	 * will represent this memory block.
+	 */
+	pages_log2 = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
+	if (!pages_log2)
+		return 0;
+	expected_pages = (1 << be32_to_cpu(pages_log2[0]));
+
+	reg = of_get_flat_dt_prop(node, "reg", NULL);
+	if (!reg)
+		return 0;
+	phys_addr = be64_to_cpu(reg[0]);
+	block_size = be64_to_cpu(reg[1]);
+	if (block_size != (16 * GB))
+		return 0;
+
+	printk(KERN_INFO "Huge page(16GB) memory: "
+			"addr = 0x%lX size = 0x%lX pages = %d\n",
+			phys_addr, block_size, expected_pages);
+
+	/* Only claim the block if it lies entirely within DRAM */
+	total_size = block_size * expected_pages;
+	if (phys_addr + total_size > memblock_end_of_DRAM())
+		return 0;
+
+	memblock_reserve(phys_addr, total_size);
+	pseries_add_gpage(phys_addr, block_size, expected_pages);
+	return 0;
+}
+#endif /* CONFIG_HUGETLB_PAGE */
+
+ /*
+  * Mark every (base page size, actual page size) combination in
+  * mmu_psize_defs[] as having no page-size encoding (-1).
+  */
+ static void __init mmu_psize_set_default_penc(void)
+ {
+ 	int base, actual;
+
+ 	for (base = 0; base < MMU_PAGE_COUNT; base++) {
+ 		for (actual = 0; actual < MMU_PAGE_COUNT; actual++) {
+ 			mmu_psize_defs[base].penc[actual] = -1;
+ 		}
+ 	}
+ }
+
+#ifdef CONFIG_PPC_64K_PAGES
+
+ /*
+  * Report whether an HEA ethernet adapter could ever show up.
+  *
+  * The HEA ethernet adapter requires awareness of the GX bus, so without
+  * CONFIG_IBMEBUS the answer is always false. With it, the answer is true
+  * only on a CPU without CPU_FTR_ARCH_207S running with FW_FEATURE_SPLPAR.
+  */
+ static bool __init might_have_hea(void)
+ {
+ #ifdef CONFIG_IBMEBUS
+ 	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ 		return false;
+
+ 	return firmware_has_feature(FW_FEATURE_SPLPAR);
+ #else
+ 	return false;
+ #endif
+ }
+
+#endif /* #ifdef CONFIG_PPC_64K_PAGES */
+
+ /*
+  * Populate mmu_psize_defs[] from the device-tree, falling back to built-in
+  * tables, and (with hugetlb support) reserve the 16G huge page blocks.
+  */
+ static void __init htab_scan_page_sizes(void)
+ {
+ 	int found;
+
+ 	/* Set every penc to the invalid value (-1) */
+ 	mmu_psize_set_default_penc();
+
+ 	/* Start from the default table: 4K pages only */
+ 	memcpy(mmu_psize_defs, mmu_psize_defaults,
+ 	       sizeof(mmu_psize_defaults));
+
+ 	/* Try to find the available page sizes in the device-tree */
+ 	found = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
+ 	if (!found && early_mmu_has_feature(MMU_FTR_16M_PAGE)) {
+ 		/*
+ 		 * Nothing in the device-tree, but the CPU supports 16M
+ 		 * pages, so fall back on a known size list for 16M capable
+ 		 * CPUs.
+ 		 */
+ 		memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
+ 		       sizeof(mmu_psize_defaults_gp));
+ 	}
+
+ #ifdef CONFIG_HUGETLB_PAGE
+ 	/* Reserve 16G huge page memory sections for huge pages */
+ 	if (!(hugetlb_disabled || early_radix_enabled()))
+ 		of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+ #endif /* CONFIG_HUGETLB_PAGE */
+ }
+
+/*
+ * Fill in the hpte_page_sizes[] array.
+ * We go through the mmu_psize_defs[] array looking for all the
+ * supported base/actual page size combinations. Each combination
+ * has a unique pagesize encoding (penc) value in the low bits of
+ * the LP field of the HPTE. For actual page sizes less than 1MB,
+ * some of the upper LP bits are used for RPN bits, meaning that
+ * we need to fill in several entries in hpte_page_sizes[].
+ *
+ * In diagrammatic form, with r = RPN bits and z = page size bits:
+ * PTE LP actual page size
+ * rrrr rrrz >=8KB
+ * rrrr rrzz >=16KB
+ * rrrr rzzz >=32KB
+ * rrrr zzzz >=64KB
+ * ...
+ *
+ * The zzzz bits are implementation-specific but are chosen so that
+ * no encoding for a larger page size uses the same value in its
+ * low-order N bits as the encoding for the 2^(12+N) byte page size
+ * (if it exists).
+ */
+ /*
+  * Walk every supported base/actual page size pair in mmu_psize_defs[] and
+  * record it in hpte_page_sizes[], indexed by the pair's penc value. The
+  * stored byte is (actual << 4) | base.
+  */
+ static void __init init_hpte_page_sizes(void)
+ {
+ 	long int base, actual;
+ 	long int lp_shift, enc;
+
+ 	for (base = 0; base < MMU_PAGE_COUNT; ++base) {
+ 		/* not a supported page size */
+ 		if (mmu_psize_defs[base].shift == 0)
+ 			continue;
+
+ 		for (actual = base; actual < MMU_PAGE_COUNT; ++actual) {
+ 			enc = mmu_psize_defs[base].penc[actual];
+ 			if (enc == -1 || mmu_psize_defs[actual].shift == 0)
+ 				continue;
+
+ 			lp_shift = mmu_psize_defs[actual].shift - LP_SHIFT;
+ 			if (lp_shift <= 0)
+ 				continue;	/* should never happen */
+
+ 			/*
+ 			 * For page sizes less than 1MB, stepping by
+ 			 * 1 << lp_shift replicates the entry for all
+ 			 * possible values of the rrrr bits.
+ 			 */
+ 			for (; enc < (1 << LP_BITS); enc += 1 << lp_shift)
+ 				hpte_page_sizes[enc] = (actual << 4) | base;
+ 		}
+ 	}
+ }
+
+ /*
+ * Choose the page sizes used for the linear mapping, ordinary (virtual)
+ * pages, vmalloc, I/O and vmemmap.
+ *
+ * A page size counts as supported when its mmu_psize_defs[] entry has a
+ * non-zero shift. The function first fills hpte_page_sizes[] via
+ * init_hpte_page_sizes(), then updates mmu_linear_psize,
+ * mmu_virtual_psize, mmu_vmalloc_psize, mmu_io_psize, mmu_ci_restrictions
+ * and mmu_vmemmap_psize (subject to the config options below), and
+ * finally logs the resulting page orders.
+ */
+ static void __init htab_init_page_sizes(void)
+ {
+ bool aligned = true;
+ init_hpte_page_sizes();
+
+ /* The linear mapping size is left untouched with debug_pagealloc/kfence */
+ if (!hash_supports_debug_pagealloc() && !kfence_early_init_enabled()) {
+ /*
+ * Pick a size for the linear mapping. Currently, we only
+ * support 16M, 1M and 4K which is the default
+ */
+ /* With STRICT_KERNEL_RWX, _stext must be 16M (0x1000000) aligned for 16M pages */
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
+ (unsigned long)_stext % 0x1000000) {
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n");
+ aligned = false;
+ }
+
+ if (mmu_psize_defs[MMU_PAGE_16M].shift && aligned)
+ mmu_linear_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ mmu_linear_psize = MMU_PAGE_1M;
+ }
+
+ #ifdef CONFIG_PPC_64K_PAGES
+ /*
+ * Pick a size for the ordinary pages. Default is 4K, we support
+ * 64K for user mappings and vmalloc if supported by the processor.
+ * We only use 64k for ioremap if the processor
+ * (and firmware) support cache-inhibited large pages.
+ * If not, we use 4k and set mmu_ci_restrictions so that
+ * hash_page knows to switch processes that use cache-inhibited
+ * mappings to 4k pages.
+ */
+ if (mmu_psize_defs[MMU_PAGE_64K].shift) {
+ mmu_virtual_psize = MMU_PAGE_64K;
+ mmu_vmalloc_psize = MMU_PAGE_64K;
+ /* Upgrade the linear mapping too if nothing larger was picked above */
+ if (mmu_linear_psize == MMU_PAGE_4K)
+ mmu_linear_psize = MMU_PAGE_64K;
+ if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
+ /*
+ * When running on pSeries using 64k pages for ioremap
+ * would stop us accessing the HEA ethernet. So if we
+ * have the chance of ever seeing one, stay at 4k.
+ */
+ if (!might_have_hea())
+ mmu_io_psize = MMU_PAGE_64K;
+ } else
+ mmu_ci_restrictions = 1;
+ }
+ #endif /* CONFIG_PPC_64K_PAGES */
+
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /*
+ * We try to use 16M pages for vmemmap if that is supported
+ * and we have at least 1G of RAM at boot
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift &&
+ memblock_phys_mem_size() >= 0x40000000)
+ mmu_vmemmap_psize = MMU_PAGE_16M;
+ else
+ mmu_vmemmap_psize = mmu_virtual_psize;
+ #endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+ /* The vmemmap format and argument are compiled in only with SPARSEMEM_VMEMMAP */
+ printk(KERN_DEBUG "Page orders: linear mapping = %d, "
+ "virtual = %d, io = %d"
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+ ", vmemmap = %d"
+ #endif
+ "\n",
+ mmu_psize_defs[mmu_linear_psize].shift,
+ mmu_psize_defs[mmu_virtual_psize].shift,
+ mmu_psize_defs[mmu_io_psize].shift
+ #ifdef CONFIG_SPARSEMEM_VMEMMAP
+ ,mmu_psize_defs[mmu_vmemmap_psize].shift
+ #endif
+ );
+ }
+
+ /*
+  * Flat device-tree scan callback: read the hash table size from the
+  * "ibm,pft-size" property of a "cpu" node into ppc64_pft_size.
+  *
+  * Returns 1 once the property has been found, 0 otherwise.
+  */
+ static int __init htab_dt_scan_pftsize(unsigned long node,
+ 				       const char *uname, int depth,
+ 				       void *data)
+ {
+ 	const char *dev_type = of_get_flat_dt_prop(node, "device_type", NULL);
+ 	const __be32 *pft;
+
+ 	/* We are scanning "cpu" nodes only */
+ 	if (!dev_type || strcmp(dev_type, "cpu"))
+ 		return 0;
+
+ 	pft = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
+ 	if (!pft)
+ 		return 0;
+
+ 	/* pft_size[0] is the NUMA CEC cookie, the size is in cell 1 */
+ 	ppc64_pft_size = be32_to_cpu(pft[1]);
+ 	return 1;
+ }
+
+ /*
+  * Compute the log2 of the hash page table size, in bytes, suited to
+  * @mem_size bytes of memory.
+  *
+  * The table is sized for one PTEG (128 bytes, i.e. 2^7) per two pages of
+  * the current virtual page size, with mem_size rounded up to a power of
+  * two. The result is never below 18, since 2^11 PTEGs of 128 bytes each,
+  * ie. 2^18 bytes, is the minimum htab size permitted by the architecture.
+  *
+  * A zero mem_size, or one no larger than two pages, yields that minimum
+  * instead of letting the unsigned arithmetic wrap around.
+  */
+ unsigned htab_shift_for_mem_size(unsigned long mem_size)
+ {
+ 	unsigned pshift = mmu_psize_defs[mmu_virtual_psize].shift;
+ 	unsigned memshift;
+
+ 	/* The log2 of zero has no meaningful value: use the minimum size */
+ 	if (!mem_size)
+ 		return 18U;
+
+ 	memshift = __ilog2(mem_size);
+
+ 	/* round mem_size up to next power of 2 */
+ 	if ((1UL << memshift) < mem_size)
+ 		memshift += 1;
+
+ 	/*
+ 	 * memshift - (pshift + 1) is unsigned and would wrap to a huge
+ 	 * value here; the intended result is below the minimum anyway.
+ 	 */
+ 	if (memshift <= pshift + 1)
+ 		return 18U;
+
+ 	/* aim for 2 pages / pteg, each pteg being 2^7 bytes */
+ 	return max(memshift - (pshift + 1) + 7, 18U);
+ }
+
+ /*
+  * Return the size of the hash table in bytes.
+  *
+  * Preference order: a size already provided by the platform in
+  * ppc64_pft_size, then the device-tree, and finally a value calculated
+  * from the total RAM size.
+  */
+ static unsigned long __init htab_get_table_size(void)
+ {
+ 	/* Not provided by the platform: try the device-tree */
+ 	if (!ppc64_pft_size)
+ 		of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
+
+ 	/* Still unknown: derive it from the amount of memory */
+ 	if (!ppc64_pft_size)
+ 		return 1UL << htab_shift_for_mem_size(memblock_phys_mem_size());
+
+ 	return 1UL << ppc64_pft_size;
+ }
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+  * Resize the hash page table, if the backend supports it, to suit
+  * @new_mem_size bytes of memory.
+  *
+  * Returns 0 when no resize is attempted, otherwise whatever
+  * mmu_hash_ops.resize_hpt() returns.
+  */
+ static int resize_hpt_for_hotplug(unsigned long new_mem_size)
+ {
+ 	unsigned wanted_shift;
+ 	bool must_grow, may_shrink;
+
+ 	if (!mmu_hash_ops.resize_hpt)
+ 		return 0;
+
+ 	wanted_shift = htab_shift_for_mem_size(new_mem_size);
+
+ 	/*
+ 	 * To avoid lots of HPT resizes if memory size is fluctuating
+ 	 * across a boundary, there is deliberately some hysteresis here:
+ 	 * the HPT is grown as soon as the target shift exceeds the current
+ 	 * shift, but it is only shrunk once the target shift is at least
+ 	 * 2 below the current shift.
+ 	 */
+ 	must_grow = wanted_shift > ppc64_pft_size;
+ 	may_shrink = wanted_shift < ppc64_pft_size - 1;
+
+ 	if (must_grow || may_shrink)
+ 		return mmu_hash_ops.resize_hpt(wanted_shift);
+
+ 	return 0;
+ }
+
+ /*
+  * Bolt a linear mapping for [start, end) into the hash table.
+  *
+  * Returns -1 if the range reaches H_VMALLOC_START, otherwise the result
+  * of htab_bolt_mapping(). On a negative result the partially created
+  * mapping is removed again before returning. @nid is not used here.
+  */
+ int hash__create_section_mapping(unsigned long start, unsigned long end,
+ 				 int nid, pgprot_t prot)
+ {
+ 	int ret, undo;
+
+ 	if (end >= H_VMALLOC_START) {
+ 		pr_warn("Outside the supported range\n");
+ 		return -1;
+ 	}
+
+ 	/* Give the HPT a chance to grow first; its result is not checked */
+ 	resize_hpt_for_hotplug(memblock_phys_mem_size());
+
+ 	ret = htab_bolt_mapping(start, end, __pa(start),
+ 				pgprot_val(prot), mmu_linear_psize,
+ 				mmu_kernel_ssize);
+ 	if (ret >= 0)
+ 		return ret;
+
+ 	/* Roll back; -ENOENT from the removal is tolerated */
+ 	undo = htab_remove_mapping(start, end, mmu_linear_psize,
+ 				   mmu_kernel_ssize);
+ 	BUG_ON(undo && (undo != -ENOENT));
+
+ 	return ret;
+ }
+
+ /*
+  * Remove the linear mapping for [start, end) from the hash table and then
+  * let the HPT be resized for the new memory size.
+  *
+  * Returns the result of htab_remove_mapping(); a failed resize is only
+  * warned about, and only when it reports -ENOSPC.
+  */
+ int hash__remove_section_mapping(unsigned long start, unsigned long end)
+ {
+ 	int ret, resize_rc;
+
+ 	ret = htab_remove_mapping(start, end, mmu_linear_psize,
+ 				  mmu_kernel_ssize);
+
+ 	resize_rc = resize_hpt_for_hotplug(memblock_phys_mem_size());
+ 	if (resize_rc == -ENOSPC)
+ 		pr_warn("Hash collision while resizing HPT\n");
+
+ 	return ret;
+ }
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+ /*
+  * Initialise the partition table and install entry 0 pointing at the hash
+  * table located at @hash_table, which is @htab_size bytes long.
+  */
+ static void __init hash_init_partition_table(phys_addr_t hash_table,
+ 					     unsigned long htab_size)
+ {
+ 	unsigned long size_enc;
+
+ 	mmu_partition_table_init();
+
+ 	/*
+ 	 * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
+ 	 * For now, UPRT is 0 and we have no segment table.
+ 	 *
+ 	 * The size is encoded as log2(bytes) - 18 and OR-ed into the
+ 	 * table address.
+ 	 */
+ 	size_enc = __ilog2(htab_size) - 18;
+ 	mmu_partition_table_set_entry(0, hash_table | size_enc, 0, false);
+
+ 	pr_info("Partition table %p\n", partition_tb);
+ }
+
+ void hpt_clear_stress(void);
+ static struct timer_list stress_hpt_timer;
+
+ /*
+  * Timer callback for the HPT stress test: clear the stress state, flush
+  * the local TLB when not running under an LPAR, then re-arm the timer
+  * 10ms from now on the next online CPU (wrapping to the first one).
+  */
+ static void stress_hpt_timer_fn(struct timer_list *timer)
+ {
+ 	int target;
+
+ 	hpt_clear_stress();
+
+ 	if (!firmware_has_feature(FW_FEATURE_LPAR))
+ 		tlbiel_all();
+
+ 	target = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+ 	if (target >= nr_cpu_ids) {
+ 		/* ran off the end of the mask: start over */
+ 		target = cpumask_first(cpu_online_mask);
+ 	}
+
+ 	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+ 	add_timer_on(&stress_hpt_timer, target);
+ }
+
+ /*
+ * Set up the hash page table and the bolted linear mapping.
+ *
+ * Steps, in order:
+ * - select 1TB segments when the MMU supports them;
+ * - enable the SLB/HPT stress keys when requested, allocating the
+ * per-CPU stress_hpt_struct array and arming its timer;
+ * - compute htab_size_bytes and htab_hash_mask;
+ * - either leave the table to the hypervisor (LPAR / PS3 LV1) or
+ * allocate, clear and install it (SDR1 or partition table);
+ * - bolt a linear mapping for every memblock memory range;
+ * - bolt a mapping for the TCE area when tce_alloc_start is set.
+ */
+ static void __init htab_initialize(void)
+ {
+ unsigned long table;
+ unsigned long pteg_count;
+ unsigned long prot;
+ phys_addr_t base = 0, size = 0, end;
+ u64 i;
+
+ DBG(" -> htab_initialize()\n");
+
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
+ mmu_kernel_ssize = MMU_SEGSIZE_1T;
+ mmu_highuser_ssize = MMU_SEGSIZE_1T;
+ printk(KERN_INFO "Using 1TB segments\n");
+ }
+
+ if (stress_slb_enabled)
+ static_branch_enable(&stress_slb_key);
+
+ if (stress_hpt_enabled) {
+ unsigned long tmp;
+ static_branch_enable(&stress_hpt_key);
+ // Too early to use nr_cpu_ids, so use NR_CPUS
+ tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+ __alignof__(struct stress_hpt_struct),
+ 0, MEMBLOCK_ALLOC_ANYWHERE);
+ /*
+ * NOTE(review): tmp is the value returned by
+ * memblock_phys_alloc_range() and is used directly as a pointer
+ * here, while the stored pointer goes through __va(). Presumably
+ * this is valid this early in boot - confirm. The allocation
+ * result is not checked for failure.
+ */
+ memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+ stress_hpt_struct = __va(tmp);
+
+ timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+ stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+ add_timer(&stress_hpt_timer);
+ }
+
+ /*
+ * Calculate the required size of the htab. We want the number of
+ * PTEGs to equal one half the number of real pages.
+ */
+ htab_size_bytes = htab_get_table_size();
+ /* Each PTEG is 128 bytes, hence the shift by 7 */
+ pteg_count = htab_size_bytes >> 7;
+
+ htab_hash_mask = pteg_count - 1;
+
+ if (firmware_has_feature(FW_FEATURE_LPAR) ||
+ firmware_has_feature(FW_FEATURE_PS3_LV1)) {
+ /* Using a hypervisor which owns the htab */
+ htab_address = NULL;
+ _SDR1 = 0;
+ #ifdef CONFIG_FA_DUMP
+ /*
+ * If firmware assisted dump is active firmware preserves
+ * the contents of htab along with entire partition memory.
+ * Clear the htab if firmware assisted dump is active so
+ * that we dont end up using old mappings.
+ */
+ if (is_fadump_active() && mmu_hash_ops.hpte_clear_all)
+ mmu_hash_ops.hpte_clear_all();
+ #endif
+ } else {
+ unsigned long limit = MEMBLOCK_ALLOC_ANYWHERE;
+
+ /* The table is aligned to its own size */
+ table = memblock_phys_alloc_range(htab_size_bytes,
+ htab_size_bytes,
+ 0, limit);
+ if (!table)
+ panic("ERROR: Failed to allocate %pa bytes below %pa\n",
+ &htab_size_bytes, &limit);
+
+ DBG("Hash table allocated at %lx, size: %lx\n", table,
+ htab_size_bytes);
+
+ htab_address = __va(table);
+
+ /* htab absolute addr + encoded htabsize */
+ _SDR1 = table + __ilog2(htab_size_bytes) - 18;
+
+ /* Initialize the HPT with no entries */
+ memset((void *)table, 0, htab_size_bytes);
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ /* Set SDR1 */
+ mtspr(SPRN_SDR1, _SDR1);
+ else
+ hash_init_partition_table(table, htab_size_bytes);
+ }
+
+ prot = pgprot_val(PAGE_KERNEL);
+
+ hash_debug_pagealloc_alloc_slots();
+ hash_kfence_alloc_pool();
+ /* create bolted the linear mapping in the hash table */
+ for_each_mem_range(i, &base, &end) {
+ size = end - base;
+ /* From here on base holds the virtual address of the range */
+ base = (unsigned long)__va(base);
+
+ DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
+ base, size, prot);
+
+ if ((base + size) >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
+ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
+ prot, mmu_linear_psize, mmu_kernel_ssize));
+ }
+ hash_kfence_map_pool();
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
+ /*
+ * If we have a memory_limit and we've allocated TCEs then we need to
+ * explicitly map the TCE area at the top of RAM. We also cope with the
+ * case that the TCEs start below memory_limit.
+ * tce_alloc_start/end are 16MB aligned so the mapping should work
+ * for either 4K or 16MB pages.
+ */
+ if (tce_alloc_start) {
+ tce_alloc_start = (unsigned long)__va(tce_alloc_start);
+ tce_alloc_end = (unsigned long)__va(tce_alloc_end);
+
+ /* base/size still describe the last range visited by the loop above */
+ if (base + size >= tce_alloc_start)
+ tce_alloc_start = base + size + 1;
+
+ BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
+ __pa(tce_alloc_start), prot,
+ mmu_linear_psize, mmu_kernel_ssize));
+ }
+
+
+ DBG(" <- htab_initialize()\n");
+ }
+#undef KB
+#undef MB
+
+ /*
+ * Early device-tree pass for the hash MMU: scan the flat device-tree for
+ * the supported segment sizes, then for the supported page sizes.
+ */
+ void __init hash__early_init_devtree(void)
+ {
+ /* Initialize segment sizes */
+ of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
+
+ /* Initialize page sizes */
+ htab_scan_page_sizes();
+ }
+
+ /* Hash MMU context attached to init_mm by hash__early_init_mmu() */
+ static struct hash_mm_context init_hash_mm_context;
+
+ /*
+ * Early hash MMU initialisation.
+ *
+ * Picks the page sizes, publishes the hash variants of the page table
+ * geometry and kernel address space layout, selects the HPTE backend,
+ * builds the hash table and linear mapping, attaches the hash context to
+ * init_mm and initialises the SLB. Panics if no backend installed
+ * mmu_hash_ops.hpte_insert.
+ */
+ void __init hash__early_init_mmu(void)
+ {
+ #ifndef CONFIG_PPC_64K_PAGES
+ /*
+ * We have code in __hash_page_4K() and elsewhere, which assumes it can
+ * do the following:
+ * new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX);
+ *
+ * Where the slot number is between 0-15, and values of 8-15 indicate
+ * the secondary bucket. For that code to work H_PAGE_F_SECOND and
+ * H_PAGE_F_GIX must occupy four contiguous bits in the PTE, and
+ * H_PAGE_F_SECOND must be placed above H_PAGE_F_GIX. Assert that here
+ * with a BUILD_BUG_ON().
+ */
+ BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3)));
+ #endif /* CONFIG_PPC_64K_PAGES */
+
+ htab_init_page_sizes();
+
+ /*
+ * initialize page table size
+ */
+ __pte_frag_nr = H_PTE_FRAG_NR;
+ __pte_frag_size_shift = H_PTE_FRAG_SIZE_SHIFT;
+ __pmd_frag_nr = H_PMD_FRAG_NR;
+ __pmd_frag_size_shift = H_PMD_FRAG_SIZE_SHIFT;
+
+ __pte_index_size = H_PTE_INDEX_SIZE;
+ __pmd_index_size = H_PMD_INDEX_SIZE;
+ __pud_index_size = H_PUD_INDEX_SIZE;
+ __pgd_index_size = H_PGD_INDEX_SIZE;
+ __pud_cache_index = H_PUD_CACHE_INDEX;
+ __pte_table_size = H_PTE_TABLE_SIZE;
+ __pmd_table_size = H_PMD_TABLE_SIZE;
+ __pud_table_size = H_PUD_TABLE_SIZE;
+ __pgd_table_size = H_PGD_TABLE_SIZE;
+ __pmd_val_bits = HASH_PMD_VAL_BITS;
+ __pud_val_bits = HASH_PUD_VAL_BITS;
+ __pgd_val_bits = HASH_PGD_VAL_BITS;
+
+ /* Kernel virtual address space layout for the hash MMU */
+ __kernel_virt_start = H_KERN_VIRT_START;
+ __vmalloc_start = H_VMALLOC_START;
+ __vmalloc_end = H_VMALLOC_END;
+ __kernel_io_start = H_KERN_IO_START;
+ __kernel_io_end = H_KERN_IO_END;
+ vmemmap = (struct page *)H_VMEMMAP_START;
+ ioremap_bot = IOREMAP_BASE;
+
+ #ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+ #endif
+
+ /* Select appropriate backend */
+ if (firmware_has_feature(FW_FEATURE_PS3_LV1))
+ ps3_early_mm_init();
+ else if (firmware_has_feature(FW_FEATURE_LPAR))
+ hpte_init_pseries();
+ else if (IS_ENABLED(CONFIG_PPC_HASH_MMU_NATIVE))
+ hpte_init_native();
+
+ /* None of the branches above installed the hash operations */
+ if (!mmu_hash_ops.hpte_insert)
+ panic("hash__early_init_mmu: No MMU hash ops defined!\n");
+
+ /*
+ * Initialize the MMU Hash table and create the linear mapping
+ * of memory. Has to be done before SLB initialization as this is
+ * currently where the page size encoding is obtained.
+ */
+ htab_initialize();
+
+ init_mm.context.hash_context = &init_hash_mm_context;
+ mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
+
+ pr_info("Initializing hash mmu with SLB\n");
+ /* Initialize SLB management */
+ slb_initialize();
+
+ /* Flush the local TLB when running in hypervisor mode on ISA 2.06+ CPUs */
+ if (cpu_has_feature(CPU_FTR_ARCH_206)
+ && cpu_has_feature(CPU_FTR_HVMODE))
+ tlbiel_all();
+ }
+
+#ifdef CONFIG_SMP
+ /*
+  * Hash MMU initialisation for a secondary CPU: point the CPU at the hash
+  * table (unless running under an LPAR), initialise its SLB, flush its TLB
+  * when in hypervisor mode and, with memory keys, program UAMOR.
+  */
+ void hash__early_init_mmu_secondary(void)
+ {
+ 	/* Initialize hash table for that CPU */
+ 	if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ 		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ 			set_ptcr_when_no_uv(__pa(partition_tb) |
+ 					    (PATB_SIZE_SHIFT - 12));
+ 		} else {
+ 			mtspr(SPRN_SDR1, _SDR1);
+ 		}
+ 	}
+
+ 	/* Initialize SLB */
+ 	slb_initialize();
+
+ 	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
+ 	    cpu_has_feature(CPU_FTR_HVMODE)) {
+ 		tlbiel_all();
+ 	}
+
+ #ifdef CONFIG_PPC_MEM_KEYS
+ 	if (mmu_has_feature(MMU_FTR_PKEY))
+ 		mtspr(SPRN_UAMOR, default_uamor);
+ #endif
+ }
+#endif /* CONFIG_SMP */
+
+/*
+ * Called by asm hashtable.S for doing lazy icache flush
+ */
+ /*
+  * Lazy icache flush for the page mapped by @pte.
+  *
+  * If the folio is not yet marked PG_dcache_clean (and is not reserved),
+  * an instruction storage fault flushes it and marks it clean; any other
+  * trap instead gets HPTE_R_N added to the protection bits. Returns the
+  * possibly updated @pp.
+  */
+ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
+ {
+ 	struct folio *folio;
+
+ 	if (!pfn_valid(pte_pfn(pte)))
+ 		return pp;
+
+ 	folio = page_folio(pte_page(pte));
+
+ 	/* Nothing to do for clean or reserved folios */
+ 	if (test_bit(PG_dcache_clean, &folio->flags.f) ||
+ 	    folio_test_reserved(folio))
+ 		return pp;
+
+ 	/* page is dirty: without an exec fault, just forbid execution */
+ 	if (trap != INTERRUPT_INST_STORAGE)
+ 		return pp | HPTE_R_N;
+
+ 	flush_dcache_icache_folio(folio);
+ 	set_bit(PG_dcache_clean, &folio->flags.f);
+ 	return pp;
+ }
+
+ /*
+  * Return the page size index recorded in the PACA for the slice that
+  * contains @addr. Each byte of the slice arrays packs two 4-bit entries.
+  */
+ static unsigned int get_paca_psize(unsigned long addr)
+ {
+ 	unsigned char *slice_psizes;
+ 	unsigned long slice, nibble;
+
+ 	if (addr >= SLICE_LOW_TOP) {
+ 		slice_psizes = get_paca()->mm_ctx_high_slices_psize;
+ 		slice = GET_HIGH_SLICE_INDEX(addr);
+ 	} else {
+ 		slice_psizes = get_paca()->mm_ctx_low_slices_psize;
+ 		slice = GET_LOW_SLICE_INDEX(addr);
+ 	}
+
+ 	/* even slices live in the low nibble, odd ones in the high nibble */
+ 	nibble = slice & 0x1;
+
+ 	return (slice_psizes[slice >> 1] >> (nibble * 4)) & 0xF;
+ }
+
+
+/*
+ * Demote a segment to using 4k pages.
+ * For now this makes the whole process use 4k pages.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ /*
+  * Switch the slice containing @addr in @mm to 4k pages, and refresh the
+  * PACA copy and the SLB when @mm is the current mm and the PACA still
+  * records a different page size.
+  */
+ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+ {
+ 	/* Already using 4k pages */
+ 	if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
+ 		return;
+
+ 	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
+ #ifdef CONFIG_SPU_BASE
+ 	spu_flush_all_slbs(mm);
+ #endif
+
+ 	if (get_paca_psize(addr) == MMU_PAGE_4K || current->mm != mm)
+ 		return;
+
+ 	copy_mm_to_paca(mm);
+ 	slb_flush_and_restore_bolted();
+ }
+#endif /* CONFIG_PPC_64K_PAGES */
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
+ * Userspace sets the subpage permissions using the subpage_prot system call.
+ *
+ * Result is 0: full permissions, _PAGE_RW: read-only,
+ * _PAGE_RWX: no access.
+ */
+static int subpage_protection(struct mm_struct *mm, unsigned long ea)
+{
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
+ u32 spp = 0;
+ u32 **sbpm, *sbpp;
+
+ if (!spt)
+ return 0;
+
+ if (ea >= spt->maxaddr)
+ return 0;
+ if (ea < 0x100000000UL) {
+ /* addresses below 4GB use spt->low_prot */
+ sbpm = spt->low_prot;
+ } else {
+ sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
+ if (!sbpm)
+ return 0;
+ }
+ sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
+ if (!sbpp)
+ return 0;
+ spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];
+
+ /* extract 2-bit bitfield for this 4k subpage */
+ spp >>= 30 - 2 * ((ea >> 12) & 0xf);
+
+ /*
+ * 0 -> full permission
+ * 1 -> Read only
+ * 2 -> no access.
+ * We return the flag that need to be cleared.
+ */
+ spp = ((spp & 2) ? _PAGE_RWX : 0) | ((spp & 1) ? _PAGE_WRITE : 0);
+ return spp;
+}
+
+#else /* CONFIG_PPC_SUBPAGE_PROT */
+ /* Stub used when CONFIG_PPC_SUBPAGE_PROT is not set: no flags to clear. */
+ static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
+ {
+ return 0;
+ }
+#endif
+
+ /*
+  * Log the details of a failed hash insertion, subject to printk rate
+  * limiting: the faulting address, access mask and current task, followed
+  * by the trap, VSID, segment size, base/actual page sizes and the PTE.
+  */
+ void hash_failure_debug(unsigned long ea, unsigned long access,
+ 			unsigned long vsid, unsigned long trap,
+ 			int ssize, int psize, int lpsize, unsigned long pte)
+ {
+ 	if (printk_ratelimit()) {
+ 		pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
+ 			ea, access, current->comm);
+ 		pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
+ 			trap, vsid, ssize, psize, lpsize, pte);
+ 	}
+ }
+
+ /*
+  * Bring the PACA's cached page size information in line with what is
+  * about to be used for @ea.
+  *
+  * For a user address, a mismatch with @psize re-copies the mm context
+  * into the PACA and flushes the SLB. Otherwise the cached vmalloc SLLP
+  * is refreshed when it differs from the current vmalloc page size.
+  */
+ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
+ 			     int psize, bool user_region)
+ {
+ 	if (user_region) {
+ 		if (psize == get_paca_psize(ea))
+ 			return;
+
+ 		copy_mm_to_paca(mm);
+ 		slb_flush_and_restore_bolted();
+ 		return;
+ 	}
+
+ 	if (get_paca()->vmalloc_sllp ==
+ 	    mmu_psize_defs[mmu_vmalloc_psize].sllp)
+ 		return;
+
+ 	get_paca()->vmalloc_sllp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ 	slb_vmalloc_update();
+ }
+
+/*
+ * Result code is:
+ * 0 - handled
+ * 1 - normal page fault
+ * -1 - critical hash insertion error
+ * -2 - access not permitted by subpage protection mechanism
+ */
+int hash_page_mm(struct mm_struct *mm, unsigned long ea,
+ unsigned long access, unsigned long trap,
+ unsigned long flags)
+{
+ bool is_thp;
+ pgd_t *pgdir;
+ unsigned long vsid;
+ pte_t *ptep;
+ unsigned hugeshift;
+ int rc, user_region = 0;
+ int psize, ssize;
+
+ DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
+ ea, access, trap);
+ trace_hash_fault(ea, access, trap);
+
+ /* Get region & vsid */
+ switch (get_region_id(ea)) {
+ case USER_REGION_ID:
+ user_region = 1;
+ if (! mm) {
+ DBG_LOW(" user region with no mm !\n");
+ rc = 1;
+ goto bail;
+ }
+ psize = get_slice_psize(mm, ea);
+ ssize = user_segment_size(ea);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
+ break;
+ case VMALLOC_REGION_ID:
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ psize = mmu_vmalloc_psize;
+ ssize = mmu_kernel_ssize;
+ flags |= HPTE_USE_KERNEL_KEY;
+ break;
+
+ case IO_REGION_ID:
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ psize = mmu_io_psize;
+ ssize = mmu_kernel_ssize;
+ flags |= HPTE_USE_KERNEL_KEY;
+ break;
+ default:
+ /*
+ * Not a valid range
+ * Send the problem up to do_page_fault()
+ */
+ rc = 1;
+ goto bail;
+ }
+ DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
+
+ /* Bad address. */
+ if (!vsid) {
+ DBG_LOW("Bad address!\n");
+ rc = 1;
+ goto bail;
+ }
+ /* Get pgdir */
+ pgdir = mm->pgd;
+ if (pgdir == NULL) {
+ rc = 1;
+ goto bail;
+ }
+
+ /* Check CPU locality */
+ if (user_region && mm_is_thread_local(mm))
+ flags |= HPTE_LOCAL_UPDATE;
+
+#ifndef CONFIG_PPC_64K_PAGES
+ /*
+ * If we use 4K pages and our psize is not 4K, then we might
+ * be hitting a special driver mapping, and need to align the
+ * address before we fetch the PTE.
+ *
+ * It could also be a hugepage mapping, in which case this is
+ * not necessary, but it's not harmful, either.
+ */
+ if (psize != MMU_PAGE_4K)
+ ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ /* Get PTE and page size from page tables */
+ ptep = find_linux_pte(pgdir, ea, &is_thp, &hugeshift);
+ if (ptep == NULL || !pte_present(*ptep)) {
+ DBG_LOW(" no PTE !\n");
+ rc = 1;
+ goto bail;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES) && !radix_enabled()) {
+ if (hugeshift == PMD_SHIFT && psize == MMU_PAGE_16M)
+ hugeshift = mmu_psize_defs[MMU_PAGE_16M].shift;
+ if (hugeshift == PUD_SHIFT && psize == MMU_PAGE_16G)
+ hugeshift = mmu_psize_defs[MMU_PAGE_16G].shift;
+ }
+
+ /*
+ * Add _PAGE_PRESENT to the required access perm. If there are parallel
+ * updates to the pte that can possibly clear _PAGE_PTE, catch that too.
+ *
+ * We can safely use the return pte address in rest of the function
+ * because we do set H_PAGE_BUSY which prevents further updates to pte
+ * from generic code.
+ */
+ access |= _PAGE_PRESENT | _PAGE_PTE;
+
+ /*
+ * Pre-check access permissions (will be re-checked atomically
+ * in __hash_page_XX but this pre-check is a fast path
+ */
+ if (!check_pte_access(access, pte_val(*ptep))) {
+ DBG_LOW(" no access !\n");
+ rc = 1;
+ goto bail;
+ }
+
+ if (hugeshift) {
+ if (is_thp)
+ rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
+ trap, flags, ssize, psize);
+#ifdef CONFIG_HUGETLB_PAGE
+ else
+ rc = __hash_page_huge(ea, access, vsid, ptep, trap,
+ flags, ssize, hugeshift, psize);
+#else
+ else {
+ /*
+ * if we have hugeshift, and is not transhuge with
+ * hugetlb disabled, something is really wrong.
+ */
+ rc = 1;
+ WARN_ON(1);
+ }
+#endif
+ if (current->mm == mm)
+ check_paca_psize(ea, mm, psize, user_region);
+
+ goto bail;
+ }
+
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ /* Do actual hashing */
+#ifdef CONFIG_PPC_64K_PAGES
+ /* If H_PAGE_4K_PFN is set, make sure this is a 4k segment */
+ if ((pte_val(*ptep) & H_PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
+ demote_segment_4k(mm, ea);
+ psize = MMU_PAGE_4K;
+ }
+
+ /*
+ * If this PTE is non-cacheable and we have restrictions on
+ * using non cacheable large pages, then we switch to 4k
+ */
+ if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
+ if (user_region) {
+ demote_segment_4k(mm, ea);
+ psize = MMU_PAGE_4K;
+ } else if (ea < VMALLOC_END) {
+ /*
+ * some driver did a non-cacheable mapping
+ * in vmalloc space, so switch vmalloc
+ * to 4k pages
+ */
+ printk(KERN_ALERT "Reducing vmalloc segment "
+ "to 4kB pages because of "
+ "non-cacheable mapping\n");
+ psize = mmu_vmalloc_psize = MMU_PAGE_4K;
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+ }
+ }
+
+#endif /* CONFIG_PPC_64K_PAGES */
+
+ if (current->mm == mm)
+ check_paca_psize(ea, mm, psize, user_region);
+
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+ flags, ssize);
+ else
+#endif /* CONFIG_PPC_64K_PAGES */
+ {
+ int spp = subpage_protection(mm, ea);
+ if (access & spp)
+ rc = -2;
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap,
+ flags, ssize, spp);
+ }
+
+ /*
+ * Dump some info in case of hash insertion failure, they should
+ * never happen so it is really useful to know if/when they do
+ */
+ if (rc == -1)
+ hash_failure_debug(ea, access, vsid, trap, ssize, psize,
+ psize, pte_val(*ptep));
+#ifndef CONFIG_PPC_64K_PAGES
+ DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
+#else
+ DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
+ pte_val(*(ptep + PTRS_PER_PTE)));
+#endif
+ DBG_LOW(" -> rc=%d\n", rc);
+
+bail:
+ return rc;
+}
+EXPORT_SYMBOL_GPL(hash_page_mm);
+
+ /*
+  * Convenience wrapper around hash_page_mm(): pick the mm from the region
+  * of @ea (init_mm for the vmalloc and I/O regions, the current task's mm
+  * otherwise) and derive the update flags from @dsisr.
+  */
+ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
+ 	      unsigned long dsisr)
+ {
+ 	struct mm_struct *mm;
+ 	unsigned long flags;
+
+ 	switch (get_region_id(ea)) {
+ 	case VMALLOC_REGION_ID:
+ 	case IO_REGION_ID:
+ 		mm = &init_mm;
+ 		break;
+ 	default:
+ 		mm = current->mm;
+ 		break;
+ 	}
+
+ 	flags = (dsisr & DSISR_NOHPTE) ? HPTE_NOHPTE_UPDATE : 0;
+
+ 	return hash_page_mm(mm, ea, access, trap, flags);
+ }
+EXPORT_SYMBOL_GPL(hash_page);
+
+ /*
+  * Interrupt handler for hash faults.
+  *
+  * Builds the required access mask from the fault registers and tries to
+  * resolve the fault with hash_page_mm(). A positive result is passed on
+  * to hash__do_page_fault(); a negative one raises a signal for user mode
+  * (SIGSEGV for a subpage protection denial, SIGBUS otherwise) or reports
+  * a bad page fault for kernel mode.
+  */
+ DEFINE_INTERRUPT_HANDLER(do_hash_fault)
+ {
+ 	unsigned long ea = regs->dar;
+ 	unsigned long dsisr = regs->dsisr;
+ 	unsigned long access = _PAGE_PRESENT | _PAGE_READ;
+ 	unsigned long flags = 0;
+ 	struct mm_struct *mm;
+ 	unsigned int region_id;
+ 	long err;
+
+ 	/* These faults cannot be fixed by hashing: take the full path */
+ 	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) {
+ 		hash__do_page_fault(regs);
+ 		return;
+ 	}
+
+ 	region_id = get_region_id(ea);
+ 	if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
+ 		mm = &init_mm;
+ 	else
+ 		mm = current->mm;
+
+ 	if (dsisr & DSISR_NOHPTE)
+ 		flags |= HPTE_NOHPTE_UPDATE;
+
+ 	if (dsisr & DSISR_ISSTORE)
+ 		access |= _PAGE_WRITE;
+ 	/*
+ 	 * We set _PAGE_PRIVILEGED only when
+ 	 * kernel mode access kernel space.
+ 	 *
+ 	 * _PAGE_PRIVILEGED is NOT set
+ 	 * 1) when kernel mode access user space
+ 	 * 2) user space access kernel space.
+ 	 */
+ 	access |= _PAGE_PRIVILEGED;
+ 	if (user_mode(regs) || (region_id == USER_REGION_ID))
+ 		access &= ~_PAGE_PRIVILEGED;
+
+ 	if (TRAP(regs) == INTERRUPT_INST_STORAGE)
+ 		access |= _PAGE_EXEC;
+
+ 	err = hash_page_mm(mm, ea, access, TRAP(regs), flags);
+ 	if (unlikely(err < 0)) {
+ 		/*
+ 		 * Either the hash PTE insertion failed (-1) or the access
+ 		 * was denied by subpage protection (-2).
+ 		 */
+ 		if (user_mode(regs)) {
+ 			if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2)
+ 				_exception(SIGSEGV, regs, SEGV_ACCERR, ea);
+ 			else
+ 				_exception(SIGBUS, regs, BUS_ADRERR, ea);
+ 		} else {
+ 			bad_page_fault(regs, SIGBUS);
+ 		}
+ 	} else if (err) {
+ 		/* Not resolvable by hashing: normal page fault */
+ 		hash__do_page_fault(regs);
+ 	}
+ }
+
+ /*
+  * Decide whether an HPTE for @ea in @mm may be preloaded: only when the
+  * slice uses the mm's standard user page size and, for 4K pages, no
+  * subpage protection applies to the address.
+  */
+ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
+ {
+ 	int slice_psize = get_slice_psize(mm, ea);
+
+ 	/* We only prefault standard pages for now */
+ 	if (unlikely(slice_psize != mm_ctx_user_psize(&mm->context)))
+ 		return false;
+
+ 	/* Subpage protection is only looked up for 4K pages */
+ 	if (slice_psize != MMU_PAGE_4K)
+ 		return true;
+
+ 	/* Don't prefault if subpage protection is enabled for the EA */
+ 	return !subpage_protection(mm, ea);
+ }
+
+ /*
+ * Preload an HPTE for user address @ea of @mm from the Linux PTE @ptep.
+ *
+ * @is_exec adds _PAGE_EXEC to the requested access and @trap is handed
+ * through to __hash_page_64K()/__hash_page_4K(). Nothing is done when
+ * should_hash_preload() refuses, when the mm has no pgd, when no VSID can
+ * be obtained or (on 64K kernels) when the PTE is a 4K-PFN or
+ * cache-inhibited mapping. A failed insertion (-1) is logged, not returned.
+ */
+ static void hash_preload(struct mm_struct *mm, pte_t *ptep, unsigned long ea,
+ bool is_exec, unsigned long trap)
+ {
+ unsigned long vsid;
+ pgd_t *pgdir;
+ int rc, ssize, update_flags = 0;
+ unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
+ unsigned long flags;
+
+ /* Preloading is only valid for user addresses */
+ BUG_ON(get_region_id(ea) != USER_REGION_ID);
+
+ if (!should_hash_preload(mm, ea))
+ return;
+
+ DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
+ " trap=%lx\n", mm, mm->pgd, ea, access, trap);
+
+ /* Get Linux PTE if available */
+ pgdir = mm->pgd;
+ if (pgdir == NULL)
+ return;
+
+ /* Get VSID */
+ ssize = user_segment_size(ea);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
+ if (!vsid)
+ return;
+
+ #ifdef CONFIG_PPC_64K_PAGES
+ /* If either H_PAGE_4K_PFN or cache inhibited is set (and we are on
+ * a 64K kernel), then we don't preload, hash_page() will take
+ * care of it once we actually try to access the page.
+ * That way we don't have to duplicate all of the logic for segment
+ * page size demotion here
+ * Called with PTL held, hence can be sure the value won't change in
+ * between.
+ */
+ if ((pte_val(*ptep) & H_PAGE_4K_PFN) || pte_ci(*ptep))
+ return;
+ #endif /* CONFIG_PPC_64K_PAGES */
+
+ /*
+ * __hash_page_* must run with interrupts off, including PMI interrupts
+ * off, as it sets the H_PAGE_BUSY bit.
+ *
+ * It's otherwise possible for perf interrupts to hit at any time and
+ * may take a hash fault reading the user stack, which could take a
+ * hash miss and deadlock on the same H_PAGE_BUSY bit.
+ *
+ * Interrupts must also be off for the duration of the
+ * mm_is_thread_local test and update, to prevent preempt running the
+ * mm on another CPU (XXX: this may be racy vs kthread_use_mm).
+ */
+ powerpc_local_irq_pmu_save(flags);
+
+ /* Is that local to this CPU ? */
+ if (mm_is_thread_local(mm))
+ update_flags |= HPTE_LOCAL_UPDATE;
+
+ /* Hash it in */
+ #ifdef CONFIG_PPC_64K_PAGES
+ if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
+ rc = __hash_page_64K(ea, access, vsid, ptep, trap,
+ update_flags, ssize);
+ else
+ #endif /* CONFIG_PPC_64K_PAGES */
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags,
+ ssize, subpage_protection(mm, ea));
+
+ /* Dump some info in case of hash insertion failure, they should
+ * never happen so it is really useful to know if/when they do
+ */
+ if (rc == -1)
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ mm_ctx_user_psize(&mm->context),
+ mm_ctx_user_psize(&mm->context),
+ pte_val(*ptep));
+
+ /* Restore the interrupt state saved above */
+ powerpc_local_irq_pmu_restore(flags);
+ }
+
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ * We use it to preload an HPTE into the hash table corresponding to
+ * the updated linux PTE.
+ *
+ * This must always be called with the pte lock held.
+ */
+ /*
+  * Preload an HPTE for @address after its Linux PTE has been updated,
+  * but only for young PTEs below TASK_SIZE and only when the current
+  * task is handling a data (0x300) or instruction (0x400) storage fault.
+  */
+ void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ 			pte_t *ptep)
+ {
+ 	/*
+ 	 * We don't need to worry about _PAGE_PRESENT here because we are
+ 	 * called with either mm->page_table_lock held or ptl lock held
+ 	 */
+ 	unsigned long trap = 0UL;
+
+ 	/* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
+ 	if (!pte_young(*ptep) || address >= TASK_SIZE)
+ 		return;
+
+ 	/*
+ 	 * Work out whether we come from an instruction access fault and
+ 	 * pass that down to __hash_page, which avoids double-faulting on
+ 	 * execution of fresh text. regs may be NULL since init gets here
+ 	 * first thing at boot; trap then stays 0.
+ 	 */
+ 	if (current->thread.regs)
+ 		trap = TRAP(current->thread.regs);
+
+ 	/* Avoid filling the hash if not coming from a fault */
+ 	if (trap != 0x300 && trap != 0x400)
+ 		return;
+
+ 	hash_preload(vma->vm_mm, ptep, address, trap == 0x400, trap);
+ }
+
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+  * Abort an active transaction of the current task after a local hash flush.
+  *
+  * Transactions are not aborted by tlbiel, only tlbie. Without, syncing a
+  * page back to a block device w/PIO could pick up transactional data
+  * (bad!) so we force an abort here. Before the sync the page will be
+  * made read-only, which will flush_hash_page. BIG ISSUE here: if the
+  * kernel uses a page from userspace without unmapping it first, it may
+  * see the speculated version.
+  */
+ static inline void tm_flush_hash_page(int local)
+ {
+ 	/* Only local flushes on TM-capable CPUs are of interest. */
+ 	if (!local || !cpu_has_feature(CPU_FTR_TM))
+ 		return;
+
+ 	/* No saved regs, or no transaction active: nothing to abort. */
+ 	if (!current->thread.regs ||
+ 	    !MSR_TM_ACTIVE(current->thread.regs->msr))
+ 		return;
+
+ 	tm_enable();
+ 	tm_abort(TM_CAUSE_TLBI);
+ }
+ #else
+ /* Without CONFIG_PPC_TRANSACTIONAL_MEM there is nothing to abort. */
+ static inline void tm_flush_hash_page(int local)
+ {
+ }
+ #endif
+
+/*
+ * Return the global hash slot, corresponding to the given PTE, which contains
+ * the HPTE.
+ */
+unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
+ int ssize, real_pte_t rpte, unsigned int subpg_index)
+{
+ unsigned long hash, gslot, hidx;
+
+ hash = hpt_hash(vpn, shift, ssize);
+ hidx = __rpte_to_hidx(rpte, subpg_index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ gslot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ gslot += hidx & _PTEIDX_GROUP_IX;
+ return gslot;
+}
+
+ /*
+ * Invalidate the HPTE(s) backing one linux PTE.
+ *
+ * Every hashed subpage of @pte is visited with pte_iterate_hashed_subpages()
+ * and its global slot is passed to mmu_hash_ops.hpte_invalidate().
+ *
+ * @flags: only HPTE_LOCAL_UPDATE is looked at here; the result is forwarded
+ * as the "local" argument to hpte_invalidate() and tm_flush_hash_page().
+ */
+ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
+ unsigned long flags)
+ {
+ unsigned long index, shift, gslot;
+ int local = flags & HPTE_LOCAL_UPDATE;
+
+ DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, pte, index);
+ DBG_LOW(" sub %ld: gslot=%lx\n", index, gslot);
+ /*
+ * We use same base page size and actual psize, because we don't
+ * use these functions for hugepage
+ */
+ mmu_hash_ops.hpte_invalidate(gslot, vpn, psize, psize,
+ ssize, local);
+ } pte_iterate_hashed_end();
+
+ /* Must come after the invalidations; see tm_flush_hash_page() */
+ tm_flush_hash_page(local);
+ }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ /*
+ * Invalidate all valid HPTEs recorded in the slot array of a huge PMD.
+ *
+ * @addr is rounded down with HPAGE_PMD_MASK. If the platform provides
+ * mmu_hash_ops.hugepage_invalidate it is used for the whole range, otherwise
+ * each valid entry of the slot array is invalidated individually.
+ * In both cases tm_flush_hash_page() is called at the end.
+ */
+ void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize,
+ unsigned long flags)
+ {
+ int i, max_hpte_count, valid;
+ unsigned long s_addr;
+ unsigned char *hpte_slot_array;
+ unsigned long hidx, shift, vpn, hash, slot;
+ int local = flags & HPTE_LOCAL_UPDATE;
+
+ s_addr = addr & HPAGE_PMD_MASK;
+ hpte_slot_array = get_hpte_slot_array(pmdp);
+ /*
+ * If we try to do a HUGE PTE update after a withdraw is done,
+ * we will find the below NULL. This happens when we do
+ * split_huge_pmd
+ */
+ if (!hpte_slot_array)
+ return;
+
+ if (mmu_hash_ops.hugepage_invalidate) {
+ mmu_hash_ops.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize, local);
+ goto tm_abort;
+ }
+ /*
+ * No bulk hpte removal support, invalidate each entry
+ */
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HPAGE_PMD_SIZE >> shift;
+ for (i = 0; i < max_hpte_count; i++) {
+ /*
+ * 8 bits per each hpte entries
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_valid(hpte_slot_array, i);
+ if (!valid)
+ continue;
+ hidx = hpte_hash_index(hpte_slot_array, i);
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ /* Entries in the secondary group are located via the inverted hash */
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ /* base page size is @psize, actual page size is passed as 16M */
+ mmu_hash_ops.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, local);
+ }
+ tm_abort:
+ tm_flush_hash_page(local);
+ }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+ /*
+  * Flush the first @number entries of this CPU's ppc64_tlb_batch.
+  *
+  * The platform's flush_hash_range hook is used when there is one; otherwise
+  * each batched (vpn, pte) pair is flushed with flush_hash_page().
+  */
+ void flush_hash_range(unsigned long number, int local)
+ {
+ 	struct ppc64_tlb_batch *batch;
+ 	int i;
+
+ 	if (mmu_hash_ops.flush_hash_range) {
+ 		mmu_hash_ops.flush_hash_range(number, local);
+ 		return;
+ 	}
+
+ 	batch = this_cpu_ptr(&ppc64_tlb_batch);
+ 	for (i = 0; i < number; i++)
+ 		flush_hash_page(batch->vpn[i], batch->pte[i],
+ 				batch->psize, batch->ssize, local);
+ }
+
+ /*
+ * Insert an HPTE, retrying until mmu_hash_ops.hpte_insert() stops reporting
+ * -1 for both groups.
+ *
+ * The primary group (from @hash) is tried first, then the secondary group
+ * (from ~@hash, with HPTE_V_SECONDARY added to @vflags). When both return -1,
+ * hpte_remove() is called on one of the two groups and the whole sequence is
+ * repeated.
+ *
+ * Returns whatever hpte_insert() returned once it is not -1 for at least one
+ * of the groups (callers in this file treat -2 as a hypervisor failure).
+ */
+ long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
+ unsigned long pa, unsigned long rflags,
+ unsigned long vflags, int psize, int ssize)
+ {
+ unsigned long hpte_group;
+ long slot;
+
+ repeat:
+ hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+ /* Insert into the hash table, primary slot */
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
+ psize, psize, ssize);
+
+ /* Primary is full, try the secondary */
+ if (unlikely(slot == -1)) {
+ hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot = mmu_hash_ops.hpte_insert(hpte_group, vpn, pa, rflags,
+ vflags | HPTE_V_SECONDARY,
+ psize, psize, ssize);
+ if (slot == -1) {
+ /*
+ * Both groups are full. hpte_group still points at the
+ * secondary group; use the low timebase bit to switch to
+ * the primary group about half of the time.
+ */
+ if (mftb() & 0x1)
+ hpte_group = (hash & htab_hash_mask) *
+ HPTES_PER_GROUP;
+
+ mmu_hash_ops.hpte_remove(hpte_group);
+ goto repeat;
+ }
+ }
+
+ return slot;
+ }
+
+ /*
+  * Forget every HPTE group remembered for this CPU by the HPT stress code.
+  *
+  * For each remembered group (anything other than -1UL) hpte_remove() is
+  * called up to HPTES_PER_GROUP times, stopping early when it returns -1,
+  * and the slot is then reset to -1.
+  */
+ void hpt_clear_stress(void)
+ {
+ 	int cpu = raw_smp_processor_id();
+ 	int g, i;
+
+ 	for (g = 0; g < stress_nr_groups(); g++) {
+ 		unsigned long group = stress_hpt_struct[cpu].last_group[g];
+
+ 		if (group == -1UL)
+ 			continue;
+
+ 		i = 0;
+ 		while (i < HPTES_PER_GROUP &&
+ 		       mmu_hash_ops.hpte_remove(group) != -1)
+ 			i++;
+
+ 		stress_hpt_struct[cpu].last_group[g] = -1;
+ 	}
+ }
+
+ /*
+ * HPT stress helper, called with the group an HPTE was just placed in.
+ *
+ * The last (oldest) remembered group of this CPU is emptied with
+ * hpte_remove(), unless it is the same as @hpte_group, in which case nothing
+ * is done at all. For kernel addresses (@ea >= PAGE_OFFSET) the remembered
+ * groups are then shifted by one and @hpte_group is stored in slot 0.
+ */
+ void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+ {
+ unsigned long last_group;
+ int cpu = raw_smp_processor_id();
+
+ last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+ if (hpte_group == last_group)
+ return;
+
+ if (last_group != -1UL) {
+ int i;
+ /*
+ * Concurrent CPUs might be inserting into this group, so
+ * give up after a number of iterations, to prevent a live
+ * lock.
+ */
+ for (i = 0; i < HPTES_PER_GROUP; i++) {
+ if (mmu_hash_ops.hpte_remove(last_group) == -1)
+ break;
+ }
+ stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+ }
+
+ if (ea >= PAGE_OFFSET) {
+ /*
+ * We would really like to prefetch to get the TLB loaded, then
+ * remove the PTE before returning from fault interrupt, to
+ * increase the hash fault rate.
+ *
+ * Unfortunately QEMU TCG does not model the TLB in a way that
+ * makes this possible, and systemsim (mambo) emulator does not
+ * bring in TLBs with prefetches (although loads/stores do
+ * work for non-CI PTEs).
+ *
+ * So remember this PTE and clear it on the next hash fault.
+ */
+ /* Ranges overlap, hence memmove: slots 0..n-2 move to 1..n-1 */
+ memmove(&stress_hpt_struct[cpu].last_group[1],
+ &stress_hpt_struct[cpu].last_group[0],
+ (stress_nr_groups() - 1) * sizeof(unsigned long));
+ stress_hpt_struct[cpu].last_group[0] = hpte_group;
+ }
+ }
+
+ /*
+ * Set ppc64_rma_size from the first memblock and, when not running in
+ * hypervisor mode, cap memblock allocations to it.
+ *
+ * @first_memblock_base: must be 0, anything else is a BUG.
+ * @first_memblock_size: size of the first memblock; used as the starting
+ * value for ppc64_rma_size when CPU_FTR_HVMODE is not set.
+ */
+ void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+ {
+ /*
+ * We don't currently support the first MEMBLOCK not mapping 0
+ * physical on those processors
+ */
+ BUG_ON(first_memblock_base != 0);
+
+ /*
+ * On virtualized systems the first entry is our RMA region aka VRMA,
+ * non-virtualized 64-bit hash MMU systems don't have a limitation
+ * on real mode access.
+ *
+ * For guests on platforms before POWER9, we clamp the limit to 1G
+ * to avoid some funky things such as RTAS bugs etc...
+ *
+ * On POWER9 we limit to 1TB in case the host erroneously told us that
+ * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+ * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+ * for virtual real mode addressing and so it doesn't make sense to
+ * have an area larger than 1TB as it can't be addressed.
+ */
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ ppc64_rma_size = first_memblock_size;
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
+ ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000); /* 1G */
+ else
+ ppc64_rma_size = min_t(u64, ppc64_rma_size,
+ 1UL << SID_SHIFT_1T);
+
+ /* Finally limit subsequent allocations */
+ memblock_set_current_limit(ppc64_rma_size);
+ } else {
+ /* Hypervisor mode: no limit on real mode access */
+ ppc64_rma_size = ULONG_MAX;
+ }
+ }
+
+#ifdef CONFIG_DEBUG_FS
+
+ /* debugfs read handler for "hpt_order": reports ppc64_pft_size. @data is unused. */
+ static int hpt_order_get(void *data, u64 *val)
+ {
+ *val = ppc64_pft_size;
+ return 0;
+ }
+
+ /*
+ * debugfs write handler for "hpt_order": asks the platform to resize the
+ * hash page table to @val via mmu_hash_ops.resize_hpt().
+ *
+ * Returns -ENODEV when the platform has no resize_hpt hook, otherwise the
+ * hook's return value. @data is unused.
+ */
+ static int hpt_order_set(void *data, u64 val)
+ {
+ int ret;
+
+ if (!mmu_hash_ops.resize_hpt)
+ return -ENODEV;
+
+ /* The resize runs with the CPU hotplug read lock held */
+ cpus_read_lock();
+ ret = mmu_hash_ops.resize_hpt(val);
+ cpus_read_unlock();
+
+ return ret;
+ }
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_hpt_order, hpt_order_get, hpt_order_set, "%llu\n");
+
+ /*
+ * Create the "hpt_order" debugfs file (mode 0600) under arch_debugfs_dir,
+ * backed by fops_hpt_order. Always returns 0.
+ */
+ static int __init hash64_debugfs(void)
+ {
+ debugfs_create_file("hpt_order", 0600, arch_debugfs_dir, NULL,
+ &fops_hpt_order);
+ return 0;
+ }
+machine_device_initcall(pseries, hash64_debugfs);
+#endif /* CONFIG_DEBUG_FS */
+
+ /*
+ * Log ppc64_pft_size and, when it is non-zero, htab_hash_mask at info level.
+ */
+ void __init print_system_hash_info(void)
+ {
+ pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+
+ if (htab_hash_mask)
+ pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+ }
+
+ /*
+  * Pick a randomised heap start for @mm.
+  *
+  * 32-bit tasks randomise within 32M of mm->brk; everything else within 1G.
+  *
+  * If we are using 1TB segments (hash MMU with mmu_highuser_ssize ==
+  * MMU_SEGSIZE_1T) the base is raised to at least 1TB so the heap is backed
+  * by a 1TB segment. Otherwise the heap will be in the bottom 1TB which
+  * always uses 256MB segments and this may result in a performance penalty.
+  */
+ unsigned long arch_randomize_brk(struct mm_struct *mm)
+ {
+ 	unsigned long base = mm->brk;
+ 	unsigned long range = SZ_1G;
+
+ 	if (is_32bit_task())
+ 		range = SZ_32M;
+ 	else if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T)
+ 		base = max_t(unsigned long, base, SZ_1T);
+
+ 	return randomize_page(base, range);
+ }
diff --git a/arch/powerpc/mm/book3s64/hugetlbpage.c b/arch/powerpc/mm/book3s64/hugetlbpage.c
new file mode 100644
index 000000000000..2bcbbf9d85ac
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/hugetlbpage.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ *
+ * Based on the IA-32 version:
+ * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+
+unsigned int hpage_shift;
+EXPORT_SYMBOL(hpage_shift);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /*
+ * Insert or update the HPTE for a hugetlb PTE.
+ *
+ * Returns:
+ * 0 - done, or the PTE was busy / is a THP entry and the access should
+ * simply be retried
+ * 1 - the access must be handled as a regular page fault
+ * -1 - hpte_insert_repeating() reported a hypervisor failure (-2); the old
+ * PTE value has been restored
+ *
+ * @shift must match mmu_psize_defs[@mmu_psize].shift, anything else is a BUG.
+ */
+ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
+ pte_t *ptep, unsigned long trap, unsigned long flags,
+ int ssize, unsigned int shift, unsigned int mmu_psize)
+ {
+ real_pte_t rpte;
+ unsigned long vpn;
+ unsigned long old_pte, new_pte;
+ unsigned long rflags, pa;
+ long slot, offset;
+
+ BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
+
+ /* Search the Linux page table for a match with va */
+ vpn = hpt_vpn(ea, vsid, ssize);
+
+ /*
+ * At this point, we have a pte (old_pte) which can be used to build
+ * or update an HPTE. There are 2 cases:
+ *
+ * 1. There is a valid (present) pte with no associated HPTE (this is
+ * the most common case)
+ * 2. There is a valid (present) pte with an associated HPTE. The
+ * current values of the pp bits in the HPTE prevent access
+ * because we are doing software DIRTY bit management and the
+ * page is currently not DIRTY.
+ */
+
+
+ do {
+ old_pte = pte_val(*ptep);
+ /* If PTE busy, retry the access */
+ if (unlikely(old_pte & H_PAGE_BUSY))
+ return 0;
+ /* If PTE permissions don't match, take page fault */
+ if (unlikely(!check_pte_access(access, old_pte)))
+ return 1;
+ /*
+ * If hash-4k, hugepages use several contiguous PxD entries
+ * so bail out and let mm make the page young or dirty
+ */
+ if (IS_ENABLED(CONFIG_PPC_4K_PAGES)) {
+ if (!(old_pte & _PAGE_ACCESSED))
+ return 1;
+ if ((access & _PAGE_WRITE) && !(old_pte & _PAGE_DIRTY))
+ return 1;
+ }
+
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access
+ */
+ new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
+ if (access & _PAGE_WRITE)
+ new_pte |= _PAGE_DIRTY;
+ } while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+
+ /* Make sure this is a hugetlb entry */
+ if (old_pte & H_PAGE_THP_HUGE)
+ return 0;
+
+ rflags = htab_convert_pte_flags(new_pte, flags);
+ /* offset is handed to __real_pte() and pte_set_hidx() below */
+ if (unlikely(mmu_psize == MMU_PAGE_16G))
+ offset = PTRS_PER_PUD;
+ else
+ offset = PTRS_PER_PMD;
+ rpte = __real_pte(__pte(old_pte), ptep, offset);
+
+ if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
+ rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+ /* Check if pte already has an hpte (case 2) */
+ if (unlikely(old_pte & H_PAGE_HASHPTE)) {
+ /* There MIGHT be an HPTE for this pte */
+ unsigned long gslot;
+
+ gslot = pte_get_hash_gslot(vpn, shift, ssize, rpte, 0);
+ /* Update failed: drop the HPTE flags so a fresh insert is done below */
+ if (mmu_hash_ops.hpte_updatepp(gslot, rflags, vpn, mmu_psize,
+ mmu_psize, ssize, flags) == -1)
+ old_pte &= ~_PAGE_HPTEFLAGS;
+ }
+
+ if (likely(!(old_pte & H_PAGE_HASHPTE))) {
+ unsigned long hash = hpt_hash(vpn, shift, ssize);
+
+ pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+
+ /* clear HPTE slot information in new PTE */
+ new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
+
+ slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
+ mmu_psize, ssize);
+
+ /*
+ * Hypervisor failure. Restore old pte and return -1
+ * similar to __hash_page_*
+ */
+ if (unlikely(slot == -2)) {
+ *ptep = __pte(old_pte);
+ hash_failure_debug(ea, access, vsid, trap, ssize,
+ mmu_psize, mmu_psize, old_pte);
+ return -1;
+ }
+
+ new_pte |= pte_set_hidx(ptep, rpte, 0, slot, offset);
+ }
+
+ /*
+ * No need to use ldarx/stdcx here
+ */
+ /* Publishing the PTE without H_PAGE_BUSY releases the lock taken above */
+ *ptep = __pte(new_pte & ~H_PAGE_BUSY);
+ return 0;
+ }
+#endif
+
+ /*
+  * Begin a protection change on a huge PTE and return its previous value.
+  *
+  * Clear the _PAGE_PRESENT so that no hardware parallel update is
+  * possible. Also keep the pte_present true so that we don't take
+  * wrong fault (that is why _PAGE_INVALID is set at the same time).
+  */
+ pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
+ 				  unsigned long addr, pte_t *ptep)
+ {
+ 	return __pte(pte_update(vma->vm_mm, addr, ptep,
+ 				_PAGE_PRESENT, _PAGE_INVALID, 1));
+ }
+
+ /*
+  * Finish a protection change started by huge_ptep_modify_prot_start().
+  *
+  * Radix has its own implementation; on hash the new @pte is installed with
+  * set_huge_pte_at() using the huge page size of @vma's hstate.
+  */
+ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ 				  pte_t *ptep, pte_t old_pte, pte_t pte)
+ {
+ 	if (radix_enabled()) {
+ 		radix__huge_ptep_modify_prot_commit(vma, addr, ptep,
+ 						    old_pte, pte);
+ 		return;
+ 	}
+
+ 	set_huge_pte_at(vma->vm_mm, addr, ptep, pte,
+ 			huge_page_size(hstate_vma(vma)));
+ }
+
+ /*
+  * Set the default large page size (hpage_shift).
+  *
+  * The first page size with a non-zero shift in mmu_psize_defs[] wins, tried
+  * in the order 16M, 1M, 2M. hpage_shift is left untouched when none of them
+  * is available.
+  */
+ void __init hugetlbpage_init_defaultsize(void)
+ {
+ 	const int preferred[] = { MMU_PAGE_16M, MMU_PAGE_1M, MMU_PAGE_2M };
+ 	unsigned int i;
+
+ 	for (i = 0; i < ARRAY_SIZE(preferred); i++) {
+ 		if (!mmu_psize_defs[preferred[i]].shift)
+ 			continue;
+
+ 		hpage_shift = mmu_psize_defs[preferred[i]].shift;
+ 		return;
+ 	}
+ }
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
new file mode 100644
index 000000000000..a57a25f06a21
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H
+#define ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H
+
+#include <linux/jump_label.h>
+
+extern bool stress_slb_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_slb_key);
+
+ /* True when the stress_slb_key static key is enabled (default false, unlikely). */
+ static inline bool stress_slb(void)
+ {
+ return static_branch_unlikely(&stress_slb_key);
+ }
+
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+ /* True when the stress_hpt_key static key is enabled (default false, unlikely). */
+ static inline bool stress_hpt(void)
+ {
+ return static_branch_unlikely(&stress_hpt_key);
+ }
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
+void slb_setup_new_exec(void);
+
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
+
+#endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */
diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c
new file mode 100644
index 000000000000..c0e8d597e4cb
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * IOMMU helpers in MMU context.
+ *
+ * Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
+ */
+
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
+#include <linux/rculist.h>
+#include <linux/vmalloc.h>
+#include <linux/mutex.h>
+#include <linux/migrate.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/sizes.h>
+#include <linux/mm.h>
+#include <asm/mmu_context.h>
+#include <asm/pte-walk.h>
+#include <linux/mm_inline.h>
+
+static DEFINE_MUTEX(mem_list_mutex);
+
+#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY 0x1
+#define MM_IOMMU_TABLE_GROUP_PAGE_MASK ~(SZ_4K - 1)
+
+ /*
+ * One preregistered memory region of an mm, linked on
+ * mm->context.iommu_group_mem_list.
+ */
+ struct mm_iommu_table_group_mem_t {
+ struct list_head next; /* entry in mm->context.iommu_group_mem_list */
+ struct rcu_head rcu; /* used to defer freeing via call_rcu() */
+ unsigned long used; /* user count, changed under mem_list_mutex */
+ atomic64_t mapped; /* starts at 1; 0 means no new mappings allowed */
+ unsigned int pageshift; /* largest page shift usable for this region */
+ u64 ua; /* userspace address */
+ u64 entries; /* number of entries in hpas/hpages[] */
+ /*
+ * in mm_iommu_do_alloc we temporarily use this to store
+ * struct page address.
+ *
+ * We need to convert ua to hpa in real mode. Make it
+ * simpler by storing physical address.
+ */
+ union {
+ struct page **hpages; /* vmalloc'ed */
+ phys_addr_t *hpas;
+ };
+ #define MM_IOMMU_TABLE_INVALID_HPA ((uint64_t)-1)
+ u64 dev_hpa; /* Device memory base address */
+ };
+
+ /* Return true when @mm has at least one region on its iommu_group_mem_list. */
+ bool mm_iommu_preregistered(struct mm_struct *mm)
+ {
+ return !list_empty(&mm->context.iommu_group_mem_list);
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_preregistered);
+
+ /*
+  * Register a memory region of @entries pages starting at @ua with @mm.
+  *
+  * With @dev_hpa == MM_IOMMU_TABLE_INVALID_HPA the region is ordinary user
+  * memory: it is accounted as locked memory and every page is pinned with
+  * FOLL_WRITE | FOLL_LONGTERM. Otherwise the region is device memory based at
+  * @dev_hpa and nothing is pinned or accounted.
+  *
+  * On success the new descriptor is added to mm->context.iommu_group_mem_list,
+  * stored in *@pmem and 0 is returned.
+  *
+  * Returns a negative errno on failure: the account_locked_vm() error,
+  * -ENOMEM on allocation failure, the pin_user_pages() error or -EFAULT when
+  * not all pages could be pinned, -EINVAL when the region overlaps a
+  * registered one. Everything taken so far is released again on failure.
+  */
+ static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
+ 			      unsigned long entries, unsigned long dev_hpa,
+ 			      struct mm_iommu_table_group_mem_t **pmem)
+ {
+ 	struct mm_iommu_table_group_mem_t *mem, *mem2;
+ 	long i, ret, locked_entries = 0, pinned = 0;
+ 	unsigned int pageshift;
+ 	unsigned long entry, chunk;
+
+ 	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+ 		ret = account_locked_vm(mm, entries, true);
+ 		if (ret)
+ 			return ret;
+
+ 		locked_entries = entries;
+ 	}
+
+ 	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ 	if (!mem) {
+ 		ret = -ENOMEM;
+ 		goto unlock_exit;
+ 	}
+
+ 	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
+ 		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
+ 		mem->dev_hpa = dev_hpa;
+ 		goto good_exit;
+ 	}
+ 	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;
+
+ 	/*
+ 	 * For a starting point for a maximum page size calculation
+ 	 * we use @ua and @entries natural alignment to allow IOMMU pages
+ 	 * smaller than huge pages but still bigger than PAGE_SIZE.
+ 	 */
+ 	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
+ 	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
+ 	if (!mem->hpas) {
+ 		kfree(mem);
+ 		ret = -ENOMEM;
+ 		goto unlock_exit;
+ 	}
+
+ 	/* Pin the region in chunks rather than with one huge request */
+ 	mmap_read_lock(mm);
+ 	chunk = (1UL << (PAGE_SHIFT + MAX_PAGE_ORDER)) /
+ 			sizeof(struct vm_area_struct *);
+ 	chunk = min(chunk, entries);
+ 	for (entry = 0; entry < entries; entry += chunk) {
+ 		unsigned long n = min(entries - entry, chunk);
+
+ 		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
+ 				FOLL_WRITE | FOLL_LONGTERM,
+ 				mem->hpages + entry);
+ 		if (ret == n) {
+ 			pinned += n;
+ 			continue;
+ 		}
+ 		/* Partial pin: remember what has to be unpinned, then stop */
+ 		if (ret > 0)
+ 			pinned += ret;
+ 		break;
+ 	}
+ 	mmap_read_unlock(mm);
+ 	if (pinned != entries) {
+ 		/*
+ 		 * pin_user_pages() may return fewer pages than requested
+ 		 * without an error code. Never hand such a non-negative
+ 		 * count back to the caller as the result of a failed
+ 		 * registration.
+ 		 */
+ 		if (ret >= 0)
+ 			ret = -EFAULT;
+ 		goto free_exit;
+ 	}
+
+ good_exit:
+ 	atomic64_set(&mem->mapped, 1);
+ 	mem->used = 1;
+ 	mem->ua = ua;
+ 	mem->entries = entries;
+
+ 	mutex_lock(&mem_list_mutex);
+
+ 	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
+ 				lockdep_is_held(&mem_list_mutex)) {
+ 		/* Overlap? */
+ 		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
+ 				(ua < (mem2->ua +
+ 				       (mem2->entries << PAGE_SHIFT)))) {
+ 			ret = -EINVAL;
+ 			mutex_unlock(&mem_list_mutex);
+ 			goto free_exit;
+ 		}
+ 	}
+
+ 	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
+ 		/*
+ 		 * Allow to use larger than 64k IOMMU pages. Only do that
+ 		 * if we are backed by hugetlb. Skip device memory as it is not
+ 		 * backed with page structs.
+ 		 */
+ 		pageshift = PAGE_SHIFT;
+ 		for (i = 0; i < entries; ++i) {
+ 			struct page *page = mem->hpages[i];
+
+ 			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
+ 				pageshift = page_shift(compound_head(page));
+ 			mem->pageshift = min(mem->pageshift, pageshift);
+ 			/*
+ 			 * We don't need struct page reference any more, switch
+ 			 * to physical address.
+ 			 */
+ 			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
+ 		}
+ 	}
+
+ 	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);
+
+ 	mutex_unlock(&mem_list_mutex);
+
+ 	*pmem = mem;
+
+ 	return 0;
+
+ free_exit:
+ 	/* free the references taken */
+ 	unpin_user_pages(mem->hpages, pinned);
+
+ 	vfree(mem->hpas);
+ 	kfree(mem);
+
+ unlock_exit:
+ 	account_locked_vm(mm, locked_entries, false);
+
+ 	return ret;
+ }
+
+ /*
+ * Register @entries pages of ordinary user memory starting at @ua.
+ * Thin wrapper around mm_iommu_do_alloc() with no device memory base.
+ */
+ long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
+ struct mm_iommu_table_group_mem_t **pmem)
+ {
+ return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
+ pmem);
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_new);
+
+ /*
+ * Register a device memory region of @entries pages at @ua whose host
+ * physical base is @dev_hpa. Thin wrapper around mm_iommu_do_alloc().
+ */
+ long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
+ unsigned long entries, unsigned long dev_hpa,
+ struct mm_iommu_table_group_mem_t **pmem)
+ {
+ return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_newdev);
+
+ /*
+  * Unpin every page still recorded in @mem->hpas[].
+  *
+  * Entries flagged with MM_IOMMU_TABLE_GROUP_PAGE_DIRTY get their page marked
+  * dirty first. Each processed entry is reset to 0. Regions without an hpas
+  * array are left alone.
+  */
+ static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
+ {
+ 	struct page *pg;
+ 	long idx;
+
+ 	if (!mem->hpas)
+ 		return;
+
+ 	for (idx = 0; idx < mem->entries; ++idx) {
+ 		phys_addr_t hpa = mem->hpas[idx];
+
+ 		if (!hpa)
+ 			continue;
+
+ 		pg = pfn_to_page(hpa >> PAGE_SHIFT);
+ 		if (!pg)
+ 			continue;
+
+ 		if (hpa & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
+ 			SetPageDirty(pg);
+
+ 		unpin_user_page(pg);
+
+ 		mem->hpas[idx] = 0;
+ 	}
+ }
+
+ /* Unpin the pages of @mem, then free its hpas array and the descriptor itself. */
+ static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
+ {
+
+ mm_iommu_unpin(mem);
+ vfree(mem->hpas);
+ kfree(mem);
+ }
+
+ /* RCU callback: recover the descriptor from its embedded rcu_head and free it. */
+ static void mm_iommu_free(struct rcu_head *head)
+ {
+ struct mm_iommu_table_group_mem_t *mem = container_of(head,
+ struct mm_iommu_table_group_mem_t, rcu);
+
+ mm_iommu_do_free(mem);
+ }
+
+ /*
+ * Unlink @mem from its list and defer the actual freeing to mm_iommu_free()
+ * via call_rcu().
+ */
+ static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
+ {
+ list_del_rcu(&mem->next);
+ call_rcu(&mem->rcu, mm_iommu_free);
+ }
+
+ /*
+ * Drop one user reference on @mem and release the region when it was the
+ * last one and no mappings remain.
+ *
+ * Returns 0 on success (including when other users remain), -ENOENT when
+ * the user count is already 0, and -EBUSY when this was the last user but
+ * mappings still exist (the reference is then kept).
+ */
+ long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
+ {
+ long ret = 0;
+ unsigned long unlock_entries = 0;
+
+ mutex_lock(&mem_list_mutex);
+
+ if (mem->used == 0) {
+ ret = -ENOENT;
+ goto unlock_exit;
+ }
+
+ --mem->used;
+ /* There are still users, exit */
+ if (mem->used)
+ goto unlock_exit;
+
+ /* Are there still mappings? */
+ if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
+ /* Undo the decrement above: the region stays registered */
+ ++mem->used;
+ ret = -EBUSY;
+ goto unlock_exit;
+ }
+
+ /* Only non-device regions were accounted as locked memory */
+ if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ unlock_entries = mem->entries;
+
+ /* @mapped became 0 so now mappings are disabled, release the region */
+ mm_iommu_release(mem);
+
+ unlock_exit:
+ mutex_unlock(&mem_list_mutex);
+
+ /* unlock_entries is 0 on every path that did not release the region */
+ account_locked_vm(mm, unlock_entries, false);
+
+ return ret;
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_put);
+
+ /*
+  * Find the registered region of @mm that fully contains [@ua, @ua + @size).
+  *
+  * The list is walked under rcu_read_lock(). Returns the first matching
+  * descriptor or NULL; no reference is taken on it.
+  */
+ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
+ 		unsigned long ua, unsigned long size)
+ {
+ 	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ 	rcu_read_lock();
+ 	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ 		/* Range starts before this region */
+ 		if (ua < mem->ua)
+ 			continue;
+
+ 		/* Range ends past this region */
+ 		if (ua + size > mem->ua + (mem->entries << PAGE_SHIFT))
+ 			continue;
+
+ 		ret = mem;
+ 		break;
+ 	}
+ 	rcu_read_unlock();
+
+ 	return ret;
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+
+ /*
+ * Find the region registered with exactly this @ua and @entries and take a
+ * user reference on it (++used), all under mem_list_mutex.
+ *
+ * Returns the descriptor, or NULL when no such region exists.
+ */
+ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
+ unsigned long ua, unsigned long entries)
+ {
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ mutex_lock(&mem_list_mutex);
+
+ list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next,
+ lockdep_is_held(&mem_list_mutex)) {
+ if ((mem->ua == ua) && (mem->entries == entries)) {
+ ret = mem;
+ ++mem->used;
+ break;
+ }
+ }
+
+ mutex_unlock(&mem_list_mutex);
+
+ return ret;
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_get);
+
+ /*
+ * Translate userspace address @ua inside region @mem to a host physical
+ * address, stored in *@hpa.
+ *
+ * Returns 0 on success, -EFAULT when @ua lies beyond the region or when
+ * @pageshift is larger than the region supports.
+ */
+ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned int pageshift, unsigned long *hpa)
+ {
+ const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+ u64 *va;
+
+ if (entry >= mem->entries)
+ return -EFAULT;
+
+ if (pageshift > mem->pageshift)
+ return -EFAULT;
+
+ /* No hpas array: translate linearly from the device base address */
+ if (!mem->hpas) {
+ *hpa = mem->dev_hpa + (ua - mem->ua);
+ return 0;
+ }
+
+ va = &mem->hpas[entry];
+ /* Mask off the low flag bits (e.g. the dirty flag), add the in-page offset */
+ *hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);
+
+ return 0;
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+
+ /*
+  * Check whether host physical address @hpa lies inside a device memory
+  * region registered with @mm.
+  *
+  * On a match *@size is set to the amount of preregistered memory available
+  * from @hpa, capped at 1 << @pageshift, and true is returned. Returns false
+  * (leaving *@size untouched) otherwise.
+  *
+  * The list is walked under rcu_read_lock(), which is dropped again on every
+  * return path.
+  */
+ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
+ 		unsigned int pageshift, unsigned long *size)
+ {
+ 	struct mm_iommu_table_group_mem_t *mem;
+ 	unsigned long end;
+ 	bool found = false;
+
+ 	rcu_read_lock();
+ 	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
+ 		/* Not device memory */
+ 		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
+ 			continue;
+
+ 		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
+ 		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
+ 			/*
+ 			 * Since the IOMMU page size might be bigger than
+ 			 * PAGE_SIZE, the amount of preregistered memory
+ 			 * starting from @hpa might be smaller than 1<<pageshift
+ 			 * and the caller needs to distinguish this situation.
+ 			 */
+ 			*size = min(1UL << pageshift, end - hpa);
+ 			/* Leave via the common exit so the RCU lock is released */
+ 			found = true;
+ 			break;
+ 		}
+ 	}
+ 	rcu_read_unlock();
+
+ 	return found;
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);
+
+ /*
+ * Take a mapping reference on @mem.
+ *
+ * Returns 0 when the counter was non-zero and has been incremented, -ENXIO
+ * when it was already 0.
+ */
+ long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
+ {
+ if (atomic64_inc_not_zero(&mem->mapped))
+ return 0;
+
+ /* Last mm_iommu_put() has been called, no more mappings allowed() */
+ return -ENXIO;
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);
+
+ /*
+ * Drop a mapping reference on @mem. The counter is never taken below 1 here;
+ * the final 1 -> 0 transition is done by mm_iommu_put().
+ */
+ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
+ {
+ atomic64_add_unless(&mem->mapped, -1, 1);
+ }
+EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);
+
+ /* Initialise the (empty) list of preregistered regions of @mm. */
+ void mm_iommu_init(struct mm_struct *mm)
+ {
+ INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
+ }
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
new file mode 100644
index 000000000000..4e1e45420bd4
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MMU context allocation for 64-bit kernels.
+ *
+ * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/pkeys.h>
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#include "internal.h"
+
+static DEFINE_IDA(mmu_context_ida);
+
+ /*
+ * Allocate a context id in [@min_id, @max_id] from mmu_context_ida.
+ * Returns the id, or a negative value on failure (callers test for < 0).
+ */
+ static int alloc_context_id(int min_id, int max_id)
+ {
+ return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
+ }
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /*
+ * Reserve exactly context id @id in mmu_context_ida, warning when that
+ * particular id could not be obtained.
+ */
+ void __init hash__reserve_context_id(int id)
+ {
+ int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);
+
+ WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
+ }
+
+ /*
+  * Allocate a user context id for the hash MMU.
+  *
+  * The upper bound is MAX_USER_CONTEXT when the MMU has MMU_FTR_68_BIT_VA and
+  * MAX_USER_CONTEXT_65BIT_VA otherwise. Returns the id, or a negative value
+  * on failure.
+  */
+ int hash__alloc_context_id(void)
+ {
+ 	unsigned long limit = MAX_USER_CONTEXT_65BIT_VA;
+
+ 	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
+ 		limit = MAX_USER_CONTEXT;
+
+ 	return alloc_context_id(MIN_USER_CONTEXT, limit);
+ }
+EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+#endif
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+ /*
+ * Give @ctx a fresh set of context ids: slot 0 always, every other slot of
+ * extended_id[] only when it already holds a non-zero id.
+ *
+ * Returns ctx->id on success. On failure the ids allocated by this call are
+ * freed again and the negative value from hash__alloc_context_id() is
+ * returned.
+ */
+ static int realloc_context_ids(mm_context_t *ctx)
+ {
+ int i, id;
+
+ /*
+ * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
+ * there wasn't one allocated previously (which happens in the exec
+ * case where ctx is newly allocated).
+ *
+ * We have to be a bit careful here. We must keep the existing ids in
+ * the array, so that we can test if they're non-zero to decide if we
+ * need to allocate a new one. However in case of error we must free the
+ * ids we've allocated but *not* any of the existing ones (or risk a
+ * UAF). That's why we decrement i at the start of the error handling
+ * loop, to skip the id that we just tested but couldn't reallocate.
+ */
+ for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
+ if (i == 0 || ctx->extended_id[i]) {
+ id = hash__alloc_context_id();
+ if (id < 0)
+ goto error;
+
+ ctx->extended_id[i] = id;
+ }
+ }
+
+ /* The caller expects us to return id */
+ return ctx->id;
+
+ error:
+ /* Slots below i hold ids from this call (or 0), never inherited ones */
+ for (i--; i >= 0; i--) {
+ if (ctx->extended_id[i])
+ ida_free(&mmu_context_ida, ctx->extended_id[i]);
+ }
+
+ return id;
+ }
+
+ /*
+ * Set up the hash MMU context of a new @mm.
+ *
+ * Allocates mm->context.hash_context, initialises it (zeroed for exec,
+ * copied from current->mm for fork) and allocates the context ids.
+ *
+ * Returns the new context id, -ENOMEM on allocation failure, or the
+ * negative value from realloc_context_ids(). Allocations made here are
+ * freed again on failure.
+ */
+ static int hash__init_new_context(struct mm_struct *mm)
+ {
+ int index;
+
+ mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
+ GFP_KERNEL);
+ if (!mm->context.hash_context)
+ return -ENOMEM;
+
+ /*
+ * The old code would re-promote on fork, we don't do that when using
+ * slices as it could cause problem promoting slices that have been
+ * forced down to 4K.
+ *
+ * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
+ * explicitly against context.id == 0. This ensures that we properly
+ * initialize context slice details for newly allocated mm's (which will
+ * have id == 0) and don't alter context slice inherited via fork (which
+ * will have id != 0).
+ *
+ * We should not be calling init_new_context() on init_mm. Hence a
+ * check against 0 is OK.
+ */
+ if (mm->context.id == 0) {
+ memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
+ slice_init_new_context_exec(mm);
+ } else {
+ /* This is fork. Copy hash_context details from current->mm */
+ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+ #ifdef CONFIG_PPC_SUBPAGE_PROT
+ /* inherit subpage prot details if we have one. */
+ /* NOTE(review): the new table is allocated but not filled in here - confirm who initialises it */
+ if (current->mm->context.hash_context->spt) {
+ mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
+ GFP_KERNEL);
+ if (!mm->context.hash_context->spt) {
+ kfree(mm->context.hash_context);
+ return -ENOMEM;
+ }
+ }
+ #endif
+ }
+
+ index = realloc_context_ids(&mm->context);
+ if (index < 0) {
+ #ifdef CONFIG_PPC_SUBPAGE_PROT
+ /* spt is NULL here unless it was allocated above; kfree(NULL) is fine */
+ kfree(mm->context.hash_context->spt);
+ #endif
+ kfree(mm->context.hash_context);
+ return index;
+ }
+
+ pkey_mm_init(mm);
+ return index;
+ }
+
+ /* Hash MMU exec hook: runs the slice and the SLB new-exec setup, in that order. */
+ void hash__setup_new_exec(void)
+ {
+ slice_setup_new_exec();
+
+ slb_setup_new_exec();
+ }
+#else
+ /*
+ * Stub for builds without CONFIG_PPC_64S_HASH_MMU. BUILD_BUG() makes the
+ * build fail if a call to it is not optimised away.
+ */
+ static inline int hash__init_new_context(struct mm_struct *mm)
+ {
+ BUILD_BUG();
+ return 0;
+ }
+#endif
+
+ /*
+ * Set up the radix MMU context of a new @mm: allocate a PID in
+ * [mmu_base_pid, (1 << mmu_pid_bits) - 1] and fill in its process table
+ * entry.
+ *
+ * Returns the allocated PID, or the negative value from alloc_context_id().
+ */
+ static int radix__init_new_context(struct mm_struct *mm)
+ {
+ unsigned long rts_field;
+ int index, max_id;
+
+ max_id = (1 << mmu_pid_bits) - 1;
+ index = alloc_context_id(mmu_base_pid, max_id);
+ if (index < 0)
+ return index;
+
+ /*
+ * set the process table entry,
+ */
+ rts_field = radix__get_tree_size();
+ process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
+
+ /*
+ * Order the above store with subsequent update of the PID
+ * register (at which point HW can start loading/caching
+ * the entry) and the corresponding load by the MMU from
+ * the L2 cache.
+ */
+ asm volatile("ptesync;isync" : : : "memory");
+
+ #ifdef CONFIG_PPC_64S_HASH_MMU
+ /* Radix has no hash context; destroy_contexts() only frees it on hash */
+ mm->context.hash_context = NULL;
+ #endif
+
+ return index;
+ }
+
+ /*
+ * Initialise the MMU context of a new @mm.
+ *
+ * Delegates id allocation to the radix or hash variant, then resets the
+ * remaining fields of mm->context. @tsk is not used here.
+ *
+ * Returns 0 on success or the negative value from the MMU-specific helper.
+ */
+ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
+ {
+ int index;
+
+ if (radix_enabled())
+ index = radix__init_new_context(mm);
+ else
+ index = hash__init_new_context(mm);
+
+ if (index < 0)
+ return index;
+
+ mm->context.id = index;
+
+ mm->context.pte_frag = NULL;
+ mm->context.pmd_frag = NULL;
+ #ifdef CONFIG_SPAPR_TCE_IOMMU
+ mm_iommu_init(mm);
+ #endif
+ atomic_set(&mm->context.active_cpus, 0);
+ atomic_set(&mm->context.copros, 0);
+
+ return 0;
+ }
+
+ /* Return @context_id to mmu_context_ida. */
+ void __destroy_context(int context_id)
+ {
+ ida_free(&mmu_context_ida, context_id);
+ }
+EXPORT_SYMBOL_GPL(__destroy_context);
+
+ /*
+ * Release the context id(s) held by @ctx.
+ *
+ * Radix: only ctx->id. Hash: every non-zero entry of extended_id[], plus the
+ * hash_context allocation.
+ */
+ static void destroy_contexts(mm_context_t *ctx)
+ {
+ if (radix_enabled()) {
+ ida_free(&mmu_context_ida, ctx->id);
+ } else {
+ #ifdef CONFIG_PPC_64S_HASH_MMU
+ int index, context_id;
+
+ for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
+ context_id = ctx->extended_id[index];
+ /* 0 marks an unused slot */
+ if (context_id)
+ ida_free(&mmu_context_ida, context_id);
+ }
+ kfree(ctx->hash_context);
+ #else
+ BUILD_BUG(); // radix_enabled() should be constant true
+ #endif
+ }
+ }
+
+ /*
+ * Drop the references still held for the unused fragments of the page that
+ * @pmd_frag points into, and free that page when the count reaches zero.
+ */
+ static void pmd_frag_destroy(void *pmd_frag)
+ {
+ int count;
+ struct ptdesc *ptdesc;
+
+ ptdesc = virt_to_ptdesc(pmd_frag);
+ /* drop all the pending references */
+ /* count = index of @pmd_frag within its page, in fragment units */
+ count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
+ /* We allow PMD_FRAG_NR fragments from a PMD page */
+ if (atomic_sub_and_test(PMD_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+ pagetable_dtor(ptdesc);
+ pagetable_free(ptdesc);
+ }
+ }
+
+ /*
+  * Release the partially used PTE and PMD fragment pages cached in
+  * @mm->context, if there are any.
+  */
+ static void destroy_pagetable_cache(struct mm_struct *mm)
+ {
+ 	void *pte_frag = mm->context.pte_frag;
+ 	void *pmd_frag;
+
+ 	if (pte_frag)
+ 		pte_frag_destroy(pte_frag);
+
+ 	pmd_frag = mm->context.pmd_frag;
+ 	if (pmd_frag)
+ 		pmd_frag_destroy(pmd_frag);
+ }
+
+ /*
+ * Tear down the MMU context of @mm: clear the radix process table entry or
+ * free the hash subpage protection data, release the context id(s) and mark
+ * the context as MMU_NO_CONTEXT.
+ */
+ void destroy_context(struct mm_struct *mm)
+ {
+ #ifdef CONFIG_SPAPR_TCE_IOMMU
+ /* Every preregistered region should have been released by now */
+ WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
+ #endif
+ /*
+ * For tasks which were successfully initialized we end up calling
+ * arch_exit_mmap() which clears the process table entry. And
+ * arch_exit_mmap() is called before the required fullmm TLB flush
+ * which does a RIC=2 flush. Hence for an initialized task, we do clear
+ * any cached process table entries.
+ *
+ * The condition below handles the error case during task init. We have
+ * set the process table entry early and if we fail a task
+ * initialization, we need to ensure the process table entry is zeroed.
+ * We need not worry about process table entry caches because the task
+ * never ran with the PID value.
+ */
+ if (radix_enabled())
+ process_tb[mm->context.id].prtb0 = 0;
+ else
+ subpage_prot_free(mm);
+ destroy_contexts(&mm->context);
+ mm->context.id = MMU_NO_CONTEXT;
+ }
+
+ /*
+ * Arch hook run when the mappings of @mm are torn down: frees the cached
+ * page table fragments and, on radix, zeroes the process table entry.
+ */
+ void arch_exit_mmap(struct mm_struct *mm)
+ {
+ destroy_pagetable_cache(mm);
+
+ if (radix_enabled()) {
+ /*
+ * Radix doesn't have a valid bit in the process table
+ * entries. However we know that at least P9 implementation
+ * will avoid caching an entry with an invalid RTS field,
+ * and 0 is invalid. So this will do.
+ *
+ * This runs before the "fullmm" tlb flush in exit_mmap,
+ * which does a RIC=2 tlbie to clear the process table
+ * entry. See the "fullmm" comments in tlb-radix.c.
+ *
+ * No barrier required here after the store because
+ * this process will do the invalidate, which starts with
+ * ptesync.
+ */
+ process_tb[mm->context.id].prtb0 = 0;
+ }
+ }
+
+#ifdef CONFIG_PPC_RADIX_MMU
+ /*
+ * Switch the radix MMU to @next by loading its context id into the PID SPR,
+ * followed by an isync. @prev is not used.
+ */
+ void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+ {
+ mtspr(SPRN_PID, next->context.id);
+ isync();
+ }
+#endif
+
+/**
+ * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
+ *
+ * This clears the CPU from mm_cpumask for all processes, and then flushes the
+ * local TLB to ensure TLB coherency in case the CPU is onlined again.
+ *
+ * KVM guest translations are not necessarily flushed here. If KVM started
+ * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
+ */
+#ifdef CONFIG_HOTPLUG_CPU
+ void cleanup_cpu_mmu_context(void)
+ {
+ int cpu = smp_processor_id();
+
+ /* Drop this CPU from the mm_cpumask of all tasks ... */
+ clear_tasks_mm_cpumask(cpu);
+ /* ... and flush the local TLB so nothing stale survives a re-online */
+ tlbiel_all();
+ }
+#endif
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
new file mode 100644
index 000000000000..c9431ae7f78a
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -0,0 +1,664 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm_types.h>
+#include <linux/memblock.h>
+#include <linux/memremap.h>
+#include <linux/pkeys.h>
+#include <linux/debugfs.h>
+#include <linux/proc_fs.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/trace.h>
+#include <asm/powernv.h>
+#include <asm/firmware.h>
+#include <asm/ultravisor.h>
+#include <asm/kexec.h>
+
+#include <mm/mmu_decl.h>
+#include <trace/events/thp.h>
+
+#include "internal.h"
+
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+EXPORT_SYMBOL_GPL(mmu_psize_defs);
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
+
+unsigned long __pmd_frag_nr;
+EXPORT_SYMBOL(__pmd_frag_nr);
+unsigned long __pmd_frag_size_shift;
+EXPORT_SYMBOL(__pmd_frag_size_shift);
+
+#ifdef CONFIG_KFENCE
+extern bool kfence_early_init;
+static int __init parse_kfence_early_init(char *arg) /* early_param handler; registered right after this function */
+{
+ int val;
+
+ if (get_option(&arg, &val)) /* only act when get_option() returns non-zero */
+ kfence_early_init = !!val; /* normalise to 0/1: any non-zero value sets the flag */
+ return 0; /* always reports success to the early_param machinery */
+}
+early_param("kfence.sample_interval", parse_kfence_early_init);
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * This is called when relaxing access to a hugepage. It's also called in the page
+ * fault path when we don't hit any of the major fault cases, i.e., a minor
+ * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
+ * handled those two for us, we additionally deal with missing execute
+ * permission here on some processors. Returns 1 if *pmdp differed from @entry (and was updated), else 0; @dirty is unused.
+ */
+int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp, pmd_t entry, int dirty)
+{
+ int changed;
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pmd_trans_huge(*pmdp));
+ assert_spin_locked(pmd_lockptr(vma->vm_mm, pmdp));
+#endif
+ changed = !pmd_same(*(pmdp), entry);
+ if (changed) {
+ /*
+ * We can use MMU_PAGE_2M here, because only the radix
+ * path looks at the psize.
+ */
+ __ptep_set_access_flags(vma, pmdp_ptep(pmdp),
+ pmd_pte(entry), address, MMU_PAGE_2M);
+ }
+ return changed;
+}
+
+int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp, pud_t entry, int dirty) /* PUD counterpart of pmdp_set_access_flags(); @dirty is unused */
+{
+ int changed;
+#ifdef CONFIG_DEBUG_VM
+ assert_spin_locked(pud_lockptr(vma->vm_mm, pudp));
+#endif
+ changed = !pud_same(*(pudp), entry); /* 1 when the stored entry differs from @entry */
+ if (changed) {
+ /*
+ * We can use MMU_PAGE_1G here, because only the radix
+ * path looks at the psize.
+ */
+ __ptep_set_access_flags(vma, pudp_ptep(pudp),
+ pud_pte(entry), address, MMU_PAGE_1G);
+ }
+ return changed; /* 1 if an update was made, 0 otherwise */
+}
+
+
+int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp) /* vma-based wrapper around the mm-based helper */
+{
+ return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp); /* result is passed through unchanged */
+}
+
+int pudp_test_and_clear_young(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp) /* vma-based wrapper around the mm-based helper */
+{
+ return __pudp_test_and_clear_young(vma->vm_mm, address, pudp); /* result is passed through unchanged */
+}
+
+/*
+ * Set a new huge pmd. We should not be called for updating
+ * an existing pmd entry. That should go via pmd_hugepage_update.
+ */
+void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t pmd)
+{
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+
+ WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp))); /* existing entry is hw-valid and not protnone */
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+ WARN_ON(!(pmd_leaf(pmd))); /* the new value must be a leaf entry */
+#endif
+ trace_hugepage_set_pmd(addr, pmd_val(pmd));
+ return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); /* store via the pte path, viewing the pmd slot as a pte */
+}
+
+void set_pud_at(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, pud_t pud) /* PUD counterpart of set_pmd_at() */
+{
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+
+ WARN_ON(pte_hw_valid(pud_pte(*pudp))); /* unlike set_pmd_at(), there is no protnone exception here */
+ assert_spin_locked(pud_lockptr(mm, pudp));
+ WARN_ON(!(pud_leaf(pud))); /* the new value must be a leaf entry */
+#endif
+ trace_hugepage_set_pud(addr, pud_val(pud));
+ return set_pte_at(mm, addr, pudp_ptep(pudp), pud_pte(pud)); /* store via the pte path, viewing the pud slot as a pte */
+}
+
+static void do_serialize(void *arg) /* IPI callback used by serialize_against_pte_lookup() */
+{
+ /* We've taken the IPI, so try to trim the mask while here */
+ if (radix_enabled()) {
+ struct mm_struct *mm = arg; /* the mm handed over by serialize_against_pte_lookup() */
+ exit_lazy_flush_tlb(mm, false);
+ }
+}
+
+/*
+ * Serialize against __find_linux_pte() which does lock-less
+ * lookup in page tables with local interrupts disabled. For huge pages
+ * it casts pmd_t to pte_t. Since the format of pte_t is different from
+ * pmd_t we want to prevent transition from pmd pointing to page table
+ * to pmd pointing to huge page (and back) while interrupts are disabled.
+ * We clear pmd to possibly replace it with page table pointer in
+ * different code paths. So make sure we wait for the parallel
+ * __find_linux_pte() to finish.
+ */
+void serialize_against_pte_lookup(struct mm_struct *mm)
+{
+ smp_mb();
+ smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1); /* run do_serialize on the CPUs in mm_cpumask(mm); final argument 1 = wait for completion */
+}
+
+/*
+ * We use this to invalidate a pmdp entry before switching from a
+ * hugepte to a regular pmd entry. Returns the value from pmd_hugepage_update() wrapped as a pmd_t.
+ */
+pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+{
+ unsigned long old_pmd;
+
+ VM_WARN_ON_ONCE(!pmd_present(*pmdp));
+ old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID); /* NOTE(review): presumably clears _PAGE_PRESENT and sets _PAGE_INVALID -- confirm argument order */
+ flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); /* flush the range covered by one huge pmd */
+ return __pmd(old_pmd);
+}
+
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp) /* PUD counterpart of pmdp_invalidate() */
+{
+ unsigned long old_pud;
+
+ VM_WARN_ON_ONCE(!pud_present(*pudp));
+ old_pud = pud_hugepage_update(vma->vm_mm, address, pudp, _PAGE_PRESENT, _PAGE_INVALID); /* NOTE(review): presumably clears _PAGE_PRESENT and sets _PAGE_INVALID -- confirm argument order */
+ flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); /* flush the range covered by one huge pud */
+ return __pud(old_pud);
+}
+
+pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmdp, int full) /* @full non-zero skips the TLB flush below */
+{
+ pmd_t pmd;
+ VM_BUG_ON(addr & ~HPAGE_PMD_MASK); /* @addr must be huge-pmd aligned */
+ VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp)) ||
+ !pmd_present(*pmdp)); /* i.e. the entry must be present and trans-huge */
+ pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+ /*
+ * If it is not a fullmm flush, then we can possibly end up converting
+ * this PMD pte entry to a regular level 0 PTE by a parallel page fault.
+ * Make sure we flush the tlb in this case.
+ */
+ if (!full)
+ flush_pmd_tlb_range(vma, addr, addr + HPAGE_PMD_SIZE);
+ return pmd;
+}
+
+pud_t pudp_huge_get_and_clear_full(struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pudp, int full) /* @full non-zero skips the TLB flush below */
+{
+ pud_t pud;
+
+ VM_BUG_ON(addr & ~HPAGE_PUD_MASK); /* a huge pud spans HPAGE_PUD_SIZE, so require PUD (not merely PMD) alignment */
+ VM_BUG_ON(!pud_present(*pudp));
+ pud = pudp_huge_get_and_clear(vma->vm_mm, addr, pudp);
+ /*
+ * If it is not a fullmm flush, then we can possibly end up converting
+ * this huge PUD entry to regular lower-level entries by a parallel page fault.
+ * Make sure we flush the tlb in this case.
+ */
+ if (!full)
+ flush_pud_tlb_range(vma, addr, addr + HPAGE_PUD_SIZE);
+ return pud;
+}
+
+static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) /* OR the protection bits into the pmd value */
+{
+ return __pmd(pmd_val(pmd) | pgprot_val(pgprot)); /* bits are only added; nothing already set in @pmd is cleared */
+}
+
+static pud_t pud_set_protbits(pud_t pud, pgprot_t pgprot) /* OR the protection bits into the pud value */
+{
+ return __pud(pud_val(pud) | pgprot_val(pgprot)); /* bits are only added; nothing already set in @pud is cleared */
+}
+
+/*
+ * At some point we should be able to get rid of
+ * pmd_mkhuge() and mk_huge_pmd() when we update all the
+ * other archs to mark the pmd huge in pfn_pmd().
+ */
+pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
+{
+ unsigned long pmdv;
+
+ pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK; /* pfn -> address bits, restricted to the RPN field */
+
+ return __pmd_mkhuge(pmd_set_protbits(__pmd(pmdv), pgprot)); /* add protection bits, then mark huge */
+}
+
+pud_t pfn_pud(unsigned long pfn, pgprot_t pgprot) /* PUD counterpart of pfn_pmd() */
+{
+ unsigned long pudv;
+
+ pudv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK; /* pfn -> address bits, restricted to the RPN field */
+
+ return __pud_mkhuge(pud_set_protbits(__pud(pudv), pgprot)); /* add protection bits, then mark huge */
+}
+
+pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) /* replace the protection bits of @pmd with @newprot */
+{
+ unsigned long pmdv;
+
+ pmdv = pmd_val(pmd);
+ pmdv &= _HPAGE_CHG_MASK; /* keep only the bits covered by _HPAGE_CHG_MASK */
+ return pmd_set_protbits(__pmd(pmdv), newprot); /* then OR in the new protection bits */
+}
+
+pud_t pud_modify(pud_t pud, pgprot_t newprot) /* replace the protection bits of @pud with @newprot */
+{
+ unsigned long pudv;
+
+ pudv = pud_val(pud);
+ pudv &= _HPAGE_CHG_MASK; /* keep only the bits covered by _HPAGE_CHG_MASK (same mask as pmd_modify) */
+ return pud_set_protbits(__pud(pudv), newprot); /* then OR in the new protection bits */
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+/* For use by kexec, called with MMU off. Dispatches to the radix or hash cleanup, then resets SPRs. */
+notrace void mmu_cleanup_all(void)
+{
+ if (radix_enabled())
+ radix__mmu_cleanup_all();
+ else if (mmu_hash_ops.hpte_clear_all) /* the hash hook is optional; skip it when unset */
+ mmu_hash_ops.hpte_clear_all();
+
+ reset_sprs(); /* done for both MMU modes */
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int __meminit create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot) /* dispatch to the radix or hash implementation */
+{
+ if (radix_enabled())
+ return radix__create_section_mapping(start, end, nid, prot);
+
+ return hash__create_section_mapping(start, end, nid, prot); /* return value is passed through unchanged */
+}
+
+int __meminit remove_section_mapping(unsigned long start, unsigned long end) /* dispatch to the radix or hash implementation */
+{
+ if (radix_enabled())
+ return radix__remove_section_mapping(start, end);
+
+ return hash__remove_section_mapping(start, end); /* return value is passed through unchanged */
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+void __init mmu_partition_table_init(void) /* allocate the partition table and program its location */
+{
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ unsigned long ptcr;
+
+ /* Initialize the Partition Table with no entries */
+ partition_tb = memblock_alloc_or_panic(patb_size, patb_size); /* size and alignment are both patb_size */
+ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); /* table physical address OR'ed with (PATB_SIZE_SHIFT - 12) */
+ set_ptcr_when_no_uv(ptcr);
+ powernv_set_nmmu_ptcr(ptcr); /* the same value is handed to the nest MMU setter */
+}
+
+static void flush_partition(unsigned int lpid, bool radix) /* flush translations for @lpid; @radix selects the flush flavour */
+{
+ if (radix) {
+ radix__flush_all_lpid(lpid);
+ radix__flush_all_lpid_guest(lpid);
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ /* do we need fixup here ?*/
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); /* trace arguments mirror the tlbie issued above */
+ }
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1, bool flush) /* write both doublewords of entry @lpid, then register or flush */
+{
+ unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); /* sampled before the overwrite; its PATB_HR bit picks the flush type */
+
+ /*
+ * When ultravisor is enabled, the partition table is stored in secure
+ * memory and can only be accessed doing an ultravisor call. However, we
+ * maintain a copy of the partition table in normal memory to allow Nest
+ * MMU translations to occur (for normal VMs).
+ *
+ * Therefore, here we always update partition_tb, regardless of whether
+ * we are running under an ultravisor or not.
+ */
+ partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+ /*
+ * If ultravisor is enabled, we do an ultravisor call to register the
+ * partition table entry (PATE), which also does a global flush of TLBs
+ * and partition table caches for the lpid. Otherwise, just do the
+ * flush. The type of flush (hash or radix) depends on what the previous
+ * use of the partition ID was, not the new use.
+ */
+ if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+ uv_register_pate(lpid, dw0, dw1);
+ pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n",
+ dw0, dw1);
+ } else if (flush) {
+ /*
+ * Boot does not need to flush, because MMU is off and each
+ * CPU does a tlbiel_all() before switching them on, which
+ * flushes everything.
+ */
+ flush_partition(lpid, (old & PATB_HR));
+ }
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+
+static pmd_t *get_pmd_from_cache(struct mm_struct *mm) /* hand out the next cached PMD fragment, or NULL if none */
+{
+ void *pmd_frag, *ret;
+
+ if (PMD_FRAG_NR == 1) /* one fragment per page: there is nothing to cache */
+ return NULL;
+
+ spin_lock(&mm->page_table_lock);
+ ret = mm->context.pmd_frag;
+ if (ret) {
+ pmd_frag = ret + PMD_FRAG_SIZE; /* candidate for the next hand-out */
+ /*
+ * If we have taken up all the fragments, mark the cached PMD fragment pointer NULL
+ */
+ if (((unsigned long)pmd_frag & ~PAGE_MASK) == 0) /* next fragment would start on a page boundary */
+ pmd_frag = NULL;
+ mm->context.pmd_frag = pmd_frag;
+ }
+ spin_unlock(&mm->page_table_lock);
+ return (pmd_t *)ret;
+}
+
+static pmd_t *__alloc_for_pmdcache(struct mm_struct *mm) /* allocate a fresh PMD page, return its first fragment; NULL on failure */
+{
+ void *ret = NULL;
+ struct ptdesc *ptdesc;
+ gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO;
+
+ if (mm == &init_mm)
+ gfp &= ~__GFP_ACCOUNT; /* allocations for init_mm are not accounted */
+ ptdesc = pagetable_alloc(gfp, 0);
+ if (!ptdesc)
+ return NULL;
+ if (!pagetable_pmd_ctor(mm, ptdesc)) {
+ pagetable_free(ptdesc); /* constructor failed: give the page back */
+ return NULL;
+ }
+
+ atomic_set(&ptdesc->pt_frag_refcount, 1); /* start with a single-fragment count; raised below if the page is cached */
+
+ ret = ptdesc_address(ptdesc);
+ /*
+ * if we support only one fragment just return the
+ * allocated page.
+ */
+ if (PMD_FRAG_NR == 1)
+ return ret;
+
+ spin_lock(&mm->page_table_lock);
+ /*
+ * If mm->context.pmd_frag is already set, we return
+ * the allocated page with a single fragment
+ * count.
+ */
+ if (likely(!mm->context.pmd_frag)) {
+ atomic_set(&ptdesc->pt_frag_refcount, PMD_FRAG_NR); /* one reference per fragment in the page */
+ mm->context.pmd_frag = ret + PMD_FRAG_SIZE; /* cache points at the second fragment; the first is returned */
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ return (pmd_t *)ret;
+}
+
+pmd_t *pmd_fragment_alloc(struct mm_struct *mm, unsigned long vmaddr) /* @vmaddr is unused here */
+{
+ pmd_t *pmd;
+
+ pmd = get_pmd_from_cache(mm); /* fast path: reuse a cached fragment */
+ if (pmd)
+ return pmd;
+
+ return __alloc_for_pmdcache(mm); /* slow path: allocate a new page (may return NULL) */
+}
+
+void pmd_fragment_free(unsigned long *pmd) /* drop one fragment reference; free the page when it was the last */
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
+
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc); /* reserved pages bypass the fragment refcount */
+
+ BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0); /* freeing with no outstanding reference is a bug */
+ if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
+ pagetable_dtor(ptdesc);
+ pagetable_free(ptdesc);
+ }
+}
+
+static inline void pgtable_free(void *table, int index) /* free @table using the routine selected by @index */
+{
+ switch (index) {
+ case PTE_INDEX:
+ pte_fragment_free(table, 0);
+ break;
+ case PMD_INDEX:
+ pmd_fragment_free(table);
+ break;
+ case PUD_INDEX:
+ __pud_free(table);
+ break;
+ /* We don't free pgd table via RCU callback */
+ default:
+ BUG(); /* any other index is a caller bug */
+ }
+}
+
+void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int index) /* queue @table for freeing through the mmu_gather */
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(index > MAX_PGTABLE_INDEX_SIZE); /* index must fit in the bits masked by MAX_PGTABLE_INDEX_SIZE */
+ pgf |= index; /* stash the index in the pointer's low bits; __tlb_remove_table() masks it back out */
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+void __tlb_remove_table(void *_table) /* @_table is a table pointer with the level index OR'ed into its low bits */
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE); /* strip the index to recover the pointer */
+ unsigned int index = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE; /* recover the index stored by pgtable_free_tlb() */
+
+ return pgtable_free(table, index);
+}
+
+#ifdef CONFIG_PROC_FS
+atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
+
+void arch_report_meminfo(struct seq_file *m) /* print per-page-size DirectMap totals in kB (radix only) */
+{
+ /*
+ * Hash maps the memory with one size mmu_linear_psize.
+ * So don't bother to print these on hash
+ */
+ if (!radix_enabled())
+ return;
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_4K]) << 2); /* count * 4 kB */
+ seq_printf(m, "DirectMap64k: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_64K]) << 6); /* count * 64 kB */
+ seq_printf(m, "DirectMap2M: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_2M]) << 11); /* count * 2048 kB */
+ seq_printf(m, "DirectMap1G: %8lu kB\n",
+ atomic_long_read(&direct_pages_count[MMU_PAGE_1G]) << 20); /* count * 1048576 kB */
+}
+#endif /* CONFIG_PROC_FS */
+
+pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep) /* first half of a protection change; paired with ptep_modify_prot_commit() */
+{
+ unsigned long pte_val;
+
+ /*
+ * Clear the _PAGE_PRESENT so that no hardware parallel update is
+ * possible. Also keep the pte_present true so that we don't take
+ * a wrong fault.
+ */
+ pte_val = pte_update(vma->vm_mm, addr, ptep, _PAGE_PRESENT, _PAGE_INVALID, 0);
+
+ return __pte(pte_val); /* NOTE(review): presumably the PTE value from before the update -- confirm */
+
+}
+
+void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
+ pte_t *ptep, pte_t old_pte, pte_t pte) /* second half of a protection change: install @pte */
+{
+ if (radix_enabled())
+ return radix__ptep_modify_prot_commit(vma, addr,
+ ptep, old_pte, pte);
+ set_pte_at(vma->vm_mm, addr, ptep, pte); /* hash path: @old_pte is not used */
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * For hash translation mode, we use the deposited table to store hash slot
+ * information and they are stored at PTRS_PER_PMD offset from related pmd
+ * location. Hence a pmd move requires deposit and withdraw.
+ *
+ * For radix translation with split pmd ptl, we store the deposited table in the
+ * pmd page. Hence if we have a different pmd page we need to withdraw during pmd
+ * move.
+ *
+ * With hash we use deposited table always irrespective of anon or not.
+ * With radix we use deposited table only for anonymous mapping.
+ */
+int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+ struct spinlock *old_pmd_ptl,
+ struct vm_area_struct *vma)
+{
+ if (radix_enabled())
+ return (new_pmd_ptl != old_pmd_ptl) && vma_is_anonymous(vma); /* different ptl (hence different pmd page) and anonymous */
+
+ return true; /* hash: always withdraw */
+}
+#endif
+
+/*
+ * Does the CPU support tlbie?
+ */
+bool tlbie_capable __read_mostly = IS_ENABLED(CONFIG_PPC_RADIX_BROADCAST_TLBIE);
+EXPORT_SYMBOL(tlbie_capable);
+
+/*
+ * Should tlbie be used for management of CPU TLBs, for kernel and process
+ * address spaces? tlbie may still be used for nMMU accelerators, and for KVM
+ * guest address spaces.
+ */
+bool tlbie_enabled __read_mostly = IS_ENABLED(CONFIG_PPC_RADIX_BROADCAST_TLBIE);
+
+static int __init setup_disable_tlbie(char *str) /* handler for the "disable_tlbie" boot option; @str is unused */
+{
+ if (!radix_enabled()) {
+ pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n");
+ return 1; /* option is refused on hash but still reported as handled */
+ }
+
+ tlbie_capable = false;
+ tlbie_enabled = false;
+
+ return 1;
+}
+__setup("disable_tlbie", setup_disable_tlbie);
+
+static int __init pgtable_debugfs_setup(void) /* expose tlbie_enabled as a debugfs bool when tlbie is available */
+{
+ if (!tlbie_capable) /* nothing to toggle when tlbie cannot be used at all */
+ return 0;
+
+ /*
+ * There is no locking vs tlb flushing when changing this value.
+ * The tlb flushers will see one value or another, and use either
+ * tlbie or tlbiel with IPIs. In both cases the TLBs will be
+ * invalidated as expected.
+ */
+ debugfs_create_bool("tlbie_enabled", 0600,
+ arch_debugfs_dir,
+ &tlbie_enabled);
+
+ return 0;
+}
+arch_initcall(pgtable_debugfs_setup);
+
+#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN)
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+ */
+unsigned long memremap_compat_align(void)
+{
+ if (!radix_enabled()) {
+ unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+ return max(SUBSECTION_SIZE, 1UL << shift); /* the larger of subsection size and the linear-map page size */
+ }
+
+ return SUBSECTION_SIZE; /* radix: subsection size */
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
+
+pgprot_t vm_get_page_prot(vm_flags_t vm_flags) /* translate VMA flags into page protection bits */
+{
+ unsigned long prot;
+
+ /* Radix supports execute-only, but protection_map maps X -> RX */
+ if (!radix_enabled() && ((vm_flags & VM_ACCESS_FLAGS) == VM_EXEC))
+ vm_flags |= VM_READ; /* on hash, exec-only is looked up as read+exec */
+
+ prot = pgprot_val(protection_map[vm_flags & (VM_ACCESS_FLAGS | VM_SHARED)]);
+
+ if (vm_flags & VM_SAO)
+ prot |= _PAGE_SAO;
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ prot |= vmflag_to_pte_pkey_bits(vm_flags); /* fold the VMA's pkey bits into the protection value */
+#endif
+
+ return __pgprot(prot);
+}
+EXPORT_SYMBOL(vm_get_page_prot);
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
new file mode 100644
index 000000000000..a974baf8f327
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * PowerPC Memory Protection Keys management
+ *
+ * Copyright 2017, Ram Pai, IBM Corporation.
+ */
+
+#include <asm/mman.h>
+#include <asm/mmu_context.h>
+#include <asm/mmu.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/firmware.h>
+
+#include <linux/pkeys.h>
+#include <linux/of_fdt.h>
+
+
+int num_pkey; /* Max number of pkeys supported */
+/*
+ * Keys marked in the reservation list cannot be allocated by userspace
+ */
+u32 reserved_allocation_mask __ro_after_init;
+
+/* Bits set for the initially allocated keys */
+static u32 initial_allocation_mask __ro_after_init;
+
+/*
+ * Even if we allocate keys with sys_pkey_alloc(), we need to make sure
+ * other thread still find the access denied using the same keys.
+ */
+u64 default_amr __ro_after_init = ~0x0UL;
+u64 default_iamr __ro_after_init = 0x5555555555555555UL;
+u64 default_uamor __ro_after_init;
+EXPORT_SYMBOL(default_amr);
+/*
+ * Key used to implement PROT_EXEC mmap. Denies READ/WRITE
+ * We pick key 2 because 0 is special key and 1 is reserved as per ISA.
+ */
+static int execute_only_key = 2;
+static bool pkey_execute_disable_supported;
+
+
+#define AMR_BITS_PER_PKEY 2
+#define AMR_RD_BIT 0x1UL
+#define AMR_WR_BIT 0x2UL
+#define IAMR_EX_BIT 0x1UL
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
+
+static int __init dt_scan_storage_keys(unsigned long node,
+ const char *uname, int depth,
+ void *data) /* of_scan_flat_dt() callback; @data points at an int that receives the key count */
+{
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ const __be32 *prop;
+ int *pkeys_total = (int *) data;
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL);
+ if (!prop)
+ return 0; /* cpu node without the property: keep scanning */
+ *pkeys_total = be32_to_cpu(prop[0]); /* only the first cell of the property is used */
+ return 1; /* NOTE(review): non-zero presumably stops the scan -- confirm against of_scan_flat_dt() */
+}
+
+static int __init scan_pkey_feature(void) /* returns the number of pkeys to use; 0 means unsupported */
+{
+ int ret;
+ int pkeys_total = 0;
+
+ /*
+ * Pkey is not supported with Radix translation.
+ */
+ if (early_radix_enabled())
+ return 0;
+
+ ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total);
+ if (ret == 0) {
+ /*
+ * Let's assume 32 pkeys on P8/P9 bare metal, if it's not defined by device
+ * tree. We make this exception since some version of skiboot forgot to
+ * expose this property on power8/9.
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ unsigned long pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+ PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9 ||
+ PVR_VER(pvr) == PVR_HX_C2000)
+ pkeys_total = 32;
+ }
+ }
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * Adjust the upper limit, based on the number of bits supported by
+ * arch-neutral code.
+ */
+ pkeys_total = min_t(int, pkeys_total,
+ ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1));
+#endif
+ return pkeys_total;
+}
+
+void __init pkey_early_init_devtree(void) /* boot-time pkey setup: computes num_pkey, the default AMR/IAMR/UAMOR values and the allocation masks */
+{
+ int pkeys_total, i;
+
+#ifdef CONFIG_PPC_MEM_KEYS
+ /*
+ * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
+ * generic defines for PKEY_DISABLE_ACCESS and PKEY_DISABLE_WRITE.
+ * Ensure that the bits are distinct.
+ */
+ BUILD_BUG_ON(PKEY_DISABLE_EXECUTE &
+ (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ /*
+ * pkey_to_vmflag_bits() assumes that the pkey bits are contiguous
+ * in the vmaflag. Make sure that is really the case.
+ */
+ BUILD_BUG_ON(__builtin_clzl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) +
+ __builtin_popcountl(ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)
+ != (sizeof(u64) * BITS_PER_BYTE));
+#endif
+ /*
+ * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+ */
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+ return;
+
+ /* scan the device tree for pkey feature */
+ pkeys_total = scan_pkey_feature();
+ if (!pkeys_total)
+ goto out;
+
+ /* Allow all keys to be modified by default */
+ default_uamor = ~0x0UL;
+
+ cur_cpu_spec->mmu_features |= MMU_FTR_PKEY;
+
+ /*
+ * The device tree cannot be relied upon to indicate
+ * execute_disable support. Instead we use a PVR check.
+ */
+ if (pvr_version_is(PVR_POWER7) || pvr_version_is(PVR_POWER7p))
+ pkey_execute_disable_supported = false;
+ else
+ pkey_execute_disable_supported = true;
+
+#ifdef CONFIG_PPC_4K_PAGES
+ /*
+ * The OS can manage only 8 pkeys due to its inability to represent them
+ * in the Linux 4K PTE. Mark all other keys reserved.
+ */
+ num_pkey = min(8, pkeys_total);
+#else
+ num_pkey = pkeys_total;
+#endif
+
+ if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) {
+ /*
+ * Insufficient number of keys to support
+ * execute only key. Mark it unavailable.
+ */
+ execute_only_key = -1;
+ } else {
+ /*
+ * Mark the execute_only_pkey as not available for
+ * user allocation via pkey_alloc.
+ */
+ reserved_allocation_mask |= (0x1 << execute_only_key);
+
+ /*
+ * Deny READ/WRITE for execute_only_key.
+ * Allow execute in IAMR.
+ */
+ default_amr |= (0x3ul << pkeyshift(execute_only_key));
+ default_iamr &= ~(0x1ul << pkeyshift(execute_only_key));
+
+ /*
+ * Clear the uamor bits for this key.
+ */
+ default_uamor &= ~(0x3ul << pkeyshift(execute_only_key));
+ }
+
+ if (unlikely(num_pkey <= 3)) {
+ /*
+ * Insufficient number of keys to support
+ * KUAP/KUEP feature.
+ */
+ disable_kuep = true;
+ disable_kuap = true;
+ WARN(1, "Disabling kernel user protection due to low (%d) max supported keys\n", num_pkey);
+ } else {
+ /* handle key which is used by kernel for KUAP */
+ reserved_allocation_mask |= (0x1 << 3);
+ /*
+ * Mark access for kup_key in default amr so that
+ * we continue to operate with that AMR in
+ * copy_to/from_user().
+ */
+ default_amr &= ~(0x3ul << pkeyshift(3));
+ default_iamr &= ~(0x1ul << pkeyshift(3));
+ default_uamor &= ~(0x3ul << pkeyshift(3));
+ }
+
+ /*
+ * Allow access for only key 0. And prevent any other modification.
+ */
+ default_amr &= ~(0x3ul << pkeyshift(0));
+ default_iamr &= ~(0x1ul << pkeyshift(0));
+ default_uamor &= ~(0x3ul << pkeyshift(0));
+ /*
+ * key 0 is special in that we want to consider it an allocated
+ * key which is preallocated. We don't allow changing AMR bits
+ * w.r.t key 0. But one can pkey_free(key0)
+ */
+ initial_allocation_mask |= (0x1 << 0);
+
+ /*
+ * key 1 is recommended not to be used. PowerISA(3.0) page 1015,
+ * programming note.
+ */
+ reserved_allocation_mask |= (0x1 << 1);
+ default_uamor &= ~(0x3ul << pkeyshift(1));
+
+ /*
+ * Prevent the usage of OS reserved keys. Update UAMOR
+ * for those keys. Also mark the rest of the bits in the
+ * 32 bit mask as reserved.
+ */
+ for (i = num_pkey; i < 32 ; i++) {
+ reserved_allocation_mask |= (0x1U << i); /* unsigned: i reaches 31, and shifting a signed 1 into the sign bit is undefined */
+ default_uamor &= ~(0x3ul << pkeyshift(i));
+ }
+ /*
+ * Prevent the allocation of reserved keys too.
+ */
+ initial_allocation_mask |= reserved_allocation_mask;
+
+ pr_info("Enabling pkeys with max key count %d\n", num_pkey);
+out:
+ /*
+ * Setup uamor on boot cpu
+ */
+ mtspr(SPRN_UAMOR, default_uamor);
+
+ return;
+}
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled) /* enable KUEP on this CPU unless @disabled or pkeys are unavailable on hash */
+{
+ if (disabled)
+ return;
+ /*
+ * On hash if PKEY feature is not enabled, disable KUEP too.
+ */
+ if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY))
+ return;
+
+ if (smp_processor_id() == boot_cpuid) { /* feature bit and message are set once, from the boot CPU */
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUEP;
+ }
+
+ /*
+ * Radix always uses key0 of the IAMR to determine if an access is
+ * allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
+ * fetch.
+ */
+ mtspr(SPRN_IAMR, AMR_KUEP_BLOCKED);
+ isync();
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled) /* enable KUAP on this CPU unless @disabled or pkeys are unavailable on hash */
+{
+ if (disabled)
+ return;
+ /*
+ * On hash if PKEY feature is not enabled, disable KUAP too.
+ */
+ if (!early_radix_enabled() && !early_mmu_has_feature(MMU_FTR_PKEY))
+ return;
+
+ if (smp_processor_id() == boot_cpuid) { /* feature bit and message are set once, from the boot CPU */
+ pr_info("Activating Kernel Userspace Access Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_KUAP;
+ }
+
+ /*
+ * Set the default kernel AMR values on all cpus.
+ */
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ isync();
+}
+#endif
+
+#ifdef CONFIG_PPC_MEM_KEYS
+void pkey_mm_init(struct mm_struct *mm) /* seed a new mm's pkey state from the boot-time defaults */
+{
+ if (!mmu_has_feature(MMU_FTR_PKEY))
+ return;
+ mm_pkey_allocation_map(mm) = initial_allocation_mask; /* key 0 plus all reserved keys start out marked allocated */
+ mm->context.execute_only_pkey = execute_only_key; /* -1 when no execute-only key is available */
+}
+
+static inline void init_amr(int pkey, u8 init_bits) /* replace @pkey's 2-bit field in the current thread's saved AMR */
+{
+ u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey)); /* low two bits of @init_bits, moved to the key's slot */
+ u64 old_amr = current_thread_amr() & ~((u64)(0x3ul) << pkeyshift(pkey)); /* current AMR with that slot cleared */
+
+ current->thread.regs->amr = old_amr | new_amr_bits;
+}
+
+static inline void init_iamr(int pkey, u8 init_bits) /* replace @pkey's execute bit in the current thread's saved IAMR */
+{
+ u64 new_iamr_bits = (((u64)init_bits & 0x1UL) << pkeyshift(pkey)); /* low bit of @init_bits, moved to the key's slot */
+ u64 old_iamr = current_thread_iamr() & ~((u64)(0x1ul) << pkeyshift(pkey)); /* current IAMR with that bit cleared */
+
+ if (!likely(pkey_execute_disable_supported)) /* leave IAMR untouched when execute-disable is unsupported */
+ return;
+
+ current->thread.regs->iamr = old_iamr | new_iamr_bits;
+}
+
+/*
+ * Set the access rights in AMR IAMR and UAMOR registers for @pkey to that
+ * specified in @init_val. Returns 0 on success or -EINVAL; @tsk is not used, updates go to the current thread.
+ */
+int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
+ unsigned long init_val)
+{
+ u64 new_amr_bits = 0x0ul;
+ u64 new_iamr_bits = 0x0ul;
+ u64 pkey_bits, uamor_pkey_bits;
+
+ /*
+ * Check whether the key is disabled by UAMOR.
+ */
+ pkey_bits = 0x3ul << pkeyshift(pkey);
+ uamor_pkey_bits = (default_uamor & pkey_bits);
+
+ /*
+ * Both the bits in UAMOR corresponding to the key should be set
+ */
+ if (uamor_pkey_bits != pkey_bits)
+ return -EINVAL;
+
+ if (init_val & PKEY_DISABLE_EXECUTE) {
+ if (!pkey_execute_disable_supported)
+ return -EINVAL;
+ new_iamr_bits |= IAMR_EX_BIT;
+ }
+ init_iamr(pkey, new_iamr_bits);
+
+ /* Set the bits we need in AMR: */
+ if (init_val & PKEY_DISABLE_ACCESS)
+ new_amr_bits |= AMR_RD_BIT | AMR_WR_BIT;
+ else if (init_val & PKEY_DISABLE_WRITE)
+ new_amr_bits |= AMR_WR_BIT;
+
+ init_amr(pkey, new_amr_bits);
+ return 0;
+}
+
+int execute_only_pkey(struct mm_struct *mm) /* accessor for the mm's execute-only key */
+{
+ return mm->context.execute_only_pkey; /* value copied from execute_only_key in pkey_mm_init(); may be -1 */
+}
+
+static inline bool vma_is_pkey_exec_only(struct vm_area_struct *vma) /* true when @vma is exec-only AND uses the mm's execute-only key */
+{
+ /* Do this check first since the vm_flags should be hot */
+ if ((vma->vm_flags & VM_ACCESS_FLAGS) != VM_EXEC)
+ return false;
+
+ return (vma_pkey(vma) == vma->vm_mm->context.execute_only_pkey);
+}
+
+/*
+ * This should only be called for *plain* mprotect calls. Returns the pkey the vma should end up with.
+ */
+int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot,
+ int pkey)
+{
+ /*
+ * If the currently associated pkey is execute-only, but the requested
+ * protection is not execute-only, move it back to the default pkey.
+ */
+ if (vma_is_pkey_exec_only(vma) && (prot != PROT_EXEC))
+ return 0;
+
+ /*
+ * The requested protection is execute-only. Hence let's use an
+ * execute-only pkey.
+ */
+ if (prot == PROT_EXEC) {
+ pkey = execute_only_pkey(vma->vm_mm);
+ if (pkey > 0) /* -1 means no execute-only key is available; fall through */
+ return pkey;
+ }
+
+ /* Nothing to override. */
+ return vma_pkey(vma);
+}
+
+static bool pkey_access_permitted(int pkey, bool write, bool execute) /* check @pkey against the current thread's AMR/IAMR */
+{
+ int pkey_shift;
+ u64 amr;
+
+ pkey_shift = pkeyshift(pkey);
+ if (execute) /* execute is decided by IAMR alone and takes precedence over @write */
+ return !(current_thread_iamr() & (IAMR_EX_BIT << pkey_shift));
+
+ amr = current_thread_amr();
+ if (write)
+ return !(amr & (AMR_WR_BIT << pkey_shift)); /* permitted when the key's write-disable bit is clear */
+
+ return !(amr & (AMR_RD_BIT << pkey_shift)); /* permitted when the key's read-disable bit is clear */
+}
+
+bool arch_pte_access_permitted(u64 pte, bool write, bool execute) /* pkey check for a raw PTE value */
+{
+ if (!mmu_has_feature(MMU_FTR_PKEY)) /* without pkeys every access passes this check */
+ return true;
+
+ return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
+}
+
+/*
+ * We only want to enforce protection keys on the current thread because we
+ * effectively have no access to AMR/IAMR for other threads or any way to tell
+ * which AMR/IAMR in a threaded process we could use.
+ *
+ * So do not enforce things if the VMA is not from the current mm, or if we are
+ * in a kernel thread. Also returns true when the pkey feature is absent.
+ */
+bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
+ bool execute, bool foreign)
+{
+ if (!mmu_has_feature(MMU_FTR_PKEY))
+ return true;
+ /*
+ * Do not enforce our key-permissions on a foreign vma.
+ */
+ if (foreign || vma_is_foreign(vma))
+ return true;
+
+ return pkey_access_permitted(vma_pkey(vma), write, execute);
+}
+
+void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) /* copy pkey state from @oldmm to @mm; no-op without the pkey feature */
+{
+ if (!mmu_has_feature(MMU_FTR_PKEY))
+ return;
+
+ /* Duplicate the oldmm pkey state in mm: */
+ mm_pkey_allocation_map(mm) = mm_pkey_allocation_map(oldmm);
+ mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;
+}
+
+#endif /* CONFIG_PPC_MEM_KEYS */
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
new file mode 100644
index 000000000000..35fd2a95be24
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/security.h>
+#include <asm/cacheflush.h>
+#include <asm/machdep.h>
+#include <asm/mman.h>
+#include <asm/tlb.h>
+
+/*
+ * Flush the TLB entry for @vmaddr in the VMA's mm, using the page size of
+ * the hstate backing vma->vm_file.
+ */
+void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	int psize = hstate_get_psize(hstate_file(vma->vm_file));
+
+	radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
+}
+
+/*
+ * Local-flush variant of radix__flush_hugetlb_page(): same page size lookup,
+ * but the flush goes through radix__local_flush_tlb_page_psize().
+ */
+void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+	int psize = hstate_get_psize(hstate_file(vma->vm_file));
+
+	radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, psize);
+}
+
+/*
+ * Flush the TLB for the hugetlb range start..end of the VMA's mm and then
+ * notify secondary TLBs about the same range.
+ */
+void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long start,
+				    unsigned long end)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	int psize = hstate_get_psize(hstate_file(vma->vm_file));
+
+	/*
+	 * Flush PWC even if we get PUD_SIZE hugetlb invalidate to keep this simpler.
+	 */
+	if (end - start < PUD_SIZE)
+		radix__flush_tlb_range_psize(mm, start, end, psize);
+	else
+		radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
+
+	mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+}
+
+/*
+ * Install @pte for the huge page at @addr, flushing the old translation
+ * first when the conditions below require it.
+ */
+void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
+					 unsigned long addr, pte_t *ptep,
+					 pte_t old_pte, pte_t pte)
+{
+	unsigned long psize = huge_page_size(hstate_vma(vma));
+	struct mm_struct *mm = vma->vm_mm;
+
+	/*
+	 * POWER9 NMMU must flush the TLB after clearing the PTE before
+	 * installing a PTE with more relaxed access permissions, see
+	 * radix__ptep_set_access_flags.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		if (is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+		    atomic_read(&mm->context.copros) > 0)
+			radix__flush_hugetlb_page(vma, addr);
+	}
+
+	set_huge_pte_at(mm, addr, ptep, pte, psize);
+}
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
new file mode 100644
index 000000000000..73977dbabcf2
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -0,0 +1,1694 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Page table handling routines for radix page table.
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "radix-mmu: " fmt
+
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/sched/mm.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/string_helpers.h>
+#include <linux/memory.h>
+#include <linux/kfence.h>
+
+#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>
+#include <asm/dma.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+#include <asm/firmware.h>
+#include <asm/powernv.h>
+#include <asm/sections.h>
+#include <asm/smp.h>
+#include <asm/trace.h>
+#include <asm/uaccess.h>
+#include <asm/ultravisor.h>
+#include <asm/set_memory.h>
+#include <asm/kfence.h>
+
+#include <trace/events/thp.h>
+
+#include <mm/mmu_decl.h>
+
+unsigned int mmu_base_pid;
+
+/*
+ * Allocate @size bytes, aligned to @size, from memblock for use as a page
+ * table. A non-zero region_start/region_end narrows the allowed physical
+ * range; zero leaves that bound at its memblock default. Panics instead of
+ * returning NULL.
+ */
+static __ref void *early_alloc_pgtable(unsigned long size, int nid,
+			unsigned long region_start, unsigned long region_end)
+{
+	phys_addr_t min_addr = region_start ? region_start : MEMBLOCK_LOW_LIMIT;
+	phys_addr_t max_addr = region_end ? region_end : MEMBLOCK_ALLOC_ANYWHERE;
+	void *table;
+
+	table = memblock_alloc_try_nid(size, size, min_addr, max_addr, nid);
+	if (!table)
+		panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%pa max_addr=%pa\n",
+		      __func__, size, size, nid, &min_addr, &max_addr);
+
+	return table;
+}
+
+/*
+ * Map one kernel page of size map_page_size at effective address ea to
+ * physical address pa in init_mm, allocating any missing intermediate
+ * tables with early_alloc_pgtable(). __map_kernel_page() takes this path
+ * while slab is not yet available.
+ *
+ * When allocating pud or pmd pointers, we allocate a complete page
+ * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
+ * is to ensure that the page obtained from the memblock allocator
+ * can be completely used as page table page and can be freed
+ * correctly when the page table entries are removed.
+ *
+ * nid, region_start and region_end are passed through unchanged to
+ * early_alloc_pgtable() for every table allocated here. Always returns 0;
+ * an allocation failure panics inside early_alloc_pgtable().
+ */
+static int early_map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ if (p4d_none(*p4dp)) {
+ pudp = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
+ p4d_populate(&init_mm, p4dp, pudp);
+ }
+ pudp = pud_offset(p4dp, ea);
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
+ region_end);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
+ }
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PAGE_SIZE, nid,
+ region_start, region_end);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+ asm volatile("ptesync": : :"memory");
+ return 0;
+}
+
+/*
+ * Map one kernel page of size map_page_size at effective address ea to
+ * physical address pa in init_mm. Before slab is available the work is
+ * handed to early_map_kernel_page(); afterwards the tables come from
+ * pud_alloc()/pmd_alloc()/pte_alloc_kernel() and -ENOMEM is returned if any
+ * of them fails. Returns 0 on success.
+ *
+ * nid, region_start, and region_end are hints to try to place the page
+ * table memory in the same node or region.
+ */
+static int __map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size,
+ int nid,
+ unsigned long region_start, unsigned long region_end)
+{
+ unsigned long pfn = pa >> PAGE_SHIFT;
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+ /*
+ * Make sure task size is correct as per the max addr
+ */
+ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+
+#ifdef CONFIG_PPC_64K_PAGES
+ BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
+#endif
+
+ if (unlikely(!slab_is_available()))
+ return early_map_kernel_page(ea, pa, flags, map_page_size,
+ nid, region_start, region_end);
+
+ /*
+ * Should make page table allocation functions be able to take a
+ * node, so we can place kernel page tables on the right nodes after
+ * boot.
+ */
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ pudp = pud_alloc(&init_mm, p4dp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ if (map_page_size == PUD_SIZE) {
+ ptep = (pte_t *)pudp;
+ goto set_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ if (map_page_size == PMD_SIZE) {
+ ptep = pmdp_ptep(pmdp);
+ goto set_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
+
+set_the_pte:
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags));
+ asm volatile("ptesync": : :"memory");
+ return 0;
+}
+
+/*
+ * Map one kernel page without placement hints: __map_kernel_page() is called
+ * with nid -1 and zero for both region_start and region_end. Returns whatever
+ * __map_kernel_page() returns.
+ */
+int radix__map_kernel_page(unsigned long ea, unsigned long pa,
+ pgprot_t flags,
+ unsigned int map_page_size)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, -1, 0, 0);
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+/*
+ * Clear the PTE bits in @clear on every page of the kernel mapping between
+ * start (rounded down to PAGE_SIZE) and end (rounded up to PAGE_SIZE), then
+ * flush the kernel TLB for that range. PUD and PMD leaf entries are updated
+ * in place, once for each PAGE_SIZE step that falls inside them. A page for
+ * which a table lookup returns NULL is skipped without reporting an error.
+ */
+static void radix__change_memory_range(unsigned long start, unsigned long end,
+ unsigned long clear)
+{
+ unsigned long idx;
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ end = PAGE_ALIGN(end); // aligns up
+
+ pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n",
+ start, end, clear);
+
+ for (idx = start; idx < end; idx += PAGE_SIZE) {
+ pgdp = pgd_offset_k(idx);
+ p4dp = p4d_offset(pgdp, idx);
+ pudp = pud_alloc(&init_mm, p4dp, idx);
+ if (!pudp)
+ continue;
+ if (pud_leaf(*pudp)) {
+ ptep = (pte_t *)pudp;
+ goto update_the_pte;
+ }
+ pmdp = pmd_alloc(&init_mm, pudp, idx);
+ if (!pmdp)
+ continue;
+ if (pmd_leaf(*pmdp)) {
+ ptep = pmdp_ptep(pmdp);
+ goto update_the_pte;
+ }
+ ptep = pte_alloc_kernel(pmdp, idx);
+ if (!ptep)
+ continue;
+update_the_pte:
+ radix__pte_update(&init_mm, idx, ptep, clear, 0, 0);
+ }
+
+ radix__flush_tlb_kernel_range(start, end);
+}
+
+/*
+ * Remove _PAGE_WRITE from the range _stext to __end_rodata, and from each
+ * leading page starting at PAGE_OFFSET that overlaps the interrupt vector
+ * text. The second walk stops at the first page that does not overlap, or
+ * once it reaches _stext.
+ */
+void radix__mark_rodata_ro(void)
+{
+ unsigned long start, end;
+
+ start = (unsigned long)_stext;
+ end = (unsigned long)__end_rodata;
+
+ radix__change_memory_range(start, end, _PAGE_WRITE);
+
+ for (start = PAGE_OFFSET; start < (unsigned long)_stext; start += PAGE_SIZE) {
+ end = start + PAGE_SIZE;
+ if (overlaps_interrupt_vector_text(start, end))
+ radix__change_memory_range(start, end, _PAGE_WRITE);
+ else
+ break;
+ }
+}
+
+/* Clear _PAGE_EXEC on the range __init_begin to __init_end. */
+void radix__mark_initmem_nx(void)
+{
+	radix__change_memory_range((unsigned long)__init_begin,
+				   (unsigned long)__init_end, _PAGE_EXEC);
+}
+#endif /* CONFIG_STRICT_KERNEL_RWX */
+
+/*
+ * Log one mapped range together with the page size used for it, formatted by
+ * string_get_size(), and an " (exec)" suffix when exec is true. Ranges with
+ * end <= start print nothing.
+ */
+static inline void __meminit
+print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
+{
+ char buf[10];
+
+ if (end <= start)
+ return;
+
+ string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
+
+ pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
+ exec ? " (exec)" : "");
+}
+
+/*
+ * Return the physical address up to which a mapping starting at addr may
+ * extend; create_physical_mapping() uses the distance to it as the gap.
+ * With CONFIG_STRICT_KERNEL_RWX this is the first of the following that
+ * lies above addr: the end of the interrupt code (relocated kernels only),
+ * the physical address of _stext (relocated kernels only), or
+ * __srwx_boundary. Without that option, or when addr is past all of them,
+ * it is end.
+ */
+static unsigned long next_boundary(unsigned long addr, unsigned long end)
+{
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ unsigned long stext_phys;
+
+ stext_phys = __pa_symbol(_stext);
+
+ // Relocatable kernel running at non-zero real address
+ if (stext_phys != 0) {
+ // The end of interrupts code at zero is a rodata boundary
+ unsigned long end_intr = __pa_symbol(__end_interrupts) - stext_phys;
+ if (addr < end_intr)
+ return end_intr;
+
+ // Start of relocated kernel text is a rodata boundary
+ if (addr < stext_phys)
+ return stext_phys;
+ }
+
+ if (addr < __pa_symbol(__srwx_boundary))
+ return __pa_symbol(__srwx_boundary);
+#endif
+ return end;
+}
+
+/*
+ * Map the physical range start..end (start rounded up, end rounded down to
+ * PAGE_SIZE) at its __va() address. Each step uses the largest of PUD_SIZE,
+ * PMD_SIZE or PAGE_SIZE for which addr is aligned, the gap to the next
+ * boundary (capped at max_mapping_size) is big enough and, for the two large
+ * sizes, mmu_psize_defs has a non-zero shift. Pieces overlapping kernel text
+ * or the interrupt vector text are mapped PAGE_KERNEL_X, all others with
+ * _prot.
+ *
+ * max_mapping_size is the smaller of memory_block_size and mapping_sz_limit,
+ * or PAGE_SIZE when debug_pagealloc is enabled.
+ *
+ * A summary line is printed each time the mapping size or exec attribute
+ * changes, and once more at the end. Returns 0, or the first non-zero result
+ * of __map_kernel_page().
+ */
+static int __meminit create_physical_mapping(unsigned long start,
+ unsigned long end,
+ int nid, pgprot_t _prot,
+ unsigned long mapping_sz_limit)
+{
+ unsigned long vaddr, addr, mapping_size = 0;
+ bool prev_exec, exec = false;
+ pgprot_t prot;
+ int psize;
+ unsigned long max_mapping_size = memory_block_size;
+
+ if (mapping_sz_limit < max_mapping_size)
+ max_mapping_size = mapping_sz_limit;
+
+ if (debug_pagealloc_enabled())
+ max_mapping_size = PAGE_SIZE;
+
+ start = ALIGN(start, PAGE_SIZE);
+ end = ALIGN_DOWN(end, PAGE_SIZE);
+ for (addr = start; addr < end; addr += mapping_size) {
+ unsigned long gap, previous_size;
+ int rc;
+
+ gap = next_boundary(addr, end) - addr;
+ if (gap > max_mapping_size)
+ gap = max_mapping_size;
+ previous_size = mapping_size;
+ prev_exec = exec;
+
+ if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
+ mmu_psize_defs[MMU_PAGE_1G].shift) {
+ mapping_size = PUD_SIZE;
+ psize = MMU_PAGE_1G;
+ } else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
+ mmu_psize_defs[MMU_PAGE_2M].shift) {
+ mapping_size = PMD_SIZE;
+ psize = MMU_PAGE_2M;
+ } else {
+ mapping_size = PAGE_SIZE;
+ psize = mmu_virtual_psize;
+ }
+
+ vaddr = (unsigned long)__va(addr);
+
+ if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
+ overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
+ prot = PAGE_KERNEL_X;
+ exec = true;
+ } else {
+ prot = _prot;
+ exec = false;
+ }
+
+ if (mapping_size != previous_size || exec != prev_exec) {
+ print_mapping(start, addr, previous_size, prev_exec);
+ start = addr;
+ }
+
+ rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
+ if (rc)
+ return rc;
+
+ update_page_count(psize, 1);
+ }
+
+ print_mapping(start, addr, mapping_size, exec);
+ return 0;
+}
+
+#ifdef CONFIG_KFENCE
+/*
+ * Allocate KFENCE_POOL_SIZE bytes of physical memory for the KFENCE pool and
+ * mark them nomap in memblock. Returns the physical address of the pool, or
+ * 0 after calling disable_kfence() when kfence_early_init is not set or the
+ * allocation fails.
+ */
+static __init phys_addr_t alloc_kfence_pool(void)
+{
+ phys_addr_t kfence_pool;
+
+ /*
+ * TODO: Support to enable KFENCE after bootup depends on the ability to
+ * split page table mappings. As such support is not currently
+ * implemented for radix pagetables, support enabling KFENCE
+ * only at system startup for now.
+ *
+ * After support for splitting mappings is available on radix,
+ * alloc_kfence_pool() & map_kfence_pool() can be dropped and
+ * mapping for __kfence_pool memory can be
+ * split during arch_kfence_init_pool().
+ */
+ if (!kfence_early_init)
+ goto no_kfence;
+
+ kfence_pool = memblock_phys_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+ if (!kfence_pool)
+ goto no_kfence;
+
+ memblock_mark_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ return kfence_pool;
+
+no_kfence:
+ disable_kfence();
+ return 0;
+}
+
+/*
+ * Map the KFENCE pool at physical address kfence_pool with PAGE_SIZE
+ * mappings, clear its nomap mark and publish it through __kfence_pool.
+ * A zero kfence_pool is ignored. If the mapping fails, the memory is given
+ * back to memblock and KFENCE is disabled.
+ */
+static __init void map_kfence_pool(phys_addr_t kfence_pool)
+{
+ if (!kfence_pool)
+ return;
+
+ if (create_physical_mapping(kfence_pool, kfence_pool + KFENCE_POOL_SIZE,
+ -1, PAGE_KERNEL, PAGE_SIZE))
+ goto err;
+
+ memblock_clear_nomap(kfence_pool, KFENCE_POOL_SIZE);
+ __kfence_pool = __va(kfence_pool);
+ return;
+
+err:
+ memblock_phys_free(kfence_pool, KFENCE_POOL_SIZE);
+ disable_kfence();
+}
+#else
+/* !CONFIG_KFENCE stubs: no pool is allocated (0) and mapping is a no-op. */
+static inline phys_addr_t alloc_kfence_pool(void) { return 0; }
+static inline void map_kfence_pool(phys_addr_t kfence_pool) { }
+#endif
+
+/*
+ * Boot-time setup of the radix kernel page tables: create the linear mapping
+ * for every memblock memory range, map the KFENCE pool if one was allocated,
+ * allocate the process table and fill in its first entry, and give init_mm
+ * the guard PID.
+ */
+static void __init radix_init_pgtable(void)
+{
+ phys_addr_t kfence_pool;
+ unsigned long rts_field;
+ phys_addr_t start, end;
+ u64 i;
+
+ /* We don't support slb for radix */
+ slb_set_size(0);
+
+ kfence_pool = alloc_kfence_pool();
+
+ /*
+ * Create the linear mapping
+ */
+ for_each_mem_range(i, &start, &end) {
+ /*
+ * The memblock allocator is up at this point, so the
+ * page tables will be allocated within the range. No
+ * need for a node (which we don't have yet).
+ */
+
+ if (end >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
+ WARN_ON(create_physical_mapping(start, end,
+ -1, PAGE_KERNEL, ~0UL));
+ }
+
+ map_kfence_pool(kfence_pool);
+
+ if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+ /*
+ * Older versions of KVM on these machines prefer if the
+ * guest only uses the low 19 PID bits.
+ */
+ mmu_pid_bits = 19;
+ }
+ mmu_base_pid = 1;
+
+ /*
+ * Allocate Partition table and process table for the
+ * host.
+ */
+ BUG_ON(PRTB_SIZE_SHIFT > 36);
+ process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1, 0, 0);
+ /*
+ * Fill in the process table.
+ */
+ rts_field = radix__get_tree_size();
+ process_tb->prtb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE);
+
+ /*
+ * The init_mm context is given the first available (non-zero) PID,
+ * which is the "guard PID" and contains no page table. PIDR should
+ * never be set to zero because that duplicates the kernel address
+ * space at the 0x0... offset (quadrant 0)!
+ *
+ * An arbitrary PID that may later be allocated by the PID allocator
+ * for userspace processes must not be used either, because that
+ * would cause stale user mappings for that PID on CPUs outside of
+ * the TLB invalidation scheme (because it won't be in mm_cpumask).
+ *
+ * So permanently carve out one PID for the purpose of a guard PID.
+ */
+ init_mm.context.id = mmu_base_pid;
+ mmu_base_pid++;
+}
+
+/*
+ * Initialise the partition table and install entry 0: dw0 combines the
+ * radix tree size, the physical address of init_mm's PGD, the PGD index
+ * size and PATB_HR; dw1 combines the physical address of process_tb, its
+ * size encoding (PRTB_SIZE_SHIFT - 12) and PATB_GR.
+ */
+static void __init radix_init_partition_table(void)
+{
+ unsigned long rts_field, dw0, dw1;
+
+ mmu_partition_table_init();
+ rts_field = radix__get_tree_size();
+ dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
+ dw1 = __pa(process_tb) | (PRTB_SIZE_SHIFT - 12) | PATB_GR;
+ mmu_partition_table_set_entry(0, dw0, dw1, false);
+
+ pr_info("Initializing Radix MMU\n");
+}
+
+/*
+ * Translate a page size shift into its MMU_PAGE_* index. Only the shifts
+ * for 4K, 64K, 2M and 1G pages are recognised; anything else yields -1.
+ */
+static int __init get_idx_from_shift(unsigned int shift)
+{
+	switch (shift) {
+	case 0xc:	/* 12 */
+		return MMU_PAGE_4K;
+	case 0x10:	/* 16 */
+		return MMU_PAGE_64K;
+	case 0x15:	/* 21 */
+		return MMU_PAGE_2M;
+	case 0x1e:	/* 30 */
+		return MMU_PAGE_1G;
+	default:
+		return -1;
+	}
+}
+
+/*
+ * Flat device tree scan callback that fills mmu_psize_defs from the
+ * "ibm,processor-radix-AP-encodings" property of a "cpu" node.
+ *
+ * Each 32-bit cell carries the AP encoding in its top 3 bits and the page
+ * size shift in the remaining bits. Cells whose shift has no MMU_PAGE_*
+ * index are logged and skipped.
+ *
+ * Returns 0 for nodes that are not "cpu" nodes or lack the property, and 1
+ * once a node with the property has been processed.
+ */
+static int __init radix_dt_scan_page_sizes(unsigned long node,
+					   const char *uname, int depth,
+					   void *data)
+{
+	int size = 0;
+	int shift, idx;
+	unsigned int ap;
+	const __be32 *prop;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
+
+	/* Grab page size encodings */
+	prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size);
+	if (!prop)
+		return 0;
+
+	pr_info("Page sizes from device-tree:\n");
+	for (; size >= 4; size -= 4, ++prop) {
+		struct mmu_psize_def *def;
+
+		/*
+		 * top 3 bit is AP encoding. The mask constant is unsigned so
+		 * that the shift into bit 31 does not overflow a signed int.
+		 */
+		shift = be32_to_cpu(prop[0]) & ~(0xeU << 28);
+		ap = be32_to_cpu(prop[0]) >> 29;
+		pr_info("Page size shift = %d AP=0x%x\n", shift, ap);
+
+		idx = get_idx_from_shift(shift);
+		if (idx < 0)
+			continue;
+
+		def = &mmu_psize_defs[idx];
+		def->shift = shift;
+		def->ap = ap;
+		def->h_rpt_pgsize = psize_to_rpti_pgsize(idx);
+	}
+
+	/* needed ? */
+	cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
+	return 1;
+}
+
+/*
+ * Discover the supported radix page sizes. The device tree is scanned
+ * first; if the scan reports nothing, 4K and 64K support is assumed and
+ * their mmu_psize_defs entries are filled in with fixed values.
+ */
+void __init radix__early_init_devtree(void)
+{
+	struct mmu_psize_def *def;
+
+	/*
+	 * Try to find the available page sizes in the device-tree
+	 */
+	if (of_scan_flat_dt(radix_dt_scan_page_sizes, NULL))
+		return;
+
+	/*
+	 * No page size details found in device tree.
+	 * Let's assume we have page 4k and 64k support
+	 */
+	def = &mmu_psize_defs[MMU_PAGE_4K];
+	def->shift = 12;
+	def->ap = 0x0;
+	def->h_rpt_pgsize = psize_to_rpti_pgsize(MMU_PAGE_4K);
+
+	def = &mmu_psize_defs[MMU_PAGE_64K];
+	def->shift = 16;
+	def->ap = 0x5;
+	def->h_rpt_pgsize = psize_to_rpti_pgsize(MMU_PAGE_64K);
+}
+
+/*
+ * Early radix MMU setup: record the radix table geometry and kernel address
+ * space layout in the runtime variables, build the kernel page tables with
+ * radix_init_pgtable(), then either program LPCR and the partition table
+ * (no FW_FEATURE_LPAR) or call radix_init_pseries(). Finally lift the
+ * memblock limit, switch to the init_mm context and call tlbiel_all().
+ */
+void __init radix__early_init_mmu(void)
+{
+ unsigned long lpcr;
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+#ifdef CONFIG_PPC_64K_PAGES
+ /* PAGE_SIZE mappings */
+ mmu_virtual_psize = MMU_PAGE_64K;
+#else
+ mmu_virtual_psize = MMU_PAGE_4K;
+#endif
+#endif
+ /*
+ * initialize page table size
+ */
+ __pte_index_size = RADIX_PTE_INDEX_SIZE;
+ __pmd_index_size = RADIX_PMD_INDEX_SIZE;
+ __pud_index_size = RADIX_PUD_INDEX_SIZE;
+ __pgd_index_size = RADIX_PGD_INDEX_SIZE;
+ __pud_cache_index = RADIX_PUD_INDEX_SIZE;
+ __pte_table_size = RADIX_PTE_TABLE_SIZE;
+ __pmd_table_size = RADIX_PMD_TABLE_SIZE;
+ __pud_table_size = RADIX_PUD_TABLE_SIZE;
+ __pgd_table_size = RADIX_PGD_TABLE_SIZE;
+
+ __pmd_val_bits = RADIX_PMD_VAL_BITS;
+ __pud_val_bits = RADIX_PUD_VAL_BITS;
+ __pgd_val_bits = RADIX_PGD_VAL_BITS;
+
+ __kernel_virt_start = RADIX_KERN_VIRT_START;
+ __vmalloc_start = RADIX_VMALLOC_START;
+ __vmalloc_end = RADIX_VMALLOC_END;
+ __kernel_io_start = RADIX_KERN_IO_START;
+ __kernel_io_end = RADIX_KERN_IO_END;
+ vmemmap = (struct page *)RADIX_VMEMMAP_START;
+ ioremap_bot = IOREMAP_BASE;
+
+#ifdef CONFIG_PCI
+ pci_io_base = ISA_IO_BASE;
+#endif
+ __pte_frag_nr = RADIX_PTE_FRAG_NR;
+ __pte_frag_size_shift = RADIX_PTE_FRAG_SIZE_SHIFT;
+ __pmd_frag_nr = RADIX_PMD_FRAG_NR;
+ __pmd_frag_size_shift = RADIX_PMD_FRAG_SIZE_SHIFT;
+
+ radix_init_pgtable();
+
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+ radix_init_partition_table();
+ } else {
+ radix_init_pseries();
+ }
+
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
+ /* Switch to the guard PID before turning on MMU */
+ radix__switch_mmu_context(NULL, &init_mm);
+ tlbiel_all();
+}
+
+/*
+ * Radix MMU setup that reuses the already-built tables (presumably run on
+ * secondary CPUs, going by the name - confirm against callers). Without
+ * FW_FEATURE_LPAR it sets LPCR_UPRT | LPCR_HR and points the PTCR at
+ * partition_tb; in all cases it switches to the init_mm context, calls
+ * tlbiel_all() and zeroes UAMOR.
+ */
+void radix__early_init_mmu_secondary(void)
+{
+ unsigned long lpcr;
+ /*
+ * update partition table control register and UPRT
+ */
+ if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+ lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
+
+ set_ptcr_when_no_uv(__pa(partition_tb) |
+ (PATB_SIZE_SHIFT - 12));
+ }
+
+ radix__switch_mmu_context(NULL, &init_mm);
+ tlbiel_all();
+
+ /* Make sure userspace can't change the AMR */
+ mtspr(SPRN_UAMOR, 0);
+}
+
+/*
+ * Called during kexec sequence with MMU off.
+ *
+ * Without FW_FEATURE_LPAR: clear LPCR_UPRT, zero the PTCR and the NMMU PTCR,
+ * then flush the whole TLB. With FW_FEATURE_LPAR this does nothing.
+ */
+notrace void radix__mmu_cleanup_all(void)
+{
+	unsigned long lpcr;
+
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		return;
+
+	lpcr = mfspr(SPRN_LPCR);
+	mtspr(SPRN_LPCR, lpcr & ~LPCR_UPRT);
+	set_ptcr_when_no_uv(0);
+	powernv_set_nmmu_ptcr(0);
+	radix__flush_tlb_all();
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+/*
+ * Free the PTE table at pte_start and clear the PMD entry pointing to it,
+ * but only if every one of its PTRS_PER_PTE entries is none.
+ */
+static void free_pte_table(pte_t *pte_start, pmd_t *pmd)
+{
+	pte_t *entry = pte_start;
+	pte_t *limit = pte_start + PTRS_PER_PTE;
+
+	/* Any populated entry keeps the table alive. */
+	for (; entry < limit; entry++) {
+		if (!pte_none(*entry))
+			return;
+	}
+
+	pte_free_kernel(&init_mm, pte_start);
+	pmd_clear(pmd);
+}
+
+/*
+ * Free the PMD table at pmd_start and clear the PUD entry pointing to it,
+ * but only if every one of its PTRS_PER_PMD entries is none.
+ */
+static void free_pmd_table(pmd_t *pmd_start, pud_t *pud)
+{
+	pmd_t *entry = pmd_start;
+	pmd_t *limit = pmd_start + PTRS_PER_PMD;
+
+	/* Any populated entry keeps the table alive. */
+	for (; entry < limit; entry++) {
+		if (!pmd_none(*entry))
+			return;
+	}
+
+	pmd_free(&init_mm, pmd_start);
+	pud_clear(pud);
+}
+
+/*
+ * Free the PUD table at pud_start and clear the P4D entry pointing to it,
+ * but only if every one of its PTRS_PER_PUD entries is none.
+ */
+static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
+{
+	pud_t *entry = pud_start;
+	pud_t *limit = pud_start + PTRS_PER_PUD;
+
+	/* Any populated entry keeps the table alive. */
+	for (; entry < limit; entry++) {
+		if (!pud_none(*entry))
+			return;
+	}
+
+	pud_free(&init_mm, pud_start);
+	p4d_clear(p4d);
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * True when vmemmap_populated() reports nothing populated in the PMD_SIZE
+ * block containing addr. The end argument is not consulted.
+ */
+static bool __meminit vmemmap_pmd_is_unused(unsigned long addr, unsigned long end)
+{
+	return !vmemmap_populated(ALIGN_DOWN(addr, PMD_SIZE), PMD_SIZE);
+}
+
+/*
+ * True when vmemmap_populated() reports nothing populated in the PAGE_SIZE
+ * block containing addr. The end argument is not consulted.
+ */
+static bool __meminit vmemmap_page_is_unused(unsigned long addr, unsigned long end)
+{
+	return !vmemmap_populated(ALIGN_DOWN(addr, PAGE_SIZE), PAGE_SIZE);
+}
+#endif
+
+/*
+ * Release 2^order pages of vmemmap backing memory starting at page. Pages
+ * whose pfn falls inside the altmap's range are returned to the altmap;
+ * reserved pages are freed one by one with free_reserved_page(); everything
+ * else goes back through __free_pages().
+ */
+static void __meminit free_vmemmap_pages(struct page *page,
+ struct vmem_altmap *altmap,
+ int order)
+{
+ unsigned int nr_pages = 1 << order;
+
+ if (altmap) {
+ unsigned long alt_start, alt_end;
+ unsigned long base_pfn = page_to_pfn(page);
+
+ /*
+ * with 2M vmemmap mapping we can have things set up
+ * such that even though altmap is specified we never
+ * used altmap.
+ */
+ alt_start = altmap->base_pfn;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+
+ if (base_pfn >= alt_start && base_pfn < alt_end) {
+ vmem_altmap_free(altmap, nr_pages);
+ return;
+ }
+ }
+
+ if (PageReserved(page)) {
+ /* allocated from memblock */
+ while (nr_pages--)
+ free_reserved_page(page++);
+ } else
+ __free_pages(page, order);
+}
+
+/*
+ * Clear the present PTEs covering addr..end in the PTE table at pte_start.
+ *
+ * A fully covered page is always cleared; when direct is false its backing
+ * page is first released with free_vmemmap_pages(). A partially covered
+ * page is only torn down for non-direct mappings, and only when
+ * vmemmap_page_is_unused() says so (CONFIG_SPARSEMEM_VMEMMAP).
+ *
+ * For direct mappings the page count for mmu_virtual_psize is reduced by
+ * the number of fully covered pages cleared.
+ */
+static void __meminit remove_pte_table(pte_t *pte_start, unsigned long addr,
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
+{
+ unsigned long next, pages = 0;
+ pte_t *pte;
+
+ pte = pte_start + pte_index(addr);
+ for (; addr < end; addr = next, pte++) {
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
+ if (next > end)
+ next = end;
+
+ if (!pte_present(*pte))
+ continue;
+
+ if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
+ if (!direct)
+ free_vmemmap_pages(pte_page(*pte), altmap, 0);
+ pte_clear(&init_mm, addr, pte);
+ pages++;
+ }
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ else if (!direct && vmemmap_page_is_unused(addr, next)) {
+ free_vmemmap_pages(pte_page(*pte), altmap, 0);
+ pte_clear(&init_mm, addr, pte);
+ }
+#endif
+ }
+ if (direct)
+ update_page_count(mmu_virtual_psize, -pages);
+}
+
+/*
+ * Tear down the mappings covering addr..end below the PMD table at
+ * pmd_start.
+ *
+ * PMD leaf entries are handled here with the same rules remove_pte_table()
+ * applies to pages, at PMD_SIZE granularity. For a non-leaf entry the PTE
+ * table is walked with remove_pte_table() and then freed by
+ * free_pte_table() if it ended up empty.
+ *
+ * For direct mappings the MMU_PAGE_2M page count is reduced by the number
+ * of fully covered leaf entries cleared.
+ */
+static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
+{
+ unsigned long next, pages = 0;
+ pte_t *pte_base;
+ pmd_t *pmd;
+
+ pmd = pmd_start + pmd_index(addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+
+ if (!pmd_present(*pmd))
+ continue;
+
+ if (pmd_leaf(*pmd)) {
+ if (IS_ALIGNED(addr, PMD_SIZE) &&
+ IS_ALIGNED(next, PMD_SIZE)) {
+ if (!direct)
+ free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ pages++;
+ }
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ else if (!direct && vmemmap_pmd_is_unused(addr, next)) {
+ free_vmemmap_pages(pmd_page(*pmd), altmap, get_order(PMD_SIZE));
+ pte_clear(&init_mm, addr, (pte_t *)pmd);
+ }
+#endif
+ continue;
+ }
+
+ pte_base = (pte_t *)pmd_page_vaddr(*pmd);
+ remove_pte_table(pte_base, addr, next, direct, altmap);
+ free_pte_table(pte_base, pmd);
+ }
+ if (direct)
+ update_page_count(MMU_PAGE_2M, -pages);
+}
+
+/*
+ * Tear down the mappings covering addr..end below the PUD table at
+ * pud_start.
+ *
+ * A PUD leaf entry is cleared only when the range covers it exactly;
+ * otherwise a one-time warning is raised and the entry is left alone. No
+ * backing page is freed for PUD leaves. For a non-leaf entry the PMD table
+ * is walked with remove_pmd_table() and then freed by free_pmd_table() if
+ * it ended up empty.
+ *
+ * For direct mappings the MMU_PAGE_1G page count is reduced by the number
+ * of leaf entries cleared.
+ */
+static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
+ unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
+{
+ unsigned long next, pages = 0;
+ pmd_t *pmd_base;
+ pud_t *pud;
+
+ pud = pud_start + pud_index(addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+
+ if (!pud_present(*pud))
+ continue;
+
+ if (pud_leaf(*pud)) {
+ if (!IS_ALIGNED(addr, PUD_SIZE) ||
+ !IS_ALIGNED(next, PUD_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+ pte_clear(&init_mm, addr, (pte_t *)pud);
+ pages++;
+ continue;
+ }
+
+ pmd_base = pud_pgtable(*pud);
+ remove_pmd_table(pmd_base, addr, next, direct, altmap);
+ free_pmd_table(pmd_base, pud);
+ }
+ if (direct)
+ update_page_count(MMU_PAGE_1G, -pages);
+}
+
+/*
+ * Remove the kernel mappings for start..end from init_mm, freeing page
+ * table pages that become empty, then flush the kernel TLB for the range.
+ * The walk runs under init_mm.page_table_lock.
+ *
+ * direct selects linear-map handling (page counts updated, backing pages
+ * kept); otherwise the range is treated as vmemmap and backing pages are
+ * freed, using altmap where applicable.
+ */
+static void __meminit
+remove_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
+{
+ unsigned long addr, next;
+ pud_t *pud_base;
+ pgd_t *pgd;
+ p4d_t *p4d;
+
+ spin_lock(&init_mm.page_table_lock);
+
+ for (addr = start; addr < end; addr = next) {
+ next = pgd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ continue;
+
+ if (p4d_leaf(*p4d)) {
+ if (!IS_ALIGNED(addr, P4D_SIZE) ||
+ !IS_ALIGNED(next, P4D_SIZE)) {
+ WARN_ONCE(1, "%s: unaligned range\n", __func__);
+ continue;
+ }
+
+ pte_clear(&init_mm, addr, (pte_t *)pgd);
+ continue;
+ }
+
+ pud_base = p4d_pgtable(*p4d);
+ remove_pud_table(pud_base, addr, next, direct, altmap);
+ free_pud_table(pud_base, p4d);
+ }
+
+ spin_unlock(&init_mm.page_table_lock);
+ radix__flush_tlb_kernel_range(start, end);
+}
+
+/*
+ * Create the linear mapping for the virtual range start..end with
+ * protection prot, with no mapping size limit. Returns -1 (after a warning)
+ * when end reaches RADIX_VMALLOC_START, otherwise the result of
+ * create_physical_mapping().
+ */
+int __meminit radix__create_section_mapping(unsigned long start,
+ unsigned long end, int nid,
+ pgprot_t prot)
+{
+ if (end >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ return create_physical_mapping(__pa(start), __pa(end),
+ nid, prot, ~0UL);
+}
+
+/*
+ * Remove the linear mapping for start..end (direct mode, no altmap).
+ * Always returns 0.
+ */
+int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
+{
+ remove_pagetable(start, end, true, NULL);
+ return 0;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * __map_kernel_page() with a node hint only: region_start and region_end
+ * are passed as 0.
+ */
+static int __map_kernel_page_nid(unsigned long ea, unsigned long pa,
+ pgprot_t flags, unsigned int map_page_size,
+ int nid)
+{
+ return __map_kernel_page(ea, pa, flags, map_page_size, nid, 0, 0);
+}
+
+/*
+ * Map one vmemmap page of page_size bytes at start to physical address phys
+ * with PAGE_KERNEL protection, hinting the node that owns phys. Returns -1
+ * (after a warning) when the mapping would reach RADIX_VMEMMAP_END, and 0
+ * otherwise; a failure from the mapping call itself is a BUG.
+ */
+int __meminit radix__vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+{
+ /* Create a PTE encoding */
+ int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
+ int ret;
+
+ if ((start + page_size) >= RADIX_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ ret = __map_kernel_page_nid(start, phys, PAGE_KERNEL, page_size, nid);
+ BUG_ON(ret);
+
+ return 0;
+}
+
+#ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP
+/*
+ * The generic __vmemmap_can_optimize() decision applies only when radix is
+ * enabled; otherwise the answer is always false.
+ */
+bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap)
+{
+	if (!radix_enabled())
+		return false;
+
+	return __vmemmap_can_optimize(altmap, pgmap);
+}
+#endif
+
+/*
+ * Report whether *pmdp is a leaf (huge) mapping. When it is, the mapping is
+ * also passed to vmemmap_verify() for the range addr..next on node.
+ * Returns the pmd_leaf() result.
+ */
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+ unsigned long addr, unsigned long next)
+{
+ int large = pmd_leaf(*pmdp);
+
+ if (large)
+ vmemmap_verify(pmdp_ptep(pmdp), node, addr, next);
+
+ return large;
+}
+
+/*
+ * Install a PMD-level PAGE_KERNEL mapping at addr whose backing memory is
+ * the buffer p, then hand it to vmemmap_verify(). addr must be PMD_SIZE
+ * aligned (checked with VM_BUG_ON).
+ */
+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+ unsigned long addr, unsigned long next)
+{
+ pte_t entry;
+ pte_t *ptep = pmdp_ptep(pmdp);
+
+ VM_BUG_ON(!IS_ALIGNED(addr, PMD_SIZE));
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, ptep, entry);
+ asm volatile("ptesync": : :"memory");
+
+ vmemmap_verify(ptep, node, addr, next);
+}
+
+/*
+ * Return the PTE for addr under pmdp, populating it first if it is none.
+ *
+ * Without reuse, a fresh PAGE_SIZE block is allocated: from altmap when one
+ * is given and the page does not cross its boundary, falling back to RAM if
+ * the altmap allocation fails. With reuse, the given page is mapped again
+ * and an extra reference is taken on it.
+ *
+ * Returns NULL only when a new backing page was needed and could not be
+ * allocated.
+ */
+static pte_t * __meminit radix__vmemmap_pte_populate(pmd_t *pmdp, unsigned long addr,
+ int node,
+ struct vmem_altmap *altmap,
+ struct page *reuse)
+{
+ pte_t *pte = pte_offset_kernel(pmdp, addr);
+
+ if (pte_none(*pte)) {
+ pte_t entry;
+ void *p;
+
+ if (!reuse) {
+ /*
+ * make sure we don't create altmap mappings
+ * covering things outside the device.
+ */
+ if (altmap && altmap_cross_boundary(altmap, addr, PAGE_SIZE))
+ altmap = NULL;
+
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap);
+ if (!p && altmap)
+ p = vmemmap_alloc_block_buf(PAGE_SIZE, node, NULL);
+ if (!p)
+ return NULL;
+ pr_debug("PAGE_SIZE vmemmap mapping\n");
+ } else {
+ /*
+ * When a PTE/PMD entry is freed from the init_mm
+ * there's a free_pages() call to this page allocated
+ * above. Thus this get_page() is paired with the
+ * put_page_testzero() on the freeing path.
+ * This can only be called by certain ZONE_DEVICE paths,
+ * and through vmemmap_populate_compound_pages() when
+ * slab is available.
+ */
+ get_page(reuse);
+ p = page_to_virt(reuse);
+ pr_debug("Tail page reuse vmemmap mapping\n");
+ }
+
+ VM_BUG_ON(!PAGE_ALIGNED(addr));
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte_at(&init_mm, addr, pte, entry);
+ asm volatile("ptesync": : :"memory");
+ }
+ return pte;
+}
+
+/*
+ * Return the PUD entry for address under p4dp, allocating the PUD table if
+ * the P4D entry is none: from memblock (early_alloc_pgtable) before slab is
+ * available, from pud_alloc() afterwards. Only the pud_alloc() path can
+ * return NULL.
+ */
+static inline pud_t *vmemmap_pud_alloc(p4d_t *p4dp, int node,
+ unsigned long address)
+{
+ pud_t *pud;
+
+ /* All early vmemmap mapping to keep simple do it at PAGE_SIZE */
+ if (unlikely(p4d_none(*p4dp))) {
+ if (unlikely(!slab_is_available())) {
+ pud = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ p4d_populate(&init_mm, p4dp, pud);
+ /* go to the pud_offset */
+ } else
+ return pud_alloc(&init_mm, p4dp, address);
+ }
+ return pud_offset(p4dp, address);
+}
+
+/*
+ * Return the PMD entry for address under pudp, allocating the PMD table if
+ * the PUD entry is none: from memblock (early_alloc_pgtable) before slab is
+ * available, from pmd_alloc() afterwards. Only the pmd_alloc() path can
+ * return NULL.
+ */
+static inline pmd_t *vmemmap_pmd_alloc(pud_t *pudp, int node,
+ unsigned long address)
+{
+ pmd_t *pmd;
+
+ /* All early vmemmap mapping to keep simple do it at PAGE_SIZE */
+ if (unlikely(pud_none(*pudp))) {
+ if (unlikely(!slab_is_available())) {
+ pmd = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ pud_populate(&init_mm, pudp, pmd);
+ } else
+ return pmd_alloc(&init_mm, pudp, address);
+ }
+ return pmd_offset(pudp, address);
+}
+
+/*
+ * Return the PTE for address under pmdp, allocating the PTE table if the
+ * PMD entry is none: from memblock (early_alloc_pgtable) before slab is
+ * available, from pte_alloc_kernel() afterwards. Only the
+ * pte_alloc_kernel() path can return NULL.
+ */
+static inline pte_t *vmemmap_pte_alloc(pmd_t *pmdp, int node,
+ unsigned long address)
+{
+ pte_t *pte;
+
+ /* All early vmemmap mapping to keep simple do it at PAGE_SIZE */
+ if (unlikely(pmd_none(*pmdp))) {
+ if (unlikely(!slab_is_available())) {
+ pte = early_alloc_pgtable(PAGE_SIZE, node, 0, 0);
+ pmd_populate(&init_mm, pmdp, pte);
+ } else
+ return pte_alloc_kernel(pmdp, address);
+ }
+ return pte_offset_kernel(pmdp, address);
+}
+
+
+
+int __meminit radix__vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ unsigned long addr;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /*
+ * If altmap is present, Make sure we align the start vmemmap addr
+ * to PAGE_SIZE so that we calculate the correct start_pfn in
+ * altmap boundary check to decide whether we should use altmap or
+ * RAM based backing memory allocation. Also the address need to be
+ * aligned for set_pte operation. If the start addr is already
+ * PMD_SIZE aligned and with in the altmap boundary then we will
+ * try to use a pmd size altmap mapping else we go for page size
+ * mapping.
+ *
+ * If altmap is not present, align the vmemmap addr to PMD_SIZE and
+ * always allocate a PMD size page for vmemmap backing.
+ *
+ */
+
+ if (altmap)
+ start = ALIGN_DOWN(start, PAGE_SIZE);
+ else
+ start = ALIGN_DOWN(start, PMD_SIZE);
+
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pmd_none(READ_ONCE(*pmd))) {
+ void *p;
+
+ /*
+ * keep it simple by checking addr PMD_SIZE alignment
+ * and verifying the device boundary condition.
+ * For us to use a pmd mapping, both addr and pfn should
+ * be aligned. We skip if addr is not aligned and for
+ * pfn we hope we have extra area in the altmap that
+ * can help to find an aligned block. This can result
+ * in altmap block allocation failures, in which case
+ * we fallback to RAM for vmemmap allocation.
+ */
+ if (altmap && (!IS_ALIGNED(addr, PMD_SIZE) ||
+ altmap_cross_boundary(altmap, addr, PMD_SIZE))) {
+ /*
+ * make sure we don't create altmap mappings
+ * covering things outside the device.
+ */
+ goto base_mapping;
+ }
+
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (p) {
+ vmemmap_set_pmd(pmd, p, node, addr, next);
+ pr_debug("PMD_SIZE vmemmap mapping\n");
+ continue;
+ } else {
+ /*
+ * A vmemmap block allocation can fail due to
+ * alignment requirements: we try to align things
+ * aggressively and can thereby run out of
+ * space. Try base mapping on failure.
+ */
+ goto base_mapping;
+ }
+ } else if (vmemmap_check_pmd(pmd, node, addr, next)) {
+ /*
+ * If a huge mapping exists due to an early call to
+ * vmemmap_populate, let's try to use that.
+ */
+ continue;
+ }
+base_mapping:
+ /*
+ * Not able to allocate higher order memory to back memmap
+ * or we found a pointer to pte page. Allocate base page
+ * size vmemmap
+ */
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return -ENOMEM;
+
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, altmap, NULL);
+ if (!pte)
+ return -ENOMEM;
+
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+ next = addr + PAGE_SIZE;
+ }
+ return 0;
+}
+
+/*
+ * Map the single base-size vmemmap page covering @addr, allocating any
+ * missing pud/pmd/pte levels on @node.
+ *
+ * Returns the kernel PTE for @addr, or NULL when a page-table level cannot
+ * be allocated, when the PTE cannot be populated, or when the PMD covering
+ * @addr is already a leaf (huge) mapping.
+ *
+ * NOTE: @altmap and @reuse are accepted but not used here; the PTE is always
+ * populated with a NULL altmap and no reuse page.
+ */
+static pte_t * __meminit radix__vmemmap_populate_address(unsigned long addr, int node,
+ struct vmem_altmap *altmap,
+ struct page *reuse)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return NULL;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return NULL;
+ if (pmd_leaf(*pmd))
+ /*
+ * The second page is mapped as a hugepage due to a nearby request.
+ * Force our mapping to page size without deduplication
+ */
+ return NULL;
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return NULL;
+ /*
+ * Populating the PTE can fail. Check the result before
+ * vmemmap_verify() inspects the entry, the same way the other
+ * radix__vmemmap_pte_populate() call sites do.
+ */
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ return pte;
+}
+/*
+ * Look up, and create if necessary, the PTE of the "second" vmemmap page of
+ * the compound page that @addr belongs to. That page is the one used for
+ * duplication of the tail mappings.
+ *
+ * @addr:       vmemmap address currently being populated
+ * @pfn_offset: offset computed by the caller relative to the start of the
+ *              compound page; used to step back from @addr to its start
+ * @node:       node handed to the page-table allocation helpers
+ *
+ * If the second page's PTE is still empty, the page before it (the head
+ * page's vmemmap page) is populated first, then the second page itself.
+ *
+ * Returns the PTE of the second page, or NULL on allocation failure or when
+ * the covering PMD is a leaf mapping.
+ */
+static pte_t * __meminit vmemmap_compound_tail_page(unsigned long addr,
+ unsigned long pfn_offset, int node)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long map_addr;
+
+ /* the second vmemmap page which we use for duplication */
+ map_addr = addr - pfn_offset * sizeof(struct page) + PAGE_SIZE;
+ pgd = pgd_offset_k(map_addr);
+ p4d = p4d_offset(pgd, map_addr);
+ pud = vmemmap_pud_alloc(p4d, node, map_addr);
+ if (!pud)
+ return NULL;
+ pmd = vmemmap_pmd_alloc(pud, node, map_addr);
+ if (!pmd)
+ return NULL;
+ if (pmd_leaf(*pmd))
+ /*
+ * The second page is mapped as a hugepage due to a nearby request.
+ * Force our mapping to page size without deduplication
+ */
+ return NULL;
+ pte = vmemmap_pte_alloc(pmd, node, map_addr);
+ if (!pte)
+ return NULL;
+ /*
+ * Check if there exists a mapping to the left
+ */
+ if (pte_none(*pte)) {
+ /*
+ * Populate the head page vmemmap page.
+ * It can fall in different pmd, hence
+ * vmemmap_populate_address()
+ */
+ pte = radix__vmemmap_populate_address(map_addr - PAGE_SIZE, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ /*
+ * Populate the tail pages vmemmap page
+ */
+ pte = radix__vmemmap_pte_populate(pmd, map_addr, node, NULL, NULL);
+ if (!pte)
+ return NULL;
+ vmemmap_verify(pte, node, map_addr, map_addr + PAGE_SIZE);
+ return pte;
+ }
+ return pte; /* second page was already mapped; reuse it as is */
+}
+/*
+ * Populate the vmemmap range [@start, @end) for a device pagemap using
+ * base-page-size mappings, sharing one backing page among the tail vmemmap
+ * pages of each compound page where possible.
+ *
+ * @start_pfn: pfn corresponding to @start
+ * @start:     first vmemmap address to populate
+ * @end:       end of the vmemmap range (exclusive)
+ * @node:      node handed to the allocation helpers
+ * @pgmap:     device pagemap; pgmap_vmemmap_nr() of it gives the compound
+ *             page size used for the head/tail decision
+ *
+ * For each address the loop takes one of these paths:
+ *  - the PMD is already a huge mapping: skip to the next PMD boundary;
+ *  - the PTE is already populated: skip one page;
+ *  - the address is a compound head (pfn_offset == 0): populate this page
+ *    and the following one, then advance by two pages;
+ *  - otherwise: map the page onto the shared tail page returned by
+ *    vmemmap_compound_tail_page(), or give it its own backing page when
+ *    that lookup returns NULL.
+ *
+ * Returns 0 on success or -ENOMEM when any allocation or populate step fails.
+ */
+int __meminit vmemmap_populate_compound_pages(unsigned long start_pfn,
+ unsigned long start,
+ unsigned long end, int node,
+ struct dev_pagemap *pgmap)
+{
+ /*
+ * we want to map things as base page size mapping so that
+ * we can save space in vmemmap. We could have huge mapping
+ * covering out both edges.
+ */
+ unsigned long addr;
+ unsigned long addr_pfn = start_pfn;
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ for (addr = start; addr < end; addr = next) {
+ /* every path below sets 'next' (and addr_pfn) before continuing */
+ pgd = pgd_offset_k(addr);
+ p4d = p4d_offset(pgd, addr);
+ pud = vmemmap_pud_alloc(p4d, node, addr);
+ if (!pud)
+ return -ENOMEM;
+ pmd = vmemmap_pmd_alloc(pud, node, addr);
+ if (!pmd)
+ return -ENOMEM;
+
+ if (pmd_leaf(READ_ONCE(*pmd))) {
+ /* existing huge mapping. Skip the range */
+ addr_pfn += (PMD_SIZE >> PAGE_SHIFT);
+ next = pmd_addr_end(addr, end);
+ continue;
+ }
+ pte = vmemmap_pte_alloc(pmd, node, addr);
+ if (!pte)
+ return -ENOMEM;
+ if (!pte_none(*pte)) {
+ /*
+ * This could be because we already have a compound
+ * page whose VMEMMAP_RESERVE_NR pages were mapped and
+ * this request falls in those pages.
+ */
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ } else {
+ unsigned long nr_pages = pgmap_vmemmap_nr(pgmap);
+ unsigned long pfn_offset = addr_pfn - ALIGN_DOWN(addr_pfn, nr_pages);
+ pte_t *tail_page_pte;
+
+ /*
+ * if the address is aligned to huge page size it is the
+ * head mapping.
+ */
+ if (pfn_offset == 0) {
+ /* Populate the head page vmemmap page */
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ /*
+ * Populate the tail pages vmemmap page
+ * It can fall in different pmd, hence
+ * vmemmap_populate_address()
+ */
+ pte = radix__vmemmap_populate_address(addr + PAGE_SIZE, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+
+ addr_pfn += 2;
+ next = addr + 2 * PAGE_SIZE;
+ continue;
+ }
+ /*
+ * get the 2nd mapping details
+ * Also create it if that doesn't exist
+ */
+ tail_page_pte = vmemmap_compound_tail_page(addr, pfn_offset, node);
+ if (!tail_page_pte) {
+ /* no shared tail page available: back this page on its own */
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, NULL);
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ }
+ /* reuse the page behind the shared tail PTE as this page's backing */
+ pte = radix__vmemmap_pte_populate(pmd, addr, node, NULL, pte_page(*tail_page_pte));
+ if (!pte)
+ return -ENOMEM;
+ vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
+
+ addr_pfn += 1;
+ next = addr + PAGE_SIZE;
+ continue;
+ }
+ }
+ return 0;
+}
+
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void __meminit radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
+{
+ remove_pagetable(start, start + page_size, true, NULL); /* covers [start, start + page_size); no altmap is passed */
+}
+
+void __ref radix__vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+ remove_pagetable(start, end, false, altmap); /* covers [start, end), forwarding the caller's altmap */
+}
+#endif
+#endif
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * Update the huge PMD entry at @pmdp by handing @clr and @set to
+ * radix__pte_update(), emit the hugepage_update_pmd trace event, and return
+ * the value radix__pte_update() reported as the old entry.
+ *
+ * With CONFIG_DEBUG_VM, warns if *@pmdp is not a trans-huge PMD and asserts
+ * that the PMD lock is held.
+ */
+unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, unsigned long clr,
+ unsigned long set)
+{
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!radix__pmd_trans_huge(*pmdp));
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+#endif
+
+ old = radix__pte_update(mm, addr, pmdp_ptep(pmdp), clr, set, 1); /* last argument presumably marks a huge entry - TODO confirm */
+ trace_hugepage_update_pmd(addr, old, clr, set);
+
+ return old;
+}
+/*
+ * PUD-level counterpart of radix__pmd_hugepage_update(): hand @clr and @set
+ * to radix__pte_update() for the entry at @pudp, emit the
+ * hugepage_update_pud trace event, and return the reported old value.
+ *
+ * With CONFIG_DEBUG_VM, warns if *@pudp is not a trans-huge PUD and asserts
+ * that the PUD lock is held.
+ */
+unsigned long radix__pud_hugepage_update(struct mm_struct *mm, unsigned long addr,
+ pud_t *pudp, unsigned long clr,
+ unsigned long set)
+{
+ unsigned long old;
+
+#ifdef CONFIG_DEBUG_VM
+ WARN_ON(!pud_trans_huge(*pudp));
+ assert_spin_locked(pud_lockptr(mm, pudp));
+#endif
+
+ old = radix__pte_update(mm, addr, pudp_ptep(pudp), clr, set, 1); /* last argument presumably marks a huge entry - TODO confirm */
+ trace_hugepage_update_pud(addr, old, clr, set);
+
+ return old;
+}
+/*
+ * Clear the (non-huge) PMD at @pmdp, flush the TLB for the collapsed range
+ * at @address in @vma's mm, and return the PMD value that was there.
+ *
+ * @address must be HPAGE_PMD aligned and *@pmdp must not be a trans-huge
+ * PMD; both are checked with VM_BUG_ON().
+ */
+pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
+ pmd_t *pmdp)
+
+{
+ pmd_t pmd;
+
+ VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ VM_BUG_ON(radix__pmd_trans_huge(*pmdp));
+ /*
+ * khugepaged calls this for normal pmd
+ */
+ pmd = *pmdp; /* snapshot the entry before clearing it; this is the return value */
+ pmd_clear(pmdp);
+
+ radix__flush_tlb_collapsed_pmd(vma->vm_mm, address);
+
+ return pmd;
+}
+
+/*
+ * For us pgtable_t is pte_t *. In order to save the deposited
+ * page table, we consider the allocated page table as a list
+ * head. On withdraw we need to make sure we zero out the used
+ * list_head memory area.
+ *
+ * radix__pgtable_trans_huge_deposit() stores @pgtable for @pmdp: the first
+ * deposit initialises a list head inside the page table itself; later
+ * deposits are linked to the currently stored table. In both cases
+ * pmd_huge_pte(mm, pmdp) ends up pointing at @pgtable. The PMD lock must be
+ * held (asserted).
+ */
+void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
+}
+/*
+ * Take back the page table currently stored in pmd_huge_pte(mm, pmdp).
+ * The next list entry (or NULL when the list is empty) becomes the stored
+ * table, and the first two PTE slots of the returned table, which were
+ * overlaid by the struct list_head, are zeroed. The PMD lock must be held
+ * (asserted).
+ */
+pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ pte_t *ptep;
+ pgtable_t pgtable;
+ struct list_head *lh;
+
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable; /* wipe the two words the list_head occupied */
+ *ptep = __pte(0);
+ ptep++;
+ *ptep = __pte(0);
+ return pgtable;
+}
+
+/*
+ * Clear the huge PMD at @pmdp and hand back the entry that was there.
+ */
+pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pmd_t *pmdp)
+{
+ /* clr = ~0UL, set = 0: every bit is cleared, nothing is set */
+ unsigned long prev = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
+
+ return __pmd(prev);
+}
+
+/*
+ * Clear the huge PUD at @pudp and hand back the entry that was there.
+ */
+pud_t radix__pudp_huge_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pud_t *pudp)
+{
+ /* clr = ~0UL, set = 0: every bit is cleared, nothing is set */
+ unsigned long prev = radix__pud_hugepage_update(mm, addr, pudp, ~0UL, 0);
+
+ return __pud(prev);
+}
+
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+/*
+ * Add to *@ptep the dirty, soft-dirty, accessed, RW and exec bits that are
+ * set in @entry.
+ *
+ * On pre-ARCH_31 CPUs, when the RW bit differs between @entry and the
+ * current PTE and the mm has coprocessors attached (context.copros > 0),
+ * the PTE is first switched from present to invalid, the TLB is flushed for
+ * @address at page size @psize, and only then is the new value installed.
+ * In every other case the bits are simply set with __radix_pte_update().
+ */
+void radix__ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep,
+ pte_t entry, unsigned long address, int psize)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long set = pte_val(entry) & (_PAGE_DIRTY | _PAGE_SOFT_DIRTY |
+ _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+ unsigned long change = pte_val(entry) ^ pte_val(*ptep); /* bits that differ between new and current PTE */
+ /*
+ * On POWER9, the NMMU is not able to relax PTE access permissions
+ * for a translation with a TLB. The PTE must be invalidated, TLB
+ * flushed before the new PTE is installed.
+ *
+ * This only needs to be done for radix, because hash translation does
+ * flush when updating the linux pte (and we don't support NMMU
+ * accelerators on HPT on POWER9 anyway XXX: do we?).
+ *
+ * POWER10 (and P9P) NMMU does behave as per ISA.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) && (change & _PAGE_RW) &&
+ atomic_read(&mm->context.copros) > 0) {
+ unsigned long old_pte, new_pte;
+
+ old_pte = __radix_pte_update(ptep, _PAGE_PRESENT, _PAGE_INVALID);
+ new_pte = old_pte | set;
+ radix__flush_tlb_page_psize(mm, address, psize);
+ __radix_pte_update(ptep, _PAGE_INVALID, new_pte);
+ } else {
+ __radix_pte_update(ptep, 0, set);
+ /*
+ * Book3S does not require a TLB flush when relaxing access
+ * restrictions when the address space (modulo the POWER9 nest
+ * MMU issue above) because the MMU will reload the PTE after
+ * taking an access fault, as defined by the architecture. See
+ * "Setting a Reference or Change Bit or Upgrading Access
+ * Authority (PTE Subject to Atomic Hardware Updates)" in
+ * Power ISA Version 3.1B.
+ */
+ }
+ /* See ptesync comment in radix__set_pte_at */
+}
+/*
+ * Install @pte at @ptep for @addr. On pre-ARCH_31 CPUs, if going from
+ * @old_pte to @pte is an RW upgrade (per is_pte_rw_upgrade()) and the mm has
+ * coprocessors attached, the TLB entry for @addr is flushed first.
+ */
+void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t old_pte, pte_t pte)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * POWER9 NMMU must flush the TLB after clearing the PTE before
+ * installing a PTE with more relaxed access permissions, see
+ * radix__ptep_set_access_flags.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+ is_pte_rw_upgrade(pte_val(old_pte), pte_val(pte)) &&
+ (atomic_read(&mm->context.copros) > 0))
+ radix__flush_tlb_page(vma, addr);
+
+ set_pte_at(mm, addr, ptep, pte);
+}
+/*
+ * Install a PUD-level leaf mapping of physical address @addr with protection
+ * @prot into @pud of init_mm. Returns 1 when the entry was written, 0 when
+ * radix is not enabled (nothing is written in that case).
+ */
+int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
+{
+ pte_t *ptep = (pte_t *)pud; /* the PUD slot is written through the PTE setter below */
+ pte_t new_pud = pfn_pte(__phys_to_pfn(addr), prot);
+
+ if (!radix_enabled())
+ return 0;
+
+ set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pud);
+
+ return 1;
+}
+
+/*
+ * Clear @pud if it currently holds a leaf mapping.
+ * Returns 1 when an entry was cleared, 0 when @pud was not a leaf.
+ */
+int pud_clear_huge(pud_t *pud)
+{
+ /* nothing to do for a non-leaf entry */
+ if (!pud_leaf(*pud))
+ return 0;
+
+ pud_clear(pud);
+ return 1;
+}
+/*
+ * Detach the PMD table that @pud points to and free it together with every
+ * PTE table it references. The PUD is cleared and the kernel TLB flushed for
+ * [@addr, @addr + PUD_SIZE) before anything is freed. Always returns 1.
+ */
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+ pmd_t *pmd;
+ int i;
+
+ pmd = pud_pgtable(*pud); /* remember the table before the entry is cleared */
+ pud_clear(pud);
+
+ flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+ for (i = 0; i < PTRS_PER_PMD; i++) {
+ if (!pmd_none(pmd[i])) {
+ pte_t *pte;
+ pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+ pte_free_kernel(&init_mm, pte);
+ }
+ }
+
+ pmd_free(&init_mm, pmd);
+
+ return 1;
+}
+/*
+ * Install a PMD-level leaf mapping of physical address @addr with protection
+ * @prot into @pmd of init_mm. Returns 1 when the entry was written, 0 when
+ * radix is not enabled (nothing is written in that case).
+ */
+int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
+{
+ pte_t *ptep = (pte_t *)pmd; /* the PMD slot is written through the PTE setter below */
+ pte_t new_pmd = pfn_pte(__phys_to_pfn(addr), prot);
+
+ if (!radix_enabled())
+ return 0;
+
+ set_pte_at(&init_mm, 0 /* radix unused */, ptep, new_pmd);
+
+ return 1;
+}
+
+/*
+ * Clear @pmd if it currently holds a leaf mapping.
+ * Returns 1 when an entry was cleared, 0 when @pmd was not a leaf.
+ */
+int pmd_clear_huge(pmd_t *pmd)
+{
+ /* nothing to do for a non-leaf entry */
+ if (!pmd_leaf(*pmd))
+ return 0;
+
+ pmd_clear(pmd);
+ return 1;
+}
+/*
+ * Detach and free the PTE table that @pmd points to. The PMD is cleared and
+ * the kernel TLB flushed for [@addr, @addr + PMD_SIZE) before the table is
+ * freed. Always returns 1.
+ */
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+ pte_t *pte;
+
+ pte = (pte_t *)pmd_page_vaddr(*pmd); /* remember the table before the entry is cleared */
+ pmd_clear(pmd);
+
+ flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+
+ pte_free_kernel(&init_mm, pte);
+
+ return 1;
+}
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
new file mode 100644
index 000000000000..9e1f6558d026
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -0,0 +1,1587 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * TLB flush routines for radix kernels.
+ *
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/memblock.h>
+#include <linux/mmu_context.h>
+#include <linux/sched/mm.h>
+#include <linux/debugfs.h>
+
+#include <asm/ppc-opcode.h>
+#include <asm/tlb.h>
+#include <asm/tlbflush.h>
+#include <asm/trace.h>
+#include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
+
+#include "internal.h"
+
+/*
+ * tlbiel instruction for radix, set invalidation
+ * i.e., r=1 and is=01 or is=10 or is=11
+ *
+ * @set and @is are encoded into RB and @pid into RS. @ric and @prs are
+ * emitted as immediate operands ("i" constraints), so callers must pass
+ * compile-time constants for them.
+ */
+static __always_inline void tlbiel_radix_set_isa300(unsigned int set, unsigned int is,
+ unsigned int pid,
+ unsigned int ric, unsigned int prs)
+{
+ unsigned long rb;
+ unsigned long rs;
+
+ rb = (set << PPC_BITLSHIFT(51)) | (is << PPC_BITLSHIFT(53));
+ rs = ((unsigned long)pid << PPC_BITLSHIFT(31));
+
+ asm volatile(PPC_TLBIEL(%0, %1, %2, %3, 1)
+ : : "r"(rb), "r"(rs), "i"(ric), "i"(prs)
+ : "memory");
+}
+/*
+ * Invalidate the local TLB with tlbiel for invalidation selector @is.
+ * Set 0 is flushed with RIC_FLUSH_ALL; on pre-ARCH_31 CPUs sets
+ * 1..@num_sets-1 are additionally flushed with RIC_FLUSH_TLB. This is done
+ * for partition scope first (only with CPU_FTR_HVMODE) and then for process
+ * scope, bracketed by ptesync and ppc_after_tlbiel_barrier().
+ */
+static void tlbiel_all_isa300(unsigned int num_sets, unsigned int is)
+{
+ unsigned int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Flush the first set of the TLB, and the entire Page Walk Cache
+ * and partition table entries. Then flush the remaining sets of the
+ * TLB.
+ */
+
+ if (early_cpu_has_feature(CPU_FTR_HVMODE)) {
+ /* MSR[HV] should flush partition scope translations first. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 0);
+
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0,
+ RIC_FLUSH_TLB, 0);
+ }
+ }
+
+ /* Flush process scoped entries. */
+ tlbiel_radix_set_isa300(0, is, 0, RIC_FLUSH_ALL, 1);
+
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) {
+ for (set = 1; set < num_sets; set++)
+ tlbiel_radix_set_isa300(set, is, 0, RIC_FLUSH_TLB, 1);
+ }
+
+ ppc_after_tlbiel_barrier();
+}
+/*
+ * Invalidate all local TLB entries for the scope named by @action:
+ * TLB_INVAL_SCOPE_GLOBAL selects IS=3, TLB_INVAL_SCOPE_LPID selects IS=2,
+ * and any other value is a BUG(). On CPUs without CPU_FTR_ARCH_300 only a
+ * warning is printed. The ERAT is invalidated afterwards in either case.
+ */
+void radix__tlbiel_all(unsigned int action)
+{
+ unsigned int is;
+
+ switch (action) {
+ case TLB_INVAL_SCOPE_GLOBAL:
+ is = 3;
+ break;
+ case TLB_INVAL_SCOPE_LPID:
+ is = 2;
+ break;
+ default:
+ BUG();
+ }
+
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ tlbiel_all_isa300(POWER9_TLB_SETS_RADIX, is);
+ else
+ WARN(1, "%s called on pre-POWER9 CPU\n", __func__);
+
+ asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT "; isync" : : :"memory");
+}
+/*
+ * Issue one tlbiel for TLB set @set of process @pid (IS=1, process scoped,
+ * radix format) and trace it. @ric is an immediate operand and must be a
+ * compile-time constant. No barriers are issued here.
+ */
+static __always_inline void __tlbiel_pid(unsigned long pid, int set,
+ unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rb |= set << PPC_BITLSHIFT(51);
+ rs = ((unsigned long)pid) << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
+}
+/*
+ * Issue one tlbie for process @pid (IS=1, process scoped, radix format) and
+ * trace it. @ric is an immediate operand and must be a compile-time
+ * constant. No barriers are issued here.
+ */
+static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+/*
+ * Issue one tlbie for partition @lpid (IS=2, partition scoped, radix format)
+ * and trace it. @ric is an immediate operand and must be a compile-time
+ * constant. No barriers are issued here.
+ */
+static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = lpid;
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+/*
+ * Same as __tlbie_lpid() except that the invalidation is process scoped
+ * (prs = 1): IS=2, radix format, RS = @lpid. @ric must be a compile-time
+ * constant. No barriers are issued here.
+ */
+static __always_inline void __tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = PPC_BIT(52); /* IS = 2 */
+ rs = lpid;
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+/*
+ * Issue one tlbiel for virtual address @va of process @pid (process scoped,
+ * radix format) and trace it. @ap is the actual-page-size encoding placed in
+ * RB; @ric must be a compile-time constant. No barriers are issued here.
+ */
+static __always_inline void __tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = va & ~(PPC_BITMASK(52, 63)); /* drop bits 52..63 of the address; AP is merged in below */
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 1, rb, rs, ric, prs, r);
+}
+/*
+ * Issue one tlbie for virtual address @va of process @pid (process scoped,
+ * radix format) and trace it. @ap is the actual-page-size encoding placed in
+ * RB; @ric must be a compile-time constant. No barriers are issued here.
+ */
+static __always_inline void __tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = va & ~(PPC_BITMASK(52, 63)); /* drop bits 52..63 of the address; AP is merged in below */
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = pid << PPC_BITLSHIFT(31);
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+/*
+ * Issue one tlbie for address @va in partition @lpid (partition scoped,
+ * radix format) and trace it. @ap is the actual-page-size encoding placed in
+ * RB; @ric must be a compile-time constant. No barriers are issued here.
+ */
+static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb,rs,prs,r;
+
+ rb = va & ~(PPC_BITMASK(52, 63)); /* drop bits 52..63 of the address; AP is merged in below */
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = lpid;
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(lpid, 0, rb, rs, ric, prs, r);
+}
+
+/*
+ * Extra invalidations issued after a tlbie of (@va, @pid, @ap) on CPUs that
+ * have the corresponding feature bits set: with CPU_FTR_P9_TLBIE_ERAT_BUG a
+ * ptesync plus a tlbie of @va for PID 0; with CPU_FTR_P9_TLBIE_STQ_BUG a
+ * ptesync plus a repeat of the tlbie for @pid.
+ */
+static inline void fixup_tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+/*
+ * Range variant of fixup_tlbie_va(): with CPU_FTR_P9_TLBIE_ERAT_BUG a
+ * ptesync plus a PID-0 tlbie by PID (not by address) is issued; with
+ * CPU_FTR_P9_TLBIE_STQ_BUG a ptesync plus one tlbie of (@va, @pid, @ap).
+ */
+static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, RIC_FLUSH_TLB);
+ }
+}
+/*
+ * Extra invalidations issued after a tlbie by PID on CPUs with the
+ * corresponding feature bits: with CPU_FTR_P9_TLBIE_ERAT_BUG a ptesync plus
+ * a PID-0 tlbie; with CPU_FTR_P9_TLBIE_STQ_BUG a ptesync plus a single
+ * 64K-page tlbie of a dummy address for @pid.
+ */
+static inline void fixup_tlbie_pid(unsigned long pid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_pid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+ }
+}
+/*
+ * Partition-scoped counterpart of fixup_tlbie_va(): with
+ * CPU_FTR_P9_TLBIE_ERAT_BUG a ptesync plus a tlbie of @va for LPID 0; with
+ * CPU_FTR_P9_TLBIE_STQ_BUG a ptesync plus a repeat of the tlbie for @lpid.
+ */
+static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, 0, ap, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+/*
+ * Partition-scoped counterpart of fixup_tlbie_pid(): with
+ * CPU_FTR_P9_TLBIE_ERAT_BUG a ptesync plus an LPID-0 tlbie; with
+ * CPU_FTR_P9_TLBIE_STQ_BUG a ptesync plus a single 64K-page tlbie of a dummy
+ * address for @lpid.
+ */
+static inline void fixup_tlbie_lpid(unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid(0, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB);
+ }
+}
+
+/*
+ * We use 128 sets in radix mode and 256 sets in hpt mode.
+ *
+ * _tlbiel_pid() flushes this CPU's translations for @pid with tlbiel.
+ * RIC_FLUSH_PWC needs a single instruction and returns early. For
+ * RIC_FLUSH_TLB and RIC_FLUSH_ALL (also used for any other @ric value) set 0
+ * is flushed with the requested RIC and, on pre-ARCH_31 CPUs, the remaining
+ * sets with RIC_FLUSH_TLB, followed by a user ERAT invalidate.
+ */
+static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
+{
+ int set;
+
+ asm volatile("ptesync": : :"memory");
+
+ switch (ric) {
+ case RIC_FLUSH_PWC:
+
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ ppc_after_tlbiel_barrier();
+ return;
+ case RIC_FLUSH_TLB:
+ __tlbiel_pid(pid, 0, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ /*
+ * Flush the first set of the TLB, and if
+ * we're doing a RIC_FLUSH_ALL, also flush
+ * the entire Page Walk Cache.
+ */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_ALL);
+ }
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ /* For the remaining sets, just flush the TLB */
+ for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++)
+ __tlbiel_pid(pid, set, RIC_FLUSH_TLB);
+ }
+
+ ppc_after_tlbiel_barrier();
+ asm volatile(PPC_RADIX_INVALIDATE_ERAT_USER "; isync" : : :"memory");
+}
+/*
+ * Flush translations for @pid with tlbie, bracketed by ptesync before and
+ * "eieio; tlbsync; ptesync" after. The fixup_tlbie_pid() workaround is
+ * applied for RIC_FLUSH_TLB and RIC_FLUSH_ALL but not for RIC_FLUSH_PWC.
+ * Any other @ric value is treated as RIC_FLUSH_ALL.
+ */
+static inline void _tlbie_pid(unsigned long pid, unsigned long ric)
+{
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_pid
+ * must be a compile-time constant to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid(pid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid(pid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid(pid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid(pid);
+ }
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+/* Argument bundle handed to do_tlbiel_pid() through on_each_cpu_mask(). */
+struct tlbiel_pid {
+ unsigned long pid; /* PID whose translations are flushed */
+ unsigned long ric; /* one of the RIC_FLUSH_* values */
+};
+
+/*
+ * Cross-call handler: run _tlbiel_pid() on this CPU for the request in
+ * @info (a struct tlbiel_pid). Each call passes a literal RIC value rather
+ * than the runtime field.
+ */
+static void do_tlbiel_pid(void *info)
+{
+ struct tlbiel_pid *req = info;
+
+ switch (req->ric) {
+ case RIC_FLUSH_TLB:
+ _tlbiel_pid(req->pid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ _tlbiel_pid(req->pid, RIC_FLUSH_PWC);
+ break;
+ default:
+ /* anything else is treated as a full flush */
+ _tlbiel_pid(req->pid, RIC_FLUSH_ALL);
+ break;
+ }
+}
+/*
+ * Run a tlbiel flush of @pid with @ric on every CPU in mm_cpumask(@mm) and
+ * wait for completion. If the mm has coprocessors attached, a tlbie flush
+ * with RIC_FLUSH_ALL is issued as well (regardless of @ric).
+ */
+static inline void _tlbiel_pid_multicast(struct mm_struct *mm,
+ unsigned long pid, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_pid t = { .pid = pid, .ric = ric };
+
+ on_each_cpu_mask(cpus, do_tlbiel_pid, &t, 1);
+ /*
+ * Always want the CPU translations to be invalidated with tlbiel in
+ * these paths, so while coprocessors must use tlbie, we can not
+ * optimise away the tlbiel component.
+ */
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+}
+/*
+ * Flush partition-scoped translations for @lpid with tlbie, bracketed by
+ * ptesync before and "eieio; tlbsync; ptesync" after. The
+ * fixup_tlbie_lpid() workaround is applied for RIC_FLUSH_TLB and
+ * RIC_FLUSH_ALL but not for RIC_FLUSH_PWC. Any other @ric value is treated
+ * as RIC_FLUSH_ALL.
+ */
+static inline void _tlbie_lpid(unsigned long lpid, unsigned long ric)
+{
+ asm volatile("ptesync": : :"memory");
+
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_lpid
+ * must be a compile-time constant to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid(lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_lpid(lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid(lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid(lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_lpid(lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+/*
+ * Flush process-scoped translations of partition @lpid with tlbie, then
+ * apply fixup_tlbie_lpid() for every @ric value and finish with
+ * "eieio; tlbsync; ptesync". Unlike _tlbie_lpid(), no leading ptesync is
+ * issued here. Any unknown @ric value is treated as RIC_FLUSH_ALL.
+ */
+static __always_inline void _tlbie_lpid_guest(unsigned long lpid, unsigned long ric)
+{
+ /*
+ * Workaround the fact that the "ric" argument to __tlbie_lpid_guest
+ * must be a compile-time constant to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
+ }
+ fixup_tlbie_lpid(lpid);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+/*
+ * Issue one RIC_FLUSH_TLB tlbiel per @page_size step over [@start, @end)
+ * for @pid, using the AP encoding of @psize. No barriers are issued here.
+ */
+static inline void __tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB);
+}
+/*
+ * Flush this CPU's translation for (@va, @pid) at page size @psize with a
+ * single tlbiel, bracketed by ptesync and ppc_after_tlbiel_barrier().
+ * @ric must be a compile-time constant at the (inlined) call site.
+ */
+static __always_inline void _tlbiel_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbiel_va(va, pid, ap, ric);
+ ppc_after_tlbiel_barrier();
+}
+/*
+ * Flush this CPU's translations for [@start, @end) of @pid with tlbiel,
+ * bracketed by ptesync and ppc_after_tlbiel_barrier(). When @also_pwc is
+ * true the page walk cache for @pid is flushed first.
+ */
+static inline void _tlbiel_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_va_range(start, end, pid, page_size, psize);
+ ppc_after_tlbiel_barrier();
+}
+/*
+ * Issue one RIC_FLUSH_TLB tlbie per @page_size step over [@start, @end) for
+ * @pid, then apply the errata fixup once, using the last address that was
+ * flushed. No barriers are issued here.
+ *
+ * NOTE(review): with an empty range (@start >= @end) the fixup address is
+ * @start - @page_size; callers presumably never pass an empty range.
+ */
+static inline void __tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va(addr, pid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range(addr - page_size, pid, ap);
+}
+/*
+ * Flush the translation for (@va, @pid) at page size @psize with tlbie plus
+ * the fixup_tlbie_va() workaround, bracketed by ptesync before and
+ * "eieio; tlbsync; ptesync" after. @ric must be a compile-time constant at
+ * the (inlined) call site.
+ */
+static __always_inline void _tlbie_va(unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbie_va(va, pid, ap, ric);
+ fixup_tlbie_va(va, pid, ap);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+/* Argument bundle handed to do_tlbiel_va() through on_each_cpu_mask(). */
+struct tlbiel_va {
+ unsigned long pid; /* PID owning the address */
+ unsigned long va; /* virtual address to flush */
+ unsigned long psize; /* page size index, converted with mmu_get_ap() */
+ unsigned long ric; /* one of the RIC_FLUSH_* values */
+};
+
+/*
+ * Cross-call handler: run _tlbiel_va() on this CPU for the request in
+ * @info (a struct tlbiel_va). Each call passes a literal RIC value rather
+ * than the runtime field.
+ */
+static void do_tlbiel_va(void *info)
+{
+ struct tlbiel_va *req = info;
+
+ switch (req->ric) {
+ case RIC_FLUSH_TLB:
+ _tlbiel_va(req->va, req->pid, req->psize, RIC_FLUSH_TLB);
+ break;
+ case RIC_FLUSH_PWC:
+ _tlbiel_va(req->va, req->pid, req->psize, RIC_FLUSH_PWC);
+ break;
+ default:
+ /* anything else is treated as a full flush */
+ _tlbiel_va(req->va, req->pid, req->psize, RIC_FLUSH_ALL);
+ break;
+ }
+}
+/*
+ * Run a tlbiel flush of (@va, @pid, @psize) with @ric on every CPU in
+ * mm_cpumask(@mm) and wait for completion. If the mm has coprocessors
+ * attached, a RIC_FLUSH_TLB tlbie of the same address is issued as well.
+ */
+static inline void _tlbiel_va_multicast(struct mm_struct *mm,
+ unsigned long va, unsigned long pid,
+ unsigned long psize, unsigned long ric)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va t = { .va = va, .pid = pid, .psize = psize, .ric = ric };
+ on_each_cpu_mask(cpus, do_tlbiel_va, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va(va, pid, psize, RIC_FLUSH_TLB);
+}
+/* Argument bundle handed to do_tlbiel_va_range() through on_each_cpu_mask(). */
+struct tlbiel_va_range {
+ unsigned long pid; /* PID owning the range */
+ unsigned long start; /* first address to flush */
+ unsigned long end; /* end of the range (exclusive) */
+ unsigned long page_size; /* step between flushed addresses, in bytes */
+ unsigned long psize; /* page size index, converted with mmu_get_ap() */
+ bool also_pwc; /* flush the page walk cache for pid as well */
+};
+/* Cross-call handler: unpack @info (struct tlbiel_va_range) and run _tlbiel_va_range() on this CPU. */
+static void do_tlbiel_va_range(void *info)
+{
+ struct tlbiel_va_range *t = info;
+
+ _tlbiel_va_range(t->start, t->end, t->pid, t->page_size,
+ t->psize, t->also_pwc);
+}
+/*
+ * Flush the partition-scoped translation for (@va, @lpid) at page size
+ * @psize with tlbie plus the fixup_tlbie_lpid_va() workaround, bracketed by
+ * ptesync before and "eieio; tlbsync; ptesync" after. @ric must be a
+ * compile-time constant at the (inlined) call site.
+ */
+static __always_inline void _tlbie_lpid_va(unsigned long va, unsigned long lpid,
+ unsigned long psize, unsigned long ric)
+{
+ unsigned long ap = mmu_get_ap(psize);
+
+ asm volatile("ptesync": : :"memory");
+ __tlbie_lpid_va(va, lpid, ap, ric);
+ fixup_tlbie_lpid_va(va, lpid, ap);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+/*
+ * Flush translations for [@start, @end) of @pid with tlbie, bracketed by
+ * ptesync before and "eieio; tlbsync; ptesync" after. When @also_pwc is
+ * true the page walk cache for @pid is flushed first.
+ */
+static inline void _tlbie_va_range(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync": : :"memory");
+ if (also_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+/*
+ * Run a tlbiel range flush of [@start, @end) for @pid on every CPU in
+ * mm_cpumask(@mm) and wait for completion. If the mm has coprocessors
+ * attached, the same range is flushed with tlbie as well.
+ */
+static inline void _tlbiel_va_range_multicast(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ struct cpumask *cpus = mm_cpumask(mm);
+ struct tlbiel_va_range t = { .start = start, .end = end,
+ .pid = pid, .page_size = page_size,
+ .psize = psize, .also_pwc = also_pwc };
+
+ on_each_cpu_mask(cpus, do_tlbiel_va_range, &t, 1);
+ if (atomic_read(&mm->context.copros) > 0)
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+}
+
+/*
+ * Base TLB flushing operations:
+ *
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * - local_* variants of page and mm only apply to the current
+ * processor
+ *
+ * radix__local_flush_tlb_mm() flushes this CPU's TLB entries for @mm's PID
+ * with RIC_FLUSH_TLB. It warns once and does nothing when the mm has no
+ * context (MMU_NO_CONTEXT). Preemption is disabled around the flush.
+ */
+void radix__local_flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned long pid = mm->context.id;
+
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_mm);
+
+#ifndef CONFIG_SMP
+void radix__local_flush_all_mm(struct mm_struct *mm)
+{
+ unsigned long pid = mm->context.id;
+
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT)) /* no context assigned: warn once, nothing to flush */
+ return;
+
+ preempt_disable();
+ _tlbiel_pid(pid, RIC_FLUSH_ALL); /* local flush of @mm's PID with RIC_FLUSH_ALL */
+ preempt_enable();
+}
+EXPORT_SYMBOL(radix__local_flush_all_mm);
+/* !CONFIG_SMP variant: a full flush of @mm is just the local flush; @fullmm is unused. */
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
+{
+ radix__local_flush_all_mm(mm);
+}
+#endif /* CONFIG_SMP */
+/*
+ * Flush this CPU's TLB entry for @vmaddr in @mm at page size @psize with
+ * RIC_FLUSH_TLB. Warns once and does nothing when the mm has no context
+ * (MMU_NO_CONTEXT). Preemption is disabled around the flush.
+ */
+void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
+{
+ unsigned long pid = mm->context.id;
+
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ preempt_enable();
+}
+/*
+ * Flush this CPU's TLB entry for @vmaddr in @vma. With CONFIG_HUGETLB_PAGE,
+ * hugetlb VMAs are handed to radix__local_flush_hugetlb_page(); everything
+ * else is flushed at mmu_virtual_psize.
+ */
+void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ /* need the return fix for nohash.c */
+ if (is_vm_hugetlb_page(vma))
+ return radix__local_flush_hugetlb_page(vma, vmaddr);
+#endif
+ radix__local_flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
+}
+EXPORT_SYMBOL(radix__local_flush_tlb_page);
+
+/*
+ * Tell whether RIC=0 flushes for @mm have to be escalated to RIC=2.
+ */
+static bool mm_needs_flush_escalation(struct mm_struct *mm)
+{
+ /*
+ * The P9 nest MMU has issues with the page walk cache caching PTEs
+ * and not flushing them when RIC = 0 for a PID/LPID invalidate.
+ *
+ * This may have been fixed in shipping firmware (by disabling PWC
+ * or preventing it from caching PTEs), but until that is confirmed,
+ * this workaround is required - escalate all RIC=0 IS=1/2/3 flushes
+ * to RIC=2.
+ *
+ * POWER10 (and P9P) does not have this problem, so escalation is
+ * only needed on pre-ARCH_31 CPUs when the mm has coprocessors
+ * attached.
+ */
+ return !cpu_has_feature(CPU_FTR_ARCH_31) &&
+ atomic_read(&mm->context.copros) > 0;
+}
+
+/*
+ * If always_flush is true, then flush even if this CPU can't be removed
+ * from mm_cpumask.
+ *
+ * Try to stop this CPU from using @mm lazily: if the current task only has
+ * @mm as its active_mm, switch it to init_mm; then, if this CPU is still set
+ * in mm_cpumask(@mm), clear it and drop the active-CPU count. Whenever the
+ * CPU was removed from the mask, or @always_flush was requested, this CPU's
+ * TLB is flushed for @mm's PID with RIC_FLUSH_ALL. Nothing is trimmed when
+ * the current task is actually running @mm (current->mm == @mm).
+ */
+void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush)
+{
+ unsigned long pid = mm->context.id;
+ int cpu = smp_processor_id();
+
+ /*
+ * A kthread could have done a mmget_not_zero() after the flushing CPU
+ * checked mm_cpumask, and be in the process of kthread_use_mm when
+ * interrupted here. In that case, current->mm will be set to mm,
+ * because kthread_use_mm() setting ->mm and switching to the mm is
+ * done with interrupts off.
+ */
+ if (current->mm == mm)
+ goto out;
+
+ if (current->active_mm == mm) {
+ unsigned long flags;
+
+ WARN_ON_ONCE(current->mm != NULL);
+ /*
+ * It is a kernel thread and is using mm as the lazy tlb, so
+ * switch it to init_mm. This is not always called from IPI
+ * (e.g., flush_type_needed), so must disable irqs.
+ */
+ local_irq_save(flags);
+ mmgrab_lazy_tlb(&init_mm);
+ current->active_mm = &init_mm;
+ switch_mm_irqs_off(mm, &init_mm, current);
+ mmdrop_lazy_tlb(mm);
+ local_irq_restore(flags);
+ }
+
+ /*
+ * This IPI may be initiated from any source including those not
+ * running the mm, so there may be a racing IPI that comes after
+ * this one which finds the cpumask already clear. Check and avoid
+ * underflowing the active_cpus count in that case. The race should
+ * not otherwise be a problem, but the TLB must be flushed because
+ * that's what the caller expects.
+ */
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+ dec_mm_active_cpus(mm);
+ cpumask_clear_cpu(cpu, mm_cpumask(mm));
+ always_flush = true; /* the CPU left the mask, so its stale entries must go */
+ }
+
+out:
+ if (always_flush)
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
+}
+
+#ifdef CONFIG_SMP
+static void do_exit_flush_lazy_tlb(void *arg)
+{
+ struct mm_struct *mm = arg; /* cross-call argument is the mm to drop */
+ exit_lazy_flush_tlb(mm, true); /* always flush, even if this CPU stays in the mask */
+}
+/*
+ * Ask the other CPUs in mm_cpumask(@mm) to run do_exit_flush_lazy_tlb() for
+ * @mm, waiting for them to finish.
+ */
+static void exit_flush_lazy_tlbs(struct mm_struct *mm)
+{
+ /*
+ * Would be nice if this was async so it could be run in
+ * parallel with our local flush, but generic code does not
+ * give a good API for it. Could extend the generic code or
+ * make a special powerpc IPI for flushing TLBs.
+ * For now it's not too performance critical.
+ */
+ smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb,
+ (void *)mm, 1);
+}
+
+#else /* CONFIG_SMP */
+static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { }
+#endif /* CONFIG_SMP */
+
+static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock);
+
+/*
+ * Interval between flushes at which we send out IPIs to check whether the
+ * mm_cpumask can be trimmed for the case where it's not a single-threaded
+ * process flushing its own mm. The intent is to reduce the cost of later
+ * flushes. Don't want this to be so low that it adds noticeable cost to TLB
+ * flushing, or so high that it doesn't help reduce global TLBIEs.
+ */
+static unsigned long tlb_mm_cpumask_trim_timer = 1073;
+
+static bool tick_and_test_trim_clock(void)
+{
+ if (__this_cpu_inc_return(mm_cpumask_trim_clock) ==
+ tlb_mm_cpumask_trim_timer) {
+ __this_cpu_write(mm_cpumask_trim_clock, 0);
+ return true;
+ }
+ return false;
+}
+
+enum tlb_flush_type {
+ FLUSH_TYPE_NONE,
+ FLUSH_TYPE_LOCAL,
+ FLUSH_TYPE_GLOBAL,
+};
+
+static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm)
+{
+ int active_cpus = atomic_read(&mm->context.active_cpus);
+ int cpu = smp_processor_id();
+
+ if (active_cpus == 0)
+ return FLUSH_TYPE_NONE;
+ if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) {
+ if (current->mm != mm) {
+ /*
+ * Asynchronous flush sources may trim down to nothing
+ * if the process is not running, so occasionally try
+ * to trim.
+ */
+ if (tick_and_test_trim_clock()) {
+ exit_lazy_flush_tlb(mm, true);
+ return FLUSH_TYPE_NONE;
+ }
+ }
+ return FLUSH_TYPE_LOCAL;
+ }
+
+ /* Coprocessors require TLBIE to invalidate nMMU. */
+ if (atomic_read(&mm->context.copros) > 0)
+ return FLUSH_TYPE_GLOBAL;
+
+ /*
+ * In the fullmm case there's no point doing the exit_flush_lazy_tlbs
+ * because the mm is being taken down anyway, and a TLBIE tends to
+ * be faster than an IPI+TLBIEL.
+ */
+ if (fullmm)
+ return FLUSH_TYPE_GLOBAL;
+
+ /*
+ * If we are running the only thread of a single-threaded process,
+ * then we should almost always be able to trim off the rest of the
+ * CPU mask (except in the case of use_mm() races), so always try
+ * trimming the mask.
+ */
+ if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) {
+ exit_flush_lazy_tlbs(mm);
+ /*
+ * use_mm() race could prevent IPIs from being able to clear
+ * the cpumask here, however those users are established
+ * after our first check (and so after the PTEs are removed),
+ * and the TLB still gets flushed by the IPI, so this CPU
+ * will only require a local flush.
+ */
+ return FLUSH_TYPE_LOCAL;
+ }
+
+ /*
+ * Occasionally try to trim down the cpumask. It's possible this can
+ * bring the mask to zero, which results in no flush.
+ */
+ if (tick_and_test_trim_clock()) {
+ exit_flush_lazy_tlbs(mm);
+ if (current->mm == mm)
+ return FLUSH_TYPE_LOCAL;
+ if (cpumask_test_cpu(cpu, mm_cpumask(mm)))
+ exit_lazy_flush_tlb(mm, true);
+ return FLUSH_TYPE_NONE;
+ }
+
+ return FLUSH_TYPE_GLOBAL;
+}
+
+#ifdef CONFIG_SMP
+void radix__flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned long pid;
+ enum tlb_flush_type type;
+
+ pid = mm->context.id;
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ /*
+ * Order loads of mm_cpumask (in flush_type_needed) vs previous
+ * stores to clear ptes before the invalidate. See barrier in
+ * switch_mm_irqs_off
+ */
+ smp_mb();
+ type = flush_type_needed(mm, false);
+ if (type == FLUSH_TYPE_LOCAL) {
+ _tlbiel_pid(pid, RIC_FLUSH_TLB);
+ } else if (type == FLUSH_TYPE_GLOBAL) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbie_pid(pid, RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB);
+ }
+ }
+ preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+EXPORT_SYMBOL(radix__flush_tlb_mm);
+
+static void __flush_all_mm(struct mm_struct *mm, bool fullmm)
+{
+ unsigned long pid;
+ enum tlb_flush_type type;
+
+ pid = mm->context.id;
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ type = flush_type_needed(mm, fullmm);
+ if (type == FLUSH_TYPE_LOCAL) {
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ } else if (type == FLUSH_TYPE_GLOBAL) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type,
+ H_RPTI_PAGE_ALL, 0, -1UL);
+ } else if (cputlb_use_tlbie())
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ else
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
+ }
+ preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
+}
+
+void radix__flush_all_mm(struct mm_struct *mm)
+{
+ __flush_all_mm(mm, false);
+}
+EXPORT_SYMBOL(radix__flush_all_mm);
+
+void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
+ int psize)
+{
+ unsigned long pid;
+ enum tlb_flush_type type;
+
+ pid = mm->context.id;
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ type = flush_type_needed(mm, false);
+ if (type == FLUSH_TYPE_LOCAL) {
+ _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ } else if (type == FLUSH_TYPE_GLOBAL) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, pg_sizes, size;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ pg_sizes = psize_to_rpti_pgsize(psize);
+ size = 1UL << mmu_psize_to_shift(psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+ pg_sizes, vmaddr,
+ vmaddr + size);
+ } else if (cputlb_use_tlbie())
+ _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
+ else
+ _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
+ }
+ preempt_enable();
+}
+
+void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_page(vma, vmaddr);
+#endif
+ radix__flush_tlb_page_psize(vma->vm_mm, vmaddr, mmu_virtual_psize);
+}
+EXPORT_SYMBOL(radix__flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+static void do_tlbiel_kernel(void *info)
+{
+ _tlbiel_pid(0, RIC_FLUSH_ALL);
+}
+
+static inline void _tlbiel_kernel_broadcast(void)
+{
+ on_each_cpu(do_tlbiel_kernel, NULL, 1);
+ if (tlbie_capable) {
+ /*
+ * Coherent accelerators don't refcount kernel memory mappings,
+ * so have to always issue a tlbie for them. This is quite a
+ * slow path anyway.
+ */
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ }
+}
+
+/*
+ * If kernel TLBIs ever become local rather than global, then
+ * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
+ * assumes kernel TLBIs are global.
+ */
+void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
+ unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+
+ pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
+ start, end);
+ } else if (cputlb_use_tlbie())
+ _tlbie_pid(0, RIC_FLUSH_ALL);
+ else
+ _tlbiel_kernel_broadcast();
+}
+EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
+
+/*
+ * Used only as a sanity check: range flushers warn if end == TLB_FLUSH_ALL.
+ */
+#define TLB_FLUSH_ALL -1UL
+
+/*
+ * Number of pages above which we invalidate the entire PID rather than
+ * flush individual pages, for local and global flushes respectively.
+ *
+ * tlbie goes out to the interconnect and individual ops are more costly.
+ * It also does not iterate over sets like the local tlbiel variant when
+ * invalidating a full PID, so it has a far lower threshold to change from
+ * individual page flushes to full-pid flushes.
+ */
+static u32 tlb_single_page_flush_ceiling __read_mostly = 33;
+static u32 tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
+
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end)
+{
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool flush_pid, flush_pwc = false;
+ enum tlb_flush_type type;
+
+ pid = mm->context.id;
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ WARN_ON_ONCE(end == TLB_FLUSH_ALL);
+
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ type = flush_type_needed(mm, false);
+ if (type == FLUSH_TYPE_NONE)
+ goto out;
+
+ if (type == FLUSH_TYPE_GLOBAL)
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+ else
+ flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+ /*
+ * A full PID flush already flushes the PWC. If this is not a full PID
+ * flush, check whether the range is at least PMD_SIZE and force a PWC
+ * flush; mremap() depends on this behaviour.
+ */
+ if (!flush_pid && (end - start) >= PMD_SIZE)
+ flush_pwc = true;
+
+ if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+ unsigned long type = H_RPTI_TYPE_TLB;
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ if (flush_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
+ } else if (flush_pid) {
+ /*
+ * We are now flushing a range larger than PMD size force a RIC_FLUSH_ALL
+ */
+ if (type == FLUSH_TYPE_LOCAL) {
+ _tlbiel_pid(pid, RIC_FLUSH_ALL);
+ } else {
+ if (cputlb_use_tlbie()) {
+ _tlbie_pid(pid, RIC_FLUSH_ALL);
+ } else {
+ _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
+ }
+ }
+ } else {
+ bool hflush;
+ unsigned long hstart, hend;
+
+ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+ hend = end & PMD_MASK;
+ hflush = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hstart < hend;
+
+ if (type == FLUSH_TYPE_LOCAL) {
+ asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ /* For PWC, only one flush is needed */
+ __tlbiel_pid(pid, 0, RIC_FLUSH_PWC);
+ __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbiel_va_range(hstart, hend, pid,
+ PMD_SIZE, MMU_PAGE_2M);
+ ppc_after_tlbiel_barrier();
+ } else if (cputlb_use_tlbie()) {
+ asm volatile("ptesync": : :"memory");
+ if (flush_pwc)
+ __tlbie_pid(pid, RIC_FLUSH_PWC);
+ __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
+ if (hflush)
+ __tlbie_va_range(hstart, hend, pid,
+ PMD_SIZE, MMU_PAGE_2M);
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+ } else {
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, mmu_virtual_psize, flush_pwc);
+ if (hflush)
+ _tlbiel_va_range_multicast(mm,
+ hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, flush_pwc);
+ }
+ }
+out:
+ preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+}
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ __radix__flush_tlb_range(vma->vm_mm, start, end);
+}
+EXPORT_SYMBOL(radix__flush_tlb_range);
+
+static int radix_get_mmu_psize(int page_size)
+{
+ int psize;
+
+ if (page_size == (1UL << mmu_psize_defs[mmu_virtual_psize].shift))
+ psize = mmu_virtual_psize;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_2M].shift))
+ psize = MMU_PAGE_2M;
+ else if (page_size == (1UL << mmu_psize_defs[MMU_PAGE_1G].shift))
+ psize = MMU_PAGE_1G;
+ else
+ return -1;
+ return psize;
+}
+
+/*
+ * Flush partition scoped LPID address translation for all CPUs.
+ */
+void radix__flush_tlb_lpid_page(unsigned int lpid,
+ unsigned long addr,
+ unsigned long page_size)
+{
+ int psize = radix_get_mmu_psize(page_size);
+
+ _tlbie_lpid_va(addr, lpid, psize, RIC_FLUSH_TLB);
+}
+EXPORT_SYMBOL_GPL(radix__flush_tlb_lpid_page);
+
+/*
+ * Flush partition scoped PWC from LPID for all CPUs.
+ */
+void radix__flush_pwc_lpid(unsigned int lpid)
+{
+ _tlbie_lpid(lpid, RIC_FLUSH_PWC);
+}
+EXPORT_SYMBOL_GPL(radix__flush_pwc_lpid);
+
+/*
+ * Flush partition scoped translations from LPID (=LPIDR)
+ */
+void radix__flush_all_lpid(unsigned int lpid)
+{
+ _tlbie_lpid(lpid, RIC_FLUSH_ALL);
+}
+EXPORT_SYMBOL_GPL(radix__flush_all_lpid);
+
+/*
+ * Flush process scoped translations from LPID (=LPIDR)
+ */
+void radix__flush_all_lpid_guest(unsigned int lpid)
+{
+ _tlbie_lpid_guest(lpid, RIC_FLUSH_ALL);
+}
+
+void radix__tlb_flush(struct mmu_gather *tlb)
+{
+ int psize = 0;
+ struct mm_struct *mm = tlb->mm;
+ int page_size = tlb->page_size;
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
+
+ /*
+ * if page size is not something we understand, do a full mm flush
+ *
+ * A "fullmm" flush must always do a flush_all_mm (RIC=2) flush
+ * that flushes the process table entry cache upon process teardown.
+ * See the comment for radix in arch_exit_mmap().
+ */
+ if (tlb->fullmm) {
+ if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) {
+ /*
+ * Shootdown based lazy tlb mm refcounting means we
+ * have to IPI everyone in the mm_cpumask anyway soon
+ * when the mm goes away, so might as well do it as
+ * part of the final flush now.
+ *
+ * If lazy shootdown was improved to reduce IPIs (e.g.,
+ * by batching), then it may end up being better to use
+ * tlbies here instead.
+ */
+ preempt_disable();
+
+ smp_mb(); /* see radix__flush_tlb_mm */
+ exit_flush_lazy_tlbs(mm);
+ __flush_all_mm(mm, true);
+
+ preempt_enable();
+ } else {
+ __flush_all_mm(mm, true);
+ }
+
+ } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
+ if (!tlb->freed_tables)
+ radix__flush_tlb_mm(mm);
+ else
+ radix__flush_all_mm(mm);
+ } else {
+ if (!tlb->freed_tables)
+ radix__flush_tlb_range_psize(mm, start, end, psize);
+ else
+ radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
+ }
+}
+
+static void __radix__flush_tlb_range_psize(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ int psize, bool also_pwc)
+{
+ unsigned long pid;
+ unsigned int page_shift = mmu_psize_defs[psize].shift;
+ unsigned long page_size = 1UL << page_shift;
+ unsigned long nr_pages = (end - start) >> page_shift;
+ bool flush_pid;
+ enum tlb_flush_type type;
+
+ pid = mm->context.id;
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ WARN_ON_ONCE(end == TLB_FLUSH_ALL);
+
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ type = flush_type_needed(mm, false);
+ if (type == FLUSH_TYPE_NONE)
+ goto out;
+
+ if (type == FLUSH_TYPE_GLOBAL)
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+ else
+ flush_pid = nr_pages > tlb_local_single_page_flush_ceiling;
+
+ if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) {
+ unsigned long tgt = H_RPTI_TARGET_CMMU;
+ unsigned long type = H_RPTI_TYPE_TLB;
+ unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
+
+ if (also_pwc)
+ type |= H_RPTI_TYPE_PWC;
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
+ } else if (flush_pid) {
+ if (type == FLUSH_TYPE_LOCAL) {
+ _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ } else {
+ if (cputlb_use_tlbie()) {
+ if (mm_needs_flush_escalation(mm))
+ also_pwc = true;
+
+ _tlbie_pid(pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ } else {
+ _tlbiel_pid_multicast(mm, pid,
+ also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
+ }
+
+ }
+ } else {
+ if (type == FLUSH_TYPE_LOCAL)
+ _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc);
+ else if (cputlb_use_tlbie())
+ _tlbie_va_range(start, end, pid, page_size, psize, also_pwc);
+ else
+ _tlbiel_va_range_multicast(mm,
+ start, end, pid, page_size, psize, also_pwc);
+ }
+out:
+ preempt_enable();
+ mmu_notifier_arch_invalidate_secondary_tlbs(mm, start, end);
+}
+
+void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ return __radix__flush_tlb_range_psize(mm, start, end, psize, false);
+}
+
+void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long end, int psize)
+{
+ __radix__flush_tlb_range_psize(mm, start, end, psize, true);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr)
+{
+ unsigned long pid, end;
+ enum tlb_flush_type type;
+
+ pid = mm->context.id;
+ if (WARN_ON_ONCE(pid == MMU_NO_CONTEXT))
+ return;
+
+ /* 4k page size, just blow the world */
+ if (PAGE_SIZE == 0x1000) {
+ radix__flush_all_mm(mm);
+ return;
+ }
+
+ end = addr + HPAGE_PMD_SIZE;
+
+ /* Otherwise first do the PWC, then iterate the pages. */
+ preempt_disable();
+ smp_mb(); /* see radix__flush_tlb_mm */
+ type = flush_type_needed(mm, false);
+ if (type == FLUSH_TYPE_LOCAL) {
+ _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ } else if (type == FLUSH_TYPE_GLOBAL) {
+ if (!mmu_has_feature(MMU_FTR_GTSE)) {
+ unsigned long tgt, type, pg_sizes;
+
+ tgt = H_RPTI_TARGET_CMMU;
+ type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+ H_RPTI_TYPE_PRT;
+ pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+ if (atomic_read(&mm->context.copros) > 0)
+ tgt |= H_RPTI_TARGET_NMMU;
+ pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
+ addr, end);
+ } else if (cputlb_use_tlbie())
+ _tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ else
+ _tlbiel_va_range_multicast(mm,
+ addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
+ }
+
+ preempt_enable();
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+void radix__flush_pmd_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_2M);
+}
+EXPORT_SYMBOL(radix__flush_pmd_tlb_range);
+
+void radix__flush_pud_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ radix__flush_tlb_range_psize(vma->vm_mm, start, end, MMU_PAGE_1G);
+}
+EXPORT_SYMBOL(radix__flush_pud_tlb_range);
+
+void radix__flush_tlb_all(void)
+{
+ unsigned long rb,prs,r,rs;
+ unsigned long ric = RIC_FLUSH_ALL;
+
+ rb = 0x3 << PPC_BITLSHIFT(53); /* IS = 3 */
+ prs = 0; /* partition scoped */
+ r = 1; /* radix format */
+ rs = 1 & ((1UL << 32) - 1); /* any LPID value to flush guest mappings */
+
+ asm volatile("ptesync": : :"memory");
+ /*
+ * now flush guest entries by passing PRS = 1 and LPID != 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(1), "i"(ric), "r"(rs) : "memory");
+ /*
+ * now flush host entries by passing PRS = 0 and LPID == 0
+ */
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
+ asm volatile("eieio; tlbsync; ptesync": : :"memory");
+}
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static __always_inline void __tlbie_pid_lpid(unsigned long pid,
+ unsigned long lpid,
+ unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = PPC_BIT(53); /* IS = 1 */
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap, unsigned long ric)
+{
+ unsigned long rb, rs, prs, r;
+
+ rb = va & ~(PPC_BITMASK(52, 63));
+ rb |= ap << PPC_BITLSHIFT(58);
+ rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31)));
+ prs = 1; /* process scoped */
+ r = 1; /* radix format */
+
+ asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+ : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
+ trace_tlbie(0, 0, rb, rs, ric, prs, r);
+}
+
+static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid)
+{
+ /*
+ * We can use any address for the invalidation, pick one which is
+ * probably unused as an optimisation.
+ */
+ unsigned long va = ((1UL << 52) - 1);
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K),
+ RIC_FLUSH_TLB);
+ }
+}
+
+static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid,
+ unsigned long ric)
+{
+ asm volatile("ptesync" : : : "memory");
+
+ /*
+ * Work around the fact that the "ric" argument to __tlbie_pid_lpid
+ * must be a compile-time constant to match the "i" constraint
+ * in the asm statement.
+ */
+ switch (ric) {
+ case RIC_FLUSH_TLB:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ break;
+ case RIC_FLUSH_PWC:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ break;
+ case RIC_FLUSH_ALL:
+ default:
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ fixup_tlbie_pid_lpid(pid, lpid);
+ }
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void fixup_tlbie_va_range_lpid(unsigned long va,
+ unsigned long pid,
+ unsigned long lpid,
+ unsigned long ap)
+{
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB);
+ }
+
+ if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
+ asm volatile("ptesync" : : : "memory");
+ __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB);
+ }
+}
+
+static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize)
+{
+ unsigned long addr;
+ unsigned long ap = mmu_get_ap(psize);
+
+ for (addr = start; addr < end; addr += page_size)
+ __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB);
+
+ fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap);
+}
+
+static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end,
+ unsigned long pid, unsigned long lpid,
+ unsigned long page_size,
+ unsigned long psize, bool also_pwc)
+{
+ asm volatile("ptesync" : : : "memory");
+ if (also_pwc)
+ __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+ __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize);
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+/*
+ * Performs process-scoped invalidations for a given LPID
+ * as part of H_RPT_INVALIDATE hcall.
+ */
+void do_h_rpt_invalidate_prt(unsigned long pid, unsigned long lpid,
+ unsigned long type, unsigned long pg_sizes,
+ unsigned long start, unsigned long end)
+{
+ unsigned long psize, nr_pages;
+ struct mmu_psize_def *def;
+ bool flush_pid;
+
+ /*
+ * A H_RPTI_TYPE_ALL request implies RIC=3, hence
+ * do a single IS=1 based flush.
+ */
+ if ((type & H_RPTI_TYPE_ALL) == H_RPTI_TYPE_ALL) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL);
+ return;
+ }
+
+ if (type & H_RPTI_TYPE_PWC)
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC);
+
+ /* Full PID flush */
+ if (start == 0 && end == -1)
+ return _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+
+ /* Do range invalidation for all the valid page sizes */
+ for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+ def = &mmu_psize_defs[psize];
+ if (!(pg_sizes & def->h_rpt_pgsize))
+ continue;
+
+ nr_pages = (end - start) >> def->shift;
+ flush_pid = nr_pages > tlb_single_page_flush_ceiling;
+
+ /*
+ * If the number of pages spanning the range is above
+ * the ceiling, convert the request into a full PID flush.
+ * And since PID flush takes out all the page sizes, there
+ * is no need to consider remaining page sizes.
+ */
+ if (flush_pid) {
+ _tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB);
+ return;
+ }
+ _tlbie_va_range_lpid(start, end, pid, lpid,
+ (1UL << def->shift), psize, false);
+ }
+}
+EXPORT_SYMBOL_GPL(do_h_rpt_invalidate_prt);
+
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
+
+static int __init create_tlb_single_page_flush_ceiling(void)
+{
+ debugfs_create_u32("tlb_single_page_flush_ceiling", 0600,
+ arch_debugfs_dir, &tlb_single_page_flush_ceiling);
+ debugfs_create_u32("tlb_local_single_page_flush_ceiling", 0600,
+ arch_debugfs_dir, &tlb_local_single_page_flush_ceiling);
+ return 0;
+}
+late_initcall(create_tlb_single_page_flush_ceiling);
+
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
new file mode 100644
index 000000000000..6b783552403c
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -0,0 +1,870 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC64 SLB support.
+ *
+ * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
+ * Based on earlier code written by:
+ * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
+ * Copyright (c) 2001 Dave Engebretsen
+ * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
+ */
+
+#include <asm/interrupt.h>
+#include <asm/mmu.h>
+#include <asm/mmu_context.h>
+#include <asm/paca.h>
+#include <asm/lppaca.h>
+#include <asm/ppc-opcode.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/smp.h>
+#include <linux/compiler.h>
+#include <linux/context_tracking.h>
+#include <linux/mm_types.h>
+#include <linux/pgtable.h>
+
+#include <asm/udbg.h>
+#include <asm/text-patching.h>
+
+#include "internal.h"
+
+
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
+
+bool stress_slb_enabled __initdata;
+
+static int __init parse_stress_slb(char *p)
+{
+ stress_slb_enabled = true;
+ return 0;
+}
+early_param("stress_slb", parse_stress_slb);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_slb_key);
+
+static void assert_slb_presence(bool present, unsigned long ea)
+{
+#ifdef CONFIG_DEBUG_VM
+ unsigned long tmp;
+
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_206))
+ return;
+
+ /*
+ * slbfee. requires bit 24 (PPC bit 39) be clear in RB. Hardware
+ * ignores all other bits from 0-27, so just clear them all.
+ */
+ ea &= ~((1UL << SID_SHIFT) - 1);
+ asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0");
+
+ WARN_ON(present == (tmp == 0));
+#endif
+}
+
+static inline void slb_shadow_update(unsigned long ea, int ssize,
+ unsigned long flags,
+ enum slb_index index)
+{
+ struct slb_shadow *p = get_slb_shadow();
+
+ /*
+ * Clear the ESID first so the entry is not valid while we are
+ * updating it. No write barriers are needed here, provided
+ * we only update the current CPU's SLB shadow buffer.
+ */
+ WRITE_ONCE(p->save_area[index].esid, 0);
+ WRITE_ONCE(p->save_area[index].vsid, cpu_to_be64(mk_vsid_data(ea, ssize, flags)));
+ WRITE_ONCE(p->save_area[index].esid, cpu_to_be64(mk_esid_data(ea, ssize, index)));
+}
+
+static inline void slb_shadow_clear(enum slb_index index)
+{
+ WRITE_ONCE(get_slb_shadow()->save_area[index].esid, cpu_to_be64(index));
+}
+
+static inline void create_shadowed_slbe(unsigned long ea, int ssize,
+ unsigned long flags,
+ enum slb_index index)
+{
+ /*
+ * Updating the shadow buffer before writing the SLB ensures
+ * we don't get a stale entry here if we get preempted by PHYP
+ * between these two statements.
+ */
+ slb_shadow_update(ea, ssize, flags, index);
+
+ assert_slb_presence(false, ea);
+ asm volatile("slbmte %0,%1" :
+ : "r" (mk_vsid_data(ea, ssize, flags)),
+ "r" (mk_esid_data(ea, ssize, index))
+ : "memory" );
+}
+
+/*
+ * Insert bolted entries into SLB (which may not be empty, so don't clear
+ * slb_cache_ptr).
+ */
+void __slb_restore_bolted_realmode(void)
+{
+ struct slb_shadow *p = get_slb_shadow();
+ enum slb_index index;
+
+ /* No isync needed because realmode. */
+ for (index = 0; index < SLB_NUM_BOLTED; index++) {
+ asm volatile("slbmte %0,%1" :
+ : "r" (be64_to_cpu(p->save_area[index].vsid)),
+ "r" (be64_to_cpu(p->save_area[index].esid)));
+ }
+
+ assert_slb_presence(true, local_paca->kstack);
+}
+
+/*
+ * Insert the bolted entries into an empty SLB.
+ */
+void slb_restore_bolted_realmode(void)
+{
+ __slb_restore_bolted_realmode();
+ get_paca()->slb_cache_ptr = 0;
+
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+}
+
+/*
+ * This flushes all SLB entries including 0, so it must be realmode.
+ */
+void slb_flush_all_realmode(void)
+{
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+}
+
+static __always_inline void __slb_flush_and_restore_bolted(bool preserve_kernel_lookaside)
+{
+ struct slb_shadow *p = get_slb_shadow();
+ unsigned long ksp_esid_data, ksp_vsid_data;
+ u32 ih;
+
+ /*
+ * SLBIA IH=1 on ISA v2.05 and newer processors may preserve lookaside
+ * information created with Class=0 entries, which we use for kernel
+ * SLB entries (the SLB entries themselves are still invalidated).
+ *
+ * Older processors will ignore this optimisation. Over-invalidation
+ * is fine because we never rely on lookaside information existing.
+ */
+ if (preserve_kernel_lookaside)
+ ih = 1;
+ else
+ ih = 0;
+
+ ksp_esid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+ ksp_vsid_data = be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+ asm volatile(PPC_SLBIA(%0)" \n"
+ "slbmte %1, %2 \n"
+ :: "i" (ih),
+ "r" (ksp_vsid_data),
+ "r" (ksp_esid_data)
+ : "memory");
+}
+
+/*
+ * This flushes non-bolted entries, it can be run in virtual mode. Must
+ * be called with interrupts disabled.
+ */
+void slb_flush_and_restore_bolted(void)
+{
+ BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
+
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * We can't take a PMU exception in the following code, so hard
+ * disable interrupts.
+ */
+ hard_irq_disable();
+
+ isync();
+ __slb_flush_and_restore_bolted(false);
+ isync();
+
+ assert_slb_presence(true, get_paca()->kstack);
+
+ get_paca()->slb_cache_ptr = 0;
+
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+}
+
+void slb_save_contents(struct slb_entry *slb_ptr)
+{
+ int i;
+ unsigned long e, v;
+
+ /* Save slb_cache_ptr value. */
+ get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
+
+ if (!slb_ptr)
+ return;
+
+ for (i = 0; i < mmu_slb_size; i++) {
+ asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
+ asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
+ slb_ptr->esid = e;
+ slb_ptr->vsid = v;
+ slb_ptr++;
+ }
+}
+
+void slb_dump_contents(struct slb_entry *slb_ptr)
+{
+ int i, n;
+ unsigned long e, v;
+ unsigned long llp;
+
+ if (!slb_ptr)
+ return;
+
+ pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
+
+ for (i = 0; i < mmu_slb_size; i++) {
+ e = slb_ptr->esid;
+ v = slb_ptr->vsid;
+ slb_ptr++;
+
+ if (!e && !v)
+ continue;
+
+ pr_err("%02d %016lx %016lx %s\n", i, e, v,
+ (e & SLB_ESID_V) ? "VALID" : "NOT VALID");
+
+ if (!(e & SLB_ESID_V))
+ continue;
+
+ llp = v & SLB_VSID_LLP;
+ if (v & SLB_VSID_B_1T) {
+ pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
+ GET_ESID_1T(e),
+ (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
+ } else {
+ pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
+ GET_ESID(e),
+ (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
+ }
+ }
+
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* RR is not so useful as it's often not used for allocation */
+ pr_err("SLB RR allocator index %d\n", get_paca()->stab_rr);
+
+ /* Dump slb cache entries as well. */
+ pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
+ pr_err("Valid SLB cache entries:\n");
+ n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
+ for (i = 0; i < n; i++)
+ pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+ pr_err("Rest of SLB cache entries:\n");
+ for (i = n; i < SLB_CACHE_ENTRIES; i++)
+ pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+ }
+}
+
+/*
+ * Drop the non-bolted SLB entries by calling slb_flush_and_restore_bolted().
+ */
+void slb_vmalloc_update(void)
+{
+ /*
+ * vmalloc is not bolted, so just have to flush non-bolted.
+ */
+ slb_flush_and_restore_bolted();
+}
+
+/*
+ * Return true if @esid is already held in the thread's SLB preload ring.
+ *
+ * The ring contains slb_preload_nr live entries, starting at
+ * slb_preload_tail and wrapping modulo SLB_PRELOAD_NR.
+ */
+static bool preload_hit(struct thread_info *ti, unsigned long esid)
+{
+	unsigned char n = 0;
+
+	while (n < ti->slb_preload_nr) {
+		unsigned char slot;
+
+		slot = (ti->slb_preload_tail + n) % SLB_PRELOAD_NR;
+		if (ti->slb_preload_esid[slot] == esid)
+			return true;
+		n++;
+	}
+
+	return false;
+}
+
+/*
+ * Record the segment containing @ea in the thread's SLB preload ring.
+ *
+ * Returns false if the segment was already present (nothing changes), true
+ * if it was added. When the ring is full the oldest entry is overwritten
+ * and the tail advances instead of the count.
+ */
+static bool preload_add(struct thread_info *ti, unsigned long ea)
+{
+	unsigned long seg;
+	unsigned char slot;
+
+	/* EAs are stored >> 28 so 256MB segments don't need clearing */
+	if (mmu_has_feature(MMU_FTR_1T_SEGMENT) && (ea & ESID_MASK_1T))
+		ea &= ESID_MASK_1T;
+
+	seg = ea >> SID_SHIFT;
+	if (preload_hit(ti, seg))
+		return false;
+
+	/* The slot just past the newest live entry. */
+	slot = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
+	ti->slb_preload_esid[slot] = seg;
+
+	if (ti->slb_preload_nr != SLB_PRELOAD_NR)
+		ti->slb_preload_nr++;
+	else
+		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+
+	return true;
+}
+
+/*
+ * Retire the oldest entry of the thread's SLB preload ring, if there is one:
+ * the tail moves forward by one slot and the live count drops by one.
+ */
+static void preload_age(struct thread_info *ti)
+{
+	if (ti->slb_preload_nr) {
+		ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+		ti->slb_preload_nr--;
+	}
+}
+
+/*
+ * Preload SLB entries for the current task: the conventional executable
+ * link address (0x10000000) and mm->mmap_base.
+ *
+ * Expects interrupts to be enabled on entry (warns otherwise). Does nothing
+ * when fewer than two free slots remain in the preload cache. Interrupts
+ * are hard-disabled around the preloads and re-enabled before returning.
+ */
+void slb_setup_new_exec(void)
+{
+ struct thread_info *ti = current_thread_info();
+ struct mm_struct *mm = current->mm;
+ unsigned long exec = 0x10000000;
+
+ WARN_ON(irqs_disabled());
+
+ /*
+ * preload cache can only be used to determine whether a SLB
+ * entry exists if it does not start to overflow.
+ */
+ if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
+ return;
+
+ hard_irq_disable();
+
+ /*
+ * We have no good place to clear the slb preload cache on exec,
+ * flush_thread is about the earliest arch hook but that happens
+ * after we switch to the mm and have already preloaded the SLBEs.
+ *
+ * For the most part that's probably okay to use entries from the
+ * previous exec, they will age out if unused. It may turn out to
+ * be an advantage to clear the cache before switching to it,
+ * however.
+ */
+
+ /*
+ * preload some userspace segments into the SLB.
+ * Almost all 32 and 64bit PowerPC executables are linked at
+ * 0x10000000 so it makes sense to preload this segment.
+ */
+ if (!is_kernel_addr(exec)) {
+ /* Only insert an SLB entry if the segment was not cached yet. */
+ if (preload_add(ti, exec))
+ slb_allocate_user(mm, exec);
+ }
+
+ /* Libraries and mmaps. */
+ if (!is_kernel_addr(mm->mmap_base)) {
+ if (preload_add(ti, mm->mmap_base))
+ slb_allocate_user(mm, mm->mmap_base);
+ }
+
+ /* see switch_slb */
+ asm volatile("isync" : : : "memory");
+
+ local_irq_enable();
+}
+
+/*
+ * Preload SLB entries for a new user context of the current task.
+ *
+ * @start: userspace entry address.
+ * @sp: initial user stack pointer.
+ *
+ * The segments containing @start, @sp and mm->start_brk (when non-zero) are
+ * preloaded; kernel addresses are skipped. Expects interrupts to be enabled
+ * on entry (warns otherwise) and does nothing when fewer than three free
+ * slots remain in the preload cache. Interrupts are hard-disabled around
+ * the preloads and re-enabled before returning.
+ */
+void preload_new_slb_context(unsigned long start, unsigned long sp)
+{
+ struct thread_info *ti = current_thread_info();
+ struct mm_struct *mm = current->mm;
+ unsigned long heap = mm->start_brk;
+
+ WARN_ON(irqs_disabled());
+
+ /* see above */
+ if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
+ return;
+
+ hard_irq_disable();
+
+ /* Userspace entry address. */
+ if (!is_kernel_addr(start)) {
+ /* Only insert an SLB entry if the segment was not cached yet. */
+ if (preload_add(ti, start))
+ slb_allocate_user(mm, start);
+ }
+
+ /* Top of stack, grows down. */
+ if (!is_kernel_addr(sp)) {
+ if (preload_add(ti, sp))
+ slb_allocate_user(mm, sp);
+ }
+
+ /* Bottom of heap, grows up. */
+ if (heap && !is_kernel_addr(heap)) {
+ if (preload_add(ti, heap))
+ slb_allocate_user(mm, heap);
+ }
+
+ /* see switch_slb */
+ asm volatile("isync" : : : "memory");
+
+ local_irq_enable();
+}
+
+/*
+ * Invalidate (slbie) the kernel segment recorded in PACA slb_cache[@index],
+ * unless that segment is the one containing the current kernel stack.
+ */
+static void slb_cache_slbie_kernel(unsigned int index)
+{
+ unsigned long slbie_data = get_paca()->slb_cache[index];
+ unsigned long ksp = get_paca()->kstack;
+
+ /* The cache stores EA >> SID_SHIFT; shift it back into an address. */
+ slbie_data <<= SID_SHIFT;
+ /* NOTE(review): presumably restores the kernel region bits lost when
+ * the ESID was stored in the cache - confirm against slb_cache width. */
+ slbie_data |= 0xc000000000000000ULL;
+ /* Never invalidate the segment holding the kernel stack. */
+ if ((ksp & slb_esid_mask(mmu_kernel_ssize)) == slbie_data)
+ return;
+ slbie_data |= mmu_kernel_ssize << SLBIE_SSIZE_SHIFT;
+
+ asm volatile("slbie %0" : : "r" (slbie_data));
+}
+
+/*
+ * Invalidate (slbie) the user segment recorded in PACA slb_cache[@index].
+ */
+static void slb_cache_slbie_user(unsigned int index)
+{
+ unsigned long slbie_data = get_paca()->slb_cache[index];
+
+ /* The cache stores EA >> SID_SHIFT; shift it back into an address. */
+ slbie_data <<= SID_SHIFT;
+ /* The segment size must be derived before the size bits are ORed in. */
+ slbie_data |= user_segment_size(slbie_data) << SLBIE_SSIZE_SHIFT;
+ slbie_data |= SLBIE_C; /* user slbs have C=1 */
+
+ asm volatile("slbie %0" : : "r" (slbie_data));
+}
+
+/*
+ * Flush all user entries from the segment table of the current processor,
+ * then preload the entries remembered for the incoming task.
+ *
+ * @tsk: task being switched to; its thread_info holds the preload cache.
+ * @mm: address space being switched to.
+ *
+ * Three flush strategies are used:
+ *  - stress_slb(): flush everything except the bolted entries;
+ *  - CPU_FTR_ARCH_300: a single slbia IH=3;
+ *  - otherwise: slbie each entry listed in the PACA slb_cache when the cache
+ *    is usable, else flush and restore the bolted entries.
+ */
+void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
+{
+ struct thread_info *ti = task_thread_info(tsk);
+ unsigned char i;
+
+ /*
+ * We need interrupts hard-disabled here, not just soft-disabled,
+ * so that a PMU interrupt can't occur, which might try to access
+ * user memory (to get a stack trace) and possibly cause an SLB miss
+ * which would update the slb_cache/slb_cache_ptr fields in the PACA.
+ */
+ hard_irq_disable();
+ isync();
+ if (stress_slb()) {
+ __slb_flush_and_restore_bolted(false);
+ isync();
+ get_paca()->slb_cache_ptr = 0;
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * SLBIA IH=3 invalidates all Class=1 SLBEs and their
+ * associated lookaside structures, which matches what
+ * switch_slb wants. So ARCH_300 does not use the slb
+ * cache.
+ */
+ asm volatile(PPC_SLBIA(3));
+
+ } else {
+ unsigned long offset = get_paca()->slb_cache_ptr;
+
+ /*
+ * slb_cache_update() sets the pointer past SLB_CACHE_ENTRIES
+ * once the cache has overflowed, which fails this test.
+ */
+ if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
+ offset <= SLB_CACHE_ENTRIES) {
+ /*
+ * Could assert_slb_presence(true) here, but
+ * hypervisor or machine check could have come
+ * in and removed the entry at this point.
+ */
+
+ for (i = 0; i < offset; i++)
+ slb_cache_slbie_user(i);
+
+ /* Workaround POWER5 < DD2.1 issue */
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
+ slb_cache_slbie_user(0);
+
+ } else {
+ /* Flush but retain kernel lookaside information */
+ __slb_flush_and_restore_bolted(true);
+ isync();
+
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ }
+
+ get_paca()->slb_cache_ptr = 0;
+ }
+ /* No user entries remain: only kernel slots count as used. */
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+
+ copy_mm_to_paca(mm);
+
+ /*
+ * We gradually age out SLBs after a number of context switches to
+ * reduce reload overhead of unused entries (like we do with FP/VEC
+ * reload). Each time we wrap 256 switches, take an entry out of the
+ * SLB preload cache.
+ */
+ tsk->thread.load_slb++;
+ if (!tsk->thread.load_slb) {
+ unsigned long pc = KSTK_EIP(tsk);
+
+ preload_age(ti);
+ preload_add(ti, pc);
+ }
+
+ /* Re-insert every segment remembered in the preload ring. */
+ for (i = 0; i < ti->slb_preload_nr; i++) {
+ unsigned char idx;
+ unsigned long ea;
+
+ idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+ ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
+
+ slb_allocate_user(mm, ea);
+ }
+
+ /*
+ * Synchronize slbmte preloads with possible subsequent user memory
+ * address accesses by the kernel (user mode won't happen until
+ * rfid, which is safe).
+ */
+ isync();
+}
+
+/* Set mmu_slb_size, the number of SLB entries the code in this file uses. */
+void slb_set_size(u16 size)
+{
+ mmu_slb_size = size;
+}
+
+/*
+ * Initialise the SLB and its PACA bookkeeping on the calling CPU.
+ *
+ * Caches the vmalloc segment flags in the PACA, resets the allocation
+ * bitmaps so that only the bolted slots are in use, invalidates the whole
+ * SLB and bolts the linear-mapping entry. On CPUs other than the boot CPU
+ * the kernel stack segment is bolted too, when it lies above the first
+ * linear segment.
+ */
+void slb_initialize(void)
+{
+ unsigned long linear_llp, vmalloc_llp, io_llp;
+ unsigned long lflags;
+ static int slb_encoding_inited;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ unsigned long vmemmap_llp;
+#endif
+
+ /* Prepare our SLB miss handler based on our page size */
+ linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
+ io_llp = mmu_psize_defs[mmu_io_psize].sllp;
+ vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
+ get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
+ /* Print the encodings once only; the flag is function-static. */
+ if (!slb_encoding_inited) {
+ slb_encoding_inited = 1;
+ pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
+ pr_devel("SLB: io LLP = %04lx\n", io_llp);
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
+#endif
+ }
+
+ /* Start with only the bolted slots marked as used (and as kernel). */
+ get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
+
+ lflags = SLB_VSID_KERNEL | linear_llp;
+
+ /* Invalidate the entire SLB (even entry 0) & all the ERATS */
+ asm volatile("isync":::"memory");
+ asm volatile("slbmte %0,%0"::"r" (0) : "memory");
+ asm volatile("isync; slbia; isync":::"memory");
+ create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
+
+ /*
+ * For the boot cpu, we're running on the stack in init_thread_union,
+ * which is in the first segment of the linear mapping, and also
+ * get_paca()->kstack hasn't been initialized yet.
+ * For secondary cpus, we need to bolt the kernel stack entry now.
+ */
+ slb_shadow_clear(KSTACK_INDEX);
+ if (raw_smp_processor_id() != boot_cpuid &&
+ (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
+ create_shadowed_slbe(get_paca()->kstack,
+ mmu_kernel_ssize, lflags, KSTACK_INDEX);
+
+ asm volatile("isync":::"memory");
+}
+
+/*
+ * Record a freshly inserted user SLB entry in the PACA slb_cache so that
+ * switch_slb() can invalidate it individually later.
+ *
+ * @esid_data: ESID word of the inserted entry; it is stored shifted right
+ * by SID_SHIFT.
+ *
+ * Does nothing on CPU_FTR_ARCH_300 CPUs or under stress_slb().
+ */
+static void slb_cache_update(unsigned long esid_data)
+{
+	int used;
+
+	/* ISAv3.0B and later does not use slb_cache */
+	if (cpu_has_feature(CPU_FTR_ARCH_300) || stress_slb())
+		return;
+
+	used = local_paca->slb_cache_ptr;
+	if (used >= SLB_CACHE_ENTRIES) {
+		/*
+		 * The cache is full, so its content no longer describes
+		 * the active SLB exactly. Push the pointer past the end
+		 * so that switch_slb() ignores the cache.
+		 */
+		local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+		return;
+	}
+
+	/*
+	 * There is room left for the optimized switch_slb() path.
+	 * Top 36 bits from esid_data as per ISA
+	 */
+	local_paca->slb_cache[used] = esid_data >> SID_SHIFT;
+	local_paca->slb_cache_ptr++;
+}
+
+/*
+ * Pick the SLB slot that the next inserted entry will occupy.
+ *
+ * @kernel: true when the entry is for a kernel mapping; such slots are
+ * tracked in slb_kern_bitmap in addition to slb_used_bitmap.
+ *
+ * A free slot from slb_used_bitmap is preferred; once all 32 tracked slots
+ * are taken, slots are recycled round-robin via stab_rr, starting at
+ * SLB_NUM_BOLTED. The returned index is never below SLB_NUM_BOLTED
+ * (enforced with BUG_ON).
+ */
+static enum slb_index alloc_slb_index(bool kernel)
+{
+ enum slb_index index;
+
+ /*
+ * The allocation bitmaps can become out of synch with the SLB
+ * when the _switch code does slbie when bolting a new stack
+ * segment and it must not be anywhere else in the SLB. This leaves
+ * a kernel allocated entry that is unused in the SLB. With very
+ * large systems or small segment sizes, the bitmaps could slowly
+ * fill with these entries. They will eventually be cleared out
+ * by the round robin allocator in that case, so it's probably not
+ * worth accounting for.
+ */
+
+ /*
+ * SLBs beyond 32 entries are allocated with stab_rr only
+ * POWER7/8/9 have 32 SLB entries, this could be expanded if a
+ * future CPU has more.
+ */
+ if (local_paca->slb_used_bitmap != U32_MAX) {
+ /* Lowest clear bit == first free slot. */
+ index = ffz(local_paca->slb_used_bitmap);
+ local_paca->slb_used_bitmap |= 1U << index;
+ if (kernel)
+ local_paca->slb_kern_bitmap |= 1U << index;
+ } else {
+ /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
+ index = local_paca->stab_rr;
+ if (index < (mmu_slb_size - 1))
+ index++;
+ else
+ index = SLB_NUM_BOLTED;
+ local_paca->stab_rr = index;
+ /* Only the first 32 slots are representable in the bitmap. */
+ if (index < 32) {
+ if (kernel)
+ local_paca->slb_kern_bitmap |= 1U << index;
+ else
+ local_paca->slb_kern_bitmap &= ~(1U << index);
+ }
+ }
+ BUG_ON(index < SLB_NUM_BOLTED);
+
+ return index;
+}
+
+/*
+ * Build an SLB entry for @ea and install it with slbmte.
+ *
+ * @ea: effective address whose segment is to be mapped.
+ * @context: context id passed to get_vsid().
+ * @flags: VSID flags passed to __mk_vsid_data().
+ * @ssize: segment size (MMU_SEGSIZE_*).
+ * @kernel: true for a kernel mapping, false for a user mapping.
+ *
+ * User entries are additionally recorded through slb_cache_update().
+ *
+ * Returns 0 on success, -EFAULT if get_vsid() yields no VSID.
+ */
+static long slb_insert_entry(unsigned long ea, unsigned long context,
+ unsigned long flags, int ssize, bool kernel)
+{
+ unsigned long vsid;
+ unsigned long vsid_data, esid_data;
+ enum slb_index index;
+
+ vsid = get_vsid(context, ea, ssize);
+ if (!vsid)
+ return -EFAULT;
+
+ /*
+ * There must not be a kernel SLB fault in alloc_slb_index or before
+ * slbmte here or the allocation bitmaps could get out of whack with
+ * the SLB.
+ *
+ * User SLB faults or preloads take this path which might get inlined
+ * into the caller, so add compiler barriers here to ensure unsafe
+ * memory accesses do not come between.
+ */
+ barrier();
+
+ index = alloc_slb_index(kernel);
+
+ vsid_data = __mk_vsid_data(vsid, ssize, flags);
+ esid_data = mk_esid_data(ea, ssize, index);
+
+ /*
+ * No need for an isync before or after this slbmte. The exception
+ * we enter with and the rfid we exit with are context synchronizing.
+ * User preloads should add isync afterwards in case the kernel
+ * accesses user memory before it returns to userspace with rfid.
+ */
+ assert_slb_presence(false, ea);
+ if (stress_slb()) {
+ int slb_cache_index = local_paca->slb_cache_ptr;
+
+ /*
+ * stress_slb() does not use slb cache, repurpose as a
+ * cache of inserted (non-bolted) kernel SLB entries. All
+ * non-bolted kernel entries are flushed on any user fault,
+ * or if there are already 3 non-bolted kernel entries.
+ */
+ BUILD_BUG_ON(SLB_CACHE_ENTRIES < 3);
+ if (!kernel || slb_cache_index == 3) {
+ int i;
+
+ for (i = 0; i < slb_cache_index; i++)
+ slb_cache_slbie_kernel(i);
+ slb_cache_index = 0;
+ }
+
+ if (kernel)
+ local_paca->slb_cache[slb_cache_index++] = esid_data >> SID_SHIFT;
+ local_paca->slb_cache_ptr = slb_cache_index;
+ }
+ asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
+
+ barrier();
+
+ if (!kernel)
+ slb_cache_update(esid_data);
+
+ return 0;
+}
+
+/*
+ * Insert an SLB entry for the kernel effective address @ea.
+ *
+ * @ea: kernel effective address to map.
+ * @id: region id of @ea (the caller obtains it with get_region_id()).
+ *
+ * The segment flags come from the page size configured for the region, and
+ * addresses outside the supported range of their region are rejected. 1T
+ * segments are used when MMU_FTR_1T_SEGMENT is available, 256M segments
+ * otherwise.
+ *
+ * Returns 0 on success, or -EFAULT for an unknown region or an out-of-range
+ * address.
+ */
+static long slb_allocate_kernel(unsigned long ea, unsigned long id)
+{
+	unsigned long context;
+	unsigned long flags;
+	int ssize;
+
+	if (id == LINEAR_MAP_REGION_ID) {
+
+		/*
+		 * We only support up to H_MAX_PHYSMEM_BITS. Valid offsets
+		 * into the linear map are [0, 1UL << H_MAX_PHYSMEM_BITS),
+		 * so the limit itself is already out of range, exactly as
+		 * for the *_END checks of the other regions below.
+		 */
+		if ((ea & EA_MASK) >= (1UL << H_MAX_PHYSMEM_BITS))
+			return -EFAULT;
+
+		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	} else if (id == VMEMMAP_REGION_ID) {
+
+		if (ea >= H_VMEMMAP_END)
+			return -EFAULT;
+
+		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
+	} else if (id == VMALLOC_REGION_ID) {
+
+		if (ea >= H_VMALLOC_END)
+			return -EFAULT;
+
+		/* Precomputed in slb_initialize(). */
+		flags = local_paca->vmalloc_sllp;
+
+	} else if (id == IO_REGION_ID) {
+
+		if (ea >= H_KERN_IO_END)
+			return -EFAULT;
+
+		flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+
+	} else {
+		return -EFAULT;
+	}
+
+	ssize = MMU_SEGSIZE_1T;
+	if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
+		ssize = MMU_SEGSIZE_256M;
+
+	context = get_kernel_context(ea);
+
+	return slb_insert_entry(ea, context, flags, ssize, true);
+}
+
+/*
+ * Insert an SLB entry for the user effective address @ea in @mm.
+ *
+ * Returns 0 on success. Returns -EFAULT when @ea is at or above the mm's
+ * SLB address limit, when no user context exists for it, or when it is at
+ * or beyond H_PGTABLE_RANGE (which also triggers a warning).
+ */
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
+{
+ unsigned long context;
+ unsigned long flags;
+ int bpsize;
+ int ssize;
+
+ /*
+ * consider this as bad access if we take a SLB miss
+ * on an address above addr limit.
+ */
+ if (ea >= mm_ctx_slb_addr_limit(&mm->context))
+ return -EFAULT;
+
+ context = get_user_context(&mm->context, ea);
+ if (!context)
+ return -EFAULT;
+
+ if (unlikely(ea >= H_PGTABLE_RANGE)) {
+ WARN_ON(1);
+ return -EFAULT;
+ }
+
+ ssize = user_segment_size(ea);
+
+ /* The page size of the slice containing ea selects the LLP bits. */
+ bpsize = get_slice_psize(mm, ea);
+ flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+
+ return slb_insert_entry(ea, context, flags, ssize, false);
+}
+
+/*
+ * SLB fault handler: install an SLB entry for the faulting address in
+ * regs->dar.
+ *
+ * Returns -EINVAL if the interrupted state is unrecoverable. Kernel regions
+ * (id >= LINEAR_MAP_REGION_ID) return the result of slb_allocate_kernel().
+ * User addresses return -EFAULT when the current task has no mm, otherwise
+ * the result of slb_allocate_user(); on success the segment is also added
+ * to the thread's preload cache.
+ */
+DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault)
+{
+ unsigned long ea = regs->dar;
+ unsigned long id = get_region_id(ea);
+
+ /* IRQs are not reconciled here, so can't check irqs_disabled */
+ VM_WARN_ON(mfmsr() & MSR_EE);
+
+ if (regs_is_unrecoverable(regs))
+ return -EINVAL;
+
+ /*
+ * SLB kernel faults must be very careful not to touch anything that is
+ * not bolted. E.g., PACA and global variables are okay, mm->context
+ * stuff is not. SLB user faults may access all of memory (and induce
+ * one recursive SLB kernel fault), so the kernel fault must not
+ * trample on the user fault state at those points.
+ */
+
+ /*
+ * This is a raw interrupt handler, for performance, so that
+ * fast_interrupt_return can be used. The handler must not touch local
+ * irq state, or schedule. We could test for usermode and upgrade to a
+ * normal process context (synchronous) interrupt for those, which
+ * would make them first-class kernel code and able to be traced and
+ * instrumented, although performance would suffer a bit, it would
+ * probably be a good tradeoff.
+ */
+ if (id >= LINEAR_MAP_REGION_ID) {
+ long err;
+#ifdef CONFIG_DEBUG_VM
+ /* Catch recursive kernel SLB faults. */
+ BUG_ON(local_paca->in_kernel_slb_handler);
+ local_paca->in_kernel_slb_handler = 1;
+#endif
+ err = slb_allocate_kernel(ea, id);
+#ifdef CONFIG_DEBUG_VM
+ local_paca->in_kernel_slb_handler = 0;
+#endif
+ return err;
+ } else {
+ struct mm_struct *mm = current->mm;
+ long err;
+
+ if (unlikely(!mm))
+ return -EFAULT;
+
+ err = slb_allocate_user(mm, ea);
+ /* Remember the segment so that switch_slb() can preload it. */
+ if (!err)
+ preload_add(current_thread_info(), ea);
+
+ return err;
+ }
+}
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
new file mode 100644
index 000000000000..28bec5bc7879
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -0,0 +1,819 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * address space "slices" (meta-segments) support
+ *
+ * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * Based on hugetlb implementation
+ *
+ * Copyright (C) 2003 David Gibson, IBM Corporation.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/err.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
+#include <linux/security.h>
+#include <asm/mman.h>
+#include <asm/mmu.h>
+#include <asm/spu.h>
+#include <asm/hugetlb.h>
+#include <asm/mmu_context.h>
+
+/* Serialises the slice page-size updates performed by slice_convert(). */
+static DEFINE_SPINLOCK(slice_convert_lock);
+
+/*
+ * Debug helpers. With DEBUG defined, slice_print_mask() and slice_dbg()
+ * print through pr_devel as long as _slice_debug is non-zero; without it
+ * both compile to nothing.
+ */
+#ifdef DEBUG
+int _slice_debug = 1;
+
+/* Print the low and high slice bitmaps of @mask, each prefixed by @label. */
+static void slice_print_mask(const char *label, const struct slice_mask *mask)
+{
+ if (!_slice_debug)
+ return;
+ pr_devel("%s low_slice: %*pbl\n", label,
+ (int)SLICE_NUM_LOW, &mask->low_slices);
+ pr_devel("%s high_slice: %*pbl\n", label,
+ (int)SLICE_NUM_HIGH, mask->high_slices);
+}
+
+#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0)
+
+#else
+
+/* Non-debug build: printing a mask is a no-op. */
+static void slice_print_mask(const char *label, const struct slice_mask *mask) {}
+#define slice_dbg(fmt...)
+
+#endif
+
+/*
+ * Return true if @addr lies below SLICE_LOW_TOP, i.e. in the low slices.
+ * The comparison is done in 64 bits.
+ */
+static inline notrace bool slice_addr_is_low(unsigned long addr)
+{
+	return (u64)addr < SLICE_LOW_TOP;
+}
+
+/*
+ * Fill @ret with the mask of every slice touched by the address range
+ * [@start, @start + @len).
+ *
+ * @ret is fully overwritten: it is cleared first, then the low and/or high
+ * slice bits covering the range are set.
+ */
+static void slice_range_to_mask(unsigned long start, unsigned long len,
+ struct slice_mask *ret)
+{
+ unsigned long end = start + len - 1;
+
+ ret->low_slices = 0;
+ if (SLICE_NUM_HIGH)
+ bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+
+ if (slice_addr_is_low(start)) {
+ /* Clamp to the last low address when the range spills high. */
+ unsigned long mend = min(end,
+ (unsigned long)(SLICE_LOW_TOP - 1));
+
+ /* Set the bits index(start)..index(mend), both inclusive. */
+ ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
+ - (1u << GET_LOW_SLICE_INDEX(start));
+ }
+
+ if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
+ unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
+ /*
+ * NOTE(review): ALIGN() leaves end unchanged if it is already a
+ * multiple of the high slice size, which would drop the last
+ * slice. Presumably callers pass page-granular ranges, so the
+ * inclusive end is never slice-aligned - confirm.
+ */
+ unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
+ unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
+
+ bitmap_set(ret->high_slices, start_index, count);
+ }
+}
+
+/*
+ * Return non-zero if [@addr, @addr + @len) lies within the mm's SLB address
+ * limit and ends at or before the start gap of the first VMA found by
+ * find_vma() for @addr, or if there is no such VMA.
+ */
+static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
+			      unsigned long len)
+{
+	struct vm_area_struct *next;
+
+	/* The range would run past the address limit. */
+	if (addr > (mm_ctx_slb_addr_limit(&mm->context) - len))
+		return 0;
+
+	next = find_vma(mm, addr);
+	if (!next)
+		return 1;
+
+	return (addr + len) <= vm_start_gap(next);
+}
+
+/* Return non-zero if low slice number @slice is not entirely free. */
+static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+	unsigned long base = slice << SLICE_LOW_SHIFT;
+	unsigned long span = 1ul << SLICE_LOW_SHIFT;
+
+	return !slice_area_is_free(mm, base, span);
+}
+
+/* Return non-zero if high slice number @slice is not entirely free. */
+static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
+{
+	unsigned long first = slice << SLICE_HIGH_SHIFT;
+	unsigned long limit = first + (1ul << SLICE_HIGH_SHIFT);
+
+	/*
+	 * Hack, so that each address is controlled by exactly one
+	 * of the high or low area bitmaps, the first high area starts
+	 * at 4GB, not 0
+	 */
+	if (!first)
+		first = (unsigned long)SLICE_LOW_TOP;
+
+	return !slice_area_is_free(mm, first, limit - first);
+}
+
+/*
+ * Fill @ret with the mask of slices that currently contain no VMA.
+ *
+ * All low slices are examined. High slices are examined only when
+ * @high_limit reaches above the low area, and then only those below
+ * @high_limit.
+ */
+static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
+				unsigned long high_limit)
+{
+	unsigned long nr_high, s;
+
+	ret->low_slices = 0;
+	if (SLICE_NUM_HIGH)
+		bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
+
+	for (s = 0; s < SLICE_NUM_LOW; s++) {
+		if (slice_low_has_vma(mm, s))
+			continue;
+		ret->low_slices |= 1u << s;
+	}
+
+	if (!slice_addr_is_low(high_limit - 1)) {
+		nr_high = GET_HIGH_SLICE_INDEX(high_limit);
+		for (s = 0; s < nr_high; s++) {
+			if (!slice_high_has_vma(mm, s))
+				__set_bit(s, ret->high_slices);
+		}
+	}
+}
+
+/*
+ * Return true if every slice touched by [@start, @start + @len) is set in
+ * the @available mask. @mm is not used by the check itself.
+ */
+static bool slice_check_range_fits(struct mm_struct *mm,
+				   const struct slice_mask *available,
+				   unsigned long start, unsigned long len)
+{
+	unsigned long last = start + len - 1;
+	u64 wanted_low = 0;
+
+	if (slice_addr_is_low(start)) {
+		/* Clamp to the last low address when the range spills high. */
+		unsigned long low_last = min(last,
+					     (unsigned long)(SLICE_LOW_TOP - 1));
+
+		wanted_low = (1u << (GET_LOW_SLICE_INDEX(low_last) + 1))
+				- (1u << GET_LOW_SLICE_INDEX(start));
+	}
+	if ((available->low_slices & wanted_low) != wanted_low)
+		return false;
+
+	if (SLICE_NUM_HIGH && !slice_addr_is_low(last)) {
+		unsigned long first_idx = GET_HIGH_SLICE_INDEX(start);
+		unsigned long rounded = ALIGN(last, (1UL << SLICE_HIGH_SHIFT));
+		unsigned long nr = GET_HIGH_SLICE_INDEX(rounded) - first_idx;
+		unsigned long idx = first_idx;
+
+		while (idx < first_idx + nr) {
+			if (!test_bit(idx, available->high_slices))
+				return false;
+			idx++;
+		}
+	}
+
+	return true;
+}
+
+/*
+ * Refresh the PACA copy of the mm context and flush the non-bolted SLB
+ * entries on the calling CPU, but only if @parm (a struct mm_struct *) is
+ * the CPU's current active mm. Compiled to a no-op without CONFIG_PPC64.
+ */
+static void slice_flush_segments(void *parm)
+{
+#ifdef CONFIG_PPC64
+ struct mm_struct *mm = parm;
+ unsigned long flags;
+
+ /* Nothing to do if this CPU is running a different address space. */
+ if (mm != current->active_mm)
+ return;
+
+ copy_mm_to_paca(current->active_mm);
+
+ local_irq_save(flags);
+ slb_flush_and_restore_bolted();
+ local_irq_restore(flags);
+#endif
+}
+
+/*
+ * Switch every slice selected by @mask to page size @psize in @mm.
+ *
+ * For each selected slice the per-page-size masks are updated (the bit moves
+ * from the mask of the old size to the mask of @psize) and the packed size
+ * arrays are rewritten. Those arrays store two 4-bit page sizes per byte:
+ * slice i lives in byte i >> 1, nibble i & 1. The update is done under
+ * slice_convert_lock with interrupts disabled.
+ */
+static void slice_convert(struct mm_struct *mm,
+ const struct slice_mask *mask, int psize)
+{
+ int index, mask_index;
+ /* Write the new slice psize bits */
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *psize_mask, *old_mask;
+ unsigned long i, flags;
+ int old_psize;
+
+ slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
+ slice_print_mask(" mask", mask);
+
+ psize_mask = slice_mask_for_size(&mm->context, psize);
+
+ /* We need to use a spinlock here to protect against
+ * concurrent 64k -> 4k demotion ...
+ */
+ spin_lock_irqsave(&slice_convert_lock, flags);
+
+ lpsizes = mm_ctx_low_slices(&mm->context);
+ for (i = 0; i < SLICE_NUM_LOW; i++) {
+ if (!(mask->low_slices & (1u << i)))
+ continue;
+
+ /* Nibble within the byte, and byte within the array. */
+ mask_index = i & 0x1;
+ index = i >> 1;
+
+ /* Update the slice_mask */
+ old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(&mm->context, old_psize);
+ old_mask->low_slices &= ~(1u << i);
+ psize_mask->low_slices |= 1u << i;
+
+ /* Update the sizes array */
+ lpsizes[index] = (lpsizes[index] & ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
+
+ hpsizes = mm_ctx_high_slices(&mm->context);
+ /* Only high slices below the mm's SLB address limit are considered. */
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
+ if (!test_bit(i, mask->high_slices))
+ continue;
+
+ mask_index = i & 0x1;
+ index = i >> 1;
+
+ /* Update the slice_mask */
+ old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
+ old_mask = slice_mask_for_size(&mm->context, old_psize);
+ __clear_bit(i, old_mask->high_slices);
+ __set_bit(i, psize_mask->high_slices);
+
+ /* Update the sizes array */
+ hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) |
+ (((unsigned long)psize) << (mask_index * 4));
+ }
+
+ slice_dbg(" lsps=%lx, hsps=%lx\n",
+ (unsigned long)mm_ctx_low_slices(&mm->context),
+ (unsigned long)mm_ctx_high_slices(&mm->context));
+
+ spin_unlock_irqrestore(&slice_convert_lock, flags);
+
+#ifdef CONFIG_SPU_BASE
+ spu_flush_all_slbs(mm);
+#endif
+}
+
+/*
+ * Work out which slice contains @addr and report two things about it:
+ *  - *boundary_addr receives the address of slice number (slice + @end),
+ *    i.e. the start boundary of the slice for @end == 0 and its end
+ *    boundary for @end == 1;
+ *  - the return value tells whether the slice is marked as available in
+ *    the 'available' slice_mask.
+ */
+static bool slice_scan_available(unsigned long addr,
+				 const struct slice_mask *available,
+				 int end, unsigned long *boundary_addr)
+{
+	unsigned long idx, edge;
+
+	if (!slice_addr_is_low(addr)) {
+		idx = GET_HIGH_SLICE_INDEX(addr);
+		edge = idx + end;
+		/* High slice 0 really begins where the low slices stop. */
+		if (edge)
+			*boundary_addr = edge << SLICE_HIGH_SHIFT;
+		else
+			*boundary_addr = SLICE_LOW_TOP;
+		return !!test_bit(idx, available->high_slices);
+	}
+
+	idx = GET_LOW_SLICE_INDEX(addr);
+	*boundary_addr = (idx + end) << SLICE_LOW_SHIFT;
+	return !!(available->low_slices & (1u << idx));
+}
+
+/*
+ * Search upwards from @addr for an unmapped area of @len bytes that lies
+ * entirely within slices set in @available and below @high_limit.
+ *
+ * Runs of consecutive available slices are merged into one window, which is
+ * then handed to vm_unmapped_area(). The result is aligned for page size
+ * @psize (never less than PAGE_SHIFT).
+ *
+ * Returns the address found, or -ENOMEM if no window yields one.
+ */
+static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ const struct slice_mask *available,
+ int psize, unsigned long high_limit)
+{
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long found, next_end;
+ struct vm_unmapped_area_info info = {
+ .length = len,
+ .align_mask = PAGE_MASK & ((1ul << pshift) - 1),
+ };
+ /*
+ * Check till the allowed max value for this mmap request
+ */
+ while (addr < high_limit) {
+ info.low_limit = addr;
+ /* addr is advanced to the end of its slice either way. */
+ if (!slice_scan_available(addr, available, 1, &addr))
+ continue;
+
+ next_slice:
+ /*
+ * At this point [info.low_limit; addr) covers
+ * available slices only and ends at a slice boundary.
+ * Check if we need to reduce the range, or if we can
+ * extend it to cover the next available slice.
+ */
+ if (addr >= high_limit)
+ addr = high_limit;
+ else if (slice_scan_available(addr, available, 1, &next_end)) {
+ addr = next_end;
+ goto next_slice;
+ }
+ info.high_limit = addr;
+
+ found = vm_unmapped_area(&info);
+ /* A page-aligned result is an address; anything else is an error. */
+ if (!(found & ~PAGE_MASK))
+ return found;
+ }
+
+ return -ENOMEM;
+}
+
+/*
+ * Search downwards from @addr for an unmapped area of @len bytes that lies
+ * entirely within slices set in @available.
+ *
+ * Runs of consecutive available slices are merged into one window, which is
+ * then handed to vm_unmapped_area() in top-down mode. The search never goes
+ * below max(PAGE_SIZE, mmap_min_addr). If nothing is found, the bottom-up
+ * search starting at TASK_UNMAPPED_BASE is tried as a fallback.
+ *
+ * Returns the address found, or the fallback's result (possibly -ENOMEM).
+ */
+static unsigned long slice_find_area_topdown(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ const struct slice_mask *available,
+ int psize, unsigned long high_limit)
+{
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long found, prev;
+ struct vm_unmapped_area_info info = {
+ .flags = VM_UNMAPPED_AREA_TOPDOWN,
+ .length = len,
+ .align_mask = PAGE_MASK & ((1ul << pshift) - 1),
+ };
+ unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
+
+ /*
+ * If we are trying to allocate above DEFAULT_MAP_WINDOW,
+ * add the difference to the mmap_base.
+ * This applies only to requests whose high_limit is above
+ * DEFAULT_MAP_WINDOW.
+ */
+ if (high_limit > DEFAULT_MAP_WINDOW)
+ addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
+
+ while (addr > min_addr) {
+ info.high_limit = addr;
+ /* addr is moved down to the start of the slice either way. */
+ if (!slice_scan_available(addr - 1, available, 0, &addr))
+ continue;
+
+ prev_slice:
+ /*
+ * At this point [addr; info.high_limit) covers
+ * available slices only and starts at a slice boundary.
+ * Check if we need to reduce the range, or if we can
+ * extend it to cover the previous available slice.
+ */
+ if (addr < min_addr)
+ addr = min_addr;
+ else if (slice_scan_available(addr - 1, available, 0, &prev)) {
+ addr = prev;
+ goto prev_slice;
+ }
+ info.low_limit = addr;
+
+ found = vm_unmapped_area(&info);
+ /* A page-aligned result is an address; anything else is an error. */
+ if (!(found & ~PAGE_MASK))
+ return found;
+ }
+
+ /*
+ * A failed mmap() very likely causes application failure,
+ * so fall back to the bottom-up function here. This scenario
+ * can happen with large stack limits and large mmap()
+ * allocations.
+ */
+ return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, psize, high_limit);
+}
+
+
+/*
+ * Find an unmapped area of @len bytes within the slices of @mask, searching
+ * from mm->mmap_base either downwards (@topdown non-zero) or upwards.
+ */
+static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
+				     const struct slice_mask *mask, int psize,
+				     int topdown, unsigned long high_limit)
+{
+	if (!topdown)
+		return slice_find_area_bottomup(mm, mm->mmap_base, len, mask,
+						psize, high_limit);
+
+	return slice_find_area_topdown(mm, mm->mmap_base, len, mask, psize,
+				       high_limit);
+}
+
+/* Copy slice mask @src into @dst (low word, plus high bitmap if any). */
+static inline void slice_copy_mask(struct slice_mask *dst,
+				   const struct slice_mask *src)
+{
+	dst->low_slices = src->low_slices;
+	if (SLICE_NUM_HIGH)
+		bitmap_copy(dst->high_slices, src->high_slices, SLICE_NUM_HIGH);
+}
+
+/* Store in @dst the union of slice masks @src1 and @src2. */
+static inline void slice_or_mask(struct slice_mask *dst,
+				 const struct slice_mask *src1,
+				 const struct slice_mask *src2)
+{
+	dst->low_slices = src1->low_slices | src2->low_slices;
+	if (SLICE_NUM_HIGH)
+		bitmap_or(dst->high_slices, src1->high_slices,
+			  src2->high_slices, SLICE_NUM_HIGH);
+}
+
+/* Store in @dst the slices that are set in @src1 but not in @src2. */
+static inline void slice_andnot_mask(struct slice_mask *dst,
+				     const struct slice_mask *src1,
+				     const struct slice_mask *src2)
+{
+	dst->low_slices = src1->low_slices & ~src2->low_slices;
+	if (SLICE_NUM_HIGH)
+		bitmap_andnot(dst->high_slices, src1->high_slices,
+			      src2->high_slices, SLICE_NUM_HIGH);
+}
+
+/* MMU page size index matching the kernel's configured base page size. */
+#ifdef CONFIG_PPC_64K_PAGES
+#define MMU_PAGE_BASE MMU_PAGE_64K
+#else
+#define MMU_PAGE_BASE MMU_PAGE_4K
+#endif
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown)
+{
+ struct slice_mask good_mask;
+ struct slice_mask potential_mask;
+ const struct slice_mask *maskp;
+ const struct slice_mask *compat_maskp = NULL;
+ int fixed = (flags & MAP_FIXED);
+ int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long page_size = 1UL << pshift;
+ struct mm_struct *mm = current->mm;
+ unsigned long newaddr;
+ unsigned long high_limit;
+
+ high_limit = DEFAULT_MAP_WINDOW;
+ if (addr >= high_limit || (fixed && (addr + len > high_limit)))
+ high_limit = TASK_SIZE;
+
+ if (len > high_limit)
+ return -ENOMEM;
+ if (len & (page_size - 1))
+ return -EINVAL;
+ if (fixed) {
+ if (addr & (page_size - 1))
+ return -EINVAL;
+ if (addr > high_limit - len)
+ return -ENOMEM;
+ }
+
+ if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
+ /*
+ * Increasing the slb_addr_limit does not require
+ * slice mask cache to be recalculated because it should
+ * be already initialised beyond the old address limit.
+ */
+ mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
+
+ on_each_cpu(slice_flush_segments, mm, 1);
+ }
+
+ /* Sanity checks */
+ BUG_ON(mm->task_size == 0);
+ BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
+ VM_BUG_ON(radix_enabled());
+
+ slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
+ slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
+ addr, len, flags, topdown);
+
+ /* If hint, make sure it matches our alignment restrictions */
+ if (!fixed && addr) {
+ addr = ALIGN(addr, page_size);
+ slice_dbg(" aligned addr=%lx\n", addr);
+ /* Ignore hint if it's too large or overlaps a VMA */
+ if (addr > high_limit - len || addr < mmap_min_addr ||
+ !slice_area_is_free(mm, addr, len))
+ addr = 0;
+ }
+
+ /* First make up a "good" mask of slices that have the right size
+ * already
+ */
+ maskp = slice_mask_for_size(&mm->context, psize);
+
+ /*
+ * Here "good" means slices that are already the right page size,
+ * "compat" means slices that have a compatible page size (i.e.
+ * 4k in a 64k pagesize kernel), and "free" means slices without
+ * any VMAs.
+ *
+ * If MAP_FIXED:
+ * check if fits in good | compat => OK
+ * check if fits in good | compat | free => convert free
+ * else bad
+ * If have hint:
+ * check if hint fits in good => OK
+ * check if hint fits in good | free => convert free
+ * Otherwise:
+ * search in good, found => OK
+ * search in good | free, found => convert free
+ * search in good | compat | free, found => convert free.
+ */
+
+ /*
+ * If we support combo pages, we can allow 64k pages in 4k slices
+ * The mask copies could be avoided in most cases here if we had
+ * a pointer to good mask for the next code to use.
+ */
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+ compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
+ if (fixed)
+ slice_or_mask(&good_mask, maskp, compat_maskp);
+ else
+ slice_copy_mask(&good_mask, maskp);
+ } else {
+ slice_copy_mask(&good_mask, maskp);
+ }
+
+ slice_print_mask(" good_mask", &good_mask);
+ if (compat_maskp)
+ slice_print_mask(" compat_mask", compat_maskp);
+
+ /* First check hint if it's valid or if we have MAP_FIXED */
+ if (addr != 0 || fixed) {
+ /* Check if we fit in the good mask. If we do, we just return,
+ * nothing else to do
+ */
+ if (slice_check_range_fits(mm, &good_mask, addr, len)) {
+ slice_dbg(" fits good !\n");
+ newaddr = addr;
+ goto return_addr;
+ }
+ } else {
+ /* Now let's see if we can find something in the existing
+ * slices for that size
+ */
+ newaddr = slice_find_area(mm, len, &good_mask,
+ psize, topdown, high_limit);
+ if (newaddr != -ENOMEM) {
+ /* Found within the good mask, we don't have to setup,
+ * we thus return directly
+ */
+ slice_dbg(" found area at 0x%lx\n", newaddr);
+ goto return_addr;
+ }
+ }
+ /*
+ * We don't fit in the good mask, check what other slices are
+ * empty and thus can be converted
+ */
+ slice_mask_for_free(mm, &potential_mask, high_limit);
+ slice_or_mask(&potential_mask, &potential_mask, &good_mask);
+ slice_print_mask(" potential", &potential_mask);
+
+ if (addr != 0 || fixed) {
+ if (slice_check_range_fits(mm, &potential_mask, addr, len)) {
+ slice_dbg(" fits potential !\n");
+ newaddr = addr;
+ goto convert;
+ }
+ }
+
+ /* If we have MAP_FIXED and failed the above steps, then error out */
+ if (fixed)
+ return -EBUSY;
+
+ slice_dbg(" search...\n");
+
+ /* If we had a hint that didn't work out, see if we can fit
+ * anywhere in the good area.
+ */
+ if (addr) {
+ newaddr = slice_find_area(mm, len, &good_mask,
+ psize, topdown, high_limit);
+ if (newaddr != -ENOMEM) {
+ slice_dbg(" found area at 0x%lx\n", newaddr);
+ goto return_addr;
+ }
+ }
+
+ /* Now let's see if we can find something in the existing slices
+ * for that size plus free slices
+ */
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
+
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM &&
+ psize == MMU_PAGE_64K) {
+ /* retry the search with 4k-page slices included */
+ slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
+ newaddr = slice_find_area(mm, len, &potential_mask,
+ psize, topdown, high_limit);
+ }
+
+ if (newaddr == -ENOMEM)
+ return -ENOMEM;
+
+ slice_range_to_mask(newaddr, len, &potential_mask);
+ slice_dbg(" found potential area at 0x%lx\n", newaddr);
+ slice_print_mask(" mask", &potential_mask);
+
+ convert:
+ /*
+ * Try to allocate the context before we do slice convert
+ * so that we handle the context allocation failure gracefully.
+ */
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+
+ slice_andnot_mask(&potential_mask, &potential_mask, &good_mask);
+ if (compat_maskp && !fixed)
+ slice_andnot_mask(&potential_mask, &potential_mask, compat_maskp);
+ if (potential_mask.low_slices ||
+ (SLICE_NUM_HIGH &&
+ !bitmap_empty(potential_mask.high_slices, SLICE_NUM_HIGH))) {
+ slice_convert(mm, &potential_mask, psize);
+ if (psize > MMU_PAGE_BASE)
+ on_each_cpu(slice_flush_segments, mm, 1);
+ }
+ return newaddr;
+
+return_addr:
+ if (need_extra_context(mm, newaddr)) {
+ if (alloc_extended_context(mm, newaddr) < 0)
+ return -ENOMEM;
+ }
+ return newaddr;
+}
+EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
+
+#ifdef CONFIG_HUGETLB_PAGE
+static int file_to_psize(struct file *file)
+{
+ struct hstate *hstate = hstate_file(file);
+
+ return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+#else
+static int file_to_psize(struct file *file)
+{
+ return 0;
+}
+#endif
+
+unsigned long arch_get_unmapped_area(struct file *filp,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags,
+ vm_flags_t vm_flags)
+{
+ unsigned int psize;
+
+ if (radix_enabled())
+ return generic_get_unmapped_area(filp, addr, len, pgoff, flags, vm_flags);
+
+ if (filp && is_file_hugepages(filp))
+ psize = file_to_psize(filp);
+ else
+ psize = mm_ctx_user_psize(&current->mm->context);
+
+ return slice_get_unmapped_area(addr, len, flags, psize, 0);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+ const unsigned long addr0,
+ const unsigned long len,
+ const unsigned long pgoff,
+ const unsigned long flags,
+ vm_flags_t vm_flags)
+{
+ unsigned int psize;
+
+ if (radix_enabled())
+ return generic_get_unmapped_area_topdown(filp, addr0, len, pgoff, flags, vm_flags);
+
+ if (filp && is_file_hugepages(filp))
+ psize = file_to_psize(filp);
+ else
+ psize = mm_ctx_user_psize(&current->mm->context);
+
+ return slice_get_unmapped_area(addr0, len, flags, psize, 1);
+}
+
+unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+ unsigned char *psizes;
+ int index, mask_index;
+
+ VM_BUG_ON(radix_enabled());
+
+ if (slice_addr_is_low(addr)) {
+ psizes = mm_ctx_low_slices(&mm->context);
+ index = GET_LOW_SLICE_INDEX(addr);
+ } else {
+ psizes = mm_ctx_high_slices(&mm->context);
+ index = GET_HIGH_SLICE_INDEX(addr);
+ }
+ mask_index = index & 0x1;
+ return (psizes[index >> 1] >> (mask_index * 4)) & 0xf;
+}
+EXPORT_SYMBOL_GPL(get_slice_psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm)
+{
+ unsigned char *hpsizes, *lpsizes;
+ struct slice_mask *mask;
+ unsigned int psize = mmu_virtual_psize;
+
+ slice_dbg("slice_init_new_context_exec(mm=%p)\n", mm);
+
+ /*
+ * In the case of exec, use the default limit. In the
+ * case of fork it is just inherited from the mm being
+ * duplicated.
+ */
+ mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
+ mm_ctx_set_user_psize(&mm->context, psize);
+
+ /*
+ * Set all slice psizes to the default.
+ */
+ lpsizes = mm_ctx_low_slices(&mm->context);
+ memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
+
+ hpsizes = mm_ctx_high_slices(&mm->context);
+ memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
+
+ /*
+ * Slice mask cache starts zeroed, fill the default size cache.
+ */
+ mask = slice_mask_for_size(&mm->context, psize);
+ mask->low_slices = ~0UL;
+ if (SLICE_NUM_HIGH)
+ bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
+}
+
+void slice_setup_new_exec(void)
+{
+ struct mm_struct *mm = current->mm;
+
+ slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
+
+ if (!is_32bit_task())
+ return;
+
+ mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
+}
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+ unsigned long len, unsigned int psize)
+{
+ struct slice_mask mask;
+
+ VM_BUG_ON(radix_enabled());
+
+ slice_range_to_mask(start, len, &mask);
+ slice_convert(mm, &mask, psize);
+}
+
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * is_hugepage_only_range() is used by generic code to verify whether
+ * a normal mmap mapping (non hugetlbfs) is valid on a given area.
+ *
+ * until the generic code provides a more generic hook and/or starts
+ * calling arch get_unmapped_area for MAP_FIXED (which our implementation
+ * here knows how to deal with), we hijack it to keep standard mappings
+ * away from us.
+ *
+ * because of that generic code limitation, MAP_FIXED mapping cannot
+ * "convert" back a slice with no VMAs to the standard page size, only
+ * get_unmapped_area() can. It would be possible to fix it here but I
+ * prefer working on fixing the generic code instead.
+ *
+ * WARNING: This will not work if hugetlbfs isn't enabled since the
+ * generic code will redefine that function as 0 in that. This is ok
+ * for now as we only use slices with hugetlbfs enabled. This should
+ * be fixed as the generic code gets fixed.
+ */
+int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long len)
+{
+ const struct slice_mask *maskp;
+ unsigned int psize = mm_ctx_user_psize(&mm->context);
+
+ VM_BUG_ON(radix_enabled());
+
+ maskp = slice_mask_for_size(&mm->context, psize);
+
+ /* We need to account for 4k slices too */
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
+ const struct slice_mask *compat_maskp;
+ struct slice_mask available;
+
+ compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
+ slice_or_mask(&available, maskp, compat_maskp);
+ return !slice_check_range_fits(mm, &available, addr, len);
+ }
+
+ return !slice_check_range_fits(mm, maskp, addr, len);
+}
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+ /* With radix we don't use slice, so derive it from vma*/
+ if (radix_enabled())
+ return vma_kernel_pagesize(vma);
+
+ return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
+}
+#endif
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 6c0b1f5f8d2c..ec98e526167e 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -1,22 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2007-2008 Paul Mackerras, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/types.h>
-#include <linux/mm.h>
+#include <linux/pagewalk.h>
#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
-#include <asm/tlbflush.h>
+#include <linux/pgtable.h>
+#include <linux/uaccess.h>
/*
* Free all pages allocated for subpage protection maps and pointers.
@@ -25,10 +21,13 @@
*/
void subpage_prot_free(struct mm_struct *mm)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
unsigned long i, j, addr;
u32 **p;
+ if (!spt)
+ return;
+
for (i = 0; i < 4; ++i) {
if (spt->low_prot[i]) {
free_page((unsigned long)spt->low_prot[i]);
@@ -36,7 +35,7 @@ void subpage_prot_free(struct mm_struct *mm)
}
}
addr = 0;
- for (i = 0; i < 2; ++i) {
+ for (i = 0; i < (TASK_SIZE_USER64 >> 43); ++i) {
p = spt->protptrs[i];
if (!p)
continue;
@@ -48,34 +47,32 @@ void subpage_prot_free(struct mm_struct *mm)
free_page((unsigned long)p);
}
spt->maxaddr = 0;
-}
-
-void subpage_prot_init_new_context(struct mm_struct *mm)
-{
- struct subpage_prot_table *spt = &mm->context.spt;
-
- memset(spt, 0, sizeof(*spt));
+ kfree(spt);
}
static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
int npages)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
spinlock_t *ptl;
pgd = pgd_offset(mm, addr);
- if (pgd_none(*pgd))
+ p4d = p4d_offset(pgd, addr);
+ if (p4d_none(*p4d))
return;
- pud = pud_offset(pgd, addr);
+ pud = pud_offset(p4d, addr);
if (pud_none(*pud))
return;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return;
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return;
arch_enter_lazy_mmu_mode();
for (; npages > 0; --npages) {
pte_update(mm, addr, pte, 0, 0, 0);
@@ -93,13 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
static void subpage_prot_clear(unsigned long addr, unsigned long len)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
unsigned long next, limit;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+
+ spt = mm_ctx_subpage_prot(&mm->context);
+ if (!spt)
+ goto err_out;
+
limit = addr + len;
if (limit > spt->maxaddr)
limit = spt->maxaddr;
@@ -127,46 +129,38 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
/* now flush any existing HPTEs for the range */
hpte_flush_range(mm, addr, nw);
}
- up_write(&mm->mmap_sem);
+
+err_out:
+ mmap_write_unlock(mm);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
- split_huge_page_pmd(vma, addr, pmd);
+ struct vm_area_struct *vma = walk->vma;
+ split_huge_pmd(vma, pmd, addr);
return 0;
}
+static const struct mm_walk_ops subpage_walk_ops = {
+ .pmd_entry = subpage_walk_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
+};
+
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
struct vm_area_struct *vma;
- struct mm_walk subpage_proto_walk = {
- .mm = mm,
- .pmd_entry = subpage_walk_pmd_entry,
- };
+ VMA_ITERATOR(vmi, mm, addr);
/*
* We don't try too hard, we just mark all the vma in that range
* VM_NOHUGEPAGE and split them.
*/
- vma = find_vma(mm, addr);
- /*
- * If the range is in unmapped range, just return
- */
- if (vma && ((addr + len) <= vma->vm_start))
- return;
-
- while (vma) {
- if (vma->vm_start >= (addr + len))
- break;
- vma->vm_flags |= VM_NOHUGEPAGE;
- subpage_proto_walk.private = vma;
- walk_page_range(vma->vm_start, vma->vm_end,
- &subpage_proto_walk);
- vma = vma->vm_next;
+ for_each_vma_range(vmi, vma, addr + len) {
+ vm_flags_set(vma, VM_NOHUGEPAGE);
+ walk_page_vma(vma, &subpage_walk_ops, NULL);
}
}
#else
@@ -187,19 +181,24 @@ static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
* in a 2-bit field won't allow writes to a page that is otherwise
* write-protected.
*/
-long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
+SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
+ unsigned long, len, u32 __user *, map)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
unsigned long next, limit;
int err;
+ if (radix_enabled())
+ return -ENOENT;
+
/* Check parameters */
if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) ||
- addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE)
+ addr >= mm->task_size || len >= mm->task_size ||
+ addr + len > mm->task_size)
return -EINVAL;
if (is_hugepage_only_range(mm, addr, len))
@@ -211,10 +210,25 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
return 0;
}
- if (!access_ok(VERIFY_READ, map, (len >> PAGE_SHIFT) * sizeof(u32)))
+ if (!access_ok(map, (len >> PAGE_SHIFT) * sizeof(u32)))
return -EFAULT;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
+
+ spt = mm_ctx_subpage_prot(&mm->context);
+ if (!spt) {
+ /*
+ * Allocate subpage prot table if not already done.
+ * Do this with mmap_lock held
+ */
+ spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
+ if (!spt) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mm->context.hash_context->spt = spt;
+ }
+
subpage_mark_vma_nohuge(mm, addr, len);
for (limit = addr + len; addr < limit; addr = next) {
next = pmd_addr_end(addr, limit);
@@ -249,12 +263,11 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
if (addr + (nw << PAGE_SHIFT) > next)
nw = (next - addr) >> PAGE_SHIFT;
- up_write(&mm->mmap_sem);
- err = -EFAULT;
+ mmap_write_unlock(mm);
if (__copy_from_user(spp, map, nw * sizeof(u32)))
- goto out2;
+ return -EFAULT;
map += nw;
- down_write(&mm->mmap_sem);
+ mmap_write_lock(mm);
/* now flush any existing HPTEs for the range */
hpte_flush_range(mm, addr, nw);
@@ -263,7 +276,6 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map)
spt->maxaddr = limit;
err = 0;
out:
- up_write(&mm->mmap_sem);
- out2:
+ mmap_write_unlock(mm);
return err;
}
diff --git a/arch/powerpc/mm/book3s64/trace.c b/arch/powerpc/mm/book3s64/trace.c
new file mode 100644
index 000000000000..ccd64b5e6cac
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/trace.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file is for defining trace points and trace related helpers.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#include <trace/events/thp.h>
+#endif
diff --git a/arch/powerpc/mm/cacheflush.c b/arch/powerpc/mm/cacheflush.c
new file mode 100644
index 000000000000..7186516eca52
--- /dev/null
+++ b/arch/powerpc/mm/cacheflush.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/highmem.h>
+#include <linux/kprobes.h>
+
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(void)
+{
+ /*
+ * For a snooping icache, we still need a dummy icbi to purge all the
+ * prefetched instructions from the ifetch buffers. We also need a sync
+ * before the icbi to order the actual stores to memory that might
+ * have modified instructions with the icbi.
+ */
+ if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+ mb(); /* sync */
+ icbi((void *)PAGE_OFFSET);
+ mb(); /* sync */
+ isync();
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+ unsigned long shift = l1_icache_shift();
+ unsigned long bytes = l1_icache_bytes();
+ char *addr = (char *)(start & ~(bytes - 1));
+ unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+ unsigned long i;
+
+ for (i = 0; i < size >> shift; i++, addr += bytes)
+ icbi(addr);
+
+ mb(); /* sync */
+ isync();
+}
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+ if (flush_coherent_icache())
+ return;
+
+ clean_dcache_range(start, stop);
+
+ if (IS_ENABLED(CONFIG_44x)) {
+ /*
+ * Flash invalidate on 44x because we are passed kmapped
+ * addresses and this doesn't work for userspace pages due to
+ * the virtually tagged icache.
+ */
+ iccci((void *)start);
+ mb(); /* sync */
+ isync();
+ } else
+ invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#ifdef CONFIG_HIGHMEM
+/**
+ * flush_dcache_icache_phys() - Flush a page by its physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+ unsigned long bytes = l1_dcache_bytes();
+ unsigned long nb = PAGE_SIZE / bytes;
+ unsigned long addr = physaddr & PAGE_MASK;
+ unsigned long msr, msr0;
+ unsigned long loop1 = addr, loop2 = addr;
+
+ msr0 = mfmsr();
+ msr = msr0 & ~MSR_DR;
+ /*
+ * This must remain as ASM to prevent potential memory accesses
+ * while the data MMU is disabled
+ */
+ asm volatile(
+ " mtctr %2;\n"
+ " mtmsr %3;\n"
+ " isync;\n"
+ "0: dcbst 0, %0;\n"
+ " addi %0, %0, %4;\n"
+ " bdnz 0b;\n"
+ " sync;\n"
+ " mtctr %2;\n"
+ "1: icbi 0, %1;\n"
+ " addi %1, %1, %4;\n"
+ " bdnz 1b;\n"
+ " sync;\n"
+ " mtmsr %5;\n"
+ " isync;\n"
+ : "+&r" (loop1), "+&r" (loop2)
+ : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+ : "ctr", "memory");
+}
+NOKPROBE_SYMBOL(flush_dcache_icache_phys)
+#else
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+}
+#endif
+
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @p: the address of the page to flush
+ */
+static void __flush_dcache_icache(void *p)
+{
+ unsigned long addr = (unsigned long)p & PAGE_MASK;
+
+ clean_dcache_range(addr, addr + PAGE_SIZE);
+
+ /*
+ * We don't flush the icache on 44x. Those have a virtual icache and we
+ * don't have access to the virtual address here (it's not the page
+ * vaddr but where it's mapped in user space). The flushing of the
+ * icache on these is handled elsewhere, when a change in the address
+ * space occurs, before returning to user space.
+ */
+
+ if (mmu_has_feature(MMU_FTR_TYPE_44x))
+ return;
+
+ invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
+void flush_dcache_icache_folio(struct folio *folio)
+{
+ unsigned int i, nr = folio_nr_pages(folio);
+
+ if (flush_coherent_icache())
+ return;
+
+ if (!folio_test_highmem(folio)) {
+ void *addr = folio_address(folio);
+ for (i = 0; i < nr; i++)
+ __flush_dcache_icache(addr + i * PAGE_SIZE);
+ } else if (IS_ENABLED(CONFIG_BOOKE) || sizeof(phys_addr_t) > sizeof(void *)) {
+ for (i = 0; i < nr; i++) {
+ void *start = kmap_local_folio(folio, i * PAGE_SIZE);
+
+ __flush_dcache_icache(start);
+ kunmap_local(start);
+ }
+ } else {
+ unsigned long pfn = folio_pfn(folio);
+ for (i = 0; i < nr; i++)
+ flush_dcache_icache_phys((pfn + i) * PAGE_SIZE);
+ }
+}
+EXPORT_SYMBOL(flush_dcache_icache_folio);
+
+void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
+{
+ clear_page(page);
+
+ /*
+ * We shouldn't have to do this, but some versions of glibc
+ * require it (ld.so assumes zero filled pages are icache clean)
+ * - Anton
+ */
+ flush_dcache_page(pg);
+}
+EXPORT_SYMBOL(clear_user_page);
+
+void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
+ struct page *pg)
+{
+ copy_page(vto, vfrom);
+
+ /*
+ * We should be able to use the following optimisation, however
+ * there are two problems.
+ * Firstly a bug in some versions of binutils meant PLT sections
+ * were not marked executable.
+ * Secondly the first word in the GOT section is blrl, used
+ * to establish the GOT address. Until recently the GOT was
+ * not marked executable.
+ * - Anton
+ */
+#if 0
+ if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
+ return;
+#endif
+
+ flush_dcache_page(pg);
+}
+
+void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long addr, int len)
+{
+ void *maddr;
+
+ maddr = kmap_local_page(page) + (addr & ~PAGE_MASK);
+ flush_icache_range((unsigned long)maddr, (unsigned long)maddr + len);
+ kunmap_local(maddr);
+}
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
new file mode 100644
index 000000000000..f5f8692e2c69
--- /dev/null
+++ b/arch/powerpc/mm/copro_fault.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CoProcessor (SPU/AFU) mm fault handler
+ *
+ * (C) Copyright IBM Deutschland Entwicklung GmbH 2007
+ *
+ * Author: Arnd Bergmann <arndb@de.ibm.com>
+ * Author: Jeremy Kerr <jk@ozlabs.org>
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <asm/reg.h>
+#include <asm/copro.h>
+
+/*
+ * This ought to be kept in sync with the powerpc specific do_page_fault
+ * function. Currently, there are a few corner cases that we haven't had
+ * to handle fortunately.
+ */
+int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
+ unsigned long dsisr, vm_fault_t *flt)
+{
+ struct vm_area_struct *vma;
+ unsigned long is_write;
+ int ret;
+
+ if (mm == NULL)
+ return -EFAULT;
+
+ if (mm->pgd == NULL)
+ return -EFAULT;
+
+ vma = lock_mm_and_find_vma(mm, ea, NULL);
+ if (!vma)
+ return -EFAULT;
+
+ ret = -EFAULT;
+ is_write = dsisr & DSISR_ISSTORE;
+ if (is_write) {
+ if (!(vma->vm_flags & VM_WRITE))
+ goto out_unlock;
+ } else {
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+ goto out_unlock;
+ /*
+ * PROT_NONE is covered by the VMA check above.
+ * and hash should get a NOHPTE fault instead of
+ * a PROTFAULT in case fixup is needed for things
+ * like autonuma.
+ */
+ if (!radix_enabled())
+ WARN_ON_ONCE(dsisr & DSISR_PROTFAULT);
+ }
+
+ ret = 0;
+ *flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (*flt & VM_FAULT_COMPLETED)
+ return 0;
+
+ if (unlikely(*flt & VM_FAULT_ERROR)) {
+ if (*flt & VM_FAULT_OOM) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ } else if (*flt & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV)) {
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+ BUG();
+ }
+
+out_unlock:
+ mmap_read_unlock(mm);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(copro_handle_mm_fault);
+
+#ifdef CONFIG_PPC_64S_HASH_MMU
+int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
+{
+ u64 vsid, vsidkey;
+ int psize, ssize;
+
+ switch (get_region_id(ea)) {
+ case USER_REGION_ID:
+ pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+ if (mm == NULL)
+ return 1;
+ psize = get_slice_psize(mm, ea);
+ ssize = user_segment_size(ea);
+ vsid = get_user_vsid(&mm->context, ea, ssize);
+ vsidkey = SLB_VSID_USER;
+ break;
+ case VMALLOC_REGION_ID:
+ pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
+ psize = mmu_vmalloc_psize;
+ ssize = mmu_kernel_ssize;
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
+ break;
+ case IO_REGION_ID:
+ pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
+ psize = mmu_io_psize;
+ ssize = mmu_kernel_ssize;
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
+ break;
+ case LINEAR_MAP_REGION_ID:
+ pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
+ psize = mmu_linear_psize;
+ ssize = mmu_kernel_ssize;
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
+ break;
+ default:
+ pr_debug("%s: invalid region access at %016llx\n", __func__, ea);
+ return 1;
+ }
+ /* Bad address */
+ if (!vsid)
+ return 1;
+
+ vsid = (vsid << slb_vsid_shift(ssize)) | vsidkey;
+
+ vsid |= mmu_psize_defs[psize].sllp |
+ ((ssize == MMU_SEGSIZE_1T) ? SLB_VSID_B_1T : 0);
+
+ slb->esid = (ea & (ssize == MMU_SEGSIZE_1T ? ESID_MASK_1T : ESID_MASK)) | SLB_ESID_V;
+ slb->vsid = vsid;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(copro_calculate_slb);
+#endif
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index d85e86aac7fb..30260b5d146d 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -1,320 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PowerPC version derived from arch/arm/mm/consistent.c
* Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
*
* Copyright (C) 2000 Russell King
- *
- * Consistent memory allocators. Used for DMA devices that want to
- * share uncached memory with the processor core. The function return
- * is the virtual address and 'dma_handle' is the physical address.
- * Mostly stolen from the ARM port, with some changes for PowerPC.
- * -- Dan
- *
- * Reorganized to get rid of the arch-specific consistent_* functions
- * and provide non-coherent implementations for the DMA API. -Matt
- *
- * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
- * implementation. This is pulled straight from ARM and barely
- * modified. -Matt
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
-#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
-#include <linux/dma-mapping.h>
-#include <linux/export.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
#include <asm/tlbflush.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
-
-/*
- * This address range defaults to a value that is safe for all
- * platforms which currently set CONFIG_NOT_COHERENT_CACHE. It
- * can be further configured for specific applications under
- * the "Advanced Setup" menu. -Matt
- */
-#define CONSISTENT_BASE (IOREMAP_TOP)
-#define CONSISTENT_END (CONSISTENT_BASE + CONFIG_CONSISTENT_SIZE)
-#define CONSISTENT_OFFSET(x) (((unsigned long)(x) - CONSISTENT_BASE) >> PAGE_SHIFT)
-
-/*
- * This is the page table (2MB) covering uncached, DMA consistent allocations
- */
-static DEFINE_SPINLOCK(consistent_lock);
-
-/*
- * VM region handling support.
- *
- * This should become something generic, handling VM region allocations for
- * vmalloc and similar (ioremap, module space, etc).
- *
- * I envisage vmalloc()'s supporting vm_struct becoming:
- *
- * struct vm_struct {
- * struct vm_region region;
- * unsigned long flags;
- * struct page **pages;
- * unsigned int nr_pages;
- * unsigned long phys_addr;
- * };
- *
- * get_vm_area() would then call vm_region_alloc with an appropriate
- * struct vm_region head (eg):
- *
- * struct vm_region vmalloc_head = {
- * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
- * .vm_start = VMALLOC_START,
- * .vm_end = VMALLOC_END,
- * };
- *
- * However, vmalloc_head.vm_start is variable (typically, it is dependent on
- * the amount of RAM found at boot time.) I would imagine that get_vm_area()
- * would have to initialise this each time prior to calling vm_region_alloc().
- */
-struct ppc_vm_region {
- struct list_head vm_list;
- unsigned long vm_start;
- unsigned long vm_end;
-};
-
-static struct ppc_vm_region consistent_head = {
- .vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
- .vm_start = CONSISTENT_BASE,
- .vm_end = CONSISTENT_END,
-};
-
-static struct ppc_vm_region *
-ppc_vm_region_alloc(struct ppc_vm_region *head, size_t size, gfp_t gfp)
-{
- unsigned long addr = head->vm_start, end = head->vm_end - size;
- unsigned long flags;
- struct ppc_vm_region *c, *new;
-
- new = kmalloc(sizeof(struct ppc_vm_region), gfp);
- if (!new)
- goto out;
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if ((addr + size) < addr)
- goto nospc;
- if ((addr + size) <= c->vm_start)
- goto found;
- addr = c->vm_end;
- if (addr > end)
- goto nospc;
- }
-
- found:
- /*
- * Insert this entry _before_ the one we found.
- */
- list_add_tail(&new->vm_list, &c->vm_list);
- new->vm_start = addr;
- new->vm_end = addr + size;
-
- spin_unlock_irqrestore(&consistent_lock, flags);
- return new;
-
- nospc:
- spin_unlock_irqrestore(&consistent_lock, flags);
- kfree(new);
- out:
- return NULL;
-}
-
-static struct ppc_vm_region *ppc_vm_region_find(struct ppc_vm_region *head, unsigned long addr)
-{
- struct ppc_vm_region *c;
-
- list_for_each_entry(c, &head->vm_list, vm_list) {
- if (c->vm_start == addr)
- goto out;
- }
- c = NULL;
- out:
- return c;
-}
-
-/*
- * Allocate DMA-coherent memory space and return both the kernel remapped
- * virtual and bus address for that space.
- */
-void *
-__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp)
-{
- struct page *page;
- struct ppc_vm_region *c;
- unsigned long order;
- u64 mask = ISA_DMA_THRESHOLD, limit;
-
- if (dev) {
- mask = dev->coherent_dma_mask;
-
- /*
- * Sanity check the DMA mask - it must be non-zero, and
- * must be able to be satisfied by a DMA allocation.
- */
- if (mask == 0) {
- dev_warn(dev, "coherent DMA mask is unset\n");
- goto no_page;
- }
-
- if ((~mask) & ISA_DMA_THRESHOLD) {
- dev_warn(dev, "coherent DMA mask %#llx is smaller "
- "than system GFP_DMA mask %#llx\n",
- mask, (unsigned long long)ISA_DMA_THRESHOLD);
- goto no_page;
- }
- }
-
-
- size = PAGE_ALIGN(size);
- limit = (mask + 1) & ~mask;
- if ((limit && size >= limit) ||
- size >= (CONSISTENT_END - CONSISTENT_BASE)) {
- printk(KERN_WARNING "coherent allocation too big (requested %#x mask %#Lx)\n",
- size, mask);
- return NULL;
- }
-
- order = get_order(size);
-
- /* Might be useful if we ever have a real legacy DMA zone... */
- if (mask != 0xffffffff)
- gfp |= GFP_DMA;
-
- page = alloc_pages(gfp, order);
- if (!page)
- goto no_page;
-
- /*
- * Invalidate any data that might be lurking in the
- * kernel direct-mapped region for device DMA.
- */
- {
- unsigned long kaddr = (unsigned long)page_address(page);
- memset(page_address(page), 0, size);
- flush_dcache_range(kaddr, kaddr + size);
- }
-
- /*
- * Allocate a virtual address in the consistent mapping region.
- */
- c = ppc_vm_region_alloc(&consistent_head, size,
- gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
- if (c) {
- unsigned long vaddr = c->vm_start;
- struct page *end = page + (1 << order);
-
- split_page(page, order);
-
- /*
- * Set the "dma handle"
- */
- *handle = page_to_phys(page);
-
- do {
- SetPageReserved(page);
- map_page(vaddr, page_to_phys(page),
- pgprot_noncached(PAGE_KERNEL));
- page++;
- vaddr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- /*
- * Free the otherwise unused pages.
- */
- while (page < end) {
- __free_page(page);
- page++;
- }
-
- return (void *)c->vm_start;
- }
-
- if (page)
- __free_pages(page, order);
- no_page:
- return NULL;
-}
-EXPORT_SYMBOL(__dma_alloc_coherent);
-
-/*
- * free a page as defined by the above mapping.
- */
-void __dma_free_coherent(size_t size, void *vaddr)
-{
- struct ppc_vm_region *c;
- unsigned long flags, addr;
-
- size = PAGE_ALIGN(size);
-
- spin_lock_irqsave(&consistent_lock, flags);
-
- c = ppc_vm_region_find(&consistent_head, (unsigned long)vaddr);
- if (!c)
- goto no_area;
-
- if ((c->vm_end - c->vm_start) != size) {
- printk(KERN_ERR "%s: freeing wrong coherent size (%ld != %d)\n",
- __func__, c->vm_end - c->vm_start, size);
- dump_stack();
- size = c->vm_end - c->vm_start;
- }
-
- addr = c->vm_start;
- do {
- pte_t *ptep;
- unsigned long pfn;
-
- ptep = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(addr),
- addr),
- addr),
- addr);
- if (!pte_none(*ptep) && pte_present(*ptep)) {
- pfn = pte_pfn(*ptep);
- pte_clear(&init_mm, addr, ptep);
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
- __free_reserved_page(page);
- }
- }
- addr += PAGE_SIZE;
- } while (size -= PAGE_SIZE);
-
- flush_tlb_kernel_range(c->vm_start, c->vm_end);
-
- list_del(&c->vm_list);
-
- spin_unlock_irqrestore(&consistent_lock, flags);
-
- kfree(c);
- return;
-
- no_area:
- spin_unlock_irqrestore(&consistent_lock, flags);
- printk(KERN_ERR "%s: trying to free invalid coherent area: %p\n",
- __func__, vaddr);
- dump_stack();
-}
-EXPORT_SYMBOL(__dma_free_coherent);
-
/*
* make an area consistent.
*/
-void __dma_sync(void *vaddr, size_t size, int direction)
+static void __dma_sync(void *vaddr, size_t size, int direction)
{
unsigned long start = (unsigned long)vaddr;
unsigned long end = start + size;
@@ -327,7 +32,7 @@ void __dma_sync(void *vaddr, size_t size, int direction)
* invalidate only when cache-line aligned otherwise there is
* the potential for discarding uncommitted data from the cache
*/
- if ((start & (L1_CACHE_BYTES - 1)) || (size & (L1_CACHE_BYTES - 1)))
+ if ((start | end) & (L1_CACHE_BYTES - 1))
flush_dcache_range(start, end);
else
invalidate_dcache_range(start, end);
@@ -340,7 +45,6 @@ void __dma_sync(void *vaddr, size_t size, int direction)
break;
}
}
-EXPORT_SYMBOL(__dma_sync);
#ifdef CONFIG_HIGHMEM
/*
@@ -387,34 +91,34 @@ static inline void __dma_sync_page_highmem(struct page *page,
* __dma_sync_page makes memory consistent. identical to __dma_sync, but
* takes a struct page instead of a virtual address
*/
-void __dma_sync_page(struct page *page, unsigned long offset,
- size_t size, int direction)
+static void __dma_sync_page(phys_addr_t paddr, size_t size, int dir)
{
+ struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
+ unsigned offset = paddr & ~PAGE_MASK;
+
#ifdef CONFIG_HIGHMEM
- __dma_sync_page_highmem(page, offset, size, direction);
+ __dma_sync_page_highmem(page, offset, size, dir);
#else
unsigned long start = (unsigned long)page_address(page) + offset;
- __dma_sync((void *)start, size, direction);
+ __dma_sync((void *)start, size, dir);
#endif
}
-EXPORT_SYMBOL(__dma_sync_page);
-/*
- * Return the PFN for a given cpu virtual address returned by
- * __dma_alloc_coherent. This is used by dma_mmap_coherent()
- */
-unsigned long __dma_get_coherent_pfn(unsigned long cpu_addr)
+void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+{
+ __dma_sync_page(paddr, size, dir);
+}
+
+void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size,
+ enum dma_data_direction dir)
+{
+ __dma_sync_page(paddr, size, dir);
+}
+
+void arch_dma_prep_coherent(struct page *page, size_t size)
{
- /* This should always be populated, so we don't test every
- * level. If that fails, we'll have a nice crash which
- * will be as good as a BUG_ON()
- */
- pgd_t *pgd = pgd_offset_k(cpu_addr);
- pud_t *pud = pud_offset(pgd, cpu_addr);
- pmd_t *pmd = pmd_offset(pud, cpu_addr);
- pte_t *ptep = pte_offset_kernel(pmd, cpu_addr);
+ unsigned long kaddr = (unsigned long)page_address(page);
- if (pte_none(*ptep) || !pte_present(*ptep))
- return 0;
- return pte_pfn(*ptep);
+ flush_dcache_range(kaddr, kaddr + size);
}
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
new file mode 100644
index 000000000000..8dd7b340d51f
--- /dev/null
+++ b/arch/powerpc/mm/drmem.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Dynamic reconfiguration memory support
+ *
+ * Copyright 2017 IBM Corporation
+ */
+
+#define pr_fmt(fmt) "drmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <asm/drmem.h>
+
+static int n_root_addr_cells, n_root_size_cells;
+
+static struct drmem_lmb_info __drmem_info;
+struct drmem_lmb_info *drmem_info = &__drmem_info;
+static bool in_drmem_update;
+
+u64 drmem_lmb_memory_max(void)
+{
+ struct drmem_lmb *last_lmb;
+
+ last_lmb = &drmem_info->lmbs[drmem_info->n_lmbs - 1];
+ return last_lmb->base_addr + drmem_lmb_size();
+}
+
+static u32 drmem_lmb_flags(struct drmem_lmb *lmb)
+{
+ /*
+ * Return the value of the lmb flags field minus the reserved
+ * bit used internally for hotplug processing.
+ */
+ return lmb->flags & ~DRMEM_LMB_RESERVED;
+}
+
+static struct property *clone_property(struct property *prop, u32 prop_sz)
+{
+ struct property *new_prop;
+
+ new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL);
+ if (!new_prop)
+ return NULL;
+
+ new_prop->name = kstrdup(prop->name, GFP_KERNEL);
+ new_prop->value = kzalloc(prop_sz, GFP_KERNEL);
+ if (!new_prop->name || !new_prop->value) {
+ kfree(new_prop->name);
+ kfree(new_prop->value);
+ kfree(new_prop);
+ return NULL;
+ }
+
+ new_prop->length = prop_sz;
+#if defined(CONFIG_OF_DYNAMIC)
+ of_property_set_flag(new_prop, OF_DYNAMIC);
+#endif
+ return new_prop;
+}
+
+static int drmem_update_dt_v1(struct device_node *memory,
+ struct property *prop)
+{
+ struct property *new_prop;
+ struct of_drconf_cell_v1 *dr_cell;
+ struct drmem_lmb *lmb;
+ __be32 *p;
+
+ new_prop = clone_property(prop, prop->length);
+ if (!new_prop)
+ return -1;
+
+ p = new_prop->value;
+ *p++ = cpu_to_be32(drmem_info->n_lmbs);
+
+ dr_cell = (struct of_drconf_cell_v1 *)p;
+
+ for_each_drmem_lmb(lmb) {
+ dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+ dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+ dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+ dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+
+ dr_cell++;
+ }
+
+ of_update_property(memory, new_prop);
+ return 0;
+}
+
+static void init_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+ struct drmem_lmb *lmb)
+{
+ dr_cell->base_addr = cpu_to_be64(lmb->base_addr);
+ dr_cell->drc_index = cpu_to_be32(lmb->drc_index);
+ dr_cell->aa_index = cpu_to_be32(lmb->aa_index);
+ dr_cell->flags = cpu_to_be32(drmem_lmb_flags(lmb));
+}
+
+static int drmem_update_dt_v2(struct device_node *memory,
+ struct property *prop)
+{
+ struct property *new_prop;
+ struct of_drconf_cell_v2 *dr_cell;
+ struct drmem_lmb *lmb, *prev_lmb;
+ u32 lmb_sets, prop_sz, seq_lmbs;
+ u32 *p;
+
+ /* First pass, determine how many LMB sets are needed. */
+ lmb_sets = 0;
+ prev_lmb = NULL;
+ for_each_drmem_lmb(lmb) {
+ if (!prev_lmb) {
+ prev_lmb = lmb;
+ lmb_sets++;
+ continue;
+ }
+
+ if (prev_lmb->aa_index != lmb->aa_index ||
+ drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb))
+ lmb_sets++;
+
+ prev_lmb = lmb;
+ }
+
+ prop_sz = lmb_sets * sizeof(*dr_cell) + sizeof(__be32);
+ new_prop = clone_property(prop, prop_sz);
+ if (!new_prop)
+ return -1;
+
+ p = new_prop->value;
+ *p++ = cpu_to_be32(lmb_sets);
+
+ dr_cell = (struct of_drconf_cell_v2 *)p;
+
+ /* Second pass, populate the LMB set data */
+ prev_lmb = NULL;
+ seq_lmbs = 0;
+ for_each_drmem_lmb(lmb) {
+ if (prev_lmb == NULL) {
+ /* Start of first LMB set */
+ prev_lmb = lmb;
+ init_drconf_v2_cell(dr_cell, lmb);
+ seq_lmbs++;
+ continue;
+ }
+
+ if (prev_lmb->aa_index != lmb->aa_index ||
+ drmem_lmb_flags(prev_lmb) != drmem_lmb_flags(lmb)) {
+ /* end of one set, start of another */
+ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+ dr_cell++;
+
+ init_drconf_v2_cell(dr_cell, lmb);
+ seq_lmbs = 1;
+ } else {
+ seq_lmbs++;
+ }
+
+ prev_lmb = lmb;
+ }
+
+ /* close out last LMB set */
+ dr_cell->seq_lmbs = cpu_to_be32(seq_lmbs);
+ of_update_property(memory, new_prop);
+ return 0;
+}
+
+int drmem_update_dt(void)
+{
+ struct device_node *memory;
+ struct property *prop;
+ int rc = -1;
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!memory)
+ return -1;
+
+ /*
+ * Set in_drmem_update to prevent the notifier callback from processing
+ * the DT property back, since the change is coming from the LMB tree.
+ */
+ in_drmem_update = true;
+ prop = of_find_property(memory, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ rc = drmem_update_dt_v1(memory, prop);
+ } else {
+ prop = of_find_property(memory, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ rc = drmem_update_dt_v2(memory, prop);
+ }
+ in_drmem_update = false;
+
+ of_node_put(memory);
+ return rc;
+}
+
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
+ const __be32 **prop)
+{
+ const __be32 *p = *prop;
+
+ lmb->base_addr = of_read_number(p, n_root_addr_cells);
+ p += n_root_addr_cells;
+ lmb->drc_index = of_read_number(p++, 1);
+
+ p++; /* skip reserved field */
+
+ lmb->aa_index = of_read_number(p++, 1);
+ lmb->flags = of_read_number(p++, 1);
+
+ *prop = p;
+}
+
+static int
+__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+ struct drmem_lmb lmb;
+ u32 i, n_lmbs;
+ int ret = 0;
+
+ n_lmbs = of_read_number(prop++, 1);
+ for (i = 0; i < n_lmbs; i++) {
+ read_drconf_v1_cell(&lmb, &prop);
+ ret = func(&lmb, &usm, data);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+ const __be32 **prop)
+{
+ const __be32 *p = *prop;
+
+ dr_cell->seq_lmbs = of_read_number(p++, 1);
+ dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
+ p += n_root_addr_cells;
+ dr_cell->drc_index = of_read_number(p++, 1);
+ dr_cell->aa_index = of_read_number(p++, 1);
+ dr_cell->flags = of_read_number(p++, 1);
+
+ *prop = p;
+}
+
+static int
+__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+ struct of_drconf_cell_v2 dr_cell;
+ struct drmem_lmb lmb;
+ u32 i, j, lmb_sets;
+ int ret = 0;
+
+ lmb_sets = of_read_number(prop++, 1);
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &prop);
+
+ for (j = 0; j < dr_cell.seq_lmbs; j++) {
+ lmb.base_addr = dr_cell.base_addr;
+ dr_cell.base_addr += drmem_lmb_size();
+
+ lmb.drc_index = dr_cell.drc_index;
+ dr_cell.drc_index++;
+
+ lmb.aa_index = dr_cell.aa_index;
+ lmb.flags = dr_cell.flags;
+
+ ret = func(&lmb, &usm, data);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+int __init walk_drmem_lmbs_early(unsigned long node, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+ const __be32 *prop, *usm;
+ int len, ret = -ENODEV;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+ if (!prop || len < dt_root_size_cells * sizeof(__be32))
+ return ret;
+
+ /* Get the address & size cells */
+ n_root_addr_cells = dt_root_addr_cells;
+ n_root_size_cells = dt_root_size_cells;
+
+ drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+
+ usm = of_get_flat_dt_prop(node, "linux,drconf-usable-memory", &len);
+
+ prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
+ if (prop) {
+ ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
+ } else {
+ prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
+ &len);
+ if (prop)
+ ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
+ }
+
+ memblock_dump_all();
+ return ret;
+}
+
+/*
+ * Update the LMB associativity index.
+ */
+static int update_lmb(struct drmem_lmb *updated_lmb,
+ __maybe_unused const __be32 **usm,
+ __maybe_unused void *data)
+{
+ struct drmem_lmb *lmb;
+
+ for_each_drmem_lmb(lmb) {
+ if (lmb->drc_index != updated_lmb->drc_index)
+ continue;
+
+ lmb->aa_index = updated_lmb->aa_index;
+ break;
+ }
+ return 0;
+}
+
+/*
+ * Update the LMB associativity index.
+ *
+ * This needs to be called when the hypervisor is updating the
+ * dynamic-reconfiguration-memory node property.
+ */
+void drmem_update_lmbs(struct property *prop)
+{
+ /*
+ * Don't update the LMBs if triggered by the update done in
+ * drmem_update_dt(); the LMB values have been used to update the DT
+ * property in that case.
+ */
+ if (in_drmem_update)
+ return;
+ if (!strcmp(prop->name, "ibm,dynamic-memory"))
+ __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
+ else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
+ __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
+}
+#endif
+
+static int init_drmem_lmb_size(struct device_node *dn)
+{
+ const __be32 *prop;
+ int len;
+
+ if (drmem_info->lmb_size)
+ return 0;
+
+ prop = of_get_property(dn, "ibm,lmb-size", &len);
+ if (!prop || len < n_root_size_cells * sizeof(__be32)) {
+ pr_info("Could not determine LMB size\n");
+ return -1;
+ }
+
+ drmem_info->lmb_size = of_read_number(prop, n_root_size_cells);
+ return 0;
+}
+
+/*
+ * Returns the property linux,drconf-usable-memory if
+ * it exists (the property exists only in kexec/kdump kernels,
+ * added by kexec-tools)
+ */
+static const __be32 *of_get_usable_memory(struct device_node *dn)
+{
+ const __be32 *prop;
+ u32 len;
+
+ prop = of_get_property(dn, "linux,drconf-usable-memory", &len);
+ if (!prop || len < sizeof(unsigned int))
+ return NULL;
+
+ return prop;
+}
+
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+ int (*func)(struct drmem_lmb *, const __be32 **, void *))
+{
+ struct device_node *root = of_find_node_by_path("/");
+ const __be32 *prop, *usm;
+ int ret = -ENODEV;
+
+ if (!root)
+ return ret;
+
+ /* Get the address & size cells */
+ n_root_addr_cells = of_n_addr_cells(root);
+ n_root_size_cells = of_n_size_cells(root);
+ of_node_put(root);
+
+ if (init_drmem_lmb_size(dn))
+ return ret;
+
+ usm = of_get_usable_memory(dn);
+
+ prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
+ } else {
+ prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
+ }
+
+ return ret;
+}
+
+static void __init init_drmem_v1_lmbs(const __be32 *prop)
+{
+ struct drmem_lmb *lmb;
+
+ drmem_info->n_lmbs = of_read_number(prop++, 1);
+ if (drmem_info->n_lmbs == 0)
+ return;
+
+ drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+ GFP_KERNEL);
+ if (!drmem_info->lmbs)
+ return;
+
+ for_each_drmem_lmb(lmb)
+ read_drconf_v1_cell(lmb, &prop);
+}
+
+static void __init init_drmem_v2_lmbs(const __be32 *prop)
+{
+ struct drmem_lmb *lmb;
+ struct of_drconf_cell_v2 dr_cell;
+ const __be32 *p;
+ u32 i, j, lmb_sets;
+ int lmb_index;
+
+ lmb_sets = of_read_number(prop++, 1);
+ if (lmb_sets == 0)
+ return;
+
+ /* first pass, calculate the number of LMBs */
+ p = prop;
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &p);
+ drmem_info->n_lmbs += dr_cell.seq_lmbs;
+ }
+
+ drmem_info->lmbs = kcalloc(drmem_info->n_lmbs, sizeof(*lmb),
+ GFP_KERNEL);
+ if (!drmem_info->lmbs)
+ return;
+
+ /* second pass, read in the LMB information */
+ lmb_index = 0;
+ p = prop;
+
+ for (i = 0; i < lmb_sets; i++) {
+ read_drconf_v2_cell(&dr_cell, &p);
+
+ for (j = 0; j < dr_cell.seq_lmbs; j++) {
+ lmb = &drmem_info->lmbs[lmb_index++];
+
+ lmb->base_addr = dr_cell.base_addr;
+ dr_cell.base_addr += drmem_info->lmb_size;
+
+ lmb->drc_index = dr_cell.drc_index;
+ dr_cell.drc_index++;
+
+ lmb->aa_index = dr_cell.aa_index;
+ lmb->flags = dr_cell.flags;
+ }
+ }
+}
+
+static int __init drmem_init(void)
+{
+ struct device_node *dn;
+ const __be32 *prop;
+
+ dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!dn)
+ return 0;
+
+ if (init_drmem_lmb_size(dn)) {
+ of_node_put(dn);
+ return 0;
+ }
+
+ prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
+ if (prop) {
+ init_drmem_v1_lmbs(prop);
+ } else {
+ prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
+ if (prop)
+ init_drmem_v2_lmbs(prop);
+ }
+
+ of_node_put(dn);
+ return 0;
+}
+late_initcall(drmem_init);
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 51ab9e7e6c39..806c74e0d5ab 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -8,538 +9,689 @@
* Modified by Cort Dougan and Paul Mackerras.
*
* Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/signal.h>
#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
+#include <linux/string_choices.h>
#include <linux/types.h>
+#include <linux/pagemap.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
-#include <linux/module.h>
+#include <linux/extable.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/perf_event.h>
-#include <linux/magic.h>
#include <linux/ratelimit.h>
#include <linux/context_tracking.h>
+#include <linux/hugetlb.h>
+#include <linux/uaccess.h>
+#include <linux/kfence.h>
+#include <linux/pkeys.h>
#include <asm/firmware.h>
+#include <asm/interrupt.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
-#include <asm/uaccess.h>
-#include <asm/tlbflush.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
-#include <mm/mmu_decl.h>
+#include <asm/kup.h>
+#include <asm/inst.h>
+
-#include "icswx.h"
+/*
+ * do_page_fault error handling helpers
+ */
-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs)
+static int
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code)
{
- int ret = 0;
+ /*
+ * If we are in kernel mode, bail out with a SEGV, this will
+ * be caught by the assembly which will restore the non-volatile
+ * registers before calling bad_page_fault()
+ */
+ if (!user_mode(regs))
+ return SIGSEGV;
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 11))
- ret = 1;
- preempt_enable();
- }
+ _exception(SIGSEGV, regs, si_code, address);
- return ret;
+ return 0;
}
-#else
-static inline int notify_page_fault(struct pt_regs *regs)
+
+static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address)
{
- return 0;
+ return __bad_area_nosemaphore(regs, address, SEGV_MAPERR);
}
-#endif
-/*
- * Check whether the instruction at regs->nip is a store using
- * an update addressing form which will update r1.
- */
-static int store_updates_sp(struct pt_regs *regs)
+static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code,
+ struct mm_struct *mm, struct vm_area_struct *vma)
{
- unsigned int inst;
- if (get_user(inst, (unsigned int __user *)regs->nip))
- return 0;
- /* check for 1 in the rA field */
- if (((inst >> 16) & 0x1f) != 1)
- return 0;
- /* check major opcode */
- switch (inst >> 26) {
- case 37: /* stwu */
- case 39: /* stbu */
- case 45: /* sthu */
- case 53: /* stfsu */
- case 55: /* stfdu */
- return 1;
- case 62: /* std or stdu */
- return (inst & 3) == 1;
- case 31:
- /* check minor opcode */
- switch ((inst >> 1) & 0x3ff) {
- case 181: /* stdux */
- case 183: /* stwux */
- case 247: /* stbux */
- case 439: /* sthux */
- case 695: /* stfsux */
- case 759: /* stfdux */
- return 1;
- }
- }
+ /*
+ * Something tried to access memory that isn't in our memory map..
+ * Fix it, but check if it's kernel or user first..
+ */
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
+
+ return __bad_area_nosemaphore(regs, address, si_code);
+}
+
+static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address,
+ struct mm_struct *mm,
+ struct vm_area_struct *vma)
+{
+ int pkey;
+
+ /*
+ * We don't try to fetch the pkey from page table because reading
+ * page table without locking doesn't guarantee stable pte value.
+ * Hence the pkey value that we return to userspace can be different
+ * from the pkey that actually caused access error.
+ *
+ * It does *not* guarantee that the VMA we find here
+ * was the one that we faulted on.
+ *
+ * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4);
+ * 2. T1 : set AMR to deny access to pkey=4, touches, page
+ * 3. T1 : faults...
+ * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+ * 5. T1 : enters fault handler, takes mmap_lock, etc...
+ * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
+ * faulted on a pte with its pkey=4.
+ */
+ pkey = vma_pkey(vma);
+
+ if (mm)
+ mmap_read_unlock(mm);
+ else
+ vma_end_read(vma);
+
+ /*
+ * If we are in kernel mode, bail out with a SEGV, this will
+ * be caught by the assembly which will restore the non-volatile
+ * registers before calling bad_page_fault()
+ */
+ if (!user_mode(regs))
+ return SIGSEGV;
+
+ _exception_pkey(regs, address, pkey);
+
return 0;
}
-/*
- * do_page_fault error handling helpers
- */
-#define MM_FAULT_RETURN 0
-#define MM_FAULT_CONTINUE -1
-#define MM_FAULT_ERR(sig) (sig)
+static noinline int bad_access(struct pt_regs *regs, unsigned long address,
+ struct mm_struct *mm, struct vm_area_struct *vma)
+{
+ return __bad_area(regs, address, SEGV_ACCERR, mm, vma);
+}
-static int do_sigbus(struct pt_regs *regs, unsigned long address)
+static int do_sigbus(struct pt_regs *regs, unsigned long address,
+ vm_fault_t fault)
{
- siginfo_t info;
-
- up_read(&current->mm->mmap_sem);
-
- if (user_mode(regs)) {
- current->thread.trap_nr = BUS_ADRERR;
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return MM_FAULT_RETURN;
+ if (!user_mode(regs))
+ return SIGBUS;
+
+ current->thread.trap_nr = BUS_ADRERR;
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
+ unsigned int lsb = 0; /* shutup gcc */
+
+ pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ current->comm, current->pid, address);
+
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+
+ force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
+ return 0;
}
- return MM_FAULT_ERR(SIGBUS);
+
+#endif
+ force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+ return 0;
}
-static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
+ vm_fault_t fault)
{
/*
- * Pagefault was interrupted by SIGKILL. We have no reason to
- * continue the pagefault.
+ * Kernel page fault interrupted by SIGKILL. We have no reason to
+ * continue processing.
*/
- if (fatal_signal_pending(current)) {
- /*
- * If we have retry set, the mmap semaphore will have
- * alrady been released in __lock_page_or_retry(). Else
- * we release it now.
- */
- if (!(fault & VM_FAULT_RETRY))
- up_read(&current->mm->mmap_sem);
- /* Coming from kernel, we need to deal with uaccess fixups */
- if (user_mode(regs))
- return MM_FAULT_RETURN;
- return MM_FAULT_ERR(SIGKILL);
- }
-
- /* No fault: be happy */
- if (!(fault & VM_FAULT_ERROR))
- return MM_FAULT_CONTINUE;
+ if (fatal_signal_pending(current) && !user_mode(regs))
+ return SIGKILL;
/* Out of memory */
if (fault & VM_FAULT_OOM) {
- up_read(&current->mm->mmap_sem);
-
/*
* We ran out of memory, or some other thing happened to us that
* made us unable to handle the page fault gracefully.
*/
if (!user_mode(regs))
- return MM_FAULT_ERR(SIGKILL);
+ return SIGSEGV;
pagefault_out_of_memory();
- return MM_FAULT_RETURN;
+ } else {
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
+ return do_sigbus(regs, addr, fault);
+ else if (fault & VM_FAULT_SIGSEGV)
+ return bad_area_nosemaphore(regs, addr);
+ else
+ BUG();
+ }
+ return 0;
+}
+
+/* Is this a bad kernel fault ? */
+static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, bool is_write)
+{
+ int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
+
+ if (is_exec) {
+ pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
+ address >= TASK_SIZE ? "exec-protected" : "user",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+
+ // Kernel exec fault is always bad
+ return true;
}
- /* Bus error. x86 handles HWPOISON here, we'll add this if/when
- * we support the feature in HW
+ // Kernel fault on kernel address is bad
+ if (address >= TASK_SIZE)
+ return true;
+
+ // Read/write fault blocked by KUAP is bad, it can never succeed.
+ if (bad_kuap_fault(regs, address, is_write)) {
+ pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n",
+ str_write_read(is_write), address,
+ from_kuid(&init_user_ns, current_uid()));
+
+ // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
+ if (!search_exception_tables(regs->nip))
+ return true;
+
+ // Read/write fault in a valid region (the exception table search passed
+ // above), but blocked by KUAP is bad, it can never succeed.
+ return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read");
+ }
+
+ // What's left? Kernel fault on user and allowed by KUAP in the faulting context.
+ return false;
+}
+
+static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
+ struct vm_area_struct *vma)
+{
+ /*
+ * Make sure to check the VMA so that we do not perform
+ * faults just to hit a pkey fault as soon as we fill in a
+ * page. Only called for current mm, hence foreign == 0
*/
- if (fault & VM_FAULT_SIGBUS)
- return do_sigbus(regs, addr);
+ if (!arch_vma_access_permitted(vma, is_write, is_exec, 0))
+ return true;
- /* We don't understand the fault code, this is fatal */
- BUG();
- return MM_FAULT_CONTINUE;
+ return false;
+}
+
+static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma)
+{
+ /*
+ * Allow execution from readable areas if the MMU does not
+ * provide separate controls over reading and executing.
+ *
+ * Note: That code used to not be enabled for 4xx/BookE.
+ * It is now as I/D cache coherency for these is done at
+ * set_pte_at() time and I see no reason why the test
+ * below wouldn't be valid on those processors. This -may-
+ * break programs compiled with a really old ABI though.
+ */
+ if (is_exec) {
+ return !(vma->vm_flags & VM_EXEC) &&
+ (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
+ !(vma->vm_flags & (VM_READ | VM_WRITE)));
+ }
+
+ if (is_write) {
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
+ return true;
+ return false;
+ }
+
+ /*
+ * VM_READ, VM_WRITE and VM_EXEC may imply read permissions, as
+ * defined in protection_map[]. In that case Read faults can only be
+ * caused by a PROT_NONE mapping. However a non exec access on a
+ * VM_EXEC only mapping is invalid anyway, so report it as such.
+ */
+ if (unlikely(!vma_is_accessible(vma)))
+ return true;
+
+ if ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)
+ return true;
+
+ /*
+ * We should ideally do the vma pkey access check here. But in the
+ * fault path, handle_mm_fault() also does the same check. To avoid
+ * these multiple checks, we skip it here and handle access error due
+ * to pkeys later.
+ */
+ return false;
+}
+
+#ifdef CONFIG_PPC_SMLPAR
+static inline void cmo_account_page_fault(void)
+{
+ if (firmware_has_feature(FW_FEATURE_CMO)) {
+ u32 page_ins;
+
+ preempt_disable();
+ page_ins = be32_to_cpu(get_lppaca()->page_ins);
+ page_ins += 1 << PAGE_FACTOR;
+ get_lppaca()->page_ins = cpu_to_be32(page_ins);
+ preempt_enable();
+ }
+}
+#else
+static inline void cmo_account_page_fault(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+
+static void sanity_check_fault(bool is_write, bool is_user,
+ unsigned long error_code, unsigned long address)
+{
+ /*
+ * Userspace trying to access kernel address, we get PROTFAULT for that.
+ */
+ if (is_user && address >= TASK_SIZE) {
+ if ((long)address == -1)
+ return;
+
+ pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n",
+ current->comm, current->pid, address,
+ from_kuid(&init_user_ns, current_uid()));
+ return;
+ }
+
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S))
+ return;
+
+ /*
+ * For hash translation mode, we should never get a
+ * PROTFAULT. Any update to pte to reduce access will result in us
+ * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+ * fault instead of DSISR_PROTFAULT.
+ *
+ * A pte update to relax the access will not result in a hash page table
+ * entry invalidate and hence can result in DSISR_PROTFAULT.
+ * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+ * the special !is_write in the below conditional.
+ *
+ * For platforms that don't support coherent icache and do support
+ * per page noexec bit, we do setup things such that we do the
+ * sync between D/I cache via fault. But that is handled via low level
+ * hash fault code (hash_page_do_lazy_icache()) and we should not reach
+ * here in such case.
+ *
+ * For wrong access that can result in PROTFAULT, the above vma->vm_flags
+ * check should handle those and hence we should fall to the bad_area
+ * handling correctly.
+ *
+ * For embedded with per page exec support that doesn't support coherent
+ * icache we do get PROTFAULT and we handle that D/I cache sync in
+ * set_pte_at while taking the noexec/prot fault. Hence this WARN_ON
+ * is conditional for server MMU.
+ *
+ * For radix, we can get prot fault for autonuma case, because radix
+ * page table will have them marked noaccess for user.
+ */
+ if (radix_enabled() || is_write)
+ return;
+
+ WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
}
/*
+ * Define the correct "is_write" bit in error_code based
+ * on the processor family
+ */
+#ifdef CONFIG_BOOKE
+#define page_fault_is_write(__err) ((__err) & ESR_DST)
+#else
+#define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE)
+#endif
+
+#ifdef CONFIG_BOOKE
+#define page_fault_is_bad(__err) (0)
+#elif defined(CONFIG_PPC_8xx)
+#define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G)
+#elif defined(CONFIG_PPC64)
+static int page_fault_is_bad(unsigned long err)
+{
+ unsigned long flag = DSISR_BAD_FAULT_64S;
+
+ /*
+ * PAPR+ v2.11 § 14.15.3.4.1 (unreleased)
+ * If byte 0, bit 3 of pi-attribute-specifier-type in
+ * ibm,pi-features property is defined, ignore the DSI error
+ * which is caused by the paste instruction on the
+ * suspended NX window.
+ */
+ if (mmu_has_feature(MMU_FTR_NX_DSI))
+ flag &= ~DSISR_BAD_COPYPASTE;
+
+ return err & flag;
+}
+#else
+#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
+#endif
+
+/*
* For 600- and 800-family processors, the error_code parameter is DSISR
- * for a data fault, SRR1 for an instruction fault. For 400-family processors
- * the error_code parameter is ESR for a data fault, 0 for an instruction
- * fault.
- * For 64-bit processors, the error_code parameter is
- * - DSISR for a non-SLB data access fault,
- * - SRR1 & 0x08000000 for a non-SLB instruction access fault
- * - 0 any SLB fault.
+ * for a data fault, SRR1 for an instruction fault.
+ * For 400-family processors the error_code parameter is ESR for a data fault,
+ * 0 for an instruction fault.
+ * For 64-bit processors, the error_code parameter is DSISR for a data access
+ * fault, SRR1 & 0x08000000 for an instruction access fault.
*
* The return value is 0 if the fault was handled, or the signal
* number if this is a kernel fault that can't be handled here.
*/
-int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
+static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
{
- enum ctx_state prev_state = exception_enter();
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
- unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
- int code = SEGV_MAPERR;
- int is_write = 0;
- int trap = TRAP(regs);
- int is_exec = trap == 0x400;
- int fault;
- int rc = 0, store_update_sp = 0;
-
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+ unsigned int flags = FAULT_FLAG_DEFAULT;
+ int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE;
+ int is_user = user_mode(regs);
+ int is_write = page_fault_is_write(error_code);
+ vm_fault_t fault, major = 0;
+ bool kprobe_fault = kprobe_page_fault(regs, 11);
+
+ if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
+ return 0;
+
+ if (unlikely(page_fault_is_bad(error_code))) {
+ if (is_user) {
+ _exception(SIGBUS, regs, BUS_OBJERR, address);
+ return 0;
+ }
+ return SIGBUS;
+ }
+
+ /* Additional sanity check(s) */
+ sanity_check_fault(is_write, is_user, error_code, address);
+
/*
- * Fortunately the bit assignments in SRR1 for an instruction
- * fault and DSISR for a data fault are mostly the same for the
- * bits we are interested in. But there are some bits which
- * indicate errors in DSISR but can validly be set in SRR1.
+ * The kernel should never take an execute fault nor should it
+ * take a page fault to a kernel address or a page fault to a user
+ * address outside of dedicated places.
+ *
+ * Rather than kfence directly reporting false negatives, search whether
+ * the NIP belongs to the fixup table for cases where fault could come
+ * from functions like copy_from_kernel_nofault().
*/
- if (trap == 0x400)
- error_code &= 0x48200000;
- else
- is_write = error_code & DSISR_ISSTORE;
-#else
- is_write = error_code & ESR_DST;
-#endif /* CONFIG_4xx || CONFIG_BOOKE */
+ if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) {
+ if (is_kfence_address((void *)address) &&
+ !search_exception_tables(instruction_pointer(regs)) &&
+ kfence_handle_page_fault(address, is_write, regs))
+ return 0;
+
+ return SIGSEGV;
+ }
-#ifdef CONFIG_PPC_ICSWX
/*
- * we need to do this early because this "data storage
- * interrupt" does not update the DAR/DEAR so we don't want to
- * look at it
+ * If we're in an interrupt, have no user context or are running
+ * in a region with pagefaults disabled then we must not take the fault
*/
- if (error_code & ICSWX_DSI_UCT) {
- rc = acop_handle_fault(regs, address, error_code);
- if (rc)
- goto bail;
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ if (is_user)
+ printk_ratelimited(KERN_ERR "Page fault in user mode"
+ " with faulthandler_disabled()=%d"
+ " mm=%p\n",
+ faulthandler_disabled(), mm);
+ return bad_area_nosemaphore(regs, address);
}
-#endif /* CONFIG_PPC_ICSWX */
- if (notify_page_fault(regs))
- goto bail;
+ interrupt_cond_local_irq_enable(regs);
- if (unlikely(debugger_fault_handler(regs)))
- goto bail;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
- /* On a kernel SLB miss we can only check for a valid exception entry */
- if (!user_mode(regs) && (address >= TASK_SIZE)) {
- rc = SIGSEGV;
- goto bail;
+ /*
+ * We want to do this outside mmap_lock, because reading code around nip
+ * can result in fault, which will cause a deadlock when called with
+ * mmap_lock held
+ */
+ if (is_user)
+ flags |= FAULT_FLAG_USER;
+ if (is_write)
+ flags |= FAULT_FLAG_WRITE;
+ if (is_exec)
+ flags |= FAULT_FLAG_INSTRUCTION;
+
+ if (!(flags & FAULT_FLAG_USER))
+ goto lock_mmap;
+
+ vma = lock_vma_under_rcu(mm, address);
+ if (!vma)
+ goto lock_mmap;
+
+ if (unlikely(access_pkey_error(is_write, is_exec,
+ (error_code & DSISR_KEYFAULT), vma))) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return bad_access_pkey(regs, address, NULL, vma);
}
-#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \
- defined(CONFIG_PPC_BOOK3S_64))
- if (error_code & DSISR_DABRMATCH) {
- /* breakpoint match */
- do_break(regs, address, error_code);
- goto bail;
+ if (unlikely(access_error(is_write, is_exec, vma))) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ return bad_access(regs, address, NULL, vma);
}
-#endif
- /* We restore the interrupt state now */
- if (!arch_irq_disabled_regs(regs))
- local_irq_enable();
+ fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
- if (in_atomic() || mm == NULL) {
- if (!user_mode(regs)) {
- rc = SIGSEGV;
- goto bail;
- }
- /* in_atomic() in user mode is really bad,
- as is current->mm == NULL. */
- printk(KERN_EMERG "Page fault in user mode with "
- "in_atomic() = %d mm = %p\n", in_atomic(), mm);
- printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
- regs->nip, regs->msr);
- die("Weird page fault", regs, SIGSEGV);
+ if (!(fault & VM_FAULT_RETRY)) {
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ goto done;
}
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ if (fault & VM_FAULT_MAJOR)
+ flags |= FAULT_FLAG_TRIED;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+ if (fault_signal_pending(fault, regs))
+ return user_mode(regs) ? 0 : SIGBUS;
- /*
- * We want to do this outside mmap_sem, because reading code around nip
- * can result in fault, which will cause a deadlock when called with
- * mmap_sem held
- */
- if (user_mode(regs))
- store_update_sp = store_updates_sp(regs);
-
- if (user_mode(regs))
- flags |= FAULT_FLAG_USER;
+lock_mmap:
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
- * erroneous fault occurring in a code path which already holds mmap_sem
+ * erroneous fault occurring in a code path which already holds mmap_lock
* we will deadlock attempting to validate the fault against the
* address space. Luckily the kernel only validly references user
* space from well defined areas of code, which are listed in the
- * exceptions table.
- *
- * As the vast majority of faults will be valid we will only perform
- * the source reference check when there is a possibility of a deadlock.
- * Attempt to lock the address space, if we cannot we then validate the
- * source. If this is invalid we can skip the address space check,
- * thus avoiding the deadlock.
+ * exceptions table. lock_mm_and_find_vma() handles that logic.
*/
- if (!down_read_trylock(&mm->mmap_sem)) {
- if (!user_mode(regs) && !search_exception_tables(regs->nip))
- goto bad_area_nosemaphore;
-
retry:
- down_read(&mm->mmap_sem);
- } else {
- /*
- * The above down_read_trylock() might have succeeded in
- * which case we'll have missed the might_sleep() from
- * down_read():
- */
- might_sleep();
- }
+ vma = lock_mm_and_find_vma(mm, address, regs);
+ if (unlikely(!vma))
+ return bad_area_nosemaphore(regs, address);
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
+ if (unlikely(access_pkey_error(is_write, is_exec,
+ (error_code & DSISR_KEYFAULT), vma)))
+ return bad_access_pkey(regs, address, mm, vma);
- /*
- * N.B. The POWER/Open ABI allows programs to access up to
- * 288 bytes below the stack pointer.
- * The kernel signal delivery code writes up to about 1.5kB
- * below the stack pointer (r1) before decrementing it.
- * The exec code can write slightly over 640kB to the stack
- * before setting the user r1. Thus we allow the stack to
- * expand to 1MB without further checks.
- */
- if (address + 0x100000 < vma->vm_end) {
- /* get user regs even if this fault is in kernel mode */
- struct pt_regs *uregs = current->thread.regs;
- if (uregs == NULL)
- goto bad_area;
-
- /*
- * A user-mode access to an address a long way below
- * the stack pointer is only valid if the instruction
- * is one which would update the stack pointer to the
- * address accessed if the instruction completed,
- * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
- * (or the byte, halfword, float or double forms).
- *
- * If we don't check this then any write to the area
- * between the last mapped region and the stack will
- * expand the stack rather than segfaulting.
- */
- if (address + 2048 < uregs->gpr[1] && !store_update_sp)
- goto bad_area;
- }
- if (expand_stack(vma, address))
- goto bad_area;
-
-good_area:
- code = SEGV_ACCERR;
-#if defined(CONFIG_6xx)
- if (error_code & 0x95700000)
- /* an error such as lwarx to I/O controller space,
- address matching DABR, eciwx, etc. */
- goto bad_area;
-#endif /* CONFIG_6xx */
-#if defined(CONFIG_8xx)
- /* 8xx sometimes need to load a invalid/non-present TLBs.
- * These must be invalidated separately as linux mm don't.
- */
- if (error_code & 0x40000000) /* no translation? */
- _tlbil_va(address, 0, 0, 0);
-
- /* The MPC8xx seems to always set 0x80000000, which is
- * "undefined". Of those that can be set, this is the only
- * one which seems bad.
- */
- if (error_code & 0x10000000)
- /* Guarded storage error. */
- goto bad_area;
-#endif /* CONFIG_8xx */
-
- if (is_exec) {
-#ifdef CONFIG_PPC_STD_MMU
- /* Protection fault on exec go straight to failure on
- * Hash based MMUs as they either don't support per-page
- * execute permission, or if they do, it's handled already
- * at the hash level. This test would probably have to
- * be removed if we change the way this works to make hash
- * processors use the same I/D cache coherency mechanism
- * as embedded.
- */
- if (error_code & DSISR_PROTFAULT)
- goto bad_area;
-#endif /* CONFIG_PPC_STD_MMU */
-
- /*
- * Allow execution from readable areas if the MMU does not
- * provide separate controls over reading and executing.
- *
- * Note: That code used to not be enabled for 4xx/BookE.
- * It is now as I/D cache coherency for these is done at
- * set_pte_at() time and I see no reason why the test
- * below wouldn't be valid on those processors. This -may-
- * break programs compiled with a really old ABI though.
- */
- if (!(vma->vm_flags & VM_EXEC) &&
- (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
- !(vma->vm_flags & (VM_READ | VM_WRITE))))
- goto bad_area;
- /* a write */
- } else if (is_write) {
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- flags |= FAULT_FLAG_WRITE;
- /* a read */
- } else {
- /* protection fault */
- if (error_code & 0x08000000)
- goto bad_area;
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
- goto bad_area;
- }
+ if (unlikely(access_error(is_write, is_exec, vma)))
+ return bad_access(regs, address, mm, vma);
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
- fault = handle_mm_fault(mm, vma, address, flags);
- if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
- rc = mm_fault_error(regs, address, fault);
- if (rc >= MM_FAULT_RETURN)
- goto bail;
- else
- rc = 0;
- }
+ fault = handle_mm_fault(vma, address, flags, regs);
+
+ major |= fault & VM_FAULT_MAJOR;
+
+ if (fault_signal_pending(fault, regs))
+ return user_mode(regs) ? 0 : SIGBUS;
+
+ /* The fault is fully completed (including releasing mmap lock) */
+ if (fault & VM_FAULT_COMPLETED)
+ goto out;
/*
- * Major/minor page fault accounting is only done on the
- * initial attempt. If we go through a retry, it is extremely
- * likely that the page will be found in page cache at that point.
+ * Handle the retry right now, the mmap_lock has been released in that
+ * case.
*/
- if (flags & FAULT_FLAG_ALLOW_RETRY) {
- if (fault & VM_FAULT_MAJOR) {
- current->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
- regs, address);
-#ifdef CONFIG_PPC_SMLPAR
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- u32 page_ins;
-
- preempt_disable();
- page_ins = be32_to_cpu(get_lppaca()->page_ins);
- page_ins += 1 << PAGE_FACTOR;
- get_lppaca()->page_ins = cpu_to_be32(page_ins);
- preempt_enable();
- }
-#endif /* CONFIG_PPC_SMLPAR */
- } else {
- current->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
- regs, address);
- }
- if (fault & VM_FAULT_RETRY) {
- /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
- * of starvation. */
- flags &= ~FAULT_FLAG_ALLOW_RETRY;
- flags |= FAULT_FLAG_TRIED;
- goto retry;
- }
+ if (unlikely(fault & VM_FAULT_RETRY)) {
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
}
- up_read(&mm->mmap_sem);
- goto bail;
+ mmap_read_unlock(current->mm);
-bad_area:
- up_read(&mm->mmap_sem);
+done:
+ if (unlikely(fault & VM_FAULT_ERROR))
+ return mm_fault_error(regs, address, fault);
-bad_area_nosemaphore:
- /* User mode accesses cause a SIGSEGV */
- if (user_mode(regs)) {
- _exception(SIGSEGV, regs, code, address);
- goto bail;
- }
+out:
+ /*
+ * Major/minor page fault accounting.
+ */
+ if (major)
+ cmo_account_page_fault();
- if (is_exec && (error_code & DSISR_PROTFAULT))
- printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected"
- " page (%lx) - exploit attempt? (uid: %d)\n",
- address, from_kuid(&init_user_ns, current_uid()));
+ return 0;
+}
+NOKPROBE_SYMBOL(___do_page_fault);
- rc = SIGSEGV;
+static __always_inline void __do_page_fault(struct pt_regs *regs)
+{
+ long err;
-bail:
- exception_exit(prev_state);
- return rc;
+ err = ___do_page_fault(regs, regs->dar, regs->dsisr);
+ if (unlikely(err))
+ bad_page_fault(regs, err);
+}
+DEFINE_INTERRUPT_HANDLER(do_page_fault)
+{
+ __do_page_fault(regs);
}
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */
+void hash__do_page_fault(struct pt_regs *regs)
+{
+ __do_page_fault(regs);
+}
+NOKPROBE_SYMBOL(hash__do_page_fault);
+#endif
+
/*
* bad_page_fault is called when we have a bad access from the kernel.
* It is called from the DSI and ISI handlers in head.S and from some
* of the procedures in traps.c.
*/
-void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
+static void __bad_page_fault(struct pt_regs *regs, int sig)
{
- const struct exception_table_entry *entry;
- unsigned long *stackend;
-
- /* Are we prepared to handle this fault? */
- if ((entry = search_exception_tables(regs->nip)) != NULL) {
- regs->nip = entry->fixup;
- return;
- }
+ int is_write = page_fault_is_write(regs->dsisr);
+ const char *msg;
/* kernel has accessed a bad area */
- switch (regs->trap) {
- case 0x300:
- case 0x380:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "data at address 0x%08lx\n", regs->dar);
+ if (regs->dar < PAGE_SIZE)
+ msg = "Kernel NULL pointer dereference";
+ else
+ msg = "Unable to handle kernel data access";
+
+ switch (TRAP(regs)) {
+ case INTERRUPT_DATA_STORAGE:
+ case INTERRUPT_H_DATA_STORAGE:
+ pr_alert("BUG: %s on %s at 0x%08lx\n", msg,
+ str_write_read(is_write), regs->dar);
+ break;
+ case INTERRUPT_DATA_SEGMENT:
+ pr_alert("BUG: %s at 0x%08lx\n", msg, regs->dar);
+ break;
+ case INTERRUPT_INST_STORAGE:
+ case INTERRUPT_INST_SEGMENT:
+ pr_alert("BUG: Unable to handle kernel instruction fetch%s",
+ regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n");
break;
- case 0x400:
- case 0x480:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "instruction fetch\n");
+ case INTERRUPT_ALIGNMENT:
+ pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n",
+ regs->dar);
break;
default:
- printk(KERN_ALERT "Unable to handle kernel paging request for "
- "unknown fault\n");
+ pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n",
+ regs->dar);
break;
}
printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n",
regs->nip);
- stackend = end_of_stack(current);
- if (current != &init_task && *stackend != STACK_END_MAGIC)
+ if (task_stack_end_corrupted(current))
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
die("Kernel access of bad area", regs, sig);
}
+
+void bad_page_fault(struct pt_regs *regs, int sig)
+{
+ const struct exception_table_entry *entry;
+
+ /* Are we prepared to handle this fault? */
+ entry = search_exception_tables(instruction_pointer(regs));
+ if (entry)
+ instruction_pointer_set(regs, extable_fixup(entry));
+ else
+ __bad_page_fault(regs, sig);
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv)
+{
+ bad_page_fault(regs, SIGSEGV);
+}
+
+/*
+ * In radix, segment interrupts indicate the EA is not addressable by the
+ * page table geometry, so they are always sent here.
+ *
+ * In hash, this is called if do_slb_fault returns error. Typically it is
+ * because the EA was outside the region allowed by software.
+ */
+DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt)
+{
+ int err = regs->result;
+
+ if (err == -EFAULT) {
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar);
+ else
+ bad_page_fault(regs, SIGSEGV);
+ } else if (err == -EINVAL) {
+ unrecoverable_exception(regs);
+ } else {
+ BUG();
+ }
+}
+#endif
diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c
deleted file mode 100644
index d8746684f606..000000000000
--- a/arch/powerpc/mm/gup.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Lockless get_user_pages_fast for powerpc
- *
- * Copyright (C) 2008 Nick Piggin
- * Copyright (C) 2008 Novell Inc.
- */
-#undef DEBUG
-
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-#ifdef __HAVE_ARCH_PTE_SPECIAL
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long mask, result;
- pte_t *ptep;
-
- result = _PAGE_PRESENT|_PAGE_USER;
- if (write)
- result |= _PAGE_RW;
- mask = result | _PAGE_SPECIAL;
-
- ptep = pte_offset_kernel(&pmd, addr);
- do {
- pte_t pte = ACCESS_ONCE(*ptep);
- struct page *page;
- /*
- * Similar to the PMD case, NUMA hinting must take slow path
- */
- if (pte_numa(pte))
- return 0;
-
- if ((pte_val(pte) & mask) != result)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- if (!page_cache_get_speculative(page))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(page);
- return 0;
- }
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp;
-
- pmdp = pmd_offset(&pud, addr);
- do {
- pmd_t pmd = ACCESS_ONCE(*pmdp);
-
- next = pmd_addr_end(addr, end);
- /*
- * If we find a splitting transparent hugepage we
- * return zero. That will result in taking the slow
- * path which will call wait_split_huge_page()
- * if the pmd is still in splitting state
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
- return 0;
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_numa(pmd))
- return 0;
-
- if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pmdp)) {
- if (!gup_hugepd((hugepd_t *)pmdp, PMD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmd, addr, next, write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp;
-
- pudp = pud_offset(&pgd, addr);
- do {
- pud_t pud = ACCESS_ONCE(*pudp);
-
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (pud_huge(pud)) {
- if (!gup_hugepte((pte_t *)pudp, PUD_SIZE, addr, next,
- write, pages, nr))
- return 0;
- } else if (is_hugepd(pudp)) {
- if (!gup_hugepd((hugepd_t *)pudp, PUD_SHIFT,
- addr, next, write, pages, nr))
- return 0;
- } else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next;
- unsigned long flags;
- pgd_t *pgdp;
- int nr = 0;
-
- pr_devel("%s(%lx,%x,%s)\n", __func__, start, nr_pages, write ? "write" : "read");
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
-
- if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
- start, len)))
- return 0;
-
- pr_devel(" aligned: %lx .. %lx\n", start, end);
-
- /*
- * XXX: batch / limit 'nr', to avoid large irq off latency
- * needs some instrumenting to determine the common sizes used by
- * important workloads (eg. DB2), and whether limiting the batch size
- * will decrease performance.
- *
- * It seems like we're in the clear for the moment. Direct-IO is
- * the main guy that batches up lots of get_user_pages, and even
- * they are limited to 64-at-a-time which is not so many.
- */
- /*
- * This doesn't prevent pagetable teardown, but does prevent
- * the pagetables from being freed on powerpc.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
-
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = ACCESS_ONCE(*pgdp);
-
- pr_devel(" %016lx: normal pgd %p\n", addr,
- (void *)pgd_val(pgd));
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (pgd_huge(pgd)) {
- if (!gup_hugepte((pte_t *)pgdp, PGDIR_SIZE, addr, next,
- write, pages, &nr))
- break;
- } else if (is_hugepd(pgdp)) {
- if (!gup_hugepd((hugepd_t *)pgdp, PGDIR_SHIFT,
- addr, next, write, pages, &nr))
- break;
- } else if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
-
- local_irq_restore(flags);
-
- return nr;
-}
-
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- int nr, ret;
-
- start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- ret = nr;
-
- if (nr < nr_pages) {
- pr_devel(" slow path ! nr = %d\n", nr);
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
-
- down_read(&mm->mmap_sem);
- ret = get_user_pages(current, mm, start,
- nr_pages - nr, write, 0, pages, NULL);
- up_read(&mm->mmap_sem);
-
- /* Have to be a bit careful with return values */
- if (nr > 0) {
- if (ret < 0)
- ret = nr;
- else
- ret += nr;
- }
- }
-
- return ret;
-}
-
-#endif /* __HAVE_ARCH_PTE_SPECIAL */
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
deleted file mode 100644
index 057cbbb4c576..000000000000
--- a/arch/powerpc/mm/hash_low_64.S
+++ /dev/null
@@ -1,1002 +0,0 @@
-/*
- * ppc64 MMU hashtable management routines
- *
- * (c) Copyright IBM Corp. 2003, 2005
- *
- * Maintained by: Benjamin Herrenschmidt
- * <benh@kernel.crashing.org>
- *
- * This file is covered by the GNU Public Licence v2 as
- * described in the kernel's COPYING file.
- */
-
-#include <asm/reg.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-
- .text
-
-/*
- * Stackframe:
- *
- * +-> Back chain (SP + 256)
- * | General register save area (SP + 112)
- * | Parameter save area (SP + 48)
- * | TOC save area (SP + 40)
- * | link editor doubleword (SP + 32)
- * | compiler doubleword (SP + 24)
- * | LR save area (SP + 16)
- * | CR save area (SP + 8)
- * SP ---> +-- Back chain (SP + 0)
- */
-
-#ifndef CONFIG_PPC_64K_PAGES
-
-/*****************************************************************************
- * *
- * 4K SW & 4K HW pages implementation *
- * *
- *****************************************************************************/
-
-
-/*
- * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local, int ssize)
- *
- * Adds a 4K page to the hash table in a segment of 4K pages only
- */
-
-_GLOBAL(__hash_page_4K)
- mflr r0
- std r0,16(r1)
- stdu r1,-STACKFRAMESIZE(r1)
- /* Save all params that we need after a function call */
- std r6,STK_PARAM(R6)(r1)
- std r8,STK_PARAM(R8)(r1)
- std r9,STK_PARAM(R9)(r1)
-
- /* Save non-volatile registers.
- * r31 will hold "old PTE"
- * r30 is "new PTE"
- * r29 is vpn
- * r28 is a hash value
- * r27 is hashtab mask (maybe dynamic patched instead ?)
- */
- std r27,STK_REG(R27)(r1)
- std r28,STK_REG(R28)(r1)
- std r29,STK_REG(R29)(r1)
- std r30,STK_REG(R30)(r1)
- std r31,STK_REG(R31)(r1)
-
- /* Step 1:
- *
- * Check permissions, atomically mark the linux PTE busy
- * and hashed.
- */
-1:
- ldarx r31,0,r6
- /* Check access rights (access & ~(pte_val(*ptep))) */
- andc. r0,r4,r31
- bne- htab_wrong_access
- /* Check if PTE is busy */
- andi. r0,r31,_PAGE_BUSY
- /* If so, just bail out and refault if needed. Someone else
- * is changing this PTE anyway and might hash it.
- */
- bne- htab_bail_ok
-
- /* Prepare new PTE value (turn access RW into DIRTY, then
- * add BUSY,HASHPTE and ACCESSED)
- */
- rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
- or r30,r30,r31
- ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
- /* Write the linux PTE atomically (setting busy) */
- stdcx. r30,0,r6
- bne- 1b
- isync
-
- /* Step 2:
- *
- * Insert/Update the HPTE in the hash table. At this point,
- * r4 (access) is re-useable, we use it for the new HPTE flags
- */
-
-BEGIN_FTR_SECTION
- cmpdi r9,0 /* check segment size */
- bne 3f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- /* Calc vpn and put it in r29 */
- sldi r29,r5,SID_SHIFT - VPN_SHIFT
- rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
- or r29,r28,r29
- /*
- * Calculate hash value for primary slot and store it in r28
- * r3 = va, r5 = vsid
- * r0 = (va >> 12) & ((1ul << (28 - 12)) -1)
- */
- rldicl r0,r3,64-12,48
- xor r28,r5,r0 /* hash */
- b 4f
-
-3: /* Calc vpn and put it in r29 */
- sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT
- rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
- or r29,r28,r29
-
- /*
- * calculate hash value for primary slot and
- * store it in r28 for 1T segment
- * r3 = va, r5 = vsid
- */
- sldi r28,r5,25 /* vsid << 25 */
- /* r0 = (va >> 12) & ((1ul << (40 - 12)) -1) */
- rldicl r0,r3,64-12,36
- xor r28,r28,r5 /* vsid ^ ( vsid << 25) */
- xor r28,r28,r0 /* hash */
-
- /* Convert linux PTE bits into HW equivalents */
-4: andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
- rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
- rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
- andc r0,r30,r0 /* r0 = pte & ~r0 */
- rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- /*
- * Always add "C" bit for perf. Memory coherence is always enabled
- */
- ori r3,r3,HPTE_R_C | HPTE_R_M
-
- /* We eventually do the icache sync here (maybe inline that
- * code rather than call a C function...)
- */
-BEGIN_FTR_SECTION
- mr r4,r30
- mr r5,r7
- bl hash_page_do_lazy_icache
-END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
-
- /* At this point, r3 contains new PP bits, save them in
- * place of "access" in the param area (sic)
- */
- std r3,STK_PARAM(R4)(r1)
-
- /* Get htab_hash_mask */
- ld r4,htab_hash_mask@got(2)
- ld r27,0(r4) /* htab_hash_mask -> r27 */
-
- /* Check if we may already be in the hashtable, in this case, we
- * go to out-of-line code to try to modify the HPTE
- */
- andi. r0,r31,_PAGE_HASHPTE
- bne htab_modify_pte
-
-htab_insert_pte:
- /* Clear hpte bits in new pte (we also clear BUSY btw) and
- * add _PAGE_HASHPTE
- */
- lis r0,_PAGE_HPTEFLAGS@h
- ori r0,r0,_PAGE_HPTEFLAGS@l
- andc r30,r30,r0
- ori r30,r30,_PAGE_HASHPTE
-
- /* physical address r5 */
- rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
- sldi r5,r5,PAGE_SHIFT
-
- /* Calculate primary group hash */
- and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r3 = (hash & mask) << 3 */
-
- /* Call ppc_md.hpte_insert */
- ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve vpn */
- li r7,0 /* !bolted, !secondary */
- li r8,MMU_PAGE_4K /* page size */
- li r9,MMU_PAGE_4K /* actual page size */
- ld r10,STK_PARAM(R9)(r1) /* segment size */
-.globl htab_call_hpte_insert1
-htab_call_hpte_insert1:
- bl . /* Patched by htab_finish_init() */
- cmpdi 0,r3,0
- bge htab_pte_insert_ok /* Insertion successful */
- cmpdi 0,r3,-2 /* Critical failure */
- beq- htab_pte_insert_failure
-
- /* Now try secondary slot */
-
- /* physical address r5 */
- rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
- sldi r5,r5,PAGE_SHIFT
-
- /* Calculate secondary group hash */
- andc r0,r27,r28
- rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
-
- /* Call ppc_md.hpte_insert */
- ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve vpn */
- li r7,HPTE_V_SECONDARY /* !bolted, secondary */
- li r8,MMU_PAGE_4K /* page size */
- li r9,MMU_PAGE_4K /* actual page size */
- ld r10,STK_PARAM(R9)(r1) /* segment size */
-.globl htab_call_hpte_insert2
-htab_call_hpte_insert2:
- bl . /* Patched by htab_finish_init() */
- cmpdi 0,r3,0
- bge+ htab_pte_insert_ok /* Insertion successful */
- cmpdi 0,r3,-2 /* Critical failure */
- beq- htab_pte_insert_failure
-
- /* Both are full, we need to evict something */
- mftb r0
- /* Pick a random group based on TB */
- andi. r0,r0,1
- mr r5,r28
- bne 2f
- not r5,r5
-2: and r0,r5,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
- /* Call ppc_md.hpte_remove */
-.globl htab_call_hpte_remove
-htab_call_hpte_remove:
- bl . /* Patched by htab_finish_init() */
-
- /* Try all again */
- b htab_insert_pte
-
-htab_bail_ok:
- li r3,0
- b htab_bail
-
-htab_pte_insert_ok:
- /* Insert slot number & secondary bit in PTE */
- rldimi r30,r3,12,63-15
-
- /* Write out the PTE with a normal write
- * (maybe add eieio may be good still ?)
- */
-htab_write_out_pte:
- ld r6,STK_PARAM(R6)(r1)
- std r30,0(r6)
- li r3, 0
-htab_bail:
- ld r27,STK_REG(R27)(r1)
- ld r28,STK_REG(R28)(r1)
- ld r29,STK_REG(R29)(r1)
- ld r30,STK_REG(R30)(r1)
- ld r31,STK_REG(R31)(r1)
- addi r1,r1,STACKFRAMESIZE
- ld r0,16(r1)
- mtlr r0
- blr
-
-htab_modify_pte:
- /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
- mr r4,r3
- rlwinm r3,r31,32-12,29,31
-
- /* Secondary group ? if yes, get a inverted hash value */
- mr r5,r28
- andi. r0,r31,_PAGE_SECONDARY
- beq 1f
- not r5,r5
-1:
- /* Calculate proper slot value for ppc_md.hpte_updatepp */
- and r0,r5,r27
- rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
- add r3,r0,r3 /* add slot idx */
-
- /* Call ppc_md.hpte_updatepp */
- mr r5,r29 /* vpn */
- li r6,MMU_PAGE_4K /* base page size */
- li r7,MMU_PAGE_4K /* actual page size */
- ld r8,STK_PARAM(R9)(r1) /* segment size */
- ld r9,STK_PARAM(R8)(r1) /* get "local" param */
-.globl htab_call_hpte_updatepp
-htab_call_hpte_updatepp:
- bl . /* Patched by htab_finish_init() */
-
- /* if we failed because typically the HPTE wasn't really here
- * we try an insertion.
- */
- cmpdi 0,r3,-1
- beq- htab_insert_pte
-
- /* Clear the BUSY bit and Write out the PTE */
- li r0,_PAGE_BUSY
- andc r30,r30,r0
- b htab_write_out_pte
-
-htab_wrong_access:
- /* Bail out clearing reservation */
- stdcx. r31,0,r6
- li r3,1
- b htab_bail
-
-htab_pte_insert_failure:
- /* Bail out restoring old PTE */
- ld r6,STK_PARAM(R6)(r1)
- std r31,0(r6)
- li r3,-1
- b htab_bail
-
-
-#else /* CONFIG_PPC_64K_PAGES */
-
-
-/*****************************************************************************
- * *
- * 64K SW & 4K or 64K HW in a 4K segment pages implementation *
- * *
- *****************************************************************************/
-
-/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- * pte_t *ptep, unsigned long trap, int local, int ssize,
- * int subpg_prot)
- */
-
-/*
- * For now, we do NOT implement Admixed pages
- */
-_GLOBAL(__hash_page_4K)
- mflr r0
- std r0,16(r1)
- stdu r1,-STACKFRAMESIZE(r1)
- /* Save all params that we need after a function call */
- std r6,STK_PARAM(R6)(r1)
- std r8,STK_PARAM(R8)(r1)
- std r9,STK_PARAM(R9)(r1)
-
- /* Save non-volatile registers.
- * r31 will hold "old PTE"
- * r30 is "new PTE"
- * r29 is vpn
- * r28 is a hash value
- * r27 is hashtab mask (maybe dynamic patched instead ?)
- * r26 is the hidx mask
- * r25 is the index in combo page
- */
- std r25,STK_REG(R25)(r1)
- std r26,STK_REG(R26)(r1)
- std r27,STK_REG(R27)(r1)
- std r28,STK_REG(R28)(r1)
- std r29,STK_REG(R29)(r1)
- std r30,STK_REG(R30)(r1)
- std r31,STK_REG(R31)(r1)
-
- /* Step 1:
- *
- * Check permissions, atomically mark the linux PTE busy
- * and hashed.
- */
-1:
- ldarx r31,0,r6
- /* Check access rights (access & ~(pte_val(*ptep))) */
- andc. r0,r4,r31
- bne- htab_wrong_access
- /* Check if PTE is busy */
- andi. r0,r31,_PAGE_BUSY
- /* If so, just bail out and refault if needed. Someone else
- * is changing this PTE anyway and might hash it.
- */
- bne- htab_bail_ok
- /* Prepare new PTE value (turn access RW into DIRTY, then
- * add BUSY and ACCESSED)
- */
- rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
- or r30,r30,r31
- ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
- oris r30,r30,_PAGE_COMBO@h
- /* Write the linux PTE atomically (setting busy) */
- stdcx. r30,0,r6
- bne- 1b
- isync
-
- /* Step 2:
- *
- * Insert/Update the HPTE in the hash table. At this point,
- * r4 (access) is re-useable, we use it for the new HPTE flags
- */
-
- /* Load the hidx index */
- rldicl r25,r3,64-12,60
-
-BEGIN_FTR_SECTION
- cmpdi r9,0 /* check segment size */
- bne 3f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- /* Calc vpn and put it in r29 */
- sldi r29,r5,SID_SHIFT - VPN_SHIFT
- /*
- * clrldi r3,r3,64 - SID_SHIFT --> ea & 0xfffffff
- * srdi r28,r3,VPN_SHIFT
- */
- rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
- or r29,r28,r29
- /*
- * Calculate hash value for primary slot and store it in r28
- * r3 = va, r5 = vsid
- * r0 = (va >> 12) & ((1ul << (28 - 12)) -1)
- */
- rldicl r0,r3,64-12,48
- xor r28,r5,r0 /* hash */
- b 4f
-
-3: /* Calc vpn and put it in r29 */
- sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT
- /*
- * clrldi r3,r3,64 - SID_SHIFT_1T --> ea & 0xffffffffff
- * srdi r28,r3,VPN_SHIFT
- */
- rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
- or r29,r28,r29
-
- /*
- * Calculate hash value for primary slot and
- * store it in r28 for 1T segment
- * r3 = va, r5 = vsid
- */
- sldi r28,r5,25 /* vsid << 25 */
- /* r0 = (va >> 12) & ((1ul << (40 - 12)) -1) */
- rldicl r0,r3,64-12,36
- xor r28,r28,r5 /* vsid ^ ( vsid << 25) */
- xor r28,r28,r0 /* hash */
-
- /* Convert linux PTE bits into HW equivalents */
-4:
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- andc r10,r30,r10
- andi. r3,r10,0x1fe /* Get basic set of flags */
- rlwinm r0,r10,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
-#else
- andi. r3,r30,0x1fe /* Get basic set of flags */
- rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
-#endif
- xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
- rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
- andc r0,r3,r0 /* r0 = pte & ~r0 */
- rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- /*
- * Always add "C" bit for perf. Memory coherence is always enabled
- */
- ori r3,r3,HPTE_R_C | HPTE_R_M
-
- /* We eventually do the icache sync here (maybe inline that
- * code rather than call a C function...)
- */
-BEGIN_FTR_SECTION
- mr r4,r30
- mr r5,r7
- bl hash_page_do_lazy_icache
-END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
-
- /* At this point, r3 contains new PP bits, save them in
- * place of "access" in the param area (sic)
- */
- std r3,STK_PARAM(R4)(r1)
-
- /* Get htab_hash_mask */
- ld r4,htab_hash_mask@got(2)
- ld r27,0(r4) /* htab_hash_mask -> r27 */
-
- /* Check if we may already be in the hashtable, in this case, we
- * go to out-of-line code to try to modify the HPTE. We look for
- * the bit at (1 >> (index + 32))
- */
- rldicl. r0,r31,64-12,48
- li r26,0 /* Default hidx */
- beq htab_insert_pte
-
- /*
- * Check if the pte was already inserted into the hash table
- * as a 64k HW page, and invalidate the 64k HPTE if so.
- */
- andis. r0,r31,_PAGE_COMBO@h
- beq htab_inval_old_hpte
-
- ld r6,STK_PARAM(R6)(r1)
- ori r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */
- ld r26,0(r26)
- addi r5,r25,36 /* Check actual HPTE_SUB bit, this */
- rldcr. r0,r31,r5,0 /* must match pgtable.h definition */
- bne htab_modify_pte
-
-htab_insert_pte:
- /* real page number in r5, PTE RPN value + index */
- andis. r0,r31,_PAGE_4K_PFN@h
- srdi r5,r31,PTE_RPN_SHIFT
- bne- htab_special_pfn
- sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
- add r5,r5,r25
-htab_special_pfn:
- sldi r5,r5,HW_PAGE_SHIFT
-
- /* Calculate primary group hash */
- and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
-
- /* Call ppc_md.hpte_insert */
- ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve vpn */
- li r7,0 /* !bolted, !secondary */
- li r8,MMU_PAGE_4K /* page size */
- li r9,MMU_PAGE_4K /* actual page size */
- ld r10,STK_PARAM(R9)(r1) /* segment size */
-.globl htab_call_hpte_insert1
-htab_call_hpte_insert1:
- bl . /* patched by htab_finish_init() */
- cmpdi 0,r3,0
- bge htab_pte_insert_ok /* Insertion successful */
- cmpdi 0,r3,-2 /* Critical failure */
- beq- htab_pte_insert_failure
-
- /* Now try secondary slot */
-
- /* real page number in r5, PTE RPN value + index */
- andis. r0,r31,_PAGE_4K_PFN@h
- srdi r5,r31,PTE_RPN_SHIFT
- bne- 3f
- sldi r5,r5,PAGE_SHIFT-HW_PAGE_SHIFT
- add r5,r5,r25
-3: sldi r5,r5,HW_PAGE_SHIFT
-
- /* Calculate secondary group hash */
- andc r0,r27,r28
- rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
-
- /* Call ppc_md.hpte_insert */
- ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve vpn */
- li r7,HPTE_V_SECONDARY /* !bolted, secondary */
- li r8,MMU_PAGE_4K /* page size */
- li r9,MMU_PAGE_4K /* actual page size */
- ld r10,STK_PARAM(R9)(r1) /* segment size */
-.globl htab_call_hpte_insert2
-htab_call_hpte_insert2:
- bl . /* patched by htab_finish_init() */
- cmpdi 0,r3,0
- bge+ htab_pte_insert_ok /* Insertion successful */
- cmpdi 0,r3,-2 /* Critical failure */
- beq- htab_pte_insert_failure
-
- /* Both are full, we need to evict something */
- mftb r0
- /* Pick a random group based on TB */
- andi. r0,r0,1
- mr r5,r28
- bne 2f
- not r5,r5
-2: and r0,r5,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
- /* Call ppc_md.hpte_remove */
-.globl htab_call_hpte_remove
-htab_call_hpte_remove:
- bl . /* patched by htab_finish_init() */
-
- /* Try all again */
- b htab_insert_pte
-
- /*
- * Call out to C code to invalidate an 64k HW HPTE that is
- * useless now that the segment has been switched to 4k pages.
- */
-htab_inval_old_hpte:
- mr r3,r29 /* vpn */
- mr r4,r31 /* PTE.pte */
- li r5,0 /* PTE.hidx */
- li r6,MMU_PAGE_64K /* psize */
- ld r7,STK_PARAM(R9)(r1) /* ssize */
- ld r8,STK_PARAM(R8)(r1) /* local */
- bl flush_hash_page
- /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */
- lis r0,_PAGE_HPTE_SUB@h
- ori r0,r0,_PAGE_HPTE_SUB@l
- andc r30,r30,r0
- b htab_insert_pte
-
-htab_bail_ok:
- li r3,0
- b htab_bail
-
-htab_pte_insert_ok:
- /* Insert slot number & secondary bit in PTE second half,
- * clear _PAGE_BUSY and set approriate HPTE slot bit
- */
- ld r6,STK_PARAM(R6)(r1)
- li r0,_PAGE_BUSY
- andc r30,r30,r0
- /* HPTE SUB bit */
- li r0,1
- subfic r5,r25,27 /* Must match bit position in */
- sld r0,r0,r5 /* pgtable.h */
- or r30,r30,r0
- /* hindx */
- sldi r5,r25,2
- sld r3,r3,r5
- li r4,0xf
- sld r4,r4,r5
- andc r26,r26,r4
- or r26,r26,r3
- ori r5,r6,PTE_PAGE_HIDX_OFFSET
- std r26,0(r5)
- lwsync
- std r30,0(r6)
- li r3, 0
-htab_bail:
- ld r25,STK_REG(R25)(r1)
- ld r26,STK_REG(R26)(r1)
- ld r27,STK_REG(R27)(r1)
- ld r28,STK_REG(R28)(r1)
- ld r29,STK_REG(R29)(r1)
- ld r30,STK_REG(R30)(r1)
- ld r31,STK_REG(R31)(r1)
- addi r1,r1,STACKFRAMESIZE
- ld r0,16(r1)
- mtlr r0
- blr
-
-htab_modify_pte:
- /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
- mr r4,r3
- sldi r5,r25,2
- srd r3,r26,r5
-
- /* Secondary group ? if yes, get a inverted hash value */
- mr r5,r28
- andi. r0,r3,0x8 /* page secondary ? */
- beq 1f
- not r5,r5
-1: andi. r3,r3,0x7 /* extract idx alone */
-
- /* Calculate proper slot value for ppc_md.hpte_updatepp */
- and r0,r5,r27
- rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
- add r3,r0,r3 /* add slot idx */
-
- /* Call ppc_md.hpte_updatepp */
- mr r5,r29 /* vpn */
- li r6,MMU_PAGE_4K /* base page size */
- li r7,MMU_PAGE_4K /* actual page size */
- ld r8,STK_PARAM(R9)(r1) /* segment size */
- ld r9,STK_PARAM(R8)(r1) /* get "local" param */
-.globl htab_call_hpte_updatepp
-htab_call_hpte_updatepp:
- bl . /* patched by htab_finish_init() */
-
- /* if we failed because typically the HPTE wasn't really here
- * we try an insertion.
- */
- cmpdi 0,r3,-1
- beq- htab_insert_pte
-
- /* Clear the BUSY bit and Write out the PTE */
- li r0,_PAGE_BUSY
- andc r30,r30,r0
- ld r6,STK_PARAM(R6)(r1)
- std r30,0(r6)
- li r3,0
- b htab_bail
-
-htab_wrong_access:
- /* Bail out clearing reservation */
- stdcx. r31,0,r6
- li r3,1
- b htab_bail
-
-htab_pte_insert_failure:
- /* Bail out restoring old PTE */
- ld r6,STK_PARAM(R6)(r1)
- std r31,0(r6)
- li r3,-1
- b htab_bail
-
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_PPC_HAS_HASH_64K
-
-/*****************************************************************************
- * *
- * 64K SW & 64K HW in a 64K segment pages implementation *
- * *
- *****************************************************************************/
-
-_GLOBAL(__hash_page_64K)
- mflr r0
- std r0,16(r1)
- stdu r1,-STACKFRAMESIZE(r1)
- /* Save all params that we need after a function call */
- std r6,STK_PARAM(R6)(r1)
- std r8,STK_PARAM(R8)(r1)
- std r9,STK_PARAM(R9)(r1)
-
- /* Save non-volatile registers.
- * r31 will hold "old PTE"
- * r30 is "new PTE"
- * r29 is vpn
- * r28 is a hash value
- * r27 is hashtab mask (maybe dynamic patched instead ?)
- */
- std r27,STK_REG(R27)(r1)
- std r28,STK_REG(R28)(r1)
- std r29,STK_REG(R29)(r1)
- std r30,STK_REG(R30)(r1)
- std r31,STK_REG(R31)(r1)
-
- /* Step 1:
- *
- * Check permissions, atomically mark the linux PTE busy
- * and hashed.
- */
-1:
- ldarx r31,0,r6
- /* Check access rights (access & ~(pte_val(*ptep))) */
- andc. r0,r4,r31
- bne- ht64_wrong_access
- /* Check if PTE is busy */
- andi. r0,r31,_PAGE_BUSY
- /* If so, just bail out and refault if needed. Someone else
- * is changing this PTE anyway and might hash it.
- */
- bne- ht64_bail_ok
-BEGIN_FTR_SECTION
- /* Check if PTE has the cache-inhibit bit set */
- andi. r0,r31,_PAGE_NO_CACHE
- /* If so, bail out and refault as a 4k page */
- bne- ht64_bail_ok
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
- /* Prepare new PTE value (turn access RW into DIRTY, then
- * add BUSY and ACCESSED)
- */
- rlwinm r30,r4,32-9+7,31-7,31-7 /* _PAGE_RW -> _PAGE_DIRTY */
- or r30,r30,r31
- ori r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
- /* Write the linux PTE atomically (setting busy) */
- stdcx. r30,0,r6
- bne- 1b
- isync
-
- /* Step 2:
- *
- * Insert/Update the HPTE in the hash table. At this point,
- * r4 (access) is re-useable, we use it for the new HPTE flags
- */
-
-BEGIN_FTR_SECTION
- cmpdi r9,0 /* check segment size */
- bne 3f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- /* Calc vpn and put it in r29 */
- sldi r29,r5,SID_SHIFT - VPN_SHIFT
- rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
- or r29,r28,r29
-
- /* Calculate hash value for primary slot and store it in r28
- * r3 = va, r5 = vsid
- * r0 = (va >> 16) & ((1ul << (28 - 16)) -1)
- */
- rldicl r0,r3,64-16,52
- xor r28,r5,r0 /* hash */
- b 4f
-
-3: /* Calc vpn and put it in r29 */
- sldi r29,r5,SID_SHIFT_1T - VPN_SHIFT
- rldicl r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
- or r29,r28,r29
- /*
- * calculate hash value for primary slot and
- * store it in r28 for 1T segment
- * r3 = va, r5 = vsid
- */
- sldi r28,r5,25 /* vsid << 25 */
- /* r0 = (va >> 16) & ((1ul << (40 - 16)) -1) */
- rldicl r0,r3,64-16,40
- xor r28,r28,r5 /* vsid ^ ( vsid << 25) */
- xor r28,r28,r0 /* hash */
-
- /* Convert linux PTE bits into HW equivalents */
-4: andi. r3,r30,0x1fe /* Get basic set of flags */
- xori r3,r3,HPTE_R_N /* _PAGE_EXEC -> NOEXEC */
- rlwinm r0,r30,32-9+1,30,30 /* _PAGE_RW -> _PAGE_USER (r0) */
- rlwinm r4,r30,32-7+1,30,30 /* _PAGE_DIRTY -> _PAGE_USER (r4) */
- and r0,r0,r4 /* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
- andc r0,r30,r0 /* r0 = pte & ~r0 */
- rlwimi r3,r0,32-1,31,31 /* Insert result into PP lsb */
- /*
- * Always add "C" bit for perf. Memory coherence is always enabled
- */
- ori r3,r3,HPTE_R_C | HPTE_R_M
-
- /* We eventually do the icache sync here (maybe inline that
- * code rather than call a C function...)
- */
-BEGIN_FTR_SECTION
- mr r4,r30
- mr r5,r7
- bl hash_page_do_lazy_icache
-END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
-
- /* At this point, r3 contains new PP bits, save them in
- * place of "access" in the param area (sic)
- */
- std r3,STK_PARAM(R4)(r1)
-
- /* Get htab_hash_mask */
- ld r4,htab_hash_mask@got(2)
- ld r27,0(r4) /* htab_hash_mask -> r27 */
-
- /* Check if we may already be in the hashtable, in this case, we
- * go to out-of-line code to try to modify the HPTE
- */
- rldicl. r0,r31,64-12,48
- bne ht64_modify_pte
-
-ht64_insert_pte:
- /* Clear hpte bits in new pte (we also clear BUSY btw) and
- * add _PAGE_HPTE_SUB0
- */
- lis r0,_PAGE_HPTEFLAGS@h
- ori r0,r0,_PAGE_HPTEFLAGS@l
- andc r30,r30,r0
-#ifdef CONFIG_PPC_64K_PAGES
- oris r30,r30,_PAGE_HPTE_SUB0@h
-#else
- ori r30,r30,_PAGE_HASHPTE
-#endif
- /* Phyical address in r5 */
- rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
- sldi r5,r5,PAGE_SHIFT
-
- /* Calculate primary group hash */
- and r0,r28,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
-
- /* Call ppc_md.hpte_insert */
- ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve vpn */
- li r7,0 /* !bolted, !secondary */
- li r8,MMU_PAGE_64K
- li r9,MMU_PAGE_64K /* actual page size */
- ld r10,STK_PARAM(R9)(r1) /* segment size */
-.globl ht64_call_hpte_insert1
-ht64_call_hpte_insert1:
- bl . /* patched by htab_finish_init() */
- cmpdi 0,r3,0
- bge ht64_pte_insert_ok /* Insertion successful */
- cmpdi 0,r3,-2 /* Critical failure */
- beq- ht64_pte_insert_failure
-
- /* Now try secondary slot */
-
- /* Phyical address in r5 */
- rldicl r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
- sldi r5,r5,PAGE_SHIFT
-
- /* Calculate secondary group hash */
- andc r0,r27,r28
- rldicr r3,r0,3,63-3 /* r0 = (~hash & mask) << 3 */
-
- /* Call ppc_md.hpte_insert */
- ld r6,STK_PARAM(R4)(r1) /* Retrieve new pp bits */
- mr r4,r29 /* Retrieve vpn */
- li r7,HPTE_V_SECONDARY /* !bolted, secondary */
- li r8,MMU_PAGE_64K
- li r9,MMU_PAGE_64K /* actual page size */
- ld r10,STK_PARAM(R9)(r1) /* segment size */
-.globl ht64_call_hpte_insert2
-ht64_call_hpte_insert2:
- bl . /* patched by htab_finish_init() */
- cmpdi 0,r3,0
- bge+ ht64_pte_insert_ok /* Insertion successful */
- cmpdi 0,r3,-2 /* Critical failure */
- beq- ht64_pte_insert_failure
-
- /* Both are full, we need to evict something */
- mftb r0
- /* Pick a random group based on TB */
- andi. r0,r0,1
- mr r5,r28
- bne 2f
- not r5,r5
-2: and r0,r5,r27
- rldicr r3,r0,3,63-3 /* r0 = (hash & mask) << 3 */
- /* Call ppc_md.hpte_remove */
-.globl ht64_call_hpte_remove
-ht64_call_hpte_remove:
- bl . /* patched by htab_finish_init() */
-
- /* Try all again */
- b ht64_insert_pte
-
-ht64_bail_ok:
- li r3,0
- b ht64_bail
-
-ht64_pte_insert_ok:
- /* Insert slot number & secondary bit in PTE */
- rldimi r30,r3,12,63-15
-
- /* Write out the PTE with a normal write
- * (maybe add eieio may be good still ?)
- */
-ht64_write_out_pte:
- ld r6,STK_PARAM(R6)(r1)
- std r30,0(r6)
- li r3, 0
-ht64_bail:
- ld r27,STK_REG(R27)(r1)
- ld r28,STK_REG(R28)(r1)
- ld r29,STK_REG(R29)(r1)
- ld r30,STK_REG(R30)(r1)
- ld r31,STK_REG(R31)(r1)
- addi r1,r1,STACKFRAMESIZE
- ld r0,16(r1)
- mtlr r0
- blr
-
-ht64_modify_pte:
- /* Keep PP bits in r4 and slot idx from the PTE around in r3 */
- mr r4,r3
- rlwinm r3,r31,32-12,29,31
-
- /* Secondary group ? if yes, get a inverted hash value */
- mr r5,r28
- andi. r0,r31,_PAGE_F_SECOND
- beq 1f
- not r5,r5
-1:
- /* Calculate proper slot value for ppc_md.hpte_updatepp */
- and r0,r5,r27
- rldicr r0,r0,3,63-3 /* r0 = (hash & mask) << 3 */
- add r3,r0,r3 /* add slot idx */
-
- /* Call ppc_md.hpte_updatepp */
- mr r5,r29 /* vpn */
- li r6,MMU_PAGE_64K /* base page size */
- li r7,MMU_PAGE_64K /* actual page size */
- ld r8,STK_PARAM(R9)(r1) /* segment size */
- ld r9,STK_PARAM(R8)(r1) /* get "local" param */
-.globl ht64_call_hpte_updatepp
-ht64_call_hpte_updatepp:
- bl . /* patched by htab_finish_init() */
-
- /* if we failed because typically the HPTE wasn't really here
- * we try an insertion.
- */
- cmpdi 0,r3,-1
- beq- ht64_insert_pte
-
- /* Clear the BUSY bit and Write out the PTE */
- li r0,_PAGE_BUSY
- andc r30,r30,r0
- b ht64_write_out_pte
-
-ht64_wrong_access:
- /* Bail out clearing reservation */
- stdcx. r31,0,r6
- li r3,1
- b ht64_bail
-
-ht64_pte_insert_failure:
- /* Bail out restoring old PTE */
- ld r6,STK_PARAM(R6)(r1)
- std r31,0(r6)
- li r3,-1
- b ht64_bail
-
-
-#endif /* CONFIG_PPC_HAS_HASH_64K */
-
-
-/*****************************************************************************
- * *
- * Huge pages implementation is in hugetlbpage.c *
- * *
- *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
deleted file mode 100644
index daee7f4e5a14..000000000000
--- a/arch/powerpc/mm/hash_utils_64.c
+++ /dev/null
@@ -1,1465 +0,0 @@
-/*
- * PowerPC64 port by Mike Corrigan and Dave Engebretsen
- * {mikejc|engebret}@us.ibm.com
- *
- * Copyright (c) 2000 Mike Corrigan <mikejc@us.ibm.com>
- *
- * SMP scalability work:
- * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * Module name: htab.c
- *
- * Description:
- * PowerPC Hashed Page Table functions
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#undef DEBUG
-#undef DEBUG_LOW
-
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/sysctl.h>
-#include <linux/export.h>
-#include <linux/ctype.h>
-#include <linux/cache.h>
-#include <linux/init.h>
-#include <linux/signal.h>
-#include <linux/memblock.h>
-#include <linux/context_tracking.h>
-
-#include <asm/processor.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/uaccess.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/tlbflush.h>
-#include <asm/io.h>
-#include <asm/eeh.h>
-#include <asm/tlb.h>
-#include <asm/cacheflush.h>
-#include <asm/cputable.h>
-#include <asm/sections.h>
-#include <asm/spu.h>
-#include <asm/udbg.h>
-#include <asm/code-patching.h>
-#include <asm/fadump.h>
-#include <asm/firmware.h>
-#include <asm/tm.h>
-
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-#ifdef DEBUG_LOW
-#define DBG_LOW(fmt...) udbg_printf(fmt)
-#else
-#define DBG_LOW(fmt...)
-#endif
-
-#define KB (1024)
-#define MB (1024*KB)
-#define GB (1024L*MB)
-
-/*
- * Note: pte --> Linux PTE
- * HPTE --> PowerPC Hashed Page Table Entry
- *
- * Execution context:
- * htab_initialize is called with the MMU off (of course), but
- * the kernel has been copied down to zero so it can directly
- * reference global data. At this point it is very difficult
- * to print debug info.
- *
- */
-
-#ifdef CONFIG_U3_DART
-extern unsigned long dart_tablebase;
-#endif /* CONFIG_U3_DART */
-
-static unsigned long _SDR1;
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
-
-struct hash_pte *htab_address;
-unsigned long htab_size_bytes;
-unsigned long htab_hash_mask;
-EXPORT_SYMBOL_GPL(htab_hash_mask);
-int mmu_linear_psize = MMU_PAGE_4K;
-int mmu_virtual_psize = MMU_PAGE_4K;
-int mmu_vmalloc_psize = MMU_PAGE_4K;
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-int mmu_vmemmap_psize = MMU_PAGE_4K;
-#endif
-int mmu_io_psize = MMU_PAGE_4K;
-int mmu_kernel_ssize = MMU_SEGSIZE_256M;
-int mmu_highuser_ssize = MMU_SEGSIZE_256M;
-u16 mmu_slb_size = 64;
-EXPORT_SYMBOL_GPL(mmu_slb_size);
-#ifdef CONFIG_PPC_64K_PAGES
-int mmu_ci_restrictions;
-#endif
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static u8 *linear_map_hash_slots;
-static unsigned long linear_map_hash_count;
-static DEFINE_SPINLOCK(linear_map_hash_lock);
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-/* There are definitions of page sizes arrays to be used when none
- * is provided by the firmware.
- */
-
-/* Pre-POWER4 CPUs (4k pages only)
- */
-static struct mmu_psize_def mmu_psize_defaults_old[] = {
- [MMU_PAGE_4K] = {
- .shift = 12,
- .sllp = 0,
- .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
- .avpnm = 0,
- .tlbiel = 0,
- },
-};
-
-/* POWER4, GPUL, POWER5
- *
- * Support for 16Mb large pages
- */
-static struct mmu_psize_def mmu_psize_defaults_gp[] = {
- [MMU_PAGE_4K] = {
- .shift = 12,
- .sllp = 0,
- .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
- .avpnm = 0,
- .tlbiel = 1,
- },
- [MMU_PAGE_16M] = {
- .shift = 24,
- .sllp = SLB_VSID_L,
- .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
- [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
- .avpnm = 0x1UL,
- .tlbiel = 0,
- },
-};
-
-static unsigned long htab_convert_pte_flags(unsigned long pteflags)
-{
- unsigned long rflags = pteflags & 0x1fa;
-
- /* _PAGE_EXEC -> NOEXEC */
- if ((pteflags & _PAGE_EXEC) == 0)
- rflags |= HPTE_R_N;
-
- /* PP bits. PAGE_USER is already PP bit 0x2, so we only
- * need to add in 0x1 if it's a read-only user page
- */
- if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
- (pteflags & _PAGE_DIRTY)))
- rflags |= 1;
- /*
- * Always add "C" bit for perf. Memory coherence is always enabled
- */
- return rflags | HPTE_R_C | HPTE_R_M;
-}
-
-int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
- unsigned long pstart, unsigned long prot,
- int psize, int ssize)
-{
- unsigned long vaddr, paddr;
- unsigned int step, shift;
- int ret = 0;
-
- shift = mmu_psize_defs[psize].shift;
- step = 1 << shift;
-
- prot = htab_convert_pte_flags(prot);
-
- DBG("htab_bolt_mapping(%lx..%lx -> %lx (%lx,%d,%d)\n",
- vstart, vend, pstart, prot, psize, ssize);
-
- for (vaddr = vstart, paddr = pstart; vaddr < vend;
- vaddr += step, paddr += step) {
- unsigned long hash, hpteg;
- unsigned long vsid = get_kernel_vsid(vaddr, ssize);
- unsigned long vpn = hpt_vpn(vaddr, vsid, ssize);
- unsigned long tprot = prot;
-
- /*
- * If we hit a bad address return error.
- */
- if (!vsid)
- return -1;
- /* Make kernel text executable */
- if (overlaps_kernel_text(vaddr, vaddr + step))
- tprot &= ~HPTE_R_N;
-
- /* Make kvm guest trampolines executable */
- if (overlaps_kvm_tmp(vaddr, vaddr + step))
- tprot &= ~HPTE_R_N;
-
- /*
- * If relocatable, check if it overlaps interrupt vectors that
- * are copied down to real 0. For relocatable kernel
- * (e.g. kdump case) we copy interrupt vectors down to real
- * address 0. Mark that region as executable. This is
- * because on p8 system with relocation on exception feature
- * enabled, exceptions are raised with MMU (IR=DR=1) ON. Hence
- * in order to execute the interrupt handlers in virtual
- * mode the vector region need to be marked as executable.
- */
- if ((PHYSICAL_START > MEMORY_START) &&
- overlaps_interrupt_vector_text(vaddr, vaddr + step))
- tprot &= ~HPTE_R_N;
-
- hash = hpt_hash(vpn, shift, ssize);
- hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
-
- BUG_ON(!ppc_md.hpte_insert);
- ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
- HPTE_V_BOLTED, psize, psize, ssize);
-
- if (ret < 0)
- break;
-#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((paddr >> PAGE_SHIFT) < linear_map_hash_count)
- linear_map_hash_slots[paddr >> PAGE_SHIFT] = ret | 0x80;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
- }
- return ret < 0 ? ret : 0;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int htab_remove_mapping(unsigned long vstart, unsigned long vend,
- int psize, int ssize)
-{
- unsigned long vaddr;
- unsigned int step, shift;
-
- shift = mmu_psize_defs[psize].shift;
- step = 1 << shift;
-
- if (!ppc_md.hpte_removebolted) {
- printk(KERN_WARNING "Platform doesn't implement "
- "hpte_removebolted\n");
- return -EINVAL;
- }
-
- for (vaddr = vstart; vaddr < vend; vaddr += step)
- ppc_md.hpte_removebolted(vaddr, psize, ssize);
-
- return 0;
-}
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-static int __init htab_dt_scan_seg_sizes(unsigned long node,
- const char *uname, int depth,
- void *data)
-{
- const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const __be32 *prop;
- int size = 0;
-
- /* We are scanning "cpu" nodes only */
- if (type == NULL || strcmp(type, "cpu") != 0)
- return 0;
-
- prop = of_get_flat_dt_prop(node, "ibm,processor-segment-sizes", &size);
- if (prop == NULL)
- return 0;
- for (; size >= 4; size -= 4, ++prop) {
- if (be32_to_cpu(prop[0]) == 40) {
- DBG("1T segment support detected\n");
- cur_cpu_spec->mmu_features |= MMU_FTR_1T_SEGMENT;
- return 1;
- }
- }
- cur_cpu_spec->mmu_features &= ~MMU_FTR_NO_SLBIE_B;
- return 0;
-}
-
-static void __init htab_init_seg_sizes(void)
-{
- of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
-}
-
-static int __init get_idx_from_shift(unsigned int shift)
-{
- int idx = -1;
-
- switch (shift) {
- case 0xc:
- idx = MMU_PAGE_4K;
- break;
- case 0x10:
- idx = MMU_PAGE_64K;
- break;
- case 0x14:
- idx = MMU_PAGE_1M;
- break;
- case 0x18:
- idx = MMU_PAGE_16M;
- break;
- case 0x22:
- idx = MMU_PAGE_16G;
- break;
- }
- return idx;
-}
-
-static int __init htab_dt_scan_page_sizes(unsigned long node,
- const char *uname, int depth,
- void *data)
-{
- const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const __be32 *prop;
- int size = 0;
-
- /* We are scanning "cpu" nodes only */
- if (type == NULL || strcmp(type, "cpu") != 0)
- return 0;
-
- prop = of_get_flat_dt_prop(node, "ibm,segment-page-sizes", &size);
- if (prop != NULL) {
- pr_info("Page sizes from device-tree:\n");
- size /= 4;
- cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
- while(size > 0) {
- unsigned int base_shift = be32_to_cpu(prop[0]);
- unsigned int slbenc = be32_to_cpu(prop[1]);
- unsigned int lpnum = be32_to_cpu(prop[2]);
- struct mmu_psize_def *def;
- int idx, base_idx;
-
- size -= 3; prop += 3;
- base_idx = get_idx_from_shift(base_shift);
- if (base_idx < 0) {
- /*
- * skip the pte encoding also
- */
- prop += lpnum * 2; size -= lpnum * 2;
- continue;
- }
- def = &mmu_psize_defs[base_idx];
- if (base_idx == MMU_PAGE_16M)
- cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
-
- def->shift = base_shift;
- if (base_shift <= 23)
- def->avpnm = 0;
- else
- def->avpnm = (1 << (base_shift - 23)) - 1;
- def->sllp = slbenc;
- /*
- * We don't know for sure what's up with tlbiel, so
- * for now we only set it for 4K and 64K pages
- */
- if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
- def->tlbiel = 1;
- else
- def->tlbiel = 0;
-
- while (size > 0 && lpnum) {
- unsigned int shift = be32_to_cpu(prop[0]);
- int penc = be32_to_cpu(prop[1]);
-
- prop += 2; size -= 2;
- lpnum--;
-
- idx = get_idx_from_shift(shift);
- if (idx < 0)
- continue;
-
- if (penc == -1)
- pr_err("Invalid penc for base_shift=%d "
- "shift=%d\n", base_shift, shift);
-
- def->penc[idx] = penc;
- pr_info("base_shift=%d: shift=%d, sllp=0x%04lx,"
- " avpnm=0x%08lx, tlbiel=%d, penc=%d\n",
- base_shift, shift, def->sllp,
- def->avpnm, def->tlbiel, def->penc[idx]);
- }
- }
- return 1;
- }
- return 0;
-}
-
-#ifdef CONFIG_HUGETLB_PAGE
-/* Scan for 16G memory blocks that have been set aside for huge pages
- * and reserve those blocks for 16G huge pages.
- */
-static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
- const char *uname, int depth,
- void *data) {
- const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const __be64 *addr_prop;
- const __be32 *page_count_prop;
- unsigned int expected_pages;
- long unsigned int phys_addr;
- long unsigned int block_size;
-
- /* We are scanning "memory" nodes only */
- if (type == NULL || strcmp(type, "memory") != 0)
- return 0;
-
- /* This property is the log base 2 of the number of virtual pages that
- * will represent this memory block. */
- page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
- if (page_count_prop == NULL)
- return 0;
- expected_pages = (1 << be32_to_cpu(page_count_prop[0]));
- addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
- if (addr_prop == NULL)
- return 0;
- phys_addr = be64_to_cpu(addr_prop[0]);
- block_size = be64_to_cpu(addr_prop[1]);
- if (block_size != (16 * GB))
- return 0;
- printk(KERN_INFO "Huge page(16GB) memory: "
- "addr = 0x%lX size = 0x%lX pages = %d\n",
- phys_addr, block_size, expected_pages);
- if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) {
- memblock_reserve(phys_addr, block_size * expected_pages);
- add_gpage(phys_addr, block_size, expected_pages);
- }
- return 0;
-}
-#endif /* CONFIG_HUGETLB_PAGE */
-
-static void mmu_psize_set_default_penc(void)
-{
- int bpsize, apsize;
- for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
- for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++)
- mmu_psize_defs[bpsize].penc[apsize] = -1;
-}
-
-#ifdef CONFIG_PPC_64K_PAGES
-
-static bool might_have_hea(void)
-{
- /*
- * The HEA ethernet adapter requires awareness of the
- * GX bus. Without that awareness we can easily assume
- * we will never see an HEA ethernet device.
- */
-#ifdef CONFIG_IBMEBUS
- return !cpu_has_feature(CPU_FTR_ARCH_207S);
-#else
- return false;
-#endif
-}
-
-#endif /* #ifdef CONFIG_PPC_64K_PAGES */
-
-static void __init htab_init_page_sizes(void)
-{
- int rc;
-
- /* se the invalid penc to -1 */
- mmu_psize_set_default_penc();
-
- /* Default to 4K pages only */
- memcpy(mmu_psize_defs, mmu_psize_defaults_old,
- sizeof(mmu_psize_defaults_old));
-
- /*
- * Try to find the available page sizes in the device-tree
- */
- rc = of_scan_flat_dt(htab_dt_scan_page_sizes, NULL);
- if (rc != 0) /* Found */
- goto found;
-
- /*
- * Not in the device-tree, let's fallback on known size
- * list for 16M capable GP & GR
- */
- if (mmu_has_feature(MMU_FTR_16M_PAGE))
- memcpy(mmu_psize_defs, mmu_psize_defaults_gp,
- sizeof(mmu_psize_defaults_gp));
- found:
-#ifndef CONFIG_DEBUG_PAGEALLOC
- /*
- * Pick a size for the linear mapping. Currently, we only support
- * 16M, 1M and 4K which is the default
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- mmu_linear_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- mmu_linear_psize = MMU_PAGE_1M;
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-#ifdef CONFIG_PPC_64K_PAGES
- /*
- * Pick a size for the ordinary pages. Default is 4K, we support
- * 64K for user mappings and vmalloc if supported by the processor.
- * We only use 64k for ioremap if the processor
- * (and firmware) support cache-inhibited large pages.
- * If not, we use 4k and set mmu_ci_restrictions so that
- * hash_page knows to switch processes that use cache-inhibited
- * mappings to 4k pages.
- */
- if (mmu_psize_defs[MMU_PAGE_64K].shift) {
- mmu_virtual_psize = MMU_PAGE_64K;
- mmu_vmalloc_psize = MMU_PAGE_64K;
- if (mmu_linear_psize == MMU_PAGE_4K)
- mmu_linear_psize = MMU_PAGE_64K;
- if (mmu_has_feature(MMU_FTR_CI_LARGE_PAGE)) {
- /*
- * When running on pSeries using 64k pages for ioremap
- * would stop us accessing the HEA ethernet. So if we
- * have the chance of ever seeing one, stay at 4k.
- */
- if (!might_have_hea() || !machine_is(pseries))
- mmu_io_psize = MMU_PAGE_64K;
- } else
- mmu_ci_restrictions = 1;
- }
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* We try to use 16M pages for vmemmap if that is supported
- * and we have at least 1G of RAM at boot
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift &&
- memblock_phys_mem_size() >= 0x40000000)
- mmu_vmemmap_psize = MMU_PAGE_16M;
- else if (mmu_psize_defs[MMU_PAGE_64K].shift)
- mmu_vmemmap_psize = MMU_PAGE_64K;
- else
- mmu_vmemmap_psize = MMU_PAGE_4K;
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-
- printk(KERN_DEBUG "Page orders: linear mapping = %d, "
- "virtual = %d, io = %d"
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- ", vmemmap = %d"
-#endif
- "\n",
- mmu_psize_defs[mmu_linear_psize].shift,
- mmu_psize_defs[mmu_virtual_psize].shift,
- mmu_psize_defs[mmu_io_psize].shift
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- ,mmu_psize_defs[mmu_vmemmap_psize].shift
-#endif
- );
-
-#ifdef CONFIG_HUGETLB_PAGE
- /* Reserve 16G huge page memory sections for huge pages */
- of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
-#endif /* CONFIG_HUGETLB_PAGE */
-}
-
-static int __init htab_dt_scan_pftsize(unsigned long node,
- const char *uname, int depth,
- void *data)
-{
- const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
- const __be32 *prop;
-
- /* We are scanning "cpu" nodes only */
- if (type == NULL || strcmp(type, "cpu") != 0)
- return 0;
-
- prop = of_get_flat_dt_prop(node, "ibm,pft-size", NULL);
- if (prop != NULL) {
- /* pft_size[0] is the NUMA CEC cookie */
- ppc64_pft_size = be32_to_cpu(prop[1]);
- return 1;
- }
- return 0;
-}
-
-static unsigned long __init htab_get_table_size(void)
-{
- unsigned long mem_size, rnd_mem_size, pteg_count, psize;
-
- /* If hash size isn't already provided by the platform, we try to
- * retrieve it from the device-tree. If it's not there neither, we
- * calculate it now based on the total RAM size
- */
- if (ppc64_pft_size == 0)
- of_scan_flat_dt(htab_dt_scan_pftsize, NULL);
- if (ppc64_pft_size)
- return 1UL << ppc64_pft_size;
-
- /* round mem_size up to next power of 2 */
- mem_size = memblock_phys_mem_size();
- rnd_mem_size = 1UL << __ilog2(mem_size);
- if (rnd_mem_size < mem_size)
- rnd_mem_size <<= 1;
-
- /* # pages / 2 */
- psize = mmu_psize_defs[mmu_virtual_psize].shift;
- pteg_count = max(rnd_mem_size >> (psize + 1), 1UL << 11);
-
- return pteg_count << 7;
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int create_section_mapping(unsigned long start, unsigned long end)
-{
- return htab_bolt_mapping(start, end, __pa(start),
- pgprot_val(PAGE_KERNEL), mmu_linear_psize,
- mmu_kernel_ssize);
-}
-
-int remove_section_mapping(unsigned long start, unsigned long end)
-{
- return htab_remove_mapping(start, end, mmu_linear_psize,
- mmu_kernel_ssize);
-}
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-extern u32 htab_call_hpte_insert1[];
-extern u32 htab_call_hpte_insert2[];
-extern u32 htab_call_hpte_remove[];
-extern u32 htab_call_hpte_updatepp[];
-extern u32 ht64_call_hpte_insert1[];
-extern u32 ht64_call_hpte_insert2[];
-extern u32 ht64_call_hpte_remove[];
-extern u32 ht64_call_hpte_updatepp[];
-
-static void __init htab_finish_init(void)
-{
-#ifdef CONFIG_PPC_HAS_HASH_64K
- patch_branch(ht64_call_hpte_insert1,
- ppc_function_entry(ppc_md.hpte_insert),
- BRANCH_SET_LINK);
- patch_branch(ht64_call_hpte_insert2,
- ppc_function_entry(ppc_md.hpte_insert),
- BRANCH_SET_LINK);
- patch_branch(ht64_call_hpte_remove,
- ppc_function_entry(ppc_md.hpte_remove),
- BRANCH_SET_LINK);
- patch_branch(ht64_call_hpte_updatepp,
- ppc_function_entry(ppc_md.hpte_updatepp),
- BRANCH_SET_LINK);
-#endif /* CONFIG_PPC_HAS_HASH_64K */
-
- patch_branch(htab_call_hpte_insert1,
- ppc_function_entry(ppc_md.hpte_insert),
- BRANCH_SET_LINK);
- patch_branch(htab_call_hpte_insert2,
- ppc_function_entry(ppc_md.hpte_insert),
- BRANCH_SET_LINK);
- patch_branch(htab_call_hpte_remove,
- ppc_function_entry(ppc_md.hpte_remove),
- BRANCH_SET_LINK);
- patch_branch(htab_call_hpte_updatepp,
- ppc_function_entry(ppc_md.hpte_updatepp),
- BRANCH_SET_LINK);
-}
-
-static void __init htab_initialize(void)
-{
- unsigned long table;
- unsigned long pteg_count;
- unsigned long prot;
- unsigned long base = 0, size = 0, limit;
- struct memblock_region *reg;
-
- DBG(" -> htab_initialize()\n");
-
- /* Initialize segment sizes */
- htab_init_seg_sizes();
-
- /* Initialize page sizes */
- htab_init_page_sizes();
-
- if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
- mmu_kernel_ssize = MMU_SEGSIZE_1T;
- mmu_highuser_ssize = MMU_SEGSIZE_1T;
- printk(KERN_INFO "Using 1TB segments\n");
- }
-
- /*
- * Calculate the required size of the htab. We want the number of
- * PTEGs to equal one half the number of real pages.
- */
- htab_size_bytes = htab_get_table_size();
- pteg_count = htab_size_bytes >> 7;
-
- htab_hash_mask = pteg_count - 1;
-
- if (firmware_has_feature(FW_FEATURE_LPAR)) {
- /* Using a hypervisor which owns the htab */
- htab_address = NULL;
- _SDR1 = 0;
-#ifdef CONFIG_FA_DUMP
- /*
- * If firmware assisted dump is active firmware preserves
- * the contents of htab along with entire partition memory.
- * Clear the htab if firmware assisted dump is active so
- * that we dont end up using old mappings.
- */
- if (is_fadump_active() && ppc_md.hpte_clear_all)
- ppc_md.hpte_clear_all();
-#endif
- } else {
- /* Find storage for the HPT. Must be contiguous in
- * the absolute address space. On cell we want it to be
- * in the first 2 Gig so we can use it for IOMMU hacks.
- */
- if (machine_is(cell))
- limit = 0x80000000;
- else
- limit = MEMBLOCK_ALLOC_ANYWHERE;
-
- table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit);
-
- DBG("Hash table allocated at %lx, size: %lx\n", table,
- htab_size_bytes);
-
- htab_address = __va(table);
-
- /* htab absolute addr + encoded htabsize */
- _SDR1 = table + __ilog2(pteg_count) - 11;
-
- /* Initialize the HPT with no entries */
- memset((void *)table, 0, htab_size_bytes);
-
- /* Set SDR1 */
- mtspr(SPRN_SDR1, _SDR1);
- }
-
- prot = pgprot_val(PAGE_KERNEL);
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
- linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT;
- linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count,
- 1, ppc64_rma_size));
- memset(linear_map_hash_slots, 0, linear_map_hash_count);
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
- /* On U3 based machines, we need to reserve the DART area and
- * _NOT_ map it to avoid cache paradoxes as it's remapped non
- * cacheable later on
- */
-
- /* create bolted the linear mapping in the hash table */
- for_each_memblock(memory, reg) {
- base = (unsigned long)__va(reg->base);
- size = reg->size;
-
- DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
- base, size, prot);
-
-#ifdef CONFIG_U3_DART
- /* Do not map the DART space. Fortunately, it will be aligned
- * in such a way that it will not cross two memblock regions and
- * will fit within a single 16Mb page.
- * The DART space is assumed to be a full 16Mb region even if
- * we only use 2Mb of that space. We will use more of it later
- * for AGP GART. We have to use a full 16Mb large page.
- */
- DBG("DART base: %lx\n", dart_tablebase);
-
- if (dart_tablebase != 0 && dart_tablebase >= base
- && dart_tablebase < (base + size)) {
- unsigned long dart_table_end = dart_tablebase + 16 * MB;
- if (base != dart_tablebase)
- BUG_ON(htab_bolt_mapping(base, dart_tablebase,
- __pa(base), prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- if ((base + size) > dart_table_end)
- BUG_ON(htab_bolt_mapping(dart_tablebase+16*MB,
- base + size,
- __pa(dart_table_end),
- prot,
- mmu_linear_psize,
- mmu_kernel_ssize));
- continue;
- }
-#endif /* CONFIG_U3_DART */
- BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
- prot, mmu_linear_psize, mmu_kernel_ssize));
- }
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
-
- /*
- * If we have a memory_limit and we've allocated TCEs then we need to
- * explicitly map the TCE area at the top of RAM. We also cope with the
- * case that the TCEs start below memory_limit.
- * tce_alloc_start/end are 16MB aligned so the mapping should work
- * for either 4K or 16MB pages.
- */
- if (tce_alloc_start) {
- tce_alloc_start = (unsigned long)__va(tce_alloc_start);
- tce_alloc_end = (unsigned long)__va(tce_alloc_end);
-
- if (base + size >= tce_alloc_start)
- tce_alloc_start = base + size + 1;
-
- BUG_ON(htab_bolt_mapping(tce_alloc_start, tce_alloc_end,
- __pa(tce_alloc_start), prot,
- mmu_linear_psize, mmu_kernel_ssize));
- }
-
- htab_finish_init();
-
- DBG(" <- htab_initialize()\n");
-}
-#undef KB
-#undef MB
-
-void __init early_init_mmu(void)
-{
- /* Initialize the MMU Hash table and create the linear mapping
- * of memory. Has to be done before SLB initialization as this is
- * currently where the page size encoding is obtained.
- */
- htab_initialize();
-
- /* Initialize SLB management */
- slb_initialize();
-}
-
-#ifdef CONFIG_SMP
-void early_init_mmu_secondary(void)
-{
- /* Initialize hash table for that CPU */
- if (!firmware_has_feature(FW_FEATURE_LPAR))
- mtspr(SPRN_SDR1, _SDR1);
-
- /* Initialize SLB */
- slb_initialize();
-}
-#endif /* CONFIG_SMP */
-
-/*
- * Called by asm hashtable.S for doing lazy icache flush
- */
-unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
-{
- struct page *page;
-
- if (!pfn_valid(pte_pfn(pte)))
- return pp;
-
- page = pte_page(pte);
-
- /* page is dirty */
- if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) {
- if (trap == 0x400) {
- flush_dcache_icache_page(page);
- set_bit(PG_arch_1, &page->flags);
- } else
- pp |= HPTE_R_N;
- }
- return pp;
-}
-
-#ifdef CONFIG_PPC_MM_SLICES
-unsigned int get_paca_psize(unsigned long addr)
-{
- u64 lpsizes;
- unsigned char *hpsizes;
- unsigned long index, mask_index;
-
- if (addr < SLICE_LOW_TOP) {
- lpsizes = get_paca()->context.low_slices_psize;
- index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xF;
- }
- hpsizes = get_paca()->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
- mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
-}
-
-#else
-unsigned int get_paca_psize(unsigned long addr)
-{
- return get_paca()->context.user_psize;
-}
-#endif
-
-/*
- * Demote a segment to using 4k pages.
- * For now this makes the whole process use 4k pages.
- */
-#ifdef CONFIG_PPC_64K_PAGES
-void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
-{
- if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
- return;
- slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
- if (get_paca_psize(addr) != MMU_PAGE_4K) {
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
- }
-}
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-/*
- * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
- * Userspace sets the subpage permissions using the subpage_prot system call.
- *
- * Result is 0: full permissions, _PAGE_RW: read-only,
- * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
- */
-static int subpage_protection(struct mm_struct *mm, unsigned long ea)
-{
- struct subpage_prot_table *spt = &mm->context.spt;
- u32 spp = 0;
- u32 **sbpm, *sbpp;
-
- if (ea >= spt->maxaddr)
- return 0;
- if (ea < 0x100000000UL) {
- /* addresses below 4GB use spt->low_prot */
- sbpm = spt->low_prot;
- } else {
- sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
- if (!sbpm)
- return 0;
- }
- sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
- if (!sbpp)
- return 0;
- spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];
-
- /* extract 2-bit bitfield for this 4k subpage */
- spp >>= 30 - 2 * ((ea >> 12) & 0xf);
-
- /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
- spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
- return spp;
-}
-
-#else /* CONFIG_PPC_SUBPAGE_PROT */
-static inline int subpage_protection(struct mm_struct *mm, unsigned long ea)
-{
- return 0;
-}
-#endif
-
-void hash_failure_debug(unsigned long ea, unsigned long access,
- unsigned long vsid, unsigned long trap,
- int ssize, int psize, int lpsize, unsigned long pte)
-{
- if (!printk_ratelimit())
- return;
- pr_info("mm: Hashing failure ! EA=0x%lx access=0x%lx current=%s\n",
- ea, access, current->comm);
- pr_info(" trap=0x%lx vsid=0x%lx ssize=%d base psize=%d psize %d pte=0x%lx\n",
- trap, vsid, ssize, psize, lpsize, pte);
-}
-
-static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
- int psize, bool user_region)
-{
- if (user_region) {
- if (psize != get_paca_psize(ea)) {
- get_paca()->context = mm->context;
- slb_flush_and_rebolt();
- }
- } else if (get_paca()->vmalloc_sllp !=
- mmu_psize_defs[mmu_vmalloc_psize].sllp) {
- get_paca()->vmalloc_sllp =
- mmu_psize_defs[mmu_vmalloc_psize].sllp;
- slb_vmalloc_update();
- }
-}
-
-/* Result code is:
- * 0 - handled
- * 1 - normal page fault
- * -1 - critical hash insertion error
- * -2 - access not permitted by subpage protection mechanism
- */
-int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
-{
- enum ctx_state prev_state = exception_enter();
- pgd_t *pgdir;
- unsigned long vsid;
- struct mm_struct *mm;
- pte_t *ptep;
- unsigned hugeshift;
- const struct cpumask *tmp;
- int rc, user_region = 0, local = 0;
- int psize, ssize;
-
- DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n",
- ea, access, trap);
-
- /* Get region & vsid */
- switch (REGION_ID(ea)) {
- case USER_REGION_ID:
- user_region = 1;
- mm = current->mm;
- if (! mm) {
- DBG_LOW(" user region with no mm !\n");
- rc = 1;
- goto bail;
- }
- psize = get_slice_psize(mm, ea);
- ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
- break;
- case VMALLOC_REGION_ID:
- mm = &init_mm;
- vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
- ssize = mmu_kernel_ssize;
- break;
- default:
- /* Not a valid range
- * Send the problem up to do_page_fault
- */
- rc = 1;
- goto bail;
- }
- DBG_LOW(" mm=%p, mm->pgdir=%p, vsid=%016lx\n", mm, mm->pgd, vsid);
-
- /* Bad address. */
- if (!vsid) {
- DBG_LOW("Bad address!\n");
- rc = 1;
- goto bail;
- }
- /* Get pgdir */
- pgdir = mm->pgd;
- if (pgdir == NULL) {
- rc = 1;
- goto bail;
- }
-
- /* Check CPU locality */
- tmp = cpumask_of(smp_processor_id());
- if (user_region && cpumask_equal(mm_cpumask(mm), tmp))
- local = 1;
-
-#ifndef CONFIG_PPC_64K_PAGES
- /* If we use 4K pages and our psize is not 4K, then we might
- * be hitting a special driver mapping, and need to align the
- * address before we fetch the PTE.
- *
- * It could also be a hugepage mapping, in which case this is
- * not necessary, but it's not harmful, either.
- */
- if (psize != MMU_PAGE_4K)
- ea &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
-#endif /* CONFIG_PPC_64K_PAGES */
-
- /* Get PTE and page size from page tables */
- ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugeshift);
- if (ptep == NULL || !pte_present(*ptep)) {
- DBG_LOW(" no PTE !\n");
- rc = 1;
- goto bail;
- }
-
- /* Add _PAGE_PRESENT to the required access perm */
- access |= _PAGE_PRESENT;
-
- /* Pre-check access permissions (will be re-checked atomically
- * in __hash_page_XX but this pre-check is a fast path
- */
- if (access & ~pte_val(*ptep)) {
- DBG_LOW(" no access !\n");
- rc = 1;
- goto bail;
- }
-
- if (hugeshift) {
- if (pmd_trans_huge(*(pmd_t *)ptep))
- rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep,
- trap, local, ssize, psize);
-#ifdef CONFIG_HUGETLB_PAGE
- else
- rc = __hash_page_huge(ea, access, vsid, ptep, trap,
- local, ssize, hugeshift, psize);
-#else
- else {
- /*
- * if we have hugeshift, and is not transhuge with
- * hugetlb disabled, something is really wrong.
- */
- rc = 1;
- WARN_ON(1);
- }
-#endif
- check_paca_psize(ea, mm, psize, user_region);
-
- goto bail;
- }
-
-#ifndef CONFIG_PPC_64K_PAGES
- DBG_LOW(" i-pte: %016lx\n", pte_val(*ptep));
-#else
- DBG_LOW(" i-pte: %016lx %016lx\n", pte_val(*ptep),
- pte_val(*(ptep + PTRS_PER_PTE)));
-#endif
- /* Do actual hashing */
-#ifdef CONFIG_PPC_64K_PAGES
- /* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
- if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
- demote_segment_4k(mm, ea);
- psize = MMU_PAGE_4K;
- }
-
- /* If this PTE is non-cacheable and we have restrictions on
- * using non cacheable large pages, then we switch to 4k
- */
- if (mmu_ci_restrictions && psize == MMU_PAGE_64K &&
- (pte_val(*ptep) & _PAGE_NO_CACHE)) {
- if (user_region) {
- demote_segment_4k(mm, ea);
- psize = MMU_PAGE_4K;
- } else if (ea < VMALLOC_END) {
- /*
- * some driver did a non-cacheable mapping
- * in vmalloc space, so switch vmalloc
- * to 4k pages
- */
- printk(KERN_ALERT "Reducing vmalloc segment "
- "to 4kB pages because of "
- "non-cacheable mapping\n");
- psize = mmu_vmalloc_psize = MMU_PAGE_4K;
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
- }
- }
-
- check_paca_psize(ea, mm, psize, user_region);
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_PPC_HAS_HASH_64K
- if (psize == MMU_PAGE_64K)
- rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
- else
-#endif /* CONFIG_PPC_HAS_HASH_64K */
- {
- int spp = subpage_protection(mm, ea);
- if (access & spp)
- rc = -2;
- else
- rc = __hash_page_4K(ea, access, vsid, ptep, trap,
- local, ssize, spp);
- }
-
- /* Dump some info in case of hash insertion failure, they should
- * never happen so it is really useful to know if/when they do
- */
- if (rc == -1)
- hash_failure_debug(ea, access, vsid, trap, ssize, psize,
- psize, pte_val(*ptep));
-#ifndef CONFIG_PPC_64K_PAGES
- DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
-#else
- DBG_LOW(" o-pte: %016lx %016lx\n", pte_val(*ptep),
- pte_val(*(ptep + PTRS_PER_PTE)));
-#endif
- DBG_LOW(" -> rc=%d\n", rc);
-
-bail:
- exception_exit(prev_state);
- return rc;
-}
-EXPORT_SYMBOL_GPL(hash_page);
-
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap)
-{
- int hugepage_shift;
- unsigned long vsid;
- pgd_t *pgdir;
- pte_t *ptep;
- unsigned long flags;
- int rc, ssize, local = 0;
-
- BUG_ON(REGION_ID(ea) != USER_REGION_ID);
-
-#ifdef CONFIG_PPC_MM_SLICES
- /* We only prefault standard pages for now */
- if (unlikely(get_slice_psize(mm, ea) != mm->context.user_psize))
- return;
-#endif
-
- DBG_LOW("hash_preload(mm=%p, mm->pgdir=%p, ea=%016lx, access=%lx,"
- " trap=%lx\n", mm, mm->pgd, ea, access, trap);
-
- /* Get Linux PTE if available */
- pgdir = mm->pgd;
- if (pgdir == NULL)
- return;
-
- /* Get VSID */
- ssize = user_segment_size(ea);
- vsid = get_vsid(mm->context.id, ea, ssize);
- if (!vsid)
- return;
- /*
- * Hash doesn't like irqs. Walking linux page table with irq disabled
- * saves us from holding multiple locks.
- */
- local_irq_save(flags);
-
- /*
- * THP pages use update_mmu_cache_pmd. We don't do
- * hash preload there. Hence can ignore THP here
- */
- ptep = find_linux_pte_or_hugepte(pgdir, ea, &hugepage_shift);
- if (!ptep)
- goto out_exit;
-
- WARN_ON(hugepage_shift);
-#ifdef CONFIG_PPC_64K_PAGES
- /* If either _PAGE_4K_PFN or _PAGE_NO_CACHE is set (and we are on
- * a 64K kernel), then we don't preload, hash_page() will take
- * care of it once we actually try to access the page.
- * That way we don't have to duplicate all of the logic for segment
- * page size demotion here
- */
- if (pte_val(*ptep) & (_PAGE_4K_PFN | _PAGE_NO_CACHE))
- goto out_exit;
-#endif /* CONFIG_PPC_64K_PAGES */
-
- /* Is that local to this CPU ? */
- if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
- local = 1;
-
- /* Hash it in */
-#ifdef CONFIG_PPC_HAS_HASH_64K
- if (mm->context.user_psize == MMU_PAGE_64K)
- rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
- else
-#endif /* CONFIG_PPC_HAS_HASH_64K */
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
- subpage_protection(mm, ea));
-
- /* Dump some info in case of hash insertion failure, they should
- * never happen so it is really useful to know if/when they do
- */
- if (rc == -1)
- hash_failure_debug(ea, access, vsid, trap, ssize,
- mm->context.user_psize,
- mm->context.user_psize,
- pte_val(*ptep));
-out_exit:
- local_irq_restore(flags);
-}
-
-/* WARNING: This is called from hash_low_64.S, if you change this prototype,
- * do not forget to update the assembly call site !
- */
-void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
- int local)
-{
- unsigned long hash, index, shift, hidx, slot;
-
- DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn);
- pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
- hash = hpt_hash(vpn, shift, ssize);
- hidx = __rpte_to_hidx(pte, index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
- /*
- * We use same base page size and actual psize, because we don't
- * use these functions for hugepage
- */
- ppc_md.hpte_invalidate(slot, vpn, psize, psize, ssize, local);
- } pte_iterate_hashed_end();
-
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- /* Transactions are not aborted by tlbiel, only tlbie.
- * Without, syncing a page back to a block device w/ PIO could pick up
- * transactional data (bad!) so we force an abort here. Before the
- * sync the page will be made read-only, which will flush_hash_page.
- * BIG ISSUE here: if the kernel uses a page from userspace without
- * unmapping it first, it may see the speculated version.
- */
- if (local && cpu_has_feature(CPU_FTR_TM) &&
- current->thread.regs &&
- MSR_TM_ACTIVE(current->thread.regs->msr)) {
- tm_enable();
- tm_abort(TM_CAUSE_TLBI);
- }
-#endif
-}
-
-void flush_hash_range(unsigned long number, int local)
-{
- if (ppc_md.flush_hash_range)
- ppc_md.flush_hash_range(number, local);
- else {
- int i;
- struct ppc64_tlb_batch *batch =
- &__get_cpu_var(ppc64_tlb_batch);
-
- for (i = 0; i < number; i++)
- flush_hash_page(batch->vpn[i], batch->pte[i],
- batch->psize, batch->ssize, local);
- }
-}
-
-/*
- * low_hash_fault is called when we the low level hash code failed
- * to instert a PTE due to an hypervisor error
- */
-void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
-{
- enum ctx_state prev_state = exception_enter();
-
- if (user_mode(regs)) {
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- if (rc == -2)
- _exception(SIGSEGV, regs, SEGV_ACCERR, address);
- else
-#endif
- _exception(SIGBUS, regs, BUS_ADRERR, address);
- } else
- bad_page_fault(regs, address, SIGBUS);
-
- exception_exit(prev_state);
-}
-
-long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
- unsigned long pa, unsigned long rflags,
- unsigned long vflags, int psize, int ssize)
-{
- unsigned long hpte_group;
- long slot;
-
-repeat:
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
-
- /* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, vflags,
- psize, psize, ssize);
-
- /* Primary is full, try the secondary */
- if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
- vflags | HPTE_V_SECONDARY,
- psize, psize, ssize);
- if (slot == -1) {
- if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP)&~0x7UL;
-
- ppc_md.hpte_remove(hpte_group);
- goto repeat;
- }
- }
-
- return slot;
-}
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-static void kernel_map_linear_page(unsigned long vaddr, unsigned long lmi)
-{
- unsigned long hash;
- unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
- unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
- unsigned long mode = htab_convert_pte_flags(PAGE_KERNEL);
- long ret;
-
- hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
-
- /* Don't create HPTE entries for bad address */
- if (!vsid)
- return;
-
- ret = hpte_insert_repeating(hash, vpn, __pa(vaddr), mode,
- HPTE_V_BOLTED,
- mmu_linear_psize, mmu_kernel_ssize);
-
- BUG_ON (ret < 0);
- spin_lock(&linear_map_hash_lock);
- BUG_ON(linear_map_hash_slots[lmi] & 0x80);
- linear_map_hash_slots[lmi] = ret | 0x80;
- spin_unlock(&linear_map_hash_lock);
-}
-
-static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi)
-{
- unsigned long hash, hidx, slot;
- unsigned long vsid = get_kernel_vsid(vaddr, mmu_kernel_ssize);
- unsigned long vpn = hpt_vpn(vaddr, vsid, mmu_kernel_ssize);
-
- hash = hpt_hash(vpn, PAGE_SHIFT, mmu_kernel_ssize);
- spin_lock(&linear_map_hash_lock);
- BUG_ON(!(linear_map_hash_slots[lmi] & 0x80));
- hidx = linear_map_hash_slots[lmi] & 0x7f;
- linear_map_hash_slots[lmi] = 0;
- spin_unlock(&linear_map_hash_lock);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, mmu_linear_psize, mmu_linear_psize,
- mmu_kernel_ssize, 0);
-}
-
-void kernel_map_pages(struct page *page, int numpages, int enable)
-{
- unsigned long flags, vaddr, lmi;
- int i;
-
- local_irq_save(flags);
- for (i = 0; i < numpages; i++, page++) {
- vaddr = (unsigned long)page_address(page);
- lmi = __pa(vaddr) >> PAGE_SHIFT;
- if (lmi >= linear_map_hash_count)
- continue;
- if (enable)
- kernel_map_linear_page(vaddr, lmi);
- else
- kernel_unmap_linear_page(vaddr, lmi);
- }
- local_irq_restore(flags);
-}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* We don't currently support the first MEMBLOCK not mapping 0
- * physical on those processors
- */
- BUG_ON(first_memblock_base != 0);
-
- /* On LPAR systems, the first entry is our RMA region,
- * non-LPAR 64-bit hash MMU systems don't have a limitation
- * on real mode access, but using the first entry works well
- * enough. We also clamp it to 1G to avoid some funky things
- * such as RTAS bugs etc...
- */
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
-
- /* Finally limit subsequent allocations */
- memblock_set_current_limit(ppc64_rma_size);
-}
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
deleted file mode 100644
index e7450bdbe83a..000000000000
--- a/arch/powerpc/mm/highmem.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * highmem.c: virtual kernel memory mappings for high memory
- *
- * PowerPC version, stolen from the i386 version.
- *
- * Used in CONFIG_HIGHMEM systems for memory pages which
- * are not addressable by direct kernel virtual addresses.
- *
- * Copyright (C) 1999 Gerhard Wichert, Siemens AG
- * Gerhard.Wichert@pdb.siemens.de
- *
- *
- * Redesigned the x86 32-bit VM architecture to deal with
- * up to 16 Terrabyte physical memory. With current x86 CPUs
- * we now support up to 64 Gigabytes physical RAM.
- *
- * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
- *
- * Reworked for PowerPC by various contributors. Moved from
- * highmem.h by Benjamin Herrenschmidt (c) 2009 IBM Corp.
- */
-
-#include <linux/highmem.h>
-#include <linux/module.h>
-
-/*
- * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
- * gives a more generic (and caching) interface. But kmap_atomic can
- * be used in IRQ contexts, so in some (very limited) cases we need
- * it.
- */
-void *kmap_atomic_prot(struct page *page, pgprot_t prot)
-{
- unsigned long vaddr;
- int idx, type;
-
- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
- pagefault_disable();
- if (!PageHighMem(page))
- return page_address(page);
-
- type = kmap_atomic_idx_push();
- idx = type + KM_TYPE_NR*smp_processor_id();
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
- __set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
- local_flush_tlb_page(NULL, vaddr);
-
- return (void*) vaddr;
-}
-EXPORT_SYMBOL(kmap_atomic_prot);
-
-void __kunmap_atomic(void *kvaddr)
-{
- unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- int type;
-
- if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
- pagefault_enable();
- return;
- }
-
- type = kmap_atomic_idx();
-
-#ifdef CONFIG_DEBUG_HIGHMEM
- {
- unsigned int idx;
-
- idx = type + KM_TYPE_NR * smp_processor_id();
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
-
- /*
- * force other mappings to Oops if they'll try to access
- * this pte without first remap it
- */
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
- local_flush_tlb_page(NULL, vaddr);
- }
-#endif
-
- kmap_atomic_idx_pop();
- pagefault_enable();
-}
-EXPORT_SYMBOL(__kunmap_atomic);
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
deleted file mode 100644
index 5f5e6328c21c..000000000000
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of version 2.1 of the GNU Lesser General Public License
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- *
- */
-
-/*
- * PPC64 THP Support for hash based MMUs
- */
-#include <linux/mm.h>
-#include <asm/machdep.h>
-
-static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
- pmd_t *pmdp, unsigned int psize, int ssize)
-{
- int i, max_hpte_count, valid;
- unsigned long s_addr;
- unsigned char *hpte_slot_array;
- unsigned long hidx, shift, vpn, hash, slot;
-
- s_addr = addr & HPAGE_PMD_MASK;
- hpte_slot_array = get_hpte_slot_array(pmdp);
- /*
- * IF we try to do a HUGE PTE update after a withdraw is done.
- * we will find the below NULL. This happens when we do
- * split_huge_page_pmd
- */
- if (!hpte_slot_array)
- return;
-
- if (ppc_md.hugepage_invalidate)
- return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
- psize, ssize);
- /*
- * No bluk hpte removal support, invalidate each entry
- */
- shift = mmu_psize_defs[psize].shift;
- max_hpte_count = HPAGE_PMD_SIZE >> shift;
- for (i = 0; i < max_hpte_count; i++) {
- /*
- * 8 bits per each hpte entries
- * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
- */
- valid = hpte_valid(hpte_slot_array, i);
- if (!valid)
- continue;
- hidx = hpte_hash_index(hpte_slot_array, i);
-
- /* get the vpn */
- addr = s_addr + (i * (1ul << shift));
- vpn = hpt_vpn(addr, vsid, ssize);
- hash = hpt_hash(vpn, shift, ssize);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
-
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, 0);
- }
-}
-
-
-int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
- pmd_t *pmdp, unsigned long trap, int local, int ssize,
- unsigned int psize)
-{
- unsigned int index, valid;
- unsigned char *hpte_slot_array;
- unsigned long rflags, pa, hidx;
- unsigned long old_pmd, new_pmd;
- int ret, lpsize = MMU_PAGE_16M;
- unsigned long vpn, hash, shift, slot;
-
- /*
- * atomically mark the linux large page PMD busy and dirty
- */
- do {
- pmd_t pmd = ACCESS_ONCE(*pmdp);
-
- old_pmd = pmd_val(pmd);
- /* If PMD busy, retry the access */
- if (unlikely(old_pmd & _PAGE_BUSY))
- return 0;
- /* If PMD is trans splitting retry the access */
- if (unlikely(old_pmd & _PAGE_SPLITTING))
- return 0;
- /* If PMD permissions don't match, take page fault */
- if (unlikely(access & ~old_pmd))
- return 1;
- /*
- * Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access
- */
- new_pmd = old_pmd | _PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_RW)
- new_pmd |= _PAGE_DIRTY;
- } while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
- old_pmd, new_pmd));
- /*
- * PP bits. _PAGE_USER is already PP bit 0x2, so we only
- * need to add in 0x1 if it's a read-only user page
- */
- rflags = new_pmd & _PAGE_USER;
- if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) &&
- (new_pmd & _PAGE_DIRTY)))
- rflags |= 0x1;
- /*
- * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
- */
- rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N);
-
-#if 0
- if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
-
- /*
- * No CPU has hugepages but lacks no execute, so we
- * don't need to worry about that case
- */
- rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
- }
-#endif
- /*
- * Find the slot index details for this ea, using base page size.
- */
- shift = mmu_psize_defs[psize].shift;
- index = (ea & ~HPAGE_PMD_MASK) >> shift;
- BUG_ON(index >= 4096);
-
- vpn = hpt_vpn(ea, vsid, ssize);
- hash = hpt_hash(vpn, shift, ssize);
- hpte_slot_array = get_hpte_slot_array(pmdp);
- if (psize == MMU_PAGE_4K) {
- /*
- * invalidate the old hpte entry if we have that mapped via 64K
- * base page size. This is because demote_segment won't flush
- * hash page table entries.
- */
- if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
- invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
- }
-
- valid = hpte_valid(hpte_slot_array, index);
- if (valid) {
- /* update the hpte bits */
- hidx = hpte_hash_index(hpte_slot_array, index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
-
- ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
- psize, lpsize, ssize, local);
- /*
- * We failed to update, try to insert a new entry.
- */
- if (ret == -1) {
- /*
- * large pte is marked busy, so we can be sure
- * nobody is looking at hpte_slot_array. hence we can
- * safely update this here.
- */
- valid = 0;
- hpte_slot_array[index] = 0;
- }
- }
-
- if (!valid) {
- unsigned long hpte_group;
-
- /* insert new entry */
- pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
- new_pmd |= _PAGE_HASHPTE;
-
- /* Add in WIMG bits */
- rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
- _PAGE_GUARDED));
- /*
- * enable the memory coherence always
- */
- rflags |= HPTE_R_M;
-repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
-
- /* Insert into the hash table, primary slot */
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
- psize, lpsize, ssize);
- /*
- * Primary is full, try the secondary
- */
- if (unlikely(slot == -1)) {
- hpte_group = ((~hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
- slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
- rflags, HPTE_V_SECONDARY,
- psize, lpsize, ssize);
- if (slot == -1) {
- if (mftb() & 0x1)
- hpte_group = ((hash & htab_hash_mask) *
- HPTES_PER_GROUP) & ~0x7UL;
-
- ppc_md.hpte_remove(hpte_group);
- goto repeat;
- }
- }
- /*
- * Hypervisor failure. Restore old pmd and return -1
- * similar to __hash_page_*
- */
- if (unlikely(slot == -2)) {
- *pmdp = __pmd(old_pmd);
- hash_failure_debug(ea, access, vsid, trap, ssize,
- psize, lpsize, old_pmd);
- return -1;
- }
- /*
- * large pte is marked busy, so we can be sure
- * nobody is looking at hpte_slot_array. hence we can
- * safely update this here.
- */
- mark_hpte_slot_valid(hpte_slot_array, index, slot);
- }
- /*
- * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
- * base page size 4k.
- */
- if (psize == MMU_PAGE_4K)
- new_pmd |= _PAGE_COMBO;
- /*
- * The hpte valid is stored in the pgtable whose address is in the
- * second half of the PMD. Order this against clearing of the busy bit in
- * huge pmd.
- */
- smp_wmb();
- *pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
- return 0;
-}
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
deleted file mode 100644
index a5bcf9301196..000000000000
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * PPC64 Huge TLB Page Support for hash based MMUs (POWER4 and later)
- *
- * Copyright (C) 2003 David Gibson, IBM Corporation.
- *
- * Based on the IA-32 version:
- * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
- */
-
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
-#include <asm/cacheflush.h>
-#include <asm/machdep.h>
-
-extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
- unsigned long pa, unsigned long rlags,
- unsigned long vflags, int psize, int ssize);
-
-int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
- pte_t *ptep, unsigned long trap, int local, int ssize,
- unsigned int shift, unsigned int mmu_psize)
-{
- unsigned long vpn;
- unsigned long old_pte, new_pte;
- unsigned long rflags, pa, sz;
- long slot;
-
- BUG_ON(shift != mmu_psize_defs[mmu_psize].shift);
-
- /* Search the Linux page table for a match with va */
- vpn = hpt_vpn(ea, vsid, ssize);
-
- /* At this point, we have a pte (old_pte) which can be used to build
- * or update an HPTE. There are 2 cases:
- *
- * 1. There is a valid (present) pte with no associated HPTE (this is
- * the most common case)
- * 2. There is a valid (present) pte with an associated HPTE. The
- * current values of the pp bits in the HPTE prevent access
- * because we are doing software DIRTY bit management and the
- * page is currently not DIRTY.
- */
-
-
- do {
- old_pte = pte_val(*ptep);
- /* If PTE busy, retry the access */
- if (unlikely(old_pte & _PAGE_BUSY))
- return 0;
- /* If PTE permissions don't match, take page fault */
- if (unlikely(access & ~old_pte))
- return 1;
- /* Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access */
- new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
- if (access & _PAGE_RW)
- new_pte |= _PAGE_DIRTY;
- } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
- old_pte, new_pte));
-
- rflags = 0x2 | (!(new_pte & _PAGE_RW));
- /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
- rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
- sz = ((1UL) << shift);
- if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- /* No CPU has hugepages but lacks no execute, so we
- * don't need to worry about that case */
- rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
-
- /* Check if pte already has an hpte (case 2) */
- if (unlikely(old_pte & _PAGE_HASHPTE)) {
- /* There MIGHT be an HPTE for this pte */
- unsigned long hash, slot;
-
- hash = hpt_hash(vpn, shift, ssize);
- if (old_pte & _PAGE_F_SECOND)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += (old_pte & _PAGE_F_GIX) >> 12;
-
- if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize,
- mmu_psize, ssize, local) == -1)
- old_pte &= ~_PAGE_HPTEFLAGS;
- }
-
- if (likely(!(old_pte & _PAGE_HASHPTE))) {
- unsigned long hash = hpt_hash(vpn, shift, ssize);
-
- pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
-
- /* clear HPTE slot informations in new PTE */
-#ifdef CONFIG_PPC_64K_PAGES
- new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
-#else
- new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-#endif
- /* Add in WIMG bits */
- rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
- _PAGE_COHERENT | _PAGE_GUARDED));
- /*
- * enable the memory coherence always
- */
- rflags |= HPTE_R_M;
-
- slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
- mmu_psize, ssize);
-
- /*
- * Hypervisor failure. Restore old pte and return -1
- * similar to __hash_page_*
- */
- if (unlikely(slot == -2)) {
- *ptep = __pte(old_pte);
- hash_failure_debug(ea, access, vsid, trap, ssize,
- mmu_psize, mmu_psize, old_pte);
- return -1;
- }
-
- new_pte |= (slot << 12) & (_PAGE_F_SECOND | _PAGE_F_GIX);
- }
-
- /*
- * No need to use ldarx/stdcx here
- */
- *ptep = __pte(new_pte & ~_PAGE_BUSY);
- return 0;
-}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7e70ae968e5f..d3c1b749dcfc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -15,393 +15,84 @@
#include <linux/export.h>
#include <linux/of_fdt.h>
#include <linux/memblock.h>
-#include <linux/bootmem.h>
#include <linux/moduleparam.h>
-#include <asm/pgtable.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/kmemleak.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/setup.h>
#include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+#include <asm/firmware.h>
-#ifdef CONFIG_HUGETLB_PAGE
+bool hugetlb_disabled = false;
-#define PAGE_SHIFT_64K 16
-#define PAGE_SHIFT_16M 24
-#define PAGE_SHIFT_16G 34
+#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_basic_t)) - \
+ __builtin_ffs(sizeof(void *)))
-unsigned int HPAGE_SHIFT;
-
-/*
- * Tracks gpages after the device tree is scanned and before the
- * huge_boot_pages list is ready. On non-Freescale implementations, this is
- * just used to track 16G pages and so is a single array. FSL-based
- * implementations may have more than one gpage size, so we need multiple
- * arrays
- */
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#define MAX_NUMBER_GPAGES 128
-struct psize_gpages {
- u64 gpage_list[MAX_NUMBER_GPAGES];
- unsigned int nr_gpages;
-};
-static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT];
-#else
-#define MAX_NUMBER_GPAGES 1024
-static u64 gpage_freearray[MAX_NUMBER_GPAGES];
-static unsigned nr_gpages;
-#endif
-
-#define hugepd_none(hpd) ((hpd).pd == 0)
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * At this point we do the placement change only for BOOK3S 64. This would
- * possibly work on other subarchs.
- */
-
-/*
- * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
- * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
- */
-int pmd_huge(pmd_t pmd)
-{
- /*
- * leaf pte for huge page, bottom two bits != 00
- */
- return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-int pud_huge(pud_t pud)
-{
- /*
- * leaf pte for huge page, bottom two bits != 00
- */
- return ((pud_val(pud) & 0x3) != 0x0);
-}
-
-int pgd_huge(pgd_t pgd)
+pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz)
{
/*
- * leaf pte for huge page, bottom two bits != 00
+ * Only called for hugetlbfs pages, hence can ignore THP and the
+ * irq disabled walk.
*/
- return ((pgd_val(pgd) & 0x3) != 0x0);
-}
-#else
-int pmd_huge(pmd_t pmd)
-{
- return 0;
+ return __find_linux_pte(mm->pgd, addr, NULL, NULL);
}
-int pud_huge(pud_t pud)
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long sz)
{
- return 0;
-}
-
-int pgd_huge(pgd_t pgd)
-{
- return 0;
-}
-#endif
-
-pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
-{
- /* Only called for hugetlbfs pages, hence can ignore THP */
- return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
-}
-
-static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
- unsigned long address, unsigned pdshift, unsigned pshift)
-{
- struct kmem_cache *cachep;
- pte_t *new;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- int i;
- int num_hugepd = 1 << (pshift - pdshift);
- cachep = hugepte_cache;
-#else
- cachep = PGT_CACHE(pdshift - pshift);
-#endif
-
- new = kmem_cache_zalloc(cachep, GFP_KERNEL|__GFP_REPEAT);
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
- BUG_ON(pshift > HUGEPD_SHIFT_MASK);
- BUG_ON((unsigned long)new & HUGEPD_SHIFT_MASK);
+ addr &= ~(sz - 1);
- if (! new)
- return -ENOMEM;
+ p4d = p4d_offset(pgd_offset(mm, addr), addr);
+ if (!mm_pud_folded(mm) && sz >= P4D_SIZE)
+ return (pte_t *)p4d;
- spin_lock(&mm->page_table_lock);
-#ifdef CONFIG_PPC_FSL_BOOK3E
- /*
- * We have multiple higher-level entries that point to the same
- * actual pte location. Fill in each as we go and backtrack on error.
- * We need all of these so the DTLB pgtable walk code can find the
- * right higher-level entry without knowing if it's a hugepage or not.
- */
- for (i = 0; i < num_hugepd; i++, hpdp++) {
- if (unlikely(!hugepd_none(*hpdp)))
- break;
- else
- /* We use the old format for PPC_FSL_BOOK3E */
- hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
- }
- /* If we bailed from the for loop early, an error occurred, clean up */
- if (i < num_hugepd) {
- for (i = i - 1 ; i >= 0; i--, hpdp--)
- hpdp->pd = 0;
- kmem_cache_free(cachep, new);
- }
-#else
- if (!hugepd_none(*hpdp))
- kmem_cache_free(cachep, new);
- else {
-#ifdef CONFIG_PPC_BOOK3S_64
- hpdp->pd = (unsigned long)new |
- (shift_to_mmu_psize(pshift) << 2);
-#else
- hpdp->pd = ((unsigned long)new & ~PD_HUGE) | pshift;
-#endif
- }
-#endif
- spin_unlock(&mm->page_table_lock);
- return 0;
-}
-
-/*
- * These macros define how to determine which level of the page table holds
- * the hpdp.
- */
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#define HUGEPD_PGD_SHIFT PGDIR_SHIFT
-#define HUGEPD_PUD_SHIFT PUD_SHIFT
-#else
-#define HUGEPD_PGD_SHIFT PUD_SHIFT
-#define HUGEPD_PUD_SHIFT PMD_SHIFT
-#endif
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * At this point we do the placement change only for BOOK3S 64. This would
- * possibly work on other subarchs.
- */
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
-
- addr &= ~(sz-1);
- pg = pgd_offset(mm, addr);
-
- if (pshift == PGDIR_SHIFT)
- /* 16GB huge page */
- return (pte_t *) pg;
- else if (pshift > PUD_SHIFT)
- /*
- * We need to use hugepd table
- */
- hpdp = (hugepd_t *)pg;
- else {
- pdshift = PUD_SHIFT;
- pu = pud_alloc(mm, pg, addr);
- if (pshift == PUD_SHIFT)
- return (pte_t *)pu;
- else if (pshift > PMD_SHIFT)
- hpdp = (hugepd_t *)pu;
- else {
- pdshift = PMD_SHIFT;
- pm = pmd_alloc(mm, pu, addr);
- if (pshift == PMD_SHIFT)
- /* 16MB hugepage */
- return (pte_t *)pm;
- else
- hpdp = (hugepd_t *)pm;
- }
- }
- if (!hpdp)
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
return NULL;
+ if (!mm_pmd_folded(mm) && sz >= PUD_SIZE)
+ return (pte_t *)pud;
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
return NULL;
- return hugepte_offset(hpdp, addr, pdshift);
-}
-
-#else
-
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
-{
- pgd_t *pg;
- pud_t *pu;
- pmd_t *pm;
- hugepd_t *hpdp = NULL;
- unsigned pshift = __ffs(sz);
- unsigned pdshift = PGDIR_SHIFT;
-
- addr &= ~(sz-1);
-
- pg = pgd_offset(mm, addr);
+ if (sz >= PMD_SIZE) {
+ /* On 8xx, all hugepages are handled as contiguous PTEs */
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ int i;
- if (pshift >= HUGEPD_PGD_SHIFT) {
- hpdp = (hugepd_t *)pg;
- } else {
- pdshift = PUD_SHIFT;
- pu = pud_alloc(mm, pg, addr);
- if (pshift >= HUGEPD_PUD_SHIFT) {
- hpdp = (hugepd_t *)pu;
- } else {
- pdshift = PMD_SHIFT;
- pm = pmd_alloc(mm, pu, addr);
- hpdp = (hugepd_t *)pm;
+ for (i = 0; i < sz / PMD_SIZE; i++) {
+ if (!pte_alloc_huge(mm, pmd + i, addr))
+ return NULL;
+ }
}
+ return (pte_t *)pmd;
}
- if (!hpdp)
- return NULL;
-
- BUG_ON(!hugepd_none(*hpdp) && !hugepd_ok(*hpdp));
-
- if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr, pdshift, pshift))
- return NULL;
-
- return hugepte_offset(hpdp, addr, pdshift);
-}
-#endif
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy or bootmem allocator is setup.
- */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
-{
- unsigned int idx = shift_to_mmu_psize(__ffs(page_size));
- int i;
-
- if (addr == 0)
- return;
-
- gpage_freearray[idx].nr_gpages = number_of_pages;
-
- for (i = 0; i < number_of_pages; i++) {
- gpage_freearray[idx].gpage_list[i] = addr;
- addr += page_size;
- }
+ return pte_alloc_huge(mm, pmd, addr);
}
+#ifdef CONFIG_PPC_BOOK3S_64
/*
- * Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
-{
- struct huge_bootmem_page *m;
- int idx = shift_to_mmu_psize(huge_page_shift(hstate));
- int nr_gpages = gpage_freearray[idx].nr_gpages;
-
- if (nr_gpages == 0)
- return 0;
-
-#ifdef CONFIG_HIGHMEM
- /*
- * If gpages can be in highmem we can't use the trick of storing the
- * data structure in the page; allocate space for this
- */
- m = alloc_bootmem(sizeof(struct huge_bootmem_page));
- m->phys = gpage_freearray[idx].gpage_list[--nr_gpages];
-#else
- m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]);
-#endif
-
- list_add(&m->list, &huge_boot_pages);
- gpage_freearray[idx].nr_gpages = nr_gpages;
- gpage_freearray[idx].gpage_list[nr_gpages] = 0;
- m->hstate = hstate;
-
- return 1;
-}
-/*
- * Scan the command line hugepagesz= options for gigantic pages; store those in
- * a list that we use to allocate the memory once all options are parsed.
+ * Tracks gpages after the device tree is scanned and before the
+ * huge_boot_pages list is ready on pseries.
*/
-
-unsigned long gpage_npages[MMU_PAGE_COUNT];
-
-static int __init do_gpage_early_setup(char *param, char *val,
- const char *unused)
-{
- static phys_addr_t size;
- unsigned long npages;
-
- /*
- * The hugepagesz and hugepages cmdline options are interleaved. We
- * use the size variable to keep track of whether or not this was done
- * properly and skip over instances where it is incorrect. Other
- * command-line parsing code will issue warnings, so we don't need to.
- *
- */
- if ((strcmp(param, "default_hugepagesz") == 0) ||
- (strcmp(param, "hugepagesz") == 0)) {
- size = memparse(val, NULL);
- } else if (strcmp(param, "hugepages") == 0) {
- if (size != 0) {
- if (sscanf(val, "%lu", &npages) <= 0)
- npages = 0;
- gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages;
- size = 0;
- }
- }
- return 0;
-}
-
+#define MAX_NUMBER_GPAGES 1024
+__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES];
+__initdata static unsigned nr_gpages;
/*
- * This function allocates physical space for pages that are larger than the
- * buddy allocator can handle. We want to allocate these in highmem because
- * the amount of lowmem is limited. This means that this function MUST be
- * called before lowmem_end_addr is set up in MMU_init() in order for the lmb
- * allocate to grab highmem.
+ * Build list of addresses of gigantic pages. This function is used in early
+ * boot before the buddy allocator is setup.
*/
-void __init reserve_hugetlb_gpages(void)
-{
- static __initdata char cmdline[COMMAND_LINE_SIZE];
- phys_addr_t size, base;
- int i;
-
- strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE);
- parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0,
- &do_gpage_early_setup);
-
- /*
- * Walk gpage list in reverse, allocating larger page sizes first.
- * Skip over unsupported sizes, or sizes that have 0 gpages allocated.
- * When we reach the point in the list where pages are no longer
- * considered gpages, we're done.
- */
- for (i = MMU_PAGE_COUNT-1; i >= 0; i--) {
- if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0)
- continue;
- else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT))
- break;
-
- size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i));
- base = memblock_alloc_base(size * gpage_npages[i], size,
- MEMBLOCK_ALLOC_ANYWHERE);
- add_gpage(base, size, gpage_npages[i]);
- }
-}
-
-#else /* !PPC_FSL_BOOK3E */
-
-/* Build list of addresses of gigantic pages. This function is used in early
- * boot before the buddy or bootmem allocator is setup.
- */
-void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
+void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
{
if (!addr)
return;
@@ -413,458 +104,82 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages)
}
}
-/* Moves the gigantic page addresses from the temporary list to the
- * huge_boot_pages list.
- */
-int alloc_bootmem_huge_page(struct hstate *hstate)
+static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
{
struct huge_bootmem_page *m;
if (nr_gpages == 0)
return 0;
m = phys_to_virt(gpage_freearray[--nr_gpages]);
gpage_freearray[nr_gpages] = 0;
- list_add(&m->list, &huge_boot_pages);
+ list_add(&m->list, &huge_boot_pages[0]);
m->hstate = hstate;
+ m->flags = 0;
return 1;
}
-#endif
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+bool __init hugetlb_node_alloc_supported(void)
{
- return 0;
-}
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-#define HUGEPD_FREELIST_SIZE \
- ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
-
-struct hugepd_freelist {
- struct rcu_head rcu;
- unsigned int index;
- void *ptes[0];
-};
-
-static DEFINE_PER_CPU(struct hugepd_freelist *, hugepd_freelist_cur);
-
-static void hugepd_free_rcu_callback(struct rcu_head *head)
-{
- struct hugepd_freelist *batch =
- container_of(head, struct hugepd_freelist, rcu);
- unsigned int i;
-
- for (i = 0; i < batch->index; i++)
- kmem_cache_free(hugepte_cache, batch->ptes[i]);
-
- free_page((unsigned long)batch);
-}
-
-static void hugepd_free(struct mmu_gather *tlb, void *hugepte)
-{
- struct hugepd_freelist **batchp;
-
- batchp = &get_cpu_var(hugepd_freelist_cur);
-
- if (atomic_read(&tlb->mm->mm_users) < 2 ||
- cpumask_equal(mm_cpumask(tlb->mm),
- cpumask_of(smp_processor_id()))) {
- kmem_cache_free(hugepte_cache, hugepte);
- put_cpu_var(hugepd_freelist_cur);
- return;
- }
-
- if (*batchp == NULL) {
- *batchp = (struct hugepd_freelist *)__get_free_page(GFP_ATOMIC);
- (*batchp)->index = 0;
- }
-
- (*batchp)->ptes[(*batchp)->index++] = hugepte;
- if ((*batchp)->index == HUGEPD_FREELIST_SIZE) {
- call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback);
- *batchp = NULL;
- }
- put_cpu_var(hugepd_freelist_cur);
-}
-#endif
-
-static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshift,
- unsigned long start, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pte_t *hugepte = hugepd_page(*hpdp);
- int i;
-
- unsigned long pdmask = ~((1UL << pdshift) - 1);
- unsigned int num_hugepd = 1;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- /* Note: On fsl the hpdp may be the first of several */
- num_hugepd = (1 << (hugepd_shift(*hpdp) - pdshift));
-#else
- unsigned int shift = hugepd_shift(*hpdp);
-#endif
-
- start &= pdmask;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= pdmask;
- if (! ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- for (i = 0; i < num_hugepd; i++, hpdp++)
- hpdp->pd = 0;
-
- tlb->need_flush = 1;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- hugepd_free(tlb, hugepte);
-#else
- pgtable_free_tlb(tlb, hugepte, pdshift - shift);
-#endif
-}
-
-static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pmd_t *pmd;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- do {
- pmd = pmd_offset(pud, addr);
- next = pmd_addr_end(addr, end);
- if (!is_hugepd(pmd)) {
- /*
- * if it is not hugepd pointer, we should already find
- * it cleared.
- */
- WARN_ON(!pmd_none_or_clear_bad(pmd));
- continue;
- }
-#ifdef CONFIG_PPC_FSL_BOOK3E
- /*
- * Increment next by the size of the huge mapping since
- * there may be more than one entry at this level for a
- * single hugepage, but all of them point to
- * the same kmem cache that holds the hugepte.
- */
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pmd));
-#endif
- free_hugepd_range(tlb, (hugepd_t *)pmd, PMD_SHIFT,
- addr, next, floor, ceiling);
- } while (addr = next, addr != end);
-
- start &= PUD_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PUD_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
-
- pmd = pmd_offset(pud, start);
- pud_clear(pud);
- pmd_free_tlb(tlb, pmd, start);
+ return false;
}
-
-static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
-{
- pud_t *pud;
- unsigned long next;
- unsigned long start;
-
- start = addr;
- do {
- pud = pud_offset(pgd, addr);
- next = pud_addr_end(addr, end);
- if (!is_hugepd(pud)) {
- if (pud_none_or_clear_bad(pud))
- continue;
- hugetlb_free_pmd_range(tlb, pud, addr, next, floor,
- ceiling);
- } else {
-#ifdef CONFIG_PPC_FSL_BOOK3E
- /*
- * Increment next by the size of the huge mapping since
- * there may be more than one entry at this level for a
- * single hugepage, but all of them point to
- * the same kmem cache that holds the hugepte.
- */
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pud));
#endif
- free_hugepd_range(tlb, (hugepd_t *)pud, PUD_SHIFT,
- addr, next, floor, ceiling);
- }
- } while (addr = next, addr != end);
- start &= PGDIR_MASK;
- if (start < floor)
- return;
- if (ceiling) {
- ceiling &= PGDIR_MASK;
- if (!ceiling)
- return;
- }
- if (end - 1 > ceiling - 1)
- return;
- pud = pud_offset(pgd, start);
- pgd_clear(pgd);
- pud_free_tlb(tlb, pud, start);
-}
-
-/*
- * This function frees user-level page tables of a process.
- */
-void hugetlb_free_pgd_range(struct mmu_gather *tlb,
- unsigned long addr, unsigned long end,
- unsigned long floor, unsigned long ceiling)
+int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
{
- pgd_t *pgd;
- unsigned long next;
-
- /*
- * Because there are a number of different possible pagetable
- * layouts for hugepage ranges, we limit knowledge of how
- * things should be laid out to the allocation path
- * (huge_pte_alloc(), above). Everything else works out the
- * structure as it goes from information in the hugepd
- * pointers. That means that we can't here use the
- * optimization used in the normal page free_pgd_range(), of
- * checking whether we're actually covering a large enough
- * range to have to do anything at the top level of the walk
- * instead of at the bottom.
- *
- * To make sense of this, you should probably go read the big
- * block comment at the top of the normal free_pgd_range(),
- * too.
- */
-
- do {
- next = pgd_addr_end(addr, end);
- pgd = pgd_offset(tlb->mm, addr);
- if (!is_hugepd(pgd)) {
- if (pgd_none_or_clear_bad(pgd))
- continue;
- hugetlb_free_pud_range(tlb, pgd, addr, next, floor, ceiling);
- } else {
-#ifdef CONFIG_PPC_FSL_BOOK3E
- /*
- * Increment next by the size of the huge mapping since
- * there may be more than one entry at the pgd level
- * for a single hugepage, but all of them point to the
- * same kmem cache that holds the hugepte.
- */
- next = addr + (1 << hugepd_shift(*(hugepd_t *)pgd));
-#endif
- free_hugepd_range(tlb, (hugepd_t *)pgd, PGDIR_SHIFT,
- addr, next, floor, ceiling);
- }
- } while (addr = next, addr != end);
-}
-struct page *
-follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
-{
- pte_t *ptep;
- struct page *page;
- unsigned shift;
- unsigned long mask;
- /*
- * Transparent hugepages are handled by generic code. We can skip them
- * here.
- */
- ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
-
- /* Verify it is a huge page else bail. */
- if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
- return ERR_PTR(-EINVAL);
-
- mask = (1UL << shift) - 1;
- page = pte_page(*ptep);
- if (page)
- page += (address & mask) / PAGE_SIZE;
-
- return page;
-}
-
-struct page *
-follow_huge_pmd(struct mm_struct *mm, unsigned long address,
- pmd_t *pmd, int write)
-{
- BUG();
- return NULL;
-}
-
-static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
- unsigned long sz)
-{
- unsigned long __boundary = (addr + sz) & ~(sz-1);
- return (__boundary - 1 < end - 1) ? __boundary : end;
-}
-
-int gup_hugepd(hugepd_t *hugepd, unsigned pdshift,
- unsigned long addr, unsigned long end,
- int write, struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(*hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
-
-#ifdef CONFIG_PPC_MM_SLICES
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
- unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- struct hstate *hstate = hstate_file(file);
- int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
-
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
-}
-#endif
-
-unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
-#ifdef CONFIG_PPC_MM_SLICES
- unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
- return 1UL << mmu_psize_to_shift(psize);
-#else
- if (!is_vm_hugetlb_page(vma))
- return PAGE_SIZE;
-
- return huge_page_size(hstate_vma(vma));
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled())
+ return pseries_alloc_bootmem_huge_page(h);
#endif
+ return __alloc_bootmem_huge_page(h, nid);
}
-static inline bool is_power_of_4(unsigned long x)
-{
- if (is_power_of_2(x))
- return (__ilog2(x) % 2) ? false : true;
- return false;
-}
-
-static int __init add_huge_page_size(unsigned long long size)
+bool __init arch_hugetlb_valid_size(unsigned long size)
{
int shift = __ffs(size);
int mmu_psize;
/* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if ((size < PAGE_SIZE) || !is_power_of_4(size))
- return -EINVAL;
-#else
- if (!is_power_of_2(size)
- || (shift > SLICE_HIGH_SHIFT) || (shift <= PAGE_SHIFT))
- return -EINVAL;
-#endif
-
- if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
- return -EINVAL;
+ if (size <= PAGE_SIZE || !is_power_of_2(size))
+ return false;
-#ifdef CONFIG_SPU_FS_64K_LS
- /* Disable support for 64K huge pages when 64K SPU local store
- * support is enabled as the current implementation conflicts.
- */
- if (shift == PAGE_SHIFT_64K)
- return -EINVAL;
-#endif /* CONFIG_SPU_FS_64K_LS */
+ mmu_psize = check_and_get_huge_psize(shift);
+ if (mmu_psize < 0)
+ return false;
BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
- /* Return if huge page size has already been setup */
- if (size_to_hstate(size))
- return 0;
-
- hugetlb_add_hstate(shift - PAGE_SHIFT);
-
- return 0;
+ return true;
}
-static int __init hugepage_setup_sz(char *str)
+static int __init add_huge_page_size(unsigned long long size)
{
- unsigned long long size;
-
- size = memparse(str, &str);
+ int shift = __ffs(size);
- if (add_huge_page_size(size) != 0)
- printk(KERN_WARNING "Invalid huge page size specified(%llu)\n", size);
+ if (!arch_hugetlb_valid_size((unsigned long)size))
+ return -EINVAL;
- return 1;
+ hugetlb_add_hstate(shift - PAGE_SHIFT);
+ return 0;
}
-__setup("hugepagesz=", hugepage_setup_sz);
-#ifdef CONFIG_PPC_FSL_BOOK3E
-struct kmem_cache *hugepte_cache;
static int __init hugetlbpage_init(void)
{
+ bool configured = false;
int psize;
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- unsigned shift;
-
- if (!mmu_psize_defs[psize].shift)
- continue;
-
- shift = mmu_psize_to_shift(psize);
-
- /* Don't treat normal page sizes as huge... */
- if (shift != PAGE_SHIFT)
- if (add_huge_page_size(1ULL << shift) < 0)
- continue;
+ if (hugetlb_disabled) {
+ pr_info("HugeTLB support is disabled!\n");
+ return 0;
}
- /*
- * Create a kmem cache for hugeptes. The bottom bits in the pte have
- * size information encoded in them, so align them to allow this
- */
- hugepte_cache = kmem_cache_create("hugepte-cache", sizeof(pte_t),
- HUGEPD_SHIFT_MASK + 1, 0, NULL);
- if (hugepte_cache == NULL)
- panic("%s: Unable to create kmem cache for hugeptes\n",
- __func__);
-
- /* Default hpage size = 4M */
- if (mmu_psize_defs[MMU_PAGE_4M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
- else
- panic("%s: Unable to set default huge page size\n", __func__);
-
-
- return 0;
-}
-#else
-static int __init hugetlbpage_init(void)
-{
- int psize;
-
- if (!mmu_has_feature(MMU_FTR_16M_PAGE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
+ !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
- unsigned pdshift;
if (!mmu_psize_defs[psize].shift)
continue;
@@ -874,216 +189,29 @@ static int __init hugetlbpage_init(void)
if (add_huge_page_size(1ULL << shift) < 0)
continue;
- if (shift < PMD_SHIFT)
- pdshift = PMD_SHIFT;
- else if (shift < PUD_SHIFT)
- pdshift = PUD_SHIFT;
- else
- pdshift = PGDIR_SHIFT;
- /*
- * if we have pdshift and shift value same, we don't
- * use pgt cache for hugepd.
- */
- if (pdshift != shift) {
- pgtable_cache_add(pdshift - shift, NULL);
- if (!PGT_CACHE(pdshift - shift))
- panic("hugetlbpage_init(): could not create "
- "pgtable cache for %d bit pagesize\n", shift);
- }
+ configured = true;
}
- /* Set default large page size. Currently, we pick 16M or 1M
- * depending on what is available
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
+ if (!configured)
+ pr_info("Failed to initialize. Disabling HugeTLB");
return 0;
}
-#endif
-module_init(hugetlbpage_init);
-
-void flush_dcache_icache_hugepage(struct page *page)
-{
- int i;
- void *start;
-
- BUG_ON(!PageCompound(page));
-
- for (i = 0; i < (1UL << compound_order(page)); i++) {
- if (!PageHighMem(page)) {
- __flush_dcache_icache(page_address(page+i));
- } else {
- start = kmap_atomic(page+i);
- __flush_dcache_icache(start);
- kunmap_atomic(start);
- }
- }
-}
-
-#endif /* CONFIG_HUGETLB_PAGE */
-/*
- * We have 4 cases for pgds and pmds:
- * (1) invalid (all zeroes)
- * (2) pointer to next table, as normal; bottom 6 bits == 0
- * (3) leaf pte for huge page, bottom two bits != 00
- * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
- *
- * So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
+arch_initcall(hugetlbpage_init);
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+void __init gigantic_hugetlb_cma_reserve(void)
{
- pgd_t pgd, *pgdp;
- pud_t pud, *pudp;
- pmd_t pmd, *pmdp;
- pte_t *ret_pte;
- hugepd_t *hpdp = NULL;
- unsigned pdshift = PGDIR_SHIFT;
-
- if (shift)
- *shift = 0;
+ unsigned long order = 0;
- pgdp = pgdir + pgd_index(ea);
- pgd = ACCESS_ONCE(*pgdp);
- /*
- * Always operate on the local stack value. This make sure the
- * value don't get updated by a parallel THP split/collapse,
- * page fault or a page unmap. The return pte_t * is still not
- * stable. So should be checked there for above conditions.
- */
- if (pgd_none(pgd))
- return NULL;
- else if (pgd_huge(pgd)) {
- ret_pte = (pte_t *) pgdp;
- goto out;
- } else if (is_hugepd(&pgd))
- hpdp = (hugepd_t *)&pgd;
- else {
+ if (radix_enabled())
+ order = PUD_SHIFT - PAGE_SHIFT;
+ else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
/*
- * Even if we end up with an unmap, the pgtable will not
- * be freed, because we do an rcu free and here we are
- * irq disabled
+ * For pseries we do use ibm,expected#pages for reserving 16G pages.
*/
- pdshift = PUD_SHIFT;
- pudp = pud_offset(&pgd, ea);
- pud = ACCESS_ONCE(*pudp);
+ order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
- if (pud_none(pud))
- return NULL;
- else if (pud_huge(pud)) {
- ret_pte = (pte_t *) pudp;
- goto out;
- } else if (is_hugepd(&pud))
- hpdp = (hugepd_t *)&pud;
- else {
- pdshift = PMD_SHIFT;
- pmdp = pmd_offset(&pud, ea);
- pmd = ACCESS_ONCE(*pmdp);
- /*
- * A hugepage collapse is captured by pmd_none, because
- * it mark the pmd none and do a hpte invalidate.
- *
- * A hugepage split is captured by pmd_trans_splitting
- * because we mark the pmd trans splitting and do a
- * hpte invalidate
- *
- */
- if (pmd_none(pmd) || pmd_trans_splitting(pmd))
- return NULL;
-
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- ret_pte = (pte_t *) pmdp;
- goto out;
- } else if (is_hugepd(&pmd))
- hpdp = (hugepd_t *)&pmd;
- else
- return pte_offset_kernel(&pmd, ea);
- }
- }
- if (!hpdp)
- return NULL;
-
- ret_pte = hugepte_offset(hpdp, ea, pdshift);
- pdshift = hugepd_shift(*hpdp);
-out:
- if (shift)
- *shift = pdshift;
- return ret_pte;
-}
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
-
-int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long mask;
- unsigned long pte_end;
- struct page *head, *page, *tail;
- pte_t pte;
- int refs;
-
- pte_end = (addr + sz) & ~(sz-1);
- if (pte_end < end)
- end = pte_end;
-
- pte = ACCESS_ONCE(*ptep);
- mask = _PAGE_PRESENT | _PAGE_USER;
- if (write)
- mask |= _PAGE_RW;
-
- if ((pte_val(pte) & mask) != mask)
- return 0;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /*
- * check for splitting here
- */
- if (pmd_trans_splitting(pte_pmd(pte)))
- return 0;
-#endif
-
- /* hugepages are never "special" */
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
- refs = 0;
- head = pte_page(pte);
-
- page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
- tail = page;
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- /* Could be optimized better */
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- /*
- * Any tail page need their mapcount reference taken before we
- * return.
- */
- while (refs--) {
- if (PageTail(tail))
- get_huge_page_tail(tail);
- tail++;
- }
-
- return 1;
+ if (order)
+ hugetlb_cma_reserve(order);
}
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
deleted file mode 100644
index 915412e4d5ba..000000000000
--- a/arch/powerpc/mm/icswx.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * ICSWX and ACOP Management
- *
- * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-
-#include "icswx.h"
-
-/*
- * The processor and its L2 cache cause the icswx instruction to
- * generate a COP_REQ transaction on PowerBus. The transaction has no
- * address, and the processor does not perform an MMU access to
- * authenticate the transaction. The command portion of the PowerBus
- * COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor
- * Process ID (PID), which the coprocessor compares to the authorized
- * LPID and PID held in the coprocessor, to determine if the process
- * is authorized to generate the transaction. The data of the COP_REQ
- * transaction is 128-byte or less in size and is placed in cacheable
- * memory on a 128-byte cache line boundary.
- *
- * The task to use a coprocessor should use use_cop() to mark the use
- * of the Coprocessor Type (CT) and context switching. On a server
- * class processor, the PID register is used only for coprocessor
- * management + * and so a coprocessor PID is allocated before
- * executing icswx + * instruction. Drop_cop() is used to free the
- * coprocessor PID.
- *
- * Example:
- * Host Fabric Interface (HFI) is a PowerPC network coprocessor.
- * Each HFI have multiple windows. Each HFI window serves as a
- * network device sending to and receiving from HFI network.
- * HFI immediate send function uses icswx instruction. The immediate
- * send function allows small (single cache-line) packets be sent
- * without using the regular HFI send FIFO and doorbell, which are
- * much slower than immediate send.
- *
- * For each task intending to use HFI immediate send, the HFI driver
- * calls use_cop() to obtain a coprocessor PID for the task.
- * The HFI driver then allocate a free HFI window and save the
- * coprocessor PID to the HFI window to allow the task to use the
- * HFI window.
- *
- * The HFI driver repeatedly creates immediate send packets and
- * issues icswx instruction to send data through the HFI window.
- * The HFI compares the coprocessor PID in the CPU PID register
- * to the PID held in the HFI window to determine if the transaction
- * is allowed.
- *
- * When the task to release the HFI window, the HFI driver calls
- * drop_cop() to release the coprocessor PID.
- */
-
-void switch_cop(struct mm_struct *next)
-{
-#ifdef CONFIG_PPC_ICSWX_PID
- mtspr(SPRN_PID, next->context.cop_pid);
-#endif
- mtspr(SPRN_ACOP, next->context.acop);
-}
-
-/**
- * Start using a coprocessor.
- * @acop: mask of coprocessor to be used.
- * @mm: The mm the coprocessor to associate with. Most likely current mm.
- *
- * Return a positive PID if successful. Negative errno otherwise.
- * The returned PID will be fed to the coprocessor to determine if an
- * icswx transaction is authenticated.
- */
-int use_cop(unsigned long acop, struct mm_struct *mm)
-{
- int ret;
-
- if (!cpu_has_feature(CPU_FTR_ICSWX))
- return -ENODEV;
-
- if (!mm || !acop)
- return -EINVAL;
-
- /* The page_table_lock ensures mm_users won't change under us */
- spin_lock(&mm->page_table_lock);
- spin_lock(mm->context.cop_lockp);
-
- ret = get_cop_pid(mm);
- if (ret < 0)
- goto out;
-
- /* update acop */
- mm->context.acop |= acop;
-
- sync_cop(mm);
-
- /*
- * If this is a threaded process then there might be other threads
- * running. We need to send an IPI to force them to pick up any
- * change in PID and ACOP.
- */
- if (atomic_read(&mm->mm_users) > 1)
- smp_call_function(sync_cop, mm, 1);
-
-out:
- spin_unlock(mm->context.cop_lockp);
- spin_unlock(&mm->page_table_lock);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(use_cop);
-
-/**
- * Stop using a coprocessor.
- * @acop: mask of coprocessor to be stopped.
- * @mm: The mm the coprocessor associated with.
- */
-void drop_cop(unsigned long acop, struct mm_struct *mm)
-{
- int free_pid;
-
- if (!cpu_has_feature(CPU_FTR_ICSWX))
- return;
-
- if (WARN_ON_ONCE(!mm))
- return;
-
- /* The page_table_lock ensures mm_users won't change under us */
- spin_lock(&mm->page_table_lock);
- spin_lock(mm->context.cop_lockp);
-
- mm->context.acop &= ~acop;
-
- free_pid = disable_cop_pid(mm);
- sync_cop(mm);
-
- /*
- * If this is a threaded process then there might be other threads
- * running. We need to send an IPI to force them to pick up any
- * change in PID and ACOP.
- */
- if (atomic_read(&mm->mm_users) > 1)
- smp_call_function(sync_cop, mm, 1);
-
- if (free_pid != COP_PID_NONE)
- free_cop_pid(free_pid);
-
- spin_unlock(mm->context.cop_lockp);
- spin_unlock(&mm->page_table_lock);
-}
-EXPORT_SYMBOL_GPL(drop_cop);
-
-static int acop_use_cop(int ct)
-{
- /* There is no alternate policy, yet */
- return -1;
-}
-
-/*
- * Get the instruction word at the NIP
- */
-static u32 acop_get_inst(struct pt_regs *regs)
-{
- u32 inst;
- u32 __user *p;
-
- p = (u32 __user *)regs->nip;
- if (!access_ok(VERIFY_READ, p, sizeof(*p)))
- return 0;
-
- if (__get_user(inst, p))
- return 0;
-
- return inst;
-}
-
-/**
- * @regs: regsiters at time of interrupt
- * @address: storage address
- * @error_code: Fault code, usually the DSISR or ESR depending on
- * processor type
- *
- * Return 0 if we are able to resolve the data storage fault that
- * results from a CT miss in the ACOP register.
- */
-int acop_handle_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code)
-{
- int ct;
- u32 inst = 0;
-
- if (!cpu_has_feature(CPU_FTR_ICSWX)) {
- pr_info("No coprocessors available");
- _exception(SIGILL, regs, ILL_ILLOPN, address);
- }
-
- if (!user_mode(regs)) {
- /* this could happen if the HV denies the
- * kernel access, for now we just die */
- die("ICSWX from kernel failed", regs, SIGSEGV);
- }
-
- /* Some implementations leave us a hint for the CT */
- ct = ICSWX_GET_CT_HINT(error_code);
- if (ct < 0) {
- /* we have to peek at the instruction word to figure out CT */
- u32 ccw;
- u32 rs;
-
- inst = acop_get_inst(regs);
- if (inst == 0)
- return -1;
-
- rs = (inst >> (31 - 10)) & 0x1f;
- ccw = regs->gpr[rs];
- ct = (ccw >> 16) & 0x3f;
- }
-
- /*
- * We could be here because another thread has enabled acop
- * but the ACOP register has yet to be updated.
- *
- * This should have been taken care of by the IPI to sync all
- * the threads (see smp_call_function(sync_cop, mm, 1)), but
- * that could take forever if there are a significant amount
- * of threads.
- *
- * Given the number of threads on some of these systems,
- * perhaps this is the best way to sync ACOP rather than whack
- * every thread with an IPI.
- */
- if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) {
- sync_cop(current->active_mm);
- return 0;
- }
-
- /* check for alternate policy */
- if (!acop_use_cop(ct))
- return 0;
-
- /* at this point the CT is unknown to the system */
- pr_warn("%s[%d]: Coprocessor %d is unavailable\n",
- current->comm, current->pid, ct);
-
- /* get inst if we don't already have it */
- if (inst == 0) {
- inst = acop_get_inst(regs);
- if (inst == 0)
- return -1;
- }
-
- /* Check if the instruction is the "record form" */
- if (inst & 1) {
- /*
- * the instruction is "record" form so we can reject
- * using CR0
- */
- regs->ccr &= ~(0xful << 28);
- regs->ccr |= ICSWX_RC_NOT_FOUND << 28;
-
- /* Move on to the next instruction */
- regs->nip += 4;
- } else {
- /*
- * There is no architected mechanism to report a bad
- * CT so we could either SIGILL or report nothing.
- * Since the non-record version should only bu used
- * for "hints" or "don't care" we should probably do
- * nothing. However, I could see how some people
- * might want an SIGILL so it here if you want it.
- */
-#ifdef CONFIG_PPC_ICSWX_USE_SIGILL
- _exception(SIGILL, regs, ILL_ILLOPN, address);
-#else
- regs->nip += 4;
-#endif
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(acop_handle_fault);
diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h
deleted file mode 100644
index 6dedc08e62c8..000000000000
--- a/arch/powerpc/mm/icswx.h
+++ /dev/null
@@ -1,68 +0,0 @@
-#ifndef _ARCH_POWERPC_MM_ICSWX_H_
-#define _ARCH_POWERPC_MM_ICSWX_H_
-
-/*
- * ICSWX and ACOP Management
- *
- * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <asm/mmu_context.h>
-
-/* also used to denote that PIDs are not used */
-#define COP_PID_NONE 0
-
-static inline void sync_cop(void *arg)
-{
- struct mm_struct *mm = arg;
-
- if (mm == current->active_mm)
- switch_cop(current->active_mm);
-}
-
-#ifdef CONFIG_PPC_ICSWX_PID
-extern int get_cop_pid(struct mm_struct *mm);
-extern int disable_cop_pid(struct mm_struct *mm);
-extern void free_cop_pid(int free_pid);
-#else
-#define get_cop_pid(m) (COP_PID_NONE)
-#define disable_cop_pid(m) (COP_PID_NONE)
-#define free_cop_pid(p)
-#endif
-
-/*
- * These are implementation bits for architected registers. If this
- * ever becomes architecture the should be moved to reg.h et. al.
- */
-/* UCT is the same bit for Server and Embedded */
-#define ICSWX_DSI_UCT 0x00004000 /* Unavailable Coprocessor Type */
-
-#ifdef CONFIG_PPC_BOOK3E
-/* Embedded implementation gives us no hints as to what the CT is */
-#define ICSWX_GET_CT_HINT(x) (-1)
-#else
-/* Server implementation contains the CT value in the DSISR */
-#define ICSWX_DSISR_CTMASK 0x00003f00
-#define ICSWX_GET_CT_HINT(x) (((x) & ICSWX_DSISR_CTMASK) >> 8)
-#endif
-
-#define ICSWX_RC_STARTED 0x8 /* The request has been started */
-#define ICSWX_RC_NOT_IDLE 0x4 /* No coprocessor found idle */
-#define ICSWX_RC_NOT_FOUND 0x2 /* No coprocessor found */
-#define ICSWX_RC_UNDEFINED 0x1 /* Reserved */
-
-extern int acop_handle_fault(struct pt_regs *regs, unsigned long address,
- unsigned long error_code);
-
-static inline u64 acop_copro_type_bit(unsigned int type)
-{
- return 1ULL << (63 - type);
-}
-
-#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */
diff --git a/arch/powerpc/mm/icswx_pid.c b/arch/powerpc/mm/icswx_pid.c
deleted file mode 100644
index 91e30eb7d054..000000000000
--- a/arch/powerpc/mm/icswx_pid.c
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * ICSWX and ACOP/PID Management
- *
- * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/module.h>
-#include "icswx.h"
-
-#define COP_PID_MIN (COP_PID_NONE + 1)
-#define COP_PID_MAX (0xFFFF)
-
-static DEFINE_SPINLOCK(mmu_context_acop_lock);
-static DEFINE_IDA(cop_ida);
-
-static int new_cop_pid(struct ida *ida, int min_id, int max_id,
- spinlock_t *lock)
-{
- int index;
- int err;
-
-again:
- if (!ida_pre_get(ida, GFP_KERNEL))
- return -ENOMEM;
-
- spin_lock(lock);
- err = ida_get_new_above(ida, min_id, &index);
- spin_unlock(lock);
-
- if (err == -EAGAIN)
- goto again;
- else if (err)
- return err;
-
- if (index > max_id) {
- spin_lock(lock);
- ida_remove(ida, index);
- spin_unlock(lock);
- return -ENOMEM;
- }
-
- return index;
-}
-
-int get_cop_pid(struct mm_struct *mm)
-{
- int pid;
-
- if (mm->context.cop_pid == COP_PID_NONE) {
- pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX,
- &mmu_context_acop_lock);
- if (pid >= 0)
- mm->context.cop_pid = pid;
- }
- return mm->context.cop_pid;
-}
-
-int disable_cop_pid(struct mm_struct *mm)
-{
- int free_pid = COP_PID_NONE;
-
- if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) {
- free_pid = mm->context.cop_pid;
- mm->context.cop_pid = COP_PID_NONE;
- }
- return free_pid;
-}
-
-void free_cop_pid(int free_pid)
-{
- spin_lock(&mmu_context_acop_lock);
- ida_remove(&cop_ida, free_pid);
- spin_unlock(&mmu_context_acop_lock);
-}
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
new file mode 100644
index 000000000000..745097554bea
--- /dev/null
+++ b/arch/powerpc/mm/init-common.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PowerPC version
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ *
+ * Dave Engebretsen <engebret@us.ibm.com>
+ * Rework for PPC64 port.
+ */
+
+#undef DEBUG
+
+#include <linux/string.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
+
+bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
+bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+#ifdef CONFIG_KFENCE
+bool __ro_after_init kfence_disabled;
+bool __ro_after_init kfence_early_init = !!CONFIG_KFENCE_SAMPLE_INTERVAL;
+#endif
+
+static int __init parse_nosmep(char *p)
+{
+ if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return 0;
+
+ disable_kuep = true;
+ pr_warn("Disabling Kernel Userspace Execution Prevention\n");
+ return 0;
+}
+early_param("nosmep", parse_nosmep);
+
+static int __init parse_nosmap(char *p)
+{
+ disable_kuap = true;
+ pr_warn("Disabling Kernel Userspace Access Protection\n");
+ return 0;
+}
+early_param("nosmap", parse_nosmap);
+
+void __weak setup_kuep(bool disabled)
+{
+ if (!IS_ENABLED(CONFIG_PPC_KUEP) || disabled)
+ return;
+
+ if (smp_processor_id() != boot_cpuid)
+ return;
+
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+}
+
+void setup_kup(void)
+{
+ setup_kuap(disable_kuap);
+ setup_kuep(disable_kuep);
+}
+
+#define CTOR(shift) static void ctor_##shift(void *addr) \
+{ \
+ memset(addr, 0, sizeof(pgd_t) << (shift)); \
+}
+
+CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7);
+CTOR(8); CTOR(9); CTOR(10); CTOR(11); CTOR(12); CTOR(13); CTOR(14); CTOR(15);
+
+static inline void (*ctor(int shift))(void *)
+{
+ BUILD_BUG_ON(MAX_PGTABLE_INDEX_SIZE != 15);
+
+ switch (shift) {
+ case 0: return ctor_0;
+ case 1: return ctor_1;
+ case 2: return ctor_2;
+ case 3: return ctor_3;
+ case 4: return ctor_4;
+ case 5: return ctor_5;
+ case 6: return ctor_6;
+ case 7: return ctor_7;
+ case 8: return ctor_8;
+ case 9: return ctor_9;
+ case 10: return ctor_10;
+ case 11: return ctor_11;
+ case 12: return ctor_12;
+ case 13: return ctor_13;
+ case 14: return ctor_14;
+ case 15: return ctor_15;
+ }
+ return NULL;
+}
+
+struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE + 1];
+EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */
+
+/*
+ * Create a kmem_cache() for pagetables. This is not used for PTE
+ * pages - they're linked to struct page, come from the normal free
+ * pages pool and have a different entry size (see real_pte_t) to
+ * everything else. Caches created by this function are used for all
+ * the higher level pagetables, and for hugepage pagetables.
+ */
+void pgtable_cache_add(unsigned int shift)
+{
+ char *name;
+ unsigned long table_size = sizeof(pgd_t) << shift;
+ unsigned long align = table_size;
+
+ /* When batching pgtable pointers for RCU freeing, we store
+ * the index size in the low bits. Table alignment must be
+ * big enough to fit it.
+ */
+ unsigned long minalign = MAX_PGTABLE_INDEX_SIZE + 1;
+ struct kmem_cache *new = NULL;
+
+ /* It would be nice if this was a BUILD_BUG_ON(), but at the
+ * moment, gcc doesn't seem to recognize is_power_of_2 as a
+ * constant expression, so so much for that. */
+ BUG_ON(!is_power_of_2(minalign));
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+
+ if (PGT_CACHE(shift))
+ return; /* Already have a cache of this size */
+
+ align = max_t(unsigned long, align, minalign);
+ name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
+ if (name)
+ new = kmem_cache_create(name, table_size, align, 0, ctor(shift));
+ if (!new)
+ panic("Could not allocate pgtable cache for order %d", shift);
+
+ kfree(name);
+ pgtable_cache[shift] = new;
+
+ pr_debug("Allocated pgtable cache for order %d\n", shift);
+}
+EXPORT_SYMBOL_GPL(pgtable_cache_add); /* used by kvm_hv module */
+
+void pgtable_cache_init(void)
+{
+ pgtable_cache_add(PGD_INDEX_SIZE);
+
+ if (PMD_CACHE_INDEX)
+ pgtable_cache_add(PMD_CACHE_INDEX);
+ /*
+ * In all current configs, when the PUD index exists it's the
+ * same size as either the pgd or pmd index except with THP enabled
+ * on book3s 64
+ */
+ if (PUD_CACHE_INDEX)
+ pgtable_cache_add(PUD_CACHE_INDEX);
+}
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index cff59f1bec23..4e71dfe7d026 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -9,12 +10,6 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/module.h>
@@ -26,7 +21,6 @@
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/init.h>
-#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/initrd.h>
#include <linux/pagemap.h>
@@ -35,10 +29,7 @@
#include <linux/slab.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
#include <asm/io.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
@@ -46,8 +37,11 @@
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/hugetlb.h>
+#include <asm/kup.h>
+#include <asm/kasan.h>
+#include <asm/fixmap.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
@@ -60,12 +54,7 @@
phys_addr_t total_memory;
phys_addr_t total_lowmem;
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
-#ifdef CONFIG_RELOCATABLE_PPC32
+#ifdef CONFIG_RELOCATABLE
/* Used in __va()/__pa() */
long long virt_phys_offset;
EXPORT_SYMBOL(virt_phys_offset);
@@ -81,45 +70,10 @@ EXPORT_SYMBOL(agp_special_page);
void MMU_init(void);
-/* XXX should be in current.h -- paulus */
-extern struct task_struct *current_set[NR_CPUS];
-
-/*
- * this tells the system to map all of ram with the segregs
- * (i.e. page tables) instead of the bats.
- * -- Cort
- */
-int __map_without_bats;
-int __map_without_ltlbs;
-
-/*
- * This tells the system to allow ioremapping memory marked as reserved.
- */
-int __allow_ioremap_reserved;
-
/* max amount of low RAM to map in */
unsigned long __max_low_memory = MAX_LOW_MEM;
/*
- * Check for command-line options that affect what MMU_init will do.
- */
-void MMU_setup(void)
-{
- /* Check for nobats option (used in mapin_ram). */
- if (strstr(cmd_line, "nobats")) {
- __map_without_bats = 1;
- }
-
- if (strstr(cmd_line, "noltlbs")) {
- __map_without_ltlbs = 1;
- }
-#ifdef CONFIG_DEBUG_PAGEALLOC
- __map_without_bats = 1;
- __map_without_ltlbs = 1;
-#endif
-}
-
-/*
* MMU_init sets up the basic memory mappings for the kernel,
* including both RAM and possibly some I/O regions,
* and sets up the page tables and the MMU hardware ready to go.
@@ -129,33 +83,15 @@ void __init MMU_init(void)
if (ppc_md.progress)
ppc_md.progress("MMU:enter", 0x111);
- /* parse args from command line */
- MMU_setup();
-
- /*
- * Reserve gigantic pages for hugetlb. This MUST occur before
- * lowmem_end_addr is initialized below.
- */
- reserve_hugetlb_gpages();
-
- if (memblock.memory.cnt > 1) {
-#ifndef CONFIG_WII
- memblock_enforce_memory_limit(memblock.memory.regions[0].size);
- printk(KERN_WARNING "Only using first contiguous memory region");
-#else
- wii_memory_fixups();
-#endif
- }
-
total_lowmem = total_memory = memblock_end_of_DRAM() - memstart_addr;
lowmem_end_addr = memstart_addr + total_lowmem;
-#ifdef CONFIG_FSL_BOOKE
+#ifdef CONFIG_PPC_85xx
/* Freescale Book-E parts expect lowmem to be mapped by fixed TLB
* entries, so we need to adjust lowmem to match the amount we can map
* in the fixed entries */
adjust_total_lowmem();
-#endif /* CONFIG_FSL_BOOKE */
+#endif /* CONFIG_PPC_85xx */
if (total_lowmem > __max_low_memory) {
total_lowmem = __max_low_memory;
@@ -179,10 +115,6 @@ void __init MMU_init(void)
/* Initialize early top-down ioremap allocator */
ioremap_bot = IOREMAP_TOP;
- /* Map in I/O resources */
- if (ppc_md.progress)
- ppc_md.progress("MMU:setio", 0x302);
-
if (ppc_md.progress)
ppc_md.progress("MMU:exit", 0x211);
@@ -191,34 +123,12 @@ void __init MMU_init(void)
btext_unmap();
#endif
- /* Shortly after that, the entire linear mapping will be available */
- memblock_set_current_limit(lowmem_end_addr);
-}
+ kasan_mmu_init();
-/* This is only called until mem_init is done. */
-void __init *early_get_page(void)
-{
- if (init_bootmem_done)
- return alloc_bootmem_pages(PAGE_SIZE);
- else
- return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
-}
+ setup_kup();
-#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* We don't currently support the first MEMBLOCK not mapping 0
- * physical on those processors
- */
- BUG_ON(first_memblock_base != 0);
-
-#ifdef CONFIG_PIN_TLB
- /* 8xx can only access 24MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01800000));
-#else
- /* 8xx can only access 8MB at the moment */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000));
-#endif
+ update_mmu_feature_fixups(MMU_FTR_KUAP);
+
+ /* Shortly after that, the entire linear mapping will be available */
+ memblock_set_current_limit(lowmem_end_addr);
}
-#endif /* CONFIG_8xx */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 253b4b971c8a..b6f3ae03ca9e 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -11,12 +12,6 @@
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#undef DEBUG
@@ -34,7 +29,6 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/idr.h>
#include <linux/nodemask.h>
@@ -43,6 +37,11 @@
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/memremap.h>
+#include <linux/memory.h>
+#include <linux/bootmem_info.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -50,9 +49,8 @@
#include <asm/rtas.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/tlb.h>
@@ -63,193 +61,80 @@
#include <asm/sections.h>
#include <asm/iommu.h>
#include <asm/vdso.h>
+#include <asm/hugetlb.h>
-#include "mmu_decl.h"
-
-#ifdef CONFIG_PPC_STD_MMU_64
-#if PGTABLE_RANGE > USER_VSID_RANGE
-#warning Limited user VSID range means pagetable space is wasted
-#endif
-
-#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
-#warning TASK_SIZE is smaller than it needs to be.
-#endif
-#endif /* CONFIG_PPC_STD_MMU_64 */
-
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
-static void pgd_ctor(void *addr)
-{
- memset(addr, 0, PGD_TABLE_SIZE);
-}
-
-static void pmd_ctor(void *addr)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- memset(addr, 0, PMD_TABLE_SIZE * 2);
-#else
- memset(addr, 0, PMD_TABLE_SIZE);
-#endif
-}
-
-struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
-
-/*
- * Create a kmem_cache() for pagetables. This is not used for PTE
- * pages - they're linked to struct page, come from the normal free
- * pages pool and have a different entry size (see real_pte_t) to
- * everything else. Caches created by this function are used for all
- * the higher level pagetables, and for hugepage pagetables.
- */
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *))
-{
- char *name;
- unsigned long table_size = sizeof(void *) << shift;
- unsigned long align = table_size;
-
- /* When batching pgtable pointers for RCU freeing, we store
- * the index size in the low bits. Table alignment must be
- * big enough to fit it.
- *
- * Likewise, hugeapge pagetable pointers contain a (different)
- * shift value in the low bits. All tables must be aligned so
- * as to leave enough 0 bits in the address to contain it. */
- unsigned long minalign = max(MAX_PGTABLE_INDEX_SIZE + 1,
- HUGEPD_SHIFT_MASK + 1);
- struct kmem_cache *new;
-
- /* It would be nice if this was a BUILD_BUG_ON(), but at the
- * moment, gcc doesn't seem to recognize is_power_of_2 as a
- * constant expression, so so much for that. */
- BUG_ON(!is_power_of_2(minalign));
- BUG_ON((shift < 1) || (shift > MAX_PGTABLE_INDEX_SIZE));
-
- if (PGT_CACHE(shift))
- return; /* Already have a cache of this size */
-
- align = max_t(unsigned long, align, minalign);
- name = kasprintf(GFP_KERNEL, "pgtable-2^%d", shift);
- new = kmem_cache_create(name, table_size, align, 0, ctor);
- pgtable_cache[shift - 1] = new;
- pr_debug("Allocated pgtable cache for order %d\n", shift);
-}
-
-
-void pgtable_cache_init(void)
-{
- pgtable_cache_add(PGD_INDEX_SIZE, pgd_ctor);
- pgtable_cache_add(PMD_CACHE_INDEX, pmd_ctor);
- if (!PGT_CACHE(PGD_INDEX_SIZE) || !PGT_CACHE(PMD_CACHE_INDEX))
- panic("Couldn't allocate pgtable caches");
- /* In all current configs, when the PUD index exists it's the
- * same size as either the pgd or pmd index. Verify that the
- * initialization above has also created a PUD cache. This
- * will need re-examiniation if we add new possibilities for
- * the pagetable layout. */
- BUG_ON(PUD_INDEX_SIZE && !PGT_CACHE(PUD_INDEX_SIZE));
-}
+#include <mm/mmu_decl.h>
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within. Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within. Note that we have to
* do this by hand as the proffered address may not be correctly aligned.
* Subtraction of non-aligned pointers produces undefined results.
*/
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
{
- unsigned long offset = page - ((unsigned long)(vmemmap));
+ unsigned long start_pfn;
+ unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));
/* Return the pfn of the start of the section. */
- return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+ start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+ return pfn_to_page(start_pfn);
}
/*
- * Check if this vmemmap page is already initialised. If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection added. This is needed because kernel can map
+ * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
+ * a range covers multiple subsections (2M)
+ *
+ * If any subsection in the 16G range mapped by vmemmap is valid we consider the
+ * vmemmap populated (There is a page table entry already present). We can't do
+ * a page table lookup here because with the hash translation we don't keep
+ * vmemmap details in linux page table.
*/
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
- unsigned long end = start + page_size;
- start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+ struct page *start;
+ unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+ start = vmemmap_subsection_start(vmemmap_addr);
- for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
- if (pfn_valid(page_to_pfn((struct page *)start)))
+ for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+ /*
+ * pfn valid check here is intended to really check
+ * whether we have any subsection already initialized
+ * in this range.
+ */
+ if (pfn_valid(page_to_pfn(start)))
return 1;
return 0;
}
-/* On hash-based CPUs, the vmemmap is bolted in the hash table.
- *
- * On Book3E CPUs, the vmemmap is currently mapped in the top half of
- * the vmalloc space using normal page tables, though the size of
- * pages encoded in the PTEs can be different
+/*
+ * vmemmap virtual address space management does not have a traditional page
+ * table to track which virtual struct pages are backed by physical mapping.
+ * The virtual to physical mappings are tracked in a simple linked list
+ * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
+ * all times, whereas the 'next' list maintains the available
+ * vmemmap_backing structures which have been deleted from the
+ * 'vmemmap_list' list during system runtime (memory hotplug remove
+ * operation). The freed 'vmemmap_backing' structures are reused later when
+ * new requests come in without allocating fresh memory. This pointer also
+ * tracks the allocated 'vmemmap_backing' structures as we allocate one
+ * full page of memory at a time when we don't have any.
*/
-
-#ifdef CONFIG_PPC_BOOK3E
-static void __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
-{
- /* Create a PTE encoding without page size */
- unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
- _PAGE_KERNEL_RW;
-
- /* PTEs only contain page size encodings up to 32M */
- BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].enc > 0xf);
-
- /* Encode the size in the PTE */
- flags |= mmu_psize_defs[mmu_vmemmap_psize].enc << 8;
-
- /* For each PTE for that area, map things. Note that we don't
- * increment phys because all PTEs are of the large size and
- * thus must have the low bits clear
- */
- for (i = 0; i < page_size; i += PAGE_SIZE)
- BUG_ON(map_kernel_page(start + i, phys, flags));
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void vmemmap_remove_mapping(unsigned long start,
- unsigned long page_size)
-{
-}
-#endif
-#else /* CONFIG_PPC_BOOK3E */
-static void __meminit vmemmap_create_mapping(unsigned long start,
- unsigned long page_size,
- unsigned long phys)
-{
- int mapped = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(mapped < 0);
-}
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-extern int htab_remove_mapping(unsigned long vstart, unsigned long vend,
- int psize, int ssize);
-
-static void vmemmap_remove_mapping(unsigned long start,
- unsigned long page_size)
-{
- int mapped = htab_remove_mapping(start, start + page_size,
- mmu_vmemmap_psize,
- mmu_kernel_ssize);
- BUG_ON(mapped < 0);
-}
-#endif
-
-#endif /* CONFIG_PPC_BOOK3E */
-
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;
+
+/*
+ * The same pointer 'next' tracks individual chunks inside the allocated
+ * full page during the boot time and again tracks the freed nodes during
+ * runtime. This is racy, but the two uses do not overlap because they are
+ * separated by the boot process. It will create a problem if we somehow have
+ * a memory hotplug operation during boot!
+ */
static int num_left;
static int num_freed;
@@ -280,16 +165,16 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
return next++;
}
-static __meminit void vmemmap_list_populate(unsigned long phys,
- unsigned long start,
- int node)
+static __meminit int vmemmap_list_populate(unsigned long phys,
+ unsigned long start,
+ int node)
{
struct vmemmap_backing *vmem_back;
vmem_back = vmemmap_list_alloc(node);
if (unlikely(!vmem_back)) {
- WARN_ON(1);
- return;
+ pr_debug("vmemap list allocation failed\n");
+ return -ENOMEM;
}
vmem_back->phys = phys;
@@ -297,38 +182,110 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmem_back->list = vmemmap_list;
vmemmap_list = vmem_back;
+ return 0;
}
-int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+bool altmap_cross_boundary(struct vmem_altmap *altmap, unsigned long start,
+ unsigned long page_size)
{
+ unsigned long nr_pfn = page_size / sizeof(struct page);
+ unsigned long start_pfn = page_to_pfn((struct page *)start);
+
+ if ((start_pfn + nr_pfn - 1) > altmap->end_pfn)
+ return true;
+
+ if (start_pfn < altmap->base_pfn)
+ return true;
+
+ return false;
+}
+
+static int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ bool altmap_alloc;
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
/* Align to the page size of the linear mapping. */
- start = _ALIGN_DOWN(start, page_size);
+ start = ALIGN_DOWN(start, page_size);
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
- void *p;
+ void *p = NULL;
+ int rc;
+ /*
+ * This vmemmap range is backing different subsections. If any
+ * of those subsections is marked valid, that means we already
+ * have initialized a page table covering this range and hence
+ * the vmemmap range is populated.
+ */
if (vmemmap_populated(start, page_size))
continue;
- p = vmemmap_alloc_block(page_size, node);
+ /*
+ * Allocate from the altmap first if we have one. This may
+ * fail due to alignment issues when using 16MB hugepages, so
+ * fall back to system memory if the altmap allocation fails.
+ */
+ if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
+ p = vmemmap_alloc_block_buf(page_size, node, altmap);
+ if (!p)
+ pr_debug("altmap block allocation failed, falling back to system memory");
+ else
+ altmap_alloc = true;
+ }
+ if (!p) {
+ p = vmemmap_alloc_block_buf(page_size, node, NULL);
+ altmap_alloc = false;
+ }
if (!p)
return -ENOMEM;
- vmemmap_list_populate(__pa(p), start, node);
+ if (vmemmap_list_populate(__pa(p), start, node)) {
+ /*
+ * If we don't populate the vmemmap list, we don't have
+ * the ability to free the allocated vmemmap
+ * pages in section_deactivate. Hence free them
+ * here.
+ */
+ int nr_pfns = page_size >> PAGE_SHIFT;
+ unsigned long page_order = get_order(page_size);
+
+ if (altmap_alloc)
+ vmem_altmap_free(altmap, nr_pfns);
+ else
+ free_pages((unsigned long)p, page_order);
+ return -ENOMEM;
+ }
pr_debug(" * %016lx..%016lx allocated at %p\n",
start, start + page_size, p);
- vmemmap_create_mapping(start, page_size, __pa(p));
+ rc = vmemmap_create_mapping(start, page_size, __pa(p));
+ if (rc < 0) {
+ pr_warn("%s: Unable to create vmemmap mapping: %d\n",
+ __func__, rc);
+ return -EFAULT;
+ }
}
return 0;
}
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (radix_enabled())
+ return radix__vmemmap_populate(start, end, node, altmap);
+#endif
+
+ return __vmemmap_populate(start, end, node, altmap);
+}
+
#ifdef CONFIG_MEMORY_HOTPLUG
static unsigned long vmemmap_list_free(unsigned long start)
{
@@ -343,10 +300,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
vmem_back_prev = vmem_back;
}
- if (unlikely(!vmem_back)) {
- WARN_ON(1);
+ if (unlikely(!vmem_back))
return 0;
- }
/* remove it from vmemmap_list */
if (vmem_back == vmemmap_list) /* remove head */
@@ -362,104 +317,364 @@ static unsigned long vmemmap_list_free(unsigned long start)
return vmem_back->phys;
}
-void __ref vmemmap_free(unsigned long start, unsigned long end)
+static void __ref __vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
{
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
-
- start = _ALIGN_DOWN(start, page_size);
+ unsigned long page_order = get_order(page_size);
+ unsigned long alt_start = ~0, alt_end = ~0;
+ unsigned long base_pfn;
+
+ start = ALIGN_DOWN(start, page_size);
+ if (altmap) {
+ alt_start = altmap->base_pfn;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
+ }
pr_debug("vmemmap_free %lx...%lx\n", start, end);
for (; start < end; start += page_size) {
- unsigned long addr;
+ unsigned long nr_pages, addr;
+ struct page *page;
/*
- * the section has already be marked as invalid, so
- * vmemmap_populated() true means some other sections still
- * in this page, so skip it.
+ * We have already marked the subsection we are trying to remove
+ * invalid. So if we want to remove the vmemmap range, we
+ * need to make sure there is no subsection marked valid
+ * in this range.
*/
if (vmemmap_populated(start, page_size))
continue;
addr = vmemmap_list_free(start);
- if (addr) {
- struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
-
- if (PageReserved(page)) {
- /* allocated from bootmem */
- if (page_size < PAGE_SIZE) {
- /*
- * this shouldn't happen, but if it is
- * the case, leave the memory there
- */
- WARN_ON_ONCE(1);
- } else {
- unsigned int nr_pages =
- 1 << get_order(page_size);
- while (nr_pages--)
- free_reserved_page(page++);
- }
- } else
- free_pages((unsigned long)(__va(addr)),
- get_order(page_size));
-
- vmemmap_remove_mapping(start, page_size);
+ if (!addr)
+ continue;
+
+ page = pfn_to_page(addr >> PAGE_SHIFT);
+ nr_pages = 1 << page_order;
+ base_pfn = PHYS_PFN(addr);
+
+ if (base_pfn >= alt_start && base_pfn < alt_end) {
+ vmem_altmap_free(altmap, nr_pages);
+ } else if (PageReserved(page)) {
+ /* allocated from bootmem */
+ if (page_size < PAGE_SIZE) {
+ /*
+ * this shouldn't happen, but if it is
+ * the case, leave the memory there
+ */
+ WARN_ON_ONCE(1);
+ } else {
+ while (nr_pages--)
+ free_reserved_page(page++);
+ }
+ } else {
+ free_pages((unsigned long)(__va(addr)), page_order);
}
+
+ vmemmap_remove_mapping(start, page_size);
}
}
+
+void __ref vmemmap_free(unsigned long start, unsigned long end,
+ struct vmem_altmap *altmap)
+{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (radix_enabled())
+ return radix__vmemmap_free(start, end, altmap);
+#endif
+ return __vmemmap_free(start, end, altmap);
+}
+
#endif
+
+#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
{
}
+#endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
+
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+unsigned int mmu_lpid_bits;
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+EXPORT_SYMBOL_GPL(mmu_lpid_bits);
+#endif
+unsigned int mmu_pid_bits;
+
+static bool disable_radix = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+
+static int __init parse_disable_radix(char *p)
+{
+ bool val;
+
+ if (!p)
+ val = true;
+ else if (kstrtobool(p, &val))
+ return -EINVAL;
+
+ disable_radix = val;
+
+ return 0;
+}
+early_param("disable_radix", parse_disable_radix);
/*
- * We do not have access to the sparsemem vmemmap, so we fallback to
- * walking the list of sparsemem blocks which we already maintain for
- * the sake of crashdump. In the long run, we might want to maintain
- * a tree if performance of that linear walk becomes a problem.
- *
- * realmode_pfn_to_page functions can fail due to:
- * 1) As real sparsemem blocks do not lay in RAM continously (they
- * are in virtual address space which is not available in the real mode),
- * the requested page struct can be split between blocks so get_page/put_page
- * may fail.
- * 2) When huge pages are used, the get_page/put_page API will fail
- * in real mode as the linked addresses in the page struct are virtual
- * too.
+ * If we're running under a hypervisor, we need to check the contents of
+ * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do
+ * radix. If not, we clear the radix feature bit so we fall back to hash.
*/
-struct page *realmode_pfn_to_page(unsigned long pfn)
+static void __init early_check_vec5(void)
{
- struct vmemmap_backing *vmem_back;
- struct page *page;
- unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
- unsigned long pg_va = (unsigned long) pfn_to_page(pfn);
-
- for (vmem_back = vmemmap_list; vmem_back; vmem_back = vmem_back->list) {
- if (pg_va < vmem_back->virt_addr)
- continue;
+ unsigned long root, chosen;
+ int size;
+ const u8 *vec5;
+ u8 mmu_supported;
+
+ root = of_get_flat_dt_root();
+ chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+ if (chosen == -FDT_ERR_NOTFOUND) {
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ return;
+ }
+ vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+ if (!vec5) {
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ return;
+ }
+ if (size <= OV5_INDX(OV5_MMU_SUPPORT)) {
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ return;
+ }
- /* After vmemmap_list entry free is possible, need check all */
- if ((pg_va + sizeof(struct page)) <=
- (vmem_back->virt_addr + page_size)) {
- page = (struct page *) (vmem_back->phys + pg_va -
- vmem_back->virt_addr);
- return page;
+ /* Check for supported configuration */
+ mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] &
+ OV5_FEAT(OV5_MMU_SUPPORT);
+ if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) {
+ /* Hypervisor only supports radix - check enabled && GTSE */
+ if (!early_radix_enabled()) {
+ pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
}
+ if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
+ OV5_FEAT(OV5_RADIX_GTSE))) {
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+ } else
+ cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
+ /* Do radix anyway - the hypervisor said we had to */
+ cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
+ } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
+ /* Hypervisor only supports hash - disable radix */
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
}
+}
+
+static int __init dt_scan_mmu_pid_width(unsigned long node,
+ const char *uname, int depth,
+ void *data)
+{
+ int size = 0;
+ const __be32 *prop;
+ const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+
+ /* We are scanning "cpu" nodes only */
+ if (type == NULL || strcmp(type, "cpu") != 0)
+ return 0;
+
+ /* Find MMU LPID, PID register size */
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-lpid-bits", &size);
+ if (prop && size == 4)
+ mmu_lpid_bits = be32_to_cpup(prop);
+
+ prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size);
+ if (prop && size == 4)
+ mmu_pid_bits = be32_to_cpup(prop);
+
+ if (!mmu_pid_bits && !mmu_lpid_bits)
+ return 0;
+
+ return 1;
+}
- /* Probably that page struct is split between real pages */
- return NULL;
+/*
+ * Outside hotplug the kernel uses this value to map the kernel direct map
+ * with radix. To be compatible with older kernels, let's keep this value
+ * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map
+ * things with 1GB size in the case where we don't support hotplug.
+ */
+#ifndef CONFIG_MEMORY_HOTPLUG
+#define DEFAULT_MEMORY_BLOCK_SIZE SZ_16M
+#else
+#define DEFAULT_MEMORY_BLOCK_SIZE MIN_MEMORY_BLOCK_SIZE
+#endif
+
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+ unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+
+ for (; *block_size > min_memory_block_size; *block_size >>= 2) {
+ if ((mem_size & *block_size) == 0)
+ break;
+ }
}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-#elif defined(CONFIG_FLATMEM)
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+ depth, void *data)
+{
+ const char *type;
+ unsigned long *block_size = (unsigned long *)data;
+ const __be32 *reg, *endp;
+ int l;
+
+ if (depth != 1)
+ return 0;
+ /*
+ * If we have dynamic-reconfiguration-memory node, use the
+ * lmb value.
+ */
+ if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+ const __be32 *prop;
+
+ prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+ if (!prop || l < dt_root_size_cells * sizeof(__be32))
+ /*
+ * Nothing in the device tree
+ */
+ *block_size = DEFAULT_MEMORY_BLOCK_SIZE;
+ else
+ *block_size = of_read_number(prop, dt_root_size_cells);
+ /*
+ * We have found the final value. Don't probe further.
+ */
+ return 1;
+ }
+ /*
+ * Find all the device tree nodes of memory type and make sure
+ * the area can be mapped using the memory block size value
+ * we end up using. We start with 1G value and keep reducing
+ * it such that we can map the entire area using memory_block_size.
+ * This will be used on powernv and older pseries that don't
+ * have ibm,lmb-size node.
+ * For ex: with P5 we can end up with
+ * memory@0 -> 128MB
+ * memory@128M -> 64M
+ * This will end up using 64MB memory block size value.
+ */
+ type = of_get_flat_dt_prop(node, "device_type", NULL);
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+ if (!reg)
+ reg = of_get_flat_dt_prop(node, "reg", &l);
+ if (!reg)
+ return 0;
+
+ endp = reg + (l / sizeof(__be32));
+ while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+ const char *compatible;
+ u64 size;
+
+ dt_mem_next_cell(dt_root_addr_cells, &reg);
+ size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+ if (size) {
+ update_memory_block_size(block_size, size);
+ continue;
+ }
+ /*
+ * ibm,coherent-device-memory with linux,usable-memory = 0
+ * Force 256MiB block size. Work around for GPUs on P9 PowerNV
+ * linux,usable-memory == 0 implies driver managed memory and
+ * we can't use large memory block size due to hotplug/unplug
+ * limitations.
+ */
+ compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+ if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+ if (*block_size > SZ_256M)
+ *block_size = SZ_256M;
+ /*
+ * We keep 256M as the upper limit with GPU present.
+ */
+ return 0;
+ }
+ }
+ /* continue looking for other memory device types */
+ return 0;
+}
-struct page *realmode_pfn_to_page(unsigned long pfn)
+/*
+ * start with 1G memory block size. Early init will
+ * fix this with correct value.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+static void __init early_init_memory_block_size(void)
{
- struct page *page = pfn_to_page(pfn);
- return page;
+ /*
+ * We need to do memory_block_size probe early so that
+ * radix__early_init_mmu() can use this as limit for
+ * mapping page size.
+ */
+ of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
}
-EXPORT_SYMBOL_GPL(realmode_pfn_to_page);
-#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */
+void __init mmu_early_init_devtree(void)
+{
+ bool hvmode = !!(mfmsr() & MSR_HV);
+
+ /* Disable radix mode based on kernel command line. */
+ if (disable_radix) {
+ if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ else
+ pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
+ }
+
+ of_scan_flat_dt(dt_scan_mmu_pid_width, NULL);
+ if (hvmode && !mmu_lpid_bits) {
+ if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
+ mmu_lpid_bits = 12; /* POWER8-10 */
+ else
+ mmu_lpid_bits = 10; /* POWER7 */
+ }
+ if (!mmu_pid_bits) {
+ if (early_cpu_has_feature(CPU_FTR_ARCH_300))
+ mmu_pid_bits = 20; /* POWER9-10 */
+ }
+
+ /*
+ * Check /chosen/ibm,architecture-vec-5 if running as a guest.
+ * When running bare-metal, we can use radix if we like
+ * even though the ibm,architecture-vec-5 property created by
+ * skiboot doesn't have the necessary bits set.
+ */
+ if (!hvmode)
+ early_check_vec5();
+
+ early_init_memory_block_size();
+
+ if (early_radix_enabled()) {
+ radix__early_init_devtree();
+
+ /*
+ * We have finalized the translation we are going to use by now.
+ * Radix mode is not limited by RMA / VRMA addressing.
+ * Hence don't limit memblock allocations.
+ */
+ ppc64_rma_size = ULONG_MAX;
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+ } else
+ hash__early_init_devtree();
+
+ if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+ hugetlbpage_init_defaultsize();
+
+ if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) &&
+ !(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX))
+ panic("kernel does not support any MMU type offered by platform");
+}
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
new file mode 100644
index 000000000000..4b4feba9873b
--- /dev/null
+++ b/arch/powerpc/mm/ioremap.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/mmzone.h>
+#include <linux/vmalloc.h>
+
+unsigned long ioremap_bot;
+EXPORT_SYMBOL(ioremap_bot);
+
+void __iomem *ioremap(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap);
+
+void __iomem *ioremap_wc(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ return __ioremap_caller(addr, size, prot, caller);
+}
+EXPORT_SYMBOL(ioremap_wc);
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ return __ioremap_caller(addr, size, prot, caller);
+}
+
+void __iomem *ioremap_prot(phys_addr_t addr, size_t size, pgprot_t prot)
+{
+ pte_t pte = __pte(pgprot_val(prot));
+ void *caller = __builtin_return_address(0);
+
+ /* writeable implies dirty for kernel addresses */
+ if (pte_write(pte))
+ pte = pte_mkdirty(pte);
+
+ return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
+}
+EXPORT_SYMBOL(ioremap_prot);
+
+int early_ioremap_range(unsigned long ea, phys_addr_t pa,
+ unsigned long size, pgprot_t prot)
+{
+ unsigned long i;
+
+ for (i = 0; i < size; i += PAGE_SIZE) {
+ int err = map_kernel_page(ea + i, pa + i, pgprot_nx(prot));
+
+ if (WARN_ON_ONCE(err)) /* Should clean up */
+ return err;
+ }
+
+ return 0;
+}
diff --git a/arch/powerpc/mm/ioremap_32.c b/arch/powerpc/mm/ioremap_32.c
new file mode 100644
index 000000000000..ca5bc6be3e6f
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_32.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <mm/mmu_decl.h>
+
+void __iomem *ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
+{
+ unsigned long v;
+ phys_addr_t p, offset;
+ int err;
+
+ /*
+ * If the address lies within the first 16 MB, assume it's in ISA
+ * memory space
+ */
+ if (addr < SZ_16M)
+ addr += _ISA_MEM_BASE;
+
+ /*
+ * Choose an address to map it to.
+ * Once the vmalloc system is running, we use it.
+ * Before then, we use space going down from IOREMAP_TOP
+ * (ioremap_bot records where we're up to).
+ */
+ p = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - p;
+
+#ifndef CONFIG_CRASH_DUMP
+ /*
+ * Don't allow anybody to remap normal RAM that we're using.
+ * mem_init() sets high_memory so only do the check after that.
+ */
+ if (slab_is_available() && p <= virt_to_phys(high_memory - 1) &&
+ page_is_ram(__phys_to_pfn(p))) {
+ pr_warn("%s(): phys addr 0x%llx is RAM lr %ps\n", __func__,
+ (unsigned long long)p, __builtin_return_address(0));
+ return NULL;
+ }
+#endif
+
+ if (size == 0)
+ return NULL;
+
+ /*
+ * Is it already mapped? Perhaps overlapped by a previous
+ * mapping.
+ */
+ v = p_block_mapped(p);
+ if (v)
+ return (void __iomem *)v + offset;
+
+ if (slab_is_available())
+ return generic_ioremap_prot(addr, size, prot);
+
+ /*
+ * Should check if it is a candidate for a BAT mapping
+ */
+ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
+ err = early_ioremap_range(ioremap_bot - size - PAGE_SIZE, p, size, prot);
+ if (err)
+ return NULL;
+ ioremap_bot -= size + PAGE_SIZE;
+
+ return (void __iomem *)ioremap_bot + offset;
+}
+
+void iounmap(volatile void __iomem *addr)
+{
+ /*
+ * If mapped by BATs then there is nothing to do.
+ * Calling vfree() generates a benign warning.
+ */
+ if (v_block_mapped((unsigned long)addr))
+ return;
+
+ generic_iounmap(addr);
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c
new file mode 100644
index 000000000000..fb8b55bd2cd5
--- /dev/null
+++ b/arch/powerpc/mm/ioremap_64.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
+{
+ phys_addr_t paligned, offset;
+ void __iomem *ret;
+ int err;
+
+ /* We don't support the 4K PFN hack with ioremap */
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
+ return NULL;
+
+ /*
+ * Choose an address to map it to. Once the vmalloc system is running,
+ * we use it. Before that, we map using addresses going up from
+ * ioremap_bot. vmalloc will use the addresses from IOREMAP_BASE
+ * through ioremap_bot.
+ */
+ paligned = addr & PAGE_MASK;
+ offset = addr & ~PAGE_MASK;
+ size = PAGE_ALIGN(addr + size) - paligned;
+
+ if (size == 0 || paligned == 0)
+ return NULL;
+
+ if (slab_is_available())
+ return generic_ioremap_prot(addr, size, prot);
+
+ pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
+ err = early_ioremap_range(ioremap_bot, paligned, size, prot);
+ if (err)
+ return NULL;
+
+ ret = (void __iomem *)ioremap_bot + offset;
+ ioremap_bot += size + PAGE_SIZE;
+
+ return ret;
+}
+
+/*
+ * Unmap an IO region and remove it from vmalloc'd list.
+ * Access to IO memory should be serialized by driver.
+ */
+void iounmap(volatile void __iomem *token)
+{
+ if (!slab_is_available())
+ return;
+
+ generic_iounmap(token);
+}
+EXPORT_SYMBOL(iounmap);
diff --git a/arch/powerpc/mm/kasan/8xx.c b/arch/powerpc/mm/kasan/8xx.c
new file mode 100644
index 000000000000..989d6cdf4141
--- /dev/null
+++ b/arch/powerpc/mm/kasan/8xx.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
+#include <asm/pgalloc.h>
+
+/*
+ * Back the shadow range [k_start, k_end) with memory taken from @block,
+ * one PMD entry at a time; @block advances by SZ_4M for every PMD visited.
+ *
+ * For each PMD that still points at kasan_early_shadow_pte, a fresh PTE table
+ * is allocated, filled with huge PTEs pointing into @block, installed, and the
+ * PMD is then flagged _PMD_PAGE_8M. PMDs already pointing elsewhere are skipped.
+ *
+ * Returns 0 on success or -ENOMEM when a PTE table cannot be allocated.
+ */
+static int __init
+kasan_init_shadow_8M(unsigned long k_start, unsigned long k_end, void *block)
+{
+ pmd_t *pmd = pmd_off_k(k_start);
+ unsigned long k_cur, k_next;
+
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++, block += SZ_4M) {
+ pte_t *ptep;
+ int i;
+
+ k_next = pgd_addr_end(k_cur, k_end);
+ /* Only replace entries that still use the shared early shadow table */
+ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+ continue;
+
+ ptep = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+ if (!ptep)
+ return -ENOMEM;
+
+ /* Fill the new table: entry i maps page i of the current part of @block */
+ for (i = 0; i < PTRS_PER_PTE; i++) {
+ pte_t pte = pte_mkhuge(pfn_pte(PHYS_PFN(__pa(block + i * PAGE_SIZE)), PAGE_KERNEL));
+
+ __set_pte_at(&init_mm, k_cur, ptep + i, pte, 1);
+ }
+ pmd_populate_kernel(&init_mm, pmd, ptep);
+ /* Set the 8M flag only after the PMD has been populated */
+ *pmd = __pmd(pmd_val(*pmd) | _PMD_PAGE_8M);
+ }
+ return 0;
+}
+
+/*
+ * Allocate and map the KASAN shadow for the memory range [start, start + size).
+ *
+ * One block covering the whole shadow range is allocated with SZ_8M alignment.
+ * When the shadow start is 8M aligned, the 8M-aligned part is mapped by
+ * kasan_init_shadow_8M(); whatever remains is mapped page by page.
+ *
+ * Returns 0 on success, -ENOMEM when the shadow block cannot be allocated, or
+ * the error reported by kasan_init_shadow_8M() or
+ * kasan_init_shadow_page_tables().
+ */
+int __init kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur;
+ int ret;
+ void *block;
+
+ block = memblock_alloc(k_end - k_start, SZ_8M);
+ if (!block)
+ return -ENOMEM;
+
+ if (IS_ALIGNED(k_start, SZ_8M)) {
+ /* Do not lose an allocation failure from the 8M mapping step */
+ ret = kasan_init_shadow_8M(k_start, ALIGN_DOWN(k_end, SZ_8M), block);
+ if (ret)
+ return ret;
+ k_cur = ALIGN_DOWN(k_end, SZ_8M);
+ /* Nothing left over: the whole range was 8M aligned */
+ if (k_cur == k_end)
+ goto finish;
+ } else {
+ k_cur = k_start;
+ }
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ /* Map the remaining shadow one page at a time out of the same block */
+ for (; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ void *va = block + k_cur - k_start;
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ /* Pages below the last 512K boundary of the range are marked huge */
+ if (k_cur < ALIGN_DOWN(k_end, SZ_512K))
+ pte = pte_mkhuge(pte);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+finish:
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
new file mode 100644
index 000000000000..f9522fd70b2f
--- /dev/null
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+obj-$(CONFIG_PPC32) += init_32.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_BOOK3S_32) += book3s_32.o
+obj-$(CONFIG_PPC_BOOK3S_64) += init_book3s_64.o
+obj-$(CONFIG_PPC_BOOK3E_64) += init_book3e_64.o
diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c
new file mode 100644
index 000000000000..450a67ef0bbe
--- /dev/null
+++ b/arch/powerpc/mm/kasan/book3s_32.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <mm/mmu_decl.h>
+
+/*
+ * Set up the KASAN shadow for the memory range [start, start + size).
+ *
+ * The shadow is first covered with BATs for as long as a free BAT exists and
+ * the next block is at least SZ_128K. The part that could not be covered,
+ * [k_nobat, k_end), is backed by a single physical allocation and mapped page
+ * by page. The whole shadow is zeroed at the end.
+ *
+ * Returns 0 on success, -ENOMEM when the non-BAT part cannot be allocated, or
+ * the error reported by kasan_init_shadow_page_tables().
+ */
+int __init kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_nobat = k_start;
+ unsigned long k_cur;
+ phys_addr_t phys;
+ int ret;
+
+ /* k_nobat tracks the first shadow address not yet covered by a BAT */
+ while (k_nobat < k_end) {
+ unsigned int k_size = bat_block_size(k_nobat, k_end);
+ int idx = find_free_bat();
+
+ if (idx == -1)
+ break;
+ if (k_size < SZ_128K)
+ break;
+ /* The block is aligned to its own size */
+ phys = memblock_phys_alloc_range(k_size, k_size, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ break;
+
+ setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL);
+ k_nobat += k_size;
+ }
+ /* Only call update_bats() when at least one BAT was set above */
+ if (k_nobat != k_start)
+ update_bats();
+
+ /* Back whatever the BATs did not cover with one page-aligned allocation */
+ if (k_nobat < k_end) {
+ phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0,
+ MEMBLOCK_ALLOC_ANYWHERE);
+ if (!phys)
+ return -ENOMEM;
+ }
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ /* Clear PTEs in the BAT-covered part that still map the early shadow page */
+ kasan_update_early_region(k_start, k_nobat, __pte(0));
+
+ for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ memset(kasan_mem_to_shadow(start), 0, k_end - k_start);
+
+ return 0;
+}
diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c
new file mode 100644
index 000000000000..1d083597464f
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_32.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <asm/pgalloc.h>
+#include <asm/text-patching.h>
+#include <mm/mmu_decl.h>
+
+/*
+ * Select the read-only page protection: PAGE_READONLY when the MMU reports
+ * MMU_FTR_HPTE_TABLE, PAGE_KERNEL_RO otherwise.
+ */
+static pgprot_t __init kasan_prot_ro(void)
+{
+ return early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? PAGE_READONLY : PAGE_KERNEL_RO;
+}
+
+/*
+ * Point all PTRS_PER_PTE entries of the table at @ptep to
+ * kasan_early_shadow_page, using protection @prot.
+ */
+static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+{
+ unsigned long va = (unsigned long)kasan_early_shadow_page;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+ int i;
+
+ /* Every entry gets the same PTE: they all alias the one early shadow page */
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
+ __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 1);
+}
+
+/*
+ * Give every PMD covering [k_start, k_end) its own PTE table.
+ *
+ * PMDs that no longer point at kasan_early_shadow_pte are left untouched.
+ * Each new table starts out with all entries mapping the early shadow page
+ * with PAGE_KERNEL (see kasan_populate_pte()).
+ *
+ * Returns 0 on success or -ENOMEM when a table cannot be allocated.
+ */
+int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+{
+ pmd_t *pmd;
+ unsigned long k_cur, k_next;
+
+ pmd = pmd_off_k(k_start);
+
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+ pte_t *new;
+
+ k_next = pgd_addr_end(k_cur, k_end);
+ /* Skip PMDs that already have a private table */
+ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+ continue;
+
+ new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
+
+ if (!new)
+ return -ENOMEM;
+ /* Fill the table before it is installed in the PMD */
+ kasan_populate_pte(new, PAGE_KERNEL);
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+ return 0;
+}
+
+/*
+ * Default (__weak) shadow setup for the memory range [start, start + size).
+ *
+ * Private PTE tables are created for the shadow range, then a single block
+ * covering the shadow (from its page-aligned start) is allocated and mapped
+ * page by page with PAGE_KERNEL.
+ *
+ * Returns 0 on success, -ENOMEM when the block cannot be allocated, or the
+ * error reported by kasan_init_shadow_page_tables().
+ */
+int __init __weak kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur;
+ int ret;
+ void *block;
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ /* From here on k_start is rounded down to a page boundary */
+ k_start = k_start & PAGE_MASK;
+ block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+ if (!block)
+ return -ENOMEM;
+
+ for (k_cur = k_start & PAGE_MASK; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ /* Shadow page at k_cur is backed by the page at the same offset in block */
+ void *va = block + k_cur - k_start;
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
+
+/*
+ * Replace with @pte every PTE in [k_start, k_end) that still maps
+ * kasan_early_shadow_page; PTEs mapping anything else are left alone.
+ * The TLB is flushed for the whole range afterwards.
+ */
+void __init
+kasan_update_early_region(unsigned long k_start, unsigned long k_end, pte_t pte)
+{
+ unsigned long k_cur;
+
+ for (k_cur = k_start; k_cur != k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_off_k(k_cur);
+ pte_t *ptep = pte_offset_kernel(pmd, k_cur);
+
+ /* Not the early shadow page: this entry is not touched */
+ if (pte_page(*ptep) != virt_to_page(lm_alias(kasan_early_shadow_page)))
+ continue;
+
+ __set_pte_at(&init_mm, k_cur, ptep, pte, 0);
+ }
+
+ flush_tlb_kernel_range(k_start, k_end);
+}
+
+/*
+ * Switch the early shadow page to the read-only protection returned by
+ * kasan_prot_ro(): rewrite the shared kasan_early_shadow_pte table, then
+ * update every PTE in the shadow range that still maps the early shadow page.
+ */
+static void __init kasan_remap_early_shadow_ro(void)
+{
+ pgprot_t prot = kasan_prot_ro();
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+
+ kasan_populate_pte(kasan_early_shadow_pte, prot);
+
+ kasan_update_early_region(KASAN_SHADOW_START, KASAN_SHADOW_END,
+ pfn_pte(PHYS_PFN(pa), prot));
+}
+
+/*
+ * Clear the PTEs that still map the early shadow page over the shadow of
+ * [VMALLOC_START, VMALLOC_END) and, when MODULES_VADDR is defined, over the
+ * shadow of [MODULES_VADDR, MODULES_END).
+ */
+static void __init kasan_unmap_early_shadow_vmalloc(void)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_START);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
+
+ kasan_update_early_region(k_start, k_end, __pte(0));
+
+#ifdef MODULES_VADDR
+ k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+ k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+ kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
+}
+
+/*
+ * When the MMU reports MMU_FTR_HPTE_TABLE, create private PTE tables for the
+ * whole shadow range [KASAN_SHADOW_START, KASAN_SHADOW_END).
+ * Panics if the tables cannot be set up.
+ */
+void __init kasan_mmu_init(void)
+{
+ if (!early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return;
+
+ if (kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END))
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+}
+
+/*
+ * Main 32-bit KASAN setup.
+ *
+ * Creates the shadow for every memory range below total_lowmem, creates
+ * private shadow page tables for the whole shadow range when
+ * CONFIG_KASAN_VMALLOC is enabled, makes the early shadow read-only and
+ * clears it, then enables reporting. Any failure panics.
+ */
+void __init kasan_init(void)
+{
+ phys_addr_t base, end;
+ u64 i;
+ int ret;
+
+ for_each_mem_range(i, &base, &end) {
+ /* Clamp each range to total_lowmem; ranges entirely above it are skipped */
+ phys_addr_t top = min(end, total_lowmem);
+
+ if (base >= top)
+ continue;
+
+ ret = kasan_init_region(__va(base), top - base);
+ if (ret)
+ panic("kasan: kasan_init_region() failed");
+ }
+
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+
+ kasan_remap_early_shadow_ro();
+
+ clear_page(kasan_early_shadow_page);
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ kasan_init_generic();
+}
+
+/*
+ * Late KASAN step: with CONFIG_KASAN_VMALLOC enabled, drop the early shadow
+ * mappings via kasan_unmap_early_shadow_vmalloc(). Otherwise does nothing.
+ */
+void __init kasan_late_init(void)
+{
+ if (!IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ return;
+
+ kasan_unmap_early_shadow_vmalloc();
+}
+
+/*
+ * Build the early shadow: fill kasan_early_shadow_pte so every entry maps the
+ * early shadow page with PAGE_KERNEL, then point every PMD covering
+ * [KASAN_SHADOW_START, KASAN_SHADOW_END) at that single table.
+ */
+void __init kasan_early_init(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pmd_t *pmd = pmd_off_k(addr);
+
+ /* Fails the build unless KASAN_SHADOW_START is PGDIR aligned */
+ BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
+
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+ } while (pmd++, addr = next, addr != end);
+}
diff --git a/arch/powerpc/mm/kasan/init_book3e_64.c b/arch/powerpc/mm/kasan/init_book3e_64.c
new file mode 100644
index 000000000000..0d3a73d6d4b0
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_book3e_64.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KASAN for 64-bit Book3e powerpc
+ *
+ * Copyright 2022, Christophe Leroy, CS GROUP France
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/set_memory.h>
+
+#include <asm/pgalloc.h>
+
+/* True when @p4d still points at the shared kasan_early_shadow_pud table. */
+static inline bool kasan_pud_table(p4d_t p4d)
+{
+ struct page *early_pud = virt_to_page(lm_alias(kasan_early_shadow_pud));
+
+ return p4d_page(p4d) == early_pud;
+}
+
+/* True when @pud still points at the shared kasan_early_shadow_pmd table. */
+static inline bool kasan_pmd_table(pud_t pud)
+{
+ struct page *early_pmd = virt_to_page(lm_alias(kasan_early_shadow_pmd));
+
+ return pud_page(pud) == early_pmd;
+}
+
+/* True when @pmd still points at the shared kasan_early_shadow_pte table. */
+static inline bool kasan_pte_table(pmd_t pmd)
+{
+ struct page *early_pte = virt_to_page(lm_alias(kasan_early_shadow_pte));
+
+ return pmd_page(pmd) == early_pte;
+}
+
+/*
+ * Map the shadow address @ea to the physical address @pa with protection @prot.
+ *
+ * At each page-table level, an entry that still points at the shared early
+ * shadow table is replaced by a newly allocated table initialised as a copy of
+ * that early table, so only the final PTE for @ea ends up different.
+ *
+ * Always returns 0; allocations go through memblock_alloc_or_panic(), which
+ * presumably does not return on failure (as its name suggests - confirm).
+ */
+static int __init kasan_map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ /* PUD level: replace the shared early table with a private copy */
+ if (kasan_pud_table(*p4dp)) {
+ pudp = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE);
+ memcpy(pudp, kasan_early_shadow_pud, PUD_TABLE_SIZE);
+ p4d_populate(&init_mm, p4dp, pudp);
+ }
+ pudp = pud_offset(p4dp, ea);
+ /* PMD level: same treatment */
+ if (kasan_pmd_table(*pudp)) {
+ pmdp = memblock_alloc_or_panic(PMD_TABLE_SIZE, PMD_TABLE_SIZE);
+ memcpy(pmdp, kasan_early_shadow_pmd, PMD_TABLE_SIZE);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ /* PTE level: same treatment */
+ if (kasan_pte_table(*pmdp)) {
+ ptep = memblock_alloc_or_panic(PTE_TABLE_SIZE, PTE_TABLE_SIZE);
+ memcpy(ptep, kasan_early_shadow_pte, PTE_TABLE_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+
+ __set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot), 0);
+
+ return 0;
+}
+
+/*
+ * Allocate and map the shadow for the memory range [start, end).
+ * The shadow bounds are widened to page boundaries, one block of that size is
+ * allocated, and each shadow page is mapped to the matching page of the block
+ * with PAGE_KERNEL. An empty range is ignored.
+ */
+static void __init kasan_init_phys_region(void *start, void *end)
+{
+ unsigned long k_start, k_end, k_cur;
+ void *va;
+
+ if (start >= end)
+ return;
+
+ k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE);
+ k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE);
+
+ va = memblock_alloc_or_panic(k_end - k_start, PAGE_SIZE);
+ /* k_cur walks the shadow while va walks the backing block in step */
+ for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE)
+ kasan_map_kernel_page(k_cur, __pa(va), PAGE_KERNEL);
+}
+
+/*
+ * Build the early shadow page tables.
+ *
+ * Every entry of kasan_early_shadow_pte maps kasan_early_shadow_page, every
+ * entry of kasan_early_shadow_pmd points at that PTE table, every entry of
+ * kasan_early_shadow_pud points at that PMD table, and every top-level slot
+ * covering [KASAN_SHADOW_START, KASAN_SHADOW_END) points at that PUD table.
+ */
+void __init kasan_early_init(void)
+{
+ int i;
+ unsigned long addr;
+ pgd_t *pgd = pgd_offset_k(KASAN_SHADOW_START);
+ pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+
+ /* Both ends of the shadow must sit on a PGDIR_SIZE boundary */
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
+ kasan_early_shadow_pte);
+
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ pud_populate(&init_mm, &kasan_early_shadow_pud[i],
+ kasan_early_shadow_pmd);
+
+ /* One top-level slot per PGDIR_SIZE step across the shadow range */
+ for (addr = KASAN_SHADOW_START; addr != KASAN_SHADOW_END; addr += PGDIR_SIZE)
+ p4d_populate(&init_mm, p4d_offset(pgd++, addr), kasan_early_shadow_pud);
+}
+
+/*
+ * Main Book3E 64-bit KASAN setup.
+ *
+ * Creates the shadow for every memory range, removes the zero shadow over the
+ * vmalloc area when CONFIG_KASAN_VMALLOC is enabled, remaps the early shadow
+ * page with PAGE_KERNEL_RO, flushes the TLB for the shadow range, clears the
+ * early shadow page and enables reporting.
+ */
+void __init kasan_init(void)
+{
+ phys_addr_t start, end;
+ u64 i;
+ pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
+
+ for_each_mem_range(i, &start, &end)
+ kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end));
+
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_remove_zero_shadow((void *)VMALLOC_START, VMALLOC_SIZE);
+
+ /* Rewrite the shared early PTE table with the read-only PTE */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+
+ /* Enable error messages */
+ init_task.kasan_depth = 0;
+ kasan_init_generic();
+}
+
+/* Intentionally empty: this file has no late KASAN work to do. */
+void __init kasan_late_init(void) { }
diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c b/arch/powerpc/mm/kasan/init_book3s_64.c
new file mode 100644
index 000000000000..dcafa641804c
--- /dev/null
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KASAN for 64-bit Book3S powerpc
+ *
+ * Copyright 2019-2022, Daniel Axtens, IBM Corporation.
+ */
+
+/*
+ * ppc64 turns on virtual memory late in boot, after calling into generic code
+ * like the device-tree parser, so it uses this in conjunction with a hook in
+ * outline mode to avoid invalid access early in boot.
+ */
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/sched/task.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+
+/*
+ * Allocate and map the shadow for the memory range [start, end).
+ * The shadow bounds are widened to page boundaries, one block of that size is
+ * allocated, and each shadow page is mapped to the matching page of the block
+ * with map_kernel_page() and PAGE_KERNEL. An empty range is ignored.
+ */
+static void __init kasan_init_phys_region(void *start, void *end)
+{
+ unsigned long k_start, k_end, k_cur;
+ void *va;
+
+ if (start >= end)
+ return;
+
+ k_start = ALIGN_DOWN((unsigned long)kasan_mem_to_shadow(start), PAGE_SIZE);
+ k_end = ALIGN((unsigned long)kasan_mem_to_shadow(end), PAGE_SIZE);
+
+ va = memblock_alloc_or_panic(k_end - k_start, PAGE_SIZE);
+ /* k_cur walks the shadow while va walks the backing block in step */
+ for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE, va += PAGE_SIZE)
+ map_kernel_page(k_cur, __pa(va), PAGE_KERNEL);
+}
+
+/*
+ * Main Book3S 64-bit KASAN setup. Does nothing except print a warning when
+ * radix is not enabled.
+ */
+void __init kasan_init(void)
+{
+ /*
+ * We want to do the following things:
+ * 1) Map real memory into the shadow for all physical memblocks
+ * This takes us from c000... to c008...
+ * 2) Leave a hole over the shadow of vmalloc space. KASAN_VMALLOC
+ * will manage this for us.
+ * This takes us from c008... to c00a...
+ * 3) Map the 'early shadow'/zero page over iomap and vmemmap space.
+ * This takes us up to where we start at c00e...
+ */
+
+ void *k_start = kasan_mem_to_shadow((void *)RADIX_VMALLOC_END);
+ void *k_end = kasan_mem_to_shadow((void *)RADIX_VMEMMAP_END);
+ phys_addr_t start, end;
+ u64 i;
+ pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL);
+
+ if (!early_radix_enabled()) {
+ /* Terminate the message so it is emitted as a complete log line */
+ pr_warn("KASAN not enabled as it requires radix!\n");
+ return;
+ }
+
+ for_each_mem_range(i, &start, &end)
+ kasan_init_phys_region(phys_to_virt(start), phys_to_virt(end));
+
+ /* Build the shared early tables: PTE -> early page, PMD -> PTE, PUD -> PMD */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_populate_kernel(&init_mm, &kasan_early_shadow_pmd[i],
+ kasan_early_shadow_pte);
+
+ for (i = 0; i < PTRS_PER_PUD; i++)
+ pud_populate(&init_mm, &kasan_early_shadow_pud[i],
+ kasan_early_shadow_pmd);
+
+ /* map the early shadow over the iomap and vmemmap space */
+ kasan_populate_early_shadow(k_start, k_end);
+
+ /* mark early shadow region as RO and wipe it */
+ zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL_RO);
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ __set_pte_at(&init_mm, (unsigned long)kasan_early_shadow_page,
+ &kasan_early_shadow_pte[i], zero_pte, 0);
+
+ /*
+ * clear_page relies on some cache info that hasn't been set up yet.
+ * It ends up looping ~forever and blows up other data.
+ * Use memset instead.
+ */
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+
+ /* Enable error messages */
+ init_task.kasan_depth = 0;
+ kasan_init_generic();
+}
+
+/* Intentionally empty: this file does all of its setup in kasan_init(). */
+void __init kasan_early_init(void) { }
+
+/* Intentionally empty: this file has no late KASAN work to do. */
+void __init kasan_late_init(void) { }
diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
new file mode 100644
index 000000000000..ea821d0ffe16
--- /dev/null
+++ b/arch/powerpc/mm/maccess.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#include <asm/disassemble.h>
+#include <asm/inst.h>
+#include <asm/ppc-opcode.h>
+
+/*
+ * Report whether a nofault copy from @unsafe_src is permitted: true only when
+ * is_kernel_addr() accepts the source address. @size is not examined.
+ */
+bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
+{
+ return is_kernel_addr((unsigned long)unsafe_src);
+}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index e0f7a189c48e..3ddbfdbfa941 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC version
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -9,257 +10,218 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/gfp.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/stddef.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/highmem.h>
-#include <linux/initrd.h>
-#include <linux/pagemap.h>
#include <linux/suspend.h>
-#include <linux/memblock.h>
-#include <linux/hugetlb.h>
-#include <linux/slab.h>
-
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/io.h>
-#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/smp.h>
-#include <asm/machdep.h>
-#include <asm/btext.h>
-#include <asm/tlb.h>
-#include <asm/sections.h>
-#include <asm/sparsemem.h>
-#include <asm/vdso.h>
-#include <asm/fixmap.h>
+#include <linux/dma-direct.h>
+#include <linux/execmem.h>
+#include <linux/vmalloc.h>
+
#include <asm/swiotlb.h>
+#include <asm/machdep.h>
#include <asm/rtas.h>
+#include <asm/kasan.h>
+#include <asm/svm.h>
+#include <asm/mmzone.h>
+#include <asm/ftrace.h>
+#include <asm/text-patching.h>
+#include <asm/setup.h>
+#include <asm/fixmap.h>
-#include "mmu_decl.h"
-
-#ifndef CPU_FTR_COHERENT_ICACHE
-#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */
-#define CPU_FTR_NOEXECUTE 0
-#endif
-
-int init_bootmem_done;
-int mem_init_done;
-unsigned long long memory_limit;
-
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-EXPORT_SYMBOL(kmap_pte);
-pgprot_t kmap_prot;
-EXPORT_SYMBOL(kmap_prot);
-
-static inline pte_t *virt_to_kpte(unsigned long vaddr)
-{
- return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
- vaddr), vaddr), vaddr);
-}
-#endif
+#include <mm/mmu_decl.h>
-int page_is_ram(unsigned long pfn)
-{
-#ifndef CONFIG_PPC64 /* XXX for now */
- return pfn < max_pfn;
-#else
- unsigned long paddr = (pfn << PAGE_SHIFT);
- struct memblock_region *reg;
+unsigned long long memory_limit __initdata;
- for_each_memblock(memory, reg)
- if (paddr >= reg->base && paddr < (reg->base + reg->size))
- return 1;
- return 0;
-#endif
-}
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
- unsigned long size, pgprot_t vma_prot)
+pgprot_t __phys_mem_access_prot(unsigned long pfn, unsigned long size,
+ pgprot_t vma_prot)
{
if (ppc_md.phys_mem_access_prot)
- return ppc_md.phys_mem_access_prot(file, pfn, size, vma_prot);
+ return ppc_md.phys_mem_access_prot(pfn, size, vma_prot);
if (!page_is_ram(pfn))
vma_prot = pgprot_noncached(vma_prot);
return vma_prot;
}
-EXPORT_SYMBOL(phys_mem_access_prot);
+EXPORT_SYMBOL(__phys_mem_access_prot);
#ifdef CONFIG_MEMORY_HOTPLUG
+static DEFINE_MUTEX(linear_mapping_mutex);
#ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 start)
{
return hot_add_scn_to_nid(start);
}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
-int arch_add_memory(int nid, u64 start, u64 size)
+int __weak create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot)
{
- struct pglist_data *pgdata;
- struct zone *zone;
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
-
- pgdata = NODE_DATA(nid);
+ return -ENODEV;
+}
- start = (unsigned long)__va(start);
- if (create_section_mapping(start, start + size))
- return -EINVAL;
+int __weak remove_section_mapping(unsigned long start, unsigned long end)
+{
+ return -ENODEV;
+}
- /* this should work for most non-highmem platforms */
- zone = pgdata->node_zones +
- zone_for_memory(nid, start, size, 0);
+int __ref arch_create_linear_mapping(int nid, u64 start, u64 size,
+ struct mhp_params *params)
+{
+ int rc;
- return __add_pages(nid, zone, start_pfn, nr_pages);
+ start = (unsigned long)__va(start);
+ mutex_lock(&linear_mapping_mutex);
+ rc = create_section_mapping(start, start + size, nid,
+ params->pgprot);
+ mutex_unlock(&linear_mapping_mutex);
+ if (rc) {
+ pr_warn("Unable to create linear mapping for 0x%llx..0x%llx: %d\n",
+ start, start + size, rc);
+ return -EFAULT;
+ }
+ return 0;
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
-int arch_remove_memory(u64 start, u64 size)
+void __ref arch_remove_linear_mapping(u64 start, u64 size)
{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
int ret;
- zone = page_zone(pfn_to_page(start_pfn));
- ret = __remove_pages(zone, start_pfn, nr_pages);
- if (!ret && (ppc_md.remove_memory))
- ret = ppc_md.remove_memory(start, size);
+ /* Remove htab bolted mappings for this section of memory */
+ start = (unsigned long)__va(start);
- return ret;
+ mutex_lock(&linear_mapping_mutex);
+ ret = remove_section_mapping(start, start + size);
+ mutex_unlock(&linear_mapping_mutex);
+ if (ret)
+ pr_warn("Unable to remove linear mapping for 0x%llx..0x%llx: %d\n",
+ start, start + size, ret);
+
+ /* Ensure all vmalloc mappings are flushed in case they also
+ * hit that section of memory
+ */
+ vm_unmap_aliases();
}
-#endif
-#endif /* CONFIG_MEMORY_HOTPLUG */
/*
- * walk_memory_resource() needs to make sure there is no holes in a given
- * memory range. PPC64 does not maintain the memory layout in /proc/iomem.
- * Instead it maintains it in memblock.memory structures. Walk through the
- * memory regions, find holes and callback for contiguous regions.
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
*/
-int
-walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
- void *arg, int (*func)(unsigned long, unsigned long, void *))
+static void update_end_of_memory_vars(u64 start, u64 size)
{
- struct memblock_region *reg;
- unsigned long end_pfn = start_pfn + nr_pages;
- unsigned long tstart, tend;
- int ret = -1;
-
- for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart >= tend)
- continue;
- ret = (*func)(tstart, tend - tstart, arg);
- if (ret)
- break;
+ unsigned long end_pfn = PFN_UP(start + size);
+
+ if (end_pfn > max_pfn) {
+ max_pfn = end_pfn;
+ max_low_pfn = end_pfn;
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
}
+}
+
+int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct mhp_params *params)
+{
+ int ret;
+
+ ret = __add_pages(nid, start_pfn, nr_pages, params);
+ if (ret)
+ return ret;
+
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT);
+
return ret;
}
-EXPORT_SYMBOL_GPL(walk_system_ram_range);
-/*
- * Initialize the bootmem system and give it all the memory we
- * have available. If we are using highmem, we only put the
- * lowmem into the bootmem system.
- */
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init do_init_bootmem(void)
+int __ref arch_add_memory(int nid, u64 start, u64 size,
+ struct mhp_params *params)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+ int rc;
+
+ rc = arch_create_linear_mapping(nid, start, size, params);
+ if (rc)
+ return rc;
+ rc = add_pages(nid, start_pfn, nr_pages, params);
+ if (rc)
+ arch_remove_linear_mapping(start, size);
+ return rc;
+}
+
+void __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{
- unsigned long start, bootmap_pages;
- unsigned long total_pages;
- struct memblock_region *reg;
- int boot_mapsize;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ __remove_pages(start_pfn, nr_pages, altmap);
+ arch_remove_linear_mapping(start, size);
+}
+#endif
+#ifndef CONFIG_NUMA
+void __init mem_topology_setup(void)
+{
max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- total_pages = (memblock_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
+ min_low_pfn = MEMORY_START >> PAGE_SHIFT;
#ifdef CONFIG_HIGHMEM
- total_pages = total_lowmem >> PAGE_SHIFT;
max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
#endif
- /*
- * Find an area to use for the bootmem bitmap. Calculate the size of
- * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE.
- * Add 1 additional page in case the address isn't page-aligned.
- */
- bootmap_pages = bootmem_bootmap_pages(total_pages);
-
- start = memblock_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
-
- min_low_pfn = MEMORY_START >> PAGE_SHIFT;
- boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
-
/* Place all memblock_regions in the same node and merge contiguous
* memblock_regions
*/
- memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);
-
- /* Add all physical memory to the bootmem map, mark each area
- * present.
- */
-#ifdef CONFIG_HIGHMEM
- free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT);
-
- /* reserve the sections we're already using */
- for_each_memblock(reserved, reg) {
- unsigned long top = reg->base + reg->size - 1;
- if (top < lowmem_end_addr)
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
- else if (reg->base < lowmem_end_addr) {
- unsigned long trunc_size = lowmem_end_addr - reg->base;
- reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT);
- }
- }
-#else
- free_bootmem_with_active_regions(0, max_pfn);
-
- /* reserve the sections we're already using */
- for_each_memblock(reserved, reg)
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
-#endif
- /* XXX need to clip this if using highmem? */
- sparse_memory_present_with_active_regions(0);
+ memblock_set_node(0, PHYS_ADDR_MAX, &memblock.memory, 0);
+}
- init_bootmem_done = 1;
+void __init initmem_init(void)
+{
+ sparse_init();
}
/* mark pages that don't exist as nosave */
static int __init mark_nonram_nosave(void)
{
- struct memblock_region *reg, *prev = NULL;
-
- for_each_memblock(memory, reg) {
- if (prev &&
- memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg))
- register_nosave_region(memblock_region_memory_end_pfn(prev),
- memblock_region_memory_base_pfn(reg));
- prev = reg;
+ unsigned long spfn, epfn, prev = 0;
+ int i;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &spfn, &epfn, NULL) {
+ if (prev && prev < spfn)
+ register_nosave_region(prev, spfn);
+
+ prev = epfn;
}
+
return 0;
}
+#else /* CONFIG_NUMA */
+static int __init mark_nonram_nosave(void)
+{
+ return 0;
+}
+#endif
+
+/*
+ * Zones usage:
+ *
+ * We setup ZONE_DMA to be 31-bits on all platforms and ZONE_NORMAL to be
+ * everything else. GFP_DMA32 page allocations automatically fall back to
+ * ZONE_DMA.
+ *
+ * By using 31-bit unconditionally, we can exploit zone_dma_limit to inform the
+ * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU
+ * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by
+ * ZONE_DMA.
+ */
+static unsigned long max_zone_pfns[MAX_NR_ZONES];
/*
* paging_init() sets up the page tables - in fact we've already done this.
@@ -268,50 +230,50 @@ void __init paging_init(void)
{
unsigned long long total_ram = memblock_phys_mem_size();
phys_addr_t top_of_ram = memblock_end_of_DRAM();
- unsigned long max_zone_pfns[MAX_NR_ZONES];
+ int zone_dma_bits;
-#ifdef CONFIG_PPC32
- unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
- unsigned long end = __fix_to_virt(FIX_HOLE);
+#ifdef CONFIG_HIGHMEM
+ unsigned long v = __fix_to_virt(FIX_KMAP_END);
+ unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
for (; v < end; v += PAGE_SIZE)
- map_page(v, 0, 0); /* XXX gross */
-#endif
+ map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
-#ifdef CONFIG_HIGHMEM
- map_page(PKMAP_BASE, 0, 0); /* XXX gross */
+ map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
-
- kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
- kmap_prot = PAGE_KERNEL;
#endif /* CONFIG_HIGHMEM */
printk(KERN_DEBUG "Top of RAM: 0x%llx, Total RAM: 0x%llx\n",
(unsigned long long)top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(long int)((top_of_ram - total_ram) >> 20));
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+
+ /*
+ * Allow 30-bit DMA for very limited Broadcom wifi chips on many
+ * powerbooks.
+ */
+ if (IS_ENABLED(CONFIG_PPC32))
+ zone_dma_bits = 30;
+ else
+ zone_dma_bits = 31;
+
+ zone_dma_limit = DMA_BIT_MASK(zone_dma_bits);
+
+#ifdef CONFIG_ZONE_DMA
+ max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
+ 1UL << (zone_dma_bits - PAGE_SHIFT));
+#endif
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
- max_zone_pfns[ZONE_DMA] = lowmem_end_addr >> PAGE_SHIFT;
- max_zone_pfns[ZONE_HIGHMEM] = top_of_ram >> PAGE_SHIFT;
-#else
- max_zone_pfns[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
+ max_zone_pfns[ZONE_HIGHMEM] = max_pfn;
#endif
- free_area_init_nodes(max_zone_pfns);
-
- mark_nonram_nosave();
-}
-#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
-static void __init register_page_bootmem_info(void)
-{
- int i;
+ free_area_init(max_zone_pfns);
- for_each_online_node(i)
- register_page_bootmem_info_node(NODE_DATA(i));
+ mark_nonram_nosave();
}
-void __init mem_init(void)
+void __init arch_mm_preinit(void)
{
/*
* book3s is limited to 16 page sizes due to encoding this in
@@ -320,29 +282,20 @@ void __init mem_init(void)
BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
#ifdef CONFIG_SWIOTLB
- swiotlb_init(0);
+ /*
+ * Some platforms (e.g. 85xx) limit DMA-able memory way below
+ * 4G. We force memblock to bottom-up mode to ensure that the
+ * memory allocated in swiotlb_init() is DMA-able.
+ * As it's the last memblock allocation, no need to reset it
+ * back to top-down.
+ */
+ memblock_set_bottom_up(true);
+ swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags);
#endif
- register_page_bootmem_info();
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
- set_max_mapnr(max_pfn);
- free_all_bootmem();
-
-#ifdef CONFIG_HIGHMEM
- {
- unsigned long pfn, highmem_mapnr;
-
- highmem_mapnr = lowmem_end_addr >> PAGE_SHIFT;
- for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) {
- phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
- struct page *page = pfn_to_page(pfn);
- if (!memblock_is_reserved(paddr))
- free_highmem_page(page);
- }
- }
-#endif /* CONFIG_HIGHMEM */
+ kasan_late_init();
-#if defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_SMP)
+#if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP)
/*
* If smp is enabled, next_tlbcam_idx is initialized in the cpu up
* functions.... do it here for the non-smp case.
@@ -350,169 +303,14 @@ void __init mem_init(void)
per_cpu(next_tlbcam_idx, smp_processor_id()) =
(mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) - 1;
#endif
-
- mem_init_print_info(NULL);
-#ifdef CONFIG_PPC32
- pr_info("Kernel virtual memory layout:\n");
- pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP);
-#ifdef CONFIG_HIGHMEM
- pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
- PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP));
-#endif /* CONFIG_HIGHMEM */
-#ifdef CONFIG_NOT_COHERENT_CACHE
- pr_info(" * 0x%08lx..0x%08lx : consistent mem\n",
- IOREMAP_TOP, IOREMAP_TOP + CONFIG_CONSISTENT_SIZE);
-#endif /* CONFIG_NOT_COHERENT_CACHE */
- pr_info(" * 0x%08lx..0x%08lx : early ioremap\n",
- ioremap_bot, IOREMAP_TOP);
- pr_info(" * 0x%08lx..0x%08lx : vmalloc & ioremap\n",
- VMALLOC_START, VMALLOC_END);
-#endif /* CONFIG_PPC32 */
-
- mem_init_done = 1;
}
void free_initmem(void)
{
ppc_md.progress = ppc_printk_progress;
+ mark_initmem_nx();
free_initmem_default(POISON_FREE_INITMEM);
-}
-
-#ifdef CONFIG_BLK_DEV_INITRD
-void __init free_initrd_mem(unsigned long start, unsigned long end)
-{
- free_reserved_area((void *)start, (void *)end, -1, "initrd");
-}
-#endif
-
-/*
- * This is called when a page has been modified by the kernel.
- * It just marks the page as not i-cache clean. We do the i-cache
- * flush later when the page is given to a user process, if necessary.
- */
-void flush_dcache_page(struct page *page)
-{
- if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- return;
- /* avoid an atomic op if possible */
- if (test_bit(PG_arch_1, &page->flags))
- clear_bit(PG_arch_1, &page->flags);
-}
-EXPORT_SYMBOL(flush_dcache_page);
-
-void flush_dcache_icache_page(struct page *page)
-{
-#ifdef CONFIG_HUGETLB_PAGE
- if (PageCompound(page)) {
- flush_dcache_icache_hugepage(page);
- return;
- }
-#endif
-#ifdef CONFIG_BOOKE
- {
- void *start = kmap_atomic(page);
- __flush_dcache_icache(start);
- kunmap_atomic(start);
- }
-#elif defined(CONFIG_8xx) || defined(CONFIG_PPC64)
- /* On 8xx there is no need to kmap since highmem is not supported */
- __flush_dcache_icache(page_address(page));
-#else
- __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
-#endif
-}
-EXPORT_SYMBOL(flush_dcache_icache_page);
-
-void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
-{
- clear_page(page);
-
- /*
- * We shouldn't have to do this, but some versions of glibc
- * require it (ld.so assumes zero filled pages are icache clean)
- * - Anton
- */
- flush_dcache_page(pg);
-}
-EXPORT_SYMBOL(clear_user_page);
-
-void copy_user_page(void *vto, void *vfrom, unsigned long vaddr,
- struct page *pg)
-{
- copy_page(vto, vfrom);
-
- /*
- * We should be able to use the following optimisation, however
- * there are two problems.
- * Firstly a bug in some versions of binutils meant PLT sections
- * were not marked executable.
- * Secondly the first word in the GOT section is blrl, used
- * to establish the GOT address. Until recently the GOT was
- * not marked executable.
- * - Anton
- */
-#if 0
- if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0))
- return;
-#endif
-
- flush_dcache_page(pg);
-}
-
-void flush_icache_user_range(struct vm_area_struct *vma, struct page *page,
- unsigned long addr, int len)
-{
- unsigned long maddr;
-
- maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);
- flush_icache_range(maddr, maddr + len);
- kunmap(page);
-}
-EXPORT_SYMBOL(flush_icache_user_range);
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a PTE in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux PTE.
- *
- * This must always be called with the pte lock held.
- */
-void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
- pte_t *ptep)
-{
-#ifdef CONFIG_PPC_STD_MMU
- /*
- * We don't need to worry about _PAGE_PRESENT here because we are
- * called with either mm->page_table_lock held or ptl lock held
- */
- unsigned long access = 0, trap;
-
- /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */
- if (!pte_young(*ptep) || address >= TASK_SIZE)
- return;
-
- /* We try to figure out if we are coming from an instruction
- * access fault and pass that down to __hash_page so we avoid
- * double-faulting on execution of fresh text. We have to test
- * for regs NULL since init will get here first thing at boot
- *
- * We also avoid filling the hash if not coming from a fault
- */
- if (current->thread.regs == NULL)
- return;
- trap = TRAP(current->thread.regs);
- if (trap == 0x400)
- access |= _PAGE_EXEC;
- else if (trap != 0x300)
- return;
- hash_preload(vma->vm_mm, address, access, trap);
-#endif /* CONFIG_PPC_STD_MMU */
-#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
- && defined(CONFIG_HUGETLB_PAGE)
- if (is_vm_hugetlb_page(vma))
- book3e_hugetlb_preload(vma, address, *ptep);
-#endif
+ ftrace_free_init_tramp();
}
/*
@@ -521,22 +319,26 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
*/
static int __init add_system_ram_resources(void)
{
- struct memblock_region *reg;
+ phys_addr_t start, end;
+ u64 i;
- for_each_memblock(memory, reg) {
+ for_each_mem_range(i, &start, &end) {
struct resource *res;
- unsigned long base = reg->base;
- unsigned long size = reg->size;
res = kzalloc(sizeof(struct resource), GFP_KERNEL);
WARN_ON(!res);
if (res) {
res->name = "System RAM";
- res->start = base;
- res->end = base + size - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- WARN_ON(request_resource(&iomem_resource, res) < 0);
+ res->start = start;
+ /*
+ * In memblock, end points to the first byte after
+ * the range while in resources, end points to the
+ * last byte in the range.
+ */
+ res->end = end - 1;
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ WARN_ON(insert_resource(&iomem_resource, res) < 0);
}
}
@@ -554,12 +356,95 @@ subsys_initcall(add_system_ram_resources);
*/
int devmem_is_allowed(unsigned long pfn)
{
- if (iomem_is_exclusive(pfn << PAGE_SHIFT))
+ if (page_is_rtas_user_buf(pfn))
+ return 1;
+ if (iomem_is_exclusive(PFN_PHYS(pfn)))
return 0;
if (!page_is_ram(pfn))
return 1;
- if (page_is_rtas_user_buf(pfn))
- return 1;
return 0;
}
#endif /* CONFIG_STRICT_DEVMEM */
+
+/*
+ * This is defined in kernel/resource.c but only powerpc needs to export it, for
+ * the EHEA driver. Drop this when drivers/net/ethernet/ibm/ehea is removed.
+ */
+EXPORT_SYMBOL_GPL(walk_system_ram_range);
+
+#ifdef CONFIG_EXECMEM
+static struct execmem_info execmem_info __ro_after_init;
+
+#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_BOOK3S_603)
+static void prealloc_execmem_pgtable(void)
+{
+ unsigned long va;
+
+ for (va = ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE); va < MODULES_END; va += PGDIR_SIZE)
+ pte_alloc_kernel(pmd_off_k(va), va);
+}
+#else
+static void prealloc_execmem_pgtable(void) { }
+#endif
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+ pgprot_t kprobes_prot = strict_module_rwx_enabled() ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC;
+ pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC;
+ unsigned long fallback_start = 0, fallback_end = 0;
+ unsigned long start, end;
+
+ /*
+ * BOOK3S_32 and 8xx define MODULES_VADDR for text allocations and
+ * allow allocating data in the entire vmalloc space
+ */
+#ifdef MODULES_VADDR
+ unsigned long limit = (unsigned long)_etext - SZ_32M;
+
+ BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+ /* First try within 32M limit from _etext to avoid branch trampolines */
+ if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) {
+ start = limit;
+ fallback_start = MODULES_VADDR;
+ fallback_end = MODULES_END;
+ } else {
+ start = MODULES_VADDR;
+ }
+
+ end = MODULES_END;
+#else
+ start = VMALLOC_START;
+ end = VMALLOC_END;
+#endif
+
+ prealloc_execmem_pgtable();
+
+ execmem_info = (struct execmem_info){
+ .ranges = {
+ [EXECMEM_DEFAULT] = {
+ .start = start,
+ .end = end,
+ .pgprot = prot,
+ .alignment = 1,
+ .fallback_start = fallback_start,
+ .fallback_end = fallback_end,
+ },
+ [EXECMEM_KPROBES] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = kprobes_prot,
+ .alignment = 1,
+ },
+ [EXECMEM_MODULE_DATA] = {
+ .start = VMALLOC_START,
+ .end = VMALLOC_END,
+ .pgprot = PAGE_KERNEL,
+ .alignment = 1,
+ },
+ },
+ };
+
+ return &execmem_info;
+}
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
deleted file mode 100644
index cb8bdbe4972f..000000000000
--- a/arch/powerpc/mm/mmap.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * flexible mmap layout support
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- *
- * Started by Ingo Molnar <mingo@elte.hu>
- */
-
-#include <linux/personality.h>
-#include <linux/mm.h>
-#include <linux/random.h>
-#include <linux/sched.h>
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave at least a ~128 MB hole on 32bit applications.
- *
- * On 64bit applications we randomise the stack by 1GB so we need to
- * space our mmap start address by a further 1GB, otherwise there is a
- * chance the mmap area will end up closer to the stack than our ulimit
- * requires.
- */
-#define MIN_GAP32 (128*1024*1024)
-#define MIN_GAP64 ((128 + 1024)*1024*1024UL)
-#define MIN_GAP ((is_32bit_task()) ? MIN_GAP32 : MIN_GAP64)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline int mmap_is_legacy(void)
-{
- if (current->personality & ADDR_COMPAT_LAYOUT)
- return 1;
-
- if (rlimit(RLIMIT_STACK) == RLIM_INFINITY)
- return 1;
-
- return sysctl_legacy_va_layout;
-}
-
-static unsigned long mmap_rnd(void)
-{
- unsigned long rnd = 0;
-
- if (current->flags & PF_RANDOMIZE) {
- /* 8MB for 32bit, 1GB for 64bit */
- if (is_32bit_task())
- rnd = (long)(get_random_int() % (1<<(23-PAGE_SHIFT)));
- else
- rnd = (long)(get_random_int() % (1<<(30-PAGE_SHIFT)));
- }
- return rnd << PAGE_SHIFT;
-}
-
-static inline unsigned long mmap_base(void)
-{
- unsigned long gap = rlimit(RLIMIT_STACK);
-
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
-
- return PAGE_ALIGN(TASK_SIZE - gap - mmap_rnd());
-}
-
-/*
- * This function, called very early during the creation of a new
- * process VM image, sets up which VM layout function to use:
- */
-void arch_pick_mmap_layout(struct mm_struct *mm)
-{
- /*
- * Fall back to the standard layout if the personality
- * bit is set, or if the expected stack growth is unlimited:
- */
- if (mmap_is_legacy()) {
- mm->mmap_base = TASK_UNMAPPED_BASE;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base();
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- }
-}
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
new file mode 100644
index 000000000000..3e3af29b4523
--- /dev/null
+++ b/arch/powerpc/mm/mmu_context.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common implementation of switch_mm_irqs_off
+ *
+ * Copyright IBM Corp. 2017
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/sched/mm.h>
+
+#include <asm/mmu_context.h>
+#include <asm/pgalloc.h>
+
+#if defined(CONFIG_PPC32)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ /* 32-bit keeps track of the current PGDIR in the thread struct */
+ tsk->thread.pgdir = mm->pgd;
+#ifdef CONFIG_PPC_BOOK3S_32
+ tsk->thread.sr0 = mm->context.sr0;
+#endif
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
+ tsk->thread.pid = mm->context.id;
+#endif
+}
+#elif defined(CONFIG_PPC_BOOK3E_64)
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+ /* 64-bit Book3E keeps track of current PGD in the PACA */
+ get_paca()->pgd = mm->pgd;
+#ifdef CONFIG_PPC_KUAP
+ tsk->thread.pid = mm->context.id;
+#endif
+}
+#else
+static inline void switch_mm_pgdir(struct task_struct *tsk,
+ struct mm_struct *mm) { }
+#endif
+
+void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+{
+ int cpu = smp_processor_id();
+ bool new_on_cpu = false;
+
+ /* Mark this context as having been used on the new CPU */
+ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+ VM_WARN_ON_ONCE(next == &init_mm);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+ inc_mm_active_cpus(next);
+
+ /*
+ * This full barrier orders the store to the cpumask above vs
+ * a subsequent load which allows this CPU/MMU to begin loading
+ * translations for 'next' from page table PTEs into the TLB.
+ *
+ * When using the radix MMU, that operation is the load of the
+ * MMU context id, which is then moved to SPRN_PID.
+ *
+ * For the hash MMU it is either the first load from slb_cache
+ * in switch_slb() to preload the SLBs, or the load of
+ * get_user_context which loads the context for the VSID hash
+ * to insert a new SLB, in the SLB fault handler.
+ *
+ * On the other side, the barrier is in mm/tlb-radix.c for
+ * radix which orders earlier stores to clear the PTEs before
+ * the load of mm_cpumask to check which CPU TLBs should be
+ * flushed. For hash, pte_xchg to clear the PTE includes the
+ * barrier.
+ *
+ * This full barrier is also needed by membarrier when
+ * switching between processes after store to rq->curr, before
+ * user-space memory accesses.
+ */
+ smp_mb();
+
+ new_on_cpu = true;
+ }
+
+ /* Some subarchs need to track the PGD elsewhere */
+ switch_mm_pgdir(tsk, next);
+
+ /* Nothing else to do if we aren't actually switching */
+ if (prev == next)
+ return;
+
+ /*
+ * We must stop all altivec streams before changing the HW
+ * context
+ */
+ if (cpu_has_feature(CPU_FTR_ALTIVEC))
+ asm volatile (PPC_DSSALL);
+
+ if (!new_on_cpu)
+ membarrier_arch_switch_mm(prev, next, tsk);
+
+ /*
+ * The actual HW switching method differs between the various
+ * sub architectures. Out of line for now
+ */
+ switch_mmu_context(prev, next, tsk);
+
+ VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev)));
+}
+
+#ifndef CONFIG_PPC_BOOK3S_64
+void arch_exit_mmap(struct mm_struct *mm)
+{
+ void *frag = pte_frag_get(&mm->context);
+
+ if (frag)
+ pte_frag_destroy(frag);
+}
+#endif
diff --git a/arch/powerpc/mm/mmu_context_hash64.c b/arch/powerpc/mm/mmu_context_hash64.c
deleted file mode 100644
index 178876aef40f..000000000000
--- a/arch/powerpc/mm/mmu_context_hash64.c
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * MMU context allocation for 64-bit kernels.
- *
- * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/idr.h>
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/slab.h>
-
-#include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
-
-#include "icswx.h"
-
-static DEFINE_SPINLOCK(mmu_context_lock);
-static DEFINE_IDA(mmu_context_ida);
-
-int __init_new_context(void)
-{
- int index;
- int err;
-
-again:
- if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
- return -ENOMEM;
-
- spin_lock(&mmu_context_lock);
- err = ida_get_new_above(&mmu_context_ida, 1, &index);
- spin_unlock(&mmu_context_lock);
-
- if (err == -EAGAIN)
- goto again;
- else if (err)
- return err;
-
- if (index > MAX_USER_CONTEXT) {
- spin_lock(&mmu_context_lock);
- ida_remove(&mmu_context_ida, index);
- spin_unlock(&mmu_context_lock);
- return -ENOMEM;
- }
-
- return index;
-}
-EXPORT_SYMBOL_GPL(__init_new_context);
-
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
-{
- int index;
-
- index = __init_new_context();
- if (index < 0)
- return index;
-
- /* The old code would re-promote on fork, we don't do that
- * when using slices as it could cause problem promoting slices
- * that have been forced down to 4K
- */
- if (slice_mm_new_context(mm))
- slice_set_user_psize(mm, mmu_virtual_psize);
- subpage_prot_init_new_context(mm);
- mm->context.id = index;
-#ifdef CONFIG_PPC_ICSWX
- mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL);
- if (!mm->context.cop_lockp) {
- __destroy_context(index);
- subpage_prot_free(mm);
- mm->context.id = MMU_NO_CONTEXT;
- return -ENOMEM;
- }
- spin_lock_init(mm->context.cop_lockp);
-#endif /* CONFIG_PPC_ICSWX */
-
-#ifdef CONFIG_PPC_64K_PAGES
- mm->context.pte_frag = NULL;
-#endif
- return 0;
-}
-
-void __destroy_context(int context_id)
-{
- spin_lock(&mmu_context_lock);
- ida_remove(&mmu_context_ida, context_id);
- spin_unlock(&mmu_context_lock);
-}
-EXPORT_SYMBOL_GPL(__destroy_context);
-
-#ifdef CONFIG_PPC_64K_PAGES
-static void destroy_pagetable_page(struct mm_struct *mm)
-{
- int count;
- void *pte_frag;
- struct page *page;
-
- pte_frag = mm->context.pte_frag;
- if (!pte_frag)
- return;
-
- page = virt_to_page(pte_frag);
- /* drop all the pending references */
- count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
- /* We allow PTE_FRAG_NR fragments from a PTE page */
- count = atomic_sub_return(PTE_FRAG_NR - count, &page->_count);
- if (!count) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
-}
-
-#else
-static inline void destroy_pagetable_page(struct mm_struct *mm)
-{
- return;
-}
-#endif
-
-
-void destroy_context(struct mm_struct *mm)
-{
-
-#ifdef CONFIG_PPC_ICSWX
- drop_cop(mm->context.acop, mm);
- kfree(mm->context.cop_lockp);
- mm->context.cop_lockp = NULL;
-#endif /* CONFIG_PPC_ICSWX */
-
- destroy_pagetable_page(mm);
- __destroy_context(mm->context.id);
- subpage_prot_free(mm);
- mm->context.id = MMU_NO_CONTEXT;
-}
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 9615d82919b8..b2d1eea09761 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Declarations of procedures and variables shared between files
* in arch/ppc/mm/.
@@ -11,53 +12,50 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/mm.h>
-#include <asm/tlbflush.h>
#include <asm/mmu.h>
#ifdef CONFIG_PPC_MMU_NOHASH
+#include <asm/trace.h>
/*
- * On 40x and 8xx, we directly inline tlbia and tlbivax
+ * On 8xx, we directly inline tlbia
*/
-#if defined(CONFIG_40x) || defined(CONFIG_8xx)
+#ifdef CONFIG_PPC_8xx
static inline void _tlbil_all(void)
{
asm volatile ("sync; tlbia; isync" : : : "memory");
+ trace_tlbia(MMU_NO_CONTEXT);
}
static inline void _tlbil_pid(unsigned int pid)
{
asm volatile ("sync; tlbia; isync" : : : "memory");
+ trace_tlbia(pid);
}
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
-#else /* CONFIG_40x || CONFIG_8xx */
+#else /* CONFIG_PPC_8xx */
extern void _tlbil_all(void);
extern void _tlbil_pid(unsigned int pid);
-#ifdef CONFIG_PPC_BOOK3E
+#ifdef CONFIG_PPC_BOOK3E_64
extern void _tlbil_pid_noind(unsigned int pid);
#else
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
#endif
-#endif /* !(CONFIG_40x || CONFIG_8xx) */
+#endif /* !CONFIG_PPC_8xx */
/*
* On 8xx, we directly inline tlbie, on others, it's extern
*/
-#ifdef CONFIG_8xx
+#ifdef CONFIG_PPC_8xx
static inline void _tlbil_va(unsigned long address, unsigned int pid,
unsigned int tsize, unsigned int ind)
{
asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+ trace_tlbie(0, 0, address, pid, 0, 0, 0);
}
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
extern void _tlbil_va(unsigned long address, unsigned int pid,
unsigned int tsize, unsigned int ind);
#else
@@ -67,9 +65,9 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid,
{
__tlbil_va(address, pid);
}
-#endif /* CONIFG_8xx */
+#endif /* CONFIG_PPC_8xx */
-#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x)
+#if defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_47x)
extern void _tlbivax_bcast(unsigned long address, unsigned int pid,
unsigned int tsize, unsigned int ind);
#else
@@ -80,78 +78,62 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
}
#endif
-#else /* CONFIG_PPC_MMU_NOHASH */
-
-extern void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap);
+static inline void print_system_hash_info(void) {}
+#else /* CONFIG_PPC_MMU_NOHASH */
-extern void _tlbie(unsigned long address);
-extern void _tlbia(void);
+void print_system_hash_info(void);
#endif /* CONFIG_PPC_MMU_NOHASH */
#ifdef CONFIG_PPC32
extern void mapin_ram(void);
-extern int map_page(unsigned long va, phys_addr_t pa, int flags);
extern void setbat(int index, unsigned long virt, phys_addr_t phys,
- unsigned int size, int flags);
+ unsigned int size, pgprot_t prot);
-extern int __map_without_bats;
-extern int __allow_ioremap_reserved;
-extern unsigned long ioremap_base;
-extern unsigned int rtas_data, rtas_size;
-
-struct hash_pte;
-extern struct hash_pte *Hash, *Hash_end;
-extern unsigned long Hash_size, Hash_mask;
+extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
-#ifdef CONFIG_PPC64
-extern int map_kernel_page(unsigned long ea, unsigned long pa, int flags);
-#endif /* CONFIG_PPC64 */
-
-extern unsigned long ioremap_bot;
extern unsigned long __max_low_memory;
-extern phys_addr_t __initial_memory_limit_addr;
extern phys_addr_t total_memory;
extern phys_addr_t total_lowmem;
extern phys_addr_t memstart_addr;
extern phys_addr_t lowmem_end_addr;
-#ifdef CONFIG_WII
-extern unsigned long wii_hole_start;
-extern unsigned long wii_hole_size;
-
-extern unsigned long wii_mmu_mapin_mem2(unsigned long top);
-extern void wii_memory_fixups(void);
-#endif
-
/* ...and now those things that may be slightly different between processor
* architectures. -- Dan
*/
-#if defined(CONFIG_8xx)
-#define MMU_init_hw() do { } while(0)
-#define mmu_mapin_ram(top) (0UL)
-
-#elif defined(CONFIG_4xx)
+#ifdef CONFIG_PPC32
extern void MMU_init_hw(void);
-extern unsigned long mmu_mapin_ram(unsigned long top);
+void MMU_init_hw_patch(void);
+unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
+#endif
+void mmu_init_secondary(int cpu);
-#elif defined(CONFIG_PPC_FSL_BOOK3E)
-extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx);
-extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
- phys_addr_t phys);
+#ifdef CONFIG_PPC_E500
+extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx,
+ bool dryrun, bool init);
#ifdef CONFIG_PPC32
-extern void MMU_init_hw(void);
-extern unsigned long mmu_mapin_ram(unsigned long top);
extern void adjust_total_lowmem(void);
extern int switch_to_as1(void);
extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
+void reloc_kernel_entry(void *fdt, int addr);
+void relocate_init(u64 dt_ptr, phys_addr_t start);
+extern int is_second_reloc;
#endif
extern void loadcam_entry(unsigned int index);
+extern void loadcam_multi(int first_idx, int num, int tmp_idx);
+
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
+#else
+static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
+#endif
struct tlbcam {
u32 MAS0;
@@ -160,8 +142,43 @@ struct tlbcam {
u32 MAS3;
u32 MAS7;
};
-#elif defined(CONFIG_PPC32)
-/* anything 32-bit except 4xx or 8xx */
-extern void MMU_init_hw(void);
-extern unsigned long mmu_mapin_ram(unsigned long top);
+
+#define NUM_TLBCAMS 64
+
+extern struct tlbcam TLBCAM[NUM_TLBCAMS];
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_8xx)
+/* 6xx have BATS */
+/* PPC_85xx have TLBCAM */
+/* 8xx have LTLB */
+phys_addr_t v_block_mapped(unsigned long va);
+unsigned long p_block_mapped(phys_addr_t pa);
+#else
+static inline phys_addr_t v_block_mapped(unsigned long va) { return 0; }
+static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; }
+#endif
+
+#if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500)
+int mmu_mark_initmem_nx(void);
+int mmu_mark_rodata_ro(void);
+#else
+static inline int mmu_mark_initmem_nx(void) { return 0; }
+static inline int mmu_mark_rodata_ro(void) { return 0; }
#endif
+
+#ifdef CONFIG_PPC_8xx
+void __init mmu_mapin_immr(void);
+#endif
+
+static inline bool debug_pagealloc_enabled_or_kfence(void)
+{
+ return IS_ENABLED(CONFIG_KFENCE) || debug_pagealloc_enabled();
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int create_section_mapping(unsigned long start, unsigned long end,
+ int nid, pgprot_t prot);
+#endif
+
+int hash__kernel_map_pages(struct page *page, int numpages, int enable);
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/nohash/44x.c
index 82b1ff759e26..6d10c6d8be71 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Modifications by Matt Porter (mporter@mvista.com) to support
* PPC44x Book E processors.
@@ -15,12 +16,6 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/init.h>
@@ -29,8 +24,10 @@
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
+#include <asm/text-patching.h>
+#include <asm/smp.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/* Used by the 44x TLB replacement exception handler.
* Just needed it declared someplace.
@@ -41,24 +38,15 @@ int icache_44x_need_flush;
unsigned long tlb_47x_boltmap[1024/8];
-static void ppc44x_update_tlb_hwater(void)
+static void __init ppc44x_update_tlb_hwater(void)
{
- extern unsigned int tlb_44x_patch_hwater_D[];
- extern unsigned int tlb_44x_patch_hwater_I[];
-
/* The TLB miss handlers hard codes the watermark in a cmpli
* instruction to improve performances rather than loading it
* from the global variable. Thus, we patch the instructions
* in the 2 TLB miss handlers when updating the value
*/
- tlb_44x_patch_hwater_D[0] = (tlb_44x_patch_hwater_D[0] & 0xffff0000) |
- tlb_44x_hwater;
- flush_icache_range((unsigned long)&tlb_44x_patch_hwater_D[0],
- (unsigned long)&tlb_44x_patch_hwater_D[1]);
- tlb_44x_patch_hwater_I[0] = (tlb_44x_patch_hwater_I[0] & 0xffff0000) |
- tlb_44x_hwater;
- flush_icache_range((unsigned long)&tlb_44x_patch_hwater_I[0],
- (unsigned long)&tlb_44x_patch_hwater_I[1]);
+ modify_instruction_site(&patch__tlb_44x_hwater_D, 0xffff, tlb_44x_hwater);
+ modify_instruction_site(&patch__tlb_44x_hwater_I, 0xffff, tlb_44x_hwater);
}
/*
@@ -134,7 +122,7 @@ static void __init ppc47x_update_boltmap(void)
/*
* "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU
*/
-static void ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
+static void __init ppc47x_pin_tlb(unsigned int virt, unsigned int phys)
{
unsigned int rA;
int bolted;
@@ -178,7 +166,7 @@ void __init MMU_init_hw(void)
flush_instruction_cache();
}
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
unsigned long addr;
unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
@@ -229,7 +217,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
}
#ifdef CONFIG_SMP
-void mmu_init_secondary(int cpu)
+void __init mmu_init_secondary(int cpu)
{
unsigned long addr;
unsigned long memstart = memstart_addr & ~(PPC_PIN_SIZE - 1);
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
new file mode 100644
index 000000000000..ab1505cf42bf
--- /dev/null
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing the MMU
+ * on the 8xx series of chips.
+ * -- christophe
+ *
+ * Derived from arch/powerpc/mm/40x_mmu.c:
+ */
+
+#include <linux/memblock.h>
+#include <linux/hugetlb.h>
+
+#include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+#include <mm/mmu_decl.h>
+
+#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
+
+static unsigned long block_mapped_ram;
+
+/*
+ * Return PA for this VA if it is in an area mapped with LTLBs or fixmap.
+ * Otherwise, returns 0
+ */
+phys_addr_t v_block_mapped(unsigned long va)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE)
+ return p + va - VIRT_IMMR_BASE;
+ if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram)
+ return __pa(va);
+ return 0;
+}
+
+/*
+ * Return VA for a given PA mapped with LTLBs or fixmap
+ * Return 0 if not mapped
+ */
+unsigned long p_block_mapped(phys_addr_t pa)
+{
+ unsigned long p = PHYS_IMMR_BASE;
+
+ if (pa >= p && pa < p + IMMR_SIZE)
+ return VIRT_IMMR_BASE + pa - p;
+ if (pa < block_mapped_ram)
+ return (unsigned long)__va(pa);
+ return 0;
+}
+
+static int __ref __early_map_kernel_hugepage(unsigned long va, phys_addr_t pa,
+ pgprot_t prot, int psize, bool new)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep;
+ unsigned int shift = mmu_psize_to_shift(psize);
+
+ if (new) {
+ if (WARN_ON(slab_is_available()))
+ return -EINVAL;
+
+ if (psize == MMU_PAGE_8M) {
+ if (WARN_ON(!pmd_none(*pmdp) || !pmd_none(*(pmdp + 1))))
+ return -EINVAL;
+
+ ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+
+ ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp + 1, ptep);
+
+ ptep = (pte_t *)pmdp;
+ } else {
+ ptep = early_pte_alloc_kernel(pmdp, va);
+ /* The PTE should never be already present */
+ if (WARN_ON(pte_present(*ptep) && pgprot_val(prot)))
+ return -EINVAL;
+ }
+ } else {
+ if (psize == MMU_PAGE_8M)
+ ptep = (pte_t *)pmdp;
+ else
+ ptep = pte_offset_kernel(pmdp, va);
+ }
+
+ if (WARN_ON(!ptep))
+ return -ENOMEM;
+
+ set_huge_pte_at(&init_mm, va, ptep,
+ arch_make_huge_pte(pfn_pte(pa >> PAGE_SHIFT, prot), shift, 0),
+ 1UL << shift);
+
+ return 0;
+}
+
+/*
+ * MMU_init_hw does the chip-specific initialization of the MMU hardware.
+ */
+void __init MMU_init_hw(void)
+{
+}
+
+static bool immr_is_mapped __initdata;
+
+void __init mmu_mapin_immr(void)
+{
+ if (immr_is_mapped)
+ return;
+
+ immr_is_mapped = true;
+
+ __early_map_kernel_hugepage(VIRT_IMMR_BASE, PHYS_IMMR_BASE,
+ PAGE_KERNEL_NCG, MMU_PAGE_512K, true);
+}
+
+static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top,
+ pgprot_t prot, bool new)
+{
+ unsigned long v = PAGE_OFFSET + offset;
+ unsigned long p = offset;
+ int err = 0;
+
+ WARN_ON(!IS_ALIGNED(offset, SZ_16K) || !IS_ALIGNED(top, SZ_16K));
+
+ for (; p < ALIGN(p, SZ_512K) && p < top && !err; p += SZ_16K, v += SZ_16K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new);
+ for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+ for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new);
+ for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new);
+ for (; p < ALIGN_DOWN(top, SZ_16K) && p < top && !err; p += SZ_16K, v += SZ_16K)
+ err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_16K, new);
+
+ if (!new)
+ flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top);
+
+ return err;
+}
+
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
+{
+ unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+ unsigned long sinittext = __pa(_sinittext);
+ bool strict_boundary = strict_kernel_rwx_enabled() || debug_pagealloc_enabled_or_kfence();
+ unsigned long boundary = strict_boundary ? sinittext : etext8;
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
+ WARN_ON(top < einittext8);
+
+ mmu_mapin_immr();
+
+ mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_X, true);
+ if (debug_pagealloc_enabled_or_kfence()) {
+ top = boundary;
+ } else {
+ mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL_X, true);
+ mmu_mapin_ram_chunk(einittext8, top, PAGE_KERNEL, true);
+ }
+
+ if (top > SZ_32M)
+ memblock_set_current_limit(top);
+
+ block_mapped_ram = top;
+
+ return top;
+}
+
+int mmu_mark_initmem_nx(void)
+{
+ unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+ unsigned long sinittext = __pa(_sinittext);
+ unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8;
+ unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+ int err = 0;
+
+ if (!debug_pagealloc_enabled_or_kfence())
+ err = mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false);
+
+ if (IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+ mmu_pin_tlb(block_mapped_ram, false);
+
+ return err;
+}
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+int mmu_mark_rodata_ro(void)
+{
+ unsigned long sinittext = __pa(_sinittext);
+ int err;
+
+ err = mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false);
+ if (IS_ENABLED(CONFIG_PIN_TLB_DATA))
+ mmu_pin_tlb(block_mapped_ram, true);
+
+ return err;
+}
+#endif
+
+void __init setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ /* We don't currently support the first MEMBLOCK not mapping 0
+ * physical on those processors
+ */
+ BUG_ON(first_memblock_base != 0);
+
+ /* 8xx can only access 32MB at the moment */
+ memblock_set_current_limit(min_t(u64, first_memblock_size, SZ_32M));
+
+ BUILD_BUG_ON(ALIGN_DOWN(MODULES_VADDR, PGDIR_SIZE) < TASK_SIZE);
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+ return 0;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+ return 0;
+}
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
new file mode 100644
index 000000000000..cf60c776c883
--- /dev/null
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += mmu_context.o tlb.o tlb_low.o kup.o
+obj-$(CONFIG_PPC_BOOK3E_64) += tlb_64e.o tlb_low_64e.o book3e_pgtable.o
+obj-$(CONFIG_44x) += 44x.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_E500) += e500.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr_booke.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_E500) += e500_hugetlbpage.o
+endif
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_e500.o := n
diff --git a/arch/powerpc/mm/nohash/book3e_pgtable.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
new file mode 100644
index 000000000000..062e8785c1bb
--- /dev/null
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2005, Paul Mackerras, IBM Corporation.
+ * Copyright 2009, Benjamin Herrenschmidt, IBM Corporation.
+ * Copyright 2015-2016, Aneesh Kumar K.V, IBM Corporation.
+ */
+
+#include <linux/sched.h>
+#include <linux/memblock.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+#include <asm/dma.h>
+#include <asm/text-patching.h>
+
+#include <mm/mmu_decl.h>
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * On Book3E CPUs, the vmemmap is currently mapped in the top half of
+ * the vmalloc space using normal page tables, though the size of
+ * pages encoded in the PTEs can be different
+ */
+int __meminit vmemmap_create_mapping(unsigned long start,
+ unsigned long page_size,
+ unsigned long phys)
+{
+ /* Create a PTE encoding without page size */
+ unsigned long i, flags = _PAGE_PRESENT | _PAGE_ACCESSED |
+ _PAGE_KERNEL_RW;
+
+ /* PTEs only contain page size encodings up to 32M */
+ BUG_ON(mmu_psize_defs[mmu_vmemmap_psize].shift - 10 > 0xf);
+
+ /* Encode the size in the PTE */
+ flags |= (mmu_psize_defs[mmu_vmemmap_psize].shift - 10) << 8;
+
+ /* For each PTE for that area, map things. Note that we don't
+ * increment phys because all PTEs are of the large size and
+ * thus must have the low bits clear
+ */
+ for (i = 0; i < page_size; i += PAGE_SIZE)
+ BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags)));
+
+ return 0;
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_remove_mapping(unsigned long start,
+ unsigned long page_size)
+{
+}
+#endif
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
+static void __init *early_alloc_pgtable(unsigned long size)
+{
+ void *ptr;
+
+ ptr = memblock_alloc_try_nid(size, size, MEMBLOCK_LOW_LIMIT,
+ __pa(MAX_DMA_ADDRESS), NUMA_NO_NODE);
+
+ if (!ptr)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx max_addr=%lx\n",
+ __func__, size, size, __pa(MAX_DMA_ADDRESS));
+
+ return ptr;
+}
+
+/*
+ * map_kernel_page currently only called by __ioremap
+ * map_kernel_page adds an entry to the ioremap page table
+ * and adds an entry to the HPT, possibly bolting it
+ */
+int __ref map_kernel_page(unsigned long ea, phys_addr_t pa, pgprot_t prot)
+{
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ BUILD_BUG_ON(TASK_SIZE_USER64 > PGTABLE_RANGE);
+ if (slab_is_available()) {
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ pudp = pud_alloc(&init_mm, p4dp, ea);
+ if (!pudp)
+ return -ENOMEM;
+ pmdp = pmd_alloc(&init_mm, pudp, ea);
+ if (!pmdp)
+ return -ENOMEM;
+ ptep = pte_alloc_kernel(pmdp, ea);
+ if (!ptep)
+ return -ENOMEM;
+ } else {
+ pgdp = pgd_offset_k(ea);
+ p4dp = p4d_offset(pgdp, ea);
+ if (p4d_none(*p4dp)) {
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+ p4d_populate(&init_mm, p4dp, pudp);
+ }
+ pudp = pud_offset(p4dp, ea);
+ if (pud_none(*pudp)) {
+ pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
+ pud_populate(&init_mm, pudp, pmdp);
+ }
+ pmdp = pmd_offset(pudp, ea);
+ if (!pmd_present(*pmdp)) {
+ ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ ptep = pte_offset_kernel(pmdp, ea);
+ }
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
+
+ smp_wmb();
+ return 0;
+}
+
+void __patch_exception(int exc, unsigned long addr)
+{
+ unsigned int *ibase = &interrupt_base_book3e;
+
+ /*
+ * Our exceptions vectors start with a NOP and -then- a branch
+ * to deal with single stepping from userspace which stops on
+ * the second instruction. Thus we need to patch the second
+ * instruction of the exception, not the first one.
+ */
+
+ patch_branch(ibase + (exc / 4) + 1, addr, 0);
+}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/nohash/e500.c
index 94cd728166d3..266fb22131fc 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/nohash/e500.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Modifications by Kumar Gala (galak@kernel.crashing.org) to support
* E500 Book E processors.
@@ -17,12 +18,6 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/signal.h>
@@ -41,43 +36,34 @@
#include <linux/delay.h>
#include <linux/highmem.h>
#include <linux/memblock.h>
+#include <linux/of_fdt.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <asm/smp.h>
#include <asm/machdep.h>
#include <asm/setup.h>
#include <asm/paca.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
unsigned int tlbcam_index;
-#define NUM_TLBCAMS (64)
struct tlbcam TLBCAM[NUM_TLBCAMS];
-struct tlbcamrange {
+static struct {
unsigned long start;
unsigned long limit;
phys_addr_t phys;
} tlbcam_addrs[NUM_TLBCAMS];
-extern unsigned int tlbcam_index;
-
-unsigned long tlbcam_sz(int idx)
-{
- return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
-}
-
+#ifdef CONFIG_PPC_85xx
/*
* Return PA for this VA if it is mapped by a CAM, or 0
*/
-phys_addr_t v_mapped_by_tlbcam(unsigned long va)
+phys_addr_t v_block_mapped(unsigned long va)
{
int b;
for (b = 0; b < tlbcam_index; ++b)
@@ -89,7 +75,7 @@ phys_addr_t v_mapped_by_tlbcam(unsigned long va)
/*
* Return VA for a given PA or 0 if not mapped
*/
-unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
+unsigned long p_block_mapped(phys_addr_t pa)
{
int b;
for (b = 0; b < tlbcam_index; ++b)
@@ -99,6 +85,7 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa)
return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys);
return 0;
}
+#endif
/*
* Set up a variable-size TLB entry (tlbcam). The parameters are not checked;
@@ -114,7 +101,7 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
tsize = __ilog2(size) - 10;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
if ((flags & _PAGE_NO_CACHE) == 0)
flags |= _PAGE_COHERENT;
#endif
@@ -129,26 +116,27 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0;
TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0;
- TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SX | MAS3_SR;
- TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0);
+ TLBCAM[index].MAS3 = (phys & MAS3_RPN) | MAS3_SR;
+ TLBCAM[index].MAS3 |= (flags & _PAGE_WRITE) ? MAS3_SW : 0;
if (mmu_has_feature(MMU_FTR_BIG_PHYS))
TLBCAM[index].MAS7 = (u64)phys >> 32;
/* Below is unlikely -- only for large user pages or similar */
- if (pte_user(flags)) {
- TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR;
- TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0);
+ if (!is_kernel_addr(virt)) {
+ TLBCAM[index].MAS3 |= MAS3_UR;
+ TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_UX : 0;
+ TLBCAM[index].MAS3 |= (flags & _PAGE_WRITE) ? MAS3_UW : 0;
+ } else {
+ TLBCAM[index].MAS3 |= (flags & _PAGE_EXEC) ? MAS3_SX : 0;
}
tlbcam_addrs[index].start = virt;
tlbcam_addrs[index].limit = virt + size - 1;
tlbcam_addrs[index].phys = phys;
-
- loadcam_entry(index);
}
-unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
- phys_addr_t phys)
+static unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
+ phys_addr_t phys)
{
unsigned int camsize = __ilog2(ram);
unsigned int align = __ffs(virt | phys);
@@ -173,24 +161,56 @@ unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
}
static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
- unsigned long ram, int max_cam_idx)
+ unsigned long ram, int max_cam_idx,
+ bool dryrun, bool init)
{
int i;
unsigned long amount_mapped = 0;
+ unsigned long boundary;
+
+ if (strict_kernel_rwx_enabled())
+ boundary = (unsigned long)(_sinittext - _stext);
+ else
+ boundary = ram;
/* Calculate CAM values */
- for (i = 0; ram && i < max_cam_idx; i++) {
+ for (i = 0; boundary && i < max_cam_idx; i++) {
unsigned long cam_sz;
+ pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL_ROX;
+
+ cam_sz = calc_cam_sz(boundary, virt, phys);
+ if (!dryrun)
+ settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
+
+ boundary -= cam_sz;
+ amount_mapped += cam_sz;
+ virt += cam_sz;
+ phys += cam_sz;
+ }
+ for (ram -= amount_mapped; ram && i < max_cam_idx; i++) {
+ unsigned long cam_sz;
+ pgprot_t prot = init ? PAGE_KERNEL_X : PAGE_KERNEL;
cam_sz = calc_cam_sz(ram, virt, phys);
- settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0);
+ if (!dryrun)
+ settlbcam(i, virt, phys, cam_sz, pgprot_val(prot), 0);
ram -= cam_sz;
amount_mapped += cam_sz;
virt += cam_sz;
phys += cam_sz;
}
- tlbcam_index = i;
+
+ if (dryrun)
+ return amount_mapped;
+
+ if (init) {
+ loadcam_multi(0, i, max_cam_idx);
+ tlbcam_index = i;
+ } else {
+ loadcam_multi(0, i, 0);
+ WARN_ON(i > tlbcam_index);
+ }
#ifdef CONFIG_PPC64
get_paca()->tcd.esel_next = i;
@@ -201,12 +221,12 @@ static unsigned long map_mem_in_cams_addr(phys_addr_t phys, unsigned long virt,
return amount_mapped;
}
-unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
+unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, bool dryrun, bool init)
{
unsigned long virt = PAGE_OFFSET;
phys_addr_t phys = memstart_addr;
- return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx);
+ return map_mem_in_cams_addr(phys, virt, ram, max_cam_idx, dryrun, init);
}
#ifdef CONFIG_PPC32
@@ -215,11 +235,21 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx)
#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS"
#endif
-unsigned long __init mmu_mapin_ram(unsigned long top)
+unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1;
}
+void flush_instruction_cache(void)
+{
+ unsigned long tmp;
+
+ tmp = mfspr(SPRN_L1CSR1);
+ tmp |= L1CSR1_ICFI | L1CSR1_ICLFR;
+ mtspr(SPRN_L1CSR1, tmp);
+ isync();
+}
+
/*
* MMU_init_hw does the chip-specific initialization of the MMU hardware.
*/
@@ -228,6 +258,11 @@ void __init MMU_init_hw(void)
flush_instruction_cache();
}
+static unsigned long __init tlbcam_sz(int idx)
+{
+ return tlbcam_addrs[idx].limit - tlbcam_addrs[idx].start + 1;
+}
+
void __init adjust_total_lowmem(void)
{
unsigned long ram;
@@ -237,8 +272,8 @@ void __init adjust_total_lowmem(void)
ram = min((phys_addr_t)__max_low_memory, (phys_addr_t)total_lowmem);
i = switch_to_as1();
- __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM);
- restore_to_as0(i, 0, 0, 1);
+ __max_low_memory = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, false, true);
+ restore_to_as0(i, 0, NULL, 1);
pr_info("Memory CAM mapping: ");
for (i = 0; i < tlbcam_index - 1; i++)
@@ -249,6 +284,26 @@ void __init adjust_total_lowmem(void)
memblock_set_current_limit(memstart_addr + __max_low_memory);
}
+#ifdef CONFIG_STRICT_KERNEL_RWX
+int mmu_mark_rodata_ro(void)
+{
+ unsigned long remapped;
+
+ remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false);
+
+ if (WARN_ON(__max_low_memory != remapped))
+ return -EINVAL;
+
+ return 0;
+}
+#endif
+
+int mmu_mark_initmem_nx(void)
+{
+ /* Everything is done in mmu_mark_rodata_ro() */
+ return 0;
+}
+
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
@@ -262,11 +317,13 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
int __initdata is_second_reloc;
notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
{
- unsigned long base = KERNELBASE;
+ unsigned long base = kernstart_virt_addr;
+ phys_addr_t size;
kernstart_addr = start;
if (is_second_reloc) {
virt_phys_offset = PAGE_OFFSET - memstart_addr;
+ kaslr_late_init();
return;
}
@@ -290,7 +347,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
start &= ~0x3ffffff;
base &= ~0x3ffffff;
virt_phys_offset = base - start;
- early_get_first_memblock_info(__va(dt_ptr), NULL);
+ early_get_first_memblock_info(__va(dt_ptr), &size);
/*
* We now get the memstart_addr, then we should check if this
* address is the same as what the PAGE_OFFSET map to now. If
@@ -305,14 +362,18 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
n = switch_to_as1();
/* map a 64M area for the second relocation */
if (memstart_addr > start)
- map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM);
+ map_mem_in_cams(0x4000000, CONFIG_LOWMEM_CAM_NUM,
+ false, true);
else
map_mem_in_cams_addr(start, PAGE_OFFSET + offset,
- 0x4000000, CONFIG_LOWMEM_CAM_NUM);
+ 0x4000000, CONFIG_LOWMEM_CAM_NUM,
+ false, true);
restore_to_as0(n, offset, __va(dt_ptr), 1);
/* We should never reach here */
panic("Relocation error");
}
+
+ kaslr_early_init(__va(dt_ptr), size);
}
#endif
#endif
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
index 5e4ee2573903..a134d28a0e4d 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/nohash/e500_hugetlbpage.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* PPC Huge TLB Page Support for Book3E MMU
*
@@ -8,8 +9,11 @@
#include <linux/mm.h>
#include <linux/hugetlb.h>
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#include <asm/mmu.h>
+
#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
static inline int tlb1_next(void)
{
struct paca_struct *paca = get_paca();
@@ -26,6 +30,50 @@ static inline int tlb1_next(void)
tcd->esel_next = next;
return this;
}
+
+static inline void book3e_tlb_lock(void)
+{
+ struct paca_struct *paca = get_paca();
+ unsigned long tmp;
+ int token = smp_processor_id() + 1;
+
+ /*
+ * Besides being unnecessary in the absence of SMT, this
+ * check prevents trying to do lbarx/stbcx. on e5500 which
+ * doesn't implement either feature.
+ */
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
+ asm volatile(".machine push;"
+ ".machine e6500;"
+ "1: lbarx %0, 0, %1;"
+ "cmpwi %0, 0;"
+ "bne 2f;"
+ "stbcx. %2, 0, %1;"
+ "bne 1b;"
+ "b 3f;"
+ "2: lbzx %0, 0, %1;"
+ "cmpwi %0, 0;"
+ "bne 2b;"
+ "b 1b;"
+ "3:"
+ ".machine pop;"
+ : "=&r" (tmp)
+ : "r" (&paca->tcd_ptr->lock), "r" (token)
+ : "memory");
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+ struct paca_struct *paca = get_paca();
+
+ if (!cpu_has_feature(CPU_FTR_SMT))
+ return;
+
+ isync();
+ paca->tcd_ptr->lock = 0;
+}
#else
static inline int tlb1_next(void)
{
@@ -33,92 +81,75 @@ static inline int tlb1_next(void)
ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
- index = __get_cpu_var(next_tlbcam_idx);
+ index = this_cpu_read(next_tlbcam_idx);
/* Just round-robin the entries and wrap when we hit the end */
if (unlikely(index == ncams - 1))
- __get_cpu_var(next_tlbcam_idx) = tlbcam_index;
+ __this_cpu_write(next_tlbcam_idx, tlbcam_index);
else
- __get_cpu_var(next_tlbcam_idx)++;
+ __this_cpu_inc(next_tlbcam_idx);
return index;
}
-#endif /* !PPC64 */
-#endif /* FSL */
-static inline int mmu_get_tsize(int psize)
+static inline void book3e_tlb_lock(void)
{
- return mmu_psize_defs[psize].enc;
}
+static inline void book3e_tlb_unlock(void)
+{
+}
+#endif
+
static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
{
int found = 0;
mtspr(SPRN_MAS6, pid << 16);
- if (mmu_has_feature(MMU_FTR_USE_TLBRSRV)) {
- asm volatile(
- "li %0,0\n"
- "tlbsx. 0,%1\n"
- "bne 1f\n"
- "li %0,1\n"
- "1:\n"
- : "=&r"(found) : "r"(ea));
- } else {
- asm volatile(
- "tlbsx 0,%1\n"
- "mfspr %0,0x271\n"
- "srwi %0,%0,31\n"
- : "=&r"(found) : "r"(ea));
- }
+ asm volatile(
+ "tlbsx 0,%1\n"
+ "mfspr %0,0x271\n"
+ "srwi %0,%0,31\n"
+ : "=&r"(found) : "r"(ea));
return found;
}
-void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
- pte_t pte)
+static void
+book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, pte_t pte)
{
unsigned long mas1, mas2;
u64 mas7_3;
unsigned long psize, tsize, shift;
unsigned long flags;
struct mm_struct *mm;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int index;
-#endif
if (unlikely(is_kernel_addr(ea)))
return;
mm = vma->vm_mm;
-#ifdef CONFIG_PPC_MM_SLICES
- psize = get_slice_psize(mm, ea);
- tsize = mmu_get_tsize(psize);
- shift = mmu_psize_defs[psize].shift;
-#else
psize = vma_mmu_pagesize(vma);
shift = __ilog2(psize);
tsize = shift - 10;
-#endif
-
/*
* We can't be interrupted while we're setting up the MAS
- * regusters or after we've confirmed that no tlb exists.
+ * registers or after we've confirmed that no tlb exists.
*/
local_irq_save(flags);
+ book3e_tlb_lock();
+
if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
+ book3e_tlb_unlock();
local_irq_restore(flags);
return;
}
-#ifdef CONFIG_PPC_FSL_BOOK3E
/* We have to use the CAM(TLB1) on FSL parts for hugepages */
index = tlb1_next();
mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
-#endif
mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
mas2 = ea & ~((1UL << shift) - 1);
@@ -131,19 +162,28 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
mtspr(SPRN_MAS1, mas1);
mtspr(SPRN_MAS2, mas2);
- if (mmu_has_feature(MMU_FTR_USE_PAIRED_MAS)) {
- mtspr(SPRN_MAS7_MAS3, mas7_3);
- } else {
- if (mmu_has_feature(MMU_FTR_BIG_PHYS))
- mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
- mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
- }
+ if (mmu_has_feature(MMU_FTR_BIG_PHYS))
+ mtspr(SPRN_MAS7, upper_32_bits(mas7_3));
+ mtspr(SPRN_MAS3, lower_32_bits(mas7_3));
asm volatile ("tlbwe");
+ book3e_tlb_unlock();
local_irq_restore(flags);
}
+/*
+ * This is called at the end of handling a user page fault, when the
+ * fault has been handled by updating a PTE in the linux page tables.
+ *
+ * This must always be called with the pte lock held.
+ */
+void __update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+{
+ if (is_vm_hugetlb_page(vma))
+ book3e_hugetlb_preload(vma, address, *ptep);
+}
+
void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
{
struct hstate *hstate = hstate_file(vma->vm_file);
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
new file mode 100644
index 000000000000..5e4897daaaea
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+#include <linux/crash_reserve.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <asm/cacheflush.h>
+#include <asm/kdump.h>
+#include <mm/mmu_decl.h>
+
+struct regions {
+ unsigned long pa_start;
+ unsigned long pa_end;
+ unsigned long kernel_size;
+ unsigned long dtb_start;
+ unsigned long dtb_end;
+ unsigned long initrd_start;
+ unsigned long initrd_end;
+ unsigned long crash_start;
+ unsigned long crash_end;
+ int reserved_mem;
+ int reserved_mem_addr_cells;
+ int reserved_mem_size_cells;
+};
+
+struct regions __initdata regions;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+ early_init_dt_scan_chosen(boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+ size_t size)
+{
+ size_t i;
+ const unsigned long *ptr = area;
+
+ for (i = 0; i < size / sizeof(hash); i++) {
+ /* Rotate by odd number of bits and XOR. */
+ hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+ hash ^= ptr[i];
+ }
+
+ return hash;
+}
+
+/* Attempt to create a simple starting entropy. This can make it different for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+ unsigned long hash = 0;
+
+ /* build-specific string for starting entropy. */
+ hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
+ hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+ return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+ int node, len;
+ fdt64_t *prop;
+ u64 ret;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return 0;
+
+ prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+ if (!prop || len != sizeof(u64))
+ return 0;
+
+ ret = fdt64_to_cpu(*prop);
+ *prop = 0;
+ return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+ return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+ u32 end)
+{
+ int subnode, len, i;
+ u64 base, size;
+
+ /* check for overlap with /memreserve/ entries */
+ for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+ if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+ continue;
+ if (regions_overlap(start, end, base, base + size))
+ return true;
+ }
+
+ if (regions.reserved_mem < 0)
+ return false;
+
+ /* check for overlap with static reservations in /reserved-memory */
+ for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+ subnode >= 0;
+ subnode = fdt_next_subnode(fdt, subnode)) {
+ const fdt32_t *reg;
+ u64 rsv_end;
+
+ len = 0;
+ reg = fdt_getprop(fdt, subnode, "reg", &len);
+ while (len >= (regions.reserved_mem_addr_cells +
+ regions.reserved_mem_size_cells)) {
+ base = fdt32_to_cpu(reg[0]);
+ if (regions.reserved_mem_addr_cells == 2)
+ base = (base << 32) | fdt32_to_cpu(reg[1]);
+
+ reg += regions.reserved_mem_addr_cells;
+ len -= 4 * regions.reserved_mem_addr_cells;
+
+ size = fdt32_to_cpu(reg[0]);
+ if (regions.reserved_mem_size_cells == 2)
+ size = (size << 32) | fdt32_to_cpu(reg[1]);
+
+ reg += regions.reserved_mem_size_cells;
+ len -= 4 * regions.reserved_mem_size_cells;
+
+ if (base >= regions.pa_end)
+ continue;
+
+ rsv_end = min(base + size, (u64)U32_MAX);
+
+ if (regions_overlap(start, end, base, rsv_end))
+ return true;
+ }
+ }
+ return false;
+}
+
+static __init bool overlaps_region(const void *fdt, u32 start,
+ u32 end)
+{
+ if (regions_overlap(start, end, __pa(_stext), __pa(_end)))
+ return true;
+
+ if (regions_overlap(start, end, regions.dtb_start,
+ regions.dtb_end))
+ return true;
+
+ if (regions_overlap(start, end, regions.initrd_start,
+ regions.initrd_end))
+ return true;
+
+ if (regions_overlap(start, end, regions.crash_start,
+ regions.crash_end))
+ return true;
+
+ return overlaps_reserved_region(fdt, start, end);
+}
+
+static void __init get_crash_kernel(void *fdt, unsigned long size)
+{
+#ifdef CONFIG_CRASH_RESERVE
+ unsigned long long crash_size, crash_base;
+ int ret;
+
+ ret = parse_crashkernel(boot_command_line, size, &crash_size,
+ &crash_base, NULL, NULL, NULL);
+ if (ret != 0 || crash_size == 0)
+ return;
+ if (crash_base == 0)
+ crash_base = KDUMP_KERNELBASE;
+
+ regions.crash_start = (unsigned long)crash_base;
+ regions.crash_end = (unsigned long)(crash_base + crash_size);
+
+ pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size);
+#endif
+}
+
+static void __init get_initrd_range(void *fdt)
+{
+ u64 start, end;
+ int node, len;
+ const __be32 *prop;
+
+ node = fdt_path_offset(fdt, "/chosen");
+ if (node < 0)
+ return;
+
+ prop = fdt_getprop(fdt, node, "linux,initrd-start", &len);
+ if (!prop)
+ return;
+ start = of_read_number(prop, len / 4);
+
+ prop = fdt_getprop(fdt, node, "linux,initrd-end", &len);
+ if (!prop)
+ return;
+ end = of_read_number(prop, len / 4);
+
+ regions.initrd_start = (unsigned long)start;
+ regions.initrd_end = (unsigned long)end;
+
+ pr_debug("initrd_start=0x%llx initrd_end=0x%llx\n", start, end);
+}
+
+static __init unsigned long get_usable_address(const void *fdt,
+ unsigned long start,
+ unsigned long offset)
+{
+ unsigned long pa;
+ unsigned long pa_end;
+
+ for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) {
+ pa_end = pa + regions.kernel_size;
+ if (overlaps_region(fdt, pa, pa_end))
+ continue;
+
+ return pa;
+ }
+ return 0;
+}
+
+static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells,
+ int *size_cells)
+{
+ const int *prop;
+ int len;
+
+ /*
+ * Retrieve the #address-cells and #size-cells properties
+ * from the 'node', or use the default if not provided.
+ */
+ *addr_cells = *size_cells = 1;
+
+ prop = fdt_getprop(fdt, node, "#address-cells", &len);
+ if (len == 4)
+ *addr_cells = fdt32_to_cpu(*prop);
+ prop = fdt_getprop(fdt, node, "#size-cells", &len);
+ if (len == 4)
+ *size_cells = fdt32_to_cpu(*prop);
+}
+
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index,
+ unsigned long offset)
+{
+ unsigned long koffset = 0;
+ unsigned long start;
+
+ while ((long)index >= 0) {
+ offset = memstart_addr + index * SZ_64M + offset;
+ start = memstart_addr + index * SZ_64M;
+ koffset = get_usable_address(dt_ptr, start, offset);
+ if (koffset)
+ break;
+ index--;
+ }
+
+ if (koffset != 0)
+ koffset -= memstart_addr;
+
+ return koffset;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+ return strstr(boot_command_line, "nokaslr") != NULL;
+}
+
+static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size,
+ unsigned long kernel_sz)
+{
+ unsigned long offset, random;
+ unsigned long ram, linear_sz;
+ u64 seed;
+ unsigned long index;
+
+ kaslr_get_cmdline(dt_ptr);
+ if (kaslr_disabled())
+ return 0;
+
+ random = get_boot_seed(dt_ptr);
+
+ seed = get_tb() << 32;
+ seed ^= get_tb();
+ random = rotate_xor(random, &seed, sizeof(seed));
+
+ /*
+ * Retrieve (and wipe) the seed from the FDT
+ */
+ seed = get_kaslr_seed(dt_ptr);
+ if (seed)
+ random = rotate_xor(random, &seed, sizeof(seed));
+ else
+ pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
+
+ ram = min_t(phys_addr_t, __max_low_memory, size);
+ ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true, true);
+ linear_sz = min_t(unsigned long, ram, SZ_512M);
+
+ /* If the linear size is smaller than 64M, do not randomize */
+ if (linear_sz < SZ_64M)
+ return 0;
+
+ /* check for a reserved-memory node and record its cell sizes */
+ regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory");
+ if (regions.reserved_mem >= 0)
+ get_cell_sizes(dt_ptr, regions.reserved_mem,
+ &regions.reserved_mem_addr_cells,
+ &regions.reserved_mem_size_cells);
+
+ regions.pa_start = memstart_addr;
+ regions.pa_end = memstart_addr + linear_sz;
+ regions.dtb_start = __pa(dt_ptr);
+ regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
+ regions.kernel_size = kernel_sz;
+
+ get_initrd_range(dt_ptr);
+ get_crash_kernel(dt_ptr, ram);
+
+ /*
+ * Decide which 64M we want to start
+ * Only use the low 8 bits of the random seed
+ */
+ index = random & 0xFF;
+ index %= linear_sz / SZ_64M;
+
+ /* Decide offset inside 64M */
+ offset = random % (SZ_64M - kernel_sz);
+ offset = round_down(offset, SZ_16K);
+
+ return kaslr_legal_offset(dt_ptr, index, offset);
+}
+
+/*
+ * To see if we need to relocate the kernel to a random offset
+ * void *dt_ptr - address of the device tree
+ * phys_addr_t size - size of the first memory block
+ */
+notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
+{
+ unsigned long tlb_virt;
+ phys_addr_t tlb_phys;
+ unsigned long offset;
+ unsigned long kernel_sz;
+
+ kernel_sz = (unsigned long)_end - (unsigned long)_stext;
+
+ offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
+ if (offset == 0)
+ return;
+
+ kernstart_virt_addr += offset;
+ kernstart_addr += offset;
+
+ is_second_reloc = 1;
+
+ if (offset >= SZ_64M) {
+ tlb_virt = round_down(kernstart_virt_addr, SZ_64M);
+ tlb_phys = round_down(kernstart_addr, SZ_64M);
+
+ /* Create kernel map to relocate in */
+ create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
+ }
+
+ /* Copy the kernel to its new location and run */
+ memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
+ flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
+
+ reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
+}
+
+void __init kaslr_late_init(void)
+{
+ /* If randomized, clear the original kernel */
+ if (kernstart_virt_addr != KERNELBASE) {
+ unsigned long kernel_sz;
+
+ kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+ memzero_explicit((void *)KERNELBASE, kernel_sz);
+ }
+}
diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c
new file mode 100644
index 000000000000..c20c4f357fbf
--- /dev/null
+++ b/arch/powerpc/mm/nohash/kup.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for initializing kernel userspace protection
+ */
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/printk.h>
+#include <linux/smp.h>
+
+#include <asm/kup.h>
+#include <asm/smp.h>
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled)
+{
+ if (disabled) {
+ if (smp_processor_id() == boot_cpuid)
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP;
+ return;
+ }
+
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ prevent_user_access(KUAP_READ_WRITE);
+}
+#endif
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/nohash/mmu_context.c
index 928ebe79668b..28a96a10c907 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file contains the routines for handling the MMU on those
* PowerPC implementations where the MMU is not using the hash
@@ -9,11 +10,6 @@
* Derived from previous arch/powerpc/mm/mmu_context.c
* and arch/powerpc/include/asm/mmu_context.h
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
* TODO:
*
* - The global context lock will not scale very well
@@ -25,34 +21,54 @@
* also clear mm->cpu_vm_mask bits when processes are migrated
*/
-//#define DEBUG_MAP_CONSISTENCY
-//#define DEBUG_CLAMP_LAST_CONTEXT 31
-//#define DEBUG_HARDER
-
-/* We don't use DEBUG because it tends to be compiled in always nowadays
- * and this would generate way too much output
- */
-#ifdef DEBUG_HARDER
-#define pr_hard(args...) printk(KERN_DEBUG args)
-#define pr_hardcont(args...) printk(KERN_CONT args)
-#else
-#define pr_hard(args...) do { } while(0)
-#define pr_hardcont(args...) do { } while(0)
-#endif
-
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/spinlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/slab.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
+#include <asm/smp.h>
+#include <asm/kup.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * Room for two PTE table pointers, usually the kernel and current user
+ * pointer to their respective root page table (pgdir).
+ */
+void *abatron_pteptrs[2];
+
+/*
+ * The MPC8xx has only 16 contexts. We rotate through them on each task switch.
+ * A better way would be to keep track of tasks that own contexts, and implement
+ * an LRU usage. That way very active tasks don't always have to pay the TLB
+ * reload overhead. The kernel pages are mapped shared, so the kernel can run on
+ * behalf of any task that makes a kernel entry. Shared does not mean they are
+ * not protected, just that the ASID comparison is not performed. -- Dan
+ *
+ * The IBM4xx has 256 contexts, so we can just rotate through these as a way of
+ * "switching" contexts. If the TID of the TLB is zero, the PID/TID comparison
+ * is disabled, so we can use a TID of zero to represent all kernel pages as
+ * shared among all contexts. -- Dan
+ *
+ * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We should
+ * normally never have to steal though the facility is present if needed.
+ * -- BenH
+ */
+#define FIRST_CONTEXT 1
+#if defined(CONFIG_PPC_8xx)
+#define LAST_CONTEXT 16
+#elif defined(CONFIG_PPC_47x)
+#define LAST_CONTEXT 65535
+#else
+#define LAST_CONTEXT 255
+#endif
-static unsigned int first_context, last_context;
static unsigned int next_context, nr_free_contexts;
static unsigned long *context_map;
static unsigned long *stale_map[NR_CPUS];
@@ -60,7 +76,7 @@ static struct mm_struct **context_mm;
static DEFINE_RAW_SPINLOCK(context_lock);
#define CTX_MAP_SIZE \
- (sizeof(unsigned long) * (last_context / BITS_PER_LONG + 1))
+ (sizeof(unsigned long) * (LAST_CONTEXT / BITS_PER_LONG + 1))
/* Steal a context from a task that has one at the moment.
@@ -78,13 +94,12 @@ static DEFINE_RAW_SPINLOCK(context_lock);
* the stale map as we can just flush the local CPU
* -- benh
*/
-#ifdef CONFIG_SMP
static unsigned int steal_context_smp(unsigned int id)
{
struct mm_struct *mm;
unsigned int cpu, max, i;
- max = last_context - first_context;
+ max = LAST_CONTEXT - FIRST_CONTEXT;
/* Attempt to free next_context first and then loop until we manage */
while (max--) {
@@ -96,11 +111,10 @@ static unsigned int steal_context_smp(unsigned int id)
*/
if (mm->context.active) {
id++;
- if (id > last_context)
- id = first_context;
+ if (id > LAST_CONTEXT)
+ id = FIRST_CONTEXT;
continue;
}
- pr_hardcont(" | steal %d from 0x%p", id, mm);
/* Mark this mm has having no context anymore */
mm->context.id = MMU_NO_CONTEXT;
@@ -131,7 +145,34 @@ static unsigned int steal_context_smp(unsigned int id)
/* This will cause the caller to try again */
return MMU_NO_CONTEXT;
}
-#endif /* CONFIG_SMP */
+
+static unsigned int steal_all_contexts(void)
+{
+ struct mm_struct *mm;
+ int cpu = smp_processor_id();
+ unsigned int id;
+
+ for (id = FIRST_CONTEXT; id <= LAST_CONTEXT; id++) {
+ /* Pick up the victim mm */
+ mm = context_mm[id];
+
+ /* Mark this mm as having no context anymore */
+ mm->context.id = MMU_NO_CONTEXT;
+ if (id != FIRST_CONTEXT) {
+ context_mm[id] = NULL;
+ __clear_bit(id, context_map);
+ }
+ if (IS_ENABLED(CONFIG_SMP))
+ __clear_bit(id, stale_map[cpu]);
+ }
+
+ /* Flush the TLB for all contexts (not to be used on SMP) */
+ _tlbil_all();
+
+ nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT;
+
+ return FIRST_CONTEXT;
+}
/* Note that this will also be called on SMP if all other CPUs are
* offlined, which means that it may be called for cpu != 0. For
@@ -146,8 +187,6 @@ static unsigned int steal_context_up(unsigned int id)
/* Pick up the victim mm */
mm = context_mm[id];
- pr_hardcont(" | steal %d from 0x%p", id, mm);
-
/* Flush the TLB for that context */
local_flush_tlb_mm(mm);
@@ -155,120 +194,93 @@ static unsigned int steal_context_up(unsigned int id)
mm->context.id = MMU_NO_CONTEXT;
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
- __clear_bit(id, stale_map[cpu]);
+ if (IS_ENABLED(CONFIG_SMP))
+ __clear_bit(id, stale_map[cpu]);
return id;
}
-#ifdef DEBUG_MAP_CONSISTENCY
-static void context_check_map(void)
+static void set_context(unsigned long id, pgd_t *pgd)
{
- unsigned int id, nrf, nact;
-
- nrf = nact = 0;
- for (id = first_context; id <= last_context; id++) {
- int used = test_bit(id, context_map);
- if (!used)
- nrf++;
- if (used != (context_mm[id] != NULL))
- pr_err("MMU: Context %d is %s and MM is %p !\n",
- id, used ? "used" : "free", context_mm[id]);
- if (context_mm[id] != NULL)
- nact += context_mm[id]->context.active;
- }
- if (nrf != nr_free_contexts) {
- pr_err("MMU: Free context count out of sync ! (%d vs %d)\n",
- nr_free_contexts, nrf);
- nr_free_contexts = nrf;
+ if (IS_ENABLED(CONFIG_PPC_8xx)) {
+ mtspr(SPRN_M_TWB, __pa(pgd));
+
+ /* Update context */
+ mtspr(SPRN_M_CASID, id - 1);
+
+ /* sync */
+ mb();
+ } else if (kuap_is_disabled()) {
+ mtspr(SPRN_PID, id);
+ isync();
}
- if (nact > num_online_cpus())
- pr_err("MMU: More active contexts than CPUs ! (%d vs %d)\n",
- nact, num_online_cpus());
- if (first_context > 0 && !test_bit(0, context_map))
- pr_err("MMU: Context 0 has been freed !!!\n");
}
-#else
-static void context_check_map(void) { }
-#endif
-void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
+void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
{
- unsigned int i, id, cpu = smp_processor_id();
+ unsigned int id;
+ unsigned int i, cpu = smp_processor_id();
unsigned long *map;
/* No lockless fast path .. yet */
raw_spin_lock(&context_lock);
- pr_hard("[%d] activating context for mm @%p, active=%d, id=%d",
- cpu, next, next->context.active, next->context.id);
-
-#ifdef CONFIG_SMP
- /* Mark us active and the previous one not anymore */
- next->context.active++;
- if (prev) {
- pr_hardcont(" (old=0x%p a=%d)", prev, prev->context.active);
- WARN_ON(prev->context.active < 1);
- prev->context.active--;
+ if (IS_ENABLED(CONFIG_SMP)) {
+ /* Mark us active and the previous one not anymore */
+ next->context.active++;
+ if (prev) {
+ WARN_ON(prev->context.active < 1);
+ prev->context.active--;
+ }
}
again:
-#endif /* CONFIG_SMP */
/* If we already have a valid assigned context, skip all that */
id = next->context.id;
- if (likely(id != MMU_NO_CONTEXT)) {
-#ifdef DEBUG_MAP_CONSISTENCY
- if (context_mm[id] != next)
- pr_err("MMU: mm 0x%p has id %d but context_mm[%d] says 0x%p\n",
- next, id, id, context_mm[id]);
-#endif
+ if (likely(id != MMU_NO_CONTEXT))
goto ctxt_ok;
- }
/* We really don't have a context, let's try to acquire one */
id = next_context;
- if (id > last_context)
- id = first_context;
+ if (id > LAST_CONTEXT)
+ id = FIRST_CONTEXT;
map = context_map;
/* No more free contexts, let's try to steal one */
if (nr_free_contexts == 0) {
-#ifdef CONFIG_SMP
if (num_online_cpus() > 1) {
id = steal_context_smp(id);
if (id == MMU_NO_CONTEXT)
goto again;
goto stolen;
}
-#endif /* CONFIG_SMP */
- id = steal_context_up(id);
+ if (IS_ENABLED(CONFIG_PPC_8xx))
+ id = steal_all_contexts();
+ else
+ id = steal_context_up(id);
goto stolen;
}
nr_free_contexts--;
/* We know there's at least one free context, try to find it */
while (__test_and_set_bit(id, map)) {
- id = find_next_zero_bit(map, last_context+1, id);
- if (id > last_context)
- id = first_context;
+ id = find_next_zero_bit(map, LAST_CONTEXT+1, id);
+ if (id > LAST_CONTEXT)
+ id = FIRST_CONTEXT;
}
stolen:
next_context = id + 1;
context_mm[id] = next;
next->context.id = id;
- pr_hardcont(" | new id=%d,nrf=%d", id, nr_free_contexts);
- context_check_map();
ctxt_ok:
/* If that context got marked stale on this CPU, then flush the
* local TLB for it and unmark it before we use it
*/
- if (test_bit(id, stale_map[cpu])) {
- pr_hardcont(" | stale flush %d [%d..%d]",
- id, cpu_first_thread_sibling(cpu),
- cpu_last_thread_sibling(cpu));
-
+ if (IS_ENABLED(CONFIG_SMP) && test_bit(id, stale_map[cpu])) {
local_flush_tlb_mm(next);
/* XXX This clear should ultimately be part of local_flush_tlb_mm */
@@ -280,8 +292,12 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
}
/* Flick the MMU and release lock */
- pr_hardcont(" -> %d\n", id);
+ if (IS_ENABLED(CONFIG_BDI_SWITCH))
+ abatron_pteptrs[1] = next->pgd;
set_context(id, next->pgd);
+#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP)
+ tsk->thread.pid = id;
+#endif
raw_spin_unlock(&context_lock);
}
@@ -290,16 +306,9 @@ void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
*/
int init_new_context(struct task_struct *t, struct mm_struct *mm)
{
- pr_hard("initing context for mm @%p\n", mm);
-
mm->context.id = MMU_NO_CONTEXT;
mm->context.active = 0;
-
-#ifdef CONFIG_PPC_MM_SLICES
- if (slice_mm_new_context(mm))
- slice_set_user_psize(mm, mmu_virtual_psize);
-#endif
-
+ pte_frag_set(&mm->context, NULL);
return 0;
}
@@ -321,56 +330,38 @@ void destroy_context(struct mm_struct *mm)
if (id != MMU_NO_CONTEXT) {
__clear_bit(id, context_map);
mm->context.id = MMU_NO_CONTEXT;
-#ifdef DEBUG_MAP_CONSISTENCY
- mm->context.active = 0;
-#endif
context_mm[id] = NULL;
nr_free_contexts++;
}
raw_spin_unlock_irqrestore(&context_lock, flags);
}
-#ifdef CONFIG_SMP
-
-static int mmu_context_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
+static int mmu_ctx_cpu_prepare(unsigned int cpu)
{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
/* We don't touch CPU 0 map, it's allocated at aboot and kept
* around forever
*/
if (cpu == boot_cpuid)
- return NOTIFY_OK;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- pr_devel("MMU: Allocating stale context map for CPU %d\n", cpu);
- stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- pr_devel("MMU: Freeing stale context map for CPU %d\n", cpu);
- kfree(stale_map[cpu]);
- stale_map[cpu] = NULL;
-
- /* We also clear the cpu_vm_mask bits of CPUs going away */
- clear_tasks_mm_cpumask(cpu);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
+ return 0;
+
+ stale_map[cpu] = kzalloc(CTX_MAP_SIZE, GFP_KERNEL);
+ return 0;
}
-static struct notifier_block mmu_context_cpu_nb = {
- .notifier_call = mmu_context_cpu_notify,
-};
+static int mmu_ctx_cpu_dead(unsigned int cpu)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ if (cpu == boot_cpuid)
+ return 0;
-#endif /* CONFIG_SMP */
+ kfree(stale_map[cpu]);
+ stale_map[cpu] = NULL;
+
+ /* We also clear the cpu_vm_mask bits of CPUs going away */
+ clear_tasks_mm_cpumask(cpu);
+#endif
+ return 0;
+}
/*
* Initialize the context management stuff.
@@ -384,66 +375,30 @@ void __init mmu_context_init(void)
init_mm.context.active = NR_CPUS;
/*
- * The MPC8xx has only 16 contexts. We rotate through them on each
- * task switch. A better way would be to keep track of tasks that
- * own contexts, and implement an LRU usage. That way very active
- * tasks don't always have to pay the TLB reload overhead. The
- * kernel pages are mapped shared, so the kernel can run on behalf
- * of any task that makes a kernel entry. Shared does not mean they
- * are not protected, just that the ASID comparison is not performed.
- * -- Dan
- *
- * The IBM4xx has 256 contexts, so we can just rotate through these
- * as a way of "switching" contexts. If the TID of the TLB is zero,
- * the PID/TID comparison is disabled, so we can use a TID of zero
- * to represent all kernel pages as shared among all contexts.
- * -- Dan
- *
- * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We
- * should normally never have to steal though the facility is
- * present if needed.
- * -- BenH
- */
- if (mmu_has_feature(MMU_FTR_TYPE_8xx)) {
- first_context = 0;
- last_context = 15;
- } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) {
- first_context = 1;
- last_context = 65535;
- } else {
- first_context = 1;
- last_context = 255;
- }
-
-#ifdef DEBUG_CLAMP_LAST_CONTEXT
- last_context = DEBUG_CLAMP_LAST_CONTEXT;
-#endif
- /*
* Allocate the maps used by context management
*/
- context_map = alloc_bootmem(CTX_MAP_SIZE);
- context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1));
-#ifndef CONFIG_SMP
- stale_map[0] = alloc_bootmem(CTX_MAP_SIZE);
-#else
- stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE);
-
- register_cpu_notifier(&mmu_context_cpu_nb);
-#endif
+ context_map = memblock_alloc_or_panic(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+ context_mm = memblock_alloc_or_panic(sizeof(void *) * (LAST_CONTEXT + 1),
+ SMP_CACHE_BYTES);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ stale_map[boot_cpuid] = memblock_alloc_or_panic(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+ cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
+ "powerpc/mmu/ctx:prepare",
+ mmu_ctx_cpu_prepare, mmu_ctx_cpu_dead);
+ }
printk(KERN_INFO
"MMU: Allocated %zu bytes of context maps for %d contexts\n",
- 2 * CTX_MAP_SIZE + (sizeof(void *) * (last_context + 1)),
- last_context - first_context + 1);
+ 2 * CTX_MAP_SIZE + (sizeof(void *) * (LAST_CONTEXT + 1)),
+ LAST_CONTEXT - FIRST_CONTEXT + 1);
/*
* Some processors have too few contexts to reserve one for
* init_mm, and require using context 0 for a normal task.
* Other processors reserve the use of context zero for the kernel.
- * This code assumes first_context < 32.
+ * This code assumes FIRST_CONTEXT < 32.
*/
- context_map[0] = (1 << first_context) - 1;
- next_context = first_context;
- nr_free_contexts = last_context - first_context + 1;
+ context_map[0] = (1 << FIRST_CONTEXT) - 1;
+ next_context = FIRST_CONTEXT;
+ nr_free_contexts = LAST_CONTEXT - FIRST_CONTEXT + 1;
}
-
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
new file mode 100644
index 000000000000..0a650742f3a0
--- /dev/null
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains the routines for TLB flushing.
+ * On machines where the MMU does not use a hash table to store virtual to
+ * physical translations (ie, SW loaded TLBs or Book3E compliant processors,
+ * this does -not- include 603 however which shares the implementation with
+ * hash based processors)
+ *
+ * -- BenH
+ *
+ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
+ * IBM Corp.
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/preempt.h>
+#include <linux/spinlock.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/hugetlb.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/text-patching.h>
+#include <asm/cputhreads.h>
+#include <asm/hugetlb.h>
+#include <asm/paca.h>
+
+#include <mm/mmu_decl.h>
+
+/*
+ * This struct lists the sw-supported page sizes. The hardware MMU may support
+ * other sizes not listed here. The .ind field is only used on MMUs that have
+ * indirect page table entries.
+ */
+#ifdef CONFIG_PPC_E500
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ },
+ [MMU_PAGE_2M] = {
+ .shift = 21,
+ },
+ [MMU_PAGE_4M] = {
+ .shift = 22,
+ },
+ [MMU_PAGE_16M] = {
+ .shift = 24,
+ },
+ [MMU_PAGE_64M] = {
+ .shift = 26,
+ },
+ [MMU_PAGE_256M] = {
+ .shift = 28,
+ },
+ [MMU_PAGE_1G] = {
+ .shift = 30,
+ },
+};
+
+static inline int mmu_get_tsize(int psize)
+{
+ return mmu_psize_defs[psize].shift - 10;
+}
+#else
+static inline int mmu_get_tsize(int psize)
+{
+ /* This isn't used on !Book3E for now */
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_PPC_8xx
+struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
+ [MMU_PAGE_4K] = {
+ .shift = 12,
+ },
+ [MMU_PAGE_16K] = {
+ .shift = 14,
+ },
+ [MMU_PAGE_512K] = {
+ .shift = 19,
+ },
+ [MMU_PAGE_8M] = {
+ .shift = 23,
+ },
+};
+#endif
+
+#ifdef CONFIG_PPC_E500
+/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
+DEFINE_PER_CPU(int, next_tlbcam_idx);
+EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
+#endif
+
+/*
+ * Base TLB flushing operations:
+ *
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+ * - flush_tlb_range(vma, start, end) flushes a range of pages
+ * - flush_tlb_kernel_range(start, end) flushes kernel pages
+ *
+ * - local_* variants of page and mm only apply to the current
+ * processor
+ */
+
+#ifndef CONFIG_PPC_8xx
+/*
+ * These are the base non-SMP variants of page and mm flushing
+ */
+void local_flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbil_pid(pid);
+ preempt_enable();
+}
+EXPORT_SYMBOL(local_flush_tlb_mm);
+
+void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int tsize, int ind)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm ? mm->context.id : 0;
+ if (pid != MMU_NO_CONTEXT)
+ _tlbil_va(vmaddr, pid, tsize, ind);
+ preempt_enable();
+}
+
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+ __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_get_tsize(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page);
+
+void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+ __local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page_psize);
+
+#endif
+
+/*
+ * And here are the SMP non-local implementations
+ */
+#ifdef CONFIG_SMP
+
+static DEFINE_RAW_SPINLOCK(tlbivax_lock);
+
+struct tlb_flush_param {
+ unsigned long addr;
+ unsigned int pid;
+ unsigned int tsize;
+ unsigned int ind;
+};
+
+static void do_flush_tlb_mm_ipi(void *param)
+{
+ struct tlb_flush_param *p = param;
+
+ _tlbil_pid(p ? p->pid : 0);
+}
+
+static void do_flush_tlb_page_ipi(void *param)
+{
+ struct tlb_flush_param *p = param;
+
+ _tlbil_va(p->addr, p->pid, p->tsize, p->ind);
+}
+
+
+/* Note on invalidations and PID:
+ *
+ * We snapshot the PID with preempt disabled. At this point, it can still
+ * change either because:
+ * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
+ * - we are invalidating some target that isn't currently running here
+ * and is concurrently acquiring a new PID on another CPU
+ * - some other CPU is re-acquiring a lost PID for this mm
+ * etc...
+ *
+ * However, this shouldn't be a problem as we only guarantee
+ * invalidation of TLB entries present prior to this call, so we
+ * don't care about the PID changing, and invalidating a stale PID
+ * is generally harmless.
+ */
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ unsigned int pid;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto no_context;
+ if (!mm_is_core_local(mm)) {
+ struct tlb_flush_param p = { .pid = pid };
+ /* Ignores smp_processor_id() even if set. */
+ smp_call_function_many(mm_cpumask(mm),
+ do_flush_tlb_mm_ipi, &p, 1);
+ }
+ _tlbil_pid(pid);
+ no_context:
+ preempt_enable();
+}
+EXPORT_SYMBOL(flush_tlb_mm);
+
+void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
+ int tsize, int ind)
+{
+ struct cpumask *cpu_mask;
+ unsigned int pid;
+
+ /*
+ * This function as well as __local_flush_tlb_page() must only be called
+ * for user contexts.
+ */
+ if (WARN_ON(!mm))
+ return;
+
+ preempt_disable();
+ pid = mm->context.id;
+ if (unlikely(pid == MMU_NO_CONTEXT))
+ goto bail;
+ cpu_mask = mm_cpumask(mm);
+ if (!mm_is_core_local(mm)) {
+ /* If broadcast tlbivax is supported, use it */
+ if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
+ int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
+ if (lock)
+ raw_spin_lock(&tlbivax_lock);
+ _tlbivax_bcast(vmaddr, pid, tsize, ind);
+ if (lock)
+ raw_spin_unlock(&tlbivax_lock);
+ goto bail;
+ } else {
+ struct tlb_flush_param p = {
+ .pid = pid,
+ .addr = vmaddr,
+ .tsize = tsize,
+ .ind = ind,
+ };
+ /* Ignores smp_processor_id() even if set in cpu_mask */
+ smp_call_function_many(cpu_mask,
+ do_flush_tlb_page_ipi, &p, 1);
+ }
+ }
+ _tlbil_va(vmaddr, pid, tsize, ind);
+ bail:
+ preempt_enable();
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (vma && is_vm_hugetlb_page(vma))
+ flush_hugetlb_page(vma, vmaddr);
+#endif
+
+ __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
+ mmu_get_tsize(mmu_virtual_psize), 0);
+}
+EXPORT_SYMBOL(flush_tlb_page);
+
+#endif /* CONFIG_SMP */
+
+/*
+ * Flush kernel TLB entries in the given range
+ */
+#ifndef CONFIG_PPC_8xx
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+#ifdef CONFIG_SMP
+ preempt_disable();
+ smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
+ _tlbil_pid(0);
+ preempt_enable();
+#else
+ _tlbil_pid(0);
+#endif
+}
+EXPORT_SYMBOL(flush_tlb_kernel_range);
+#endif
+
+/*
+ * Currently, for range flushing, we just do a full mm flush. This should
+ * be optimized based on a threshold on the size of the range, since
+ * some implementation can stack multiple tlbivax before a tlbsync but
+ * for now, we keep it that way
+ */
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+ if (end - start == PAGE_SIZE && !(start & ~PAGE_MASK))
+ flush_tlb_page(vma, start);
+ else
+ flush_tlb_mm(vma->vm_mm);
+}
+EXPORT_SYMBOL(flush_tlb_range);
+
+void tlb_flush(struct mmu_gather *tlb)
+{
+ flush_tlb_mm(tlb->mm);
+}
+
+#ifndef CONFIG_PPC64
+void __init early_init_mmu(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ if (IS_ENABLED(CONFIG_PPC_47x) && IS_ENABLED(CONFIG_SMP) &&
+ of_get_flat_dt_prop(root, "cooperative-partition", NULL))
+ mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
+}
+#endif /* !CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/nohash/tlb_64e.c b/arch/powerpc/mm/nohash/tlb_64e.c
new file mode 100644
index 000000000000..4f925adf2695
--- /dev/null
+++ b/arch/powerpc/mm/nohash/tlb_64e.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
+ * IBM Corp.
+ *
+ * Derived from arch/ppc/mm/init.c:
+ * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
+ *
+ * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+ * and Cort Dougan (PReP) (cort@cs.nmt.edu)
+ * Copyright (C) 1996 Paul Mackerras
+ *
+ * Derived from "arch/i386/mm/init.c"
+ * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
+ */
+
+#include <linux/kernel.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/memblock.h>
+
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/text-patching.h>
+#include <asm/cputhreads.h>
+
+#include <mm/mmu_decl.h>
+
+/* The variables below are currently only used on 64-bit Book3E
+ * though this will probably be made common with other nohash
+ * implementations at some point
+ */
+static int mmu_pte_psize; /* Page size used for PTE pages */
+int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
+int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
+unsigned long linear_map_top; /* Top of linear mapping */
+
+
+/*
+ * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
+ * exceptions. This is used for bolted and e6500 TLB miss handlers which
+ * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
+ * this is set to zero.
+ */
+int extlb_level_exc;
+
+/*
+ * Handling of virtual linear page tables or indirect TLB entries
+ * flushing when PTE pages are freed
+ */
+void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
+{
+ int tsize = mmu_psize_defs[mmu_pte_psize].shift - 10;
+
+ if (book3e_htw_mode != PPC_HTW_NONE) {
+ unsigned long start = address & PMD_MASK;
+ unsigned long end = address + PMD_SIZE;
+ unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
+
+ /* This isn't the most optimal, ideally we would factor out the
+ * whole preempt & CPU mask mucking around, or even the IPI but
+ * it will do for now
+ */
+ while (start < end) {
+ __flush_tlb_page(tlb->mm, start, tsize, 1);
+ start += size;
+ }
+ } else {
+ unsigned long rmask = 0xf000000000000000ul;
+ unsigned long rid = (address & rmask) | 0x1000000000000000ul;
+ unsigned long vpte = address & ~rmask;
+
+ vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
+ vpte |= rid;
+ __flush_tlb_page(tlb->mm, vpte, tsize, 0);
+ }
+}
+
+static void __init setup_page_sizes(void)
+{
+ unsigned int tlb0cfg;
+ unsigned int eptcfg;
+ int psize;
+
+ unsigned int mmucfg = mfspr(SPRN_MMUCFG);
+
+ if ((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+ unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
+ unsigned int min_pg, max_pg;
+
+ min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
+ max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def;
+ unsigned int shift;
+
+ def = &mmu_psize_defs[psize];
+ shift = def->shift;
+
+ if (shift == 0 || shift & 1)
+ continue;
+
+ /* adjust to be in terms of 4^shift Kb */
+ shift = (shift - 10) >> 1;
+
+ if ((shift >= min_pg) && (shift <= max_pg))
+ def->flags |= MMU_PAGE_SIZE_DIRECT;
+ }
+
+ goto out;
+ }
+
+ if ((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
+ u32 tlb1cfg, tlb1ps;
+
+ tlb0cfg = mfspr(SPRN_TLB0CFG);
+ tlb1cfg = mfspr(SPRN_TLB1CFG);
+ tlb1ps = mfspr(SPRN_TLB1PS);
+ eptcfg = mfspr(SPRN_EPTCFG);
+
+ if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
+ book3e_htw_mode = PPC_HTW_E6500;
+
+ /*
+ * We expect 4K subpage size and unrestricted indirect size.
+ * The lack of a restriction on indirect size is a Freescale
+ * extension, indicated by PSn = 0 but SPSn != 0.
+ */
+ if (eptcfg != 2)
+ book3e_htw_mode = PPC_HTW_NONE;
+
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+
+ if (!def->shift)
+ continue;
+
+ if (tlb1ps & (1U << (def->shift - 10))) {
+ def->flags |= MMU_PAGE_SIZE_DIRECT;
+
+ if (book3e_htw_mode && psize == MMU_PAGE_2M)
+ def->flags |= MMU_PAGE_SIZE_INDIRECT;
+ }
+ }
+
+ goto out;
+ }
+out:
+ /* Cleanup array and print summary */
+ pr_info("MMU: Supported page sizes\n");
+ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
+ struct mmu_psize_def *def = &mmu_psize_defs[psize];
+ const char *__page_type_names[] = {
+ "unsupported",
+ "direct",
+ "indirect",
+ "direct & indirect"
+ };
+ if (def->flags == 0) {
+ def->shift = 0;
+ continue;
+ }
+ pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
+ __page_type_names[def->flags & 0x3]);
+ }
+}
+
+/*
+ * Early initialization of the MMU TLB code
+ */
+static void early_init_this_mmu(void)
+{
+ unsigned int mas4;
+
+ /* Set MAS4 based on page table setting */
+
+ mas4 = 0x4 << MAS4_WIMGED_SHIFT;
+ switch (book3e_htw_mode) {
+ case PPC_HTW_E6500:
+ mas4 |= MAS4_INDD;
+ mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
+ mas4 |= MAS4_TLBSELD(1);
+ mmu_pte_psize = MMU_PAGE_2M;
+ break;
+
+ case PPC_HTW_NONE:
+ mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
+ mmu_pte_psize = mmu_virtual_psize;
+ break;
+ }
+ mtspr(SPRN_MAS4, mas4);
+
+ unsigned int num_cams;
+ bool map = true;
+
+ /* use a quarter of the TLBCAM for bolted linear map */
+ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+ /*
+ * Only do the mapping once per core, or else the
+ * transient mapping would cause problems.
+ */
+#ifdef CONFIG_SMP
+ if (hweight32(get_tensr()) > 1)
+ map = false;
+#endif
+
+ if (map)
+ linear_map_top = map_mem_in_cams(linear_map_top,
+ num_cams, false, true);
+
+ /* A sync won't hurt us after mucking around with
+ * the MMU configuration
+ */
+ mb();
+}
+
+static void __init early_init_mmu_global(void)
+{
+ /*
+ * Freescale booke only supports 4K pages in TLB0, so use that.
+ */
+ mmu_vmemmap_psize = MMU_PAGE_4K;
+
+ /* XXX This code only checks for TLB 0 capabilities and doesn't
+ * check what page size combos are supported by the HW. It
+ * also doesn't handle the case where a separate array holds
+ * the IND entries from the array loaded by the PT.
+ */
+ /* Look for supported page sizes */
+ setup_page_sizes();
+
+ /*
+ * If we want to use HW tablewalk, enable it by patching the TLB miss
+ * handlers to branch to the one dedicated to it.
+ */
+ extlb_level_exc = EX_TLB_SIZE;
+ switch (book3e_htw_mode) {
+ case PPC_HTW_E6500:
+ patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
+ patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
+ break;
+ }
+
+ pr_info("MMU: Book3E HW tablewalk %s\n",
+ book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
+
+ /* Set the global containing the top of the linear mapping
+ * for use by the TLB miss code
+ */
+ linear_map_top = memblock_end_of_DRAM();
+
+ ioremap_bot = IOREMAP_BASE;
+}
+
+static void __init early_mmu_set_memory_limit(void)
+{
+ /*
+ * Limit memory so we dont have linear faults.
+ * Unlike memblock_set_current_limit, which limits
+ * memory available during early boot, this permanently
+ * reduces the memory available to Linux. We need to
+ * do this because highmem is not supported on 64-bit.
+ */
+ memblock_enforce_memory_limit(linear_map_top);
+
+ memblock_set_current_limit(linear_map_top);
+}
+
+/* boot cpu only */
+void __init early_init_mmu(void)
+{
+ early_init_mmu_global();
+ early_init_this_mmu();
+ early_mmu_set_memory_limit();
+}
+
+void early_init_mmu_secondary(void)
+{
+ early_init_this_mmu();
+}
+
+void setup_initial_memory_limit(phys_addr_t first_memblock_base,
+ phys_addr_t first_memblock_size)
+{
+ /*
+ * On FSL Embedded 64-bit, usually all RAM is bolted, but with
+ * unusual memory sizes it's possible for some RAM to not be mapped
+ * (such RAM is not used at all by Linux, since we don't support
+ * highmem on 64-bit). We limit ppc64_rma_size to what would be
+ * mappable if this memblock is the only one. Additional memblocks
+ * can only increase, not decrease, the amount that ends up getting
+ * mapped. We still limit max to 1G even if we'll eventually map
+ * more. This is due to what the early init code is set up to do.
+ *
+ * We crop it to the size of the first MEMBLOCK to
+ * avoid going over total available memory just in case...
+ */
+ unsigned long linear_sz;
+ unsigned int num_cams;
+
+ /* use a quarter of the TLBCAM for bolted linear map */
+ num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
+
+ linear_sz = map_mem_in_cams(first_memblock_size, num_cams, true, true);
+ ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
+
+ /* Finally limit subsequent allocations */
+ memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
+}
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index 43ff3c797fbf..c4d296e73731 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains low-level functions for performing various
* types of TLB invalidations on various processors with no hash
@@ -18,12 +19,6 @@
*
* Partially rewritten by Cort Dougan (cort@cs.nmt.edu)
* Paul Mackerras, Kumar Gala and Benjamin Herrenschmidt.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/reg.h>
@@ -34,33 +29,10 @@
#include <asm/asm-offsets.h>
#include <asm/processor.h>
#include <asm/bug.h>
+#include <asm/asm-compat.h>
+#include <asm/feature-fixups.h>
-#if defined(CONFIG_40x)
-
-/*
- * 40x implementation needs only tlbil_va
- */
-_GLOBAL(__tlbil_va)
- /* We run the search with interrupts disabled because we have to change
- * the PID and I don't want to preempt when that happens.
- */
- mfmsr r5
- mfspr r6,SPRN_PID
- wrteei 0
- mtspr SPRN_PID,r4
- tlbsx. r3, 0, r3
- mtspr SPRN_PID,r6
- wrtee r5
- bne 1f
- sync
- /* There are only 64 TLB entries, so r3 < 64, which means bit 25 is
- * clear. Since 25 is the V bit in the TLB_TAG, loading this value
- * will invalidate the TLB entry. */
- tlbwe r3, r3, TLB_TAG
- isync
-1: blr
-
-#elif defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
/*
* Nothing to do for 8xx, everything is inline
@@ -95,36 +67,25 @@ _GLOBAL(__tlbil_va)
tlbsx. r6,0,r3
bne 10f
sync
-BEGIN_MMU_FTR_SECTION
- b 2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+#ifndef CONFIG_PPC_47x
/* On 440 There are only 64 TLB entries, so r3 < 64, which means bit
* 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this
* value will invalidate the TLB entry.
*/
tlbwe r6,r6,PPC44x_TLB_PAGEID
- isync
-10: wrtee r10
- blr
-2:
-#ifdef CONFIG_PPC_47x
- oris r7,r6,0x8000 /* specify way explicitely */
+#else
+ oris r7,r6,0x8000 /* specify way explicitly */
clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */
ori r4,r4,PPC47x_TLBE_SIZE
tlbwe r4,r7,0 /* write it */
+#endif /* !CONFIG_PPC_47x */
isync
- wrtee r10
+10: wrtee r10
blr
-#else /* CONFIG_PPC_47x */
-1: trap
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
-#endif /* !CONFIG_PPC_47x */
_GLOBAL(_tlbil_all)
_GLOBAL(_tlbil_pid)
-BEGIN_MMU_FTR_SECTION
- b 2f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
+#ifndef CONFIG_PPC_47x
li r3,0
sync
@@ -139,8 +100,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
isync
blr
-2:
-#ifdef CONFIG_PPC_47x
+#else
/* 476 variant. There's not simple way to do this, hopefully we'll
* try to limit the amount of such full invalidates
*/
@@ -149,7 +109,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
li r3,-1 /* Current set */
lis r10,tlb_47x_boltmap@h
ori r10,r10,tlb_47x_boltmap@l
- lis r7,0x8000 /* Specify way explicitely */
+ lis r7,0x8000 /* Specify way explicitly */
b 9f /* For each set */
@@ -182,11 +142,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
b 1b /* Then loop */
1: isync /* Sync shadows */
wrtee r11
-#else /* CONFIG_PPC_47x */
-1: trap
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0;
-#endif /* !CONFIG_PPC_47x */
blr
+#endif /* !CONFIG_PPC_47x */
#ifdef CONFIG_PPC_47x
@@ -204,7 +161,7 @@ _GLOBAL(_tlbivax_bcast)
isync
PPC_TLBIVAX(0, R3)
isync
- eieio
+ mbar
tlbsync
BEGIN_FTR_SECTION
b 1f
@@ -217,7 +174,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
* Touch enough instruction cache lines to ensure cache hits
*/
1: mflr r9
- bl 2f
+ bcl 20,31,$+4
2: mflr r6
li r7,32
PPC_ICBT(0,R6,R7) /* touch next cache line */
@@ -239,7 +196,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_476_DD2)
blr
#endif /* CONFIG_PPC_47x */
-#elif defined(CONFIG_FSL_BOOKE)
+#elif defined(CONFIG_PPC_85xx)
/*
* FSL BookE implementations.
*
@@ -312,7 +269,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX)
isync
1: wrtee r10
blr
-#elif defined(CONFIG_PPC_BOOK3E)
+#elif defined(CONFIG_PPC_BOOK3E_64)
/*
* New Book3E (>= 2.06) implementation
*
@@ -373,33 +330,21 @@ _GLOBAL(_tlbivax_bcast)
rlwimi r4,r6,MAS6_SIND_SHIFT,MAS6_SIND
1: mtspr SPRN_MAS6,r4 /* assume AS=0 for now */
PPC_TLBIVAX(0,R3)
- eieio
+ mbar
tlbsync
sync
wrtee r10
blr
-
-_GLOBAL(set_context)
-#ifdef CONFIG_BDI_SWITCH
- /* Context switch the PTE pointer for the Abatron BDI2000.
- * The PGDIR is the second parameter.
- */
- lis r5, abatron_pteptrs@h
- ori r5, r5, abatron_pteptrs@l
- stw r4, 0x4(r5)
-#endif
- mtspr SPRN_PID,r3
- isync /* Force context change */
- blr
#else
#error Unsupported processor type !
#endif
-#if defined(CONFIG_PPC_FSL_BOOK3E)
+#if defined(CONFIG_PPC_E500)
/*
* extern void loadcam_entry(unsigned int index)
*
* Load TLBCAM[index] entry in to the L2 CAM MMU
+ * Must preserve r7, r8, r9, r10, r11, r12
*/
_GLOBAL(loadcam_entry)
mflr r5
@@ -423,4 +368,80 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS)
tlbwe
isync
blr
+
+/*
+ * Load multiple TLB entries at once, using an alternate-space
+ * trampoline so that we don't have to care about whether the same
+ * TLB entry maps us before and after.
+ *
+ * r3 = first entry to write
+ * r4 = number of entries to write
+ * r5 = temporary tlb entry (0 means no switch to AS1)
+ */
+_GLOBAL(loadcam_multi)
+ mflr r8
+ /* Don't switch to AS=1 if already there */
+ mfmsr r11
+ andi. r11,r11,MSR_IS
+ bne 10f
+ mr. r12, r5
+ beq 10f
+
+ /*
+ * Set up temporary TLB entry that is the same as what we're
+ * running from, but in AS=1.
+ */
+ bcl 20,31,$+4
+1: mflr r6
+ tlbsx 0,r8
+ mfspr r6,SPRN_MAS1
+ ori r6,r6,MAS1_TS
+ mtspr SPRN_MAS1,r6
+ mfspr r6,SPRN_MAS0
+ rlwimi r6,r5,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
+ mr r7,r5
+ mtspr SPRN_MAS0,r6
+ isync
+ tlbwe
+ isync
+
+ /* Switch to AS=1 */
+ mfmsr r6
+ ori r6,r6,MSR_IS|MSR_DS
+ mtmsr r6
+ isync
+
+10:
+ mr r9,r3
+ add r10,r3,r4
+2: bl loadcam_entry
+ addi r9,r9,1
+ cmpw r9,r10
+ mr r3,r9
+ blt 2b
+
+ /* Don't return to AS=0 if we were in AS=1 at function start */
+ andi. r11,r11,MSR_IS
+ bne 3f
+ cmpwi r12, 0
+ beq 3f
+
+ /* Return to AS=0 and clear the temporary entry */
+ mfmsr r6
+ rlwinm. r6,r6,0,~(MSR_IS|MSR_DS)
+ mtmsr r6
+ isync
+
+ li r6,0
+ mtspr SPRN_MAS1,r6
+ rlwinm r6,r7,MAS0_ESEL_SHIFT,MAS0_ESEL_MASK
+ oris r6,r6,MAS0_TLBSEL(1)@h
+ mtspr SPRN_MAS0,r6
+ isync
+ tlbwe
+ isync
+
+3:
+ mtlr r8
+ blr
#endif
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 89bf95bd63b1..de568297d5c5 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -1,15 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Low level TLB miss handlers for Book3E
*
* Copyright (C) 2008-2009
* Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#include <linux/pgtable.h>
#include <asm/processor.h>
#include <asm/reg.h>
#include <asm/page.h>
@@ -17,17 +14,13 @@
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
-#include <asm/pgtable.h>
#include <asm/exception-64e.h>
#include <asm/ppc-opcode.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/feature-fixups.h>
-#ifdef CONFIG_PPC_64K_PAGES
-#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
-#else
#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
-#endif
#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
@@ -68,18 +61,25 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
ld r14,PACAPGD(r13)
std r15,EX_TLB_R15(r12)
std r10,EX_TLB_CR(r12)
- TLB_MISS_PROLOG_STATS
+START_BTB_FLUSH_SECTION
+ mfspr r11, SPRN_SRR1
+ andi. r10,r11,MSR_PR
+ beq 1f
+ BTB_FLUSH(r10)
+1:
+END_BTB_FLUSH_SECTION
+ std r7,EX_TLB_R7(r12)
.endm
.macro tlb_epilog_bolted
ld r14,EX_TLB_CR(r12)
+ ld r7,EX_TLB_R7(r12)
ld r10,EX_TLB_R10(r12)
ld r11,EX_TLB_R11(r12)
ld r13,EX_TLB_R13(r12)
mtcr r14
ld r14,EX_TLB_R14(r12)
ld r15,EX_TLB_R15(r12)
- TLB_MISS_RESTORE_STATS
ld r16,EX_TLB_R16(r12)
mfspr r12,SPRN_SPRG_GEN_SCRATCH
.endm
@@ -122,9 +122,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
ori r10,r10,_PAGE_PRESENT
oris r11,r10,_PAGE_ACCESSED@h
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_bolted
+tlb_miss_user_bolted:
+#ifdef CONFIG_PPC_KUAP
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- tlb_miss_fault_bolted /* KUAP fault */
+#endif
+
tlb_miss_common_bolted:
/*
* This is the guts of the TLB miss handler for bolted-linear.
@@ -142,24 +148,13 @@ tlb_miss_common_bolted:
clrrdi r15,r15,3
beq tlb_miss_fault_bolted /* No PGDIR, bail */
-BEGIN_MMU_FTR_SECTION
- /* Set the TLB reservation and search for existing entry. Then load
- * the entry.
- */
- PPC_TLBSRX_DOT(0,R16)
ldx r14,r14,r15 /* grab pgd entry */
- beq tlb_miss_done_bolted /* tlb exists already, bail */
-MMU_FTR_SECTION_ELSE
- ldx r14,r14,r15 /* grab pgd entry */
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
-#ifndef CONFIG_PPC_64K_PAGES
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */
ldx r14,r14,r15 /* grab pud entry */
-#endif /* CONFIG_PPC_64K_PAGES */
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
clrrdi r15,r15,3
@@ -205,7 +200,6 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
tlbwe
tlb_miss_done_bolted:
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
@@ -215,21 +209,20 @@ itlb_miss_kernel_bolted:
tlb_miss_kernel_bolted:
mfspr r10,SPRN_MAS1
ld r14,PACA_KERNELPGD(r13)
- cmpldi cr0,r15,8 /* Check for vmalloc region */
+ srdi r15,r16,44 /* get kernel region */
+ andi. r15,r15,1 /* Check for vmalloc region */
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
- beq+ tlb_miss_common_bolted
+ bne+ tlb_miss_common_bolted
tlb_miss_fault_bolted:
/* We need to check if it was an instruction miss */
- andi. r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
+ andi. r10,r11,_PAGE_BAP_UX|_PAGE_BAP_SX
bne itlb_miss_fault_bolted
dtlb_miss_fault_bolted:
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_data_storage_book3e
itlb_miss_fault_bolted:
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
tlb_epilog_bolted
b exc_instruction_storage_book3e
@@ -239,20 +232,18 @@ itlb_miss_fault_bolted:
rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
srdi r15,r16,60 /* get region */
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne- itlb_miss_fault_bolted
- li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
+ li r11,_PAGE_PRESENT|_PAGE_BAP_UX /* Base perm */
/* We do the user/kernel test for the PID here along with the RW test
*/
cmpldi cr0,r15,0 /* Check for user region */
oris r11,r11,_PAGE_ACCESSED@h
- beq tlb_miss_common_bolted
+ beq tlb_miss_user_bolted
b itlb_miss_kernel_bolted
-#ifdef CONFIG_PPC_FSL_BOOK3E
/*
* TLB miss handling for e6500 and derivatives, using hardware tablewalk.
*
@@ -272,7 +263,6 @@ itlb_miss_fault_bolted:
srdi. r15,r16,60 /* get region */
ori r16,r16,1
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_e6500 /* user/kernel test */
b tlb_miss_common_e6500
@@ -284,7 +274,6 @@ itlb_miss_fault_bolted:
srdi. r15,r16,60 /* get region */
rldicr r16,r16,0,62
- TLB_MISS_STATS_SAVE_INFO_BOLTED
bne tlb_miss_kernel_e6500 /* user vs kernel check */
/*
@@ -297,6 +286,7 @@ itlb_miss_fault_bolted:
* r13 = PACA
* r11 = tlb_per_core ptr
* r10 = crap (free to use)
+ * r7 = esel_next
*/
tlb_miss_common_e6500:
crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */
@@ -308,11 +298,11 @@ BEGIN_FTR_SECTION /* CPU_FTR_SMT */
*
* MAS6:IND should be already set based on MAS4
*/
-1: lbarx r15,0,r11
lhz r10,PACAPACAINDEX(r13)
- cmpdi r15,0
- cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */
addi r10,r10,1
+ crclr cr1*4+eq /* set cr1.eq = 0 for non-recursive */
+1: lbarx r15,0,r11
+ cmpdi r15,0
bne 2f
stbcx. r10,0,r11
bne 1b
@@ -320,12 +310,16 @@ BEGIN_FTR_SECTION /* CPU_FTR_SMT */
.subsection 1
2: cmpd cr1,r15,r10 /* recursive lock due to mcheck/crit/etc? */
beq cr1,3b /* unlock will happen if cr1.eq = 0 */
- lbz r15,0(r11)
+10: lbz r15,0(r11)
cmpdi r15,0
- bne 2b
+ bne 10b
b 1b
.previous
+END_FTR_SECTION_IFSET(CPU_FTR_SMT)
+
+ lbz r7,TCD_ESEL_NEXT(r11)
+BEGIN_FTR_SECTION /* CPU_FTR_SMT */
/*
* Erratum A-008139 says that we can't use tlbwe to change
* an indirect entry in any way (including replacing or
@@ -334,8 +328,7 @@ BEGIN_FTR_SECTION /* CPU_FTR_SMT */
* with tlbilx before overwriting.
*/
- lbz r15,TCD_ESEL_NEXT(r11)
- rlwinm r10,r15,16,0xff0000
+ rlwinm r10,r7,16,0xff0000
oris r10,r10,MAS0_TLBSEL(1)@h
mtspr SPRN_MAS0,r10
isync
@@ -358,7 +351,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532)
mfspr r15,SPRN_MAS2
isync
- tlbilxva 0,r15
+ PPC_TLBILX_VA(0,R15)
isync
mtspr SPRN_MAS6,r10
@@ -398,18 +391,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
- bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */
+ bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */
ldx r14,r14,r15 /* grab pud entry */
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
- bge tlb_miss_fault_e6500
+ bge tlb_miss_huge_e6500
ldx r14,r14,r15 /* Grab pmd entry */
mfspr r10,SPRN_MAS0
cmpdi cr0,r14,0
- bge tlb_miss_fault_e6500
+ bge tlb_miss_huge_e6500
/* Now we build the MAS for a 2M indirect page:
*
@@ -428,15 +421,15 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
clrrdi r15,r16,21 /* make EA 2M-aligned */
mtspr SPRN_MAS2,r15
- lbz r15,TCD_ESEL_NEXT(r11)
+tlb_miss_huge_done_e6500:
lbz r16,TCD_ESEL_MAX(r11)
lbz r14,TCD_ESEL_FIRST(r11)
- rlwimi r10,r15,16,0x00ff0000 /* insert esel_next into MAS0 */
- addi r15,r15,1 /* increment esel_next */
+ rlwimi r10,r7,16,0x00ff0000 /* insert esel_next into MAS0 */
+ addi r7,r7,1 /* increment esel_next */
mtspr SPRN_MAS0,r10
- cmpw r15,r16
- iseleq r15,r14,r15 /* if next == last use first */
- stb r15,TCD_ESEL_NEXT(r11)
+ cmpw r7,r16
+ iseleq r7,r14,r7 /* if next == last use first */
+ stb r7,TCD_ESEL_NEXT(r11)
tlbwe
@@ -452,282 +445,67 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
.endm
tlb_unlock_e6500
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
tlb_epilog_bolted
rfi
-tlb_miss_kernel_e6500:
- ld r14,PACA_KERNELPGD(r13)
- cmpldi cr1,r15,8 /* Check for vmalloc region */
- beq+ cr1,tlb_miss_common_e6500
-
-tlb_miss_fault_e6500:
- tlb_unlock_e6500
- /* We need to check if it was an instruction miss */
- andi. r16,r16,1
- bne itlb_miss_fault_e6500
-dtlb_miss_fault_e6500:
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
- tlb_epilog_bolted
- b exc_data_storage_book3e
-itlb_miss_fault_e6500:
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
- tlb_epilog_bolted
- b exc_instruction_storage_book3e
-#endif /* CONFIG_PPC_FSL_BOOK3E */
-
-/**********************************************************************
- * *
- * TLB miss handling for Book3E with TLB reservation and HES support *
- * *
- **********************************************************************/
-
-
-/* Data TLB miss */
- START_EXCEPTION(data_tlb_miss)
- TLB_MISS_PROLOG
-
- /* Now we handle the fault proper. We only save DEAR in normal
- * fault case since that's the only interesting values here.
- * We could probably also optimize by not saving SRR0/1 in the
- * linear mapping case but I'll leave that for later
- */
- mfspr r14,SPRN_ESR
- mfspr r16,SPRN_DEAR /* get faulting address */
- srdi r15,r16,60 /* get region */
- cmpldi cr0,r15,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
- beq tlb_load_linear /* yes -> go to linear map load */
-
- /* The page tables are mapped virtually linear. At this point, though,
- * we don't know whether we are trying to fault in a first level
- * virtual address or a virtual page table address. We can get that
- * from bit 0x1 of the region ID which we have set for a page table
- */
- andi. r10,r15,0x1
- bne- virt_page_table_tlb_miss
-
- std r14,EX_TLB_ESR(r12); /* save ESR */
- std r16,EX_TLB_DEAR(r12); /* save DEAR */
-
- /* We need _PAGE_PRESENT and _PAGE_ACCESSED set */
- li r11,_PAGE_PRESENT
- oris r11,r11,_PAGE_ACCESSED@h
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- cmpldi cr0,r15,0 /* Check for user region */
-
- /* We pre-test some combination of permissions to avoid double
- * faults:
- *
- * We move the ESR:ST bit into the position of _PAGE_BAP_SW in the PTE
- * ESR_ST is 0x00800000
- * _PAGE_BAP_SW is 0x00000010
- * So the shift is >> 19. This tests for supervisor writeability.
- * If the page happens to be supervisor writeable and not user
- * writeable, we will take a new fault later, but that should be
- * a rare enough case.
- *
- * We also move ESR_ST in _PAGE_DIRTY position
- * _PAGE_DIRTY is 0x00001000 so the shift is >> 11
- *
- * MAS1 is preset for all we need except for TID that needs to
- * be cleared for kernel translations
- */
- rlwimi r11,r14,32-19,27,27
- rlwimi r11,r14,32-16,19,19
- beq normal_tlb_miss
- /* XXX replace the RMW cycles with immediate loads + writes */
-1: mfspr r10,SPRN_MAS1
- cmpldi cr0,r15,8 /* Check for vmalloc region */
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- beq+ normal_tlb_miss
-
- /* We got a crappy address, just fault with whatever DEAR and ESR
- * are here
- */
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
+tlb_miss_huge_e6500:
+ beq tlb_miss_fault_e6500
+ rlwinm r15,r14,32-_PAGE_PSIZE_SHIFT,0x1e
-/* Instruction TLB miss */
- START_EXCEPTION(instruction_tlb_miss)
- TLB_MISS_PROLOG
-
- /* If we take a recursive fault, the second level handler may need
- * to know whether we are handling a data or instruction fault in
- * order to get to the right store fault handler. We provide that
- * info by writing a crazy value in ESR in our exception frame
- */
- li r14,-1 /* store to exception frame is done later */
-
- /* Now we handle the fault proper. We only save DEAR in the non
- * linear mapping case since we know the linear mapping case will
- * not re-enter. We could indeed optimize and also not save SRR0/1
- * in the linear mapping case but I'll leave that for later
+ /*
+ * Now we build the MAS for a huge page.
*
- * Faulting address is SRR0 which is already in r16
+ * MAS 0 : ESEL needs to be filled by software round-robin
+ * - can be handled by indirect code
+ * MAS 1 : Need to clear IND and set TSIZE
+ * MAS 2,3+7: Needs to be redone similar to non-tablewalk handler
*/
- srdi r15,r16,60 /* get region */
- cmpldi cr0,r15,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
- beq tlb_load_linear /* yes -> go to linear map load */
- /* We do the user/kernel test for the PID here along with the RW test
- */
- li r11,_PAGE_PRESENT|_PAGE_EXEC /* Base perm */
- oris r11,r11,_PAGE_ACCESSED@h
-
- cmpldi cr0,r15,0 /* Check for user region */
- std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */
- beq normal_tlb_miss
-
- li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */
- oris r11,r11,_PAGE_ACCESSED@h
- /* XXX replace the RMW cycles with immediate loads + writes */
mfspr r10,SPRN_MAS1
- cmpldi cr0,r15,8 /* Check for vmalloc region */
- rlwinm r10,r10,0,16,1 /* Clear TID */
+ rlwinm r10,r10,0,~MAS1_IND
+ rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK
mtspr SPRN_MAS1,r10
- beq+ normal_tlb_miss
- /* We got a crappy address, just fault */
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
-
-/*
- * This is the guts of the first-level TLB miss handler for direct
- * misses. We are entered with:
- *
- * r16 = faulting address
- * r15 = region ID
- * r14 = crap (free to use)
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = PTE permission mask
- * r10 = crap (free to use)
- */
-normal_tlb_miss:
- /* So we first construct the page table address. We do that by
- * shifting the bottom of the address (not the region ID) by
- * PAGE_SHIFT-3, clearing the bottom 3 bits (get a PTE ptr) and
- * or'ing the fourth high bit.
- *
- * NOTE: For 64K pages, we do things slightly differently in
- * order to handle the weird page table format used by linux
- */
- ori r10,r15,0x1
-#ifdef CONFIG_PPC_64K_PAGES
- /* For the top bits, 16 bytes per PTE */
- rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
- /* Now create the bottom bits as 0 in position 0x8000 and
- * the rest calculated for 8 bytes per PTE
- */
- rldicl r15,r16,64-(PAGE_SHIFT-3),64-15
- /* Insert the bottom bits in */
- rlwimi r14,r15,0,16,31
-#else
- rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
-#endif
- sldi r15,r10,60
- clrrdi r14,r14,3
- or r10,r15,r14
-
-BEGIN_MMU_FTR_SECTION
- /* Set the TLB reservation and search for existing entry. Then load
- * the entry.
- */
- PPC_TLBSRX_DOT(0,R16)
- ld r14,0(r10)
- beq normal_tlb_miss_done
-MMU_FTR_SECTION_ELSE
- ld r14,0(r10)
-ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
-
-finish_normal_tlb_miss:
- /* Check if required permissions are met */
- andc. r15,r11,r14
- bne- normal_tlb_miss_access_fault
-
- /* Now we build the MAS:
- *
- * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
- * MAS 1 : Almost fully setup
- * - PID already updated by caller if necessary
- * - TSIZE need change if !base page size, not
- * yet implemented for now
- * MAS 2 : Defaults not useful, need to be redone
- * MAS 3+7 : Needs to be done
- *
- * TODO: mix up code below for better scheduling
- */
- clrrdi r11,r16,12 /* Clear low crap in EA */
- rlwimi r11,r14,32-19,27,31 /* Insert WIMGE */
- mtspr SPRN_MAS2,r11
-
- /* Check page size, if not standard, update MAS1 */
- rldicl r11,r14,64-8,64-8
-#ifdef CONFIG_PPC_64K_PAGES
- cmpldi cr0,r11,BOOK3E_PAGESZ_64K
-#else
- cmpldi cr0,r11,BOOK3E_PAGESZ_4K
-#endif
- beq- 1f
- mfspr r11,SPRN_MAS1
- rlwimi r11,r14,31,21,24
- rlwinm r11,r11,0,21,19
- mtspr SPRN_MAS1,r11
-1:
- /* Move RPN in position */
- rldicr r11,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
- clrldi r15,r11,12 /* Clear crap at the top */
+ li r10,-0x400
+ sld r15,r10,r15 /* Generate mask based on size */
+ and r10,r16,r15
+ rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT
+ rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
+ clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */
rlwimi r15,r14,32-8,22,25 /* Move in U bits */
+ mtspr SPRN_MAS2,r10
+ andi. r10,r14,_PAGE_DIRTY
rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */
/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
- andi. r11,r14,_PAGE_DIRTY
bne 1f
- li r11,MAS3_SW|MAS3_UW
- andc r15,r15,r11
+ li r10,MAS3_SW|MAS3_UW
+ andc r15,r15,r10
1:
-BEGIN_MMU_FTR_SECTION
- srdi r16,r15,32
- mtspr SPRN_MAS3,r15
- mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
mtspr SPRN_MAS7_MAS3,r15
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
- tlbwe
+ mfspr r10,SPRN_MAS0
+ b tlb_miss_huge_done_e6500
-normal_tlb_miss_done:
- /* We don't bother with restoring DEAR or ESR since we know we are
- * level 0 and just going back to userland. They are only needed
- * if you are going to take an access fault
- */
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
- TLB_MISS_EPILOG_SUCCESS
- rfi
+tlb_miss_kernel_e6500:
+ ld r14,PACA_KERNELPGD(r13)
+ srdi r15,r16,44 /* get kernel region */
+ xoris r15,r15,0xc /* Check for vmalloc region */
+ cmplwi cr1,r15,1
+ beq+ cr1,tlb_miss_common_e6500
-normal_tlb_miss_access_fault:
+tlb_miss_fault_e6500:
+ tlb_unlock_e6500
/* We need to check if it was an instruction miss */
- andi. r10,r11,_PAGE_EXEC
- bne 1f
- ld r14,EX_TLB_DEAR(r12)
- ld r15,EX_TLB_ESR(r12)
- mtspr SPRN_DEAR,r14
- mtspr SPRN_ESR,r15
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
+ andi. r16,r16,1
+ bne itlb_miss_fault_e6500
+dtlb_miss_fault_e6500:
+ tlb_epilog_bolted
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
+itlb_miss_fault_e6500:
+ tlb_epilog_bolted
b exc_instruction_storage_book3e
-
/*
* This is the guts of the second-level TLB miss handler for direct
* misses. We are entered with:
@@ -754,6 +532,7 @@ normal_tlb_miss_access_fault:
*/
virt_page_table_tlb_miss:
/* Are we hitting a kernel page table ? */
+ srdi r15,r16,60
andi. r10,r15,0x8
/* The cool thing now is that r10 contains 0 for user and 8 for kernel,
@@ -768,19 +547,22 @@ virt_page_table_tlb_miss:
mfspr r10,SPRN_MAS1
rlwinm r10,r10,0,16,1 /* Clear TID */
mtspr SPRN_MAS1,r10
+#ifdef CONFIG_PPC_KUAP
+ b 2f
1:
-BEGIN_MMU_FTR_SECTION
- /* Search if we already have a TLB entry for that virtual address, and
- * if we do, bail out.
- */
- PPC_TLBSRX_DOT(0,R16)
- beq virt_page_table_tlb_miss_done
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
+ mfspr r10,SPRN_MAS1
+ rlwinm. r10,r10,0,0x3fff0000
+ beq- virt_page_table_tlb_miss_fault /* KUAP fault */
+2:
+#else
+1:
+#endif
/* Now, we need to walk the page tables. First check if we are in
* range.
*/
- rldicl. r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+ rldicl r10,r16,64-(VPTE_INDEX_SIZE+3),VPTE_INDEX_SIZE+3+4
+ cmpldi r10,0x80
bne- virt_page_table_tlb_miss_fault
/* Get the PGD pointer */
@@ -795,14 +577,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
cmpdi cr0,r15,0
bge virt_page_table_tlb_miss_fault
-#ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */
rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3
ldx r15,r10,r15
cmpdi cr0,r15,0
bge virt_page_table_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */
rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
@@ -828,43 +608,13 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
clrldi r11,r15,4 /* remove region ID from RPN */
ori r10,r11,1 /* Or-in SR */
-BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
mtspr SPRN_MAS3,r10
mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
tlbwe
-BEGIN_MMU_FTR_SECTION
-virt_page_table_tlb_miss_done:
-
- /* We have overriden MAS2:EPN but currently our primary TLB miss
- * handler will always restore it so that should not be an issue,
- * if we ever optimize the primary handler to not write MAS2 on
- * some cases, we'll have to restore MAS2:EPN here based on the
- * original fault's DEAR. If we do that we have to modify the
- * ITLB miss handler to also store SRR0 in the exception frame
- * as DEAR.
- *
- * However, one nasty thing we did is we cleared the reservation
- * (well, potentially we did). We do a trick here thus if we
- * are not a level 0 exception (we interrupted the TLB miss) we
- * offset the return address by -4 in order to replay the tlbsrx
- * instruction there
- */
- subf r10,r13,r12
- cmpldi cr0,r10,PACA_EXTLB+EX_TLB_SIZE
- bne- 1f
- ld r11,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
- addi r10,r11,-4
- std r10,PACA_EXTLB+EX_TLB_SIZE+EX_TLB_SRR0(r13)
-1:
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
/* Return to caller, normal case */
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
TLB_MISS_EPILOG_SUCCESS
rfi
@@ -900,227 +650,17 @@ virt_page_table_tlb_miss_fault:
beq 1f
mtspr SPRN_DEAR,r15
mtspr SPRN_ESR,r16
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
- TLB_MISS_EPILOG_ERROR
+1: TLB_MISS_EPILOG_ERROR
b exc_instruction_storage_book3e
virt_page_table_tlb_miss_whacko_fault:
/* The linear fault will restart everything so ESR and DEAR will
* not have been clobbered, let's just fault with what we have
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
-
-
-/**************************************************************
- * *
- * TLB miss handling for Book3E with hw page table support *
- * *
- **************************************************************/
-
-
-/* Data TLB miss */
- START_EXCEPTION(data_tlb_miss_htw)
- TLB_MISS_PROLOG
-
- /* Now we handle the fault proper. We only save DEAR in normal
- * fault case since that's the only interesting values here.
- * We could probably also optimize by not saving SRR0/1 in the
- * linear mapping case but I'll leave that for later
- */
- mfspr r14,SPRN_ESR
- mfspr r16,SPRN_DEAR /* get faulting address */
- srdi r11,r16,60 /* get region */
- cmpldi cr0,r11,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
- beq tlb_load_linear /* yes -> go to linear map load */
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- cmpldi cr0,r11,0 /* Check for user region */
- ld r15,PACAPGD(r13) /* Load user pgdir */
- beq htw_tlb_miss
-
- /* XXX replace the RMW cycles with immediate loads + writes */
-1: mfspr r10,SPRN_MAS1
- cmpldi cr0,r11,8 /* Check for vmalloc region */
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
- beq+ htw_tlb_miss
-
- /* We got a crappy address, just fault with whatever DEAR and ESR
- * are here
- */
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
- b exc_data_storage_book3e
-
-/* Instruction TLB miss */
- START_EXCEPTION(instruction_tlb_miss_htw)
- TLB_MISS_PROLOG
-
- /* If we take a recursive fault, the second level handler may need
- * to know whether we are handling a data or instruction fault in
- * order to get to the right store fault handler. We provide that
- * info by keeping a crazy value for ESR in r14
- */
- li r14,-1 /* store to exception frame is done later */
-
- /* Now we handle the fault proper. We only save DEAR in the non
- * linear mapping case since we know the linear mapping case will
- * not re-enter. We could indeed optimize and also not save SRR0/1
- * in the linear mapping case but I'll leave that for later
- *
- * Faulting address is SRR0 which is already in r16
- */
- srdi r11,r16,60 /* get region */
- cmpldi cr0,r11,0xc /* linear mapping ? */
- TLB_MISS_STATS_SAVE_INFO
- beq tlb_load_linear /* yes -> go to linear map load */
-
- /* We do the user/kernel test for the PID here along with the RW test
- */
- cmpldi cr0,r11,0 /* Check for user region */
- ld r15,PACAPGD(r13) /* Load user pgdir */
- beq htw_tlb_miss
-
- /* XXX replace the RMW cycles with immediate loads + writes */
-1: mfspr r10,SPRN_MAS1
- cmpldi cr0,r11,8 /* Check for vmalloc region */
- rlwinm r10,r10,0,16,1 /* Clear TID */
- mtspr SPRN_MAS1,r10
- ld r15,PACA_KERNELPGD(r13) /* Load kernel pgdir */
- beq+ htw_tlb_miss
-
- /* We got a crappy address, just fault */
- TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
- TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
-
-
-/*
- * This is the guts of the second-level TLB miss handler for direct
- * misses. We are entered with:
- *
- * r16 = virtual page table faulting address
- * r15 = PGD pointer
- * r14 = ESR
- * r13 = PACA
- * r12 = TLB exception frame in PACA
- * r11 = crap (free to use)
- * r10 = crap (free to use)
- *
- * It can be re-entered by the linear mapping miss handler. However, to
- * avoid too much complication, it will save/restore things for us
- */
-htw_tlb_miss:
- /* Search if we already have a TLB entry for that virtual address, and
- * if we do, bail out.
- *
- * MAS1:IND should be already set based on MAS4
- */
- PPC_TLBSRX_DOT(0,R16)
- beq htw_tlb_miss_done
-
- /* Now, we need to walk the page tables. First check if we are in
- * range.
- */
- rldicl. r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
- bne- htw_tlb_miss_fault
-
- /* Get the PGD pointer */
- cmpldi cr0,r15,0
- beq- htw_tlb_miss_fault
-
- /* Get to PGD entry */
- rldicl r11,r16,64-(PGDIR_SHIFT-3),64-PGD_INDEX_SIZE-3
- clrrdi r10,r11,3
- ldx r15,r10,r15
- cmpdi cr0,r15,0
- bge htw_tlb_miss_fault
-
-#ifndef CONFIG_PPC_64K_PAGES
- /* Get to PUD entry */
- rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
- clrrdi r10,r11,3
- ldx r15,r10,r15
- cmpdi cr0,r15,0
- bge htw_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
-
- /* Get to PMD entry */
- rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
- clrrdi r10,r11,3
- ldx r15,r10,r15
- cmpdi cr0,r15,0
- bge htw_tlb_miss_fault
-
- /* Ok, we're all right, we can now create an indirect entry for
- * a 1M or 256M page.
- *
- * The last trick is now that because we use "half" pages for
- * the HTW (1M IND is 2K and 256M IND is 32K) we need to account
- * for an added LSB bit to the RPN. For 64K pages, there is no
- * problem as we already use 32K arrays (half PTE pages), but for
- * 4K page we need to extract a bit from the virtual address and
- * insert it into the "PA52" bit of the RPN.
- */
-#ifndef CONFIG_PPC_64K_PAGES
- rlwimi r15,r16,32-9,20,20
-#endif
- /* Now we build the MAS:
- *
- * MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
- * MAS 1 : Almost fully setup
- * - PID already updated by caller if necessary
- * - TSIZE for now is base ind page size always
- * MAS 2 : Use defaults
- * MAS 3+7 : Needs to be done
- */
-#ifdef CONFIG_PPC_64K_PAGES
- ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
-#else
- ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-#endif
-
-BEGIN_MMU_FTR_SECTION
- srdi r16,r10,32
- mtspr SPRN_MAS3,r10
- mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
-
- tlbwe
-
-htw_tlb_miss_done:
- /* We don't bother with restoring DEAR or ESR since we know we are
- * level 0 and just going back to userland. They are only needed
- * if you are going to take an access fault
- */
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
- TLB_MISS_EPILOG_SUCCESS
- rfi
-
-htw_tlb_miss_fault:
- /* We need to check if it was an instruction miss. We know this
- * though because r14 would contain -1
- */
- cmpdi cr0,r14,-1
- beq 1f
- mtspr SPRN_DEAR,r16
- mtspr SPRN_ESR,r14
- TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
TLB_MISS_EPILOG_ERROR
b exc_data_storage_book3e
-1: TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
- TLB_MISS_EPILOG_ERROR
- b exc_instruction_storage_book3e
/*
* This is the guts of "any" level TLB miss handler for kernel linear
@@ -1152,8 +692,8 @@ tlb_load_linear:
* we only use 1G pages for now. That might have to be changed in a
* final implementation, especially when dealing with hypervisors
*/
- ld r11,PACATOC(r13)
- ld r11,linear_map_top@got(r11)
+ __LOAD_PACA_TOC(r11)
+ LOAD_REG_ADDR_ALTTOC(r11, r11, linear_map_top)
ld r10,0(r11)
tovirt(10,10)
cmpld cr0,r16,r10
@@ -1178,13 +718,9 @@ tlb_load_linear:
clrldi r10,r10,4 /* clear region bits */
ori r10,r10,MAS3_SR|MAS3_SW|MAS3_SX
-BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
mtspr SPRN_MAS3,r10
mtspr SPRN_MAS7,r16
-MMU_FTR_SECTION_ELSE
- mtspr SPRN_MAS7_MAS3,r10
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_PAIRED_MAS)
tlbwe
@@ -1194,7 +730,6 @@ tlb_load_linear_done:
* We do that because we can't resume a fault within a TLB
* miss handler, due to MAS and TLB reservation being clobbered.
*/
- TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
TLB_MISS_EPILOG_ERROR
rfi
@@ -1206,13 +741,3 @@ tlb_load_linear_fault:
b exc_data_storage_book3e
1: TLB_MISS_EPILOG_ERROR_SPECIAL
b exc_instruction_storage_book3e
-
-
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-.tlb_stat_inc:
-1: ldarx r8,0,r9
- addi r8,r8,1
- stdcx. r8,0,r9
- bne- 1b
- blr
-#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d7737a542fd7..603a0f652ba6 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1,15 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* pSeries NUMA support
*
* Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
+#define pr_fmt(fmt) "numa: " fmt
+
#include <linux/threads.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
@@ -17,8 +15,8 @@
#include <linux/nodemask.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
-#include <linux/memblock.h>
#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/pfn.h>
#include <linux/cpuset.h>
#include <linux/node.h>
@@ -29,39 +27,42 @@
#include <linux/slab.h>
#include <asm/cputhreads.h>
#include <asm/sparsemem.h>
-#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/cputhreads.h>
#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
#include <asm/hvcall.h>
#include <asm/setup.h>
#include <asm/vdso.h>
+#include <asm/vphn.h>
+#include <asm/drmem.h>
static int numa_enabled = 1;
static char *cmdline __initdata;
-static int numa_debug;
-#define dbg(args...) if (numa_debug) { printk(KERN_INFO args); }
-
int numa_cpu_lookup_table[NR_CPUS];
cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
-struct pglist_data *node_data[MAX_NUMNODES];
EXPORT_SYMBOL(numa_cpu_lookup_table);
EXPORT_SYMBOL(node_to_cpumask_map);
-EXPORT_SYMBOL(node_data);
-static int min_common_depth;
+static int primary_domain_index;
static int n_mem_addr_cells, n_mem_size_cells;
-static int form1_affinity;
+
+#define FORM0_AFFINITY 0
+#define FORM1_AFFINITY 1
+#define FORM2_AFFINITY 2
+static int affinity_form;
#define MAX_DISTANCE_REF_POINTS 4
static int distance_ref_points_depth;
static const __be32 *distance_ref_points;
static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
+static int numa_distance_table[MAX_NUMNODES][MAX_NUMNODES] = {
+ [0 ... MAX_NUMNODES - 1] = { [0 ... MAX_NUMNODES - 1] = -1 }
+};
+static int numa_id_index_table[MAX_NUMNODES] = { [0 ... MAX_NUMNODES - 1] = NUMA_NO_NODE };
/*
* Allocate node_to_cpumask_map based on number of available nodes
@@ -78,11 +79,11 @@ static void __init setup_node_to_cpumask_map(void)
setup_nr_node_ids();
/* allocate the map */
- for (node = 0; node < nr_node_ids; node++)
+ for_each_node(node)
alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
/* cpumask_of_node() will now work */
- dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
+ pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}
static int __init fake_numa_create_new_node(unsigned long end_pfn,
@@ -126,35 +127,13 @@ static int __init fake_numa_create_new_node(unsigned long end_pfn,
cmdline = p;
fake_nid++;
*nid = fake_nid;
- dbg("created new fake_node with id %d\n", fake_nid);
+ pr_debug("created new fake_node with id %d\n", fake_nid);
return 1;
}
return 0;
}
-/*
- * get_node_active_region - Return active region containing pfn
- * Active range returned is empty if none found.
- * @pfn: The page to return the region for
- * @node_ar: Returned set to the active region containing @pfn
- */
-static void __init get_node_active_region(unsigned long pfn,
- struct node_active_region *node_ar)
-{
- unsigned long start_pfn, end_pfn;
- int i, nid;
-
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- if (pfn >= start_pfn && pfn < end_pfn) {
- node_ar->nid = nid;
- node_ar->start_pfn = start_pfn;
- node_ar->end_pfn = end_pfn;
- break;
- }
- }
-}
-
-static void reset_numa_cpu_lookup_table(void)
+static void __init reset_numa_cpu_lookup_table(void)
{
unsigned int cpu;
@@ -162,125 +141,137 @@ static void reset_numa_cpu_lookup_table(void)
numa_cpu_lookup_table[cpu] = -1;
}
-static void update_numa_cpu_lookup_table(unsigned int cpu, int node)
-{
- numa_cpu_lookup_table[cpu] = node;
-}
-
-static void map_cpu_to_node(int cpu, int node)
+void map_cpu_to_node(int cpu, int node)
{
update_numa_cpu_lookup_table(cpu, node);
- dbg("adding cpu %d to node %d\n", cpu, node);
-
- if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node])))
+ if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) {
+ pr_debug("adding cpu %d to node %d\n", cpu, node);
cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
+ }
}
#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_PPC_SPLPAR)
-static void unmap_cpu_from_node(unsigned long cpu)
+void unmap_cpu_from_node(unsigned long cpu)
{
int node = numa_cpu_lookup_table[cpu];
- dbg("removing cpu %lu from node %d\n", cpu, node);
-
if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) {
cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
+ pr_debug("removing cpu %lu from node %d\n", cpu, node);
} else {
- printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n",
- cpu, node);
+ pr_warn("Warning: cpu %lu not found in node %d\n", cpu, node);
}
}
#endif /* CONFIG_HOTPLUG_CPU || CONFIG_PPC_SPLPAR */
-/* must hold reference to node during call */
-static const __be32 *of_get_associativity(struct device_node *dev)
+static int __associativity_to_nid(const __be32 *associativity,
+ int max_array_sz)
{
- return of_get_property(dev, "ibm,associativity", NULL);
-}
+ int nid;
+ /*
+ * primary_domain_index is 1 based array index.
+ */
+ int index = primary_domain_index - 1;
+
+ if (!numa_enabled || index >= max_array_sz)
+ return NUMA_NO_NODE;
+ nid = of_read_number(&associativity[index], 1);
+
+ /* POWER4 LPAR uses 0xffff as invalid node */
+ if (nid == 0xffff || nid >= nr_node_ids)
+ nid = NUMA_NO_NODE;
+ return nid;
+}
/*
- * Returns the property linux,drconf-usable-memory if
- * it exists (the property exists only in kexec/kdump kernels,
- * added by kexec-tools)
+ * Returns nid in the range [0..nr_node_ids], or -1 if no useful NUMA
+ * info is found.
*/
-static const __be32 *of_get_usable_memory(struct device_node *memory)
+static int associativity_to_nid(const __be32 *associativity)
{
- const __be32 *prop;
- u32 len;
- prop = of_get_property(memory, "linux,drconf-usable-memory", &len);
- if (!prop || len < sizeof(unsigned int))
- return NULL;
- return prop;
+ int array_sz = of_read_number(associativity, 1);
+
+ /* Skip the first element in the associativity array */
+ return __associativity_to_nid((associativity + 1), array_sz);
}
-int __node_distance(int a, int b)
+static int __cpu_form2_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
- int i;
- int distance = LOCAL_DISTANCE;
-
- if (!form1_affinity)
- return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
-
- for (i = 0; i < distance_ref_points_depth; i++) {
- if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
- break;
+ int dist;
+ int node1, node2;
- /* Double the distance for each NUMA level */
- distance *= 2;
- }
+ node1 = associativity_to_nid(cpu1_assoc);
+ node2 = associativity_to_nid(cpu2_assoc);
- return distance;
+ dist = numa_distance_table[node1][node2];
+ if (dist <= LOCAL_DISTANCE)
+ return 0;
+ else if (dist <= REMOTE_DISTANCE)
+ return 1;
+ else
+ return 2;
}
-EXPORT_SYMBOL(__node_distance);
-static void initialize_distance_lookup_table(int nid,
- const __be32 *associativity)
+static int __cpu_form1_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
- int i;
+ int dist = 0;
- if (!form1_affinity)
- return;
+ int i, index;
for (i = 0; i < distance_ref_points_depth; i++) {
- const __be32 *entry;
-
- entry = &associativity[be32_to_cpu(distance_ref_points[i])];
- distance_lookup_table[nid][i] = of_read_number(entry, 1);
+ index = be32_to_cpu(distance_ref_points[i]);
+ if (cpu1_assoc[index] == cpu2_assoc[index])
+ break;
+ dist++;
}
+
+ return dist;
}
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
- * info is found.
- */
-static int associativity_to_nid(const __be32 *associativity)
+int cpu_relative_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
{
- int nid = -1;
+ /* We should not get called with FORM0 */
+ VM_WARN_ON(affinity_form == FORM0_AFFINITY);
+ if (affinity_form == FORM1_AFFINITY)
+ return __cpu_form1_relative_distance(cpu1_assoc, cpu2_assoc);
+ return __cpu_form2_relative_distance(cpu1_assoc, cpu2_assoc);
+}
- if (min_common_depth == -1)
- goto out;
+/* must hold reference to node during call */
+static const __be32 *of_get_associativity(struct device_node *dev)
+{
+ return of_get_property(dev, "ibm,associativity", NULL);
+}
- if (of_read_number(associativity, 1) >= min_common_depth)
- nid = of_read_number(&associativity[min_common_depth], 1);
+int __node_distance(int a, int b)
+{
+ int i;
+ int distance = LOCAL_DISTANCE;
- /* POWER4 LPAR uses 0xffff as invalid node */
- if (nid == 0xffff || nid >= MAX_NUMNODES)
- nid = -1;
+ if (affinity_form == FORM2_AFFINITY)
+ return numa_distance_table[a][b];
+ else if (affinity_form == FORM0_AFFINITY)
+ return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
- if (nid > 0 &&
- of_read_number(associativity, 1) >= distance_ref_points_depth)
- initialize_distance_lookup_table(nid, associativity);
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
+ break;
-out:
- return nid;
+ /* Double the distance for each NUMA level */
+ distance *= 2;
+ }
+
+ return distance;
}
+EXPORT_SYMBOL(__node_distance);
/* Returns the nid associated with the given device tree node,
* or -1 if not found.
*/
static int of_node_to_nid_single(struct device_node *device)
{
- int nid = -1;
+ int nid = NUMA_NO_NODE;
const __be32 *tmp;
tmp = of_get_associativity(device);
@@ -292,8 +283,7 @@ static int of_node_to_nid_single(struct device_node *device)
/* Walk the device tree upwards, looking for an associativity id */
int of_node_to_nid(struct device_node *device)
{
- struct device_node *tmp;
- int nid = -1;
+ int nid = NUMA_NO_NODE;
of_node_get(device);
while (device) {
@@ -301,20 +291,163 @@ int of_node_to_nid(struct device_node *device)
if (nid != -1)
break;
- tmp = device;
- device = of_get_parent(tmp);
- of_node_put(tmp);
+ device = of_get_next_parent(device);
}
of_node_put(device);
return nid;
}
-EXPORT_SYMBOL_GPL(of_node_to_nid);
+EXPORT_SYMBOL(of_node_to_nid);
+
+static void __initialize_form1_numa_distance(const __be32 *associativity,
+ int max_array_sz)
+{
+ int i, nid;
+
+ if (affinity_form != FORM1_AFFINITY)
+ return;
+
+ nid = __associativity_to_nid(associativity, max_array_sz);
+ if (nid != NUMA_NO_NODE) {
+ for (i = 0; i < distance_ref_points_depth; i++) {
+ const __be32 *entry;
+ int index = be32_to_cpu(distance_ref_points[i]) - 1;
+
+ /*
+ * broken hierarchy, return with broken distance table
+ */
+ if (WARN(index >= max_array_sz, "Broken ibm,associativity property"))
+ return;
+
+ entry = &associativity[index];
+ distance_lookup_table[nid][i] = of_read_number(entry, 1);
+ }
+ }
+}
-static int __init find_min_common_depth(void)
+static void initialize_form1_numa_distance(const __be32 *associativity)
+{
+ int array_sz;
+
+ array_sz = of_read_number(associativity, 1);
+ /* Skip the first element in the associativity array */
+ __initialize_form1_numa_distance(associativity + 1, array_sz);
+}
+
+/*
+ * Used to update distance information w.r.t newly added node.
+ */
+void update_numa_distance(struct device_node *node)
{
- int depth;
+ int nid;
+
+ if (affinity_form == FORM0_AFFINITY)
+ return;
+ else if (affinity_form == FORM1_AFFINITY) {
+ const __be32 *associativity;
+
+ associativity = of_get_associativity(node);
+ if (!associativity)
+ return;
+
+ initialize_form1_numa_distance(associativity);
+ return;
+ }
+
+ /* FORM2 affinity */
+ nid = of_node_to_nid_single(node);
+ if (nid == NUMA_NO_NODE)
+ return;
+
+ /*
+ * With FORM2 we expect NUMA distance of all possible NUMA
+ * nodes to be provided during boot.
+ */
+ WARN(numa_distance_table[nid][nid] == -1,
+ "NUMA distance details for node %d not provided\n", nid);
+}
+EXPORT_SYMBOL_GPL(update_numa_distance);
+
+/*
+ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+ * ibm,numa-distance-table = { N, 1, 2, 4, 5, 1, 6, .... N elements}
+ */
+static void __init initialize_form2_numa_distance_lookup_table(void)
+{
+ int i, j;
struct device_node *root;
+ const __u8 *form2_distances;
+ const __be32 *numa_lookup_index;
+ int form2_distances_length;
+ int max_numa_index, distance_index;
+
+ if (firmware_has_feature(FW_FEATURE_OPAL))
+ root = of_find_node_by_path("/ibm,opal");
+ else
+ root = of_find_node_by_path("/rtas");
+ if (!root)
+ root = of_find_node_by_path("/");
+
+ numa_lookup_index = of_get_property(root, "ibm,numa-lookup-index-table", NULL);
+ max_numa_index = of_read_number(&numa_lookup_index[0], 1);
+
+ /* first element of the array is the size and is encode-int */
+ form2_distances = of_get_property(root, "ibm,numa-distance-table", NULL);
+ form2_distances_length = of_read_number((const __be32 *)&form2_distances[0], 1);
+ /* Skip the size which is encoded int */
+ form2_distances += sizeof(__be32);
+
+ pr_debug("form2_distances_len = %d, numa_dist_indexes_len = %d\n",
+ form2_distances_length, max_numa_index);
+
+ for (i = 0; i < max_numa_index; i++)
+ /* +1 skip the max_numa_index in the property */
+ numa_id_index_table[i] = of_read_number(&numa_lookup_index[i + 1], 1);
+
+
+ if (form2_distances_length != max_numa_index * max_numa_index) {
+ WARN(1, "Wrong NUMA distance information\n");
+ form2_distances = NULL; // don't use it
+ }
+ distance_index = 0;
+ for (i = 0; i < max_numa_index; i++) {
+ for (j = 0; j < max_numa_index; j++) {
+ int nodeA = numa_id_index_table[i];
+ int nodeB = numa_id_index_table[j];
+ int dist;
+
+ if (form2_distances)
+ dist = form2_distances[distance_index++];
+ else if (nodeA == nodeB)
+ dist = LOCAL_DISTANCE;
+ else
+ dist = REMOTE_DISTANCE;
+ numa_distance_table[nodeA][nodeB] = dist;
+ pr_debug("dist[%d][%d]=%d ", nodeA, nodeB, dist);
+ }
+ }
+
+ of_node_put(root);
+}
+
+static int __init find_primary_domain_index(void)
+{
+ int index;
+ struct device_node *root;
+
+ /*
+ * Check for which form of affinity.
+ */
+ if (firmware_has_feature(FW_FEATURE_OPAL)) {
+ affinity_form = FORM1_AFFINITY;
+ } else if (firmware_has_feature(FW_FEATURE_FORM2_AFFINITY)) {
+ pr_debug("Using form 2 affinity\n");
+ affinity_form = FORM2_AFFINITY;
+ } else if (firmware_has_feature(FW_FEATURE_FORM1_AFFINITY)) {
+ pr_debug("Using form 1 affinity\n");
+ affinity_form = FORM1_AFFINITY;
+ } else
+ affinity_form = FORM0_AFFINITY;
if (firmware_has_feature(FW_FEATURE_OPAL))
root = of_find_node_by_path("/ibm,opal");
@@ -340,42 +473,37 @@ static int __init find_min_common_depth(void)
&distance_ref_points_depth);
if (!distance_ref_points) {
- dbg("NUMA: ibm,associativity-reference-points not found.\n");
+ pr_debug("ibm,associativity-reference-points not found.\n");
goto err;
}
distance_ref_points_depth /= sizeof(int);
-
- if (firmware_has_feature(FW_FEATURE_OPAL) ||
- firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
- dbg("Using form 1 affinity\n");
- form1_affinity = 1;
- }
-
- if (form1_affinity) {
- depth = of_read_number(distance_ref_points, 1);
- } else {
+ if (affinity_form == FORM0_AFFINITY) {
if (distance_ref_points_depth < 2) {
- printk(KERN_WARNING "NUMA: "
- "short ibm,associativity-reference-points\n");
+ pr_warn("short ibm,associativity-reference-points\n");
goto err;
}
- depth = of_read_number(&distance_ref_points[1], 1);
+ index = of_read_number(&distance_ref_points[1], 1);
+ } else {
+ /*
+ * Both FORM1 and FORM2 affinity find the primary domain details
+ * at the same offset.
+ */
+ index = of_read_number(distance_ref_points, 1);
}
-
/*
* Warn and cap if the hardware supports more than
* MAX_DISTANCE_REF_POINTS domains.
*/
if (distance_ref_points_depth > MAX_DISTANCE_REF_POINTS) {
- printk(KERN_WARNING "NUMA: distance array capped at "
- "%d entries\n", MAX_DISTANCE_REF_POINTS);
+ pr_warn("distance array capped at %d entries\n",
+ MAX_DISTANCE_REF_POINTS);
distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
}
of_node_put(root);
- return depth;
+ return index;
err:
of_node_put(root);
@@ -406,69 +534,6 @@ static unsigned long read_n_cells(int n, const __be32 **buf)
return result;
}
-/*
- * Read the next memblock list entry from the ibm,dynamic-memory property
- * and return the information in the provided of_drconf_cell structure.
- */
-static void read_drconf_cell(struct of_drconf_cell *drmem, const __be32 **cellp)
-{
- const __be32 *cp;
-
- drmem->base_addr = read_n_cells(n_mem_addr_cells, cellp);
-
- cp = *cellp;
- drmem->drc_index = of_read_number(cp, 1);
- drmem->reserved = of_read_number(&cp[1], 1);
- drmem->aa_index = of_read_number(&cp[2], 1);
- drmem->flags = of_read_number(&cp[3], 1);
-
- *cellp = cp + 4;
-}
-
-/*
- * Retrieve and validate the ibm,dynamic-memory property of the device tree.
- *
- * The layout of the ibm,dynamic-memory property is a number N of memblock
- * list entries followed by N memblock list entries. Each memblock list entry
- * contains information as laid out in the of_drconf_cell struct above.
- */
-static int of_get_drconf_memory(struct device_node *memory, const __be32 **dm)
-{
- const __be32 *prop;
- u32 len, entries;
-
- prop = of_get_property(memory, "ibm,dynamic-memory", &len);
- if (!prop || len < sizeof(unsigned int))
- return 0;
-
- entries = of_read_number(prop++, 1);
-
- /* Now that we know the number of entries, revalidate the size
- * of the property read in to ensure we have everything
- */
- if (len < (entries * (n_mem_addr_cells + 4) + 1) * sizeof(unsigned int))
- return 0;
-
- *dm = prop;
- return entries;
-}
-
-/*
- * Retrieve and validate the ibm,lmb-size property for drconf memory
- * from the device tree.
- */
-static u64 of_get_lmb_size(struct device_node *memory)
-{
- const __be32 *prop;
- u32 len;
-
- prop = of_get_property(memory, "ibm,lmb-size", &len);
- if (!prop || len < sizeof(unsigned int))
- return 0;
-
- return read_n_cells(n_mem_size_cells, &prop);
-}
-
struct assoc_arrays {
u32 n_arrays;
u32 array_sz;
@@ -485,19 +550,27 @@ struct assoc_arrays {
* indicating the size of each associativity array, followed by a list
* of N associativity arrays.
*/
-static int of_get_assoc_arrays(struct device_node *memory,
- struct assoc_arrays *aa)
+static int of_get_assoc_arrays(struct assoc_arrays *aa)
{
+ struct device_node *memory;
const __be32 *prop;
u32 len;
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (!memory)
+ return -1;
+
prop = of_get_property(memory, "ibm,associativity-lookup-arrays", &len);
- if (!prop || len < 2 * sizeof(unsigned int))
+ if (!prop || len < 2 * sizeof(unsigned int)) {
+ of_node_put(memory);
return -1;
+ }
aa->n_arrays = of_read_number(prop++, 1);
aa->array_sz = of_read_number(prop++, 1);
+ of_node_put(memory);
+
/* Now that we know the number of arrays and size of each array,
* revalidate the size of the property read in.
*/
@@ -508,66 +581,185 @@ static int of_get_assoc_arrays(struct device_node *memory,
return 0;
}
+static int __init get_nid_and_numa_distance(struct drmem_lmb *lmb)
+{
+ struct assoc_arrays aa = { .arrays = NULL };
+ int default_nid = NUMA_NO_NODE;
+ int nid = default_nid;
+ int rc, index;
+
+ if ((primary_domain_index < 0) || !numa_enabled)
+ return default_nid;
+
+ rc = of_get_assoc_arrays(&aa);
+ if (rc)
+ return default_nid;
+
+ if (primary_domain_index <= aa.array_sz &&
+ !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+ const __be32 *associativity;
+
+ index = lmb->aa_index * aa.array_sz;
+ associativity = &aa.arrays[index];
+ nid = __associativity_to_nid(associativity, aa.array_sz);
+ if (nid > 0 && affinity_form == FORM1_AFFINITY) {
+ /*
+ * lookup array associativity entries have
+ * no length of the array as the first element.
+ */
+ __initialize_form1_numa_distance(associativity, aa.array_sz);
+ }
+ }
+ return nid;
+}
+
/*
* This is like of_node_to_nid_single() for memory represented in the
* ibm,dynamic-reconfiguration-memory node.
*/
-static int of_drconf_to_nid_single(struct of_drconf_cell *drmem,
- struct assoc_arrays *aa)
+int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
- int default_nid = 0;
+ struct assoc_arrays aa = { .arrays = NULL };
+ int default_nid = NUMA_NO_NODE;
int nid = default_nid;
- int index;
+ int rc, index;
- if (min_common_depth > 0 && min_common_depth <= aa->array_sz &&
- !(drmem->flags & DRCONF_MEM_AI_INVALID) &&
- drmem->aa_index < aa->n_arrays) {
- index = drmem->aa_index * aa->array_sz + min_common_depth - 1;
- nid = of_read_number(&aa->arrays[index], 1);
+ if ((primary_domain_index < 0) || !numa_enabled)
+ return default_nid;
- if (nid == 0xffff || nid >= MAX_NUMNODES)
- nid = default_nid;
- }
+ rc = of_get_assoc_arrays(&aa);
+ if (rc)
+ return default_nid;
+
+ if (primary_domain_index <= aa.array_sz &&
+ !(lmb->flags & DRCONF_MEM_AI_INVALID) && lmb->aa_index < aa.n_arrays) {
+ const __be32 *associativity;
+ index = lmb->aa_index * aa.array_sz;
+ associativity = &aa.arrays[index];
+ nid = __associativity_to_nid(associativity, aa.array_sz);
+ }
return nid;
}
+#ifdef CONFIG_PPC_SPLPAR
+
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+ long rc, hwid;
+
+ /*
+ * On a shared lpar, device tree will not have node associativity.
+ * At this time lppaca, or its __old_status field may not be
+ * updated. Hence kernel cannot detect if its on a shared lpar. So
+ * request an explicit associativity irrespective of whether the
+ * lpar is shared or dedicated. Use the device tree property as a
+ * fallback. cpu_to_phys_id is only valid between
+ * smp_setup_cpu_maps() and smp_setup_pacas().
+ */
+ if (firmware_has_feature(FW_FEATURE_VPHN)) {
+ if (cpu_to_phys_id)
+ hwid = cpu_to_phys_id[lcpu];
+ else
+ hwid = get_hard_smp_processor_id(lcpu);
+
+ rc = hcall_vphn(hwid, VPHN_FLAG_VCPU, associativity);
+ if (rc == H_SUCCESS)
+ return 0;
+ }
+
+ return -1;
+}
+
+static int vphn_get_nid(long lcpu)
+{
+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+
+
+ if (!__vphn_get_associativity(lcpu, associativity))
+ return associativity_to_nid(associativity);
+
+ return NUMA_NO_NODE;
+
+}
+#else
+
+static int __vphn_get_associativity(long lcpu, __be32 *associativity)
+{
+ return -1;
+}
+
+static int vphn_get_nid(long unused)
+{
+ return NUMA_NO_NODE;
+}
+#endif /* CONFIG_PPC_SPLPAR */
+
/*
* Figure out to which domain a cpu belongs and stick it there.
* Return the id of the domain used.
*/
static int numa_setup_cpu(unsigned long lcpu)
{
- int nid;
struct device_node *cpu;
+ int fcpu = cpu_first_thread_sibling(lcpu);
+ int nid = NUMA_NO_NODE;
+
+ if (!cpu_present(lcpu)) {
+ set_cpu_numa_node(lcpu, first_online_node);
+ return first_online_node;
+ }
/*
* If a valid cpu-to-node mapping is already available, use it
* directly instead of querying the firmware, since it represents
* the most recent mapping notified to us by the platform (eg: VPHN).
+ * Since cpu_to_node binding remains the same for all threads in the
+ * core. If a valid cpu-to-node mapping is already available, for
+ * the first thread in the core, use it.
*/
- if ((nid = numa_cpu_lookup_table[lcpu]) >= 0) {
+ nid = numa_cpu_lookup_table[fcpu];
+ if (nid >= 0) {
map_cpu_to_node(lcpu, nid);
return nid;
}
+ nid = vphn_get_nid(lcpu);
+ if (nid != NUMA_NO_NODE)
+ goto out_present;
+
cpu = of_get_cpu_node(lcpu, NULL);
if (!cpu) {
WARN_ON(1);
- nid = 0;
- goto out;
+ if (cpu_present(lcpu))
+ goto out_present;
+ else
+ goto out;
}
nid = of_node_to_nid_single(cpu);
+ of_node_put(cpu);
- if (nid < 0 || !node_online(nid))
+out_present:
+ if (nid < 0 || !node_possible(nid))
nid = first_online_node;
-out:
- map_cpu_to_node(lcpu, nid);
- of_node_put(cpu);
+ /*
+ * Update for the first thread of the core. All threads of a core
+ * have to be part of the same node. This not only avoids querying
+ * for every other thread in the core, but always avoids a case
+ * where virtual node associativity change causes subsequent threads
+ * of a core to be associated with different nid. However if first
+ * thread is already online, expect it to have a valid mapping.
+ */
+ if (fcpu != lcpu) {
+ WARN_ON(cpu_online(fcpu));
+ map_cpu_to_node(fcpu, nid);
+ }
+ map_cpu_to_node(lcpu, nid);
+out:
return nid;
}
@@ -592,30 +784,19 @@ static void verify_cpu_node_mapping(int cpu, int node)
}
}
-static int cpu_numa_callback(struct notifier_block *nfb, unsigned long action,
- void *hcpu)
+/* Must run before sched domains notifier. */
+static int ppc_numa_cpu_prepare(unsigned int cpu)
{
- unsigned long lcpu = (unsigned long)hcpu;
- int ret = NOTIFY_DONE, nid;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- nid = numa_setup_cpu(lcpu);
- verify_cpu_node_mapping((int)lcpu, nid);
- ret = NOTIFY_OK;
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- unmap_cpu_from_node(lcpu);
- ret = NOTIFY_OK;
- break;
-#endif
- }
- return ret;
+ int nid;
+
+ nid = numa_setup_cpu(cpu);
+ verify_cpu_node_mapping(cpu, nid);
+ return 0;
+}
+
+static int ppc_numa_cpu_dead(unsigned int cpu)
+{
+ return 0;
}
/*
@@ -664,86 +845,83 @@ static inline int __init read_usm_ranges(const __be32 **usm)
* Extract NUMA information from the ibm,dynamic-reconfiguration-memory
* node. This assumes n_mem_{addr,size}_cells have been set.
*/
-static void __init parse_drconf_memory(struct device_node *memory)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+ const __be32 **usm,
+ void *data)
{
- const __be32 *uninitialized_var(dm), *usm;
- unsigned int n, rc, ranges, is_kexec_kdump = 0;
- unsigned long lmb_size, base, size, sz;
+ unsigned int ranges, is_kexec_kdump = 0;
+ unsigned long base, size, sz;
int nid;
- struct assoc_arrays aa = { .arrays = NULL };
-
- n = of_get_drconf_memory(memory, &dm);
- if (!n)
- return;
-
- lmb_size = of_get_lmb_size(memory);
- if (!lmb_size)
- return;
- rc = of_get_assoc_arrays(memory, &aa);
- if (rc)
- return;
+ /*
+ * Skip this block if the reserved bit is set in flags (0x80)
+ * or if the block is not assigned to this partition (0x8)
+ */
+ if ((lmb->flags & DRCONF_MEM_RESERVED)
+ || !(lmb->flags & DRCONF_MEM_ASSIGNED))
+ return 0;
- /* check if this is a kexec/kdump kernel */
- usm = of_get_usable_memory(memory);
- if (usm != NULL)
+ if (*usm)
is_kexec_kdump = 1;
- for (; n != 0; --n) {
- struct of_drconf_cell drmem;
+ base = lmb->base_addr;
+ size = drmem_lmb_size();
+ ranges = 1;
- read_drconf_cell(&drmem, &dm);
-
- /* skip this block if the reserved bit is set in flags (0x80)
- or if the block is not assigned to this partition (0x8) */
- if ((drmem.flags & DRCONF_MEM_RESERVED)
- || !(drmem.flags & DRCONF_MEM_ASSIGNED))
- continue;
-
- base = drmem.base_addr;
- size = lmb_size;
- ranges = 1;
+ if (is_kexec_kdump) {
+ ranges = read_usm_ranges(usm);
+ if (!ranges) /* there are no (base, size) duple */
+ return 0;
+ }
+ do {
if (is_kexec_kdump) {
- ranges = read_usm_ranges(&usm);
- if (!ranges) /* there are no (base, size) duple */
- continue;
+ base = read_n_cells(n_mem_addr_cells, usm);
+ size = read_n_cells(n_mem_size_cells, usm);
}
- do {
- if (is_kexec_kdump) {
- base = read_n_cells(n_mem_addr_cells, &usm);
- size = read_n_cells(n_mem_size_cells, &usm);
- }
- nid = of_drconf_to_nid_single(&drmem, &aa);
- fake_numa_create_new_node(
- ((base + size) >> PAGE_SHIFT),
- &nid);
- node_set_online(nid);
- sz = numa_enforce_memory_limit(base, size);
- if (sz)
- memblock_set_node(base, sz,
- &memblock.memory, nid);
- } while (--ranges);
- }
+
+ nid = get_nid_and_numa_distance(lmb);
+ fake_numa_create_new_node(((base + size) >> PAGE_SHIFT),
+ &nid);
+ node_set_online(nid);
+ sz = numa_enforce_memory_limit(base, size);
+ if (sz)
+ memblock_set_node(base, sz, &memblock.memory, nid);
+ } while (--ranges);
+
+ return 0;
}
static int __init parse_numa_properties(void)
{
- struct device_node *memory;
+ struct device_node *memory, *pci;
int default_nid = 0;
unsigned long i;
+ const __be32 *associativity;
if (numa_enabled == 0) {
- printk(KERN_WARNING "NUMA disabled by user\n");
+ pr_warn("disabled by user\n");
return -1;
}
- min_common_depth = find_min_common_depth();
+ primary_domain_index = find_primary_domain_index();
+
+ if (primary_domain_index < 0) {
+ /*
+ * if we fail to parse primary_domain_index from device tree
+ * mark the numa disabled, boot with numa disabled.
+ */
+ numa_enabled = false;
+ return primary_domain_index;
+ }
- if (min_common_depth < 0)
- return min_common_depth;
+ pr_debug("associativity depth for CPU/Memory: %d\n", primary_domain_index);
- dbg("NUMA associativity depth for CPU/Memory: %d\n", min_common_depth);
+ /*
+ * If it is FORM2 initialize the distance table here.
+ */
+ if (affinity_form == FORM2_AFFINITY)
+ initialize_form2_numa_distance_lookup_table();
/*
* Even though we connect cpus to numa domains later in SMP
@@ -751,22 +929,36 @@ static int __init parse_numa_properties(void)
* each node to be onlined must have NODE_DATA etc backing it.
*/
for_each_present_cpu(i) {
+ __be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
struct device_node *cpu;
- int nid;
+ int nid = NUMA_NO_NODE;
- cpu = of_get_cpu_node(i, NULL);
- BUG_ON(!cpu);
- nid = of_node_to_nid_single(cpu);
- of_node_put(cpu);
+ memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
- /*
- * Don't fall back to default_nid yet -- we will plug
- * cpus into nodes once the memory scan has discovered
- * the topology.
- */
- if (nid < 0)
- continue;
- node_set_online(nid);
+ if (__vphn_get_associativity(i, vphn_assoc) == 0) {
+ nid = associativity_to_nid(vphn_assoc);
+ initialize_form1_numa_distance(vphn_assoc);
+ } else {
+
+ /*
+ * Don't fall back to default_nid yet -- we will plug
+ * cpus into nodes once the memory scan has discovered
+ * the topology.
+ */
+ cpu = of_get_cpu_node(i, NULL);
+ BUG_ON(!cpu);
+
+ associativity = of_get_associativity(cpu);
+ if (associativity) {
+ nid = associativity_to_nid(associativity);
+ initialize_form1_numa_distance(associativity);
+ }
+ of_node_put(cpu);
+ }
+
+ /* node_set_online() is an UB if 'nid' is negative */
+ if (likely(nid >= 0))
+ node_set_online(nid);
}
get_n_mem_cells(&n_mem_addr_cells, &n_mem_size_cells);
@@ -798,34 +990,46 @@ new_range:
* have associativity properties. If none, then
* everything goes to default_nid.
*/
- nid = of_node_to_nid_single(memory);
- if (nid < 0)
+ associativity = of_get_associativity(memory);
+ if (associativity) {
+ nid = associativity_to_nid(associativity);
+ initialize_form1_numa_distance(associativity);
+ } else
nid = default_nid;
fake_numa_create_new_node(((start + size) >> PAGE_SHIFT), &nid);
node_set_online(nid);
- if (!(size = numa_enforce_memory_limit(start, size))) {
- if (--ranges)
- goto new_range;
- else
- continue;
- }
-
- memblock_set_node(start, size, &memblock.memory, nid);
+ size = numa_enforce_memory_limit(start, size);
+ if (size)
+ memblock_set_node(start, size, &memblock.memory, nid);
if (--ranges)
goto new_range;
}
+ for_each_node_by_name(pci, "pci") {
+ int nid = NUMA_NO_NODE;
+
+ associativity = of_get_associativity(pci);
+ if (associativity) {
+ nid = associativity_to_nid(associativity);
+ initialize_form1_numa_distance(associativity);
+ }
+ if (likely(nid >= 0) && !node_online(nid))
+ node_set_online(nid);
+ }
+
/*
* Now do the same thing for each MEMBLOCK listed in the
* ibm,dynamic-memory property in the
* ibm,dynamic-reconfiguration-memory node.
*/
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory)
- parse_drconf_memory(memory);
+ if (memory) {
+ walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
+ of_node_put(memory);
+ }
return 0;
}
@@ -836,17 +1040,12 @@ static void __init setup_nonnuma(void)
unsigned long total_ram = memblock_phys_mem_size();
unsigned long start_pfn, end_pfn;
unsigned int nid = 0;
- struct memblock_region *reg;
-
- printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
- top_of_ram, total_ram);
- printk(KERN_DEBUG "Memory hole size: %ldMB\n",
- (top_of_ram - total_ram) >> 20);
+ int i;
- for_each_memblock(memory, reg) {
- start_pfn = memblock_region_memory_base_pfn(reg);
- end_pfn = memblock_region_memory_end_pfn(reg);
+ pr_debug("Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram);
+ pr_debug("Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20);
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
fake_numa_create_new_node(end_pfn, &nid);
memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn),
@@ -860,11 +1059,11 @@ void __init dump_numa_cpu_topology(void)
unsigned int node;
unsigned int cpu, count;
- if (min_common_depth == -1 || !numa_enabled)
+ if (!numa_enabled)
return;
for_each_online_node(node) {
- printk(KERN_DEBUG "Node %d CPUs:", node);
+ pr_info("Node %d CPUs:", node);
count = 0;
/*
@@ -875,270 +1074,156 @@ void __init dump_numa_cpu_topology(void)
if (cpumask_test_cpu(cpu,
node_to_cpumask_map[node])) {
if (count == 0)
- printk(" %u", cpu);
+ pr_cont(" %u", cpu);
++count;
} else {
if (count > 1)
- printk("-%u", cpu - 1);
+ pr_cont("-%u", cpu - 1);
count = 0;
}
}
if (count > 1)
- printk("-%u", nr_cpu_ids - 1);
- printk("\n");
+ pr_cont("-%u", nr_cpu_ids - 1);
+ pr_cont("\n");
}
}
-static void __init dump_numa_memory_topology(void)
+/* Initialize NODE_DATA for a node on the local memory */
+static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
- unsigned int node;
- unsigned int count;
+ u64 spanned_pages = end_pfn - start_pfn;
- if (min_common_depth == -1 || !numa_enabled)
- return;
+ alloc_node_data(nid);
- for_each_online_node(node) {
- unsigned long i;
+ NODE_DATA(nid)->node_id = nid;
+ NODE_DATA(nid)->node_start_pfn = start_pfn;
+ NODE_DATA(nid)->node_spanned_pages = spanned_pages;
+}
- printk(KERN_DEBUG "Node %d Memory:", node);
+static void __init find_possible_nodes(void)
+{
+ struct device_node *rtas, *root;
+ const __be32 *domains = NULL;
+ int prop_length, max_nodes;
+ u32 i;
- count = 0;
+ if (!numa_enabled)
+ return;
- for (i = 0; i < memblock_end_of_DRAM();
- i += (1 << SECTION_SIZE_BITS)) {
- if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
- if (count == 0)
- printk(" 0x%lx", i);
- ++count;
- } else {
- if (count > 0)
- printk("-0x%lx", i);
- count = 0;
- }
- }
+ rtas = of_find_node_by_path("/rtas");
+ if (!rtas)
+ return;
- if (count > 0)
- printk("-0x%lx", i);
- printk("\n");
+ /*
+ * ibm,current-associativity-domains is a fairly recent property. If
+ * it doesn't exist, then fall back on ibm,max-associativity-domains.
+ * Current denotes what the platform can support compared to max
+ * which denotes what the Hypervisor can support.
+ *
+ * If the LPAR is migratable, new nodes might be activated after a LPM,
+ * so we should consider the max number in that case.
+ */
+ root = of_find_node_by_path("/");
+ if (!of_get_property(root, "ibm,migratable-partition", NULL))
+ domains = of_get_property(rtas,
+ "ibm,current-associativity-domains",
+ &prop_length);
+ of_node_put(root);
+ if (!domains) {
+ domains = of_get_property(rtas, "ibm,max-associativity-domains",
+ &prop_length);
+ if (!domains)
+ goto out;
}
-}
-/*
- * Allocate some memory, satisfying the memblock or bootmem allocator where
- * required. nid is the preferred node and end is the physical address of
- * the highest address in the node.
- *
- * Returns the virtual address of the memory.
- */
-static void __init *careful_zallocation(int nid, unsigned long size,
- unsigned long align,
- unsigned long end_pfn)
-{
- void *ret;
- int new_nid;
- unsigned long ret_paddr;
+ max_nodes = of_read_number(&domains[primary_domain_index], 1);
+ pr_info("Partition configured for %d NUMA nodes.\n", max_nodes);
- ret_paddr = __memblock_alloc_base(size, align, end_pfn << PAGE_SHIFT);
+ for (i = 0; i < max_nodes; i++) {
+ if (!node_possible(i))
+ node_set(i, node_possible_map);
+ }
- /* retry over all memory */
- if (!ret_paddr)
- ret_paddr = __memblock_alloc_base(size, align, memblock_end_of_DRAM());
+ prop_length /= sizeof(int);
+ if (prop_length > primary_domain_index + 2)
+ coregroup_enabled = 1;
+
+out:
+ of_node_put(rtas);
+}
- if (!ret_paddr)
- panic("numa.c: cannot allocate %lu bytes for node %d",
- size, nid);
+void __init mem_topology_setup(void)
+{
+ int cpu;
- ret = __va(ret_paddr);
+ max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
+ min_low_pfn = MEMORY_START >> PAGE_SHIFT;
/*
- * We initialize the nodes in numeric order: 0, 1, 2...
- * and hand over control from the MEMBLOCK allocator to the
- * bootmem allocator. If this function is called for
- * node 5, then we know that all nodes <5 are using the
- * bootmem allocator instead of the MEMBLOCK allocator.
- *
- * So, check the nid from which this allocation came
- * and double check to see if we need to use bootmem
- * instead of the MEMBLOCK. We don't free the MEMBLOCK memory
- * since it would be useless.
+ * Linux/mm assumes node 0 to be online at boot. However this is not
+ * true on PowerPC, where node 0 is similar to any other node: it
+ * could be a cpuless, memoryless node. So force node 0 to be offline
+ * for now. This will prevent cpuless, memoryless node 0 showing up
+ * unnecessarily as online. If a node has cpus or memory that need
+ * to be online, then node will anyway be marked online.
*/
- new_nid = early_pfn_to_nid(ret_paddr >> PAGE_SHIFT);
- if (new_nid < nid) {
- ret = __alloc_bootmem_node(NODE_DATA(new_nid),
- size, align, 0);
+ node_set_offline(0);
- dbg("alloc_bootmem %p %lx\n", ret, size);
- }
+ if (parse_numa_properties())
+ setup_nonnuma();
- memset(ret, 0, size);
- return ret;
-}
+ /*
+ * Modify the set of possible NUMA nodes to reflect information
+ * available about the set of online nodes, and the set of nodes
+ * that we expect to make use of for this platform's affinity
+ * calculations.
+ */
+ nodes_and(node_possible_map, node_possible_map, node_online_map);
-static struct notifier_block ppc64_numa_nb = {
- .notifier_call = cpu_numa_callback,
- .priority = 1 /* Must run before sched domains notifier. */
-};
+ find_possible_nodes();
-static void __init mark_reserved_regions_for_nid(int nid)
-{
- struct pglist_data *node = NODE_DATA(nid);
- struct memblock_region *reg;
+ setup_node_to_cpumask_map();
- for_each_memblock(reserved, reg) {
- unsigned long physbase = reg->base;
- unsigned long size = reg->size;
- unsigned long start_pfn = physbase >> PAGE_SHIFT;
- unsigned long end_pfn = PFN_UP(physbase + size);
- struct node_active_region node_ar;
- unsigned long node_end_pfn = pgdat_end_pfn(node);
+ reset_numa_cpu_lookup_table();
+ for_each_possible_cpu(cpu) {
/*
- * Check to make sure that this memblock.reserved area is
- * within the bounds of the node that we care about.
- * Checking the nid of the start and end points is not
- * sufficient because the reserved area could span the
- * entire node.
+ * Powerpc with CONFIG_NUMA always used to have a node 0,
+ * even if it was memoryless or cpuless. For all cpus that
+ * are possible but not present, cpu_to_node() would point
+ * to node 0. To remove a cpuless, memoryless dummy node,
+ * powerpc needs to make sure all possible but not present
+ * cpu_to_node are set to a proper node.
*/
- if (end_pfn <= node->node_start_pfn ||
- start_pfn >= node_end_pfn)
- continue;
-
- get_node_active_region(start_pfn, &node_ar);
- while (start_pfn < end_pfn &&
- node_ar.start_pfn < node_ar.end_pfn) {
- unsigned long reserve_size = size;
- /*
- * if reserved region extends past active region
- * then trim size to active region
- */
- if (end_pfn > node_ar.end_pfn)
- reserve_size = (node_ar.end_pfn << PAGE_SHIFT)
- - physbase;
- /*
- * Only worry about *this* node, others may not
- * yet have valid NODE_DATA().
- */
- if (node_ar.nid == nid) {
- dbg("reserve_bootmem %lx %lx nid=%d\n",
- physbase, reserve_size, node_ar.nid);
- reserve_bootmem_node(NODE_DATA(node_ar.nid),
- physbase, reserve_size,
- BOOTMEM_DEFAULT);
- }
- /*
- * if reserved region is contained in the active region
- * then done.
- */
- if (end_pfn <= node_ar.end_pfn)
- break;
-
- /*
- * reserved region extends past the active region
- * get next active region that contains this
- * reserved region
- */
- start_pfn = node_ar.end_pfn;
- physbase = start_pfn << PAGE_SHIFT;
- size = size - reserve_size;
- get_node_active_region(start_pfn, &node_ar);
- }
+ numa_setup_cpu(cpu);
}
}
-
-void __init do_init_bootmem(void)
+void __init initmem_init(void)
{
- int nid, cpu;
-
- min_low_pfn = 0;
- max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
- max_pfn = max_low_pfn;
+ int nid;
- if (parse_numa_properties())
- setup_nonnuma();
- else
- dump_numa_memory_topology();
+ memblock_dump_all();
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
- void *bootmem_vaddr;
- unsigned long bootmap_pages;
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
-
- /*
- * Allocate the node structure node local if possible
- *
- * Be careful moving this around, as it relies on all
- * previous nodes' bootmem to be initialized and have
- * all reserved areas marked.
- */
- NODE_DATA(nid) = careful_zallocation(nid,
- sizeof(struct pglist_data),
- SMP_CACHE_BYTES, end_pfn);
-
- dbg("node %d\n", nid);
- dbg("NODE_DATA() = %p\n", NODE_DATA(nid));
-
- NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
- NODE_DATA(nid)->node_start_pfn = start_pfn;
- NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
-
- if (NODE_DATA(nid)->node_spanned_pages == 0)
- continue;
-
- dbg("start_paddr = %lx\n", start_pfn << PAGE_SHIFT);
- dbg("end_paddr = %lx\n", end_pfn << PAGE_SHIFT);
-
- bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
- bootmem_vaddr = careful_zallocation(nid,
- bootmap_pages << PAGE_SHIFT,
- PAGE_SIZE, end_pfn);
-
- dbg("bootmap_vaddr = %p\n", bootmem_vaddr);
-
- init_bootmem_node(NODE_DATA(nid),
- __pa(bootmem_vaddr) >> PAGE_SHIFT,
- start_pfn, end_pfn);
-
- free_bootmem_with_active_regions(nid, end_pfn);
- /*
- * Be very careful about moving this around. Future
- * calls to careful_zallocation() depend on this getting
- * done correctly.
- */
- mark_reserved_regions_for_nid(nid);
- sparse_memory_present_with_active_regions(nid);
+ setup_node_data(nid, start_pfn, end_pfn);
}
- init_bootmem_done = 1;
-
- /*
- * Now bootmem is initialised we can create the node to cpumask
- * lookup tables and setup the cpu callback to populate them.
- */
- setup_node_to_cpumask_map();
+ sparse_init();
- reset_numa_cpu_lookup_table();
- register_cpu_notifier(&ppc64_numa_nb);
/*
* We need the numa_cpu_lookup_table to be accurate for all CPUs,
* even before we online them, so that we can use cpu_to_{node,mem}
* early in boot, cf. smp_prepare_cpus().
+ * _nocalls() + manual invocation is used because cpuhp is not yet
+ * initialized for the boot CPU.
*/
- for_each_possible_cpu(cpu) {
- cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
- (void *)(unsigned long)cpu);
- }
-}
-
-void __init paging_init(void)
-{
- unsigned long max_zone_pfns[MAX_NR_ZONES];
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] = memblock_end_of_DRAM() >> PAGE_SHIFT;
- free_area_init_nodes(max_zone_pfns);
+ cpuhp_setup_state_nocalls(CPUHP_POWER_NUMA_PREPARE, "powerpc/numa:prepare",
+ ppc_numa_cpu_prepare, ppc_numa_cpu_dead);
}
static int __init early_numa(char *p)
@@ -1149,9 +1234,6 @@ static int __init early_numa(char *p)
if (strstr(p, "off"))
numa_enabled = 0;
- if (strstr(p, "debug"))
- numa_debug = 1;
-
p = strstr(p, "fake=");
if (p)
cmdline = p + strlen("fake=");
@@ -1166,43 +1248,26 @@ early_param("numa", early_numa);
* memory represented in the device tree by the property
* ibm,dynamic-reconfiguration-memory/ibm,dynamic-memory.
*/
-static int hot_add_drconf_scn_to_nid(struct device_node *memory,
- unsigned long scn_addr)
+static int hot_add_drconf_scn_to_nid(unsigned long scn_addr)
{
- const __be32 *dm;
- unsigned int drconf_cell_cnt, rc;
+ struct drmem_lmb *lmb;
unsigned long lmb_size;
- struct assoc_arrays aa;
- int nid = -1;
-
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- if (!drconf_cell_cnt)
- return -1;
-
- lmb_size = of_get_lmb_size(memory);
- if (!lmb_size)
- return -1;
-
- rc = of_get_assoc_arrays(memory, &aa);
- if (rc)
- return -1;
-
- for (; drconf_cell_cnt != 0; --drconf_cell_cnt) {
- struct of_drconf_cell drmem;
+ int nid = NUMA_NO_NODE;
- read_drconf_cell(&drmem, &dm);
+ lmb_size = drmem_lmb_size();
+ for_each_drmem_lmb(lmb) {
/* skip this block if it is reserved or not assigned to
* this partition */
- if ((drmem.flags & DRCONF_MEM_RESERVED)
- || !(drmem.flags & DRCONF_MEM_ASSIGNED))
+ if ((lmb->flags & DRCONF_MEM_RESERVED)
+ || !(lmb->flags & DRCONF_MEM_ASSIGNED))
continue;
- if ((scn_addr < drmem.base_addr)
- || (scn_addr >= (drmem.base_addr + lmb_size)))
+ if ((scn_addr < lmb->base_addr)
+ || (scn_addr >= (lmb->base_addr + lmb_size)))
continue;
- nid = of_drconf_to_nid_single(&drmem, &aa);
+ nid = of_drconf_to_nid_single(lmb);
break;
}
@@ -1217,26 +1282,18 @@ static int hot_add_drconf_scn_to_nid(struct device_node *memory,
static int hot_add_node_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory;
- int nid = -1;
+ int nid = NUMA_NO_NODE;
for_each_node_by_type(memory, "memory") {
- unsigned long start, size;
- int ranges;
- const __be32 *memcell_buf;
- unsigned int len;
+ int i = 0;
- memcell_buf = of_get_property(memory, "reg", &len);
- if (!memcell_buf || len <= 0)
- continue;
+ while (1) {
+ struct resource res;
- /* ranges in cell */
- ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-
- while (ranges--) {
- start = read_n_cells(n_mem_addr_cells, &memcell_buf);
- size = read_n_cells(n_mem_size_cells, &memcell_buf);
+ if (of_address_to_resource(memory, i++, &res))
+ break;
- if ((scn_addr < start) || (scn_addr >= (start + size)))
+ if ((scn_addr < res.start) || (scn_addr > res.end))
continue;
nid = of_node_to_nid_single(memory);
@@ -1260,50 +1317,45 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr)
int hot_add_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory = NULL;
- int nid, found = 0;
+ int nid;
- if (!numa_enabled || (min_common_depth < 0))
+ if (!numa_enabled)
return first_online_node;
memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
if (memory) {
- nid = hot_add_drconf_scn_to_nid(memory, scn_addr);
+ nid = hot_add_drconf_scn_to_nid(scn_addr);
of_node_put(memory);
} else {
nid = hot_add_node_scn_to_nid(scn_addr);
}
- if (nid < 0 || !node_online(nid))
+ if (nid < 0 || !node_possible(nid))
nid = first_online_node;
- if (NODE_DATA(nid)->node_spanned_pages)
- return nid;
-
- for_each_online_node(nid) {
- if (NODE_DATA(nid)->node_spanned_pages) {
- found = 1;
- break;
- }
- }
-
- BUG_ON(!found);
return nid;
}
-static u64 hot_add_drconf_memory_max(void)
+u64 hot_add_drconf_memory_max(void)
{
- struct device_node *memory = NULL;
- unsigned int drconf_cell_cnt = 0;
- u64 lmb_size = 0;
- const __be32 *dm = NULL;
-
- memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
- if (memory) {
- drconf_cell_cnt = of_get_drconf_memory(memory, &dm);
- lmb_size = of_get_lmb_size(memory);
- of_node_put(memory);
- }
- return lmb_size * drconf_cell_cnt;
+ struct device_node *memory = NULL;
+ struct device_node *dn = NULL;
+ const __be64 *lrdr = NULL;
+
+ dn = of_find_node_by_path("/rtas");
+ if (dn) {
+ lrdr = of_get_property(dn, "ibm,lrdr-capacity", NULL);
+ of_node_put(dn);
+ if (lrdr)
+ return be64_to_cpup(lrdr);
+ }
+
+ memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+ if (memory) {
+ of_node_put(memory);
+ return drmem_lmb_memory_max();
+ }
+ return 0;
}
/*
@@ -1320,496 +1372,95 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-struct topology_update_data {
- struct topology_update_data *next;
- unsigned int cpu;
- int old_nid;
- int new_nid;
-};
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
- int cpu;
-
- /* The VPHN feature supports a maximum of 8 reference points */
- BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
- for_each_possible_cpu(cpu) {
- int i;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++)
- counts[i] = hypervisor_counts[i];
- }
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
- int cpu;
- cpumask_t *changes = &cpu_associativity_changes_mask;
-
- for_each_possible_cpu(cpu) {
- int i, changed = 0;
- u8 *counts = vphn_cpu_change_counts[cpu];
- volatile u8 *hypervisor_counts = lppaca[cpu].vphn_assoc_counts;
-
- for (i = 0; i < distance_ref_points_depth; i++) {
- if (hypervisor_counts[i] != counts[i]) {
- counts[i] = hypervisor_counts[i];
- changed = 1;
- }
- }
- if (changed) {
- cpumask_or(changes, changes, cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- }
- }
-
- return cpumask_weight(changes);
-}
-
-/*
- * 6 64-bit registers unpacked into 12 32-bit associativity values. To form
- * the complete property we have to add the length in the first cell.
- */
-#define VPHN_ASSOC_BUFSIZE (6*sizeof(u64)/sizeof(u32) + 1)
-
-/*
- * Convert the associativity domain numbers returned from the hypervisor
- * to the sequence they would appear in the ibm,associativity property.
- */
-static int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
-{
- int i, nr_assoc_doms = 0;
- const __be16 *field = (const __be16 *) packed;
-
-#define VPHN_FIELD_UNUSED (0xffff)
-#define VPHN_FIELD_MSB (0x8000)
-#define VPHN_FIELD_MASK (~VPHN_FIELD_MSB)
-
- for (i = 1; i < VPHN_ASSOC_BUFSIZE; i++) {
- if (be16_to_cpup(field) == VPHN_FIELD_UNUSED) {
- /* All significant fields processed, and remaining
- * fields contain the reserved value of all 1's.
- * Just store them.
- */
- unpacked[i] = *((__be32 *)field);
- field += 2;
- } else if (be16_to_cpup(field) & VPHN_FIELD_MSB) {
- /* Data is in the lower 15 bits of this field */
- unpacked[i] = cpu_to_be32(
- be16_to_cpup(field) & VPHN_FIELD_MASK);
- field++;
- nr_assoc_doms++;
- } else {
- /* Data is in the lower 15 bits of this field
- * concatenated with the next 16 bit field
- */
- unpacked[i] = *((__be32 *)field);
- field += 2;
- nr_assoc_doms++;
- }
- }
-
- /* The first cell contains the length of the property */
- unpacked[0] = cpu_to_be32(nr_assoc_doms);
-
- return nr_assoc_doms;
-}
+static int topology_inited;
/*
* Retrieve the new associativity information for a virtual processor's
* home node.
*/
-static long hcall_vphn(unsigned long cpu, __be32 *associativity)
-{
- long rc;
- long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
- u64 flags = 1;
- int hwcpu = get_hard_smp_processor_id(cpu);
-
- rc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, retbuf, flags, hwcpu);
- vphn_unpack_associativity(retbuf, associativity);
-
- return rc;
-}
-
static long vphn_get_associativity(unsigned long cpu,
__be32 *associativity)
{
long rc;
- rc = hcall_vphn(cpu, associativity);
+ rc = hcall_vphn(get_hard_smp_processor_id(cpu),
+ VPHN_FLAG_VCPU, associativity);
switch (rc) {
+ case H_SUCCESS:
+ pr_debug("VPHN hcall succeeded. Reset polling...\n");
+ goto out;
+
case H_FUNCTION:
- printk(KERN_INFO
- "VPHN is not supported. Disabling polling...\n");
- stop_topology_update();
+ pr_err_ratelimited("VPHN unsupported. Disabling polling...\n");
break;
case H_HARDWARE:
- printk(KERN_ERR
- "hcall_vphn() experienced a hardware fault "
+ pr_err_ratelimited("hcall_vphn() experienced a hardware fault "
"preventing VPHN. Disabling polling...\n");
- stop_topology_update();
+ break;
+ case H_PARAMETER:
+ pr_err_ratelimited("hcall_vphn() was passed an invalid parameter. "
+ "Disabling polling...\n");
+ break;
+ default:
+ pr_err_ratelimited("hcall_vphn() returned %ld. Disabling polling...\n"
+ , rc);
+ break;
}
-
+out:
return rc;
}
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
- struct topology_update_data *update;
- unsigned long cpu;
-
- if (!data)
- return -EINVAL;
-
- cpu = smp_processor_id();
-
- for (update = data; update; update = update->next) {
- if (cpu != update->cpu)
- continue;
-
- unmap_cpu_from_node(update->cpu);
- map_cpu_to_node(update->cpu, update->new_nid);
- vdso_getcpu_init();
- }
-
- return 0;
-}
-
-static int update_lookup_table(void *data)
+void find_and_update_cpu_nid(int cpu)
{
- struct topology_update_data *update;
-
- if (!data)
- return -EINVAL;
+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ int new_nid;
- /*
- * Upon topology update, the numa-cpu lookup table needs to be updated
- * for all threads in the core, including offline CPUs, to ensure that
- * future hotplug operations respect the cpu-to-node associativity
- * properly.
- */
- for (update = data; update; update = update->next) {
- int nid, base, j;
+ /* Use associativity from first thread for all siblings */
+ if (vphn_get_associativity(cpu, associativity))
+ return;
- nid = update->new_nid;
- base = cpu_first_thread_sibling(update->cpu);
+ /* Do not have previous associativity, so find it now. */
+ new_nid = associativity_to_nid(associativity);
- for (j = 0; j < threads_per_core; j++) {
- update_numa_cpu_lookup_table(base + j, nid);
- }
- }
+ if (new_nid < 0 || !node_possible(new_nid))
+ new_nid = first_online_node;
+ else
+ // Associate node <-> cpu, so cpu_up() calls
+ // try_online_node() on the right node.
+ set_cpu_numa_node(cpu, new_nid);
- return 0;
+ pr_debug("%s:%d cpu %d nid %d\n", __func__, __LINE__, cpu, new_nid);
}
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- */
-int arch_update_cpu_topology(void)
+int cpu_to_coregroup_id(int cpu)
{
- unsigned int cpu, sibling, changed = 0;
- struct topology_update_data *updates, *ud;
__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
- cpumask_t updated_cpus;
- struct device *dev;
- int weight, new_nid, i = 0;
-
- weight = cpumask_weight(&cpu_associativity_changes_mask);
- if (!weight)
- return 0;
-
- updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
- if (!updates)
- return 0;
-
- cpumask_clear(&updated_cpus);
-
- for_each_cpu(cpu, &cpu_associativity_changes_mask) {
- /*
- * If siblings aren't flagged for changes, updates list
- * will be too short. Skip on this update and set for next
- * update.
- */
- if (!cpumask_subset(cpu_sibling_mask(cpu),
- &cpu_associativity_changes_mask)) {
- pr_info("Sibling bits not set for associativity "
- "change, cpu%d\n", cpu);
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
+ int index;
- /* Use associativity from first thread for all siblings */
- vphn_get_associativity(cpu, associativity);
- new_nid = associativity_to_nid(associativity);
- if (new_nid < 0 || !node_online(new_nid))
- new_nid = first_online_node;
-
- if (new_nid == numa_cpu_lookup_table[cpu]) {
- cpumask_andnot(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask,
- cpu_sibling_mask(cpu));
- cpu = cpu_last_thread_sibling(cpu);
- continue;
- }
+ if (cpu < 0 || cpu > nr_cpu_ids)
+ return -1;
- for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
- ud = &updates[i++];
- ud->cpu = sibling;
- ud->new_nid = new_nid;
- ud->old_nid = numa_cpu_lookup_table[sibling];
- cpumask_set_cpu(sibling, &updated_cpus);
- if (i < weight)
- ud->next = &updates[i];
- }
- cpu = cpu_last_thread_sibling(cpu);
- }
+ if (!coregroup_enabled)
+ goto out;
- /*
- * In cases where we have nothing to update (because the updates list
- * is too short or because the new topology is same as the old one),
- * skip invoking update_cpu_topology() via stop-machine(). This is
- * necessary (and not just a fast-path optimization) since stop-machine
- * can end up electing a random CPU to run update_cpu_topology(), and
- * thus trick us into setting up incorrect cpu-node mappings (since
- * 'updates' is kzalloc()'ed).
- *
- * And for the similar reason, we will skip all the following updating.
- */
- if (!cpumask_weight(&updated_cpus))
+ if (!firmware_has_feature(FW_FEATURE_VPHN))
goto out;
- stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
+ if (vphn_get_associativity(cpu, associativity))
+ goto out;
- /*
- * Update the numa-cpu lookup table with the new mappings, even for
- * offline CPUs. It is best to perform this update from the stop-
- * machine context.
- */
- stop_machine(update_lookup_table, &updates[0],
- cpumask_of(raw_smp_processor_id()));
-
- for (ud = &updates[0]; ud; ud = ud->next) {
- unregister_cpu_under_node(ud->cpu, ud->old_nid);
- register_cpu_under_node(ud->cpu, ud->new_nid);
-
- dev = get_cpu_device(ud->cpu);
- if (dev)
- kobject_uevent(&dev->kobj, KOBJ_CHANGE);
- cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
- changed = 1;
- }
+ index = of_read_number(associativity, 1);
+ if (index > primary_domain_index + 1)
+ return of_read_number(&associativity[index - 1], 1);
out:
- kfree(updates);
- return changed;
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
- rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
- schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(unsigned long ignored)
-{
- if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
- topology_schedule_update();
- else if (vphn_enabled) {
- if (update_cpu_associativity_changes_mask() > 0)
- topology_schedule_update();
- reset_topology_timer();
- }
+ return cpu_to_core_id(cpu);
}
-static struct timer_list topology_timer =
- TIMER_INITIALIZER(topology_timer_fn, 0, 0);
-
-static void reset_topology_timer(void)
-{
- topology_timer.data = 0;
- topology_timer.expires = jiffies + 60 * HZ;
- mod_timer(&topology_timer, topology_timer.expires);
-}
-
-#ifdef CONFIG_SMP
-
-static void stage_topology_update(int core_id)
-{
- cpumask_or(&cpu_associativity_changes_mask,
- &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
- reset_topology_timer();
-}
-
-static int dt_update_callback(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct of_prop_reconfig *update;
- int rc = NOTIFY_DONE;
-
- switch (action) {
- case OF_RECONFIG_UPDATE_PROPERTY:
- update = (struct of_prop_reconfig *)data;
- if (!of_prop_cmp(update->dn->type, "cpu") &&
- !of_prop_cmp(update->prop->name, "ibm,associativity")) {
- u32 core_id;
- of_property_read_u32(update->dn, "reg", &core_id);
- stage_topology_update(core_id);
- rc = NOTIFY_OK;
- }
- break;
- }
-
- return rc;
-}
-
-static struct notifier_block dt_update_nb = {
- .notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
- int rc = 0;
-
- if (firmware_has_feature(FW_FEATURE_PRRN)) {
- if (!prrn_enabled) {
- prrn_enabled = 1;
- vphn_enabled = 0;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
- }
- } else if (firmware_has_feature(FW_FEATURE_VPHN) &&
- lppaca_shared_proc(get_lppaca())) {
- if (!vphn_enabled) {
- prrn_enabled = 0;
- vphn_enabled = 1;
- setup_cpu_associativity_change_counters();
- init_timer_deferrable(&topology_timer);
- reset_topology_timer();
- }
- }
-
- return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
- int rc = 0;
-
- if (prrn_enabled) {
- prrn_enabled = 0;
-#ifdef CONFIG_SMP
- rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
- } else if (vphn_enabled) {
- vphn_enabled = 0;
- rc = del_timer_sync(&topology_timer);
- }
-
- return rc;
-}
-
-int prrn_is_enabled(void)
-{
- return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
- if (vphn_enabled || prrn_enabled)
- seq_puts(file, "on\n");
- else
- seq_puts(file, "off\n");
-
- return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
- return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
- size_t count, loff_t *off)
-{
- char kbuf[4]; /* "on" or "off" plus null. */
- int read_len;
-
- read_len = count < 3 ? count : 3;
- if (copy_from_user(kbuf, buf, read_len))
- return -EINVAL;
-
- kbuf[read_len] = '\0';
-
- if (!strncmp(kbuf, "on", 2))
- start_topology_update();
- else if (!strncmp(kbuf, "off", 3))
- stop_topology_update();
- else
- return -EINVAL;
-
- return count;
-}
-
-static const struct file_operations topology_ops = {
- .read = seq_read,
- .write = topology_write,
- .open = topology_open,
- .release = single_release
-};
static int topology_update_init(void)
{
- start_topology_update();
- proc_create("powerpc/topology_updates", 0644, NULL, &topology_ops);
-
+ topology_inited = 1;
return 0;
}
device_initcall(topology_update_init);
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644
index 000000000000..ac22bf28086f
--- /dev/null
+++ b/arch/powerpc/mm/pageattr.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019-2021, IBM Corporation.
+ */
+
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/set_memory.h>
+
+#include <asm/mmu.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+
+#include <mm/mmu_decl.h>
+
+static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr,
+ unsigned long old, unsigned long new)
+{
+ return pte_update(&init_mm, addr, ptep, old & ~new, new & ~old, 0);
+}
+
+/*
+ * Updates the attributes of a page atomically.
+ *
+ * This sequence is safe against concurrent updates, and also allows updating the
+ * attributes of a page currently being executed or accessed.
+ */
+static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
+{
+ long action = (long)data;
+
+ addr &= PAGE_MASK;
+ /* modify the PTE bits as desired */
+ switch (action) {
+ case SET_MEMORY_RO:
+ /* Don't clear DIRTY bit */
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO);
+ break;
+ case SET_MEMORY_ROX:
+ /* Don't clear DIRTY bit */
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_ROX);
+ break;
+ case SET_MEMORY_RW:
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW);
+ break;
+ case SET_MEMORY_NX:
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_ROX, _PAGE_KERNEL_RO);
+ break;
+ case SET_MEMORY_X:
+ pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_ROX);
+ break;
+ case SET_MEMORY_NP:
+ pte_update(&init_mm, addr, ptep, _PAGE_PRESENT, 0, 0);
+ break;
+ case SET_MEMORY_P:
+ pte_update(&init_mm, addr, ptep, 0, _PAGE_PRESENT, 0);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync": : :"memory");
+
+ flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
+
+ return 0;
+}
+
+int change_memory_attr(unsigned long addr, int numpages, long action)
+{
+ unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE);
+ unsigned long size = numpages * PAGE_SIZE;
+
+ if (!numpages)
+ return 0;
+
+ if (WARN_ON_ONCE(is_vmalloc_or_module_addr((void *)addr) &&
+ is_vm_area_hugepages((void *)addr)))
+ return -EINVAL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * On hash, the linear mapping is not in the Linux page table so
+ * apply_to_existing_page_range() will have no effect. If in the future
+ * the set_memory_* functions are used on the linear map this will need
+ * to be updated.
+ */
+ if (!radix_enabled()) {
+ int region = get_region_id(addr);
+
+ if (WARN_ON_ONCE(region != VMALLOC_REGION_ID && region != IO_REGION_ID))
+ return -EINVAL;
+ }
+#endif
+
+ return apply_to_existing_page_range(&init_mm, start, size,
+ change_page_attr, (void *)action);
+}
+
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
+#ifdef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ int err;
+ unsigned long addr = (unsigned long)page_address(page);
+
+ if (PageHighMem(page))
+ return;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
+ err = hash__kernel_map_pages(page, numpages, enable);
+ else if (enable)
+ err = set_memory_p(addr, numpages);
+ else
+ err = set_memory_np(addr, numpages);
+
+ if (err)
+ panic("%s: changing memory protections failed\n", __func__);
+}
+#endif
+#endif
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
new file mode 100644
index 000000000000..77e55eac16e4
--- /dev/null
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Handling Page Tables through page fragments
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/hugetlb.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+void pte_frag_destroy(void *pte_frag)
+{
+ int count;
+ struct ptdesc *ptdesc;
+
+ ptdesc = virt_to_ptdesc(pte_frag);
+ /* drop all the pending references */
+ count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
+ /* We allow PTE_FRAG_NR fragments from a PTE page */
+ if (atomic_sub_and_test(PTE_FRAG_NR - count, &ptdesc->pt_frag_refcount)) {
+ pagetable_dtor(ptdesc);
+ pagetable_free(ptdesc);
+ }
+}
+
+static pte_t *get_pte_from_cache(struct mm_struct *mm)
+{
+ void *pte_frag, *ret;
+
+ if (PTE_FRAG_NR == 1)
+ return NULL;
+
+ spin_lock(&mm->page_table_lock);
+ ret = pte_frag_get(&mm->context);
+ if (ret) {
+ pte_frag = ret + PTE_FRAG_SIZE;
+ /*
+ * If we have taken up all the fragments mark PTE page NULL
+ */
+ if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
+ pte_frag = NULL;
+ pte_frag_set(&mm->context, pte_frag);
+ }
+ spin_unlock(&mm->page_table_lock);
+ return (pte_t *)ret;
+}
+
+static pte_t *__alloc_for_ptecache(struct mm_struct *mm, int kernel)
+{
+ void *ret = NULL;
+ struct ptdesc *ptdesc;
+ gfp_t gfp = PGALLOC_GFP;
+
+ if (!kernel)
+ gfp |= __GFP_ACCOUNT;
+
+ ptdesc = pagetable_alloc(gfp, 0);
+ if (!ptdesc)
+ return NULL;
+ if (!pagetable_pte_ctor(mm, ptdesc)) {
+ pagetable_free(ptdesc);
+ return NULL;
+ }
+
+ atomic_set(&ptdesc->pt_frag_refcount, 1);
+
+ ret = ptdesc_address(ptdesc);
+ /*
+ * if we support only one fragment just return the
+ * allocated page.
+ */
+ if (PTE_FRAG_NR == 1)
+ return ret;
+ spin_lock(&mm->page_table_lock);
+ /*
+ * If we find ptdesc_page set, we return
+ * the allocated page with single fragment
+ * count.
+ */
+ if (likely(!pte_frag_get(&mm->context))) {
+ atomic_set(&ptdesc->pt_frag_refcount, PTE_FRAG_NR);
+ pte_frag_set(&mm->context, ret + PTE_FRAG_SIZE);
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ return (pte_t *)ret;
+}
+
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel)
+{
+ pte_t *pte;
+
+ pte = get_pte_from_cache(mm);
+ if (pte)
+ return pte;
+
+ return __alloc_for_ptecache(mm, kernel);
+}
+
+static void pte_free_now(struct rcu_head *head)
+{
+ struct ptdesc *ptdesc;
+
+ ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
+ pagetable_dtor(ptdesc);
+ pagetable_free(ptdesc);
+}
+
+void pte_fragment_free(unsigned long *table, int kernel)
+{
+ struct ptdesc *ptdesc = virt_to_ptdesc(table);
+
+ if (pagetable_is_reserved(ptdesc))
+ return free_reserved_ptdesc(ptdesc);
+
+ BUG_ON(atomic_read(&ptdesc->pt_frag_refcount) <= 0);
+ if (atomic_dec_and_test(&ptdesc->pt_frag_refcount)) {
+ if (kernel || !folio_test_clear_active(ptdesc_folio(ptdesc)))
+ pte_free_now(&ptdesc->pt_rcu_head);
+ else
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
+ }
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
+{
+ struct folio *folio;
+
+ folio = virt_to_folio(pgtable);
+ folio_set_active(folio);
+ pte_fragment_free((unsigned long *)pgtable, 0);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index c695943a513c..56d7e8960e77 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file contains common routines for dealing with free of page tables
* Along with common page table handling code
@@ -14,11 +15,6 @@
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
@@ -27,9 +23,18 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
+#include <asm/hugetlb.h>
+#include <asm/pte-walk.h>
+
+#ifdef CONFIG_PPC64
+#define PGD_ALIGN (sizeof(pgd_t) * MAX_PTRS_PER_PGD)
+#else
+#define PGD_ALIGN PAGE_SIZE
+#endif
+
+pgd_t swapper_pg_dir[MAX_PTRS_PER_PGD] __section(".bss..page_aligned") __aligned(PGD_ALIGN);
static inline int is_exec_fault(void)
{
@@ -38,17 +43,22 @@ static inline int is_exec_fault(void)
/* We only try to do i/d cache coherency on stuff that looks like
* reasonably "normal" PTEs. We currently require a PTE to be present
- * and we avoid _PAGE_SPECIAL and _PAGE_NO_CACHE. We also only do that
+ * and we avoid _PAGE_SPECIAL and cache inhibited pte. We also only do that
* on userspace PTEs
*/
-static inline int pte_looks_normal(pte_t pte)
+static inline int pte_looks_normal(pte_t pte, unsigned long addr)
{
- return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER)) ==
- (_PAGE_PRESENT | _PAGE_USER);
+
+ if (pte_present(pte) && !pte_special(pte)) {
+ if (pte_ci(pte))
+ return 0;
+ if (!is_kernel_addr(addr))
+ return 1;
+ }
+ return 0;
}
-struct page * maybe_pte_to_page(pte_t pte)
+static struct folio *maybe_pte_to_folio(pte_t pte)
{
unsigned long pfn = pte_pfn(pte);
struct page *page;
@@ -58,10 +68,10 @@ struct page * maybe_pte_to_page(pte_t pte)
page = pfn_to_page(pfn);
if (PageReserved(page))
return NULL;
- return page;
+ return page_folio(page);
}
-#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
+#ifdef CONFIG_PPC_BOOK3S
/* Server-style MMU handles coherency when hashing if HW exec permission
* is supposed per page (currently 64-bit only). If not, then, we always
@@ -69,73 +79,85 @@ struct page * maybe_pte_to_page(pte_t pte)
* support falls into the same category.
*/
-static pte_t set_pte_filter(pte_t pte)
+static pte_t set_pte_filter_hash(pte_t pte, unsigned long addr)
{
pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
- if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
- cpu_has_feature(CPU_FTR_NOEXECUTE))) {
- struct page *pg = maybe_pte_to_page(pte);
- if (!pg)
+ if (pte_looks_normal(pte, addr) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
+ cpu_has_feature(CPU_FTR_NOEXECUTE))) {
+ struct folio *folio = maybe_pte_to_folio(pte);
+ if (!folio)
return pte;
- if (!test_bit(PG_arch_1, &pg->flags)) {
- flush_dcache_icache_page(pg);
- set_bit(PG_arch_1, &pg->flags);
+ if (!test_bit(PG_dcache_clean, &folio->flags.f)) {
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags.f);
}
}
return pte;
}
-static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
- int dirty)
-{
- return pte;
-}
+#else /* CONFIG_PPC_BOOK3S */
+
+static pte_t set_pte_filter_hash(pte_t pte, unsigned long addr) { return pte; }
-#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
+#endif /* CONFIG_PPC_BOOK3S */
/* Embedded type MMU with HW exec support. This is a bit more complicated
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
* instead we "filter out" the exec permission for non clean pages.
+ *
+ * This is also called once for the folio. So only work with folio->flags here.
*/
-static pte_t set_pte_filter(pte_t pte)
+static inline pte_t set_pte_filter(pte_t pte, unsigned long addr)
{
- struct page *pg;
+ struct folio *folio;
+
+ if (radix_enabled())
+ return pte;
+
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return set_pte_filter_hash(pte, addr);
/* No exec permission in the first place, move on */
- if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
+ if (!pte_exec(pte) || !pte_looks_normal(pte, addr))
return pte;
/* If you set _PAGE_EXEC on weird pages you're on your own */
- pg = maybe_pte_to_page(pte);
- if (unlikely(!pg))
+ folio = maybe_pte_to_folio(pte);
+ if (unlikely(!folio))
return pte;
/* If the page clean, we move on */
- if (test_bit(PG_arch_1, &pg->flags))
+ if (test_bit(PG_dcache_clean, &folio->flags.f))
return pte;
/* If it's an exec fault, we flush the cache and make it clean */
if (is_exec_fault()) {
- flush_dcache_icache_page(pg);
- set_bit(PG_arch_1, &pg->flags);
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags.f);
return pte;
}
/* Else, we filter out _PAGE_EXEC */
- return __pte(pte_val(pte) & ~_PAGE_EXEC);
+ return pte_exprotect(pte);
}
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
int dirty)
{
- struct page *pg;
+ struct folio *folio;
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+ return pte;
+
+ if (mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ return pte;
/* So here, we only care about exec faults, as we use them
* to recover lost _PAGE_EXEC and perform I$/D$ coherency
* if necessary. Also if _PAGE_EXEC is already set, same deal,
* we just bail out
*/
- if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
+ if (dirty || pte_exec(pte) || !is_exec_fault())
return pte;
#ifdef CONFIG_DEBUG_VM
@@ -148,41 +170,67 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
#endif /* CONFIG_DEBUG_VM */
/* If you set _PAGE_EXEC on weird pages you're on your own */
- pg = maybe_pte_to_page(pte);
- if (unlikely(!pg))
+ folio = maybe_pte_to_folio(pte);
+ if (unlikely(!folio))
goto bail;
/* If the page is already clean, we move on */
- if (test_bit(PG_arch_1, &pg->flags))
+ if (test_bit(PG_dcache_clean, &folio->flags.f))
goto bail;
- /* Clean the page and set PG_arch_1 */
- flush_dcache_icache_page(pg);
- set_bit(PG_arch_1, &pg->flags);
+ /* Clean the page and set PG_dcache_clean */
+ flush_dcache_icache_folio(folio);
+ set_bit(PG_dcache_clean, &folio->flags.f);
bail:
- return __pte(pte_val(pte) | _PAGE_EXEC);
+ return pte_mkexec(pte);
}
-#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
-
/*
* set_pte stores a linux PTE into the linux page table.
*/
-void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
- pte_t pte)
+void set_ptes(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned int nr)
{
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(pte_val(*ptep) & _PAGE_PRESENT);
-#endif
+
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
- * is called.
+ * is called. Filter the pte value and use the filtered value
+ * to setup all the ptes in the range.
*/
- pte = set_pte_filter(pte);
+ pte = set_pte_filter(pte, addr);
+
+ /*
+ * We don't need to call arch_enter/leave_lazy_mmu_mode()
+ * because we expect set_ptes to be only be used on not present
+ * and not hw_valid ptes. Hence there is no translation cache flush
+ * involved that need to be batched.
+ */
+ for (;;) {
+
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+ /* Perform the setting of the PTE */
+ __set_pte_at(mm, addr, ptep, pte, 0);
+ if (--nr == 0)
+ break;
+ ptep++;
+ addr += PAGE_SIZE;
+ pte = pte_next_pfn(pte);
+ }
+}
- /* Perform the setting of the PTE */
- __set_pte_at(mm, addr, ptep, pte, 0);
+void unmap_kernel_page(unsigned long va)
+{
+ pmd_t *pmdp = pmd_off_k(va);
+ pte_t *ptep = pte_offset_kernel(pmdp, va);
+
+ pte_clear(&init_mm, va, ptep);
+ flush_tlb_kernel_range(va, va + PAGE_SIZE);
}
/*
@@ -199,38 +247,309 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
entry = set_access_flags_filter(entry, vma, dirty);
changed = !pte_same(*(ptep), entry);
if (changed) {
- if (!is_vm_hugetlb_page(vma))
- assert_pte_locked(vma->vm_mm, address);
- __ptep_set_access_flags(ptep, entry);
- flush_tlb_page_nohash(vma, address);
+ assert_pte_locked(vma->vm_mm, address);
+ __ptep_set_access_flags(vma, ptep, entry,
+ address, mmu_virtual_psize);
}
return changed;
}
+#ifdef CONFIG_HUGETLB_PAGE
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty)
+{
+#ifdef HUGETLB_NEED_PRELOAD
+ /*
+ * The "return 1" forces a call of update_mmu_cache, which will write a
+ * TLB entry. Without this, platforms that don't do a write of the TLB
+ * entry in the TLB miss handler asm will fault ad infinitum.
+ */
+ ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+ return 1;
+#else
+ int changed, psize;
+
+ pte = set_access_flags_filter(pte, vma, dirty);
+ changed = !pte_same(*(ptep), pte);
+ if (changed) {
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct hstate *h = hstate_vma(vma);
+
+ psize = hstate_get_psize(h);
+#ifdef CONFIG_DEBUG_VM
+ assert_spin_locked(huge_pte_lockptr(h, vma->vm_mm, ptep));
+#endif
+
+#else
+ /*
+ * Not used on non book3s64 platforms.
+ * 8xx compares it with mmu_virtual_psize to
+ * know if it is a huge page or not.
+ */
+ psize = MMU_PAGE_COUNT;
+#endif
+ __ptep_set_access_flags(vma, ptep, pte, addr, psize);
+ }
+ return changed;
+#endif
+}
+
+#if defined(CONFIG_PPC_8xx)
+
+#if defined(CONFIG_SPLIT_PTE_PTLOCKS) || defined(CONFIG_SPLIT_PMD_PTLOCKS)
+/* We need the same lock to protect the PMD table and the two PTE tables. */
+#error "8M hugetlb folios are incompatible with split page table locks"
+#endif
+
+static void __set_huge_pte_at(pmd_t *pmd, pte_t *ptep, pte_basic_t val)
+{
+ pte_basic_t *entry = (pte_basic_t *)ptep;
+ int num, i;
+
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+ num = number_of_cells_per_pte(pmd, val, 1);
+
+ for (i = 0; i < num; i++, entry++, val += SZ_4K)
+ *entry = val;
+}
+
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned long sz)
+{
+ pmd_t *pmdp = pmd_off(mm, addr);
+
+ pte = set_pte_filter(pte, addr);
+
+ if (sz == SZ_8M) { /* Flag both PMD entries as 8M and fill both page tables */
+ *pmdp = __pmd(pmd_val(*pmdp) | _PMD_PAGE_8M);
+ *(pmdp + 1) = __pmd(pmd_val(*(pmdp + 1)) | _PMD_PAGE_8M);
+
+ __set_huge_pte_at(pmdp, pte_offset_kernel(pmdp, 0), pte_val(pte));
+ __set_huge_pte_at(pmdp, pte_offset_kernel(pmdp + 1, 0), pte_val(pte) + SZ_4M);
+ } else {
+ __set_huge_pte_at(pmdp, ptep, pte_val(pte));
+ }
+}
+#else
+void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+ pte_t pte, unsigned long sz)
+{
+ unsigned long pdsize;
+ int i;
+
+ pte = set_pte_filter(pte, addr);
+
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
+
+ if (sz < PMD_SIZE)
+ pdsize = PAGE_SIZE;
+ else if (sz < PUD_SIZE)
+ pdsize = PMD_SIZE;
+ else if (sz < P4D_SIZE)
+ pdsize = PUD_SIZE;
+ else if (sz < PGDIR_SIZE)
+ pdsize = P4D_SIZE;
+ else
+ pdsize = PGDIR_SIZE;
+
+ for (i = 0; i < sz / pdsize; i++, ptep++, addr += pdsize) {
+ __set_pte_at(mm, addr, ptep, pte, 0);
+ pte = __pte(pte_val(pte) + ((unsigned long long)pdsize / PAGE_SIZE << PFN_PTE_SHIFT));
+ }
+}
+#endif
+#endif /* CONFIG_HUGETLB_PAGE */
+
#ifdef CONFIG_DEBUG_VM
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
+ pte_t *pte;
+ spinlock_t *ptl;
if (mm == &init_mm)
return;
pgd = mm->pgd + pgd_index(addr);
BUG_ON(pgd_none(*pgd));
- pud = pud_offset(pgd, addr);
+ p4d = p4d_offset(pgd, addr);
+ BUG_ON(p4d_none(*p4d));
+ pud = pud_offset(p4d, addr);
BUG_ON(pud_none(*pud));
pmd = pmd_offset(pud, addr);
/*
* khugepaged to collapse normal pages to hugepage, first set
- * pmd to none to force page fault/gup to take mmap_sem. After
+ * pmd to none to force page fault/gup to take mmap_lock. After
* pmd is set to none, we do a pte_clear which does this assertion
* so if we find pmd none, return.
*/
if (pmd_none(*pmd))
return;
- BUG_ON(!pmd_present(*pmd));
- assert_spin_locked(pte_lockptr(mm, pmd));
+ pte = pte_offset_map_ro_nolock(mm, pmd, addr, &ptl);
+ BUG_ON(!pte);
+ assert_spin_locked(ptl);
+ pte_unmap(pte);
}
#endif /* CONFIG_DEBUG_VM */
+unsigned long vmalloc_to_phys(void *va)
+{
+ unsigned long pfn = vmalloc_to_pfn(va);
+
+ BUG_ON(!pfn);
+ return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
+}
+EXPORT_SYMBOL_GPL(vmalloc_to_phys);
+
+/*
+ * We have 3 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page _PAGE_PTE set
+ *
+ * So long as we atomically load page table pointers we are safe against teardown,
+ * we can follow the address down to the page and take a ref on it.
+ * This function need to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
+ */
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hpage_shift)
+{
+ pgd_t *pgdp;
+#ifdef CONFIG_PPC64
+ p4d_t p4d, *p4dp;
+ pud_t pud, *pudp;
+#endif
+ pmd_t pmd, *pmdp;
+ pte_t *ret_pte;
+ unsigned pdshift;
+
+ if (hpage_shift)
+ *hpage_shift = 0;
+
+ if (is_thp)
+ *is_thp = false;
+
+ /*
+ * Always operate on the local stack value. This make sure the
+ * value don't get updated by a parallel THP split/collapse,
+ * page fault or a page unmap. The return pte_t * is still not
+ * stable. So should be checked there for above conditions.
+ * Top level is an exception because it is folded into p4d.
+ *
+ * On PPC32, P4D/PUD/PMD are folded into PGD so go straight to
+ * PMD level.
+ */
+ pgdp = pgdir + pgd_index(ea);
+#ifdef CONFIG_PPC64
+ p4dp = p4d_offset(pgdp, ea);
+ p4d = READ_ONCE(*p4dp);
+ pdshift = P4D_SHIFT;
+
+ if (p4d_none(p4d))
+ return NULL;
+
+ if (p4d_leaf(p4d)) {
+ ret_pte = (pte_t *)p4dp;
+ goto out;
+ }
+
+ /*
+ * Even if we end up with an unmap, the pgtable will not
+ * be freed, because we do an rcu free and here we are
+ * irq disabled
+ */
+ pdshift = PUD_SHIFT;
+ pudp = pud_offset(&p4d, ea);
+ pud = READ_ONCE(*pudp);
+
+ if (pud_none(pud))
+ return NULL;
+
+ if (pud_leaf(pud)) {
+ ret_pte = (pte_t *)pudp;
+ goto out;
+ }
+
+ pmdp = pmd_offset(&pud, ea);
+#else
+ pmdp = pmd_offset(pud_offset(p4d_offset(pgdp, ea), ea), ea);
+#endif
+ pdshift = PMD_SHIFT;
+ pmd = READ_ONCE(*pmdp);
+
+ /*
+ * A hugepage collapse is captured by this condition, see
+ * pmdp_collapse_flush.
+ */
+ if (pmd_none(pmd))
+ return NULL;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /*
+ * A hugepage split is captured by this condition, see
+ * pmdp_invalidate.
+ *
+ * Huge page modification can be caught here too.
+ */
+ if (pmd_is_serializing(pmd))
+ return NULL;
+#endif
+
+ if (pmd_trans_huge(pmd)) {
+ if (is_thp)
+ *is_thp = true;
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+
+ if (pmd_leaf(pmd)) {
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+
+ return pte_offset_kernel(&pmd, ea);
+
+out:
+ if (hpage_shift)
+ *hpage_shift = pdshift;
+ return ret_pte;
+}
+EXPORT_SYMBOL_GPL(__find_linux_pte);
+
+/* Note due to the way vm flags are laid out, the bits are XWR */
+const pgprot_t protection_map[16] = {
+ [VM_NONE] = PAGE_NONE,
+ [VM_READ] = PAGE_READONLY,
+ [VM_WRITE] = PAGE_COPY,
+ [VM_WRITE | VM_READ] = PAGE_COPY,
+ [VM_EXEC] = PAGE_EXECONLY_X,
+ [VM_EXEC | VM_READ] = PAGE_READONLY_X,
+ [VM_EXEC | VM_WRITE] = PAGE_COPY_X,
+ [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_X,
+ [VM_SHARED] = PAGE_NONE,
+ [VM_SHARED | VM_READ] = PAGE_READONLY,
+ [VM_SHARED | VM_WRITE] = PAGE_SHARED,
+ [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED,
+ [VM_SHARED | VM_EXEC] = PAGE_EXECONLY_X,
+ [VM_SHARED | VM_EXEC | VM_READ] = PAGE_READONLY_X,
+ [VM_SHARED | VM_EXEC | VM_WRITE] = PAGE_SHARED_X,
+ [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = PAGE_SHARED_X
+};
+
+#ifndef CONFIG_PPC_BOOK3S_64
+DECLARE_VM_GET_PAGE_PROT
+#endif
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cf11342bf519..0c9ef705803e 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This file contains the routines setting up the linux page tables.
* -- paulus
@@ -11,12 +12,6 @@
*
* Derived from "arch/i386/mm/init.c"
* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/kernel.h>
@@ -28,276 +23,68 @@
#include <linux/highmem.h>
#include <linux/memblock.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
-#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/fixmap.h>
-#include <asm/io.h>
#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/early_ioremap.h>
-#include "mmu_decl.h"
-
-unsigned long ioremap_base;
-unsigned long ioremap_bot;
-EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
-
-#ifdef CONFIG_6xx
-#define HAVE_BATS 1
-#endif
-
-#if defined(CONFIG_FSL_BOOKE)
-#define HAVE_TLBCAM 1
-#endif
-
-extern char etext[], _stext[];
-
-#ifdef HAVE_BATS
-extern phys_addr_t v_mapped_by_bats(unsigned long va);
-extern unsigned long p_mapped_by_bats(phys_addr_t pa);
-void setbat(int index, unsigned long virt, phys_addr_t phys,
- unsigned int size, int flags);
-
-#else /* !HAVE_BATS */
-#define v_mapped_by_bats(x) (0UL)
-#define p_mapped_by_bats(x) (0UL)
-#endif /* HAVE_BATS */
-
-#ifdef HAVE_TLBCAM
-extern unsigned int tlbcam_index;
-extern phys_addr_t v_mapped_by_tlbcam(unsigned long va);
-extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa);
-#else /* !HAVE_TLBCAM */
-#define v_mapped_by_tlbcam(x) (0UL)
-#define p_mapped_by_tlbcam(x) (0UL)
-#endif /* HAVE_TLBCAM */
-
-#define PGDIR_ORDER (32 + PGD_T_LOG2 - PGDIR_SHIFT)
+#include <mm/mmu_decl.h>
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- pgd_t *ret;
-
- /* pgdir take page or two with 4K pages and a page fraction otherwise */
-#ifndef CONFIG_PPC_4K_PAGES
- ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
-#else
- ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
- PGDIR_ORDER - PAGE_SHIFT);
-#endif
- return ret;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-#ifndef CONFIG_PPC_4K_PAGES
- kfree((void *)pgd);
-#else
- free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
-#endif
-}
-
-__init_refok pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
- pte_t *pte;
- extern int mem_init_done;
- extern void *early_get_page(void);
-
- if (mem_init_done) {
- pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
- } else {
- pte = (pte_t *)early_get_page();
- if (pte)
- clear_page(pte);
- }
- return pte;
-}
+static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data;
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+notrace void __init early_ioremap_init(void)
{
- struct page *ptepage;
-
- gfp_t flags = GFP_KERNEL | __GFP_REPEAT | __GFP_ZERO;
-
- ptepage = alloc_pages(flags, 0);
- if (!ptepage)
- return NULL;
- if (!pgtable_page_ctor(ptepage)) {
- __free_page(ptepage);
- return NULL;
- }
- return ptepage;
-}
+ unsigned long addr = ALIGN_DOWN(FIXADDR_START, PGDIR_SIZE);
+ pte_t *ptep = (pte_t *)early_fixmap_pagetable;
+ pmd_t *pmdp = pmd_off_k(addr);
-void __iomem *
-ioremap(phys_addr_t addr, unsigned long size)
-{
- return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
- __builtin_return_address(0));
-}
-EXPORT_SYMBOL(ioremap);
+ for (; (s32)(FIXADDR_TOP - addr) > 0;
+ addr += PGDIR_SIZE, ptep += PTRS_PER_PTE, pmdp++)
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
-void __iomem *
-ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
- __builtin_return_address(0));
+ early_ioremap_setup();
}
-EXPORT_SYMBOL(ioremap_wc);
-void __iomem *
-ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
+void __init *early_alloc_pgtable(unsigned long size)
{
- /* writeable implies dirty for kernel addresses */
- if (flags & _PAGE_RW)
- flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
-
- /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- flags &= ~(_PAGE_USER | _PAGE_EXEC);
-
-#ifdef _PAGE_BAP_SR
- /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
- * which means that we just cleared supervisor access... oops ;-) This
- * restores it
- */
- flags |= _PAGE_BAP_SR;
-#endif
+ return memblock_alloc_or_panic(size, size);
- return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
}
-EXPORT_SYMBOL(ioremap_prot);
-void __iomem *
-__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
+pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
{
- return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
-}
-
-void __iomem *
-__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
- void *caller)
-{
- unsigned long v, i;
- phys_addr_t p;
- int err;
-
- /* Make sure we have the base flags */
- if ((flags & _PAGE_PRESENT) == 0)
- flags |= PAGE_KERNEL;
-
- /* Non-cacheable page cannot be coherent */
- if (flags & _PAGE_NO_CACHE)
- flags &= ~_PAGE_COHERENT;
-
- /*
- * Choose an address to map it to.
- * Once the vmalloc system is running, we use it.
- * Before then, we use space going down from ioremap_base
- * (ioremap_bot records where we're up to).
- */
- p = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - p;
-
- /*
- * If the address lies within the first 16 MB, assume it's in ISA
- * memory space
- */
- if (p < 16*1024*1024)
- p += _ISA_MEM_BASE;
-
-#ifndef CONFIG_CRASH_DUMP
- /*
- * Don't allow anybody to remap normal RAM that we're using.
- * mem_init() sets high_memory so only do the check after that.
- */
- if (mem_init_done && (p < virt_to_phys(high_memory)) &&
- !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
- printk("__ioremap(): phys addr 0x%llx is RAM lr %pf\n",
- (unsigned long long)p, __builtin_return_address(0));
- return NULL;
- }
-#endif
-
- if (size == 0)
- return NULL;
-
- /*
- * Is it already mapped? Perhaps overlapped by a previous
- * BAT mapping. If the whole area is mapped then we're done,
- * otherwise remap it since we want to keep the virt addrs for
- * each request contiguous.
- *
- * We make the assumption here that if the bottom and top
- * of the range we want are mapped then it's mapped to the
- * same virt address (and this is contiguous).
- * -- Cort
- */
- if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ )
- goto out;
-
- if ((v = p_mapped_by_tlbcam(p)))
- goto out;
-
- if (mem_init_done) {
- struct vm_struct *area;
- area = get_vm_area_caller(size, VM_IOREMAP, caller);
- if (area == 0)
- return NULL;
- area->phys_addr = p;
- v = (unsigned long) area->addr;
- } else {
- v = (ioremap_bot -= size);
- }
-
- /*
- * Should check if it is a candidate for a BAT mapping
- */
+ if (pmd_none(*pmdp)) {
+ pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
- err = 0;
- for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_page(v+i, p+i, flags);
- if (err) {
- if (mem_init_done)
- vunmap((void *)v);
- return NULL;
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
}
-
-out:
- return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK));
+ return pte_offset_kernel(pmdp, va);
}
-EXPORT_SYMBOL(__ioremap);
-
-void iounmap(volatile void __iomem *addr)
-{
- /*
- * If mapped by BATs then there is nothing to do.
- * Calling vfree() generates a benign warning.
- */
- if (v_mapped_by_bats((unsigned long)addr)) return;
- if (addr > high_memory && (unsigned long) addr < ioremap_bot)
- vunmap((void *) (PAGE_MASK & (unsigned long)addr));
-}
-EXPORT_SYMBOL(iounmap);
-int map_page(unsigned long va, phys_addr_t pa, int flags)
+int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
{
pmd_t *pd;
pte_t *pg;
int err = -ENOMEM;
/* Use upper 10 bits of VA to index the first level map */
- pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
+ pd = pmd_off_k(va);
/* Use middle 10 bits of VA to index the second-level map */
- pg = pte_alloc_kernel(pd, va);
- if (pg != 0) {
+ if (likely(slab_is_available()))
+ pg = pte_alloc_kernel(pd, va);
+ else
+ pg = early_pte_alloc_kernel(pd, va);
+ if (pg) {
err = 0;
/* The PTE should never be already set nor present in the
* hash table
*/
- BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
- flags);
- set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
+ BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && pgprot_val(prot));
+ set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot));
}
smp_wmb();
return err;
@@ -306,23 +93,18 @@ int map_page(unsigned long va, phys_addr_t pa, int flags)
/*
* Map in a chunk of physical memory starting at start.
*/
-void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
+static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
{
- unsigned long v, s, f;
+ unsigned long v, s;
phys_addr_t p;
- int ktext;
+ bool ktext;
s = offset;
v = PAGE_OFFSET + s;
p = memstart_addr + s;
for (; s < top; s += PAGE_SIZE) {
- ktext = ((char *) v >= _stext && (char *) v < etext);
- f = ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL;
- map_page(v, p, f);
-#ifdef CONFIG_PPC_STD_MMU_32
- if (ktext)
- hash_preload(&init_mm, v, 0, 0x300);
-#endif
+ ktext = core_kernel_text(v);
+ map_kernel_page(v, p, ktext ? PAGE_KERNEL_X : PAGE_KERNEL);
v += PAGE_SIZE;
p += PAGE_SIZE;
}
@@ -330,131 +112,71 @@ void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
void __init mapin_ram(void)
{
- unsigned long s, top;
+ phys_addr_t base, end;
+ u64 i;
-#ifndef CONFIG_WII
- top = total_lowmem;
- s = mmu_mapin_ram(top);
- __mapin_ram_chunk(s, top);
-#else
- if (!wii_hole_size) {
- s = mmu_mapin_ram(total_lowmem);
- __mapin_ram_chunk(s, total_lowmem);
- } else {
- top = wii_hole_start;
- s = mmu_mapin_ram(top);
- __mapin_ram_chunk(s, top);
+ for_each_mem_range(i, &base, &end) {
+ phys_addr_t top = min(end, total_lowmem);
- top = memblock_end_of_DRAM();
- s = wii_mmu_mapin_mem2(top);
- __mapin_ram_chunk(s, top);
+ if (base >= top)
+ continue;
+ base = mmu_mapin_ram(base, top);
+ __mapin_ram_chunk(base, top);
}
-#endif
}
-/* Scan the real Linux page tables and return a PTE pointer for
- * a virtual address in a context.
- * Returns true (1) if PTE was found, zero otherwise. The pointer to
- * the PTE pointer is unmodified if PTE is not found.
- */
-int
-get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
- pte_t *pte;
- int retval = 0;
-
- pgd = pgd_offset(mm, addr & PAGE_MASK);
- if (pgd) {
- pud = pud_offset(pgd, addr & PAGE_MASK);
- if (pud && pud_present(*pud)) {
- pmd = pmd_offset(pud, addr & PAGE_MASK);
- if (pmd_present(*pmd)) {
- pte = pte_offset_map(pmd, addr & PAGE_MASK);
- if (pte) {
- retval = 1;
- *ptep = pte;
- if (pmdp)
- *pmdp = pmd;
- /* XXX caller needs to do pte_unmap, yuck */
- }
- }
- }
- }
- return(retval);
-}
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
-
-static int __change_page_attr(struct page *page, pgprot_t prot)
+static int __mark_initmem_nx(void)
{
- pte_t *kpte;
- pmd_t *kpmd;
- unsigned long address;
-
- BUG_ON(PageHighMem(page));
- address = (unsigned long)page_address(page);
-
- if (v_mapped_by_bats(address) || v_mapped_by_tlbcam(address))
- return 0;
- if (!get_pteptr(&init_mm, address, &kpte, &kpmd))
- return -EINVAL;
- __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
- wmb();
- flush_tlb_page(NULL, address);
- pte_unmap(kpte);
-
- return 0;
-}
+ unsigned long numpages = PFN_UP((unsigned long)_einittext) -
+ PFN_DOWN((unsigned long)_sinittext);
+ int err;
-/*
- * Change the page attributes of an page in the linear mapping.
- *
- * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY
- */
-static int change_page_attr(struct page *page, int numpages, pgprot_t prot)
-{
- int i, err = 0;
- unsigned long flags;
+ err = mmu_mark_initmem_nx();
- local_irq_save(flags);
- for (i = 0; i < numpages; i++, page++) {
- err = __change_page_attr(page, prot);
+ if (!v_block_mapped((unsigned long)_sinittext)) {
+ err = set_memory_nx((unsigned long)_sinittext, numpages);
if (err)
- break;
+ return err;
+ err = set_memory_rw((unsigned long)_sinittext, numpages);
}
- local_irq_restore(flags);
return err;
}
-
-void kernel_map_pages(struct page *page, int numpages, int enable)
+void mark_initmem_nx(void)
{
- if (PageHighMem(page))
- return;
+ int err = __mark_initmem_nx();
- change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+ if (err)
+ panic("%s() failed, err = %d\n", __func__, err);
}
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
-static int fixmaps;
-void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+#ifdef CONFIG_STRICT_KERNEL_RWX
+static int __mark_rodata_ro(void)
{
- unsigned long address = __fix_to_virt(idx);
+ unsigned long numpages;
- if (idx >= __end_of_fixed_addresses) {
- BUG();
- return;
- }
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n");
- map_page(address, phys, pgprot_val(flags));
- fixmaps++;
+ if (v_block_mapped((unsigned long)_stext + 1))
+ return mmu_mark_rodata_ro();
+
+ /*
+ * mark text and rodata as read only. __end_rodata is set by
+ * powerpc's linker script and includes tables and data
+ * requiring relocation which are not put in RO_DATA.
+ */
+ numpages = PFN_UP((unsigned long)__end_rodata) -
+ PFN_DOWN((unsigned long)_stext);
+
+ return set_memory_ro((unsigned long)_stext, numpages);
}
-void __this_fixmap_does_not_exist(void)
+void mark_rodata_ro(void)
{
- WARN_ON(1);
+ int err = __mark_rodata_ro();
+
+ if (err)
+ panic("%s() failed, err = %d\n", __func__, err);
}
+#endif
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index c8d709ab489d..6621cfc3baf8 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -1,5 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * This file contains ioremap and related functions for 64-bit machines.
+ * This file contains pgtable related functions for 64-bit machines.
*
* Derived from arch/ppc64/mm/init.c
* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
@@ -13,12 +14,6 @@
*
* Dave Engebretsen <engebret@us.ibm.com>
* Rework for PPC64 port.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <linux/signal.h>
@@ -33,16 +28,11 @@
#include <linux/swap.h>
#include <linux/stddef.h>
#include <linux/vmalloc.h>
-#include <linux/bootmem.h>
-#include <linux/memblock.h>
#include <linux/slab.h>
+#include <linux/hugetlb.h>
-#include <asm/pgalloc.h>
#include <asm/page.h>
-#include <asm/prom.h>
-#include <asm/io.h>
#include <asm/mmu_context.h>
-#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/smp.h>
#include <asm/machdep.h>
@@ -51,854 +41,122 @@
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
+#include <asm/dma.h>
-#include "mmu_decl.h"
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/thp.h>
-
-/* Some sanity checking */
-#if TASK_SIZE_USER64 > PGTABLE_RANGE
-#error TASK_SIZE_USER64 exceeds pagetable range
-#endif
-
-#ifdef CONFIG_PPC_STD_MMU_64
-#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
-#error TASK_SIZE_USER64 exceeds user VSID range
-#endif
-#endif
-
-unsigned long ioremap_bot = IOREMAP_BASE;
-
-#ifdef CONFIG_PPC_MMU_NOHASH
-static __ref void *early_alloc_pgtable(unsigned long size)
-{
- void *pt;
+#include <mm/mmu_decl.h>
- if (init_bootmem_done)
- pt = __alloc_bootmem(size, size, __pa(MAX_DMA_ADDRESS));
- else
- pt = __va(memblock_alloc_base(size, size,
- __pa(MAX_DMA_ADDRESS)));
- memset(pt, 0, size);
-
- return pt;
-}
-#endif /* CONFIG_PPC_MMU_NOHASH */
+#ifdef CONFIG_PPC_BOOK3S_64
/*
- * map_kernel_page currently only called by __ioremap
- * map_kernel_page adds an entry to the ioremap page table
- * and adds an entry to the HPT, possibly bolting it
+ * partition table and process table for ISA 3.0
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, int flags)
-{
- pgd_t *pgdp;
- pud_t *pudp;
- pmd_t *pmdp;
- pte_t *ptep;
-
- if (slab_is_available()) {
- pgdp = pgd_offset_k(ea);
- pudp = pud_alloc(&init_mm, pgdp, ea);
- if (!pudp)
- return -ENOMEM;
- pmdp = pmd_alloc(&init_mm, pudp, ea);
- if (!pmdp)
- return -ENOMEM;
- ptep = pte_alloc_kernel(pmdp, ea);
- if (!ptep)
- return -ENOMEM;
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
- } else {
-#ifdef CONFIG_PPC_MMU_NOHASH
- /* Warning ! This will blow up if bootmem is not initialized
- * which our ppc64 code is keen to do that, we'll need to
- * fix it and/or be more careful
- */
- pgdp = pgd_offset_k(ea);
-#ifdef PUD_TABLE_SIZE
- if (pgd_none(*pgdp)) {
- pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
- pgd_populate(&init_mm, pgdp, pudp);
- }
-#endif /* PUD_TABLE_SIZE */
- pudp = pud_offset(pgdp, ea);
- if (pud_none(*pudp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
- pud_populate(&init_mm, pudp, pmdp);
- }
- pmdp = pmd_offset(pudp, ea);
- if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
- pmd_populate_kernel(&init_mm, pmdp, ptep);
- }
- ptep = pte_offset_kernel(pmdp, ea);
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
-#else /* CONFIG_PPC_MMU_NOHASH */
- /*
- * If the mm subsystem is not fully up, we cannot create a
- * linux page table entry for this mapping. Simply bolt an
- * entry in the hardware page table.
- *
- */
- if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
- mmu_io_psize, mmu_kernel_ssize)) {
- printk(KERN_ERR "Failed to do bolted mapping IO "
- "memory at %016lx !\n", pa);
- return -ENOMEM;
- }
-#endif /* !CONFIG_PPC_MMU_NOHASH */
- }
-
-#ifdef CONFIG_PPC_BOOK3E_64
- /*
- * With hardware tablewalk, a sync is needed to ensure that
- * subsequent accesses see the PTE we just wrote. Unlike userspace
- * mappings, we can't tolerate spurious faults, so make sure
- * the new PTE will be seen the first time.
- */
- mb();
-#else
- smp_wmb();
-#endif
- return 0;
-}
-
-
-/**
- * __ioremap_at - Low level function to establish the page tables
- * for an IO mapping
- */
-void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
- unsigned long flags)
-{
- unsigned long i;
-
- /* Make sure we have the base flags */
- if ((flags & _PAGE_PRESENT) == 0)
- flags |= pgprot_val(PAGE_KERNEL);
-
- /* Non-cacheable page cannot be coherent */
- if (flags & _PAGE_NO_CACHE)
- flags &= ~_PAGE_COHERENT;
-
- /* We don't support the 4K PFN hack with ioremap */
- if (flags & _PAGE_4K_PFN)
- return NULL;
-
- WARN_ON(pa & ~PAGE_MASK);
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- for (i = 0; i < size; i += PAGE_SIZE)
- if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
- return NULL;
-
- return (void __iomem *)ea;
-}
-
-/**
- * __iounmap_from - Low level function to tear down the page tables
- * for an IO mapping. This is used for mappings that
- * are manipulated manually, like partial unmapping of
- * PCI IOs or ISA space.
+struct prtb_entry *process_tb;
+struct patb_entry *partition_tb;
+/*
+ * page table size
*/
-void __iounmap_at(void *ea, unsigned long size)
-{
- WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
- WARN_ON(size & ~PAGE_MASK);
-
- unmap_kernel_range((unsigned long)ea, size);
-}
+unsigned long __pte_index_size;
+EXPORT_SYMBOL(__pte_index_size);
+unsigned long __pmd_index_size;
+EXPORT_SYMBOL(__pmd_index_size);
+unsigned long __pud_index_size;
+EXPORT_SYMBOL(__pud_index_size);
+unsigned long __pgd_index_size;
+EXPORT_SYMBOL(__pgd_index_size);
+unsigned long __pud_cache_index;
+EXPORT_SYMBOL(__pud_cache_index);
+unsigned long __pte_table_size;
+EXPORT_SYMBOL(__pte_table_size);
+unsigned long __pmd_table_size;
+EXPORT_SYMBOL(__pmd_table_size);
+unsigned long __pud_table_size;
+EXPORT_SYMBOL(__pud_table_size);
+unsigned long __pgd_table_size;
+EXPORT_SYMBOL(__pgd_table_size);
+unsigned long __pmd_val_bits;
+EXPORT_SYMBOL(__pmd_val_bits);
+unsigned long __pud_val_bits;
+EXPORT_SYMBOL(__pud_val_bits);
+unsigned long __pgd_val_bits;
+EXPORT_SYMBOL(__pgd_val_bits);
+unsigned long __kernel_virt_start;
+EXPORT_SYMBOL(__kernel_virt_start);
+unsigned long __vmalloc_start;
+EXPORT_SYMBOL(__vmalloc_start);
+unsigned long __vmalloc_end;
+EXPORT_SYMBOL(__vmalloc_end);
+unsigned long __kernel_io_start;
+EXPORT_SYMBOL(__kernel_io_start);
+unsigned long __kernel_io_end;
+struct page *vmemmap;
+EXPORT_SYMBOL(vmemmap);
+unsigned long __pte_frag_nr;
+EXPORT_SYMBOL(__pte_frag_nr);
+unsigned long __pte_frag_size_shift;
+EXPORT_SYMBOL(__pte_frag_size_shift);
+#endif
-void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller)
+#ifndef __PAGETABLE_PUD_FOLDED
+/* 4 level page table */
+struct page *p4d_page(p4d_t p4d)
{
- phys_addr_t paligned;
- void __iomem *ret;
-
- /*
- * Choose an address to map it to.
- * Once the imalloc system is running, we use it.
- * Before that, we map using addresses going
- * up from ioremap_bot. imalloc will use
- * the addresses from ioremap_bot through
- * IMALLOC_END
- *
- */
- paligned = addr & PAGE_MASK;
- size = PAGE_ALIGN(addr + size) - paligned;
-
- if ((size == 0) || (paligned == 0))
- return NULL;
-
- if (mem_init_done) {
- struct vm_struct *area;
-
- area = __get_vm_area_caller(size, VM_IOREMAP,
- ioremap_bot, IOREMAP_END,
- caller);
- if (area == NULL)
- return NULL;
-
- area->phys_addr = paligned;
- ret = __ioremap_at(paligned, area->addr, size, flags);
- if (!ret)
- vunmap(area->addr);
- } else {
- ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
- if (ret)
- ioremap_bot += size;
+ if (p4d_leaf(p4d)) {
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!p4d_leaf(p4d));
+ return pte_page(p4d_pte(p4d));
}
-
- if (ret)
- ret += addr & ~PAGE_MASK;
- return ret;
-}
-
-void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
-}
-
-void __iomem * ioremap(phys_addr_t addr, unsigned long size)
-{
- unsigned long flags = _PAGE_NO_CACHE | _PAGE_GUARDED;
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags, caller);
- return __ioremap_caller(addr, size, flags, caller);
-}
-
-void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
-{
- unsigned long flags = _PAGE_NO_CACHE;
- void *caller = __builtin_return_address(0);
-
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags, caller);
- return __ioremap_caller(addr, size, flags, caller);
+ return virt_to_page(p4d_pgtable(p4d));
}
-
-void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
- unsigned long flags)
-{
- void *caller = __builtin_return_address(0);
-
- /* writeable implies dirty for kernel addresses */
- if (flags & _PAGE_RW)
- flags |= _PAGE_DIRTY;
-
- /* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- flags &= ~(_PAGE_USER | _PAGE_EXEC);
-
-#ifdef _PAGE_BAP_SR
- /* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
- * which means that we just cleared supervisor access... oops ;-) This
- * restores it
- */
- flags |= _PAGE_BAP_SR;
#endif
- if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags, caller);
- return __ioremap_caller(addr, size, flags, caller);
-}
-
-
-/*
- * Unmap an IO region and remove it from imalloc'd list.
- * Access to IO memory should be serialized by driver.
- */
-void __iounmap(volatile void __iomem *token)
+struct page *pud_page(pud_t pud)
{
- void *addr;
-
- if (!mem_init_done)
- return;
-
- addr = (void *) ((unsigned long __force)
- PCI_FIX_ADDR(token) & PAGE_MASK);
- if ((unsigned long)addr < ioremap_bot) {
- printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
- " at 0x%p\n", addr);
- return;
+ if (pud_leaf(pud)) {
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!pud_leaf(pud));
+ return pte_page(pud_pte(pud));
}
- vunmap(addr);
+ return virt_to_page(pud_pgtable(pud));
}
-void iounmap(volatile void __iomem *token)
-{
- if (ppc_md.iounmap)
- ppc_md.iounmap(token);
- else
- __iounmap(token);
-}
-
-EXPORT_SYMBOL(ioremap);
-EXPORT_SYMBOL(ioremap_wc);
-EXPORT_SYMBOL(ioremap_prot);
-EXPORT_SYMBOL(__ioremap);
-EXPORT_SYMBOL(__ioremap_at);
-EXPORT_SYMBOL(iounmap);
-EXPORT_SYMBOL(__iounmap);
-EXPORT_SYMBOL(__iounmap_at);
-
/*
* For hugepage we have pfn in the pmd, we use PTE_RPN_SHIFT bits for flags
* For PTE page, we have a PTE_FRAG_SIZE (4K) aligned virtual address.
*/
struct page *pmd_page(pmd_t pmd)
{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pmd_trans_huge(pmd))
- return pfn_to_page(pmd_pfn(pmd));
-#endif
- return virt_to_page(pmd_page_vaddr(pmd));
-}
-
-#ifdef CONFIG_PPC_64K_PAGES
-static pte_t *get_from_cache(struct mm_struct *mm)
-{
- void *pte_frag, *ret;
-
- spin_lock(&mm->page_table_lock);
- ret = mm->context.pte_frag;
- if (ret) {
- pte_frag = ret + PTE_FRAG_SIZE;
- /*
- * If we have taken up all the fragments mark PTE page NULL
- */
- if (((unsigned long)pte_frag & ~PAGE_MASK) == 0)
- pte_frag = NULL;
- mm->context.pte_frag = pte_frag;
- }
- spin_unlock(&mm->page_table_lock);
- return (pte_t *)ret;
-}
-
-static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
-{
- void *ret = NULL;
- struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
- __GFP_REPEAT | __GFP_ZERO);
- if (!page)
- return NULL;
- if (!kernel && !pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
-
- ret = page_address(page);
- spin_lock(&mm->page_table_lock);
- /*
- * If we find pgtable_page set, we return
- * the allocated page with single fragement
- * count.
- */
- if (likely(!mm->context.pte_frag)) {
- atomic_set(&page->_count, PTE_FRAG_NR);
- mm->context.pte_frag = ret + PTE_FRAG_SIZE;
- }
- spin_unlock(&mm->page_table_lock);
-
- return (pte_t *)ret;
-}
-
-pte_t *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr, int kernel)
-{
- pte_t *pte;
-
- pte = get_from_cache(mm);
- if (pte)
- return pte;
-
- return __alloc_for_cache(mm, kernel);
-}
-
-void page_table_free(struct mm_struct *mm, unsigned long *table, int kernel)
-{
- struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
- if (!kernel)
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
-}
-
-#ifdef CONFIG_SMP
-static void page_table_free_rcu(void *table)
-{
- struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
-}
-
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
-
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- if (!shift)
- /* PTE page needs special handling */
- page_table_free_rcu(table);
- else {
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(shift), table);
- }
-}
-#else
-void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- if (!shift) {
- /* PTE page needs special handling */
- struct page *page = virt_to_page(table);
- if (put_page_testzero(page)) {
- pgtable_page_dtor(page);
- free_hot_cold_page(page, 0);
- }
- } else {
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(shift), table);
- }
-}
-#endif
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
-/*
- * This is called when relaxing access to a hugepage. It's also called in the page
- * fault path when we don't hit any of the major fault cases, ie, a minor
- * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
- * handled those two for us, we additionally deal with missing execute
- * permission here on some processors
- */
-int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp, pmd_t entry, int dirty)
-{
- int changed;
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
- assert_spin_locked(&vma->vm_mm->page_table_lock);
-#endif
- changed = !pmd_same(*(pmdp), entry);
- if (changed) {
- __ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
+ if (pmd_leaf(pmd)) {
/*
- * Since we are not supporting SW TLB systems, we don't
- * have any thing similar to flush_tlb_page_nohash()
+ * vmalloc_to_page may be called on any vmap address (not only
+ * vmalloc), and it uses pmd_page() etc., when huge vmap is
+ * enabled so these checks can't be used.
*/
+ if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
+ VM_WARN_ON(!pmd_leaf(pmd));
+ return pte_page(pmd_pte(pmd));
}
- return changed;
-}
-
-unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, unsigned long clr,
- unsigned long set)
-{
-
- unsigned long old, tmp;
-
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
- assert_spin_locked(&mm->page_table_lock);
-#endif
-
-#ifdef PTE_ATOMIC_UPDATES
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- andi. %1,%0,%6\n\
- bne- 1b \n\
- andc %1,%0,%4 \n\
- or %1,%1,%7\n\
- stdcx. %1,0,%3 \n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
- : "r" (pmdp), "r" (clr), "m" (*pmdp), "i" (_PAGE_BUSY), "r" (set)
- : "cc" );
-#else
- old = pmd_val(*pmdp);
- *pmdp = __pmd((old & ~clr) | set);
-#endif
- trace_hugepage_update(addr, old, clr, set);
- if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(mm, addr, pmdp, old);
- return old;
-}
-
-pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- pmd_t pmd;
-
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
- if (pmd_trans_huge(*pmdp)) {
- pmd = pmdp_get_and_clear(vma->vm_mm, address, pmdp);
- } else {
- /*
- * khugepaged calls this for normal pmd
- */
- pmd = *pmdp;
- pmd_clear(pmdp);
- /*
- * Wait for all pending hash_page to finish. This is needed
- * in case of subpage collapse. When we collapse normal pages
- * to hugepage, we first clear the pmd, then invalidate all
- * the PTE entries. The assumption here is that any low level
- * page fault will see a none pmd and take the slow path that
- * will wait on mmap_sem. But we could very well be in a
- * hash_page with local ptep pointer value. Such a hash page
- * can result in adding new HPTE entries for normal subpages.
- * That means we could be modifying the page content as we
- * copy them to a huge page. So wait for parallel hash_page
- * to finish before invalidating HPTE entries. We can do this
- * by sending an IPI to all the cpus and executing a dummy
- * function there.
- */
- kick_all_cpus_sync();
- /*
- * Now invalidate the hpte entries in the range
- * covered by pmd. This make sure we take a
- * fault and will find the pmd as none, which will
- * result in a major fault which takes mmap_sem and
- * hence wait for collapse to complete. Without this
- * the __collapse_huge_page_copy can result in copying
- * the old content.
- */
- flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
- }
- return pmd;
-}
-
-int pmdp_test_and_clear_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-int pmdp_clear_flush_young(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
-}
-
-/*
- * We mark the pmd splitting and invalidate all the hpte
- * entries for this hugepage.
- */
-void pmdp_splitting_flush(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmdp)
-{
- unsigned long old, tmp;
-
- VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(!pmd_trans_huge(*pmdp));
- assert_spin_locked(&vma->vm_mm->page_table_lock);
-#endif
-
-#ifdef PTE_ATOMIC_UPDATES
-
- __asm__ __volatile__(
- "1: ldarx %0,0,%3\n\
- andi. %1,%0,%6\n\
- bne- 1b \n\
- ori %1,%0,%4 \n\
- stdcx. %1,0,%3 \n\
- bne- 1b"
- : "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
- : "r" (pmdp), "i" (_PAGE_SPLITTING), "m" (*pmdp), "i" (_PAGE_BUSY)
- : "cc" );
-#else
- old = pmd_val(*pmdp);
- *pmdp = __pmd(old | _PAGE_SPLITTING);
-#endif
- /*
- * If we didn't had the splitting flag set, go and flush the
- * HPTE entries.
- */
- trace_hugepage_splitting(address, old);
- if (!(old & _PAGE_SPLITTING)) {
- /* We need to flush the hpte */
- if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
- }
- /*
- * This ensures that generic code that rely on IRQ disabling
- * to prevent a parallel THP split work as expected.
- */
- kick_all_cpus_sync();
-}
-
-/*
- * We want to put the pgtable in pmd and use pgtable for tracking
- * the base page size hptes
- */
-void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pgtable)
-{
- pgtable_t *pgtable_slot;
- assert_spin_locked(&mm->page_table_lock);
- /*
- * we store the pgtable in the second half of PMD
- */
- pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
- *pgtable_slot = pgtable;
- /*
- * expose the deposited pgtable to other cpus.
- * before we set the hugepage PTE at pmd level
- * hash fault code looks at the deposted pgtable
- * to store hash index values.
- */
- smp_wmb();
-}
-
-pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
-{
- pgtable_t pgtable;
- pgtable_t *pgtable_slot;
-
- assert_spin_locked(&mm->page_table_lock);
- pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
- pgtable = *pgtable_slot;
- /*
- * Once we withdraw, mark the entry NULL.
- */
- *pgtable_slot = NULL;
- /*
- * We store HPTE information in the deposited PTE fragment.
- * zero out the content on withdraw.
- */
- memset(pgtable, 0, PTE_FRAG_SIZE);
- return pgtable;
-}
-
-/*
- * set a new huge pmd. We should not be called for updating
- * an existing pmd entry. That should go via pmd_hugepage_update.
- */
-void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t pmd)
-{
-#ifdef CONFIG_DEBUG_VM
- WARN_ON(pmd_val(*pmdp) & _PAGE_PRESENT);
- assert_spin_locked(&mm->page_table_lock);
- WARN_ON(!pmd_trans_huge(pmd));
-#endif
- trace_hugepage_set_pmd(addr, pmd);
- return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
-}
-
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmdp)
-{
- pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+ return virt_to_page(pmd_page_vaddr(pmd));
}
-/*
- * A linux hugepage PMD was changed and the corresponding hash table entries
- * neesd to be flushed.
- */
-void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, unsigned long old_pmd)
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
{
- int ssize, i;
- unsigned long s_addr;
- int max_hpte_count;
- unsigned int psize, valid;
- unsigned char *hpte_slot_array;
- unsigned long hidx, vpn, vsid, hash, shift, slot;
-
- /*
- * Flush all the hptes mapping this hugepage
- */
- s_addr = addr & HPAGE_PMD_MASK;
- hpte_slot_array = get_hpte_slot_array(pmdp);
- /*
- * IF we try to do a HUGE PTE update after a withdraw is done.
- * we will find the below NULL. This happens when we do
- * split_huge_page_pmd
- */
- if (!hpte_slot_array)
+ if (!mmu_has_feature(MMU_FTR_KERNEL_RO)) {
+ pr_warn("Warning: Unable to mark rodata read only on this CPU.\n");
return;
-
- /* get the base page size,vsid and segment size */
-#ifdef CONFIG_DEBUG_VM
- psize = get_slice_psize(mm, s_addr);
- BUG_ON(psize == MMU_PAGE_16M);
-#endif
- if (old_pmd & _PAGE_COMBO)
- psize = MMU_PAGE_4K;
- else
- psize = MMU_PAGE_64K;
-
- if (!is_kernel_addr(s_addr)) {
- ssize = user_segment_size(s_addr);
- vsid = get_vsid(mm->context.id, s_addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
}
- if (ppc_md.hugepage_invalidate)
- return ppc_md.hugepage_invalidate(vsid, s_addr,
- hpte_slot_array,
- psize, ssize);
- /*
- * No bluk hpte removal support, invalidate each entry
- */
- shift = mmu_psize_defs[psize].shift;
- max_hpte_count = HPAGE_PMD_SIZE >> shift;
- for (i = 0; i < max_hpte_count; i++) {
- /*
- * 8 bits per each hpte entries
- * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
- */
- valid = hpte_valid(hpte_slot_array, i);
- if (!valid)
- continue;
- hidx = hpte_hash_index(hpte_slot_array, i);
-
- /* get the vpn */
- addr = s_addr + (i * (1ul << shift));
- vpn = hpt_vpn(addr, vsid, ssize);
- hash = hpt_hash(vpn, shift, ssize);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
-
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
- ppc_md.hpte_invalidate(slot, vpn, psize,
- MMU_PAGE_16M, ssize, 0);
- }
-}
-
-static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
-{
- pmd_val(pmd) |= pgprot_val(pgprot);
- return pmd;
-}
-
-pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
-{
- pmd_t pmd;
- /*
- * For a valid pte, we would have _PAGE_PRESENT or _PAGE_FILE always
- * set. We use this to check THP page at pmd level.
- * leaf pte for huge page, bottom two bits != 00
- */
- pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
- pmd_val(pmd) |= _PAGE_THP_HUGE;
- pmd = pmd_set_protbits(pmd, pgprot);
- return pmd;
-}
-
-pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
-{
- return pfn_pmd(page_to_pfn(page), pgprot);
-}
-
-pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
-{
-
- pmd_val(pmd) &= _HPAGE_CHG_MASK;
- pmd = pmd_set_protbits(pmd, newprot);
- return pmd;
-}
-
-/*
- * This is called at the end of handling a user page fault, when the
- * fault has been handled by updating a HUGE PMD entry in the linux page tables.
- * We use it to preload an HPTE into the hash table corresponding to
- * the updated linux HUGE PMD entry.
- */
-void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd)
-{
- return;
-}
-
-pmd_t pmdp_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp)
-{
- pmd_t old_pmd;
- pgtable_t pgtable;
- unsigned long old;
- pgtable_t *pgtable_slot;
-
- old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
- old_pmd = __pmd(old);
- /*
- * We have pmd == none and we are holding page_table_lock.
- * So we can safely go and clear the pgtable hash
- * index info.
- */
- pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
- pgtable = *pgtable_slot;
- /*
- * Let's zero out old valid and hash index details
- * hash fault look at them.
- */
- memset(pgtable, 0, PTE_FRAG_SIZE);
- return old_pmd;
+ if (radix_enabled())
+ radix__mark_rodata_ro();
+ else
+ hash__mark_rodata_ro();
}
-int has_transparent_hugepage(void)
+void mark_initmem_nx(void)
{
- if (!mmu_has_feature(MMU_FTR_16M_PAGE))
- return 0;
- /*
- * We support THP only if PMD_SIZE is 16MB.
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
- return 0;
- /*
- * We need to make sure that we support 16MB hugepage in a segement
- * with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
- * of 64K.
- */
- /*
- * If we have 64K HPTE, we will be using that by default
- */
- if (mmu_psize_defs[MMU_PAGE_64K].shift &&
- (mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
- return 0;
- /*
- * Ok we only have 4K HPTE
- */
- if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
- return 0;
-
- return 1;
+ if (radix_enabled())
+ radix__mark_initmem_nx();
+ else
+ hash__mark_initmem_nx();
}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
deleted file mode 100644
index 5029dc19b517..000000000000
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * This file contains the routines for handling the MMU on those
- * PowerPC implementations where the MMU substantially follows the
- * architecture specification. This includes the 6xx, 7xx, 7xxx,
- * and 8260 implementations but excludes the 8xx and 4xx.
- * -- paulus
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/highmem.h>
-#include <linux/memblock.h>
-
-#include <asm/prom.h>
-#include <asm/mmu.h>
-#include <asm/machdep.h>
-
-#include "mmu_decl.h"
-
-struct hash_pte *Hash, *Hash_end;
-unsigned long Hash_size, Hash_mask;
-unsigned long _SDR1;
-
-struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
-
-struct batrange { /* stores address ranges mapped by BATs */
- unsigned long start;
- unsigned long limit;
- phys_addr_t phys;
-} bat_addrs[8];
-
-/*
- * Return PA for this VA if it is mapped by a BAT, or 0
- */
-phys_addr_t v_mapped_by_bats(unsigned long va)
-{
- int b;
- for (b = 0; b < 4; ++b)
- if (va >= bat_addrs[b].start && va < bat_addrs[b].limit)
- return bat_addrs[b].phys + (va - bat_addrs[b].start);
- return 0;
-}
-
-/*
- * Return VA for a given PA or 0 if not mapped
- */
-unsigned long p_mapped_by_bats(phys_addr_t pa)
-{
- int b;
- for (b = 0; b < 4; ++b)
- if (pa >= bat_addrs[b].phys
- && pa < (bat_addrs[b].limit-bat_addrs[b].start)
- +bat_addrs[b].phys)
- return bat_addrs[b].start+(pa-bat_addrs[b].phys);
- return 0;
-}
-
-unsigned long __init mmu_mapin_ram(unsigned long top)
-{
- unsigned long tot, bl, done;
- unsigned long max_size = (256<<20);
-
- if (__map_without_bats) {
- printk(KERN_DEBUG "RAM mapped without BATs\n");
- return 0;
- }
-
- /* Set up BAT2 and if necessary BAT3 to cover RAM. */
-
- /* Make sure we don't map a block larger than the
- smallest alignment of the physical address. */
- tot = top;
- for (bl = 128<<10; bl < max_size; bl <<= 1) {
- if (bl * 2 > tot)
- break;
- }
-
- setbat(2, PAGE_OFFSET, 0, bl, PAGE_KERNEL_X);
- done = (unsigned long)bat_addrs[2].limit - PAGE_OFFSET + 1;
- if ((done < tot) && !bat_addrs[3].limit) {
- /* use BAT3 to cover a bit more */
- tot -= done;
- for (bl = 128<<10; bl < max_size; bl <<= 1)
- if (bl * 2 > tot)
- break;
- setbat(3, PAGE_OFFSET+done, done, bl, PAGE_KERNEL_X);
- done = (unsigned long)bat_addrs[3].limit - PAGE_OFFSET + 1;
- }
-
- return done;
-}
-
-/*
- * Set up one of the I/D BAT (block address translation) register pairs.
- * The parameters are not checked; in particular size must be a power
- * of 2 between 128k and 256M.
- */
-void __init setbat(int index, unsigned long virt, phys_addr_t phys,
- unsigned int size, int flags)
-{
- unsigned int bl;
- int wimgxpp;
- struct ppc_bat *bat = BATS[index];
-
- if ((flags & _PAGE_NO_CACHE) ||
- (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
- flags &= ~_PAGE_COHERENT;
-
- bl = (size >> 17) - 1;
- if (PVR_VER(mfspr(SPRN_PVR)) != 1) {
- /* 603, 604, etc. */
- /* Do DBAT first */
- wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
- | _PAGE_COHERENT | _PAGE_GUARDED);
- wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX;
- bat[1].batu = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */
- bat[1].batl = BAT_PHYS_ADDR(phys) | wimgxpp;
- if (flags & _PAGE_USER)
- bat[1].batu |= 1; /* Vp = 1 */
- if (flags & _PAGE_GUARDED) {
- /* G bit must be zero in IBATs */
- bat[0].batu = bat[0].batl = 0;
- } else {
- /* make IBAT same as DBAT */
- bat[0] = bat[1];
- }
- } else {
- /* 601 cpu */
- if (bl > BL_8M)
- bl = BL_8M;
- wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE
- | _PAGE_COHERENT);
- wimgxpp |= (flags & _PAGE_RW)?
- ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX;
- bat->batu = virt | wimgxpp | 4; /* Ks=0, Ku=1 */
- bat->batl = phys | bl | 0x40; /* V=1 */
- }
-
- bat_addrs[index].start = virt;
- bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1;
- bat_addrs[index].phys = phys;
-}
-
-/*
- * Preload a translation in the hash table
- */
-void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap)
-{
- pmd_t *pmd;
-
- if (Hash == 0)
- return;
- pmd = pmd_offset(pud_offset(pgd_offset(mm, ea), ea), ea);
- if (!pmd_none(*pmd))
- add_hash_page(mm->context.id, ea, pmd_val(*pmd));
-}
-
-/*
- * Initialize the hash table and patch the instructions in hashtable.S.
- */
-void __init MMU_init_hw(void)
-{
- unsigned int hmask, mb, mb2;
- unsigned int n_hpteg, lg_n_hpteg;
-
- extern unsigned int hash_page_patch_A[];
- extern unsigned int hash_page_patch_B[], hash_page_patch_C[];
- extern unsigned int hash_page[];
- extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[];
-
- if (!mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
- /*
- * Put a blr (procedure return) instruction at the
- * start of hash_page, since we can still get DSI
- * exceptions on a 603.
- */
- hash_page[0] = 0x4e800020;
- flush_icache_range((unsigned long) &hash_page[0],
- (unsigned long) &hash_page[1]);
- return;
- }
-
- if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105);
-
-#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */
-#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10)
-#define MIN_N_HPTEG 1024 /* min 64kB hash table */
-
- /*
- * Allow 1 HPTE (1/8 HPTEG) for each page of memory.
- * This is less than the recommended amount, but then
- * Linux ain't AIX.
- */
- n_hpteg = total_memory / (PAGE_SIZE * 8);
- if (n_hpteg < MIN_N_HPTEG)
- n_hpteg = MIN_N_HPTEG;
- lg_n_hpteg = __ilog2(n_hpteg);
- if (n_hpteg & (n_hpteg - 1)) {
- ++lg_n_hpteg; /* round up if not power of 2 */
- n_hpteg = 1 << lg_n_hpteg;
- }
- Hash_size = n_hpteg << LG_HPTEG_SIZE;
-
- /*
- * Find some memory for the hash table.
- */
- if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
- Hash = __va(memblock_alloc(Hash_size, Hash_size));
- cacheable_memzero(Hash, Hash_size);
- _SDR1 = __pa(Hash) | SDR1_LOW_BITS;
-
- Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
-
- printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
- (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
-
-
- /*
- * Patch up the instructions in hashtable.S:create_hpte
- */
- if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
- Hash_mask = n_hpteg - 1;
- hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
- mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
- if (lg_n_hpteg > 16)
- mb2 = 16 - LG_HPTEG_SIZE;
-
- hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff)
- | ((unsigned int)(Hash) >> 16);
- hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6);
- hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6);
- hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask;
- hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask;
-
- /*
- * Ensure that the locations we've patched have been written
- * out from the data cache and invalidated in the instruction
- * cache, on those machines with split caches.
- */
- flush_icache_range((unsigned long) &hash_page_patch_A[0],
- (unsigned long) &hash_page_patch_C[1]);
-
- /*
- * Patch up the instructions in hashtable.S:flush_hash_page
- */
- flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff)
- | ((unsigned int)(Hash) >> 16);
- flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6);
- flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6);
- flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask;
- flush_icache_range((unsigned long) &flush_hash_patch_A[0],
- (unsigned long) &flush_hash_patch_B[1]);
-
- if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* We don't currently support the first MEMBLOCK not mapping 0
- * physical on those processors
- */
- BUG_ON(first_memblock_base != 0);
-
- /* 601 can only access 16MB at the moment */
- if (PVR_VER(mfspr(SPRN_PVR)) == 1)
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000));
- else /* Anything else has 256M mapped */
- memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
-}
diff --git a/arch/powerpc/mm/ptdump/8xx.c b/arch/powerpc/mm/ptdump/8xx.c
new file mode 100644
index 000000000000..4ca9cf7a90c9
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/8xx.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include "ptdump.h"
+
+/*
+ * Flag description table for 8xx page table entries.
+ *
+ * NOTE(review): struct flag_info is declared in ptdump.h, which is not shown
+ * here. Presumably an entry matches when (pte & .mask) == .val, in which case
+ * .set is printed and otherwise .clear -- confirm against ptdump.c.
+ */
+static const struct flag_info flag_array[] = {
+ {
+ /* The bit reported as "huge" depends on the configured page size */
+#ifdef CONFIG_PPC_16K_PAGES
+ .mask = _PAGE_HUGE,
+ .val = _PAGE_HUGE,
+#else
+ .mask = _PAGE_SPS,
+ .val = _PAGE_SPS,
+#endif
+ .set = "huge",
+ .clear = " ",
+ }, {
+ /*
+ * The next three entries share one mask (_PAGE_RO | _PAGE_NA) and
+ * differ only in .val, one entry per value of that field.
+ */
+ .mask = _PAGE_RO | _PAGE_NA,
+ .val = 0,
+ .set = "rw",
+ }, {
+ .mask = _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_RO,
+ .set = "r ",
+ }, {
+ .mask = _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_NA,
+ .set = " ",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "present",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_GUARDED,
+ .val = _PAGE_GUARDED,
+ .set = "guarded",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_NO_CACHE,
+ .val = _PAGE_NO_CACHE,
+ .set = "no cache",
+ .clear = " ",
+ }, {
+ /* No .clear string is given for this entry */
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special",
+ }
+};
+
+/*
+ * Per-level flag tables, one slot each for pgd, p4d, pud, pmd and pte.
+ * On 8xx all five levels point at the same flag_array.
+ */
+struct ptdump_pg_level pg_level[5] = {
+ { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* p4d */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
diff --git a/arch/powerpc/mm/ptdump/Makefile b/arch/powerpc/mm/ptdump/Makefile
new file mode 100644
index 000000000000..0f7a050f327e
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += ptdump.o
+
+obj-$(CONFIG_44x) += shared.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_E500) += shared.o
+obj-$(CONFIG_PPC_BOOK3S_32) += shared.o
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64.o
+
+ifdef CONFIG_PTDUMP_DEBUGFS
+obj-$(CONFIG_PPC_BOOK3S_32) += bats.o segment_regs.o
+obj-$(CONFIG_PPC_64S_HASH_MMU) += hashpagetable.o
+endif
diff --git a/arch/powerpc/mm/ptdump/bats.c b/arch/powerpc/mm/ptdump/bats.c
new file mode 100644
index 000000000000..820c119013e4
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/bats.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Christophe Leroy CS S.I.
+ * <christophe.leroy@c-s.fr>
+ *
+ * This dumps the content of BATS
+ */
+
+#include <linux/pgtable.h>
+#include <linux/debugfs.h>
+#include <asm/cpu_has_feature.h>
+
+#include "ptdump.h"
+
+/*
+ * Print one line describing a single BAT register pair.
+ *
+ * @m:     seq_file receiving the output
+ * @idx:   number printed in front of the entry
+ * @lower: value of the lower BAT register
+ * @upper: value of the upper BAT register
+ * @is_d:  true to format the protection field for a data BAT, false for an
+ *         instruction BAT
+ *
+ * When neither of the two low bits of @upper is set, only the index and a
+ * dash are printed.
+ */
+static void bat_show_603(struct seq_file *m, int idx, u32 lower, u32 upper, bool is_d)
+{
+	u32 mode = upper & 3;
+	u32 blk_len = (upper >> 2) & 0x7ff;
+	u32 ea_base = upper & 0xfffe0000;
+	u32 nbytes = (blk_len + 1) << 17;	/* block length is in 128kB units */
+	phys_addr_t pa_base = PHYS_BAT_ADDR(lower);
+	const char *prot;
+
+	seq_printf(m, "%d: ", idx);
+	if (!mode) {
+		seq_puts(m, " -\n");
+		return;
+	}
+
+	/* effective address range, physical base, then size */
+	seq_printf(m, "0x%08x-0x%08x ", ea_base, ea_base + nbytes - 1);
+#ifdef CONFIG_PHYS_64BIT
+	seq_printf(m, "0x%016llx ", pa_base);
+#else
+	seq_printf(m, "0x%08x ", pa_base);
+#endif
+	pt_dump_size(m, nbytes);
+
+	switch (mode) {
+	case 1:
+		seq_puts(m, "User ");
+		break;
+	case 2:
+		seq_puts(m, "Kernel ");
+		break;
+	default:
+		seq_puts(m, "Kernel/User ");
+		break;
+	}
+
+	/* BPP_RX is tested first, so it wins when both bits are set */
+	if (lower & BPP_RX)
+		prot = is_d ? "r " : " x ";
+	else if (lower & BPP_RW)
+		prot = is_d ? "rw " : " x ";
+	else
+		prot = is_d ? " " : " ";
+	seq_puts(m, prot);
+
+	/* one column per storage attribute bit */
+	seq_puts(m, lower & _PAGE_WRITETHRU ? "w " : " ");
+	seq_puts(m, lower & _PAGE_NO_CACHE ? "i " : " ");
+	seq_puts(m, lower & _PAGE_COHERENT ? "m " : " ");
+	seq_puts(m, lower & _PAGE_GUARDED ? "g " : " ");
+	seq_puts(m, "\n");
+}
+
+/*
+ * Read the lower (_l) and upper (_u) SPR of one BAT with mfspr() and pass the
+ * two values to bat_show_603(). _n is the index printed for the entry and _d
+ * is forwarded as the is_d argument.
+ */
+#define BAT_SHOW_603(_m, _n, _l, _u, _d) bat_show_603(_m, _n, mfspr(_l), mfspr(_u), _d)
+
+/*
+ * seq_file show callback: dump the instruction BATs followed by the data
+ * BATs. BATs 0-3 are always printed; BATs 4-7 are printed only when the MMU
+ * reports MMU_FTR_USE_HIGH_BATS. Always returns 0. @v is unused.
+ */
+static int bats_show(struct seq_file *m, void *v)
+{
+ seq_puts(m, "---[ Instruction Block Address Translation ]---\n");
+
+ BAT_SHOW_603(m, 0, SPRN_IBAT0L, SPRN_IBAT0U, false);
+ BAT_SHOW_603(m, 1, SPRN_IBAT1L, SPRN_IBAT1U, false);
+ BAT_SHOW_603(m, 2, SPRN_IBAT2L, SPRN_IBAT2U, false);
+ BAT_SHOW_603(m, 3, SPRN_IBAT3L, SPRN_IBAT3U, false);
+ if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {
+ BAT_SHOW_603(m, 4, SPRN_IBAT4L, SPRN_IBAT4U, false);
+ BAT_SHOW_603(m, 5, SPRN_IBAT5L, SPRN_IBAT5U, false);
+ BAT_SHOW_603(m, 6, SPRN_IBAT6L, SPRN_IBAT6U, false);
+ BAT_SHOW_603(m, 7, SPRN_IBAT7L, SPRN_IBAT7U, false);
+ }
+
+ seq_puts(m, "\n---[ Data Block Address Translation ]---\n");
+
+ BAT_SHOW_603(m, 0, SPRN_DBAT0L, SPRN_DBAT0U, true);
+ BAT_SHOW_603(m, 1, SPRN_DBAT1L, SPRN_DBAT1U, true);
+ BAT_SHOW_603(m, 2, SPRN_DBAT2L, SPRN_DBAT2U, true);
+ BAT_SHOW_603(m, 3, SPRN_DBAT3L, SPRN_DBAT3U, true);
+ if (mmu_has_feature(MMU_FTR_USE_HIGH_BATS)) {
+ BAT_SHOW_603(m, 4, SPRN_DBAT4L, SPRN_DBAT4U, true);
+ BAT_SHOW_603(m, 5, SPRN_DBAT5L, SPRN_DBAT5U, true);
+ BAT_SHOW_603(m, 6, SPRN_DBAT6L, SPRN_DBAT6U, true);
+ BAT_SHOW_603(m, 7, SPRN_DBAT7L, SPRN_DBAT7U, true);
+ }
+
+ return 0;
+}
+
+/* Provides bats_fops, which is registered by bats_init() below */
+DEFINE_SHOW_ATTRIBUTE(bats);
+
+/*
+ * Create the read-only (0400) debugfs file "block_address_translation" under
+ * arch_debugfs_dir. Always returns 0; the result of debugfs_create_file()
+ * is not checked.
+ */
+static int __init bats_init(void)
+{
+ debugfs_create_file("block_address_translation", 0400,
+ arch_debugfs_dir, NULL, &bats_fops);
+ return 0;
+}
+device_initcall(bats_init);
diff --git a/arch/powerpc/mm/ptdump/book3s64.c b/arch/powerpc/mm/ptdump/book3s64.c
new file mode 100644
index 000000000000..6b2da9241d4c
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/book3s64.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include "ptdump.h"
+
+/*
+ * Flag description table for Book3S 64 page table entries.
+ *
+ * NOTE(review): struct flag_info is declared in ptdump.h, which is not shown
+ * here. Presumably an entry matches when (pte & .mask) == .val, in which case
+ * .set is printed and otherwise .clear -- confirm against ptdump.c.
+ */
+static const struct flag_info flag_array[] = {
+ {
+ /* .val is 0: "user" is the state where _PAGE_PRIVILEGED is NOT set */
+ .mask = _PAGE_PRIVILEGED,
+ .val = 0,
+ .set = "user",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_READ,
+ .val = _PAGE_READ,
+ .set = "r",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_WRITE,
+ .val = _PAGE_WRITE,
+ .set = "w",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PTE,
+ .val = _PAGE_PTE,
+ .set = "pte",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "valid",
+ .clear = " ",
+ }, {
+ /*
+ * .val is 0 here as well, so .set describes the state with both
+ * bits clear and "present" is the .clear string.
+ */
+ .mask = _PAGE_PRESENT | _PAGE_INVALID,
+ .val = 0,
+ .set = " ",
+ .clear = "present",
+ }, {
+ .mask = H_PAGE_HASHPTE,
+ .val = H_PAGE_HASHPTE,
+ .set = "hpte",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_NON_IDEMPOTENT,
+ .val = _PAGE_NON_IDEMPOTENT,
+ .set = "non-idempotent",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_TOLERANT,
+ .val = _PAGE_TOLERANT,
+ .set = "tolerant",
+ .clear = " ",
+ }, {
+ .mask = H_PAGE_BUSY,
+ .val = H_PAGE_BUSY,
+ .set = "busy",
+ }, {
+ /*
+ * The "{" above opens the first entry of whichever branch is
+ * compiled in; each branch ends by opening the _PAGE_SPECIAL entry.
+ */
+#ifdef CONFIG_PPC_64K_PAGES
+ .mask = H_PAGE_COMBO,
+ .val = H_PAGE_COMBO,
+ .set = "combo",
+ }, {
+ .mask = H_PAGE_4K_PFN,
+ .val = H_PAGE_4K_PFN,
+ .set = "4K_pfn",
+ }, {
+#else /* CONFIG_PPC_64K_PAGES */
+ .mask = H_PAGE_F_GIX,
+ .val = H_PAGE_F_GIX,
+ .set = "f_gix",
+ .is_val = true,
+ .shift = H_PAGE_F_GIX_SHIFT,
+ }, {
+ .mask = H_PAGE_F_SECOND,
+ .val = H_PAGE_F_SECOND,
+ .set = "f_second",
+ }, {
+#endif /* CONFIG_PPC_64K_PAGES */
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special",
+ }
+};
+
+/*
+ * Per-level flag tables, one slot each for pgd, p4d, pud, pmd and pte.
+ * On Book3S 64 all five levels point at the same flag_array.
+ */
+struct ptdump_pg_level pg_level[5] = {
+ { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* p4d */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
new file mode 100644
index 000000000000..a6baa6166d94
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ * This traverses the kernel virtual memory and dumps the pages that are in
+ * the hash pagetable, along with their flags to
+ * /sys/kernel/debug/kernel_hash_pagetable.
+ *
+ * If radix is enabled then there is no hash page table and so no debugfs file
+ * is generated.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/const.h>
+#include <asm/page.h>
+#include <asm/plpar_wrappers.h>
+#include <linux/memblock.h>
+#include <asm/firmware.h>
+#include <asm/pgalloc.h>
+
+/* State threaded through every walk/dump helper in this file. */
+struct pg_state {
+ /* seq_file that all output is written to */
+ struct seq_file *seq;
+ /* most recent region marker announced by dump_hpte_info() */
+ const struct addr_marker *marker;
+ /* set to PAGE_OFFSET by ptdump_show() */
+ unsigned long start_address;
+ /* NOTE(review): level and current_flags are not read or written anywhere else in this file */
+ unsigned int level;
+ u64 current_flags;
+};
+
+/* A named boundary in the kernel virtual address space. */
+struct addr_marker {
+ /* first address of the region; compared against by dump_hpte_info() */
+ unsigned long start_address;
+ /* text printed between "---[ " and " ]---" when the region is entered */
+ const char *name;
+};
+
+/*
+ * Region markers in ascending address order. The start addresses are
+ * placeholders here; populate_markers() fills in entries 0-9 at init time.
+ */
+static struct addr_marker address_markers[] = {
+ { 0, "Start of kernel VM" },
+ { 0, "vmalloc() Area" },
+ { 0, "vmalloc() End" },
+ { 0, "isa I/O start" },
+ { 0, "isa I/O end" },
+ { 0, "phb I/O start" },
+ { 0, "phb I/O end" },
+ { 0, "I/O remap start" },
+ { 0, "I/O remap end" },
+ { 0, "vmemmap start" },
+ /* terminator: -1 becomes the largest unsigned long, which stops the marker scan */
+ { -1, NULL },
+};
+
+/*
+ * Describes how dump_flag_info() prints one field of an HPTE word.
+ * Entries with .mask == 0 are skipped.
+ */
+struct flag_info {
+ /* bits of the word that belong to this field */
+ u64 mask;
+ /* value (pte & mask) must equal for .set to be printed; for is_val entries, the bits extracted */
+ u64 val;
+ /* text printed on a match, or the label printed before the value for is_val entries */
+ const char *set;
+ /* text printed on a mismatch; nothing is printed when this is NULL */
+ const char *clear;
+ /* true: print "set:<hex value>" instead of a set/clear string */
+ bool is_val;
+ /* right shift applied to the extracted value when non-zero (is_val only) */
+ int shift;
+};
+
+/*
+ * Fields decoded from the first HPTE doubleword (v), in print order.
+ * Every entry has both a .set and a .clear string, so each one always
+ * prints something (the bolted entry prints an empty string when clear).
+ */
+static const struct flag_info v_flag_array[] = {
+ {
+ .mask = SLB_VSID_B,
+ .val = SLB_VSID_B_256M,
+ .set = "ssize: 256M",
+ .clear = "ssize: 1T ",
+ }, {
+ .mask = HPTE_V_SECONDARY,
+ .val = HPTE_V_SECONDARY,
+ .set = "secondary",
+ .clear = "primary ",
+ }, {
+ .mask = HPTE_V_VALID,
+ .val = HPTE_V_VALID,
+ .set = "valid ",
+ .clear = "invalid",
+ }, {
+ .mask = HPTE_V_BOLTED,
+ .val = HPTE_V_BOLTED,
+ .set = "bolted",
+ .clear = "",
+ }
+};
+
+/*
+ * Fields decoded from the second HPTE doubleword (r), in print order.
+ * Entries without a .clear string print nothing when they do not match.
+ */
+static const struct flag_info r_flag_array[] = {
+ {
+ /*
+ * The five "prot:" entries share one mask and differ only in .val,
+ * so at most one of them matches a given r value.
+ */
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RWXX,
+ .set = "prot:RW--",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RWRX,
+ .set = "prot:RWR-",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RWRW,
+ .set = "prot:RWRW",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RXRX,
+ .set = "prot:R-R-",
+ }, {
+ .mask = HPTE_R_PP0 | HPTE_R_PP,
+ .val = PP_RXXX,
+ .set = "prot:R---",
+ }, {
+ /* is_val with no .shift: the masked key bits are printed unshifted */
+ .mask = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
+ .val = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
+ .set = "key",
+ .clear = "",
+ .is_val = true,
+ }, {
+ .mask = HPTE_R_R,
+ .val = HPTE_R_R,
+ .set = "ref",
+ .clear = " ",
+ }, {
+ .mask = HPTE_R_C,
+ .val = HPTE_R_C,
+ .set = "changed",
+ .clear = " ",
+ }, {
+ .mask = HPTE_R_N,
+ .val = HPTE_R_N,
+ .set = "no execute",
+ }, {
+ /*
+ * The mask is the whole WIMG field while .val is a single bit, so
+ * each of the next three entries matches only when that bit is the
+ * only WIMG bit set.
+ */
+ .mask = HPTE_R_WIMG,
+ .val = HPTE_R_W,
+ .set = "writethru",
+ }, {
+ .mask = HPTE_R_WIMG,
+ .val = HPTE_R_I,
+ .set = "no cache",
+ }, {
+ .mask = HPTE_R_WIMG,
+ .val = HPTE_R_G,
+ .set = "guarded",
+ }
+};
+
+/*
+ * Print a page size given as a shift, scaled to a unit letter.
+ *
+ * @st: dump state; output goes to st->seq
+ * @ps: page size as a power-of-two exponent
+ * @s:  label placed in front of "_ps"
+ *
+ * Returns what is left of @ps after whole multiples of 10 have been
+ * removed, i.e. the exponent of the number printed before the unit letter.
+ */
+static int calculate_pagesize(struct pg_state *st, int ps, char s[])
+{
+	static const char units[] = "BKMGTPE";
+	int u = 0;
+
+	/* step up one unit per factor of 2^10 while a larger unit exists */
+	for (; ps > 9 && units[u + 1]; ps -= 10)
+		u++;
+
+	seq_printf(st->seq, " %s_ps: %i%c\t", s, 1<<ps, units[u]);
+	return ps;
+}
+
+/*
+ * Print every field of @pte that is described by the @num entries of @flag.
+ *
+ * Entries whose mask is zero are ignored. An is_val entry prints its label
+ * followed by the extracted bits in hex; any other entry prints its set or
+ * clear string, or nothing when the chosen string is NULL.
+ */
+static void dump_flag_info(struct pg_state *st, const struct flag_info
+		*flag, u64 pte, int num)
+{
+	unsigned int i;
+
+	for (i = 0; i < num; i++) {
+		const struct flag_info *f = &flag[i];
+		const char *text;
+
+		/* flag not defined so don't check it */
+		if (!f->mask)
+			continue;
+
+		/* Some 'flags' are actually values */
+		if (f->is_val) {
+			u64 field = pte & f->val;
+
+			if (f->shift)
+				field >>= f->shift;
+			seq_printf(st->seq, " %s:%llx", f->set, field);
+			continue;
+		}
+
+		text = ((pte & f->mask) == f->val) ? f->set : f->clear;
+		if (text)
+			seq_printf(st->seq, " %s", text);
+	}
+}
+
+/*
+ * Emit one output line for a hash page table entry.
+ *
+ * @st:  dump state (output file and current region marker)
+ * @ea:  effective address the entry maps
+ * @v:   first HPTE doubleword
+ * @r:   second HPTE doubleword
+ * @rpn: real page number decoded from @r
+ * @bps: base page size as a shift
+ * @aps: actual page size as a shift
+ * @lp:  LP encoding bits
+ *
+ * A "---[ name ]---" header is printed first for every region boundary that
+ * @ea has crossed since the previous call.
+ */
+static void dump_hpte_info(struct pg_state *st, unsigned long ea, u64 v, u64 r,
+		unsigned long rpn, int bps, int aps, unsigned long lp)
+{
+	const struct addr_marker *next = st->marker + 1;
+	int aps_rem;
+
+	/* catch the marker up with ea, announcing each region entered */
+	while (ea >= next->start_address) {
+		st->marker = next++;
+		seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	}
+
+	seq_printf(st->seq, "0x%lx:\t", ea);
+	seq_printf(st->seq, "AVPN:%llx\t", HPTE_V_AVPN_VAL(v));
+	dump_flag_info(st, v_flag_array, v, ARRAY_SIZE(v_flag_array));
+	seq_printf(st->seq, " rpn: %lx\t", rpn);
+	dump_flag_info(st, r_flag_array, r, ARRAY_SIZE(r_flag_array));
+
+	calculate_pagesize(st, bps, "base");
+	aps_rem = calculate_pagesize(st, aps, "actual");
+	/* a remainder of 2 is what a shift of 12 (4K) leaves; no LP enc then */
+	if (aps_rem != 2)
+		seq_printf(st->seq, "LP enc: %lx", lp);
+	seq_putc(st->seq, '\n');
+}
+
+
+/*
+ * Search one hash group of the in-memory hash table for a valid entry
+ * that maps @ea.
+ *
+ * @ea:      effective address to look up
+ * @psize:   index into mmu_psize_defs giving the base page size
+ * @primary: true to search the primary group, false for the secondary one
+ * @v, @r:   receive the two doublewords of the matching entry
+ *
+ * Returns 0 when an entry is found, -1 otherwise (@v and @r are then left
+ * untouched).
+ */
+static int native_find(unsigned long ea, int psize, bool primary, u64 *v, u64
+		*r)
+{
+	int ssize = mmu_kernel_ssize;
+	unsigned long shift = mmu_psize_defs[psize].shift;
+	unsigned long vsid, vpn, hash, want_v, hpte_v;
+	struct hash_pte *hptep;
+	int i;
+
+	/* calculate hash */
+	vsid = get_kernel_vsid(ea, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
+	hash = hpt_hash(vpn, shift, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+	/* to check in the secondary hash table, we invert the hash */
+	if (!primary)
+		hash = ~hash;
+
+	/* first slot of the group; the loop then steps through the group */
+	hptep = htab_address + (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	for (i = 0; i < HPTES_PER_GROUP; i++, hptep++) {
+		hpte_v = be64_to_cpu(hptep->v);
+
+		if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
+			continue;
+
+		/* HPTE matches */
+		*v = be64_to_cpu(hptep->v);
+		*r = be64_to_cpu(hptep->r);
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * Same lookup as native_find(), but the hash group is read four entries at
+ * a time through plpar_pte_read_4().
+ *
+ * @ea:      effective address to look up
+ * @psize:   index into mmu_psize_defs giving the base page size
+ * @primary: true to search the primary group, false for the secondary one
+ * @v, @r:   receive the two doublewords of the matching entry
+ *
+ * Returns 0 when an entry is found, -1 otherwise. A batch whose read
+ * reports a non-zero status is skipped.
+ */
+static int pseries_find(unsigned long ea, int psize, bool primary, u64 *v, u64 *r)
+{
+	struct {
+		unsigned long v;
+		unsigned long r;
+	} batch[4];
+	int ssize = mmu_kernel_ssize;
+	unsigned long shift = mmu_psize_defs[psize].shift;
+	unsigned long vsid, vpn, hash, slot, want_v;
+	int done, k;
+
+	/* calculate hash */
+	vsid = get_kernel_vsid(ea, ssize);
+	vpn = hpt_vpn(ea, vsid, ssize);
+	hash = hpt_hash(vpn, shift, ssize);
+	want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+	/* to check in the secondary hash table, we invert the hash */
+	if (!primary)
+		hash = ~hash;
+	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+
+	/* see if we can find an entry in the hpte with this hash */
+	for (done = 0; done < HPTES_PER_GROUP; done += 4, slot += 4) {
+		if (plpar_pte_read_4(0, slot, (void *)batch))
+			continue;
+
+		for (k = 0; k < 4; k++) {
+			if (!HPTE_V_COMPARE(batch[k].v, want_v))
+				continue;
+			if (!(batch[k].v & HPTE_V_VALID))
+				continue;
+
+			/* HPTE matches */
+			*v = batch[k].v;
+			*r = batch[k].r;
+			return 0;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Work out the actual page size, LP bits and RPN of a large-page HPTE.
+ *
+ * @bps:     index into mmu_psize_defs of the base page size
+ * @r:       second HPTE doubleword
+ * @rpn:     receives the real page number
+ * @aps:     receives the actual page size as a shift
+ * @lp_bits: receives the LP encoding that matched
+ *
+ * The LP field has 8 bits. Depending on the actual page size, some of these
+ * bits are concatenated with the ARPN to get the RPN; the rest is the LP
+ * value, an encoding of the base and actual page sizes. Each page size that
+ * has an encoding for this base size is tried in index order, using the
+ * mask that goes with it, until one matches the LP field.
+ *
+ * None of the three outputs is written when no encoding matches.
+ */
+static void decode_r(int bps, unsigned long r, unsigned long *rpn, int *aps,
+		unsigned long *lp_bits)
+{
+	const struct mmu_psize_def *base = &mmu_psize_defs[bps];
+	unsigned long arpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
+	unsigned long lp = arpn & 0xff;
+	unsigned long mask;
+	int idx, penc, shift;
+
+	for (idx = 0; idx < MMU_PAGE_COUNT; idx++) {
+		penc = base->penc[idx];
+
+		/* skip sizes with no encoding for this base, or not defined */
+		if (penc == -1 || !mmu_psize_defs[idx].shift)
+			continue;
+
+		shift = mmu_psize_defs[idx].shift - HPTE_R_RPN_SHIFT;
+		mask = (0x1 << (shift)) - 1;
+		if ((lp & mask) != penc)
+			continue;
+
+		*aps = mmu_psize_to_shift(idx);
+		*lp_bits = lp & mask;
+		*rpn = arpn >> shift;
+		return;
+	}
+}
+
+/*
+ * Pick the lookup routine for this platform: pseries_find() when pseries
+ * support is built in and the firmware reports FW_FEATURE_LPAR,
+ * native_find() otherwise. Arguments and return value are passed through.
+ */
+static int base_hpte_find(unsigned long ea, int psize, bool primary, u64 *v,
+		u64 *r)
+{
+	if (!IS_ENABLED(CONFIG_PPC_PSERIES) || !firmware_has_feature(FW_FEATURE_LPAR))
+		return native_find(ea, psize, primary, v, r);
+
+	return pseries_find(ea, psize, primary, v, r);
+}
+
+/*
+ * Look up the hash page table entry for @ea and, if one exists, dump it.
+ *
+ * @st:    dump state passed on to dump_hpte_info()
+ * @ea:    effective address to look up; addresses below PAGE_OFFSET are
+ *         rejected
+ * @psize: index into mmu_psize_defs giving the base page size
+ *
+ * Returns 0 when an entry was found and dumped, (unsigned long)-1 when
+ * there is no entry or its page size encoding could not be decoded.
+ */
+static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize)
+{
+	unsigned long slot;
+	u64 v = 0, r = 0;
+	unsigned long rpn = 0, lp_bits = 0;
+	/*
+	 * actual_psize must start at -1: decode_r() leaves it untouched when
+	 * no encoding matches, and that is the value tested for below.
+	 */
+	int base_psize = 0, actual_psize = -1;
+
+	if (ea < PAGE_OFFSET)
+		return -1;
+
+	/* Look in primary table */
+	slot = base_hpte_find(ea, psize, true, &v, &r);
+
+	/* Look in secondary table */
+	if (slot == -1)
+		slot = base_hpte_find(ea, psize, false, &v, &r);
+
+	/* No entry found */
+	if (slot == -1)
+		return -1;
+
+	/*
+	 * We found an entry in the hash page table:
+	 * - check that this has the same base page
+	 * - find the actual page size
+	 * - find the RPN
+	 */
+	base_psize = mmu_psize_to_shift(psize);
+
+	if ((v & HPTE_V_LARGE) == HPTE_V_LARGE) {
+		decode_r(psize, r, &rpn, &actual_psize, &lp_bits);
+	} else {
+		/* 4K actual page size */
+		actual_psize = 12;
+		rpn = (r & HPTE_R_RPN) >> HPTE_R_RPN_SHIFT;
+		/* In this case there are no LP bits */
+		lp_bits = -1;
+	}
+	/*
+	 * We didn't find a matching encoding, so the PTE we found isn't for
+	 * this address.
+	 */
+	if (actual_psize == -1)
+		return -1;
+
+	dump_hpte_info(st, ea, v, r, rpn, base_psize, actual_psize, lp_bits);
+	return 0;
+}
+
+/*
+ * Visit every PTE slot under @pmd and dump the hash entry, if any, for the
+ * address each slot covers.
+ *
+ * @st:    dump state
+ * @pmd:   PMD entry whose PTE table is walked
+ * @start: virtual address covered by the first PTE slot
+ *
+ * A hash entry that exists while the Linux PTE lacks H_PAGE_HASHPTE is
+ * reported with an extra message.
+ */
+static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
+{
+	pte_t *ptep = pte_offset_kernel(pmd, 0);
+	unsigned long va = start;
+	unsigned long raw, psize;
+	int n, status;
+
+	for (n = 0; n < PTRS_PER_PTE; n++, ptep++, va += PAGE_SIZE) {
+		raw = pte_val(*ptep);
+
+		psize = (va < VMALLOC_END) ? mmu_vmalloc_psize : mmu_io_psize;
+
+		/* check for secret 4K mappings */
+		if (IS_ENABLED(CONFIG_PPC_64K_PAGES) &&
+		    ((raw & H_PAGE_COMBO) == H_PAGE_COMBO ||
+		     (raw & H_PAGE_4K_PFN) == H_PAGE_4K_PFN))
+			psize = mmu_io_psize;
+
+		/* check for hashpte */
+		status = hpte_find(st, va, psize);
+		if (status == -1)
+			continue;
+		if ((raw & H_PAGE_HASHPTE) == H_PAGE_HASHPTE)
+			continue;
+
+		/* found a hpte that is not in the linux page tables */
+		seq_printf(st->seq, "page probably bolted before linux"
+			" pagetables were set: addr:%lx, pteval:%lx\n",
+			va, raw);
+	}
+}
+
+/*
+ * Visit every PMD slot under @pud and descend into the populated ones.
+ * @start is the virtual address covered by the first slot.
+ */
+static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start)
+{
+	pmd_t *entry = pmd_offset(pud, 0);
+	unsigned long va = start;
+	unsigned int n;
+
+	for (n = 0; n < PTRS_PER_PMD; n++, entry++, va += PMD_SIZE) {
+		if (pmd_none(*entry))
+			continue;
+		/* pmd exists */
+		walk_pte(st, entry, va);
+	}
+}
+
+/*
+ * Visit every PUD slot under @p4d and descend into the populated ones.
+ * @start is the virtual address covered by the first slot.
+ */
+static void walk_pud(struct pg_state *st, p4d_t *p4d, unsigned long start)
+{
+	pud_t *entry = pud_offset(p4d, 0);
+	unsigned long va = start;
+	unsigned int n;
+
+	for (n = 0; n < PTRS_PER_PUD; n++, entry++, va += PUD_SIZE) {
+		if (pud_none(*entry))
+			continue;
+		/* pud exists */
+		walk_pmd(st, entry, va);
+	}
+}
+
+/*
+ * Visit every P4D slot under @pgd and descend into the populated ones.
+ * @start is the virtual address covered by the first slot.
+ */
+static void walk_p4d(struct pg_state *st, pgd_t *pgd, unsigned long start)
+{
+	p4d_t *entry = p4d_offset(pgd, 0);
+	unsigned long va = start;
+	unsigned int n;
+
+	for (n = 0; n < PTRS_PER_P4D; n++, entry++, va += P4D_SIZE) {
+		if (p4d_none(*entry))
+			continue;
+		/* p4d exists */
+		walk_pud(st, entry, va);
+	}
+}
+
+/*
+ * Traverse the linux pagetable structure, starting at KERN_VIRT_START, and
+ * dump pages that are in the hash pagetable.
+ */
+static void walk_pagetables(struct pg_state *st)
+{
+	pgd_t *entry = pgd_offset_k(0UL);
+	unsigned long va = KERN_VIRT_START;
+	unsigned int n;
+
+	for (n = 0; n < PTRS_PER_PGD; n++, entry++, va += PGDIR_SIZE) {
+		if (pgd_none(*entry))
+			continue;
+		/* pgd exists */
+		walk_p4d(st, entry, va);
+	}
+}
+
+
+/*
+ * Traverse the linear mapping section of virtual memory and dump pages
+ * that are in the hash pagetable.
+ *
+ * The walk runs from PAGE_OFFSET to PAGE_OFFSET + memblock_end_of_DRAM()
+ * in steps of the linear mapping page size.
+ */
+static void walk_linearmapping(struct pg_state *st)
+{
+	unsigned long addr;
+
+	/*
+	 * Shift an unsigned long so the step is computed at full width: a
+	 * plain int "1 << shift" is undefined once the shift reaches 31.
+	 */
+	unsigned long psize = 1UL << mmu_psize_defs[mmu_linear_psize].shift;
+
+	for (addr = PAGE_OFFSET; addr < PAGE_OFFSET +
+			memblock_end_of_DRAM(); addr += psize)
+		hpte_find(st, addr, mmu_linear_psize);
+}
+
+/*
+ * Traverse the vmemmaped memory and dump pages that are in the hash
+ * pagetable, then print the closing "vmemmap end" marker.
+ *
+ * Does nothing when CONFIG_SPARSEMEM_VMEMMAP is not enabled.
+ */
+static void walk_vmemmap(struct pg_state *st)
+{
+	struct vmemmap_backing *node = vmemmap_list;
+
+	if (!IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP))
+		return;
+
+	/* one lookup per entry of the vmemmap backing list */
+	for (; node; node = node->list)
+		hpte_find(st, node->virt_addr, mmu_vmemmap_psize);
+
+	seq_puts(st->seq, "---[ vmemmap end ]---\n");
+}
+
+/*
+ * Fill in the start addresses of address_markers[0..9]. The indices must
+ * stay in step with the order of the names in the address_markers
+ * initializer; the final terminator entry is left as it is.
+ */
+static void populate_markers(void)
+{
+ address_markers[0].start_address = PAGE_OFFSET;
+ address_markers[1].start_address = VMALLOC_START;
+ address_markers[2].start_address = VMALLOC_END;
+ address_markers[3].start_address = ISA_IO_BASE;
+ address_markers[4].start_address = ISA_IO_END;
+ address_markers[5].start_address = PHB_IO_BASE;
+ address_markers[6].start_address = PHB_IO_END;
+ address_markers[7].start_address = IOREMAP_BASE;
+ address_markers[8].start_address = IOREMAP_END;
+ address_markers[9].start_address = H_VMEMMAP_START;
+}
+
+/*
+ * seq_file show callback for the hash page table dump. Builds a fresh
+ * pg_state that starts at the first address marker, then runs the three
+ * walks in order: linear mapping, kernel page tables, vmemmap.
+ * Always returns 0. @v is unused.
+ */
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ struct pg_state st = {
+ .seq = m,
+ .start_address = PAGE_OFFSET,
+ .marker = address_markers,
+ };
+ /*
+ * Traverse the 0xc, 0xd and 0xf areas of the kernel virtual memory and
+ * dump pages that are in the hash pagetable.
+ */
+ walk_linearmapping(&st);
+ walk_pagetables(&st);
+ walk_vmemmap(&st);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+/*
+ * Register the "kernel_hash_pagetable" debugfs file (mode 0400, NULL parent)
+ * unless radix is enabled, in which case nothing is created.
+ * Always returns 0.
+ */
+static int ptdump_init(void)
+{
+	if (radix_enabled())
+		return 0;
+
+	populate_markers();
+	debugfs_create_file("kernel_hash_pagetable", 0400, NULL, NULL,
+			    &ptdump_fops);
+	return 0;
+}
+device_initcall(ptdump_init);
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
new file mode 100644
index 000000000000..b2358d794855
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ * This traverses the kernel pagetables and dumps the
+ * information about the used sections of memory to
+ * /sys/kernel/debug/kernel_page_tables.
+ *
+ * Derived from the arm64 implementation:
+ * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
+ * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
+ */
+#include <linux/debugfs.h>
+#include <linux/fs.h>
+#include <linux/hugetlb.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/ptdump.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <asm/fixmap.h>
+#include <linux/const.h>
+#include <linux/kasan.h>
+#include <asm/page.h>
+#include <asm/hugetlb.h>
+
+#include <mm/mmu_decl.h>
+
+#include "ptdump.h"
+
+/*
+ * To visualise what is happening,
+ *
+ * - PTRS_PER_P** = how many entries there are in the corresponding P**
+ * - P**_SHIFT = how many bits of the address we use to index into the
+ * corresponding P**
+ * - P**_SIZE is how much memory we can access through the table - not the
+ * size of the table itself.
+ * P**={PGD, PUD, PMD, PTE}
+ *
+ *
+ * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
+ * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
+ * a page.
+ *
+ * In the case where there are only 3 levels, the PUD is folded into the
+ * PGD: every PUD has only one entry which points to the PMD.
+ *
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the PTE entries. When the continuity is broken it then
+ * dumps out a description of the range - ie PTEs that are virtually contiguous
+ * with the same PTE flags are chunked together. This is to make it clear how
+ * different areas of the kernel virtual memory are used.
+ *
+ */
+struct pg_state {
+ struct ptdump_state ptdump; /* embedded walker state; note_page() maps it back to pg_state with container_of() */
+ struct seq_file *seq; /* output file; NULL makes the pt_dump_seq_*() helpers print nothing */
+ const struct addr_marker *marker; /* marker of the region currently being dumped */
+ unsigned long start_address; /* first virtual address of the range being accumulated */
+ unsigned long start_pa; /* val & PTE_RPN_MASK of the entry that opened the range */
+ int level; /* pg_level[] index of the range; -1 until the first entry is seen */
+ u64 current_flags; /* flag bits of the range (entry value masked with pg_level[level].mask) */
+ bool check_wx; /* when true, note_prot_wx() counts writable+executable pages */
+ unsigned long wx_pages; /* running count of W+X pages found */
+};
+
+struct addr_marker {
+ unsigned long start_address; /* first address of the named region */
+ const char *name; /* label printed between ---[ and ]--- when the region is entered */
+};
+
+static struct addr_marker address_markers[] = { /* start addresses are placeholders; populate_markers() fills them in this same order */
+ { 0, "Start of kernel VM" },
+#ifdef MODULES_VADDR
+ { 0, "modules start" },
+ { 0, "modules end" },
+#endif
+ { 0, "vmalloc() Area" },
+ { 0, "vmalloc() End" },
+#ifdef CONFIG_PPC64
+ { 0, "isa I/O start" },
+ { 0, "isa I/O end" },
+ { 0, "phb I/O start" },
+ { 0, "phb I/O end" },
+ { 0, "I/O remap start" },
+ { 0, "I/O remap end" },
+ { 0, "vmemmap start" },
+#else
+ { 0, "Early I/O remap start" },
+ { 0, "Early I/O remap end" },
+#ifdef CONFIG_HIGHMEM
+ { 0, "Highmem PTEs start" },
+ { 0, "Highmem PTEs end" },
+#endif
+ { 0, "Fixmap start" },
+ { 0, "Fixmap end" },
+#endif
+#ifdef CONFIG_KASAN
+ { 0, "kasan shadow mem start" },
+ { 0, "kasan shadow mem end" },
+#endif
+ { -1, NULL }, /* last entry: -1 converts to the largest unsigned long and is never overwritten by populate_markers() */
+};
+
+static struct ptdump_range ptdump_range[] __ro_after_init = { /* address ranges handed to ptdump_walk_pgd() through the .range member */
+ {TASK_SIZE_MAX, ~0UL}, /* default span; ptdump_init() rewrites this entry under CONFIG_PPC64 */
+ {0, 0} /* presumably the list terminator expected by the generic walker - confirm */
+};
+
+#define pt_dump_seq_printf(m, fmt, args...) /* seq_printf() that does nothing when m is NULL */ \
+({ \
+ if (m) /* m is evaluated twice: avoid arguments with side effects */ \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_putc(m, c) /* seq_putc() that does nothing when m is NULL */ \
+({ \
+ if (m) /* m is evaluated twice: avoid arguments with side effects */ \
+ seq_putc(m, c); \
+})
+
+void pt_dump_size(struct seq_file *m, unsigned long size) /* print size scaled to the largest unit that divides it exactly */
+{
+ static const char units[] = " KMGTPE"; /* one character per power of 1024; the blank is used for unscaled values */
+ const char *unit = units;
+
+ /* Work out what appropriate unit to use */
+ while (!(size & 1023) && unit[1]) { /* stop at a non-multiple of 1024 or at the last unit */
+ size >>= 10;
+ unit++;
+ }
+ pt_dump_seq_printf(m, "%9lu%c ", size, *unit); /* NOTE(review): a size of 0 runs through every unit and prints as 0E */
+}
+
+static void dump_flag_info(struct pg_state *st, const struct flag_info /* print each of the num entries of flag as it applies to pte */
+ *flag, u64 pte, int num)
+{
+ unsigned int i;
+
+ for (i = 0; i < num; i++, flag++) {
+ const char *s = NULL;
+ u64 val;
+
+ /* flag not defined so don't check it */
+ if (flag->mask == 0)
+ continue;
+ /* Some 'flags' are actually values */
+ if (flag->is_val) {
+ val = pte & flag->val; /* NOTE(review): masks with ->val, not ->mask - presumably intended for is_val entries, confirm */
+ if (flag->shift)
+ val = val >> flag->shift;
+ pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val); /* ->set doubles as the field label here */
+ } else {
+ if ((pte & flag->mask) == flag->val)
+ s = flag->set;
+ else
+ s = flag->clear;
+ if (s) /* a NULL string means nothing is printed for that state */
+ pt_dump_seq_printf(st->seq, " %s", s);
+ }
+ st->current_flags &= ~flag->mask; /* side effect: mark these bits as decoded */
+ }
+ if (st->current_flags != 0) /* bits that no table entry covered are reported raw */
+ pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
+}
+
+static void dump_addr(struct pg_state *st, unsigned long addr) /* print the range from st->start_address to addr - 1, its start_pa and its size */
+{
+#ifdef CONFIG_PPC64
+#define REG "0x%016lx" /* 16 hex digits under CONFIG_PPC64 */
+#else
+#define REG "0x%08lx" /* 8 hex digits otherwise */
+#endif
+
+ pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1); /* addr is exclusive, hence the - 1 */
+ pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
+ pt_dump_size(st->seq, addr - st->start_address); /* length of the range in bytes */
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr) /* account the range ending at addr if it is both writable and executable */
+{
+ pte_t pte = __pte(st->current_flags); /* wrap the flags so pte_write()/pte_exec() can test them */
+
+ if (!st->check_wx) /* in this file only ptdump_check_wx() sets check_wx */
+ return;
+
+ if (!pte_write(pte) || !pte_exec(pte)) /* only ranges that are both writable and executable go further */
+ return;
+
+ WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX), /* the warning condition is the config option itself */
+ "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE; /* range length expressed in PAGE_SIZE units */
+}
+
+static void note_page_update_state(struct pg_state *st, unsigned long addr, int level, u64 val) /* open a new range at addr and step past any markers it has reached */
+{
+ u64 flag = level >= 0 ? val & pg_level[level].mask : 0; /* a negative level carries no flags */
+ u64 pa = val & PTE_RPN_MASK;
+
+ st->level = level;
+ st->current_flags = flag;
+ st->start_address = addr;
+ st->start_pa = pa;
+
+ while (addr >= st->marker[1].start_address) { /* addr has reached the next marker: advance and print its name */
+ st->marker++;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ }
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) /* shared callback body: extend the open range, or dump it and start a new one */
+{
+ u64 flag = level >= 0 ? val & pg_level[level].mask : 0; /* same masking as note_page_update_state() */
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); /* pt_st is the ptdump member embedded in pg_state */
+
+ /* At first no level is set */
+ if (st->level == -1) {
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ note_page_update_state(st, addr, level, val);
+ /*
+ * Dump the section of virtual memory when:
+ * - the PTE flags from one entry to the next differs.
+ * - we change levels in the tree.
+ * - the address is in a different section of memory and is thus
+ * used for a different purpose, regardless of the flags.
+ */
+ } else if (flag != st->current_flags || level != st->level ||
+ addr >= st->marker[1].start_address) {
+
+ /* Check the PTE flags */
+ if (st->current_flags) { /* ranges whose masked flags are all zero are not printed */
+ note_prot_wx(st, addr);
+ dump_addr(st, addr);
+
+ /* Dump all the flags */
+ if (pg_level[st->level].flag) /* st->level is >= 0 here: a level of -1 always stores zero flags */
+ dump_flag_info(st, pg_level[st->level].flag,
+ st->current_flags,
+ pg_level[st->level].num);
+
+ pt_dump_seq_putc(st->seq, '\n');
+ }
+
+ /*
+ * Address indicates we have passed the end of the
+ * current section of virtual memory
+ */
+ note_page_update_state(st, addr, level, val);
+ }
+}
+
+static void populate_markers(void) /* fill address_markers[].start_address; the #ifdef layout follows the table declaration */
+{
+ int i = 0; /* next address_markers[] slot to fill */
+
+#ifdef CONFIG_PPC64
+ address_markers[i++].start_address = PAGE_OFFSET;
+#else
+ address_markers[i++].start_address = TASK_SIZE;
+#endif
+#ifdef MODULES_VADDR
+ address_markers[i++].start_address = MODULES_VADDR;
+ address_markers[i++].start_address = MODULES_END;
+#endif
+ address_markers[i++].start_address = VMALLOC_START;
+ address_markers[i++].start_address = VMALLOC_END;
+#ifdef CONFIG_PPC64
+ address_markers[i++].start_address = ISA_IO_BASE;
+ address_markers[i++].start_address = ISA_IO_END;
+ address_markers[i++].start_address = PHB_IO_BASE;
+ address_markers[i++].start_address = PHB_IO_END;
+ address_markers[i++].start_address = IOREMAP_BASE;
+ address_markers[i++].start_address = IOREMAP_END;
+ /* What is the ifdef about? */
+#ifdef CONFIG_PPC_BOOK3S_64
+ address_markers[i++].start_address = H_VMEMMAP_START;
+#else
+ address_markers[i++].start_address = VMEMMAP_BASE;
+#endif
+#else /* !CONFIG_PPC64 */
+ address_markers[i++].start_address = ioremap_bot;
+ address_markers[i++].start_address = IOREMAP_TOP;
+#ifdef CONFIG_HIGHMEM
+ address_markers[i++].start_address = PKMAP_BASE;
+ address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP);
+#endif
+ address_markers[i++].start_address = FIXADDR_START;
+ address_markers[i++].start_address = FIXADDR_TOP;
+#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_KASAN
+ address_markers[i++].start_address = KASAN_SHADOW_START;
+ address_markers[i++].start_address = KASAN_SHADOW_END;
+#endif
+} /* the final { -1, NULL } table entry is left as declared */
+
+static void note_page_pte(struct ptdump_state *pt_st, unsigned long addr, pte_t pte) /* .note_page_pte callback: forwards to note_page() */
+{
+ note_page(pt_st, addr, 4, pte_val(pte)); /* 4 indexes the pte entry of pg_level[] */
+}
+
+static void note_page_pmd(struct ptdump_state *pt_st, unsigned long addr, pmd_t pmd) /* .note_page_pmd callback: forwards to note_page() */
+{
+ note_page(pt_st, addr, 3, pmd_val(pmd)); /* 3 indexes the pmd entry of pg_level[] */
+}
+
+static void note_page_pud(struct ptdump_state *pt_st, unsigned long addr, pud_t pud) /* .note_page_pud callback: forwards to note_page() */
+{
+ note_page(pt_st, addr, 2, pud_val(pud)); /* 2 indexes the pud entry of pg_level[] */
+}
+
+static void note_page_p4d(struct ptdump_state *pt_st, unsigned long addr, p4d_t p4d) /* .note_page_p4d callback: forwards to note_page() */
+{
+ note_page(pt_st, addr, 1, p4d_val(p4d)); /* 1 indexes the p4d entry of pg_level[] */
+}
+
+static void note_page_pgd(struct ptdump_state *pt_st, unsigned long addr, pgd_t pgd) /* .note_page_pgd callback: forwards to note_page() */
+{
+ note_page(pt_st, addr, 0, pgd_val(pgd)); /* 0 indexes the pgd entry of pg_level[] */
+}
+
+static void note_page_flush(struct ptdump_state *pt_st) /* .note_page_flush callback: close the range that is still open */
+{
+ pte_t pte_zero = {0}; /* all-zero entry value */
+
+ note_page(pt_st, 0, -1, pte_val(pte_zero)); /* level -1 differs from any real level, so note_page() dumps the pending range */
+}
+
+static int ptdump_show(struct seq_file *m, void *v) /* seq_file show routine: dump the init_mm page tables into m; v is unused; always returns 0 */
+{
+ struct pg_state st = {
+ .seq = m, /* output goes to the seq_file passed in */
+ .marker = address_markers, /* start at the first marker */
+ .level = -1, /* no entry seen yet; see note_page() */
+ .ptdump = {
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
+ .range = ptdump_range,
+ }
+ };
+
+ /* Traverse kernel page tables */
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump); /* provides the ptdump_fops that ptdump_init() registers */
+
+static void __init build_pgtable_complete_mask(void) /* set each pg_level[].mask to the OR of the masks in its flag table */
+{
+ unsigned int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+ if (pg_level[i].flag) /* levels without a flag table keep their mask unchanged */
+ for (j = 0; j < pg_level[i].num; j++)
+ pg_level[i].mask |= pg_level[i].flag[j].mask;
+}
+
+bool ptdump_check_wx(void) /* walk the init_mm page tables without output; returns false only when W+X pages were found */
+{
+ struct pg_state st = {
+ .seq = NULL, /* no output: the pt_dump_seq_*() helpers skip printing */
+ .marker = (struct addr_marker[]) { /* private two-entry table instead of address_markers[] */
+ { 0, NULL},
+ { -1, NULL},
+ },
+ .level = -1,
+ .check_wx = true, /* enables the accounting in note_prot_wx() */
+ .ptdump = {
+ .note_page_pte = note_page_pte,
+ .note_page_pmd = note_page_pmd,
+ .note_page_pud = note_page_pud,
+ .note_page_p4d = note_page_p4d,
+ .note_page_pgd = note_page_pgd,
+ .note_page_flush = note_page_flush,
+ .range = ptdump_range,
+ }
+ };
+
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !mmu_has_feature(MMU_FTR_KERNEL_RO)) /* returns true without walking or logging */
+ return true;
+
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+
+ if (st.wx_pages) { /* non-zero count accumulated by note_prot_wx() */
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
+ st.wx_pages);
+
+ return false;
+ } else {
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+
+ return true;
+ }
+}
+
+static int __init ptdump_init(void) /* set up the walk range, markers and masks, then optionally create the debugfs file; always returns 0 */
+{
+#ifdef CONFIG_PPC64
+ if (!radix_enabled()) /* the start of the walk depends on radix_enabled() */
+ ptdump_range[0].start = KERN_VIRT_START;
+ else
+ ptdump_range[0].start = PAGE_OFFSET;
+
+ ptdump_range[0].end = PAGE_OFFSET + (PGDIR_SIZE * PTRS_PER_PGD); /* the end is computed the same way in both cases */
+#endif
+
+ populate_markers();
+ build_pgtable_complete_mask();
+
+ if (IS_ENABLED(CONFIG_PTDUMP_DEBUGFS)) /* the setup above runs even when no debugfs file is created */
+ debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
+
+ return 0;
+}
+device_initcall(ptdump_init); /* run ptdump_init() as a device-level initcall */
diff --git a/arch/powerpc/mm/ptdump/ptdump.h b/arch/powerpc/mm/ptdump/ptdump.h
new file mode 100644
index 000000000000..4232aa4b57ea
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/ptdump.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+#include <linux/seq_file.h>
+
+struct flag_info {
+ u64 mask; /* bits this entry decodes; 0 makes dump_flag_info() skip the entry */
+ u64 val; /* masked value that selects the set string */
+ const char *set; /* printed when (pte & mask) == val; also the label of an is_val entry */
+ const char *clear; /* printed otherwise; NULL prints nothing */
+ bool is_val; /* print a numeric field instead of a set/clear string */
+ int shift; /* right shift applied to an is_val field before printing; 0 means none */
+};
+
+struct ptdump_pg_level {
+ const struct flag_info *flag; /* flag table used to decode entries of this level, or NULL */
+ size_t num; /* number of entries in flag */
+ u64 mask; /* OR of every flag[].mask, computed by build_pgtable_complete_mask() */
+};
+
+extern struct ptdump_pg_level pg_level[5]; /* one entry per level, indexed 0 (pgd) to 4 (pte) by the note_page_*() callbacks */
+
+void pt_dump_size(struct seq_file *m, unsigned long delta); /* the definition in ptdump.c names the second parameter size */
diff --git a/arch/powerpc/mm/ptdump/segment_regs.c b/arch/powerpc/mm/ptdump/segment_regs.c
new file mode 100644
index 000000000000..9df3af8d481f
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/segment_regs.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Christophe Leroy CS S.I.
+ * <christophe.leroy@c-s.fr>
+ *
+ * This dumps the content of Segment Registers
+ */
+
+#include <linux/debugfs.h>
+
+static void seg_show(struct seq_file *m, int i) /* print one line decoding the segment register read for index i */
+{
+ u32 val = mfsr(i << 28); /* i << 28 is the first address of the range printed below */
+
+ seq_printf(m, "0x%01x0000000-0x%01xfffffff ", i, i);
+ seq_printf(m, "Kern key %d ", (val >> 30) & 1); /* bit 30 */
+ seq_printf(m, "User key %d ", (val >> 29) & 1); /* bit 29 */
+ if (val & 0x80000000) { /* top bit set: the value is printed in the Device layout */
+ seq_printf(m, "Device 0x%03x", (val >> 20) & 0x1ff); /* 9-bit field starting at bit 20 */
+ seq_printf(m, "-0x%05x", val & 0xfffff); /* low 20 bits */
+ } else {
+ if (val & 0x10000000) /* bit 28 */
+ seq_puts(m, "No Exec ");
+ seq_printf(m, "VSID 0x%06x", val & 0xffffff); /* low 24 bits */
+ }
+ seq_puts(m, "\n");
+}
+
+static int sr_show(struct seq_file *m, void *v) /* seq_file show routine: print segments 0 to 15, user ones first; v is unused; always returns 0 */
+{
+ int i;
+
+ seq_puts(m, "---[ User Segments ]---\n");
+ for (i = 0; i < TASK_SIZE >> 28; i++) /* segments lying below TASK_SIZE */
+ seg_show(m, i);
+
+ seq_puts(m, "\n---[ Kernel Segments ]---\n");
+ for (; i < 16; i++) /* continues from where the user loop stopped */
+ seg_show(m, i);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sr); /* provides the sr_fops that sr_init() registers */
+
+static int __init sr_init(void) /* create the segment_registers debugfs file; always returns 0 */
+{
+ debugfs_create_file("segment_registers", 0400, arch_debugfs_dir, /* mode 0400, placed under arch_debugfs_dir */
+ NULL, &sr_fops);
+ return 0;
+}
+device_initcall(sr_init); /* run sr_init() as a device-level initcall */
diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
new file mode 100644
index 000000000000..58998960eb9a
--- /dev/null
+++ b/arch/powerpc/mm/ptdump/shared.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#include "ptdump.h"
+
+static const struct flag_info flag_array[] = { /* decoding table shared by every pg_level[] entry in this file */
+ {
+ .mask = _PAGE_READ,
+ .val = 0, /* val 0 inverts the roles: set is chosen when the bit is clear */
+ .set = " ",
+ .clear = "r", /* so r is what gets printed when _PAGE_READ is set */
+ }, {
+ .mask = _PAGE_WRITE,
+ .val = 0, /* same inversion as the _PAGE_READ entry */
+ .set = " ",
+ .clear = "w", /* printed when _PAGE_WRITE is set */
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "present",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_COHERENT,
+ .val = _PAGE_COHERENT,
+ .set = "coherent",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_GUARDED,
+ .val = _PAGE_GUARDED,
+ .set = "guarded",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_WRITETHRU,
+ .val = _PAGE_WRITETHRU,
+ .set = "write through",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_NO_CACHE,
+ .val = _PAGE_NO_CACHE,
+ .set = "no cache",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special", /* no .clear given: it stays NULL, so nothing is printed when the bit is clear */
+ }
+};
+
+struct ptdump_pg_level pg_level[5] = { /* every level uses the same table; .mask starts at zero and is filled by build_pgtable_complete_mask() in ptdump.c */
+ { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* p4d */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
deleted file mode 100644
index 0399a6702958..000000000000
--- a/arch/powerpc/mm/slb.c
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * PowerPC64 SLB support.
- *
- * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
- * Based on earlier code written by:
- * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
- * Copyright (c) 2001 Dave Engebretsen
- * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/paca.h>
-#include <asm/cputable.h>
-#include <asm/cacheflush.h>
-#include <asm/smp.h>
-#include <linux/compiler.h>
-#include <asm/udbg.h>
-#include <asm/code-patching.h>
-
-
-extern void slb_allocate_realmode(unsigned long ea);
-extern void slb_allocate_user(unsigned long ea);
-
-static void slb_allocate(unsigned long ea)
-{
- /* Currently, we do real mode for all SLBs including user, but
- * that will change if we bring back dynamic VSIDs
- */
- slb_allocate_realmode(ea);
-}
-
-#define slb_esid_mask(ssize) \
- (((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
-
-static inline unsigned long mk_esid_data(unsigned long ea, int ssize,
- unsigned long slot)
-{
- return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | slot;
-}
-
-#define slb_vsid_shift(ssize) \
- ((ssize) == MMU_SEGSIZE_256M? SLB_VSID_SHIFT: SLB_VSID_SHIFT_1T)
-
-static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
- unsigned long flags)
-{
- return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
- ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
-}
-
-static inline void slb_shadow_update(unsigned long ea, int ssize,
- unsigned long flags,
- unsigned long entry)
-{
- /*
- * Clear the ESID first so the entry is not valid while we are
- * updating it. No write barriers are needed here, provided
- * we only update the current CPU's SLB shadow buffer.
- */
- get_slb_shadow()->save_area[entry].esid = 0;
- get_slb_shadow()->save_area[entry].vsid =
- cpu_to_be64(mk_vsid_data(ea, ssize, flags));
- get_slb_shadow()->save_area[entry].esid =
- cpu_to_be64(mk_esid_data(ea, ssize, entry));
-}
-
-static inline void slb_shadow_clear(unsigned long entry)
-{
- get_slb_shadow()->save_area[entry].esid = 0;
-}
-
-static inline void create_shadowed_slbe(unsigned long ea, int ssize,
- unsigned long flags,
- unsigned long entry)
-{
- /*
- * Updating the shadow buffer before writing the SLB ensures
- * we don't get a stale entry here if we get preempted by PHYP
- * between these two statements.
- */
- slb_shadow_update(ea, ssize, flags, entry);
-
- asm volatile("slbmte %0,%1" :
- : "r" (mk_vsid_data(ea, ssize, flags)),
- "r" (mk_esid_data(ea, ssize, entry))
- : "memory" );
-}
-
-static void __slb_flush_and_rebolt(void)
-{
- /* If you change this make sure you change SLB_NUM_BOLTED
- * and PR KVM appropriately too. */
- unsigned long linear_llp, vmalloc_llp, lflags, vflags;
- unsigned long ksp_esid_data, ksp_vsid_data;
-
- linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
- vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
- lflags = SLB_VSID_KERNEL | linear_llp;
- vflags = SLB_VSID_KERNEL | vmalloc_llp;
-
- ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, 2);
- if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) {
- ksp_esid_data &= ~SLB_ESID_V;
- ksp_vsid_data = 0;
- slb_shadow_clear(2);
- } else {
- /* Update stack entry; others don't change */
- slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, 2);
- ksp_vsid_data =
- be64_to_cpu(get_slb_shadow()->save_area[2].vsid);
- }
-
- /* We need to do this all in asm, so we're sure we don't touch
- * the stack between the slbia and rebolting it. */
- asm volatile("isync\n"
- "slbia\n"
- /* Slot 1 - first VMALLOC segment */
- "slbmte %0,%1\n"
- /* Slot 2 - kernel stack */
- "slbmte %2,%3\n"
- "isync"
- :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)),
- "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 1)),
- "r"(ksp_vsid_data),
- "r"(ksp_esid_data)
- : "memory");
-}
-
-void slb_flush_and_rebolt(void)
-{
-
- WARN_ON(!irqs_disabled());
-
- /*
- * We can't take a PMU exception in the following code, so hard
- * disable interrupts.
- */
- hard_irq_disable();
-
- __slb_flush_and_rebolt();
- get_paca()->slb_cache_ptr = 0;
-}
-
-void slb_vmalloc_update(void)
-{
- unsigned long vflags;
-
- vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
- slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
- slb_flush_and_rebolt();
-}
-
-/* Helper function to compare esids. There are four cases to handle.
- * 1. The system is not 1T segment size capable. Use the GET_ESID compare.
- * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
- * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match.
- * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
- */
-static inline int esids_match(unsigned long addr1, unsigned long addr2)
-{
- int esid_1t_count;
-
- /* System is not 1T segment size capable. */
- if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
- return (GET_ESID(addr1) == GET_ESID(addr2));
-
- esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
- ((addr2 >> SID_SHIFT_1T) != 0));
-
- /* both addresses are < 1T */
- if (esid_1t_count == 0)
- return (GET_ESID(addr1) == GET_ESID(addr2));
-
- /* One address < 1T, the other > 1T. Not a match */
- if (esid_1t_count == 1)
- return 0;
-
- /* Both addresses are > 1T. */
- return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
-}
-
-/* Flush all user entries from the segment table of the current processor. */
-void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
-{
- unsigned long offset;
- unsigned long slbie_data = 0;
- unsigned long pc = KSTK_EIP(tsk);
- unsigned long stack = KSTK_ESP(tsk);
- unsigned long exec_base;
-
- /*
- * We need interrupts hard-disabled here, not just soft-disabled,
- * so that a PMU interrupt can't occur, which might try to access
- * user memory (to get a stack trace) and possible cause an SLB miss
- * which would update the slb_cache/slb_cache_ptr fields in the PACA.
- */
- hard_irq_disable();
- offset = get_paca()->slb_cache_ptr;
- if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
- offset <= SLB_CACHE_ENTRIES) {
- int i;
- asm volatile("isync" : : : "memory");
- for (i = 0; i < offset; i++) {
- slbie_data = (unsigned long)get_paca()->slb_cache[i]
- << SID_SHIFT; /* EA */
- slbie_data |= user_segment_size(slbie_data)
- << SLBIE_SSIZE_SHIFT;
- slbie_data |= SLBIE_C; /* C set for user addresses */
- asm volatile("slbie %0" : : "r" (slbie_data));
- }
- asm volatile("isync" : : : "memory");
- } else {
- __slb_flush_and_rebolt();
- }
-
- /* Workaround POWER5 < DD2.1 issue */
- if (offset == 1 || offset > SLB_CACHE_ENTRIES)
- asm volatile("slbie %0" : : "r" (slbie_data));
-
- get_paca()->slb_cache_ptr = 0;
- get_paca()->context = mm->context;
-
- /*
- * preload some userspace segments into the SLB.
- * Almost all 32 and 64bit PowerPC executables are linked at
- * 0x10000000 so it makes sense to preload this segment.
- */
- exec_base = 0x10000000;
-
- if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
- is_kernel_addr(exec_base))
- return;
-
- slb_allocate(pc);
-
- if (!esids_match(pc, stack))
- slb_allocate(stack);
-
- if (!esids_match(pc, exec_base) &&
- !esids_match(stack, exec_base))
- slb_allocate(exec_base);
-}
-
-static inline void patch_slb_encoding(unsigned int *insn_addr,
- unsigned int immed)
-{
- int insn = (*insn_addr & 0xffff0000) | immed;
- patch_instruction(insn_addr, insn);
-}
-
-extern u32 slb_compare_rr_to_size[];
-extern u32 slb_miss_kernel_load_linear[];
-extern u32 slb_miss_kernel_load_io[];
-extern u32 slb_compare_rr_to_size[];
-extern u32 slb_miss_kernel_load_vmemmap[];
-
-void slb_set_size(u16 size)
-{
- if (mmu_slb_size == size)
- return;
-
- mmu_slb_size = size;
- patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
-}
-
-void slb_initialize(void)
-{
- unsigned long linear_llp, vmalloc_llp, io_llp;
- unsigned long lflags, vflags;
- static int slb_encoding_inited;
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- unsigned long vmemmap_llp;
-#endif
-
- /* Prepare our SLB miss handler based on our page size */
- linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
- io_llp = mmu_psize_defs[mmu_io_psize].sllp;
- vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
- get_paca()->vmalloc_sllp = SLB_VSID_KERNEL | vmalloc_llp;
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- vmemmap_llp = mmu_psize_defs[mmu_vmemmap_psize].sllp;
-#endif
- if (!slb_encoding_inited) {
- slb_encoding_inited = 1;
- patch_slb_encoding(slb_miss_kernel_load_linear,
- SLB_VSID_KERNEL | linear_llp);
- patch_slb_encoding(slb_miss_kernel_load_io,
- SLB_VSID_KERNEL | io_llp);
- patch_slb_encoding(slb_compare_rr_to_size,
- mmu_slb_size);
-
- pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
- pr_devel("SLB: io LLP = %04lx\n", io_llp);
-
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- patch_slb_encoding(slb_miss_kernel_load_vmemmap,
- SLB_VSID_KERNEL | vmemmap_llp);
- pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
-#endif
- }
-
- get_paca()->stab_rr = SLB_NUM_BOLTED;
-
- lflags = SLB_VSID_KERNEL | linear_llp;
- vflags = SLB_VSID_KERNEL | vmalloc_llp;
-
- /* Invalidate the entire SLB (even slot 0) & all the ERATS */
- asm volatile("isync":::"memory");
- asm volatile("slbmte %0,%0"::"r" (0) : "memory");
- asm volatile("isync; slbia; isync":::"memory");
- create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, 0);
-
- create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, 1);
-
- /* For the boot cpu, we're running on the stack in init_thread_union,
- * which is in the first segment of the linear mapping, and also
- * get_paca()->kstack hasn't been initialized yet.
- * For secondary cpus, we need to bolt the kernel stack entry now.
- */
- slb_shadow_clear(2);
- if (raw_smp_processor_id() != boot_cpuid &&
- (get_paca()->kstack & slb_esid_mask(mmu_kernel_ssize)) > PAGE_OFFSET)
- create_shadowed_slbe(get_paca()->kstack,
- mmu_kernel_ssize, lflags, 2);
-
- asm volatile("isync":::"memory");
-}
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
deleted file mode 100644
index 736d18b3cefd..000000000000
--- a/arch/powerpc/mm/slb_low.S
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Low-level SLB routines
- *
- * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
- *
- * Based on earlier C version:
- * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
- * Copyright (c) 2001 Dave Engebretsen
- * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/firmware.h>
-
-/* void slb_allocate_realmode(unsigned long ea);
- *
- * Create an SLB entry for the given EA (user or kernel).
- * r3 = faulting address, r13 = PACA
- * r9, r10, r11 are clobbered by this function
- * No other registers are examined or changed.
- */
-_GLOBAL(slb_allocate_realmode)
- /*
- * check for bad kernel/user address
- * (ea & ~REGION_MASK) >= PGTABLE_RANGE
- */
- rldicr. r9,r3,4,(63 - PGTABLE_EADDR_SIZE - 4)
- bne- 8f
-
- srdi r9,r3,60 /* get region */
- srdi r10,r3,SID_SHIFT /* get esid */
- cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */
-
- /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
- blt cr7,0f /* user or kernel? */
-
- /* kernel address: proto-VSID = ESID */
- /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but
- * this code will generate the protoVSID 0xfffffffff for the
- * top segment. That's ok, the scramble below will translate
- * it to VSID 0, which is reserved as a bad VSID - one which
- * will never have any pages in it. */
-
- /* Check if hitting the linear mapping or some other kernel space
- */
- bne cr7,1f
-
- /* Linear mapping encoding bits, the "li" instruction below will
- * be patched by the kernel at boot
- */
-.globl slb_miss_kernel_load_linear
-slb_miss_kernel_load_linear:
- li r11,0
- /*
- * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
- * r9 = region id.
- */
- addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
- addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
-
-
-BEGIN_FTR_SECTION
- b slb_finish_load
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
- b slb_finish_load_1T
-
-1:
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* Check virtual memmap region. To be patches at kernel boot */
- cmpldi cr0,r9,0xf
- bne 1f
-.globl slb_miss_kernel_load_vmemmap
-slb_miss_kernel_load_vmemmap:
- li r11,0
- b 6f
-1:
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-
- /* vmalloc mapping gets the encoding from the PACA as the mapping
- * can be demoted from 64K -> 4K dynamically on some machines
- */
- clrldi r11,r10,48
- cmpldi r11,(VMALLOC_SIZE >> 28) - 1
- bgt 5f
- lhz r11,PACAVMALLOCSLLP(r13)
- b 6f
-5:
- /* IO mapping */
-.globl slb_miss_kernel_load_io
-slb_miss_kernel_load_io:
- li r11,0
-6:
- /*
- * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1
- * r9 = region id.
- */
- addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha
- addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l
-
-BEGIN_FTR_SECTION
- b slb_finish_load
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
- b slb_finish_load_1T
-
-0:
- /* when using slices, we extract the psize off the slice bitmaps
- * and then we need to get the sllp encoding off the mmu_psize_defs
- * array.
- *
- * XXX This is a bit inefficient especially for the normal case,
- * so we should try to implement a fast path for the standard page
- * size using the old sllp value so we avoid the array. We cannot
- * really do dynamic patching unfortunately as processes might flip
- * between 4k and 64k standard page size
- */
-#ifdef CONFIG_PPC_MM_SLICES
- /* r10 have esid */
- cmpldi r10,16
- /* below SLICE_LOW_TOP */
- blt 5f
- /*
- * Handle hpsizes,
- * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
- */
- srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
- addi r9,r11,PACAHIGHSLICEPSIZE
- lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
- /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
- rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
- b 6f
-
-5:
- /*
- * Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize, r11 is index
- */
- ld r9,PACALOWSLICESPSIZE(r13)
- mr r11,r10
-6:
- sldi r11,r11,2 /* index * 4 */
- /* Extract the psize and multiply to get an array offset */
- srd r9,r9,r11
- andi. r9,r9,0xf
- mulli r9,r9,MMUPSIZEDEFSIZE
-
- /* Now get to the array and obtain the sllp
- */
- ld r11,PACATOC(r13)
- ld r11,mmu_psize_defs@got(r11)
- add r11,r11,r9
- ld r11,MMUPSIZESLLP(r11)
- ori r11,r11,SLB_VSID_USER
-#else
- /* paca context sllp already contains the SLB_VSID_USER bits */
- lhz r11,PACACONTEXTSLLP(r13)
-#endif /* CONFIG_PPC_MM_SLICES */
-
- ld r9,PACACONTEXTID(r13)
-BEGIN_FTR_SECTION
- cmpldi r10,0x1000
- bge slb_finish_load_1T
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- b slb_finish_load
-
-8: /* invalid EA */
- li r10,0 /* BAD_VSID */
- li r9,0 /* BAD_VSID */
- li r11,SLB_VSID_USER /* flags don't much matter */
- b slb_finish_load
-
-#ifdef __DISABLED__
-
-/* void slb_allocate_user(unsigned long ea);
- *
- * Create an SLB entry for the given EA (user or kernel).
- * r3 = faulting address, r13 = PACA
- * r9, r10, r11 are clobbered by this function
- * No other registers are examined or changed.
- *
- * It is called with translation enabled in order to be able to walk the
- * page tables. This is not currently used.
- */
-_GLOBAL(slb_allocate_user)
- /* r3 = faulting address */
- srdi r10,r3,28 /* get esid */
-
- crset 4*cr7+lt /* set "user" flag for later */
-
- /* check if we fit in the range covered by the pagetables*/
- srdi. r9,r3,PGTABLE_EADDR_SIZE
- crnot 4*cr0+eq,4*cr0+eq
- beqlr
-
- /* now we need to get to the page tables in order to get the page
- * size encoding from the PMD. In the future, we'll be able to deal
- * with 1T segments too by getting the encoding from the PGD instead
- */
- ld r9,PACAPGDIR(r13)
- cmpldi cr0,r9,0
- beqlr
- rlwinm r11,r10,8,25,28
- ldx r9,r9,r11 /* get pgd_t */
- cmpldi cr0,r9,0
- beqlr
- rlwinm r11,r10,3,17,28
- ldx r9,r9,r11 /* get pmd_t */
- cmpldi cr0,r9,0
- beqlr
-
- /* build vsid flags */
- andi. r11,r9,SLB_VSID_LLP
- ori r11,r11,SLB_VSID_USER
-
- /* get context to calculate proto-VSID */
- ld r9,PACACONTEXTID(r13)
- /* fall through slb_finish_load */
-
-#endif /* __DISABLED__ */
-
-
-/*
- * Finish loading of an SLB entry and return
- *
- * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
- */
-slb_finish_load:
- rldimi r10,r9,ESID_BITS,0
- ASM_VSID_SCRAMBLE(r10,r9,256M)
- /*
- * bits above VSID_BITS_256M need to be ignored from r10
- * also combine VSID and flags
- */
- rldimi r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M))
-
- /* r3 = EA, r11 = VSID data */
- /*
- * Find a slot, round robin. Previously we tried to find a
- * free slot first but that took too long. Unfortunately we
- * dont have any LRU information to help us choose a slot.
- */
-
-7: ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* This gets soft patched on boot. */
-.globl slb_compare_rr_to_size
-slb_compare_rr_to_size:
- cmpldi r10,0
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-
-3:
- rldimi r3,r10,0,36 /* r3= EA[0:35] | entry */
- oris r10,r3,SLB_ESID_V@h /* r3 |= SLB_ESID_V */
-
- /* r3 = ESID data, r11 = VSID data */
-
- /*
- * No need for an isync before or after this slbmte. The exception
- * we enter with and the rfid we exit with are context synchronizing.
- */
- slbmte r11,r10
-
- /* we're done for kernel addresses */
- crclr 4*cr0+eq /* set result to "success" */
- bgelr cr7
-
- /* Update the slb cache */
- lhz r3,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r3,SLB_CACHE_ENTRIES
- bge 1f
-
- /* still room in the slb cache */
- sldi r11,r3,2 /* r11 = offset * sizeof(u32) */
- srdi r10,r10,28 /* get the 36 bits of the ESID */
- add r11,r11,r13 /* r11 = (u32 *)paca + offset */
- stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r3,r3,1 /* offset++ */
- b 2f
-1: /* offset >= SLB_CACHE_ENTRIES */
- li r3,SLB_CACHE_ENTRIES+1
-2:
- sth r3,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
- crclr 4*cr0+eq /* set result to "success" */
- blr
-
-/*
- * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
- *
- * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
- */
-slb_finish_load_1T:
- srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
- rldimi r10,r9,ESID_BITS_1T,0
- ASM_VSID_SCRAMBLE(r10,r9,1T)
- /*
- * bits above VSID_BITS_1T need to be ignored from r10
- * also combine VSID and flags
- */
- rldimi r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T))
- li r10,MMU_SEGSIZE_1T
- rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
-
- /* r3 = EA, r11 = VSID data */
- clrrdi r3,r3,SID_SHIFT_1T /* clear out non-ESID bits */
- b 7b
-
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
deleted file mode 100644
index b0c75cc15efc..000000000000
--- a/arch/powerpc/mm/slice.c
+++ /dev/null
@@ -1,731 +0,0 @@
-/*
- * address space "slices" (meta-segments) support
- *
- * Copyright (C) 2007 Benjamin Herrenschmidt, IBM Corporation.
- *
- * Based on hugetlb implementation
- *
- * Copyright (C) 2003 David Gibson, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/err.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <asm/mman.h>
-#include <asm/mmu.h>
-#include <asm/spu.h>
-
-/* some sanity checks */
-#if (PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE
-#error PGTABLE_RANGE exceeds slice_mask high_slices size
-#endif
-
-static DEFINE_SPINLOCK(slice_convert_lock);
-
-
-#ifdef DEBUG
-int _slice_debug = 1;
-
-static void slice_print_mask(const char *label, struct slice_mask mask)
-{
- char *p, buf[16 + 3 + 64 + 1];
- int i;
-
- if (!_slice_debug)
- return;
- p = buf;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0';
- *(p++) = ' ';
- *(p++) = '-';
- *(p++) = ' ';
- for (i = 0; i < SLICE_NUM_HIGH; i++)
- *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0';
- *(p++) = 0;
-
- printk(KERN_DEBUG "%s:%s\n", label, buf);
-}
-
-#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0)
-
-#else
-
-static void slice_print_mask(const char *label, struct slice_mask mask) {}
-#define slice_dbg(fmt...)
-
-#endif
-
-static struct slice_mask slice_range_to_mask(unsigned long start,
- unsigned long len)
-{
- unsigned long end = start + len - 1;
- struct slice_mask ret = { 0, 0 };
-
- if (start < SLICE_LOW_TOP) {
- unsigned long mend = min(end, SLICE_LOW_TOP);
- unsigned long mstart = min(start, SLICE_LOW_TOP);
-
- ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1))
- - (1u << GET_LOW_SLICE_INDEX(mstart));
- }
-
- if ((start + len) > SLICE_LOW_TOP)
- ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1))
- - (1ul << GET_HIGH_SLICE_INDEX(start));
-
- return ret;
-}
-
-static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
- unsigned long len)
-{
- struct vm_area_struct *vma;
-
- if ((mm->task_size - len) < addr)
- return 0;
- vma = find_vma(mm, addr);
- return (!vma || (addr + len) <= vma->vm_start);
-}
-
-static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
-{
- return !slice_area_is_free(mm, slice << SLICE_LOW_SHIFT,
- 1ul << SLICE_LOW_SHIFT);
-}
-
-static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
-{
- unsigned long start = slice << SLICE_HIGH_SHIFT;
- unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
-
- /* Hack, so that each addresses is controlled by exactly one
- * of the high or low area bitmaps, the first high area starts
- * at 4GB, not 0 */
- if (start == 0)
- start = SLICE_LOW_TOP;
-
- return !slice_area_is_free(mm, start, end - start);
-}
-
-static struct slice_mask slice_mask_for_free(struct mm_struct *mm)
-{
- struct slice_mask ret = { 0, 0 };
- unsigned long i;
-
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (!slice_low_has_vma(mm, i))
- ret.low_slices |= 1u << i;
-
- if (mm->task_size <= SLICE_LOW_TOP)
- return ret;
-
- for (i = 0; i < SLICE_NUM_HIGH; i++)
- if (!slice_high_has_vma(mm, i))
- ret.high_slices |= 1ul << i;
-
- return ret;
-}
-
-static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize)
-{
- unsigned char *hpsizes;
- int index, mask_index;
- struct slice_mask ret = { 0, 0 };
- unsigned long i;
- u64 lpsizes;
-
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == psize)
- ret.low_slices |= 1u << i;
-
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize)
- ret.high_slices |= 1ul << i;
- }
-
- return ret;
-}
-
-static int slice_check_fit(struct slice_mask mask, struct slice_mask available)
-{
- return (mask.low_slices & available.low_slices) == mask.low_slices &&
- (mask.high_slices & available.high_slices) == mask.high_slices;
-}
-
-static void slice_flush_segments(void *parm)
-{
- struct mm_struct *mm = parm;
- unsigned long flags;
-
- if (mm != current->active_mm)
- return;
-
- /* update the paca copy of the context struct */
- get_paca()->context = current->active_mm->context;
-
- local_irq_save(flags);
- slb_flush_and_rebolt();
- local_irq_restore(flags);
-}
-
-static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psize)
-{
- int index, mask_index;
- /* Write the new slice psize bits */
- unsigned char *hpsizes;
- u64 lpsizes;
- unsigned long i, flags;
-
- slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
- slice_print_mask(" mask", mask);
-
- /* We need to use a spinlock here to protect against
- * concurrent 64k -> 4k demotion ...
- */
- spin_lock_irqsave(&slice_convert_lock, flags);
-
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (mask.low_slices & (1u << i))
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
-
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
-
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (mask.high_slices & (1ul << i))
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
- (((unsigned long)psize) << (mask_index * 4));
- }
-
- slice_dbg(" lsps=%lx, hsps=%lx\n",
- mm->context.low_slices_psize,
- mm->context.high_slices_psize);
-
- spin_unlock_irqrestore(&slice_convert_lock, flags);
-
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
-}
-
-/*
- * Compute which slice addr is part of;
- * set *boundary_addr to the start or end boundary of that slice
- * (depending on 'end' parameter);
- * return boolean indicating if the slice is marked as available in the
- * 'available' slice_mark.
- */
-static bool slice_scan_available(unsigned long addr,
- struct slice_mask available,
- int end,
- unsigned long *boundary_addr)
-{
- unsigned long slice;
- if (addr < SLICE_LOW_TOP) {
- slice = GET_LOW_SLICE_INDEX(addr);
- *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
- return !!(available.low_slices & (1u << slice));
- } else {
- slice = GET_HIGH_SLICE_INDEX(addr);
- *boundary_addr = (slice + end) ?
- ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
- return !!(available.high_slices & (1ul << slice));
- }
-}
-
-static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
- unsigned long len,
- struct slice_mask available,
- int psize)
-{
- int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
- unsigned long addr, found, next_end;
- struct vm_unmapped_area_info info;
-
- info.flags = 0;
- info.length = len;
- info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
- info.align_offset = 0;
-
- addr = TASK_UNMAPPED_BASE;
- while (addr < TASK_SIZE) {
- info.low_limit = addr;
- if (!slice_scan_available(addr, available, 1, &addr))
- continue;
-
- next_slice:
- /*
- * At this point [info.low_limit; addr) covers
- * available slices only and ends at a slice boundary.
- * Check if we need to reduce the range, or if we can
- * extend it to cover the next available slice.
- */
- if (addr >= TASK_SIZE)
- addr = TASK_SIZE;
- else if (slice_scan_available(addr, available, 1, &next_end)) {
- addr = next_end;
- goto next_slice;
- }
- info.high_limit = addr;
-
- found = vm_unmapped_area(&info);
- if (!(found & ~PAGE_MASK))
- return found;
- }
-
- return -ENOMEM;
-}
-
-static unsigned long slice_find_area_topdown(struct mm_struct *mm,
- unsigned long len,
- struct slice_mask available,
- int psize)
-{
- int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
- unsigned long addr, found, prev;
- struct vm_unmapped_area_info info;
-
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
- info.align_offset = 0;
-
- addr = mm->mmap_base;
- while (addr > PAGE_SIZE) {
- info.high_limit = addr;
- if (!slice_scan_available(addr - 1, available, 0, &addr))
- continue;
-
- prev_slice:
- /*
- * At this point [addr; info.high_limit) covers
- * available slices only and starts at a slice boundary.
- * Check if we need to reduce the range, or if we can
- * extend it to cover the previous available slice.
- */
- if (addr < PAGE_SIZE)
- addr = PAGE_SIZE;
- else if (slice_scan_available(addr - 1, available, 0, &prev)) {
- addr = prev;
- goto prev_slice;
- }
- info.low_limit = addr;
-
- found = vm_unmapped_area(&info);
- if (!(found & ~PAGE_MASK))
- return found;
- }
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- return slice_find_area_bottomup(mm, len, available, psize);
-}
-
-
-static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
- struct slice_mask mask, int psize,
- int topdown)
-{
- if (topdown)
- return slice_find_area_topdown(mm, len, mask, psize);
- else
- return slice_find_area_bottomup(mm, len, mask, psize);
-}
-
-#define or_mask(dst, src) do { \
- (dst).low_slices |= (src).low_slices; \
- (dst).high_slices |= (src).high_slices; \
-} while (0)
-
-#define andnot_mask(dst, src) do { \
- (dst).low_slices &= ~(src).low_slices; \
- (dst).high_slices &= ~(src).high_slices; \
-} while (0)
-
-#ifdef CONFIG_PPC_64K_PAGES
-#define MMU_PAGE_BASE MMU_PAGE_64K
-#else
-#define MMU_PAGE_BASE MMU_PAGE_4K
-#endif
-
-unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
- unsigned long flags, unsigned int psize,
- int topdown)
-{
- struct slice_mask mask = {0, 0};
- struct slice_mask good_mask;
- struct slice_mask potential_mask = {0,0} /* silence stupid warning */;
- struct slice_mask compat_mask = {0, 0};
- int fixed = (flags & MAP_FIXED);
- int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
- struct mm_struct *mm = current->mm;
- unsigned long newaddr;
-
- /* Sanity checks */
- BUG_ON(mm->task_size == 0);
-
- slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
- slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
- addr, len, flags, topdown);
-
- if (len > mm->task_size)
- return -ENOMEM;
- if (len & ((1ul << pshift) - 1))
- return -EINVAL;
- if (fixed && (addr & ((1ul << pshift) - 1)))
- return -EINVAL;
- if (fixed && addr > (mm->task_size - len))
- return -ENOMEM;
-
- /* If hint, make sure it matches our alignment restrictions */
- if (!fixed && addr) {
- addr = _ALIGN_UP(addr, 1ul << pshift);
- slice_dbg(" aligned addr=%lx\n", addr);
- /* Ignore hint if it's too large or overlaps a VMA */
- if (addr > mm->task_size - len ||
- !slice_area_is_free(mm, addr, len))
- addr = 0;
- }
-
- /* First make up a "good" mask of slices that have the right size
- * already
- */
- good_mask = slice_mask_for_size(mm, psize);
- slice_print_mask(" good_mask", good_mask);
-
- /*
- * Here "good" means slices that are already the right page size,
- * "compat" means slices that have a compatible page size (i.e.
- * 4k in a 64k pagesize kernel), and "free" means slices without
- * any VMAs.
- *
- * If MAP_FIXED:
- * check if fits in good | compat => OK
- * check if fits in good | compat | free => convert free
- * else bad
- * If have hint:
- * check if hint fits in good => OK
- * check if hint fits in good | free => convert free
- * Otherwise:
- * search in good, found => OK
- * search in good | free, found => convert free
- * search in good | compat | free, found => convert free.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
- /* If we support combo pages, we can allow 64k pages in 4k slices */
- if (psize == MMU_PAGE_64K) {
- compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
- if (fixed)
- or_mask(good_mask, compat_mask);
- }
-#endif
-
- /* First check hint if it's valid or if we have MAP_FIXED */
- if (addr != 0 || fixed) {
- /* Build a mask for the requested range */
- mask = slice_range_to_mask(addr, len);
- slice_print_mask(" mask", mask);
-
- /* Check if we fit in the good mask. If we do, we just return,
- * nothing else to do
- */
- if (slice_check_fit(mask, good_mask)) {
- slice_dbg(" fits good !\n");
- return addr;
- }
- } else {
- /* Now let's see if we can find something in the existing
- * slices for that size
- */
- newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
- if (newaddr != -ENOMEM) {
- /* Found within the good mask, we don't have to setup,
- * we thus return directly
- */
- slice_dbg(" found area at 0x%lx\n", newaddr);
- return newaddr;
- }
- }
-
- /* We don't fit in the good mask, check what other slices are
- * empty and thus can be converted
- */
- potential_mask = slice_mask_for_free(mm);
- or_mask(potential_mask, good_mask);
- slice_print_mask(" potential", potential_mask);
-
- if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) {
- slice_dbg(" fits potential !\n");
- goto convert;
- }
-
- /* If we have MAP_FIXED and failed the above steps, then error out */
- if (fixed)
- return -EBUSY;
-
- slice_dbg(" search...\n");
-
- /* If we had a hint that didn't work out, see if we can fit
- * anywhere in the good area.
- */
- if (addr) {
- addr = slice_find_area(mm, len, good_mask, psize, topdown);
- if (addr != -ENOMEM) {
- slice_dbg(" found area at 0x%lx\n", addr);
- return addr;
- }
- }
-
- /* Now let's see if we can find something in the existing slices
- * for that size plus free slices
- */
- addr = slice_find_area(mm, len, potential_mask, psize, topdown);
-
-#ifdef CONFIG_PPC_64K_PAGES
- if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
- /* retry the search with 4k-page slices included */
- or_mask(potential_mask, compat_mask);
- addr = slice_find_area(mm, len, potential_mask, psize,
- topdown);
- }
-#endif
-
- if (addr == -ENOMEM)
- return -ENOMEM;
-
- mask = slice_range_to_mask(addr, len);
- slice_dbg(" found potential area at 0x%lx\n", addr);
- slice_print_mask(" mask", mask);
-
- convert:
- andnot_mask(mask, good_mask);
- andnot_mask(mask, compat_mask);
- if (mask.low_slices || mask.high_slices) {
- slice_convert(mm, mask, psize);
- if (psize > MMU_PAGE_BASE)
- on_each_cpu(slice_flush_segments, mm, 1);
- }
- return addr;
-
-}
-EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
-
-unsigned long arch_get_unmapped_area(struct file *filp,
- unsigned long addr,
- unsigned long len,
- unsigned long pgoff,
- unsigned long flags)
-{
- return slice_get_unmapped_area(addr, len, flags,
- current->mm->context.user_psize, 0);
-}
-
-unsigned long arch_get_unmapped_area_topdown(struct file *filp,
- const unsigned long addr0,
- const unsigned long len,
- const unsigned long pgoff,
- const unsigned long flags)
-{
- return slice_get_unmapped_area(addr0, len, flags,
- current->mm->context.user_psize, 1);
-}
-
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
-{
- unsigned char *hpsizes;
- int index, mask_index;
-
- if (addr < SLICE_LOW_TOP) {
- u64 lpsizes;
- lpsizes = mm->context.low_slices_psize;
- index = GET_LOW_SLICE_INDEX(addr);
- return (lpsizes >> (index * 4)) & 0xf;
- }
- hpsizes = mm->context.high_slices_psize;
- index = GET_HIGH_SLICE_INDEX(addr);
- mask_index = index & 0x1;
- return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xf;
-}
-EXPORT_SYMBOL_GPL(get_slice_psize);
-
-/*
- * This is called by hash_page when it needs to do a lazy conversion of
- * an address space from real 64K pages to combo 4K pages (typically
- * when hitting a non cacheable mapping on a processor or hypervisor
- * that won't allow them for 64K pages).
- *
- * This is also called in init_new_context() to change back the user
- * psize from whatever the parent context had it set to
- * N.B. This may be called before mm->context.id has been set.
- *
- * This function will only change the content of the {low,high)_slice_psize
- * masks, it will not flush SLBs as this shall be handled lazily by the
- * caller.
- */
-void slice_set_user_psize(struct mm_struct *mm, unsigned int psize)
-{
- int index, mask_index;
- unsigned char *hpsizes;
- unsigned long flags, lpsizes;
- unsigned int old_psize;
- int i;
-
- slice_dbg("slice_set_user_psize(mm=%p, psize=%d)\n", mm, psize);
-
- spin_lock_irqsave(&slice_convert_lock, flags);
-
- old_psize = mm->context.user_psize;
- slice_dbg(" old_psize=%d\n", old_psize);
- if (old_psize == psize)
- goto bail;
-
- mm->context.user_psize = psize;
- wmb();
-
- lpsizes = mm->context.low_slices_psize;
- for (i = 0; i < SLICE_NUM_LOW; i++)
- if (((lpsizes >> (i * 4)) & 0xf) == old_psize)
- lpsizes = (lpsizes & ~(0xful << (i * 4))) |
- (((unsigned long)psize) << (i * 4));
- /* Assign the value back */
- mm->context.low_slices_psize = lpsizes;
-
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < SLICE_NUM_HIGH; i++) {
- mask_index = i & 0x1;
- index = i >> 1;
- if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == old_psize)
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
- (((unsigned long)psize) << (mask_index * 4));
- }
-
-
-
-
- slice_dbg(" lsps=%lx, hsps=%lx\n",
- mm->context.low_slices_psize,
- mm->context.high_slices_psize);
-
- bail:
- spin_unlock_irqrestore(&slice_convert_lock, flags);
-}
-
-void slice_set_psize(struct mm_struct *mm, unsigned long address,
- unsigned int psize)
-{
- unsigned char *hpsizes;
- unsigned long i, flags;
- u64 *lpsizes;
-
- spin_lock_irqsave(&slice_convert_lock, flags);
- if (address < SLICE_LOW_TOP) {
- i = GET_LOW_SLICE_INDEX(address);
- lpsizes = &mm->context.low_slices_psize;
- *lpsizes = (*lpsizes & ~(0xful << (i * 4))) |
- ((unsigned long) psize << (i * 4));
- } else {
- int index, mask_index;
- i = GET_HIGH_SLICE_INDEX(address);
- hpsizes = mm->context.high_slices_psize;
- mask_index = i & 0x1;
- index = i >> 1;
- hpsizes[index] = (hpsizes[index] &
- ~(0xf << (mask_index * 4))) |
- (((unsigned long)psize) << (mask_index * 4));
- }
-
- spin_unlock_irqrestore(&slice_convert_lock, flags);
-
-#ifdef CONFIG_SPU_BASE
- spu_flush_all_slbs(mm);
-#endif
-}
-
-void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
- unsigned long len, unsigned int psize)
-{
- struct slice_mask mask = slice_range_to_mask(start, len);
-
- slice_convert(mm, mask, psize);
-}
-
-/*
- * is_hugepage_only_range() is used by generic code to verify whether
- * a normal mmap mapping (non hugetlbfs) is valid on a given area.
- *
- * until the generic code provides a more generic hook and/or starts
- * calling arch get_unmapped_area for MAP_FIXED (which our implementation
- * here knows how to deal with), we hijack it to keep standard mappings
- * away from us.
- *
- * because of that generic code limitation, MAP_FIXED mapping cannot
- * "convert" back a slice with no VMAs to the standard page size, only
- * get_unmapped_area() can. It would be possible to fix it here but I
- * prefer working on fixing the generic code instead.
- *
- * WARNING: This will not work if hugetlbfs isn't enabled since the
- * generic code will redefine that function as 0 in that. This is ok
- * for now as we only use slices with hugetlbfs enabled. This should
- * be fixed as the generic code gets fixed.
- */
-int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
- unsigned long len)
-{
- struct slice_mask mask, available;
- unsigned int psize = mm->context.user_psize;
-
- mask = slice_range_to_mask(addr, len);
- available = slice_mask_for_size(mm, psize);
-#ifdef CONFIG_PPC_64K_PAGES
- /* We need to account for 4k slices too */
- if (psize == MMU_PAGE_64K) {
- struct slice_mask compat_mask;
- compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K);
- or_mask(available, compat_mask);
- }
-#endif
-
-#if 0 /* too verbose */
- slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n",
- mm, addr, len);
- slice_print_mask(" mask", mask);
- slice_print_mask(" available", available);
-#endif
- return !slice_check_fit(mask, available);
-}
-
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
deleted file mode 100644
index 558e30cce33e..000000000000
--- a/arch/powerpc/mm/tlb_hash32.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * This file contains the routines for TLB flushing.
- * On machines where the MMU uses a hash table to store virtual to
- * physical translations, these routines flush entries from the
- * hash table also.
- * -- paulus
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/export.h>
-
-#include <asm/tlbflush.h>
-#include <asm/tlb.h>
-
-#include "mmu_decl.h"
-
-/*
- * Called when unmapping pages to flush entries from the TLB/hash table.
- */
-void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr)
-{
- unsigned long ptephys;
-
- if (Hash != 0) {
- ptephys = __pa(ptep) & PAGE_MASK;
- flush_hash_pages(mm->context.id, addr, ptephys, 1);
- }
-}
-EXPORT_SYMBOL(flush_hash_entry);
-
-/*
- * Called by ptep_set_access_flags, must flush on CPUs for which the
- * DSI handler can't just "fixup" the TLB on a write fault
- */
-void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr)
-{
- if (Hash != 0)
- return;
- _tlbie(addr);
-}
-
-/*
- * Called at the end of a mmu_gather operation to make sure the
- * TLB flush is completely done.
- */
-void tlb_flush(struct mmu_gather *tlb)
-{
- if (Hash == 0) {
- /*
- * 603 needs to flush the whole TLB here since
- * it doesn't use a hash table.
- */
- _tlbia();
- }
-}
-
-/*
- * TLB flushing:
- *
- * - flush_tlb_mm(mm) flushes the specified mm context TLB's
- * - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_range(vma, start, end) flushes a range of pages
- * - flush_tlb_kernel_range(start, end) flushes kernel pages
- *
- * since the hardware hash table functions as an extension of the
- * tlb as far as the linux tables are concerned, flush it too.
- * -- Cort
- */
-
-static void flush_range(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
- pmd_t *pmd;
- unsigned long pmd_end;
- int count;
- unsigned int ctx = mm->context.id;
-
- if (Hash == 0) {
- _tlbia();
- return;
- }
- start &= PAGE_MASK;
- if (start >= end)
- return;
- end = (end - 1) | ~PAGE_MASK;
- pmd = pmd_offset(pud_offset(pgd_offset(mm, start), start), start);
- for (;;) {
- pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1;
- if (pmd_end > end)
- pmd_end = end;
- if (!pmd_none(*pmd)) {
- count = ((pmd_end - start) >> PAGE_SHIFT) + 1;
- flush_hash_pages(ctx, start, pmd_val(*pmd), count);
- }
- if (pmd_end == end)
- break;
- start = pmd_end + 1;
- ++pmd;
- }
-}
-
-/*
- * Flush kernel TLB entries in the given range
- */
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
- flush_range(&init_mm, start, end);
-}
-EXPORT_SYMBOL(flush_tlb_kernel_range);
-
-/*
- * Flush all the (user) entries for the address space described by mm.
- */
-void flush_tlb_mm(struct mm_struct *mm)
-{
- struct vm_area_struct *mp;
-
- if (Hash == 0) {
- _tlbia();
- return;
- }
-
- /*
- * It is safe to go down the mm's list of vmas when called
- * from dup_mmap, holding mmap_sem. It would also be safe from
- * unmap_region or exit_mmap, but not from vmtruncate on SMP -
- * but it seems dup_mmap is the only SMP case which gets here.
- */
- for (mp = mm->mmap; mp != NULL; mp = mp->vm_next)
- flush_range(mp->vm_mm, mp->vm_start, mp->vm_end);
-}
-EXPORT_SYMBOL(flush_tlb_mm);
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
- struct mm_struct *mm;
- pmd_t *pmd;
-
- if (Hash == 0) {
- _tlbie(vmaddr);
- return;
- }
- mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm;
- pmd = pmd_offset(pud_offset(pgd_offset(mm, vmaddr), vmaddr), vmaddr);
- if (!pmd_none(*pmd))
- flush_hash_pages(mm->context.id, vmaddr, pmd_val(*pmd), 1);
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-/*
- * For each address in the range, find the pte for the address
- * and check _PAGE_HASHPTE bit; if it is set, find and destroy
- * the corresponding HPTE.
- */
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-{
- flush_range(vma->vm_mm, start, end);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
-void __init early_init_mmu(void)
-{
-}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
deleted file mode 100644
index f38ea4df6a85..000000000000
--- a/arch/powerpc/mm/tlb_nohash.c
+++ /dev/null
@@ -1,753 +0,0 @@
-/*
- * This file contains the routines for TLB flushing.
- * On machines where the MMU does not use a hash table to store virtual to
- * physical translations (ie, SW loaded TLBs or Book3E compilant processors,
- * this does -not- include 603 however which shares the implementation with
- * hash based processors)
- *
- * -- BenH
- *
- * Copyright 2008,2009 Ben Herrenschmidt <benh@kernel.crashing.org>
- * IBM Corp.
- *
- * Derived from arch/ppc/mm/init.c:
- * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
- *
- * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
- * and Cort Dougan (PReP) (cort@cs.nmt.edu)
- * Copyright (C) 1996 Paul Mackerras
- *
- * Derived from "arch/i386/mm/init.c"
- * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/init.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/preempt.h>
-#include <linux/spinlock.h>
-#include <linux/memblock.h>
-#include <linux/of_fdt.h>
-#include <linux/hugetlb.h>
-
-#include <asm/tlbflush.h>
-#include <asm/tlb.h>
-#include <asm/code-patching.h>
-#include <asm/hugetlb.h>
-#include <asm/paca.h>
-
-#include "mmu_decl.h"
-
-/*
- * This struct lists the sw-supported page sizes. The hardawre MMU may support
- * other sizes not listed here. The .ind field is only used on MMUs that have
- * indirect page table entries.
- */
-#ifdef CONFIG_PPC_BOOK3E_MMU
-#ifdef CONFIG_PPC_FSL_BOOK3E
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
- [MMU_PAGE_4K] = {
- .shift = 12,
- .enc = BOOK3E_PAGESZ_4K,
- },
- [MMU_PAGE_2M] = {
- .shift = 21,
- .enc = BOOK3E_PAGESZ_2M,
- },
- [MMU_PAGE_4M] = {
- .shift = 22,
- .enc = BOOK3E_PAGESZ_4M,
- },
- [MMU_PAGE_16M] = {
- .shift = 24,
- .enc = BOOK3E_PAGESZ_16M,
- },
- [MMU_PAGE_64M] = {
- .shift = 26,
- .enc = BOOK3E_PAGESZ_64M,
- },
- [MMU_PAGE_256M] = {
- .shift = 28,
- .enc = BOOK3E_PAGESZ_256M,
- },
- [MMU_PAGE_1G] = {
- .shift = 30,
- .enc = BOOK3E_PAGESZ_1GB,
- },
-};
-#else
-struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT] = {
- [MMU_PAGE_4K] = {
- .shift = 12,
- .ind = 20,
- .enc = BOOK3E_PAGESZ_4K,
- },
- [MMU_PAGE_16K] = {
- .shift = 14,
- .enc = BOOK3E_PAGESZ_16K,
- },
- [MMU_PAGE_64K] = {
- .shift = 16,
- .ind = 28,
- .enc = BOOK3E_PAGESZ_64K,
- },
- [MMU_PAGE_1M] = {
- .shift = 20,
- .enc = BOOK3E_PAGESZ_1M,
- },
- [MMU_PAGE_16M] = {
- .shift = 24,
- .ind = 36,
- .enc = BOOK3E_PAGESZ_16M,
- },
- [MMU_PAGE_256M] = {
- .shift = 28,
- .enc = BOOK3E_PAGESZ_256M,
- },
- [MMU_PAGE_1G] = {
- .shift = 30,
- .enc = BOOK3E_PAGESZ_1GB,
- },
-};
-#endif /* CONFIG_FSL_BOOKE */
-
-static inline int mmu_get_tsize(int psize)
-{
- return mmu_psize_defs[psize].enc;
-}
-#else
-static inline int mmu_get_tsize(int psize)
-{
- /* This isn't used on !Book3E for now */
- return 0;
-}
-#endif /* CONFIG_PPC_BOOK3E_MMU */
-
-/* The variables below are currently only used on 64-bit Book3E
- * though this will probably be made common with other nohash
- * implementations at some point
- */
-#ifdef CONFIG_PPC64
-
-int mmu_linear_psize; /* Page size used for the linear mapping */
-int mmu_pte_psize; /* Page size used for PTE pages */
-int mmu_vmemmap_psize; /* Page size used for the virtual mem map */
-int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */
-unsigned long linear_map_top; /* Top of linear mapping */
-
-
-/*
- * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug
- * exceptions. This is used for bolted and e6500 TLB miss handlers which
- * do not modify this SPRG in the TLB miss code; for other TLB miss handlers,
- * this is set to zero.
- */
-int extlb_level_exc;
-
-#endif /* CONFIG_PPC64 */
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
-/* next_tlbcam_idx is used to round-robin tlbcam entry assignment */
-DEFINE_PER_CPU(int, next_tlbcam_idx);
-EXPORT_PER_CPU_SYMBOL(next_tlbcam_idx);
-#endif
-
-/*
- * Base TLB flushing operations:
- *
- * - flush_tlb_mm(mm) flushes the specified mm context TLB's
- * - flush_tlb_page(vma, vmaddr) flushes one page
- * - flush_tlb_range(vma, start, end) flushes a range of pages
- * - flush_tlb_kernel_range(start, end) flushes kernel pages
- *
- * - local_* variants of page and mm only apply to the current
- * processor
- */
-
-/*
- * These are the base non-SMP variants of page and mm flushing
- */
-void local_flush_tlb_mm(struct mm_struct *mm)
-{
- unsigned int pid;
-
- preempt_disable();
- pid = mm->context.id;
- if (pid != MMU_NO_CONTEXT)
- _tlbil_pid(pid);
- preempt_enable();
-}
-EXPORT_SYMBOL(local_flush_tlb_mm);
-
-void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- int tsize, int ind)
-{
- unsigned int pid;
-
- preempt_disable();
- pid = mm ? mm->context.id : 0;
- if (pid != MMU_NO_CONTEXT)
- _tlbil_va(vmaddr, pid, tsize, ind);
- preempt_enable();
-}
-
-void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
- __local_flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_get_tsize(mmu_virtual_psize), 0);
-}
-EXPORT_SYMBOL(local_flush_tlb_page);
-
-/*
- * And here are the SMP non-local implementations
- */
-#ifdef CONFIG_SMP
-
-static DEFINE_RAW_SPINLOCK(tlbivax_lock);
-
-static int mm_is_core_local(struct mm_struct *mm)
-{
- return cpumask_subset(mm_cpumask(mm),
- topology_thread_cpumask(smp_processor_id()));
-}
-
-struct tlb_flush_param {
- unsigned long addr;
- unsigned int pid;
- unsigned int tsize;
- unsigned int ind;
-};
-
-static void do_flush_tlb_mm_ipi(void *param)
-{
- struct tlb_flush_param *p = param;
-
- _tlbil_pid(p ? p->pid : 0);
-}
-
-static void do_flush_tlb_page_ipi(void *param)
-{
- struct tlb_flush_param *p = param;
-
- _tlbil_va(p->addr, p->pid, p->tsize, p->ind);
-}
-
-
-/* Note on invalidations and PID:
- *
- * We snapshot the PID with preempt disabled. At this point, it can still
- * change either because:
- * - our context is being stolen (PID -> NO_CONTEXT) on another CPU
- * - we are invaliating some target that isn't currently running here
- * and is concurrently acquiring a new PID on another CPU
- * - some other CPU is re-acquiring a lost PID for this mm
- * etc...
- *
- * However, this shouldn't be a problem as we only guarantee
- * invalidation of TLB entries present prior to this call, so we
- * don't care about the PID changing, and invalidating a stale PID
- * is generally harmless.
- */
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
- unsigned int pid;
-
- preempt_disable();
- pid = mm->context.id;
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto no_context;
- if (!mm_is_core_local(mm)) {
- struct tlb_flush_param p = { .pid = pid };
- /* Ignores smp_processor_id() even if set. */
- smp_call_function_many(mm_cpumask(mm),
- do_flush_tlb_mm_ipi, &p, 1);
- }
- _tlbil_pid(pid);
- no_context:
- preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_mm);
-
-void __flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
- int tsize, int ind)
-{
- struct cpumask *cpu_mask;
- unsigned int pid;
-
- preempt_disable();
- pid = mm ? mm->context.id : 0;
- if (unlikely(pid == MMU_NO_CONTEXT))
- goto bail;
- cpu_mask = mm_cpumask(mm);
- if (!mm_is_core_local(mm)) {
- /* If broadcast tlbivax is supported, use it */
- if (mmu_has_feature(MMU_FTR_USE_TLBIVAX_BCAST)) {
- int lock = mmu_has_feature(MMU_FTR_LOCK_BCAST_INVAL);
- if (lock)
- raw_spin_lock(&tlbivax_lock);
- _tlbivax_bcast(vmaddr, pid, tsize, ind);
- if (lock)
- raw_spin_unlock(&tlbivax_lock);
- goto bail;
- } else {
- struct tlb_flush_param p = {
- .pid = pid,
- .addr = vmaddr,
- .tsize = tsize,
- .ind = ind,
- };
- /* Ignores smp_processor_id() even if set in cpu_mask */
- smp_call_function_many(cpu_mask,
- do_flush_tlb_page_ipi, &p, 1);
- }
- }
- _tlbil_va(vmaddr, pid, tsize, ind);
- bail:
- preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
-{
-#ifdef CONFIG_HUGETLB_PAGE
- if (vma && is_vm_hugetlb_page(vma))
- flush_hugetlb_page(vma, vmaddr);
-#endif
-
- __flush_tlb_page(vma ? vma->vm_mm : NULL, vmaddr,
- mmu_get_tsize(mmu_virtual_psize), 0);
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-#endif /* CONFIG_SMP */
-
-#ifdef CONFIG_PPC_47x
-void __init early_init_mmu_47x(void)
-{
-#ifdef CONFIG_SMP
- unsigned long root = of_get_flat_dt_root();
- if (of_get_flat_dt_prop(root, "cooperative-partition", NULL))
- mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST);
-#endif /* CONFIG_SMP */
-}
-#endif /* CONFIG_PPC_47x */
-
-/*
- * Flush kernel TLB entries in the given range
- */
-void flush_tlb_kernel_range(unsigned long start, unsigned long end)
-{
-#ifdef CONFIG_SMP
- preempt_disable();
- smp_call_function(do_flush_tlb_mm_ipi, NULL, 1);
- _tlbil_pid(0);
- preempt_enable();
-#else
- _tlbil_pid(0);
-#endif
-}
-EXPORT_SYMBOL(flush_tlb_kernel_range);
-
-/*
- * Currently, for range flushing, we just do a full mm flush. This should
- * be optimized based on a threshold on the size of the range, since
- * some implementation can stack multiple tlbivax before a tlbsync but
- * for now, we keep it that way
- */
-void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
-
-{
- flush_tlb_mm(vma->vm_mm);
-}
-EXPORT_SYMBOL(flush_tlb_range);
-
-void tlb_flush(struct mmu_gather *tlb)
-{
- flush_tlb_mm(tlb->mm);
-}
-
-/*
- * Below are functions specific to the 64-bit variant of Book3E though that
- * may change in the future
- */
-
-#ifdef CONFIG_PPC64
-
-/*
- * Handling of virtual linear page tables or indirect TLB entries
- * flushing when PTE pages are freed
- */
-void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
-{
- int tsize = mmu_psize_defs[mmu_pte_psize].enc;
-
- if (book3e_htw_mode != PPC_HTW_NONE) {
- unsigned long start = address & PMD_MASK;
- unsigned long end = address + PMD_SIZE;
- unsigned long size = 1UL << mmu_psize_defs[mmu_pte_psize].shift;
-
- /* This isn't the most optimal, ideally we would factor out the
- * while preempt & CPU mask mucking around, or even the IPI but
- * it will do for now
- */
- while (start < end) {
- __flush_tlb_page(tlb->mm, start, tsize, 1);
- start += size;
- }
- } else {
- unsigned long rmask = 0xf000000000000000ul;
- unsigned long rid = (address & rmask) | 0x1000000000000000ul;
- unsigned long vpte = address & ~rmask;
-
-#ifdef CONFIG_PPC_64K_PAGES
- vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
-#else
- vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
-#endif
- vpte |= rid;
- __flush_tlb_page(tlb->mm, vpte, tsize, 0);
- }
-}
-
-static void setup_page_sizes(void)
-{
- unsigned int tlb0cfg;
- unsigned int tlb0ps;
- unsigned int eptcfg;
- int i, psize;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- unsigned int mmucfg = mfspr(SPRN_MMUCFG);
- int fsl_mmu = mmu_has_feature(MMU_FTR_TYPE_FSL_E);
-
- if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
- unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG);
- unsigned int min_pg, max_pg;
-
- min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
- max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
-
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def;
- unsigned int shift;
-
- def = &mmu_psize_defs[psize];
- shift = def->shift;
-
- if (shift == 0 || shift & 1)
- continue;
-
- /* adjust to be in terms of 4^shift Kb */
- shift = (shift - 10) >> 1;
-
- if ((shift >= min_pg) && (shift <= max_pg))
- def->flags |= MMU_PAGE_SIZE_DIRECT;
- }
-
- goto out;
- }
-
- if (fsl_mmu && (mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V2) {
- u32 tlb1cfg, tlb1ps;
-
- tlb0cfg = mfspr(SPRN_TLB0CFG);
- tlb1cfg = mfspr(SPRN_TLB1CFG);
- tlb1ps = mfspr(SPRN_TLB1PS);
- eptcfg = mfspr(SPRN_EPTCFG);
-
- if ((tlb1cfg & TLBnCFG_IND) && (tlb0cfg & TLBnCFG_PT))
- book3e_htw_mode = PPC_HTW_E6500;
-
- /*
- * We expect 4K subpage size and unrestricted indirect size.
- * The lack of a restriction on indirect size is a Freescale
- * extension, indicated by PSn = 0 but SPSn != 0.
- */
- if (eptcfg != 2)
- book3e_htw_mode = PPC_HTW_NONE;
-
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
- if (tlb1ps & (1U << (def->shift - 10))) {
- def->flags |= MMU_PAGE_SIZE_DIRECT;
-
- if (book3e_htw_mode && psize == MMU_PAGE_2M)
- def->flags |= MMU_PAGE_SIZE_INDIRECT;
- }
- }
-
- goto out;
- }
-#endif
-
- tlb0cfg = mfspr(SPRN_TLB0CFG);
- tlb0ps = mfspr(SPRN_TLB0PS);
- eptcfg = mfspr(SPRN_EPTCFG);
-
- /* Look for supported direct sizes */
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
- if (tlb0ps & (1U << (def->shift - 10)))
- def->flags |= MMU_PAGE_SIZE_DIRECT;
- }
-
- /* Indirect page sizes supported ? */
- if ((tlb0cfg & TLBnCFG_IND) == 0 ||
- (tlb0cfg & TLBnCFG_PT) == 0)
- goto out;
-
- book3e_htw_mode = PPC_HTW_IBM;
-
- /* Now, we only deal with one IND page size for each
- * direct size. Hopefully all implementations today are
- * unambiguous, but we might want to be careful in the
- * future.
- */
- for (i = 0; i < 3; i++) {
- unsigned int ps, sps;
-
- sps = eptcfg & 0x1f;
- eptcfg >>= 5;
- ps = eptcfg & 0x1f;
- eptcfg >>= 5;
- if (!ps || !sps)
- continue;
- for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
-
- if (ps == (def->shift - 10))
- def->flags |= MMU_PAGE_SIZE_INDIRECT;
- if (sps == (def->shift - 10))
- def->ind = ps + 10;
- }
- }
-
-out:
- /* Cleanup array and print summary */
- pr_info("MMU: Supported page sizes\n");
- for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
- struct mmu_psize_def *def = &mmu_psize_defs[psize];
- const char *__page_type_names[] = {
- "unsupported",
- "direct",
- "indirect",
- "direct & indirect"
- };
- if (def->flags == 0) {
- def->shift = 0;
- continue;
- }
- pr_info(" %8ld KB as %s\n", 1ul << (def->shift - 10),
- __page_type_names[def->flags & 0x3]);
- }
-}
-
-static void setup_mmu_htw(void)
-{
- /*
- * If we want to use HW tablewalk, enable it by patching the TLB miss
- * handlers to branch to the one dedicated to it.
- */
-
- switch (book3e_htw_mode) {
- case PPC_HTW_IBM:
- patch_exception(0x1c0, exc_data_tlb_miss_htw_book3e);
- patch_exception(0x1e0, exc_instruction_tlb_miss_htw_book3e);
- break;
-#ifdef CONFIG_PPC_FSL_BOOK3E
- case PPC_HTW_E6500:
- extlb_level_exc = EX_TLB_SIZE;
- patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e);
- patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e);
- break;
-#endif
- }
- pr_info("MMU: Book3E HW tablewalk %s\n",
- book3e_htw_mode != PPC_HTW_NONE ? "enabled" : "not supported");
-}
-
-/*
- * Early initialization of the MMU TLB code
- */
-static void early_init_this_mmu(void)
-{
- unsigned int mas4;
-
- /* Set MAS4 based on page table setting */
-
- mas4 = 0x4 << MAS4_WIMGED_SHIFT;
- switch (book3e_htw_mode) {
- case PPC_HTW_E6500:
- mas4 |= MAS4_INDD;
- mas4 |= BOOK3E_PAGESZ_2M << MAS4_TSIZED_SHIFT;
- mas4 |= MAS4_TLBSELD(1);
- mmu_pte_psize = MMU_PAGE_2M;
- break;
-
- case PPC_HTW_IBM:
- mas4 |= MAS4_INDD;
-#ifdef CONFIG_PPC_64K_PAGES
- mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
- mmu_pte_psize = MMU_PAGE_256M;
-#else
- mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
- mmu_pte_psize = MMU_PAGE_1M;
-#endif
- break;
-
- case PPC_HTW_NONE:
-#ifdef CONFIG_PPC_64K_PAGES
- mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
-#else
- mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
-#endif
- mmu_pte_psize = mmu_virtual_psize;
- break;
- }
- mtspr(SPRN_MAS4, mas4);
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- unsigned int num_cams;
-
- /* use a quarter of the TLBCAM for bolted linear map */
- num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
- linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
- }
-#endif
-
- /* A sync won't hurt us after mucking around with
- * the MMU configuration
- */
- mb();
-}
-
-static void __init early_init_mmu_global(void)
-{
- /* XXX This will have to be decided at runtime, but right
- * now our boot and TLB miss code hard wires it. Ideally
- * we should find out a suitable page size and patch the
- * TLB miss code (either that or use the PACA to store
- * the value we want)
- */
- mmu_linear_psize = MMU_PAGE_1G;
-
- /* XXX This should be decided at runtime based on supported
- * page sizes in the TLB, but for now let's assume 16M is
- * always there and a good fit (which it probably is)
- *
- * Freescale booke only supports 4K pages in TLB0, so use that.
- */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
- mmu_vmemmap_psize = MMU_PAGE_4K;
- else
- mmu_vmemmap_psize = MMU_PAGE_16M;
-
- /* XXX This code only checks for TLB 0 capabilities and doesn't
- * check what page size combos are supported by the HW. It
- * also doesn't handle the case where a separate array holds
- * the IND entries from the array loaded by the PT.
- */
- /* Look for supported page sizes */
- setup_page_sizes();
-
- /* Look for HW tablewalk support */
- setup_mmu_htw();
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- if (book3e_htw_mode == PPC_HTW_NONE) {
- extlb_level_exc = EX_TLB_SIZE;
- patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
- patch_exception(0x1e0,
- exc_instruction_tlb_miss_bolted_book3e);
- }
- }
-#endif
-
- /* Set the global containing the top of the linear mapping
- * for use by the TLB miss code
- */
- linear_map_top = memblock_end_of_DRAM();
-}
-
-static void __init early_mmu_set_memory_limit(void)
-{
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- /*
- * Limit memory so we dont have linear faults.
- * Unlike memblock_set_current_limit, which limits
- * memory available during early boot, this permanently
- * reduces the memory available to Linux. We need to
- * do this because highmem is not supported on 64-bit.
- */
- memblock_enforce_memory_limit(linear_map_top);
- }
-#endif
-
- memblock_set_current_limit(linear_map_top);
-}
-
-/* boot cpu only */
-void __init early_init_mmu(void)
-{
- early_init_mmu_global();
- early_init_this_mmu();
- early_mmu_set_memory_limit();
-}
-
-void early_init_mmu_secondary(void)
-{
- early_init_this_mmu();
-}
-
-void setup_initial_memory_limit(phys_addr_t first_memblock_base,
- phys_addr_t first_memblock_size)
-{
- /* On non-FSL Embedded 64-bit, we adjust the RMA size to match
- * the bolted TLB entry. We know for now that only 1G
- * entries are supported though that may eventually
- * change.
- *
- * on FSL Embedded 64-bit, we adjust the RMA size to match the
- * first bolted TLB entry size. We still limit max to 1G even if
- * the TLB could cover more. This is due to what the early init
- * code is setup to do.
- *
- * We crop it to the size of the first MEMBLOCK to
- * avoid going over total available memory just in case...
- */
-#ifdef CONFIG_PPC_FSL_BOOK3E
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
- unsigned long linear_sz;
- linear_sz = calc_cam_sz(first_memblock_size, PAGE_OFFSET,
- first_memblock_base);
- ppc64_rma_size = min_t(u64, linear_sz, 0x40000000);
- } else
-#endif
- ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);
-
- /* Finally limit subsequent allocations */
- memblock_set_current_limit(first_memblock_base + ppc64_rma_size);
-}
-#else /* ! CONFIG_PPC64 */
-void __init early_init_mmu(void)
-{
-#ifdef CONFIG_PPC_47x
- early_init_mmu_47x();
-#endif
-}
-#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 266b3950c3ac..8e60af32e51e 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -1,4 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Arch-specific network modules
#
-obj-$(CONFIG_BPF_JIT) += bpf_jit_64.o bpf_jit_comp.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 9aee27c582dc..8334cd667bba 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -1,206 +1,117 @@
-/* bpf_jit.h: BPF JIT compiler for PPC64
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * bpf_jit.h: BPF JIT compiler for PPC
*
* Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
+ * 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
*/
#ifndef _BPF_JIT_H
#define _BPF_JIT_H
-#define BPF_PPC_STACK_LOCALS 32
-#define BPF_PPC_STACK_BASIC (48+64)
-#define BPF_PPC_STACK_SAVE (18*8)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
- BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (48+64)
+#ifndef __ASSEMBLER__
-/*
- * Generated code register usage:
- *
- * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
- *
- * skb r3 (Entry parameter)
- * A register r4
- * X register r5
- * addr param r6
- * r7-r10 scratch
- * skb->data r14
- * skb headlen r15 (skb->len - skb->data_len)
- * m[0] r16
- * m[...] ...
- * m[15] r31
- */
-#define r_skb 3
-#define r_ret 3
-#define r_A 4
-#define r_X 5
-#define r_addr 6
-#define r_scratch1 7
-#define r_scratch2 8
-#define r_D 14
-#define r_HL 15
-#define r_M 16
-
-#ifndef __ASSEMBLY__
-
-/*
- * Assembly helpers from arch/powerpc/net/bpf_jit.S:
- */
-#define DECLARE_LOAD_FUNC(func) \
- extern u8 func[], func##_negative_offset[], func##_positive_offset[]
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-DECLARE_LOAD_FUNC(sk_load_byte_msh);
+#include <asm/types.h>
+#include <asm/ppc-opcode.h>
+#include <linux/build_bug.h>
+#ifdef CONFIG_PPC64_ELF_ABI_V1
#define FUNCTION_DESCR_SIZE 24
+#else
+#define FUNCTION_DESCR_SIZE 0
+#endif
-/*
- * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
- * (e.g. LD, ADDI). If the bottom 16 bits is "-ve", add another bit into the
- * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
- */
-#define IMM_H(i) ((uintptr_t)(i)>>16)
-#define IMM_HA(i) (((uintptr_t)(i)>>16) + \
- (((uintptr_t)(i) & 0x8000) >> 15))
-#define IMM_L(i) ((uintptr_t)(i) & 0xffff)
+#define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4)
+
+#define SZL sizeof(unsigned long)
+#define BPF_INSN_SAFETY 64
#define PLANT_INSTR(d, idx, instr) \
do { if (d) { (d)[idx] = instr; } idx++; } while (0)
#define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr)
-#define PPC_NOP() EMIT(PPC_INST_NOP)
-#define PPC_BLR() EMIT(PPC_INST_BLR)
-#define PPC_BLRL() EMIT(PPC_INST_BLRL)
-#define PPC_MTLR(r) EMIT(PPC_INST_MTLR | ___PPC_RT(r))
-#define PPC_ADDI(d, a, i) EMIT(PPC_INST_ADDI | ___PPC_RT(d) | \
- ___PPC_RA(a) | IMM_L(i))
-#define PPC_MR(d, a) PPC_OR(d, a, a)
-#define PPC_LI(r, i) PPC_ADDI(r, 0, i)
-#define PPC_ADDIS(d, a, i) EMIT(PPC_INST_ADDIS | \
- ___PPC_RS(d) | ___PPC_RA(a) | IMM_L(i))
-#define PPC_LIS(r, i) PPC_ADDIS(r, 0, i)
-#define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \
- ___PPC_RA(base) | ((i) & 0xfffc))
-
-#define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \
- ___PPC_RA(base) | IMM_L(i))
-#define PPC_LHBRX(r, base, b) EMIT(PPC_INST_LHBRX | ___PPC_RT(r) | \
- ___PPC_RA(base) | ___PPC_RB(b))
-/* Convenience helpers for the above with 'far' offsets: */
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LD(r, r, IMM_L(i)); } } while(0)
-
-#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LWZ(r, r, IMM_L(i)); } } while(0)
-
-#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i); \
- else { PPC_ADDIS(r, base, IMM_HA(i)); \
- PPC_LHZ(r, r, IMM_L(i)); } } while(0)
-
-#define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPLWI(a, i) EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPLW(a, b) EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | ___PPC_RB(b))
-
-#define PPC_SUB(d, a, b) EMIT(PPC_INST_SUB | ___PPC_RT(d) | \
- ___PPC_RB(a) | ___PPC_RA(b))
-#define PPC_ADD(d, a, b) EMIT(PPC_INST_ADD | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MUL(d, a, b) EMIT(PPC_INST_MULLW | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULHWU(d, a, b) EMIT(PPC_INST_MULHWU | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULI(d, a, i) EMIT(PPC_INST_MULLI | ___PPC_RT(d) | \
- ___PPC_RA(a) | IMM_L(i))
-#define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \
- ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_ANDI(d, a, i) EMIT(PPC_INST_ANDI | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_AND_DOT(d, a, b) EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_OR(d, a, b) EMIT(PPC_INST_OR | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_ORI(d, a, i) EMIT(PPC_INST_ORI | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_ORIS(d, a, i) EMIT(PPC_INST_ORIS | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_XOR(d, a, b) EMIT(PPC_INST_XOR | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_XORI(d, a, i) EMIT(PPC_INST_XORI | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_XORIS(d, a, i) EMIT(PPC_INST_XORIS | ___PPC_RA(d) | \
- ___PPC_RS(a) | IMM_L(i))
-#define PPC_SLW(d, a, s) EMIT(PPC_INST_SLW | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRW(d, a, s) EMIT(PPC_INST_SRW | ___PPC_RA(d) | \
- ___PPC_RS(a) | ___PPC_RB(s))
-/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
-#define PPC_SLWI(d, a, i) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH(i) | \
- __PPC_MB(0) | __PPC_ME(31-(i)))
-/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
-#define PPC_SRWI(d, a, i) EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH(32-(i)) | \
- __PPC_MB(i) | __PPC_ME(31))
-/* sldi = rldicr Rx, Ry, n, 63-n */
-#define PPC_SLDI(d, a, i) EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \
- ___PPC_RS(a) | __PPC_SH(i) | \
- __PPC_MB(63-(i)) | (((i) & 0x20) >> 4))
-#define PPC_NEG(d, a) EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a))
-
/* Long jump; (unconditional 'branch') */
-#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \
- (((dest) - (ctx->idx * 4)) & 0x03fffffc))
+#define PPC_JMP(dest) \
+ do { \
+ long offset = (long)(dest) - CTX_NIA(ctx); \
+ if ((dest) != 0 && !is_offset_in_branch_range(offset)) { \
+ pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
+ return -ERANGE; \
+ } \
+ EMIT(PPC_RAW_BRANCH(offset)); \
+ } while (0)
+
/* "cond" here covers BO:BI fields. */
-#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \
- (((cond) & 0x3ff) << 16) | \
- (((dest) - (ctx->idx * 4)) & \
- 0xfffc))
-#define PPC_LI32(d, i) do { PPC_LI(d, IMM_L(i)); \
- if ((u32)(uintptr_t)(i) >= 32768) { \
- PPC_ADDIS(d, d, IMM_HA(i)); \
- } } while(0)
+#define PPC_BCC_SHORT(cond, dest) \
+ do { \
+ long offset = (long)(dest) - CTX_NIA(ctx); \
+ if ((dest) != 0 && !is_offset_in_cond_branch_range(offset)) { \
+ pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \
+ return -ERANGE; \
+ } \
+ EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \
+ } while (0)
+
+/*
+ * Sign-extended 32-bit immediate load
+ *
+ * If this is a dummy pass (!image), account for
+ * maximum possible instructions.
+ */
+#define PPC_LI32(d, i) do { \
+ if (!image) \
+ ctx->idx += 2; \
+ else { \
+ if ((int)(uintptr_t)(i) >= -32768 && \
+ (int)(uintptr_t)(i) < 32768) \
+ EMIT(PPC_RAW_LI(d, i)); \
+ else { \
+ EMIT(PPC_RAW_LIS(d, IMM_H(i))); \
+ if (IMM_L(i)) \
+ EMIT(PPC_RAW_ORI(d, d, IMM_L(i))); \
+ } \
+ } } while (0)
+
+#ifdef CONFIG_PPC64
+/* If dummy pass (!image), account for maximum possible instructions */
#define PPC_LI64(d, i) do { \
- if (!((uintptr_t)(i) & 0xffffffff00000000ULL)) \
+ if (!image) \
+ ctx->idx += 5; \
+ else { \
+ if ((long)(i) >= -2147483648 && \
+ (long)(i) < 2147483648) \
PPC_LI32(d, i); \
else { \
- PPC_LIS(d, ((uintptr_t)(i) >> 48)); \
- if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \
- PPC_ORI(d, d, \
- ((uintptr_t)(i) >> 32) & 0xffff); \
- PPC_SLDI(d, d, 32); \
+ if (!((uintptr_t)(i) & 0xffff800000000000ULL)) \
+ EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) & \
+ 0xffff)); \
+ else { \
+ EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \
+ if ((uintptr_t)(i) & 0x0000ffff00000000ULL) \
+ EMIT(PPC_RAW_ORI(d, d, \
+ ((uintptr_t)(i) >> 32) & 0xffff)); \
+ } \
+ EMIT(PPC_RAW_SLDI(d, d, 32)); \
if ((uintptr_t)(i) & 0x00000000ffff0000ULL) \
- PPC_ORIS(d, d, \
- ((uintptr_t)(i) >> 16) & 0xffff); \
+ EMIT(PPC_RAW_ORIS(d, d, \
+ ((uintptr_t)(i) >> 16) & 0xffff)); \
if ((uintptr_t)(i) & 0x000000000000ffffULL) \
- PPC_ORI(d, d, (uintptr_t)(i) & 0xffff); \
- } } while (0);
-
-#define PPC_LHBRX_OFFS(r, base, i) \
- do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
-#ifdef __LITTLE_ENDIAN__
-#define PPC_NTOHS_OFFS(r, base, i) PPC_LHBRX_OFFS(r, base, i)
+ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \
+ 0xffff)); \
+ } \
+ } } while (0)
+#define PPC_LI_ADDR PPC_LI64
+
+#ifndef CONFIG_PPC_KERNEL_PCREL
+#define PPC64_LOAD_PACA() \
+ EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)))
#else
-#define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i)
+#define PPC64_LOAD_PACA() do {} while (0)
+#endif
+#else
+#define PPC_LI64(d, i) BUILD_BUG()
+#define PPC_LI_ADDR PPC_LI32
+#define PPC64_LOAD_PACA() BUILD_BUG()
#endif
-
-static inline bool is_nearbranch(int offset)
-{
- return (offset < 32768) && (offset >= -32768);
-}
/*
* The fly in the ointment of code size changing from pass to pass is
@@ -210,12 +121,12 @@ static inline bool is_nearbranch(int offset)
* state.
*/
#define PPC_BCC(cond, dest) do { \
- if (is_nearbranch((dest) - (ctx->idx * 4))) { \
+ if (is_offset_in_cond_branch_range((long)(dest) - CTX_NIA(ctx))) { \
PPC_BCC_SHORT(cond, dest); \
- PPC_NOP(); \
+ EMIT(PPC_RAW_NOP()); \
} else { \
/* Flip the 'T or F' bit to invert comparison */ \
- PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4); \
+ PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, CTX_NIA(ctx) + 2*4); \
PPC_JMP(dest); \
} } while(0)
@@ -232,19 +143,68 @@ static inline bool is_nearbranch(int offset)
#define COND_EQ (CR0_EQ | COND_CMP_TRUE)
#define COND_NE (CR0_EQ | COND_CMP_FALSE)
#define COND_LT (CR0_LT | COND_CMP_TRUE)
+#define COND_LE (CR0_GT | COND_CMP_FALSE)
-#define SEEN_DATAREF 0x10000 /* might call external helpers */
-#define SEEN_XREG 0x20000 /* X reg is used */
-#define SEEN_MEM 0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
- * storage */
-#define SEEN_MEM_MSK 0x0ffff
+#define SEEN_FUNC 0x20000000 /* might call external helpers */
+#define SEEN_TAILCALL 0x40000000 /* uses tail calls */
struct codegen_context {
+ /*
+ * This is used to track register usage as well
+ * as calls to external helpers.
+ * - register usage is tracked with corresponding
+ * bits (r3-r31)
+ * - rest of the bits can be used to track other
+ * things -- for now, we use bits 0 to 2
+ * encoded in SEEN_* macros above
+ */
unsigned int seen;
unsigned int idx;
- int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
+ unsigned int stack_size;
+ int b2p[MAX_BPF_JIT_REG + 3];
+ unsigned int exentry_idx;
+ unsigned int alt_exit_addr;
+ u64 arena_vm_start;
+ u64 user_vm_start;
};
+#define bpf_to_ppc(r) (ctx->b2p[r])
+
+#ifdef CONFIG_PPC32
+#define BPF_FIXUP_LEN 3 /* Three instructions => 12 bytes */
+#else
+#define BPF_FIXUP_LEN 2 /* Two instructions => 8 bytes */
+#endif
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+ return ctx->seen & (1 << (31 - i));
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen |= 1 << (31 - i);
+}
+
+static inline void bpf_clear_seen_register(struct codegen_context *ctx, int i)
+{
+ ctx->seen &= ~(1 << (31 - i));
+}
+
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx);
+int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
+ u32 *addrs, int pass, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx);
+void bpf_jit_realloc_regs(struct codegen_context *ctx);
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr);
+
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+ struct codegen_context *ctx, int insn_idx,
+ int jmp_off, int dst_reg, u32 code);
+
#endif
#endif
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
deleted file mode 100644
index 8f87d9217122..000000000000
--- a/arch/powerpc/net/bpf_jit_64.S
+++ /dev/null
@@ -1,229 +0,0 @@
-/* bpf_jit.S: Packet/header access helper functions
- * for PPC64 BPF compiler.
- *
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-#include <asm/ppc_asm.h>
-#include "bpf_jit.h"
-
-/*
- * All of these routines are called directly from generated code,
- * whose register usage is:
- *
- * r3 skb
- * r4,r5 A,X
- * r6 *** address parameter to helper ***
- * r7-r10 scratch
- * r14 skb->data
- * r15 skb headlen
- * r16-31 M[]
- */
-
-/*
- * To consider: These helpers are so small it could be better to just
- * generate them inline. Inline code can do the simple headlen check
- * then branch directly to slow_path_XXX if required. (In fact, could
- * load a spare GPR with the address of slow_path_generic and pass size
- * as an argument, making the call site a mtlr, li and bllr.)
- */
- .globl sk_load_word
-sk_load_word:
- cmpdi r_addr, 0
- blt bpf_slow_path_word_neg
- .globl sk_load_word_positive_offset
-sk_load_word_positive_offset:
- /* Are we accessing past headlen? */
- subi r_scratch1, r_HL, 4
- cmpd r_scratch1, r_addr
- blt bpf_slow_path_word
- /* Nope, just hitting the header. cr0 here is eq or gt! */
-#ifdef __LITTLE_ENDIAN__
- lwbrx r_A, r_D, r_addr
-#else
- lwzx r_A, r_D, r_addr
-#endif
- blr /* Return success, cr0 != LT */
-
- .globl sk_load_half
-sk_load_half:
- cmpdi r_addr, 0
- blt bpf_slow_path_half_neg
- .globl sk_load_half_positive_offset
-sk_load_half_positive_offset:
- subi r_scratch1, r_HL, 2
- cmpd r_scratch1, r_addr
- blt bpf_slow_path_half
-#ifdef __LITTLE_ENDIAN__
- lhbrx r_A, r_D, r_addr
-#else
- lhzx r_A, r_D, r_addr
-#endif
- blr
-
- .globl sk_load_byte
-sk_load_byte:
- cmpdi r_addr, 0
- blt bpf_slow_path_byte_neg
- .globl sk_load_byte_positive_offset
-sk_load_byte_positive_offset:
- cmpd r_HL, r_addr
- ble bpf_slow_path_byte
- lbzx r_A, r_D, r_addr
- blr
-
-/*
- * BPF_LDX | BPF_B | BPF_MSH: ldxb 4*([offset]&0xf)
- * r_addr is the offset value
- */
- .globl sk_load_byte_msh
-sk_load_byte_msh:
- cmpdi r_addr, 0
- blt bpf_slow_path_byte_msh_neg
- .globl sk_load_byte_msh_positive_offset
-sk_load_byte_msh_positive_offset:
- cmpd r_HL, r_addr
- ble bpf_slow_path_byte_msh
- lbzx r_X, r_D, r_addr
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-/* Call out to skb_copy_bits:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define bpf_slow_path_common(SIZE) \
- mflr r0; \
- std r0, 16(r1); \
- /* R3 goes in parameter space of caller's frame */ \
- std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
- std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
- std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
- addi r5, r1, BPF_PPC_STACK_BASIC+(2*8); \
- stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
- /* R3 = r_skb, as passed */ \
- mr r4, r_addr; \
- li r6, SIZE; \
- bl skb_copy_bits; \
- nop; \
- /* R3 = 0 on success */ \
- addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
- ld r0, 16(r1); \
- ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
- ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
- mtlr r0; \
- cmpdi r3, 0; \
- blt bpf_error; /* cr0 = LT */ \
- ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
- /* Great success! */
-
-bpf_slow_path_word:
- bpf_slow_path_common(4)
- /* Data value is on stack, and cr0 != LT */
- lwz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_half:
- bpf_slow_path_common(2)
- lhz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_byte:
- bpf_slow_path_common(1)
- lbz r_A, BPF_PPC_STACK_BASIC+(2*8)(r1)
- blr
-
-bpf_slow_path_byte_msh:
- bpf_slow_path_common(1)
- lbz r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-/* Call out to bpf_internal_load_pointer_neg_helper:
- * We'll need to back up our volatile regs first; we have
- * local variable space at r1+(BPF_PPC_STACK_BASIC).
- * Allocate a new stack frame here to remain ABI-compliant in
- * stashing LR.
- */
-#define sk_negative_common(SIZE) \
- mflr r0; \
- std r0, 16(r1); \
- /* R3 goes in parameter space of caller's frame */ \
- std r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
- std r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
- std r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
- stdu r1, -BPF_PPC_SLOWPATH_FRAME(r1); \
- /* R3 = r_skb, as passed */ \
- mr r4, r_addr; \
- li r5, SIZE; \
- bl bpf_internal_load_pointer_neg_helper; \
- nop; \
- /* R3 != 0 on success */ \
- addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
- ld r0, 16(r1); \
- ld r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1); \
- ld r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1); \
- mtlr r0; \
- cmpldi r3, 0; \
- beq bpf_error_slow; /* cr0 = EQ */ \
- mr r_addr, r3; \
- ld r_skb, (BPF_PPC_STACKFRAME+48)(r1); \
- /* Great success! */
-
-bpf_slow_path_word_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- cmpd r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_word_negative_offset
-sk_load_word_negative_offset:
- sk_negative_common(4)
- lwz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_half_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- cmpd r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_half_negative_offset
-sk_load_half_negative_offset:
- sk_negative_common(2)
- lhz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_byte_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- cmpd r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_byte_negative_offset
-sk_load_byte_negative_offset:
- sk_negative_common(1)
- lbz r_A, 0(r_addr)
- blr
-
-bpf_slow_path_byte_msh_neg:
- lis r_scratch1,-32 /* SKF_LL_OFF */
- cmpd r_addr, r_scratch1 /* addr < SKF_* */
- blt bpf_error /* cr0 = LT */
- .globl sk_load_byte_msh_negative_offset
-sk_load_byte_msh_negative_offset:
- sk_negative_common(1)
- lbz r_X, 0(r_addr)
- rlwinm r_X, r_X, 2, 32-4-2, 31-2
- blr
-
-bpf_error_slow:
- /* fabricate a cr0 = lt */
- li r_scratch1, -1
- cmpdi r_scratch1, 0
-bpf_error:
- /* Entered with cr0 = lt */
- li r3, 0
- /* Generated code will 'blt epilogue', returning 0. */
- blr
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 3afa6f4c1957..88ad5ba7b87f 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -1,701 +1,1229 @@
-/* bpf_jit_comp.c: BPF JIT compiler for PPC64
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler
*
- * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
*
- * Based on the x86 BPF compiler, by Eric Dumazet (eric.dumazet@gmail.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
*/
#include <linux/moduleloader.h>
#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/memory.h>
+#include <linux/bpf.h>
+
+#include <asm/kprobes.h>
+#include <asm/text-patching.h>
#include "bpf_jit.h"
-int bpf_jit_enable __read_mostly;
+/* These offsets are from bpf prog end and stay the same across progs */
+static int bpf_jit_ool_stub, bpf_jit_long_branch_stub;
-static inline void bpf_flush_icache(void *start, void *end)
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
{
- smp_wmb();
- flush_icache_range((unsigned long)start, (unsigned long)end);
+ memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
}
-static void bpf_jit_build_prologue(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx)
+void dummy_tramp(void);
+
+asm (
+" .pushsection .text, \"ax\", @progbits ;"
+" .global dummy_tramp ;"
+" .type dummy_tramp, @function ;"
+"dummy_tramp: ;"
+#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE
+" blr ;"
+#else
+/* LR is always in r11, so we don't need a 'mflr r11' here */
+" mtctr 11 ;"
+" mtlr 0 ;"
+" bctr ;"
+#endif
+" .size dummy_tramp, .-dummy_tramp ;"
+" .popsection ;"
+);
+
+void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx)
{
- int i;
- const struct sock_filter *filter = fp->insns;
-
- if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
- /* Make stackframe */
- if (ctx->seen & SEEN_DATAREF) {
- /* If we call any helpers (for loads), save LR */
- EMIT(PPC_INST_MFLR | __PPC_RT(R0));
- PPC_STD(0, 1, 16);
-
- /* Back up non-volatile regs. */
- PPC_STD(r_D, 1, -(8*(32-r_D)));
- PPC_STD(r_HL, 1, -(8*(32-r_HL)));
- }
- if (ctx->seen & SEEN_MEM) {
- /*
- * Conditionally save regs r15-r31 as some will be used
- * for M[] data.
- */
- for (i = r_M; i < (r_M+16); i++) {
- if (ctx->seen & (1 << (i-r_M)))
- PPC_STD(i, 1, -(8*(32-i)));
- }
+ int ool_stub_idx, long_branch_stub_idx;
+
+ /*
+ * Out-of-line stub:
+ * mflr r0
+ * [b|bl] tramp
+ * mtlr r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE
+ * b bpf_func + 4
+ */
+ ool_stub_idx = ctx->idx;
+ EMIT(PPC_RAW_MFLR(_R0));
+ EMIT(PPC_RAW_NOP());
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+ EMIT(PPC_RAW_MTLR(_R0));
+ WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4));
+ EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4));
+
+ /*
+ * Long branch stub:
+ * .long <dummy_tramp_addr>
+ * mflr r11
+ * bcl 20,31,$+4
+ * mflr r12
+ * ld r12, -8-SZL(r12)
+ * mtctr r12
+ * mtlr r11 // needed to retain ftrace ABI
+ * bctr
+ */
+ if (image)
+ *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp;
+ ctx->idx += SZL / 4;
+ long_branch_stub_idx = ctx->idx;
+ EMIT(PPC_RAW_MFLR(_R11));
+ EMIT(PPC_RAW_BCL4());
+ EMIT(PPC_RAW_MFLR(_R12));
+ EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_MTLR(_R11));
+ EMIT(PPC_RAW_BCTR());
+
+ if (!bpf_jit_ool_stub) {
+ bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4;
+ bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4;
+ }
+}
+
+int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr)
+{
+ if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) {
+ PPC_JMP(exit_addr);
+ } else if (ctx->alt_exit_addr) {
+ if (WARN_ON(!is_offset_in_branch_range((long)ctx->alt_exit_addr - (ctx->idx * 4))))
+ return -1;
+ PPC_JMP(ctx->alt_exit_addr);
+ } else {
+ ctx->alt_exit_addr = ctx->idx * 4;
+ bpf_jit_build_epilogue(image, ctx);
+ }
+
+ return 0;
+}
+
+struct powerpc_jit_data {
+ /* address of rw header */
+ struct bpf_binary_header *hdr;
+ /* address of ro final header */
+ struct bpf_binary_header *fhdr;
+ u32 *addrs;
+ u8 *fimage;
+ u32 proglen;
+ struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
+{
+ return true;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+{
+ u32 proglen;
+ u32 alloclen;
+ u8 *image = NULL;
+ u32 *code_base;
+ u32 *addrs;
+ struct powerpc_jit_data *jit_data;
+ struct codegen_context cgctx;
+ int pass;
+ int flen;
+ struct bpf_binary_header *fhdr = NULL;
+ struct bpf_binary_header *hdr = NULL;
+ struct bpf_prog *org_fp = fp;
+ struct bpf_prog *tmp_fp;
+ bool bpf_blinded = false;
+ bool extra_pass = false;
+ u8 *fimage = NULL;
+ u32 *fcode_base;
+ u32 extable_len;
+ u32 fixup_len;
+
+ if (!fp->jit_requested)
+ return org_fp;
+
+ tmp_fp = bpf_jit_blind_constants(org_fp);
+ if (IS_ERR(tmp_fp))
+ return org_fp;
+
+ if (tmp_fp != org_fp) {
+ bpf_blinded = true;
+ fp = tmp_fp;
+ }
+
+ jit_data = fp->aux->jit_data;
+ if (!jit_data) {
+ jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+ if (!jit_data) {
+ fp = org_fp;
+ goto out;
}
- EMIT(PPC_INST_STDU | __PPC_RS(R1) | __PPC_RA(R1) |
- (-BPF_PPC_STACKFRAME & 0xfffc));
+ fp->aux->jit_data = jit_data;
}
- if (ctx->seen & SEEN_DATAREF) {
+ flen = fp->len;
+ addrs = jit_data->addrs;
+ if (addrs) {
+ cgctx = jit_data->ctx;
/*
- * If this filter needs to access skb data,
- * prepare r_D and r_HL:
- * r_HL = skb->len - skb->data_len
- * r_D = skb->data
+ * JIT compiled to a writable location (image/code_base) first.
+ * It is then moved to the readonly final location (fimage/fcode_base)
+ * using instruction patching.
*/
- PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
- data_len));
- PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
- PPC_SUB(r_HL, r_HL, r_scratch1);
- PPC_LD_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
+ fimage = jit_data->fimage;
+ fhdr = jit_data->fhdr;
+ proglen = jit_data->proglen;
+ hdr = jit_data->hdr;
+ image = (void *)hdr + ((void *)fimage - (void *)fhdr);
+ extra_pass = true;
+ /* During extra pass, ensure index is reset before repopulating extable entries */
+ cgctx.exentry_idx = 0;
+ goto skip_init_ctx;
}
- if (ctx->seen & SEEN_XREG) {
- /*
- * TODO: Could also detect whether first instr. sets X and
- * avoid this (as below, with A).
- */
- PPC_LI(r_X, 0);
+ addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
+ if (addrs == NULL) {
+ fp = org_fp;
+ goto out_addrs;
}
- switch (filter[0].code) {
- case BPF_RET | BPF_K:
- case BPF_LD | BPF_W | BPF_LEN:
- case BPF_LD | BPF_W | BPF_ABS:
- case BPF_LD | BPF_H | BPF_ABS:
- case BPF_LD | BPF_B | BPF_ABS:
- /* first instruction sets A register (or is RET 'constant') */
- break;
- default:
- /* make sure we dont leak kernel information to user */
- PPC_LI(r_A, 0);
+ memset(&cgctx, 0, sizeof(struct codegen_context));
+ bpf_jit_init_reg_mapping(&cgctx);
+
+ /* Make sure that the stack is quadword aligned. */
+ cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
+ cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena);
+ cgctx.user_vm_start = bpf_arena_get_user_vm_start(fp->aux->arena);
+
+ /* Scouting faux-generate pass 0 */
+ if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
+ /* We hit something illegal or unsupported. */
+ fp = org_fp;
+ goto out_addrs;
}
-}
-static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
-{
- int i;
+ /*
+ * If we have seen a tail call, we need a second pass.
+ * This is because bpf_jit_emit_common_epilogue() is called
+ * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
+ * We also need a second pass if we ended up with too large
+ * a program so as to ensure BPF_EXIT branches are in range.
+ */
+ if (cgctx.seen & SEEN_TAILCALL || !is_offset_in_branch_range((long)cgctx.idx * 4)) {
+ cgctx.idx = 0;
+ if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+ }
+
+ bpf_jit_realloc_regs(&cgctx);
+ /*
+ * Pretend to build prologue, given the features we've seen. This will
+ * update ctgtx.idx as it pretends to output instructions, then we can
+ * calculate total size from idx.
+ */
+ bpf_jit_build_prologue(NULL, &cgctx);
+ addrs[fp->len] = cgctx.idx * 4;
+ bpf_jit_build_epilogue(NULL, &cgctx);
+
+ fixup_len = fp->aux->num_exentries * BPF_FIXUP_LEN * 4;
+ extable_len = fp->aux->num_exentries * sizeof(struct exception_table_entry);
- if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
- PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
- if (ctx->seen & SEEN_DATAREF) {
- PPC_LD(0, 1, 16);
- PPC_MTLR(0);
- PPC_LD(r_D, 1, -(8*(32-r_D)));
- PPC_LD(r_HL, 1, -(8*(32-r_HL)));
+ proglen = cgctx.idx * 4;
+ alloclen = proglen + FUNCTION_DESCR_SIZE + fixup_len + extable_len;
+
+ fhdr = bpf_jit_binary_pack_alloc(alloclen, &fimage, 4, &hdr, &image,
+ bpf_jit_fill_ill_insns);
+ if (!fhdr) {
+ fp = org_fp;
+ goto out_addrs;
+ }
+
+ if (extable_len)
+ fp->aux->extable = (void *)fimage + FUNCTION_DESCR_SIZE + proglen + fixup_len;
+
+skip_init_ctx:
+ code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
+ fcode_base = (u32 *)(fimage + FUNCTION_DESCR_SIZE);
+
+ /* Code generation passes 1-2 */
+ for (pass = 1; pass < 3; pass++) {
+ /* Now build the prologue, body code & epilogue for real. */
+ cgctx.idx = 0;
+ cgctx.alt_exit_addr = 0;
+ bpf_jit_build_prologue(code_base, &cgctx);
+ if (bpf_jit_build_body(fp, code_base, fcode_base, &cgctx, addrs, pass,
+ extra_pass)) {
+ bpf_arch_text_copy(&fhdr->size, &hdr->size, sizeof(hdr->size));
+ bpf_jit_binary_pack_free(fhdr, hdr);
+ fp = org_fp;
+ goto out_addrs;
}
- if (ctx->seen & SEEN_MEM) {
- /* Restore any saved non-vol registers */
- for (i = r_M; i < (r_M+16); i++) {
- if (ctx->seen & (1 << (i-r_M)))
- PPC_LD(i, 1, -(8*(32-i)));
- }
+ bpf_jit_build_epilogue(code_base, &cgctx);
+
+ if (bpf_jit_enable > 1)
+ pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
+ proglen - (cgctx.idx * 4), cgctx.seen);
+ }
+
+ if (bpf_jit_enable > 1)
+ /*
+ * Note that we output the base address of the code_base
+ * rather than image, since opcodes are in code_base.
+ */
+ bpf_jit_dump(flen, proglen, pass, code_base);
+
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+ /* Function descriptor nastiness: Address + TOC */
+ ((u64 *)image)[0] = (u64)fcode_base;
+ ((u64 *)image)[1] = local_paca->kernel_toc;
+#endif
+
+ fp->bpf_func = (void *)fimage;
+ fp->jited = 1;
+ fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE;
+
+ if (!fp->is_func || extra_pass) {
+ if (bpf_jit_binary_pack_finalize(fhdr, hdr)) {
+ fp = org_fp;
+ goto out_addrs;
}
+ bpf_prog_fill_jited_linfo(fp, addrs);
+out_addrs:
+ kfree(addrs);
+ kfree(jit_data);
+ fp->aux->jit_data = NULL;
+ } else {
+ jit_data->addrs = addrs;
+ jit_data->ctx = cgctx;
+ jit_data->proglen = proglen;
+ jit_data->fimage = fimage;
+ jit_data->fhdr = fhdr;
+ jit_data->hdr = hdr;
}
- /* The RETs have left a return value in R3. */
- PPC_BLR();
+out:
+ if (bpf_blinded)
+ bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
+
+ return fp;
+}
+
+/*
+ * The caller should check for (BPF_MODE(code) == BPF_PROBE_MEM) before calling
+ * this function, as this only applies to BPF_PROBE_MEM, for now.
+ */
+int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass,
+ struct codegen_context *ctx, int insn_idx, int jmp_off,
+ int dst_reg, u32 code)
+{
+ off_t offset;
+ unsigned long pc;
+ struct exception_table_entry *ex, *ex_entry;
+ u32 *fixup;
+
+ /* Populate extable entries only in the last pass */
+ if (pass != 2)
+ return 0;
+
+ if (!fp->aux->extable ||
+ WARN_ON_ONCE(ctx->exentry_idx >= fp->aux->num_exentries))
+ return -EINVAL;
+
+ /*
+ * Program is first written to image before copying to the
+ * final location (fimage). Accordingly, update in the image first.
+ * As all offsets used are relative, copying as is to the
+ * final location should be alright.
+ */
+ pc = (unsigned long)&image[insn_idx];
+ ex = (void *)fp->aux->extable - (void *)fimage + (void *)image;
+
+ fixup = (void *)ex -
+ (fp->aux->num_exentries * BPF_FIXUP_LEN * 4) +
+ (ctx->exentry_idx * BPF_FIXUP_LEN * 4);
+
+ fixup[0] = PPC_RAW_LI(dst_reg, 0);
+ if (BPF_CLASS(code) == BPF_ST || BPF_CLASS(code) == BPF_STX)
+ fixup[0] = PPC_RAW_NOP();
+
+ if (IS_ENABLED(CONFIG_PPC32))
+ fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */
+
+ fixup[BPF_FIXUP_LEN - 1] =
+ PPC_RAW_BRANCH((long)(pc + jmp_off) - (long)&fixup[BPF_FIXUP_LEN - 1]);
+
+ ex_entry = &ex[ctx->exentry_idx];
+
+ offset = pc - (long)&ex_entry->insn;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex_entry->insn = offset;
+
+ offset = (long)fixup - (long)&ex_entry->fixup;
+ if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+ return -ERANGE;
+ ex_entry->fixup = offset;
+
+ ctx->exentry_idx++;
+ return 0;
}
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+void *bpf_arch_text_copy(void *dst, void *src, size_t len)
+{
+ int err;
+
+ if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+ return ERR_PTR(-EINVAL);
+
+ mutex_lock(&text_mutex);
+ err = patch_instructions(dst, src, len, false);
+ mutex_unlock(&text_mutex);
+
+ return err ? ERR_PTR(err) : dst;
+}
-/* Assemble the body code between the prologue & epilogue. */
-static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
- struct codegen_context *ctx,
- unsigned int *addrs)
+int bpf_arch_text_invalidate(void *dst, size_t len)
{
- const struct sock_filter *filter = fp->insns;
- int flen = fp->len;
- u8 *func;
- unsigned int true_cond;
- int i;
+ u32 insn = BREAKPOINT_INSTRUCTION;
+ int ret;
+
+ if (WARN_ON_ONCE(core_kernel_text((unsigned long)dst)))
+ return -EINVAL;
+
+ mutex_lock(&text_mutex);
+ ret = patch_instructions(dst, &insn, len, true);
+ mutex_unlock(&text_mutex);
- /* Start of epilogue code */
- unsigned int exit_addr = addrs[flen];
+ return ret;
+}
- for (i = 0; i < flen; i++) {
- unsigned int K = filter[i].k;
- u16 code = bpf_anc_helper(&filter[i]);
+void bpf_jit_free(struct bpf_prog *fp)
+{
+ if (fp->jited) {
+ struct powerpc_jit_data *jit_data = fp->aux->jit_data;
+ struct bpf_binary_header *hdr;
/*
- * addrs[] maps a BPF bytecode address into a real offset from
- * the start of the body code.
+ * If we fail the final pass of JIT (from jit_subprogs),
+ * the program may not be finalized yet. Call finalize here
+ * before freeing it.
*/
- addrs[i] = ctx->idx * 4;
-
- switch (code) {
- /*** ALU ops ***/
- case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
- ctx->seen |= SEEN_XREG;
- PPC_ADD(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
- if (!K)
- break;
- PPC_ADDI(r_A, r_A, IMM_L(K));
- if (K >= 32768)
- PPC_ADDIS(r_A, r_A, IMM_HA(K));
- break;
- case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
- ctx->seen |= SEEN_XREG;
- PPC_SUB(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
- if (!K)
- break;
- PPC_ADDI(r_A, r_A, IMM_L(-K));
- if (K >= 32768)
- PPC_ADDIS(r_A, r_A, IMM_HA(-K));
- break;
- case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
- ctx->seen |= SEEN_XREG;
- PPC_MUL(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
- if (K < 32768)
- PPC_MULI(r_A, r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_MUL(r_A, r_A, r_scratch1);
- }
- break;
- case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
- ctx->seen |= SEEN_XREG;
- PPC_CMPWI(r_X, 0);
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
- PPC_LI(r_ret, 0);
- PPC_JMP(exit_addr);
- }
- PPC_DIVWU(r_scratch1, r_A, r_X);
- PPC_MUL(r_scratch1, r_X, r_scratch1);
- PPC_SUB(r_A, r_A, r_scratch1);
- break;
- case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
- PPC_LI32(r_scratch2, K);
- PPC_DIVWU(r_scratch1, r_A, r_scratch2);
- PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
- PPC_SUB(r_A, r_A, r_scratch1);
- break;
- case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
- ctx->seen |= SEEN_XREG;
- PPC_CMPWI(r_X, 0);
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- /*
- * Exit, returning 0; first pass hits here
- * (longer worst-case code size).
- */
- PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
- PPC_LI(r_ret, 0);
- PPC_JMP(exit_addr);
- }
- PPC_DIVWU(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
- if (K == 1)
- break;
- PPC_LI32(r_scratch1, K);
- PPC_DIVWU(r_A, r_A, r_scratch1);
- break;
- case BPF_ALU | BPF_AND | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_AND(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_AND | BPF_K:
- if (!IMM_H(K))
- PPC_ANDI(r_A, r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_AND(r_A, r_A, r_scratch1);
- }
- break;
- case BPF_ALU | BPF_OR | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_OR(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_OR | BPF_K:
- if (IMM_L(K))
- PPC_ORI(r_A, r_A, IMM_L(K));
- if (K >= 65536)
- PPC_ORIS(r_A, r_A, IMM_H(K));
- break;
- case BPF_ANC | SKF_AD_ALU_XOR_X:
- case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
- ctx->seen |= SEEN_XREG;
- PPC_XOR(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
- if (IMM_L(K))
- PPC_XORI(r_A, r_A, IMM_L(K));
- if (K >= 65536)
- PPC_XORIS(r_A, r_A, IMM_H(K));
- break;
- case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
- ctx->seen |= SEEN_XREG;
- PPC_SLW(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_LSH | BPF_K:
- if (K == 0)
- break;
- else
- PPC_SLWI(r_A, r_A, K);
- break;
- case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
- ctx->seen |= SEEN_XREG;
- PPC_SRW(r_A, r_A, r_X);
- break;
- case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
- if (K == 0)
- break;
- else
- PPC_SRWI(r_A, r_A, K);
- break;
- case BPF_ALU | BPF_NEG:
- PPC_NEG(r_A, r_A);
- break;
- case BPF_RET | BPF_K:
- PPC_LI32(r_ret, K);
- if (!K) {
- if (ctx->pc_ret0 == -1)
- ctx->pc_ret0 = i;
- }
- /*
- * If this isn't the very last instruction, branch to
- * the epilogue if we've stuff to clean up. Otherwise,
- * if there's nothing to tidy, just return. If we /are/
- * the last instruction, we're about to fall through to
- * the epilogue to return.
- */
- if (i != flen - 1) {
- /*
- * Note: 'seen' is properly valid only on pass
- * #2. Both parts of this conditional are the
- * same instruction size though, meaning the
- * first pass will still correctly determine the
- * code size/addresses.
- */
- if (ctx->seen)
- PPC_JMP(exit_addr);
- else
- PPC_BLR();
- }
- break;
- case BPF_RET | BPF_A:
- PPC_MR(r_ret, r_A);
- if (i != flen - 1) {
- if (ctx->seen)
- PPC_JMP(exit_addr);
- else
- PPC_BLR();
- }
- break;
- case BPF_MISC | BPF_TAX: /* X = A */
- PPC_MR(r_X, r_A);
- break;
- case BPF_MISC | BPF_TXA: /* A = X */
- ctx->seen |= SEEN_XREG;
- PPC_MR(r_A, r_X);
- break;
-
- /*** Constant loads/M[] access ***/
- case BPF_LD | BPF_IMM: /* A = K */
- PPC_LI32(r_A, K);
- break;
- case BPF_LDX | BPF_IMM: /* X = K */
- PPC_LI32(r_X, K);
- break;
- case BPF_LD | BPF_MEM: /* A = mem[K] */
- PPC_MR(r_A, r_M + (K & 0xf));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_LDX | BPF_MEM: /* X = mem[K] */
- PPC_MR(r_X, r_M + (K & 0xf));
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_ST: /* mem[K] = A */
- PPC_MR(r_M + (K & 0xf), r_A);
- ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_STX: /* mem[K] = X */
- PPC_MR(r_M + (K & 0xf), r_X);
- ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
- break;
- case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
- break;
- case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
- PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
- break;
-
- /*** Ancillary info loads ***/
- case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- protocol) != 2);
- PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- protocol));
- break;
- case BPF_ANC | SKF_AD_IFINDEX:
- PPC_LD_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
- dev));
- PPC_CMPDI(r_scratch1, 0);
- if (ctx->pc_ret0 != -1) {
- PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
- } else {
- /* Exit, returning 0; first pass hits here. */
- PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
- PPC_LI(r_ret, 0);
- PPC_JMP(exit_addr);
- }
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
- ifindex) != 4);
- PPC_LWZ_OFFS(r_A, r_scratch1,
- offsetof(struct net_device, ifindex));
- break;
- case BPF_ANC | SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- mark));
- break;
- case BPF_ANC | SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- hash));
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
- BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
-
- PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- vlan_tci));
- if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) {
- PPC_ANDI(r_A, r_A, ~VLAN_TAG_PRESENT);
- } else {
- PPC_ANDI(r_A, r_A, VLAN_TAG_PRESENT);
- PPC_SRWI(r_A, r_A, 12);
- }
- break;
- case BPF_ANC | SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- queue_mapping) != 2);
- PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
- queue_mapping));
- break;
- case BPF_ANC | SKF_AD_CPU:
-#ifdef CONFIG_SMP
- /*
- * PACA ptr is r13:
- * raw_smp_processor_id() = local_paca->paca_index
- */
- BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct,
- paca_index) != 2);
- PPC_LHZ_OFFS(r_A, 13,
- offsetof(struct paca_struct, paca_index));
-#else
- PPC_LI(r_A, 0);
-#endif
- break;
-
- /*** Absolute loads from packet header/data ***/
- case BPF_LD | BPF_W | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_word);
- goto common_load;
- case BPF_LD | BPF_H | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_half);
- goto common_load;
- case BPF_LD | BPF_B | BPF_ABS:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
- common_load:
- /* Load from [K]. */
- ctx->seen |= SEEN_DATAREF;
- PPC_LI64(r_scratch1, func);
- PPC_MTLR(r_scratch1);
- PPC_LI32(r_addr, K);
- PPC_BLRL();
- /*
- * Helper returns 'lt' condition on error, and an
- * appropriate return value in r3
- */
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- /*** Indirect loads from packet header/data ***/
- case BPF_LD | BPF_W | BPF_IND:
- func = sk_load_word;
- goto common_load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- func = sk_load_half;
- goto common_load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- func = sk_load_byte;
- common_load_ind:
- /*
- * Load from [X + K]. Negative offsets are tested for
- * in the helper functions.
- */
- ctx->seen |= SEEN_DATAREF | SEEN_XREG;
- PPC_LI64(r_scratch1, func);
- PPC_MTLR(r_scratch1);
- PPC_ADDI(r_addr, r_X, IMM_L(K));
- if (K >= 32768)
- PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
- PPC_BLRL();
- /* If error, cr0.LT set */
- PPC_BCC(COND_LT, exit_addr);
- break;
-
- case BPF_LDX | BPF_B | BPF_MSH:
- func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
- goto common_load;
- break;
-
- /*** Jump and branches ***/
- case BPF_JMP | BPF_JA:
- if (K != 0)
- PPC_JMP(addrs[i + 1 + K]);
- break;
-
- case BPF_JMP | BPF_JGT | BPF_K:
- case BPF_JMP | BPF_JGT | BPF_X:
- true_cond = COND_GT;
- goto cond_branch;
- case BPF_JMP | BPF_JGE | BPF_K:
- case BPF_JMP | BPF_JGE | BPF_X:
- true_cond = COND_GE;
- goto cond_branch;
- case BPF_JMP | BPF_JEQ | BPF_K:
- case BPF_JMP | BPF_JEQ | BPF_X:
- true_cond = COND_EQ;
- goto cond_branch;
- case BPF_JMP | BPF_JSET | BPF_K:
- case BPF_JMP | BPF_JSET | BPF_X:
- true_cond = COND_NE;
- /* Fall through */
- cond_branch:
- /* same targets, can avoid doing the test :) */
- if (filter[i].jt == filter[i].jf) {
- if (filter[i].jt > 0)
- PPC_JMP(addrs[i + 1 + filter[i].jt]);
- break;
- }
-
- switch (code) {
- case BPF_JMP | BPF_JGT | BPF_X:
- case BPF_JMP | BPF_JGE | BPF_X:
- case BPF_JMP | BPF_JEQ | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_CMPLW(r_A, r_X);
- break;
- case BPF_JMP | BPF_JSET | BPF_X:
- ctx->seen |= SEEN_XREG;
- PPC_AND_DOT(r_scratch1, r_A, r_X);
- break;
- case BPF_JMP | BPF_JEQ | BPF_K:
- case BPF_JMP | BPF_JGT | BPF_K:
- case BPF_JMP | BPF_JGE | BPF_K:
- if (K < 32768)
- PPC_CMPLWI(r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_CMPLW(r_A, r_scratch1);
- }
- break;
- case BPF_JMP | BPF_JSET | BPF_K:
- if (K < 32768)
- /* PPC_ANDI is /only/ dot-form */
- PPC_ANDI(r_scratch1, r_A, K);
- else {
- PPC_LI32(r_scratch1, K);
- PPC_AND_DOT(r_scratch1, r_A,
- r_scratch1);
- }
- break;
- }
- /* Sometimes branches are constructed "backward", with
- * the false path being the branch and true path being
- * a fallthrough to the next instruction.
- */
- if (filter[i].jt == 0)
- /* Swap the sense of the branch */
- PPC_BCC(true_cond ^ COND_CMP_TRUE,
- addrs[i + 1 + filter[i].jf]);
- else {
- PPC_BCC(true_cond, addrs[i + 1 + filter[i].jt]);
- if (filter[i].jf != 0)
- PPC_JMP(addrs[i + 1 + filter[i].jf]);
- }
- break;
- default:
- /* The filter contains something cruel & unusual.
- * We don't handle it, but also there shouldn't be
- * anything missing from our list.
- */
- if (printk_ratelimit())
- pr_err("BPF filter opcode %04x (@%d) unsupported\n",
- filter[i].code, i);
- return -ENOTSUPP;
+ if (jit_data) {
+ bpf_jit_binary_pack_finalize(jit_data->fhdr, jit_data->hdr);
+ kvfree(jit_data->addrs);
+ kfree(jit_data);
}
+ hdr = bpf_jit_binary_pack_hdr(fp);
+ bpf_jit_binary_pack_free(hdr, NULL);
+ WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
+ }
+
+ bpf_prog_unlock_free(fp);
+}
+bool bpf_jit_supports_kfunc_call(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_arena(void)
+{
+ return IS_ENABLED(CONFIG_PPC64);
+}
+
+bool bpf_jit_supports_far_kfunc_call(void)
+{
+ return IS_ENABLED(CONFIG_PPC64);
+}
+
+bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
+{
+ if (!in_arena)
+ return true;
+ switch (insn->code) {
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ if (bpf_atomic_is_load_store(insn))
+ return false;
+ return IS_ENABLED(CONFIG_PPC64);
}
- /* Set end-of-body-code address for exit. */
- addrs[i] = ctx->idx * 4;
+ return true;
+}
+void *arch_alloc_bpf_trampoline(unsigned int size)
+{
+ return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns);
+}
+
+void arch_free_bpf_trampoline(void *image, unsigned int size)
+{
+ bpf_prog_pack_free(image, size);
+}
+
+int arch_protect_bpf_trampoline(void *image, unsigned int size)
+{
return 0;
}
-void bpf_jit_compile(struct bpf_prog *fp)
+static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx,
+ struct bpf_tramp_link *l, int regs_off, int retval_off,
+ int run_ctx_off, bool save_ret)
{
- unsigned int proglen;
- unsigned int alloclen;
- u32 *image = NULL;
- u32 *code_base;
- unsigned int *addrs;
- struct codegen_context cgctx;
- int pass;
- int flen = fp->len;
+ struct bpf_prog *p = l->link.prog;
+ ppc_inst_t branch_insn;
+ u32 jmp_idx;
+ int ret = 0;
+
+ /* Save cookie */
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ PPC_LI64(_R3, l->cookie);
+ EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx,
+ bpf_cookie)));
+ } else {
+ PPC_LI32(_R3, l->cookie >> 32);
+ PPC_LI32(_R4, l->cookie);
+ EMIT(PPC_RAW_STW(_R3, _R1,
+ run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie)));
+ EMIT(PPC_RAW_STW(_R4, _R1,
+ run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4));
+ }
- if (!bpf_jit_enable)
- return;
+ /* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */
+ PPC_LI_ADDR(_R3, p);
+ EMIT(PPC_RAW_MR(_R25, _R3));
+ EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off));
+ ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
+ (unsigned long)bpf_trampoline_enter(p));
+ if (ret)
+ return ret;
- addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
- if (addrs == NULL)
- return;
+ /* Remember prog start time returned by __bpf_prog_enter */
+ EMIT(PPC_RAW_MR(_R26, _R3));
/*
- * There are multiple assembly passes as the generated code will change
- * size as it settles down, figuring out the max branch offsets/exit
- * paths required.
- *
- * The range of standard conditional branches is +/- 32Kbytes. Since
- * BPF_MAXINSNS = 4096, we can only jump from (worst case) start to
- * finish with 8 bytes/instruction. Not feasible, so long jumps are
- * used, distinct from short branches.
- *
- * Current:
+ * if (__bpf_prog_enter(p) == 0)
+ * goto skip_exec_of_prog;
*
- * For now, both branch types assemble to 2 words (short branches padded
- * with a NOP); this is less efficient, but assembly will always complete
- * after exactly 3 passes:
- *
- * First pass: No code buffer; Program is "faux-generated" -- no code
- * emitted but maximum size of output determined (and addrs[] filled
- * in). Also, we note whether we use M[], whether we use skb data, etc.
- * All generation choices assumed to be 'worst-case', e.g. branches all
- * far (2 instructions), return path code reduction not available, etc.
- *
- * Second pass: Code buffer allocated with size determined previously.
- * Prologue generated to support features we have seen used. Exit paths
- * determined and addrs[] is filled in again, as code may be slightly
- * smaller as a result.
+ * Emit a nop to be later patched with conditional branch, once offset is known
+ */
+ EMIT(PPC_RAW_CMPLI(_R3, 0));
+ jmp_idx = ctx->idx;
+ EMIT(PPC_RAW_NOP());
+
+ /* p->bpf_func(ctx) */
+ EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off));
+ if (!p->jited)
+ PPC_LI_ADDR(_R4, (unsigned long)p->insnsi);
+ /* Account for max possible instructions during dummy pass for size calculation */
+ if (image && !create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx],
+ (unsigned long)p->bpf_func,
+ BRANCH_SET_LINK)) {
+ image[ctx->idx] = ppc_inst_val(branch_insn);
+ ctx->idx++;
+ } else {
+ EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func)));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ }
+
+ if (save_ret)
+ EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
+
+ /* Fix up branch */
+ if (image) {
+ if (create_cond_branch(&branch_insn, &image[jmp_idx],
+ (unsigned long)&image[ctx->idx], COND_EQ << 16))
+ return -EINVAL;
+ image[jmp_idx] = ppc_inst_val(branch_insn);
+ }
+
+ /* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */
+ EMIT(PPC_RAW_MR(_R3, _R25));
+ EMIT(PPC_RAW_MR(_R4, _R26));
+ EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off));
+ ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
+ (unsigned long)bpf_trampoline_exit(p));
+
+ return ret;
+}
+
+static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx,
+ struct bpf_tramp_links *tl, int regs_off, int retval_off,
+ int run_ctx_off, u32 *branches)
+{
+ int i;
+
+ /*
+ * The first fmod_ret program will receive a garbage return value.
+ * Set this to 0 to avoid confusing the program.
+ */
+ EMIT(PPC_RAW_LI(_R3, 0));
+ EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
+ for (i = 0; i < tl->nr_links; i++) {
+ if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off,
+ run_ctx_off, true))
+ return -EINVAL;
+
+ /*
+ * mod_ret prog stored return value after prog ctx. Emit:
+ * if (*(u64 *)(ret_val) != 0)
+ * goto do_fexit;
+ */
+ EMIT(PPC_RAW_LL(_R3, _R1, retval_off));
+ EMIT(PPC_RAW_CMPLI(_R3, 0));
+
+ /*
+ * Save the location of the branch and generate a nop, which is
+ * replaced with a conditional jump once do_fexit (i.e. the
+ * start of the fexit invocation) is finalized.
+ */
+ branches[i] = ctx->idx;
+ EMIT(PPC_RAW_NOP());
+ }
+
+ return 0;
+}
+
+static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx,
+ int func_frame_offset, int r4_off)
+{
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ /*
+ * 64-bit: replicate the tail call count before the original BPF
+ * prog is called. It is read tailcallcnt_offset bytes below the
+ * traced function's frame (r1 + func_frame_offset) and written
+ * the same distance below the trampoline's own r1, using r3 as
+ * scratch.
+ * See bpf_jit_stack_tailcallcnt()
+ */
+ int tailcallcnt_offset = 7 * 8;
+
+ EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
+ EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset));
+ } else {
+ /*
+ * 32-bit: the count travels in r4, so reload r4 from the r4_off
+ * slot of the trampoline frame.
+ * See bpf_jit_stack_offsetof() and BPF_PPC_TC
+ */
+ EMIT(PPC_RAW_LL(_R4, _R1, r4_off));
+ }
+}
+
+static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx,
+ int func_frame_offset, int r4_off)
+{
+ if (IS_ENABLED(CONFIG_PPC64)) {
+ /*
+ * 64-bit: inverse of bpf_trampoline_setup_tail_call_cnt(). The
+ * updated count is read tailcallcnt_offset bytes below the
+ * trampoline's r1 and written back the same distance below the
+ * traced function's frame (r1 + func_frame_offset), using r3 as
+ * scratch.
+ * See bpf_jit_stack_tailcallcnt()
+ */
+ int tailcallcnt_offset = 7 * 8;
+
+ EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset));
+ EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
+ } else {
+ /*
+ * 32-bit: store r4 back into the r4_off slot of the trampoline
+ * frame.
+ * See bpf_jit_stack_offsetof() and BPF_PPC_TC
+ */
+ EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
+ }
+}
+
+static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset,
+ int nr_regs, int regs_off)
+{
+ int param_save_area_offset; /* from our r1 to the parameter save area in the traced function's stack frame */
+
+ param_save_area_offset = func_frame_offset; /* step over the two frames we allotted (dummy + trampoline) */
+ param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
+
+ for (int i = 0; i < nr_regs; i++) { /* copy every argument slot into the prog ctx area at regs_off */
+ if (i < 8) { /* the first 8 slots are taken from r3..r10 */
+ EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL));
+ } else { /* later slots are read from the parameter save area; r3 is free as scratch because it was stored at i == 0 */
+ EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL));
+ EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL));
+ }
+ }
+}
+
+/*
+ * Used when restoring just the register parameters when returning back.
+ *
+ * Reloads r3 onwards from the argument slots saved at regs_off, one SZL-sized
+ * slot per register. At most 8 registers (r3..r10) are reloaded even when
+ * nr_regs is larger.
+ */
+static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx,
+ int nr_regs, int regs_off)
+{
+ for (int i = 0; i < nr_regs && i < 8; i++)
+ EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL));
+}
+
+/*
+ * Used when we call into the traced function. Replicate parameter save area.
+ *
+ * Argument slots 8 and above are copied from the parameter save area in the
+ * traced function's stack frame into the parameter save area of the
+ * trampoline frame (STACK_FRAME_MIN_SIZE above our r1), using r3 as scratch.
+ * The register arguments are reloaded last, so r3 ends up holding the first
+ * argument again.
+ */
+static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx,
+ int func_frame_offset, int nr_regs, int regs_off)
+{
+ int param_save_area_offset;
+
+ param_save_area_offset = func_frame_offset; /* step over the two frames we allotted (dummy + trampoline) */
+ param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */
+
+ for (int i = 8; i < nr_regs; i++) {
+ EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL));
+ EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL));
+ }
+ bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off);
+}
+
+static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
+ void *rw_image_end, void *ro_image,
+ const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0;
+ int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset;
+ struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
+ struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
+ struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
+ struct codegen_context codegen_ctx, *ctx;
+ u32 *image = (u32 *)rw_image;
+ ppc_inst_t branch_insn;
+ u32 *branches = NULL;
+ bool save_ret;
+
+ if (IS_ENABLED(CONFIG_PPC32))
+ return -EOPNOTSUPP;
+
+ nr_regs = m->nr_args;
+ /* Extra registers for struct arguments */
+ for (i = 0; i < m->nr_args; i++)
+ if (m->arg_size[i] > SZL)
+ nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1;
+
+ if (nr_regs > MAX_BPF_FUNC_ARGS)
+ return -EOPNOTSUPP;
+
+ ctx = &codegen_ctx;
+ memset(ctx, 0, sizeof(*ctx));
+
+ /*
+ * Generated stack layout:
*
- * Third pass: Code generated 'for real', and branch destinations
- * determined from now-accurate addrs[] map.
+ * func prev back chain [ back chain ]
+ * [ ]
+ * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc)
+ * [ ] --
+ * LR save area [ r0 save (64-bit) ] | header
+ * [ r0 save (32-bit) ] |
+ * dummy frame for unwind [ back chain 1 ] --
+ * [ padding ] align stack frame
+ * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc
+ * alt_lr_off [ real lr (ool stub)] optional - actual lr
+ * [ r26 ]
+ * nvr_off [ r25 ] nvr save area
+ * retval_off [ return value ]
+ * [ reg argN ]
+ * [ ... ]
+ * regs_off [ reg_arg1 ] prog ctx context
+ * nregs_off [ args count ]
+ * ip_off [ traced function ]
+ * [ ... ]
+ * run_ctx_off [ bpf_tramp_run_ctx ]
+ * [ reg argN ]
+ * [ ... ]
+ * param_save_area [ reg_arg1 ] min 8 doublewords, per ABI
+ * [ TOC save (64-bit) ] --
+ * [ LR save (64-bit) ] | header
+ * [ LR save (32-bit) ] |
+ * bpf trampoline frame [ back chain 2 ] --
*
- * Ideal:
+ */
+
+ /* Minimum stack frame header */
+ bpf_frame_size = STACK_FRAME_MIN_SIZE;
+
+ /*
+ * Room for parameter save area.
*
- * If we optimise this, near branches will be shorter. On the
- * first assembly pass, we should err on the side of caution and
- * generate the biggest code. On subsequent passes, branches will be
- * generated short or long and code size will reduce. With smaller
- * code, more branches may fall into the short category, and code will
- * reduce more.
+ * As per the ABI, this is required if we call into the traced
+ * function (BPF_TRAMP_F_CALL_ORIG):
+ * - if the function takes more than 8 arguments for the rest to spill onto the stack
+ * - or, if the function has variadic arguments
+ * - or, if this function's prototype was not available to the caller
*
- * Finally, if we see one pass generate code the same size as the
- * previous pass we have converged and should now generate code for
- * real. Allocating at the end will also save the memory that would
- * otherwise be wasted by the (small) current code shrinkage.
- * Preferably, we should do a small number of passes (e.g. 5) and if we
- * haven't converged by then, get impatient and force code to generate
- * as-is, even if the odd branch would be left long. The chances of a
- * long jump are tiny with all but the most enormous of BPF filter
- * inputs, so we should usually converge on the third pass.
+ * Reserve space for at least 8 registers for now. This can be optimized later.
*/
+ bpf_frame_size += (nr_regs > 8 ? nr_regs : 8) * SZL;
- cgctx.idx = 0;
- cgctx.seen = 0;
- cgctx.pc_ret0 = -1;
- /* Scouting faux-generate pass 0 */
- if (bpf_jit_build_body(fp, 0, &cgctx, addrs))
- /* We hit something illegal or unsupported. */
- goto out;
+ /* Room for struct bpf_tramp_run_ctx */
+ run_ctx_off = bpf_frame_size;
+ bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL);
+
+ /* Room for IP address argument */
+ ip_off = bpf_frame_size;
+ if (flags & BPF_TRAMP_F_IP_ARG)
+ bpf_frame_size += SZL;
+
+ /* Room for args count */
+ nregs_off = bpf_frame_size;
+ bpf_frame_size += SZL;
+
+ /* Room for args */
+ regs_off = bpf_frame_size;
+ bpf_frame_size += nr_regs * SZL;
+
+ /* Room for return value of func_addr or fentry prog */
+ retval_off = bpf_frame_size;
+ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+ if (save_ret)
+ bpf_frame_size += SZL;
+
+ /* Room for nvr save area */
+ nvr_off = bpf_frame_size;
+ bpf_frame_size += 2 * SZL;
+
+ /* Optional save area for actual LR in case of ool ftrace */
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
+ alt_lr_off = bpf_frame_size;
+ bpf_frame_size += SZL;
+ }
+
+ if (IS_ENABLED(CONFIG_PPC32)) {
+ if (nr_regs < 2) {
+ r4_off = bpf_frame_size;
+ bpf_frame_size += SZL;
+ } else {
+ r4_off = regs_off + SZL;
+ }
+ }
+
+ /* Padding to align stack frame, if any */
+ bpf_frame_size = round_up(bpf_frame_size, SZL * 2);
+
+ /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */
+ bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64;
+
+ /* Offset to the traced function's stack frame */
+ func_frame_offset = bpf_dummy_frame_size + bpf_frame_size;
+
+ /* Create dummy frame for unwind, store original return value */
+ EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF));
+ /* Protect red zone where tail call count goes */
+ EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size));
+
+ /* Create our stack frame */
+ EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size));
+
+ /* 64-bit: Save TOC and load kernel TOC */
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ EMIT(PPC_RAW_STD(_R2, _R1, 24));
+ PPC64_LOAD_PACA();
+ }
+
+ /* 32-bit: save tail call count in r4 */
+ if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2)
+ EMIT(PPC_RAW_STL(_R4, _R1, r4_off));
+
+ bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off);
+
+ /* Save our return address */
+ EMIT(PPC_RAW_MFLR(_R3));
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+ EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off));
+ else
+ EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
/*
- * Pretend to build prologue, given the features we've seen. This will
- * update ctgtx.idx as it pretends to output instructions, then we can
- * calculate total size from idx.
+ * Save ip address of the traced function.
+ * We could recover this from LR, but we would then need to account for the
+ * OOL trampoline and the optional GEP area.
*/
- bpf_jit_build_prologue(fp, 0, &cgctx);
- bpf_jit_build_epilogue(0, &cgctx);
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) {
+ EMIT(PPC_RAW_LWZ(_R4, _R3, 4));
+ EMIT(PPC_RAW_SLWI(_R4, _R4, 6));
+ EMIT(PPC_RAW_SRAWI(_R4, _R4, 6));
+ EMIT(PPC_RAW_ADD(_R3, _R3, _R4));
+ EMIT(PPC_RAW_ADDI(_R3, _R3, 4));
+ }
- proglen = cgctx.idx * 4;
- alloclen = proglen + FUNCTION_DESCR_SIZE;
- image = module_alloc(alloclen);
- if (!image)
- goto out;
+ if (flags & BPF_TRAMP_F_IP_ARG)
+ EMIT(PPC_RAW_STL(_R3, _R1, ip_off));
- code_base = image + (FUNCTION_DESCR_SIZE/4);
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE))
+ /* Fake our LR for unwind */
+ EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
- /* Code generation passes 1-2 */
- for (pass = 1; pass < 3; pass++) {
- /* Now build the prologue, body code & epilogue for real. */
- cgctx.idx = 0;
- bpf_jit_build_prologue(fp, code_base, &cgctx);
- bpf_jit_build_body(fp, code_base, &cgctx, addrs);
- bpf_jit_build_epilogue(code_base, &cgctx);
+ /* Save function arg count -- see bpf_get_func_arg_cnt() */
+ EMIT(PPC_RAW_LI(_R3, nr_regs));
+ EMIT(PPC_RAW_STL(_R3, _R1, nregs_off));
- if (bpf_jit_enable > 1)
- pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
- proglen - (cgctx.idx * 4), cgctx.seen);
+ /* Save nv regs */
+ EMIT(PPC_RAW_STL(_R25, _R1, nvr_off));
+ EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL));
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ PPC_LI_ADDR(_R3, (unsigned long)im);
+ ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
+ (unsigned long)__bpf_tramp_enter);
+ if (ret)
+ return ret;
}
- if (bpf_jit_enable > 1)
- /* Note that we output the base address of the code_base
- * rather than image, since opcodes are in code_base.
+ for (i = 0; i < fentry->nr_links; i++)
+ if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off,
+ run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET))
+ return -EINVAL;
+
+ if (fmod_ret->nr_links) {
+ branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL);
+ if (!branches)
+ return -ENOMEM;
+
+ if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off,
+ run_ctx_off, branches)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+ }
+
+ /* Call the traced function */
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ /*
+ * The address in LR save area points to the correct point in the original function
+ * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction
+ * sequence
*/
- bpf_jit_dump(flen, proglen, pass, code_base);
+ EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF));
+ EMIT(PPC_RAW_MTCTR(_R3));
- if (image) {
- bpf_flush_icache(code_base, code_base + (proglen/4));
- /* Function descriptor nastiness: Address + TOC */
- ((u64 *)image)[0] = (u64)code_base;
- ((u64 *)image)[1] = local_paca->kernel_toc;
- fp->bpf_func = (void *)image;
- fp->jited = 1;
+ /* Replicate tail_call_cnt before calling the original BPF prog */
+ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
+
+ /* Restore args */
+ bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off);
+
+ /* Restore TOC for 64-bit */
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ EMIT(PPC_RAW_LD(_R2, _R1, 24));
+ EMIT(PPC_RAW_BCTRL());
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ PPC64_LOAD_PACA();
+
+ /* Store return value for bpf prog to access */
+ EMIT(PPC_RAW_STL(_R3, _R1, retval_off));
+
+ /* Restore updated tail_call_cnt */
+ if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+ bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off);
+
+ /* Reserve space to patch branch instruction to skip fexit progs */
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_after_call = &((u32 *)ro_image)[ctx->idx];
+ EMIT(PPC_RAW_NOP());
+ }
+
+ /* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */
+ for (i = 0; i < fmod_ret->nr_links && image; i++) {
+ if (create_cond_branch(&branch_insn, &image[branches[i]],
+ (unsigned long)&image[ctx->idx], COND_NE << 16)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ image[branches[i]] = ppc_inst_val(branch_insn);
+ }
+
+ for (i = 0; i < fexit->nr_links; i++)
+ if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off,
+ run_ctx_off, false)) {
+ ret = -EINVAL;
+ goto cleanup;
+ }
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ if (ro_image) /* image is NULL for dummy pass */
+ im->ip_epilogue = &((u32 *)ro_image)[ctx->idx];
+ PPC_LI_ADDR(_R3, im);
+ ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx,
+ (unsigned long)__bpf_tramp_exit);
+ if (ret)
+ goto cleanup;
}
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off);
+
+ /* Restore return value of func_addr or fentry prog */
+ if (save_ret)
+ EMIT(PPC_RAW_LL(_R3, _R1, retval_off));
+
+ /* Restore nv regs */
+ EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL));
+ EMIT(PPC_RAW_LL(_R25, _R1, nvr_off));
+
+ /* Epilogue */
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ EMIT(PPC_RAW_LD(_R2, _R1, 24));
+ if (flags & BPF_TRAMP_F_SKIP_FRAME) {
+ /* Skip the traced function and return to parent */
+ EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+ EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
+ EMIT(PPC_RAW_MTLR(_R0));
+ EMIT(PPC_RAW_BLR());
+ } else {
+ if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) {
+ EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off));
+ EMIT(PPC_RAW_MTLR(_R0));
+ EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+ EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
+ EMIT(PPC_RAW_BLR());
+ } else {
+ EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF));
+ EMIT(PPC_RAW_MTCTR(_R0));
+ EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset));
+ EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF));
+ EMIT(PPC_RAW_MTLR(_R0));
+ EMIT(PPC_RAW_BCTR());
+ }
+ }
+
+ /* Make sure the trampoline generation logic doesn't overflow */
+ if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) {
+ ret = -EFAULT;
+ goto cleanup;
+ }
+ ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4;
+
+cleanup:
+ kfree(branches);
+ return ret;
+}
+
+int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks, void *func_addr)
+{
+ /* Scratch descriptor: the generator wants one even for a sizing run. */
+ struct bpf_tramp_image scratch_im;
+
+ /*
+ * Dummy pass: all three image pointers are NULL. The result is whatever
+ * the generator reports, i.e. its byte count or a negative errno.
+ */
+ return __arch_prepare_bpf_trampoline(&scratch_im, NULL, NULL, NULL, m, flags, tlinks,
+ func_addr);
+}
+
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
+ const struct btf_func_model *m, u32 flags,
+ struct bpf_tramp_links *tlinks,
+ void *func_addr)
+{
+ u32 size = image_end - image;
+ void *rw_image, *tmp;
+ int ret;
+
+ /*
+ * rw_image doesn't need to be in module memory range, so we can
+ * use kvmalloc.
+ */
+ rw_image = kvmalloc(size, GFP_KERNEL);
+ if (!rw_image)
+ return -ENOMEM;
+
+ ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
+ flags, tlinks, func_addr);
+ if (ret < 0)
+ goto out;
+
+ if (bpf_jit_enable > 1)
+ bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image);
+
+ tmp = bpf_arch_text_copy(image, rw_image, size);
+ if (IS_ERR(tmp))
+ ret = PTR_ERR(tmp);
+
out:
- kfree(addrs);
- return;
+ kvfree(rw_image);
+ return ret;
}
-void bpf_jit_free(struct bpf_prog *fp)
+static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst)
+{
+ unsigned long where = (unsigned long)ip;
+ ppc_inst_t cur_inst;
+
+ /* Read what is currently at ip without risking a fault. */
+ if (copy_inst_from_kernel_nofault(&cur_inst, ip)) {
+ pr_err("0x%lx: fetching instruction failed\n", where);
+ return -EFAULT;
+ }
+
+ /* Refuse to patch unless the text still holds what the caller expects. */
+ if (!ppc_inst_equal(cur_inst, old_inst)) {
+ pr_err("0x%lx: expected (%08lx) != found (%08lx)\n",
+ where, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(cur_inst));
+ return -EINVAL;
+ }
+
+ /* Nothing to write when old and new are the same instruction. */
+ return ppc_inst_equal(old_inst, new_inst) ? 0 : patch_instruction(ip, new_inst);
+}
+
+static void do_isync(void *info __maybe_unused)
{
- if (fp->jited)
- module_free(NULL, fp->bpf_func);
- kfree(fp);
+ isync();
+}
+
+/*
+ * A 3-step process for bpf prog entry:
+ * 1. At bpf prog entry, a single nop/b:
+ * bpf_func:
+ * [nop|b] ool_stub
+ * 2. Out-of-line stub:
+ * ool_stub:
+ * mflr r0
+ * [b|bl] <bpf_prog>/<long_branch_stub>
+ * mtlr r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only
+ * b bpf_func + 4
+ * 3. Long branch stub:
+ * long_branch_stub:
+ * .long <branch_addr>/<dummy_tramp>
+ * mflr r11
+ * bcl 20,31,$+4
+ * mflr r12
+ * ld r12, -16(r12)
+ * mtctr r12
+ * mtlr r11 // needed to retain ftrace ABI
+ * bctr
+ *
+ * dummy_tramp is used to reduce synchronization requirements.
+ *
+ * When attaching a bpf trampoline to a bpf prog, we do not need any
+ * synchronization here since we always have a valid branch target regardless
+ * of the order in which the above stores are seen. dummy_tramp ensures that
+ * the long_branch stub goes to a valid destination on other cpus, even when
+ * the branch to the long_branch stub is seen before the updated trampoline
+ * address.
+ *
+ * However, when detaching a bpf trampoline from a bpf prog, or if changing
+ * the bpf trampoline address, we need synchronization to ensure that other
+ * cpus can no longer branch into the older trampoline so that it can be
+ * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus
+ * make forward progress, but we still need to ensure that other cpus
+ * execute isync (or some CSI) so that they don't go back into the
+ * trampoline again.
+ */
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
+ void *old_addr, void *new_addr)
+{
+ /*
+ * ip: address being poked; either a bpf prog entry or a location inside
+ * a bpf prog/trampoline (non-zero offset from the symbol start).
+ * poke_type: BPF_MOD_CALL makes the patched branch a linking branch.
+ * old_addr: branch target expected to be in place now, NULL for a nop.
+ * new_addr: branch target to install, NULL for a nop.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for unsupported requests, -ERANGE when
+ * a needed branch cannot be encoded, or the error from the patching helpers.
+ */
+ unsigned long bpf_func, bpf_func_end, size, offset;
+ ppc_inst_t old_inst, new_inst;
+ int ret = 0, branch_flags;
+ char name[KSYM_NAME_LEN];
+ bool old_far, new_far;
+
+ if (IS_ENABLED(CONFIG_PPC32))
+ return -EOPNOTSUPP;
+
+ bpf_func = (unsigned long)ip;
+ branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0;
+
+ /* We currently only support poking bpf programs */
+ if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) {
+ pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func);
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * If we are not poking at bpf prog entry, then we are simply patching in/out
+ * an unconditional branch instruction at im->ip_after_call
+ */
+ if (offset) {
+ if (poke_type != BPF_MOD_JUMP) {
+ pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__,
+ bpf_func);
+ return -EOPNOTSUPP;
+ }
+ old_inst = ppc_inst(PPC_RAW_NOP());
+ if (old_addr)
+ if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0))
+ return -ERANGE;
+ new_inst = ppc_inst(PPC_RAW_NOP());
+ if (new_addr)
+ if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0))
+ return -ERANGE;
+ mutex_lock(&text_mutex);
+ ret = bpf_modify_inst(ip, old_inst, new_inst);
+ mutex_unlock(&text_mutex);
+
+ /* Make sure all cpus see the new instruction */
+ smp_call_function(do_isync, NULL, 1);
+ return ret;
+ }
+
+ bpf_func_end = bpf_func + size;
+
+ /* Address of the jmp/call instruction in the out-of-line stub */
+ ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4);
+
+ if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) {
+ pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__,
+ bpf_func);
+ return -ERANGE;
+ }
+
+ /*
+ * A target is "far" when a direct branch from the ool stub cannot reach it
+ * and the long branch stub has to be used instead. Decide this once, from
+ * the real branch displacement (target - ip), and reuse the answer for both
+ * the instruction we build and the long branch stub update below. The
+ * branch range is not symmetric around zero, so testing the negated
+ * displacement gives a different answer at the limits.
+ */
+ old_far = old_addr && !is_offset_in_branch_range(old_addr - ip);
+ new_far = new_addr && !is_offset_in_branch_range(new_addr - ip);
+
+ /* As in the offset path above, give up if a branch cannot be encoded. */
+ old_inst = ppc_inst(PPC_RAW_NOP());
+ if (old_addr) {
+ if (create_branch(&old_inst, ip,
+ old_far ? bpf_func_end - bpf_jit_long_branch_stub :
+ (unsigned long)old_addr, branch_flags))
+ return -ERANGE;
+ }
+ new_inst = ppc_inst(PPC_RAW_NOP());
+ if (new_addr) {
+ if (create_branch(&new_inst, ip,
+ new_far ? bpf_func_end - bpf_jit_long_branch_stub :
+ (unsigned long)new_addr, branch_flags))
+ return -ERANGE;
+ }
+
+ mutex_lock(&text_mutex);
+
+ /*
+ * 1. Update the address in the long branch stub:
+ * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr
+ * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here.
+ */
+ if (new_far || old_far)
+ ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL),
+ new_far ? (unsigned long)new_addr : (unsigned long)dummy_tramp);
+ if (ret)
+ goto out;
+
+ /* 2. Update the branch/call in the out-of-line stub */
+ ret = bpf_modify_inst(ip, old_inst, new_inst);
+ if (ret)
+ goto out;
+
+ /* 3. Update instruction at bpf prog entry */
+ ip = (void *)bpf_func;
+ if (!old_addr || !new_addr) {
+ /* The ool stub was verified to be within branch range of bpf_func above. */
+ if (!old_addr) {
+ old_inst = ppc_inst(PPC_RAW_NOP());
+ create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0);
+ } else {
+ new_inst = ppc_inst(PPC_RAW_NOP());
+ create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0);
+ }
+ ret = bpf_modify_inst(ip, old_inst, new_inst);
+ }
+
+out:
+ mutex_unlock(&text_mutex);
+
+ /*
+ * Sync only if we are not attaching a trampoline to a bpf prog so the older
+ * trampoline can be freed safely.
+ */
+ if (old_addr)
+ smp_call_function(do_isync, NULL, 1);
+
+ return ret;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
new file mode 100644
index 000000000000..3087e744fb25
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -0,0 +1,1388 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler for PPC32
+ *
+ * Copyright 2020 Christophe Leroy <christophe.leroy@csgroup.eu>
+ * CS GROUP France
+ *
+ * Based on PPC64 eBPF JIT compiler by Naveen N. Rao
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ * [ prev sp ] <-------------
+ * [ nv gpr save area ] 16 * 4 |
+ * fp (r31) --> [ ebpf stack space ] upto 512 |
+ * [ frame header ] 16 |
+ * sp (r1) ---> [ stack pointer ] --------------
+ */
+
+/* for gpr non volatile registers r17 to r31 (15) + tail call */
+#define BPF_PPC_STACK_SAVE (15 * 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx) (STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_SAVE + (ctx)->stack_size)
+
+#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+
+/* PPC NVR range -- update this if we ever use NVRs below r17 */
+#define BPF_PPC_NVR_MIN _R17
+#define BPF_PPC_TC _R16
+
+/* BPF register usage */
+#define TMP_REG (MAX_BPF_JIT_REG + 0)
+
+/*
+ * BPF to ppc register mappings.
+ *
+ * Fills ctx->b2p[] with the ppc register used for each BPF register. The
+ * rest of this file treats the register numbered one lower as the companion
+ * holding the high word of a 64-bit BPF value (see the "- 1" in the prologue
+ * and in bpf_jit_build_body()). TMP_REG is the exception: it is a single
+ * 32-bit register.
+ */
+void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
+{
+ /* function return value */
+ ctx->b2p[BPF_REG_0] = _R12;
+ /* function arguments */
+ ctx->b2p[BPF_REG_1] = _R4;
+ ctx->b2p[BPF_REG_2] = _R6;
+ ctx->b2p[BPF_REG_3] = _R8;
+ ctx->b2p[BPF_REG_4] = _R10;
+ ctx->b2p[BPF_REG_5] = _R22;
+ /* non volatile registers */
+ ctx->b2p[BPF_REG_6] = _R24;
+ ctx->b2p[BPF_REG_7] = _R26;
+ ctx->b2p[BPF_REG_8] = _R28;
+ ctx->b2p[BPF_REG_9] = _R30;
+ /* frame pointer aka BPF_REG_10 */
+ ctx->b2p[BPF_REG_FP] = _R18;
+ /* eBPF jit internal registers */
+ ctx->b2p[BPF_REG_AX] = _R20;
+ ctx->b2p[TMP_REG] = _R31; /* 32 bits */
+}
+
+static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+{
+ bool is_saved_nvr = reg >= BPF_PPC_NVR_MIN && reg < 32;
+
+ if (!is_saved_nvr && reg != BPF_PPC_TC) {
+ WARN(true, "BPF JIT is asking about unknown registers, will crash the stack");
+ /* Fall back to the slot left free for alignment */
+ return BPF_PPC_STACKFRAME(ctx) - 4;
+ }
+
+ /* Save slots sit at the top of the frame, 4 bytes each, with r31 highest */
+ return BPF_PPC_STACKFRAME(ctx) - 4 * (32 - reg);
+}
+
+#define SEEN_VREG_MASK 0x1ff80000 /* Volatile registers r3-r12 */
+#define SEEN_NVREG_FULL_MASK 0x0003ffff /* Non volatile registers r14-r31 */
+#define SEEN_NVREG_TEMP_MASK 0x00001e01 /* BPF_REG_5, BPF_REG_AX, TMP_REG */
+
+static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
+{
+ /*
+ * A stack frame is required as soon as the program calls other
+ * functions (kernel helpers), keeps a tail call counter or uses any
+ * non volatile register ...
+ */
+ if (ctx->seen & (SEEN_FUNC | SEEN_TAILCALL | SEEN_NVREG_FULL_MASK))
+ return true;
+
+ /*
+ * ... or when the bpf program uses its stack area, which is deduced
+ * from the usage of BPF_REG_FP.
+ */
+ return bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP));
+}
+
+void bpf_jit_realloc_regs(struct codegen_context *ctx)
+{
+ unsigned int nvreg_mask; /* which non volatile registers may be moved to volatile ones */
+
+ if (ctx->seen & SEEN_FUNC)
+ nvreg_mask = SEEN_NVREG_TEMP_MASK; /* with function calls, only BPF_REG_5, BPF_REG_AX and TMP_REG are candidates */
+ else
+ nvreg_mask = SEEN_NVREG_FULL_MASK;
+
+ while (ctx->seen & nvreg_mask &&
+ (ctx->seen & SEEN_VREG_MASK) != SEEN_VREG_MASK) { /* stop when no candidate is left or every volatile r3-r12 is taken */
+ int old = 32 - fls(ctx->seen & (nvreg_mask & 0xaaaaaaab)); /* NOTE(review): presumably seen bit (31 - r) tracks register r, so this is a register number - confirm against bpf_set_seen_register() */
+ int new = 32 - fls(~ctx->seen & (SEEN_VREG_MASK & 0xaaaaaaaa)); /* same decoding, applied to the volatile registers not yet seen */
+ int i;
+
+ for (i = BPF_REG_0; i <= TMP_REG; i++) { /* find the mapping that currently uses old */
+ if (ctx->b2p[i] != old)
+ continue;
+ ctx->b2p[i] = new;
+ bpf_set_seen_register(ctx, new);
+ bpf_clear_seen_register(ctx, old);
+ if (i != TMP_REG) { /* every mapping except TMP_REG also owns the register numbered one lower */
+ bpf_set_seen_register(ctx, new - 1);
+ bpf_clear_seen_register(ctx, old - 1);
+ }
+ break; /* at most one mapping is moved per pass of the while loop */
+ }
+ }
+}
+
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+{
+ int i;
+
+ /*
+ * Emits the program prologue. What gets emitted depends on the
+ * features recorded in ctx->seen.
+ *
+ * Instruction for trampoline attach
+ */
+ EMIT(PPC_RAW_NOP());
+
+ /*
+ * Initialize tail_call_cnt, to be skipped if we do tail calls.
+ * Together with the nop above this is BPF_TAILCALL_PROLOGUE_SIZE bytes:
+ * bpf_jit_emit_tail_call() jumps to bpf_func plus that size with the
+ * count already in r4.
+ */
+ if (ctx->seen & SEEN_TAILCALL)
+ EMIT(PPC_RAW_LI(_R4, 0));
+ else
+ EMIT(PPC_RAW_NOP());
+
+#define BPF_TAILCALL_PROLOGUE_SIZE 8
+
+ if (bpf_has_stack_frame(ctx))
+ EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx)));
+
+ if (ctx->seen & SEEN_TAILCALL)
+ EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+
+ /*
+ * First arg comes in as a 32 bits pointer.
+ * It is moved into BPF_REG_1 and the companion high register is zeroed.
+ */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3));
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0));
+
+ /*
+ * We need a stack frame, but we don't necessarily need to
+ * save/restore LR unless we call other functions
+ */
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MFLR(_R0));
+
+ /*
+ * Back up non-volatile regs -- the seen registers from
+ * BPF_PPC_NVR_MIN (r17) to r31
+ */
+ for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+ if (bpf_is_seen_register(ctx, i))
+ EMIT(PPC_RAW_STW(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
+
+ /* Setup frame pointer to point to the bpf stack area */
+ if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) {
+ EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_FP) - 1, 0));
+ EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
+ STACK_FRAME_MIN_SIZE + ctx->stack_size));
+ }
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_STW(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+}
+
+static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ int i;
+
+ /*
+ * Undoes the prologue; shared by the normal return path and by tail
+ * calls. No return or jump instruction is emitted here, that is left
+ * to the caller.
+ *
+ * Restore NVRs
+ */
+ for (i = BPF_PPC_NVR_MIN; i <= 31; i++)
+ if (bpf_is_seen_register(ctx, i))
+ EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i)));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF));
+
+ /* Tear down our stack frame */
+ if (bpf_has_stack_frame(ctx))
+ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx)));
+
+ if (ctx->seen & SEEN_FUNC)
+ EMIT(PPC_RAW_MTLR(_R0));
+
+}
+
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+{
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); /* return value: the register mapped to BPF_REG_0 is copied into r3 */
+
+ bpf_jit_emit_common_epilogue(image, ctx); /* restore registers and drop the frame */
+
+ EMIT(PPC_RAW_BLR());
+
+ bpf_jit_build_fentry_stubs(image, ctx); /* emitted after the blr, so not part of the normal return path */
+}
+
+/*
+ * Relative offset needs to be calculated based on final image location.
+ *
+ * Emits a call to func. The displacement is measured from the slot the
+ * instruction will occupy in fimage (the final image), not in image (the
+ * buffer being written). A single bl is used when image is non-NULL and the
+ * displacement lies in [-0x2000000, 0x2000000). Otherwise, including every
+ * pass where image is NULL, a four instruction lis/ori/mtctr/bctrl sequence
+ * through r0 is emitted. Always returns 0.
+ */
+int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
+{
+ s32 rel = (s32)func - (s32)(fimage + ctx->idx);
+
+ if (image && rel < 0x2000000 && rel >= -0x2000000) {
+ EMIT(PPC_RAW_BL(rel));
+ } else {
+ /* Load function address into r0 */
+ EMIT(PPC_RAW_LIS(_R0, IMM_H(func)));
+ EMIT(PPC_RAW_ORI(_R0, _R0, IMM_L(func)));
+ EMIT(PPC_RAW_MTCTR(_R0));
+ EMIT(PPC_RAW_BCTRL());
+ }
+
+ return 0;
+}
+
+static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+{
+ /*
+ * Emits the tail call sequence. 'out' is the target of the short
+ * conditional branches taken when the tail call must not happen.
+ * Always returns 0.
+ *
+ * By now, the eBPF program has already setup parameters in r3-r8
+ * r3-r4/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+ * r5-r6/BPF_REG_2 - pointer to bpf_array
+ * r7-r8/BPF_REG_3 - index in bpf_array
+ */
+ int b2p_bpf_array = bpf_to_ppc(BPF_REG_2);
+ int b2p_index = bpf_to_ppc(BPF_REG_3);
+
+ /*
+ * if (index >= array->map.max_entries)
+ * goto out;
+ *
+ * The saved tail_call_cnt is loaded into r0 between the compare and
+ * the branch, ready for the next check.
+ */
+ EMIT(PPC_RAW_LWZ(_R0, b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+ EMIT(PPC_RAW_CMPLW(b2p_index, _R0));
+ EMIT(PPC_RAW_LWZ(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC)));
+ PPC_BCC_SHORT(COND_GE, out);
+
+ /*
+ * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT(PPC_RAW_CMPLWI(_R0, MAX_TAIL_CALL_CNT));
+ /* tail_call_cnt++; */
+ EMIT(PPC_RAW_ADDIC(_R0, _R0, 1));
+ PPC_BCC_SHORT(COND_GE, out);
+
+ /* prog = array->ptrs[index]; */
+ EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29));
+ EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array));
+ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs)));
+
+ /*
+ * if (prog == NULL)
+ * goto out;
+ */
+ EMIT(PPC_RAW_CMPLWI(_R3, 0));
+ PPC_BCC_SHORT(COND_EQ, out);
+
+ /* goto *(prog->bpf_func + prologue_size); */
+ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func)));
+ EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE));
+ EMIT(PPC_RAW_MTCTR(_R3));
+
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1)));
+
+ /* Put tail_call_cnt in r4 */
+ EMIT(PPC_RAW_MR(_R4, _R0));
+
+ /* tear down the stack frame, restore NVRs, ... */
+ bpf_jit_emit_common_epilogue(image, ctx);
+
+ EMIT(PPC_RAW_BCTR());
+
+ /* out: */
+ return 0;
+}
+
+/*
+ * Assemble the body code between the prologue & epilogue.
+ *
+ * Walks the fp->len BPF instructions in fp->insnsi and emits the ppc32
+ * sequence for each one. A 64-bit BPF register is handled as a pair of
+ * 32-bit registers: the low word lives in bpf_to_ppc(reg) and the high
+ * word in the register numbered one below it (the *_h locals).
+ *
+ * @fp:         program being compiled; insnsi, len and aux->verifier_zext
+ *              are read here
+ * @image:      code buffer handed to the emit helpers (EMIT()/PPC_* are
+ *              defined outside this function and presumably write through it)
+ * @fimage:     second buffer pointer, only forwarded to
+ *              bpf_add_extable_entry() and bpf_jit_emit_func_call_rel()
+ * @ctx:        codegen state; ctx->idx is the current instruction index,
+ *              ctx->seen and the seen-register set are updated on the way
+ * @addrs:      addrs[i] is set to the byte offset of BPF instruction i from
+ *              the start of the body; addrs[fp->len] gets the end-of-body
+ *              offset and is also read up front as the exit address
+ * @pass:       forwarded to bpf_add_extable_entry()
+ * @extra_pass: forwarded to bpf_jit_get_func_addr()
+ *
+ * Returns 0 on success, -EINVAL for an invalid immediate (division or
+ * modulo by zero, negative 64-bit shift count), -EOPNOTSUPP for an
+ * operation that is not implemented, or the error reported by a helper.
+ */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
+		       u32 *addrs, int pass, bool extra_pass)
+{
+	const struct bpf_insn *insn = fp->insnsi;
+	int flen = fp->len;
+	int i, ret;
+
+	/* Start of epilogue code - will only be valid 2nd pass onwards */
+	u32 exit_addr = addrs[flen];
+
+	for (i = 0; i < flen; i++) {
+		u32 code = insn[i].code;
+		u32 prevcode = i ? insn[i - 1].code : 0;
+		u32 dst_reg = bpf_to_ppc(insn[i].dst_reg);
+		u32 dst_reg_h = dst_reg - 1;
+		u32 src_reg = bpf_to_ppc(insn[i].src_reg);
+		u32 src_reg_h = src_reg - 1;
+		u32 src2_reg = dst_reg;
+		u32 src2_reg_h = dst_reg_h;
+		u32 ax_reg = bpf_to_ppc(BPF_REG_AX);
+		u32 tmp_reg = bpf_to_ppc(TMP_REG);
+		u32 size = BPF_SIZE(code);
+		u32 save_reg, ret_reg;
+		s16 off = insn[i].off;
+		s32 imm = insn[i].imm;
+		bool func_addr_fixed;
+		u64 func_addr;
+		u32 true_cond;
+		u32 tmp_idx;
+
+		/*
+		 * If the previous instruction was a register-to-register MOV
+		 * into the same destination (and not the special zext mov32,
+		 * imm == 1), rewind ctx->idx to where that MOV was emitted and
+		 * use the MOV's source as second operand, so that the MOV and
+		 * this ALU operation are emitted as a single sequence.
+		 */
+		if (i && (BPF_CLASS(code) == BPF_ALU64 || BPF_CLASS(code) == BPF_ALU) &&
+		    (BPF_CLASS(prevcode) == BPF_ALU64 || BPF_CLASS(prevcode) == BPF_ALU) &&
+		    BPF_OP(prevcode) == BPF_MOV && BPF_SRC(prevcode) == BPF_X &&
+		    insn[i - 1].dst_reg == insn[i].dst_reg && insn[i - 1].imm != 1) {
+			src2_reg = bpf_to_ppc(insn[i - 1].src_reg);
+			src2_reg_h = src2_reg - 1;
+			ctx->idx = addrs[i - 1] / 4;
+		}
+
+		/*
+		 * addrs[] maps a BPF bytecode address into a real offset from
+		 * the start of the body code.
+		 */
+		addrs[i] = ctx->idx * 4;
+
+		/*
+		 * As an optimization, we note down which registers
+		 * are used so that we can only save/restore those in our
+		 * prologue and epilogue. We do this here regardless of whether
+		 * the actual BPF instruction uses src/dst registers or not
+		 * (for instance, BPF_CALL does not use them). The expectation
+		 * is that those instructions will have src_reg/dst_reg set to
+		 * 0. Even otherwise, we just lose some prologue/epilogue
+		 * optimization but everything else should work without
+		 * any issues.
+		 */
+		if (dst_reg >= 3 && dst_reg < 32) {
+			bpf_set_seen_register(ctx, dst_reg);
+			bpf_set_seen_register(ctx, dst_reg_h);
+		}
+
+		if (src_reg >= 3 && src_reg < 32) {
+			bpf_set_seen_register(ctx, src_reg);
+			bpf_set_seen_register(ctx, src_reg_h);
+		}
+
+		switch (code) {
+		/*
+		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+		 */
+		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+			EMIT(PPC_RAW_ADD(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+			EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_ADDE(dst_reg_h, src2_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+			EMIT(PPC_RAW_SUB(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+			EMIT(PPC_RAW_SUBFC(dst_reg, src_reg, src2_reg));
+			EMIT(PPC_RAW_SUBFE(dst_reg_h, src_reg_h, src2_reg_h));
+			break;
+		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+			imm = -imm;
+			fallthrough;
+		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (IMM_HA(imm) & 0xffff) {
+				EMIT(PPC_RAW_ADDIS(dst_reg, src2_reg, IMM_HA(imm)));
+				src2_reg = dst_reg;
+			}
+			if (IMM_L(imm))
+				EMIT(PPC_RAW_ADDI(dst_reg, src2_reg, IMM_L(imm)));
+			break;
+		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+			imm = -imm;
+			fallthrough;
+		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+				break;
+			}
+			if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_ADDIC(dst_reg, src2_reg, imm));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_ADDC(dst_reg, src2_reg, _R0));
+			}
+			if (imm >= 0 || (BPF_OP(code) == BPF_SUB && imm == 0x80000000))
+				EMIT(PPC_RAW_ADDZE(dst_reg_h, src2_reg_h));
+			else
+				EMIT(PPC_RAW_ADDME(dst_reg_h, src2_reg_h));
+			break;
+		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_MULW(_R0, src2_reg, src_reg_h));
+			EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_MULHWU(tmp_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
+			EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, tmp_reg));
+			break;
+		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+			EMIT(PPC_RAW_MULW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+			if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (imm == -1) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+			} else if (is_power_of_2((u32)imm)) {
+				EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, ilog2(imm)));
+			} else if (imm >= -32768 && imm < 32768) {
+				EMIT(PPC_RAW_MULI(dst_reg, src2_reg, imm));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_MULW(dst_reg, src2_reg, _R0));
+			}
+			break;
+		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+			if (!imm) {
+				PPC_LI32(dst_reg, 0);
+				PPC_LI32(dst_reg_h, 0);
+			} else if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else if (imm == -1) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+				EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+			} else if (imm > 0 && is_power_of_2(imm)) {
+				imm = ilog2(imm);
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_RLWIMI(dst_reg_h, dst_reg, imm, 32 - imm, 31));
+				EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm));
+			} else {
+				bpf_set_seen_register(ctx, tmp_reg);
+				PPC_LI32(tmp_reg, imm);
+				EMIT(PPC_RAW_MULW(dst_reg_h, src2_reg_h, tmp_reg));
+				if (imm < 0)
+					EMIT(PPC_RAW_SUB(dst_reg_h, dst_reg_h, src2_reg));
+				EMIT(PPC_RAW_MULHWU(_R0, src2_reg, tmp_reg));
+				EMIT(PPC_RAW_MULW(dst_reg, src2_reg, tmp_reg));
+				EMIT(PPC_RAW_ADD(dst_reg_h, dst_reg_h, _R0));
+			}
+			break;
+		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+			/* a non-zero off selects the signed divide */
+			if (off)
+				EMIT(PPC_RAW_DIVW(dst_reg, src2_reg, src_reg));
+			else
+				EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+			if (off)
+				EMIT(PPC_RAW_DIVW(_R0, src2_reg, src_reg));
+			else
+				EMIT(PPC_RAW_DIVWU(_R0, src2_reg, src_reg));
+			EMIT(PPC_RAW_MULW(_R0, src_reg, _R0));
+			EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+			break;
+		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+			return -EOPNOTSUPP;
+		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+			return -EOPNOTSUPP;
+		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+			if (!imm)
+				return -EINVAL;
+			if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (is_power_of_2((u32)imm)) {
+				if (off)
+					EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, ilog2(imm)));
+				else
+					EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, ilog2(imm)));
+			} else {
+				PPC_LI32(_R0, imm);
+				if (off)
+					EMIT(PPC_RAW_DIVW(dst_reg, src2_reg, _R0));
+				else
+					EMIT(PPC_RAW_DIVWU(dst_reg, src2_reg, _R0));
+			}
+			break;
+		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+			if (!imm)
+				return -EINVAL;
+
+			if (!is_power_of_2((u32)imm)) {
+				bpf_set_seen_register(ctx, tmp_reg);
+				PPC_LI32(tmp_reg, imm);
+				if (off)
+					EMIT(PPC_RAW_DIVW(_R0, src2_reg, tmp_reg));
+				else
+					EMIT(PPC_RAW_DIVWU(_R0, src2_reg, tmp_reg));
+				EMIT(PPC_RAW_MULW(_R0, tmp_reg, _R0));
+				EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+			} else if (imm == 1) {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			} else if (off) {
+				EMIT(PPC_RAW_SRAWI(_R0, src2_reg, ilog2(imm)));
+				EMIT(PPC_RAW_ADDZE(_R0, _R0));
+				EMIT(PPC_RAW_SLWI(_R0, _R0, ilog2(imm)));
+				EMIT(PPC_RAW_SUB(dst_reg, src2_reg, _R0));
+			} else {
+				imm = ilog2((u32)imm);
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - imm, 31));
+			}
+			break;
+		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+			if (!imm)
+				return -EINVAL;
+			if (imm < 0)
+				imm = -imm;
+			/* only power-of-2 moduli are handled for 64-bit operands */
+			if (!is_power_of_2(imm))
+				return -EOPNOTSUPP;
+			if (imm == 1) {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			} else if (off) {
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+				EMIT(PPC_RAW_XOR(dst_reg, src2_reg, dst_reg_h));
+				EMIT(PPC_RAW_SUBFC(dst_reg, dst_reg_h, dst_reg));
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 32 - ilog2(imm), 31));
+				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, dst_reg_h));
+				EMIT(PPC_RAW_SUBFC(dst_reg, dst_reg_h, dst_reg));
+				EMIT(PPC_RAW_SUBFE(dst_reg_h, dst_reg_h, dst_reg_h));
+			} else {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 32 - ilog2(imm), 31));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			}
+			break;
+		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+			if (!imm)
+				return -EINVAL;
+			/* only power-of-2 divisors are handled for 64-bit operands */
+			if (!is_power_of_2(abs(imm)))
+				return -EOPNOTSUPP;
+
+			if (imm < 0) {
+				EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+				EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+				imm = -imm;
+				src2_reg = dst_reg;
+			}
+			if (imm == 1) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else {
+				imm = ilog2(imm);
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm));
+			}
+			break;
+		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+			EMIT(PPC_RAW_NEG(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+			EMIT(PPC_RAW_SUBFIC(dst_reg, src2_reg, 0));
+			EMIT(PPC_RAW_SUBFZE(dst_reg_h, src2_reg_h));
+			break;
+
+		/*
+		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+		 */
+		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_AND(dst_reg_h, src2_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+			EMIT(PPC_RAW_AND(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+			if (imm >= 0)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			fallthrough;
+		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+			if (!IMM_H(imm)) {
+				EMIT(PPC_RAW_ANDI(dst_reg, src2_reg, IMM_L(imm)));
+			} else if (!IMM_L(imm)) {
+				EMIT(PPC_RAW_ANDIS(dst_reg, src2_reg, IMM_H(imm)));
+			} else if (imm == (((1 << fls(imm)) - 1) ^ ((1 << (ffs(imm) - 1)) - 1))) {
+				/*
+				 * imm is one contiguous run of set bits (from its
+				 * lowest to its highest set bit), so a single
+				 * rlwinm can do the masking. The run must be
+				 * derived from imm itself, not from the loop
+				 * index.
+				 */
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0,
+						    32 - fls(imm), 32 - ffs(imm)));
+			} else {
+				PPC_LI32(_R0, imm);
+				EMIT(PPC_RAW_AND(dst_reg, src2_reg, _R0));
+			}
+			break;
+		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+			EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, src2_reg_h, src_reg_h));
+			break;
+		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+			EMIT(PPC_RAW_OR(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+			/* Sign-extended */
+			if (imm < 0)
+				EMIT(PPC_RAW_LI(dst_reg_h, -1));
+			fallthrough;
+		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+			if (IMM_L(imm)) {
+				EMIT(PPC_RAW_ORI(dst_reg, src2_reg, IMM_L(imm)));
+				src2_reg = dst_reg;
+			}
+			if (IMM_H(imm))
+				EMIT(PPC_RAW_ORIS(dst_reg, src2_reg, IMM_H(imm)));
+			break;
+		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+			if (dst_reg == src_reg) {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			} else {
+				EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg));
+				EMIT(PPC_RAW_XOR(dst_reg_h, src2_reg_h, src_reg_h));
+			}
+			break;
+		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+			if (dst_reg == src_reg)
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			else
+				EMIT(PPC_RAW_XOR(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+			/* sign-extended imm has an all-ones high word: invert it */
+			if (imm < 0)
+				EMIT(PPC_RAW_NOR(dst_reg_h, src2_reg_h, src2_reg_h));
+			fallthrough;
+		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+			if (IMM_L(imm)) {
+				EMIT(PPC_RAW_XORI(dst_reg, src2_reg, IMM_L(imm)));
+				src2_reg = dst_reg;
+			}
+			if (IMM_H(imm))
+				EMIT(PPC_RAW_XORIS(dst_reg, src2_reg, IMM_H(imm)));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+			EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+			EMIT(PPC_RAW_SLW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_SRW(_R0, src2_reg, _R0));
+			EMIT(PPC_RAW_SLW(tmp_reg, src2_reg, tmp_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, _R0));
+			EMIT(PPC_RAW_SLW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg_h, dst_reg_h, tmp_reg));
+			break;
+		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<= (u32) imm */
+			if (imm)
+				EMIT(PPC_RAW_SLWI(dst_reg, src2_reg, imm));
+			else
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			} else if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_RLWIMI(dst_reg_h, src2_reg, imm, 32 - imm, 31));
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, imm, 0, 31 - imm));
+			} else if (imm < 64) {
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg, imm, 0, 31 - imm));
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			} else {
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+			}
+			break;
+		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+			EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+			EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0));
+			EMIT(PPC_RAW_SRW(tmp_reg, dst_reg_h, tmp_reg));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+			EMIT(PPC_RAW_SRW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+			break;
+		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+			if (imm)
+				EMIT(PPC_RAW_SRWI(dst_reg, src2_reg, imm));
+			else
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_RLWINM(dst_reg_h, src2_reg_h, 32 - imm, imm, 31));
+			} else if (imm < 64) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg_h, 64 - imm, imm - 32, 31));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			} else {
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			}
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+			EMIT(PPC_RAW_SRAW(dst_reg, src2_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+			bpf_set_seen_register(ctx, tmp_reg);
+			EMIT(PPC_RAW_SUBFIC(_R0, src_reg, 32));
+			EMIT(PPC_RAW_SRW(dst_reg, src2_reg, src_reg));
+			EMIT(PPC_RAW_SLW(_R0, src2_reg_h, _R0));
+			EMIT(PPC_RAW_ADDI(tmp_reg, src_reg, 32));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, _R0));
+			EMIT(PPC_RAW_RLWINM(_R0, tmp_reg, 0, 26, 26));
+			EMIT(PPC_RAW_SRAW(tmp_reg, src2_reg_h, tmp_reg));
+			EMIT(PPC_RAW_SRAW(dst_reg_h, src2_reg_h, src_reg));
+			EMIT(PPC_RAW_SLW(tmp_reg, tmp_reg, _R0));
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp_reg));
+			break;
+		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+			if (imm)
+				EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg, imm));
+			else
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+			break;
+		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+			if (imm < 0)
+				return -EINVAL;
+			if (!imm) {
+				EMIT(PPC_RAW_MR(dst_reg, src2_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src2_reg_h));
+			} else if (imm < 32) {
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 32 - imm, imm, 31));
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg_h, 32 - imm, 0, imm - 1));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, imm));
+			} else if (imm < 64) {
+				EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, imm - 32));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+			} else {
+				EMIT(PPC_RAW_SRAWI(dst_reg, src2_reg_h, 31));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src2_reg_h, 31));
+			}
+			break;
+
+		/*
+		 * MOV
+		 */
+		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+			/* off of 8/16/32 requests a sign-extending move of that width */
+			if (off == 8) {
+				EMIT(PPC_RAW_EXTSB(dst_reg, src_reg));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31));
+			} else if (off == 16) {
+				EMIT(PPC_RAW_EXTSH(dst_reg, src_reg));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31));
+			} else if (off == 32 && dst_reg == src_reg) {
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src_reg, 31));
+			} else if (off == 32) {
+				EMIT(PPC_RAW_MR(dst_reg, src_reg));
+				EMIT(PPC_RAW_SRAWI(dst_reg_h, src_reg, 31));
+			} else if (dst_reg != src_reg) {
+				EMIT(PPC_RAW_MR(dst_reg, src_reg));
+				EMIT(PPC_RAW_MR(dst_reg_h, src_reg_h));
+			}
+			break;
+		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+			/* special mov32 for zext */
+			if (imm == 1)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			else if (off == 8)
+				EMIT(PPC_RAW_EXTSB(dst_reg, src_reg));
+			else if (off == 16)
+				EMIT(PPC_RAW_EXTSH(dst_reg, src_reg));
+			else if (dst_reg != src_reg)
+				EMIT(PPC_RAW_MR(dst_reg, src_reg));
+			break;
+		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+			PPC_LI32(dst_reg, imm);
+			PPC_EX32(dst_reg_h, imm);
+			break;
+		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+			PPC_LI32(dst_reg, imm);
+			break;
+
+		/*
+		 * BPF_FROM_BE/LE
+		 */
+		case BPF_ALU | BPF_END | BPF_FROM_LE:
+		case BPF_ALU64 | BPF_END | BPF_FROM_LE:
+			switch (imm) {
+			case 16:
+				/* Copy 16 bits to upper part */
+				EMIT(PPC_RAW_RLWIMI(dst_reg, src2_reg, 16, 0, 15));
+				/* Rotate 8 bits right & mask */
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 24, 16, 31));
+				break;
+			case 32:
+				/*
+				 * Rotate word left by 8 bits:
+				 * 2 bytes are already in their final position
+				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+				 */
+				EMIT(PPC_RAW_RLWINM(_R0, src2_reg, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, _R0));
+				break;
+			case 64:
+				bpf_set_seen_register(ctx, tmp_reg);
+				EMIT(PPC_RAW_RLWINM(tmp_reg, src2_reg, 8, 0, 31));
+				EMIT(PPC_RAW_RLWINM(_R0, src2_reg_h, 8, 0, 31));
+				/* Rotate 24 bits and insert byte 1 */
+				EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 0, 7));
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 0, 7));
+				/* Rotate 24 bits and insert byte 3 */
+				EMIT(PPC_RAW_RLWIMI(tmp_reg, src2_reg, 24, 16, 23));
+				EMIT(PPC_RAW_RLWIMI(_R0, src2_reg_h, 24, 16, 23));
+				/* the byte-swapped halves also trade places */
+				EMIT(PPC_RAW_MR(dst_reg, _R0));
+				EMIT(PPC_RAW_MR(dst_reg_h, tmp_reg));
+				break;
+			}
+			if (BPF_CLASS(code) == BPF_ALU64 && imm != 64)
+				EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			break;
+		case BPF_ALU | BPF_END | BPF_FROM_BE:
+			switch (imm) {
+			case 16:
+				/* zero-extend 16 bits into 32 bits */
+				EMIT(PPC_RAW_RLWINM(dst_reg, src2_reg, 0, 16, 31));
+				break;
+			case 32:
+			case 64:
+				/* nop */
+				break;
+			}
+			break;
+
+		/*
+		 * BPF_ST NOSPEC (speculation barrier)
+		 */
+		case BPF_ST | BPF_NOSPEC:
+			/* nothing is emitted for the barrier here */
+			break;
+
+		/*
+		 * BPF_ST(X)
+		 */
+		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STB(_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STH(_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+			break;
+		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STW(_R0, dst_reg, off));
+			break;
+		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+			/* high word at off, low word at off + 4 */
+			EMIT(PPC_RAW_STW(src_reg_h, dst_reg, off));
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off + 4));
+			break;
+		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+			PPC_LI32(_R0, imm);
+			EMIT(PPC_RAW_STW(_R0, dst_reg, off + 4));
+			PPC_EX32(_R0, imm);
+			EMIT(PPC_RAW_STW(_R0, dst_reg, off));
+			break;
+
+		/*
+		 * BPF_STX ATOMIC (atomic ops)
+		 */
+		case BPF_STX | BPF_ATOMIC | BPF_W:
+			save_reg = _R0;
+			ret_reg = src_reg;
+
+			bpf_set_seen_register(ctx, tmp_reg);
+			bpf_set_seen_register(ctx, ax_reg);
+
+			/* Get offset into TMP_REG */
+			EMIT(PPC_RAW_LI(tmp_reg, off));
+			/*
+			 * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+			 * before and after the operation.
+			 *
+			 * This is a requirement in the Linux Kernel Memory Model.
+			 * See __cmpxchg_u32() in asm/cmpxchg.h as an example.
+			 */
+			if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+				EMIT(PPC_RAW_SYNC());
+			/* remember where the lwarx is, to retry from there */
+			tmp_idx = ctx->idx * 4;
+			/* load value from memory into r0 */
+			EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
+
+			/* Save old value in BPF_REG_AX */
+			if (imm & BPF_FETCH)
+				EMIT(PPC_RAW_MR(ax_reg, _R0));
+
+			switch (imm) {
+			case BPF_ADD:
+			case BPF_ADD | BPF_FETCH:
+				EMIT(PPC_RAW_ADD(_R0, _R0, src_reg));
+				break;
+			case BPF_AND:
+			case BPF_AND | BPF_FETCH:
+				EMIT(PPC_RAW_AND(_R0, _R0, src_reg));
+				break;
+			case BPF_OR:
+			case BPF_OR | BPF_FETCH:
+				EMIT(PPC_RAW_OR(_R0, _R0, src_reg));
+				break;
+			case BPF_XOR:
+			case BPF_XOR | BPF_FETCH:
+				EMIT(PPC_RAW_XOR(_R0, _R0, src_reg));
+				break;
+			case BPF_CMPXCHG:
+				/*
+				 * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+				 * in src_reg for other cases.
+				 */
+				ret_reg = bpf_to_ppc(BPF_REG_0);
+
+				/* Compare with old value in BPF_REG_0 */
+				EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), _R0));
+				/* Don't set if different from old value */
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+				fallthrough;
+			case BPF_XCHG:
+				save_reg = src_reg;
+				break;
+			default:
+				pr_err_ratelimited("eBPF filter atomic op code %02x (@%d) unsupported\n",
+						   code, i);
+				return -EOPNOTSUPP;
+			}
+
+			/* store new value */
+			EMIT(PPC_RAW_STWCX(save_reg, tmp_reg, dst_reg));
+			/* we're done if this succeeded */
+			PPC_BCC_SHORT(COND_NE, tmp_idx);
+
+			/* For the BPF_FETCH variant, get old data into src_reg */
+			if (imm & BPF_FETCH) {
+				/* Emit 'sync' to enforce full ordering */
+				if (IS_ENABLED(CONFIG_SMP))
+					EMIT(PPC_RAW_SYNC());
+				EMIT(PPC_RAW_MR(ret_reg, ax_reg));
+				if (!fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
+			}
+			break;
+
+		case BPF_STX | BPF_ATOMIC | BPF_DW: /* *(u64 *)(dst + off) += src */
+			return -EOPNOTSUPP;
+
+		/*
+		 * BPF_LDX
+		 */
+		case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEMSX | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+		case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+		case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEMSX | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+		case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+		case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
+		case BPF_LDX | BPF_MEMSX | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+		case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+		case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
+		case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+			/*
+			 * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+			 * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+			 * load only if addr is kernel address (see is_kernel_addr()), otherwise
+			 * set dst_reg=0 and move on.
+			 */
+			if (BPF_MODE(code) == BPF_PROBE_MEM || BPF_MODE(code) == BPF_PROBE_MEMSX) {
+				PPC_LI32(_R0, TASK_SIZE - off);
+				EMIT(PPC_RAW_CMPLW(src_reg, _R0));
+				PPC_BCC_SHORT(COND_GT, (ctx->idx + 4) * 4);
+				EMIT(PPC_RAW_LI(dst_reg, 0));
+				/*
+				 * For BPF_DW case, "li reg_h,0" would be needed when
+				 * !fp->aux->verifier_zext. Emit NOP otherwise.
+				 *
+				 * Note that "li reg_h,0" is emitted for BPF_B/H/W case,
+				 * if necessary. So, jump there instead of emitting an
+				 * additional "li reg_h,0" instruction.
+				 */
+				if (size == BPF_DW && !fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(dst_reg_h, 0));
+				else
+					EMIT(PPC_RAW_NOP());
+				/*
+				 * Need to jump two instructions instead of one for BPF_DW case
+				 * as there are two load instructions for dst_reg_h & dst_reg
+				 * respectively.
+				 */
+				if (size == BPF_DW ||
+				    (size == BPF_B && BPF_MODE(code) == BPF_PROBE_MEMSX))
+					PPC_JMP((ctx->idx + 3) * 4);
+				else
+					PPC_JMP((ctx->idx + 2) * 4);
+			}
+
+			if (BPF_MODE(code) == BPF_MEMSX || BPF_MODE(code) == BPF_PROBE_MEMSX) {
+				switch (size) {
+				case BPF_B:
+					EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+					EMIT(PPC_RAW_EXTSB(dst_reg, dst_reg));
+					break;
+				case BPF_H:
+					EMIT(PPC_RAW_LHA(dst_reg, src_reg, off));
+					break;
+				case BPF_W:
+					EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+					break;
+				}
+				if (!fp->aux->verifier_zext)
+					EMIT(PPC_RAW_SRAWI(dst_reg_h, dst_reg, 31));
+
+			} else {
+				switch (size) {
+				case BPF_B:
+					EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+					break;
+				case BPF_H:
+					EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+					break;
+				case BPF_W:
+					EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+					break;
+				case BPF_DW:
+					EMIT(PPC_RAW_LWZ(dst_reg_h, src_reg, off));
+					EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off + 4));
+					break;
+				}
+				if (size != BPF_DW && !fp->aux->verifier_zext)
+					EMIT(PPC_RAW_LI(dst_reg_h, 0));
+			}
+
+			if (BPF_MODE(code) == BPF_PROBE_MEM) {
+				int insn_idx = ctx->idx - 1;
+				int jmp_off = 4;
+
+				/*
+				 * In case of BPF_DW, two lwz instructions are emitted, one
+				 * for higher 32-bit and another for lower 32-bit. So, set
+				 * ex->insn to the first of the two and jump over both
+				 * instructions in fixup.
+				 *
+				 * Similarly, with !verifier_zext, two instructions are
+				 * emitted for BPF_B/H/W case. So, set ex->insn to the
+				 * instruction that could fault and skip over both
+				 * instructions.
+				 */
+				if (size == BPF_DW || !fp->aux->verifier_zext) {
+					insn_idx -= 1;
+					jmp_off += 4;
+				}
+
+				ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx,
+							    jmp_off, dst_reg, code);
+				if (ret)
+					return ret;
+			}
+			break;
+
+		/*
+		 * Doubleword load
+		 * 16 byte instruction that uses two 'struct bpf_insn'
+		 */
+		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+			PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm);
+			PPC_LI32(dst_reg, (u32)insn[i].imm);
+			/* Adjust for two bpf instructions */
+			addrs[++i] = ctx->idx * 4;
+			break;
+
+		/*
+		 * Return/Exit
+		 */
+		case BPF_JMP | BPF_EXIT:
+			/*
+			 * If this isn't the very last instruction, branch to
+			 * the epilogue. If we _are_ the last instruction,
+			 * we'll just fall through to the epilogue.
+			 */
+			if (i != flen - 1) {
+				ret = bpf_jit_emit_exit_insn(image, ctx, _R0, exit_addr);
+				if (ret)
+					return ret;
+			}
+			/* else fall through to the epilogue */
+			break;
+
+		/*
+		 * Call kernel helper or bpf function
+		 */
+		case BPF_JMP | BPF_CALL:
+			ctx->seen |= SEEN_FUNC;
+
+			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+						    &func_addr, &func_addr_fixed);
+			if (ret < 0)
+				return ret;
+
+			/* if BPF_REG_5 is in use, store both of its halves at 8(r1)/12(r1) */
+			if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_5))) {
+				EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5) - 1, _R1, 8));
+				EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_5), _R1, 12));
+			}
+
+			ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
+			if (ret)
+				return ret;
+
+			/* copy the r3 (high) / r4 (low) pair into BPF_REG_0 */
+			EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0) - 1, _R3));
+			EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R4));
+			break;
+
+		/*
+		 * Jumps and branches
+		 */
+		case BPF_JMP | BPF_JA:
+			PPC_JMP(addrs[i + 1 + off]);
+			break;
+		case BPF_JMP32 | BPF_JA:
+			PPC_JMP(addrs[i + 1 + imm]);
+			break;
+
+		case BPF_JMP | BPF_JGT | BPF_K:
+		case BPF_JMP | BPF_JGT | BPF_X:
+		case BPF_JMP | BPF_JSGT | BPF_K:
+		case BPF_JMP | BPF_JSGT | BPF_X:
+		case BPF_JMP32 | BPF_JGT | BPF_K:
+		case BPF_JMP32 | BPF_JGT | BPF_X:
+		case BPF_JMP32 | BPF_JSGT | BPF_K:
+		case BPF_JMP32 | BPF_JSGT | BPF_X:
+			true_cond = COND_GT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLT | BPF_K:
+		case BPF_JMP | BPF_JLT | BPF_X:
+		case BPF_JMP | BPF_JSLT | BPF_K:
+		case BPF_JMP | BPF_JSLT | BPF_X:
+		case BPF_JMP32 | BPF_JLT | BPF_K:
+		case BPF_JMP32 | BPF_JLT | BPF_X:
+		case BPF_JMP32 | BPF_JSLT | BPF_K:
+		case BPF_JMP32 | BPF_JSLT | BPF_X:
+			true_cond = COND_LT;
+			goto cond_branch;
+		case BPF_JMP | BPF_JGE | BPF_K:
+		case BPF_JMP | BPF_JGE | BPF_X:
+		case BPF_JMP | BPF_JSGE | BPF_K:
+		case BPF_JMP | BPF_JSGE | BPF_X:
+		case BPF_JMP32 | BPF_JGE | BPF_K:
+		case BPF_JMP32 | BPF_JGE | BPF_X:
+		case BPF_JMP32 | BPF_JSGE | BPF_K:
+		case BPF_JMP32 | BPF_JSGE | BPF_X:
+			true_cond = COND_GE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JLE | BPF_K:
+		case BPF_JMP | BPF_JLE | BPF_X:
+		case BPF_JMP | BPF_JSLE | BPF_K:
+		case BPF_JMP | BPF_JSLE | BPF_X:
+		case BPF_JMP32 | BPF_JLE | BPF_K:
+		case BPF_JMP32 | BPF_JLE | BPF_X:
+		case BPF_JMP32 | BPF_JSLE | BPF_K:
+		case BPF_JMP32 | BPF_JSLE | BPF_X:
+			true_cond = COND_LE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JEQ | BPF_K:
+		case BPF_JMP | BPF_JEQ | BPF_X:
+		case BPF_JMP32 | BPF_JEQ | BPF_K:
+		case BPF_JMP32 | BPF_JEQ | BPF_X:
+			true_cond = COND_EQ;
+			goto cond_branch;
+		case BPF_JMP | BPF_JNE | BPF_K:
+		case BPF_JMP | BPF_JNE | BPF_X:
+		case BPF_JMP32 | BPF_JNE | BPF_K:
+		case BPF_JMP32 | BPF_JNE | BPF_X:
+			true_cond = COND_NE;
+			goto cond_branch;
+		case BPF_JMP | BPF_JSET | BPF_K:
+		case BPF_JMP | BPF_JSET | BPF_X:
+		case BPF_JMP32 | BPF_JSET | BPF_K:
+		case BPF_JMP32 | BPF_JSET | BPF_X:
+			true_cond = COND_NE;
+			/* fallthrough; */
+
+cond_branch:
+			/*
+			 * Emit the comparison; the conditional branch on true_cond
+			 * follows the inner switch. For 64-bit operands the high
+			 * words are compared first and the low-word compare is
+			 * skipped when they already differ.
+			 */
+			switch (code) {
+			case BPF_JMP | BPF_JGT | BPF_X:
+			case BPF_JMP | BPF_JLT | BPF_X:
+			case BPF_JMP | BPF_JGE | BPF_X:
+			case BPF_JMP | BPF_JLE | BPF_X:
+			case BPF_JMP | BPF_JEQ | BPF_X:
+			case BPF_JMP | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				EMIT(PPC_RAW_CMPLW(dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JGT | BPF_X:
+			case BPF_JMP32 | BPF_JLT | BPF_X:
+			case BPF_JMP32 | BPF_JGE | BPF_X:
+			case BPF_JMP32 | BPF_JLE | BPF_X:
+			case BPF_JMP32 | BPF_JEQ | BPF_X:
+			case BPF_JMP32 | BPF_JNE | BPF_X:
+				/* unsigned comparison */
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSGT | BPF_X:
+			case BPF_JMP | BPF_JSLT | BPF_X:
+			case BPF_JMP | BPF_JSGE | BPF_X:
+			case BPF_JMP | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				EMIT(PPC_RAW_CMPW(dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JSGT | BPF_X:
+			case BPF_JMP32 | BPF_JSLT | BPF_X:
+			case BPF_JMP32 | BPF_JSGE | BPF_X:
+			case BPF_JMP32 | BPF_JSLE | BPF_X:
+				/* signed comparison */
+				EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JSET | BPF_X:
+				EMIT(PPC_RAW_AND_DOT(_R0, dst_reg_h, src_reg_h));
+				PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+				EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
+				break;
+			case BPF_JMP32 | BPF_JSET | BPF_X:
+				EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, src_reg));
+				break;
+			case BPF_JMP | BPF_JNE | BPF_K:
+			case BPF_JMP | BPF_JEQ | BPF_K:
+			case BPF_JMP | BPF_JGT | BPF_K:
+			case BPF_JMP | BPF_JLT | BPF_K:
+			case BPF_JMP | BPF_JGE | BPF_K:
+			case BPF_JMP | BPF_JLE | BPF_K:
+				/*
+				 * Need sign-extended load, so only positive
+				 * values can be used as imm in cmplwi
+				 */
+				if (imm >= 0 && imm < 32768) {
+					EMIT(PPC_RAW_CMPLWI(dst_reg_h, 0));
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					/* sign-extending load ... but unsigned comparison */
+					PPC_EX32(_R0, imm);
+					EMIT(PPC_RAW_CMPLW(dst_reg_h, _R0));
+					PPC_LI32(_R0, imm);
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JNE | BPF_K:
+			case BPF_JMP32 | BPF_JEQ | BPF_K:
+			case BPF_JMP32 | BPF_JGT | BPF_K:
+			case BPF_JMP32 | BPF_JLT | BPF_K:
+			case BPF_JMP32 | BPF_JGE | BPF_K:
+			case BPF_JMP32 | BPF_JLE | BPF_K:
+				if (imm >= 0 && imm < 65536) {
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					PPC_LI32(_R0, imm);
+					EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP | BPF_JSGT | BPF_K:
+			case BPF_JMP | BPF_JSLT | BPF_K:
+			case BPF_JMP | BPF_JSGE | BPF_K:
+			case BPF_JMP | BPF_JSLE | BPF_K:
+				if (imm >= 0 && imm < 65536) {
+					EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					EMIT(PPC_RAW_CMPWI(dst_reg_h, imm < 0 ? -1 : 0));
+					PPC_LI32(_R0, imm);
+					PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					EMIT(PPC_RAW_CMPLW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JSGT | BPF_K:
+			case BPF_JMP32 | BPF_JSLT | BPF_K:
+			case BPF_JMP32 | BPF_JSGE | BPF_K:
+			case BPF_JMP32 | BPF_JSLE | BPF_K:
+				/*
+				 * signed comparison, so any 16-bit value
+				 * can be used in cmpwi
+				 */
+				if (imm >= -32768 && imm < 32768) {
+					EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+				} else {
+					/* sign-extending load */
+					PPC_LI32(_R0, imm);
+					EMIT(PPC_RAW_CMPW(dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768) {
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
+				} else {
+					PPC_LI32(_R0, imm);
+					if (imm < 0) {
+						EMIT(PPC_RAW_CMPWI(dst_reg_h, 0));
+						PPC_BCC_SHORT(COND_NE, (ctx->idx + 2) * 4);
+					}
+					EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
+				}
+				break;
+			case BPF_JMP32 | BPF_JSET | BPF_K:
+				/* andi does not sign-extend the immediate */
+				if (imm >= 0 && imm < 32768) {
+					/* PPC_ANDI is _only/always_ dot-form */
+					EMIT(PPC_RAW_ANDI(_R0, dst_reg, imm));
+				} else {
+					PPC_LI32(_R0, imm);
+					EMIT(PPC_RAW_AND_DOT(_R0, dst_reg, _R0));
+				}
+				break;
+			}
+			PPC_BCC(true_cond, addrs[i + 1 + off]);
+			break;
+
+		/*
+		 * Tail call
+		 */
+		case BPF_JMP | BPF_TAIL_CALL:
+			ctx->seen |= SEEN_TAILCALL;
+			ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+			if (ret < 0)
+				return ret;
+			break;
+
+		default:
+			/*
+			 * The filter contains something cruel & unusual.
+			 * We don't handle it, but also there shouldn't be
+			 * anything missing from our list.
+			 */
+			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", code, i);
+			return -EOPNOTSUPP;
+		}
+		/*
+		 * 32-bit ALU results must have a zero high word: clear it here
+		 * unless the verifier inserts explicit zext instructions, the
+		 * next instruction is such a zext, or this was a 64-bit BPF_END.
+		 */
+		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext &&
+		    !insn_is_zext(&insn[i + 1]) && !(BPF_OP(code) == BPF_END && imm == 64))
+			EMIT(PPC_RAW_LI(dst_reg_h, 0));
+	}
+
+	/* Set end-of-body-code address for exit. */
+	addrs[i] = ctx->idx * 4;
+
+	return 0;
+}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
new file mode 100644
index 000000000000..1fe37128c876
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -0,0 +1,1630 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * bpf_jit_comp64.c: eBPF JIT compiler
+ *
+ * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+ * IBM Corporation
+ *
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
+ */
+#include <linux/moduleloader.h>
+#include <asm/cacheflush.h>
+#include <asm/asm-compat.h>
+#include <linux/netdevice.h>
+#include <linux/filter.h>
+#include <linux/if_vlan.h>
+#include <asm/kprobes.h>
+#include <linux/bpf.h>
+#include <asm/security_features.h>
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ * Ensure the top half (up to local_tmp_var) stays consistent
+ * with our redzone usage.
+ *
+ * [ prev sp ] <-------------
+ * [ nv gpr save area ] 6*8 |
+ * [ tail_call_cnt ] 8 |
+ * [ local_tmp_var ] 24 |
+ * fp (r31) --> [ ebpf stack space ] upto 512 |
+ * [ frame header ] 32/112 |
+ * sp (r1) ---> [ stack pointer ] --------------
+ */
+
+ /* for gpr non volatile registers r26-r31: ARENA_VM_START and BPF_REG_6 to 10 */
+#define BPF_PPC_STACK_SAVE (6*8)
+/* for bpf JIT code internal usage */
+#define BPF_PPC_STACK_LOCALS 32
+/* stack frame excluding BPF stack, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \
+ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE)
+
+/* BPF register usage */
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
+#define ARENA_VM_START (MAX_BPF_JIT_REG + 2)
+
+ /*
+  * BPF to ppc register mappings: fill ctx->b2p[], indexed by BPF (or
+  * JIT-internal) register number, with the ppc GPR assigned to it.
+  */
+ void bpf_jit_init_reg_mapping(struct codegen_context *ctx)
+ {
+ 	static const struct {
+ 		int bpf_reg;
+ 		int ppc_reg;
+ 	} reg_map[] = {
+ 		/* function return value */
+ 		{ BPF_REG_0, _R8 },
+ 		/* function arguments */
+ 		{ BPF_REG_1, _R3 },
+ 		{ BPF_REG_2, _R4 },
+ 		{ BPF_REG_3, _R5 },
+ 		{ BPF_REG_4, _R6 },
+ 		{ BPF_REG_5, _R7 },
+ 		/* non volatile registers */
+ 		{ BPF_REG_6, _R27 },
+ 		{ BPF_REG_7, _R28 },
+ 		{ BPF_REG_8, _R29 },
+ 		{ BPF_REG_9, _R30 },
+ 		/* frame pointer aka BPF_REG_10 */
+ 		{ BPF_REG_FP, _R31 },
+ 		/* eBPF jit internal registers */
+ 		{ BPF_REG_AX, _R12 },
+ 		{ TMP_REG_1, _R9 },
+ 		{ TMP_REG_2, _R10 },
+ 		/* non volatile register for kern_vm_start address */
+ 		{ ARENA_VM_START, _R26 },
+ 	};
+ 	unsigned int n;
+
+ 	for (n = 0; n < sizeof(reg_map) / sizeof(reg_map[0]); n++)
+ 		ctx->b2p[reg_map[n].bpf_reg] = reg_map[n].ppc_reg;
+ }
+
+/* PPC NVR range -- update this if we ever use NVRs below r26 */
+#define BPF_PPC_NVR_MIN _R26
+
+ /*
+  * Tell whether the program needs a stack frame of its own. That is the
+  * case when it calls other functions (kernel helpers) or when it uses its
+  * BPF stack area, the latter being deduced from the usage of BPF_REG_FP.
+  */
+ static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
+ {
+ 	/* Calling other functions always requires a frame */
+ 	if (ctx->seen & SEEN_FUNC)
+ 		return true;
+
+ 	/* Otherwise only a program touching the frame pointer needs one */
+ 	return bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP));
+ }
+
+/*
+ * When not setting up our own stackframe, the redzone (288 bytes) usage is:
+ *
+ * [ prev sp ] <-------------
+ * [ ... ] |
+ * sp (r1) ---> [ stack pointer ] --------------
+ * [ nv gpr save area ] 6*8
+ * [ tail_call_cnt ] 8
+ * [ local_tmp_var ] 24
+ * [ unused red zone ] 224
+ */
+static int bpf_jit_stack_local(struct codegen_context *ctx)
+{
+ if (bpf_has_stack_frame(ctx))
+ return STACK_FRAME_MIN_SIZE + ctx->stack_size;
+ else
+ return -(BPF_PPC_STACK_SAVE + 32);
+}
+
+ /* tail_call_cnt is kept right above the 24-byte local_tmp_var area. */
+ static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
+ {
+ 	int locals_off = bpf_jit_stack_local(ctx);
+
+ 	return locals_off + 24;
+ }
+
+ /*
+  * Return the offset from r1 of the save slot of non-volatile GPR 'reg'.
+  *
+  * The save area holds registers BPF_PPC_NVR_MIN..r31 in 8-byte slots that
+  * end at the top of our frame (BPF_PPC_STACKFRAME + ctx->stack_size) when
+  * we have one, or at r1 itself (offset 0, i.e. in the redzone) when we
+  * do not.
+  *
+  * Being asked about any other register is a JIT bug: log it and BUG().
+  */
+ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
+ {
+ 	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
+ 		return (bpf_has_stack_frame(ctx) ?
+ 			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
+ 				- (8 * (32 - reg));
+
+ 	/* printk messages are expected to be newline-terminated */
+ 	pr_err("BPF JIT is asking about unknown registers\n");
+ 	BUG();
+ }
+
+ void bpf_jit_realloc_regs(struct codegen_context *ctx)
+ { /* Intentionally empty: the mapping set up by bpf_jit_init_reg_mapping() is left unchanged. */
+ }
+
+ /*
+  * Emit the program prologue, in this order: the trampoline NOP, the
+  * optional TOC load, the tail_call_cnt initialisation (or two NOPs), the
+  * optional stack frame setup, the NVR saves, and finally the frame pointer
+  * and arena base setup.
+  */
+ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
+ {
+ 	int bpf_reg;
+
+ 	/* Instruction for trampoline attach */
+ 	EMIT(PPC_RAW_NOP());
+
+ #ifndef CONFIG_PPC_KERNEL_PCREL
+ 	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+ 		EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)));
+ #endif
+
+ 	/*
+ 	 * Programs that never tail call get two NOPs here, so that this
+ 	 * slot can be skipped when we are invoked through a tail call.
+ 	 * Programs that do tail call initialize tail_call_cnt instead.
+ 	 */
+ 	if (!(ctx->seen & SEEN_TAILCALL)) {
+ 		EMIT(PPC_RAW_NOP());
+ 		EMIT(PPC_RAW_NOP());
+ 	} else {
+ 		EMIT(PPC_RAW_LI(bpf_to_ppc(TMP_REG_1), 0));
+ 		/* this goes in the redzone */
+ 		EMIT(PPC_RAW_STD(bpf_to_ppc(TMP_REG_1), _R1, -(BPF_PPC_STACK_SAVE + 8)));
+ 	}
+
+ 	if (bpf_has_stack_frame(ctx)) {
+ 		/*
+ 		 * A frame is needed, but LR only has to be saved and
+ 		 * restored when we call other functions.
+ 		 */
+ 		if (ctx->seen & SEEN_FUNC) {
+ 			EMIT(PPC_RAW_MFLR(_R0));
+ 			EMIT(PPC_RAW_STD(_R0, _R1, PPC_LR_STKOFF));
+ 		}
+
+ 		EMIT(PPC_RAW_STDU(_R1, _R1, -(BPF_PPC_STACKFRAME + ctx->stack_size)));
+ 	}
+
+ 	/*
+ 	 * Save the non-volatile registers backing BPF registers 6-10 that
+ 	 * the program uses. Without a stack frame of our own they land in
+ 	 * the protected zone below the previous stack frame.
+ 	 */
+ 	for (bpf_reg = BPF_REG_6; bpf_reg <= BPF_REG_10; bpf_reg++) {
+ 		if (!bpf_is_seen_register(ctx, bpf_to_ppc(bpf_reg)))
+ 			continue;
+
+ 		EMIT(PPC_RAW_STD(bpf_to_ppc(bpf_reg), _R1,
+ 				 bpf_jit_stack_offsetof(ctx, bpf_to_ppc(bpf_reg))));
+ 	}
+
+ 	/* The arena base register is non-volatile too: save it before use */
+ 	if (ctx->arena_vm_start)
+ 		EMIT(PPC_RAW_STD(bpf_to_ppc(ARENA_VM_START), _R1,
+ 				 bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START))));
+
+ 	/* Point the BPF frame pointer at the bpf stack area */
+ 	if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP)))
+ 		EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1,
+ 				  STACK_FRAME_MIN_SIZE + ctx->stack_size));
+
+ 	if (ctx->arena_vm_start)
+ 		PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start);
+ }
+
+ /*
+  * Emit the part of the exit path shared by the regular epilogue and by
+  * tail calls: reload the saved NVRs, then pop our stack frame and restore
+  * LR where applicable.
+  */
+ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
+ {
+ 	int bpf_reg;
+
+ 	/* Reload every non-volatile register that the program used */
+ 	for (bpf_reg = BPF_REG_6; bpf_reg <= BPF_REG_10; bpf_reg++) {
+ 		if (!bpf_is_seen_register(ctx, bpf_to_ppc(bpf_reg)))
+ 			continue;
+
+ 		EMIT(PPC_RAW_LD(bpf_to_ppc(bpf_reg), _R1,
+ 				bpf_jit_stack_offsetof(ctx, bpf_to_ppc(bpf_reg))));
+ 	}
+
+ 	if (ctx->arena_vm_start)
+ 		EMIT(PPC_RAW_LD(bpf_to_ppc(ARENA_VM_START), _R1,
+ 				bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START))));
+
+ 	/* Nothing to tear down unless we built a frame of our own */
+ 	if (!bpf_has_stack_frame(ctx))
+ 		return;
+
+ 	EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME + ctx->stack_size));
+
+ 	/* LR was only saved if the program calls other functions */
+ 	if (ctx->seen & SEEN_FUNC) {
+ 		EMIT(PPC_RAW_LD(_R0, _R1, PPC_LR_STKOFF));
+ 		EMIT(PPC_RAW_MTLR(_R0));
+ 	}
+ }
+
+ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
+ { /* Emit the program exit path; the fentry stubs are emitted right after it. */
+ bpf_jit_emit_common_epilogue(image, ctx); /* restore NVRs and tear down the stack frame, if any */
+
+ /* Move result (held in the register backing BPF_REG_0) to r3 */
+ EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0)));
+
+ EMIT(PPC_RAW_BLR()); /* return through LR */
+
+ bpf_jit_build_fentry_stubs(image, ctx);
+ }
+
+ int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func)
+ { /*
+ * Emit an indirect call (through CTR) to 'func'.
+ *
+ * When 'func' is 0 only a placeholder sequence is emitted. Returns 0 on
+ * success, or -ERANGE when, on non-PCREL builds, a core kernel text
+ * address does not fit in a signed 32-bit offset from the kernel TOC.
+ */
+ unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0;
+ long reladdr;
+
+ /* bpf to bpf call, func is not known in the initial pass. Emit 5 nops as a placeholder */
+ if (!func) {
+ for (int i = 0; i < 5; i++)
+ EMIT(PPC_RAW_NOP());
+ /* elfv1 needs an additional instruction to load addr from descriptor */
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1))
+ EMIT(PPC_RAW_NOP());
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ return 0;
+ }
+
+ #ifdef CONFIG_PPC_KERNEL_PCREL
+ reladdr = func_addr - local_paca->kernelbase; /* offset of the target from the kernel base */
+
+ /*
+ * If fimage is NULL (the initial pass to find image size),
+ * account for the maximum no. of instructions possible.
+ */
+ if (!fimage) {
+ ctx->idx += 7;
+ return 0;
+ } else if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) { /* target within +/-8G of kernelbase */
+ EMIT(PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)));
+ /* Align for subsequent prefix instruction */
+ if (!IS_ALIGNED((unsigned long)fimage + CTX_NIA(ctx), 8))
+ EMIT(PPC_RAW_NOP());
+ /* paddi r12,r12,addr */
+ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(0) | IMM_H18(reladdr));
+ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12) | IMM_L(reladdr));
+ } else {
+ unsigned long pc = (unsigned long)fimage + CTX_NIA(ctx);
+ bool alignment_needed = !IS_ALIGNED(pc, 8);
+
+ reladdr = func_addr - (alignment_needed ? pc + 4 : pc); /* pc-relative offset, measured past the alignment nop if one is emitted */
+
+ if (reladdr < (long)SZ_8G && reladdr >= -(long)SZ_8G) {
+ if (alignment_needed)
+ EMIT(PPC_RAW_NOP());
+ /* pla r12,addr */
+ EMIT(PPC_PREFIX_MLS | __PPC_PRFX_R(1) | IMM_H18(reladdr));
+ EMIT(PPC_INST_PADDI | ___PPC_RT(_R12) | IMM_L(reladdr));
+ } else {
+ /* We can clobber r12; neither relative form reaches, so load the full 64-bit address */
+ PPC_LI64(_R12, func);
+ }
+ }
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ #else
+ if (core_kernel_text(func_addr)) {
+ reladdr = func_addr - kernel_toc_addr(); /* core kernel text is addressed relative to the kernel TOC (r2) */
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("eBPF: address of %ps out of range of kernel_toc.\n", (void *)func);
+ return -ERANGE;
+ }
+
+ EMIT(PPC_RAW_ADDIS(_R12, _R2, PPC_HA(reladdr)));
+ EMIT(PPC_RAW_ADDI(_R12, _R12, PPC_LO(reladdr)));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ } else {
+ if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) {
+ /* func points to the function descriptor */
+ PPC_LI64(bpf_to_ppc(TMP_REG_2), func);
+ /* Load actual entry point from function descriptor */
+ EMIT(PPC_RAW_LD(bpf_to_ppc(TMP_REG_1), bpf_to_ppc(TMP_REG_2), 0));
+ /* ... and move it to CTR */
+ EMIT(PPC_RAW_MTCTR(bpf_to_ppc(TMP_REG_1)));
+ /*
+ * Load TOC from function descriptor at offset 8.
+ * We can clobber r2 since we get called through a
+ * function pointer (so caller will save/restore r2).
+ */
+ if (is_module_text_address(func_addr))
+ EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8));
+ } else {
+ PPC_LI64(_R12, func);
+ EMIT(PPC_RAW_MTCTR(_R12));
+ }
+ EMIT(PPC_RAW_BCTRL());
+ /*
+ * Load r2 with kernel TOC as kernel TOC is used if function address falls
+ * within core kernel text.
+ * This reload is only emitted after calls into module text.
+ */
+ if (is_module_text_address(func_addr))
+ EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc)));
+ }
+ #endif
+
+ return 0;
+ }
+
+ /*
+  * Emit the tail call sequence. 'out' is the branch target used whenever
+  * the tail call is not taken. Always returns 0.
+  */
+ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
+ {
+ 	/*
+ 	 * By now, the eBPF program has already setup parameters in r3, r4 and r5
+ 	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
+ 	 * r4/BPF_REG_2 - pointer to bpf_array
+ 	 * r5/BPF_REG_3 - index in bpf_array
+ 	 */
+ 	const u32 scratch = bpf_to_ppc(TMP_REG_1);
+ 	const int array_reg = bpf_to_ppc(BPF_REG_2);
+ 	const int index_reg = bpf_to_ppc(BPF_REG_3);
+ 	/*
+ 	 * Bytes of the target's prologue to jump over: the trampoline NOP
+ 	 * and the two tail_call_cnt instructions, plus the TOC load on
+ 	 * non-PCREL ELFv2 kernels.
+ 	 */
+ 	const int skip_bytes =
+ 		(!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) ?
+ 		12 + 4 : 12;
+
+ 	/*
+ 	 * if (index >= array->map.max_entries)
+ 	 *   goto out;
+ 	 */
+ 	EMIT(PPC_RAW_LWZ(scratch, array_reg, offsetof(struct bpf_array, map.max_entries)));
+ 	EMIT(PPC_RAW_RLWINM(index_reg, index_reg, 0, 0, 31));
+ 	EMIT(PPC_RAW_CMPLW(index_reg, scratch));
+ 	PPC_BCC_SHORT(COND_GE, out);
+
+ 	/*
+ 	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
+ 	 *   goto out;
+ 	 */
+ 	EMIT(PPC_RAW_LD(scratch, _R1, bpf_jit_stack_tailcallcnt(ctx)));
+ 	EMIT(PPC_RAW_CMPLWI(scratch, MAX_TAIL_CALL_CNT));
+ 	PPC_BCC_SHORT(COND_GE, out);
+
+ 	/* tail_call_cnt++; */
+ 	EMIT(PPC_RAW_ADDI(scratch, scratch, 1));
+ 	EMIT(PPC_RAW_STD(scratch, _R1, bpf_jit_stack_tailcallcnt(ctx)));
+
+ 	/* prog = array->ptrs[index]; */
+ 	EMIT(PPC_RAW_MULI(scratch, index_reg, 8));
+ 	EMIT(PPC_RAW_ADD(scratch, scratch, array_reg));
+ 	EMIT(PPC_RAW_LD(scratch, scratch, offsetof(struct bpf_array, ptrs)));
+
+ 	/*
+ 	 * if (prog == NULL)
+ 	 *   goto out;
+ 	 */
+ 	EMIT(PPC_RAW_CMPLDI(scratch, 0));
+ 	PPC_BCC_SHORT(COND_EQ, out);
+
+ 	/* goto *(prog->bpf_func + prologue_size); */
+ 	EMIT(PPC_RAW_LD(scratch, scratch, offsetof(struct bpf_prog, bpf_func)));
+ 	EMIT(PPC_RAW_ADDI(scratch, scratch, FUNCTION_DESCR_SIZE + skip_bytes));
+ 	EMIT(PPC_RAW_MTCTR(scratch));
+
+ 	/* tear down stack, restore NVRs, ... */
+ 	bpf_jit_emit_common_epilogue(image, ctx);
+
+ 	EMIT(PPC_RAW_BCTR());
+
+ 	/* out: */
+ 	return 0;
+ }
+
+ /*
+  * Report whether the Spectre v1 barrier may be skipped. On E500 and
+  * Book3S-64 it is needed only when both SEC_FTR_FAVOUR_SECURITY and
+  * SEC_FTR_BNDS_CHK_SPEC_BAR are enabled; elsewhere it is always skipped.
+  */
+ bool bpf_jit_bypass_spec_v1(void)
+ {
+ #if defined(CONFIG_PPC_E500) || defined(CONFIG_PPC_BOOK3S_64)
+ 	if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY))
+ 		return true;
+
+ 	return !security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
+ #else
+ 	return true;
+ #endif
+ }
+
+ /*
+  * Report whether the Spectre v4 barrier may be skipped. It is needed only
+  * when SEC_FTR_FAVOUR_SECURITY and SEC_FTR_STF_BARRIER are both enabled
+  * and a store-forwarding barrier type is actually configured.
+  */
+ bool bpf_jit_bypass_spec_v4(void)
+ {
+ 	if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY))
+ 		return true;
+
+ 	if (!security_ftr_enabled(SEC_FTR_STF_BARRIER))
+ 		return true;
+
+ 	return stf_barrier_type_get() == STF_BARRIER_NONE;
+ }
+
+/*
+ * We spill into the redzone always, even if the bpf program has its own stackframe.
+ * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local()
+ */
+void bpf_stf_barrier(void);
+
+asm (
+" .global bpf_stf_barrier ;"
+" bpf_stf_barrier: ;"
+" std 21,-80(1) ;"
+" std 22,-72(1) ;"
+" sync ;"
+" ld 21,-80(1) ;"
+" ld 22,-72(1) ;"
+" ori 31,31,0 ;"
+" .rept 14 ;"
+" b 1f ;"
+" 1: ;"
+" .endr ;"
+" blr ;"
+);
+
+ static int bpf_jit_emit_atomic_ops(u32 *image, struct codegen_context *ctx,
+ const struct bpf_insn *insn, u32 *jmp_off,
+ u32 *tmp_idx, u32 *addrp)
+ { /*
+ * Emit a load-reserve / store-conditional loop implementing the BPF
+ * atomic operation selected by insn->imm on the 32-bit or 64-bit value
+ * at dst_reg + insn->off.
+ *
+ * Outputs:
+ * *tmp_idx - instruction index of the load-reserve (the retry target)
+ * *jmp_off - byte distance from that load to the instruction following
+ *            the retry branch
+ * *addrp   - written only for a 32-bit BPF_CMPXCHG followed by a zext
+ *            insn; set to the current code offset (presumably so the
+ *            caller can skip that zext -- confirm against the caller)
+ *
+ * Returns 0, or -EOPNOTSUPP for an unrecognised insn->imm.
+ */
+ u32 tmp1_reg = bpf_to_ppc(TMP_REG_1);
+ u32 tmp2_reg = bpf_to_ppc(TMP_REG_2);
+ u32 size = BPF_SIZE(insn->code);
+ u32 src_reg = bpf_to_ppc(insn->src_reg);
+ u32 dst_reg = bpf_to_ppc(insn->dst_reg);
+ s32 imm = insn->imm;
+
+ u32 save_reg = tmp2_reg; /* register written back by the store-conditional */
+ u32 ret_reg = src_reg; /* register receiving the old value when BPF_FETCH is set */
+ u32 fixup_idx;
+
+ /* Get offset into TMP_REG_1 */
+ EMIT(PPC_RAW_LI(tmp1_reg, insn->off));
+ /*
+ * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+ * before and after the operation.
+ *
+ * This is a requirement in the Linux Kernel Memory Model.
+ * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
+ */
+ if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
+
+ *tmp_idx = ctx->idx; /* the load-reserve below is where a failed store retries from */
+
+ /* load value from memory into TMP_REG_2 */
+ if (size == BPF_DW)
+ EMIT(PPC_RAW_LDARX(tmp2_reg, tmp1_reg, dst_reg, 0));
+ else
+ EMIT(PPC_RAW_LWARX(tmp2_reg, tmp1_reg, dst_reg, 0));
+ /* Save old value in _R0 */
+ if (imm & BPF_FETCH)
+ EMIT(PPC_RAW_MR(_R0, tmp2_reg));
+
+ switch (imm) {
+ case BPF_ADD:
+ case BPF_ADD | BPF_FETCH:
+ EMIT(PPC_RAW_ADD(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_AND:
+ case BPF_AND | BPF_FETCH:
+ EMIT(PPC_RAW_AND(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_OR:
+ case BPF_OR | BPF_FETCH:
+ EMIT(PPC_RAW_OR(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_XOR:
+ case BPF_XOR | BPF_FETCH:
+ EMIT(PPC_RAW_XOR(tmp2_reg, tmp2_reg, src_reg));
+ break;
+ case BPF_CMPXCHG:
+ /*
+ * Return old value in BPF_REG_0 for BPF_CMPXCHG &
+ * in src_reg for other cases.
+ */
+ ret_reg = bpf_to_ppc(BPF_REG_0);
+
+ /* Compare with old value in BPF_R0 */
+ if (size == BPF_DW)
+ EMIT(PPC_RAW_CMPD(bpf_to_ppc(BPF_REG_0), tmp2_reg));
+ else
+ EMIT(PPC_RAW_CMPW(bpf_to_ppc(BPF_REG_0), tmp2_reg));
+ /*
+ * Don't set if different from old value: the target is three
+ * instructions ahead, i.e. past the store-conditional and the retry
+ * branch (assuming PPC_BCC_SHORT emits a single instruction).
+ */
+ PPC_BCC_SHORT(COND_NE, (ctx->idx + 3) * 4);
+ fallthrough;
+ case BPF_XCHG:
+ save_reg = src_reg; /* store src_reg itself rather than a computed value */
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* store new value */
+ if (size == BPF_DW)
+ EMIT(PPC_RAW_STDCX(save_reg, tmp1_reg, dst_reg));
+ else
+ EMIT(PPC_RAW_STWCX(save_reg, tmp1_reg, dst_reg));
+ /* we're done if this succeeded; otherwise branch back to the load at *tmp_idx */
+ PPC_BCC_SHORT(COND_NE, *tmp_idx * 4);
+ fixup_idx = ctx->idx; /* index of the first instruction after the retry branch */
+
+ if (imm & BPF_FETCH) {
+ /* Emit 'sync' to enforce full ordering */
+ if (IS_ENABLED(CONFIG_SMP))
+ EMIT(PPC_RAW_SYNC());
+ EMIT(PPC_RAW_MR(ret_reg, _R0));
+ /*
+ * Skip unnecessary zero-extension for 32-bit cmpxchg.
+ * For context, see commit 39491867ace5.
+ */
+ if (size != BPF_DW && imm == BPF_CMPXCHG &&
+ insn_is_zext(insn + 1))
+ *addrp = ctx->idx * 4;
+ }
+
+ *jmp_off = (fixup_idx - *tmp_idx) * 4;
+
+ return 0;
+ }
+
+ /*
+  * Emit a store of src_reg, of the width encoded in 'code', to the address
+  * TMP_REG_1 + off. The caller is expected to have placed the base address
+  * in TMP_REG_1. Returns 0, or -EINVAL for an unknown width.
+  */
+ static int bpf_jit_emit_probe_mem_store(struct codegen_context *ctx, u32 src_reg, s16 off,
+ 					u32 code, u32 *image)
+ {
+ 	const u32 base_reg = bpf_to_ppc(TMP_REG_1);
+ 	const u32 index_reg = bpf_to_ppc(TMP_REG_2);
+ 	const u32 width = BPF_SIZE(code);
+
+ 	if (width == BPF_B) {
+ 		EMIT(PPC_RAW_STB(src_reg, base_reg, off));
+ 	} else if (width == BPF_H) {
+ 		EMIT(PPC_RAW_STH(src_reg, base_reg, off));
+ 	} else if (width == BPF_W) {
+ 		EMIT(PPC_RAW_STW(src_reg, base_reg, off));
+ 	} else if (width == BPF_DW) {
+ 		if (off % 4) {
+ 			/* std needs a multiple-of-4 displacement: use the indexed form */
+ 			EMIT(PPC_RAW_LI(index_reg, off));
+ 			EMIT(PPC_RAW_STDX(src_reg, base_reg, index_reg));
+ 		} else {
+ 			EMIT(PPC_RAW_STD(src_reg, base_reg, off));
+ 		}
+ 	} else {
+ 		return -EINVAL;
+ 	}
+
+ 	return 0;
+ }
+
+ /*
+  * Emit a load-acquire (plain load followed by lwsync) or a store-release
+  * (lwsync followed by plain store), as selected by insn.imm.
+  * Returns 0, or -EINVAL for any other insn.imm.
+  */
+ static int emit_atomic_ld_st(const struct bpf_insn insn, struct codegen_context *ctx, u32 *image)
+ {
+ 	const u32 width = BPF_SIZE(insn.code);
+ 	const u32 dst = bpf_to_ppc(insn.dst_reg);
+ 	const u32 src = bpf_to_ppc(insn.src_reg);
+ 	const u32 ld_index_reg = bpf_to_ppc(TMP_REG_1);
+ 	const u32 st_index_reg = bpf_to_ppc(TMP_REG_2);
+ 	const s16 disp = insn.off;
+ 	const s32 op = insn.imm;
+
+ 	if (op == BPF_LOAD_ACQ) {
+ 		if (width == BPF_B) {
+ 			EMIT(PPC_RAW_LBZ(dst, src, disp));
+ 		} else if (width == BPF_H) {
+ 			EMIT(PPC_RAW_LHZ(dst, src, disp));
+ 		} else if (width == BPF_W) {
+ 			EMIT(PPC_RAW_LWZ(dst, src, disp));
+ 		} else if (width == BPF_DW) {
+ 			if (disp % 4) {
+ 				/* ld needs a multiple-of-4 displacement: use the indexed form */
+ 				EMIT(PPC_RAW_LI(ld_index_reg, disp));
+ 				EMIT(PPC_RAW_LDX(dst, src, ld_index_reg));
+ 			} else {
+ 				EMIT(PPC_RAW_LD(dst, src, disp));
+ 			}
+ 		}
+ 		/* acquire: barrier after the load */
+ 		EMIT(PPC_RAW_LWSYNC());
+ 		return 0;
+ 	}
+
+ 	if (op == BPF_STORE_REL) {
+ 		/* release: barrier before the store */
+ 		EMIT(PPC_RAW_LWSYNC());
+ 		if (width == BPF_B) {
+ 			EMIT(PPC_RAW_STB(src, dst, disp));
+ 		} else if (width == BPF_H) {
+ 			EMIT(PPC_RAW_STH(src, dst, disp));
+ 		} else if (width == BPF_W) {
+ 			EMIT(PPC_RAW_STW(src, dst, disp));
+ 		} else if (width == BPF_DW) {
+ 			if (disp % 4) {
+ 				/* std needs a multiple-of-4 displacement: use the indexed form */
+ 				EMIT(PPC_RAW_LI(st_index_reg, disp));
+ 				EMIT(PPC_RAW_STDX(src, dst, st_index_reg));
+ 			} else {
+ 				EMIT(PPC_RAW_STD(src, dst, disp));
+ 			}
+ 		}
+ 		return 0;
+ 	}
+
+ 	pr_err_ratelimited("unexpected atomic load/store op code %02x\n",
+ 			   op);
+ 	return -EINVAL;
+ }
+
+/* Assemble the body code between the prologue & epilogue */
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct codegen_context *ctx,
+ u32 *addrs, int pass, bool extra_pass)
+{
+ enum stf_barrier_type stf_barrier = stf_barrier_type_get();
+ bool sync_emitted, ori31_emitted;
+ const struct bpf_insn *insn = fp->insnsi;
+ int flen = fp->len;
+ int i, ret;
+
+ /* Start of epilogue code - will only be valid 2nd pass onwards */
+ u32 exit_addr = addrs[flen];
+
+ for (i = 0; i < flen; i++) {
+ u32 code = insn[i].code;
+ u32 dst_reg = bpf_to_ppc(insn[i].dst_reg);
+ u32 src_reg = bpf_to_ppc(insn[i].src_reg);
+ u32 size = BPF_SIZE(code);
+ u32 tmp1_reg = bpf_to_ppc(TMP_REG_1);
+ u32 tmp2_reg = bpf_to_ppc(TMP_REG_2);
+ s16 off = insn[i].off;
+ s32 imm = insn[i].imm;
+ bool func_addr_fixed;
+ u64 func_addr;
+ u64 imm64;
+ u32 true_cond;
+ u32 tmp_idx;
+ u32 jmp_off;
+
+ /*
+ * addrs[] maps a BPF bytecode address into a real offset from
+ * the start of the body code.
+ */
+ addrs[i] = ctx->idx * 4;
+
+ /*
+ * As an optimization, we note down which non-volatile registers
+ * are used so that we can only save/restore those in our
+ * prologue and epilogue. We do this here regardless of whether
+ * the actual BPF instruction uses src/dst registers or not
+ * (for instance, BPF_CALL does not use them). The expectation
+ * is that those instructions will have src_reg/dst_reg set to
+ * 0. Even otherwise, we just lose some prologue/epilogue
+ * optimization but everything else should work without
+ * any issues.
+ */
+ if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
+ bpf_set_seen_register(ctx, dst_reg);
+ if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
+ bpf_set_seen_register(ctx, src_reg);
+
+ switch (code) {
+ /*
+ * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
+ */
+ case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
+ case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
+ EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
+ case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
+ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
+ if (!imm) {
+ goto bpf_alu32_trunc;
+ } else if (imm >= -32768 && imm < 32768) {
+ EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
+ } else {
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_ADD(dst_reg, dst_reg, tmp1_reg));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
+ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
+ if (!imm) {
+ goto bpf_alu32_trunc;
+ } else if (imm > -32768 && imm <= 32768) {
+ EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm)));
+ } else {
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
+ case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
+ if (BPF_CLASS(code) == BPF_ALU)
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
+ case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
+ if (imm >= -32768 && imm < 32768)
+ EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
+ else {
+ PPC_LI32(tmp1_reg, imm);
+ if (BPF_CLASS(code) == BPF_ALU)
+ EMIT(PPC_RAW_MULW(dst_reg, dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_MULD(dst_reg, dst_reg, tmp1_reg));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
+ case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
+ if (BPF_OP(code) == BPF_MOD) {
+ if (off)
+ EMIT(PPC_RAW_DIVW(tmp1_reg, dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(tmp1_reg, dst_reg, src_reg));
+
+ EMIT(PPC_RAW_MULW(tmp1_reg, src_reg, tmp1_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+ } else
+ if (off)
+ EMIT(PPC_RAW_DIVW(dst_reg, dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
+ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
+ if (BPF_OP(code) == BPF_MOD) {
+ if (off)
+ EMIT(PPC_RAW_DIVD(tmp1_reg, dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_DIVDU(tmp1_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_MULD(tmp1_reg, src_reg, tmp1_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+ } else
+ if (off)
+ EMIT(PPC_RAW_DIVD(dst_reg, dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
+ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
+ case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
+ case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
+ if (imm == 0)
+ return -EINVAL;
+ if (imm == 1) {
+ if (BPF_OP(code) == BPF_DIV) {
+ goto bpf_alu32_trunc;
+ } else {
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ break;
+ }
+ }
+
+ PPC_LI32(tmp1_reg, imm);
+ switch (BPF_CLASS(code)) {
+ case BPF_ALU:
+ if (BPF_OP(code) == BPF_MOD) {
+ if (off)
+ EMIT(PPC_RAW_DIVW(tmp2_reg, dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(tmp2_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_MULW(tmp1_reg, tmp1_reg, tmp2_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+ } else
+ if (off)
+ EMIT(PPC_RAW_DIVW(dst_reg, dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, tmp1_reg));
+ break;
+ case BPF_ALU64:
+ if (BPF_OP(code) == BPF_MOD) {
+ if (off)
+ EMIT(PPC_RAW_DIVD(tmp2_reg, dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_DIVDU(tmp2_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_MULD(tmp1_reg, tmp1_reg, tmp2_reg));
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, tmp1_reg));
+ } else
+ if (off)
+ EMIT(PPC_RAW_DIVD(dst_reg, dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, tmp1_reg));
+ break;
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
+ case BPF_ALU64 | BPF_NEG: /* dst = -dst */
+ EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
+ goto bpf_alu32_trunc;
+
+ /*
+ * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
+ */
+ case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
+ case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
+ case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
+ if (!IMM_H(imm))
+ EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
+ else {
+ /* Sign-extended */
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_AND(dst_reg, dst_reg, tmp1_reg));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
+ case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
+ case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
+ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
+ /* Sign-extended */
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp1_reg));
+ } else {
+ if (IMM_L(imm))
+ EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
+ if (IMM_H(imm))
+ EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
+ case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
+ case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
+ if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
+ /* Sign-extended */
+ PPC_LI32(tmp1_reg, imm);
+ EMIT(PPC_RAW_XOR(dst_reg, dst_reg, tmp1_reg));
+ } else {
+ if (IMM_L(imm))
+ EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
+ if (IMM_H(imm))
+ EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
+ }
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
+ /* slw clears top 32 bits */
+ EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
+ /* skip zero extension move, but set address map. */
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
+ EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
+ /* with imm 0, we still need to clear top 32 bits */
+ EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+ case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
+ if (imm != 0)
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
+ break;
+ case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
+ EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
+ EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
+ EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+ case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
+ if (imm != 0)
+ EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
+ EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
+ EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
+ break;
+ case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
+ EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
+ goto bpf_alu32_trunc;
+ case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
+ if (imm != 0)
+ EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
+ break;
+
+ /*
+ * MOV
+ */
+ case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
+ case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
+
+ if (insn_is_cast_user(&insn[i])) {
+ EMIT(PPC_RAW_RLDICL_DOT(tmp1_reg, src_reg, 0, 32));
+ PPC_LI64(dst_reg, (ctx->user_vm_start & 0xffffffff00000000UL));
+ PPC_BCC_SHORT(COND_EQ, (ctx->idx + 2) * 4);
+ EMIT(PPC_RAW_OR(tmp1_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
+ break;
+ }
+
+ if (imm == 1) {
+ /* special mov32 for zext */
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
+ break;
+ } else if (off == 8) {
+ EMIT(PPC_RAW_EXTSB(dst_reg, src_reg));
+ } else if (off == 16) {
+ EMIT(PPC_RAW_EXTSH(dst_reg, src_reg));
+ } else if (off == 32) {
+ EMIT(PPC_RAW_EXTSW(dst_reg, src_reg));
+ } else if (dst_reg != src_reg)
+ EMIT(PPC_RAW_MR(dst_reg, src_reg));
+ goto bpf_alu32_trunc;
+ case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
+ case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
+ PPC_LI32(dst_reg, imm);
+ if (imm < 0)
+ goto bpf_alu32_trunc;
+ else if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+
+bpf_alu32_trunc:
+ /* Truncate to 32-bits */
+ if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
+ EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
+ break;
+
+ /*
+ * BPF_FROM_BE/LE
+ */
+ case BPF_ALU | BPF_END | BPF_FROM_LE:
+ case BPF_ALU | BPF_END | BPF_FROM_BE:
+ case BPF_ALU64 | BPF_END | BPF_FROM_LE:
+#ifdef __BIG_ENDIAN__
+ if (BPF_SRC(code) == BPF_FROM_BE)
+ goto emit_clear;
+#else /* !__BIG_ENDIAN__ */
+ if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
+ goto emit_clear;
+#endif
+ switch (imm) {
+ case 16:
+ /* Rotate 8 bits left & mask with 0x0000ff00 */
+ EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 16, 23));
+ /* Rotate 8 bits right & insert LSB to reg */
+ EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 24, 31));
+ /* Move result back to dst_reg */
+ EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
+ break;
+ case 32:
+ /*
+ * Rotate word left by 8 bits:
+ * 2 bytes are already in their final position
+ * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
+ */
+ EMIT(PPC_RAW_RLWINM(tmp1_reg, dst_reg, 8, 0, 31));
+ /* Rotate 24 bits and insert byte 1 */
+ EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 0, 7));
+ /* Rotate 24 bits and insert byte 3 */
+ EMIT(PPC_RAW_RLWIMI(tmp1_reg, dst_reg, 24, 16, 23));
+ EMIT(PPC_RAW_MR(dst_reg, tmp1_reg));
+ break;
+ case 64:
+ /* Store the value to stack and then use byte-reverse loads */
+ EMIT(PPC_RAW_STD(dst_reg, _R1, bpf_jit_stack_local(ctx)));
+ EMIT(PPC_RAW_ADDI(tmp1_reg, _R1, bpf_jit_stack_local(ctx)));
+ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+ EMIT(PPC_RAW_LDBRX(dst_reg, 0, tmp1_reg));
+ } else {
+ EMIT(PPC_RAW_LWBRX(dst_reg, 0, tmp1_reg));
+ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
+ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
+ EMIT(PPC_RAW_LI(tmp2_reg, 4));
+ EMIT(PPC_RAW_LWBRX(tmp2_reg, tmp2_reg, tmp1_reg));
+ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
+ EMIT(PPC_RAW_SLDI(tmp2_reg, tmp2_reg, 32));
+ EMIT(PPC_RAW_OR(dst_reg, dst_reg, tmp2_reg));
+ }
+ break;
+ }
+ break;
+
+emit_clear:
+ switch (imm) {
+ case 16:
+ /* zero-extend 16 bits into 64 bits */
+ EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
+ if (insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+ case 32:
+ if (!fp->aux->verifier_zext)
+ /* zero-extend 32 bits into 64 bits */
+ EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
+ break;
+ case 64:
+ /* nop */
+ break;
+ }
+ break;
+
+ /*
+ * BPF_ST NOSPEC (speculation barrier)
+ *
+ * The following must act as a barrier against both Spectre v1
+ * and v4 if we requested both mitigations. Therefore, also emit
+ * 'isync; sync' on E500 or 'ori31' on BOOK3S_64 in addition to
+ * the insns needed for a Spectre v4 barrier.
+ *
+ * If we requested only !bypass_spec_v1 OR only !bypass_spec_v4,
+ * we can skip the respective other barrier type as an
+ * optimization.
+ */
+ case BPF_ST | BPF_NOSPEC:
+ sync_emitted = false;
+ ori31_emitted = false;
+ if (IS_ENABLED(CONFIG_PPC_E500) &&
+ !bpf_jit_bypass_spec_v1()) {
+ EMIT(PPC_RAW_ISYNC());
+ EMIT(PPC_RAW_SYNC());
+ sync_emitted = true;
+ }
+ if (!bpf_jit_bypass_spec_v4()) {
+ switch (stf_barrier) {
+ case STF_BARRIER_EIEIO:
+ EMIT(PPC_RAW_EIEIO() | 0x02000000);
+ break;
+ case STF_BARRIER_SYNC_ORI:
+ if (!sync_emitted)
+ EMIT(PPC_RAW_SYNC());
+ EMIT(PPC_RAW_LD(tmp1_reg, _R13, 0));
+ EMIT(PPC_RAW_ORI(_R31, _R31, 0));
+ ori31_emitted = true;
+ break;
+ case STF_BARRIER_FALLBACK:
+ ctx->seen |= SEEN_FUNC;
+ PPC_LI64(_R12, dereference_kernel_function_descriptor(bpf_stf_barrier));
+ EMIT(PPC_RAW_MTCTR(_R12));
+ EMIT(PPC_RAW_BCTRL());
+ break;
+ case STF_BARRIER_NONE:
+ break;
+ }
+ }
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+ !bpf_jit_bypass_spec_v1() &&
+ !ori31_emitted)
+ EMIT(PPC_RAW_ORI(_R31, _R31, 0));
+ break;
+
+ /*
+ * BPF_ST(X)
+ */
+ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
+ case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
+ if (BPF_CLASS(code) == BPF_ST) {
+ EMIT(PPC_RAW_LI(tmp1_reg, imm));
+ src_reg = tmp1_reg;
+ }
+ EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
+ case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
+ if (BPF_CLASS(code) == BPF_ST) {
+ EMIT(PPC_RAW_LI(tmp1_reg, imm));
+ src_reg = tmp1_reg;
+ }
+ EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
+ case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
+ if (BPF_CLASS(code) == BPF_ST) {
+ PPC_LI32(tmp1_reg, imm);
+ src_reg = tmp1_reg;
+ }
+ EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
+ break;
+ case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
+ case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
+ if (BPF_CLASS(code) == BPF_ST) {
+ PPC_LI32(tmp1_reg, imm);
+ src_reg = tmp1_reg;
+ }
+ if (off % 4) {
+ EMIT(PPC_RAW_LI(tmp2_reg, off));
+ EMIT(PPC_RAW_STDX(src_reg, dst_reg, tmp2_reg));
+ } else {
+ EMIT(PPC_RAW_STD(src_reg, dst_reg, off));
+ }
+ break;
+
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+
+ EMIT(PPC_RAW_ADD(tmp1_reg, dst_reg, bpf_to_ppc(ARENA_VM_START)));
+
+ ret = bpf_jit_emit_probe_mem_store(ctx, src_reg, off, code, image);
+ if (ret)
+ return ret;
+
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+ ctx->idx - 1, 4, -1, code);
+ if (ret)
+ return ret;
+
+ break;
+
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+
+ EMIT(PPC_RAW_ADD(tmp1_reg, dst_reg, bpf_to_ppc(ARENA_VM_START)));
+
+ if (BPF_SIZE(code) == BPF_W || BPF_SIZE(code) == BPF_DW) {
+ PPC_LI32(tmp2_reg, imm);
+ src_reg = tmp2_reg;
+ } else {
+ EMIT(PPC_RAW_LI(tmp2_reg, imm));
+ src_reg = tmp2_reg;
+ }
+
+ ret = bpf_jit_emit_probe_mem_store(ctx, src_reg, off, code, image);
+ if (ret)
+ return ret;
+
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+ ctx->idx - 1, 4, -1, code);
+ if (ret)
+ return ret;
+
+ break;
+
+ /*
+ * BPF_STX PROBE_ATOMIC (arena atomic ops)
+ */
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
+ EMIT(PPC_RAW_ADD(dst_reg, dst_reg, bpf_to_ppc(ARENA_VM_START)));
+ ret = bpf_jit_emit_atomic_ops(image, ctx, &insn[i],
+ &jmp_off, &tmp_idx, &addrs[i + 1]);
+ if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ pr_err_ratelimited(
+ "eBPF filter atomic op code %02x (@%d) unsupported\n",
+ code, i);
+ }
+ return ret;
+ }
+ /* LDARX/LWARX should land here on exception. */
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+ tmp_idx, jmp_off, dst_reg, code);
+ if (ret)
+ return ret;
+
+ /* Retrieve the dst_reg */
+ EMIT(PPC_RAW_SUB(dst_reg, dst_reg, bpf_to_ppc(ARENA_VM_START)));
+ break;
+
+ /*
+ * BPF_STX ATOMIC (atomic ops)
+ */
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ case BPF_STX | BPF_ATOMIC | BPF_W:
+ case BPF_STX | BPF_ATOMIC | BPF_DW:
+ if (bpf_atomic_is_load_store(&insn[i])) {
+ ret = emit_atomic_ld_st(insn[i], ctx, image);
+ if (ret)
+ return ret;
+
+ if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+ break;
+ } else if (size == BPF_B || size == BPF_H) {
+ pr_err_ratelimited(
+ "eBPF filter atomic op code %02x (@%d) unsupported\n",
+ code, i);
+ return -EOPNOTSUPP;
+ }
+
+ ret = bpf_jit_emit_atomic_ops(image, ctx, &insn[i],
+ &jmp_off, &tmp_idx, &addrs[i + 1]);
+ if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ pr_err_ratelimited(
+ "eBPF filter atomic op code %02x (@%d) unsupported\n",
+ code, i);
+ }
+ return ret;
+ }
+ break;
+
+ /*
+ * BPF_LDX
+ */
+ /* dst = *(u8 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_B:
+ case BPF_LDX | BPF_MEMSX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
+ /* dst = *(u16 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_H:
+ case BPF_LDX | BPF_MEMSX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
+ /* dst = *(u32 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_W:
+ case BPF_LDX | BPF_MEMSX | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
+ /* dst = *(u64 *)(ul) (src + off) */
+ case BPF_LDX | BPF_MEM | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ /*
+ * As PTR_TO_BTF_ID that uses BPF_PROBE_MEM mode could either be a valid
+ * kernel pointer or NULL but not a userspace address, execute BPF_PROBE_MEM
+ * load only if addr is kernel address (see is_kernel_addr()), otherwise
+ * set dst_reg=0 and move on.
+ */
+ if (BPF_MODE(code) == BPF_PROBE_MEM || BPF_MODE(code) == BPF_PROBE_MEMSX) {
+ EMIT(PPC_RAW_ADDI(tmp1_reg, src_reg, off));
+ if (IS_ENABLED(CONFIG_PPC_BOOK3E_64))
+ PPC_LI64(tmp2_reg, 0x8000000000000000ul);
+ else /* BOOK3S_64 */
+ PPC_LI64(tmp2_reg, PAGE_OFFSET);
+ EMIT(PPC_RAW_CMPLD(tmp1_reg, tmp2_reg));
+ PPC_BCC_SHORT(COND_GT, (ctx->idx + 3) * 4);
+ EMIT(PPC_RAW_LI(dst_reg, 0));
+ /*
+ * Check if 'off' is word aligned for BPF_DW, because
+ * we might generate two instructions.
+ */
+ if ((BPF_SIZE(code) == BPF_DW && (off & 3)) ||
+ (BPF_SIZE(code) == BPF_B &&
+ BPF_MODE(code) == BPF_PROBE_MEMSX) ||
+ (BPF_SIZE(code) == BPF_B && BPF_MODE(code) == BPF_MEMSX))
+ PPC_JMP((ctx->idx + 3) * 4);
+ else
+ PPC_JMP((ctx->idx + 2) * 4);
+ }
+
+ if (BPF_MODE(code) == BPF_MEMSX || BPF_MODE(code) == BPF_PROBE_MEMSX) {
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ EMIT(PPC_RAW_EXTSB(dst_reg, dst_reg));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHA(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWA(dst_reg, src_reg, off));
+ break;
+ }
+ } else {
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
+ break;
+ case BPF_DW:
+ if (off % 4) {
+ EMIT(PPC_RAW_LI(tmp1_reg, off));
+ EMIT(PPC_RAW_LDX(dst_reg, src_reg, tmp1_reg));
+ } else {
+ EMIT(PPC_RAW_LD(dst_reg, src_reg, off));
+ }
+ break;
+ }
+ }
+
+ if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+
+ if (BPF_MODE(code) == BPF_PROBE_MEM) {
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+ ctx->idx - 1, 4, dst_reg, code);
+ if (ret)
+ return ret;
+ }
+ break;
+
+ /* dst = *(u64 *)(ul) (src + ARENA_VM_START + off) */
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+ case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+
+ EMIT(PPC_RAW_ADD(tmp1_reg, src_reg, bpf_to_ppc(ARENA_VM_START)));
+
+ switch (size) {
+ case BPF_B:
+ EMIT(PPC_RAW_LBZ(dst_reg, tmp1_reg, off));
+ break;
+ case BPF_H:
+ EMIT(PPC_RAW_LHZ(dst_reg, tmp1_reg, off));
+ break;
+ case BPF_W:
+ EMIT(PPC_RAW_LWZ(dst_reg, tmp1_reg, off));
+ break;
+ case BPF_DW:
+ if (off % 4) {
+ EMIT(PPC_RAW_LI(tmp2_reg, off));
+ EMIT(PPC_RAW_LDX(dst_reg, tmp1_reg, tmp2_reg));
+ } else {
+ EMIT(PPC_RAW_LD(dst_reg, tmp1_reg, off));
+ }
+ break;
+ }
+
+ if (size != BPF_DW && insn_is_zext(&insn[i + 1]))
+ addrs[++i] = ctx->idx * 4;
+
+ ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx,
+ ctx->idx - 1, 4, dst_reg, code);
+ if (ret)
+ return ret;
+ break;
+
+ /*
+ * Doubleword load
+ * 16 byte instruction that uses two 'struct bpf_insn'
+ */
+ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
+ imm64 = ((u64)(u32) insn[i].imm) |
+ (((u64)(u32) insn[i+1].imm) << 32);
+ PPC_LI64(dst_reg, imm64);
+ /* Adjust for two bpf instructions */
+ addrs[++i] = ctx->idx * 4;
+ break;
+
+ /*
+ * Return/Exit
+ */
+ case BPF_JMP | BPF_EXIT:
+ /*
+ * If this isn't the very last instruction, branch to
+ * the epilogue. If we _are_ the last instruction,
+ * we'll just fall through to the epilogue.
+ */
+ if (i != flen - 1) {
+ ret = bpf_jit_emit_exit_insn(image, ctx, tmp1_reg, exit_addr);
+ if (ret)
+ return ret;
+ }
+ /* else fall through to the epilogue */
+ break;
+
+ /*
+ * Call kernel helper or bpf function
+ */
+ case BPF_JMP | BPF_CALL:
+ ctx->seen |= SEEN_FUNC;
+
+ ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
+ &func_addr, &func_addr_fixed);
+ if (ret < 0)
+ return ret;
+
+ ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr);
+ if (ret)
+ return ret;
+
+ /* move return value from r3 to BPF_REG_0 */
+ EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_0), _R3));
+ break;
+
+ /*
+ * Jumps and branches
+ */
+ case BPF_JMP | BPF_JA:
+ PPC_JMP(addrs[i + 1 + off]);
+ break;
+ case BPF_JMP32 | BPF_JA:
+ PPC_JMP(addrs[i + 1 + imm]);
+ break;
+
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ true_cond = COND_GT;
+ goto cond_branch;
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ true_cond = COND_LT;
+ goto cond_branch;
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ true_cond = COND_GE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ true_cond = COND_LE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ true_cond = COND_EQ;
+ goto cond_branch;
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ true_cond = COND_NE;
+ goto cond_branch;
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ true_cond = COND_NE;
+ /* Fall through */
+
+cond_branch:
+ switch (code) {
+ case BPF_JMP | BPF_JGT | BPF_X:
+ case BPF_JMP | BPF_JLT | BPF_X:
+ case BPF_JMP | BPF_JGE | BPF_X:
+ case BPF_JMP | BPF_JLE | BPF_X:
+ case BPF_JMP | BPF_JEQ | BPF_X:
+ case BPF_JMP | BPF_JNE | BPF_X:
+ case BPF_JMP32 | BPF_JGT | BPF_X:
+ case BPF_JMP32 | BPF_JLT | BPF_X:
+ case BPF_JMP32 | BPF_JGE | BPF_X:
+ case BPF_JMP32 | BPF_JLE | BPF_X:
+ case BPF_JMP32 | BPF_JEQ | BPF_X:
+ case BPF_JMP32 | BPF_JNE | BPF_X:
+ /* unsigned comparison */
+ if (BPF_CLASS(code) == BPF_JMP32)
+ EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP32 | BPF_JSGT | BPF_X:
+ case BPF_JMP32 | BPF_JSLT | BPF_X:
+ case BPF_JMP32 | BPF_JSGE | BPF_X:
+ case BPF_JMP32 | BPF_JSLE | BPF_X:
+ /* signed comparison */
+ if (BPF_CLASS(code) == BPF_JMP32)
+ EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
+ else
+ EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
+ break;
+ case BPF_JMP | BPF_JSET | BPF_X:
+ case BPF_JMP32 | BPF_JSET | BPF_X:
+ if (BPF_CLASS(code) == BPF_JMP) {
+ EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg, src_reg));
+ } else {
+ EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, src_reg));
+ EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg, 0, 0, 31));
+ }
+ break;
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP | BPF_JEQ | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JLT | BPF_K:
+ case BPF_JMP | BPF_JGE | BPF_K:
+ case BPF_JMP | BPF_JLE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JEQ | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JLT | BPF_K:
+ case BPF_JMP32 | BPF_JGE | BPF_K:
+ case BPF_JMP32 | BPF_JLE | BPF_K:
+ {
+ bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
+ /*
+ * Need sign-extended load, so only positive
+ * values can be used as imm in cmpldi
+ */
+ if (imm >= 0 && imm < 32768) {
+ if (is_jmp32)
+ EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
+ else
+ EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
+ } else {
+ /* sign-extending load */
+ PPC_LI32(tmp1_reg, imm);
+ /* ... but unsigned comparison */
+ if (is_jmp32)
+ EMIT(PPC_RAW_CMPLW(dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_CMPLD(dst_reg, tmp1_reg));
+ }
+ break;
+ }
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSLT | BPF_K:
+ case BPF_JMP32 | BPF_JSGE | BPF_K:
+ case BPF_JMP32 | BPF_JSLE | BPF_K:
+ {
+ bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
+
+ /*
+ * signed comparison, so any 16-bit value
+ * can be used in cmpdi
+ */
+ if (imm >= -32768 && imm < 32768) {
+ if (is_jmp32)
+ EMIT(PPC_RAW_CMPWI(dst_reg, imm));
+ else
+ EMIT(PPC_RAW_CMPDI(dst_reg, imm));
+ } else {
+ PPC_LI32(tmp1_reg, imm);
+ if (is_jmp32)
+ EMIT(PPC_RAW_CMPW(dst_reg, tmp1_reg));
+ else
+ EMIT(PPC_RAW_CMPD(dst_reg, tmp1_reg));
+ }
+ break;
+ }
+ case BPF_JMP | BPF_JSET | BPF_K:
+ case BPF_JMP32 | BPF_JSET | BPF_K:
+ /* andi does not sign-extend the immediate */
+ if (imm >= 0 && imm < 32768)
+ /* PPC_ANDI is _only/always_ dot-form */
+ EMIT(PPC_RAW_ANDI(tmp1_reg, dst_reg, imm));
+ else {
+ PPC_LI32(tmp1_reg, imm);
+ if (BPF_CLASS(code) == BPF_JMP) {
+ EMIT(PPC_RAW_AND_DOT(tmp1_reg, dst_reg,
+ tmp1_reg));
+ } else {
+ EMIT(PPC_RAW_AND(tmp1_reg, dst_reg, tmp1_reg));
+ EMIT(PPC_RAW_RLWINM_DOT(tmp1_reg, tmp1_reg,
+ 0, 0, 31));
+ }
+ }
+ break;
+ }
+ PPC_BCC(true_cond, addrs[i + 1 + off]);
+ break;
+
+ /*
+ * Tail call
+ */
+ case BPF_JMP | BPF_TAIL_CALL:
+ ctx->seen |= SEEN_TAILCALL;
+ ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
+ if (ret < 0)
+ return ret;
+ break;
+
+ default:
+ /*
+ * The filter contains something cruel & unusual.
+ * We don't handle it, but also there shouldn't be
+ * anything missing from our list.
+ */
+ pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
+ code, i);
+ return -ENOTSUPP;
+ }
+ }
+
+ /* Set end-of-body-code address for exit. */
+ addrs[i] = ctx->idx * 4;
+
+ return 0;
+}
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
deleted file mode 100644
index cedbbeced632..000000000000
--- a/arch/powerpc/oprofile/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
-ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-
-obj-$(CONFIG_OPROFILE) += oprofile.o
-
-DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
- oprof.o cpu_buffer.o buffer_sync.o \
- event_buffer.o oprofile_files.o \
- oprofilefs.o oprofile_stats.o \
- timer_int.o )
-
-oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
-oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
- cell/spu_profiler.o cell/vma_map.o \
- cell/spu_task_sync.o
-oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_power4.o op_model_pa6t.o
-oprofile-$(CONFIG_FSL_EMB_PERFMON) += op_model_fsl_emb.o
-oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
deleted file mode 100644
index f75301f2c85f..000000000000
--- a/arch/powerpc/oprofile/backtrace.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/**
- * Copyright (C) 2005 Brian Rogan <bcr6@cornell.edu>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
-**/
-
-#include <linux/oprofile.h>
-#include <linux/sched.h>
-#include <asm/processor.h>
-#include <asm/uaccess.h>
-#include <asm/compat.h>
-
-#define STACK_SP(STACK) *(STACK)
-
-#define STACK_LR64(STACK) *((unsigned long *)(STACK) + 2)
-#define STACK_LR32(STACK) *((unsigned int *)(STACK) + 1)
-
-#ifdef CONFIG_PPC64
-#define STACK_LR(STACK) STACK_LR64(STACK)
-#else
-#define STACK_LR(STACK) STACK_LR32(STACK)
-#endif
-
-static unsigned int user_getsp32(unsigned int sp, int is_first)
-{
- unsigned int stack_frame[2];
- void __user *p = compat_ptr(sp);
-
- if (!access_ok(VERIFY_READ, p, sizeof(stack_frame)))
- return 0;
-
- /*
- * The most likely reason for this is that we returned -EFAULT,
- * which means that we've done all that we can do from
- * interrupt context.
- */
- if (__copy_from_user_inatomic(stack_frame, p, sizeof(stack_frame)))
- return 0;
-
- if (!is_first)
- oprofile_add_trace(STACK_LR32(stack_frame));
-
- /*
- * We do not enforce increasing stack addresses here because
- * we may transition to a different stack, eg a signal handler.
- */
- return STACK_SP(stack_frame);
-}
-
-#ifdef CONFIG_PPC64
-static unsigned long user_getsp64(unsigned long sp, int is_first)
-{
- unsigned long stack_frame[3];
-
- if (!access_ok(VERIFY_READ, (void __user *)sp, sizeof(stack_frame)))
- return 0;
-
- if (__copy_from_user_inatomic(stack_frame, (void __user *)sp,
- sizeof(stack_frame)))
- return 0;
-
- if (!is_first)
- oprofile_add_trace(STACK_LR64(stack_frame));
-
- return STACK_SP(stack_frame);
-}
-#endif
-
-static unsigned long kernel_getsp(unsigned long sp, int is_first)
-{
- unsigned long *stack_frame = (unsigned long *)sp;
-
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
- return 0;
-
- if (!is_first)
- oprofile_add_trace(STACK_LR(stack_frame));
-
- /*
- * We do not enforce increasing stack addresses here because
- * we might be transitioning from an interrupt stack to a kernel
- * stack. validate_sp() is designed to understand this, so just
- * use it.
- */
- return STACK_SP(stack_frame);
-}
-
-void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth)
-{
- unsigned long sp = regs->gpr[1];
- int first_frame = 1;
-
- /* We ditch the top stackframe so need to loop through an extra time */
- depth += 1;
-
- if (!user_mode(regs)) {
- while (depth--) {
- sp = kernel_getsp(sp, first_frame);
- if (!sp)
- break;
- first_frame = 0;
- }
- } else {
-#ifdef CONFIG_PPC64
- if (!is_32bit_task()) {
- while (depth--) {
- sp = user_getsp64(sp, first_frame);
- if (!sp)
- break;
- first_frame = 0;
- }
-
- return;
- }
-#endif
-
- while (depth--) {
- sp = user_getsp32(sp, first_frame);
- if (!sp)
- break;
- first_frame = 0;
- }
- }
-}
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
deleted file mode 100644
index 964b93974d89..000000000000
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ /dev/null
@@ -1,114 +0,0 @@
- /*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#ifndef PR_UTIL_H
-#define PR_UTIL_H
-
-#include <linux/cpumask.h>
-#include <linux/oprofile.h>
-#include <asm/cell-pmu.h>
-#include <asm/cell-regs.h>
-#include <asm/spu.h>
-
-/* Defines used for sync_start */
-#define SKIP_GENERIC_SYNC 0
-#define SYNC_START_ERROR -1
-#define DO_GENERIC_SYNC 1
-#define SPUS_PER_NODE 8
-#define DEFAULT_TIMER_EXPIRE (HZ / 10)
-
-extern struct delayed_work spu_work;
-extern int spu_prof_running;
-
-#define TRACE_ARRAY_SIZE 1024
-
-extern spinlock_t oprof_spu_smpl_arry_lck;
-
-struct spu_overlay_info { /* map of sections within an SPU overlay */
- unsigned int vma; /* SPU virtual memory address from elf */
- unsigned int size; /* size of section from elf */
- unsigned int offset; /* offset of section into elf file */
- unsigned int buf;
-};
-
-struct vma_to_fileoffset_map { /* map of sections within an SPU program */
- struct vma_to_fileoffset_map *next; /* list pointer */
- unsigned int vma; /* SPU virtual memory address from elf */
- unsigned int size; /* size of section from elf */
- unsigned int offset; /* offset of section into elf file */
- unsigned int guard_ptr;
- unsigned int guard_val;
- /*
- * The guard pointer is an entry in the _ovly_buf_table,
- * computed using ovly.buf as the index into the table. Since
- * ovly.buf values begin at '1' to reference the first (or 0th)
- * entry in the _ovly_buf_table, the computation subtracts 1
- * from ovly.buf.
- * The guard value is stored in the _ovly_buf_table entry and
- * is an index (starting at 1) back to the _ovly_table entry
- * that is pointing at this _ovly_buf_table entry. So, for
- * example, for an overlay scenario with one overlay segment
- * and two overlay sections:
- * - Section 1 points to the first entry of the
- * _ovly_buf_table, which contains a guard value
- * of '1', referencing the first (index=0) entry of
- * _ovly_table.
- * - Section 2 points to the second entry of the
- * _ovly_buf_table, which contains a guard value
- * of '2', referencing the second (index=1) entry of
- * _ovly_table.
- */
-
-};
-
-struct spu_buffer {
- int last_guard_val;
- int ctx_sw_seen;
- unsigned long *buff;
- unsigned int head, tail;
-};
-
-
-/* The three functions below are for maintaining and accessing
- * the vma-to-fileoffset map.
- */
-struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
- unsigned long objectid);
-unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
- unsigned int vma, const struct spu *aSpu,
- int *grd_val);
-void vma_map_free(struct vma_to_fileoffset_map *map);
-
-/*
- * Entry point for SPU profiling.
- * cycles_reset is the SPU_CYCLES count value specified by the user.
- */
-int start_spu_profiling_cycles(unsigned int cycles_reset);
-void start_spu_profiling_events(void);
-
-void stop_spu_profiling_cycles(void);
-void stop_spu_profiling_events(void);
-
-/* add the necessary profiling hooks */
-int spu_sync_start(void);
-
-/* remove the hooks */
-int spu_sync_stop(void);
-
-/* Record SPU program counter samples to the oprofile event buffer. */
-void spu_sync_buffer(int spu_num, unsigned int *samples,
- int num_samples);
-
-void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset);
-
-#endif /* PR_UTIL_H */
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
deleted file mode 100644
index b129d007e7fe..000000000000
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Authors: Maynard Johnson <maynardj@us.ibm.com>
- * Carl Love <carll@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/hrtimer.h>
-#include <linux/smp.h>
-#include <linux/slab.h>
-#include <asm/cell-pmu.h>
-#include <asm/time.h>
-#include "pr_util.h"
-
-#define SCALE_SHIFT 14
-
-static u32 *samples;
-
-/* spu_prof_running is a flag used to indicate if spu profiling is enabled
- * or not. It is set by the routines start_spu_profiling_cycles() and
- * start_spu_profiling_events(). The flag is cleared by the routines
- * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These
- * routines are called via global_start() and global_stop() which are called in
- * op_powerpc_start() and op_powerpc_stop(). These routines are called once
- * per system as a result of the user starting/stopping oprofile. Hence, only
- * one CPU per user at a time will be changing the value of spu_prof_running.
- * In general, OProfile does not protect against multiple users trying to run
- * OProfile at a time.
- */
-int spu_prof_running;
-static unsigned int profiling_interval;
-
-#define NUM_SPU_BITS_TRBUF 16
-#define SPUS_PER_TB_ENTRY 4
-
-#define SPU_PC_MASK 0xFFFF
-
-DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
-unsigned long oprof_spu_smpl_arry_lck_flags;
-
-void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
-{
- unsigned long ns_per_cyc;
-
- if (!freq_khz)
- freq_khz = ppc_proc_freq/1000;
-
- /* To calculate a timeout in nanoseconds, the basic
- * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency).
- * To avoid floating point math, we use the scale math
- * technique as described in linux/jiffies.h. We use
- * a scale factor of SCALE_SHIFT, which provides 4 decimal places
- * of precision. This is close enough for the purpose at hand.
- *
- * The value of the timeout should be small enough that the hw
- * trace buffer will not get more than about 1/3 full for the
- * maximum user specified (the LFSR value) hw sampling frequency.
- * This is to ensure the trace buffer will never fill even if the
- * kernel thread scheduling varies under a heavy system load.
- */
-
- ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz;
- profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT;
-
-}
-
-/*
- * Extract SPU PC from trace buffer entry
- */
-static void spu_pc_extract(int cpu, int entry)
-{
- /* the trace buffer is 128 bits */
- u64 trace_buffer[2];
- u64 spu_mask;
- int spu;
-
- spu_mask = SPU_PC_MASK;
-
- /* Each SPU PC is 16 bits; hence, four spus in each of
- * the two 64-bit buffer entries that make up the
- * 128-bit trace_buffer entry. Process two 64-bit values
- * simultaneously.
- * trace[0] SPU PC contents are: 0 1 2 3
- * trace[1] SPU PC contents are: 4 5 6 7
- */
-
- cbe_read_trace_buffer(cpu, trace_buffer);
-
- for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) {
- /* spu PC trace entry is upper 16 bits of the
- * 18 bit SPU program counter
- */
- samples[spu * TRACE_ARRAY_SIZE + entry]
- = (spu_mask & trace_buffer[0]) << 2;
- samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry]
- = (spu_mask & trace_buffer[1]) << 2;
-
- trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF;
- trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF;
- }
-}
-
-static int cell_spu_pc_collection(int cpu)
-{
- u32 trace_addr;
- int entry;
-
- /* process the collected SPU PC for the node */
-
- entry = 0;
-
- trace_addr = cbe_read_pm(cpu, trace_address);
- while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
- /* there is data in the trace buffer to process */
- spu_pc_extract(cpu, entry);
-
- entry++;
-
- if (entry >= TRACE_ARRAY_SIZE)
- /* spu_samples is full */
- break;
-
- trace_addr = cbe_read_pm(cpu, trace_address);
- }
-
- return entry;
-}
-
-
-static enum hrtimer_restart profile_spus(struct hrtimer *timer)
-{
- ktime_t kt;
- int cpu, node, k, num_samples, spu_num;
-
- if (!spu_prof_running)
- goto stop;
-
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- node = cbe_cpu_to_node(cpu);
-
- /* There should only be one kernel thread at a time processing
- * the samples. In the very unlikely case that the processing
- * is taking a very long time and multiple kernel threads are
- * started to process the samples. Make sure only one kernel
- * thread is working on the samples array at a time. The
- * sample array must be loaded and then processed for a given
- * cpu. The sample array is not per cpu.
- */
- spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
- oprof_spu_smpl_arry_lck_flags);
- num_samples = cell_spu_pc_collection(cpu);
-
- if (num_samples == 0) {
- spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
- oprof_spu_smpl_arry_lck_flags);
- continue;
- }
-
- for (k = 0; k < SPUS_PER_NODE; k++) {
- spu_num = k + (node * SPUS_PER_NODE);
- spu_sync_buffer(spu_num,
- samples + (k * TRACE_ARRAY_SIZE),
- num_samples);
- }
-
- spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
- oprof_spu_smpl_arry_lck_flags);
-
- }
- smp_wmb(); /* insure spu event buffer updates are written */
- /* don't want events intermingled... */
-
- kt = ktime_set(0, profiling_interval);
- if (!spu_prof_running)
- goto stop;
- hrtimer_forward(timer, timer->base->get_time(), kt);
- return HRTIMER_RESTART;
-
- stop:
- printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n");
- return HRTIMER_NORESTART;
-}
-
-static struct hrtimer timer;
-/*
- * Entry point for SPU cycle profiling.
- * NOTE: SPU profiling is done system-wide, not per-CPU.
- *
- * cycles_reset is the count value specified by the user when
- * setting up OProfile to count SPU_CYCLES.
- */
-int start_spu_profiling_cycles(unsigned int cycles_reset)
-{
- ktime_t kt;
-
- pr_debug("timer resolution: %lu\n", TICK_NSEC);
- kt = ktime_set(0, profiling_interval);
- hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hrtimer_set_expires(&timer, kt);
- timer.function = profile_spus;
-
- /* Allocate arrays for collecting SPU PC samples */
- samples = kzalloc(SPUS_PER_NODE *
- TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL);
-
- if (!samples)
- return -ENOMEM;
-
- spu_prof_running = 1;
- hrtimer_start(&timer, kt, HRTIMER_MODE_REL);
- schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
-
- return 0;
-}
-
-/*
- * Entry point for SPU event profiling.
- * NOTE: SPU profiling is done system-wide, not per-CPU.
- *
- * cycles_reset is the count value specified by the user when
- * setting up OProfile to count SPU_CYCLES.
- */
-void start_spu_profiling_events(void)
-{
- spu_prof_running = 1;
- schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
-
- return;
-}
-
-void stop_spu_profiling_cycles(void)
-{
- spu_prof_running = 0;
- hrtimer_cancel(&timer);
- kfree(samples);
- pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
-}
-
-void stop_spu_profiling_events(void)
-{
- spu_prof_running = 0;
-}
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
deleted file mode 100644
index 28f1af2db1f5..000000000000
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ /dev/null
@@ -1,660 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* The purpose of this file is to handle SPU event task switching
- * and to record SPU context information into the OProfile
- * event buffer.
- *
- * Additionally, the spu_sync_buffer function is provided as a helper
- * for recoding actual SPU program counter samples to the event buffer.
- */
-#include <linux/dcookies.h>
-#include <linux/kref.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/module.h>
-#include <linux/notifier.h>
-#include <linux/numa.h>
-#include <linux/oprofile.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include "pr_util.h"
-
-#define RELEASE_ALL 9999
-
-static DEFINE_SPINLOCK(buffer_lock);
-static DEFINE_SPINLOCK(cache_lock);
-static int num_spu_nodes;
-int spu_prof_num_nodes;
-
-struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
-struct delayed_work spu_work;
-static unsigned max_spu_buff;
-
-static void spu_buff_add(unsigned long int value, int spu)
-{
- /* spu buff is a circular buffer. Add entries to the
- * head. Head is the index to store the next value.
- * The buffer is full when there is one available entry
- * in the queue, i.e. head and tail can't be equal.
- * That way we can tell the difference between the
- * buffer being full versus empty.
- *
- * ASSUPTION: the buffer_lock is held when this function
- * is called to lock the buffer, head and tail.
- */
- int full = 1;
-
- if (spu_buff[spu].head >= spu_buff[spu].tail) {
- if ((spu_buff[spu].head - spu_buff[spu].tail)
- < (max_spu_buff - 1))
- full = 0;
-
- } else if (spu_buff[spu].tail > spu_buff[spu].head) {
- if ((spu_buff[spu].tail - spu_buff[spu].head)
- > 1)
- full = 0;
- }
-
- if (!full) {
- spu_buff[spu].buff[spu_buff[spu].head] = value;
- spu_buff[spu].head++;
-
- if (spu_buff[spu].head >= max_spu_buff)
- spu_buff[spu].head = 0;
- } else {
- /* From the user's perspective make the SPU buffer
- * size management/overflow look like we are using
- * per cpu buffers. The user uses the same
- * per cpu parameter to adjust the SPU buffer size.
- * Increment the sample_lost_overflow to inform
- * the user the buffer size needs to be increased.
- */
- oprofile_cpu_buffer_inc_smpl_lost();
- }
-}
-
-/* This function copies the per SPU buffers to the
- * OProfile kernel buffer.
- */
-void sync_spu_buff(void)
-{
- int spu;
- unsigned long flags;
- int curr_head;
-
- for (spu = 0; spu < num_spu_nodes; spu++) {
- /* In case there was an issue and the buffer didn't
- * get created skip it.
- */
- if (spu_buff[spu].buff == NULL)
- continue;
-
- /* Hold the lock to make sure the head/tail
- * doesn't change while spu_buff_add() is
- * deciding if the buffer is full or not.
- * Being a little paranoid.
- */
- spin_lock_irqsave(&buffer_lock, flags);
- curr_head = spu_buff[spu].head;
- spin_unlock_irqrestore(&buffer_lock, flags);
-
- /* Transfer the current contents to the kernel buffer.
- * data can still be added to the head of the buffer.
- */
- oprofile_put_buff(spu_buff[spu].buff,
- spu_buff[spu].tail,
- curr_head, max_spu_buff);
-
- spin_lock_irqsave(&buffer_lock, flags);
- spu_buff[spu].tail = curr_head;
- spin_unlock_irqrestore(&buffer_lock, flags);
- }
-
-}
-
-static void wq_sync_spu_buff(struct work_struct *work)
-{
- /* move data from spu buffers to kernel buffer */
- sync_spu_buff();
-
- /* only reschedule if profiling is not done */
- if (spu_prof_running)
- schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
-}
-
-/* Container for caching information about an active SPU task. */
-struct cached_info {
- struct vma_to_fileoffset_map *map;
- struct spu *the_spu; /* needed to access pointer to local_store */
- struct kref cache_ref;
-};
-
-static struct cached_info *spu_info[MAX_NUMNODES * 8];
-
-static void destroy_cached_info(struct kref *kref)
-{
- struct cached_info *info;
-
- info = container_of(kref, struct cached_info, cache_ref);
- vma_map_free(info->map);
- kfree(info);
- module_put(THIS_MODULE);
-}
-
-/* Return the cached_info for the passed SPU number.
- * ATTENTION: Callers are responsible for obtaining the
- * cache_lock if needed prior to invoking this function.
- */
-static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
-{
- struct kref *ref;
- struct cached_info *ret_info;
-
- if (spu_num >= num_spu_nodes) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: Invalid index %d into spu info cache\n",
- __func__, __LINE__, spu_num);
- ret_info = NULL;
- goto out;
- }
- if (!spu_info[spu_num] && the_spu) {
- ref = spu_get_profile_private_kref(the_spu->ctx);
- if (ref) {
- spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
- kref_get(&spu_info[spu_num]->cache_ref);
- }
- }
-
- ret_info = spu_info[spu_num];
- out:
- return ret_info;
-}
-
-
-/* Looks for cached info for the passed spu. If not found, the
- * cached info is created for the passed spu.
- * Returns 0 for success; otherwise, -1 for error.
- */
-static int
-prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
-{
- unsigned long flags;
- struct vma_to_fileoffset_map *new_map;
- int retval = 0;
- struct cached_info *info;
-
- /* We won't bother getting cache_lock here since
- * don't do anything with the cached_info that's returned.
- */
- info = get_cached_info(spu, spu->number);
-
- if (info) {
- pr_debug("Found cached SPU info.\n");
- goto out;
- }
-
- /* Create cached_info and set spu_info[spu->number] to point to it.
- * spu->number is a system-wide value, not a per-node value.
- */
- info = kzalloc(sizeof(struct cached_info), GFP_KERNEL);
- if (!info) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: create vma_map failed\n",
- __func__, __LINE__);
- retval = -ENOMEM;
- goto err_alloc;
- }
- new_map = create_vma_map(spu, objectId);
- if (!new_map) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: create vma_map failed\n",
- __func__, __LINE__);
- retval = -ENOMEM;
- goto err_alloc;
- }
-
- pr_debug("Created vma_map\n");
- info->map = new_map;
- info->the_spu = spu;
- kref_init(&info->cache_ref);
- spin_lock_irqsave(&cache_lock, flags);
- spu_info[spu->number] = info;
- /* Increment count before passing off ref to SPUFS. */
- kref_get(&info->cache_ref);
-
- /* We increment the module refcount here since SPUFS is
- * responsible for the final destruction of the cached_info,
- * and it must be able to access the destroy_cached_info()
- * function defined in the OProfile module. We decrement
- * the module refcount in destroy_cached_info.
- */
- try_module_get(THIS_MODULE);
- spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
- destroy_cached_info);
- spin_unlock_irqrestore(&cache_lock, flags);
- goto out;
-
-err_alloc:
- kfree(info);
-out:
- return retval;
-}
-
-/*
- * NOTE: The caller is responsible for locking the
- * cache_lock prior to calling this function.
- */
-static int release_cached_info(int spu_index)
-{
- int index, end;
-
- if (spu_index == RELEASE_ALL) {
- end = num_spu_nodes;
- index = 0;
- } else {
- if (spu_index >= num_spu_nodes) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: "
- "Invalid index %d into spu info cache\n",
- __func__, __LINE__, spu_index);
- goto out;
- }
- end = spu_index + 1;
- index = spu_index;
- }
- for (; index < end; index++) {
- if (spu_info[index]) {
- kref_put(&spu_info[index]->cache_ref,
- destroy_cached_info);
- spu_info[index] = NULL;
- }
- }
-
-out:
- return 0;
-}
-
-/* The source code for fast_get_dcookie was "borrowed"
- * from drivers/oprofile/buffer_sync.c.
- */
-
-/* Optimisation. We can manage without taking the dcookie sem
- * because we cannot reach this code without at least one
- * dcookie user still being registered (namely, the reader
- * of the event buffer).
- */
-static inline unsigned long fast_get_dcookie(struct path *path)
-{
- unsigned long cookie;
-
- if (path->dentry->d_flags & DCACHE_COOKIE)
- return (unsigned long)path->dentry;
- get_dcookie(path, &cookie);
- return cookie;
-}
-
-/* Look up the dcookie for the task's mm->exe_file,
- * which corresponds loosely to "application name". Also, determine
- * the offset for the SPU ELF object. If computed offset is
- * non-zero, it implies an embedded SPU object; otherwise, it's a
- * separate SPU binary, in which case we retrieve it's dcookie.
- * For the embedded case, we must determine if SPU ELF is embedded
- * in the executable application or another file (i.e., shared lib).
- * If embedded in a shared lib, we must get the dcookie and return
- * that to the caller.
- */
-static unsigned long
-get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
- unsigned long *spu_bin_dcookie,
- unsigned long spu_ref)
-{
- unsigned long app_cookie = 0;
- unsigned int my_offset = 0;
- struct vm_area_struct *vma;
- struct mm_struct *mm = spu->mm;
-
- if (!mm)
- goto out;
-
- down_read(&mm->mmap_sem);
-
- if (mm->exe_file) {
- app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
- pr_debug("got dcookie for %s\n",
- mm->exe_file->f_dentry->d_name.name);
- }
-
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
- continue;
- my_offset = spu_ref - vma->vm_start;
- if (!vma->vm_file)
- goto fail_no_image_cookie;
-
- pr_debug("Found spu ELF at %X(object-id:%lx) for file %s\n",
- my_offset, spu_ref,
- vma->vm_file->f_dentry->d_name.name);
- *offsetp = my_offset;
- break;
- }
-
- *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
- pr_debug("got dcookie for %s\n", vma->vm_file->f_dentry->d_name.name);
-
- up_read(&mm->mmap_sem);
-
-out:
- return app_cookie;
-
-fail_no_image_cookie:
- up_read(&mm->mmap_sem);
-
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: Cannot find dcookie for SPU binary\n",
- __func__, __LINE__);
- goto out;
-}
-
-
-
-/* This function finds or creates cached context information for the
- * passed SPU and records SPU context information into the OProfile
- * event buffer.
- */
-static int process_context_switch(struct spu *spu, unsigned long objectId)
-{
- unsigned long flags;
- int retval;
- unsigned int offset = 0;
- unsigned long spu_cookie = 0, app_dcookie;
-
- retval = prepare_cached_spu_info(spu, objectId);
- if (retval)
- goto out;
-
- /* Get dcookie first because a mutex_lock is taken in that
- * code path, so interrupts must not be disabled.
- */
- app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
- if (!app_dcookie || !spu_cookie) {
- retval = -ENOENT;
- goto out;
- }
-
- /* Record context info in event buffer */
- spin_lock_irqsave(&buffer_lock, flags);
- spu_buff_add(ESCAPE_CODE, spu->number);
- spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
- spu_buff_add(spu->number, spu->number);
- spu_buff_add(spu->pid, spu->number);
- spu_buff_add(spu->tgid, spu->number);
- spu_buff_add(app_dcookie, spu->number);
- spu_buff_add(spu_cookie, spu->number);
- spu_buff_add(offset, spu->number);
-
- /* Set flag to indicate SPU PC data can now be written out. If
- * the SPU program counter data is seen before an SPU context
- * record is seen, the postprocessing will fail.
- */
- spu_buff[spu->number].ctx_sw_seen = 1;
-
- spin_unlock_irqrestore(&buffer_lock, flags);
- smp_wmb(); /* insure spu event buffer updates are written */
- /* don't want entries intermingled... */
-out:
- return retval;
-}
-
-/*
- * This function is invoked on either a bind_context or unbind_context.
- * If called for an unbind_context, the val arg is 0; otherwise,
- * it is the object-id value for the spu context.
- * The data arg is of type 'struct spu *'.
- */
-static int spu_active_notify(struct notifier_block *self, unsigned long val,
- void *data)
-{
- int retval;
- unsigned long flags;
- struct spu *the_spu = data;
-
- pr_debug("SPU event notification arrived\n");
- if (!val) {
- spin_lock_irqsave(&cache_lock, flags);
- retval = release_cached_info(the_spu->number);
- spin_unlock_irqrestore(&cache_lock, flags);
- } else {
- retval = process_context_switch(the_spu, val);
- }
- return retval;
-}
-
-static struct notifier_block spu_active = {
- .notifier_call = spu_active_notify,
-};
-
-static int number_of_online_nodes(void)
-{
- u32 cpu; u32 tmp;
- int nodes = 0;
- for_each_online_cpu(cpu) {
- tmp = cbe_cpu_to_node(cpu) + 1;
- if (tmp > nodes)
- nodes++;
- }
- return nodes;
-}
-
-static int oprofile_spu_buff_create(void)
-{
- int spu;
-
- max_spu_buff = oprofile_get_cpu_buffer_size();
-
- for (spu = 0; spu < num_spu_nodes; spu++) {
- /* create circular buffers to store the data in.
- * use locks to manage accessing the buffers
- */
- spu_buff[spu].head = 0;
- spu_buff[spu].tail = 0;
-
- /*
- * Create a buffer for each SPU. Can't reliably
- * create a single buffer for all spus due to not
- * enough contiguous kernel memory.
- */
-
- spu_buff[spu].buff = kzalloc((max_spu_buff
- * sizeof(unsigned long)),
- GFP_KERNEL);
-
- if (!spu_buff[spu].buff) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: oprofile_spu_buff_create "
- "failed to allocate spu buffer %d.\n",
- __func__, __LINE__, spu);
-
- /* release the spu buffers that have been allocated */
- while (spu >= 0) {
- kfree(spu_buff[spu].buff);
- spu_buff[spu].buff = 0;
- spu--;
- }
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-/* The main purpose of this function is to synchronize
- * OProfile with SPUFS by registering to be notified of
- * SPU task switches.
- *
- * NOTE: When profiling SPUs, we must ensure that only
- * spu_sync_start is invoked and not the generic sync_start
- * in drivers/oprofile/oprof.c. A return value of
- * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
- * accomplish this.
- */
-int spu_sync_start(void)
-{
- int spu;
- int ret = SKIP_GENERIC_SYNC;
- int register_ret;
- unsigned long flags = 0;
-
- spu_prof_num_nodes = number_of_online_nodes();
- num_spu_nodes = spu_prof_num_nodes * 8;
- INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);
-
- /* create buffer for storing the SPU data to put in
- * the kernel buffer.
- */
- ret = oprofile_spu_buff_create();
- if (ret)
- goto out;
-
- spin_lock_irqsave(&buffer_lock, flags);
- for (spu = 0; spu < num_spu_nodes; spu++) {
- spu_buff_add(ESCAPE_CODE, spu);
- spu_buff_add(SPU_PROFILING_CODE, spu);
- spu_buff_add(num_spu_nodes, spu);
- }
- spin_unlock_irqrestore(&buffer_lock, flags);
-
- for (spu = 0; spu < num_spu_nodes; spu++) {
- spu_buff[spu].ctx_sw_seen = 0;
- spu_buff[spu].last_guard_val = 0;
- }
-
- /* Register for SPU events */
- register_ret = spu_switch_event_register(&spu_active);
- if (register_ret) {
- ret = SYNC_START_ERROR;
- goto out;
- }
-
- pr_debug("spu_sync_start -- running.\n");
-out:
- return ret;
-}
-
-/* Record SPU program counter samples to the oprofile event buffer. */
-void spu_sync_buffer(int spu_num, unsigned int *samples,
- int num_samples)
-{
- unsigned long long file_offset;
- unsigned long flags;
- int i;
- struct vma_to_fileoffset_map *map;
- struct spu *the_spu;
- unsigned long long spu_num_ll = spu_num;
- unsigned long long spu_num_shifted = spu_num_ll << 32;
- struct cached_info *c_info;
-
- /* We need to obtain the cache_lock here because it's
- * possible that after getting the cached_info, the SPU job
- * corresponding to this cached_info may end, thus resulting
- * in the destruction of the cached_info.
- */
- spin_lock_irqsave(&cache_lock, flags);
- c_info = get_cached_info(NULL, spu_num);
- if (!c_info) {
- /* This legitimately happens when the SPU task ends before all
- * samples are recorded.
- * No big deal -- so we just drop a few samples.
- */
- pr_debug("SPU_PROF: No cached SPU contex "
- "for SPU #%d. Dropping samples.\n", spu_num);
- goto out;
- }
-
- map = c_info->map;
- the_spu = c_info->the_spu;
- spin_lock(&buffer_lock);
- for (i = 0; i < num_samples; i++) {
- unsigned int sample = *(samples+i);
- int grd_val = 0;
- file_offset = 0;
- if (sample == 0)
- continue;
- file_offset = vma_map_lookup( map, sample, the_spu, &grd_val);
-
- /* If overlays are used by this SPU application, the guard
- * value is non-zero, indicating which overlay section is in
- * use. We need to discard samples taken during the time
- * period which an overlay occurs (i.e., guard value changes).
- */
- if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
- spu_buff[spu_num].last_guard_val = grd_val;
- /* Drop the rest of the samples. */
- break;
- }
-
- /* We must ensure that the SPU context switch has been written
- * out before samples for the SPU. Otherwise, the SPU context
- * information is not available and the postprocessing of the
- * SPU PC will fail with no available anonymous map information.
- */
- if (spu_buff[spu_num].ctx_sw_seen)
- spu_buff_add((file_offset | spu_num_shifted),
- spu_num);
- }
- spin_unlock(&buffer_lock);
-out:
- spin_unlock_irqrestore(&cache_lock, flags);
-}
-
-
-int spu_sync_stop(void)
-{
- unsigned long flags = 0;
- int ret;
- int k;
-
- ret = spu_switch_event_unregister(&spu_active);
-
- if (ret)
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: spu_switch_event_unregister " \
- "returned %d\n",
- __func__, __LINE__, ret);
-
- /* flush any remaining data in the per SPU buffers */
- sync_spu_buff();
-
- spin_lock_irqsave(&cache_lock, flags);
- ret = release_cached_info(RELEASE_ALL);
- spin_unlock_irqrestore(&cache_lock, flags);
-
- /* remove scheduled work queue item rather then waiting
- * for every queued entry to execute. Then flush pending
- * system wide buffer to event buffer.
- */
- cancel_delayed_work(&spu_work);
-
- for (k = 0; k < num_spu_nodes; k++) {
- spu_buff[k].ctx_sw_seen = 0;
-
- /*
- * spu_sys_buff will be null if there was a problem
- * allocating the buffer. Only delete if it exists.
- */
- kfree(spu_buff[k].buff);
- spu_buff[k].buff = 0;
- }
- pr_debug("spu_sync_stop -- done.\n");
- return ret;
-}
-
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
deleted file mode 100644
index c579b16845da..000000000000
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-/* The code in this source file is responsible for generating
- * vma-to-fileOffset maps for both overlay and non-overlay SPU
- * applications.
- */
-
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/uaccess.h>
-#include <linux/elf.h>
-#include <linux/slab.h>
-#include "pr_util.h"
-
-
-void vma_map_free(struct vma_to_fileoffset_map *map)
-{
- while (map) {
- struct vma_to_fileoffset_map *next = map->next;
- kfree(map);
- map = next;
- }
-}
-
-unsigned int
-vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma,
- const struct spu *aSpu, int *grd_val)
-{
- /*
- * Default the offset to the physical address + a flag value.
- * Addresses of dynamically generated code can't be found in the vma
- * map. For those addresses the flagged value will be sent on to
- * the user space tools so they can be reported rather than just
- * thrown away.
- */
- u32 offset = 0x10000000 + vma;
- u32 ovly_grd;
-
- for (; map; map = map->next) {
- if (vma < map->vma || vma >= map->vma + map->size)
- continue;
-
- if (map->guard_ptr) {
- ovly_grd = *(u32 *)(aSpu->local_store + map->guard_ptr);
- if (ovly_grd != map->guard_val)
- continue;
- *grd_val = ovly_grd;
- }
- offset = vma - map->vma + map->offset;
- break;
- }
-
- return offset;
-}
-
-static struct vma_to_fileoffset_map *
-vma_map_add(struct vma_to_fileoffset_map *map, unsigned int vma,
- unsigned int size, unsigned int offset, unsigned int guard_ptr,
- unsigned int guard_val)
-{
- struct vma_to_fileoffset_map *new =
- kzalloc(sizeof(struct vma_to_fileoffset_map), GFP_KERNEL);
- if (!new) {
- printk(KERN_ERR "SPU_PROF: %s, line %d: malloc failed\n",
- __func__, __LINE__);
- vma_map_free(map);
- return NULL;
- }
-
- new->next = map;
- new->vma = vma;
- new->size = size;
- new->offset = offset;
- new->guard_ptr = guard_ptr;
- new->guard_val = guard_val;
-
- return new;
-}
-
-
-/* Parse SPE ELF header and generate a list of vma_maps.
- * A pointer to the first vma_map in the generated list
- * of vma_maps is returned. */
-struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
- unsigned long __spu_elf_start)
-{
- static const unsigned char expected[EI_PAD] = {
- [EI_MAG0] = ELFMAG0,
- [EI_MAG1] = ELFMAG1,
- [EI_MAG2] = ELFMAG2,
- [EI_MAG3] = ELFMAG3,
- [EI_CLASS] = ELFCLASS32,
- [EI_DATA] = ELFDATA2MSB,
- [EI_VERSION] = EV_CURRENT,
- [EI_OSABI] = ELFOSABI_NONE
- };
-
- int grd_val;
- struct vma_to_fileoffset_map *map = NULL;
- void __user *spu_elf_start = (void __user *)__spu_elf_start;
- struct spu_overlay_info ovly;
- unsigned int overlay_tbl_offset = -1;
- Elf32_Phdr __user *phdr_start;
- Elf32_Shdr __user *shdr_start;
- Elf32_Ehdr ehdr;
- Elf32_Phdr phdr;
- Elf32_Shdr shdr, shdr_str;
- Elf32_Sym sym;
- int i, j;
- char name[32];
-
- unsigned int ovly_table_sym = 0;
- unsigned int ovly_buf_table_sym = 0;
- unsigned int ovly_table_end_sym = 0;
- unsigned int ovly_buf_table_end_sym = 0;
- struct spu_overlay_info __user *ovly_table;
- unsigned int n_ovlys;
-
- /* Get and validate ELF header. */
-
- if (copy_from_user(&ehdr, spu_elf_start, sizeof (ehdr)))
- goto fail;
-
- if (memcmp(ehdr.e_ident, expected, EI_PAD) != 0) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: Unexpected e_ident parsing SPU ELF\n",
- __func__, __LINE__);
- goto fail;
- }
- if (ehdr.e_machine != EM_SPU) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: Unexpected e_machine parsing SPU ELF\n",
- __func__, __LINE__);
- goto fail;
- }
- if (ehdr.e_type != ET_EXEC) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: Unexpected e_type parsing SPU ELF\n",
- __func__, __LINE__);
- goto fail;
- }
- phdr_start = spu_elf_start + ehdr.e_phoff;
- shdr_start = spu_elf_start + ehdr.e_shoff;
-
- /* Traverse program headers. */
- for (i = 0; i < ehdr.e_phnum; i++) {
- if (copy_from_user(&phdr, phdr_start + i, sizeof(phdr)))
- goto fail;
-
- if (phdr.p_type != PT_LOAD)
- continue;
- if (phdr.p_flags & (1 << 27))
- continue;
-
- map = vma_map_add(map, phdr.p_vaddr, phdr.p_memsz,
- phdr.p_offset, 0, 0);
- if (!map)
- goto fail;
- }
-
- pr_debug("SPU_PROF: Created non-overlay maps\n");
- /* Traverse section table and search for overlay-related symbols. */
- for (i = 0; i < ehdr.e_shnum; i++) {
- if (copy_from_user(&shdr, shdr_start + i, sizeof(shdr)))
- goto fail;
-
- if (shdr.sh_type != SHT_SYMTAB)
- continue;
- if (shdr.sh_entsize != sizeof (sym))
- continue;
-
- if (copy_from_user(&shdr_str,
- shdr_start + shdr.sh_link,
- sizeof(shdr)))
- goto fail;
-
- if (shdr_str.sh_type != SHT_STRTAB)
- goto fail;
-
- for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
- if (copy_from_user(&sym, spu_elf_start +
- shdr.sh_offset +
- j * sizeof (sym),
- sizeof (sym)))
- goto fail;
-
- if (copy_from_user(name,
- spu_elf_start + shdr_str.sh_offset +
- sym.st_name,
- 20))
- goto fail;
-
- if (memcmp(name, "_ovly_table", 12) == 0)
- ovly_table_sym = sym.st_value;
- if (memcmp(name, "_ovly_buf_table", 16) == 0)
- ovly_buf_table_sym = sym.st_value;
- if (memcmp(name, "_ovly_table_end", 16) == 0)
- ovly_table_end_sym = sym.st_value;
- if (memcmp(name, "_ovly_buf_table_end", 20) == 0)
- ovly_buf_table_end_sym = sym.st_value;
- }
- }
-
- /* If we don't have overlays, we're done. */
- if (ovly_table_sym == 0 || ovly_buf_table_sym == 0
- || ovly_table_end_sym == 0 || ovly_buf_table_end_sym == 0) {
- pr_debug("SPU_PROF: No overlay table found\n");
- goto out;
- } else {
- pr_debug("SPU_PROF: Overlay table found\n");
- }
-
- /* The _ovly_table symbol represents a table with one entry
- * per overlay section. The _ovly_buf_table symbol represents
- * a table with one entry per overlay region.
- * The struct spu_overlay_info gives the structure of the _ovly_table
- * entries. The structure of _ovly_table_buf is simply one
- * u32 word per entry.
- */
- overlay_tbl_offset = vma_map_lookup(map, ovly_table_sym,
- aSpu, &grd_val);
- if (overlay_tbl_offset > 0x10000000) {
- printk(KERN_ERR "SPU_PROF: "
- "%s, line %d: Error finding SPU overlay table\n",
- __func__, __LINE__);
- goto fail;
- }
- ovly_table = spu_elf_start + overlay_tbl_offset;
-
- n_ovlys = (ovly_table_end_sym -
- ovly_table_sym) / sizeof (ovly);
-
- /* Traverse overlay table. */
- for (i = 0; i < n_ovlys; i++) {
- if (copy_from_user(&ovly, ovly_table + i, sizeof (ovly)))
- goto fail;
-
- /* The ovly.vma/size/offset arguments are analogous to the same
- * arguments used above for non-overlay maps. The final two
- * args are referred to as the guard pointer and the guard
- * value.
- * The guard pointer is an entry in the _ovly_buf_table,
- * computed using ovly.buf as the index into the table. Since
- * ovly.buf values begin at '1' to reference the first (or 0th)
- * entry in the _ovly_buf_table, the computation subtracts 1
- * from ovly.buf.
- * The guard value is stored in the _ovly_buf_table entry and
- * is an index (starting at 1) back to the _ovly_table entry
- * that is pointing at this _ovly_buf_table entry. So, for
- * example, for an overlay scenario with one overlay segment
- * and two overlay sections:
- * - Section 1 points to the first entry of the
- * _ovly_buf_table, which contains a guard value
- * of '1', referencing the first (index=0) entry of
- * _ovly_table.
- * - Section 2 points to the second entry of the
- * _ovly_buf_table, which contains a guard value
- * of '2', referencing the second (index=1) entry of
- * _ovly_table.
- */
- map = vma_map_add(map, ovly.vma, ovly.size, ovly.offset,
- ovly_buf_table_sym + (ovly.buf-1) * 4, i+1);
- if (!map)
- goto fail;
- }
- goto out;
-
- fail:
- map = NULL;
- out:
- return map;
-}
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
deleted file mode 100644
index bf094c5a4bd9..000000000000
--- a/arch/powerpc/oprofile/common.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * PPC 64 oprofile support:
- * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
- * PPC 32 oprofile support: (based on PPC 64 support)
- * Copyright (C) Freescale Semiconductor, Inc 2004
- * Author: Andy Fleming
- *
- * Based on alpha version.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/oprofile.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/errno.h>
-#include <asm/ptrace.h>
-#include <asm/pmc.h>
-#include <asm/cputable.h>
-#include <asm/oprofile_impl.h>
-#include <asm/firmware.h>
-
-static struct op_powerpc_model *model;
-
-static struct op_counter_config ctr[OP_MAX_COUNTER];
-static struct op_system_config sys;
-
-static int op_per_cpu_rc;
-
-static void op_handle_interrupt(struct pt_regs *regs)
-{
- model->handle_interrupt(regs, ctr);
-}
-
-static void op_powerpc_cpu_setup(void *dummy)
-{
- int ret;
-
- ret = model->cpu_setup(ctr);
-
- if (ret != 0)
- op_per_cpu_rc = ret;
-}
-
-static int op_powerpc_setup(void)
-{
- int err;
-
- op_per_cpu_rc = 0;
-
- /* Grab the hardware */
- err = reserve_pmc_hardware(op_handle_interrupt);
- if (err)
- return err;
-
- /* Pre-compute the values to stuff in the hardware registers. */
- op_per_cpu_rc = model->reg_setup(ctr, &sys, model->num_counters);
-
- if (op_per_cpu_rc)
- goto out;
-
- /* Configure the registers on all cpus. If an error occurs on one
- * of the cpus, op_per_cpu_rc will be set to the error */
- on_each_cpu(op_powerpc_cpu_setup, NULL, 1);
-
-out: if (op_per_cpu_rc) {
- /* error on setup release the performance counter hardware */
- release_pmc_hardware();
- }
-
- return op_per_cpu_rc;
-}
-
-static void op_powerpc_shutdown(void)
-{
- release_pmc_hardware();
-}
-
-static void op_powerpc_cpu_start(void *dummy)
-{
- /* If any of the cpus have return an error, set the
- * global flag to the error so it can be returned
- * to the generic OProfile caller.
- */
- int ret;
-
- ret = model->start(ctr);
- if (ret != 0)
- op_per_cpu_rc = ret;
-}
-
-static int op_powerpc_start(void)
-{
- op_per_cpu_rc = 0;
-
- if (model->global_start)
- return model->global_start(ctr);
- if (model->start) {
- on_each_cpu(op_powerpc_cpu_start, NULL, 1);
- return op_per_cpu_rc;
- }
- return -EIO; /* No start function is defined for this
- power architecture */
-}
-
-static inline void op_powerpc_cpu_stop(void *dummy)
-{
- model->stop();
-}
-
-static void op_powerpc_stop(void)
-{
- if (model->stop)
- on_each_cpu(op_powerpc_cpu_stop, NULL, 1);
- if (model->global_stop)
- model->global_stop();
-}
-
-static int op_powerpc_create_files(struct dentry *root)
-{
- int i;
-
-#ifdef CONFIG_PPC64
- /*
- * There is one mmcr0, mmcr1 and mmcra for setting the events for
- * all of the counters.
- */
- oprofilefs_create_ulong(root, "mmcr0", &sys.mmcr0);
- oprofilefs_create_ulong(root, "mmcr1", &sys.mmcr1);
- oprofilefs_create_ulong(root, "mmcra", &sys.mmcra);
-#ifdef CONFIG_OPROFILE_CELL
- /* create a file the user tool can check to see what level of profiling
- * support exits with this kernel. Initialize bit mask to indicate
- * what support the kernel has:
- * bit 0 - Supports SPU event profiling in addition to PPU
- * event and cycles; and SPU cycle profiling
- * bits 1-31 - Currently unused.
- *
- * If the file does not exist, then the kernel only supports SPU
- * cycle profiling, PPU event and cycle profiling.
- */
- oprofilefs_create_ulong(root, "cell_support", &sys.cell_support);
- sys.cell_support = 0x1; /* Note, the user OProfile tool must check
- * that this bit is set before attempting to
- * user SPU event profiling. Older kernels
- * will not have this file, hence the user
- * tool is not allowed to do SPU event
- * profiling on older kernels. Older kernels
- * will accept SPU events but collected data
- * is garbage.
- */
-#endif
-#endif
-
- for (i = 0; i < model->num_counters; ++i) {
- struct dentry *dir;
- char buf[4];
-
- snprintf(buf, sizeof buf, "%d", i);
- dir = oprofilefs_mkdir(root, buf);
-
- oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled);
- oprofilefs_create_ulong(dir, "event", &ctr[i].event);
- oprofilefs_create_ulong(dir, "count", &ctr[i].count);
-
- /*
- * Classic PowerPC doesn't support per-counter
- * control like this, but the options are
- * expected, so they remain. For Freescale
- * Book-E style performance monitors, we do
- * support them.
- */
- oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel);
- oprofilefs_create_ulong(dir, "user", &ctr[i].user);
-
- oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask);
- }
-
- oprofilefs_create_ulong(root, "enable_kernel", &sys.enable_kernel);
- oprofilefs_create_ulong(root, "enable_user", &sys.enable_user);
-
- /* Default to tracing both kernel and user */
- sys.enable_kernel = 1;
- sys.enable_user = 1;
-
- return 0;
-}
-
-int __init oprofile_arch_init(struct oprofile_operations *ops)
-{
- if (!cur_cpu_spec->oprofile_cpu_type)
- return -ENODEV;
-
- switch (cur_cpu_spec->oprofile_type) {
-#ifdef CONFIG_PPC_BOOK3S_64
-#ifdef CONFIG_OPROFILE_CELL
- case PPC_OPROFILE_CELL:
- if (firmware_has_feature(FW_FEATURE_LPAR))
- return -ENODEV;
- model = &op_model_cell;
- ops->sync_start = model->sync_start;
- ops->sync_stop = model->sync_stop;
- break;
-#endif
- case PPC_OPROFILE_POWER4:
- model = &op_model_power4;
- break;
- case PPC_OPROFILE_PA6T:
- model = &op_model_pa6t;
- break;
-#endif
-#ifdef CONFIG_6xx
- case PPC_OPROFILE_G4:
- model = &op_model_7450;
- break;
-#endif
-#if defined(CONFIG_FSL_EMB_PERFMON)
- case PPC_OPROFILE_FSL_EMB:
- model = &op_model_fsl_emb;
- break;
-#endif
- default:
- return -ENODEV;
- }
-
- model->num_counters = cur_cpu_spec->num_pmcs;
-
- ops->cpu_type = cur_cpu_spec->oprofile_cpu_type;
- ops->create_files = op_powerpc_create_files;
- ops->setup = op_powerpc_setup;
- ops->shutdown = op_powerpc_shutdown;
- ops->start = op_powerpc_start;
- ops->stop = op_powerpc_stop;
- ops->backtrace = op_powerpc_backtrace;
-
- printk(KERN_DEBUG "oprofile: using %s performance monitoring.\n",
- ops->cpu_type);
-
- return 0;
-}
-
-void oprofile_arch_exit(void)
-{
-}
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
deleted file mode 100644
index d29b6e4e5e72..000000000000
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/*
- * arch/powerpc/oprofile/op_model_7450.c
- *
- * Freescale 745x/744x oprofile support, based on fsl_booke support
- * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * Copyright (c) 2004 Freescale Semiconductor, Inc
- *
- * Author: Andy Fleming
- * Maintainer: Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/oprofile.h>
-#include <linux/smp.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/cputable.h>
-#include <asm/page.h>
-#include <asm/pmc.h>
-#include <asm/oprofile_impl.h>
-
-static unsigned long reset_value[OP_MAX_COUNTER];
-
-static int oprofile_running;
-static u32 mmcr0_val, mmcr1_val, mmcr2_val, num_pmcs;
-
-#define MMCR0_PMC1_SHIFT 6
-#define MMCR0_PMC2_SHIFT 0
-#define MMCR1_PMC3_SHIFT 27
-#define MMCR1_PMC4_SHIFT 22
-#define MMCR1_PMC5_SHIFT 17
-#define MMCR1_PMC6_SHIFT 11
-
-#define mmcr0_event1(event) \
- ((event << MMCR0_PMC1_SHIFT) & MMCR0_PMC1SEL)
-#define mmcr0_event2(event) \
- ((event << MMCR0_PMC2_SHIFT) & MMCR0_PMC2SEL)
-
-#define mmcr1_event3(event) \
- ((event << MMCR1_PMC3_SHIFT) & MMCR1_PMC3SEL)
-#define mmcr1_event4(event) \
- ((event << MMCR1_PMC4_SHIFT) & MMCR1_PMC4SEL)
-#define mmcr1_event5(event) \
- ((event << MMCR1_PMC5_SHIFT) & MMCR1_PMC5SEL)
-#define mmcr1_event6(event) \
- ((event << MMCR1_PMC6_SHIFT) & MMCR1_PMC6SEL)
-
-#define MMCR0_INIT (MMCR0_FC | MMCR0_FCS | MMCR0_FCP | MMCR0_FCM1 | MMCR0_FCM0)
-
-/* Unfreezes the counters on this CPU, enables the interrupt,
- * enables the counters to trigger the interrupt, and sets the
- * counters to only count when the mark bit is not set.
- */
-static void pmc_start_ctrs(void)
-{
- u32 mmcr0 = mfspr(SPRN_MMCR0);
-
- mmcr0 &= ~(MMCR0_FC | MMCR0_FCM0);
- mmcr0 |= (MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
-
- mtspr(SPRN_MMCR0, mmcr0);
-}
-
-/* Disables the counters on this CPU, and freezes them */
-static void pmc_stop_ctrs(void)
-{
- u32 mmcr0 = mfspr(SPRN_MMCR0);
-
- mmcr0 |= MMCR0_FC;
- mmcr0 &= ~(MMCR0_FCECE | MMCR0_PMC1CE | MMCR0_PMCnCE | MMCR0_PMXE);
-
- mtspr(SPRN_MMCR0, mmcr0);
-}
-
-/* Configures the counters on this CPU based on the global
- * settings */
-static int fsl7450_cpu_setup(struct op_counter_config *ctr)
-{
- /* freeze all counters */
- pmc_stop_ctrs();
-
- mtspr(SPRN_MMCR0, mmcr0_val);
- mtspr(SPRN_MMCR1, mmcr1_val);
- if (num_pmcs > 4)
- mtspr(SPRN_MMCR2, mmcr2_val);
-
- return 0;
-}
-
-/* Configures the global settings for the countes on all CPUs. */
-static int fsl7450_reg_setup(struct op_counter_config *ctr,
- struct op_system_config *sys,
- int num_ctrs)
-{
- int i;
-
- num_pmcs = num_ctrs;
- /* Our counters count up, and "count" refers to
- * how much before the next interrupt, and we interrupt
- * on overflow. So we calculate the starting value
- * which will give us "count" until overflow.
- * Then we set the events on the enabled counters */
- for (i = 0; i < num_ctrs; ++i)
- reset_value[i] = 0x80000000UL - ctr[i].count;
-
- /* Set events for Counters 1 & 2 */
- mmcr0_val = MMCR0_INIT | mmcr0_event1(ctr[0].event)
- | mmcr0_event2(ctr[1].event);
-
- /* Setup user/kernel bits */
- if (sys->enable_kernel)
- mmcr0_val &= ~(MMCR0_FCS);
-
- if (sys->enable_user)
- mmcr0_val &= ~(MMCR0_FCP);
-
- /* Set events for Counters 3-6 */
- mmcr1_val = mmcr1_event3(ctr[2].event)
- | mmcr1_event4(ctr[3].event);
- if (num_ctrs > 4)
- mmcr1_val |= mmcr1_event5(ctr[4].event)
- | mmcr1_event6(ctr[5].event);
-
- mmcr2_val = 0;
-
- return 0;
-}
-
-/* Sets the counters on this CPU to the chosen values, and starts them */
-static int fsl7450_start(struct op_counter_config *ctr)
-{
- int i;
-
- mtmsr(mfmsr() | MSR_PMM);
-
- for (i = 0; i < num_pmcs; ++i) {
- if (ctr[i].enabled)
- classic_ctr_write(i, reset_value[i]);
- else
- classic_ctr_write(i, 0);
- }
-
- /* Clear the freeze bit, and enable the interrupt.
- * The counters won't actually start until the rfi clears
- * the PMM bit */
- pmc_start_ctrs();
-
- oprofile_running = 1;
-
- return 0;
-}
-
-/* Stop the counters on this CPU */
-static void fsl7450_stop(void)
-{
- /* freeze counters */
- pmc_stop_ctrs();
-
- oprofile_running = 0;
-
- mb();
-}
-
-
-/* Handle the interrupt on this CPU, and log a sample for each
- * event that triggered the interrupt */
-static void fsl7450_handle_interrupt(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- unsigned long pc;
- int is_kernel;
- int val;
- int i;
-
- /* set the PMM bit (see comment below) */
- mtmsr(mfmsr() | MSR_PMM);
-
- pc = mfspr(SPRN_SIAR);
- is_kernel = is_kernel_addr(pc);
-
- for (i = 0; i < num_pmcs; ++i) {
- val = classic_ctr_read(i);
- if (val < 0) {
- if (oprofile_running && ctr[i].enabled) {
- oprofile_add_ext_sample(pc, regs, i, is_kernel);
- classic_ctr_write(i, reset_value[i]);
- } else {
- classic_ctr_write(i, 0);
- }
- }
- }
-
- /* The freeze bit was set by the interrupt. */
- /* Clear the freeze bit, and reenable the interrupt.
- * The counters won't actually start until the rfi clears
- * the PM/M bit */
- pmc_start_ctrs();
-}
-
-struct op_powerpc_model op_model_7450= {
- .reg_setup = fsl7450_reg_setup,
- .cpu_setup = fsl7450_cpu_setup,
- .start = fsl7450_start,
- .stop = fsl7450_stop,
- .handle_interrupt = fsl7450_handle_interrupt,
-};
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
deleted file mode 100644
index 863d89386f60..000000000000
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ /dev/null
@@ -1,1717 +0,0 @@
-/*
- * Cell Broadband Engine OProfile Support
- *
- * (C) Copyright IBM Corporation 2006
- *
- * Author: David Erb (djerb@us.ibm.com)
- * Modifications:
- * Carl Love <carll@us.ibm.com>
- * Maynard Johnson <maynardj@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpufreq.h>
-#include <linux/delay.h>
-#include <linux/jiffies.h>
-#include <linux/kthread.h>
-#include <linux/oprofile.h>
-#include <linux/percpu.h>
-#include <linux/smp.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <asm/cell-pmu.h>
-#include <asm/cputable.h>
-#include <asm/firmware.h>
-#include <asm/io.h>
-#include <asm/oprofile_impl.h>
-#include <asm/processor.h>
-#include <asm/prom.h>
-#include <asm/ptrace.h>
-#include <asm/reg.h>
-#include <asm/rtas.h>
-#include <asm/cell-regs.h>
-
-#include "../platforms/cell/interrupt.h"
-#include "cell/pr_util.h"
-
-#define PPU_PROFILING 0
-#define SPU_PROFILING_CYCLES 1
-#define SPU_PROFILING_EVENTS 2
-
-#define SPU_EVENT_NUM_START 4100
-#define SPU_EVENT_NUM_STOP 4399
-#define SPU_PROFILE_EVENT_ADDR 4363 /* spu, address trace, decimal */
-#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146 /* sub unit set to zero */
-#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186 /* sub unit set to zero */
-
-#define NUM_SPUS_PER_NODE 8
-#define SPU_CYCLES_EVENT_NUM 2 /* event number for SPU_CYCLES */
-
-#define PPU_CYCLES_EVENT_NUM 1 /* event number for CYCLES */
-#define PPU_CYCLES_GRP_NUM 1 /* special group number for identifying
- * PPU_CYCLES event
- */
-#define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
-
-#define NUM_THREADS 2 /* number of physical threads in
- * physical processor
- */
-#define NUM_DEBUG_BUS_WORDS 4
-#define NUM_INPUT_BUS_WORDS 2
-
-#define MAX_SPU_COUNT 0xFFFFFF /* maximum 24 bit LFSR value */
-
-/* Minimum HW interval timer setting to send value to trace buffer is 10 cycle.
- * To configure counter to send value every N cycles set counter to
- * 2^32 - 1 - N.
- */
-#define NUM_INTERVAL_CYC 0xFFFFFFFF - 10
-
-/*
- * spu_cycle_reset is the number of cycles between samples.
- * This variable is used for SPU profiling and should ONLY be set
- * at the beginning of cell_reg_setup; otherwise, it's read-only.
- */
-static unsigned int spu_cycle_reset;
-static unsigned int profiling_mode;
-static int spu_evnt_phys_spu_indx;
-
-struct pmc_cntrl_data {
- unsigned long vcntr;
- unsigned long evnts;
- unsigned long masks;
- unsigned long enabled;
-};
-
-/*
- * ibm,cbe-perftools rtas parameters
- */
-struct pm_signal {
- u16 cpu; /* Processor to modify */
- u16 sub_unit; /* hw subunit this applies to (if applicable)*/
- short int signal_group; /* Signal Group to Enable/Disable */
- u8 bus_word; /* Enable/Disable on this Trace/Trigger/Event
- * Bus Word(s) (bitmask)
- */
- u8 bit; /* Trigger/Event bit (if applicable) */
-};
-
-/*
- * rtas call arguments
- */
-enum {
- SUBFUNC_RESET = 1,
- SUBFUNC_ACTIVATE = 2,
- SUBFUNC_DEACTIVATE = 3,
-
- PASSTHRU_IGNORE = 0,
- PASSTHRU_ENABLE = 1,
- PASSTHRU_DISABLE = 2,
-};
-
-struct pm_cntrl {
- u16 enable;
- u16 stop_at_max;
- u16 trace_mode;
- u16 freeze;
- u16 count_mode;
- u16 spu_addr_trace;
- u8 trace_buf_ovflw;
-};
-
-static struct {
- u32 group_control;
- u32 debug_bus_control;
- struct pm_cntrl pm_cntrl;
- u32 pm07_cntrl[NR_PHYS_CTRS];
-} pm_regs;
-
-#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12)
-#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4)
-#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8)
-#define GET_POLARITY(x) ((x & 0x00000002) >> 1)
-#define GET_COUNT_CYCLES(x) (x & 0x00000001)
-#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
-
-static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
-static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
-static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
-
-/*
- * The CELL profiling code makes rtas calls to setup the debug bus to
- * route the performance signals. Additionally, SPU profiling requires
- * a second rtas call to setup the hardware to capture the SPU PCs.
- * The EIO error value is returned if the token lookups or the rtas
- * call fail. The EIO error number is the best choice of the existing
- * error numbers. The probability of rtas related error is very low. But
- * by returning EIO and printing additional information to dmsg the user
- * will know that OProfile did not start and dmesg will tell them why.
- * OProfile does not support returning errors on Stop. Not a huge issue
- * since failure to reset the debug bus or stop the SPU PC collection is
- * not a fatel issue. Chances are if the Stop failed, Start doesn't work
- * either.
- */
-
-/*
- * Interpetation of hdw_thread:
- * 0 - even virtual cpus 0, 2, 4,...
- * 1 - odd virtual cpus 1, 3, 5, ...
- *
- * FIXME: this is strictly wrong, we need to clean this up in a number
- * of places. It works for now. -arnd
- */
-static u32 hdw_thread;
-
-static u32 virt_cntr_inter_mask;
-static struct timer_list timer_virt_cntr;
-static struct timer_list timer_spu_event_swap;
-
-/*
- * pm_signal needs to be global since it is initialized in
- * cell_reg_setup at the time when the necessary information
- * is available.
- */
-static struct pm_signal pm_signal[NR_PHYS_CTRS];
-static int pm_rtas_token; /* token for debug bus setup call */
-static int spu_rtas_token; /* token for SPU cycle profiling */
-
-static u32 reset_value[NR_PHYS_CTRS];
-static int num_counters;
-static int oprofile_running;
-static DEFINE_SPINLOCK(cntr_lock);
-
-static u32 ctr_enabled;
-
-static unsigned char input_bus[NUM_INPUT_BUS_WORDS];
-
-/*
- * Firmware interface functions
- */
-static int
-rtas_ibm_cbe_perftools(int subfunc, int passthru,
- void *address, unsigned long length)
-{
- u64 paddr = __pa(address);
-
- return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
- passthru, paddr >> 32, paddr & 0xffffffff, length);
-}
-
-static void pm_rtas_reset_signals(u32 node)
-{
- int ret;
- struct pm_signal pm_signal_local;
-
- /*
- * The debug bus is being set to the passthru disable state.
- * However, the FW still expects atleast one legal signal routing
- * entry or it will return an error on the arguments. If we don't
- * supply a valid entry, we must ignore all return values. Ignoring
- * all return values means we might miss an error we should be
- * concerned about.
- */
-
- /* fw expects physical cpu #. */
- pm_signal_local.cpu = node;
- pm_signal_local.signal_group = 21;
- pm_signal_local.bus_word = 1;
- pm_signal_local.sub_unit = 0;
- pm_signal_local.bit = 0;
-
- ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE,
- &pm_signal_local,
- sizeof(struct pm_signal));
-
- if (unlikely(ret))
- /*
- * Not a fatal error. For Oprofile stop, the oprofile
- * functions do not support returning an error for
- * failure to stop OProfile.
- */
- printk(KERN_WARNING "%s: rtas returned: %d\n",
- __func__, ret);
-}
-
-static int pm_rtas_activate_signals(u32 node, u32 count)
-{
- int ret;
- int i, j;
- struct pm_signal pm_signal_local[NR_PHYS_CTRS];
-
- /*
- * There is no debug setup required for the cycles event.
- * Note that only events in the same group can be used.
- * Otherwise, there will be conflicts in correctly routing
- * the signals on the debug bus. It is the responsibility
- * of the OProfile user tool to check the events are in
- * the same group.
- */
- i = 0;
- for (j = 0; j < count; j++) {
- if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {
-
- /* fw expects physical cpu # */
- pm_signal_local[i].cpu = node;
- pm_signal_local[i].signal_group
- = pm_signal[j].signal_group;
- pm_signal_local[i].bus_word = pm_signal[j].bus_word;
- pm_signal_local[i].sub_unit = pm_signal[j].sub_unit;
- pm_signal_local[i].bit = pm_signal[j].bit;
- i++;
- }
- }
-
- if (i != 0) {
- ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
- pm_signal_local,
- i * sizeof(struct pm_signal));
-
- if (unlikely(ret)) {
- printk(KERN_WARNING "%s: rtas returned: %d\n",
- __func__, ret);
- return -EIO;
- }
- }
-
- return 0;
-}
-
-/*
- * PM Signal functions
- */
-static void set_pm_event(u32 ctr, int event, u32 unit_mask)
-{
- struct pm_signal *p;
- u32 signal_bit;
- u32 bus_word, bus_type, count_cycles, polarity, input_control;
- int j, i;
-
- if (event == PPU_CYCLES_EVENT_NUM) {
- /* Special Event: Count all cpu cycles */
- pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
- p = &(pm_signal[ctr]);
- p->signal_group = PPU_CYCLES_GRP_NUM;
- p->bus_word = 1;
- p->sub_unit = 0;
- p->bit = 0;
- goto out;
- } else {
- pm_regs.pm07_cntrl[ctr] = 0;
- }
-
- bus_word = GET_BUS_WORD(unit_mask);
- bus_type = GET_BUS_TYPE(unit_mask);
- count_cycles = GET_COUNT_CYCLES(unit_mask);
- polarity = GET_POLARITY(unit_mask);
- input_control = GET_INPUT_CONTROL(unit_mask);
- signal_bit = (event % 100);
-
- p = &(pm_signal[ctr]);
-
- p->signal_group = event / 100;
- p->bus_word = bus_word;
- p->sub_unit = GET_SUB_UNIT(unit_mask);
-
- pm_regs.pm07_cntrl[ctr] = 0;
- pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
- pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
- pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
-
- /*
- * Some of the islands signal selection is based on 64 bit words.
- * The debug bus words are 32 bits, the input words to the performance
- * counters are defined as 32 bits. Need to convert the 64 bit island
- * specification to the appropriate 32 input bit and bus word for the
- * performance counter event selection. See the CELL Performance
- * monitoring signals manual and the Perf cntr hardware descriptions
- * for the details.
- */
- if (input_control == 0) {
- if (signal_bit > 31) {
- signal_bit -= 32;
- if (bus_word == 0x3)
- bus_word = 0x2;
- else if (bus_word == 0xc)
- bus_word = 0x8;
- }
-
- if ((bus_type == 0) && p->signal_group >= 60)
- bus_type = 2;
- if ((bus_type == 1) && p->signal_group >= 50)
- bus_type = 0;
-
- pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit);
- } else {
- pm_regs.pm07_cntrl[ctr] = 0;
- p->bit = signal_bit;
- }
-
- for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
- if (bus_word & (1 << i)) {
- pm_regs.debug_bus_control |=
- (bus_type << (30 - (2 * i)));
-
- for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
- if (input_bus[j] == 0xff) {
- input_bus[j] = i;
- pm_regs.group_control |=
- (i << (30 - (2 * j)));
-
- break;
- }
- }
- }
- }
-out:
- ;
-}
-
-static void write_pm_cntrl(int cpu)
-{
- /*
- * Oprofile will use 32 bit counters, set bits 7:10 to 0
- * pmregs.pm_cntrl is a global
- */
-
- u32 val = 0;
- if (pm_regs.pm_cntrl.enable == 1)
- val |= CBE_PM_ENABLE_PERF_MON;
-
- if (pm_regs.pm_cntrl.stop_at_max == 1)
- val |= CBE_PM_STOP_AT_MAX;
-
- if (pm_regs.pm_cntrl.trace_mode != 0)
- val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
-
- if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
- val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);
- if (pm_regs.pm_cntrl.freeze == 1)
- val |= CBE_PM_FREEZE_ALL_CTRS;
-
- val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);
-
- /*
- * Routine set_count_mode must be called previously to set
- * the count mode based on the user selection of user and kernel.
- */
- val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
- cbe_write_pm(cpu, pm_control, val);
-}
-
-static inline void
-set_count_mode(u32 kernel, u32 user)
-{
- /*
- * The user must specify user and kernel if they want them. If
- * neither is specified, OProfile will count in hypervisor mode.
- * pm_regs.pm_cntrl is a global
- */
- if (kernel) {
- if (user)
- pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
- else
- pm_regs.pm_cntrl.count_mode =
- CBE_COUNT_SUPERVISOR_MODE;
- } else {
- if (user)
- pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
- else
- pm_regs.pm_cntrl.count_mode =
- CBE_COUNT_HYPERVISOR_MODE;
- }
-}
-
-static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl)
-{
-
- pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE;
- cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
-}
-
-/*
- * Oprofile is expected to collect data on all CPUs simultaneously.
- * However, there is one set of performance counters per node. There are
- * two hardware threads or virtual CPUs on each node. Hence, OProfile must
- * multiplex in time the performance counter collection on the two virtual
- * CPUs. The multiplexing of the performance counters is done by this
- * virtual counter routine.
- *
- * The pmc_values used below is defined as 'per-cpu' but its use is
- * more akin to 'per-node'. We need to store two sets of counter
- * values per node -- one for the previous run and one for the next.
- * The per-cpu[NR_PHYS_CTRS] gives us the storage we need. Each odd/even
- * pair of per-cpu arrays is used for storing the previous and next
- * pmc values for a given node.
- * NOTE: We use the per-cpu variable to improve cache performance.
- *
- * This routine will alternate loading the virtual counters for
- * virtual CPUs
- */
-static void cell_virtual_cntr(unsigned long data)
-{
- int i, prev_hdw_thread, next_hdw_thread;
- u32 cpu;
- unsigned long flags;
-
- /*
- * Make sure that the interrupt_hander and the virt counter are
- * not both playing with the counters on the same node.
- */
-
- spin_lock_irqsave(&cntr_lock, flags);
-
- prev_hdw_thread = hdw_thread;
-
- /* switch the cpu handling the interrupts */
- hdw_thread = 1 ^ hdw_thread;
- next_hdw_thread = hdw_thread;
-
- pm_regs.group_control = 0;
- pm_regs.debug_bus_control = 0;
-
- for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
- input_bus[i] = 0xff;
-
- /*
- * There are some per thread events. Must do the
- * set event, for the thread that is being started
- */
- for (i = 0; i < num_counters; i++)
- set_pm_event(i,
- pmc_cntrl[next_hdw_thread][i].evnts,
- pmc_cntrl[next_hdw_thread][i].masks);
-
- /*
- * The following is done only once per each node, but
- * we need cpu #, not node #, to pass to the cbe_xxx functions.
- */
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- /*
- * stop counters, save counter values, restore counts
- * for previous thread
- */
- cbe_disable_pm(cpu);
- cbe_disable_pm_interrupts(cpu);
- for (i = 0; i < num_counters; i++) {
- per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
- = cbe_read_ctr(cpu, i);
-
- if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
- == 0xFFFFFFFF)
- /* If the cntr value is 0xffffffff, we must
- * reset that to 0xfffffff0 when the current
- * thread is restarted. This will generate a
- * new interrupt and make sure that we never
- * restore the counters to the max value. If
- * the counters were restored to the max value,
- * they do not increment and no interrupts are
- * generated. Hence no more samples will be
- * collected on that cpu.
- */
- cbe_write_ctr(cpu, i, 0xFFFFFFF0);
- else
- cbe_write_ctr(cpu, i,
- per_cpu(pmc_values,
- cpu +
- next_hdw_thread)[i]);
- }
-
- /*
- * Switch to the other thread. Change the interrupt
- * and control regs to be scheduled on the CPU
- * corresponding to the thread to execute.
- */
- for (i = 0; i < num_counters; i++) {
- if (pmc_cntrl[next_hdw_thread][i].enabled) {
- /*
- * There are some per thread events.
- * Must do the set event, enable_cntr
- * for each cpu.
- */
- enable_ctr(cpu, i,
- pm_regs.pm07_cntrl);
- } else {
- cbe_write_pm07_control(cpu, i, 0);
- }
- }
-
- /* Enable interrupts on the CPU thread that is starting */
- cbe_enable_pm_interrupts(cpu, next_hdw_thread,
- virt_cntr_inter_mask);
- cbe_enable_pm(cpu);
- }
-
- spin_unlock_irqrestore(&cntr_lock, flags);
-
- mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
-}
-
-static void start_virt_cntrs(void)
-{
- init_timer(&timer_virt_cntr);
- timer_virt_cntr.function = cell_virtual_cntr;
- timer_virt_cntr.data = 0UL;
- timer_virt_cntr.expires = jiffies + HZ / 10;
- add_timer(&timer_virt_cntr);
-}
-
-static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
- struct op_system_config *sys, int num_ctrs)
-{
- spu_cycle_reset = ctr[0].count;
-
- /*
- * Each node will need to make the rtas call to start
- * and stop SPU profiling. Get the token once and store it.
- */
- spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
-
- if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
- printk(KERN_ERR
- "%s: rtas token ibm,cbe-spu-perftools unknown\n",
- __func__);
- return -EIO;
- }
- return 0;
-}
-
-/* Unfortunately, the hardware will only support event profiling
- * on one SPU per node at a time. Therefore, we must time slice
- * the profiling across all SPUs in the node. Note, we do this
- * in parallel for each node. The following routine is called
- * periodically based on kernel timer to switch which SPU is
- * being monitored in a round robbin fashion.
- */
-static void spu_evnt_swap(unsigned long data)
-{
- int node;
- int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
- unsigned long flags;
- int cpu;
- int ret;
- u32 interrupt_mask;
-
-
- /* enable interrupts on cntr 0 */
- interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);
-
- hdw_thread = 0;
-
- /* Make sure spu event interrupt handler and spu event swap
- * don't access the counters simultaneously.
- */
- spin_lock_irqsave(&cntr_lock, flags);
-
- cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
-
- if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
- spu_evnt_phys_spu_indx = 0;
-
- pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
- pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
- pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
-
- /* switch the SPU being profiled on each node */
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- node = cbe_cpu_to_node(cpu);
- cur_phys_spu = (node * NUM_SPUS_PER_NODE)
- + cur_spu_evnt_phys_spu_indx;
- nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
- + spu_evnt_phys_spu_indx;
-
- /*
- * stop counters, save counter values, restore counts
- * for previous physical SPU
- */
- cbe_disable_pm(cpu);
- cbe_disable_pm_interrupts(cpu);
-
- spu_pm_cnt[cur_phys_spu]
- = cbe_read_ctr(cpu, 0);
-
- /* restore previous count for the next spu to sample */
- /* NOTE, hardware issue, counter will not start if the
- * counter value is at max (0xFFFFFFFF).
- */
- if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
- cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
- else
- cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);
-
- pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-
- /* setup the debug bus measure the one event and
- * the two events to route the next SPU's PC on
- * the debug bus
- */
- ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
- if (ret)
- printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
- "SPU event swap\n", __func__);
-
- /* clear the trace buffer, don't want to take PC for
- * previous SPU*/
- cbe_write_pm(cpu, trace_address, 0);
-
- enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
-
- /* Enable interrupts on the CPU thread that is starting */
- cbe_enable_pm_interrupts(cpu, hdw_thread,
- interrupt_mask);
- cbe_enable_pm(cpu);
- }
-
- spin_unlock_irqrestore(&cntr_lock, flags);
-
- /* swap approximately every 0.1 seconds */
- mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
-}
-
-static void start_spu_event_swap(void)
-{
- init_timer(&timer_spu_event_swap);
- timer_spu_event_swap.function = spu_evnt_swap;
- timer_spu_event_swap.data = 0UL;
- timer_spu_event_swap.expires = jiffies + HZ / 25;
- add_timer(&timer_spu_event_swap);
-}
-
-static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
- struct op_system_config *sys, int num_ctrs)
-{
- int i;
-
- /* routine is called once for all nodes */
-
- spu_evnt_phys_spu_indx = 0;
- /*
- * For all events except PPU CYCLEs, each node will need to make
- * the rtas cbe-perftools call to setup and reset the debug bus.
- * Make the token lookup call once and store it in the global
- * variable pm_rtas_token.
- */
- pm_rtas_token = rtas_token("ibm,cbe-perftools");
-
- if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
- printk(KERN_ERR
- "%s: rtas token ibm,cbe-perftools unknown\n",
- __func__);
- return -EIO;
- }
-
- /* setup the pm_control register settings,
- * settings will be written per node by the
- * cell_cpu_setup() function.
- */
- pm_regs.pm_cntrl.trace_buf_ovflw = 1;
-
- /* Use the occurrence trace mode to have SPU PC saved
- * to the trace buffer. Occurrence data in trace buffer
- * is not used. Bit 2 must be set to store SPU addresses.
- */
- pm_regs.pm_cntrl.trace_mode = 2;
-
- pm_regs.pm_cntrl.spu_addr_trace = 0x1; /* using debug bus
- event 2 & 3 */
-
- /* setup the debug bus event array with the SPU PC routing events.
- * Note, pm_signal[0] will be filled in by set_pm_event() call below.
- */
- pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
- pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
- pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
- pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
-
- pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
- pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
- pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
- pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
-
- /* Set the user selected spu event to profile on,
- * note, only one SPU profiling event is supported
- */
- num_counters = 1; /* Only support one SPU event at a time */
- set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
-
- reset_value[0] = 0xFFFFFFFF - ctr[0].count;
-
- /* global, used by cell_cpu_setup */
- ctr_enabled |= 1;
-
- /* Initialize the count for each SPU to the reset value */
- for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
- spu_pm_cnt[i] = reset_value[0];
-
- return 0;
-}
-
-static int cell_reg_setup_ppu(struct op_counter_config *ctr,
- struct op_system_config *sys, int num_ctrs)
-{
- /* routine is called once for all nodes */
- int i, j, cpu;
-
- num_counters = num_ctrs;
-
- if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
- printk(KERN_ERR
- "%s: Oprofile, number of specified events " \
- "exceeds number of physical counters\n",
- __func__);
- return -EIO;
- }
-
- set_count_mode(sys->enable_kernel, sys->enable_user);
-
- /* Setup the thread 0 events */
- for (i = 0; i < num_ctrs; ++i) {
-
- pmc_cntrl[0][i].evnts = ctr[i].event;
- pmc_cntrl[0][i].masks = ctr[i].unit_mask;
- pmc_cntrl[0][i].enabled = ctr[i].enabled;
- pmc_cntrl[0][i].vcntr = i;
-
- for_each_possible_cpu(j)
- per_cpu(pmc_values, j)[i] = 0;
- }
-
- /*
- * Setup the thread 1 events, map the thread 0 event to the
- * equivalent thread 1 event.
- */
- for (i = 0; i < num_ctrs; ++i) {
- if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111))
- pmc_cntrl[1][i].evnts = ctr[i].event + 19;
- else if (ctr[i].event == 2203)
- pmc_cntrl[1][i].evnts = ctr[i].event;
- else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215))
- pmc_cntrl[1][i].evnts = ctr[i].event + 16;
- else
- pmc_cntrl[1][i].evnts = ctr[i].event;
-
- pmc_cntrl[1][i].masks = ctr[i].unit_mask;
- pmc_cntrl[1][i].enabled = ctr[i].enabled;
- pmc_cntrl[1][i].vcntr = i;
- }
-
- for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
- input_bus[i] = 0xff;
-
- /*
- * Our counters count up, and "count" refers to
- * how much before the next interrupt, and we interrupt
- * on overflow. So we calculate the starting value
- * which will give us "count" until overflow.
- * Then we set the events on the enabled counters.
- */
- for (i = 0; i < num_counters; ++i) {
- /* start with virtual counter set 0 */
- if (pmc_cntrl[0][i].enabled) {
- /* Using 32bit counters, reset max - count */
- reset_value[i] = 0xFFFFFFFF - ctr[i].count;
- set_pm_event(i,
- pmc_cntrl[0][i].evnts,
- pmc_cntrl[0][i].masks);
-
- /* global, used by cell_cpu_setup */
- ctr_enabled |= (1 << i);
- }
- }
-
- /* initialize the previous counts for the virtual cntrs */
- for_each_online_cpu(cpu)
- for (i = 0; i < num_counters; ++i) {
- per_cpu(pmc_values, cpu)[i] = reset_value[i];
- }
-
- return 0;
-}
-
-
-/* This function is called once for all cpus combined */
-static int cell_reg_setup(struct op_counter_config *ctr,
- struct op_system_config *sys, int num_ctrs)
-{
- int ret=0;
- spu_cycle_reset = 0;
-
- /* initialize the spu_arr_trace value, will be reset if
- * doing spu event profiling.
- */
- pm_regs.group_control = 0;
- pm_regs.debug_bus_control = 0;
- pm_regs.pm_cntrl.stop_at_max = 1;
- pm_regs.pm_cntrl.trace_mode = 0;
- pm_regs.pm_cntrl.freeze = 1;
- pm_regs.pm_cntrl.trace_buf_ovflw = 0;
- pm_regs.pm_cntrl.spu_addr_trace = 0;
-
- /*
- * For all events except PPU CYCLEs, each node will need to make
- * the rtas cbe-perftools call to setup and reset the debug bus.
- * Make the token lookup call once and store it in the global
- * variable pm_rtas_token.
- */
- pm_rtas_token = rtas_token("ibm,cbe-perftools");
-
- if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
- printk(KERN_ERR
- "%s: rtas token ibm,cbe-perftools unknown\n",
- __func__);
- return -EIO;
- }
-
- if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
- profiling_mode = SPU_PROFILING_CYCLES;
- ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
- } else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
- (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
- profiling_mode = SPU_PROFILING_EVENTS;
- spu_cycle_reset = ctr[0].count;
-
- /* for SPU event profiling, need to setup the
- * pm_signal array with the events to route the
- * SPU PC before making the FW call. Note, only
- * one SPU event for profiling can be specified
- * at a time.
- */
- cell_reg_setup_spu_events(ctr, sys, num_ctrs);
- } else {
- profiling_mode = PPU_PROFILING;
- ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
- }
-
- return ret;
-}
-
-
-
-/* This function is called once for each cpu */
-static int cell_cpu_setup(struct op_counter_config *cntr)
-{
- u32 cpu = smp_processor_id();
- u32 num_enabled = 0;
- int i;
- int ret;
-
- /* Cycle based SPU profiling does not use the performance
- * counters. The trace array is configured to collect
- * the data.
- */
- if (profiling_mode == SPU_PROFILING_CYCLES)
- return 0;
-
- /* There is one performance monitor per processor chip (i.e. node),
- * so we only need to perform this function once per node.
- */
- if (cbe_get_hw_thread_id(cpu))
- return 0;
-
- /* Stop all counters */
- cbe_disable_pm(cpu);
- cbe_disable_pm_interrupts(cpu);
-
- cbe_write_pm(cpu, pm_start_stop, 0);
- cbe_write_pm(cpu, group_control, pm_regs.group_control);
- cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
- write_pm_cntrl(cpu);
-
- for (i = 0; i < num_counters; ++i) {
- if (ctr_enabled & (1 << i)) {
- pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu);
- num_enabled++;
- }
- }
-
- /*
- * The pm_rtas_activate_signals will return -EIO if the FW
- * call failed.
- */
- if (profiling_mode == SPU_PROFILING_EVENTS) {
- /* For SPU event profiling also need to setup the
- * pm interval timer
- */
- ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
- num_enabled+2);
- /* store PC from debug bus to Trace buffer as often
- * as possible (every 10 cycles)
- */
- cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
- return ret;
- } else
- return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
- num_enabled);
-}
-
-#define ENTRIES 303
-#define MAXLFSR 0xFFFFFF
-
-/* precomputed table of 24 bit LFSR values */
-static int initial_lfsr[] = {
- 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
- 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
- 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
- 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
- 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
- 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
- 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
- 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
- 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
- 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
- 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
- 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
- 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
- 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
- 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
- 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
- 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
- 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
- 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
- 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
- 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
- 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
- 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
- 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
- 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
- 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
- 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
- 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
- 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
- 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
- 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
- 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
- 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
- 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
- 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
- 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
- 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
- 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
-};
-
-/*
- * The hardware uses an LFSR counting sequence to determine when to capture
- * the SPU PCs. An LFSR sequence is like a puesdo random number sequence
- * where each number occurs once in the sequence but the sequence is not in
- * numerical order. The SPU PC capture is done when the LFSR sequence reaches
- * the last value in the sequence. Hence the user specified value N
- * corresponds to the LFSR number that is N from the end of the sequence.
- *
- * To avoid the time to compute the LFSR, a lookup table is used. The 24 bit
- * LFSR sequence is broken into four ranges. The spacing of the precomputed
- * values is adjusted in each range so the error between the user specifed
- * number (N) of events between samples and the actual number of events based
- * on the precomputed value will be les then about 6.2%. Note, if the user
- * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
- * This is to prevent the loss of samples because the trace buffer is full.
- *
- * User specified N Step between Index in
- * precomputed values precomputed
- * table
- * 0 to 2^16-1 ---- 0
- * 2^16 to 2^16+2^19-1 2^12 1 to 128
- * 2^16+2^19 to 2^16+2^19+2^22-1 2^15 129 to 256
- * 2^16+2^19+2^22 to 2^24-1 2^18 257 to 302
- *
- *
- * For example, the LFSR values in the second range are computed for 2^16,
- * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indicies
- * 1, 2,..., 127, 128.
- *
- * The 24 bit LFSR value for the nth number in the sequence can be
- * calculated using the following code:
- *
- * #define size 24
- * int calculate_lfsr(int n)
- * {
- * int i;
- * unsigned int newlfsr0;
- * unsigned int lfsr = 0xFFFFFF;
- * unsigned int howmany = n;
- *
- * for (i = 2; i < howmany + 2; i++) {
- * newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
- * ((lfsr >> (size - 1 - 1)) & 1) ^
- * (((lfsr >> (size - 1 - 6)) & 1) ^
- * ((lfsr >> (size - 1 - 23)) & 1)));
- *
- * lfsr >>= 1;
- * lfsr = lfsr | (newlfsr0 << (size - 1));
- * }
- * return lfsr;
- * }
- */
-
-#define V2_16 (0x1 << 16)
-#define V2_19 (0x1 << 19)
-#define V2_22 (0x1 << 22)
-
-static int calculate_lfsr(int n)
-{
- /*
- * The ranges and steps are in powers of 2 so the calculations
- * can be done using shifts rather then divide.
- */
- int index;
-
- if ((n >> 16) == 0)
- index = 0;
- else if (((n - V2_16) >> 19) == 0)
- index = ((n - V2_16) >> 12) + 1;
- else if (((n - V2_16 - V2_19) >> 22) == 0)
- index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
- else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
- index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
- else
- index = ENTRIES-1;
-
- /* make sure index is valid */
- if ((index >= ENTRIES) || (index < 0))
- index = ENTRIES-1;
-
- return initial_lfsr[index];
-}
-
-static int pm_rtas_activate_spu_profiling(u32 node)
-{
- int ret, i;
- struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE];
-
- /*
- * Set up the rtas call to configure the debug bus to
- * route the SPU PCs. Setup the pm_signal for each SPU
- */
- for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) {
- pm_signal_local[i].cpu = node;
- pm_signal_local[i].signal_group = 41;
- /* spu i on word (i/2) */
- pm_signal_local[i].bus_word = 1 << i / 2;
- /* spu i */
- pm_signal_local[i].sub_unit = i;
- pm_signal_local[i].bit = 63;
- }
-
- ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
- PASSTHRU_ENABLE, pm_signal_local,
- (ARRAY_SIZE(pm_signal_local)
- * sizeof(struct pm_signal)));
-
- if (unlikely(ret)) {
- printk(KERN_WARNING "%s: rtas returned: %d\n",
- __func__, ret);
- return -EIO;
- }
-
- return 0;
-}
-
-#ifdef CONFIG_CPU_FREQ
-static int
-oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
-{
- int ret = 0;
- struct cpufreq_freqs *frq = data;
- if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
- (val == CPUFREQ_POSTCHANGE && frq->old > frq->new))
- set_spu_profiling_frequency(frq->new, spu_cycle_reset);
- return ret;
-}
-
-static struct notifier_block cpu_freq_notifier_block = {
- .notifier_call = oprof_cpufreq_notify
-};
-#endif
-
-/*
- * Note the generic OProfile stop calls do not support returning
- * an error on stop. Hence, will not return an error if the FW
- * calls fail on stop. Failure to reset the debug bus is not an issue.
- * Failure to disable the SPU profiling is not an issue. The FW calls
- * to enable the performance counters and debug bus will work even if
- * the hardware was not cleanly reset.
- */
-static void cell_global_stop_spu_cycles(void)
-{
- int subfunc, rtn_value;
- unsigned int lfsr_value;
- int cpu;
-
- oprofile_running = 0;
- smp_wmb();
-
-#ifdef CONFIG_CPU_FREQ
- cpufreq_unregister_notifier(&cpu_freq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
-#endif
-
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- subfunc = 3; /*
- * 2 - activate SPU tracing,
- * 3 - deactivate
- */
- lfsr_value = 0x8f100000;
-
- rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
- subfunc, cbe_cpu_to_node(cpu),
- lfsr_value);
-
- if (unlikely(rtn_value != 0)) {
- printk(KERN_ERR
- "%s: rtas call ibm,cbe-spu-perftools " \
- "failed, return = %d\n",
- __func__, rtn_value);
- }
-
- /* Deactivate the signals */
- pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
- }
-
- stop_spu_profiling_cycles();
-}
-
-static void cell_global_stop_spu_events(void)
-{
- int cpu;
- oprofile_running = 0;
-
- stop_spu_profiling_events();
- smp_wmb();
-
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- cbe_sync_irq(cbe_cpu_to_node(cpu));
- /* Stop the counters */
- cbe_disable_pm(cpu);
- cbe_write_pm07_control(cpu, 0, 0);
-
- /* Deactivate the signals */
- pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-
- /* Deactivate interrupts */
- cbe_disable_pm_interrupts(cpu);
- }
- del_timer_sync(&timer_spu_event_swap);
-}
-
-static void cell_global_stop_ppu(void)
-{
- int cpu;
-
- /*
- * This routine will be called once for the system.
- * There is one performance monitor per node, so we
- * only need to perform this function once per node.
- */
- del_timer_sync(&timer_virt_cntr);
- oprofile_running = 0;
- smp_wmb();
-
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- cbe_sync_irq(cbe_cpu_to_node(cpu));
- /* Stop the counters */
- cbe_disable_pm(cpu);
-
- /* Deactivate the signals */
- pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
-
- /* Deactivate interrupts */
- cbe_disable_pm_interrupts(cpu);
- }
-}
-
-static void cell_global_stop(void)
-{
- if (profiling_mode == PPU_PROFILING)
- cell_global_stop_ppu();
- else if (profiling_mode == SPU_PROFILING_EVENTS)
- cell_global_stop_spu_events();
- else
- cell_global_stop_spu_cycles();
-}
-
-static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
-{
- int subfunc;
- unsigned int lfsr_value;
- int cpu;
- int ret;
- int rtas_error;
- unsigned int cpu_khzfreq = 0;
-
- /* The SPU profiling uses time-based profiling based on
- * cpu frequency, so if configured with the CPU_FREQ
- * option, we should detect frequency changes and react
- * accordingly.
- */
-#ifdef CONFIG_CPU_FREQ
- ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER);
- if (ret < 0)
- /* this is not a fatal error */
- printk(KERN_ERR "CPU freq change registration failed: %d\n",
- ret);
-
- else
- cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
-#endif
-
- set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
-
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- /*
- * Setup SPU cycle-based profiling.
- * Set perf_mon_control bit 0 to a zero before
- * enabling spu collection hardware.
- */
- cbe_write_pm(cpu, pm_control, 0);
-
- if (spu_cycle_reset > MAX_SPU_COUNT)
- /* use largest possible value */
- lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
- else
- lfsr_value = calculate_lfsr(spu_cycle_reset);
-
- /* must use a non zero value. Zero disables data collection. */
- if (lfsr_value == 0)
- lfsr_value = calculate_lfsr(1);
-
- lfsr_value = lfsr_value << 8; /* shift lfsr to correct
- * register location
- */
-
- /* debug bus setup */
- ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
-
- if (unlikely(ret)) {
- rtas_error = ret;
- goto out;
- }
-
-
- subfunc = 2; /* 2 - activate SPU tracing, 3 - deactivate */
-
- /* start profiling */
- ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
- cbe_cpu_to_node(cpu), lfsr_value);
-
- if (unlikely(ret != 0)) {
- printk(KERN_ERR
- "%s: rtas call ibm,cbe-spu-perftools failed, " \
- "return = %d\n", __func__, ret);
- rtas_error = -EIO;
- goto out;
- }
- }
-
- rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
- if (rtas_error)
- goto out_stop;
-
- oprofile_running = 1;
- return 0;
-
-out_stop:
- cell_global_stop_spu_cycles(); /* clean up the PMU/debug bus */
-out:
- return rtas_error;
-}
-
-static int cell_global_start_spu_events(struct op_counter_config *ctr)
-{
- int cpu;
- u32 interrupt_mask = 0;
- int rtn = 0;
-
- hdw_thread = 0;
-
- /* spu event profiling, uses the performance counters to generate
- * an interrupt. The hardware is setup to store the SPU program
- * counter into the trace array. The occurrence mode is used to
- * enable storing data to the trace buffer. The bits are set
- * to send/store the SPU address in the trace buffer. The debug
- * bus must be setup to route the SPU program counter onto the
- * debug bus. The occurrence data in the trace buffer is not used.
- */
-
- /* This routine gets called once for the system.
- * There is one performance monitor per node, so we
- * only need to perform this function once per node.
- */
-
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- /*
- * Setup SPU event-based profiling.
- * Set perf_mon_control bit 0 to a zero before
- * enabling spu collection hardware.
- *
- * Only support one SPU event on one SPU per node.
- */
- if (ctr_enabled & 1) {
- cbe_write_ctr(cpu, 0, reset_value[0]);
- enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
- interrupt_mask |=
- CBE_PM_CTR_OVERFLOW_INTR(0);
- } else {
- /* Disable counter */
- cbe_write_pm07_control(cpu, 0, 0);
- }
-
- cbe_get_and_clear_pm_interrupts(cpu);
- cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
- cbe_enable_pm(cpu);
-
- /* clear the trace buffer */
- cbe_write_pm(cpu, trace_address, 0);
- }
-
- /* Start the timer to time slice collecting the event profile
- * on each of the SPUs. Note, can collect profile on one SPU
- * per node at a time.
- */
- start_spu_event_swap();
- start_spu_profiling_events();
- oprofile_running = 1;
- smp_wmb();
-
- return rtn;
-}
-
-static int cell_global_start_ppu(struct op_counter_config *ctr)
-{
- u32 cpu, i;
- u32 interrupt_mask = 0;
-
- /* This routine gets called once for the system.
- * There is one performance monitor per node, so we
- * only need to perform this function once per node.
- */
- for_each_online_cpu(cpu) {
- if (cbe_get_hw_thread_id(cpu))
- continue;
-
- interrupt_mask = 0;
-
- for (i = 0; i < num_counters; ++i) {
- if (ctr_enabled & (1 << i)) {
- cbe_write_ctr(cpu, i, reset_value[i]);
- enable_ctr(cpu, i, pm_regs.pm07_cntrl);
- interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
- } else {
- /* Disable counter */
- cbe_write_pm07_control(cpu, i, 0);
- }
- }
-
- cbe_get_and_clear_pm_interrupts(cpu);
- cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
- cbe_enable_pm(cpu);
- }
-
- virt_cntr_inter_mask = interrupt_mask;
- oprofile_running = 1;
- smp_wmb();
-
- /*
- * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
- * executed which manipulates the PMU. We start the "virtual counter"
- * here so that we do not need to synchronize access to the PMU in
- * the above for-loop.
- */
- start_virt_cntrs();
-
- return 0;
-}
-
-static int cell_global_start(struct op_counter_config *ctr)
-{
- if (profiling_mode == SPU_PROFILING_CYCLES)
- return cell_global_start_spu_cycles(ctr);
- else if (profiling_mode == SPU_PROFILING_EVENTS)
- return cell_global_start_spu_events(ctr);
- else
- return cell_global_start_ppu(ctr);
-}
-
-
-/* The SPU interrupt handler
- *
- * SPU event profiling works as follows:
- * The pm_signal[0] holds the one SPU event to be measured. It is routed on
- * the debug bus using word 0 or 1. The value of pm_signal[1] and
- * pm_signal[2] contain the necessary events to route the SPU program
- * counter for the selected SPU onto the debug bus using words 2 and 3.
- * The pm_interval register is setup to write the SPU PC value into the
- * trace buffer at the maximum rate possible. The trace buffer is configured
- * to store the PCs, wrapping when it is full. The performance counter is
- * initialized to the max hardware count minus the number of events, N, between
- * samples. Once the N events have occurred, a HW counter overflow occurs
- * causing the generation of a HW counter interrupt which also stops the
- * writing of the SPU PC values to the trace buffer. Hence the last PC
- * written to the trace buffer is the SPU PC that we want. Unfortunately,
- * we have to read from the beginning of the trace buffer to get to the
- * last value written. We just hope the PPU has nothing better to do then
- * service this interrupt. The PC for the specific SPU being profiled is
- * extracted from the trace buffer processed and stored. The trace buffer
- * is cleared, interrupts are cleared, the counter is reset to max - N.
- * A kernel timer is used to periodically call the routine spu_evnt_swap()
- * to switch to the next physical SPU in the node to profile in round robbin
- * order. This way data is collected for all SPUs on the node. It does mean
- * that we need to use a relatively small value of N to ensure enough samples
- * on each SPU are collected each SPU is being profiled 1/8 of the time.
- * It may also be necessary to use a longer sample collection period.
- */
-static void cell_handle_interrupt_spu(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- u32 cpu, cpu_tmp;
- u64 trace_entry;
- u32 interrupt_mask;
- u64 trace_buffer[2];
- u64 last_trace_buffer;
- u32 sample;
- u32 trace_addr;
- unsigned long sample_array_lock_flags;
- int spu_num;
- unsigned long flags;
-
- /* Make sure spu event interrupt handler and spu event swap
- * don't access the counters simultaneously.
- */
- cpu = smp_processor_id();
- spin_lock_irqsave(&cntr_lock, flags);
-
- cpu_tmp = cpu;
- cbe_disable_pm(cpu);
-
- interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
-
- sample = 0xABCDEF;
- trace_entry = 0xfedcba;
- last_trace_buffer = 0xdeadbeaf;
-
- if ((oprofile_running == 1) && (interrupt_mask != 0)) {
- /* disable writes to trace buff */
- cbe_write_pm(cpu, pm_interval, 0);
-
- /* only have one perf cntr being used, cntr 0 */
- if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
- && ctr[0].enabled)
- /* The SPU PC values will be read
- * from the trace buffer, reset counter
- */
-
- cbe_write_ctr(cpu, 0, reset_value[0]);
-
- trace_addr = cbe_read_pm(cpu, trace_address);
-
- while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
- /* There is data in the trace buffer to process
- * Read the buffer until you get to the last
- * entry. This is the value we want.
- */
-
- cbe_read_trace_buffer(cpu, trace_buffer);
- trace_addr = cbe_read_pm(cpu, trace_address);
- }
-
- /* SPU Address 16 bit count format for 128 bit
- * HW trace buffer is used for the SPU PC storage
- * HDR bits 0:15
- * SPU Addr 0 bits 16:31
- * SPU Addr 1 bits 32:47
- * unused bits 48:127
- *
- * HDR: bit4 = 1 SPU Address 0 valid
- * HDR: bit5 = 1 SPU Address 1 valid
- * - unfortunately, the valid bits don't seem to work
- *
- * Note trace_buffer[0] holds bits 0:63 of the HW
- * trace buffer, trace_buffer[1] holds bits 64:127
- */
-
- trace_entry = trace_buffer[0]
- & 0x00000000FFFF0000;
-
- /* only top 16 of the 18 bit SPU PC address
- * is stored in trace buffer, hence shift right
- * by 16 -2 bits */
- sample = trace_entry >> 14;
- last_trace_buffer = trace_buffer[0];
-
- spu_num = spu_evnt_phys_spu_indx
- + (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);
-
- /* make sure only one process at a time is calling
- * spu_sync_buffer()
- */
- spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
- sample_array_lock_flags);
- spu_sync_buffer(spu_num, &sample, 1);
- spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
- sample_array_lock_flags);
-
- smp_wmb(); /* insure spu event buffer updates are written
- * don't want events intermingled... */
-
- /* The counters were frozen by the interrupt.
- * Reenable the interrupt and restart the counters.
- */
- cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
- cbe_enable_pm_interrupts(cpu, hdw_thread,
- virt_cntr_inter_mask);
-
- /* clear the trace buffer, re-enable writes to trace buff */
- cbe_write_pm(cpu, trace_address, 0);
- cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
-
- /* The writes to the various performance counters only writes
- * to a latch. The new values (interrupt setting bits, reset
- * counter value etc.) are not copied to the actual registers
- * until the performance monitor is enabled. In order to get
- * this to work as desired, the performance monitor needs to
- * be disabled while writing to the latches. This is a
- * HW design issue.
- */
- write_pm_cntrl(cpu);
- cbe_enable_pm(cpu);
- }
- spin_unlock_irqrestore(&cntr_lock, flags);
-}
-
-static void cell_handle_interrupt_ppu(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- u32 cpu;
- u64 pc;
- int is_kernel;
- unsigned long flags = 0;
- u32 interrupt_mask;
- int i;
-
- cpu = smp_processor_id();
-
- /*
- * Need to make sure the interrupt handler and the virt counter
- * routine are not running at the same time. See the
- * cell_virtual_cntr() routine for additional comments.
- */
- spin_lock_irqsave(&cntr_lock, flags);
-
- /*
- * Need to disable and reenable the performance counters
- * to get the desired behavior from the hardware. This
- * is hardware specific.
- */
-
- cbe_disable_pm(cpu);
-
- interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
-
- /*
- * If the interrupt mask has been cleared, then the virt cntr
- * has cleared the interrupt. When the thread that generated
- * the interrupt is restored, the data count will be restored to
- * 0xffffff0 to cause the interrupt to be regenerated.
- */
-
- if ((oprofile_running == 1) && (interrupt_mask != 0)) {
- pc = regs->nip;
- is_kernel = is_kernel_addr(pc);
-
- for (i = 0; i < num_counters; ++i) {
- if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i))
- && ctr[i].enabled) {
- oprofile_add_ext_sample(pc, regs, i, is_kernel);
- cbe_write_ctr(cpu, i, reset_value[i]);
- }
- }
-
- /*
- * The counters were frozen by the interrupt.
- * Reenable the interrupt and restart the counters.
- * If there was a race between the interrupt handler and
- * the virtual counter routine. The virtual counter
- * routine may have cleared the interrupts. Hence must
- * use the virt_cntr_inter_mask to re-enable the interrupts.
- */
- cbe_enable_pm_interrupts(cpu, hdw_thread,
- virt_cntr_inter_mask);
-
- /*
- * The writes to the various performance counters only writes
- * to a latch. The new values (interrupt setting bits, reset
- * counter value etc.) are not copied to the actual registers
- * until the performance monitor is enabled. In order to get
- * this to work as desired, the performance monitor needs to
- * be disabled while writing to the latches. This is a
- * HW design issue.
- */
- cbe_enable_pm(cpu);
- }
- spin_unlock_irqrestore(&cntr_lock, flags);
-}
-
-static void cell_handle_interrupt(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- if (profiling_mode == PPU_PROFILING)
- cell_handle_interrupt_ppu(regs, ctr);
- else
- cell_handle_interrupt_spu(regs, ctr);
-}
-
-/*
- * This function is called from the generic OProfile
- * driver. When profiling PPUs, we need to do the
- * generic sync start; otherwise, do spu_sync_start.
- */
-static int cell_sync_start(void)
-{
- if ((profiling_mode == SPU_PROFILING_CYCLES) ||
- (profiling_mode == SPU_PROFILING_EVENTS))
- return spu_sync_start();
- else
- return DO_GENERIC_SYNC;
-}
-
-static int cell_sync_stop(void)
-{
- if ((profiling_mode == SPU_PROFILING_CYCLES) ||
- (profiling_mode == SPU_PROFILING_EVENTS))
- return spu_sync_stop();
- else
- return 1;
-}
-
-struct op_powerpc_model op_model_cell = {
- .reg_setup = cell_reg_setup,
- .cpu_setup = cell_cpu_setup,
- .global_start = cell_global_start,
- .global_stop = cell_global_stop,
- .sync_start = cell_sync_start,
- .sync_stop = cell_sync_stop,
- .handle_interrupt = cell_handle_interrupt,
-};
diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c
deleted file mode 100644
index 14cf86fdddab..000000000000
--- a/arch/powerpc/oprofile/op_model_fsl_emb.c
+++ /dev/null
@@ -1,384 +0,0 @@
-/*
- * Freescale Embedded oprofile support, based on ppc64 oprofile support
- * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * Copyright (c) 2004, 2010 Freescale Semiconductor, Inc
- *
- * Author: Andy Fleming
- * Maintainer: Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/oprofile.h>
-#include <linux/smp.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/cputable.h>
-#include <asm/reg_fsl_emb.h>
-#include <asm/page.h>
-#include <asm/pmc.h>
-#include <asm/oprofile_impl.h>
-
-static unsigned long reset_value[OP_MAX_COUNTER];
-
-static int num_counters;
-static int oprofile_running;
-
-static inline u32 get_pmlca(int ctr)
-{
- u32 pmlca;
-
- switch (ctr) {
- case 0:
- pmlca = mfpmr(PMRN_PMLCA0);
- break;
- case 1:
- pmlca = mfpmr(PMRN_PMLCA1);
- break;
- case 2:
- pmlca = mfpmr(PMRN_PMLCA2);
- break;
- case 3:
- pmlca = mfpmr(PMRN_PMLCA3);
- break;
- case 4:
- pmlca = mfpmr(PMRN_PMLCA4);
- break;
- case 5:
- pmlca = mfpmr(PMRN_PMLCA5);
- break;
- default:
- panic("Bad ctr number\n");
- }
-
- return pmlca;
-}
-
-static inline void set_pmlca(int ctr, u32 pmlca)
-{
- switch (ctr) {
- case 0:
- mtpmr(PMRN_PMLCA0, pmlca);
- break;
- case 1:
- mtpmr(PMRN_PMLCA1, pmlca);
- break;
- case 2:
- mtpmr(PMRN_PMLCA2, pmlca);
- break;
- case 3:
- mtpmr(PMRN_PMLCA3, pmlca);
- break;
- case 4:
- mtpmr(PMRN_PMLCA4, pmlca);
- break;
- case 5:
- mtpmr(PMRN_PMLCA5, pmlca);
- break;
- default:
- panic("Bad ctr number\n");
- }
-}
-
-static inline unsigned int ctr_read(unsigned int i)
-{
- switch(i) {
- case 0:
- return mfpmr(PMRN_PMC0);
- case 1:
- return mfpmr(PMRN_PMC1);
- case 2:
- return mfpmr(PMRN_PMC2);
- case 3:
- return mfpmr(PMRN_PMC3);
- case 4:
- return mfpmr(PMRN_PMC4);
- case 5:
- return mfpmr(PMRN_PMC5);
- default:
- return 0;
- }
-}
-
-static inline void ctr_write(unsigned int i, unsigned int val)
-{
- switch(i) {
- case 0:
- mtpmr(PMRN_PMC0, val);
- break;
- case 1:
- mtpmr(PMRN_PMC1, val);
- break;
- case 2:
- mtpmr(PMRN_PMC2, val);
- break;
- case 3:
- mtpmr(PMRN_PMC3, val);
- break;
- case 4:
- mtpmr(PMRN_PMC4, val);
- break;
- case 5:
- mtpmr(PMRN_PMC5, val);
- break;
- default:
- break;
- }
-}
-
-
-static void init_pmc_stop(int ctr)
-{
- u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU |
- PMLCA_FCM1 | PMLCA_FCM0);
- u32 pmlcb = 0;
-
- switch (ctr) {
- case 0:
- mtpmr(PMRN_PMLCA0, pmlca);
- mtpmr(PMRN_PMLCB0, pmlcb);
- break;
- case 1:
- mtpmr(PMRN_PMLCA1, pmlca);
- mtpmr(PMRN_PMLCB1, pmlcb);
- break;
- case 2:
- mtpmr(PMRN_PMLCA2, pmlca);
- mtpmr(PMRN_PMLCB2, pmlcb);
- break;
- case 3:
- mtpmr(PMRN_PMLCA3, pmlca);
- mtpmr(PMRN_PMLCB3, pmlcb);
- break;
- case 4:
- mtpmr(PMRN_PMLCA4, pmlca);
- mtpmr(PMRN_PMLCB4, pmlcb);
- break;
- case 5:
- mtpmr(PMRN_PMLCA5, pmlca);
- mtpmr(PMRN_PMLCB5, pmlcb);
- break;
- default:
- panic("Bad ctr number!\n");
- }
-}
-
-static void set_pmc_event(int ctr, int event)
-{
- u32 pmlca;
-
- pmlca = get_pmlca(ctr);
-
- pmlca = (pmlca & ~PMLCA_EVENT_MASK) |
- ((event << PMLCA_EVENT_SHIFT) &
- PMLCA_EVENT_MASK);
-
- set_pmlca(ctr, pmlca);
-}
-
-static void set_pmc_user_kernel(int ctr, int user, int kernel)
-{
- u32 pmlca;
-
- pmlca = get_pmlca(ctr);
-
- if(user)
- pmlca &= ~PMLCA_FCU;
- else
- pmlca |= PMLCA_FCU;
-
- if(kernel)
- pmlca &= ~PMLCA_FCS;
- else
- pmlca |= PMLCA_FCS;
-
- set_pmlca(ctr, pmlca);
-}
-
-static void set_pmc_marked(int ctr, int mark0, int mark1)
-{
- u32 pmlca = get_pmlca(ctr);
-
- if(mark0)
- pmlca &= ~PMLCA_FCM0;
- else
- pmlca |= PMLCA_FCM0;
-
- if(mark1)
- pmlca &= ~PMLCA_FCM1;
- else
- pmlca |= PMLCA_FCM1;
-
- set_pmlca(ctr, pmlca);
-}
-
-static void pmc_start_ctr(int ctr, int enable)
-{
- u32 pmlca = get_pmlca(ctr);
-
- pmlca &= ~PMLCA_FC;
-
- if (enable)
- pmlca |= PMLCA_CE;
- else
- pmlca &= ~PMLCA_CE;
-
- set_pmlca(ctr, pmlca);
-}
-
-static void pmc_start_ctrs(int enable)
-{
- u32 pmgc0 = mfpmr(PMRN_PMGC0);
-
- pmgc0 &= ~PMGC0_FAC;
- pmgc0 |= PMGC0_FCECE;
-
- if (enable)
- pmgc0 |= PMGC0_PMIE;
- else
- pmgc0 &= ~PMGC0_PMIE;
-
- mtpmr(PMRN_PMGC0, pmgc0);
-}
-
-static void pmc_stop_ctrs(void)
-{
- u32 pmgc0 = mfpmr(PMRN_PMGC0);
-
- pmgc0 |= PMGC0_FAC;
-
- pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE);
-
- mtpmr(PMRN_PMGC0, pmgc0);
-}
-
-static int fsl_emb_cpu_setup(struct op_counter_config *ctr)
-{
- int i;
-
- /* freeze all counters */
- pmc_stop_ctrs();
-
- for (i = 0;i < num_counters;i++) {
- init_pmc_stop(i);
-
- set_pmc_event(i, ctr[i].event);
-
- set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel);
- }
-
- return 0;
-}
-
-static int fsl_emb_reg_setup(struct op_counter_config *ctr,
- struct op_system_config *sys,
- int num_ctrs)
-{
- int i;
-
- num_counters = num_ctrs;
-
- /* Our counters count up, and "count" refers to
- * how much before the next interrupt, and we interrupt
- * on overflow. So we calculate the starting value
- * which will give us "count" until overflow.
- * Then we set the events on the enabled counters */
- for (i = 0; i < num_counters; ++i)
- reset_value[i] = 0x80000000UL - ctr[i].count;
-
- return 0;
-}
-
-static int fsl_emb_start(struct op_counter_config *ctr)
-{
- int i;
-
- mtmsr(mfmsr() | MSR_PMM);
-
- for (i = 0; i < num_counters; ++i) {
- if (ctr[i].enabled) {
- ctr_write(i, reset_value[i]);
- /* Set each enabled counter to only
- * count when the Mark bit is *not* set */
- set_pmc_marked(i, 1, 0);
- pmc_start_ctr(i, 1);
- } else {
- ctr_write(i, 0);
-
- /* Set the ctr to be stopped */
- pmc_start_ctr(i, 0);
- }
- }
-
- /* Clear the freeze bit, and enable the interrupt.
- * The counters won't actually start until the rfi clears
- * the PMM bit */
- pmc_start_ctrs(1);
-
- oprofile_running = 1;
-
- pr_debug("start on cpu %d, pmgc0 %x\n", smp_processor_id(),
- mfpmr(PMRN_PMGC0));
-
- return 0;
-}
-
-static void fsl_emb_stop(void)
-{
- /* freeze counters */
- pmc_stop_ctrs();
-
- oprofile_running = 0;
-
- pr_debug("stop on cpu %d, pmgc0 %x\n", smp_processor_id(),
- mfpmr(PMRN_PMGC0));
-
- mb();
-}
-
-
-static void fsl_emb_handle_interrupt(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- unsigned long pc;
- int is_kernel;
- int val;
- int i;
-
- pc = regs->nip;
- is_kernel = is_kernel_addr(pc);
-
- for (i = 0; i < num_counters; ++i) {
- val = ctr_read(i);
- if (val < 0) {
- if (oprofile_running && ctr[i].enabled) {
- oprofile_add_ext_sample(pc, regs, i, is_kernel);
- ctr_write(i, reset_value[i]);
- } else {
- ctr_write(i, 0);
- }
- }
- }
-
- /* The freeze bit was set by the interrupt. */
- /* Clear the freeze bit, and reenable the interrupt. The
- * counters won't actually start until the rfi clears the PMM
- * bit. The PMM bit should not be set until after the interrupt
- * is cleared to avoid it getting lost in some hypervisor
- * environments.
- */
- mtmsr(mfmsr() | MSR_PMM);
- pmc_start_ctrs(1);
-}
-
-struct op_powerpc_model op_model_fsl_emb = {
- .reg_setup = fsl_emb_reg_setup,
- .cpu_setup = fsl_emb_cpu_setup,
- .start = fsl_emb_start,
- .stop = fsl_emb_stop,
- .handle_interrupt = fsl_emb_handle_interrupt,
-};
diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c
deleted file mode 100644
index a114a7c22d40..000000000000
--- a/arch/powerpc/oprofile/op_model_pa6t.c
+++ /dev/null
@@ -1,239 +0,0 @@
-/*
- * Copyright (C) 2006-2007 PA Semi, Inc
- *
- * Author: Shashi Rao, PA Semi
- *
- * Maintained by: Olof Johansson <olof@lixom.net>
- *
- * Based on arch/powerpc/oprofile/op_model_power4.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#include <linux/oprofile.h>
-#include <linux/smp.h>
-#include <linux/percpu.h>
-#include <asm/processor.h>
-#include <asm/cputable.h>
-#include <asm/oprofile_impl.h>
-#include <asm/reg.h>
-
-static unsigned char oprofile_running;
-
-/* mmcr values are set in pa6t_reg_setup, used in pa6t_cpu_setup */
-static u64 mmcr0_val;
-static u64 mmcr1_val;
-
-/* inited in pa6t_reg_setup */
-static u64 reset_value[OP_MAX_COUNTER];
-
-static inline u64 ctr_read(unsigned int i)
-{
- switch (i) {
- case 0:
- return mfspr(SPRN_PA6T_PMC0);
- case 1:
- return mfspr(SPRN_PA6T_PMC1);
- case 2:
- return mfspr(SPRN_PA6T_PMC2);
- case 3:
- return mfspr(SPRN_PA6T_PMC3);
- case 4:
- return mfspr(SPRN_PA6T_PMC4);
- case 5:
- return mfspr(SPRN_PA6T_PMC5);
- default:
- printk(KERN_ERR "ctr_read called with bad arg %u\n", i);
- return 0;
- }
-}
-
-static inline void ctr_write(unsigned int i, u64 val)
-{
- switch (i) {
- case 0:
- mtspr(SPRN_PA6T_PMC0, val);
- break;
- case 1:
- mtspr(SPRN_PA6T_PMC1, val);
- break;
- case 2:
- mtspr(SPRN_PA6T_PMC2, val);
- break;
- case 3:
- mtspr(SPRN_PA6T_PMC3, val);
- break;
- case 4:
- mtspr(SPRN_PA6T_PMC4, val);
- break;
- case 5:
- mtspr(SPRN_PA6T_PMC5, val);
- break;
- default:
- printk(KERN_ERR "ctr_write called with bad arg %u\n", i);
- break;
- }
-}
-
-
-/* precompute the values to stuff in the hardware registers */
-static int pa6t_reg_setup(struct op_counter_config *ctr,
- struct op_system_config *sys,
- int num_ctrs)
-{
- int pmc;
-
- /*
- * adjust the mmcr0.en[0-5] and mmcr0.inten[0-5] values obtained from the
- * event_mappings file by turning off the counters that the user doesn't
- * care about
- *
- * setup user and kernel profiling
- */
- for (pmc = 0; pmc < cur_cpu_spec->num_pmcs; pmc++)
- if (!ctr[pmc].enabled) {
- sys->mmcr0 &= ~(0x1UL << pmc);
- sys->mmcr0 &= ~(0x1UL << (pmc+12));
- pr_debug("turned off counter %u\n", pmc);
- }
-
- if (sys->enable_kernel)
- sys->mmcr0 |= PA6T_MMCR0_SUPEN | PA6T_MMCR0_HYPEN;
- else
- sys->mmcr0 &= ~(PA6T_MMCR0_SUPEN | PA6T_MMCR0_HYPEN);
-
- if (sys->enable_user)
- sys->mmcr0 |= PA6T_MMCR0_PREN;
- else
- sys->mmcr0 &= ~PA6T_MMCR0_PREN;
-
- /*
- * The performance counter event settings are given in the mmcr0 and
- * mmcr1 values passed from the user in the op_system_config
- * structure (sys variable).
- */
- mmcr0_val = sys->mmcr0;
- mmcr1_val = sys->mmcr1;
- pr_debug("mmcr0_val inited to %016lx\n", sys->mmcr0);
- pr_debug("mmcr1_val inited to %016lx\n", sys->mmcr1);
-
- for (pmc = 0; pmc < cur_cpu_spec->num_pmcs; pmc++) {
- /* counters are 40 bit. Move to cputable at some point? */
- reset_value[pmc] = (0x1UL << 39) - ctr[pmc].count;
- pr_debug("reset_value for pmc%u inited to 0x%llx\n",
- pmc, reset_value[pmc]);
- }
-
- return 0;
-}
-
-/* configure registers on this cpu */
-static int pa6t_cpu_setup(struct op_counter_config *ctr)
-{
- u64 mmcr0 = mmcr0_val;
- u64 mmcr1 = mmcr1_val;
-
- /* Default is all PMCs off */
- mmcr0 &= ~(0x3FUL);
- mtspr(SPRN_PA6T_MMCR0, mmcr0);
-
- /* program selected programmable events in */
- mtspr(SPRN_PA6T_MMCR1, mmcr1);
-
- pr_debug("setup on cpu %d, mmcr0 %016lx\n", smp_processor_id(),
- mfspr(SPRN_PA6T_MMCR0));
- pr_debug("setup on cpu %d, mmcr1 %016lx\n", smp_processor_id(),
- mfspr(SPRN_PA6T_MMCR1));
-
- return 0;
-}
-
-static int pa6t_start(struct op_counter_config *ctr)
-{
- int i;
-
- /* Hold off event counting until rfid */
- u64 mmcr0 = mmcr0_val | PA6T_MMCR0_HANDDIS;
-
- for (i = 0; i < cur_cpu_spec->num_pmcs; i++)
- if (ctr[i].enabled)
- ctr_write(i, reset_value[i]);
- else
- ctr_write(i, 0UL);
-
- mtspr(SPRN_PA6T_MMCR0, mmcr0);
-
- oprofile_running = 1;
-
- pr_debug("start on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0);
-
- return 0;
-}
-
-static void pa6t_stop(void)
-{
- u64 mmcr0;
-
- /* freeze counters */
- mmcr0 = mfspr(SPRN_PA6T_MMCR0);
- mmcr0 |= PA6T_MMCR0_FCM0;
- mtspr(SPRN_PA6T_MMCR0, mmcr0);
-
- oprofile_running = 0;
-
- pr_debug("stop on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0);
-}
-
-/* handle the perfmon overflow vector */
-static void pa6t_handle_interrupt(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- unsigned long pc = mfspr(SPRN_PA6T_SIAR);
- int is_kernel = is_kernel_addr(pc);
- u64 val;
- int i;
- u64 mmcr0;
-
- /* disable perfmon counting until rfid */
- mmcr0 = mfspr(SPRN_PA6T_MMCR0);
- mtspr(SPRN_PA6T_MMCR0, mmcr0 | PA6T_MMCR0_HANDDIS);
-
- /* Record samples. We've got one global bit for whether a sample
- * was taken, so add it for any counter that triggered overflow.
- */
- for (i = 0; i < cur_cpu_spec->num_pmcs; i++) {
- val = ctr_read(i);
- if (val & (0x1UL << 39)) { /* Overflow bit set */
- if (oprofile_running && ctr[i].enabled) {
- if (mmcr0 & PA6T_MMCR0_SIARLOG)
- oprofile_add_ext_sample(pc, regs, i, is_kernel);
- ctr_write(i, reset_value[i]);
- } else {
- ctr_write(i, 0UL);
- }
- }
- }
-
- /* Restore mmcr0 to a good known value since the PMI changes it */
- mmcr0 = mmcr0_val | PA6T_MMCR0_HANDDIS;
- mtspr(SPRN_PA6T_MMCR0, mmcr0);
-}
-
-struct op_powerpc_model op_model_pa6t = {
- .reg_setup = pa6t_reg_setup,
- .cpu_setup = pa6t_cpu_setup,
- .start = pa6t_start,
- .stop = pa6t_stop,
- .handle_interrupt = pa6t_handle_interrupt,
-};
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
deleted file mode 100644
index 962fe7b3e3fb..000000000000
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
- * Added mmcra[slot] support:
- * Copyright (C) 2006-2007 Will Schmidt <willschm@us.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/oprofile.h>
-#include <linux/smp.h>
-#include <asm/firmware.h>
-#include <asm/ptrace.h>
-#include <asm/processor.h>
-#include <asm/cputable.h>
-#include <asm/rtas.h>
-#include <asm/oprofile_impl.h>
-#include <asm/reg.h>
-
-#define dbg(args...)
-#define OPROFILE_PM_PMCSEL_MSK 0xffULL
-#define OPROFILE_PM_UNIT_SHIFT 60
-#define OPROFILE_PM_UNIT_MSK 0xfULL
-#define OPROFILE_MAX_PMC_NUM 3
-#define OPROFILE_PMSEL_FIELD_WIDTH 8
-#define OPROFILE_UNIT_FIELD_WIDTH 4
-#define MMCRA_SIAR_VALID_MASK 0x10000000ULL
-
-static unsigned long reset_value[OP_MAX_COUNTER];
-
-static int oprofile_running;
-static int use_slot_nums;
-
-/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
-static u32 mmcr0_val;
-static u64 mmcr1_val;
-static u64 mmcra_val;
-static u32 cntr_marked_events;
-
-static int power7_marked_instr_event(u64 mmcr1)
-{
- u64 psel, unit;
- int pmc, cntr_marked_events = 0;
-
- /* Given the MMCR1 value, look at the field for each counter to
- * determine if it is a marked event. Code based on the function
- * power7_marked_instr_event() in file arch/powerpc/perf/power7-pmu.c.
- */
- for (pmc = 0; pmc < 4; pmc++) {
- psel = mmcr1 & (OPROFILE_PM_PMCSEL_MSK
- << (OPROFILE_MAX_PMC_NUM - pmc)
- * OPROFILE_PMSEL_FIELD_WIDTH);
- psel = (psel >> ((OPROFILE_MAX_PMC_NUM - pmc)
- * OPROFILE_PMSEL_FIELD_WIDTH)) & ~1ULL;
- unit = mmcr1 & (OPROFILE_PM_UNIT_MSK
- << (OPROFILE_PM_UNIT_SHIFT
- - (pmc * OPROFILE_PMSEL_FIELD_WIDTH )));
- unit = unit >> (OPROFILE_PM_UNIT_SHIFT
- - (pmc * OPROFILE_PMSEL_FIELD_WIDTH));
-
- switch (psel >> 4) {
- case 2:
- cntr_marked_events |= (pmc == 1 || pmc == 3) << pmc;
- break;
- case 3:
- if (psel == 0x3c) {
- cntr_marked_events |= (pmc == 0) << pmc;
- break;
- }
-
- if (psel == 0x3e) {
- cntr_marked_events |= (pmc != 1) << pmc;
- break;
- }
-
- cntr_marked_events |= 1 << pmc;
- break;
- case 4:
- case 5:
- cntr_marked_events |= (unit == 0xd) << pmc;
- break;
- case 6:
- if (psel == 0x64)
- cntr_marked_events |= (pmc >= 2) << pmc;
- break;
- case 8:
- cntr_marked_events |= (unit == 0xd) << pmc;
- break;
- }
- }
- return cntr_marked_events;
-}
-
-static int power4_reg_setup(struct op_counter_config *ctr,
- struct op_system_config *sys,
- int num_ctrs)
-{
- int i;
-
- /*
- * The performance counter event settings are given in the mmcr0,
- * mmcr1 and mmcra values passed from the user in the
- * op_system_config structure (sys variable).
- */
- mmcr0_val = sys->mmcr0;
- mmcr1_val = sys->mmcr1;
- mmcra_val = sys->mmcra;
-
- /* Power 7+ and newer architectures:
- * Determine which counter events in the group (the group of events is
- * specified by the bit settings in the MMCR1 register) are marked
- * events for use in the interrupt handler. Do the calculation once
- * before OProfile starts. Information is used in the interrupt
- * handler. Starting with Power 7+ we only record the sample for
- * marked events if the SIAR valid bit is set. For non marked events
- * the sample is always recorded.
- */
- if (pvr_version_is(PVR_POWER7p))
- cntr_marked_events = power7_marked_instr_event(mmcr1_val);
- else
- cntr_marked_events = 0; /* For older processors, set the bit map
- * to zero so the sample will always be
- * be recorded.
- */
-
- for (i = 0; i < cur_cpu_spec->num_pmcs; ++i)
- reset_value[i] = 0x80000000UL - ctr[i].count;
-
- /* setup user and kernel profiling */
- if (sys->enable_kernel)
- mmcr0_val &= ~MMCR0_KERNEL_DISABLE;
- else
- mmcr0_val |= MMCR0_KERNEL_DISABLE;
-
- if (sys->enable_user)
- mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
- else
- mmcr0_val |= MMCR0_PROBLEM_DISABLE;
-
- if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) ||
- pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) ||
- pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX) ||
- pvr_version_is(PVR_POWER5) || pvr_version_is(PVR_POWER5p))
- use_slot_nums = 1;
-
- return 0;
-}
-
-extern void ppc_enable_pmcs(void);
-
-/*
- * Older CPUs require the MMCRA sample bit to be always set, but newer
- * CPUs only want it set for some groups. Eventually we will remove all
- * knowledge of this bit in the kernel, oprofile userspace should be
- * setting it when required.
- *
- * In order to keep current installations working we force the bit for
- * those older CPUs. Once everyone has updated their oprofile userspace we
- * can remove this hack.
- */
-static inline int mmcra_must_set_sample(void)
-{
- if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) ||
- pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) ||
- pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX))
- return 1;
-
- return 0;
-}
-
-static int power4_cpu_setup(struct op_counter_config *ctr)
-{
- unsigned int mmcr0 = mmcr0_val;
- unsigned long mmcra = mmcra_val;
-
- ppc_enable_pmcs();
-
- /* set the freeze bit */
- mmcr0 |= MMCR0_FC;
- mtspr(SPRN_MMCR0, mmcr0);
-
- mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
- mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
- mtspr(SPRN_MMCR0, mmcr0);
-
- mtspr(SPRN_MMCR1, mmcr1_val);
-
- if (mmcra_must_set_sample())
- mmcra |= MMCRA_SAMPLE_ENABLE;
- mtspr(SPRN_MMCRA, mmcra);
-
- dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
- mfspr(SPRN_MMCR0));
- dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
- mfspr(SPRN_MMCR1));
- dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
- mfspr(SPRN_MMCRA));
-
- return 0;
-}
-
-static int power4_start(struct op_counter_config *ctr)
-{
- int i;
- unsigned int mmcr0;
-
- /* set the PMM bit (see comment below) */
- mtmsrd(mfmsr() | MSR_PMM);
-
- for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
- if (ctr[i].enabled) {
- classic_ctr_write(i, reset_value[i]);
- } else {
- classic_ctr_write(i, 0);
- }
- }
-
- mmcr0 = mfspr(SPRN_MMCR0);
-
- /*
- * We must clear the PMAO bit on some (GQ) chips. Just do it
- * all the time
- */
- mmcr0 &= ~MMCR0_PMAO;
-
- /*
- * now clear the freeze bit, counting will not start until we
- * rfid from this excetion, because only at that point will
- * the PMM bit be cleared
- */
- mmcr0 &= ~MMCR0_FC;
- mtspr(SPRN_MMCR0, mmcr0);
-
- oprofile_running = 1;
-
- dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
- return 0;
-}
-
-static void power4_stop(void)
-{
- unsigned int mmcr0;
-
- /* freeze counters */
- mmcr0 = mfspr(SPRN_MMCR0);
- mmcr0 |= MMCR0_FC;
- mtspr(SPRN_MMCR0, mmcr0);
-
- oprofile_running = 0;
-
- dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
-
- mb();
-}
-
-/* Fake functions used by canonicalize_pc */
-static void __used hypervisor_bucket(void)
-{
-}
-
-static void __used rtas_bucket(void)
-{
-}
-
-static void __used kernel_unknown_bucket(void)
-{
-}
-
-/*
- * On GQ and newer the MMCRA stores the HV and PR bits at the time
- * the SIAR was sampled. We use that to work out if the SIAR was sampled in
- * the hypervisor, our exception vectors or RTAS.
- * If the MMCRA_SAMPLE_ENABLE bit is set, we can use the MMCRA[slot] bits
- * to more accurately identify the address of the sampled instruction. The
- * mmcra[slot] bits represent the slot number of a sampled instruction
- * within an instruction group. The slot will contain a value between 1
- * and 5 if MMCRA_SAMPLE_ENABLE is set, otherwise 0.
- */
-static unsigned long get_pc(struct pt_regs *regs)
-{
- unsigned long pc = mfspr(SPRN_SIAR);
- unsigned long mmcra;
- unsigned long slot;
-
- /* Can't do much about it */
- if (!cur_cpu_spec->oprofile_mmcra_sihv)
- return pc;
-
- mmcra = mfspr(SPRN_MMCRA);
-
- if (use_slot_nums && (mmcra & MMCRA_SAMPLE_ENABLE)) {
- slot = ((mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT);
- if (slot > 1)
- pc += 4 * (slot - 1);
- }
-
- /* Were we in the hypervisor? */
- if (firmware_has_feature(FW_FEATURE_LPAR) &&
- (mmcra & cur_cpu_spec->oprofile_mmcra_sihv))
- /* function descriptor madness */
- return *((unsigned long *)hypervisor_bucket);
-
- /* We were in userspace, nothing to do */
- if (mmcra & cur_cpu_spec->oprofile_mmcra_sipr)
- return pc;
-
-#ifdef CONFIG_PPC_RTAS
- /* Were we in RTAS? */
- if (pc >= rtas.base && pc < (rtas.base + rtas.size))
- /* function descriptor madness */
- return *((unsigned long *)rtas_bucket);
-#endif
-
- /* Were we in our exception vectors or SLB real mode miss handler? */
- if (pc < 0x1000000UL)
- return (unsigned long)__va(pc);
-
- /* Not sure where we were */
- if (!is_kernel_addr(pc))
- /* function descriptor madness */
- return *((unsigned long *)kernel_unknown_bucket);
-
- return pc;
-}
-
-static int get_kernel(unsigned long pc, unsigned long mmcra)
-{
- int is_kernel;
-
- if (!cur_cpu_spec->oprofile_mmcra_sihv) {
- is_kernel = is_kernel_addr(pc);
- } else {
- is_kernel = ((mmcra & cur_cpu_spec->oprofile_mmcra_sipr) == 0);
- }
-
- return is_kernel;
-}
-
-static bool pmc_overflow(unsigned long val)
-{
- if ((int)val < 0)
- return true;
-
- /*
- * Events on POWER7 can roll back if a speculative event doesn't
- * eventually complete. Unfortunately in some rare cases they will
- * raise a performance monitor exception. We need to catch this to
- * ensure we reset the PMC. In all cases the PMC will be 256 or less
- * cycles from overflow.
- *
- * We only do this if the first pass fails to find any overflowing
- * PMCs because a user might set a period of less than 256 and we
- * don't want to mistakenly reset them.
- */
- if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256))
- return true;
-
- return false;
-}
-
-static void power4_handle_interrupt(struct pt_regs *regs,
- struct op_counter_config *ctr)
-{
- unsigned long pc;
- int is_kernel;
- int val;
- int i;
- unsigned int mmcr0;
- unsigned long mmcra;
- bool siar_valid = false;
-
- mmcra = mfspr(SPRN_MMCRA);
-
- pc = get_pc(regs);
- is_kernel = get_kernel(pc, mmcra);
-
- /* set the PMM bit (see comment below) */
- mtmsrd(mfmsr() | MSR_PMM);
-
- /* Check that the SIAR valid bit in MMCRA is set to 1. */
- if ((mmcra & MMCRA_SIAR_VALID_MASK) == MMCRA_SIAR_VALID_MASK)
- siar_valid = true;
-
- for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
- val = classic_ctr_read(i);
- if (pmc_overflow(val)) {
- if (oprofile_running && ctr[i].enabled) {
- /* Power 7+ and newer architectures:
- * If the event is a marked event, then only
- * save the sample if the SIAR valid bit is
- * set. If the event is not marked, then
- * always save the sample.
- * Note, the Sample enable bit in the MMCRA
- * register must be set to 1 if the group
- * contains a marked event.
- */
- if ((siar_valid &&
- (cntr_marked_events & (1 << i)))
- || !(cntr_marked_events & (1 << i)))
- oprofile_add_ext_sample(pc, regs, i,
- is_kernel);
-
- classic_ctr_write(i, reset_value[i]);
- } else {
- classic_ctr_write(i, 0);
- }
- }
- }
-
- mmcr0 = mfspr(SPRN_MMCR0);
-
- /* reset the perfmon trigger */
- mmcr0 |= MMCR0_PMXE;
-
- /*
- * We must clear the PMAO bit on some (GQ) chips. Just do it
- * all the time
- */
- mmcr0 &= ~MMCR0_PMAO;
-
- /* Clear the appropriate bits in the MMCRA */
- mmcra &= ~cur_cpu_spec->oprofile_mmcra_clear;
- mtspr(SPRN_MMCRA, mmcra);
-
- /*
- * now clear the freeze bit, counting will not start until we
- * rfid from this exception, because only at that point will
- * the PMM bit be cleared
- */
- mmcr0 &= ~MMCR0_FC;
- mtspr(SPRN_MMCR0, mmcr0);
-}
-
-struct op_powerpc_model op_model_power4 = {
- .reg_setup = power4_reg_setup,
- .cpu_setup = power4_cpu_setup,
- .start = power4_start,
- .stop = power4_stop,
- .handle_interrupt = power4_handle_interrupt,
-};
diff --git a/arch/powerpc/perf/8xx-pmu.c b/arch/powerpc/perf/8xx-pmu.c
new file mode 100644
index 000000000000..1d2972229e3a
--- /dev/null
+++ b/arch/powerpc/perf/8xx-pmu.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance event support - PPC 8xx
+ *
+ * Copyright 2016 Christophe Leroy, CS Systemes d'Information
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <asm/pmc.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/ptrace.h>
+#include <asm/text-patching.h>
+#include <asm/inst.h>
+
+#define PERF_8xx_ID_CPU_CYCLES 1
+#define PERF_8xx_ID_HW_INSTRUCTIONS 2
+#define PERF_8xx_ID_ITLB_LOAD_MISS 3
+#define PERF_8xx_ID_DTLB_LOAD_MISS 4
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define DTLB_LOAD_MISS (C(DTLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16))
+#define ITLB_LOAD_MISS (C(ITLB) | (C(OP_READ) << 8) | (C(RESULT_MISS) << 16))
+
+extern unsigned long itlb_miss_counter, dtlb_miss_counter;
+extern atomic_t instruction_counter;
+
+static atomic_t insn_ctr_ref;
+static atomic_t itlb_miss_ref;
+static atomic_t dtlb_miss_ref;
+
+static s64 get_insn_ctr(void)
+{
+ int ctr;
+ unsigned long counta;
+
+ do {
+ ctr = atomic_read(&instruction_counter);
+ counta = mfspr(SPRN_COUNTA);
+ } while (ctr != atomic_read(&instruction_counter));
+
+ return ((s64)ctr << 16) | (counta >> 16);
+}
+
+static int event_type(struct perf_event *event)
+{
+ switch (event->attr.type) {
+ case PERF_TYPE_HARDWARE:
+ if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES)
+ return PERF_8xx_ID_CPU_CYCLES;
+ if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS)
+ return PERF_8xx_ID_HW_INSTRUCTIONS;
+ break;
+ case PERF_TYPE_HW_CACHE:
+ if (event->attr.config == ITLB_LOAD_MISS)
+ return PERF_8xx_ID_ITLB_LOAD_MISS;
+ if (event->attr.config == DTLB_LOAD_MISS)
+ return PERF_8xx_ID_DTLB_LOAD_MISS;
+ break;
+ case PERF_TYPE_RAW:
+ break;
+ default:
+ return -ENOENT;
+ }
+ return -EOPNOTSUPP;
+}
+
+static int mpc8xx_pmu_event_init(struct perf_event *event)
+{
+ int type = event_type(event);
+
+ if (type < 0)
+ return type;
+ return 0;
+}
+
+static int mpc8xx_pmu_add(struct perf_event *event, int flags)
+{
+ int type = event_type(event);
+ s64 val = 0;
+
+ if (type < 0)
+ return type;
+
+ switch (type) {
+ case PERF_8xx_ID_CPU_CYCLES:
+ val = get_tb();
+ break;
+ case PERF_8xx_ID_HW_INSTRUCTIONS:
+ if (atomic_inc_return(&insn_ctr_ref) == 1)
+ mtspr(SPRN_ICTRL, 0xc0080007);
+ val = get_insn_ctr();
+ break;
+ case PERF_8xx_ID_ITLB_LOAD_MISS:
+ if (atomic_inc_return(&itlb_miss_ref) == 1) {
+ unsigned long target = patch_site_addr(&patch__itlbmiss_perf);
+
+ patch_branch_site(&patch__itlbmiss_exit_1, target, 0);
+ }
+ val = itlb_miss_counter;
+ break;
+ case PERF_8xx_ID_DTLB_LOAD_MISS:
+ if (atomic_inc_return(&dtlb_miss_ref) == 1) {
+ unsigned long target = patch_site_addr(&patch__dtlbmiss_perf);
+
+ patch_branch_site(&patch__dtlbmiss_exit_1, target, 0);
+ }
+ val = dtlb_miss_counter;
+ break;
+ }
+ local64_set(&event->hw.prev_count, val);
+ return 0;
+}
+
+static void mpc8xx_pmu_read(struct perf_event *event)
+{
+ int type = event_type(event);
+ s64 prev, val = 0, delta = 0;
+
+ if (type < 0)
+ return;
+
+ do {
+ prev = local64_read(&event->hw.prev_count);
+ switch (type) {
+ case PERF_8xx_ID_CPU_CYCLES:
+ val = get_tb();
+ delta = 16 * (val - prev);
+ break;
+ case PERF_8xx_ID_HW_INSTRUCTIONS:
+ val = get_insn_ctr();
+ delta = prev - val;
+ if (delta < 0)
+ delta += 0x1000000000000LL;
+ break;
+ case PERF_8xx_ID_ITLB_LOAD_MISS:
+ val = itlb_miss_counter;
+ delta = (s64)((s32)val - (s32)prev);
+ break;
+ case PERF_8xx_ID_DTLB_LOAD_MISS:
+ val = dtlb_miss_counter;
+ delta = (s64)((s32)val - (s32)prev);
+ break;
+ }
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+
+ local64_add(delta, &event->count);
+}
+
+static void mpc8xx_pmu_del(struct perf_event *event, int flags)
+{
+ ppc_inst_t insn = ppc_inst(PPC_RAW_MFSPR(10, SPRN_SPRG_SCRATCH2));
+
+ mpc8xx_pmu_read(event);
+
+ /* If it was the last user, stop counting to avoid useless overhead */
+ switch (event_type(event)) {
+ case PERF_8xx_ID_CPU_CYCLES:
+ break;
+ case PERF_8xx_ID_HW_INSTRUCTIONS:
+ if (atomic_dec_return(&insn_ctr_ref) == 0)
+ mtspr(SPRN_ICTRL, 7);
+ break;
+ case PERF_8xx_ID_ITLB_LOAD_MISS:
+ if (atomic_dec_return(&itlb_miss_ref) == 0)
+ patch_instruction_site(&patch__itlbmiss_exit_1, insn);
+ break;
+ case PERF_8xx_ID_DTLB_LOAD_MISS:
+ if (atomic_dec_return(&dtlb_miss_ref) == 0)
+ patch_instruction_site(&patch__dtlbmiss_exit_1, insn);
+ break;
+ }
+}
+
+static struct pmu mpc8xx_pmu = {
+ .event_init = mpc8xx_pmu_event_init,
+ .add = mpc8xx_pmu_add,
+ .del = mpc8xx_pmu_del,
+ .read = mpc8xx_pmu_read,
+ .capabilities = PERF_PMU_CAP_NO_INTERRUPT |
+ PERF_PMU_CAP_NO_NMI,
+};
+
+static int init_mpc8xx_pmu(void)
+{
+ mtspr(SPRN_ICTRL, 7);
+ mtspr(SPRN_CMPA, 0);
+ mtspr(SPRN_COUNTA, 0xffff);
+
+ return perf_pmu_register(&mpc8xx_pmu, "cpu", PERF_TYPE_RAW);
+}
+
+early_initcall(init_mpc8xx_pmu);
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a5652a..78dd7e25219e 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -1,17 +1,26 @@
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_PERF_EVENTS) += callchain.o
+obj-y += callchain.o callchain_$(BITS).o perf_regs.o
+obj-$(CONFIG_COMPAT) += callchain_32.o
-obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
-obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
+obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o
+obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
- power8-pmu.o
+ isa207-common.o power8-pmu.o power9-pmu.o \
+ generic-compat-pmu.o power10-pmu.o bhrb.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
+obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
-obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
+obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o vpa-dtl.o
+
+obj-$(CONFIG_VPA_PMU) += vpa-pmu.o
+
+obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o
+
+obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
diff --git a/arch/powerpc/perf/bhrb.S b/arch/powerpc/perf/bhrb.S
index d85f9a58ddbc..47ba05d5ae76 100644
--- a/arch/powerpc/perf/bhrb.S
+++ b/arch/powerpc/perf/bhrb.S
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Basic assembly code to read BHRB entries
*
* Copyright 2013 Anshuman Khandual, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <asm/ppc_asm.h>
#include <asm/ppc-opcode.h>
@@ -25,7 +21,7 @@
_GLOBAL(read_bhrb)
cmpldi r3,31
bgt 1f
- ld r4,bhrb_table@got(r2)
+ LOAD_REG_ADDR(r4, bhrb_table)
sldi r3,r3,3
add r3,r4,r3
mtctr r3
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 74d1e780748b..26aa26482c9a 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter callchain support - powerpc architecture code
*
* Copyright © 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -15,14 +11,12 @@
#include <linux/uaccess.h>
#include <linux/mm.h>
#include <asm/ptrace.h>
-#include <asm/pgtable.h>
#include <asm/sigcontext.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
-#ifdef CONFIG_PPC64
-#include "../kernel/ppc32.h"
-#endif
+#include <asm/pte-walk.h>
+#include "callchain.h"
/*
* Is sp valid as the address of the next kernel stack frame after prev_sp?
@@ -33,9 +27,9 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
{
if (sp & 0xf)
return 0; /* must be 16-byte aligned */
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, current))
return 0;
- if (sp >= prev_sp + STACK_FRAME_OVERHEAD)
+ if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
return 1;
/*
* sp could decrease when we jump off an interrupt stack
@@ -46,8 +40,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
return 0;
}
-void
-perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+void __no_sanitize_address
+perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
unsigned long sp, next_sp;
unsigned long next_ip;
@@ -57,9 +51,9 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
lr = regs->link;
sp = regs->gpr[1];
- perf_callchain_store(entry, perf_instruction_pointer(regs));
+ perf_callchain_store(entry, perf_arch_instruction_pointer(regs));
- if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+ if (!validate_sp(sp, current))
return;
for (;;) {
@@ -67,16 +61,17 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
next_sp = fp[0];
if (next_sp == sp + STACK_INT_FRAME_SIZE &&
- fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+ validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) &&
+ fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
/*
* This looks like an interrupt frame for an
* interrupt that occurred in the kernel
*/
- regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
+ regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
next_ip = regs->nip;
lr = regs->link;
level = 0;
- perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+ perf_callchain_store_context(entry, PERF_CONTEXT_KERNEL);
} else {
if (level == 0)
@@ -105,387 +100,10 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
}
-#ifdef CONFIG_PPC64
-/*
- * On 64-bit we don't want to invoke hash_page on user addresses from
- * interrupt context, so if the access faults, we read the page tables
- * to find which page (if any) is mapped and access it directly.
- */
-static int read_user_stack_slow(void __user *ptr, void *ret, int nb)
-{
- pgd_t *pgdir;
- pte_t *ptep, pte;
- unsigned shift;
- unsigned long addr = (unsigned long) ptr;
- unsigned long offset;
- unsigned long pfn;
- void *kaddr;
-
- pgdir = current->mm->pgd;
- if (!pgdir)
- return -EFAULT;
-
- ptep = find_linux_pte_or_hugepte(pgdir, addr, &shift);
- if (!shift)
- shift = PAGE_SHIFT;
-
- /* align address to page boundary */
- offset = addr & ((1UL << shift) - 1);
- addr -= offset;
-
- if (ptep == NULL)
- return -EFAULT;
- pte = *ptep;
- if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER))
- return -EFAULT;
- pfn = pte_pfn(pte);
- if (!page_is_ram(pfn))
- return -EFAULT;
-
- /* no highmem to worry about here */
- kaddr = pfn_to_kaddr(pfn);
- memcpy(ret, kaddr + offset, nb);
- return 0;
-}
-
-static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
-{
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
- ((unsigned long)ptr & 7))
- return -EFAULT;
-
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
- return 0;
- }
- pagefault_enable();
-
- return read_user_stack_slow(ptr, ret, 8);
-}
-
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
-{
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
- ((unsigned long)ptr & 3))
- return -EFAULT;
-
- pagefault_disable();
- if (!__get_user_inatomic(*ret, ptr)) {
- pagefault_enable();
- return 0;
- }
- pagefault_enable();
-
- return read_user_stack_slow(ptr, ret, 4);
-}
-
-static inline int valid_user_sp(unsigned long sp, int is_64)
-{
- if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32)
- return 0;
- return 1;
-}
-
-/*
- * 64-bit user processes use the same stack frame for RT and non-RT signals.
- */
-struct signal_frame_64 {
- char dummy[__SIGNAL_FRAMESIZE];
- struct ucontext uc;
- unsigned long unused[2];
- unsigned int tramp[6];
- struct siginfo *pinfo;
- void *puc;
- struct siginfo info;
- char abigap[288];
-};
-
-static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
-{
- if (nip == fp + offsetof(struct signal_frame_64, tramp))
- return 1;
- if (vdso64_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso64_rt_sigtramp)
- return 1;
- return 0;
-}
-
-/*
- * Do some sanity checking on the signal frame pointed to by sp.
- * We check the pinfo and puc pointers in the frame.
- */
-static int sane_signal_64_frame(unsigned long sp)
-{
- struct signal_frame_64 __user *sf;
- unsigned long pinfo, puc;
-
- sf = (struct signal_frame_64 __user *) sp;
- if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
- read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
- return 0;
- return pinfo == (unsigned long) &sf->info &&
- puc == (unsigned long) &sf->uc;
-}
-
-static void perf_callchain_user_64(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
- unsigned long sp, next_sp;
- unsigned long next_ip;
- unsigned long lr;
- long level = 0;
- struct signal_frame_64 __user *sigframe;
- unsigned long __user *fp, *uregs;
-
- next_ip = perf_instruction_pointer(regs);
- lr = regs->link;
- sp = regs->gpr[1];
- perf_callchain_store(entry, next_ip);
-
- for (;;) {
- fp = (unsigned long __user *) sp;
- if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp))
- return;
- if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
- return;
-
- /*
- * Note: the next_sp - sp >= signal frame size check
- * is true when next_sp < sp, which can happen when
- * transitioning from an alternate signal stack to the
- * normal stack.
- */
- if (next_sp - sp >= sizeof(struct signal_frame_64) &&
- (is_sigreturn_64_address(next_ip, sp) ||
- (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
- sane_signal_64_frame(sp)) {
- /*
- * This looks like an signal frame
- */
- sigframe = (struct signal_frame_64 __user *) sp;
- uregs = sigframe->uc.uc_mcontext.gp_regs;
- if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
- read_user_stack_64(&uregs[PT_LNK], &lr) ||
- read_user_stack_64(&uregs[PT_R1], &sp))
- return;
- level = 0;
- perf_callchain_store(entry, PERF_CONTEXT_USER);
- perf_callchain_store(entry, next_ip);
- continue;
- }
-
- if (level == 0)
- next_ip = lr;
- perf_callchain_store(entry, next_ip);
- ++level;
- sp = next_sp;
- }
-}
-
-static inline int current_is_64bit(void)
-{
- /*
- * We can't use test_thread_flag() here because we may be on an
- * interrupt stack, and the thread flags don't get copied over
- * from the thread_info on the main stack to the interrupt stack.
- */
- return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
-}
-
-#else /* CONFIG_PPC64 */
-/*
- * On 32-bit we just access the address and let hash_page create a
- * HPTE if necessary, so there is no need to fall back to reading
- * the page tables. Since this is called at interrupt level,
- * do_page_fault() won't treat a DSI as a page fault.
- */
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
-{
- int rc;
-
- if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
- ((unsigned long)ptr & 3))
- return -EFAULT;
-
- pagefault_disable();
- rc = __get_user_inatomic(*ret, ptr);
- pagefault_enable();
-
- return rc;
-}
-
-static inline void perf_callchain_user_64(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
-}
-
-static inline int current_is_64bit(void)
-{
- return 0;
-}
-
-static inline int valid_user_sp(unsigned long sp, int is_64)
-{
- if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
- return 0;
- return 1;
-}
-
-#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
-#define sigcontext32 sigcontext
-#define mcontext32 mcontext
-#define ucontext32 ucontext
-#define compat_siginfo_t struct siginfo
-
-#endif /* CONFIG_PPC64 */
-
-/*
- * Layout for non-RT signal frames
- */
-struct signal_frame_32 {
- char dummy[__SIGNAL_FRAMESIZE32];
- struct sigcontext32 sctx;
- struct mcontext32 mctx;
- int abigap[56];
-};
-
-/*
- * Layout for RT signal frames
- */
-struct rt_signal_frame_32 {
- char dummy[__SIGNAL_FRAMESIZE32 + 16];
- compat_siginfo_t info;
- struct ucontext32 uc;
- int abigap[56];
-};
-
-static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
-{
- if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
- return 1;
- if (vdso32_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_sigtramp)
- return 1;
- return 0;
-}
-
-static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
-{
- if (nip == fp + offsetof(struct rt_signal_frame_32,
- uc.uc_mcontext.mc_pad))
- return 1;
- if (vdso32_rt_sigtramp && current->mm->context.vdso_base &&
- nip == current->mm->context.vdso_base + vdso32_rt_sigtramp)
- return 1;
- return 0;
-}
-
-static int sane_signal_32_frame(unsigned int sp)
-{
- struct signal_frame_32 __user *sf;
- unsigned int regs;
-
- sf = (struct signal_frame_32 __user *) (unsigned long) sp;
- if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
- return 0;
- return regs == (unsigned long) &sf->mctx;
-}
-
-static int sane_rt_signal_32_frame(unsigned int sp)
-{
- struct rt_signal_frame_32 __user *sf;
- unsigned int regs;
-
- sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
- if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
- return 0;
- return regs == (unsigned long) &sf->uc.uc_mcontext;
-}
-
-static unsigned int __user *signal_frame_32_regs(unsigned int sp,
- unsigned int next_sp, unsigned int next_ip)
-{
- struct mcontext32 __user *mctx = NULL;
- struct signal_frame_32 __user *sf;
- struct rt_signal_frame_32 __user *rt_sf;
-
- /*
- * Note: the next_sp - sp >= signal frame size check
- * is true when next_sp < sp, for example, when
- * transitioning from an alternate signal stack to the
- * normal stack.
- */
- if (next_sp - sp >= sizeof(struct signal_frame_32) &&
- is_sigreturn_32_address(next_ip, sp) &&
- sane_signal_32_frame(sp)) {
- sf = (struct signal_frame_32 __user *) (unsigned long) sp;
- mctx = &sf->mctx;
- }
-
- if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
- is_rt_sigreturn_32_address(next_ip, sp) &&
- sane_rt_signal_32_frame(sp)) {
- rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
- mctx = &rt_sf->uc.uc_mcontext;
- }
-
- if (!mctx)
- return NULL;
- return mctx->mc_gregs;
-}
-
-static void perf_callchain_user_32(struct perf_callchain_entry *entry,
- struct pt_regs *regs)
-{
- unsigned int sp, next_sp;
- unsigned int next_ip;
- unsigned int lr;
- long level = 0;
- unsigned int __user *fp, *uregs;
-
- next_ip = perf_instruction_pointer(regs);
- lr = regs->link;
- sp = regs->gpr[1];
- perf_callchain_store(entry, next_ip);
-
- while (entry->nr < PERF_MAX_STACK_DEPTH) {
- fp = (unsigned int __user *) (unsigned long) sp;
- if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp))
- return;
- if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
- return;
-
- uregs = signal_frame_32_regs(sp, next_sp, next_ip);
- if (!uregs && level <= 1)
- uregs = signal_frame_32_regs(sp, next_sp, lr);
- if (uregs) {
- /*
- * This looks like an signal frame, so restart
- * the stack trace with the values in it.
- */
- if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
- read_user_stack_32(&uregs[PT_LNK], &lr) ||
- read_user_stack_32(&uregs[PT_R1], &sp))
- return;
- level = 0;
- perf_callchain_store(entry, PERF_CONTEXT_USER);
- perf_callchain_store(entry, next_ip);
- continue;
- }
-
- if (level == 0)
- next_ip = lr;
- perf_callchain_store(entry, next_ip);
- ++level;
- sp = next_sp;
- }
-}
-
void
-perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
- if (current_is_64bit())
+ if (!is_32bit_task())
perf_callchain_user_64(entry, regs);
else
perf_callchain_user_32(entry, regs);
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
new file mode 100644
index 000000000000..19a8d051ddf1
--- /dev/null
+++ b/arch/powerpc/perf/callchain.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _POWERPC_PERF_CALLCHAIN_H
+#define _POWERPC_PERF_CALLCHAIN_H
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs);
+
+static inline bool invalid_user_sp(unsigned long sp)
+{
+ unsigned long mask = is_32bit_task() ? 3 : 7;
+ unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32);
+
+ return (!sp || (sp & mask) || (sp > top));
+}
+
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables. Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static inline int __read_user_stack(const void __user *ptr, void *ret,
+ size_t size)
+{
+ unsigned long addr = (unsigned long)ptr;
+
+ if (addr > TASK_SIZE - size || (addr & (size - 1)))
+ return -EFAULT;
+
+ return copy_from_user_nofault(ret, ptr, size);
+}
+
+#endif /* _POWERPC_PERF_CALLCHAIN_H */
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
new file mode 100644
index 000000000000..ddcc2d8aa64a
--- /dev/null
+++ b/arch/powerpc/perf/callchain_32.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+#ifdef CONFIG_PPC64
+#include <asm/syscalls_32.h>
+#else /* CONFIG_PPC64 */
+
+#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE
+#define sigcontext32 sigcontext
+#define mcontext32 mcontext
+#define ucontext32 ucontext
+#define compat_siginfo_t struct siginfo
+
+#endif /* CONFIG_PPC64 */
+
+static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret)
+{
+ return __read_user_stack(ptr, ret, sizeof(*ret));
+}
+
+/*
+ * Layout for non-RT signal frames
+ */
+struct signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32];
+ struct sigcontext32 sctx;
+ struct mcontext32 mctx;
+ int abigap[56];
+};
+
+/*
+ * Layout for RT signal frames
+ */
+struct rt_signal_frame_32 {
+ char dummy[__SIGNAL_FRAMESIZE32 + 16];
+ compat_siginfo_t info;
+ struct ucontext32 uc;
+ int abigap[56];
+};
+
+static int is_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad))
+ return 1;
+ if (current->mm->context.vdso &&
+ nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp32))
+ return 1;
+ return 0;
+}
+
+static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp)
+{
+ if (nip == fp + offsetof(struct rt_signal_frame_32,
+ uc.uc_mcontext.mc_pad))
+ return 1;
+ if (current->mm->context.vdso &&
+ nip == VDSO32_SYMBOL(current->mm->context.vdso, sigtramp_rt32))
+ return 1;
+ return 0;
+}
+
+static int sane_signal_32_frame(unsigned int sp)
+{
+ struct signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->mctx;
+}
+
+static int sane_rt_signal_32_frame(unsigned int sp)
+{
+ struct rt_signal_frame_32 __user *sf;
+ unsigned int regs;
+
+ sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, &regs))
+ return 0;
+ return regs == (unsigned long) &sf->uc.uc_mcontext;
+}
+
+static unsigned int __user *signal_frame_32_regs(unsigned int sp,
+ unsigned int next_sp, unsigned int next_ip)
+{
+ struct mcontext32 __user *mctx = NULL;
+ struct signal_frame_32 __user *sf;
+ struct rt_signal_frame_32 __user *rt_sf;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, for example, when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_32) &&
+ is_sigreturn_32_address(next_ip, sp) &&
+ sane_signal_32_frame(sp)) {
+ sf = (struct signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &sf->mctx;
+ }
+
+ if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) &&
+ is_rt_sigreturn_32_address(next_ip, sp) &&
+ sane_rt_signal_32_frame(sp)) {
+ rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp;
+ mctx = &rt_sf->uc.uc_mcontext;
+ }
+
+ if (!mctx)
+ return NULL;
+ return mctx->mc_gregs;
+}
+
+void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned int sp, next_sp;
+ unsigned int next_ip;
+ unsigned int lr;
+ long level = 0;
+ unsigned int __user *fp, *uregs;
+
+ next_ip = perf_arch_instruction_pointer(regs);
+ lr = regs->link;
+ sp = regs->gpr[1];
+ perf_callchain_store(entry, next_ip);
+
+ while (entry->nr < entry->max_stack) {
+ fp = (unsigned int __user *) (unsigned long) sp;
+ if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_32(&fp[1], &next_ip))
+ return;
+
+ uregs = signal_frame_32_regs(sp, next_sp, next_ip);
+ if (!uregs && level <= 1)
+ uregs = signal_frame_32_regs(sp, next_sp, lr);
+ if (uregs) {
+ /*
+ * This looks like an signal frame, so restart
+ * the stack trace with the values in it.
+ */
+ if (read_user_stack_32(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_32(&uregs[PT_LNK], &lr) ||
+ read_user_stack_32(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+ perf_callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ perf_callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
new file mode 100644
index 000000000000..115d1c105e8a
--- /dev/null
+++ b/arch/powerpc/perf/callchain_64.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter callchain support - powerpc architecture code
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/percpu.h>
+#include <linux/uaccess.h>
+#include <linux/mm.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+#include <asm/vdso.h>
+#include <asm/pte-walk.h>
+
+#include "callchain.h"
+
+static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret)
+{
+ return __read_user_stack(ptr, ret, sizeof(*ret));
+}
+
+/*
+ * 64-bit user processes use the same stack frame for RT and non-RT signals.
+ */
+struct signal_frame_64 {
+ char dummy[__SIGNAL_FRAMESIZE];
+ struct ucontext uc;
+ unsigned long unused[2];
+ unsigned int tramp[6];
+ struct siginfo *pinfo;
+ void *puc;
+ struct siginfo info;
+ char abigap[288];
+};
+
+static int is_sigreturn_64_address(unsigned long nip, unsigned long fp)
+{
+ if (nip == fp + offsetof(struct signal_frame_64, tramp))
+ return 1;
+ if (current->mm->context.vdso &&
+ nip == VDSO64_SYMBOL(current->mm->context.vdso, sigtramp_rt64))
+ return 1;
+ return 0;
+}
+
+/*
+ * Do some sanity checking on the signal frame pointed to by sp.
+ * We check the pinfo and puc pointers in the frame.
+ */
+static int sane_signal_64_frame(unsigned long sp)
+{
+ struct signal_frame_64 __user *sf;
+ unsigned long pinfo, puc;
+
+ sf = (struct signal_frame_64 __user *) sp;
+ if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) ||
+ read_user_stack_64((unsigned long __user *) &sf->puc, &puc))
+ return 0;
+ return pinfo == (unsigned long) &sf->info &&
+ puc == (unsigned long) &sf->uc;
+}
+
+void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned long sp, next_sp;
+ unsigned long next_ip;
+ unsigned long lr;
+ long level = 0;
+ struct signal_frame_64 __user *sigframe;
+ unsigned long __user *fp, *uregs;
+
+ next_ip = perf_arch_instruction_pointer(regs);
+ lr = regs->link;
+ sp = regs->gpr[1];
+ perf_callchain_store(entry, next_ip);
+
+ while (entry->nr < entry->max_stack) {
+ fp = (unsigned long __user *) sp;
+ if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp))
+ return;
+ if (level > 0 && read_user_stack_64(&fp[2], &next_ip))
+ return;
+
+ /*
+ * Note: the next_sp - sp >= signal frame size check
+ * is true when next_sp < sp, which can happen when
+ * transitioning from an alternate signal stack to the
+ * normal stack.
+ */
+ if (next_sp - sp >= sizeof(struct signal_frame_64) &&
+ (is_sigreturn_64_address(next_ip, sp) ||
+ (level <= 1 && is_sigreturn_64_address(lr, sp))) &&
+ sane_signal_64_frame(sp)) {
+ /*
+ * This looks like an signal frame
+ */
+ sigframe = (struct signal_frame_64 __user *) sp;
+ uregs = sigframe->uc.uc_mcontext.gp_regs;
+ if (read_user_stack_64(&uregs[PT_NIP], &next_ip) ||
+ read_user_stack_64(&uregs[PT_LNK], &lr) ||
+ read_user_stack_64(&uregs[PT_R1], &sp))
+ return;
+ level = 0;
+ perf_callchain_store_context(entry, PERF_CONTEXT_USER);
+ perf_callchain_store(entry, next_ip);
+ continue;
+ }
+
+ if (level == 0)
+ next_ip = lr;
+ perf_callchain_store(entry, next_ip);
+ ++level;
+ sp = next_sp;
+ }
+}
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b7cd00b0171e..8b0081441f85 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1,15 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance event support - powerpc architecture code
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/clock.h>
#include <linux/perf_event.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
@@ -19,7 +16,13 @@
#include <asm/machdep.h>
#include <asm/firmware.h>
#include <asm/ptrace.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
+#include <asm/hw_irq.h>
+#include <asm/interrupt.h>
+
+#ifdef CONFIG_PPC64
+#include "internal.h"
+#endif
#define BHRB_MAX_ENTRIES 32
#define BHRB_TARGET 0x0000000000000002
@@ -36,32 +39,31 @@ struct cpu_hw_events {
struct perf_event *event[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int flags[MAX_HWEVENTS];
- /*
- * The order of the MMCR array is:
- * - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
- * - 32-bit, MMCR0, MMCR1, MMCR2
- */
- unsigned long mmcr[4];
+ struct mmcr_regs mmcr;
struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
- unsigned int group_flag;
+ unsigned int txn_flags;
int n_txn_start;
/* BHRB bits */
u64 bhrb_filter; /* BHRB HW branch filter */
- int bhrb_users;
+ unsigned int bhrb_users;
void *bhrb_context;
struct perf_branch_stack bhrb_stack;
struct perf_branch_entry bhrb_entries[BHRB_MAX_ENTRIES];
+ u64 ic_init;
+
+ /* Store the PMC values */
+ unsigned long pmcs[MAX_HWEVENTS];
};
-DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-struct power_pmu *ppmu;
+static struct power_pmu *ppmu;
/*
* Normally, to ignore kernel events we set the FCS (freeze counters
@@ -75,6 +77,11 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
/*
* 32-bit doesn't have MMCRA but does have an MMCR2,
* and a few other names are different.
+ * Also 32-bit doesn't have MMCR3, SIER2 and SIER3.
+ * Define them as zero knowing that any code path accessing
+ * these registers (via mtspr/mfspr) are done under ppmu flag
+ * check for PPMU_ARCH_31 and we will not enter that code path
+ * for 32-bit.
*/
#ifdef CONFIG_PPC32
@@ -88,13 +95,18 @@ static unsigned int freeze_events_kernel = MMCR0_FCS;
#define MMCR0_PMCC_U6 0
#define SPRN_MMCRA SPRN_MMCR2
+#define SPRN_MMCR3 0
+#define SPRN_SIER2 0
+#define SPRN_SIER3 0
#define MMCRA_SAMPLE_ENABLE 0
+#define MMCRA_BHRB_DISABLE 0
+#define MMCR0_PMCCEXT 0
static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
return 0;
}
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
return 0;
@@ -103,10 +115,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
{
regs->result = 0;
}
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
- return 0;
-}
static inline int siar_valid(struct pt_regs *regs)
{
@@ -119,19 +127,53 @@ static void ebb_event_add(struct perf_event *event) { }
static void ebb_switch_out(unsigned long mmcr0) { }
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- return cpuhw->mmcr[0];
+ return cpuhw->mmcr.mmcr0;
}
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
-void power_pmu_flush_branch_stack(void) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
+{
+}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
+bool is_sier_available(void)
+{
+ if (!ppmu)
+ return false;
+
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return true;
+
+ return false;
+}
+
+/*
+ * Return PMC value corresponding to the
+ * index passed.
+ */
+unsigned long get_pmcs_ext_regs(int idx)
+{
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ return cpuhw->pmcs[idx];
+}
+
static bool regs_use_siar(struct pt_regs *regs)
{
- return !!regs->result;
+ /*
+ * When we take a performance monitor exception the regs are setup
+ * using perf_read_regs() which overloads some fields, in particular
+ * regs->result to tell us whether to use SIAR.
+ *
+ * However if the regs are from another exception, eg. a syscall, then
+ * they have not been setup using perf_read_regs() and so regs->result
+ * is something random.
+ */
+ return ((TRAP(regs) == INTERRUPT_PERFMON) && regs->result);
}
/*
@@ -160,7 +202,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
* [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
*/
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
{
unsigned long mmcra = regs->dsisr;
bool sdar_valid;
@@ -174,6 +216,8 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
sdsync = POWER7P_MMCRA_SDAR_VALID;
else if (ppmu->flags & PPMU_ALT_SIPR)
sdsync = POWER6_MMCRA_SDSYNC;
+ else if (ppmu->flags & PPMU_NO_SIAR)
+ sdsync = MMCRA_SAMPLE_ENABLE;
else
sdsync = MMCRA_SDSYNC;
@@ -182,6 +226,9 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
+
+ if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
+ *addrp = 0;
}
static bool regs_sihv(struct pt_regs *regs)
@@ -212,7 +259,7 @@ static bool regs_sipr(struct pt_regs *regs)
static inline u32 perf_flags_from_msr(struct pt_regs *regs)
{
- if (regs->msr & MSR_PR)
+ if (user_mode(regs))
return PERF_RECORD_MISC_USER;
if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV)
return PERF_RECORD_MISC_HYPERVISOR;
@@ -222,6 +269,8 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
bool use_siar = regs_use_siar(regs);
+ unsigned long siar;
+ unsigned long addr;
if (!use_siar)
return perf_flags_from_msr(regs);
@@ -233,19 +282,31 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs)
* results
*/
if (ppmu->flags & PPMU_NO_SIPR) {
- unsigned long siar = mfspr(SPRN_SIAR);
- if (siar >= PAGE_OFFSET)
+ siar = mfspr(SPRN_SIAR);
+ if (is_kernel_addr(siar))
return PERF_RECORD_MISC_KERNEL;
return PERF_RECORD_MISC_USER;
}
/* PR has priority over HV, so order below is important */
- if (regs_sipr(regs))
- return PERF_RECORD_MISC_USER;
-
- if (regs_sihv(regs) && (freeze_events_kernel != MMCR0_FCHV))
+ if (regs_sipr(regs)) {
+ if (!(ppmu->flags & PPMU_P10))
+ return PERF_RECORD_MISC_USER;
+ } else if (regs_sihv(regs) && (freeze_events_kernel != MMCR0_FCHV))
return PERF_RECORD_MISC_HYPERVISOR;
+ /*
+ * Check the address in SIAR to identify the
+ * privilege levels since the SIER[MSR_HV, MSR_PR]
+ * bits are not set correctly in power10 sometimes
+ */
+ if (ppmu->flags & PPMU_P10) {
+ siar = mfspr(SPRN_SIAR);
+ addr = siar ? siar : regs->nip;
+ if (!is_kernel_addr(addr))
+ return PERF_RECORD_MISC_USER;
+ }
+
return PERF_RECORD_MISC_KERNEL;
}
@@ -276,6 +337,13 @@ static inline void perf_read_regs(struct pt_regs *regs)
* If the PMU doesn't update the SIAR for non marked events use
* pt_regs.
*
+ * If regs is a kernel interrupt, always use SIAR. Some PMUs have an
+ * issue with regs_sipr not being in synch with SIAR in interrupt entry
+ * and return sequences, which can result in regs_sipr being true for
+ * kernel interrupts and SIAR, which has the effect of causing samples
+ * to pile up at mtmsrd MSR[EE] 0->1 or pending irq replay around
+ * interrupt entry/exit.
+ *
* If the PMU has HV/PR flags then check to see if they
* place the exception in userspace. If so, use pt_regs. In
* continuous sampling mode the SIAR and the PMU exception are
@@ -284,12 +352,16 @@ static inline void perf_read_regs(struct pt_regs *regs)
* hypervisor samples as well as samples in the kernel with
* interrupts off hence the userspace check.
*/
- if (TRAP(regs) != 0xf00)
+ if (TRAP(regs) != INTERRUPT_PERFMON)
+ use_siar = 0;
+ else if ((ppmu->flags & PPMU_NO_SIAR))
use_siar = 0;
else if (marked)
use_siar = 1;
else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
use_siar = 0;
+ else if (!user_mode(regs))
+ use_siar = 1;
else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
use_siar = 0;
else
@@ -299,15 +371,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
}
/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
- return !regs->softe;
-}
-
-/*
* On processors like P7+ that have the SIAR-Valid bit, marked instructions
* must be sampled only if the SIAR-valid bit is set.
*
@@ -320,7 +383,14 @@ static inline int siar_valid(struct pt_regs *regs)
int marked = mmcra & MMCRA_SAMPLE_ENABLE;
if (marked) {
- if (ppmu->flags & PPMU_HAS_SIER)
+ /*
+ * SIER[SIAR_VALID] is not set for some
+ * marked events on power10 DD1, so drop
+ * the check for SIER[SIAR_VALID] and return true.
+ */
+ if (ppmu->flags & PPMU_P10_DD1)
+ return 0x1;
+ else if (ppmu->flags & PPMU_HAS_SIER)
return regs->dar & SIER_SIAR_VALID;
if (ppmu->flags & PPMU_SIAR_VALID)
@@ -339,7 +409,7 @@ static void power_pmu_bhrb_reset(void)
static void power_pmu_bhrb_enable(struct perf_event *event)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
if (!ppmu->bhrb_nr)
return;
@@ -350,17 +420,19 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
cpuhw->bhrb_context = event->ctx;
}
cpuhw->bhrb_users++;
+ perf_sched_cb_inc(event->pmu);
}
static void power_pmu_bhrb_disable(struct perf_event *event)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
if (!ppmu->bhrb_nr)
return;
+ WARN_ON_ONCE(!cpuhw->bhrb_users);
cpuhw->bhrb_users--;
- WARN_ON_ONCE(cpuhw->bhrb_users < 0);
+ perf_sched_cb_dec(event->pmu);
if (!cpuhw->disabled && !cpuhw->bhrb_users) {
/* BHRB cannot be turned off when other
@@ -375,29 +447,33 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
* mingle with the other process's entries during context switch.
*/
-void power_pmu_flush_branch_stack(void)
+static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
+ struct task_struct *task, bool sched_in)
{
- if (ppmu->bhrb_nr)
+ if (!ppmu->bhrb_nr)
+ return;
+
+ if (sched_in)
power_pmu_bhrb_reset();
}
/* Calculate the to address for a branch */
static __u64 power_pmu_bhrb_to(u64 addr)
{
unsigned int instr;
- int ret;
__u64 target;
- if (is_kernel_addr(addr))
- return branch_target((unsigned int *)addr);
+ if (is_kernel_addr(addr)) {
+ if (copy_from_kernel_nofault(&instr, (void *)addr,
+ sizeof(instr)))
+ return 0;
+
+ return branch_target(&instr);
+ }
/* Userspace: need copy instruction here then translate it */
- pagefault_disable();
- ret = __get_user_inatomic(instr, (unsigned int __user *)addr);
- if (ret) {
- pagefault_enable();
+ if (copy_from_user_nofault(&instr, (unsigned int __user *)addr,
+ sizeof(instr)))
return 0;
- }
- pagefault_enable();
target = branch_target(&instr);
if ((!target) || (instr & BRANCH_ABSOLUTE))
@@ -408,7 +484,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Processing BHRB entries */
-void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
{
u64 val;
u64 addr;
@@ -430,6 +506,18 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
/* invalid entry */
continue;
+ /*
+ * BHRB rolling buffer could very much contain the kernel
+ * addresses at this point. Check the privileges before
+ * exporting it to userspace (avoid exposure of regions
+ * where we could have speculative execution)
+ * In case of ISA v3.1, BHRB will capture only user-space
+ * addresses, hence include a check before filtering code
+ */
+ if (!(ppmu->flags & PPMU_ARCH_31) &&
+ is_kernel_addr(addr) && event->attr.exclude_kernel)
+ continue;
+
/* Branches are read most recent first (ie. mfbhrb 0 is
* the most recent branch).
* There are two types of valid entries:
@@ -480,6 +568,7 @@ void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
}
}
cpuhw->bhrb_stack.nr = u_index;
+ cpuhw->bhrb_stack.hw_idx = -1ULL;
return;
}
@@ -545,11 +634,16 @@ static void ebb_switch_out(unsigned long mmcr0)
current->thread.sdar = mfspr(SPRN_SDAR);
current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+ if (ppmu->flags & PPMU_ARCH_31) {
+ current->thread.mmcr3 = mfspr(SPRN_MMCR3);
+ current->thread.sier2 = mfspr(SPRN_SIER2);
+ current->thread.sier3 = mfspr(SPRN_SIER3);
+ }
}
static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
{
- unsigned long mmcr0 = cpuhw->mmcr[0];
+ unsigned long mmcr0 = cpuhw->mmcr.mmcr0;
if (!ebb)
goto out;
@@ -583,7 +677,13 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
* unfreeze counters, it should not set exclude_xxx in its events and
* instead manage the MMCR2 entirely by itself.
*/
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ mtspr(SPRN_MMCR3, current->thread.mmcr3);
+ mtspr(SPRN_SIER2, current->thread.sier2);
+ mtspr(SPRN_SIER3, current->thread.sier3);
+ }
out:
return mmcr0;
}
@@ -637,7 +737,7 @@ static void pmao_restore_workaround(bool ebb)
/*
* We are already soft-disabled in power_pmu_enable(). We need to hard
- * enable to actually prevent the PMU exception from firing.
+ * disable to actually prevent the PMU exception from firing.
*/
hard_irq_disable();
@@ -672,6 +772,35 @@ static void pmao_restore_workaround(bool ebb)
mtspr(SPRN_PMC5, pmcs[4]);
mtspr(SPRN_PMC6, pmcs[5]);
}
+
+/*
+ * If the perf subsystem wants performance monitor interrupts as soon as
+ * possible (e.g., to sample the instruction address and stack chain),
+ * this should return true. The IRQ masking code can then enable MSR[EE]
+ * in some places (e.g., interrupt handlers) that allows PMI interrupts
+ * through to improve accuracy of profiles, at the cost of some performance.
+ *
+ * The PMU counters can be enabled by other means (e.g., sysfs raw SPR
+ * access), but in that case there is no need for prompt PMI handling.
+ *
+ * This currently returns true if any perf counter is being used. It
+ * could possibly return false if only events are being counted rather than
+ * samples being taken, but for now this is good enough.
+ */
+bool power_pmu_wants_prompt_pmi(void)
+{
+ struct cpu_hw_events *cpuhw;
+
+ /*
+ * This could simply test local_paca->pmcregs_in_use if that were not
+ * under ifdef KVM.
+ */
+ if (!ppmu)
+ return false;
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ return cpuhw->n_events;
+}
#endif /* CONFIG_PPC64 */
static void perf_event_interrupt(struct pt_regs *regs);
@@ -754,6 +883,19 @@ static void write_pmc(int idx, unsigned long val)
}
}
+static int any_pmc_overflown(struct cpu_hw_events *cpuhw)
+{
+ int i, idx;
+
+ for (i = 0; i < cpuhw->n_events; i++) {
+ idx = cpuhw->event[i]->hw.idx;
+ if ((idx) && ((int)read_pmc(idx) < 0))
+ return idx;
+ }
+
+ return 0;
+}
+
/* Called from sysrq_handle_showregs() */
void perf_event_print_debug(void)
{
@@ -761,6 +903,11 @@ void perf_event_print_debug(void)
u32 pmcs[MAX_HWEVENTS];
int i;
+ if (!ppmu) {
+ pr_info("Performance monitor hardware not registered.\n");
+ return;
+ }
+
if (!ppmu->n_counter)
return;
@@ -798,6 +945,11 @@ void perf_event_print_debug(void)
pr_info("EBBRR: %016lx BESCR: %016lx\n",
mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
}
+
+ if (ppmu->flags & PPMU_ARCH_31) {
+ pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n",
+ mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+ }
#endif
pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n",
mfspr(SPRN_SIAR), sdar, sier);
@@ -813,7 +965,7 @@ void perf_event_print_debug(void)
*/
static int power_check_constraints(struct cpu_hw_events *cpuhw,
u64 event_id[], unsigned int cflags[],
- int n_ev)
+ int n_ev, struct perf_event **event)
{
unsigned long mask, value, nv;
unsigned long smasks[MAX_HWEVENTS], svalues[MAX_HWEVENTS];
@@ -821,6 +973,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
int i, j;
unsigned long addf = ppmu->add_fields;
unsigned long tadd = ppmu->test_adder;
+ unsigned long grp_mask = ppmu->group_constraint_mask;
+ unsigned long grp_val = ppmu->group_constraint_val;
if (n_ev > ppmu->n_counter)
return -1;
@@ -834,22 +988,30 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
event_id[i] = cpuhw->alternatives[i][0];
}
if (ppmu->get_constraint(event_id[i], &cpuhw->amasks[i][0],
- &cpuhw->avalues[i][0]))
+ &cpuhw->avalues[i][0], event[i]->attr.config1))
return -1;
}
value = mask = 0;
for (i = 0; i < n_ev; ++i) {
nv = (value | cpuhw->avalues[i][0]) +
(value & cpuhw->avalues[i][0] & addf);
- if ((((nv + tadd) ^ value) & mask) != 0 ||
- (((nv + tadd) ^ cpuhw->avalues[i][0]) &
- cpuhw->amasks[i][0]) != 0)
+
+ if (((((nv + tadd) ^ value) & mask) & (~grp_mask)) != 0)
+ break;
+
+ if (((((nv + tadd) ^ cpuhw->avalues[i][0]) & cpuhw->amasks[i][0])
+ & (~grp_mask)) != 0)
break;
+
value = nv;
mask |= cpuhw->amasks[i][0];
}
- if (i == n_ev)
- return 0; /* all OK */
+ if (i == n_ev) {
+ if ((value & mask & grp_mask) != (mask & grp_val))
+ return -1;
+ else
+ return 0; /* all OK */
+ }
/* doesn't work, gather alternatives... */
if (!ppmu->get_alternatives)
@@ -861,7 +1023,8 @@ static int power_check_constraints(struct cpu_hw_events *cpuhw,
for (j = 1; j < n_alt[i]; ++j)
ppmu->get_constraint(cpuhw->alternatives[i][j],
&cpuhw->amasks[i][j],
- &cpuhw->avalues[i][j]);
+ &cpuhw->avalues[i][j],
+ event[i]->attr.config1);
}
/* enumerate all possibilities and see if any will work */
@@ -976,9 +1139,9 @@ static u64 check_and_compute_delta(u64 prev, u64 val)
/*
* POWER7 can roll back counter values, if the new value is smaller
* than the previous value it will cause the delta and the counter to
- * have bogus values unless we rolled a counter over. If a coutner is
+ * have bogus values unless we rolled a counter over. If a counter is
* rolled back, it will be smaller, but within 256, which is the maximum
- * number of events to rollback at once. If we dectect a rollback
+ * number of events to rollback at once. If we detect a rollback
* return 0. This can lead to a small lack of precision in the
* counters.
*/
@@ -1139,12 +1302,12 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
- unsigned long flags, mmcr0, val;
+ unsigned long flags, mmcr0, val, mmcra;
if (!ppmu)
return;
local_irq_save(flags);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
if (!cpuhw->disabled) {
/*
@@ -1157,11 +1320,16 @@ static void power_pmu_disable(struct pmu *pmu)
/*
* Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56
+ * Also clear PMXE to prevent PMIs from being triggered in some
+ * corner cases during PMU disable.
*/
val = mmcr0 = mfspr(SPRN_MMCR0);
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
- MMCR0_FC56);
+ MMCR0_PMXE | MMCR0_FC56);
+ /* Set mmcr0 PMCCEXT for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCR0_PMCCEXT;
/*
* The barrier is to make sure the mtspr has been
@@ -1170,20 +1338,69 @@ static void power_pmu_disable(struct pmu *pmu)
*/
write_mmcr0(cpuhw, val);
mb();
+ isync();
+
+ /*
+ * Some corner cases could clear the PMU counter overflow
+ * while a masked PMI is pending. One such case is when
+ * a PMI happens during interrupt replay and perf counter
+ * values are cleared by PMU callbacks before replay.
+ *
+ * Disable the interrupt by clearing the paca bit for PMI
+ * since we are disabling the PMU now. Otherwise provide a
+ * warning if there is PMI pending, but no counter is found
+ * overflown.
+ *
+ * Since power_pmu_disable runs under local_irq_save, it
+ * could happen that code hits a PMC overflow without PMI
+ * pending in paca. Hence only clear PMI pending if it was
+ * set.
+ *
+ * If a PMI is pending, then MSR[EE] must be disabled (because
+ * the masked PMI handler disables EE). So it is safe to
+ * call clear_pmi_irq_pending().
+ */
+ if (pmi_irq_pending())
+ clear_pmi_irq_pending();
+
+ val = mmcra = cpuhw->mmcr.mmcra;
/*
* Disable instruction sampling if it was enabled
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
- mtspr(SPRN_MMCRA,
- cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+ val &= ~MMCRA_SAMPLE_ENABLE;
+
+ /* Disable BHRB via mmcra (BHRBRD) for p10 */
+ if (ppmu->flags & PPMU_ARCH_31)
+ val |= MMCRA_BHRB_DISABLE;
+
+ /*
+ * Write SPRN_MMCRA if mmcra has either disabled
+ * instruction sampling or BHRB.
+ */
+ if (val != mmcra) {
+ mtspr(SPRN_MMCRA, val);
mb();
+ isync();
}
cpuhw->disabled = 1;
cpuhw->n_added = 0;
ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+ /*
+ * These are readable by userspace, may contain kernel
+ * addresses and are not switched by context switch, so clear
+ * them now to avoid leaking anything to userspace in general
+ * including to another process.
+ */
+ if (ppmu->flags & PPMU_ARCH_207S) {
+ mtspr(SPRN_SDAR, 0);
+ mtspr(SPRN_SIAR, 0);
+ }
+#endif
}
local_irq_restore(flags);
@@ -1211,7 +1428,7 @@ static void power_pmu_enable(struct pmu *pmu)
return;
local_irq_save(flags);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
if (!cpuhw->disabled)
goto out;
@@ -1236,18 +1453,29 @@ static void power_pmu_enable(struct pmu *pmu)
* (possibly updated for removal of events).
*/
if (!cpuhw->n_added) {
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+ /*
+ * If there is any active event with an overflown PMC
+ * value, set back PACA_IRQ_PMI which would have been
+ * cleared in power_pmu_disable().
+ */
+ hard_irq_disable();
+ if (any_pmc_overflown(cpuhw))
+ set_pmi_irq_pending();
+
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
goto out_enable;
}
/*
* Clear all MMCR settings and recompute them for the new set of events.
*/
- memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+ memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
- cpuhw->mmcr, cpuhw->event)) {
+ &cpuhw->mmcr, cpuhw->event, ppmu->flags)) {
/* shouldn't ever get here */
printk(KERN_ERR "oops compute_mmcr failed\n");
goto out;
@@ -1261,11 +1489,11 @@ static void power_pmu_enable(struct pmu *pmu)
*/
event = cpuhw->event[0];
if (event->attr.exclude_user)
- cpuhw->mmcr[0] |= MMCR0_FCP;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCP;
if (event->attr.exclude_kernel)
- cpuhw->mmcr[0] |= freeze_events_kernel;
+ cpuhw->mmcr.mmcr0 |= freeze_events_kernel;
if (event->attr.exclude_hv)
- cpuhw->mmcr[0] |= MMCR0_FCHV;
+ cpuhw->mmcr.mmcr0 |= MMCR0_FCHV;
}
/*
@@ -1274,12 +1502,15 @@ static void power_pmu_enable(struct pmu *pmu)
* Then unfreeze the events.
*/
ppc_set_pmu_inuse(1);
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
- mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
- mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+ mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+ mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
| MMCR0_FC);
if (ppmu->flags & PPMU_ARCH_207S)
- mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
+ mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2);
+
+ if (ppmu->flags & PPMU_ARCH_31)
+ mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
/*
* Read off any pre-existing events that need to move
@@ -1330,7 +1561,7 @@ static void power_pmu_enable(struct pmu *pmu)
perf_event_update_userpage(event);
}
cpuhw->n_limited = n_lim;
- cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+ cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE;
out_enable:
pmao_restore_workaround(ebb);
@@ -1346,9 +1577,9 @@ static void power_pmu_enable(struct pmu *pmu)
/*
* Enable instruction sampling if necessary
*/
- if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+ if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) {
mb();
- mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+ mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra);
}
out:
@@ -1363,15 +1594,15 @@ static int collect_events(struct perf_event *group, int max_count,
int n = 0;
struct perf_event *event;
- if (!is_software_event(group)) {
+ if (group->pmu->task_ctx_nr == perf_hw_context) {
if (n >= max_count)
return -1;
ctrs[n] = group;
flags[n] = group->hw.event_base;
events[n++] = group->hw.config;
}
- list_for_each_entry(event, &group->sibling_list, group_entry) {
- if (!is_software_event(event) &&
+ for_each_sibling_event(event, group) {
+ if (event->pmu->task_ctx_nr == perf_hw_context &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
return -1;
@@ -1384,7 +1615,7 @@ static int collect_events(struct perf_event *group, int max_count,
}
/*
- * Add a event to the PMU.
+ * Add an event to the PMU.
* If all events are not already frozen, then we disable and
* re-enable the PMU in order to get hw_perf_enable to do the
* actual work of reconfiguring the PMU.
@@ -1403,7 +1634,7 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
* Add the event to the list (if there is room)
* and check whether the total set is still feasible.
*/
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
n0 = cpuhw->n_events;
if (n0 >= ppmu->n_counter)
goto out;
@@ -1427,12 +1658,12 @@ static int power_pmu_add(struct perf_event *event, int ef_flags)
* skip the schedulability test here, it will be performed
* at commit time(->commit_txn) as a whole
*/
- if (cpuhw->group_flag & PERF_EVENT_TXN)
+ if (cpuhw->txn_flags & PERF_PMU_TXN_ADD)
goto nocheck;
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
goto out;
- if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1))
+ if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1, cpuhw->event))
goto out;
event->hw.config = cpuhw->events[n0];
@@ -1445,9 +1676,16 @@ nocheck:
ret = 0;
out:
if (has_branch_stack(event)) {
- power_pmu_bhrb_enable(event);
- cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
- event->attr.branch_sample_type);
+ u64 bhrb_filter = -1;
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
+ event->attr.branch_sample_type);
+
+ if (bhrb_filter != -1) {
+ cpuhw->bhrb_filter = bhrb_filter;
+ power_pmu_bhrb_enable(event);
+ }
}
perf_pmu_enable(event->pmu);
@@ -1456,7 +1694,7 @@ nocheck:
}
/*
- * Remove a event from the PMU.
+ * Remove an event from the PMU.
*/
static void power_pmu_del(struct perf_event *event, int ef_flags)
{
@@ -1469,7 +1707,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
power_pmu_read(event);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
for (i = 0; i < cpuhw->n_events; ++i) {
if (event == cpuhw->event[i]) {
while (++i < cpuhw->n_events) {
@@ -1478,7 +1716,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
cpuhw->flags[i-1] = cpuhw->flags[i];
}
--cpuhw->n_events;
- ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+ ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr);
if (event->hw.idx) {
write_pmc(event->hw.idx, 0);
event->hw.idx = 0;
@@ -1499,7 +1737,7 @@ static void power_pmu_del(struct perf_event *event, int ef_flags)
}
if (cpuhw->n_events == 0) {
/* disable exceptions if no events are running */
- cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+ cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE);
}
if (has_branch_stack(event))
@@ -1572,13 +1810,22 @@ static void power_pmu_stop(struct perf_event *event, int ef_flags)
* Start group events scheduling transaction
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
+ *
+ * We only support PERF_PMU_TXN_ADD transactions. Save the
+ * transaction flags but otherwise ignore non-PERF_PMU_TXN_ADD
+ * transactions.
*/
-void power_pmu_start_txn(struct pmu *pmu)
+static void power_pmu_start_txn(struct pmu *pmu, unsigned int txn_flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ WARN_ON_ONCE(cpuhw->txn_flags); /* txn already in flight */
+
+ cpuhw->txn_flags = txn_flags;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
perf_pmu_disable(pmu);
- cpuhw->group_flag |= PERF_EVENT_TXN;
cpuhw->n_txn_start = cpuhw->n_events;
}
@@ -1587,11 +1834,18 @@ void power_pmu_start_txn(struct pmu *pmu)
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
-void power_pmu_cancel_txn(struct pmu *pmu)
+static void power_pmu_cancel_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+ unsigned int txn_flags;
+
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ txn_flags = cpuhw->txn_flags;
+ cpuhw->txn_flags = 0;
+ if (txn_flags & ~PERF_PMU_TXN_ADD)
+ return;
- cpuhw->group_flag &= ~PERF_EVENT_TXN;
perf_pmu_enable(pmu);
}
@@ -1600,25 +1854,33 @@ void power_pmu_cancel_txn(struct pmu *pmu)
* Perform the group schedulability test as a whole
* Return 0 if success
*/
-int power_pmu_commit_txn(struct pmu *pmu)
+static int power_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
long i, n;
if (!ppmu)
return -EAGAIN;
- cpuhw = &__get_cpu_var(cpu_hw_events);
+
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ WARN_ON_ONCE(!cpuhw->txn_flags); /* no txn in flight */
+
+ if (cpuhw->txn_flags & ~PERF_PMU_TXN_ADD) {
+ cpuhw->txn_flags = 0;
+ return 0;
+ }
+
n = cpuhw->n_events;
if (check_excludes(cpuhw->event, cpuhw->flags, 0, n))
return -EAGAIN;
- i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n);
+ i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n, cpuhw->event);
if (i < 0)
return -EAGAIN;
for (i = cpuhw->n_txn_start; i < n; ++i)
cpuhw->event[i]->hw.config = cpuhw->events[i];
- cpuhw->group_flag &= ~PERF_EVENT_TXN;
+ cpuhw->txn_flags = 0;
perf_pmu_enable(pmu);
return 0;
}
@@ -1626,7 +1888,7 @@ int power_pmu_commit_txn(struct pmu *pmu)
/*
* Return 1 if we might be able to put event on a limited PMC,
* or 0 if not.
- * A event can only go on a limited PMC if it counts something
+ * An event can only go on a limited PMC if it counts something
* that a limited PMC can count, doesn't require interrupts, and
* doesn't exclude any processor mode.
*/
@@ -1699,7 +1961,7 @@ static void hw_perf_event_destroy(struct perf_event *event)
static int hw_perf_cache_event(u64 config, u64 *eventp)
{
unsigned long type, op, result;
- int ev;
+ u64 ev;
if (!ppmu->cache_events)
return -EINVAL;
@@ -1723,10 +1985,22 @@ static int hw_perf_cache_event(u64 config, u64 *eventp)
return 0;
}
+static bool is_event_blacklisted(u64 ev)
+{
+ int i;
+
+ for (i=0; i < ppmu->n_blacklist_ev; i++) {
+ if (ppmu->blacklist_ev[i] == ev)
+ return true;
+ }
+
+ return false;
+}
+
static int power_pmu_event_init(struct perf_event *event)
{
u64 ev;
- unsigned long flags;
+ unsigned long flags, irq_flags;
struct perf_event *ctrs[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int cflags[MAX_HWEVENTS];
@@ -1748,20 +2022,40 @@ static int power_pmu_event_init(struct perf_event *event)
ev = event->attr.config;
if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
return -EOPNOTSUPP;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
ev = ppmu->generic_events[ev];
break;
case PERF_TYPE_HW_CACHE:
err = hw_perf_cache_event(event->attr.config, &ev);
if (err)
return err;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
break;
case PERF_TYPE_RAW:
ev = event->attr.config;
+
+ if (ppmu->blacklist_ev && is_event_blacklisted(ev))
+ return -EINVAL;
break;
default:
return -ENOENT;
}
+ /*
+ * PMU config registers have fields that are
+ * reserved and some specific values for bit fields are reserved.
+ * For ex., MMCRA[61:62] is Random Sampling Mode (SM)
+ * and value of 0b11 to this field is reserved.
+ * Check for invalid values in attr.config.
+ */
+ if (ppmu->check_attr_config &&
+ ppmu->check_attr_config(event))
+ return -EINVAL;
+
event->hw.config_base = ev;
event->hw.idx = 0;
@@ -1825,18 +2119,43 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
- cpuhw = &get_cpu_var(cpu_hw_events);
- err = power_check_constraints(cpuhw, events, cflags, n + 1);
+ local_irq_save(irq_flags);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+ err = power_check_constraints(cpuhw, events, cflags, n + 1, ctrs);
if (has_branch_stack(event)) {
- cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+ u64 bhrb_filter = -1;
+
+ /*
+ * Currently no PMU supports having multiple branch filters
+ * at the same time. Branch filters are set via MMCRA IFM[32:33]
+ * bits for Power8 and above. Return EOPNOTSUPP when multiple
+ * branch filters are requested in the event attr.
+ *
+ * When opening event via perf_event_open(), branch_sample_type
+ * gets adjusted in perf_copy_attr(). Kernel will automatically
+ * adjust the branch_sample_type based on the event modifier
+ * settings to include PERF_SAMPLE_BRANCH_PLM_ALL. Hence drop
+ * the check for PERF_SAMPLE_BRANCH_PLM_ALL.
+ */
+ if (hweight64(event->attr.branch_sample_type & ~PERF_SAMPLE_BRANCH_PLM_ALL) > 1) {
+ local_irq_restore(irq_flags);
+ return -EOPNOTSUPP;
+ }
+
+ if (ppmu->bhrb_filter_map)
+ bhrb_filter = ppmu->bhrb_filter_map(
event->attr.branch_sample_type);
- if(cpuhw->bhrb_filter == -1)
+ if (bhrb_filter == -1) {
+ local_irq_restore(irq_flags);
return -EOPNOTSUPP;
+ }
+ cpuhw->bhrb_filter = bhrb_filter;
}
- put_cpu_var(cpu_hw_events);
+ local_irq_restore(irq_flags);
if (err)
return -EINVAL;
@@ -1888,7 +2207,7 @@ ssize_t power_events_sysfs_show(struct device *dev,
return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}
-struct pmu power_pmu = {
+static struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
.event_init = power_pmu_event_init,
@@ -1901,9 +2220,16 @@ struct pmu power_pmu = {
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
.event_idx = power_pmu_event_idx,
- .flush_branch_stack = power_pmu_flush_branch_stack,
+ .sched_task = power_pmu_sched_task,
};
+#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_PHYS_ADDR | \
+ PERF_SAMPLE_DATA_PAGE_SIZE)
+
+#define SIER_TYPE_SHIFT 15
+#define SIER_TYPE_MASK (0x7ull << SIER_TYPE_SHIFT)
+
/*
* A counter has overflowed; update its count and record
* things if requested. Note that interrupts are hard-disabled
@@ -1913,6 +2239,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -1939,7 +2266,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
left += period;
if (left <= 0)
left = period;
- record = siar_valid(regs);
+
+ /*
+ * If address is not requested in the sample via
+ * PERF_SAMPLE_IP, just record that sample irrespective
+ * of SIAR valid check.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_IP)
+ record = siar_valid(regs);
+ else
+ record = 1;
+
event->hw.last_period = event->hw.sample_period;
}
if (left < 0x80000000LL)
@@ -1952,25 +2289,65 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
perf_event_update_userpage(event);
/*
+ * Due to hardware limitation, sometimes SIAR could sample a kernel
+ * address even when freeze on supervisor state (kernel) is set in
+ * MMCR2. Check attr.exclude_kernel and address to drop the sample in
+ * these cases.
+ */
+ if (event->attr.exclude_kernel &&
+ (event->attr.sample_type & PERF_SAMPLE_IP) &&
+ is_kernel_addr(mfspr(SPRN_SIAR)))
+ record = 0;
+
+ /*
+ * SIER[46-48] presents instruction type of the sampled instruction.
+ * In ISA v3.0 and before values "0" and "7" are considered reserved.
+ * In ISA v3.1, value "7" has been used to indicate "larx/stcx".
+ * Drop the sample if "type" has reserved values for this field with an
+ * ISA version check.
+ */
+ if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+ ppmu->get_mem_data_src) {
+ val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT;
+ if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) {
+ record = 0;
+ atomic64_inc(&event->lost_samples);
+ }
+ }
+
+ /*
* Finally record data if requested.
*/
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, ~0ULL, event->hw.last_period);
+ perf_sample_data_init(&data, ~0ULL, last_period);
- if (event->attr.sample_type & PERF_SAMPLE_ADDR)
- perf_get_data_addr(regs, &data.addr);
+ if (event->attr.sample_type & PERF_SAMPLE_ADDR_TYPE)
+ perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
- cpuhw = &__get_cpu_var(cpu_hw_events);
- power_pmu_bhrb_read(cpuhw);
- data.br_stack = &cpuhw->bhrb_stack;
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
+ power_pmu_bhrb_read(event, cpuhw);
+ perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL);
+ }
+
+ if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+ ppmu->get_mem_data_src) {
+ ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
+ data.sample_flags |= PERF_SAMPLE_DATA_SRC;
}
- if (perf_event_overflow(event, &data, regs))
- power_pmu_stop(event, 0);
+ if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
+ ppmu->get_mem_weight) {
+ ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
+ data.sample_flags |= PERF_SAMPLE_WEIGHT_TYPE;
+ }
+ perf_event_overflow(event, &data, regs);
+ } else if (period) {
+ /* Account for interrupt in case of invalid SIAR */
+ perf_event_account_interrupt(event);
}
}
@@ -1978,7 +2355,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
* Called from generic code to get the misc flags (i.e. processor mode)
* for an event_id.
*/
-unsigned long perf_misc_flags(struct pt_regs *regs)
+unsigned long perf_arch_misc_flags(struct pt_regs *regs)
{
u32 flags = perf_get_misc_flags(regs);
@@ -1992,14 +2369,12 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
* Called from generic code to get the instruction pointer
* for an event_id.
*/
-unsigned long perf_instruction_pointer(struct pt_regs *regs)
+unsigned long perf_arch_instruction_pointer(struct pt_regs *regs)
{
- bool use_siar = regs_use_siar(regs);
+ unsigned long siar = mfspr(SPRN_SIAR);
- if (use_siar && siar_valid(regs))
- return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
- else if (use_siar)
- return 0; // no valid instruction pointer
+ if (regs_use_siar(regs) && siar_valid(regs) && siar)
+ return siar + perf_ip_adjust(regs);
else
return regs->nip;
}
@@ -2034,14 +2409,12 @@ static bool pmc_overflow(unsigned long val)
/*
* Performance monitor interrupt stuff
*/
-static void perf_event_interrupt(struct pt_regs *regs)
+static void __perf_event_interrupt(struct pt_regs *regs)
{
int i, j;
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
- unsigned long val[8];
int found, active;
- int nmi;
if (cpuhw->n_limited)
freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
@@ -2049,20 +2422,14 @@ static void perf_event_interrupt(struct pt_regs *regs)
perf_read_regs(regs);
- nmi = perf_intr_is_nmi(regs);
- if (nmi)
- nmi_enter();
- else
- irq_enter();
-
/* Read all the PMCs since we'll need them a bunch of times */
for (i = 0; i < ppmu->n_counter; ++i)
- val[i] = read_pmc(i + 1);
+ cpuhw->pmcs[i] = read_pmc(i + 1);
/* Try to find what caused the IRQ */
found = 0;
for (i = 0; i < ppmu->n_counter; ++i) {
- if (!pmc_overflow(val[i]))
+ if (!pmc_overflow(cpuhw->pmcs[i]))
continue;
if (is_limited_pmc(i + 1))
continue; /* these won't generate IRQs */
@@ -2077,10 +2444,18 @@ static void perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[j];
if (event->hw.idx == (i + 1)) {
active = 1;
- record_and_restart(event, val[i], regs);
+ record_and_restart(event, cpuhw->pmcs[i], regs);
break;
}
}
+
+ /*
+ * Clear PACA_IRQ_PMI in case it was set by
+ * set_pmi_irq_pending() when PMU was enabled
+ * after accounting for interrupts.
+ */
+ clear_pmi_irq_pending();
+
if (!active)
/* reset non active counters that have overflowed */
write_pmc(i + 1, 0);
@@ -2091,17 +2466,24 @@ static void perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[i];
if (!event->hw.idx || is_limited_pmc(event->hw.idx))
continue;
- if (pmc_overflow_power7(val[event->hw.idx - 1])) {
+ if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 1])) {
/* event has overflowed in a buggy way*/
found = 1;
record_and_restart(event,
- val[event->hw.idx - 1],
+ cpuhw->pmcs[event->hw.idx - 1],
regs);
}
}
}
- if (!found && !nmi && printk_ratelimit())
- printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+
+ /*
+ * During system wide profiling or while specific CPU is monitored for an
+ * event, some corner cases could cause PMC to overflow in idle path. This
+ * will trigger a PMI after waking up from idle. Since counter values are _not_
+ * saved/restored in idle path, this can lead to the "Can't find PMC" message below.
+ */
+ if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+ printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
/*
* Reset MMCR0 to its normal value. This will set PMXE and
@@ -2110,42 +2492,60 @@ static void perf_event_interrupt(struct pt_regs *regs)
* XXX might want to use MSR.PM to keep the events frozen until
* we get back out of this interrupt.
*/
- write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
+
+ /* Clear the cpuhw->pmcs */
+ memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
- if (nmi)
- nmi_exit();
- else
- irq_exit();
}
-static void power_pmu_setup(int cpu)
+static void perf_event_interrupt(struct pt_regs *regs)
{
- struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
+ u64 start_clock = sched_clock();
- if (!ppmu)
- return;
- memset(cpuhw, 0, sizeof(*cpuhw));
- cpuhw->mmcr[0] = MMCR0_FC;
+ __perf_event_interrupt(regs);
+ perf_sample_event_took(sched_clock() - start_clock);
}
-static int
-power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
+static int power_pmu_prepare_cpu(unsigned int cpu)
{
- unsigned int cpu = (long)hcpu;
-
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_UP_PREPARE:
- power_pmu_setup(cpu);
- break;
+ struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
- default:
- break;
+ if (ppmu) {
+ memset(cpuhw, 0, sizeof(*cpuhw));
+ cpuhw->mmcr.mmcr0 = MMCR0_FC;
}
+ return 0;
+}
+
+static ssize_t pmu_name_show(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ if (ppmu)
+ return sysfs_emit(buf, "%s", ppmu->name);
- return NOTIFY_OK;
+ return 0;
}
-int register_power_pmu(struct power_pmu *pmu)
+static DEVICE_ATTR_RO(pmu_name);
+
+static struct attribute *pmu_caps_attrs[] = {
+ &dev_attr_pmu_name.attr,
+ NULL
+};
+
+static const struct attribute_group pmu_caps_group = {
+ .name = "caps",
+ .attrs = pmu_caps_attrs,
+};
+
+static const struct attribute_group *pmu_caps_groups[] = {
+ &pmu_caps_group,
+ NULL,
+};
+
+int __init register_power_pmu(struct power_pmu *pmu)
{
if (ppmu)
return -EBUSY; /* something's already registered */
@@ -2156,6 +2556,11 @@ int register_power_pmu(struct power_pmu *pmu)
power_pmu.attr_groups = ppmu->attr_groups;
+ if (ppmu->flags & PPMU_ARCH_207S)
+ power_pmu.attr_update = pmu_caps_groups;
+
+ power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
+
#ifdef MSR_HV
/*
* Use FCHV to ignore kernel events if MSR.HV is set.
@@ -2165,7 +2570,70 @@ int register_power_pmu(struct power_pmu *pmu)
#endif /* CONFIG_PPC64 */
perf_pmu_register(&power_pmu, "cpu", PERF_TYPE_RAW);
- perf_cpu_notifier(power_pmu_notifier);
-
+ cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+ power_pmu_prepare_cpu, NULL);
return 0;
}
+
+#ifdef CONFIG_PPC64
+static bool pmu_override = false;
+static unsigned long pmu_override_val;
+static void do_pmu_override(void *data)
+{
+ ppc_set_pmu_inuse(1);
+ if (pmu_override_val)
+ mtspr(SPRN_MMCR1, pmu_override_val);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+}
+
+static int __init init_ppc64_pmu(void)
+{
+ if (cpu_has_feature(CPU_FTR_HVMODE) && pmu_override) {
+ pr_warn("disabling perf due to pmu_override= command line option.\n");
+ on_each_cpu(do_pmu_override, NULL, 1);
+ return 0;
+ }
+
+ /* run through all the pmu drivers one at a time */
+ if (!init_power5_pmu())
+ return 0;
+ else if (!init_power5p_pmu())
+ return 0;
+ else if (!init_power6_pmu())
+ return 0;
+ else if (!init_power7_pmu())
+ return 0;
+ else if (!init_power8_pmu())
+ return 0;
+ else if (!init_power9_pmu())
+ return 0;
+ else if (!init_power10_pmu())
+ return 0;
+ else if (!init_power11_pmu())
+ return 0;
+ else if (!init_ppc970_pmu())
+ return 0;
+ else
+ return init_generic_compat_pmu();
+}
+early_initcall(init_ppc64_pmu);
+
+static int __init pmu_setup(char *str)
+{
+ unsigned long val;
+
+ if (!early_cpu_has_feature(CPU_FTR_HVMODE))
+ return 0;
+
+ pmu_override = true;
+
+ if (kstrtoul(str, 0, &val))
+ val = 0;
+
+ pmu_override_val = val;
+
+ return 1;
+}
+__setup("pmu_override=", pmu_setup);
+
+#endif
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index d35ae52c69dc..7120ab20cbfe 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance event support - Freescale Embedded Performance Monitor
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
* Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
@@ -35,19 +31,6 @@ static atomic_t num_events;
/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);
-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-#ifdef __powerpc64__
- return !regs->softe;
-#else
- return 0;
-#endif
-}
-
static void perf_event_interrupt(struct pt_regs *regs);
/*
@@ -210,7 +193,7 @@ static void fsl_emb_pmu_disable(struct pmu *pmu)
unsigned long flags;
local_irq_save(flags);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
if (!cpuhw->disabled) {
cpuhw->disabled = 1;
@@ -249,7 +232,7 @@ static void fsl_emb_pmu_enable(struct pmu *pmu)
unsigned long flags;
local_irq_save(flags);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
if (!cpuhw->disabled)
goto out;
@@ -277,7 +260,7 @@ static int collect_events(struct perf_event *group, int max_count,
ctrs[n] = group;
n++;
}
- list_for_each_entry(event, &group->sibling_list, group_entry) {
+ for_each_sibling_event(event, group) {
if (!is_software_event(event) &&
event->state != PERF_EVENT_STATE_OFF) {
if (n >= max_count)
@@ -330,9 +313,11 @@ static int fsl_emb_pmu_add(struct perf_event *event, int flags)
}
local64_set(&event->hw.prev_count, val);
- if (!(flags & PERF_EF_START)) {
+ if (unlikely(!(flags & PERF_EF_START))) {
event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
val = 0;
+ } else {
+ event->hw.state &= ~(PERF_HES_STOPPED | PERF_HES_UPTODATE);
}
write_pmc(i, val);
@@ -389,6 +374,7 @@ static void fsl_emb_pmu_del(struct perf_event *event, int flags)
static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
{
unsigned long flags;
+ unsigned long val;
s64 left;
if (event->hw.idx < 0 || !event->hw.sample_period)
@@ -405,7 +391,10 @@ static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
event->hw.state = 0;
left = local64_read(&event->hw.period_left);
- write_pmc(event->hw.idx, left);
+ val = 0;
+ if (left < 0x80000000L)
+ val = 0x80000000L - left;
+ write_pmc(event->hw.idx, val);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
@@ -601,6 +590,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct pt_regs *regs)
{
u64 period = event->hw.sample_period;
+ const u64 last_period = event->hw.last_period;
s64 prev, delta, left;
int record = 0;
@@ -643,27 +633,18 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (record) {
struct perf_sample_data data;
- perf_sample_data_init(&data, 0, event->hw.last_period);
+ perf_sample_data_init(&data, 0, last_period);
- if (perf_event_overflow(event, &data, regs))
- fsl_emb_pmu_stop(event, 0);
+ perf_event_overflow(event, &data, regs);
}
}
static void perf_event_interrupt(struct pt_regs *regs)
{
int i;
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
unsigned long val;
- int found = 0;
- int nmi;
-
- nmi = perf_intr_is_nmi(regs);
- if (nmi)
- nmi_enter();
- else
- irq_enter();
for (i = 0; i < ppmu->n_counter; ++i) {
event = cpuhw->event[i];
@@ -672,7 +653,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
if ((int)val < 0) {
if (event) {
/* event has overflowed */
- found = 1;
record_and_restart(event, val, regs);
} else {
/*
@@ -688,18 +668,15 @@ static void perf_event_interrupt(struct pt_regs *regs)
mtmsr(mfmsr() | MSR_PMM);
mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE);
isync();
-
- if (nmi)
- nmi_exit();
- else
- irq_exit();
}
-void hw_perf_event_setup(int cpu)
+static int fsl_emb_pmu_prepare_cpu(unsigned int cpu)
{
struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu);
memset(cpuhw, 0, sizeof(*cpuhw));
+
+ return 0;
}
int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
@@ -712,6 +689,8 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu)
pmu->name);
perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW);
+ cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare",
+ fsl_emb_pmu_prepare_cpu, NULL);
return 0;
}
diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c
index fb664929f5da..e3e1a68eb1d5 100644
--- a/arch/powerpc/perf/e500-pmu.c
+++ b/arch/powerpc/perf/e500-pmu.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for e500 family processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
* Copyright 2010 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
#include <linux/perf_event.h>
@@ -122,12 +118,13 @@ static struct fsl_emb_pmu e500_pmu = {
static int init_e500_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type)
- return -ENODEV;
+ unsigned int pvr = mfspr(SPRN_PVR);
- if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
+ /* e500mc and e5500 */
+ if (PVR_VER(pvr) == PVR_VER_E500MC || PVR_VER(pvr) == PVR_VER_E5500)
num_events = 256;
- else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
+ /* e500 */
+ else if (PVR_VER(pvr) != PVR_VER_E500V1 && PVR_VER(pvr) != PVR_VER_E500V2)
return -ENODEV;
return register_fsl_emb_pmu(&e500_pmu);
diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c
index 3d877aa777b5..bd779a2338f8 100644
--- a/arch/powerpc/perf/e6500-pmu.c
+++ b/arch/powerpc/perf/e6500-pmu.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for e6500 family processors.
*
@@ -5,11 +6,6 @@
* Based on e500-pmu.c
* Copyright 2013 Freescale Semiconductor, Inc.
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
@@ -111,8 +107,9 @@ static struct fsl_emb_pmu e6500_pmu = {
static int init_e6500_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e6500"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_VER_E6500)
return -ENODEV;
return register_fsl_emb_pmu(&e6500_pmu);
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
new file mode 100644
index 000000000000..b5c414876ed5
--- /dev/null
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+#define pr_fmt(fmt) "generic-compat-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ pmc ] [ pmcxsel ]
+ */
+
+/*
+ * Event codes defined in ISA v3.0B
+ */
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+ /* Cycles, alternate code */
+ EVENT(PM_CYC_ALT, 0x100f0)
+ /* One or more instructions completed in a cycle */
+ EVENT(PM_CYC_INST_CMPL, 0x100f2)
+ /* Floating-point instruction completed */
+ EVENT(PM_FLOP_CMPL, 0x100f4)
+ /* Instruction ERAT/L1-TLB miss */
+ EVENT(PM_L1_ITLB_MISS, 0x100f6)
+ /* All instructions completed and none available */
+ EVENT(PM_NO_INST_AVAIL, 0x100f8)
+ /* A load-type instruction completed (ISA v3.0+) */
+ EVENT(PM_LD_CMPL, 0x100fc)
+ /* Instruction completed, alternate code (ISA v3.0+) */
+ EVENT(PM_INST_CMPL_ALT, 0x100fe)
+ /* A store-type instruction completed */
+ EVENT(PM_ST_CMPL, 0x200f0)
+ /* Instruction Dispatched */
+ EVENT(PM_INST_DISP, 0x200f2)
+ /* Run_cycles */
+ EVENT(PM_RUN_CYC, 0x200f4)
+ /* Data ERAT/L1-TLB miss/reload */
+ EVENT(PM_L1_DTLB_RELOAD, 0x200f6)
+ /* Taken branch completed */
+ EVENT(PM_BR_TAKEN_CMPL, 0x200fa)
+ /* Demand iCache Miss */
+ EVENT(PM_L1_ICACHE_MISS, 0x200fc)
+ /* L1 Dcache reload from memory */
+ EVENT(PM_L1_RELOAD_FROM_MEM, 0x200fe)
+ /* L1 Dcache store miss */
+ EVENT(PM_ST_MISS_L1, 0x300f0)
+ /* Alternate code for PM_INST_DISP */
+ EVENT(PM_INST_DISP_ALT, 0x300f2)
+ /* Branch direction or target mispredicted */
+ EVENT(PM_BR_MISPREDICT, 0x300f6)
+ /* Data TLB miss/reload */
+ EVENT(PM_DTLB_MISS, 0x300fc)
+ /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+ EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+ /* L1 Dcache load miss */
+ EVENT(PM_LD_MISS_L1, 0x400f0)
+ /* Cycle when instruction(s) dispatched */
+ EVENT(PM_CYC_INST_DISP, 0x400f2)
+ /* Branch or branch target mispredicted */
+ EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+ /* Instructions completed with run latch set */
+ EVENT(PM_RUN_INST_CMPL, 0x400fa)
+ /* Instruction TLB miss/reload */
+ EVENT(PM_ITLB_MISS, 0x400fc)
+ /* Load data not cached */
+ EVENT(PM_LD_NOT_CACHED, 0x400fe)
+ /* Instructions */
+ EVENT(PM_INST_CMPL, 0x500fa)
+ /* Cycles */
+ EVENT(PM_CYC, 0x600f4)
+};
+
+#undef EVENT
+
+/* Table of alternatives, sorted in increasing order of column 0 */
+/* Note that in each row, column 0 must be the smallest */
+static const unsigned int generic_event_alternatives[][MAX_ALT] = {
+ { PM_CYC_ALT, PM_CYC },
+ { PM_INST_CMPL_ALT, PM_INST_CMPL },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+};
+
+static int generic_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int num_alt = 0;
+
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(generic_event_alternatives), flags,
+ generic_event_alternatives);
+
+ return num_alt;
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_NO_INST_AVAIL);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *generic_compat_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_NO_INST_AVAIL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static const struct attribute_group generic_compat_pmu_events_group = {
+ .name = "events",
+ .attrs = generic_compat_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-19");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+
+static struct attribute *generic_compat_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_pmc.attr,
+ NULL,
+};
+
+static const struct attribute_group generic_compat_pmu_format_group = {
+ .name = "format",
+ .attrs = generic_compat_pmu_format_attr,
+};
+
+static struct attribute *generic_compat_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group generic_compat_pmu_caps_group = {
+ .name = "caps",
+ .attrs = generic_compat_pmu_caps_attrs,
+};
+
+static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
+ &generic_compat_pmu_format_group,
+ &generic_compat_pmu_events_group,
+ &generic_compat_pmu_caps_group,
+ NULL,
+};
+
+static int compat_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_NO_INST_AVAIL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_LD_MISS_L1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ST_MISS_L1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DTLB_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ITLB_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+#undef C
+
+/*
+ * We set MMCR0[CC5-6RUN] so we can use counters 5 and 6 for
+ * PM_INST_CMPL and PM_CYC.
+ */
+static int generic_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags)
+{
+ int ret;
+
+ ret = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+ if (!ret)
+ mmcr->mmcr0 |= MMCR0_C56RUN;
+ return ret;
+}
+
+static struct power_pmu generic_compat_pmu = {
+ .name = "ISAv3",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .compute_mmcr = generic_compute_mmcr,
+ .get_constraint = isa207_get_constraint,
+ .get_alternatives = generic_get_alternatives,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .n_generic = ARRAY_SIZE(compat_generic_events),
+ .generic_events = compat_generic_events,
+ .cache_events = &generic_compat_cache_events,
+ .attr_groups = generic_compat_pmu_attr_groups,
+};
+
+int __init init_generic_compat_pmu(void)
+{
+ int rc = 0;
+
+ /*
+ * From ISA v2.07 on, PMU features are architected;
+ * we require >= v3.0 because (a) that has PM_LD_CMPL and
+ * PM_INST_CMPL_ALT, which v2.07 doesn't have, and
+ * (b) we don't expect any non-IBM Power ISA
+ * implementations that conform to v2.07 but not v3.0.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ return -ENODEV;
+
+ rc = register_power_pmu(&generic_compat_pmu);
+ if (rc)
+ return rc;
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
diff --git a/arch/powerpc/perf/hv-24x7-catalog.h b/arch/powerpc/perf/hv-24x7-catalog.h
index 21b19dd86d9c..5fab5a397da9 100644
--- a/arch/powerpc/perf/hv-24x7-catalog.h
+++ b/arch/powerpc/perf/hv-24x7-catalog.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_POWERPC_PERF_HV_24X7_CATALOG_H_
#define LINUX_POWERPC_PERF_HV_24X7_CATALOG_H_
@@ -30,4 +31,29 @@ struct hv_24x7_catalog_page_0 {
__u8 reserved6[2];
} __packed;
+struct hv_24x7_event_data {
+ __be16 length; /* in bytes, must be a multiple of 16 */
+ __u8 reserved1[2];
+ __u8 domain; /* Chip = 1, Core = 2 */
+ __u8 reserved2[1];
+ __be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+ __be16 event_group_record_len; /* in bytes */
+
+ /* in bytes, offset from event_group_record */
+ __be16 event_counter_offs;
+
+ /* verified_state, unverified_state, caveat_state, broken_state, ... */
+ __be32 flags;
+
+ __be16 primary_group_ix;
+ __be16 group_count;
+ __be16 event_name_len;
+ __u8 remainder[];
+ /* __u8 event_name[event_name_len - 2]; */
+ /* __be16 event_description_len; */
+ /* __u8 event_desc[event_description_len - 2]; */
+ /* __be16 detailed_desc_len; */
+ /* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
#endif
diff --git a/arch/powerpc/perf/hv-24x7-domains.h b/arch/powerpc/perf/hv-24x7-domains.h
new file mode 100644
index 000000000000..6f91f62e0aa6
--- /dev/null
+++ b/arch/powerpc/perf/hv-24x7-domains.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * DOMAIN(name, num, index_kind, is_physical)
+ *
+ * @name: An all caps token, suitable for use in generating an enum
+ * member and appending to an event name in sysfs.
+ *
+ * @num: The number corresponding to the domain as given in
+ * documentation. We assume the catalog domain and the hcall
+ * domain have the same numbering (so far they do), but this
+ * may need to be changed in the future.
+ *
+ * @index_kind: A stringifiable token describing the meaning of the index
+ * within the given domain. Must fit the parsing rules of the
+ * perf sysfs api.
+ *
+ * @is_physical: True if the domain is physical, false otherwise (if virtual).
+ *
+ * Note: The terms PHYS_CHIP, PHYS_CORE, VCPU correspond to physical chip,
+ * physical core and virtual processor in 24x7 Counters specifications.
+ */
+
+DOMAIN(PHYS_CHIP, 0x01, chip, true)
+DOMAIN(PHYS_CORE, 0x02, core, true)
+DOMAIN(VCPU_HOME_CORE, 0x03, vcpu, false)
+DOMAIN(VCPU_HOME_CHIP, 0x04, vcpu, false)
+DOMAIN(VCPU_HOME_NODE, 0x05, vcpu, false)
+DOMAIN(VCPU_REMOTE_NODE, 0x06, vcpu, false)
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 70d4f748b54b..e42677cc254a 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -1,28 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Hypervisor supplied "24x7" performance counter support
*
* Author: Cody P Schafer <cody@linux.vnet.ibm.com>
* Copyright 2014 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "hv-24x7: " fmt
#include <linux/perf_event.h>
+#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+#include <asm/cputhreads.h>
#include <asm/firmware.h>
#include <asm/hvcall.h>
#include <asm/io.h>
+#include <asm/papr-sysparm.h>
+#include <linux/byteorder/generic.h>
+#include <asm/rtas.h>
#include "hv-24x7.h"
#include "hv-24x7-catalog.h"
#include "hv-common.h"
+/* Version of the 24x7 hypervisor API that we should use in this machine. */
+static int interface_version;
+
+/* Whether we have to aggregate result data for some domains. */
+static bool aggregate_result_elements;
+
+static cpumask_t hv_24x7_cpumask;
+
+static bool domain_is_valid(unsigned int domain)
+{
+ switch (domain) {
+#define DOMAIN(n, v, x, c) \
+ case HV_PERF_DOMAIN_##n: \
+ /* fall through */
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool is_physical_domain(unsigned int domain)
+{
+ switch (domain) {
+#define DOMAIN(n, v, x, c) \
+ case HV_PERF_DOMAIN_##n: \
+ return c;
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+ default:
+ return false;
+ }
+}
+
+/*
+ * The Processor Module Information system parameter allows certain
+ * processor module information to be transferred from the platform to
+ * the OS. Per the PAPR+ document, its parameter token value is 43.
+ */
+
+static u32 phys_sockets; /* Physical sockets */
+static u32 phys_chipspersocket; /* Physical chips per socket */
+static u32 phys_coresperchip; /* Physical cores per chip */
+
+/*
+ * read_24x7_sys_info()
+ * Retrieve the number of sockets, chips per socket and cores per chip
+ * through the get-system-parameter RTAS call.
+ */
+void read_24x7_sys_info(void)
+{
+ struct papr_sysparm_buf *buf;
+
+ /*
+ * Default the system parameters (sockets, chips per socket and
+ * cores per chip) to 1.
+ */
+ phys_sockets = 1;
+ phys_chipspersocket = 1;
+ phys_coresperchip = 1;
+
+ buf = papr_sysparm_buf_alloc();
+ if (!buf)
+ return;
+
+ if (!papr_sysparm_get(PAPR_SYSPARM_PROC_MODULE_INFO, buf)) {
+ int ntypes = be16_to_cpup((__be16 *)&buf->val[0]);
+ int len = be16_to_cpu(buf->len);
+
+ if (len >= 8 && ntypes != 0) {
+ phys_sockets = be16_to_cpup((__be16 *)&buf->val[2]);
+ phys_chipspersocket = be16_to_cpup((__be16 *)&buf->val[4]);
+ phys_coresperchip = be16_to_cpup((__be16 *)&buf->val[6]);
+ }
+ }
+
+ papr_sysparm_buf_free(buf);
+}
+
+/* Domains for which more than one result element is returned for each event. */
+static bool domain_needs_aggregation(unsigned int domain)
+{
+ return aggregate_result_elements &&
+ (domain == HV_PERF_DOMAIN_PHYS_CORE ||
+ (domain >= HV_PERF_DOMAIN_VCPU_HOME_CORE &&
+ domain <= HV_PERF_DOMAIN_VCPU_REMOTE_NODE));
+}
+
+static const char *domain_name(unsigned int domain)
+{
+ if (!domain_is_valid(domain))
+ return NULL;
+
+ switch (domain) {
+ case HV_PERF_DOMAIN_PHYS_CHIP: return "Physical Chip";
+ case HV_PERF_DOMAIN_PHYS_CORE: return "Physical Core";
+ case HV_PERF_DOMAIN_VCPU_HOME_CORE: return "VCPU Home Core";
+ case HV_PERF_DOMAIN_VCPU_HOME_CHIP: return "VCPU Home Chip";
+ case HV_PERF_DOMAIN_VCPU_HOME_NODE: return "VCPU Home Node";
+ case HV_PERF_DOMAIN_VCPU_REMOTE_NODE: return "VCPU Remote Node";
+ }
+
+ WARN_ON_ONCE(domain);
+ return NULL;
+}
+
+static bool catalog_entry_domain_is_valid(unsigned int domain)
+{
+ /* POWER8 doesn't support virtual domains. */
+ if (interface_version == 1)
+ return is_physical_domain(domain);
+ else
+ return domain_is_valid(domain);
+}
+
/*
* TODO: Merging events:
* - Think of the hcall as an interface to a 4d array of counters:
@@ -44,13 +163,15 @@
/*
* Example usage:
- * perf stat -e 'hv_24x7/domain=2,offset=8,starting_index=0,lpar=0xffffffff/'
+ * perf stat -e 'hv_24x7/domain=2,offset=8,vcpu=0,lpar=0xffffffff/'
*/
/* u3 0-6, one of HV_24X7_PERF_DOMAIN */
EVENT_DEFINE_RANGE_FORMAT(domain, config, 0, 3);
/* u16 */
-EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(core, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(chip, config, 16, 31);
+EVENT_DEFINE_RANGE_FORMAT(vcpu, config, 16, 31);
/* u32, see "data_offset" */
EVENT_DEFINE_RANGE_FORMAT(offset, config, 32, 63);
/* u16 */
@@ -63,131 +184,833 @@ EVENT_DEFINE_RANGE(reserved3, config2, 0, 63);
static struct attribute *format_attrs[] = {
&format_attr_domain.attr,
&format_attr_offset.attr,
- &format_attr_starting_index.attr,
+ &format_attr_core.attr,
+ &format_attr_chip.attr,
+ &format_attr_vcpu.attr,
&format_attr_lpar.attr,
NULL,
};
-static struct attribute_group format_group = {
+static const struct attribute_group format_group = {
.name = "format",
.attrs = format_attrs,
};
+static struct attribute_group event_group = {
+ .name = "events",
+ /* .attrs is set in init */
+};
+
+static struct attribute_group event_desc_group = {
+ .name = "event_descs",
+ /* .attrs is set in init */
+};
+
+static struct attribute_group event_long_desc_group = {
+ .name = "event_long_descs",
+ /* .attrs is set in init */
+};
+
static struct kmem_cache *hv_page_cache;
+static DEFINE_PER_CPU(int, hv_24x7_txn_flags);
+static DEFINE_PER_CPU(int, hv_24x7_txn_err);
+
+struct hv_24x7_hw {
+ struct perf_event *events[255];
+};
+
+static DEFINE_PER_CPU(struct hv_24x7_hw, hv_24x7_hw);
+
+/*
+ * request_buffer and result_buffer are not required to be 4k aligned,
+ * but are not allowed to cross any 4k boundary. Aligning them to 4k is
+ * the simplest way to ensure that.
+ */
+#define H24x7_DATA_BUFFER_SIZE 4096
+static DEFINE_PER_CPU(char, hv_24x7_reqb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+static DEFINE_PER_CPU(char, hv_24x7_resb[H24x7_DATA_BUFFER_SIZE]) __aligned(4096);
+
+static unsigned int max_num_requests(int interface_version)
+{
+ return (H24x7_DATA_BUFFER_SIZE - sizeof(struct hv_24x7_request_buffer))
+ / H24x7_REQUEST_SIZE(interface_version);
+}
+
+static char *event_name(struct hv_24x7_event_data *ev, int *len)
+{
+ *len = be16_to_cpu(ev->event_name_len) - 2;
+ return (char *)ev->remainder;
+}
+
+static char *event_desc(struct hv_24x7_event_data *ev, int *len)
+{
+ unsigned int nl = be16_to_cpu(ev->event_name_len);
+ __be16 *desc_len = (__be16 *)(ev->remainder + nl - 2);
+
+ *len = be16_to_cpu(*desc_len) - 2;
+ return (char *)ev->remainder + nl;
+}
+
+static char *event_long_desc(struct hv_24x7_event_data *ev, int *len)
+{
+ unsigned int nl = be16_to_cpu(ev->event_name_len);
+ __be16 *desc_len_ = (__be16 *)(ev->remainder + nl - 2);
+ unsigned int desc_len = be16_to_cpu(*desc_len_);
+ __be16 *long_desc_len = (__be16 *)(ev->remainder + nl + desc_len - 2);
+
+ *len = be16_to_cpu(*long_desc_len) - 2;
+ return (char *)ev->remainder + nl + desc_len;
+}
+
+static bool event_fixed_portion_is_within(struct hv_24x7_event_data *ev,
+ void *end)
+{
+ void *start = ev;
+
+ return (start + offsetof(struct hv_24x7_event_data, remainder)) < end;
+}
+
/*
- * read_offset_data - copy data from one buffer to another while treating the
- * source buffer as a small view on the total avaliable
- * source data.
+ * Things we don't check:
+ * - padding for desc, name, and long/detailed desc is required to be '\0'
+ * bytes.
*
- * @dest: buffer to copy into
- * @dest_len: length of @dest in bytes
- * @requested_offset: the offset within the source data we want. Must be > 0
- * @src: buffer to copy data from
- * @src_len: length of @src in bytes
- * @source_offset: the offset in the sorce data that (src,src_len) refers to.
- * Must be > 0
+ * Return NULL if we pass end,
+ * Otherwise return the address of the byte just following the event.
+ */
+static void *event_end(struct hv_24x7_event_data *ev, void *end)
+{
+ void *start = ev;
+ __be16 *dl_, *ldl_;
+ unsigned int dl, ldl;
+ unsigned int nl = be16_to_cpu(ev->event_name_len);
+
+ if (nl < 2) {
+ pr_debug("%s: name length too short: %d", __func__, nl);
+ return NULL;
+ }
+
+ if (start + nl > end) {
+ pr_debug("%s: start=%p + nl=%u > end=%p",
+ __func__, start, nl, end);
+ return NULL;
+ }
+
+ dl_ = (__be16 *)(ev->remainder + nl - 2);
+ if (!IS_ALIGNED((uintptr_t)dl_, 2))
+ pr_warn("desc len not aligned %p", dl_);
+ dl = be16_to_cpu(*dl_);
+ if (dl < 2) {
+ pr_debug("%s: desc len too short: %d", __func__, dl);
+ return NULL;
+ }
+
+ if (start + nl + dl > end) {
+ pr_debug("%s: (start=%p + nl=%u + dl=%u)=%p > end=%p",
+ __func__, start, nl, dl, start + nl + dl, end);
+ return NULL;
+ }
+
+ ldl_ = (__be16 *)(ev->remainder + nl + dl - 2);
+ if (!IS_ALIGNED((uintptr_t)ldl_, 2))
+ pr_warn("long desc len not aligned %p", ldl_);
+ ldl = be16_to_cpu(*ldl_);
+ if (ldl < 2) {
+ pr_debug("%s: long desc len too short (ldl=%u)",
+ __func__, ldl);
+ return NULL;
+ }
+
+ if (start + nl + dl + ldl > end) {
+ pr_debug("%s: start=%p + nl=%u + dl=%u + ldl=%u > end=%p",
+ __func__, start, nl, dl, ldl, end);
+ return NULL;
+ }
+
+ return start + nl + dl + ldl;
+}
+
+static long h_get_24x7_catalog_page_(unsigned long phys_4096,
+ unsigned long version, unsigned long index)
+{
+ pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
+ phys_4096, version, index);
+
+ WARN_ON(!IS_ALIGNED(phys_4096, 4096));
+
+ return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
+ phys_4096, version, index);
+}
+
+static long h_get_24x7_catalog_page(char page[], u64 version, u32 index)
+{
+ return h_get_24x7_catalog_page_(virt_to_phys(page),
+ version, index);
+}
+
+/*
+ * Each event we find in the catalog, will have a sysfs entry. Format the
+ * data for this sysfs entry based on the event's domain.
*
- * returns the number of bytes copied.
+ * Events belonging to the Chip domain can only be monitored in that domain.
+ * i.e. the domain for these events is a fixed/known value.
*
- * The following ascii art shows the various buffer possitioning we need to
- * handle, assigns some arbitrary varibles to points on the buffer, and then
- * shows how we fiddle with those values to get things we care about (copy
- * start in src and copy len)
+ * Events belonging to the Core domain can be monitored either in the physical
+ * core or in one of the virtual CPU domains. So the domain value for these
+ * events must be specified by the user (i.e is a required parameter). Format
+ * the Core events with 'domain=?' so the perf-tool can error check required
+ * parameters.
*
- * s = @src buffer
- * d = @dest buffer
- * '.' areas in d are written to.
+ * NOTE: For the Core domain events, rather than making domain a required
+ * parameter we could default it to PHYS_CORE and allow users to
+ * override the domain to one of the VCPU domains.
*
- * u
- * x w v z
- * d |.........|
- * s |----------------------|
+ * However, this can make the interface a little inconsistent.
*
- * u
- * x w z v
- * d |........------|
- * s |------------------|
+ * If we set domain=2 (PHYS_CHIP) and allow user to override this field
+ * the user may be tempted to also modify the "offset=x" field, which
+ * can lead to confusing usage. Consider the HPM_PCYC (offset=0x18) and
+ * HPM_INST (offset=0x20) events. With:
*
- * x w u,z,v
- * d |........|
- * s |------------------|
+ * perf stat -e hv_24x7/HPM_PCYC,offset=0x20/
*
- * x,w u,v,z
- * d |..................|
- * s |------------------|
+ * we end up monitoring HPM_INST, while the command line has HPM_PCYC.
*
- * x u
- * w v z
- * d |........|
- * s |------------------|
+ * By not assigning a default value to the domain for the Core events,
+ * we can have simple guidelines:
*
- * x z w v
- * d |------|
- * s |------|
+ * - Specifying values for parameters with "=?" is required.
*
- * x = source_offset
- * w = requested_offset
- * z = source_offset + src_len
- * v = requested_offset + dest_len
+ * - Specifying (i.e overriding) values for other parameters
+ * is undefined.
+ */
+static char *event_fmt(struct hv_24x7_event_data *event, unsigned int domain)
+{
+ const char *sindex;
+ const char *lpar;
+ const char *domain_str;
+ char buf[8];
+
+ switch (domain) {
+ case HV_PERF_DOMAIN_PHYS_CHIP:
+ snprintf(buf, sizeof(buf), "%d", domain);
+ domain_str = buf;
+ lpar = "0x0";
+ sindex = "chip";
+ break;
+ case HV_PERF_DOMAIN_PHYS_CORE:
+ domain_str = "?";
+ lpar = "0x0";
+ sindex = "core";
+ break;
+ default:
+ domain_str = "?";
+ lpar = "?";
+ sindex = "vcpu";
+ }
+
+ return kasprintf(GFP_KERNEL,
+ "domain=%s,offset=0x%x,%s=?,lpar=%s",
+ domain_str,
+ be16_to_cpu(event->event_counter_offs) +
+ be16_to_cpu(event->event_group_record_offs),
+ sindex,
+ lpar);
+}
+
+/* Avoid trusting fw to NUL terminate strings: copy at most max_len bytes */
+static char *memdup_to_str(char *maybe_str, int max_len, gfp_t gfp)
+{
+ return kasprintf(gfp, "%.*s", max_len, maybe_str);
+}
+
+static ssize_t cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask);
+}
+
+static ssize_t sockets_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", phys_sockets);
+}
+
+static ssize_t chipspersocket_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", phys_chipspersocket);
+}
+
+static ssize_t coresperchip_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%d\n", phys_coresperchip);
+}
+
+static struct attribute *device_str_attr_create_(char *name, char *str)
+{
+ struct dev_ext_attribute *attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+
+ if (!attr)
+ return NULL;
+
+ sysfs_attr_init(&attr->attr.attr);
+
+ attr->var = str;
+ attr->attr.attr.name = name;
+ attr->attr.attr.mode = 0444;
+ attr->attr.show = device_show_string;
+
+ return &attr->attr.attr;
+}
+
+/*
+ * Allocate and initialize strings representing event attributes.
*
- * w_offset_in_s = w - x = requested_offset - source_offset
- * z_offset_in_s = z - x = src_len
- * v_offset_in_s = v - x = request_offset + dest_len - src_len
+ * NOTE: The strings allocated here are never destroyed and continue to
+ * exist till shutdown. This is to allow us to create as many events
+ * from the catalog as possible, even if we encounter errors with some.
+ * In case of changes to error paths in future, these may need to be
+ * freed by the caller.
*/
-static ssize_t read_offset_data(void *dest, size_t dest_len,
- loff_t requested_offset, void *src,
- size_t src_len, loff_t source_offset)
+static struct attribute *device_str_attr_create(char *name, int name_max,
+ int name_nonce,
+ char *str, size_t str_max)
{
- size_t w_offset_in_s = requested_offset - source_offset;
- size_t z_offset_in_s = src_len;
- size_t v_offset_in_s = requested_offset + dest_len - src_len;
- size_t u_offset_in_s = min(z_offset_in_s, v_offset_in_s);
- size_t copy_len = u_offset_in_s - w_offset_in_s;
+ char *n;
+ char *s = memdup_to_str(str, str_max, GFP_KERNEL);
+ struct attribute *a;
+
+ if (!s)
+ return NULL;
+
+ if (!name_nonce)
+ n = kasprintf(GFP_KERNEL, "%.*s", name_max, name);
+ else
+ n = kasprintf(GFP_KERNEL, "%.*s__%d", name_max, name,
+ name_nonce);
+ if (!n)
+ goto out_s;
+
+ a = device_str_attr_create_(n, s);
+ if (!a)
+ goto out_n;
+
+ return a;
+out_n:
+ kfree(n);
+out_s:
+ kfree(s);
+ return NULL;
+}
- if (requested_offset < 0 || source_offset < 0)
- return -EINVAL;
+static struct attribute *event_to_attr(unsigned int ix,
+ struct hv_24x7_event_data *event,
+ unsigned int domain,
+ int nonce)
+{
+ int event_name_len;
+ char *ev_name, *a_ev_name, *val;
+ struct attribute *attr;
+
+ if (!domain_is_valid(domain)) {
+ pr_warn("catalog event %u has invalid domain %u\n",
+ ix, domain);
+ return NULL;
+ }
- if (z_offset_in_s <= w_offset_in_s)
- return 0;
+ val = event_fmt(event, domain);
+ if (!val)
+ return NULL;
+
+ ev_name = event_name(event, &event_name_len);
+ if (!nonce)
+ a_ev_name = kasprintf(GFP_KERNEL, "%.*s",
+ (int)event_name_len, ev_name);
+ else
+ a_ev_name = kasprintf(GFP_KERNEL, "%.*s__%d",
+ (int)event_name_len, ev_name, nonce);
+
+ if (!a_ev_name)
+ goto out_val;
+
+ attr = device_str_attr_create_(a_ev_name, val);
+ if (!attr)
+ goto out_name;
+
+ return attr;
+out_name:
+ kfree(a_ev_name);
+out_val:
+ kfree(val);
+ return NULL;
+}
+
+static struct attribute *event_to_desc_attr(struct hv_24x7_event_data *event,
+ int nonce)
+{
+ int nl, dl;
+ char *name = event_name(event, &nl);
+ char *desc = event_desc(event, &dl);
+
+ /* If there isn't a description, don't create the sysfs file */
+ if (!dl)
+ return NULL;
- memcpy(dest, src + w_offset_in_s, copy_len);
- return copy_len;
+ return device_str_attr_create(name, nl, nonce, desc, dl);
}
-static unsigned long h_get_24x7_catalog_page_(unsigned long phys_4096,
- unsigned long version,
- unsigned long index)
+static struct attribute *
+event_to_long_desc_attr(struct hv_24x7_event_data *event, int nonce)
{
- pr_devel("h_get_24x7_catalog_page(0x%lx, %lu, %lu)",
- phys_4096,
- version,
- index);
- WARN_ON(!IS_ALIGNED(phys_4096, 4096));
- return plpar_hcall_norets(H_GET_24X7_CATALOG_PAGE,
- phys_4096,
- version,
- index);
+ int nl, dl;
+ char *name = event_name(event, &nl);
+ char *desc = event_long_desc(event, &dl);
+
+ /* If there isn't a description, don't create the sysfs file */
+ if (!dl)
+ return NULL;
+
+ return device_str_attr_create(name, nl, nonce, desc, dl);
}
-static unsigned long h_get_24x7_catalog_page(char page[],
- u64 version, u32 index)
+static int event_data_to_attrs(unsigned int ix, struct attribute **attrs,
+ struct hv_24x7_event_data *event, int nonce)
{
- return h_get_24x7_catalog_page_(virt_to_phys(page),
- version, index);
+ *attrs = event_to_attr(ix, event, event->domain, nonce);
+ if (!*attrs)
+ return -1;
+
+ return 0;
+}
+
+/* rb-tree node: one (name, domain) event key plus its duplicate count */
+struct event_uniq {
+ struct rb_node node;
+ const char *name;
+ int nl;
+ unsigned int ct;
+ unsigned int domain;
+};
+
+static int memord(const void *d1, size_t s1, const void *d2, size_t s2)
+{
+ if (s1 < s2)
+ return 1;
+ if (s1 > s2)
+ return -1;
+
+ return memcmp(d1, d2, s1);
+}
+
+static int ev_uniq_ord(const void *v1, size_t s1, unsigned int d1,
+ const void *v2, size_t s2, unsigned int d2)
+{
+ int r = memord(v1, s1, v2, s2);
+
+ if (r)
+ return r;
+ if (d1 > d2)
+ return 1;
+ if (d2 > d1)
+ return -1;
+ return 0;
+}
+
+static int event_uniq_add(struct rb_root *root, const char *name, int nl,
+ unsigned int domain)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct event_uniq *data;
+
+ /* Find the insertion point; an exact (name, domain) match is a duplicate */
+ while (*new) {
+ struct event_uniq *it;
+ int result;
+
+ it = rb_entry(*new, struct event_uniq, node);
+ result = ev_uniq_ord(name, nl, domain, it->name, it->nl,
+ it->domain);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ it->ct++;
+ pr_info("found a duplicate event %.*s, ct=%u\n", nl,
+ name, it->ct);
+ return it->ct;
+ }
+ }
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ *data = (struct event_uniq) {
+ .name = name,
+ .nl = nl,
+ .ct = 0,
+ .domain = domain,
+ };
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+
+ /* First occurrence of this key: return data->ct, which is 0 */
+ return 0;
+}
+
+static void event_uniq_destroy(struct rb_root *root)
+{
+ /*
+ * the strings we point to are in the giant block of memory filled by
+ * the catalog, and are freed separately.
+ */
+ struct event_uniq *pos, *n;
+
+ rbtree_postorder_for_each_entry_safe(pos, n, root, node)
+ kfree(pos);
+}
+
+
+/*
+ * ensure the event structure's sizes are self consistent and don't cause us to
+ * read outside of the event
+ *
+ * On success, return the event length in bytes.
+ * Otherwise, return -1 (and print as appropriate).
+ */
+static ssize_t catalog_event_len_validate(struct hv_24x7_event_data *event,
+ size_t event_idx,
+ size_t event_data_bytes,
+ size_t event_entry_count,
+ size_t offset, void *end)
+{
+ ssize_t ev_len;
+ void *ev_end, *calc_ev_end;
+
+ if (offset >= event_data_bytes)
+ return -1;
+
+ if (event_idx >= event_entry_count) {
+ pr_devel("catalog event data has %zu bytes of padding after last event\n",
+ event_data_bytes - offset);
+ return -1;
+ }
+
+ if (!event_fixed_portion_is_within(event, end)) {
+ pr_warn("event %zu fixed portion is not within range\n",
+ event_idx);
+ return -1;
+ }
+
+ ev_len = be16_to_cpu(event->length);
+
+ if (ev_len % 16)
+ pr_info("event %zu has length %zu not divisible by 16: event=%p\n",
+ event_idx, ev_len, event);
+
+ ev_end = (__u8 *)event + ev_len;
+ if (ev_end > end) {
+ pr_warn("event %zu has .length=%zu, ends after buffer end: ev_end=%p > end=%p, offset=%zu\n",
+ event_idx, ev_len, ev_end, end,
+ offset);
+ return -1;
+ }
+
+ calc_ev_end = event_end(event, end);
+ if (!calc_ev_end) {
+ pr_warn("event %zu has a calculated length which exceeds buffer length %zu: event=%p end=%p, offset=%zu\n",
+ event_idx, event_data_bytes, event, end,
+ offset);
+ return -1;
+ }
+
+ if (calc_ev_end > ev_end) {
+ pr_warn("event %zu exceeds its own length: event=%p, end=%p, offset=%zu, calc_ev_end=%p\n",
+ event_idx, event, ev_end, offset, calc_ev_end);
+ return -1;
+ }
+
+ return ev_len;
+}
+
+/*
+ * Return true for dummy events, i.e. those whose name starts with "RESERVED"
+ */
+static bool ignore_event(const char *name)
+{
+ return strncmp(name, "RESERVED", 8) == 0;
+}
+
+#define MAX_4K (SIZE_MAX / 4096)
+
+static int create_events_from_catalog(struct attribute ***events_,
+ struct attribute ***event_descs_,
+ struct attribute ***event_long_descs_)
+{
+ long hret;
+ size_t catalog_len, catalog_page_len, event_entry_count,
+ event_data_len, event_data_offs,
+ event_data_bytes, junk_events, event_idx, event_attr_ct, i,
+ attr_max, event_idx_last, desc_ct, long_desc_ct;
+ ssize_t ct, ev_len;
+ uint64_t catalog_version_num;
+ struct attribute **events, **event_descs, **event_long_descs;
+ struct hv_24x7_catalog_page_0 *page_0 =
+ kmem_cache_alloc(hv_page_cache, GFP_KERNEL);
+ void *page = page_0;
+ void *event_data, *end;
+ struct hv_24x7_event_data *event;
+ struct rb_root ev_uniq = RB_ROOT;
+ int ret = 0;
+
+ if (!page) {
+ ret = -ENOMEM;
+ goto e_out;
+ }
+
+ hret = h_get_24x7_catalog_page(page, 0, 0);
+ if (hret) {
+ ret = -EIO;
+ goto e_free;
+ }
+
+ catalog_version_num = be64_to_cpu(page_0->version);
+ catalog_page_len = be32_to_cpu(page_0->length);
+
+ if (MAX_4K < catalog_page_len) {
+ pr_err("invalid page count: %zu\n", catalog_page_len);
+ ret = -EIO;
+ goto e_free;
+ }
+
+ catalog_len = catalog_page_len * 4096;
+
+ event_entry_count = be16_to_cpu(page_0->event_entry_count);
+ event_data_offs = be16_to_cpu(page_0->event_data_offs);
+ event_data_len = be16_to_cpu(page_0->event_data_len);
+
+ pr_devel("cv %llu cl %zu eec %zu edo %zu edl %zu\n",
+ catalog_version_num, catalog_len,
+ event_entry_count, event_data_offs, event_data_len);
+
+ if ((MAX_4K < event_data_len)
+ || (MAX_4K < event_data_offs)
+ || (MAX_4K - event_data_offs < event_data_len)) {
+ pr_err("invalid event data offs %zu and/or len %zu\n",
+ event_data_offs, event_data_len);
+ ret = -EIO;
+ goto e_free;
+ }
+
+ if ((event_data_offs + event_data_len) > catalog_page_len) {
+ pr_err("event data %zu-%zu does not fit inside catalog 0-%zu\n",
+ event_data_offs,
+ event_data_offs + event_data_len,
+ catalog_page_len);
+ ret = -EIO;
+ goto e_free;
+ }
+
+ if (SIZE_MAX - 1 < event_entry_count) {
+ pr_err("event_entry_count %zu is invalid\n", event_entry_count);
+ ret = -EIO;
+ goto e_free;
+ }
+
+ event_data_bytes = event_data_len * 4096;
+
+ /*
+ * event data can span several pages, events can cross between these
+ * pages. Use vmalloc to make this easier.
+ */
+ event_data = vmalloc(event_data_bytes);
+ if (!event_data) {
+ pr_err("could not allocate event data\n");
+ ret = -ENOMEM;
+ goto e_free;
+ }
+
+ end = event_data + event_data_bytes;
+
+ /*
+ * using vmalloc_to_phys() like this only works if PAGE_SIZE is
+ * divisible by 4096
+ */
+ BUILD_BUG_ON(PAGE_SIZE % 4096);
+
+ for (i = 0; i < event_data_len; i++) {
+ hret = h_get_24x7_catalog_page_(
+ vmalloc_to_phys(event_data + i * 4096),
+ catalog_version_num,
+ i + event_data_offs);
+ if (hret) {
+ pr_err("Failed to get event data in page %zu: rc=%ld\n",
+ i + event_data_offs, hret);
+ ret = -EIO;
+ goto e_event_data;
+ }
+ }
+
+ /*
+ * scan the catalog to determine the number of attributes we need, and
+ * verify it at the same time.
+ */
+ for (junk_events = 0, event = event_data, event_idx = 0, attr_max = 0;
+ ;
+ event_idx++, event = (void *)event + ev_len) {
+ size_t offset = (void *)event - (void *)event_data;
+ char *name;
+ int nl;
+
+ ev_len = catalog_event_len_validate(event, event_idx,
+ event_data_bytes,
+ event_entry_count,
+ offset, end);
+ if (ev_len < 0)
+ break;
+
+ name = event_name(event, &nl);
+
+ if (ignore_event(name)) {
+ junk_events++;
+ continue;
+ }
+ if (event->event_group_record_len == 0) {
+ pr_devel("invalid event %zu (%.*s): group_record_len == 0, skipping\n",
+ event_idx, nl, name);
+ junk_events++;
+ continue;
+ }
+
+ if (!catalog_entry_domain_is_valid(event->domain)) {
+ pr_info("event %zu (%.*s) has invalid domain %d\n",
+ event_idx, nl, name, event->domain);
+ junk_events++;
+ continue;
+ }
+
+ attr_max++;
+ }
+
+ event_idx_last = event_idx;
+ if (event_idx_last != event_entry_count)
+ pr_warn("event buffer ended before listed # of events were parsed (got %zu, wanted %zu, junk %zu)\n",
+ event_idx_last, event_entry_count, junk_events);
+
+ events = kmalloc_array(attr_max + 1, sizeof(*events), GFP_KERNEL);
+ if (!events) {
+ ret = -ENOMEM;
+ goto e_event_data;
+ }
+
+ event_descs = kmalloc_array(event_idx + 1, sizeof(*event_descs),
+ GFP_KERNEL);
+ if (!event_descs) {
+ ret = -ENOMEM;
+ goto e_event_attrs;
+ }
+
+ event_long_descs = kmalloc_array(event_idx + 1,
+ sizeof(*event_long_descs), GFP_KERNEL);
+ if (!event_long_descs) {
+ ret = -ENOMEM;
+ goto e_event_descs;
+ }
+
+ /* Iterate over the catalog filling in the attribute vector */
+ for (junk_events = 0, event_attr_ct = 0, desc_ct = 0, long_desc_ct = 0,
+ event = event_data, event_idx = 0;
+ event_idx < event_idx_last;
+ event_idx++, ev_len = be16_to_cpu(event->length),
+ event = (void *)event + ev_len) {
+ char *name;
+ int nl;
+ int nonce;
+ /*
+ * these are the only "bad" events that are intermixed and that
+ * we can ignore without issue. make sure to skip them here
+ */
+ if (event->event_group_record_len == 0)
+ continue;
+ if (!catalog_entry_domain_is_valid(event->domain))
+ continue;
+
+ name = event_name(event, &nl);
+ if (ignore_event(name))
+ continue;
+
+ nonce = event_uniq_add(&ev_uniq, name, nl, event->domain);
+ ct = event_data_to_attrs(event_idx, events + event_attr_ct,
+ event, nonce);
+ if (ct < 0) {
+ pr_warn("event %zu (%.*s) creation failure, skipping\n",
+ event_idx, nl, name);
+ junk_events++;
+ } else {
+ event_attr_ct++;
+ event_descs[desc_ct] = event_to_desc_attr(event, nonce);
+ if (event_descs[desc_ct])
+ desc_ct++;
+ event_long_descs[long_desc_ct] =
+ event_to_long_desc_attr(event, nonce);
+ if (event_long_descs[long_desc_ct])
+ long_desc_ct++;
+ }
+ }
+
+ pr_info("read %zu catalog entries, created %zu event attrs (%zu failures), %zu descs\n",
+ event_idx, event_attr_ct, junk_events, desc_ct);
+
+ events[event_attr_ct] = NULL;
+ event_descs[desc_ct] = NULL;
+ event_long_descs[long_desc_ct] = NULL;
+
+ event_uniq_destroy(&ev_uniq);
+ vfree(event_data);
+ kmem_cache_free(hv_page_cache, page);
+
+ *events_ = events;
+ *event_descs_ = event_descs;
+ *event_long_descs_ = event_long_descs;
+ return 0;
+
+e_event_descs:
+ kfree(event_descs);
+e_event_attrs:
+ kfree(events);
+e_event_data:
+ vfree(event_data);
+e_free:
+ kmem_cache_free(hv_page_cache, page);
+e_out:
+ *events_ = NULL;
+ *event_descs_ = NULL;
+ *event_long_descs_ = NULL;
+ return ret;
}
static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
+ const struct bin_attribute *bin_attr, char *buf,
loff_t offset, size_t count)
{
- unsigned long hret;
+ long hret;
ssize_t ret = 0;
- size_t catalog_len = 0, catalog_page_len = 0, page_count = 0;
+ size_t catalog_len = 0, catalog_page_len = 0;
loff_t page_offset = 0;
+ loff_t offset_in_page;
+ size_t copy_len;
uint64_t catalog_version_num = 0;
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER);
struct hv_24x7_catalog_page_0 *page_0 = page;
+
if (!page)
return -ENOMEM;
@@ -202,7 +1025,7 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
catalog_len = catalog_page_len * 4096;
page_offset = offset / 4096;
- page_count = count / 4096;
+ offset_in_page = offset % 4096;
if (page_offset >= catalog_page_len)
goto e_free;
@@ -216,8 +1039,13 @@ static ssize_t catalog_read(struct file *filp, struct kobject *kobj,
}
}
- ret = read_offset_data(buf, count, offset,
- page, 4096, page_offset * 4096);
+ copy_len = 4096 - offset_in_page;
+ if (copy_len > count)
+ copy_len = count;
+
+ memcpy(buf, page+offset_in_page, copy_len);
+ ret = copy_len;
+
e_free:
if (hret)
pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
@@ -225,19 +1053,40 @@ e_free:
catalog_version_num, page_offset, hret);
kmem_cache_free(hv_page_cache, page);
- pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
- offset, page_offset, count, page_count, catalog_len,
- catalog_page_len, ret);
+ pr_devel("catalog_read: offset=%lld(%lld) count=%zu "
+ "catalog_len=%zu(%zu) => %zd\n", offset, page_offset,
+ count, catalog_len, catalog_page_len, ret);
return ret;
}
+static ssize_t domains_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ int d, n, count = 0;
+ const char *str;
+
+ for (d = 0; d < HV_PERF_DOMAIN_MAX; d++) {
+ str = domain_name(d);
+ if (!str)
+ continue;
+
+ n = sprintf(page, "%d: %s\n", d, str);
+ if (n < 0)
+ break;
+
+ count += n;
+ page += n;
+ }
+ return count;
+}
+
#define PAGE_0_ATTR(_name, _fmt, _expr) \
static ssize_t _name##_show(struct device *dev, \
struct device_attribute *dev_attr, \
char *buf) \
{ \
- unsigned long hret; \
+ long hret; \
ssize_t ret = 0; \
void *page = kmem_cache_alloc(hv_page_cache, GFP_USER); \
struct hv_24x7_catalog_page_0 *page_0 = page; \
@@ -250,7 +1099,7 @@ static ssize_t _name##_show(struct device *dev, \
} \
ret = sprintf(buf, _fmt, _expr); \
e_free: \
- kfree(page); \
+ kmem_cache_free(hv_page_cache, page); \
return ret; \
} \
static DEVICE_ATTR_RO(_name)
@@ -259,20 +1108,38 @@ PAGE_0_ATTR(catalog_version, "%lld\n",
(unsigned long long)be64_to_cpu(page_0->version));
PAGE_0_ATTR(catalog_len, "%lld\n",
(unsigned long long)be32_to_cpu(page_0->length) * 4096);
-static BIN_ATTR_RO(catalog, 0/* real length varies */);
-
-static struct bin_attribute *if_bin_attrs[] = {
+static const BIN_ATTR_RO(catalog, 0/* real length varies */);
+static DEVICE_ATTR_RO(domains);
+static DEVICE_ATTR_RO(sockets);
+static DEVICE_ATTR_RO(chipspersocket);
+static DEVICE_ATTR_RO(coresperchip);
+static DEVICE_ATTR_RO(cpumask);
+
+static const struct bin_attribute *const if_bin_attrs[] = {
&bin_attr_catalog,
NULL,
};
+static struct attribute *cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group cpumask_attr_group = {
+ .attrs = cpumask_attrs,
+};
+
static struct attribute *if_attrs[] = {
&dev_attr_catalog_len.attr,
&dev_attr_catalog_version.attr,
+ &dev_attr_domains.attr,
+ &dev_attr_sockets.attr,
+ &dev_attr_chipspersocket.attr,
+ &dev_attr_coresperchip.attr,
NULL,
};
-static struct attribute_group if_group = {
+static const struct attribute_group if_group = {
.name = "interface",
.bin_attrs = if_bin_attrs,
.attrs = if_attrs,
@@ -280,86 +1147,235 @@ static struct attribute_group if_group = {
static const struct attribute_group *attr_groups[] = {
&format_group,
+ &event_group,
+ &event_desc_group,
+ &event_long_desc_group,
&if_group,
+ &cpumask_attr_group,
NULL,
};
-static bool is_physical_domain(int domain)
+/*
+ * Start the process for a new H_GET_24x7_DATA hcall.
+ */
+static void init_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+ struct hv_24x7_data_result_buffer *result_buffer)
{
- return domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP ||
- domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
+
+ memset(request_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+ memset(result_buffer, 0, H24x7_DATA_BUFFER_SIZE);
+
+ request_buffer->interface_version = interface_version;
+ /* memset above set request_buffer->num_requests to 0 */
}
-static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
- u16 lpar, u64 *res,
- bool success_expected)
+/*
+ * Commit (i.e perform) the H_GET_24x7_DATA hcall using the data collected
+ * by 'init_24x7_request()' and 'add_event_to_24x7_request()'.
+ */
+static int make_24x7_request(struct hv_24x7_request_buffer *request_buffer,
+ struct hv_24x7_data_result_buffer *result_buffer)
{
- unsigned long ret;
+ long ret;
/*
- * request_buffer and result_buffer are not required to be 4k aligned,
- * but are not allowed to cross any 4k boundary. Aligning them to 4k is
- * the simplest way to ensure that.
+ * NOTE: Due to variable number of array elements in request and
+ * result buffer(s), sizeof() is not reliable. Use the actual
+ * allocated buffer size, H24x7_DATA_BUFFER_SIZE.
*/
- struct reqb {
- struct hv_24x7_request_buffer buf;
- struct hv_24x7_request req;
- } __packed __aligned(4096) request_buffer = {
- .buf = {
- .interface_version = HV_24X7_IF_VERSION_CURRENT,
- .num_requests = 1,
- },
- .req = {
- .performance_domain = domain,
- .data_size = cpu_to_be16(8),
- .data_offset = cpu_to_be32(offset),
- .starting_lpar_ix = cpu_to_be16(lpar),
- .max_num_lpars = cpu_to_be16(1),
- .starting_ix = cpu_to_be16(ix),
- .max_ix = cpu_to_be16(1),
+ ret = plpar_hcall_norets(H_GET_24X7_DATA,
+ virt_to_phys(request_buffer), H24x7_DATA_BUFFER_SIZE,
+ virt_to_phys(result_buffer), H24x7_DATA_BUFFER_SIZE);
+
+ if (ret) {
+ struct hv_24x7_request *req;
+
+ req = request_buffer->requests;
+ pr_notice_ratelimited("hcall failed: [%d %#x %#x %d] => ret 0x%lx (%ld) detail=0x%x failing ix=%x\n",
+ req->performance_domain, req->data_offset,
+ req->starting_ix, req->starting_lpar_ix,
+ ret, ret, result_buffer->detailed_rc,
+ result_buffer->failing_request_ix);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * Add the given @event to the next slot in the 24x7 request_buffer.
+ *
+ * Note that H_GET_24X7_DATA hcall allows reading several counters'
+ * values in a single HCALL. We expect the caller to add events to the
+ * request buffer one by one, make the HCALL and process the results.
+ */
+static int add_event_to_24x7_request(struct perf_event *event,
+ struct hv_24x7_request_buffer *request_buffer)
+{
+ u16 idx;
+ int i;
+ size_t req_size;
+ struct hv_24x7_request *req;
+
+ if (request_buffer->num_requests >=
+ max_num_requests(request_buffer->interface_version)) {
+ pr_devel("Too many requests for 24x7 HCALL %d\n",
+ request_buffer->num_requests);
+ return -EINVAL;
+ }
+
+ switch (event_get_domain(event)) {
+ case HV_PERF_DOMAIN_PHYS_CHIP:
+ idx = event_get_chip(event);
+ break;
+ case HV_PERF_DOMAIN_PHYS_CORE:
+ idx = event_get_core(event);
+ break;
+ default:
+ idx = event_get_vcpu(event);
+ }
+
+ req_size = H24x7_REQUEST_SIZE(request_buffer->interface_version);
+
+ i = request_buffer->num_requests++;
+ req = (void *) request_buffer->requests + i * req_size;
+
+ req->performance_domain = event_get_domain(event);
+ req->data_size = cpu_to_be16(8);
+ req->data_offset = cpu_to_be32(event_get_offset(event));
+ req->starting_lpar_ix = cpu_to_be16(event_get_lpar(event));
+ req->max_num_lpars = cpu_to_be16(1);
+ req->starting_ix = cpu_to_be16(idx);
+ req->max_ix = cpu_to_be16(1);
+
+ if (request_buffer->interface_version > 1) {
+ if (domain_needs_aggregation(req->performance_domain))
+ req->max_num_thread_groups = -1;
+ else if (req->performance_domain != HV_PERF_DOMAIN_PHYS_CHIP) {
+ req->starting_thread_group_ix = idx % 2;
+ req->max_num_thread_groups = 1;
}
- };
+ }
- struct resb {
- struct hv_24x7_data_result_buffer buf;
- struct hv_24x7_result res;
- struct hv_24x7_result_element elem;
- __be64 result;
- } __packed __aligned(4096) result_buffer = {};
+ return 0;
+}
- ret = plpar_hcall_norets(H_GET_24X7_DATA,
- virt_to_phys(&request_buffer), sizeof(request_buffer),
- virt_to_phys(&result_buffer), sizeof(result_buffer));
+/**
+ * get_count_from_result - get event count from all result elements in result
+ *
+ * If the event corresponding to this result needs aggregation of the result
+ * element values, then this function does that.
+ *
+ * @event: Event associated with @res.
+ * @resb: Result buffer containing @res.
+ * @res: Result to work on.
+ * @countp: Output variable containing the event count.
+ * @next: Optional output variable pointing to the next result in @resb.
+ */
+static int get_count_from_result(struct perf_event *event,
+ struct hv_24x7_data_result_buffer *resb,
+ struct hv_24x7_result *res, u64 *countp,
+ struct hv_24x7_result **next)
+{
+ u16 num_elements = be16_to_cpu(res->num_elements_returned);
+ u16 data_size = be16_to_cpu(res->result_element_data_size);
+ unsigned int data_offset;
+ void *element_data;
+ int i;
+ u64 count;
- if (ret) {
- if (success_expected)
- pr_err_ratelimited("hcall failed: %d %#x %#x %d => 0x%lx (%ld) detail=0x%x failing ix=%x\n",
- domain, offset, ix, lpar,
- ret, ret,
- result_buffer.buf.detailed_rc,
- result_buffer.buf.failing_request_ix);
- return ret;
+ /*
+ * We can bail out early if the result is empty.
+ */
+ if (!num_elements) {
+ pr_debug("Result of request %hhu is empty, nothing to do\n",
+ res->result_ix);
+
+ if (next)
+ *next = (struct hv_24x7_result *) res->elements;
+
+ return -ENODATA;
}
- *res = be64_to_cpu(result_buffer.result);
- return ret;
+ /*
+ * Since we always specify 1 as the maximum for the smallest resource
+ * we're requesting, there should be only one element per result.
+ * Except when an event needs aggregation, in which case there are more.
+ */
+ if (num_elements != 1 &&
+ !domain_needs_aggregation(event_get_domain(event))) {
+ pr_err("Error: result of request %hhu has %hu elements\n",
+ res->result_ix, num_elements);
+
+ return -EIO;
+ }
+
+ if (data_size != sizeof(u64)) {
+ pr_debug("Error: result of request %hhu has data of %hu bytes\n",
+ res->result_ix, data_size);
+
+ return -ENOTSUPP;
+ }
+
+ if (resb->interface_version == 1)
+ data_offset = offsetof(struct hv_24x7_result_element_v1,
+ element_data);
+ else
+ data_offset = offsetof(struct hv_24x7_result_element_v2,
+ element_data);
+
+ /* Go through the result elements in the result. */
+ for (i = count = 0, element_data = res->elements + data_offset;
+ i < num_elements;
+ i++, element_data += data_size + data_offset)
+ count += be64_to_cpu(*((__be64 *)element_data));
+
+ *countp = count;
+
+ /* The next result is after the last result element. */
+ if (next)
+ *next = element_data - data_offset;
+
+ return 0;
}
-static unsigned long event_24x7_request(struct perf_event *event, u64 *res,
- bool success_expected)
+static int single_24x7_request(struct perf_event *event, u64 *count)
{
- return single_24x7_request(event_get_domain(event),
- event_get_offset(event),
- event_get_starting_index(event),
- event_get_lpar(event),
- res,
- success_expected);
+ int ret;
+ struct hv_24x7_request_buffer *request_buffer;
+ struct hv_24x7_data_result_buffer *result_buffer;
+
+ BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
+ BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
+
+ request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+ result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+ init_24x7_request(request_buffer, result_buffer);
+
+ ret = add_event_to_24x7_request(event, request_buffer);
+ if (ret)
+ goto out;
+
+ ret = make_24x7_request(request_buffer, result_buffer);
+ if (ret)
+ goto out;
+
+ /* process result from hcall */
+ ret = get_count_from_result(event, result_buffer,
+ result_buffer->results, count, NULL);
+
+out:
+ put_cpu_var(hv_24x7_reqb);
+ put_cpu_var(hv_24x7_resb);
+ return ret;
}
+
static int h_24x7_event_init(struct perf_event *event)
{
struct hv_perf_caps caps;
- unsigned domain;
+ unsigned int domain;
unsigned long hret;
u64 ct;
@@ -381,15 +1397,6 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -400,9 +1407,8 @@ static int h_24x7_event_init(struct perf_event *event)
return -EINVAL;
}
- /* Domains above 6 are invalid */
domain = event_get_domain(event);
- if (domain > 6) {
+ if (domain == 0 || domain >= HV_PERF_DOMAIN_MAX) {
pr_devel("invalid domain %d\n", domain);
return -EINVAL;
}
@@ -413,45 +1419,91 @@ static int h_24x7_event_init(struct perf_event *event)
return -EIO;
}
- /* PHYSICAL domains & other lpars require extra capabilities */
+ /* Physical domains & other lpars require extra capabilities */
if (!caps.collect_privileged && (is_physical_domain(domain) ||
(event_get_lpar(event) != event_get_lpar_max()))) {
- pr_devel("hv permisions disallow: is_physical_domain:%d, lpar=0x%llx\n",
+ pr_devel("hv permissions disallow: is_physical_domain:%d, lpar=0x%llx\n",
is_physical_domain(domain),
event_get_lpar(event));
return -EACCES;
}
- /* see if the event complains */
- if (event_24x7_request(event, &ct, false)) {
+ /* Get the initial value of the counter for this event */
+ if (single_24x7_request(event, &ct)) {
pr_devel("test hcall failed\n");
return -EIO;
}
+ (void)local64_xchg(&event->hw.prev_count, ct);
return 0;
}
static u64 h_24x7_get_value(struct perf_event *event)
{
- unsigned long ret;
u64 ct;
- ret = event_24x7_request(event, &ct, true);
- if (ret)
+
+ if (single_24x7_request(event, &ct))
/* We checked this in event init, shouldn't fail here... */
return 0;
return ct;
}
-static void h_24x7_event_update(struct perf_event *event)
+static void update_event_count(struct perf_event *event, u64 now)
{
s64 prev;
- u64 now;
- now = h_24x7_get_value(event);
+
prev = local64_xchg(&event->hw.prev_count, now);
local64_add(now - prev, &event->count);
}
+static void h_24x7_event_read(struct perf_event *event)
+{
+ u64 now;
+ struct hv_24x7_request_buffer *request_buffer;
+ struct hv_24x7_hw *h24x7hw;
+ int txn_flags;
+
+ txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+
+ /*
+ * If in a READ transaction, add this counter to the list of
+ * counters to read during the next HCALL (i.e. commit_txn()).
+ * If not in a READ transaction, go ahead and make the HCALL
+ * to read this counter by itself.
+ */
+
+ if (txn_flags & PERF_PMU_TXN_READ) {
+ int i;
+ int ret;
+
+ if (__this_cpu_read(hv_24x7_txn_err))
+ return;
+
+ request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+
+ ret = add_event_to_24x7_request(event, request_buffer);
+ if (ret) {
+ __this_cpu_write(hv_24x7_txn_err, ret);
+ } else {
+ /*
+ * Associate the event with the HCALL request index,
+ * so ->commit_txn() can quickly find/update count.
+ */
+ i = request_buffer->num_requests - 1;
+
+ h24x7hw = &get_cpu_var(hv_24x7_hw);
+ h24x7hw->events[i] = event;
+ put_cpu_var(h24x7hw);
+ }
+
+ put_cpu_var(hv_24x7_reqb);
+ } else {
+ now = h_24x7_get_value(event);
+ update_event_count(event, now);
+ }
+}
+
static void h_24x7_event_start(struct perf_event *event, int flags)
{
if (flags & PERF_EF_RELOAD)
@@ -460,7 +1512,7 @@ static void h_24x7_event_start(struct perf_event *event, int flags)
static void h_24x7_event_stop(struct perf_event *event, int flags)
{
- h_24x7_event_update(event);
+ h_24x7_event_read(event);
}
static int h_24x7_event_add(struct perf_event *event, int flags)
@@ -471,9 +1523,116 @@ static int h_24x7_event_add(struct perf_event *event, int flags)
return 0;
}
-static int h_24x7_event_idx(struct perf_event *event)
+/*
+ * 24x7 counters only support READ transactions. They are
+ * always counting and don't need/support ADD transactions.
+ * Cache the flags, but otherwise ignore transactions that
+ * are not PERF_PMU_TXN_READ.
+ */
+static void h_24x7_event_start_txn(struct pmu *pmu, unsigned int flags)
{
- return 0;
+ struct hv_24x7_request_buffer *request_buffer;
+ struct hv_24x7_data_result_buffer *result_buffer;
+
+ /* We should not be called if we are already in a txn */
+ WARN_ON_ONCE(__this_cpu_read(hv_24x7_txn_flags));
+
+ __this_cpu_write(hv_24x7_txn_flags, flags);
+ if (flags & ~PERF_PMU_TXN_READ)
+ return;
+
+ request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+ result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+ init_24x7_request(request_buffer, result_buffer);
+
+ put_cpu_var(hv_24x7_resb);
+ put_cpu_var(hv_24x7_reqb);
+}
+
+/*
+ * Clean up transaction state.
+ *
+ * NOTE: Ignore state of request and result buffers for now.
+ * We will initialize them during the next read/txn.
+ */
+static void reset_txn(void)
+{
+ __this_cpu_write(hv_24x7_txn_flags, 0);
+ __this_cpu_write(hv_24x7_txn_err, 0);
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and don't need/support ADD transactions. Clear ->txn_flags but otherwise
+ * ignore transactions that are not of type PERF_PMU_TXN_READ.
+ *
+ * For READ transactions, submit all pending 24x7 requests (i.e. requests
+ * that were queued by h_24x7_event_read()) to the hypervisor and update
+ * the event counts.
+ */
+static int h_24x7_event_commit_txn(struct pmu *pmu)
+{
+ struct hv_24x7_request_buffer *request_buffer;
+ struct hv_24x7_data_result_buffer *result_buffer;
+ struct hv_24x7_result *res, *next_res;
+ u64 count;
+ int i, ret, txn_flags;
+ struct hv_24x7_hw *h24x7hw;
+
+ txn_flags = __this_cpu_read(hv_24x7_txn_flags);
+ WARN_ON_ONCE(!txn_flags);
+
+ ret = 0;
+ if (txn_flags & ~PERF_PMU_TXN_READ)
+ goto out;
+
+ ret = __this_cpu_read(hv_24x7_txn_err);
+ if (ret)
+ goto out;
+
+ request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+ result_buffer = (void *)get_cpu_var(hv_24x7_resb);
+
+ ret = make_24x7_request(request_buffer, result_buffer);
+ if (ret)
+ goto put_reqb;
+
+ h24x7hw = &get_cpu_var(hv_24x7_hw);
+
+ /* Go through results in the result buffer to update event counts. */
+ for (i = 0, res = result_buffer->results;
+ i < result_buffer->num_results; i++, res = next_res) {
+ struct perf_event *event = h24x7hw->events[res->result_ix];
+
+ ret = get_count_from_result(event, result_buffer, res, &count,
+ &next_res);
+ if (ret)
+ break;
+
+ update_event_count(event, count);
+ }
+
+ put_cpu_var(hv_24x7_hw);
+
+put_reqb:
+ put_cpu_var(hv_24x7_resb);
+ put_cpu_var(hv_24x7_reqb);
+out:
+ reset_txn();
+ return ret;
+}
+
+/*
+ * 24x7 counters only support READ transactions. They are always counting
+ * and don't need/support ADD transactions. However, regardless of type
+ * of transaction, all we need to do is clean up, so we don't have to check
+ * the type of transaction.
+ */
+static void h_24x7_event_cancel_txn(struct pmu *pmu)
+{
+ WARN_ON_ONCE(!__this_cpu_read(hv_24x7_txn_flags));
+ reset_txn();
}
static struct pmu h_24x7_pmu = {
@@ -486,14 +1645,57 @@ static struct pmu h_24x7_pmu = {
.del = h_24x7_event_stop,
.start = h_24x7_event_start,
.stop = h_24x7_event_stop,
- .read = h_24x7_event_update,
- .event_idx = h_24x7_event_idx,
+ .read = h_24x7_event_read,
+ .start_txn = h_24x7_event_start_txn,
+ .commit_txn = h_24x7_event_commit_txn,
+ .cancel_txn = h_24x7_event_cancel_txn,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
+static int ppc_hv_24x7_cpu_online(unsigned int cpu)
+{
+ if (cpumask_empty(&hv_24x7_cpumask))
+ cpumask_set_cpu(cpu, &hv_24x7_cpumask);
+
+ return 0;
+}
+
+static int ppc_hv_24x7_cpu_offline(unsigned int cpu)
+{
+ int target;
+
+ /* Check if exiting cpu is used for collecting 24x7 events */
+ if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask))
+ return 0;
+
+ /* Find a new cpu to collect 24x7 events */
+ target = cpumask_last(cpu_active_mask);
+
+ if (target < 0 || target >= nr_cpu_ids) {
+ pr_err("hv_24x7: CPU hotplug init failed\n");
+ return -1;
+ }
+
+ /* Migrate 24x7 events to the new target */
+ cpumask_set_cpu(target, &hv_24x7_cpumask);
+ perf_pmu_migrate_context(&h_24x7_pmu, cpu, target);
+
+ return 0;
+}
+
+static int hv_24x7_cpu_hotplug_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
+ "perf/powerpc/hv_24x7:online",
+ ppc_hv_24x7_cpu_online,
+ ppc_hv_24x7_cpu_offline);
+}
+
static int hv_24x7_init(void)
{
int r;
unsigned long hret;
+ unsigned int pvr = mfspr(SPRN_PVR);
struct hv_perf_caps caps;
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
@@ -501,6 +1703,18 @@ static int hv_24x7_init(void)
return -ENODEV;
}
+ /* POWER8 only supports v1, while POWER9 only supports v2. */
+ if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+ PVR_VER(pvr) == PVR_POWER8NVL)
+ interface_version = 1;
+ else {
+ interface_version = 2;
+
+ /* SMT8 in POWER9 needs to aggregate result elements. */
+ if (threads_per_core == 8)
+ aggregate_result_elements = true;
+ }
+
hret = hv_perf_caps_get(&caps);
if (hret) {
pr_debug("could not obtain capabilities, not enabling, rc=%ld\n",
@@ -515,10 +1729,24 @@ static int hv_24x7_init(void)
/* sampling not supported */
h_24x7_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ r = create_events_from_catalog(&event_group.attrs,
+ &event_desc_group.attrs,
+ &event_long_desc_group.attrs);
+
+ if (r)
+ return r;
+
+ /* init cpuhotplug */
+ r = hv_24x7_cpu_hotplug_init();
+ if (r)
+ return r;
+
r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
if (r)
return r;
+ read_24x7_sys_info();
+
return 0;
}
diff --git a/arch/powerpc/perf/hv-24x7.h b/arch/powerpc/perf/hv-24x7.h
index 720ebce4b435..ae4ae4813e16 100644
--- a/arch/powerpc/perf/hv-24x7.h
+++ b/arch/powerpc/perf/hv-24x7.h
@@ -1,16 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_POWERPC_PERF_HV_24X7_H_
#define LINUX_POWERPC_PERF_HV_24X7_H_
#include <linux/types.h>
+enum hv_perf_domains {
+#define DOMAIN(n, v, x, c) HV_PERF_DOMAIN_##n = v,
+#include "hv-24x7-domains.h"
+#undef DOMAIN
+ HV_PERF_DOMAIN_MAX,
+};
+
+#define H24x7_REQUEST_SIZE(iface_version) (iface_version == 1 ? 16 : 32)
+
struct hv_24x7_request {
/* PHYSICAL domains require enabling via phyp/hmc. */
-#define HV_24X7_PERF_DOMAIN_PHYSICAL_CHIP 0x01
-#define HV_24X7_PERF_DOMAIN_PHYSICAL_CORE 0x02
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CORE 0x03
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_CHIP 0x04
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_HOME_NODE 0x05
-#define HV_24X7_PERF_DOMAIN_VIRTUAL_PROCESSOR_REMOTE_NODE 0x06
__u8 performance_domain;
__u8 reserved[0x1];
@@ -41,19 +45,27 @@ struct hv_24x7_request {
/* chip, core, or virtual processor based on @performance_domain */
__be16 starting_ix;
__be16 max_ix;
+
+ /* The following fields were added in v2 of the 24x7 interface. */
+
+ __u8 starting_thread_group_ix;
+
+ /* -1 means all thread groups starting at @starting_thread_group_ix */
+ __u8 max_num_thread_groups;
+
+ __u8 reserved2[0xE];
} __packed;
struct hv_24x7_request_buffer {
/* 0 - ? */
/* 1 - ? */
-#define HV_24X7_IF_VERSION_CURRENT 0x01
__u8 interface_version;
__u8 num_requests;
__u8 reserved[0xE];
struct hv_24x7_request requests[];
} __packed;
-struct hv_24x7_result_element {
+struct hv_24x7_result_element_v1 {
__be16 lpar_ix;
/*
@@ -65,11 +77,39 @@ struct hv_24x7_result_element {
/* -1 if @performance_domain does not refer to a virtual processor */
__be32 lpar_cfg_instance_id;
- /* size = @result_element_data_size of cointaining result. */
- __u8 element_data[];
+ /* size = @result_element_data_size of containing result. */
+ __u64 element_data[];
+} __packed;
+
+/*
+ * We need a separate struct for v2 because the offset of @element_data changed
+ * between versions.
+ */
+struct hv_24x7_result_element_v2 {
+ __be16 lpar_ix;
+
+ /*
+ * represents the core, chip, or virtual processor based on the
+ * request's @performance_domain
+ */
+ __be16 domain_ix;
+
+ /* -1 if @performance_domain does not refer to a virtual processor */
+ __be32 lpar_cfg_instance_id;
+
+ __u8 thread_group_ix;
+
+ __u8 reserved[7];
+
+ /* size = @result_element_data_size of containing result. */
+ __u64 element_data[];
} __packed;
struct hv_24x7_result {
+ /*
+ * The index of the 24x7 Request Structure in the 24x7 Request Buffer
+ * used to request this result.
+ */
__u8 result_ix;
/*
@@ -80,14 +120,25 @@ struct hv_24x7_result {
__u8 results_complete;
__be16 num_elements_returned;
- /* This is a copy of @data_size from the coresponding hv_24x7_request */
+ /*
+ * This is a copy of @data_size from the corresponding hv_24x7_request
+ *
+ * Warning: to obtain the size of each element in @elements you have
+ * to add the size of the other members of the result_element struct.
+ */
__be16 result_element_data_size;
__u8 reserved[0x2];
- /* WARNING: only valid for first result element due to variable sizes
- * of result elements */
- /* struct hv_24x7_result_element[@num_elements_returned] */
- struct hv_24x7_result_element elements[];
+ /*
+ * Either
+ * struct hv_24x7_result_element_v1[@num_elements_returned]
+ * or
+ * struct hv_24x7_result_element_v2[@num_elements_returned]
+ *
+ * depending on the interface_version field of the
+ * struct hv_24x7_data_result_buffer containing this result.
+ */
+ char elements[];
} __packed;
struct hv_24x7_data_result_buffer {
diff --git a/arch/powerpc/perf/hv-common.c b/arch/powerpc/perf/hv-common.c
index 47e02b366f58..0370518edd20 100644
--- a/arch/powerpc/perf/hv-common.c
+++ b/arch/powerpc/perf/hv-common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <asm/io.h>
#include <asm/hvcall.h>
@@ -9,13 +10,13 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
unsigned long r;
struct p {
struct hv_get_perf_counter_info_params params;
- struct cv_system_performance_capabilities caps;
+ struct hv_gpci_system_performance_capabilities caps;
} __packed __aligned(sizeof(uint64_t));
struct p arg = {
.params = {
.counter_request = cpu_to_be32(
- CIR_SYSTEM_PERFORMANCE_CAPABILITIES),
+ HV_GPCI_system_performance_capabilities),
.starting_index = cpu_to_be32(-1),
.counter_info_version_in = 0,
}
@@ -31,9 +32,9 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps)
caps->version = arg.params.counter_info_version_out;
caps->collect_privileged = !!arg.caps.perf_collect_privileged;
- caps->ga = !!(arg.caps.capability_mask & CV_CM_GA);
- caps->expanded = !!(arg.caps.capability_mask & CV_CM_EXPANDED);
- caps->lab = !!(arg.caps.capability_mask & CV_CM_LAB);
+ caps->ga = !!(arg.caps.capability_mask & HV_GPCI_CM_GA);
+ caps->expanded = !!(arg.caps.capability_mask & HV_GPCI_CM_EXPANDED);
+ caps->lab = !!(arg.caps.capability_mask & HV_GPCI_CM_LAB);
return r;
}
diff --git a/arch/powerpc/perf/hv-common.h b/arch/powerpc/perf/hv-common.h
index 5d79cecbd73d..2cce17bc321c 100644
--- a/arch/powerpc/perf/hv-common.h
+++ b/arch/powerpc/perf/hv-common.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_POWERPC_PERF_HV_COMMON_H_
#define LINUX_POWERPC_PERF_HV_COMMON_H_
@@ -20,6 +21,16 @@ unsigned long hv_perf_caps_get(struct hv_perf_caps *caps);
PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end); \
EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end)
+/*
+ * The EVENT_DEFINE_RANGE_FORMAT() macro above includes helper functions
+ * for the fields (eg: event_get_starting_index()). For some fields we
+ * need the bit-range definition, but not the helper functions. Define a
+ * lite version of the above macro without the helpers and silence
+ * compiler warnings about unused static functions.
+ */
+#define EVENT_DEFINE_RANGE_FORMAT_LITE(name, attr_var, bit_start, bit_end) \
+PMU_FORMAT_ATTR(name, #attr_var ":" #bit_start "-" #bit_end);
+
#define EVENT_DEFINE_RANGE(name, attr_var, bit_start, bit_end) \
static u64 event_get_##name##_max(void) \
{ \
diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
new file mode 100644
index 000000000000..5e86371a20c7
--- /dev/null
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "req-gen/_begin.h"
+
+/*
+ * Based on the document "getPerfCountInfo v1.07"
+ */
+
+/*
+ * #define REQUEST_NAME counter_request_name
+ * #define REQUEST_NUM r_num
+ * #define REQUEST_IDX_KIND starting_index_kind
+ * #include I(REQUEST_BEGIN)
+ * REQUEST(
+ * __field(...)
+ * __field(...)
+ * __array(...)
+ * __count(...)
+ * )
+ * #include I(REQUEST_END)
+ *
+ * - starting_index_kind is one of the following, depending on the event:
+ *
+ * hw_chip_id: hardware chip id or -1 for current hw chip
+ * partition_id
+ * sibling_part_id,
+ * phys_processor_idx:
+ * 0xffffffffffffffff: or -1, which means it is irrelevant for the event
+ *
+ * __count(offset, bytes, name):
+ * a counter that should be exposed via perf
+ * __field(offset, bytes, name)
+ * a normal field
+ * __array(offset, bytes, name)
+ * an array of bytes
+ *
+ *
+ * @bytes for __count, and __field _must_ be a numeral token
+ * in decimal, not an expression and not in hex.
+ *
+ *
+ * TODO:
+ * - expose secondary index (if any counter ever uses it, only 0xA0
+ * appears to use it right now, and it doesn't have any counters)
+ * - embed versioning info
+ * - include counter descriptions
+ */
+#define REQUEST_NAME dispatch_timebase_by_processor
+#define REQUEST_NUM 0x10
+#define REQUEST_IDX_KIND "phys_processor_idx=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0, 8, processor_time_in_timebase_cycles)
+ __field(0x8, 4, hw_processor_id)
+ __field(0xC, 2, owning_part_id)
+ __field(0xE, 1, processor_state)
+ __field(0xF, 1, version)
+ __field(0x10, 4, hw_chip_id)
+ __field(0x14, 4, phys_module_id)
+ __field(0x18, 4, primary_affinity_domain_idx)
+ __field(0x1C, 4, secondary_affinity_domain_idx)
+ __field(0x20, 4, processor_version)
+ __field(0x24, 2, logical_processor_idx)
+ __field(0x26, 2, reserved)
+ __field(0x28, 4, processor_id_register)
+ __field(0x2C, 4, phys_processor_idx)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME entitled_capped_uncapped_donated_idle_timebase_by_partition
+#define REQUEST_NUM 0x20
+#define REQUEST_IDX_KIND "sibling_part_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 8, partition_id)
+ __count(0x8, 8, entitled_cycles)
+ __count(0x10, 8, consumed_capped_cycles)
+ __count(0x18, 8, consumed_uncapped_cycles)
+ __count(0x20, 8, cycles_donated)
+ __count(0x28, 8, purr_idle_cycles)
+)
+#include I(REQUEST_END)
+
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
+/*
+ * Not available for counter_info_version >= 0x8, use
+ * run_instruction_cycles_by_partition(0x100) instead.
+ */
+#define REQUEST_NAME run_instructions_run_cycles_by_partition
+#define REQUEST_NUM 0x30
+#define REQUEST_IDX_KIND "sibling_part_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 8, partition_id)
+ __count(0x8, 8, instructions_completed)
+ __count(0x10, 8, cycles)
+)
+#include I(REQUEST_END)
+#endif
+
+#define REQUEST_NAME system_performance_capabilities
+#define REQUEST_NUM 0x40
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 1, perf_collect_privileged)
+ __field(0x1, 1, capability_mask)
+ __array(0x2, 0xE, reserved)
+)
+#include I(REQUEST_END)
+
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
+#define REQUEST_NAME processor_bus_utilization_abc_links
+#define REQUEST_NUM 0x50
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4, hw_chip_id)
+ __array(0x4, 0xC, reserved1)
+ __count(0x10, 8, total_link_cycles)
+ __count(0x18, 8, idle_cycles_for_a_link)
+ __count(0x20, 8, idle_cycles_for_b_link)
+ __count(0x28, 8, idle_cycles_for_c_link)
+ __array(0x30, 0x20, reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_wxyz_links
+#define REQUEST_NUM 0x60
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4, hw_chip_id)
+ __array(0x4, 0xC, reserved1)
+ __count(0x10, 8, total_link_cycles)
+ __count(0x18, 8, idle_cycles_for_w_link)
+ __count(0x20, 8, idle_cycles_for_x_link)
+ __count(0x28, 8, idle_cycles_for_y_link)
+ __count(0x30, 8, idle_cycles_for_z_link)
+ __array(0x38, 0x28, reserved2)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_gx_links
+#define REQUEST_NUM 0x70
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4, hw_chip_id)
+ __array(0x4, 0xC, reserved1)
+ __count(0x10, 8, gx0_in_address_cycles)
+ __count(0x18, 8, gx0_in_data_cycles)
+ __count(0x20, 8, gx0_in_retries)
+ __count(0x28, 8, gx0_in_bus_cycles)
+ __count(0x30, 8, gx0_in_cycles_total)
+ __count(0x38, 8, gx0_out_address_cycles)
+ __count(0x40, 8, gx0_out_data_cycles)
+ __count(0x48, 8, gx0_out_retries)
+ __count(0x50, 8, gx0_out_bus_cycles)
+ __count(0x58, 8, gx0_out_cycles_total)
+ __count(0x60, 8, gx1_in_address_cycles)
+ __count(0x68, 8, gx1_in_data_cycles)
+ __count(0x70, 8, gx1_in_retries)
+ __count(0x78, 8, gx1_in_bus_cycles)
+ __count(0x80, 8, gx1_in_cycles_total)
+ __count(0x88, 8, gx1_out_address_cycles)
+ __count(0x90, 8, gx1_out_data_cycles)
+ __count(0x98, 8, gx1_out_retries)
+ __count(0xA0, 8, gx1_out_bus_cycles)
+ __count(0xA8, 8, gx1_out_cycles_total)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME processor_bus_utilization_mc_links
+#define REQUEST_NUM 0x80
+#define REQUEST_IDX_KIND "hw_chip_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4, hw_chip_id)
+ __array(0x4, 0xC, reserved1)
+ __count(0x10, 8, mc0_frames)
+ __count(0x18, 8, mc0_reads)
+ __count(0x20, 8, mc0_write)
+ __count(0x28, 8, mc0_total_cycles)
+ __count(0x30, 8, mc1_frames)
+ __count(0x38, 8, mc1_reads)
+ __count(0x40, 8, mc1_writes)
+ __count(0x48, 8, mc1_total_cycles)
+)
+#include I(REQUEST_END)
+
+/* Processor_config (0x90) skipped, no counters */
+/* Current_processor_frequency (0x91) skipped, no counters */
+
+#define REQUEST_NAME processor_core_utilization
+#define REQUEST_NUM 0x94
+#define REQUEST_IDX_KIND "phys_processor_idx=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 4, phys_processor_idx)
+ __field(0x4, 4, hw_processor_id)
+ __count(0x8, 8, cycles_across_any_thread)
+ __count(0x10, 8, timebase_at_collection)
+ __count(0x18, 8, purr_cycles)
+ __count(0x20, 8, sum_of_cycles_across_all_threads)
+ __count(0x28, 8, instructions_completed)
+)
+#include I(REQUEST_END)
+#endif
+
+/* Processor_core_power_mode (0x95) skipped, no counters */
+/* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
+ * no counters */
+/* Affinity_domain_information_by_domain (0xB0) skipped, no counters */
+/* Affinity_domain_information_by_partition (0xB1) skipped, no counters */
+/* Physical_memory_info (0xC0) skipped, no counters */
+/* Processor_bus_topology (0xD0) skipped, no counters */
+
+#define REQUEST_NAME partition_hypervisor_queuing_times
+#define REQUEST_NUM 0xE0
+#define REQUEST_IDX_KIND "partition_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 2, partition_id)
+ __array(0x2, 6, reserved1)
+ __count(0x8, 8, time_waiting_for_entitlement)
+ __count(0x10, 8, times_waited_for_entitlement)
+ __count(0x18, 8, time_waiting_for_phys_processor)
+ __count(0x20, 8, times_waited_for_phys_processor)
+ __count(0x28, 8, dispatches_on_home_core)
+ __count(0x30, 8, dispatches_on_home_primary_affinity_domain)
+ __count(0x38, 8, dispatches_on_home_secondary_affinity_domain)
+ __count(0x40, 8, dispatches_off_home_secondary_affinity_domain)
+ __count(0x48, 8, dispatches_on_dedicated_processor_donating_cycles)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_hypervisor_times
+#define REQUEST_NUM 0xF0
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0, 8, time_spent_to_dispatch_virtual_processors)
+ __count(0x8, 8, time_spent_processing_virtual_processor_timers)
+ __count(0x10, 8, time_spent_managing_partitions_over_entitlement)
+ __count(0x18, 8, time_spent_on_system_management)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME system_tlbie_count_and_time
+#define REQUEST_NUM 0xF4
+#define REQUEST_IDX_KIND "starting_index=0xffffffff"
+#include I(REQUEST_BEGIN)
+REQUEST(__count(0, 8, tlbie_instructions_issued)
+ /*
+ * FIXME: The spec says the offset here is 0x10, which I suspect
+ * is wrong.
+ */
+ __count(0x8, 8, time_spent_issuing_tlbies)
+)
+#include I(REQUEST_END)
+
+#define REQUEST_NAME partition_instruction_count_and_time
+#define REQUEST_NUM 0x100
+#define REQUEST_IDX_KIND "partition_id=?"
+#include I(REQUEST_BEGIN)
+REQUEST(__field(0, 2, partition_id)
+ __array(0x2, 0x6, reserved1)
+ __count(0x8, 8, instructions_performed)
+ __count(0x10, 8, time_collected)
+)
+#include I(REQUEST_END)
+
+/* set_mmcrh (0x80001000) skipped, no counters */
+/* retrieve_hpmcx (0x80002000) skipped, no counters */
+
+#include "req-gen/_end.h"
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 15fc76c93022..241551d1282f 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Hypervisor supplied "gpci" ("get performance counter info") performance
* counter support
*
* Author: Cody P Schafer <cody@linux.vnet.ibm.com>
* Copyright 2014 IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "hv-gpci: " fmt
@@ -31,7 +27,18 @@
/* u32 */
EVENT_DEFINE_RANGE_FORMAT(request, config, 0, 31);
/* u32 */
+/*
+ * Note that starting_index, phys_processor_idx, sibling_part_id,
+ * hw_chip_id, partition_id all refer to the same bit range. They
+ * are basically aliases for the starting_index. The specific alias
+ * used depends on the event. See REQUEST_IDX_KIND in hv-gpci-requests.h
+ */
EVENT_DEFINE_RANGE_FORMAT(starting_index, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(phys_processor_idx, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(sibling_part_id, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(hw_chip_id, config, 32, 63);
+EVENT_DEFINE_RANGE_FORMAT_LITE(partition_id, config, 32, 63);
+
/* u16 */
EVENT_DEFINE_RANGE_FORMAT(secondary_index, config1, 0, 15);
/* u8 */
@@ -41,9 +48,15 @@ EVENT_DEFINE_RANGE_FORMAT(length, config1, 24, 31);
/* u32, byte offset */
EVENT_DEFINE_RANGE_FORMAT(offset, config1, 32, 63);
+static cpumask_t hv_gpci_cpumask;
+
static struct attribute *format_attrs[] = {
&format_attr_request.attr,
&format_attr_starting_index.attr,
+ &format_attr_phys_processor_idx.attr,
+ &format_attr_sibling_part_id.attr,
+ &format_attr_hw_chip_id.attr,
+ &format_attr_partition_id.attr,
&format_attr_secondary_index.attr,
&format_attr_counter_info_version.attr,
@@ -52,11 +65,16 @@ static struct attribute *format_attrs[] = {
NULL,
};
-static struct attribute_group format_group = {
+static const struct attribute_group format_group = {
.name = "format",
.attrs = format_attrs,
};
+static struct attribute_group event_group = {
+ .name = "events",
+ /* .attrs is set in init */
+};
+
#define HV_CAPS_ATTR(_name, _format) \
static ssize_t _name##_show(struct device *dev, \
struct device_attribute *attr, \
@@ -78,7 +96,523 @@ static ssize_t kernel_version_show(struct device *dev,
return sprintf(page, "0x%x\n", COUNTER_INFO_VERSION_CURRENT);
}
+/*
+ * sysfs show handler for the "cpumask" attribute: prints hv_gpci_cpumask
+ * into @buf (list format is requested from cpumap_print_to_pagebuf()).
+ */
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	const bool as_list = true;
+
+	return cpumap_print_to_pagebuf(as_list, buf, &hv_gpci_cpumask);
+}
+
+/* Interface attribute array index to store system information */
+#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6
+#define INTERFACE_PROCESSOR_CONFIG_ATTR 7
+#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8
+#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9
+#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10
+#define INTERFACE_NULL_ATTR 11
+
+/* Counter request value to retrieve system information */
+enum {
+ PROCESSOR_BUS_TOPOLOGY,
+ PROCESSOR_CONFIG,
+ AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */
+ AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */
+ AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */
+};
+
+static int sysinfo_counter_request[] = {
+ [PROCESSOR_BUS_TOPOLOGY] = 0xD0,
+ [PROCESSOR_CONFIG] = 0x90,
+ [AFFINITY_DOMAIN_VIA_VP] = 0xA0,
+ [AFFINITY_DOMAIN_VIA_DOM] = 0xB0,
+ [AFFINITY_DOMAIN_VIA_PAR] = 0xB1,
+};
+
+static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t));
+
+/*
+ * Issue one H_GET_PERF_COUNTER_INFO hcall for a system-information request
+ * and append the returned counter_value elements to @buf as hex text, one
+ * element per line.
+ *
+ * @req:             counter request value
+ * @starting_index:  starting index passed to the hypervisor
+ * @secondary_index: secondary index passed to the hypervisor
+ * @buf:             output buffer; never written at or beyond PAGE_SIZE
+ * @n:               in/out: number of bytes already stored in @buf
+ * @arg:             request buffer, zeroed by the caller before the call
+ *
+ * Returns 0 (H_SUCCESS) or H_PARAMETER (partial data returned, the caller is
+ * expected to issue a follow-up request), or a negative errno converted to
+ * unsigned long: -EPERM, -EIO or -EFBIG.
+ */
+static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index,
+			u16 secondary_index, char *buf,
+			size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	unsigned long ret;
+	size_t i, j, nr_elements, element_size;
+
+	arg->params.counter_request = cpu_to_be32(req);
+	arg->params.starting_index = cpu_to_be32(starting_index);
+	arg->params.secondary_index = cpu_to_be16(secondary_index);
+
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
+	 * which means that the current buffer size cannot accommodate
+	 * all the information and a partial buffer returned.
+	 * hcall fails in case of ret value other than H_SUCCESS or H_PARAMETER.
+	 *
+	 * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+	 * performance information, and required to set
+	 * "Enable Performance Information Collection" option.
+	 */
+	if (ret == H_AUTHORITY)
+		return -EPERM;
+
+	/*
+	 * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+	 * because of invalid buffer-length/address or due to some hardware
+	 * error.
+	 */
+	if (ret && (ret != H_PARAMETER))
+		return -EIO;
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+	 * to show the total number of counter_value array elements
+	 * returned via hcall.
+	 * hcall also populates 'cv_element_size' corresponds to individual
+	 * counter_value array element size. Below loop go through all
+	 * counter_value array elements as per their size and add it to
+	 * the output buffer.
+	 */
+	nr_elements = be16_to_cpu(arg->params.returned_values);
+	element_size = be16_to_cpu(arg->params.cv_element_size);
+
+	for (i = 0; i < nr_elements; i++) {
+		/*
+		 * One element takes two hex digits per byte plus a newline,
+		 * and sprintf() appends a terminating NUL. Check for room
+		 * before writing, so the buffer is never overrun; checking
+		 * only after the loop would detect the overflow too late.
+		 */
+		if (*n + 2 * element_size + 1 >= PAGE_SIZE) {
+			pr_info("System information exceeds PAGE_SIZE\n");
+			return -EFBIG;
+		}
+
+		for (j = i * element_size; j < (i + 1) * element_size; j++)
+			*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]);
+		*n += sprintf(buf + *n, "\n");
+	}
+
+	return ret;
+}
+
+/*
+ * sysfs show handler for the processor_bus_topology attribute.
+ *
+ * Issues counter request 0xD0 repeatedly, appending each batch of results to
+ * @buf as hex text, for as long as the hypervisor reports a partial buffer.
+ * Returns the number of bytes written, or the negative errno propagated from
+ * systeminfo_gpci_request().
+ */
+static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr,
+						char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0xD0 corresponds to request
+	 * type 'Processor_bus_topology', to retrieve
+	 * the system topology information.
+	 * starting_index value implies the starting hardware
+	 * chip id.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to make subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) +
+				(arg->bytes[last_element + 1] << 16) +
+				(arg->bytes[last_element] << 24) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY],
+				starting_index, 0, buf, &n, arg);
+	}
+
+	/*
+	 * get_cpu_var() disabled preemption; it must be re-enabled on every
+	 * exit path, the successful ones included.
+	 */
+	put_cpu_var(hv_gpci_reqb);
+
+	/* Anything other than H_SUCCESS here is a negative errno. */
+	if (ret)
+		return ret;
+
+	return n;
+}
+
+/*
+ * sysfs show handler for the processor_config attribute.
+ *
+ * Issues counter request 0x90 repeatedly, appending each batch of results to
+ * @buf as hex text, for as long as the hypervisor reports a partial buffer.
+ * Returns the number of bytes written, or the negative errno propagated from
+ * systeminfo_gpci_request().
+ */
+static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr,
+					char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request value 0x90 corresponds to request
+	 * type 'Processor_config', to retrieve
+	 * the system processor information.
+	 * starting_index value implies the starting hardware
+	 * processor index.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) +
+				(arg->bytes[last_element + 1] << 16) +
+				(arg->bytes[last_element] << 24) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG],
+				starting_index, 0, buf, &n, arg);
+	}
+
+	/*
+	 * get_cpu_var() disabled preemption; it must be re-enabled on every
+	 * exit path, the successful ones included.
+	 */
+	put_cpu_var(hv_gpci_reqb);
+
+	/* Anything other than H_SUCCESS here is a negative errno. */
+	if (ret)
+		return ret;
+
+	return n;
+}
+
+/*
+ * sysfs show handler for the affinity_domain_via_virtual_processor attribute.
+ *
+ * Issues counter request 0xA0 repeatedly, appending each batch of results to
+ * @buf as hex text, for as long as the hypervisor reports a partial buffer.
+ * Returns the number of bytes written, or the negative errno propagated from
+ * systeminfo_gpci_request().
+ */
+static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev,
+			struct device_attribute *attr, char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xA0 corresponds to request
+	 * type 'Affinity_domain_information_by_virtual_processor',
+	 * to retrieve the system affinity domain information.
+	 * The first request uses starting_index 0 and secondary_index 0.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next secondary index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next secondary index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index and secondary index type is part of the
+		 * counter_value buffer elements, use the starting index value in the
+		 * last array element as subsequent starting index, and use secondary index
+		 * value in the last array element plus 1 as subsequent secondary index.
+		 * For counter request '0xA0', starting index points to partition id
+		 * and secondary index points to corresponding virtual processor index.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8);
+		u16 secondary_index = arg->bytes[last_element + 3] +
+				(arg->bytes[last_element + 2] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP],
+				starting_index, secondary_index, buf, &n, arg);
+	}
+
+	/*
+	 * get_cpu_var() disabled preemption; it must be re-enabled on every
+	 * exit path, the successful ones included.
+	 */
+	put_cpu_var(hv_gpci_reqb);
+
+	/* Anything other than H_SUCCESS here is a negative errno. */
+	if (ret)
+		return ret;
+
+	return n;
+}
+
+/*
+ * sysfs show handler for the affinity_domain_via_domain attribute.
+ *
+ * Issues counter request 0xB0 repeatedly, appending each batch of results to
+ * @buf as hex text, for as long as the hypervisor reports a partial buffer.
+ * Returns the number of bytes written, or the negative errno propagated from
+ * systeminfo_gpci_request().
+ */
+static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr,
+						char *buf)
+{
+	struct hv_gpci_request_buffer *arg;
+	unsigned long ret;
+	size_t n = 0;
+
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * Pass the counter request 0xB0 corresponds to request
+	 * type 'Affinity_domain_information_by_domain',
+	 * to retrieve the system affinity domain information.
+	 * The first request uses starting_index 0.
+	 */
+	ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+			0, 0, buf, &n, arg);
+
+	/*
+	 * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which
+	 * implies that buffer can't accommodate all information, and a partial buffer
+	 * returned. To handle that, we need to take subsequent requests
+	 * with next starting index to retrieve additional (missing) data.
+	 * Below loop do subsequent hcalls with next starting index and add it
+	 * to buffer until we get all the information.
+	 */
+	while (ret == H_PARAMETER) {
+		int returned_values = be16_to_cpu(arg->params.returned_values);
+		int elementsize = be16_to_cpu(arg->params.cv_element_size);
+		int last_element = (returned_values - 1) * elementsize;
+
+		/*
+		 * Since the starting index value is part of counter_value
+		 * buffer elements, use the starting index value in the last
+		 * element and add 1 to make subsequent hcalls.
+		 */
+		u32 starting_index = arg->bytes[last_element + 1] +
+				(arg->bytes[last_element] << 8) + 1;
+
+		memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+		ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM],
+				starting_index, 0, buf, &n, arg);
+	}
+
+	/*
+	 * get_cpu_var() disabled preemption; it must be re-enabled on every
+	 * exit path, the successful ones included.
+	 */
+	put_cpu_var(hv_gpci_reqb);
+
+	/* Anything other than H_SUCCESS here is a negative errno. */
+	if (ret)
+		return ret;
+
+	return n;
+}
+
+/*
+ * Append the first @returned_values variable-size elements held in @arg to
+ * @buf as hex text.
+ *
+ * @returned_values: number of elements to emit; zero or negative emits nothing
+ * @element_size:    size in bytes of the fixed part of every element
+ * @buf:             output buffer
+ * @last_element:    out: byte offset in arg->bytes just past the last element
+ *                   emitted (0 when nothing was emitted)
+ * @n:               in/out: number of bytes already stored in @buf
+ * @arg:             request buffer filled in by the hcall
+ *
+ * NOTE(review): no PAGE_SIZE bound is enforced here; the caller checks @n
+ * only after this function returns.
+ */
+static void affinity_domain_via_partition_result_parse(int returned_values,
+			int element_size, char *buf, size_t *last_element,
+			size_t *n, struct hv_gpci_request_buffer *arg)
+{
+	size_t j = 0;
+	size_t k, l, m;
+	uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele;
+	int i;
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values'
+	 * to show the total number of counter_value array elements
+	 * returned via hcall.
+	 * Unlike other request types, the data structure returned by this
+	 * request is variable-size. For this counter request type,
+	 * hcall populates 'cv_element_size' corresponds to minimum size of
+	 * the structure returned i.e; the size of the structure with no domain
+	 * information. Below loop go through all counter_value array
+	 * to determine the number and size of each domain array element and
+	 * add it to the output buffer.
+	 *
+	 * The loop counter is a signed int on purpose: the caller may pass
+	 * 'returned_values - 1', and comparing a negative count against an
+	 * unsigned counter would turn it into a huge loop bound.
+	 */
+	for (i = 0; i < returned_values; i++) {
+		/* Fixed-size part of the element, starting at offset j. */
+		for (k = j; k < j + element_size; k++)
+			*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
+		*n += sprintf(buf + *n, "\n");
+
+		/* Domain count and per-domain size, read relative to offset k. */
+		total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3];
+		size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1];
+
+		for (l = 0; l < total_affinity_domain_ele; l++) {
+			for (m = 0; m < size_of_each_affinity_domain_ele; m++) {
+				*n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]);
+				k++;
+			}
+			*n += sprintf(buf + *n, "\n");
+		}
+
+		*n += sprintf(buf + *n, "\n");
+		j = k;
+	}
+
+	/*
+	 * j always equals the offset just past the last emitted element and
+	 * stays 0 when the loop did not run, so the caller never receives an
+	 * uninitialised offset.
+	 */
+	*last_element = j;
+}
+
+static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct hv_gpci_request_buffer *arg;
+ unsigned long ret;
+ size_t n = 0;
+ size_t last_element = 0;
+ u32 starting_index;
+
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * Pass the counter request value 0xB1 corresponds to counter request
+ * type 'Affinity_domain_information_by_partition',
+ * to retrieve the system affinity domain by partition information.
+ * The first request uses starting_index 0. Unlike the other sysinfo
+ * show handlers, the hcall is issued directly here and the result is
+ * formatted by affinity_domain_via_partition_result_parse().
+ */
+ arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+ arg->params.starting_index = cpu_to_be32(0);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ if (!ret)
+ goto parse_result;
+
+ if (ret && (ret != H_PARAMETER))
+ goto out;
+
+ /*
+ * ret value as 'H_PARAMETER' implies that the current buffer size
+ * can't accommodate all the information, and a partial buffer
+ * returned. To handle that, we need to make subsequent requests
+ * with next starting index to retrieve additional (missing) data.
+ * Below loop do subsequent hcalls with next starting index and add it
+ * to buffer until we get all the information.
+ */
+ while (ret == H_PARAMETER) {
+ affinity_domain_via_partition_result_parse(
+ be16_to_cpu(arg->params.returned_values) - 1,
+ be16_to_cpu(arg->params.cv_element_size), buf,
+ &last_element, &n, arg);
+
+ if (n >= PAGE_SIZE) {
+ put_cpu_var(hv_gpci_reqb);
+ pr_debug("System information exceeds PAGE_SIZE\n");
+ return -EFBIG;
+ }
+
+ /*
+ * The call above was asked to emit all but the last returned
+ * element, so last_element is the byte offset of that final,
+ * unemitted element. Its first two bytes (big-endian) are used
+ * unchanged, without adding 1, as the starting index of the next
+ * hcall, so that element is requested again (presumably because
+ * it may have been truncated by the buffer limit).
+ */
+ starting_index = (u8)arg->bytes[last_element] << 8 |
+ (u8)arg->bytes[last_element + 1];
+
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+ arg->params.counter_request = cpu_to_be32(
+ sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+
+ ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ if (ret && (ret != H_PARAMETER))
+ goto out;
+ }
+
+parse_result:
+ affinity_domain_via_partition_result_parse(
+ be16_to_cpu(arg->params.returned_values),
+ be16_to_cpu(arg->params.cv_element_size),
+ buf, &last_element, &n, arg);
+
+ put_cpu_var(hv_gpci_reqb);
+ return n;
+
+out:
+ put_cpu_var(hv_gpci_reqb);
+
+ /*
+ * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL',
+ * which means that the current buffer size cannot accommodate
+ * all the information and a partial buffer returned.
+ * hcall fails in case of ret value other than H_SUCCESS or H_PARAMETER.
+ *
+ * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+ * performance information, and required to set
+ * "Enable Performance Information Collection" option.
+ */
+ if (ret == H_AUTHORITY)
+ return -EPERM;
+
+ /*
+ * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE
+ * because of invalid buffer-length/address or due to some hardware
+ * error.
+ */
+ return -EIO;
+}
+
static DEVICE_ATTR_RO(kernel_version);
+static DEVICE_ATTR_RO(cpumask);
+
HV_CAPS_ATTR(version, "0x%x\n");
HV_CAPS_ATTR(ga, "%d\n");
HV_CAPS_ATTR(expanded, "%d\n");
@@ -92,23 +626,56 @@ static struct attribute *interface_attrs[] = {
&hv_caps_attr_expanded.attr,
&hv_caps_attr_lab.attr,
&hv_caps_attr_collect_privileged.attr,
+ /*
+ * This NULL is a placeholder for the processor_bus_topology
+ * attribute, set in init function if applicable.
+ */
NULL,
+ /*
+ * This NULL is a placeholder for the processor_config
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the affinity_domain_via_virtual_processor
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the affinity_domain_via_domain
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ /*
+ * This NULL is a placeholder for the affinity_domain_via_partition
+ * attribute, set in init function if applicable.
+ */
+ NULL,
+ NULL,
+};
+
+static struct attribute *cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group cpumask_attr_group = {
+ .attrs = cpumask_attrs,
};
-static struct attribute_group interface_group = {
+static const struct attribute_group interface_group = {
.name = "interface",
.attrs = interface_attrs,
};
static const struct attribute_group *attr_groups[] = {
&format_group,
+ &event_group,
&interface_group,
+ &cpumask_attr_group,
NULL,
};
-#define GPCI_MAX_DATA_BYTES \
- (1024 - sizeof(struct hv_get_perf_counter_info_params))
-
static unsigned long single_gpci_request(u32 req, u32 starting_index,
u16 secondary_index, u8 version_in, u32 offset, u8 length,
u64 *value)
@@ -116,24 +683,35 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
unsigned long ret;
size_t i;
u64 count;
+ struct hv_gpci_request_buffer *arg;
- struct {
- struct hv_get_perf_counter_info_params params;
- uint8_t bytes[GPCI_MAX_DATA_BYTES];
- } __packed __aligned(sizeof(uint64_t)) arg = {
- .params = {
- .counter_request = cpu_to_be32(req),
- .starting_index = cpu_to_be32(starting_index),
- .secondary_index = cpu_to_be16(secondary_index),
- .counter_info_version_in = version_in,
- }
- };
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ arg->params.counter_request = cpu_to_be32(req);
+ arg->params.starting_index = cpu_to_be32(starting_index);
+ arg->params.secondary_index = cpu_to_be16(secondary_index);
+ arg->params.counter_info_version_in = version_in;
ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
- virt_to_phys(&arg), sizeof(arg));
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * ret value as 'H_PARAMETER' with detail_rc as 'GEN_BUF_TOO_SMALL',
+ * specifies that the current buffer size cannot accommodate
+ * all the information and a partial buffer returned.
+ * Since in this function we are only accessing data for a given starting index,
+ * we don't need to accommodate whole data and can get required count by
+ * accessing first entry data.
+ * Hence hcall fails only incase the ret value is other than H_SUCCESS or
+ * H_PARAMETER with detail_rc value as GEN_BUF_TOO_SMALL(0x1B).
+ */
+ if (ret == H_PARAMETER && be32_to_cpu(arg->params.detail_rc) == 0x1B)
+ ret = 0;
+
if (ret) {
pr_devel("hcall failed: 0x%lx\n", ret);
- return ret;
+ goto out;
}
/*
@@ -142,9 +720,11 @@ static unsigned long single_gpci_request(u32 req, u32 starting_index,
*/
count = 0;
for (i = offset; i < offset + length; i++)
- count |= arg.bytes[i] << (i - offset);
+ count |= (u64)(arg->bytes[i]) << ((length - 1 - (i - offset)) * 8);
*value = count;
+out:
+ put_cpu_var(hv_gpci_reqb);
return ret;
}
@@ -193,6 +773,7 @@ static int h_gpci_event_init(struct perf_event *event)
{
u64 count;
u8 length;
+ unsigned long ret;
/* Not our event */
if (event->attr.type != event->pmu->type)
@@ -204,15 +785,6 @@ static int h_gpci_event_init(struct perf_event *event)
return -EINVAL;
}
- /* unsupported modes and filters */
- if (event->attr.exclude_user ||
- event->attr.exclude_kernel ||
- event->attr.exclude_hv ||
- event->attr.exclude_idle ||
- event->attr.exclude_host ||
- event->attr.exclude_guest)
- return -EINVAL;
-
/* no branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;
@@ -224,21 +796,31 @@ static int h_gpci_event_init(struct perf_event *event)
}
/* last byte within the buffer? */
- if ((event_get_offset(event) + length) > GPCI_MAX_DATA_BYTES) {
+ if ((event_get_offset(event) + length) > HGPCI_MAX_DATA_BYTES) {
pr_devel("request outside of buffer: %zu > %zu\n",
(size_t)event_get_offset(event) + length,
- GPCI_MAX_DATA_BYTES);
+ HGPCI_MAX_DATA_BYTES);
return -EINVAL;
}
/* check if the request works... */
- if (single_gpci_request(event_get_request(event),
+ ret = single_gpci_request(event_get_request(event),
event_get_starting_index(event),
event_get_secondary_index(event),
event_get_counter_info_version(event),
event_get_offset(event),
length,
- &count)) {
+ &count);
+
+ /*
+ * ret value as H_AUTHORITY implies that partition is not permitted to retrieve
+ * performance information, and required to set
+ * "Enable Performance Information Collection" option.
+ */
+ if (ret == H_AUTHORITY)
+ return -EPERM;
+
+ if (ret) {
pr_devel("gpci hcall failed\n");
return -EINVAL;
}
@@ -246,11 +828,6 @@ static int h_gpci_event_init(struct perf_event *event)
return 0;
}
-static int h_gpci_event_idx(struct perf_event *event)
-{
- return 0;
-}
-
static struct pmu h_gpci_pmu = {
.task_ctx_nr = perf_invalid_context,
@@ -262,14 +839,157 @@ static struct pmu h_gpci_pmu = {
.start = h_gpci_event_start,
.stop = h_gpci_event_stop,
.read = h_gpci_event_update,
- .event_idx = h_gpci_event_idx,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
};
+/*
+ * CPU hotplug "online" callback: record @cpu in hv_gpci_cpumask, but only
+ * when the mask is still empty. Always returns 0.
+ */
+static int ppc_hv_gpci_cpu_online(unsigned int cpu)
+{
+	/* A CPU is already recorded in the mask; nothing to do. */
+	if (!cpumask_empty(&hv_gpci_cpumask))
+		return 0;
+
+	cpumask_set_cpu(cpu, &hv_gpci_cpumask);
+	return 0;
+}
+
+/*
+ * CPU hotplug "offline" callback: if @cpu is the one recorded in
+ * hv_gpci_cpumask, pick the last active CPU as the replacement and migrate
+ * the PMU context to it. Returns 0, or -1 when no valid replacement exists.
+ */
+static int ppc_hv_gpci_cpu_offline(unsigned int cpu)
+{
+	int new_cpu;
+
+	/* Only act when the exiting cpu is the one collecting gpci events */
+	if (cpumask_test_and_clear_cpu(cpu, &hv_gpci_cpumask)) {
+		/* Choose the replacement cpu */
+		new_cpu = cpumask_last(cpu_active_mask);
+
+		if (new_cpu < 0 || new_cpu >= nr_cpu_ids) {
+			pr_err("hv_gpci: CPU hotplug init failed\n");
+			return -1;
+		}
+
+		/* Hand the gpci events over to the replacement */
+		cpumask_set_cpu(new_cpu, &hv_gpci_cpumask);
+		perf_pmu_migrate_context(&h_gpci_pmu, cpu, new_cpu);
+	}
+
+	return 0;
+}
+
+/*
+ * Register the online/offline callbacks for the
+ * CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE hotplug state and return the result
+ * of cpuhp_setup_state().
+ */
+static int hv_gpci_cpu_hotplug_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+			       "perf/powerpc/hv_gcpi:online",
+			       ppc_hv_gpci_cpu_online,
+			       ppc_hv_gpci_cpu_offline);
+
+	return rc;
+}
+
+/*
+ * Probe the hypervisor with counter request @req and, when the hcall returns
+ * H_SUCCESS, H_AUTHORITY or H_PARAMETER, allocate the read-only device
+ * attribute that belongs to slot @sysinfo_interface_group_index of
+ * interface_attrs.
+ *
+ * Returns the new attribute, or NULL when the index is out of range, the
+ * hcall returns any other value, or the allocation fails.
+ */
+static struct device_attribute *sysinfo_device_attr_create(int sysinfo_interface_group_index,
+							    u32 req)
+{
+	struct hv_gpci_request_buffer *arg;
+	struct device_attribute *attr;
+	unsigned long ret;
+
+	if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR ||
+	    sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) {
+		pr_info("Wrong interface group index for system information\n");
+		return NULL;
+	}
+
+	/* Issue a probe hcall to see whether this request value is supported */
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+	arg->params.counter_request = cpu_to_be32(req);
+	ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+				 virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+	put_cpu_var(hv_gpci_reqb);
+
+	/* Only these three return values get a sysfs file */
+	if (ret && ret != H_AUTHORITY && ret != H_PARAMETER) {
+		pr_devel("hcall failed, with error: 0x%lx\n", ret);
+		return NULL;
+	}
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	if (!attr)
+		return NULL;
+
+	sysfs_attr_init(&attr->attr);
+	attr->attr.mode = 0444;
+
+	/* Name and show handler depend on the interface_attrs slot */
+	switch (sysinfo_interface_group_index) {
+	case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR:
+		attr->attr.name = "processor_bus_topology";
+		attr->show = processor_bus_topology_show;
+		break;
+	case INTERFACE_PROCESSOR_CONFIG_ATTR:
+		attr->attr.name = "processor_config";
+		attr->show = processor_config_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR:
+		attr->attr.name = "affinity_domain_via_virtual_processor";
+		attr->show = affinity_domain_via_virtual_processor_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR:
+		attr->attr.name = "affinity_domain_via_domain";
+		attr->show = affinity_domain_via_domain_show;
+		break;
+	case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR:
+		attr->attr.name = "affinity_domain_via_partition";
+		attr->show = affinity_domain_via_partition_show;
+		break;
+	}
+
+	return attr;
+}
+
+/*
+ * Create the device attributes for every system-information counter request
+ * and install them into their placeholder slots in interface_attrs.
+ *
+ * It is all or nothing: if any attribute cannot be created, the ones already
+ * allocated are freed and interface_attrs is left untouched.
+ */
+static void add_sysinfo_interface_files(void)
+{
+	struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR];
+	int sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR;
+	int i;
+
+	/* Build one device attribute per counter request value */
+	for (i = 0; i < sysfs_count; i++) {
+		attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR,
+						     sysinfo_counter_request[i]);
+		if (attr[i])
+			continue;
+
+		/*
+		 * Creation failed for slot i: release the attributes created
+		 * for the earlier slots and give up.
+		 */
+		while (--i >= 0)
+			kfree(attr[i]);
+		return;
+	}
+
+	/* Everything succeeded: publish the attributes in interface_attrs */
+	for (i = 0; i < sysfs_count; i++)
+		interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr;
+}
+
static int hv_gpci_init(void)
{
int r;
unsigned long hret;
struct hv_perf_caps caps;
+ struct hv_gpci_request_buffer *arg;
+
+ hv_gpci_assert_offsets_correct();
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
pr_debug("not a virtualized system, not enabling\n");
@@ -283,13 +1003,52 @@ static int hv_gpci_init(void)
return -ENODEV;
}
+ /* init cpuhotplug */
+ r = hv_gpci_cpu_hotplug_init();
+ if (r)
+ return r;
+
/* sampling not supported */
h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+ arg = (void *)get_cpu_var(hv_gpci_reqb);
+ memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+ /*
+ * hcall H_GET_PERF_COUNTER_INFO populates the output
+ * counter_info_version value based on the system hypervisor.
+ * Pass the counter request 0x10 corresponds to request type
+ * 'Dispatch_timebase_by_processor', to get the supported
+ * counter_info_version.
+ */
+ arg->params.counter_request = cpu_to_be32(0x10);
+
+ r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+ virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+ if (r) {
+ pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
+ arg->params.counter_info_version_out = 0x8;
+ }
+
+ /*
+ * Use counter_info_version_out value to assign
+ * required hv-gpci event list.
+ */
+ if (arg->params.counter_info_version_out >= 0x8)
+ event_group.attrs = hv_gpci_event_attrs;
+ else
+ event_group.attrs = hv_gpci_event_attrs_v6;
+
+ put_cpu_var(hv_gpci_reqb);
+
r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
if (r)
return r;
+ /* sysinfo interface files are only available for power10 and above platforms */
+ if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10)
+ add_sysinfo_interface_files();
+
return 0;
}
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
index b25f460c9cce..c72020912dea 100644
--- a/arch/powerpc/perf/hv-gpci.h
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -1,33 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef LINUX_POWERPC_PERF_HV_GPCI_H_
#define LINUX_POWERPC_PERF_HV_GPCI_H_
-#include <linux/types.h>
-
-/* From the document "H_GetPerformanceCounterInfo Interface" v1.07 */
-
-/* H_GET_PERF_COUNTER_INFO argument */
-struct hv_get_perf_counter_info_params {
- __be32 counter_request; /* I */
- __be32 starting_index; /* IO */
- __be16 secondary_index; /* IO */
- __be16 returned_values; /* O */
- __be32 detail_rc; /* O, only needed when called via *_norets() */
-
- /*
- * O, size each of counter_value element in bytes, only set for version
- * >= 0x3
- */
- __be16 cv_element_size;
-
- /* I, 0 (zero) for versions < 0x3 */
- __u8 counter_info_version_in;
-
- /* O, 0 (zero) if version < 0x3. Must be set to 0 when making hcall */
- __u8 counter_info_version_out;
- __u8 reserved[0xC];
- __u8 counter_value[];
-} __packed;
-
/*
* counter info version => fw version/reference (spec version)
*
@@ -42,32 +16,20 @@ struct hv_get_perf_counter_info_params {
*/
#define COUNTER_INFO_VERSION_CURRENT 0x8
-/*
- * These determine the counter_value[] layout and the meaning of starting_index
- * and secondary_index.
- *
- * Unless otherwise noted, @secondary_index is unused and ignored.
- */
-enum counter_info_requests {
-
- /* GENERAL */
-
- /* @starting_index: must be -1 (to refer to the current partition)
- */
- CIR_SYSTEM_PERFORMANCE_CAPABILITIES = 0X40,
+/* capability mask masks. */
+enum {
+ HV_GPCI_CM_GA = (1 << 7),
+ HV_GPCI_CM_EXPANDED = (1 << 6),
+ HV_GPCI_CM_LAB = (1 << 5)
};
-struct cv_system_performance_capabilities {
- /* If != 0, allowed to collect data from other partitions */
- __u8 perf_collect_privileged;
-
- /* These following are only valid if counter_info_version >= 0x3 */
-#define CV_CM_GA (1 << 7)
-#define CV_CM_EXPANDED (1 << 6)
-#define CV_CM_LAB (1 << 5)
- /* remaining bits are reserved */
- __u8 capability_mask;
- __u8 reserved[0xE];
-} __packed;
+#define REQUEST_FILE "../hv-gpci-requests.h"
+#define NAME_LOWER hv_gpci
+#define NAME_UPPER HV_GPCI
+#define ENABLE_EVENTS_COUNTERINFO_V6
+#include "req-gen/perf.h"
+#undef REQUEST_FILE
+#undef NAME_LOWER
+#undef NAME_UPPER
#endif
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
new file mode 100644
index 000000000000..8664a7d297ad
--- /dev/null
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -0,0 +1,1878 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * In-Memory Collection (IMC) Performance Monitor counter support.
+ *
+ * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation.
+ * (C) 2017 Anju T Sudhakar, IBM Corporation.
+ * (C) 2017 Hemant K Shaw, IBM Corporation.
+ */
+#include <linux/of.h>
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+#include <asm/opal.h>
+#include <asm/imc-pmu.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+
+/* Nest IMC data structures and variables */
+
+/*
+ * Used to avoid races in counting the nest-pmu units during hotplug
+ * register and unregister
+ */
+static DEFINE_MUTEX(nest_init_lock);
+static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); /* per-cpu ref returned by get_nest_pmu_ref() */
+static struct imc_pmu **per_nest_pmu_arr; /* walked up to its first NULL entry by nest_change_cpu_context() */
+static cpumask_t nest_imc_cpumask; /* CPUs designated for nest counter collection */
+static struct imc_pmu_ref *nest_imc_refc;
+static int nest_pmus; /* zero: no nest PMU registered (tested in the cpu offline path) */
+
+/* Core IMC data structures and variables */
+
+static cpumask_t core_imc_cpumask; /* CPUs designated for core counter collection */
+static struct imc_pmu_ref *core_imc_refc; /* indexed by core id (cpu / threads_per_core) */
+static struct imc_pmu *core_imc_pmu;
+
+/* Thread IMC data structures and variables */
+
+static DEFINE_PER_CPU(u64 *, thread_imc_mem); /* counter memory set up by thread_imc_mem_alloc() */
+static struct imc_pmu *thread_imc_pmu;
+static int thread_imc_mem_size;
+
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem); /* trace memory set up by trace_imc_mem_alloc() */
+static struct imc_pmu_ref *trace_imc_refc; /* indexed by core id (cpu / threads_per_core) */
+static int trace_imc_mem_size;
+
+/*
+ * Global data structure used to avoid races between thread,
+ * core and trace-imc.
+ *
+ * "id" records which IMC domain currently owns the counters (0 when no
+ * domain does) and "refc" counts the events of that domain. Both are
+ * read and written with "lock" held.
+ */
+static struct imc_pmu_ref imc_global_refc = {
+ .lock = __SPIN_LOCK_UNLOCKED(imc_global_refc.lock),
+ .id = 0,
+ .refc = 0,
+};
+
+/* Map a perf event to the imc_pmu that embeds the event's struct pmu. */
+static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
+{
+	struct pmu *base = event->pmu;
+
+	return container_of(base, struct imc_pmu, pmu);
+}
+
+PMU_FORMAT_ATTR(event, "config:0-61"); /* "event" field: config bits 0-61 */
+PMU_FORMAT_ATTR(offset, "config:0-31"); /* "offset" field: config bits 0-31 */
+PMU_FORMAT_ATTR(rvalue, "config:32"); /* "rvalue" field: config bit 32 */
+PMU_FORMAT_ATTR(mode, "config:33-40"); /* "mode" field: config bits 33-40 */
+static struct attribute *imc_format_attrs[] = { /* NULL-terminated member list of the "format" group */
+ &format_attr_event.attr,
+ &format_attr_offset.attr,
+ &format_attr_rvalue.attr,
+ &format_attr_mode.attr,
+ NULL,
+};
+
+static const struct attribute_group imc_format_group = {
+ .name = "format",
+ .attrs = imc_format_attrs,
+};
+
+/*
+ * Format attributes for imc trace-mode. The list below also reuses the
+ * "event" attribute (config:0-61) defined for the other IMC PMUs.
+ */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19"); /* config bits 0-19 */
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27"); /* config bits 20-27 */
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29"); /* config bits 28-29 */
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61"); /* config bits 30-61 */
+static struct attribute *trace_imc_format_attrs[] = { /* NULL-terminated member list of the "format" group */
+ &format_attr_event.attr,
+ &format_attr_cpmc_reserved.attr,
+ &format_attr_cpmc_event.attr,
+ &format_attr_cpmc_samplesel.attr,
+ &format_attr_cpmc_load.attr,
+ NULL,
+};
+
+static const struct attribute_group trace_imc_format_group = {
+.name = "format",
+.attrs = trace_imc_format_attrs,
+};
+
+/*
+ * sysfs show routine for the "cpumask" attribute: print the designated-CPU
+ * mask of the PMU's domain into "buf". Only the nest and core domains have
+ * such a mask; for any other domain nothing is written and 0 is returned.
+ */
+static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct pmu *base = dev_get_drvdata(dev);
+	struct imc_pmu *ipmu = container_of(base, struct imc_pmu, pmu);
+	cpumask_t *mask;
+
+	if (ipmu->domain == IMC_DOMAIN_NEST)
+		mask = &nest_imc_cpumask;
+	else if (ipmu->domain == IMC_DOMAIN_CORE)
+		mask = &core_imc_cpumask;
+	else
+		return 0;
+
+	return cpumap_print_to_pagebuf(true, buf, mask);
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL); /* read-only: show routine only, no store */
+
+static struct attribute *imc_pmu_cpumask_attrs[] = { /* NULL-terminated */
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static const struct attribute_group imc_pmu_cpumask_attr_group = { /* no .name is set for this group */
+ .attrs = imc_pmu_cpumask_attrs,
+};
+
+/*
+ * device_str_attr_create: allocate a read-only (0444) perf event attribute
+ * named "name" that carries "str" as its event string and is shown through
+ * perf_event_sysfs_show(). Neither string is copied, so both must stay
+ * valid for as long as the attribute is in use.
+ *
+ * Returns the embedded struct attribute, or NULL if the allocation fails.
+ */
+static struct attribute *device_str_attr_create(const char *name, const char *str)
+{
+	struct perf_pmu_events_attr *pattr = kzalloc(sizeof(*pattr), GFP_KERNEL);
+
+	if (!pattr)
+		return NULL;
+
+	sysfs_attr_init(&pattr->attr.attr);
+	pattr->attr.attr.name = name;
+	pattr->attr.attr.mode = 0444;
+	pattr->attr.show = perf_event_sysfs_show;
+	pattr->event_str = str;
+
+	return &pattr->attr.attr;
+}
+
+/*
+ * imc_parse_event: fill "event" from the device-tree node "np".
+ *
+ * @np:     event node; must provide "reg" and "event-name"
+ * @scale:  fallback scale string used when "np" has no "scale" (may be NULL)
+ * @unit:   fallback unit string used when "np" has no "unit" (may be NULL)
+ * @prefix: string prepended to the node's "event-name"
+ * @base:   base offset added to the node's "reg" value
+ * @event:  slot to fill; name/scale/unit are freshly allocated copies
+ *
+ * Returns 0 on success. On any failure returns -EINVAL with every string
+ * allocated so far released and the slot's string pointers reset to NULL.
+ */
+static int imc_parse_event(struct device_node *np, const char *scale,
+			   const char *unit, const char *prefix,
+			   u32 base, struct imc_events *event)
+{
+	const char *s;
+	u32 reg;
+
+	if (of_property_read_u32(np, "reg", &reg))
+		goto error;
+	/* Add the base_reg value to the "reg" */
+	event->value = base + reg;
+
+	if (of_property_read_string(np, "event-name", &s))
+		goto error;
+
+	event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s);
+	if (!event->name)
+		goto error;
+
+	/* A per-event "scale" overrides the PMU-wide one */
+	if (of_property_read_string(np, "scale", &s))
+		s = scale;
+
+	if (s) {
+		event->scale = kstrdup(s, GFP_KERNEL);
+		if (!event->scale)
+			goto error;
+	}
+
+	/* A per-event "unit" overrides the PMU-wide one */
+	if (of_property_read_string(np, "unit", &s))
+		s = unit;
+
+	if (s) {
+		event->unit = kstrdup(s, GFP_KERNEL);
+		if (!event->unit)
+			goto error;
+	}
+
+	return 0;
+error:
+	kfree(event->unit);
+	kfree(event->scale);
+	kfree(event->name);
+	/*
+	 * The caller hands this same slot to the next node after a failure.
+	 * Clear the pointers so that a later error path cannot free them a
+	 * second time and a later success cannot inherit a dangling
+	 * scale/unit from this attempt.
+	 */
+	event->unit = NULL;
+	event->scale = NULL;
+	event->name = NULL;
+	return -EINVAL;
+}
+
+/*
+ * imc_free_events: release the strings owned by the first "nr_entries"
+ * elements of "events" and then the array itself. A NULL "events" is
+ * accepted and ignored.
+ */
+static void imc_free_events(struct imc_events *events, int nr_entries)
+{
+	int idx = 0;
+
+	if (!events)
+		return;
+
+	while (idx < nr_entries) {
+		struct imc_events *ev = &events[idx++];
+
+		kfree(ev->unit);
+		kfree(ev->scale);
+		kfree(ev->name);
+	}
+
+	kfree(events);
+}
+
+/*
+ * update_events_in_group: Update the "events" information in an attr_group
+ * and assign the attr_group to the pmu "pmu".
+ *
+ * The events live in the node referenced by the "events" phandle of "node".
+ * Each child that parses successfully becomes one sysfs attribute, plus an
+ * optional "<name>.scale" and "<name>.unit" attribute.
+ *
+ * Returns 0 when the group was installed, and also when "node" lacks the
+ * "events", "events-prefix" or "reg" property (nothing is installed then).
+ * Returns -ENOMEM when an array or the group itself cannot be allocated.
+ * Failure to build an individual attribute only drops that attribute.
+ */
+static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu)
+{
+	struct attribute_group *attr_group;
+	struct attribute **attrs, *dev_str;
+	struct device_node *np, *pmu_events;
+	u32 handle, base_reg;
+	int i, j = 0, ct, ret;
+	const char *prefix, *g_scale, *g_unit;
+	const char *ev_val_str, *ev_scale_str, *ev_unit_str;
+
+	if (of_property_read_u32(node, "events", &handle))
+		return 0;
+
+	/* Did not find any node with a given phandle */
+	pmu_events = of_find_node_by_phandle(handle);
+	if (!pmu_events)
+		return 0;
+
+	/* Get a count of number of child nodes */
+	ct = of_get_child_count(pmu_events);
+
+	/* Get the event prefix */
+	if (of_property_read_string(node, "events-prefix", &prefix)) {
+		of_node_put(pmu_events);
+		return 0;
+	}
+
+	/* Get a global unit and scale data if available */
+	if (of_property_read_string(node, "scale", &g_scale))
+		g_scale = NULL;
+
+	if (of_property_read_string(node, "unit", &g_unit))
+		g_unit = NULL;
+
+	/*
+	 * "reg" property gives out the base offset of the counters data.
+	 * Without it base_reg would be used uninitialised, so treat a
+	 * missing "reg" like the other missing properties above.
+	 */
+	if (of_property_read_u32(node, "reg", &base_reg)) {
+		of_node_put(pmu_events);
+		return 0;
+	}
+
+	/* Allocate memory for the events */
+	pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL);
+	if (!pmu->events) {
+		of_node_put(pmu_events);
+		return -ENOMEM;
+	}
+
+	ct = 0;
+	/* Parse the events; a slot is only consumed by a successful parse */
+	for_each_child_of_node(pmu_events, np) {
+		ret = imc_parse_event(np, g_scale, g_unit, prefix, base_reg, &pmu->events[ct]);
+		if (!ret)
+			ct++;
+	}
+
+	of_node_put(pmu_events);
+
+	/* Allocate memory for attribute group */
+	attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL);
+	if (!attr_group) {
+		imc_free_events(pmu->events, ct);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Allocate memory for attributes.
+	 * "ct" now holds the number of events parsed successfully. Each event
+	 * can contribute up to three attributes (event, event.scale and
+	 * event.unit), and one extra slot keeps the array NULL-terminated.
+	 */
+	attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL);
+	if (!attrs) {
+		kfree(attr_group);
+		imc_free_events(pmu->events, ct);
+		return -ENOMEM;
+	}
+
+	attr_group->name = "events";
+	attr_group->attrs = attrs;
+
+	/* A plain for loop: with ct == 0 there is no events[0] to look at */
+	for (i = 0; i < ct; i++) {
+		ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i].value);
+		if (!ev_val_str)
+			continue;
+		dev_str = device_str_attr_create(pmu->events[i].name, ev_val_str);
+		if (!dev_str) {
+			/* Nothing references the string yet, so drop it */
+			kfree(ev_val_str);
+			continue;
+		}
+
+		attrs[j++] = dev_str;
+		if (pmu->events[i].scale) {
+			ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i].name);
+			if (!ev_scale_str)
+				continue;
+			dev_str = device_str_attr_create(ev_scale_str, pmu->events[i].scale);
+			if (!dev_str) {
+				kfree(ev_scale_str);
+				continue;
+			}
+
+			attrs[j++] = dev_str;
+		}
+
+		if (pmu->events[i].unit) {
+			ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i].name);
+			if (!ev_unit_str)
+				continue;
+			dev_str = device_str_attr_create(ev_unit_str, pmu->events[i].unit);
+			if (!dev_str) {
+				kfree(ev_unit_str);
+				continue;
+			}
+
+			attrs[j++] = dev_str;
+		}
+	}
+
+	/* Save the event attribute */
+	pmu->attr_groups[IMC_EVENT_ATTR] = attr_group;
+
+	return 0;
+}
+
+/* get_nest_pmu_ref: Return the nest imc_pmu_ref recorded for the given cpu */
+static struct imc_pmu_ref *get_nest_pmu_ref(int cpu)
+{
+	struct imc_pmu_ref *ref = per_cpu(local_nest_imc_refc, cpu);
+
+	return ref;
+}
+
+/*
+ * Migrate the perf context of every nest PMU in per_nest_pmu_arr from
+ * "old_cpu" to "new_cpu". Does nothing if either cpu number is negative.
+ */
+static void nest_change_cpu_context(int old_cpu, int new_cpu)
+{
+	struct imc_pmu **slot;
+
+	if (old_cpu < 0 || new_cpu < 0)
+		return;
+
+	/* The array ends at its first NULL entry */
+	for (slot = per_nest_pmu_arr; *slot; slot++)
+		perf_pmu_migrate_context(&(*slot)->pmu, old_cpu, new_cpu);
+}
+
+/*
+ * CPU hotplug offline callback for nest IMC. If "cpu" was a designated
+ * nest-collection cpu, hand that role (and the perf contexts) to another
+ * cpu of the same node; when there is none, stop the nest counters and
+ * zero the reference count.
+ */
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+	const struct cpumask *node_mask;
+	struct imc_pmu_ref *ref;
+	int target;
+
+	/* Only a designated cpu needs any work; clear it from the mask. */
+	if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+		return 0;
+
+	/*
+	 * We can also get here when the cpuhotplug callback registration
+	 * fails: the offline path is then invoked for every cpu that was
+	 * already brought online, at a time when no nest PMU is registered.
+	 * That is not an offline failure, so report success;
+	 * cpuhp_setup_state() passes the real error to its caller, which
+	 * runs the cleanup routine.
+	 */
+	if (!nest_pmus)
+		return 0;
+
+	/*
+	 * Look for a replacement in the same node. Start with the last cpu
+	 * of the node mask; if that is the outgoing cpu itself, take any
+	 * other one.
+	 */
+	node_mask = cpumask_of_node(cpu_to_node(cpu));
+	target = cpumask_last(node_mask);
+	if (unlikely(target == cpu))
+		target = cpumask_any_but(node_mask, cpu);
+
+	if (target >= 0 && target < nr_cpu_ids) {
+		/* Found one: make it the designated cpu and move the contexts */
+		cpumask_set_cpu(target, &nest_imc_cpumask);
+		nest_change_cpu_context(cpu, target);
+		return 0;
+	}
+
+	/*
+	 * "cpu" was the last one in this chip: stop the engine and reset the
+	 * reference count without taking the reference count lock.
+	 */
+	opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+			       get_hard_smp_processor_id(cpu));
+	ref = get_nest_pmu_ref(cpu);
+	if (!ref)
+		return -EINVAL;
+
+	ref->refc = 0;
+	return 0;
+}
+
+/*
+ * CPU hotplug online callback for nest IMC. The first cpu of a node to
+ * come online becomes that node's designated cpu, after the nest counters
+ * have been stopped through OPAL. Returns the OPAL error if the stop call
+ * fails, 0 otherwise.
+ */
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{
+	static struct cpumask tmp_mask;
+	const struct cpumask *node_mask = cpumask_of_node(cpu_to_node(cpu));
+	int rc;
+
+	/* The node already has a designated cpu: nothing to do. */
+	if (cpumask_and(&tmp_mask, node_mask, &nest_imc_cpumask))
+		return 0;
+
+	/* First online cpu of the node: disable the nest counters. */
+	rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+				    get_hard_smp_processor_id(cpu));
+	if (rc)
+		return rc;
+
+	/* Make this CPU the designated target for counter collection */
+	cpumask_set_cpu(cpu, &nest_imc_cpumask);
+	return 0;
+}
+
+/* Register the nest IMC online/offline callbacks with the cpu hotplug core. */
+static int nest_pmu_cpumask_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE,
+			       "perf/powerpc/imc:online",
+			       ppc_nest_imc_cpu_online,
+			       ppc_nest_imc_cpu_offline);
+	return rc;
+}
+
+/*
+ * event->destroy callback for nest IMC events: drop one reference on the
+ * nest counters of the event's cpu and stop them through OPAL when the
+ * last reference goes away.
+ */
+static void nest_imc_counters_release(struct perf_event *event)
+{
+	int rc, node_id;
+	struct imc_pmu_ref *ref;
+
+	if (event->cpu < 0)
+		return;
+
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * See if we need to disable the nest PMU.
+	 * If no events are currently in use, then we have to take a
+	 * lock to ensure that we don't race with another task doing
+	 * enable or disable the nest counters.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return;
+
+	/* Take the lock for this node and then decrement the reference count */
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		/*
+		 * The scenario where this is true is, when perf session is
+		 * started, followed by offlining of all cpus in a given node.
+		 *
+		 * In the cpuhotplug offline path, ppc_nest_imc_cpu_offline()
+		 * sets ref->refc to zero if the cpu going offline is the
+		 * last cpu in a given node, and makes an OPAL call to
+		 * disable the engine in that node.
+		 */
+		spin_unlock(&ref->lock);
+		return;
+	}
+	ref->refc--;
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			/* The value printed is a node id, so say "node" */
+			pr_err("nest-imc: Unable to stop the counters for node %d\n", node_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "nest-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+}
+
+/*
+ * pmu->event_init callback for nest IMC. Validates the event, resolves
+ * the counter address inside the per-chip counter memory, and takes a
+ * reference on the nest counters, starting them if this is the first user.
+ *
+ * Returns 0 on success, -ENOENT for an event of another PMU type, -EINVAL
+ * for sampling events, a negative cpu, an out-of-range offset or a missing
+ * reference struct, -ENODEV when no memory is known for the cpu's chip, or
+ * the OPAL error when the counters cannot be started.
+ */
+static int nest_imc_event_init(struct perf_event *event)
+{
+	u32 offset, config = event->attr.config;
+	struct imc_mem_info *pcni;
+	struct imc_pmu_ref *ref;
+	struct imc_pmu *pmu;
+	int chip_id, node_id, rc;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config (event offset) */
+	if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
+		return -EINVAL;
+
+	/*
+	 * Nest HW counter memory resides in a per-chip reserve-memory (HOMER).
+	 * Find the entry that belongs to the chip of this cpu.
+	 */
+	chip_id = cpu_to_chip_id(event->cpu);
+	if (chip_id < 0)
+		return -ENODEV;
+
+	/* The mem_info list ends at the first following entry without vbase */
+	pcni = pmu->mem_info;
+	while (pcni->id != chip_id) {
+		pcni++;
+		if (!pcni->vbase)
+			return -ENODEV;
+	}
+
+	/* Add the event offset to the base address. */
+	offset = config & IMC_EVENT_OFFSET_MASK;
+	event->hw.event_base = (u64)pcni->vbase + offset;
+	node_id = cpu_to_node(event->cpu);
+
+	/*
+	 * Get the imc_pmu_ref struct for this node.
+	 * Take the lock and then increment the count of nest pmu events inited.
+	 */
+	ref = get_nest_pmu_ref(event->cpu);
+	if (!ref)
+		return -EINVAL;
+
+	spin_lock(&ref->lock);
+	if (!ref->refc) {
+		/* First user: switch the nest counters on */
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			pr_err("nest-imc: Unable to start the counters for node %d\n",
+			       node_id);
+			return rc;
+		}
+	}
+	ref->refc++;
+	spin_unlock(&ref->lock);
+
+	event->destroy = nest_imc_counters_release;
+	return 0;
+}
+
+/*
+ * core_imc_mem_init : Set up the counter memory for the core of "cpu".
+ *
+ * Zeroed pages are taken from the cpu's own node with alloc_pages_node()
+ * and their physical address is handed to OPAL, which uses it as the base
+ * address where the core imc counters are populated. The per-core reference
+ * struct (id and lock) is initialised as well.
+ *
+ * Returns 0 on success, -ENOMEM if the pages cannot be allocated, or the
+ * OPAL error; in the latter case the pages are freed again.
+ */
+static int core_imc_mem_init(int cpu, int size)
+{
+	int core_id = cpu / threads_per_core;
+	int nid = cpu_to_node(cpu);
+	struct imc_mem_info *mem_info = &core_imc_pmu->mem_info[core_id];
+	struct page *page;
+	int rc;
+
+	mem_info->id = core_id;
+
+	/* Local node only (__GFP_THISNODE); we need only vbase for core counters */
+	page = alloc_pages_node(nid,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+				__GFP_NOWARN, get_order(size));
+	if (!page)
+		return -ENOMEM;
+	mem_info->vbase = page_address(page);
+
+	core_imc_refc[core_id].id = core_id;
+	spin_lock_init(&core_imc_refc[core_id].lock);
+
+	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+				    __pa((void *)mem_info->vbase),
+				    get_hard_smp_processor_id(cpu));
+	if (!rc)
+		return 0;
+
+	/* OPAL rejected the buffer: give the pages back */
+	free_pages((u64)mem_info->vbase, get_order(size));
+	mem_info->vbase = NULL;
+	return rc;
+}
+
+/* True when the core that "cpu" belongs to already has counter memory. */
+static bool is_core_imc_mem_inited(int cpu)
+{
+	int core_id = cpu / threads_per_core;
+
+	return core_imc_pmu->mem_info[core_id].vbase != NULL;
+}
+
+/*
+ * CPU hotplug online callback for core IMC. The first cpu of a core to
+ * come online becomes that core's designated cpu; the core's counter
+ * memory is set up first if that has not happened yet.
+ */
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+	static struct cpumask tmp_mask;
+	const struct cpumask *siblings = cpu_sibling_mask(cpu);
+	int rc;
+
+	/* If a cpu for this core is already set, then, don't do anything */
+	if (cpumask_and(&tmp_mask, siblings, &core_imc_cpumask))
+		return 0;
+
+	if (!is_core_imc_mem_inited(cpu)) {
+		rc = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+		if (rc) {
+			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+			return rc;
+		}
+	}
+
+	/* Designate this cpu for its core */
+	cpumask_set_cpu(cpu, &core_imc_cpumask);
+	return 0;
+}
+
+/*
+ * CPU hotplug offline callback for core IMC. If "cpu" was the designated
+ * cpu of its core, pass that role and the perf context to a sibling; when
+ * no sibling is left, stop the core counters and drop the references.
+ */
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+	const struct cpumask *siblings;
+	struct imc_pmu_ref *ref;
+	int ncpu;
+
+	/* Only a designated cpu needs any work; clear it from the mask. */
+	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+		return 0;
+
+	/*
+	 * We can also get here when the cpuhotplug callback registration
+	 * fails: the offline path is then invoked for every cpu that was
+	 * already brought online, at a time when the core_imc pmu is not
+	 * registered. That is not an offline failure, so report success;
+	 * cpuhp_setup_state() passes the real error to its caller, which
+	 * in turn calls the cleanup routine.
+	 */
+	if (!core_imc_pmu->pmu.event_init)
+		return 0;
+
+	/* Find any online cpu in that core except the current "cpu" */
+	siblings = cpu_sibling_mask(cpu);
+	ncpu = cpumask_last(siblings);
+	if (unlikely(ncpu == cpu))
+		ncpu = cpumask_any_but(siblings, cpu);
+
+	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
+		cpumask_set_cpu(ncpu, &core_imc_cpumask);
+		perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
+		return 0;
+	}
+
+	/*
+	 * "cpu" was the last one in this core: stop the counters and zero
+	 * "refc" directly, without taking the reference count lock.
+	 */
+	opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+			       get_hard_smp_processor_id(cpu));
+	ref = &core_imc_refc[cpu / threads_per_core];
+	if (!ref)
+		return -EINVAL;
+
+	ref->refc = 0;
+
+	/*
+	 * Reduce the global reference count as well if the core domain
+	 * currently owns it.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_CORE)
+		--imc_global_refc.refc;
+	spin_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+/* Register the core IMC online/offline callbacks with the cpu hotplug core. */
+static int core_imc_pmu_cpumask_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+			       "perf/powerpc/imc_core:online",
+			       ppc_core_imc_cpu_online,
+			       ppc_core_imc_cpu_offline);
+	return rc;
+}
+
+/*
+ * Drop one reference from imc_global_refc. Once the count reaches zero (or
+ * below) it is clamped to zero and the owning domain id is cleared, so that
+ * another domain (thread/core/trace) may claim the counters. "event" is not
+ * used.
+ */
+static void reset_global_refc(struct perf_event *event)
+{
+	spin_lock(&imc_global_refc.lock);
+	if (--imc_global_refc.refc <= 0) {
+		imc_global_refc.refc = 0;
+		imc_global_refc.id = 0;
+	}
+	spin_unlock(&imc_global_refc.lock);
+}
+
+/*
+ * event->destroy callback for core IMC events: drop the per-core reference,
+ * stop the core counters when it was the last one, and then drop the
+ * global reference. The global reference is left alone when the per-core
+ * count was already zero or when stopping the counters fails.
+ */
+static void core_imc_counters_release(struct perf_event *event)
+{
+	struct imc_pmu_ref *ref;
+	int core_id, rc;
+
+	if (event->cpu < 0)
+		return;
+
+	/*
+	 * The per-core lock keeps us from racing with another task that
+	 * enables or disables the core counters.
+	 */
+	core_id = event->cpu / threads_per_core;
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return;
+
+	spin_lock(&ref->lock);
+	if (!ref->refc) {
+		/*
+		 * Happens when a perf session is started and all cpus of the
+		 * core are offlined afterwards: ppc_core_imc_cpu_offline()
+		 * zeroes ref->refc for the last cpu of a core and makes the
+		 * OPAL call that disables the engine in that core.
+		 */
+		spin_unlock(&ref->lock);
+		return;
+	}
+
+	if (--ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "core-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+
+	reset_global_refc(event);
+}
+
+/*
+ * pmu->event_init callback for core IMC. Validates the event, takes a
+ * reference on the core's counters (starting them for the first user),
+ * claims the global IMC reference for the core domain and records the
+ * counter address in event->hw.event_base.
+ *
+ * Returns 0 on success, -ENOENT for an event of another PMU type, -EINVAL
+ * for sampling events, a negative cpu or an out-of-range offset, -ENODEV
+ * when the core has no counter memory, the OPAL error when the counters
+ * cannot be started, or -EBUSY when thread/trace imc events own the
+ * counters. Nothing stays referenced on any error return.
+ */
+static int core_imc_event_init(struct perf_event *event)
+{
+	int core_id, rc;
+	u64 config = event->attr.config;
+	struct imc_mem_info *pcmi;
+	struct imc_pmu *pmu;
+	struct imc_pmu_ref *ref;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config (event offset) */
+	if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+		return -EINVAL;
+
+	if (!is_core_imc_mem_inited(event->cpu))
+		return -ENODEV;
+
+	core_id = event->cpu / threads_per_core;
+	pcmi = &core_imc_pmu->mem_info[core_id];
+	if ((!pcmi->vbase))
+		return -ENODEV;
+
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	/*
+	 * Core pmu units are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the lock and enable the core counters.
+	 * If not, just increment the count in core_imc_refc struct.
+	 */
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			spin_unlock(&ref->lock);
+			pr_err("core-imc: Unable to start the counters for core %d\n",
+			       core_id);
+			return rc;
+		}
+	}
+	++ref->refc;
+	spin_unlock(&ref->lock);
+
+	/*
+	 * Since the system can run either in accumulation or trace-mode
+	 * of IMC at a time, core-imc events are allowed only if no other
+	 * trace/thread imc events are enabled/monitored.
+	 *
+	 * Take the global lock, and check the refc.id
+	 * to know whether any other trace/thread imc
+	 * events are running.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) {
+		/*
+		 * No other trace/thread imc events are running in
+		 * the system, so set the refc.id to core-imc.
+		 */
+		imc_global_refc.id = IMC_DOMAIN_CORE;
+		imc_global_refc.refc++;
+	} else {
+		spin_unlock(&imc_global_refc.lock);
+
+		/*
+		 * event->destroy is not set yet, so nobody else will undo
+		 * the per-core reference taken above. Drop it here and stop
+		 * the counters again if this event was their only user.
+		 */
+		spin_lock(&ref->lock);
+		if (ref->refc > 0 && --ref->refc == 0)
+			opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					       get_hard_smp_processor_id(event->cpu));
+		spin_unlock(&ref->lock);
+		return -EBUSY;
+	}
+	spin_unlock(&imc_global_refc.lock);
+
+	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
+	event->destroy = core_imc_counters_release;
+	return 0;
+}
+
+/*
+ * Allocates a page of memory for each of the online cpus, and load
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
+ *
+ * LDBAR Register Layout:
+ *
+ * 0 4 8 12 16 20 24 28
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * | | [ ] [ Counter Address [8:50]
+ * | * Mode |
+ * | * PB Scope
+ * * Enable/Disable
+ *
+ * 32 36 40 44 48 52 56 60
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * Counter Address [8:50] ]
+ *
+ */
+static int thread_imc_mem_alloc(int cpu_id, int size)
+{
+	/*
+	 * The memory is allocated at most once per cpu: it is not freed in
+	 * the cpu offline path, so a later online finds it in place.
+	 */
+	if (!per_cpu(thread_imc_mem, cpu_id)) {
+		struct page *page;
+
+		page = alloc_pages_node(cpu_to_node(cpu_id),
+					GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+					__GFP_NOWARN, get_order(size));
+		if (!page)
+			return -ENOMEM;
+
+		per_cpu(thread_imc_mem, cpu_id) = page_address(page);
+	}
+
+	/* Load LDBAR with 0; the real address is written when an event is added */
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+/* CPU hotplug online callback: make sure "cpu" has thread-imc memory. */
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+	int rc = thread_imc_mem_alloc(cpu, thread_imc_mem_size);
+
+	return rc;
+}
+
+/*
+ * CPU hotplug offline callback for thread IMC: stop the counter updates to
+ * memory and give back one global reference if the thread domain owns it.
+ */
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+	unsigned long ldbar;
+
+	/*
+	 * Bit 0 of LDBAR is set to 1 by the thread-imc event_add function.
+	 * While it is unset the counter data is no longer posted to memory,
+	 * so clear it here for the cpu_offline path.
+	 */
+	ldbar = mfspr(SPRN_LDBAR);
+	mtspr(SPRN_LDBAR, ldbar & ~(1UL << 63));
+
+	/* Reduce the refc if thread-imc event running on this cpu */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id == IMC_DOMAIN_THREAD)
+		--imc_global_refc.refc;
+	spin_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+/* Register the thread IMC online/offline callbacks with the cpu hotplug core. */
+static int thread_imc_cpu_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
+			       "perf/powerpc/imc_thread:online",
+			       ppc_thread_imc_cpu_online,
+			       ppc_thread_imc_cpu_offline);
+	return rc;
+}
+
+/*
+ * pmu->event_init callback for thread IMC. Validates the event and claims
+ * the global IMC reference for the thread domain.
+ *
+ * Returns 0 on success, -ENOENT for an event of another PMU type, -EACCES
+ * without perfmon capability, -EINVAL for sampling events, an out-of-range
+ * offset or an event without a target task, and -EBUSY when core/trace imc
+ * events own the counters.
+ */
+static int thread_imc_event_init(struct perf_event *event)
+{
+	u32 config = event->attr.config;
+	struct imc_pmu *pmu;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config offset */
+	if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)
+		return -EINVAL;
+
+	/* The event must be attached to a task */
+	if (!event->hw.target)
+		return -EINVAL;
+
+	/*
+	 * Refuse the event if trace/core imc events are running in the
+	 * system; otherwise mark the thread domain as the owner.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id != 0 && imc_global_refc.id != IMC_DOMAIN_THREAD) {
+		spin_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+	imc_global_refc.id = IMC_DOMAIN_THREAD;
+	imc_global_refc.refc++;
+	spin_unlock(&imc_global_refc.lock);
+
+	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
+	return 0;
+}
+
+/* True when the name of the event's PMU starts with "thread_imc". */
+static bool is_thread_imc_pmu(struct perf_event *event)
+{
+	return strncmp(event->pmu->name, "thread_imc", strlen("thread_imc")) == 0;
+}
+
+/*
+ * Return the address of the counter behind "event". Thread-imc counters
+ * live in the current cpu's thread_imc_mem at the event's offset; every
+ * other PMU uses the address saved in event->hw.event_base.
+ */
+static __be64 *get_event_base_addr(struct perf_event *event)
+{
+	u64 base;
+
+	if (!is_thread_imc_pmu(event))
+		return (__be64 *)event->hw.event_base;
+
+	base = (u64)per_cpu(thread_imc_mem, smp_processor_id());
+	return (__be64 *)(base + (event->attr.config & IMC_EVENT_OFFSET_MASK));
+}
+
+/*
+ * pmu->start_txn callback: disable the PMU for the duration of the
+ * transaction, unless "txn_flags" has any bit other than PERF_PMU_TXN_ADD.
+ */
+static void thread_imc_pmu_start_txn(struct pmu *pmu,
+				     unsigned int txn_flags)
+{
+	if (!(txn_flags & ~PERF_PMU_TXN_ADD))
+		perf_pmu_disable(pmu);
+}
+
+/* pmu->cancel_txn callback: re-enable the PMU. */
+static void thread_imc_pmu_cancel_txn(struct pmu *pmu)
+{
+	perf_pmu_enable(pmu);
+}
+
+/* pmu->commit_txn callback: re-enable the PMU; always reports success. */
+static int thread_imc_pmu_commit_txn(struct pmu *pmu)
+{
+	perf_pmu_enable(pmu);
+
+	return 0;
+}
+
+/*
+ * Read the current (big-endian) counter value of "event", remember it in
+ * event->hw.prev_count and return it.
+ *
+ * In-Memory Collection (IMC) counters are free flowing, so the snapshot
+ * stored here is what a later read is compared against to get a delta.
+ */
+static u64 imc_read_counter(struct perf_event *event)
+{
+	__be64 *addr = get_event_base_addr(event);
+	u64 snapshot = be64_to_cpu(READ_ONCE(*addr));
+
+	local64_set(&event->hw.prev_count, snapshot);
+	return snapshot;
+}
+
+/*
+ * Add the counter's advance since the previous snapshot to event->count.
+ * The previous snapshot is fetched first because imc_read_counter()
+ * overwrites it with the new value.
+ */
+static void imc_event_update(struct perf_event *event)
+{
+	u64 before = local64_read(&event->hw.prev_count);
+	u64 now = imc_read_counter(event);
+
+	/* Update the delta to the event count */
+	local64_add(now - before, &event->count);
+}
+
+/*
+ * pmu->start callback. In Memory Counters are free flowing: HW or the
+ * microcode keeps adding to the counter in memory, so starting an event
+ * only records the current value as the baseline for a later delta.
+ * "flags" is not used.
+ */
+static void imc_event_start(struct perf_event *event, int flags)
+{
+	(void)imc_read_counter(event);
+}
+
+/*
+ * pmu->stop callback: snapshot the counter and fold the delta since the
+ * last snapshot into the event count. "flags" is not used.
+ */
+static void imc_event_stop(struct perf_event *event, int flags)
+{
+	imc_event_update(event);
+}
+
+/*
+ * pmu->add callback: take the baseline snapshot when PERF_EF_START is
+ * requested. Always returns 0.
+ */
+static int imc_event_add(struct perf_event *event, int flags)
+{
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	imc_event_start(event, flags);
+	return 0;
+}
+
+/*
+ * pmu->add callback for thread IMC. Points LDBAR at the current cpu's
+ * thread-imc memory with the enable bit set, and takes a reference on the
+ * core's counters, starting them for the first user.
+ *
+ * Returns 0 on success, or -EINVAL when the core has no counter memory or
+ * the counters cannot be started.
+ */
+static int thread_imc_event_add(struct perf_event *event, int flags)
+{
+	int core_id;
+	struct imc_pmu_ref *ref;
+	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
+
+	if (flags & PERF_EF_START)
+		imc_event_start(event, flags);
+
+	if (!is_core_imc_mem_inited(smp_processor_id()))
+		return -EINVAL;
+
+	core_id = smp_processor_id() / threads_per_core;
+	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+	mtspr(SPRN_LDBAR, ldbar_value);
+
+	/*
+	 * imc pmus are enabled only when it is used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the lock and enable the counters.
+	 * If not, just increment the count in ref count struct.
+	 */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	spin_lock(&ref->lock);
+	if (ref->refc == 0) {
+		if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+		    get_hard_smp_processor_id(smp_processor_id()))) {
+			spin_unlock(&ref->lock);
+			/*
+			 * Keep the format on one line: a backslash-newline
+			 * inside the literal would pull the indentation of
+			 * the continuation line into the logged message.
+			 */
+			pr_err("thread-imc: Unable to start the counter for core %d\n", core_id);
+			return -EINVAL;
+		}
+	}
+	++ref->refc;
+	spin_unlock(&ref->lock);
+	return 0;
+}
+
+/*
+ * pmu->del callback for thread IMC. Drops the reference on the core's
+ * counters (stopping them when it was the last one), clears the LDBAR
+ * enable bit and folds the final delta into the event count. If stopping
+ * the counters fails, the function returns right after logging the error.
+ */
+static void thread_imc_event_del(struct perf_event *event, int flags)
+{
+
+	int core_id;
+	struct imc_pmu_ref *ref;
+
+	core_id = smp_processor_id() / threads_per_core;
+	ref = &core_imc_refc[core_id];
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
+
+	spin_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+		    get_hard_smp_processor_id(smp_processor_id()))) {
+			spin_unlock(&ref->lock);
+			/*
+			 * Keep the format on one line: a backslash-newline
+			 * inside the literal would pull the indentation of
+			 * the continuation line into the logged message.
+			 */
+			pr_err("thread-imc: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		/* Never let the count stay negative */
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+
+	/* Set bit 0 of LDBAR to zero, to stop posting updates to memory */
+	mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63))));
+
+	/*
+	 * Take a snapshot and calculate the delta and update
+	 * the event counter values.
+	 */
+	imc_event_update(event);
+}
+
+/*
+ * Allocate trace-imc memory for "cpu_id" (once per cpu), register it with
+ * OPAL for trace mode, initialise the core's trace reference struct and
+ * load LDBAR with 0.
+ *
+ * Returns 0 on success, -ENOMEM if the pages cannot be allocated, or the
+ * OPAL error if the trace counters cannot be initialised; in that case the
+ * pages are released so that a later attempt starts from scratch.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+	u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+	int phys_id = cpu_to_node(cpu_id), rc = 0;
+	int core_id = (cpu_id / threads_per_core);
+
+	if (!local_mem) {
+		struct page *page;
+
+		page = alloc_pages_node(phys_id,
+					GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+					__GFP_NOWARN, get_order(size));
+		if (!page)
+			return -ENOMEM;
+		local_mem = page_address(page);
+		per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+		/* Initialise the counters for trace mode */
+		rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+					    get_hard_smp_processor_id(cpu_id));
+		if (rc) {
+			/*
+			 * Do not leave the page cached in the per-cpu slot:
+			 * the next call would find it, skip the OPAL init
+			 * and report success for memory OPAL never accepted.
+			 * core_imc_mem_init() unwinds the same way.
+			 */
+			free_pages((u64)local_mem, get_order(size));
+			per_cpu(trace_imc_mem, cpu_id) = NULL;
+			pr_info("IMC:opal init failed for trace imc\n");
+			return rc;
+		}
+	}
+
+	trace_imc_refc[core_id].id = core_id;
+	spin_lock_init(&trace_imc_refc[core_id].lock);
+
+	mtspr(SPRN_LDBAR, 0);
+	return 0;
+}
+
+/* CPU hotplug online callback: make sure "cpu" has trace-imc memory. */
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+	int rc = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+
+	return rc;
+}
+
+/*
+ * Hotplug "offline" callback for trace-imc.
+ *
+ * LDBAR bit 0 needs no clearing here, as it is already zero in imc
+ * trace-mode. Only the global reference count is dropped, and only while
+ * the global owner is the trace domain. Always returns 0.
+ */
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+	spin_lock(&imc_global_refc.lock);
+
+	/* Reduce the refc if any trace-imc event is running on this cpu */
+	if (imc_global_refc.id == IMC_DOMAIN_TRACE)
+		--imc_global_refc.refc;
+
+	spin_unlock(&imc_global_refc.lock);
+
+	return 0;
+}
+
+/*
+ * Register the trace-imc online/offline callbacks with the cpu hotplug
+ * core. Returns whatever cpuhp_setup_state() returns.
+ */
+static int trace_imc_cpu_init(void)
+{
+	int rc;
+
+	rc = cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+			       "perf/powerpc/imc_trace:online",
+			       ppc_trace_imc_cpu_online,
+			       ppc_trace_imc_cpu_offline);
+	return rc;
+}
+
+/* Return the current cpu's trace-imc buffer address as a u64. */
+static u64 get_trace_imc_event_base_addr(void)
+{
+	int this_cpu = smp_processor_id();
+
+	return (u64)per_cpu(trace_imc_mem, this_cpu);
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ *
+ * @mem: trace record to decode; its fields are read with READ_ONCE() and
+ * converted with be64_to_cpu()
+ * @data: sample data to fill (ip and period are set here)
+ * @prev_tb: in/out, tb1 of the last accepted record; updated when this
+ * record's tb1 is larger
+ * @header: perf event header to fill (type, size and misc)
+ * @event: event the sample is generated for
+ *
+ * Returns 0 for a valid record, or -EINVAL when tb1 is not larger than
+ * *prev_tb or when the masked tb1 does not equal tb2.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+ struct perf_sample_data *data,
+ u64 *prev_tb,
+ struct perf_event_header *header,
+ struct perf_event *event)
+{
+ /* Sanity checks for a valid record */
+ if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+ *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+ else
+ return -EINVAL;
+
+ if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+ be64_to_cpu(READ_ONCE(mem->tb2)))
+ return -EINVAL;
+
+ /* Prepare perf sample */
+ data->ip = be64_to_cpu(READ_ONCE(mem->ip));
+ data->period = event->hw.last_period;
+
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = sizeof(*header) + event->header_size;
+ header->misc = 0;
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) { /* privilege taken from the record's HV/PR value */
+ switch (IMC_TRACE_RECORD_VAL_HVPR(be64_to_cpu(READ_ONCE(mem->val)))) {
+ case 0:/* when MSR HV and PR not set in the trace-record */
+ header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+ break;
+ case 1: /* MSR HV is 0 and PR is 1 */
+ header->misc |= PERF_RECORD_MISC_GUEST_USER;
+ break;
+ case 2: /* MSR HV is 1 and PR is 0 */
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ break;
+ case 3: /* MSR HV is 1 and PR is 1 */
+ header->misc |= PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_info("IMC: Unable to set the flag based on MSR bits\n");
+ break;
+ }
+ } else { /* otherwise classify by whether the sampled ip is a kernel address */
+ if (is_kernel_addr(data->ip))
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ else
+ header->misc |= PERF_RECORD_MISC_USER;
+ }
+ perf_event_header__init_id(header, data, event);
+
+ return 0;
+}
+
+/*
+ * Walk the current cpu's trace-imc buffer record by record and emit one
+ * perf sample per valid record. The walk ends at the first invalid record,
+ * when perf_output_begin() fails, or at the end of the buffer.
+ */
+static void dump_trace_imc_data(struct perf_event *event)
+{
+	struct trace_imc_data *rec;
+	u64 last_tb = 0;
+	int idx, rc;
+
+	rec = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+	for (idx = 0; idx < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+	     idx++, rec++) {
+		struct perf_sample_data data;
+		struct perf_event_header header;
+		struct perf_output_handle handle;
+
+		rc = trace_imc_prepare_sample(rec, &data, &last_tb, &header, event);
+		if (rc)
+			break;	/* not a valid record: stop here */
+
+		/* Valid record: create the sample */
+		if (perf_output_begin(&handle, &data, event, header.size))
+			return;
+
+		perf_output_sample(&handle, &header, &data, event);
+		perf_output_end(&handle);
+	}
+}
+
+/*
+ * pmu::add for trace-imc: program LDBAR with this cpu's buffer address plus
+ * the trace enable bit, start the core's trace counters if this is the
+ * first user, and take a reference on the core.
+ *
+ * Returns 0 on success, or -EINVAL when the reference array is missing or
+ * OPAL fails to start the counters.
+ */
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+	int core = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *ref;
+	u64 ldbar;
+
+	/* Per-thread memory address combined with the trace-imc bit */
+	ldbar = ((u64)get_trace_imc_event_base_addr() & THREAD_IMC_LDBAR_MASK) |
+		TRACE_IMC_ENABLE;
+
+	/* trace-imc reference count */
+	ref = trace_imc_refc ? &trace_imc_refc[core] : NULL;
+	if (!ref) {
+		pr_debug("imc: Failed to get the event reference count\n");
+		return -EINVAL;
+	}
+
+	mtspr(SPRN_LDBAR, ldbar);
+
+	spin_lock(&ref->lock);
+	/* Only the first user on the core has to start the counters */
+	if (ref->refc == 0 &&
+	    opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+				    get_hard_smp_processor_id(smp_processor_id()))) {
+		spin_unlock(&ref->lock);
+		pr_err("trace-imc: Unable to start the counters for core %d\n", core);
+		return -EINVAL;
+	}
+	ref->refc++;
+	spin_unlock(&ref->lock);
+
+	return 0;
+}
+
+/* pmu::read for trace-imc: intentionally does nothing. */
+static void trace_imc_event_read(struct perf_event *event)
+{
+}
+
+/*
+ * pmu::stop for trace-imc: emit samples for the records in this cpu's
+ * buffer, then zero the first u64 of the buffer.
+ */
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+	void *buf = (void *)get_trace_imc_event_base_addr();
+
+	dump_trace_imc_data(event);
+	memset(buf, 0, sizeof(u64));
+}
+
+/* pmu::start for trace-imc: intentionally does nothing. */
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+}
+
+/*
+ * pmu::del for trace-imc: drop this cpu's reference on its core, stop the
+ * core's trace counters when the last reference goes away, then run the
+ * stop path. If the reference array is missing or OPAL fails to stop the
+ * counters, the function returns before the stop path runs.
+ */
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+	int core = smp_processor_id() / threads_per_core;
+	struct imc_pmu_ref *ref;
+
+	ref = trace_imc_refc ? &trace_imc_refc[core] : NULL;
+	if (!ref) {
+		pr_debug("imc: Failed to get event reference count\n");
+		return;
+	}
+
+	spin_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0 &&
+	    opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+				   get_hard_smp_processor_id(smp_processor_id()))) {
+		spin_unlock(&ref->lock);
+		pr_err("trace-imc: Unable to stop the counters for core %d\n", core);
+		return;
+	} else if (ref->refc < 0) {
+		/* Clamp an underflowed count back to zero */
+		ref->refc = 0;
+	}
+	spin_unlock(&ref->lock);
+
+	trace_imc_event_stop(event, flags);
+}
+
+/*
+ * pmu::event_init for trace-imc.
+ *
+ * Returns -ENOENT for events of another pmu type or for counting events
+ * (sample_period == 0), -EACCES when the caller is not perfmon capable,
+ * -EBUSY when another imc domain currently owns the global reference, and
+ * 0 once the trace domain holds a global reference for this event.
+ */
+static int trace_imc_event_init(struct perf_event *event)
+{
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Return if this is a counting event */
+	if (event->attr.sample_period == 0)
+		return -ENOENT;
+
+	/*
+	 * Under the global lock, make sure no other thread is running any
+	 * core/thread imc events.
+	 */
+	spin_lock(&imc_global_refc.lock);
+	if (imc_global_refc.id != 0 && imc_global_refc.id != IMC_DOMAIN_TRACE) {
+		spin_unlock(&imc_global_refc.lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * No core/thread imc events are running in the system, so set the
+	 * refc.id to trace-imc.
+	 */
+	imc_global_refc.id = IMC_DOMAIN_TRACE;
+	imc_global_refc.refc++;
+	spin_unlock(&imc_global_refc.lock);
+
+	event->hw.idx = -1;
+
+	/*
+	 * There can only be a single PMU for perf_hw_context events, which is
+	 * assigned to the core PMU. Hence use "perf_sw_context" for trace_imc.
+	 */
+	event->pmu->task_ctx_nr = perf_sw_context;
+	event->destroy = reset_global_refc;
+
+	return 0;
+}
+
+/*
+ * update_pmu_ops : Populate the appropriate operations for "pmu".
+ *
+ * Common defaults are installed first; the thread and trace domains then
+ * override the callbacks they implement themselves. Unknown domains keep
+ * the defaults. Always returns 0.
+ */
+static int update_pmu_ops(struct imc_pmu *pmu)
+{
+	struct pmu *ops = &pmu->pmu;
+
+	/* Defaults shared by every domain */
+	ops->task_ctx_nr = perf_invalid_context;
+	ops->add = imc_event_add;
+	ops->del = imc_event_stop;
+	ops->start = imc_event_start;
+	ops->stop = imc_event_stop;
+	ops->read = imc_event_update;
+	ops->attr_groups = pmu->attr_groups;
+	ops->capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
+
+	switch (pmu->domain) {
+	case IMC_DOMAIN_NEST:
+		ops->event_init = nest_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
+
+	case IMC_DOMAIN_CORE:
+		ops->event_init = core_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
+
+	case IMC_DOMAIN_THREAD:
+		ops->event_init = thread_imc_event_init;
+		ops->add = thread_imc_event_add;
+		ops->del = thread_imc_event_del;
+		ops->start_txn = thread_imc_pmu_start_txn;
+		ops->cancel_txn = thread_imc_pmu_cancel_txn;
+		ops->commit_txn = thread_imc_pmu_commit_txn;
+		break;
+
+	case IMC_DOMAIN_TRACE:
+		ops->event_init = trace_imc_event_init;
+		ops->add = trace_imc_event_add;
+		ops->del = trace_imc_event_del;
+		ops->start = trace_imc_event_start;
+		ops->stop = trace_imc_event_stop;
+		ops->read = trace_imc_event_read;
+		pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
+		break;
+
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes.
+ *
+ * Allocates one nest_imc_refc entry per possible node, records the node id
+ * in each entry, and points every possible cpu's "local_nest_imc_refc" at
+ * the entry of its node. Returns 0, or -ENOMEM if the allocation fails.
+ */
+static int init_nest_pmu_ref(void)
+{
+	int node, cpu, slot = 0;
+
+	nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc),
+				GFP_KERNEL);
+	if (!nest_imc_refc)
+		return -ENOMEM;
+
+	/*
+	 * "slot" indexes the array and cannot run off its end, because
+	 * for_each_node() visits the "N_POSSIBLE" nodes only.
+	 */
+	for_each_node(node) {
+		/*
+		 * The lock avoids races while tracking the number of
+		 * sessions using the chip's nest pmu units.
+		 */
+		spin_lock_init(&nest_imc_refc[slot].lock);
+		nest_imc_refc[slot].id = node;
+		slot++;
+	}
+
+	/*
+	 * Cache, per cpu, the entry that belongs to the cpu's node. This
+	 * keeps get_nest_pmu_ref() simple.
+	 */
+	for_each_possible_cpu(cpu) {
+		node = cpu_to_node(cpu);
+		for (slot = 0; slot < num_possible_nodes(); slot++) {
+			if (nest_imc_refc[slot].id != node)
+				continue;
+
+			per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[slot];
+			break;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Free every core's counter pages, then the mem_info array and the core
+ * reference-count array.
+ */
+static void cleanup_all_core_imc_memory(void)
+{
+	int nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+	struct imc_mem_info *base = core_imc_pmu->mem_info;
+	int order = get_order(core_imc_pmu->counter_mem_size);
+	struct imc_mem_info *info;
+
+	/* mem_info will never be NULL */
+	for (info = base; info < base + nr_cores; info++) {
+		if (!info->vbase)
+			continue;
+
+		free_pages((u64)info->vbase, order);
+	}
+
+	kfree(base);
+	kfree(core_imc_refc);
+}
+
+/*
+ * Clear the 0th bit of LDBAR (mask 1UL << 63) on the executing cpu, which
+ * disables thread-imc updates to memory. @dummy is unused.
+ */
+static void thread_imc_ldbar_disable(void *dummy)
+{
+	unsigned long ldbar = mfspr(SPRN_LDBAR);
+
+	ldbar &= ~(1UL << 63);
+	mtspr(SPRN_LDBAR, ldbar);
+}
+
+/* Run thread_imc_ldbar_disable() on every cpu and wait for completion. */
+void thread_imc_disable(void)
+{
+	const int wait = 1;
+
+	on_each_cpu(thread_imc_ldbar_disable, NULL, wait);
+}
+
+/*
+ * cleanup_all_thread_imc_memory: free every per-cpu thread-imc buffer.
+ *
+ * All possible cpus are walked, not just the online ones, so a buffer that
+ * belongs to a cpu which is offline at teardown time is released as well
+ * (cpus that never had a buffer are skipped by the NULL check). Each
+ * per-cpu pointer is cleared after the free so that no stale address is
+ * left behind.
+ *
+ * NOTE(review): assumes thread_imc_mem is a per-cpu pointer, as the
+ * matching trace_imc_mem is - confirm against its definition.
+ */
+static void cleanup_all_thread_imc_memory(void)
+{
+	int cpu, order = get_order(thread_imc_mem_size);
+
+	for_each_possible_cpu(cpu) {
+		if (!per_cpu(thread_imc_mem, cpu))
+			continue;
+
+		free_pages((u64)per_cpu(thread_imc_mem, cpu), order);
+		per_cpu(thread_imc_mem, cpu) = NULL;
+	}
+}
+
+/*
+ * cleanup_all_trace_imc_memory: free every per-cpu trace-imc buffer and
+ * the trace reference-count array.
+ *
+ * All possible cpus are walked, not just the online ones: the offline
+ * hotplug callback does not free a cpu's buffer, so an online-only walk
+ * would leak the pages of any cpu that is offline at teardown time. The
+ * per-cpu pointers and trace_imc_refc are reset to NULL after being freed,
+ * so that trace_imc_mem_alloc() and the "if (trace_imc_refc)" guards in the
+ * add/del paths never act on freed memory.
+ */
+static void cleanup_all_trace_imc_memory(void)
+{
+	int cpu, order = get_order(trace_imc_mem_size);
+
+	for_each_possible_cpu(cpu) {
+		if (!per_cpu(trace_imc_mem, cpu))
+			continue;
+
+		free_pages((u64)per_cpu(trace_imc_mem, cpu), order);
+		per_cpu(trace_imc_mem, cpu) = NULL;
+	}
+
+	kfree(trace_imc_refc);
+	trace_imc_refc = NULL;
+}
+
+/*
+ * Free the dynamically allocated event attribute group of @pmu_ptr: first
+ * its attrs array (when the group exists), then the group itself.
+ */
+static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
+{
+	const struct attribute_group *grp = pmu_ptr->attr_groups[IMC_EVENT_ATTR];
+
+	if (grp)
+		kfree(grp->attrs);
+	kfree(grp);
+}
+
+/*
+ * Common function to unregister the cpu hotplug callback of @pmu_ptr's
+ * domain and free that domain's memory.
+ *
+ * For the nest domain the hotplug state and shared arrays are released
+ * only when the last nest pmu goes away; "nest_pmus" is decremented in
+ * any case while it is positive.
+ *
+ * TODO: Need to handle pmu unregistering, which will be
+ * done in followup series.
+ */
+static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
+{
+	switch (pmu_ptr->domain) {
+	case IMC_DOMAIN_NEST:
+		mutex_lock(&nest_init_lock);
+		if (nest_pmus == 1) {
+			cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE);
+			kfree(nest_imc_refc);
+			kfree(per_nest_pmu_arr);
+			per_nest_pmu_arr = NULL;
+		}
+
+		if (nest_pmus > 0)
+			nest_pmus--;
+		mutex_unlock(&nest_init_lock);
+		break;
+
+	case IMC_DOMAIN_CORE:
+		/* Free core_imc memory */
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
+		cleanup_all_core_imc_memory();
+		break;
+
+	case IMC_DOMAIN_THREAD:
+		/* Free thread_imc memory */
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
+		cleanup_all_thread_imc_memory();
+		break;
+
+	case IMC_DOMAIN_TRACE:
+		/* Free trace_imc memory */
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+		cleanup_all_trace_imc_memory();
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Function to unregister thread-imc if core-imc
+ * is not registered.
+ *
+ * Removes the thread-imc cpu hotplug state and frees the per-cpu counter
+ * memory, frees the dynamically allocated event attribute group, and then
+ * unregisters the pmu.
+ *
+ * NOTE(review): the memory is released before perf_pmu_unregister() runs;
+ * confirm nothing can still reach the pmu in that window.
+ */
+void unregister_thread_imc(void)
+{
+ imc_common_cpuhp_mem_free(thread_imc_pmu);
+ imc_common_mem_free(thread_imc_pmu);
+ perf_pmu_unregister(&thread_imc_pmu->pmu);
+}
+
+/*
+ * imc_mem_init : Function to support memory allocation for core imc.
+ *
+ * Despite the name, every domain is handled here: the pmu name is built
+ * from the "name" property of @parent and the per-domain bookkeeping is
+ * allocated (per_nest_pmu_arr for nest, mem_info and core_imc_refc for
+ * core, per-cpu buffers for thread, trace_imc_refc plus per-cpu buffers
+ * for trace).
+ *
+ * @pmu_ptr: pmu being initialised; its domain selects the case below
+ * @parent: device tree node providing the "name" property
+ * @pmu_index: slot used in per_nest_pmu_arr (nest domain only)
+ *
+ * Returns 0 on success, -ENODEV when the "name" property cannot be read,
+ * -EINVAL for an unknown domain, -ENOMEM on allocation failure, or the
+ * error returned by thread_imc_mem_alloc()/trace_imc_mem_alloc().
+ */
+static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
+ int pmu_index)
+{
+ const char *s;
+ int nr_cores, cpu, res = -ENOMEM; /* res stays -ENOMEM unless a *_mem_alloc() call overwrites it */
+
+ if (of_property_read_string(parent, "name", &s))
+ return -ENODEV;
+
+ switch (pmu_ptr->domain) {
+ case IMC_DOMAIN_NEST:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s);
+ if (!pmu_ptr->pmu.name)
+ goto err;
+
+ /* Needed for hotplug/migration */
+ if (!per_nest_pmu_arr) { /* allocated once, by the first nest pmu */
+ per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1,
+ sizeof(struct imc_pmu *),
+ GFP_KERNEL);
+ if (!per_nest_pmu_arr)
+ goto err;
+ }
+ per_nest_pmu_arr[pmu_index] = pmu_ptr;
+ break;
+ case IMC_DOMAIN_CORE:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ goto err;
+
+ nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+ pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
+ GFP_KERNEL);
+
+ if (!pmu_ptr->mem_info)
+ goto err;
+
+ core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+ GFP_KERNEL);
+
+ if (!core_imc_refc) {
+ kfree(pmu_ptr->mem_info); /* undo the allocation made just above */
+ goto err;
+ }
+
+ core_imc_pmu = pmu_ptr;
+ break;
+ case IMC_DOMAIN_THREAD:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ goto err;
+
+ thread_imc_mem_size = pmu_ptr->counter_mem_size;
+ for_each_online_cpu(cpu) {
+ res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size);
+ if (res) {
+ cleanup_all_thread_imc_memory(); /* frees the buffers of the cpus already handled */
+ goto err;
+ }
+ }
+
+ thread_imc_pmu = pmu_ptr;
+ break;
+ case IMC_DOMAIN_TRACE:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ return -ENOMEM;
+
+ nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+ trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+ GFP_KERNEL);
+ if (!trace_imc_refc)
+ return -ENOMEM;
+
+ trace_imc_mem_size = pmu_ptr->counter_mem_size;
+ for_each_online_cpu(cpu) {
+ res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+ if (res) {
+ cleanup_all_trace_imc_memory(); /* frees the per-cpu buffers and trace_imc_refc */
+ goto err;
+ }
+ }
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+err:
+ return res; /* NOTE(review): pmu.name is not freed on this path - confirm the caller owns it */
+}
+
+/*
+ * init_imc_pmu : Setup and register the IMC pmu device.
+ *
+ * @parent: Device tree unit node
+ * @pmu_ptr: memory allocated for this pmu
+ * @pmu_idx: Count of nest pmc registered
+ *
+ * init_imc_pmu() setup pmu cpumask and registers for a cpu hotplug callback.
+ * Handles failure cases and accordingly frees memory.
+ *
+ * Steps, in order: imc_mem_init(), the per-domain hotplug/cpumask setup,
+ * update_events_in_group(), update_pmu_ops() and perf_pmu_register().
+ *
+ * Returns 0 on success, -EINVAL for an unknown domain, or the error code
+ * of the step that failed.
+ */
+int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx)
+{
+ int ret;
+
+ ret = imc_mem_init(pmu_ptr, parent, pmu_idx);
+ if (ret)
+ goto err_free_mem;
+
+ switch (pmu_ptr->domain) {
+ case IMC_DOMAIN_NEST:
+ /*
+ * Nest imc pmu need only one cpu per chip, we initialize the
+ * cpumask for the first nest imc pmu and use the same for the
+ * rest. To handle the cpuhotplug callback unregister, we track
+ * the number of nest pmus in "nest_pmus".
+ */
+ mutex_lock(&nest_init_lock);
+ if (nest_pmus == 0) { /* first nest pmu: one-time shared setup */
+ ret = init_nest_pmu_ref();
+ if (ret) {
+ mutex_unlock(&nest_init_lock);
+ kfree(per_nest_pmu_arr);
+ per_nest_pmu_arr = NULL;
+ goto err_free_mem;
+ }
+ /* Register for cpu hotplug notification. */
+ ret = nest_pmu_cpumask_init();
+ if (ret) {
+ mutex_unlock(&nest_init_lock);
+ kfree(nest_imc_refc);
+ kfree(per_nest_pmu_arr);
+ per_nest_pmu_arr = NULL;
+ goto err_free_mem;
+ }
+ }
+ nest_pmus++;
+ mutex_unlock(&nest_init_lock);
+ break;
+ case IMC_DOMAIN_CORE:
+ ret = core_imc_pmu_cpumask_init();
+ if (ret) {
+ cleanup_all_core_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
+ case IMC_DOMAIN_THREAD:
+ ret = thread_imc_cpu_init();
+ if (ret) {
+ cleanup_all_thread_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
+ case IMC_DOMAIN_TRACE:
+ ret = trace_imc_cpu_init();
+ if (ret) {
+ cleanup_all_trace_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
+ default:
+ return -EINVAL; /* Unknown domain */
+ }
+
+ ret = update_events_in_group(parent, pmu_ptr);
+ if (ret)
+ goto err_free_cpuhp_mem;
+
+ ret = update_pmu_ops(pmu_ptr);
+ if (ret)
+ goto err_free_cpuhp_mem;
+
+ ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1);
+ if (ret)
+ goto err_free_cpuhp_mem;
+
+ pr_debug("%s performance monitor hardware support registered\n",
+ pmu_ptr->pmu.name);
+
+ return 0;
+
+err_free_cpuhp_mem: /* hotplug state exists: remove it and free the domain's memory */
+ imc_common_cpuhp_mem_free(pmu_ptr);
+err_free_mem: /* also reached by fall-through: free the event attribute group */
+ imc_common_mem_free(pmu_ptr);
+ return ret;
+}
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
new file mode 100644
index 000000000000..a70ac471a5a5
--- /dev/null
+++ b/arch/powerpc/perf/internal.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+int __init init_ppc970_pmu(void);
+int __init init_power5_pmu(void);
+int __init init_power5p_pmu(void);
+int __init init_power6_pmu(void);
+int __init init_power7_pmu(void);
+int __init init_power8_pmu(void);
+int __init init_power9_pmu(void);
+int __init init_power10_pmu(void);
+int __init init_power11_pmu(void);
+int __init init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
new file mode 100644
index 000000000000..2b3547fdba4a
--- /dev/null
+++ b/arch/powerpc/perf/isa207-common.c
@@ -0,0 +1,852 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Common Performance counter support functions for PowerISA v2.07 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+#include "isa207-common.h"
+
+PMU_FORMAT_ATTR(event, "config:0-49"); /* full range; every field below is a sub-range of it */
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+PMU_FORMAT_ATTR(cache_sel, "config:20-23");
+PMU_FORMAT_ATTR(sample_mode, "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
+PMU_FORMAT_ATTR(thresh_start, "config:36-39");
+PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
+
+static struct attribute *isa207_pmu_format_attr[] = { /* one entry per PMU_FORMAT_ATTR() above */
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ &format_attr_cache_sel.attr,
+ &format_attr_sample_mode.attr,
+ &format_attr_thresh_sel.attr,
+ &format_attr_thresh_stop.attr,
+ &format_attr_thresh_start.attr,
+ &format_attr_thresh_cmp.attr,
+ NULL, /* terminates the list */
+};
+
+const struct attribute_group isa207_pmu_format_group = { /* non-static: usable from other files */
+ .name = "format",
+ .attrs = isa207_pmu_format_attr,
+};
+
+/*
+ * Return true when @event is PM_MRK_FAB_RSP_MATCH or
+ * PM_MRK_FAB_RSP_MATCH_CYC. Only pmc, unit and pmcxsel are compared; the
+ * edge bit (0) is ignored.
+ */
+static inline bool event_is_fab_match(u64 event)
+{
+	switch (event & 0xff0fe) {
+	case 0x30056:	/* PM_MRK_FAB_RSP_MATCH */
+	case 0x4f052:	/* PM_MRK_FAB_RSP_MATCH_CYC */
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Return true when @event has no bits set outside the valid-event mask of
+ * the running cpu (ISA v3.1, ISA v3.0, or the default mask).
+ */
+static bool is_event_valid(u64 event)
+{
+	u64 allowed;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		allowed = p10_EVENT_VALID_MASK;
+	else if (cpu_has_feature(CPU_FTR_ARCH_300))
+		allowed = p9_EVENT_VALID_MASK;
+	else
+		allowed = EVENT_VALID_MASK;
+
+	return (event & ~allowed) == 0;
+}
+
+/* Return true when the EVENT_IS_MARKED bit(s) are set in @event. */
+static inline bool is_event_marked(u64 event)
+{
+	return (event & EVENT_IS_MARKED) ? true : false;
+}
+
+/*
+ * Extract the SDAR mode field from @event, using the ISA v3.1 layout when
+ * the cpu supports it and the p9 layout otherwise.
+ */
+static unsigned long sdar_mod_val(u64 event)
+{
+	unsigned long mode;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mode = p10_SDAR_MODE(event);
+	else
+		mode = p9_SDAR_MODE(event);
+
+	return mode;
+}
+
+/*
+ * Program MMCRA[SDAR_MODE] in *@mmcra for @event.
+ *
+ * MMCRA[SDAR_MODE] specifies how the SDAR should be updated in continuous
+ * sampling mode.
+ *
+ * Power8 (no CPU_FTR_ARCH_300): the TLB mode value is OR-ed in, also for
+ * marked events.
+ *
+ * Power9/Power10:
+ *  - marked event, or the group already has a marked event
+ *    (MMCRA_SAMPLE_ENABLE set): the field is set to 'No Updates';
+ *  - otherwise the mode is taken from the event code;
+ *  - if that mode is zero, default to the DCACHE mode, because hardware
+ *    requires a non-zero value.
+ */
+static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
+{
+	if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+		*mmcra |= MMCRA_SDAR_MODE_TLB;
+		return;
+	}
+
+	if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) {
+		*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
+		return;
+	}
+
+	if (sdar_mod_val(event))
+		*mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
+	else
+		*mmcra |= MMCRA_SDAR_MODE_DCACHE;
+}
+
+/*
+ * Encode a threshold compare @value for MMCRA on ISA v3.1 cpus.
+ *
+ * On P10 the thresh_cmp value is not part of the raw event code; it comes
+ * in through attr.config1. The value is capped at 261120 (the largest
+ * threshold that can be written), then shifted right two places at a time,
+ * bumping the exponent on each shift, until it fits in 8 bits. The result
+ * is (exponent << 8) | mantissa.
+ *
+ * Returns 0 for a zero @value, @value unchanged on cpus without
+ * CPU_FTR_ARCH_31, -1 when the encoding is invalid, and the encoded value
+ * otherwise.
+ */
+static int p10_thresh_cmp_val(u64 value)
+{
+	int exp;
+
+	if (!value)
+		return 0;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31))
+		return value;
+
+	if (value > 261120)
+		value = 261120;
+
+	for (exp = 0; (64 - __builtin_clzl(value)) > 8; exp++)
+		value >>= 2;
+
+	/*
+	 * A mantissa whose upper 2 bits are zero is only valid together
+	 * with a zero exponent.
+	 */
+	if (exp && !(value & 0xC0))
+		return -1;
+
+	return (exp << 8) | value;
+}
+
+/*
+ * Return the threshold compare @value shifted to its position in MMCRA.
+ * On ISA v3.1 the value is first encoded by p10_thresh_cmp_val().
+ */
+static u64 thresh_cmp_val(u64 value)
+{
+	u64 cmp = value;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		cmp = p10_thresh_cmp_val(cmp);
+
+	/*
+	 * The threshold compare bits sit at a different place in MMCRA on
+	 * p8, hence the two shift values.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_300))
+		return cmp << MMCRA_THR_CMP_SHIFT;
+
+	return cmp << p9_MMCRA_THR_CMP_SHIFT;
+}
+
+/*
+ * Extract the combine field from @event, using the p9 layout on
+ * CPU_FTR_ARCH_300 cpus and the default layout otherwise.
+ */
+static unsigned long combine_from_event(u64 event)
+{
+	unsigned long combine;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		combine = p9_EVENT_COMBINE(event);
+	else
+		combine = EVENT_COMBINE(event);
+
+	return combine;
+}
+
+/*
+ * Return the MMCR1 shift of the combine bit for @pmc, using the p9 layout
+ * on CPU_FTR_ARCH_300 cpus and the default layout otherwise.
+ */
+static unsigned long combine_shift(unsigned long pmc)
+{
+	unsigned long shift;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_300))
+		shift = p9_MMCR1_COMBINE_SHIFT(pmc);
+	else
+		shift = MMCR1_COMBINE_SHIFT(pmc);
+
+	return shift;
+}
+
+/* Return true when the threshold select field of @event is non-zero. */
+static inline bool event_is_threshold(u64 event)
+{
+	u64 thr_sel = (event >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
+
+	return thr_sel != 0;
+}
+
+/*
+ * Check that the threshold compare value carried by @event is encodable.
+ *
+ * On ISA v3.1, @event is passed to p10_thresh_cmp_val() and the value is
+ * valid when that encoding is not negative. Otherwise the compare field is
+ * taken from the event code: the upper two mantissa bits must not both be
+ * zero unless the exponent is zero as well (see the THRESH_CMP_MANTISSA
+ * doc).
+ */
+static bool is_thresh_cmp_valid(u64 event)
+{
+	unsigned int field, exponent;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return p10_thresh_cmp_val(event) >= 0;
+
+	field = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
+	exponent = field >> 7;
+
+	return !exponent || (field & 0x60) != 0;
+}
+
+/*
+ * Return the cache selector bits of @event, masked with
+ * MMCR1_DC_IC_QUAL_MASK.
+ */
+static unsigned int dc_ic_rld_quad_l1_sel(u64 event)
+{
+	return (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK;
+}
+
+static inline u64 isa207_find_source(u64 idx, u32 sub_idx)
+{
+ u64 ret = PERF_MEM_NA; /* value returned when idx/sub_idx select no entry below */
+
+ switch(idx) { /* cases 4-6 decode sub_idx differently on ISA v3.1 (CPU_FTR_ARCH_31) */
+ case 0:
+ /* Nothing to do */
+ break;
+ case 1:
+ ret = PH(LVL, L1) | LEVEL(L1) | P(SNOOP, HIT);
+ break;
+ case 2:
+ ret = PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+ break;
+ case 3:
+ ret = PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ break;
+ case 4:
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ ret = P(SNOOP, HIT); /* sub_idx 0 and values above 7 add nothing to this */
+
+ if (sub_idx == 1)
+ ret |= PH(LVL, LOC_RAM) | LEVEL(RAM);
+ else if (sub_idx == 2 || sub_idx == 3)
+ ret |= P(LVL, HIT) | LEVEL(PMEM);
+ else if (sub_idx == 4)
+ ret |= PH(LVL, REM_RAM1) | REM | LEVEL(RAM) | P(HOPS, 2);
+ else if (sub_idx == 5 || sub_idx == 7)
+ ret |= P(LVL, HIT) | LEVEL(PMEM) | REM;
+ else if (sub_idx == 6)
+ ret |= PH(LVL, REM_RAM2) | REM | LEVEL(RAM) | P(HOPS, 3);
+ } else {
+ if (sub_idx <= 1)
+ ret = PH(LVL, LOC_RAM);
+ else if (sub_idx > 1 && sub_idx <= 2) /* i.e. sub_idx == 2 */
+ ret = PH(LVL, REM_RAM1);
+ else
+ ret = PH(LVL, REM_RAM2);
+ ret |= P(SNOOP, HIT);
+ }
+ break;
+ case 5:
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ ret = REM | P(HOPS, 0);
+
+ if (sub_idx == 0 || sub_idx == 4)
+ ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT);
+ else if (sub_idx == 1 || sub_idx == 5)
+ ret |= PH(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM);
+ else if (sub_idx == 2 || sub_idx == 6)
+ ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT);
+ else if (sub_idx == 3 || sub_idx == 7)
+ ret |= PH(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
+ } else { /* sub_idx above 5 leaves ret at PERF_MEM_NA */
+ if (sub_idx == 0)
+ ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+ else if (sub_idx == 1)
+ ret = PH(LVL, L2) | LEVEL(L2) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+ else if (sub_idx == 2 || sub_idx == 4)
+ ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HIT) | P(HOPS, 0);
+ else if (sub_idx == 3 || sub_idx == 5)
+ ret = PH(LVL, L3) | LEVEL(L3) | REM | P(SNOOP, HITM) | P(HOPS, 0);
+ }
+ break;
+ case 6:
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) { /* sub_idx above 3 leaves ret at PERF_MEM_NA */
+ if (sub_idx == 0)
+ ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HIT) | P(HOPS, 2);
+ else if (sub_idx == 1)
+ ret = PH(LVL, REM_CCE1) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HITM) | P(HOPS, 2);
+ else if (sub_idx == 2)
+ ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HIT) | P(HOPS, 3);
+ else if (sub_idx == 3)
+ ret = PH(LVL, REM_CCE2) | LEVEL(ANY_CACHE) | REM |
+ P(SNOOP, HITM) | P(HOPS, 3);
+ } else {
+ ret = PH(LVL, REM_CCE2); /* level is set for every sub_idx; snoop only for 0-3 */
+ if (sub_idx == 0 || sub_idx == 2)
+ ret |= P(SNOOP, HIT);
+ else if (sub_idx == 1 || sub_idx == 3)
+ ret |= P(SNOOP, HITM);
+ }
+ break;
+ case 7:
+ ret = PM(LVL, L1);
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Fill @dsrc with the memory data source and operation type of the
+ * current sample.
+ *
+ * @dsrc: output; set to 0 when the pmu has no SIER (@flags lacks
+ *        PPMU_HAS_SIER) or when the SIER type field is not 1, 2, or
+ *        (on ISA v3.1 only) 7
+ * @flags: pmu flags
+ * @regs: interrupt registers; regs->dar carries SPRN_SIER and regs->dsisr
+ *        carries SPRN_MMCRA, both saved by perf_read_regs at the start of
+ *        the PMU interrupt handler to avoid re-reading the SPRs
+ */
+void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
+			     struct pt_regs *regs)
+{
+	u64 sier, type, ldst, mmcra;
+	u32 src, elig;
+
+	/* Skip if no SIER support */
+	if (!(flags & PPMU_HAS_SIER)) {
+		dsrc->val = 0;
+		return;
+	}
+
+	sier = regs->dar;
+	type = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
+	if (!(type == 1 || type == 2 ||
+	      (type == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))) {
+		dsrc->val = 0;
+		return;
+	}
+
+	ldst = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
+	src = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
+
+	dsrc->val = isa207_find_source(ldst, src);
+
+	/* Types 1 and 2 name the operation directly */
+	if (type != 7) {
+		dsrc->val |= (type == 1) ? P(OP, LOAD) : P(OP, STORE);
+		return;
+	}
+
+	/*
+	 * Type 0b111 denotes either a larx or a stcx instruction. The MMCRA
+	 * sampling bits [57:59] tell the two apart; when the sampling
+	 * criteria is neither load nor store, the operation defaults to NA.
+	 */
+	mmcra = regs->dsisr;
+	elig = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
+	switch (elig) {
+	case 5:
+		dsrc->val |= P(OP, LOAD);
+		break;
+	case 7:
+		dsrc->val |= P(OP, STORE);
+		break;
+	default:
+		dsrc->val |= P(OP, NA);
+		break;
+	}
+}
+
+/*
+ * Store the sample weight derived from the MMCRA threshold counter into
+ * *@weight.
+ *
+ * The latency is mantissa << (2 * exponent), or 0 when the SIER type field
+ * is 0, or is 7 on a cpu without CPU_FTR_ARCH_31.
+ *
+ * @type with PERF_SAMPLE_WEIGHT: the full 64 bit weight field is written.
+ * Otherwise (WEIGHT_STRUCT): the latency goes into the lower 32 bits and,
+ * on ISA v3.1, the two remaining 16 bit fields receive the finish and
+ * dispatch cycle counts from SIER2.
+ */
+void isa207_get_mem_weight(u64 *weight, u64 type)
+{
+	union perf_sample_weight *out = (union perf_sample_weight *)weight;
+	u64 mmcra = mfspr(SPRN_MMCRA);
+	u64 exp = MMCRA_THR_CTR_EXP(mmcra);
+	u64 mant = MMCRA_THR_CTR_MANT(mmcra);
+	u64 sier = mfspr(SPRN_SIER);
+	u64 sier_type = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
+	u64 latency = 0;
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mant = P10_MMCRA_THR_CTR_MANT(mmcra);
+
+	if (sier_type != 0 &&
+	    (sier_type != 7 || cpu_has_feature(CPU_FTR_ARCH_31)))
+		latency = mant << (2 * exp);
+
+	if (type & PERF_SAMPLE_WEIGHT) {
+		out->full = latency;
+		return;
+	}
+
+	out->var1_dw = (u32)latency;
+	if (!cpu_has_feature(CPU_FTR_ARCH_31))
+		return;
+
+	out->var2_w = P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+	out->var3_w = P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+}
+
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
+{
+ unsigned int unit, pmc, cache, ebb;
+ unsigned long mask, value; /* built up below; stored to *maskp and *valp only on success */
+
+ mask = value = 0;
+
+ if (!is_event_valid(event))
+ return -1; /* every rejection returns -1 and leaves *maskp and *valp untouched */
+
+ pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
+ unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+ p10_EVENT_CACHE_SEL_MASK;
+ else
+ cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+ EVENT_CACHE_SEL_MASK;
+ ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;
+
+ if (pmc) {
+ u64 base_event;
+
+ if (pmc > 6)
+ return -1;
+
+ /* Ignore Linux defined bits when checking event below */
+ base_event = event & ~EVENT_LINUX_MASK;
+
+ if (pmc >= 5 && base_event != 0x500fa &&
+ base_event != 0x600f4)
+ return -1;
+
+ mask |= CNST_PMC_MASK(pmc);
+ value |= CNST_PMC_VAL(pmc);
+
+ /*
+ * PMC5 and PMC6 are used to count cycles and instructions and
+ * they do not support most of the constraint bits. Add a check
+ * to exclude PMC5/6 from most of the constraints except for
+ * EBB/BHRB.
+ */
+ if (pmc >= 5)
+ goto ebb_bhrb;
+ }
+
+ if (pmc <= 4) {
+ /*
+ * Add to number of counters in use. Note this includes events with
+ * a PMC of 0 - they still need a PMC, it's just assigned later.
+ * Don't count events on PMC 5 & 6, there is only one valid event
+ * on each of those counters, and they are handled above.
+ */
+ mask |= CNST_NC_MASK;
+ value |= CNST_NC_VAL;
+ }
+
+ if (unit >= 6 && unit <= 9) {
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (unit == 6) {
+ mask |= CNST_L2L3_GROUP_MASK;
+ value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+ }
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ mask |= CNST_CACHE_GROUP_MASK;
+ value |= CNST_CACHE_GROUP_VAL(event & 0xff);
+
+ mask |= CNST_CACHE_PMC4_MASK;
+ if (pmc == 4)
+ value |= CNST_CACHE_PMC4_VAL;
+ } else if (cache & 0x7) {
+ /*
+ * L2/L3 events contain a cache selector field, which is
+ * supposed to be programmed into MMCRC. However MMCRC is only
+ * HV writable, and there is no API for guest kernels to modify
+ * it. The solution is for the hypervisor to initialise the
+ * field to zeroes, and for us to only ever allow events that
+ * have a cache selector of zero. The bank selector (bit 3) is
+ * irrelevant, as long as the rest of the value is 0.
+ */
+ return -1;
+ }
+
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) {
+ mask |= CNST_L1_QUAL_MASK;
+ value |= CNST_L1_QUAL_VAL(cache);
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ mask |= CNST_RADIX_SCOPE_GROUP_MASK;
+ value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
+ }
+
+ if (is_event_marked(event)) {
+ mask |= CNST_SAMPLE_MASK;
+ value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) { /* ISA v3.1: compare value comes from event_config1 */
+ if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
+ mask |= CNST_THRESH_CTL_SEL_MASK;
+ value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+ mask |= p10_CNST_THRESH_CMP_MASK;
+ value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
+ } else if (event_is_threshold(event))
+ return -1;
+ } else if (cpu_has_feature(CPU_FTR_ARCH_300)) { /* ISA v3.0: compare value is part of the event code */
+ if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
+ mask |= CNST_THRESH_MASK;
+ value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
+ } else if (event_is_threshold(event))
+ return -1;
+ } else {
+ /*
+ * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
+ * the threshold control bits are used for the match value.
+ */
+ if (event_is_fab_match(event)) {
+ mask |= CNST_FAB_MATCH_MASK;
+ value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
+ } else {
+ if (!is_thresh_cmp_valid(event))
+ return -1;
+
+ mask |= CNST_THRESH_MASK;
+ value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
+ }
+ }
+
+ebb_bhrb: /* PMC5/6 events jump here, skipping the constraints above */
+ if (!pmc && ebb)
+ /* EBB events must specify the PMC */
+ return -1;
+
+ if (event & EVENT_WANTS_BHRB) {
+ if (!ebb)
+ /* Only EBB events can request BHRB */
+ return -1;
+
+ mask |= CNST_IFM_MASK;
+ value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
+ }
+
+ /*
+ * All events must agree on EBB, either all request it or none.
+ * EBB events are pinned & exclusive, so this should never actually
+ * hit, but we leave it as a fallback in case.
+ */
+ mask |= CNST_EBB_MASK;
+ value |= CNST_EBB_VAL(ebb);
+
+ *maskp = mask;
+ *valp = value;
+
+ return 0;
+}
+
+/*
+ * Compute the MMCR0/1/2/3 and MMCRA values for a set of events.
+ *
+ * event[]   raw event codes, n_ev entries
+ * hwc[]     output: zero-based PMC index chosen for each event
+ * mmcr      output: register values to program
+ * pevents[] perf_event backing each event; read for attr.exclude_*,
+ *           attr.config1 and the branch-stack request
+ * flags     PMU flags; only PPMU_HAS_ATTR_CONFIG1 is tested here
+ *
+ * Always returns 0.
+ */
+int isa207_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags)
+{
+ unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
+ unsigned long mmcr3;
+ unsigned int pmc, pmc_inuse;
+ int i;
+
+ /* Bitmap of claimed PMCs; bit n stands for PMCn (1-based) */
+ pmc_inuse = 0;
+
+ /* First pass to count resource use */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
+ if (pmc)
+ pmc_inuse |= 1 << pmc;
+ }
+
+ mmcra = mmcr1 = mmcr2 = mmcr3 = 0;
+
+ /*
+ * Disable bhrb unless explicitly requested
+ * by setting MMCRA (BHRBRD) bit.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mmcra |= MMCRA_BHRB_DISABLE;
+
+ /* Second pass: assign PMCs, set all MMCR1 fields */
+ for (i = 0; i < n_ev; ++i) {
+ pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
+ unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
+ combine = combine_from_event(event[i]);
+ psel = event[i] & EVENT_PSEL_MASK;
+
+ /* No PMC requested: take the lowest free one among PMC1-4 */
+ if (!pmc) {
+ for (pmc = 1; pmc <= 4; ++pmc) {
+ if (!(pmc_inuse & (1 << pmc)))
+ break;
+ }
+
+ /*
+ * NOTE(review): if PMC1-4 are all taken the loop exits with
+ * pmc == 5 and that value is recorded below; presumably the
+ * constraint check prevents this case - confirm.
+ */
+ pmc_inuse |= 1 << pmc;
+ }
+
+ /* Unit/combine/PMCxSEL fields are only written for PMC1-4 */
+ if (pmc <= 4) {
+ mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
+ mmcr1 |= combine << combine_shift(pmc);
+ mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
+ }
+
+ /* In continuous sampling mode, update SDAR on TLB miss */
+ mmcra_sdar_mode(event[i], &mmcra);
+
+ /*
+ * On ISA v3.0 the DC/IC qualifier is always programmed; on
+ * older CPUs only for events flagged EVENT_IS_L1.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ } else {
+ if (event[i] & EVENT_IS_L1) {
+ cache = dc_ic_rld_quad_l1_sel(event[i]);
+ mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT;
+ }
+ }
+
+ /* Set RADIX_SCOPE_QUAL bit */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
+ p10_EVENT_RADIX_SCOPE_QUAL_MASK;
+ mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
+ }
+
+ if (is_event_marked(event[i])) {
+ mmcra |= MMCRA_SAMPLE_ENABLE;
+
+ /* Low two bits of the sample field are the mode, the rest eligibility */
+ val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+ if (val) {
+ mmcra |= (val & 3) << MMCRA_SAMP_MODE_SHIFT;
+ mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
+ }
+ }
+
+ /*
+ * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
+ * the threshold bits are used for the match value.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300) && event_is_fab_match(event[i])) {
+ mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
+ EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
+ } else {
+ val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+ mmcra |= val << MMCRA_THR_CTL_SHIFT;
+ val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
+ mmcra |= val << MMCRA_THR_SEL_SHIFT;
+ /*
+ * Threshold compare comes from the event code before
+ * ISA v3.1, and from attr.config1 on v3.1 when the PMU
+ * advertises PPMU_HAS_ATTR_CONFIG1.
+ */
+ if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+ val = (event[i] >> EVENT_THR_CMP_SHIFT) &
+ EVENT_THR_CMP_MASK;
+ mmcra |= thresh_cmp_val(val);
+ } else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+ val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+ p10_EVENT_THR_CMP_MASK;
+ mmcra |= thresh_cmp_val(val);
+ }
+ }
+
+ /* On ISA v3.1, unit 6 events carry an L2/L3 select that goes into MMCR2 */
+ if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+ val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
+ p10_EVENT_L2L3_SEL_MASK;
+ mmcr2 |= val << p10_L2L3_SEL_SHIFT;
+ }
+
+ if (event[i] & EVENT_WANTS_BHRB) {
+ val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
+ mmcra |= val << MMCRA_IFM_SHIFT;
+ }
+
+ /* set MMCRA (BHRBRD) to 0 if there is user request for BHRB */
+ if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+ (has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB)))
+ mmcra &= ~MMCRA_BHRB_DISABLE;
+
+ /* Translate the perf exclude_* attributes into per-PMC MMCR2 bits */
+ if (pevents[i]->attr.exclude_user)
+ mmcr2 |= MMCR2_FCP(pmc);
+
+ if (pevents[i]->attr.exclude_hv)
+ mmcr2 |= MMCR2_FCH(pmc);
+
+ /* In HV mode the kernel is excluded via the FCH bit, otherwise via FCS */
+ if (pevents[i]->attr.exclude_kernel) {
+ if (cpu_has_feature(CPU_FTR_HVMODE))
+ mmcr2 |= MMCR2_FCH(pmc);
+ else
+ mmcr2 |= MMCR2_FCS(pmc);
+ }
+
+ if (pevents[i]->attr.exclude_idle)
+ mmcr2 |= MMCR2_FCWAIT(pmc);
+
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (pmc <= 4) {
+ val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
+ p10_EVENT_MMCR3_MASK;
+ mmcr3 |= val << MMCR3_SHIFT(pmc);
+ }
+ }
+
+ /* Callers get a zero-based counter index */
+ hwc[i] = pmc - 1;
+ }
+
+ /* Return MMCRx values */
+ mmcr->mmcr0 = 0;
+
+ /* pmc_inuse is 1-based */
+ if (pmc_inuse & 2)
+ mmcr->mmcr0 = MMCR0_PMC1CE;
+
+ /* 0x7c covers bits 2..6, i.e. any of PMC2-PMC6 */
+ if (pmc_inuse & 0x7c)
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+
+ /* If we're not using PMC 5 or 6, freeze them */
+ if (!(pmc_inuse & 0x60))
+ mmcr->mmcr0 |= MMCR0_FC56;
+
+ /*
+ * Set mmcr0 (PMCCEXT) for p10 which
+ * will restrict access to group B registers
+ * when MMCR0 PMCC=0b00.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ mmcr->mmcr0 |= MMCR0_PMCCEXT;
+
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
+ mmcr->mmcr2 = mmcr2;
+ mmcr->mmcr3 = mmcr3;
+
+ return 0;
+}
+
+/*
+ * Clear the PMCxSEL field of one counter in mmcr->mmcr1.
+ *
+ * @pmc is zero-based; values above 3 are ignored and leave @mmcr untouched.
+ */
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
+{
+	unsigned long psel_field;
+
+	if (pmc > 3)
+		return;
+
+	/* MMCR1_PMCSEL_SHIFT() takes a 1-based PMC number */
+	psel_field = 0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1);
+	mmcr->mmcr1 &= ~psel_field;
+}
+
+/*
+ * Look up @event in the alternatives table @ev_alt (@size rows of up to
+ * MAX_ALT codes, a zero entry ending a row early).
+ *
+ * The scan stops at the first row whose leading code is greater than
+ * @event, so the table is presumably sorted by first column - confirm
+ * against the tables passed in by callers.
+ *
+ * Returns the index of the row containing @event, or -1 if none does.
+ */
+static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size)
+{
+	int row;
+
+	for (row = 0; row < size && event >= ev_alt[row][0]; row++) {
+		int col = 0;
+
+		while (col < MAX_ALT && ev_alt[row][col]) {
+			if (ev_alt[row][col] == event)
+				return row;
+			col++;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Fill @alt with @event followed by its alternative encodings.
+ *
+ * @size is the number of rows in @ev_alt (it is handed to
+ * find_alternative()); it is not a bound on @alt, which the caller must
+ * size large enough.  With PPMU_ONLY_COUNT_RUN set in @flags, the
+ * cycle and instruction-completed events additionally gain their
+ * run-state equivalents (and vice versa).
+ *
+ * Returns the number of entries written to @alt.
+ */
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+			    const unsigned int ev_alt[][MAX_ALT])
+{
+	int row, k, base_cnt, num_alt;
+	u64 candidate;
+
+	/* Slot 0 always holds the event as requested */
+	alt[0] = event;
+	num_alt = 1;
+
+	row = find_alternative(event, ev_alt, size);
+	for (k = 0; row >= 0 && k < MAX_ALT; k++) {
+		candidate = ev_alt[row][k];
+		/* Skip empty slots and the original, which is already in alt[0] */
+		if (!candidate || candidate == event)
+			continue;
+		alt[num_alt++] = candidate;
+	}
+
+	if (!(flags & PPMU_ONLY_COUNT_RUN))
+		return num_alt;
+
+	/*
+	 * We're only counting in RUN state, so PM_CYC is equivalent to
+	 * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.  Only the
+	 * entries present so far are examined; the ones appended here
+	 * are not revisited.
+	 */
+	base_cnt = num_alt;
+	for (k = 0; k < base_cnt; k++) {
+		if (alt[k] == 0x1e)		/* PMC_CYC */
+			alt[num_alt++] = 0x600f4;	/* PM_RUN_CYC */
+		else if (alt[k] == 0x600f4)
+			alt[num_alt++] = 0x1e;
+		else if (alt[k] == 0x2)		/* PM_INST_CMPL */
+			alt[num_alt++] = 0x500fa;	/* PM_RUN_INST_CMPL */
+		else if (alt[k] == 0x500fa)
+			alt[num_alt++] = 0x2;
+	}
+
+	return num_alt;
+}
+
+/*
+ * Reject raw event codes that use reserved sampling or threshold
+ * encodings.
+ *
+ * Returns 0 if ev->attr.config is acceptable, -EINVAL otherwise.
+ */
+int isa3XX_check_attr_config(struct perf_event *ev)
+{
+	u64 config = ev->attr.config;
+	u64 sample = (config >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+	u64 thr_ctl = (config >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
+
+	/*
+	 * MMCRA[61:62] is Random Sampling Mode (SM), held in the low two
+	 * bits of the sample field.  A value of 0b11 is reserved.
+	 */
+	if ((sample & 0x3) == 0x3)
+		return -EINVAL;
+
+	/*
+	 * Remaining reserved values of the whole sample field.
+	 * Source: Performance Monitoring Unit User Guide
+	 */
+	if (sample == 0x5 || sample == 0x9 || sample == 0xD ||
+	    sample == 0x19 || sample == 0x1D ||
+	    sample == 0x1A || sample == 0x1E)
+		return -EINVAL;
+
+	/*
+	 * MMCRA[48:51]/[52:55] Threshold Start/Stop Events Selection.
+	 * 0b1111 in either nibble is reserved.
+	 */
+	if ((thr_ctl & 0xF0) == 0xF0 || (thr_ctl & 0xF) == 0xF)
+		return -EINVAL;
+
+	return 0;
+}
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
new file mode 100644
index 000000000000..f594fa6580d1
--- /dev/null
+++ b/arch/powerpc/perf/isa207-common.h
@@ -0,0 +1,293 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+
+#ifndef _LINUX_POWERPC_PERF_ISA207_COMMON_H_
+#define _LINUX_POWERPC_PERF_ISA207_COMMON_H_
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <asm/firmware.h>
+#include <asm/cputable.h>
+
+#include "internal.h"
+
+#define EVENT_EBB_MASK 1ull
+#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT
+#define EVENT_BHRB_MASK 1ull
+#define EVENT_BHRB_SHIFT 62
+#define EVENT_WANTS_BHRB (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
+#define EVENT_IFM_MASK 3ull
+#define EVENT_IFM_SHIFT 60
+#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */
+#define EVENT_THR_CMP_MASK 0x3ff
+#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */
+#define EVENT_THR_CTL_MASK 0xffull
+#define EVENT_THR_SEL_SHIFT 29 /* Threshold select value */
+#define EVENT_THR_SEL_MASK 0x7
+#define EVENT_THRESH_SHIFT 29 /* All threshold bits */
+#define EVENT_THRESH_MASK 0x1fffffull
+#define EVENT_SAMPLE_SHIFT 24 /* Sampling mode & eligibility */
+#define EVENT_SAMPLE_MASK 0x1f
+#define EVENT_CACHE_SEL_SHIFT 20 /* L2/L3 cache select */
+#define EVENT_CACHE_SEL_MASK 0xf
+#define EVENT_IS_L1 (4 << EVENT_CACHE_SEL_SHIFT)
+#define EVENT_PMC_SHIFT 16 /* PMC number (1-based) */
+#define EVENT_PMC_MASK 0xf
+#define EVENT_UNIT_SHIFT 12 /* Unit */
+#define EVENT_UNIT_MASK 0xf
+#define EVENT_COMBINE_SHIFT 11 /* Combine bit */
+#define EVENT_COMBINE_MASK 0x1
+#define EVENT_COMBINE(v) (((v) >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK)
+#define EVENT_MARKED_SHIFT 8 /* Marked bit */
+#define EVENT_MARKED_MASK 0x1
+#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
+#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */
+
+/* Bits defined by Linux */
+#define EVENT_LINUX_MASK \
+ ((EVENT_EBB_MASK << EVENT_EBB_SHIFT) | \
+ (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) | \
+ (EVENT_IFM_MASK << EVENT_IFM_SHIFT))
+
+#define EVENT_VALID_MASK \
+ ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
+ (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
+ (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
+ (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
+ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
+ (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \
+ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
+ EVENT_LINUX_MASK | \
+ EVENT_PSEL_MASK)
+
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
+/* Constants to support power9 raw encoding format */
+#define p9_EVENT_COMBINE_SHIFT 10 /* Combine bit */
+#define p9_EVENT_COMBINE_MASK 0x3ull
+#define p9_EVENT_COMBINE(v) (((v) >> p9_EVENT_COMBINE_SHIFT) & p9_EVENT_COMBINE_MASK)
+#define p9_SDAR_MODE_SHIFT 50
+#define p9_SDAR_MODE_MASK 0x3ull
+#define p9_SDAR_MODE(v) (((v) >> p9_SDAR_MODE_SHIFT) & p9_SDAR_MODE_MASK)
+
+#define p9_EVENT_VALID_MASK \
+ ((p9_SDAR_MODE_MASK << p9_SDAR_MODE_SHIFT | \
+ (EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
+ (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
+ (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
+ (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
+ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
+ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
+ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
+ EVENT_LINUX_MASK | \
+ EVENT_PSEL_MASK))
+
+/* Constants to support power10 raw encoding format */
+#define p10_SDAR_MODE_SHIFT 22
+#define p10_SDAR_MODE_MASK 0x3ull
+#define p10_SDAR_MODE(v) (((v) >> p10_SDAR_MODE_SHIFT) & \
+ p10_SDAR_MODE_MASK)
+#define p10_EVENT_L2L3_SEL_MASK 0x1f
+#define p10_L2L3_SEL_SHIFT 3
+#define p10_L2L3_EVENT_SHIFT 40
+#define p10_EVENT_THRESH_MASK 0xffffull
+#define p10_EVENT_CACHE_SEL_MASK 0x3ull
+#define p10_EVENT_MMCR3_MASK 0x7fffull
+#define p10_EVENT_MMCR3_SHIFT 45
+#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT 9
+#define p10_EVENT_RADIX_SCOPE_QUAL_MASK 0x1
+#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT 45
+
+/* Event Threshold Compare bit constant for power10 in config1 attribute */
+#define p10_EVENT_THR_CMP_SHIFT 0
+#define p10_EVENT_THR_CMP_MASK 0x3FFFFull
+
+#define p10_EVENT_VALID_MASK \
+ ((p10_SDAR_MODE_MASK << p10_SDAR_MODE_SHIFT | \
+ (p10_EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
+ (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
+ (p10_EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
+ (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
+ (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
+ (p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT) | \
+ (p10_EVENT_MMCR3_MASK << p10_EVENT_MMCR3_SHIFT) | \
+ (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
+ (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) | \
+ EVENT_LINUX_MASK | \
+ EVENT_PSEL_MASK))
+/*
+ * Layout of constraint bits:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ]
+ * | |
+ * [ thresh_cmp bits for p10] thresh_sel -*
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1]
+ * | | | | |
+ * BHRB IFM -* | | |*radix_scope | Count of events for each PMC.
+ * EBB -* | | p1, p2, p3, p4, p5, p6.
+ * L1 I/D qualifier -* |
+ * nc - number of counters -*
+ *
+ * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
+ * we want the low bit of each field to be added to any existing value.
+ *
+ * Everything else is a value field.
+ */
+
+#define CNST_FAB_MATCH_VAL(v) (((v) & EVENT_THR_CTL_MASK) << 56)
+#define CNST_FAB_MATCH_MASK CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK)
+
+/* We just throw all the threshold bits into the constraint */
+#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
+#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK)
+
+#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
+#define p10_CNST_THRESH_CMP_VAL(v) (((v) & 0x7ffull) << 43)
+#define p10_CNST_THRESH_CMP_MASK p10_CNST_THRESH_CMP_VAL(0x7ff)
+
+#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
+#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
+
+#define CNST_IFM_VAL(v) (((v) & EVENT_IFM_MASK) << 25)
+#define CNST_IFM_MASK CNST_IFM_VAL(EVENT_IFM_MASK)
+
+#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22)
+#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3)
+
+#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16)
+#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
+
+#define CNST_CACHE_GROUP_VAL(v) (((v) & 0xffull) << 55)
+#define CNST_CACHE_GROUP_MASK CNST_CACHE_GROUP_VAL(0xff)
+#define CNST_CACHE_PMC4_VAL (1ull << 54)
+#define CNST_CACHE_PMC4_MASK CNST_CACHE_PMC4_VAL
+
+#define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
+#define CNST_L2L3_GROUP_MASK CNST_L2L3_GROUP_VAL(0x1f)
+
+#define CNST_RADIX_SCOPE_GROUP_VAL(v) (((v) & 0x1ull) << 21)
+#define CNST_RADIX_SCOPE_GROUP_MASK CNST_RADIX_SCOPE_GROUP_VAL(1)
+
+/*
+ * For NC we are counting up to 4 events. This requires three bits, and we need
+ * the fifth event to overflow and set the 4th bit. To achieve that we bias the
+ * fields by 3 in test_adder.
+ */
+#define CNST_NC_SHIFT 12
+#define CNST_NC_VAL (1 << CNST_NC_SHIFT)
+#define CNST_NC_MASK (8 << CNST_NC_SHIFT)
+#define ISA207_TEST_ADDER (3 << CNST_NC_SHIFT)
+
+/*
+ * For the per-PMC fields we have two bits. The low bit is added, so if two
+ * events ask for the same PMC the sum will overflow, setting the high bit,
+ * indicating an error. So our mask sets the high bit.
+ *
+ * The pmc argument is 1-based. It is parenthesized in the expansion so
+ * that an expression argument (e.g. "a | b") is evaluated as a whole.
+ */
+#define CNST_PMC_SHIFT(pmc)	(((pmc) - 1) * 2)
+#define CNST_PMC_VAL(pmc)	(1 << CNST_PMC_SHIFT(pmc))
+#define CNST_PMC_MASK(pmc)	(2 << CNST_PMC_SHIFT(pmc))
+
+/*
+ * Our add_fields is defined as the low bit of every adder field (one per
+ * PMC plus the NC counter).  The whole expression is parenthesized so the
+ * macro can be used safely inside a larger expression.
+ */
+#define ISA207_ADD_FIELDS \
+	(CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
+	 CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL)
+
+/* Bits in MMCR1 for PowerISA v2.07 */
+#define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1)))
+#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1))
+#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8)
+#define MMCR1_FAB_SHIFT 36
+#define MMCR1_DC_IC_QUAL_MASK 0x3
+#define MMCR1_DC_IC_QUAL_SHIFT 46
+
+/*
+ * MMCR1 Combine bits macro for power9: two bits per PMC, pmc is 1-based.
+ * The argument is parenthesized so expression arguments expand correctly.
+ */
+#define p9_MMCR1_COMBINE_SHIFT(pmc)	(38 - (((pmc) - 1) * 2))
+
+/* Bits in MMCRA for PowerISA v2.07 */
+#define MMCRA_SAMP_MODE_SHIFT 1
+#define MMCRA_SAMP_ELIG_SHIFT 4
+#define MMCRA_SAMP_ELIG_MASK 7
+#define MMCRA_THR_CTL_SHIFT 8
+#define MMCRA_THR_SEL_SHIFT 16
+#define MMCRA_THR_CMP_SHIFT 32
+#define MMCRA_SDAR_MODE_SHIFT 42
+#define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_SDAR_MODE_DCACHE (2ull << MMCRA_SDAR_MODE_SHIFT)
+#define MMCRA_IFM_SHIFT 30
+#define MMCRA_THR_CTR_MANT_SHIFT 19
+#define MMCRA_THR_CTR_MANT_MASK 0x7Ful
+#define MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+ MMCRA_THR_CTR_MANT_MASK)
+
+#define MMCRA_THR_CTR_EXP_SHIFT 27
+#define MMCRA_THR_CTR_EXP_MASK 0x7ul
+#define MMCRA_THR_CTR_EXP(v) (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
+ MMCRA_THR_CTR_EXP_MASK)
+
+#define P10_MMCRA_THR_CTR_MANT_MASK 0xFFul
+#define P10_MMCRA_THR_CTR_MANT(v) (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+ P10_MMCRA_THR_CTR_MANT_MASK)
+
+/* MMCRA Threshold Compare bit constant for power9 */
+#define p9_MMCRA_THR_CMP_SHIFT 45
+
+/* Bits in MMCR2 for PowerISA v2.07 */
+#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9)))
+#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9)))
+#define MMCR2_FCWAIT(pmc) (1ull << (58 - (((pmc) - 1) * 9)))
+#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9)))
+
+#define MAX_ALT 2
+#define MAX_PMU_COUNTERS 6
+
+/* Bits in MMCR3 for PowerISA v3.10 */
+#define MMCR3_SHIFT(pmc) (49 - (15 * ((pmc) - 1)))
+
+#define ISA207_SIER_TYPE_SHIFT 15
+#define ISA207_SIER_TYPE_MASK (0x7ull << ISA207_SIER_TYPE_SHIFT)
+
+#define ISA207_SIER_LDST_SHIFT 1
+#define ISA207_SIER_LDST_MASK (0x7ull << ISA207_SIER_LDST_SHIFT)
+
+#define ISA207_SIER_DATA_SRC_SHIFT 53
+#define ISA207_SIER_DATA_SRC_MASK (0x7ull << ISA207_SIER_DATA_SRC_SHIFT)
+
+/* Bits in SIER2/SIER3 for Power10 */
+#define P10_SIER2_FINISH_CYC(sier2) (((sier2) >> (63 - 37)) & 0x7fful)
+#define P10_SIER2_DISPATCH_CYC(sier2) (((sier2) >> (63 - 13)) & 0x7fful)
+
+#define P(a, b) PERF_MEM_S(a, b)
+#define PH(a, b) (P(LVL, HIT) | P(a, b))
+#define PM(a, b) (P(LVL, MISS) | P(a, b))
+#define LEVEL(x) P(LVLNUM, x)
+#define REM P(REMOTE, REMOTE)
+
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1);
+int isa207_compute_mmcr(u64 event[], int n_ev,
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[], u32 flags);
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
+int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
+ const unsigned int ev_alt[][MAX_ALT]);
+void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
+ struct pt_regs *regs);
+void isa207_get_mem_weight(u64 *weight, u64 type);
+
+int isa3XX_check_attr_config(struct perf_event *ev);
+
+#endif
diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c
new file mode 100644
index 000000000000..ae264c9080ef
--- /dev/null
+++ b/arch/powerpc/perf/kvm-hv-pmu.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Description: PMUs specific to running nested KVM-HV guests
+ * on Book3S processors (specifically POWER9 and later).
+ */
+
+#define pr_fmt(fmt) "kvmppc-pmu: " fmt
+
+#include "asm-generic/local64.h"
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/ratelimit.h>
+#include <linux/kvm_host.h>
+#include <linux/gfp_types.h>
+#include <linux/pgtable.h>
+#include <linux/perf_event.h>
+#include <linux/spinlock_types.h>
+#include <linux/spinlock.h>
+
+#include <asm/types.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu.h>
+#include <asm/pgalloc.h>
+#include <asm/pte-walk.h>
+#include <asm/reg.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/firmware.h>
+
+#include "asm/guest-state-buffer.h"
+
+/*
+ * Event ids accepted in perf_event_attr.config for this PMU. Each id
+ * selects one field of the host-wide stats in kvmppc_pmu_event_update().
+ */
+enum kvmppc_pmu_eventid {
+ /* reads l0_stats.guest_heap */
+ KVMPPC_EVENT_HOST_HEAP,
+ /* reads l0_stats.guest_heap_max */
+ KVMPPC_EVENT_HOST_HEAP_MAX,
+ /* reads l0_stats.guest_pgtable_size */
+ KVMPPC_EVENT_HOST_PGTABLE,
+ /* reads l0_stats.guest_pgtable_size_max */
+ KVMPPC_EVENT_HOST_PGTABLE_MAX,
+ /* reads l0_stats.guest_pgtable_reclaim */
+ KVMPPC_EVENT_HOST_PGTABLE_RECLAIM,
+ /* number of events; configs >= this are rejected at event init */
+ KVMPPC_EVENT_MAX,
+};
+
+#define KVMPPC_PMU_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR_ID(_name, kvmppc_events_sysfs_show, _id)
+
+/*
+ * sysfs show callback for the "events" attributes: prints the event id
+ * stored in the enclosing perf_pmu_events_attr as "event=0x..".
+ */
+static ssize_t kvmppc_events_sysfs_show(struct device *dev,
+					struct device_attribute *attr,
+					char *page)
+{
+	struct perf_pmu_events_attr *ev_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%02llx\n", ev_attr->id);
+}
+
+/*
+ * Holds the hostwide stats. Each field is filled from the guest-state
+ * element of the matching KVMPPC_GSID_L0_GUEST_* id when a reply from
+ * L0 is parsed; units are whatever L0 reports (not visible here).
+ */
+static struct kvmppc_hostwide_stats {
+ /* KVMPPC_GSID_L0_GUEST_HEAP */
+ u64 guest_heap;
+ /* KVMPPC_GSID_L0_GUEST_HEAP_MAX */
+ u64 guest_heap_max;
+ /* KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE */
+ u64 guest_pgtable_size;
+ /* KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX */
+ u64 guest_pgtable_size_max;
+ /* KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM */
+ u64 guest_pgtable_reclaim;
+} l0_stats;
+
+/* Protect access to l0_stats */
+static DEFINE_SPINLOCK(lock_l0_stats);
+
+/* GSB related structs needed to talk to L0 */
+static struct kvmppc_gs_msg *gsm_l0_stats;
+static struct kvmppc_gs_buff *gsb_l0_stats;
+static struct kvmppc_gs_parser gsp_l0_stats;
+
+static struct attribute *kvmppc_pmu_events_attr[] = {
+ KVMPPC_PMU_EVENT_ATTR(host_heap, KVMPPC_EVENT_HOST_HEAP),
+ KVMPPC_PMU_EVENT_ATTR(host_heap_max, KVMPPC_EVENT_HOST_HEAP_MAX),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable, KVMPPC_EVENT_HOST_PGTABLE),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable_max, KVMPPC_EVENT_HOST_PGTABLE_MAX),
+ KVMPPC_PMU_EVENT_ATTR(host_pagetable_reclaim, KVMPPC_EVENT_HOST_PGTABLE_RECLAIM),
+ NULL,
+};
+
+static const struct attribute_group kvmppc_pmu_events_group = {
+ .name = "events",
+ .attrs = kvmppc_pmu_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-5");
+static struct attribute *kvmppc_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+/*
+ * "format" sysfs group. Never modified after initialisation, so it is
+ * const like the "events" group it is listed alongside.
+ */
+static const struct attribute_group kvmppc_pmu_format_group = {
+	.name = "format",
+	.attrs = kvmppc_pmu_format_attr,
+};
+
+static const struct attribute_group *kvmppc_pmu_attr_groups[] = {
+ &kvmppc_pmu_events_group,
+ &kvmppc_pmu_format_group,
+ NULL,
+};
+
+/*
+ * Fetch fresh host-wide stats from L0 into l0_stats.
+ *
+ * Must be called with lock_l0_stats held.  Returns 0 on success or the
+ * first error reported while receiving, parsing or refreshing.
+ */
+static int kvmppc_update_l0_stats(void)
+{
+	int err;
+
+	/* With HOST_WIDE flags guestid and vcpuid will be ignored */
+	err = kvmppc_gsb_recv(gsb_l0_stats, KVMPPC_GS_FLAGS_HOST_WIDE);
+	if (err)
+		return err;
+
+	/* Check that the returned guest state buffer parses cleanly */
+	err = kvmppc_gse_parse(&gsp_l0_stats, gsb_l0_stats);
+	if (err)
+		return err;
+
+	/* Start from zero so fields absent from the reply do not keep stale values */
+	memset(&l0_stats, 0, sizeof(l0_stats));
+
+	return kvmppc_gsm_refresh_info(gsm_l0_stats, gsb_l0_stats);
+}
+
+/*
+ * Update the value of the given perf_event.
+ *
+ * Refreshes l0_stats from L0 under lock_l0_stats, picks the stat selected
+ * by event->attr.config, stores it as the new hw.prev_count and adds the
+ * increase over the previous value to event->count. If the stat did not
+ * grow, event->count is left unchanged.
+ *
+ * Returns 0 on success, the error from kvmppc_update_l0_stats(), or
+ * -ENOENT for an unknown config.
+ */
+static int kvmppc_pmu_event_update(struct perf_event *event)
+{
+ int rc;
+ u64 curr_val, prev_val;
+ unsigned long flags;
+ unsigned int config = event->attr.config;
+
+ /* Ensure no one else is modifying the l0_stats */
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ rc = kvmppc_update_l0_stats();
+ if (!rc) {
+ /* curr_val is only assigned on the paths that leave rc == 0 */
+ switch (config) {
+ case KVMPPC_EVENT_HOST_HEAP:
+ curr_val = l0_stats.guest_heap;
+ break;
+ case KVMPPC_EVENT_HOST_HEAP_MAX:
+ curr_val = l0_stats.guest_heap_max;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE:
+ curr_val = l0_stats.guest_pgtable_size;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE_MAX:
+ curr_val = l0_stats.guest_pgtable_size_max;
+ break;
+ case KVMPPC_EVENT_HOST_PGTABLE_RECLAIM:
+ curr_val = l0_stats.guest_pgtable_reclaim;
+ break;
+ default:
+ rc = -ENOENT;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+
+ /* If no error then update the perf event */
+ if (!rc) {
+ /* prev_count always takes the new sample, even when it is smaller */
+ prev_val = local64_xchg(&event->hw.prev_count, curr_val);
+ if (curr_val > prev_val)
+ local64_add(curr_val - prev_val, &event->count);
+ }
+
+ return rc;
+}
+
+/*
+ * perf event_init callback: validate the event and reset its counters.
+ *
+ * Returns -ENOENT if the event is not for this PMU, -EINVAL if
+ * attr.config is not a known event id, 0 otherwise.
+ */
+static int kvmppc_pmu_event_init(struct perf_event *event)
+{
+	/*
+	 * Keep the full 64-bit config: narrowing it to unsigned int would
+	 * let a value with stray upper bits pass the range check below.
+	 */
+	u64 config = event->attr.config;
+
+	pr_debug("%s: Event(%p) id=%llu cpu=%x on_cpu=%x config=%llu\n",
+		 __func__, event, event->id, event->cpu,
+		 event->oncpu, config);
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (config >= KVMPPC_EVENT_MAX)
+		return -EINVAL;
+
+	local64_set(&event->hw.prev_count, 0);
+	local64_set(&event->count, 0);
+
+	return 0;
+}
+
+/*
+ * perf del callback: take a final sample of the event. @flags is unused
+ * and any error from the update is dropped (the callback returns void).
+ */
+static void kvmppc_pmu_del(struct perf_event *event, int flags)
+{
+ kvmppc_pmu_event_update(event);
+}
+
+/*
+ * perf add callback: sample the event right away when PERF_EF_START is
+ * set in @flags, returning the result of that update; otherwise do
+ * nothing and return 0.
+ */
+static int kvmppc_pmu_add(struct perf_event *event, int flags)
+{
+	if (!(flags & PERF_EF_START))
+		return 0;
+
+	return kvmppc_pmu_event_update(event);
+}
+
+/*
+ * perf read callback: refresh the event count from L0. Any error from
+ * the update is dropped (the callback returns void).
+ */
+static void kvmppc_pmu_read(struct perf_event *event)
+{
+ kvmppc_pmu_event_update(event);
+}
+
+/*
+ * get_size op: total guest state buffer space needed for the five
+ * host-wide stat elements.  @gsm is not consulted.
+ */
+static size_t hostwide_get_size(struct kvmppc_gs_msg *gsm)
+{
+	const u16 ids[] = {
+		KVMPPC_GSID_L0_GUEST_HEAP,
+		KVMPPC_GSID_L0_GUEST_HEAP_MAX,
+		KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE,
+		KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+		KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM
+	};
+	size_t total = 0;
+	int idx = 0;
+
+	while (idx < ARRAY_SIZE(ids)) {
+		total += kvmppc_gse_total_size(kvmppc_gsid_size(ids[idx]));
+		idx++;
+	}
+
+	return total;
+}
+
+/*
+ * fill_info op: write one u64 element into @gsb for every host-wide stat
+ * id that @gsm includes, in a fixed order, stopping at the first failure.
+ *
+ * Returns 0 on success or the error from kvmppc_gse_put_u64().
+ */
+static int hostwide_fill_info(struct kvmppc_gs_buff *gsb,
+			      struct kvmppc_gs_msg *gsm)
+{
+	struct kvmppc_hostwide_stats *stats = gsm->data;
+	/*
+	 * It doesn't matter what values are put into the request buffer
+	 * as they are going to be overwritten anyway.  For the sake of
+	 * test code and symmetry, the current stats are used.
+	 */
+	const struct {
+		u16 id;
+		u64 val;
+	} elems[] = {
+		{ KVMPPC_GSID_L0_GUEST_HEAP, stats->guest_heap },
+		{ KVMPPC_GSID_L0_GUEST_HEAP_MAX, stats->guest_heap_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, stats->guest_pgtable_size },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+		  stats->guest_pgtable_size_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+		  stats->guest_pgtable_reclaim },
+	};
+	int rc = 0;
+
+	for (int i = 0; i < ARRAY_SIZE(elems) && !rc; i++) {
+		if (kvmppc_gsm_includes(gsm, elems[i].id))
+			rc = kvmppc_gse_put_u64(gsb, elems[i].id,
+						elems[i].val);
+	}
+
+	return rc;
+}
+
+/*
+ * refresh_info op: parse @gsb and copy every host-wide stat element
+ * found in it into the stats structure behind gsm->data.  Stats whose
+ * element is missing from the buffer are left as they were.
+ *
+ * Returns a negative error from kvmppc_gse_parse(), 0 otherwise.
+ */
+static int hostwide_refresh_info(struct kvmppc_gs_msg *gsm,
+				 struct kvmppc_gs_buff *gsb)
+{
+	struct kvmppc_hostwide_stats *stats = gsm->data;
+	const struct {
+		u16 id;
+		u64 *dst;
+	} map[] = {
+		{ KVMPPC_GSID_L0_GUEST_HEAP, &stats->guest_heap },
+		{ KVMPPC_GSID_L0_GUEST_HEAP_MAX, &stats->guest_heap_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE, &stats->guest_pgtable_size },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX,
+		  &stats->guest_pgtable_size_max },
+		{ KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM,
+		  &stats->guest_pgtable_reclaim },
+	};
+	struct kvmppc_gs_parser gsp = { 0 };
+	struct kvmppc_gs_elem *gse;
+	int rc;
+
+	rc = kvmppc_gse_parse(&gsp, gsb);
+	if (rc < 0)
+		return rc;
+
+	for (int i = 0; i < ARRAY_SIZE(map); i++) {
+		gse = kvmppc_gsp_lookup(&gsp, map[i].id);
+		if (gse)
+			*map[i].dst = kvmppc_gse_get_u64(gse);
+	}
+
+	return 0;
+}
+
+/* gsb-message ops for setting up/parsing */
+static struct kvmppc_gs_msg_ops gsb_ops_l0_stats = {
+ .get_size = hostwide_get_size,
+ .fill_info = hostwide_fill_info,
+ .refresh_info = hostwide_refresh_info,
+};
+
+/*
+ * Set up the guest-state message (GSM) and buffer (GSB) used to fetch
+ * the host-wide stats from L0.
+ *
+ * Returns 0 on success (including when the setup was already done),
+ * -ENOMEM if an allocation fails, or the error from
+ * kvmppc_gsm_fill_info().  On failure both globals are freed and reset
+ * to NULL.
+ */
+static int kvmppc_init_hostwide(void)
+{
+	int rc = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lock_l0_stats, flags);
+
+	/* already registered ? */
+	if (gsm_l0_stats)
+		goto out;
+
+	/*
+	 * Setup the Guest state message/buffer to talk to L0.  The
+	 * spinlock is held with interrupts disabled here, so the
+	 * allocations must not sleep: use GFP_ATOMIC, not GFP_KERNEL.
+	 */
+	gsm_l0_stats = kvmppc_gsm_new(&gsb_ops_l0_stats, &l0_stats,
+				      GSM_SEND, GFP_ATOMIC);
+	if (!gsm_l0_stats) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* Populate the Idents */
+	kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP);
+	kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_HEAP_MAX);
+	kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE);
+	kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_SIZE_MAX);
+	kvmppc_gsm_include(gsm_l0_stats, KVMPPC_GSID_L0_GUEST_PGTABLE_RECLAIM);
+
+	/* allocate GSB. Guest/Vcpu Id is ignored */
+	gsb_l0_stats = kvmppc_gsb_new(kvmppc_gsm_size(gsm_l0_stats), 0, 0,
+				      GFP_ATOMIC);
+	if (!gsb_l0_stats) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	/* ask the ops to fill in the info */
+	rc = kvmppc_gsm_fill_info(gsm_l0_stats, gsb_l0_stats);
+
+out:
+	/* On any failure drop whatever was allocated so a retry starts clean */
+	if (rc) {
+		if (gsm_l0_stats)
+			kvmppc_gsm_free(gsm_l0_stats);
+		if (gsb_l0_stats)
+			kvmppc_gsb_free(gsb_l0_stats);
+		gsm_l0_stats = NULL;
+		gsb_l0_stats = NULL;
+	}
+	spin_unlock_irqrestore(&lock_l0_stats, flags);
+	return rc;
+}
+
+/*
+ * Free the guest-state message and buffer used for the host-wide stats
+ * (if allocated) and reset both pointers to NULL, all under
+ * lock_l0_stats. Safe to call when nothing was set up.
+ */
+static void kvmppc_cleanup_hostwide(void)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&lock_l0_stats, flags);
+
+ if (gsm_l0_stats)
+ kvmppc_gsm_free(gsm_l0_stats);
+ if (gsb_l0_stats)
+ kvmppc_gsb_free(gsb_l0_stats);
+ gsm_l0_stats = NULL;
+ gsb_l0_stats = NULL;
+
+ spin_unlock_irqrestore(&lock_l0_stats, flags);
+}
+
+/*
+ * L1 wide counters PMU.
+ *
+ * .type starts out as -1; kvmppc_unregister_pmu() only unregisters the
+ * PMU when the value has changed from that (presumably it is assigned
+ * by perf_pmu_register() - confirm).
+ */
+static struct pmu kvmppc_pmu = {
+ .module = THIS_MODULE,
+ .task_ctx_nr = perf_sw_context,
+ .name = "kvm-hv",
+ .event_init = kvmppc_pmu_event_init,
+ .add = kvmppc_pmu_add,
+ .del = kvmppc_pmu_del,
+ .read = kvmppc_pmu_read,
+ .attr_groups = kvmppc_pmu_attr_groups,
+ .type = -1,
+ .scope = PERF_PMU_SCOPE_SYS_WIDE,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+/*
+ * Module init: set up the host-wide stats plumbing and register the PMU.
+ *
+ * Returns -EOPNOTSUPP when not running as a nestedv2 guest, otherwise 0
+ * or the error from kvmppc_init_hostwide() / perf_pmu_register().
+ */
+static int __init kvmppc_register_pmu(void)
+{
+	int rc;
+
+	/* only support events for nestedv2 right now */
+	if (!kvmhv_is_nestedv2())
+		return -EOPNOTSUPP;
+
+	rc = kvmppc_init_hostwide();
+	if (rc)
+		return rc;
+
+	/* Register the pmu */
+	rc = perf_pmu_register(&kvmppc_pmu, kvmppc_pmu.name, -1);
+	if (rc) {
+		/*
+		 * The exit handler does not run after a failed init, so
+		 * release the GSM/GSB allocated above here.
+		 */
+		kvmppc_cleanup_hostwide();
+		return rc;
+	}
+
+	pr_info("Registered kvm-hv pmu\n");
+
+	return 0;
+}
+
+/*
+ * Module exit: undo kvmppc_register_pmu() in reverse order.
+ */
+static void __exit kvmppc_unregister_pmu(void)
+{
+	if (!kvmhv_is_nestedv2())
+		return;
+
+	/*
+	 * Unregister the PMU first so that no event callback can run
+	 * against the GSM/GSB once they have been freed below.
+	 */
+	if (kvmppc_pmu.type != -1)
+		perf_pmu_unregister(&kvmppc_pmu);
+
+	kvmppc_cleanup_hostwide();
+
+	pr_info("kvmhv_pmu unregistered.\n");
+}
+
+module_init(kvmppc_register_pmu);
+module_exit(kvmppc_unregister_pmu);
+MODULE_DESCRIPTION("KVM PPC Book3s-hv PMU");
+MODULE_AUTHOR("Vaibhav Jain <vaibhav@linux.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index d115c5635bf3..db451b9aac35 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for MPC7450-family processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
#include <linux/perf_event.h>
@@ -152,7 +148,7 @@ static u32 classbits[N_CLASSES - 1][2] = {
};
static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, class;
u32 mask, value;
@@ -261,8 +257,9 @@ static const u32 pmcsel_mask[N_COUNTER] = {
* Compute MMCR0/1/2 values for a set of events.
*/
static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
- unsigned long mmcr[],
- struct perf_event *pevents[])
+ struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
u8 event_index[N_CLASSES][N_COUNTER];
int n_classevent[N_CLASSES];
@@ -325,9 +322,16 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
mmcr0 |= MMCR0_PMCnCE;
/* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcr2;
+ mmcr->mmcr0 = mmcr0;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcr2 = mmcr2;
+ /*
+ * 32-bit doesn't have an MMCRA and uses SPRN_MMCR2 to define
+ * SPRN_MMCRA. So assign mmcra of cpu_hw_events with `mmcr2`
+ * value to ensure that any write to this SPRN_MMCRA will
+ * use mmcr2 value.
+ */
+ mmcr->mmcra = mmcr2;
return 0;
}
@@ -335,12 +339,12 @@ static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
* Disable counting by a PMC.
* Note that the pmc argument is 0-based here, not 1-based.
*/
-static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 1)
- mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+ mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
else
- mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+ mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
}
static int mpc7450_generic_events[] = {
@@ -358,7 +362,7 @@ static int mpc7450_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0, 0x225 },
[C(OP_WRITE)] = { 0, 0x227 },
@@ -413,8 +417,9 @@ struct power_pmu mpc7450_pmu = {
static int __init init_mpc7450_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
+ if (!pvr_version_is(PVR_VER_7450) && !pvr_version_is(PVR_VER_7455) &&
+ !pvr_version_is(PVR_VER_7447) && !pvr_version_is(PVR_VER_7447A) &&
+ !pvr_version_is(PVR_VER_7448))
return -ENODEV;
return register_power_pmu(&mpc7450_pmu);
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000000000000..350dccb0143c
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2016 Anju T, IBM Corporation.
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+#include <asm/perf_regs.h>
+
+u64 PERF_REG_EXTENDED_MASK;
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = { /* perf register index -> byte offset in struct pt_regs */
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R1, gpr[1]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R2, gpr[2]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R3, gpr[3]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R4, gpr[4]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R5, gpr[5]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R6, gpr[6]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R7, gpr[7]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R8, gpr[8]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R9, gpr[9]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R10, gpr[10]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R11, gpr[11]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R12, gpr[12]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R13, gpr[13]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R14, gpr[14]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R15, gpr[15]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R16, gpr[16]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R17, gpr[17]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R18, gpr[18]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R19, gpr[19]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R20, gpr[20]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R21, gpr[21]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R22, gpr[22]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R23, gpr[23]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R24, gpr[24]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R25, gpr[25]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R26, gpr[26]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R27, gpr[27]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R28, gpr[28]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R29, gpr[29]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R30, gpr[30]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_R31, gpr[31]),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_LINK, link),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+#ifdef CONFIG_PPC64
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe),
+#else
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, mq), /* without CONFIG_PPC64 the SOFTE index reads the 'mq' field instead */
+#endif
+ PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+ PT_REGS_OFFSET(PERF_REG_POWERPC_SIER, dar), /* same offset as DAR; NOTE(review): presumably the sampling path stores SIER in regs->dar - confirm */
+ PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr), /* same offset as DSISR; NOTE(review): presumably MMCRA is stored in regs->dsisr - confirm */
+};
+
+/* Return the extended register selected by idx; 0 if idx is not handled. */
+static u64 get_ext_regs_value(int idx)
+{
+ switch (idx) {
+ case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6: /* GNU case range covering PMC1 through PMC6 */
+ return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1); /* passes 0 for PMC1, counting up from there */
+ case PERF_REG_POWERPC_MMCR0:
+ return mfspr(SPRN_MMCR0);
+ case PERF_REG_POWERPC_MMCR1:
+ return mfspr(SPRN_MMCR1);
+ case PERF_REG_POWERPC_MMCR2:
+ return mfspr(SPRN_MMCR2);
+#ifdef CONFIG_PPC64
+ case PERF_REG_POWERPC_MMCR3: /* MMCR3, SIER2, SIER3 and SDAR are only compiled in for CONFIG_PPC64 */
+ return mfspr(SPRN_MMCR3);
+ case PERF_REG_POWERPC_SIER2:
+ return mfspr(SPRN_SIER2);
+ case PERF_REG_POWERPC_SIER3:
+ return mfspr(SPRN_SIER3);
+ case PERF_REG_POWERPC_SDAR:
+ return mfspr(SPRN_SDAR);
+#endif
+ case PERF_REG_POWERPC_SIAR:
+ return mfspr(SPRN_SIAR);
+ default: return 0; /* any index not listed above reads as 0 */
+ }
+}
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (idx == PERF_REG_POWERPC_SIER &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32) ||
+ !is_sier_available()))
+ return 0; /* SIER reads as 0 on FSL embedded, on 32-bit, or when is_sier_available() is false */
+
+ if (idx == PERF_REG_POWERPC_MMCRA &&
+ (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
+ IS_ENABLED(CONFIG_PPC32)))
+ return 0; /* MMCRA likewise reads as 0 on those two configurations */
+
+ if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX)
+ return get_ext_regs_value(idx); /* extended range is served by get_ext_regs_value() */
+
+ /*
+ * If idx refers to a register beyond the supported
+ * range, warn once and return 0.
+ */
+ if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX))
+ return 0;
+
+ return regs_get_register(regs, pt_regs_offset[idx]); /* NOTE(review): a negative idx is not rejected before this table lookup - confirm callers never pass one */
+}
+
+int perf_reg_validate(u64 mask)
+{
+	/* Accept only a non-empty mask that sets no REG_RESERVED bit;
+	 * anything else is -EINVAL. */
+	return (mask && !(mask & REG_RESERVED)) ? 0 : -EINVAL;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	/* Pick the register-sample ABI from is_tsk_32bit_task(task). */
+	if (!is_tsk_32bit_task(task))
+		return PERF_SAMPLE_REGS_ABI_64;
+	return PERF_SAMPLE_REGS_ABI_32;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+			struct pt_regs *regs)
+{
+	/* Filled from task_pt_regs(current); the 'regs' argument is not used. */
+	regs_user->regs = task_pt_regs(current);
+	regs_user->abi = !regs_user->regs ? PERF_SAMPLE_REGS_ABI_NONE : perf_reg_abi(current);
+}
diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
new file mode 100644
index 000000000000..564f14097f07
--- /dev/null
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+/*
+ * Power10 event codes.
+ */
+EVENT(PM_CYC, 0x600f4);
+EVENT(PM_DISP_STALL_CYC, 0x100f8);
+EVENT(PM_EXEC_STALL, 0x30008);
+EVENT(PM_INST_CMPL, 0x500fa);
+EVENT(PM_BR_CMPL, 0x4d05e);
+EVENT(PM_BR_MPRED_CMPL, 0x400f6);
+EVENT(PM_BR_FIN, 0x2f04a);
+EVENT(PM_MPRED_BR_FIN, 0x3e098);
+EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0);
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1, 0x100fc);
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1, 0x3e054);
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1, 0x300f0);
+/* L1 cache data prefetches */
+EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS, 0x1002c);
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS, 0x200fc);
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1, 0x04080);
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_INST_FROM_L1MISS, 0x03f00000001c040);
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_REQ, 0x040a0);
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3, 0x01340000001c040);
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x010000046080);
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x26880);
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PF_MISS_L3, 0x100000016080);
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS, 0x300fc);
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS, 0x400fc);
+
+EVENT(PM_CYC_ALT, 0x0001e);
+EVENT(PM_INST_CMPL_ALT, 0x00002);
+
+/*
+ * Memory Access Events
+ *
+ * The primary PMU event used here is PM_MRK_INST_CMPL (0x401e0).
+ * To enable capturing of memory profiling, these MMCRA bits
+ * need to be programmed, together with the corresponding raw
+ * event format encoding.
+ *
+ * MMCRA bits encoding needed are
+ * SM (Sampling Mode)
+ * EM (Eligibility for Random Sampling)
+ * TECE (Threshold Event Counter Event)
+ * TS (Threshold Start Event)
+ * TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ * sample [EM,SM]
+ * thresh_sel (TECE)
+ * thresh start (TS)
+ * thresh end (TE)
+ */
+
+EVENT(MEM_LOADS, 0x35340401e0);
+EVENT(MEM_STORES, 0x353c0401e0);
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
new file mode 100644
index 000000000000..bb57b7cfe640
--- /dev/null
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -0,0 +1,664 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "power10-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding for Power10:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * | | [ ] [ src_match ] [ src_mask ] | [ ] [ l2l3_sel ] [ thresh_ctl ]
+ * | | | | | |
+ * | | *- IFM (Linux) | | thresh start/stop -*
+ * | *- BHRB (Linux) | src_sel
+ * *- EBB (Linux) *invert_bit
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ ] [ sample ] [ ] [ ] [ pmc ] [unit ] [ ] | m [ pmcxsel ]
+ * | | | | | | |
+ * | | | | | | *- mark
+ * | | | *- L1/L2/L3 cache_sel | |*-radix_scope_qual
+ * | | sdar_mode |
+ * | *- sampling mode for marked events *- combine
+ * |
+ * *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit (PMCxUNIT)
+ * MMCR1[24] = pmc1combine[0]
+ * MMCR1[25] = pmc1combine[1]
+ * MMCR1[26] = pmc2combine[0]
+ * MMCR1[27] = pmc2combine[1]
+ * MMCR1[28] = pmc3combine[0]
+ * MMCR1[29] = pmc3combine[1]
+ * MMCR1[30] = pmc4combine[0]
+ * MMCR1[31] = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ * MMCR1[20:27] = thresh_ctl
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ * MMCR1[20:27] = thresh_ctl
+ * else
+ * MMCRA[48:55] = thresh_ctl (THRESH START/END)
+ *
+ * if thresh_sel:
+ * MMCRA[45:47] = thresh_sel
+ *
+ * if l2l3_sel:
+ * MMCR2[56:60] = l2l3_sel[0:4]
+ *
+ * MMCR1[16] = cache_sel[0]
+ * MMCR1[17] = cache_sel[1]
+ * MMCR1[18] = radix_scope_qual
+ *
+ * if mark:
+ * MMCRA[63] = 1 (SAMPLE_ENABLE)
+ * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG)
+ * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ * MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE] = sdar_mode[0:1]
+ */
+
+/*
+ * Some power10 event codes.
+ */
+#define EVENT(_name, _code) enum{_name = _code}
+
+#include "power10-events-list.h"
+
+#undef EVENT
+
+/* MMCRA IFM bits - POWER10 */
+#define POWER10_MMCRA_IFM1 0x0000000040000000UL
+#define POWER10_MMCRA_IFM2 0x0000000080000000UL
+#define POWER10_MMCRA_IFM3 0x00000000C0000000UL
+#define POWER10_MMCRA_BHRB_MASK 0x00000000C0000000UL
+
+extern u64 PERF_REG_EXTENDED_MASK;
+
+/* Table of alternatives, sorted by column 0 */
+static const unsigned int power10_event_alternatives[][MAX_ALT] = {
+ { PM_INST_CMPL_ALT, PM_INST_CMPL },
+ { PM_CYC_ALT, PM_CYC },
+};
+
+static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	/*
+	 * Defer to the common isa207 helper, handing it the POWER10
+	 * alternatives table; its result is returned unchanged.
+	 */
+	return isa207_get_alternatives(event, alt,
+				       ARRAY_SIZE(power10_event_alternatives),
+				       flags, power10_event_alternatives);
+}
+
+static int power10_check_attr_config(struct perf_event *ev)
+{
+	/* Sample field of the raw event code; the value 0x10 is rejected. */
+	u64 sample = (ev->attr.config >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+
+	if (sample == 0x10)
+		return -EINVAL;
+
+	/* A non-zero result from isa3XX_check_attr_config() is invalid too. */
+	return isa3XX_check_attr_config(ev) ? -EINVAL : 0;
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN);
+GENERIC_EVENT_ATTR(branch-misses, PM_MPRED_BR_FIN);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_DEMAND_MISS_L1_FIN);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
+CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_LD_PREFETCH_CACHE_LINE_MISS);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ);
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PF_MISS_L3);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *power10_events_attr_dd1[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static struct attribute *power10_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_FIN),
+ GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
+ CACHE_EVENT_PTR(PM_L2_ST_MISS),
+ CACHE_EVENT_PTR(PM_L2_ST),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static const struct attribute_group power10_pmu_events_group_dd1 = {
+ .name = "events",
+ .attrs = power10_events_attr_dd1,
+};
+
+static const struct attribute_group power10_pmu_events_group = {
+ .name = "events",
+ .attrs = power10_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-59");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:10-11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+PMU_FORMAT_ATTR(cache_sel, "config:20-21");
+PMU_FORMAT_ATTR(sdar_mode, "config:22-23");
+PMU_FORMAT_ATTR(sample_mode, "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
+PMU_FORMAT_ATTR(thresh_start, "config:36-39");
+PMU_FORMAT_ATTR(l2l3_sel, "config:40-44");
+PMU_FORMAT_ATTR(src_sel, "config:45-46");
+PMU_FORMAT_ATTR(invert_bit, "config:47");
+PMU_FORMAT_ATTR(src_mask, "config:48-53");
+PMU_FORMAT_ATTR(src_match, "config:54-59");
+PMU_FORMAT_ATTR(radix_scope, "config:9");
+PMU_FORMAT_ATTR(thresh_cmp, "config1:0-17");
+
+static struct attribute *power10_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ &format_attr_cache_sel.attr,
+ &format_attr_sdar_mode.attr,
+ &format_attr_sample_mode.attr,
+ &format_attr_thresh_sel.attr,
+ &format_attr_thresh_stop.attr,
+ &format_attr_thresh_start.attr,
+ &format_attr_l2l3_sel.attr,
+ &format_attr_src_sel.attr,
+ &format_attr_invert_bit.attr,
+ &format_attr_src_mask.attr,
+ &format_attr_src_match.attr,
+ &format_attr_radix_scope.attr,
+ &format_attr_thresh_cmp.attr,
+ NULL,
+};
+
+static const struct attribute_group power10_pmu_format_group = {
+ .name = "format",
+ .attrs = power10_pmu_format_attr,
+};
+
+static struct attribute *power10_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group power10_pmu_caps_group = {
+ .name = "caps",
+ .attrs = power10_pmu_caps_attrs,
+};
+
+static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
+ &power10_pmu_format_group,
+ &power10_pmu_events_group_dd1,
+ &power10_pmu_caps_group,
+ NULL,
+};
+
+static const struct attribute_group *power10_pmu_attr_groups[] = {
+ &power10_pmu_format_group,
+ &power10_pmu_events_group,
+ &power10_pmu_caps_group,
+ NULL,
+};
+
+static int power10_generic_events_dd1[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
+};
+
+static int power10_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_DEMAND_MISS_L1_FIN,
+};
+
+static u64 power10_bhrb_filter_map(u64 branch_sample_type)
+{
+ u64 pmu_bhrb_filter = 0; /* filter bits to return; stays 0 when no filtering is needed */
+
+ /* BHRB and regular PMU events share the same privilege state
+ * filter configuration. BHRB is always recorded along with a
+ * regular PMU event. As the privilege state filter is handled
+ * in the basic PMC configuration of the accompanying regular
+ * PMU event, we ignore any separate BHRB specific request.
+ */
+
+ /* No branch filter requested */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ return pmu_bhrb_filter;
+
+ /* Invalid branch filter options - HW does not support */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ return -1; /* -1 converts to the all-ones value of the u64 return type */
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) {
+ pmu_bhrb_filter |= POWER10_MMCRA_IFM2;
+ return pmu_bhrb_filter; /* first matching type wins; later bits are not examined */
+ }
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) {
+ pmu_bhrb_filter |= POWER10_MMCRA_IFM3;
+ return pmu_bhrb_filter;
+ }
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ pmu_bhrb_filter |= POWER10_MMCRA_IFM1;
+ return pmu_bhrb_filter;
+ }
+
+ /* Everything else is unsupported */
+ return -1;
+}
+
+static void power10_config_bhrb(u64 pmu_bhrb_filter)
+{
+	/* Keep only the bits covered by POWER10_MMCRA_BHRB_MASK. */
+	u64 ifm_bits = pmu_bhrb_filter & POWER10_MMCRA_BHRB_MASK;
+
+	/* Enable BHRB filter in PMU: OR the bits into the current MMCRA. */
+	mtspr(SPRN_MMCRA, mfspr(SPRN_MMCRA) | ifm_bits);
+}
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_LD_REF_L1,
+ [C(RESULT_MISS)] = PM_LD_MISS_L1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ST_MISS_L1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+ [C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+ [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_BR_CMPL,
+ [C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
+static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_LD_REF_L1,
+ [C(RESULT_MISS)] = PM_LD_MISS_L1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ST_MISS_L1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+ [C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+ [C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = PM_L2_ST,
+ [C(RESULT_MISS)] = PM_L2_ST_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
+ [C(RESULT_MISS)] = 0,
+ },
+ },
+ [C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_DTLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = PM_ITLB_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = PM_BR_CMPL,
+ [C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+ [C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ },
+};
+
+#undef C
+
+/*
+ * Build the MMCR values with the common isa207 code and, on success,
+ * additionally set MMCR0_C56RUN (MMCR0[CC56RUN]) so that PMC5 and
+ * PMC6 count regardless of the state of CTRL[RUN]; this lets
+ * counters 5 and 6 be used as PM_INST_CMPL and PM_CYC.
+ */
+static int power10_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[], u32 flags)
+{
+	int err = isa207_compute_mmcr(event, n_ev, hwc, mmcr, pevents, flags);
+
+	if (err)
+		return err;	/* mmcr0 is left as the helper produced it */
+	mmcr->mmcr0 |= MMCR0_C56RUN;
+	return 0;
+}
+
+static struct power_pmu power10_pmu = {
+ .name = "POWER10",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
+ .compute_mmcr = power10_compute_mmcr,
+ .config_bhrb = power10_config_bhrb,
+ .bhrb_filter_map = power10_bhrb_filter_map,
+ .get_constraint = isa207_get_constraint,
+ .get_alternatives = power10_get_alternatives,
+ .get_mem_data_src = isa207_get_mem_data_src,
+ .get_mem_weight = isa207_get_mem_weight,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S |
+ PPMU_ARCH_31 | PPMU_HAS_ATTR_CONFIG1 |
+ PPMU_P10,
+ .n_generic = ARRAY_SIZE(power10_generic_events),
+ .generic_events = power10_generic_events,
+ .cache_events = &power10_cache_events,
+ .attr_groups = power10_pmu_attr_groups,
+ .bhrb_nr = 32,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
+ .check_attr_config = power10_check_attr_config,
+};
+
+int __init init_power10_pmu(void)
+{
+	unsigned int pvr;
+	int rc;
+
+	/* Register only when the PVR version field reports POWER10. */
+	pvr = mfspr(SPRN_PVR);
+	if (PVR_VER(pvr) != PVR_POWER10)
+		return -ENODEV;
+
+	/* Set the PERF_REG_EXTENDED_MASK here */
+	PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
+	/* PVR_CFG() == 1 identifies DD1: set the DD1 ppmu flag and switch
+	 * to the DD1 generic/cache event tables and attribute groups. */
+	if (PVR_CFG(pvr) == 1) {
+		power10_pmu.flags |= PPMU_P10_DD1;
+		power10_pmu.generic_events = power10_generic_events_dd1;
+		power10_pmu.attr_groups = power10_pmu_attr_groups_dd1;
+		power10_pmu.cache_events = &power10_cache_events_dd1;
+	}
+
+	rc = register_power_pmu(&power10_pmu);
+	if (rc)
+		return rc;
+
+	/* Tell userspace that EBB is supported */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+	return 0;
+}
+
+static struct power_pmu power11_pmu;
+
+int __init init_power11_pmu(void)
+{
+ unsigned int pvr;
+ int rc;
+
+ pvr = mfspr(SPRN_PVR);
+ if (PVR_VER(pvr) != PVR_POWER11)
+ return -ENODEV; /* PVR version field is not POWER11 */
+
+ /* Set the PERF_REG_EXTENDED_MASK here */
+ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
+
+ power11_pmu = power10_pmu; /* struct copy: start from the POWER10 description as it stands at this point */
+ power11_pmu.name = "Power11"; /* only the name is overridden after the copy */
+
+ rc = register_power_pmu(&power11_pmu);
+ if (rc)
+ return rc; /* propagate the registration error; EBB is not advertised */
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
diff --git a/arch/powerpc/perf/power4-pmu.c b/arch/powerpc/perf/power4-pmu.c
deleted file mode 100644
index ce6072fa481b..000000000000
--- a/arch/powerpc/perf/power4-pmu.c
+++ /dev/null
@@ -1,622 +0,0 @@
-/*
- * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors.
- *
- * Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <linux/string.h>
-#include <asm/reg.h>
-#include <asm/cputable.h>
-
-/*
- * Bits in event code for POWER4
- */
-#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */
-#define PM_PMC_MSK 0xf
-#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */
-#define PM_UNIT_MSK 0xf
-#define PM_LOWER_SH 6
-#define PM_LOWER_MSK 1
-#define PM_LOWER_MSKS 0x40
-#define PM_BYTE_SH 4 /* Byte number of event bus to use */
-#define PM_BYTE_MSK 3
-#define PM_PMCSEL_MSK 7
-
-/*
- * Unit code values
- */
-#define PM_FPU 1
-#define PM_ISU1 2
-#define PM_IFU 3
-#define PM_IDU0 4
-#define PM_ISU1_ALT 6
-#define PM_ISU2 7
-#define PM_IFU_ALT 8
-#define PM_LSU0 9
-#define PM_LSU1 0xc
-#define PM_GPS 0xf
-
-/*
- * Bits in MMCR0 for POWER4
- */
-#define MMCR0_PMC1SEL_SH 8
-#define MMCR0_PMC2SEL_SH 1
-#define MMCR_PMCSEL_MSK 0x1f
-
-/*
- * Bits in MMCR1 for POWER4
- */
-#define MMCR1_TTM0SEL_SH 62
-#define MMCR1_TTC0SEL_SH 61
-#define MMCR1_TTM1SEL_SH 59
-#define MMCR1_TTC1SEL_SH 58
-#define MMCR1_TTM2SEL_SH 56
-#define MMCR1_TTC2SEL_SH 55
-#define MMCR1_TTM3SEL_SH 53
-#define MMCR1_TTC3SEL_SH 52
-#define MMCR1_TTMSEL_MSK 3
-#define MMCR1_TD_CP_DBG0SEL_SH 50
-#define MMCR1_TD_CP_DBG1SEL_SH 48
-#define MMCR1_TD_CP_DBG2SEL_SH 46
-#define MMCR1_TD_CP_DBG3SEL_SH 44
-#define MMCR1_DEBUG0SEL_SH 43
-#define MMCR1_DEBUG1SEL_SH 42
-#define MMCR1_DEBUG2SEL_SH 41
-#define MMCR1_DEBUG3SEL_SH 40
-#define MMCR1_PMC1_ADDER_SEL_SH 39
-#define MMCR1_PMC2_ADDER_SEL_SH 38
-#define MMCR1_PMC6_ADDER_SEL_SH 37
-#define MMCR1_PMC5_ADDER_SEL_SH 36
-#define MMCR1_PMC8_ADDER_SEL_SH 35
-#define MMCR1_PMC7_ADDER_SEL_SH 34
-#define MMCR1_PMC3_ADDER_SEL_SH 33
-#define MMCR1_PMC4_ADDER_SEL_SH 32
-#define MMCR1_PMC3SEL_SH 27
-#define MMCR1_PMC4SEL_SH 22
-#define MMCR1_PMC5SEL_SH 17
-#define MMCR1_PMC6SEL_SH 12
-#define MMCR1_PMC7SEL_SH 7
-#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */
-
-static short mmcr1_adder_bits[8] = {
- MMCR1_PMC1_ADDER_SEL_SH,
- MMCR1_PMC2_ADDER_SEL_SH,
- MMCR1_PMC3_ADDER_SEL_SH,
- MMCR1_PMC4_ADDER_SEL_SH,
- MMCR1_PMC5_ADDER_SEL_SH,
- MMCR1_PMC6_ADDER_SEL_SH,
- MMCR1_PMC7_ADDER_SEL_SH,
- MMCR1_PMC8_ADDER_SEL_SH
-};
-
-/*
- * Bits in MMCRA
- */
-#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */
-
-/*
- * Layout of constraint bits:
- * 6666555555555544444444443333333333222222222211111111110000000000
- * 3210987654321098765432109876543210987654321098765432109876543210
- * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><>
- * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8
- * \SMPL ||\TTC3SEL
- * |\TTC_IFU_SEL
- * \TTM2SEL0
- *
- * SMPL - SAMPLE_ENABLE constraint
- * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000
- *
- * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2
- * 55: UC1 error 0x0080_0000_0000_0000
- * 54: FPU events needed 0x0040_0000_0000_0000
- * 53: ISU1 events needed 0x0020_0000_0000_0000
- * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000
- *
- * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0
- * 51: UC2 error 0x0008_0000_0000_0000
- * 50: FPU events needed 0x0004_0000_0000_0000
- * 49: IFU events needed 0x0002_0000_0000_0000
- * 48: LSU0 events needed 0x0001_0000_0000_0000
- *
- * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1
- * 47: UC3 error 0x8000_0000_0000
- * 46: LSU0 events needed 0x4000_0000_0000
- * 45: IFU events needed 0x2000_0000_0000
- * 44: IDU0|ISU2 events needed 0x1000_0000_0000
- * 43: ISU1 events needed 0x0800_0000_0000
- *
- * TTM2SEL0
- * 42: 0 = IDU0 events needed
- * 1 = ISU2 events needed 0x0400_0000_0000
- *
- * TTC_IFU_SEL
- * 41: 0 = IFU.U events needed
- * 1 = IFU.L events needed 0x0200_0000_0000
- *
- * TTC3SEL
- * 40: 0 = LSU1.U events needed
- * 1 = LSU1.L events needed 0x0100_0000_0000
- *
- * PS1
- * 39: PS1 error 0x0080_0000_0000
- * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000
- *
- * PS2
- * 35: PS2 error 0x0008_0000_0000
- * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000
- *
- * B0
- * 28-31: Byte 0 event source 0xf000_0000
- * 1 = FPU
- * 2 = ISU1
- * 3 = IFU
- * 4 = IDU0
- * 7 = ISU2
- * 9 = LSU0
- * c = LSU1
- * f = GPS
- *
- * B1, B2, B3
- * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources
- *
- * P8
- * 15: P8 error 0x8000
- * 14-15: Count of events needing PMC8
- *
- * P1..P7
- * 0-13: Count of events needing PMC1..PMC7
- *
- * Note: this doesn't allow events using IFU.U to be combined with events
- * using IFU.L, though that is feasible (using TTM0 and TTM2). However
- * there are no listed events for IFU.L (they are debug events not
- * verified for performance monitoring) so this shouldn't cause a
- * problem.
- */
-
-static struct unitinfo {
- unsigned long value, mask;
- int unit;
- int lowerbit;
-} p4_unitinfo[16] = {
- [PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
- [PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
- [PM_ISU1_ALT] =
- { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
- [PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
- [PM_IFU_ALT] =
- { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
- [PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
- [PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
- [PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
- [PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
- [PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
-};
-
-static unsigned char direct_marked_event[8] = {
- (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
- (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
- (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */
- (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
- (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */
- (1<<3) | (1<<4) | (1<<5),
- /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
- (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
- (1<<4), /* PMC8: PM_MRK_LSU_FIN */
-};
-
-/*
- * Returns 1 if event counts things relating to marked instructions
- * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not.
- */
-static int p4_marked_instr_event(u64 event)
-{
- int pmc, psel, unit, byte, bit;
- unsigned int mask;
-
- pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
- psel = event & PM_PMCSEL_MSK;
- if (pmc) {
- if (direct_marked_event[pmc - 1] & (1 << psel))
- return 1;
- if (psel == 0) /* add events */
- bit = (pmc <= 4)? pmc - 1: 8 - pmc;
- else if (psel == 6) /* decode events */
- bit = 4;
- else
- return 0;
- } else
- bit = psel;
-
- byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
- mask = 0;
- switch (unit) {
- case PM_LSU1:
- if (event & PM_LOWER_MSKS)
- mask = 1 << 28; /* byte 7 bit 4 */
- else
- mask = 6 << 24; /* byte 3 bits 1 and 2 */
- break;
- case PM_LSU0:
- /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */
- mask = 0x083dff00;
- }
- return (mask >> (byte * 8 + bit)) & 1;
-}
-
-static int p4_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
-{
- int pmc, byte, unit, lower, sh;
- unsigned long mask = 0, value = 0;
- int grp = -1;
-
- pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
- if (pmc) {
- if (pmc > 8)
- return -1;
- sh = (pmc - 1) * 2;
- mask |= 2 << sh;
- value |= 1 << sh;
- grp = ((pmc - 1) >> 1) & 1;
- }
- unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
- if (unit) {
- lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK;
-
- /*
- * Bus events on bytes 0 and 2 can be counted
- * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8.
- */
- if (!pmc)
- grp = byte & 1;
-
- if (!p4_unitinfo[unit].unit)
- return -1;
- mask |= p4_unitinfo[unit].mask;
- value |= p4_unitinfo[unit].value;
- sh = p4_unitinfo[unit].lowerbit;
- if (sh > 1)
- value |= (unsigned long)lower << sh;
- else if (lower != sh)
- return -1;
- unit = p4_unitinfo[unit].unit;
-
- /* Set byte lane select field */
- mask |= 0xfULL << (28 - 4 * byte);
- value |= (unsigned long)unit << (28 - 4 * byte);
- }
- if (grp == 0) {
- /* increment PMC1/2/5/6 field */
- mask |= 0x8000000000ull;
- value |= 0x1000000000ull;
- } else {
- /* increment PMC3/4/7/8 field */
- mask |= 0x800000000ull;
- value |= 0x100000000ull;
- }
-
- /* Marked instruction events need sample_enable set */
- if (p4_marked_instr_event(event)) {
- mask |= 1ull << 56;
- value |= 1ull << 56;
- }
-
- /* PMCSEL=6 decode events on byte 2 need sample_enable clear */
- if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2)
- mask |= 1ull << 56;
-
- *maskp = mask;
- *valp = value;
- return 0;
-}
-
-static unsigned int ppc_inst_cmpl[] = {
- 0x1001, 0x4001, 0x6001, 0x7001, 0x8001
-};
-
-static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
-{
- int i, j, na;
-
- alt[0] = event;
- na = 1;
-
- /* 2 possibilities for PM_GRP_DISP_REJECT */
- if (event == 0x8003 || event == 0x0224) {
- alt[1] = event ^ (0x8003 ^ 0x0224);
- return 2;
- }
-
- /* 2 possibilities for PM_ST_MISS_L1 */
- if (event == 0x0c13 || event == 0x0c23) {
- alt[1] = event ^ (0x0c13 ^ 0x0c23);
- return 2;
- }
-
- /* several possibilities for PM_INST_CMPL */
- for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) {
- if (event == ppc_inst_cmpl[i]) {
- for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j)
- if (j != i)
- alt[na++] = ppc_inst_cmpl[j];
- break;
- }
- }
-
- return na;
-}
-
-static int p4_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
-{
- unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
- unsigned int pmc, unit, byte, psel, lower;
- unsigned int ttm, grp;
- unsigned int pmc_inuse = 0;
- unsigned int pmc_grp_use[2];
- unsigned char busbyte[4];
- unsigned char unituse[16];
- unsigned int unitlower = 0;
- int i;
-
- if (n_ev > 8)
- return -1;
-
- /* First pass to count resource use */
- pmc_grp_use[0] = pmc_grp_use[1] = 0;
- memset(busbyte, 0, sizeof(busbyte));
- memset(unituse, 0, sizeof(unituse));
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
- if (pmc) {
- if (pmc_inuse & (1 << (pmc - 1)))
- return -1;
- pmc_inuse |= 1 << (pmc - 1);
- /* count 1/2/5/6 vs 3/4/7/8 use */
- ++pmc_grp_use[((pmc - 1) >> 1) & 1];
- }
- unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
- lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK;
- if (unit) {
- if (!pmc)
- ++pmc_grp_use[byte & 1];
- if (unit == 6 || unit == 8)
- /* map alt ISU1/IFU codes: 6->2, 8->3 */
- unit = (unit >> 1) - 1;
- if (busbyte[byte] && busbyte[byte] != unit)
- return -1;
- busbyte[byte] = unit;
- lower <<= unit;
- if (unituse[unit] && lower != (unitlower & lower))
- return -1;
- unituse[unit] = 1;
- unitlower |= lower;
- }
- }
- if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4)
- return -1;
-
- /*
- * Assign resources and set multiplexer selects.
- *
- * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2.
- * Each TTMx can only select one unit, but since
- * units 2 and 6 are both ISU1, and 3 and 8 are both IFU,
- * we have some choices.
- */
- if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) {
- unituse[6] = 1; /* Move 2 to 6 */
- unituse[2] = 0;
- }
- if (unituse[3] & (unituse[1] | unituse[2])) {
- unituse[8] = 1; /* Move 3 to 8 */
- unituse[3] = 0;
- unitlower = (unitlower & ~8) | ((unitlower & 8) << 5);
- }
- /* Check only one unit per TTMx */
- if (unituse[1] + unituse[2] + unituse[3] > 1 ||
- unituse[4] + unituse[6] + unituse[7] > 1 ||
- unituse[8] + unituse[9] > 1 ||
- (unituse[5] | unituse[10] | unituse[11] |
- unituse[13] | unituse[14]))
- return -1;
-
- /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */
- mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
- << MMCR1_TTM0SEL_SH;
- mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
- << MMCR1_TTM1SEL_SH;
- mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
-
- /* Set TTCxSEL fields. */
- if (unitlower & 0xe)
- mmcr1 |= 1ull << MMCR1_TTC0SEL_SH;
- if (unitlower & 0xf0)
- mmcr1 |= 1ull << MMCR1_TTC1SEL_SH;
- if (unitlower & 0xf00)
- mmcr1 |= 1ull << MMCR1_TTC2SEL_SH;
- if (unitlower & 0x7000)
- mmcr1 |= 1ull << MMCR1_TTC3SEL_SH;
-
- /* Set byte lane select fields. */
- for (byte = 0; byte < 4; ++byte) {
- unit = busbyte[byte];
- if (!unit)
- continue;
- if (unit == 0xf) {
- /* special case for GPS */
- mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte);
- } else {
- if (!unituse[unit])
- ttm = unit - 1; /* 2->1, 3->2 */
- else
- ttm = unit >> 2;
- mmcr1 |= (unsigned long)ttm
- << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
- }
- }
-
- /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
- unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK;
- byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK;
- psel = event[i] & PM_PMCSEL_MSK;
- if (!pmc) {
- /* Bus event or 00xxx direct event (off or cycles) */
- if (unit)
- psel |= 0x10 | ((byte & 2) << 2);
- for (pmc = 0; pmc < 8; ++pmc) {
- if (pmc_inuse & (1 << pmc))
- continue;
- grp = (pmc >> 1) & 1;
- if (unit) {
- if (grp == (byte & 1))
- break;
- } else if (pmc_grp_use[grp] < 4) {
- ++pmc_grp_use[grp];
- break;
- }
- }
- pmc_inuse |= 1 << pmc;
- } else {
- /* Direct event */
- --pmc;
- if (psel == 0 && (byte & 2))
- /* add events on higher-numbered bus */
- mmcr1 |= 1ull << mmcr1_adder_bits[pmc];
- else if (psel == 6 && byte == 3)
- /* seem to need to set sample_enable here */
- mmcra |= MMCRA_SAMPLE_ENABLE;
- psel |= 8;
- }
- if (pmc <= 1)
- mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc);
- else
- mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2));
- if (pmc == 7) /* PMC8 */
- mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH;
- hwc[i] = pmc;
- if (p4_marked_instr_event(event[i]))
- mmcra |= MMCRA_SAMPLE_ENABLE;
- }
-
- if (pmc_inuse & 1)
- mmcr0 |= MMCR0_PMC1CE;
- if (pmc_inuse & 0xfe)
- mmcr0 |= MMCR0_PMCjCE;
-
- mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
-
- /* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
- return 0;
-}
-
-static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
- /*
- * Setting the PMCxSEL field to 0 disables PMC x.
- * (Note that pmc is 0-based here, not 1-based.)
- */
- if (pmc <= 1) {
- mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc));
- } else {
- mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)));
- if (pmc == 7)
- mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH);
- }
-}
-
-static int p4_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = 7,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */
- [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */
-};
-
-#define C(x) PERF_COUNT_HW_CACHE_##x
-
-/*
- * Table of generalized cache-related events.
- * 0 means not supported, -1 means nonsensical, other values
- * are event codes.
- */
-static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
- [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x8c10, 0x3c10 },
- [C(OP_WRITE)] = { 0x7c10, 0xc13 },
- [C(OP_PREFETCH)] = { 0xc35, 0 },
- },
- [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { 0, 0 },
- },
- [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0 },
- [C(OP_WRITE)] = { 0, 0 },
- [C(OP_PREFETCH)] = { 0xc34, 0 },
- },
- [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0x904 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0, 0x900 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { 0x330, 0x331 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
- [C(NODE)] = { /* RESULT_ACCESS RESULT_MISS */
- [C(OP_READ)] = { -1, -1 },
- [C(OP_WRITE)] = { -1, -1 },
- [C(OP_PREFETCH)] = { -1, -1 },
- },
-};
-
-static struct power_pmu power4_pmu = {
- .name = "POWER4/4+",
- .n_counter = 8,
- .max_alternatives = 5,
- .add_fields = 0x0000001100005555ul,
- .test_adder = 0x0011083300000000ul,
- .compute_mmcr = p4_compute_mmcr,
- .get_constraint = p4_get_constraint,
- .get_alternatives = p4_get_alternatives,
- .disable_pmc = p4_disable_pmc,
- .n_generic = ARRAY_SIZE(p4_generic_events),
- .generic_events = p4_generic_events,
- .cache_events = &power4_cache_events,
- .flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
-};
-
-static int __init init_power4_pmu(void)
-{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
- return -ENODEV;
-
- return register_power_pmu(&power4_pmu);
-}
-
-early_initcall(init_power4_pmu);
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 0526dac66007..b4708ab73145 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for POWER5+/++ (not POWER5) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_event.h>
@@ -14,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
*/
@@ -134,7 +132,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
};
static int power5p_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh;
int bit, fmask;
@@ -452,7 +450,9 @@ static int power5p_marked_instr_event(u64 event)
}
static int power5p_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = 0;
@@ -590,20 +590,20 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
static int power5p_generic_events[] = {
@@ -622,7 +622,7 @@ static int power5p_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x1c10a8, 0x3c1088 },
[C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 },
@@ -677,14 +677,12 @@ static struct power_pmu power5p_pmu = {
.cache_events = &power5p_cache_events,
};
-static int __init init_power5p_pmu(void)
+int __init init_power5p_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
- && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER5p)
return -ENODEV;
return register_power_pmu(&power5p_pmu);
}
-
-early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 4dc99f9f7962..c6aefd0a1cc8 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for POWER5 (not POWER5++) processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_event.h>
@@ -14,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER5 (not POWER5++)
*/
@@ -138,7 +136,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
};
static int power5_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh;
int bit, fmask;
@@ -383,7 +381,9 @@ static int power5_marked_instr_event(u64 event)
}
static int power5_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -532,20 +532,20 @@ static int power5_compute_mmcr(u64 event[], int n_ev,
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
}
static int power5_generic_events[] = {
@@ -564,7 +564,7 @@ static int power5_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x4c1090, 0x3c1088 },
[C(OP_WRITE)] = { 0x3c1090, 0xc10c3 },
@@ -618,13 +618,12 @@ static struct power_pmu power5_pmu = {
.flags = PPMU_HAS_SSLOT,
};
-static int __init init_power5_pmu(void)
+int __init init_power5_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER5)
return -ENODEV;
return register_power_pmu(&power5_pmu);
}
-
-early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 9c9d646b68a1..9f720b522e17 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for POWER6 processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_event.h>
@@ -14,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER6
*/
@@ -175,7 +173,8 @@ static int power6_marked_instr_event(u64 event)
* Assign PMC numbers and compute MMCR1 value for a set of events
*/
static int p6_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -247,13 +246,13 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
if (pmc < 4)
mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
}
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0xe)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
@@ -268,7 +267,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev,
* 32-34 select field: nest (subunit) event selector
*/
static int p6_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, sh, subunit;
unsigned long mask = 0, value = 0;
@@ -336,26 +335,38 @@ static const unsigned int event_alternatives[][MAX_ALT] = {
{ 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */
};
-/*
- * This could be made more efficient with a binary search on
- * a presorted list, if necessary
- */
static int find_alternatives_list(u64 event)
{
- int i, j;
- unsigned int alt;
-
- for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
- if (event < event_alternatives[i][0])
- return -1;
- for (j = 0; j < MAX_ALT; ++j) {
- alt = event_alternatives[i][j];
- if (!alt || event < alt)
- break;
- if (event == alt)
- return i;
- }
+ const unsigned int presorted_event_table[] = {
+ 0x0130e8, 0x080080, 0x080088, 0x10000a, 0x10000b, 0x10000d, 0x10000e,
+ 0x100010, 0x10001a, 0x100026, 0x100054, 0x100056, 0x1000f0, 0x1000f8,
+ 0x1000fc, 0x200008, 0x20000e, 0x200010, 0x200012, 0x200054, 0x2000f0,
+ 0x2000f2, 0x2000f4, 0x2000f5, 0x2000f6, 0x2000f8, 0x2000fc, 0x2000fe,
+ 0x2d0030, 0x30000a, 0x30000c, 0x300010, 0x300012, 0x30001a, 0x300056,
+ 0x3000f0, 0x3000f2, 0x3000f6, 0x3000f8, 0x3000fc, 0x3000fe, 0x400006,
+ 0x400007, 0x40000a, 0x40000e, 0x400010, 0x400018, 0x400056, 0x4000f0,
+ 0x4000f8, 0x600005
+ };
+ const unsigned int event_index_table[] = {
+ 0, 1, 2, 3, 4, 1, 5, 6, 7, 8, 9, 10, 11, 12, 13, 12, 14,
+ 7, 15, 2, 9, 16, 3, 4, 0, 17, 10, 18, 19, 20, 1, 17, 15, 19,
+ 18, 2, 16, 21, 8, 0, 22, 13, 14, 11, 21, 5, 20, 22, 1, 6, 3
+ };
+ int hi = ARRAY_SIZE(presorted_event_table) - 1;
+ int lo = 0;
+
+ while (lo <= hi) {
+ int mid = lo + (hi - lo) / 2;
+ unsigned int alt = presorted_event_table[mid];
+
+ if (alt < event)
+ lo = mid + 1;
+ else if (alt > event)
+ hi = mid - 1;
+ else
+ return event_index_table[mid];
}
+
return -1;
}
@@ -461,11 +472,11 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[])
return nalt;
}
-static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
/* Set PMCxSEL to 0 to disable PMCx */
if (pmc <= 3)
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
static int power6_generic_events[] = {
@@ -485,7 +496,7 @@ static int power6_generic_events[] = {
* are event codes.
* The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
*/
-static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x280030, 0x80080 },
[C(OP_WRITE)] = { 0x180032, 0x80088 },
@@ -540,13 +551,12 @@ static struct power_pmu power6_pmu = {
.cache_events = &power6_cache_events,
};
-static int __init init_power6_pmu(void)
+int __init init_power6_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER6)
return -ENODEV;
return register_power_pmu(&power6_pmu);
}
-
-early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/perf/power7-events-list.h b/arch/powerpc/perf/power7-events-list.h
index 64f13d9260a6..6c2b7066490b 100644
--- a/arch/powerpc/perf/power7-events-list.h
+++ b/arch/powerpc/perf/power7-events-list.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Performance counter support for POWER7 processors.
*
* Copyright 2013 Runzhen Wang, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
EVENT(PM_IC_DEMAND_L2_BR_ALL, 0x04898)
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 5b62f2389290..c95ccf2e28da 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for POWER7 processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/kernel.h>
#include <linux/perf_event.h>
@@ -14,6 +10,8 @@
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for POWER7
*/
@@ -54,7 +52,7 @@
* Power7 event codes.
*/
#define EVENT(_name, _code) \
- PME_##_name = _code,
+ _name = _code,
enum {
#include "power7-events-list.h"
@@ -83,7 +81,7 @@ enum {
*/
static int power7_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, sh, unit;
unsigned long mask = 0, value = 0;
@@ -238,6 +236,7 @@ static int power7_marked_instr_event(u64 event)
case 6:
if (psel == 0x64)
return pmc >= 3;
+ break;
case 8:
return unit == 0xd;
}
@@ -245,7 +244,9 @@ static int power7_marked_instr_event(u64 event)
}
static int power7_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr1 = 0;
unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -301,31 +302,31 @@ static int power7_compute_mmcr(u64 event[], int n_ev,
}
/* Return MMCRx values */
- mmcr[0] = 0;
+ mmcr->mmcr0 = 0;
if (pmc_inuse & 1)
- mmcr[0] = MMCR0_PMC1CE;
+ mmcr->mmcr0 = MMCR0_PMC1CE;
if (pmc_inuse & 0x3e)
- mmcr[0] |= MMCR0_PMCjCE;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 |= MMCR0_PMCjCE;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
if (pmc <= 3)
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+ mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
}
static int power7_generic_events[] = {
- [PERF_COUNT_HW_CPU_CYCLES] = PME_PM_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PME_PM_GCT_NOSLOT_CYC,
- [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PME_PM_CMPLU_STALL,
- [PERF_COUNT_HW_INSTRUCTIONS] = PME_PM_INST_CMPL,
- [PERF_COUNT_HW_CACHE_REFERENCES] = PME_PM_LD_REF_L1,
- [PERF_COUNT_HW_CACHE_MISSES] = PME_PM_LD_MISS_L1,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PME_PM_BRU_FIN,
- [PERF_COUNT_HW_BRANCH_MISSES] = PME_PM_BR_MPRED,
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_GCT_NOSLOT_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_FIN,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED,
};
#define C(x) PERF_COUNT_HW_CACHE_##x
@@ -335,7 +336,7 @@ static int power7_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0xc880, 0x400f0 },
[C(OP_WRITE)] = { 0, 0x300f0 },
@@ -404,7 +405,7 @@ static struct attribute *power7_events_attr[] = {
NULL
};
-static struct attribute_group power7_pmu_events_group = {
+static const struct attribute_group power7_pmu_events_group = {
.name = "events",
.attrs = power7_events_attr,
};
@@ -416,7 +417,7 @@ static struct attribute *power7_pmu_format_attr[] = {
NULL,
};
-struct attribute_group power7_pmu_format_group = {
+static const struct attribute_group power7_pmu_format_group = {
.name = "format",
.attrs = power7_pmu_format_attr,
};
@@ -444,16 +445,15 @@ static struct power_pmu power7_pmu = {
.cache_events = &power7_cache_events,
};
-static int __init init_power7_pmu(void)
+int __init init_power7_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER7 && PVR_VER(pvr) != PVR_POWER7p)
return -ENODEV;
- if (pvr_version_is(PVR_POWER7p))
+ if (PVR_VER(pvr) == PVR_POWER7p)
power7_pmu.flags |= PPMU_SIAR_VALID;
return register_power_pmu(&power7_pmu);
}
-
-early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/perf/power8-events-list.h b/arch/powerpc/perf/power8-events-list.h
new file mode 100644
index 000000000000..2e9b75d9955f
--- /dev/null
+++ b/arch/powerpc/perf/power8-events-list.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER8 processors.
+ *
+ * Copyright 2014 Sukadev Bhattiprolu, IBM Corporation.
+ */
+
+/*
+ * Power8 event codes.
+ */
+EVENT(PM_CYC, 0x0001e)
+EVENT(PM_GCT_NOSLOT_CYC, 0x100f8)
+EVENT(PM_CMPLU_STALL, 0x4000a)
+EVENT(PM_INST_CMPL, 0x00002)
+EVENT(PM_BRU_FIN, 0x10068)
+EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1, 0x100ee)
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1, 0x3e054)
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1, 0x300f0)
+/* L1 cache data prefetches */
+EVENT(PM_L1_PREF, 0x0d8b8)
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1, 0x04080)
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS, 0x200fd)
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_L1_DEMAND_WRITE, 0x0408c)
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_WRITE, 0x0408e)
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3, 0x4c042)
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x17080)
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x17082)
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PREF_ALL, 0x4e052)
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS, 0x300fc)
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS, 0x400fc)
+/* Run_Instructions */
+EVENT(PM_RUN_INST_CMPL, 0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT, 0x400fa)
+/* Run_cycles */
+EVENT(PM_RUN_CYC, 0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT, 0x200f4)
+/* Marked store completed */
+EVENT(PM_MRK_ST_CMPL, 0x10134)
+/* Alternate event code for Marked store completed */
+EVENT(PM_MRK_ST_CMPL_ALT, 0x301e2)
+/* Marked two path branch */
+EVENT(PM_BR_MRK_2PATH, 0x10138)
+/* Alternate event code for PM_BR_MRK_2PATH */
+EVENT(PM_BR_MRK_2PATH_ALT, 0x40138)
+/* L3 castouts in Mepf state */
+EVENT(PM_L3_CO_MEPF, 0x18082)
+/* Alternate event code for PM_L3_CO_MEPF */
+EVENT(PM_L3_CO_MEPF_ALT, 0x3e05e)
+/* Data cache was reloaded from a location other than L2 due to a marked load */
+EVENT(PM_MRK_DATA_FROM_L2MISS, 0x1d14e)
+/* Alternate event code for PM_MRK_DATA_FROM_L2MISS */
+EVENT(PM_MRK_DATA_FROM_L2MISS_ALT, 0x401e8)
+/* Alternate event code for PM_CMPLU_STALL */
+EVENT(PM_CMPLU_STALL_ALT, 0x1e054)
+/* Two path branch */
+EVENT(PM_BR_2PATH, 0x20036)
+/* Alternate event code for PM_BR_2PATH */
+EVENT(PM_BR_2PATH_ALT, 0x40036)
+/* # PPC Dispatched */
+EVENT(PM_INST_DISP, 0x200f2)
+/* Alternate event code for PM_INST_DISP */
+EVENT(PM_INST_DISP_ALT, 0x300f2)
+/* Marked filter Match */
+EVENT(PM_MRK_FILT_MATCH, 0x2013c)
+/* Alternate event code for PM_MRK_FILT_MATCH */
+EVENT(PM_MRK_FILT_MATCH_ALT, 0x3012e)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT, 0x400f0)
+/*
+ * Memory Access Event -- mem_access
+ * Primary PMU event used here is PM_MRK_INST_CMPL, along with
+ * Random Load/Store Facility Sampling (RIS) in Random sampling mode (MMCRA[SM]).
+ */
+EVENT(MEM_ACCESS, 0x10401e0)
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 396351db601b..ef9685065aaf 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -1,67 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for POWER8 processors.
*
* Copyright 2009 Paul Mackerras, IBM Corporation.
* Copyright 2013 Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#define pr_fmt(fmt) "power8-pmu: " fmt
-#include <linux/kernel.h>
-#include <linux/perf_event.h>
-#include <asm/firmware.h>
-#include <asm/cputable.h>
-
+#include "isa207-common.h"
/*
* Some power8 event codes.
*/
-#define PM_CYC 0x0001e
-#define PM_GCT_NOSLOT_CYC 0x100f8
-#define PM_CMPLU_STALL 0x4000a
-#define PM_INST_CMPL 0x00002
-#define PM_BRU_FIN 0x10068
-#define PM_BR_MPRED_CMPL 0x400f6
-
-/* All L1 D cache load references counted at finish, gated by reject */
-#define PM_LD_REF_L1 0x100ee
-/* Load Missed L1 */
-#define PM_LD_MISS_L1 0x3e054
-/* Store Missed L1 */
-#define PM_ST_MISS_L1 0x300f0
-/* L1 cache data prefetches */
-#define PM_L1_PREF 0x0d8b8
-/* Instruction fetches from L1 */
-#define PM_INST_FROM_L1 0x04080
-/* Demand iCache Miss */
-#define PM_L1_ICACHE_MISS 0x200fd
-/* Instruction Demand sectors wriittent into IL1 */
-#define PM_L1_DEMAND_WRITE 0x0408c
-/* Instruction prefetch written into IL1 */
-#define PM_IC_PREF_WRITE 0x0408e
-/* The data cache was reloaded from local core's L3 due to a demand load */
-#define PM_DATA_FROM_L3 0x4c042
-/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
-#define PM_DATA_FROM_L3MISS 0x300fe
-/* All successful D-side store dispatches for this thread */
-#define PM_L2_ST 0x17080
-/* All successful D-side store dispatches for this thread that were L2 Miss */
-#define PM_L2_ST_MISS 0x17082
-/* Total HW L3 prefetches(Load+store) */
-#define PM_L3_PREF_ALL 0x4e052
-/* Data PTEG reload */
-#define PM_DTLB_MISS 0x300fc
-/* ITLB Reloaded */
-#define PM_ITLB_MISS 0x400fc
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+#include "power8-events-list.h"
+};
+#undef EVENT
+
+/* MMCRA IFM bits - POWER8 */
+#define POWER8_MMCRA_IFM1 0x0000000040000000UL
+#define POWER8_MMCRA_IFM2 0x0000000080000000UL
+#define POWER8_MMCRA_IFM3 0x00000000C0000000UL
+#define POWER8_MMCRA_BHRB_MASK 0x00000000C0000000UL
/*
- * Raw event encoding for POWER8:
+ * Raw event encoding for PowerISA v2.07 (Power8):
*
* 60 56 52 48 44 40 36 32
* | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
@@ -124,522 +91,115 @@
*
*/
-#define EVENT_EBB_MASK 1ull
-#define EVENT_EBB_SHIFT PERF_EVENT_CONFIG_EBB_SHIFT
-#define EVENT_BHRB_MASK 1ull
-#define EVENT_BHRB_SHIFT 62
-#define EVENT_WANTS_BHRB (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
-#define EVENT_IFM_MASK 3ull
-#define EVENT_IFM_SHIFT 60
-#define EVENT_THR_CMP_SHIFT 40 /* Threshold CMP value */
-#define EVENT_THR_CMP_MASK 0x3ff
-#define EVENT_THR_CTL_SHIFT 32 /* Threshold control value (start/stop) */
-#define EVENT_THR_CTL_MASK 0xffull
-#define EVENT_THR_SEL_SHIFT 29 /* Threshold select value */
-#define EVENT_THR_SEL_MASK 0x7
-#define EVENT_THRESH_SHIFT 29 /* All threshold bits */
-#define EVENT_THRESH_MASK 0x1fffffull
-#define EVENT_SAMPLE_SHIFT 24 /* Sampling mode & eligibility */
-#define EVENT_SAMPLE_MASK 0x1f
-#define EVENT_CACHE_SEL_SHIFT 20 /* L2/L3 cache select */
-#define EVENT_CACHE_SEL_MASK 0xf
-#define EVENT_IS_L1 (4 << EVENT_CACHE_SEL_SHIFT)
-#define EVENT_PMC_SHIFT 16 /* PMC number (1-based) */
-#define EVENT_PMC_MASK 0xf
-#define EVENT_UNIT_SHIFT 12 /* Unit */
-#define EVENT_UNIT_MASK 0xf
-#define EVENT_COMBINE_SHIFT 11 /* Combine bit */
-#define EVENT_COMBINE_MASK 0x1
-#define EVENT_MARKED_SHIFT 8 /* Marked bit */
-#define EVENT_MARKED_MASK 0x1
-#define EVENT_IS_MARKED (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
-#define EVENT_PSEL_MASK 0xff /* PMCxSEL value */
-
-/* Bits defined by Linux */
-#define EVENT_LINUX_MASK \
- ((EVENT_EBB_MASK << EVENT_EBB_SHIFT) | \
- (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT) | \
- (EVENT_IFM_MASK << EVENT_IFM_SHIFT))
-
-#define EVENT_VALID_MASK \
- ((EVENT_THRESH_MASK << EVENT_THRESH_SHIFT) | \
- (EVENT_SAMPLE_MASK << EVENT_SAMPLE_SHIFT) | \
- (EVENT_CACHE_SEL_MASK << EVENT_CACHE_SEL_SHIFT) | \
- (EVENT_PMC_MASK << EVENT_PMC_SHIFT) | \
- (EVENT_UNIT_MASK << EVENT_UNIT_SHIFT) | \
- (EVENT_COMBINE_MASK << EVENT_COMBINE_SHIFT) | \
- (EVENT_MARKED_MASK << EVENT_MARKED_SHIFT) | \
- EVENT_LINUX_MASK | \
- EVENT_PSEL_MASK)
-
-/* MMCRA IFM bits - POWER8 */
-#define POWER8_MMCRA_IFM1 0x0000000040000000UL
-#define POWER8_MMCRA_IFM2 0x0000000080000000UL
-#define POWER8_MMCRA_IFM3 0x00000000C0000000UL
-
-#define ONLY_PLM \
- (PERF_SAMPLE_BRANCH_USER |\
- PERF_SAMPLE_BRANCH_KERNEL |\
- PERF_SAMPLE_BRANCH_HV)
-
-/*
- * Layout of constraint bits:
- *
- * 60 56 52 48 44 40 36 32
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ]
- * |
- * thresh_sel -*
- *
- * 28 24 20 16 12 8 4 0
- * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- * [ ] | [ ] [ sample ] [ ] [6] [5] [4] [3] [2] [1]
- * | | | |
- * BHRB IFM -* | | | Count of events for each PMC.
- * EBB -* | | p1, p2, p3, p4, p5, p6.
- * L1 I/D qualifier -* |
- * nc - number of counters -*
- *
- * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
- * we want the low bit of each field to be added to any existing value.
- *
- * Everything else is a value field.
- */
-
-#define CNST_FAB_MATCH_VAL(v) (((v) & EVENT_THR_CTL_MASK) << 56)
-#define CNST_FAB_MATCH_MASK CNST_FAB_MATCH_VAL(EVENT_THR_CTL_MASK)
-
-/* We just throw all the threshold bits into the constraint */
-#define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
-#define CNST_THRESH_MASK CNST_THRESH_VAL(EVENT_THRESH_MASK)
-
-#define CNST_EBB_VAL(v) (((v) & EVENT_EBB_MASK) << 24)
-#define CNST_EBB_MASK CNST_EBB_VAL(EVENT_EBB_MASK)
-
-#define CNST_IFM_VAL(v) (((v) & EVENT_IFM_MASK) << 25)
-#define CNST_IFM_MASK CNST_IFM_VAL(EVENT_IFM_MASK)
-
-#define CNST_L1_QUAL_VAL(v) (((v) & 3) << 22)
-#define CNST_L1_QUAL_MASK CNST_L1_QUAL_VAL(3)
-
-#define CNST_SAMPLE_VAL(v) (((v) & EVENT_SAMPLE_MASK) << 16)
-#define CNST_SAMPLE_MASK CNST_SAMPLE_VAL(EVENT_SAMPLE_MASK)
-
-/*
- * For NC we are counting up to 4 events. This requires three bits, and we need
- * the fifth event to overflow and set the 4th bit. To achieve that we bias the
- * fields by 3 in test_adder.
- */
-#define CNST_NC_SHIFT 12
-#define CNST_NC_VAL (1 << CNST_NC_SHIFT)
-#define CNST_NC_MASK (8 << CNST_NC_SHIFT)
-#define POWER8_TEST_ADDER (3 << CNST_NC_SHIFT)
-
-/*
- * For the per-PMC fields we have two bits. The low bit is added, so if two
- * events ask for the same PMC the sum will overflow, setting the high bit,
- * indicating an error. So our mask sets the high bit.
- */
-#define CNST_PMC_SHIFT(pmc) ((pmc - 1) * 2)
-#define CNST_PMC_VAL(pmc) (1 << CNST_PMC_SHIFT(pmc))
-#define CNST_PMC_MASK(pmc) (2 << CNST_PMC_SHIFT(pmc))
-
-/* Our add_fields is defined as: */
-#define POWER8_ADD_FIELDS \
- CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
- CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL
-
-
-/* Bits in MMCR1 for POWER8 */
-#define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1)))
-#define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1))
-#define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8)
-#define MMCR1_FAB_SHIFT 36
-#define MMCR1_DC_QUAL_SHIFT 47
-#define MMCR1_IC_QUAL_SHIFT 46
-
-/* Bits in MMCRA for POWER8 */
-#define MMCRA_SAMP_MODE_SHIFT 1
-#define MMCRA_SAMP_ELIG_SHIFT 4
-#define MMCRA_THR_CTL_SHIFT 8
-#define MMCRA_THR_SEL_SHIFT 16
-#define MMCRA_THR_CMP_SHIFT 32
-#define MMCRA_SDAR_MODE_TLB (1ull << 42)
-#define MMCRA_IFM_SHIFT 30
-
-/* Bits in MMCR2 for POWER8 */
-#define MMCR2_FCS(pmc) (1ull << (63 - (((pmc) - 1) * 9)))
-#define MMCR2_FCP(pmc) (1ull << (62 - (((pmc) - 1) * 9)))
-#define MMCR2_FCH(pmc) (1ull << (57 - (((pmc) - 1) * 9)))
-
-
-static inline bool event_is_fab_match(u64 event)
-{
- /* Only check pmc, unit and pmcxsel, ignore the edge bit (0) */
- event &= 0xff0fe;
-
- /* PM_MRK_FAB_RSP_MATCH & PM_MRK_FAB_RSP_MATCH_CYC */
- return (event == 0x30056 || event == 0x4f052);
-}
-
-static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
-{
- unsigned int unit, pmc, cache, ebb;
- unsigned long mask, value;
-
- mask = value = 0;
-
- if (event & ~EVENT_VALID_MASK)
- return -1;
-
- pmc = (event >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
- unit = (event >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
- cache = (event >> EVENT_CACHE_SEL_SHIFT) & EVENT_CACHE_SEL_MASK;
- ebb = (event >> EVENT_EBB_SHIFT) & EVENT_EBB_MASK;
-
- if (pmc) {
- u64 base_event;
-
- if (pmc > 6)
- return -1;
-
- /* Ignore Linux defined bits when checking event below */
- base_event = event & ~EVENT_LINUX_MASK;
-
- if (pmc >= 5 && base_event != 0x500fa && base_event != 0x600f4)
- return -1;
-
- mask |= CNST_PMC_MASK(pmc);
- value |= CNST_PMC_VAL(pmc);
- }
-
- if (pmc <= 4) {
- /*
- * Add to number of counters in use. Note this includes events with
- * a PMC of 0 - they still need a PMC, it's just assigned later.
- * Don't count events on PMC 5 & 6, there is only one valid event
- * on each of those counters, and they are handled above.
- */
- mask |= CNST_NC_MASK;
- value |= CNST_NC_VAL;
- }
-
- if (unit >= 6 && unit <= 9) {
- /*
- * L2/L3 events contain a cache selector field, which is
- * supposed to be programmed into MMCRC. However MMCRC is only
- * HV writable, and there is no API for guest kernels to modify
- * it. The solution is for the hypervisor to initialise the
- * field to zeroes, and for us to only ever allow events that
- * have a cache selector of zero. The bank selector (bit 3) is
- * irrelevant, as long as the rest of the value is 0.
- */
- if (cache & 0x7)
- return -1;
-
- } else if (event & EVENT_IS_L1) {
- mask |= CNST_L1_QUAL_MASK;
- value |= CNST_L1_QUAL_VAL(cache);
- }
-
- if (event & EVENT_IS_MARKED) {
- mask |= CNST_SAMPLE_MASK;
- value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
- }
-
- /*
- * Special case for PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
- * the threshold control bits are used for the match value.
- */
- if (event_is_fab_match(event)) {
- mask |= CNST_FAB_MATCH_MASK;
- value |= CNST_FAB_MATCH_VAL(event >> EVENT_THR_CTL_SHIFT);
- } else {
- /*
- * Check the mantissa upper two bits are not zero, unless the
- * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
- */
- unsigned int cmp, exp;
-
- cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
- exp = cmp >> 7;
-
- if (exp && (cmp & 0x60) == 0)
- return -1;
-
- mask |= CNST_THRESH_MASK;
- value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
- }
-
- if (!pmc && ebb)
- /* EBB events must specify the PMC */
- return -1;
-
- if (event & EVENT_WANTS_BHRB) {
- if (!ebb)
- /* Only EBB events can request BHRB */
- return -1;
-
- mask |= CNST_IFM_MASK;
- value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
- }
-
- /*
- * All events must agree on EBB, either all request it or none.
- * EBB events are pinned & exclusive, so this should never actually
- * hit, but we leave it as a fallback in case.
- */
- mask |= CNST_EBB_VAL(ebb);
- value |= CNST_EBB_MASK;
-
- *maskp = mask;
- *valp = value;
-
- return 0;
-}
-
-static int power8_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[],
- struct perf_event *pevents[])
-{
- unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
- unsigned int pmc, pmc_inuse;
- int i;
-
- pmc_inuse = 0;
-
- /* First pass to count resource use */
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
- if (pmc)
- pmc_inuse |= 1 << pmc;
- }
-
- /* In continous sampling mode, update SDAR on TLB miss */
- mmcra = MMCRA_SDAR_MODE_TLB;
- mmcr1 = mmcr2 = 0;
-
- /* Second pass: assign PMCs, set all MMCR1 fields */
- for (i = 0; i < n_ev; ++i) {
- pmc = (event[i] >> EVENT_PMC_SHIFT) & EVENT_PMC_MASK;
- unit = (event[i] >> EVENT_UNIT_SHIFT) & EVENT_UNIT_MASK;
- combine = (event[i] >> EVENT_COMBINE_SHIFT) & EVENT_COMBINE_MASK;
- psel = event[i] & EVENT_PSEL_MASK;
-
- if (!pmc) {
- for (pmc = 1; pmc <= 4; ++pmc) {
- if (!(pmc_inuse & (1 << pmc)))
- break;
- }
-
- pmc_inuse |= 1 << pmc;
- }
-
- if (pmc <= 4) {
- mmcr1 |= unit << MMCR1_UNIT_SHIFT(pmc);
- mmcr1 |= combine << MMCR1_COMBINE_SHIFT(pmc);
- mmcr1 |= psel << MMCR1_PMCSEL_SHIFT(pmc);
- }
-
- if (event[i] & EVENT_IS_L1) {
- cache = event[i] >> EVENT_CACHE_SEL_SHIFT;
- mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT;
- cache >>= 1;
- mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT;
- }
-
- if (event[i] & EVENT_IS_MARKED) {
- mmcra |= MMCRA_SAMPLE_ENABLE;
-
- val = (event[i] >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
- if (val) {
- mmcra |= (val & 3) << MMCRA_SAMP_MODE_SHIFT;
- mmcra |= (val >> 2) << MMCRA_SAMP_ELIG_SHIFT;
- }
- }
-
- /*
- * PM_MRK_FAB_RSP_MATCH and PM_MRK_FAB_RSP_MATCH_CYC,
- * the threshold bits are used for the match value.
- */
- if (event_is_fab_match(event[i])) {
- mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
- EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
- } else {
- val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
- mmcra |= val << MMCRA_THR_CTL_SHIFT;
- val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
- mmcra |= val << MMCRA_THR_SEL_SHIFT;
- val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
- mmcra |= val << MMCRA_THR_CMP_SHIFT;
- }
-
- if (event[i] & EVENT_WANTS_BHRB) {
- val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
- mmcra |= val << MMCRA_IFM_SHIFT;
- }
-
- if (pevents[i]->attr.exclude_user)
- mmcr2 |= MMCR2_FCP(pmc);
-
- if (pevents[i]->attr.exclude_hv)
- mmcr2 |= MMCR2_FCH(pmc);
-
- if (pevents[i]->attr.exclude_kernel) {
- if (cpu_has_feature(CPU_FTR_HVMODE))
- mmcr2 |= MMCR2_FCH(pmc);
- else
- mmcr2 |= MMCR2_FCS(pmc);
- }
-
- hwc[i] = pmc - 1;
- }
-
- /* Return MMCRx values */
- mmcr[0] = 0;
-
- /* pmc_inuse is 1-based */
- if (pmc_inuse & 2)
- mmcr[0] = MMCR0_PMC1CE;
-
- if (pmc_inuse & 0x7c)
- mmcr[0] |= MMCR0_PMCjCE;
-
- /* If we're not using PMC 5 or 6, freeze them */
- if (!(pmc_inuse & 0x60))
- mmcr[0] |= MMCR0_FC56;
-
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
- mmcr[3] = mmcr2;
-
- return 0;
-}
-
-#define MAX_ALT 2
+/* PowerISA v2.07 format attribute structure */
+extern const struct attribute_group isa207_pmu_format_group;
/* Table of alternatives, sorted by column 0 */
static const unsigned int event_alternatives[][MAX_ALT] = {
- { 0x10134, 0x301e2 }, /* PM_MRK_ST_CMPL */
- { 0x10138, 0x40138 }, /* PM_BR_MRK_2PATH */
- { 0x18082, 0x3e05e }, /* PM_L3_CO_MEPF */
- { 0x1d14e, 0x401e8 }, /* PM_MRK_DATA_FROM_L2MISS */
- { 0x1e054, 0x4000a }, /* PM_CMPLU_STALL */
- { 0x20036, 0x40036 }, /* PM_BR_2PATH */
- { 0x200f2, 0x300f2 }, /* PM_INST_DISP */
- { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */
- { 0x2013c, 0x3012e }, /* PM_MRK_FILT_MATCH */
- { 0x3e054, 0x400f0 }, /* PM_LD_MISS_L1 */
- { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */
+ { PM_MRK_ST_CMPL, PM_MRK_ST_CMPL_ALT },
+ { PM_BR_MRK_2PATH, PM_BR_MRK_2PATH_ALT },
+ { PM_L3_CO_MEPF, PM_L3_CO_MEPF_ALT },
+ { PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L2MISS_ALT },
+ { PM_CMPLU_STALL_ALT, PM_CMPLU_STALL },
+ { PM_BR_2PATH, PM_BR_2PATH_ALT },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+ { PM_RUN_CYC_ALT, PM_RUN_CYC },
+ { PM_MRK_FILT_MATCH, PM_MRK_FILT_MATCH_ALT },
+ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
};
-/*
- * Scan the alternatives table for a match and return the
- * index into the alternatives table if found, else -1.
- */
-static int find_alternative(u64 event)
-{
- int i, j;
-
- for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
- if (event < event_alternatives[i][0])
- break;
-
- for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
- if (event == event_alternatives[i][j])
- return i;
- }
-
- return -1;
-}
-
static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[])
{
- int i, j, num_alt = 0;
- u64 alt_event;
-
- alt[num_alt++] = event;
-
- i = find_alternative(event);
- if (i >= 0) {
- /* Filter out the original event, it's already in alt[0] */
- for (j = 0; j < MAX_ALT; ++j) {
- alt_event = event_alternatives[i][j];
- if (alt_event && alt_event != event)
- alt[num_alt++] = alt_event;
- }
- }
+ int num_alt = 0;
- if (flags & PPMU_ONLY_COUNT_RUN) {
- /*
- * We're only counting in RUN state, so PM_CYC is equivalent to
- * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL.
- */
- j = num_alt;
- for (i = 0; i < num_alt; ++i) {
- switch (alt[i]) {
- case 0x1e: /* PM_CYC */
- alt[j++] = 0x600f4; /* PM_RUN_CYC */
- break;
- case 0x600f4: /* PM_RUN_CYC */
- alt[j++] = 0x1e;
- break;
- case 0x2: /* PM_PPC_CMPL */
- alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */
- break;
- case 0x500fa: /* PM_RUN_INST_CMPL */
- alt[j++] = 0x2; /* PM_PPC_CMPL */
- break;
- }
- }
- num_alt = j;
- }
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(event_alternatives), flags,
+ event_alternatives);
return num_alt;
}
-static void power8_disable_pmc(unsigned int pmc, unsigned long mmcr[])
-{
- if (pmc <= 3)
- mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
-}
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_GCT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_FIN);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(mem_access, MEM_ACCESS);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
+
+CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
+
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST);
+
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BRU_FIN);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *power8_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_GCT_NOSLOT_CYC),
+ GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BRU_FIN),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+ GENERIC_EVENT_PTR(MEM_ACCESS),
+
+ CACHE_EVENT_PTR(PM_LD_MISS_L1),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_L1_PREF),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PREF_ALL),
+ CACHE_EVENT_PTR(PM_L2_ST_MISS),
+ CACHE_EVENT_PTR(PM_L2_ST),
+
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BRU_FIN),
+
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
-PMU_FORMAT_ATTR(event, "config:0-49");
-PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
-PMU_FORMAT_ATTR(mark, "config:8");
-PMU_FORMAT_ATTR(combine, "config:11");
-PMU_FORMAT_ATTR(unit, "config:12-15");
-PMU_FORMAT_ATTR(pmc, "config:16-19");
-PMU_FORMAT_ATTR(cache_sel, "config:20-23");
-PMU_FORMAT_ATTR(sample_mode, "config:24-28");
-PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
-PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
-PMU_FORMAT_ATTR(thresh_start, "config:36-39");
-PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
-
-static struct attribute *power8_pmu_format_attr[] = {
- &format_attr_event.attr,
- &format_attr_pmcxsel.attr,
- &format_attr_mark.attr,
- &format_attr_combine.attr,
- &format_attr_unit.attr,
- &format_attr_pmc.attr,
- &format_attr_cache_sel.attr,
- &format_attr_sample_mode.attr,
- &format_attr_thresh_sel.attr,
- &format_attr_thresh_stop.attr,
- &format_attr_thresh_start.attr,
- &format_attr_thresh_cmp.attr,
- NULL,
+static const struct attribute_group power8_pmu_events_group = {
+ .name = "events",
+ .attrs = power8_events_attr,
+};
+
+static struct attribute *power8_pmu_caps_attrs[] = {
+ NULL
};
-struct attribute_group power8_pmu_format_group = {
- .name = "format",
- .attrs = power8_pmu_format_attr,
+static struct attribute_group power8_pmu_caps_group = {
+ .name = "caps",
+ .attrs = power8_pmu_caps_attrs,
};
static const struct attribute_group *power8_pmu_attr_groups[] = {
- &power8_pmu_format_group,
+ &isa207_pmu_format_group,
+ &power8_pmu_events_group,
+ &power8_pmu_caps_group,
NULL,
};
@@ -676,6 +236,9 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
return -1;
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+ return -1;
+
if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
pmu_bhrb_filter |= POWER8_MMCRA_IFM1;
return pmu_bhrb_filter;
@@ -687,6 +250,8 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type)
static void power8_config_bhrb(u64 pmu_bhrb_filter)
{
+ pmu_bhrb_filter &= POWER8_MMCRA_BHRB_MASK;
+
/* Enable BHRB filter in PMU */
mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
}
@@ -698,7 +263,7 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter)
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[ C(L1D) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
@@ -803,17 +368,19 @@ static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
static struct power_pmu power8_pmu = {
.name = "POWER8",
- .n_counter = 6,
+ .n_counter = MAX_PMU_COUNTERS,
.max_alternatives = MAX_ALT + 1,
- .add_fields = POWER8_ADD_FIELDS,
- .test_adder = POWER8_TEST_ADDER,
- .compute_mmcr = power8_compute_mmcr,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power8_config_bhrb,
.bhrb_filter_map = power8_bhrb_filter_map,
- .get_constraint = power8_get_constraint,
+ .get_constraint = isa207_get_constraint,
.get_alternatives = power8_get_alternatives,
- .disable_pmc = power8_disable_pmc,
- .flags = PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .get_mem_data_src = isa207_get_mem_data_src,
+ .get_mem_weight = isa207_get_mem_weight,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
.n_generic = ARRAY_SIZE(power8_generic_events),
.generic_events = power8_generic_events,
.cache_events = &power8_cache_events,
@@ -821,12 +388,13 @@ static struct power_pmu power8_pmu = {
.bhrb_nr = 32,
};
-static int __init init_power8_pmu(void)
+int __init init_power8_pmu(void)
{
int rc;
+ unsigned int pvr = mfspr(SPRN_PVR);
- if (!cur_cpu_spec->oprofile_cpu_type ||
- strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
+ if (PVR_VER(pvr) != PVR_POWER8E && PVR_VER(pvr) != PVR_POWER8NVL &&
+ PVR_VER(pvr) != PVR_POWER8)
return -ENODEV;
rc = register_power_pmu(&power8_pmu);
@@ -841,4 +409,3 @@ static int __init init_power8_pmu(void)
return 0;
}
-early_initcall(init_power8_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
new file mode 100644
index 000000000000..7f4e6b5f22aa
--- /dev/null
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER9 processors.
+ *
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+
+/*
+ * Power9 event codes.
+ */
+EVENT(PM_CYC, 0x0001e)
+EVENT(PM_ICT_NOSLOT_CYC, 0x100f8)
+EVENT(PM_CMPLU_STALL, 0x1e054)
+EVENT(PM_INST_CMPL, 0x00002)
+EVENT(PM_BR_CMPL, 0x4d05e)
+EVENT(PM_BR_MPRED_CMPL, 0x400f6)
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1, 0x100fc)
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1_FIN, 0x2c04e)
+EVENT(PM_LD_MISS_L1, 0x3e054)
+/* Alternate event code for PM_LD_MISS_L1 */
+EVENT(PM_LD_MISS_L1_ALT, 0x400f0)
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1, 0x300f0)
+/* L1 cache data prefetches */
+EVENT(PM_L1_PREF, 0x20054)
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1, 0x04080)
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS, 0x200fd)
+/* Instruction Demand sectors written into IL1 */
+EVENT(PM_L1_DEMAND_WRITE, 0x0408c)
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_WRITE, 0x0488c)
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3, 0x4c042)
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS, 0x300fe)
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST, 0x16880)
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS, 0x26880)
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PREF_ALL, 0x4e052)
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS, 0x300fc)
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS, 0x400fc)
+/* Run_Instructions */
+EVENT(PM_RUN_INST_CMPL, 0x500fa)
+/* Alternate event code for PM_RUN_INST_CMPL */
+EVENT(PM_RUN_INST_CMPL_ALT, 0x400fa)
+/* Run_cycles */
+EVENT(PM_RUN_CYC, 0x600f4)
+/* Alternate event code for Run_cycles */
+EVENT(PM_RUN_CYC_ALT, 0x200f4)
+/* Instruction Dispatched */
+EVENT(PM_INST_DISP, 0x200f2)
+EVENT(PM_INST_DISP_ALT, 0x300f2)
+/* Branch events that are not strongly biased */
+EVENT(PM_BR_2PATH, 0x20036)
+/* Alternate branch events that are not strongly biased */
+EVENT(PM_BR_2PATH_ALT, 0x40036)
+
+/* Blacklisted events */
+EVENT(PM_MRK_ST_DONE_L2, 0x10134)
+EVENT(PM_RADIX_PWC_L1_HIT, 0x1f056)
+EVENT(PM_FLOP_CMPL, 0x100f4)
+EVENT(PM_MRK_NTF_FIN, 0x20112)
+EVENT(PM_RADIX_PWC_L2_HIT, 0x2d024)
+EVENT(PM_IFETCH_THROTTLE, 0x3405e)
+EVENT(PM_MRK_L2_TM_ST_ABORT_SISTER, 0x3e15c)
+EVENT(PM_RADIX_PWC_L3_HIT, 0x3f056)
+EVENT(PM_RUN_CYC_SMT2_MODE, 0x3006c)
+EVENT(PM_TM_TX_PASS_RUN_INST, 0x4e014)
+EVENT(PM_DISP_HELD_SYNC_HOLD, 0x4003c)
+EVENT(PM_DTLB_MISS_16G, 0x1c058)
+EVENT(PM_DERAT_MISS_2M, 0x1c05a)
+EVENT(PM_DTLB_MISS_2M, 0x1c05c)
+EVENT(PM_MRK_DTLB_MISS_1G, 0x1d15c)
+EVENT(PM_DTLB_MISS_4K, 0x2c056)
+EVENT(PM_DERAT_MISS_1G, 0x2c05a)
+EVENT(PM_MRK_DERAT_MISS_2M, 0x2d152)
+EVENT(PM_MRK_DTLB_MISS_4K, 0x2d156)
+EVENT(PM_MRK_DTLB_MISS_16G, 0x2d15e)
+EVENT(PM_DTLB_MISS_64K, 0x3c056)
+EVENT(PM_MRK_DERAT_MISS_1G, 0x3d152)
+EVENT(PM_MRK_DTLB_MISS_64K, 0x3d156)
+EVENT(PM_DTLB_MISS_16M, 0x4c056)
+EVENT(PM_DTLB_MISS_1G, 0x4c05a)
+EVENT(PM_MRK_DTLB_MISS_16M, 0x4c15e)
+
+/*
+ * Memory Access Events
+ *
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * To enable capturing of memory profiling, these MMCRA bits
+ * need to be programmed, along with the corresponding raw event
+ * format encoding.
+ *
+ * MMCRA bits encoding needed are
+ * SM (Sampling Mode)
+ * EM (Eligibility for Random Sampling)
+ * TECE (Threshold Event Counter Event)
+ * TS (Threshold Start Event)
+ * TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ * sample [EM,SM]
+ * thresh_sel (TECE)
+ * thresh start (TS)
+ * thresh end (TE)
+ */
+EVENT(MEM_LOADS, 0x34340401e0)
+EVENT(MEM_STORES, 0x343c0401e0)
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
new file mode 100644
index 000000000000..cb6a7dc02dd7
--- /dev/null
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER9 processors.
+ *
+ * Copyright 2009 Paul Mackerras, IBM Corporation.
+ * Copyright 2013 Michael Ellerman, IBM Corporation.
+ * Copyright 2016 Madhavan Srinivasan, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "power9-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding for Power9:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * | | [ ] [ ] [ thresh_cmp ] [ thresh_ctl ]
+ * | | | | |
+ * | | *- IFM (Linux) | thresh start/stop -*
+ * | *- BHRB (Linux) *sm
+ * *- EBB (Linux)
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ ] [ sample ] [cache] [ pmc ] [unit ] [] m [ pmcxsel ]
+ * | | | | |
+ * | | | | *- mark
+ * | | *- L1/L2/L3 cache_sel |
+ * | | |
+ * | *- sampling mode for marked events *- combine
+ * |
+ * *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit (PMCxUNIT)
+ * MMCR1[24] = pmc1combine[0]
+ * MMCR1[25] = pmc1combine[1]
+ * MMCR1[26] = pmc2combine[0]
+ * MMCR1[27] = pmc2combine[1]
+ * MMCR1[28] = pmc3combine[0]
+ * MMCR1[29] = pmc3combine[1]
+ * MMCR1[30] = pmc4combine[0]
+ * MMCR1[31] = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ * MMCR1[20:27] = thresh_ctl
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ * MMCR1[20:27] = thresh_ctl
+ * else
+ * MMCRA[48:55] = thresh_ctl (THRESH START/END)
+ *
+ * if thresh_sel:
+ * MMCRA[45:47] = thresh_sel
+ *
+ * if thresh_cmp:
+ * MMCRA[9:11] = thresh_cmp[0:2]
+ * MMCRA[12:18] = thresh_cmp[3:9]
+ *
+ * MMCR1[16] = cache_sel[2]
+ * MMCR1[17] = cache_sel[3]
+ *
+ * if mark:
+ * MMCRA[63] = 1 (SAMPLE_ENABLE)
+ * MMCRA[57:59] = sample[0:2] (RAND_SAMP_ELIG)
+ * MMCRA[61:62] = sample[3:4] (RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ * MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE] = sm
+ */
+
+/*
+ * Some power9 event codes.
+ */
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+#include "power9-events-list.h"
+};
+
+#undef EVENT
+
+/* MMCRA IFM bits - POWER9 */
+#define POWER9_MMCRA_IFM1 0x0000000040000000UL
+#define POWER9_MMCRA_IFM2 0x0000000080000000UL
+#define POWER9_MMCRA_IFM3 0x00000000C0000000UL
+#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
+
+extern u64 PERF_REG_EXTENDED_MASK;
+
+/* Nasty Power9 specific hack */
+#define PVR_POWER9_CUMULUS 0x00002000
+
+/* PowerISA v2.07 format attribute structure */
+extern const struct attribute_group isa207_pmu_format_group;
+
+static int p9_dd21_bl_ev[] = {
+ PM_MRK_ST_DONE_L2,
+ PM_RADIX_PWC_L1_HIT,
+ PM_FLOP_CMPL,
+ PM_MRK_NTF_FIN,
+ PM_RADIX_PWC_L2_HIT,
+ PM_IFETCH_THROTTLE,
+ PM_MRK_L2_TM_ST_ABORT_SISTER,
+ PM_RADIX_PWC_L3_HIT,
+ PM_RUN_CYC_SMT2_MODE,
+ PM_TM_TX_PASS_RUN_INST,
+ PM_DISP_HELD_SYNC_HOLD,
+};
+
+static int p9_dd22_bl_ev[] = {
+ PM_DTLB_MISS_16G,
+ PM_DERAT_MISS_2M,
+ PM_DTLB_MISS_2M,
+ PM_MRK_DTLB_MISS_1G,
+ PM_DTLB_MISS_4K,
+ PM_DERAT_MISS_1G,
+ PM_MRK_DERAT_MISS_2M,
+ PM_MRK_DTLB_MISS_4K,
+ PM_MRK_DTLB_MISS_16G,
+ PM_DTLB_MISS_64K,
+ PM_MRK_DERAT_MISS_1G,
+ PM_MRK_DTLB_MISS_64K,
+ PM_DISP_HELD_SYNC_HOLD,
+ PM_DTLB_MISS_16M,
+ PM_DTLB_MISS_1G,
+ PM_MRK_DTLB_MISS_16M,
+};
+
+/* Table of alternatives, sorted by column 0 */
+static const unsigned int power9_event_alternatives[][MAX_ALT] = {
+ { PM_BR_2PATH, PM_BR_2PATH_ALT },
+ { PM_INST_DISP, PM_INST_DISP_ALT },
+ { PM_RUN_CYC_ALT, PM_RUN_CYC },
+ { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT },
+ { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
+};
+
+static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+ int num_alt = 0;
+
+ num_alt = isa207_get_alternatives(event, alt,
+ ARRAY_SIZE(power9_event_alternatives), flags,
+ power9_event_alternatives);
+
+ return num_alt;
+}
+
+static int power9_check_attr_config(struct perf_event *ev)
+{
+ u64 val;
+ u64 event = ev->attr.config;
+
+ val = (event >> EVENT_SAMPLE_SHIFT) & EVENT_SAMPLE_MASK;
+ if (val == 0xC || isa3XX_check_attr_config(ev))
+ return -EINVAL;
+
+ return 0;
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC);
+GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL);
+GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN);
+GENERIC_EVENT_ATTR(mem-loads, MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses, PM_LD_MISS_L1_FIN);
+CACHE_EVENT_ATTR(L1-dcache-loads, PM_LD_REF_L1);
+CACHE_EVENT_ATTR(L1-dcache-prefetches, PM_L1_PREF);
+CACHE_EVENT_ATTR(L1-dcache-store-misses, PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses, PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads, PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_WRITE);
+CACHE_EVENT_ATTR(LLC-load-misses, PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads, PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL);
+CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
+
+static struct attribute *power9_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC),
+ GENERIC_EVENT_PTR(PM_CMPLU_STALL),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_CMPL),
+ GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+ GENERIC_EVENT_PTR(PM_LD_REF_L1),
+ GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN),
+ GENERIC_EVENT_PTR(MEM_LOADS),
+ GENERIC_EVENT_PTR(MEM_STORES),
+ CACHE_EVENT_PTR(PM_LD_MISS_L1_FIN),
+ CACHE_EVENT_PTR(PM_LD_REF_L1),
+ CACHE_EVENT_PTR(PM_L1_PREF),
+ CACHE_EVENT_PTR(PM_ST_MISS_L1),
+ CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+ CACHE_EVENT_PTR(PM_INST_FROM_L1),
+ CACHE_EVENT_PTR(PM_IC_PREF_WRITE),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+ CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+ CACHE_EVENT_PTR(PM_L3_PREF_ALL),
+ CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+ CACHE_EVENT_PTR(PM_BR_CMPL),
+ CACHE_EVENT_PTR(PM_DTLB_MISS),
+ CACHE_EVENT_PTR(PM_ITLB_MISS),
+ NULL
+};
+
+static const struct attribute_group power9_pmu_events_group = {
+ .name = "events",
+ .attrs = power9_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-51");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:10-11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+PMU_FORMAT_ATTR(cache_sel, "config:20-23");
+PMU_FORMAT_ATTR(sample_mode, "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel, "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop, "config:32-35");
+PMU_FORMAT_ATTR(thresh_start, "config:36-39");
+PMU_FORMAT_ATTR(thresh_cmp, "config:40-49");
+PMU_FORMAT_ATTR(sdar_mode, "config:50-51");
+
+static struct attribute *power9_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ &format_attr_cache_sel.attr,
+ &format_attr_sample_mode.attr,
+ &format_attr_thresh_sel.attr,
+ &format_attr_thresh_stop.attr,
+ &format_attr_thresh_start.attr,
+ &format_attr_thresh_cmp.attr,
+ &format_attr_sdar_mode.attr,
+ NULL,
+};
+
+static const struct attribute_group power9_pmu_format_group = {
+ .name = "format",
+ .attrs = power9_pmu_format_attr,
+};
+
+static struct attribute *power9_pmu_caps_attrs[] = {
+ NULL
+};
+
+static struct attribute_group power9_pmu_caps_group = {
+ .name = "caps",
+ .attrs = power9_pmu_caps_attrs,
+};
+
+static const struct attribute_group *power9_pmu_attr_groups[] = {
+ &power9_pmu_format_group,
+ &power9_pmu_events_group,
+ &power9_pmu_caps_group,
+ NULL,
+};
+
+static int power9_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC,
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL,
+ [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1,
+ [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN,
+};
+
+static u64 power9_bhrb_filter_map(u64 branch_sample_type)
+{
+ u64 pmu_bhrb_filter = 0;
+
+ /* BHRB and regular PMU events share the same privilege state
+ * filter configuration. BHRB is always recorded along with a
+ * regular PMU event. As the privilege state filter is handled
+ * in the basic PMC configuration of the accompanying regular
+ * PMU event, we ignore any separate BHRB specific request.
+ */
+
+ /* No branch filter requested */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+ return pmu_bhrb_filter;
+
+ /* Invalid branch filter options - HW does not support */
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+ return -1;
+
+ if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+ pmu_bhrb_filter |= POWER9_MMCRA_IFM1;
+ return pmu_bhrb_filter;
+ }
+
+ /* Everything else is unsupported */
+ return -1;
+}
+
+static void power9_config_bhrb(u64 pmu_bhrb_filter)
+{
+ pmu_bhrb_filter &= POWER9_MMCRA_BHRB_MASK;
+
+ /* Enable BHRB filter in PMU */
+ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
+}
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
+ [ C(RESULT_MISS) ] = PM_LD_MISS_L1_FIN,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ST_MISS_L1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L1_PREF,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
+ [ C(RESULT_MISS) ] = PM_L1_ICACHE_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
+ [ C(RESULT_MISS) ] = PM_DATA_FROM_L3MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_DTLB_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = PM_ITLB_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = PM_BR_CMPL,
+ [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+#undef C
+
+static struct power_pmu power9_pmu = {
+ .name = "POWER9",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .group_constraint_mask = CNST_CACHE_PMC4_MASK,
+ .group_constraint_val = CNST_CACHE_PMC4_VAL,
+ .compute_mmcr = isa207_compute_mmcr,
+ .config_bhrb = power9_config_bhrb,
+ .bhrb_filter_map = power9_bhrb_filter_map,
+ .get_constraint = isa207_get_constraint,
+ .get_alternatives = power9_get_alternatives,
+ .get_mem_data_src = isa207_get_mem_data_src,
+ .get_mem_weight = isa207_get_mem_weight,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .n_generic = ARRAY_SIZE(power9_generic_events),
+ .generic_events = power9_generic_events,
+ .cache_events = &power9_cache_events,
+ .attr_groups = power9_pmu_attr_groups,
+ .bhrb_nr = 32,
+ .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
+ .check_attr_config = power9_check_attr_config,
+};
+
+int __init init_power9_pmu(void)
+{
+ int rc = 0;
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_POWER9)
+ return -ENODEV;
+
+ /* Blacklist events */
+ if (!(pvr & PVR_POWER9_CUMULUS)) {
+ if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 1)) {
+ power9_pmu.blacklist_ev = p9_dd21_bl_ev;
+ power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd21_bl_ev);
+ } else if ((PVR_CFG(pvr) == 2) && (PVR_MIN(pvr) == 2)) {
+ power9_pmu.blacklist_ev = p9_dd22_bl_ev;
+ power9_pmu.n_blacklist_ev = ARRAY_SIZE(p9_dd22_bl_ev);
+ }
+ }
+
+ /* Set the PERF_REG_EXTENDED_MASK here */
+ PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
+
+ rc = register_power_pmu(&power9_pmu);
+ if (rc)
+ return rc;
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 8b6a8a36fa38..762676fb839e 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -1,18 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Performance counter support for PPC970-family processors.
*
* Copyright 2008-2009 Paul Mackerras, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
*/
#include <linux/string.h>
#include <linux/perf_event.h>
#include <asm/reg.h>
#include <asm/cputable.h>
+#include "internal.h"
+
/*
* Bits in event code for PPC970
*/
@@ -192,7 +190,7 @@ static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
};
static int p970_get_constraint(u64 event, unsigned long *maskp,
- unsigned long *valp)
+ unsigned long *valp, u64 event_config1 __maybe_unused)
{
int pmc, byte, unit, sh, spcsel;
unsigned long mask = 0, value = 0;
@@ -257,7 +255,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[])
}
static int p970_compute_mmcr(u64 event[], int n_ev,
- unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+ unsigned int hwc[], struct mmcr_regs *mmcr,
+ struct perf_event *pevents[],
+ u32 flags __maybe_unused)
{
unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
unsigned int pmc, unit, byte, psel;
@@ -397,27 +397,26 @@ static int p970_compute_mmcr(u64 event[], int n_ev,
mmcra |= 0x2000; /* mark only one IOP per PPC instruction */
/* Return MMCRx values */
- mmcr[0] = mmcr0;
- mmcr[1] = mmcr1;
- mmcr[2] = mmcra;
+ mmcr->mmcr0 = mmcr0;
+ mmcr->mmcr1 = mmcr1;
+ mmcr->mmcra = mmcra;
return 0;
}
-static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
{
- int shift, i;
+ int shift;
+ /*
+ * Setting the PMCxSEL field to 0x08 disables PMC x.
+ */
if (pmc <= 1) {
shift = MMCR0_PMC1SEL_SH - 7 * pmc;
- i = 0;
+ mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift);
} else {
shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
- i = 1;
+ mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift);
}
- /*
- * Setting the PMCxSEL field to 0x08 disables PMC x.
- */
- mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
}
static int ppc970_generic_events[] = {
@@ -436,7 +435,7 @@ static int ppc970_generic_events[] = {
* 0 means not supported, -1 means nonsensical, other values
* are event codes.
*/
-static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
[C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */
[C(OP_READ)] = { 0x8810, 0x3810 },
[C(OP_WRITE)] = { 0x7810, 0x813 },
@@ -490,14 +489,13 @@ static struct power_pmu ppc970_pmu = {
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
-static int __init init_ppc970_pmu(void)
+int __init init_ppc970_pmu(void)
{
- if (!cur_cpu_spec->oprofile_cpu_type ||
- (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
- && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")))
+ unsigned int pvr = mfspr(SPRN_PVR);
+
+ if (PVR_VER(pvr) != PVR_970 && PVR_VER(pvr) != PVR_970MP &&
+ PVR_VER(pvr) != PVR_970FX && PVR_VER(pvr) != PVR_970GX)
return -ENODEV;
return register_power_pmu(&ppc970_pmu);
}
-
-early_initcall(init_ppc970_pmu);
diff --git a/arch/powerpc/perf/req-gen/_begin.h b/arch/powerpc/perf/req-gen/_begin.h
new file mode 100644
index 000000000000..a200b86eba3b
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_begin.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Include paths to be used in interface defining headers */
+#ifndef POWERPC_PERF_REQ_GEN_H_
+#define POWERPC_PERF_REQ_GEN_H_
+
+#include <linux/stringify.h>
+
+#define CAT2_STR_(t, s) __stringify(t/s)
+#define CAT2_STR(t, s) CAT2_STR_(t, s)
+#define I(...) __VA_ARGS__
+
+#endif
+
+#define REQ_GEN_PREFIX req-gen
+#define REQUEST_BEGIN CAT2_STR(REQ_GEN_PREFIX, _request-begin.h)
+#define REQUEST_END CAT2_STR(REQ_GEN_PREFIX, _request-end.h)
diff --git a/arch/powerpc/perf/req-gen/_clear.h b/arch/powerpc/perf/req-gen/_clear.h
new file mode 100644
index 000000000000..67c3859157f3
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_clear.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#undef __field_
+#undef __count_
+#undef __array_
+#undef REQUEST_
diff --git a/arch/powerpc/perf/req-gen/_end.h b/arch/powerpc/perf/req-gen/_end.h
new file mode 100644
index 000000000000..8a406980b6bf
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_end.h
@@ -0,0 +1,4 @@
+
+#undef REQ_GEN_PREFIX
+#undef REQUEST_BEGIN
+#undef REQUEST_END
diff --git a/arch/powerpc/perf/req-gen/_request-begin.h b/arch/powerpc/perf/req-gen/_request-begin.h
new file mode 100644
index 000000000000..7c74c2ab4c0c
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_request-begin.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define REQUEST(r_contents) \
+ REQUEST_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, I(r_contents))
+
+#define __field(f_offset, f_bytes, f_name) \
+ __field_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+ f_offset, f_bytes, f_name)
+
+#define __array(f_offset, f_bytes, f_name) \
+ __array_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+ f_offset, f_bytes, f_name)
+
+#define __count(f_offset, f_bytes, f_name) \
+ __count_(REQUEST_NAME, REQUEST_NUM, REQUEST_IDX_KIND, \
+ f_offset, f_bytes, f_name)
diff --git a/arch/powerpc/perf/req-gen/_request-end.h b/arch/powerpc/perf/req-gen/_request-end.h
new file mode 100644
index 000000000000..7d9f4046c2ca
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/_request-end.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef REQUEST
+#undef __field
+#undef __array
+#undef __count
+
+#undef REQUEST_NAME
+#undef REQUEST_NUM
+#undef REQUEST_IDX_KIND
diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
new file mode 100644
index 000000000000..6b2a59fefffa
--- /dev/null
+++ b/arch/powerpc/perf/req-gen/perf.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_POWERPC_PERF_REQ_GEN_PERF_H_
+#define LINUX_POWERPC_PERF_REQ_GEN_PERF_H_
+
+#include <linux/perf_event.h>
+#include <linux/stringify.h>
+
+#ifndef REQUEST_FILE
+#error "REQUEST_FILE must be defined before including"
+#endif
+
+#ifndef NAME_LOWER
+#error "NAME_LOWER must be defined before including"
+#endif
+
+#ifndef NAME_UPPER
+#error "NAME_UPPER must be defined before including"
+#endif
+
+#define BE_TYPE_b1 __u8
+#define BE_TYPE_b2 __be16
+#define BE_TYPE_b4 __be32
+#define BE_TYPE_b8 __be64
+
+#define BYTES_TO_BE_TYPE(bytes) \
+ BE_TYPE_b##bytes
+
+#define CAT2_(a, b) a ## b
+#define CAT2(a, b) CAT2_(a, b)
+#define CAT3_(a, b, c) a ## b ## c
+#define CAT3(a, b, c) CAT3_(a, b, c)
+
+/*
+ * enumerate the request values as
+ * <NAME_UPPER>_<request name> = <request value>
+ */
+#define REQUEST_VALUE__(name_upper, r_name) name_upper ## _ ## r_name
+#define REQUEST_VALUE_(name_upper, r_name) REQUEST_VALUE__(name_upper, r_name)
+#define REQUEST_VALUE(r_name) REQUEST_VALUE_(NAME_UPPER, r_name)
+
+#include "_clear.h"
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+ REQUEST_VALUE(r_name) = r_value,
+enum CAT2(NAME_LOWER, _requests) {
+#include REQUEST_FILE
+};
+
+/*
+ * For each request:
+ * struct <NAME_LOWER>_<request name> {
+ * r_fields
+ * };
+ */
+#include "_clear.h"
+#define STRUCT_NAME__(name_lower, r_name) name_lower ## _ ## r_name
+#define STRUCT_NAME_(name_lower, r_name) STRUCT_NAME__(name_lower, r_name)
+#define STRUCT_NAME(r_name) STRUCT_NAME_(NAME_LOWER, r_name)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+struct STRUCT_NAME(r_name) { \
+ r_fields \
+};
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \
+ BYTES_TO_BE_TYPE(f_bytes) f_name;
+#define __count_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name) \
+ __field_(r_name, r_value, r_idx_1, f_offset, f_bytes, f_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_bytes, a_name) \
+ __u8 a_name[a_bytes];
+
+#include REQUEST_FILE
+
+/*
+ * Generate a check of the field offsets
+ * <NAME_LOWER>_assert_offsets_correct()
+ */
+#include "_clear.h"
+#define REQUEST_(r_name, r_value, index, r_fields) \
+r_fields
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name) \
+ BUILD_BUG_ON(offsetof(struct STRUCT_NAME(r_name), f_name) != f_offset);
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \
+ __field_(r_name, r_value, r_idx_1, c_offset, c_size, c_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name) \
+ __field_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+
+static inline void CAT2(NAME_LOWER, _assert_offsets_correct)(void)
+{
+#include REQUEST_FILE
+}
+
+/*
+ * Generate event attributes:
+ * PMU_EVENT_ATTR_STRING(<request name>_<field name>,
+ * <NAME_LOWER>_event_attr_<request name>_<field name>,
+ * "request=<request value>"
+ * "starting_index=<starting index type>"
+ * "counter_info_version=COUNTER_INFO_VERSION_CURRENT"
+ * "length=<f_size>"
+ * "offset=<f_offset>")
+ *
+ * TODO: counter_info_version may need to vary; we should interpret the
+ * value to some extent
+ */
+#define EVENT_ATTR_NAME__(name, r_name, c_name) \
+ name ## _event_attr_ ## r_name ## _ ## c_name
+#define EVENT_ATTR_NAME_(name, r_name, c_name) \
+ EVENT_ATTR_NAME__(name, r_name, c_name)
+#define EVENT_ATTR_NAME(r_name, c_name) \
+ EVENT_ATTR_NAME_(NAME_LOWER, r_name, c_name)
+
+#include "_clear.h"
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name)
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \
+PMU_EVENT_ATTR_STRING( \
+ CAT3(r_name, _, c_name), \
+ EVENT_ATTR_NAME(r_name, c_name), \
+ "request=" __stringify(r_value) "," \
+ r_idx_1 "," \
+ "counter_info_version=" \
+ __stringify(COUNTER_INFO_VERSION_CURRENT) "," \
+ "length=" #c_size "," \
+ "offset=" #c_offset)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+ r_fields
+
+#include REQUEST_FILE
+
+/*
+ * Define event attribute array
+ * static struct attribute *hv_gpci_event_attrs[] = {
+ * &<NAME_LOWER>_event_attr_<request name>_<field name>.attr,
+ * };
+ */
+#include "_clear.h"
+#define __field_(r_name, r_value, r_idx_1, f_offset, f_size, f_name)
+#define __count_(r_name, r_value, r_idx_1, c_offset, c_size, c_name) \
+ &EVENT_ATTR_NAME(r_name, c_name).attr.attr,
+#define __array_(r_name, r_value, r_idx_1, a_offset, a_size, a_name)
+#define REQUEST_(r_name, r_value, r_idx_1, r_fields) \
+ r_fields
+
+/* Generate event list for platforms with counter_info_version 0x6 or below */
+static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = {
+#include REQUEST_FILE
+ NULL
+};
+
+/*
+ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci
+ * events were deprecated for platform firmware that supports
+ * counter_info_version 0x8 or above.
+ * Those deprecated events are still part of platform firmware that
+ * supports counter_info_version 0x6 and below. As per the getPerfCountInfo
+ * v1.018 documentation there is no counter_info_version 0x7.
+ * Undefine macro ENABLE_EVENTS_COUNTERINFO_V6 to disable the addition of
+ * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms
+ * that support counter_info_version 0x8 or above.
+ */
+#undef ENABLE_EVENTS_COUNTERINFO_V6
+
+/* Generate event list for platforms with counter_info_version 0x8 or above */
+static __maybe_unused struct attribute *hv_gpci_event_attrs[] = {
+#include REQUEST_FILE
+ NULL
+};
+
+/* cleanup */
+#include "_clear.h"
+#undef EVENT_ATTR_NAME
+#undef EVENT_ATTR_NAME_
+#undef BIT_NAME
+#undef BIT_NAME_
+#undef STRUCT_NAME
+#undef REQUEST_VALUE
+#undef REQUEST_VALUE_
+
+#endif
diff --git a/arch/powerpc/perf/vpa-dtl.c b/arch/powerpc/perf/vpa-dtl.c
new file mode 100644
index 000000000000..3c1d1c28deb9
--- /dev/null
+++ b/arch/powerpc/perf/vpa-dtl.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Perf interface to expose Dispatch Trace Log counters.
+ *
+ * Copyright (C) 2024 Kajol Jain, IBM Corporation
+ */
+
+#ifdef CONFIG_PPC_SPLPAR
+#define pr_fmt(fmt) "vpa_dtl: " fmt
+
+#include <asm/dtl.h>
+#include <linux/perf_event.h>
+#include <asm/plpar_wrappers.h>
+#include <linux/vmalloc.h>
+
+#define EVENT(_name, _code) enum{_name = _code}
+
+/*
+ * Based on the Power Architecture Platform Reference (PAPR) documentation,
+ * Table 14.14 "Per Virtual Processor Area", the Dispatch Trace Log (DTL)
+ * Enable Mask bits below select which virtual processor dispatch/preempt
+ * traces are logged:
+ * DTL_CEDE(0x1): Trace voluntary (OS initiated) virtual
+ * processor waits
+ * DTL_PREEMPT(0x2): Trace time slice preempts
+ * DTL_FAULT(0x4): Trace virtual partition memory page
+ * faults
+ * DTL_ALL(0x7): Trace all (DTL_CEDE | DTL_PREEMPT | DTL_FAULT)
+ *
+ * Event codes are based on the Dispatch Trace Log Enable Mask.
+ */
+EVENT(DTL_CEDE, 0x1);
+EVENT(DTL_PREEMPT, 0x2);
+EVENT(DTL_FAULT, 0x4);
+EVENT(DTL_ALL, 0x7);
+
+GENERIC_EVENT_ATTR(dtl_cede, DTL_CEDE);
+GENERIC_EVENT_ATTR(dtl_preempt, DTL_PREEMPT);
+GENERIC_EVENT_ATTR(dtl_fault, DTL_FAULT);
+GENERIC_EVENT_ATTR(dtl_all, DTL_ALL);
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+/* Entries listed under the "events" sysfs group: one per DTL event code. */
+static struct attribute *events_attr[] = {
+ GENERIC_EVENT_PTR(DTL_CEDE),
+ GENERIC_EVENT_PTR(DTL_PREEMPT),
+ GENERIC_EVENT_PTR(DTL_FAULT),
+ GENERIC_EVENT_PTR(DTL_ALL),
+ NULL
+};
+
+static struct attribute_group event_group = {
+ .name = "events",
+ .attrs = events_attr,
+};
+
+/* Entries listed under the "format" sysfs group: the "event" field (config:0-7). */
+static struct attribute *format_attrs[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static const struct attribute_group format_group = {
+ .name = "format",
+ .attrs = format_attrs,
+};
+
+/* NULL-terminated list of groups handed to the PMU via .attr_groups. */
+static const struct attribute_group *attr_groups[] = {
+ &format_group,
+ &event_group,
+ NULL,
+};
+
+/* Per-CPU dispatch trace log state (see the vpa_dtl_cpu per-CPU variable). */
+struct vpa_dtl {
+ struct dtl_entry *buf; /* DTL buffer from dtl_cache; its address is passed to register_dtl() */
+ u64 last_idx; /* DTL index up to which entries were already copied out */
+};
+
+/* Per-CPU perf context (see the vpa_pmu_ctx per-CPU variable). */
+struct vpa_pmu_ctx {
+ struct perf_output_handle handle; /* passed to perf_aux_output_begin()/perf_aux_output_end() */
+};
+
+/*
+ * Driver-private state of one AUX buffer; allocated in vpa_dtl_setup_aux()
+ * and released in vpa_dtl_free_aux().
+ */
+struct vpa_pmu_buf {
+ int nr_pages; /* number of AUX pages handed to setup_aux */
+ bool snapshot; /* always set to false by vpa_dtl_setup_aux() */
+ u64 *base; /* vmap()ed address of the AUX pages */
+ u64 size; /* AUX area size in bytes (nr_pages << PAGE_SHIFT) */
+ u64 head; /* next write slot, counted in struct dtl_entry units */
+ u64 head_size; /* bytes committed via perf_aux_output_end() since the last wrap */
+ /* boot timebase and frequency needs to be saved only at once */
+ int boottb_freq_saved;
+ u64 threshold; /* entry count at which the buffer is treated as full */
+ bool full; /* set by vpa_dtl_capture_aux() once threshold is reached */
+};
+
+/*
+ * To correlate each DTL entry with other events across CPUs,
+ * we need to map the timebase from "struct dtl_entry", which phyp
+ * provides, to the boot timebase. This also needs the timebase frequency.
+ * Formula is: ((timebase from DTL entry - boot time) / frequency)
+ *
+ * To match the size of "struct dtl_entry" and ease post processing,
+ * the structure is padded with 24 bytes.
+ */
+struct boottb_freq {
+ u64 boot_tb; /* value returned by get_boot_tb() */
+ u64 tb_freq; /* tb_ticks_per_sec */
+ u64 timebase; /* written as 0 by the driver */
+ u64 padded[3]; /* 24 bytes of padding, see the comment above */
+};
+
+static DEFINE_PER_CPU(struct vpa_pmu_ctx, vpa_pmu_ctx);
+static DEFINE_PER_CPU(struct vpa_dtl, vpa_dtl_cpu);
+
+/* variable to capture reference count for the active dtl threads */
+static int dtl_global_refc;
+static spinlock_t dtl_global_lock = __SPIN_LOCK_UNLOCKED(dtl_global_lock);
+
+/*
+ * Copy DTL entries from the per-CPU DTL buffer into the AUX buffer.
+ *
+ * @n_entries: in: number of entries requested; out: number actually copied
+ *             (clamped when the AUX threshold would be reached).
+ * @buf:       private AUX buffer state; head and full are updated.
+ * @dtl:       per-CPU DTL state whose buffer is the copy source.
+ * @index:     first source entry inside dtl->buf.
+ */
+static void vpa_dtl_capture_aux(long *n_entries, struct vpa_pmu_buf *buf,
+				struct vpa_dtl *dtl, int index)
+{
+	struct dtl_entry *dst = (struct dtl_entry *)buf->base;
+
+	/*
+	 * Not enough room for the whole request: copy only what fits
+	 * below the threshold and mark the buffer as full.
+	 */
+	if (buf->head + *n_entries >= buf->threshold) {
+		*n_entries = buf->threshold - buf->head;
+		buf->full = true;
+	}
+
+	/* Copy to AUX buffer from per-thread address */
+	memcpy(&dst[buf->head], &dtl->buf[index], *n_entries * sizeof(*dst));
+
+	/*
+	 * A full buffer restarts at slot zero so the next copy lands at
+	 * the beginning; otherwise advance past what was just written.
+	 */
+	buf->head = buf->full ? 0 : buf->head + *n_entries;
+}
+
+/*
+ * Commit @n_entries DTL-sized records to perf via perf_aux_output_end().
+ *
+ * If the commit would run past the AUX area size, only the remaining bytes
+ * are committed and the private head/head_size restart from zero; otherwise
+ * head_size is advanced by the committed byte count.
+ */
+static void vpa_dtl_commit_aux(struct perf_output_handle *handle,
+			       struct vpa_pmu_buf *aux_buf, long n_entries)
+{
+	u64 size = n_entries * sizeof(struct dtl_entry);
+
+	if ((size + aux_buf->head_size) > aux_buf->size) {
+		size = aux_buf->size - aux_buf->head_size;
+		perf_aux_output_end(handle, size);
+		aux_buf->head = 0;
+		aux_buf->head_size = 0;
+	} else {
+		aux_buf->head_size += size;
+		/* Move the aux->head to indicate size of data in aux buffer */
+		perf_aux_output_end(handle, size);
+	}
+}
+
+/*
+ * Function to dump the dispatch trace log buffer data to the
+ * perf data.
+ *
+ * perf_aux_output_begin: This function is called before writing
+ * to AUX area. This returns the pointer to aux area private structure,
+ * ie "struct vpa_pmu_buf" here which is set in setup_aux() function.
+ * The function obtains the output handle (used in perf_aux_output_end).
+ * when capture completes in vpa_dtl_capture_aux(), call perf_aux_output_end()
+ * to commit the recorded data.
+ *
+ * perf_aux_output_end: This function commits data by adjusting the
+ * aux_head of "struct perf_buffer". aux_tail will be moved in perf tools
+ * side when writing the data from aux buffer to perf.data file in disk.
+ *
+ * Here in the private aux structure, we maintain head to know where
+ * to copy data next time in the PMU driver. vpa_pmu_buf->head is moved to
+ * maintain the aux head for PMU driver. It is the responsibility of the PMU
+ * driver to make sure data is copied between perf_aux_output_begin and
+ * perf_aux_output_end.
+ *
+ * After data is copied in vpa_dtl_capture_aux() function, perf_aux_output_end()
+ * is called to move the aux->head of "struct perf_buffer" to indicate size of
+ * data in aux buffer. This will post a PERF_RECORD_AUX into the perf buffer.
+ * Data will be written to disk only when the allocated buffer is full.
+ *
+ * By this approach, all the DTL data will be present as-is in the
+ * perf.data. The data will be pre-processed in perf tools side when doing
+ * perf report/perf script and this will avoid time taken to create samples
+ * in the kernel space.
+ */
+static void vpa_dtl_dump_sample_data(struct perf_event *event)
+{
+	u64 cur_idx, last_idx, i;
+	u64 boot_tb;
+	/*
+	 * Zero-initialised on purpose: the whole structure, padded[] included,
+	 * is memcpy()'d into the AUX buffer below, so no stale stack bytes
+	 * may be left in it.
+	 */
+	struct boottb_freq boottb_freq = {};
+
+	/* actual number of entries read */
+	long n_read = 0, read_size = 0;
+
+	/* number of entries added to dtl buffer */
+	long n_req;
+
+	struct vpa_pmu_ctx *vpa_ctx = this_cpu_ptr(&vpa_pmu_ctx);
+	struct vpa_pmu_buf *aux_buf;
+	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
+
+	cur_idx = be64_to_cpu(lppaca_of(event->cpu).dtl_idx);
+	last_idx = dtl->last_idx;
+
+	/* More than a full log was produced since last time: skip the overwritten part */
+	if (last_idx + N_DISPATCH_LOG <= cur_idx)
+		last_idx = cur_idx - N_DISPATCH_LOG + 1;
+
+	n_req = cur_idx - last_idx;
+
+	/* no new entry added to the buffer, return */
+	if (n_req <= 0)
+		return;
+
+	dtl->last_idx = last_idx + n_req;
+	boot_tb = get_boot_tb();
+
+	i = last_idx % N_DISPATCH_LOG;
+
+	aux_buf = perf_aux_output_begin(&vpa_ctx->handle, event);
+	if (!aux_buf) {
+		pr_debug("returning. no aux\n");
+		return;
+	}
+
+	if (!aux_buf->boottb_freq_saved) {
+		pr_debug("Copying boot tb to aux buffer: %lld\n", boot_tb);
+		/* Save boot_tb to convert raw timebase to it's relative system boot time */
+		boottb_freq.boot_tb = boot_tb;
+		/* Save tb_ticks_per_sec to convert timebase to sec */
+		boottb_freq.tb_freq = tb_ticks_per_sec;
+		boottb_freq.timebase = 0;
+		memcpy(aux_buf->base, &boottb_freq, sizeof(boottb_freq));
+		/* The header record occupies one entry-sized slot */
+		aux_buf->head += 1;
+		aux_buf->boottb_freq_saved = 1;
+		n_read += 1;
+	}
+
+	/* read the tail of the buffer if we've wrapped */
+	if (i + n_req > N_DISPATCH_LOG) {
+		read_size = N_DISPATCH_LOG - i;
+		vpa_dtl_capture_aux(&read_size, aux_buf, dtl, i);
+		n_req -= read_size;
+		n_read += read_size;
+		i = 0;
+		if (aux_buf->full) {
+			/* AUX buffer filled up: commit what was copied and stop */
+			vpa_dtl_commit_aux(&vpa_ctx->handle, aux_buf, n_read);
+			goto out;
+		}
+	}
+
+	/* .. and now the head */
+	vpa_dtl_capture_aux(&n_req, aux_buf, dtl, i);
+	vpa_dtl_commit_aux(&vpa_ctx->handle, aux_buf, n_req + n_read);
+out:
+	aux_buf->full = 0;
+}
+
+/*
+ * The VPA Dispatch Trace log counters do not interrupt on overflow, so the
+ * log is polled from an hrtimer instead. Each expiry drains the new DTL
+ * entries and re-arms the timer; the interval is the event's sample_period,
+ * with a floor of 1 millisecond. The timer is not restarted once the event
+ * is no longer active.
+ */
+static enum hrtimer_restart vpa_dtl_hrtimer_handle(struct hrtimer *hrtimer)
+{
+	struct perf_event *event = container_of(hrtimer, struct perf_event,
+						 hw.hrtimer);
+	u64 interval_ns;
+
+	if (event->state != PERF_EVENT_STATE_ACTIVE)
+		return HRTIMER_NORESTART;
+
+	vpa_dtl_dump_sample_data(event);
+
+	interval_ns = max_t(u64, NSEC_PER_MSEC, event->hw.sample_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(interval_ns));
+
+	return HRTIMER_RESTART;
+}
+
+/* Arm the polling hrtimer: relative, CPU-pinned, at least 1 ms from now. */
+static void vpa_dtl_start_hrtimer(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	ktime_t interval = ns_to_ktime(max_t(u64, NSEC_PER_MSEC,
+					     hwc->sample_period));
+
+	hrtimer_start(&hwc->hrtimer, interval, HRTIMER_MODE_REL_PINNED);
+}
+
+/* Cancel the event's polling hrtimer. */
+static void vpa_dtl_stop_hrtimer(struct perf_event *event)
+{
+	hrtimer_cancel(&event->hw.hrtimer);
+}
+
+/*
+ * event->destroy callback: drop one reference on the vpa_dtl users count
+ * and release dtl_access_lock once the count reaches zero.
+ */
+static void vpa_dtl_reset_global_refc(struct perf_event *event)
+{
+	spin_lock(&dtl_global_lock);
+
+	if (--dtl_global_refc <= 0) {
+		/* Clamp so the counter never stays negative */
+		dtl_global_refc = 0;
+		up_write(&dtl_access_lock);
+	}
+
+	spin_unlock(&dtl_global_lock);
+}
+
+/*
+ * Allocate the dispatch trace log buffer for @cpu from dtl_cache, on the
+ * CPU's NUMA node, and store it in the per-CPU vpa_dtl state.
+ *
+ * Returns 0 on success, -ENOMEM if dtl_cache is not available or the
+ * allocation fails.
+ */
+static int vpa_dtl_mem_alloc(int cpu)
+{
+	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, cpu);
+	struct dtl_entry *buf;
+
+	/* Check for dispatch trace log buffer cache */
+	if (!dtl_cache)
+		return -ENOMEM;
+
+	/*
+	 * vpa_dtl_event_init() calls this with the dtl_global_lock spinlock
+	 * held, so the allocation must not sleep: use GFP_ATOMIC only.
+	 */
+	buf = kmem_cache_alloc_node(dtl_cache, GFP_ATOMIC, cpu_to_node(cpu));
+	if (!buf) {
+		pr_warn("buffer allocation failed for cpu %d\n", cpu);
+		return -ENOMEM;
+	}
+	dtl->buf = buf;
+	return 0;
+}
+
+/*
+ * pmu->event_init callback: validate the event, claim the DTL for the
+ * vpa_dtl PMU, allocate the per-CPU DTL buffer and set up the polling timer.
+ *
+ * Returns 0 on success, or:
+ *   -ENOENT      the event does not belong to this PMU
+ *   -EACCES      caller is not perfmon capable
+ *   -EOPNOTSUPP  counting (non-sampling) event, or branch stack requested
+ *   -EINVAL      config is not one of the DTL_LOG_* codes
+ *   -EBUSY       the DTL is already in use by another interface
+ *   -ENOMEM      DTL buffer allocation failed
+ */
+static int vpa_dtl_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	/* test the event attr type for PMU enumeration */
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	if (!perfmon_capable())
+		return -EACCES;
+
+	/* Return if this is a counting event */
+	if (!is_sampling_event(event))
+		return -EOPNOTSUPP;
+
+	/* no branch sampling */
+	if (has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Invalid eventcode */
+	switch (event->attr.config) {
+	case DTL_LOG_CEDE:
+	case DTL_LOG_PREEMPT:
+	case DTL_LOG_FAULT:
+	case DTL_LOG_ALL:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	spin_lock(&dtl_global_lock);
+
+	/*
+	 * To ensure there are no other conflicting dtl users
+	 * (example: /proc/powerpc/vcpudispatch_stats or debugfs dtl),
+	 * the code below tries to take the dtl_access_lock.
+	 * The dtl_access_lock is a rwlock defined in dtl.h, which is used
+	 * to ensure there are no conflicting dtl users.
+	 * The vpa_dtl pmu takes the write lock only when dtl_global_refc is
+	 * zero; a non-zero count means the lock is already held on behalf
+	 * of the vpa_dtl pmu interface.
+	 */
+	if (dtl_global_refc == 0 && !down_write_trylock(&dtl_access_lock)) {
+		spin_unlock(&dtl_global_lock);
+		return -EBUSY;
+	}
+
+	/* Allocate dtl buffer memory */
+	if (vpa_dtl_mem_alloc(event->cpu)) {
+		/*
+		 * A zero refcount here means this call just took
+		 * dtl_access_lock above; release it again, otherwise every
+		 * other DTL user stays locked out.
+		 */
+		if (dtl_global_refc == 0)
+			up_write(&dtl_access_lock);
+		spin_unlock(&dtl_global_lock);
+		return -ENOMEM;
+	}
+
+	/*
+	 * Increment the number of active vpa_dtl pmu threads. The
+	 * dtl_global_refc is used to keep count of cpu threads that
+	 * currently capturing dtl data using vpa_dtl pmu interface.
+	 */
+	dtl_global_refc++;
+
+	spin_unlock(&dtl_global_lock);
+
+	hrtimer_setup(&hwc->hrtimer, vpa_dtl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+
+	/*
+	 * Since hrtimers have a fixed rate, we can do a static freq->period
+	 * mapping and avoid the whole period adjust feedback stuff.
+	 */
+	if (event->attr.freq) {
+		long freq = event->attr.sample_freq;
+
+		event->attr.sample_period = NSEC_PER_SEC / freq;
+		hwc->sample_period = event->attr.sample_period;
+		local64_set(&hwc->period_left, hwc->sample_period);
+		hwc->last_period = hwc->sample_period;
+		event->attr.freq = 0;
+	}
+
+	/* Drops the reference taken above when the event is destroyed */
+	event->destroy = vpa_dtl_reset_global_refc;
+	return 0;
+}
+
+/*
+ * pmu->add callback: register the per-CPU DTL buffer with the hypervisor,
+ * enable logging for the requested event mask and start the polling timer.
+ *
+ * Returns 0 on success, -ENOMEM when no DTL buffer is available for the
+ * CPU, or the error returned by register_dtl().
+ */
+static int vpa_dtl_event_add(struct perf_event *event, int flags)
+{
+	int ret, hwcpu;
+	unsigned long addr;
+	struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
+
+	/*
+	 * The buffer is allocated in vpa_dtl_event_init() and freed (and set
+	 * to NULL) in vpa_dtl_event_del(). Refuse to schedule the event in
+	 * again without a buffer rather than dereferencing a NULL pointer.
+	 */
+	if (!dtl->buf)
+		return -ENOMEM;
+
+	/*
+	 * Register our dtl buffer with the hypervisor. The
+	 * HV expects the buffer size to be passed in the second
+	 * word of the buffer. Refer section '14.11.3.2. H_REGISTER_VPA'
+	 * from PAPR for more information.
+	 */
+	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
+	dtl->last_idx = 0;
+
+	hwcpu = get_hard_smp_processor_id(event->cpu);
+	addr = __pa(dtl->buf);
+
+	ret = register_dtl(hwcpu, addr);
+	if (ret) {
+		pr_warn("DTL registration for cpu %d (hw %d) failed with %d\n",
+			event->cpu, hwcpu, ret);
+		return ret;
+	}
+
+	/* set our initial buffer indices */
+	lppaca_of(event->cpu).dtl_idx = 0;
+
+	/*
+	 * Ensure that our updates to the lppaca fields have
+	 * occurred before we actually enable the logging
+	 */
+	smp_wmb();
+
+	/* enable event logging */
+	lppaca_of(event->cpu).dtl_enable_mask = event->attr.config;
+
+	vpa_dtl_start_hrtimer(event);
+
+	return 0;
+}
+
+/*
+ * pmu->del callback: stop polling, unregister the DTL buffer from the
+ * hypervisor, free it and clear the DTL enable mask for the event's CPU.
+ */
+static void vpa_dtl_event_del(struct perf_event *event, int flags)
+{
+ int hwcpu = get_hard_smp_processor_id(event->cpu);
+ struct vpa_dtl *dtl = &per_cpu(vpa_dtl_cpu, event->cpu);
+
+ /* Stop the timer first so nothing reads the buffer while it goes away */
+ vpa_dtl_stop_hrtimer(event);
+ unregister_dtl(hwcpu);
+ kmem_cache_free(dtl_cache, dtl->buf);
+ /* NOTE(review): the buffer is only allocated in event_init, so a later add() finds NULL here */
+ dtl->buf = NULL;
+ lppaca_of(event->cpu).dtl_enable_mask = 0x0;
+}
+
+/*
+ * pmu->read callback. Intentionally empty: the dispatch trace log data is
+ * parsed and dumped to the perf data by vpa_dtl_dump_sample_data(), which
+ * runs from the polling hrtimer.
+ */
+static void vpa_dtl_event_read(struct perf_event *event)
+{
+}
+
+/*
+ * Set up pmu-private data structures for an AUX area
+ * **pages contains the aux buffer allocated for this event
+ * for the corresponding cpu. rb_alloc_aux uses "alloc_pages_node"
+ * and returns pointer to each page address. Map these pages to
+ * contiguous space using vmap and use that as base address.
+ *
+ * The aux private data structure ie, "struct vpa_pmu_buf" mainly
+ * saves
+ * - buf->base: aux buffer base address
+ * - buf->head: offset from base address where data will be written to.
+ * - buf->size: Size of allocated memory
+ *
+ * Returns the new "struct vpa_pmu_buf", or NULL when nr_pages is zero or
+ * an allocation/mapping fails. On failure the partially built state is
+ * released by the __free(kfree) scope cleanups.
+ */
+static void *vpa_dtl_setup_aux(struct perf_event *event, void **pages,
+			       int nr_pages, bool snapshot)
+{
+	int i, cpu = event->cpu;
+	struct vpa_pmu_buf *buf __free(kfree) = NULL;
+	struct page **pglist __free(kfree) = NULL;
+
+	/* We need at least one page for this to work. */
+	if (!nr_pages)
+		return NULL;
+
+	if (cpu == -1)
+		cpu = raw_smp_processor_id();
+
+	buf = kzalloc_node(sizeof(*buf), GFP_KERNEL, cpu_to_node(cpu));
+	if (!buf)
+		return NULL;
+
+	/* Temporary page list, only needed for the vmap() call below */
+	pglist = kcalloc(nr_pages, sizeof(*pglist), GFP_KERNEL);
+	if (!pglist)
+		return NULL;
+
+	for (i = 0; i < nr_pages; ++i)
+		pglist[i] = virt_to_page(pages[i]);
+
+	buf->base = vmap(pglist, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!buf->base)
+		return NULL;
+
+	buf->nr_pages = nr_pages;
+	/* NOTE(review): the snapshot argument is not consulted; confirm snapshot mode is unsupported */
+	buf->snapshot = false;
+
+	/* Widen before shifting so a large nr_pages cannot overflow int */
+	buf->size = (u64)nr_pages << PAGE_SHIFT;
+	buf->head = 0;
+	buf->head_size = 0;
+	buf->boottb_freq_saved = 0;
+	/* Full mark, in entries; presumably 32 bytes are kept as headroom - TODO confirm */
+	buf->threshold = ((buf->size - 32) / sizeof(struct dtl_entry));
+	return no_free_ptr(buf);
+}
+
+/*
+ * Release the pmu-private AUX state built by vpa_dtl_setup_aux(): tear down
+ * the vmap()ed mapping, then free the descriptor itself.
+ */
+static void vpa_dtl_free_aux(void *aux)
+{
+	struct vpa_pmu_buf *aux_buf = aux;
+	void *mapping = aux_buf->base;
+
+	vunmap(mapping);
+	kfree(aux_buf);
+}
+
+/* PMU descriptor registered with perf under the name "vpa_dtl". */
+static struct pmu vpa_dtl_pmu = {
+ .task_ctx_nr = perf_invalid_context,
+
+ .name = "vpa_dtl",
+ .attr_groups = attr_groups,
+ .event_init = vpa_dtl_event_init,
+ .add = vpa_dtl_event_add,
+ .del = vpa_dtl_event_del,
+ .read = vpa_dtl_event_read, /* empty: data is delivered through the AUX buffer */
+ .setup_aux = vpa_dtl_setup_aux,
+ .free_aux = vpa_dtl_free_aux,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_EXCLUSIVE,
+};
+
+/*
+ * Register the vpa_dtl PMU. Only done on shared-processor LPARs
+ * (FW_FEATURE_SPLPAR) that are not KVM guests; returns -ENODEV otherwise,
+ * or the result of perf_pmu_register().
+ */
+static int vpa_dtl_init(void)
+{
+	if (!firmware_has_feature(FW_FEATURE_SPLPAR)) {
+		pr_debug("not a shared virtualized system, not enabling\n");
+		return -ENODEV;
+	}
+
+	/* This driver is intended only for L1 host. */
+	if (is_kvm_guest()) {
+		pr_debug("Only supported for L1 host system\n");
+		return -ENODEV;
+	}
+
+	return perf_pmu_register(&vpa_dtl_pmu, vpa_dtl_pmu.name, -1);
+}
+
+device_initcall(vpa_dtl_init);
+#endif //CONFIG_PPC_SPLPAR
diff --git a/arch/powerpc/perf/vpa-pmu.c b/arch/powerpc/perf/vpa-pmu.c
new file mode 100644
index 000000000000..840733468959
--- /dev/null
+++ b/arch/powerpc/perf/vpa-pmu.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance monitoring support for Virtual Processor Area(VPA) based counters
+ *
+ * Copyright (C) 2024 IBM Corporation
+ */
+#define pr_fmt(fmt) "vpa_pmu: " fmt
+
+#include <linux/module.h>
+#include <linux/perf_event.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s_64.h>
+
+#define MODULE_VERS "1.0"
+#define MODULE_NAME "pseries_vpa_pmu"
+
+#define EVENT(_name, _code) enum{_name = _code}
+
+#define VPA_PMU_EVENT_VAR(_id) event_attr_##_id
+#define VPA_PMU_EVENT_PTR(_id) (&event_attr_##_id.attr.attr)
+
+/*
+ * sysfs show handler for the entries of the "events" group: prints the
+ * event id of the attribute as "event=0x<id>".
+ */
+static ssize_t vpa_pmu_events_sysfs_show(struct device *dev,
+					 struct device_attribute *attr, char *page)
+{
+	struct perf_pmu_events_attr *ev_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	return sprintf(page, "event=0x%02llx\n", ev_attr->id);
+}
+
+#define VPA_PMU_EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR(_name, VPA_PMU_EVENT_VAR(_id), _id, \
+ vpa_pmu_events_sysfs_show)
+
+EVENT(L1_TO_L2_CS_LAT, 0x1);
+EVENT(L2_TO_L1_CS_LAT, 0x2);
+EVENT(L2_RUNTIME_AGG, 0x3);
+
+VPA_PMU_EVENT_ATTR(l1_to_l2_lat, L1_TO_L2_CS_LAT);
+VPA_PMU_EVENT_ATTR(l2_to_l1_lat, L2_TO_L1_CS_LAT);
+VPA_PMU_EVENT_ATTR(l2_runtime_agg, L2_RUNTIME_AGG);
+
+/* Entries listed under the "events" sysfs group: one per VPA counter. */
+static struct attribute *vpa_pmu_events_attr[] = {
+ VPA_PMU_EVENT_PTR(L1_TO_L2_CS_LAT),
+ VPA_PMU_EVENT_PTR(L2_TO_L1_CS_LAT),
+ VPA_PMU_EVENT_PTR(L2_RUNTIME_AGG),
+ NULL
+};
+
+static const struct attribute_group vpa_pmu_events_group = {
+ .name = "events",
+ .attrs = vpa_pmu_events_attr,
+};
+
+/* The "event" format field covers config bits 0-31. */
+PMU_FORMAT_ATTR(event, "config:0-31");
+static struct attribute *vpa_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group vpa_pmu_format_group = {
+ .name = "format",
+ .attrs = vpa_pmu_format_attr,
+};
+
+/* NULL-terminated list of groups handed to the PMU via .attr_groups. */
+static const struct attribute_group *vpa_pmu_attr_groups[] = {
+ &vpa_pmu_events_group,
+ &vpa_pmu_format_group,
+ NULL
+};
+
+/*
+ * pmu->event_init callback: accept only counting events of this PMU whose
+ * config is one of the three defined event codes.
+ *
+ * Returns 0 on success, -ENOENT for a foreign event type, -EOPNOTSUPP for
+ * sampling or branch-stack requests and -EINVAL for an unknown event code.
+ */
+static int vpa_pmu_event_init(struct perf_event *event)
+{
+	u64 code = event->attr.config;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Neither event sampling nor branch sampling is supported */
+	if (is_sampling_event(event) || has_branch_stack(event))
+		return -EOPNOTSUPP;
+
+	/* Valid event codes are L1_TO_L2_CS_LAT (0x1) .. L2_RUNTIME_AGG (0x3) */
+	if (code < L1_TO_L2_CS_LAT || code > L2_RUNTIME_AGG)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Read the current raw value of the counter selected by the event's config.
+ * Events attached to a task use the *_vcpu() accessors, all others use the
+ * plain accessors. Unknown event codes read as 0.
+ */
+static unsigned long get_counter_data(struct perf_event *event)
+{
+	unsigned int config = event->attr.config;
+	bool per_task = event->attach_state & PERF_ATTACH_TASK;
+
+	switch (config) {
+	case L1_TO_L2_CS_LAT:
+		return per_task ? kvmhv_get_l1_to_l2_cs_time_vcpu() :
+				  kvmhv_get_l1_to_l2_cs_time();
+	case L2_TO_L1_CS_LAT:
+		return per_task ? kvmhv_get_l2_to_l1_cs_time_vcpu() :
+				  kvmhv_get_l2_to_l1_cs_time();
+	case L2_RUNTIME_AGG:
+		return per_task ? kvmhv_get_l2_runtime_agg_vcpu() :
+				  kvmhv_get_l2_runtime_agg();
+	default:
+		return 0;
+	}
+}
+
+/*
+ * pmu->add callback: switch on L2 counter accumulation for the current CPU
+ * and record the counter's present value as the baseline in prev_count.
+ * Always returns 0.
+ */
+static int vpa_pmu_add(struct perf_event *event, int flags)
+{
+	kvmhv_set_l2_counters_status(smp_processor_id(), true);
+
+	/* Baseline is sampled only after accumulation has been enabled */
+	local64_set(&event->hw.prev_count, get_counter_data(event));
+
+	return 0;
+}
+
+/*
+ * pmu->read callback: fold the counter movement since the previous
+ * read (or since add) into event->count.
+ */
+static void vpa_pmu_read(struct perf_event *event)
+{
+	u64 prev_data, new_data;
+
+	prev_data = local64_read(&event->hw.prev_count);
+	new_data = get_counter_data(event);
+
+	/*
+	 * Advance the baseline so the next read only accounts for what
+	 * happened after this one; otherwise every read would add the whole
+	 * delta since add() again and the count would be inflated.
+	 */
+	local64_set(&event->hw.prev_count, new_data);
+
+	local64_add(new_data - prev_data, &event->count);
+}
+
+/*
+ * pmu->del callback: take a final reading of the counter, then switch off
+ * L2 counter accumulation for the current CPU.
+ */
+static void vpa_pmu_del(struct perf_event *event, int flags)
+{
+ /* Fold the outstanding delta into event->count before disabling */
+ vpa_pmu_read(event);
+
+ /*
+ * Disable vpa counter accumulation
+ */
+ kvmhv_set_l2_counters_status(smp_processor_id(), false);
+}
+
+/* PMU descriptor registered with perf under the name "vpa_pmu". */
+static struct pmu vpa_pmu = {
+ .module = THIS_MODULE,
+ .task_ctx_nr = perf_sw_context,
+ .name = "vpa_pmu",
+ .event_init = vpa_pmu_event_init,
+ .add = vpa_pmu_add,
+ .del = vpa_pmu_del,
+ .read = vpa_pmu_read,
+ .attr_groups = vpa_pmu_attr_groups,
+ .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+};
+
+/*
+ * Module init: register the VPA PMU on PowerVM LPAR (L1) systems.
+ *
+ * Returns 0 on success, -ENODEV on unsupported platforms, or the error
+ * returned by perf_pmu_register().
+ */
+static int __init pseries_vpa_pmu_init(void)
+{
+	int rc;
+
+	/*
+	 * List of current Linux on Power platforms and
+	 * this driver is supported only in PowerVM LPAR
+	 * (L1) platform.
+	 *
+	 * Enabled Linux on Power Platforms
+	 * ----------------------------------------
+	 * [X] PowerVM LPAR (L1)
+	 * [ ] KVM Guest On PowerVM KoP(L2)
+	 * [ ] Baremetal(PowerNV)
+	 * [ ] KVM Guest On PowerNV
+	 */
+	if (!firmware_has_feature(FW_FEATURE_LPAR) || is_kvm_guest())
+		return -ENODEV;
+
+	/*
+	 * Fail the module load when registration fails, so the exit path
+	 * never unregisters a PMU that was not registered.
+	 */
+	rc = perf_pmu_register(&vpa_pmu, vpa_pmu.name, -1);
+	if (rc) {
+		pr_err("Virtual Processor Area PMU registration failed: %d\n", rc);
+		return rc;
+	}
+
+	pr_info("Virtual Processor Area PMU registered.\n");
+
+	return 0;
+}
+
+/* Module exit: unregister the VPA PMU registered by pseries_vpa_pmu_init(). */
+static void __exit pseries_vpa_pmu_cleanup(void)
+{
+ perf_pmu_unregister(&vpa_pmu);
+ pr_info("Virtual Processor Area PMU unregistered.\n");
+}
+
+module_init(pseries_vpa_pmu_init);
+module_exit(pseries_vpa_pmu_cleanup);
+MODULE_DESCRIPTION("Perf Driver for pSeries VPA pmu counter");
+MODULE_AUTHOR("Kajol Jain <kjain@linux.ibm.com>");
+MODULE_AUTHOR("Madhavan Srinivasan <maddy@linux.ibm.com>");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
deleted file mode 100644
index 6e287f1294fa..000000000000
--- a/arch/powerpc/platforms/40x/Kconfig
+++ /dev/null
@@ -1,161 +0,0 @@
-config ACADIA
- bool "Acadia"
- depends on 40x
- default n
- select PPC40x_SIMPLE
- select 405EZ
- help
- This option enables support for the AMCC 405EZ Acadia evaluation board.
-
-config EP405
- bool "EP405/EP405PC"
- depends on 40x
- default n
- select 405GP
- select PCI
- help
- This option enables support for the EP405/EP405PC boards.
-
-config HOTFOOT
- bool "Hotfoot"
- depends on 40x
- default n
- select PPC40x_SIMPLE
- select PCI
- help
- This option enables support for the ESTEEM 195E Hotfoot board.
-
-config KILAUEA
- bool "Kilauea"
- depends on 40x
- default n
- select 405EX
- select PPC40x_SIMPLE
- select PPC4xx_PCI_EXPRESS
- select PCI_MSI
- select PPC4xx_MSI
- help
- This option enables support for the AMCC PPC405EX evaluation board.
-
-config MAKALU
- bool "Makalu"
- depends on 40x
- default n
- select 405EX
- select PCI
- select PPC4xx_PCI_EXPRESS
- select PPC40x_SIMPLE
- help
- This option enables support for the AMCC PPC405EX board.
-
-config WALNUT
- bool "Walnut"
- depends on 40x
- default y
- select 405GP
- select PCI
- select OF_RTC
- help
- This option enables support for the IBM PPC405GP evaluation board.
-
-config XILINX_VIRTEX_GENERIC_BOARD
- bool "Generic Xilinx Virtex board"
- depends on 40x
- default n
- select XILINX_VIRTEX_II_PRO
- select XILINX_VIRTEX_4_FX
- help
- This option enables generic support for Xilinx Virtex based boards.
-
- The generic virtex board support matches any device tree which
- specifies 'xilinx,virtex' in its compatible field. This includes
- the Xilinx ML3xx and ML4xx reference designs using the powerpc
- core.
-
- Most Virtex designs should use this unless it needs to do some
- special configuration at board probe time.
-
-config OBS600
- bool "OpenBlockS 600"
- depends on 40x
- default n
- select 405EX
- select PPC40x_SIMPLE
- help
- This option enables support for PlatHome OpenBlockS 600 server
-
-
-config PPC40x_SIMPLE
- bool "Simple PowerPC 40x board support"
- depends on 40x
- default n
- help
- This option enables the simple PowerPC 40x platform support.
-
-# OAK doesn't exist but wanted to keep this around for any future 403GCX boards
-config 403GCX
- bool
- #depends on OAK
- select IBM405_ERR51
-
-config 405GP
- bool
- select IBM405_ERR77
- select IBM405_ERR51
- select IBM_EMAC_ZMII
-
-config 405EX
- bool
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
-
-config 405EZ
- bool
- select IBM_EMAC_NO_FLOW_CTRL
- select IBM_EMAC_MAL_CLR_ICINTSTAT
- select IBM_EMAC_MAL_COMMON_ERR
-
-config XILINX_VIRTEX
- bool
- select DEFAULT_UIMAGE
-
-config XILINX_VIRTEX_II_PRO
- bool
- select XILINX_VIRTEX
- select IBM405_ERR77
- select IBM405_ERR51
-
-config XILINX_VIRTEX_4_FX
- bool
- select XILINX_VIRTEX
-
-config STB03xxx
- bool
- select IBM405_ERR77
- select IBM405_ERR51
-
-config PPC4xx_GPIO
- bool "PPC4xx GPIO support"
- depends on 40x
- select ARCH_REQUIRE_GPIOLIB
- help
- Enable gpiolib support for ppc40x based boards
-
-# 40x errata/workaround config symbols, selected by the CPU models above
-
-# All 405-based cores up until the 405GPR and 405EP have this errata.
-config IBM405_ERR77
- bool
-
-# All 40x-based cores, up until the 405GPR and 405EP have this errata.
-config IBM405_ERR51
- bool
-
-config APM8018X
- bool "APM8018X"
- depends on 40x
- default n
- select PPC40x_SIMPLE
- help
- This option enables support for the AppliedMicro APM8018X evaluation
- board.
diff --git a/arch/powerpc/platforms/40x/Makefile b/arch/powerpc/platforms/40x/Makefile
deleted file mode 100644
index 88c22de0c850..000000000000
--- a/arch/powerpc/platforms/40x/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-obj-$(CONFIG_WALNUT) += walnut.o
-obj-$(CONFIG_XILINX_VIRTEX_GENERIC_BOARD) += virtex.o
-obj-$(CONFIG_EP405) += ep405.o
-obj-$(CONFIG_PPC40x_SIMPLE) += ppc40x_simple.o
diff --git a/arch/powerpc/platforms/40x/ep405.c b/arch/powerpc/platforms/40x/ep405.c
deleted file mode 100644
index b0389bbe4f94..000000000000
--- a/arch/powerpc/platforms/40x/ep405.c
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Architecture- / platform-specific boot-time initialization code for
- * IBM PowerPC 4xx based boards. Adapted from original
- * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
- * <dan@net4x.com>.
- *
- * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
- *
- * Rewritten and ported to the merged powerpc tree:
- * Copyright 2007 IBM Corporation
- * Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * Adapted to EP405 by Ben. Herrenschmidt <benh@kernel.crashing.org>
- *
- * TODO: Wire up the PCI IRQ mux and the southbridge interrupts
- *
- * 2002 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/time.h>
-#include <asm/uic.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc4xx.h>
-
-static struct device_node *bcsr_node;
-static void __iomem *bcsr_regs;
-
-/* BCSR registers */
-#define BCSR_ID 0
-#define BCSR_PCI_CTRL 1
-#define BCSR_FLASH_NV_POR_CTRL 2
-#define BCSR_FENET_UART_CTRL 3
-#define BCSR_PCI_IRQ 4
-#define BCSR_XIRQ_SELECT 5
-#define BCSR_XIRQ_ROUTING 6
-#define BCSR_XIRQ_STATUS 7
-#define BCSR_XIRQ_STATUS2 8
-#define BCSR_SW_STAT_LED_CTRL 9
-#define BCSR_GPIO_IRQ_PAR_CTRL 10
-/* there's more, can't be bothered typing them tho */
-
-
-static __initdata struct of_device_id ep405_of_bus[] = {
- { .compatible = "ibm,plb3", },
- { .compatible = "ibm,opb", },
- { .compatible = "ibm,ebc", },
- {},
-};
-
-static int __init ep405_device_probe(void)
-{
- of_platform_bus_probe(NULL, ep405_of_bus, NULL);
-
- return 0;
-}
-machine_device_initcall(ep405, ep405_device_probe);
-
-static void __init ep405_init_bcsr(void)
-{
- const u8 *irq_routing;
- int i;
-
- /* Find the bloody thing & map it */
- bcsr_node = of_find_compatible_node(NULL, NULL, "ep405-bcsr");
- if (bcsr_node == NULL) {
- printk(KERN_ERR "EP405 BCSR not found !\n");
- return;
- }
- bcsr_regs = of_iomap(bcsr_node, 0);
- if (bcsr_regs == NULL) {
- printk(KERN_ERR "EP405 BCSR failed to map !\n");
- return;
- }
-
- /* Get the irq-routing property and apply the routing to the CPLD */
- irq_routing = of_get_property(bcsr_node, "irq-routing", NULL);
- if (irq_routing == NULL)
- return;
- for (i = 0; i < 16; i++) {
- u8 irq = irq_routing[i];
- out_8(bcsr_regs + BCSR_XIRQ_SELECT, i);
- out_8(bcsr_regs + BCSR_XIRQ_ROUTING, irq);
- }
- in_8(bcsr_regs + BCSR_XIRQ_SELECT);
- mb();
- out_8(bcsr_regs + BCSR_GPIO_IRQ_PAR_CTRL, 0xfe);
-}
-
-static void __init ep405_setup_arch(void)
-{
- /* Find & init the BCSR CPLD */
- ep405_init_bcsr();
-
- pci_set_flags(PCI_REASSIGN_ALL_RSRC);
-}
-
-static int __init ep405_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "ep405"))
- return 0;
-
- return 1;
-}
-
-define_machine(ep405) {
- .name = "EP405",
- .probe = ep405_probe,
- .setup_arch = ep405_setup_arch,
- .progress = udbg_progress,
- .init_IRQ = uic_init_tree,
- .get_irq = uic_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/40x/ppc40x_simple.c b/arch/powerpc/platforms/40x/ppc40x_simple.c
deleted file mode 100644
index 8f3920e5a046..000000000000
--- a/arch/powerpc/platforms/40x/ppc40x_simple.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Generic PowerPC 40x platform support
- *
- * Copyright 2008 IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- *
- * This implements simple platform support for PowerPC 44x chips. This is
- * mostly used for eval boards or other simple and "generic" 44x boards. If
- * your board has custom functions or hardware, then you will likely want to
- * implement your own board.c file to accommodate it.
- */
-
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc4xx.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/udbg.h>
-#include <asm/uic.h>
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-
-static __initdata struct of_device_id ppc40x_of_bus[] = {
- { .compatible = "ibm,plb3", },
- { .compatible = "ibm,plb4", },
- { .compatible = "ibm,opb", },
- { .compatible = "ibm,ebc", },
- { .compatible = "simple-bus", },
- {},
-};
-
-static int __init ppc40x_device_probe(void)
-{
- of_platform_bus_probe(NULL, ppc40x_of_bus, NULL);
-
- return 0;
-}
-machine_device_initcall(ppc40x_simple, ppc40x_device_probe);
-
-/* This is the list of boards that can be supported by this simple
- * platform code. This does _not_ mean the boards are compatible,
- * as they most certainly are not from a device tree perspective.
- * However, their differences are handled by the device tree and the
- * drivers and therefore they don't need custom board support files.
- *
- * Again, if your board needs to do things differently then create a
- * board.c file for it rather than adding it to this list.
- */
-static const char * const board[] __initconst = {
- "amcc,acadia",
- "amcc,haleakala",
- "amcc,kilauea",
- "amcc,makalu",
- "apm,klondike",
- "est,hotfoot",
- "plathome,obs600",
- NULL
-};
-
-static int __init ppc40x_probe(void)
-{
- if (of_flat_dt_match(of_get_flat_dt_root(), board)) {
- pci_set_flags(PCI_REASSIGN_ALL_RSRC);
- return 1;
- }
-
- return 0;
-}
-
-define_machine(ppc40x_simple) {
- .name = "PowerPC 40x Platform",
- .probe = ppc40x_probe,
- .progress = udbg_progress,
- .init_IRQ = uic_init_tree,
- .get_irq = uic_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c
deleted file mode 100644
index d0fc6866b00c..000000000000
--- a/arch/powerpc/platforms/40x/virtex.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Xilinx Virtex (IIpro & 4FX) based board support
- *
- * Copyright 2007 Secret Lab Technologies Ltd.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/xilinx_intc.h>
-#include <asm/xilinx_pci.h>
-#include <asm/ppc4xx.h>
-
-static struct of_device_id xilinx_of_bus_ids[] __initdata = {
- { .compatible = "xlnx,plb-v46-1.00.a", },
- { .compatible = "xlnx,plb-v34-1.01.a", },
- { .compatible = "xlnx,plb-v34-1.02.a", },
- { .compatible = "xlnx,opb-v20-1.10.c", },
- { .compatible = "xlnx,dcr-v29-1.00.a", },
- { .compatible = "xlnx,compound", },
- {}
-};
-
-static int __init virtex_device_probe(void)
-{
- of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL);
-
- return 0;
-}
-machine_device_initcall(virtex, virtex_device_probe);
-
-static int __init virtex_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "xlnx,virtex"))
- return 0;
-
- return 1;
-}
-
-define_machine(virtex) {
- .name = "Xilinx Virtex",
- .probe = virtex_probe,
- .setup_arch = xilinx_pci_init,
- .init_IRQ = xilinx_intc_init_tree,
- .get_irq = xilinx_intc_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/40x/walnut.c b/arch/powerpc/platforms/40x/walnut.c
deleted file mode 100644
index 8b691df72f74..000000000000
--- a/arch/powerpc/platforms/40x/walnut.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Architecture- / platform-specific boot-time initialization code for
- * IBM PowerPC 4xx based boards. Adapted from original
- * code by Gary Thomas, Cort Dougan <cort@fsmlabs.com>, and Dan Malek
- * <dan@net4x.com>.
- *
- * Copyright(c) 1999-2000 Grant Erickson <grant@lcse.umn.edu>
- *
- * Rewritten and ported to the merged powerpc tree:
- * Copyright 2007 IBM Corporation
- * Josh Boyer <jwboyer@linux.vnet.ibm.com>
- *
- * 2002 (c) MontaVista, Software, Inc. This file is licensed under
- * the terms of the GNU General Public License version 2. This program
- * is licensed "as is" without any warranty of any kind, whether express
- * or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/rtc.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/time.h>
-#include <asm/uic.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc4xx.h>
-
-static __initdata struct of_device_id walnut_of_bus[] = {
- { .compatible = "ibm,plb3", },
- { .compatible = "ibm,opb", },
- { .compatible = "ibm,ebc", },
- {},
-};
-
-static int __init walnut_device_probe(void)
-{
- of_platform_bus_probe(NULL, walnut_of_bus, NULL);
- of_instantiate_rtc();
-
- return 0;
-}
-machine_device_initcall(walnut, walnut_device_probe);
-
-static int __init walnut_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "ibm,walnut"))
- return 0;
-
- pci_set_flags(PCI_REASSIGN_ALL_RSRC);
-
- return 1;
-}
-
-define_machine(walnut) {
- .name = "Walnut",
- .probe = walnut_probe,
- .progress = udbg_progress,
- .init_IRQ = uic_init_tree,
- .get_irq = uic_get_irq,
- .restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
-};
diff --git a/arch/powerpc/platforms/44x/44x.h b/arch/powerpc/platforms/44x/44x.h
index 63f703ecd23c..0e912a6a0b9a 100644
--- a/arch/powerpc/platforms/44x/44x.h
+++ b/arch/powerpc/platforms/44x/44x.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __POWERPC_PLATFORMS_44X_44X_H
#define __POWERPC_PLATFORMS_44X_44X_H
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 4d88f6a19058..fc79f8466933 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -1,32 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0
config PPC_47x
bool "Support for 47x variant"
depends on 44x
- default n
select MPIC
help
This option enables support for the 47x family of processors and is
- not currently compatible with other 44x or 46x varients
+ not currently compatible with other 44x or 46x variants
config BAMBOO
bool "Bamboo"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440EP
- select PCI
+ select FORCE_PCI
help
This option enables support for the IBM PPC440EP evaluation board.
config BLUESTONE
bool "Bluestone"
depends on 44x
- default n
select PPC44x_SIMPLE
select APM821xx
+ select FORCE_PCI
select PCI_MSI
- select PPC4xx_MSI
select PPC4xx_PCI_EXPRESS
- select IBM_EMAC_RGMII
+ select IBM_EMAC_RGMII if IBM_EMAC
help
This option enables support for the APM APM821xx Evaluation board.
@@ -35,24 +33,22 @@ config EBONY
depends on 44x
default y
select 440GP
- select PCI
+ select FORCE_PCI
select OF_RTC
help
This option enables support for the IBM PPC440GP evaluation board.
config SAM440EP
- bool "Sam440ep"
+ bool "Sam440ep"
depends on 44x
- default n
- select 440EP
- select PCI
- help
- This option enables support for the ACube Sam440ep board.
+ select 440EP
+ select FORCE_PCI
+ help
+ This option enables support for the ACube Sam440ep board.
config SEQUOIA
bool "Sequoia"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440EPX
help
@@ -61,10 +57,9 @@ config SEQUOIA
config TAISHAN
bool "Taishan"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440GX
- select PCI
+ select FORCE_PCI
help
This option enables support for the AMCC PPC440GX "Taishan"
evaluation board.
@@ -72,30 +67,26 @@ config TAISHAN
config KATMAI
bool "Katmai"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440SPe
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
select PCI_MSI
- select PPC4xx_MSI
help
This option enables support for the AMCC PPC440SPe evaluation board.
config RAINIER
bool "Rainier"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440GRX
- select PCI
+ select FORCE_PCI
help
This option enables support for the AMCC PPC440GRX evaluation board.
config WARP
bool "PIKA Warp"
depends on 44x
- default n
select 440EP
help
This option enables support for the PIKA Warp(tm) Appliance. The Warp
@@ -108,10 +99,9 @@ config WARP
config ARCHES
bool "Arches"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460EX # Odd since it uses 460GT but the effects are the same
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
help
This option enables support for the AMCC Dual PPC460GT evaluation board.
@@ -119,70 +109,61 @@ config ARCHES
config CANYONLANDS
bool "Canyonlands"
depends on 44x
- default n
select 460EX
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
select PCI_MSI
- select PPC4xx_MSI
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
help
This option enables support for the AMCC PPC460EX evaluation board.
config GLACIER
bool "Glacier"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460EX # Odd since it uses 460GT but the effects are the same
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
help
This option enables support for the AMCC PPC460GT evaluation board.
config REDWOOD
bool "Redwood"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460SX
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
select PCI_MSI
- select PPC4xx_MSI
help
This option enables support for the AMCC PPC460SX Redwood board.
config EIGER
bool "Eiger"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460SX
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
- select IBM_EMAC_RGMII
+ select IBM_EMAC_RGMII if IBM_EMAC
help
This option enables support for the AMCC PPC460SX evaluation board.
config YOSEMITE
bool "Yosemite"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440EP
- select PCI
+ select FORCE_PCI
help
This option enables support for the AMCC PPC440EP evaluation board.
config ISS4xx
bool "ISS 4xx Simulator"
- depends on (44x || 40x)
- default n
- select 405GP if 40x
+ depends on 44x
select 440GP if 44x && !PPC_47x
select PPC_FPU
select OF_RTC
@@ -192,37 +173,40 @@ config ISS4xx
config CURRITUCK
bool "IBM Currituck (476fpe) Support"
depends on PPC_47x
- default n
+ select I2C
select SWIOTLB
select 476FPE
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
help
This option enables support for the IBM Currituck (476fpe) evaluation board
+config FSP2
+ bool "IBM FSP2 (476fpe) Support"
+ depends on PPC_47x
+ select 476FPE
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select COMMON_CLK
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the IBM FSP2 (476fpe) board
+
config AKEBONO
bool "IBM Akebono (476gtr) Support"
depends on PPC_47x
- default n
select SWIOTLB
select 476FPE
select PPC4xx_PCI_EXPRESS
+ select FORCE_PCI
select PCI_MSI
select PPC4xx_HSTA_MSI
select I2C
select I2C_IBM_IIC
- select NETDEVICES
- select ETHERNET
- select NET_VENDOR_IBM
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII_WOL
- select USB
- select USB_OHCI_HCD_PLATFORM
- select USB_EHCI_HCD_PLATFORM
- select MMC_SDHCI
- select MMC_SDHCI_PLTFM
- select MMC_SDHCI_OF_476GTR
- select ATA
- select SATA_AHCI_PLATFORM
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select USB if USB_SUPPORT
+ select USB_OHCI_HCD_PLATFORM if USB_OHCI_HCD
+ select USB_EHCI_HCD_PLATFORM if USB_EHCI_HCD
help
This option enables support for the IBM Akebono (476gtr) evaluation board
@@ -230,116 +214,79 @@ config AKEBONO
config ICON
bool "Icon"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440SPe
- select PCI
+ select FORCE_PCI
select PPC4xx_PCI_EXPRESS
help
This option enables support for the AMCC PPC440SPe evaluation board.
-config XILINX_VIRTEX440_GENERIC_BOARD
- bool "Generic Xilinx Virtex 5 FXT board support"
- depends on 44x
- default n
- select XILINX_VIRTEX_5_FXT
- help
- This option enables generic support for Xilinx Virtex based boards
- that use a 440 based processor in the Virtex 5 FXT FPGA architecture.
-
- The generic virtex board support matches any device tree which
- specifies 'xlnx,virtex440' in its compatible field. This includes
- the Xilinx ML5xx reference designs using the powerpc core.
-
- Most Virtex 5 designs should use this unless it needs to do some
- special configuration at board probe time.
-
-config XILINX_ML510
- bool "Xilinx ML510 extra support"
- depends on XILINX_VIRTEX440_GENERIC_BOARD
- select PPC_PCI_CHOICE
- select XILINX_PCI if PCI
- select PPC_INDIRECT_PCI if PCI
- select PPC_I8259 if PCI
- help
- This option enables extra support for features on the Xilinx ML510
- board. The ML510 has a PCI bus with ALI south bridge.
-
config PPC44x_SIMPLE
bool "Simple PowerPC 44x board support"
depends on 44x
- default n
help
This option enables the simple PowerPC 44x platform support.
config PPC4xx_GPIO
bool "PPC4xx GPIO support"
depends on 44x
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
help
Enable gpiolib support for ppc440 based boards
-config PPC4xx_OCM
- bool "PPC4xx On Chip Memory (OCM) support"
- depends on 4xx
- select PPC_LIB_RHEAP
- help
- Enable OCM support for PowerPC 4xx platforms with on chip memory,
- OCM provides the fast place for memory access to improve performance.
-
# 44x specific CPU modules, selected based on the board above.
config 440EP
bool
select PPC_FPU
select IBM440EP_ERR42
- select IBM_EMAC_ZMII
+ select IBM_EMAC_ZMII if IBM_EMAC
config 440EPX
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
select USB_EHCI_BIG_ENDIAN_MMIO
select USB_EHCI_BIG_ENDIAN_DESC
config 440GRX
bool
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
config 440GP
bool
- select IBM_EMAC_ZMII
+ select IBM_EMAC_ZMII if IBM_EMAC
config 440GX
bool
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII #test only
- select IBM_EMAC_TAH #test only
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC #test only
+ select IBM_EMAC_TAH if IBM_EMAC #test only
config 440SP
bool
config 440SPe
bool
- select IBM_EMAC_EMAC4
+ select IBM_EMAC_EMAC4 if IBM_EMAC
config 460EX
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_TAH
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
config 460SX
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII
- select IBM_EMAC_TAH
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_RGMII if IBM_EMAC
+ select IBM_EMAC_ZMII if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
config 476FPE
bool
@@ -348,8 +295,8 @@ config 476FPE
config APM821xx
bool
select PPC_FPU
- select IBM_EMAC_EMAC4
- select IBM_EMAC_TAH
+ select IBM_EMAC_EMAC4 if IBM_EMAC
+ select IBM_EMAC_TAH if IBM_EMAC
config 476FPE_ERR46
depends on 476FPE
@@ -369,13 +316,3 @@ config 476FPE_ERR46
config IBM440EP_ERR42
bool
-# Xilinx specific config options.
-config XILINX_VIRTEX
- bool
- select DEFAULT_UIMAGE
-
-# Xilinx Virtex 5 FXT FPGA architecture, selected by a Xilinx board above
-config XILINX_VIRTEX_5_FXT
- bool
- select XILINX_VIRTEX
-
diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile
index 26d35b5941f7..ca7b1bb442d9 100644
--- a/arch/powerpc/platforms/44x/Makefile
+++ b/arch/powerpc/platforms/44x/Makefile
@@ -1,14 +1,18 @@
-obj-$(CONFIG_44x) += misc_44x.o
+# SPDX-License-Identifier: GPL-2.0
+obj-y += misc_44x.o machine_check.o uic.o soc.o
ifneq ($(CONFIG_PPC4xx_CPM),y)
-obj-$(CONFIG_44x) += idle.o
+obj-y += idle.o
endif
obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o
obj-$(CONFIG_EBONY) += ebony.o
obj-$(CONFIG_SAM440EP) += sam440ep.o
obj-$(CONFIG_WARP) += warp.o
-obj-$(CONFIG_XILINX_VIRTEX_5_FXT) += virtex.o
-obj-$(CONFIG_XILINX_ML510) += virtex_ml510.o
obj-$(CONFIG_ISS4xx) += iss4xx.o
obj-$(CONFIG_CANYONLANDS)+= canyonlands.o
obj-$(CONFIG_CURRITUCK) += ppc476.o
obj-$(CONFIG_AKEBONO) += ppc476.o
+obj-$(CONFIG_FSP2) += fsp2.o
+obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o
+obj-$(CONFIG_PPC4xx_CPM) += cpm.o
+obj-$(CONFIG_PPC4xx_GPIO) += gpio.o
diff --git a/arch/powerpc/platforms/44x/canyonlands.c b/arch/powerpc/platforms/44x/canyonlands.c
index e300dd4c89bf..8742a10d9e0c 100644
--- a/arch/powerpc/platforms/44x/canyonlands.c
+++ b/arch/powerpc/platforms/44x/canyonlands.c
@@ -1,25 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* This contain platform specific code for APM PPC460EX based Canyonlands
* board.
*
* Copyright (c) 2010, Applied Micro Circuits Corporation
* Author: Rupjyoti Sarmah <rsarmah@apm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
- *
*/
#include <linux/kernel.h>
#include <linux/init.h>
@@ -27,13 +12,14 @@
#include <asm/ppc4xx.h>
#include <asm/udbg.h>
#include <asm/uic.h>
+#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/delay.h>
#include "44x.h"
#define BCSR_USB_EN 0x11
-static __initdata struct of_device_id ppc460ex_of_bus[] = {
+static const struct of_device_id ppc460ex_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,opb", },
{ .compatible = "ibm,ebc", },
@@ -53,12 +39,9 @@ machine_device_initcall(canyonlands, ppc460ex_device_probe);
static int __init ppc460ex_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
- if (of_flat_dt_is_compatible(root, "amcc,canyonlands")) {
- pci_set_flags(PCI_REASSIGN_ALL_RSRC);
- return 1;
- }
- return 0;
+ pci_set_flags(PCI_REASSIGN_ALL_RSRC);
+
+ return 1;
}
/* USB PHY fixup code on Canyonlands kit. */
@@ -125,10 +108,10 @@ err_bcsr:
machine_device_initcall(canyonlands, ppc460ex_canyonlands_fixup);
define_machine(canyonlands) {
.name = "Canyonlands",
+ .compatible = "amcc,canyonlands",
.probe = ppc460ex_probe,
.progress = udbg_progress,
.init_IRQ = uic_init_tree,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/platforms/44x/cpm.c
index 82e2cfe35c62..670f8ad4465b 100644
--- a/arch/powerpc/sysdev/ppc4xx_cpm.c
+++ b/arch/powerpc/platforms/44x/cpm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC 4xx Clock and Power Management
*
@@ -14,25 +15,10 @@
*
* See file CREDITS for list of people who contributed to this
* project.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of
- * the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
- * MA 02111-1307 USA
*/
#include <linux/kernel.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/sysfs.h>
#include <linux/cpu.h>
#include <linux/suspend.h>
@@ -77,7 +63,7 @@ static unsigned int cpm_set(unsigned int cpm_reg, unsigned int mask)
* known as class 1, 2 and 3. For class 1 units, they are
* unconditionally put to sleep when the corresponding CPM bit is
* set. For class 2 and 3 units this is not case; if they can be
- * put to to sleep, they will. Here we do not verify, we just
+ * put to sleep, they will. Here we do not verify, we just
* set them and expect them to eventually go off when they can.
*/
value = dcr_read(cpm.dcr_host, cpm.dcr_offset[cpm_reg]);
@@ -177,7 +163,7 @@ static ssize_t cpm_idle_store(struct kobject *kobj,
static struct kobj_attribute cpm_idle_attr =
__ATTR(idle, 0644, cpm_idle_show, cpm_idle_store);
-static void cpm_idle_config_sysfs(void)
+static void __init cpm_idle_config_sysfs(void)
{
struct device *dev;
unsigned long ret;
@@ -240,12 +226,12 @@ static int cpm_suspend_enter(suspend_state_t state)
return 0;
}
-static struct platform_suspend_ops cpm_suspend_ops = {
+static const struct platform_suspend_ops cpm_suspend_ops = {
.valid = cpm_suspend_valid,
.enter = cpm_suspend_enter,
};
-static int cpm_get_uint_property(struct device_node *np,
+static int __init cpm_get_uint_property(struct device_node *np,
const char *name)
{
int len;
@@ -278,19 +264,19 @@ static int __init cpm_init(void)
dcr_len = dcr_resource_len(np, 0);
if (dcr_base == 0 || dcr_len == 0) {
- printk(KERN_ERR "cpm: could not parse dcr property for %s\n",
- np->full_name);
+ printk(KERN_ERR "cpm: could not parse dcr property for %pOF\n",
+ np);
ret = -EINVAL;
- goto out;
+ goto node_put;
}
cpm.dcr_host = dcr_map(np, dcr_base, dcr_len);
if (!DCR_MAP_OK(cpm.dcr_host)) {
- printk(KERN_ERR "cpm: failed to map dcr property for %s\n",
- np->full_name);
+ printk(KERN_ERR "cpm: failed to map dcr property for %pOF\n",
+ np);
ret = -EINVAL;
- goto out;
+ goto node_put;
}
/* All 4xx SoCs with a CPM controller have one of two
@@ -330,9 +316,9 @@ static int __init cpm_init(void)
if (cpm.standby || cpm.suspend)
suspend_set_ops(&cpm_suspend_ops);
+node_put:
+ of_node_put(np);
out:
- if (np)
- of_node_put(np);
return ret;
}
@@ -341,6 +327,6 @@ late_initcall(cpm_init);
static int __init cpm_powersave_off(char *arg)
{
cpm.powersave_off = 1;
- return 0;
+ return 1;
}
__setup("powersave=off", cpm_powersave_off);
diff --git a/arch/powerpc/platforms/44x/ebony.c b/arch/powerpc/platforms/44x/ebony.c
index 6a4232bbdf88..4861310c8dc0 100644
--- a/arch/powerpc/platforms/44x/ebony.c
+++ b/arch/powerpc/platforms/44x/ebony.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Ebony board specific routines
*
@@ -9,11 +10,6 @@
*
* Rewritten and ported to the merged powerpc tree:
* Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
@@ -28,7 +24,7 @@
#include <asm/pci-bridge.h>
#include <asm/ppc4xx.h>
-static __initdata struct of_device_id ebony_of_bus[] = {
+static const struct of_device_id ebony_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,opb", },
{ .compatible = "ibm,ebc", },
@@ -49,11 +45,6 @@ machine_device_initcall(ebony, ebony_device_probe);
*/
static int __init ebony_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "ibm,ebony"))
- return 0;
-
pci_set_flags(PCI_REASSIGN_ALL_RSRC);
return 1;
@@ -61,10 +52,10 @@ static int __init ebony_probe(void)
define_machine(ebony) {
.name = "Ebony",
+ .compatible = "ibm,ebony",
.probe = ebony_probe,
.progress = udbg_progress,
.init_IRQ = uic_init_tree,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c
new file mode 100644
index 000000000000..f6b8d02e08b0
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * FSP-2 board specific routines
+ *
+ * Based on earlier code:
+ * Matt Porter <mporter@kernel.crashing.org>
+ * Copyright 2002-2005 MontaVista Software Inc.
+ *
+ * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
+ * Copyright (c) 2003-2005 Zultys Technologies
+ *
+ * Rewritten and ported to the merged powerpc tree:
+ * Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
+ */
+
+#include <linux/init.h>
+#include <linux/of_fdt.h>
+#include <linux/of_platform.h>
+#include <linux/rtc.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/time.h>
+#include <asm/uic.h>
+#include <asm/ppc4xx.h>
+#include <asm/dcr.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include "fsp2.h"
+
+#define FSP2_BUS_ERR "ibm,bus-error-irq"
+#define FSP2_CMU_ERR "ibm,cmu-error-irq"
+#define FSP2_CONF_ERR "ibm,conf-error-irq"
+#define FSP2_OPBD_ERR "ibm,opbd-error-irq"
+#define FSP2_MCUE "ibm,mc-ue-irq"
+#define FSP2_RST_WRN "ibm,reset-warning-irq"
+
+static __initdata struct of_device_id fsp2_of_bus[] = {
+ { .compatible = "ibm,plb4", },
+ { .compatible = "ibm,plb6", },
+ { .compatible = "ibm,opb", },
+ {},
+};
+
+static void l2regs(void)
+{
+ pr_err("L2 Controller:\n");
+ pr_err("MCK: 0x%08x\n", mfl2(L2MCK));
+ pr_err("INT: 0x%08x\n", mfl2(L2INT));
+ pr_err("PLBSTAT0: 0x%08x\n", mfl2(L2PLBSTAT0));
+ pr_err("PLBSTAT1: 0x%08x\n", mfl2(L2PLBSTAT1));
+ pr_err("ARRSTAT0: 0x%08x\n", mfl2(L2ARRSTAT0));
+ pr_err("ARRSTAT1: 0x%08x\n", mfl2(L2ARRSTAT1));
+ pr_err("ARRSTAT2: 0x%08x\n", mfl2(L2ARRSTAT2));
+ pr_err("CPUSTAT: 0x%08x\n", mfl2(L2CPUSTAT));
+ pr_err("RACSTAT0: 0x%08x\n", mfl2(L2RACSTAT0));
+ pr_err("WACSTAT0: 0x%08x\n", mfl2(L2WACSTAT0));
+ pr_err("WACSTAT1: 0x%08x\n", mfl2(L2WACSTAT1));
+ pr_err("WACSTAT2: 0x%08x\n", mfl2(L2WACSTAT2));
+ pr_err("WDFSTAT: 0x%08x\n", mfl2(L2WDFSTAT));
+ pr_err("LOG0: 0x%08x\n", mfl2(L2LOG0));
+ pr_err("LOG1: 0x%08x\n", mfl2(L2LOG1));
+ pr_err("LOG2: 0x%08x\n", mfl2(L2LOG2));
+ pr_err("LOG3: 0x%08x\n", mfl2(L2LOG3));
+ pr_err("LOG4: 0x%08x\n", mfl2(L2LOG4));
+ pr_err("LOG5: 0x%08x\n", mfl2(L2LOG5));
+}
+
+static void show_plbopb_regs(u32 base, int num)
+{
+ pr_err("\nPLBOPB Bridge %d:\n", num);
+ pr_err("GESR0: 0x%08x\n", mfdcr(base + PLB4OPB_GESR0));
+ pr_err("GESR1: 0x%08x\n", mfdcr(base + PLB4OPB_GESR1));
+ pr_err("GESR2: 0x%08x\n", mfdcr(base + PLB4OPB_GESR2));
+ pr_err("GEARU: 0x%08x\n", mfdcr(base + PLB4OPB_GEARU));
+ pr_err("GEAR: 0x%08x\n", mfdcr(base + PLB4OPB_GEAR));
+}
+
+static irqreturn_t bus_err_handler(int irq, void *data)
+{
+ pr_err("Bus Error\n");
+
+ l2regs();
+
+ pr_err("\nPLB6 Controller:\n");
+ pr_err("BC_SHD: 0x%08x\n", mfdcr(DCRN_PLB6_SHD));
+ pr_err("BC_ERR: 0x%08x\n", mfdcr(DCRN_PLB6_ERR));
+
+ pr_err("\nPLB6-to-PLB4 Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_ESR));
+ pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARH));
+ pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB6PLB4_EARL));
+
+ pr_err("\nPLB4-to-PLB6 Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_ESR));
+ pr_err("EARH: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARH));
+ pr_err("EARL: 0x%08x\n", mfdcr(DCRN_PLB4PLB6_EARL));
+
+ pr_err("\nPLB6-to-MCIF Bridge:\n");
+ pr_err("BESR0: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR0));
+ pr_err("BESR1: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BESR1));
+ pr_err("BEARH: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARH));
+ pr_err("BEARL: 0x%08x\n", mfdcr(DCRN_PLB6MCIF_BEARL));
+
+ pr_err("\nPLB4 Arbiter:\n");
+ pr_err("P0ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRH));
+ pr_err("P0ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P0ESRL));
+ pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+ pr_err("P0EARH 0x%08x\n", mfdcr(DCRN_PLB4_P0EARH));
+ pr_err("P1ESRH 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRH));
+ pr_err("P1ESRL 0x%08x\n", mfdcr(DCRN_PLB4_P1ESRL));
+ pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+ pr_err("P1EARH 0x%08x\n", mfdcr(DCRN_PLB4_P1EARH));
+
+ show_plbopb_regs(DCRN_PLB4OPB0_BASE, 0);
+ show_plbopb_regs(DCRN_PLB4OPB1_BASE, 1);
+ show_plbopb_regs(DCRN_PLB4OPB2_BASE, 2);
+ show_plbopb_regs(DCRN_PLB4OPB3_BASE, 3);
+
+ pr_err("\nPLB4-to-AHB Bridge:\n");
+ pr_err("ESR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_ESR));
+ pr_err("SEUAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SEUAR));
+ pr_err("SELAR: 0x%08x\n", mfdcr(DCRN_PLB4AHB_SELAR));
+
+ pr_err("\nAHB-to-PLB4 Bridge:\n");
+ pr_err("\nESR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_ESR));
+ pr_err("\nEAR: 0x%08x\n", mfdcr(DCRN_AHBPLB4_EAR));
+ panic("Bus Error\n");
+}
+
+static irqreturn_t cmu_err_handler(int irq, void *data) {
+ pr_err("CMU Error\n");
+ pr_err("FIR0: 0x%08x\n", mfcmu(CMUN_FIR0));
+ panic("CMU Error\n");
+}
+
+static irqreturn_t conf_err_handler(int irq, void *data) {
+ pr_err("Configuration Logic Error\n");
+ pr_err("CONF_FIR: 0x%08x\n", mfdcr(DCRN_CONF_FIR_RWC));
+ pr_err("RPERR0: 0x%08x\n", mfdcr(DCRN_CONF_RPERR0));
+ pr_err("RPERR1: 0x%08x\n", mfdcr(DCRN_CONF_RPERR1));
+ panic("Configuration Logic Error\n");
+}
+
+static irqreturn_t opbd_err_handler(int irq, void *data) {
+ panic("OPBD Error\n");
+}
+
+static irqreturn_t mcue_handler(int irq, void *data) {
+ pr_err("DDR: Uncorrectable Error\n");
+ pr_err("MCSTAT: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCSTAT));
+ pr_err("MCOPT1: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT1));
+ pr_err("MCOPT2: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_MCOPT2));
+ pr_err("PHYSTAT: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_PHYSTAT));
+ pr_err("CFGR0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR0));
+ pr_err("CFGR1: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR1));
+ pr_err("CFGR2: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR2));
+ pr_err("CFGR3: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_CFGR3));
+ pr_err("SCRUB_CNTL: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_SCRUB_CNTL));
+ pr_err("ECCERR_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_PORT0));
+ pr_err("ECCERR_ADDR_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_ADDR_PORT0));
+ pr_err("ECCERR_CNT_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECCERR_COUNT_PORT0));
+ pr_err("ECC_CHECK_PORT0: 0x%08x\n",
+ mfdcr(DCRN_DDR34_BASE + DCRN_DDR34_ECC_CHECK_PORT0));
+ pr_err("MCER0: 0x%08x\n",
+ mfdcr(DCRN_CW_BASE + DCRN_CW_MCER0));
+ pr_err("MCER1: 0x%08x\n",
+ mfdcr(DCRN_CW_BASE + DCRN_CW_MCER1));
+ pr_err("BESR: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BESR0));
+ pr_err("BEARL: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BEARL));
+ pr_err("BEARH: 0x%08x\n",
+ mfdcr(DCRN_PLB6MCIF_BEARH));
+ panic("DDR: Uncorrectable Error\n");
+}
+
+static irqreturn_t rst_wrn_handler(int irq, void *data) {
+ u32 crcs = mfcmu(CMUN_CRCS);
+ switch (crcs & CRCS_STAT_MASK) {
+ case CRCS_STAT_CHIP_RST_B:
+ panic("Received chassis-initiated reset request");
+ default:
+ panic("Unknown external reset: CRCS=0x%x", crcs);
+ }
+}
+
+static void __init node_irq_request(const char *compat, irq_handler_t errirq_handler)
+{
+ struct device_node *np;
+ unsigned int irq;
+ int32_t rc;
+
+ for_each_compatible_node(np, NULL, compat) {
+ irq = irq_of_parse_and_map(np, 0);
+ if (!irq) {
+ pr_err("device tree node %pOFn is missing a interrupt",
+ np);
+ of_node_put(np);
+ return;
+ }
+
+ rc = request_irq(irq, errirq_handler, 0, np->name, np);
+ if (rc) {
+ pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d",
+ np, rc);
+ of_node_put(np);
+ return;
+ }
+ }
+}
+
+static void __init critical_irq_setup(void)
+{
+ node_irq_request(FSP2_CMU_ERR, cmu_err_handler);
+ node_irq_request(FSP2_BUS_ERR, bus_err_handler);
+ node_irq_request(FSP2_CONF_ERR, conf_err_handler);
+ node_irq_request(FSP2_OPBD_ERR, opbd_err_handler);
+ node_irq_request(FSP2_MCUE, mcue_handler);
+ node_irq_request(FSP2_RST_WRN, rst_wrn_handler);
+}
+
+static int __init fsp2_device_probe(void)
+{
+ of_platform_bus_probe(NULL, fsp2_of_bus, NULL);
+ return 0;
+}
+machine_device_initcall(fsp2, fsp2_device_probe);
+
+static int __init fsp2_probe(void)
+{
+ u32 val;
+ unsigned long root = of_get_flat_dt_root();
+
+ if (!of_flat_dt_is_compatible(root, "ibm,fsp2"))
+ return 0;
+
+ /* Clear BC_ERR and mask snoopable request plb errors. */
+ val = mfdcr(DCRN_PLB6_CR0);
+ val |= 0x20000000;
+ mtdcr(DCRN_PLB6_BASE, val);
+ mtdcr(DCRN_PLB6_HD, 0xffff0000);
+ mtdcr(DCRN_PLB6_SHD, 0xffff0000);
+
+ /* TVSENSE reset is blocked (clock gated) by the POR default of the TVS
+ * sleep config bit. As a consequence, TVSENSE will provide erratic
+ * sensor values, which may result in spurious (parity) errors
+ * recorded in the CMU FIR and leading to erroneous interrupt requests
+ * once the CMU interrupt is unmasked.
+ */
+
+ /* 1. set TVS1[UNDOZE] */
+ val = mfcmu(CMUN_TVS1);
+ val |= 0x4;
+ mtcmu(CMUN_TVS1, val);
+
+ /* 2. clear FIR[TVS] and FIR[TVSPAR] */
+ val = mfcmu(CMUN_FIR0);
+ val |= 0x30000000;
+ mtcmu(CMUN_FIR0, val);
+
+ /* L2 machine checks */
+ mtl2(L2PLBMCKEN0, 0xffffffff);
+ mtl2(L2PLBMCKEN1, 0x0000ffff);
+ mtl2(L2ARRMCKEN0, 0xffffffff);
+ mtl2(L2ARRMCKEN1, 0xffffffff);
+ mtl2(L2ARRMCKEN2, 0xfffff000);
+ mtl2(L2CPUMCKEN, 0xffffffff);
+ mtl2(L2RACMCKEN0, 0xffffffff);
+ mtl2(L2WACMCKEN0, 0xffffffff);
+ mtl2(L2WACMCKEN1, 0xffffffff);
+ mtl2(L2WACMCKEN2, 0xffffffff);
+ mtl2(L2WDFMCKEN, 0xffffffff);
+
+ /* L2 interrupts */
+ mtl2(L2PLBINTEN1, 0xffff0000);
+
+ /*
+ * At a global level, enable all L2 machine checks and interrupts
+ * reported by the L2 subsystems, except for the external machine check
+ * input (UIC0.1).
+ */
+ mtl2(L2MCKEN, 0x000007ff);
+ mtl2(L2INTEN, 0x000004ff);
+
+ /* Enable FSP-2 configuration logic parity errors */
+ mtdcr(DCRN_CONF_EIR_RS, 0x80000000);
+ return 1;
+}
+
+static void __init fsp2_irq_init(void)
+{
+ uic_init_tree();
+ critical_irq_setup();
+}
+
+define_machine(fsp2) {
+ .name = "FSP-2",
+ .probe = fsp2_probe,
+ .progress = udbg_progress,
+ .init_IRQ = fsp2_irq_init,
+ .get_irq = uic_get_irq,
+ .restart = ppc4xx_reset_system,
+};
diff --git a/arch/powerpc/platforms/44x/fsp2.h b/arch/powerpc/platforms/44x/fsp2.h
new file mode 100644
index 000000000000..9e1d52754c8b
--- /dev/null
+++ b/arch/powerpc/platforms/44x/fsp2.h
@@ -0,0 +1,272 @@
+#ifndef _ASM_POWERPC_FSP_DCR_H_
+#define _ASM_POWERPC_FSP_DCR_H_
+#ifdef __KERNEL__
+#include <asm/dcr.h>
+
+/* CMU registers are reached indirectly through this address/data DCR pair (see mtcmu()/mfcmu()). */
+#define DCRN_CMU_ADDR 0x00C /* Chip management unit addr */
+#define DCRN_CMU_DATA 0x00D /* Chip management unit data */
+
+/* PLB4 Arbiter */
+#define DCRN_PLB4_PCBI 0x010 /* PLB Crossbar ID/Rev Register */
+#define DCRN_PLB4_P0ACR 0x011 /* PLB0 Arbiter Control Register */
+#define DCRN_PLB4_P0ESRL 0x012 /* PLB0 Error Status Register Low */
+#define DCRN_PLB4_P0ESRH 0x013 /* PLB0 Error Status Register High */
+#define DCRN_PLB4_P0EARL 0x014 /* PLB0 Error Address Register Low */
+#define DCRN_PLB4_P0EARH 0x015 /* PLB0 Error Address Register High */
+#define DCRN_PLB4_P0ESRLS 0x016 /* PLB0 Error Status Register Low Set */
+#define DCRN_PLB4_P0ESRHS 0x017 /* PLB0 Error Status Register High Set */
+#define DCRN_PLB4_PCBC 0x018 /* PLB Crossbar Control Register */
+#define DCRN_PLB4_P1ACR 0x019 /* PLB1 Arbiter Control Register */
+#define DCRN_PLB4_P1ESRL 0x01A /* PLB1 Error Status Register Low */
+#define DCRN_PLB4_P1ESRH 0x01B /* PLB1 Error Status Register High */
+#define DCRN_PLB4_P1EARL 0x01C /* PLB1 Error Address Register Low */
+#define DCRN_PLB4_P1EARH 0x01D /* PLB1 Error Address Register High */
+#define DCRN_PLB4_P1ESRLS 0x01E /* PLB1 Error Status Register Low Set */
+#define DCRN_PLB4_P1ESRHS 0x01F /* PLB1 Error Status Register High Set */
+
+/* PLB4/OPB bridge 0, 1, 2, 3 */
+#define DCRN_PLB4OPB0_BASE 0x020
+#define DCRN_PLB4OPB1_BASE 0x030
+#define DCRN_PLB4OPB2_BASE 0x040
+#define DCRN_PLB4OPB3_BASE 0x050
+
+#define PLB4OPB_GESR0 0x0 /* Error status 0: Master Dev 0-3 */
+#define PLB4OPB_GEAR 0x2 /* Error Address Register */
+#define PLB4OPB_GEARU 0x3 /* Error Upper Address Register */
+#define PLB4OPB_GESR1 0x4 /* Error Status 1: Master Dev 4-7 */
+#define PLB4OPB_GESR2 0xC /* Error Status 2: Master Dev 8-11 */
+
+/* PLB4-to-AHB Bridge */
+#define DCRN_PLB4AHB_BASE 0x400
+#define DCRN_PLB4AHB_SEUAR (DCRN_PLB4AHB_BASE + 1)
+#define DCRN_PLB4AHB_SELAR (DCRN_PLB4AHB_BASE + 2)
+#define DCRN_PLB4AHB_ESR (DCRN_PLB4AHB_BASE + 3)
+#define DCRN_AHBPLB4_ESR (DCRN_PLB4AHB_BASE + 8)
+#define DCRN_AHBPLB4_EAR (DCRN_PLB4AHB_BASE + 9)
+
+/* PLB6 Controller */
+#define DCRN_PLB6_BASE 0x11111300
+#define DCRN_PLB6_CR0 (DCRN_PLB6_BASE)
+#define DCRN_PLB6_ERR (DCRN_PLB6_BASE + 0x0B)
+#define DCRN_PLB6_HD (DCRN_PLB6_BASE + 0x0E)
+#define DCRN_PLB6_SHD (DCRN_PLB6_BASE + 0x10)
+
+/* PLB4-to-PLB6 Bridge */
+#define DCRN_PLB4PLB6_BASE 0x11111320
+#define DCRN_PLB4PLB6_ESR (DCRN_PLB4PLB6_BASE + 1)
+#define DCRN_PLB4PLB6_EARH (DCRN_PLB4PLB6_BASE + 3)
+#define DCRN_PLB4PLB6_EARL (DCRN_PLB4PLB6_BASE + 4)
+
+/* PLB6-to-PLB4 Bridge */
+#define DCRN_PLB6PLB4_BASE 0x11111350
+#define DCRN_PLB6PLB4_ESR (DCRN_PLB6PLB4_BASE + 1)
+#define DCRN_PLB6PLB4_EARH (DCRN_PLB6PLB4_BASE + 3)
+#define DCRN_PLB6PLB4_EARL (DCRN_PLB6PLB4_BASE + 4)
+
+/* PLB6-to-MCIF Bridge */
+#define DCRN_PLB6MCIF_BASE 0x11111380
+#define DCRN_PLB6MCIF_BESR0 (DCRN_PLB6MCIF_BASE + 0)
+#define DCRN_PLB6MCIF_BESR1 (DCRN_PLB6MCIF_BASE + 1)
+#define DCRN_PLB6MCIF_BEARL (DCRN_PLB6MCIF_BASE + 2)
+#define DCRN_PLB6MCIF_BEARH (DCRN_PLB6MCIF_BASE + 3)
+
+/* Configuration Logic Registers */
+#define DCRN_CONF_BASE 0x11111400
+#define DCRN_CONF_FIR_RWC (DCRN_CONF_BASE + 0x3A)
+#define DCRN_CONF_EIR_RS (DCRN_CONF_BASE + 0x3E)
+#define DCRN_CONF_RPERR0 (DCRN_CONF_BASE + 0x4D)
+#define DCRN_CONF_RPERR1 (DCRN_CONF_BASE + 0x4E)
+
+#define DCRN_L2CDCRAI 0x11111100
+#define DCRN_L2CDCRDI 0x11111104
+/* L2 indirect addresses */
+#define L2MCK 0x120
+#define L2MCKEN 0x130
+#define L2INT 0x150
+#define L2INTEN 0x160
+#define L2LOG0 0x180
+#define L2LOG1 0x184
+#define L2LOG2 0x188
+#define L2LOG3 0x18C
+#define L2LOG4 0x190
+#define L2LOG5 0x194
+#define L2PLBSTAT0 0x300
+#define L2PLBSTAT1 0x304
+#define L2PLBMCKEN0 0x330
+#define L2PLBMCKEN1 0x334
+#define L2PLBINTEN0 0x360
+#define L2PLBINTEN1 0x364
+#define L2ARRSTAT0 0x500
+#define L2ARRSTAT1 0x504
+#define L2ARRSTAT2 0x508
+#define L2ARRMCKEN0 0x530
+#define L2ARRMCKEN1 0x534
+#define L2ARRMCKEN2 0x538
+#define L2ARRINTEN0 0x560
+#define L2ARRINTEN1 0x564
+#define L2ARRINTEN2 0x568
+#define L2CPUSTAT 0x700
+#define L2CPUMCKEN 0x730
+#define L2CPUINTEN 0x760
+#define L2RACSTAT0 0x900
+#define L2RACMCKEN0 0x930
+#define L2RACINTEN0 0x960
+#define L2WACSTAT0 0xD00
+#define L2WACSTAT1 0xD04
+#define L2WACSTAT2 0xD08
+#define L2WACMCKEN0 0xD30
+#define L2WACMCKEN1 0xD34
+#define L2WACMCKEN2 0xD38
+#define L2WACINTEN0 0xD60
+#define L2WACINTEN1 0xD64
+#define L2WACINTEN2 0xD68
+#define L2WDFSTAT 0xF00
+#define L2WDFMCKEN 0xF30
+#define L2WDFINTEN 0xF60
+
+/* DDR3/4 Memory Controller */
+#define DCRN_DDR34_BASE 0x11120000
+#define DCRN_DDR34_MCSTAT 0x10
+#define DCRN_DDR34_MCOPT1 0x20
+#define DCRN_DDR34_MCOPT2 0x21
+#define DCRN_DDR34_PHYSTAT 0x32
+#define DCRN_DDR34_CFGR0 0x40
+#define DCRN_DDR34_CFGR1 0x41
+#define DCRN_DDR34_CFGR2 0x42
+#define DCRN_DDR34_CFGR3 0x43
+#define DCRN_DDR34_SCRUB_CNTL 0xAA
+#define DCRN_DDR34_SCRUB_INT 0xAB
+#define DCRN_DDR34_SCRUB_START_ADDR 0xB0
+#define DCRN_DDR34_SCRUB_END_ADDR 0xD0
+#define DCRN_DDR34_ECCERR_ADDR_PORT0 0xE0
+#define DCRN_DDR34_ECCERR_ADDR_PORT1 0xE1
+#define DCRN_DDR34_ECCERR_ADDR_PORT2 0xE2
+#define DCRN_DDR34_ECCERR_ADDR_PORT3 0xE3
+#define DCRN_DDR34_ECCERR_COUNT_PORT0 0xE4
+#define DCRN_DDR34_ECCERR_COUNT_PORT1 0xE5
+#define DCRN_DDR34_ECCERR_COUNT_PORT2 0xE6
+#define DCRN_DDR34_ECCERR_COUNT_PORT3 0xE7
+#define DCRN_DDR34_ECCERR_PORT0 0xF0
+#define DCRN_DDR34_ECCERR_PORT1 0xF2
+#define DCRN_DDR34_ECCERR_PORT2 0xF4
+#define DCRN_DDR34_ECCERR_PORT3 0xF6
+/*
+ * Per-port ECC check registers, one consecutive offset per port.
+ * PORT2 previously repeated PORT1's offset (0xF9), which made the two
+ * macros indistinguishable; 0xFA is the only value consistent with the
+ * F8/F9/../FB sequence.
+ * NOTE(review): confirm 0xFA against the DDR3/4 controller register map.
+ */
+#define DCRN_DDR34_ECC_CHECK_PORT0 0xF8
+#define DCRN_DDR34_ECC_CHECK_PORT1 0xF9
+#define DCRN_DDR34_ECC_CHECK_PORT2 0xFA
+#define DCRN_DDR34_ECC_CHECK_PORT3 0xFB
+
+#define DDR34_SCRUB_CNTL_STOP 0x00000000
+#define DDR34_SCRUB_CNTL_SCRUB 0x80000000
+#define DDR34_SCRUB_CNTL_UE_STOP 0x20000000
+#define DDR34_SCRUB_CNTL_CE_STOP 0x10000000
+#define DDR34_SCRUB_CNTL_RANK_EN 0x00008000
+
+/* PLB-Attached DDR3/4 Core Wrapper */
+#define DCRN_CW_BASE 0x11111800
+#define DCRN_CW_MCER0 0x00
+#define DCRN_CW_MCER1 0x01
+#define DCRN_CW_MCER_AND0 0x02
+#define DCRN_CW_MCER_AND1 0x03
+#define DCRN_CW_MCER_OR0 0x04
+#define DCRN_CW_MCER_OR1 0x05
+#define DCRN_CW_MCER_MASK0 0x06
+#define DCRN_CW_MCER_MASK1 0x07
+#define DCRN_CW_MCER_MASK_AND0 0x08
+#define DCRN_CW_MCER_MASK_AND1 0x09
+#define DCRN_CW_MCER_MASK_OR0 0x0A
+#define DCRN_CW_MCER_MASK_OR1 0x0B
+#define DCRN_CW_MCER_ACTION0 0x0C
+#define DCRN_CW_MCER_ACTION1 0x0D
+#define DCRN_CW_MCER_WOF0 0x0E
+#define DCRN_CW_MCER_WOF1 0x0F
+#define DCRN_CW_LFIR 0x10
+#define DCRN_CW_LFIR_AND 0x11
+#define DCRN_CW_LFIR_OR 0x12
+#define DCRN_CW_LFIR_MASK 0x13
+#define DCRN_CW_LFIR_MASK_AND 0x14
+#define DCRN_CW_LFIR_MASK_OR 0x15
+
+#define CW_MCER0_MEM_CE 0x00020000
+/* CMU addresses */
+#define CMUN_CRCS 0x00 /* Chip Reset Control/Status */
+#define CMUN_CONFFIR0 0x20 /* Config Reg Parity FIR 0 */
+#define CMUN_CONFFIR1 0x21 /* Config Reg Parity FIR 1 */
+#define CMUN_CONFFIR2 0x22 /* Config Reg Parity FIR 2 */
+#define CMUN_CONFFIR3 0x23 /* Config Reg Parity FIR 3 */
+#define CMUN_URCR3_RS 0x24 /* Unit Reset Control Reg 3 Set */
+#define CMUN_URCR3_C 0x25 /* Unit Reset Control Reg 3 Clear */
+#define CMUN_URCR3_P 0x26 /* Unit Reset Control Reg 3 Pulse */
+#define CMUN_PW0 0x2C /* Pulse Width Register */
+#define CMUN_URCR0_P 0x2D /* Unit Reset Control Reg 0 Pulse */
+#define CMUN_URCR1_P 0x2E /* Unit Reset Control Reg 1 Pulse */
+#define CMUN_URCR2_P 0x2F /* Unit Reset Control Reg 2 Pulse */
+#define CMUN_CLS_RW 0x30 /* Code Load Status (Read/Write) */
+#define CMUN_CLS_S 0x31 /* Code Load Status (Set) */
+#define CMUN_CLS_C 0x32 /* Code Load Status (Clear) */
+#define CMUN_URCR2_RS 0x33 /* Unit Reset Control Reg 2 Set */
+#define CMUN_URCR2_C 0x34 /* Unit Reset Control Reg 2 Clear */
+#define CMUN_CLKEN0 0x35 /* Clock Enable 0 */
+#define CMUN_CLKEN1 0x36 /* Clock Enable 1 */
+#define CMUN_PCD0 0x37 /* PSI clock divider 0 */
+#define CMUN_PCD1 0x38 /* PSI clock divider 1 */
+#define CMUN_TMR0 0x39 /* Reset Timer */
+#define CMUN_TVS0 0x3A /* TV Sense Reg 0 */
+#define CMUN_TVS1 0x3B /* TV Sense Reg 1 */
+#define CMUN_MCCR 0x3C /* DRAM Configuration Reg */
+#define CMUN_FIR0 0x3D /* Fault Isolation Reg 0 */
+#define CMUN_FMR0 0x3E /* FIR Mask Reg 0 */
+#define CMUN_ETDRB 0x3F /* ETDR Backdoor */
+
+/* CRCS bit fields */
+#define CRCS_STAT_MASK 0xF0000000
+#define CRCS_STAT_POR 0x10000000
+#define CRCS_STAT_PHR 0x20000000
+#define CRCS_STAT_PCIE 0x30000000
+#define CRCS_STAT_CRCS_SYS 0x40000000
+#define CRCS_STAT_DBCR_SYS 0x50000000
+#define CRCS_STAT_HOST_SYS 0x60000000
+#define CRCS_STAT_CHIP_RST_B 0x70000000
+#define CRCS_STAT_CRCS_CHIP 0x80000000
+#define CRCS_STAT_DBCR_CHIP 0x90000000
+#define CRCS_STAT_HOST_CHIP 0xA0000000
+#define CRCS_STAT_PSI_CHIP 0xB0000000
+#define CRCS_STAT_CRCS_CORE 0xC0000000
+#define CRCS_STAT_DBCR_CORE 0xD0000000
+#define CRCS_STAT_HOST_CORE 0xE0000000
+#define CRCS_STAT_PCIE_HOT 0xF0000000
+/* NOTE(review): these two repeat the CRCS_STAT_CRCS_SYS (0x4...) and CRCS_STAT_DBCR_SYS (0x5...) encodings - confirm the aliasing is intended. */
+#define CRCS_STAT_SELF_CORE 0x40000000
+#define CRCS_STAT_SELF_CHIP 0x50000000
+#define CRCS_WATCHE 0x08000000
+#define CRCS_CORE 0x04000000 /* Reset PPC440 core */
+#define CRCS_CHIP 0x02000000 /* Chip Reset */
+#define CRCS_SYS 0x01000000 /* System Reset */
+#define CRCS_WRCR 0x00800000 /* Watchdog reset on core reset */
+#define CRCS_EXTCR 0x00080000 /* CHIP_RST_B triggers chip reset */
+#define CRCS_PLOCK 0x00000002 /* PLL Locked */
+
+/* mtcmu - write @data to CMU register @reg: index to DCRN_CMU_ADDR, then value to DCRN_CMU_DATA. */
+/* NOTE(review): the two DCR writes are not serialized here - confirm callers cannot race on the ADDR/DATA pair. */
+#define mtcmu(reg, data) \
+do { \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ mtdcr(DCRN_CMU_DATA, data); \
+} while (0)
+
+/* mfcmu - read CMU register @reg: index to DCRN_CMU_ADDR, then evaluate to the u32 read from DCRN_CMU_DATA. */
+/* The statement expression declares a local named 'data'; avoid passing an argument that refers to a variable of that name. */
+#define mfcmu(reg)\
+ ({u32 data; \
+ mtdcr(DCRN_CMU_ADDR, reg); \
+ data = mfdcr(DCRN_CMU_DATA); \
+ data; })
+
+/* mtl2 - write @data to L2 indirect register @reg: index to DCRN_L2CDCRAI, then value to DCRN_L2CDCRDI. */
+#define mtl2(reg, data) \
+do { \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ mtdcr(DCRN_L2CDCRDI, data); \
+} while (0)
+
+/* mfl2 - read L2 indirect register @reg: index to DCRN_L2CDCRAI, then evaluate to the u32 read from DCRN_L2CDCRDI. */
+#define mfl2(reg) \
+ ({u32 data; \
+ mtdcr(DCRN_L2CDCRAI, reg); \
+ data = mfdcr(DCRN_L2CDCRDI); \
+ data; })
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_FSP_DCR_H_ */
diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/platforms/44x/gpio.c
index fc65ad1b3293..aea0d913b59d 100644
--- a/arch/powerpc/sysdev/ppc4xx_gpio.c
+++ b/arch/powerpc/platforms/44x/gpio.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* PPC4xx gpio driver
*
@@ -6,19 +7,6 @@
* Copyright (c) MontaVista Software, Inc. 2008.
*
* Author: Steve Falco <sfalco@harris.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/kernel.h>
@@ -26,10 +14,10 @@
#include <linux/spinlock.h>
#include <linux/io.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/types.h>
#include <linux/slab.h>
+#include <linux/platform_device.h>
#define GPIO_MASK(gpio) (0x80000000 >> (gpio))
#define GPIO_MASK2(gpio) (0xc0000000 >> ((gpio) * 2))
@@ -57,7 +45,8 @@ struct ppc4xx_gpio {
};
struct ppc4xx_gpio_chip {
- struct of_mm_gpio_chip mm_gc;
+ struct gpio_chip gc;
+ void __iomem *regs;
spinlock_t lock;
};
@@ -67,25 +56,19 @@ struct ppc4xx_gpio_chip {
* There are a maximum of 32 gpios in each gpio controller.
*/
-static inline struct ppc4xx_gpio_chip *
-to_ppc4xx_gpiochip(struct of_mm_gpio_chip *mm_gc)
-{
- return container_of(mm_gc, struct ppc4xx_gpio_chip, mm_gc);
-}
-
static int ppc4xx_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
- return in_be32(&regs->ir) & GPIO_MASK(gpio);
+ return !!(in_be32(&regs->ir) & GPIO_MASK(gpio));
}
static inline void
__ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
if (val)
setbits32(&regs->or, GPIO_MASK(gpio));
@@ -93,11 +76,9 @@ __ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
clrbits32(&regs->or, GPIO_MASK(gpio));
}
-static void
-ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+static int ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio_chip *chip = to_ppc4xx_gpiochip(mm_gc);
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
@@ -107,13 +88,14 @@ ppc4xx_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
spin_unlock_irqrestore(&chip->lock, flags);
pr_debug("%s: gpio: %d val: %d\n", __func__, gpio, val);
+
+ return 0;
}
static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio_chip *chip = to_ppc4xx_gpiochip(mm_gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
@@ -141,9 +123,8 @@ static int ppc4xx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
static int
ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct ppc4xx_gpio_chip *chip = to_ppc4xx_gpiochip(mm_gc);
- struct ppc4xx_gpio __iomem *regs = mm_gc->regs;
+ struct ppc4xx_gpio_chip *chip = gpiochip_get_data(gc);
+ struct ppc4xx_gpio __iomem *regs = chip->regs;
unsigned long flags;
spin_lock_irqsave(&chip->lock, flags);
@@ -173,43 +154,57 @@ ppc4xx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
return 0;
}
-static int __init ppc4xx_add_gpiochips(void)
+/*
+ * Probe one "ibm,ppc4xx-gpio" device: allocate the chip state, fill in the
+ * gpio_chip callbacks, map register resource 0 and register the chip.
+ * Every allocation is device-managed, so error paths simply return.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error
+ * reported by devm_of_iomap() / devm_gpiochip_add_data().
+ */
+static int ppc4xx_gpio_probe(struct platform_device *ofdev)
 {
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "ibm,ppc4xx-gpio") {
- int ret;
- struct ppc4xx_gpio_chip *ppc4xx_gc;
- struct of_mm_gpio_chip *mm_gc;
- struct gpio_chip *gc;
-
- ppc4xx_gc = kzalloc(sizeof(*ppc4xx_gc), GFP_KERNEL);
- if (!ppc4xx_gc) {
- ret = -ENOMEM;
- goto err;
- }
-
- spin_lock_init(&ppc4xx_gc->lock);
-
- mm_gc = &ppc4xx_gc->mm_gc;
- gc = &mm_gc->gc;
-
- gc->ngpio = 32;
- gc->direction_input = ppc4xx_gpio_dir_in;
- gc->direction_output = ppc4xx_gpio_dir_out;
- gc->get = ppc4xx_gpio_get;
- gc->set = ppc4xx_gpio_set;
-
- ret = of_mm_gpiochip_add(np, mm_gc);
- if (ret)
- goto err;
- continue;
-err:
- pr_err("%s: registration failed with status %d\n",
- np->full_name, ret);
- kfree(ppc4xx_gc);
- /* try others anyway */
- }
- return 0;
+ struct device *dev = &ofdev->dev;
+ struct device_node *np = dev->of_node;
+ struct ppc4xx_gpio_chip *chip;
+ struct gpio_chip *gc;
+
+ chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ spin_lock_init(&chip->lock);
+
+ gc = &chip->gc;
+
+ /*
+ * Tie the chip to its platform device so gpiolib can take the
+ * firmware node from it. The old of_mm_gpiochip_add() call was handed
+ * the node explicitly; without a parent (or fwnode) the chip would be
+ * registered with no node for device-tree consumers to match.
+ */
+ gc->parent = dev;
+ gc->base = -1;
+ gc->ngpio = 32;
+ gc->direction_input = ppc4xx_gpio_dir_in;
+ gc->direction_output = ppc4xx_gpio_dir_out;
+ gc->get = ppc4xx_gpio_get;
+ gc->set = ppc4xx_gpio_set;
+
+ /* Label the chip with the full device-tree path of its node. */
+ gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np);
+ if (!gc->label)
+ return -ENOMEM;
+
+ chip->regs = devm_of_iomap(dev, np, 0, NULL);
+ if (IS_ERR(chip->regs))
+ return PTR_ERR(chip->regs);
+
+ /* chip is the driver data that the callbacks fetch with gpiochip_get_data(). */
+ return devm_gpiochip_add_data(dev, gc, chip);
+}
+
+/* Device-tree match table: binds the driver to "ibm,ppc4xx-gpio" nodes. */
+static const struct of_device_id ppc4xx_gpio_match[] = {
+ {
+ .compatible = "ibm,ppc4xx-gpio",
+ },
+ {}, /* sentinel */
+};
+MODULE_DEVICE_TABLE(of, ppc4xx_gpio_match);
+
+/* Platform driver glue: ppc4xx_gpio_probe() runs for each device matched by ppc4xx_gpio_match. */
+static struct platform_driver ppc4xx_gpio_driver = {
+ .probe = ppc4xx_gpio_probe,
+ .driver = {
+ .name = "ppc4xx-gpio",
+ .of_match_table = ppc4xx_gpio_match,
+ },
+};
+
+/* Register the platform driver; returns platform_driver_register()'s result. Run as an arch_initcall. */
+static int __init ppc4xx_gpio_init(void)
+{
+ return platform_driver_register(&ppc4xx_gpio_driver);
 }
-arch_initcall(ppc4xx_add_gpiochips);
+arch_initcall(ppc4xx_gpio_init);
diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/platforms/44x/hsta_msi.c
index 11c888416f0a..c6bd846b0d65 100644
--- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c
+++ b/arch/powerpc/platforms/44x/hsta_msi.c
@@ -1,23 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MSI support for PPC4xx SoCs using High Speed Transfer Assist (HSTA) for
* generation of the interrupt.
*
* Copyright © 2013 Alistair Popple <alistair@popple.id.au> IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <linux/pci.h>
#include <linux/semaphore.h>
#include <asm/msi_bitmap.h>
+#include <asm/ppc-pci.h>
struct ppc4xx_hsta_msi {
struct device *dev;
@@ -44,7 +42,13 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
int irq, hwirq;
u64 addr;
- list_for_each_entry(entry, &dev->msi_list, list) {
+ /* We don't support MSI-X */
+ if (type == PCI_CAP_ID_MSIX) {
+ pr_debug("%s: MSI-X not supported.\n", __func__);
+ return -EINVAL;
+ }
+
+ msi_for_each_desc(entry, &dev->dev, MSI_DESC_NOTASSOCIATED) {
irq = msi_bitmap_alloc_hwirqs(&ppc4xx_hsta_msi.bmp, 1);
if (irq < 0) {
pr_debug("%s: Failed to allocate msi interrupt\n",
@@ -53,7 +57,7 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
}
hwirq = ppc4xx_hsta_msi.irq_map[irq];
- if (hwirq == NO_IRQ) {
+ if (!hwirq) {
pr_err("%s: Failed mapping irq %d\n", __func__, irq);
return -EINVAL;
}
@@ -79,7 +83,7 @@ static int hsta_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
return -EINVAL;
}
- write_msi_msg(hwirq, &msg);
+ pci_write_msi_msg(hwirq, &msg);
}
return 0;
@@ -102,10 +106,7 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev)
struct msi_desc *entry;
int irq;
- list_for_each_entry(entry, &dev->msi_list, list) {
- if (entry->irq == NO_IRQ)
- continue;
-
+ msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) {
irq = hsta_find_hwirq_offset(entry->irq);
/* entry->irq should always be in irq_map */
@@ -114,28 +115,19 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev)
msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__,
entry->irq, irq);
+ entry->irq = 0;
}
}
-static int hsta_msi_check_device(struct pci_dev *pdev, int nvec, int type)
-{
- /* We don't support MSI-X */
- if (type == PCI_CAP_ID_MSIX) {
- pr_debug("%s: MSI-X not supported.\n", __func__);
- return -EINVAL;
- }
-
- return 0;
-}
-
static int hsta_msi_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct resource *mem;
int irq, ret, irq_count;
+ struct pci_controller *phb;
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (IS_ERR(mem)) {
+ if (!mem) {
dev_err(dev, "Unable to get mmio space\n");
return -EINVAL;
}
@@ -150,7 +142,7 @@ static int hsta_msi_probe(struct platform_device *pdev)
ppc4xx_hsta_msi.address = mem->start;
ppc4xx_hsta_msi.data = ioremap(mem->start, resource_size(mem));
ppc4xx_hsta_msi.irq_count = irq_count;
- if (IS_ERR(ppc4xx_hsta_msi.data)) {
+ if (!ppc4xx_hsta_msi.data) {
dev_err(dev, "Unable to map memory\n");
return -ENOMEM;
}
@@ -159,8 +151,9 @@ static int hsta_msi_probe(struct platform_device *pdev)
if (ret)
goto out;
- ppc4xx_hsta_msi.irq_map = kmalloc(sizeof(int) * irq_count, GFP_KERNEL);
- if (IS_ERR(ppc4xx_hsta_msi.irq_map)) {
+ ppc4xx_hsta_msi.irq_map = kmalloc_array(irq_count, sizeof(int),
+ GFP_KERNEL);
+ if (!ppc4xx_hsta_msi.irq_map) {
ret = -ENOMEM;
goto out1;
}
@@ -169,16 +162,17 @@ static int hsta_msi_probe(struct platform_device *pdev)
for (irq = 0; irq < irq_count; irq++) {
ppc4xx_hsta_msi.irq_map[irq] =
irq_of_parse_and_map(dev->of_node, irq);
- if (ppc4xx_hsta_msi.irq_map[irq] == NO_IRQ) {
+ if (!ppc4xx_hsta_msi.irq_map[irq]) {
dev_err(dev, "Unable to map IRQ\n");
ret = -EINVAL;
goto out2;
}
}
- ppc_md.setup_msi_irqs = hsta_setup_msi_irqs;
- ppc_md.teardown_msi_irqs = hsta_teardown_msi_irqs;
- ppc_md.msi_check_device = hsta_msi_check_device;
+ list_for_each_entry(phb, &hose_list, list_node) {
+ phb->controller_ops.setup_msi_irqs = hsta_setup_msi_irqs;
+ phb->controller_ops.teardown_msi_irqs = hsta_teardown_msi_irqs;
+ }
return 0;
out2:
@@ -203,7 +197,6 @@ static struct platform_driver hsta_msi_driver = {
.probe = hsta_msi_probe,
.driver = {
.name = "hsta-msi",
- .owner = THIS_MODULE,
.of_match_table = hsta_msi_ids,
},
};
diff --git a/arch/powerpc/platforms/44x/idle.c b/arch/powerpc/platforms/44x/idle.c
index 7a81f921fef9..e2eeef8dff78 100644
--- a/arch/powerpc/platforms/44x/idle.c
+++ b/arch/powerpc/platforms/44x/idle.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright 2008 IBM Corp.
*
@@ -5,20 +6,6 @@
* Copyright (C) 2006-2007 PA Semi, Inc
*
* Added by: Jerone Young <jyoung5@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
*/
#include <linux/of.h>
@@ -40,7 +27,7 @@ static void ppc44x_idle(void)
isync();
}
-int __init ppc44x_idle_init(void)
+static int __init ppc44x_idle_init(void)
{
if (!mode_spin) {
/* If we are not setting spin mode
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index 4241bc825800..ef883d97fe15 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PPC476 board specific routines
*
@@ -12,11 +13,6 @@
*
* Rewritten and ported to the merged powerpc tree:
* Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
@@ -32,7 +28,7 @@
#include <asm/mpic.h>
#include <asm/mmu.h>
-static __initdata struct of_device_id iss4xx_of_bus[] = {
+static const struct of_device_id iss4xx_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,plb6", },
{ .compatible = "ibm,opb", },
@@ -56,7 +52,7 @@ static void __init iss4xx_init_irq(void)
/* Find top level interrupt controller */
for_each_node_with_property(np, "interrupt-controller") {
- if (of_get_property(np, "interrupts", NULL) == NULL)
+ if (!of_property_present(np, "interrupts"))
break;
}
if (np == NULL)
@@ -144,25 +140,11 @@ static void __init iss4xx_setup_arch(void)
iss4xx_smp_init();
}
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init iss4xx_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "ibm,iss-4xx"))
- return 0;
-
- return 1;
-}
-
define_machine(iss4xx) {
.name = "ISS-4xx",
- .probe = iss4xx_probe,
+ .compatible = "ibm,iss-4xx",
.progress = udbg_progress,
.init_IRQ = iss4xx_init_irq,
.setup_arch = iss4xx_setup_arch,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c
new file mode 100644
index 000000000000..85ff33a8d9b6
--- /dev/null
+++ b/arch/powerpc/platforms/44x/machine_check.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+#include <asm/cacheflush.h>
+
+/*
+ * machine_check_4xx - report a machine check taken in kernel mode
+ * @regs: exception frame; only regs->esr is examined
+ *
+ * When ESR[IMCP] is set the event is reported as an instruction machine
+ * check and the bit is written back cleared to SPRN_ESR; otherwise it is
+ * reported as a data machine check.
+ *
+ * Always returns 0.
+ */
+int machine_check_4xx(struct pt_regs *regs)
+{
+	unsigned long reason = regs->esr;
+	const char *side = "Data";
+
+	if (reason & ESR_IMCP) {
+		side = "Instruction";
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	}
+
+	/*
+	 * Emit the message with a single printk(): a follow-up printk()
+	 * without KERN_CONT starts a new log record, which would separate
+	 * the "Instruction"/"Data" prefix from the rest of the line.
+	 * KERN_ERR matches the level used by machine_check_47x().
+	 */
+	printk(KERN_ERR "%s machine check in kernel mode.\n", side);
+
+	return 0;
+}
+
+/*
+ * machine_check_440A - decode and report a machine check taken in kernel mode
+ * @regs: exception frame; only regs->esr is examined
+ *
+ * With ESR[IMCP] set, an instruction synchronous machine check is reported
+ * and the bit is written back cleared to SPRN_ESR. Otherwise SPRN_MCSR is
+ * read, one line is logged per cause bit found set, and the value read is
+ * written back to SPRN_MCSR.
+ *
+ * All messages are logged at KERN_ERR, matching machine_check_47x().
+ *
+ * Always returns 0.
+ */
+int machine_check_440A(struct pt_regs *regs)
+{
+	unsigned long reason = regs->esr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+	if (reason & ESR_IMCP) {
+		printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, reason & ~ESR_IMCP);
+	} else {
+		u32 mcsr = mfspr(SPRN_MCSR);
+
+		if (mcsr & MCSR_IB)
+			printk(KERN_ERR "Instruction Read PLB Error\n");
+		if (mcsr & MCSR_DRB)
+			printk(KERN_ERR "Data Read PLB Error\n");
+		if (mcsr & MCSR_DWB)
+			printk(KERN_ERR "Data Write PLB Error\n");
+		if (mcsr & MCSR_TLBP)
+			printk(KERN_ERR "TLB Parity Error\n");
+		if (mcsr & MCSR_ICP) {
+			/* Flush the instruction cache before reporting the parity error. */
+			flush_instruction_cache();
+			printk(KERN_ERR "I-Cache Parity Error\n");
+		}
+		if (mcsr & MCSR_DCSP)
+			printk(KERN_ERR "D-Cache Search Parity Error\n");
+		if (mcsr & MCSR_DCFP)
+			printk(KERN_ERR "D-Cache Flush Parity Error\n");
+		if (mcsr & MCSR_IMPE)
+			printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+		/* Clear MCSR by writing back the bits just read. */
+		mtspr(SPRN_MCSR, mcsr);
+	}
+	return 0;
+}
+
+#ifdef CONFIG_PPC_47x
+/*
+ * machine_check_47x - decode and report a machine check taken in kernel mode
+ * @regs: exception frame; only regs->esr is examined
+ *
+ * An instruction synchronous machine check (ESR[IMCP] set) is reported and
+ * the bit is written back cleared to SPRN_ESR. For anything else SPRN_MCSR
+ * is read, each cause bit found set is logged, and the value read is
+ * written back to SPRN_MCSR.
+ *
+ * Always returns 0.
+ */
+int machine_check_47x(struct pt_regs *regs)
+{
+	unsigned long esr = regs->esr;
+
+	printk(KERN_ERR "Machine check in kernel mode.\n");
+
+	if (!(esr & ESR_IMCP)) {
+		u32 status = mfspr(SPRN_MCSR);
+
+		if (status & MCSR_IB)
+			printk(KERN_ERR "Instruction Read PLB Error\n");
+		if (status & MCSR_DRB)
+			printk(KERN_ERR "Data Read PLB Error\n");
+		if (status & MCSR_DWB)
+			printk(KERN_ERR "Data Write PLB Error\n");
+		if (status & MCSR_TLBP)
+			printk(KERN_ERR "TLB Parity Error\n");
+		if (status & MCSR_ICP) {
+			/* Flush first, then report, as the other handlers do. */
+			flush_instruction_cache();
+			printk(KERN_ERR "I-Cache Parity Error\n");
+		}
+		if (status & MCSR_DCSP)
+			printk(KERN_ERR "D-Cache Search Parity Error\n");
+		if (status & PPC47x_MCSR_GPR)
+			printk(KERN_ERR "GPR Parity Error\n");
+		if (status & PPC47x_MCSR_FPR)
+			printk(KERN_ERR "FPR Parity Error\n");
+		if (status & PPC47x_MCSR_IPR)
+			printk(KERN_ERR "Machine Check exception is imprecise\n");
+
+		/* Clear MCSR by writing back the bits just read. */
+		mtspr(SPRN_MCSR, status);
+	} else {
+		printk(KERN_ERR "Instruction Synchronous Machine Check exception\n");
+		mtspr(SPRN_ESR, esr & ~ESR_IMCP);
+	}
+
+	return 0;
+}
+#endif /* CONFIG_PPC_47x */
diff --git a/arch/powerpc/platforms/44x/misc_44x.S b/arch/powerpc/platforms/44x/misc_44x.S
index dc12b8009e48..3a0c4bd3d6bf 100644
--- a/arch/powerpc/platforms/44x/misc_44x.S
+++ b/arch/powerpc/platforms/44x/misc_44x.S
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* This file contains miscellaneous low-level functions for PPC 44x.
* Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#include <asm/reg.h>
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/platforms/44x/pci.c
index df6e2fc4ff92..364aeb86ab64 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.c
+++ b/arch/powerpc/platforms/44x/pci.c
@@ -22,7 +22,7 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/of.h>
-#include <linux/bootmem.h>
+#include <linux/of_address.h>
#include <linux/delay.h>
#include <linux/slab.h>
@@ -33,7 +33,7 @@
#include <asm/dcr-regs.h>
#include <mm/mmu_decl.h>
-#include "ppc4xx_pci.h"
+#include "pci.h"
static int dma_offset_set;
@@ -57,7 +57,7 @@ static inline int ppc440spe_revA(void)
static void fixup_ppc4xx_pci_bridge(struct pci_dev *dev)
{
struct pci_controller *hose;
- int i;
+ struct resource *r;
if (dev->devfn != 0 || dev->bus->self != NULL)
return;
@@ -79,9 +79,9 @@ static void fixup_ppc4xx_pci_bridge(struct pci_dev *dev)
/* Hide the PCI host BARs from the kernel as their content doesn't
* fit well in the resource management
*/
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- dev->resource[i].start = dev->resource[i].end = 0;
- dev->resource[i].flags = 0;
+ pci_dev_for_each_resource(dev, r) {
+ r->start = r->end = 0;
+ r->flags = 0;
}
printk(KERN_INFO "PCI: Hiding 4xx host bridge resources %s\n",
@@ -94,10 +94,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
struct resource *res)
{
u64 size;
- const u32 *ranges;
- int rlen;
- int pna = of_n_addr_cells(hose->dn);
- int np = pna + 5;
+ struct of_range_parser parser;
+ struct of_range range;
/* Default */
res->start = 0;
@@ -105,18 +103,15 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
res->end = size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH;
- /* Get dma-ranges property */
- ranges = of_get_property(hose->dn, "dma-ranges", &rlen);
- if (ranges == NULL)
+ if (of_pci_dma_range_parser_init(&parser, hose->dn))
goto out;
- /* Walk it */
- while ((rlen -= np * 4) >= 0) {
- u32 pci_space = ranges[0];
- u64 pci_addr = of_read_number(ranges + 1, 2);
- u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3);
- size = of_read_number(ranges + pna + 3, 2);
- ranges += np;
+ for_each_of_range(&parser, &range) {
+ u32 pci_space = range.flags;
+ u64 pci_addr = range.bus_addr;
+ u64 cpu_addr = range.cpu_addr;
+ size = range.size;
+
if (cpu_addr == OF_BAD_ADDR || size == 0)
continue;
@@ -128,9 +123,9 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
* within 32 bits space
*/
if (cpu_addr != 0 || pci_addr > 0xffffffff) {
- printk(KERN_WARNING "%s: Ignored unsupported dma range"
+ printk(KERN_WARNING "%pOF: Ignored unsupported dma range"
" 0x%016llx...0x%016llx -> 0x%016llx\n",
- hose->dn->full_name,
+ hose->dn,
pci_addr, pci_addr + size - 1, cpu_addr);
continue;
}
@@ -153,8 +148,7 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
/* We only support one global DMA offset */
if (dma_offset_set && pci_dram_offset != res->start) {
- printk(KERN_ERR "%s: dma-ranges(s) mismatch\n",
- hose->dn->full_name);
+ printk(KERN_ERR "%pOF: dma-ranges(s) mismatch\n", hose->dn);
return -ENXIO;
}
@@ -162,17 +156,16 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
* DMA bounce buffers
*/
if (size < total_memory) {
- printk(KERN_ERR "%s: dma-ranges too small "
+ printk(KERN_ERR "%pOF: dma-ranges too small "
"(size=%llx total_memory=%llx)\n",
- hose->dn->full_name, size, (u64)total_memory);
+ hose->dn, size, (u64)total_memory);
return -ENXIO;
}
/* Check we are a power of 2 size and that base is a multiple of size*/
if ((size & (size - 1)) != 0 ||
(res->start & (size - 1)) != 0) {
- printk(KERN_ERR "%s: dma-ranges unaligned\n",
- hose->dn->full_name);
+ printk(KERN_ERR "%pOF: dma-ranges unaligned\n", hose->dn);
return -ENXIO;
}
@@ -182,8 +175,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose,
if (res->end > 0xffffffff &&
!(of_device_is_compatible(hose->dn, "ibm,plb-pciex-460sx")
|| of_device_is_compatible(hose->dn, "ibm,plb-pciex-476fpe"))) {
- printk(KERN_ERR "%s: dma-ranges outside of 32 bits space\n",
- hose->dn->full_name);
+ printk(KERN_ERR "%pOF: dma-ranges outside of 32 bits space\n",
+ hose->dn);
return -ENXIO;
}
out:
@@ -234,8 +227,7 @@ static int __init ppc4xx_setup_one_pci_PMM(struct pci_controller *hose,
*/
if ((plb_addr + size) > 0xffffffffull || !is_power_of_2(size) ||
size < 0x1000 || (plb_addr & (size - 1)) != 0) {
- printk(KERN_WARNING "%s: Resource out of range\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
return -1;
}
ma = (0xffffffffu << ilog2(size)) | 1;
@@ -267,8 +259,7 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
if (!(res->flags & IORESOURCE_MEM))
continue;
if (j > 2) {
- printk(KERN_WARNING "%s: Too many ranges\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
break;
}
@@ -293,8 +284,8 @@ static void __init ppc4xx_configure_pci_PMMs(struct pci_controller *hose,
if (j <= 2 && !found_isa_hole && hose->isa_mem_size)
if (ppc4xx_setup_one_pci_PMM(hose, reg, hose->isa_mem_phys, 0,
hose->isa_mem_size, 0, j) == 0)
- printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
- hose->dn->full_name);
+ printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+ hose->dn);
}
static void __init ppc4xx_configure_pci_PTMs(struct pci_controller *hose,
@@ -334,26 +325,25 @@ static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
/* Check if device is enabled */
if (!of_device_is_available(np)) {
- printk(KERN_INFO "%s: Port disabled via device-tree\n",
- np->full_name);
+ printk(KERN_INFO "%pOF: Port disabled via device-tree\n", np);
return;
}
/* Fetch config space registers address */
if (of_address_to_resource(np, 0, &rsrc_cfg)) {
- printk(KERN_ERR "%s: Can't get PCI config register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI config register base !",
+ np);
return;
}
/* Fetch host bridge internal registers address */
if (of_address_to_resource(np, 3, &rsrc_reg)) {
- printk(KERN_ERR "%s: Can't get PCI internal register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI internal register base !",
+ np);
return;
}
/* Check if primary bridge */
- if (of_get_property(np, "primary", NULL))
+ if (of_property_read_bool(np, "primary"))
primary = 1;
/* Get bus range if any */
@@ -362,7 +352,7 @@ static void __init ppc4xx_probe_pci_bridge(struct device_node *np)
/* Map registers */
reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
if (reg == NULL) {
- printk(KERN_ERR "%s: Can't map registers !", np->full_name);
+ printk(KERN_ERR "%pOF: Can't map registers !", np);
goto fail;
}
@@ -424,8 +414,8 @@ static int __init ppc4xx_setup_one_pcix_POM(struct pci_controller *hose,
if (!is_power_of_2(size) || size < 0x1000 ||
(plb_addr & (size - 1)) != 0) {
- printk(KERN_WARNING "%s: Resource out of range\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Resource out of range\n",
+ hose->dn);
return -1;
}
@@ -468,8 +458,7 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
if (!(res->flags & IORESOURCE_MEM))
continue;
if (j > 1) {
- printk(KERN_WARNING "%s: Too many ranges\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Too many ranges\n", hose->dn);
break;
}
@@ -494,8 +483,8 @@ static void __init ppc4xx_configure_pcix_POMs(struct pci_controller *hose,
if (j <= 1 && !found_isa_hole && hose->isa_mem_size)
if (ppc4xx_setup_one_pcix_POM(hose, reg, hose->isa_mem_phys, 0,
hose->isa_mem_size, 0, j) == 0)
- printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
- hose->dn->full_name);
+ printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+ hose->dn);
}
static void __init ppc4xx_configure_pcix_PIMs(struct pci_controller *hose,
@@ -536,32 +525,29 @@ static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
struct pci_controller *hose = NULL;
void __iomem *reg = NULL;
const int *bus_range;
- int big_pim = 0, msi = 0, primary = 0;
+ int big_pim, msi, primary;
/* Fetch config space registers address */
if (of_address_to_resource(np, 0, &rsrc_cfg)) {
- printk(KERN_ERR "%s:Can't get PCI-X config register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI-X config register base !",
+ np);
return;
}
/* Fetch host bridge internal registers address */
if (of_address_to_resource(np, 3, &rsrc_reg)) {
- printk(KERN_ERR "%s: Can't get PCI-X internal register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI-X internal register base !",
+ np);
return;
}
/* Check if it supports large PIMs (440GX) */
- if (of_get_property(np, "large-inbound-windows", NULL))
- big_pim = 1;
+ big_pim = of_property_read_bool(np, "large-inbound-windows");
/* Check if we should enable MSIs inbound hole */
- if (of_get_property(np, "enable-msi-hole", NULL))
- msi = 1;
+ msi = of_property_read_bool(np, "enable-msi-hole");
/* Check if primary bridge */
- if (of_get_property(np, "primary", NULL))
- primary = 1;
+ primary = of_property_read_bool(np, "primary");
/* Get bus range if any */
bus_range = of_get_property(np, "bus-range", NULL);
@@ -569,7 +555,7 @@ static void __init ppc4xx_probe_pcix_bridge(struct device_node *np)
/* Map registers */
reg = ioremap(rsrc_reg.start, resource_size(&rsrc_reg));
if (reg == NULL) {
- printk(KERN_ERR "%s: Can't map registers !", np->full_name);
+ printk(KERN_ERR "%pOF: Can't map registers !", np);
goto fail;
}
@@ -1247,9 +1233,9 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
if (mbase == NULL) {
- printk(KERN_ERR "%s: Can't map internal config space !",
- port->node->full_name);
- goto done;
+ printk(KERN_ERR "%pOF: Can't map internal config space !",
+ port->node);
+ return;
}
while (attempt && (0 == (in_le32(mbase + PECFG_460SX_DLLSTA)
@@ -1259,9 +1245,7 @@ static void __init ppc460sx_pciex_check_link(struct ppc4xx_pciex_port *port)
}
if (attempt)
port->link = 1;
-done:
iounmap(mbase);
-
}
static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = {
@@ -1274,102 +1258,6 @@ static struct ppc4xx_pciex_hwops ppc460sx_pcie_hwops __initdata = {
#endif /* CONFIG_44x */
-#ifdef CONFIG_40x
-
-static int __init ppc405ex_pciex_core_init(struct device_node *np)
-{
- /* Nothing to do, return 2 ports */
- return 2;
-}
-
-static void ppc405ex_pcie_phy_reset(struct ppc4xx_pciex_port *port)
-{
- /* Assert the PE0_PHY reset */
- mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01010000);
- msleep(1);
-
- /* deassert the PE0_hotreset */
- if (port->endpoint)
- mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01111000);
- else
- mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x01101000);
-
- /* poll for phy !reset */
- /* XXX FIXME add timeout */
- while (!(mfdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSTA) & 0x00001000))
- ;
-
- /* deassert the PE0_gpl_utl_reset */
- mtdcri(SDR0, port->sdr_base + PESDRn_RCSSET, 0x00101000);
-}
-
-static int __init ppc405ex_pciex_init_port_hw(struct ppc4xx_pciex_port *port)
-{
- u32 val;
-
- if (port->endpoint)
- val = PTYPE_LEGACY_ENDPOINT;
- else
- val = PTYPE_ROOT_PORT;
-
- mtdcri(SDR0, port->sdr_base + PESDRn_DLPSET,
- 1 << 24 | val << 20 | LNKW_X1 << 12);
-
- mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET1, 0x00000000);
- mtdcri(SDR0, port->sdr_base + PESDRn_UTLSET2, 0x01010000);
- mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET1, 0x720F0000);
- mtdcri(SDR0, port->sdr_base + PESDRn_405EX_PHYSET2, 0x70600003);
-
- /*
- * Only reset the PHY when no link is currently established.
- * This is for the Atheros PCIe board which has problems to establish
- * the link (again) after this PHY reset. All other currently tested
- * PCIe boards don't show this problem.
- * This has to be re-tested and fixed in a later release!
- */
- val = mfdcri(SDR0, port->sdr_base + PESDRn_LOOP);
- if (!(val & 0x00001000))
- ppc405ex_pcie_phy_reset(port);
-
- dcr_write(port->dcrs, DCRO_PEGPL_CFG, 0x10000000); /* guarded on */
-
- port->has_ibpre = 1;
-
- return ppc4xx_pciex_port_reset_sdr(port);
-}
-
-static int ppc405ex_pciex_init_utl(struct ppc4xx_pciex_port *port)
-{
- dcr_write(port->dcrs, DCRO_PEGPL_SPECIAL, 0x0);
-
- /*
- * Set buffer allocations and then assert VRB and TXE.
- */
- out_be32(port->utl_base + PEUTL_OUTTR, 0x02000000);
- out_be32(port->utl_base + PEUTL_INTR, 0x02000000);
- out_be32(port->utl_base + PEUTL_OPDBSZ, 0x04000000);
- out_be32(port->utl_base + PEUTL_PBBSZ, 0x21000000);
- out_be32(port->utl_base + PEUTL_IPHBSZ, 0x02000000);
- out_be32(port->utl_base + PEUTL_IPDBSZ, 0x04000000);
- out_be32(port->utl_base + PEUTL_RCIRQEN, 0x00f00000);
- out_be32(port->utl_base + PEUTL_PCTL, 0x80800066);
-
- out_be32(port->utl_base + PEUTL_PBCTL, 0x08000000);
-
- return 0;
-}
-
-static struct ppc4xx_pciex_hwops ppc405ex_pcie_hwops __initdata =
-{
- .want_sdr = true,
- .core_init = ppc405ex_pciex_core_init,
- .port_init_hw = ppc405ex_pciex_init_port_hw,
- .setup_utl = ppc405ex_pciex_init_utl,
- .check_link = ppc4xx_pciex_check_link_sdr,
-};
-
-#endif /* CONFIG_40x */
-
#ifdef CONFIG_476FPE
static int __init ppc_476fpe_pciex_core_init(struct device_node *np)
{
@@ -1390,7 +1278,7 @@ static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
port->index);
return;
}
-
+
while (timeout_ms--) {
val = in_le32(mbase + PECFG_TLDLP);
@@ -1406,7 +1294,6 @@ static void __init ppc_476fpe_pciex_check_link(struct ppc4xx_pciex_port *port)
printk(KERN_WARNING "PCIE%d: Link up failed\n", port->index);
iounmap(mbase);
- return;
}
static struct ppc4xx_pciex_hwops ppc_476fpe_pcie_hwops __initdata =
@@ -1439,25 +1326,20 @@ static int __init ppc4xx_pciex_check_core_init(struct device_node *np)
if (of_device_is_compatible(np, "ibm,plb-pciex-apm821xx"))
ppc4xx_pciex_hwops = &apm821xx_pcie_hwops;
#endif /* CONFIG_44x */
-#ifdef CONFIG_40x
- if (of_device_is_compatible(np, "ibm,plb-pciex-405ex"))
- ppc4xx_pciex_hwops = &ppc405ex_pcie_hwops;
-#endif
#ifdef CONFIG_476FPE
if (of_device_is_compatible(np, "ibm,plb-pciex-476fpe")
|| of_device_is_compatible(np, "ibm,plb-pciex-476gtr"))
ppc4xx_pciex_hwops = &ppc_476fpe_pcie_hwops;
#endif
if (ppc4xx_pciex_hwops == NULL) {
- printk(KERN_WARNING "PCIE: unknown host type %s\n",
- np->full_name);
+ printk(KERN_WARNING "PCIE: unknown host type %pOF\n", np);
return -ENODEV;
}
count = ppc4xx_pciex_hwops->core_init(np);
if (count > 0) {
ppc4xx_pciex_ports =
- kzalloc(count * sizeof(struct ppc4xx_pciex_port),
+ kcalloc(count, sizeof(struct ppc4xx_pciex_port),
GFP_KERNEL);
if (ppc4xx_pciex_ports) {
ppc4xx_pciex_port_count = count;
@@ -1731,8 +1613,7 @@ static int __init ppc4xx_setup_one_pciex_POM(struct ppc4xx_pciex_port *port,
(index < 2 && size < 0x100000) ||
(index == 2 && size < 0x100) ||
(plb_addr & (size - 1)) != 0) {
- printk(KERN_WARNING "%s: Resource out of range\n",
- hose->dn->full_name);
+ printk(KERN_WARNING "%pOF: Resource out of range\n", hose->dn);
return -1;
}
@@ -1808,8 +1689,8 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
if (!(res->flags & IORESOURCE_MEM))
continue;
if (j > 1) {
- printk(KERN_WARNING "%s: Too many ranges\n",
- port->node->full_name);
+ printk(KERN_WARNING "%pOF: Too many ranges\n",
+ port->node);
break;
}
@@ -1835,8 +1716,8 @@ static void __init ppc4xx_configure_pciex_POMs(struct ppc4xx_pciex_port *port,
if (ppc4xx_setup_one_pciex_POM(port, hose, mbase,
hose->isa_mem_phys, 0,
hose->isa_mem_size, 0, j) == 0)
- printk(KERN_INFO "%s: Legacy ISA memory support enabled\n",
- hose->dn->full_name);
+ printk(KERN_INFO "%pOF: Legacy ISA memory support enabled\n",
+ hose->dn);
/* Configure IO, always 64K starting at 0. We hard wire it to 64K !
* Note also that it -has- to be region index 2 on this HW
@@ -1926,14 +1807,13 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
struct resource dma_window;
struct pci_controller *hose = NULL;
const int *bus_range;
- int primary = 0, busses;
+ int primary, busses;
void __iomem *mbase = NULL, *cfg_data = NULL;
const u32 *pval;
u32 val;
/* Check if primary bridge */
- if (of_get_property(port->node, "primary", NULL))
- primary = 1;
+ primary = of_property_read_bool(port->node, "primary");
/* Get bus range if any */
bus_range = of_get_property(port->node, "bus-range", NULL);
@@ -1971,8 +1851,8 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
(hose->first_busno + 1) * 0x100000,
busses * 0x100000);
if (cfg_data == NULL) {
- printk(KERN_ERR "%s: Can't map external config space !",
- port->node->full_name);
+ printk(KERN_ERR "%pOF: Can't map external config space !",
+ port->node);
goto fail;
}
hose->cfg_data = cfg_data;
@@ -1983,13 +1863,13 @@ static void __init ppc4xx_pciex_port_setup_hose(struct ppc4xx_pciex_port *port)
*/
mbase = ioremap(port->cfg_space.start + 0x10000000, 0x1000);
if (mbase == NULL) {
- printk(KERN_ERR "%s: Can't map internal config space !",
- port->node->full_name);
+ printk(KERN_ERR "%pOF: Can't map internal config space !",
+ port->node);
goto fail;
}
hose->cfg_addr = mbase;
- pr_debug("PCIE %s, bus %d..%d\n", port->node->full_name,
+ pr_debug("PCIE %pOF, bus %d..%d\n", port->node,
hose->first_busno, hose->last_busno);
pr_debug(" config space mapped at: root @0x%p, other @0x%p\n",
hose->cfg_addr, hose->cfg_data);
@@ -2090,7 +1970,6 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
const u32 *pval;
int portno;
unsigned int dcrs;
- const char *val;
/* First, proceed to core initialization as we assume there's
* only one PCIe core in the system
@@ -2101,14 +1980,13 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
/* Get the port number from the device-tree */
pval = of_get_property(np, "port", NULL);
if (pval == NULL) {
- printk(KERN_ERR "PCIE: Can't find port number for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: Can't find port number for %pOF\n", np);
return;
}
portno = *pval;
if (portno >= ppc4xx_pciex_port_count) {
- printk(KERN_ERR "PCIE: port number out of range for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: port number out of range for %pOF\n",
+ np);
return;
}
port = &ppc4xx_pciex_ports[portno];
@@ -2126,8 +2004,8 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
if (ppc4xx_pciex_hwops->want_sdr) {
pval = of_get_property(np, "sdr-base", NULL);
if (pval == NULL) {
- printk(KERN_ERR "PCIE: missing sdr-base for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: missing sdr-base for %pOF\n",
+ np);
return;
}
port->sdr_base = *pval;
@@ -2137,35 +2015,31 @@ static void __init ppc4xx_probe_pciex_bridge(struct device_node *np)
* Resulting from this setup this PCIe port will be configured
* as root-complex or as endpoint.
*/
- val = of_get_property(port->node, "device_type", NULL);
- if (!strcmp(val, "pci-endpoint")) {
+ if (of_node_is_type(port->node, "pci-endpoint")) {
port->endpoint = 1;
- } else if (!strcmp(val, "pci")) {
+ } else if (of_node_is_type(port->node, "pci")) {
port->endpoint = 0;
} else {
- printk(KERN_ERR "PCIE: missing or incorrect device_type for %s\n",
- np->full_name);
+ printk(KERN_ERR "PCIE: missing or incorrect device_type for %pOF\n",
+ np);
return;
}
/* Fetch config space registers address */
if (of_address_to_resource(np, 0, &port->cfg_space)) {
- printk(KERN_ERR "%s: Can't get PCI-E config space !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get PCI-E config space !", np);
return;
}
/* Fetch host bridge internal registers address */
if (of_address_to_resource(np, 1, &port->utl_regs)) {
- printk(KERN_ERR "%s: Can't get UTL register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get UTL register base !", np);
return;
}
/* Map DCRs */
dcrs = dcr_resource_start(np, 0);
if (dcrs == 0) {
- printk(KERN_ERR "%s: Can't get DCR register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get DCR register base !", np);
return;
}
port->dcrs = dcr_map(np, dcrs, dcr_resource_len(np, 0));
diff --git a/arch/powerpc/sysdev/ppc4xx_pci.h b/arch/powerpc/platforms/44x/pci.h
index bb4821938ab1..bb4821938ab1 100644
--- a/arch/powerpc/sysdev/ppc4xx_pci.h
+++ b/arch/powerpc/platforms/44x/pci.h
diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c
index 3ffb915446e3..971786ff1a7b 100644
--- a/arch/powerpc/platforms/44x/ppc44x_simple.c
+++ b/arch/powerpc/platforms/44x/ppc44x_simple.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Generic PowerPC 44x platform support
*
* Copyright 2008 IBM Corporation
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- *
* This implements simple platform support for PowerPC 44x chips. This is
* mostly used for eval boards or other simple and "generic" 44x boards. If
* your board has custom functions or hardware, then you will likely want to
@@ -16,7 +13,6 @@
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/ppc4xx.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <asm/udbg.h>
#include <asm/uic.h>
@@ -24,7 +20,7 @@
#include <linux/init.h>
#include <linux/of_platform.h>
-static __initdata struct of_device_id ppc44x_of_bus[] = {
+static const struct of_device_id ppc44x_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,opb", },
{ .compatible = "ibm,ebc", },
@@ -67,11 +63,10 @@ static char *board[] __initdata = {
static int __init ppc44x_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
int i = 0;
for (i = 0; i < ARRAY_SIZE(board); i++) {
- if (of_flat_dt_is_compatible(root, board[i])) {
+ if (of_machine_is_compatible(board[i])) {
pci_set_flags(PCI_REASSIGN_ALL_RSRC);
return 1;
}
@@ -87,5 +82,4 @@ define_machine(ppc44x_simple) {
.init_IRQ = uic_init_tree,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/44x/ppc476.c b/arch/powerpc/platforms/44x/ppc476.c
index 33986c1a05da..e7b7bdaad341 100644
--- a/arch/powerpc/platforms/44x/ppc476.c
+++ b/arch/powerpc/platforms/44x/ppc476.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PowerPC 476FPE board specific routines
*
@@ -14,31 +15,27 @@
* Rewritten and ported to the merged powerpc tree:
* Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
* Copyright © 2011 David Kliekamp IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/rtc.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/time.h>
#include <asm/uic.h>
#include <asm/ppc4xx.h>
#include <asm/mpic.h>
#include <asm/mmu.h>
+#include <asm/swiotlb.h>
#include <linux/pci.h>
#include <linux/i2c.h>
-static struct of_device_id ppc47x_of_bus[] __initdata = {
+static const struct of_device_id ppc47x_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,plb6", },
{ .compatible = "ibm,opb", },
@@ -68,7 +65,7 @@ DECLARE_PCI_FIXUP_HEADER(0x1033, 0x0035, quirk_ppc_currituck_usb_fixup);
#define AVR_PWRCTL_RESET (0x02)
static struct i2c_client *avr_i2c_client;
-static void avr_halt_system(int pwrctl_flags)
+static void __noreturn avr_halt_system(int pwrctl_flags)
{
/* Request the AVR to reset the system */
i2c_smbus_write_byte_data(avr_i2c_client,
@@ -84,22 +81,21 @@ static void avr_power_off_system(void)
avr_halt_system(AVR_PWRCTL_PWROFF);
}
-static void avr_reset_system(char *cmd)
+static void __noreturn avr_reset_system(char *cmd)
{
avr_halt_system(AVR_PWRCTL_RESET);
}
-static int avr_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int avr_probe(struct i2c_client *client)
{
avr_i2c_client = client;
ppc_md.restart = avr_reset_system;
- ppc_md.power_off = avr_power_off_system;
+ pm_power_off = avr_power_off_system;
return 0;
}
static const struct i2c_device_id avr_id[] = {
- { "akebono-avr", 0 },
+ { "akebono-avr" },
{ }
};
@@ -118,7 +114,8 @@ static int __init ppc47x_device_probe(void)
return 0;
}
-machine_device_initcall(ppc47x, ppc47x_device_probe);
+machine_device_initcall(ppc47x_akebono, ppc47x_device_probe);
+machine_device_initcall(ppc47x_currituck, ppc47x_device_probe);
static void __init ppc47x_init_irq(void)
{
@@ -126,7 +123,7 @@ static void __init ppc47x_init_irq(void)
/* Find top level interrupt controller */
for_each_node_with_property(np, "interrupt-controller") {
- if (of_get_property(np, "interrupts", NULL) == NULL)
+ if (!of_property_present(np, "interrupts"))
break;
}
if (np == NULL)
@@ -144,6 +141,8 @@ static void __init ppc47x_init_irq(void)
ppc_md.get_irq = mpic_get_irq;
} else
panic("Unrecognized top level interrupt controller");
+
+ of_node_put(np);
}
#ifdef CONFIG_SMP
@@ -223,7 +222,7 @@ static int board_rev = -1;
static int __init ppc47x_get_board_rev(void)
{
int reg;
- u8 *fpga;
+ u8 __iomem *fpga;
struct device_node *np = NULL;
if (of_machine_is_compatible("ibm,currituck")) {
@@ -237,7 +236,7 @@ static int __init ppc47x_get_board_rev(void)
if (!np)
goto fail;
- fpga = (u8 *) of_iomap(np, 0);
+ fpga = of_iomap(np, 0);
of_node_put(np);
if (!fpga)
goto fail;
@@ -251,7 +250,8 @@ fail:
pr_info("%s: Unable to find board revision\n", __func__);
return 0;
}
-machine_arch_initcall(ppc47x, ppc47x_get_board_rev);
+machine_arch_initcall(ppc47x_akebono, ppc47x_get_board_rev);
+machine_arch_initcall(ppc47x_currituck, ppc47x_get_board_rev);
/* Use USB controller should have been hardware swizzled but it wasn't :( */
static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
@@ -270,30 +270,21 @@ static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
}
}
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init ppc47x_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "ibm,akebono"))
- return 1;
-
- if (of_flat_dt_is_compatible(root, "ibm,currituck")) {
- ppc_md.pci_irq_fixup = ppc47x_pci_irq_fixup;
- return 1;
- }
-
- return 0;
-}
+define_machine(ppc47x_akebono) {
+ .name = "PowerPC 47x (akebono)",
+ .compatible = "ibm,akebono",
+ .progress = udbg_progress,
+ .init_IRQ = ppc47x_init_irq,
+ .setup_arch = ppc47x_setup_arch,
+ .restart = ppc4xx_reset_system,
+};
-define_machine(ppc47x) {
- .name = "PowerPC 47x",
- .probe = ppc47x_probe,
+define_machine(ppc47x_currituck) {
+ .name = "PowerPC 47x (currituck)",
+ .compatible = "ibm,currituck",
.progress = udbg_progress,
.init_IRQ = ppc47x_init_irq,
+ .pci_irq_fixup = ppc47x_pci_irq_fixup,
.setup_arch = ppc47x_setup_arch,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/44x/sam440ep.c b/arch/powerpc/platforms/44x/sam440ep.c
index 9e09b835758b..5cdaa4068e41 100644
--- a/arch/powerpc/platforms/44x/sam440ep.c
+++ b/arch/powerpc/platforms/44x/sam440ep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Sam440ep board specific routines based off bamboo.c code
* original copyrights below
@@ -11,17 +12,11 @@
*
* Modified from bamboo.c for sam440ep:
* Copyright 2008 Giuseppe Coviello <gicoviello@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
#include <linux/of_platform.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/time.h>
#include <asm/uic.h>
@@ -29,7 +24,7 @@
#include <asm/ppc4xx.h>
#include <linux/i2c.h>
-static __initdata struct of_device_id sam440ep_of_bus[] = {
+static const struct of_device_id sam440ep_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,opb", },
{ .compatible = "ibm,ebc", },
@@ -46,11 +41,6 @@ machine_device_initcall(sam440ep, sam440ep_device_probe);
static int __init sam440ep_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "acube,sam440ep"))
- return 0;
-
pci_set_flags(PCI_REASSIGN_ALL_RSRC);
return 1;
@@ -58,12 +48,12 @@ static int __init sam440ep_probe(void)
define_machine(sam440ep) {
.name = "Sam440ep",
+ .compatible = "acube,sam440ep",
.probe = sam440ep_probe,
.progress = udbg_progress,
.init_IRQ = uic_init_tree,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
static struct i2c_board_info sam440ep_rtc_info = {
@@ -72,7 +62,7 @@ static struct i2c_board_info sam440ep_rtc_info = {
.irq = -1,
};
-static int sam440ep_setup_rtc(void)
+static int __init sam440ep_setup_rtc(void)
{
return i2c_register_board_info(0, &sam440ep_rtc_info, 1);
}
diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/platforms/44x/soc.c
index 5c77c9ba33aa..5412e6b21e10 100644
--- a/arch/powerpc/sysdev/ppc4xx_soc.c
+++ b/arch/powerpc/platforms/44x/soc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* IBM/AMCC PPC4xx SoC setup code
*
@@ -6,11 +7,6 @@
* L2 cache routines cloned from arch/ppc/syslib/ibm440gx_common.c which is:
* Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
* Copyright (c) 2003 - 2006 Zultys Technologies
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -19,12 +15,13 @@
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <asm/dcr.h>
#include <asm/dcr-regs.h>
#include <asm/reg.h>
+#include <asm/ppc4xx.h>
static u32 dcrbase_l2c;
@@ -90,7 +87,7 @@ static int __init ppc4xx_l2c_probe(void)
/* Get l2 cache size */
prop = of_get_property(np, "cache-size", NULL);
if (prop == NULL) {
- printk(KERN_ERR "%s: Can't get cache-size!\n", np->full_name);
+ printk(KERN_ERR "%pOF: Can't get cache-size!\n", np);
of_node_put(np);
return -ENODEV;
}
@@ -99,8 +96,7 @@ static int __init ppc4xx_l2c_probe(void)
/* Map DCRs */
dcrreg = of_get_property(np, "dcr-reg", &len);
if (!dcrreg || (len != 4 * sizeof(u32))) {
- printk(KERN_ERR "%s: Can't get DCR register base !",
- np->full_name);
+ printk(KERN_ERR "%pOF: Can't get DCR register base !", np);
of_node_put(np);
return -ENODEV;
}
@@ -109,14 +105,14 @@ static int __init ppc4xx_l2c_probe(void)
/* Get and map irq number from device tree */
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_ERR "irq_of_parse_and_map failed\n");
of_node_put(np);
return -ENODEV;
}
/* Install error handler */
- if (request_irq(irq, l2c_error_handler, 0, "L2C", 0) < 0) {
+ if (request_irq(irq, l2c_error_handler, 0, "L2C", NULL) < 0) {
printk(KERN_ERR "Cannot install L2C error handler"
", cache is not enabled\n");
of_node_put(np);
@@ -201,7 +197,7 @@ void ppc4xx_reset_system(char *cmd)
u32 reset_type = DBCR0_RST_SYSTEM;
const u32 *prop;
- np = of_find_node_by_type(NULL, "cpu");
+ np = of_get_cpu_node(0, NULL);
if (np) {
prop = of_get_property(np, "reset-type", NULL);
diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/platforms/44x/uic.c
index 92033936a8f7..85daf841fd3f 100644
--- a/arch/powerpc/sysdev/uic.c
+++ b/arch/powerpc/platforms/44x/uic.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/sysdev/uic.c
*
* IBM PowerPC 4xx Universal Interrupt Controller
*
* Copyright 2007 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
@@ -19,15 +15,16 @@
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/device.h>
-#include <linux/bootmem.h>
#include <linux/spinlock.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <asm/irq.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/dcr.h>
+#include <asm/uic.h>
#define NR_UIC_INTS 32
@@ -40,7 +37,7 @@
#define UIC_VR 0x7
#define UIC_VCR 0x8
-struct uic *primary_uic;
+static struct uic *primary_uic;
struct uic {
int index;
@@ -159,6 +156,7 @@ static int uic_set_irq_type(struct irq_data *d, unsigned int flow_type)
mtdcr(uic->dcrbase + UIC_PR, pr);
mtdcr(uic->dcrbase + UIC_TR, tr);
+ mtdcr(uic->dcrbase + UIC_SR, ~mask);
raw_spin_unlock_irqrestore(&uic->lock, flags);
@@ -190,19 +188,18 @@ static int uic_host_map(struct irq_domain *h, unsigned int virq,
return 0;
}
-static struct irq_domain_ops uic_host_ops = {
+static const struct irq_domain_ops uic_host_ops = {
.map = uic_host_map,
.xlate = irq_domain_xlate_twocell,
};
-void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void uic_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irq_data *idata = irq_desc_get_irq_data(desc);
- struct uic *uic = irq_get_handler_data(virq);
+ struct uic *uic = irq_desc_get_handler_data(desc);
u32 msr;
int src;
- int subvirq;
raw_spin_lock(&desc->lock);
if (irqd_is_level_type(idata))
@@ -217,8 +214,7 @@ void uic_irq_cascade(unsigned int virq, struct irq_desc *desc)
src = 32 - ffs(msr);
- subvirq = irq_linear_revmap(uic->irqhost, src);
- generic_handle_irq(subvirq);
+ generic_handle_domain_irq(uic->irqhost, src);
uic_irq_ret:
raw_spin_lock(&desc->lock);
@@ -244,22 +240,23 @@ static struct uic * __init uic_init_one(struct device_node *node)
raw_spin_lock_init(&uic->lock);
indexp = of_get_property(node, "cell-index", &len);
if (!indexp || (len != sizeof(u32))) {
- printk(KERN_ERR "uic: Device node %s has missing or invalid "
- "cell-index property\n", node->full_name);
+ printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+ "cell-index property\n", node);
return NULL;
}
uic->index = *indexp;
dcrreg = of_get_property(node, "dcr-reg", &len);
if (!dcrreg || (len != 2*sizeof(u32))) {
- printk(KERN_ERR "uic: Device node %s has missing or invalid "
- "dcr-reg property\n", node->full_name);
+ printk(KERN_ERR "uic: Device node %pOF has missing or invalid "
+ "dcr-reg property\n", node);
return NULL;
}
uic->dcrbase = *dcrreg;
- uic->irqhost = irq_domain_add_linear(node, NR_UIC_INTS, &uic_host_ops,
- uic);
+ uic->irqhost = irq_domain_create_linear(of_fwnode_handle(node),
+ NR_UIC_INTS, &uic_host_ops,
+ uic);
if (! uic->irqhost)
return NULL; /* FIXME: panic? */
@@ -293,9 +290,9 @@ void __init uic_init_tree(void)
* top-level interrupt controller */
primary_uic = uic_init_one(np);
if (!primary_uic)
- panic("Unable to initialize primary UIC %s\n", np->full_name);
+ panic("Unable to initialize primary UIC %pOF\n", np);
- irq_set_default_host(primary_uic->irqhost);
+ irq_set_default_domain(primary_uic->irqhost);
of_node_put(np);
/* The scan again for cascaded UICs */
@@ -307,8 +304,8 @@ void __init uic_init_tree(void)
uic = uic_init_one(np);
if (! uic)
- panic("Unable to initialize a secondary UIC %s\n",
- np->full_name);
+ panic("Unable to initialize a secondary UIC %pOF\n",
+ np);
cascade_virq = irq_of_parse_and_map(np, 0);
@@ -320,7 +317,7 @@ void __init uic_init_tree(void)
}
}
-/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
+/* Return an interrupt vector or 0 if no interrupt is pending. */
unsigned int uic_get_irq(void)
{
u32 msr;
@@ -331,5 +328,5 @@ unsigned int uic_get_irq(void)
msr = mfdcr(primary_uic->dcrbase + UIC_MSR);
src = 32 - ffs(msr);
- return irq_linear_revmap(primary_uic->irqhost, src);
+ return irq_find_mapping(primary_uic->irqhost, src);
}
diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c
deleted file mode 100644
index cf96ccaa760c..000000000000
--- a/arch/powerpc/platforms/44x/virtex.c
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Xilinx Virtex 5FXT based board support, derived from
- * the Xilinx Virtex (IIpro & 4FX) based board support
- *
- * Copyright 2007 Secret Lab Technologies Ltd.
- * Copyright 2008 Xilinx, Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/time.h>
-#include <asm/xilinx_intc.h>
-#include <asm/xilinx_pci.h>
-#include <asm/reg.h>
-#include <asm/ppc4xx.h>
-#include "44x.h"
-
-static struct of_device_id xilinx_of_bus_ids[] __initdata = {
- { .compatible = "simple-bus", },
- { .compatible = "xlnx,plb-v46-1.00.a", },
- { .compatible = "xlnx,plb-v46-1.02.a", },
- { .compatible = "xlnx,plb-v34-1.01.a", },
- { .compatible = "xlnx,plb-v34-1.02.a", },
- { .compatible = "xlnx,opb-v20-1.10.c", },
- { .compatible = "xlnx,dcr-v29-1.00.a", },
- { .compatible = "xlnx,compound", },
- {}
-};
-
-static int __init virtex_device_probe(void)
-{
- of_platform_bus_probe(NULL, xilinx_of_bus_ids, NULL);
-
- return 0;
-}
-machine_device_initcall(virtex, virtex_device_probe);
-
-static int __init virtex_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "xlnx,virtex440"))
- return 0;
-
- return 1;
-}
-
-define_machine(virtex) {
- .name = "Xilinx Virtex440",
- .probe = virtex_probe,
- .setup_arch = xilinx_pci_init,
- .init_IRQ = xilinx_intc_init_tree,
- .get_irq = xilinx_intc_get_irq,
- .calibrate_decr = generic_calibrate_decr,
- .restart = ppc4xx_reset_system,
-};
diff --git a/arch/powerpc/platforms/44x/virtex_ml510.c b/arch/powerpc/platforms/44x/virtex_ml510.c
deleted file mode 100644
index 1fdb8748638d..000000000000
--- a/arch/powerpc/platforms/44x/virtex_ml510.c
+++ /dev/null
@@ -1,29 +0,0 @@
-#include <asm/i8259.h>
-#include <linux/pci.h>
-#include "44x.h"
-
-/**
- * ml510_ail_quirk
- */
-static void ml510_ali_quirk(struct pci_dev *dev)
-{
- /* Enable the IDE controller */
- pci_write_config_byte(dev, 0x58, 0x4c);
- /* Assign irq 14 to the primary ide channel */
- pci_write_config_byte(dev, 0x44, 0x0d);
- /* Assign irq 15 to the secondary ide channel */
- pci_write_config_byte(dev, 0x75, 0x0f);
- /* Set the ide controller in native mode */
- pci_write_config_byte(dev, 0x09, 0xff);
-
- /* INTB = disabled, INTA = disabled */
- pci_write_config_byte(dev, 0x48, 0x00);
- /* INTD = disabled, INTC = disabled */
- pci_write_config_byte(dev, 0x4a, 0x00);
- /* Audio = INT7, Modem = disabled. */
- pci_write_config_byte(dev, 0x4b, 0x60);
- /* USB = INT7 */
- pci_write_config_byte(dev, 0x74, 0x06);
-}
-DECLARE_PCI_FIXUP_EARLY(0x10b9, 0x1533, ml510_ali_quirk);
-
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index 3a104284b338..a5001d32f978 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -1,26 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* PIKA Warp(tm) board specific routines
*
* Copyright (c) 2008-2009 PIKA Technologies
* Sean MacLennan <smaclennan@pikatech.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
+#include <linux/err.h>
#include <linux/init.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/kthread.h>
+#include <linux/leds.h>
#include <linux/i2c.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
-#include <linux/of_gpio.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/gpio/consumer.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/time.h>
#include <asm/uic.h>
@@ -28,7 +28,7 @@
#include <asm/dma.h>
-static __initdata struct of_device_id warp_of_bus[] = {
+static const struct of_device_id warp_of_bus[] __initconst = {
{ .compatible = "ibm,plb4", },
{ .compatible = "ibm,opb", },
{ .compatible = "ibm,ebc", },
@@ -42,27 +42,13 @@ static int __init warp_device_probe(void)
}
machine_device_initcall(warp, warp_device_probe);
-static int __init warp_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (!of_flat_dt_is_compatible(root, "pika,warp"))
- return 0;
-
- /* For __dma_alloc_coherent */
- ISA_DMA_THRESHOLD = ~0L;
-
- return 1;
-}
-
define_machine(warp) {
.name = "Warp",
- .probe = warp_probe,
+ .compatible = "pika,warp",
.progress = udbg_progress,
.init_IRQ = uic_init_tree,
.get_irq = uic_get_irq,
.restart = ppc4xx_reset_system,
- .calibrate_decr = generic_calibrate_decr,
};
@@ -98,60 +84,44 @@ static int __init warp_post_info(void)
#ifdef CONFIG_SENSORS_AD7414
-static LIST_HEAD(dtm_shutdown_list);
static void __iomem *dtm_fpga;
-static unsigned green_led, red_led;
-
-struct dtm_shutdown {
- struct list_head list;
- void (*func)(void *arg);
- void *arg;
+#define WARP_GREEN_LED 0
+#define WARP_RED_LED 1
+
+static struct gpio_led warp_gpio_led_pins[] = {
+ [WARP_GREEN_LED] = {
+ .name = "green",
+ .default_state = LEDS_DEFSTATE_KEEP,
+ .gpiod = NULL, /* to be filled by pika_setup_leds() */
+ },
+ [WARP_RED_LED] = {
+ .name = "red",
+ .default_state = LEDS_DEFSTATE_KEEP,
+ .gpiod = NULL, /* to be filled by pika_setup_leds() */
+ },
};
+static struct gpio_led_platform_data warp_gpio_led_data = {
+ .leds = warp_gpio_led_pins,
+ .num_leds = ARRAY_SIZE(warp_gpio_led_pins),
+};
-int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
-{
- struct dtm_shutdown *shutdown;
-
- shutdown = kmalloc(sizeof(struct dtm_shutdown), GFP_KERNEL);
- if (shutdown == NULL)
- return -ENOMEM;
-
- shutdown->func = func;
- shutdown->arg = arg;
-
- list_add(&shutdown->list, &dtm_shutdown_list);
-
- return 0;
-}
-
-int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
-{
- struct dtm_shutdown *shutdown;
-
- list_for_each_entry(shutdown, &dtm_shutdown_list, list)
- if (shutdown->func == func && shutdown->arg == arg) {
- list_del(&shutdown->list);
- kfree(shutdown);
- return 0;
- }
-
- return -EINVAL;
-}
+static struct platform_device warp_gpio_leds = {
+ .name = "leds-gpio",
+ .id = -1,
+ .dev = {
+ .platform_data = &warp_gpio_led_data,
+ },
+};
static irqreturn_t temp_isr(int irq, void *context)
{
- struct dtm_shutdown *shutdown;
int value = 1;
local_irq_disable();
- gpio_set_value(green_led, 0);
-
- /* Run through the shutdown list. */
- list_for_each_entry(shutdown, &dtm_shutdown_list, list)
- shutdown->func(shutdown->arg);
+ gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n");
@@ -161,7 +131,7 @@ static irqreturn_t temp_isr(int irq, void *context)
out_be32(dtm_fpga + 0x14, reset);
}
- gpio_set_value(red_led, value);
+ gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value);
value ^= 1;
mdelay(500);
}
@@ -170,25 +140,78 @@ static irqreturn_t temp_isr(int irq, void *context)
return IRQ_HANDLED;
}
+/*
+ * Because green and red power LEDs are normally driven by leds-gpio driver,
+ * but in case of critical temperature shutdown we want to drive them
+ * ourselves, we acquire both and then create leds-gpio platform device
+ * ourselves, instead of doing it through device tree. This way we can still
+ * keep access to the gpios and use them when needed.
+ */
static int pika_setup_leds(void)
{
struct device_node *np, *child;
+ struct gpio_desc *gpio;
+ struct gpio_led *led;
+ int led_count = 0;
+ int error;
+ int i;
- np = of_find_compatible_node(NULL, NULL, "gpio-leds");
+ np = of_find_compatible_node(NULL, NULL, "warp-power-leds");
if (!np) {
printk(KERN_ERR __FILE__ ": Unable to find leds\n");
return -ENOENT;
}
- for_each_child_of_node(np, child)
- if (strcmp(child->name, "green") == 0)
- green_led = of_get_gpio(child, 0);
- else if (strcmp(child->name, "red") == 0)
- red_led = of_get_gpio(child, 0);
+ for_each_child_of_node(np, child) {
+ for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+ led = &warp_gpio_led_pins[i];
+
+ if (!of_node_name_eq(child, led->name))
+ continue;
+
+ if (led->gpiod) {
+ printk(KERN_ERR __FILE__ ": %s led has already been defined\n",
+ led->name);
+ continue;
+ }
+
+ gpio = fwnode_gpiod_get_index(of_fwnode_handle(child),
+ NULL, 0, GPIOD_ASIS,
+ led->name);
+ error = PTR_ERR_OR_ZERO(gpio);
+ if (error) {
+ printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n",
+ led->name, error);
+ of_node_put(child);
+ goto err_cleanup_pins;
+ }
+
+ led->gpiod = gpio;
+ led_count++;
+ }
+ }
of_node_put(np);
+ /* Skip device registration if no leds have been defined */
+ if (led_count) {
+ error = platform_device_register(&warp_gpio_leds);
+ if (error) {
+ printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n",
+ error);
+ goto err_cleanup_pins;
+ }
+ }
+
return 0;
+
+err_cleanup_pins:
+ for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+ led = &warp_gpio_led_pins[i];
+ gpiod_put(led->gpiod);
+ led->gpiod = NULL;
+ }
+ return error;
}
static void pika_setup_critical_temp(struct device_node *np,
@@ -206,7 +229,7 @@ static void pika_setup_critical_temp(struct device_node *np,
i2c_smbus_write_byte_data(client, 3, 0); /* Tlow */
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
printk(KERN_ERR __FILE__ ": Unable to get ad7414 irq\n");
return;
}
@@ -302,19 +325,6 @@ machine_late_initcall(warp, pika_dtm_start);
#else /* !CONFIG_SENSORS_AD7414 */
-int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
-{
- return 0;
-}
-
-int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
-{
- return 0;
-}
-
machine_late_initcall(warp, warp_post_info);
#endif
-
-EXPORT_SYMBOL(pika_dtm_register_shutdown);
-EXPORT_SYMBOL(pika_dtm_unregister_shutdown);
diff --git a/arch/powerpc/platforms/512x/Kconfig b/arch/powerpc/platforms/512x/Kconfig
index 5aa3f4b5332c..deecede78776 100644
--- a/arch/powerpc/platforms/512x/Kconfig
+++ b/arch/powerpc/platforms/512x/Kconfig
@@ -1,14 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
config PPC_MPC512x
bool "512x-based boards"
- depends on 6xx
+ depends on PPC_BOOK3S_32
select COMMON_CLK
select FSL_SOC
select IPIC
- select PPC_PCI_CHOICE
+ select HAVE_PCI
select FSL_PCI if PCI
- select ARCH_WANT_OPTIONAL_GPIOLIB
- select USB_EHCI_BIG_ENDIAN_MMIO
- select USB_EHCI_BIG_ENDIAN_DESC
+ select USB_EHCI_BIG_ENDIAN_MMIO if USB_EHCI_HCD
+ select USB_EHCI_BIG_ENDIAN_DESC if USB_EHCI_HCD
+
+config MPC512x_LPBFIFO
+ tristate "MPC512x LocalPlus Bus FIFO driver"
+ depends on PPC_MPC512x && MPC512X_DMA
+ help
+ Enable support for Freescale MPC512x LocalPlus Bus FIFO (SCLPC).
config MPC5121_ADS
bool "Freescale MPC5121E ADS"
diff --git a/arch/powerpc/platforms/512x/Makefile b/arch/powerpc/platforms/512x/Makefile
index 01693121a2b1..2daf22ee26a0 100644
--- a/arch/powerpc/platforms/512x/Makefile
+++ b/arch/powerpc/platforms/512x/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the Freescale PowerPC 512x linux kernel.
#
@@ -5,4 +6,5 @@ obj-$(CONFIG_COMMON_CLK) += clock-commonclk.o
obj-y += mpc512x_shared.o
obj-$(CONFIG_MPC5121_ADS) += mpc5121_ads.o mpc5121_ads_cpld.o
obj-$(CONFIG_MPC512x_GENERIC) += mpc512x_generic.o
+obj-$(CONFIG_MPC512x_LPBFIFO) += mpc512x_lpbfifo.o
obj-$(CONFIG_PDM360NG) += pdm360ng.o
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index 6eb614a271fb..079cb3627eac 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -1,17 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2013 DENX Software Engineering
*
* Gerhard Sittig, <gsi@denx.de>
*
* common clock driver support for the MPC512x platform
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/bitops.h>
+#include <linux/clk.h>
#include <linux/clk-provider.h>
#include <linux/clkdev.h>
#include <linux/device.h>
@@ -100,7 +97,7 @@ static enum soc_type {
MPC512x_SOC_MPC5125,
} soc;
-static void mpc512x_clk_determine_soc(void)
+static void __init mpc512x_clk_determine_soc(void)
{
if (of_machine_is_compatible("fsl,mpc5121")) {
soc = MPC512x_SOC_MPC5121;
@@ -116,98 +113,98 @@ static void mpc512x_clk_determine_soc(void)
}
}
-static bool soc_has_mbx(void)
+static bool __init soc_has_mbx(void)
{
if (soc == MPC512x_SOC_MPC5121)
return true;
return false;
}
-static bool soc_has_axe(void)
+static bool __init soc_has_axe(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_viu(void)
+static bool __init soc_has_viu(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_spdif(void)
+static bool __init soc_has_spdif(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_pata(void)
+static bool __init soc_has_pata(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_sata(void)
+static bool __init soc_has_sata(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_pci(void)
+static bool __init soc_has_pci(void)
{
if (soc == MPC512x_SOC_MPC5125)
return false;
return true;
}
-static bool soc_has_fec2(void)
+static bool __init soc_has_fec2(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static int soc_max_pscnum(void)
+static int __init soc_max_pscnum(void)
{
if (soc == MPC512x_SOC_MPC5125)
return 10;
return 12;
}
-static bool soc_has_sdhc2(void)
+static bool __init soc_has_sdhc2(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_nfc_5125(void)
+static bool __init soc_has_nfc_5125(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_outclk(void)
+static bool __init soc_has_outclk(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_cpmf_0_bypass(void)
+static bool __init soc_has_cpmf_0_bypass(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
return false;
}
-static bool soc_has_mclk_mux0_canin(void)
+static bool __init soc_has_mclk_mux0_canin(void)
{
if (soc == MPC512x_SOC_MPC5125)
return true;
@@ -220,7 +217,7 @@ static bool soc_has_mclk_mux0_canin(void)
/* convenience wrappers around the common clk API */
static inline struct clk *mpc512x_clk_fixed(const char *name, int rate)
{
- return clk_register_fixed_rate(NULL, name, NULL, CLK_IS_ROOT, rate);
+ return clk_register_fixed_rate(NULL, name, NULL, 0, rate);
}
static inline struct clk *mpc512x_clk_factor(
@@ -238,6 +235,7 @@ static inline struct clk *mpc512x_clk_divider(
const char *name, const char *parent_name, u8 clkflags,
u32 __iomem *reg, u8 pos, u8 len, int divflags)
{
+ divflags |= CLK_DIVIDER_BIG_ENDIAN;
return clk_register_divider(NULL, name, parent_name, clkflags,
reg, pos, len, divflags, &clklock);
}
@@ -249,7 +247,7 @@ static inline struct clk *mpc512x_clk_divtable(
{
u8 divflags;
- divflags = 0;
+ divflags = CLK_DIVIDER_BIG_ENDIAN;
return clk_register_divider_table(NULL, name, parent_name, 0,
reg, pos, len, divflags,
divtab, &clklock);
@@ -260,10 +258,12 @@ static inline struct clk *mpc512x_clk_gated(
u32 __iomem *reg, u8 pos)
{
int clkflags;
+ u8 gateflags;
clkflags = CLK_SET_RATE_PARENT;
+ gateflags = CLK_GATE_BIG_ENDIAN;
return clk_register_gate(NULL, name, parent_name, clkflags,
- reg, pos, 0, &clklock);
+ reg, pos, gateflags, &clklock);
}
static inline struct clk *mpc512x_clk_muxed(const char *name,
@@ -274,7 +274,7 @@ static inline struct clk *mpc512x_clk_muxed(const char *name,
u8 muxflags;
clkflags = CLK_SET_RATE_PARENT;
- muxflags = 0;
+ muxflags = CLK_MUX_BIG_ENDIAN;
return clk_register_mux(NULL, name,
parent_names, parent_count, clkflags,
reg, pos, len, muxflags, &clklock);
@@ -294,7 +294,7 @@ static inline int get_bit_field(uint32_t __iomem *reg, uint8_t pos, uint8_t len)
}
/* get the SPMF and translate it into the "sys pll" multiplier */
-static int get_spmf_mult(void)
+static int __init get_spmf_mult(void)
{
static int spmf_to_mult[] = {
68, 1, 12, 16, 20, 24, 28, 32,
@@ -312,7 +312,7 @@ static int get_spmf_mult(void)
* values returned from here are a multiple of the real factor since the
* divide ratio is fractional
*/
-static int get_sys_div_x2(void)
+static int __init get_sys_div_x2(void)
{
static int sysdiv_code_to_x2[] = {
4, 5, 6, 7, 8, 9, 10, 14,
@@ -333,7 +333,7 @@ static int get_sys_div_x2(void)
* values returned from here are a multiple of the real factor since the
* multiplier ratio is fractional
*/
-static int get_cpmf_mult_x2(void)
+static int __init get_cpmf_mult_x2(void)
{
static int cpmf_to_mult_x36[] = {
/* 0b000 is "times 36" */
@@ -362,7 +362,7 @@ static int get_cpmf_mult_x2(void)
*/
/* applies to the IPS_DIV, and PCI_DIV values */
-static struct clk_div_table divtab_2346[] = {
+static const struct clk_div_table divtab_2346[] = {
{ .val = 2, .div = 2, },
{ .val = 3, .div = 3, },
{ .val = 4, .div = 4, },
@@ -371,7 +371,7 @@ static struct clk_div_table divtab_2346[] = {
};
/* applies to the MBX_DIV, LPC_DIV, and NFC_DIV values */
-static struct clk_div_table divtab_1234[] = {
+static const struct clk_div_table divtab_1234[] = {
{ .val = 1, .div = 1, },
{ .val = 2, .div = 2, },
{ .val = 3, .div = 3, },
@@ -379,7 +379,7 @@ static struct clk_div_table divtab_1234[] = {
{ .div = 0, },
};
-static int get_freq_from_dt(char *propname)
+static int __init get_freq_from_dt(char *propname)
{
struct device_node *np;
const unsigned int *prop;
@@ -396,7 +396,7 @@ static int get_freq_from_dt(char *propname)
return val;
}
-static void mpc512x_clk_preset_data(void)
+static void __init mpc512x_clk_preset_data(void)
{
size_t i;
@@ -418,7 +418,7 @@ static void mpc512x_clk_preset_data(void)
* SYS -> CSB -> IPS) from the REF clock rate and the returned mul/div
* values
*/
-static void mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
+static void __init mpc512x_clk_setup_ref_clock(struct device_node *np, int bus_freq,
int *sys_mul, int *sys_div,
int *ips_div)
{
@@ -592,7 +592,7 @@ static struct mclk_setup_data mclk_outclk_data[] = {
};
/* setup the MCLK clock subtree of an individual PSC/MSCAN/SPDIF */
-static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
+static void __init mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
{
size_t clks_idx_pub, clks_idx_int;
u32 __iomem *mccr_reg; /* MCLK control register (mux, en, div) */
@@ -663,7 +663,7 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
* the PSC/MSCAN/SPDIF (serial drivers et al) need the MCLK
* for their bitrate
* - in the absence of "aliases" for clocks we need to create
- * individial 'struct clk' items for whatever might get
+ * individual 'struct clk' items for whatever might get
* referenced or looked up, even if several of those items are
* identical from the logical POV (their rate value)
* - for easier future maintenance and for better reflection of
@@ -701,7 +701,7 @@ static void mpc512x_clk_setup_mclk(struct mclk_setup_data *entry, size_t idx)
/* }}} MCLK helpers */
-static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
+static void __init mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
{
int sys_mul, sys_div, ips_div;
int mul, div;
@@ -718,7 +718,7 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
* most one of a mux, div, and gate each into one 'struct clk'
* item
* - PSC/MSCAN/SPDIF clock generation OTOH already is very
- * specific and cannot get mapped to componsites (at least not
+ * specific and cannot get mapped to composites (at least not
* a single one, maybe two of them, but then some of these
* intermediate clock signals get referenced elsewhere (e.g.
* in the clock frequency measurement, CFM) and thus need
@@ -937,7 +937,7 @@ static void mpc512x_clk_setup_clock_tree(struct device_node *np, int busfreq)
* registers the set of public clocks (those listed in the dt-bindings/
* header file) for OF lookups, keeps the intermediates private to us
*/
-static void mpc5121_clk_register_of_provider(struct device_node *np)
+static void __init mpc5121_clk_register_of_provider(struct device_node *np)
{
clk_data.clks = clks;
clk_data.clk_num = MPC512x_CLK_LAST_PUBLIC + 1; /* _not_ ARRAY_SIZE() */
@@ -948,9 +948,9 @@ static void mpc5121_clk_register_of_provider(struct device_node *np)
* temporary support for the period of time between introduction of CCF
* support and the adjustment of peripheral drivers to OF based lookups
*/
-static void mpc5121_clk_provide_migration_support(void)
+static void __init mpc5121_clk_provide_migration_support(void)
{
-
+ struct device_node *np;
/*
* pre-enable those clock items which are not yet appropriately
* acquired by their peripheral driver
@@ -970,7 +970,9 @@ static void mpc5121_clk_provide_migration_support(void)
* unused and so it gets disabled
*/
clk_prepare_enable(clks[MPC512x_CLK_PSC3_MCLK]);/* serial console */
- if (of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci"))
+ np = of_find_compatible_node(NULL, "pci", "fsl,mpc5121-pci");
+ of_node_put(np);
+ if (np)
clk_prepare_enable(clks[MPC512x_CLK_PCI]);
}
@@ -984,7 +986,7 @@ static void mpc5121_clk_provide_migration_support(void)
#define NODE_PREP do { \
of_address_to_resource(np, 0, &res); \
- snprintf(devname, sizeof(devname), "%08x.%s", res.start, np->name); \
+ snprintf(devname, sizeof(devname), "%pa.%s", &res.start, np->name); \
} while (0)
#define NODE_CHK(clkname, clkitem, regnode, regflag) do { \
@@ -1009,7 +1011,7 @@ static void mpc5121_clk_provide_migration_support(void)
* case of not yet adjusted device tree data, where clock related specs
* are missing)
*/
-static void mpc5121_clk_provide_backwards_compat(void)
+static void __init mpc5121_clk_provide_backwards_compat(void)
{
enum did_reg_flags {
DID_REG_PSC = BIT(0),
@@ -1168,6 +1170,11 @@ static void mpc5121_clk_provide_backwards_compat(void)
}
}
+/*
+ * The "fixed-clock" nodes (which includes the oscillator node if the board's
+ * DT provides one) has already been scanned by the of_clk_init() in
+ * time_init().
+ */
int __init mpc5121_clk_init(void)
{
struct device_node *clk_np;
@@ -1187,12 +1194,6 @@ int __init mpc5121_clk_init(void)
mpc512x_clk_preset_data();
/*
- * have the device tree scanned for "fixed-clock" nodes (which
- * includes the oscillator node if the board's DT provides one)
- */
- of_clk_init(NULL);
-
- /*
* add a dummy clock for those situations where a clock spec is
* required yet no real clock is involved
*/
@@ -1209,6 +1210,8 @@ int __init mpc5121_clk_init(void)
/* register as an OF clock provider */
mpc5121_clk_register_of_provider(clk_np);
+ of_node_put(clk_np);
+
/*
* unbreak not yet adjusted peripheral drivers during migration
* towards fully operational common clock support, and allow
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 3e90ece10ae9..a18f85b3ef36 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2007, 2008 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -5,21 +6,14 @@
*
* Description:
* MPC5121 ADS board setup
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <linux/kernel.h>
#include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <sysdev/fsl_pci.h>
@@ -29,21 +23,23 @@
static void __init mpc5121_ads_setup_arch(void)
{
-#ifdef CONFIG_PCI
- struct device_node *np;
-#endif
printk(KERN_INFO "MPC5121 ADS board from Freescale Semiconductor\n");
/*
* cpld regs are needed early
*/
mpc5121_ads_cpld_map();
+ mpc512x_setup_arch();
+}
+
+static void __init mpc5121_ads_setup_pci(void)
+{
#ifdef CONFIG_PCI
+ struct device_node *np;
+
for_each_compatible_node(np, "pci", "fsl,mpc5121-pci")
mpc83xx_add_bridge(np);
#endif
-
- mpc512x_setup_arch();
}
static void __init mpc5121_ads_init_IRQ(void)
@@ -57,19 +53,19 @@ static void __init mpc5121_ads_init_IRQ(void)
*/
static int __init mpc5121_ads_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
+ mpc512x_init_early();
- return of_flat_dt_is_compatible(root, "fsl,mpc5121ads");
+ return 1;
}
define_machine(mpc5121_ads) {
.name = "MPC5121 ADS",
+ .compatible = "fsl,mpc5121ads",
.probe = mpc5121_ads_probe,
.setup_arch = mpc5121_ads_setup_arch,
+ .discover_phbs = mpc5121_ads_setup_pci,
.init = mpc512x_init,
- .init_early = mpc512x_init_early,
.init_IRQ = mpc5121_ads_init_IRQ,
.get_irq = ipic_get_irq,
- .calibrate_decr = generic_calibrate_decr,
.restart = mpc512x_restart,
};
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.h b/arch/powerpc/platforms/512x/mpc5121_ads.h
index 662076cfee2f..c88dea828cb2 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.h
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Prototypes for ADS5121 specific code
*/
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
index ca3a062ed1b9..2cf3c6237337 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads_cpld.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2008 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -5,11 +6,6 @@
*
* Description:
* MPC5121ADS CPLD irq handling
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#undef DEBUG
@@ -18,7 +14,10 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/io.h>
-#include <asm/prom.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+
+#include "mpc5121_ads.h"
static struct device_node *cpld_pic_node;
static struct irq_domain *cpld_pic_host;
@@ -85,11 +84,10 @@ static struct irq_chip cpld_pic = {
.irq_unmask = cpld_unmask_irq,
};
-static int
+static unsigned int
cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
u8 __iomem *maskp)
{
- int cpld_irq;
u8 status = in_8(statusp);
u8 mask = in_8(maskp);
@@ -97,33 +95,33 @@ cpld_pic_get_irq(int offset, u8 ignore, u8 __iomem *statusp,
status |= (ignore | mask);
if (status == 0xff)
- return NO_IRQ;
+ return ~0;
- cpld_irq = ffz(status) + offset;
-
- return irq_linear_revmap(cpld_pic_host, cpld_irq);
+ return ffz(status) + offset;
}
-static void
-cpld_pic_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpld_pic_cascade(struct irq_desc *desc)
{
- irq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
+ unsigned int hwirq;
+
+ hwirq = cpld_pic_get_irq(0, PCI_IGNORE, &cpld_regs->pci_status,
&cpld_regs->pci_mask);
- if (irq != NO_IRQ) {
- generic_handle_irq(irq);
+ if (hwirq != ~0) {
+ generic_handle_domain_irq(cpld_pic_host, hwirq);
return;
}
- irq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
+ hwirq = cpld_pic_get_irq(8, MISC_IGNORE, &cpld_regs->misc_status,
&cpld_regs->misc_mask);
- if (irq != NO_IRQ) {
- generic_handle_irq(irq);
+ if (hwirq != ~0) {
+ generic_handle_domain_irq(cpld_pic_host, hwirq);
return;
}
}
static int
-cpld_pic_host_match(struct irq_domain *h, struct device_node *node)
+cpld_pic_host_match(struct irq_domain *h, struct device_node *node,
+ enum irq_domain_bus_token bus_token)
{
return cpld_pic_node == node;
}
@@ -175,7 +173,7 @@ mpc5121_ads_cpld_pic_init(void)
goto end;
cascade_irq = irq_of_parse_and_map(np, 0);
- if (cascade_irq == NO_IRQ)
+ if (!cascade_irq)
goto end;
/*
@@ -190,7 +188,8 @@ mpc5121_ads_cpld_pic_init(void)
cpld_pic_node = of_node_get(np);
- cpld_pic_host = irq_domain_add_linear(np, 16, &cpld_pic_host_ops, NULL);
+ cpld_pic_host = irq_domain_create_linear(of_fwnode_handle(np), 16,
+ &cpld_pic_host_ops, NULL);
if (!cpld_pic_host) {
printk(KERN_ERR "CPLD PIC: failed to allocate irq host!\n");
goto end;
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index cc97f022d028..d2cb06e3a436 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -1,11 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Prototypes for MPC512x shared code
*/
@@ -16,8 +12,7 @@ extern void __init mpc512x_init_early(void);
extern void __init mpc512x_init(void);
extern void __init mpc512x_setup_arch(void);
extern int __init mpc5121_clk_init(void);
-extern const char *mpc512x_select_psc_compat(void);
-extern const char *mpc512x_select_reset_compat(void);
-extern void mpc512x_restart(char *cmd);
+const char *__init mpc512x_select_psc_compat(void);
+extern void __noreturn mpc512x_restart(char *cmd);
#endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index ce71408781a0..d4fa6c302ccf 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -5,20 +6,13 @@
*
* Description:
* MPC512x SoC setup
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
#include <linux/kernel.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include "mpc512x.h"
@@ -38,17 +32,18 @@ static const char * const board[] __initconst = {
*/
static int __init mpc512x_generic_probe(void)
{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
+ mpc512x_init_early();
+
+ return 1;
}
define_machine(mpc512x_generic) {
.name = "MPC512x generic",
+ .compatibles = board,
.probe = mpc512x_generic_probe,
.init = mpc512x_init,
- .init_early = mpc512x_init_early,
.setup_arch = mpc512x_setup_arch,
.init_IRQ = mpc512x_init_IRQ,
.get_irq = ipic_get_irq,
- .calibrate_decr = generic_calibrate_decr,
.restart = mpc512x_restart,
};
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
new file mode 100644
index 000000000000..f251e0f68262
--- /dev/null
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -0,0 +1,516 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The driver for Freescale MPC512x LocalPlus Bus FIFO
+ * (called SCLPC in the Reference Manual).
+ *
+ * Copyright (C) 2013-2015 Alexander Popov <alex.popov@linux.com>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <asm/mpc5121.h>
+#include <asm/io.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-direction.h>
+#include <linux/dma-mapping.h>
+
+#define DRV_NAME "mpc512x_lpbfifo"
+
+struct cs_range {
+ u32 csnum;
+ u32 base; /* must be zero */
+ u32 addr;
+ u32 size;
+};
+
+static struct lpbfifo_data {
+ spinlock_t lock; /* for protecting lpbfifo_data */
+ phys_addr_t regs_phys;
+ resource_size_t regs_size;
+ struct mpc512x_lpbfifo __iomem *regs;
+ int irq;
+ struct cs_range *cs_ranges;
+ size_t cs_n;
+ struct dma_chan *chan;
+ struct mpc512x_lpbfifo_request *req;
+ dma_addr_t ram_bus_addr;
+ bool wait_lpbfifo_irq;
+ bool wait_lpbfifo_callback;
+} lpbfifo;
+
+/*
+ * A data transfer from RAM to some device on LPB is finished
+ * when both mpc512x_lpbfifo_irq() and mpc512x_lpbfifo_callback()
+ * have been called. We execute the callback registered in
+ * mpc512x_lpbfifo_request just after that.
+ * But for a data transfer from some device on LPB to RAM we don't enable
+ * LPBFIFO interrupt because clearing MPC512X_SCLPC_SUCCESS interrupt flag
+ * automatically disables LPBFIFO reading request to the DMA controller
+ * and the data transfer hangs. So the callback registered in
+ * mpc512x_lpbfifo_request is executed at the end of mpc512x_lpbfifo_callback().
+ */
+
+/*
+ * mpc512x_lpbfifo_irq - IRQ handler for LPB FIFO
+ */
+static irqreturn_t mpc512x_lpbfifo_irq(int irq, void *param)
+{
+ struct device *dev = (struct device *)param;
+ struct mpc512x_lpbfifo_request *req = NULL;
+ unsigned long flags;
+ u32 status;
+
+ spin_lock_irqsave(&lpbfifo.lock, flags);
+
+ if (!lpbfifo.regs)
+ goto end;
+
+ req = lpbfifo.req;
+ if (!req || req->dir == MPC512X_LPBFIFO_REQ_DIR_READ) {
+ dev_err(dev, "bogus LPBFIFO IRQ\n");
+ goto end;
+ }
+
+ status = in_be32(&lpbfifo.regs->status);
+ if (status != MPC512X_SCLPC_SUCCESS) {
+ dev_err(dev, "DMA transfer from RAM to peripheral failed\n");
+ out_be32(&lpbfifo.regs->enable,
+ MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ goto end;
+ }
+ /* Clear the interrupt flag */
+ out_be32(&lpbfifo.regs->status, MPC512X_SCLPC_SUCCESS);
+
+ lpbfifo.wait_lpbfifo_irq = false;
+
+ if (lpbfifo.wait_lpbfifo_callback)
+ goto end;
+
+ /* Transfer is finished, set the FIFO as idle */
+ lpbfifo.req = NULL;
+
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+ if (req->callback)
+ req->callback(req);
+
+ return IRQ_HANDLED;
+
+ end:
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+ return IRQ_HANDLED;
+}
+
+/*
+ * mpc512x_lpbfifo_callback is called by DMA driver when
+ * DMA transaction is finished.
+ */
+static void mpc512x_lpbfifo_callback(void *param)
+{
+ unsigned long flags;
+ struct mpc512x_lpbfifo_request *req = NULL;
+ enum dma_data_direction dir;
+
+ spin_lock_irqsave(&lpbfifo.lock, flags);
+
+ if (!lpbfifo.regs) {
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+ return;
+ }
+
+ req = lpbfifo.req;
+ if (!req) {
+ pr_err("bogus LPBFIFO callback\n");
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+ return;
+ }
+
+ /* Release the mapping */
+ if (req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE)
+ dir = DMA_TO_DEVICE;
+ else
+ dir = DMA_FROM_DEVICE;
+ dma_unmap_single(lpbfifo.chan->device->dev,
+ lpbfifo.ram_bus_addr, req->size, dir);
+
+ lpbfifo.wait_lpbfifo_callback = false;
+
+ if (!lpbfifo.wait_lpbfifo_irq) {
+ /* Transfer is finished, set the FIFO as idle */
+ lpbfifo.req = NULL;
+
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+ if (req->callback)
+ req->callback(req);
+ } else {
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+ }
+}
+
+static int mpc512x_lpbfifo_kick(void)
+{
+ u32 bits;
+ bool no_incr = false;
+ u32 bpt = 32; /* max bytes per LPBFIFO transaction involving DMA */
+ u32 cs = 0;
+ size_t i;
+ struct dma_device *dma_dev = NULL;
+ struct scatterlist sg;
+ enum dma_data_direction dir;
+ struct dma_slave_config dma_conf = {};
+ struct dma_async_tx_descriptor *dma_tx = NULL;
+ dma_cookie_t cookie;
+ int ret;
+
+ /*
+ * 1. Fit the requirements:
+ * - the packet size must be a multiple of 4 since FIFO Data Word
+ * Register allows only full-word access according the Reference
+ * Manual;
+ * - the physical address of the device on LPB and the packet size
+ * must be aligned on BPT (bytes per transaction) or 8-bytes
+ * boundary according the Reference Manual;
+ * - but we choose DMA maxburst equal (or very close to) BPT to prevent
+ * DMA controller from overtaking FIFO and causing FIFO underflow
+ * error. So we force the packet size to be aligned on BPT boundary
+ * not to confuse DMA driver which requires the packet size to be
+ * aligned on maxburst boundary;
+ * - BPT should be set to the LPB device port size for operation with
+ * disabled auto-incrementing according Reference Manual.
+ */
+ if (lpbfifo.req->size == 0 || !IS_ALIGNED(lpbfifo.req->size, 4))
+ return -EINVAL;
+
+ if (lpbfifo.req->portsize != LPB_DEV_PORTSIZE_UNDEFINED) {
+ bpt = lpbfifo.req->portsize;
+ no_incr = true;
+ }
+
+ while (bpt > 1) {
+ if (IS_ALIGNED(lpbfifo.req->dev_phys_addr, min(bpt, 0x8u)) &&
+ IS_ALIGNED(lpbfifo.req->size, bpt)) {
+ break;
+ }
+
+ if (no_incr)
+ return -EINVAL;
+
+ bpt >>= 1;
+ }
+ dma_conf.dst_maxburst = max(bpt, 0x4u) / 4;
+ dma_conf.src_maxburst = max(bpt, 0x4u) / 4;
+
+ for (i = 0; i < lpbfifo.cs_n; i++) {
+ phys_addr_t cs_start = lpbfifo.cs_ranges[i].addr;
+ phys_addr_t cs_end = cs_start + lpbfifo.cs_ranges[i].size;
+ phys_addr_t access_start = lpbfifo.req->dev_phys_addr;
+ phys_addr_t access_end = access_start + lpbfifo.req->size;
+
+ if (access_start >= cs_start && access_end <= cs_end) {
+ cs = lpbfifo.cs_ranges[i].csnum;
+ break;
+ }
+ }
+ if (i == lpbfifo.cs_n)
+ return -EFAULT;
+
+ /* 2. Prepare DMA */
+ dma_dev = lpbfifo.chan->device;
+
+ if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE) {
+ dir = DMA_TO_DEVICE;
+ dma_conf.direction = DMA_MEM_TO_DEV;
+ dma_conf.dst_addr = lpbfifo.regs_phys +
+ offsetof(struct mpc512x_lpbfifo, data_word);
+ } else {
+ dir = DMA_FROM_DEVICE;
+ dma_conf.direction = DMA_DEV_TO_MEM;
+ dma_conf.src_addr = lpbfifo.regs_phys +
+ offsetof(struct mpc512x_lpbfifo, data_word);
+ }
+ dma_conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ dma_conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+ /* Make DMA channel work with LPB FIFO data register */
+ if (dma_dev->device_config(lpbfifo.chan, &dma_conf))
+ return -EINVAL;
+
+ sg_init_table(&sg, 1);
+
+ sg_dma_address(&sg) = dma_map_single(dma_dev->dev,
+ lpbfifo.req->ram_virt_addr, lpbfifo.req->size, dir);
+ if (dma_mapping_error(dma_dev->dev, sg_dma_address(&sg)))
+ return -EFAULT;
+
+ lpbfifo.ram_bus_addr = sg_dma_address(&sg); /* For freeing later */
+
+ sg_dma_len(&sg) = lpbfifo.req->size;
+
+ dma_tx = dmaengine_prep_slave_sg(lpbfifo.chan, &sg,
+ 1, dma_conf.direction, 0);
+ if (!dma_tx) {
+ ret = -ENOSPC;
+ goto err_dma_prep;
+ }
+ dma_tx->callback = mpc512x_lpbfifo_callback;
+ dma_tx->callback_param = NULL;
+
+ /* 3. Prepare FIFO */
+ out_be32(&lpbfifo.regs->enable,
+ MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ out_be32(&lpbfifo.regs->enable, 0x0);
+
+ /*
+ * Configure the watermarks for write operation (RAM->DMA->FIFO->dev):
+ * - high watermark 7 words according the Reference Manual,
+ * - low watermark 512 bytes (half of the FIFO).
+ * These watermarks don't work for read operation since the
+ * MPC512X_SCLPC_FLUSH bit is set (according the Reference Manual).
+ */
+ out_be32(&lpbfifo.regs->fifo_ctrl, MPC512X_SCLPC_FIFO_CTRL(0x7));
+ out_be32(&lpbfifo.regs->fifo_alarm, MPC512X_SCLPC_FIFO_ALARM(0x200));
+
+ /*
+ * Start address is a physical address of the region which belongs
+ * to the device on the LocalPlus Bus
+ */
+ out_be32(&lpbfifo.regs->start_addr, lpbfifo.req->dev_phys_addr);
+
+ /*
+ * Configure chip select, transfer direction, address increment option
+ * and bytes per transaction option
+ */
+ bits = MPC512X_SCLPC_CS(cs);
+ if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_READ)
+ bits |= MPC512X_SCLPC_READ | MPC512X_SCLPC_FLUSH;
+ if (no_incr)
+ bits |= MPC512X_SCLPC_DAI;
+ bits |= MPC512X_SCLPC_BPT(bpt);
+ out_be32(&lpbfifo.regs->ctrl, bits);
+
+ /* Unmask irqs */
+ bits = MPC512X_SCLPC_ENABLE | MPC512X_SCLPC_ABORT_INT_ENABLE;
+ if (lpbfifo.req->dir == MPC512X_LPBFIFO_REQ_DIR_WRITE)
+ bits |= MPC512X_SCLPC_NORM_INT_ENABLE;
+ else
+ lpbfifo.wait_lpbfifo_irq = false;
+
+ out_be32(&lpbfifo.regs->enable, bits);
+
+ /* 4. Set packet size and kick FIFO off */
+ bits = lpbfifo.req->size | MPC512X_SCLPC_START;
+ out_be32(&lpbfifo.regs->pkt_size, bits);
+
+ /* 5. Finally kick DMA off */
+ cookie = dma_tx->tx_submit(dma_tx);
+ if (dma_submit_error(cookie)) {
+ ret = -ENOSPC;
+ goto err_dma_submit;
+ }
+
+ return 0;
+
+ err_dma_submit:
+ out_be32(&lpbfifo.regs->enable,
+ MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+ err_dma_prep:
+ dma_unmap_single(dma_dev->dev, sg_dma_address(&sg),
+ lpbfifo.req->size, dir);
+ return ret;
+}
+
+static int mpc512x_lpbfifo_submit_locked(struct mpc512x_lpbfifo_request *req)
+{
+ int ret = 0;
+
+ if (!lpbfifo.regs)
+ return -ENODEV;
+
+ /* Check whether a transfer is in progress */
+ if (lpbfifo.req)
+ return -EBUSY;
+
+ lpbfifo.wait_lpbfifo_irq = true;
+ lpbfifo.wait_lpbfifo_callback = true;
+ lpbfifo.req = req;
+
+ ret = mpc512x_lpbfifo_kick();
+ if (ret != 0)
+ lpbfifo.req = NULL; /* Set the FIFO as idle */
+
+ return ret;
+}
+
+int mpc512x_lpbfifo_submit(struct mpc512x_lpbfifo_request *req)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&lpbfifo.lock, flags);
+ ret = mpc512x_lpbfifo_submit_locked(req);
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL(mpc512x_lpbfifo_submit);
+
+/*
+ * LPBFIFO driver uses "ranges" property of "localbus" device tree node
+ * for being able to determine the chip select number of a client device
+ * ordering a DMA transfer.
+ */
+static int get_cs_ranges(struct device *dev)
+{
+ int ret = -ENODEV;
+ struct device_node *lb_node;
+ size_t i = 0;
+ struct of_range_parser parser;
+ struct of_range range;
+
+ lb_node = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-localbus");
+ if (!lb_node)
+ return ret;
+
+ of_range_parser_init(&parser, lb_node);
+ lpbfifo.cs_n = of_range_count(&parser);
+
+ lpbfifo.cs_ranges = devm_kcalloc(dev, lpbfifo.cs_n,
+ sizeof(struct cs_range), GFP_KERNEL);
+ if (!lpbfifo.cs_ranges)
+ goto end;
+
+ for_each_of_range(&parser, &range) {
+ u32 base = lower_32_bits(range.bus_addr);
+ if (base)
+ goto end;
+
+ lpbfifo.cs_ranges[i].csnum = upper_32_bits(range.bus_addr);
+ lpbfifo.cs_ranges[i].base = base;
+ lpbfifo.cs_ranges[i].addr = range.cpu_addr;
+ lpbfifo.cs_ranges[i].size = range.size;
+ i++;
+ }
+
+ ret = 0;
+
+ end:
+ of_node_put(lb_node);
+ return ret;
+}
+
+static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
+{
+ struct resource r;
+ int ret = 0;
+
+ memset(&lpbfifo, 0, sizeof(struct lpbfifo_data));
+ spin_lock_init(&lpbfifo.lock);
+
+ lpbfifo.chan = dma_request_chan(&pdev->dev, "rx-tx");
+ if (IS_ERR(lpbfifo.chan))
+ return PTR_ERR(lpbfifo.chan);
+
+ if (of_address_to_resource(pdev->dev.of_node, 0, &r) != 0) {
+ dev_err(&pdev->dev, "bad 'reg' in 'sclpc' device tree node\n");
+ ret = -ENODEV;
+ goto err0;
+ }
+
+ lpbfifo.regs_phys = r.start;
+ lpbfifo.regs_size = resource_size(&r);
+
+ if (!devm_request_mem_region(&pdev->dev, lpbfifo.regs_phys,
+ lpbfifo.regs_size, DRV_NAME)) {
+ dev_err(&pdev->dev, "unable to request region\n");
+ ret = -EBUSY;
+ goto err0;
+ }
+
+ lpbfifo.regs = devm_ioremap(&pdev->dev,
+ lpbfifo.regs_phys, lpbfifo.regs_size);
+ if (!lpbfifo.regs) {
+ dev_err(&pdev->dev, "mapping registers failed\n");
+ ret = -ENOMEM;
+ goto err0;
+ }
+
+ out_be32(&lpbfifo.regs->enable,
+ MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+ if (get_cs_ranges(&pdev->dev) != 0) {
+ dev_err(&pdev->dev, "bad '/localbus' device tree node\n");
+ ret = -ENODEV;
+ goto err0;
+ }
+
+ lpbfifo.irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+ if (!lpbfifo.irq) {
+ dev_err(&pdev->dev, "mapping irq failed\n");
+ ret = -ENODEV;
+ goto err0;
+ }
+
+ if (request_irq(lpbfifo.irq, mpc512x_lpbfifo_irq, 0,
+ DRV_NAME, &pdev->dev) != 0) {
+ dev_err(&pdev->dev, "requesting irq failed\n");
+ ret = -ENODEV;
+ goto err1;
+ }
+
+ dev_info(&pdev->dev, "probe succeeded\n");
+ return 0;
+
+ err1:
+ irq_dispose_mapping(lpbfifo.irq);
+ err0:
+ dma_release_channel(lpbfifo.chan);
+ return ret;
+}
+
+static void mpc512x_lpbfifo_remove(struct platform_device *pdev)
+{
+ unsigned long flags;
+ struct dma_device *dma_dev = lpbfifo.chan->device;
+ struct mpc512x_lpbfifo __iomem *regs = NULL;
+
+ spin_lock_irqsave(&lpbfifo.lock, flags);
+ regs = lpbfifo.regs;
+ lpbfifo.regs = NULL;
+ spin_unlock_irqrestore(&lpbfifo.lock, flags);
+
+ dma_dev->device_terminate_all(lpbfifo.chan);
+ out_be32(&regs->enable, MPC512X_SCLPC_RESET | MPC512X_SCLPC_FIFO_RESET);
+
+ free_irq(lpbfifo.irq, &pdev->dev);
+ irq_dispose_mapping(lpbfifo.irq);
+ dma_release_channel(lpbfifo.chan);
+}
+
+static const struct of_device_id mpc512x_lpbfifo_match[] = {
+ { .compatible = "fsl,mpc512x-lpbfifo", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
+
+static struct platform_driver mpc512x_lpbfifo_driver = {
+ .probe = mpc512x_lpbfifo_probe,
+ .remove = mpc512x_lpbfifo_remove,
+ .driver = {
+ .name = DRV_NAME,
+ .of_match_table = mpc512x_lpbfifo_match,
+ },
+};
+
+module_platform_driver(mpc512x_lpbfifo_driver);
+
+MODULE_AUTHOR("Alexander Popov <alex.popov@linux.com>");
+MODULE_DESCRIPTION("MPC512x LocalPlus Bus FIFO device driver");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index adb95f03d4d4..8c1f3b629fc7 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2007,2008 Freescale Semiconductor, Inc. All rights reserved.
*
@@ -5,26 +6,21 @@
*
* Description:
* MPC512x Shared code
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/clk.h>
#include <linux/kernel.h>
#include <linux/io.h>
#include <linux/irq.h>
+#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/fsl-diu-fb.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <sysdev/fsl_soc.h>
#include <asm/cacheflush.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <asm/mpc5121.h>
#include <asm/mpc52xx_psc.h>
@@ -33,21 +29,7 @@
static struct mpc512x_reset_module __iomem *reset_module_base;
-static void __init mpc512x_restart_init(void)
-{
- struct device_node *np;
- const char *reset_compat;
-
- reset_compat = mpc512x_select_reset_compat();
- np = of_find_compatible_node(NULL, NULL, reset_compat);
- if (!np)
- return;
-
- reset_module_base = of_iomap(np, 0);
- of_node_put(np);
-}
-
-void mpc512x_restart(char *cmd)
+void __noreturn mpc512x_restart(char *cmd)
{
if (reset_module_base) {
/* Enable software reset "RSTE" */
@@ -188,7 +170,7 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
static inline void mpc512x_free_bootmem(struct page *page)
{
BUG_ON(PageTail(page));
- BUG_ON(atomic_read(&page->_count) > 1);
+ BUG_ON(page_ref_count(page) > 1);
free_reserved_page(page);
}
@@ -293,18 +275,17 @@ static void __init mpc512x_setup_diu(void)
/*
* We do not allocate and configure new area for bitmap buffer
- * because it would requere copying bitmap data (splash image)
+ * because it would require copying bitmap data (splash image)
* and so negatively affect boot time. Instead we reserve the
* already configured frame buffer area so that it won't be
* destroyed. The starting address of the area to reserve and
- * also it's length is passed to reserve_bootmem(). It will be
+ * also its length is passed to memblock_reserve(). It will be
* freed later on first open of fbdev, when splash image is not
* needed any more.
*/
if (diu_shared_fb.in_use) {
- ret = reserve_bootmem(diu_shared_fb.fb_phys,
- diu_shared_fb.fb_len,
- BOOTMEM_EXCLUSIVE);
+ ret = memblock_reserve(diu_shared_fb.fb_phys,
+ diu_shared_fb.fb_len);
if (ret) {
pr_err("%s: reserve bootmem failed\n", __func__);
diu_shared_fb.in_use = false;
@@ -337,7 +318,7 @@ void __init mpc512x_init_IRQ(void)
/*
* Nodes to do bus probe on, soc and localbus
*/
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .compatible = "fsl,mpc5121-immr", },
{ .compatible = "fsl,mpc5121-localbus", },
{ .compatible = "fsl,mpc5121-mbx", },
@@ -357,7 +338,7 @@ static void __init mpc512x_declare_of_platform_devices(void)
#define DEFAULT_FIFO_SIZE 16
-const char *mpc512x_select_psc_compat(void)
+const char *__init mpc512x_select_psc_compat(void)
{
if (of_machine_is_compatible("fsl,mpc5121"))
return "fsl,mpc5121-psc";
@@ -368,7 +349,7 @@ const char *mpc512x_select_psc_compat(void)
return NULL;
}
-const char *mpc512x_select_reset_compat(void)
+static const char *__init mpc512x_select_reset_compat(void)
{
if (of_machine_is_compatible("fsl,mpc5121"))
return "fsl,mpc5121-reset";
@@ -388,8 +369,8 @@ static unsigned int __init get_fifo_size(struct device_node *np,
if (fp)
return *fp;
- pr_warning("no %s property in %s node, defaulting to %d\n",
- prop_name, np->full_name, DEFAULT_FIFO_SIZE);
+ pr_warn("no %s property in %pOF node, defaulting to %d\n",
+ prop_name, np, DEFAULT_FIFO_SIZE);
return DEFAULT_FIFO_SIZE;
}
@@ -427,15 +408,15 @@ static void __init mpc512x_psc_fifo_init(void)
psc = of_iomap(np, 0);
if (!psc) {
- pr_err("%s: Can't map %s device\n",
- __func__, np->full_name);
+ pr_err("%s: Can't map %pOF device\n",
+ __func__, np);
continue;
}
/* FIFO space is 4KiB, check if requested size is available */
if ((fifobase + tx_fifo_size + rx_fifo_size) > 0x1000) {
- pr_err("%s: no fifo space available for %s\n",
- __func__, np->full_name);
+ pr_err("%s: no fifo space available for %pOF\n",
+ __func__, np);
iounmap(psc);
/*
* chances are that another device requests less
@@ -460,6 +441,20 @@ static void __init mpc512x_psc_fifo_init(void)
}
}
+static void __init mpc512x_restart_init(void)
+{
+ struct device_node *np;
+ const char *reset_compat;
+
+ reset_compat = mpc512x_select_reset_compat();
+ np = of_find_compatible_node(NULL, NULL, reset_compat);
+ if (!np)
+ return;
+
+ reset_module_base = of_iomap(np, 0);
+ of_node_put(np);
+}
+
void __init mpc512x_init_early(void)
{
mpc512x_restart_init();
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 116f2325b20b..8bbbf78bb42b 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -1,22 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2010 DENX Software Engineering
*
* Anatolij Gustschin, <agust@denx.de>
*
* PDM360NG board setup
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
*/
+#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <asm/machdep.h>
#include <asm/ipic.h>
@@ -105,7 +101,7 @@ static inline void __init pdm360ng_touchscreen_init(void)
}
#endif /* CONFIG_TOUCHSCREEN_ADS7846 */
-void __init pdm360ng_init(void)
+static void __init pdm360ng_init(void)
{
mpc512x_init();
pdm360ng_touchscreen_init();
@@ -113,19 +109,18 @@ void __init pdm360ng_init(void)
static int __init pdm360ng_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
+ mpc512x_init_early();
- return of_flat_dt_is_compatible(root, "ifm,pdm360ng");
+ return 1;
}
define_machine(pdm360ng) {
.name = "PDM360NG",
+ .compatible = "ifm,pdm360ng",
.probe = pdm360ng_probe,
.setup_arch = mpc512x_setup_arch,
.init = pdm360ng_init,
- .init_early = mpc512x_init_early,
.init_IRQ = mpc512x_init_IRQ,
.get_irq = ipic_get_irq,
- .calibrate_decr = generic_calibrate_decr,
.restart = mpc512x_restart,
};
diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index b625a2c6f4f2..384e4bef2c28 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
config PPC_MPC52xx
bool "52xx-based boards"
- depends on 6xx
+ depends on PPC_BOOK3S_32
select COMMON_CLK
- select PPC_PCI_CHOICE
+ select HAVE_PCI
config PPC_MPC5200_SIMPLE
bool "Generic support for simple MPC5200 based boards"
@@ -33,8 +34,7 @@ config PPC_EFIKA
bool "bPlan Efika 5k2. MPC5200B based computer"
depends on PPC_MPC52xx
select PPC_RTAS
- select RTAS_PROC
- select PPC_NATIVE
+ select PPC_HASH_MMU_NATIVE
config PPC_LITE5200
bool "Freescale Lite5200 Eval Board"
@@ -54,8 +54,3 @@ config PPC_MPC5200_BUGFIX
for MPC5200B based boards.
It is safe to say 'Y' here
-
-config PPC_MPC5200_LPBFIFO
- tristate "MPC5200 LocalPlus bus FIFO driver"
- depends on PPC_MPC52xx && PPC_BESTCOMM
- select PPC_BESTCOMM_GEN_BD
diff --git a/arch/powerpc/platforms/52xx/Makefile b/arch/powerpc/platforms/52xx/Makefile
index 4e62486791e9..1b1f72d83342 100644
--- a/arch/powerpc/platforms/52xx/Makefile
+++ b/arch/powerpc/platforms/52xx/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for 52xx based boards
#
@@ -10,8 +11,6 @@ obj-$(CONFIG_PPC_LITE5200) += lite5200.o
obj-$(CONFIG_PPC_MEDIA5200) += media5200.o
obj-$(CONFIG_PM) += mpc52xx_sleep.o mpc52xx_pm.o
-ifeq ($(CONFIG_PPC_LITE5200),y)
+ifdef CONFIG_PPC_LITE5200
obj-$(CONFIG_PM) += lite5200_sleep.o lite5200_pm.o
endif
-
-obj-$(CONFIG_PPC_MPC5200_LPBFIFO) += mpc52xx_lpbfifo.o
diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c
index 3feffde9128d..a7172f9ebaad 100644
--- a/arch/powerpc/platforms/52xx/efika.c
+++ b/arch/powerpc/platforms/52xx/efika.c
@@ -13,8 +13,8 @@
#include <generated/utsrelease.h>
#include <linux/pci.h>
#include <linux/of.h>
+#include <linux/seq_file.h>
#include <asm/dma.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
@@ -42,7 +42,7 @@ static int rtas_read_config(struct pci_bus *bus, unsigned int devfn, int offset,
int ret = -1;
int rval;
- rval = rtas_call(rtas_token("read-pci-config"), 2, 2, &ret, addr, len);
+ rval = rtas_call(rtas_function_token(RTAS_FN_READ_PCI_CONFIG), 2, 2, &ret, addr, len);
*val = ret;
return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
}
@@ -56,7 +56,7 @@ static int rtas_write_config(struct pci_bus *bus, unsigned int devfn,
| (hose->global_number << 24);
int rval;
- rval = rtas_call(rtas_token("write-pci-config"), 3, 1, NULL,
+ rval = rtas_call(rtas_function_token(RTAS_FN_WRITE_PCI_CONFIG), 3, 1, NULL,
addr, len, val);
return rval ? PCIBIOS_DEVICE_NOT_FOUND : PCIBIOS_SUCCESSFUL;
}
@@ -82,11 +82,9 @@ static void __init efika_pcisetup(void)
return;
}
- for (pcictrl = NULL;;) {
- pcictrl = of_get_next_child(root, pcictrl);
- if ((pcictrl == NULL) || (strcmp(pcictrl->name, "pci") == 0))
+ for_each_child_of_node(root, pcictrl)
+ if (of_node_name_eq(pcictrl, "pci"))
break;
- }
of_node_put(root);
@@ -99,7 +97,7 @@ static void __init efika_pcisetup(void)
bus_range = of_get_property(pcictrl, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
printk(KERN_WARNING EFIKA_PLATFORM_NAME
- ": Can't get bus-range for %s\n", pcictrl->full_name);
+ ": Can't get bus-range for %pOF\n", pcictrl);
goto out_put;
}
@@ -109,14 +107,14 @@ static void __init efika_pcisetup(void)
else
printk(KERN_INFO EFIKA_PLATFORM_NAME ": PCI buses %d..%d",
bus_range[0], bus_range[1]);
- printk(" controlled by %s\n", pcictrl->full_name);
+ printk(" controlled by %pOF\n", pcictrl);
printk("\n");
hose = pcibios_alloc_controller(pcictrl);
if (!hose) {
printk(KERN_WARNING EFIKA_PLATFORM_NAME
- ": Can't allocate PCI controller structure for %s\n",
- pcictrl->full_name);
+ ": Can't allocate PCI controller structure for %pOF\n",
+ pcictrl);
goto out_put;
}
@@ -187,8 +185,6 @@ static void __init efika_setup_arch(void)
/* Map important registers from the internal memory map */
mpc52xx_map_common_devices();
- efika_pcisetup();
-
#ifdef CONFIG_PM
mpc52xx_suspend.board_suspend_prepare = efika_suspend_prepare;
mpc52xx_pm_init();
@@ -200,18 +196,20 @@ static void __init efika_setup_arch(void)
static int __init efika_probe(void)
{
- const char *model = of_get_flat_dt_prop(of_get_flat_dt_root(),
- "model", NULL);
+ struct device_node *root = of_find_node_by_path("/");
+ const char *model = of_get_property(root, "model", NULL);
+ of_node_put(root);
if (model == NULL)
return 0;
if (strcmp(model, "EFIKA5K2"))
return 0;
- ISA_DMA_THRESHOLD = ~0L;
DMA_MODE_READ = 0x44;
DMA_MODE_WRITE = 0x48;
+ pm_power_off = rtas_power_off;
+
return 1;
}
@@ -220,18 +218,17 @@ define_machine(efika)
.name = EFIKA_PLATFORM_NAME,
.probe = efika_probe,
.setup_arch = efika_setup_arch,
+ .discover_phbs = efika_pcisetup,
.init = mpc52xx_declare_of_platform_devices,
.show_cpuinfo = efika_show_cpuinfo,
.init_IRQ = mpc52xx_init_irq,
.get_irq = mpc52xx_get_irq,
.restart = rtas_restart,
- .power_off = rtas_power_off,
.halt = rtas_halt,
.set_rtc_time = rtas_set_rtc_time,
.get_rtc_time = rtas_get_rtc_time,
.progress = rtas_progress,
.get_boot_time = rtas_get_boot_time,
- .calibrate_decr = generic_calibrate_decr,
#ifdef CONFIG_PCI
.phys_mem_access_prot = pci_phys_mem_access_prot,
#endif
diff --git a/arch/powerpc/platforms/52xx/lite5200.c b/arch/powerpc/platforms/52xx/lite5200.c
index 1843bc932011..0a161d82a3a8 100644
--- a/arch/powerpc/platforms/52xx/lite5200.c
+++ b/arch/powerpc/platforms/52xx/lite5200.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Freescale Lite5200 board support
*
@@ -7,10 +8,6 @@
* Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
*
* Description:
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#undef DEBUG
@@ -24,7 +21,6 @@
#include <asm/time.h>
#include <asm/io.h>
#include <asm/machdep.h>
-#include <asm/prom.h>
#include <asm/mpc52xx.h>
/* ************************************************************************
@@ -34,13 +30,13 @@
*/
/* mpc5200 device tree match tables */
-static struct of_device_id mpc5200_cdm_ids[] __initdata = {
+static const struct of_device_id mpc5200_cdm_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-cdm", },
{ .compatible = "mpc5200-cdm", },
{}
};
-static struct of_device_id mpc5200_gpio_ids[] __initdata = {
+static const struct of_device_id mpc5200_gpio_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-gpio", },
{ .compatible = "mpc5200-gpio", },
{}
@@ -168,8 +164,6 @@ static void __init lite5200_setup_arch(void)
mpc52xx_suspend.board_resume_finish = lite5200_resume_finish;
lite5200_pm_init();
#endif
-
- mpc52xx_setup_pci();
}
static const char * const board[] __initconst = {
@@ -178,21 +172,13 @@ static const char * const board[] __initconst = {
NULL,
};
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init lite5200_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
define_machine(lite5200) {
.name = "lite5200",
- .probe = lite5200_probe,
+ .compatibles = board,
.setup_arch = lite5200_setup_arch,
+ .discover_phbs = mpc52xx_setup_pci,
.init = mpc52xx_declare_of_platform_devices,
.init_IRQ = mpc52xx_init_irq,
.get_irq = mpc52xx_get_irq,
.restart = mpc52xx_restart,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/52xx/lite5200_pm.c b/arch/powerpc/platforms/52xx/lite5200_pm.c
index 870b70f5d1bd..4900f5f48cce 100644
--- a/arch/powerpc/platforms/52xx/lite5200_pm.c
+++ b/arch/powerpc/platforms/52xx/lite5200_pm.c
@@ -1,5 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/suspend.h>
+#include <linux/of_address.h>
+
#include <asm/io.h>
#include <asm/time.h>
#include <asm/mpc52xx.h>
@@ -44,15 +47,14 @@ static int lite5200_pm_begin(suspend_state_t state)
static int lite5200_pm_prepare(void)
{
struct device_node *np;
- const struct of_device_id immr_ids[] = {
+ static const struct of_device_id immr_ids[] = {
{ .compatible = "fsl,mpc5200-immr", },
{ .compatible = "fsl,mpc5200b-immr", },
{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
{ .type = "builtin", .compatible = "mpc5200", }, /* efika */
{}
};
- u64 regaddr64 = 0;
- const u32 *regaddr_p;
+ struct resource res;
/* deep sleep? let mpc52xx code handle that */
if (lite5200_pm_target_state == PM_SUSPEND_STANDBY)
@@ -63,12 +65,10 @@ static int lite5200_pm_prepare(void)
/* map registers */
np = of_find_matching_node(NULL, immr_ids);
- regaddr_p = of_get_address(np, 0, NULL, NULL);
- if (regaddr_p)
- regaddr64 = of_translate_address(np, regaddr_p);
+ of_address_to_resource(np, 0, &res);
of_node_put(np);
- mbar = ioremap((u32) regaddr64, 0xC000);
+ mbar = ioremap(res.start, 0xC000);
if (!mbar) {
printk(KERN_ERR "%s:%i Error mapping registers\n", __func__, __LINE__);
return -ENOSYS;
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 08ab6fefcf7a..0ec2522ee4ad 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -1,3 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
#include <asm/reg.h>
#include <asm/ppc_asm.h>
#include <asm/processor.h>
@@ -55,7 +58,7 @@ lite5200_low_power:
/*
* save stuff BDI overwrites
* 0xf0 (0xe0->0x100 gets overwritten when BDI connected;
- * even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?))
+ * even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?))
* WARNING: self-refresh doesn't seem to work when BDI2000 is connected,
* possibly because BDI sets SDRAM registers before wakeup code does
*/
@@ -177,15 +180,17 @@ sram_code:
/* local udelay in sram is needed */
- udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
+SYM_FUNC_START_LOCAL(udelay)
+ /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
mullw r12, r12, r11
mftb r13 /* start */
- addi r12, r13, r12 /* end */
+ add r12, r13, r12 /* end */
1:
mftb r13 /* current */
cmp cr0, r13, r12
blt 1b
blr
+SYM_FUNC_END(udelay)
sram_code_end:
@@ -198,7 +203,8 @@ lite5200_wakeup:
/* HIDs, MSR */
LOAD_SPRN(HID1, 0x19)
- LOAD_SPRN(HID2, 0x1a)
+ /* FIXME: Should this use HID2_G2_LE? */
+ LOAD_SPRN(HID2_750FX, 0x1a)
/* address translation is tricky (see turn_on_mmu) */
@@ -247,6 +253,7 @@ mmu_on:
blr
+_ASM_NOKPROBE_SYMBOL(lite5200_wakeup)
/* ---------------------------------------------------------------------- */
@@ -269,7 +276,7 @@ mmu_on:
SAVE_SR(n+2, addr+2); \
SAVE_SR(n+3, addr+3);
-save_regs:
+SYM_FUNC_START_LOCAL(save_regs)
stw r0, 0(r4)
stw r1, 0x4(r4)
stw r2, 0x8(r4)
@@ -277,7 +284,8 @@ save_regs:
SAVE_SPRN(HID0, 0x18)
SAVE_SPRN(HID1, 0x19)
- SAVE_SPRN(HID2, 0x1a)
+ /* FIXME: Should this use HID2_G2_LE? */
+ SAVE_SPRN(HID2_750FX, 0x1a)
mfmsr r10
stw r10, (4*0x1b)(r4)
/*SAVE_SPRN(LR, 0x1c) have to save it before the call */
@@ -315,6 +323,7 @@ save_regs:
SAVE_SPRN(TBRU, 0x5b)
blr
+SYM_FUNC_END(save_regs)
/* restore registers */
@@ -334,7 +343,7 @@ save_regs:
LOAD_SR(n+2, addr+2); \
LOAD_SR(n+3, addr+3);
-restore_regs:
+SYM_FUNC_START_LOCAL(restore_regs)
lis r4, registers@h
ori r4, r4, registers@l
@@ -390,6 +399,8 @@ restore_regs:
LOAD_SPRN(TBWU, 0x5b);
blr
+_ASM_NOKPROBE_SYMBOL(restore_regs)
+SYM_FUNC_END(restore_regs)
@@ -400,7 +411,7 @@ restore_regs:
* Flush data cache
* Do this by just reading lots of stuff into the cache.
*/
-flush_data_cache:
+SYM_FUNC_START_LOCAL(flush_data_cache)
lis r3,CONFIG_KERNEL_START@h
ori r3,r3,CONFIG_KERNEL_START@l
li r4,NUM_CACHE_LINES
@@ -410,3 +421,4 @@ flush_data_cache:
addi r3,r3,L1_CACHE_BYTES /* Next line, please */
bdnz 1b
blr
+SYM_FUNC_END(flush_data_cache)
diff --git a/arch/powerpc/platforms/52xx/media5200.c b/arch/powerpc/platforms/52xx/media5200.c
index 070d315dd6cd..bc7f83cfec1d 100644
--- a/arch/powerpc/platforms/52xx/media5200.c
+++ b/arch/powerpc/platforms/52xx/media5200.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for 'media5200-platform' compatible boards.
*
* Copyright (C) 2008 Secret Lab Technologies Ltd.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Description:
* This code implements support for the Freescape Media5200 platform
* (built around the MPC5200 SoC).
@@ -17,7 +13,6 @@
* a cascaded interrupt controller driver which attaches itself to the
* Virtual IRQ subsystem after the primary mpc5200 interrupt controller
* is initialized.
- *
*/
#undef DEBUG
@@ -25,12 +20,13 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <asm/time.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/mpc52xx.h>
-static struct of_device_id mpc5200_gpio_ids[] __initdata = {
+static const struct of_device_id mpc5200_gpio_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-gpio", },
{ .compatible = "mpc5200-gpio", },
{}
@@ -80,10 +76,10 @@ static struct irq_chip media5200_irq_chip = {
.irq_mask_ack = media5200_irq_mask,
};
-void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void media5200_irq_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
- int sub_virq, val;
+ int val;
u32 status, enable;
/* Mask off the cascaded IRQ */
@@ -97,11 +93,10 @@ void media5200_irq_cascade(unsigned int virq, struct irq_desc *desc)
enable = in_be32(media5200_irq.regs + MEDIA5200_IRQ_STATUS);
val = ffs((status & enable) >> MEDIA5200_IRQ_SHIFT);
if (val) {
- sub_virq = irq_linear_revmap(media5200_irq.irqhost, val - 1);
- /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i subvirq=%i\n",
- * __func__, virq, status, enable, val - 1, sub_virq);
+ generic_handle_domain_irq(media5200_irq.irqhost, val - 1);
+ /* pr_debug("%s: virq=%i s=%.8x e=%.8x hwirq=%i\n",
+ * __func__, virq, status, enable, val - 1);
*/
- generic_handle_irq(sub_virq);
}
/* Processing done; can reenable the cascade now */
@@ -156,7 +151,7 @@ static void __init media5200_init_irq(void)
fpga_np = of_find_compatible_node(NULL, NULL, "fsl,media5200-fpga");
if (!fpga_np)
goto out;
- pr_debug("%s: found fpga node: %s\n", __func__, fpga_np->full_name);
+ pr_debug("%s: found fpga node: %pOF\n", __func__, fpga_np);
media5200_irq.regs = of_iomap(fpga_np, 0);
if (!media5200_irq.regs)
@@ -173,12 +168,14 @@ static void __init media5200_init_irq(void)
spin_lock_init(&media5200_irq.lock);
- media5200_irq.irqhost = irq_domain_add_linear(fpga_np,
+ media5200_irq.irqhost = irq_domain_create_linear(of_fwnode_handle(fpga_np),
MEDIA5200_NUM_IRQS, &media5200_irq_ops, &media5200_irq);
if (!media5200_irq.irqhost)
goto out;
pr_debug("%s: allocated irqhost\n", __func__);
+ of_node_put(fpga_np);
+
irq_set_handler_data(cascade_virq, &media5200_irq);
irq_set_chained_handler(cascade_virq, media5200_irq_cascade);
@@ -186,6 +183,7 @@ static void __init media5200_init_irq(void)
out:
pr_err("Could not find Media5200 FPGA; PCI interrupts will not work\n");
+ of_node_put(fpga_np);
}
/*
@@ -207,8 +205,6 @@ static void __init media5200_setup_arch(void)
/* Some mpc5200 & mpc5200b related configuration */
mpc5200_setup_xlb_arbiter();
- mpc52xx_setup_pci();
-
np = of_find_matching_node(NULL, mpc5200_gpio_ids);
gpio = of_iomap(np, 0);
of_node_put(np);
@@ -231,27 +227,13 @@ static void __init media5200_setup_arch(void)
}
-/* list of the supported boards */
-static const char * const board[] __initconst = {
- "fsl,media5200",
- NULL
-};
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init media5200_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
define_machine(media5200_platform) {
.name = "media5200-platform",
- .probe = media5200_probe,
+ .compatible = "fsl,media5200",
.setup_arch = media5200_setup_arch,
+ .discover_phbs = mpc52xx_setup_pci,
.init = mpc52xx_declare_of_platform_devices,
.init_IRQ = media5200_init_irq,
.get_irq = mpc52xx_get_irq,
.restart = mpc52xx_restart,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/52xx/mpc5200_simple.c b/arch/powerpc/platforms/52xx/mpc5200_simple.c
index 792a301a0bf0..7e0e4c34a40b 100644
--- a/arch/powerpc/platforms/52xx/mpc5200_simple.c
+++ b/arch/powerpc/platforms/52xx/mpc5200_simple.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Support for 'mpc5200-simple-platform' compatible boards.
*
* Written by Marian Balakowicz <m8@semihalf.com>
* Copyright (C) 2007 Semihalf
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Description:
* This code implements support for a simple MPC52xx based boards which
* do not need a custom platform specific setup. Such boards are
@@ -26,8 +22,8 @@
*/
#undef DEBUG
+#include <linux/of.h>
#include <asm/time.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/mpc52xx.h>
@@ -44,8 +40,6 @@ static void __init mpc5200_simple_setup_arch(void)
/* Some mpc5200 & mpc5200b related configuration */
mpc5200_setup_xlb_arbiter();
-
- mpc52xx_setup_pci();
}
/* list of the supported boards */
@@ -65,21 +59,13 @@ static const char *board[] __initdata = {
NULL
};
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc5200_simple_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
define_machine(mpc5200_simple_platform) {
.name = "mpc5200-simple-platform",
- .probe = mpc5200_simple_probe,
+ .compatibles = board,
.setup_arch = mpc5200_simple_setup_arch,
+ .discover_phbs = mpc52xx_setup_pci,
.init = mpc52xx_declare_of_platform_devices,
.init_IRQ = mpc52xx_init_irq,
.get_irq = mpc52xx_get_irq,
.restart = mpc52xx_restart,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c
index d7e94f49532a..253421ffb4e5 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_common.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c
@@ -12,23 +12,21 @@
#undef DEBUG
-#include <linux/gpio.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
+#include <linux/of_address.h>
#include <linux/of_platform.h>
-#include <linux/of_gpio.h>
#include <linux/export.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/mpc52xx.h>
/* MPC5200 device tree match tables */
-static struct of_device_id mpc52xx_xlb_ids[] __initdata = {
+static const struct of_device_id mpc52xx_xlb_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-xlb", },
{ .compatible = "mpc5200-xlb", },
{}
};
-static struct of_device_id mpc52xx_bus_ids[] __initdata = {
+static const struct of_device_id mpc52xx_bus_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-immr", },
{ .compatible = "fsl,mpc5200b-immr", },
{ .compatible = "simple-bus", },
@@ -108,21 +106,21 @@ void __init mpc52xx_declare_of_platform_devices(void)
/*
* match tables used by mpc52xx_map_common_devices()
*/
-static struct of_device_id mpc52xx_gpt_ids[] __initdata = {
+static const struct of_device_id mpc52xx_gpt_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-gpt", },
{ .compatible = "mpc5200-gpt", }, /* old */
{}
};
-static struct of_device_id mpc52xx_cdm_ids[] __initdata = {
+static const struct of_device_id mpc52xx_cdm_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-cdm", },
{ .compatible = "mpc5200-cdm", }, /* old */
{}
};
-static const struct of_device_id mpc52xx_gpio_simple[] = {
+static const struct of_device_id mpc52xx_gpio_simple[] __initconst = {
{ .compatible = "fsl,mpc5200-gpio", },
{}
};
-static const struct of_device_id mpc52xx_gpio_wkup[] = {
+static const struct of_device_id mpc52xx_gpio_wkup[] __initconst = {
{ .compatible = "fsl,mpc5200-gpio-wkup", },
{}
};
@@ -141,8 +139,8 @@ mpc52xx_map_common_devices(void)
* on a gpt0, so check has-wdt property before mapping.
*/
for_each_matching_node(np, mpc52xx_gpt_ids) {
- if (of_get_property(np, "fsl,has-wdt", NULL) ||
- of_get_property(np, "has-wdt", NULL)) {
+ if (of_property_read_bool(np, "fsl,has-wdt") ||
+ of_property_read_bool(np, "has-wdt")) {
mpc52xx_wdt = of_iomap(np, 0);
of_node_put(np);
break;
@@ -204,47 +202,9 @@ int mpc52xx_set_psc_clkdiv(int psc_id, int clkdiv)
EXPORT_SYMBOL(mpc52xx_set_psc_clkdiv);
/**
- * mpc52xx_get_xtal_freq - Get SYS_XTAL_IN frequency for a device
- *
- * @node: device node
- *
- * Returns the frequency of the external oscillator clock connected
- * to the SYS_XTAL_IN pin, or 0 if it cannot be determined.
- */
-unsigned int mpc52xx_get_xtal_freq(struct device_node *node)
-{
- u32 val;
- unsigned int freq;
-
- if (!mpc52xx_cdm)
- return 0;
-
- freq = mpc5xxx_get_bus_frequency(node);
- if (!freq)
- return 0;
-
- if (in_8(&mpc52xx_cdm->ipb_clk_sel) & 0x1)
- freq *= 2;
-
- val = in_be32(&mpc52xx_cdm->rstcfg);
- if (val & (1 << 5))
- freq *= 8;
- else
- freq *= 4;
- if (val & (1 << 6))
- freq /= 12;
- else
- freq /= 16;
-
- return freq;
-}
-EXPORT_SYMBOL(mpc52xx_get_xtal_freq);
-
-/**
* mpc52xx_restart: ppc_md->restart hook for mpc5200 using the watchdog timer
*/
-void
-mpc52xx_restart(char *cmd)
+void __noreturn mpc52xx_restart(char *cmd)
{
local_irq_disable();
@@ -309,7 +269,7 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number)
spin_lock_irqsave(&gpio_lock, flags);
- /* Reconfiure pin-muxing to gpio */
+ /* Reconfigure pin-muxing to gpio */
mux = in_be32(&simple_gpio->port_config);
out_be32(&simple_gpio->port_config, mux & (~gpio));
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 692998244d2c..7748b6641a3c 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC5200 General Purpose Timer device driver
*
* Copyright (c) 2009 Secret Lab Technologies Ltd.
* Copyright (c) 2008 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This file is a driver for the the General Purpose Timer (gpt) devices
+ * This file is a driver for the General Purpose Timer (gpt) devices
* found on the MPC5200 SoC. Each timer has an IO pin which can be used
* for GPIO or can be used to raise interrupts. The timer function can
* be used independently from the IO pin, or it can be used to control
@@ -52,16 +48,18 @@
* the output mode. This driver does not change the output mode setting.
*/
-#include <linux/device.h>
+#include <linux/gpio/driver.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/of_gpio.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <linux/kernel.h>
+#include <linux/property.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/watchdog.h>
@@ -90,7 +88,7 @@ struct mpc52xx_gpt_priv {
struct list_head list; /* List of all GPT devices */
struct device *dev;
struct mpc52xx_gpt __iomem *regs;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct irq_domain *irqhost;
u32 ipb_freq;
u8 wdt_mode;
@@ -141,9 +139,9 @@ static void mpc52xx_gpt_irq_unmask(struct irq_data *d)
struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
setbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static void mpc52xx_gpt_irq_mask(struct irq_data *d)
@@ -151,9 +149,9 @@ static void mpc52xx_gpt_irq_mask(struct irq_data *d)
struct mpc52xx_gpt_priv *gpt = irq_data_get_irq_chip_data(d);
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_IRQ_EN);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
}
static void mpc52xx_gpt_irq_ack(struct irq_data *d)
@@ -171,14 +169,14 @@ static int mpc52xx_gpt_irq_set_type(struct irq_data *d, unsigned int flow_type)
dev_dbg(gpt->dev, "%s: virq=%i type=%x\n", __func__, d->irq, flow_type);
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
reg = in_be32(&gpt->regs->mode) & ~MPC52xx_GPT_MODE_ICT_MASK;
if (flow_type & IRQF_TRIGGER_RISING)
reg |= MPC52xx_GPT_MODE_ICT_RISING;
if (flow_type & IRQF_TRIGGER_FALLING)
reg |= MPC52xx_GPT_MODE_ICT_FALLING;
out_be32(&gpt->regs->mode, reg);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -191,17 +189,14 @@ static struct irq_chip mpc52xx_gpt_irq_chip = {
.irq_set_type = mpc52xx_gpt_irq_set_type,
};
-void mpc52xx_gpt_irq_cascade(unsigned int virq, struct irq_desc *desc)
+static void mpc52xx_gpt_irq_cascade(struct irq_desc *desc)
{
- struct mpc52xx_gpt_priv *gpt = irq_get_handler_data(virq);
- int sub_virq;
+ struct mpc52xx_gpt_priv *gpt = irq_desc_get_handler_data(desc);
u32 status;
status = in_be32(&gpt->regs->status) & MPC52xx_GPT_STATUS_IRQMASK;
- if (status) {
- sub_virq = irq_linear_revmap(gpt->irqhost, 0);
- generic_handle_irq(sub_virq);
- }
+ if (status)
+ generic_handle_domain_irq(gpt->irqhost, 0);
}
static int mpc52xx_gpt_irq_map(struct irq_domain *h, unsigned int virq,
@@ -226,7 +221,7 @@ static int mpc52xx_gpt_irq_xlate(struct irq_domain *h, struct device_node *ct,
dev_dbg(gpt->dev, "%s: flags=%i\n", __func__, intspec[0]);
if ((intsize < 1) || (intspec[0] > 3)) {
- dev_err(gpt->dev, "bad irq specifier in %s\n", ct->full_name);
+ dev_err(gpt->dev, "bad irq specifier in %pOF\n", ct);
return -EINVAL;
}
@@ -252,9 +247,9 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
if (!cascade_virq)
return;
- gpt->irqhost = irq_domain_add_linear(node, 1, &mpc52xx_gpt_irq_ops, gpt);
+ gpt->irqhost = irq_domain_create_linear(of_fwnode_handle(node), 1, &mpc52xx_gpt_irq_ops, gpt);
if (!gpt->irqhost) {
- dev_err(gpt->dev, "irq_domain_add_linear() failed\n");
+ dev_err(gpt->dev, "irq_domain_create_linear() failed\n");
return;
}
@@ -264,11 +259,11 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
/* If the GPT is currently disabled, then change it to be in Input
* Capture mode. If the mode is non-zero, then the pin could be
* already in use for something. */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
mode = in_be32(&gpt->regs->mode);
if ((mode & MPC52xx_GPT_MODE_MS_MASK) == 0)
out_be32(&gpt->regs->mode, mode | MPC52xx_GPT_MODE_MS_IC);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
dev_dbg(gpt->dev, "%s() complete. virq=%i\n", __func__, cascade_virq);
}
@@ -278,43 +273,40 @@ mpc52xx_gpt_irq_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
* GPIOLIB hooks
*/
#if defined(CONFIG_GPIOLIB)
-static inline struct mpc52xx_gpt_priv *gc_to_mpc52xx_gpt(struct gpio_chip *gc)
-{
- return container_of(gc, struct mpc52xx_gpt_priv, gc);
-}
-
static int mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc);
+ struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
return (in_be32(&gpt->regs->status) >> 8) & 1;
}
-static void
+static int
mpc52xx_gpt_gpio_set(struct gpio_chip *gc, unsigned int gpio, int v)
{
- struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc);
+ struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
unsigned long flags;
u32 r;
dev_dbg(gpt->dev, "%s: gpio:%d v:%d\n", __func__, gpio, v);
r = v ? MPC52xx_GPT_MODE_GPIO_OUT_HIGH : MPC52xx_GPT_MODE_GPIO_OUT_LOW;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK, r);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
+
+ return 0;
}
static int mpc52xx_gpt_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
- struct mpc52xx_gpt_priv *gpt = gc_to_mpc52xx_gpt(gc);
+ struct mpc52xx_gpt_priv *gpt = gpiochip_get_data(gc);
unsigned long flags;
dev_dbg(gpt->dev, "%s: gpio:%d\n", __func__, gpio);
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_GPIO_MASK);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -326,17 +318,15 @@ mpc52xx_gpt_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
return 0;
}
-static void
-mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
+static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt)
{
int rc;
- /* Only setup GPIO if the device tree claims the GPT is
- * a GPIO controller */
- if (!of_find_property(node, "gpio-controller", NULL))
+ /* Only setup GPIO if the device claims the GPT is a GPIO controller */
+ if (!device_property_present(gpt->dev, "gpio-controller"))
return;
- gpt->gc.label = kstrdup(node->full_name, GFP_KERNEL);
+ gpt->gc.label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(gpt->dev));
if (!gpt->gc.label) {
dev_err(gpt->dev, "out of memory\n");
return;
@@ -348,21 +338,20 @@ mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt, struct device_node *node)
gpt->gc.get = mpc52xx_gpt_gpio_get;
gpt->gc.set = mpc52xx_gpt_gpio_set;
gpt->gc.base = -1;
- gpt->gc.of_node = node;
+ gpt->gc.parent = gpt->dev;
/* Setup external pin in GPIO mode */
clrsetbits_be32(&gpt->regs->mode, MPC52xx_GPT_MODE_MS_MASK,
MPC52xx_GPT_MODE_MS_GPIO);
- rc = gpiochip_add(&gpt->gc);
+ rc = gpiochip_add_data(&gpt->gc, gpt);
if (rc)
- dev_err(gpt->dev, "gpiochip_add() failed; rc=%i\n", rc);
+ dev_err(gpt->dev, "gpiochip_add_data() failed; rc=%i\n", rc);
dev_dbg(gpt->dev, "%s() complete.\n", __func__);
}
#else /* defined(CONFIG_GPIOLIB) */
-static void
-mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *p, struct device_node *np) { }
+static void mpc52xx_gpt_gpio_setup(struct mpc52xx_gpt_priv *gpt) { }
#endif /* defined(CONFIG_GPIOLIB) */
/***********************************************************************
@@ -382,7 +371,7 @@ struct mpc52xx_gpt_priv *mpc52xx_gpt_from_irq(int irq)
mutex_lock(&mpc52xx_gpt_list_mutex);
list_for_each(pos, &mpc52xx_gpt_list) {
gpt = container_of(pos, struct mpc52xx_gpt_priv, list);
- if (gpt->irqhost && irq == irq_linear_revmap(gpt->irqhost, 0)) {
+ if (gpt->irqhost && irq == irq_find_mapping(gpt->irqhost, 0)) {
mutex_unlock(&mpc52xx_gpt_list_mutex);
return gpt;
}
@@ -410,7 +399,7 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
set |= MPC52xx_GPT_MODE_CONTINUOUS;
/* Determine the number of clocks in the requested period. 64 bit
- * arithmatic is done here to preserve the precision until the value
+ * arithmetic is done here to preserve the precision until the value
* is scaled back down into the u32 range. Period is in 'ns', bus
* frequency is in Hz. */
clocks = period * (u64)gpt->ipb_freq;
@@ -441,16 +430,16 @@ static int mpc52xx_gpt_do_start(struct mpc52xx_gpt_priv *gpt, u64 period,
}
/* Set and enable the timer, reject an attempt to use a wdt as gpt */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
if (as_wdt)
gpt->wdt_mode |= MPC52xx_GPT_IS_WDT;
else if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return -EBUSY;
}
out_be32(&gpt->regs->count, prescale << 16 | clocks);
clrsetbits_be32(&gpt->regs->mode, clear, set);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
@@ -481,14 +470,14 @@ int mpc52xx_gpt_stop_timer(struct mpc52xx_gpt_priv *gpt)
unsigned long flags;
/* reject the operation if the timer is used as watchdog (gpt 0 only) */
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
if ((gpt->wdt_mode & MPC52xx_GPT_IS_WDT) != 0) {
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return -EBUSY;
}
clrbits32(&gpt->regs->mode, MPC52xx_GPT_MODE_COUNTER_ENABLE);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
return 0;
}
EXPORT_SYMBOL(mpc52xx_gpt_stop_timer);
@@ -505,16 +494,16 @@ u64 mpc52xx_gpt_timer_period(struct mpc52xx_gpt_priv *gpt)
u64 prescale;
unsigned long flags;
- spin_lock_irqsave(&gpt->lock, flags);
+ raw_spin_lock_irqsave(&gpt->lock, flags);
period = in_be32(&gpt->regs->count);
- spin_unlock_irqrestore(&gpt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt->lock, flags);
prescale = period >> 16;
period &= 0xffff;
if (prescale == 0)
prescale = 0x10000;
period = period * prescale * 1000000000ULL;
- do_div(period, (u64)gpt->ipb_freq);
+ do_div(period, gpt->ipb_freq);
return period;
}
EXPORT_SYMBOL(mpc52xx_gpt_timer_period);
@@ -537,9 +526,9 @@ static inline void mpc52xx_gpt_wdt_ping(struct mpc52xx_gpt_priv *gpt_wdt)
{
unsigned long flags;
- spin_lock_irqsave(&gpt_wdt->lock, flags);
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
out_8((u8 *) &gpt_wdt->regs->mode, MPC52xx_GPT_MODE_WDT_PING);
- spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
}
/* wdt misc device api */
@@ -591,6 +580,7 @@ static long mpc52xx_wdt_ioctl(struct file *file, unsigned int cmd,
if (ret)
break;
/* fall through and return the timeout */
+ fallthrough;
case WDIOC_GETTIMEOUT:
/* we need to round here as to avoid e.g. the following
@@ -633,7 +623,7 @@ static int mpc52xx_wdt_open(struct inode *inode, struct file *file)
}
file->private_data = mpc52xx_gpt_wdt;
- return nonseekable_open(inode, file);
+ return stream_open(inode, file);
}
static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
@@ -643,11 +633,11 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
struct mpc52xx_gpt_priv *gpt_wdt = file->private_data;
unsigned long flags;
- spin_lock_irqsave(&gpt_wdt->lock, flags);
+ raw_spin_lock_irqsave(&gpt_wdt->lock, flags);
clrbits32(&gpt_wdt->regs->mode,
MPC52xx_GPT_MODE_COUNTER_ENABLE | MPC52xx_GPT_MODE_WDT_EN);
gpt_wdt->wdt_mode &= ~MPC52xx_GPT_IS_WDT;
- spin_unlock_irqrestore(&gpt_wdt->lock, flags);
+ raw_spin_unlock_irqrestore(&gpt_wdt->lock, flags);
#endif
clear_bit(0, &wdt_is_active);
return 0;
@@ -656,9 +646,9 @@ static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
static const struct file_operations mpc52xx_wdt_fops = {
.owner = THIS_MODULE,
- .llseek = no_llseek,
.write = mpc52xx_wdt_write,
.unlocked_ioctl = mpc52xx_wdt_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = mpc52xx_wdt_open,
.release = mpc52xx_wdt_release,
};
@@ -724,22 +714,20 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
{
struct mpc52xx_gpt_priv *gpt;
- gpt = kzalloc(sizeof *gpt, GFP_KERNEL);
+ gpt = devm_kzalloc(&ofdev->dev, sizeof *gpt, GFP_KERNEL);
if (!gpt)
return -ENOMEM;
- spin_lock_init(&gpt->lock);
+ raw_spin_lock_init(&gpt->lock);
gpt->dev = &ofdev->dev;
- gpt->ipb_freq = mpc5xxx_get_bus_frequency(ofdev->dev.of_node);
+ gpt->ipb_freq = mpc5xxx_get_bus_frequency(&ofdev->dev);
gpt->regs = of_iomap(ofdev->dev.of_node, 0);
- if (!gpt->regs) {
- kfree(gpt);
+ if (!gpt->regs)
return -ENOMEM;
- }
dev_set_drvdata(&ofdev->dev, gpt);
- mpc52xx_gpt_gpio_setup(gpt, ofdev->dev.of_node);
+ mpc52xx_gpt_gpio_setup(gpt);
mpc52xx_gpt_irq_setup(gpt, ofdev->dev.of_node);
mutex_lock(&mpc52xx_gpt_list_mutex);
@@ -747,8 +735,8 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
mutex_unlock(&mpc52xx_gpt_list_mutex);
/* check if this device could be a watchdog */
- if (of_get_property(ofdev->dev.of_node, "fsl,has-wdt", NULL) ||
- of_get_property(ofdev->dev.of_node, "has-wdt", NULL)) {
+ if (of_property_read_bool(ofdev->dev.of_node, "fsl,has-wdt") ||
+ of_property_read_bool(ofdev->dev.of_node, "has-wdt")) {
const u32 *on_boot_wdt;
gpt->wdt_mode = MPC52xx_GPT_CAN_WDT;
@@ -765,11 +753,6 @@ static int mpc52xx_gpt_probe(struct platform_device *ofdev)
return 0;
}
-static int mpc52xx_gpt_remove(struct platform_device *ofdev)
-{
- return -EBUSY;
-}
-
static const struct of_device_id mpc52xx_gpt_match[] = {
{ .compatible = "fsl,mpc5200-gpt", },
@@ -782,11 +765,10 @@ static const struct of_device_id mpc52xx_gpt_match[] = {
static struct platform_driver mpc52xx_gpt_driver = {
.driver = {
.name = "mpc52xx-gpt",
- .owner = THIS_MODULE,
+ .suppress_bind_attrs = true,
.of_match_table = mpc52xx_gpt_match,
},
.probe = mpc52xx_gpt_probe,
- .remove = mpc52xx_gpt_remove,
};
static int __init mpc52xx_gpt_init(void)
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
deleted file mode 100644
index 37f7a89c10f2..000000000000
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * LocalPlus Bus FIFO driver for the Freescale MPC52xx.
- *
- * Copyright (C) 2009 Secret Lab Technologies Ltd.
- *
- * This file is released under the GPLv2
- *
- * Todo:
- * - Add support for multiple requests to be queued.
- */
-
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/mpc52xx.h>
-#include <asm/time.h>
-
-#include <linux/fsl/bestcomm/bestcomm.h>
-#include <linux/fsl/bestcomm/bestcomm_priv.h>
-#include <linux/fsl/bestcomm/gen_bd.h>
-
-MODULE_AUTHOR("Grant Likely <grant.likely@secretlab.ca>");
-MODULE_DESCRIPTION("MPC5200 LocalPlus FIFO device driver");
-MODULE_LICENSE("GPL");
-
-#define LPBFIFO_REG_PACKET_SIZE (0x00)
-#define LPBFIFO_REG_START_ADDRESS (0x04)
-#define LPBFIFO_REG_CONTROL (0x08)
-#define LPBFIFO_REG_ENABLE (0x0C)
-#define LPBFIFO_REG_BYTES_DONE_STATUS (0x14)
-#define LPBFIFO_REG_FIFO_DATA (0x40)
-#define LPBFIFO_REG_FIFO_STATUS (0x44)
-#define LPBFIFO_REG_FIFO_CONTROL (0x48)
-#define LPBFIFO_REG_FIFO_ALARM (0x4C)
-
-struct mpc52xx_lpbfifo {
- struct device *dev;
- phys_addr_t regs_phys;
- void __iomem *regs;
- int irq;
- spinlock_t lock;
-
- struct bcom_task *bcom_tx_task;
- struct bcom_task *bcom_rx_task;
- struct bcom_task *bcom_cur_task;
-
- /* Current state data */
- struct mpc52xx_lpbfifo_request *req;
- int dma_irqs_enabled;
-};
-
-/* The MPC5200 has only one fifo, so only need one instance structure */
-static struct mpc52xx_lpbfifo lpbfifo;
-
-/**
- * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred
- */
-static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
-{
- size_t transfer_size = req->size - req->pos;
- struct bcom_bd *bd;
- void __iomem *reg;
- u32 *data;
- int i;
- int bit_fields;
- int dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
- int write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
- int poll_dma = req->flags & MPC52XX_LPBFIFO_FLAG_POLL_DMA;
-
- /* Set and clear the reset bits; is good practice in User Manual */
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
-
- /* set master enable bit */
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x00000001);
- if (!dma) {
- /* While the FIFO can be setup for transfer sizes as large as
- * 16M-1, the FIFO itself is only 512 bytes deep and it does
- * not generate interrupts for FIFO full events (only transfer
- * complete will raise an IRQ). Therefore when not using
- * Bestcomm to drive the FIFO it needs to either be polled, or
- * transfers need to constrained to the size of the fifo.
- *
- * This driver restricts the size of the transfer
- */
- if (transfer_size > 512)
- transfer_size = 512;
-
- /* Load the FIFO with data */
- if (write) {
- reg = lpbfifo.regs + LPBFIFO_REG_FIFO_DATA;
- data = req->data + req->pos;
- for (i = 0; i < transfer_size; i += 4)
- out_be32(reg, *data++);
- }
-
- /* Unmask both error and completion irqs */
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x00000301);
- } else {
- /* Choose the correct direction
- *
- * Configure the watermarks so DMA will always complete correctly.
- * It may be worth experimenting with the ALARM value to see if
- * there is a performance impacit. However, if it is wrong there
- * is a risk of DMA not transferring the last chunk of data
- */
- if (write) {
- out_be32(lpbfifo.regs + LPBFIFO_REG_FIFO_ALARM, 0x1e4);
- out_8(lpbfifo.regs + LPBFIFO_REG_FIFO_CONTROL, 7);
- lpbfifo.bcom_cur_task = lpbfifo.bcom_tx_task;
- } else {
- out_be32(lpbfifo.regs + LPBFIFO_REG_FIFO_ALARM, 0x1ff);
- out_8(lpbfifo.regs + LPBFIFO_REG_FIFO_CONTROL, 0);
- lpbfifo.bcom_cur_task = lpbfifo.bcom_rx_task;
-
- if (poll_dma) {
- if (lpbfifo.dma_irqs_enabled) {
- disable_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task));
- lpbfifo.dma_irqs_enabled = 0;
- }
- } else {
- if (!lpbfifo.dma_irqs_enabled) {
- enable_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task));
- lpbfifo.dma_irqs_enabled = 1;
- }
- }
- }
-
- bd = bcom_prepare_next_buffer(lpbfifo.bcom_cur_task);
- bd->status = transfer_size;
- if (!write) {
- /*
- * In the DMA read case, the DMA doesn't complete,
- * possibly due to incorrect watermarks in the ALARM
- * and CONTROL regs. For now instead of trying to
- * determine the right watermarks that will make this
- * work, just increase the number of bytes the FIFO is
- * expecting.
- *
- * When submitting another operation, the FIFO will get
- * reset, so the condition of the FIFO waiting for a
- * non-existent 4 bytes will get cleared.
- */
- transfer_size += 4; /* BLECH! */
- }
- bd->data[0] = req->data_phys + req->pos;
- bcom_submit_next_buffer(lpbfifo.bcom_cur_task, NULL);
-
- /* error irq & master enabled bit */
- bit_fields = 0x00000201;
-
- /* Unmask irqs */
- if (write && (!poll_dma))
- bit_fields |= 0x00000100; /* completion irq too */
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, bit_fields);
- }
-
- /* Set transfer size, width, chip select and READ mode */
- out_be32(lpbfifo.regs + LPBFIFO_REG_START_ADDRESS,
- req->offset + req->pos);
- out_be32(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, transfer_size);
-
- bit_fields = req->cs << 24 | 0x000008;
- if (!write)
- bit_fields |= 0x010000; /* read mode */
- out_be32(lpbfifo.regs + LPBFIFO_REG_CONTROL, bit_fields);
-
- /* Kick it off */
- if (!lpbfifo.req->defer_xfer_start)
- out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01);
- if (dma)
- bcom_enable(lpbfifo.bcom_cur_task);
-}
-
-/**
- * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO
- *
- * On transmit, the dma completion irq triggers before the fifo completion
- * triggers. Handle the dma completion here instead of the LPB FIFO Bestcomm
- * task completion irq because everything is not really done until the LPB FIFO
- * completion irq triggers.
- *
- * In other words:
- * For DMA, on receive, the "Fat Lady" is the bestcom completion irq. on
- * transmit, the fifo completion irq is the "Fat Lady". The opera (or in this
- * case the DMA/FIFO operation) is not finished until the "Fat Lady" sings.
- *
- * Reasons for entering this routine:
- * 1) PIO mode rx and tx completion irq
- * 2) DMA interrupt mode tx completion irq
- * 3) DMA polled mode tx
- *
- * Exit conditions:
- * 1) Transfer aborted
- * 2) FIFO complete without DMA; more data to do
- * 3) FIFO complete without DMA; all data transferred
- * 4) FIFO complete using DMA
- *
- * Condition 1 can occur regardless of whether or not DMA is used.
- * It requires executing the callback to report the error and exiting
- * immediately.
- *
- * Condition 2 requires programming the FIFO with the next block of data
- *
- * Condition 3 requires executing the callback to report completion
- *
- * Condition 4 means the same as 3, except that we also retrieve the bcom
- * buffer so DMA doesn't get clogged up.
- *
- * To make things trickier, the spinlock must be dropped before
- * executing the callback, otherwise we could end up with a deadlock
- * or nested spinlock condition. The out path is non-trivial, so
- * extra fiddling is done to make sure all paths lead to the same
- * outbound code.
- */
-static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
-{
- struct mpc52xx_lpbfifo_request *req;
- u32 status = in_8(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS);
- void __iomem *reg;
- u32 *data;
- int count, i;
- int do_callback = 0;
- u32 ts;
- unsigned long flags;
- int dma, write, poll_dma;
-
- spin_lock_irqsave(&lpbfifo.lock, flags);
- ts = get_tbl();
-
- req = lpbfifo.req;
- if (!req) {
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- pr_err("bogus LPBFIFO IRQ\n");
- return IRQ_HANDLED;
- }
-
- dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
- write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
- poll_dma = req->flags & MPC52XX_LPBFIFO_FLAG_POLL_DMA;
-
- if (dma && !write) {
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- pr_err("bogus LPBFIFO IRQ (dma and not writing)\n");
- return IRQ_HANDLED;
- }
-
- if ((status & 0x01) == 0) {
- goto out;
- }
-
- /* check abort bit */
- if (status & 0x10) {
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
- do_callback = 1;
- goto out;
- }
-
- /* Read result from hardware */
- count = in_be32(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS);
- count &= 0x00ffffff;
-
- if (!dma && !write) {
- /* copy the data out of the FIFO */
- reg = lpbfifo.regs + LPBFIFO_REG_FIFO_DATA;
- data = req->data + req->pos;
- for (i = 0; i < count; i += 4)
- *data++ = in_be32(reg);
- }
-
- /* Update transfer position and count */
- req->pos += count;
-
- /* Decide what to do next */
- if (req->size - req->pos)
- mpc52xx_lpbfifo_kick(req); /* more work to do */
- else
- do_callback = 1;
-
- out:
- /* Clear the IRQ */
- out_8(lpbfifo.regs + LPBFIFO_REG_BYTES_DONE_STATUS, 0x01);
-
- if (dma && (status & 0x11)) {
- /*
- * Count the DMA as complete only when the FIFO completion
- * status or abort bits are set.
- *
- * (status & 0x01) should always be the case except sometimes
- * when using polled DMA.
- *
- * (status & 0x10) {transfer aborted}: This case needs more
- * testing.
- */
- bcom_retrieve_buffer(lpbfifo.bcom_cur_task, &status, NULL);
- }
- req->last_byte = ((u8 *)req->data)[req->size - 1];
-
- /* When the do_callback flag is set; it means the transfer is finished
- * so set the FIFO as idle */
- if (do_callback)
- lpbfifo.req = NULL;
-
- if (irq != 0) /* don't increment on polled case */
- req->irq_count++;
-
- req->irq_ticks += get_tbl() - ts;
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
-
- /* Spinlock is released; it is now safe to call the callback */
- if (do_callback && req->callback)
- req->callback(req);
-
- return IRQ_HANDLED;
-}
-
-/**
- * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task
- *
- * Only used when receiving data.
- */
-static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
-{
- struct mpc52xx_lpbfifo_request *req;
- unsigned long flags;
- u32 status;
- u32 ts;
-
- spin_lock_irqsave(&lpbfifo.lock, flags);
- ts = get_tbl();
-
- req = lpbfifo.req;
- if (!req || (req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA)) {
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- return IRQ_HANDLED;
- }
-
- if (irq != 0) /* don't increment on polled case */
- req->irq_count++;
-
- if (!bcom_buffer_done(lpbfifo.bcom_cur_task)) {
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
-
- req->buffer_not_done_cnt++;
- if ((req->buffer_not_done_cnt % 1000) == 0)
- pr_err("transfer stalled\n");
-
- return IRQ_HANDLED;
- }
-
- bcom_retrieve_buffer(lpbfifo.bcom_cur_task, &status, NULL);
-
- req->last_byte = ((u8 *)req->data)[req->size - 1];
-
- req->pos = status & 0x00ffffff;
-
- /* Mark the FIFO as idle */
- lpbfifo.req = NULL;
-
- /* Release the lock before calling out to the callback. */
- req->irq_ticks += get_tbl() - ts;
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
-
- if (req->callback)
- req->callback(req);
-
- return IRQ_HANDLED;
-}
-
-/**
- * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion
- */
-void mpc52xx_lpbfifo_poll(void)
-{
- struct mpc52xx_lpbfifo_request *req = lpbfifo.req;
- int dma = !(req->flags & MPC52XX_LPBFIFO_FLAG_NO_DMA);
- int write = req->flags & MPC52XX_LPBFIFO_FLAG_WRITE;
-
- /*
- * For more information, see comments on the "Fat Lady"
- */
- if (dma && write)
- mpc52xx_lpbfifo_irq(0, NULL);
- else
- mpc52xx_lpbfifo_bcom_irq(0, NULL);
-}
-EXPORT_SYMBOL(mpc52xx_lpbfifo_poll);
-
-/**
- * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request.
- * @req: Pointer to request structure
- */
-int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req)
-{
- unsigned long flags;
-
- if (!lpbfifo.regs)
- return -ENODEV;
-
- spin_lock_irqsave(&lpbfifo.lock, flags);
-
- /* If the req pointer is already set, then a transfer is in progress */
- if (lpbfifo.req) {
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- return -EBUSY;
- }
-
- /* Setup the transfer */
- lpbfifo.req = req;
- req->irq_count = 0;
- req->irq_ticks = 0;
- req->buffer_not_done_cnt = 0;
- req->pos = 0;
-
- mpc52xx_lpbfifo_kick(req);
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- return 0;
-}
-EXPORT_SYMBOL(mpc52xx_lpbfifo_submit);
-
-int mpc52xx_lpbfifo_start_xfer(struct mpc52xx_lpbfifo_request *req)
-{
- unsigned long flags;
-
- if (!lpbfifo.regs)
- return -ENODEV;
-
- spin_lock_irqsave(&lpbfifo.lock, flags);
-
- /*
- * If the req pointer is already set and a transfer was
- * started on submit, then this transfer is in progress
- */
- if (lpbfifo.req && !lpbfifo.req->defer_xfer_start) {
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- return -EBUSY;
- }
-
- /*
- * If the req was previously submitted but not
- * started, start it now
- */
- if (lpbfifo.req && lpbfifo.req == req &&
- lpbfifo.req->defer_xfer_start) {
- out_8(lpbfifo.regs + LPBFIFO_REG_PACKET_SIZE, 0x01);
- }
-
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
- return 0;
-}
-EXPORT_SYMBOL(mpc52xx_lpbfifo_start_xfer);
-
-void mpc52xx_lpbfifo_abort(struct mpc52xx_lpbfifo_request *req)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&lpbfifo.lock, flags);
- if (lpbfifo.req == req) {
- /* Put it into reset and clear the state */
- bcom_gen_bd_rx_reset(lpbfifo.bcom_rx_task);
- bcom_gen_bd_tx_reset(lpbfifo.bcom_tx_task);
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
- lpbfifo.req = NULL;
- }
- spin_unlock_irqrestore(&lpbfifo.lock, flags);
-}
-EXPORT_SYMBOL(mpc52xx_lpbfifo_abort);
-
-static int mpc52xx_lpbfifo_probe(struct platform_device *op)
-{
- struct resource res;
- int rc = -ENOMEM;
-
- if (lpbfifo.dev != NULL)
- return -ENOSPC;
-
- lpbfifo.irq = irq_of_parse_and_map(op->dev.of_node, 0);
- if (!lpbfifo.irq)
- return -ENODEV;
-
- if (of_address_to_resource(op->dev.of_node, 0, &res))
- return -ENODEV;
- lpbfifo.regs_phys = res.start;
- lpbfifo.regs = of_iomap(op->dev.of_node, 0);
- if (!lpbfifo.regs)
- return -ENOMEM;
-
- spin_lock_init(&lpbfifo.lock);
-
- /* Put FIFO into reset */
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
-
- /* Register the interrupt handler */
- rc = request_irq(lpbfifo.irq, mpc52xx_lpbfifo_irq, 0,
- "mpc52xx-lpbfifo", &lpbfifo);
- if (rc)
- goto err_irq;
-
- /* Request the Bestcomm receive (fifo --> memory) task and IRQ */
- lpbfifo.bcom_rx_task =
- bcom_gen_bd_rx_init(2, res.start + LPBFIFO_REG_FIFO_DATA,
- BCOM_INITIATOR_SCLPC, BCOM_IPR_SCLPC,
- 16*1024*1024);
- if (!lpbfifo.bcom_rx_task)
- goto err_bcom_rx;
-
- rc = request_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task),
- mpc52xx_lpbfifo_bcom_irq, 0,
- "mpc52xx-lpbfifo-rx", &lpbfifo);
- if (rc)
- goto err_bcom_rx_irq;
-
- lpbfifo.dma_irqs_enabled = 1;
-
- /* Request the Bestcomm transmit (memory --> fifo) task and IRQ */
- lpbfifo.bcom_tx_task =
- bcom_gen_bd_tx_init(2, res.start + LPBFIFO_REG_FIFO_DATA,
- BCOM_INITIATOR_SCLPC, BCOM_IPR_SCLPC);
- if (!lpbfifo.bcom_tx_task)
- goto err_bcom_tx;
-
- lpbfifo.dev = &op->dev;
- return 0;
-
- err_bcom_tx:
- free_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), &lpbfifo);
- err_bcom_rx_irq:
- bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
- err_bcom_rx:
- err_irq:
- iounmap(lpbfifo.regs);
- lpbfifo.regs = NULL;
-
- dev_err(&op->dev, "mpc52xx_lpbfifo_probe() failed\n");
- return -ENODEV;
-}
-
-
-static int mpc52xx_lpbfifo_remove(struct platform_device *op)
-{
- if (lpbfifo.dev != &op->dev)
- return 0;
-
- /* Put FIFO in reset */
- out_be32(lpbfifo.regs + LPBFIFO_REG_ENABLE, 0x01010000);
-
- /* Release the bestcomm transmit task */
- free_irq(bcom_get_task_irq(lpbfifo.bcom_tx_task), &lpbfifo);
- bcom_gen_bd_tx_release(lpbfifo.bcom_tx_task);
-
- /* Release the bestcomm receive task */
- free_irq(bcom_get_task_irq(lpbfifo.bcom_rx_task), &lpbfifo);
- bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
-
- free_irq(lpbfifo.irq, &lpbfifo);
- iounmap(lpbfifo.regs);
- lpbfifo.regs = NULL;
- lpbfifo.dev = NULL;
-
- return 0;
-}
-
-static struct of_device_id mpc52xx_lpbfifo_match[] = {
- { .compatible = "fsl,mpc5200-lpbfifo", },
- {},
-};
-
-static struct platform_driver mpc52xx_lpbfifo_driver = {
- .driver = {
- .name = "mpc52xx-lpbfifo",
- .owner = THIS_MODULE,
- .of_match_table = mpc52xx_lpbfifo_match,
- },
- .probe = mpc52xx_lpbfifo_probe,
- .remove = mpc52xx_lpbfifo_remove,
-};
-module_platform_driver(mpc52xx_lpbfifo_driver);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pci.c b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
index e2d401ad8fbb..0ca4401ba781 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pci.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pci.c
@@ -12,7 +12,8 @@
#undef DEBUG
-#include <asm/pci.h>
+#include <linux/pci.h>
+#include <linux/of_address.h>
#include <asm/mpc52xx.h>
#include <asm/delay.h>
#include <asm/machdep.h>
@@ -242,7 +243,7 @@ mpc52xx_pci_setup(struct pci_controller *hose,
u32 tmp;
int iwcr0 = 0, iwcr1 = 0, iwcr2 = 0;
- pr_debug("mpc52xx_pci_setup(hose=%p, pci_regs=%p)\n", hose, pci_regs);
+ pr_debug("%s(hose=%p, pci_regs=%p)\n", __func__, hose, pci_regs);
/* pci_process_bridge_OF_ranges() found all our addresses for us;
* now store them in the right places */
@@ -257,11 +258,7 @@ mpc52xx_pci_setup(struct pci_controller *hose,
/* Memory windows */
res = &hose->mem_resources[0];
if (res->flags) {
- pr_debug("mem_resource[0] = "
- "{.start=%llx, .end=%llx, .flags=%llx}\n",
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned long long)res->flags);
+ pr_debug("mem_resource[0] = %pr\n", res);
out_be32(&pci_regs->iw0btar,
MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start,
resource_size(res)));
@@ -274,8 +271,7 @@ mpc52xx_pci_setup(struct pci_controller *hose,
res = &hose->mem_resources[1];
if (res->flags) {
- pr_debug("mem_resource[1] = {.start=%x, .end=%x, .flags=%lx}\n",
- res->start, res->end, res->flags);
+ pr_debug("mem_resource[1] = %pr\n", res);
out_be32(&pci_regs->iw1btar,
MPC52xx_PCI_IWBTAR_TRANSLATION(res->start, res->start,
resource_size(res)));
@@ -292,11 +288,8 @@ mpc52xx_pci_setup(struct pci_controller *hose,
printk(KERN_ERR "%s: Didn't find IO resources\n", __FILE__);
return;
}
- pr_debug(".io_resource={.start=%llx,.end=%llx,.flags=%llx} "
- ".io_base_phys=0x%p\n",
- (unsigned long long)res->start,
- (unsigned long long)res->end,
- (unsigned long long)res->flags, (void*)hose->io_base_phys);
+ pr_debug(".io_resource = %pr .io_base_phys=0x%pa\n",
+ res, &hose->io_base_phys);
out_be32(&pci_regs->iw2btar,
MPC52xx_PCI_IWBTAR_TRANSLATION(hose->io_base_phys,
res->start,
@@ -319,7 +312,7 @@ mpc52xx_pci_setup(struct pci_controller *hose,
tmp = in_be32(&pci_regs->gscr);
#if 0
- /* Reset the exteral bus ( internal PCI controller is NOT resetted ) */
+ /* Reset the exteral bus ( internal PCI controller is NOT reset ) */
/* Not necessary and can be a bad thing if for example the bootloader
is displaying a splash screen or ... Just left here for
documentation purpose if anyone need it */
@@ -334,15 +327,13 @@ mpc52xx_pci_setup(struct pci_controller *hose,
static void
mpc52xx_pci_fixup_resources(struct pci_dev *dev)
{
- int i;
+ struct resource *res;
- pr_debug("mpc52xx_pci_fixup_resources() %.4x:%.4x\n",
- dev->vendor, dev->device);
+ pr_debug("%s() %.4x:%.4x\n", __func__, dev->vendor, dev->device);
/* We don't rely on boot loader for PCI and resets all
devices */
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- struct resource *res = &dev->resource[i];
+ pci_dev_for_each_resource(dev, res) {
if (res->end > res->start) { /* Only valid resources */
res->end -= res->start;
res->start = 0;
@@ -369,19 +360,19 @@ mpc52xx_add_bridge(struct device_node *node)
const int *bus_range;
struct resource rsrc;
- pr_debug("Adding MPC52xx PCI host bridge %s\n", node->full_name);
+ pr_debug("Adding MPC52xx PCI host bridge %pOF\n", node);
pci_add_flags(PCI_REASSIGN_ALL_BUS);
if (of_address_to_resource(node, 0, &rsrc) != 0) {
- printk(KERN_ERR "Can't get %s resources\n", node->full_name);
+ printk(KERN_ERR "Can't get %pOF resources\n", node);
return -EINVAL;
}
bus_range = of_get_property(node, "bus-range", &len);
if (bus_range == NULL || len < 2 * sizeof(int)) {
- printk(KERN_WARNING "Can't get %s bus-range, assume bus 0\n",
- node->full_name);
+ printk(KERN_WARNING "Can't get %pOF bus-range, assume bus 0\n",
+ node);
bus_range = NULL;
}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
index 2898b737deb7..eb6a4e745c08 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c
@@ -101,8 +101,9 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <asm/io.h>
-#include <asm/prom.h>
#include <asm/mpc52xx.h>
/* HW IRQ mapping */
@@ -119,12 +120,12 @@
/* MPC5200 device tree match tables */
-static struct of_device_id mpc52xx_pic_ids[] __initdata = {
+static const struct of_device_id mpc52xx_pic_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-pic", },
{ .compatible = "mpc5200-pic", },
{}
};
-static struct of_device_id mpc52xx_sdma_ids[] __initdata = {
+static const struct of_device_id mpc52xx_sdma_ids[] __initconst = {
{ .compatible = "fsl,mpc5200-bestcomm", },
{ .compatible = "mpc5200-bestcomm", },
{}
@@ -196,7 +197,7 @@ static int mpc52xx_extirq_set_type(struct irq_data *d, unsigned int flow_type)
ctrl_reg |= (type << (22 - (l2irq * 2)));
out_be32(&intr->ctrl, ctrl_reg);
- __irq_set_handler_locked(d->irq, handler);
+ irq_set_handler_locked(d, handler);
return 0;
}
@@ -340,7 +341,7 @@ static int mpc52xx_irqhost_map(struct irq_domain *h, unsigned int virq,
{
int l1irq;
int l2irq;
- struct irq_chip *uninitialized_var(irqchip);
+ struct irq_chip *irqchip;
void *hndlr;
int type;
u32 reg;
@@ -445,14 +446,14 @@ void __init mpc52xx_init_irq(void)
* As last step, add an irq host to translate the real
* hw irq information provided by the ofw to linux virq
*/
- mpc52xx_irqhost = irq_domain_add_linear(picnode,
+ mpc52xx_irqhost = irq_domain_create_linear(of_fwnode_handle(picnode),
MPC52xx_IRQ_HIGHTESTHWIRQ,
&mpc52xx_irqhost_ops, NULL);
if (!mpc52xx_irqhost)
panic(__FILE__ ": Cannot allocate the IRQ host\n");
- irq_set_default_host(mpc52xx_irqhost);
+ irq_set_default_domain(mpc52xx_irqhost);
pr_info("MPC52xx PIC is up and running!\n");
}
@@ -511,8 +512,8 @@ unsigned int mpc52xx_get_irq(void)
irq |= (MPC52xx_IRQ_L1_PERP << MPC52xx_IRQ_L1_OFFSET);
}
} else {
- return NO_IRQ;
+ return 0;
}
- return irq_linear_revmap(mpc52xx_irqhost, irq);
+ return irq_find_mapping(mpc52xx_irqhost, irq);
}
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pm.c b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
index 8310e8b5b57f..f0c31ae15da5 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_pm.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_pm.c
@@ -1,6 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/suspend.h>
#include <linux/io.h>
+#include <linux/of_address.h>
+
#include <asm/time.h>
#include <asm/cacheflush.h>
#include <asm/mpc52xx.h>
@@ -57,7 +60,7 @@ int mpc52xx_set_wakeup_gpio(u8 pin, u8 level)
int mpc52xx_pm_prepare(void)
{
struct device_node *np;
- const struct of_device_id immr_ids[] = {
+ static const struct of_device_id immr_ids[] = {
{ .compatible = "fsl,mpc5200-immr", },
{ .compatible = "fsl,mpc5200b-immr", },
{ .type = "soc", .compatible = "mpc5200", }, /* lite5200 */
@@ -116,7 +119,10 @@ int mpc52xx_pm_enter(suspend_state_t state)
u32 intr_main_mask;
void __iomem * irq_0x500 = (void __iomem *)CONFIG_KERNEL_START + 0x500;
unsigned long irq_0x500_stop = (unsigned long)irq_0x500 + mpc52xx_ds_cached_size;
- char saved_0x500[mpc52xx_ds_cached_size];
+ char saved_0x500[0x600-0x500];
+
+ if (WARN_ON(mpc52xx_ds_cached_size > sizeof(saved_0x500)))
+ return -ENOMEM;
/* disable all interrupts in PIC */
intr_main_mask = in_be32(&intr->main_mask);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_sleep.S b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
index 4dc170b0ae18..a66eb311b639 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
+++ b/arch/powerpc/platforms/52xx/mpc52xx_sleep.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/reg.h>
#include <asm/ppc_asm.h>
#include <asm/processor.h>
diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig
index 7c7df4003820..1824536cf6f2 100644
--- a/arch/powerpc/platforms/82xx/Kconfig
+++ b/arch/powerpc/platforms/82xx/Kconfig
@@ -1,37 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
menuconfig PPC_82xx
bool "82xx-based boards (PQ II)"
- depends on 6xx
-
-if PPC_82xx
-
-config MPC8272_ADS
- bool "Freescale MPC8272 ADS"
- select DEFAULT_UIMAGE
- select PQ2ADS
- select 8272
- select 8260
+ depends on PPC_BOOK3S_32
select FSL_SOC
- select PQ2_ADS_PCI_PIC if PCI
- help
- This option enables support for the MPC8272 ADS board
-config PQ2FADS
- bool "Freescale PQ2FADS"
- select DEFAULT_UIMAGE
- select PQ2ADS
- select 8260
- select FSL_SOC
- select PQ2_ADS_PCI_PIC if PCI
- help
- This option enables support for the PQ2FADS board
+if PPC_82xx
config EP8248E
bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)"
- select 8272
- select 8260
- select FSL_SOC
- select PHYLIB
- select MDIO_BITBANG
+ select CPM2
+ select PPC_INDIRECT_PCI if PCI
+ select PHYLIB if NETDEVICES
+ select MDIO_BITBANG if PHYLIB
help
This enables support for the Embedded Planet EP8248E board.
@@ -40,33 +20,9 @@ config EP8248E
config MGCOGE
bool "Keymile MGCOGE"
- select 8272
- select 8260
- select FSL_SOC
+ select CPM2
+ select PPC_INDIRECT_PCI if PCI
help
This enables support for the Keymile MGCOGE board.
endif
-
-config PQ2ADS
- bool
- default n
-
-config 8260
- bool
- depends on 6xx
- select CPM2
- help
- The MPC8260 is a typical embedded CPU made by Freescale. Selecting
- this option means that you wish to build a kernel for a machine with
- an 8260 class CPU.
-
-config 8272
- bool
- select 8260
- help
- The MPC8272 CPM has a different internal dpram setup than other CPM2
- devices
-
-config PQ2_ADS_PCI_PIC
- bool
diff --git a/arch/powerpc/platforms/82xx/Makefile b/arch/powerpc/platforms/82xx/Makefile
index 455fe21e37c4..4fa43a5cd582 100644
--- a/arch/powerpc/platforms/82xx/Makefile
+++ b/arch/powerpc/platforms/82xx/Makefile
@@ -1,9 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the PowerPC 82xx linux kernel.
#
-obj-$(CONFIG_MPC8272_ADS) += mpc8272_ads.o
obj-$(CONFIG_CPM2) += pq2.o
-obj-$(CONFIG_PQ2_ADS_PCI_PIC) += pq2ads-pci-pic.o
-obj-$(CONFIG_PQ2FADS) += pq2fads.o
obj-$(CONFIG_EP8248E) += ep8248e.o
obj-$(CONFIG_MGCOGE) += km82xx.o
diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c
index 79799b29ffe2..8f918916e631 100644
--- a/arch/powerpc/platforms/82xx/ep8248e.c
+++ b/arch/powerpc/platforms/82xx/ep8248e.c
@@ -1,13 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Embedded Planet EP8248E support
*
* Copyright 2007 Freescale Semiconductor, Inc.
* Author: Scott Wood <scottwood@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
@@ -17,14 +13,13 @@
#include <linux/of_mdio.h>
#include <linux/slab.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/cpm2.h>
#include <asm/udbg.h>
#include <asm/machdep.h>
#include <asm/time.h>
-#include <asm/mpc8260.h>
-#include <asm/prom.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/cpm2_pic.h>
@@ -131,34 +126,20 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev)
if (!bus)
return -ENOMEM;
- bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL);
- if (bus->irq == NULL) {
- ret = -ENOMEM;
- goto err_free_bus;
- }
-
bus->name = "ep8248e-mdio-bitbang";
bus->parent = &ofdev->dev;
- snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start);
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res.start);
ret = of_mdiobus_register(bus, ofdev->dev.of_node);
if (ret)
- goto err_free_irq;
+ goto err_free_bus;
return 0;
-err_free_irq:
- kfree(bus->irq);
err_free_bus:
free_mdio_bitbang(bus);
return ret;
}
-static int ep8248e_mdio_remove(struct platform_device *ofdev)
-{
- BUG();
- return 0;
-}
-
static const struct of_device_id ep8248e_mdio_match[] = {
{
.compatible = "fsl,ep8248e-mdio-bitbang",
@@ -169,11 +150,10 @@ static const struct of_device_id ep8248e_mdio_match[] = {
static struct platform_driver ep8248e_mdio_driver = {
.driver = {
.name = "ep8248e-mdio-bitbang",
- .owner = THIS_MODULE,
.of_match_table = ep8248e_mdio_match,
+ .suppress_bind_attrs = true,
},
.probe = ep8248e_mdio_probe,
- .remove = ep8248e_mdio_remove,
};
struct cpm_pin {
@@ -298,7 +278,7 @@ static void __init ep8248e_setup_arch(void)
ppc_md.progress("ep8248e_setup_arch(), finish", 0);
}
-static __initdata struct of_device_id of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .compatible = "simple-bus", },
{ .compatible = "fsl,ep8248e-bcsr", },
{},
@@ -307,29 +287,21 @@ static __initdata struct of_device_id of_bus_ids[] = {
static int __init declare_of_platform_devices(void)
{
of_platform_bus_probe(NULL, of_bus_ids, NULL);
- platform_driver_register(&ep8248e_mdio_driver);
+
+ if (IS_ENABLED(CONFIG_MDIO_BITBANG))
+ platform_driver_register(&ep8248e_mdio_driver);
return 0;
}
machine_device_initcall(ep8248e, declare_of_platform_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init ep8248e_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "fsl,ep8248e");
-}
-
define_machine(ep8248e)
{
.name = "Embedded Planet EP8248E",
- .probe = ep8248e_probe,
+ .compatible = "fsl,ep8248e",
.setup_arch = ep8248e_setup_arch,
.init_IRQ = ep8248e_pic_init,
.get_irq = cpm2_get_irq,
- .calibrate_decr = generic_calibrate_decr,
.restart = pq2_restart,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c
index 058cc1895c88..99f0f0f41876 100644
--- a/arch/powerpc/platforms/82xx/km82xx.c
+++ b/arch/powerpc/platforms/82xx/km82xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Keymile km82xx support
* Copyright 2008-2011 DENX Software Engineering GmbH
@@ -6,11 +7,6 @@
* based on code from:
* Copyright 2007 Freescale Semiconductor, Inc.
* Author: Scott Wood <scottwood@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/init.h>
@@ -23,8 +19,6 @@
#include <asm/udbg.h>
#include <asm/machdep.h>
#include <linux/time.h>
-#include <asm/mpc8260.h>
-#include <asm/prom.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/cpm2_pic.h>
@@ -33,15 +27,15 @@
static void __init km82xx_pic_init(void)
{
- struct device_node *np = of_find_compatible_node(NULL, NULL,
- "fsl,pq2-pic");
+ struct device_node *np __free(device_node);
+ np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic");
+
if (!np) {
pr_err("PIC init: can not find cpm-pic node\n");
return;
}
cpm2_pic_init(np);
- of_node_put(np);
}
struct cpm_pin {
@@ -180,7 +174,7 @@ static void __init km82xx_setup_arch(void)
ppc_md.progress("km82xx_setup_arch(), finish", 0);
}
-static __initdata struct of_device_id of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .compatible = "simple-bus", },
{},
};
@@ -193,23 +187,13 @@ static int __init declare_of_platform_devices(void)
}
machine_device_initcall(km82xx, declare_of_platform_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init km82xx_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "keymile,km82xx");
-}
-
define_machine(km82xx)
{
.name = "Keymile km82xx",
- .probe = km82xx_probe,
+ .compatible = "keymile,km82xx",
.setup_arch = km82xx_setup_arch,
.init_IRQ = km82xx_pic_init,
.get_irq = cpm2_get_irq,
- .calibrate_decr = generic_calibrate_decr,
.restart = pq2_restart,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/82xx/m82xx_pci.h b/arch/powerpc/platforms/82xx/m82xx_pci.h
deleted file mode 100644
index 65e38a7ff48f..000000000000
--- a/arch/powerpc/platforms/82xx/m82xx_pci.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef _PPC_KERNEL_M82XX_PCI_H
-#define _PPC_KERNEL_M82XX_PCI_H
-
-/*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#define SIU_INT_IRQ1 ((uint)0x13 + CPM_IRQ_OFFSET)
-
-#ifndef _IO_BASE
-#define _IO_BASE isa_io_base
-#endif
-
-#endif /* _PPC_KERNEL_M8260_PCI_H */
diff --git a/arch/powerpc/platforms/82xx/mpc8272_ads.c b/arch/powerpc/platforms/82xx/mpc8272_ads.c
deleted file mode 100644
index 6a14cf50f4a2..000000000000
--- a/arch/powerpc/platforms/82xx/mpc8272_ads.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * MPC8272 ADS board support
- *
- * Copyright 2007 Freescale Semiconductor, Inc.
- * Author: Scott Wood <scottwood@freescale.com>
- *
- * Based on code by Vitaly Bordug <vbordug@ru.mvista.com>
- * Copyright (c) 2006 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/of_address.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-#include <linux/io.h>
-
-#include <asm/cpm2.h>
-#include <asm/udbg.h>
-#include <asm/machdep.h>
-#include <asm/time.h>
-
-#include <platforms/82xx/pq2.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/cpm2_pic.h>
-
-#include "pq2.h"
-
-static void __init mpc8272_ads_pic_init(void)
-{
- struct device_node *np = of_find_compatible_node(NULL, NULL,
- "fsl,cpm2-pic");
- if (!np) {
- printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
- return;
- }
-
- cpm2_pic_init(np);
- of_node_put(np);
-
- /* Initialize stuff for the 82xx CPLD IC and install demux */
- pq2ads_pci_init_irq();
-}
-
-struct cpm_pin {
- int port, pin, flags;
-};
-
-static struct cpm_pin mpc8272_ads_pins[] = {
- /* SCC1 */
- {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* SCC4 */
- {3, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* FCC1 */
- {0, 14, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {0, 15, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {0, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {0, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {0, 18, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {0, 19, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {0, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {0, 21, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {0, 26, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
- {0, 27, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
- {0, 28, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {0, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {0, 30, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
- {0, 31, CPM_PIN_INPUT | CPM_PIN_SECONDARY},
- {2, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 22, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* FCC2 */
- {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {2, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* I2C */
- {3, 14, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN},
- {3, 15, CPM_PIN_INPUT | CPM_PIN_SECONDARY | CPM_PIN_OPENDRAIN},
-
- /* USB */
- {2, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 20, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {2, 24, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {3, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 25, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-};
-
-static void __init init_ioports(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mpc8272_ads_pins); i++) {
- struct cpm_pin *pin = &mpc8272_ads_pins[i];
- cpm2_set_pin(pin->port, pin->pin, pin->flags);
- }
-
- cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC3, CPM_CLK8, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_SCC4, CPM_BRG4, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC4, CPM_BRG4, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK11, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_FCC1, CPM_CLK10, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK15, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK16, CPM_CLK_TX);
-}
-
-static void __init mpc8272_ads_setup_arch(void)
-{
- struct device_node *np;
- __be32 __iomem *bcsr;
-
- if (ppc_md.progress)
- ppc_md.progress("mpc8272_ads_setup_arch()", 0);
-
- cpm2_reset();
-
- np = of_find_compatible_node(NULL, NULL, "fsl,mpc8272ads-bcsr");
- if (!np) {
- printk(KERN_ERR "No bcsr in device tree\n");
- return;
- }
-
- bcsr = of_iomap(np, 0);
- of_node_put(np);
- if (!bcsr) {
- printk(KERN_ERR "Cannot map BCSR registers\n");
- return;
- }
-
-#define BCSR1_FETHIEN 0x08000000
-#define BCSR1_FETH_RST 0x04000000
-#define BCSR1_RS232_EN1 0x02000000
-#define BCSR1_RS232_EN2 0x01000000
-#define BCSR3_USB_nEN 0x80000000
-#define BCSR3_FETHIEN2 0x10000000
-#define BCSR3_FETH2_RST 0x08000000
-
- clrbits32(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN);
- setbits32(&bcsr[1], BCSR1_FETH_RST);
-
- clrbits32(&bcsr[3], BCSR3_FETHIEN2);
- setbits32(&bcsr[3], BCSR3_FETH2_RST);
-
- clrbits32(&bcsr[3], BCSR3_USB_nEN);
-
- iounmap(bcsr);
-
- init_ioports();
- pq2_init_pci();
-
- if (ppc_md.progress)
- ppc_md.progress("mpc8272_ads_setup_arch(), finish", 0);
-}
-
-static struct of_device_id __initdata of_bus_ids[] = {
- { .name = "soc", },
- { .name = "cpm", },
- { .name = "localbus", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- /* Publish the QE devices */
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
- return 0;
-}
-machine_device_initcall(mpc8272_ads, declare_of_platform_devices);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc8272_ads_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "fsl,mpc8272ads");
-}
-
-define_machine(mpc8272_ads)
-{
- .name = "Freescale MPC8272 ADS",
- .probe = mpc8272_ads_probe,
- .setup_arch = mpc8272_ads_setup_arch,
- .init_IRQ = mpc8272_ads_pic_init,
- .get_irq = cpm2_get_irq,
- .calibrate_decr = generic_calibrate_decr,
- .restart = pq2_restart,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c
index fc8b2d6a7d8d..391d72a2e09d 100644
--- a/arch/powerpc/platforms/82xx/pq2.c
+++ b/arch/powerpc/platforms/82xx/pq2.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Common PowerQUICC II code.
*
@@ -7,13 +8,10 @@
* Based on code by Vitaly Bordug <vbordug@ru.mvista.com>
* pq2_restart fix by Wade Farnsworth <wfarnsworth@mvista.com>
* Copyright (c) 2006 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
+#include <linux/kprobes.h>
+
#include <asm/cpm2.h>
#include <asm/io.h>
#include <asm/pci-bridge.h>
@@ -22,7 +20,7 @@
#define RMR_CSRE 0x00000001
-void pq2_restart(char *cmd)
+void __noreturn pq2_restart(char *cmd)
{
local_irq_disable();
setbits32(&cpm2_immr->im_clkrst.car_rmr, RMR_CSRE);
@@ -33,49 +31,4 @@ void pq2_restart(char *cmd)
panic("Restart failed\n");
}
-
-#ifdef CONFIG_PCI
-static int pq2_pci_exclude_device(struct pci_controller *hose,
- u_char bus, u8 devfn)
-{
- if (bus == 0 && PCI_SLOT(devfn) == 0)
- return PCIBIOS_DEVICE_NOT_FOUND;
- else
- return PCIBIOS_SUCCESSFUL;
-}
-
-static void __init pq2_pci_add_bridge(struct device_node *np)
-{
- struct pci_controller *hose;
- struct resource r;
-
- if (of_address_to_resource(np, 0, &r) || r.end - r.start < 0x10b)
- goto err;
-
- pci_add_flags(PCI_REASSIGN_ALL_BUS);
-
- hose = pcibios_alloc_controller(np);
- if (!hose)
- return;
-
- hose->dn = np;
-
- setup_indirect_pci(hose, r.start + 0x100, r.start + 0x104, 0);
- pci_process_bridge_OF_ranges(hose, np, 1);
-
- return;
-
-err:
- printk(KERN_ERR "No valid PCI reg property in device tree\n");
-}
-
-void __init pq2_init_pci(void)
-{
- struct device_node *np;
-
- ppc_md.pci_exclude_device = pq2_pci_exclude_device;
-
- for_each_compatible_node(np, NULL, "fsl,pq2-pci")
- pq2_pci_add_bridge(np);
-}
-#endif
+NOKPROBE_SYMBOL(pq2_restart)
diff --git a/arch/powerpc/platforms/82xx/pq2.h b/arch/powerpc/platforms/82xx/pq2.h
index a41f84ae2325..902ef0bd4949 100644
--- a/arch/powerpc/platforms/82xx/pq2.h
+++ b/arch/powerpc/platforms/82xx/pq2.h
@@ -1,7 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _PQ2_H
#define _PQ2_H
-void pq2_restart(char *cmd);
+void __noreturn pq2_restart(char *cmd);
#ifdef CONFIG_PCI
int pq2ads_pci_init_irq(void);
diff --git a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c b/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
deleted file mode 100644
index 74861a7fb807..000000000000
--- a/arch/powerpc/platforms/82xx/pq2ads-pci-pic.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * PQ2 ADS-style PCI interrupt controller
- *
- * Copyright 2007 Freescale Semiconductor, Inc.
- * Author: Scott Wood <scottwood@freescale.com>
- *
- * Loosely based on mpc82xx ADS support by Vitaly Bordug <vbordug@ru.mvista.com>
- * Copyright (c) 2006 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/irq.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/cpm2.h>
-
-#include "pq2.h"
-
-static DEFINE_RAW_SPINLOCK(pci_pic_lock);
-
-struct pq2ads_pci_pic {
- struct device_node *node;
- struct irq_domain *host;
-
- struct {
- u32 stat;
- u32 mask;
- } __iomem *regs;
-};
-
-#define NUM_IRQS 32
-
-static void pq2ads_pci_mask_irq(struct irq_data *d)
-{
- struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
- int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
-
- if (irq != -1) {
- unsigned long flags;
- raw_spin_lock_irqsave(&pci_pic_lock, flags);
-
- setbits32(&priv->regs->mask, 1 << irq);
- mb();
-
- raw_spin_unlock_irqrestore(&pci_pic_lock, flags);
- }
-}
-
-static void pq2ads_pci_unmask_irq(struct irq_data *d)
-{
- struct pq2ads_pci_pic *priv = irq_data_get_irq_chip_data(d);
- int irq = NUM_IRQS - irqd_to_hwirq(d) - 1;
-
- if (irq != -1) {
- unsigned long flags;
-
- raw_spin_lock_irqsave(&pci_pic_lock, flags);
- clrbits32(&priv->regs->mask, 1 << irq);
- raw_spin_unlock_irqrestore(&pci_pic_lock, flags);
- }
-}
-
-static struct irq_chip pq2ads_pci_ic = {
- .name = "PQ2 ADS PCI",
- .irq_mask = pq2ads_pci_mask_irq,
- .irq_mask_ack = pq2ads_pci_mask_irq,
- .irq_ack = pq2ads_pci_mask_irq,
- .irq_unmask = pq2ads_pci_unmask_irq,
- .irq_enable = pq2ads_pci_unmask_irq,
- .irq_disable = pq2ads_pci_mask_irq
-};
-
-static void pq2ads_pci_irq_demux(unsigned int irq, struct irq_desc *desc)
-{
- struct pq2ads_pci_pic *priv = irq_desc_get_handler_data(desc);
- u32 stat, mask, pend;
- int bit;
-
- for (;;) {
- stat = in_be32(&priv->regs->stat);
- mask = in_be32(&priv->regs->mask);
-
- pend = stat & ~mask;
-
- if (!pend)
- break;
-
- for (bit = 0; pend != 0; ++bit, pend <<= 1) {
- if (pend & 0x80000000) {
- int virq = irq_linear_revmap(priv->host, bit);
- generic_handle_irq(virq);
- }
- }
- }
-}
-
-static int pci_pic_host_map(struct irq_domain *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- irq_set_status_flags(virq, IRQ_LEVEL);
- irq_set_chip_data(virq, h->host_data);
- irq_set_chip_and_handler(virq, &pq2ads_pci_ic, handle_level_irq);
- return 0;
-}
-
-static const struct irq_domain_ops pci_pic_host_ops = {
- .map = pci_pic_host_map,
-};
-
-int __init pq2ads_pci_init_irq(void)
-{
- struct pq2ads_pci_pic *priv;
- struct irq_domain *host;
- struct device_node *np;
- int ret = -ENODEV;
- int irq;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,pq2ads-pci-pic");
- if (!np) {
- printk(KERN_ERR "No pci pic node in device tree.\n");
- of_node_put(np);
- goto out;
- }
-
- irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
- printk(KERN_ERR "No interrupt in pci pic node.\n");
- of_node_put(np);
- goto out;
- }
-
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv) {
- of_node_put(np);
- ret = -ENOMEM;
- goto out_unmap_irq;
- }
-
- /* PCI interrupt controller registers: status and mask */
- priv->regs = of_iomap(np, 0);
- if (!priv->regs) {
- printk(KERN_ERR "Cannot map PCI PIC registers.\n");
- goto out_free_kmalloc;
- }
-
- /* mask all PCI interrupts */
- out_be32(&priv->regs->mask, ~0);
- mb();
-
- host = irq_domain_add_linear(np, NUM_IRQS, &pci_pic_host_ops, priv);
- if (!host) {
- ret = -ENOMEM;
- goto out_unmap_regs;
- }
-
- priv->host = host;
- irq_set_handler_data(irq, priv);
- irq_set_chained_handler(irq, pq2ads_pci_irq_demux);
-
- of_node_put(np);
- return 0;
-
-out_unmap_regs:
- iounmap(priv->regs);
-out_free_kmalloc:
- kfree(priv);
- of_node_put(np);
-out_unmap_irq:
- irq_dispose_mapping(irq);
-out:
- return ret;
-}
diff --git a/arch/powerpc/platforms/82xx/pq2ads.h b/arch/powerpc/platforms/82xx/pq2ads.h
deleted file mode 100644
index 6cf0f97486e2..000000000000
--- a/arch/powerpc/platforms/82xx/pq2ads.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * PQ2/mpc8260 board-specific stuff
- *
- * A collection of structures, addresses, and values associated with
- * the Freescale MPC8260ADS/MPC8266ADS-PCI boards.
- * Copied from the RPX-Classic and SBS8260 stuff.
- *
- * Author: Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * Originally written by Dan Malek for Motorola MPC8260 family
- *
- * Copyright (c) 2001 Dan Malek <dan@embeddedalley.com>
- * Copyright (c) 2006 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifdef __KERNEL__
-#ifndef __MACH_ADS8260_DEFS
-#define __MACH_ADS8260_DEFS
-
-#include <linux/seq_file.h>
-
-/* The ADS8260 has 16, 32-bit wide control/status registers, accessed
- * only on word boundaries.
- * Not all are used (yet), or are interesting to us (yet).
- */
-
-/* Things of interest in the CSR.
- */
-#define BCSR0_LED0 ((uint)0x02000000) /* 0 == on */
-#define BCSR0_LED1 ((uint)0x01000000) /* 0 == on */
-#define BCSR1_FETHIEN ((uint)0x08000000) /* 0 == enable*/
-#define BCSR1_FETH_RST ((uint)0x04000000) /* 0 == reset */
-#define BCSR1_RS232_EN1 ((uint)0x02000000) /* 0 ==enable */
-#define BCSR1_RS232_EN2 ((uint)0x01000000) /* 0 ==enable */
-#define BCSR3_FETHIEN2 ((uint)0x10000000) /* 0 == enable*/
-#define BCSR3_FETH2_RST ((uint)0x80000000) /* 0 == reset */
-
-#endif /* __MACH_ADS8260_DEFS */
-#endif /* __KERNEL__ */
diff --git a/arch/powerpc/platforms/82xx/pq2fads.c b/arch/powerpc/platforms/82xx/pq2fads.c
deleted file mode 100644
index e5f82ec8df17..000000000000
--- a/arch/powerpc/platforms/82xx/pq2fads.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * PQ2FADS board support
- *
- * Copyright 2007 Freescale Semiconductor, Inc.
- * Author: Scott Wood <scottwood@freescale.com>
- *
- * Loosely based on mp82xx ADS support by Vitaly Bordug <vbordug@ru.mvista.com>
- * Copyright (c) 2006 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/of_address.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
-
-#include <asm/io.h>
-#include <asm/cpm2.h>
-#include <asm/udbg.h>
-#include <asm/machdep.h>
-#include <asm/time.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/cpm2_pic.h>
-
-#include "pq2ads.h"
-#include "pq2.h"
-
-static void __init pq2fads_pic_init(void)
-{
- struct device_node *np = of_find_compatible_node(NULL, NULL, "fsl,cpm2-pic");
- if (!np) {
- printk(KERN_ERR "PIC init: can not find fsl,cpm2-pic node\n");
- return;
- }
-
- cpm2_pic_init(np);
- of_node_put(np);
-
- /* Initialize stuff for the 82xx CPLD IC and install demux */
- pq2ads_pci_init_irq();
-}
-
-struct cpm_pin {
- int port, pin, flags;
-};
-
-static struct cpm_pin pq2fads_pins[] = {
- /* SCC1 */
- {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* SCC2 */
- {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* FCC2 */
- {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* FCC3 */
- {1, 4, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 6, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 7, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 12, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 13, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 14, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 15, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-};
-
-static void __init init_ioports(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(pq2fads_pins); i++) {
- struct cpm_pin *pin = &pq2fads_pins[i];
- cpm2_set_pin(pin->port, pin->pin, pin->flags);
- }
-
- cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK15, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK16, CPM_CLK_TX);
-}
-
-static void __init pq2fads_setup_arch(void)
-{
- struct device_node *np;
- __be32 __iomem *bcsr;
-
- if (ppc_md.progress)
- ppc_md.progress("pq2fads_setup_arch()", 0);
-
- cpm2_reset();
-
- np = of_find_compatible_node(NULL, NULL, "fsl,pq2fads-bcsr");
- if (!np) {
- printk(KERN_ERR "No fsl,pq2fads-bcsr in device tree\n");
- return;
- }
-
- bcsr = of_iomap(np, 0);
- of_node_put(np);
- if (!bcsr) {
- printk(KERN_ERR "Cannot map BCSR registers\n");
- return;
- }
-
- /* Enable the serial and ethernet ports */
-
- clrbits32(&bcsr[1], BCSR1_RS232_EN1 | BCSR1_RS232_EN2 | BCSR1_FETHIEN);
- setbits32(&bcsr[1], BCSR1_FETH_RST);
-
- clrbits32(&bcsr[3], BCSR3_FETHIEN2);
- setbits32(&bcsr[3], BCSR3_FETH2_RST);
-
- iounmap(bcsr);
-
- init_ioports();
-
- /* Enable external IRQs */
- clrbits32(&cpm2_immr->im_siu_conf.siu_82xx.sc_siumcr, 0x0c000000);
-
- pq2_init_pci();
-
- if (ppc_md.progress)
- ppc_md.progress("pq2fads_setup_arch(), finish", 0);
-}
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init pq2fads_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "fsl,pq2fads");
-}
-
-static struct of_device_id __initdata of_bus_ids[] = {
- { .name = "soc", },
- { .name = "cpm", },
- { .name = "localbus", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- /* Publish the QE devices */
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
- return 0;
-}
-machine_device_initcall(pq2fads, declare_of_platform_devices);
-
-define_machine(pq2fads)
-{
- .name = "Freescale PQ2FADS",
- .probe = pq2fads_probe,
- .setup_arch = pq2fads_setup_arch,
- .init_IRQ = pq2fads_pic_init,
- .get_irq = cpm2_get_irq,
- .calibrate_decr = generic_calibrate_decr,
- .restart = pq2_restart,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig
index 2bdc8c862c46..d355ad40995f 100644
--- a/arch/powerpc/platforms/83xx/Kconfig
+++ b/arch/powerpc/platforms/83xx/Kconfig
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
menuconfig PPC_83xx
bool "83xx-based boards"
- depends on 6xx
+ depends on PPC_BOOK3S_32
select PPC_UDBG_16550
- select PPC_PCI_CHOICE
+ select HAVE_PCI
select FSL_PCI if PCI
select FSL_SOC
select IPIC
@@ -24,13 +25,6 @@ config MPC831x_RDB
help
This option enables support for the MPC8313 RDB and MPC8315 RDB boards.
-config MPC832x_MDS
- bool "Freescale MPC832x MDS"
- select DEFAULT_UIMAGE
- select PPC_MPC832x
- help
- This option enables support for the MPC832x MDS evaluation board.
-
config MPC832x_RDB
bool "Freescale MPC832x RDB"
select DEFAULT_UIMAGE
@@ -38,18 +32,6 @@ config MPC832x_RDB
help
This option enables support for the MPC8323 RDB board.
-config MPC834x_MDS
- bool "Freescale MPC834x MDS"
- select DEFAULT_UIMAGE
- select PPC_MPC834x
- help
- This option enables support for the MPC 834x MDS evaluation board.
-
- Be aware that PCI buses can only function when MDS board is plugged
- into the PIB (Platform IO Board) board from Freescale which provide
- 3 PCI slots. The PIBs PCI initialization is the bootloader's
- responsibility.
-
config MPC834x_ITX
bool "Freescale MPC834x ITX"
select DEFAULT_UIMAGE
@@ -60,12 +42,6 @@ config MPC834x_ITX
Be aware that PCI initialization is the bootloader's
responsibility.
-config MPC836x_MDS
- bool "Freescale MPC836x MDS"
- select DEFAULT_UIMAGE
- help
- This option enables support for the MPC836x MDS Processor Board.
-
config MPC836x_RDK
bool "Freescale/Logic MPC836x RDK"
select DEFAULT_UIMAGE
@@ -75,13 +51,6 @@ config MPC836x_RDK
This option enables support for the MPC836x RDK Processor Board,
also known as ZOOM PowerQUICC Kit.
-config MPC837x_MDS
- bool "Freescale MPC837x MDS"
- select DEFAULT_UIMAGE
- select PPC_MPC837x
- help
- This option enables support for the MPC837x MDS Processor Board.
-
config MPC837x_RDB
bool "Freescale MPC837x RDB/WLAN"
select DEFAULT_UIMAGE
@@ -89,13 +58,6 @@ config MPC837x_RDB
help
This option enables support for the MPC837x RDB and WLAN Boards.
-config SBC834x
- bool "Wind River SBC834x"
- select DEFAULT_UIMAGE
- select PPC_MPC834x
- help
- This option enables support for the Wind River SBC834x board.
-
config ASP834x
bool "Analogue & Micro ASP 834x"
select PPC_MPC834x
@@ -116,7 +78,6 @@ endif
# used for usb & gpio
config PPC_MPC831x
bool
- select ARCH_WANT_OPTIONAL_GPIOLIB
# used for math-emu
config PPC_MPC832x
@@ -125,9 +86,7 @@ config PPC_MPC832x
# used for usb & gpio
config PPC_MPC834x
bool
- select ARCH_WANT_OPTIONAL_GPIOLIB
# used for usb & gpio
config PPC_MPC837x
bool
- select ARCH_WANT_OPTIONAL_GPIOLIB
diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile
index ed95bfcbcbff..6fc3dba943da 100644
--- a/arch/powerpc/platforms/83xx/Makefile
+++ b/arch/powerpc/platforms/83xx/Makefile
@@ -1,19 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the PowerPC 83xx linux kernel.
#
-obj-y := misc.o usb.o
+obj-y := misc.o
obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o
obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o
obj-$(CONFIG_MPC830x_RDB) += mpc830x_rdb.o
obj-$(CONFIG_MPC831x_RDB) += mpc831x_rdb.o
obj-$(CONFIG_MPC832x_RDB) += mpc832x_rdb.o
-obj-$(CONFIG_MPC834x_MDS) += mpc834x_mds.o
obj-$(CONFIG_MPC834x_ITX) += mpc834x_itx.o
-obj-$(CONFIG_MPC836x_MDS) += mpc836x_mds.o
obj-$(CONFIG_MPC836x_RDK) += mpc836x_rdk.o
-obj-$(CONFIG_MPC832x_MDS) += mpc832x_mds.o
-obj-$(CONFIG_MPC837x_MDS) += mpc837x_mds.o
-obj-$(CONFIG_SBC834x) += sbc834x.o
obj-$(CONFIG_MPC837x_RDB) += mpc837x_rdb.o
obj-$(CONFIG_ASP834x) += asp834x.o
obj-$(CONFIG_KMETER1) += km83xx.o
+obj-$(CONFIG_PPC_MPC831x) += usb_831x.o
+obj-$(CONFIG_PPC_MPC834x) += usb_834x.o
+obj-$(CONFIG_PPC_MPC837x) += usb_837x.o
diff --git a/arch/powerpc/platforms/83xx/asp834x.c b/arch/powerpc/platforms/83xx/asp834x.c
index 464ea8e0292d..6870d0c34f1d 100644
--- a/arch/powerpc/platforms/83xx/asp834x.c
+++ b/arch/powerpc/platforms/83xx/asp834x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/platforms/83xx/asp834x.c
*
@@ -7,11 +8,6 @@
* Copyright 2008 Codehermit
*
* Maintainer: Bryan O'Donoghue <bodonoghue@codhermit.ie>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/pci.h>
@@ -30,31 +26,20 @@
*/
static void __init asp834x_setup_arch(void)
{
- if (ppc_md.progress)
- ppc_md.progress("asp834x_setup_arch()", 0);
-
+ mpc83xx_setup_arch();
mpc834x_usb_cfg();
}
machine_device_initcall(asp834x, mpc83xx_declare_of_platform_devices);
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init asp834x_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "analogue-and-micro,asp8347e");
-}
-
define_machine(asp834x) {
.name = "ASP8347E",
- .probe = asp834x_probe,
+ .compatible = "analogue-and-micro,asp8347e",
.setup_arch = asp834x_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
.init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index bf4c4473abb9..2b5d187d9b62 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2008-2011 DENX Software Engineering GmbH
* Author: Heiko Schocher <hs@denx.de>
*
* Description:
* Keymile 83xx platform specific routines.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -24,8 +20,8 @@
#include <linux/seq_file.h>
#include <linux/root_dev.h>
#include <linux/initrd.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/atomic.h>
#include <linux/time.h>
@@ -33,18 +29,16 @@
#include <asm/machdep.h>
#include <asm/ipic.h>
#include <asm/irq.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
#include "mpc83xx.h"
#define SVR_REV(svr) (((svr) >> 0) & 0xFFFF) /* Revision field */
-static void quirk_mpc8360e_qe_enet10(void)
+static void __init quirk_mpc8360e_qe_enet10(void)
{
/*
* handle mpc8360E Erratum QE_ENET10:
@@ -58,17 +52,19 @@ static void quirk_mpc8360e_qe_enet10(void)
np_par = of_find_node_by_name(NULL, "par_io");
if (np_par == NULL) {
- pr_warn("%s couldn;t find par_io node\n", __func__);
+ pr_warn("%s couldn't find par_io node\n", __func__);
return;
}
/* Map Parallel I/O ports registers */
ret = of_address_to_resource(np_par, 0, &res);
if (ret) {
- pr_warn("%s couldn;t map par_io registers\n", __func__);
- return;
+ pr_warn("%s couldn't map par_io registers\n", __func__);
+ goto out;
}
- base = ioremap(res.start, res.end - res.start + 1);
+ base = ioremap(res.start, resource_size(&res));
+ if (!base)
+ goto out;
/*
* set output delay adjustments to default values according
@@ -116,6 +112,7 @@ static void quirk_mpc8360e_qe_enet10(void)
setbits32((base + 0xac), 0x0000c000);
}
iounmap(base);
+out:
of_node_put(np_par);
}
@@ -130,14 +127,9 @@ static void __init mpc83xx_km_setup_arch(void)
struct device_node *np;
#endif
- if (ppc_md.progress)
- ppc_md.progress("kmpbec83xx_setup_arch()", 0);
-
- mpc83xx_setup_pci();
+ mpc83xx_setup_arch();
#ifdef CONFIG_QUICC_ENGINE
- qe_reset();
-
np = of_find_node_by_name(NULL, "par_io");
if (np != NULL) {
par_io_init(np);
@@ -173,11 +165,10 @@ static char *board[] __initdata = {
*/
static int __init mpc83xx_km_probe(void)
{
- unsigned long node = of_get_flat_dt_root();
int i = 0;
while (board[i]) {
- if (of_flat_dt_is_compatible(node, board[i]))
+ if (of_machine_is_compatible(board[i]))
break;
i++;
}
@@ -188,10 +179,10 @@ define_machine(mpc83xx_km) {
.name = "mpc83xx-km-platform",
.probe = mpc83xx_km_probe,
.setup_arch = mpc83xx_km_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index e238b6a55b15..cb7b9498f291 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -1,28 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU
*
* Copyright (c) 2008 MontaVista Software, Inc.
*
* Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
*/
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/property.h>
#include <linux/reboot.h>
-#include <asm/prom.h>
#include <asm/machdep.h>
/*
@@ -84,7 +79,7 @@ static ssize_t show_status(struct device *d,
return sprintf(buf, "%02x\n", ret);
}
-static DEVICE_ATTR(status, S_IRUGO, show_status, NULL);
+static DEVICE_ATTR(status, 0444, show_status, NULL);
static void mcu_power_off(void)
{
@@ -97,10 +92,11 @@ static void mcu_power_off(void)
mutex_unlock(&mcu->lock);
}
-static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
+static int mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct mcu *mcu = container_of(gc, struct mcu, gc);
+ struct mcu *mcu = gpiochip_get_data(gc);
u8 bit = 1 << (4 + gpio);
+ int ret;
mutex_lock(&mcu->lock);
if (val)
@@ -108,43 +104,42 @@ static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
else
mcu->reg_ctrl |= bit;
- i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl);
+ ret = i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL,
+ mcu->reg_ctrl);
mutex_unlock(&mcu->lock);
+
+ return ret;
}
static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- mcu_gpio_set(gc, gpio, val);
- return 0;
+ return mcu_gpio_set(gc, gpio, val);
}
static int mcu_gpiochip_add(struct mcu *mcu)
{
- struct device_node *np;
+ struct device *dev = &mcu->client->dev;
struct gpio_chip *gc = &mcu->gc;
- np = of_find_compatible_node(NULL, NULL, "fsl,mcu-mpc8349emitx");
- if (!np)
- return -ENODEV;
-
gc->owner = THIS_MODULE;
- gc->label = np->full_name;
+ gc->label = kasprintf(GFP_KERNEL, "%pfw", dev_fwnode(dev));
gc->can_sleep = 1;
gc->ngpio = MCU_NUM_GPIO;
gc->base = -1;
gc->set = mcu_gpio_set;
gc->direction_output = mcu_gpio_dir_out;
- gc->of_node = np;
+ gc->parent = dev;
- return gpiochip_add(gc);
+ return gpiochip_add_data(gc, mcu);
}
-static int mcu_gpiochip_remove(struct mcu *mcu)
+static void mcu_gpiochip_remove(struct mcu *mcu)
{
- return gpiochip_remove(&mcu->gc);
+ kfree(mcu->gc.label);
+ gpiochip_remove(&mcu->gc);
}
-static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int mcu_probe(struct i2c_client *client)
{
struct mcu *mcu;
int ret;
@@ -166,10 +161,10 @@ static int mcu_probe(struct i2c_client *client, const struct i2c_device_id *id)
if (ret)
goto err;
- /* XXX: this is potentially racy, but there is no lock for ppc_md */
- if (!ppc_md.power_off) {
+ /* XXX: this is potentially racy, but there is no lock for pm_power_off */
+ if (!pm_power_off) {
glob_mcu = mcu;
- ppc_md.power_off = mcu_power_off;
+ pm_power_off = mcu_power_off;
dev_info(&client->dev, "will provide power-off service\n");
}
@@ -186,25 +181,21 @@ err:
return ret;
}
-static int mcu_remove(struct i2c_client *client)
+static void mcu_remove(struct i2c_client *client)
{
struct mcu *mcu = i2c_get_clientdata(client);
- int ret;
kthread_stop(shutdown_thread);
device_remove_file(&client->dev, &dev_attr_status);
if (glob_mcu == mcu) {
- ppc_md.power_off = NULL;
+ pm_power_off = NULL;
glob_mcu = NULL;
}
- ret = mcu_gpiochip_remove(mcu);
- if (ret)
- return ret;
+ mcu_gpiochip_remove(mcu);
kfree(mcu);
- return 0;
}
static const struct i2c_device_id mcu_ids[] = {
@@ -213,7 +204,7 @@ static const struct i2c_device_id mcu_ids[] = {
};
MODULE_DEVICE_TABLE(i2c, mcu_ids);
-static struct of_device_id mcu_of_match_table[] = {
+static const struct of_device_id mcu_of_match_table[] = {
{ .compatible = "fsl,mcu-mpc8349emitx", },
{ },
};
@@ -221,7 +212,6 @@ static struct of_device_id mcu_of_match_table[] = {
static struct i2c_driver mcu_driver = {
.driver = {
.name = "mcu-mpc8349emitx",
- .owner = THIS_MODULE,
.of_match_table = mcu_of_match_table,
},
.probe = mcu_probe,
diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
index 125336f750c6..1135c1ab923c 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* misc setup functions for MPC83xx
*
* Maintainer: Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -14,13 +10,17 @@
#include <linux/of_platform.h>
#include <linux/pci.h>
+#include <asm/debug.h>
#include <asm/io.h>
#include <asm/hw_irq.h>
#include <asm/ipic.h>
-#include <asm/qe_ic.h>
+#include <asm/fixmap.h>
+
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
+#include <mm/mmu_decl.h>
+
#include "mpc83xx.h"
static __be32 __iomem *restart_reg_base;
@@ -35,7 +35,7 @@ static int __init mpc83xx_restart_init(void)
arch_initcall(mpc83xx_restart_init);
-void mpc83xx_restart(char *cmd)
+void __noreturn mpc83xx_restart(char *cmd)
{
#define RST_OFFSET 0x00000900
#define RST_PROT_REG 0x00000018
@@ -92,29 +92,7 @@ void __init mpc83xx_ipic_init_IRQ(void)
ipic_set_default_priority();
}
-#ifdef CONFIG_QUICC_ENGINE
-void __init mpc83xx_qe_init_IRQ(void)
-{
- struct device_node *np;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (!np) {
- np = of_find_node_by_type(NULL, "qeic");
- if (!np)
- return;
- }
- qe_ic_init(np, 0, qe_ic_cascade_low_ipic, qe_ic_cascade_high_ipic);
- of_node_put(np);
-}
-
-void __init mpc83xx_ipic_and_qe_init_IRQ(void)
-{
- mpc83xx_ipic_init_IRQ();
- mpc83xx_qe_init_IRQ();
-}
-#endif /* CONFIG_QUICC_ENGINE */
-
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .type = "soc", },
{ .compatible = "soc", },
{ .compatible = "simple-bus" },
@@ -142,3 +120,32 @@ void __init mpc83xx_setup_pci(void)
mpc83xx_add_bridge(np);
}
#endif
+
+void __init mpc83xx_setup_arch(void)
+{
+ phys_addr_t immrbase = get_immrbase();
+ int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M;
+ unsigned long va = fix_to_virt(FIX_IMMR_BASE);
+
+ if (ppc_md.progress)
+ ppc_md.progress("mpc83xx_setup_arch()", 0);
+
+ setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG);
+ update_bats();
+}
+
+int machine_check_83xx(struct pt_regs *regs)
+{
+ u32 mask = 1 << (31 - IPIC_MCP_WDT);
+
+ if (!(regs->msr & SRR1_MCE_MCP) || !(ipic_get_mcp_status() & mask))
+ return machine_check_generic(regs);
+ ipic_clear_mcp_status(mask);
+
+ if (debugger_fault_handler(regs))
+ return 1;
+
+ die("Watchdog NMI Reset", regs, 0);
+
+ return 1;
+}
diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
index 4f2d9fea77b7..63b6d213726a 100644
--- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/platforms/83xx/mpc830x_rdb.c
*
@@ -6,11 +7,6 @@
*
* Copyright (C) Freescale Semiconductor, Inc. 2009. All rights reserved.
* Copyright (C) 2010. Ilya Yanok, Emcraft Systems, yanok@emcraft.com
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/pci.h>
@@ -27,10 +23,7 @@
*/
static void __init mpc830x_rdb_setup_arch(void)
{
- if (ppc_md.progress)
- ppc_md.progress("mpc830x_rdb_setup_arch()", 0);
-
- mpc83xx_setup_pci();
+ mpc83xx_setup_arch();
mpc831x_usb_cfg();
}
@@ -41,24 +34,16 @@ static const char *board[] __initdata = {
NULL
};
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc830x_rdb_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
machine_device_initcall(mpc830x_rdb, mpc83xx_declare_of_platform_devices);
define_machine(mpc830x_rdb) {
.name = "MPC830x RDB",
- .probe = mpc830x_rdb_probe,
+ .compatibles = board,
.setup_arch = mpc830x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
.init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mpc831x_rdb.c b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
index fa25977c52de..5c39966762e4 100644
--- a/arch/powerpc/platforms/83xx/mpc831x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc831x_rdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/platforms/83xx/mpc831x_rdb.c
*
@@ -6,11 +7,6 @@
* Author: Lo Wlison <r43300@freescale.com>
*
* Copyright (C) Freescale Semiconductor, Inc. 2006. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/pci.h>
@@ -28,10 +24,7 @@
*/
static void __init mpc831x_rdb_setup_arch(void)
{
- if (ppc_md.progress)
- ppc_md.progress("mpc831x_rdb_setup_arch()", 0);
-
- mpc83xx_setup_pci();
+ mpc83xx_setup_arch();
mpc831x_usb_cfg();
}
@@ -41,24 +34,16 @@ static const char *board[] __initdata = {
NULL
};
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc831x_rdb_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
machine_device_initcall(mpc831x_rdb, mpc83xx_declare_of_platform_devices);
define_machine(mpc831x_rdb) {
.name = "MPC831x RDB",
- .probe = mpc831x_rdb_probe,
+ .compatibles = board,
.setup_arch = mpc831x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
.init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
deleted file mode 100644
index 8d762203eeff..000000000000
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Description:
- * MPC832xE MDS board specific routines.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/root_dev.h>
-#include <linux/initrd.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
-
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
-
-#include "mpc83xx.h"
-
-#undef DEBUG
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/* ************************************************************************
- *
- * Setup the architecture
- *
- */
-static void __init mpc832x_sys_setup_arch(void)
-{
- struct device_node *np;
- u8 __iomem *bcsr_regs = NULL;
-
- if (ppc_md.progress)
- ppc_md.progress("mpc832x_sys_setup_arch()", 0);
-
- /* Map BCSR area */
- np = of_find_node_by_name(NULL, "bcsr");
- if (np) {
- struct resource res;
-
- of_address_to_resource(np, 0, &res);
- bcsr_regs = ioremap(res.start, resource_size(&res));
- of_node_put(np);
- }
-
- mpc83xx_setup_pci();
-
-#ifdef CONFIG_QUICC_ENGINE
- qe_reset();
-
- if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
- par_io_init(np);
- of_node_put(np);
-
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
- par_io_of_config(np);
- }
-
- if ((np = of_find_compatible_node(NULL, "network", "ucc_geth"))
- != NULL){
- /* Reset the Ethernet PHYs */
-#define BCSR8_FETH_RST 0x50
- clrbits8(&bcsr_regs[8], BCSR8_FETH_RST);
- udelay(1000);
- setbits8(&bcsr_regs[8], BCSR8_FETH_RST);
- iounmap(bcsr_regs);
- of_node_put(np);
- }
-#endif /* CONFIG_QUICC_ENGINE */
-}
-
-machine_device_initcall(mpc832x_mds, mpc83xx_declare_of_platform_devices);
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc832x_sys_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC832xMDS");
-}
-
-define_machine(mpc832x_mds) {
- .name = "MPC832x MDS",
- .probe = mpc832x_sys_probe,
- .setup_arch = mpc832x_sys_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
- .get_irq = ipic_get_irq,
- .restart = mpc83xx_restart,
- .time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index eff5baabc3fb..d523ce0f48db 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/platforms/83xx/mpc832x_rdb.c
*
@@ -7,11 +8,6 @@
* MPC832x RDB board specific routines.
* This file is based on mpc832x_mds.c and mpc8313_rdb.c
* Author: Michael Barkowski <michael.barkowski@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/pci.h>
@@ -19,14 +15,16 @@
#include <linux/spi/spi.h>
#include <linux/spi/mmc_spi.h>
#include <linux/mmc/host.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <linux/fsl_devices.h>
#include <asm/time.h>
#include <asm/ipic.h>
#include <asm/udbg.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -89,7 +87,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
goto err;
ret = of_irq_to_resource(np, 0, &res[1]);
- if (ret == NO_IRQ)
+ if (ret <= 0)
goto err;
pdev = platform_device_alloc("mpc83xx_spi", i);
@@ -111,9 +109,9 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
goto next;
unreg:
- platform_device_del(pdev);
+ platform_device_put(pdev);
err:
- pr_err("%s: registration failed\n", np->full_name);
+ pr_err("%pOF: registration failed\n", np);
next:
i++;
}
@@ -148,7 +146,7 @@ static int __init fsl_spi_init(struct spi_board_info *board_infos,
static void mpc83xx_spi_cs_control(struct spi_device *spi, bool on)
{
- pr_debug("%s %d %d\n", __func__, spi->chip_select, on);
+ pr_debug("%s %d %d\n", __func__, spi_get_chipselect(spi, 0), on);
par_io_data_set(3, 13, on);
}
@@ -166,6 +164,8 @@ static struct spi_board_info mpc832x_spi_boardinfo = {
static int __init mpc832x_spi_init(void)
{
+ struct device_node *np;
+
par_io_config_pin(3, 0, 3, 0, 1, 0); /* SPI1 MOSI, I/O */
par_io_config_pin(3, 1, 3, 0, 1, 0); /* SPI1 MISO, I/O */
par_io_config_pin(3, 2, 3, 0, 1, 0); /* SPI1 CLK, I/O */
@@ -179,7 +179,9 @@ static int __init mpc832x_spi_init(void)
* Don't bother with legacy stuff when device tree contains
* mmc-spi-slot node.
*/
- if (of_find_compatible_node(NULL, NULL, "mmc-spi-slot"))
+ np = of_find_compatible_node(NULL, NULL, "mmc-spi-slot");
+ of_node_put(np);
+ if (np)
return 0;
return fsl_spi_init(&mpc832x_spi_boardinfo, 1, mpc83xx_spi_cs_control);
}
@@ -197,19 +199,14 @@ static void __init mpc832x_rdb_setup_arch(void)
struct device_node *np;
#endif
- if (ppc_md.progress)
- ppc_md.progress("mpc832x_rdb_setup_arch()", 0);
-
- mpc83xx_setup_pci();
+ mpc83xx_setup_arch();
#ifdef CONFIG_QUICC_ENGINE
- qe_reset();
-
if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
par_io_init(np);
of_node_put(np);
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
+ for_each_node_by_name(np, "ucc")
par_io_of_config(np);
}
#endif /* CONFIG_QUICC_ENGINE */
@@ -217,24 +214,14 @@ static void __init mpc832x_rdb_setup_arch(void)
machine_device_initcall(mpc832x_rdb, mpc83xx_declare_of_platform_devices);
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc832x_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC832xRDB");
-}
-
define_machine(mpc832x_rdb) {
.name = "MPC832x RDB",
- .probe = mpc832x_rdb_probe,
+ .compatible = "MPC832xRDB",
.setup_arch = mpc832x_rdb_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mpc834x_itx.c b/arch/powerpc/platforms/83xx/mpc834x_itx.c
index a494fa57bdf9..e45b98ff02d8 100644
--- a/arch/powerpc/platforms/83xx/mpc834x_itx.c
+++ b/arch/powerpc/platforms/83xx/mpc834x_itx.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/platforms/83xx/mpc834x_itx.c
*
* MPC834x ITX board specific routines
*
* Maintainer: Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -31,14 +27,13 @@
#include <asm/machdep.h>
#include <asm/ipic.h>
#include <asm/irq.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include "mpc83xx.h"
-static struct of_device_id __initdata mpc834x_itx_ids[] = {
+static const struct of_device_id mpc834x_itx_ids[] __initconst = {
{ .compatible = "fsl,pq2pro-localbus", },
{},
};
@@ -57,32 +52,19 @@ machine_device_initcall(mpc834x_itx, mpc834x_itx_declare_of_platform_devices);
*/
static void __init mpc834x_itx_setup_arch(void)
{
- if (ppc_md.progress)
- ppc_md.progress("mpc834x_itx_setup_arch()", 0);
-
- mpc83xx_setup_pci();
+ mpc83xx_setup_arch();
mpc834x_usb_cfg();
}
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc834x_itx_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC834xMITX");
-}
-
define_machine(mpc834x_itx) {
.name = "MPC834x ITX",
- .probe = mpc834x_itx_probe,
+ .compatible = "MPC834xMITX",
.setup_arch = mpc834x_itx_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
.init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mpc834x_mds.c b/arch/powerpc/platforms/83xx/mpc834x_mds.c
deleted file mode 100644
index 553e793a4a93..000000000000
--- a/arch/powerpc/platforms/83xx/mpc834x_mds.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * arch/powerpc/platforms/83xx/mpc834x_mds.c
- *
- * MPC834x MDS board specific routines
- *
- * Maintainer: Kumar Gala <galak@kernel.crashing.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/root_dev.h>
-#include <linux/of_platform.h>
-
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc83xx.h"
-
-#define BCSR5_INT_USB 0x02
-static int mpc834xemds_usb_cfg(void)
-{
- struct device_node *np;
- void __iomem *bcsr_regs = NULL;
- u8 bcsr5;
-
- mpc834x_usb_cfg();
- /* Map BCSR area */
- np = of_find_node_by_name(NULL, "bcsr");
- if (np) {
- struct resource res;
-
- of_address_to_resource(np, 0, &res);
- bcsr_regs = ioremap(res.start, resource_size(&res));
- of_node_put(np);
- }
- if (!bcsr_regs)
- return -1;
-
- /*
- * if Processor Board is plugged into PIB board,
- * force to use the PHY on Processor Board
- */
- bcsr5 = in_8(bcsr_regs + 5);
- if (!(bcsr5 & BCSR5_INT_USB))
- out_8(bcsr_regs + 5, (bcsr5 | BCSR5_INT_USB));
- iounmap(bcsr_regs);
- return 0;
-}
-
-/* ************************************************************************
- *
- * Setup the architecture
- *
- */
-static void __init mpc834x_mds_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("mpc834x_mds_setup_arch()", 0);
-
- mpc83xx_setup_pci();
-
- mpc834xemds_usb_cfg();
-}
-
-machine_device_initcall(mpc834x_mds, mpc83xx_declare_of_platform_devices);
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc834x_mds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC834xMDS");
-}
-
-define_machine(mpc834x_mds) {
- .name = "MPC834x MDS",
- .probe = mpc834x_mds_probe,
- .setup_arch = mpc834x_mds_setup_arch,
- .init_IRQ = mpc83xx_ipic_init_IRQ,
- .get_irq = ipic_get_irq,
- .restart = mpc83xx_restart,
- .time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
deleted file mode 100644
index 1a26d2f83401..000000000000
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Li Yang <LeoLi@freescale.com>
- * Yin Olivia <Hong-hua.Yin@freescale.com>
- *
- * Description:
- * MPC8360E MDS board specific routines.
- *
- * Changelog:
- * Jun 21, 2006 Initial version
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/compiler.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/root_dev.h>
-#include <linux/initrd.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
-
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
-
-#include "mpc83xx.h"
-
-#undef DEBUG
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
-/* ************************************************************************
- *
- * Setup the architecture
- *
- */
-static void __init mpc836x_mds_setup_arch(void)
-{
- struct device_node *np;
- u8 __iomem *bcsr_regs = NULL;
-
- if (ppc_md.progress)
- ppc_md.progress("mpc836x_mds_setup_arch()", 0);
-
- /* Map BCSR area */
- np = of_find_node_by_name(NULL, "bcsr");
- if (np) {
- struct resource res;
-
- of_address_to_resource(np, 0, &res);
- bcsr_regs = ioremap(res.start, resource_size(&res));
- of_node_put(np);
- }
-
- mpc83xx_setup_pci();
-
-#ifdef CONFIG_QUICC_ENGINE
- qe_reset();
-
- if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
- par_io_init(np);
- of_node_put(np);
-
- for (np = NULL; (np = of_find_node_by_name(np, "ucc")) != NULL;)
- par_io_of_config(np);
-#ifdef CONFIG_QE_USB
- /* Must fixup Par IO before QE GPIO chips are registered. */
- par_io_config_pin(1, 2, 1, 0, 3, 0); /* USBOE */
- par_io_config_pin(1, 3, 1, 0, 3, 0); /* USBTP */
- par_io_config_pin(1, 8, 1, 0, 1, 0); /* USBTN */
- par_io_config_pin(1, 10, 2, 0, 3, 0); /* USBRXD */
- par_io_config_pin(1, 9, 2, 1, 3, 0); /* USBRP */
- par_io_config_pin(1, 11, 2, 1, 3, 0); /* USBRN */
- par_io_config_pin(2, 20, 2, 0, 1, 0); /* CLK21 */
-#endif /* CONFIG_QE_USB */
- }
-
- if ((np = of_find_compatible_node(NULL, "network", "ucc_geth"))
- != NULL){
- uint svid;
-
- /* Reset the Ethernet PHY */
-#define BCSR9_GETHRST 0x20
- clrbits8(&bcsr_regs[9], BCSR9_GETHRST);
- udelay(1000);
- setbits8(&bcsr_regs[9], BCSR9_GETHRST);
-
- /* handle mpc8360ea rev.2.1 erratum 2: RGMII Timing */
- svid = mfspr(SPRN_SVR);
- if (svid == 0x80480021) {
- void __iomem *immap;
-
- immap = ioremap(get_immrbase() + 0x14a8, 8);
-
- /*
- * IMMR + 0x14A8[4:5] = 11 (clk delay for UCC 2)
- * IMMR + 0x14A8[18:19] = 11 (clk delay for UCC 1)
- */
- setbits32(immap, 0x0c003000);
-
- /*
- * IMMR + 0x14AC[20:27] = 10101010
- * (data delay for both UCC's)
- */
- clrsetbits_be32(immap + 4, 0xff0, 0xaa0);
-
- iounmap(immap);
- }
-
- iounmap(bcsr_regs);
- of_node_put(np);
- }
-#endif /* CONFIG_QUICC_ENGINE */
-}
-
-machine_device_initcall(mpc836x_mds, mpc83xx_declare_of_platform_devices);
-
-#ifdef CONFIG_QE_USB
-static int __init mpc836x_usb_cfg(void)
-{
- u8 __iomem *bcsr;
- struct device_node *np;
- const char *mode;
- int ret = 0;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,mpc8360mds-bcsr");
- if (!np)
- return -ENODEV;
-
- bcsr = of_iomap(np, 0);
- of_node_put(np);
- if (!bcsr)
- return -ENOMEM;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,mpc8323-qe-usb");
- if (!np) {
- ret = -ENODEV;
- goto err;
- }
-
-#define BCSR8_TSEC1M_MASK (0x3 << 6)
-#define BCSR8_TSEC1M_RGMII (0x0 << 6)
-#define BCSR8_TSEC2M_MASK (0x3 << 4)
-#define BCSR8_TSEC2M_RGMII (0x0 << 4)
- /*
- * Default is GMII (2), but we should set it to RGMII (0) if we use
- * USB (Eth PHY is in RGMII mode anyway).
- */
- clrsetbits_8(&bcsr[8], BCSR8_TSEC1M_MASK | BCSR8_TSEC2M_MASK,
- BCSR8_TSEC1M_RGMII | BCSR8_TSEC2M_RGMII);
-
-#define BCSR13_USBMASK 0x0f
-#define BCSR13_nUSBEN 0x08 /* 1 - Disable, 0 - Enable */
-#define BCSR13_USBSPEED 0x04 /* 1 - Full, 0 - Low */
-#define BCSR13_USBMODE 0x02 /* 1 - Host, 0 - Function */
-#define BCSR13_nUSBVCC 0x01 /* 1 - gets VBUS, 0 - supplies VBUS */
-
- clrsetbits_8(&bcsr[13], BCSR13_USBMASK, BCSR13_USBSPEED);
-
- mode = of_get_property(np, "mode", NULL);
- if (mode && !strcmp(mode, "peripheral")) {
- setbits8(&bcsr[13], BCSR13_nUSBVCC);
- qe_usb_clock_set(QE_CLK21, 48000000);
- } else {
- setbits8(&bcsr[13], BCSR13_USBMODE);
- /*
- * The BCSR GPIOs are used to control power and
- * speed of the USB transceiver. This is needed for
- * the USB Host only.
- */
- simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
- }
-
- of_node_put(np);
-err:
- iounmap(bcsr);
- return ret;
-}
-machine_arch_initcall(mpc836x_mds, mpc836x_usb_cfg);
-#endif /* CONFIG_QE_USB */
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc836x_mds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC836xMDS");
-}
-
-define_machine(mpc836x_mds) {
- .name = "MPC836x MDS",
- .probe = mpc836x_mds_probe,
- .setup_arch = mpc836x_mds_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
- .get_irq = ipic_get_irq,
- .restart = mpc83xx_restart,
- .time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index b63b42d11d6c..1fc9d1235a7c 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8360E-RDK board file.
*
@@ -5,23 +6,16 @@
* Copyright (c) 2007-2008 MontaVista Software, Inc.
*
* Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/of_platform.h>
#include <linux/io.h>
-#include <asm/prom.h>
#include <asm/time.h>
#include <asm/ipic.h>
#include <asm/udbg.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -31,33 +25,17 @@ machine_device_initcall(mpc836x_rdk, mpc83xx_declare_of_platform_devices);
static void __init mpc836x_rdk_setup_arch(void)
{
- if (ppc_md.progress)
- ppc_md.progress("mpc836x_rdk_setup_arch()", 0);
-
- mpc83xx_setup_pci();
-#ifdef CONFIG_QUICC_ENGINE
- qe_reset();
-#endif
-}
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened.
- */
-static int __init mpc836x_rdk_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,mpc8360rdk");
+ mpc83xx_setup_arch();
}
define_machine(mpc836x_rdk) {
.name = "MPC836x RDK",
- .probe = mpc836x_rdk_probe,
+ .compatible = "fsl,mpc8360rdk",
.setup_arch = mpc836x_rdk_setup_arch,
- .init_IRQ = mpc83xx_ipic_and_qe_init_IRQ,
+ .discover_phbs = mpc83xx_setup_pci,
+ .init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mpc837x_mds.c b/arch/powerpc/platforms/83xx/mpc837x_mds.c
deleted file mode 100644
index e53a60b6c863..000000000000
--- a/arch/powerpc/platforms/83xx/mpc837x_mds.c
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * arch/powerpc/platforms/83xx/mpc837x_mds.c
- *
- * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
- *
- * MPC837x MDS board specific routines
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/pci.h>
-#include <linux/of.h>
-#include <linux/of_platform.h>
-
-#include <asm/time.h>
-#include <asm/ipic.h>
-#include <asm/udbg.h>
-#include <asm/prom.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc83xx.h"
-
-#define BCSR12_USB_SER_MASK 0x8a
-#define BCSR12_USB_SER_PIN 0x80
-#define BCSR12_USB_SER_DEVICE 0x02
-
-static int mpc837xmds_usb_cfg(void)
-{
- struct device_node *np;
- const void *phy_type, *mode;
- void __iomem *bcsr_regs = NULL;
- u8 bcsr12;
- int ret;
-
- ret = mpc837x_usb_cfg();
- if (ret)
- return ret;
- /* Map BCSR area */
- np = of_find_compatible_node(NULL, NULL, "fsl,mpc837xmds-bcsr");
- if (np) {
- bcsr_regs = of_iomap(np, 0);
- of_node_put(np);
- }
- if (!bcsr_regs)
- return -1;
-
- np = of_find_node_by_name(NULL, "usb");
- if (!np) {
- ret = -ENODEV;
- goto out;
- }
- phy_type = of_get_property(np, "phy_type", NULL);
- if (phy_type && !strcmp(phy_type, "ulpi")) {
- clrbits8(bcsr_regs + 12, BCSR12_USB_SER_PIN);
- } else if (phy_type && !strcmp(phy_type, "serial")) {
- mode = of_get_property(np, "dr_mode", NULL);
- bcsr12 = in_8(bcsr_regs + 12) & ~BCSR12_USB_SER_MASK;
- bcsr12 |= BCSR12_USB_SER_PIN;
- if (mode && !strcmp(mode, "peripheral"))
- bcsr12 |= BCSR12_USB_SER_DEVICE;
- out_8(bcsr_regs + 12, bcsr12);
- } else {
- printk(KERN_ERR "USB DR: unsupported PHY\n");
- }
-
- of_node_put(np);
-out:
- iounmap(bcsr_regs);
- return ret;
-}
-
-/* ************************************************************************
- *
- * Setup the architecture
- *
- */
-static void __init mpc837x_mds_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("mpc837x_mds_setup_arch()", 0);
-
- mpc83xx_setup_pci();
- mpc837xmds_usb_cfg();
-}
-
-machine_device_initcall(mpc837x_mds, mpc83xx_declare_of_platform_devices);
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc837x_mds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,mpc837xmds");
-}
-
-define_machine(mpc837x_mds) {
- .name = "MPC837x MDS",
- .probe = mpc837x_mds_probe,
- .setup_arch = mpc837x_mds_setup_arch,
- .init_IRQ = mpc83xx_ipic_init_IRQ,
- .get_irq = ipic_get_irq,
- .restart = mpc83xx_restart,
- .time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/83xx/mpc837x_rdb.c b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
index 9813c81e8e5b..45823e147933 100644
--- a/arch/powerpc/platforms/83xx/mpc837x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc837x_rdb.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* arch/powerpc/platforms/83xx/mpc837x_rdb.c
*
* Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
*
* MPC837x RDB board specific routines
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/pci.h>
@@ -22,7 +18,7 @@
#include "mpc83xx.h"
-static void mpc837x_rdb_sd_cfg(void)
+static void __init mpc837x_rdb_sd_cfg(void)
{
void __iomem *im;
@@ -50,10 +46,7 @@ static void mpc837x_rdb_sd_cfg(void)
*/
static void __init mpc837x_rdb_setup_arch(void)
{
- if (ppc_md.progress)
- ppc_md.progress("mpc837x_rdb_setup_arch()", 0);
-
- mpc83xx_setup_pci();
+ mpc83xx_setup_arch();
mpc837x_usb_cfg();
mpc837x_rdb_sd_cfg();
}
@@ -68,22 +61,14 @@ static const char * const board[] __initconst = {
NULL
};
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init mpc837x_rdb_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
define_machine(mpc837x_rdb) {
.name = "MPC837x RDB/WLAN",
- .probe = mpc837x_rdb_probe,
+ .compatibles = board,
.setup_arch = mpc837x_rdb_setup_arch,
+ .discover_phbs = mpc83xx_setup_pci,
.init_IRQ = mpc83xx_ipic_init_IRQ,
.get_irq = ipic_get_irq,
.restart = mpc83xx_restart,
.time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h
index 0cf74d7ea1c5..0b8738a2b980 100644
--- a/arch/powerpc/platforms/83xx/mpc83xx.h
+++ b/arch/powerpc/platforms/83xx/mpc83xx.h
@@ -1,9 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MPC83XX_H__
#define __MPC83XX_H__
#include <linux/init.h>
-#include <linux/device.h>
-#include <asm/pci-bridge.h>
/* System Clock Control Register */
#define MPC83XX_SCCR_OFFS 0xA08
@@ -65,26 +64,20 @@
* mpc83xx_* files. Mostly for use by mpc83xx_setup
*/
-extern void mpc83xx_restart(char *cmd);
+extern void __noreturn mpc83xx_restart(char *cmd);
extern long mpc83xx_time_init(void);
-extern int mpc837x_usb_cfg(void);
-extern int mpc834x_usb_cfg(void);
-extern int mpc831x_usb_cfg(void);
+int __init mpc837x_usb_cfg(void);
+int __init mpc834x_usb_cfg(void);
+int __init mpc831x_usb_cfg(void);
extern void mpc83xx_ipic_init_IRQ(void);
-#ifdef CONFIG_QUICC_ENGINE
-extern void mpc83xx_qe_init_IRQ(void);
-extern void mpc83xx_ipic_and_qe_init_IRQ(void);
-#else
-static inline void __init mpc83xx_qe_init_IRQ(void) {}
-#define mpc83xx_ipic_and_qe_init_IRQ mpc83xx_ipic_init_IRQ
-#endif /* CONFIG_QUICC_ENGINE */
#ifdef CONFIG_PCI
extern void mpc83xx_setup_pci(void);
#else
-#define mpc83xx_setup_pci() do {} while (0)
+#define mpc83xx_setup_pci NULL
#endif
extern int mpc83xx_declare_of_platform_devices(void);
+extern void mpc83xx_setup_arch(void);
#endif /* __MPC83XX_H__ */
diff --git a/arch/powerpc/platforms/83xx/sbc834x.c b/arch/powerpc/platforms/83xx/sbc834x.c
deleted file mode 100644
index 26cb3e934722..000000000000
--- a/arch/powerpc/platforms/83xx/sbc834x.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * arch/powerpc/platforms/83xx/sbc834x.c
- *
- * Wind River SBC834x board specific routines
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the mpc834x_mds.c support by Kumar Gala.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/root_dev.h>
-#include <linux/of_platform.h>
-
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc83xx.h"
-
-/* ************************************************************************
- *
- * Setup the architecture
- *
- */
-static void __init sbc834x_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("sbc834x_setup_arch()", 0);
-
- mpc83xx_setup_pci();
-}
-
-machine_device_initcall(sbc834x, mpc83xx_declare_of_platform_devices);
-
-/*
- * Called very early, MMU is off, device-tree isn't unflattened
- */
-static int __init sbc834x_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "SBC834xE");
-}
-
-define_machine(sbc834x) {
- .name = "SBC834xE",
- .probe = sbc834x_probe,
- .setup_arch = sbc834x_setup_arch,
- .init_IRQ = mpc83xx_ipic_init_IRQ,
- .get_irq = ipic_get_irq,
- .restart = mpc83xx_restart,
- .time_init = mpc83xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/83xx/suspend-asm.S b/arch/powerpc/platforms/83xx/suspend-asm.S
index 3d1ecd211776..6a62ed6082c9 100644
--- a/arch/powerpc/platforms/83xx/suspend-asm.S
+++ b/arch/powerpc/platforms/83xx/suspend-asm.S
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Enter and leave deep sleep state on MPC83xx
*
* Copyright (c) 2006-2008 Freescale Semiconductor, Inc.
* Author: Scott Wood <scottwood@freescale.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <asm/page.h>
@@ -26,13 +23,13 @@
#define SS_MSR 0x74
#define SS_SDR1 0x78
#define SS_LR 0x7c
-#define SS_SPRG 0x80 /* 4 SPRGs */
-#define SS_DBAT 0x90 /* 8 DBATs */
-#define SS_IBAT 0xd0 /* 8 IBATs */
-#define SS_TB 0x110
-#define SS_CR 0x118
-#define SS_GPREG 0x11c /* r12-r31 */
-#define STATE_SAVE_SIZE 0x16c
+#define SS_SPRG 0x80 /* 8 SPRGs */
+#define SS_DBAT 0xa0 /* 8 DBATs */
+#define SS_IBAT 0xe0 /* 8 IBATs */
+#define SS_TB 0x120
+#define SS_CR 0x128
+#define SS_GPREG 0x12c /* r12-r31 */
+#define STATE_SAVE_SIZE 0x17c
.section .data
.align 5
@@ -71,7 +68,8 @@ _GLOBAL(mpc83xx_enter_deep_sleep)
mfspr r5, SPRN_HID0
mfspr r6, SPRN_HID1
- mfspr r7, SPRN_HID2
+ /* FIXME: Should this use SPRN_HID2_G2_LE? */
+ mfspr r7, SPRN_HID2_750FX
stw r5, SS_HID+0(r3)
stw r6, SS_HID+4(r3)
@@ -103,6 +101,16 @@ _GLOBAL(mpc83xx_enter_deep_sleep)
stw r7, SS_SPRG+12(r3)
stw r8, SS_SDR1(r3)
+ mfspr r4, SPRN_SPRG4
+ mfspr r5, SPRN_SPRG5
+ mfspr r6, SPRN_SPRG6
+ mfspr r7, SPRN_SPRG7
+
+ stw r4, SS_SPRG+16(r3)
+ stw r5, SS_SPRG+20(r3)
+ stw r6, SS_SPRG+24(r3)
+ stw r7, SS_SPRG+28(r3)
+
mfspr r4, SPRN_DBAT0U
mfspr r5, SPRN_DBAT0L
mfspr r6, SPRN_DBAT1U
@@ -389,7 +397,8 @@ mpc83xx_deep_resume:
mtspr SPRN_HID0, r5
mtspr SPRN_HID1, r6
- mtspr SPRN_HID2, r7
+ /* FIXME: Should this use SPRN_HID2_G2_LE? */
+ mtspr SPRN_HID2_750FX, r7
lwz r4, SS_IABR+0(r3)
lwz r5, SS_IABR+4(r3)
@@ -493,6 +502,16 @@ mpc83xx_deep_resume:
mtspr SPRN_IBAT7U, r6
mtspr SPRN_IBAT7L, r7
+ lwz r4, SS_SPRG+16(r3)
+ lwz r5, SS_SPRG+20(r3)
+ lwz r6, SS_SPRG+24(r3)
+ lwz r7, SS_SPRG+28(r3)
+
+ mtspr SPRN_SPRG4, r4
+ mtspr SPRN_SPRG5, r5
+ mtspr SPRN_SPRG6, r6
+ mtspr SPRN_SPRG7, r7
+
lwz r4, SS_SPRG+0(r3)
lwz r5, SS_SPRG+4(r3)
lwz r6, SS_SPRG+8(r3)
@@ -531,3 +550,4 @@ mpc83xx_deep_resume:
mtdec r0
rfi
+_ASM_NOKPROBE_SYMBOL(mpc83xx_deep_resume)
diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c
index 4b4c081df94d..99bd4355f28e 100644
--- a/arch/powerpc/platforms/83xx/suspend.c
+++ b/arch/powerpc/platforms/83xx/suspend.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* MPC83xx suspend support
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2006-2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
*/
#include <linux/pm.h>
@@ -15,13 +12,14 @@
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/wait.h>
+#include <linux/sched/signal.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/suspend.h>
#include <linux/fsl_devices.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/export.h>
#include <asm/reg.h>
@@ -102,7 +100,6 @@ struct pmc_type {
int has_deep_sleep;
};
-static struct platform_device *pmc_dev;
static int has_deep_sleep, deep_sleeping;
static int pmc_irq;
static struct mpc83xx_pmc __iomem *pmc_regs;
@@ -209,7 +206,8 @@ static int mpc83xx_suspend_enter(suspend_state_t state)
out_be32(&pmc_regs->config1,
in_be32(&pmc_regs->config1) | PMCCR1_POWER_OFF);
- enable_kernel_fp();
+ if (IS_ENABLED(CONFIG_PPC_FPU))
+ enable_kernel_fp();
mpc83xx_enter_deep_sleep(immrbase);
@@ -264,9 +262,10 @@ static int mpc83xx_suspend_begin(suspend_state_t state)
static int agent_thread_fn(void *data)
{
+ set_freezable();
+
while (1) {
- wait_event_interruptible(agent_wq, pci_pm_state >= 2);
- try_to_freeze();
+ wait_event_freezable(agent_wq, pci_pm_state >= 2);
if (signal_pending(current) || pci_pm_state < 2)
continue;
@@ -321,27 +320,43 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = {
.end = mpc83xx_suspend_end,
};
-static struct of_device_id pmc_match[];
+static struct pmc_type pmc_types[] = {
+ {
+ .has_deep_sleep = 1,
+ },
+ {
+ .has_deep_sleep = 0,
+ }
+};
+
+static const struct of_device_id pmc_match[] = {
+ {
+ .compatible = "fsl,mpc8313-pmc",
+ .data = &pmc_types[0],
+ },
+ {
+ .compatible = "fsl,mpc8349-pmc",
+ .data = &pmc_types[1],
+ },
+ {}
+};
+
static int pmc_probe(struct platform_device *ofdev)
{
- const struct of_device_id *match;
struct device_node *np = ofdev->dev.of_node;
struct resource res;
const struct pmc_type *type;
int ret = 0;
- match = of_match_device(pmc_match, &ofdev->dev);
- if (!match)
+ type = of_device_get_match_data(&ofdev->dev);
+ if (!type)
return -EINVAL;
- type = match->data;
-
if (!of_device_is_available(np))
return -ENODEV;
has_deep_sleep = type->has_deep_sleep;
immrbase = get_immrbase();
- pmc_dev = ofdev;
is_pci_agent = mpc83xx_is_pci_agent();
if (is_pci_agent < 0)
@@ -352,7 +367,7 @@ static int pmc_probe(struct platform_device *ofdev)
return -ENODEV;
pmc_irq = irq_of_parse_and_map(np, 0);
- if (pmc_irq != NO_IRQ) {
+ if (pmc_irq) {
ret = request_irq(pmc_irq, pmc_irq_handler, IRQF_SHARED,
"pmc", ofdev);
@@ -360,7 +375,7 @@ static int pmc_probe(struct platform_device *ofdev)
return -EBUSY;
}
- pmc_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc));
+ pmc_regs = ioremap(res.start, sizeof(*pmc_regs));
if (!pmc_regs) {
ret = -ENOMEM;
@@ -373,7 +388,7 @@ static int pmc_probe(struct platform_device *ofdev)
goto out_pmc;
}
- clock_regs = ioremap(res.start, sizeof(struct mpc83xx_pmc));
+ clock_regs = ioremap(res.start, sizeof(*clock_regs));
if (!clock_regs) {
ret = -ENOMEM;
@@ -400,51 +415,19 @@ out_syscr:
out_pmc:
iounmap(pmc_regs);
out:
- if (pmc_irq != NO_IRQ)
+ if (pmc_irq)
free_irq(pmc_irq, ofdev);
return ret;
}
-static int pmc_remove(struct platform_device *ofdev)
-{
- return -EPERM;
-};
-
-static struct pmc_type pmc_types[] = {
- {
- .has_deep_sleep = 1,
- },
- {
- .has_deep_sleep = 0,
- }
-};
-
-static struct of_device_id pmc_match[] = {
- {
- .compatible = "fsl,mpc8313-pmc",
- .data = &pmc_types[0],
- },
- {
- .compatible = "fsl,mpc8349-pmc",
- .data = &pmc_types[1],
- },
- {}
-};
-
static struct platform_driver pmc_driver = {
.driver = {
.name = "mpc83xx-pmc",
- .owner = THIS_MODULE,
.of_match_table = pmc_match,
+ .suppress_bind_attrs = true,
},
.probe = pmc_probe,
- .remove = pmc_remove
};
-static int pmc_init(void)
-{
- return platform_driver_register(&pmc_driver);
-}
-
-module_init(pmc_init);
+builtin_platform_driver(pmc_driver);
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
deleted file mode 100644
index 1ad748bb39b4..000000000000
--- a/arch/powerpc/platforms/83xx/usb.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Freescale 83xx USB SOC setup code
- *
- * Copyright (C) 2007 Freescale Semiconductor, Inc.
- * Author: Li Yang
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/of.h>
-
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc83xx.h"
-
-
-#ifdef CONFIG_PPC_MPC834x
-int mpc834x_usb_cfg(void)
-{
- unsigned long sccr, sicrl, sicrh;
- void __iomem *immap;
- struct device_node *np = NULL;
- int port0_is_dr = 0, port1_is_dr = 0;
- const void *prop, *dr_mode;
-
- immap = ioremap(get_immrbase(), 0x1000);
- if (!immap)
- return -ENOMEM;
-
- /* Read registers */
- /* Note: DR and MPH must use the same clock setting in SCCR */
- sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
- sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
- sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
-
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (np) {
- sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */
-
- prop = of_get_property(np, "phy_type", NULL);
- port1_is_dr = 1;
- if (prop && (!strcmp(prop, "utmi") ||
- !strcmp(prop, "utmi_wide"))) {
- sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
- sicrh |= MPC834X_SICRH_USB_UTMI;
- port0_is_dr = 1;
- } else if (prop && !strcmp(prop, "serial")) {
- dr_mode = of_get_property(np, "dr_mode", NULL);
- if (dr_mode && !strcmp(dr_mode, "otg")) {
- sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
- port0_is_dr = 1;
- } else {
- sicrl |= MPC834X_SICRL_USB1;
- }
- } else if (prop && !strcmp(prop, "ulpi")) {
- sicrl |= MPC834X_SICRL_USB1;
- } else {
- printk(KERN_WARNING "834x USB PHY type not supported\n");
- }
- of_node_put(np);
- }
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
- if (np) {
- sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */
-
- prop = of_get_property(np, "port0", NULL);
- if (prop) {
- if (port0_is_dr)
- printk(KERN_WARNING
- "834x USB port0 can't be used by both DR and MPH!\n");
- sicrl &= ~MPC834X_SICRL_USB0;
- }
- prop = of_get_property(np, "port1", NULL);
- if (prop) {
- if (port1_is_dr)
- printk(KERN_WARNING
- "834x USB port1 can't be used by both DR and MPH!\n");
- sicrl &= ~MPC834X_SICRL_USB1;
- }
- of_node_put(np);
- }
-
- /* Write back */
- out_be32(immap + MPC83XX_SCCR_OFFS, sccr);
- out_be32(immap + MPC83XX_SICRL_OFFS, sicrl);
- out_be32(immap + MPC83XX_SICRH_OFFS, sicrh);
-
- iounmap(immap);
- return 0;
-}
-#endif /* CONFIG_PPC_MPC834x */
-
-#ifdef CONFIG_PPC_MPC831x
-int mpc831x_usb_cfg(void)
-{
- u32 temp;
- void __iomem *immap, *usb_regs;
- struct device_node *np = NULL;
- struct device_node *immr_node = NULL;
- const void *prop;
- struct resource res;
- int ret = 0;
-#ifdef CONFIG_USB_OTG
- const void *dr_mode;
-#endif
-
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np)
- return -ENODEV;
- prop = of_get_property(np, "phy_type", NULL);
-
- /* Map IMMR space for pin and clock settings */
- immap = ioremap(get_immrbase(), 0x1000);
- if (!immap) {
- of_node_put(np);
- return -ENOMEM;
- }
-
- /* Configure clock */
- immr_node = of_get_parent(np);
- if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
- of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
- clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
- MPC8315_SCCR_USB_MASK,
- MPC8315_SCCR_USB_DRCM_01);
- else
- clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
- MPC83XX_SCCR_USB_MASK,
- MPC83XX_SCCR_USB_DRCM_11);
-
- /* Configure pin mux for ULPI. There is no pin mux for UTMI */
- if (prop && !strcmp(prop, "ulpi")) {
- if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
- clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
- MPC8308_SICRH_USB_MASK,
- MPC8308_SICRH_USB_ULPI);
- } else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
- clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
- MPC8315_SICRL_USB_MASK,
- MPC8315_SICRL_USB_ULPI);
- clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
- MPC8315_SICRH_USB_MASK,
- MPC8315_SICRH_USB_ULPI);
- } else {
- clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
- MPC831X_SICRL_USB_MASK,
- MPC831X_SICRL_USB_ULPI);
- clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
- MPC831X_SICRH_USB_MASK,
- MPC831X_SICRH_USB_ULPI);
- }
- }
-
- iounmap(immap);
-
- if (immr_node)
- of_node_put(immr_node);
-
- /* Map USB SOC space */
- ret = of_address_to_resource(np, 0, &res);
- if (ret) {
- of_node_put(np);
- return ret;
- }
- usb_regs = ioremap(res.start, resource_size(&res));
-
- /* Using on-chip PHY */
- if (prop && (!strcmp(prop, "utmi_wide") ||
- !strcmp(prop, "utmi"))) {
- u32 refsel;
-
- if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
- goto out;
-
- if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
- refsel = CONTROL_REFSEL_24MHZ;
- else
- refsel = CONTROL_REFSEL_48MHZ;
- /* Set UTMI_PHY_EN and REFSEL */
- out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
- CONTROL_UTMI_PHY_EN | refsel);
- /* Using external UPLI PHY */
- } else if (prop && !strcmp(prop, "ulpi")) {
- /* Set PHY_CLK_SEL to ULPI */
- temp = CONTROL_PHY_CLK_SEL_ULPI;
-#ifdef CONFIG_USB_OTG
- /* Set OTG_PORT */
- if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
- dr_mode = of_get_property(np, "dr_mode", NULL);
- if (dr_mode && !strcmp(dr_mode, "otg"))
- temp |= CONTROL_OTG_PORT;
- }
-#endif /* CONFIG_USB_OTG */
- out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
- } else {
- printk(KERN_WARNING "831x USB PHY type not supported\n");
- ret = -EINVAL;
- }
-
-out:
- iounmap(usb_regs);
- of_node_put(np);
- return ret;
-}
-#endif /* CONFIG_PPC_MPC831x */
-
-#ifdef CONFIG_PPC_MPC837x
-int mpc837x_usb_cfg(void)
-{
- void __iomem *immap;
- struct device_node *np = NULL;
- const void *prop;
- int ret = 0;
-
- np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np || !of_device_is_available(np))
- return -ENODEV;
- prop = of_get_property(np, "phy_type", NULL);
-
- if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
- printk(KERN_WARNING "837x USB PHY type not supported\n");
- of_node_put(np);
- return -EINVAL;
- }
-
- /* Map IMMR space for pin and clock settings */
- immap = ioremap(get_immrbase(), 0x1000);
- if (!immap) {
- of_node_put(np);
- return -ENOMEM;
- }
-
- /* Configure clock */
- clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
- MPC837X_SCCR_USB_DRCM_11);
-
- /* Configure pin mux for ULPI/serial */
- clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
- MPC837X_SICRL_USB_ULPI);
-
- iounmap(immap);
- of_node_put(np);
- return ret;
-}
-#endif /* CONFIG_PPC_MPC837x */
diff --git a/arch/powerpc/platforms/83xx/usb_831x.c b/arch/powerpc/platforms/83xx/usb_831x.c
new file mode 100644
index 000000000000..28c24e90f022
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_831x.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+int __init mpc831x_usb_cfg(void)
+{
+ u32 temp;
+ void __iomem *immap, *usb_regs;
+ struct device_node *np = NULL;
+ struct device_node *immr_node = NULL;
+ const void *prop;
+ struct resource res;
+ int ret = 0;
+#ifdef CONFIG_USB_OTG
+ const void *dr_mode;
+#endif
+
+ np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+ if (!np)
+ return -ENODEV;
+ prop = of_get_property(np, "phy_type", NULL);
+
+ /* Map IMMR space for pin and clock settings */
+ immap = ioremap(get_immrbase(), 0x1000);
+ if (!immap) {
+ of_node_put(np);
+ return -ENOMEM;
+ }
+
+ /* Configure clock */
+ immr_node = of_get_parent(np);
+ if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") ||
+ of_device_is_compatible(immr_node, "fsl,mpc8308-immr")))
+ clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+ MPC8315_SCCR_USB_MASK,
+ MPC8315_SCCR_USB_DRCM_01);
+ else
+ clrsetbits_be32(immap + MPC83XX_SCCR_OFFS,
+ MPC83XX_SCCR_USB_MASK,
+ MPC83XX_SCCR_USB_DRCM_11);
+
+ /* Configure pin mux for ULPI. There is no pin mux for UTMI */
+ if (prop && !strcmp(prop, "ulpi")) {
+ if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+ clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+ MPC8308_SICRH_USB_MASK,
+ MPC8308_SICRH_USB_ULPI);
+ } else if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr")) {
+ clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+ MPC8315_SICRL_USB_MASK,
+ MPC8315_SICRL_USB_ULPI);
+ clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+ MPC8315_SICRH_USB_MASK,
+ MPC8315_SICRH_USB_ULPI);
+ } else {
+ clrsetbits_be32(immap + MPC83XX_SICRL_OFFS,
+ MPC831X_SICRL_USB_MASK,
+ MPC831X_SICRL_USB_ULPI);
+ clrsetbits_be32(immap + MPC83XX_SICRH_OFFS,
+ MPC831X_SICRH_USB_MASK,
+ MPC831X_SICRH_USB_ULPI);
+ }
+ }
+
+ iounmap(immap);
+
+ of_node_put(immr_node);
+
+ /* Map USB SOC space */
+ ret = of_address_to_resource(np, 0, &res);
+ if (ret) {
+ of_node_put(np);
+ return ret;
+ }
+ usb_regs = ioremap(res.start, resource_size(&res));
+
+ /* Using on-chip PHY */
+ if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) {
+ u32 refsel;
+
+ if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))
+ goto out;
+
+ if (of_device_is_compatible(immr_node, "fsl,mpc8315-immr"))
+ refsel = CONTROL_REFSEL_24MHZ;
+ else
+ refsel = CONTROL_REFSEL_48MHZ;
+ /* Set UTMI_PHY_EN and REFSEL */
+ out_be32(usb_regs + FSL_USB2_CONTROL_OFFS,
+ CONTROL_UTMI_PHY_EN | refsel);
+ /* Using external UPLI PHY */
+ } else if (prop && !strcmp(prop, "ulpi")) {
+ /* Set PHY_CLK_SEL to ULPI */
+ temp = CONTROL_PHY_CLK_SEL_ULPI;
+#ifdef CONFIG_USB_OTG
+ /* Set OTG_PORT */
+ if (!of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) {
+ dr_mode = of_get_property(np, "dr_mode", NULL);
+ if (dr_mode && !strcmp(dr_mode, "otg"))
+ temp |= CONTROL_OTG_PORT;
+ }
+#endif /* CONFIG_USB_OTG */
+ out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp);
+ } else {
+ pr_warn("831x USB PHY type not supported\n");
+ ret = -EINVAL;
+ }
+
+out:
+ iounmap(usb_regs);
+ of_node_put(np);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c
new file mode 100644
index 000000000000..3a8d6c662d06
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_834x.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+int __init mpc834x_usb_cfg(void)
+{
+ unsigned long sccr, sicrl, sicrh;
+ void __iomem *immap;
+ struct device_node *np = NULL;
+ int port0_is_dr = 0, port1_is_dr = 0;
+ const void *prop, *dr_mode;
+
+ immap = ioremap(get_immrbase(), 0x1000);
+ if (!immap)
+ return -ENOMEM;
+
+ /* Read registers */
+ /* Note: DR and MPH must use the same clock setting in SCCR */
+ sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK;
+ sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK;
+ sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+ if (np) {
+ sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */
+
+ prop = of_get_property(np, "phy_type", NULL);
+ port1_is_dr = 1;
+ if (prop &&
+ (!strcmp(prop, "utmi") || !strcmp(prop, "utmi_wide"))) {
+ sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+ sicrh |= MPC834X_SICRH_USB_UTMI;
+ port0_is_dr = 1;
+ } else if (prop && !strcmp(prop, "serial")) {
+ dr_mode = of_get_property(np, "dr_mode", NULL);
+ if (dr_mode && !strcmp(dr_mode, "otg")) {
+ sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1;
+ port0_is_dr = 1;
+ } else {
+ sicrl |= MPC834X_SICRL_USB1;
+ }
+ } else if (prop && !strcmp(prop, "ulpi")) {
+ sicrl |= MPC834X_SICRL_USB1;
+ } else {
+ pr_warn("834x USB PHY type not supported\n");
+ }
+ of_node_put(np);
+ }
+ np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph");
+ if (np) {
+ sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */
+
+ prop = of_get_property(np, "port0", NULL);
+ if (prop) {
+ if (port0_is_dr)
+ pr_warn("834x USB port0 can't be used by both DR and MPH!\n");
+ sicrl &= ~MPC834X_SICRL_USB0;
+ }
+ prop = of_get_property(np, "port1", NULL);
+ if (prop) {
+ if (port1_is_dr)
+ pr_warn("834x USB port1 can't be used by both DR and MPH!\n");
+ sicrl &= ~MPC834X_SICRL_USB1;
+ }
+ of_node_put(np);
+ }
+
+ /* Write back */
+ out_be32(immap + MPC83XX_SCCR_OFFS, sccr);
+ out_be32(immap + MPC83XX_SICRL_OFFS, sicrl);
+ out_be32(immap + MPC83XX_SICRH_OFFS, sicrh);
+
+ iounmap(immap);
+ return 0;
+}
diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c
new file mode 100644
index 000000000000..726935bb6e2d
--- /dev/null
+++ b/arch/powerpc/platforms/83xx/usb_837x.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale 83xx USB SOC setup code
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc.
+ * Author: Li Yang
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/io.h>
+
+#include <sysdev/fsl_soc.h>
+
+#include "mpc83xx.h"
+
+int __init mpc837x_usb_cfg(void)
+{
+ void __iomem *immap;
+ struct device_node *np = NULL;
+ const void *prop;
+ int ret = 0;
+
+ np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
+ if (!np || !of_device_is_available(np)) {
+ of_node_put(np);
+ return -ENODEV;
+ }
+ prop = of_get_property(np, "phy_type", NULL);
+
+ if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
+ pr_warn("837x USB PHY type not supported\n");
+ of_node_put(np);
+ return -EINVAL;
+ }
+
+ /* Map IMMR space for pin and clock settings */
+ immap = ioremap(get_immrbase(), 0x1000);
+ if (!immap) {
+ of_node_put(np);
+ return -ENOMEM;
+ }
+
+ /* Configure clock */
+ clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11,
+ MPC837X_SCCR_USB_DRCM_11);
+
+ /* Configure pin mux for ULPI/serial */
+ clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK,
+ MPC837X_SICRL_USB_ULPI);
+
+ iounmap(immap);
+ of_node_put(np);
+ return ret;
+}
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index 0c1e6903597e..604c1b4b6d45 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -1,28 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
menuconfig FSL_SOC_BOOKE
bool "Freescale Book-E Machine Type"
- depends on PPC_85xx || PPC_BOOK3E
+ depends on PPC_E500
select FSL_SOC
select PPC_UDBG_16550
select MPIC
- select PPC_PCI_CHOICE
+ select HAVE_PCI
select FSL_PCI if PCI
select SERIAL_8250_EXTENDED if SERIAL_8250
select SERIAL_8250_SHARE_IRQ if SERIAL_8250
+ select FSL_CORENET_RCPM if PPC_E500MC
default y
if FSL_SOC_BOOKE
if PPC32
-config FSL_85XX_CACHE_SRAM
- bool
- select PPC_LIB_RHEAP
- help
- When selected, this option enables cache-sram support
- for memory allocation on P1/P2 QorIQ platforms.
- cache-sram-size and cache-sram-offset kernel boot
- parameters should be passed when this option is enabled.
-
config BSC9131_RDB
bool "Freescale BSC9131RDB"
select DEFAULT_UIMAGE
@@ -47,35 +40,14 @@ config BSC9132_QDS
and dual StarCore SC3850 DSP cores.
Manufacturer : Freescale Semiconductor, Inc
-config MPC8540_ADS
- bool "Freescale MPC8540 ADS"
- select DEFAULT_UIMAGE
- help
- This option enables support for the MPC 8540 ADS board
-
-config MPC8560_ADS
- bool "Freescale MPC8560 ADS"
- select DEFAULT_UIMAGE
- select CPM2
- help
- This option enables support for the MPC 8560 ADS board
-
-config MPC85xx_CDS
- bool "Freescale MPC85xx CDS"
- select DEFAULT_UIMAGE
- select PPC_I8259
- select HAS_RAPIDIO
- help
- This option enables support for the MPC85xx CDS board
-
config MPC85xx_MDS
- bool "Freescale MPC85xx MDS"
+ bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS"
select DEFAULT_UIMAGE
- select PHYLIB
- select HAS_RAPIDIO
+ select PHYLIB if NETDEVICES
+ select HAVE_RAPIDIO
select SWIOTLB
help
- This option enables support for the MPC85xx MDS board
+ This option enables support for the MPC8568 MDS, MPC8569 MDS and P1021 MDS boards
config MPC8536_DS
bool "Freescale MPC8536 DS"
@@ -85,28 +57,43 @@ config MPC8536_DS
This option enables support for the MPC8536 DS board
config MPC85xx_DS
- bool "Freescale MPC85xx DS"
+ bool "Freescale MPC8544 DS / MPC8572 DS"
select PPC_I8259
select DEFAULT_UIMAGE
select FSL_ULI1575 if PCI
select SWIOTLB
help
- This option enables support for the MPC85xx DS (MPC8544 DS) board
+ This option enables support for the MPC8544 DS and MPC8572 DS boards
config MPC85xx_RDB
- bool "Freescale MPC85xx RDB"
+ bool "Freescale P102x MBG/UTM/RDB"
select PPC_I8259
select DEFAULT_UIMAGE
- select FSL_ULI1575 if PCI
select SWIOTLB
help
- This option enables support for the MPC85xx RDB (P2020 RDB) board
+ This option enables support for the P1020 MBG PC, P1020 UTM PC,
+ P1020 RDB PC, P1020 RDB PD, P1020 RDB, P1021 RDB PC, P1024 RDB,
+ and P1025 RDB boards
+
+config PPC_P2020
+ bool "Freescale P2020"
+ default y if MPC85xx_DS || MPC85xx_RDB
+ select DEFAULT_UIMAGE
+ select SWIOTLB
+ imply PPC_I8259
+ imply FSL_ULI1575 if PCI
+ help
+ This option enables generic unified support for any board with the
+ Freescale P2020 processor.
+
+ For example: P2020 DS board, P2020 RDB board, P2020 RDB PC board or
+ CZ.NIC Turris 1.x boards.
config P1010_RDB
- bool "Freescale P1010RDB"
+ bool "Freescale P1010 RDB"
select DEFAULT_UIMAGE
help
- This option enables support for the MPC85xx RDB (P1010 RDB) board
+ This option enables support for the P1010 RDB board
P1010RDB contains P1010Si, which provides CPU performance up to 800
MHz and 1600 DMIPS, additional functionality and faster interfaces
@@ -145,10 +132,10 @@ config SOCRATES
This option enables support for the Socrates board.
config KSI8560
- bool "Emerson KSI8560"
- select DEFAULT_UIMAGE
- help
- This option enables support for the Emerson KSI8560 board
+ bool "Emerson KSI8560"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Emerson KSI8560 board
config XES_MPC85xx
bool "X-ES single-board computer"
@@ -158,7 +145,7 @@ config XES_MPC85xx
computers from Extreme Engineering Solutions (X-ES) based on
Freescale MPC85xx processors.
Manufacturer: Extreme Engineering Solutions, Inc.
- URL: <http://www.xes-inc.com/>
+ URL: <https://www.xes-inc.com/>
config STX_GP3
bool "Silicon Turnkey Express GP3"
@@ -206,25 +193,19 @@ config TQM8560
select TQM85xx
select CPM2
-config SBC8548
- bool "Wind River SBC8548"
- select DEFAULT_UIMAGE
- help
- This option enables support for the Wind River SBC8548 board
-
config PPA8548
bool "Prodrive PPA8548"
help
This option enables support for the Prodrive PPA8548 board.
select DEFAULT_UIMAGE
- select HAS_RAPIDIO
+ select HAVE_RAPIDIO
config GE_IMP3A
bool "GE Intelligent Platforms IMP3A"
select DEFAULT_UIMAGE
select SWIOTLB
select MMIO_NVRAM
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
select GE_FPGA
help
This option enables support for the GE Intelligent Platforms IMP3A
@@ -241,6 +222,12 @@ config SGY_CTS1000
help
Enable this to support functionality in Servergy's CTS-1000 systems.
+config MVME2500
+ bool "Artesyn MVME2500"
+ select DEFAULT_UIMAGE
+ help
+ This option enables support for the Emerson/Artesyn MVME2500 board.
+
endif # PPC32
config PPC_QEMU_E500
@@ -261,13 +248,12 @@ config PPC_QEMU_E500
config CORENET_GENERIC
bool "Freescale CoreNet Generic"
select DEFAULT_UIMAGE
- select E500
select PPC_E500MC
select PHYS_64BIT
select SWIOTLB
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
select GPIO_MPC8XXX
- select HAS_RAPIDIO
+ select HAVE_RAPIDIO
select PPC_EPAPR_HV_PIC
help
This option enables support for the FSL CoreNet based boards.
@@ -276,7 +262,7 @@ config CORENET_GENERIC
For 64bit kernel, the following boards are supported:
T208x QDS/RDB, T4240 QDS/RDB and B4 QDS
The following boards are supported for both 32bit and 64bit kernel:
- P5020 DS, P5040 DS and T104xQDS
+ P5020 DS, P5040 DS, T102x QDS/RDB, T104x QDS/RDB
endif # FSL_SOC_BOOKE
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
index 730326046625..43c34f26f108 100644
--- a/arch/powerpc/platforms/85xx/Makefile
+++ b/arch/powerpc/platforms/85xx/Makefile
@@ -1,29 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the PowerPC 85xx linux kernel.
#
obj-$(CONFIG_SMP) += smp.o
+ifneq ($(CONFIG_FSL_CORENET_RCPM),y)
+obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o
+endif
obj-y += common.o
obj-$(CONFIG_BSC9131_RDB) += bsc913x_rdb.o
obj-$(CONFIG_BSC9132_QDS) += bsc913x_qds.o
obj-$(CONFIG_C293_PCIE) += c293pcie.o
-obj-$(CONFIG_MPC8540_ADS) += mpc85xx_ads.o
-obj-$(CONFIG_MPC8560_ADS) += mpc85xx_ads.o
-obj-$(CONFIG_MPC85xx_CDS) += mpc85xx_cds.o
obj-$(CONFIG_MPC8536_DS) += mpc8536_ds.o
-obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o
+obj8259-$(CONFIG_PPC_I8259) += mpc85xx_8259.o
+obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o $(obj8259-y)
obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o
obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o
obj-$(CONFIG_P1010_RDB) += p1010rdb.o
obj-$(CONFIG_P1022_DS) += p1022_ds.o
obj-$(CONFIG_P1022_RDK) += p1022_rdk.o
obj-$(CONFIG_P1023_RDB) += p1023_rdb.o
+obj-$(CONFIG_PPC_P2020) += p2020.o $(obj8259-y)
obj-$(CONFIG_TWR_P102x) += twr_p102x.o
obj-$(CONFIG_CORENET_GENERIC) += corenet_generic.o
+obj-$(CONFIG_FB_FSL_DIU) += t1042rdb_diu.o
obj-$(CONFIG_STX_GP3) += stx_gp3.o
obj-$(CONFIG_TQM85xx) += tqm85xx.o
-obj-$(CONFIG_SBC8548) += sbc8548.o
obj-$(CONFIG_PPA8548) += ppa8548.o
obj-$(CONFIG_SOCRATES) += socrates.o socrates_fpga_pic.o
obj-$(CONFIG_KSI8560) += ksi8560.o
@@ -31,3 +34,4 @@ obj-$(CONFIG_XES_MPC85xx) += xes_mpc85xx.o
obj-$(CONFIG_GE_IMP3A) += ge_imp3a.o
obj-$(CONFIG_PPC_QEMU_E500) += qemu_e500.o
obj-$(CONFIG_SGY_CTS1000) += sgy_cts1000.o
+obj-$(CONFIG_MVME2500) += mvme2500.o
diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
index f0927e58af25..3ad8096fcf16 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_qds.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* BSC913xQDS Board Setup
*
@@ -6,23 +7,19 @@
* Priyanka Jain <Priyanka.Jain@freescale.com>
*
* Copyright 2014 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <asm/mpic.h>
#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
#include <asm/udbg.h>
#include "mpc85xx.h"
#include "smp.h"
-void __init bsc913x_qds_pic_init(void)
+static void __init bsc913x_qds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
@@ -46,29 +43,21 @@ static void __init bsc913x_qds_setup_arch(void)
mpc85xx_smp_init();
#endif
+ fsl_pci_assign_primary();
+
pr_info("bsc913x board from Freescale Semiconductor\n");
}
-machine_device_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-
-static int __init bsc9132_qds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,bsc9132qds");
-}
+machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
define_machine(bsc9132_qds) {
.name = "BSC9132 QDS",
- .probe = bsc9132_qds_probe,
+ .compatible = "fsl,bsc9132qds",
.setup_arch = bsc913x_qds_setup_arch,
.init_IRQ = bsc913x_qds_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
index 9d57bedb940c..dcd358c28201 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_rdb.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c
@@ -1,17 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* BSC913xRDB Board Setup
*
* Author: Priyanka Jain <Priyanka.Jain@freescale.com>
*
* Copyright 2011-2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/pci.h>
#include <asm/mpic.h>
#include <sysdev/fsl_soc.h>
@@ -19,7 +15,7 @@
#include "mpc85xx.h"
-void __init bsc913x_rdb_pic_init(void)
+static void __init bsc913x_rdb_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
@@ -44,24 +40,11 @@ static void __init bsc913x_rdb_setup_arch(void)
machine_device_initcall(bsc9131_rdb, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-
-static int __init bsc9131_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,bsc9131rdb");
-}
-
define_machine(bsc9131_rdb) {
.name = "BSC9131 RDB",
- .probe = bsc9131_rdb_probe,
+ .compatible = "fsl,bsc9131rdb",
.setup_arch = bsc913x_rdb_setup_arch,
.init_IRQ = bsc913x_rdb_pic_init,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c
index 84476b646005..7a63a3ad5e8a 100644
--- a/arch/powerpc/platforms/85xx/c293pcie.c
+++ b/arch/powerpc/platforms/85xx/c293pcie.c
@@ -1,18 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* C293PCIE Board Setup
*
* Copyright 2013 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
#include <linux/kernel.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/machdep.h>
#include <asm/udbg.h>
@@ -23,7 +18,7 @@
#include "mpc85xx.h"
-void __init c293_pcie_pic_init(void)
+static void __init c293_pcie_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU, 0, 256, " OpenPIC ");
@@ -49,29 +44,11 @@ static void __init c293_pcie_setup_arch(void)
machine_arch_initcall(c293_pcie, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init c293_pcie_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,C293PCIE"))
- return 1;
- return 0;
-}
-
define_machine(c293_pcie) {
.name = "C293 PCIE",
- .probe = c293_pcie_probe,
+ .compatible = "fsl,C293PCIE",
.setup_arch = c293_pcie_setup_arch,
.init_IRQ = c293_pcie_pic_init,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index b564b5e23f7c..757811155587 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -1,20 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Routines common to most mpc85xx-based boards.
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
+#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
-#include <asm/qe.h>
+#include <asm/fsl_pm.h>
+#include <soc/fsl/qe/qe.h>
#include <sysdev/cpm2_pic.h>
#include "mpc85xx.h"
-static struct of_device_id __initdata mpc85xx_common_ids[] = {
+const struct fsl_pm_ops *qoriq_pm_ops;
+
+static const struct of_device_id mpc85xx_common_ids[] __initconst = {
{ .type = "soc", },
{ .compatible = "soc", },
{ .compatible = "simple-bus", },
@@ -40,6 +41,7 @@ static struct of_device_id __initdata mpc85xx_common_ids[] = {
{ .compatible = "fsl,qoriq-pcie-v2.4", },
{ .compatible = "fsl,qoriq-pcie-v2.3", },
{ .compatible = "fsl,qoriq-pcie-v2.2", },
+ { .compatible = "fsl,fman", },
{},
};
@@ -48,7 +50,7 @@ int __init mpc85xx_common_publish_devices(void)
return of_platform_bus_probe(NULL, mpc85xx_common_ids, NULL);
}
#ifdef CONFIG_CPM2
-static void cpm2_cascade(unsigned int irq, struct irq_desc *desc)
+static void cpm2_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
int cascade_irq;
@@ -72,7 +74,7 @@ void __init mpc85xx_cpm2_pic_init(void)
return;
}
irq = irq_of_parse_and_map(np, 0);
- if (irq == NO_IRQ) {
+ if (!irq) {
of_node_put(np);
printk(KERN_ERR "PIC init: got no IRQ for cpm cascade\n");
return;
@@ -85,30 +87,6 @@ void __init mpc85xx_cpm2_pic_init(void)
#endif
#ifdef CONFIG_QUICC_ENGINE
-void __init mpc85xx_qe_init(void)
-{
- struct device_node *np;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!np) {
- np = of_find_node_by_name(NULL, "qe");
- if (!np) {
- pr_err("%s: Could not find Quicc Engine node\n",
- __func__);
- return;
- }
- }
-
- if (!of_device_is_available(np)) {
- of_node_put(np);
- return;
- }
-
- qe_reset();
- of_node_put(np);
-
-}
-
void __init mpc85xx_qe_par_io_init(void)
{
struct device_node *np;
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index d22dd85e50bf..c44400e95f55 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Corenet based SoC DS Setup
*
* Maintained by Kumar Gala (see MAINTAINERS for contact information)
*
* Copyright 2009-2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
@@ -16,17 +12,17 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/pgtable.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/ppc-pci.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/ehv_pic.h>
-#include <asm/qe_ic.h>
+#include <asm/swiotlb.h>
#include <linux/of_platform.h>
#include <sysdev/fsl_soc.h>
@@ -34,42 +30,31 @@
#include "smp.h"
#include "mpc85xx.h"
-void __init corenet_gen_pic_init(void)
+static void __init corenet_gen_pic_init(void)
{
struct mpic *mpic;
unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
MPIC_NO_RESET;
- struct device_node *np;
-
- if (ppc_md.get_irq == mpic_get_coreint_irq)
+ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) && !IS_ENABLED(CONFIG_KEXEC_CORE))
flags |= MPIC_ENABLE_COREINT;
mpic = mpic_alloc(NULL, 0, flags, 0, 512, " OpenPIC ");
BUG_ON(mpic == NULL);
mpic_init(mpic);
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (np) {
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- of_node_put(np);
- }
}
/*
* Setup the architecture
*/
-void __init corenet_gen_setup_arch(void)
+static void __init corenet_gen_setup_arch(void)
{
mpc85xx_smp_init();
swiotlb_detect_4g();
pr_info("%s board\n", ppc_md.name);
-
- mpc85xx_qe_init();
}
static const struct of_device_id of_device_ids[] = {
@@ -77,6 +62,15 @@ static const struct of_device_id of_device_ids[] = {
.compatible = "simple-bus"
},
{
+ .compatible = "mdio-mux-gpio"
+ },
+ {
+ .compatible = "fsl,fpga-ngpixis"
+ },
+ {
+ .compatible = "fsl,fpga-qixis"
+ },
+ {
.compatible = "fsl,srio",
},
{
@@ -107,10 +101,11 @@ static const struct of_device_id of_device_ids[] = {
{}
};
-int __init corenet_gen_publish_devices(void)
+static int __init corenet_gen_publish_devices(void)
{
return of_platform_bus_probe(NULL, of_device_ids, NULL);
}
+machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
static const char * const boards[] __initconst = {
"fsl,P2041RDB",
@@ -127,9 +122,19 @@ static const char * const boards[] __initconst = {
"fsl,B4860QDS",
"fsl,B4420QDS",
"fsl,B4220QDS",
+ "fsl,T1023RDB",
+ "fsl,T1024QDS",
+ "fsl,T1024RDB",
+ "fsl,T1040D4RDB",
+ "fsl,T1042D4RDB",
"fsl,T1040QDS",
"fsl,T1042QDS",
+ "fsl,T1040RDB",
+ "fsl,T1042RDB",
+ "fsl,T1042RDB_PI",
+ "keymile,kmcent2",
"keymile,kmcoge4",
+ "varisys,CYRUS",
NULL
};
@@ -138,25 +143,24 @@ static const char * const boards[] __initconst = {
*/
static int __init corenet_generic_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
char hv_compat[24];
int i;
#ifdef CONFIG_SMP
extern struct smp_ops_t smp_85xx_ops;
#endif
- if (of_flat_dt_match(root, boards))
+ if (of_machine_compatible_match(boards))
return 1;
/* Check if we're running under the Freescale hypervisor */
for (i = 0; boards[i]; i++) {
snprintf(hv_compat, sizeof(hv_compat), "%s-hv", boards[i]);
- if (of_flat_dt_is_compatible(root, hv_compat)) {
+ if (of_machine_is_compatible(hv_compat)) {
ppc_md.init_IRQ = ehv_pic_init;
ppc_md.get_irq = ehv_pic_get_irq;
ppc_md.restart = fsl_hv_restart;
- ppc_md.power_off = fsl_hv_halt;
+ pm_power_off = fsl_hv_halt;
ppc_md.halt = fsl_hv_halt;
#ifdef CONFIG_SMP
/*
@@ -183,19 +187,17 @@ define_machine(corenet_generic) {
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
+/*
+ * Core reset may cause issues if using the proxy mode of MPIC.
+ * So, use the mixed mode of MPIC if enabling CPU hotplug.
+ *
+ * Likewise, problems have been seen with kexec when coreint is enabled.
+ */
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE)
+ .get_irq = mpic_get_irq,
+#else
.get_irq = mpic_get_coreint_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
+#endif
.progress = udbg_progress,
-#ifdef CONFIG_PPC64
- .power_save = book3e_idle,
-#else
.power_save = e500_idle,
-#endif
};
-
-machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
-
-#ifdef CONFIG_SWIOTLB
-machine_arch_initcall(corenet_generic, swiotlb_setup_bus_notifier);
-#endif
diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c
index 11790e074c8a..477852f1a726 100644
--- a/arch/powerpc/platforms/85xx/ge_imp3a.c
+++ b/arch/powerpc/platforms/85xx/ge_imp3a.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* GE IMP3A Board Setup
*
@@ -5,11 +6,6 @@
*
* Copyright 2010 GE Intelligent Platforms Embedded Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Based on: mpc85xx_ds.c (MPC85xx DS Board Setup)
* Copyright 2007 Freescale Semiconductor Inc.
*/
@@ -21,13 +17,13 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
@@ -42,14 +38,13 @@
void __iomem *imp3a_regs;
-void __init ge_imp3a_pic_init(void)
+static void __init ge_imp3a_pic_init(void)
{
struct mpic *mpic;
struct device_node *np;
struct device_node *cascade_node = NULL;
- unsigned long root = of_get_flat_dt_root();
- if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) {
+ if (of_machine_is_compatible("fsl,MPC8572DS-CAMP")) {
mpic = mpic_alloc(NULL, 0,
MPIC_NO_RESET |
MPIC_BIG_ENDIAN |
@@ -83,7 +78,7 @@ void __init ge_imp3a_pic_init(void)
of_node_put(cascade_node);
}
-static void ge_imp3a_pci_assign_primary(void)
+static void __init ge_imp3a_pci_assign_primary(void)
{
#ifdef CONFIG_PCI
struct device_node *np;
@@ -94,8 +89,10 @@ static void ge_imp3a_pci_assign_primary(void)
of_device_is_compatible(np, "fsl,mpc8548-pcie") ||
of_device_is_compatible(np, "fsl,p2020-pcie")) {
of_address_to_resource(np, 0, &rsrc);
- if ((rsrc.start & 0xfffff) == 0x9000)
- fsl_pci_primary = np;
+ if ((rsrc.start & 0xfffff) == 0x9000) {
+ of_node_put(fsl_pci_primary);
+ fsl_pci_primary = of_node_get(np);
+ }
}
}
#endif
@@ -193,23 +190,11 @@ static void ge_imp3a_show_cpuinfo(struct seq_file *m)
ge_imp3a_get_cpci_is_syscon() ? "yes" : "no");
}
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init ge_imp3a_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "ge,IMP3A");
-}
-
machine_arch_initcall(ge_imp3a, mpc85xx_common_publish_devices);
-machine_arch_initcall(ge_imp3a, swiotlb_setup_bus_notifier);
-
define_machine(ge_imp3a) {
.name = "GE_IMP3A",
- .probe = ge_imp3a_probe,
+ .compatible = "ge,IMP3A",
.setup_arch = ge_imp3a_setup_arch,
.init_IRQ = ge_imp3a_pic_init,
.show_cpuinfo = ge_imp3a_show_cpuinfo,
@@ -218,7 +203,5 @@ define_machine(ge_imp3a) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c
index 3dc1bda3ddc3..1b6326a4b0f2 100644
--- a/arch/powerpc/platforms/85xx/ksi8560.c
+++ b/arch/powerpc/platforms/85xx/ksi8560.c
@@ -18,7 +18,8 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
@@ -26,7 +27,6 @@
#include <asm/mpic.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
-#include <asm/prom.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -44,7 +44,7 @@
static void __iomem *cpld_base = NULL;
-static void machine_restart(char *cmd)
+static void __noreturn machine_restart(char *cmd)
{
if (cpld_base)
out_8(cpld_base + KSI8560_CPLD_RCR1, KSI8560_CPLD_RCR1_CPUHR);
@@ -134,6 +134,8 @@ static void __init ksi8560_setup_arch(void)
else
printk(KERN_ERR "Can't find CPLD in device tree\n");
+ of_node_put(cpld);
+
if (ppc_md.progress)
ppc_md.progress("ksi8560_setup_arch()", 0);
@@ -171,23 +173,12 @@ static void ksi8560_show_cpuinfo(struct seq_file *m)
machine_device_initcall(ksi8560, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init ksi8560_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "emerson,KSI8560");
-}
-
define_machine(ksi8560) {
.name = "KSI8560",
- .probe = ksi8560_probe,
+ .compatible = "emerson,KSI8560",
.setup_arch = ksi8560_setup_arch,
.init_IRQ = ksi8560_pic_init,
.show_cpuinfo = ksi8560_show_cpuinfo,
.get_irq = mpic_get_irq,
.restart = machine_restart,
- .calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c
index a378ba3519e9..b3327a358eb4 100644
--- a/arch/powerpc/platforms/85xx/mpc8536_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC8536 DS Board Setup
*
* Copyright 2008 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -16,13 +12,12 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
@@ -32,7 +27,7 @@
#include "mpc85xx.h"
-void __init mpc8536_ds_pic_init(void)
+static void __init mpc8536_ds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
0, 256, " OpenPIC ");
@@ -57,21 +52,9 @@ static void __init mpc8536_ds_setup_arch(void)
machine_arch_initcall(mpc8536_ds, mpc85xx_common_publish_devices);
-machine_arch_initcall(mpc8536_ds, swiotlb_setup_bus_notifier);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc8536_ds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,mpc8536ds");
-}
-
define_machine(mpc8536_ds) {
.name = "MPC8536 DS",
- .probe = mpc8536_ds_probe,
+ .compatible = "fsl,mpc8536ds",
.setup_arch = mpc8536_ds_setup_arch,
.init_IRQ = mpc8536_ds_pic_init,
#ifdef CONFIG_PCI
@@ -79,7 +62,5 @@ define_machine(mpc8536_ds) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx.h b/arch/powerpc/platforms/85xx/mpc85xx.h
index 39056f6befeb..c764d7551ef1 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx.h
+++ b/arch/powerpc/platforms/85xx/mpc85xx.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MPC85xx_H
#define MPC85xx_H
extern int mpc85xx_common_publish_devices(void);
@@ -9,11 +10,15 @@ static inline void __init mpc85xx_cpm2_pic_init(void) {}
#endif /* CONFIG_CPM2 */
#ifdef CONFIG_QUICC_ENGINE
-extern void mpc85xx_qe_init(void);
extern void mpc85xx_qe_par_io_init(void);
#else
-static inline void __init mpc85xx_qe_init(void) {}
static inline void __init mpc85xx_qe_par_io_init(void) {}
#endif
+#ifdef CONFIG_PPC_I8259
+void __init mpc85xx_8259_init(void);
+#else
+static inline void __init mpc85xx_8259_init(void) {}
+#endif
+
#endif
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_8259.c b/arch/powerpc/platforms/85xx/mpc85xx_8259.c
new file mode 100644
index 000000000000..cb00d596ad80
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_8259.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx 8259 functions for DS Board Setup
+ *
+ * Author Xianghua Xiao (x.xiao@freescale.com)
+ * Roy Zang <tie-fei.zang@freescale.com>
+ * - Add PCI/PCI Express support
+ * Copyright 2007 Freescale Semiconductor Inc.
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+#include <asm/mpic.h>
+#include <asm/i8259.h>
+
+#include "mpc85xx.h"
+
+static void mpc85xx_8259_cascade(struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned int cascade_irq = i8259_irq();
+
+ if (cascade_irq)
+ generic_handle_irq(cascade_irq);
+
+ chip->irq_eoi(&desc->irq_data);
+}
+
+void __init mpc85xx_8259_init(void)
+{
+ struct device_node *np;
+ struct device_node *cascade_node = NULL;
+ int cascade_irq;
+
+ /* Initialize the i8259 controller */
+ for_each_node_by_type(np, "interrupt-controller") {
+ if (of_device_is_compatible(np, "chrp,iic")) {
+ cascade_node = np;
+ break;
+ }
+ }
+
+ if (cascade_node == NULL) {
+ pr_debug("i8259: Could not find i8259 PIC\n");
+ return;
+ }
+
+ cascade_irq = irq_of_parse_and_map(cascade_node, 0);
+ if (!cascade_irq) {
+ pr_err("i8259: Failed to map cascade interrupt\n");
+ return;
+ }
+
+ pr_debug("i8259: cascade mapped to irq %d\n", cascade_irq);
+
+ i8259_init(cascade_node, 0);
+ of_node_put(cascade_node);
+
+ irq_set_chained_handler(cascade_irq, mpc85xx_8259_cascade);
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
deleted file mode 100644
index 7d12a19aa7ee..000000000000
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * MPC85xx setup and early boot code plus other random bits.
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2005 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/of_platform.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/mpic.h>
-#include <mm/mmu_decl.h>
-#include <asm/udbg.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#ifdef CONFIG_CPM2
-#include <asm/cpm2.h>
-#include <sysdev/cpm2_pic.h>
-#endif
-
-#include "mpc85xx.h"
-
-#ifdef CONFIG_PCI
-static int mpc85xx_exclude_device(struct pci_controller *hose,
- u_char bus, u_char devfn)
-{
- if (bus == 0 && PCI_SLOT(devfn) == 0)
- return PCIBIOS_DEVICE_NOT_FOUND;
- else
- return PCIBIOS_SUCCESSFUL;
-}
-#endif /* CONFIG_PCI */
-
-static void __init mpc85xx_ads_pic_init(void)
-{
- struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
- 0, 256, " OpenPIC ");
- BUG_ON(mpic == NULL);
- mpic_init(mpic);
-
- mpc85xx_cpm2_pic_init();
-}
-
-/*
- * Setup the architecture
- */
-#ifdef CONFIG_CPM2
-struct cpm_pin {
- int port, pin, flags;
-};
-
-static const struct cpm_pin mpc8560_ads_pins[] = {
- /* SCC1 */
- {3, 29, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 30, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {3, 31, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* SCC2 */
- {2, 12, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 13, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {3, 26, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {3, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
-
- /* FCC2 */
- {1, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 20, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 21, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 22, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 23, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 24, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 25, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 26, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 27, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 28, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 29, CPM_PIN_OUTPUT | CPM_PIN_SECONDARY},
- {1, 30, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 31, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {2, 18, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK14 */
- {2, 19, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK13 */
-
- /* FCC3 */
- {1, 4, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 5, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 6, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 8, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 9, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 10, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 11, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 12, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 13, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 14, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 15, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
- {1, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {1, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY},
- {2, 16, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK16 */
- {2, 17, CPM_PIN_INPUT | CPM_PIN_PRIMARY}, /* CLK15 */
- {2, 27, CPM_PIN_OUTPUT | CPM_PIN_PRIMARY},
-};
-
-static void __init init_ioports(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(mpc8560_ads_pins); i++) {
- const struct cpm_pin *pin = &mpc8560_ads_pins[i];
- cpm2_set_pin(pin->port, pin->pin, pin->flags);
- }
-
- cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC1, CPM_BRG1, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_SCC2, CPM_BRG2, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK13, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_FCC2, CPM_CLK14, CPM_CLK_TX);
- cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK15, CPM_CLK_RX);
- cpm2_clk_setup(CPM_CLK_FCC3, CPM_CLK16, CPM_CLK_TX);
-}
-#endif
-
-static void __init mpc85xx_ads_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("mpc85xx_ads_setup_arch()", 0);
-
-#ifdef CONFIG_CPM2
- cpm2_reset();
- init_ioports();
-#endif
-
-#ifdef CONFIG_PCI
- ppc_md.pci_exclude_device = mpc85xx_exclude_device;
-#endif
-
- fsl_pci_assign_primary();
-}
-
-static void mpc85xx_ads_show_cpuinfo(struct seq_file *m)
-{
- uint pvid, svid, phid1;
-
- pvid = mfspr(SPRN_PVR);
- svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Freescale Semiconductor\n");
- seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-
- /* Display cpu Pll setting */
- phid1 = mfspr(SPRN_HID1);
- seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
-}
-
-machine_arch_initcall(mpc85xx_ads, mpc85xx_common_publish_devices);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc85xx_ads_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC85xxADS");
-}
-
-define_machine(mpc85xx_ads) {
- .name = "MPC85xx ADS",
- .probe = mpc85xx_ads_probe,
- .setup_arch = mpc85xx_ads_setup_arch,
- .init_IRQ = mpc85xx_ads_pic_init,
- .show_cpuinfo = mpc85xx_ads_show_cpuinfo,
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_cds.c b/arch/powerpc/platforms/85xx/mpc85xx_cds.c
deleted file mode 100644
index b0753e222086..000000000000
--- a/arch/powerpc/platforms/85xx/mpc85xx_cds.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * MPC85xx setup and early boot code plus other random bits.
- *
- * Maintained by Kumar Gala (see MAINTAINERS for contact information)
- *
- * Copyright 2005, 2011-2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/initrd.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/pci-bridge.h>
-#include <asm/irq.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-#include <asm/i8259.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc85xx.h"
-
-/*
- * The CDS board contains an FPGA/CPLD called "Cadmus", which collects
- * various logic and performs system control functions.
- * Here is the FPGA/CPLD register map.
- */
-struct cadmus_reg {
- u8 cm_ver; /* Board version */
- u8 cm_csr; /* General control/status */
- u8 cm_rst; /* Reset control */
- u8 cm_hsclk; /* High speed clock */
- u8 cm_hsxclk; /* High speed clock extended */
- u8 cm_led; /* LED data */
- u8 cm_pci; /* PCI control/status */
- u8 cm_dma; /* DMA control */
- u8 res[248]; /* Total 256 bytes */
-};
-
-static struct cadmus_reg *cadmus;
-
-#ifdef CONFIG_PCI
-
-#define ARCADIA_HOST_BRIDGE_IDSEL 17
-#define ARCADIA_2ND_BRIDGE_IDSEL 3
-
-static int mpc85xx_exclude_device(struct pci_controller *hose,
- u_char bus, u_char devfn)
-{
- /* We explicitly do not go past the Tundra 320 Bridge */
- if ((bus == 1) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL))
- return PCIBIOS_DEVICE_NOT_FOUND;
- if ((bus == 0) && (PCI_SLOT(devfn) == ARCADIA_2ND_BRIDGE_IDSEL))
- return PCIBIOS_DEVICE_NOT_FOUND;
- else
- return PCIBIOS_SUCCESSFUL;
-}
-
-static void mpc85xx_cds_restart(char *cmd)
-{
- struct pci_dev *dev;
- u_char tmp;
-
- if ((dev = pci_get_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_82C686,
- NULL))) {
-
- /* Use the VIA Super Southbridge to force a PCI reset */
- pci_read_config_byte(dev, 0x47, &tmp);
- pci_write_config_byte(dev, 0x47, tmp | 1);
-
- /* Flush the outbound PCI write queues */
- pci_read_config_byte(dev, 0x47, &tmp);
-
- /*
- * At this point, the harware reset should have triggered.
- * However, if it doesn't work for some mysterious reason,
- * just fall through to the default reset below.
- */
-
- pci_dev_put(dev);
- }
-
- /*
- * If we can't find the VIA chip (maybe the P2P bridge is disabled)
- * or the VIA chip reset didn't work, just use the default reset.
- */
- fsl_rstcr_restart(NULL);
-}
-
-static void __init mpc85xx_cds_pci_irq_fixup(struct pci_dev *dev)
-{
- u_char c;
- if (dev->vendor == PCI_VENDOR_ID_VIA) {
- switch (dev->device) {
- case PCI_DEVICE_ID_VIA_82C586_1:
- /*
- * U-Boot does not set the enable bits
- * for the IDE device. Force them on here.
- */
- pci_read_config_byte(dev, 0x40, &c);
- c |= 0x03; /* IDE: Chip Enable Bits */
- pci_write_config_byte(dev, 0x40, c);
-
- /*
- * Since only primary interface works, force the
- * IDE function to standard primary IDE interrupt
- * w/ 8259 offset
- */
- dev->irq = 14;
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
- break;
- /*
- * Force legacy USB interrupt routing
- */
- case PCI_DEVICE_ID_VIA_82C586_2:
- /* There are two USB controllers.
- * Identify them by functon number
- */
- if (PCI_FUNC(dev->devfn) == 3)
- dev->irq = 11;
- else
- dev->irq = 10;
- pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq);
- default:
- break;
- }
- }
-}
-
-static void skip_fake_bridge(struct pci_dev *dev)
-{
- /* Make it an error to skip the fake bridge
- * in pci_setup_device() in probe.c */
- dev->hdr_type = 0x7f;
-}
-DECLARE_PCI_FIXUP_EARLY(0x1957, 0x3fff, skip_fake_bridge);
-DECLARE_PCI_FIXUP_EARLY(0x3fff, 0x1957, skip_fake_bridge);
-DECLARE_PCI_FIXUP_EARLY(0xff3f, 0x5719, skip_fake_bridge);
-
-#define PCI_DEVICE_ID_IDT_TSI310 0x01a7
-
-/*
- * Fix Tsi310 PCI-X bridge resource.
- * Force the bridge to open a window from 0x0000-0x1fff in PCI I/O space.
- * This allows legacy I/O(i8259, etc) on the VIA southbridge to be accessed.
- */
-void mpc85xx_cds_fixup_bus(struct pci_bus *bus)
-{
- struct pci_dev *dev = bus->self;
- struct resource *res = bus->resource[0];
-
- if (dev != NULL &&
- dev->vendor == PCI_VENDOR_ID_IBM &&
- dev->device == PCI_DEVICE_ID_IDT_TSI310) {
- if (res) {
- res->start = 0;
- res->end = 0x1fff;
- res->flags = IORESOURCE_IO;
- pr_info("mpc85xx_cds: PCI bridge resource fixup applied\n");
- pr_info("mpc85xx_cds: %pR\n", res);
- }
- }
-
- fsl_pcibios_fixup_bus(bus);
-}
-
-#ifdef CONFIG_PPC_I8259
-static void mpc85xx_8259_cascade_handler(unsigned int irq,
- struct irq_desc *desc)
-{
- unsigned int cascade_irq = i8259_irq();
-
- if (cascade_irq != NO_IRQ)
- /* handle an interrupt from the 8259 */
- generic_handle_irq(cascade_irq);
-
- /* check for any interrupts from the shared IRQ line */
- handle_fasteoi_irq(irq, desc);
-}
-
-static irqreturn_t mpc85xx_8259_cascade_action(int irq, void *dev_id)
-{
- return IRQ_HANDLED;
-}
-
-static struct irqaction mpc85xxcds_8259_irqaction = {
- .handler = mpc85xx_8259_cascade_action,
- .flags = IRQF_SHARED | IRQF_NO_THREAD,
- .name = "8259 cascade",
-};
-#endif /* PPC_I8259 */
-#endif /* CONFIG_PCI */
-
-static void __init mpc85xx_cds_pic_init(void)
-{
- struct mpic *mpic;
- mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
- 0, 256, " OpenPIC ");
- BUG_ON(mpic == NULL);
- mpic_init(mpic);
-}
-
-#if defined(CONFIG_PPC_I8259) && defined(CONFIG_PCI)
-static int mpc85xx_cds_8259_attach(void)
-{
- int ret;
- struct device_node *np = NULL;
- struct device_node *cascade_node = NULL;
- int cascade_irq;
-
- /* Initialize the i8259 controller */
- for_each_node_by_type(np, "interrupt-controller")
- if (of_device_is_compatible(np, "chrp,iic")) {
- cascade_node = np;
- break;
- }
-
- if (cascade_node == NULL) {
- printk(KERN_DEBUG "Could not find i8259 PIC\n");
- return -ENODEV;
- }
-
- cascade_irq = irq_of_parse_and_map(cascade_node, 0);
- if (cascade_irq == NO_IRQ) {
- printk(KERN_ERR "Failed to map cascade interrupt\n");
- return -ENXIO;
- }
-
- i8259_init(cascade_node, 0);
- of_node_put(cascade_node);
-
- /*
- * Hook the interrupt to make sure desc->action is never NULL.
- * This is required to ensure that the interrupt does not get
- * disabled when the last user of the shared IRQ line frees their
- * interrupt.
- */
- if ((ret = setup_irq(cascade_irq, &mpc85xxcds_8259_irqaction))) {
- printk(KERN_ERR "Failed to setup cascade interrupt\n");
- return ret;
- }
-
- /* Success. Connect our low-level cascade handler. */
- irq_set_handler(cascade_irq, mpc85xx_8259_cascade_handler);
-
- return 0;
-}
-machine_device_initcall(mpc85xx_cds, mpc85xx_cds_8259_attach);
-
-#endif /* CONFIG_PPC_I8259 */
-
-static void mpc85xx_cds_pci_assign_primary(void)
-{
-#ifdef CONFIG_PCI
- struct device_node *np;
-
- if (fsl_pci_primary)
- return;
-
- /*
- * MPC85xx_CDS has ISA bridge but unfortunately there is no
- * isa node in device tree. We now looking for i8259 node as
- * a workaround for such a broken device tree. This routine
- * is for complying to all device trees.
- */
- np = of_find_node_by_name(NULL, "i8259");
- while ((fsl_pci_primary = of_get_parent(np))) {
- of_node_put(np);
- np = fsl_pci_primary;
-
- if ((of_device_is_compatible(np, "fsl,mpc8540-pci") ||
- of_device_is_compatible(np, "fsl,mpc8548-pcie")) &&
- of_device_is_available(np))
- return;
- }
-#endif
-}
-
-/*
- * Setup the architecture
- */
-static void __init mpc85xx_cds_setup_arch(void)
-{
- struct device_node *np;
- int cds_pci_slot;
-
- if (ppc_md.progress)
- ppc_md.progress("mpc85xx_cds_setup_arch()", 0);
-
- np = of_find_compatible_node(NULL, NULL, "fsl,mpc8548cds-fpga");
- if (!np) {
- pr_err("Could not find FPGA node.\n");
- return;
- }
-
- cadmus = of_iomap(np, 0);
- of_node_put(np);
- if (!cadmus) {
- pr_err("Fail to map FPGA area.\n");
- return;
- }
-
- if (ppc_md.progress) {
- char buf[40];
- cds_pci_slot = ((in_8(&cadmus->cm_csr) >> 6) & 0x3) + 1;
- snprintf(buf, 40, "CDS Version = 0x%x in slot %d\n",
- in_8(&cadmus->cm_ver), cds_pci_slot);
- ppc_md.progress(buf, 0);
- }
-
-#ifdef CONFIG_PCI
- ppc_md.pci_irq_fixup = mpc85xx_cds_pci_irq_fixup;
- ppc_md.pci_exclude_device = mpc85xx_exclude_device;
-#endif
-
- mpc85xx_cds_pci_assign_primary();
- fsl_pci_assign_primary();
-}
-
-static void mpc85xx_cds_show_cpuinfo(struct seq_file *m)
-{
- uint pvid, svid, phid1;
-
- pvid = mfspr(SPRN_PVR);
- svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Freescale Semiconductor\n");
- seq_printf(m, "Machine\t\t: MPC85xx CDS (0x%x)\n",
- in_8(&cadmus->cm_ver));
- seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-
- /* Display cpu Pll setting */
- phid1 = mfspr(SPRN_HID1);
- seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
-}
-
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc85xx_cds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC85xxCDS");
-}
-
-machine_arch_initcall(mpc85xx_cds, mpc85xx_common_publish_devices);
-
-define_machine(mpc85xx_cds) {
- .name = "MPC85xx CDS",
- .probe = mpc85xx_cds_probe,
- .setup_arch = mpc85xx_cds_setup_arch,
- .init_IRQ = mpc85xx_cds_pic_init,
- .show_cpuinfo = mpc85xx_cds_show_cpuinfo,
- .get_irq = mpic_get_irq,
-#ifdef CONFIG_PCI
- .restart = mpc85xx_cds_restart,
- .pcibios_fixup_bus = mpc85xx_cds_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#else
- .restart = fsl_rstcr_restart,
-#endif
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
index ffdf02121a7c..2856148321b3 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC85xx DS Board Setup
*
@@ -5,11 +6,6 @@
* Roy Zang <tie-fei.zang@freescale.com>
* - Add PCI/PCI Exprees support
* Copyright 2007 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -19,17 +15,18 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
#include <asm/i8259.h>
#include <asm/swiotlb.h>
+#include <asm/ppc-pci.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -37,115 +34,22 @@
#include "mpc85xx.h"
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define DBG(fmt, args...)
-#endif
-
-#ifdef CONFIG_PPC_I8259
-static void mpc85xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
-{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- unsigned int cascade_irq = i8259_irq();
-
- if (cascade_irq != NO_IRQ) {
- generic_handle_irq(cascade_irq);
- }
- chip->irq_eoi(&desc->irq_data);
-}
-#endif /* CONFIG_PPC_I8259 */
-
-void __init mpc85xx_ds_pic_init(void)
+static void __init mpc85xx_ds_pic_init(void)
{
struct mpic *mpic;
-#ifdef CONFIG_PPC_I8259
- struct device_node *np;
- struct device_node *cascade_node = NULL;
- int cascade_irq;
-#endif
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,MPC8572DS-CAMP")) {
- mpic = mpic_alloc(NULL, 0,
- MPIC_NO_RESET |
- MPIC_BIG_ENDIAN |
- MPIC_SINGLE_DEST_CPU,
- 0, 256, " OpenPIC ");
- } else {
- mpic = mpic_alloc(NULL, 0,
- MPIC_BIG_ENDIAN |
- MPIC_SINGLE_DEST_CPU,
- 0, 256, " OpenPIC ");
- }
-
- BUG_ON(mpic == NULL);
- mpic_init(mpic);
+ int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
-#ifdef CONFIG_PPC_I8259
- /* Initialize the i8259 controller */
- for_each_node_by_type(np, "interrupt-controller")
- if (of_device_is_compatible(np, "chrp,iic")) {
- cascade_node = np;
- break;
- }
+ if (of_machine_is_compatible("fsl,MPC8572DS-CAMP"))
+ flags |= MPIC_NO_RESET;
- if (cascade_node == NULL) {
- printk(KERN_DEBUG "Could not find i8259 PIC\n");
- return;
- }
+ mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC ");
- cascade_irq = irq_of_parse_and_map(cascade_node, 0);
- if (cascade_irq == NO_IRQ) {
- printk(KERN_ERR "Failed to map cascade interrupt\n");
+ if (WARN_ON(!mpic))
return;
- }
-
- DBG("mpc85xxds: cascade mapped to irq %d\n", cascade_irq);
-
- i8259_init(cascade_node, 0);
- of_node_put(cascade_node);
-
- irq_set_chained_handler(cascade_irq, mpc85xx_8259_cascade);
-#endif /* CONFIG_PPC_I8259 */
-}
-#ifdef CONFIG_PCI
-extern int uli_exclude_device(struct pci_controller *hose,
- u_char bus, u_char devfn);
-
-static struct device_node *pci_with_uli;
-
-static int mpc85xx_exclude_device(struct pci_controller *hose,
- u_char bus, u_char devfn)
-{
- if (hose->dn == pci_with_uli)
- return uli_exclude_device(hose, bus, devfn);
-
- return PCIBIOS_SUCCESSFUL;
-}
-#endif /* CONFIG_PCI */
-
-static void __init mpc85xx_ds_uli_init(void)
-{
-#ifdef CONFIG_PCI
- struct device_node *node;
-
- /* See if we have a ULI under the primary */
-
- node = of_find_node_by_name(NULL, "uli1575");
- while ((pci_with_uli = of_get_parent(node))) {
- of_node_put(node);
- node = pci_with_uli;
+ mpic_init(mpic);
- if (pci_with_uli == fsl_pci_primary) {
- ppc_md.pci_exclude_device = mpc85xx_exclude_device;
- break;
- }
- }
-#endif
+ mpc85xx_8259_init();
}
/*
@@ -158,53 +62,18 @@ static void __init mpc85xx_ds_setup_arch(void)
swiotlb_detect_4g();
fsl_pci_assign_primary();
- mpc85xx_ds_uli_init();
+ uli_init();
mpc85xx_smp_init();
- printk("MPC85xx DS board from Freescale Semiconductor\n");
-}
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc8544_ds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return !!of_flat_dt_is_compatible(root, "MPC8544DS");
+ pr_info("MPC85xx DS board from Freescale Semiconductor\n");
}
machine_arch_initcall(mpc8544_ds, mpc85xx_common_publish_devices);
machine_arch_initcall(mpc8572_ds, mpc85xx_common_publish_devices);
-machine_arch_initcall(p2020_ds, mpc85xx_common_publish_devices);
-
-machine_arch_initcall(mpc8544_ds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(mpc8572_ds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(p2020_ds, swiotlb_setup_bus_notifier);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc8572_ds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return !!of_flat_dt_is_compatible(root, "fsl,MPC8572DS");
-}
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p2020_ds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return !!of_flat_dt_is_compatible(root, "fsl,P2020DS");
-}
define_machine(mpc8544_ds) {
.name = "MPC8544 DS",
- .probe = mpc8544_ds_probe,
+ .compatible = "MPC8544DS",
.setup_arch = mpc85xx_ds_setup_arch,
.init_IRQ = mpc85xx_ds_pic_init,
#ifdef CONFIG_PCI
@@ -212,29 +81,12 @@ define_machine(mpc8544_ds) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(mpc8572_ds) {
.name = "MPC8572 DS",
- .probe = mpc8572_ds_probe,
- .setup_arch = mpc85xx_ds_setup_arch,
- .init_IRQ = mpc85xx_ds_pic_init,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
-
-define_machine(p2020_ds) {
- .name = "P2020 DS",
- .probe = p2020_ds_probe,
+ .compatible = "fsl,MPC8572DS",
.setup_arch = mpc85xx_ds_setup_arch,
.init_IRQ = mpc85xx_ds_pic_init,
#ifdef CONFIG_PCI
@@ -242,7 +94,5 @@ define_machine(p2020_ds) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index a392e94a07fa..c19490cf6376 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2006-2010, 2012-2013 Freescale Semiconductor, Inc.
* All rights reserved.
@@ -10,11 +11,6 @@
*
* Description:
* MPC85xx MDS board specific routines.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -30,10 +26,11 @@
#include <linux/seq_file.h>
#include <linux/initrd.h>
#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/phy.h>
#include <linux/memblock.h>
+#include <linux/fsl/guts.h>
#include <linux/atomic.h>
#include <asm/time.h>
@@ -42,26 +39,17 @@
#include <asm/pci-bridge.h>
#include <asm/irq.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
-#include <asm/fsl_guts.h>
#include "smp.h"
#include "mpc85xx.h"
-#undef DEBUG
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
+#if IS_BUILTIN(CONFIG_PHYLIB)
#define MV88E1111_SCR 0x10
#define MV88E1111_SCR_125CLK 0x0010
@@ -152,6 +140,8 @@ static int mpc8568_mds_phy_fixups(struct phy_device *phydev)
return err;
}
+#endif
+
/* ************************************************************************
*
* Setup the architecture
@@ -238,7 +228,6 @@ static void __init mpc85xx_mds_qe_init(void)
{
struct device_node *np;
- mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
mpc85xx_mds_reset_ucc_phys();
@@ -270,33 +259,8 @@ static void __init mpc85xx_mds_qe_init(void)
}
}
-static void __init mpc85xx_mds_qeic_init(void)
-{
- struct device_node *np;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe");
- if (!of_device_is_available(np)) {
- of_node_put(np);
- return;
- }
-
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (!np) {
- np = of_find_node_by_type(NULL, "qeic");
- if (!np)
- return;
- }
-
- if (machine_is(p1021_mds))
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- else
- qe_ic_init(np, 0, qe_ic_cascade_muxed_mpic, NULL);
- of_node_put(np);
-}
#else
static void __init mpc85xx_mds_qe_init(void) { }
-static void __init mpc85xx_mds_qeic_init(void) { }
#endif /* CONFIG_QUICC_ENGINE */
static void __init mpc85xx_mds_setup_arch(void)
@@ -313,6 +277,7 @@ static void __init mpc85xx_mds_setup_arch(void)
swiotlb_detect_4g();
}
+#if IS_BUILTIN(CONFIG_PHYLIB)
static int __init board_fixups(void)
{
@@ -342,16 +307,14 @@ static int __init board_fixups(void)
return 0;
}
+
machine_arch_initcall(mpc8568_mds, board_fixups);
machine_arch_initcall(mpc8569_mds, board_fixups);
+#endif
+
static int __init mpc85xx_publish_devices(void)
{
- if (machine_is(mpc8568_mds))
- simple_gpiochip_init("fsl,mpc8568mds-bcsr-gpio");
- if (machine_is(mpc8569_mds))
- simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio");
-
return mpc85xx_common_publish_devices();
}
@@ -359,10 +322,6 @@ machine_arch_initcall(mpc8568_mds, mpc85xx_publish_devices);
machine_arch_initcall(mpc8569_mds, mpc85xx_publish_devices);
machine_arch_initcall(p1021_mds, mpc85xx_common_publish_devices);
-machine_arch_initcall(mpc8568_mds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(mpc8569_mds, swiotlb_setup_bus_notifier);
-machine_arch_initcall(p1021_mds, swiotlb_setup_bus_notifier);
-
static void __init mpc85xx_mds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
@@ -371,24 +330,14 @@ static void __init mpc85xx_mds_pic_init(void)
BUG_ON(mpic == NULL);
mpic_init(mpic);
- mpc85xx_mds_qeic_init();
-}
-
-static int __init mpc85xx_mds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "MPC85xxMDS");
}
define_machine(mpc8568_mds) {
.name = "MPC8568 MDS",
- .probe = mpc85xx_mds_probe,
+ .compatible = "MPC85xxMDS",
.setup_arch = mpc85xx_mds_setup_arch,
.init_IRQ = mpc85xx_mds_pic_init,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
@@ -396,21 +345,12 @@ define_machine(mpc8568_mds) {
#endif
};
-static int __init mpc8569_mds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,MPC8569EMDS");
-}
-
define_machine(mpc8569_mds) {
.name = "MPC8569 MDS",
- .probe = mpc8569_mds_probe,
+ .compatible = "fsl,MPC8569EMDS",
.setup_arch = mpc85xx_mds_setup_arch,
.init_IRQ = mpc85xx_mds_pic_init,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
@@ -418,26 +358,15 @@ define_machine(mpc8569_mds) {
#endif
};
-static int __init p1021_mds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1021MDS");
-
-}
-
define_machine(p1021_mds) {
.name = "P1021 MDS",
- .probe = p1021_mds_probe,
+ .compatible = "fsl,P1021MDS",
.setup_arch = mpc85xx_mds_setup_arch,
.init_IRQ = mpc85xx_mds_pic_init,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
};
-
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
new file mode 100644
index 000000000000..f7ac92a8ae97
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * MPC85xx PM operators
+ *
+ * Copyright 2015 Freescale Semiconductor Inc.
+ */
+
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/fsl/guts.h>
+
+#include <asm/io.h>
+#include <asm/fsl_pm.h>
+
+#include "smp.h"
+
+static struct ccsr_guts __iomem *guts;
+
+#ifdef CONFIG_FSL_PMC
+static void mpc85xx_irq_mask(int cpu)
+{
+
+}
+
+static void mpc85xx_irq_unmask(int cpu)
+{
+
+}
+
+static void mpc85xx_cpu_die(int cpu)
+{
+ u32 tmp;
+
+ tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP;
+ mtspr(SPRN_HID0, tmp);
+
+ /* Enter NAP mode. */
+ tmp = mfmsr();
+ tmp |= MSR_WE;
+ asm volatile(
+ "msync\n"
+ "mtmsr %0\n"
+ "isync\n"
+ :
+ : "r" (tmp));
+}
+
+static void mpc85xx_cpu_up_prepare(int cpu)
+{
+
+}
+#endif
+
+static void mpc85xx_freeze_time_base(bool freeze)
+{
+ uint32_t mask;
+
+ mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
+ if (freeze)
+ setbits32(&guts->devdisr, mask);
+ else
+ clrbits32(&guts->devdisr, mask);
+
+ in_be32(&guts->devdisr);
+}
+
+static const struct of_device_id mpc85xx_smp_guts_ids[] = {
+ { .compatible = "fsl,mpc8572-guts", },
+ { .compatible = "fsl,p1020-guts", },
+ { .compatible = "fsl,p1021-guts", },
+ { .compatible = "fsl,p1022-guts", },
+ { .compatible = "fsl,p1023-guts", },
+ { .compatible = "fsl,p2020-guts", },
+ { .compatible = "fsl,bsc9132-guts", },
+ {},
+};
+
+static const struct fsl_pm_ops mpc85xx_pm_ops = {
+ .freeze_time_base = mpc85xx_freeze_time_base,
+#ifdef CONFIG_FSL_PMC
+ .irq_mask = mpc85xx_irq_mask,
+ .irq_unmask = mpc85xx_irq_unmask,
+ .cpu_die = mpc85xx_cpu_die,
+ .cpu_up_prepare = mpc85xx_cpu_up_prepare,
+#endif
+};
+
+int __init mpc85xx_setup_pmc(void)
+{
+ struct device_node *np;
+
+ np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
+ if (np) {
+ guts = of_iomap(np, 0);
+ of_node_put(np);
+ if (!guts) {
+ pr_err("Could not map guts node address\n");
+ return -ENOMEM;
+ }
+ qoriq_pm_ops = &mpc85xx_pm_ops;
+ }
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index e358bed66d01..e0cec670d8db 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* MPC85xx RDB Board Setup
*
* Copyright 2009,2012-2013 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -16,18 +12,17 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/fsl/guts.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
-#include <asm/fsl_guts.h>
+#include <soc/fsl/qe/qe.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -35,50 +30,20 @@
#include "mpc85xx.h"
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define DBG(fmt, args...)
-#endif
-
-
-void __init mpc85xx_rdb_pic_init(void)
+static void __init mpc85xx_rdb_pic_init(void)
{
struct mpic *mpic;
- unsigned long root = of_get_flat_dt_root();
+ int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
-#ifdef CONFIG_QUICC_ENGINE
- struct device_node *np;
-#endif
+ if (of_machine_is_compatible("fsl,MPC85XXRDB-CAMP"))
+ flags |= MPIC_NO_RESET;
- if (of_flat_dt_is_compatible(root, "fsl,MPC85XXRDB-CAMP")) {
- mpic = mpic_alloc(NULL, 0, MPIC_NO_RESET |
- MPIC_BIG_ENDIAN |
- MPIC_SINGLE_DEST_CPU,
- 0, 256, " OpenPIC ");
- } else {
- mpic = mpic_alloc(NULL, 0,
- MPIC_BIG_ENDIAN |
- MPIC_SINGLE_DEST_CPU,
- 0, 256, " OpenPIC ");
- }
+ mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC ");
- BUG_ON(mpic == NULL);
- mpic_init(mpic);
-
-#ifdef CONFIG_QUICC_ENGINE
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (np) {
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- of_node_put(np);
-
- } else
- pr_err("%s: Could not find qe-ic node\n", __func__);
-#endif
+ if (WARN_ON(!mpic))
+ return;
+ mpic_init(mpic);
}
/*
@@ -93,8 +58,6 @@ static void __init mpc85xx_rdb_setup_arch(void)
fsl_pci_assign_primary();
-#ifdef CONFIG_QUICC_ENGINE
- mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
if (machine_is(p1025_rdb)) {
@@ -113,7 +76,7 @@ static void __init mpc85xx_rdb_setup_arch(void)
/* P1025 has pins muxed for QE and other functions. To
* enable QE UEC mode, we need to set bit QE0 for UCC1
* in Eth mode, QE0 and QE3 for UCC5 in Eth mode, QE9
- * and QE12 for QE MII management singals in PMUXCR
+ * and QE12 for QE MII management signals in PMUXCR
* register.
*/
setbits32(&guts->pmuxcr, MPC85xx_PMUXCR_QE(0) |
@@ -127,13 +90,10 @@ static void __init mpc85xx_rdb_setup_arch(void)
}
#endif
-#endif /* CONFIG_QUICC_ENGINE */
- printk(KERN_INFO "MPC85xx RDB board from Freescale Semiconductor\n");
+ pr_info("MPC85xx RDB board from Freescale Semiconductor\n");
}
-machine_arch_initcall(p2020_rdb, mpc85xx_common_publish_devices);
-machine_arch_initcall(p2020_rdb_pc, mpc85xx_common_publish_devices);
machine_arch_initcall(p1020_mbg_pc, mpc85xx_common_publish_devices);
machine_arch_initcall(p1020_rdb, mpc85xx_common_publish_devices);
machine_arch_initcall(p1020_rdb_pc, mpc85xx_common_publish_devices);
@@ -143,105 +103,9 @@ machine_arch_initcall(p1021_rdb_pc, mpc85xx_common_publish_devices);
machine_arch_initcall(p1025_rdb, mpc85xx_common_publish_devices);
machine_arch_initcall(p1024_rdb, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p2020_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,P2020RDB"))
- return 1;
- return 0;
-}
-
-static int __init p1020_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,P1020RDB"))
- return 1;
- return 0;
-}
-
-static int __init p1020_rdb_pc_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PC");
-}
-
-static int __init p1020_rdb_pd_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1020RDB-PD");
-}
-
-static int __init p1021_rdb_pc_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,P1021RDB-PC"))
- return 1;
- return 0;
-}
-
-static int __init p2020_rdb_pc_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,P2020RDB-PC"))
- return 1;
- return 0;
-}
-
-static int __init p1025_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1025RDB");
-}
-
-static int __init p1020_mbg_pc_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1020MBG-PC");
-}
-
-static int __init p1020_utm_pc_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1020UTM-PC");
-}
-
-static int __init p1024_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1024RDB");
-}
-
-define_machine(p2020_rdb) {
- .name = "P2020 RDB",
- .probe = p2020_rdb_probe,
- .setup_arch = mpc85xx_rdb_setup_arch,
- .init_IRQ = mpc85xx_rdb_pic_init,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
-
define_machine(p1020_rdb) {
.name = "P1020 RDB",
- .probe = p1020_rdb_probe,
+ .compatible = "fsl,P1020RDB",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -249,29 +113,12 @@ define_machine(p1020_rdb) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1021_rdb_pc) {
.name = "P1021 RDB-PC",
- .probe = p1021_rdb_pc_probe,
- .setup_arch = mpc85xx_rdb_setup_arch,
- .init_IRQ = mpc85xx_rdb_pic_init,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
-
-define_machine(p2020_rdb_pc) {
- .name = "P2020RDB-PC",
- .probe = p2020_rdb_pc_probe,
+ .compatible = "fsl,P1021RDB-PC",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -279,14 +126,12 @@ define_machine(p2020_rdb_pc) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1025_rdb) {
.name = "P1025 RDB",
- .probe = p1025_rdb_probe,
+ .compatible = "fsl,P1025RDB",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -294,14 +139,12 @@ define_machine(p1025_rdb) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1020_mbg_pc) {
.name = "P1020 MBG-PC",
- .probe = p1020_mbg_pc_probe,
+ .compatible = "fsl,P1020MBG-PC",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -309,14 +152,12 @@ define_machine(p1020_mbg_pc) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1020_utm_pc) {
.name = "P1020 UTM-PC",
- .probe = p1020_utm_pc_probe,
+ .compatible = "fsl,P1020UTM-PC",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -324,14 +165,12 @@ define_machine(p1020_utm_pc) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1020_rdb_pc) {
.name = "P1020RDB-PC",
- .probe = p1020_rdb_pc_probe,
+ .compatible = "fsl,P1020RDB-PC",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -339,14 +178,12 @@ define_machine(p1020_rdb_pc) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1020_rdb_pd) {
.name = "P1020RDB-PD",
- .probe = p1020_rdb_pd_probe,
+ .compatible = "fsl,P1020RDB-PD",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -354,14 +191,12 @@ define_machine(p1020_rdb_pd) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(p1024_rdb) {
.name = "P1024 RDB",
- .probe = p1024_rdb_probe,
+ .compatible = "fsl,P1024RDB",
.setup_arch = mpc85xx_rdb_setup_arch,
.init_IRQ = mpc85xx_rdb_pic_init,
#ifdef CONFIG_PCI
@@ -369,7 +204,5 @@ define_machine(p1024_rdb) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/mvme2500.c b/arch/powerpc/platforms/85xx/mvme2500.c
new file mode 100644
index 000000000000..19122daadb55
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/mvme2500.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Emerson/Artesyn MVME2500
+ *
+ * Copyright 2014 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Based on earlier code by:
+ *
+ * Xianghua Xiao (x.xiao@freescale.com)
+ * Tom Armistead (tom.armistead@emerson.com)
+ * Copyright 2012 Emerson
+ *
+ * Author Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ */
+
+#include <linux/pci.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc85xx.h"
+
+static void __init mvme2500_pic_init(void)
+{
+ struct mpic *mpic = mpic_alloc(NULL, 0,
+ MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU,
+ 0, 256, " OpenPIC ");
+ BUG_ON(mpic == NULL);
+ mpic_init(mpic);
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init mvme2500_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("mvme2500_setup_arch()", 0);
+ fsl_pci_assign_primary();
+ pr_info("MVME2500 board from Artesyn\n");
+}
+
+machine_arch_initcall(mvme2500, mpc85xx_common_publish_devices);
+
+define_machine(mvme2500) {
+ .name = "MVME2500",
+ .compatible = "artesyn,MVME2500",
+ .setup_arch = mvme2500_setup_arch,
+ .init_IRQ = mvme2500_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c
index ad1a3d438a9e..491895ac8bcf 100644
--- a/arch/powerpc/platforms/85xx/p1010rdb.c
+++ b/arch/powerpc/platforms/85xx/p1010rdb.c
@@ -1,12 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* P1010RDB Board Setup
*
* Copyright 2011 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -14,13 +10,12 @@
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
@@ -29,7 +24,7 @@
#include "mpc85xx.h"
-void __init p1010_rdb_pic_init(void)
+static void __init p1010_rdb_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
@@ -55,18 +50,15 @@ static void __init p1010_rdb_setup_arch(void)
}
machine_arch_initcall(p1010_rdb, mpc85xx_common_publish_devices);
-machine_arch_initcall(p1010_rdb, swiotlb_setup_bus_notifier);
/*
* Called very early, device-tree isn't unflattened
*/
static int __init p1010_rdb_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,P1010RDB"))
+ if (of_machine_is_compatible("fsl,P1010RDB"))
return 1;
- if (of_flat_dt_is_compatible(root, "fsl,P1010RDB-PB"))
+ if (of_machine_is_compatible("fsl,P1010RDB-PB"))
return 1;
return 0;
}
@@ -81,7 +73,5 @@ define_machine(p1010_rdb) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c
index 6ac986d3f8a3..adc3a2ee1415 100644
--- a/arch/powerpc/platforms/85xx/p1022_ds.c
+++ b/arch/powerpc/platforms/85xx/p1022_ds.c
@@ -16,8 +16,10 @@
* kind, whether express or implied.
*/
+#include <linux/fsl/guts.h>
#include <linux/pci.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/div64.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
@@ -25,7 +27,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include <asm/udbg.h>
-#include <asm/fsl_guts.h>
#include <asm/fsl_lbc.h>
#include "smp.h"
@@ -369,7 +370,7 @@ exit:
*
* @pixclock: the wavelength, in picoseconds, of the clock
*/
-void p1022ds_set_pixel_clock(unsigned int pixclock)
+static void p1022ds_set_pixel_clock(unsigned int pixclock)
{
struct device_node *guts_np = NULL;
struct ccsr_guts __iomem *guts;
@@ -417,7 +418,7 @@ void p1022ds_set_pixel_clock(unsigned int pixclock)
/**
* p1022ds_valid_monitor_port: set the monitor port for sysfs
*/
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
{
switch (port) {
@@ -431,7 +432,7 @@ p1022ds_valid_monitor_port(enum fsl_diu_monitor_port port)
#endif
-void __init p1022_ds_pic_init(void)
+static void __init p1022_ds_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
@@ -508,8 +509,8 @@ static void __init p1022_ds_setup_arch(void)
* allocate one static local variable for each
* call to this function.
*/
- pr_info("p1022ds: disabling %s node",
- np2->full_name);
+ pr_info("p1022ds: disabling %pOF node",
+ np2);
of_update_property(np2, &nor_status);
of_node_put(np2);
}
@@ -524,8 +525,8 @@ static void __init p1022_ds_setup_arch(void)
.length = sizeof("disabled"),
};
- pr_info("p1022ds: disabling %s node",
- np2->full_name);
+ pr_info("p1022ds: disabling %pOF node",
+ np2);
of_update_property(np2, &nand_status);
of_node_put(np2);
}
@@ -548,21 +549,9 @@ static void __init p1022_ds_setup_arch(void)
machine_arch_initcall(p1022_ds, mpc85xx_common_publish_devices);
-machine_arch_initcall(p1022_ds, swiotlb_setup_bus_notifier);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p1022_ds_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,p1022ds");
-}
-
define_machine(p1022_ds) {
.name = "P1022 DS",
- .probe = p1022_ds_probe,
+ .compatible = "fsl,p1022ds",
.setup_arch = p1022_ds_setup_arch,
.init_IRQ = p1022_ds_pic_init,
#ifdef CONFIG_PCI
@@ -570,7 +559,5 @@ define_machine(p1022_ds) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c
index 7a180f0308d5..6198299d95b1 100644
--- a/arch/powerpc/platforms/85xx/p1022_rdk.c
+++ b/arch/powerpc/platforms/85xx/p1022_rdk.c
@@ -12,8 +12,10 @@
* kind, whether express or implied.
*/
+#include <linux/fsl/guts.h>
#include <linux/pci.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/div64.h>
#include <asm/mpic.h>
#include <asm/swiotlb.h>
@@ -21,7 +23,6 @@
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include <asm/udbg.h>
-#include <asm/fsl_guts.h>
#include "smp.h"
#include "mpc85xx.h"
@@ -39,7 +40,7 @@
*
* @pixclock: the wavelength, in picoseconds, of the clock
*/
-void p1022rdk_set_pixel_clock(unsigned int pixclock)
+static void p1022rdk_set_pixel_clock(unsigned int pixclock)
{
struct device_node *guts_np = NULL;
struct ccsr_guts __iomem *guts;
@@ -50,14 +51,14 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock)
/* Map the global utilities registers. */
guts_np = of_find_compatible_node(NULL, NULL, "fsl,p1022-guts");
if (!guts_np) {
- pr_err("p1022rdk: missing global utilties device node\n");
+ pr_err("p1022rdk: missing global utilities device node\n");
return;
}
guts = of_iomap(guts_np, 0);
of_node_put(guts_np);
if (!guts) {
- pr_err("p1022rdk: could not map global utilties device\n");
+ pr_err("p1022rdk: could not map global utilities device\n");
return;
}
@@ -87,7 +88,7 @@ void p1022rdk_set_pixel_clock(unsigned int pixclock)
/**
* p1022rdk_valid_monitor_port: set the monitor port for sysfs
*/
-enum fsl_diu_monitor_port
+static enum fsl_diu_monitor_port
p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
{
return FSL_DIU_PORT_DVI;
@@ -95,7 +96,7 @@ p1022rdk_valid_monitor_port(enum fsl_diu_monitor_port port)
#endif
-void __init p1022_rdk_pic_init(void)
+static void __init p1022_rdk_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
@@ -128,21 +129,9 @@ static void __init p1022_rdk_setup_arch(void)
machine_arch_initcall(p1022_rdk, mpc85xx_common_publish_devices);
-machine_arch_initcall(p1022_rdk, swiotlb_setup_bus_notifier);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init p1022_rdk_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,p1022rdk");
-}
-
define_machine(p1022_rdk) {
.name = "P1022 RDK",
- .probe = p1022_rdk_probe,
+ .compatible = "fsl,p1022rdk",
.setup_arch = p1022_rdk_setup_arch,
.init_IRQ = p1022_rdk_pic_init,
#ifdef CONFIG_PCI
@@ -150,7 +139,5 @@ define_machine(p1022_rdk) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c b/arch/powerpc/platforms/85xx/p1023_rdb.c
index d5b7509825de..e4fa8731fd2d 100644
--- a/arch/powerpc/platforms/85xx/p1023_rdb.c
+++ b/arch/powerpc/platforms/85xx/p1023_rdb.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
*
@@ -5,11 +6,6 @@
*
* Description:
* P1023 RDB Board Setup
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
@@ -19,14 +15,13 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
#include "smp.h"
@@ -41,7 +36,7 @@
* Setup the architecture
*
*/
-static void __init mpc85xx_rdb_setup_arch(void)
+static void __init p1023_rdb_setup_arch(void)
{
struct device_node *np;
@@ -87,7 +82,7 @@ static void __init mpc85xx_rdb_setup_arch(void)
machine_arch_initcall(p1023_rdb, mpc85xx_common_publish_devices);
-static void __init mpc85xx_rdb_pic_init(void)
+static void __init p1023_rdb_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
@@ -98,22 +93,12 @@ static void __init mpc85xx_rdb_pic_init(void)
mpic_init(mpic);
}
-static int __init p1023_rdb_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,P1023RDB");
-
-}
-
define_machine(p1023_rdb) {
.name = "P1023 RDB",
- .probe = p1023_rdb_probe,
- .setup_arch = mpc85xx_rdb_setup_arch,
- .init_IRQ = mpc85xx_rdb_pic_init,
+ .compatible = "fsl,P1023RDB",
+ .setup_arch = p1023_rdb_setup_arch,
+ .init_IRQ = p1023_rdb_pic_init,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
diff --git a/arch/powerpc/platforms/85xx/p2020.c b/arch/powerpc/platforms/85xx/p2020.c
new file mode 100644
index 000000000000..0e4d715145af
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/p2020.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Freescale P2020 board Setup
+ *
+ * Copyright 2007,2009,2012-2013 Freescale Semiconductor Inc.
+ * Copyright 2022-2023 Pali Rohár <pali@kernel.org>
+ */
+
+#include <linux/stddef.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+
+#include <asm/machdep.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <asm/swiotlb.h>
+#include <asm/ppc-pci.h>
+
+#include <sysdev/fsl_pci.h>
+
+#include "smp.h"
+#include "mpc85xx.h"
+
+static void __init p2020_pic_init(void)
+{
+ struct mpic *mpic;
+ int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU;
+
+ mpic = mpic_alloc(NULL, 0, flags, 0, 256, " OpenPIC ");
+
+ if (WARN_ON(!mpic))
+ return;
+
+ mpic_init(mpic);
+ mpc85xx_8259_init();
+}
+
+/*
+ * Setup the architecture
+ */
+static void __init p2020_setup_arch(void)
+{
+ swiotlb_detect_4g();
+ fsl_pci_assign_primary();
+ uli_init();
+ mpc85xx_smp_init();
+ mpc85xx_qe_par_io_init();
+}
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init p2020_probe(void)
+{
+ struct device_node *p2020_cpu;
+
+ /*
+ * There is no common compatible string for all P2020 boards.
+ * The only common thing is "PowerPC,P2020@0" cpu node.
+ * So check for P2020 board via this cpu node.
+ */
+ p2020_cpu = of_find_node_by_path("/cpus/PowerPC,P2020@0");
+ of_node_put(p2020_cpu);
+
+ return !!p2020_cpu;
+}
+
+machine_arch_initcall(p2020, mpc85xx_common_publish_devices);
+
+define_machine(p2020) {
+ .name = "Freescale P2020",
+ .probe = p2020_probe,
+ .setup_arch = p2020_setup_arch,
+ .init_IRQ = p2020_pic_init,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+ .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
+#endif
+ .get_irq = mpic_get_irq,
+ .progress = udbg_progress,
+};
diff --git a/arch/powerpc/platforms/85xx/ppa8548.c b/arch/powerpc/platforms/85xx/ppa8548.c
index 3daff7c63569..acd19c52ad43 100644
--- a/arch/powerpc/platforms/85xx/ppa8548.c
+++ b/arch/powerpc/platforms/85xx/ppa8548.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* ppa8548 setup and early boot code.
*
@@ -7,11 +8,6 @@
*
* Based on the SBC8548 support - Copyright 2007 Wind River Systems Inc.
* Based on the MPC8548CDS support - Copyright 2005 Freescale Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -59,7 +55,7 @@ static void ppa8548_show_cpuinfo(struct seq_file *m)
seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
}
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .name = "soc", },
{ .type = "soc", },
{ .compatible = "simple-bus", },
@@ -76,24 +72,12 @@ static int __init declare_of_platform_devices(void)
}
machine_device_initcall(ppa8548, declare_of_platform_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init ppa8548_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "ppa8548");
-}
-
define_machine(ppa8548) {
.name = "ppa8548",
- .probe = ppa8548_probe,
+ .compatible = "ppa8548",
.setup_arch = ppa8548_setup_arch,
.init_IRQ = ppa8548_pic_init,
.show_cpuinfo = ppa8548_show_cpuinfo,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c
index 7f2673293549..3cd2f3bd4223 100644
--- a/arch/powerpc/platforms/85xx/qemu_e500.c
+++ b/arch/powerpc/platforms/85xx/qemu_e500.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Paravirt target for a generic QEMU e500 machine
*
@@ -8,25 +9,23 @@
* an interface contract with QEMU.
*
* Copyright 2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
+#include <linux/of.h>
#include <linux/of_fdt.h>
+#include <linux/pgtable.h>
#include <asm/machdep.h>
#include <asm/time.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
+#include <asm/swiotlb.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
#include "smp.h"
#include "mpc85xx.h"
-void __init qemu_e500_pic_init(void)
+static void __init qemu_e500_pic_init(void)
{
struct mpic *mpic;
unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU |
@@ -47,21 +46,11 @@ static void __init qemu_e500_setup_arch(void)
mpc85xx_smp_init();
}
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init qemu_e500_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return !!of_flat_dt_is_compatible(root, "fsl,qemu-e500");
-}
-
machine_arch_initcall(qemu_e500, mpc85xx_common_publish_devices);
define_machine(qemu_e500) {
.name = "QEMU e500",
- .probe = qemu_e500_probe,
+ .compatible = "fsl,qemu-e500",
.setup_arch = qemu_e500_setup_arch,
.init_IRQ = qemu_e500_pic_init,
#ifdef CONFIG_PCI
@@ -69,7 +58,6 @@ define_machine(qemu_e500) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_coreint_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
+ .power_save = e500_idle,
};
diff --git a/arch/powerpc/platforms/85xx/sbc8548.c b/arch/powerpc/platforms/85xx/sbc8548.c
deleted file mode 100644
index b07214666d65..000000000000
--- a/arch/powerpc/platforms/85xx/sbc8548.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * Wind River SBC8548 setup and early boot code.
- *
- * Copyright 2007 Wind River Systems Inc.
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the MPC8548CDS support - Copyright 2005 Freescale Inc.
- *
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/major.h>
-#include <linux/console.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/initrd.h>
-#include <linux/interrupt.h>
-#include <linux/fsl_devices.h>
-#include <linux/of_platform.h>
-
-#include <asm/pgtable.h>
-#include <asm/page.h>
-#include <linux/atomic.h>
-#include <asm/time.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/ipic.h>
-#include <asm/pci-bridge.h>
-#include <asm/irq.h>
-#include <mm/mmu_decl.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_soc.h>
-#include <sysdev/fsl_pci.h>
-
-#include "mpc85xx.h"
-
-static int sbc_rev;
-
-static void __init sbc8548_pic_init(void)
-{
- struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
- 0, 256, " OpenPIC ");
- BUG_ON(mpic == NULL);
- mpic_init(mpic);
-}
-
-/* Extract the HW Rev from the EPLD on the board */
-static int __init sbc8548_hw_rev(void)
-{
- struct device_node *np;
- struct resource res;
- unsigned int *rev;
- int board_rev = 0;
-
- np = of_find_compatible_node(NULL, NULL, "hw-rev");
- if (np == NULL) {
- printk("No HW-REV found in DTB.\n");
- return -ENODEV;
- }
-
- of_address_to_resource(np, 0, &res);
- of_node_put(np);
-
- rev = ioremap(res.start,sizeof(unsigned int));
- board_rev = (*rev) >> 28;
- iounmap(rev);
-
- return board_rev;
-}
-
-/*
- * Setup the architecture
- */
-static void __init sbc8548_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("sbc8548_setup_arch()", 0);
-
- fsl_pci_assign_primary();
-
- sbc_rev = sbc8548_hw_rev();
-}
-
-static void sbc8548_show_cpuinfo(struct seq_file *m)
-{
- uint pvid, svid, phid1;
-
- pvid = mfspr(SPRN_PVR);
- svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Wind River\n");
- seq_printf(m, "Machine\t\t: SBC8548 v%d\n", sbc_rev);
- seq_printf(m, "PVR\t\t: 0x%x\n", pvid);
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-
- /* Display cpu Pll setting */
- phid1 = mfspr(SPRN_HID1);
- seq_printf(m, "PLL setting\t: 0x%x\n", ((phid1 >> 24) & 0x3f));
-}
-
-machine_arch_initcall(sbc8548, mpc85xx_common_publish_devices);
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init sbc8548_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "SBC8548");
-}
-
-define_machine(sbc8548) {
- .name = "SBC8548",
- .probe = sbc8548_probe,
- .setup_arch = sbc8548_setup_arch,
- .init_IRQ = sbc8548_pic_init,
- .show_cpuinfo = sbc8548_show_cpuinfo,
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
- .pcibios_fixup_phb = fsl_pcibios_fixup_phb,
-#endif
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-};
diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
index bb75add67084..e635b27ee718 100644
--- a/arch/powerpc/platforms/85xx/sgy_cts1000.c
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -1,20 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Servergy CTS-1000 Setup
*
* Maintained by Ben Collins <ben.c@servergy.com>
*
* Copyright 2012 by Servergy, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
+#define pr_fmt(fmt) "gpio-halt: " fmt
+
+#include <linux/err.h>
#include <linux/platform_device.h>
#include <linux/device.h>
+#include <linux/gpio/consumer.h>
#include <linux/module.h>
-#include <linux/of_gpio.h>
#include <linux/of_irq.h>
#include <linux/workqueue.h>
#include <linux/reboot.h>
@@ -22,9 +21,10 @@
#include <asm/machdep.h>
-static struct device_node *halt_node;
+static struct gpio_desc *halt_gpio;
+static int halt_irq;
-static struct of_device_id child_match[] = {
+static const struct of_device_id child_match[] = {
{
.compatible = "sgy,gpio-halt",
},
@@ -38,116 +38,95 @@ static void gpio_halt_wfn(struct work_struct *work)
}
static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);
-static void gpio_halt_cb(void)
+static void __noreturn gpio_halt_cb(void)
{
- enum of_gpio_flags flags;
- int trigger, gpio;
-
- if (!halt_node)
- return;
-
- gpio = of_get_gpio_flags(halt_node, 0, &flags);
-
- if (!gpio_is_valid(gpio))
- return;
-
- trigger = (flags == OF_GPIO_ACTIVE_LOW);
-
- printk(KERN_INFO "gpio-halt: triggering GPIO.\n");
+ pr_info("triggering GPIO.\n");
/* Probably wont return */
- gpio_set_value(gpio, trigger);
+ gpiod_set_value(halt_gpio, 1);
+
+ panic("Halt failed\n");
}
/* This IRQ means someone pressed the power button and it is waiting for us
* to handle the shutdown/poweroff. */
static irqreturn_t gpio_halt_irq(int irq, void *__data)
{
- printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n");
+ struct platform_device *pdev = __data;
+
+ dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n");
schedule_work(&gpio_halt_wq);
return IRQ_HANDLED;
};
-static int gpio_halt_probe(struct platform_device *pdev)
+static int __gpio_halt_probe(struct platform_device *pdev,
+ struct device_node *halt_node)
{
- enum of_gpio_flags flags;
- struct device_node *node = pdev->dev.of_node;
- int gpio, err, irq;
- int trigger;
-
- if (!node)
- return -ENODEV;
-
- /* If there's no matching child, this isn't really an error */
- halt_node = of_find_matching_node(node, child_match);
- if (!halt_node)
- return 0;
+ int err;
- /* Technically we could just read the first one, but punish
- * DT writers for invalid form. */
- if (of_gpio_count(halt_node) != 1)
- return -EINVAL;
-
- /* Get the gpio number relative to the dynamic base. */
- gpio = of_get_gpio_flags(halt_node, 0, &flags);
- if (!gpio_is_valid(gpio))
- return -EINVAL;
-
- err = gpio_request(gpio, "gpio-halt");
+ halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node),
+ NULL, 0, GPIOD_OUT_LOW, "gpio-halt");
+ err = PTR_ERR_OR_ZERO(halt_gpio);
if (err) {
- printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n",
- gpio);
- halt_node = NULL;
+ dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err);
return err;
}
- trigger = (flags == OF_GPIO_ACTIVE_LOW);
-
- gpio_direction_output(gpio, !trigger);
-
/* Now get the IRQ which tells us when the power button is hit */
- irq = irq_of_parse_and_map(halt_node, 0);
- err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING |
- IRQF_TRIGGER_FALLING, "gpio-halt", halt_node);
+ halt_irq = irq_of_parse_and_map(halt_node, 0);
+ err = request_irq(halt_irq, gpio_halt_irq,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+ "gpio-halt", pdev);
if (err) {
- printk(KERN_ERR "gpio-halt: error requesting IRQ %d for "
- "GPIO %d.\n", irq, gpio);
- gpio_free(gpio);
- halt_node = NULL;
+ dev_err(&pdev->dev, "failed to request IRQ %d: %d\n",
+ halt_irq, err);
+ gpiod_put(halt_gpio);
+ halt_gpio = NULL;
return err;
}
/* Register our halt function */
ppc_md.halt = gpio_halt_cb;
- ppc_md.power_off = gpio_halt_cb;
+ pm_power_off = gpio_halt_cb;
- printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d"
- " irq).\n", gpio, trigger, irq);
+ dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq);
return 0;
}
-static int gpio_halt_remove(struct platform_device *pdev)
+static int gpio_halt_probe(struct platform_device *pdev)
{
- if (halt_node) {
- int gpio = of_get_gpio(halt_node, 0);
- int irq = irq_of_parse_and_map(halt_node, 0);
+ struct device_node *halt_node;
+ int ret;
- free_irq(irq, halt_node);
+ if (!pdev->dev.of_node)
+ return -ENODEV;
- ppc_md.halt = NULL;
- ppc_md.power_off = NULL;
+ /* If there's no matching child, this isn't really an error */
+ halt_node = of_find_matching_node(pdev->dev.of_node, child_match);
+ if (!halt_node)
+ return -ENODEV;
- gpio_free(gpio);
+ ret = __gpio_halt_probe(pdev, halt_node);
+ of_node_put(halt_node);
- halt_node = NULL;
- }
+ return ret;
+}
- return 0;
+static void gpio_halt_remove(struct platform_device *pdev)
+{
+ free_irq(halt_irq, pdev);
+ cancel_work_sync(&gpio_halt_wq);
+
+ ppc_md.halt = NULL;
+ pm_power_off = NULL;
+
+ gpiod_put(halt_gpio);
+ halt_gpio = NULL;
}
-static struct of_device_id gpio_halt_match[] = {
+static const struct of_device_id gpio_halt_match[] = {
/* We match on the gpio bus itself and scan the children since they
* wont be matched against us. We know the bus wont match until it
* has been registered too. */
@@ -161,7 +140,6 @@ MODULE_DEVICE_TABLE(of, gpio_halt_match);
static struct platform_driver gpio_halt_driver = {
.driver = {
.name = "gpio-halt",
- .owner = THIS_MODULE,
.of_match_table = gpio_halt_match,
},
.probe = gpio_halt_probe,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index d7c1e69f3070..32fa5fb557c0 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -1,34 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Author: Andy Fleming <afleming@freescale.com>
* Kumar Gala <galak@kernel.crashing.org>
*
- * Copyright 2006-2008, 2011-2012 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
+ * Copyright 2006-2008, 2011-2012, 2015 Freescale Semiconductor Inc.
*/
#include <linux/stddef.h>
#include <linux/kernel.h>
+#include <linux/sched/hotplug.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/of.h>
-#include <linux/of_address.h>
#include <linux/kexec.h>
#include <linux/highmem.h>
#include <linux/cpu.h>
+#include <linux/fsl/guts.h>
+#include <linux/pgtable.h>
#include <asm/machdep.h>
-#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/mpic.h>
#include <asm/cacheflush.h>
#include <asm/dbell.h>
-#include <asm/fsl_guts.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
#include <asm/cputhreads.h>
+#include <asm/fsl_pm.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/mpic.h>
@@ -43,35 +40,22 @@ struct epapr_spin_table {
u32 pir;
};
-static struct ccsr_guts __iomem *guts;
static u64 timebase;
static int tb_req;
static int tb_valid;
-static void mpc85xx_timebase_freeze(int freeze)
-{
- uint32_t mask;
-
- mask = CCSR_GUTS_DEVDISR_TB0 | CCSR_GUTS_DEVDISR_TB1;
- if (freeze)
- setbits32(&guts->devdisr, mask);
- else
- clrbits32(&guts->devdisr, mask);
-
- in_be32(&guts->devdisr);
-}
-
static void mpc85xx_give_timebase(void)
{
unsigned long flags;
local_irq_save(flags);
+ hard_irq_disable();
while (!tb_req)
barrier();
tb_req = 0;
- mpc85xx_timebase_freeze(1);
+ qoriq_pm_ops->freeze_time_base(true);
#ifdef CONFIG_PPC64
/*
* e5500/e6500 have a workaround for erratum A-006958 in place
@@ -104,7 +88,7 @@ static void mpc85xx_give_timebase(void)
while (tb_valid)
barrier();
- mpc85xx_timebase_freeze(0);
+ qoriq_pm_ops->freeze_time_base(false);
local_irq_restore(flags);
}
@@ -114,6 +98,7 @@ static void mpc85xx_take_timebase(void)
unsigned long flags;
local_irq_save(flags);
+ hard_irq_disable();
tb_req = 1;
while (!tb_valid)
@@ -127,35 +112,54 @@ static void mpc85xx_take_timebase(void)
}
#ifdef CONFIG_HOTPLUG_CPU
-static void smp_85xx_mach_cpu_die(void)
+static void smp_85xx_cpu_offline_self(void)
{
unsigned int cpu = smp_processor_id();
- u32 tmp;
local_irq_disable();
+ hard_irq_disable();
+ /* mask all irqs to prevent cpu wakeup */
+ qoriq_pm_ops->irq_mask(cpu);
+
idle_task_exit();
- generic_set_cpu_dead(cpu);
- mb();
mtspr(SPRN_TCR, 0);
+ mtspr(SPRN_TSR, mfspr(SPRN_TSR));
- __flush_disable_L1();
- tmp = (mfspr(SPRN_HID0) & ~(HID0_DOZE|HID0_SLEEP)) | HID0_NAP;
- mtspr(SPRN_HID0, tmp);
- isync();
+ generic_set_cpu_dead(cpu);
- /* Enter NAP mode. */
- tmp = mfmsr();
- tmp |= MSR_WE;
- mb();
- mtmsr(tmp);
- isync();
+ cur_cpu_spec->cpu_down_flush();
+
+ qoriq_pm_ops->cpu_die(cpu);
while (1)
;
}
+
+static void qoriq_cpu_kill(unsigned int cpu)
+{
+ int i;
+
+ for (i = 0; i < 500; i++) {
+ if (is_cpu_dead(cpu)) {
+#ifdef CONFIG_PPC64
+ paca_ptrs[cpu]->cpu_start = 0;
+#endif
+ return;
+ }
+ msleep(20);
+ }
+ pr_err("CPU%d didn't die...\n", cpu);
+}
#endif
+/*
+ * To keep it compatible with old boot program which uses
+ * cache-inhibit spin table, we need to flush the cache
+ * before accessing spin table to invalidate any staled data.
+ * We also need to flush the cache after writing to spin
+ * table to push data out.
+ */
static inline void flush_spin_table(void *spin_table)
{
flush_dcache_range((ulong)spin_table,
@@ -173,65 +177,28 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
static void wake_hw_thread(void *info)
{
void fsl_secondary_thread_init(void);
- unsigned long imsr1, inia1;
- int nr = *(const int *)info;
-
- imsr1 = MSR_KERNEL;
- inia1 = *(unsigned long *)fsl_secondary_thread_init;
+ unsigned long inia;
+ int cpu = *(const int *)info;
- mttmr(TMRN_IMSR1, imsr1);
- mttmr(TMRN_INIA1, inia1);
- mtspr(SPRN_TENS, TEN_THREAD(1));
-
- smp_generic_kick_cpu(nr);
+ inia = ppc_function_entry(fsl_secondary_thread_init);
+ book3e_start_thread(cpu_thread_in_core(cpu), inia);
}
#endif
-static int smp_85xx_kick_cpu(int nr)
+static int smp_85xx_start_cpu(int cpu)
{
- unsigned long flags;
- const u64 *cpu_rel_addr;
- __iomem struct epapr_spin_table *spin_table;
+ int ret = 0;
struct device_node *np;
- int hw_cpu = get_hard_smp_processor_id(nr);
+ const u64 *cpu_rel_addr;
+ unsigned long flags;
int ioremappable;
- int ret = 0;
-
- WARN_ON(nr < 0 || nr >= NR_CPUS);
- WARN_ON(hw_cpu < 0 || hw_cpu >= NR_CPUS);
+ int hw_cpu = get_hard_smp_processor_id(cpu);
+ struct epapr_spin_table __iomem *spin_table;
- pr_debug("smp_85xx_kick_cpu: kick CPU #%d\n", nr);
-
-#ifdef CONFIG_PPC64
- /* Threads don't use the spin table */
- if (cpu_thread_in_core(nr) != 0) {
- int primary = cpu_first_thread_sibling(nr);
-
- if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT)))
- return -ENOENT;
-
- if (cpu_thread_in_core(nr) != 1) {
- pr_err("%s: cpu %d: invalid hw thread %d\n",
- __func__, nr, cpu_thread_in_core(nr));
- return -ENOENT;
- }
-
- if (!cpu_online(primary)) {
- pr_err("%s: cpu %d: primary %d not online\n",
- __func__, nr, primary);
- return -ENOENT;
- }
-
- smp_call_function_single(primary, wake_hw_thread, &nr, 0);
- return 0;
- }
-#endif
-
- np = of_get_cpu_node(nr, NULL);
+ np = of_get_cpu_node(cpu, NULL);
cpu_rel_addr = of_get_property(np, "cpu-release-addr", NULL);
-
- if (cpu_rel_addr == NULL) {
- printk(KERN_ERR "No cpu-release-addr for cpu %d\n", nr);
+ if (!cpu_rel_addr) {
+ pr_err("No cpu-release-addr for cpu %d\n", cpu);
return -ENOENT;
}
@@ -241,38 +208,28 @@ static int smp_85xx_kick_cpu(int nr)
* The bootpage and highmem can be accessed via ioremap(), but
* we need to directly access the spinloop if its in lowmem.
*/
- ioremappable = *cpu_rel_addr > virt_to_phys(high_memory);
+ ioremappable = *cpu_rel_addr > virt_to_phys(high_memory - 1);
/* Map the spin table */
if (ioremappable)
- spin_table = ioremap_prot(*cpu_rel_addr,
- sizeof(struct epapr_spin_table), _PAGE_COHERENT);
+ spin_table = ioremap_coherent(*cpu_rel_addr,
+ sizeof(struct epapr_spin_table));
else
spin_table = phys_to_virt(*cpu_rel_addr);
local_irq_save(flags);
-#ifdef CONFIG_PPC32
-#ifdef CONFIG_HOTPLUG_CPU
- /* Corresponding to generic_set_cpu_dead() */
- generic_set_cpu_up(nr);
+ hard_irq_disable();
- if (system_state == SYSTEM_RUNNING) {
- /*
- * To keep it compatible with old boot program which uses
- * cache-inhibit spin table, we need to flush the cache
- * before accessing spin table to invalidate any staled data.
- * We also need to flush the cache after writing to spin
- * table to push data out.
- */
- flush_spin_table(spin_table);
- out_be32(&spin_table->addr_l, 0);
- flush_spin_table(spin_table);
+ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+ qoriq_pm_ops->cpu_up_prepare(cpu);
+ /* if cpu is not spinning, reset it */
+ if (read_spin_table_addr_l(spin_table) != 1) {
/*
* We don't set the BPTR register here since it already points
* to the boot page properly.
*/
- mpic_reset_core(nr);
+ mpic_reset_core(cpu);
/*
* wait until core is ready...
@@ -282,40 +239,32 @@ static int smp_85xx_kick_cpu(int nr)
if (!spin_event_timeout(
read_spin_table_addr_l(spin_table) == 1,
10000, 100)) {
- pr_err("%s: timeout waiting for core %d to reset\n",
- __func__, hw_cpu);
- ret = -ENOENT;
- goto out;
+ pr_err("timeout waiting for cpu %d to reset\n",
+ hw_cpu);
+ ret = -EAGAIN;
+ goto err;
}
-
- /* clear the acknowledge status */
- __secondary_hold_acknowledge = -1;
- }
-#endif
- flush_spin_table(spin_table);
- out_be32(&spin_table->pir, hw_cpu);
- out_be32(&spin_table->addr_l, __pa(__early_start));
- flush_spin_table(spin_table);
-
- /* Wait a bit for the CPU to ack. */
- if (!spin_event_timeout(__secondary_hold_acknowledge == hw_cpu,
- 10000, 100)) {
- pr_err("%s: timeout waiting for core %d to ack\n",
- __func__, hw_cpu);
- ret = -ENOENT;
- goto out;
}
-out:
-#else
- smp_generic_kick_cpu(nr);
flush_spin_table(spin_table);
out_be32(&spin_table->pir, hw_cpu);
+#ifdef CONFIG_PPC64
out_be64((u64 *)(&spin_table->addr_h),
__pa(ppc_function_entry(generic_secondary_smp_init)));
- flush_spin_table(spin_table);
+#else
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ /*
+ * We need also to write addr_h to spin table for systems
+ * in which their physical memory start address was configured
+ * to above 4G, otherwise the secondary core can not get
+ * correct entry to start from.
+ */
+ out_be32(&spin_table->addr_h, __pa(__early_start) >> 32);
#endif
-
+ out_be32(&spin_table->addr_l, __pa(__early_start));
+#endif
+ flush_spin_table(spin_table);
+err:
local_irq_restore(flags);
if (ioremappable)
@@ -324,27 +273,105 @@ out:
return ret;
}
+static int smp_85xx_kick_cpu(int nr)
+{
+ int ret = 0;
+#ifdef CONFIG_PPC64
+ int primary = nr;
+#endif
+
+ WARN_ON(nr < 0 || nr >= num_possible_cpus());
+
+ pr_debug("kick CPU #%d\n", nr);
+
+#ifdef CONFIG_PPC64
+ if (threads_per_core == 2) {
+ if (WARN_ON_ONCE(!cpu_has_feature(CPU_FTR_SMT)))
+ return -ENOENT;
+
+ booting_thread_hwid = cpu_thread_in_core(nr);
+ primary = cpu_first_thread_sibling(nr);
+
+ if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare)
+ qoriq_pm_ops->cpu_up_prepare(nr);
+
+ /*
+ * If either thread in the core is online, use it to start
+ * the other.
+ */
+ if (cpu_online(primary)) {
+ smp_call_function_single(primary,
+ wake_hw_thread, &nr, 1);
+ goto done;
+ } else if (cpu_online(primary + 1)) {
+ smp_call_function_single(primary + 1,
+ wake_hw_thread, &nr, 1);
+ goto done;
+ }
+
+ /*
+ * If getting here, it means both threads in the core are
+ * offline. So start the primary thread, then it will start
+ * the thread specified in booting_thread_hwid, the one
+ * corresponding to nr.
+ */
+
+ } else if (threads_per_core == 1) {
+ /*
+ * If one core has only one thread, set booting_thread_hwid to
+ * an invalid value.
+ */
+ booting_thread_hwid = INVALID_THREAD_HWID;
+
+ } else if (threads_per_core > 2) {
+ pr_err("Do not support more than 2 threads per CPU.");
+ return -EINVAL;
+ }
+
+ ret = smp_85xx_start_cpu(primary);
+ if (ret)
+ return ret;
+
+done:
+ paca_ptrs[nr]->cpu_start = 1;
+ generic_set_cpu_up(nr);
+
+ return ret;
+#else
+ ret = smp_85xx_start_cpu(nr);
+ if (ret)
+ return ret;
+
+ generic_set_cpu_up(nr);
+
+ return ret;
+#endif
+}
+
struct smp_ops_t smp_85xx_ops = {
+ .cause_nmi_ipi = NULL,
.kick_cpu = smp_85xx_kick_cpu,
.cpu_bootable = smp_generic_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = generic_cpu_disable,
.cpu_die = generic_cpu_die,
#endif
-#ifdef CONFIG_KEXEC
+#if defined(CONFIG_KEXEC_CORE) && !defined(CONFIG_PPC64)
.give_timebase = smp_generic_give_timebase,
.take_timebase = smp_generic_take_timebase,
#endif
};
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
+#ifdef CONFIG_PPC32
atomic_t kexec_down_cpus = ATOMIC_INIT(0);
-void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
{
local_irq_disable();
if (secondary) {
+ cur_cpu_spec->cpu_down_flush();
atomic_inc(&kexec_down_cpus);
/* loop forever */
while (1);
@@ -356,62 +383,70 @@ static void mpc85xx_smp_kexec_down(void *arg)
if (ppc_md.kexec_cpu_down)
ppc_md.kexec_cpu_down(0,1);
}
-
-static void map_and_flush(unsigned long paddr)
+#else
+static void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary)
{
- struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
- unsigned long kaddr = (unsigned long)kmap(page);
+ int cpu = smp_processor_id();
+ int sibling = cpu_last_thread_sibling(cpu);
+ bool notified = false;
+ int disable_cpu;
+ int disable_threadbit = 0;
+ long start = mftb();
+ long now;
- flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
- kunmap(page);
-}
+ local_irq_disable();
+ hard_irq_disable();
+ mpic_teardown_this_cpu(secondary);
-/**
- * Before we reset the other cores, we need to flush relevant cache
- * out to memory so we don't get anything corrupted, some of these flushes
- * are performed out of an overabundance of caution as interrupts are not
- * disabled yet and we can switch cores
- */
-static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image)
-{
- kimage_entry_t *ptr, entry;
- unsigned long paddr;
- int i;
+#ifdef CONFIG_CRASH_DUMP
+ if (cpu == crashing_cpu && cpu_thread_in_core(cpu) != 0) {
+ /*
+ * We enter the crash kernel on whatever cpu crashed,
+ * even if it's a secondary thread. If that's the case,
+ * disable the corresponding primary thread.
+ */
+ disable_threadbit = 1;
+ disable_cpu = cpu_first_thread_sibling(cpu);
+ } else if (sibling == crashing_cpu) {
+ return;
+ }
+#endif
+ if (cpu_thread_in_core(cpu) == 0 && cpu_thread_in_core(sibling) != 0) {
+ disable_threadbit = 2;
+ disable_cpu = sibling;
+ }
- if (image->type == KEXEC_TYPE_DEFAULT) {
- /* normal kexec images are stored in temporary pages */
- for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE);
- ptr = (entry & IND_INDIRECTION) ?
- phys_to_virt(entry & PAGE_MASK) : ptr + 1) {
- if (!(entry & IND_DESTINATION)) {
- map_and_flush(entry);
+ if (disable_threadbit) {
+ while (paca_ptrs[disable_cpu]->kexec_state < KEXEC_STATE_REAL_MODE) {
+ barrier();
+ now = mftb();
+ if (!notified && now - start > 1000000) {
+ pr_info("%s/%d: waiting for cpu %d to enter KEXEC_STATE_REAL_MODE (%d)\n",
+ __func__, smp_processor_id(),
+ disable_cpu,
+ paca_ptrs[disable_cpu]->kexec_state);
+ notified = true;
}
}
- /* flush out last IND_DONE page */
- map_and_flush(entry);
- } else {
- /* crash type kexec images are copied to the crash region */
- for (i = 0; i < image->nr_segments; i++) {
- struct kexec_segment *seg = &image->segment[i];
- for (paddr = seg->mem; paddr < seg->mem + seg->memsz;
- paddr += PAGE_SIZE) {
- map_and_flush(paddr);
- }
+
+ if (notified) {
+ pr_info("%s: cpu %d done waiting\n",
+ __func__, disable_cpu);
}
- }
- /* also flush the kimage struct to be passed in as well */
- flush_dcache_range((unsigned long)image,
- (unsigned long)image + sizeof(*image));
+ mtspr(SPRN_TENC, disable_threadbit);
+ while (mfspr(SPRN_TENSR) & disable_threadbit)
+ cpu_relax();
+ }
}
+#endif
static void mpc85xx_smp_machine_kexec(struct kimage *image)
{
+#ifdef CONFIG_PPC32
int timeout = INT_MAX;
int i, num_cpus = num_present_cpus();
- mpc85xx_smp_flush_dcache_kexec(image);
-
if (image->type == KEXEC_TYPE_DEFAULT)
smp_call_function(mpc85xx_smp_kexec_down, NULL, 0);
@@ -429,33 +464,17 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
if ( i == smp_processor_id() ) continue;
mpic_reset_core(i);
}
+#endif
default_machine_kexec(image);
}
-#endif /* CONFIG_KEXEC */
-
-static void smp_85xx_basic_setup(int cpu_nr)
-{
- if (cpu_has_feature(CPU_FTR_DBELL))
- doorbell_setup_this_cpu();
-}
+#endif /* CONFIG_KEXEC_CORE */
static void smp_85xx_setup_cpu(int cpu_nr)
{
mpic_setup_this_cpu();
- smp_85xx_basic_setup(cpu_nr);
}
-static const struct of_device_id mpc85xx_smp_guts_ids[] = {
- { .compatible = "fsl,mpc8572-guts", },
- { .compatible = "fsl,p1020-guts", },
- { .compatible = "fsl,p1021-guts", },
- { .compatible = "fsl,p1022-guts", },
- { .compatible = "fsl,p1023-guts", },
- { .compatible = "fsl,p2020-guts", },
- {},
-};
-
void __init mpc85xx_smp_init(void)
{
struct device_node *np;
@@ -467,7 +486,7 @@ void __init mpc85xx_smp_init(void)
smp_85xx_ops.setup_cpu = smp_85xx_setup_cpu;
smp_85xx_ops.message_pass = smp_mpic_message_pass;
} else
- smp_85xx_ops.setup_cpu = smp_85xx_basic_setup;
+ smp_85xx_ops.setup_cpu = NULL;
if (cpu_has_feature(CPU_FTR_DBELL)) {
/*
@@ -475,29 +494,28 @@ void __init mpc85xx_smp_init(void)
* smp_muxed_ipi_message_pass
*/
smp_85xx_ops.message_pass = NULL;
- smp_85xx_ops.cause_ipi = doorbell_cause_ipi;
+ smp_85xx_ops.cause_ipi = doorbell_global_ipi;
smp_85xx_ops.probe = NULL;
}
- np = of_find_matching_node(NULL, mpc85xx_smp_guts_ids);
- if (np) {
- guts = of_iomap(np, 0);
- of_node_put(np);
- if (!guts) {
- pr_err("%s: Could not map guts node address\n",
- __func__);
- return;
- }
+#ifdef CONFIG_FSL_CORENET_RCPM
+ /* Assign a value to qoriq_pm_ops on PPC_E500MC */
+ fsl_rcpm_init();
+#else
+ /* Assign a value to qoriq_pm_ops on !PPC_E500MC */
+ mpc85xx_setup_pmc();
+#endif
+ if (qoriq_pm_ops) {
smp_85xx_ops.give_timebase = mpc85xx_give_timebase;
smp_85xx_ops.take_timebase = mpc85xx_take_timebase;
#ifdef CONFIG_HOTPLUG_CPU
- ppc_md.cpu_die = smp_85xx_mach_cpu_die;
+ smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self;
+ smp_85xx_ops.cpu_die = qoriq_cpu_kill;
#endif
}
-
smp_ops = &smp_85xx_ops;
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
ppc_md.kexec_cpu_down = mpc85xx_smp_kexec_cpu_down;
ppc_md.machine_kexec = mpc85xx_smp_machine_kexec;
#endif
diff --git a/arch/powerpc/platforms/85xx/smp.h b/arch/powerpc/platforms/85xx/smp.h
index e2b44933ff19..3936ff6dfbdb 100644
--- a/arch/powerpc/platforms/85xx/smp.h
+++ b/arch/powerpc/platforms/85xx/smp.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef POWERPC_85XX_SMP_H_
#define POWERPC_85XX_SMP_H_ 1
@@ -5,6 +6,7 @@
#ifdef CONFIG_SMP
void __init mpc85xx_smp_init(void);
+int __init mpc85xx_setup_pmc(void);
#else
static inline void mpc85xx_smp_init(void)
{
diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c
index ae368e0e1076..403367b318db 100644
--- a/arch/powerpc/platforms/85xx/socrates.c
+++ b/arch/powerpc/platforms/85xx/socrates.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2008 Emcraft Systems
* Sergei Poselenov <sposelenov@emcraft.com>
@@ -14,11 +15,6 @@
* Based on original work by
* Kumar Gala <kumar.gala@freescale.com>
* Copyright 2004 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -27,13 +23,12 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/mpic.h>
-#include <asm/prom.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
@@ -74,26 +69,11 @@ static void __init socrates_setup_arch(void)
machine_arch_initcall(socrates, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init socrates_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "abb,socrates"))
- return 1;
-
- return 0;
-}
-
define_machine(socrates) {
.name = "Socrates",
- .probe = socrates_probe,
+ .compatible = "abb,socrates",
.setup_arch = socrates_setup_arch,
.init_IRQ = socrates_pic_init,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
index 55a9682b9529..4b69fb321a68 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c
@@ -1,19 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2008 Ilya Yanok, Emcraft Systems
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
*/
#include <linux/irq.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/io.h>
+#include "socrates_fpga_pic.h"
+
/*
* The FPGA supports 9 interrupt sources, which can be routed to 3
* interrupt request lines of the MPIC. The line to be used can be
@@ -78,7 +74,7 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
break;
}
if (i == 3)
- return NO_IRQ;
+ return 0;
raw_spin_lock_irqsave(&socrates_fpga_pic_lock, flags);
cause = socrates_fpga_pic_read(FPGA_PIC_IRQMASK(i));
@@ -87,13 +83,14 @@ static inline unsigned int socrates_fpga_pic_get_irq(unsigned int irq)
if (cause >> (i + 16))
break;
}
- return irq_linear_revmap(socrates_fpga_pic_irq_host,
+ return irq_find_mapping(socrates_fpga_pic_irq_host,
(irq_hw_number_t)i);
}
-void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
+static void socrates_fpga_pic_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
+ unsigned int irq = irq_desc_get_irq(desc);
unsigned int cascade_irq;
/*
@@ -102,7 +99,7 @@ void socrates_fpga_pic_cascade(unsigned int irq, struct irq_desc *desc)
*/
cascade_irq = socrates_fpga_pic_get_irq(irq);
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
}
@@ -251,8 +248,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
/* type is configurable */
if (intspec[1] != IRQ_TYPE_LEVEL_LOW &&
intspec[1] != IRQ_TYPE_LEVEL_HIGH) {
- pr_warning("FPGA PIC: invalid irq type, "
- "setting default active low\n");
+ pr_warn("FPGA PIC: invalid irq type, setting default active low\n");
*out_flags = IRQ_TYPE_LEVEL_LOW;
} else {
*out_flags = intspec[1];
@@ -266,7 +262,7 @@ static int socrates_fpga_pic_host_xlate(struct irq_domain *h,
if (intspec[2] <= 2)
fpga_irq->irq_line = intspec[2];
else
- pr_warning("FPGA PIC: invalid irq routing\n");
+ pr_warn("FPGA PIC: invalid irq routing\n");
return 0;
}
@@ -276,13 +272,13 @@ static const struct irq_domain_ops socrates_fpga_pic_host_ops = {
.xlate = socrates_fpga_pic_host_xlate,
};
-void socrates_fpga_pic_init(struct device_node *pic)
+void __init socrates_fpga_pic_init(struct device_node *pic)
{
unsigned long flags;
int i;
/* Setup an irq_domain structure */
- socrates_fpga_pic_irq_host = irq_domain_add_linear(pic,
+ socrates_fpga_pic_irq_host = irq_domain_create_linear(of_fwnode_handle(pic),
SOCRATES_FPGA_NUM_IRQS, &socrates_fpga_pic_host_ops, NULL);
if (socrates_fpga_pic_irq_host == NULL) {
pr_err("FPGA PIC: Unable to allocate host\n");
@@ -291,8 +287,8 @@ void socrates_fpga_pic_init(struct device_node *pic)
for (i = 0; i < 3; i++) {
socrates_fpga_irqs[i] = irq_of_parse_and_map(pic, i);
- if (socrates_fpga_irqs[i] == NO_IRQ) {
- pr_warning("FPGA PIC: can't get irq%d.\n", i);
+ if (!socrates_fpga_irqs[i]) {
+ pr_warn("FPGA PIC: can't get irq%d\n", i);
continue;
}
irq_set_chained_handler(socrates_fpga_irqs[i],
diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
index 21d7d8e42199..c50b23794a06 100644
--- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
+++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.h
@@ -1,16 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (C) 2008 Ilya Yanok, Emcraft Systems
- *
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
*/
#ifndef SOCRATES_FPGA_PIC_H
#define SOCRATES_FPGA_PIC_H
-void socrates_fpga_pic_init(struct device_node *pic);
+void __init socrates_fpga_pic_init(struct device_node *pic);
#endif
diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c
index 6f4939b6309e..c10efc45894c 100644
--- a/arch/powerpc/platforms/85xx/stx_gp3.c
+++ b/arch/powerpc/platforms/85xx/stx_gp3.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Based on MPC8560 ADS and arch/ppc stx_gp3 ports
*
@@ -13,11 +14,6 @@
*
* Ported to 2.6, Matt Porter <mporter@kernel.crashing.org>
* Copyright 2004-2005 MontaVista Software, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -26,13 +22,12 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/mpic.h>
-#include <asm/prom.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
@@ -88,24 +83,12 @@ static void stx_gp3_show_cpuinfo(struct seq_file *m)
machine_arch_initcall(stx_gp3, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init stx_gp3_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "stx,gp3-8560");
-}
-
define_machine(stx_gp3) {
.name = "STX GP3",
- .probe = stx_gp3_probe,
+ .compatible = "stx,gp3-8560",
.setup_arch = stx_gp3_setup_arch,
.init_IRQ = stx_gp3_pic_init,
.show_cpuinfo = stx_gp3_show_cpuinfo,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/t1042rdb_diu.c b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
new file mode 100644
index 000000000000..d4fbb6eff38a
--- /dev/null
+++ b/arch/powerpc/platforms/85xx/t1042rdb_diu.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * T1042 platform DIU operation
+ *
+ * Copyright 2014 Freescale Semiconductor Inc.
+ */
+
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include <sysdev/fsl_soc.h>
+
+/*DIU Pixel ClockCR offset in scfg*/
+#define CCSR_SCFG_PIXCLKCR 0x28
+
+/* DIU Pixel Clock bits of the PIXCLKCR */
+#define PIXCLKCR_PXCKEN 0x80000000
+#define PIXCLKCR_PXCKINV 0x40000000
+#define PIXCLKCR_PXCKDLY 0x0000FF00
+#define PIXCLKCR_PXCLK_MASK 0x00FF0000
+
+/* Some CPLD register definitions */
+#define CPLD_DIUCSR 0x16
+#define CPLD_DIUCSR_DVIEN 0x80
+#define CPLD_DIUCSR_BACKLIGHT 0x0f
+
+struct device_node *cpld_node;
+
+/**
+ * t1042rdb_set_monitor_port: switch the output to a different monitor port
+ */
+static void t1042rdb_set_monitor_port(enum fsl_diu_monitor_port port)
+{
+ void __iomem *cpld_base;
+
+ cpld_base = of_iomap(cpld_node, 0);
+ if (!cpld_base) {
+ pr_err("%s: Could not map cpld registers\n", __func__);
+ goto exit;
+ }
+
+ switch (port) {
+ case FSL_DIU_PORT_DVI:
+ /* Enable the DVI(HDMI) port, disable the DFP and
+ * the backlight
+ */
+ clrbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_DVIEN);
+ break;
+ case FSL_DIU_PORT_LVDS:
+ /*
+ * LVDS also needs backlight enabled, otherwise the display
+ * will be blank.
+ */
+ /* Enable the DFP port, disable the DVI*/
+ setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 8);
+ setbits8(cpld_base + CPLD_DIUCSR, 0x01 << 4);
+ setbits8(cpld_base + CPLD_DIUCSR, CPLD_DIUCSR_BACKLIGHT);
+ break;
+ default:
+ pr_err("%s: Unsupported monitor port %i\n", __func__, port);
+ }
+
+ iounmap(cpld_base);
+exit:
+ of_node_put(cpld_node);
+}
+
+/**
+ * t1042rdb_set_pixel_clock: program the DIU's clock
+ * @pixclock: pixel clock in ps (pico seconds)
+ */
+static void t1042rdb_set_pixel_clock(unsigned int pixclock)
+{
+ struct device_node *scfg_np;
+ void __iomem *scfg;
+ unsigned long freq;
+ u64 temp;
+ u32 pxclk;
+
+ scfg_np = of_find_compatible_node(NULL, NULL, "fsl,t1040-scfg");
+ if (!scfg_np) {
+ pr_err("%s: Missing scfg node. Can not display video.\n",
+ __func__);
+ return;
+ }
+
+ scfg = of_iomap(scfg_np, 0);
+ of_node_put(scfg_np);
+ if (!scfg) {
+ pr_err("%s: Could not map device. Can not display video.\n",
+ __func__);
+ return;
+ }
+
+ /* Convert pixclock into frequency */
+ temp = 1000000000000ULL;
+ do_div(temp, pixclock);
+ freq = temp;
+
+ /*
+ * 'pxclk' is the ratio of the platform clock to the pixel clock.
+ * This number is programmed into the PIXCLKCR register, and the valid
+ * range of values is 2-255.
+ */
+ pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq);
+ pxclk = clamp_t(u32, pxclk, 2, 255);
+
+ /* Disable the pixel clock, and set it to non-inverted and no delay */
+ clrbits32(scfg + CCSR_SCFG_PIXCLKCR,
+ PIXCLKCR_PXCKEN | PIXCLKCR_PXCKDLY | PIXCLKCR_PXCLK_MASK);
+
+ /* Enable the clock and set the pxclk */
+ setbits32(scfg + CCSR_SCFG_PIXCLKCR, PIXCLKCR_PXCKEN | (pxclk << 16));
+
+ iounmap(scfg);
+}
+
+/**
+ * t1042rdb_valid_monitor_port: set the monitor port for sysfs
+ */
+static enum fsl_diu_monitor_port
+t1042rdb_valid_monitor_port(enum fsl_diu_monitor_port port)
+{
+ switch (port) {
+ case FSL_DIU_PORT_DVI:
+ case FSL_DIU_PORT_LVDS:
+ return port;
+ default:
+ return FSL_DIU_PORT_DVI; /* Dual-link LVDS is not supported */
+ }
+}
+
+static int __init t1042rdb_diu_init(void)
+{
+ cpld_node = of_find_compatible_node(NULL, NULL, "fsl,t1042rdb-cpld");
+ if (!cpld_node)
+ return 0;
+
+ diu_ops.set_monitor_port = t1042rdb_set_monitor_port;
+ diu_ops.set_pixel_clock = t1042rdb_set_pixel_clock;
+ diu_ops.valid_monitor_port = t1042rdb_valid_monitor_port;
+
+ return 0;
+}
+
+early_initcall(t1042rdb_diu_init);
+
+MODULE_DESCRIPTION("Freescale T1042 DIU driver");
+MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c
index ec0b7272fae2..f74d446c53f0 100644
--- a/arch/powerpc/platforms/85xx/tqm85xx.c
+++ b/arch/powerpc/platforms/85xx/tqm85xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Based on MPC8560 ADS and arch/ppc tqm85xx ports
*
@@ -11,11 +12,6 @@
* Based on original work by
* Kumar Gala <kumar.gala@freescale.com>
* Copyright 2004 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
@@ -24,13 +20,12 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <asm/mpic.h>
-#include <asm/prom.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
@@ -117,22 +112,12 @@ static const char * const board[] __initconst = {
NULL
};
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init tqm85xx_probe(void)
-{
- return of_flat_dt_match(of_get_flat_dt_root(), board);
-}
-
define_machine(tqm85xx) {
.name = "TQM85xx",
- .probe = tqm85xx_probe,
+ .compatibles = board,
.setup_arch = tqm85xx_setup_arch,
.init_IRQ = tqm85xx_pic_init,
.show_cpuinfo = tqm85xx_show_cpuinfo,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
index 1eadb6d0dc64..c0a0456f1674 100644
--- a/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2010-2011, 2013 Freescale Semiconductor, Inc.
*
@@ -5,25 +6,20 @@
*
* Description:
* TWR-P102x Board Setup
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
+#include <linux/fsl/guts.h>
#include <linux/pci.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/pci-bridge.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
-#include <asm/fsl_guts.h>
+#include <soc/fsl/qe/qe.h>
#include <sysdev/fsl_soc.h>
#include <sysdev/fsl_pci.h>
@@ -35,26 +31,12 @@ static void __init twr_p1025_pic_init(void)
{
struct mpic *mpic;
-#ifdef CONFIG_QUICC_ENGINE
- struct device_node *np;
-#endif
-
mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN |
MPIC_SINGLE_DEST_CPU,
0, 256, " OpenPIC ");
BUG_ON(mpic == NULL);
mpic_init(mpic);
-
-#ifdef CONFIG_QUICC_ENGINE
- np = of_find_compatible_node(NULL, NULL, "fsl,qe-ic");
- if (np) {
- qe_ic_init(np, 0, qe_ic_cascade_low_mpic,
- qe_ic_cascade_high_mpic);
- of_node_put(np);
- } else
- pr_err("Could not find qe-ic node\n");
-#endif
}
/* ************************************************************************
@@ -64,10 +46,6 @@ static void __init twr_p1025_pic_init(void)
*/
static void __init twr_p1025_setup_arch(void)
{
-#ifdef CONFIG_QUICC_ENGINE
- struct device_node *np;
-#endif
-
if (ppc_md.progress)
ppc_md.progress("twr_p1025_setup_arch()", 0);
@@ -76,12 +54,12 @@ static void __init twr_p1025_setup_arch(void)
fsl_pci_assign_primary();
#ifdef CONFIG_QUICC_ENGINE
- mpc85xx_qe_init();
mpc85xx_qe_par_io_init();
-#if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
+#if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
if (machine_is(twr_p1025)) {
struct ccsr_guts __iomem *guts;
+ struct device_node *np;
np = of_find_compatible_node(NULL, NULL, "fsl,p1021-guts");
if (np) {
@@ -101,7 +79,7 @@ static void __init twr_p1025_setup_arch(void)
MPC85xx_PMUXCR_QE(12));
iounmap(guts);
-#if defined(CONFIG_SERIAL_QE)
+#if IS_ENABLED(CONFIG_SERIAL_QE)
/* On P1025TWR board, the UCC7 acted as UART port.
* However, The UCC7's CTS pin is low level in default,
* it will impact the transmission in full duplex
@@ -126,23 +104,14 @@ static void __init twr_p1025_setup_arch(void)
machine_arch_initcall(twr_p1025, mpc85xx_common_publish_devices);
-static int __init twr_p1025_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "fsl,TWR-P1025");
-}
-
define_machine(twr_p1025) {
.name = "TWR-P1025",
- .probe = twr_p1025_probe,
+ .compatible = "fsl,TWR-P1025",
.setup_arch = twr_p1025_setup_arch,
.init_IRQ = twr_p1025_pic_init,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
index 1a9c1085855f..2582427d8d01 100644
--- a/arch/powerpc/platforms/85xx/xes_mpc85xx.c
+++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2009 Extreme Engineering Solutions, Inc.
*
@@ -6,10 +7,6 @@
* Based on mpc85xx_ds code from Freescale Semiconductor, Inc.
*
* Author: Nate Case <ncase@xes-inc.com>
- *
- * This is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/stddef.h>
@@ -19,13 +16,13 @@
#include <linux/delay.h>
#include <linux/seq_file.h>
#include <linux/interrupt.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
#include <mm/mmu_decl.h>
-#include <asm/prom.h>
#include <asm/udbg.h>
#include <asm/mpic.h>
@@ -40,7 +37,7 @@
#define MPC85xx_L2CTL_L2I 0x40000000 /* L2 flash invalidate */
#define MPC85xx_L2CTL_L2SIZ_MASK 0x30000000 /* L2 SRAM size (R/O) */
-void __init xes_mpc85xx_pic_init(void)
+static void __init xes_mpc85xx_pic_init(void)
{
struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
0, 256, " OpenPIC ");
@@ -48,7 +45,7 @@ void __init xes_mpc85xx_pic_init(void)
mpic_init(mpic);
}
-static void xes_mpc85xx_configure_l2(void __iomem *l2_base)
+static void __init xes_mpc85xx_configure_l2(void __iomem *l2_base)
{
volatile uint32_t ctl, tmp;
@@ -75,7 +72,7 @@ static void xes_mpc85xx_configure_l2(void __iomem *l2_base)
asm volatile("msync; isync");
}
-static void xes_mpc85xx_fixups(void)
+static void __init xes_mpc85xx_fixups(void)
{
struct device_node *np;
int err;
@@ -100,8 +97,8 @@ static void xes_mpc85xx_fixups(void)
err = of_address_to_resource(np, 0, &r[0]);
if (err) {
printk(KERN_WARNING "xes_mpc85xx: Could not get "
- "resource for device tree node '%s'",
- np->full_name);
+ "resource for device tree node '%pOF'",
+ np);
continue;
}
@@ -139,33 +136,9 @@ machine_arch_initcall(xes_mpc8572, mpc85xx_common_publish_devices);
machine_arch_initcall(xes_mpc8548, mpc85xx_common_publish_devices);
machine_arch_initcall(xes_mpc8540, mpc85xx_common_publish_devices);
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init xes_mpc8572_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "xes,MPC8572");
-}
-
-static int __init xes_mpc8548_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "xes,MPC8548");
-}
-
-static int __init xes_mpc8540_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(root, "xes,MPC8540");
-}
-
define_machine(xes_mpc8572) {
.name = "X-ES MPC8572",
- .probe = xes_mpc8572_probe,
+ .compatible = "xes,MPC8572",
.setup_arch = xes_mpc85xx_setup_arch,
.init_IRQ = xes_mpc85xx_pic_init,
#ifdef CONFIG_PCI
@@ -173,14 +146,12 @@ define_machine(xes_mpc8572) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(xes_mpc8548) {
.name = "X-ES MPC8548",
- .probe = xes_mpc8548_probe,
+ .compatible = "xes,MPC8548",
.setup_arch = xes_mpc85xx_setup_arch,
.init_IRQ = xes_mpc85xx_pic_init,
#ifdef CONFIG_PCI
@@ -188,14 +159,12 @@ define_machine(xes_mpc8548) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
define_machine(xes_mpc8540) {
.name = "X-ES MPC8540",
- .probe = xes_mpc8540_probe,
+ .compatible = "xes,MPC8540",
.setup_arch = xes_mpc85xx_setup_arch,
.init_IRQ = xes_mpc85xx_pic_init,
#ifdef CONFIG_PCI
@@ -203,7 +172,5 @@ define_machine(xes_mpc8540) {
.pcibios_fixup_phb = fsl_pcibios_fixup_phb,
#endif
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/86xx/Kconfig b/arch/powerpc/platforms/86xx/Kconfig
index 1afd1e4a2dd2..06b1e5c49d6f 100644
--- a/arch/powerpc/platforms/86xx/Kconfig
+++ b/arch/powerpc/platforms/86xx/Kconfig
@@ -1,43 +1,19 @@
-config PPC_86xx
+# SPDX-License-Identifier: GPL-2.0
menuconfig PPC_86xx
bool "86xx-based boards"
- depends on 6xx
+ depends on PPC_BOOK3S_32
select FSL_SOC
select ALTIVEC
- select ARCH_WANT_OPTIONAL_GPIOLIB
help
The Freescale E600 SoCs have 74xx cores.
if PPC_86xx
-config MPC8641_HPCN
- bool "Freescale MPC8641 HPCN"
- select PPC_I8259
- select DEFAULT_UIMAGE
- select FSL_ULI1575 if PCI
- select HAS_RAPIDIO
- select SWIOTLB
- help
- This option enables support for the MPC8641 HPCN board.
-
-config SBC8641D
- bool "Wind River SBC8641D"
- select DEFAULT_UIMAGE
- help
- This option enables support for the WRS SBC8641D board.
-
-config MPC8610_HPCD
- bool "Freescale MPC8610 HPCD"
- select DEFAULT_UIMAGE
- select FSL_ULI1575 if PCI
- help
- This option enables support for the MPC8610 HPCD board.
-
config GEF_PPC9A
bool "GE PPC9A"
select DEFAULT_UIMAGE
select MMIO_NVRAM
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
select GE_FPGA
help
This option enables support for the GE PPC9A.
@@ -46,7 +22,7 @@ config GEF_SBC310
bool "GE SBC310"
select DEFAULT_UIMAGE
select MMIO_NVRAM
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
select GE_FPGA
help
This option enables support for the GE SBC310.
@@ -55,26 +31,24 @@ config GEF_SBC610
bool "GE SBC610"
select DEFAULT_UIMAGE
select MMIO_NVRAM
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
select GE_FPGA
- select HAS_RAPIDIO
+ select HAVE_RAPIDIO
help
This option enables support for the GE SBC610.
+config MVME7100
+ bool "Artesyn MVME7100"
+ help
+ This option enables support for the Emerson/Artesyn MVME7100 board.
+
endif
config MPC8641
bool
- select PPC_PCI_CHOICE
- select FSL_PCI if PCI
- select PPC_UDBG_16550
- select MPIC
- default y if MPC8641_HPCN || SBC8641D || GEF_SBC610 || GEF_SBC310 || GEF_PPC9A
-
-config MPC8610
- bool
- select PPC_PCI_CHOICE
+ select HAVE_PCI
select FSL_PCI if PCI
select PPC_UDBG_16550
select MPIC
- default y if MPC8610_HPCD
+ default y if GEF_SBC610 || GEF_SBC310 || GEF_PPC9A \
+ || MVME7100
diff --git a/arch/powerpc/platforms/86xx/Makefile b/arch/powerpc/platforms/86xx/Makefile
index ede815d6489d..dafbc037ff42 100644
--- a/arch/powerpc/platforms/86xx/Makefile
+++ b/arch/powerpc/platforms/86xx/Makefile
@@ -1,12 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the PowerPC 86xx linux kernel.
#
-obj-y := pic.o
+obj-y := pic.o common.o
obj-$(CONFIG_SMP) += mpc86xx_smp.o
-obj-$(CONFIG_MPC8641_HPCN) += mpc86xx_hpcn.o
-obj-$(CONFIG_SBC8641D) += sbc8641d.o
-obj-$(CONFIG_MPC8610_HPCD) += mpc8610_hpcd.o
obj-$(CONFIG_GEF_SBC610) += gef_sbc610.o
obj-$(CONFIG_GEF_SBC310) += gef_sbc310.o
obj-$(CONFIG_GEF_PPC9A) += gef_ppc9a.o
+obj-$(CONFIG_MVME7100) += mvme7100.o
diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c
new file mode 100644
index 000000000000..a4a550527609
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/common.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Routines common to most mpc86xx-based boards.
+ */
+
+#include <linux/init.h>
+#include <linux/mod_devicetable.h>
+#include <linux/of_platform.h>
+#include <asm/reg.h>
+#include <asm/synch.h>
+
+#include "mpc86xx.h"
+
+static const struct of_device_id mpc86xx_common_ids[] __initconst = {
+ { .type = "soc", },
+ { .compatible = "soc", },
+ { .compatible = "simple-bus", },
+ { .name = "localbus", },
+ { .compatible = "gianfar", },
+ { .compatible = "fsl,mpc8641-pcie", },
+ {},
+};
+
+int __init mpc86xx_common_publish_devices(void)
+{
+ return of_platform_bus_probe(NULL, mpc86xx_common_ids, NULL);
+}
+
+long __init mpc86xx_time_init(void)
+{
+ unsigned int temp;
+
+ /* Set the time base to zero */
+ mtspr(SPRN_TBWL, 0);
+ mtspr(SPRN_TBWU, 0);
+
+ temp = mfspr(SPRN_HID0);
+ temp |= HID0_TBEN;
+ mtspr(SPRN_HID0, temp);
+ isync();
+
+ return 0;
+}
diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c
index c23f3443880a..f7f98cca7b91 100644
--- a/arch/powerpc/platforms/86xx/gef_ppc9a.c
+++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* GE PPC9A board support
*
@@ -5,11 +6,6 @@
*
* Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
* Copyright 2006 Freescale Semiconductor Inc.
*
@@ -22,12 +18,12 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
@@ -179,66 +175,16 @@ static void gef_ppc9a_nec_fixup(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
gef_ppc9a_nec_fixup);
-/*
- * Called very early, device-tree isn't unflattened
- *
- * This function is called to determine whether the BSP is compatible with the
- * supplied device-tree, which is assumed to be the correct one for the actual
- * board. It is expected thati, in the future, a kernel may support multiple
- * boards.
- */
-static int __init gef_ppc9a_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "gef,ppc9a"))
- return 1;
-
- return 0;
-}
-
-static long __init mpc86xx_time_init(void)
-{
- unsigned int temp;
-
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
- temp = mfspr(SPRN_HID0);
- temp |= HID0_TBEN;
- mtspr(SPRN_HID0, temp);
- asm volatile("isync");
-
- return 0;
-}
-
-static __initdata struct of_device_id of_bus_ids[] = {
- { .compatible = "simple-bus", },
- { .compatible = "gianfar", },
- { .compatible = "fsl,mpc8641-pcie", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- printk(KERN_DEBUG "Probe platform devices\n");
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
- return 0;
-}
-machine_arch_initcall(gef_ppc9a, declare_of_platform_devices);
+machine_arch_initcall(gef_ppc9a, mpc86xx_common_publish_devices);
define_machine(gef_ppc9a) {
.name = "GE PPC9A",
- .probe = gef_ppc9a_probe,
+ .compatible = "gef,ppc9a",
.setup_arch = gef_ppc9a_setup_arch,
.init_IRQ = gef_ppc9a_init_irq,
.show_cpuinfo = gef_ppc9a_show_cpuinfo,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
.time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c
index 8a6ac20686ea..689835f7f088 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc310.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc310.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* GE SBC310 board support
*
@@ -5,11 +6,6 @@
*
* Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
* Copyright 2006 Freescale Semiconductor Inc.
*
@@ -22,12 +18,12 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
@@ -166,66 +162,16 @@ static void gef_sbc310_nec_fixup(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
gef_sbc310_nec_fixup);
-/*
- * Called very early, device-tree isn't unflattened
- *
- * This function is called to determine whether the BSP is compatible with the
- * supplied device-tree, which is assumed to be the correct one for the actual
- * board. It is expected thati, in the future, a kernel may support multiple
- * boards.
- */
-static int __init gef_sbc310_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "gef,sbc310"))
- return 1;
-
- return 0;
-}
-
-static long __init mpc86xx_time_init(void)
-{
- unsigned int temp;
-
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
- temp = mfspr(SPRN_HID0);
- temp |= HID0_TBEN;
- mtspr(SPRN_HID0, temp);
- asm volatile("isync");
-
- return 0;
-}
-
-static __initdata struct of_device_id of_bus_ids[] = {
- { .compatible = "simple-bus", },
- { .compatible = "gianfar", },
- { .compatible = "fsl,mpc8641-pcie", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- printk(KERN_DEBUG "Probe platform devices\n");
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
- return 0;
-}
-machine_arch_initcall(gef_sbc310, declare_of_platform_devices);
+machine_arch_initcall(gef_sbc310, mpc86xx_common_publish_devices);
define_machine(gef_sbc310) {
.name = "GE SBC310",
- .probe = gef_sbc310_probe,
+ .compatible = "gef,sbc310",
.setup_arch = gef_sbc310_setup_arch,
.init_IRQ = gef_sbc310_init_irq,
.show_cpuinfo = gef_sbc310_show_cpuinfo,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
.time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c
index 06c72636f299..365f511186ca 100644
--- a/arch/powerpc/platforms/86xx/gef_sbc610.c
+++ b/arch/powerpc/platforms/86xx/gef_sbc610.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* GE SBC610 board support
*
@@ -5,11 +6,6 @@
*
* Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
*
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
* Based on: mpc86xx_hpcn.c (MPC86xx HPCN board specific routines)
* Copyright 2006 Freescale Semiconductor Inc.
*
@@ -22,12 +18,12 @@
#include <linux/kdev_t.h>
#include <linux/delay.h>
#include <linux/seq_file.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/pci-bridge.h>
-#include <asm/prom.h>
#include <mm/mmu_decl.h>
#include <asm/udbg.h>
@@ -156,66 +152,16 @@ static void gef_sbc610_nec_fixup(struct pci_dev *pdev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
gef_sbc610_nec_fixup);
-/*
- * Called very early, device-tree isn't unflattened
- *
- * This function is called to determine whether the BSP is compatible with the
- * supplied device-tree, which is assumed to be the correct one for the actual
- * board. It is expected thati, in the future, a kernel may support multiple
- * boards.
- */
-static int __init gef_sbc610_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "gef,sbc610"))
- return 1;
-
- return 0;
-}
-
-static long __init mpc86xx_time_init(void)
-{
- unsigned int temp;
-
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
- temp = mfspr(SPRN_HID0);
- temp |= HID0_TBEN;
- mtspr(SPRN_HID0, temp);
- asm volatile("isync");
-
- return 0;
-}
-
-static __initdata struct of_device_id of_bus_ids[] = {
- { .compatible = "simple-bus", },
- { .compatible = "gianfar", },
- { .compatible = "fsl,mpc8641-pcie", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- printk(KERN_DEBUG "Probe platform devices\n");
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
- return 0;
-}
-machine_arch_initcall(gef_sbc610, declare_of_platform_devices);
+machine_arch_initcall(gef_sbc610, mpc86xx_common_publish_devices);
define_machine(gef_sbc610) {
.name = "GE SBC610",
- .probe = gef_sbc610_probe,
+ .compatible = "gef,sbc610",
.setup_arch = gef_sbc610_setup_arch,
.init_IRQ = gef_sbc610_init_irq,
.show_cpuinfo = gef_sbc610_show_cpuinfo,
.get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
.time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
#ifdef CONFIG_PCI
.pcibios_fixup_bus = fsl_pcibios_fixup_bus,
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
deleted file mode 100644
index d479d68fbb2b..000000000000
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * MPC8610 HPCD board specific routines
- *
- * Initial author: Xianghua Xiao <x.xiao@freescale.com>
- * Recode: Jason Jin <jason.jin@freescale.com>
- * York Sun <yorksun@freescale.com>
- *
- * Rewrite the interrupt routing. remove the 8259PIC support,
- * All the integrated device in ULI use sideband interrupt.
- *
- * Copyright 2008 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/of.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <mm/mmu_decl.h>
-#include <asm/udbg.h>
-
-#include <asm/mpic.h>
-
-#include <linux/of_platform.h>
-#include <sysdev/fsl_pci.h>
-#include <sysdev/fsl_soc.h>
-#include <sysdev/simple_gpio.h>
-#include <asm/fsl_guts.h>
-
-#include "mpc86xx.h"
-
-static struct device_node *pixis_node;
-static unsigned char *pixis_bdcfg0, *pixis_arch;
-
-/* DIU Pixel Clock bits of the CLKDVDR Global Utilities register */
-#define CLKDVDR_PXCKEN 0x80000000
-#define CLKDVDR_PXCKINV 0x10000000
-#define CLKDVDR_PXCKDLY 0x06000000
-#define CLKDVDR_PXCLK_MASK 0x001F0000
-
-#ifdef CONFIG_SUSPEND
-static irqreturn_t mpc8610_sw9_irq(int irq, void *data)
-{
- pr_debug("%s: PIXIS' event (sw9/wakeup) IRQ handled\n", __func__);
- return IRQ_HANDLED;
-}
-
-static void __init mpc8610_suspend_init(void)
-{
- int irq;
- int ret;
-
- if (!pixis_node)
- return;
-
- irq = irq_of_parse_and_map(pixis_node, 0);
- if (!irq) {
- pr_err("%s: can't map pixis event IRQ.\n", __func__);
- return;
- }
-
- ret = request_irq(irq, mpc8610_sw9_irq, 0, "sw9:wakeup", NULL);
- if (ret) {
- pr_err("%s: can't request pixis event IRQ: %d\n",
- __func__, ret);
- irq_dispose_mapping(irq);
- }
-
- enable_irq_wake(irq);
-}
-#else
-static inline void mpc8610_suspend_init(void) { }
-#endif /* CONFIG_SUSPEND */
-
-static struct of_device_id __initdata mpc8610_ids[] = {
- { .compatible = "fsl,mpc8610-immr", },
- { .compatible = "fsl,mpc8610-guts", },
- { .compatible = "simple-bus", },
- /* So that the DMA channel nodes can be probed individually: */
- { .compatible = "fsl,eloplus-dma", },
- /* PCI controllers */
- { .compatible = "fsl,mpc8610-pci", },
- { .compatible = "fsl,mpc8641-pcie", },
- {}
-};
-
-static int __init mpc8610_declare_of_platform_devices(void)
-{
- /* Firstly, register PIXIS GPIOs. */
- simple_gpiochip_init("fsl,fpga-pixis-gpio-bank");
-
- /* Enable wakeup on PIXIS' event IRQ. */
- mpc8610_suspend_init();
-
- /* Without this call, the SSI device driver won't get probed. */
- of_platform_bus_probe(NULL, mpc8610_ids, NULL);
-
- return 0;
-}
-machine_arch_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
-
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-
-/*
- * DIU Area Descriptor
- *
- * The MPC8610 reference manual shows the bits of the AD register in
- * little-endian order, which causes the BLUE_C field to be split into two
- * parts. To simplify the definition of the MAKE_AD() macro, we define the
- * fields in big-endian order and byte-swap the result.
- *
- * So even though the registers don't look like they're in the
- * same bit positions as they are on the P1022, the same value is written to
- * the AD register on the MPC8610 and on the P1022.
- */
-#define AD_BYTE_F 0x10000000
-#define AD_ALPHA_C_MASK 0x0E000000
-#define AD_ALPHA_C_SHIFT 25
-#define AD_BLUE_C_MASK 0x01800000
-#define AD_BLUE_C_SHIFT 23
-#define AD_GREEN_C_MASK 0x00600000
-#define AD_GREEN_C_SHIFT 21
-#define AD_RED_C_MASK 0x00180000
-#define AD_RED_C_SHIFT 19
-#define AD_PALETTE 0x00040000
-#define AD_PIXEL_S_MASK 0x00030000
-#define AD_PIXEL_S_SHIFT 16
-#define AD_COMP_3_MASK 0x0000F000
-#define AD_COMP_3_SHIFT 12
-#define AD_COMP_2_MASK 0x00000F00
-#define AD_COMP_2_SHIFT 8
-#define AD_COMP_1_MASK 0x000000F0
-#define AD_COMP_1_SHIFT 4
-#define AD_COMP_0_MASK 0x0000000F
-#define AD_COMP_0_SHIFT 0
-
-#define MAKE_AD(alpha, red, blue, green, size, c0, c1, c2, c3) \
- cpu_to_le32(AD_BYTE_F | (alpha << AD_ALPHA_C_SHIFT) | \
- (blue << AD_BLUE_C_SHIFT) | (green << AD_GREEN_C_SHIFT) | \
- (red << AD_RED_C_SHIFT) | (c3 << AD_COMP_3_SHIFT) | \
- (c2 << AD_COMP_2_SHIFT) | (c1 << AD_COMP_1_SHIFT) | \
- (c0 << AD_COMP_0_SHIFT) | (size << AD_PIXEL_S_SHIFT))
-
-u32 mpc8610hpcd_get_pixel_format(enum fsl_diu_monitor_port port,
- unsigned int bits_per_pixel)
-{
- static const u32 pixelformat[][3] = {
- {
- MAKE_AD(3, 0, 2, 1, 3, 8, 8, 8, 8),
- MAKE_AD(4, 2, 0, 1, 2, 8, 8, 8, 0),
- MAKE_AD(4, 0, 2, 1, 1, 5, 6, 5, 0)
- },
- {
- MAKE_AD(3, 2, 0, 1, 3, 8, 8, 8, 8),
- MAKE_AD(4, 0, 2, 1, 2, 8, 8, 8, 0),
- MAKE_AD(4, 2, 0, 1, 1, 5, 6, 5, 0)
- },
- };
- unsigned int arch_monitor;
-
- /* The DVI port is mis-wired on revision 1 of this board. */
- arch_monitor =
- ((*pixis_arch == 0x01) && (port == FSL_DIU_PORT_DVI)) ? 0 : 1;
-
- switch (bits_per_pixel) {
- case 32:
- return pixelformat[arch_monitor][0];
- case 24:
- return pixelformat[arch_monitor][1];
- case 16:
- return pixelformat[arch_monitor][2];
- default:
- pr_err("fsl-diu: unsupported pixel depth %u\n", bits_per_pixel);
- return 0;
- }
-}
-
-void mpc8610hpcd_set_gamma_table(enum fsl_diu_monitor_port port,
- char *gamma_table_base)
-{
- int i;
- if (port == FSL_DIU_PORT_DLVDS) {
- for (i = 0; i < 256*3; i++)
- gamma_table_base[i] = (gamma_table_base[i] << 2) |
- ((gamma_table_base[i] >> 6) & 0x03);
- }
-}
-
-#define PX_BRDCFG0_DVISEL (1 << 3)
-#define PX_BRDCFG0_DLINK (1 << 4)
-#define PX_BRDCFG0_DIU_MASK (PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK)
-
-void mpc8610hpcd_set_monitor_port(enum fsl_diu_monitor_port port)
-{
- switch (port) {
- case FSL_DIU_PORT_DVI:
- clrsetbits_8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK,
- PX_BRDCFG0_DVISEL | PX_BRDCFG0_DLINK);
- break;
- case FSL_DIU_PORT_LVDS:
- clrsetbits_8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK,
- PX_BRDCFG0_DLINK);
- break;
- case FSL_DIU_PORT_DLVDS:
- clrbits8(pixis_bdcfg0, PX_BRDCFG0_DIU_MASK);
- break;
- }
-}
-
-/**
- * mpc8610hpcd_set_pixel_clock: program the DIU's clock
- *
- * @pixclock: the wavelength, in picoseconds, of the clock
- */
-void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
-{
- struct device_node *guts_np = NULL;
- struct ccsr_guts __iomem *guts;
- unsigned long freq;
- u64 temp;
- u32 pxclk;
-
- /* Map the global utilities registers. */
- guts_np = of_find_compatible_node(NULL, NULL, "fsl,mpc8610-guts");
- if (!guts_np) {
- pr_err("mpc8610hpcd: missing global utilities device node\n");
- return;
- }
-
- guts = of_iomap(guts_np, 0);
- of_node_put(guts_np);
- if (!guts) {
- pr_err("mpc8610hpcd: could not map global utilities device\n");
- return;
- }
-
- /* Convert pixclock from a wavelength to a frequency */
- temp = 1000000000000ULL;
- do_div(temp, pixclock);
- freq = temp;
-
- /*
- * 'pxclk' is the ratio of the platform clock to the pixel clock.
- * On the MPC8610, the value programmed into CLKDVDR is the ratio
- * minus one. The valid range of values is 2-31.
- */
- pxclk = DIV_ROUND_CLOSEST(fsl_get_sys_freq(), freq) - 1;
- pxclk = clamp_t(u32, pxclk, 2, 31);
-
- /* Disable the pixel clock, and set it to non-inverted and no delay */
- clrbits32(&guts->clkdvdr,
- CLKDVDR_PXCKEN | CLKDVDR_PXCKDLY | CLKDVDR_PXCLK_MASK);
-
- /* Enable the clock and set the pxclk */
- setbits32(&guts->clkdvdr, CLKDVDR_PXCKEN | (pxclk << 16));
-
- iounmap(guts);
-}
-
-enum fsl_diu_monitor_port
-mpc8610hpcd_valid_monitor_port(enum fsl_diu_monitor_port port)
-{
- return port;
-}
-
-#endif
-
-static void __init mpc86xx_hpcd_setup_arch(void)
-{
- struct resource r;
- unsigned char *pixis;
-
- if (ppc_md.progress)
- ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0);
-
- fsl_pci_assign_primary();
-
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
- diu_ops.get_pixel_format = mpc8610hpcd_get_pixel_format;
- diu_ops.set_gamma_table = mpc8610hpcd_set_gamma_table;
- diu_ops.set_monitor_port = mpc8610hpcd_set_monitor_port;
- diu_ops.set_pixel_clock = mpc8610hpcd_set_pixel_clock;
- diu_ops.valid_monitor_port = mpc8610hpcd_valid_monitor_port;
-#endif
-
- pixis_node = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
- if (pixis_node) {
- of_address_to_resource(pixis_node, 0, &r);
- of_node_put(pixis_node);
- pixis = ioremap(r.start, 32);
- if (!pixis) {
- printk(KERN_ERR "Err: can't map FPGA cfg register!\n");
- return;
- }
- pixis_bdcfg0 = pixis + 8;
- pixis_arch = pixis + 1;
- } else
- printk(KERN_ERR "Err: "
- "can't find device node 'fsl,fpga-pixis'\n");
-
- printk("MPC86xx HPCD board from Freescale Semiconductor\n");
-}
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc86xx_hpcd_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,MPC8610HPCD"))
- return 1; /* Looks good */
-
- return 0;
-}
-
-static long __init mpc86xx_time_init(void)
-{
- unsigned int temp;
-
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
- temp = mfspr(SPRN_HID0);
- temp |= HID0_TBEN;
- mtspr(SPRN_HID0, temp);
- asm volatile("isync");
-
- return 0;
-}
-
-define_machine(mpc86xx_hpcd) {
- .name = "MPC86xx HPCD",
- .probe = mpc86xx_hpcd_probe,
- .setup_arch = mpc86xx_hpcd_setup_arch,
- .init_IRQ = mpc86xx_init_irq,
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
-#endif
-};
diff --git a/arch/powerpc/platforms/86xx/mpc86xx.h b/arch/powerpc/platforms/86xx/mpc86xx.h
index 08efb57559d1..61e52c757e7f 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx.h
+++ b/arch/powerpc/platforms/86xx/mpc86xx.h
@@ -1,10 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#ifndef __MPC86XX_H__
@@ -17,5 +13,7 @@
extern void mpc86xx_smp_init(void);
extern void mpc86xx_init_irq(void);
+extern long mpc86xx_time_init(void);
+extern int mpc86xx_common_publish_devices(void);
#endif /* __MPC86XX_H__ */
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c b/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
deleted file mode 100644
index e8bf3fae5606..000000000000
--- a/arch/powerpc/platforms/86xx/mpc86xx_hpcn.c
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * MPC86xx HPCN board specific routines
- *
- * Recode: ZHANG WEI <wei.zhang@freescale.com>
- * Initial author: Xianghua Xiao <x.xiao@freescale.com>
- *
- * Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/of_platform.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <mm/mmu_decl.h>
-#include <asm/udbg.h>
-#include <asm/swiotlb.h>
-
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_pci.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc86xx.h"
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(fmt...) do { printk(KERN_ERR fmt); } while(0)
-#else
-#define DBG(fmt...) do { } while(0)
-#endif
-
-#ifdef CONFIG_PCI
-extern int uli_exclude_device(struct pci_controller *hose,
- u_char bus, u_char devfn);
-
-static int mpc86xx_exclude_device(struct pci_controller *hose,
- u_char bus, u_char devfn)
-{
- if (hose->dn == fsl_pci_primary)
- return uli_exclude_device(hose, bus, devfn);
-
- return PCIBIOS_SUCCESSFUL;
-}
-#endif /* CONFIG_PCI */
-
-
-static void __init
-mpc86xx_hpcn_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("mpc86xx_hpcn_setup_arch()", 0);
-
-#ifdef CONFIG_PCI
- ppc_md.pci_exclude_device = mpc86xx_exclude_device;
-#endif
-
- printk("MPC86xx HPCN board from Freescale Semiconductor\n");
-
-#ifdef CONFIG_SMP
- mpc86xx_smp_init();
-#endif
-
- fsl_pci_assign_primary();
-
- swiotlb_detect_4g();
-}
-
-
-static void
-mpc86xx_hpcn_show_cpuinfo(struct seq_file *m)
-{
- uint svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Freescale Semiconductor\n");
-
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-}
-
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init mpc86xx_hpcn_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "fsl,mpc8641hpcn"))
- return 1; /* Looks good */
-
- /* Be nice and don't give silent boot death. Delete this in 2.6.27 */
- if (of_flat_dt_is_compatible(root, "mpc86xx")) {
- pr_warning("WARNING: your dts/dtb is old. You must update before the next kernel release\n");
- return 1;
- }
-
- return 0;
-}
-
-static long __init
-mpc86xx_time_init(void)
-{
- unsigned int temp;
-
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
- temp = mfspr(SPRN_HID0);
- temp |= HID0_TBEN;
- mtspr(SPRN_HID0, temp);
- asm volatile("isync");
-
- return 0;
-}
-
-static __initdata struct of_device_id of_bus_ids[] = {
- { .compatible = "simple-bus", },
- { .compatible = "fsl,srio", },
- { .compatible = "gianfar", },
- { .compatible = "fsl,mpc8641-pcie", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
- return 0;
-}
-machine_arch_initcall(mpc86xx_hpcn, declare_of_platform_devices);
-machine_arch_initcall(mpc86xx_hpcn, swiotlb_setup_bus_notifier);
-
-define_machine(mpc86xx_hpcn) {
- .name = "MPC86xx HPCN",
- .probe = mpc86xx_hpcn_probe,
- .setup_arch = mpc86xx_hpcn_setup_arch,
- .init_IRQ = mpc86xx_init_irq,
- .show_cpuinfo = mpc86xx_hpcn_show_cpuinfo,
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
-#endif
-};
diff --git a/arch/powerpc/platforms/86xx/mpc86xx_smp.c b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
index af09baee22cb..9be33e41af6d 100644
--- a/arch/powerpc/platforms/86xx/mpc86xx_smp.c
+++ b/arch/powerpc/platforms/86xx/mpc86xx_smp.c
@@ -1,26 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Author: Xianghua Xiao <x.xiao@freescale.com>
* Zhang Wei <wei.zhang@freescale.com>
*
* Copyright 2006 Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/delay.h>
+#include <linux/pgtable.h>
-#include <asm/code-patching.h>
+#include <asm/text-patching.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/pci-bridge.h>
#include <asm/mpic.h>
#include <asm/cacheflush.h>
+#include <asm/inst.h>
#include <sysdev/fsl_soc.h>
@@ -86,8 +83,7 @@ smp_86xx_kick_cpu(int nr)
mdelay(1);
/* Restore the exception vector */
- *vector = save_vector;
- flush_icache_range((unsigned long) vector, (unsigned long) vector + 4);
+ patch_instruction(vector, ppc_inst(save_vector));
local_irq_restore(flags);
@@ -105,6 +101,7 @@ smp_86xx_setup_cpu(int cpu_nr)
struct smp_ops_t smp_86xx_ops = {
+ .cause_nmi_ipi = NULL,
.message_pass = smp_mpic_message_pass,
.probe = smp_mpic_probe,
.kick_cpu = smp_86xx_kick_cpu,
diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c
new file mode 100644
index 000000000000..cee49ecd32d2
--- /dev/null
+++ b/arch/powerpc/platforms/86xx/mvme7100.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Board setup routines for the Emerson/Artesyn MVME7100
+ *
+ * Copyright 2016 Elettra-Sincrotrone Trieste S.C.p.A.
+ *
+ * Author: Alessio Igor Bogani <alessio.bogani@elettra.eu>
+ *
+ * Based on earlier code by:
+ *
+ * Ajit Prem <ajit.prem@emerson.com>
+ * Copyright 2008 Emerson
+ *
+ * USB host fixup is borrowed from:
+ *
+ * Martyn Welch <martyn.welch@ge.com>
+ * Copyright 2008 GE Intelligent Platforms Embedded Systems, Inc.
+ */
+
+#include <linux/pci.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
+#include <asm/udbg.h>
+#include <asm/mpic.h>
+#include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
+
+#include "mpc86xx.h"
+
+#define MVME7100_INTERRUPT_REG_2_OFFSET 0x05
+#define MVME7100_DS1375_MASK 0x40
+#define MVME7100_MAX6649_MASK 0x20
+#define MVME7100_ABORT_MASK 0x10
+
+/*
+ * Setup the architecture
+ */
+static void __init mvme7100_setup_arch(void)
+{
+ struct device_node *bcsr_node;
+ void __iomem *mvme7100_regs = NULL;
+ u8 reg;
+
+ if (ppc_md.progress)
+ ppc_md.progress("mvme7100_setup_arch()", 0);
+
+#ifdef CONFIG_SMP
+ mpc86xx_smp_init();
+#endif
+
+ fsl_pci_assign_primary();
+
+ /* Remap BCSR registers */
+ bcsr_node = of_find_compatible_node(NULL, NULL,
+ "artesyn,mvme7100-bcsr");
+ if (bcsr_node) {
+ mvme7100_regs = of_iomap(bcsr_node, 0);
+ of_node_put(bcsr_node);
+ }
+
+ if (mvme7100_regs) {
+ /* Disable ds1375, max6649, and abort interrupts */
+ reg = readb(mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET);
+ reg |= MVME7100_DS1375_MASK | MVME7100_MAX6649_MASK
+ | MVME7100_ABORT_MASK;
+ writeb(reg, mvme7100_regs + MVME7100_INTERRUPT_REG_2_OFFSET);
+ } else
+ pr_warn("Unable to map board registers\n");
+
+ pr_info("MVME7100 board from Artesyn\n");
+}
+
+/*
+ * Called very early, device-tree isn't unflattened
+ */
+static int __init mvme7100_probe(void)
+{
+ unsigned long root = of_get_flat_dt_root();
+
+ return of_flat_dt_is_compatible(root, "artesyn,MVME7100");
+}
+
+static void mvme7100_usb_host_fixup(struct pci_dev *pdev)
+{
+ unsigned int val;
+
+ if (!machine_is(mvme7100))
+ return;
+
+ /* Ensure only ports 1 & 2 are enabled */
+ pci_read_config_dword(pdev, 0xe0, &val);
+ pci_write_config_dword(pdev, 0xe0, (val & ~7) | 0x2);
+
+ /* System clock is 48-MHz Oscillator and EHCI Enabled. */
+ pci_write_config_dword(pdev, 0xe4, 1 << 5);
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB,
+ mvme7100_usb_host_fixup);
+
+machine_arch_initcall(mvme7100, mpc86xx_common_publish_devices);
+
+define_machine(mvme7100) {
+ .name = "MVME7100",
+ .probe = mvme7100_probe,
+ .setup_arch = mvme7100_setup_arch,
+ .init_IRQ = mpc86xx_init_irq,
+ .get_irq = mpic_get_irq,
+ .time_init = mpc86xx_time_init,
+ .progress = udbg_progress,
+#ifdef CONFIG_PCI
+ .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
+#endif
+};
diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c
index d5b98c0f958a..9ca36de23532 100644
--- a/arch/powerpc/platforms/86xx/pic.c
+++ b/arch/powerpc/platforms/86xx/pic.c
@@ -1,28 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2008 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
#include <linux/stddef.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <asm/mpic.h>
#include <asm/i8259.h>
+#include "mpc86xx.h"
+
#ifdef CONFIG_PPC_I8259
-static void mpc86xx_8259_cascade(unsigned int irq, struct irq_desc *desc)
+static void mpc86xx_8259_cascade(struct irq_desc *desc)
{
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int cascade_irq = i8259_irq();
- if (cascade_irq != NO_IRQ)
+ if (cascade_irq)
generic_handle_irq(cascade_irq);
chip->irq_eoi(&desc->irq_data);
@@ -58,7 +56,7 @@ void __init mpc86xx_init_irq(void)
}
cascade_irq = irq_of_parse_and_map(cascade_node, 0);
- if (cascade_irq == NO_IRQ) {
+ if (!cascade_irq) {
printk(KERN_ERR "Failed to map cascade interrupt\n");
return;
}
diff --git a/arch/powerpc/platforms/86xx/sbc8641d.c b/arch/powerpc/platforms/86xx/sbc8641d.c
deleted file mode 100644
index b47a8fd0f3d3..000000000000
--- a/arch/powerpc/platforms/86xx/sbc8641d.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * SBC8641D board specific routines
- *
- * Copyright 2008 Wind River Systems Inc.
- *
- * By Paul Gortmaker (see MAINTAINERS for contact information)
- *
- * Based largely on the 8641 HPCN support by Freescale Semiconductor Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/kdev_t.h>
-#include <linux/delay.h>
-#include <linux/seq_file.h>
-#include <linux/of_platform.h>
-
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <mm/mmu_decl.h>
-#include <asm/udbg.h>
-
-#include <asm/mpic.h>
-
-#include <sysdev/fsl_pci.h>
-#include <sysdev/fsl_soc.h>
-
-#include "mpc86xx.h"
-
-static void __init
-sbc8641_setup_arch(void)
-{
- if (ppc_md.progress)
- ppc_md.progress("sbc8641_setup_arch()", 0);
-
- printk("SBC8641 board from Wind River\n");
-
-#ifdef CONFIG_SMP
- mpc86xx_smp_init();
-#endif
-
- fsl_pci_assign_primary();
-}
-
-
-static void
-sbc8641_show_cpuinfo(struct seq_file *m)
-{
- uint svid = mfspr(SPRN_SVR);
-
- seq_printf(m, "Vendor\t\t: Wind River Systems\n");
-
- seq_printf(m, "SVR\t\t: 0x%x\n", svid);
-}
-
-
-/*
- * Called very early, device-tree isn't unflattened
- */
-static int __init sbc8641_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "wind,sbc8641"))
- return 1; /* Looks good */
-
- return 0;
-}
-
-static long __init
-mpc86xx_time_init(void)
-{
- unsigned int temp;
-
- /* Set the time base to zero */
- mtspr(SPRN_TBWL, 0);
- mtspr(SPRN_TBWU, 0);
-
- temp = mfspr(SPRN_HID0);
- temp |= HID0_TBEN;
- mtspr(SPRN_HID0, temp);
- asm volatile("isync");
-
- return 0;
-}
-
-static __initdata struct of_device_id of_bus_ids[] = {
- { .compatible = "simple-bus", },
- { .compatible = "gianfar", },
- { .compatible = "fsl,mpc8641-pcie", },
- {},
-};
-
-static int __init declare_of_platform_devices(void)
-{
- of_platform_bus_probe(NULL, of_bus_ids, NULL);
-
- return 0;
-}
-machine_arch_initcall(sbc8641, declare_of_platform_devices);
-
-define_machine(sbc8641) {
- .name = "SBC8641D",
- .probe = sbc8641_probe,
- .setup_arch = sbc8641_setup_arch,
- .init_IRQ = mpc86xx_init_irq,
- .show_cpuinfo = sbc8641_show_cpuinfo,
- .get_irq = mpic_get_irq,
- .restart = fsl_rstcr_restart,
- .time_init = mpc86xx_time_init,
- .calibrate_decr = generic_calibrate_decr,
- .progress = udbg_progress,
-#ifdef CONFIG_PCI
- .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
-#endif
-};
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index bd6f1a1cf922..abb2b45b2789 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -1,6 +1,4 @@
-config FADS
- bool
-
+# SPDX-License-Identifier: GPL-2.0
config CPM1
bool
select CPM
@@ -8,12 +6,10 @@ config CPM1
choice
prompt "8xx Machine Type"
depends on PPC_8xx
- depends on 8xx
default MPC885ADS
config MPC8XXFADS
bool "FADS"
- select FADS
config MPC86XADS
bool "MPC86XADS"
@@ -96,44 +92,24 @@ endmenu
#
menu "MPC8xx CPM Options"
- depends on 8xx
+ depends on PPC_8xx
# This doesn't really belong here, but it is convenient to ask
# 8xx specific questions.
comment "Generic MPC8xx Options"
-config 8xx_COPYBACK
- bool "Copy-Back Data Cache (else Writethrough)"
- help
- Saying Y here will cause the cache on an MPC8xx processor to be used
- in Copy-Back mode. If you say N here, it is used in Writethrough
- mode.
-
- If in doubt, say Y here.
-
config 8xx_GPIO
bool "GPIO API Support"
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
help
Saying Y here will cause the ports on an MPC8xx processor to be used
with the GPIO API. If you say N here, the kernel needs less memory.
If in doubt, say Y here.
-config 8xx_CPU6
- bool "CPU6 Silicon Errata (860 Pre Rev. C)"
- help
- MPC860 CPUs, prior to Rev C have some bugs in the silicon, which
- require workarounds for Linux (and most other OSes to work). If you
- get a BUG() very early in boot, this might fix the problem. For
- more details read the document entitled "MPC860 Family Device Errata
- Reference" on Freescale's website. This option also incurs a
- performance hit.
-
- If in doubt, say N here.
-
config 8xx_CPU15
bool "CPU15 Silicon Errata"
+ depends on !HUGETLB_PAGE
default y
help
This enables a workaround for erratum CPU15 on MPC8xx chips.
@@ -172,6 +148,13 @@ config I2C_SPI_SMC1_UCODE_PATCH
help
Help not implemented yet, coming soon.
+config SMC_UCODE_PATCH
+ bool "SMC relocation patch"
+ help
+ This microcode relocates SMC1 and SMC2 parameter RAMs at
+ offset 0x1ec0 and 0x1fc0 to allow extended parameter RAM
+ for SCC3 and SCC4.
+
endchoice
config UCODE_PATCH
@@ -179,4 +162,45 @@ config UCODE_PATCH
default y
depends on !NO_UCODE_PATCH
+menu "8xx advanced setup"
+ depends on PPC_8xx
+
+config PIN_TLB
+ bool "Pinned Kernel TLBs"
+ depends on ADVANCED_OPTIONS
+ help
+ On the 8xx, we have 32 instruction TLBs and 32 data TLBs. In each
+ table 4 TLBs can be pinned.
+
+ It reduces the amount of usable TLBs to 28 (ie by 12%). That's the
+ reason why we make it selectable.
+
+ This option does nothing, it just activates the selection of what
+ to pin.
+
+config PIN_TLB_DATA
+ bool "Pinned TLB for DATA"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the first 32 Mbytes of memory with 8M pages.
+
+config PIN_TLB_IMMR
+ bool "Pinned TLB for IMMR"
+ depends on PIN_TLB
+ default y
+ help
+ This pins the IMMR area with a 512kbytes page. In case
+ CONFIG_PIN_TLB_DATA is also selected, it will reduce
+ CONFIG_PIN_TLB_DATA to 24 Mbytes.
+
+config PIN_TLB_TEXT
+ bool "Pinned TLB for TEXT"
+ depends on PIN_TLB
+ default y
+ help
+ This pins kernel text with 8M pages.
+
+endmenu
+
endmenu
diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile
index 76a81c3350a8..5a098f7d5d31 100644
--- a/arch/powerpc/platforms/8xx/Makefile
+++ b/arch/powerpc/platforms/8xx/Makefile
@@ -1,7 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the PowerPC 8xx linux kernel.
#
-obj-$(CONFIG_PPC_8xx) += m8xx_setup.o
+obj-y += m8xx_setup.o machine_check.o pic.o
+obj-$(CONFIG_CPM1) += cpm1.o cpm1-ic.o
+obj-$(CONFIG_UCODE_PATCH) += micropatch.o
obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o
obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o
obj-$(CONFIG_PPC_EP88XC) += ep88xc.o
diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c
index 82363e98f50e..d02f8dd66427 100644
--- a/arch/powerpc/platforms/8xx/adder875.c
+++ b/arch/powerpc/platforms/8xx/adder875.c
@@ -1,26 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
/* Analogue & Micro Adder MPC875 board support
*
* Author: Scott Wood <scottwood@freescale.com>
*
* Copyright (c) 2007 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
*/
#include <linux/init.h>
-#include <linux/fs_enet_pd.h>
#include <linux/of_platform.h>
#include <asm/time.h>
#include <asm/machdep.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
+#include <asm/8xx_immap.h>
#include <asm/udbg.h>
-#include <asm/prom.h>
#include "mpc8xx.h"
+#include "pic.h"
struct cpm_pin {
int port, pin, flags;
@@ -86,13 +82,7 @@ static void __init adder875_setup(void)
init_ioports();
}
-static int __init adder875_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "analogue-and-micro,adder875");
-}
-
-static __initdata struct of_device_id of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .compatible = "simple-bus", },
{},
};
@@ -106,13 +96,10 @@ machine_device_initcall(adder875, declare_of_platform_devices);
define_machine(adder875) {
.name = "Adder MPC875",
- .probe = adder875_probe,
+ .compatible = "analogue-and-micro,adder875",
.setup_arch = adder875_setup,
- .init_IRQ = mpc8xx_pics_init,
+ .init_IRQ = mpc8xx_pic_init,
.get_irq = mpc8xx_get_irq,
.restart = mpc8xx_restart,
- .calibrate_decr = generic_calibrate_decr,
- .set_rtc_time = mpc8xx_set_rtc_time,
- .get_rtc_time = mpc8xx_get_rtc_time,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/8xx/cpm1-ic.c b/arch/powerpc/platforms/8xx/cpm1-ic.c
new file mode 100644
index 000000000000..3292071e4da3
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/cpm1-ic.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Interrupt controller for the
+ * Communication Processor Module.
+ * Copyright (c) 1997 Dan Malek (dmalek@jlc.net)
+ */
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/platform_device.h>
+#include <asm/cpm1.h>
+
+struct cpm_pic_data {
+ cpic8xx_t __iomem *reg;
+ struct irq_domain *host;
+};
+
+static void cpm_mask_irq(struct irq_data *d)
+{
+ struct cpm_pic_data *data = irq_data_get_irq_chip_data(d);
+ unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
+
+ clrbits32(&data->reg->cpic_cimr, (1 << cpm_vec));
+}
+
+static void cpm_unmask_irq(struct irq_data *d)
+{
+ struct cpm_pic_data *data = irq_data_get_irq_chip_data(d);
+ unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
+
+ setbits32(&data->reg->cpic_cimr, (1 << cpm_vec));
+}
+
+static void cpm_end_irq(struct irq_data *d)
+{
+ struct cpm_pic_data *data = irq_data_get_irq_chip_data(d);
+ unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
+
+ out_be32(&data->reg->cpic_cisr, (1 << cpm_vec));
+}
+
+static struct irq_chip cpm_pic = {
+ .name = "CPM PIC",
+ .irq_mask = cpm_mask_irq,
+ .irq_unmask = cpm_unmask_irq,
+ .irq_eoi = cpm_end_irq,
+};
+
+static int cpm_get_irq(struct irq_desc *desc)
+{
+ struct cpm_pic_data *data = irq_desc_get_handler_data(desc);
+ int cpm_vec;
+
+ /*
+ * Get the vector by setting the ACK bit and then reading
+ * the register.
+ */
+ out_be16(&data->reg->cpic_civr, 1);
+ cpm_vec = in_be16(&data->reg->cpic_civr);
+ cpm_vec >>= 11;
+
+ return irq_find_mapping(data->host, cpm_vec);
+}
+
+static void cpm_cascade(struct irq_desc *desc)
+{
+ generic_handle_irq(cpm_get_irq(desc));
+}
+
+static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
+{
+ irq_set_chip_data(virq, h->host_data);
+ irq_set_status_flags(virq, IRQ_LEVEL);
+ irq_set_chip_and_handler(virq, &cpm_pic, handle_fasteoi_irq);
+ return 0;
+}
+
+static const struct irq_domain_ops cpm_pic_host_ops = {
+ .map = cpm_pic_host_map,
+};
+
+static int cpm_pic_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+ int irq;
+ struct cpm_pic_data *data;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->reg = devm_ioremap(dev, res->start, resource_size(res));
+ if (!data->reg)
+ return -ENODEV;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ /* Initialize the CPM interrupt controller. */
+ out_be32(&data->reg->cpic_cicr,
+ (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
+ ((virq_to_hw(irq) / 2) << 13) | CICR_HP_MASK);
+
+ out_be32(&data->reg->cpic_cimr, 0);
+
+ data->host = irq_domain_create_linear(dev_fwnode(dev), 64, &cpm_pic_host_ops, data);
+ if (!data->host)
+ return -ENODEV;
+
+ irq_set_handler_data(irq, data);
+ irq_set_chained_handler(irq, cpm_cascade);
+
+ setbits32(&data->reg->cpic_cicr, CICR_IEN);
+
+ return 0;
+}
+
+static const struct of_device_id cpm_pic_match[] = {
+ {
+ .compatible = "fsl,cpm1-pic",
+ }, {
+ .type = "cpm-pic",
+ .compatible = "CPM",
+ }, {},
+};
+
+static struct platform_driver cpm_pic_driver = {
+ .driver = {
+ .name = "cpm-pic",
+ .of_match_table = cpm_pic_match,
+ },
+ .probe = cpm_pic_probe,
+};
+
+static int __init cpm_pic_init(void)
+{
+ return platform_driver_register(&cpm_pic_driver);
+}
+arch_initcall(cpm_pic_init);
+
+/*
+ * The CPM can generate the error interrupt when there is a race condition
+ * between generating and masking interrupts. All we have to do is ACK it
+ * and return. This is a no-op function so we don't need any special
+ * tests in the interrupt handler.
+ */
+static irqreturn_t cpm_error_interrupt(int irq, void *dev)
+{
+ return IRQ_HANDLED;
+}
+
+static int cpm_error_probe(struct platform_device *pdev)
+{
+ int irq;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ return request_irq(irq, cpm_error_interrupt, IRQF_NO_THREAD, "error", NULL);
+}
+
+static const struct of_device_id cpm_error_ids[] = {
+ { .compatible = "fsl,cpm1" },
+ { .type = "cpm" },
+ {},
+};
+
+static struct platform_driver cpm_error_driver = {
+ .driver = {
+ .name = "cpm-error",
+ .of_match_table = cpm_error_ids,
+ },
+ .probe = cpm_error_probe,
+};
+
+static int __init cpm_error_init(void)
+{
+ return platform_driver_register(&cpm_error_driver);
+}
+subsys_initcall(cpm_error_init);
diff --git a/arch/powerpc/sysdev/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c
index 5e6ff38ea69f..7433be7d66ee 100644
--- a/arch/powerpc/sysdev/cpm1.c
+++ b/arch/powerpc/platforms/8xx/cpm1.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* General Purpose functions for the global management of the
* Communication Processor Module.
@@ -32,188 +33,35 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include <linux/of_irq.h>
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
#include <asm/io.h>
-#include <asm/tlbflush.h>
#include <asm/rheap.h>
-#include <asm/prom.h>
#include <asm/cpm.h>
+#include <asm/fixmap.h>
-#include <asm/fs_pd.h>
+#include <sysdev/fsl_soc.h>
#ifdef CONFIG_8xx_GPIO
-#include <linux/of_gpio.h>
+#include <linux/gpio/driver.h>
#endif
#define CPM_MAP_SIZE (0x4000)
cpm8xx_t __iomem *cpmp; /* Pointer to comm processor space */
-immap_t __iomem *mpc8xx_immr;
-static cpic8xx_t __iomem *cpic_reg;
-
-static struct irq_domain *cpm_pic_host;
-
-static void cpm_mask_irq(struct irq_data *d)
-{
- unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
-
- clrbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
-}
-
-static void cpm_unmask_irq(struct irq_data *d)
-{
- unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
-
- setbits32(&cpic_reg->cpic_cimr, (1 << cpm_vec));
-}
-
-static void cpm_end_irq(struct irq_data *d)
-{
- unsigned int cpm_vec = (unsigned int)irqd_to_hwirq(d);
-
- out_be32(&cpic_reg->cpic_cisr, (1 << cpm_vec));
-}
-
-static struct irq_chip cpm_pic = {
- .name = "CPM PIC",
- .irq_mask = cpm_mask_irq,
- .irq_unmask = cpm_unmask_irq,
- .irq_eoi = cpm_end_irq,
-};
-
-int cpm_get_irq(void)
-{
- int cpm_vec;
-
- /* Get the vector by setting the ACK bit and then reading
- * the register.
- */
- out_be16(&cpic_reg->cpic_civr, 1);
- cpm_vec = in_be16(&cpic_reg->cpic_civr);
- cpm_vec >>= 11;
-
- return irq_linear_revmap(cpm_pic_host, cpm_vec);
-}
-
-static int cpm_pic_host_map(struct irq_domain *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- pr_debug("cpm_pic_host_map(%d, 0x%lx)\n", virq, hw);
-
- irq_set_status_flags(virq, IRQ_LEVEL);
- irq_set_chip_and_handler(virq, &cpm_pic, handle_fasteoi_irq);
- return 0;
-}
-
-/* The CPM can generate the error interrupt when there is a race condition
- * between generating and masking interrupts. All we have to do is ACK it
- * and return. This is a no-op function so we don't need any special
- * tests in the interrupt handler.
- */
-static irqreturn_t cpm_error_interrupt(int irq, void *dev)
-{
- return IRQ_HANDLED;
-}
-
-static struct irqaction cpm_error_irqaction = {
- .handler = cpm_error_interrupt,
- .flags = IRQF_NO_THREAD,
- .name = "error",
-};
-
-static const struct irq_domain_ops cpm_pic_host_ops = {
- .map = cpm_pic_host_map,
-};
-
-unsigned int cpm_pic_init(void)
-{
- struct device_node *np = NULL;
- struct resource res;
- unsigned int sirq = NO_IRQ, hwirq, eirq;
- int ret;
-
- pr_debug("cpm_pic_init\n");
-
- np = of_find_compatible_node(NULL, NULL, "fsl,cpm1-pic");
- if (np == NULL)
- np = of_find_compatible_node(NULL, "cpm-pic", "CPM");
- if (np == NULL) {
- printk(KERN_ERR "CPM PIC init: can not find cpm-pic node\n");
- return sirq;
- }
-
- ret = of_address_to_resource(np, 0, &res);
- if (ret)
- goto end;
-
- cpic_reg = ioremap(res.start, resource_size(&res));
- if (cpic_reg == NULL)
- goto end;
-
- sirq = irq_of_parse_and_map(np, 0);
- if (sirq == NO_IRQ)
- goto end;
-
- /* Initialize the CPM interrupt controller. */
- hwirq = (unsigned int)virq_to_hw(sirq);
- out_be32(&cpic_reg->cpic_cicr,
- (CICR_SCD_SCC4 | CICR_SCC_SCC3 | CICR_SCB_SCC2 | CICR_SCA_SCC1) |
- ((hwirq/2) << 13) | CICR_HP_MASK);
-
- out_be32(&cpic_reg->cpic_cimr, 0);
-
- cpm_pic_host = irq_domain_add_linear(np, 64, &cpm_pic_host_ops, NULL);
- if (cpm_pic_host == NULL) {
- printk(KERN_ERR "CPM2 PIC: failed to allocate irq host!\n");
- sirq = NO_IRQ;
- goto end;
- }
-
- /* Install our own error handler. */
- np = of_find_compatible_node(NULL, NULL, "fsl,cpm1");
- if (np == NULL)
- np = of_find_node_by_type(NULL, "cpm");
- if (np == NULL) {
- printk(KERN_ERR "CPM PIC init: can not find cpm node\n");
- goto end;
- }
-
- eirq = irq_of_parse_and_map(np, 0);
- if (eirq == NO_IRQ)
- goto end;
-
- if (setup_irq(eirq, &cpm_error_irqaction))
- printk(KERN_ERR "Could not allocate CPM error IRQ!");
-
- setbits32(&cpic_reg->cpic_cicr, CICR_IEN);
-
-end:
- of_node_put(np);
- return sirq;
-}
+immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
void __init cpm_reset(void)
{
- sysconf8xx_t __iomem *siu_conf;
-
- mpc8xx_immr = ioremap(get_immrbase(), 0x4000);
- if (!mpc8xx_immr) {
- printk(KERN_CRIT "Could not map IMMR\n");
- return;
- }
-
cpmp = &mpc8xx_immr->im_cpm;
#ifndef CONFIG_PPC_EARLY_DEBUG_CPM
- /* Perform a reset.
- */
+ /* Perform a reset. */
out_be16(&cpmp->cp_cpcr, CPM_CR_RST | CPM_CR_FLG);
- /* Wait for it.
- */
+ /* Wait for it. */
while (in_be16(&cpmp->cp_cpcr) & CPM_CR_FLG);
#endif
@@ -221,17 +69,17 @@ void __init cpm_reset(void)
cpm_load_patch(cpmp);
#endif
- /* Set SDMA Bus Request priority 5.
+ /*
+ * Set SDMA Bus Request priority 5.
* On 860T, this also enables FEC priority 6. I am not sure
* this is what we really want for some applications, but the
* manual recommends it.
* Bit 25, FAM can also be set to use FEC aggressive mode (860T).
*/
- siu_conf = immr_map(im_siu_conf);
- out_be32(&siu_conf->sc_sdcr, 1);
- immr_unmap(siu_conf);
-
- cpm_muram_init();
+ if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */
+ out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40);
+ else
+ out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1);
}
static DEFINE_SPINLOCK(cmd_lock);
@@ -243,7 +91,7 @@ int cpm_command(u32 command, u8 opcode)
int i, ret;
unsigned long flags;
- if (command & 0xffffff0f)
+ if (command & 0xffffff03)
return -EINVAL;
spin_lock_irqsave(&cmd_lock, flags);
@@ -262,7 +110,8 @@ out:
}
EXPORT_SYMBOL(cpm_command);
-/* Set a baud rate generator. This needs lots of work. There are
+/*
+ * Set a baud rate generator. This needs lots of work. There are
* four BRGs, any of which can be wired to any channel.
* The internal baud rate clock is the system clock divided by 16.
* This assumes the baudrate is 16x oversampled by the uart.
@@ -276,11 +125,11 @@ cpm_setbrg(uint brg, uint rate)
{
u32 __iomem *bp;
- /* This is good enough to get SMCs running.....
- */
+ /* This is good enough to get SMCs running..... */
bp = &cpmp->cp_brgc1;
bp += brg;
- /* The BRG has a 12-bit counter. For really slow baud rates (or
+ /*
+ * The BRG has a 12-bit counter. For really slow baud rates (or
* really fast processors), we may have to further divide by 16.
*/
if (((BRG_UART_CLK / rate) - 1) < 4096)
@@ -289,6 +138,7 @@ cpm_setbrg(uint brg, uint rate)
out_be32(bp, (((BRG_UART_CLK_DIV16 / rate) - 1) << 1) |
CPM_BRG_EN | CPM_BRG_DIV16);
}
+EXPORT_SYMBOL(cpm_setbrg);
struct cpm_ioport16 {
__be16 dir, par, odr_sor, dat, intr;
@@ -303,7 +153,7 @@ struct cpm_ioport32e {
__be32 dir, par, sor, odr, dat;
};
-static void cpm1_set_pin32(int port, int pin, int flags)
+static void __init cpm1_set_pin32(int port, int pin, int flags)
{
struct cpm_ioport32e __iomem *iop;
pin = 1 << (31 - pin);
@@ -345,7 +195,7 @@ static void cpm1_set_pin32(int port, int pin, int flags)
}
}
-static void cpm1_set_pin16(int port, int pin, int flags)
+static void __init cpm1_set_pin16(int port, int pin, int flags)
{
struct cpm_ioport16 __iomem *iop =
(struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
@@ -376,10 +226,14 @@ static void cpm1_set_pin16(int port, int pin, int flags)
setbits16(&iop->odr_sor, pin);
else
clrbits16(&iop->odr_sor, pin);
+ if (flags & CPM_PIN_FALLEDGE)
+ setbits16(&iop->intr, pin);
+ else
+ clrbits16(&iop->intr, pin);
}
}
-void cpm1_set_pin(enum cpm_port port, int pin, int flags)
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
{
if (port == CPM_PORTB || port == CPM_PORTE)
cpm1_set_pin32(port, pin, flags);
@@ -387,7 +241,7 @@ void cpm1_set_pin(enum cpm_port port, int pin, int flags)
cpm1_set_pin16(port, pin, flags);
}
-int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
{
int shift;
int i, bits = 0;
@@ -522,31 +376,28 @@ int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
#ifdef CONFIG_8xx_GPIO
struct cpm1_gpio16_chip {
- struct of_mm_gpio_chip mm_gc;
+ struct gpio_chip gc;
+ void __iomem *regs;
spinlock_t lock;
/* shadowed data register to clear/set bits safely */
u16 cpdata;
-};
-static inline struct cpm1_gpio16_chip *
-to_cpm1_gpio16_chip(struct of_mm_gpio_chip *mm_gc)
-{
- return container_of(mm_gc, struct cpm1_gpio16_chip, mm_gc);
-}
+ /* IRQ associated with Pins when relevant */
+ int irq[16];
+};
-static void cpm1_gpio16_save_regs(struct of_mm_gpio_chip *mm_gc)
+static void cpm1_gpio16_save_regs(struct cpm1_gpio16_chip *cpm1_gc)
{
- struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc);
- struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ struct cpm_ioport16 __iomem *iop = cpm1_gc->regs;
cpm1_gc->cpdata = in_be16(&iop->dat);
}
static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
+ struct cpm_ioport16 __iomem *iop = cpm1_gc->regs;
u16 pin_mask;
pin_mask = 1 << (15 - gpio);
@@ -554,11 +405,9 @@ static int cpm1_gpio16_get(struct gpio_chip *gc, unsigned int gpio)
return !!(in_be16(&iop->dat) & pin_mask);
}
-static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask,
- int value)
+static void __cpm1_gpio16_set(struct cpm1_gpio16_chip *cpm1_gc, u16 pin_mask, int value)
{
- struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc);
- struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ struct cpm_ioport16 __iomem *iop = cpm1_gc->regs;
if (value)
cpm1_gc->cpdata |= pin_mask;
@@ -568,32 +417,39 @@ static void __cpm1_gpio16_set(struct of_mm_gpio_chip *mm_gc, u16 pin_mask,
out_be16(&iop->dat, cpm1_gc->cpdata);
}
-static void cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm1_gpio16_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc);
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
unsigned long flags;
u16 pin_mask = 1 << (15 - gpio);
spin_lock_irqsave(&cpm1_gc->lock, flags);
- __cpm1_gpio16_set(mm_gc, pin_mask, value);
+ __cpm1_gpio16_set(cpm1_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
+}
+
+static int cpm1_gpio16_to_irq(struct gpio_chip *gc, unsigned int gpio)
+{
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
+
+ return cpm1_gc->irq[gpio] ? : -ENXIO;
}
static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc);
- struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
+ struct cpm_ioport16 __iomem *iop = cpm1_gc->regs;
unsigned long flags;
u16 pin_mask = 1 << (15 - gpio);
spin_lock_irqsave(&cpm1_gc->lock, flags);
setbits16(&iop->dir, pin_mask);
- __cpm1_gpio16_set(mm_gc, pin_mask, val);
+ __cpm1_gpio16_set(cpm1_gc, pin_mask, val);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
@@ -602,9 +458,8 @@ static int cpm1_gpio16_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm1_gpio16_chip *cpm1_gc = to_cpm1_gpio16_chip(mm_gc);
- struct cpm_ioport16 __iomem *iop = mm_gc->regs;
+ struct cpm1_gpio16_chip *cpm1_gc = gpiochip_get_data(gc);
+ struct cpm_ioport16 __iomem *iop = cpm1_gc->regs;
unsigned long flags;
u16 pin_mask = 1 << (15 - gpio);
@@ -617,57 +472,71 @@ static int cpm1_gpio16_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm1_gpiochip_add16(struct device_node *np)
+int cpm1_gpiochip_add16(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm1_gpio16_chip *cpm1_gc;
- struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
+ u16 mask;
- cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
+ cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL);
if (!cpm1_gc)
return -ENOMEM;
spin_lock_init(&cpm1_gc->lock);
- mm_gc = &cpm1_gc->mm_gc;
- gc = &mm_gc->gc;
+ if (!of_property_read_u16(np, "fsl,cpm1-gpio-irq-mask", &mask)) {
+ int i, j;
+
+ for (i = 0, j = 0; i < 16; i++)
+ if (mask & (1 << (15 - i)))
+ cpm1_gc->irq[i] = irq_of_parse_and_map(np, j++);
+ }
- mm_gc->save_regs = cpm1_gpio16_save_regs;
+ gc = &cpm1_gc->gc;
+ gc->base = -1;
gc->ngpio = 16;
gc->direction_input = cpm1_gpio16_dir_in;
gc->direction_output = cpm1_gpio16_dir_out;
gc->get = cpm1_gpio16_get;
gc->set = cpm1_gpio16_set;
+ gc->to_irq = cpm1_gpio16_to_irq;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
+
+ gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np);
+ if (!gc->label)
+ return -ENOMEM;
- return of_mm_gpiochip_add(np, mm_gc);
+ cpm1_gc->regs = devm_of_iomap(dev, np, 0, NULL);
+ if (IS_ERR(cpm1_gc->regs))
+ return PTR_ERR(cpm1_gc->regs);
+
+ cpm1_gpio16_save_regs(cpm1_gc);
+
+ return devm_gpiochip_add_data(dev, gc, cpm1_gc);
}
struct cpm1_gpio32_chip {
- struct of_mm_gpio_chip mm_gc;
+ struct gpio_chip gc;
+ void __iomem *regs;
spinlock_t lock;
/* shadowed data register to clear/set bits safely */
u32 cpdata;
};
-static inline struct cpm1_gpio32_chip *
-to_cpm1_gpio32_chip(struct of_mm_gpio_chip *mm_gc)
-{
- return container_of(mm_gc, struct cpm1_gpio32_chip, mm_gc);
-}
-
-static void cpm1_gpio32_save_regs(struct of_mm_gpio_chip *mm_gc)
+static void cpm1_gpio32_save_regs(struct cpm1_gpio32_chip *cpm1_gc)
{
- struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc);
- struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ struct cpm_ioport32b __iomem *iop = cpm1_gc->regs;
cpm1_gc->cpdata = in_be32(&iop->dat);
}
static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc);
+ struct cpm_ioport32b __iomem *iop = cpm1_gc->regs;
u32 pin_mask;
pin_mask = 1 << (31 - gpio);
@@ -675,11 +544,9 @@ static int cpm1_gpio32_get(struct gpio_chip *gc, unsigned int gpio)
return !!(in_be32(&iop->dat) & pin_mask);
}
-static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
- int value)
+static void __cpm1_gpio32_set(struct cpm1_gpio32_chip *cpm1_gc, u32 pin_mask, int value)
{
- struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc);
- struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ struct cpm_ioport32b __iomem *iop = cpm1_gc->regs;
if (value)
cpm1_gc->cpdata |= pin_mask;
@@ -689,32 +556,32 @@ static void __cpm1_gpio32_set(struct of_mm_gpio_chip *mm_gc, u32 pin_mask,
out_be32(&iop->dat, cpm1_gc->cpdata);
}
-static void cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
+static int cpm1_gpio32_set(struct gpio_chip *gc, unsigned int gpio, int value)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc);
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc);
unsigned long flags;
u32 pin_mask = 1 << (31 - gpio);
spin_lock_irqsave(&cpm1_gc->lock, flags);
- __cpm1_gpio32_set(mm_gc, pin_mask, value);
+ __cpm1_gpio32_set(cpm1_gc, pin_mask, value);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
+
+ return 0;
}
static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc);
- struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc);
+ struct cpm_ioport32b __iomem *iop = cpm1_gc->regs;
unsigned long flags;
u32 pin_mask = 1 << (31 - gpio);
spin_lock_irqsave(&cpm1_gc->lock, flags);
setbits32(&iop->dir, pin_mask);
- __cpm1_gpio32_set(mm_gc, pin_mask, val);
+ __cpm1_gpio32_set(cpm1_gc, pin_mask, val);
spin_unlock_irqrestore(&cpm1_gc->lock, flags);
@@ -723,9 +590,8 @@ static int cpm1_gpio32_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
{
- struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
- struct cpm1_gpio32_chip *cpm1_gc = to_cpm1_gpio32_chip(mm_gc);
- struct cpm_ioport32b __iomem *iop = mm_gc->regs;
+ struct cpm1_gpio32_chip *cpm1_gc = gpiochip_get_data(gc);
+ struct cpm_ioport32b __iomem *iop = cpm1_gc->regs;
unsigned long flags;
u32 pin_mask = 1 << (31 - gpio);
@@ -738,52 +604,39 @@ static int cpm1_gpio32_dir_in(struct gpio_chip *gc, unsigned int gpio)
return 0;
}
-int cpm1_gpiochip_add32(struct device_node *np)
+int cpm1_gpiochip_add32(struct device *dev)
{
+ struct device_node *np = dev->of_node;
struct cpm1_gpio32_chip *cpm1_gc;
- struct of_mm_gpio_chip *mm_gc;
struct gpio_chip *gc;
- cpm1_gc = kzalloc(sizeof(*cpm1_gc), GFP_KERNEL);
+ cpm1_gc = devm_kzalloc(dev, sizeof(*cpm1_gc), GFP_KERNEL);
if (!cpm1_gc)
return -ENOMEM;
spin_lock_init(&cpm1_gc->lock);
- mm_gc = &cpm1_gc->mm_gc;
- gc = &mm_gc->gc;
-
- mm_gc->save_regs = cpm1_gpio32_save_regs;
+ gc = &cpm1_gc->gc;
+ gc->base = -1;
gc->ngpio = 32;
gc->direction_input = cpm1_gpio32_dir_in;
gc->direction_output = cpm1_gpio32_dir_out;
gc->get = cpm1_gpio32_get;
gc->set = cpm1_gpio32_set;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
- return of_mm_gpiochip_add(np, mm_gc);
-}
-
-static int cpm_init_par_io(void)
-{
- struct device_node *np;
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-a")
- cpm1_gpiochip_add16(np);
-
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-b")
- cpm1_gpiochip_add32(np);
+ gc->label = devm_kasprintf(dev, GFP_KERNEL, "%pOF", np);
+ if (!gc->label)
+ return -ENOMEM;
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-c")
- cpm1_gpiochip_add16(np);
+ cpm1_gc->regs = devm_of_iomap(dev, np, 0, NULL);
+ if (IS_ERR(cpm1_gc->regs))
+ return PTR_ERR(cpm1_gc->regs);
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-d")
- cpm1_gpiochip_add16(np);
+ cpm1_gpio32_save_regs(cpm1_gc);
- /* Port E uses CPM2 layout */
- for_each_compatible_node(np, NULL, "fsl,cpm1-pario-bank-e")
- cpm2_gpiochip_add32(np);
- return 0;
+ return devm_gpiochip_add_data(dev, gc, cpm1_gc);
}
-arch_initcall(cpm_init_par_io);
#endif /* CONFIG_8xx_GPIO */
diff --git a/arch/powerpc/platforms/8xx/ep88xc.c b/arch/powerpc/platforms/8xx/ep88xc.c
index e62166681d08..fc276a29d67f 100644
--- a/arch/powerpc/platforms/8xx/ep88xc.c
+++ b/arch/powerpc/platforms/8xx/ep88xc.c
@@ -20,6 +20,7 @@
#include <asm/cpm1.h>
#include "mpc8xx.h"
+#include "pic.h"
struct cpm_pin {
int port, pin, flags;
@@ -141,13 +142,7 @@ static void __init ep88xc_setup_arch(void)
BCSR8_PHY2_ENABLE | BCSR8_PHY2_POWER);
}
-static int __init ep88xc_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "fsl,ep88xc");
-}
-
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .name = "soc", },
{ .name = "cpm", },
{ .name = "localbus", },
@@ -165,13 +160,11 @@ machine_device_initcall(ep88xc, declare_of_platform_devices);
define_machine(ep88xc) {
.name = "Embedded Planet EP88xC",
- .probe = ep88xc_probe,
+ .compatible = "fsl,ep88xc",
.setup_arch = ep88xc_setup_arch,
- .init_IRQ = mpc8xx_pics_init,
+ .init_IRQ = mpc8xx_pic_init,
.get_irq = mpc8xx_get_irq,
.restart = mpc8xx_restart,
.calibrate_decr = mpc8xx_calibrate_decr,
- .set_rtc_time = mpc8xx_set_rtc_time,
- .get_rtc_time = mpc8xx_get_rtc_time,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index d3037747031d..2336b687bc96 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 1995 Linus Torvalds
* Adapted from 'alpha' version by Gary Thomas
@@ -16,20 +17,17 @@
#include <linux/time.h>
#include <linux/rtc.h>
#include <linux/fsl_devices.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
#include <asm/io.h>
#include <asm/8xx_immap.h>
-#include <asm/prom.h>
-#include <asm/fs_pd.h>
#include <mm/mmu_decl.h>
-#include <sysdev/mpc8xx_pic.h>
+#include "pic.h"
#include "mpc8xx.h"
-extern int cpm_pic_init(void);
-extern int cpm_get_irq(void);
-
/* A place holder for time base interrupts, if they are ever enabled. */
static irqreturn_t timebase_interrupt(int irq, void *dev)
{
@@ -38,26 +36,6 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
return IRQ_HANDLED;
}
-static struct irqaction tbint_irqaction = {
- .handler = timebase_interrupt,
- .flags = IRQF_NO_THREAD,
- .name = "tbint",
-};
-
-/* per-board overridable init_internal_rtc() function. */
-void __init __attribute__ ((weak))
-init_internal_rtc(void)
-{
- sit8xx_t __iomem *sys_tmr = immr_map(im_sit);
-
- /* Disable the RTC one second and alarm interrupts. */
- clrbits16(&sys_tmr->sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
-
- /* Enable the RTC */
- setbits16(&sys_tmr->sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
- immr_unmap(sys_tmr);
-}
-
static int __init get_freq(char *name, unsigned long *val)
{
struct device_node *cpu;
@@ -65,7 +43,7 @@ static int __init get_freq(char *name, unsigned long *val)
int found = 0;
/* The cpu node should have timebase and clock frequency properties */
- cpu = of_find_node_by_type(NULL, "cpu");
+ cpu = of_get_cpu_node(0, NULL);
if (cpu) {
fp = of_get_property(cpu, name, NULL);
@@ -87,23 +65,14 @@ static int __init get_freq(char *name, unsigned long *val)
void __init mpc8xx_calibrate_decr(void)
{
struct device_node *cpu;
- cark8xx_t __iomem *clk_r1;
- car8xx_t __iomem *clk_r2;
- sitk8xx_t __iomem *sys_tmr1;
- sit8xx_t __iomem *sys_tmr2;
int irq, virq;
- clk_r1 = immr_map(im_clkrstk);
-
/* Unlock the SCCR. */
- out_be32(&clk_r1->cark_sccrk, ~KAPWR_KEY);
- out_be32(&clk_r1->cark_sccrk, KAPWR_KEY);
- immr_unmap(clk_r1);
+ out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY);
/* Force all 8xx processors to use divide by 16 processor clock. */
- clk_r2 = immr_map(im_clkrst);
- setbits32(&clk_r2->car_sccr, 0x02000000);
- immr_unmap(clk_r2);
+ setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000);
/* Processor frequency is MHz.
*/
@@ -130,32 +99,34 @@ void __init mpc8xx_calibrate_decr(void)
* we guarantee the registers are locked, then we unlock them
* for our use.
*/
- sys_tmr1 = immr_map(im_sitk);
- out_be32(&sys_tmr1->sitk_tbscrk, ~KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_rtcsck, ~KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_tbk, ~KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_tbscrk, KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_rtcsck, KAPWR_KEY);
- out_be32(&sys_tmr1->sitk_tbk, KAPWR_KEY);
- immr_unmap(sys_tmr1);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY);
- init_internal_rtc();
+ /* Disable the RTC one second and alarm interrupts. */
+ clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE));
+
+ /* Enable the RTC */
+ setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE));
/* Enabling the decrementer also enables the timebase interrupts
* (or from the other point of view, to get decrementer interrupts
* we have to enable the timebase). The decrementer interrupt
* is wired into the vector table, nothing to do here for that.
*/
- cpu = of_find_node_by_type(NULL, "cpu");
+ cpu = of_get_cpu_node(0, NULL);
virq= irq_of_parse_and_map(cpu, 0);
+ of_node_put(cpu);
irq = virq_to_hw(virq);
- sys_tmr2 = immr_map(im_sit);
- out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) |
- (TBSCR_TBF | TBSCR_TBE));
- immr_unmap(sys_tmr2);
+ out_be16(&mpc8xx_immr->im_sit.sit_tbscr,
+ ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE));
- if (setup_irq(virq, &tbint_irqaction))
+ if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint",
+ NULL))
panic("Could not allocate timer IRQ!");
}
@@ -166,81 +137,36 @@ void __init mpc8xx_calibrate_decr(void)
int mpc8xx_set_rtc_time(struct rtc_time *tm)
{
- sitk8xx_t __iomem *sys_tmr1;
- sit8xx_t __iomem *sys_tmr2;
- int time;
+ time64_t time;
- sys_tmr1 = immr_map(im_sitk);
- sys_tmr2 = immr_map(im_sit);
- time = mktime(tm->tm_year+1900, tm->tm_mon+1, tm->tm_mday,
- tm->tm_hour, tm->tm_min, tm->tm_sec);
+ time = rtc_tm_to_time64(tm);
- out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY);
- out_be32(&sys_tmr2->sit_rtc, time);
- out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY);
+ out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time);
+ out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY);
- immr_unmap(sys_tmr2);
- immr_unmap(sys_tmr1);
return 0;
}
void mpc8xx_get_rtc_time(struct rtc_time *tm)
{
unsigned long data;
- sit8xx_t __iomem *sys_tmr = immr_map(im_sit);
/* Get time from the RTC. */
- data = in_be32(&sys_tmr->sit_rtc);
- to_tm(data, tm);
- tm->tm_year -= 1900;
- tm->tm_mon -= 1;
- immr_unmap(sys_tmr);
+ data = in_be32(&mpc8xx_immr->im_sit.sit_rtc);
+ rtc_time64_to_tm(data, tm);
return;
}
-void mpc8xx_restart(char *cmd)
+void __noreturn mpc8xx_restart(char *cmd)
{
- car8xx_t __iomem *clk_r = immr_map(im_clkrst);
-
-
local_irq_disable();
- setbits32(&clk_r->car_plprcr, 0x00000080);
+ setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080);
/* Clear the ME bit in MSR to cause checkstop on machine check
*/
mtmsr(mfmsr() & ~0x1000);
- in_8(&clk_r->res[0]);
+ in_8(&mpc8xx_immr->im_clkrst.res[0]);
panic("Restart failed\n");
}
-
-static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
-{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- int cascade_irq = cpm_get_irq();
-
- if (cascade_irq >= 0)
- generic_handle_irq(cascade_irq);
-
- chip->irq_eoi(&desc->irq_data);
-}
-
-/* Initialize the internal interrupt controllers. The number of
- * interrupts supported can vary with the processor type, and the
- * 82xx family can have up to 64.
- * External interrupts can be either edge or level triggered, and
- * need to be initialized by the appropriate driver.
- */
-void __init mpc8xx_pics_init(void)
-{
- int irq;
-
- if (mpc8xx_pic_init()) {
- printk(KERN_ERR "Failed interrupt 8xx controller initialization\n");
- return;
- }
-
- irq = cpm_pic_init();
- if (irq != NO_IRQ)
- irq_set_chained_handler(irq, cpm_cascade);
-}
diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c
new file mode 100644
index 000000000000..656365975895
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/machine_check.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ */
+
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/ptrace.h>
+
+#include <asm/reg.h>
+
+int machine_check_8xx(struct pt_regs *regs)
+{
+ unsigned long reason = regs->msr;
+
+ pr_err("Machine check in kernel mode.\n");
+ pr_err("Caused by (from SRR1=%lx): ", reason);
+ if (reason & 0x40000000)
+ pr_cont("Fetch error at address %lx\n", regs->nip);
+ else
+ pr_cont("Data access error at address %lx\n", regs->dar);
+
+#ifdef CONFIG_PCI
+ /* the qspan pci read routines can cause machine checks -- Cort
+ *
+ * yuck !!! that totally needs to go away ! There are better ways
+ * to deal with that than having a wart in the mcheck handler.
+ * -- BenH
+ */
+ bad_page_fault(regs, SIGBUS);
+ return 1;
+#else
+ return 0;
+#endif
+}
diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c
new file mode 100644
index 000000000000..aef179fcbd4f
--- /dev/null
+++ b/arch/powerpc/platforms/8xx/micropatch.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Microcode patches for the CPM as supplied by Motorola.
+ * This is the one for IIC/SPI. There is a newer one that
+ * also relocates SMC2, but this would require additional changes
+ * to uart.c, so I am holding off on that for a moment.
+ */
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <asm/irq.h>
+#include <asm/page.h>
+#include <asm/8xx_immap.h>
+#include <asm/cpm.h>
+#include <asm/cpm1.h>
+
+struct patch_params {
+ ushort rccr;
+ ushort cpmcr1;
+ ushort cpmcr2;
+ ushort cpmcr3;
+ ushort cpmcr4;
+};
+
+/*
+ * I2C/SPI relocation patch arrays.
+ */
+
+#ifdef CONFIG_I2C_SPI_UCODE_PATCH
+
+static char patch_name[] __initdata = "I2C/SPI";
+
+static struct patch_params patch_params __initdata = {
+ 1, 0x802a, 0x8028, 0x802e, 0x802c,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x7FFFEFD9, 0x3FFD0000, 0x7FFB49F7, 0x7FF90000,
+ 0x5FEFADF7, 0x5F89ADF7, 0x5FEFAFF7, 0x5F89AFF7,
+ 0x3A9CFBC8, 0xE7C0EDF0, 0x77C1E1BB, 0xF4DC7F1D,
+ 0xABAD932F, 0x4E08FDCF, 0x6E0FAFF8, 0x7CCF76CF,
+ 0xFD1FF9CF, 0xABF88DC6, 0xAB5679F7, 0xB0937383,
+ 0xDFCE79F7, 0xB091E6BB, 0xE5BBE74F, 0xB3FA6F0F,
+ 0x6FFB76CE, 0xEE0DF9CF, 0x2BFBEFEF, 0xCFEEF9CF,
+ 0x76CEAD24, 0x90B2DF9A, 0x7FDDD0BF, 0x4BF847FD,
+ 0x7CCF76CE, 0xCFEF7E1F, 0x7F1D7DFD, 0xF0B6EF71,
+ 0x7FC177C1, 0xFBC86079, 0xE722FBC8, 0x5FFFDFFF,
+ 0x5FB2FFFB, 0xFBC8F3C8, 0x94A67F01, 0x7F1D5F39,
+ 0xAFE85F5E, 0xFFDFDF96, 0xCB9FAF7D, 0x5FC1AFED,
+ 0x8C1C5FC1, 0xAFDD5FC3, 0xDF9A7EFD, 0xB0B25FB2,
+ 0xFFFEABAD, 0x5FB2FFFE, 0x5FCE600B, 0xE6BB600B,
+ 0x5FCEDFC6, 0x27FBEFDF, 0x5FC8CFDE, 0x3A9CE7C0,
+ 0xEDF0F3C8, 0x7F0154CD, 0x7F1D2D3D, 0x363A7570,
+ 0x7E0AF1CE, 0x37EF2E68, 0x7FEE10EC, 0xADF8EFDE,
+ 0xCFEAE52F, 0x7D0FE12B, 0xF1CE5F65, 0x7E0A4DF8,
+ 0xCFEA5F72, 0x7D0BEFEE, 0xCFEA5F74, 0xE522EFDE,
+ 0x5F74CFDA, 0x0B627385, 0xDF627E0A, 0x30D8145B,
+ 0xBFFFF3C8, 0x5FFFDFFF, 0xA7F85F5E, 0xBFFE7F7D,
+ 0x10D31450, 0x5F36BFFF, 0xAF785F5E, 0xBFFDA7F8,
+ 0x5F36BFFE, 0x77FD30C0, 0x4E08FDCF, 0xE5FF6E0F,
+ 0xAFF87E1F, 0x7E0FFD1F, 0xF1CF5F1B, 0xABF80D5E,
+ 0x5F5EFFEF, 0x79F730A2, 0xAFDD5F34, 0x47F85F34,
+ 0xAFED7FDD, 0x50B24978, 0x47FD7F1D, 0x7DFD70AD,
+ 0xEF717EC1, 0x6BA47F01, 0x2D267EFD, 0x30DE5F5E,
+ 0xFFFD5F5E, 0xFFEF5F5E, 0xFFDF0CA0, 0xAFED0A9E,
+ 0xAFDD0C3A, 0x5F3AAFBD, 0x7FBDB082, 0x5F8247F8
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x3E303430, 0x34343737, 0xABF7BF9B, 0x994B4FBD,
+ 0xBD599493, 0x349FFF37, 0xFB9B177D, 0xD9936956,
+ 0xBBFDD697, 0xBDD2FD11, 0x31DB9BB3, 0x63139637,
+ 0x93733693, 0x193137F7, 0x331737AF, 0x7BB9B999,
+ 0xBB197957, 0x7FDFD3D5, 0x73B773F7, 0x37933B99,
+ 0x1D115316, 0x99315315, 0x31694BF4, 0xFBDBD359,
+ 0x31497353, 0x76956D69, 0x7B9D9693, 0x13131979,
+ 0x79376935
+};
+
+static uint patch_2e00[] __initdata = {};
+#endif
+
+/*
+ * I2C/SPI/SMC1 relocation patch arrays.
+ */
+
+#ifdef CONFIG_I2C_SPI_SMC1_UCODE_PATCH
+
+static char patch_name[] __initdata = "I2C/SPI/SMC1";
+
+static struct patch_params patch_params __initdata = {
+ 3, 0x8080, 0x808a, 0x8028, 0x802a,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
+ 0x5f13eff8, 0x5eb5eff8, 0x5f88adf7, 0x5fefadf7,
+ 0x3a9cfbc8, 0x77cae1bb, 0xf4de7fad, 0xabae9330,
+ 0x4e08fdcf, 0x6e0faff8, 0x7ccf76cf, 0xfdaff9cf,
+ 0xabf88dc8, 0xab5879f7, 0xb0925d8d, 0xdfd079f7,
+ 0xb090e6bb, 0xe5bbe74f, 0x9e046f0f, 0x6ffb76ce,
+ 0xee0cf9cf, 0x2bfbefef, 0xcfeef9cf, 0x76cead23,
+ 0x90b3df99, 0x7fddd0c1, 0x4bf847fd, 0x7ccf76ce,
+ 0xcfef77ca, 0x7eaf7fad, 0x7dfdf0b7, 0xef7a7fca,
+ 0x77cafbc8, 0x6079e722, 0xfbc85fff, 0xdfff5fb3,
+ 0xfffbfbc8, 0xf3c894a5, 0xe7c9edf9, 0x7f9a7fad,
+ 0x5f36afe8, 0x5f5bffdf, 0xdf95cb9e, 0xaf7d5fc3,
+ 0xafed8c1b, 0x5fc3afdd, 0x5fc5df99, 0x7efdb0b3,
+ 0x5fb3fffe, 0xabae5fb3, 0xfffe5fd0, 0x600be6bb,
+ 0x600b5fd0, 0xdfc827fb, 0xefdf5fca, 0xcfde3a9c,
+ 0xe7c9edf9, 0xf3c87f9e, 0x54ca7fed, 0x2d3a3637,
+ 0x756f7e9a, 0xf1ce37ef, 0x2e677fee, 0x10ebadf8,
+ 0xefdecfea, 0xe52f7d9f, 0xe12bf1ce, 0x5f647e9a,
+ 0x4df8cfea, 0x5f717d9b, 0xefeecfea, 0x5f73e522,
+ 0xefde5f73, 0xcfda0b61, 0x5d8fdf61, 0xe7c9edf9,
+ 0x7e9a30d5, 0x1458bfff, 0xf3c85fff, 0xdfffa7f8,
+ 0x5f5bbffe, 0x7f7d10d0, 0x144d5f33, 0xbfffaf78,
+ 0x5f5bbffd, 0xa7f85f33, 0xbffe77fd, 0x30bd4e08,
+ 0xfdcfe5ff, 0x6e0faff8, 0x7eef7e9f, 0xfdeff1cf,
+ 0x5f17abf8, 0x0d5b5f5b, 0xffef79f7, 0x309eafdd,
+ 0x5f3147f8, 0x5f31afed, 0x7fdd50af, 0x497847fd,
+ 0x7f9e7fed, 0x7dfd70a9, 0xef7e7ece, 0x6ba07f9e,
+ 0x2d227efd, 0x30db5f5b, 0xfffd5f5b, 0xffef5f5b,
+ 0xffdf0c9c, 0xafed0a9a, 0xafdd0c37, 0x5f37afbd,
+ 0x7fbdb081, 0x5f8147f8, 0x3a11e710, 0xedf0ccdd,
+ 0xf3186d0a, 0x7f0e5f06, 0x7fedbb38, 0x3afe7468,
+ 0x7fedf4fc, 0x8ffbb951, 0xb85f77fd, 0xb0df5ddd,
+ 0xdefe7fed, 0x90e1e74d, 0x6f0dcbf7, 0xe7decfed,
+ 0xcb74cfed, 0xcfeddf6d, 0x91714f74, 0x5dd2deef,
+ 0x9e04e7df, 0xefbb6ffb, 0xe7ef7f0e, 0x9e097fed,
+ 0xebdbeffa, 0xeb54affb, 0x7fea90d7, 0x7e0cf0c3,
+ 0xbffff318, 0x5fffdfff, 0xac59efea, 0x7fce1ee5,
+ 0xe2ff5ee1, 0xaffbe2ff, 0x5ee3affb, 0xf9cc7d0f,
+ 0xaef8770f, 0x7d0fb0c6, 0xeffbbfff, 0xcfef5ede,
+ 0x7d0fbfff, 0x5ede4cf8, 0x7fddd0bf, 0x49f847fd,
+ 0x7efdf0bb, 0x7fedfffd, 0x7dfdf0b7, 0xef7e7e1e,
+ 0x5ede7f0e, 0x3a11e710, 0xedf0ccab, 0xfb18ad2e,
+ 0x1ea9bbb8, 0x74283b7e, 0x73c2e4bb, 0x2ada4fb8,
+ 0xdc21e4bb, 0xb2a1ffbf, 0x5e2c43f8, 0xfc87e1bb,
+ 0xe74ffd91, 0x6f0f4fe8, 0xc7ba32e2, 0xf396efeb,
+ 0x600b4f78, 0xe5bb760b, 0x53acaef8, 0x4ef88b0e,
+ 0xcfef9e09, 0xabf8751f, 0xefef5bac, 0x741f4fe8,
+ 0x751e760d, 0x7fdbf081, 0x741cafce, 0xefcc7fce,
+ 0x751e70ac, 0x741ce7bb, 0x3372cfed, 0xafdbefeb,
+ 0xe5bb760b, 0x53f2aef8, 0xafe8e7eb, 0x4bf8771e,
+ 0x7e247fed, 0x4fcbe2cc, 0x7fbc30a9, 0x7b0f7a0f,
+ 0x34d577fd, 0x308b5db7, 0xde553e5f, 0xaf78741f,
+ 0x741f30f0, 0xcfef5e2c, 0x741f3eac, 0xafb8771e,
+ 0x5e677fed, 0x0bd3e2cc, 0x741ccfec, 0xe5ca53cd,
+ 0x6fcb4f74, 0x5dadde4b, 0x2ab63d38, 0x4bb3de30,
+ 0x751f741c, 0x6c42effa, 0xefea7fce, 0x6ffc30be,
+ 0xefec3fca, 0x30b3de2e, 0xadf85d9e, 0xaf7daefd,
+ 0x5d9ede2e, 0x5d9eafdd, 0x761f10ac, 0x1da07efd,
+ 0x30adfffe, 0x4908fb18, 0x5fffdfff, 0xafbb709b,
+ 0x4ef85e67, 0xadf814ad, 0x7a0f70ad, 0xcfef50ad,
+ 0x7a0fde30, 0x5da0afed, 0x3c12780f, 0xefef780f,
+ 0xefef790f, 0xa7f85e0f, 0xffef790f, 0xefef790f,
+ 0x14adde2e, 0x5d9eadfd, 0x5e2dfffb, 0xe79addfd,
+ 0xeff96079, 0x607ae79a, 0xddfceff9, 0x60795dff,
+ 0x607acfef, 0xefefefdf, 0xefbfef7f, 0xeeffedff,
+ 0xebffe7ff, 0xafefafdf, 0xafbfaf7f, 0xaeffadff,
+ 0xabffa7ff, 0x6fef6fdf, 0x6fbf6f7f, 0x6eff6dff,
+ 0x6bff67ff, 0x2fef2fdf, 0x2fbf2f7f, 0x2eff2dff,
+ 0x2bff27ff, 0x4e08fd1f, 0xe5ff6e0f, 0xaff87eef,
+ 0x7e0ffdef, 0xf11f6079, 0xabf8f542, 0x7e0af11c,
+ 0x37cfae3a, 0x7fec90be, 0xadf8efdc, 0xcfeae52f,
+ 0x7d0fe12b, 0xf11c6079, 0x7e0a4df8, 0xcfea5dc4,
+ 0x7d0befec, 0xcfea5dc6, 0xe522efdc, 0x5dc6cfda,
+ 0x4e08fd1f, 0x6e0faff8, 0x7c1f761f, 0xfdeff91f,
+ 0x6079abf8, 0x761cee24, 0xf91f2bfb, 0xefefcfec,
+ 0xf91f6079, 0x761c27fb, 0xefdf5da7, 0xcfdc7fdd,
+ 0xd09c4bf8, 0x47fd7c1f, 0x761ccfcf, 0x7eef7fed,
+ 0x7dfdf093, 0xef7e7f1e, 0x771efb18, 0x6079e722,
+ 0xe6bbe5bb, 0xae0ae5bb, 0x600bae85, 0xe2bbe2bb,
+ 0xe2bbe2bb, 0xaf02e2bb, 0xe2bb2ff9, 0x6079e2bb
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x30303030, 0x3e3e3434, 0xabbf9b99, 0x4b4fbdbd,
+ 0x59949334, 0x9fff37fb, 0x9b177dd9, 0x936956bb,
+ 0xfbdd697b, 0xdd2fd113, 0x1db9f7bb, 0x36313963,
+ 0x79373369, 0x3193137f, 0x7331737a, 0xf7bb9b99,
+ 0x9bb19795, 0x77fdfd3d, 0x573b773f, 0x737933f7,
+ 0xb991d115, 0x31699315, 0x31531694, 0xbf4fbdbd,
+ 0x35931497, 0x35376956, 0xbd697b9d, 0x96931313,
+ 0x19797937, 0x6935af78, 0xb9b3baa3, 0xb8788683,
+ 0x368f78f7, 0x87778733, 0x3ffffb3b, 0x8e8f78b8,
+ 0x1d118e13, 0xf3ff3f8b, 0x6bd8e173, 0xd1366856,
+ 0x68d1687b, 0x3daf78b8, 0x3a3a3f87, 0x8f81378f,
+ 0xf876f887, 0x77fd8778, 0x737de8d6, 0xbbf8bfff,
+ 0xd8df87f7, 0xfd876f7b, 0x8bfff8bd, 0x8683387d,
+ 0xb873d87b, 0x3b8fd7f8, 0xf7338883, 0xbb8ee1f8,
+ 0xef837377, 0x3337b836, 0x817d11f8, 0x7378b878,
+ 0xd3368b7d, 0xed731b7d, 0x833731f3, 0xf22f3f23
+};
+
+static uint patch_2e00[] __initdata = {
+ 0x27eeeeee, 0xeeeeeeee, 0xeeeeeeee, 0xeeeeeeee,
+ 0xee4bf4fb, 0xdbd259bb, 0x1979577f, 0xdfd2d573,
+ 0xb773f737, 0x4b4fbdbd, 0x25b9b177, 0xd2d17376,
+ 0x956bbfdd, 0x697bdd2f, 0xff9f79ff, 0xff9ff22f
+};
+#endif
+
+/*
+ * USB SOF patch arrays.
+ */
+
+#ifdef CONFIG_USB_SOF_UCODE_PATCH
+
+static char patch_name[] __initdata = "USB SOF";
+
+static struct patch_params patch_params __initdata = {
+ 9,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x7fff0000, 0x7ffd0000, 0x7ffb0000, 0x49f7ba5b,
+ 0xba383ffb, 0xf9b8b46d, 0xe5ab4e07, 0xaf77bffe,
+ 0x3f7bbf79, 0xba5bba38, 0xe7676076, 0x60750000
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x3030304c, 0xcab9e441, 0xa1aaf220
+};
+
+static uint patch_2e00[] __initdata = {};
+#endif
+
+/*
+ * SMC relocation patch arrays.
+ */
+
+#ifdef CONFIG_SMC_UCODE_PATCH
+
+static char patch_name[] __initdata = "SMC";
+
+static struct patch_params patch_params __initdata = {
+ 2, 0x8080, 0x8088,
+};
+
+static uint patch_2000[] __initdata = {
+ 0x3fff0000, 0x3ffd0000, 0x3ffb0000, 0x3ff90000,
+ 0x5fefeff8, 0x5f91eff8, 0x3ff30000, 0x3ff10000,
+ 0x3a11e710, 0xedf0ccb9, 0xf318ed66, 0x7f0e5fe2,
+ 0x7fedbb38, 0x3afe7468, 0x7fedf4d8, 0x8ffbb92d,
+ 0xb83b77fd, 0xb0bb5eb9, 0xdfda7fed, 0x90bde74d,
+ 0x6f0dcbd3, 0xe7decfed, 0xcb50cfed, 0xcfeddf6d,
+ 0x914d4f74, 0x5eaedfcb, 0x9ee0e7df, 0xefbb6ffb,
+ 0xe7ef7f0e, 0x9ee57fed, 0xebb7effa, 0xeb30affb,
+ 0x7fea90b3, 0x7e0cf09f, 0xbffff318, 0x5fffdfff,
+ 0xac35efea, 0x7fce1fc1, 0xe2ff5fbd, 0xaffbe2ff,
+ 0x5fbfaffb, 0xf9a87d0f, 0xaef8770f, 0x7d0fb0a2,
+ 0xeffbbfff, 0xcfef5fba, 0x7d0fbfff, 0x5fba4cf8,
+ 0x7fddd09b, 0x49f847fd, 0x7efdf097, 0x7fedfffd,
+ 0x7dfdf093, 0xef7e7e1e, 0x5fba7f0e, 0x3a11e710,
+ 0xedf0cc87, 0xfb18ad0a, 0x1f85bbb8, 0x74283b7e,
+ 0x7375e4bb, 0x2ab64fb8, 0x5c7de4bb, 0x32fdffbf,
+ 0x5f0843f8, 0x7ce3e1bb, 0xe74f7ded, 0x6f0f4fe8,
+ 0xc7ba32be, 0x73f2efeb, 0x600b4f78, 0xe5bb760b,
+ 0x5388aef8, 0x4ef80b6a, 0xcfef9ee5, 0xabf8751f,
+ 0xefef5b88, 0x741f4fe8, 0x751e760d, 0x7fdb70dd,
+ 0x741cafce, 0xefcc7fce, 0x751e7088, 0x741ce7bb,
+ 0x334ecfed, 0xafdbefeb, 0xe5bb760b, 0x53ceaef8,
+ 0xafe8e7eb, 0x4bf8771e, 0x7e007fed, 0x4fcbe2cc,
+ 0x7fbc3085, 0x7b0f7a0f, 0x34b177fd, 0xb0e75e93,
+ 0xdf313e3b, 0xaf78741f, 0x741f30cc, 0xcfef5f08,
+ 0x741f3e88, 0xafb8771e, 0x5f437fed, 0x0bafe2cc,
+ 0x741ccfec, 0xe5ca53a9, 0x6fcb4f74, 0x5e89df27,
+ 0x2a923d14, 0x4b8fdf0c, 0x751f741c, 0x6c1eeffa,
+ 0xefea7fce, 0x6ffc309a, 0xefec3fca, 0x308fdf0a,
+ 0xadf85e7a, 0xaf7daefd, 0x5e7adf0a, 0x5e7aafdd,
+ 0x761f1088, 0x1e7c7efd, 0x3089fffe, 0x4908fb18,
+ 0x5fffdfff, 0xafbbf0f7, 0x4ef85f43, 0xadf81489,
+ 0x7a0f7089, 0xcfef5089, 0x7a0fdf0c, 0x5e7cafed,
+ 0xbc6e780f, 0xefef780f, 0xefef790f, 0xa7f85eeb,
+ 0xffef790f, 0xefef790f, 0x1489df0a, 0x5e7aadfd,
+ 0x5f09fffb, 0xe79aded9, 0xeff96079, 0x607ae79a,
+ 0xded8eff9, 0x60795edb, 0x607acfef, 0xefefefdf,
+ 0xefbfef7f, 0xeeffedff, 0xebffe7ff, 0xafefafdf,
+ 0xafbfaf7f, 0xaeffadff, 0xabffa7ff, 0x6fef6fdf,
+ 0x6fbf6f7f, 0x6eff6dff, 0x6bff67ff, 0x2fef2fdf,
+ 0x2fbf2f7f, 0x2eff2dff, 0x2bff27ff, 0x4e08fd1f,
+ 0xe5ff6e0f, 0xaff87eef, 0x7e0ffdef, 0xf11f6079,
+ 0xabf8f51e, 0x7e0af11c, 0x37cfae16, 0x7fec909a,
+ 0xadf8efdc, 0xcfeae52f, 0x7d0fe12b, 0xf11c6079,
+ 0x7e0a4df8, 0xcfea5ea0, 0x7d0befec, 0xcfea5ea2,
+ 0xe522efdc, 0x5ea2cfda, 0x4e08fd1f, 0x6e0faff8,
+ 0x7c1f761f, 0xfdeff91f, 0x6079abf8, 0x761cee00,
+ 0xf91f2bfb, 0xefefcfec, 0xf91f6079, 0x761c27fb,
+ 0xefdf5e83, 0xcfdc7fdd, 0x50f84bf8, 0x47fd7c1f,
+ 0x761ccfcf, 0x7eef7fed, 0x7dfd70ef, 0xef7e7f1e,
+ 0x771efb18, 0x6079e722, 0xe6bbe5bb, 0x2e66e5bb,
+ 0x600b2ee1, 0xe2bbe2bb, 0xe2bbe2bb, 0x2f5ee2bb,
+ 0xe2bb2ff9, 0x6079e2bb,
+};
+
+static uint patch_2f00[] __initdata = {
+ 0x30303030, 0x3e3e3030, 0xaf79b9b3, 0xbaa3b979,
+ 0x9693369f, 0x79f79777, 0x97333fff, 0xfb3b9e9f,
+ 0x79b91d11, 0x9e13f3ff, 0x3f9b6bd9, 0xe173d136,
+ 0x695669d1, 0x697b3daf, 0x79b93a3a, 0x3f979f91,
+ 0x379ff976, 0xf99777fd, 0x9779737d, 0xe9d6bbf9,
+ 0xbfffd9df, 0x97f7fd97, 0x6f7b9bff, 0xf9bd9683,
+ 0x397db973, 0xd97b3b9f, 0xd7f9f733, 0x9993bb9e,
+ 0xe1f9ef93, 0x73773337, 0xb936917d, 0x11f87379,
+ 0xb979d336, 0x8b7ded73, 0x1b7d9337, 0x31f3f22f,
+ 0x3f2327ee, 0xeeeeeeee, 0xeeeeeeee, 0xeeeeeeee,
+ 0xeeeeee4b, 0xf4fbdbd2, 0x58bb1878, 0x577fdfd2,
+ 0xd573b773, 0xf7374b4f, 0xbdbd25b8, 0xb177d2d1,
+ 0x7376856b, 0xbfdd687b, 0xdd2fff8f, 0x78ffff8f,
+ 0xf22f0000,
+};
+
+static uint patch_2e00[] __initdata = {};
+#endif
+
+static void __init cpm_write_patch(cpm8xx_t *cp, int offset, uint *patch, int len)
+{
+ if (!len)
+ return;
+ memcpy_toio(cp->cp_dpmem + offset, patch, len);
+}
+
+void __init cpm_load_patch(cpm8xx_t *cp)
+{
+ out_be16(&cp->cp_rccr, 0);
+
+ cpm_write_patch(cp, 0, patch_2000, sizeof(patch_2000));
+ cpm_write_patch(cp, 0xf00, patch_2f00, sizeof(patch_2f00));
+ cpm_write_patch(cp, 0xe00, patch_2e00, sizeof(patch_2e00));
+
+ if (IS_ENABLED(CONFIG_I2C_SPI_UCODE_PATCH) ||
+ IS_ENABLED(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)) {
+ u16 rpbase = 0x500;
+ iic_t *iip;
+ struct spi_pram *spp;
+
+ iip = (iic_t *)&cp->cp_dparam[PROFF_IIC];
+ out_be16(&iip->iic_rpbase, rpbase);
+
+ /* Put SPI above the IIC, also 32-byte aligned. */
+ spp = (struct spi_pram *)&cp->cp_dparam[PROFF_SPI];
+ out_be16(&spp->rpbase, (rpbase + sizeof(iic_t) + 31) & ~31);
+
+ if (IS_ENABLED(CONFIG_I2C_SPI_SMC1_UCODE_PATCH)) {
+ smc_uart_t *smp;
+
+ smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1];
+ out_be16(&smp->smc_rpbase, 0x1FC0);
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) {
+ smc_uart_t *smp;
+
+ if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) {
+ int i;
+
+ for (i = 0; i < sizeof(*smp); i += 4) {
+ u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i];
+ u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i];
+
+ out_be32(dst, in_be32(src));
+ }
+ }
+
+ smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1];
+ out_be16(&smp->smc_rpbase, 0x1ec0);
+ smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2];
+ out_be16(&smp->smc_rpbase, 0x1fc0);
+ }
+
+ out_be16(&cp->cp_cpmcr1, patch_params.cpmcr1);
+ out_be16(&cp->cp_cpmcr2, patch_params.cpmcr2);
+ out_be16(&cp->cp_cpmcr3, patch_params.cpmcr3);
+ out_be16(&cp->cp_cpmcr4, patch_params.cpmcr4);
+
+ out_be16(&cp->cp_rccr, patch_params.rccr);
+
+ pr_info("%s microcode patch installed\n", patch_name);
+}
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
index 63084640c5c5..e4192c0a3c0c 100644
--- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c
@@ -24,11 +24,11 @@
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
#include <asm/udbg.h>
#include "mpc86xads.h"
#include "mpc8xx.h"
+#include "pic.h"
struct cpm_pin {
int port, pin, flags;
@@ -116,13 +116,7 @@ static void __init mpc86xads_setup_arch(void)
iounmap(bcsr_io);
}
-static int __init mpc86xads_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "fsl,mpc866ads");
-}
-
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .name = "soc", },
{ .name = "cpm", },
{ .name = "localbus", },
@@ -139,9 +133,9 @@ machine_device_initcall(mpc86x_ads, declare_of_platform_devices);
define_machine(mpc86x_ads) {
.name = "MPC86x ADS",
- .probe = mpc86xads_probe,
+ .compatible = "fsl,mpc866ads",
.setup_arch = mpc86xads_setup_arch,
- .init_IRQ = mpc8xx_pics_init,
+ .init_IRQ = mpc8xx_pic_init,
.get_irq = mpc8xx_get_irq,
.restart = mpc8xx_restart,
.calibrate_decr = mpc8xx_calibrate_decr,
diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
index 5921dcb498fd..2d899be746eb 100644
--- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c
+++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c
@@ -21,8 +21,6 @@
#include <linux/device.h>
#include <linux/delay.h>
-#include <linux/fs_enet_pd.h>
-#include <linux/fs_uart_pd.h>
#include <linux/fsl_devices.h>
#include <linux/mii.h>
#include <linux/of_address.h>
@@ -37,11 +35,11 @@
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
#include <asm/udbg.h>
#include "mpc885ads.h"
#include "mpc8xx.h"
+#include "pic.h"
static u32 __iomem *bcsr, *bcsr5;
@@ -191,13 +189,7 @@ static void __init mpc885ads_setup_arch(void)
}
}
-static int __init mpc885ads_probe(void)
-{
- unsigned long root = of_get_flat_dt_root();
- return of_flat_dt_is_compatible(root, "fsl,mpc885ads");
-}
-
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .name = "soc", },
{ .name = "cpm", },
{ .name = "localbus", },
@@ -215,13 +207,11 @@ machine_device_initcall(mpc885_ads, declare_of_platform_devices);
define_machine(mpc885_ads) {
.name = "Freescale MPC885 ADS",
- .probe = mpc885ads_probe,
+ .compatible = "fsl,mpc885ads",
.setup_arch = mpc885ads_setup_arch,
- .init_IRQ = mpc8xx_pics_init,
+ .init_IRQ = mpc8xx_pic_init,
.get_irq = mpc8xx_get_irq,
.restart = mpc8xx_restart,
.calibrate_decr = mpc8xx_calibrate_decr,
- .set_rtc_time = mpc8xx_set_rtc_time,
- .get_rtc_time = mpc8xx_get_rtc_time,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/8xx/mpc8xx.h b/arch/powerpc/platforms/8xx/mpc8xx.h
index 239a243a6161..79fae3324866 100644
--- a/arch/powerpc/platforms/8xx/mpc8xx.h
+++ b/arch/powerpc/platforms/8xx/mpc8xx.h
@@ -11,11 +11,10 @@
#ifndef __MPC8xx_H
#define __MPC8xx_H
-extern void mpc8xx_restart(char *cmd);
+extern void __noreturn mpc8xx_restart(char *cmd);
extern void mpc8xx_calibrate_decr(void);
extern int mpc8xx_set_rtc_time(struct rtc_time *tm);
extern void mpc8xx_get_rtc_time(struct rtc_time *tm);
-extern void mpc8xx_pics_init(void);
extern unsigned int mpc8xx_get_irq(void);
#endif /* __MPC8xx_H */
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/platforms/8xx/pic.c
index c4828c0be5bd..933d6ab7f512 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -4,18 +4,17 @@
#include <linux/signal.h>
#include <linux/irq.h>
#include <linux/dma-mapping.h>
-#include <asm/prom.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <asm/irq.h>
#include <asm/io.h>
#include <asm/8xx_immap.h>
-#include "mpc8xx_pic.h"
+#include "pic.h"
#define PIC_VEC_SPURRIOUS 15
-extern int cpm_get_irq(struct pt_regs *regs);
-
static struct irq_domain *mpc8xx_pic_host;
static unsigned long mpc8xx_cached_irq_mask;
static sysconf8xx_t __iomem *siu_reg;
@@ -55,13 +54,13 @@ static int mpc8xx_set_irq_type(struct irq_data *d, unsigned int flow_type)
unsigned int siel = in_be32(&siu_reg->sc_siel);
siel |= mpc8xx_irqd_to_bit(d);
out_be32(&siu_reg->sc_siel, siel);
- __irq_set_handler_locked(d->irq, handle_edge_irq);
+ irq_set_handler_locked(d, handle_edge_irq);
}
return 0;
}
static struct irq_chip mpc8xx_pic = {
- .name = "MPC8XX SIU",
+ .name = "8XX SIU",
.irq_unmask = mpc8xx_unmask_irq,
.irq_mask = mpc8xx_mask_irq,
.irq_ack = mpc8xx_ack,
@@ -79,9 +78,9 @@ unsigned int mpc8xx_get_irq(void)
irq = in_be32(&siu_reg->sc_sivec) >> 26;
if (irq == PIC_VEC_SPURRIOUS)
- irq = NO_IRQ;
+ return 0;
- return irq_linear_revmap(mpc8xx_pic_host, irq);
+ return irq_find_mapping(mpc8xx_pic_host, irq);
}
@@ -120,12 +119,12 @@ static int mpc8xx_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
}
-static struct irq_domain_ops mpc8xx_pic_host_ops = {
+static const struct irq_domain_ops mpc8xx_pic_host_ops = {
.map = mpc8xx_pic_host_map,
.xlate = mpc8xx_pic_host_xlate,
};
-int mpc8xx_pic_init(void)
+void __init mpc8xx_pic_init(void)
{
struct resource res;
struct device_node *np;
@@ -136,7 +135,7 @@ int mpc8xx_pic_init(void)
np = of_find_node_by_type(NULL, "mpc8xx-pic");
if (np == NULL) {
printk(KERN_ERR "Could not find fsl,pq1-pic node\n");
- return -ENOMEM;
+ return;
}
ret = of_address_to_resource(np, 0, &res);
@@ -144,20 +143,14 @@ int mpc8xx_pic_init(void)
goto out;
siu_reg = ioremap(res.start, resource_size(&res));
- if (siu_reg == NULL) {
- ret = -EINVAL;
+ if (!siu_reg)
goto out;
- }
- mpc8xx_pic_host = irq_domain_add_linear(np, 64, &mpc8xx_pic_host_ops, NULL);
- if (mpc8xx_pic_host == NULL) {
+ mpc8xx_pic_host = irq_domain_create_linear(of_fwnode_handle(np), 64,
+ &mpc8xx_pic_host_ops, NULL);
+ if (!mpc8xx_pic_host)
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
- ret = -ENOMEM;
- goto out;
- }
- return 0;
out:
of_node_put(np);
- return ret;
}
diff --git a/arch/powerpc/sysdev/mpc8xx_pic.h b/arch/powerpc/platforms/8xx/pic.h
index 9fe00eebdc8b..c70f1b446f94 100644
--- a/arch/powerpc/sysdev/mpc8xx_pic.h
+++ b/arch/powerpc/platforms/8xx/pic.h
@@ -4,7 +4,7 @@
#include <linux/irq.h>
#include <linux/interrupt.h>
-int mpc8xx_pic_init(void);
+void mpc8xx_pic_init(void);
unsigned int mpc8xx_get_irq(void);
/*
diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
index dda607807def..d97a7910c594 100644
--- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c
@@ -24,8 +24,6 @@
#include <linux/device.h>
#include <linux/delay.h>
-#include <linux/fs_enet_pd.h>
-#include <linux/fs_uart_pd.h>
#include <linux/fsl_devices.h>
#include <linux/mii.h>
#include <linux/of_fdt.h>
@@ -39,10 +37,10 @@
#include <asm/time.h>
#include <asm/8xx_immap.h>
#include <asm/cpm1.h>
-#include <asm/fs_pd.h>
#include <asm/udbg.h>
#include "mpc8xx.h"
+#include "pic.h"
struct cpm_pin {
int port, pin, flags;
@@ -104,6 +102,9 @@ static void __init init_ioports(void)
if (dnode == NULL)
return;
prop = of_find_property(dnode, "ethernet1", &len);
+
+ of_node_put(dnode);
+
if (prop == NULL)
return;
@@ -117,14 +118,7 @@ static void __init tqm8xx_setup_arch(void)
init_ioports();
}
-static int __init tqm8xx_probe(void)
-{
- unsigned long node = of_get_flat_dt_root();
-
- return of_flat_dt_is_compatible(node, "tqc,tqm8xx");
-}
-
-static struct of_device_id __initdata of_bus_ids[] = {
+static const struct of_device_id of_bus_ids[] __initconst = {
{ .name = "soc", },
{ .name = "cpm", },
{ .name = "localbus", },
@@ -142,9 +136,9 @@ machine_device_initcall(tqm8xx, declare_of_platform_devices);
define_machine(tqm8xx) {
.name = "TQM8xx",
- .probe = tqm8xx_probe,
+ .compatible = "tqc,tqm8xx",
.setup_arch = tqm8xx_setup_arch,
- .init_IRQ = mpc8xx_pics_init,
+ .init_IRQ = mpc8xx_pic_init,
.get_irq = mpc8xx_get_irq,
.restart = mpc8xx_restart,
.calibrate_decr = mpc8xx_calibrate_decr,
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 391b3f6b54a3..c4e61843d9d9 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
menu "Platform support"
source "arch/powerpc/platforms/powernv/Kconfig"
@@ -6,7 +7,6 @@ source "arch/powerpc/platforms/chrp/Kconfig"
source "arch/powerpc/platforms/512x/Kconfig"
source "arch/powerpc/platforms/52xx/Kconfig"
source "arch/powerpc/platforms/powermac/Kconfig"
-source "arch/powerpc/platforms/maple/Kconfig"
source "arch/powerpc/platforms/pasemi/Kconfig"
source "arch/powerpc/platforms/ps3/Kconfig"
source "arch/powerpc/platforms/cell/Kconfig"
@@ -17,14 +17,14 @@ source "arch/powerpc/platforms/85xx/Kconfig"
source "arch/powerpc/platforms/86xx/Kconfig"
source "arch/powerpc/platforms/embedded6xx/Kconfig"
source "arch/powerpc/platforms/44x/Kconfig"
-source "arch/powerpc/platforms/40x/Kconfig"
source "arch/powerpc/platforms/amigaone/Kconfig"
+source "arch/powerpc/platforms/book3s/Kconfig"
+source "arch/powerpc/platforms/microwatt/Kconfig"
config KVM_GUEST
bool "KVM Guest support"
- default n
select EPAPR_PARAVIRT
- ---help---
+ help
This option enables various optimizations for running under the KVM
hypervisor. Overhead for the kernel when not running inside KVM should
be minimal.
@@ -33,15 +33,14 @@ config KVM_GUEST
config EPAPR_PARAVIRT
bool "ePAPR para-virtualization support"
- default n
help
Enables ePAPR para-virtualization support for guests.
In case of doubt, say Y
-config PPC_NATIVE
+config PPC_HASH_MMU_NATIVE
bool
- depends on 6xx || PPC64
+ depends on PPC_BOOK3S
help
Support for running natively on the hardware, i.e. without
a hypervisor. This option is not user-selectable but should
@@ -49,7 +48,8 @@ config PPC_NATIVE
config PPC_OF_BOOT_TRAMPOLINE
bool "Support booting from Open Firmware or yaboot"
- depends on 6xx || PPC64
+ depends on PPC_BOOK3S_32 || PPC64
+ select RELOCATABLE if PPC64
default y
help
Support for booting from Open Firmware or yaboot using an
@@ -59,36 +59,34 @@ config PPC_OF_BOOT_TRAMPOLINE
In case of doubt, say Y
-config UDBG_RTAS_CONSOLE
- bool "RTAS based debug console"
- depends on PPC_RTAS
- default n
+config PPC_DT_CPU_FTRS
+ bool "Device-tree based CPU feature discovery & setup"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ This enables code to use a new device tree binding for describing CPU
+ compatibility and features. Saying Y here will attempt to use the new
+ binding if the firmware provides it. Currently only the skiboot
+ firmware provides this binding.
+ If you're not sure say Y.
config PPC_SMP_MUXED_IPI
bool
help
- Select this opton if your platform supports SMP and your
+ Select this option if your platform supports SMP and your
interrupt controller provides less than 4 interrupts to each
cpu. This will enable the generic code to multiplex the 4
messages on to one ipi.
-config PPC_UDBG_BEAT
- bool "BEAT based debug console"
- depends on PPC_CELLEB
- default n
-
config IPIC
bool
- default n
config MPIC
bool
- default n
config MPIC_TIMER
bool "MPIC Global Timer"
depends on MPIC && FSL_SOC
- default n
help
The MPIC global timer is a hardware timer inside the
Freescale PIC complying with OpenPIC standard. When the
@@ -100,7 +98,6 @@ config MPIC_TIMER
config FSL_MPIC_TIMER_WAKEUP
tristate "Freescale MPIC global timer wakeup driver"
depends on FSL_SOC && MPIC_TIMER && PM
- default n
help
The driver provides a way to wake up the system by MPIC
timer.
@@ -108,43 +105,35 @@ config FSL_MPIC_TIMER_WAKEUP
config PPC_EPAPR_HV_PIC
bool
- default n
select EPAPR_PARAVIRT
config MPIC_WEIRD
bool
- default n
config MPIC_MSGR
bool "MPIC message register support"
depends on MPIC
- default n
help
Enables support for the MPIC message registers. These
registers are used for inter-processor communication.
config PPC_I8259
bool
- default n
config U3_DART
bool
depends on PPC64
- default n
config PPC_RTAS
bool
- default n
config RTAS_ERROR_LOGGING
bool
depends on PPC_RTAS
- default n
config PPC_RTAS_DAEMON
bool
depends on PPC_RTAS
- default n
config RTAS_PROC
bool "Proc interface to RTAS"
@@ -157,11 +146,9 @@ config RTAS_FLASH
config MMIO_NVRAM
bool
- default n
config MPIC_U3_HT_IRQS
bool
- default n
config MPIC_BROKEN_REGREAD
bool
@@ -173,17 +160,6 @@ config MPIC_BROKEN_REGREAD
well, but enabling it uses about 8KB of memory to keep copies
of the register contents in software.
-config IBMVIO
- depends on PPC_PSERIES
- bool
- default y
-
-config IBMEBUS
- depends on PPC_PSERIES
- bool "Support for GX bus based adapters"
- help
- Bus device driver for GX bus based adapters.
-
config EEH
bool
depends on (PPC_POWERNV || PPC_PSERIES) && PCI
@@ -191,26 +167,21 @@ config EEH
config PPC_MPC106
bool
- default n
config PPC_970_NAP
bool
- default n
config PPC_P7_NAP
bool
- default n
+
+config PPC_BOOK3S_IDLE
+ def_bool y
+ depends on (PPC_970_NAP || PPC_P7_NAP)
config PPC_INDIRECT_PIO
bool
select GENERIC_IOMAP
-config PPC_INDIRECT_MMIO
- bool
-
-config PPC_IO_WORKAROUNDS
- bool
-
source "drivers/cpufreq/Kconfig"
menu "CPUIdle driver"
@@ -219,53 +190,33 @@ source "drivers/cpuidle/Kconfig"
endmenu
-config PPC601_SYNC_FIX
- bool "Workarounds for PPC601 bugs"
- depends on 6xx && PPC_PMAC
- help
- Some versions of the PPC601 (the first PowerPC chip) have bugs which
- mean that extra synchronization instructions are required near
- certain instructions, typically those that make major changes to the
- CPU state. These extra instructions reduce performance slightly.
- If you say N here, these extra instructions will not be included,
- resulting in a kernel which will run faster but may not run at all
- on some systems with the PPC601 chip.
-
- If in doubt, say Y here.
-
config TAU
bool "On-chip CPU temperature sensor support"
- depends on 6xx
+ depends on PPC_BOOK3S_32
help
G3 and G4 processors have an on-chip temperature sensor called the
'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die
temperature within 2-4 degrees Celsius. This option shows the current
on-die temperature in /proc/cpuinfo if the cpu supports it.
- Unfortunately, on some chip revisions, this sensor is very inaccurate
- and in many cases, does not work at all, so don't assume the cpu
- temp is actually what /proc/cpuinfo says it is.
+ Unfortunately, this sensor is very inaccurate when uncalibrated, so
+ don't assume the cpu temp is actually what /proc/cpuinfo says it is.
config TAU_INT
- bool "Interrupt driven TAU driver (DANGEROUS)"
+ bool "Interrupt driven TAU driver (EXPERIMENTAL)"
depends on TAU
- ---help---
+ help
The TAU supports an interrupt driven mode which causes an interrupt
whenever the temperature goes out of range. This is the fastest way
to get notified the temp has exceeded a range. With this option off,
a timer is used to re-check the temperature periodically.
- However, on some cpus it appears that the TAU interrupt hardware
- is buggy and can cause a situation which would lead unexplained hard
- lockups.
-
- Unless you are extending the TAU driver, or enjoy kernel/hardware
- debugging, leave this option off.
+ If in doubt, say N here.
config TAU_AVERAGE
bool "Average high and low temp"
depends on TAU
- ---help---
+ help
The TAU hardware can compare the temperature to an upper and lower
bound. The default behavior is to show both the upper and lower
bound in /proc/cpuinfo. If the range is large, the temperature is
@@ -277,51 +228,31 @@ config TAU_AVERAGE
If in doubt, say N here.
-config QUICC_ENGINE
- bool "Freescale QUICC Engine (QE) Support"
- depends on FSL_SOC && PPC32
- select PPC_LIB_RHEAP
- select CRC32
- help
- The QUICC Engine (QE) is a new generation of communications
- coprocessors on Freescale embedded CPUs (akin to CPM in older chips).
- Selecting this option means that you wish to build a kernel
- for a machine with a QE coprocessor.
-
config QE_GPIO
bool "QE GPIO support"
depends on QUICC_ENGINE
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
help
Say Y here if you're going to use hardware that connects to the
QE GPIOs.
config CPM2
bool "Enable support for the CPM2 (Communications Processor Module)"
- depends on (FSL_SOC_BOOKE && PPC32) || 8260
+ depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx
select CPM
- select PPC_LIB_RHEAP
- select PPC_PCI_CHOICE
- select ARCH_REQUIRE_GPIOLIB
+ select HAVE_PCI
+ select GPIOLIB
help
The CPM2 (Communications Processor Module) is a coprocessor on
embedded CPUs made by Freescale. Selecting this option means that
you wish to build a kernel for a machine with a CPM2 coprocessor
on it (826x, 827x, 8560).
-config AXON_RAM
- tristate "Axon DDR2 memory device driver"
- depends on PPC_IBM_CELL_BLADE && BLOCK
- default m
- help
- It registers one block device per Axon's DDR2 memory bank found
- on a system. Block devices are called axonram?, their major and
- minor numbers are available in /proc/devices, /proc/partitions or
- in /sys/block/axonram?/dev.
-
config FSL_ULI1575
- bool
- default n
+ bool "ULI1575 PCIe south bridge support"
+ depends on FSL_SOC_BOOKE || PPC_86xx
+ depends on PCI
+ select FSL_PCI
select GENERIC_ISA_DMA
help
Support for the ULI1575 PCIe south bridge that exists on some
@@ -330,6 +261,7 @@ config FSL_ULI1575
config CPM
bool
+ select GENERIC_ALLOCATOR
config OF_RTC
bool
@@ -337,28 +269,25 @@ config OF_RTC
Uses information from the OF or flattened device tree to instantiate
platform devices for direct mapped RTC chips like the DS1742 or DS1743.
-config SIMPLE_GPIO
- bool "Support for simple, memory-mapped GPIO controllers"
- depends on PPC
- select ARCH_REQUIRE_GPIOLIB
+config GEN_RTC
+ bool "Use the platform RTC operations from user space"
+ select RTC_CLASS
+ select RTC_DRV_GENERIC
help
- Say Y here to support simple, memory-mapped GPIO controllers.
- These are usually BCSRs used to control board's switches, LEDs,
- chip-selects, Ethernet/USB PHY's power and various other small
- on-board peripherals.
+ This option provides backwards compatibility with the old gen_rtc.ko
+ module that was traditionally used for old PowerPC machines.
+ Platforms should migrate to enabling the RTC_DRV_GENERIC by hand
+ replacing their get_rtc_time/set_rtc_time callbacks with
+ a proper RTC device driver.
config MCU_MPC8349EMITX
bool "MPC8349E-mITX MCU driver"
depends on I2C=y && PPC_83xx
- select ARCH_REQUIRE_GPIOLIB
+ select GPIOLIB
help
Say Y here to enable soft power-off functionality on the Freescale
boards with the MPC8349E-mITX-compatible MCU chips. This driver will
also register MCU GPIOs with the generic GPIO API, so you'll be able
to use MCU pins as GPIOs.
-config XILINX_PCI
- bool "Xilinx PCI host bridge support"
- depends on PCI && XILINX_VIRTEX
-
endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index e8bc40869cbd..7b527d18aa5e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -1,7 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC32
+ bool
+ default y if !PPC64
+
config PPC64
bool "64-bit kernel"
- default n
- select HAVE_VIRT_CPU_ACCOUNTING
+ select ZLIB_DEFLATE
help
This option selects whether a 32-bit or a 64-bit kernel
will be built.
@@ -12,10 +16,10 @@ choice
depends on PPC32
help
There are five families of 32 bit PowerPC chips supported.
- The most common ones are the desktop and server CPUs (601, 603,
+ The most common ones are the desktop and server CPUs (603,
604, 740, 750, 74xx) from Freescale and IBM, with their
embedded 512x/52xx/82xx/83xx/86xx counterparts.
- The other embeeded parts, namely 4xx, 8xx, e200 (55xx) and e500
+ The other embedded parts, namely 4xx, 8xx and e500
(85xx) each form a family of their own that is not compatible
with the others.
@@ -23,45 +27,55 @@ choice
config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
- select PPC_FPU
+ imply PPC_FPU
+ select PPC_HAVE_PMU_SUPPORT
+ select HAVE_ARCH_VMAP_STACK
config PPC_85xx
bool "Freescale 85xx"
- select E500
+ select PPC_E500
config PPC_8xx
bool "Freescale 8xx"
+ select ARCH_SUPPORTS_HUGETLBFS
select FSL_SOC
- select 8xx
- select PPC_LIB_RHEAP
-
-config 40x
- bool "AMCC 40x"
- select PPC_DCR_NATIVE
- select PPC_UDBG_16550
- select 4xx_SOC
- select PPC_PCI_CHOICE
+ select PPC_KUEP
+ select HAVE_ARCH_VMAP_STACK
+ select HUGETLBFS
config 44x
bool "AMCC 44x, 46x or 47x"
select PPC_DCR_NATIVE
select PPC_UDBG_16550
- select 4xx_SOC
- select PPC_PCI_CHOICE
+ select HAVE_PCI
select PHYS_64BIT
-
-config E200
- bool "Freescale e200"
+ select PPC_KUEP
endchoice
+config PPC_BOOK3S_603
+ bool "Support for 603 SW loaded TLB"
+ depends on PPC_BOOK3S_32
+ default y
+ help
+ Provide support for processors based on the 603 cores. Those
+ processors don't have a HASH MMU and provide SW TLB loading.
+
+config PPC_BOOK3S_604
+ bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603
+ depends on PPC_BOOK3S_32
+ default y
+ help
+ Provide support for processors not based on the 603 cores.
+ Those processors have a HASH MMU.
+
choice
prompt "Processor Type"
depends on PPC64
help
There are two families of 64 bit PowerPC chips supported.
The most common ones are the desktop and server CPUs
- (POWER4, POWER5, 970, POWER5+, POWER6, POWER7, POWER8 ...)
+ (POWER5, 970, POWER5+, POWER6, POWER7, POWER8, POWER9 ...)
The others are the "embedded" processors compliant with the
"Book 3E" variant of the architecture
@@ -70,113 +84,230 @@ config PPC_BOOK3S_64
bool "Server processors"
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
- select SYS_SUPPORTS_HUGETLBFS
- select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK
+ select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_NUMA_BALANCING
+ select HAVE_MOVE_PMD
+ select HAVE_MOVE_PUD
select IRQ_WORK
+ select PPC_64S_HASH_MMU if !PPC_RADIX_MMU
+ select KASAN_VMALLOC if KASAN
config PPC_BOOK3E_64
bool "Embedded processors"
+ select PPC_E500
+ select PPC_E500MC
select PPC_FPU # Make it a choice ?
select PPC_SMP_MUXED_IPI
select PPC_DOORBELL
+ select ZONE_DMA
endchoice
+config PPC_THP
+ def_bool y
+ depends on PPC_BOOK3S_64
+ depends on PPC_RADIX_MMU || (PPC_64S_HASH_MMU && PAGE_SIZE_64KB)
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+
choice
prompt "CPU selection"
- depends on PPC64
- default GENERIC_CPU
help
This will create a kernel which is optimised for a particular CPU.
The resulting kernel may not run on other CPUs, so use this with care.
If unsure, select Generic.
-config GENERIC_CPU
- bool "Generic"
- depends on !CPU_LITTLE_ENDIAN
+config POWERPC64_CPU
+ bool "Generic 64 bits powerpc"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER if CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX if CPU_LITTLE_ENDIAN
+
+config POWERPC_CPU
+ bool "Generic 32 bits powerpc"
+ depends on PPC_BOOK3S_32
config CELL_CPU
bool "Cell Broadband Engine"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ depends on !CC_IS_CLANG
+ select PPC_64S_HASH_MMU
-config POWER4_CPU
- bool "POWER4"
- depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
-
-config POWER5_CPU
- bool "POWER5"
+config PPC_970_CPU
+ bool "PowerPC 970 (including PowerPC G5)"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER6_CPU
bool "POWER6"
depends on PPC_BOOK3S_64 && !CPU_LITTLE_ENDIAN
+ select PPC_64S_HASH_MMU
config POWER7_CPU
bool "POWER7"
depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
+
+config POWER8_CPU
+ bool "POWER8"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_64S_HASH_MMU
+ select PPC_HAS_LBARX_LHARX
+
+config POWER9_CPU
+ bool "POWER9"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_HAS_LBARX_LHARX
+
+config POWER10_CPU
+ bool "POWER10"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_FAST_MULTIPLIER
+ select PPC_HAVE_PREFIXED_SUPPORT
+ select PPC_HAVE_PCREL_SUPPORT
config E5500_CPU
bool "Freescale e5500"
- depends on E500
+ depends on PPC64 && PPC_E500
config E6500_CPU
bool "Freescale e6500"
- depends on E500
+ depends on PPC64 && PPC_E500
+ depends on !CC_IS_CLANG
+ select PPC_HAS_LBARX_LHARX
+
+config 440_CPU
+ bool "440 (44x family)"
+ depends on 44x
+
+config 464_CPU
+ bool "464 (44x family)"
+ depends on 44x
+ depends on !CC_IS_CLANG
+
+config 476_CPU
+ bool "476 (47x family)"
+ depends on PPC_47x
+ depends on !CC_IS_CLANG
+
+config 860_CPU
+ bool "8xx family"
+ depends on PPC_8xx
+ depends on !CC_IS_CLANG
+
+config E300C2_CPU
+ bool "e300c2 (832x)"
+ depends on PPC_BOOK3S_32
+ depends on !CC_IS_CLANG
+
+config E300C3_CPU
+ bool "e300c3 (831x)"
+ depends on PPC_BOOK3S_32
+ depends on !CC_IS_CLANG
+
+config G4_CPU
+ bool "G4 (74xx)"
+ depends on PPC_BOOK3S_32
+ select ALTIVEC
+
+config E500_CPU
+ bool "e500 (8540)"
+ depends on PPC_85xx && !PPC_E500MC
+
+config E500MC_CPU
+ bool "e500mc"
+ depends on PPC_85xx && PPC_E500MC
+
+config TOOLCHAIN_DEFAULT_CPU
+ bool "Rely on the toolchain's implicit default CPU"
endchoice
+config TARGET_CPU_BOOL
+ bool
+ default !TOOLCHAIN_DEFAULT_CPU
+
+config TARGET_CPU
+ string
+ depends on TARGET_CPU_BOOL
+ default "cell" if CELL_CPU
+ default "970" if PPC_970_CPU
+ default "power6" if POWER6_CPU
+ default "power7" if POWER7_CPU
+ default "power8" if POWER8_CPU
+ default "power9" if POWER9_CPU
+ default "power10" if POWER10_CPU
+ default "e5500" if E5500_CPU
+ default "e6500" if E6500_CPU
+ default "power4" if POWERPC64_CPU && !CPU_LITTLE_ENDIAN
+ default "power8" if POWERPC64_CPU && CPU_LITTLE_ENDIAN
+ default "440" if 440_CPU
+ default "464" if 464_CPU
+ default "476" if 476_CPU
+ default "860" if 860_CPU
+ default "e300c2" if E300C2_CPU
+ default "e300c3" if E300C3_CPU
+ default "G4" if G4_CPU
+ default "8540" if E500_CPU
+ default "e500mc" if E500MC_CPU
+ default "powerpc" if POWERPC_CPU
+
+config TUNE_CPU
+ string
+ depends on POWERPC64_CPU
+ default "-mtune=power10" if $(cc-option,-mtune=power10)
+ default "-mtune=power9" if $(cc-option,-mtune=power9)
+ default "-mtune=power8" if $(cc-option,-mtune=power8)
+
config PPC_BOOK3S
def_bool y
depends on PPC_BOOK3S_32 || PPC_BOOK3S_64
-config PPC_BOOK3E
- def_bool y
- depends on PPC_BOOK3E_64
-
-config 6xx
- def_bool y
- depends on PPC32 && PPC_BOOK3S
- select PPC_HAVE_PMU_SUPPORT
-
-config TUNE_CELL
- bool "Optimize for Cell Broadband Engine"
- depends on PPC64 && PPC_BOOK3S
- help
- Cause the compiler to optimize for the PPE of the Cell Broadband
- Engine. This will make the code run considerably faster on Cell
- but somewhat slower on other machines. This option only changes
- the scheduling of instructions, not the selection of instructions
- itself, so the resulting kernel will keep running on all other
- machines.
-
-# this is temp to handle compat with arch=ppc
-config 8xx
- bool
-
-config E500
+config PPC_E500
select FSL_EMB_PERFMON
- select PPC_FSL_BOOK3E
bool
+ select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
+ select PPC_SMP_MUXED_IPI
+ select PPC_DOORBELL
+ select PPC_KUEP
config PPC_E500MC
bool "e500mc Support"
select PPC_FPU
select COMMON_CLK
- depends on E500
+ depends on PPC_E500
help
This must be enabled for running on e500mc (and derivatives
such as e5500/e6500), and must be disabled for running on
e500v1 or e500v2.
-config PPC_FPU
+config PPC_FPU_REGS
bool
+
+config PPC_FPU
+ bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x
default y if PPC64
+ select PPC_FPU_REGS
+ help
+ This must be enabled to support the Floating Point Unit.
+ Most 6xx have an FPU but the e300c2 core (mpc832x) doesn't have
+ an FPU, so when building an embedded kernel for that target
+ you can disable FPU support.
+
+ If unsure say Y.
config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon"
- depends on E500 || PPC_83xx
+ depends on PPC_E500 || PPC_83xx
help
This is the Performance Monitor support found on the e500 core
and some e300 cores (c3 and c4). Select this only if your
@@ -189,42 +320,29 @@ config FSL_EMB_PERF_EVENT
config FSL_EMB_PERF_EVENT_E500
bool
- depends on FSL_EMB_PERF_EVENT && E500
+ depends on FSL_EMB_PERF_EVENT && PPC_E500
default y
config 4xx
bool
- depends on 40x || 44x
+ depends on 44x
default y
config BOOKE
bool
- depends on E200 || E500 || 44x || PPC_BOOK3E
- default y
-
-config FSL_BOOKE
- bool
- depends on (E200 || E500) && PPC32
+ depends on PPC_E500 || 44x
default y
-# this is for common code between PPC32 & PPC64 FSL BOOKE
-config PPC_FSL_BOOK3E
- bool
- select FSL_EMB_PERFMON
- select PPC_SMP_MUXED_IPI
- select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
- select PPC_DOORBELL
- default y if FSL_BOOKE
-
config PTE_64BIT
bool
- depends on 44x || E500 || PPC_86xx
+ depends on 44x || PPC_E500 || PPC_86xx
default y if PHYS_64BIT
config PHYS_64BIT
- bool 'Large physical address support' if E500 || PPC_86xx
- depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
- ---help---
+ bool 'Large physical address support' if PPC_E500 || PPC_86xx
+ depends on (44x || PPC_E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx
+ select PHYS_ADDR_T_64BIT
+ help
This option enables kernel support for larger than 32-bit physical
addresses. This feature may not be available on all cores.
@@ -236,8 +354,9 @@ config PHYS_64BIT
config ALTIVEC
bool "AltiVec Support"
- depends on 6xx || PPC_BOOK3S_64 || (PPC_E500MC && PPC64)
- ---help---
+ depends on PPC_BOOK3S || (PPC_E500MC && PPC64 && !E5500_CPU)
+ select PPC_FPU
+ help
This option enables kernel support for the Altivec extensions to the
PowerPC processor. The kernel currently supports saving and restoring
altivec registers, and turning on the 'altivec enable' bit so user
@@ -253,7 +372,7 @@ config ALTIVEC
config VSX
bool "VSX Support"
depends on PPC_BOOK3S_64 && ALTIVEC && PPC_FPU
- ---help---
+ help
This option enables kernel support for the Vector Scalar extensions
to the PowerPC processor. The kernel currently supports saving and
@@ -266,48 +385,15 @@ config VSX
If in doubt, say Y here.
-config PPC_ICSWX
- bool "Support for PowerPC icswx coprocessor instruction"
- depends on PPC_BOOK3S_64
- default n
- ---help---
-
- This option enables kernel support for the PowerPC Initiate
- Coprocessor Store Word (icswx) coprocessor instruction on POWER7
- or newer processors.
-
- This option is only useful if you have a processor that supports
- the icswx coprocessor instruction. It does not have any effect
- on processors without the icswx coprocessor instruction.
-
- This option slightly increases kernel memory usage.
-
- If in doubt, say N here.
-
-config PPC_ICSWX_PID
- bool "icswx requires direct PID management"
- depends on PPC_ICSWX
- default y
- ---help---
- The PID register in server is used explicitly for ICSWX. In
- embedded systems PID management is done by the system.
-
-config PPC_ICSWX_USE_SIGILL
- bool "Should a bad CT cause a SIGILL?"
- depends on PPC_ICSWX
- default n
- ---help---
- Should a bad CT used for "non-record form ICSWX" cause an
- illegal instruction signal or should it be silent as
- architected.
-
- If in doubt, say N here.
+config SPE_POSSIBLE
+ def_bool y
+ depends on PPC_E500 && !PPC_E500MC
config SPE
bool "SPE Support"
- depends on E200 || (E500 && !PPC_E500MC)
+ depends on SPE_POSSIBLE
default y
- ---help---
+ help
This option enables kernel support for the Signal Processing
Extensions (SPE) to the PowerPC processor. The kernel currently
supports saving and restoring SPE registers, and turning on the
@@ -319,44 +405,152 @@ config SPE
If in doubt, say Y here.
-config PPC_STD_MMU
- def_bool y
- depends on PPC_BOOK3S
+config PPC_64S_HASH_MMU
+ bool "Hash MMU Support"
+ depends on PPC_BOOK3S_64
+ default y
+ help
+ Enable support for the Power ISA Hash style MMU. This is implemented
+ by all IBM Power and other 64-bit Book3S CPUs before ISA v3.0. The
+ OpenPOWER ISA does not mandate the hash MMU and some CPUs do not
+ implement it (e.g., Microwatt).
-config PPC_STD_MMU_32
- def_bool y
- depends on PPC_STD_MMU && PPC32
+ Note that POWER9 PowerVM platforms only support the hash
+ MMU. From POWER10 radix is also supported by PowerVM.
+
+ If you're unsure, say Y.
+
+config PPC_RADIX_MMU
+ bool "Radix MMU Support"
+ depends on PPC_BOOK3S_64
+ select ARCH_HAS_GIGANTIC_PAGE
+ default y
+ help
+ Enable support for the Power ISA 3.0 Radix style MMU. Currently this
+ is only implemented by IBM Power9 CPUs; if you don't have one of them
+ you can probably disable this.
+
+config PPC_RADIX_MMU_DEFAULT
+ bool "Default to using the Radix MMU when possible" if PPC_64S_HASH_MMU
+ depends on PPC_BOOK3S_64
+ depends on PPC_RADIX_MMU
+ default y
+ help
+ When the hardware supports the Radix MMU, default to using it unless
+ "disable_radix[=yes]" is specified on the kernel command line.
+
+ If this option is disabled, the Hash MMU will be used by default,
+ unless "disable_radix=no" is specified on the kernel command line.
+
+ If you're unsure, say Y.
-config PPC_STD_MMU_64
+config PPC_RADIX_BROADCAST_TLBIE
+ bool
+ depends on PPC_RADIX_MMU
+ help
+ Power ISA v3.0 and later implementations in the Linux Compliancy Subset
+ and lower are not required to implement broadcast TLBIE instructions.
+ Platforms with CPUs that do implement TLBIE broadcast, that is, where
+ a TLB invalidation instruction performed on one CPU operates on the
+ TLBs of all CPUs in the system, should select this option. If this
+ option is selected, the disable_tlbie kernel command line option can
+ be used to cause global TLB invalidations to be done via IPIs; without
+ it, IPIs will be used unconditionally.
+
+config PPC_KERNEL_PREFIXED
+ depends on PPC_HAVE_PREFIXED_SUPPORT
+ depends on CC_HAS_PREFIXED
+ default n
+ bool "Build Kernel with Prefixed Instructions"
+ help
+ POWER10 and later CPUs support prefixed instructions: 8-byte
+ instructions that provide large immediates, pc relative addressing,
+ and various floating point, vector, and MMA operations.
+
+ This option builds the kernel with prefixed instructions, and
+ allows a pc relative addressing option to be selected.
+
+ Kernel support for prefixed instructions in applications and guests
+ is not affected by this option.
+
+config PPC_KERNEL_PCREL
+ depends on PPC_HAVE_PCREL_SUPPORT
+ depends on PPC_HAVE_PREFIXED_SUPPORT
+ depends on CC_HAS_PCREL
+ default n
+ select PPC_KERNEL_PREFIXED
+ bool "Build Kernel with PC-Relative addressing model"
+ help
+ POWER10 and later CPUs support pc relative addressing. Recent
+ compilers have support for an ELF ABI extension for a pc relative
+ ABI.
+
+ This option builds the kernel with the pc relative ABI model.
+
+config PPC_KUEP
+ bool "Kernel Userspace Execution Prevention"
+ default y
+ help
+ Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP
+ bool "Kernel Userspace Access Protection"
+ default y
+ help
+ Enable support for Kernel Userspace Access Protection (KUAP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+ bool "Extra debugging for Kernel Userspace Access Protection"
+ depends on PPC_KUAP
+ help
+ Add extra debugging for Kernel Userspace Access Protection (KUAP)
+ If you're unsure, say N.
+
+config PPC_PKEY
def_bool y
- depends on PPC_STD_MMU && PPC64
+ depends on PPC_BOOK3S_64
+ depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP
+
config PPC_MMU_NOHASH
def_bool y
- depends on !PPC_STD_MMU
+ depends on !PPC_BOOK3S
-config PPC_BOOK3E_MMU
- def_bool y
- depends on FSL_BOOKE || PPC_BOOK3E
+config PPC_HAVE_PMU_SUPPORT
+ bool
-config PPC_MM_SLICES
+config PPC_HAVE_PREFIXED_SUPPORT
bool
- default y if (!PPC_FSL_BOOK3E && PPC64 && HUGETLB_PAGE) || (PPC_STD_MMU_64 && PPC_64K_PAGES)
- default n
-config PPC_HAVE_PMU_SUPPORT
- bool
+config PPC_HAVE_PCREL_SUPPORT
+ bool
+
+config PMU_SYSFS
+ bool "Create PMU SPRs sysfs file"
+ default n
+ help
+ This option enables sysfs file creation for PMU SPRs like MMCR* and PMC*.
config PPC_PERF_CTRS
- def_bool y
- depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
- help
- This enables the powerpc-specific perf_event back-end.
+ def_bool y
+ depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+ help
+ This enables the powerpc-specific perf_event back-end.
+
+config FORCE_SMP
+ # Allow platforms to force SMP=y by selecting this
+ bool
+ select SMP
config SMP
- depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x
- bool "Symmetric multi-processing support"
- ---help---
+ depends on PPC_BOOK3S || PPC_E500 || PPC_47x
+ select GENERIC_IRQ_MIGRATION
+ bool "Symmetric multi-processing support" if !FORCE_SMP
+ help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, say N. If you have a system with more
than one CPU, say Y. Note that the kernel does not currently
@@ -373,15 +567,20 @@ config SMP
If you don't know what to do here, say N.
config NR_CPUS
- int "Maximum number of CPUs (2-8192)"
- range 2 8192
- depends on SMP
+ int "Maximum number of CPUs (2-8192)" if SMP
+ range 2 8192 if SMP
+ default "1" if !SMP
default "32" if PPC64
default "4"
config NOT_COHERENT_CACHE
bool
- depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
+ depends on 44x || PPC_8xx || PPC_MPC512x || \
+ GAMECUBE_COMMON || AMIGAONE
+ select ARCH_HAS_DMA_PREP_COHERENT
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
+ select DMA_DIRECT_REMAP
default n if PPC_47x
default y
@@ -390,10 +589,18 @@ config CHECK_CACHE_COHERENCY
config PPC_DOORBELL
bool
- default n
endmenu
+config VDSO32
+ def_bool y
+ depends on PPC32 || COMPAT
+ help
+ This symbol controls whether we build the 32-bit VDSO. We obviously
+ want to do that if we're building a 32-bit kernel. If we're building
+ a 64-bit kernel then we only want a 32-bit VDSO if we're also enabling
+ COMPAT.
+
choice
prompt "Endianness selection"
default CPU_BIG_ENDIAN
@@ -410,6 +617,7 @@ config CPU_BIG_ENDIAN
config CPU_LITTLE_ENDIAN
bool "Build little endian kernel"
+ depends on PPC_BOOK3S_64
select PPC64_BOOT_WRAPPER
help
Build a little endian kernel.
@@ -420,6 +628,12 @@ config CPU_LITTLE_ENDIAN
endchoice
+config PPC64_ELF_ABI_V1
+ def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2)
+
+config PPC64_ELF_ABI_V2
+ def_bool PPC64 && !PPC64_ELF_ABI_V1
+
config PPC64_BOOT_WRAPPER
def_bool n
depends on CPU_LITTLE_ENDIAN
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index 469ef170d218..3cee4a842736 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -1,11 +1,9 @@
-
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o
obj-$(CONFIG_PPC_PMAC) += powermac/
obj-$(CONFIG_PPC_CHRP) += chrp/
-obj-$(CONFIG_40x) += 40x/
obj-$(CONFIG_44x) += 44x/
obj-$(CONFIG_PPC_MPC512x) += 512x/
obj-$(CONFIG_PPC_MPC52xx) += 52xx/
@@ -16,9 +14,10 @@ obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/
obj-$(CONFIG_PPC_86xx) += 86xx/
obj-$(CONFIG_PPC_POWERNV) += powernv/
obj-$(CONFIG_PPC_PSERIES) += pseries/
-obj-$(CONFIG_PPC_MAPLE) += maple/
obj-$(CONFIG_PPC_PASEMI) += pasemi/
obj-$(CONFIG_PPC_CELL) += cell/
obj-$(CONFIG_PPC_PS3) += ps3/
obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/
obj-$(CONFIG_AMIGAONE) += amigaone/
+obj-$(CONFIG_PPC_BOOK3S) += book3s/
+obj-$(CONFIG_PPC_MICROWATT) += microwatt/
diff --git a/arch/powerpc/platforms/amigaone/Kconfig b/arch/powerpc/platforms/amigaone/Kconfig
index 128de25cc284..0741edb10b7b 100644
--- a/arch/powerpc/platforms/amigaone/Kconfig
+++ b/arch/powerpc/platforms/amigaone/Kconfig
@@ -1,10 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
config AMIGAONE
bool "Eyetech AmigaOne/MAI Teron"
- depends on 6xx && BROKEN_ON_SMP
+ depends on PPC_BOOK3S_32 && BROKEN_ON_SMP
select PPC_I8259
select PPC_INDIRECT_PCI
select PPC_UDBG_16550
- select PCI
+ select FORCE_PCI
select NOT_COHERENT_CACHE
select CHECK_CACHE_COHERENCY
select DEFAULT_UIMAGE
diff --git a/arch/powerpc/platforms/amigaone/Makefile b/arch/powerpc/platforms/amigaone/Makefile
index e6885b3b2ee7..e95e4e3e2de3 100644
--- a/arch/powerpc/platforms/amigaone/Makefile
+++ b/arch/powerpc/platforms/amigaone/Makefile
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
obj-y += setup.o
diff --git a/arch/powerpc/platforms/amigaone/setup.c b/arch/powerpc/platforms/amigaone/setup.c
index 2fe12046279e..33f852a7625f 100644
--- a/arch/powerpc/platforms/amigaone/setup.c
+++ b/arch/powerpc/platforms/amigaone/setup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* AmigaOne platform setup
*
@@ -5,13 +6,9 @@
*
* Based on original amigaone_setup.c source code
* Copyright 2003 by Hans-Joerg Frieden and Thomas Frieden
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
*/
+#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/of.h>
#include <linux/of_address.h>
@@ -28,7 +25,7 @@
extern void __flush_disable_L1(void);
-void amigaone_show_cpuinfo(struct seq_file *m)
+static void amigaone_show_cpuinfo(struct seq_file *m)
{
seq_printf(m, "vendor\t\t: Eyetech Ltd.\n");
}
@@ -40,7 +37,7 @@ static int __init amigaone_add_bridge(struct device_node *dev)
const int *bus_range;
struct pci_controller *hose;
- printk(KERN_INFO "Adding PCI host bridge %s\n", dev->full_name);
+ printk(KERN_INFO "Adding PCI host bridge %pOF\n", dev);
cfg_addr = of_get_address(dev, 0, NULL, NULL);
cfg_data = of_get_address(dev, 1, NULL, NULL);
@@ -49,8 +46,8 @@ static int __init amigaone_add_bridge(struct device_node *dev)
bus_range = of_get_property(dev, "bus-range", &len);
if ((bus_range == NULL) || (len < 2 * sizeof(int)))
- printk(KERN_WARNING "Can't get bus-range for %s, assume"
- " bus 0\n", dev->full_name);
+ printk(KERN_WARNING "Can't get bus-range for %pOF, assume"
+ " bus 0\n", dev);
hose = pcibios_alloc_controller(dev);
if (hose == NULL)
@@ -68,7 +65,13 @@ static int __init amigaone_add_bridge(struct device_node *dev)
return 0;
}
-void __init amigaone_setup_arch(void)
+static void __init amigaone_setup_arch(void)
+{
+ if (ppc_md.progress)
+ ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0);
+}
+
+static void __init amigaone_discover_phbs(void)
{
struct device_node *np;
int phb = -ENODEV;
@@ -78,12 +81,9 @@ void __init amigaone_setup_arch(void)
phb = amigaone_add_bridge(np);
BUG_ON(phb != 0);
-
- if (ppc_md.progress)
- ppc_md.progress("Linux/PPC "UTS_RELEASE"\n", 0);
}
-void __init amigaone_init_IRQ(void)
+static void __init amigaone_init_IRQ(void)
{
struct device_node *pic, *np = NULL;
const unsigned long *prop = NULL;
@@ -109,7 +109,7 @@ void __init amigaone_init_IRQ(void)
i8259_init(pic, int_ack);
ppc_md.get_irq = i8259_irq;
- irq_set_default_host(i8259_get_host());
+ irq_set_default_domain(i8259_get_host());
}
static int __init request_isa_regions(void)
@@ -123,7 +123,7 @@ static int __init request_isa_regions(void)
}
machine_device_initcall(amigaone, request_isa_regions);
-void amigaone_restart(char *cmd)
+static void __noreturn amigaone_restart(char *cmd)
{
local_irq_disable();
@@ -143,32 +143,26 @@ void amigaone_restart(char *cmd)
static int __init amigaone_probe(void)
{
- unsigned long root = of_get_flat_dt_root();
-
- if (of_flat_dt_is_compatible(root, "eyetech,amigaone")) {
- /*
- * Coherent memory access cause complete system lockup! Thus
- * disable this CPU feature, even if the CPU needs it.
- */
- cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT;
+ /*
+ * Coherent memory access cause complete system lockup! Thus
+ * disable this CPU feature, even if the CPU needs it.
+ */
+ cur_cpu_spec->cpu_features &= ~CPU_FTR_NEED_COHERENT;
- ISA_DMA_THRESHOLD = 0x00ffffff;
- DMA_MODE_READ = 0x44;
- DMA_MODE_WRITE = 0x48;
+ DMA_MODE_READ = 0x44;
+ DMA_MODE_WRITE = 0x48;
- return 1;
- }
-
- return 0;
+ return 1;
}
define_machine(amigaone) {
.name = "AmigaOne",
+ .compatible = "eyetech,amigaone",
.probe = amigaone_probe,
.setup_arch = amigaone_setup_arch,
+ .discover_phbs = amigaone_discover_phbs,
.show_cpuinfo = amigaone_show_cpuinfo,
.init_IRQ = amigaone_init_IRQ,
.restart = amigaone_restart,
- .calibrate_decr = generic_calibrate_decr,
.progress = udbg_progress,
};
diff --git a/arch/powerpc/platforms/book3s/Kconfig b/arch/powerpc/platforms/book3s/Kconfig
new file mode 100644
index 000000000000..34c931592ef0
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config PPC_VAS
+ bool "IBM Virtual Accelerator Switchboard (VAS)"
+ depends on (PPC_POWERNV || PPC_PSERIES) && PPC_64K_PAGES
+ default y
+ help
+ This enables support for IBM Virtual Accelerator Switchboard (VAS).
+
+ VAS devices are found in POWER9-based and later systems. They
+ provide access to accelerator coprocessors such as NX-GZIP and
+ NX-842. This config allows the kernel to use NX-842 accelerators,
+ and user-mode APIs for the NX-GZIP accelerator on POWER9 PowerNV
+ and POWER10 PowerVM platforms.
+
+ If unsure, say "N".
diff --git a/arch/powerpc/platforms/book3s/Makefile b/arch/powerpc/platforms/book3s/Makefile
new file mode 100644
index 000000000000..e790f1910f61
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_PPC_VAS) += vas-api.o
diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
new file mode 100644
index 000000000000..49b15e7a8265
--- /dev/null
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * VAS user space API for its accelerators (Only NX-GZIP is supported now)
+ * Copyright (C) 2019 Haren Myneni, IBM Corp
+ */
+
+#define pr_fmt(fmt) "vas-api: " fmt
+
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/kthread.h>
+#include <linux/sched/signal.h>
+#include <linux/mmu_context.h>
+#include <linux/io.h>
+#include <asm/vas.h>
+#include <uapi/asm/vas-api.h>
+
+/*
+ * The driver creates the device node that can be used as follows:
+ * For NX-GZIP
+ *
+ * fd = open("/dev/crypto/nx-gzip", O_RDWR);
+ * rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
+ * paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL).
+ * vas_copy(&crb, 0, 1);
+ * vas_paste(paste_addr, 0, 1);
+ * close(fd) or exit process to close window.
+ *
+ * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
+ * copy/paste returns to the user space directly. So refer to the NX
+ * hardware documentation for exact copy/paste usage and completion /
+ * error conditions.
+ */
+
+/*
+ * Wrapper object for the nx-gzip device - there is just one instance of
+ * this node for the whole system.
+ */
+static struct coproc_dev {
+ struct cdev cdev;
+ struct device *device;
+ char *name;
+ dev_t devt;
+ struct class *class;
+ enum vas_cop_type cop_type;
+ const struct vas_user_win_ops *vops;
+} coproc_device;
+
+struct coproc_instance {
+ struct coproc_dev *coproc;
+ struct vas_window *txwin;
+};
+
+static char *coproc_devnode(const struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
+}
+
+/*
+ * Take reference to pid and mm
+ */
+int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
+{
+ /*
+ * A window opened by a child thread may not be closed when
+ * it exits. So take a reference to its pid and release it
+ * when the window is freed by the parent thread.
+ * Acquire a reference to the task's pid to make sure the
+ * pid will not be re-used - needed only for multithreaded
+ * applications.
+ task_ref->pid = get_task_pid(current, PIDTYPE_PID);
+ /*
+ * Acquire a reference to the task's mm.
+ */
+ task_ref->mm = get_task_mm(current);
+ if (!task_ref->mm) {
+ put_pid(task_ref->pid);
+ pr_err("pid(%d): mm_struct is not found\n",
+ current->pid);
+ return -EPERM;
+ }
+
+ mmgrab(task_ref->mm);
+ mmput(task_ref->mm);
+ /*
+ * The process closes the window during exit. In the case of a
+ * multithreaded application, a child thread can open a
+ * window and exit without closing it. So take a tgid
+ * reference until the window is closed to make sure the tgid
+ * is not reused.
+ */
+ task_ref->tgid = find_get_pid(task_tgid_vnr(current));
+
+ return 0;
+}
+
+/*
+ * On successful return, the caller must release the task reference
+ * with put_task_struct().
+ */
+static bool ref_get_pid_and_task(struct vas_user_win_ref *task_ref,
+ struct task_struct **tskp, struct pid **pidp)
+{
+ struct task_struct *tsk;
+ struct pid *pid;
+
+ pid = task_ref->pid;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ if (!tsk) {
+ pid = task_ref->tgid;
+ tsk = get_pid_task(pid, PIDTYPE_PID);
+ /*
+ * Parent thread (tgid) will be closing window when it
+ * exits. So should not get here.
+ */
+ if (WARN_ON_ONCE(!tsk))
+ return false;
+ }
+
+ /* Return if the task is exiting. */
+ if (tsk->flags & PF_EXITING) {
+ put_task_struct(tsk);
+ return false;
+ }
+
+ *tskp = tsk;
+ *pidp = pid;
+
+ return true;
+}
+
+/*
+ * Update the CSB to indicate a translation error.
+ *
+ * User space will be polling on CSB after the request is issued.
+ * If NX can handle the request without any issues, it updates CSB.
+ * Whereas if NX encounters page fault, the kernel will handle the
+ * fault and update CSB with translation error.
+ *
+ * If we are unable to update the CSB, meaning copy_to_user() failed due
+ * to an invalid csb_addr, send a signal to the process.
+ */
+void vas_update_csb(struct coprocessor_request_block *crb,
+ struct vas_user_win_ref *task_ref)
+{
+ struct coprocessor_status_block csb;
+ struct kernel_siginfo info;
+ struct task_struct *tsk;
+ void __user *csb_addr;
+ struct pid *pid;
+ int rc;
+
+ /*
+ * NX user space windows can not be opened for task->mm=NULL
+ * and faults will not be generated for kernel requests.
+ */
+ if (WARN_ON_ONCE(!task_ref->mm))
+ return;
+
+ csb_addr = (void __user *)be64_to_cpu(crb->csb_addr);
+
+ memset(&csb, 0, sizeof(csb));
+ csb.cc = CSB_CC_FAULT_ADDRESS;
+ csb.ce = CSB_CE_TERMINATION;
+ csb.cs = 0;
+ csb.count = 0;
+
+ /*
+ * NX operates and returns in BE format as defined by the CRB struct.
+ * So save fault_storage_addr in BE as NX pastes it in the FIFO and
+ * expect user space to convert it to CPU format.
+ */
+ csb.address = crb->stamp.nx.fault_storage_addr;
+ csb.flags = 0;
+
+ /*
+ * Process closes send window after all pending NX requests are
+ * completed. In multi-thread applications, a child thread can
+ * open a window and can exit without closing it. Maybe some
+ * requests are pending or this window can be used by other
+ * threads later. We should handle faults if NX encounters
+ * page faults on these requests. Update CSB with translation
+ * error and fault address. If csb_addr passed by user space is
+ * invalid, send SEGV signal to pid saved in window. If the
+ * child thread is not running, send the signal to tgid.
+ * Parent thread (tgid) will close this window upon its exit.
+ *
+ * pid and mm references are taken when window is opened by
+ * process (pid). So tgid is used only when child thread opens
+ * a window and exits without closing it.
+ */
+
+ if (!ref_get_pid_and_task(task_ref, &tsk, &pid))
+ return;
+
+ kthread_use_mm(task_ref->mm);
+ rc = copy_to_user(csb_addr, &csb, sizeof(csb));
+ /*
+ * User space polls on csb.flags (first byte). So add barrier
+ * then copy first byte with csb flags update.
+ */
+ if (!rc) {
+ csb.flags = CSB_V;
+ /* Make sure update to csb.flags is visible now */
+ smp_mb();
+ rc = copy_to_user(csb_addr, &csb, sizeof(u8));
+ }
+ kthread_unuse_mm(task_ref->mm);
+ put_task_struct(tsk);
+
+ /* Success */
+ if (!rc)
+ return;
+
+
+ pr_debug("Invalid CSB address 0x%p signalling pid(%d)\n",
+ csb_addr, pid_vnr(pid));
+
+ clear_siginfo(&info);
+ info.si_signo = SIGSEGV;
+ info.si_errno = EFAULT;
+ info.si_code = SEGV_MAPERR;
+ info.si_addr = csb_addr;
+ /*
+ * process will be polling on csb.flags after request is sent to
+ * NX. So generally CSB update should not fail except when an
+ * application passes invalid csb_addr. So an error message will
+ * be displayed and leave it to user space whether to ignore or
+ * handle this signal.
+ */
+ rcu_read_lock();
+ rc = kill_pid_info(SIGSEGV, &info, pid);
+ rcu_read_unlock();
+
+ pr_devel("pid %d kill_proc_info() rc %d\n", pid_vnr(pid), rc);
+}
+
+void vas_dump_crb(struct coprocessor_request_block *crb)
+{
+ struct data_descriptor_entry *dde;
+ struct nx_fault_stamp *nx;
+
+ dde = &crb->source;
+ pr_devel("SrcDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+ be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+ dde->count, dde->index, dde->flags);
+
+ dde = &crb->target;
+ pr_devel("TgtDDE: addr 0x%llx, len %d, count %d, idx %d, flags %d\n",
+ be64_to_cpu(dde->address), be32_to_cpu(dde->length),
+ dde->count, dde->index, dde->flags);
+
+ nx = &crb->stamp.nx;
+ pr_devel("NX Stamp: PSWID 0x%x, FSA 0x%llx, flags 0x%x, FS 0x%x\n",
+ be32_to_cpu(nx->pswid),
+ be64_to_cpu(crb->stamp.nx.fault_storage_addr),
+ nx->flags, nx->fault_status);
+}
+
+static int coproc_open(struct inode *inode, struct file *fp)
+{
+ struct coproc_instance *cp_inst;
+
+ cp_inst = kzalloc(sizeof(*cp_inst), GFP_KERNEL);
+ if (!cp_inst)
+ return -ENOMEM;
+
+ cp_inst->coproc = container_of(inode->i_cdev, struct coproc_dev,
+ cdev);
+ fp->private_data = cp_inst;
+
+ return 0;
+}
+
+static int coproc_ioc_tx_win_open(struct file *fp, unsigned long arg)
+{
+ void __user *uptr = (void __user *)arg;
+ struct vas_tx_win_open_attr uattr;
+ struct coproc_instance *cp_inst;
+ struct vas_window *txwin;
+ int rc;
+
+ cp_inst = fp->private_data;
+
+ /*
+ * One window per file descriptor
+ */
+ if (cp_inst->txwin)
+ return -EEXIST;
+
+ rc = copy_from_user(&uattr, uptr, sizeof(uattr));
+ if (rc) {
+ pr_err("copy_from_user() returns %d\n", rc);
+ return -EFAULT;
+ }
+
+ if (uattr.version != 1) {
+ pr_err("Invalid window open API version\n");
+ return -EINVAL;
+ }
+
+ if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->open_win) {
+ pr_err("VAS API is not registered\n");
+ return -EACCES;
+ }
+
+ txwin = cp_inst->coproc->vops->open_win(uattr.vas_id, uattr.flags,
+ cp_inst->coproc->cop_type);
+ if (IS_ERR(txwin)) {
+ pr_err_ratelimited("VAS window open failed rc=%ld\n",
+ PTR_ERR(txwin));
+ return PTR_ERR(txwin);
+ }
+
+ mutex_init(&txwin->task_ref.mmap_mutex);
+ cp_inst->txwin = txwin;
+
+ return 0;
+}
+
+static int coproc_release(struct inode *inode, struct file *fp)
+{
+ struct coproc_instance *cp_inst = fp->private_data;
+ int rc;
+
+ if (cp_inst->txwin) {
+ if (cp_inst->coproc->vops &&
+ cp_inst->coproc->vops->close_win) {
+ rc = cp_inst->coproc->vops->close_win(cp_inst->txwin);
+ if (rc)
+ return rc;
+ }
+ cp_inst->txwin = NULL;
+ }
+
+ kfree(cp_inst);
+ fp->private_data = NULL;
+
+ /*
+ * We don't know here if user has other receive windows
+ * open, so we can't really call clear_thread_tidr().
+ * So, once the process calls set_thread_tidr(), the
+ * TIDR value sticks around until process exits, resulting
+ * in an extra copy in restore_sprs().
+ */
+
+ return 0;
+}
+
+/*
+ * If the executed instruction that caused the fault was a paste, then
+ * clear regs CR0[EQ], advance NIP, and return 0. Else return error code.
+ */
+static int do_fail_paste(void)
+{
+ struct pt_regs *regs = current->thread.regs;
+ u32 instword;
+
+ if (WARN_ON_ONCE(!regs))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(!user_mode(regs)))
+ return -EINVAL;
+
+ /*
+ * If we couldn't translate the instruction, the driver should
+ * return success without handling the fault, it will be retried
+ * or the instruction fetch will fault.
+ */
+ if (get_user(instword, (u32 __user *)(regs->nip)))
+ return -EAGAIN;
+
+ /*
+ * Not a paste instruction, driver may fail the fault.
+ */
+ if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
+ return -ENOENT;
+
+ regs->ccr &= ~0xe0000000; /* Clear CR0[0-2] to fail paste */
+ regs_add_return_ip(regs, 4); /* Emulate the paste */
+
+ return 0;
+}
+
+/*
+ * This fault handler is invoked when the core generates page fault on
+ * the paste address. Happens if the kernel closes window in hypervisor
+ * (on pseries) due to lost credit or the paste address is not mapped.
+ */
+static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct file *fp = vma->vm_file;
+ struct coproc_instance *cp_inst = fp->private_data;
+ struct vas_window *txwin;
+ vm_fault_t fault;
+ u64 paste_addr;
+ int ret;
+
+ /*
+ * window is not opened. Shouldn't expect this error.
+ */
+ if (!cp_inst || !cp_inst->txwin) {
+ pr_err("Unexpected fault on paste address with TX window closed\n");
+ return VM_FAULT_SIGBUS;
+ }
+
+ txwin = cp_inst->txwin;
+ /*
+ * When the LPAR loses credits due to core removal or during
+ * migration, the existing mapping for the current paste
+ * addresses is invalidated and the windows are set inactive
+ * (zap_vma_pages in reconfig_close_windows()).
+ * A new mapping will be done later, after migration or when new
+ * credits are available. So continue to receive faults if user
+ * space issues an NX request.
+ */
+ if (txwin->task_ref.vma != vmf->vma) {
+ pr_err("No previous mapping with paste address\n");
+ return VM_FAULT_SIGBUS;
+ }
+
+ /*
+ * The window may be inactive due to lost credit (Ex: core
+ * removal with DLPAR). If the window is active again when
+ * the credit is available, map the new paste address at the
+ * window virtual address.
+ */
+ scoped_guard(mutex, &txwin->task_ref.mmap_mutex) {
+ if (txwin->status == VAS_WIN_ACTIVE) {
+ paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+ if (paste_addr) {
+ fault = vmf_insert_pfn(vma, vma->vm_start,
+ (paste_addr >> PAGE_SHIFT));
+ return fault;
+ }
+ }
+ }
+
+ /*
+ * Received this fault due to closing the actual window.
+ * It can happen during migration or lost credits.
+ * Since no mapping, return the paste instruction failure
+ * to the user space.
+ */
+ ret = do_fail_paste();
+ /*
+ * The user space can retry several times until success (needed
+ * for migration) or should fallback to SW compression or
+ * manage with the existing open windows if available.
+ * Looking at sysfs interface, it can determine whether these
+ * failures are coming during migration or core removal:
+ * nr_used_credits > nr_total_credits when lost credits
+ */
+ if (!ret || (ret == -EAGAIN))
+ return VM_FAULT_NOPAGE;
+
+ return VM_FAULT_SIGBUS;
+}
+
+/*
+ * During mmap() of the paste address, the mapping VMA is saved in
+ * the VAS window struct, which is used to unmap during migration if
+ * the window is still open. But user space can remove this mapping
+ * with munmap() before closing the window, and the VMA address will
+ * then be invalid. Set the VAS window VMA to NULL in this function,
+ * which is called before the VMA is freed.
+ */
+static void vas_mmap_close(struct vm_area_struct *vma)
+{
+ struct file *fp = vma->vm_file;
+ struct coproc_instance *cp_inst = fp->private_data;
+ struct vas_window *txwin;
+
+ /* Should not happen */
+ if (!cp_inst || !cp_inst->txwin) {
+ pr_err("No attached VAS window for the paste address mmap\n");
+ return;
+ }
+
+ txwin = cp_inst->txwin;
+ /*
+ * task_ref.vma is set in coproc_mmap() during mmap paste
+ * address. So it has to be the same VMA that is getting freed.
+ */
+ if (WARN_ON(txwin->task_ref.vma != vma)) {
+ pr_err("Invalid paste address mmaping\n");
+ return;
+ }
+
+ scoped_guard(mutex, &txwin->task_ref.mmap_mutex)
+ txwin->task_ref.vma = NULL;
+}
+
+static const struct vm_operations_struct vas_vm_ops = {
+ .close = vas_mmap_close,
+ .fault = vas_mmap_fault,
+};
+
+static int coproc_mmap(struct file *fp, struct vm_area_struct *vma)
+{
+ struct coproc_instance *cp_inst = fp->private_data;
+ struct vas_window *txwin;
+ unsigned long pfn;
+ u64 paste_addr;
+ pgprot_t prot;
+ int rc;
+
+ txwin = cp_inst->txwin;
+
+ if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
+ pr_debug("size 0x%zx, PAGE_SIZE 0x%zx\n",
+ (vma->vm_end - vma->vm_start), PAGE_SIZE);
+ return -EINVAL;
+ }
+
+ /*
+ * Map complete page to the paste address. So the user
+ * space should pass 0ULL to the offset parameter.
+ */
+ if (vma->vm_pgoff) {
+ pr_debug("Page offset unsupported to map paste address\n");
+ return -EINVAL;
+ }
+
+ /* Ensure instance has an open send window */
+ if (!txwin) {
+ pr_err("No send window open?\n");
+ return -EINVAL;
+ }
+
+ if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
+ pr_err("VAS API is not registered\n");
+ return -EACCES;
+ }
+
+ /*
+ * The initial mmap is done after the window is opened
+ * with ioctl. But before mmap(), this window can be closed in
+ * the hypervisor due to lost credit (core removal on pseries).
+ * So if the window is not active, return mmap() failure with
+ * -EACCES and expect user space to reissue mmap() when it is
+ * active again or open a new window when the credit is available.
+ * mmap_mutex protects the paste address mmap() with DLPAR
+ * close/open event and allows mmap() only when the window is
+ * active.
+ */
+ guard(mutex)(&txwin->task_ref.mmap_mutex);
+ if (txwin->status != VAS_WIN_ACTIVE) {
+ pr_err("Window is not active\n");
+ return -EACCES;
+ }
+
+ paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
+ if (!paste_addr) {
+ pr_err("Window paste address failed\n");
+ return -EINVAL;
+ }
+
+ pfn = paste_addr >> PAGE_SHIFT;
+
+ /* flags, page_prot from cxl_mmap(), except we want cacheable */
+ vm_flags_set(vma, VM_IO | VM_PFNMAP);
+ vma->vm_page_prot = pgprot_cached(vma->vm_page_prot);
+
+ prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_DIRTY);
+
+ rc = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start, prot);
+
+ pr_devel("paste addr %llx at %lx, rc %d\n", paste_addr,
+ vma->vm_start, rc);
+
+ txwin->task_ref.vma = vma;
+ vma->vm_ops = &vas_vm_ops;
+
+ return rc;
+}
+
+static long coproc_ioctl(struct file *fp, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case VAS_TX_WIN_OPEN:
+ return coproc_ioc_tx_win_open(fp, arg);
+ default:
+ return -EINVAL;
+ }
+}
+
+static struct file_operations coproc_fops = {
+ .open = coproc_open,
+ .release = coproc_release,
+ .mmap = coproc_mmap,
+ .unlocked_ioctl = coproc_ioctl,
+};
+
+/*
+ * Only the nx-gzip coprocessor type is supported now, but this API
+ * code can be extended to other coprocessor types later.
+ */
+int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type,
+ const char *name,
+ const struct vas_user_win_ops *vops)
+{
+ int rc = -EINVAL;
+ dev_t devno;
+
+ rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name);
+ if (rc) {
+ pr_err("Unable to allocate coproc major number: %i\n", rc);
+ return rc;
+ }
+
+ pr_devel("%s device allocated, dev [%i,%i]\n", name,
+ MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
+
+ coproc_device.class = class_create(name);
+ if (IS_ERR(coproc_device.class)) {
+ rc = PTR_ERR(coproc_device.class);
+ pr_err("Unable to create %s class %d\n", name, rc);
+ goto err_class;
+ }
+ coproc_device.class->devnode = coproc_devnode;
+ coproc_device.cop_type = cop_type;
+ coproc_device.vops = vops;
+
+ coproc_fops.owner = mod;
+ cdev_init(&coproc_device.cdev, &coproc_fops);
+
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ rc = cdev_add(&coproc_device.cdev, devno, 1);
+ if (rc) {
+ pr_err("cdev_add() failed %d\n", rc);
+ goto err_cdev;
+ }
+
+ coproc_device.device = device_create(coproc_device.class, NULL,
+ devno, NULL, name, MINOR(devno));
+ if (IS_ERR(coproc_device.device)) {
+ rc = PTR_ERR(coproc_device.device);
+ pr_err("Unable to create coproc-%d %d\n", MINOR(devno), rc);
+ goto err;
+ }
+
+ pr_devel("Added dev [%d,%d]\n", MAJOR(devno), MINOR(devno));
+
+ return 0;
+
+err:
+ cdev_del(&coproc_device.cdev);
+err_cdev:
+ class_destroy(coproc_device.class);
+err_class:
+ unregister_chrdev_region(coproc_device.devt, 1);
+ return rc;
+}
+
+void vas_unregister_coproc_api(void)
+{
+ dev_t devno;
+
+ cdev_del(&coproc_device.cdev);
+ devno = MKDEV(MAJOR(coproc_device.devt), 0);
+ device_destroy(coproc_device.class, devno);
+
+ class_destroy(coproc_device.class);
+ unregister_chrdev_region(coproc_device.devt, 1);
+}
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 9978f594cac0..db65bfcd1e74 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -1,58 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
config PPC_CELL
+ select PPC_64S_HASH_MMU if PPC64
bool
- default n
-
-config PPC_CELL_COMMON
- bool
- select PPC_CELL
- select PPC_DCR_MMIO
- select PPC_INDIRECT_PIO
- select PPC_INDIRECT_MMIO
- select PPC_NATIVE
- select PPC_RTAS
- select IRQ_EDGE_EOI_HANDLER
-
-config PPC_CELL_NATIVE
- bool
- select PPC_CELL_COMMON
- select MPIC
- select PPC_IO_WORKAROUNDS
- select IBM_EMAC_EMAC4
- select IBM_EMAC_RGMII
- select IBM_EMAC_ZMII #test only
- select IBM_EMAC_TAH #test only
- default n
-
-config PPC_IBM_CELL_BLADE
- bool "IBM Cell Blade"
- depends on PPC64 && PPC_BOOK3S
- select PPC_CELL_NATIVE
- select PPC_OF_PLATFORM_PCI
- select PCI
- select MMIO_NVRAM
- select PPC_UDBG_16550
- select UDBG_RTAS_CONSOLE
-
-config PPC_CELLEB
- bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
- depends on PPC64 && PPC_BOOK3S
- select PPC_CELL_NATIVE
- select PPC_OF_PLATFORM_PCI
- select PCI
- select HAS_TXX9_SERIAL
- select PPC_UDBG_BEAT
- select USB_OHCI_BIG_ENDIAN_MMIO
- select USB_EHCI_BIG_ENDIAN_MMIO
-
-config PPC_CELL_QPACE
- bool "IBM Cell - QPACE"
- depends on PPC64 && PPC_BOOK3S
- select PPC_CELL_COMMON
-
-config AXON_MSI
- bool
- depends on PPC_IBM_CELL_BLADE && PCI_MSI
- default y
menu "Cell Broadband Engine options"
depends on PPC_CELL
@@ -61,79 +10,15 @@ config SPU_FS
tristate "SPU file system"
default m
depends on PPC_CELL
+ depends on COREDUMP
select SPU_BASE
- select MEMORY_HOTPLUG
help
The SPU file system is used to access Synergistic Processing
Units on machines implementing the Broadband Processor
Architecture.
-config SPU_FS_64K_LS
- bool "Use 64K pages to map SPE local store"
- # we depend on PPC_MM_SLICES for now rather than selecting
- # it because we depend on hugetlbfs hooks being present. We
- # will fix that when the generic code has been improved to
- # not require hijacking hugetlbfs hooks.
- depends on SPU_FS && PPC_MM_SLICES && !PPC_64K_PAGES
- default y
- select PPC_HAS_HASH_64K
- help
- This option causes SPE local stores to be mapped in process
- address spaces using 64K pages while the rest of the kernel
- uses 4K pages. This can improve performances of applications
- using multiple SPEs by lowering the TLB pressure on them.
-
config SPU_BASE
bool
- default n
-
-config CBE_RAS
- bool "RAS features for bare metal Cell BE"
- depends on PPC_CELL_NATIVE
- default y
-
-config PPC_IBM_CELL_RESETBUTTON
- bool "IBM Cell Blade Pinhole reset button"
- depends on CBE_RAS && PPC_IBM_CELL_BLADE
- default y
- help
- Support Pinhole Resetbutton on IBM Cell blades.
- This adds a method to trigger system reset via front panel pinhole button.
-
-config PPC_IBM_CELL_POWERBUTTON
- tristate "IBM Cell Blade power button"
- depends on PPC_IBM_CELL_BLADE && INPUT_EVDEV
- default y
- help
- Support Powerbutton on IBM Cell blades.
- This will enable the powerbutton as an input device.
-
-config CBE_THERM
- tristate "CBE thermal support"
- default m
- depends on CBE_RAS && SPU_BASE
-
-config PPC_PMI
- tristate
- default y
- depends on CPU_FREQ_CBE_PMI || PPC_IBM_CELL_POWERBUTTON
- help
- PMI (Platform Management Interrupt) is a way to
- communicate with the BMC (Baseboard Management Controller).
- It is used in some IBM Cell blades.
-
-config CBE_CPUFREQ_SPU_GOVERNOR
- tristate "CBE frequency scaling based on SPU usage"
- depends on SPU_FS && CPU_FREQ
- default m
- help
- This governor checks for spu usage to adjust the cpu frequency.
- If no spu is running on a given cpu, that cpu will be throttled to
- the minimal possible frequency.
+ select PPC_COPRO_BASE
endmenu
-
-config OPROFILE_CELL
- def_bool y
- depends on PPC_CELL_NATIVE && (OPROFILE = m || OPROFILE = y) && SPU_BASE
-
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index fe053e7c73ee..7e5ff239c376 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,46 +1,4 @@
-obj-$(CONFIG_PPC_CELL_COMMON) += cbe_regs.o interrupt.o pervasive.o
-
-obj-$(CONFIG_PPC_CELL_NATIVE) += iommu.o setup.o spider-pic.o \
- pmu.o spider-pci.o
-obj-$(CONFIG_CBE_RAS) += ras.o
-
-obj-$(CONFIG_CBE_THERM) += cbe_thermal.o
-obj-$(CONFIG_CBE_CPUFREQ_SPU_GOVERNOR) += cpufreq_spudemand.o
-
-obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON) += cbe_powerbutton.o
-
-ifeq ($(CONFIG_SMP),y)
-obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o
-obj-$(CONFIG_PPC_CELL_QPACE) += smp.o
-endif
-
-# needed only when building loadable spufs.ko
-spu-priv1-$(CONFIG_PPC_CELL_COMMON) += spu_priv1_mmio.o
-spu-manage-$(CONFIG_PPC_CELL_COMMON) += spu_manage.o
-
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \
- spu_notify.o \
- spu_syscalls.o spu_fault.o \
- $(spu-priv1-y) \
- $(spu-manage-y) \
+ spu_syscalls.o \
spufs/
-
-obj-$(CONFIG_AXON_MSI) += axon_msi.o
-
-# qpace setup
-obj-$(CONFIG_PPC_CELL_QPACE) += qpace_setup.o
-
-# celleb stuff
-ifeq ($(CONFIG_PPC_CELLEB),y)
-obj-y += celleb_setup.o \
- celleb_pci.o celleb_scc_epci.o \
- celleb_scc_pciex.o \
- celleb_scc_uhc.o \
- spider-pci.o beat.o beat_htab.o \
- beat_hvCall.o beat_interrupt.o \
- beat_iommu.o
-
-obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o
-obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o
-obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o
-endif
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
deleted file mode 100644
index 85825b5401e5..000000000000
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * Copyright 2007, Michael Ellerman, IBM Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/msi.h>
-#include <linux/export.h>
-#include <linux/of_platform.h>
-#include <linux/debugfs.h>
-#include <linux/slab.h>
-
-#include <asm/dcr.h>
-#include <asm/machdep.h>
-#include <asm/prom.h>
-
-
-/*
- * MSIC registers, specified as offsets from dcr_base
- */
-#define MSIC_CTRL_REG 0x0
-
-/* Base Address registers specify FIFO location in BE memory */
-#define MSIC_BASE_ADDR_HI_REG 0x3
-#define MSIC_BASE_ADDR_LO_REG 0x4
-
-/* Hold the read/write offsets into the FIFO */
-#define MSIC_READ_OFFSET_REG 0x5
-#define MSIC_WRITE_OFFSET_REG 0x6
-
-
-/* MSIC control register flags */
-#define MSIC_CTRL_ENABLE 0x0001
-#define MSIC_CTRL_FIFO_FULL_ENABLE 0x0002
-#define MSIC_CTRL_IRQ_ENABLE 0x0008
-#define MSIC_CTRL_FULL_STOP_ENABLE 0x0010
-
-/*
- * The MSIC can be configured to use a FIFO of 32KB, 64KB, 128KB or 256KB.
- * Currently we're using a 64KB FIFO size.
- */
-#define MSIC_FIFO_SIZE_SHIFT 16
-#define MSIC_FIFO_SIZE_BYTES (1 << MSIC_FIFO_SIZE_SHIFT)
-
-/*
- * To configure the FIFO size as (1 << n) bytes, we write (n - 15) into bits
- * 8-9 of the MSIC control reg.
- */
-#define MSIC_CTRL_FIFO_SIZE (((MSIC_FIFO_SIZE_SHIFT - 15) << 8) & 0x300)
-
-/*
- * We need to mask the read/write offsets to make sure they stay within
- * the bounds of the FIFO. Also they should always be 16-byte aligned.
- */
-#define MSIC_FIFO_SIZE_MASK ((MSIC_FIFO_SIZE_BYTES - 1) & ~0xFu)
-
-/* Each entry in the FIFO is 16 bytes, the first 4 bytes hold the irq # */
-#define MSIC_FIFO_ENTRY_SIZE 0x10
-
-
-struct axon_msic {
- struct irq_domain *irq_domain;
- __le32 *fifo_virt;
- dma_addr_t fifo_phys;
- dcr_host_t dcr_host;
- u32 read_offset;
-#ifdef DEBUG
- u32 __iomem *trigger;
-#endif
-};
-
-#ifdef DEBUG
-void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic);
-#else
-static inline void axon_msi_debug_setup(struct device_node *dn,
- struct axon_msic *msic) { }
-#endif
-
-
-static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val)
-{
- pr_devel("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n);
-
- dcr_write(msic->dcr_host, dcr_n, val);
-}
-
-static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
-{
- struct irq_chip *chip = irq_desc_get_chip(desc);
- struct axon_msic *msic = irq_get_handler_data(irq);
- u32 write_offset, msi;
- int idx;
- int retry = 0;
-
- write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
- pr_devel("axon_msi: original write_offset 0x%x\n", write_offset);
-
- /* write_offset doesn't wrap properly, so we have to mask it */
- write_offset &= MSIC_FIFO_SIZE_MASK;
-
- while (msic->read_offset != write_offset && retry < 100) {
- idx = msic->read_offset / sizeof(__le32);
- msi = le32_to_cpu(msic->fifo_virt[idx]);
- msi &= 0xFFFF;
-
- pr_devel("axon_msi: woff %x roff %x msi %x\n",
- write_offset, msic->read_offset, msi);
-
- if (msi < nr_irqs && irq_get_chip_data(msi) == msic) {
- generic_handle_irq(msi);
- msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
- } else {
- /*
- * Reading the MSIC_WRITE_OFFSET_REG does not
- * reliably flush the outstanding DMA to the
- * FIFO buffer. Here we were reading stale
- * data, so we need to retry.
- */
- udelay(1);
- retry++;
- pr_devel("axon_msi: invalid irq 0x%x!\n", msi);
- continue;
- }
-
- if (retry) {
- pr_devel("axon_msi: late irq 0x%x, retry %d\n",
- msi, retry);
- retry = 0;
- }
-
- msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
- msic->read_offset &= MSIC_FIFO_SIZE_MASK;
- }
-
- if (retry) {
- printk(KERN_WARNING "axon_msi: irq timed out\n");
-
- msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
- msic->read_offset &= MSIC_FIFO_SIZE_MASK;
- }
-
- chip->irq_eoi(&desc->irq_data);
-}
-
-static struct axon_msic *find_msi_translator(struct pci_dev *dev)
-{
- struct irq_domain *irq_domain;
- struct device_node *dn, *tmp;
- const phandle *ph;
- struct axon_msic *msic = NULL;
-
- dn = of_node_get(pci_device_to_OF_node(dev));
- if (!dn) {
- dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
- return NULL;
- }
-
- for (; dn; dn = of_get_next_parent(dn)) {
- ph = of_get_property(dn, "msi-translator", NULL);
- if (ph)
- break;
- }
-
- if (!ph) {
- dev_dbg(&dev->dev,
- "axon_msi: no msi-translator property found\n");
- goto out_error;
- }
-
- tmp = dn;
- dn = of_find_node_by_phandle(*ph);
- of_node_put(tmp);
- if (!dn) {
- dev_dbg(&dev->dev,
- "axon_msi: msi-translator doesn't point to a node\n");
- goto out_error;
- }
-
- irq_domain = irq_find_host(dn);
- if (!irq_domain) {
- dev_dbg(&dev->dev, "axon_msi: no irq_domain found for node %s\n",
- dn->full_name);
- goto out_error;
- }
-
- msic = irq_domain->host_data;
-
-out_error:
- of_node_put(dn);
-
- return msic;
-}
-
-static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
-{
- if (!find_msi_translator(dev))
- return -ENODEV;
-
- return 0;
-}
-
-static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
-{
- struct device_node *dn;
- struct msi_desc *entry;
- int len;
- const u32 *prop;
-
- dn = of_node_get(pci_device_to_OF_node(dev));
- if (!dn) {
- dev_dbg(&dev->dev, "axon_msi: no pci_dn found\n");
- return -ENODEV;
- }
-
- entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
-
- for (; dn; dn = of_get_next_parent(dn)) {
- if (entry->msi_attrib.is_64) {
- prop = of_get_property(dn, "msi-address-64", &len);
- if (prop)
- break;
- }
-
- prop = of_get_property(dn, "msi-address-32", &len);
- if (prop)
- break;
- }
-
- if (!prop) {
- dev_dbg(&dev->dev,
- "axon_msi: no msi-address-(32|64) properties found\n");
- return -ENOENT;
- }
-
- switch (len) {
- case 8:
- msg->address_hi = prop[0];
- msg->address_lo = prop[1];
- break;
- case 4:
- msg->address_hi = 0;
- msg->address_lo = prop[0];
- break;
- default:
- dev_dbg(&dev->dev,
- "axon_msi: malformed msi-address-(32|64) property\n");
- of_node_put(dn);
- return -EINVAL;
- }
-
- of_node_put(dn);
-
- return 0;
-}
-
-static int axon_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
- unsigned int virq, rc;
- struct msi_desc *entry;
- struct msi_msg msg;
- struct axon_msic *msic;
-
- msic = find_msi_translator(dev);
- if (!msic)
- return -ENODEV;
-
- rc = setup_msi_msg_address(dev, &msg);
- if (rc)
- return rc;
-
- list_for_each_entry(entry, &dev->msi_list, list) {
- virq = irq_create_direct_mapping(msic->irq_domain);
- if (virq == NO_IRQ) {
- dev_warn(&dev->dev,
- "axon_msi: virq allocation failed!\n");
- return -1;
- }
- dev_dbg(&dev->dev, "axon_msi: allocated virq 0x%x\n", virq);
-
- irq_set_msi_desc(virq, entry);
- msg.data = virq;
- write_msi_msg(virq, &msg);
- }
-
- return 0;
-}
-
-static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
-{
- struct msi_desc *entry;
-
- dev_dbg(&dev->dev, "axon_msi: tearing down msi irqs\n");
-
- list_for_each_entry(entry, &dev->msi_list, list) {
- if (entry->irq == NO_IRQ)
- continue;
-
- irq_set_msi_desc(entry->irq, NULL);
- irq_dispose_mapping(entry->irq);
- }
-}
-
-static struct irq_chip msic_irq_chip = {
- .irq_mask = mask_msi_irq,
- .irq_unmask = unmask_msi_irq,
- .irq_shutdown = mask_msi_irq,
- .name = "AXON-MSI",
-};
-
-static int msic_host_map(struct irq_domain *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- irq_set_chip_data(virq, h->host_data);
- irq_set_chip_and_handler(virq, &msic_irq_chip, handle_simple_irq);
-
- return 0;
-}
-
-static const struct irq_domain_ops msic_host_ops = {
- .map = msic_host_map,
-};
-
-static void axon_msi_shutdown(struct platform_device *device)
-{
- struct axon_msic *msic = dev_get_drvdata(&device->dev);
- u32 tmp;
-
- pr_devel("axon_msi: disabling %s\n",
- msic->irq_domain->of_node->full_name);
- tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG);
- tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE;
- msic_dcr_write(msic, MSIC_CTRL_REG, tmp);
-}
-
-static int axon_msi_probe(struct platform_device *device)
-{
- struct device_node *dn = device->dev.of_node;
- struct axon_msic *msic;
- unsigned int virq;
- int dcr_base, dcr_len;
-
- pr_devel("axon_msi: setting up dn %s\n", dn->full_name);
-
- msic = kzalloc(sizeof(struct axon_msic), GFP_KERNEL);
- if (!msic) {
- printk(KERN_ERR "axon_msi: couldn't allocate msic for %s\n",
- dn->full_name);
- goto out;
- }
-
- dcr_base = dcr_resource_start(dn, 0);
- dcr_len = dcr_resource_len(dn, 0);
-
- if (dcr_base == 0 || dcr_len == 0) {
- printk(KERN_ERR
- "axon_msi: couldn't parse dcr properties on %s\n",
- dn->full_name);
- goto out_free_msic;
- }
-
- msic->dcr_host = dcr_map(dn, dcr_base, dcr_len);
- if (!DCR_MAP_OK(msic->dcr_host)) {
- printk(KERN_ERR "axon_msi: dcr_map failed for %s\n",
- dn->full_name);
- goto out_free_msic;
- }
-
- msic->fifo_virt = dma_alloc_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES,
- &msic->fifo_phys, GFP_KERNEL);
- if (!msic->fifo_virt) {
- printk(KERN_ERR "axon_msi: couldn't allocate fifo for %s\n",
- dn->full_name);
- goto out_free_msic;
- }
-
- virq = irq_of_parse_and_map(dn, 0);
- if (virq == NO_IRQ) {
- printk(KERN_ERR "axon_msi: irq parse and map failed for %s\n",
- dn->full_name);
- goto out_free_fifo;
- }
- memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
-
- /* We rely on being able to stash a virq in a u16, so limit irqs to < 65536 */
- msic->irq_domain = irq_domain_add_nomap(dn, 65536, &msic_host_ops, msic);
- if (!msic->irq_domain) {
- printk(KERN_ERR "axon_msi: couldn't allocate irq_domain for %s\n",
- dn->full_name);
- goto out_free_fifo;
- }
-
- irq_set_handler_data(virq, msic);
- irq_set_chained_handler(virq, axon_msi_cascade);
- pr_devel("axon_msi: irq 0x%x setup for axon_msi\n", virq);
-
- /* Enable the MSIC hardware */
- msic_dcr_write(msic, MSIC_BASE_ADDR_HI_REG, msic->fifo_phys >> 32);
- msic_dcr_write(msic, MSIC_BASE_ADDR_LO_REG,
- msic->fifo_phys & 0xFFFFFFFF);
- msic_dcr_write(msic, MSIC_CTRL_REG,
- MSIC_CTRL_IRQ_ENABLE | MSIC_CTRL_ENABLE |
- MSIC_CTRL_FIFO_SIZE);
-
- msic->read_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG)
- & MSIC_FIFO_SIZE_MASK;
-
- dev_set_drvdata(&device->dev, msic);
-
- ppc_md.setup_msi_irqs = axon_msi_setup_msi_irqs;
- ppc_md.teardown_msi_irqs = axon_msi_teardown_msi_irqs;
- ppc_md.msi_check_device = axon_msi_check_device;
-
- axon_msi_debug_setup(dn, msic);
-
- printk(KERN_DEBUG "axon_msi: setup MSIC on %s\n", dn->full_name);
-
- return 0;
-
-out_free_fifo:
- dma_free_coherent(&device->dev, MSIC_FIFO_SIZE_BYTES, msic->fifo_virt,
- msic->fifo_phys);
-out_free_msic:
- kfree(msic);
-out:
-
- return -1;
-}
-
-static const struct of_device_id axon_msi_device_id[] = {
- {
- .compatible = "ibm,axon-msic"
- },
- {}
-};
-
-static struct platform_driver axon_msi_driver = {
- .probe = axon_msi_probe,
- .shutdown = axon_msi_shutdown,
- .driver = {
- .name = "axon-msi",
- .owner = THIS_MODULE,
- .of_match_table = axon_msi_device_id,
- },
-};
-
-static int __init axon_msi_init(void)
-{
- return platform_driver_register(&axon_msi_driver);
-}
-subsys_initcall(axon_msi_init);
-
-
-#ifdef DEBUG
-static int msic_set(void *data, u64 val)
-{
- struct axon_msic *msic = data;
- out_le32(msic->trigger, val);
- return 0;
-}
-
-static int msic_get(void *data, u64 *val)
-{
- *val = 0;
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(fops_msic, msic_get, msic_set, "%llu\n");
-
-void axon_msi_debug_setup(struct device_node *dn, struct axon_msic *msic)
-{
- char name[8];
- u64 addr;
-
- addr = of_translate_address(dn, of_get_property(dn, "reg", NULL));
- if (addr == OF_BAD_ADDR) {
- pr_devel("axon_msi: couldn't translate reg property\n");
- return;
- }
-
- msic->trigger = ioremap(addr, 0x4);
- if (!msic->trigger) {
- pr_devel("axon_msi: ioremap failed\n");
- return;
- }
-
- snprintf(name, sizeof(name), "msic_%d", of_node_to_nid(dn));
-
- if (!debugfs_create_file(name, 0600, powerpc_debugfs_root,
- msic, &fops_msic)) {
- pr_devel("axon_msi: debugfs_create_file failed!\n");
- return;
- }
-}
-#endif /* DEBUG */
diff --git a/arch/powerpc/platforms/cell/beat.c b/arch/powerpc/platforms/cell/beat.c
deleted file mode 100644
index affcf566d460..000000000000
--- a/arch/powerpc/platforms/cell/beat.c
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- * Simple routines for Celleb/Beat
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/rtc.h>
-#include <linux/interrupt.h>
-#include <linux/irqreturn.h>
-#include <linux/reboot.h>
-
-#include <asm/hvconsole.h>
-#include <asm/time.h>
-#include <asm/machdep.h>
-#include <asm/firmware.h>
-
-#include "beat_wrapper.h"
-#include "beat.h"
-#include "beat_interrupt.h"
-
-static int beat_pm_poweroff_flag;
-
-void beat_restart(char *cmd)
-{
- beat_shutdown_logical_partition(!beat_pm_poweroff_flag);
-}
-
-void beat_power_off(void)
-{
- beat_shutdown_logical_partition(0);
-}
-
-u64 beat_halt_code = 0x1000000000000000UL;
-EXPORT_SYMBOL(beat_halt_code);
-
-void beat_halt(void)
-{
- beat_shutdown_logical_partition(beat_halt_code);
-}
-
-int beat_set_rtc_time(struct rtc_time *rtc_time)
-{
- u64 tim;
- tim = mktime(rtc_time->tm_year+1900,
- rtc_time->tm_mon+1, rtc_time->tm_mday,
- rtc_time->tm_hour, rtc_time->tm_min, rtc_time->tm_sec);
- if (beat_rtc_write(tim))
- return -1;
- return 0;
-}
-
-void beat_get_rtc_time(struct rtc_time *rtc_time)
-{
- u64 tim;
-
- if (beat_rtc_read(&tim))
- tim = 0;
- to_tm(tim, rtc_time);
- rtc_time->tm_year -= 1900;
- rtc_time->tm_mon -= 1;
-}
-
-#define BEAT_NVRAM_SIZE 4096
-
-ssize_t beat_nvram_read(char *buf, size_t count, loff_t *index)
-{
- unsigned int i;
- unsigned long len;
- char *p = buf;
-
- if (*index >= BEAT_NVRAM_SIZE)
- return -ENODEV;
- i = *index;
- if (i + count > BEAT_NVRAM_SIZE)
- count = BEAT_NVRAM_SIZE - i;
-
- for (; count != 0; count -= len) {
- len = count;
- if (len > BEAT_NVRW_CNT)
- len = BEAT_NVRW_CNT;
- if (beat_eeprom_read(i, len, p))
- return -EIO;
-
- p += len;
- i += len;
- }
- *index = i;
- return p - buf;
-}
-
-ssize_t beat_nvram_write(char *buf, size_t count, loff_t *index)
-{
- unsigned int i;
- unsigned long len;
- char *p = buf;
-
- if (*index >= BEAT_NVRAM_SIZE)
- return -ENODEV;
- i = *index;
- if (i + count > BEAT_NVRAM_SIZE)
- count = BEAT_NVRAM_SIZE - i;
-
- for (; count != 0; count -= len) {
- len = count;
- if (len > BEAT_NVRW_CNT)
- len = BEAT_NVRW_CNT;
- if (beat_eeprom_write(i, len, p))
- return -EIO;
-
- p += len;
- i += len;
- }
- *index = i;
- return p - buf;
-}
-
-ssize_t beat_nvram_get_size(void)
-{
- return BEAT_NVRAM_SIZE;
-}
-
-int beat_set_xdabr(unsigned long dabr, unsigned long dabrx)
-{
- if (beat_set_dabr(dabr, dabrx))
- return -1;
- return 0;
-}
-
-int64_t beat_get_term_char(u64 vterm, u64 *len, u64 *t1, u64 *t2)
-{
- u64 db[2];
- s64 ret;
-
- ret = beat_get_characters_from_console(vterm, len, (u8 *)db);
- if (ret == 0) {
- *t1 = db[0];
- *t2 = db[1];
- }
- return ret;
-}
-EXPORT_SYMBOL(beat_get_term_char);
-
-int64_t beat_put_term_char(u64 vterm, u64 len, u64 t1, u64 t2)
-{
- u64 db[2];
-
- db[0] = t1;
- db[1] = t2;
- return beat_put_characters_to_console(vterm, len, (u8 *)db);
-}
-EXPORT_SYMBOL(beat_put_term_char);
-
-void beat_power_save(void)
-{
- beat_pause(0);
-}
-
-#ifdef CONFIG_KEXEC
-void beat_kexec_cpu_down(int crash, int secondary)
-{
- beatic_deinit_IRQ();
-}
-#endif
-
-static irqreturn_t beat_power_event(int virq, void *arg)
-{
- printk(KERN_DEBUG "Beat: power button pressed\n");
- beat_pm_poweroff_flag = 1;
- ctrl_alt_del();
- return IRQ_HANDLED;
-}
-
-static irqreturn_t beat_reset_event(int virq, void *arg)
-{
- printk(KERN_DEBUG "Beat: reset button pressed\n");
- beat_pm_poweroff_flag = 0;
- ctrl_alt_del();
- return IRQ_HANDLED;
-}
-
-static struct beat_event_list {
- const char *typecode;
- irq_handler_t handler;
- unsigned int virq;
-} beat_event_list[] = {
- { "power", beat_power_event, 0 },
- { "reset", beat_reset_event, 0 },
-};
-
-static int __init beat_register_event(void)
-{
- u64 path[4], data[2];
- int rc, i;
- unsigned int virq;
-
- for (i = 0; i < ARRAY_SIZE(beat_event_list); i++) {
- struct beat_event_list *ev = &beat_event_list[i];
-
- if (beat_construct_event_receive_port(data) != 0) {
- printk(KERN_ERR "Beat: "
- "cannot construct event receive port for %s\n",
- ev->typecode);
- return -EINVAL;
- }
-
- virq = irq_create_mapping(NULL, data[0]);
- if (virq == NO_IRQ) {
- printk(KERN_ERR "Beat: failed to get virtual IRQ"
- " for event receive port for %s\n",
- ev->typecode);
- beat_destruct_event_receive_port(data[0]);
- return -EIO;
- }
- ev->virq = virq;
-
- rc = request_irq(virq, ev->handler, 0,
- ev->typecode, NULL);
- if (rc != 0) {
- printk(KERN_ERR "Beat: failed to request virtual IRQ"
- " for event receive port for %s\n",
- ev->typecode);
- beat_destruct_event_receive_port(data[0]);
- return rc;
- }
-
- path[0] = 0x1000000065780000ul; /* 1,ex */
- path[1] = 0x627574746f6e0000ul; /* button */
- path[2] = 0;
- strncpy((char *)&path[2], ev->typecode, 8);
- path[3] = 0;
- data[1] = 0;
-
- beat_create_repository_node(path, data);
- }
- return 0;
-}
-
-static int __init beat_event_init(void)
-{
- if (!firmware_has_feature(FW_FEATURE_BEAT))
- return -EINVAL;
-
- beat_pm_poweroff_flag = 0;
- return beat_register_event();
-}
-
-device_initcall(beat_event_init);
diff --git a/arch/powerpc/platforms/cell/beat.h b/arch/powerpc/platforms/cell/beat.h
deleted file mode 100644
index bfcb8e351ae5..000000000000
--- a/arch/powerpc/platforms/cell/beat.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Guest OS Interfaces.
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _CELLEB_BEAT_H
-#define _CELLEB_BEAT_H
-
-int64_t beat_get_term_char(uint64_t, uint64_t *, uint64_t *, uint64_t *);
-int64_t beat_put_term_char(uint64_t, uint64_t, uint64_t, uint64_t);
-int64_t beat_repository_encode(int, const char *, uint64_t[4]);
-void beat_restart(char *);
-void beat_power_off(void);
-void beat_halt(void);
-int beat_set_rtc_time(struct rtc_time *);
-void beat_get_rtc_time(struct rtc_time *);
-ssize_t beat_nvram_get_size(void);
-ssize_t beat_nvram_read(char *, size_t, loff_t *);
-ssize_t beat_nvram_write(char *, size_t, loff_t *);
-int beat_set_xdabr(unsigned long, unsigned long);
-void beat_power_save(void);
-void beat_kexec_cpu_down(int, int);
-
-#endif /* _CELLEB_BEAT_H */
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
deleted file mode 100644
index d4d245c0d787..000000000000
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ /dev/null
@@ -1,445 +0,0 @@
-/*
- * "Cell Reference Set" HTAB support.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/pseries/lpar.c:
- * Copyright (C) 2001 Todd Inglett, IBM Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG_LOW
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-
-#include <asm/mmu.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/machdep.h>
-#include <asm/udbg.h>
-
-#include "beat_wrapper.h"
-
-#ifdef DEBUG_LOW
-#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while (0)
-#else
-#define DBG_LOW(fmt...) do { } while (0)
-#endif
-
-static DEFINE_RAW_SPINLOCK(beat_htab_lock);
-
-static inline unsigned int beat_read_mask(unsigned hpte_group)
-{
- unsigned long rmask = 0;
- u64 hpte_v[5];
-
- beat_read_htab_entries(0, hpte_group + 0, hpte_v);
- if (!(hpte_v[0] & HPTE_V_BOLTED))
- rmask |= 0x8000;
- if (!(hpte_v[1] & HPTE_V_BOLTED))
- rmask |= 0x4000;
- if (!(hpte_v[2] & HPTE_V_BOLTED))
- rmask |= 0x2000;
- if (!(hpte_v[3] & HPTE_V_BOLTED))
- rmask |= 0x1000;
- beat_read_htab_entries(0, hpte_group + 4, hpte_v);
- if (!(hpte_v[0] & HPTE_V_BOLTED))
- rmask |= 0x0800;
- if (!(hpte_v[1] & HPTE_V_BOLTED))
- rmask |= 0x0400;
- if (!(hpte_v[2] & HPTE_V_BOLTED))
- rmask |= 0x0200;
- if (!(hpte_v[3] & HPTE_V_BOLTED))
- rmask |= 0x0100;
- hpte_group = ~hpte_group & (htab_hash_mask * HPTES_PER_GROUP);
- beat_read_htab_entries(0, hpte_group + 0, hpte_v);
- if (!(hpte_v[0] & HPTE_V_BOLTED))
- rmask |= 0x80;
- if (!(hpte_v[1] & HPTE_V_BOLTED))
- rmask |= 0x40;
- if (!(hpte_v[2] & HPTE_V_BOLTED))
- rmask |= 0x20;
- if (!(hpte_v[3] & HPTE_V_BOLTED))
- rmask |= 0x10;
- beat_read_htab_entries(0, hpte_group + 4, hpte_v);
- if (!(hpte_v[0] & HPTE_V_BOLTED))
- rmask |= 0x08;
- if (!(hpte_v[1] & HPTE_V_BOLTED))
- rmask |= 0x04;
- if (!(hpte_v[2] & HPTE_V_BOLTED))
- rmask |= 0x02;
- if (!(hpte_v[3] & HPTE_V_BOLTED))
- rmask |= 0x01;
- return rmask;
-}
-
-static long beat_lpar_hpte_insert(unsigned long hpte_group,
- unsigned long vpn, unsigned long pa,
- unsigned long rflags, unsigned long vflags,
- int psize, int apsize, int ssize)
-{
- unsigned long lpar_rc;
- u64 hpte_v, hpte_r, slot;
-
- if (vflags & HPTE_V_SECONDARY)
- return -1;
-
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
- "rflags=%lx, vflags=%lx, psize=%d)\n",
- hpte_group, va, pa, rflags, vflags, psize);
-
- hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) |
- vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
-
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
-
- if (rflags & _PAGE_NO_CACHE)
- hpte_r &= ~HPTE_R_M;
-
- raw_spin_lock(&beat_htab_lock);
- lpar_rc = beat_read_mask(hpte_group);
- if (lpar_rc == 0) {
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" full\n");
- raw_spin_unlock(&beat_htab_lock);
- return -1;
- }
-
- lpar_rc = beat_insert_htab_entry(0, hpte_group, lpar_rc << 48,
- hpte_v, hpte_r, &slot);
- raw_spin_unlock(&beat_htab_lock);
-
- /*
- * Since we try and ioremap PHBs we don't own, the pte insert
- * will fail. However we must catch the failure in hash_page
- * or we will loop forever, so return -2 in this case.
- */
- if (unlikely(lpar_rc != 0)) {
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" lpar err %lx\n", lpar_rc);
- return -2;
- }
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" -> slot: %lx\n", slot);
-
- /* We have to pass down the secondary bucket bit here as well */
- return (slot ^ hpte_group) & 15;
-}
-
-static long beat_lpar_hpte_remove(unsigned long hpte_group)
-{
- DBG_LOW("hpte_remove(group=%lx)\n", hpte_group);
- return -1;
-}
-
-static unsigned long beat_lpar_hpte_getword0(unsigned long slot)
-{
- unsigned long dword0;
- unsigned long lpar_rc;
- u64 dword[5];
-
- lpar_rc = beat_read_htab_entries(0, slot & ~3UL, dword);
-
- dword0 = dword[slot&3];
-
- BUG_ON(lpar_rc != 0);
-
- return dword0;
-}
-
-static void beat_lpar_hptab_clear(void)
-{
- unsigned long size_bytes = 1UL << ppc64_pft_size;
- unsigned long hpte_count = size_bytes >> 4;
- int i;
- u64 dummy0, dummy1;
-
- /* TODO: Use bulk call */
- for (i = 0; i < hpte_count; i++)
- beat_write_htab_entry(0, i, 0, 0, -1UL, -1UL, &dummy0, &dummy1);
-}
-
-/*
- * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
- * the low 3 bits of flags happen to line up. So no transform is needed.
- * We can probably optimize here and assume the high bits of newpp are
- * already zero. For now I am paranoid.
- */
-static long beat_lpar_hpte_updatepp(unsigned long slot,
- unsigned long newpp,
- unsigned long vpn,
- int psize, int apsize,
- int ssize, int local)
-{
- unsigned long lpar_rc;
- u64 dummy0, dummy1;
- unsigned long want_v;
-
- want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-
- DBG_LOW(" update: "
- "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
- want_v & HPTE_V_AVPN, slot, psize, newpp);
-
- raw_spin_lock(&beat_htab_lock);
- dummy0 = beat_lpar_hpte_getword0(slot);
- if ((dummy0 & ~0x7FUL) != (want_v & ~0x7FUL)) {
- DBG_LOW("not found !\n");
- raw_spin_unlock(&beat_htab_lock);
- return -1;
- }
-
- lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7, &dummy0,
- &dummy1);
- raw_spin_unlock(&beat_htab_lock);
- if (lpar_rc != 0 || dummy0 == 0) {
- DBG_LOW("not found !\n");
- return -1;
- }
-
- DBG_LOW("ok %lx %lx\n", dummy0, dummy1);
-
- BUG_ON(lpar_rc != 0);
-
- return 0;
-}
-
-static long beat_lpar_hpte_find(unsigned long vpn, int psize)
-{
- unsigned long hash;
- unsigned long i, j;
- long slot;
- unsigned long want_v, hpte_v;
-
- hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, MMU_SEGSIZE_256M);
- want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-
- for (j = 0; j < 2; j++) {
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- for (i = 0; i < HPTES_PER_GROUP; i++) {
- hpte_v = beat_lpar_hpte_getword0(slot);
-
- if (HPTE_V_COMPARE(hpte_v, want_v)
- && (hpte_v & HPTE_V_VALID)
- && (!!(hpte_v & HPTE_V_SECONDARY) == j)) {
- /* HPTE matches */
- if (j)
- slot = -slot;
- return slot;
- }
- ++slot;
- }
- hash = ~hash;
- }
-
- return -1;
-}
-
-static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
- unsigned long ea,
- int psize, int ssize)
-{
- unsigned long vpn;
- unsigned long lpar_rc, slot, vsid;
- u64 dummy0, dummy1;
-
- vsid = get_kernel_vsid(ea, MMU_SEGSIZE_256M);
- vpn = hpt_vpn(ea, vsid, MMU_SEGSIZE_256M);
-
- raw_spin_lock(&beat_htab_lock);
- slot = beat_lpar_hpte_find(vpn, psize);
- BUG_ON(slot == -1);
-
- lpar_rc = beat_write_htab_entry(0, slot, 0, newpp, 0, 7,
- &dummy0, &dummy1);
- raw_spin_unlock(&beat_htab_lock);
-
- BUG_ON(lpar_rc != 0);
-}
-
-static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
- int psize, int apsize,
- int ssize, int local)
-{
- unsigned long want_v;
- unsigned long lpar_rc;
- u64 dummy1, dummy2;
- unsigned long flags;
-
- DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
- slot, va, psize, local);
- want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
-
- raw_spin_lock_irqsave(&beat_htab_lock, flags);
- dummy1 = beat_lpar_hpte_getword0(slot);
-
- if ((dummy1 & ~0x7FUL) != (want_v & ~0x7FUL)) {
- DBG_LOW("not found !\n");
- raw_spin_unlock_irqrestore(&beat_htab_lock, flags);
- return;
- }
-
- lpar_rc = beat_write_htab_entry(0, slot, 0, 0, HPTE_V_VALID, 0,
- &dummy1, &dummy2);
- raw_spin_unlock_irqrestore(&beat_htab_lock, flags);
-
- BUG_ON(lpar_rc != 0);
-}
-
-void __init hpte_init_beat(void)
-{
- ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate;
- ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp;
- ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
- ppc_md.hpte_insert = beat_lpar_hpte_insert;
- ppc_md.hpte_remove = beat_lpar_hpte_remove;
- ppc_md.hpte_clear_all = beat_lpar_hptab_clear;
-}
-
-static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
- unsigned long vpn, unsigned long pa,
- unsigned long rflags, unsigned long vflags,
- int psize, int apsize, int ssize)
-{
- unsigned long lpar_rc;
- u64 hpte_v, hpte_r, slot;
-
- if (vflags & HPTE_V_SECONDARY)
- return -1;
-
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW("hpte_insert(group=%lx, vpn=%016lx, pa=%016lx, "
- "rflags=%lx, vflags=%lx, psize=%d)\n",
- hpte_group, vpn, pa, rflags, vflags, psize);
-
- hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) |
- vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
-
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
-
- if (rflags & _PAGE_NO_CACHE)
- hpte_r &= ~HPTE_R_M;
-
- /* insert into not-volted entry */
- lpar_rc = beat_insert_htab_entry3(0, hpte_group, hpte_v, hpte_r,
- HPTE_V_BOLTED, 0, &slot);
- /*
- * Since we try and ioremap PHBs we don't own, the pte insert
- * will fail. However we must catch the failure in hash_page
- * or we will loop forever, so return -2 in this case.
- */
- if (unlikely(lpar_rc != 0)) {
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" lpar err %lx\n", lpar_rc);
- return -2;
- }
- if (!(vflags & HPTE_V_BOLTED))
- DBG_LOW(" -> slot: %lx\n", slot);
-
- /* We have to pass down the secondary bucket bit here as well */
- return (slot ^ hpte_group) & 15;
-}
-
-/*
- * NOTE: for updatepp ops we are fortunate that the linux "newpp" bits and
- * the low 3 bits of flags happen to line up. So no transform is needed.
- * We can probably optimize here and assume the high bits of newpp are
- * already zero. For now I am paranoid.
- */
-static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
- unsigned long newpp,
- unsigned long vpn,
- int psize, int apsize,
- int ssize, int local)
-{
- unsigned long lpar_rc;
- unsigned long want_v;
- unsigned long pss;
-
- want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
- pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize];
-
- DBG_LOW(" update: "
- "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
- want_v & HPTE_V_AVPN, slot, psize, newpp);
-
- lpar_rc = beat_update_htab_permission3(0, slot, want_v, pss, 7, newpp);
-
- if (lpar_rc == 0xfffffff7) {
- DBG_LOW("not found !\n");
- return -1;
- }
-
- DBG_LOW("ok\n");
-
- BUG_ON(lpar_rc != 0);
-
- return 0;
-}
-
-static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
- int psize, int apsize,
- int ssize, int local)
-{
- unsigned long want_v;
- unsigned long lpar_rc;
- unsigned long pss;
-
- DBG_LOW(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
- slot, vpn, psize, local);
- want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
- pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize];
-
- lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
-
- /* E_busy can be valid output: page may be already replaced */
- BUG_ON(lpar_rc != 0 && lpar_rc != 0xfffffff7);
-}
-
-static int64_t _beat_lpar_hptab_clear_v3(void)
-{
- return beat_clear_htab3(0);
-}
-
-static void beat_lpar_hptab_clear_v3(void)
-{
- _beat_lpar_hptab_clear_v3();
-}
-
-void __init hpte_init_beat_v3(void)
-{
- if (_beat_lpar_hptab_clear_v3() == 0) {
- ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate_v3;
- ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp_v3;
- ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
- ppc_md.hpte_insert = beat_lpar_hpte_insert_v3;
- ppc_md.hpte_remove = beat_lpar_hpte_remove;
- ppc_md.hpte_clear_all = beat_lpar_hptab_clear_v3;
- } else {
- ppc_md.hpte_invalidate = beat_lpar_hpte_invalidate;
- ppc_md.hpte_updatepp = beat_lpar_hpte_updatepp;
- ppc_md.hpte_updateboltedpp = beat_lpar_hpte_updateboltedpp;
- ppc_md.hpte_insert = beat_lpar_hpte_insert;
- ppc_md.hpte_remove = beat_lpar_hpte_remove;
- ppc_md.hpte_clear_all = beat_lpar_hptab_clear;
- }
-}
diff --git a/arch/powerpc/platforms/cell/beat_hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S
deleted file mode 100644
index 96c801907126..000000000000
--- a/arch/powerpc/platforms/cell/beat_hvCall.S
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Beat hypervisor call I/F
- *
- * (C) Copyright 2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/pseries/hvCall.S.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <asm/ppc_asm.h>
-
-/* Not implemented on Beat, now */
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
-
- .text
-
-#define HVSC .long 0x44000022
-
-/* Note: takes only 7 input parameters at maximum */
-_GLOBAL(beat_hcall_norets)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- mr r11,r3
- mr r3,r4
- mr r4,r5
- mr r5,r6
- mr r6,r7
- mr r7,r8
- mr r8,r9
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes 8 input parameters at maximum */
-_GLOBAL(beat_hcall_norets8)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- mr r11,r3
- mr r3,r4
- mr r4,r5
- mr r5,r6
- mr r6,r7
- mr r7,r8
- mr r8,r9
- ld r10,STK_PARAM(R10)(r1)
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes only 6 input parameters, 1 output parameters at maximum */
-_GLOBAL(beat_hcall1)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- std r4,STK_PARAM(R4)(r1) /* save ret buffer */
-
- mr r11,r3
- mr r3,r5
- mr r4,r6
- mr r5,r7
- mr r6,r8
- mr r7,r9
- mr r8,r10
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- ld r12,STK_PARAM(R4)(r1)
- std r4, 0(r12)
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes only 6 input parameters, 2 output parameters at maximum */
-_GLOBAL(beat_hcall2)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- std r4,STK_PARAM(R4)(r1) /* save ret buffer */
-
- mr r11,r3
- mr r3,r5
- mr r4,r6
- mr r5,r7
- mr r6,r8
- mr r7,r9
- mr r8,r10
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- ld r12,STK_PARAM(R4)(r1)
- std r4, 0(r12)
- std r5, 8(r12)
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes only 6 input parameters, 3 output parameters at maximum */
-_GLOBAL(beat_hcall3)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- std r4,STK_PARAM(R4)(r1) /* save ret buffer */
-
- mr r11,r3
- mr r3,r5
- mr r4,r6
- mr r5,r7
- mr r6,r8
- mr r7,r9
- mr r8,r10
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- ld r12,STK_PARAM(R4)(r1)
- std r4, 0(r12)
- std r5, 8(r12)
- std r6, 16(r12)
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes only 6 input parameters, 4 output parameters at maximum */
-_GLOBAL(beat_hcall4)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- std r4,STK_PARAM(R4)(r1) /* save ret buffer */
-
- mr r11,r3
- mr r3,r5
- mr r4,r6
- mr r5,r7
- mr r6,r8
- mr r7,r9
- mr r8,r10
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- ld r12,STK_PARAM(R4)(r1)
- std r4, 0(r12)
- std r5, 8(r12)
- std r6, 16(r12)
- std r7, 24(r12)
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes only 6 input parameters, 5 output parameters at maximum */
-_GLOBAL(beat_hcall5)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- std r4,STK_PARAM(R4)(r1) /* save ret buffer */
-
- mr r11,r3
- mr r3,r5
- mr r4,r6
- mr r5,r7
- mr r6,r8
- mr r7,r9
- mr r8,r10
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- ld r12,STK_PARAM(R4)(r1)
- std r4, 0(r12)
- std r5, 8(r12)
- std r6, 16(r12)
- std r7, 24(r12)
- std r8, 32(r12)
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
-
-/* Note: takes only 6 input parameters, 6 output parameters at maximum */
-_GLOBAL(beat_hcall6)
- HMT_MEDIUM
-
- mfcr r0
- stw r0,8(r1)
-
- HCALL_INST_PRECALL
-
- std r4,STK_PARAM(R4)(r1) /* save ret buffer */
-
- mr r11,r3
- mr r3,r5
- mr r4,r6
- mr r5,r7
- mr r6,r8
- mr r7,r9
- mr r8,r10
-
- HVSC /* invoke the hypervisor */
-
- HCALL_INST_POSTCALL
-
- ld r12,STK_PARAM(R4)(r1)
- std r4, 0(r12)
- std r5, 8(r12)
- std r6, 16(r12)
- std r7, 24(r12)
- std r8, 32(r12)
- std r9, 40(r12)
-
- lwz r0,8(r1)
- mtcrf 0xff,r0
-
- blr /* return r3 = status */
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
deleted file mode 100644
index 9e5dfbcc00af..000000000000
--- a/arch/powerpc/platforms/cell/beat_interrupt.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Celleb/Beat Interrupt controller
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/percpu.h>
-#include <linux/types.h>
-
-#include <asm/machdep.h>
-
-#include "beat_interrupt.h"
-#include "beat_wrapper.h"
-
-#define MAX_IRQS NR_IRQS
-static DEFINE_RAW_SPINLOCK(beatic_irq_mask_lock);
-static uint64_t beatic_irq_mask_enable[(MAX_IRQS+255)/64];
-static uint64_t beatic_irq_mask_ack[(MAX_IRQS+255)/64];
-
-static struct irq_domain *beatic_host;
-
-/*
- * In this implementation, "virq" == "IRQ plug number",
- * "(irq_hw_number_t)hwirq" == "IRQ outlet number".
- */
-
-/* assumption: locked */
-static inline void beatic_update_irq_mask(unsigned int irq_plug)
-{
- int off;
- unsigned long masks[4];
-
- off = (irq_plug / 256) * 4;
- masks[0] = beatic_irq_mask_enable[off + 0]
- & beatic_irq_mask_ack[off + 0];
- masks[1] = beatic_irq_mask_enable[off + 1]
- & beatic_irq_mask_ack[off + 1];
- masks[2] = beatic_irq_mask_enable[off + 2]
- & beatic_irq_mask_ack[off + 2];
- masks[3] = beatic_irq_mask_enable[off + 3]
- & beatic_irq_mask_ack[off + 3];
- if (beat_set_interrupt_mask(irq_plug&~255UL,
- masks[0], masks[1], masks[2], masks[3]) != 0)
- panic("Failed to set mask IRQ!");
-}
-
-static void beatic_mask_irq(struct irq_data *d)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
- beatic_irq_mask_enable[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
- beatic_update_irq_mask(d->irq);
- raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_unmask_irq(struct irq_data *d)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
- beatic_irq_mask_enable[d->irq/64] |= 1UL << (63 - (d->irq%64));
- beatic_update_irq_mask(d->irq);
- raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_ack_irq(struct irq_data *d)
-{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
- beatic_irq_mask_ack[d->irq/64] &= ~(1UL << (63 - (d->irq%64)));
- beatic_update_irq_mask(d->irq);
- raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static void beatic_end_irq(struct irq_data *d)
-{
- s64 err;
- unsigned long flags;
-
- err = beat_downcount_of_interrupt(d->irq);
- if (err != 0) {
- if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */
- panic("Failed to downcount IRQ! Error = %16llx", err);
-
- printk(KERN_ERR "IRQ over-downcounted, plug %d\n", d->irq);
- }
- raw_spin_lock_irqsave(&beatic_irq_mask_lock, flags);
- beatic_irq_mask_ack[d->irq/64] |= 1UL << (63 - (d->irq%64));
- beatic_update_irq_mask(d->irq);
- raw_spin_unlock_irqrestore(&beatic_irq_mask_lock, flags);
-}
-
-static struct irq_chip beatic_pic = {
- .name = "CELL-BEAT",
- .irq_unmask = beatic_unmask_irq,
- .irq_mask = beatic_mask_irq,
- .irq_eoi = beatic_end_irq,
-};
-
-/*
- * Dispose binding hardware IRQ number (hw) and Virtuql IRQ number (virq),
- * update flags.
- *
- * Note that the number (virq) is already assigned at upper layer.
- */
-static void beatic_pic_host_unmap(struct irq_domain *h, unsigned int virq)
-{
- beat_destruct_irq_plug(virq);
-}
-
-/*
- * Create or update binding hardware IRQ number (hw) and Virtuql
- * IRQ number (virq). This is called only once for a given mapping.
- *
- * Note that the number (virq) is already assigned at upper layer.
- */
-static int beatic_pic_host_map(struct irq_domain *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- int64_t err;
-
- err = beat_construct_and_connect_irq_plug(virq, hw);
- if (err < 0)
- return -EIO;
-
- irq_set_status_flags(virq, IRQ_LEVEL);
- irq_set_chip_and_handler(virq, &beatic_pic, handle_fasteoi_irq);
- return 0;
-}
-
-/*
- * Translate device-tree interrupt spec to irq_hw_number_t style (ulong),
- * to pass away to irq_create_mapping().
- *
- * Called from irq_create_of_mapping() only.
- * Note: We have only 1 entry to translate.
- */
-static int beatic_pic_host_xlate(struct irq_domain *h, struct device_node *ct,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq,
- unsigned int *out_flags)
-{
- const u64 *intspec2 = (const u64 *)intspec;
-
- *out_hwirq = *intspec2;
- *out_flags |= IRQ_TYPE_LEVEL_LOW;
- return 0;
-}
-
-static int beatic_pic_host_match(struct irq_domain *h, struct device_node *np)
-{
- /* Match all */
- return 1;
-}
-
-static const struct irq_domain_ops beatic_pic_host_ops = {
- .map = beatic_pic_host_map,
- .unmap = beatic_pic_host_unmap,
- .xlate = beatic_pic_host_xlate,
- .match = beatic_pic_host_match,
-};
-
-/*
- * Get an IRQ number
- * Note: returns VIRQ
- */
-static inline unsigned int beatic_get_irq_plug(void)
-{
- int i;
- uint64_t pending[4], ub;
-
- for (i = 0; i < MAX_IRQS; i += 256) {
- beat_detect_pending_interrupts(i, pending);
- __asm__ ("cntlzd %0,%1":"=r"(ub):
- "r"(pending[0] & beatic_irq_mask_enable[i/64+0]
- & beatic_irq_mask_ack[i/64+0]));
- if (ub != 64)
- return i + ub + 0;
- __asm__ ("cntlzd %0,%1":"=r"(ub):
- "r"(pending[1] & beatic_irq_mask_enable[i/64+1]
- & beatic_irq_mask_ack[i/64+1]));
- if (ub != 64)
- return i + ub + 64;
- __asm__ ("cntlzd %0,%1":"=r"(ub):
- "r"(pending[2] & beatic_irq_mask_enable[i/64+2]
- & beatic_irq_mask_ack[i/64+2]));
- if (ub != 64)
- return i + ub + 128;
- __asm__ ("cntlzd %0,%1":"=r"(ub):
- "r"(pending[3] & beatic_irq_mask_enable[i/64+3]
- & beatic_irq_mask_ack[i/64+3]));
- if (ub != 64)
- return i + ub + 192;
- }
-
- return NO_IRQ;
-}
-unsigned int beatic_get_irq(void)
-{
- unsigned int ret;
-
- ret = beatic_get_irq_plug();
- if (ret != NO_IRQ)
- beatic_ack_irq(irq_get_irq_data(ret));
- return ret;
-}
-
-/*
- */
-void __init beatic_init_IRQ(void)
-{
- int i;
-
- memset(beatic_irq_mask_enable, 0, sizeof(beatic_irq_mask_enable));
- memset(beatic_irq_mask_ack, 255, sizeof(beatic_irq_mask_ack));
- for (i = 0; i < MAX_IRQS; i += 256)
- beat_set_interrupt_mask(i, 0L, 0L, 0L, 0L);
-
- /* Set out get_irq function */
- ppc_md.get_irq = beatic_get_irq;
-
- /* Allocate an irq host */
- beatic_host = irq_domain_add_nomap(NULL, ~0, &beatic_pic_host_ops, NULL);
- BUG_ON(beatic_host == NULL);
- irq_set_default_host(beatic_host);
-}
-
-void beatic_deinit_IRQ(void)
-{
- int i;
-
- for (i = 1; i < nr_irqs; i++)
- beat_destruct_irq_plug(i);
-}
diff --git a/arch/powerpc/platforms/cell/beat_interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
deleted file mode 100644
index a7e52f91a078..000000000000
--- a/arch/powerpc/platforms/cell/beat_interrupt.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Celleb/Beat Interrupt controller
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef ASM_BEAT_PIC_H
-#define ASM_BEAT_PIC_H
-#ifdef __KERNEL__
-
-extern void beatic_init_IRQ(void);
-extern unsigned int beatic_get_irq(void);
-extern void beatic_deinit_IRQ(void);
-
-#endif
-#endif /* ASM_BEAT_PIC_H */
diff --git a/arch/powerpc/platforms/cell/beat_iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
deleted file mode 100644
index 3ce685568935..000000000000
--- a/arch/powerpc/platforms/cell/beat_iommu.c
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Support for IOMMU on Celleb platform.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/pci.h>
-#include <linux/of_platform.h>
-
-#include <asm/machdep.h>
-
-#include "beat_wrapper.h"
-
-#define DMA_FLAGS 0xf800000000000000UL /* r/w permitted, coherency required,
- strongest order */
-
-static int __init find_dma_window(u64 *io_space_id, u64 *ioid,
- u64 *base, u64 *size, u64 *io_page_size)
-{
- struct device_node *dn;
- const unsigned long *dma_window;
-
- for_each_node_by_type(dn, "ioif") {
- dma_window = of_get_property(dn, "toshiba,dma-window", NULL);
- if (dma_window) {
- *io_space_id = (dma_window[0] >> 32) & 0xffffffffUL;
- *ioid = dma_window[0] & 0x7ffUL;
- *base = dma_window[1];
- *size = dma_window[2];
- *io_page_size = 1 << dma_window[3];
- of_node_put(dn);
- return 1;
- }
- }
- return 0;
-}
-
-static unsigned long celleb_dma_direct_offset;
-
-static void __init celleb_init_direct_mapping(void)
-{
- u64 lpar_addr, io_addr;
- u64 io_space_id, ioid, dma_base, dma_size, io_page_size;
-
- if (!find_dma_window(&io_space_id, &ioid, &dma_base, &dma_size,
- &io_page_size)) {
- pr_info("No dma window found !\n");
- return;
- }
-
- for (lpar_addr = 0; lpar_addr < dma_size; lpar_addr += io_page_size) {
- io_addr = lpar_addr + dma_base;
- (void)beat_put_iopte(io_space_id, io_addr, lpar_addr,
- ioid, DMA_FLAGS);
- }
-
- celleb_dma_direct_offset = dma_base;
-}
-
-static void celleb_dma_dev_setup(struct device *dev)
-{
- set_dma_ops(dev, &dma_direct_ops);
- set_dma_offset(dev, celleb_dma_direct_offset);
-}
-
-static void celleb_pci_dma_dev_setup(struct pci_dev *pdev)
-{
- celleb_dma_dev_setup(&pdev->dev);
-}
-
-static int celleb_of_bus_notify(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- /* We are only intereted in device addition */
- if (action != BUS_NOTIFY_ADD_DEVICE)
- return 0;
-
- celleb_dma_dev_setup(dev);
-
- return 0;
-}
-
-static struct notifier_block celleb_of_bus_notifier = {
- .notifier_call = celleb_of_bus_notify
-};
-
-static int __init celleb_init_iommu(void)
-{
- celleb_init_direct_mapping();
- ppc_md.pci_dma_dev_setup = celleb_pci_dma_dev_setup;
- bus_register_notifier(&platform_bus_type, &celleb_of_bus_notifier);
-
- return 0;
-}
-
-machine_arch_initcall(celleb_beat, celleb_init_iommu);
diff --git a/arch/powerpc/platforms/cell/beat_spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c
deleted file mode 100644
index 13f52589d3a9..000000000000
--- a/arch/powerpc/platforms/cell/beat_spu_priv1.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- * spu hypervisor abstraction for Beat
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <asm/types.h>
-#include <asm/spu.h>
-#include <asm/spu_priv1.h>
-
-#include "beat_wrapper.h"
-
-static inline void _int_mask_set(struct spu *spu, int class, u64 mask)
-{
- spu->shadow_int_mask_RW[class] = mask;
- beat_set_irq_mask_for_spe(spu->spe_id, class, mask);
-}
-
-static inline u64 _int_mask_get(struct spu *spu, int class)
-{
- return spu->shadow_int_mask_RW[class];
-}
-
-static void int_mask_set(struct spu *spu, int class, u64 mask)
-{
- _int_mask_set(spu, class, mask);
-}
-
-static u64 int_mask_get(struct spu *spu, int class)
-{
- return _int_mask_get(spu, class);
-}
-
-static void int_mask_and(struct spu *spu, int class, u64 mask)
-{
- u64 old_mask;
- old_mask = _int_mask_get(spu, class);
- _int_mask_set(spu, class, old_mask & mask);
-}
-
-static void int_mask_or(struct spu *spu, int class, u64 mask)
-{
- u64 old_mask;
- old_mask = _int_mask_get(spu, class);
- _int_mask_set(spu, class, old_mask | mask);
-}
-
-static void int_stat_clear(struct spu *spu, int class, u64 stat)
-{
- beat_clear_interrupt_status_of_spe(spu->spe_id, class, stat);
-}
-
-static u64 int_stat_get(struct spu *spu, int class)
-{
- u64 int_stat;
- beat_get_interrupt_status_of_spe(spu->spe_id, class, &int_stat);
- return int_stat;
-}
-
-static void cpu_affinity_set(struct spu *spu, int cpu)
-{
- return;
-}
-
-static u64 mfc_dar_get(struct spu *spu)
-{
- u64 dar;
- beat_get_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_dar_RW), &dar);
- return dar;
-}
-
-static u64 mfc_dsisr_get(struct spu *spu)
-{
- u64 dsisr;
- beat_get_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_dsisr_RW), &dsisr);
- return dsisr;
-}
-
-static void mfc_dsisr_set(struct spu *spu, u64 dsisr)
-{
- beat_set_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_dsisr_RW), dsisr);
-}
-
-static void mfc_sdr_setup(struct spu *spu)
-{
- return;
-}
-
-static void mfc_sr1_set(struct spu *spu, u64 sr1)
-{
- beat_set_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_sr1_RW), sr1);
-}
-
-static u64 mfc_sr1_get(struct spu *spu)
-{
- u64 sr1;
- beat_get_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_sr1_RW), &sr1);
- return sr1;
-}
-
-static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id)
-{
- beat_set_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_tclass_id_RW), tclass_id);
-}
-
-static u64 mfc_tclass_id_get(struct spu *spu)
-{
- u64 tclass_id;
- beat_get_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, mfc_tclass_id_RW), &tclass_id);
- return tclass_id;
-}
-
-static void tlb_invalidate(struct spu *spu)
-{
- beat_set_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, tlb_invalidate_entry_W), 0ul);
-}
-
-static void resource_allocation_groupID_set(struct spu *spu, u64 id)
-{
- beat_set_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, resource_allocation_groupID_RW),
- id);
-}
-
-static u64 resource_allocation_groupID_get(struct spu *spu)
-{
- u64 id;
- beat_get_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, resource_allocation_groupID_RW),
- &id);
- return id;
-}
-
-static void resource_allocation_enable_set(struct spu *spu, u64 enable)
-{
- beat_set_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, resource_allocation_enable_RW),
- enable);
-}
-
-static u64 resource_allocation_enable_get(struct spu *spu)
-{
- u64 enable;
- beat_get_spe_privileged_state_1_registers(
- spu->spe_id,
- offsetof(struct spu_priv1, resource_allocation_enable_RW),
- &enable);
- return enable;
-}
-
-const struct spu_priv1_ops spu_priv1_beat_ops = {
- .int_mask_and = int_mask_and,
- .int_mask_or = int_mask_or,
- .int_mask_set = int_mask_set,
- .int_mask_get = int_mask_get,
- .int_stat_clear = int_stat_clear,
- .int_stat_get = int_stat_get,
- .cpu_affinity_set = cpu_affinity_set,
- .mfc_dar_get = mfc_dar_get,
- .mfc_dsisr_get = mfc_dsisr_get,
- .mfc_dsisr_set = mfc_dsisr_set,
- .mfc_sdr_setup = mfc_sdr_setup,
- .mfc_sr1_set = mfc_sr1_set,
- .mfc_sr1_get = mfc_sr1_get,
- .mfc_tclass_id_set = mfc_tclass_id_set,
- .mfc_tclass_id_get = mfc_tclass_id_get,
- .tlb_invalidate = tlb_invalidate,
- .resource_allocation_groupID_set = resource_allocation_groupID_set,
- .resource_allocation_groupID_get = resource_allocation_groupID_get,
- .resource_allocation_enable_set = resource_allocation_enable_set,
- .resource_allocation_enable_get = resource_allocation_enable_get,
-};
diff --git a/arch/powerpc/platforms/cell/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h
deleted file mode 100644
index 8580dc7e1798..000000000000
--- a/arch/powerpc/platforms/cell/beat_syscall.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Beat hypervisor call numbers
- *
- * (C) Copyright 2004-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef BEAT_BEAT_syscall_H
-#define BEAT_BEAT_syscall_H
-
-#ifdef __ASSEMBLY__
-#define __BEAT_ADD_VENDOR_ID(__x, __v) ((__v)<<60|(__x))
-#else
-#define __BEAT_ADD_VENDOR_ID(__x, __v) ((u64)(__v)<<60|(__x))
-#endif
-#define HV_allocate_memory __BEAT_ADD_VENDOR_ID(0, 0)
-#define HV_construct_virtual_address_space __BEAT_ADD_VENDOR_ID(2, 0)
-#define HV_destruct_virtual_address_space __BEAT_ADD_VENDOR_ID(10, 0)
-#define HV_get_virtual_address_space_id_of_ppe __BEAT_ADD_VENDOR_ID(4, 0)
-#define HV_query_logical_partition_address_region_info \
- __BEAT_ADD_VENDOR_ID(6, 0)
-#define HV_release_memory __BEAT_ADD_VENDOR_ID(13, 0)
-#define HV_select_virtual_address_space __BEAT_ADD_VENDOR_ID(7, 0)
-#define HV_load_range_registers __BEAT_ADD_VENDOR_ID(68, 0)
-#define HV_set_ppe_l2cache_rmt_entry __BEAT_ADD_VENDOR_ID(70, 0)
-#define HV_set_ppe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(71, 0)
-#define HV_set_spe_tlb_rmt_entry __BEAT_ADD_VENDOR_ID(72, 0)
-#define HV_get_io_address_translation_fault_info __BEAT_ADD_VENDOR_ID(14, 0)
-#define HV_get_iopte __BEAT_ADD_VENDOR_ID(16, 0)
-#define HV_preload_iopt_cache __BEAT_ADD_VENDOR_ID(17, 0)
-#define HV_put_iopte __BEAT_ADD_VENDOR_ID(15, 0)
-#define HV_connect_event_ports __BEAT_ADD_VENDOR_ID(21, 0)
-#define HV_construct_event_receive_port __BEAT_ADD_VENDOR_ID(18, 0)
-#define HV_destruct_event_receive_port __BEAT_ADD_VENDOR_ID(19, 0)
-#define HV_destruct_event_send_port __BEAT_ADD_VENDOR_ID(22, 0)
-#define HV_get_state_of_event_send_port __BEAT_ADD_VENDOR_ID(25, 0)
-#define HV_request_to_connect_event_ports __BEAT_ADD_VENDOR_ID(20, 0)
-#define HV_send_event_externally __BEAT_ADD_VENDOR_ID(23, 0)
-#define HV_send_event_locally __BEAT_ADD_VENDOR_ID(24, 0)
-#define HV_construct_and_connect_irq_plug __BEAT_ADD_VENDOR_ID(28, 0)
-#define HV_destruct_irq_plug __BEAT_ADD_VENDOR_ID(29, 0)
-#define HV_detect_pending_interrupts __BEAT_ADD_VENDOR_ID(26, 0)
-#define HV_end_of_interrupt __BEAT_ADD_VENDOR_ID(27, 0)
-#define HV_assign_control_signal_notification_port __BEAT_ADD_VENDOR_ID(45, 0)
-#define HV_end_of_control_signal_processing __BEAT_ADD_VENDOR_ID(48, 0)
-#define HV_get_control_signal __BEAT_ADD_VENDOR_ID(46, 0)
-#define HV_set_irq_mask_for_spe __BEAT_ADD_VENDOR_ID(61, 0)
-#define HV_shutdown_logical_partition __BEAT_ADD_VENDOR_ID(44, 0)
-#define HV_connect_message_ports __BEAT_ADD_VENDOR_ID(35, 0)
-#define HV_destruct_message_port __BEAT_ADD_VENDOR_ID(36, 0)
-#define HV_receive_message __BEAT_ADD_VENDOR_ID(37, 0)
-#define HV_get_message_port_info __BEAT_ADD_VENDOR_ID(34, 0)
-#define HV_request_to_connect_message_ports __BEAT_ADD_VENDOR_ID(33, 0)
-#define HV_send_message __BEAT_ADD_VENDOR_ID(32, 0)
-#define HV_get_logical_ppe_id __BEAT_ADD_VENDOR_ID(69, 0)
-#define HV_pause __BEAT_ADD_VENDOR_ID(9, 0)
-#define HV_destruct_shared_memory_handle __BEAT_ADD_VENDOR_ID(51, 0)
-#define HV_get_shared_memory_info __BEAT_ADD_VENDOR_ID(52, 0)
-#define HV_permit_sharing_memory __BEAT_ADD_VENDOR_ID(50, 0)
-#define HV_request_to_attach_shared_memory __BEAT_ADD_VENDOR_ID(49, 0)
-#define HV_enable_logical_spe_execution __BEAT_ADD_VENDOR_ID(55, 0)
-#define HV_construct_logical_spe __BEAT_ADD_VENDOR_ID(53, 0)
-#define HV_disable_logical_spe_execution __BEAT_ADD_VENDOR_ID(56, 0)
-#define HV_destruct_logical_spe __BEAT_ADD_VENDOR_ID(54, 0)
-#define HV_sense_spe_execution_status __BEAT_ADD_VENDOR_ID(58, 0)
-#define HV_insert_htab_entry __BEAT_ADD_VENDOR_ID(101, 0)
-#define HV_read_htab_entries __BEAT_ADD_VENDOR_ID(95, 0)
-#define HV_write_htab_entry __BEAT_ADD_VENDOR_ID(94, 0)
-#define HV_assign_io_address_translation_fault_port \
- __BEAT_ADD_VENDOR_ID(100, 0)
-#define HV_set_interrupt_mask __BEAT_ADD_VENDOR_ID(73, 0)
-#define HV_get_logical_partition_id __BEAT_ADD_VENDOR_ID(74, 0)
-#define HV_create_repository_node2 __BEAT_ADD_VENDOR_ID(90, 0)
-#define HV_create_repository_node __BEAT_ADD_VENDOR_ID(90, 0) /* alias */
-#define HV_get_repository_node_value2 __BEAT_ADD_VENDOR_ID(91, 0)
-#define HV_get_repository_node_value __BEAT_ADD_VENDOR_ID(91, 0) /* alias */
-#define HV_modify_repository_node_value2 __BEAT_ADD_VENDOR_ID(92, 0)
-#define HV_modify_repository_node_value __BEAT_ADD_VENDOR_ID(92, 0) /* alias */
-#define HV_remove_repository_node2 __BEAT_ADD_VENDOR_ID(93, 0)
-#define HV_remove_repository_node __BEAT_ADD_VENDOR_ID(93, 0) /* alias */
-#define HV_cancel_shared_memory __BEAT_ADD_VENDOR_ID(104, 0)
-#define HV_clear_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(206, 0)
-#define HV_construct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(80, 0)
-#define HV_destruct_spe_irq_outlet __BEAT_ADD_VENDOR_ID(81, 0)
-#define HV_disconnect_ipspc_service __BEAT_ADD_VENDOR_ID(88, 0)
-#define HV_execute_ipspc_command __BEAT_ADD_VENDOR_ID(86, 0)
-#define HV_get_interrupt_status_of_spe __BEAT_ADD_VENDOR_ID(205, 0)
-#define HV_get_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(208, 0)
-#define HV_permit_use_of_ipspc_service __BEAT_ADD_VENDOR_ID(85, 0)
-#define HV_reinitialize_logical_spe __BEAT_ADD_VENDOR_ID(82, 0)
-#define HV_request_ipspc_service __BEAT_ADD_VENDOR_ID(84, 0)
-#define HV_stop_ipspc_command __BEAT_ADD_VENDOR_ID(87, 0)
-#define HV_set_spe_privileged_state_1_registers __BEAT_ADD_VENDOR_ID(204, 0)
-#define HV_get_status_of_ipspc_service __BEAT_ADD_VENDOR_ID(203, 0)
-#define HV_put_characters_to_console __BEAT_ADD_VENDOR_ID(0x101, 1)
-#define HV_get_characters_from_console __BEAT_ADD_VENDOR_ID(0x102, 1)
-#define HV_get_base_clock __BEAT_ADD_VENDOR_ID(0x111, 1)
-#define HV_set_base_clock __BEAT_ADD_VENDOR_ID(0x112, 1)
-#define HV_get_frame_cycle __BEAT_ADD_VENDOR_ID(0x114, 1)
-#define HV_disable_console __BEAT_ADD_VENDOR_ID(0x115, 1)
-#define HV_disable_all_console __BEAT_ADD_VENDOR_ID(0x116, 1)
-#define HV_oneshot_timer __BEAT_ADD_VENDOR_ID(0x117, 1)
-#define HV_set_dabr __BEAT_ADD_VENDOR_ID(0x118, 1)
-#define HV_get_dabr __BEAT_ADD_VENDOR_ID(0x119, 1)
-#define HV_start_hv_stats __BEAT_ADD_VENDOR_ID(0x21c, 1)
-#define HV_stop_hv_stats __BEAT_ADD_VENDOR_ID(0x21d, 1)
-#define HV_get_hv_stats __BEAT_ADD_VENDOR_ID(0x21e, 1)
-#define HV_get_hv_error_stats __BEAT_ADD_VENDOR_ID(0x221, 1)
-#define HV_get_stats __BEAT_ADD_VENDOR_ID(0x224, 1)
-#define HV_get_heap_stats __BEAT_ADD_VENDOR_ID(0x225, 1)
-#define HV_get_memory_stats __BEAT_ADD_VENDOR_ID(0x227, 1)
-#define HV_get_memory_detail __BEAT_ADD_VENDOR_ID(0x228, 1)
-#define HV_set_priority_of_irq_outlet __BEAT_ADD_VENDOR_ID(0x122, 1)
-#define HV_get_physical_spe_by_reservation_id __BEAT_ADD_VENDOR_ID(0x128, 1)
-#define HV_get_spe_context __BEAT_ADD_VENDOR_ID(0x129, 1)
-#define HV_set_spe_context __BEAT_ADD_VENDOR_ID(0x12a, 1)
-#define HV_downcount_of_interrupt __BEAT_ADD_VENDOR_ID(0x12e, 1)
-#define HV_peek_spe_context __BEAT_ADD_VENDOR_ID(0x12f, 1)
-#define HV_read_bpa_register __BEAT_ADD_VENDOR_ID(0x131, 1)
-#define HV_write_bpa_register __BEAT_ADD_VENDOR_ID(0x132, 1)
-#define HV_map_context_table_of_spe __BEAT_ADD_VENDOR_ID(0x137, 1)
-#define HV_get_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x138, 1)
-#define HV_set_slb_for_logical_spe __BEAT_ADD_VENDOR_ID(0x139, 1)
-#define HV_init_pm __BEAT_ADD_VENDOR_ID(0x150, 1)
-#define HV_set_pm_signal __BEAT_ADD_VENDOR_ID(0x151, 1)
-#define HV_get_pm_signal __BEAT_ADD_VENDOR_ID(0x152, 1)
-#define HV_set_pm_config __BEAT_ADD_VENDOR_ID(0x153, 1)
-#define HV_get_pm_config __BEAT_ADD_VENDOR_ID(0x154, 1)
-#define HV_get_inner_trace_data __BEAT_ADD_VENDOR_ID(0x155, 1)
-#define HV_set_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x156, 1)
-#define HV_get_ext_trace_buffer __BEAT_ADD_VENDOR_ID(0x157, 1)
-#define HV_set_pm_interrupt __BEAT_ADD_VENDOR_ID(0x158, 1)
-#define HV_get_pm_interrupt __BEAT_ADD_VENDOR_ID(0x159, 1)
-#define HV_kick_pm __BEAT_ADD_VENDOR_ID(0x160, 1)
-#define HV_construct_pm_context __BEAT_ADD_VENDOR_ID(0x164, 1)
-#define HV_destruct_pm_context __BEAT_ADD_VENDOR_ID(0x165, 1)
-#define HV_be_slow __BEAT_ADD_VENDOR_ID(0x170, 1)
-#define HV_assign_ipspc_server_connection_status_notification_port \
- __BEAT_ADD_VENDOR_ID(0x173, 1)
-#define HV_get_raid_of_physical_spe __BEAT_ADD_VENDOR_ID(0x174, 1)
-#define HV_set_physical_spe_to_rag __BEAT_ADD_VENDOR_ID(0x175, 1)
-#define HV_release_physical_spe_from_rag __BEAT_ADD_VENDOR_ID(0x176, 1)
-#define HV_rtc_read __BEAT_ADD_VENDOR_ID(0x190, 1)
-#define HV_rtc_write __BEAT_ADD_VENDOR_ID(0x191, 1)
-#define HV_eeprom_read __BEAT_ADD_VENDOR_ID(0x192, 1)
-#define HV_eeprom_write __BEAT_ADD_VENDOR_ID(0x193, 1)
-#define HV_insert_htab_entry3 __BEAT_ADD_VENDOR_ID(0x104, 1)
-#define HV_invalidate_htab_entry3 __BEAT_ADD_VENDOR_ID(0x105, 1)
-#define HV_update_htab_permission3 __BEAT_ADD_VENDOR_ID(0x106, 1)
-#define HV_clear_htab3 __BEAT_ADD_VENDOR_ID(0x107, 1)
-#endif
diff --git a/arch/powerpc/platforms/cell/beat_udbg.c b/arch/powerpc/platforms/cell/beat_udbg.c
deleted file mode 100644
index 350735bc8888..000000000000
--- a/arch/powerpc/platforms/cell/beat_udbg.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * udbg function for Beat
- *
- * (C) Copyright 2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/console.h>
-
-#include <asm/machdep.h>
-#include <asm/prom.h>
-#include <asm/udbg.h>
-
-#include "beat.h"
-
-#define celleb_vtermno 0
-
-static void udbg_putc_beat(char c)
-{
- unsigned long rc;
-
- if (c == '\n')
- udbg_putc_beat('\r');
-
- rc = beat_put_term_char(celleb_vtermno, 1, (uint64_t)c << 56, 0);
-}
-
-/* Buffered chars getc */
-static u64 inbuflen;
-static u64 inbuf[2]; /* must be 2 u64s */
-
-static int udbg_getc_poll_beat(void)
-{
- /* The interface is tricky because it may return up to 16 chars.
- * We save them statically for future calls to udbg_getc().
- */
- char ch, *buf = (char *)inbuf;
- int i;
- long rc;
- if (inbuflen == 0) {
- /* get some more chars. */
- inbuflen = 0;
- rc = beat_get_term_char(celleb_vtermno, &inbuflen,
- inbuf+0, inbuf+1);
- if (rc != 0)
- inbuflen = 0; /* otherwise inbuflen is garbage */
- }
- if (inbuflen <= 0 || inbuflen > 16) {
- /* Catch error case as well as other oddities (corruption) */
- inbuflen = 0;
- return -1;
- }
- ch = buf[0];
- for (i = 1; i < inbuflen; i++) /* shuffle them down. */
- buf[i-1] = buf[i];
- inbuflen--;
- return ch;
-}
-
-static int udbg_getc_beat(void)
-{
- int ch;
- for (;;) {
- ch = udbg_getc_poll_beat();
- if (ch == -1) {
- /* This shouldn't be needed...but... */
- volatile unsigned long delay;
- for (delay = 0; delay < 2000000; delay++)
- ;
- } else {
- return ch;
- }
- }
-}
-
-/* call this from early_init() for a working debug console on
- * vterm capable LPAR machines
- */
-void __init udbg_init_debug_beat(void)
-{
- udbg_putc = udbg_putc_beat;
- udbg_getc = udbg_getc_beat;
- udbg_getc_poll = udbg_getc_poll_beat;
-}
diff --git a/arch/powerpc/platforms/cell/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h
deleted file mode 100644
index c1109969f242..000000000000
--- a/arch/powerpc/platforms/cell/beat_wrapper.h
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Beat hypervisor call I/F
- *
- * (C) Copyright 2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/platforms/pseries/plpar_wrapper.h.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-#ifndef BEAT_HCALL
-#include <linux/string.h>
-#include "beat_syscall.h"
-
-/* defined in hvCall.S */
-extern s64 beat_hcall_norets(u64 opcode, ...);
-extern s64 beat_hcall_norets8(u64 opcode, u64 arg1, u64 arg2, u64 arg3,
- u64 arg4, u64 arg5, u64 arg6, u64 arg7, u64 arg8);
-extern s64 beat_hcall1(u64 opcode, u64 retbuf[1], ...);
-extern s64 beat_hcall2(u64 opcode, u64 retbuf[2], ...);
-extern s64 beat_hcall3(u64 opcode, u64 retbuf[3], ...);
-extern s64 beat_hcall4(u64 opcode, u64 retbuf[4], ...);
-extern s64 beat_hcall5(u64 opcode, u64 retbuf[5], ...);
-extern s64 beat_hcall6(u64 opcode, u64 retbuf[6], ...);
-
-static inline s64 beat_downcount_of_interrupt(u64 plug_id)
-{
- return beat_hcall_norets(HV_downcount_of_interrupt, plug_id);
-}
-
-static inline s64 beat_set_interrupt_mask(u64 index,
- u64 val0, u64 val1, u64 val2, u64 val3)
-{
- return beat_hcall_norets(HV_set_interrupt_mask, index,
- val0, val1, val2, val3);
-}
-
-static inline s64 beat_destruct_irq_plug(u64 plug_id)
-{
- return beat_hcall_norets(HV_destruct_irq_plug, plug_id);
-}
-
-static inline s64 beat_construct_and_connect_irq_plug(u64 plug_id,
- u64 outlet_id)
-{
- return beat_hcall_norets(HV_construct_and_connect_irq_plug, plug_id,
- outlet_id);
-}
-
-static inline s64 beat_detect_pending_interrupts(u64 index, u64 *retbuf)
-{
- return beat_hcall4(HV_detect_pending_interrupts, retbuf, index);
-}
-
-static inline s64 beat_pause(u64 style)
-{
- return beat_hcall_norets(HV_pause, style);
-}
-
-static inline s64 beat_read_htab_entries(u64 htab_id, u64 index, u64 *retbuf)
-{
- return beat_hcall5(HV_read_htab_entries, retbuf, htab_id, index);
-}
-
-static inline s64 beat_insert_htab_entry(u64 htab_id, u64 group,
- u64 bitmask, u64 hpte_v, u64 hpte_r, u64 *slot)
-{
- u64 dummy[3];
- s64 ret;
-
- ret = beat_hcall3(HV_insert_htab_entry, dummy, htab_id, group,
- bitmask, hpte_v, hpte_r);
- *slot = dummy[0];
- return ret;
-}
-
-static inline s64 beat_write_htab_entry(u64 htab_id, u64 slot,
- u64 hpte_v, u64 hpte_r, u64 mask_v, u64 mask_r,
- u64 *ret_v, u64 *ret_r)
-{
- u64 dummy[2];
- s64 ret;
-
- ret = beat_hcall2(HV_write_htab_entry, dummy, htab_id, slot,
- hpte_v, hpte_r, mask_v, mask_r);
- *ret_v = dummy[0];
- *ret_r = dummy[1];
- return ret;
-}
-
-static inline s64 beat_insert_htab_entry3(u64 htab_id, u64 group,
- u64 hpte_v, u64 hpte_r, u64 mask_v, u64 value_v, u64 *slot)
-{
- u64 dummy[1];
- s64 ret;
-
- ret = beat_hcall1(HV_insert_htab_entry3, dummy, htab_id, group,
- hpte_v, hpte_r, mask_v, value_v);
- *slot = dummy[0];
- return ret;
-}
-
-static inline s64 beat_invalidate_htab_entry3(u64 htab_id, u64 group,
- u64 va, u64 pss)
-{
- return beat_hcall_norets(HV_invalidate_htab_entry3,
- htab_id, group, va, pss);
-}
-
-static inline s64 beat_update_htab_permission3(u64 htab_id, u64 group,
- u64 va, u64 pss, u64 ptel_mask, u64 ptel_value)
-{
- return beat_hcall_norets(HV_update_htab_permission3,
- htab_id, group, va, pss, ptel_mask, ptel_value);
-}
-
-static inline s64 beat_clear_htab3(u64 htab_id)
-{
- return beat_hcall_norets(HV_clear_htab3, htab_id);
-}
-
-static inline void beat_shutdown_logical_partition(u64 code)
-{
- (void)beat_hcall_norets(HV_shutdown_logical_partition, code);
-}
-
-static inline s64 beat_rtc_write(u64 time_from_epoch)
-{
- return beat_hcall_norets(HV_rtc_write, time_from_epoch);
-}
-
-static inline s64 beat_rtc_read(u64 *time_from_epoch)
-{
- u64 dummy[1];
- s64 ret;
-
- ret = beat_hcall1(HV_rtc_read, dummy);
- *time_from_epoch = dummy[0];
- return ret;
-}
-
-#define BEAT_NVRW_CNT (sizeof(u64) * 6)
-
-static inline s64 beat_eeprom_write(u64 index, u64 length, u8 *buffer)
-{
- u64 b[6];
-
- if (length > BEAT_NVRW_CNT)
- return -1;
- memcpy(b, buffer, sizeof(b));
- return beat_hcall_norets8(HV_eeprom_write, index, length,
- b[0], b[1], b[2], b[3], b[4], b[5]);
-}
-
-static inline s64 beat_eeprom_read(u64 index, u64 length, u8 *buffer)
-{
- u64 b[6];
- s64 ret;
-
- if (length > BEAT_NVRW_CNT)
- return -1;
- ret = beat_hcall6(HV_eeprom_read, b, index, length);
- memcpy(buffer, b, length);
- return ret;
-}
-
-static inline s64 beat_set_dabr(u64 value, u64 style)
-{
- return beat_hcall_norets(HV_set_dabr, value, style);
-}
-
-static inline s64 beat_get_characters_from_console(u64 termno, u64 *len,
- u8 *buffer)
-{
- u64 dummy[3];
- s64 ret;
-
- ret = beat_hcall3(HV_get_characters_from_console, dummy, termno, len);
- *len = dummy[0];
- memcpy(buffer, dummy + 1, *len);
- return ret;
-}
-
-static inline s64 beat_put_characters_to_console(u64 termno, u64 len,
- u8 *buffer)
-{
- u64 b[2];
-
- memcpy(b, buffer, len);
- return beat_hcall_norets(HV_put_characters_to_console, termno, len,
- b[0], b[1]);
-}
-
-static inline s64 beat_get_spe_privileged_state_1_registers(
- u64 id, u64 offsetof, u64 *value)
-{
- u64 dummy[1];
- s64 ret;
-
- ret = beat_hcall1(HV_get_spe_privileged_state_1_registers, dummy, id,
- offsetof);
- *value = dummy[0];
- return ret;
-}
-
-static inline s64 beat_set_irq_mask_for_spe(u64 id, u64 class, u64 mask)
-{
- return beat_hcall_norets(HV_set_irq_mask_for_spe, id, class, mask);
-}
-
-static inline s64 beat_clear_interrupt_status_of_spe(u64 id, u64 class,
- u64 mask)
-{
- return beat_hcall_norets(HV_clear_interrupt_status_of_spe,
- id, class, mask);
-}
-
-static inline s64 beat_set_spe_privileged_state_1_registers(
- u64 id, u64 offsetof, u64 value)
-{
- return beat_hcall_norets(HV_set_spe_privileged_state_1_registers,
- id, offsetof, value);
-}
-
-static inline s64 beat_get_interrupt_status_of_spe(u64 id, u64 class, u64 *val)
-{
- u64 dummy[1];
- s64 ret;
-
- ret = beat_hcall1(HV_get_interrupt_status_of_spe, dummy, id, class);
- *val = dummy[0];
- return ret;
-}
-
-static inline s64 beat_put_iopte(u64 ioas_id, u64 io_addr, u64 real_addr,
- u64 ioid, u64 flags)
-{
- return beat_hcall_norets(HV_put_iopte, ioas_id, io_addr, real_addr,
- ioid, flags);
-}
-
-static inline s64 beat_construct_event_receive_port(u64 *port)
-{
- u64 dummy[1];
- s64 ret;
-
- ret = beat_hcall1(HV_construct_event_receive_port, dummy);
- *port = dummy[0];
- return ret;
-}
-
-static inline s64 beat_destruct_event_receive_port(u64 port)
-{
- s64 ret;
-
- ret = beat_hcall_norets(HV_destruct_event_receive_port, port);
- return ret;
-}
-
-static inline s64 beat_create_repository_node(u64 path[4], u64 data[2])
-{
- s64 ret;
-
- ret = beat_hcall_norets(HV_create_repository_node2,
- path[0], path[1], path[2], path[3], data[0], data[1]);
- return ret;
-}
-
-static inline s64 beat_get_repository_node_value(u64 lpid, u64 path[4],
- u64 data[2])
-{
- s64 ret;
-
- ret = beat_hcall2(HV_get_repository_node_value2, data,
- lpid, path[0], path[1], path[2], path[3]);
- return ret;
-}
-
-#endif
diff --git a/arch/powerpc/platforms/cell/cbe_powerbutton.c b/arch/powerpc/platforms/cell/cbe_powerbutton.c
deleted file mode 100644
index 2bb8031303f0..000000000000
--- a/arch/powerpc/platforms/cell/cbe_powerbutton.c
+++ /dev/null
@@ -1,118 +0,0 @@
-/*
- * driver for powerbutton on IBM cell blades
- *
- * (C) Copyright IBM Corp. 2005-2008
- *
- * Author: Christian Krafft <krafft@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/input.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <asm/pmi.h>
-#include <asm/prom.h>
-
-static struct input_dev *button_dev;
-static struct platform_device *button_pdev;
-
-static void cbe_powerbutton_handle_pmi(pmi_message_t pmi_msg)
-{
- BUG_ON(pmi_msg.type != PMI_TYPE_POWER_BUTTON);
-
- input_report_key(button_dev, KEY_POWER, 1);
- input_sync(button_dev);
- input_report_key(button_dev, KEY_POWER, 0);
- input_sync(button_dev);
-}
-
-static struct pmi_handler cbe_pmi_handler = {
- .type = PMI_TYPE_POWER_BUTTON,
- .handle_pmi_message = cbe_powerbutton_handle_pmi,
-};
-
-static int __init cbe_powerbutton_init(void)
-{
- int ret = 0;
- struct input_dev *dev;
-
- if (!of_machine_is_compatible("IBM,CBPLUS-1.0")) {
- printk(KERN_ERR "%s: Not a cell blade.\n", __func__);
- ret = -ENODEV;
- goto out;
- }
-
- dev = input_allocate_device();
- if (!dev) {
- ret = -ENOMEM;
- printk(KERN_ERR "%s: Not enough memory.\n", __func__);
- goto out;
- }
-
- set_bit(EV_KEY, dev->evbit);
- set_bit(KEY_POWER, dev->keybit);
-
- dev->name = "Power Button";
- dev->id.bustype = BUS_HOST;
-
- /* this makes the button look like an acpi power button
- * no clue whether anyone relies on that though */
- dev->id.product = 0x02;
- dev->phys = "LNXPWRBN/button/input0";
-
- button_pdev = platform_device_register_simple("power_button", 0, NULL, 0);
- if (IS_ERR(button_pdev)) {
- ret = PTR_ERR(button_pdev);
- goto out_free_input;
- }
-
- dev->dev.parent = &button_pdev->dev;
- ret = input_register_device(dev);
- if (ret) {
- printk(KERN_ERR "%s: Failed to register device\n", __func__);
- goto out_free_pdev;
- }
-
- button_dev = dev;
-
- ret = pmi_register_handler(&cbe_pmi_handler);
- if (ret) {
- printk(KERN_ERR "%s: Failed to register with pmi.\n", __func__);
- goto out_free_pdev;
- }
-
- goto out;
-
-out_free_pdev:
- platform_device_unregister(button_pdev);
-out_free_input:
- input_free_device(dev);
-out:
- return ret;
-}
-
-static void __exit cbe_powerbutton_exit(void)
-{
- pmi_unregister_handler(&cbe_pmi_handler);
- platform_device_unregister(button_pdev);
- input_free_device(button_dev);
-}
-
-module_init(cbe_powerbutton_init);
-module_exit(cbe_powerbutton_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c
deleted file mode 100644
index 1428d583c238..000000000000
--- a/arch/powerpc/platforms/cell/cbe_regs.c
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * cbe_regs.c
- *
- * Accessor routines for the various MMIO register blocks of the CBE
- *
- * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
- */
-
-#include <linux/percpu.h>
-#include <linux/types.h>
-#include <linux/export.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/prom.h>
-#include <asm/ptrace.h>
-#include <asm/cell-regs.h>
-
-/*
- * Current implementation uses "cpu" nodes. We build our own mapping
- * array of cpu numbers to cpu nodes locally for now to allow interrupt
- * time code to have a fast path rather than call of_get_cpu_node(). If
- * we implement cpu hotplug, we'll have to install an appropriate norifier
- * in order to release references to the cpu going away
- */
-static struct cbe_regs_map
-{
- struct device_node *cpu_node;
- struct device_node *be_node;
- struct cbe_pmd_regs __iomem *pmd_regs;
- struct cbe_iic_regs __iomem *iic_regs;
- struct cbe_mic_tm_regs __iomem *mic_tm_regs;
- struct cbe_pmd_shadow_regs pmd_shadow_regs;
-} cbe_regs_maps[MAX_CBE];
-static int cbe_regs_map_count;
-
-static struct cbe_thread_map
-{
- struct device_node *cpu_node;
- struct device_node *be_node;
- struct cbe_regs_map *regs;
- unsigned int thread_id;
- unsigned int cbe_id;
-} cbe_thread_map[NR_CPUS];
-
-static cpumask_t cbe_local_mask[MAX_CBE] = { [0 ... MAX_CBE-1] = {CPU_BITS_NONE} };
-static cpumask_t cbe_first_online_cpu = { CPU_BITS_NONE };
-
-static struct cbe_regs_map *cbe_find_map(struct device_node *np)
-{
- int i;
- struct device_node *tmp_np;
-
- if (strcasecmp(np->type, "spe")) {
- for (i = 0; i < cbe_regs_map_count; i++)
- if (cbe_regs_maps[i].cpu_node == np ||
- cbe_regs_maps[i].be_node == np)
- return &cbe_regs_maps[i];
- return NULL;
- }
-
- if (np->data)
- return np->data;
-
- /* walk up path until cpu or be node was found */
- tmp_np = np;
- do {
- tmp_np = tmp_np->parent;
- /* on a correct devicetree we wont get up to root */
- BUG_ON(!tmp_np);
- } while (strcasecmp(tmp_np->type, "cpu") &&
- strcasecmp(tmp_np->type, "be"));
-
- np->data = cbe_find_map(tmp_np);
-
- return np->data;
-}
-
-struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np)
-{
- struct cbe_regs_map *map = cbe_find_map(np);
- if (map == NULL)
- return NULL;
- return map->pmd_regs;
-}
-EXPORT_SYMBOL_GPL(cbe_get_pmd_regs);
-
-struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu)
-{
- struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
- if (map == NULL)
- return NULL;
- return map->pmd_regs;
-}
-EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs);
-
-struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np)
-{
- struct cbe_regs_map *map = cbe_find_map(np);
- if (map == NULL)
- return NULL;
- return &map->pmd_shadow_regs;
-}
-
-struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu)
-{
- struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
- if (map == NULL)
- return NULL;
- return &map->pmd_shadow_regs;
-}
-
-struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np)
-{
- struct cbe_regs_map *map = cbe_find_map(np);
- if (map == NULL)
- return NULL;
- return map->iic_regs;
-}
-
-struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu)
-{
- struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
- if (map == NULL)
- return NULL;
- return map->iic_regs;
-}
-
-struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np)
-{
- struct cbe_regs_map *map = cbe_find_map(np);
- if (map == NULL)
- return NULL;
- return map->mic_tm_regs;
-}
-
-struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu)
-{
- struct cbe_regs_map *map = cbe_thread_map[cpu].regs;
- if (map == NULL)
- return NULL;
- return map->mic_tm_regs;
-}
-EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs);
-
-u32 cbe_get_hw_thread_id(int cpu)
-{
- return cbe_thread_map[cpu].thread_id;
-}
-EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id);
-
-u32 cbe_cpu_to_node(int cpu)
-{
- return cbe_thread_map[cpu].cbe_id;
-}
-EXPORT_SYMBOL_GPL(cbe_cpu_to_node);
-
-u32 cbe_node_to_cpu(int node)
-{
- return cpumask_first(&cbe_local_mask[node]);
-
-}
-EXPORT_SYMBOL_GPL(cbe_node_to_cpu);
-
-static struct device_node *cbe_get_be_node(int cpu_id)
-{
- struct device_node *np;
-
- for_each_node_by_type (np, "be") {
- int len,i;
- const phandle *cpu_handle;
-
- cpu_handle = of_get_property(np, "cpus", &len);
-
- /*
- * the CAB SLOF tree is non compliant, so we just assume
- * there is only one node
- */
- if (WARN_ON_ONCE(!cpu_handle))
- return np;
-
- for (i=0; i<len; i++)
- if (of_find_node_by_phandle(cpu_handle[i]) == of_get_cpu_node(cpu_id, NULL))
- return np;
- }
-
- return NULL;
-}
-
-void __init cbe_fill_regs_map(struct cbe_regs_map *map)
-{
- if(map->be_node) {
- struct device_node *be, *np;
-
- be = map->be_node;
-
- for_each_node_by_type(np, "pervasive")
- if (of_get_parent(np) == be)
- map->pmd_regs = of_iomap(np, 0);
-
- for_each_node_by_type(np, "CBEA-Internal-Interrupt-Controller")
- if (of_get_parent(np) == be)
- map->iic_regs = of_iomap(np, 2);
-
- for_each_node_by_type(np, "mic-tm")
- if (of_get_parent(np) == be)
- map->mic_tm_regs = of_iomap(np, 0);
- } else {
- struct device_node *cpu;
- /* That hack must die die die ! */
- const struct address_prop {
- unsigned long address;
- unsigned int len;
- } __attribute__((packed)) *prop;
-
- cpu = map->cpu_node;
-
- prop = of_get_property(cpu, "pervasive", NULL);
- if (prop != NULL)
- map->pmd_regs = ioremap(prop->address, prop->len);
-
- prop = of_get_property(cpu, "iic", NULL);
- if (prop != NULL)
- map->iic_regs = ioremap(prop->address, prop->len);
-
- prop = of_get_property(cpu, "mic-tm", NULL);
- if (prop != NULL)
- map->mic_tm_regs = ioremap(prop->address, prop->len);
- }
-}
-
-
-void __init cbe_regs_init(void)
-{
- int i;
- unsigned int thread_id;
- struct device_node *cpu;
-
- /* Build local fast map of CPUs */
- for_each_possible_cpu(i) {
- cbe_thread_map[i].cpu_node = of_get_cpu_node(i, &thread_id);
- cbe_thread_map[i].be_node = cbe_get_be_node(i);
- cbe_thread_map[i].thread_id = thread_id;
- }
-
- /* Find maps for each device tree CPU */
- for_each_node_by_type(cpu, "cpu") {
- struct cbe_regs_map *map;
- unsigned int cbe_id;
-
- cbe_id = cbe_regs_map_count++;
- map = &cbe_regs_maps[cbe_id];
-
- if (cbe_regs_map_count > MAX_CBE) {
- printk(KERN_ERR "cbe_regs: More BE chips than supported"
- "!\n");
- cbe_regs_map_count--;
- of_node_put(cpu);
- return;
- }
- map->cpu_node = cpu;
-
- for_each_possible_cpu(i) {
- struct cbe_thread_map *thread = &cbe_thread_map[i];
-
- if (thread->cpu_node == cpu) {
- thread->regs = map;
- thread->cbe_id = cbe_id;
- map->be_node = thread->be_node;
- cpumask_set_cpu(i, &cbe_local_mask[cbe_id]);
- if(thread->thread_id == 0)
- cpumask_set_cpu(i, &cbe_first_online_cpu);
- }
- }
-
- cbe_fill_regs_map(map);
- }
-}
-
diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c
deleted file mode 100644
index 2c15ff094483..000000000000
--- a/arch/powerpc/platforms/cell/cbe_thermal.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- * thermal support for the cell processor
- *
- * This module adds some sysfs attributes to cpu and spu nodes.
- * Base for measurements are the digital thermal sensors (DTS)
- * located on the chip.
- * The accuracy is 2 degrees, starting from 65 up to 125 degrees celsius
- * The attributes can be found under
- * /sys/devices/system/cpu/cpuX/thermal
- * /sys/devices/system/spu/spuX/thermal
- *
- * The following attributes are added for each node:
- * temperature:
- * contains the current temperature measured by the DTS
- * throttle_begin:
- * throttling begins when temperature is greater or equal to
- * throttle_begin. Setting this value to 125 prevents throttling.
- * throttle_end:
- * throttling is being ceased, if the temperature is lower than
- * throttle_end. Due to a delay between applying throttling and
- * a reduced temperature this value should be less than throttle_begin.
- * A value equal to throttle_begin provides only a very little hysteresis.
- * throttle_full_stop:
- * If the temperatrue is greater or equal to throttle_full_stop,
- * full throttling is applied to the cpu or spu. This value should be
- * greater than throttle_begin and throttle_end. Setting this value to
- * 65 prevents the unit from running code at all.
- *
- * (C) Copyright IBM Deutschland Entwicklung GmbH 2005
- *
- * Author: Christian Krafft <krafft@de.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/cpu.h>
-#include <asm/spu.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/cell-regs.h>
-
-#include "spu_priv1_mmio.h"
-
-#define TEMP_MIN 65
-#define TEMP_MAX 125
-
-#define DEVICE_PREFIX_ATTR(_prefix,_name,_mode) \
-struct device_attribute attr_ ## _prefix ## _ ## _name = { \
- .attr = { .name = __stringify(_name), .mode = _mode }, \
- .show = _prefix ## _show_ ## _name, \
- .store = _prefix ## _store_ ## _name, \
-};
-
-static inline u8 reg_to_temp(u8 reg_value)
-{
- return ((reg_value & 0x3f) << 1) + TEMP_MIN;
-}
-
-static inline u8 temp_to_reg(u8 temp)
-{
- return ((temp - TEMP_MIN) >> 1) & 0x3f;
-}
-
-static struct cbe_pmd_regs __iomem *get_pmd_regs(struct device *dev)
-{
- struct spu *spu;
-
- spu = container_of(dev, struct spu, dev);
-
- return cbe_get_pmd_regs(spu_devnode(spu));
-}
-
-/* returns the value for a given spu in a given register */
-static u8 spu_read_register_value(struct device *dev, union spe_reg __iomem *reg)
-{
- union spe_reg value;
- struct spu *spu;
-
- spu = container_of(dev, struct spu, dev);
- value.val = in_be64(&reg->val);
-
- return value.spe[spu->spe_id];
-}
-
-static ssize_t spu_show_temp(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- u8 value;
- struct cbe_pmd_regs __iomem *pmd_regs;
-
- pmd_regs = get_pmd_regs(dev);
-
- value = spu_read_register_value(dev, &pmd_regs->ts_ctsr1);
-
- return sprintf(buf, "%d\n", reg_to_temp(value));
-}
-
-static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, int pos)
-{
- u64 value;
-
- value = in_be64(&pmd_regs->tm_tpr.val);
- /* access the corresponding byte */
- value >>= pos;
- value &= 0x3F;
-
- return sprintf(buf, "%d\n", reg_to_temp(value));
-}
-
-static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos)
-{
- u64 reg_value;
- unsigned int temp;
- u64 new_value;
- int ret;
-
- ret = sscanf(buf, "%u", &temp);
-
- if (ret != 1 || temp < TEMP_MIN || temp > TEMP_MAX)
- return -EINVAL;
-
- new_value = temp_to_reg(temp);
-
- reg_value = in_be64(&pmd_regs->tm_tpr.val);
-
- /* zero out bits for new value */
- reg_value &= ~(0xffull << pos);
- /* set bits to new value */
- reg_value |= new_value << pos;
-
- out_be64(&pmd_regs->tm_tpr.val, reg_value);
- return size;
-}
-
-static ssize_t spu_show_throttle_end(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return show_throttle(get_pmd_regs(dev), buf, 0);
-}
-
-static ssize_t spu_show_throttle_begin(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return show_throttle(get_pmd_regs(dev), buf, 8);
-}
-
-static ssize_t spu_show_throttle_full_stop(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return show_throttle(get_pmd_regs(dev), buf, 16);
-}
-
-static ssize_t spu_store_throttle_end(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return store_throttle(get_pmd_regs(dev), buf, size, 0);
-}
-
-static ssize_t spu_store_throttle_begin(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return store_throttle(get_pmd_regs(dev), buf, size, 8);
-}
-
-static ssize_t spu_store_throttle_full_stop(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return store_throttle(get_pmd_regs(dev), buf, size, 16);
-}
-
-static ssize_t ppe_show_temp(struct device *dev, char *buf, int pos)
-{
- struct cbe_pmd_regs __iomem *pmd_regs;
- u64 value;
-
- pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
- value = in_be64(&pmd_regs->ts_ctsr2);
-
- value = (value >> pos) & 0x3f;
-
- return sprintf(buf, "%d\n", reg_to_temp(value));
-}
-
-
-/* shows the temperature of the DTS on the PPE,
- * located near the linear thermal sensor */
-static ssize_t ppe_show_temp0(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return ppe_show_temp(dev, buf, 32);
-}
-
-/* shows the temperature of the second DTS on the PPE */
-static ssize_t ppe_show_temp1(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return ppe_show_temp(dev, buf, 0);
-}
-
-static ssize_t ppe_show_throttle_end(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 32);
-}
-
-static ssize_t ppe_show_throttle_begin(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 40);
-}
-
-static ssize_t ppe_show_throttle_full_stop(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return show_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, 48);
-}
-
-static ssize_t ppe_store_throttle_end(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 32);
-}
-
-static ssize_t ppe_store_throttle_begin(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 40);
-}
-
-static ssize_t ppe_store_throttle_full_stop(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t size)
-{
- return store_throttle(cbe_get_cpu_pmd_regs(dev->id), buf, size, 48);
-}
-
-
-static struct device_attribute attr_spu_temperature = {
- .attr = {.name = "temperature", .mode = 0400 },
- .show = spu_show_temp,
-};
-
-static DEVICE_PREFIX_ATTR(spu, throttle_end, 0600);
-static DEVICE_PREFIX_ATTR(spu, throttle_begin, 0600);
-static DEVICE_PREFIX_ATTR(spu, throttle_full_stop, 0600);
-
-
-static struct attribute *spu_attributes[] = {
- &attr_spu_temperature.attr,
- &attr_spu_throttle_end.attr,
- &attr_spu_throttle_begin.attr,
- &attr_spu_throttle_full_stop.attr,
- NULL,
-};
-
-static struct attribute_group spu_attribute_group = {
- .name = "thermal",
- .attrs = spu_attributes,
-};
-
-static struct device_attribute attr_ppe_temperature0 = {
- .attr = {.name = "temperature0", .mode = 0400 },
- .show = ppe_show_temp0,
-};
-
-static struct device_attribute attr_ppe_temperature1 = {
- .attr = {.name = "temperature1", .mode = 0400 },
- .show = ppe_show_temp1,
-};
-
-static DEVICE_PREFIX_ATTR(ppe, throttle_end, 0600);
-static DEVICE_PREFIX_ATTR(ppe, throttle_begin, 0600);
-static DEVICE_PREFIX_ATTR(ppe, throttle_full_stop, 0600);
-
-static struct attribute *ppe_attributes[] = {
- &attr_ppe_temperature0.attr,
- &attr_ppe_temperature1.attr,
- &attr_ppe_throttle_end.attr,
- &attr_ppe_throttle_begin.attr,
- &attr_ppe_throttle_full_stop.attr,
- NULL,
-};
-
-static struct attribute_group ppe_attribute_group = {
- .name = "thermal",
- .attrs = ppe_attributes,
-};
-
-/*
- * initialize throttling with default values
- */
-static int __init init_default_values(void)
-{
- int cpu;
- struct cbe_pmd_regs __iomem *pmd_regs;
- struct device *dev;
- union ppe_spe_reg tpr;
- union spe_reg str1;
- u64 str2;
- union spe_reg cr1;
- u64 cr2;
-
- /* TPR defaults */
- /* ppe
- * 1F - no full stop
- * 08 - dynamic throttling starts if over 80 degrees
- * 03 - dynamic throttling ceases if below 70 degrees */
- tpr.ppe = 0x1F0803;
- /* spe
- * 10 - full stopped when over 96 degrees
- * 08 - dynamic throttling starts if over 80 degrees
- * 03 - dynamic throttling ceases if below 70 degrees
- */
- tpr.spe = 0x100803;
-
- /* STR defaults */
- /* str1
- * 10 - stop 16 of 32 cycles
- */
- str1.val = 0x1010101010101010ull;
- /* str2
- * 10 - stop 16 of 32 cycles
- */
- str2 = 0x10;
-
- /* CR defaults */
- /* cr1
- * 4 - normal operation
- */
- cr1.val = 0x0404040404040404ull;
- /* cr2
- * 4 - normal operation
- */
- cr2 = 0x04;
-
- for_each_possible_cpu (cpu) {
- pr_debug("processing cpu %d\n", cpu);
- dev = get_cpu_device(cpu);
-
- if (!dev) {
- pr_info("invalid dev pointer for cbe_thermal\n");
- return -EINVAL;
- }
-
- pmd_regs = cbe_get_cpu_pmd_regs(dev->id);
-
- if (!pmd_regs) {
- pr_info("invalid CBE regs pointer for cbe_thermal\n");
- return -EINVAL;
- }
-
- out_be64(&pmd_regs->tm_str2, str2);
- out_be64(&pmd_regs->tm_str1.val, str1.val);
- out_be64(&pmd_regs->tm_tpr.val, tpr.val);
- out_be64(&pmd_regs->tm_cr1.val, cr1.val);
- out_be64(&pmd_regs->tm_cr2, cr2);
- }
-
- return 0;
-}
-
-
-static int __init thermal_init(void)
-{
- int rc = init_default_values();
-
- if (rc == 0) {
- spu_add_dev_attr_group(&spu_attribute_group);
- cpu_add_dev_attr_group(&ppe_attribute_group);
- }
-
- return rc;
-}
-module_init(thermal_init);
-
-static void __exit thermal_exit(void)
-{
- spu_remove_dev_attr_group(&spu_attribute_group);
- cpu_remove_dev_attr_group(&ppe_attribute_group);
-}
-module_exit(thermal_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>");
-
diff --git a/arch/powerpc/platforms/cell/celleb_pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
deleted file mode 100644
index 173568140a32..000000000000
--- a/arch/powerpc/platforms/cell/celleb_pci.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/*
- * Support for PCI on Celleb platform.
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This code is based on arch/powerpc/kernel/rtas_pci.c:
- * Copyright (C) 2001 Dave Engebretsen, IBM Corporation
- * Copyright (C) 2003 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/kernel.h>
-#include <linux/threads.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/pci_regs.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/slab.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/prom.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-#include "celleb_pci.h"
-
-#define MAX_PCI_DEVICES 32
-#define MAX_PCI_FUNCTIONS 8
-#define MAX_PCI_BASE_ADDRS 3 /* use 64 bit address */
-
-/* definition for fake pci configuration area for GbE, .... ,and etc. */
-
-struct celleb_pci_resource {
- struct resource r[MAX_PCI_BASE_ADDRS];
-};
-
-struct celleb_pci_private {
- unsigned char *fake_config[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
- struct celleb_pci_resource *res[MAX_PCI_DEVICES][MAX_PCI_FUNCTIONS];
-};
-
-static inline u8 celleb_fake_config_readb(void *addr)
-{
- u8 *p = addr;
- return *p;
-}
-
-static inline u16 celleb_fake_config_readw(void *addr)
-{
- __le16 *p = addr;
- return le16_to_cpu(*p);
-}
-
-static inline u32 celleb_fake_config_readl(void *addr)
-{
- __le32 *p = addr;
- return le32_to_cpu(*p);
-}
-
-static inline void celleb_fake_config_writeb(u32 val, void *addr)
-{
- u8 *p = addr;
- *p = val;
-}
-
-static inline void celleb_fake_config_writew(u32 val, void *addr)
-{
- __le16 val16;
- __le16 *p = addr;
- val16 = cpu_to_le16(val);
- *p = val16;
-}
-
-static inline void celleb_fake_config_writel(u32 val, void *addr)
-{
- __le32 val32;
- __le32 *p = addr;
- val32 = cpu_to_le32(val);
- *p = val32;
-}
-
-static unsigned char *get_fake_config_start(struct pci_controller *hose,
- int devno, int fn)
-{
- struct celleb_pci_private *private = hose->private_data;
-
- if (private == NULL)
- return NULL;
-
- return private->fake_config[devno][fn];
-}
-
-static struct celleb_pci_resource *get_resource_start(
- struct pci_controller *hose,
- int devno, int fn)
-{
- struct celleb_pci_private *private = hose->private_data;
-
- if (private == NULL)
- return NULL;
-
- return private->res[devno][fn];
-}
-
-
-static void celleb_config_read_fake(unsigned char *config, int where,
- int size, u32 *val)
-{
- char *p = config + where;
-
- switch (size) {
- case 1:
- *val = celleb_fake_config_readb(p);
- break;
- case 2:
- *val = celleb_fake_config_readw(p);
- break;
- case 4:
- *val = celleb_fake_config_readl(p);
- break;
- }
-}
-
-static void celleb_config_write_fake(unsigned char *config, int where,
- int size, u32 val)
-{
- char *p = config + where;
-
- switch (size) {
- case 1:
- celleb_fake_config_writeb(val, p);
- break;
- case 2:
- celleb_fake_config_writew(val, p);
- break;
- case 4:
- celleb_fake_config_writel(val, p);
- break;
- }
-}
-
-static int celleb_fake_pci_read_config(struct pci_bus *bus,
- unsigned int devfn, int where, int size, u32 *val)
-{
- char *config;
- struct pci_controller *hose = pci_bus_to_host(bus);
- unsigned int devno = devfn >> 3;
- unsigned int fn = devfn & 0x7;
-
- /* allignment check */
- BUG_ON(where % size);
-
- pr_debug(" fake read: bus=0x%x, ", bus->number);
- config = get_fake_config_start(hose, devno, fn);
-
- pr_debug("devno=0x%x, where=0x%x, size=0x%x, ", devno, where, size);
- if (!config) {
- pr_debug("failed\n");
- return PCIBIOS_DEVICE_NOT_FOUND;
- }
-
- celleb_config_read_fake(config, where, size, val);
- pr_debug("val=0x%x\n", *val);
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-
-static int celleb_fake_pci_write_config(struct pci_bus *bus,
- unsigned int devfn, int where, int size, u32 val)
-{
- char *config;
- struct pci_controller *hose = pci_bus_to_host(bus);
- struct celleb_pci_resource *res;
- unsigned int devno = devfn >> 3;
- unsigned int fn = devfn & 0x7;
-
- /* allignment check */
- BUG_ON(where % size);
-
- config = get_fake_config_start(hose, devno, fn);
-
- if (!config)
- return PCIBIOS_DEVICE_NOT_FOUND;
-
- if (val == ~0) {
- int i = (where - PCI_BASE_ADDRESS_0) >> 3;
-
- switch (where) {
- case PCI_BASE_ADDRESS_0:
- case PCI_BASE_ADDRESS_2:
- if (size != 4)
- return PCIBIOS_DEVICE_NOT_FOUND;
- res = get_resource_start(hose, devno, fn);
- if (!res)
- return PCIBIOS_DEVICE_NOT_FOUND;
- celleb_config_write_fake(config, where, size,
- (res->r[i].end - res->r[i].start));
- return PCIBIOS_SUCCESSFUL;
- case PCI_BASE_ADDRESS_1:
- case PCI_BASE_ADDRESS_3:
- case PCI_BASE_ADDRESS_4:
- case PCI_BASE_ADDRESS_5:
- break;
- default:
- break;
- }
- }
-
- celleb_config_write_fake(config, where, size, val);
- pr_debug(" fake write: where=%x, size=%d, val=%x\n",
- where, size, val);
-
- return PCIBIOS_SUCCESSFUL;
-}
-
-static struct pci_ops celleb_fake_pci_ops = {
- .read = celleb_fake_pci_read_config,
- .write = celleb_fake_pci_write_config,
-};
-
-static inline void celleb_setup_pci_base_addrs(struct pci_controller *hose,
- unsigned int devno, unsigned int fn,
- unsigned int num_base_addr)
-{
- u32 val;
- unsigned char *config;
- struct celleb_pci_resource *res;
-
- config = get_fake_config_start(hose, devno, fn);
- res = get_resource_start(hose, devno, fn);
-
- if (!config || !res)
- return;
-
- switch (num_base_addr) {
- case 3:
- val = (res->r[2].start & 0xfffffff0)
- | PCI_BASE_ADDRESS_MEM_TYPE_64;
- celleb_config_write_fake(config, PCI_BASE_ADDRESS_4, 4, val);
- val = res->r[2].start >> 32;
- celleb_config_write_fake(config, PCI_BASE_ADDRESS_5, 4, val);
- /* FALLTHROUGH */
- case 2:
- val = (res->r[1].start & 0xfffffff0)
- | PCI_BASE_ADDRESS_MEM_TYPE_64;
- celleb_config_write_fake(config, PCI_BASE_ADDRESS_2, 4, val);
- val = res->r[1].start >> 32;
- celleb_config_write_fake(config, PCI_BASE_ADDRESS_3, 4, val);
- /* FALLTHROUGH */
- case 1:
- val = (res->r[0].start & 0xfffffff0)
- | PCI_BASE_ADDRESS_MEM_TYPE_64;
- celleb_config_write_fake(config, PCI_BASE_ADDRESS_0, 4, val);
- val = res->r[0].start >> 32;
- celleb_config_write_fake(config, PCI_BASE_ADDRESS_1, 4, val);
- break;
- }
-
- val = PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
- celleb_config_write_fake(config, PCI_COMMAND, 2, val);
-}
-
-static int __init celleb_setup_fake_pci_device(struct device_node *node,
- struct pci_controller *hose)
-{
- unsigned int rlen;
- int num_base_addr = 0;
- u32 val;
- const u32 *wi0, *wi1, *wi2, *wi3, *wi4;
- unsigned int devno, fn;
- struct celleb_pci_private *private = hose->private_data;
- unsigned char **config = NULL;
- struct celleb_pci_resource **res = NULL;
- const char *name;
- const unsigned long *li;
- int size, result;
-
- if (private == NULL) {
- printk(KERN_ERR "PCI: "
- "memory space for pci controller is not assigned\n");
- goto error;
- }
-
- name = of_get_property(node, "model", &rlen);
- if (!name) {
- printk(KERN_ERR "PCI: model property not found.\n");
- goto error;
- }
-
- wi4 = of_get_property(node, "reg", &rlen);
- if (wi4 == NULL)
- goto error;
-
- devno = ((wi4[0] >> 8) & 0xff) >> 3;
- fn = (wi4[0] >> 8) & 0x7;
-
- pr_debug("PCI: celleb_setup_fake_pci() %s devno=%x fn=%x\n", name,
- devno, fn);
-
- size = 256;
- config = &private->fake_config[devno][fn];
- *config = zalloc_maybe_bootmem(size, GFP_KERNEL);
- if (*config == NULL) {
- printk(KERN_ERR "PCI: "
- "not enough memory for fake configuration space\n");
- goto error;
- }
- pr_debug("PCI: fake config area assigned 0x%016lx\n",
- (unsigned long)*config);
-
- size = sizeof(struct celleb_pci_resource);
- res = &private->res[devno][fn];
- *res = zalloc_maybe_bootmem(size, GFP_KERNEL);
- if (*res == NULL) {
- printk(KERN_ERR
- "PCI: not enough memory for resource data space\n");
- goto error;
- }
- pr_debug("PCI: res assigned 0x%016lx\n", (unsigned long)*res);
-
- wi0 = of_get_property(node, "device-id", NULL);
- wi1 = of_get_property(node, "vendor-id", NULL);
- wi2 = of_get_property(node, "class-code", NULL);
- wi3 = of_get_property(node, "revision-id", NULL);
- if (!wi0 || !wi1 || !wi2 || !wi3) {
- printk(KERN_ERR "PCI: Missing device tree properties.\n");
- goto error;
- }
-
- celleb_config_write_fake(*config, PCI_DEVICE_ID, 2, wi0[0] & 0xffff);
- celleb_config_write_fake(*config, PCI_VENDOR_ID, 2, wi1[0] & 0xffff);
- pr_debug("class-code = 0x%08x\n", wi2[0]);
-
- celleb_config_write_fake(*config, PCI_CLASS_PROG, 1, wi2[0] & 0xff);
- celleb_config_write_fake(*config, PCI_CLASS_DEVICE, 2,
- (wi2[0] >> 8) & 0xffff);
- celleb_config_write_fake(*config, PCI_REVISION_ID, 1, wi3[0]);
-
- while (num_base_addr < MAX_PCI_BASE_ADDRS) {
- result = of_address_to_resource(node,
- num_base_addr, &(*res)->r[num_base_addr]);
- if (result)
- break;
- num_base_addr++;
- }
-
- celleb_setup_pci_base_addrs(hose, devno, fn, num_base_addr);
-
- li = of_get_property(node, "interrupts", &rlen);
- if (!li) {
- printk(KERN_ERR "PCI: interrupts not found.\n");
- goto error;
- }
- val = li[0];
- celleb_config_write_fake(*config, PCI_INTERRUPT_PIN, 1, 1);
- celleb_config_write_fake(*config, PCI_INTERRUPT_LINE, 1, val);
-
-#ifdef DEBUG
- pr_debug("PCI: %s irq=%ld\n", name, li[0]);
- for (i = 0; i < 6; i++) {
- celleb_config_read_fake(*config,
- PCI_BASE_ADDRESS_0 + 0x4 * i, 4,
- &val);
- pr_debug("PCI: %s fn=%d base_address_%d=0x%x\n",
- name, fn, i, val);
- }
-#endif
-
- celleb_config_write_fake(*config, PCI_HEADER_TYPE, 1,
- PCI_HEADER_TYPE_NORMAL);
-
- return 0;
-
-error:
- if (mem_init_done) {
- if (config && *config)
- kfree(*config);
- if (res && *res)
- kfree(*res);
-
- } else {
- if (config && *config) {
- size = 256;
- free_bootmem(__pa(*config), size);
- }
- if (res && *res) {
- size = sizeof(struct celleb_pci_resource);
- free_bootmem(__pa(*res), size);
- }
- }
-
- return 1;
-}
-
-static int __init phb_set_bus_ranges(struct device_node *dev,
- struct pci_controller *phb)
-{
- const int *bus_range;
- unsigned int len;
-
- bus_range = of_get_property(dev, "bus-range", &len);
- if (bus_range == NULL || len < 2 * sizeof(int))
- return 1;
-
- phb->first_busno = bus_range[0];
- phb->last_busno = bus_range[1];
-
- return 0;
-}
-
-static void __init celleb_alloc_private_mem(struct pci_controller *hose)
-{
- hose->private_data =
- zalloc_maybe_bootmem(sizeof(struct celleb_pci_private),
- GFP_KERNEL);
-}
-
-static int __init celleb_setup_fake_pci(struct device_node *dev,
- struct pci_controller *phb)
-{
- struct device_node *node;
-
- phb->ops = &celleb_fake_pci_ops;
- celleb_alloc_private_mem(phb);
-
- for (node = of_get_next_child(dev, NULL);
- node != NULL; node = of_get_next_child(dev, node))
- celleb_setup_fake_pci_device(node, phb);
-
- return 0;
-}
-
-static struct celleb_phb_spec celleb_fake_pci_spec __initdata = {
- .setup = celleb_setup_fake_pci,
-};
-
-static struct of_device_id celleb_phb_match[] __initdata = {
- {
- .name = "pci-pseudo",
- .data = &celleb_fake_pci_spec,
- }, {
- .name = "epci",
- .data = &celleb_epci_spec,
- }, {
- .name = "pcie",
- .data = &celleb_pciex_spec,
- }, {
- },
-};
-
-int __init celleb_setup_phb(struct pci_controller *phb)
-{
- struct device_node *dev = phb->dn;
- const struct of_device_id *match;
- const struct celleb_phb_spec *phb_spec;
- int rc;
-
- match = of_match_node(celleb_phb_match, dev);
- if (!match)
- return 1;
-
- phb_set_bus_ranges(dev, phb);
- phb->buid = 1;
-
- phb_spec = match->data;
- rc = (*phb_spec->setup)(dev, phb);
- if (rc)
- return 1;
-
- if (phb_spec->ops)
- iowa_register_bus(phb, phb_spec->ops,
- phb_spec->iowa_init,
- phb_spec->iowa_data);
- return 0;
-}
-
-int celleb_pci_probe_mode(struct pci_bus *bus)
-{
- return PCI_PROBE_DEVTREE;
-}
diff --git a/arch/powerpc/platforms/cell/celleb_pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
deleted file mode 100644
index a801fcc5f389..000000000000
--- a/arch/powerpc/platforms/cell/celleb_pci.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * pci prototypes for Celleb platform
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _CELLEB_PCI_H
-#define _CELLEB_PCI_H
-
-#include <linux/pci.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/prom.h>
-#include <asm/ppc-pci.h>
-#include <asm/io-workarounds.h>
-
-struct iowa_bus;
-
-struct celleb_phb_spec {
- int (*setup)(struct device_node *, struct pci_controller *);
- struct ppc_pci_io *ops;
- int (*iowa_init)(struct iowa_bus *, void *);
- void *iowa_data;
-};
-
-extern int celleb_setup_phb(struct pci_controller *);
-extern int celleb_pci_probe_mode(struct pci_bus *);
-
-extern struct celleb_phb_spec celleb_epci_spec;
-extern struct celleb_phb_spec celleb_pciex_spec;
-
-#endif /* _CELLEB_PCI_H */
diff --git a/arch/powerpc/platforms/cell/celleb_scc.h b/arch/powerpc/platforms/cell/celleb_scc.h
deleted file mode 100644
index b596a711c348..000000000000
--- a/arch/powerpc/platforms/cell/celleb_scc.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * SCC (Super Companion Chip) definitions
- *
- * (C) Copyright 2004-2006 TOSHIBA CORPORATION
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef _CELLEB_SCC_H
-#define _CELLEB_SCC_H
-
-#define PCI_VENDOR_ID_TOSHIBA_2 0x102f
-#define PCI_DEVICE_ID_TOSHIBA_SCC_PCIEXC_BRIDGE 0x01b0
-#define PCI_DEVICE_ID_TOSHIBA_SCC_EPCI_BRIDGE 0x01b1
-#define PCI_DEVICE_ID_TOSHIBA_SCC_BRIDGE 0x01b2
-#define PCI_DEVICE_ID_TOSHIBA_SCC_GBE 0x01b3
-#define PCI_DEVICE_ID_TOSHIBA_SCC_ATA 0x01b4
-#define PCI_DEVICE_ID_TOSHIBA_SCC_USB2 0x01b5
-#define PCI_DEVICE_ID_TOSHIBA_SCC_USB 0x01b6
-#define PCI_DEVICE_ID_TOSHIBA_SCC_ENCDEC 0x01b7
-
-#define SCC_EPCI_REG 0x0000d000
-
-/* EPCI registers */
-#define SCC_EPCI_CNF10_REG 0x010
-#define SCC_EPCI_CNF14_REG 0x014
-#define SCC_EPCI_CNF18_REG 0x018
-#define SCC_EPCI_PVBAT 0x100
-#define SCC_EPCI_VPMBAT 0x104
-#define SCC_EPCI_VPIBAT 0x108
-#define SCC_EPCI_VCSR 0x110
-#define SCC_EPCI_VIENAB 0x114
-#define SCC_EPCI_VISTAT 0x118
-#define SCC_EPCI_VRDCOUNT 0x124
-#define SCC_EPCI_BAM0 0x12c
-#define SCC_EPCI_BAM1 0x134
-#define SCC_EPCI_BAM2 0x13c
-#define SCC_EPCI_IADR 0x164
-#define SCC_EPCI_CLKRST 0x800
-#define SCC_EPCI_INTSET 0x804
-#define SCC_EPCI_STATUS 0x808
-#define SCC_EPCI_ABTSET 0x80c
-#define SCC_EPCI_WATRP 0x810
-#define SCC_EPCI_DUMYRADR 0x814
-#define SCC_EPCI_SWRESP 0x818
-#define SCC_EPCI_CNTOPT 0x81c
-#define SCC_EPCI_ECMODE 0xf00
-#define SCC_EPCI_IOM_AC_NUM 5
-#define SCC_EPCI_IOM_ACTE(n) (0xf10 + (n) * 4)
-#define SCC_EPCI_IOT_AC_NUM 4
-#define SCC_EPCI_IOT_ACTE(n) (0xf30 + (n) * 4)
-#define SCC_EPCI_MAEA 0xf50
-#define SCC_EPCI_MAEC 0xf54
-#define SCC_EPCI_CKCTRL 0xff0
-
-/* bits for SCC_EPCI_VCSR */
-#define SCC_EPCI_VCSR_FRE 0x00020000
-#define SCC_EPCI_VCSR_FWE 0x00010000
-#define SCC_EPCI_VCSR_DR 0x00000400
-#define SCC_EPCI_VCSR_SR 0x00000008
-#define SCC_EPCI_VCSR_AT 0x00000004
-